From d936866c98b9507bc09432e5640051289402569c Mon Sep 17 00:00:00 2001 From: Growl Date: Fri, 8 May 2026 16:34:13 +0800 Subject: [PATCH 01/14] Add a dockerfile to make pretty Use tools what GitHub Precommit workflow uses --- tools/precommit/Dockerfile | 54 ++++++++++++ tools/precommit/make-pretty.py | 145 +++++++++++++++++++++++++++++++++ 2 files changed, 199 insertions(+) create mode 100644 tools/precommit/Dockerfile create mode 100644 tools/precommit/make-pretty.py diff --git a/tools/precommit/Dockerfile b/tools/precommit/Dockerfile new file mode 100644 index 00000000000..ed7670234cc --- /dev/null +++ b/tools/precommit/Dockerfile @@ -0,0 +1,54 @@ +# syntax=docker/dockerfile:1.7 +# Usage: DOCKER_BUILDKIT=1 docker buildx build -f Dockerfile -t abacus-make-pretty ../.. --target export --progress=plain --output type=local,dest=../.. + +FROM ubuntu:24.04 AS pretty + +ARG DEBIAN_FRONTEND=noninteractive +ARG BUILD_DIR=build +ARG JOBS=1 +ARG CMAKE_ARGS="" +ARG CLANG_TIDY_EXTRA_ARGS="" +ARG STRICT_CLANG_TIDY=0 + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + bash \ + git \ + findutils \ + build-essential \ + cmake \ + ninja-build \ + pkg-config \ + python3 \ + clang \ + clang-tidy \ + clang-format \ + openmpi-bin \ + libopenmpi-dev \ + libfftw3-dev \ + libelpa-dev \ + libopenblas-dev \ + libscalapack-openmpi-dev \ + libxc-dev \ + libcereal-dev \ + libgtest-dev \ + libgmock-dev \ + libomp-dev \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /repo + +COPY . . 
+ +ENV BUILD_DIR=${BUILD_DIR} +ENV JOBS=${JOBS} +ENV CMAKE_ARGS=${CMAKE_ARGS} +ENV CLANG_TIDY_EXTRA_ARGS=${CLANG_TIDY_EXTRA_ARGS} +ENV STRICT_CLANG_TIDY=${STRICT_CLANG_TIDY} + +RUN python3 /repo/tools/precommit/make-pretty.py + +FROM scratch AS export +COPY --from=pretty /out/ / diff --git a/tools/precommit/make-pretty.py b/tools/precommit/make-pretty.py new file mode 100644 index 00000000000..884cfa98379 --- /dev/null +++ b/tools/precommit/make-pretty.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 + +import json +import os +import shutil +import subprocess +from pathlib import Path + +root = Path("/repo") +build_dir = Path(os.environ.get("BUILD_DIR", "build")) + +cpp_format_exts = { + ".c", ".cc", ".cpp", ".cxx", ".c++", + ".h", ".hh", ".hpp", ".hxx", + ".ipp", ".tpp", + ".cu", ".cuh", +} + +tidy_source_exts = { + ".cc", ".cpp", ".cxx", ".c++", +} + +exclude_names = { + ".git", + "build", + build_dir.name, + ".cache", +} + +def is_excluded(path: Path) -> bool: + return any(part in exclude_names or part.startswith("cmake-build-") for part in path.parts) + +def run(cmd, check=True): + print("+ " + " ".join(cmd), flush=True) + return subprocess.run(cmd, check=check) + +os.chdir(root) + +compile_db = root / build_dir / "compile_commands.json" + +if not compile_db.exists(): + cmake_args = os.environ.get("CMAKE_ARGS", "").split() + run([ + "cmake", + "-S", ".", + "-B", str(build_dir), + "-G", "Ninja", + "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", + *cmake_args, + ]) + +if not compile_db.exists(): + raise SystemExit(f"ERROR: {compile_db} was not generated") + +with compile_db.open("r", encoding="utf-8") as f: + db = json.load(f) + +tidy_files = [] +seen = set() + +for entry in db: + filename = entry.get("file") + if not filename: + continue + + path = Path(filename) + if not path.is_absolute(): + path = Path(entry.get("directory", root)) / path + + try: + rel = path.resolve().relative_to(root.resolve()) + except ValueError: + continue + + if is_excluded(rel): + continue + + if 
rel.suffix in tidy_source_exts and rel.exists() and rel not in seen: + tidy_files.append(rel) + seen.add(rel) + +print(f"==> clang-tidy translation units: {len(tidy_files)}", flush=True) + +tidy_failures = [] +extra = os.environ.get("CLANG_TIDY_EXTRA_ARGS", "").split() +strict = os.environ.get("STRICT_CLANG_TIDY", "0") == "1" + +for rel in tidy_files: + cmd = [ + "clang-tidy", + str(rel), + f"-p={build_dir}", + "--fix-errors", + *extra, + ] + print("+ " + " ".join(cmd), flush=True) + ret = subprocess.run(cmd).returncode + if ret != 0: + tidy_failures.append((str(rel), ret)) + print(f"WARNING: clang-tidy failed for {rel} with exit code {ret}", flush=True) + +if tidy_failures: + print("==> clang-tidy failures:", flush=True) + for filename, ret in tidy_failures: + print(f" {ret}: {filename}", flush=True) + if strict: + raise SystemExit("ERROR: clang-tidy failed and STRICT_CLANG_TIDY=1") + +format_files = [] + +for path in root.rglob("*"): + if not path.is_file(): + continue + + rel = path.relative_to(root) + if is_excluded(rel): + continue + + if path.suffix in cpp_format_exts: + format_files.append(rel) + +print(f"==> clang-format files: {len(format_files)}", flush=True) + +batch = [] +for rel in format_files: + batch.append(str(rel)) + if len(batch) >= 100: + run(["clang-format", "-i", "-style=file", "--fallback-style=GNU", *batch]) + batch.clear() + +if batch: + run(["clang-format", "-i", "-style=file", "--fallback-style=GNU", *batch]) + +out = Path("/out") +if out.exists(): + shutil.rmtree(out) +out.mkdir(parents=True) + +for rel in format_files: + src = root / rel + dst = out / rel + dst.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(src, dst) + +print("==> Exported C/C++ files only to /out", flush=True) From 72c4966ec4376645ed199efbd08d11994ea56340 Mon Sep 17 00:00:00 2001 From: Growl Date: Fri, 8 May 2026 19:23:31 +0800 Subject: [PATCH 02/14] Split Precommit and Tests --- .github/workflows/precommit.yml | 49 +++++++++++++++++++++++++++++++++ 
.github/workflows/test.yml | 11 -------- 2 files changed, 49 insertions(+), 11 deletions(-) create mode 100644 .github/workflows/precommit.yml diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml new file mode 100644 index 00000000000..b594c781fb0 --- /dev/null +++ b/.github/workflows/precommit.yml @@ -0,0 +1,49 @@ +name: Precommit + +on: + pull_request: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + name: Precommit + runs-on: X64 + if: github.repository_owner == 'deepmodeling' + container: + image: ghcr.io/deepmodeling/abacus-gnu + volumes: + - /tmp/ccache:/github/home/.ccache + steps: + - name: Checkout repository + uses: actions/checkout@v6 + with: + fetch-depth: 0 + # We will handle submodules manually after fixing ownership + submodules: 'false' + + - name: Take ownership of the workspace and update submodules + run: | + sudo chown -R $(whoami) . + git submodule update --init --recursive + + - name: Install CI tools + run: | + sudo apt-get update + sudo apt-get install -y ccache ca-certificates python-is-python3 python3-pip + sudo pip install clang-format clang-tidy + + - name: Configure + run: | + cmake -B build -DBUILD_TESTING=ON -DENABLE_MLALGO=ON -DENABLE_LIBXC=ON -DENABLE_LIBRI=ON -DENABLE_GOOGLEBENCH=ON -DENABLE_RAPIDJSON=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=1 -DENABLE_FLOAT_FFTW=ON + + - uses: pre-commit/action@v3.0.1 + with: + extra_args: + --from-ref ${{ github.event.pull_request.base.sha }} + --to-ref ${{ github.event.pull_request.head.sha }} + continue-on-error: true + - uses: pre-commit-ci/lite-action@v1.0.3 + diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a21d54b2a82..f7e418d3a00 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -39,17 +39,6 @@ jobs: run: | cmake -B build -DBUILD_TESTING=ON -DENABLE_MLALGO=ON -DENABLE_LIBXC=ON -DENABLE_LIBRI=ON -DENABLE_GOOGLEBENCH=ON -DENABLE_RAPIDJSON=ON 
-DCMAKE_EXPORT_COMPILE_COMMANDS=1 -DENABLE_FLOAT_FFTW=ON -# Temporarily removed because no one maintains this now. -# And it will break the CI test workflow. - -# - uses: pre-commit/action@v3.0.1 -# with: -# extra_args: -# --from-ref ${{ github.event.pull_request.base.sha }} -# --to-ref ${{ github.event.pull_request.head.sha }} -# continue-on-error: true -# - uses: pre-commit-ci/lite-action@v1.0.3 - - name: Build run: | cmake --build build -j8 From ee07e54856b0fc3635c45648cd5ca0abdd2198e0 Mon Sep 17 00:00:00 2001 From: Growl Date: Fri, 8 May 2026 19:25:02 +0800 Subject: [PATCH 03/14] Fix .clang-format and .clang-tidy --- .clang-format | 6 +++--- .clang-tidy | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.clang-format b/.clang-format index 8970df99c29..2d0f02744d8 100644 --- a/.clang-format +++ b/.clang-format @@ -1,6 +1,6 @@ --- Language: Cpp -BasedOnStyle: Microsoft +BasedOnStyle: GNU AlwaysBreakTemplateDeclarations: Yes @@ -18,7 +18,7 @@ FixNamespaceComments: true # InsertBraces: true # only for clang-format version15.0.0 or later # About include -IncludeBlocks: Regroup +IncludeBlocks: Preserve IncludeIsMainRegex: '([-_](test|unittest))?$' IncludeIsMainSourceRegex: '' @@ -30,7 +30,7 @@ DerivePointerAlignment: false PointerAlignment: Left ReflowComments: true -SortIncludes: true +SortIncludes: false SortUsingDeclarations: true # About space diff --git a/.clang-tidy b/.clang-tidy index d5587c46012..28672da9c5e 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,5 +1,6 @@ --- Checks: ' + -*, modernize-deprecated-headers, modernize-redundant-void-arg, modernize-use-bool-literals, From dff547abebdc07d151a6c718ff6b8778504e19f2 Mon Sep 17 00:00:00 2001 From: Growl Date: Fri, 8 May 2026 19:33:16 +0800 Subject: [PATCH 04/14] Tweaks --- tools/precommit/Dockerfile | 19 ++++++++++++------- tools/precommit/make-pretty.py | 21 +++++++++++++++------ 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/tools/precommit/Dockerfile 
b/tools/precommit/Dockerfile index ed7670234cc..5f4ce106fb5 100644 --- a/tools/precommit/Dockerfile +++ b/tools/precommit/Dockerfile @@ -5,14 +5,15 @@ FROM ubuntu:24.04 AS pretty ARG DEBIAN_FRONTEND=noninteractive ARG BUILD_DIR=build -ARG JOBS=1 +ARG JOBS=16 ARG CMAKE_ARGS="" ARG CLANG_TIDY_EXTRA_ARGS="" ARG STRICT_CLANG_TIDY=0 SHELL ["/bin/bash", "-o", "pipefail", "-c"] -RUN apt-get update && apt-get install -y --no-install-recommends \ +RUN apt-get update -qq +RUN apt-get install -qy --no-install-recommends \ ca-certificates \ bash \ git \ @@ -22,9 +23,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ ninja-build \ pkg-config \ python3 \ - clang \ - clang-tidy \ - clang-format \ + python3-pip \ + python3-venv \ openmpi-bin \ libopenmpi-dev \ libfftw3-dev \ @@ -35,8 +35,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libcereal-dev \ libgtest-dev \ libgmock-dev \ - libomp-dev \ - && rm -rf /var/lib/apt/lists/* + libomp-dev +RUN apt-get clean + +RUN python3 -m venv /opt/precommit-venv \ + && /opt/precommit-venv/bin/pip install --no-cache-dir --upgrade pip \ + && /opt/precommit-venv/bin/pip install --no-cache-dir clang-format clang-tidy +ENV PATH="/opt/precommit-venv/bin:${PATH}" WORKDIR /repo diff --git a/tools/precommit/make-pretty.py b/tools/precommit/make-pretty.py index 884cfa98379..a28020739de 100644 --- a/tools/precommit/make-pretty.py +++ b/tools/precommit/make-pretty.py @@ -2,12 +2,15 @@ import json import os +import shlex import shutil import subprocess +from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path root = Path("/repo") build_dir = Path(os.environ.get("BUILD_DIR", "build")) +jobs = int(os.environ.get("JOBS", "1")) cpp_format_exts = { ".c", ".cc", ".cpp", ".cxx", ".c++", @@ -39,7 +42,7 @@ def run(cmd, check=True): compile_db = root / build_dir / "compile_commands.json" if not compile_db.exists(): - cmake_args = os.environ.get("CMAKE_ARGS", "").split() + cmake_args = 
shlex.split(os.environ.get("CMAKE_ARGS", "")) run([ "cmake", "-S", ".", @@ -82,10 +85,10 @@ def run(cmd, check=True): print(f"==> clang-tidy translation units: {len(tidy_files)}", flush=True) tidy_failures = [] -extra = os.environ.get("CLANG_TIDY_EXTRA_ARGS", "").split() +extra = shlex.split(os.environ.get("CLANG_TIDY_EXTRA_ARGS", "")) strict = os.environ.get("STRICT_CLANG_TIDY", "0") == "1" -for rel in tidy_files: +def run_tidy(rel): cmd = [ "clang-tidy", str(rel), @@ -95,9 +98,15 @@ def run(cmd, check=True): ] print("+ " + " ".join(cmd), flush=True) ret = subprocess.run(cmd).returncode - if ret != 0: - tidy_failures.append((str(rel), ret)) - print(f"WARNING: clang-tidy failed for {rel} with exit code {ret}", flush=True) + return str(rel), ret + +with ThreadPoolExecutor(max_workers=max(1, jobs)) as pool: + futures = [pool.submit(run_tidy, rel) for rel in tidy_files] + for fut in as_completed(futures): + filename, ret = fut.result() + if ret != 0: + tidy_failures.append((str(rel), ret)) + print(f"WARNING: clang-tidy failed for {rel} with exit code {ret}", flush=True) if tidy_failures: print("==> clang-tidy failures:", flush=True) From 41ba217f32a46890429c398d1ef447b16a21850d Mon Sep 17 00:00:00 2001 From: Growl Date: Fri, 8 May 2026 19:54:55 +0800 Subject: [PATCH 05/14] Fixes make_pretty.py --- tools/precommit/make-pretty.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/precommit/make-pretty.py b/tools/precommit/make-pretty.py index a28020739de..a45c562b0de 100644 --- a/tools/precommit/make-pretty.py +++ b/tools/precommit/make-pretty.py @@ -89,13 +89,21 @@ def run(cmd, check=True): strict = os.environ.get("STRICT_CLANG_TIDY", "0") == "1" def run_tidy(rel): + abs_file = str((root / rel).resolve()) + + line_filter = json.dumps([ + {"name": abs_file, "lines": [[1, 1000000]]} + ]) + cmd = [ "clang-tidy", str(rel), f"-p={build_dir}", + f"-line-filter={line_filter}", "--fix-errors", *extra, ] + print("+ " + " ".join(cmd), flush=True) ret = 
subprocess.run(cmd).returncode return str(rel), ret From ff378cd6d3da4abe1c61311ae03110edf654adfb Mon Sep 17 00:00:00 2001 From: Growl Date: Fri, 8 May 2026 20:09:17 +0800 Subject: [PATCH 06/14] Precommit: Use latest pip and clang-tidy version --- .github/workflows/precommit.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml index b594c781fb0..a78092c4ba9 100644 --- a/.github/workflows/precommit.yml +++ b/.github/workflows/precommit.yml @@ -33,6 +33,7 @@ jobs: run: | sudo apt-get update sudo apt-get install -y ccache ca-certificates python-is-python3 python3-pip + sudo pip install --upgrade pip sudo pip install clang-format clang-tidy - name: Configure From aa299d3789ad60af3ceeade36815b10dde92a390 Mon Sep 17 00:00:00 2001 From: Growl Date: Fri, 8 May 2026 20:23:40 +0800 Subject: [PATCH 07/14] Include headers to clang-tidy --- tools/precommit/Dockerfile | 2 +- tools/precommit/{make-pretty.py => make_pretty.py} | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) rename tools/precommit/{make-pretty.py => make_pretty.py} (97%) diff --git a/tools/precommit/Dockerfile b/tools/precommit/Dockerfile index 5f4ce106fb5..af638614df6 100644 --- a/tools/precommit/Dockerfile +++ b/tools/precommit/Dockerfile @@ -53,7 +53,7 @@ ENV CMAKE_ARGS=${CMAKE_ARGS} ENV CLANG_TIDY_EXTRA_ARGS=${CLANG_TIDY_EXTRA_ARGS} ENV STRICT_CLANG_TIDY=${STRICT_CLANG_TIDY} -RUN python3 /repo/tools/precommit/make-pretty.py +RUN python3 /repo/tools/precommit/make_pretty.py FROM scratch AS export COPY --from=pretty /out/ / diff --git a/tools/precommit/make-pretty.py b/tools/precommit/make_pretty.py similarity index 97% rename from tools/precommit/make-pretty.py rename to tools/precommit/make_pretty.py index a45c562b0de..0b4abc0f634 100644 --- a/tools/precommit/make-pretty.py +++ b/tools/precommit/make_pretty.py @@ -20,7 +20,10 @@ } tidy_source_exts = { - ".cc", ".cpp", ".cxx", ".c++", + ".c", ".cc", ".cpp", ".cxx", ".c++", + ".h", 
".hh", ".hpp", ".hxx", + ".ipp", ".tpp", + ".cu", ".cuh", } exclude_names = { From c8d3f220dd61222f9c2f74905d70abd77bb514a9 Mon Sep 17 00:00:00 2001 From: Growl Date: Fri, 8 May 2026 20:27:58 +0800 Subject: [PATCH 08/14] Relax column limit --- .clang-format | 1 + 1 file changed, 1 insertion(+) diff --git a/.clang-format b/.clang-format index 2d0f02744d8..97a0989e8a3 100644 --- a/.clang-format +++ b/.clang-format @@ -4,6 +4,7 @@ BasedOnStyle: GNU AlwaysBreakTemplateDeclarations: Yes +ColumnLimit: 120 AllowAllArgumentsOnNextLine: false AllowAllParametersOfDeclarationOnNextLine: false BinPackArguments: false From fbc59f9184cd82c502b4af45a9adfec5bea263cc Mon Sep 17 00:00:00 2001 From: Growl Date: Fri, 8 May 2026 20:44:51 +0800 Subject: [PATCH 09/14] Use STRICT_CLANG_TIDY=1 --- tools/precommit/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/precommit/Dockerfile b/tools/precommit/Dockerfile index af638614df6..85bba41b635 100644 --- a/tools/precommit/Dockerfile +++ b/tools/precommit/Dockerfile @@ -8,7 +8,7 @@ ARG BUILD_DIR=build ARG JOBS=16 ARG CMAKE_ARGS="" ARG CLANG_TIDY_EXTRA_ARGS="" -ARG STRICT_CLANG_TIDY=0 +ARG STRICT_CLANG_TIDY=1 SHELL ["/bin/bash", "-o", "pipefail", "-c"] From 12189060a0d981a51106430ccc6f45f0c5a05b46 Mon Sep 17 00:00:00 2001 From: Growl Date: Fri, 8 May 2026 21:03:20 +0800 Subject: [PATCH 10/14] Do not rely on CMake for files to be checked --- tools/precommit/make_pretty.py | 236 +++++++++++++++++++-------------- 1 file changed, 140 insertions(+), 96 deletions(-) diff --git a/tools/precommit/make_pretty.py b/tools/precommit/make_pretty.py index 0b4abc0f634..43089613ba0 100644 --- a/tools/precommit/make_pretty.py +++ b/tools/precommit/make_pretty.py @@ -2,24 +2,18 @@ import json import os -import shlex import shutil import subprocess from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path -root = Path("/repo") -build_dir = Path(os.environ.get("BUILD_DIR", "build")) -jobs = 
int(os.environ.get("JOBS", "1")) +root = Path("/repo").resolve() +build_dir = root / "build" +out = Path("/out").resolve() -cpp_format_exts = { - ".c", ".cc", ".cpp", ".cxx", ".c++", - ".h", ".hh", ".hpp", ".hxx", - ".ipp", ".tpp", - ".cu", ".cuh", -} +jobs = int(os.environ.get("JOBS", "1")) -tidy_source_exts = { +precommit_exts = { ".c", ".cc", ".cpp", ".cxx", ".c++", ".h", ".hh", ".hpp", ".hxx", ".ipp", ".tpp", @@ -29,137 +23,187 @@ exclude_names = { ".git", "build", - build_dir.name, ".cache", + "__pycache__", } def is_excluded(path: Path) -> bool: - return any(part in exclude_names or part.startswith("cmake-build-") for part in path.parts) + return any( + part in exclude_names + or part.startswith("cmake-build-") + or part.startswith("build-") + for part in path.parts + ) def run(cmd, check=True): - print("+ " + " ".join(cmd), flush=True) + print("+ " + " ".join(str(x) for x in cmd), flush=True) return subprocess.run(cmd, check=check) -os.chdir(root) +def collect_precommit_files(): + files = [] -compile_db = root / build_dir / "compile_commands.json" + for path in root.rglob("*"): + if not path.is_file(): + continue -if not compile_db.exists(): - cmake_args = shlex.split(os.environ.get("CMAKE_ARGS", "")) - run([ - "cmake", - "-S", ".", - "-B", str(build_dir), - "-G", "Ninja", - "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", - *cmake_args, - ]) + rel = path.relative_to(root) + + if is_excluded(rel): + continue + + if path.suffix in precommit_exts: + files.append(rel) + + return sorted(files) + +def language_for_file(path: Path): + suffix = path.suffix + + if suffix == ".c": + return "clang", "c", "-std=c11" + + if suffix in {".cu", ".cuh"}: + return "clang++", "cuda", "-std=c++17" -if not compile_db.exists(): - raise SystemExit(f"ERROR: {compile_db} was not generated") + if suffix in {".h", ".hh", ".hpp", ".hxx", ".ipp", ".tpp"}: + return "clang++", "c++-header", "-std=c++17" -with compile_db.open("r", encoding="utf-8") as f: - db = json.load(f) + return "clang++", 
"c++", "-std=c++17" -tidy_files = [] -seen = set() +def collect_include_dirs(files): + include_dirs = { + root, + root / "source", + root / "source" / "source_base", + root / "python" / "pyabacus" / "src", + } -for entry in db: - filename = entry.get("file") - if not filename: - continue + for rel in files: + include_dirs.add((root / rel).parent.resolve()) - path = Path(filename) - if not path.is_absolute(): - path = Path(entry.get("directory", root)) / path + return sorted(include_dirs) - try: - rel = path.resolve().relative_to(root.resolve()) - except ValueError: - continue +def generate_compile_commands(files): + if build_dir.exists(): + shutil.rmtree(build_dir) - if is_excluded(rel): - continue + build_dir.mkdir(parents=True, exist_ok=True) - if rel.suffix in tidy_source_exts and rel.exists() and rel not in seen: - tidy_files.append(rel) - seen.add(rel) + include_dirs = collect_include_dirs(files) + commands = [] -print(f"==> clang-tidy translation units: {len(tidy_files)}", flush=True) + for rel in files: + path = (root / rel).resolve() + compiler, lang, std_flag = language_for_file(path) -tidy_failures = [] -extra = shlex.split(os.environ.get("CLANG_TIDY_EXTRA_ARGS", "")) -strict = os.environ.get("STRICT_CLANG_TIDY", "0") == "1" + args = [ + compiler, + "-x", lang, + std_flag, + "-fsyntax-only", + "-Wno-unknown-warning-option", + "-Wno-unused-command-line-argument", + *[f"-I{inc}" for inc in include_dirs], + str(path), + ] + + commands.append({ + "directory": str(root), + "file": str(path), + "arguments": args, + }) + + compile_db = build_dir / "compile_commands.json" + compile_db.write_text(json.dumps(commands, indent=2), encoding="utf-8") + + print(f"==> Generated synthetic compile database: {compile_db}", flush=True) + print(f"==> Synthetic entries: {len(commands)}", flush=True) + +def run_clang_format(files): + print(f"==> clang-format files: {len(files)}", flush=True) + + batch = [] + + for rel in files: + batch.append(str(rel)) + + if len(batch) >= 100: 
+ run(["clang-format", "-i", "-style=file", "--fallback-style=GNU", *batch]) + batch.clear() + + if batch: + run(["clang-format", "-i", "-style=file", "--fallback-style=GNU", *batch]) def run_tidy(rel): abs_file = str((root / rel).resolve()) line_filter = json.dumps([ - {"name": abs_file, "lines": [[1, 1000000]]} + {"name": abs_file, "lines": [[1, 100000000]]}, + {"name": str(rel), "lines": [[1, 100000000]]}, ]) cmd = [ "clang-tidy", - str(rel), + abs_file, f"-p={build_dir}", f"-line-filter={line_filter}", "--fix-errors", - *extra, + "--quiet", ] print("+ " + " ".join(cmd), flush=True) ret = subprocess.run(cmd).returncode return str(rel), ret -with ThreadPoolExecutor(max_workers=max(1, jobs)) as pool: - futures = [pool.submit(run_tidy, rel) for rel in tidy_files] - for fut in as_completed(futures): - filename, ret = fut.result() - if ret != 0: - tidy_failures.append((str(rel), ret)) - print(f"WARNING: clang-tidy failed for {rel} with exit code {ret}", flush=True) +def run_clang_tidy(files): + print(f"==> clang-tidy files: {len(files)}", flush=True) -if tidy_failures: - print("==> clang-tidy failures:", flush=True) - for filename, ret in tidy_failures: - print(f" {ret}: {filename}", flush=True) - if strict: - raise SystemExit("ERROR: clang-tidy failed and STRICT_CLANG_TIDY=1") + failures = [] -format_files = [] + with ThreadPoolExecutor(max_workers=max(1, jobs)) as pool: + futures = [pool.submit(run_tidy, rel) for rel in files] -for path in root.rglob("*"): - if not path.is_file(): - continue + for fut in as_completed(futures): + filename, ret = fut.result() - rel = path.relative_to(root) - if is_excluded(rel): - continue + if ret != 0: + failures.append((filename, ret)) + print( + f"WARNING: clang-tidy failed for {filename} with exit code {ret}", + flush=True, + ) - if path.suffix in cpp_format_exts: - format_files.append(rel) + if failures: + print("==> clang-tidy failures:", flush=True) + for filename, ret in failures: + print(f" {ret}: {filename}", flush=True) 
-print(f"==> clang-format files: {len(format_files)}", flush=True) +def export_files(files): + if out.exists(): + shutil.rmtree(out) -batch = [] -for rel in format_files: - batch.append(str(rel)) - if len(batch) >= 100: - run(["clang-format", "-i", "-style=file", "--fallback-style=GNU", *batch]) - batch.clear() + out.mkdir(parents=True) + + for rel in files: + src = root / rel + dst = out / rel + dst.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(src, dst) + + print("==> Exported C/C++ files only to /out", flush=True) + +def main(): + os.chdir(root) + + files = collect_precommit_files() + print(f"==> C/C++ files discovered: {len(files)}", flush=True) -if batch: - run(["clang-format", "-i", "-style=file", "--fallback-style=GNU", *batch]) + generate_compile_commands(files) -out = Path("/out") -if out.exists(): - shutil.rmtree(out) -out.mkdir(parents=True) + run_clang_tidy(files) + run_clang_format(files) -for rel in format_files: - src = root / rel - dst = out / rel - dst.parent.mkdir(parents=True, exist_ok=True) - shutil.copy2(src, dst) + export_files(files) -print("==> Exported C/C++ files only to /out", flush=True) +if __name__ == "__main__": + main() From 26e884261b1349eec524e46206161bc9e236d9a0 Mon Sep 17 00:00:00 2001 From: Growl Date: Fri, 8 May 2026 21:42:30 +0800 Subject: [PATCH 11/14] Use "--fix" instead of "--fix-errors" to prevent compilation failure --- tools/precommit/make_pretty.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/precommit/make_pretty.py b/tools/precommit/make_pretty.py index 43089613ba0..7b9f69b6c43 100644 --- a/tools/precommit/make_pretty.py +++ b/tools/precommit/make_pretty.py @@ -147,7 +147,7 @@ def run_tidy(rel): abs_file, f"-p={build_dir}", f"-line-filter={line_filter}", - "--fix-errors", + "--fix", "--quiet", ] From c519d74a91feadd5667d2ca3fe98c7718505b29c Mon Sep 17 00:00:00 2001 From: Growl Date: Fri, 8 May 2026 21:45:03 +0800 Subject: [PATCH 12/14] Make pretty (6th attempt) --- 
.../pyabacus/src/ModuleBase/py_base_math.cpp | 210 +- .../pyabacus/src/ModuleDriver/py_driver.cpp | 660 +- .../pyabacus/src/ModuleDriver/py_driver.hpp | 169 +- .../components/charge_mixer_wrapper.hpp | 320 +- .../components/diagonalizer_wrapper.hpp | 295 +- .../components/hamiltonian_builder_lcao.hpp | 242 +- .../components/scf_controller_lcao.hpp | 182 +- .../interfaces/i_charge_mixer.hpp | 86 +- .../interfaces/i_diagonalizer.hpp | 118 +- .../interfaces/i_hamiltonian_builder.hpp | 36 +- .../interfaces/i_scf_controller.hpp | 43 +- .../src/ModuleESolver/py_esolver_lcao.cpp | 755 +- .../src/ModuleESolver/py_esolver_lcao.hpp | 378 +- python/pyabacus/src/ModuleNAO/py_m_nao.cpp | 394 +- python/pyabacus/src/hsolver/diago_adapter.hpp | 309 +- python/pyabacus/src/hsolver/diago_traits.hpp | 470 +- python/pyabacus/src/hsolver/py_diago_cg.hpp | 212 +- .../src/hsolver/py_diago_dav_subspace.hpp | 179 +- .../pyabacus/src/hsolver/py_diago_david.hpp | 175 +- python/pyabacus/src/hsolver/py_hsolver.cpp | 152 +- python/pyabacus/src/py_numerical_radial.cpp | 186 +- python/pyabacus/src/utils/pybind_utils.h | 326 +- source/source_base/assoc_laguerre.cpp | 217 +- source/source_base/assoc_laguerre.h | 74 +- source/source_base/atom_in.h | 323 +- source/source_base/clebsch_gordan_coeff.cpp | 95 +- source/source_base/clebsch_gordan_coeff.h | 26 +- source/source_base/complexarray.cpp | 528 +- source/source_base/complexarray.h | 257 +- source/source_base/complexmatrix.cpp | 728 +- source/source_base/complexmatrix.h | 188 +- source/source_base/constants.h | 120 +- source/source_base/container_operator.h | 100 +- source/source_base/cubic_spline.cpp | 734 +- source/source_base/cubic_spline.h | 301 +- source/source_base/element_basis_index.cpp | 52 +- source/source_base/element_basis_index.h | 48 +- source/source_base/element_covalent_radius.h | 245 +- source/source_base/element_elec_config.h | 507 +- source/source_base/element_name.h | 128 +- source/source_base/export.cpp | 35 +- 
source/source_base/export.h | 30 +- source/source_base/formatter.h | 526 +- source/source_base/gather_math_lib_info.cpp | 133 +- source/source_base/global_file.cpp | 597 +- source/source_base/global_file.h | 31 +- .../source_base/global_function-func_each_2.h | 59 +- source/source_base/global_function.cpp | 275 +- source/source_base/global_function.h | 240 +- .../source_base/global_function_ddotreal.cpp | 21 +- source/source_base/global_variable.cpp | 4 +- source/source_base/gram_schmidt_orth-inl.h | 179 +- source/source_base/gram_schmidt_orth.h | 39 +- source/source_base/intarray.cpp | 419 +- source/source_base/intarray.h | 274 +- source/source_base/inverse_matrix.cpp | 129 +- source/source_base/inverse_matrix.h | 35 +- .../kernels/cuda/math_kernel_op.cu | 596 +- .../kernels/cuda/math_kernel_op_vec.cu | 353 +- .../source_base/kernels/cuda/math_ylm_op.cu | 194 +- .../source_base/kernels/cuda/sph_harm_gpu.cuh | 723 +- .../source_base/kernels/dsp/dsp_connector.cpp | 743 +- .../source_base/kernels/dsp/dsp_connector.h | 349 +- source/source_base/kernels/math_kernel_op.cpp | 110 +- source/source_base/kernels/math_kernel_op.h | 797 +- .../kernels/math_kernel_op_vec.cpp | 110 +- source/source_base/kernels/math_ylm_op.cpp | 191 +- source/source_base/kernels/math_ylm_op.h | 55 +- .../kernels/rocm/math_kernel_op.hip.cu | 622 +- .../kernels/rocm/math_kernel_op_vec.hip.cu | 506 +- .../kernels/rocm/math_ylm_op.hip.cu | 207 +- .../kernels/test/math_kernel_test.cpp | 648 +- .../kernels/test/math_ylm_op_test.cpp | 615 +- source/source_base/libm/branred.cpp | 289 +- source/source_base/libm/cexp.cpp | 285 +- source/source_base/libm/exp.cpp | 723 +- source/source_base/libm/libm.h | 153 +- source/source_base/libm/sincos.cpp | 2365 +- source/source_base/libm/test/libm_test.cpp | 291 +- source/source_base/macros.h | 9 +- source/source_base/main.cpp | 72 +- source/source_base/math_bspline.cpp | 88 +- source/source_base/math_bspline.h | 20 +- source/source_base/math_chebyshev.cpp | 901 +- 
source/source_base/math_chebyshev.h | 162 +- source/source_base/math_erf_complex.cpp | 3763 +- source/source_base/math_erf_complex.h | 30 +- source/source_base/math_integral.cpp | 533 +- source/source_base/math_integral.h | 101 +- source/source_base/math_lebedev_laikov.cpp | 10705 +-- source/source_base/math_lebedev_laikov.h | 35 +- source/source_base/math_polyint.cpp | 348 +- source/source_base/math_polyint.h | 97 +- source/source_base/math_sphbes.cpp | 1307 +- source/source_base/math_sphbes.h | 146 +- source/source_base/math_ylmreal.cpp | 1152 +- source/source_base/math_ylmreal.h | 152 +- source/source_base/mathzone.h | 96 +- source/source_base/mathzone_add1.cpp | 692 +- source/source_base/mathzone_add1.h | 40 +- source/source_base/matrix-inl.h | 103 +- source/source_base/matrix.cpp | 637 +- source/source_base/matrix.h | 124 +- source/source_base/matrix3.cpp | 327 +- source/source_base/matrix3.h | 89 +- source/source_base/matrix_wrapper.h | 117 +- source/source_base/matrix_wrapper_tianhe2.h | 84 +- source/source_base/mcd.c | 1269 +- source/source_base/mcd.h | 271 +- source/source_base/memory.cpp | 823 +- source/source_base/memory.h | 63 +- .../module_container/ATen/core/tensor.cpp | 662 +- .../module_container/ATen/core/tensor.h | 351 +- .../ATen/core/tensor_accessor.h | 103 +- .../ATen/core/tensor_buffer.cpp | 148 +- .../ATen/core/tensor_buffer.h | 65 +- .../module_container/ATen/core/tensor_map.cpp | 52 +- .../module_container/ATen/core/tensor_map.h | 16 +- .../ATen/core/tensor_shape.cpp | 138 +- .../module_container/ATen/core/tensor_shape.h | 60 +- .../ATen/core/tensor_types.cpp | 25 +- .../module_container/ATen/core/tensor_types.h | 130 +- .../module_container/ATen/core/tensor_utils.h | 300 +- .../module_container/ATen/kernels/blas.cpp | 352 +- .../module_container/ATen/kernels/blas.h | 257 +- .../ATen/kernels/cuda/blas.cu | 381 +- .../ATen/kernels/cuda/lapack.cu | 336 +- .../ATen/kernels/cuda/linalg.cu | 661 +- .../ATen/kernels/cuda/memory.cu | 178 +- 
.../module_container/ATen/kernels/lapack.cpp | 667 +- .../module_container/ATen/kernels/lapack.h | 153 +- .../module_container/ATen/kernels/linalg.cpp | 369 +- .../module_container/ATen/kernels/linalg.h | 111 +- .../module_container/ATen/kernels/memory.h | 72 +- .../ATen/kernels/memory_impl.cpp | 146 +- .../ATen/kernels/rocm/blas.hip.cu | 375 +- .../ATen/kernels/rocm/lapack.hip.cu | 164 +- .../ATen/kernels/rocm/linalg.hip.cu | 631 +- .../ATen/kernels/rocm/memory.hip.cu | 169 +- .../ATen/kernels/test/blas_test.cpp | 516 +- .../ATen/kernels/test/lapack_test.cpp | 543 +- .../ATen/kernels/test/linalg_test.cpp | 477 +- .../ATen/kernels/test/memory_test.cpp | 92 +- .../module_container/ATen/ops/einsum_op.cpp | 1570 +- .../module_container/ATen/ops/einsum_op.h | 240 +- .../module_container/ATen/ops/linalg_op.cpp | 389 +- .../module_container/ATen/ops/linalg_op.h | 91 +- .../ATen/ops/test/einsum_op_test.cpp | 742 +- .../ATen/ops/test/linalg_op_test.cpp | 521 +- .../module_container/base/core/allocator.h | 23 +- .../base/core/bfc_allocator.h | 71 +- .../base/core/cpu_allocator.cpp | 35 +- .../base/core/cpu_allocator.h | 21 +- .../base/core/gpu_allocator.cpp | 48 +- .../base/core/gpu_allocator.h | 21 +- .../module_container/base/core/refcount.cpp | 39 +- .../module_container/base/core/refcount.h | 80 +- .../module_container/base/macros/cuda.h | 46 +- .../module_container/base/macros/macros.h | 363 +- .../module_container/base/macros/rocm.h | 194 +- .../module_container/base/third_party/blas.h | 848 +- .../base/third_party/cublas.h | 1067 +- .../base/third_party/cusolver.h | 2674 +- .../base/third_party/hipblas.h | 930 +- .../base/third_party/hipsolver.h | 708 +- .../base/third_party/lapack.h | 1596 +- .../module_container/base/utils/array_ref.h | 119 +- .../module_container/base/utils/gtest.h | 86 +- .../module_container/base/utils/logging.h | 24 +- .../module_container/test/allocator_test.cpp | 49 +- .../test/tensor_accessor_test.cpp | 37 +- .../test/tensor_buffer_test.cpp 
| 56 +- .../module_container/test/tensor_map_test.cpp | 42 +- .../test/tensor_shape_test.cpp | 77 +- .../module_container/test/tensor_test.cpp | 693 +- .../test/tensor_utils_test.cpp | 38 +- .../module_device/cuda/memory_op.cu | 278 +- .../source_base/module_device/cuda_compat.cpp | 95 +- .../source_base/module_device/cuda_compat.h | 19 +- source/source_base/module_device/device.cpp | 202 +- source/source_base/module_device/device.h | 110 +- .../source_base/module_device/device_check.h | 749 +- .../module_device/device_helpers.cpp | 12 +- .../module_device/device_helpers.h | 35 +- .../source_base/module_device/gpu_runtime.h | 48 +- .../source_base/module_device/kernel_compat.h | 16 +- .../source_base/module_device/memory_op.cpp | 354 +- source/source_base/module_device/memory_op.h | 168 +- .../module_device/output_device.cpp | 703 +- .../module_device/rocm/memory_op.hip.cu | 176 +- .../module_device/test/device_test.cpp | 18 +- .../module_device/test/memory_test.cpp | 410 +- .../module_external/blacs_connector.h | 163 +- .../module_external/blas_connector.h | 1317 +- .../module_external/blas_connector_base.cpp | 124 +- .../module_external/blas_connector_matrix.cpp | 1314 +- .../module_external/blas_connector_vector.cpp | 931 +- .../module_external/lapack_connector.h | 1211 +- .../module_external/scalapack_connector.h | 1385 +- source/source_base/module_fft/fft_base.h | 78 +- source/source_base/module_fft/fft_bundle.cpp | 326 +- source/source_base/module_fft/fft_bundle.h | 87 +- source/source_base/module_fft/fft_cpu.cpp | 737 +- source/source_base/module_fft/fft_cpu.h | 227 +- .../source_base/module_fft/fft_cpu_float.cpp | 696 +- source/source_base/module_fft/fft_cuda.cpp | 123 +- source/source_base/module_fft/fft_cuda.h | 92 +- source/source_base/module_fft/fft_dsp.cpp | 110 +- source/source_base/module_fft/fft_dsp.h | 124 +- .../source_base/module_fft/fft_dsp_float.cpp | 30 +- source/source_base/module_fft/fft_rocm.cpp | 144 +- 
source/source_base/module_fft/fft_rocm.h | 92 +- source/source_base/module_grid/batch.cpp | 116 +- source/source_base/module_grid/batch.h | 8 +- source/source_base/module_grid/delley.cpp | 622 +- source/source_base/module_grid/delley.h | 13 +- source/source_base/module_grid/partition.cpp | 150 +- source/source_base/module_grid/partition.h | 33 +- source/source_base/module_grid/radial.cpp | 100 +- source/source_base/module_grid/radial.h | 20 +- .../module_grid/test/test_batch.cpp | 245 +- .../module_grid/test/test_delley.cpp | 160 +- .../module_grid/test/test_partition.cpp | 400 +- .../module_grid/test/test_radial.cpp | 123 +- .../module_mixing/broyden_mixing.cpp | 260 +- .../module_mixing/broyden_mixing.h | 78 +- source/source_base/module_mixing/mixing.cpp | 118 +- source/source_base/module_mixing/mixing.h | 64 +- .../source_base/module_mixing/mixing_data.cpp | 33 +- .../source_base/module_mixing/mixing_data.h | 27 +- .../module_mixing/plain_mixing.cpp | 98 +- .../source_base/module_mixing/plain_mixing.h | 103 +- .../module_mixing/pulay_mixing.cpp | 222 +- .../source_base/module_mixing/pulay_mixing.h | 73 +- .../module_mixing/test/mixing_test.cpp | 387 +- source/source_base/mymath.cpp | 207 +- source/source_base/mymath.h | 10 +- source/source_base/name_angular.h | 14 +- source/source_base/ndarray.h | 300 +- source/source_base/opt_CG.cpp | 233 +- source/source_base/opt_CG.h | 49 +- source/source_base/opt_DCsrch.cpp | 720 +- source/source_base/opt_DCsrch.h | 9 +- source/source_base/opt_TN.hpp | 199 +- source/source_base/para_gemm.cpp | 524 +- source/source_base/para_gemm.h | 24 +- source/source_base/parallel_2d.cpp | 127 +- source/source_base/parallel_2d.h | 64 +- source/source_base/parallel_comm.cpp | 68 +- source/source_base/parallel_comm.h | 38 +- source/source_base/parallel_common.cpp | 70 +- source/source_base/parallel_common.h | 20 +- source/source_base/parallel_device.cpp | 462 +- source/source_base/parallel_device.h | 259 +- 
source/source_base/parallel_global.cpp | 376 +- source/source_base/parallel_global.h | 64 +- source/source_base/parallel_reduce.cpp | 203 +- source/source_base/parallel_reduce.h | 54 +- source/source_base/projgen.cpp | 285 +- source/source_base/projgen.h | 22 +- source/source_base/random.h | 84 +- source/source_base/realarray.cpp | 247 +- source/source_base/realarray.h | 62 +- .../source_base/sph_bessel_recursive-d1.cpp | 714 +- .../source_base/sph_bessel_recursive-d2.cpp | 770 +- source/source_base/sph_bessel_recursive.h | 127 +- .../spherical_bessel_transformer.cpp | 473 +- .../spherical_bessel_transformer.h | 62 +- .../source_base/test/assoc_laguerre_test.cpp | 348 +- source/source_base/test/atom_in_test.cpp | 20 +- .../source_base/test/blas_connector_test.cpp | 1128 +- .../test/clebsch_gordan_coeff_test.cpp | 26 +- source/source_base/test/complexarray_test.cpp | 650 +- .../source_base/test/complexmatrix_test.cpp | 737 +- .../test/container_operator_test.cpp | 173 +- source/source_base/test/cubic_spline_test.cpp | 470 +- .../test/element_basis_index_test.cpp | 62 +- source/source_base/test/formatter_test.cpp | 310 +- source/source_base/test/global_file_test.cpp | 257 +- .../source_base/test/global_function_test.cpp | 968 +- .../test/gram_schmidt_orth_test.cpp | 185 +- source/source_base/test/intarray_test.cpp | 556 +- .../source_base/test/inverse_matrix_test.cpp | 118 +- .../test/lapack_connector_test.cpp | 123 +- source/source_base/test/math_bspline_test.cpp | 64 +- .../source_base/test/math_chebyshev_test.cpp | 557 +- .../source_base/test/math_integral_test.cpp | 520 +- source/source_base/test/math_polyint_test.cpp | 191 +- source/source_base/test/math_sphbes_test.cpp | 380 +- source/source_base/test/math_ylmreal_test.cpp | 1147 +- .../source_base/test/mathzone_add1_test.cpp | 270 +- source/source_base/test/mathzone_test.cpp | 118 +- source/source_base/test/matrix3_test.cpp | 541 +- source/source_base/test/matrix_test.cpp | 476 +- 
source/source_base/test/memory_test.cpp | 201 +- source/source_base/test/mymath_test.cpp | 78 +- source/source_base/test/ndarray_test.cpp | 223 +- source/source_base/test/opt_CG_test.cpp | 263 +- source/source_base/test/opt_TN_test.cpp | 174 +- source/source_base/test/opt_test_tools.cpp | 118 +- source/source_base/test/opt_test_tools.h | 80 +- source/source_base/test/perf_sphbes_test.cpp | 81 +- source/source_base/test/realarray_test.cpp | 257 +- .../test/sph_bessel_recursive_test.cpp | 91 +- .../spherical_bessel_transformer_test.cpp | 401 +- .../source_base/test/test_lebedev_laikov.cpp | 172 +- source/source_base/test/timer_test.cpp | 296 +- source/source_base/test/tool_check_test.cpp | 176 +- source/source_base/test/tool_quit_no_exit.cpp | 41 +- source/source_base/test/tool_quit_test.cpp | 159 +- .../source_base/test/tool_threading_test.cpp | 235 +- source/source_base/test/tool_title_test.cpp | 61 +- source/source_base/test/vector3_test.cpp | 1206 +- source/source_base/test/ylm_test.cpp | 309 +- .../test_parallel/blacs_connector_test.cpp | 66 +- .../test_parallel/math_chebyshev_mpi_test.cpp | 186 +- .../test_parallel/parallel_2d_test.cpp | 270 +- .../test_parallel/parallel_common_test.cpp | 113 +- .../test_parallel/parallel_global_test.cpp | 337 +- .../test_parallel/parallel_reduce_test.cpp | 748 +- .../test_parallel/test_para_gemm.cpp | 637 +- source/source_base/timer.cpp | 492 +- source/source_base/timer.h | 25 +- source/source_base/timer_wrapper.h | 38 +- source/source_base/tool_check.cpp | 142 +- source/source_base/tool_check.h | 8 +- source/source_base/tool_quit.cpp | 216 +- source/source_base/tool_quit.h | 17 +- source/source_base/tool_threading.h | 124 +- source/source_base/tool_title.cpp | 56 +- source/source_base/tool_title.h | 26 +- source/source_base/truncated_func.h | 69 +- source/source_base/vector3.h | 221 +- source/source_base/ylm.cpp | 4078 +- source/source_base/ylm.h | 273 +- source/source_base/ylmcoef.h | 75 +- 
source/source_basis/module_ao/ORB_atomic.cpp | 65 +- source/source_basis/module_ao/ORB_atomic.h | 297 +- .../source_basis/module_ao/ORB_atomic_lm.cpp | 1400 +- source/source_basis/module_ao/ORB_atomic_lm.h | 411 +- .../module_ao/ORB_gaunt_table.cpp | 820 +- .../source_basis/module_ao/ORB_gaunt_table.h | 210 +- .../source_basis/module_ao/ORB_nonlocal.cpp | 112 +- source/source_basis/module_ao/ORB_nonlocal.h | 103 +- .../module_ao/ORB_nonlocal_lm.cpp | 424 +- .../source_basis/module_ao/ORB_nonlocal_lm.h | 198 +- source/source_basis/module_ao/ORB_read.cpp | 659 +- source/source_basis/module_ao/ORB_read.h | 249 +- .../module_ao/element_basis_index-ORB.cpp | 55 +- .../module_ao/element_basis_index-ORB.h | 12 +- .../module_ao/parallel_orbitals.cpp | 308 +- .../module_ao/parallel_orbitals.h | 71 +- .../module_ao/test/1_snap_equal_test.cpp | 165 +- .../module_ao/test/ORB_atomic_lm_test.cpp | 1175 +- .../module_ao/test/ORB_nonlocal_lm_test.cpp | 890 +- .../module_ao/test/ORB_nonlocal_test.cpp | 117 +- .../module_ao/test/ORB_read_test.cpp | 385 +- .../module_ao/test/ORB_unittest.cpp | 265 +- .../module_ao/test/ORB_unittest.h | 22 +- .../module_ao/test/parallel_orbitals_test.cpp | 321 +- .../module_nao/atomic_radials.cpp | 352 +- .../source_basis/module_nao/atomic_radials.h | 37 +- .../source_basis/module_nao/beta_radials.cpp | 97 +- source/source_basis/module_nao/beta_radials.h | 16 +- .../module_nao/hydrogen_radials.cpp | 695 +- .../module_nao/hydrogen_radials.h | 319 +- .../module_nao/numerical_radial.cpp | 556 +- .../module_nao/numerical_radial.h | 261 +- .../source_basis/module_nao/pswfc_radials.cpp | 549 +- .../source_basis/module_nao/pswfc_radials.h | 155 +- .../module_nao/radial_collection.cpp | 359 +- .../module_nao/radial_collection.h | 183 +- source/source_basis/module_nao/radial_set.cpp | 248 +- source/source_basis/module_nao/radial_set.h | 211 +- .../module_nao/real_gaunt_table.cpp | 270 +- .../module_nao/real_gaunt_table.h | 65 +- 
.../module_nao/sphbes_radials.cpp | 335 +- .../source_basis/module_nao/sphbes_radials.h | 96 +- .../module_nao/test/atomic_radials_test.cpp | 332 +- .../module_nao/test/beta_radials_test.cpp | 546 +- .../module_nao/test/hydrogen_radials_test.cpp | 695 +- .../module_nao/test/numerical_radial_test.cpp | 540 +- .../module_nao/test/projgen_test.cpp | 150 +- .../module_nao/test/pswfc_radials_test.cpp | 260 +- .../test/radial_collection_test.cpp | 390 +- .../module_nao/test/real_gaunt_table_test.cpp | 416 +- .../module_nao/test/sphbes_radials_test.cpp | 166 +- .../test/two_center_bundle_test.cpp | 77 +- .../test/two_center_integrator_test.cpp | 398 +- .../module_nao/test/two_center_table_test.cpp | 239 +- .../module_nao/two_center_bundle.cpp | 318 +- .../module_nao/two_center_bundle.h | 28 +- .../module_nao/two_center_integrator.cpp | 281 +- .../module_nao/two_center_integrator.h | 77 +- .../module_nao/two_center_table.cpp | 245 +- .../module_nao/two_center_table.h | 150 +- .../module_pw/kernels/cuda/pw_op.cu | 265 +- .../source_basis/module_pw/kernels/pw_op.cpp | 87 +- source/source_basis/module_pw/kernels/pw_op.h | 117 +- .../module_pw/kernels/rocm/pw_op.hip.cu | 288 +- .../module_pw/kernels/test/pw_op_test.cpp | 7127 +- source/source_basis/module_pw/pw_basis.cpp | 438 +- source/source_basis/module_pw/pw_basis.h | 506 +- source/source_basis/module_pw/pw_basis_big.h | 609 +- source/source_basis/module_pw/pw_basis_k.cpp | 614 +- source/source_basis/module_pw/pw_basis_k.h | 304 +- .../source_basis/module_pw/pw_basis_k_big.h | 74 +- .../source_basis/module_pw/pw_basis_sup.cpp | 568 +- source/source_basis/module_pw/pw_basis_sup.h | 35 +- .../source_basis/module_pw/pw_distributeg.cpp | 310 +- .../module_pw/pw_distributeg_method1.cpp | 421 +- .../module_pw/pw_distributeg_method2.cpp | 192 +- .../source_basis/module_pw/pw_distributer.cpp | 39 +- .../source_basis/module_pw/pw_gatherscatter.h | 260 +- source/source_basis/module_pw/pw_init.cpp | 331 +- 
.../source_basis/module_pw/pw_transform.cpp | 317 +- .../module_pw/pw_transform_gpu.cpp | 199 +- .../source_basis/module_pw/pw_transform_k.cpp | 577 +- .../module_pw/pw_transform_k_dsp.cpp | 161 +- .../module_pw/test/depend_mock.cpp | 44 +- .../source_basis/module_pw/test/depend_mock.h | 16 +- .../source_basis/module_pw/test/generate.cpp | 192 +- .../source_basis/module_pw/test/pw_test.cpp | 55 +- source/source_basis/module_pw/test/pw_test.h | 44 +- .../source_basis/module_pw/test/test-big.cpp | 143 +- .../module_pw/test/test-other.cpp | 252 +- .../source_basis/module_pw/test/test1-1-1.cpp | 351 +- .../source_basis/module_pw/test/test1-1-2.cpp | 308 +- .../source_basis/module_pw/test/test1-2-2.cpp | 275 +- .../source_basis/module_pw/test/test1-2.cpp | 253 +- .../source_basis/module_pw/test/test1-3.cpp | 256 +- .../source_basis/module_pw/test/test1-4.cpp | 291 +- .../source_basis/module_pw/test/test1-5.cpp | 300 +- .../source_basis/module_pw/test/test2-1-1.cpp | 315 +- .../source_basis/module_pw/test/test2-1-2.cpp | 308 +- .../source_basis/module_pw/test/test2-2.cpp | 168 +- .../source_basis/module_pw/test/test2-3.cpp | 183 +- .../source_basis/module_pw/test/test3-1.cpp | 318 +- .../source_basis/module_pw/test/test3-2.cpp | 195 +- .../source_basis/module_pw/test/test3-3-2.cpp | 213 +- .../source_basis/module_pw/test/test3-3.cpp | 203 +- .../source_basis/module_pw/test/test4-1.cpp | 302 +- .../source_basis/module_pw/test/test4-2.cpp | 253 +- .../source_basis/module_pw/test/test4-3.cpp | 261 +- .../source_basis/module_pw/test/test4-4.cpp | 298 +- .../source_basis/module_pw/test/test4-5.cpp | 308 +- .../source_basis/module_pw/test/test5-1-1.cpp | 355 +- .../source_basis/module_pw/test/test5-1-2.cpp | 355 +- .../source_basis/module_pw/test/test5-2-1.cpp | 311 +- .../source_basis/module_pw/test/test5-2-2.cpp | 311 +- .../source_basis/module_pw/test/test5-3-1.cpp | 258 +- .../source_basis/module_pw/test/test5-4-1.cpp | 241 +- .../source_basis/module_pw/test/test5-4-2.cpp 
| 241 +- .../source_basis/module_pw/test/test6-1-1.cpp | 355 +- .../source_basis/module_pw/test/test6-1-2.cpp | 355 +- .../source_basis/module_pw/test/test6-2-1.cpp | 311 +- .../source_basis/module_pw/test/test6-2-2.cpp | 313 +- .../source_basis/module_pw/test/test6-3-1.cpp | 258 +- .../source_basis/module_pw/test/test6-4-1.cpp | 241 +- .../source_basis/module_pw/test/test6-4-2.cpp | 241 +- .../source_basis/module_pw/test/test7-1.cpp | 307 +- .../source_basis/module_pw/test/test7-2-1.cpp | 205 +- .../source_basis/module_pw/test/test7-3-1.cpp | 197 +- .../source_basis/module_pw/test/test7-3-2.cpp | 197 +- .../source_basis/module_pw/test/test8-1.cpp | 307 +- .../source_basis/module_pw/test/test8-2-1.cpp | 207 +- .../source_basis/module_pw/test/test8-3-1.cpp | 197 +- .../source_basis/module_pw/test/test8-3-2.cpp | 197 +- .../source_basis/module_pw/test/test_sup.cpp | 293 +- .../source_basis/module_pw/test/test_tool.cpp | 82 +- .../source_basis/module_pw/test/test_tool.h | 6 +- source/source_basis/module_pw/test/time.cpp | 413 +- .../module_pw/test_gpu/pw_basis_C2C.cpp | 280 +- .../module_pw/test_gpu/pw_basis_C2R.cpp | 279 +- .../module_pw/test_gpu/pw_basis_k_C2C.cpp | 318 +- .../module_pw/test_gpu/pw_test.cpp | 55 +- .../source_basis/module_pw/test_gpu/pw_test.h | 44 +- .../module_pw/test_serial/pw_basis_k_test.cpp | 271 +- .../module_pw/test_serial/pw_basis_test.cpp | 546 +- source/source_cell/atom_pseudo.cpp | 371 +- source/source_cell/atom_pseudo.h | 67 +- source/source_cell/atom_spec.cpp | 208 +- source/source_cell/atom_spec.h | 28 +- source/source_cell/bcast_cell.cpp | 198 +- source/source_cell/bcast_cell.h | 74 +- source/source_cell/cal_atoms_info.h | 80 +- source/source_cell/cell_index.cpp | 365 +- source/source_cell/cell_index.h | 46 +- source/source_cell/check_atomic_stru.cpp | 290 +- source/source_cell/check_atomic_stru.h | 2 +- source/source_cell/k_vector_utils.cpp | 1224 +- source/source_cell/k_vector_utils.h | 24 +- source/source_cell/klist.cpp | 780 +- 
source/source_cell/klist.h | 101 +- .../source_cell/module_neighbor/sltk_atom.cpp | 18 +- .../source_cell/module_neighbor/sltk_atom.h | 57 +- .../module_neighbor/sltk_atom_arrange.cpp | 167 +- .../module_neighbor/sltk_atom_arrange.h | 36 +- .../source_cell/module_neighbor/sltk_grid.cpp | 301 +- .../source_cell/module_neighbor/sltk_grid.h | 108 +- .../module_neighbor/sltk_grid_driver.cpp | 82 +- .../module_neighbor/sltk_grid_driver.h | 55 +- .../source_cell/module_neighbor/sltk_util.h | 55 +- .../module_neighbor/test/prepare_unitcell.h | 600 +- .../test/sltk_atom_arrange_test.cpp | 132 +- .../module_neighbor/test/sltk_atom_test.cpp | 23 +- .../module_neighbor/test/sltk_grid_test.cpp | 90 +- .../module_symmetry/run_symmetry.cpp | 66 +- .../module_symmetry/symm_analysis.cpp | 490 +- .../module_symmetry/symm_check.cpp | 248 +- .../module_symmetry/symm_getgroup.cpp | 170 +- .../module_symmetry/symm_hermite.cpp | 389 +- .../module_symmetry/symm_lattice.cpp | 742 +- .../module_symmetry/symm_magnetic.cpp | 156 +- .../module_symmetry/symm_other.cpp | 242 +- .../source_cell/module_symmetry/symm_other.h | 16 +- .../module_symmetry/symm_pricell.cpp | 521 +- .../source_cell/module_symmetry/symm_rho.cpp | 536 +- .../source_cell/module_symmetry/symmetry.cpp | 561 +- source/source_cell/module_symmetry/symmetry.h | 336 +- .../module_symmetry/symmetry_basic.cpp | 1930 +- .../module_symmetry/symmetry_basic.h | 102 +- .../module_symmetry/test/symmetry_test.cpp | 135 +- .../module_symmetry/test/symmetry_test.h | 19 +- .../test/symmetry_test_analysis.cpp | 469 +- .../test/symmetry_test_cases.h | 373 +- .../test/symmetry_test_symtrz.cpp | 184 +- source/source_cell/parallel_kpoints.cpp | 238 +- source/source_cell/parallel_kpoints.h | 52 +- source/source_cell/print_cell.cpp | 274 +- source/source_cell/print_cell.h | 66 +- source/source_cell/pseudo.cpp | 125 +- source/source_cell/pseudo.h | 16 +- source/source_cell/read_atom_species.cpp | 741 +- source/source_cell/read_atoms.cpp | 199 +- 
source/source_cell/read_atoms_helper.cpp | 924 +- source/source_cell/read_atoms_helper.h | 71 +- source/source_cell/read_pp.cpp | 843 +- source/source_cell/read_pp.h | 128 +- source/source_cell/read_pp_blps.cpp | 193 +- source/source_cell/read_pp_complete.cpp | 302 +- source/source_cell/read_pp_upf100.cpp | 698 +- source/source_cell/read_pp_upf201.cpp | 1454 +- source/source_cell/read_pp_vwr.cpp | 836 +- source/source_cell/read_stru.cpp | 161 +- source/source_cell/read_stru.h | 39 +- source/source_cell/sep.cpp | 142 +- source/source_cell/sep.h | 12 +- source/source_cell/sep_cell.cpp | 138 +- source/source_cell/sep_cell.h | 33 +- source/source_cell/setup_nonlocal.cpp | 591 +- source/source_cell/setup_nonlocal.h | 71 +- source/source_cell/test/atom_pseudo_test.cpp | 112 +- source/source_cell/test/atom_spec_test.cpp | 302 +- source/source_cell/test/cell_index_test.cpp | 68 +- source/source_cell/test/klist_test.cpp | 892 +- source/source_cell/test/klist_test_para.cpp | 454 +- .../test/parallel_kpoints_test.cpp | 295 +- source/source_cell/test/prepare_unitcell.h | 1013 +- source/source_cell/test/pseudo_nc_test.cpp | 169 +- .../test/read_atoms_helper_test.cpp | 573 +- source/source_cell/test/read_pp_test.cpp | 1283 +- source/source_cell/test/read_sep_test.cpp | 177 +- source/source_cell/test/sepcell_test.cpp | 307 +- .../test/support/mock_unitcell.cpp | 69 +- source/source_cell/test/unitcell_test.cpp | 2181 +- .../source_cell/test/unitcell_test_para.cpp | 290 +- .../source_cell/test/unitcell_test_readpp.cpp | 638 +- .../test/unitcell_test_setupcell.cpp | 274 +- .../source_cell/test_pw/unitcell_test_pw.cpp | 171 +- source/source_cell/unitcell.cpp | 764 +- source/source_cell/unitcell.h | 172 +- source/source_cell/unitcell_data.h | 39 +- source/source_cell/update_cell.cpp | 785 +- source/source_cell/update_cell.h | 135 +- source/source_esolver/esolver.cpp | 488 +- source/source_esolver/esolver.h | 29 +- source/source_esolver/esolver_dm2rho.cpp | 81 +- 
source/source_esolver/esolver_dm2rho.h | 10 +- source/source_esolver/esolver_double_xc.cpp | 596 +- source/source_esolver/esolver_double_xc.h | 13 +- source/source_esolver/esolver_dp.cpp | 158 +- source/source_esolver/esolver_dp.h | 18 +- source/source_esolver/esolver_fp.cpp | 277 +- source/source_esolver/esolver_fp.h | 29 +- source/source_esolver/esolver_gets.cpp | 199 +- source/source_esolver/esolver_gets.h | 16 +- source/source_esolver/esolver_ks.cpp | 378 +- source/source_esolver/esolver_ks.h | 54 +- source/source_esolver/esolver_ks_lcao.cpp | 697 +- source/source_esolver/esolver_ks_lcao.h | 109 +- .../source_esolver/esolver_ks_lcao_tddft.cpp | 807 +- source/source_esolver/esolver_ks_lcao_tddft.h | 34 +- source/source_esolver/esolver_ks_lcaopw.cpp | 345 +- source/source_esolver/esolver_ks_lcaopw.h | 48 +- source/source_esolver/esolver_ks_pw.cpp | 402 +- source/source_esolver/esolver_ks_pw.h | 29 +- source/source_esolver/esolver_lj.cpp | 419 +- source/source_esolver/esolver_lj.h | 67 +- source/source_esolver/esolver_nep.cpp | 185 +- source/source_esolver/esolver_nep.h | 52 +- source/source_esolver/esolver_of.cpp | 554 +- source/source_esolver/esolver_of.h | 59 +- .../source_esolver/esolver_of_interface.cpp | 424 +- source/source_esolver/esolver_of_tddft.cpp | 134 +- source/source_esolver/esolver_of_tddft.h | 10 +- source/source_esolver/esolver_of_tool.cpp | 448 +- source/source_esolver/esolver_sdft_pw.cpp | 372 +- source/source_esolver/esolver_sdft_pw.h | 23 +- source/source_esolver/lcao_others.cpp | 363 +- source/source_esolver/pw_others.cpp | 59 +- .../source_esolver/test/esolver_dp_test.cpp | 118 +- source/source_esolver/test/for_test.h | 68 +- source/source_estate/cal_dm.h | 170 +- source/source_estate/cal_nelec_nband.cpp | 202 +- source/source_estate/cal_nelec_nband.h | 39 +- source/source_estate/cal_ux.cpp | 148 +- source/source_estate/cal_ux.h | 13 +- source/source_estate/cal_wfc.cpp | 292 +- source/source_estate/elecstate.cpp | 41 +- 
source/source_estate/elecstate.h | 102 +- source/source_estate/elecstate_energy.cpp | 413 +- .../source_estate/elecstate_energy_terms.cpp | 41 +- source/source_estate/elecstate_exx.cpp | 13 +- source/source_estate/elecstate_lcao.cpp | 78 +- source/source_estate/elecstate_lcao.h | 22 +- source/source_estate/elecstate_print.cpp | 488 +- source/source_estate/elecstate_print.h | 27 +- source/source_estate/elecstate_pw.cpp | 821 +- source/source_estate/elecstate_pw.h | 43 +- source/source_estate/elecstate_pw_cal_tau.cpp | 99 +- source/source_estate/elecstate_pw_sdft.cpp | 39 +- source/source_estate/elecstate_pw_sdft.h | 20 +- source/source_estate/elecstate_tools.cpp | 269 +- source/source_estate/elecstate_tools.h | 28 +- source/source_estate/fp_energy.cpp | 98 +- source/source_estate/fp_energy.h | 22 +- .../kernels/cuda/elecstate_op.cu | 130 +- source/source_estate/kernels/elecstate_op.cpp | 96 +- source/source_estate/kernels/elecstate_op.h | 114 +- .../kernels/rocm/elecstate_op.hip.cu | 139 +- .../kernels/test/elecstate_op_test.cpp | 2713 +- source/source_estate/magnetism.cpp | 184 +- source/source_estate/magnetism.h | 36 +- source/source_estate/math_tools.h | 176 +- source/source_estate/module_charge/charge.cpp | 1129 +- source/source_estate/module_charge/charge.h | 156 +- .../module_charge/charge_extra.cpp | 367 +- .../module_charge/charge_extra.h | 30 +- .../module_charge/charge_init.cpp | 682 +- .../module_charge/charge_mixing.cpp | 289 +- .../module_charge/charge_mixing.h | 167 +- .../module_charge/charge_mixing_dmr.cpp | 339 +- .../charge_mixing_preconditioner.cpp | 221 +- .../module_charge/charge_mixing_residual.cpp | 656 +- .../module_charge/charge_mixing_rho.cpp | 949 +- .../module_charge/charge_mixing_uspp.cpp | 109 +- .../module_charge/charge_mpi.cpp | 236 +- .../source_estate/module_charge/chgmixing.cpp | 343 +- .../source_estate/module_charge/chgmixing.h | 61 +- .../gint_precision_controller.cpp | 71 +- .../module_charge/gint_precision_controller.h | 12 +- 
.../module_charge/symmetry_rho.cpp | 174 +- .../module_charge/symmetry_rho.h | 76 +- .../module_charge/symmetry_rhog.cpp | 477 +- source/source_estate/module_dm/cal_dm_psi.cpp | 370 +- source/source_estate/module_dm/cal_dm_psi.h | 54 +- .../source_estate/module_dm/cal_edm_tddft.cpp | 1446 +- .../source_estate/module_dm/cal_edm_tddft.h | 36 +- .../module_dm/density_matrix.cpp | 1090 +- .../source_estate/module_dm/density_matrix.h | 211 +- .../module_dm/density_matrix_io.cpp | 478 +- source/source_estate/module_dm/init_dm.cpp | 89 +- source/source_estate/module_dm/init_dm.h | 24 +- .../module_dm/test/prepare_unitcell.h | 532 +- .../module_dm/test/test_cal_dm_R.cpp | 366 +- .../module_dm/test/test_cal_dmk_psi.cpp | 131 +- .../module_dm/test/test_dm_R_init.cpp | 293 +- .../module_dm/test/test_dm_constructor.cpp | 219 +- .../module_dm/test/test_dm_io.cpp | 119 +- .../module_dm/test/tmp_mocks.cpp | 128 +- .../source_estate/module_pot/H_Hartree_pw.cpp | 85 +- .../source_estate/module_pot/H_Hartree_pw.h | 44 +- .../source_estate/module_pot/H_TDDFT_pw.cpp | 532 +- source/source_estate/module_pot/H_TDDFT_pw.h | 50 +- source/source_estate/module_pot/efield.cpp | 419 +- source/source_estate/module_pot/efield.h | 104 +- source/source_estate/module_pot/gatefield.cpp | 275 +- source/source_estate/module_pot/gatefield.h | 41 +- source/source_estate/module_pot/pot_base.h | 42 +- .../source_estate/module_pot/pot_cosikr.cpp | 72 +- source/source_estate/module_pot/pot_cosikr.h | 22 +- source/source_estate/module_pot/pot_local.cpp | 31 +- source/source_estate/module_pot/pot_local.h | 19 +- .../source_estate/module_pot/pot_ml_exx.cpp | 275 +- source/source_estate/module_pot/pot_ml_exx.h | 78 +- .../module_pot/pot_ml_exx_label.cpp | 446 +- source/source_estate/module_pot/pot_sep.cpp | 17 +- source/source_estate/module_pot/pot_sep.h | 8 +- .../source_estate/module_pot/pot_surchem.hpp | 47 +- source/source_estate/module_pot/pot_xc.cpp | 49 +- source/source_estate/module_pot/pot_xc.h | 12 
+- .../source_estate/module_pot/pot_xc_fdm.cpp | 64 +- source/source_estate/module_pot/pot_xc_fdm.h | 17 +- .../module_pot/potential_new.cpp | 404 +- .../source_estate/module_pot/potential_new.h | 155 +- .../module_pot/potential_types.cpp | 74 +- source/source_estate/occupy.cpp | 594 +- source/source_estate/occupy.h | 158 +- source/source_estate/read_orb.cpp | 115 +- source/source_estate/read_orb.h | 23 +- source/source_estate/read_pseudo.cpp | 537 +- source/source_estate/read_pseudo.h | 65 +- source/source_estate/setup_estate_pw.cpp | 375 +- source/source_estate/setup_estate_pw.h | 66 +- .../source_estate/test/charge_extra_test.cpp | 262 +- .../source_estate/test/charge_mixing_test.cpp | 1413 +- source/source_estate/test/charge_test.cpp | 201 +- .../test/elecstate_base_test.cpp | 655 +- .../test/elecstate_energy_test.cpp | 215 +- .../test/elecstate_fp_energy_test.cpp | 74 +- .../test/elecstate_magnetism_test.cpp | 176 +- .../test/elecstate_occupy_test.cpp | 404 +- .../test/elecstate_print_test.cpp | 238 +- .../source_estate/test/elecstate_pw_test.cpp | 261 +- .../test/gint_precision_controller_test.cpp | 58 +- .../source_estate/test/potential_new_test.cpp | 750 +- .../test/potentials_base_test.cpp | 34 +- source/source_estate/test/prepare_unitcell.h | 516 +- .../test_mpi/charge_mpi_test.cpp | 387 +- source/source_estate/update_pot.cpp | 26 +- source/source_estate/update_pot.h | 9 +- source/source_hamilt/hamilt.h | 82 +- source/source_hamilt/hamilt_base.h | 26 +- source/source_hamilt/matrixblock.h | 3 +- .../source_hamilt/module_ewald/H_Ewald_pw.cpp | 608 +- .../source_hamilt/module_ewald/H_Ewald_pw.h | 45 +- source/source_hamilt/module_ewald/dnrm2.cpp | 31 +- source/source_hamilt/module_ewald/dnrm2.h | 5 +- .../module_surchem/H_correction_pw.cpp | 58 +- .../module_surchem/cal_epsilon.cpp | 17 +- .../module_surchem/cal_pseudo.cpp | 60 +- .../source_hamilt/module_surchem/cal_totn.cpp | 60 +- .../source_hamilt/module_surchem/cal_vcav.cpp | 194 +- 
.../source_hamilt/module_surchem/cal_vel.cpp | 151 +- .../module_surchem/minimize_cg.cpp | 296 +- .../module_surchem/sol_force.cpp | 270 +- .../source_hamilt/module_surchem/surchem.cpp | 57 +- source/source_hamilt/module_surchem/surchem.h | 167 +- .../module_surchem/test/cal_epsilon_test.cpp | 61 +- .../module_surchem/test/cal_pseudo_test.cpp | 89 +- .../module_surchem/test/cal_totn_test.cpp | 91 +- .../module_surchem/test/cal_vcav_test.cpp | 181 +- .../module_surchem/test/cal_vel_test.cpp | 153 +- .../module_surchem/test/setcell.h | 50 +- .../module_vdw/test/vdw_test.cpp | 722 +- source/source_hamilt/module_vdw/vdw.cpp | 88 +- source/source_hamilt/module_vdw/vdw.h | 56 +- .../source_hamilt/module_vdw/vdw_parameters.h | 18 +- source/source_hamilt/module_vdw/vdwd2.cpp | 146 +- source/source_hamilt/module_vdw/vdwd2.h | 107 +- .../module_vdw/vdwd2_parameters.cpp | 146 +- .../module_vdw/vdwd2_parameters.h | 39 +- source/source_hamilt/module_vdw/vdwd3.cpp | 3164 +- source/source_hamilt/module_vdw/vdwd3.h | 66 +- .../module_vdw/vdwd3_autoset_xcname.cpp | 344 +- .../module_vdw/vdwd3_autoset_xcparam.cpp | 236 +- .../module_vdw/vdwd3_parameters.cpp | 93 +- .../module_vdw/vdwd3_parameters.h | 188 +- .../module_vdw/vdwd3_parameters_tab.cpp | 56600 +++++++--------- source/source_hamilt/module_xc/exx_info.cpp | 2 +- source/source_hamilt/module_xc/exx_info.h | 39 +- .../kernels/cuda/xc_functional_op.cu | 97 +- .../kernels/rocm/xc_functional_op.hip.cu | 97 +- .../kernels/test/xc_functional_op_test.cpp | 113 +- .../module_xc/kernels/xc_functional_op.cpp | 57 +- .../module_xc/kernels/xc_functional_op.h | 33 +- .../source_hamilt/module_xc/test/test_xc.cpp | 1272 +- .../source_hamilt/module_xc/test/test_xc1.cpp | 163 +- .../source_hamilt/module_xc/test/test_xc2.cpp | 790 +- .../source_hamilt/module_xc/test/test_xc3.cpp | 291 +- .../source_hamilt/module_xc/test/test_xc4.cpp | 83 +- .../source_hamilt/module_xc/test/test_xc5.cpp | 547 +- .../source_hamilt/module_xc/test/xc3_mock.h | 369 
+- source/source_hamilt/module_xc/test/xctest.h | 16 +- source/source_hamilt/module_xc/xc_funcs.h | 36 +- .../module_xc/xc_funct_corr_gga.cpp | 263 +- .../module_xc/xc_funct_corr_lda.cpp | 388 +- .../module_xc/xc_funct_exch_gga.cpp | 255 +- .../module_xc/xc_funct_exch_lda.cpp | 113 +- .../source_hamilt/module_xc/xc_funct_hcth.cpp | 81 +- .../source_hamilt/module_xc/xc_functional.cpp | 596 +- .../source_hamilt/module_xc/xc_functional.h | 538 +- .../module_xc/xc_functional_gradcorr.cpp | 1403 +- .../module_xc/xc_functional_libxc.cpp | 489 +- .../module_xc/xc_functional_libxc.h | 293 +- .../module_xc/xc_functional_libxc_tools.cpp | 518 +- .../module_xc/xc_functional_libxc_vxc.cpp | 637 +- .../xc_functional_libxc_wrapper_gcxc.cpp | 118 +- .../xc_functional_libxc_wrapper_tauxc.cpp | 163 +- .../xc_functional_libxc_wrapper_xc.cpp | 39 +- .../module_xc/xc_functional_vxc.cpp | 252 +- .../module_xc/xc_functional_wrapper_gcxc.cpp | 340 +- .../module_xc/xc_functional_wrapper_xc.cpp | 224 +- source/source_hamilt/operator.cpp | 282 +- source/source_hamilt/operator.h | 41 +- source/source_hamilt/test/dnrm2_test.cpp | 38 +- source/source_hamilt/test/rgen_test.cpp | 111 +- source/source_hsolver/diag_comm_info.h | 8 +- source/source_hsolver/diag_const_nums.cpp | 44 +- source/source_hsolver/diag_const_nums.h | 2 +- source/source_hsolver/diag_hs_para.cpp | 212 +- source/source_hsolver/diag_hs_para.h | 31 +- source/source_hsolver/diago_bpcg.cpp | 412 +- source/source_hsolver/diago_bpcg.h | 115 +- source/source_hsolver/diago_cg.cpp | 792 +- source/source_hsolver/diago_cg.h | 126 +- source/source_hsolver/diago_cusolver.cpp | 32 +- source/source_hsolver/diago_cusolver.h | 15 +- source/source_hsolver/diago_cusolvermp.cpp | 25 +- source/source_hsolver/diago_cusolvermp.h | 6 +- source/source_hsolver/diago_dav_subspace.cpp | 1100 +- source/source_hsolver/diago_dav_subspace.h | 130 +- source/source_hsolver/diago_david.cpp | 1340 +- source/source_hsolver/diago_david.h | 156 +- 
source/source_hsolver/diago_elpa.cpp | 305 +- source/source_hsolver/diago_elpa.h | 16 +- source/source_hsolver/diago_elpa_native.cpp | 163 +- source/source_hsolver/diago_elpa_native.h | 17 +- source/source_hsolver/diago_iter_assist.cpp | 929 +- source/source_hsolver/diago_iter_assist.h | 85 +- source/source_hsolver/diago_lapack.cpp | 602 +- source/source_hsolver/diago_lapack.h | 62 +- source/source_hsolver/diago_params.cpp | 102 +- source/source_hsolver/diago_params.h | 10 +- source/source_hsolver/diago_pexsi.cpp | 94 +- source/source_hsolver/diago_pexsi.h | 8 +- source/source_hsolver/diago_pxxxgvx.cpp | 747 +- source/source_hsolver/diago_pxxxgvx.h | 22 +- source/source_hsolver/diago_scalapack.cpp | 744 +- source/source_hsolver/diago_scalapack.h | 75 +- source/source_hsolver/hsolver.cpp | 285 +- source/source_hsolver/hsolver.h | 67 +- source/source_hsolver/hsolver_lcao.cpp | 717 +- source/source_hsolver/hsolver_lcao.h | 31 +- source/source_hsolver/hsolver_lcaopw.cpp | 138 +- source/source_hsolver/hsolver_lcaopw.h | 16 +- source/source_hsolver/hsolver_pw.cpp | 736 +- source/source_hsolver/hsolver_pw.h | 73 +- source/source_hsolver/hsolver_pw_sdft.cpp | 125 +- source/source_hsolver/hsolver_pw_sdft.h | 70 +- .../source_hsolver/kernels/bpcg_kernel_op.cpp | 251 +- .../source_hsolver/kernels/bpcg_kernel_op.h | 200 +- .../kernels/cuda/bpcg_kernel_op.cu | 739 +- .../kernels/cuda/diag_cusolver.cu | 230 +- .../kernels/cuda/diag_cusolver.cuh | 114 +- .../kernels/cuda/diag_cusolvermp.cu | 310 +- .../kernels/cuda/diag_cusolvermp.cuh | 19 +- .../source_hsolver/kernels/cuda/hegvd_op.cu | 344 +- source/source_hsolver/kernels/hegvd_op.cpp | 75 +- source/source_hsolver/kernels/hegvd_op.h | 37 +- .../kernels/rocm/bpcg_kernel_op.hip.cu | 528 +- .../kernels/rocm/hegvd_op.hip.cu | 541 +- .../kernels/test/math_hegvd_test.cpp | 101 +- .../kernels/test/perf_math_kernel.cpp | 328 +- source/source_hsolver/module_genelpa/Cblacs.h | 42 +- .../module_genelpa/elpa_generic.hpp | 602 +- 
.../module_genelpa/elpa_new.cpp | 622 +- .../source_hsolver/module_genelpa/elpa_new.h | 8 +- .../module_genelpa/elpa_new_complex.cpp | 711 +- .../module_genelpa/elpa_new_real.cpp | 607 +- .../module_genelpa/elpa_solver.h | 104 +- source/source_hsolver/module_genelpa/pblas.h | 215 +- .../source_hsolver/module_genelpa/scalapack.h | 66 +- .../source_hsolver/module_genelpa/utils.cpp | 382 +- source/source_hsolver/module_genelpa/utils.h | 38 +- .../module_pexsi/dist_bcd_matrix.cpp | 122 +- .../module_pexsi/dist_bcd_matrix.h | 29 +- .../module_pexsi/dist_ccs_matrix.cpp | 89 +- .../module_pexsi/dist_ccs_matrix.h | 49 +- .../module_pexsi/dist_matrix_transformer.cpp | 1217 +- .../module_pexsi/dist_matrix_transformer.h | 112 +- .../module_pexsi/pexsi_solver.cpp | 72 +- .../module_pexsi/pexsi_solver.h | 129 +- .../module_pexsi/simple_pexsi.cpp | 296 +- .../module_pexsi/simple_pexsi.h | 40 +- source/source_hsolver/my_elpa.h | 16 +- .../source_hsolver/para_linear_transform.cpp | 248 +- source/source_hsolver/para_linear_transform.h | 16 +- source/source_hsolver/parallel_k2d.cpp | 160 +- source/source_hsolver/parallel_k2d.h | 54 +- .../source_hsolver/test/diago_bpcg_test.cpp | 316 +- .../test/diago_cg_float_test.cpp | 364 +- .../test/diago_cg_real_test.cpp | 343 +- source/source_hsolver/test/diago_cg_test.cpp | 362 +- .../test/diago_david_float_test.cpp | 421 +- .../test/diago_david_real_test.cpp | 270 +- .../source_hsolver/test/diago_david_test.cpp | 427 +- source/source_hsolver/test/diago_elpa_utils.h | 256 +- .../source_hsolver/test/diago_lapack_test.cpp | 327 +- .../test/diago_lcao_cusolver_test.cpp | 377 +- .../source_hsolver/test/diago_lcao_test.cpp | 393 +- source/source_hsolver/test/diago_mock.h | 955 +- .../source_hsolver/test/diago_pexsi_test.cpp | 470 +- source/source_hsolver/test/hsolver_pw_sup.h | 178 +- .../test/hsolver_supplementary_mock.h | 59 +- .../source_hsolver/test/parallel_k2d_test.cpp | 196 +- .../source_hsolver/test/test_diago_assist.cpp | 40 +- 
.../test/test_diago_hs_para.cpp | 613 +- .../source_hsolver/test/test_hsolver_pw.cpp | 331 +- .../source_hsolver/test/test_hsolver_sdft.cpp | 266 +- .../test/test_para_linear_trans.cpp | 270 +- source/source_io/input_help.cpp | 907 +- source/source_io/input_help.h | 43 +- .../source_io/module_bessel/bessel_basis.cpp | 899 +- source/source_io/module_bessel/bessel_basis.h | 352 +- .../module_bessel/numerical_basis.cpp | 1288 +- .../source_io/module_bessel/numerical_basis.h | 105 +- .../module_bessel/numerical_basis_jyjy.cpp | 150 +- .../module_bessel/numerical_basis_jyjy.h | 18 +- .../module_bessel/numerical_descriptor.cpp | 363 +- .../module_bessel/numerical_descriptor.h | 41 +- .../source_io/module_chgpot/get_pchg_lcao.cpp | 608 +- .../source_io/module_chgpot/get_pchg_lcao.h | 100 +- source/source_io/module_chgpot/get_pchg_pw.h | 410 +- source/source_io/module_chgpot/rhog_io.cpp | 468 +- source/source_io/module_chgpot/rhog_io.h | 30 +- .../module_chgpot/write_elecstat_pot.cpp | 115 +- .../module_chgpot/write_elecstat_pot.h | 2 +- source/source_io/module_chgpot/write_init.cpp | 156 +- source/source_io/module_chgpot/write_init.h | 33 +- .../source_io/module_chgpot/write_libxc_r.cpp | 936 +- .../source_io/module_chgpot/write_libxc_r.h | 69 +- .../source_io/module_ctrl/ctrl_iter_lcao.cpp | 239 +- source/source_io/module_ctrl/ctrl_iter_lcao.h | 56 +- .../source_io/module_ctrl/ctrl_output_fp.cpp | 277 +- source/source_io/module_ctrl/ctrl_output_fp.h | 16 +- .../source_io/module_ctrl/ctrl_output_pw.cpp | 737 +- source/source_io/module_ctrl/ctrl_output_pw.h | 100 +- .../source_io/module_ctrl/ctrl_output_td.cpp | 151 +- source/source_io/module_ctrl/ctrl_output_td.h | 33 +- .../module_ctrl/ctrl_runner_lcao.cpp | 334 +- .../source_io/module_ctrl/ctrl_runner_lcao.h | 56 +- .../source_io/module_ctrl/ctrl_scf_lcao.cpp | 732 +- source/source_io/module_ctrl/ctrl_scf_lcao.h | 44 +- .../module_current/td_current_io.cpp | 1009 +- .../source_io/module_current/td_current_io.h | 176 +- 
.../module_current/td_current_io_comm.cpp | 1396 +- source/source_io/module_dipole/dipole_io.h | 16 +- .../source_io/module_dipole/write_dipole.cpp | 202 +- .../module_dm/test/write_dmk_test.cpp | 279 +- source/source_io/module_dm/write_dmk.cpp | 543 +- source/source_io/module_dm/write_dmk.h | 37 +- source/source_io/module_dm/write_dmr.cpp | 118 +- source/source_io/module_dm/write_dmr.h | 35 +- source/source_io/module_dos/cal_dos.cpp | 350 +- source/source_io/module_dos/cal_dos.h | 53 +- source/source_io/module_dos/cal_ldos.cpp | 494 +- source/source_io/module_dos/cal_ldos.h | 77 +- .../source_io/module_dos/cal_pdos_gamma.cpp | 453 +- source/source_io/module_dos/cal_pdos_gamma.h | 59 +- .../source_io/module_dos/cal_pdos_multik.cpp | 555 +- source/source_io/module_dos/cal_pdos_multik.h | 61 +- .../source_io/module_dos/write_dos_lcao.cpp | 204 +- source/source_io/module_dos/write_dos_lcao.h | 52 +- source/source_io/module_dos/write_dos_pw.cpp | 115 +- source/source_io/module_dos/write_dos_pw.h | 27 +- source/source_io/module_elf/write_elf.cpp | 267 +- source/source_io/module_elf/write_elf.h | 4 +- .../module_energy/nscf_fermi_surf.cpp | 138 +- .../source_io/module_energy/nscf_fermi_surf.h | 13 +- .../source_io/module_energy/write_bands.cpp | 256 +- source/source_io/module_energy/write_bands.h | 21 +- .../module_energy/write_eband_terms.hpp | 330 +- .../source_io/module_energy/write_eig_occ.cpp | 436 +- .../source_io/module_energy/write_eig_occ.h | 11 +- .../module_energy/write_proj_band_lcao.cpp | 653 +- .../module_energy/write_proj_band_lcao.h | 15 +- source/source_io/module_hs/cal_pLpR.cpp | 569 +- source/source_io/module_hs/cal_pLpR.h | 371 +- .../source_io/module_hs/cal_r_overlap_R.cpp | 1698 +- source/source_io/module_hs/cal_r_overlap_R.h | 69 +- .../source_io/module_hs/output_mat_sparse.cpp | 102 +- .../source_io/module_hs/output_mat_sparse.h | 28 +- source/source_io/module_hs/single_R_io.cpp | 206 +- source/source_io/module_hs/single_R_io.h | 14 +- 
source/source_io/module_hs/write_HS.h | 76 +- source/source_io/module_hs/write_HS.hpp | 492 +- source/source_io/module_hs/write_HS_R.cpp | 553 +- source/source_io/module_hs/write_HS_R.h | 128 +- .../source_io/module_hs/write_HS_sparse.cpp | 834 +- source/source_io/module_hs/write_HS_sparse.h | 30 +- source/source_io/module_hs/write_vxc.hpp | 368 +- source/source_io/module_hs/write_vxc_lip.hpp | 528 +- source/source_io/module_hs/write_vxc_r.hpp | 135 +- source/source_io/module_json/abacusjson.cpp | 244 +- source/source_io/module_json/abacusjson.h | 117 +- source/source_io/module_json/general_info.cpp | 31 +- source/source_io/module_json/general_info.h | 8 +- source/source_io/module_json/init_info.cpp | 167 +- source/source_io/module_json/init_info.h | 6 +- source/source_io/module_json/json_node.h | 23 +- source/source_io/module_json/output_info.cpp | 262 +- source/source_io/module_json/output_info.h | 35 +- source/source_io/module_json/para_json.cpp | 32 +- source/source_io/module_json/para_json.h | 8 +- source/source_io/module_json/readin_info.cpp | 2 +- source/source_io/module_json/readin_info.h | 12 +- .../module_json/test/para_json_test.cpp | 415 +- .../module_ml/cal_mlkedf_descriptors.cpp | 699 +- .../module_ml/cal_mlkedf_descriptors.h | 144 +- source/source_io/module_ml/io_npz.cpp | 743 +- source/source_io/module_ml/io_npz.h | 10 +- .../module_ml/write_mlkedf_descriptors.cpp | 357 +- .../module_ml/write_mlkedf_descriptors.h | 104 +- .../source_io/module_mulliken/output_dmk.cpp | 9 +- source/source_io/module_mulliken/output_dmk.h | 7 +- .../module_mulliken/output_mulliken.cpp | 962 +- .../module_mulliken/output_mulliken.h | 209 +- .../source_io/module_mulliken/output_sk.cpp | 43 +- source/source_io/module_mulliken/output_sk.h | 4 +- source/source_io/module_output/binstream.cpp | 64 +- source/source_io/module_output/binstream.h | 140 +- source/source_io/module_output/cal_test.cpp | 396 +- source/source_io/module_output/cal_test.h | 112 +- 
source/source_io/module_output/cif_io.cpp | 616 +- source/source_io/module_output/cif_io.h | 233 +- source/source_io/module_output/csr_reader.cpp | 303 +- source/source_io/module_output/csr_reader.h | 18 +- source/source_io/module_output/cube_io.h | 110 +- .../source_io/module_output/file_reader.cpp | 84 +- source/source_io/module_output/file_reader.h | 12 +- source/source_io/module_output/filename.cpp | 178 +- source/source_io/module_output/filename.h | 37 +- source/source_io/module_output/orb_io.cpp | 268 +- source/source_io/module_output/orb_io.h | 62 +- source/source_io/module_output/output.cpp | 141 +- source/source_io/module_output/output.h | 151 +- source/source_io/module_output/output_log.cpp | 435 +- source/source_io/module_output/output_log.h | 58 +- source/source_io/module_output/print_info.cpp | 568 +- source/source_io/module_output/print_info.h | 21 +- source/source_io/module_output/read_cube.cpp | 327 +- .../module_output/read_exit_file.cpp | 152 +- .../source_io/module_output/read_exit_file.h | 2 +- .../source_io/module_output/sparse_matrix.cpp | 131 +- .../source_io/module_output/sparse_matrix.h | 42 +- source/source_io/module_output/ucell_io.cpp | 55 +- source/source_io/module_output/ucell_io.h | 14 +- source/source_io/module_output/write_cube.cpp | 302 +- .../module_output/write_orb_info.cpp | 51 +- .../source_io/module_output/write_orb_info.h | 2 +- source/source_io/module_output/write_pao.cpp | 43 +- source/source_io/module_output/write_pao.h | 14 +- .../source_io/module_parameter/input_conv.cpp | 716 +- .../source_io/module_parameter/input_conv.h | 131 +- .../source_io/module_parameter/input_item.h | 29 +- .../module_parameter/input_parameter.h | 276 +- .../source_io/module_parameter/md_parameter.h | 3 +- .../source_io/module_parameter/parameter.cpp | 6 +- source/source_io/module_parameter/parameter.h | 23 +- .../source_io/module_parameter/read_input.cpp | 848 +- .../source_io/module_parameter/read_input.h | 92 +- .../read_input_item_deepks.cpp 
| 361 +- .../read_input_item_elec_stru.cpp | 1469 +- .../read_input_item_exx_dftu.cpp | 1093 +- .../module_parameter/read_input_item_md.cpp | 680 +- .../read_input_item_model.cpp | 577 +- .../read_input_item_ofdft.cpp | 694 +- .../read_input_item_other.cpp | 803 +- .../read_input_item_output.cpp | 1438 +- .../read_input_item_postprocess.cpp | 401 +- .../read_input_item_relax.cpp | 373 +- .../module_parameter/read_input_item_sdft.cpp | 149 +- .../read_input_item_system.cpp | 1582 +- .../read_input_item_tddft.cpp | 522 +- .../module_parameter/read_input_tool.h | 235 +- .../module_parameter/read_set_globalv.cpp | 148 +- .../module_parameter/system_parameter.h | 64 +- source/source_io/module_qo/to_qo.h | 490 +- source/source_io/module_qo/to_qo_kernel.cpp | 770 +- source/source_io/module_qo/to_qo_mpi.cpp | 92 +- .../source_io/module_qo/to_qo_structures.cpp | 442 +- source/source_io/module_restart/restart.cpp | 107 +- source/source_io/module_restart/restart.h | 89 +- .../module_restart/restart_exx_csr.h | 49 +- .../module_restart/restart_exx_csr.hpp | 223 +- source/source_io/module_unk/berryphase.cpp | 1048 +- source/source_io/module_unk/berryphase.h | 77 +- .../source_io/module_unk/unk_overlap_lcao.cpp | 872 +- .../source_io/module_unk/unk_overlap_lcao.h | 56 +- .../source_io/module_unk/unk_overlap_pw.cpp | 254 +- source/source_io/module_unk/unk_overlap_pw.h | 48 +- .../source_io/module_wannier/fR_overlap.cpp | 465 +- source/source_io/module_wannier/fR_overlap.h | 65 +- .../source_io/module_wannier/to_wannier90.cpp | 755 +- .../source_io/module_wannier/to_wannier90.h | 66 +- .../module_wannier/to_wannier90_lcao.cpp | 1889 +- .../module_wannier/to_wannier90_lcao.h | 101 +- .../to_wannier90_lcao_in_pw.cpp | 360 +- .../module_wannier/to_wannier90_lcao_in_pw.h | 56 +- .../module_wannier/to_wannier90_pw.cpp | 1804 +- .../module_wannier/to_wannier90_pw.h | 135 +- source/source_io/module_wf/get_wf_lcao.cpp | 812 +- source/source_io/module_wf/get_wf_lcao.h | 104 +- 
source/source_io/module_wf/get_wf_pw.h | 447 +- source/source_io/module_wf/read_wf2rho_pw.cpp | 300 +- source/source_io/module_wf/read_wf2rho_pw.h | 33 +- source/source_io/module_wf/read_wfc_nao.cpp | 417 +- source/source_io/module_wf/read_wfc_nao.h | 35 +- source/source_io/module_wf/read_wfc_pw.cpp | 456 +- source/source_io/module_wf/read_wfc_pw.h | 20 +- source/source_io/module_wf/write_wfc_nao.cpp | 492 +- source/source_io/module_wf/write_wfc_nao.h | 60 +- source/source_io/module_wf/write_wfc_pw.cpp | 518 +- source/source_io/module_wf/write_wfc_pw.h | 35 +- source/source_io/parse_args.cpp | 404 +- source/source_io/parse_args.h | 20 +- source/source_io/test/bessel_basis_test.cpp | 713 +- source/source_io/test/binstream_test.cpp | 121 +- source/source_io/test/cal_dos_test.cpp | 229 +- source/source_io/test/cal_pLpR_test.cpp | 158 +- source/source_io/test/cif_io_test.cpp | 662 +- source/source_io/test/csr_reader_test.cpp | 84 +- source/source_io/test/dos_test.h | 195 +- source/source_io/test/file_reader_test.cpp | 54 +- .../source_io/test/for_testing_input_conv.h | 220 +- source/source_io/test/for_testing_klist.h | 56 +- source/source_io/test/input_help_test.cpp | 440 +- .../source_io/test/numerical_basis_test.cpp | 233 +- source/source_io/test/orb_io_test.cpp | 174 +- .../source_io/test/output_mulliken_mock.cpp | 205 +- .../source_io/test/output_mulliken_test.cpp | 144 +- source/source_io/test/output_test.cpp | 226 +- source/source_io/test/outputlog_test.cpp | 382 +- source/source_io/test/parse_args_test.cpp | 275 +- source/source_io/test/prepare_unitcell.h | 600 +- source/source_io/test/print_info_test.cpp | 330 +- source/source_io/test/read_exit_file_test.cpp | 135 +- source/source_io/test/read_input_ptest.cpp | 856 +- source/source_io/test/read_rhog_test.cpp | 148 +- source/source_io/test/read_wf2rho_pw_test.cpp | 412 +- source/source_io/test/read_wfc_lcao_test.cpp | 1007 +- source/source_io/test/read_wfc_nao_test.cpp | 194 +- 
source/source_io/test/read_wfc_pw_test.cpp | 540 +- source/source_io/test/single_R_io_test.cpp | 105 +- source/source_io/test/sparse_matrix_test.cpp | 186 +- source/source_io/test/tmp_mocks.cpp | 48 +- source/source_io/test/to_qo_test.cpp | 2133 +- source/source_io/test/write_dos_pw_test.cpp | 247 +- source/source_io/test/write_eig_occ_test.cpp | 112 +- .../source_io/test/write_elf_logic_test.cpp | 172 +- source/source_io/test/write_orb_info_test.cpp | 78 +- source/source_io/test/write_wfc_nao_test.cpp | 709 +- .../test_serial/io_system_variable_test.cpp | 59 +- .../source_io/test_serial/prepare_unitcell.h | 600 +- .../test_serial/read_input_item_test.cpp | 1940 +- .../source_io/test_serial/read_input_test.cpp | 148 +- .../test_serial/read_input_tool_test.cpp | 37 +- source/source_io/test_serial/rho_io_test.cpp | 264 +- .../test_serial/write_bands_test.cpp | 53 +- source/source_lcao/FORCE.h | 144 +- source/source_lcao/FORCE_STRESS.cpp | 1530 +- source/source_lcao/FORCE_STRESS.h | 159 +- source/source_lcao/FORCE_gamma.cpp | 279 +- source/source_lcao/FORCE_k.cpp | 354 +- source/source_lcao/LCAO_HS_arrays.hpp | 51 +- source/source_lcao/LCAO_allocate.cpp | 53 +- source/source_lcao/LCAO_domain.h | 252 +- source/source_lcao/LCAO_init_basis.cpp | 90 +- source/source_lcao/LCAO_nl_mu.cpp | 1158 +- source/source_lcao/LCAO_set.cpp | 505 +- source/source_lcao/LCAO_set.h | 104 +- source/source_lcao/LCAO_set_fs.cpp | 206 +- source/source_lcao/LCAO_set_mat2d.cpp | 73 +- source/source_lcao/LCAO_set_st.cpp | 778 +- source/source_lcao/LCAO_set_zero.cpp | 34 +- source/source_lcao/center2_orb-orb11.cpp | 343 +- source/source_lcao/center2_orb-orb11.h | 22 +- source/source_lcao/center2_orb-orb21.cpp | 250 +- source/source_lcao/center2_orb-orb21.h | 40 +- source/source_lcao/center2_orb-orb22.cpp | 256 +- source/source_lcao/center2_orb-orb22.h | 46 +- source/source_lcao/center2_orb.cpp | 460 +- source/source_lcao/center2_orb.h | 50 +- source/source_lcao/dftu_lcao.cpp | 109 +- 
source/source_lcao/dftu_lcao.h | 4 +- source/source_lcao/edm.cpp | 120 +- source/source_lcao/force_stress_arrays.h | 12 +- source/source_lcao/hamilt_lcao.cpp | 917 +- source/source_lcao/hamilt_lcao.h | 112 +- source/source_lcao/hs_matrix_k.hpp | 72 +- .../source_lcao/module_deepks/LCAO_deepks.cpp | 274 +- .../source_lcao/module_deepks/LCAO_deepks.h | 48 +- .../module_deepks/LCAO_deepks_interface.cpp | 1077 +- .../module_deepks/LCAO_deepks_interface.h | 38 +- .../module_deepks/LCAO_deepks_io.cpp | 414 +- .../module_deepks/LCAO_deepks_io.h | 52 +- .../module_deepks/deepks_basic.cpp | 418 +- .../source_lcao/module_deepks/deepks_basic.h | 46 +- .../module_deepks/deepks_check.cpp | 87 +- .../source_lcao/module_deepks/deepks_check.h | 2 +- .../module_deepks/deepks_descriptor.cpp | 151 +- .../module_deepks/deepks_descriptor.h | 28 +- .../module_deepks/deepks_force.cpp | 496 +- .../source_lcao/module_deepks/deepks_force.h | 26 +- .../source_lcao/module_deepks/deepks_fpre.cpp | 306 +- .../source_lcao/module_deepks/deepks_fpre.h | 32 +- .../module_deepks/deepks_iterate.cpp | 158 +- .../module_deepks/deepks_iterate.h | 57 +- .../module_deepks/deepks_orbital.cpp | 81 +- .../module_deepks/deepks_orbital.h | 12 +- .../module_deepks/deepks_orbpre.cpp | 465 +- .../source_lcao/module_deepks/deepks_orbpre.h | 24 +- .../source_lcao/module_deepks/deepks_pdm.cpp | 782 +- source/source_lcao/module_deepks/deepks_pdm.h | 50 +- .../module_deepks/deepks_phialpha.cpp | 498 +- .../module_deepks/deepks_phialpha.h | 44 +- .../source_lcao/module_deepks/deepks_spre.cpp | 319 +- .../source_lcao/module_deepks/deepks_spre.h | 32 +- .../module_deepks/deepks_vdelta.cpp | 234 +- .../source_lcao/module_deepks/deepks_vdelta.h | 34 +- .../module_deepks/deepks_vdpre.cpp | 429 +- .../source_lcao/module_deepks/deepks_vdpre.h | 56 +- .../module_deepks/deepks_vdrpre.cpp | 408 +- .../source_lcao/module_deepks/deepks_vdrpre.h | 54 +- .../module_deepks/test/LCAO_deepks_test.cpp | 676 +- 
.../module_deepks/test/LCAO_deepks_test.h | 62 +- .../test/LCAO_deepks_test_prep.cpp | 245 +- source/source_lcao/module_deepks/test/klist.h | 40 +- .../module_deepks/test/klist_1.cpp | 775 +- .../module_deepks/test/main_deepks.cpp | 103 +- .../module_deepks/test/mock_tdinfo.cpp | 14 +- .../module_deepks/test/parallel_orbitals.h | 6 +- .../module_deltaspin/basic_funcs.cpp | 165 +- .../module_deltaspin/basic_funcs.h | 52 +- .../source_lcao/module_deltaspin/cal_mw.cpp | 248 +- .../module_deltaspin/cal_mw_from_lambda.cpp | 999 +- .../module_deltaspin/deltaspin_lcao.cpp | 136 +- .../module_deltaspin/deltaspin_lcao.h | 8 +- .../source_lcao/module_deltaspin/init_sc.cpp | 58 +- .../module_deltaspin/lambda_loop.cpp | 274 +- .../module_deltaspin/lambda_loop_helper.cpp | 240 +- .../module_deltaspin/spin_constrain.cpp | 653 +- .../module_deltaspin/spin_constrain.h | 261 +- .../module_deltaspin/template_helpers.cpp | 55 +- .../module_deltaspin/test/basic_test.cpp | 245 +- .../test/lambda_loop_helper_test.cpp | 156 +- .../module_deltaspin/test/prepare_unitcell.h | 445 +- .../test/spin_constrain_test.cpp | 166 +- .../test/template_helpers_test.cpp | 23 +- source/source_lcao/module_dftu/dftu.cpp | 624 +- source/source_lcao/module_dftu/dftu.h | 373 +- .../source_lcao/module_dftu/dftu_folding.cpp | 507 +- source/source_lcao/module_dftu/dftu_force.cpp | 885 +- .../source_lcao/module_dftu/dftu_hamilt.cpp | 267 +- source/source_lcao/module_dftu/dftu_io.cpp | 921 +- source/source_lcao/module_dftu/dftu_occup.cpp | 899 +- source/source_lcao/module_dftu/dftu_pw.cpp | 376 +- source/source_lcao/module_dftu/dftu_tools.cpp | 334 +- .../source_lcao/module_dftu/dftu_yukawa.cpp | 380 +- .../source_lcao/module_gint/batch_biggrid.cpp | 32 +- .../source_lcao/module_gint/batch_biggrid.h | 82 +- source/source_lcao/module_gint/big_grid.cpp | 143 +- source/source_lcao/module_gint/big_grid.h | 200 +- .../source_lcao/module_gint/biggrid_info.cpp | 108 +- source/source_lcao/module_gint/biggrid_info.h | 202 
+- .../source_lcao/module_gint/divide_info.cpp | 40 +- source/source_lcao/module_gint/divide_info.h | 36 +- source/source_lcao/module_gint/gint.h | 13 +- source/source_lcao/module_gint/gint_atom.cpp | 374 +- source/source_lcao/module_gint/gint_atom.h | 289 +- .../source_lcao/module_gint/gint_common.cpp | 735 +- source/source_lcao/module_gint/gint_common.h | 47 +- .../source_lcao/module_gint/gint_dvlocal.cpp | 412 +- source/source_lcao/module_gint/gint_dvlocal.h | 58 +- .../module_gint/gint_env_gamma.cpp | 55 +- .../source_lcao/module_gint/gint_env_gamma.h | 15 +- source/source_lcao/module_gint/gint_env_k.cpp | 75 +- source/source_lcao/module_gint/gint_env_k.h | 33 +- source/source_lcao/module_gint/gint_fvl.cpp | 132 +- source/source_lcao/module_gint/gint_fvl.h | 40 +- .../source_lcao/module_gint/gint_fvl_gpu.cpp | 208 +- source/source_lcao/module_gint/gint_fvl_gpu.h | 48 +- .../source_lcao/module_gint/gint_fvl_meta.cpp | 209 +- .../source_lcao/module_gint/gint_fvl_meta.h | 40 +- .../module_gint/gint_fvl_meta_gpu.cpp | 336 +- .../module_gint/gint_fvl_meta_gpu.h | 38 +- source/source_lcao/module_gint/gint_helper.h | 57 +- source/source_lcao/module_gint/gint_info.cpp | 454 +- source/source_lcao/module_gint/gint_info.h | 183 +- .../module_gint/gint_interface.cpp | 263 +- .../source_lcao/module_gint/gint_interface.h | 104 +- source/source_lcao/module_gint/gint_rho.cpp | 167 +- source/source_lcao/module_gint/gint_rho.h | 56 +- .../source_lcao/module_gint/gint_rho_gpu.cpp | 133 +- source/source_lcao/module_gint/gint_rho_gpu.h | 30 +- source/source_lcao/module_gint/gint_tau.cpp | 79 +- source/source_lcao/module_gint/gint_tau.h | 29 +- .../source_lcao/module_gint/gint_tau_gpu.cpp | 156 +- source/source_lcao/module_gint/gint_tau_gpu.h | 37 +- source/source_lcao/module_gint/gint_type.h | 12 +- source/source_lcao/module_gint/gint_vl.cpp | 114 +- source/source_lcao/module_gint/gint_vl.h | 25 +- .../source_lcao/module_gint/gint_vl_gpu.cpp | 148 +- 
source/source_lcao/module_gint/gint_vl_gpu.h | 25 +- .../module_gint/gint_vl_metagga.cpp | 86 +- .../source_lcao/module_gint/gint_vl_metagga.h | 29 +- .../module_gint/gint_vl_metagga_gpu.cpp | 159 +- .../module_gint/gint_vl_metagga_gpu.h | 30 +- .../module_gint/gint_vl_metagga_nspin4.cpp | 99 +- .../module_gint/gint_vl_metagga_nspin4.h | 25 +- .../gint_vl_metagga_nspin4_gpu.cpp | 187 +- .../module_gint/gint_vl_metagga_nspin4_gpu.h | 33 +- .../module_gint/gint_vl_nspin4.cpp | 66 +- .../source_lcao/module_gint/gint_vl_nspin4.h | 25 +- .../module_gint/gint_vl_nspin4_gpu.cpp | 126 +- .../module_gint/gint_vl_nspin4_gpu.h | 35 +- .../module_gint/kernel/cuda_mem_wrapper.h | 222 +- .../module_gint/kernel/dgemm_vbatch.cu | 169 +- .../module_gint/kernel/dgemm_vbatch.h | 134 +- .../module_gint/kernel/gemm_nn_vbatch.cuh | 368 +- .../module_gint/kernel/gemm_tn_vbatch.cuh | 368 +- .../module_gint/kernel/gint_gpu_vars.cpp | 179 +- .../module_gint/kernel/gint_gpu_vars.h | 13 +- .../module_gint/kernel/gint_helper.cuh | 58 +- .../module_gint/kernel/phi_operator_gpu.cu | 755 +- .../module_gint/kernel/phi_operator_gpu.h | 96 +- .../module_gint/kernel/phi_operator_kernel.cu | 1006 +- .../kernel/phi_operator_kernel.cuh | 233 +- .../module_gint/kernel/set_const_mem.cu | 13 +- .../module_gint/kernel/set_const_mem.cuh | 2 +- source/source_lcao/module_gint/kernel/sph.cuh | 8 +- .../module_gint/localcell_info.cpp | 239 +- .../source_lcao/module_gint/localcell_info.h | 261 +- .../source_lcao/module_gint/meshgrid_info.h | 109 +- .../source_lcao/module_gint/phi_operator.cpp | 377 +- source/source_lcao/module_gint/phi_operator.h | 168 +- .../source_lcao/module_gint/phi_operator.hpp | 320 +- source/source_lcao/module_gint/set_ddphi.cpp | 434 +- .../module_gint/test/test_gint_common.cpp | 111 +- .../module_gint/test/test_gint_precision.cpp | 12 +- .../module_gint/test/tmp_mocks.cpp | 81 +- .../source_lcao/module_gint/unitcell_info.cpp | 49 +- .../source_lcao/module_gint/unitcell_info.h | 362 +- 
.../module_hcontainer/atom_pair.cpp | 1013 +- .../source_lcao/module_hcontainer/atom_pair.h | 236 +- .../module_hcontainer/base_matrix.cpp | 205 +- .../module_hcontainer/base_matrix.h | 89 +- .../module_hcontainer/func_folding.cpp | 140 +- .../module_hcontainer/func_transfer.cpp | 980 +- .../module_hcontainer/hcontainer.cpp | 1122 +- .../module_hcontainer/hcontainer.h | 136 +- .../module_hcontainer/hcontainer_funcs.h | 62 +- .../module_hcontainer/output_hcontainer.cpp | 201 +- .../module_hcontainer/output_hcontainer.h | 14 +- .../module_hcontainer/read_hcontainer.cpp | 229 +- .../module_hcontainer/read_hcontainer.h | 16 +- .../module_hcontainer/test/prepare_unitcell.h | 575 +- .../test/test_func_folding.cpp | 248 +- .../test/test_hcontainer.cpp | 870 +- .../test/test_hcontainer_complex.cpp | 720 +- .../test/test_hcontainer_output.cpp | 170 +- .../test/test_hcontainer_readCSR.cpp | 171 +- .../test/test_hcontainer_time.cpp | 145 +- .../module_hcontainer/test/test_transfer.cpp | 529 +- .../module_hcontainer/test/tmp_mocks.cpp | 91 +- .../module_hcontainer/transfer.cpp | 860 +- .../source_lcao/module_hcontainer/transfer.h | 53 +- .../module_lr/ao_to_mo_transformer/ao_to_mo.h | 66 +- .../ao_to_mo_parallel.cpp | 243 +- .../ao_to_mo_transformer/ao_to_mo_serial.cpp | 288 +- .../test/ao_to_mo_test.cpp | 640 +- .../source_lcao/module_lr/dm_trans/dm_trans.h | 66 +- .../module_lr/dm_trans/dm_trans_parallel.cpp | 280 +- .../module_lr/dm_trans/dm_trans_serial.cpp | 301 +- .../module_lr/dm_trans/dmr_complex.cpp | 135 +- .../module_lr/dm_trans/test/dm_trans_test.cpp | 627 +- .../module_lr/esolver_lrtd_lcao.cpp | 1244 +- .../source_lcao/module_lr/esolver_lrtd_lcao.h | 206 +- .../source_lcao/module_lr/hamilt_casida.cpp | 133 +- source/source_lcao/module_lr/hamilt_casida.h | 335 +- source/source_lcao/module_lr/hamilt_ulr.hpp | 412 +- source/source_lcao/module_lr/hsolver_lrtd.hpp | 284 +- source/source_lcao/module_lr/lr_spectrum.cpp | 466 +- source/source_lcao/module_lr/lr_spectrum.h 
| 174 +- .../module_lr/lr_spectrum_velocity.cpp | 339 +- .../operator_casida/operator_lr_diag.h | 101 +- .../operator_casida/operator_lr_exx.cpp | 269 +- .../operator_casida/operator_lr_exx.h | 212 +- .../operator_casida/operator_lr_hxc.cpp | 230 +- .../operator_casida/operator_lr_hxc.h | 138 +- .../module_lr/potentials/pot_hxc_lrtd.cpp | 581 +- .../module_lr/potentials/pot_hxc_lrtd.h | 103 +- .../module_lr/potentials/xc_kernel.cpp | 587 +- .../module_lr/potentials/xc_kernel.h | 150 +- .../ri_benchmark/operator_ri_hartree.h | 143 +- .../module_lr/ri_benchmark/ri_benchmark.h | 125 +- .../module_lr/ri_benchmark/ri_benchmark.hpp | 999 +- .../ri_benchmark/test/ri_benchmark_test.cpp | 134 +- .../source_lcao/module_lr/utils/lr_util.cpp | 406 +- source/source_lcao/module_lr/utils/lr_util.h | 175 +- .../source_lcao/module_lr/utils/lr_util.hpp | 332 +- .../module_lr/utils/lr_util_hcontainer.cpp | 103 +- .../module_lr/utils/lr_util_hcontainer.h | 230 +- .../module_lr/utils/lr_util_print.h | 469 +- .../module_lr/utils/lr_util_xc.hpp | 82 +- .../utils/test/lr_util_algorithms_test.cpp | 273 +- .../utils/test/lr_util_physics_test.cpp | 111 +- .../module_operator_lcao/deepks_lcao.cpp | 647 +- .../module_operator_lcao/deepks_lcao.h | 37 +- .../source_lcao/module_operator_lcao/dftu.hpp | 2 +- .../dftu_force_stress.hpp | 662 +- .../module_operator_lcao/dftu_lcao.cpp | 779 +- .../module_operator_lcao/dftu_lcao.h | 98 +- .../dspin_force_stress.hpp | 631 +- .../module_operator_lcao/dspin_lcao.cpp | 768 +- .../module_operator_lcao/dspin_lcao.h | 110 +- .../module_operator_lcao/ekinetic.cpp | 328 +- .../module_operator_lcao/ekinetic.h | 76 +- .../ekinetic_force_stress.hpp | 84 +- .../module_operator_lcao/meta_lcao.cpp | 2 +- .../module_operator_lcao/meta_lcao.h | 18 +- .../module_operator_lcao/nonlocal.cpp | 437 +- .../module_operator_lcao/nonlocal.h | 80 +- .../nonlocal_force_stress.hpp | 737 +- .../module_operator_lcao/op_dftu_lcao.cpp | 84 +- .../module_operator_lcao/op_dftu_lcao.h | 
19 +- .../module_operator_lcao/op_exx_lcao.cpp | 65 +- .../module_operator_lcao/op_exx_lcao.h | 91 +- .../module_operator_lcao/op_exx_lcao.hpp | 743 +- .../operator_force_stress_utils.cpp | 53 +- .../operator_force_stress_utils.h | 57 +- .../operator_force_stress_utils.hpp | 250 +- .../module_operator_lcao/operator_lcao.cpp | 465 +- .../module_operator_lcao/operator_lcao.h | 86 +- .../module_operator_lcao/overlap.cpp | 564 +- .../module_operator_lcao/overlap.h | 87 +- .../overlap_force_stress.hpp | 84 +- .../module_operator_lcao/td_ekinetic_lcao.cpp | 522 +- .../module_operator_lcao/td_ekinetic_lcao.h | 56 +- .../module_operator_lcao/td_nonlocal_lcao.cpp | 682 +- .../module_operator_lcao/td_nonlocal_lcao.h | 44 +- .../module_operator_lcao/td_pot_hybrid.cpp | 372 +- .../module_operator_lcao/td_pot_hybrid.h | 60 +- .../test/test_T_NL_cd.cpp | 295 +- .../module_operator_lcao/test/test_dftu.cpp | 271 +- .../test/test_ekinetic.cpp | 210 +- .../test/test_ekinetic_serial.cpp | 865 +- .../test/test_nonlocal.cpp | 278 +- .../test/test_overlap.cpp | 185 +- .../test/test_overlap_cd.cpp | 201 +- .../test/test_overlap_serial.cpp | 785 +- .../module_operator_lcao/test/tmp_mocks.cpp | 257 +- .../module_operator_lcao/veff_lcao.cpp | 186 +- .../module_operator_lcao/veff_lcao.h | 63 +- source/source_lcao/module_rdmft/rdmft.cpp | 537 +- source/source_lcao/module_rdmft/rdmft.h | 73 +- source/source_lcao/module_rdmft/rdmft_pot.cpp | 470 +- .../source_lcao/module_rdmft/rdmft_tools.cpp | 539 +- source/source_lcao/module_rdmft/rdmft_tools.h | 340 +- .../module_rdmft/update_state_rdmft.cpp | 202 +- .../module_ri/ABFs_Construct-PCA.cpp | 294 +- .../module_ri/ABFs_Construct-PCA.h | 14 +- source/source_lcao/module_ri/Exx_LRI.h | 219 +- source/source_lcao/module_ri/Exx_LRI.hpp | 1171 +- .../source_lcao/module_ri/Exx_LRI_interface.h | 160 +- .../module_ri/Exx_LRI_interface.hpp | 676 +- source/source_lcao/module_ri/Inverse_Matrix.h | 18 +- .../source_lcao/module_ri/Inverse_Matrix.hpp | 205 +- 
source/source_lcao/module_ri/LRI_CV.h | 214 +- source/source_lcao/module_ri/LRI_CV.hpp | 823 +- source/source_lcao/module_ri/LRI_CV_Tools.h | 198 +- source/source_lcao/module_ri/LRI_CV_Tools.hpp | 589 +- .../source_lcao/module_ri/Matrix_Orbs11.cpp | 191 +- source/source_lcao/module_ri/Matrix_Orbs11.h | 57 +- .../source_lcao/module_ri/Matrix_Orbs11.hpp | 304 +- .../source_lcao/module_ri/Matrix_Orbs21.cpp | 258 +- source/source_lcao/module_ri/Matrix_Orbs21.h | 61 +- .../source_lcao/module_ri/Matrix_Orbs21.hpp | 481 +- .../source_lcao/module_ri/Matrix_Orbs22.cpp | 220 +- source/source_lcao/module_ri/Matrix_Orbs22.h | 73 +- .../source_lcao/module_ri/Matrix_Orbs22.hpp | 1105 +- source/source_lcao/module_ri/Mix_DMk_2D.cpp | 95 +- source/source_lcao/module_ri/Mix_DMk_2D.h | 88 +- source/source_lcao/module_ri/Mix_Matrix.cpp | 166 +- source/source_lcao/module_ri/Mix_Matrix.h | 8 +- source/source_lcao/module_ri/RI_2D_Comm.cpp | 97 +- source/source_lcao/module_ri/RI_2D_Comm.h | 148 +- source/source_lcao/module_ri/RI_2D_Comm.hpp | 735 +- source/source_lcao/module_ri/RI_Util.h | 102 +- source/source_lcao/module_ri/RI_Util.hpp | 296 +- source/source_lcao/module_ri/RPA_LRI.h | 86 +- source/source_lcao/module_ri/RPA_LRI.hpp | 1430 +- .../module_ri/abfs-vector3_order.h | 154 +- source/source_lcao/module_ri/abfs.h | 10 +- .../module_ri/conv_coulomb_pot_k.cpp | 489 +- .../module_ri/conv_coulomb_pot_k.h | 85 +- .../module_ri/conv_coulomb_pot_k.hpp | 113 +- source/source_lcao/module_ri/ewald_Vq.h | 221 +- source/source_lcao/module_ri/ewald_Vq.hpp | 1131 +- .../module_ri/exx_abfs-construct_orbs.cpp | 883 +- .../module_ri/exx_abfs-construct_orbs.h | 169 +- source/source_lcao/module_ri/exx_abfs-io.cpp | 484 +- source/source_lcao/module_ri/exx_abfs-io.h | 38 +- source/source_lcao/module_ri/exx_abfs-jle.cpp | 104 +- source/source_lcao/module_ri/exx_abfs-jle.h | 19 +- source/source_lcao/module_ri/exx_abfs.h | 17 +- source/source_lcao/module_ri/exx_lip.h | 119 +- 
source/source_lcao/module_ri/exx_lip.hpp | 835 +- .../module_ri/exx_opt_orb-print.cpp | 418 +- source/source_lcao/module_ri/exx_opt_orb.cpp | 860 +- source/source_lcao/module_ri/exx_opt_orb.h | 83 +- .../source_lcao/module_ri/exx_rotate_abfs.h | 38 +- .../source_lcao/module_ri/exx_rotate_abfs.hpp | 984 +- .../source_lcao/module_ri/gaussian_abfs.cpp | 472 +- source/source_lcao/module_ri/gaussian_abfs.h | 155 +- .../irreducible_sector.cpp | 452 +- .../module_exx_symmetry/irreducible_sector.h | 309 +- .../irreducible_sector_bvk.cpp | 322 +- .../module_exx_symmetry/symmetry_rotation.cpp | 943 +- .../module_exx_symmetry/symmetry_rotation.h | 435 +- .../symmetry_rotation_R.hpp | 580 +- .../symmetry_rotation_R_hcontainer.hpp | 443 +- .../symmetry_rotation_output.cpp | 188 +- .../test/symmetry_rotation_test.cpp | 219 +- .../module_ri/serialization_boost.h | 143 +- .../module_ri/serialization_cereal.h | 110 +- .../source_lcao/module_ri/singular_value.cpp | 312 +- source/source_lcao/module_ri/singular_value.h | 88 +- .../test/abfs-vector3_order_test.cpp | 18 +- .../module_ri/test/dm_mixing_test.cpp | 251 +- .../module_ri/test/ri_cv_io_test.cpp | 34 +- .../module_ri/test_code/Inverse_Matrix-test.h | 248 +- .../test_code/element_basis_index-test.h | 59 +- .../element_basis_index-unittest.cpp | 138 +- .../test_code/exx_abfs-construct_orbs-test.h | 52 +- .../test_code/make_gaunt_table-unittest.h | 62 +- .../module_ri/test_code/scalapack-test.cpp | 776 +- .../module_ri/test_code/test_function.h | 155 +- source/source_lcao/module_ri/write_ri_cv.hpp | 221 +- source/source_lcao/module_rt/band_energy.cpp | 762 +- source/source_lcao/module_rt/band_energy.h | 44 +- source/source_lcao/module_rt/boundary_fix.cpp | 120 +- source/source_lcao/module_rt/boundary_fix.h | 93 +- source/source_lcao/module_rt/evolve_elec.cpp | 375 +- source/source_lcao/module_rt/evolve_elec.h | 170 +- source/source_lcao/module_rt/evolve_psi.cpp | 371 +- source/source_lcao/module_rt/evolve_psi.h | 58 +- 
source/source_lcao/module_rt/gather_mat.h | 66 +- .../module_rt/kernels/cublasmp_context.h | 113 +- .../kernels/cuda/band_energy_kernel.cu | 77 +- .../kernels/cuda/band_energy_kernel.cuh | 22 +- .../module_rt/kernels/cuda/norm_psi_kernel.cu | 85 +- .../kernels/cuda/norm_psi_kernel.cuh | 20 +- .../kernels/cuda/snap_psibeta_gpu.cu | 390 +- .../kernels/cuda/snap_psibeta_kernel.cu | 405 +- .../kernels/cuda/snap_psibeta_kernel.cuh | 70 +- .../module_rt/kernels/snap_psibeta_gpu.h | 4 +- .../source_lcao/module_rt/middle_hamilt.cpp | 315 +- source/source_lcao/module_rt/middle_hamilt.h | 56 +- source/source_lcao/module_rt/norm_psi.cpp | 985 +- source/source_lcao/module_rt/norm_psi.h | 44 +- source/source_lcao/module_rt/propagator.cpp | 130 +- source/source_lcao/module_rt/propagator.h | 190 +- .../source_lcao/module_rt/propagator_cn2.cpp | 873 +- .../source_lcao/module_rt/propagator_etrs.cpp | 49 +- .../module_rt/propagator_taylor.cpp | 453 +- .../module_rt/snap_psibeta_half_tddft.cpp | 438 +- .../module_rt/snap_psibeta_half_tddft.h | 28 +- .../module_rt/solve_propagation.cpp | 142 +- .../source_lcao/module_rt/solve_propagation.h | 42 +- source/source_lcao/module_rt/td_folding.cpp | 276 +- source/source_lcao/module_rt/td_folding.h | 55 +- source/source_lcao/module_rt/td_info.cpp | 298 +- source/source_lcao/module_rt/td_info.h | 39 +- .../source_lcao/module_rt/td_moving_gauge.cpp | 500 +- .../source_lcao/module_rt/td_moving_gauge.h | 24 +- .../module_rt/test/band_energy_test.cpp | 36 +- .../module_rt/test/middle_hamilt_test.cpp | 176 +- .../module_rt/test/norm_psi_test.cpp | 86 +- .../module_rt/test/propagator_test1.cpp | 106 +- .../module_rt/test/propagator_test2.cpp | 98 +- .../module_rt/test/propagator_test3.cpp | 100 +- .../source_lcao/module_rt/test/tddft_test.cpp | 28 +- .../source_lcao/module_rt/test/tddft_test.h | 42 +- .../source_lcao/module_rt/test/upsi_test1.cpp | 42 +- .../source_lcao/module_rt/test/upsi_test2.cpp | 34 +- 
.../source_lcao/module_rt/test/upsi_test3.cpp | 54 +- source/source_lcao/module_rt/upsi.cpp | 329 +- source/source_lcao/module_rt/upsi.h | 52 +- source/source_lcao/module_rt/velocity_op.cpp | 822 +- source/source_lcao/module_rt/velocity_op.h | 55 +- source/source_lcao/pulay_fs.h | 83 +- source/source_lcao/pulay_fs_center2.cpp | 277 +- source/source_lcao/pulay_fs_gint.hpp | 59 +- source/source_lcao/pulay_fs_temp.hpp | 194 +- source/source_lcao/record_adj.cpp | 382 +- source/source_lcao/record_adj.h | 25 +- source/source_lcao/rho_tau_lcao.cpp | 58 +- source/source_lcao/rho_tau_lcao.h | 11 +- source/source_lcao/setup_deepks.cpp | 223 +- source/source_lcao/setup_deepks.h | 94 +- source/source_lcao/setup_dm.cpp | 7 +- source/source_lcao/setup_dm.h | 25 +- source/source_lcao/setup_exx.cpp | 107 +- source/source_lcao/setup_exx.h | 45 +- source/source_lcao/spar_dh.cpp | 449 +- source/source_lcao/spar_dh.h | 54 +- source/source_lcao/spar_exx.cpp | 183 +- source/source_lcao/spar_exx.h | 15 +- source/source_lcao/spar_hsr.cpp | 356 +- source/source_lcao/spar_hsr.h | 35 +- source/source_lcao/spar_st.cpp | 262 +- source/source_lcao/spar_st.h | 31 +- source/source_lcao/spar_u.cpp | 345 +- source/source_lcao/spar_u.h | 35 +- source/source_lcao/stress_tools.cpp | 21 +- source/source_lcao/stress_tools.h | 2 +- .../test/test_init_chg_hr_error.cpp | 44 +- .../test/test_init_dm_from_file.cpp | 361 +- .../test_output_hcontainer_consistency.cpp | 295 +- source/source_lcao/test/tmp_mocks.cpp | 56 +- source/source_lcao/wavefunc_in_pw.cpp | 957 +- source/source_lcao/wavefunc_in_pw.h | 43 +- source/source_main/driver.cpp | 151 +- source/source_main/driver.h | 19 +- source/source_main/driver_run.cpp | 119 +- source/source_main/main.cpp | 19 +- source/source_md/fire.cpp | 234 +- source/source_md/fire.h | 21 +- source/source_md/langevin.cpp | 101 +- source/source_md/langevin.h | 18 +- source/source_md/md_base.cpp | 220 +- source/source_md/md_base.h | 30 +- source/source_md/md_func.cpp | 580 +- 
source/source_md/md_func.h | 120 +- source/source_md/msst.cpp | 325 +- source/source_md/msst.h | 24 +- source/source_md/nhchain.cpp | 1170 +- source/source_md/nhchain.h | 80 +- source/source_md/run_md.cpp | 187 +- source/source_md/run_md.h | 2 +- source/source_md/test/fire_test.cpp | 267 +- source/source_md/test/langevin_test.cpp | 224 +- source/source_md/test/lj_pot_test.cpp | 216 +- source/source_md/test/md_func_test.cpp | 552 +- source/source_md/test/msst_test.cpp | 328 +- source/source_md/test/nhchain_test.cpp | 318 +- source/source_md/test/setcell.h | 78 +- source/source_md/test/verlet_test.cpp | 449 +- source/source_md/verlet.cpp | 164 +- source/source_md/verlet.h | 20 +- source/source_psi/psi.cpp | 472 +- source/source_psi/psi.h | 82 +- source/source_psi/psi_init_atomic.cpp | 925 +- source/source_psi/psi_init_atomic.h | 27 +- source/source_psi/psi_init_atomic_random.cpp | 36 +- source/source_psi/psi_init_atomic_random.h | 20 +- source/source_psi/psi_init_file.cpp | 74 +- source/source_psi/psi_init_file.h | 23 +- source/source_psi/psi_init_nao.cpp | 560 +- source/source_psi/psi_init_nao.h | 65 +- source/source_psi/psi_init_nao_random.cpp | 36 +- source/source_psi/psi_init_nao_random.h | 20 +- source/source_psi/psi_init_random.cpp | 28 +- source/source_psi/psi_init_random.h | 23 +- source/source_psi/psi_initializer.cpp | 266 +- source/source_psi/psi_initializer.h | 70 +- source/source_psi/psi_prepare.cpp | 429 +- source/source_psi/psi_prepare.h | 38 +- source/source_psi/psi_prepare_base.h | 6 +- source/source_psi/setup_psi.cpp | 69 +- source/source_psi/setup_psi.h | 18 +- source/source_psi/setup_psi_pw.cpp | 503 +- source/source_psi/setup_psi_pw.h | 143 +- .../test/psi_initializer_unit_test.cpp | 814 +- source/source_psi/test/psi_test.cpp | 457 +- .../source_pw/module_ofdft/evolve_ofdft.cpp | 463 +- source/source_pw/module_ofdft/evolve_ofdft.h | 69 +- source/source_pw/module_ofdft/kedf_extwt.cpp | 556 +- source/source_pw/module_ofdft/kedf_extwt.h | 62 +- 
source/source_pw/module_ofdft/kedf_lkt.cpp | 328 +- source/source_pw/module_ofdft/kedf_lkt.h | 31 +- .../source_pw/module_ofdft/kedf_manager.cpp | 514 +- source/source_pw/module_ofdft/kedf_manager.h | 75 +- source/source_pw/module_ofdft/kedf_ml.cpp | 345 +- source/source_pw/module_ofdft/kedf_ml.h | 117 +- .../source_pw/module_ofdft/kedf_ml_label.cpp | 476 +- source/source_pw/module_ofdft/kedf_tf.cpp | 107 +- source/source_pw/module_ofdft/kedf_tf.h | 27 +- source/source_pw/module_ofdft/kedf_vw.cpp | 333 +- source/source_pw/module_ofdft/kedf_vw.h | 23 +- source/source_pw/module_ofdft/kedf_wt.cpp | 609 +- source/source_pw/module_ofdft/kedf_wt.h | 54 +- source/source_pw/module_ofdft/kedf_xwm.cpp | 428 +- source/source_pw/module_ofdft/kedf_xwm.h | 61 +- source/source_pw/module_ofdft/ml_base.cpp | 354 +- source/source_pw/module_ofdft/ml_base.h | 130 +- source/source_pw/module_ofdft/ml_base_pot.cpp | 731 +- source/source_pw/module_ofdft/nn_of.cpp | 31 +- source/source_pw/module_ofdft/nn_of.h | 42 +- .../source_pw/module_ofdft/of_print_info.cpp | 203 +- source/source_pw/module_ofdft/of_print_info.h | 19 +- .../source_pw/module_ofdft/of_stress_pw.cpp | 128 +- source/source_pw/module_ofdft/of_stress_pw.h | 22 +- .../source_pw/module_pwdft/deltaspin_pw.cpp | 69 +- source/source_pw/module_pwdft/deltaspin_pw.h | 12 +- source/source_pw/module_pwdft/dftu_pw.cpp | 25 +- source/source_pw/module_pwdft/dftu_pw.h | 14 +- source/source_pw/module_pwdft/elecond.cpp | 347 +- source/source_pw/module_pwdft/elecond.h | 70 +- source/source_pw/module_pwdft/exx_helper.cpp | 224 +- source/source_pw/module_pwdft/exx_helper.h | 75 +- .../source_pw/module_pwdft/exx_helper_base.h | 38 +- source/source_pw/module_pwdft/forces.cpp | 964 +- source/source_pw/module_pwdft/forces.h | 141 +- source/source_pw/module_pwdft/forces_cc.cpp | 556 +- source/source_pw/module_pwdft/forces_nl.cpp | 79 +- .../source_pw/module_pwdft/forces_onsite.cpp | 112 +- source/source_pw/module_pwdft/forces_scc.cpp | 336 +- 
source/source_pw/module_pwdft/forces_us.cpp | 179 +- .../source_pw/module_pwdft/fs_kin_tools.cpp | 186 +- source/source_pw/module_pwdft/fs_kin_tools.h | 19 +- .../module_pwdft/fs_nonlocal_tools.cpp | 1115 +- .../module_pwdft/fs_nonlocal_tools.h | 96 +- source/source_pw/module_pwdft/hamilt_lcaopw.h | 39 +- source/source_pw/module_pwdft/hamilt_pw.cpp | 655 +- source/source_pw/module_pwdft/hamilt_pw.h | 39 +- .../kernels/cal_density_real_op.cpp | 16 +- .../kernels/cal_density_real_op.h | 6 +- .../kernels/cuda/cal_density_real_op.cu | 41 +- .../module_pwdft/kernels/cuda/ekinetic_op.cu | 85 +- .../kernels/cuda/exx_cal_energy_op.cu | 57 +- .../module_pwdft/kernels/cuda/force_op.cu | 1238 +- .../module_pwdft/kernels/cuda/meta_op.cu | 72 +- .../kernels/cuda/mul_potential_op.cu | 51 +- .../module_pwdft/kernels/cuda/nonlocal_op.cu | 188 +- .../module_pwdft/kernels/cuda/onsite_op.cu | 142 +- .../module_pwdft/kernels/cuda/stress_op.cu | 1575 +- .../kernels/cuda/vec_mul_vec_complex.cu | 44 +- .../module_pwdft/kernels/cuda/veff_op.cu | 66 +- .../module_pwdft/kernels/cuda/vnl_op.cu | 197 +- .../kernels/cuda/vnl_tools_cu.hpp | 38 +- .../module_pwdft/kernels/cuda/wf_op.cu | 163 +- .../module_pwdft/kernels/ekinetic_op.cpp | 53 +- .../module_pwdft/kernels/ekinetic_op.h | 75 +- .../kernels/exx_cal_energy_op.cpp | 17 +- .../module_pwdft/kernels/exx_cal_energy_op.h | 7 +- .../module_pwdft/kernels/force_op.cpp | 657 +- .../source_pw/module_pwdft/kernels/force_op.h | 472 +- .../module_pwdft/kernels/meta_op.cpp | 29 +- .../source_pw/module_pwdft/kernels/meta_op.h | 101 +- .../module_pwdft/kernels/mul_potential_op.cpp | 23 +- .../module_pwdft/kernels/mul_potential_op.h | 31 +- .../module_pwdft/kernels/nonlocal_op.cpp | 118 +- .../module_pwdft/kernels/nonlocal_op.h | 196 +- .../module_pwdft/kernels/onsite_op.cpp | 107 +- .../module_pwdft/kernels/onsite_op.h | 95 +- .../kernels/rocm/ekinetic_op.hip.cu | 90 +- .../module_pwdft/kernels/rocm/force_op.hip.cu | 922 +- 
.../module_pwdft/kernels/rocm/meta_op.hip.cu | 77 +- .../kernels/rocm/nonlocal_op.hip.cu | 196 +- .../kernels/rocm/onsite_op.hip.cu | 154 +- .../kernels/rocm/stress_op.hip.cu | 1663 +- .../module_pwdft/kernels/rocm/veff_op.hip.cu | 76 +- .../module_pwdft/kernels/rocm/vnl_op.hip.cu | 202 +- .../module_pwdft/kernels/rocm/wf_op.hip.cu | 168 +- .../module_pwdft/kernels/stress_op.cpp | 755 +- .../module_pwdft/kernels/stress_op.h | 638 +- .../kernels/test/ekinetic_op_test.cpp | 516 +- .../kernels/test/force_op_test.cpp | 254 +- .../kernels/test/meta_op_test.cpp | 108 +- .../kernels/test/nonlocal_op_test.cpp | 1037 +- .../kernels/test/stress_op_mgga_test.cpp | 59 +- .../kernels/test/stress_op_test.cpp | 4138 +- .../kernels/test/veff_op_test.cpp | 2413 +- .../module_pwdft/kernels/test/vnl_op_test.cpp | 192 +- .../module_pwdft/kernels/test/wf_op_test.cpp | 882 +- .../kernels/vec_mul_vec_complex_op.cpp | 21 +- .../kernels/vec_mul_vec_complex_op.h | 7 +- .../module_pwdft/kernels/veff_op.cpp | 39 +- .../source_pw/module_pwdft/kernels/veff_op.h | 37 +- .../source_pw/module_pwdft/kernels/vnl_op.cpp | 86 +- .../source_pw/module_pwdft/kernels/vnl_op.h | 88 +- .../module_pwdft/kernels/vnl_tools.hpp | 38 +- .../source_pw/module_pwdft/kernels/wf_op.cpp | 87 +- source/source_pw/module_pwdft/kernels/wf_op.h | 110 +- .../source_pw/module_pwdft/nonlocal_maths.hpp | 499 +- source/source_pw/module_pwdft/onsite_proj.cpp | 739 +- source/source_pw/module_pwdft/onsite_proj.h | 315 +- .../module_pwdft/onsite_proj_print.cpp | 156 +- .../module_pwdft/onsite_proj_print.h | 32 +- .../module_pwdft/onsite_proj_tools.cpp | 1379 +- .../module_pwdft/onsite_proj_tools.h | 142 +- source/source_pw/module_pwdft/op_pw.cpp | 9 +- source/source_pw/module_pwdft/op_pw.h | 13 +- source/source_pw/module_pwdft/op_pw_ekin.cpp | 113 +- source/source_pw/module_pwdft/op_pw_ekin.h | 70 +- source/source_pw/module_pwdft/op_pw_exx.cpp | 962 +- source/source_pw/module_pwdft/op_pw_exx.h | 191 +- 
.../source_pw/module_pwdft/op_pw_exx_ace.cpp | 483 +- .../source_pw/module_pwdft/op_pw_exx_pot.cpp | 795 +- source/source_pw/module_pwdft/op_pw_meta.cpp | 152 +- source/source_pw/module_pwdft/op_pw_meta.h | 100 +- source/source_pw/module_pwdft/op_pw_nl.cpp | 495 +- source/source_pw/module_pwdft/op_pw_nl.h | 87 +- source/source_pw/module_pwdft/op_pw_proj.cpp | 542 +- source/source_pw/module_pwdft/op_pw_proj.h | 76 +- source/source_pw/module_pwdft/op_pw_veff.cpp | 217 +- source/source_pw/module_pwdft/op_pw_veff.h | 73 +- source/source_pw/module_pwdft/op_pw_vel.cpp | 450 +- source/source_pw/module_pwdft/op_pw_vel.h | 72 +- .../source_pw/module_pwdft/parallel_grid.cpp | 582 +- source/source_pw/module_pwdft/parallel_grid.h | 90 +- source/source_pw/module_pwdft/radial_proj.cpp | 352 +- source/source_pw/module_pwdft/radial_proj.h | 425 +- source/source_pw/module_pwdft/setup_pot.cpp | 269 +- source/source_pw/module_pwdft/setup_pot.h | 34 +- source/source_pw/module_pwdft/setup_pwrho.cpp | 179 +- source/source_pw/module_pwdft/setup_pwrho.h | 38 +- source/source_pw/module_pwdft/setup_pwwfc.cpp | 80 +- source/source_pw/module_pwdft/setup_pwwfc.h | 22 +- source/source_pw/module_pwdft/soc.cpp | 288 +- source/source_pw/module_pwdft/soc.h | 55 +- source/source_pw/module_pwdft/stress_cc.cpp | 588 +- source/source_pw/module_pwdft/stress_ewa.cpp | 391 +- source/source_pw/module_pwdft/stress_exx.cpp | 197 +- source/source_pw/module_pwdft/stress_func.h | 222 +- source/source_pw/module_pwdft/stress_gga.cpp | 93 +- source/source_pw/module_pwdft/stress_har.cpp | 235 +- source/source_pw/module_pwdft/stress_kin.cpp | 47 +- source/source_pw/module_pwdft/stress_loc.cpp | 503 +- source/source_pw/module_pwdft/stress_mgga.cpp | 154 +- source/source_pw/module_pwdft/stress_nl.cpp | 460 +- .../source_pw/module_pwdft/stress_onsite.cpp | 245 +- source/source_pw/module_pwdft/stress_pw.cpp | 197 +- source/source_pw/module_pwdft/stress_pw.h | 50 +- source/source_pw/module_pwdft/stress_us.cpp | 427 +- 
.../module_pwdft/structure_factor.cpp | 518 +- .../source_pw/module_pwdft/structure_factor.h | 82 +- .../module_pwdft/structure_factor_k.cpp | 232 +- .../module_pwdft/test/radial_proj_test.cpp | 325 +- .../source_pw/module_pwdft/test/soc_test.cpp | 198 +- .../test/structure_factor_test.cpp | 135 +- .../source_pw/module_pwdft/update_cell_pw.cpp | 23 +- .../source_pw/module_pwdft/update_cell_pw.h | 10 +- source/source_pw/module_pwdft/vl_pw.cpp | 417 +- source/source_pw/module_pwdft/vl_pw.h | 61 +- source/source_pw/module_pwdft/vnl_pw.cpp | 2290 +- source/source_pw/module_pwdft/vnl_pw.h | 90 +- source/source_pw/module_pwdft/vnl_pw_grad.cpp | 366 +- source/source_pw/module_pwdft/vsep_pw.cpp | 194 +- source/source_pw/module_pwdft/vsep_pw.h | 10 +- .../module_stodft/hamilt_sdft_pw.cpp | 55 +- .../source_pw/module_stodft/hamilt_sdft_pw.h | 22 +- .../kernels/cuda/hpsi_norm_op.cu | 34 +- .../module_stodft/kernels/hpsi_norm_op.cpp | 13 +- .../module_stodft/kernels/hpsi_norm_op.h | 32 +- .../kernels/rocm/hpsi_norm_op.hip.cu | 41 +- source/source_pw/module_stodft/sto_che.cpp | 22 +- source/source_pw/module_stodft/sto_che.h | 23 +- source/source_pw/module_stodft/sto_dos.cpp | 391 +- source/source_pw/module_stodft/sto_dos.h | 34 +- .../source_pw/module_stodft/sto_elecond.cpp | 1767 +- source/source_pw/module_stodft/sto_elecond.h | 92 +- source/source_pw/module_stodft/sto_forces.cpp | 339 +- source/source_pw/module_stodft/sto_forces.h | 46 +- source/source_pw/module_stodft/sto_func.cpp | 280 +- source/source_pw/module_stodft/sto_func.h | 36 +- source/source_pw/module_stodft/sto_iter.cpp | 1070 +- source/source_pw/module_stodft/sto_iter.h | 58 +- .../source_pw/module_stodft/sto_stress_pw.cpp | 249 +- .../source_pw/module_stodft/sto_stress_pw.h | 56 +- source/source_pw/module_stodft/sto_tool.cpp | 165 +- source/source_pw/module_stodft/sto_tool.h | 74 +- source/source_pw/module_stodft/sto_wf.cpp | 421 +- source/source_pw/module_stodft/sto_wf.h | 30 +- 
.../module_stodft/test/test_hamilt_sto.cpp | 132 +- .../module_stodft/test/test_sto_tool.cpp | 105 +- source/source_relax/bfgs.cpp | 494 +- source/source_relax/bfgs.h | 64 +- source/source_relax/bfgs_basic.cpp | 407 +- source/source_relax/bfgs_basic.h | 28 +- source/source_relax/ions_move_basic.cpp | 303 +- source/source_relax/ions_move_basic.h | 28 +- source/source_relax/ions_move_bfgs.cpp | 460 +- source/source_relax/ions_move_bfgs.h | 15 +- source/source_relax/ions_move_cg.cpp | 598 +- source/source_relax/ions_move_cg.h | 54 +- source/source_relax/ions_move_methods.cpp | 131 +- source/source_relax/ions_move_methods.h | 31 +- source/source_relax/ions_move_sd.cpp | 148 +- source/source_relax/ions_move_sd.h | 10 +- source/source_relax/lattice_change_basic.cpp | 488 +- source/source_relax/lattice_change_basic.h | 10 +- source/source_relax/lattice_change_cg.cpp | 598 +- source/source_relax/lattice_change_cg.h | 60 +- .../source_relax/lattice_change_methods.cpp | 28 +- source/source_relax/lattice_change_methods.h | 25 +- source/source_relax/lbfgs.cpp | 436 +- source/source_relax/lbfgs.h | 105 +- source/source_relax/line_search.cpp | 578 +- source/source_relax/line_search.h | 26 +- source/source_relax/matrix_methods.cpp | 219 +- source/source_relax/matrix_methods.h | 27 +- source/source_relax/relax_driver.cpp | 329 +- source/source_relax/relax_driver.h | 10 +- source/source_relax/relax_nsync.cpp | 173 +- source/source_relax/relax_nsync.h | 40 +- source/source_relax/relax_sync.cpp | 826 +- source/source_relax/relax_sync.h | 26 +- source/source_relax/test/bfgs_basic_test.cpp | 319 +- source/source_relax/test/bfgs_test.cpp | 237 +- source/source_relax/test/for_test.h | 102 +- .../test/ions_move_basic_test.cpp | 270 +- .../source_relax/test/ions_move_bfgs_test.cpp | 410 +- .../source_relax/test/ions_move_cg_test.cpp | 530 +- .../test/ions_move_methods_test.cpp | 184 +- .../source_relax/test/ions_move_sd_test.cpp | 186 +- .../test/lattice_change_basic_test.cpp | 758 +- 
.../test/lattice_change_cg_test.cpp | 426 +- .../test/lattice_change_methods_test.cpp | 54 +- source/source_relax/test/line_search_test.cpp | 103 +- source/source_relax/test/mock_remake_cell.cpp | 66 +- source/source_relax/test/mock_remake_cell.h | 20 +- source/source_relax/test/relax_test.cpp | 670 +- source/source_relax/test/relax_test.h | 40 +- tests/09_DeePKS/check_file.cpp | 136 +- tests/integrate/check_file.cpp | 136 +- .../source/src_parallel/parallel_common.cpp | 148 +- .../source/src_parallel/parallel_common.h | 28 +- .../source/src_parallel/parallel_global.cpp | 80 +- .../source/src_parallel/parallel_global.h | 9 +- .../source/src_parallel/parallel_kpoints.cpp | 501 +- .../source/src_parallel/parallel_kpoints.h | 42 +- .../source/src_parallel/parallel_reduce.cpp | 241 +- .../source/src_parallel/parallel_reduce.h | 50 +- .../source/src_pw/bessel_basis.cpp | 279 +- .../source/src_pw/bessel_basis.h | 45 +- .../source/src_pw/heapsort.cpp | 216 +- .../source/src_pw/heapsort.h | 8 +- .../source/src_pw/memory_calculation.cpp | 102 +- .../source/src_pw/memory_calculation.h | 9 +- .../source/src_pw/numerical_basis.cpp | 18 +- .../source/src_pw/numerical_basis.h | 16 +- .../source/src_pw/pw_basis.cpp | 1198 +- .../source/src_pw/pw_basis.h | 219 +- .../source/src_pw/pw_complement.cpp | 307 +- .../source/src_pw/pw_complement.h | 32 +- .../source/src_pw/ylm_real.cpp | 425 +- .../source/src_pw/ylm_real.h | 14 +- .../source/src_spillage/Calculate_C4.cpp | 1033 +- .../source/src_spillage/Calculate_C4.h | 62 +- .../source/src_spillage/Coefficients.cpp | 477 +- .../source/src_spillage/Coefficients.h | 94 +- .../source/src_spillage/Inverse_Matrix_S.cpp | 80 +- .../source/src_spillage/Inverse_Matrix_S.h | 69 +- .../src_spillage/Inverse_Matrix_S_inline.h | 41 +- .../source/src_spillage/Metropolis.cpp | 254 +- .../source/src_spillage/Metropolis.h | 220 +- .../source/src_spillage/Metropolis_2.cpp | 886 +- .../source/src_spillage/Metropolis_Info.h | 56 +- 
.../source/src_spillage/MultiZeta.cpp | 666 +- .../source/src_spillage/MultiZeta.h | 42 +- .../source/src_spillage/Orthogonal.cpp | 271 +- .../source/src_spillage/Orthogonal.h | 11 +- .../source/src_spillage/Out_Orbital.cpp | 413 +- .../source/src_spillage/Out_Orbital.h | 23 +- .../source/src_spillage/Plot_Psi.cpp | 1211 +- .../source/src_spillage/Plot_Psi.h | 98 +- .../source/src_spillage/Psi_Second.cpp | 637 +- .../source/src_spillage/Psi_Second.h | 105 +- .../source/src_spillage/ReadData.cpp | 992 +- .../source/src_spillage/ReadData.h | 58 +- .../Simulated_Annealing_Orbital.h | 317 +- .../source/src_spillage/SpillageStep.cpp | 1753 +- .../source/src_spillage/SpillageStep.h | 81 +- .../source/src_spillage/SpillageValue.cpp | 224 +- .../source/src_spillage/SpillageValue.h | 86 +- .../source/src_spillage/Step_Data.cpp | 192 +- .../source/src_spillage/Step_Data.h | 64 +- .../source/src_spillage/Type_Information.cpp | 145 +- .../source/src_spillage/Type_Information.h | 42 +- .../source/src_spillage/common.cpp | 166 +- .../source/src_spillage/common.h | 77 +- .../source/src_spillage/main.cpp | 123 +- .../source/src_spillage/read_INPUT.cpp | 924 +- .../source/src_spillage/read_INPUT.h | 51 +- .../source/src_spillage/tools.cpp | 82 +- .../source/src_spillage/tools.h | 38 +- .../source/src_tools/Gram_Schmidt_Orth.h | 327 +- .../src_tools/Gram_Schmidt_Orth_bak.cpp | 137 +- .../source/src_tools/Gram_Schmidt_Orth_bak.h | 152 +- .../source/src_tools/Random.h | 82 +- .../source/src_tools/Simulated_Annealing.cpp | 187 +- .../source/src_tools/Simulated_Annealing.h | 167 +- .../source/src_tools/complexarray.cpp | 175 +- .../source/src_tools/complexarray.h | 121 +- .../source/src_tools/complexmatrix.h | 208 +- .../source/src_tools/complexmatrix_inline.h | 743 +- .../source/src_tools/intarray.cpp | 346 +- .../source/src_tools/intarray.h | 123 +- .../source/src_tools/inverse_matrix.cpp | 64 +- .../source/src_tools/inverse_matrix.h | 18 +- .../src_tools/inverse_matrix_iterate.cpp | 
4 - .../source/src_tools/inverse_matrix_iterate.h | 123 +- .../src_tools/inverse_matrix_iterate_bak.cpp | 150 +- .../src_tools/inverse_matrix_iterate_inline.h | 155 +- .../src_tools/inverse_matrix_perturbe.cpp | 54 +- .../src_tools/inverse_matrix_perturbe.h | 82 +- .../source/src_tools/lapack_connector.h | 863 +- .../source/src_tools/mathzone.cpp | 1083 +- .../source/src_tools/mathzone.h | 81 +- .../source/src_tools/matrix.cpp | 283 +- .../source/src_tools/matrix.h | 102 +- .../source/src_tools/matrix3.cpp | 346 +- .../source/src_tools/matrix3.h | 73 +- .../source/src_tools/realarray.cpp | 207 +- .../source/src_tools/realarray.h | 133 +- .../source/src_tools/timer.cpp | 393 +- .../source/src_tools/timer.h | 99 +- .../source/src_tools/vector3.h | 249 +- .../src_unittest/src_tools/common_test.h | 75 +- .../inverse_matrix_iterate_unittest.h | 48 +- .../inverse_matrix_perturbe_unittest.h | 137 +- .../src_tools/inverse_matrix_unittest.h | 28 +- .../src_unittest/src_tools/matrix_multiply.h | 82 +- 2028 files changed, 339134 insertions(+), 299738 deletions(-) diff --git a/python/pyabacus/src/ModuleBase/py_base_math.cpp b/python/pyabacus/src/ModuleBase/py_base_math.cpp index fd58284e78a..01cb6a4e577 100644 --- a/python/pyabacus/src/ModuleBase/py_base_math.cpp +++ b/python/pyabacus/src/ModuleBase/py_base_math.cpp @@ -14,116 +14,122 @@ using namespace pyabacus::utils; template using overload_cast_ = pybind11::detail::overload_cast_impl; -void bind_base_math(py::module& m) +void + bind_base_math (py::module& m) { // python binding for class Sphbes - py::class_(m, "Sphbes") - .def(py::init<>()) - .def_static("sphbesj", overload_cast_()(&ModuleBase::Sphbes::sphbesj), "l"_a, "x"_a) - .def_static("dsphbesj", overload_cast_()(&ModuleBase::Sphbes::dsphbesj), "l"_a, "x"_a) - .def_static("sphbesj", - [](const int n, py::array_t r, const double q, const int l, py::array_t jl) { - check_1d_array(r, "r"); - check_1d_array(jl, "jl"); - ModuleBase::Sphbes::sphbesj(n, - get_array_ptr(r), - 
q, - l, - get_array_ptr(jl)); - }) - .def_static("dsphbesj", - [](const int n, py::array_t r, const double q, const int l, py::array_t djl) { - check_1d_array(r, "r"); - check_1d_array(djl, "djl"); - ModuleBase::Sphbes::dsphbesj(n, - get_array_ptr(r), - q, - l, - get_array_ptr(djl)); - }) - .def_static("sphbes_zeros", [](const int l, const int n, py::array_t zeros) { - check_1d_array(zeros, "zeros"); - ModuleBase::Sphbes::sphbes_zeros(l, n, get_array_ptr(zeros)); - }); + py::class_ (m, "Sphbes") + .def (py::init<> ()) + .def_static ("sphbesj", overload_cast_ () (&ModuleBase::Sphbes::sphbesj), "l"_a, "x"_a) + .def_static ("dsphbesj", + overload_cast_ () (&ModuleBase::Sphbes::dsphbesj), + "l"_a, + "x"_a) + .def_static ("sphbesj", + [] (const int n, py::array_t r, const double q, const int l, py::array_t jl) + { + check_1d_array (r, "r"); + check_1d_array (jl, "jl"); + ModuleBase::Sphbes::sphbesj (n, get_array_ptr (r), q, l, get_array_ptr (jl)); + }) + .def_static ("dsphbesj", + [] (const int n, py::array_t r, const double q, const int l, py::array_t djl) + { + check_1d_array (r, "r"); + check_1d_array (djl, "djl"); + ModuleBase::Sphbes::dsphbesj (n, get_array_ptr (r), q, l, get_array_ptr (djl)); + }) + .def_static ("sphbes_zeros", + [] (const int l, const int n, py::array_t zeros) + { + check_1d_array (zeros, "zeros"); + ModuleBase::Sphbes::sphbes_zeros (l, n, get_array_ptr (zeros)); + }); // python binding for class Integral - py::class_(m, "Integral") - .def(py::init<>()) - .def_static("Simpson_Integral", [](const int mesh, py::array_t func, py::array_t rab, double asum) { - check_1d_array(func, "func"); - check_1d_array(rab, "rab"); + py::class_ (m, "Integral") + .def (py::init<> ()) + .def_static ( + "Simpson_Integral", + [] (const int mesh, py::array_t func, py::array_t rab, double asum) + { + check_1d_array (func, "func"); + check_1d_array (rab, "rab"); - double isum = asum; - ModuleBase::Integral::Simpson_Integral(mesh, - get_array_ptr(func), - 
get_array_ptr(rab), - isum); - return isum; - }) - .def_static("Simpson_Integral", [](const int mesh, py::array_t func, const double dr, double asum){ - check_1d_array(func, "func"); + double isum = asum; + ModuleBase::Integral::Simpson_Integral (mesh, get_array_ptr (func), get_array_ptr (rab), isum); + return isum; + }) + .def_static ("Simpson_Integral", + [] (const int mesh, py::array_t func, const double dr, double asum) + { + check_1d_array (func, "func"); - double isum = asum; - ModuleBase::Integral::Simpson_Integral(mesh, - get_array_ptr(func), - dr, - isum); - return isum; - }) - .def_static("Simpson_Integral_0toall", [](const int mesh, py::array_t func, py::array_t rab, py::array_t asum){ - check_1d_array(func, "func"); - check_1d_array(rab, "rab"); - check_1d_array(asum, "asum"); - ModuleBase::Integral::Simpson_Integral_0toall(mesh, - get_array_ptr(func), - get_array_ptr(rab), - get_array_ptr(asum)); - }) - .def_static("Simpson_Integral_alltoinf", [](const int mesh, py::array_t func, py::array_t rab, py::array_t asum){ - check_1d_array(func, "func"); - check_1d_array(rab, "rab"); - check_1d_array(asum, "asum"); - ModuleBase::Integral::Simpson_Integral_alltoinf(mesh, - get_array_ptr(func), - get_array_ptr(rab), - get_array_ptr(asum)); - }) - .def_static("simpson", [](const int n, py::array_t f, const double dx){ - check_1d_array(f, "f"); - return ModuleBase::Integral::simpson(n, - get_array_ptr(f), - dx); - }) - .def_static("simpson", [](const int n, py::array_t f, py::array_t h){ - check_1d_array(f, "f"); - check_1d_array(h, "h"); - return ModuleBase::Integral::simpson(n, - get_array_ptr(f), - get_array_ptr(h)); - }) - .def_static("Gauss_Legendre_grid_and_weight", [](const int n, py::array_t x, py::array_t w){ - check_1d_array(x, "x"); - check_1d_array(w, "w"); - ModuleBase::Integral::Gauss_Legendre_grid_and_weight(n, - get_array_ptr(x), - get_array_ptr(w)); - }) - .def_static("Gauss_Legendre_grid_and_weight", [](const double xmin, const double xmax, const 
int n, py::array_t x, py::array_t w){ - check_1d_array(x, "x"); - check_1d_array(w, "w"); - ModuleBase::Integral::Gauss_Legendre_grid_and_weight(xmin, - xmax, - n, - get_array_ptr(x), - get_array_ptr(w)); - }); - py::class_(m, "SphericalBesselTransformer") - .def(py::init<>()); + double isum = asum; + ModuleBase::Integral::Simpson_Integral (mesh, get_array_ptr (func), dr, isum); + return isum; + }) + .def_static ("Simpson_Integral_0toall", + [] (const int mesh, py::array_t func, py::array_t rab, py::array_t asum) + { + check_1d_array (func, "func"); + check_1d_array (rab, "rab"); + check_1d_array (asum, "asum"); + ModuleBase::Integral::Simpson_Integral_0toall (mesh, + get_array_ptr (func), + get_array_ptr (rab), + get_array_ptr (asum)); + }) + .def_static ("Simpson_Integral_alltoinf", + [] (const int mesh, py::array_t func, py::array_t rab, py::array_t asum) + { + check_1d_array (func, "func"); + check_1d_array (rab, "rab"); + check_1d_array (asum, "asum"); + ModuleBase::Integral::Simpson_Integral_alltoinf (mesh, + get_array_ptr (func), + get_array_ptr (rab), + get_array_ptr (asum)); + }) + .def_static ("simpson", + [] (const int n, py::array_t f, const double dx) + { + check_1d_array (f, "f"); + return ModuleBase::Integral::simpson (n, get_array_ptr (f), dx); + }) + .def_static ("simpson", + [] (const int n, py::array_t f, py::array_t h) + { + check_1d_array (f, "f"); + check_1d_array (h, "h"); + return ModuleBase::Integral::simpson (n, get_array_ptr (f), get_array_ptr (h)); + }) + .def_static ( + "Gauss_Legendre_grid_and_weight", + [] (const int n, py::array_t x, py::array_t w) + { + check_1d_array (x, "x"); + check_1d_array (w, "w"); + ModuleBase::Integral::Gauss_Legendre_grid_and_weight (n, get_array_ptr (x), get_array_ptr (w)); + }) + .def_static ( + "Gauss_Legendre_grid_and_weight", + [] (const double xmin, const double xmax, const int n, py::array_t x, py::array_t w) + { + check_1d_array (x, "x"); + check_1d_array (w, "w"); + 
ModuleBase::Integral::Gauss_Legendre_grid_and_weight (xmin, + xmax, + n, + get_array_ptr (x), + get_array_ptr (w)); + }); + py::class_ (m, "SphericalBesselTransformer").def (py::init<> ()); } -PYBIND11_MODULE(_base_pack, m) +PYBIND11_MODULE (_base_pack, m) { - m.doc() = "Submodule for pyabacus: ModuleBase"; + m.doc () = "Submodule for pyabacus: ModuleBase"; - bind_base_math(m); + bind_base_math (m); } diff --git a/python/pyabacus/src/ModuleDriver/py_driver.cpp b/python/pyabacus/src/ModuleDriver/py_driver.cpp index 7f5279e8e78..d52ec758b25 100644 --- a/python/pyabacus/src/ModuleDriver/py_driver.cpp +++ b/python/pyabacus/src/ModuleDriver/py_driver.cpp @@ -48,22 +48,22 @@ namespace py_driver */ class GlobalStateGuard { -public: - GlobalStateGuard() + public: + GlobalStateGuard () { // Save current state saved_my_rank_ = GlobalV::MY_RANK; saved_nproc_ = GlobalV::NPROC; } - ~GlobalStateGuard() + ~GlobalStateGuard () { // Restore state GlobalV::MY_RANK = saved_my_rank_; GlobalV::NPROC = saved_nproc_; } -private: + private: int saved_my_rank_ = 0; int saved_nproc_ = 1; }; @@ -73,18 +73,19 @@ class GlobalStateGuard */ class PyDriver::Impl { -public: - Impl() = default; - ~Impl() { cleanup(); } + public: + Impl () = default; + ~Impl () { cleanup (); } - void cleanup() + void + cleanup () { if (p_esolver_) - { - delete p_esolver_; - p_esolver_ = nullptr; - } - ucell_.reset(); + { + delete p_esolver_; + p_esolver_ = nullptr; + } + ucell_.reset (); } // ESolver instance @@ -108,183 +109,182 @@ class PyDriver::Impl std::streambuf* orig_warning_buf_ = nullptr; }; -PyDriver::PyDriver() : impl_(std::make_unique()) -{ -} +PyDriver::PyDriver () : impl_ (std::make_unique ()) {} -PyDriver::~PyDriver() -{ - cleanup_context(); -} +PyDriver::~PyDriver () { cleanup_context (); } -void PyDriver::initialize_context() +void + PyDriver::initialize_context () { // Set up for serial mode (no MPI in Python context) - PARAM.set_pal_param(0, 1, 1); // rank=0, nproc=1, nthread=1 + 
PARAM.set_pal_param (0, 1, 1); // rank=0, nproc=1, nthread=1 GlobalV::MY_RANK = 0; GlobalV::NPROC = 1; initialized_ = true; } -void PyDriver::cleanup_context() +void + PyDriver::cleanup_context () { if (impl_) - { - impl_->cleanup(); - - // Restore original stream buffers - // Note: We use static_cast to std::ostream& because std::ofstream::rdbuf() - // doesn't accept arguments, but std::ostream::rdbuf(streambuf*) does - if (impl_->orig_running_buf_) - { - static_cast(GlobalV::ofs_running).rdbuf(impl_->orig_running_buf_); - impl_->orig_running_buf_ = nullptr; - } - if (impl_->orig_warning_buf_) { - static_cast(GlobalV::ofs_warning).rdbuf(impl_->orig_warning_buf_); - impl_->orig_warning_buf_ = nullptr; - } + impl_->cleanup (); - // Close output streams - if (impl_->ofs_running_.is_open()) - { - impl_->ofs_running_.close(); - } - if (impl_->ofs_warning_.is_open()) - { - impl_->ofs_warning_.close(); - } - if (impl_->null_stream_.is_open()) - { - impl_->null_stream_.close(); - } + // Restore original stream buffers + // Note: We use static_cast to std::ostream& because std::ofstream::rdbuf() + // doesn't accept arguments, but std::ostream::rdbuf(streambuf*) does + if (impl_->orig_running_buf_) + { + static_cast (GlobalV::ofs_running).rdbuf (impl_->orig_running_buf_); + impl_->orig_running_buf_ = nullptr; + } + if (impl_->orig_warning_buf_) + { + static_cast (GlobalV::ofs_warning).rdbuf (impl_->orig_warning_buf_); + impl_->orig_warning_buf_ = nullptr; + } - // Restore working directory if changed - if (!impl_->original_cwd_.empty()) - { - try - { - fs::current_path(impl_->original_cwd_); - } - catch (...) 
- { - // Ignore errors - } - impl_->original_cwd_.clear(); + // Close output streams + if (impl_->ofs_running_.is_open ()) + { + impl_->ofs_running_.close (); + } + if (impl_->ofs_warning_.is_open ()) + { + impl_->ofs_warning_.close (); + } + if (impl_->null_stream_.is_open ()) + { + impl_->null_stream_.close (); + } + + // Restore working directory if changed + if (!impl_->original_cwd_.empty ()) + { + try + { + fs::current_path (impl_->original_cwd_); + } + catch (...) + { + // Ignore errors + } + impl_->original_cwd_.clear (); + } } - } initialized_ = false; } -void PyDriver::setup_output(const std::string& output_dir, int verbosity) +void + PyDriver::setup_output (const std::string& output_dir, int verbosity) { - std::string out_dir = output_dir.empty() ? "OUT.PYABACUS" : output_dir; + std::string out_dir = output_dir.empty () ? "OUT.PYABACUS" : output_dir; // Create output directory - fs::create_directories(out_dir); + fs::create_directories (out_dir); // Save original stream buffers - impl_->orig_running_buf_ = GlobalV::ofs_running.rdbuf(); - impl_->orig_warning_buf_ = GlobalV::ofs_warning.rdbuf(); + impl_->orig_running_buf_ = GlobalV::ofs_running.rdbuf (); + impl_->orig_warning_buf_ = GlobalV::ofs_warning.rdbuf (); // Open log files based on verbosity if (verbosity >= 1) - { - std::string running_log = out_dir + "/running.log"; - impl_->ofs_running_.open(running_log); - if (impl_->ofs_running_.is_open()) { - static_cast(GlobalV::ofs_running).rdbuf(impl_->ofs_running_.rdbuf()); + std::string running_log = out_dir + "/running.log"; + impl_->ofs_running_.open (running_log); + if (impl_->ofs_running_.is_open ()) + { + static_cast (GlobalV::ofs_running).rdbuf (impl_->ofs_running_.rdbuf ()); + } } - } else - { - // Silent mode - redirect to null - impl_->null_stream_.open("/dev/null"); - if (impl_->null_stream_.is_open()) { - static_cast(GlobalV::ofs_running).rdbuf(impl_->null_stream_.rdbuf()); + // Silent mode - redirect to null + impl_->null_stream_.open 
("/dev/null"); + if (impl_->null_stream_.is_open ()) + { + static_cast (GlobalV::ofs_running).rdbuf (impl_->null_stream_.rdbuf ()); + } } - } std::string warning_log = out_dir + "/warning.log"; - impl_->ofs_warning_.open(warning_log); - if (impl_->ofs_warning_.is_open()) - { - static_cast(GlobalV::ofs_warning).rdbuf(impl_->ofs_warning_.rdbuf()); - } + impl_->ofs_warning_.open (warning_log); + if (impl_->ofs_warning_.is_open ()) + { + static_cast (GlobalV::ofs_warning).rdbuf (impl_->ofs_warning_.rdbuf ()); + } } -void PyDriver::read_input( - const std::string& input_dir, - const std::string& input_file, - const std::string& stru_file, - const std::string& kpt_file, - const std::string& pseudo_dir, - const std::string& orbital_dir, - const std::string& output_dir) +void + PyDriver::read_input (const std::string& input_dir, + const std::string& input_file, + const std::string& stru_file, + const std::string& kpt_file, + const std::string& pseudo_dir, + const std::string& orbital_dir, + const std::string& output_dir) { // Save original working directory - impl_->original_cwd_ = fs::current_path().string(); + impl_->original_cwd_ = fs::current_path ().string (); // Determine input file path std::string input_path; - if (!input_file.empty()) - { - input_path = fs::absolute(input_file).string(); - } + if (!input_file.empty ()) + { + input_path = fs::absolute (input_file).string (); + } else - { - input_path = (fs::absolute(input_dir) / "INPUT").string(); - } + { + input_path = (fs::absolute (input_dir) / "INPUT").string (); + } // Check if input file exists - if (!fs::exists(input_path)) - { - throw std::runtime_error("INPUT file not found: " + input_path); - } + if (!fs::exists (input_path)) + { + throw std::runtime_error ("INPUT file not found: " + input_path); + } // Change to input directory for relative paths std::string work_dir = input_dir; - if (work_dir.empty()) - { - work_dir = fs::path(input_path).parent_path().string(); - } - if (!work_dir.empty() && work_dir 
!= ".") - { - fs::current_path(work_dir); - } + if (work_dir.empty ()) + { + work_dir = fs::path (input_path).parent_path ().string (); + } + if (!work_dir.empty () && work_dir != ".") + { + fs::current_path (work_dir); + } // Read INPUT file // Note: ReadInput will set PARAM.globalv.global_in_card internally - ModuleIO::ReadInput reader(0); // rank 0 - std::string input_filename = fs::path(input_path).filename().string(); - reader.read_parameters(PARAM, input_filename); + ModuleIO::ReadInput reader (0); // rank 0 + std::string input_filename = fs::path (input_path).filename ().string (); + reader.read_parameters (PARAM, input_filename); // Create output directory - reader.create_directory(PARAM); + reader.create_directory (PARAM); // Convert input parameters to internal format - Input_Conv::Convert(); + Input_Conv::Convert (); } -CalculationResult PyDriver::collect_results(bool calculate_force, bool calculate_stress) +CalculationResult + PyDriver::collect_results (bool calculate_force, bool calculate_stress) { CalculationResult result; if (!impl_->p_esolver_ || !impl_->ucell_) - { - return result; - } + { + return result; + } // Get convergence info result.converged = impl_->p_esolver_->conv_esolver; // Get energy - result.etot = impl_->p_esolver_->cal_energy(); + result.etot = impl_->p_esolver_->cal_energy (); // Get system info from UnitCell result.nat = impl_->ucell_->nat; @@ -292,187 +292,182 @@ CalculationResult PyDriver::collect_results(bool calculate_force, bool calculate // Calculate forces if requested if (calculate_force) - { - ModuleBase::matrix force(result.nat, 3); - impl_->p_esolver_->cal_force(*impl_->ucell_, force); + { + ModuleBase::matrix force (result.nat, 3); + impl_->p_esolver_->cal_force (*impl_->ucell_, force); - // Convert to numpy array - std::vector shape = {static_cast(result.nat), 3}; - result.forces = py::array_t(shape); - auto buf = result.forces.request(); - double* ptr = static_cast(buf.ptr); + // Convert to numpy array + 
std::vector shape = {static_cast (result.nat), 3}; + result.forces = py::array_t (shape); + auto buf = result.forces.request (); + double* ptr = static_cast (buf.ptr); - for (int i = 0; i < result.nat; ++i) - { - for (int j = 0; j < 3; ++j) - { - ptr[i * 3 + j] = force(i, j); - } + for (int i = 0; i < result.nat; ++i) + { + for (int j = 0; j < 3; ++j) + { + ptr[i * 3 + j] = force (i, j); + } + } + result.has_forces = true; } - result.has_forces = true; - } // Calculate stress if requested if (calculate_stress) - { - ModuleBase::matrix stress(3, 3); - impl_->p_esolver_->cal_stress(*impl_->ucell_, stress); + { + ModuleBase::matrix stress (3, 3); + impl_->p_esolver_->cal_stress (*impl_->ucell_, stress); - // Convert to numpy array - std::vector shape = {3, 3}; - result.stress = py::array_t(shape); - auto buf = result.stress.request(); - double* ptr = static_cast(buf.ptr); + // Convert to numpy array + std::vector shape = {3, 3}; + result.stress = py::array_t (shape); + auto buf = result.stress.request (); + double* ptr = static_cast (buf.ptr); - for (int i = 0; i < 3; ++i) - { - for (int j = 0; j < 3; ++j) - { - ptr[i * 3 + j] = stress(i, j); - } + for (int i = 0; i < 3; ++i) + { + for (int j = 0; j < 3; ++j) + { + ptr[i * 3 + j] = stress (i, j); + } + } + result.has_stress = true; } - result.has_stress = true; - } // Collect output file tracking information result.output_dir = PARAM.sys.global_out_dir; // Find the log file - if (!result.output_dir.empty() && fs::exists(result.output_dir)) - { - // Look for running_*.log files - std::vector log_patterns = { - "running_scf.log", - "running_relax.log", - "running_cell-relax.log", - "running_nscf.log", - "running_md.log" - }; - - for (const auto& log_name : log_patterns) + if (!result.output_dir.empty () && fs::exists (result.output_dir)) { - std::string log_path = result.output_dir + "/" + log_name; - if (fs::exists(log_path)) - { - result.log_file = log_path; - break; - } - } + // Look for running_*.log files + 
std::vector log_patterns = {"running_scf.log", + "running_relax.log", + "running_cell-relax.log", + "running_nscf.log", + "running_md.log"}; + + for (const auto& log_name: log_patterns) + { + std::string log_path = result.output_dir + "/" + log_name; + if (fs::exists (log_path)) + { + result.log_file = log_path; + break; + } + } - // Iterate directory to populate output_files map - try - { - for (const auto& entry : fs::directory_iterator(result.output_dir)) - { - if (entry.is_regular_file()) + // Iterate directory to populate output_files map + try { - std::string filename = entry.path().filename().string(); - std::string full_path = entry.path().string(); - result.output_files[filename] = full_path; + for (const auto& entry: fs::directory_iterator (result.output_dir)) + { + if (entry.is_regular_file ()) + { + std::string filename = entry.path ().filename ().string (); + std::string full_path = entry.path ().string (); + result.output_files[filename] = full_path; + } + } + } + catch (const std::exception& e) + { + // Ignore errors during directory iteration } - } - } - catch (const std::exception& e) - { - // Ignore errors during directory iteration } - } return result; } -CalculationResult PyDriver::run( - const std::string& input_dir, - const std::string& input_file, - const std::string& stru_file, - const std::string& kpt_file, - const std::string& pseudo_dir, - const std::string& orbital_dir, - const std::string& output_dir, - bool calculate_force, - bool calculate_stress, - int verbosity) +CalculationResult + PyDriver::run (const std::string& input_dir, + const std::string& input_file, + const std::string& stru_file, + const std::string& kpt_file, + const std::string& pseudo_dir, + const std::string& orbital_dir, + const std::string& output_dir, + bool calculate_force, + bool calculate_stress, + int verbosity) { // Use RAII guard for global state GlobalStateGuard state_guard; // Clean up any previous calculation - cleanup_context(); + cleanup_context (); // 
Initialize context - initialize_context(); + initialize_context (); // Setup output - setup_output(output_dir, verbosity); + setup_output (output_dir, verbosity); // Start timer - ModuleBase::timer::start(); + ModuleBase::timer::start (); try - { - // Read input files - read_input(input_dir, input_file, stru_file, kpt_file, - pseudo_dir, orbital_dir, output_dir); + { + // Read input files + read_input (input_dir, input_file, stru_file, kpt_file, pseudo_dir, orbital_dir, output_dir); - // Create UnitCell - impl_->ucell_ = std::make_unique(); - impl_->ucell_->setup( - PARAM.inp.latname, - PARAM.inp.ntype, - PARAM.inp.lmaxmax, - PARAM.inp.init_vel, - PARAM.inp.fixed_axes - ); + // Create UnitCell + impl_->ucell_ = std::make_unique (); + impl_->ucell_->setup (PARAM.inp.latname, + PARAM.inp.ntype, + PARAM.inp.lmaxmax, + PARAM.inp.init_vel, + PARAM.inp.fixed_axes); - // Read structure - impl_->ucell_->setup_cell(PARAM.globalv.global_in_stru, GlobalV::ofs_running); + // Read structure + impl_->ucell_->setup_cell (PARAM.globalv.global_in_stru, GlobalV::ofs_running); - // Check atomic structure - unitcell::check_atomic_stru(*impl_->ucell_, PARAM.inp.min_dist_coef); + // Check atomic structure + unitcell::check_atomic_stru (*impl_->ucell_, PARAM.inp.min_dist_coef); - // Initialize ESolver - impl_->p_esolver_ = ModuleESolver::init_esolver(PARAM.inp, *impl_->ucell_); + // Initialize ESolver + impl_->p_esolver_ = ModuleESolver::init_esolver (PARAM.inp, *impl_->ucell_); - // Run before_all_runners - impl_->p_esolver_->before_all_runners(*impl_->ucell_, PARAM.inp); + // Run before_all_runners + impl_->p_esolver_->before_all_runners (*impl_->ucell_, PARAM.inp); - // Run calculation based on calculation type - const std::string& cal = PARAM.inp.calculation; + // Run calculation based on calculation type + const std::string& cal = PARAM.inp.calculation; - if (cal == "scf" || cal == "relax" || cal == "cell-relax" || cal == "nscf") - { - Relax_Driver rl_driver; - 
rl_driver.relax_driver(impl_->p_esolver_, *impl_->ucell_, PARAM.inp); - } - else if (cal == "get_s") - { - impl_->p_esolver_->runner(*impl_->ucell_, 0); - } - else - { - throw std::runtime_error("Unsupported calculation type: " + cal); - } + if (cal == "scf" || cal == "relax" || cal == "cell-relax" || cal == "nscf") + { + Relax_Driver rl_driver; + rl_driver.relax_driver (impl_->p_esolver_, *impl_->ucell_, PARAM.inp); + } + else if (cal == "get_s") + { + impl_->p_esolver_->runner (*impl_->ucell_, 0); + } + else + { + throw std::runtime_error ("Unsupported calculation type: " + cal); + } - // Collect results - last_result_ = collect_results(calculate_force, calculate_stress); + // Collect results + last_result_ = collect_results (calculate_force, calculate_stress); - // Run after_all_runners - impl_->p_esolver_->after_all_runners(*impl_->ucell_); - } + // Run after_all_runners + impl_->p_esolver_->after_all_runners (*impl_->ucell_); + } catch (const std::exception& e) - { - // Stop timer on error - ModuleBase::timer::finish(GlobalV::ofs_running); - // Clean up on error - cleanup_context(); - throw; - } + { + // Stop timer on error + ModuleBase::timer::finish (GlobalV::ofs_running); + // Clean up on error + cleanup_context (); + throw; + } // Stop timer - ModuleBase::timer::finish(GlobalV::ofs_running); + ModuleBase::timer::finish (GlobalV::ofs_running); // Print memory usage - ModuleBase::Memory::print_all(GlobalV::ofs_running); + ModuleBase::Memory::print_all (GlobalV::ofs_running); return last_result_; } @@ -483,9 +478,9 @@ CalculationResult PyDriver::run( // Pybind11 Module Definition // ============================================================================ -PYBIND11_MODULE(_driver_pack, m) +PYBIND11_MODULE (_driver_pack, m) { - m.doc() = R"pbdoc( + m.doc () = R"pbdoc( PyABACUS Driver Module ---------------------- @@ -507,8 +502,9 @@ PYBIND11_MODULE(_driver_pack, m) )pbdoc"; // Bind CalculationResult - py::class_(m, "CalculationResult", - R"pbdoc( + 
py::class_ (m, + "CalculationResult", + R"pbdoc( Container for DFT calculation results. Attributes @@ -526,73 +522,52 @@ PYBIND11_MODULE(_driver_pack, m) stress : numpy.ndarray Stress tensor (3, 3) in kbar )pbdoc") - .def(py::init<>()) - .def_readonly("converged", &py_driver::CalculationResult::converged, - "Whether SCF converged") - .def_readonly("niter", &py_driver::CalculationResult::niter, - "Number of SCF iterations") - .def_readonly("drho", &py_driver::CalculationResult::drho, - "Final charge density difference") - .def_readonly("etot", &py_driver::CalculationResult::etot, - "Total energy (Ry)") - .def_readonly("eband", &py_driver::CalculationResult::eband, - "Band energy (Ry)") - .def_readonly("hartree_energy", &py_driver::CalculationResult::hartree_energy, - "Hartree energy (Ry)") - .def_readonly("etxc", &py_driver::CalculationResult::etxc, - "Exchange-correlation energy (Ry)") - .def_readonly("ewald_energy", &py_driver::CalculationResult::ewald_energy, - "Ewald energy (Ry)") - .def_readonly("demet", &py_driver::CalculationResult::demet, - "-TS term for metals (Ry)") - .def_readonly("exx", &py_driver::CalculationResult::exx, - "Exact exchange energy (Ry)") - .def_readonly("evdw", &py_driver::CalculationResult::evdw, - "van der Waals energy (Ry)") - .def_readonly("forces", &py_driver::CalculationResult::forces, - "Forces on atoms (nat, 3) in Ry/Bohr") - .def_readonly("has_forces", &py_driver::CalculationResult::has_forces, - "Whether forces are available") - .def_readonly("stress", &py_driver::CalculationResult::stress, - "Stress tensor (3, 3) in kbar") - .def_readonly("has_stress", &py_driver::CalculationResult::has_stress, - "Whether stress is available") - .def_readonly("fermi_energy", &py_driver::CalculationResult::fermi_energy, - "Fermi energy (eV)") - .def_readonly("bandgap", &py_driver::CalculationResult::bandgap, - "Band gap (eV)") - .def_readonly("nat", &py_driver::CalculationResult::nat, - "Number of atoms") - .def_readonly("ntype", 
&py_driver::CalculationResult::ntype, - "Number of atom types") - .def_readonly("nbands", &py_driver::CalculationResult::nbands, - "Number of bands") - .def_readonly("nks", &py_driver::CalculationResult::nks, - "Number of k-points") - .def_readonly("output_dir", &py_driver::CalculationResult::output_dir, - "Path to output directory (OUT.$suffix)") - .def_readonly("log_file", &py_driver::CalculationResult::log_file, - "Path to the main log file") - .def_readonly("output_files", &py_driver::CalculationResult::output_files, - "Dictionary of output files (filename -> full path)") - .def("etot_eV", &py_driver::CalculationResult::etot_eV, - "Get total energy in eV") - .def("get_energies", &py_driver::CalculationResult::get_energies, - "Get all energies as a dictionary") - .def("get_forces_eV_Ang", &py_driver::CalculationResult::get_forces_eV_Ang, - "Get forces in eV/Angstrom") - .def("summary", &py_driver::CalculationResult::summary, - "Get a summary string of the calculation result") - .def("__repr__", [](const py_driver::CalculationResult& r) { - std::ostringstream ss; - ss << ""; - return ss.str(); - }); + .def (py::init<> ()) + .def_readonly ("converged", &py_driver::CalculationResult::converged, "Whether SCF converged") + .def_readonly ("niter", &py_driver::CalculationResult::niter, "Number of SCF iterations") + .def_readonly ("drho", &py_driver::CalculationResult::drho, "Final charge density difference") + .def_readonly ("etot", &py_driver::CalculationResult::etot, "Total energy (Ry)") + .def_readonly ("eband", &py_driver::CalculationResult::eband, "Band energy (Ry)") + .def_readonly ("hartree_energy", &py_driver::CalculationResult::hartree_energy, "Hartree energy (Ry)") + .def_readonly ("etxc", &py_driver::CalculationResult::etxc, "Exchange-correlation energy (Ry)") + .def_readonly ("ewald_energy", &py_driver::CalculationResult::ewald_energy, "Ewald energy (Ry)") + .def_readonly ("demet", &py_driver::CalculationResult::demet, "-TS term for metals (Ry)") + 
.def_readonly ("exx", &py_driver::CalculationResult::exx, "Exact exchange energy (Ry)") + .def_readonly ("evdw", &py_driver::CalculationResult::evdw, "van der Waals energy (Ry)") + .def_readonly ("forces", &py_driver::CalculationResult::forces, "Forces on atoms (nat, 3) in Ry/Bohr") + .def_readonly ("has_forces", &py_driver::CalculationResult::has_forces, "Whether forces are available") + .def_readonly ("stress", &py_driver::CalculationResult::stress, "Stress tensor (3, 3) in kbar") + .def_readonly ("has_stress", &py_driver::CalculationResult::has_stress, "Whether stress is available") + .def_readonly ("fermi_energy", &py_driver::CalculationResult::fermi_energy, "Fermi energy (eV)") + .def_readonly ("bandgap", &py_driver::CalculationResult::bandgap, "Band gap (eV)") + .def_readonly ("nat", &py_driver::CalculationResult::nat, "Number of atoms") + .def_readonly ("ntype", &py_driver::CalculationResult::ntype, "Number of atom types") + .def_readonly ("nbands", &py_driver::CalculationResult::nbands, "Number of bands") + .def_readonly ("nks", &py_driver::CalculationResult::nks, "Number of k-points") + .def_readonly ("output_dir", + &py_driver::CalculationResult::output_dir, + "Path to output directory (OUT.$suffix)") + .def_readonly ("log_file", &py_driver::CalculationResult::log_file, "Path to the main log file") + .def_readonly ("output_files", + &py_driver::CalculationResult::output_files, + "Dictionary of output files (filename -> full path)") + .def ("etot_eV", &py_driver::CalculationResult::etot_eV, "Get total energy in eV") + .def ("get_energies", &py_driver::CalculationResult::get_energies, "Get all energies as a dictionary") + .def ("get_forces_eV_Ang", &py_driver::CalculationResult::get_forces_eV_Ang, "Get forces in eV/Angstrom") + .def ("summary", &py_driver::CalculationResult::summary, "Get a summary string of the calculation result") + .def ("__repr__", + [] (const py_driver::CalculationResult& r) + { + std::ostringstream ss; + ss << ""; + return ss.str (); 
+ }); // Bind PyDriver - py::class_(m, "PyDriver", - R"pbdoc( + py::class_ (m, + "PyDriver", + R"pbdoc( Python wrapper for ABACUS Driver. This class provides a Python interface for running complete ABACUS @@ -609,9 +584,10 @@ PYBIND11_MODULE(_driver_pack, m) >>> print(f"Energy: {result.etot_eV()} eV") >>> print(f"Converged: {result.converged}") )pbdoc") - .def(py::init<>()) - .def("run", &py_driver::PyDriver::run, - R"pbdoc( + .def (py::init<> ()) + .def ("run", + &py_driver::PyDriver::run, + R"pbdoc( Run a complete DFT calculation. Parameters @@ -642,19 +618,19 @@ PYBIND11_MODULE(_driver_pack, m) CalculationResult Container with all calculation results )pbdoc", - py::arg("input_dir") = ".", - py::arg("input_file") = "", - py::arg("stru_file") = "", - py::arg("kpt_file") = "", - py::arg("pseudo_dir") = "", - py::arg("orbital_dir") = "", - py::arg("output_dir") = "", - py::arg("calculate_force") = true, - py::arg("calculate_stress") = false, - py::arg("verbosity") = 1) - .def("is_ready", &py_driver::PyDriver::is_ready, - "Check if the driver is ready for calculation") - .def("get_last_result", &py_driver::PyDriver::get_last_result, - py::return_value_policy::reference_internal, - "Get the last calculation result"); + py::arg ("input_dir") = ".", + py::arg ("input_file") = "", + py::arg ("stru_file") = "", + py::arg ("kpt_file") = "", + py::arg ("pseudo_dir") = "", + py::arg ("orbital_dir") = "", + py::arg ("output_dir") = "", + py::arg ("calculate_force") = true, + py::arg ("calculate_stress") = false, + py::arg ("verbosity") = 1) + .def ("is_ready", &py_driver::PyDriver::is_ready, "Check if the driver is ready for calculation") + .def ("get_last_result", + &py_driver::PyDriver::get_last_result, + py::return_value_policy::reference_internal, + "Get the last calculation result"); } diff --git a/python/pyabacus/src/ModuleDriver/py_driver.hpp b/python/pyabacus/src/ModuleDriver/py_driver.hpp index db426cee0b6..d2159a31c17 100644 --- 
a/python/pyabacus/src/ModuleDriver/py_driver.hpp +++ b/python/pyabacus/src/ModuleDriver/py_driver.hpp @@ -57,26 +57,31 @@ struct CalculationResult bool has_stress = false; // Electronic structure info - double fermi_energy = 0.0; // in eV - double bandgap = 0.0; // in eV + double fermi_energy = 0.0; // in eV + double bandgap = 0.0; // in eV int nat = 0; int ntype = 0; int nbands = 0; int nks = 0; // Output file tracking - std::string output_dir = ""; // Path to OUT.$suffix folder - std::string log_file = ""; // Path to the main log file - std::map output_files; // filename -> full path + std::string output_dir = ""; // Path to OUT.$suffix folder + std::string log_file = ""; // Path to the main log file + std::map output_files; // filename -> full path // Unit conversion constants static constexpr double Ry_to_eV = 13.605693122994; static constexpr double Bohr_to_Ang = 0.529177249; // Convenience methods - double etot_eV() const { return etot * Ry_to_eV; } + double + etot_eV () const + { + return etot * Ry_to_eV; + } - py::dict get_energies() const + py::dict + get_energies () const { py::dict result; result["etot"] = etot; @@ -91,27 +96,29 @@ struct CalculationResult return result; } - py::array_t get_forces_eV_Ang() const + py::array_t + get_forces_eV_Ang () const { if (!has_forces) - { - throw std::runtime_error("Forces not available. Set calculate_force=True."); - } + { + throw std::runtime_error ("Forces not available. 
Set calculate_force=True."); + } // Convert from Ry/Bohr to eV/Ang - auto buf = forces.request(); - auto result = py::array_t(buf.shape); - auto result_buf = result.request(); - double* src = static_cast(buf.ptr); - double* dst = static_cast(result_buf.ptr); + auto buf = forces.request (); + auto result = py::array_t (buf.shape); + auto result_buf = result.request (); + double* src = static_cast (buf.ptr); + double* dst = static_cast (result_buf.ptr); double factor = Ry_to_eV / Bohr_to_Ang; for (ssize_t i = 0; i < buf.size; ++i) - { - dst[i] = src[i] * factor; - } + { + dst[i] = src[i] * factor; + } return result; } - std::string summary() const + std::string + summary () const { std::ostringstream ss; ss << "=== ABACUS Calculation Result ===\n"; @@ -119,49 +126,49 @@ struct CalculationResult ss << "SCF iterations: " << niter << "\n"; ss << "Final drho: " << std::scientific << drho << "\n"; ss << "\nEnergies:\n"; - ss << std::fixed << std::setprecision(8); + ss << std::fixed << std::setprecision (8); ss << " Total energy: " << etot << " Ry (" << etot * Ry_to_eV << " eV)\n"; ss << " Band energy: " << eband << " Ry\n"; ss << " Hartree: " << hartree_energy << " Ry\n"; ss << " XC energy: " << etxc << " Ry\n"; ss << " Ewald: " << ewald_energy << " Ry\n"; if (has_forces) - { - ss << "\nForces: calculated (" << nat << " atoms)\n"; - } + { + ss << "\nForces: calculated (" << nat << " atoms)\n"; + } if (has_stress) - { - ss << "Stress: calculated\n"; - } + { + ss << "Stress: calculated\n"; + } ss << "\nSystem info:\n"; ss << " Atoms: " << nat << ", Types: " << ntype << "\n"; ss << " Bands: " << nbands << ", K-points: " << nks << "\n"; if (fermi_energy != 0.0) - { - ss << " Fermi energy: " << fermi_energy << " eV\n"; - } + { + ss << " Fermi energy: " << fermi_energy << " eV\n"; + } if (bandgap > 0.0) - { - ss << " Band gap: " << bandgap << " eV\n"; - } - // Output file tracking - if (!output_dir.empty()) - { - ss << "\nOutput:\n"; - ss << " Directory: " << output_dir << 
"\n"; - if (!log_file.empty()) { - // Extract just the filename from the path - size_t pos = log_file.find_last_of("/\\"); - std::string log_filename = (pos != std::string::npos) ? log_file.substr(pos + 1) : log_file; - ss << " Log file: " << log_filename << "\n"; + ss << " Band gap: " << bandgap << " eV\n"; } - if (!output_files.empty()) + // Output file tracking + if (!output_dir.empty ()) { - ss << " Files: " << output_files.size() << " output files\n"; + ss << "\nOutput:\n"; + ss << " Directory: " << output_dir << "\n"; + if (!log_file.empty ()) + { + // Extract just the filename from the path + size_t pos = log_file.find_last_of ("/\\"); + std::string log_filename = (pos != std::string::npos) ? log_file.substr (pos + 1) : log_file; + ss << " Log file: " << log_filename << "\n"; + } + if (!output_files.empty ()) + { + ss << " Files: " << output_files.size () << " output files\n"; + } } - } - return ss.str(); + return ss.str (); } }; @@ -177,13 +184,13 @@ struct CalculationResult */ class PyDriver { -public: - PyDriver(); - ~PyDriver(); + public: + PyDriver (); + ~PyDriver (); // Disable copy - PyDriver(const PyDriver&) = delete; - PyDriver& operator=(const PyDriver&) = delete; + PyDriver (const PyDriver&) = delete; + PyDriver& operator= (const PyDriver&) = delete; /** * @brief Run a complete DFT calculation @@ -200,30 +207,36 @@ class PyDriver * @param verbosity Output verbosity level (0=silent, 1=normal, 2=verbose) * @return CalculationResult containing all results */ - CalculationResult run( - const std::string& input_dir = ".", - const std::string& input_file = "", - const std::string& stru_file = "", - const std::string& kpt_file = "", - const std::string& pseudo_dir = "", - const std::string& orbital_dir = "", - const std::string& output_dir = "", - bool calculate_force = true, - bool calculate_stress = false, - int verbosity = 1 - ); + CalculationResult run (const std::string& input_dir = ".", + const std::string& input_file = "", + const std::string& 
stru_file = "", + const std::string& kpt_file = "", + const std::string& pseudo_dir = "", + const std::string& orbital_dir = "", + const std::string& output_dir = "", + bool calculate_force = true, + bool calculate_stress = false, + int verbosity = 1); /** * @brief Check if the driver is ready for calculation */ - bool is_ready() const { return initialized_; } + bool + is_ready () const + { + return initialized_; + } /** * @brief Get the last calculation result */ - const CalculationResult& get_last_result() const { return last_result_; } + const CalculationResult& + get_last_result () const + { + return last_result_; + } -private: + private: class Impl; std::unique_ptr impl_; @@ -231,17 +244,17 @@ class PyDriver CalculationResult last_result_; // Internal methods - void initialize_context(); - void cleanup_context(); - void read_input(const std::string& input_dir, - const std::string& input_file, - const std::string& stru_file, - const std::string& kpt_file, - const std::string& pseudo_dir, - const std::string& orbital_dir, - const std::string& output_dir); - void setup_output(const std::string& output_dir, int verbosity); - CalculationResult collect_results(bool calculate_force, bool calculate_stress); + void initialize_context (); + void cleanup_context (); + void read_input (const std::string& input_dir, + const std::string& input_file, + const std::string& stru_file, + const std::string& kpt_file, + const std::string& pseudo_dir, + const std::string& orbital_dir, + const std::string& output_dir); + void setup_output (const std::string& output_dir, int verbosity); + CalculationResult collect_results (bool calculate_force, bool calculate_stress); }; } // namespace py_driver diff --git a/python/pyabacus/src/ModuleESolver/components/charge_mixer_wrapper.hpp b/python/pyabacus/src/ModuleESolver/components/charge_mixer_wrapper.hpp index 0fc299ddf6c..446d8de1c23 100644 --- a/python/pyabacus/src/ModuleESolver/components/charge_mixer_wrapper.hpp +++ 
b/python/pyabacus/src/ModuleESolver/components/charge_mixer_wrapper.hpp @@ -14,8 +14,10 @@ #include #include -namespace pyabacus { -namespace esolver { +namespace pyabacus +{ +namespace esolver +{ /** * @brief Wrapper for ABACUS Charge_Mixing class @@ -25,274 +27,300 @@ namespace esolver { */ class ChargeMixerWrapper : public IChargeMixer { -public: - ChargeMixerWrapper() = default; + public: + ChargeMixerWrapper () = default; - ChargeMixerWrapper(int nspin, int nrxx) - : nspin_(nspin), nrxx_(nrxx) + ChargeMixerWrapper (int nspin, int nrxx) : nspin_ (nspin), nrxx_ (nrxx) { // Initialize history buffers for Pulay mixing - rho_history_.reserve(config_.ndim); - residual_history_.reserve(config_.ndim); + rho_history_.reserve (config_.ndim); + residual_history_.reserve (config_.ndim); } - ~ChargeMixerWrapper() override = default; + ~ChargeMixerWrapper () override = default; // ==================== Core Mixing Operations ==================== - py::array_t mix(const py::array_t& rho_in, - const py::array_t& rho_out) override + py::array_t + mix (const py::array_t& rho_in, const py::array_t& rho_out) override { using namespace pyabacus::utils; // Validate input arrays - check_array_size(rho_in, static_cast(nspin_ * nrxx_), "rho_in"); - check_array_size(rho_out, static_cast(nspin_ * nrxx_), "rho_out"); + check_array_size (rho_in, static_cast (nspin_ * nrxx_), "rho_in"); + check_array_size (rho_out, static_cast (nspin_ * nrxx_), "rho_out"); - const double* in_ptr = get_array_ptr(rho_in); - const double* out_ptr = get_array_ptr(rho_out); + const double* in_ptr = get_array_ptr (rho_in); + const double* out_ptr = get_array_ptr (rho_out); // Create output array - py::array_t rho_mixed({static_cast(nspin_), - static_cast(nrxx_)}); - double* mixed_ptr = get_array_ptr(rho_mixed); + py::array_t rho_mixed ({static_cast (nspin_), static_cast (nrxx_)}); + double* mixed_ptr = get_array_ptr (rho_mixed); // Calculate drho drho_ = 0.0; - for (size_t i = 0; i < static_cast(nspin_ * nrxx_); 
++i) - { - double diff = out_ptr[i] - in_ptr[i]; - drho_ += diff * diff; - } - drho_ = std::sqrt(drho_ / (nspin_ * nrxx_)); + for (size_t i = 0; i < static_cast (nspin_ * nrxx_); ++i) + { + double diff = out_ptr[i] - in_ptr[i]; + drho_ += diff * diff; + } + drho_ = std::sqrt (drho_ / (nspin_ * nrxx_)); // Perform mixing based on method switch (config_.method) - { + { case MixingMethod::Plain: - mix_plain(in_ptr, out_ptr, mixed_ptr); + mix_plain (in_ptr, out_ptr, mixed_ptr); break; case MixingMethod::Pulay: - mix_pulay(in_ptr, out_ptr, mixed_ptr); + mix_pulay (in_ptr, out_ptr, mixed_ptr); break; case MixingMethod::Broyden: - mix_broyden(in_ptr, out_ptr, mixed_ptr); + mix_broyden (in_ptr, out_ptr, mixed_ptr); break; case MixingMethod::Anderson: - mix_anderson(in_ptr, out_ptr, mixed_ptr); + mix_anderson (in_ptr, out_ptr, mixed_ptr); break; default: - mix_plain(in_ptr, out_ptr, mixed_ptr); - } + mix_plain (in_ptr, out_ptr, mixed_ptr); + } iteration_++; return rho_mixed; } - void reset() override + void + reset () override { iteration_ = 0; drho_ = 0.0; - rho_history_.clear(); - residual_history_.clear(); + rho_history_.clear (); + residual_history_.clear (); } // ==================== State Queries ==================== - double get_drho() const override { return drho_; } + double + get_drho () const override + { + return drho_; + } - int get_iteration() const override { return iteration_; } + int + get_iteration () const override + { + return iteration_; + } // ==================== Configuration ==================== - void set_config(const MixingConfig& config) override + void + set_config (const MixingConfig& config) override { config_ = config; - reset(); // Reset history when config changes + reset (); // Reset history when config changes } - MixingConfig get_config() const override { return config_; } + MixingConfig + get_config () const override + { + return config_; + } - void set_mixing_beta(double beta) override + void + set_mixing_beta (double beta) override { 
if (beta <= 0.0 || beta > 1.0) - { - throw std::invalid_argument("beta must be in (0, 1]"); - } + { + throw std::invalid_argument ("beta must be in (0, 1]"); + } config_.beta = beta; } - double get_mixing_beta() const override { return config_.beta; } + double + get_mixing_beta () const override + { + return config_.beta; + } - void set_mixing_method(MixingMethod method) override + void + set_mixing_method (MixingMethod method) override { config_.method = method; - reset(); + reset (); } - MixingMethod get_mixing_method() const override { return config_.method; } + MixingMethod + get_mixing_method () const override + { + return config_.method; + } // ==================== Dimension Setters ==================== - void set_dimensions(int nspin, int nrxx) + void + set_dimensions (int nspin, int nrxx) { nspin_ = nspin; nrxx_ = nrxx; - reset(); + reset (); } -private: + private: // Plain linear mixing: rho_new = (1-beta)*rho_in + beta*rho_out - void mix_plain(const double* rho_in, const double* rho_out, double* rho_mixed) + void + mix_plain (const double* rho_in, const double* rho_out, double* rho_mixed) { const double beta = config_.beta; const double one_minus_beta = 1.0 - beta; - for (size_t i = 0; i < static_cast(nspin_ * nrxx_); ++i) - { - rho_mixed[i] = one_minus_beta * rho_in[i] + beta * rho_out[i]; - } + for (size_t i = 0; i < static_cast (nspin_ * nrxx_); ++i) + { + rho_mixed[i] = one_minus_beta * rho_in[i] + beta * rho_out[i]; + } } // Pulay mixing (DIIS) - void mix_pulay(const double* rho_in, const double* rho_out, double* rho_mixed) + void + mix_pulay (const double* rho_in, const double* rho_out, double* rho_mixed) { - const size_t size = static_cast(nspin_ * nrxx_); + const size_t size = static_cast (nspin_ * nrxx_); // Store current rho and residual in history - std::vector current_rho(rho_in, rho_in + size); - std::vector current_residual(size); + std::vector current_rho (rho_in, rho_in + size); + std::vector current_residual (size); for (size_t i = 0; i < 
size; ++i) - { - current_residual[i] = rho_out[i] - rho_in[i]; - } + { + current_residual[i] = rho_out[i] - rho_in[i]; + } // Add to history (circular buffer) - if (static_cast(rho_history_.size()) >= config_.ndim) - { - rho_history_.erase(rho_history_.begin()); - residual_history_.erase(residual_history_.begin()); - } - rho_history_.push_back(current_rho); - residual_history_.push_back(current_residual); + if (static_cast (rho_history_.size ()) >= config_.ndim) + { + rho_history_.erase (rho_history_.begin ()); + residual_history_.erase (residual_history_.begin ()); + } + rho_history_.push_back (current_rho); + residual_history_.push_back (current_residual); - const int nhist = static_cast(rho_history_.size()); + const int nhist = static_cast (rho_history_.size ()); if (nhist < 2) - { - // Not enough history, use plain mixing - mix_plain(rho_in, rho_out, rho_mixed); - return; - } + { + // Not enough history, use plain mixing + mix_plain (rho_in, rho_out, rho_mixed); + return; + } // Build overlap matrix of residuals - std::vector A((nhist + 1) * (nhist + 1), 0.0); - std::vector b(nhist + 1, 0.0); + std::vector A ((nhist + 1) * (nhist + 1), 0.0); + std::vector b (nhist + 1, 0.0); for (int i = 0; i < nhist; ++i) - { - for (int j = 0; j <= i; ++j) { - double dot = 0.0; - for (size_t k = 0; k < size; ++k) - { - dot += residual_history_[i][k] * residual_history_[j][k]; - } - A[i * (nhist + 1) + j] = dot; - A[j * (nhist + 1) + i] = dot; + for (int j = 0; j <= i; ++j) + { + double dot = 0.0; + for (size_t k = 0; k < size; ++k) + { + dot += residual_history_[i][k] * residual_history_[j][k]; + } + A[i * (nhist + 1) + j] = dot; + A[j * (nhist + 1) + i] = dot; + } + A[i * (nhist + 1) + nhist] = 1.0; + A[nhist * (nhist + 1) + i] = 1.0; } - A[i * (nhist + 1) + nhist] = 1.0; - A[nhist * (nhist + 1) + i] = 1.0; - } b[nhist] = 1.0; // Solve linear system for coefficients (simple Gaussian elimination) - std::vector coeff = solve_linear_system(A, b, nhist + 1); + std::vector coeff = 
solve_linear_system (A, b, nhist + 1); // Compute mixed density - std::fill(rho_mixed, rho_mixed + size, 0.0); + std::fill (rho_mixed, rho_mixed + size, 0.0); for (int i = 0; i < nhist; ++i) - { - for (size_t k = 0; k < size; ++k) { - rho_mixed[k] += coeff[i] * (rho_history_[i][k] + - config_.beta * residual_history_[i][k]); + for (size_t k = 0; k < size; ++k) + { + rho_mixed[k] += coeff[i] * (rho_history_[i][k] + config_.beta * residual_history_[i][k]); + } } - } } // Broyden mixing (simplified) - void mix_broyden(const double* rho_in, const double* rho_out, double* rho_mixed) + void + mix_broyden (const double* rho_in, const double* rho_out, double* rho_mixed) { // For simplicity, use Pulay mixing as approximation - mix_pulay(rho_in, rho_out, rho_mixed); + mix_pulay (rho_in, rho_out, rho_mixed); } // Anderson mixing - void mix_anderson(const double* rho_in, const double* rho_out, double* rho_mixed) + void + mix_anderson (const double* rho_in, const double* rho_out, double* rho_mixed) { // Anderson mixing is similar to Pulay - mix_pulay(rho_in, rho_out, rho_mixed); + mix_pulay (rho_in, rho_out, rho_mixed); } // Simple linear system solver (Gaussian elimination with partial pivoting) - std::vector solve_linear_system(std::vector& A, - std::vector& b, - int n) + std::vector + solve_linear_system (std::vector& A, std::vector& b, int n) { - std::vector x(n, 0.0); + std::vector x (n, 0.0); // Forward elimination for (int k = 0; k < n - 1; ++k) - { - // Find pivot - int max_row = k; - double max_val = std::abs(A[k * n + k]); - for (int i = k + 1; i < n; ++i) { - if (std::abs(A[i * n + k]) > max_val) - { - max_val = std::abs(A[i * n + k]); - max_row = i; - } + // Find pivot + int max_row = k; + double max_val = std::abs (A[k * n + k]); + for (int i = k + 1; i < n; ++i) + { + if (std::abs (A[i * n + k]) > max_val) + { + max_val = std::abs (A[i * n + k]); + max_row = i; + } + } + + // Swap rows + if (max_row != k) + { + for (int j = 0; j < n; ++j) + { + std::swap (A[k * n 
+ j], A[max_row * n + j]); + } + std::swap (b[k], b[max_row]); + } + + // Eliminate + for (int i = k + 1; i < n; ++i) + { + if (std::abs (A[k * n + k]) < 1e-12) + continue; + double factor = A[i * n + k] / A[k * n + k]; + for (int j = k; j < n; ++j) + { + A[i * n + j] -= factor * A[k * n + j]; + } + b[i] -= factor * b[k]; + } } - // Swap rows - if (max_row != k) - { - for (int j = 0; j < n; ++j) - { - std::swap(A[k * n + j], A[max_row * n + j]); - } - std::swap(b[k], b[max_row]); - } - - // Eliminate - for (int i = k + 1; i < n; ++i) - { - if (std::abs(A[k * n + k]) < 1e-12) continue; - double factor = A[i * n + k] / A[k * n + k]; - for (int j = k; j < n; ++j) - { - A[i * n + j] -= factor * A[k * n + j]; - } - b[i] -= factor * b[k]; - } - } - // Back substitution for (int i = n - 1; i >= 0; --i) - { - x[i] = b[i]; - for (int j = i + 1; j < n; ++j) - { - x[i] -= A[i * n + j] * x[j]; - } - if (std::abs(A[i * n + i]) > 1e-12) { - x[i] /= A[i * n + i]; + x[i] = b[i]; + for (int j = i + 1; j < n; ++j) + { + x[i] -= A[i * n + j] * x[j]; + } + if (std::abs (A[i * n + i]) > 1e-12) + { + x[i] /= A[i * n + i]; + } } - } return x; } diff --git a/python/pyabacus/src/ModuleESolver/components/diagonalizer_wrapper.hpp b/python/pyabacus/src/ModuleESolver/components/diagonalizer_wrapper.hpp index 4b22fcc53c4..c3e36df3129 100644 --- a/python/pyabacus/src/ModuleESolver/components/diagonalizer_wrapper.hpp +++ b/python/pyabacus/src/ModuleESolver/components/diagonalizer_wrapper.hpp @@ -14,8 +14,10 @@ #include -namespace pyabacus { -namespace esolver { +namespace pyabacus +{ +namespace esolver +{ /** * @brief Wrapper for diagonalization solvers @@ -28,22 +30,20 @@ namespace esolver { template class DiagonalizerWrapper : public IDiagonalizer { -public: - DiagonalizerWrapper() = default; + public: + DiagonalizerWrapper () = default; - DiagonalizerWrapper(int nbasis, int nbands) - : nbasis_(nbasis), nbands_(nbands) - { - } + DiagonalizerWrapper (int nbasis, int nbands) : nbasis_ (nbasis), 
nbands_ (nbands) {} - ~DiagonalizerWrapper() override = default; + ~DiagonalizerWrapper () override = default; // ==================== Direct Diagonalization ==================== - DiagResult diagonalize(int ik, - const py::array_t& Hk, - const py::array_t& Sk, - const py::array_t& psi_init) override + DiagResult + diagonalize (int ik, + const py::array_t& Hk, + const py::array_t& Sk, + const py::array_t& psi_init) override { DiagResult result; @@ -52,116 +52,135 @@ class DiagonalizerWrapper : public IDiagonalizer // the appropriate ABACUS solver // Create hpsi and spsi functions from matrices - auto hpsi_func = [&Hk, this](const py::array_t& psi) -> py::array_t { - return matrix_vector_multiply(Hk, psi); - }; + auto hpsi_func + = [&Hk, this] (const py::array_t& psi) -> py::array_t { return matrix_vector_multiply (Hk, psi); }; - auto spsi_func = [&Sk, this](const py::array_t& psi) -> py::array_t { - return matrix_vector_multiply(Sk, psi); - }; + auto spsi_func + = [&Sk, this] (const py::array_t& psi) -> py::array_t { return matrix_vector_multiply (Sk, psi); }; // Use iterative method as fallback - py::array_t precond = compute_preconditioner(Hk); - return diagonalize_iterative(ik, hpsi_func, spsi_func, psi_init, precond); + py::array_t precond = compute_preconditioner (Hk); + return diagonalize_iterative (ik, hpsi_func, spsi_func, psi_init, precond); } // ==================== Iterative Diagonalization ==================== - DiagResult diagonalize_iterative( - int ik, - std::function(const py::array_t&)> hpsi_func, - std::function(const py::array_t&)> spsi_func, - const py::array_t& psi_init, - const py::array_t& precond) override + DiagResult + diagonalize_iterative (int ik, + std::function (const py::array_t&)> hpsi_func, + std::function (const py::array_t&)> spsi_func, + const py::array_t& psi_init, + const py::array_t& precond) override { DiagResult result; switch (config_.method) - { + { case DiagMethod::Davidson: - result = diagonalize_davidson(hpsi_func, 
psi_init, precond); + result = diagonalize_davidson (hpsi_func, psi_init, precond); break; case DiagMethod::DavSubspace: - result = diagonalize_dav_subspace(hpsi_func, psi_init, precond); + result = diagonalize_dav_subspace (hpsi_func, psi_init, precond); break; case DiagMethod::CG: - result = diagonalize_cg(hpsi_func, psi_init, precond); + result = diagonalize_cg (hpsi_func, psi_init, precond); break; default: - result = diagonalize_davidson(hpsi_func, psi_init, precond); - } + result = diagonalize_davidson (hpsi_func, psi_init, precond); + } return result; } // ==================== Configuration ==================== - void set_config(const DiagConfig& config) override + void + set_config (const DiagConfig& config) override { config_ = config; } - DiagConfig get_config() const override { return config_; } + DiagConfig + get_config () const override + { + return config_; + } - void set_tolerance(double tol) override + void + set_tolerance (double tol) override { config_.tolerance = tol; } - void set_max_iterations(int max_iter) override + void + set_max_iterations (int max_iter) override { config_.max_iterations = max_iter; } // ==================== Dimension Queries ==================== - int get_nbasis() const override { return nbasis_; } + int + get_nbasis () const override + { + return nbasis_; + } - int get_nbands() const override { return nbands_; } + int + get_nbands () const override + { + return nbands_; + } - void set_nbands(int nbands) override { nbands_ = nbands; } + void + set_nbands (int nbands) override + { + nbands_ = nbands; + } - void set_nbasis(int nbasis) { nbasis_ = nbasis; } + void + set_nbasis (int nbasis) + { + nbasis_ = nbasis; + } -private: + private: // Davidson diagonalization - DiagResult diagonalize_davidson( - std::function(const py::array_t&)> hpsi_func, - const py::array_t& psi_init, - const py::array_t& precond) + DiagResult + diagonalize_davidson (std::function (const py::array_t&)> hpsi_func, + const py::array_t& psi_init, + 
const py::array_t& precond) { DiagResult result; // Create Davidson adapter - hsolver::PyDiagoDavidAdapter david(nbasis_, nbands_); + hsolver::PyDiagoDavidAdapter david (nbasis_, nbands_); // Set initial psi - david.set_psi(psi_init); - david.init_eigenvalue(); + david.set_psi (psi_init); + david.init_eigenvalue (); // Convert preconditioner to vector - std::vector precond_vec(precond.data(), precond.data() + precond.size()); + std::vector precond_vec (precond.data (), precond.data () + precond.size ()); // Create diag_ethr vector - std::vector diag_ethr(nbands_, config_.tolerance); + std::vector diag_ethr (nbands_, config_.tolerance); // Create comm_info (single process for now) - ::hsolver::diag_comm_info comm_info(0, 1); + ::hsolver::diag_comm_info comm_info (0, 1); // Run diagonalization - int niter = david.diag( - hpsi_func, - precond_vec, - config_.dav_ndim, - config_.tolerance, - diag_ethr, - config_.max_iterations, - comm_info - ); + int niter = david.diag (hpsi_func, + precond_vec, + config_.dav_ndim, + config_.tolerance, + diag_ethr, + config_.max_iterations, + comm_info); // Get results - result.psi = david.get_psi(); - result.eigenvalues = david.get_eigenvalue(); + result.psi = david.get_psi (); + result.eigenvalues = david.get_eigenvalue (); result.iterations = niter; result.converged = (niter < config_.max_iterations); @@ -169,47 +188,46 @@ class DiagonalizerWrapper : public IDiagonalizer } // Davidson-Subspace diagonalization - DiagResult diagonalize_dav_subspace( - std::function(const py::array_t&)> hpsi_func, - const py::array_t& psi_init, - const py::array_t& precond) + DiagResult + diagonalize_dav_subspace (std::function (const py::array_t&)> hpsi_func, + const py::array_t& psi_init, + const py::array_t& precond) { DiagResult result; // Create DavSubspace adapter - hsolver::PyDiagoDavSubspaceAdapter dav_sub(nbasis_, nbands_); + hsolver::PyDiagoDavSubspaceAdapter dav_sub (nbasis_, nbands_); // Set initial psi - dav_sub.set_psi(psi_init); - 
dav_sub.init_eigenvalue(); + dav_sub.set_psi (psi_init); + dav_sub.init_eigenvalue (); // Convert preconditioner to vector - std::vector precond_vec(precond.data(), precond.data() + precond.size()); + std::vector precond_vec (precond.data (), precond.data () + precond.size ()); // Create diag_ethr vector - std::vector diag_ethr(nbands_, config_.tolerance); + std::vector diag_ethr (nbands_, config_.tolerance); // Create comm_info - ::hsolver::diag_comm_info comm_info(0, 1); + ::hsolver::diag_comm_info comm_info (0, 1); // Run diagonalization - int niter = dav_sub.diag( - hpsi_func, - precond_vec, - config_.dav_ndim, - config_.tolerance, - config_.max_iterations, - false, // need_subspace - diag_ethr, - true, // scf_type - comm_info, - 0, // diag_subspace (LAPACK) - 1 // nb2d + int niter = dav_sub.diag (hpsi_func, + precond_vec, + config_.dav_ndim, + config_.tolerance, + config_.max_iterations, + false, // need_subspace + diag_ethr, + true, // scf_type + comm_info, + 0, // diag_subspace (LAPACK) + 1 // nb2d ); // Get results - result.psi = dav_sub.get_psi(); - result.eigenvalues = dav_sub.get_eigenvalue(); + result.psi = dav_sub.get_psi (); + result.eigenvalues = dav_sub.get_eigenvalue (); result.iterations = niter; result.converged = (niter < config_.max_iterations); @@ -217,106 +235,105 @@ class DiagonalizerWrapper : public IDiagonalizer } // CG diagonalization - DiagResult diagonalize_cg( - std::function(const py::array_t&)> hpsi_func, - const py::array_t& psi_init, - const py::array_t& precond) + DiagResult + diagonalize_cg (std::function (const py::array_t&)> hpsi_func, + const py::array_t& psi_init, + const py::array_t& precond) { DiagResult result; #ifdef __ENABLE_ATEN // Create CG adapter - hsolver::PyDiagoCGAdapter cg(nbasis_, nbands_); + hsolver::PyDiagoCGAdapter cg (nbasis_, nbands_); // Set initial psi and preconditioner - cg.set_psi(psi_init); - cg.init_eig(); - cg.set_prec(precond); + cg.set_psi (psi_init); + cg.init_eig (); + cg.set_prec (precond); // 
Create diag_ethr vector - std::vector diag_ethr(nbands_, config_.tolerance); + std::vector diag_ethr (nbands_, config_.tolerance); // Run diagonalization - cg.diag( - hpsi_func, - config_.dav_ndim, - config_.tolerance, - diag_ethr, - false, // need_subspace - true, // scf_type - config_.nproc_in_pool - ); + cg.diag (hpsi_func, + config_.dav_ndim, + config_.tolerance, + diag_ethr, + false, // need_subspace + true, // scf_type + config_.nproc_in_pool); // Get results - result.psi = cg.get_psi(); - result.eigenvalues = cg.get_eig(); + result.psi = cg.get_psi (); + result.eigenvalues = cg.get_eig (); result.converged = true; #else // Fall back to Davidson if ATen not available - result = diagonalize_davidson(hpsi_func, psi_init, precond); + result = diagonalize_davidson (hpsi_func, psi_init, precond); #endif return result; } // Helper: Matrix-vector multiplication - py::array_t matrix_vector_multiply(const py::array_t& matrix, - const py::array_t& vec) + py::array_t + matrix_vector_multiply (const py::array_t& matrix, const py::array_t& vec) { using namespace pyabacus::utils; - auto mat_buf = matrix.request(); - auto vec_buf = vec.request(); + auto mat_buf = matrix.request (); + auto vec_buf = vec.request (); if (mat_buf.ndim != 2) - { - throw std::runtime_error("Matrix must be 2D"); - } + { + throw std::runtime_error ("Matrix must be 2D"); + } const ssize_t nrow = mat_buf.shape[0]; const ssize_t ncol = mat_buf.shape[1]; const ssize_t nvec = (vec_buf.ndim == 1) ? 
1 : vec_buf.shape[1]; - py::array_t result({nrow, nvec}); - auto res_buf = result.request(); + py::array_t result ({nrow, nvec}); + auto res_buf = result.request (); - const TK* mat_ptr = static_cast(mat_buf.ptr); - const TK* vec_ptr = static_cast(vec_buf.ptr); - TK* res_ptr = static_cast(res_buf.ptr); + const TK* mat_ptr = static_cast (mat_buf.ptr); + const TK* vec_ptr = static_cast (vec_buf.ptr); + TK* res_ptr = static_cast (res_buf.ptr); // Simple matrix-vector multiplication for (ssize_t i = 0; i < nrow; ++i) - { - for (ssize_t v = 0; v < nvec; ++v) { - TK sum = TK(0); - for (ssize_t j = 0; j < ncol; ++j) - { - sum += mat_ptr[i * ncol + j] * vec_ptr[j * nvec + v]; - } - res_ptr[i * nvec + v] = sum; + for (ssize_t v = 0; v < nvec; ++v) + { + TK sum = TK (0); + for (ssize_t j = 0; j < ncol; ++j) + { + sum += mat_ptr[i * ncol + j] * vec_ptr[j * nvec + v]; + } + res_ptr[i * nvec + v] = sum; + } } - } return result; } // Helper: Compute diagonal preconditioner from Hamiltonian - py::array_t compute_preconditioner(const py::array_t& Hk) + py::array_t + compute_preconditioner (const py::array_t& Hk) { - auto buf = Hk.request(); + auto buf = Hk.request (); const ssize_t n = buf.shape[0]; - const TK* ptr = static_cast(buf.ptr); + const TK* ptr = static_cast (buf.ptr); - py::array_t precond(n); - double* prec_ptr = precond.mutable_data(); + py::array_t precond (n); + double* prec_ptr = precond.mutable_data (); for (ssize_t i = 0; i < n; ++i) - { - // Use diagonal elements as preconditioner - TK diag = ptr[i * n + i]; - prec_ptr[i] = std::max(std::abs(diag), 1.0); - } + { + // Use diagonal elements as preconditioner + TK diag = ptr[i * n + i]; + prec_ptr[i] = std::max (std::abs (diag), 1.0); + } return precond; } diff --git a/python/pyabacus/src/ModuleESolver/components/hamiltonian_builder_lcao.hpp b/python/pyabacus/src/ModuleESolver/components/hamiltonian_builder_lcao.hpp index 97965674609..26d1d06c7df 100644 --- 
a/python/pyabacus/src/ModuleESolver/components/hamiltonian_builder_lcao.hpp +++ b/python/pyabacus/src/ModuleESolver/components/hamiltonian_builder_lcao.hpp @@ -16,8 +16,10 @@ #include #include -namespace pyabacus { -namespace esolver { +namespace pyabacus +{ +namespace esolver +{ /** * @brief LCAO Hamiltonian builder implementation @@ -31,27 +33,28 @@ namespace esolver { template class HamiltonianBuilderLCAO : public IHamiltonianBuilder { -public: - HamiltonianBuilderLCAO() = default; + public: + HamiltonianBuilderLCAO () = default; - HamiltonianBuilderLCAO(int nbasis, int nks, int nrow, int ncol) - : nbasis_(nbasis), nks_(nks), nrow_(nrow), ncol_(ncol) + HamiltonianBuilderLCAO (int nbasis, int nks, int nrow, int ncol) + : nbasis_ (nbasis), nks_ (nks), nrow_ (nrow), ncol_ (ncol) { // Allocate storage for H(k) and S(k) matrices - Hk_matrices_.resize(nks); - Sk_matrices_.resize(nks); + Hk_matrices_.resize (nks); + Sk_matrices_.resize (nks); for (int ik = 0; ik < nks; ++ik) - { - Hk_matrices_[ik].resize(nrow * ncol, TK(0)); - Sk_matrices_[ik].resize(nrow * ncol, TK(0)); - } + { + Hk_matrices_[ik].resize (nrow * ncol, TK (0)); + Sk_matrices_[ik].resize (nrow * ncol, TK (0)); + } } - ~HamiltonianBuilderLCAO() override = default; + ~HamiltonianBuilderLCAO () override = default; // ==================== Build/Update ==================== - void build_from_rho(const py::array_t& rho) override + void + build_from_rho (const py::array_t& rho) override { // Store rho for potential reconstruction // In full implementation, this would: @@ -61,31 +64,31 @@ class HamiltonianBuilderLCAO : public IHamiltonianBuilder // 4. 
Invalidate H(k) cache using namespace pyabacus::utils; - auto buf = rho.request(); + auto buf = rho.request (); if (buf.ndim != 2) - { - throw std::runtime_error("rho must be 2D array with shape (nspin, nrxx)"); - } + { + throw std::runtime_error ("rho must be 2D array with shape (nspin, nrxx)"); + } - nspin_ = static_cast(buf.shape[0]); - nrxx_ = static_cast(buf.shape[1]); + nspin_ = static_cast (buf.shape[0]); + nrxx_ = static_cast (buf.shape[1]); // Store rho - rho_data_.assign(static_cast(buf.ptr), - static_cast(buf.ptr) + nspin_ * nrxx_); + rho_data_.assign (static_cast (buf.ptr), static_cast (buf.ptr) + nspin_ * nrxx_); // Mark H(k) as needing update - hk_valid_.assign(nks_, false); + hk_valid_.assign (nks_, false); valid_ = true; } - void update_Hk(int ik) override + void + update_Hk (int ik) override { if (ik < 0 || ik >= nks_) - { - throw std::out_of_range("k-point index out of range"); - } + { + throw std::out_of_range ("k-point index out of range"); + } // In full implementation, this would: // 1. 
Fourier transform H(R) to H(k) for k-point ik @@ -94,39 +97,39 @@ class HamiltonianBuilderLCAO : public IHamiltonianBuilder hk_valid_[ik] = true; } - void invalidate() override + void + invalidate () override { - hk_valid_.assign(nks_, false); + hk_valid_.assign (nks_, false); valid_ = false; } // ==================== K-space Matrix Access ==================== - py::array_t get_Hk(int ik) const override + py::array_t + get_Hk (int ik) const override { - validate_kpoint(ik); + validate_kpoint (ik); - return utils::numpy_from_ptr_copy_2d( - Hk_matrices_[ik].data(), - static_cast(nrow_), - static_cast(ncol_) - ); + return utils::numpy_from_ptr_copy_2d (Hk_matrices_[ik].data (), + static_cast (nrow_), + static_cast (ncol_)); } - py::array_t get_Sk(int ik) const override + py::array_t + get_Sk (int ik) const override { - validate_kpoint(ik); + validate_kpoint (ik); - return utils::numpy_from_ptr_copy_2d( - Sk_matrices_[ik].data(), - static_cast(nrow_), - static_cast(ncol_) - ); + return utils::numpy_from_ptr_copy_2d (Sk_matrices_[ik].data (), + static_cast (nrow_), + static_cast (ncol_)); } // ==================== Real-space Matrix Access ==================== - py::dict get_HR() const override + py::dict + get_HR () const override { py::dict result; // In full implementation, would return H(R) data @@ -134,7 +137,8 @@ class HamiltonianBuilderLCAO : public IHamiltonianBuilder return result; } - py::dict get_SR() const override + py::dict + get_SR () const override { py::dict result; // In full implementation, would return S(R) data @@ -143,135 +147,153 @@ class HamiltonianBuilderLCAO : public IHamiltonianBuilder // ==================== Matrix-Vector Products ==================== - py::array_t apply_H(int ik, const py::array_t& psi_in) const override + py::array_t + apply_H (int ik, const py::array_t& psi_in) const override { - validate_kpoint(ik); + validate_kpoint (ik); // H * psi - return matrix_multiply(Hk_matrices_[ik], psi_in); + return matrix_multiply 
(Hk_matrices_[ik], psi_in); } - py::array_t apply_S(int ik, const py::array_t& psi_in) const override + py::array_t + apply_S (int ik, const py::array_t& psi_in) const override { - validate_kpoint(ik); + validate_kpoint (ik); // S * psi - return matrix_multiply(Sk_matrices_[ik], psi_in); + return matrix_multiply (Sk_matrices_[ik], psi_in); } // ==================== Dimension Queries ==================== - int get_nbasis() const override { return nbasis_; } + int + get_nbasis () const override + { + return nbasis_; + } - int get_nks() const override { return nks_; } + int + get_nks () const override + { + return nks_; + } - std::pair get_local_dims() const override + std::pair + get_local_dims () const override { return {nrow_, ncol_}; } - bool is_valid() const override { return valid_; } + bool + is_valid () const override + { + return valid_; + } // ==================== Data Setters (for testing/compatibility) ==================== - void set_Hk_data(int ik, const TK* data, int nrow, int ncol) + void + set_Hk_data (int ik, const TK* data, int nrow, int ncol) { if (ik < 0 || ik >= nks_) - { - throw std::out_of_range("k-point index out of range"); - } + { + throw std::out_of_range ("k-point index out of range"); + } nrow_ = nrow; ncol_ = ncol; - if (static_cast(Hk_matrices_[ik].size()) != nrow * ncol) - { - Hk_matrices_[ik].resize(nrow * ncol); - } + if (static_cast (Hk_matrices_[ik].size ()) != nrow * ncol) + { + Hk_matrices_[ik].resize (nrow * ncol); + } - std::copy(data, data + nrow * ncol, Hk_matrices_[ik].begin()); + std::copy (data, data + nrow * ncol, Hk_matrices_[ik].begin ()); hk_valid_[ik] = true; valid_ = true; } - void set_Sk_data(int ik, const TK* data, int nrow, int ncol) + void + set_Sk_data (int ik, const TK* data, int nrow, int ncol) { if (ik < 0 || ik >= nks_) - { - throw std::out_of_range("k-point index out of range"); - } + { + throw std::out_of_range ("k-point index out of range"); + } - if (static_cast(Sk_matrices_[ik].size()) != nrow * ncol) - 
{ - Sk_matrices_[ik].resize(nrow * ncol); - } + if (static_cast (Sk_matrices_[ik].size ()) != nrow * ncol) + { + Sk_matrices_[ik].resize (nrow * ncol); + } - std::copy(data, data + nrow * ncol, Sk_matrices_[ik].begin()); + std::copy (data, data + nrow * ncol, Sk_matrices_[ik].begin ()); } - void set_dimensions(int nbasis, int nks, int nrow, int ncol) + void + set_dimensions (int nbasis, int nks, int nrow, int ncol) { nbasis_ = nbasis; nks_ = nks; nrow_ = nrow; ncol_ = ncol; - Hk_matrices_.resize(nks); - Sk_matrices_.resize(nks); - hk_valid_.resize(nks, false); + Hk_matrices_.resize (nks); + Sk_matrices_.resize (nks); + hk_valid_.resize (nks, false); for (int ik = 0; ik < nks; ++ik) - { - Hk_matrices_[ik].resize(nrow * ncol, TK(0)); - Sk_matrices_[ik].resize(nrow * ncol, TK(0)); - } + { + Hk_matrices_[ik].resize (nrow * ncol, TK (0)); + Sk_matrices_[ik].resize (nrow * ncol, TK (0)); + } } -private: - void validate_kpoint(int ik) const + private: + void + validate_kpoint (int ik) const { if (!valid_) - { - throw std::runtime_error("Hamiltonian not built. Call build_from_rho first."); - } + { + throw std::runtime_error ("Hamiltonian not built. Call build_from_rho first."); + } if (ik < 0 || ik >= nks_) - { - throw std::out_of_range("k-point index out of range"); - } + { + throw std::out_of_range ("k-point index out of range"); + } } - py::array_t matrix_multiply(const std::vector& matrix, - const py::array_t& vec) const + py::array_t + matrix_multiply (const std::vector& matrix, const py::array_t& vec) const { - auto vec_buf = vec.request(); - const TK* vec_ptr = static_cast(vec_buf.ptr); + auto vec_buf = vec.request (); + const TK* vec_ptr = static_cast (vec_buf.ptr); - const ssize_t nvec = (vec_buf.ndim == 1) ? 1 : - (vec_buf.ndim == 2) ? vec_buf.shape[1] : 1; + const ssize_t nvec = (vec_buf.ndim == 1) ? 1 : (vec_buf.ndim == 2) ? vec_buf.shape[1] : 1; const ssize_t vec_rows = (vec_buf.ndim == 1) ? 
vec_buf.shape[0] : vec_buf.shape[0]; if (vec_rows != ncol_) - { - throw std::runtime_error("Vector dimension mismatch"); - } + { + throw std::runtime_error ("Vector dimension mismatch"); + } - py::array_t result({static_cast(nrow_), nvec}); - auto res_buf = result.request(); - TK* res_ptr = static_cast(res_buf.ptr); + py::array_t result ({static_cast (nrow_), nvec}); + auto res_buf = result.request (); + TK* res_ptr = static_cast (res_buf.ptr); // Matrix-vector multiplication for (int i = 0; i < nrow_; ++i) - { - for (ssize_t v = 0; v < nvec; ++v) { - TK sum = TK(0); - for (int j = 0; j < ncol_; ++j) - { - sum += matrix[i * ncol_ + j] * vec_ptr[j * nvec + v]; - } - res_ptr[i * nvec + v] = sum; + for (ssize_t v = 0; v < nvec; ++v) + { + TK sum = TK (0); + for (int j = 0; j < ncol_; ++j) + { + sum += matrix[i * ncol_ + j] * vec_ptr[j * nvec + v]; + } + res_ptr[i * nvec + v] = sum; + } } - } return result; } diff --git a/python/pyabacus/src/ModuleESolver/components/scf_controller_lcao.hpp b/python/pyabacus/src/ModuleESolver/components/scf_controller_lcao.hpp index c5dac087fb3..09f70fd4010 100644 --- a/python/pyabacus/src/ModuleESolver/components/scf_controller_lcao.hpp +++ b/python/pyabacus/src/ModuleESolver/components/scf_controller_lcao.hpp @@ -19,8 +19,10 @@ #include #include -namespace pyabacus { -namespace esolver { +namespace pyabacus +{ +namespace esolver +{ /** * @brief LCAO SCF controller implementation @@ -34,28 +36,24 @@ namespace esolver { template class SCFControllerLCAO : public ISCFController { -public: - SCFControllerLCAO() = default; + public: + SCFControllerLCAO () = default; - SCFControllerLCAO(int nbasis, int nks, int nbands, int nspin, int nrxx) - : nbasis_(nbasis) - , nks_(nks) - , nbands_(nbands) - , nspin_(nspin) - , nrxx_(nrxx) + SCFControllerLCAO (int nbasis, int nks, int nbands, int nspin, int nrxx) + : nbasis_ (nbasis), nks_ (nks), nbands_ (nbands), nspin_ (nspin), nrxx_ (nrxx) { // Initialize components - hamilt_builder_ = 
std::make_unique>( - nbasis, nks, nbasis, nbasis); - charge_mixer_ = std::make_unique(nspin, nrxx); - diagonalizer_ = std::make_unique>(nbasis, nbands); + hamilt_builder_ = std::make_unique> (nbasis, nks, nbasis, nbasis); + charge_mixer_ = std::make_unique (nspin, nrxx); + diagonalizer_ = std::make_unique> (nbasis, nbands); } - ~SCFControllerLCAO() override = default; + ~SCFControllerLCAO () override = default; // ==================== Lifecycle ==================== - void initialize(int istep) override + void + initialize (int istep) override { istep_ = istep; iteration_ = 0; @@ -65,14 +63,15 @@ class SCFControllerLCAO : public ISCFController // Reset charge mixer if (charge_mixer_) - { - charge_mixer_->reset(); - } + { + charge_mixer_->reset (); + } initialized_ = true; } - void finalize(int istep) override + void + finalize (int istep) override { // Cleanup after SCF initialized_ = false; @@ -80,12 +79,13 @@ class SCFControllerLCAO : public ISCFController // ==================== Iteration Control ==================== - SCFStatus run_iteration(int iter) override + SCFStatus + run_iteration (int iter) override { if (!initialized_) - { - throw std::runtime_error("SCF not initialized. Call initialize() first."); - } + { + throw std::runtime_error ("SCF not initialized. 
Call initialize() first."); + } iteration_ = iter; status_ = SCFStatus::Running; @@ -109,118 +109,144 @@ class SCFControllerLCAO : public ISCFController return status_; } - SCFStatus run_scf(const SCFConvergenceCriteria& criteria, - SCFIterationCallback callback) override + SCFStatus + run_scf (const SCFConvergenceCriteria& criteria, SCFIterationCallback callback) override { - initialize(istep_); + initialize (istep_); status_ = SCFStatus::Running; for (int iter = 1; iter <= criteria.max_iterations; ++iter) - { - run_iteration(iter); - - // Check convergence - bool converged = true; - if (criteria.check_drho && drho_ > criteria.drho_threshold) - { - converged = false; - } - - // Call callback if provided - if (callback) - { - bool continue_scf = callback(iter, drho_, energy_); - if (!continue_scf) - { - status_ = SCFStatus::Failed; - return status_; - } - } - - if (converged) { - status_ = SCFStatus::Converged; - return status_; + run_iteration (iter); + + // Check convergence + bool converged = true; + if (criteria.check_drho && drho_ > criteria.drho_threshold) + { + converged = false; + } + + // Call callback if provided + if (callback) + { + bool continue_scf = callback (iter, drho_, energy_); + if (!continue_scf) + { + status_ = SCFStatus::Failed; + return status_; + } + } + + if (converged) + { + status_ = SCFStatus::Converged; + return status_; + } } - } status_ = SCFStatus::MaxIterReached; return status_; } - bool is_converged() const override + bool + is_converged () const override { return status_ == SCFStatus::Converged; } - SCFStatus get_status() const override { return status_; } + SCFStatus + get_status () const override + { + return status_; + } // ==================== State Queries ==================== - int get_iteration() const override { return iteration_; } + int + get_iteration () const override + { + return iteration_; + } - double get_drho() const override { return drho_; } + double + get_drho () const override + { + return drho_; + } - double 
get_energy() const override { return energy_; } + double + get_energy () const override + { + return energy_; + } // ==================== Component Access ==================== - void* get_hamiltonian_builder() override + void* + get_hamiltonian_builder () override { - return hamilt_builder_.get(); + return hamilt_builder_.get (); } - IChargeMixer* get_charge_mixer() override + IChargeMixer* + get_charge_mixer () override { - return charge_mixer_.get(); + return charge_mixer_.get (); } - void* get_diagonalizer() override + void* + get_diagonalizer () override { - return diagonalizer_.get(); + return diagonalizer_.get (); } // ==================== Typed Component Access ==================== - HamiltonianBuilderLCAO* get_hamiltonian_builder_typed() + HamiltonianBuilderLCAO* + get_hamiltonian_builder_typed () { - return hamilt_builder_.get(); + return hamilt_builder_.get (); } - DiagonalizerWrapper* get_diagonalizer_typed() + DiagonalizerWrapper* + get_diagonalizer_typed () { - return diagonalizer_.get(); + return diagonalizer_.get (); } // ==================== Configuration ==================== - void set_convergence_criteria(const SCFConvergenceCriteria& criteria) + void + set_convergence_criteria (const SCFConvergenceCriteria& criteria) { criteria_ = criteria; } - SCFConvergenceCriteria get_convergence_criteria() const + SCFConvergenceCriteria + get_convergence_criteria () const { return criteria_; } - void set_mixing_config(const MixingConfig& config) + void + set_mixing_config (const MixingConfig& config) { if (charge_mixer_) - { - charge_mixer_->set_config(config); - } + { + charge_mixer_->set_config (config); + } } - void set_diag_config(const DiagConfig& config) + void + set_diag_config (const DiagConfig& config) { if (diagonalizer_) - { - diagonalizer_->set_config(config); - } + { + diagonalizer_->set_config (config); + } } -private: + private: // Dimensions int nbasis_ = 0; int nks_ = 0; diff --git 
a/python/pyabacus/src/ModuleESolver/interfaces/i_charge_mixer.hpp b/python/pyabacus/src/ModuleESolver/interfaces/i_charge_mixer.hpp index 43ac40fc58e..947fa16e881 100644 --- a/python/pyabacus/src/ModuleESolver/interfaces/i_charge_mixer.hpp +++ b/python/pyabacus/src/ModuleESolver/interfaces/i_charge_mixer.hpp @@ -15,18 +15,20 @@ namespace py = pybind11; -namespace pyabacus { -namespace esolver { +namespace pyabacus +{ +namespace esolver +{ /** * @brief Mixing method types */ enum class MixingMethod { - Plain, ///< Simple linear mixing - Pulay, ///< Pulay mixing (DIIS) - Broyden, ///< Broyden mixing - Anderson ///< Anderson mixing + Plain, ///< Simple linear mixing + Pulay, ///< Pulay mixing (DIIS) + Broyden, ///< Broyden mixing + Anderson ///< Anderson mixing }; /** @@ -35,11 +37,11 @@ enum class MixingMethod struct MixingConfig { MixingMethod method = MixingMethod::Pulay; - double beta = 0.7; ///< Mixing parameter - int ndim = 8; ///< Number of history steps for Pulay/Broyden - double gg0 = 1.0; ///< Kerker mixing parameter - bool mix_gg0 = false; ///< Whether to use Kerker mixing - bool mix_rho = true; ///< Mix charge density (vs potential) + double beta = 0.7; ///< Mixing parameter + int ndim = 8; ///< Number of history steps for Pulay/Broyden + double gg0 = 1.0; ///< Kerker mixing parameter + bool mix_gg0 = false; ///< Whether to use Kerker mixing + bool mix_rho = true; ///< Mix charge density (vs potential) }; /** @@ -50,8 +52,8 @@ struct MixingConfig */ class IChargeMixer { -public: - virtual ~IChargeMixer() = default; + public: + virtual ~IChargeMixer () = default; // ==================== Core Mixing Operations ==================== @@ -61,13 +63,12 @@ class IChargeMixer * @param rho_out Output charge density (from current iteration) * @return Mixed charge density for next iteration */ - virtual py::array_t mix(const py::array_t& rho_in, - const py::array_t& rho_out) = 0; + virtual py::array_t mix (const py::array_t& rho_in, const py::array_t& rho_out) = 0; 
/** * @brief Reset mixer state (clear history) */ - virtual void reset() = 0; + virtual void reset () = 0; // ==================== State Queries ==================== @@ -75,13 +76,13 @@ class IChargeMixer * @brief Get charge density difference from last mixing * @return drho value */ - virtual double get_drho() const = 0; + virtual double get_drho () const = 0; /** * @brief Get current iteration count * @return Number of mixing iterations performed */ - virtual int get_iteration() const = 0; + virtual int get_iteration () const = 0; // ==================== Configuration ==================== @@ -89,63 +90,74 @@ class IChargeMixer * @brief Set mixing configuration * @param config Mixing configuration */ - virtual void set_config(const MixingConfig& config) = 0; + virtual void set_config (const MixingConfig& config) = 0; /** * @brief Get current mixing configuration * @return Current configuration */ - virtual MixingConfig get_config() const = 0; + virtual MixingConfig get_config () const = 0; /** * @brief Set mixing parameter (beta) * @param beta Mixing parameter (0 < beta <= 1) */ - virtual void set_mixing_beta(double beta) = 0; + virtual void set_mixing_beta (double beta) = 0; /** * @brief Get mixing parameter * @return Current beta value */ - virtual double get_mixing_beta() const = 0; + virtual double get_mixing_beta () const = 0; /** * @brief Set mixing method * @param method Mixing method to use */ - virtual void set_mixing_method(MixingMethod method) = 0; + virtual void set_mixing_method (MixingMethod method) = 0; /** * @brief Get current mixing method * @return Current method */ - virtual MixingMethod get_mixing_method() const = 0; + virtual MixingMethod get_mixing_method () const = 0; }; /** * @brief Convert MixingMethod enum to string */ -inline std::string mixing_method_to_string(MixingMethod method) +inline std::string + mixing_method_to_string (MixingMethod method) { switch (method) - { - case MixingMethod::Plain: return "plain"; - case 
MixingMethod::Pulay: return "pulay"; - case MixingMethod::Broyden: return "broyden"; - case MixingMethod::Anderson: return "anderson"; - default: return "unknown"; - } + { + case MixingMethod::Plain: + return "plain"; + case MixingMethod::Pulay: + return "pulay"; + case MixingMethod::Broyden: + return "broyden"; + case MixingMethod::Anderson: + return "anderson"; + default: + return "unknown"; + } } /** * @brief Convert string to MixingMethod enum */ -inline MixingMethod string_to_mixing_method(const std::string& str) +inline MixingMethod + string_to_mixing_method (const std::string& str) { - if (str == "plain") return MixingMethod::Plain; - if (str == "pulay") return MixingMethod::Pulay; - if (str == "broyden") return MixingMethod::Broyden; - if (str == "anderson") return MixingMethod::Anderson; + if (str == "plain") + return MixingMethod::Plain; + if (str == "pulay") + return MixingMethod::Pulay; + if (str == "broyden") + return MixingMethod::Broyden; + if (str == "anderson") + return MixingMethod::Anderson; return MixingMethod::Pulay; // default } diff --git a/python/pyabacus/src/ModuleESolver/interfaces/i_diagonalizer.hpp b/python/pyabacus/src/ModuleESolver/interfaces/i_diagonalizer.hpp index 3f626b06b31..7d746e77f06 100644 --- a/python/pyabacus/src/ModuleESolver/interfaces/i_diagonalizer.hpp +++ b/python/pyabacus/src/ModuleESolver/interfaces/i_diagonalizer.hpp @@ -20,20 +20,22 @@ namespace py = pybind11; -namespace pyabacus { -namespace esolver { +namespace pyabacus +{ +namespace esolver +{ /** * @brief Diagonalization method types */ enum class DiagMethod { - Davidson, ///< Davidson iterative method - DavSubspace, ///< Davidson with subspace rotation - CG, ///< Conjugate gradient - LAPACK, ///< Direct LAPACK diagonalization - ScaLAPACK, ///< Parallel ScaLAPACK - ELPA ///< ELPA eigensolver + Davidson, ///< Davidson iterative method + DavSubspace, ///< Davidson with subspace rotation + CG, ///< Conjugate gradient + LAPACK, ///< Direct LAPACK diagonalization + 
ScaLAPACK, ///< Parallel ScaLAPACK + ELPA ///< ELPA eigensolver }; /** @@ -42,11 +44,11 @@ enum class DiagMethod template struct DiagResult { - py::array_t psi; ///< Eigenvectors + py::array_t psi; ///< Eigenvectors py::array_t eigenvalues; ///< Eigenvalues - int iterations = 0; ///< Number of iterations (for iterative methods) - bool converged = false; ///< Whether converged - double residual = 0.0; ///< Final residual + int iterations = 0; ///< Number of iterations (for iterative methods) + bool converged = false; ///< Whether converged + double residual = 0.0; ///< Final residual }; /** @@ -55,10 +57,10 @@ struct DiagResult struct DiagConfig { DiagMethod method = DiagMethod::Davidson; - double tolerance = 1e-6; ///< Convergence tolerance - int max_iterations = 100; ///< Maximum iterations - int dav_ndim = 4; ///< Davidson subspace dimension multiplier - int nproc_in_pool = 1; ///< Number of processes in pool + double tolerance = 1e-6; ///< Convergence tolerance + int max_iterations = 100; ///< Maximum iterations + int dav_ndim = 4; ///< Davidson subspace dimension multiplier + int nproc_in_pool = 1; ///< Number of processes in pool }; /** @@ -72,8 +74,8 @@ struct DiagConfig template class IDiagonalizer { -public: - virtual ~IDiagonalizer() = default; + public: + virtual ~IDiagonalizer () = default; // ==================== Direct Diagonalization ==================== @@ -85,10 +87,8 @@ class IDiagonalizer * @param psi_init Initial guess for eigenvectors (optional) * @return Diagonalization result */ - virtual DiagResult diagonalize(int ik, - const py::array_t& Hk, - const py::array_t& Sk, - const py::array_t& psi_init) = 0; + virtual DiagResult + diagonalize (int ik, const py::array_t& Hk, const py::array_t& Sk, const py::array_t& psi_init) = 0; // ==================== Iterative Diagonalization ==================== @@ -104,12 +104,11 @@ class IDiagonalizer * @param precond Preconditioner (diagonal approximation to H) * @return Diagonalization result */ - virtual 
DiagResult diagonalize_iterative( - int ik, - std::function(const py::array_t&)> hpsi_func, - std::function(const py::array_t&)> spsi_func, - const py::array_t& psi_init, - const py::array_t& precond) = 0; + virtual DiagResult diagonalize_iterative (int ik, + std::function (const py::array_t&)> hpsi_func, + std::function (const py::array_t&)> spsi_func, + const py::array_t& psi_init, + const py::array_t& precond) = 0; // ==================== Configuration ==================== @@ -117,25 +116,25 @@ class IDiagonalizer * @brief Set diagonalization configuration * @param config Configuration */ - virtual void set_config(const DiagConfig& config) = 0; + virtual void set_config (const DiagConfig& config) = 0; /** * @brief Get current configuration * @return Current configuration */ - virtual DiagConfig get_config() const = 0; + virtual DiagConfig get_config () const = 0; /** * @brief Set convergence tolerance * @param tol Tolerance */ - virtual void set_tolerance(double tol) = 0; + virtual void set_tolerance (double tol) = 0; /** * @brief Set maximum iterations * @param max_iter Maximum iterations */ - virtual void set_max_iterations(int max_iter) = 0; + virtual void set_max_iterations (int max_iter) = 0; // ==================== Dimension Queries ==================== @@ -143,19 +142,19 @@ class IDiagonalizer * @brief Get number of basis functions * @return Number of basis functions */ - virtual int get_nbasis() const = 0; + virtual int get_nbasis () const = 0; /** * @brief Get number of bands to compute * @return Number of bands */ - virtual int get_nbands() const = 0; + virtual int get_nbands () const = 0; /** * @brief Set number of bands to compute * @param nbands Number of bands */ - virtual void set_nbands(int nbands) = 0; + virtual void set_nbands (int nbands) = 0; }; // Type aliases for common use cases @@ -165,31 +164,46 @@ using IDiagonalizerMultiK = IDiagonalizer>; /** * @brief Convert DiagMethod enum to string */ -inline std::string 
diag_method_to_string(DiagMethod method) +inline std::string + diag_method_to_string (DiagMethod method) { switch (method) - { - case DiagMethod::Davidson: return "davidson"; - case DiagMethod::DavSubspace: return "dav_subspace"; - case DiagMethod::CG: return "cg"; - case DiagMethod::LAPACK: return "lapack"; - case DiagMethod::ScaLAPACK: return "scalapack"; - case DiagMethod::ELPA: return "elpa"; - default: return "unknown"; - } + { + case DiagMethod::Davidson: + return "davidson"; + case DiagMethod::DavSubspace: + return "dav_subspace"; + case DiagMethod::CG: + return "cg"; + case DiagMethod::LAPACK: + return "lapack"; + case DiagMethod::ScaLAPACK: + return "scalapack"; + case DiagMethod::ELPA: + return "elpa"; + default: + return "unknown"; + } } /** * @brief Convert string to DiagMethod enum */ -inline DiagMethod string_to_diag_method(const std::string& str) +inline DiagMethod + string_to_diag_method (const std::string& str) { - if (str == "davidson") return DiagMethod::Davidson; - if (str == "dav_subspace") return DiagMethod::DavSubspace; - if (str == "cg") return DiagMethod::CG; - if (str == "lapack") return DiagMethod::LAPACK; - if (str == "scalapack") return DiagMethod::ScaLAPACK; - if (str == "elpa") return DiagMethod::ELPA; + if (str == "davidson") + return DiagMethod::Davidson; + if (str == "dav_subspace") + return DiagMethod::DavSubspace; + if (str == "cg") + return DiagMethod::CG; + if (str == "lapack") + return DiagMethod::LAPACK; + if (str == "scalapack") + return DiagMethod::ScaLAPACK; + if (str == "elpa") + return DiagMethod::ELPA; return DiagMethod::Davidson; // default } diff --git a/python/pyabacus/src/ModuleESolver/interfaces/i_hamiltonian_builder.hpp b/python/pyabacus/src/ModuleESolver/interfaces/i_hamiltonian_builder.hpp index 794b8c119ee..95eef8a5119 100644 --- a/python/pyabacus/src/ModuleESolver/interfaces/i_hamiltonian_builder.hpp +++ b/python/pyabacus/src/ModuleESolver/interfaces/i_hamiltonian_builder.hpp @@ -17,8 +17,10 @@ namespace py = 
pybind11; -namespace pyabacus { -namespace esolver { +namespace pyabacus +{ +namespace esolver +{ /** * @brief Abstract interface for Hamiltonian builder @@ -32,8 +34,8 @@ namespace esolver { template class IHamiltonianBuilder { -public: - virtual ~IHamiltonianBuilder() = default; + public: + virtual ~IHamiltonianBuilder () = default; // ==================== Build/Update ==================== @@ -41,18 +43,18 @@ class IHamiltonianBuilder * @brief Build Hamiltonian from charge density * @param rho Charge density array with shape (nspin, nrxx) */ - virtual void build_from_rho(const py::array_t& rho) = 0; + virtual void build_from_rho (const py::array_t& rho) = 0; /** * @brief Update H(k) for a specific k-point * @param ik K-point index */ - virtual void update_Hk(int ik) = 0; + virtual void update_Hk (int ik) = 0; /** * @brief Invalidate cached matrices (force rebuild) */ - virtual void invalidate() = 0; + virtual void invalidate () = 0; // ==================== K-space Matrix Access ==================== @@ -61,14 +63,14 @@ class IHamiltonianBuilder * @param ik K-point index * @return Hamiltonian matrix as numpy array */ - virtual py::array_t get_Hk(int ik) const = 0; + virtual py::array_t get_Hk (int ik) const = 0; /** * @brief Get S(k) overlap matrix for specific k-point * @param ik K-point index * @return Overlap matrix as numpy array */ - virtual py::array_t get_Sk(int ik) const = 0; + virtual py::array_t get_Sk (int ik) const = 0; // ==================== Real-space Matrix Access ==================== @@ -76,13 +78,13 @@ class IHamiltonianBuilder * @brief Get H(R) in sparse format * @return Dictionary mapping (iat1, iat2, R) -> matrix */ - virtual py::dict get_HR() const = 0; + virtual py::dict get_HR () const = 0; /** * @brief Get S(R) in sparse format * @return Dictionary mapping (iat1, iat2, R) -> matrix */ - virtual py::dict get_SR() const = 0; + virtual py::dict get_SR () const = 0; // ==================== Matrix-Vector Products ==================== @@ -92,7 
+94,7 @@ class IHamiltonianBuilder * @param psi_in Input wave function * @return H * psi_in */ - virtual py::array_t apply_H(int ik, const py::array_t& psi_in) const = 0; + virtual py::array_t apply_H (int ik, const py::array_t& psi_in) const = 0; /** * @brief Apply overlap matrix to wave function: S|psi> @@ -100,7 +102,7 @@ class IHamiltonianBuilder * @param psi_in Input wave function * @return S * psi_in */ - virtual py::array_t apply_S(int ik, const py::array_t& psi_in) const = 0; + virtual py::array_t apply_S (int ik, const py::array_t& psi_in) const = 0; // ==================== Dimension Queries ==================== @@ -108,25 +110,25 @@ class IHamiltonianBuilder * @brief Get number of basis functions * @return Number of basis functions */ - virtual int get_nbasis() const = 0; + virtual int get_nbasis () const = 0; /** * @brief Get number of k-points * @return Number of k-points */ - virtual int get_nks() const = 0; + virtual int get_nks () const = 0; /** * @brief Get local matrix dimensions (for 2D distribution) * @return Pair of (nrow, ncol) */ - virtual std::pair get_local_dims() const = 0; + virtual std::pair get_local_dims () const = 0; /** * @brief Check if Hamiltonian data is valid * @return true if valid */ - virtual bool is_valid() const = 0; + virtual bool is_valid () const = 0; }; // Type aliases for common use cases diff --git a/python/pyabacus/src/ModuleESolver/interfaces/i_scf_controller.hpp b/python/pyabacus/src/ModuleESolver/interfaces/i_scf_controller.hpp index 401a7e6dac1..c6bc36e0a91 100644 --- a/python/pyabacus/src/ModuleESolver/interfaces/i_scf_controller.hpp +++ b/python/pyabacus/src/ModuleESolver/interfaces/i_scf_controller.hpp @@ -12,8 +12,10 @@ #include #include -namespace pyabacus { -namespace esolver { +namespace pyabacus +{ +namespace esolver +{ /** * @brief SCF convergence status @@ -49,12 +51,14 @@ struct SCFConvergenceCriteria * * Return true to continue, false to stop SCF. 
*/ -using SCFIterationCallback = std::function; +using SCFIterationCallback = std::function; // Forward declarations for component interfaces -template class IHamiltonianBuilder; +template +class IHamiltonianBuilder; class IChargeMixer; -template class IDiagonalizer; +template +class IDiagonalizer; /** * @brief Abstract interface for SCF controller @@ -65,8 +69,8 @@ template class IDiagonalizer; */ class ISCFController { -public: - virtual ~ISCFController() = default; + public: + virtual ~ISCFController () = default; // ==================== Lifecycle ==================== @@ -74,13 +78,13 @@ class ISCFController * @brief Initialize SCF calculation * @param istep Ion step index (for MD/relaxation) */ - virtual void initialize(int istep = 0) = 0; + virtual void initialize (int istep = 0) = 0; /** * @brief Finalize SCF calculation * @param istep Ion step index */ - virtual void finalize(int istep = 0) = 0; + virtual void finalize (int istep = 0) = 0; // ==================== Iteration Control ==================== @@ -89,7 +93,7 @@ class ISCFController * @param iter Iteration number (1-based) * @return Current SCF status */ - virtual SCFStatus run_iteration(int iter) = 0; + virtual SCFStatus run_iteration (int iter) = 0; /** * @brief Run complete SCF loop @@ -97,20 +101,19 @@ class ISCFController * @param callback Optional callback for each iteration * @return Final SCF status */ - virtual SCFStatus run_scf(const SCFConvergenceCriteria& criteria, - SCFIterationCallback callback = nullptr) = 0; + virtual SCFStatus run_scf (const SCFConvergenceCriteria& criteria, SCFIterationCallback callback = nullptr) = 0; /** * @brief Check if SCF is converged * @return true if converged */ - virtual bool is_converged() const = 0; + virtual bool is_converged () const = 0; /** * @brief Get current SCF status * @return Current status */ - virtual SCFStatus get_status() const = 0; + virtual SCFStatus get_status () const = 0; // ==================== State Queries ==================== @@ 
-118,19 +121,19 @@ class ISCFController * @brief Get current iteration number * @return Iteration number (0 if not started) */ - virtual int get_iteration() const = 0; + virtual int get_iteration () const = 0; /** * @brief Get current charge density difference * @return drho value */ - virtual double get_drho() const = 0; + virtual double get_drho () const = 0; /** * @brief Get current total energy * @return Total energy in Ry */ - virtual double get_energy() const = 0; + virtual double get_energy () const = 0; // ==================== Component Access ==================== @@ -138,19 +141,19 @@ class ISCFController * @brief Get Hamiltonian builder component * @return Pointer to Hamiltonian builder (may be nullptr) */ - virtual void* get_hamiltonian_builder() = 0; + virtual void* get_hamiltonian_builder () = 0; /** * @brief Get charge mixer component * @return Pointer to charge mixer (may be nullptr) */ - virtual IChargeMixer* get_charge_mixer() = 0; + virtual IChargeMixer* get_charge_mixer () = 0; /** * @brief Get diagonalizer component * @return Pointer to diagonalizer (may be nullptr) */ - virtual void* get_diagonalizer() = 0; + virtual void* get_diagonalizer () = 0; }; } // namespace esolver diff --git a/python/pyabacus/src/ModuleESolver/py_esolver_lcao.cpp b/python/pyabacus/src/ModuleESolver/py_esolver_lcao.cpp index 9261a9225c3..6bd5fdaf0aa 100644 --- a/python/pyabacus/src/ModuleESolver/py_esolver_lcao.cpp +++ b/python/pyabacus/src/ModuleESolver/py_esolver_lcao.cpp @@ -37,114 +37,119 @@ namespace py_esolver // PyChargeAccessor Implementation // ============================================================================ -void PyChargeAccessor::set_from_charge(const Charge* chr) +void + PyChargeAccessor::set_from_charge (const Charge* chr) { if (chr == nullptr) - { - chr_ptr_ = nullptr; - rho_ptr_ = nullptr; - nspin_ = 0; - nrxx_ = 0; - ngmc_ = 0; - return; - } + { + chr_ptr_ = nullptr; + rho_ptr_ = nullptr; + nspin_ = 0; + nrxx_ = 0; + ngmc_ = 0; + return; + } 
chr_ptr_ = chr; - rho_ptr_ = nullptr; // Use chr_ptr_ instead + rho_ptr_ = nullptr; // Use chr_ptr_ instead nspin_ = chr->nspin; nrxx_ = chr->nrxx; ngmc_ = chr->ngmc; } -void PyChargeAccessor::set_data(const double* rho_ptr, int nspin, int nrxx) +void + PyChargeAccessor::set_data (const double* rho_ptr, int nspin, int nrxx) { - chr_ptr_ = nullptr; // Not using Charge object + chr_ptr_ = nullptr; // Not using Charge object rho_ptr_ = rho_ptr; nspin_ = nspin; nrxx_ = nrxx; ngmc_ = 0; } -py::array_t PyChargeAccessor::get_rho() const +py::array_t + PyChargeAccessor::get_rho () const { - if (!is_valid()) - { - throw std::runtime_error("Charge data not available. Run SCF first."); - } + if (!is_valid ()) + { + throw std::runtime_error ("Charge data not available. Run SCF first."); + } // Create numpy array with shape (nspin, nrxx) - std::vector shape = {static_cast(nspin_), static_cast(nrxx_)}; + std::vector shape = {static_cast (nspin_), static_cast (nrxx_)}; - auto result = py::array_t(shape); - auto buf = result.request(); - double* ptr = static_cast(buf.ptr); + auto result = py::array_t (shape); + auto buf = result.request (); + double* ptr = static_cast (buf.ptr); // Copy data from either chr_ptr_ or rho_ptr_ if (chr_ptr_ != nullptr && chr_ptr_->rho != nullptr) - { - // Copy from Charge object (rho is double** with shape [nspin][nrxx]) - for (int is = 0; is < nspin_; ++is) { - if (chr_ptr_->rho[is] != nullptr) - { - std::copy(chr_ptr_->rho[is], chr_ptr_->rho[is] + nrxx_, ptr + is * nrxx_); - } + // Copy from Charge object (rho is double** with shape [nspin][nrxx]) + for (int is = 0; is < nspin_; ++is) + { + if (chr_ptr_->rho[is] != nullptr) + { + std::copy (chr_ptr_->rho[is], chr_ptr_->rho[is] + nrxx_, ptr + is * nrxx_); + } + } } - } else if (rho_ptr_ != nullptr) - { - // Copy from flat array (legacy mode) - std::copy(rho_ptr_, rho_ptr_ + nspin_ * nrxx_, ptr); - } + { + // Copy from flat array (legacy mode) + std::copy (rho_ptr_, rho_ptr_ + nspin_ * nrxx_, ptr); + 
} else - { - throw std::runtime_error("No valid charge data source."); - } + { + throw std::runtime_error ("No valid charge data source."); + } return result; } -py::array_t> PyChargeAccessor::get_rhog() const +py::array_t> + PyChargeAccessor::get_rhog () const { if (chr_ptr_ == nullptr || chr_ptr_->rhog == nullptr) - { - throw std::runtime_error("Reciprocal-space charge density not available."); - } + { + throw std::runtime_error ("Reciprocal-space charge density not available."); + } // Create numpy array with shape (nspin, ngmc) - std::vector shape = {static_cast(nspin_), static_cast(ngmc_)}; + std::vector shape = {static_cast (nspin_), static_cast (ngmc_)}; - auto result = py::array_t>(shape); - auto buf = result.request(); - std::complex* ptr = static_cast*>(buf.ptr); + auto result = py::array_t> (shape); + auto buf = result.request (); + std::complex* ptr = static_cast*> (buf.ptr); // Copy from Charge object (rhog is complex** with shape [nspin][ngmc]) for (int is = 0; is < nspin_; ++is) - { - if (chr_ptr_->rhog[is] != nullptr) { - std::copy(chr_ptr_->rhog[is], chr_ptr_->rhog[is] + ngmc_, ptr + is * ngmc_); + if (chr_ptr_->rhog[is] != nullptr) + { + std::copy (chr_ptr_->rhog[is], chr_ptr_->rhog[is] + ngmc_, ptr + is * ngmc_); + } } - } return result; } -py::array_t PyChargeAccessor::get_rho_core() const +py::array_t + PyChargeAccessor::get_rho_core () const { if (chr_ptr_ == nullptr || chr_ptr_->rho_core == nullptr) - { - throw std::runtime_error("Core charge density not available."); - } + { + throw std::runtime_error ("Core charge density not available."); + } // Create numpy array with shape (nrxx,) - std::vector shape = {static_cast(nrxx_)}; + std::vector shape = {static_cast (nrxx_)}; - auto result = py::array_t(shape); - auto buf = result.request(); - double* ptr = static_cast(buf.ptr); + auto result = py::array_t (shape); + auto buf = result.request (); + double* ptr = static_cast (buf.ptr); - std::copy(chr_ptr_->rho_core, chr_ptr_->rho_core + nrxx_, 
ptr); + std::copy (chr_ptr_->rho_core, chr_ptr_->rho_core + nrxx_, ptr); return result; } @@ -153,20 +158,21 @@ py::array_t PyChargeAccessor::get_rho_core() const // PyEnergyAccessor Implementation // ============================================================================ -void PyEnergyAccessor::set_from_fenergy(const elecstate::fenergy* f_en) +void + PyEnergyAccessor::set_from_fenergy (const elecstate::fenergy* f_en) { if (f_en == nullptr) - { - etot_ = 0.0; - eband_ = 0.0; - hartree_energy_ = 0.0; - etxc_ = 0.0; - ewald_energy_ = 0.0; - demet_ = 0.0; - exx_ = 0.0; - evdw_ = 0.0; - return; - } + { + etot_ = 0.0; + eband_ = 0.0; + hartree_energy_ = 0.0; + etxc_ = 0.0; + ewald_energy_ = 0.0; + demet_ = 0.0; + exx_ = 0.0; + evdw_ = 0.0; + return; + } etot_ = f_en->etot; eband_ = f_en->eband; @@ -178,9 +184,15 @@ void PyEnergyAccessor::set_from_fenergy(const elecstate::fenergy* f_en) evdw_ = f_en->evdw; } -void PyEnergyAccessor::set_energies(double etot, double eband, double hartree, - double etxc, double ewald, double demet, - double exx, double evdw) +void + PyEnergyAccessor::set_energies (double etot, + double eband, + double hartree, + double etxc, + double ewald, + double demet, + double exx, + double evdw) { etot_ = etot; eband_ = eband; @@ -192,7 +204,8 @@ void PyEnergyAccessor::set_energies(double etot, double eband, double hartree, evdw_ = evdw; } -py::dict PyEnergyAccessor::get_all_energies() const +py::dict + PyEnergyAccessor::get_all_energies () const { py::dict result; result["etot"] = etot_; @@ -211,118 +224,127 @@ py::dict PyEnergyAccessor::get_all_energies() const // ============================================================================ template -void PyHamiltonianAccessor::set_from_hamilt(hamilt::HamiltLCAO* hamilt_lcao, int nks, const Parallel_Orbitals* pv) +void + PyHamiltonianAccessor::set_from_hamilt (hamilt::HamiltLCAO* hamilt_lcao, + int nks, + const Parallel_Orbitals* pv) { hamilt_ptr_ = hamilt_lcao; pv_ = pv; nks_ = nks; if 
(hamilt_lcao == nullptr) - { - nbasis_ = 0; - nloc_ = 0; - nrow_ = 0; - ncol_ = 0; - return; - } + { + nbasis_ = 0; + nloc_ = 0; + nrow_ = 0; + ncol_ = 0; + return; + } // Get dimensions from Parallel_Orbitals if available if (pv != nullptr) - { - nrow_ = pv->get_row_size(); - ncol_ = pv->get_col_size(); - nloc_ = nrow_ * ncol_; - nbasis_ = pv->get_global_row_size(); - } + { + nrow_ = pv->get_row_size (); + ncol_ = pv->get_col_size (); + nloc_ = nrow_ * ncol_; + nbasis_ = pv->get_global_row_size (); + } // Initialize pointer arrays for compatibility mode - hk_ptrs_.resize(nks, nullptr); - sk_ptrs_.resize(nks, nullptr); - matrix_dims_.resize(nks, {nrow_, ncol_}); + hk_ptrs_.resize (nks, nullptr); + sk_ptrs_.resize (nks, nullptr); + matrix_dims_.resize (nks, {nrow_, ncol_}); } template -void PyHamiltonianAccessor::set_dimensions(int nbasis, int nks) +void + PyHamiltonianAccessor::set_dimensions (int nbasis, int nks) { nbasis_ = nbasis; nks_ = nks; - hk_ptrs_.resize(nks, nullptr); - sk_ptrs_.resize(nks, nullptr); - matrix_dims_.resize(nks, {0, 0}); + hk_ptrs_.resize (nks, nullptr); + sk_ptrs_.resize (nks, nullptr); + matrix_dims_.resize (nks, {0, 0}); } template -void PyHamiltonianAccessor::set_Hk_data(int ik, const TK* data, int nrow, int ncol) +void + PyHamiltonianAccessor::set_Hk_data (int ik, const TK* data, int nrow, int ncol) { if (ik >= 0 && ik < nks_) - { - hk_ptrs_[ik] = data; - matrix_dims_[ik] = {nrow, ncol}; - } + { + hk_ptrs_[ik] = data; + matrix_dims_[ik] = {nrow, ncol}; + } } template -void PyHamiltonianAccessor::set_Sk_data(int ik, const TK* data, int nrow, int ncol) +void + PyHamiltonianAccessor::set_Sk_data (int ik, const TK* data, int nrow, int ncol) { if (ik >= 0 && ik < nks_) - { - sk_ptrs_[ik] = data; - matrix_dims_[ik] = {nrow, ncol}; - } + { + sk_ptrs_[ik] = data; + matrix_dims_[ik] = {nrow, ncol}; + } } template -py::array_t PyHamiltonianAccessor::get_Hk(int ik) const +py::array_t + PyHamiltonianAccessor::get_Hk (int ik) const { - if 
(!is_valid() || ik < 0 || ik >= nks_) - { - throw std::runtime_error("Invalid k-point index or Hamiltonian not available."); - } + if (!is_valid () || ik < 0 || ik >= nks_) + { + throw std::runtime_error ("Invalid k-point index or Hamiltonian not available."); + } if (hk_ptrs_[ik] == nullptr) - { - throw std::runtime_error("H(k) data not set for this k-point."); - } + { + throw std::runtime_error ("H(k) data not set for this k-point."); + } auto [nrow, ncol] = matrix_dims_[ik]; std::vector shape = {nrow, ncol}; - auto result = py::array_t(shape); - auto buf = result.request(); - TK* ptr = static_cast(buf.ptr); + auto result = py::array_t (shape); + auto buf = result.request (); + TK* ptr = static_cast (buf.ptr); - std::copy(hk_ptrs_[ik], hk_ptrs_[ik] + nrow * ncol, ptr); + std::copy (hk_ptrs_[ik], hk_ptrs_[ik] + nrow * ncol, ptr); return result; } template -py::array_t PyHamiltonianAccessor::get_Sk(int ik) const +py::array_t + PyHamiltonianAccessor::get_Sk (int ik) const { - if (!is_valid() || ik < 0 || ik >= nks_) - { - throw std::runtime_error("Invalid k-point index or overlap matrix not available."); - } + if (!is_valid () || ik < 0 || ik >= nks_) + { + throw std::runtime_error ("Invalid k-point index or overlap matrix not available."); + } if (sk_ptrs_[ik] == nullptr) - { - throw std::runtime_error("S(k) data not set for this k-point."); - } + { + throw std::runtime_error ("S(k) data not set for this k-point."); + } auto [nrow, ncol] = matrix_dims_[ik]; std::vector shape = {nrow, ncol}; - auto result = py::array_t(shape); - auto buf = result.request(); - TK* ptr = static_cast(buf.ptr); + auto result = py::array_t (shape); + auto buf = result.request (); + TK* ptr = static_cast (buf.ptr); - std::copy(sk_ptrs_[ik], sk_ptrs_[ik] + nrow * ncol, ptr); + std::copy (sk_ptrs_[ik], sk_ptrs_[ik] + nrow * ncol, ptr); return result; } template -py::dict PyHamiltonianAccessor::get_HR() const +py::dict + PyHamiltonianAccessor::get_HR () const { // Placeholder: will be 
implemented when full ABACUS integration is available py::dict result; @@ -330,7 +352,8 @@ py::dict PyHamiltonianAccessor::get_HR() const } template -py::dict PyHamiltonianAccessor::get_SR() const +py::dict + PyHamiltonianAccessor::get_SR () const { // Placeholder: will be implemented when full ABACUS integration is available py::dict result; @@ -346,81 +369,87 @@ template class PyHamiltonianAccessor, double>; // ============================================================================ template -void PyDensityMatrixAccessor::set_from_dm(elecstate::DensityMatrix* dm) +void + PyDensityMatrixAccessor::set_from_dm (elecstate::DensityMatrix* dm) { dm_ptr_ = dm; if (dm == nullptr) - { - nks_ = 0; - nrow_ = 0; - ncol_ = 0; - return; - } + { + nks_ = 0; + nrow_ = 0; + ncol_ = 0; + return; + } - nks_ = dm->get_DMK_nks(); - nrow_ = dm->get_DMK_nrow(); - ncol_ = dm->get_DMK_ncol(); + nks_ = dm->get_DMK_nks (); + nrow_ = dm->get_DMK_nrow (); + ncol_ = dm->get_DMK_ncol (); // Initialize pointer arrays for compatibility mode - dmk_ptrs_.resize(nks_, nullptr); + dmk_ptrs_.resize (nks_, nullptr); } template -void PyDensityMatrixAccessor::set_dimensions(int nks, int nrow, int ncol) +void + PyDensityMatrixAccessor::set_dimensions (int nks, int nrow, int ncol) { nks_ = nks; nrow_ = nrow; ncol_ = ncol; - dmk_ptrs_.resize(nks, nullptr); + dmk_ptrs_.resize (nks, nullptr); } template -void PyDensityMatrixAccessor::set_DMK_data(int ik, const TK* data) +void + PyDensityMatrixAccessor::set_DMK_data (int ik, const TK* data) { if (ik >= 0 && ik < nks_) - { - dmk_ptrs_[ik] = data; - } + { + dmk_ptrs_[ik] = data; + } } template -py::array_t PyDensityMatrixAccessor::get_DMK(int ik) const +py::array_t + PyDensityMatrixAccessor::get_DMK (int ik) const { - if (!is_valid() || ik < 0 || ik >= nks_) - { - throw std::runtime_error("Invalid k-point index or density matrix not available."); - } + if (!is_valid () || ik < 0 || ik >= nks_) + { + throw std::runtime_error ("Invalid k-point index or 
density matrix not available."); + } if (dmk_ptrs_[ik] == nullptr) - { - throw std::runtime_error("DM(k) data not set for this k-point."); - } + { + throw std::runtime_error ("DM(k) data not set for this k-point."); + } - std::vector shape = {static_cast(nrow_), static_cast(ncol_)}; + std::vector shape = {static_cast (nrow_), static_cast (ncol_)}; - auto result = py::array_t(shape); - auto buf = result.request(); - TK* ptr = static_cast(buf.ptr); + auto result = py::array_t (shape); + auto buf = result.request (); + TK* ptr = static_cast (buf.ptr); - std::copy(dmk_ptrs_[ik], dmk_ptrs_[ik] + nrow_ * ncol_, ptr); + std::copy (dmk_ptrs_[ik], dmk_ptrs_[ik] + nrow_ * ncol_, ptr); return result; } template -std::vector> PyDensityMatrixAccessor::get_DMK_all() const +std::vector> + PyDensityMatrixAccessor::get_DMK_all () const { std::vector> result; for (int ik = 0; ik < nks_; ++ik) - { - result.push_back(get_DMK(ik)); - } + { + result.push_back (get_DMK (ik)); + } return result; } template -py::dict PyDensityMatrixAccessor::get_DMR() const +py::dict + PyDensityMatrixAccessor::get_DMR () const { // Placeholder: will be implemented when full ABACUS integration is available py::dict result; @@ -436,19 +465,20 @@ template class PyDensityMatrixAccessor, double>; // ============================================================================ template -PyESolverLCAO::PyESolverLCAO() +PyESolverLCAO::PyESolverLCAO () { // Constructor - initialization deferred to initialize() } template -PyESolverLCAO::~PyESolverLCAO() +PyESolverLCAO::~PyESolverLCAO () { // Destructor - cleanup will be implemented in Phase 3 } template -void PyESolverLCAO::initialize(const std::string& input_dir) +void + PyESolverLCAO::initialize (const std::string& input_dir) { // Placeholder: will be implemented in Phase 3 // This will: @@ -460,23 +490,25 @@ void PyESolverLCAO::initialize(const std::string& input_dir) } template -void PyESolverLCAO::before_all_runners() +void + PyESolverLCAO::before_all_runners 
() { if (!initialized_) - { - throw std::runtime_error("ESolver not initialized. Call initialize() first."); - } + { + throw std::runtime_error ("ESolver not initialized. Call initialize() first."); + } // Placeholder: will call esolver_->before_all_runners() in Phase 3 std::cout << "[PyESolverLCAO] before_all_runners called" << std::endl; } template -void PyESolverLCAO::before_scf(int istep) +void + PyESolverLCAO::before_scf (int istep) { if (!initialized_) - { - throw std::runtime_error("ESolver not initialized. Call initialize() first."); - } + { + throw std::runtime_error ("ESolver not initialized. Call initialize() first."); + } istep_ = istep; scf_started_ = true; conv_esolver_ = false; @@ -486,12 +518,13 @@ void PyESolverLCAO::before_scf(int istep) } template -void PyESolverLCAO::run_scf_iteration(int iter) +void + PyESolverLCAO::run_scf_iteration (int iter) { if (!scf_started_) - { - throw std::runtime_error("SCF not started. Call before_scf() first."); - } + { + throw std::runtime_error ("SCF not started. Call before_scf() first."); + } niter_ = iter; // Placeholder: will implement actual SCF iteration in Phase 3 // 1. iter_init() @@ -501,34 +534,37 @@ void PyESolverLCAO::run_scf_iteration(int iter) } template -void PyESolverLCAO::run_scf(int max_iter) +void + PyESolverLCAO::run_scf (int max_iter) { - before_scf(istep_); + before_scf (istep_); for (int iter = 1; iter <= max_iter; ++iter) - { - run_scf_iteration(iter); - if (conv_esolver_) { - break; + run_scf_iteration (iter); + if (conv_esolver_) + { + break; + } } - } } template -void PyESolverLCAO::after_scf(int istep) +void + PyESolverLCAO::after_scf (int istep) { if (!scf_started_) - { - throw std::runtime_error("SCF not started. Call before_scf() first."); - } + { + throw std::runtime_error ("SCF not started. 
Call before_scf() first."); + } // Placeholder: will call esolver_->after_scf() in Phase 3 std::cout << "[PyESolverLCAO] after_scf called for step " << istep << std::endl; scf_started_ = false; } template -PyChargeAccessor PyESolverLCAO::get_charge() const +PyChargeAccessor + PyESolverLCAO::get_charge () const { PyChargeAccessor accessor; // Note: esolver_ connection will be implemented when full ABACUS integration is available @@ -537,7 +573,8 @@ PyChargeAccessor PyESolverLCAO::get_charge() const } template -PyEnergyAccessor PyESolverLCAO::get_energy() const +PyEnergyAccessor + PyESolverLCAO::get_energy () const { PyEnergyAccessor accessor; // Note: esolver_ connection will be implemented when full ABACUS integration is available @@ -546,7 +583,8 @@ PyEnergyAccessor PyESolverLCAO::get_energy() const } template -PyHamiltonianAccessor PyESolverLCAO::get_hamiltonian() const +PyHamiltonianAccessor + PyESolverLCAO::get_hamiltonian () const { PyHamiltonianAccessor accessor; // Note: esolver_ connection will be implemented when full ABACUS integration is available @@ -555,7 +593,8 @@ PyHamiltonianAccessor PyESolverLCAO::get_hamiltonian() const } template -PyDensityMatrixAccessor PyESolverLCAO::get_density_matrix() const +PyDensityMatrixAccessor + PyESolverLCAO::get_density_matrix () const { PyDensityMatrixAccessor accessor; // Note: esolver_ connection will be implemented when full ABACUS integration is available @@ -564,74 +603,84 @@ PyDensityMatrixAccessor PyESolverLCAO::get_density_matrix() cons } template -py::array_t PyESolverLCAO::get_psi(int ik) const +py::array_t + PyESolverLCAO::get_psi (int ik) const { // Note: Will return wave function coefficients when full ABACUS integration is available - return py::array_t(); + return py::array_t (); } template -py::array_t PyESolverLCAO::get_eigenvalues(int ik) const +py::array_t + PyESolverLCAO::get_eigenvalues (int ik) const { // Note: Will return eigenvalues when full ABACUS integration is available - return 
py::array_t(); + return py::array_t (); } template -py::array_t PyESolverLCAO::get_occupations(int ik) const +py::array_t + PyESolverLCAO::get_occupations (int ik) const { // Note: Will return occupation numbers when full ABACUS integration is available - return py::array_t(); + return py::array_t (); } template -int PyESolverLCAO::get_nks() const +int + PyESolverLCAO::get_nks () const { // Note: Will return actual nks when full ABACUS integration is available return 0; } template -py::array_t PyESolverLCAO::get_kvec_d(int ik) const +py::array_t + PyESolverLCAO::get_kvec_d (int ik) const { std::vector shape = {3}; - auto result = py::array_t(shape); - auto buf = result.request(); - double* ptr = static_cast(buf.ptr); + auto result = py::array_t (shape); + auto buf = result.request (); + double* ptr = static_cast (buf.ptr); ptr[0] = ptr[1] = ptr[2] = 0.0; return result; } template -py::array_t PyESolverLCAO::get_wk() const +py::array_t + PyESolverLCAO::get_wk () const { // Note: Will return k-point weights when full ABACUS integration is available - return py::array_t(); + return py::array_t (); } template -int PyESolverLCAO::get_nbasis() const +int + PyESolverLCAO::get_nbasis () const { // Note: Will return actual nbasis when full ABACUS integration is available return 0; } template -int PyESolverLCAO::get_nbands() const +int + PyESolverLCAO::get_nbands () const { // Note: Will return actual nbands when full ABACUS integration is available return 0; } template -int PyESolverLCAO::get_nspin() const +int + PyESolverLCAO::get_nspin () const { // Note: Will return actual nspin when full ABACUS integration is available return 1; } template -int PyESolverLCAO::get_nat() const +int + PyESolverLCAO::get_nat () const { // Note: Will return actual nat when full ABACUS integration is available return 0; @@ -647,17 +696,20 @@ template class PyESolverLCAO, double>; // Pybind11 Module Definition // ============================================================================ 
-void bind_charge_accessor(py::module& m) +void + bind_charge_accessor (py::module& m) { - py::class_(m, "ChargeAccessor", - R"pbdoc( + py::class_ (m, + "ChargeAccessor", + R"pbdoc( Accessor for charge density data. Provides access to real-space charge density (rho) and related quantities. )pbdoc") - .def(py::init<>()) - .def("get_rho", &py_esolver::PyChargeAccessor::get_rho, - R"pbdoc( + .def (py::init<> ()) + .def ("get_rho", + &py_esolver::PyChargeAccessor::get_rho, + R"pbdoc( Get real-space charge density as numpy array. Returns @@ -665,8 +717,9 @@ void bind_charge_accessor(py::module& m) numpy.ndarray Charge density with shape (nspin, nrxx) )pbdoc") - .def("get_rhog", &py_esolver::PyChargeAccessor::get_rhog, - R"pbdoc( + .def ("get_rhog", + &py_esolver::PyChargeAccessor::get_rhog, + R"pbdoc( Get reciprocal-space charge density as numpy array. Returns @@ -674,8 +727,9 @@ void bind_charge_accessor(py::module& m) numpy.ndarray Charge density in G-space with shape (nspin, ngmc) )pbdoc") - .def("get_rho_core", &py_esolver::PyChargeAccessor::get_rho_core, - R"pbdoc( + .def ("get_rho_core", + &py_esolver::PyChargeAccessor::get_rho_core, + R"pbdoc( Get core charge density as numpy array. 
Returns @@ -683,66 +737,60 @@ void bind_charge_accessor(py::module& m) numpy.ndarray Core charge density with shape (nrxx,) )pbdoc") - .def_property_readonly("nspin", &py_esolver::PyChargeAccessor::get_nspin, - "Number of spin channels") - .def_property_readonly("nrxx", &py_esolver::PyChargeAccessor::get_nrxx, - "Number of real-space grid points") - .def_property_readonly("ngmc", &py_esolver::PyChargeAccessor::get_ngmc, - "Number of G-vectors for charge density") - .def("is_valid", &py_esolver::PyChargeAccessor::is_valid, - "Check if charge data is available"); + .def_property_readonly ("nspin", &py_esolver::PyChargeAccessor::get_nspin, "Number of spin channels") + .def_property_readonly ("nrxx", &py_esolver::PyChargeAccessor::get_nrxx, "Number of real-space grid points") + .def_property_readonly ("ngmc", + &py_esolver::PyChargeAccessor::get_ngmc, + "Number of G-vectors for charge density") + .def ("is_valid", &py_esolver::PyChargeAccessor::is_valid, "Check if charge data is available"); } -void bind_energy_accessor(py::module& m) +void + bind_energy_accessor (py::module& m) { - py::class_(m, "EnergyAccessor", - R"pbdoc( + py::class_ (m, + "EnergyAccessor", + R"pbdoc( Accessor for energy data. Provides access to various energy components from the calculation. All energies are in Rydberg units. 
)pbdoc") - .def(py::init<>()) - .def_property_readonly("etot", &py_esolver::PyEnergyAccessor::get_etot, - "Total energy (Ry)") - .def_property_readonly("eband", &py_esolver::PyEnergyAccessor::get_eband, - "Band energy (Ry)") - .def_property_readonly("hartree_energy", &py_esolver::PyEnergyAccessor::get_hartree_energy, - "Hartree energy (Ry)") - .def_property_readonly("etxc", &py_esolver::PyEnergyAccessor::get_etxc, - "Exchange-correlation energy (Ry)") - .def_property_readonly("ewald_energy", &py_esolver::PyEnergyAccessor::get_ewald_energy, - "Ewald energy (Ry)") - .def_property_readonly("demet", &py_esolver::PyEnergyAccessor::get_demet, - "-TS term for metals (Ry)") - .def_property_readonly("exx", &py_esolver::PyEnergyAccessor::get_exx, - "Exact exchange energy (Ry)") - .def_property_readonly("evdw", &py_esolver::PyEnergyAccessor::get_evdw, - "van der Waals energy (Ry)") - .def("get_all_energies", &py_esolver::PyEnergyAccessor::get_all_energies, - "Get all energies as a dictionary"); + .def (py::init<> ()) + .def_property_readonly ("etot", &py_esolver::PyEnergyAccessor::get_etot, "Total energy (Ry)") + .def_property_readonly ("eband", &py_esolver::PyEnergyAccessor::get_eband, "Band energy (Ry)") + .def_property_readonly ("hartree_energy", + &py_esolver::PyEnergyAccessor::get_hartree_energy, + "Hartree energy (Ry)") + .def_property_readonly ("etxc", &py_esolver::PyEnergyAccessor::get_etxc, "Exchange-correlation energy (Ry)") + .def_property_readonly ("ewald_energy", &py_esolver::PyEnergyAccessor::get_ewald_energy, "Ewald energy (Ry)") + .def_property_readonly ("demet", &py_esolver::PyEnergyAccessor::get_demet, "-TS term for metals (Ry)") + .def_property_readonly ("exx", &py_esolver::PyEnergyAccessor::get_exx, "Exact exchange energy (Ry)") + .def_property_readonly ("evdw", &py_esolver::PyEnergyAccessor::get_evdw, "van der Waals energy (Ry)") + .def ("get_all_energies", &py_esolver::PyEnergyAccessor::get_all_energies, "Get all energies as a dictionary"); } template 
-void bind_hamiltonian_accessor(py::module& m, const std::string& suffix) +void + bind_hamiltonian_accessor (py::module& m, const std::string& suffix) { using HamiltAccessor = py_esolver::PyHamiltonianAccessor; std::string class_name = "HamiltonianAccessor" + suffix; - py::class_(m, class_name.c_str(), - R"pbdoc( + py::class_ (m, + class_name.c_str (), + R"pbdoc( Accessor for Hamiltonian matrix data. Provides access to H(k), S(k), H(R), and S(R) matrices. )pbdoc") - .def(py::init<>()) - .def_property_readonly("nbasis", &HamiltAccessor::get_nbasis, - "Number of basis functions") - .def_property_readonly("nks", &HamiltAccessor::get_nks, - "Number of k-points") - .def("get_Hk", &HamiltAccessor::get_Hk, - R"pbdoc( + .def (py::init<> ()) + .def_property_readonly ("nbasis", &HamiltAccessor::get_nbasis, "Number of basis functions") + .def_property_readonly ("nks", &HamiltAccessor::get_nks, "Number of k-points") + .def ("get_Hk", + &HamiltAccessor::get_Hk, + R"pbdoc( Get H(k) matrix for specific k-point. Parameters @@ -754,9 +802,11 @@ void bind_hamiltonian_accessor(py::module& m, const std::string& suffix) ------- numpy.ndarray Hamiltonian matrix at k-point ik - )pbdoc", "ik"_a) - .def("get_Sk", &HamiltAccessor::get_Sk, - R"pbdoc( + )pbdoc", + "ik"_a) + .def ("get_Sk", + &HamiltAccessor::get_Sk, + R"pbdoc( Get S(k) overlap matrix for specific k-point. 
Parameters @@ -768,37 +818,35 @@ void bind_hamiltonian_accessor(py::module& m, const std::string& suffix) ------- numpy.ndarray Overlap matrix at k-point ik - )pbdoc", "ik"_a) - .def("get_HR", &HamiltAccessor::get_HR, - "Get H(R) in sparse format") - .def("get_SR", &HamiltAccessor::get_SR, - "Get S(R) in sparse format") - .def("is_valid", &HamiltAccessor::is_valid, - "Check if Hamiltonian data is available"); + )pbdoc", + "ik"_a) + .def ("get_HR", &HamiltAccessor::get_HR, "Get H(R) in sparse format") + .def ("get_SR", &HamiltAccessor::get_SR, "Get S(R) in sparse format") + .def ("is_valid", &HamiltAccessor::is_valid, "Check if Hamiltonian data is available"); } template -void bind_density_matrix_accessor(py::module& m, const std::string& suffix) +void + bind_density_matrix_accessor (py::module& m, const std::string& suffix) { using DMAccessor = py_esolver::PyDensityMatrixAccessor; std::string class_name = "DensityMatrixAccessor" + suffix; - py::class_(m, class_name.c_str(), - R"pbdoc( + py::class_ (m, + class_name.c_str (), + R"pbdoc( Accessor for density matrix data. Provides access to DM(k) and DM(R) matrices. )pbdoc") - .def(py::init<>()) - .def_property_readonly("nks", &DMAccessor::get_nks, - "Number of k-points") - .def_property_readonly("nrow", &DMAccessor::get_nrow, - "Number of rows in density matrix") - .def_property_readonly("ncol", &DMAccessor::get_ncol, - "Number of columns in density matrix") - .def("get_DMK", &DMAccessor::get_DMK, - R"pbdoc( + .def (py::init<> ()) + .def_property_readonly ("nks", &DMAccessor::get_nks, "Number of k-points") + .def_property_readonly ("nrow", &DMAccessor::get_nrow, "Number of rows in density matrix") + .def_property_readonly ("ncol", &DMAccessor::get_ncol, "Number of columns in density matrix") + .def ("get_DMK", + &DMAccessor::get_DMK, + R"pbdoc( Get DM(k) for specific k-point. 
Parameters @@ -810,24 +858,24 @@ void bind_density_matrix_accessor(py::module& m, const std::string& suffix) ------- numpy.ndarray Density matrix at k-point ik - )pbdoc", "ik"_a) - .def("get_DMK_all", &DMAccessor::get_DMK_all, - "Get all DM(k) matrices as a list") - .def("get_DMR", &DMAccessor::get_DMR, - "Get DM(R) in sparse format") - .def("is_valid", &DMAccessor::is_valid, - "Check if density matrix data is available"); + )pbdoc", + "ik"_a) + .def ("get_DMK_all", &DMAccessor::get_DMK_all, "Get all DM(k) matrices as a list") + .def ("get_DMR", &DMAccessor::get_DMR, "Get DM(R) in sparse format") + .def ("is_valid", &DMAccessor::is_valid, "Check if density matrix data is available"); } template -void bind_esolver_lcao(py::module& m, const std::string& suffix) +void + bind_esolver_lcao (py::module& m, const std::string& suffix) { using ESolver = py_esolver::PyESolverLCAO; std::string class_name = "ESolverLCAO" + suffix; - py::class_(m, class_name.c_str(), - R"pbdoc( + py::class_ (m, + class_name.c_str (), + R"pbdoc( Python wrapper for ESolver_KS_LCAO. This class provides a Python interface for LCAO calculations @@ -850,109 +898,100 @@ void bind_esolver_lcao(py::module& m, const std::string& suffix) >>> hamiltonian = esolver.get_hamiltonian() >>> esolver.after_scf(0) )pbdoc") - .def(py::init<>()) + .def (py::init<> ()) // Initialization - .def("initialize", &ESolver::initialize, - R"pbdoc( + .def ("initialize", + &ESolver::initialize, + R"pbdoc( Initialize ESolver from INPUT file. 
Parameters ---------- input_dir : str Directory containing INPUT, STRU, and other input files - )pbdoc", "input_dir"_a) - .def("before_all_runners", &ESolver::before_all_runners, - "Initialize calculation environment") + )pbdoc", + "input_dir"_a) + .def ("before_all_runners", &ESolver::before_all_runners, "Initialize calculation environment") // SCF Control - .def("before_scf", &ESolver::before_scf, - R"pbdoc( + .def ("before_scf", + &ESolver::before_scf, + R"pbdoc( Prepare for SCF calculation. Parameters ---------- istep : int, optional Ion step index (default: 0) - )pbdoc", "istep"_a = 0) - .def("run_scf_iteration", &ESolver::run_scf_iteration, - R"pbdoc( + )pbdoc", + "istep"_a = 0) + .def ("run_scf_iteration", + &ESolver::run_scf_iteration, + R"pbdoc( Run a single SCF iteration. Parameters ---------- iter : int Iteration number (1-based) - )pbdoc", "iter"_a) - .def("run_scf", &ESolver::run_scf, - R"pbdoc( + )pbdoc", + "iter"_a) + .def ("run_scf", + &ESolver::run_scf, + R"pbdoc( Run complete SCF loop. Parameters ---------- max_iter : int, optional Maximum number of iterations (default: 100) - )pbdoc", "max_iter"_a = 100) - .def("after_scf", &ESolver::after_scf, - R"pbdoc( + )pbdoc", + "max_iter"_a = 100) + .def ("after_scf", + &ESolver::after_scf, + R"pbdoc( Finalize SCF calculation. 
Parameters ---------- istep : int, optional Ion step index (default: 0) - )pbdoc", "istep"_a = 0) + )pbdoc", + "istep"_a = 0) // Status - .def("is_converged", &ESolver::is_converged, - "Check if SCF is converged") - .def_property_readonly("niter", &ESolver::get_niter, - "Current iteration number") - .def_property_readonly("drho", &ESolver::get_drho, - "Charge density difference") - .def_property_readonly("istep", &ESolver::get_istep, - "Current ion step") + .def ("is_converged", &ESolver::is_converged, "Check if SCF is converged") + .def_property_readonly ("niter", &ESolver::get_niter, "Current iteration number") + .def_property_readonly ("drho", &ESolver::get_drho, "Charge density difference") + .def_property_readonly ("istep", &ESolver::get_istep, "Current ion step") // Data Accessors - .def("get_charge", &ESolver::get_charge, - "Get charge density accessor") - .def("get_energy", &ESolver::get_energy, - "Get energy accessor") - .def("get_hamiltonian", &ESolver::get_hamiltonian, - "Get Hamiltonian accessor") - .def("get_density_matrix", &ESolver::get_density_matrix, - "Get density matrix accessor") + .def ("get_charge", &ESolver::get_charge, "Get charge density accessor") + .def ("get_energy", &ESolver::get_energy, "Get energy accessor") + .def ("get_hamiltonian", &ESolver::get_hamiltonian, "Get Hamiltonian accessor") + .def ("get_density_matrix", &ESolver::get_density_matrix, "Get density matrix accessor") // Wave functions - .def("get_psi", &ESolver::get_psi, - "Get wave function coefficients for k-point ik", "ik"_a) - .def("get_eigenvalues", &ESolver::get_eigenvalues, - "Get eigenvalues for k-point ik", "ik"_a) - .def("get_occupations", &ESolver::get_occupations, - "Get occupation numbers for k-point ik", "ik"_a) + .def ("get_psi", &ESolver::get_psi, "Get wave function coefficients for k-point ik", "ik"_a) + .def ("get_eigenvalues", &ESolver::get_eigenvalues, "Get eigenvalues for k-point ik", "ik"_a) + .def ("get_occupations", &ESolver::get_occupations, "Get 
occupation numbers for k-point ik", "ik"_a) // K-points - .def_property_readonly("nks", &ESolver::get_nks, - "Number of k-points") - .def("get_kvec_d", &ESolver::get_kvec_d, - "Get k-vector in direct coordinates", "ik"_a) - .def("get_wk", &ESolver::get_wk, - "Get k-point weights") + .def_property_readonly ("nks", &ESolver::get_nks, "Number of k-points") + .def ("get_kvec_d", &ESolver::get_kvec_d, "Get k-vector in direct coordinates", "ik"_a) + .def ("get_wk", &ESolver::get_wk, "Get k-point weights") // System info - .def_property_readonly("nbasis", &ESolver::get_nbasis, - "Number of basis functions") - .def_property_readonly("nbands", &ESolver::get_nbands, - "Number of bands") - .def_property_readonly("nspin", &ESolver::get_nspin, - "Number of spin channels") - .def_property_readonly("nat", &ESolver::get_nat, - "Number of atoms"); + .def_property_readonly ("nbasis", &ESolver::get_nbasis, "Number of basis functions") + .def_property_readonly ("nbands", &ESolver::get_nbands, "Number of bands") + .def_property_readonly ("nspin", &ESolver::get_nspin, "Number of spin channels") + .def_property_readonly ("nat", &ESolver::get_nat, "Number of atoms"); } -PYBIND11_MODULE(_esolver_pack, m) +PYBIND11_MODULE (_esolver_pack, m) { - m.doc() = R"pbdoc( + m.doc () = R"pbdoc( PyABACUS ESolver Module ----------------------- @@ -990,14 +1029,14 @@ PYBIND11_MODULE(_esolver_pack, m) )pbdoc"; // Bind accessor classes - bind_charge_accessor(m); - bind_energy_accessor(m); - bind_hamiltonian_accessor(m, "_gamma"); - bind_hamiltonian_accessor>(m, "_multi_k"); - bind_density_matrix_accessor(m, "_gamma"); - bind_density_matrix_accessor>(m, "_multi_k"); + bind_charge_accessor (m); + bind_energy_accessor (m); + bind_hamiltonian_accessor (m, "_gamma"); + bind_hamiltonian_accessor> (m, "_multi_k"); + bind_density_matrix_accessor (m, "_gamma"); + bind_density_matrix_accessor> (m, "_multi_k"); // Bind ESolver classes - bind_esolver_lcao(m, "_gamma"); - bind_esolver_lcao, double>(m, "_multi_k"); + 
bind_esolver_lcao (m, "_gamma"); + bind_esolver_lcao, double> (m, "_multi_k"); } diff --git a/python/pyabacus/src/ModuleESolver/py_esolver_lcao.hpp b/python/pyabacus/src/ModuleESolver/py_esolver_lcao.hpp index 61e6b24cbe2..60a61507776 100644 --- a/python/pyabacus/src/ModuleESolver/py_esolver_lcao.hpp +++ b/python/pyabacus/src/ModuleESolver/py_esolver_lcao.hpp @@ -24,19 +24,28 @@ class UnitCell; class Charge; class Parallel_Orbitals; -namespace elecstate { - struct fenergy; - class ElecState; - template class DensityMatrix; -} -namespace hamilt { - template class HContainer; - template class AtomPair; - template class BaseMatrix; - template class HamiltLCAO; -} -namespace ModuleESolver { - template class ESolver_KS_LCAO; +namespace elecstate +{ +struct fenergy; +class ElecState; +template +class DensityMatrix; +} // namespace elecstate +namespace hamilt +{ +template +class HContainer; +template +class AtomPair; +template +class BaseMatrix; +template +class HamiltLCAO; +} // namespace hamilt +namespace ModuleESolver +{ +template +class ESolver_KS_LCAO; } namespace py = pybind11; @@ -51,39 +60,55 @@ namespace py_esolver */ class PyChargeAccessor { -public: - PyChargeAccessor() = default; + public: + PyChargeAccessor () = default; /// Set internal pointers from Charge object - void set_from_charge(const Charge* chr); + void set_from_charge (const Charge* chr); /// Set data directly (for compatibility with existing code) - void set_data(const double* rho_ptr, int nspin, int nrxx); + void set_data (const double* rho_ptr, int nspin, int nrxx); /// Get real-space charge density as numpy array (nspin, nrxx) - py::array_t get_rho() const; + py::array_t get_rho () const; /// Get reciprocal-space charge density as numpy array (nspin, ngmc) - py::array_t> get_rhog() const; + py::array_t> get_rhog () const; /// Get core charge density - py::array_t get_rho_core() const; + py::array_t get_rho_core () const; /// Get number of spin channels - int get_nspin() const { return nspin_; 
} + int + get_nspin () const + { + return nspin_; + } /// Get number of real-space grid points - int get_nrxx() const { return nrxx_; } + int + get_nrxx () const + { + return nrxx_; + } /// Get number of G-vectors - int get_ngmc() const { return ngmc_; } + int + get_ngmc () const + { + return ngmc_; + } /// Check if data is valid - bool is_valid() const { return (chr_ptr_ != nullptr || rho_ptr_ != nullptr) && nspin_ > 0; } + bool + is_valid () const + { + return (chr_ptr_ != nullptr || rho_ptr_ != nullptr) && nspin_ > 0; + } -private: + private: const Charge* chr_ptr_ = nullptr; - const double* rho_ptr_ = nullptr; // Direct pointer for compatibility + const double* rho_ptr_ = nullptr; // Direct pointer for compatibility int nspin_ = 0; int nrxx_ = 0; int ngmc_ = 0; @@ -96,45 +121,82 @@ class PyChargeAccessor */ class PyEnergyAccessor { -public: - PyEnergyAccessor() = default; + public: + PyEnergyAccessor () = default; /// Set from fenergy structure - void set_from_fenergy(const elecstate::fenergy* f_en); + void set_from_fenergy (const elecstate::fenergy* f_en); /// Set energies directly (for compatibility) - void set_energies(double etot, double eband, double hartree, - double etxc, double ewald, double demet, - double exx, double evdw); + void set_energies (double etot, + double eband, + double hartree, + double etxc, + double ewald, + double demet, + double exx, + double evdw); /// Get total energy (Ry) - double get_etot() const { return etot_; } + double + get_etot () const + { + return etot_; + } /// Get band energy (Ry) - double get_eband() const { return eband_; } + double + get_eband () const + { + return eband_; + } /// Get Hartree energy (Ry) - double get_hartree_energy() const { return hartree_energy_; } + double + get_hartree_energy () const + { + return hartree_energy_; + } /// Get exchange-correlation energy (Ry) - double get_etxc() const { return etxc_; } + double + get_etxc () const + { + return etxc_; + } /// Get Ewald energy (Ry) - double 
get_ewald_energy() const { return ewald_energy_; } + double + get_ewald_energy () const + { + return ewald_energy_; + } /// Get -TS term for metals (Ry) - double get_demet() const { return demet_; } + double + get_demet () const + { + return demet_; + } /// Get exact exchange energy (Ry) - double get_exx() const { return exx_; } + double + get_exx () const + { + return exx_; + } /// Get van der Waals energy (Ry) - double get_evdw() const { return evdw_; } + double + get_evdw () const + { + return evdw_; + } /// Get all energies as a dictionary - py::dict get_all_energies() const; + py::dict get_all_energies () const; -private: + private: double etot_ = 0.0; double eband_ = 0.0; double hartree_energy_ = 0.0; @@ -153,52 +215,68 @@ class PyEnergyAccessor template class PyHamiltonianAccessor { -public: - PyHamiltonianAccessor() = default; + public: + PyHamiltonianAccessor () = default; /// Set from HamiltLCAO object - void set_from_hamilt(hamilt::HamiltLCAO* hamilt_lcao, int nks, const Parallel_Orbitals* pv); + void set_from_hamilt (hamilt::HamiltLCAO* hamilt_lcao, int nks, const Parallel_Orbitals* pv); /// Set dimensions directly (for compatibility) - void set_dimensions(int nbasis, int nks); + void set_dimensions (int nbasis, int nks); /// Set H(k) data for a specific k-point - void set_Hk_data(int ik, const TK* data, int nrow, int ncol); + void set_Hk_data (int ik, const TK* data, int nrow, int ncol); /// Set S(k) data for a specific k-point - void set_Sk_data(int ik, const TK* data, int nrow, int ncol); + void set_Sk_data (int ik, const TK* data, int nrow, int ncol); /// Get number of basis functions - int get_nbasis() const { return nbasis_; } + int + get_nbasis () const + { + return nbasis_; + } /// Get number of k-points - int get_nks() const { return nks_; } + int + get_nks () const + { + return nks_; + } /// Get local matrix size (for 2D distribution) - int get_nloc() const { return nloc_; } + int + get_nloc () const + { + return nloc_; + } /// Get H(k) matrix 
for specific k-point (local part in 2D distribution) - py::array_t get_Hk(int ik) const; + py::array_t get_Hk (int ik) const; /// Get S(k) matrix for specific k-point (local part in 2D distribution) - py::array_t get_Sk(int ik) const; + py::array_t get_Sk (int ik) const; /// Get H(R) in sparse COO format: returns (row_indices, col_indices, R_vectors, values) - py::tuple get_HR_sparse() const; + py::tuple get_HR_sparse () const; /// Get S(R) in sparse COO format: returns (row_indices, col_indices, R_vectors, values) - py::tuple get_SR_sparse() const; + py::tuple get_SR_sparse () const; /// Get H(R) as dictionary: {(iat1, iat2, R): matrix} - py::dict get_HR() const; + py::dict get_HR () const; /// Get S(R) as dictionary: {(iat1, iat2, R): matrix} - py::dict get_SR() const; + py::dict get_SR () const; /// Check if data is valid - bool is_valid() const { return (hamilt_ptr_ != nullptr || nbasis_ > 0) && nks_ > 0; } + bool + is_valid () const + { + return (hamilt_ptr_ != nullptr || nbasis_ > 0) && nks_ > 0; + } -private: + private: hamilt::HamiltLCAO* hamilt_ptr_ = nullptr; const Parallel_Orbitals* pv_ = nullptr; int nbasis_ = 0; @@ -221,40 +299,56 @@ class PyHamiltonianAccessor template class PyDensityMatrixAccessor { -public: - PyDensityMatrixAccessor() = default; + public: + PyDensityMatrixAccessor () = default; /// Set from DensityMatrix object - void set_from_dm(elecstate::DensityMatrix* dm); + void set_from_dm (elecstate::DensityMatrix* dm); /// Set dimensions directly (for compatibility) - void set_dimensions(int nks, int nrow, int ncol); + void set_dimensions (int nks, int nrow, int ncol); /// Set DM(k) data for a specific k-point - void set_DMK_data(int ik, const TK* data); + void set_DMK_data (int ik, const TK* data); /// Get DM(k) for specific k-point - py::array_t get_DMK(int ik) const; + py::array_t get_DMK (int ik) const; /// Get all DM(k) matrices - std::vector> get_DMK_all() const; + std::vector> get_DMK_all () const; /// Get DM(R) in sparse format as 
dictionary - py::dict get_DMR() const; + py::dict get_DMR () const; /// Get number of k-points - int get_nks() const { return nks_; } + int + get_nks () const + { + return nks_; + } /// Get matrix row dimension - int get_nrow() const { return nrow_; } + int + get_nrow () const + { + return nrow_; + } /// Get matrix column dimension - int get_ncol() const { return ncol_; } + int + get_ncol () const + { + return ncol_; + } /// Check if data is valid - bool is_valid() const { return (dm_ptr_ != nullptr || nks_ > 0); } + bool + is_valid () const + { + return (dm_ptr_ != nullptr || nks_ > 0); + } -private: + private: elecstate::DensityMatrix* dm_ptr_ = nullptr; int nks_ = 0; int nrow_ = 0; @@ -277,171 +371,193 @@ class PyDensityMatrixAccessor template class PyESolverLCAO { -public: - PyESolverLCAO(); - ~PyESolverLCAO(); + public: + PyESolverLCAO (); + ~PyESolverLCAO (); // ==================== Initialization ==================== /// Initialize from INPUT file directory - void initialize(const std::string& input_dir); + void initialize (const std::string& input_dir); /// Call before_all_runners - void before_all_runners(); + void before_all_runners (); // ==================== SCF Control ==================== /// Prepare for SCF calculation - void before_scf(int istep = 0); + void before_scf (int istep = 0); /// Run a single SCF iteration - void run_scf_iteration(int iter); + void run_scf_iteration (int iter); /// Run complete SCF loop - void run_scf(int max_iter = 100); + void run_scf (int max_iter = 100); /// Finalize SCF calculation - void after_scf(int istep = 0); + void after_scf (int istep = 0); // ==================== Status Queries ==================== /// Check if SCF is converged - bool is_converged() const { return conv_esolver_; } + bool + is_converged () const + { + return conv_esolver_; + } /// Get current iteration number - int get_niter() const { return niter_; } + int + get_niter () const + { + return niter_; + } /// Get charge density difference (drho) - 
double get_drho() const { return drho_; } + double + get_drho () const + { + return drho_; + } /// Get current SCF step - int get_istep() const { return istep_; } + int + get_istep () const + { + return istep_; + } // ==================== Data Accessors ==================== /// Get charge density accessor - PyChargeAccessor get_charge() const; + PyChargeAccessor get_charge () const; /// Get energy accessor - PyEnergyAccessor get_energy() const; + PyEnergyAccessor get_energy () const; /// Get Hamiltonian accessor - PyHamiltonianAccessor get_hamiltonian() const; + PyHamiltonianAccessor get_hamiltonian () const; /// Get density matrix accessor - PyDensityMatrixAccessor get_density_matrix() const; + PyDensityMatrixAccessor get_density_matrix () const; // ==================== Wave Function Access ==================== /// Get wave function coefficients for k-point ik - py::array_t get_psi(int ik) const; + py::array_t get_psi (int ik) const; /// Get eigenvalues for k-point ik - py::array_t get_eigenvalues(int ik) const; + py::array_t get_eigenvalues (int ik) const; /// Get occupation numbers for k-point ik - py::array_t get_occupations(int ik) const; + py::array_t get_occupations (int ik) const; // ==================== K-point Information ==================== /// Get number of k-points - int get_nks() const; + int get_nks () const; /// Get k-vector in direct coordinates for k-point ik - py::array_t get_kvec_d(int ik) const; + py::array_t get_kvec_d (int ik) const; /// Get k-point weights - py::array_t get_wk() const; + py::array_t get_wk () const; // ==================== System Information ==================== /// Get number of basis functions - int get_nbasis() const; + int get_nbasis () const; /// Get number of bands - int get_nbands() const; + int get_nbands () const; /// Get number of spin channels - int get_nspin() const; + int get_nspin () const; /// Get number of atoms - int get_nat() const; + int get_nat () const; // ==================== Component Access (New API) 
==================== /// Get SCF controller component - pyabacus::esolver::ISCFController* get_scf_controller() + pyabacus::esolver::ISCFController* + get_scf_controller () { - return scf_controller_.get(); + return scf_controller_.get (); } /// Get Hamiltonian builder component - pyabacus::esolver::IHamiltonianBuilder* get_hamiltonian_builder() + pyabacus::esolver::IHamiltonianBuilder* + get_hamiltonian_builder () { if (scf_controller_) - { - return static_cast*>( - scf_controller_->get_hamiltonian_builder()); - } + { + return static_cast*> ( + scf_controller_->get_hamiltonian_builder ()); + } return nullptr; } /// Get charge mixer component - pyabacus::esolver::IChargeMixer* get_charge_mixer() + pyabacus::esolver::IChargeMixer* + get_charge_mixer () { if (scf_controller_) - { - return scf_controller_->get_charge_mixer(); - } + { + return scf_controller_->get_charge_mixer (); + } return nullptr; } /// Get diagonalizer component - pyabacus::esolver::IDiagonalizer* get_diagonalizer() + pyabacus::esolver::IDiagonalizer* + get_diagonalizer () { if (scf_controller_) - { - return static_cast*>( - scf_controller_->get_diagonalizer()); - } + { + return static_cast*> (scf_controller_->get_diagonalizer ()); + } return nullptr; } // ==================== Configuration (New API) ==================== /// Set SCF convergence criteria - void set_convergence_criteria(double drho_threshold, double energy_threshold, int max_iter) + void + set_convergence_criteria (double drho_threshold, double energy_threshold, int max_iter) { pyabacus::esolver::SCFConvergenceCriteria criteria; criteria.drho_threshold = drho_threshold; criteria.energy_threshold = energy_threshold; criteria.max_iterations = max_iter; - if (auto* ctrl = dynamic_cast*>(scf_controller_.get())) - { - ctrl->set_convergence_criteria(criteria); - } + if (auto* ctrl = dynamic_cast*> (scf_controller_.get ())) + { + ctrl->set_convergence_criteria (criteria); + } } /// Set mixing parameters - void set_mixing_beta(double beta) + 
void + set_mixing_beta (double beta) { - if (auto* mixer = get_charge_mixer()) - { - mixer->set_mixing_beta(beta); - } + if (auto* mixer = get_charge_mixer ()) + { + mixer->set_mixing_beta (beta); + } } /// Set mixing method - void set_mixing_method(const std::string& method) + void + set_mixing_method (const std::string& method) { - if (auto* mixer = get_charge_mixer()) - { - mixer->set_mixing_method(pyabacus::esolver::string_to_mixing_method(method)); - } + if (auto* mixer = get_charge_mixer ()) + { + mixer->set_mixing_method (pyabacus::esolver::string_to_mixing_method (method)); + } } -private: + private: // Internal state bool initialized_ = false; bool scf_started_ = false; diff --git a/python/pyabacus/src/ModuleNAO/py_m_nao.cpp b/python/pyabacus/src/ModuleNAO/py_m_nao.cpp index c6c21e5fdf5..9489a939572 100644 --- a/python/pyabacus/src/ModuleNAO/py_m_nao.cpp +++ b/python/pyabacus/src/ModuleNAO/py_m_nao.cpp @@ -15,84 +15,86 @@ using namespace pyabacus::utils; template using overload_cast_ = pybind11::detail::overload_cast_impl; -void bind_m_nao(py::module& m) +void + bind_m_nao (py::module& m) { // Bind the RadialCollection class - py::class_(m, "RadialCollection") - .def(py::init<>(), R"pbdoc( + py::class_ (m, "RadialCollection") + .def (py::init<> (), R"pbdoc( A class that holds all numerical radial functions of the same kind. An instance of this class could be the collection of all radial functions of numerical atomic orbitals, or all Kleinman-Bylander beta functions from all elements involved in a calculation. 
)pbdoc") - .def( + .def ( "build", - [](RadialCollection& self, int nfile, const py::list& file_list, char ftype) { - std::vector files; - files.reserve(nfile); - for (auto file: file_list) + [] (RadialCollection& self, int nfile, const py::list& file_list, char ftype) { - files.push_back(file.cast()); - } - self.build(nfile, files.data(), ftype); - }, + std::vector files; + files.reserve (nfile); + for (auto file: file_list) + { + files.push_back (file.cast ()); + } + self.build (nfile, files.data (), ftype); + }, "Builds the collection from (orbital) files", "nfile"_a, "file_list"_a, "ftype"_a = '\0') - .def("set_transformer", - &RadialCollection::set_transformer, - "Sets a spherical Bessel transformers for all RadialSet objects.", - "sbt"_a, - "update"_a = 0) - .def("set_uniform_grid", - &RadialCollection::set_uniform_grid, - "Sets a common uniform grid for all RadialSet objects.", - "for_r_space"_a, - "ngrid"_a, - "cutoff"_a, - "mode"_a = 'i', - "enable_fft"_a = false) - .def( + .def ("set_transformer", + &RadialCollection::set_transformer, + "Sets a spherical Bessel transformers for all RadialSet objects.", + "sbt"_a, + "update"_a = 0) + .def ("set_uniform_grid", + &RadialCollection::set_uniform_grid, + "Sets a common uniform grid for all RadialSet objects.", + "for_r_space"_a, + "ngrid"_a, + "cutoff"_a, + "mode"_a = 'i', + "enable_fft"_a = false) + .def ( "set_grid", - [](RadialCollection& self, - const bool for_r_space, - const int ngrid, - py::array_t grid, - const char mode = 'i') { - check_array_size(grid, static_cast(ngrid), "grid"); - self.set_grid(for_r_space, ngrid, get_array_ptr(grid), mode); - }, + [] (RadialCollection& self, + const bool for_r_space, + const int ngrid, + py::array_t grid, + const char mode = 'i') + { + check_array_size (grid, static_cast (ngrid), "grid"); + self.set_grid (for_r_space, ngrid, get_array_ptr (grid), mode); + }, "Sets a common grid for all RadialSet objects.", "for_r_space"_a, "ngrid"_a, "grid"_a, "mode"_a = 'i') - 
.def( + .def ( "__call__", - [](RadialCollection& self, const int itype, const int l, const int izeta) -> const NumericalRadial& { - return self(itype, l, izeta); - }, + [] (RadialCollection& self, const int itype, const int l, const int izeta) -> const NumericalRadial& + { return self (itype, l, izeta); }, py::return_value_policy::reference_internal, "itype"_a, "l"_a, "izeta"_a) // Getters - .def("symbol", &RadialCollection::symbol, "itype"_a) - .def_property_readonly("ntype", &RadialCollection::ntype) - .def("lmax", overload_cast_()(&RadialCollection::lmax, py::const_), "itype"_a) - .def("lmax", overload_cast_<>()(&RadialCollection::lmax, py::const_)) - .def("rcut_max", overload_cast_()(&RadialCollection::rcut_max, py::const_), "itype"_a) - .def("rcut_max", overload_cast_<>()(&RadialCollection::rcut_max, py::const_)) - .def("nzeta", &RadialCollection::nzeta, "itype"_a, "l"_a) - .def("nzeta_max", overload_cast_()(&RadialCollection::nzeta_max, py::const_), "itype"_a) - .def("nzeta_max", overload_cast_<>()(&RadialCollection::nzeta_max, py::const_)) - .def("nchi", overload_cast_()(&RadialCollection::nchi, py::const_), "itype"_a) - .def("nchi", overload_cast_<>()(&RadialCollection::nchi, py::const_)); + .def ("symbol", &RadialCollection::symbol, "itype"_a) + .def_property_readonly ("ntype", &RadialCollection::ntype) + .def ("lmax", overload_cast_ () (&RadialCollection::lmax, py::const_), "itype"_a) + .def ("lmax", overload_cast_<> () (&RadialCollection::lmax, py::const_)) + .def ("rcut_max", overload_cast_ () (&RadialCollection::rcut_max, py::const_), "itype"_a) + .def ("rcut_max", overload_cast_<> () (&RadialCollection::rcut_max, py::const_)) + .def ("nzeta", &RadialCollection::nzeta, "itype"_a, "l"_a) + .def ("nzeta_max", overload_cast_ () (&RadialCollection::nzeta_max, py::const_), "itype"_a) + .def ("nzeta_max", overload_cast_<> () (&RadialCollection::nzeta_max, py::const_)) + .def ("nchi", overload_cast_ () (&RadialCollection::nchi, py::const_), "itype"_a) + .def 
("nchi", overload_cast_<> () (&RadialCollection::nchi, py::const_)); // Bind the TwoCenterIntegrator class - py::class_(m, "TwoCenterIntegrator") - .def(py::init<>(), R"pbdoc( + py::class_ (m, "TwoCenterIntegrator") + .def (py::init<> (), R"pbdoc( A class to compute two-center integrals. This class computes two-center integrals of the form: @@ -114,9 +116,9 @@ void bind_m_nao(py::module& m) Kleinman-Bylander nonlocal projectors, the overlap & kinetic integrals between all numerical atomic orbitals, etc. This is done by tabulating the radial part of the integrals on an r-space grid and the real Gaunt coefficients in advance. )pbdoc") - .def("tabulate", - &TwoCenterIntegrator::tabulate, - R"pbdoc( + .def ("tabulate", + &TwoCenterIntegrator::tabulate, + R"pbdoc( Tabulates the radial part of a two-center integral. Parameters: @@ -126,43 +128,44 @@ void bind_m_nao(py::module& m) nr (int): Number of r-space grid points. cutoff (float): r-space cutoff radius. )pbdoc", - "bra"_a, - "ket"_a, - "op"_a, - "nr"_a, - "cutoff"_a) - .def( + "bra"_a, + "ket"_a, + "op"_a, + "nr"_a, + "cutoff"_a) + .def ( "calculate", - [](TwoCenterIntegrator& self, - const int itype1, - const int l1, - const int izeta1, - const int m1, - const int itype2, - const int l2, - const int izeta2, - const int m2, - py::array_t pvR, - bool cal_grad = false) { - check_array_size(pvR, 3, "pvR"); - double* cvR = get_array_ptr(pvR); - ModuleBase::Vector3 vR(cvR[0], cvR[1], cvR[2]); - double out[1] = {0.0}; - double grad_out[3] = {0.0, 0.0, 0.0}; - double* grad_ptr = cal_grad ? 
grad_out : nullptr; - self.calculate(itype1, l1, izeta1, m1, itype2, l2, izeta2, m2, vR, out, grad_ptr); - py::array_t out_array(1, out); - if (cal_grad) - { - py::array_t grad_out_array(3, grad_out); - return py::make_tuple(out_array, grad_out_array); - } - else + [] (TwoCenterIntegrator& self, + const int itype1, + const int l1, + const int izeta1, + const int m1, + const int itype2, + const int l2, + const int izeta2, + const int m2, + py::array_t pvR, + bool cal_grad = false) { - py::array_t grad_out_array(0); - return py::make_tuple(out_array, grad_out_array); - } - }, + check_array_size (pvR, 3, "pvR"); + double* cvR = get_array_ptr (pvR); + ModuleBase::Vector3 vR (cvR[0], cvR[1], cvR[2]); + double out[1] = {0.0}; + double grad_out[3] = {0.0, 0.0, 0.0}; + double* grad_ptr = cal_grad ? grad_out : nullptr; + self.calculate (itype1, l1, izeta1, m1, itype2, l2, izeta2, m2, vR, out, grad_ptr); + py::array_t out_array (1, out); + if (cal_grad) + { + py::array_t grad_out_array (3, grad_out); + return py::make_tuple (out_array, grad_out_array); + } + else + { + py::array_t grad_out_array (0); + return py::make_tuple (out_array, grad_out_array); + } + }, R"pbdoc( Compute the two-center integrals. 
@@ -214,24 +217,25 @@ void bind_m_nao(py::module& m) "m2"_a, "pvR"_a, "cal_grad"_a = false) - .def( + .def ( "snap", - [](TwoCenterIntegrator& self, - const int itype1, - const int l1, - const int izeta1, - const int m1, - const int itype2, - py::array_t pvR, - const bool deriv) { - check_array_size(pvR, 3, "pvR"); - double* cvR = get_array_ptr(pvR); - ModuleBase::Vector3 vR(cvR[0], cvR[1], cvR[2]); - // TODO: check deriv & out memory allocation - std::vector> out; - self.snap(itype1, l1, izeta1, m1, itype2, vR, deriv, out); - return out; - }, + [] (TwoCenterIntegrator& self, + const int itype1, + const int l1, + const int izeta1, + const int m1, + const int itype2, + py::array_t pvR, + const bool deriv) + { + check_array_size (pvR, 3, "pvR"); + double* cvR = get_array_ptr (pvR); + ModuleBase::Vector3 vR (cvR[0], cvR[1], cvR[2]); + // TODO: check deriv & out memory allocation + std::vector> out; + self.snap (itype1, l1, izeta1, m1, itype2, vR, deriv, out); + return out; + }, R"pbdoc( Compute a batch of two-center integrals. @@ -246,8 +250,8 @@ void bind_m_nao(py::module& m) "pvR"_a, "deriv"_a = false); // Bind the NumericalRadial class - py::class_(m, "NumericalRadial") - .def(py::init<>(), R"pbdoc( + py::class_ (m, "NumericalRadial") + .def (py::init<> (), R"pbdoc( A class that represents a numerical radial function. This class is designed to be the container for the radial part of numerical atomic orbitals, Kleinman-Bylander beta functions, and all other similar numerical radial functions in three-dimensional space, each of which is associated with some angular momentum l and whose r and k space values are related by an l-th order spherical Bessel transform. @@ -255,31 +259,32 @@ void bind_m_nao(py::module& m) A NumericalRadial object can be initialized by "build", which requires the angular momentum, the number of grid points, the grid and the corresponding values. Grid does not have to be uniform. One can initialize the object in either r or k space. 
After initialization, one can set the grid in the other space via set_grid or set_uniform_grid. Values in the other space are automatically computed by a spherical Bessel transform. )pbdoc") - .def( + .def ( "build", - [](NumericalRadial& self, - const int l, - const bool for_r_space, - const int ngrid, - py::array_t grid, - py::array_t value, - const int p = 0, - const int izeta = 0, - const std::string symbol = "", - const int itype = 0, - const bool init_sbt = true) { - check_array_size(grid, static_cast(ngrid), "grid"); - self.build(l, - for_r_space, - ngrid, - get_array_ptr(grid), - get_array_ptr(value), - p, - izeta, - symbol, - itype, - init_sbt); - }, + [] (NumericalRadial& self, + const int l, + const bool for_r_space, + const int ngrid, + py::array_t grid, + py::array_t value, + const int p = 0, + const int izeta = 0, + const std::string symbol = "", + const int itype = 0, + const bool init_sbt = true) + { + check_array_size (grid, static_cast (ngrid), "grid"); + self.build (l, + for_r_space, + ngrid, + get_array_ptr (grid), + get_array_ptr (value), + p, + izeta, + symbol, + itype, + init_sbt); + }, R"pbdoc( Initializes the object by providing the grid & values in one space. @@ -320,9 +325,9 @@ void bind_m_nao(py::module& m) "symbol"_a = "", "itype"_a = 0, "init_sbt"_a = true) - .def("set_transformer", - &NumericalRadial::set_transformer, - R"pbdoc( + .def ("set_transformer", + &NumericalRadial::set_transformer, + R"pbdoc( Sets a SphericalBesselTransformer. By default, the class uses an internal SphericalBesselTransformer, but one can optionally use a shared one. This could be beneficial when there are a lot of NumericalRadial objects whose grids have the same size. @@ -338,18 +343,19 @@ void bind_m_nao(py::module& m) * 1: calls a forward transform; * -1: calls a backward transform. 
)pbdoc", - "sbt"_a, - "update"_a = 0) - .def( + "sbt"_a, + "update"_a = 0) + .def ( "set_grid", - [](NumericalRadial& self, - const bool for_r_space, - const int ngrid, - py::array_t grid, - const char mode = 'i') { - check_array_size(grid, static_cast(ngrid), "grid"); - self.set_grid(for_r_space, ngrid, get_array_ptr(grid), mode); - }, + [] (NumericalRadial& self, + const bool for_r_space, + const int ngrid, + py::array_t grid, + const char mode = 'i') + { + check_array_size (grid, static_cast (ngrid), "grid"); + self.set_grid (for_r_space, ngrid, get_array_ptr (grid), mode); + }, R"pbdoc( Sets up a grid. @@ -376,9 +382,9 @@ void bind_m_nao(py::module& m) "ngrid"_a, "grid"_a, "mode"_a = 'i') - .def("set_uniform_grid", - &NumericalRadial::set_uniform_grid, - R"pbdoc( + .def ("set_uniform_grid", + &NumericalRadial::set_uniform_grid, + R"pbdoc( Sets up a uniform grid. The functionality of this function is similar to set_grid, except that the new grid is a uniform grid specified by the cutoff and the number of grid points, which are calculated as: @@ -398,16 +404,15 @@ void bind_m_nao(py::module& m) mode : char Specifies how values are updated, could be 'i' or 't'. )pbdoc", - "for_r_space"_a, - "ngrid"_a, - "cutoff"_a, - "mode"_a = 'i', - "enable_fft"_a = false) - .def( + "for_r_space"_a, + "ngrid"_a, + "cutoff"_a, + "mode"_a = 'i', + "enable_fft"_a = false) + .def ( "set_value", - [](NumericalRadial& self, const bool for_r_space, py::array_t value, const int p) { - self.set_value(for_r_space, get_array_ptr(value), p); - }, + [] (NumericalRadial& self, const bool for_r_space, py::array_t value, const int p) + { self.set_value (for_r_space, get_array_ptr (value), p); }, R"pbdoc( Updates values on an existing grid. 
@@ -420,10 +425,10 @@ void bind_m_nao(py::module& m) "for_r_space"_a, "value"_a, "p"_a) - .def("wipe", &NumericalRadial::wipe, "r_space"_a = true, "k_space"_a = true) - .def("normalize", - &NumericalRadial::normalize, - R"pbdoc( + .def ("wipe", &NumericalRadial::wipe, "r_space"_a = true, "k_space"_a = true) + .def ("normalize", + &NumericalRadial::normalize, + R"pbdoc( Normalizes the radial function. The radial function is normalized such that the integral of the square of the function multiplied by the square of the radial coordinate over the entire space is equal to one: @@ -432,43 +437,40 @@ void bind_m_nao(py::module& m) where x is r or k. The integral is evaluated with Simpson's rule. Values in the other space are updated automatically via a spherical Bessel transform. )pbdoc", - "for_r_space"_a = true) + "for_r_space"_a = true) // Getters - .def_property_readonly("symbol", &NumericalRadial::symbol) - .def_property_readonly("itype", &NumericalRadial::itype) - .def_property_readonly("izeta", &NumericalRadial::izeta) - .def_property_readonly("l", &NumericalRadial::l) - .def_property_readonly("nr", &NumericalRadial::nr) - .def_property_readonly("nk", &NumericalRadial::nk) - .def_property_readonly("rcut", &NumericalRadial::rcut) - .def_property_readonly("kcut", &NumericalRadial::kcut) - .def_property_readonly("rmax", &NumericalRadial::rmax) - .def_property_readonly("kmax", &NumericalRadial::kmax) - .def_property_readonly("pr", &NumericalRadial::pr) - .def_property_readonly("pk", &NumericalRadial::pk) - .def_property_readonly("sbt", &NumericalRadial::sbt) - .def_property_readonly("rgrid", - [](NumericalRadial& self) { - return numpy_from_ptr_copy(self.rgrid(), static_cast(self.nr())); - }) - .def_property_readonly("kgrid", - [](NumericalRadial& self) { - return numpy_from_ptr_copy(self.kgrid(), static_cast(self.nk())); - }) - .def_property_readonly("rvalue", - [](NumericalRadial& self) { - return numpy_from_ptr_copy(self.rvalue(), static_cast(self.nr())); - }) - 
.def_property_readonly("kvalue", - [](NumericalRadial& self) { - return numpy_from_ptr_copy(self.kvalue(), static_cast(self.nk())); - }) - .def_property_readonly("is_fft_compliant", overload_cast_<>()(&NumericalRadial::is_fft_compliant, py::const_)); + .def_property_readonly ("symbol", &NumericalRadial::symbol) + .def_property_readonly ("itype", &NumericalRadial::itype) + .def_property_readonly ("izeta", &NumericalRadial::izeta) + .def_property_readonly ("l", &NumericalRadial::l) + .def_property_readonly ("nr", &NumericalRadial::nr) + .def_property_readonly ("nk", &NumericalRadial::nk) + .def_property_readonly ("rcut", &NumericalRadial::rcut) + .def_property_readonly ("kcut", &NumericalRadial::kcut) + .def_property_readonly ("rmax", &NumericalRadial::rmax) + .def_property_readonly ("kmax", &NumericalRadial::kmax) + .def_property_readonly ("pr", &NumericalRadial::pr) + .def_property_readonly ("pk", &NumericalRadial::pk) + .def_property_readonly ("sbt", &NumericalRadial::sbt) + .def_property_readonly ("rgrid", + [] (NumericalRadial& self) + { return numpy_from_ptr_copy (self.rgrid (), static_cast (self.nr ())); }) + .def_property_readonly ("kgrid", + [] (NumericalRadial& self) + { return numpy_from_ptr_copy (self.kgrid (), static_cast (self.nk ())); }) + .def_property_readonly ("rvalue", + [] (NumericalRadial& self) + { return numpy_from_ptr_copy (self.rvalue (), static_cast (self.nr ())); }) + .def_property_readonly ("kvalue", + [] (NumericalRadial& self) + { return numpy_from_ptr_copy (self.kvalue (), static_cast (self.nk ())); }) + .def_property_readonly ("is_fft_compliant", + overload_cast_<> () (&NumericalRadial::is_fft_compliant, py::const_)); } -PYBIND11_MODULE(_nao_pack, m) +PYBIND11_MODULE (_nao_pack, m) { - m.doc() = "Module for Numerical Atomic Orbitals (NAO) in ABACUS"; + m.doc () = "Module for Numerical Atomic Orbitals (NAO) in ABACUS"; - bind_m_nao(m); + bind_m_nao (m); } diff --git a/python/pyabacus/src/hsolver/diago_adapter.hpp 
b/python/pyabacus/src/hsolver/diago_adapter.hpp index d57c8fd82a5..3f5855f608a 100644 --- a/python/pyabacus/src/hsolver/diago_adapter.hpp +++ b/python/pyabacus/src/hsolver/diago_adapter.hpp @@ -24,8 +24,10 @@ namespace py = pybind11; -namespace pyabacus { -namespace hsolver { +namespace pyabacus +{ +namespace hsolver +{ // ============================================================================ // PyDiagoDavid Adapter @@ -36,79 +38,72 @@ namespace hsolver { */ class PyDiagoDavidAdapter { -public: + public: using Traits = DiagoDavidTraits; using T = typename Traits::T; using SolverType = typename Traits::SolverType; - PyDiagoDavidAdapter(int nbasis, int nband) - : nbasis_(nbasis), nband_(nband) + PyDiagoDavidAdapter (int nbasis, int nband) : nbasis_ (nbasis), nband_ (nband) { - storage_.allocate(nbasis, nband); + storage_.allocate (nbasis, nband); } - PyDiagoDavidAdapter(const PyDiagoDavidAdapter&) = delete; - PyDiagoDavidAdapter& operator=(const PyDiagoDavidAdapter&) = delete; + PyDiagoDavidAdapter (const PyDiagoDavidAdapter&) = delete; + PyDiagoDavidAdapter& operator= (const PyDiagoDavidAdapter&) = delete; - PyDiagoDavidAdapter(PyDiagoDavidAdapter&& other) noexcept - : storage_(std::move(other.storage_)) - , nbasis_(other.nbasis_) - , nband_(other.nband_) + PyDiagoDavidAdapter (PyDiagoDavidAdapter&& other) noexcept + : storage_ (std::move (other.storage_)), nbasis_ (other.nbasis_), nband_ (other.nband_) { } - void set_psi(py::array_t psi_in) + void + set_psi (py::array_t psi_in) { - storage_.set_psi(psi_in); + storage_.set_psi (psi_in); } - py::array_t get_psi() const + py::array_t + get_psi () const { - return storage_.get_psi(); + return storage_.get_psi (); } - void init_eigenvalue() + void + init_eigenvalue () { - storage_.init_eigenvalue(); + storage_.init_eigenvalue (); } - py::array_t get_eigenvalue() const + py::array_t + get_eigenvalue () const { - return storage_.get_eigenvalue(); + return storage_.get_eigenvalue (); } - int diag( - 
std::function(py::array_t)> mm_op, - std::vector& precond_vec, - int dav_ndim, - double tol, - std::vector& diag_ethr, - int max_iter, - ::hsolver::diag_comm_info comm_info) + int + diag (std::function (py::array_t)> mm_op, + std::vector& precond_vec, + int dav_ndim, + double tol, + std::vector& diag_ethr, + int max_iter, + ::hsolver::diag_comm_info comm_info) { - auto hpsi_func = make_hpsi_func_fstyle(mm_op); - auto spsi_func = make_spsi_func_identity(); - - solver_ = std::make_unique( - precond_vec.data(), - nband_, - nbasis_, - dav_ndim, - comm_info - ); - - return solver_->diag( - hpsi_func, - spsi_func, - nbasis_, - storage_.psi_ptr(), - storage_.eigenvalue_ptr(), - diag_ethr, - max_iter - ); + auto hpsi_func = make_hpsi_func_fstyle (mm_op); + auto spsi_func = make_spsi_func_identity (); + + solver_ = std::make_unique (precond_vec.data (), nband_, nbasis_, dav_ndim, comm_info); + + return solver_->diag (hpsi_func, + spsi_func, + nbasis_, + storage_.psi_ptr (), + storage_.eigenvalue_ptr (), + diag_ethr, + max_iter); } -private: + private: RawPointerStorage storage_; std::unique_ptr solver_; int nbasis_; @@ -124,87 +119,84 @@ class PyDiagoDavidAdapter */ class PyDiagoDavSubspaceAdapter { -public: + public: using Traits = DiagoDavSubspaceTraits; using T = typename Traits::T; using SolverType = typename Traits::SolverType; - PyDiagoDavSubspaceAdapter(int nbasis, int nband) - : nbasis_(nbasis), nband_(nband) + PyDiagoDavSubspaceAdapter (int nbasis, int nband) : nbasis_ (nbasis), nband_ (nband) { - storage_.allocate(nbasis, nband); + storage_.allocate (nbasis, nband); } - PyDiagoDavSubspaceAdapter(const PyDiagoDavSubspaceAdapter&) = delete; - PyDiagoDavSubspaceAdapter& operator=(const PyDiagoDavSubspaceAdapter&) = delete; + PyDiagoDavSubspaceAdapter (const PyDiagoDavSubspaceAdapter&) = delete; + PyDiagoDavSubspaceAdapter& operator= (const PyDiagoDavSubspaceAdapter&) = delete; - PyDiagoDavSubspaceAdapter(PyDiagoDavSubspaceAdapter&& other) noexcept - : 
storage_(std::move(other.storage_)) - , nbasis_(other.nbasis_) - , nband_(other.nband_) + PyDiagoDavSubspaceAdapter (PyDiagoDavSubspaceAdapter&& other) noexcept + : storage_ (std::move (other.storage_)), nbasis_ (other.nbasis_), nband_ (other.nband_) { } - void set_psi(py::array_t psi_in) + void + set_psi (py::array_t psi_in) { - storage_.set_psi(psi_in); + storage_.set_psi (psi_in); } - py::array_t get_psi() const + py::array_t + get_psi () const { - return storage_.get_psi(); + return storage_.get_psi (); } - void init_eigenvalue() + void + init_eigenvalue () { - storage_.init_eigenvalue(); + storage_.init_eigenvalue (); } - py::array_t get_eigenvalue() const + py::array_t + get_eigenvalue () const { - return storage_.get_eigenvalue(); + return storage_.get_eigenvalue (); } - int diag( - std::function(py::array_t)> mm_op, - std::vector& precond_vec, - int dav_ndim, - double tol, - int max_iter, - bool need_subspace, - std::vector& diag_ethr, - bool scf_type, - ::hsolver::diag_comm_info comm_info, - int diag_subspace, - int nb2d) + int + diag (std::function (py::array_t)> mm_op, + std::vector& precond_vec, + int dav_ndim, + double tol, + int max_iter, + bool need_subspace, + std::vector& diag_ethr, + bool scf_type, + ::hsolver::diag_comm_info comm_info, + int diag_subspace, + int nb2d) { - auto hpsi_func = make_hpsi_func_fstyle(mm_op); - auto spsi_func = make_spsi_func_identity(); - - solver_ = std::make_unique( - precond_vec, - nband_, - nbasis_, - dav_ndim, - tol, - max_iter, - comm_info, - diag_subspace, - nb2d - ); - - return solver_->diag( - hpsi_func, - spsi_func, - storage_.psi_ptr(), - nbasis_, - storage_.eigenvalue_ptr(), - diag_ethr, - scf_type - ); + auto hpsi_func = make_hpsi_func_fstyle (mm_op); + auto spsi_func = make_spsi_func_identity (); + + solver_ = std::make_unique (precond_vec, + nband_, + nbasis_, + dav_ndim, + tol, + max_iter, + comm_info, + diag_subspace, + nb2d); + + return solver_->diag (hpsi_func, + spsi_func, + storage_.psi_ptr (), + 
nbasis_, + storage_.eigenvalue_ptr (), + diag_ethr, + scf_type); } -private: + private: RawPointerStorage storage_; std::unique_ptr solver_; int nbasis_; @@ -221,91 +213,87 @@ class PyDiagoDavSubspaceAdapter */ class PyDiagoCGAdapter { -public: + public: using Traits = DiagoCGTraits; using T = typename Traits::T; using SolverType = typename Traits::SolverType; - PyDiagoCGAdapter(int dim, int num_eigs) - : dim_(dim), num_eigs_(num_eigs) - { - storage_.allocate(dim, num_eigs); - } + PyDiagoCGAdapter (int dim, int num_eigs) : dim_ (dim), num_eigs_ (num_eigs) { storage_.allocate (dim, num_eigs); } - PyDiagoCGAdapter(const PyDiagoCGAdapter&) = delete; - PyDiagoCGAdapter& operator=(const PyDiagoCGAdapter&) = delete; + PyDiagoCGAdapter (const PyDiagoCGAdapter&) = delete; + PyDiagoCGAdapter& operator= (const PyDiagoCGAdapter&) = delete; - PyDiagoCGAdapter(PyDiagoCGAdapter&& other) noexcept - : storage_(std::move(other.storage_)) - , dim_(other.dim_) - , num_eigs_(other.num_eigs_) + PyDiagoCGAdapter (PyDiagoCGAdapter&& other) noexcept + : storage_ (std::move (other.storage_)), dim_ (other.dim_), num_eigs_ (other.num_eigs_) { } - void set_psi(py::array_t psi_in) + void + set_psi (py::array_t psi_in) { - storage_.set_psi(psi_in); + storage_.set_psi (psi_in); } - py::array_t get_psi() const + py::array_t + get_psi () const { - return storage_.get_psi(); + return storage_.get_psi (); } - void init_eig() + void + init_eig () { - storage_.init_eigenvalue(); + storage_.init_eigenvalue (); } - py::array_t get_eig() const + py::array_t + get_eig () const { - return storage_.get_eigenvalue(); + return storage_.get_eigenvalue (); } - void set_prec(py::array_t prec_in) + void + set_prec (py::array_t prec_in) { - storage_.set_preconditioner(prec_in); + storage_.set_preconditioner (prec_in); } - void diag( - std::function(py::array_t)> mm_op, - int diag_ndim, - double tol, - const std::vector& diag_ethr, - bool need_subspace, - bool scf_type, - int nproc_in_pool = 1) + void + diag 
(std::function (py::array_t)> mm_op, + int diag_ndim, + double tol, + const std::vector& diag_ethr, + bool need_subspace, + bool scf_type, + int nproc_in_pool = 1) { const std::string basis_type = "pw"; const std::string calculation = scf_type ? "scf" : "nscf"; - auto hpsi_func = make_hpsi_func_tensor(mm_op); - auto spsi_func = make_spsi_func_tensor_identity(); - auto subspace_func = [](const ct::Tensor& psi_in, ct::Tensor& psi_out, const bool S_orth) { - // Do nothing - placeholder - }; - - solver_ = std::make_unique( - basis_type, - calculation, - need_subspace, - subspace_func, - tol, - diag_ndim, - nproc_in_pool - ); - - solver_->diag( - hpsi_func, - spsi_func, - *storage_.psi_tensor(), - *storage_.eig_tensor(), - diag_ethr, - *storage_.prec_tensor() - ); + auto hpsi_func = make_hpsi_func_tensor (mm_op); + auto spsi_func = make_spsi_func_tensor_identity (); + auto subspace_func = [] (const ct::Tensor& psi_in, ct::Tensor& psi_out, const bool S_orth) + { + // Do nothing - placeholder + }; + + solver_ = std::make_unique (basis_type, + calculation, + need_subspace, + subspace_func, + tol, + diag_ndim, + nproc_in_pool); + + solver_->diag (hpsi_func, + spsi_func, + *storage_.psi_tensor (), + *storage_.eig_tensor (), + diag_ethr, + *storage_.prec_tensor ()); } -private: + private: TensorStorage storage_; std::unique_ptr solver_; int dim_; @@ -317,7 +305,8 @@ class PyDiagoCGAdapter // Backward Compatibility Aliases // ============================================================================ -namespace py_hsolver_compat { +namespace py_hsolver_compat +{ using PyDiagoDavid = PyDiagoDavidAdapter; using PyDiagoDavSubspace = PyDiagoDavSubspaceAdapter; diff --git a/python/pyabacus/src/hsolver/diago_traits.hpp b/python/pyabacus/src/hsolver/diago_traits.hpp index c23d9f3a9f2..72ef4d1b452 100644 --- a/python/pyabacus/src/hsolver/diago_traits.hpp +++ b/python/pyabacus/src/hsolver/diago_traits.hpp @@ -34,8 +34,10 @@ namespace py = pybind11; -namespace pyabacus { -namespace 
hsolver { +namespace pyabacus +{ +namespace hsolver +{ // ============================================================================ // Storage Policies @@ -49,12 +51,13 @@ namespace hsolver { template class RawPointerStorage { -public: + public: using value_type = T; - RawPointerStorage() = default; + RawPointerStorage () = default; - void allocate(int nbasis, int nband) + void + allocate (int nbasis, int nband) { nbasis_ = nbasis; nband_ = nband; @@ -62,104 +65,120 @@ class RawPointerStorage eigenvalue_ = new double[nband]; } - ~RawPointerStorage() - { - cleanup(); - } + ~RawPointerStorage () { cleanup (); } // Move semantics - RawPointerStorage(RawPointerStorage&& other) noexcept - : psi_(other.psi_) - , eigenvalue_(other.eigenvalue_) - , nbasis_(other.nbasis_) - , nband_(other.nband_) + RawPointerStorage (RawPointerStorage&& other) noexcept + : psi_ (other.psi_), eigenvalue_ (other.eigenvalue_), nbasis_ (other.nbasis_), nband_ (other.nband_) { other.psi_ = nullptr; other.eigenvalue_ = nullptr; } - RawPointerStorage& operator=(RawPointerStorage&& other) noexcept + RawPointerStorage& + operator= (RawPointerStorage&& other) noexcept { if (this != &other) - { - cleanup(); - psi_ = other.psi_; - eigenvalue_ = other.eigenvalue_; - nbasis_ = other.nbasis_; - nband_ = other.nband_; - other.psi_ = nullptr; - other.eigenvalue_ = nullptr; - } + { + cleanup (); + psi_ = other.psi_; + eigenvalue_ = other.eigenvalue_; + nbasis_ = other.nbasis_; + nband_ = other.nband_; + other.psi_ = nullptr; + other.eigenvalue_ = nullptr; + } return *this; } // Disable copy - RawPointerStorage(const RawPointerStorage&) = delete; - RawPointerStorage& operator=(const RawPointerStorage&) = delete; + RawPointerStorage (const RawPointerStorage&) = delete; + RawPointerStorage& operator= (const RawPointerStorage&) = delete; - void set_psi(py::array_t psi_in) + void + set_psi (py::array_t psi_in) { - if (static_cast(psi_in.size()) != static_cast(nbasis_ * nband_)) - { - throw 
std::runtime_error("psi_in size mismatch"); - } - for (size_t i = 0; i < static_cast(nbasis_ * nband_); ++i) - { - psi_[i] = psi_in.at(i); - } + if (static_cast (psi_in.size ()) != static_cast (nbasis_ * nband_)) + { + throw std::runtime_error ("psi_in size mismatch"); + } + for (size_t i = 0; i < static_cast (nbasis_ * nband_); ++i) + { + psi_[i] = psi_in.at (i); + } } - py::array_t get_psi() const + py::array_t + get_psi () const { - py::array_t psi_out(nband_ * nbasis_); - py::buffer_info buf = psi_out.request(); - T* ptr = static_cast(buf.ptr); - for (size_t i = 0; i < static_cast(nband_ * nbasis_); ++i) - { - ptr[i] = psi_[i]; - } + py::array_t psi_out (nband_ * nbasis_); + py::buffer_info buf = psi_out.request (); + T* ptr = static_cast (buf.ptr); + for (size_t i = 0; i < static_cast (nband_ * nbasis_); ++i) + { + ptr[i] = psi_[i]; + } return psi_out; } - void init_eigenvalue() + void + init_eigenvalue () { for (int i = 0; i < nband_; ++i) - { - eigenvalue_[i] = 0.0; - } + { + eigenvalue_[i] = 0.0; + } } - py::array_t get_eigenvalue() const + py::array_t + get_eigenvalue () const { - py::array_t eig_out(nband_); - py::buffer_info buf = eig_out.request(); - double* ptr = static_cast(buf.ptr); + py::array_t eig_out (nband_); + py::buffer_info buf = eig_out.request (); + double* ptr = static_cast (buf.ptr); for (int i = 0; i < nband_; ++i) - { - ptr[i] = eigenvalue_[i]; - } + { + ptr[i] = eigenvalue_[i]; + } return eig_out; } // Accessors for solver - T* psi_ptr() { return psi_; } - double* eigenvalue_ptr() { return eigenvalue_; } - int nbasis() const { return nbasis_; } - int nband() const { return nband_; } + T* + psi_ptr () + { + return psi_; + } + double* + eigenvalue_ptr () + { + return eigenvalue_; + } + int + nbasis () const + { + return nbasis_; + } + int + nband () const + { + return nband_; + } -private: - void cleanup() + private: + void + cleanup () { if (psi_ != nullptr) - { - delete[] psi_; - psi_ = nullptr; - } + { + delete[] psi_; + psi_ = 
nullptr; + } if (eigenvalue_ != nullptr) - { - delete[] eigenvalue_; - eigenvalue_ = nullptr; - } + { + delete[] eigenvalue_; + eigenvalue_ = nullptr; + } } T* psi_ = nullptr; @@ -177,142 +196,156 @@ class RawPointerStorage template class TensorStorage { -public: + public: using value_type = T; - TensorStorage() = default; + TensorStorage () = default; - void allocate(int dim, int num_eigs) + void + allocate (int dim, int num_eigs) { dim_ = dim; num_eigs_ = num_eigs; // Tensors are allocated lazily } - ~TensorStorage() - { - cleanup(); - } + ~TensorStorage () { cleanup (); } // Move semantics - TensorStorage(TensorStorage&& other) noexcept - : psi_(other.psi_) - , eig_(other.eig_) - , prec_(other.prec_) - , dim_(other.dim_) - , num_eigs_(other.num_eigs_) + TensorStorage (TensorStorage&& other) noexcept + : psi_ (other.psi_), eig_ (other.eig_), prec_ (other.prec_), dim_ (other.dim_), num_eigs_ (other.num_eigs_) { other.psi_ = nullptr; other.eig_ = nullptr; other.prec_ = nullptr; } - TensorStorage& operator=(TensorStorage&& other) noexcept + TensorStorage& + operator= (TensorStorage&& other) noexcept { if (this != &other) - { - cleanup(); - psi_ = other.psi_; - eig_ = other.eig_; - prec_ = other.prec_; - dim_ = other.dim_; - num_eigs_ = other.num_eigs_; - other.psi_ = nullptr; - other.eig_ = nullptr; - other.prec_ = nullptr; - } + { + cleanup (); + psi_ = other.psi_; + eig_ = other.eig_; + prec_ = other.prec_; + dim_ = other.dim_; + num_eigs_ = other.num_eigs_; + other.psi_ = nullptr; + other.eig_ = nullptr; + other.prec_ = nullptr; + } return *this; } // Disable copy - TensorStorage(const TensorStorage&) = delete; - TensorStorage& operator=(const TensorStorage&) = delete; + TensorStorage (const TensorStorage&) = delete; + TensorStorage& operator= (const TensorStorage&) = delete; - void set_psi(py::array_t psi_in) + void + set_psi (py::array_t psi_in) { - py::buffer_info buf = psi_in.request(); - T* ptr = static_cast(buf.ptr); - - psi_ = new ct::TensorMap( - ptr, - 
ct::DataType::DT_COMPLEX_DOUBLE, - ct::DeviceType::CpuDevice, - ct::TensorShape({num_eigs_, dim_}) - ); + py::buffer_info buf = psi_in.request (); + T* ptr = static_cast (buf.ptr); + + psi_ = new ct::TensorMap (ptr, + ct::DataType::DT_COMPLEX_DOUBLE, + ct::DeviceType::CpuDevice, + ct::TensorShape ({num_eigs_, dim_})); } - py::array_t get_psi() const + py::array_t + get_psi () const { if (psi_ == nullptr) - { - throw std::runtime_error("psi is not initialized"); - } - py::array_t psi_out({num_eigs_, dim_}); - py::buffer_info buf = psi_out.request(); - T* ptr = static_cast(buf.ptr); - T* psi_ptr = psi_->data(); - std::copy(psi_ptr, psi_ptr + psi_->NumElements(), ptr); + { + throw std::runtime_error ("psi is not initialized"); + } + py::array_t psi_out ({num_eigs_, dim_}); + py::buffer_info buf = psi_out.request (); + T* ptr = static_cast (buf.ptr); + T* psi_ptr = psi_->data (); + std::copy (psi_ptr, psi_ptr + psi_->NumElements (), ptr); return psi_out; } - void init_eigenvalue() + void + init_eigenvalue () { - eig_ = new ct::Tensor(ct::DataType::DT_DOUBLE, {num_eigs_}); - eig_->zero(); + eig_ = new ct::Tensor (ct::DataType::DT_DOUBLE, {num_eigs_}); + eig_->zero (); } - py::array_t get_eigenvalue() const + py::array_t + get_eigenvalue () const { if (eig_ == nullptr) - { - throw std::runtime_error("eigenvalue is not initialized"); - } - py::array_t eig_out(eig_->NumElements()); - py::buffer_info buf = eig_out.request(); - double* ptr = static_cast(buf.ptr); - double* eig_ptr = eig_->data(); - std::copy(eig_ptr, eig_ptr + eig_->NumElements(), ptr); + { + throw std::runtime_error ("eigenvalue is not initialized"); + } + py::array_t eig_out (eig_->NumElements ()); + py::buffer_info buf = eig_out.request (); + double* ptr = static_cast (buf.ptr); + double* eig_ptr = eig_->data (); + std::copy (eig_ptr, eig_ptr + eig_->NumElements (), ptr); return eig_out; } - void set_preconditioner(py::array_t prec_in) + void + set_preconditioner (py::array_t prec_in) { - py::buffer_info 
buf = prec_in.request(); - double* ptr = static_cast(buf.ptr); - - prec_ = new ct::TensorMap( - ptr, - ct::DataType::DT_DOUBLE, - ct::DeviceType::CpuDevice, - ct::TensorShape({dim_}) - ); + py::buffer_info buf = prec_in.request (); + double* ptr = static_cast (buf.ptr); + + prec_ = new ct::TensorMap (ptr, ct::DataType::DT_DOUBLE, ct::DeviceType::CpuDevice, ct::TensorShape ({dim_})); } // Accessors for solver - ct::Tensor* psi_tensor() { return psi_; } - ct::Tensor* eig_tensor() { return eig_; } - ct::Tensor* prec_tensor() { return prec_; } - int dim() const { return dim_; } - int num_eigs() const { return num_eigs_; } - -private: - void cleanup() + ct::Tensor* + psi_tensor () + { + return psi_; + } + ct::Tensor* + eig_tensor () + { + return eig_; + } + ct::Tensor* + prec_tensor () + { + return prec_; + } + int + dim () const + { + return dim_; + } + int + num_eigs () const + { + return num_eigs_; + } + + private: + void + cleanup () { if (psi_ != nullptr) - { - delete psi_; - psi_ = nullptr; - } + { + delete psi_; + psi_ = nullptr; + } if (eig_ != nullptr) - { - delete eig_; - eig_ = nullptr; - } + { + delete eig_; + eig_ = nullptr; + } if (prec_ != nullptr) - { - delete prec_; - prec_ = nullptr; - } + { + delete prec_; + prec_ = nullptr; + } } ct::Tensor* psi_ = nullptr; @@ -337,12 +370,11 @@ struct DiagoDavidTraits using StoragePolicy = RawPointerStorage; static constexpr const char* name = "diago_david"; - static constexpr bool uses_f_style = true; // Column-major arrays + static constexpr bool uses_f_style = true; // Column-major arrays static constexpr bool has_preconditioner = true; // Memory synchronization operation - using syncmem_op = base_device::memory::synchronize_memory_op< - T, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; + using syncmem_op = base_device::memory::synchronize_memory_op; }; /** @@ -355,12 +387,11 @@ struct DiagoDavSubspaceTraits using StoragePolicy = RawPointerStorage; static constexpr const char* name = "diago_dav_subspace"; - 
static constexpr bool uses_f_style = true; // Column-major arrays + static constexpr bool uses_f_style = true; // Column-major arrays static constexpr bool has_preconditioner = true; // Memory synchronization operation - using syncmem_op = base_device::memory::synchronize_memory_op< - T, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; + using syncmem_op = base_device::memory::synchronize_memory_op; }; #ifdef __ENABLE_ATEN @@ -374,12 +405,11 @@ struct DiagoCGTraits using StoragePolicy = TensorStorage; static constexpr const char* name = "diago_cg"; - static constexpr bool uses_f_style = false; // Row-major arrays + static constexpr bool uses_f_style = false; // Row-major arrays static constexpr bool has_preconditioner = true; // Memory synchronization operation for tensor interface - using syncmem_op = base_device::memory::synchronize_memory_op< - T, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; + using syncmem_op = base_device::memory::synchronize_memory_op; }; #endif // __ENABLE_ATEN @@ -395,24 +425,25 @@ struct DiagoCGTraits * ABACUS (column-major for Davidson methods). 
*/ template -auto make_hpsi_func_fstyle( - std::function(py::array_t)> mm_op) +auto + make_hpsi_func_fstyle (std::function (py::array_t)> mm_op) { - return [mm_op](T* psi_in, T* hpsi_out, const int ld_psi, const int nvec) { - // Create F-style numpy array (column-major) - py::array_t psi({ld_psi, nvec}); - py::buffer_info buf = psi.request(); - T* ptr = static_cast(buf.ptr); - std::copy(psi_in, psi_in + nvec * ld_psi, ptr); - - // Call Python function - py::array_t hpsi = mm_op(psi); - - // Copy result back - py::buffer_info hpsi_buf = hpsi.request(); - T* hpsi_ptr = static_cast(hpsi_buf.ptr); - std::copy(hpsi_ptr, hpsi_ptr + nvec * ld_psi, hpsi_out); - }; + return [mm_op] (T* psi_in, T* hpsi_out, const int ld_psi, const int nvec) + { + // Create F-style numpy array (column-major) + py::array_t psi ({ld_psi, nvec}); + py::buffer_info buf = psi.request (); + T* ptr = static_cast (buf.ptr); + std::copy (psi_in, psi_in + nvec * ld_psi, ptr); + + // Call Python function + py::array_t hpsi = mm_op (psi); + + // Copy result back + py::buffer_info hpsi_buf = hpsi.request (); + T* hpsi_ptr = static_cast (hpsi_buf.ptr); + std::copy (hpsi_ptr, hpsi_ptr + nvec * ld_psi, hpsi_out); + }; } /** @@ -421,14 +452,14 @@ auto make_hpsi_func_fstyle( * For non-orthogonal basis, S*psi = psi (identity operation). 
*/ template -auto make_spsi_func_identity() +auto + make_spsi_func_identity () { using T = typename Traits::T; using syncmem_op = typename Traits::syncmem_op; - return [](const T* psi_in, T* spsi_out, const int nrow, const int nbands) { - syncmem_op()(spsi_out, psi_in, static_cast(nbands * nrow)); - }; + return [] (const T* psi_in, T* spsi_out, const int nrow, const int nbands) + { syncmem_op () (spsi_out, psi_in, static_cast (nbands * nrow)); }; } #ifdef __ENABLE_ATEN @@ -436,51 +467,50 @@ auto make_spsi_func_identity() * @brief Create hpsi_func lambda for tensor interface */ template -auto make_hpsi_func_tensor( - std::function(py::array_t)> mm_op) +auto + make_hpsi_func_tensor (std::function (py::array_t)> mm_op) { - return [mm_op](const ct::Tensor& psi_in, ct::Tensor& hpsi_out) { - const auto ndim = psi_in.shape().ndim(); - REQUIRES_OK(ndim <= 2, "dims of psi_in should be less than or equal to 2"); - const int nvec = ndim == 1 ? 1 : psi_in.shape().dim_size(0); - const int ld_psi = ndim == 1 ? psi_in.NumElements() : psi_in.shape().dim_size(1); - - // Create numpy array (row-major for CG) - py::array_t psi({ld_psi, nvec}); - py::buffer_info buf = psi.request(); - T* ptr = static_cast(buf.ptr); - std::copy(psi_in.data(), psi_in.data() + nvec * ld_psi, ptr); - - // Call Python function - py::array_t hpsi = mm_op(psi); - - // Copy result back - py::buffer_info hpsi_buf = hpsi.request(); - T* hpsi_ptr = static_cast(hpsi_buf.ptr); - std::copy(hpsi_ptr, hpsi_ptr + nvec * ld_psi, hpsi_out.data()); - }; + return [mm_op] (const ct::Tensor& psi_in, ct::Tensor& hpsi_out) + { + const auto ndim = psi_in.shape ().ndim (); + REQUIRES_OK (ndim <= 2, "dims of psi_in should be less than or equal to 2"); + const int nvec = ndim == 1 ? 1 : psi_in.shape ().dim_size (0); + const int ld_psi = ndim == 1 ? 
psi_in.NumElements () : psi_in.shape ().dim_size (1); + + // Create numpy array (row-major for CG) + py::array_t psi ({ld_psi, nvec}); + py::buffer_info buf = psi.request (); + T* ptr = static_cast (buf.ptr); + std::copy (psi_in.data (), psi_in.data () + nvec * ld_psi, ptr); + + // Call Python function + py::array_t hpsi = mm_op (psi); + + // Copy result back + py::buffer_info hpsi_buf = hpsi.request (); + T* hpsi_ptr = static_cast (hpsi_buf.ptr); + std::copy (hpsi_ptr, hpsi_ptr + nvec * ld_psi, hpsi_out.data ()); + }; } /** * @brief Create spsi_func lambda for tensor interface (identity) */ template -auto make_spsi_func_tensor_identity() +auto + make_spsi_func_tensor_identity () { using T = typename Traits::T; using syncmem_op = typename Traits::syncmem_op; - return [](const ct::Tensor& psi_in, ct::Tensor& spsi_out) { - const auto ndim = psi_in.shape().ndim(); - REQUIRES_OK(ndim <= 2, "dims of psi_in should be less than or equal to 2"); - const int nrow = ndim == 1 ? psi_in.NumElements() : psi_in.shape().dim_size(1); - const int nbands = ndim == 1 ? 1 : psi_in.shape().dim_size(0); - syncmem_op()( - spsi_out.data(), - psi_in.data(), - static_cast(nrow * nbands) - ); - }; + return [] (const ct::Tensor& psi_in, ct::Tensor& spsi_out) + { + const auto ndim = psi_in.shape ().ndim (); + REQUIRES_OK (ndim <= 2, "dims of psi_in should be less than or equal to 2"); + const int nrow = ndim == 1 ? psi_in.NumElements () : psi_in.shape ().dim_size (1); + const int nbands = ndim == 1 ? 
1 : psi_in.shape ().dim_size (0); + syncmem_op () (spsi_out.data (), psi_in.data (), static_cast (nrow * nbands)); + }; } #endif // __ENABLE_ATEN diff --git a/python/pyabacus/src/hsolver/py_diago_cg.hpp b/python/pyabacus/src/hsolver/py_diago_cg.hpp index 0ab2cb25dab..29255664273 100644 --- a/python/pyabacus/src/hsolver/py_diago_cg.hpp +++ b/python/pyabacus/src/hsolver/py_diago_cg.hpp @@ -24,11 +24,11 @@ namespace py_hsolver class PyDiagoCG { -public: - PyDiagoCG(int dim, int num_eigs) : dim{dim}, num_eigs{num_eigs} { } - PyDiagoCG(const PyDiagoCG&) = delete; - PyDiagoCG& operator=(const PyDiagoCG&) = delete; - PyDiagoCG(PyDiagoCG&& other) + public: + PyDiagoCG (int dim, int num_eigs) : dim{dim}, num_eigs{num_eigs} {} + PyDiagoCG (const PyDiagoCG&) = delete; + PyDiagoCG& operator= (const PyDiagoCG&) = delete; + PyDiagoCG (PyDiagoCG&& other) { psi = other.psi; other.psi = nullptr; @@ -37,142 +37,144 @@ class PyDiagoCG other.eig = nullptr; } - ~PyDiagoCG() + ~PyDiagoCG () { - if (psi != nullptr) - { - delete psi; - psi = nullptr; - } + if (psi != nullptr) + { + delete psi; + psi = nullptr; + } if (eig != nullptr) - { - delete eig; - eig = nullptr; - } + { + delete eig; + eig = nullptr; + } } - void init_eig() + void + init_eig () { - eig = new ct::Tensor(ct::DataType::DT_DOUBLE, {num_eigs}); - eig->zero(); + eig = new ct::Tensor (ct::DataType::DT_DOUBLE, {num_eigs}); + eig->zero (); } - py::array_t get_eig() + py::array_t + get_eig () { - py::array_t eig_out(eig->NumElements()); - py::buffer_info eig_buf = eig_out.request(); - double* eig_out_ptr = static_cast(eig_buf.ptr); + py::array_t eig_out (eig->NumElements ()); + py::buffer_info eig_buf = eig_out.request (); + double* eig_out_ptr = static_cast (eig_buf.ptr); - if (eig == nullptr) { - throw std::runtime_error("eig is not initialized"); - } - double* eig_ptr = eig->data(); + if (eig == nullptr) + { + throw std::runtime_error ("eig is not initialized"); + } + double* eig_ptr = eig->data (); - std::copy(eig_ptr, 
eig_ptr + eig->NumElements(), eig_out_ptr); + std::copy (eig_ptr, eig_ptr + eig->NumElements (), eig_out_ptr); return eig_out; } - void set_psi(py::array_t> psi_in) + void + set_psi (py::array_t> psi_in) { - py::buffer_info psi_buf = psi_in.request(); - std::complex* psi_ptr = static_cast*>(psi_buf.ptr); - - psi = new ct::TensorMap( - psi_ptr, - ct::DataType::DT_COMPLEX_DOUBLE, - ct::DeviceType::CpuDevice, - ct::TensorShape({num_eigs, dim}) - ); + py::buffer_info psi_buf = psi_in.request (); + std::complex* psi_ptr = static_cast*> (psi_buf.ptr); + + psi = new ct::TensorMap (psi_ptr, + ct::DataType::DT_COMPLEX_DOUBLE, + ct::DeviceType::CpuDevice, + ct::TensorShape ({num_eigs, dim})); } - py::array_t> get_psi() + py::array_t> + get_psi () { - py::array_t> psi_out({num_eigs, dim}); - py::buffer_info psi_buf = psi_out.request(); - std::complex* psi_out_ptr = static_cast*>(psi_buf.ptr); + py::array_t> psi_out ({num_eigs, dim}); + py::buffer_info psi_buf = psi_out.request (); + std::complex* psi_out_ptr = static_cast*> (psi_buf.ptr); - if (psi == nullptr) { - throw std::runtime_error("psi is not initialized"); - } - std::complex* psi_ptr = psi->data>(); + if (psi == nullptr) + { + throw std::runtime_error ("psi is not initialized"); + } + std::complex* psi_ptr = psi->data> (); - std::copy(psi_ptr, psi_ptr + psi->NumElements(), psi_out_ptr); + std::copy (psi_ptr, psi_ptr + psi->NumElements (), psi_out_ptr); return psi_out; } - void set_prec(py::array_t prec_in) + void + set_prec (py::array_t prec_in) { - py::buffer_info prec_buf = prec_in.request(); - double* prec_ptr = static_cast(prec_buf.ptr); - - prec = new ct::TensorMap( - prec_ptr, - ct::DataType::DT_DOUBLE, - ct::DeviceType::CpuDevice, - ct::TensorShape({dim}) - ); + py::buffer_info prec_buf = prec_in.request (); + double* prec_ptr = static_cast (prec_buf.ptr); + + prec + = new ct::TensorMap (prec_ptr, ct::DataType::DT_DOUBLE, ct::DeviceType::CpuDevice, ct::TensorShape ({dim})); } - void 
diag(std::function>(py::array_t>)> mm_op, + void + diag (std::function> (py::array_t>)> mm_op, int diag_ndim, double tol, const std::vector& diag_ethr, bool need_subspace, bool scf_type, - int nproc_in_pool = 1 - ) { + int nproc_in_pool = 1) + { const std::string basis_type = "pw"; const std::string calculation = scf_type ? "scf" : "nscf"; - auto hpsi_func = [mm_op] (const ct::Tensor& psi_in, ct::Tensor& hpsi_out) { - const auto ndim = psi_in.shape().ndim(); - REQUIRES_OK(ndim <= 2, "dims of psi_in should be less than or equal to 2"); - const int nvec = ndim == 1 ? 1 : psi_in.shape().dim_size(0); - const int ld_psi = ndim == 1 ? psi_in.NumElements() : psi_in.shape().dim_size(1); - - // Note: numpy's py::array_t is row-major, and - // our tensor-array is row-major - py::array_t> psi({ld_psi, nvec}); - py::buffer_info psi_buf = psi.request(); - std::complex* psi_ptr = static_cast*>(psi_buf.ptr); - std::copy(psi_in.data>(), psi_in.data>() + nvec * ld_psi, psi_ptr); - - py::array_t> hpsi = mm_op(psi); - - py::buffer_info hpsi_buf = hpsi.request(); - std::complex* hpsi_ptr = static_cast*>(hpsi_buf.ptr); - std::copy(hpsi_ptr, hpsi_ptr + nvec * ld_psi, hpsi_out.data>()); - }; - - auto subspace_func = [](const ct::Tensor& psi_in, ct::Tensor& psi_out, const bool S_orth) { /*do nothing*/ }; - - auto spsi_func = [this] (const ct::Tensor& psi_in, ct::Tensor& spsi_out) { - const auto ndim = psi_in.shape().ndim(); - REQUIRES_OK(ndim <= 2, "dims of psi_in should be less than or equal to 2"); - const int nrow = ndim == 1 ? psi_in.NumElements() : psi_in.shape().dim_size(1); - const int nbands = ndim == 1 ? 
1 : psi_in.shape().dim_size(0); - syncmem_z2z_h2h_op()( - spsi_out.data>(), - psi_in.data>(), - static_cast(nrow * nbands) - ); - }; - - cg = std::make_unique, base_device::DEVICE_CPU>>( - basis_type, - calculation, - need_subspace, - subspace_func, - tol, - diag_ndim, - nproc_in_pool - ); - - cg->diag(hpsi_func, spsi_func, *psi, *eig, diag_ethr, *prec); + auto hpsi_func = [mm_op] (const ct::Tensor& psi_in, ct::Tensor& hpsi_out) + { + const auto ndim = psi_in.shape ().ndim (); + REQUIRES_OK (ndim <= 2, "dims of psi_in should be less than or equal to 2"); + const int nvec = ndim == 1 ? 1 : psi_in.shape ().dim_size (0); + const int ld_psi = ndim == 1 ? psi_in.NumElements () : psi_in.shape ().dim_size (1); + + // Note: numpy's py::array_t is row-major, and + // our tensor-array is row-major + py::array_t> psi ({ld_psi, nvec}); + py::buffer_info psi_buf = psi.request (); + std::complex* psi_ptr = static_cast*> (psi_buf.ptr); + std::copy (psi_in.data> (), + psi_in.data> () + nvec * ld_psi, + psi_ptr); + + py::array_t> hpsi = mm_op (psi); + + py::buffer_info hpsi_buf = hpsi.request (); + std::complex* hpsi_ptr = static_cast*> (hpsi_buf.ptr); + std::copy (hpsi_ptr, hpsi_ptr + nvec * ld_psi, hpsi_out.data> ()); + }; + + auto subspace_func = [] (const ct::Tensor& psi_in, ct::Tensor& psi_out, const bool S_orth) { /*do nothing*/ }; + + auto spsi_func = [this] (const ct::Tensor& psi_in, ct::Tensor& spsi_out) + { + const auto ndim = psi_in.shape ().ndim (); + REQUIRES_OK (ndim <= 2, "dims of psi_in should be less than or equal to 2"); + const int nrow = ndim == 1 ? psi_in.NumElements () : psi_in.shape ().dim_size (1); + const int nbands = ndim == 1 ? 
1 : psi_in.shape ().dim_size (0); + syncmem_z2z_h2h_op () (spsi_out.data> (), + psi_in.data> (), + static_cast (nrow * nbands)); + }; + + cg = std::make_unique, base_device::DEVICE_CPU>> (basis_type, + calculation, + need_subspace, + subspace_func, + tol, + diag_ndim, + nproc_in_pool); + + cg->diag (hpsi_func, spsi_func, *psi, *eig, diag_ethr, *prec); } -private: + private: base_device::DEVICE_CPU* ctx = {}; int dim; diff --git a/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp b/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp index c258de44061..e73a2261b45 100644 --- a/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp +++ b/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp @@ -19,16 +19,16 @@ namespace py_hsolver class PyDiagoDavSubspace { -public: - PyDiagoDavSubspace(int nbasis, int nband) : nbasis(nbasis), nband(nband) + public: + PyDiagoDavSubspace (int nbasis, int nband) : nbasis (nbasis), nband (nband) { psi = new std::complex[nbasis * nband]; eigenvalue = new double[nband]; } - PyDiagoDavSubspace(const PyDiagoDavSubspace&) = delete; - PyDiagoDavSubspace& operator=(const PyDiagoDavSubspace&) = delete; - PyDiagoDavSubspace(PyDiagoDavSubspace&& other) : nbasis(other.nbasis), nband(other.nband) + PyDiagoDavSubspace (const PyDiagoDavSubspace&) = delete; + PyDiagoDavSubspace& operator= (const PyDiagoDavSubspace&) = delete; + PyDiagoDavSubspace (PyDiagoDavSubspace&& other) : nbasis (other.nbasis), nband (other.nband) { psi = other.psi; eigenvalue = other.eigenvalue; @@ -37,122 +37,123 @@ class PyDiagoDavSubspace other.eigenvalue = nullptr; } - ~PyDiagoDavSubspace() + ~PyDiagoDavSubspace () { - if (psi != nullptr) - { - delete[] psi; - psi = nullptr; - } - if (eigenvalue != nullptr) - { - delete[] eigenvalue; - eigenvalue = nullptr; - } + if (psi != nullptr) + { + delete[] psi; + psi = nullptr; + } + if (eigenvalue != nullptr) + { + delete[] eigenvalue; + eigenvalue = nullptr; + } } - void set_psi(py::array_t> psi_in) + void + set_psi 
(py::array_t> psi_in) { - assert(psi_in.size() == nbasis * nband); + assert (psi_in.size () == nbasis * nband); for (size_t i = 0; i < nbasis * nband; ++i) - { - psi[i] = psi_in.at(i); - } + { + psi[i] = psi_in.at (i); + } } - py::array_t> get_psi() + py::array_t> + get_psi () { - py::array_t> psi_out(nband * nbasis); - py::buffer_info psi_out_buf = psi_out.request(); + py::array_t> psi_out (nband * nbasis); + py::buffer_info psi_out_buf = psi_out.request (); - std::complex* psi_out_ptr = static_cast*>(psi_out_buf.ptr); + std::complex* psi_out_ptr = static_cast*> (psi_out_buf.ptr); for (size_t i = 0; i < nband * nbasis; ++i) - { - psi_out_ptr[i] = psi[i]; - } + { + psi_out_ptr[i] = psi[i]; + } - return psi_out; + return psi_out; } - void init_eigenvalue() + void + init_eigenvalue () { for (size_t i = 0; i < nband; ++i) - { - eigenvalue[i] = 0.0; - } + { + eigenvalue[i] = 0.0; + } } - py::array_t get_eigenvalue() + py::array_t + get_eigenvalue () { - py::array_t eigenvalue_out(nband); - py::buffer_info eigenvalue_out_buf = eigenvalue_out.request(); + py::array_t eigenvalue_out (nband); + py::buffer_info eigenvalue_out_buf = eigenvalue_out.request (); - double* eigenvalue_out_ptr = static_cast(eigenvalue_out_buf.ptr); + double* eigenvalue_out_ptr = static_cast (eigenvalue_out_buf.ptr); for (size_t i = 0; i < nband; ++i) - { - eigenvalue_out_ptr[i] = eigenvalue[i]; - } + { + eigenvalue_out_ptr[i] = eigenvalue[i]; + } return eigenvalue_out; } - int diag( - std::function>(py::array_t>)> mm_op, - std::vector& precond_vec, - int dav_ndim, - double tol, - int max_iter, - bool need_subspace, - std::vector& diag_ethr, - bool scf_type, - hsolver::diag_comm_info comm_info, - int diag_subspace, - int nb2d - ) { - auto hpsi_func = [mm_op] ( - std::complex *psi_in, - std::complex *hpsi_out, - const int ld_psi, - const int nvec - ) { - // Note: numpy's py::array_t is row-major, but - // our raw pointer-array is column-major - py::array_t, py::array::f_style> psi({ld_psi, nvec}); - 
py::buffer_info psi_buf = psi.request(); - std::complex* psi_ptr = static_cast*>(psi_buf.ptr); - std::copy(psi_in, psi_in + nvec * ld_psi, psi_ptr); - - py::array_t, py::array::f_style> hpsi = mm_op(psi); - - py::buffer_info hpsi_buf = hpsi.request(); - std::complex* hpsi_ptr = static_cast*>(hpsi_buf.ptr); - std::copy(hpsi_ptr, hpsi_ptr + nvec * ld_psi, hpsi_out); - }; - - auto spsi_func = [this](const std::complex* psi_in, - std::complex* spsi_out, - const int ld_psi, - const int nvec) { syncmem_op()(spsi_out, psi_in, static_cast(ld_psi * nvec)); }; - - obj = std::make_unique, base_device::DEVICE_CPU>>( - precond_vec, - nband, - nbasis, - dav_ndim, - tol, - max_iter, + int + diag (std::function> (py::array_t>)> mm_op, + std::vector& precond_vec, + int dav_ndim, + double tol, + int max_iter, + bool need_subspace, + std::vector& diag_ethr, + bool scf_type, + hsolver::diag_comm_info comm_info, + int diag_subspace, + int nb2d) + { + auto hpsi_func + = [mm_op] (std::complex* psi_in, std::complex* hpsi_out, const int ld_psi, const int nvec) + { + // Note: numpy's py::array_t is row-major, but + // our raw pointer-array is column-major + py::array_t, py::array::f_style> psi ({ld_psi, nvec}); + py::buffer_info psi_buf = psi.request (); + std::complex* psi_ptr = static_cast*> (psi_buf.ptr); + std::copy (psi_in, psi_in + nvec * ld_psi, psi_ptr); + + py::array_t, py::array::f_style> hpsi = mm_op (psi); + + py::buffer_info hpsi_buf = hpsi.request (); + std::complex* hpsi_ptr = static_cast*> (hpsi_buf.ptr); + std::copy (hpsi_ptr, hpsi_ptr + nvec * ld_psi, hpsi_out); + }; + + auto spsi_func + = [this] (const std::complex* psi_in, + std::complex* spsi_out, + const int ld_psi, + const int nvec) { syncmem_op () (spsi_out, psi_in, static_cast (ld_psi * nvec)); }; + + obj = std::make_unique, base_device::DEVICE_CPU>> ( + precond_vec, + nband, + nbasis, + dav_ndim, + tol, + max_iter, comm_info, diag_subspace, - nb2d - ); + nb2d); - return obj->diag(hpsi_func, spsi_func, psi, nbasis, 
eigenvalue, diag_ethr, scf_type); + return obj->diag (hpsi_func, spsi_func, psi, nbasis, eigenvalue, diag_ethr, scf_type); } -private: + private: std::complex* psi = nullptr; double* eigenvalue = nullptr; diff --git a/python/pyabacus/src/hsolver/py_diago_david.hpp b/python/pyabacus/src/hsolver/py_diago_david.hpp index 40261b01b93..43a336c5037 100644 --- a/python/pyabacus/src/hsolver/py_diago_david.hpp +++ b/python/pyabacus/src/hsolver/py_diago_david.hpp @@ -19,16 +19,16 @@ namespace py_hsolver class PyDiagoDavid { -public: - PyDiagoDavid(int nbasis, int nband) : nbasis(nbasis), nband(nband) + public: + PyDiagoDavid (int nbasis, int nband) : nbasis (nbasis), nband (nband) { psi = new std::complex[nbasis * nband]; eigenvalue = new double[nband]; } - PyDiagoDavid(const PyDiagoDavid&) = delete; - PyDiagoDavid& operator=(const PyDiagoDavid&) = delete; - PyDiagoDavid(PyDiagoDavid&& other) : nbasis(other.nbasis), nband(other.nband) + PyDiagoDavid (const PyDiagoDavid&) = delete; + PyDiagoDavid& operator= (const PyDiagoDavid&) = delete; + PyDiagoDavid (PyDiagoDavid&& other) : nbasis (other.nbasis), nband (other.nband) { psi = other.psi; eigenvalue = other.eigenvalue; @@ -37,118 +37,114 @@ class PyDiagoDavid other.eigenvalue = nullptr; } - ~PyDiagoDavid() + ~PyDiagoDavid () { - if (psi != nullptr) - { - delete[] psi; - psi = nullptr; - } - if (eigenvalue != nullptr) - { - delete[] eigenvalue; - eigenvalue = nullptr; - } + if (psi != nullptr) + { + delete[] psi; + psi = nullptr; + } + if (eigenvalue != nullptr) + { + delete[] eigenvalue; + eigenvalue = nullptr; + } } - void set_psi(py::array_t> psi_in) + void + set_psi (py::array_t> psi_in) { - assert(psi_in.size() == nbasis * nband); + assert (psi_in.size () == nbasis * nband); for (size_t i = 0; i < nbasis * nband; ++i) - { - psi[i] = psi_in.at(i); - } + { + psi[i] = psi_in.at (i); + } } - py::array_t> get_psi() + py::array_t> + get_psi () { - py::array_t> psi_out(nband * nbasis); - py::buffer_info psi_out_buf = 
psi_out.request(); + py::array_t> psi_out (nband * nbasis); + py::buffer_info psi_out_buf = psi_out.request (); - std::complex* psi_out_ptr = static_cast*>(psi_out_buf.ptr); + std::complex* psi_out_ptr = static_cast*> (psi_out_buf.ptr); for (size_t i = 0; i < nband * nbasis; ++i) - { - psi_out_ptr[i] = psi[i]; - } + { + psi_out_ptr[i] = psi[i]; + } - return psi_out; + return psi_out; } - void init_eigenvalue() + void + init_eigenvalue () { for (size_t i = 0; i < nband; ++i) - { - eigenvalue[i] = 0.0; - } + { + eigenvalue[i] = 0.0; + } } - py::array_t get_eigenvalue() + py::array_t + get_eigenvalue () { - py::array_t eigenvalue_out(nband); - py::buffer_info eigenvalue_out_buf = eigenvalue_out.request(); + py::array_t eigenvalue_out (nband); + py::buffer_info eigenvalue_out_buf = eigenvalue_out.request (); - double* eigenvalue_out_ptr = static_cast(eigenvalue_out_buf.ptr); + double* eigenvalue_out_ptr = static_cast (eigenvalue_out_buf.ptr); for (size_t i = 0; i < nband; ++i) - { - eigenvalue_out_ptr[i] = eigenvalue[i]; - } + { + eigenvalue_out_ptr[i] = eigenvalue[i]; + } return eigenvalue_out; } - int diag( - std::function>(py::array_t>)> mm_op, - std::vector& precond_vec, - int dav_ndim, - double tol, - std::vector& diag_ethr, - int max_iter, - hsolver::diag_comm_info comm_info - ) { - auto hpsi_func = [mm_op] ( - std::complex *psi_in, - std::complex *hpsi_out, - const int ld_psi, - const int nvec - ) { - // Note: numpy's py::array_t is row-major, but - // our raw pointer-array is column-major - py::array_t, py::array::f_style> psi({ld_psi, nvec}); - py::buffer_info psi_buf = psi.request(); - std::complex* psi_ptr = static_cast*>(psi_buf.ptr); - std::copy(psi_in, psi_in + nvec * ld_psi, psi_ptr); - - py::array_t, py::array::f_style> hpsi = mm_op(psi); - - py::buffer_info hpsi_buf = hpsi.request(); - std::complex* hpsi_ptr = static_cast*>(hpsi_buf.ptr); - std::copy(hpsi_ptr, hpsi_ptr + nvec * ld_psi, hpsi_out); - }; - - auto spsi_func = [this] ( - const std::complex 
*psi_in, - std::complex *spsi_out, - const int nrow, - const int nbands - ) { - syncmem_op()(spsi_out, psi_in, static_cast(nbands * nrow)); - }; - - obj = std::make_unique, base_device::DEVICE_CPU>>( - precond_vec.data(), - nband, - nbasis, - dav_ndim, - comm_info - ); - - return obj->diag(hpsi_func, spsi_func, nbasis, psi, eigenvalue, diag_ethr, max_iter); + int + diag (std::function> (py::array_t>)> mm_op, + std::vector& precond_vec, + int dav_ndim, + double tol, + std::vector& diag_ethr, + int max_iter, + hsolver::diag_comm_info comm_info) + { + auto hpsi_func + = [mm_op] (std::complex* psi_in, std::complex* hpsi_out, const int ld_psi, const int nvec) + { + // Note: numpy's py::array_t is row-major, but + // our raw pointer-array is column-major + py::array_t, py::array::f_style> psi ({ld_psi, nvec}); + py::buffer_info psi_buf = psi.request (); + std::complex* psi_ptr = static_cast*> (psi_buf.ptr); + std::copy (psi_in, psi_in + nvec * ld_psi, psi_ptr); + + py::array_t, py::array::f_style> hpsi = mm_op (psi); + + py::buffer_info hpsi_buf = hpsi.request (); + std::complex* hpsi_ptr = static_cast*> (hpsi_buf.ptr); + std::copy (hpsi_ptr, hpsi_ptr + nvec * ld_psi, hpsi_out); + }; + + auto spsi_func + = [this] (const std::complex* psi_in, + std::complex* spsi_out, + const int nrow, + const int nbands) { syncmem_op () (spsi_out, psi_in, static_cast (nbands * nrow)); }; + + obj = std::make_unique, base_device::DEVICE_CPU>> (precond_vec.data (), + nband, + nbasis, + dav_ndim, + comm_info); + + return obj->diag (hpsi_func, spsi_func, nbasis, psi, eigenvalue, diag_ethr, max_iter); } -private: + private: std::complex* psi = nullptr; double* eigenvalue = nullptr; @@ -158,7 +154,8 @@ class PyDiagoDavid std::unique_ptr, base_device::DEVICE_CPU>> obj; base_device::DEVICE_CPU* ctx = {}; - using syncmem_op = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; + using syncmem_op = base_device::memory:: + synchronize_memory_op, 
base_device::DEVICE_CPU, base_device::DEVICE_CPU>; }; } // namespace py_hsolver diff --git a/python/pyabacus/src/hsolver/py_hsolver.cpp b/python/pyabacus/src/hsolver/py_hsolver.cpp index 02f91c9441e..1004fa44aaf 100644 --- a/python/pyabacus/src/hsolver/py_hsolver.cpp +++ b/python/pyabacus/src/hsolver/py_hsolver.cpp @@ -24,17 +24,19 @@ using namespace pybind11::literals; using namespace pyabacus::hsolver; -void bind_hsolver(py::module& m) +void + bind_hsolver (py::module& m) { // Bind diag_comm_info struct - py::class_(m, "diag_comm_info") - .def(py::init(), "rank"_a, "nproc"_a) - .def_readonly("rank", &hsolver::diag_comm_info::rank) - .def_readonly("nproc", &hsolver::diag_comm_info::nproc); + py::class_ (m, "diag_comm_info") + .def (py::init (), "rank"_a, "nproc"_a) + .def_readonly ("rank", &hsolver::diag_comm_info::rank) + .def_readonly ("nproc", &hsolver::diag_comm_info::nproc); // Bind PyDiagoDavSubspace using adapter - py::class_(m, "diago_dav_subspace") - .def(py::init(), R"pbdoc( + py::class_ (m, "diago_dav_subspace") + .def (py::init (), + R"pbdoc( Constructor of diago_dav_subspace, a class for diagonalizing a linear operator using the Davidson-Subspace Method. @@ -48,8 +50,12 @@ void bind_hsolver(py::module& m) The number of basis functions. nband : int The number of bands to be calculated. - )pbdoc", "nbasis"_a, "nband"_a) - .def("diag", &PyDiagoDavSubspaceAdapter::diag, R"pbdoc( + )pbdoc", + "nbasis"_a, + "nband"_a) + .def ("diag", + &PyDiagoDavSubspaceAdapter::diag, + R"pbdoc( Diagonalize the linear operator using the Davidson-Subspace Method. Parameters @@ -85,33 +91,37 @@ void bind_hsolver(py::module& m) nb2d : int The block size in 2d block cyclic distribution if use elpa or scalapack. 
)pbdoc", - "mm_op"_a, - "precond_vec"_a, - "dav_ndim"_a, - "tol"_a, - "max_iter"_a, - "need_subspace"_a, - "diag_ethr"_a, - "scf_type"_a, - "comm_info"_a, - "diago_subspace"_a, - "nb2d"_a) - .def("set_psi", &PyDiagoDavSubspaceAdapter::set_psi, R"pbdoc( + "mm_op"_a, + "precond_vec"_a, + "dav_ndim"_a, + "tol"_a, + "max_iter"_a, + "need_subspace"_a, + "diag_ethr"_a, + "scf_type"_a, + "comm_info"_a, + "diago_subspace"_a, + "nb2d"_a) + .def ("set_psi", + &PyDiagoDavSubspaceAdapter::set_psi, + R"pbdoc( Set the initial guess of the eigenvectors, i.e. the wave functions. - )pbdoc", "psi_in"_a) - .def("get_psi", &PyDiagoDavSubspaceAdapter::get_psi, R"pbdoc( + )pbdoc", + "psi_in"_a) + .def ("get_psi", &PyDiagoDavSubspaceAdapter::get_psi, R"pbdoc( Get the eigenvectors. )pbdoc") - .def("init_eigenvalue", &PyDiagoDavSubspaceAdapter::init_eigenvalue, R"pbdoc( + .def ("init_eigenvalue", &PyDiagoDavSubspaceAdapter::init_eigenvalue, R"pbdoc( Initialize the eigenvalues as zero. )pbdoc") - .def("get_eigenvalue", &PyDiagoDavSubspaceAdapter::get_eigenvalue, R"pbdoc( + .def ("get_eigenvalue", &PyDiagoDavSubspaceAdapter::get_eigenvalue, R"pbdoc( Get the eigenvalues. )pbdoc"); // Bind PyDiagoDavid using adapter - py::class_(m, "diago_david") - .def(py::init(), R"pbdoc( + py::class_ (m, "diago_david") + .def (py::init (), + R"pbdoc( Constructor of diago_david, a class for diagonalizing a linear operator using the Davidson Method. @@ -125,8 +135,12 @@ void bind_hsolver(py::module& m) The number of basis functions. nband : int The number of bands to be calculated. - )pbdoc", "nbasis"_a, "nband"_a) - .def("diag", &PyDiagoDavidAdapter::diag, R"pbdoc( + )pbdoc", + "nbasis"_a, + "nband"_a) + .def ("diag", + &PyDiagoDavidAdapter::diag, + R"pbdoc( Diagonalize the linear operator using the Davidson Method. Parameters @@ -146,30 +160,33 @@ void bind_hsolver(py::module& m) max_iter : int The maximum number of iterations. 
)pbdoc", - "mm_op"_a, - "precond_vec"_a, - "dav_ndim"_a, - "tol"_a, - "diag_ethr"_a, - "max_iter"_a, - "comm_info"_a) - .def("set_psi", &PyDiagoDavidAdapter::set_psi, R"pbdoc( + "mm_op"_a, + "precond_vec"_a, + "dav_ndim"_a, + "tol"_a, + "diag_ethr"_a, + "max_iter"_a, + "comm_info"_a) + .def ("set_psi", + &PyDiagoDavidAdapter::set_psi, + R"pbdoc( Set the initial guess of the eigenvectors, i.e. the wave functions. - )pbdoc", "psi_in"_a) - .def("get_psi", &PyDiagoDavidAdapter::get_psi, R"pbdoc( + )pbdoc", + "psi_in"_a) + .def ("get_psi", &PyDiagoDavidAdapter::get_psi, R"pbdoc( Get the eigenvectors. )pbdoc") - .def("init_eigenvalue", &PyDiagoDavidAdapter::init_eigenvalue, R"pbdoc( + .def ("init_eigenvalue", &PyDiagoDavidAdapter::init_eigenvalue, R"pbdoc( Initialize the eigenvalues as zero. )pbdoc") - .def("get_eigenvalue", &PyDiagoDavidAdapter::get_eigenvalue, R"pbdoc( + .def ("get_eigenvalue", &PyDiagoDavidAdapter::get_eigenvalue, R"pbdoc( Get the eigenvalues. )pbdoc"); #ifdef __ENABLE_ATEN // Bind PyDiagoCG using adapter (only when ATen is available) - py::class_(m, "diago_cg") - .def(py::init(), R"pbdoc( + py::class_ (m, "diago_cg") + .def (py::init (), R"pbdoc( Constructor of diago_cg, a class for diagonalizing a linear operator using the Conjugate Gradient Method. @@ -177,9 +194,9 @@ void bind_hsolver(py::module& m) for invoking this class is a function defined in _hsolver.py, which uses this class to perform the calculations. )pbdoc") - .def("diag", - &PyDiagoCGAdapter::diag, - R"pbdoc( + .def ("diag", + &PyDiagoCGAdapter::diag, + R"pbdoc( Diagonalize the linear operator using the Conjugate Gradient Method. Parameters @@ -197,39 +214,44 @@ void bind_hsolver(py::module& m) Whether to use the SCF type, which is used to determine the convergence criterion. 
)pbdoc", - "mm_op"_a, - "max_iter"_a, - "tol"_a, - "diag_ethr"_a, - "need_subspace"_a, - "scf_type"_a, - "nproc_in_pool"_a) - .def("init_eig", &PyDiagoCGAdapter::init_eig, R"pbdoc( + "mm_op"_a, + "max_iter"_a, + "tol"_a, + "diag_ethr"_a, + "need_subspace"_a, + "scf_type"_a, + "nproc_in_pool"_a) + .def ("init_eig", &PyDiagoCGAdapter::init_eig, R"pbdoc( Initialize the eigenvalues. )pbdoc") - .def("get_eig", &PyDiagoCGAdapter::get_eig, R"pbdoc( + .def ("get_eig", &PyDiagoCGAdapter::get_eig, R"pbdoc( Get the eigenvalues. )pbdoc") - .def("set_psi", &PyDiagoCGAdapter::set_psi, R"pbdoc( + .def ("set_psi", + &PyDiagoCGAdapter::set_psi, + R"pbdoc( Set the eigenvectors. - )pbdoc", "psi_in"_a) - .def("get_psi", &PyDiagoCGAdapter::get_psi, R"pbdoc( + )pbdoc", + "psi_in"_a) + .def ("get_psi", &PyDiagoCGAdapter::get_psi, R"pbdoc( Get the eigenvectors. )pbdoc") - .def("set_prec", &PyDiagoCGAdapter::set_prec, R"pbdoc( + .def ("set_prec", + &PyDiagoCGAdapter::set_prec, + R"pbdoc( Set the preconditioner. 
- )pbdoc", "prec_in"_a); + )pbdoc", + "prec_in"_a); #else // Provide stub binding when ATen is not available // This allows the module to load but will raise an error if used - m.def("diago_cg_available", []() { return false; }, - "Check if diago_cg is available (requires ATen)"); + m.def ("diago_cg_available", [] () { return false; }, "Check if diago_cg is available (requires ATen)"); #endif } -PYBIND11_MODULE(_hsolver_pack, m) +PYBIND11_MODULE (_hsolver_pack, m) { - m.doc() = "Submodule for pyabacus: hsolver"; + m.doc () = "Submodule for pyabacus: hsolver"; - bind_hsolver(m); + bind_hsolver (m); } diff --git a/python/pyabacus/src/py_numerical_radial.cpp b/python/pyabacus/src/py_numerical_radial.cpp index 18500641cf0..f47fce37d00 100644 --- a/python/pyabacus/src/py_numerical_radial.cpp +++ b/python/pyabacus/src/py_numerical_radial.cpp @@ -12,41 +12,43 @@ using namespace pyabacus::utils; template using overload_cast_ = pybind11::detail::overload_cast_impl; -void bind_numerical_radial(py::module& m) +void + bind_numerical_radial (py::module& m) { // Create the submodule for NumericalRadial - py::module m_numerical_radial = m.def_submodule("NumericalRadial"); + py::module m_numerical_radial = m.def_submodule ("NumericalRadial"); - py::class_(m_numerical_radial, "NumericalRadial") - .def(py::init<>()) - .def( + py::class_ (m_numerical_radial, "NumericalRadial") + .def (py::init<> ()) + .def ( "build", - [](NumericalRadial& self, - const int l, - const bool for_r_space, - const int ngrid, - py::array_t grid, - py::array_t value, - const int p = 0, - const int izeta = 0, - const std::string symbol = "", - const int itype, - const bool init_sbt = true) { - check_1d_array(grid, "grid"); - check_1d_array(value, "value"); - check_same_size(grid, value, "grid", "value"); + [] (NumericalRadial& self, + const int l, + const bool for_r_space, + const int ngrid, + py::array_t grid, + py::array_t value, + const int p = 0, + const int izeta = 0, + const std::string symbol = "", + 
const int itype, + const bool init_sbt = true) + { + check_1d_array (grid, "grid"); + check_1d_array (value, "value"); + check_same_size (grid, value, "grid", "value"); - self.build(l, - for_r_space, - static_cast(grid.size()), - get_array_ptr(grid), - get_array_ptr(value), - p, - izeta, - symbol, - itype, - init_sbt); - }, + self.build (l, + for_r_space, + static_cast (grid.size ()), + get_array_ptr (grid), + get_array_ptr (value), + p, + izeta, + symbol, + itype, + init_sbt); + }, "l"_a, "for_r_space"_a, "ngrid"_a, @@ -58,53 +60,56 @@ void bind_numerical_radial(py::module& m) "itype"_a = 0, "init_sbt"_a = true) // leave set_transformer for future since no wrapper for Transformer yet - .def( + .def ( "set_grid", - [](NumericalRadial& self, - const bool for_r_space, - const int ngrid, - py::array_t grid, - const char mode) { - check_1d_array(grid, "grid"); + [] (NumericalRadial& self, + const bool for_r_space, + const int ngrid, + py::array_t grid, + const char mode) + { + check_1d_array (grid, "grid"); - self.set_grid(for_r_space, ngrid, get_array_ptr(grid), mode); - }, + self.set_grid (for_r_space, ngrid, get_array_ptr (grid), mode); + }, "for_r_space"_a, "ngrid"_a, "grid"_a, "mode"_a = 'i') - .def("set_uniform_grid", - &NumericalRadial::set_uniform_grid, - "for_r_space"_a, - "ngrid"_a, - "cutoff"_a, - "mode"_a = 'i', - "enable_fft"_a = false) - .def( + .def ("set_uniform_grid", + &NumericalRadial::set_uniform_grid, + "for_r_space"_a, + "ngrid"_a, + "cutoff"_a, + "mode"_a = 'i', + "enable_fft"_a = false) + .def ( "set_value", - [](NumericalRadial& self, const bool for_r_space, py::array_t value, const int p) { - check_1d_array(value, "value"); + [] (NumericalRadial& self, const bool for_r_space, py::array_t value, const int p) + { + check_1d_array (value, "value"); - self.set_value(for_r_space, get_array_ptr(value), p); - }, + self.set_value (for_r_space, get_array_ptr (value), p); + }, "for_r_space"_a, "value"_a, "p"_a) - .def("wipe", &NumericalRadial::wipe, 
"r_space"_a = true, "k_space"_a = true) - .def( + .def ("wipe", &NumericalRadial::wipe, "r_space"_a = true, "k_space"_a = true) + .def ( "radtab", - [](NumericalRadial& self, - const char op, - NumericalRadial& ket, - const int l, - py::array_t table, - const int nr_tab, - const double rmax_tab, - const bool deriv) { - check_1d_array(table, "table"); + [] (NumericalRadial& self, + const char op, + NumericalRadial& ket, + const int l, + py::array_t table, + const int nr_tab, + const double rmax_tab, + const bool deriv) + { + check_1d_array (table, "table"); - self.radtab(op, ket, l, get_array_ptr(table), nr_tab, rmax_tab, deriv); - }, + self.radtab (op, ket, l, get_array_ptr (table), nr_tab, rmax_tab, deriv); + }, "op"_a, "ket"_a, "l"_a, @@ -112,33 +117,30 @@ void bind_numerical_radial(py::module& m) "nr_tab"_a, "rmax_tab"_a, "deriv"_a = false) - .def("normalize", &NumericalRadial::normalize, "for_r_space"_a = true) + .def ("normalize", &NumericalRadial::normalize, "for_r_space"_a = true) // Getters - .def_property_readonly("symbol", &NumericalRadial::symbol) - .def_property_readonly("itype", &NumericalRadial::itype) - .def_property_readonly("izeta", &NumericalRadial::izeta) - .def_property_readonly("l", &NumericalRadial::l) - .def_property_readonly("nr", &NumericalRadial::nr) - .def_property_readonly("nk", &NumericalRadial::nk) - .def_property_readonly("rcut", &NumericalRadial::rcut) - .def_property_readonly("kcut", &NumericalRadial::kcut) - .def_property_readonly("rgrid", - [](NumericalRadial& self) { - return numpy_from_ptr_copy(self.rgrid(), static_cast(self.nr())); - }) - .def_property_readonly("kgrid", - [](NumericalRadial& self) { - return numpy_from_ptr_copy(self.kgrid(), static_cast(self.nk())); - }) - .def_property_readonly("rvalue", - [](NumericalRadial& self) { - return numpy_from_ptr_copy(self.rvalue(), static_cast(self.nr())); - }) - .def_property_readonly("kvalue", - [](NumericalRadial& self) { - return numpy_from_ptr_copy(self.kvalue(), 
static_cast(self.nk())); - }) - .def_property_readonly("pr", &NumericalRadial::pr) - .def_property_readonly("pk", &NumericalRadial::pk) - .def_property_readonly("is_fft_compliant", overload_cast_<>()(&NumericalRadial::is_fft_compliant, py::const_)); + .def_property_readonly ("symbol", &NumericalRadial::symbol) + .def_property_readonly ("itype", &NumericalRadial::itype) + .def_property_readonly ("izeta", &NumericalRadial::izeta) + .def_property_readonly ("l", &NumericalRadial::l) + .def_property_readonly ("nr", &NumericalRadial::nr) + .def_property_readonly ("nk", &NumericalRadial::nk) + .def_property_readonly ("rcut", &NumericalRadial::rcut) + .def_property_readonly ("kcut", &NumericalRadial::kcut) + .def_property_readonly ("rgrid", + [] (NumericalRadial& self) + { return numpy_from_ptr_copy (self.rgrid (), static_cast (self.nr ())); }) + .def_property_readonly ("kgrid", + [] (NumericalRadial& self) + { return numpy_from_ptr_copy (self.kgrid (), static_cast (self.nk ())); }) + .def_property_readonly ("rvalue", + [] (NumericalRadial& self) + { return numpy_from_ptr_copy (self.rvalue (), static_cast (self.nr ())); }) + .def_property_readonly ("kvalue", + [] (NumericalRadial& self) + { return numpy_from_ptr_copy (self.kvalue (), static_cast (self.nk ())); }) + .def_property_readonly ("pr", &NumericalRadial::pr) + .def_property_readonly ("pk", &NumericalRadial::pk) + .def_property_readonly ("is_fft_compliant", + overload_cast_<> () (&NumericalRadial::is_fft_compliant, py::const_)); } diff --git a/python/pyabacus/src/utils/pybind_utils.h b/python/pyabacus/src/utils/pybind_utils.h index 1fc24fbf2be..88e90b9195d 100644 --- a/python/pyabacus/src/utils/pybind_utils.h +++ b/python/pyabacus/src/utils/pybind_utils.h @@ -26,8 +26,10 @@ namespace py = pybind11; -namespace pyabacus { -namespace utils { +namespace pyabacus +{ +namespace utils +{ // ============================================================================ // Array Validation Functions @@ -40,14 +42,15 @@ 
namespace utils { * @throws std::runtime_error if array is not 1D */ template -void check_1d_array(const py::array_t& arr, const std::string& name = "array") +void + check_1d_array (const py::array_t& arr, const std::string& name = "array") { - py::buffer_info info = arr.request(); + py::buffer_info info = arr.request (); if (info.ndim != 1) - { - throw std::runtime_error(name + " must be 1-dimensional, got " + - std::to_string(info.ndim) + " dimensions"); - } + { + throw std::runtime_error (name + " must be 1-dimensional, got " + std::to_string (info.ndim) + + " dimensions"); + } } /** @@ -57,14 +60,15 @@ void check_1d_array(const py::array_t& arr, const std::string& name = "array" * @throws std::runtime_error if array is not 2D */ template -void check_2d_array(const py::array_t& arr, const std::string& name = "array") +void + check_2d_array (const py::array_t& arr, const std::string& name = "array") { - py::buffer_info info = arr.request(); + py::buffer_info info = arr.request (); if (info.ndim != 2) - { - throw std::runtime_error(name + " must be 2-dimensional, got " + - std::to_string(info.ndim) + " dimensions"); - } + { + throw std::runtime_error (name + " must be 2-dimensional, got " + std::to_string (info.ndim) + + " dimensions"); + } } /** @@ -75,15 +79,14 @@ void check_2d_array(const py::array_t& arr, const std::string& name = "array" * @throws std::runtime_error if size doesn't match */ template -void check_array_size(const py::array_t& arr, size_t expected_size, - const std::string& name = "array") +void + check_array_size (const py::array_t& arr, size_t expected_size, const std::string& name = "array") { - if (static_cast(arr.size()) != expected_size) - { - throw std::runtime_error(name + " must have size " + - std::to_string(expected_size) + ", got " + - std::to_string(arr.size())); - } + if (static_cast (arr.size ()) != expected_size) + { + throw std::runtime_error (name + " must have size " + std::to_string (expected_size) + ", got " + + 
std::to_string (arr.size ())); + } } /** @@ -94,23 +97,23 @@ void check_array_size(const py::array_t& arr, size_t expected_size, * @throws std::runtime_error if shape doesn't match */ template -void check_array_shape(const py::array_t& arr, +void + check_array_shape (const py::array_t& arr, const std::vector& expected_shape, const std::string& name = "array") { - py::buffer_info info = arr.request(); - if (info.ndim != static_cast(expected_shape.size())) - { - throw std::runtime_error(name + " has wrong number of dimensions"); - } - for (size_t i = 0; i < expected_shape.size(); ++i) - { - if (info.shape[i] != expected_shape[i]) + py::buffer_info info = arr.request (); + if (info.ndim != static_cast (expected_shape.size ())) { - throw std::runtime_error(name + " has wrong shape at dimension " + - std::to_string(i)); + throw std::runtime_error (name + " has wrong number of dimensions"); + } + for (size_t i = 0; i < expected_shape.size (); ++i) + { + if (info.shape[i] != expected_shape[i]) + { + throw std::runtime_error (name + " has wrong shape at dimension " + std::to_string (i)); + } } - } } /** @@ -122,15 +125,16 @@ void check_array_shape(const py::array_t& arr, * @throws std::runtime_error if sizes don't match */ template -void check_same_size(const py::array_t& arr1, const py::array_t& arr2, +void + check_same_size (const py::array_t& arr1, + const py::array_t& arr2, const std::string& name1 = "array1", const std::string& name2 = "array2") { - if (arr1.size() != arr2.size()) - { - throw std::runtime_error(name1 + " and " + name2 + - " must have the same size"); - } + if (arr1.size () != arr2.size ()) + { + throw std::runtime_error (name1 + " and " + name2 + " must have the same size"); + } } // ============================================================================ @@ -143,10 +147,11 @@ void check_same_size(const py::array_t& arr1, const py::array_t& arr2, * @return Pointer to the array data */ template -T* get_array_ptr(py::array_t& arr) +T* + 
get_array_ptr (py::array_t& arr) { - py::buffer_info info = arr.request(); - return static_cast(info.ptr); + py::buffer_info info = arr.request (); + return static_cast (info.ptr); } /** @@ -155,10 +160,11 @@ T* get_array_ptr(py::array_t& arr) * @return Const pointer to the array data */ template -const T* get_array_ptr(const py::array_t& arr) +const T* + get_array_ptr (const py::array_t& arr) { - py::buffer_info info = arr.request(); - return static_cast(info.ptr); + py::buffer_info info = arr.request (); + return static_cast (info.ptr); } /** @@ -168,10 +174,11 @@ const T* get_array_ptr(const py::array_t& arr) * @return Pointer to the array data */ template -T* get_1d_array_ptr(py::array_t& arr, const std::string& name = "array") +T* + get_1d_array_ptr (py::array_t& arr, const std::string& name = "array") { - check_1d_array(arr, name); - return get_array_ptr(arr); + check_1d_array (arr, name); + return get_array_ptr (arr); } /** @@ -181,10 +188,11 @@ T* get_1d_array_ptr(py::array_t& arr, const std::string& name = "array") * @return Const pointer to the array data */ template -const T* get_1d_array_ptr(const py::array_t& arr, const std::string& name = "array") +const T* + get_1d_array_ptr (const py::array_t& arr, const std::string& name = "array") { - check_1d_array(arr, name); - return get_array_ptr(arr); + check_1d_array (arr, name); + return get_array_ptr (arr); } // ============================================================================ @@ -197,10 +205,11 @@ const T* get_1d_array_ptr(const py::array_t& arr, const std::string& name = " * @return New numpy array with copied data */ template -py::array_t numpy_from_vector_copy(const std::vector& vec) +py::array_t + numpy_from_vector_copy (const std::vector& vec) { - py::array_t result(vec.size()); - std::memcpy(result.mutable_data(), vec.data(), vec.size() * sizeof(T)); + py::array_t result (vec.size ()); + std::memcpy (result.mutable_data (), vec.data (), vec.size () * sizeof (T)); return result; } @@ 
-211,10 +220,11 @@ py::array_t numpy_from_vector_copy(const std::vector& vec) * @return New numpy array with copied data */ template -py::array_t numpy_from_ptr_copy(const T* ptr, size_t size) +py::array_t + numpy_from_ptr_copy (const T* ptr, size_t size) { - py::array_t result(size); - std::memcpy(result.mutable_data(), ptr, size * sizeof(T)); + py::array_t result (size); + std::memcpy (result.mutable_data (), ptr, size * sizeof (T)); return result; } @@ -226,12 +236,12 @@ py::array_t numpy_from_ptr_copy(const T* ptr, size_t size) * @return New numpy array with copied data */ template -py::array_t numpy_from_ptr_copy_2d(const T* ptr, size_t nrow, size_t ncol) +py::array_t + numpy_from_ptr_copy_2d (const T* ptr, size_t nrow, size_t ncol) { - std::vector shape = {static_cast(nrow), - static_cast(ncol)}; - py::array_t result(shape); - std::memcpy(result.mutable_data(), ptr, nrow * ncol * sizeof(T)); + std::vector shape = {static_cast (nrow), static_cast (ncol)}; + py::array_t result (shape); + std::memcpy (result.mutable_data (), ptr, nrow * ncol * sizeof (T)); return result; } @@ -242,15 +252,16 @@ py::array_t numpy_from_ptr_copy_2d(const T* ptr, size_t nrow, size_t ncol) * @return New numpy array with copied data */ template -py::array_t numpy_from_ptr_copy_nd(const T* ptr, const std::vector& shape) +py::array_t + numpy_from_ptr_copy_nd (const T* ptr, const std::vector& shape) { - py::array_t result(shape); + py::array_t result (shape); size_t total_size = 1; - for (auto dim : shape) - { - total_size *= static_cast(dim); - } - std::memcpy(result.mutable_data(), ptr, total_size * sizeof(T)); + for (auto dim: shape) + { + total_size *= static_cast (dim); + } + std::memcpy (result.mutable_data (), ptr, total_size * sizeof (T)); return result; } @@ -270,25 +281,26 @@ py::array_t numpy_from_ptr_copy_nd(const T* ptr, const std::vector& * @return Numpy array view */ template -py::array_t numpy_view_with_keepalive(T* ptr, - const std::vector& shape, - std::shared_ptr 
owner_ptr) +py::array_t + numpy_view_with_keepalive (T* ptr, const std::vector& shape, std::shared_ptr owner_ptr) { // Calculate strides for C-contiguous array - std::vector strides(shape.size()); - ssize_t stride = sizeof(T); - for (int i = static_cast(shape.size()) - 1; i >= 0; --i) - { - strides[i] = stride; - stride *= shape[i]; - } + std::vector strides (shape.size ()); + ssize_t stride = sizeof (T); + for (int i = static_cast (shape.size ()) - 1; i >= 0; --i) + { + strides[i] = stride; + stride *= shape[i]; + } // Create capsule to prevent deallocation - py::capsule free_when_done(owner_ptr.get(), [](void*) { - // The shared_ptr prevents deallocation, capsule just holds reference - }); + py::capsule free_when_done (owner_ptr.get (), + [] (void*) + { + // The shared_ptr prevents deallocation, capsule just holds reference + }); - return py::array_t(shape, strides, ptr, free_when_done); + return py::array_t (shape, strides, ptr, free_when_done); } /** @@ -301,12 +313,10 @@ py::array_t numpy_view_with_keepalive(T* ptr, * @return Numpy array view */ template -py::array_t numpy_view_1d(T* ptr, size_t size) +py::array_t + numpy_view_1d (T* ptr, size_t size) { - return py::array_t({static_cast(size)}, - {sizeof(T)}, - ptr, - py::none()); + return py::array_t ({static_cast (size)}, {sizeof (T)}, ptr, py::none ()); } /** @@ -320,14 +330,13 @@ py::array_t numpy_view_1d(T* ptr, size_t size) * @return Numpy array view (row-major) */ template -py::array_t numpy_view_2d(T* ptr, size_t nrow, size_t ncol) +py::array_t + numpy_view_2d (T* ptr, size_t nrow, size_t ncol) { - return py::array_t( - {static_cast(nrow), static_cast(ncol)}, - {static_cast(ncol * sizeof(T)), sizeof(T)}, - ptr, - py::none() - ); + return py::array_t ({static_cast (nrow), static_cast (ncol)}, + {static_cast (ncol * sizeof (T)), sizeof (T)}, + ptr, + py::none ()); } // ============================================================================ @@ -345,20 +354,21 @@ py::array_t numpy_view_2d(T* ptr, 
size_t nrow, size_t ncol) * @return Return value from the function */ template -Ret call_python_safe(const py::function& func, Args&&... args) +Ret + call_python_safe (const py::function& func, Args&&... args) { try - { - return func(std::forward(args)...).template cast(); - } + { + return func (std::forward (args)...).template cast (); + } catch (const py::error_already_set& e) - { - throw std::runtime_error(std::string("Python callback error: ") + e.what()); - } + { + throw std::runtime_error (std::string ("Python callback error: ") + e.what ()); + } catch (const py::cast_error& e) - { - throw std::runtime_error(std::string("Python return type error: ") + e.what()); - } + { + throw std::runtime_error (std::string ("Python return type error: ") + e.what ()); + } } /** @@ -367,16 +377,17 @@ Ret call_python_safe(const py::function& func, Args&&... args) * @param args Arguments to pass to the function */ template -void call_python_safe_void(const py::function& func, Args&&... args) +void + call_python_safe_void (const py::function& func, Args&&... 
args) { try - { - func(std::forward(args)...); - } + { + func (std::forward (args)...); + } catch (const py::error_already_set& e) - { - throw std::runtime_error(std::string("Python callback error: ") + e.what()); - } + { + throw std::runtime_error (std::string ("Python callback error: ") + e.what ()); + } } // ============================================================================ @@ -399,10 +410,8 @@ struct BufferHelper * @brief Construct from numpy array * @param arr The numpy array */ - explicit BufferHelper(py::array_t& arr) - : info(arr.request()) - , ptr(static_cast(info.ptr)) - , size(static_cast(arr.size())) + explicit BufferHelper (py::array_t& arr) + : info (arr.request ()), ptr (static_cast (info.ptr)), size (static_cast (arr.size ())) { } @@ -410,10 +419,8 @@ struct BufferHelper * @brief Construct from const numpy array * @param arr The numpy array */ - explicit BufferHelper(const py::array_t& arr) - : info(arr.request()) - , ptr(static_cast(info.ptr)) - , size(static_cast(arr.size())) + explicit BufferHelper (const py::array_t& arr) + : info (arr.request ()), ptr (static_cast (info.ptr)), size (static_cast (arr.size ())) { } @@ -422,12 +429,13 @@ struct BufferHelper * @param name Name for error messages * @throws std::runtime_error if not 1D */ - void require_1d(const std::string& name = "array") const + void + require_1d (const std::string& name = "array") const { if (info.ndim != 1) - { - throw std::runtime_error(name + " must be 1-dimensional"); - } + { + throw std::runtime_error (name + " must be 1-dimensional"); + } } /** @@ -435,12 +443,13 @@ struct BufferHelper * @param name Name for error messages * @throws std::runtime_error if not 2D */ - void require_2d(const std::string& name = "array") const + void + require_2d (const std::string& name = "array") const { if (info.ndim != 2) - { - throw std::runtime_error(name + " must be 2-dimensional"); - } + { + throw std::runtime_error (name + " must be 2-dimensional"); + } } /** @@ -449,14 
+458,14 @@ struct BufferHelper * @param name Name for error messages * @throws std::runtime_error if size doesn't match */ - void require_size(size_t expected, const std::string& name = "array") const + void + require_size (size_t expected, const std::string& name = "array") const { if (size != expected) - { - throw std::runtime_error(name + " must have size " + - std::to_string(expected) + ", got " + - std::to_string(size)); - } + { + throw std::runtime_error (name + " must have size " + std::to_string (expected) + ", got " + + std::to_string (size)); + } } /** @@ -465,47 +474,62 @@ struct BufferHelper * @param name Name for error messages * @throws std::runtime_error if shape doesn't match */ - void require_shape(const std::vector& expected_shape, - const std::string& name = "array") const + void + require_shape (const std::vector& expected_shape, const std::string& name = "array") const { - if (info.ndim != static_cast(expected_shape.size())) - { - throw std::runtime_error(name + " has wrong number of dimensions"); - } - for (size_t i = 0; i < expected_shape.size(); ++i) - { - if (info.shape[i] != expected_shape[i]) + if (info.ndim != static_cast (expected_shape.size ())) { - throw std::runtime_error(name + " has wrong shape at dimension " + - std::to_string(i)); + throw std::runtime_error (name + " has wrong number of dimensions"); + } + for (size_t i = 0; i < expected_shape.size (); ++i) + { + if (info.shape[i] != expected_shape[i]) + { + throw std::runtime_error (name + " has wrong shape at dimension " + std::to_string (i)); + } } - } } /** * @brief Get number of dimensions * @return Number of dimensions */ - ssize_t ndim() const { return info.ndim; } + ssize_t + ndim () const + { + return info.ndim; + } /** * @brief Get shape at dimension i * @param i Dimension index * @return Size at dimension i */ - ssize_t shape(size_t i) const { return info.shape[i]; } + ssize_t + shape (size_t i) const + { + return info.shape[i]; + } /** * @brief Get number of rows (for 
2D arrays) * @return Number of rows */ - ssize_t nrow() const { return info.shape[0]; } + ssize_t + nrow () const + { + return info.shape[0]; + } /** * @brief Get number of columns (for 2D arrays) * @return Number of columns */ - ssize_t ncol() const { return info.shape[1]; } + ssize_t + ncol () const + { + return info.shape[1]; + } }; // ============================================================================ diff --git a/source/source_base/assoc_laguerre.cpp b/source/source_base/assoc_laguerre.cpp index 2b2171a4c8f..b4b4e2d14c4 100644 --- a/source/source_base/assoc_laguerre.cpp +++ b/source/source_base/assoc_laguerre.cpp @@ -1,137 +1,138 @@ #include "source_base/assoc_laguerre.h" #include "source_base/global_function.h" -//#include // use cmath the factorial function +// #include // use cmath the factorial function #include -Assoc_Laguerre::Assoc_Laguerre() -{ -} +Assoc_Laguerre::Assoc_Laguerre () {} -Assoc_Laguerre::~Assoc_Laguerre() -{ -} +Assoc_Laguerre::~Assoc_Laguerre () {} -void Assoc_Laguerre::generate(const int &n, const int &l, const double ns, double* const &s, double* L) +void + Assoc_Laguerre::generate (const int& n, const int& l, const double ns, double* const& s, double* L) { - for(int i = 0; i < ns; i++) - { - L[i] = this->value(n, l, s[i]); - } + for (int i = 0; i < ns; i++) + { + L[i] = this->value (n, l, s[i]); + } } -void Assoc_Laguerre::generate(const int &n, const int &l, std::vector &x, std::vector &y) +void + Assoc_Laguerre::generate (const int& n, const int& l, std::vector& x, std::vector& y) { - for(int i = 0; i < x.size(); i++) - { - y[i] = this->value(n, l, x[i]); - } + for (int i = 0; i < x.size (); i++) + { + y[i] = this->value (n, l, x[i]); + } } -double Assoc_Laguerre::laguerre(const int &n, const double x) +double + Assoc_Laguerre::laguerre (const int& n, const double x) { - if(n == 0) - { - return 1; - } - else if(n == 1) - { - return -x + 1; - } - else if(n == 2) - { - return 0.5 * x * x - 2 * x + 1; - } - else if(n == 3) - { 
- return -x * x * x / 6.0 + 3.0 * x * x / 2.0 - 3.0 * x + 1; - } - else if(n >= 4) - { - double n_ = static_cast(n); - double first = (2*n_ - 1 - x)/n_ * Assoc_Laguerre::laguerre(n-1, x); - double second = (n_ - 1)/n_ * Assoc_Laguerre::laguerre(n-2, x); - return first - second; - } + if (n == 0) + { + return 1; + } + else if (n == 1) + { + return -x + 1; + } + else if (n == 2) + { + return 0.5 * x * x - 2 * x + 1; + } + else if (n == 3) + { + return -x * x * x / 6.0 + 3.0 * x * x / 2.0 - 3.0 * x + 1; + } + else if (n >= 4) + { + double n_ = static_cast (n); + double first = (2 * n_ - 1 - x) / n_ * Assoc_Laguerre::laguerre (n - 1, x); + double second = (n_ - 1) / n_ * Assoc_Laguerre::laguerre (n - 2, x); + return first - second; + } else - { - ModuleBase::WARNING_QUIT("Assoc_Laguerre::laguerre", "n is out of range"); - return 0; - } + { + ModuleBase::WARNING_QUIT ("Assoc_Laguerre::laguerre", "n is out of range"); + return 0; + } } -double Assoc_Laguerre::associate_laguerre(const int &n, const double x, const int &a) +double + Assoc_Laguerre::associate_laguerre (const int& n, const double x, const int& a) { // formula from https://en.wikipedia.org/wiki/Laguerre_polynomials - double n_ = static_cast(n); - double a_ = static_cast(a); - if(n == 0) - { - return 1; - } - else if(n == 1) - { - return -x + 1 + a_; - } - else if(n == 2) - { - return 0.5 * (x*x - 2*(a_+2)*x + (a_+1)*(a_+2)); - } - else if(n == 3) - { - return -x*x*x/6.0 + (a_+3)*x*x/2.0 - (a_+2)*(a_+3)*x/2.0 + (a_+1)*(a_+2)*(a_+3)/6.0; - } - else if(n >= 4) - { - double first = (2*n_ - 1 + a_ - x)/n_ * this->associate_laguerre(n-1, x, a); - double second = (n_ + a_ - 1)/n_ * this->associate_laguerre(n-2, x, a); - return first - second; - } + double n_ = static_cast (n); + double a_ = static_cast (a); + if (n == 0) + { + return 1; + } + else if (n == 1) + { + return -x + 1 + a_; + } + else if (n == 2) + { + return 0.5 * (x * x - 2 * (a_ + 2) * x + (a_ + 1) * (a_ + 2)); + } + else if (n == 3) + { + return -x * 
x * x / 6.0 + (a_ + 3) * x * x / 2.0 - (a_ + 2) * (a_ + 3) * x / 2.0 + + (a_ + 1) * (a_ + 2) * (a_ + 3) / 6.0; + } + else if (n >= 4) + { + double first = (2 * n_ - 1 + a_ - x) / n_ * this->associate_laguerre (n - 1, x, a); + double second = (n_ + a_ - 1) / n_ * this->associate_laguerre (n - 2, x, a); + return first - second; + } else - { - ModuleBase::WARNING_QUIT("Assoc_Laguerre::associate_laguerre", "n is out of range"); - return 0; - } + { + ModuleBase::WARNING_QUIT ("Assoc_Laguerre::associate_laguerre", "n is out of range"); + return 0; + } } -int Assoc_Laguerre::factorial(const int &n) +int + Assoc_Laguerre::factorial (const int& n) { - if(n == 0) - { - return 1; - } - else if(n > 0) - { - return n * this->factorial(n-1); - } + if (n == 0) + { + return 1; + } + else if (n > 0) + { + return n * this->factorial (n - 1); + } else - { - ModuleBase::WARNING_QUIT("Assoc_Laguerre::factorial", "n is out of range"); - return 0; - } + { + ModuleBase::WARNING_QUIT ("Assoc_Laguerre::factorial", "n is out of range"); + return 0; + } } -double Assoc_Laguerre::value(const int &n, const int &l, const double &s) +double + Assoc_Laguerre::value (const int& n, const int& l, const double& s) { - int k_ = 2*l + 1; + int k_ = 2 * l + 1; int n_ = n - l - 1; - if(k_ < 0) - { - ModuleBase::WARNING_QUIT("Assoc_Laguerre::value", "k is out of range"); - return 0; - } - if(n_ < 0) - { - ModuleBase::WARNING_QUIT("Assoc_Laguerre::value", "n is out of range"); - return 0; - } + if (k_ < 0) + { + ModuleBase::WARNING_QUIT ("Assoc_Laguerre::value", "k is out of range"); + return 0; + } + if (n_ < 0) + { + ModuleBase::WARNING_QUIT ("Assoc_Laguerre::value", "n is out of range"); + return 0; + } double L = 0; - for(int iq = 0; iq <= n_; iq++) - { - L += std::pow(-s, iq) * - static_cast(this->factorial(n_ + k_)) / - static_cast(this->factorial(n_ - iq)) / - static_cast(this->factorial(k_ + iq)) / - static_cast(this->factorial(iq)); - } - //L = std::tr1::assoc_laguerre(n_, k_, s); // use standard 
library + for (int iq = 0; iq <= n_; iq++) + { + L += std::pow (-s, iq) * static_cast (this->factorial (n_ + k_)) + / static_cast (this->factorial (n_ - iq)) / static_cast (this->factorial (k_ + iq)) + / static_cast (this->factorial (iq)); + } + // L = std::tr1::assoc_laguerre(n_, k_, s); // use standard library return L; } \ No newline at end of file diff --git a/source/source_base/assoc_laguerre.h b/source/source_base/assoc_laguerre.h index 6992c941a5a..3d834238414 100644 --- a/source/source_base/assoc_laguerre.h +++ b/source/source_base/assoc_laguerre.h @@ -6,42 +6,42 @@ class Assoc_Laguerre { - public: - Assoc_Laguerre(); - ~Assoc_Laguerre(); - /// @brief generate the associated Laguerre polynomial (overloaded for double*) - /// @param n principal quantum number - /// @param l orbital quantum number - /// @param ns number of x-coordinates - /// @param s x-coordinates - /// @param L y-coordinates - void generate(const int &n, const int &l, const double ns, double* const &s, double* L); - /// @brief generate the associated Laguerre polynomial (overloaded for std::vector) - /// @param n principal quantum number - /// @param l orbital quantum number - /// @param x x-coordinates in std::vector - /// @param y y-coordinates in std::vector - void generate(const int &n, const int &l, std::vector &x, std::vector &y); - /// @brief Laguerre polynomial - /// @param n degree of the polynomial - /// @param x radial coordinate - /// @return L_n(x) - double laguerre(const int &n, const double x); - /// @brief recursive relationship to find the associated Laguerre polynomial - /// @param n degree of the polynomial - /// @param x radial coordinate - /// @param a order of the polynomial - /// @return L^(a)_n(x) - double associate_laguerre(const int &n, const double x, const int &a); - /// @brief wrapper for associate_laguerre - /// @param n principal quantum number - /// @param l orbital quantum number - /// @param s radial coordinate - /// @return L^(2l+1)_(n-l-1)(s) - double 
value(const int &n, const int &l, const double &s); - /// @brief factorial function - /// @param n - /// @return n! - int factorial(const int &n); + public: + Assoc_Laguerre (); + ~Assoc_Laguerre (); + /// @brief generate the associated Laguerre polynomial (overloaded for double*) + /// @param n principal quantum number + /// @param l orbital quantum number + /// @param ns number of x-coordinates + /// @param s x-coordinates + /// @param L y-coordinates + void generate (const int& n, const int& l, const double ns, double* const& s, double* L); + /// @brief generate the associated Laguerre polynomial (overloaded for std::vector) + /// @param n principal quantum number + /// @param l orbital quantum number + /// @param x x-coordinates in std::vector + /// @param y y-coordinates in std::vector + void generate (const int& n, const int& l, std::vector& x, std::vector& y); + /// @brief Laguerre polynomial + /// @param n degree of the polynomial + /// @param x radial coordinate + /// @return L_n(x) + double laguerre (const int& n, const double x); + /// @brief recursive relationship to find the associated Laguerre polynomial + /// @param n degree of the polynomial + /// @param x radial coordinate + /// @param a order of the polynomial + /// @return L^(a)_n(x) + double associate_laguerre (const int& n, const double x, const int& a); + /// @brief wrapper for associate_laguerre + /// @param n principal quantum number + /// @param l orbital quantum number + /// @param s radial coordinate + /// @return L^(2l+1)_(n-l-1)(s) + double value (const int& n, const int& l, const double& s); + /// @brief factorial function + /// @param n + /// @return n! 
+ int factorial (const int& n); }; #endif // ASSOC_LAGUEERRE_H \ No newline at end of file diff --git a/source/source_base/atom_in.h b/source/source_base/atom_in.h index 5458a28fd79..aaa1f4165b7 100644 --- a/source/source_base/atom_in.h +++ b/source/source_base/atom_in.h @@ -10,25 +10,24 @@ class atom_in { public: - atom_in(){}; - ~atom_in(){}; + atom_in () {}; + ~atom_in () {}; std::map atom_Z - = {{"H", 1}, {"He", 2}, {"Li", 3}, {"Be", 4}, {"B", 5}, {"C", 6}, {"N", 7}, {"O", 8}, {"F", 9}, - {"Ne", 10}, {"Na", 11}, {"Mg", 12}, {"Al", 13}, {"Si", 14}, {"P", 15}, {"S", 16}, {"Cl", 17}, {"Ar", 18}, - {"K", 19}, {"Ca", 20}, {"Sc", 21}, {"Ti", 22}, {"V", 23}, {"Cr", 24}, {"Mn", 25}, {"Fe", 26}, {"Co", 27}, - {"Ni", 28}, {"Cu", 29}, {"Zn", 30}, {"Ga", 31}, {"Ge", 32}, {"As", 33}, {"Se", 34}, {"Br", 35}, {"Kr", 36}, - {"Rb", 37}, {"Sr", 38}, {"Y", 39}, {"Zr", 40}, {"Nb", 41}, {"Mo", 42}, {"Tc", 43}, {"Ru", 44}, {"Rh", 45}, - {"Pd", 46}, {"Ag", 47}, {"Cd", 48}, {"In", 49}, {"Sn", 50}, {"Sb", 51}, {"Te", 52}, {"I", 53}, {"Xe", 54}, - {"Cs", 55}, {"Ba", 56}, {"La", 57}, {"Ce", 58}, {"Pr", 59}, {"Nd", 60}, {"Pm", 61}, {"Sm", 62}, {"Eu", 63}, - {"Gd", 64}, {"Tb", 65}, {"Dy", 66}, {"Ho", 67}, {"Er", 68}, {"Tm", 69}, {"Yb", 70}, {"Lu", 71}, {"Hf", 72}, - {"Ta", 73}, {"W", 74}, {"Re", 75}, {"Os", 76}, {"Ir", 77}, {"Pt", 78}, {"Au", 79}, {"Hg", 80}, {"Tl", 81}, - {"Pb", 82}, {"Bi", 83}, {"Po", 84}, {"At", 85}, {"Rn", 86}, - {"Fr", 87}, {"Ra", 88}, {"Ac", 89}, {"Th", 90}, {"Pa", 91}, - {"U", 92}, {"Np", 93}, {"Pu", 94}, {"Am", 95}, {"Cm", 96}, - {"Bk", 97}, {"Cf", 98}, {"Es", 99}, {"Fm", 100}, {"Md", 101}, {"No", 102}, - {"Lr", 103}, {"Rf", 104}, {"Db", 105}, {"Sg", 106}, {"Bh", 107}, {"Hs", 108}, - {"Mt", 109}, {"Ds", 110}, {"Rg", 111}, {"Cn", 112}, {"Nh", 113}, {"Fl", 114}, - {"Mc", 115}, {"Lv", 116}, {"Ts", 117}, {"Og", 118}}; + = {{"H", 1}, {"He", 2}, {"Li", 3}, {"Be", 4}, {"B", 5}, {"C", 6}, {"N", 7}, {"O", 8}, + {"F", 9}, {"Ne", 10}, {"Na", 11}, {"Mg", 12}, {"Al", 13}, 
{"Si", 14}, {"P", 15}, {"S", 16}, + {"Cl", 17}, {"Ar", 18}, {"K", 19}, {"Ca", 20}, {"Sc", 21}, {"Ti", 22}, {"V", 23}, {"Cr", 24}, + {"Mn", 25}, {"Fe", 26}, {"Co", 27}, {"Ni", 28}, {"Cu", 29}, {"Zn", 30}, {"Ga", 31}, {"Ge", 32}, + {"As", 33}, {"Se", 34}, {"Br", 35}, {"Kr", 36}, {"Rb", 37}, {"Sr", 38}, {"Y", 39}, {"Zr", 40}, + {"Nb", 41}, {"Mo", 42}, {"Tc", 43}, {"Ru", 44}, {"Rh", 45}, {"Pd", 46}, {"Ag", 47}, {"Cd", 48}, + {"In", 49}, {"Sn", 50}, {"Sb", 51}, {"Te", 52}, {"I", 53}, {"Xe", 54}, {"Cs", 55}, {"Ba", 56}, + {"La", 57}, {"Ce", 58}, {"Pr", 59}, {"Nd", 60}, {"Pm", 61}, {"Sm", 62}, {"Eu", 63}, {"Gd", 64}, + {"Tb", 65}, {"Dy", 66}, {"Ho", 67}, {"Er", 68}, {"Tm", 69}, {"Yb", 70}, {"Lu", 71}, {"Hf", 72}, + {"Ta", 73}, {"W", 74}, {"Re", 75}, {"Os", 76}, {"Ir", 77}, {"Pt", 78}, {"Au", 79}, {"Hg", 80}, + {"Tl", 81}, {"Pb", 82}, {"Bi", 83}, {"Po", 84}, {"At", 85}, {"Rn", 86}, {"Fr", 87}, {"Ra", 88}, + {"Ac", 89}, {"Th", 90}, {"Pa", 91}, {"U", 92}, {"Np", 93}, {"Pu", 94}, {"Am", 95}, {"Cm", 96}, + {"Bk", 97}, {"Cf", 98}, {"Es", 99}, {"Fm", 100}, {"Md", 101}, {"No", 102}, {"Lr", 103}, {"Rf", 104}, + {"Db", 105}, {"Sg", 106}, {"Bh", 107}, {"Hs", 108}, {"Mt", 109}, {"Ds", 110}, {"Rg", 111}, {"Cn", 112}, + {"Nh", 113}, {"Fl", 114}, {"Mc", 115}, {"Lv", 116}, {"Ts", 117}, {"Og", 118}}; std::map atom_RCS = {{"H", 0.603774}, {"He", 1.75472}, {"Li", 2.32075}, {"Be", 1.69811}, {"B", 1.54717}, {"C", 1.45283}, @@ -45,120 +44,188 @@ class atom_in {"Ho", 2.98113}, {"Er", 2.96226}, {"Tm", 2.9434}, {"Yb", 3.28302}, {"Lu", 2.9434}, {"Hf", 2.71698}, {"Ta", 2.5283}, {"W", 2.45283}, {"Re", 2.41509}, {"Os", 2.37736}, {"Ir", 2.39623}, {"Pt", 2.45283}, {"Au", 2.5283}, {"Hg", 2.81132}, {"Tl", 2.79245}, {"Pb", 2.77358}, {"Bi", 2.75472}, {"Po", 2.75472}, - {"At", 2.73585}, {"Rn", 2.69811} }; + {"At", 2.73585}, {"Rn", 2.69811}}; std::map atom_symbol - = { {"H", "Hydrogen"}, {"He", "Helium"}, {"Li", "Lithium"}, {"Be", "Beryllium"}, {"B", "Boron"}, {"C", "Carbon"}, - {"N", "Nitrogen"}, {"O", 
"Oxygen"}, {"F", "Fluorine"}, {"Ne", "Neon"}, {"Na", "Sodium"}, {"Mg", "Magnesium"}, - {"Al", "Aluminum"}, {"Si", "Silicon"}, {"P", "Phosphorus"}, {"S", "Sulfur"}, {"Cl", "Chlorine"}, {"Ar", "Argon"}, - {"K", "Potassium"}, {"Ca", "Calcium"}, {"Sc", "Scandium"}, {"Ti", "Titanium"}, {"V", "Vanadium"}, {"Cr", "Chromium"}, - {"Mn", "Manganese"}, {"Fe", "Iron"}, {"Co", "Cobalt"}, {"Ni", "Nickel"}, {"Cu", "Copper"}, {"Zn", "Zinc"}, - {"Ga", "Gallium"}, {"Ge", "Germanium"}, {"As", "Arsenic"}, {"Se", "Selenium"}, {"Br", "Bromine"}, {"Kr", "Krypton"}, - {"Rb", "Rubidium"}, {"Sr", "Strontium"}, {"Y", "Yttrium"}, {"Zr", "Zirconium"}, {"Nb", "Niobium"}, {"Mo", "Molybdenum"}, - {"Tc", "Technetium"}, {"Ru", "Ruthenium"}, {"Rh", "Rhodium"}, {"Pd", "Palladium"}, {"Ag", "Silver"}, {"Cd", "Cadmium"}, - {"In", "Indium"}, {"Sn", "Tin"}, {"Sb", "Antimony"}, {"Te", "Tellurium"}, {"I", "Iodine"}, {"Xe", "Xenon"}, - {"Cs", "Cesium"}, {"Ba", "Barium"}, {"La", "Lanthanum"}, {"Ce", "Cerium"}, {"Pr", "Praseodymium"}, {"Nd", "Neodymium"}, - {"Pm", "Promethium"}, {"Sm", "Samarium"}, {"Eu", "Europium"}, {"Gd", "Gadolinium"}, {"Tb", "Terbium"}, {"Dy", "Dysprosium"}, - {"Ho", "Holmium"}, {"Er", "Erbium"}, {"Tm", "Thulium"}, {"Yb", "Ytterbium"}, {"Lu", "Lutetium"}, {"Hf", "Hafnium"}, - {"Ta", "Tantalum"}, {"W", "Tungsten"}, {"Re", "Rhenium"}, {"Os", "Osmium"}, {"Ir", "Iridium"}, {"Pt", "Platinum"}, - {"Au", "Gold"}, {"Hg", "Mercury"}, {"Tl", "Thallium"}, {"Pb", "Lead"}, {"Bi", "Bismuth"}, {"Po", "Polonium"}, - {"At", "Astatine"}, {"Rn", "Radon"}, {"Fr", "Francium"}, {"Ra", "Radium"}, {"Ac", "Actinium"}, {"Th", "Thorium"}, - {"Pa", "Protactinium"}, {"U", "Uranium"}, {"Np", "Neptunium"}, {"Pu", "Plutonium"}, {"Am", "Americium"}, {"Cm", "Curium"}, - {"Bk", "Berkelium"}, {"Cf", "Californium"}, {"Es", "Einsteinium"}, {"Fm", "Fermium"}, {"Md", "Mendelevium"}, {"No", "Nobelium"}, - {"Lr", "Lawrencium"}, {"Rf", "Rutherfordium"}, {"Db", "Dubnium"}, {"Sg", "Seaborgium"}, {"Bh", "Bohrium"}, {"Hs", "Hassium"}, 
- {"Mt", "Meitnerium"}, {"Ds", "Darmstadtium"}, {"Rg", "Roentgenium"}, {"Cn", "Copernicium"}, {"Nh", "Nihonium"}, {"Fl", "Flerovium"}, - {"Mc", "Moscovium"}, {"Lv", "Livermorium"}, {"Ts", "Tennessine"}, {"Og", "Oganesson"}}; - - std::map symbol_Z - = { {"Hydrogen", 1}, {"Helium", 2}, {"Lithium", 3}, {"Beryllium", 4}, {"Boron", 5}, {"Carbon", 6}, - {"Nitrogen", 7}, {"Oxygen", 8}, {"Fluorine", 9}, {"Neon", 10}, {"Sodium", 11}, {"Magnesium", 12}, - {"Aluminum", 13}, {"Silicon", 14}, {"Phosphorus", 15}, {"Sulfur", 16}, {"Chlorine", 17}, {"Argon", 18}, - {"Potassium", 19}, {"Calcium", 20}, {"Scandium", 21}, {"Titanium", 22}, {"Vanadium", 23}, {"Chromium", 24}, - {"Manganese", 25}, {"Iron", 26}, {"Cobalt", 27}, {"Nickel", 28}, {"Copper", 29}, {"Zinc", 30}, - {"Gallium", 31}, {"Germanium", 32}, {"Arsenic", 33}, {"Selenium", 34}, {"Bromine", 35}, {"Krypton", 36}, - {"Rubidium", 37}, {"Strontium", 38}, {"Yttrium", 39}, {"Zirconium", 40}, {"Niobium", 41}, {"Molybdenum", 42}, - {"Technetium", 43}, {"Ruthenium", 44}, {"Rhodium", 45}, {"Palladium", 46}, {"Silver", 47}, {"Cadmium", 48}, - {"Indium", 49}, {"Tin", 50}, {"Antimony", 51}, {"Tellurium", 52}, {"Iodine", 53}, {"Xenon", 54}, - {"Cesium", 55}, {"Barium", 56}, {"Lanthanum", 57}, {"Cerium", 58}, {"Praseodymium", 59}, {"Neodymium", 60}, - {"Promethium", 61}, {"Samarium", 62}, {"Europium", 63}, {"Gadolinium", 64}, {"Terbium", 65}, {"Dysprosium", 66}, - {"Holmium", 67}, {"Erbium", 68}, {"Thulium", 69}, {"Ytterbium", 70}, {"Lutetium", 71}, {"Hafnium", 72}, - {"Tantalum", 73}, {"Tungsten", 74}, {"Rhenium", 75}, {"Osmium", 76}, {"Iridium", 77}, {"Platinum", 78}, - {"Gold", 79}, {"Mercury", 80}, {"Thallium", 81}, {"Lead", 82}, {"Bismuth", 83}, {"Polonium", 84}, - {"Astatine", 85}, {"Radon", 86}, {"Francium", 87}, {"Radium", 88}, {"Actinium", 89}, {"Thorium", 90}, - {"Protactinium", 91}, {"Uranium", 92}, {"Neptunium", 93}, {"Plutonium", 94}, {"Americium", 95}, {"Curium", 96}, - {"Berkelium", 97}, {"Californium", 98}, 
{"Einsteinium", 99}, {"Fermium", 100}, {"Mendelevium", 101}, {"Nobelium", 102}, - {"Lawrencium", 103}, {"Rutherfordium", 104}, {"Dubnium", 105}, {"Seaborgium", 106}, {"Bohrium", 107}, {"Hassium", 108}, - {"Meitnerium", 109}, {"Darmstadtium", 110}, {"Roentgenium", 111}, {"Copernicium", 112}, {"Nihonium", 113}, {"Flerovium", 114}, - {"Moscovium", 115}, {"Livermorium", 116}, {"Tennessine", 117}, {"Oganesson", 118} - }; + = {{"H", "Hydrogen"}, {"He", "Helium"}, {"Li", "Lithium"}, {"Be", "Beryllium"}, + {"B", "Boron"}, {"C", "Carbon"}, {"N", "Nitrogen"}, {"O", "Oxygen"}, + {"F", "Fluorine"}, {"Ne", "Neon"}, {"Na", "Sodium"}, {"Mg", "Magnesium"}, + {"Al", "Aluminum"}, {"Si", "Silicon"}, {"P", "Phosphorus"}, {"S", "Sulfur"}, + {"Cl", "Chlorine"}, {"Ar", "Argon"}, {"K", "Potassium"}, {"Ca", "Calcium"}, + {"Sc", "Scandium"}, {"Ti", "Titanium"}, {"V", "Vanadium"}, {"Cr", "Chromium"}, + {"Mn", "Manganese"}, {"Fe", "Iron"}, {"Co", "Cobalt"}, {"Ni", "Nickel"}, + {"Cu", "Copper"}, {"Zn", "Zinc"}, {"Ga", "Gallium"}, {"Ge", "Germanium"}, + {"As", "Arsenic"}, {"Se", "Selenium"}, {"Br", "Bromine"}, {"Kr", "Krypton"}, + {"Rb", "Rubidium"}, {"Sr", "Strontium"}, {"Y", "Yttrium"}, {"Zr", "Zirconium"}, + {"Nb", "Niobium"}, {"Mo", "Molybdenum"}, {"Tc", "Technetium"}, {"Ru", "Ruthenium"}, + {"Rh", "Rhodium"}, {"Pd", "Palladium"}, {"Ag", "Silver"}, {"Cd", "Cadmium"}, + {"In", "Indium"}, {"Sn", "Tin"}, {"Sb", "Antimony"}, {"Te", "Tellurium"}, + {"I", "Iodine"}, {"Xe", "Xenon"}, {"Cs", "Cesium"}, {"Ba", "Barium"}, + {"La", "Lanthanum"}, {"Ce", "Cerium"}, {"Pr", "Praseodymium"}, {"Nd", "Neodymium"}, + {"Pm", "Promethium"}, {"Sm", "Samarium"}, {"Eu", "Europium"}, {"Gd", "Gadolinium"}, + {"Tb", "Terbium"}, {"Dy", "Dysprosium"}, {"Ho", "Holmium"}, {"Er", "Erbium"}, + {"Tm", "Thulium"}, {"Yb", "Ytterbium"}, {"Lu", "Lutetium"}, {"Hf", "Hafnium"}, + {"Ta", "Tantalum"}, {"W", "Tungsten"}, {"Re", "Rhenium"}, {"Os", "Osmium"}, + {"Ir", "Iridium"}, {"Pt", "Platinum"}, {"Au", "Gold"}, {"Hg", "Mercury"}, 
+ {"Tl", "Thallium"}, {"Pb", "Lead"}, {"Bi", "Bismuth"}, {"Po", "Polonium"}, + {"At", "Astatine"}, {"Rn", "Radon"}, {"Fr", "Francium"}, {"Ra", "Radium"}, + {"Ac", "Actinium"}, {"Th", "Thorium"}, {"Pa", "Protactinium"}, {"U", "Uranium"}, + {"Np", "Neptunium"}, {"Pu", "Plutonium"}, {"Am", "Americium"}, {"Cm", "Curium"}, + {"Bk", "Berkelium"}, {"Cf", "Californium"}, {"Es", "Einsteinium"}, {"Fm", "Fermium"}, + {"Md", "Mendelevium"}, {"No", "Nobelium"}, {"Lr", "Lawrencium"}, {"Rf", "Rutherfordium"}, + {"Db", "Dubnium"}, {"Sg", "Seaborgium"}, {"Bh", "Bohrium"}, {"Hs", "Hassium"}, + {"Mt", "Meitnerium"}, {"Ds", "Darmstadtium"}, {"Rg", "Roentgenium"}, {"Cn", "Copernicium"}, + {"Nh", "Nihonium"}, {"Fl", "Flerovium"}, {"Mc", "Moscovium"}, {"Lv", "Livermorium"}, + {"Ts", "Tennessine"}, {"Og", "Oganesson"}}; + + std::map symbol_Z = { + {"Hydrogen", 1}, {"Helium", 2}, {"Lithium", 3}, {"Beryllium", 4}, {"Boron", 5}, + {"Carbon", 6}, {"Nitrogen", 7}, {"Oxygen", 8}, {"Fluorine", 9}, {"Neon", 10}, + {"Sodium", 11}, {"Magnesium", 12}, {"Aluminum", 13}, {"Silicon", 14}, {"Phosphorus", 15}, + {"Sulfur", 16}, {"Chlorine", 17}, {"Argon", 18}, {"Potassium", 19}, {"Calcium", 20}, + {"Scandium", 21}, {"Titanium", 22}, {"Vanadium", 23}, {"Chromium", 24}, {"Manganese", 25}, + {"Iron", 26}, {"Cobalt", 27}, {"Nickel", 28}, {"Copper", 29}, {"Zinc", 30}, + {"Gallium", 31}, {"Germanium", 32}, {"Arsenic", 33}, {"Selenium", 34}, {"Bromine", 35}, + {"Krypton", 36}, {"Rubidium", 37}, {"Strontium", 38}, {"Yttrium", 39}, {"Zirconium", 40}, + {"Niobium", 41}, {"Molybdenum", 42}, {"Technetium", 43}, {"Ruthenium", 44}, {"Rhodium", 45}, + {"Palladium", 46}, {"Silver", 47}, {"Cadmium", 48}, {"Indium", 49}, {"Tin", 50}, + {"Antimony", 51}, {"Tellurium", 52}, {"Iodine", 53}, {"Xenon", 54}, {"Cesium", 55}, + {"Barium", 56}, {"Lanthanum", 57}, {"Cerium", 58}, {"Praseodymium", 59}, {"Neodymium", 60}, + {"Promethium", 61}, {"Samarium", 62}, {"Europium", 63}, {"Gadolinium", 64}, {"Terbium", 65}, + {"Dysprosium", 
66}, {"Holmium", 67}, {"Erbium", 68}, {"Thulium", 69}, {"Ytterbium", 70}, + {"Lutetium", 71}, {"Hafnium", 72}, {"Tantalum", 73}, {"Tungsten", 74}, {"Rhenium", 75}, + {"Osmium", 76}, {"Iridium", 77}, {"Platinum", 78}, {"Gold", 79}, {"Mercury", 80}, + {"Thallium", 81}, {"Lead", 82}, {"Bismuth", 83}, {"Polonium", 84}, {"Astatine", 85}, + {"Radon", 86}, {"Francium", 87}, {"Radium", 88}, {"Actinium", 89}, {"Thorium", 90}, + {"Protactinium", 91}, {"Uranium", 92}, {"Neptunium", 93}, {"Plutonium", 94}, {"Americium", 95}, + {"Curium", 96}, {"Berkelium", 97}, {"Californium", 98}, {"Einsteinium", 99}, {"Fermium", 100}, + {"Mendelevium", 101}, {"Nobelium", 102}, {"Lawrencium", 103}, {"Rutherfordium", 104}, {"Dubnium", 105}, + {"Seaborgium", 106}, {"Bohrium", 107}, {"Hassium", 108}, {"Meitnerium", 109}, {"Darmstadtium", 110}, + {"Roentgenium", 111}, {"Copernicium", 112}, {"Nihonium", 113}, {"Flerovium", 114}, {"Moscovium", 115}, + {"Livermorium", 116}, {"Tennessine", 117}, {"Oganesson", 118}}; std::map principle_quantum_number - = { - {"H", 1}, {"He", 1}, {"Li", 2}, {"Be", 2}, {"B", 2}, {"C", 2}, {"N", 2}, {"O", 2}, {"F", 2}, - {"Ne", 1}, {"Na", 2}, {"Mg", 3}, {"Al", 3}, {"Si", 3}, {"P", 3}, {"S", 3}, {"Cl", 3}, {"Ar", 2}, - {"K", 3}, {"Ca", 4}, {"Sc", 4}, {"Ti", 4}, {"V", 4}, {"Cr", 4}, {"Mn", 4}, {"Fe", 4}, {"Co", 4}, - {"Ni", 4}, {"Cu", 4}, {"Zn", 4}, {"Ga", 4}, {"Ge", 4}, {"As", 4}, {"Se", 4}, {"Br", 4}, {"Kr", 3}, - {"Rb", 4}, {"Sr", 5}, {"Y", 5}, {"Zr", 5}, {"Nb", 5}, {"Mo", 5}, {"Tc", 5}, {"Ru", 5}, {"Rh", 5}, - {"Pd", 5}, {"Ag", 5}, {"Cd", 5}, {"In", 5}, {"Sn", 5}, {"Sb", 5}, {"Te", 5}, {"I", 5}, {"Xe", 4}, - {"Cs", 5}, {"Ba", 6}, {"La", 6}, {"Ce", 6}, {"Pr", 6}, {"Nd", 6}, {"Pm", 6}, {"Sm", 6}, {"Eu", 6}, - {"Gd", 6}, {"Tb", 6}, {"Dy", 6}, {"Ho", 6}, {"Er", 6}, {"Tm", 6}, {"Yb", 6}, {"Lu", 6}, {"Hf", 6}, - {"Ta", 6}, {"W", 6}, {"Re", 6}, {"Os", 6}, {"Ir", 6}, {"Pt", 6}, {"Au", 6}, {"Hg", 6}, {"Tl", 6}, - {"Pb", 6}, {"Bi", 6}, {"Po", 6}, {"At", 6}, {"Rn", 6}, {"Fr", 7}, 
{"Ra", 7}, {"Ac", 7}, {"Th", 7}, - {"Pa", 7}, {"U", 7}, {"Np", 7}, {"Pu", 7}, {"Am", 7}, {"Cm", 7}, {"Bk", 7}, {"Cf", 7}, {"Es", 7}, - {"Fm", 7}, {"Md", 7}, {"No", 7}, {"Lr", 7}, {"Rf", 7}, {"Db", 7}, {"Sg", 7}, {"Bh", 7}, {"Hs", 7}, - {"Mt", 7}, {"Ds", 7}, {"Rg", 7}, {"Cn", 7}, {"Nh", 7}, {"Fl", 7}, {"Mc", 7}, {"Lv", 7}, {"Ts", 7}, - {"Og", 7} - }; - /// @brief ground state electron configuration, sequence of orbitals in key is in accord with the sequence of n then l - /// @note 1s2s2p3s3p 4s 3d4p5s 4d5p6s 4f5d6p7s 5f6d7p, from NIST periodic table, + = {{"H", 1}, {"He", 1}, {"Li", 2}, {"Be", 2}, {"B", 2}, {"C", 2}, {"N", 2}, {"O", 2}, {"F", 2}, {"Ne", 1}, + {"Na", 2}, {"Mg", 3}, {"Al", 3}, {"Si", 3}, {"P", 3}, {"S", 3}, {"Cl", 3}, {"Ar", 2}, {"K", 3}, {"Ca", 4}, + {"Sc", 4}, {"Ti", 4}, {"V", 4}, {"Cr", 4}, {"Mn", 4}, {"Fe", 4}, {"Co", 4}, {"Ni", 4}, {"Cu", 4}, {"Zn", 4}, + {"Ga", 4}, {"Ge", 4}, {"As", 4}, {"Se", 4}, {"Br", 4}, {"Kr", 3}, {"Rb", 4}, {"Sr", 5}, {"Y", 5}, {"Zr", 5}, + {"Nb", 5}, {"Mo", 5}, {"Tc", 5}, {"Ru", 5}, {"Rh", 5}, {"Pd", 5}, {"Ag", 5}, {"Cd", 5}, {"In", 5}, {"Sn", 5}, + {"Sb", 5}, {"Te", 5}, {"I", 5}, {"Xe", 4}, {"Cs", 5}, {"Ba", 6}, {"La", 6}, {"Ce", 6}, {"Pr", 6}, {"Nd", 6}, + {"Pm", 6}, {"Sm", 6}, {"Eu", 6}, {"Gd", 6}, {"Tb", 6}, {"Dy", 6}, {"Ho", 6}, {"Er", 6}, {"Tm", 6}, {"Yb", 6}, + {"Lu", 6}, {"Hf", 6}, {"Ta", 6}, {"W", 6}, {"Re", 6}, {"Os", 6}, {"Ir", 6}, {"Pt", 6}, {"Au", 6}, {"Hg", 6}, + {"Tl", 6}, {"Pb", 6}, {"Bi", 6}, {"Po", 6}, {"At", 6}, {"Rn", 6}, {"Fr", 7}, {"Ra", 7}, {"Ac", 7}, {"Th", 7}, + {"Pa", 7}, {"U", 7}, {"Np", 7}, {"Pu", 7}, {"Am", 7}, {"Cm", 7}, {"Bk", 7}, {"Cf", 7}, {"Es", 7}, {"Fm", 7}, + {"Md", 7}, {"No", 7}, {"Lr", 7}, {"Rf", 7}, {"Db", 7}, {"Sg", 7}, {"Bh", 7}, {"Hs", 7}, {"Mt", 7}, {"Ds", 7}, + {"Rg", 7}, {"Cn", 7}, {"Nh", 7}, {"Fl", 7}, {"Mc", 7}, {"Lv", 7}, {"Ts", 7}, {"Og", 7}}; + /// @brief ground state electron configuration, sequence of orbitals in key is in accord with the sequence of n then + /// l + 
/// @note 1s2s2p3s3p 4s 3d4p5s 4d5p6s 4f5d6p7s 5f6d7p, from NIST periodic table, /// @details see: https://www.nist.gov/system/files/documents/2019/12/10/nist_periodictable_july2019_crop.pdf - std::map> groundstate_electronconfiguration - = { - // 1s - {"H", {1}}, {"He", {2}}, // 1st period - // 1s 2s - {"Li", {2, 1}}, {"Be", {2, 2}}, // 2nd period - // 1s 2s 2p - {"B", {2, 2, 1}}, {"C", {2, 2, 2}}, {"N", {2, 2, 3}}, {"O", {2, 2, 4}}, {"F", {2, 2, 5}}, {"Ne", {2, 2, 6}}, // 2nd period - // 1s 2s 2p 3s 1s 2s 2p 3s - {"Na", {2, 2, 6, 1}}, {"Mg", {2, 2, 6, 2}}, // 3rd period - // 1s 2s 2p 3s 3p 1s 2s 2p 3s 3p 1s 2s 2p 3s 3p - {"Al", {2, 2, 6, 2, 1}}, {"Si", {2, 2, 6, 2, 2}}, {"P", {2, 2, 6, 2, 3}}, // 3rd period - {"S", {2, 2, 6, 2, 4}}, {"Cl", {2, 2, 6, 2, 5}}, {"Ar", {2, 2, 6, 2, 6}}, // 3rd period - // 1s 2s 2p 3s 3p 3d 4s 1s 2s 2p 3s 3p 3d 4s 1s 2s 2p 3s 3p 3d 4s - {"K", {2, 2, 6, 2, 6, 0, 1}}, {"Ca", {2, 2, 6, 2, 6, 0, 2}}, {"Sc", {2, 2, 6, 2, 6, 1, 2}}, // 4th period - {"Ti", {2, 2, 6, 2, 6, 2, 2}}, {"V", {2, 2, 6, 2, 6, 3, 2}}, {"Cr", {2, 2, 6, 2, 6, 4, 2}}, // 4th period - {"Mn", {2, 2, 6, 2, 6, 5, 2}}, {"Fe", {2, 2, 6, 2, 6, 6, 2}}, {"Co", {2, 2, 6, 2, 6, 7, 2}}, // 4th period - {"Ni", {2, 2, 6, 2, 6, 8, 2}}, {"Cu", {2, 2, 6, 2, 6, 10, 1}}, {"Zn", {2, 2, 6, 2, 6, 10, 2}}, // 4th period - // 1s 2s 2p 3s 3p 3d 4s 4p 1s 2s 2p 3s 3p 3d 4s 4p 1s 2s 2p 3s 3p 3d 4s 4p - {"Ga", {2, 2, 6, 2, 6, 10, 2, 1}}, {"Ge", {2, 2, 6, 2, 6, 10, 2, 2}}, {"As", {2, 2, 6, 2, 6, 10, 2, 3}}, // 4th period - {"Se", {2, 2, 6, 2, 6, 10, 2, 4}}, {"Br", {2, 2, 6, 2, 6, 10, 2, 5}}, {"Kr", {2, 2, 6, 2, 6, 10, 2, 6}}, // 4th period - // 1s 2s 2p 3s 3p 3d 4s 4p 4d 4f 5s 1s 2s 2p 3s 3p 3d 4s 4p 4d 4f 5s 1s 2s 2p 3s 3p 3d 4s 4p 4d 4f 5s - {"Rb", {2, 2, 6, 2, 6, 10, 2, 6, 0, 0, 1}}, {"Sr", {2, 2, 6, 2, 6, 10, 2, 6, 0, 0, 2}}, {"Y", {2, 2, 6, 2, 6, 10, 2, 6, 1, 0, 2}}, // 5th period - {"Zr", {2, 2, 6, 2, 6, 10, 2, 6, 2, 0, 2}}, {"Nb", {2, 2, 6, 2, 6, 10, 2, 6, 4, 0, 1}}, {"Mo", {2, 2, 6, 2, 6, 10, 
2, 6, 5, 0, 1}}, // 5th period - {"Tc", {2, 2, 6, 2, 6, 10, 2, 6, 5, 0, 2}}, {"Ru", {2, 2, 6, 2, 6, 10, 2, 6, 7, 0, 1}}, {"Rh", {2, 2, 6, 2, 6, 10, 2, 6, 8, 0, 1}}, // 5th period - {"Pd", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 0}}, {"Ag", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 1}}, {"Cd", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2}}, // 5th period - // 1s 2s 2p 3s 3p 3d 4s 4p 4d 4f 5s 5p 1s 2s 2p 3s 3p 3d 4s 4p 4d 4f 5s 5p 1s 2s 2p 3s 3p 3d 4s 4p 4d 4f 5s 5p - {"In", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2, 1}}, {"Sn", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2, 2}}, {"Sb", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2, 3}}, // 6th period - {"Te", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2, 4}}, {"I", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2, 5}}, {"Xe", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2, 6}}, // 6th period - // 1s 2s 2p 3s 3p 3d 4s 4p 4d 4f 5s 5p 5d 5f 5g 6s 1s 2s 2p 3s 3p 3d 4s 4p 4d 4f 5s 5p 5d 5f 5g 6s - {"Cs", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2, 6, 0, 0, 0, 1}}, {"Ba", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2, 6, 0, 0, 0, 2}}, // 6th period - {"La", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2, 6, 1, 0, 0, 2}}, {"Ce", {2, 2, 6, 2, 6, 10, 2, 6, 10, 1, 2, 6, 1, 0, 0, 2}}, // 6th period - {"Pr", {2, 2, 6, 2, 6, 10, 2, 6, 10, 3, 2, 6, 0, 0, 0, 2}}, {"Nd", {2, 2, 6, 2, 6, 10, 2, 6, 10, 4, 2, 6, 0, 0, 0, 2}}, // 6th period - {"Pm", {2, 2, 6, 2, 6, 10, 2, 6, 10, 5, 2, 6, 0, 0, 0, 2}}, {"Sm", {2, 2, 6, 2, 6, 10, 2, 6, 10, 6, 2, 6, 0, 0, 0, 2}}, // 6th period - {"Eu", {2, 2, 6, 2, 6, 10, 2, 6, 10, 7, 2, 6, 0, 0, 0, 2}}, {"Gd", {2, 2, 6, 2, 6, 10, 2, 6, 10, 7, 2, 6, 1, 0, 0, 2}}, // 6th period - {"Tb", {2, 2, 6, 2, 6, 10, 2, 6, 10, 9, 2, 6, 0, 0, 0, 2}}, {"Dy", {2, 2, 6, 2, 6, 10, 2, 6, 10, 10, 2, 6, 0, 0, 0, 2}}, // 6th period - {"Ho", {2, 2, 6, 2, 6, 10, 2, 6, 10, 11, 2, 6, 0, 0, 0, 2}}, {"Er", {2, 2, 6, 2, 6, 10, 2, 6, 10, 12, 2, 6, 0, 0, 0, 2}}, // 6th period - {"Tm", {2, 2, 6, 2, 6, 10, 2, 6, 10, 13, 2, 6, 0, 0, 0, 2}}, {"Yb", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 0, 0, 0, 2}}, // 6th period - {"Lu", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 
2, 6, 1, 0, 0, 2}}, {"Hf", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 2, 0, 0, 2}}, // 6th period - {"Ta", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 3, 0, 0, 2}}, {"W", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 4, 0, 0, 2}}, // 6th period - {"Re", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 5, 0, 0, 2}}, {"Os", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 6, 0, 0, 2}}, // 6th period - {"Ir", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 7, 0, 0, 2}}, {"Pt", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 9, 0, 0, 1}}, // 6th period - {"Au", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 10, 0, 0, 1}}, {"Hg", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 10, 0, 0, 2}}, // 6th period - // 1s 2s 2p 3s 3p 3d 4s 4p 4d 4f 5s 5p 5d 5f 5g 6s 6p 1s 2s 2p 3s 3p 3d 4s 4p 4d 4f 5s 5p 5d 5f 5g 6s 6p - {"Tl", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 10, 0, 0, 2, 1}}, {"Pb", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 10, 0, 0, 2, 2}}, // 6th period - {"Bi", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 10, 0, 0, 2, 3}}, {"Po", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 10, 0, 0, 2, 4}}, // 6th period - {"At", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 10, 0, 0, 2, 5}}, {"Rn", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 10, 0, 0, 2, 6}} // 6th period - }; + std::map> groundstate_electronconfiguration = { + // 1s + {"H", {1}}, + {"He", {2}}, // 1st period + // 1s 2s + {"Li", {2, 1}}, + {"Be", {2, 2}}, // 2nd period + // 1s 2s 2p + {"B", {2, 2, 1}}, + {"C", {2, 2, 2}}, + {"N", {2, 2, 3}}, + {"O", {2, 2, 4}}, + {"F", {2, 2, 5}}, + {"Ne", {2, 2, 6}}, // 2nd period + // 1s 2s 2p 3s 1s 2s 2p 3s + {"Na", {2, 2, 6, 1}}, + {"Mg", {2, 2, 6, 2}}, // 3rd period + // 1s 2s 2p 3s 3p 1s 2s 2p 3s 3p 1s 2s 2p 3s 3p + {"Al", {2, 2, 6, 2, 1}}, + {"Si", {2, 2, 6, 2, 2}}, + {"P", {2, 2, 6, 2, 3}}, // 3rd period + {"S", {2, 2, 6, 2, 4}}, + {"Cl", {2, 2, 6, 2, 5}}, + {"Ar", {2, 2, 6, 2, 6}}, // 3rd period + // 1s 2s 2p 3s 3p 3d 4s 1s 2s 2p 3s 3p 3d 4s 1s 2s 2p 3s 3p 3d 4s + {"K", {2, 2, 6, 2, 6, 0, 1}}, + {"Ca", {2, 2, 6, 2, 6, 0, 2}}, + {"Sc", {2, 2, 6, 2, 6, 1, 
2}}, // 4th period + {"Ti", {2, 2, 6, 2, 6, 2, 2}}, + {"V", {2, 2, 6, 2, 6, 3, 2}}, + {"Cr", {2, 2, 6, 2, 6, 4, 2}}, // 4th period + {"Mn", {2, 2, 6, 2, 6, 5, 2}}, + {"Fe", {2, 2, 6, 2, 6, 6, 2}}, + {"Co", {2, 2, 6, 2, 6, 7, 2}}, // 4th period + {"Ni", {2, 2, 6, 2, 6, 8, 2}}, + {"Cu", {2, 2, 6, 2, 6, 10, 1}}, + {"Zn", {2, 2, 6, 2, 6, 10, 2}}, // 4th period + // 1s 2s 2p 3s 3p 3d 4s 4p 1s 2s 2p 3s 3p 3d 4s 4p 1s + // 2s 2p 3s 3p 3d 4s 4p + {"Ga", {2, 2, 6, 2, 6, 10, 2, 1}}, + {"Ge", {2, 2, 6, 2, 6, 10, 2, 2}}, + {"As", {2, 2, 6, 2, 6, 10, 2, 3}}, // 4th period + {"Se", {2, 2, 6, 2, 6, 10, 2, 4}}, + {"Br", {2, 2, 6, 2, 6, 10, 2, 5}}, + {"Kr", {2, 2, 6, 2, 6, 10, 2, 6}}, // 4th period + // 1s 2s 2p 3s 3p 3d 4s 4p 4d 4f 5s 1s 2s 2p 3s 3p 3d 4s 4p + // 4d 4f 5s 1s 2s 2p 3s 3p 3d 4s 4p 4d 4f 5s + {"Rb", {2, 2, 6, 2, 6, 10, 2, 6, 0, 0, 1}}, + {"Sr", {2, 2, 6, 2, 6, 10, 2, 6, 0, 0, 2}}, + {"Y", {2, 2, 6, 2, 6, 10, 2, 6, 1, 0, 2}}, // 5th period + {"Zr", {2, 2, 6, 2, 6, 10, 2, 6, 2, 0, 2}}, + {"Nb", {2, 2, 6, 2, 6, 10, 2, 6, 4, 0, 1}}, + {"Mo", {2, 2, 6, 2, 6, 10, 2, 6, 5, 0, 1}}, // 5th period + {"Tc", {2, 2, 6, 2, 6, 10, 2, 6, 5, 0, 2}}, + {"Ru", {2, 2, 6, 2, 6, 10, 2, 6, 7, 0, 1}}, + {"Rh", {2, 2, 6, 2, 6, 10, 2, 6, 8, 0, 1}}, // 5th period + {"Pd", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 0}}, + {"Ag", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 1}}, + {"Cd", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2}}, // 5th period + // 1s 2s 2p 3s 3p 3d 4s 4p 4d 4f 5s 5p 1s 2s 2p 3s + // 3p 3d 4s 4p 4d 4f 5s 5p 1s 2s 2p 3s 3p 3d 4s + // 4p 4d 4f 5s 5p + {"In", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2, 1}}, + {"Sn", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2, 2}}, + {"Sb", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2, 3}}, // 6th period + {"Te", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2, 4}}, + {"I", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2, 5}}, + {"Xe", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2, 6}}, // 6th period + // 1s 2s 2p 3s 3p 3d 4s 4p 4d 4f 5s 5p 5d 5f 5g 6s 1s 2s + // 2p 3s 3p 3d 4s 4p 4d 4f 5s 5p 5d 5f 5g 6s + {"Cs", {2, 2, 6, 2, 6, 10, 2, 
6, 10, 0, 2, 6, 0, 0, 0, 1}}, + {"Ba", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2, 6, 0, 0, 0, 2}}, // 6th period + {"La", {2, 2, 6, 2, 6, 10, 2, 6, 10, 0, 2, 6, 1, 0, 0, 2}}, + {"Ce", {2, 2, 6, 2, 6, 10, 2, 6, 10, 1, 2, 6, 1, 0, 0, 2}}, // 6th period + {"Pr", {2, 2, 6, 2, 6, 10, 2, 6, 10, 3, 2, 6, 0, 0, 0, 2}}, + {"Nd", {2, 2, 6, 2, 6, 10, 2, 6, 10, 4, 2, 6, 0, 0, 0, 2}}, // 6th period + {"Pm", {2, 2, 6, 2, 6, 10, 2, 6, 10, 5, 2, 6, 0, 0, 0, 2}}, + {"Sm", {2, 2, 6, 2, 6, 10, 2, 6, 10, 6, 2, 6, 0, 0, 0, 2}}, // 6th period + {"Eu", {2, 2, 6, 2, 6, 10, 2, 6, 10, 7, 2, 6, 0, 0, 0, 2}}, + {"Gd", {2, 2, 6, 2, 6, 10, 2, 6, 10, 7, 2, 6, 1, 0, 0, 2}}, // 6th period + {"Tb", {2, 2, 6, 2, 6, 10, 2, 6, 10, 9, 2, 6, 0, 0, 0, 2}}, + {"Dy", {2, 2, 6, 2, 6, 10, 2, 6, 10, 10, 2, 6, 0, 0, 0, 2}}, // 6th period + {"Ho", {2, 2, 6, 2, 6, 10, 2, 6, 10, 11, 2, 6, 0, 0, 0, 2}}, + {"Er", {2, 2, 6, 2, 6, 10, 2, 6, 10, 12, 2, 6, 0, 0, 0, 2}}, // 6th period + {"Tm", {2, 2, 6, 2, 6, 10, 2, 6, 10, 13, 2, 6, 0, 0, 0, 2}}, + {"Yb", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 0, 0, 0, 2}}, // 6th period + {"Lu", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 1, 0, 0, 2}}, + {"Hf", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 2, 0, 0, 2}}, // 6th period + {"Ta", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 3, 0, 0, 2}}, + {"W", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 4, 0, 0, 2}}, // 6th period + {"Re", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 5, 0, 0, 2}}, + {"Os", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 6, 0, 0, 2}}, // 6th period + {"Ir", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 7, 0, 0, 2}}, + {"Pt", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 9, 0, 0, 1}}, // 6th period + {"Au", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 10, 0, 0, 1}}, + {"Hg", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 10, 0, 0, 2}}, // 6th period + // 1s 2s 2p 3s 3p 3d 4s 4p 4d 4f 5s 5p 5d + // 5f 5g 6s 6p 1s 2s 2p 3s 3p 3d 4s + // 4p 4d 4f 5s 5p 5d 5f 5g 6s 6p + {"Tl", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 10, 0, 0, 2, 1}}, + {"Pb", {2, 2, 6, 2, 6, 10, 2, 6, 
10, 14, 2, 6, 10, 0, 0, 2, 2}}, // 6th period + {"Bi", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 10, 0, 0, 2, 3}}, + {"Po", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 10, 0, 0, 2, 4}}, // 6th period + {"At", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 10, 0, 0, 2, 5}}, + {"Rn", {2, 2, 6, 2, 6, 10, 2, 6, 10, 14, 2, 6, 10, 0, 0, 2, 6}} // 6th period + }; }; #endif \ No newline at end of file diff --git a/source/source_base/clebsch_gordan_coeff.cpp b/source/source_base/clebsch_gordan_coeff.cpp index 7a62b4dba19..a1884b84bbc 100644 --- a/source/source_base/clebsch_gordan_coeff.cpp +++ b/source/source_base/clebsch_gordan_coeff.cpp @@ -7,87 +7,86 @@ namespace ModuleBase { -Clebsch_Gordan::Clebsch_Gordan() -{ -} -Clebsch_Gordan::~Clebsch_Gordan() -{ -} +Clebsch_Gordan::Clebsch_Gordan () {} +Clebsch_Gordan::~Clebsch_Gordan () {} -void Clebsch_Gordan::clebsch_gordan(const int& lli, +void + Clebsch_Gordan::clebsch_gordan (const int& lli, ModuleBase::realArray& ap, ModuleBase::IntArray& lpx, ModuleBase::IntArray& lpl) { if (lli < 0) - { - std::cout << "Clebsch_Gordan: lmaxkb + 1 < 0" << std::endl; - exit(1); - } + { + std::cout << "Clebsch_Gordan: lmaxkb + 1 < 0" << std::endl; + exit (1); + } const int llx = (2 * lli - 1) * (2 * lli - 1); ModuleBase::Vector3* r = new ModuleBase::Vector3[llx]; - ModuleBase::matrix ylm(llx, llx); - ModuleBase::matrix mly(llx, llx); + ModuleBase::matrix ylm (llx, llx); + ModuleBase::matrix mly (llx, llx); // generate an array of random vectors (uniform deviate on unitary sphere) - gen_rndm_r(llx, r); + gen_rndm_r (llx, r); // generate the real spherical harmonics for the array: ylm(ir,lm) - ModuleBase::YlmReal::Ylm_Real(llx, llx, r, ylm); + ModuleBase::YlmReal::Ylm_Real (llx, llx, r, ylm); // store the inverse of ylm(ir,lm) in mly(lm,ir) - ModuleBase::Inverse_Matrix_Real(llx, ylm.c, mly.c); + ModuleBase::Inverse_Matrix_Real (llx, ylm.c, mly.c); // for each li,lj compute ap(l,li,lj) and the indices, lpx and lpl - lpx.create(lli * lli, lli * lli); - 
lpl.create(lli * lli, lli * lli, llx); - ap.create(llx, lli * lli, lli * lli); + lpx.create (lli * lli, lli * lli); + lpl.create (lli * lli, lli * lli, llx); + ap.create (llx, lli * lli, lli * lli); for (int li = 0; li < lli * lli; li++) - { - for (int lj = 0; lj < lli * lli; lj++) { - lpx(li, lj) = 0; - for (int L = 0; L < llx; L++) - { - ap(L, li, lj) = compute_ap(L, li, lj, llx, ylm, mly); - if (std::abs(ap(L, li, lj)) > 1.0e-3) + for (int lj = 0; lj < lli * lli; lj++) { - lpl(li, lj, lpx(li, lj)) = L; - lpx(li, lj)++; + lpx (li, lj) = 0; + for (int L = 0; L < llx; L++) + { + ap (L, li, lj) = compute_ap (L, li, lj, llx, ylm, mly); + if (std::abs (ap (L, li, lj)) > 1.0e-3) + { + lpl (li, lj, lpx (li, lj)) = L; + lpx (li, lj)++; + } + } } - } } - } delete[] r; } -void Clebsch_Gordan::gen_rndm_r(const int& llx, ModuleBase::Vector3* r) +void + Clebsch_Gordan::gen_rndm_r (const int& llx, ModuleBase::Vector3* r) { for (int i = 0; i < llx; i++) - { - double costheta = 2.0 * static_cast(std::rand()) / RAND_MAX - 1.0; - double sintheta = std::sqrt(1.0 - costheta * costheta); - double phi = ModuleBase::TWO_PI * static_cast(std::rand()) / RAND_MAX; - r[i].x = sintheta * std::cos(phi); - r[i].y = sintheta * std::sin(phi); - r[i].z = costheta; - } + { + double costheta = 2.0 * static_cast (std::rand ()) / RAND_MAX - 1.0; + double sintheta = std::sqrt (1.0 - costheta * costheta); + double phi = ModuleBase::TWO_PI * static_cast (std::rand ()) / RAND_MAX; + r[i].x = sintheta * std::cos (phi); + r[i].y = sintheta * std::sin (phi); + r[i].z = costheta; + } } -double Clebsch_Gordan::compute_ap(const int& L, - const int& li, - const int& lj, - const int& llx, - const ModuleBase::matrix& ylm, - const ModuleBase::matrix& mly) +double + Clebsch_Gordan::compute_ap (const int& L, + const int& li, + const int& lj, + const int& llx, + const ModuleBase::matrix& ylm, + const ModuleBase::matrix& mly) { double compute_ap = 0.0; for (int ir = 0; ir < llx; ir++) - { - compute_ap += mly(ir, L) * 
ylm(li, ir) * ylm(lj, ir); - } + { + compute_ap += mly (ir, L) * ylm (li, ir) * ylm (lj, ir); + } return compute_ap; } diff --git a/source/source_base/clebsch_gordan_coeff.h b/source/source_base/clebsch_gordan_coeff.h index 9e29ae5c97f..e2d378b46ce 100644 --- a/source/source_base/clebsch_gordan_coeff.h +++ b/source/source_base/clebsch_gordan_coeff.h @@ -12,8 +12,8 @@ namespace ModuleBase class Clebsch_Gordan { public: - Clebsch_Gordan(); - ~Clebsch_Gordan(); + Clebsch_Gordan (); + ~Clebsch_Gordan (); /** * @brief computes Clebsch-Gordan coefficient @@ -31,10 +31,10 @@ class Clebsch_Gordan * @param lpx [out] for each input limi,ljmj is the number of LM in the sum * @param lpl [out] for each input limi,ljmj points to the allowed LM */ - static void clebsch_gordan(const int& lli, - ModuleBase::realArray& ap, - ModuleBase::IntArray& lpx, - ModuleBase::IntArray& lpl); + static void clebsch_gordan (const int& lli, + ModuleBase::realArray& ap, + ModuleBase::IntArray& lpx, + ModuleBase::IntArray& lpl); private: /** @@ -43,7 +43,7 @@ class Clebsch_Gordan * @param llx [in] the number of vectors * @param r [out] an array of vectors */ - static void gen_rndm_r(const int& llx, ModuleBase::Vector3* r); + static void gen_rndm_r (const int& llx, ModuleBase::Vector3* r); /** * @brief store the inverse of ylm(ir,lm) in mly(lm,ir) @@ -56,12 +56,12 @@ class Clebsch_Gordan * @param mly [in] the inverse of ylm(ir,lm) * @return double the expansion coefficients */ - static double compute_ap(const int& L, - const int& li, - const int& lj, - const int& llx, - const ModuleBase::matrix& ylm, - const ModuleBase::matrix& mly); + static double compute_ap (const int& L, + const int& li, + const int& lj, + const int& llx, + const ModuleBase::matrix& ylm, + const ModuleBase::matrix& mly); }; } // namespace ModuleBase diff --git a/source/source_base/complexarray.cpp b/source/source_base/complexarray.cpp index 3652746b385..cf0fd90a25d 100644 --- a/source/source_base/complexarray.cpp +++ 
b/source/source_base/complexarray.cpp @@ -8,75 +8,76 @@ #include "complexarray.h" namespace ModuleBase { -void complexArrayxAlloc() +void + complexArrayxAlloc () { std::cerr << "Allocation error for complexArray" << std::endl; - std::exit(EXIT_FAILURE); + std::exit (EXIT_FAILURE); } -ComplexArray::ComplexArray(const int bnd1, const int bnd2, const int bnd3, const int bnd4) +ComplexArray::ComplexArray (const int bnd1, const int bnd2, const int bnd3, const int bnd4) { bound1 = bnd1; bound2 = bnd2; bound3 = bnd3; bound4 = bnd4; - init(this->getSize()); + init (this->getSize ()); } -ComplexArray::~ComplexArray() +ComplexArray::~ComplexArray () { freemem (); } +void + ComplexArray::init (const int size) { - freemem(); -} -void ComplexArray::init(const int size) -{ - assert(size>=0); - if(size>0) - { - ptr = new std::complex [size]; - assert(ptr != 0); - } + assert (size >= 0); + if (size > 0) + { + ptr = new std::complex[size]; + assert (ptr != nullptr); + } else - { - ptr = nullptr; - } + { + ptr = nullptr; + } } -void ComplexArray::freemem() +void + ComplexArray::freemem () { - delete [] ptr; + delete[] ptr; ptr = nullptr; bound1 = 0; bound2 = 0; bound3 = 0; bound4 = 0; } -void ComplexArray::create(const int bnd1, const int bnd2, const int bnd3, const int bnd4) +void + ComplexArray::create (const int bnd1, const int bnd2, const int bnd3, const int bnd4) { - delete [] ptr; + delete[] ptr; bound1 = bnd1; bound2 = bnd2; bound3 = bnd3; bound4 = bnd4; - const int size = this->getSize(); - this->init(size); - this->zero_out(); + const int size = this->getSize (); + this->init (size); + this->zero_out (); } -ComplexArray::ComplexArray(const ComplexArray &cd) +ComplexArray::ComplexArray (const ComplexArray& cd) { - this->freemem(); - const int size = cd.getSize(); - this->init(size); + this->freemem (); + const int size = cd.getSize (); + this->init (size); for (int i = 0; i < size; i++) - { - ptr[i] = cd.ptr[i]; - } + { + ptr[i] = cd.ptr[i]; + } this->bound1 = cd.bound1; 
this->bound2 = cd.bound2; this->bound3 = cd.bound3; this->bound4 = cd.bound4; } -ComplexArray::ComplexArray(ComplexArray &&cd) +ComplexArray::ComplexArray (ComplexArray&& cd) { - delete [] this->ptr; + delete[] this->ptr; this->ptr = cd.ptr; cd.ptr = nullptr; this->bound1 = cd.bound1; @@ -88,9 +89,10 @@ ComplexArray::ComplexArray(ComplexArray &&cd) this->bound4 = cd.bound4; cd.bound4 = 0; } -ComplexArray& ComplexArray::operator=(ComplexArray &&cd) +ComplexArray& + ComplexArray::operator= (ComplexArray&& cd) { - delete [] this->ptr; + delete[] this->ptr; this->ptr = cd.ptr; cd.ptr = nullptr; this->bound1 = cd.bound1; @@ -103,245 +105,263 @@ ComplexArray& ComplexArray::operator=(ComplexArray &&cd) cd.bound4 = 0; return *this; } -ComplexArray &ComplexArray::operator=(const ComplexArray & cd) +ComplexArray& + ComplexArray::operator= (const ComplexArray& cd) { - const int size = this->getSize(); - assert(size==cd.getSize()); + const int size = this->getSize (); + assert (size == cd.getSize ()); for (int i = 0; i < size; i++) - { - ptr[i] = cd.ptr[i]; - } + { + ptr[i] = cd.ptr[i]; + } return *this; } -void ComplexArray::operator=(const std::complex < double> c) +void + ComplexArray::operator= (const std::complex c) { - const int size = this->getSize(); + const int size = this->getSize (); for (int i = 0; i < size; i++) - { - ptr[i] = c; - } + { + ptr[i] = c; + } } -ComplexArray ComplexArray::operator+(const ComplexArray &cd) const +ComplexArray + ComplexArray::operator+ (const ComplexArray& cd) const { - const int size = this->getSize(); - assert(size==cd.getSize()); - ComplexArray cd2(*this); + const int size = this->getSize (); + assert (size == cd.getSize ()); + ComplexArray cd2 (*this); for (int i = 0; i < size; i++) - { - cd2.ptr[i] += cd.ptr[i]; - } + { + cd2.ptr[i] += cd.ptr[i]; + } return cd2; } -void ComplexArray::operator+=(const ComplexArray & cd) +void + ComplexArray::operator+= (const ComplexArray& cd) { - const int size = this->getSize(); - 
assert(size==cd.getSize()); + const int size = this->getSize (); + assert (size == cd.getSize ()); for (int i = 0; i < size; i++) - { - ptr[i] += cd.ptr[i]; - } + { + ptr[i] += cd.ptr[i]; + } } -ComplexArray ComplexArray::operator-(const ComplexArray &cd) const +ComplexArray + ComplexArray::operator- (const ComplexArray& cd) const { - const int size = this->getSize(); - assert(size==cd.getSize()); - ComplexArray cd2(*this); + const int size = this->getSize (); + assert (size == cd.getSize ()); + ComplexArray cd2 (*this); for (int i = 0; i < size; i++) - { - cd2.ptr[i] -= cd.ptr[i]; - } + { + cd2.ptr[i] -= cd.ptr[i]; + } return cd2; } -void ComplexArray::operator-=(const ComplexArray & cd) +void + ComplexArray::operator-= (const ComplexArray& cd) { - const int size = this->getSize(); - assert(size==cd.getSize()); + const int size = this->getSize (); + assert (size == cd.getSize ()); for (int i = 0; i < size; i++) - { - ptr[i] -= cd.ptr[i]; - } + { + ptr[i] -= cd.ptr[i]; + } } -void ComplexArray::operator*=(const ComplexArray & cd) +void + ComplexArray::operator*= (const ComplexArray& cd) { - const int size = this->getSize(); - assert(size==cd.getSize()); + const int size = this->getSize (); + assert (size == cd.getSize ()); for (int i = 0; i < size; i++) - { - ptr[i] *= cd.ptr[i]; - } + { + ptr[i] *= cd.ptr[i]; + } } -ComplexArray operator*(const double r, const ComplexArray &cd) +ComplexArray + operator* (const double r, const ComplexArray& cd) { - ComplexArray cd2(cd); - const int size = cd.getSize(); + ComplexArray cd2 (cd); + const int size = cd.getSize (); for (int i = 0; i < size; i++) - { - cd2.ptr[i] *= r; - } + { + cd2.ptr[i] *= r; + } return cd2; } -ComplexArray ComplexArray::operator*(const double r) const +ComplexArray + ComplexArray::operator* (const double r) const { - ComplexArray cd2(*this); - const int size = this->getSize(); + ComplexArray cd2 (*this); + const int size = this->getSize (); for (int i = 0; i < size; i++) - { - cd2.ptr[i] *= r; - } + 
{ + cd2.ptr[i] *= r; + } return cd2; } -ComplexArray operator*(const std::complex < double> c, const ComplexArray &cd) +ComplexArray + operator* (const std::complex c, const ComplexArray& cd) { - const int size = cd.getSize(); - ComplexArray cd2(cd.getSize()); + const int size = cd.getSize (); + ComplexArray cd2 (cd.getSize ()); for (int i = 0; i < size; i++) - { - cd2.ptr[i] = c * cd.ptr[i]; - } + { + cd2.ptr[i] = c * cd.ptr[i]; + } return cd2; } -ComplexArray ComplexArray::operator*(const std::complex < double> c) const +ComplexArray + ComplexArray::operator* (const std::complex c) const { - const int size = this->getSize(); - ComplexArray cd(size); + const int size = this->getSize (); + ComplexArray cd (size); for (int i = 0; i < size; i++) - { - cd.ptr[i] = ptr[i] * c; - } + { + cd.ptr[i] = ptr[i] * c; + } return cd; } -void ComplexArray::operator*=(const std::complex c) +void + ComplexArray::operator*= (const std::complex c) { - const int size = this->getSize(); + const int size = this->getSize (); for (int i = 0; i < size; i++) - { - ptr[i] *= c; - } + { + ptr[i] *= c; + } } -void ComplexArray::operator*=(const double r) +void + ComplexArray::operator*= (const double r) { - const int size = this->getSize(); + const int size = this->getSize (); for (int i = 0; i < size; i++) - { - ptr[i] *= r; - } -} -bool ComplexArray::operator==(const ComplexArray &cd2)const -{ - const int size1 = this->getSize(); - const int size2 = cd2.getSize(); - const int b11 = this->getBound1(); - const int b12 = this->getBound2(); - const int b13 = this->getBound3(); - const int b14 = this->getBound4(); - const int b21 = cd2.getBound1(); - const int b22 = cd2.getBound2(); - const int b23 = cd2.getBound3(); - const int b24 = cd2.getBound4(); + { + ptr[i] *= r; + } +} +bool + ComplexArray::operator== (const ComplexArray& cd2) const +{ + const int size1 = this->getSize (); + const int size2 = cd2.getSize (); + const int b11 = this->getBound1 (); + const int b12 = this->getBound2 (); + 
const int b13 = this->getBound3 (); + const int b14 = this->getBound4 (); + const int b21 = cd2.getBound1 (); + const int b22 = cd2.getBound2 (); + const int b23 = cd2.getBound3 (); + const int b24 = cd2.getBound4 (); if (size1 != size2) - { - return false; - } + { + return false; + } if (b11 != b21) - { - return false; - } + { + return false; + } if (b12 != b22) - { - return false; - } + { + return false; + } if (b13 != b23) - { - return false; - } + { + return false; + } if (b14 != b24) - { - return false; - } - for ( int i = 0;i ptr[i] != cd2.ptr[i]) { return false; } - } + for (int i = 0; i < size1; ++i) + { + if (this->ptr[i] != cd2.ptr[i]) + { + return false; + } + } return true; } -bool ComplexArray::operator!=(const ComplexArray &cd2)const -{ - const int size1 = this->getSize(); - const int size2 = cd2.getSize(); - const int b11 = this->getBound1(); - const int b12 = this->getBound2(); - const int b13 = this->getBound3(); - const int b14 = this->getBound4(); - const int b21 = cd2.getBound1(); - const int b22 = cd2.getBound2(); - const int b23 = cd2.getBound3(); - const int b24 = cd2.getBound4(); +bool + ComplexArray::operator!= (const ComplexArray& cd2) const +{ + const int size1 = this->getSize (); + const int size2 = cd2.getSize (); + const int b11 = this->getBound1 (); + const int b12 = this->getBound2 (); + const int b13 = this->getBound3 (); + const int b14 = this->getBound4 (); + const int b21 = cd2.getBound1 (); + const int b22 = cd2.getBound2 (); + const int b23 = cd2.getBound3 (); + const int b24 = cd2.getBound4 (); if (size1 != size2) - { - return true; - } + { + return true; + } if (b11 != b21) - { - return true; - } + { + return true; + } if (b12 != b22) - { - return true; - } + { + return true; + } if (b13 != b23) - { - return true; - } + { + return true; + } if (b14 != b24) - { - return true; - } - for ( int i = 0;i ptr[i] != cd2.ptr[i]) { return true; } - } + for (int i = 0; i < size1; ++i) + { + if (this->ptr[i] != cd2.ptr[i]) + { + return 
true; + } + } return false; } -void ComplexArray::zero_out(void) +void + ComplexArray::zero_out () { - const int size = this->getSize(); - for (int i = 0;i < size; i++) - { - ptr[i] = std::complex < double> (0.0, 0.0); - } + const int size = this->getSize (); + for (int i = 0; i < size; i++) + { + ptr[i] = std::complex (0.0, 0.0); + } } -void ComplexArray::negate(void) +void + ComplexArray::negate () { - const int size = this->getSize(); - for (int i = 0;i < size; i++) - { - ptr[i] = -ptr[i]; - } + const int size = this->getSize (); + for (int i = 0; i < size; i++) + { + ptr[i] = -ptr[i]; + } } -void ComplexArray::randomize(void) +void + ComplexArray::randomize () { - const int size = this->getSize(); - for (int i = 0;i < size; i++) - { - ptr[i] = std::complex < double> (rand() / (RAND_MAX + 1.) - .5, - rand() / (RAND_MAX + 1.) - .5); - } + const int size = this->getSize (); + for (int i = 0; i < size; i++) + { + ptr[i] = std::complex (rand () / (RAND_MAX + 1.) - .5, rand () / (RAND_MAX + 1.) 
- .5); + } } -double abs2(const ComplexArray &cd) +double + abs2 (const ComplexArray& cd) { - double cdcd= 0.0; - const int size = cd.getSize(); + double cdcd = 0.0; + const int size = cd.getSize (); for (int i = 0; i < size; i++) - { - const std::complex < double> c = cd.ptr[i]; - cdcd += c.real() * c.real() + c.imag() * c.imag(); - } + { + const std::complex c = cd.ptr[i]; + cdcd += c.real () * c.real () + c.imag () * c.imag (); + } return cdcd; } // void add_scale_abs2(const std::complex < double> &c, const ComplexArray & in, ComplexArray &out){ @@ -349,71 +369,77 @@ double abs2(const ComplexArray &cd) // const int size = in.getSize(); // for (int i = 0; i < size; i++) // out.ptr[i] += std::complex < double> (c.real() * 22, c.imag() * 22);} -std::complex dot(const ComplexArray &cd1, const ComplexArray &cd2) +std::complex + dot (const ComplexArray& cd1, const ComplexArray& cd2) { - assert(cd1.getSize()==cd2.getSize()); - const int size = cd1.getSize(); - std::complex < double> dot12(0.0,0.0); + assert (cd1.getSize () == cd2.getSize ()); + const int size = cd1.getSize (); + std::complex dot12 (0.0, 0.0); for (int i = 0; i < size; i++) - { - dot12 += std::complex < double> - (cd1.ptr[i].real() * cd2.ptr[i].real() + - cd1.ptr[i].imag() * cd2.ptr[i].imag(), - cd1.ptr[i].real() * cd2.ptr[i].imag() - - cd1.ptr[i].imag() * cd2.ptr[i].real()); - } + { + dot12 += std::complex ( + cd1.ptr[i].real () * cd2.ptr[i].real () + cd1.ptr[i].imag () * cd2.ptr[i].imag (), + cd1.ptr[i].real () * cd2.ptr[i].imag () - cd1.ptr[i].imag () * cd2.ptr[i].real ()); + } return dot12; } -void scale_accumulate(double r, const ComplexArray &cd1, ComplexArray &cd2) +void + scale_accumulate (double r, const ComplexArray& cd1, ComplexArray& cd2) { - assert(cd1.getSize()==cd2.getSize()); - const int size = cd1.getSize(); + assert (cd1.getSize () == cd2.getSize ()); + const int size = cd1.getSize (); for (int i = 0; i < size; i++) - { - cd2.ptr[i] += r * cd1.ptr[i]; - } + { + cd2.ptr[i] += r * 
cd1.ptr[i]; + } } -void scale_accumulate(const std::complex c, const ComplexArray &cd1, ComplexArray &cd2) +void + scale_accumulate (const std::complex c, const ComplexArray& cd1, ComplexArray& cd2) { - assert(cd1.getSize()==cd2.getSize()); - const int size = cd1.getSize(); + assert (cd1.getSize () == cd2.getSize ()); + const int size = cd1.getSize (); for (int i = 0; i < size; i++) - { - cd2.ptr[i] += c * cd1.ptr[i]; - } + { + cd2.ptr[i] += c * cd1.ptr[i]; + } } -void scaled_sum(double r1, const ComplexArray &cd1,double r2, const ComplexArray &cd2,ComplexArray &cd3) +void + scaled_sum (double r1, const ComplexArray& cd1, double r2, const ComplexArray& cd2, ComplexArray& cd3) { - assert(cd1.getSize()==cd2.getSize()); - assert(cd1.getSize()==cd3.getSize()); - const int size = cd1.getSize(); + assert (cd1.getSize () == cd2.getSize ()); + assert (cd1.getSize () == cd3.getSize ()); + const int size = cd1.getSize (); for (int i = 0; i < size; i++) - { - cd3.ptr[i] = r1 * cd1.ptr[i] + r2 * cd2.ptr[i]; - } + { + cd3.ptr[i] = r1 * cd1.ptr[i] + r2 * cd2.ptr[i]; + } } -void scaled_sum(std::complex < double> c1, const ComplexArray &cd1,std::complex < double> c2, const ComplexArray &cd2,ComplexArray &cd3) +void + scaled_sum (std::complex c1, + const ComplexArray& cd1, + std::complex c2, + const ComplexArray& cd2, + ComplexArray& cd3) { - assert(cd1.getSize()==cd2.getSize()); - assert(cd1.getSize()==cd3.getSize()); - const int size = cd1.getSize(); + assert (cd1.getSize () == cd2.getSize ()); + assert (cd1.getSize () == cd3.getSize ()); + const int size = cd1.getSize (); for (int i = 0; i < size; i++) - { - cd3.ptr[i] = c1 * cd1.ptr[i] + c2 * cd2.ptr[i]; - } + { + cd3.ptr[i] = c1 * cd1.ptr[i] + c2 * cd2.ptr[i]; + } } -void point_mult(ComplexArray &in1, ComplexArray &in2, ComplexArray &out) +void + point_mult (ComplexArray& in1, ComplexArray& in2, ComplexArray& out) { - assert(in1.getSize()==in2.getSize()); - assert(in1.getSize()==out.getSize()); - const int size = 
in1.getSize(); + assert (in1.getSize () == in2.getSize ()); + assert (in1.getSize () == out.getSize ()); + const int size = in1.getSize (); for (int i = 0; i < size; i++) - { - out.ptr[i] = std::complex < double> - (in1.ptr[i].real() * in2.ptr[i].real() - - in1.ptr[i].imag() * in2.ptr[i].imag(), - in1.ptr[i].real() * in2.ptr[i].imag() + - in1.ptr[i].imag() * in2.ptr[i].real()); - } -} + { + out.ptr[i] = std::complex ( + in1.ptr[i].real () * in2.ptr[i].real () - in1.ptr[i].imag () * in2.ptr[i].imag (), + in1.ptr[i].real () * in2.ptr[i].imag () + in1.ptr[i].imag () * in2.ptr[i].real ()); + } } +} // namespace ModuleBase diff --git a/source/source_base/complexarray.h b/source/source_base/complexarray.h index 831f42ee02d..690b0235463 100644 --- a/source/source_base/complexarray.h +++ b/source/source_base/complexarray.h @@ -13,144 +13,173 @@ namespace ModuleBase /// @brief A basic type of data for complex array class ComplexArray { -public: - std::complex *ptr=nullptr; // data array - - ComplexArray(const int bnd1=0, const int bnd2=1, const int bnd3=1, const int bnd4=1); - - ~ComplexArray(); - - void freemem(); - - void create(const int bnd1=0, const int bnd2=1, const int bnd3=1, const int bnd4=1); - - ComplexArray(const ComplexArray &cd); - ComplexArray(ComplexArray &&cd); - - /**************************************************** - * OPERATOR FUNCTIONS - ***************************************************/ - ComplexArray& operator=(ComplexArray &&cd); - ComplexArray &operator=(const ComplexArray &cd); - /// Assignment of scalar: all entries set to c. 
- void operator=(std::complex c); - /// Add two ComplexArray - ComplexArray operator+(const ComplexArray &cd) const; - /// Accumulate sum of ComplexArray - void operator+=(const ComplexArray &cd); - /// Subtract two ComplexArray - ComplexArray operator-(const ComplexArray &cd) const; - /// Accumulate difference of arrays - void operator-=(const ComplexArray &cd); - /// Scale a ComplexArray by real r - ComplexArray operator*(const double r) const; - /// Scale a ComplexArray by a std::complex number c - ComplexArray operator*(const std::complex c) const; - /// Scale a ComplexArray by real number in place - void operator*=(const double r); - /// Scale a ComplexArray by std::complex c in place - void operator*=(const std::complex c); - /// accumulate pointwise multiply - void operator*=(const ComplexArray &cd); - /// Judge if two ComplexArray is equal - bool operator== (const ComplexArray &cd2)const; - /// Judge if two ComplexArray is not equal - bool operator!= (const ComplexArray &cd2)const; - - /// overloaded subscript operator for non-const std::complex Array const reference return creates an lvakue - std::complex &operator() - (const int ind1=0, const int ind2=0, const int ind3=0, const int ind4=0) - { - assert(ind1>=0); assert(ind1=0); assert(ind2=0); assert(ind3=0); assert(ind4 &operator()(int, int, int, int, int); - /// overloaded subscript operator for const std::complex Array const reference return creates an cvakue - const std::complex &operator() - (const int ind1=0, const int ind2=0, const int ind3=0, const int ind4=0) const - { - assert(ind1>=0); assert(ind1=0); assert(ind2=0); assert(ind3=0); assert(ind4 &operator()(int, int, int, int, int)const; - - /**************************************************** - * MEMBER FUNCTIONS - ***************************************************/ - /// set all elements to be {0.0,0.0} - void zero_out(void); - - /// Negates all the entries in the array - void negate(void); - - /// set all elements to a random number whose 
real/image is between [-0.5,0.5). - void randomize(void); - int getBound1()const{ return bound1; } - int getBound2()const{ return bound2; } - int getBound3()const{ return bound3; } - int getBound4()const{ return bound4; } - int getSize()const{ return bound1*bound2*bound3*bound4; } - -private: - int bound1, bound2, bound3, bound4; - void init(const int size); + public: + std::complex* ptr = nullptr; // data array + + ComplexArray (const int bnd1 = 0, const int bnd2 = 1, const int bnd3 = 1, const int bnd4 = 1); + + ~ComplexArray (); + + void freemem (); + + void create (const int bnd1 = 0, const int bnd2 = 1, const int bnd3 = 1, const int bnd4 = 1); + + ComplexArray (const ComplexArray& cd); + ComplexArray (ComplexArray&& cd); + + /**************************************************** + * OPERATOR FUNCTIONS + ***************************************************/ + ComplexArray& operator= (ComplexArray&& cd); + ComplexArray& operator= (const ComplexArray& cd); + /// Assignment of scalar: all entries set to c. 
+ void operator= (std::complex c); + /// Add two ComplexArray + ComplexArray operator+ (const ComplexArray& cd) const; + /// Accumulate sum of ComplexArray + void operator+= (const ComplexArray& cd); + /// Subtract two ComplexArray + ComplexArray operator- (const ComplexArray& cd) const; + /// Accumulate difference of arrays + void operator-= (const ComplexArray& cd); + /// Scale a ComplexArray by real r + ComplexArray operator* (const double r) const; + /// Scale a ComplexArray by a std::complex number c + ComplexArray operator* (const std::complex c) const; + /// Scale a ComplexArray by real number in place + void operator*= (const double r); + /// Scale a ComplexArray by std::complex c in place + void operator*= (const std::complex c); + /// accumulate pointwise multiply + void operator*= (const ComplexArray& cd); + /// Judge if two ComplexArray is equal + bool operator== (const ComplexArray& cd2) const; + /// Judge if two ComplexArray is not equal + bool operator!= (const ComplexArray& cd2) const; + + /// overloaded subscript operator for non-const std::complex Array const reference return creates an lvakue + std::complex& + operator() (const int ind1 = 0, const int ind2 = 0, const int ind3 = 0, const int ind4 = 0) + { + assert (ind1 >= 0); + assert (ind1 < bound1); + assert (ind2 >= 0); + assert (ind2 < bound2); + assert (ind3 >= 0); + assert (ind3 < bound3); + assert (ind4 >= 0); + assert (ind4 < bound4); + const int ind = ((ind1 * bound2 + ind2) * bound3 + ind3) * bound4 + ind4; + return ptr[ind]; + }; + // std::complex < double> &operator()(int, int, int, int, int); + /// overloaded subscript operator for const std::complex Array const reference return creates an cvakue + const std::complex& + operator() (const int ind1 = 0, const int ind2 = 0, const int ind3 = 0, const int ind4 = 0) const + { + assert (ind1 >= 0); + assert (ind1 < bound1); + assert (ind2 >= 0); + assert (ind2 < bound2); + assert (ind3 >= 0); + assert (ind3 < bound3); + assert (ind4 >= 0); 
+ assert (ind4 < bound4); + const int ind = ((ind1 * bound2 + ind2) * bound3 + ind3) * bound4 + ind4; + return ptr[ind]; + }; + // const std::complex < double> &operator()(int, int, int, int, int)const; + + /**************************************************** + * MEMBER FUNCTIONS + ***************************************************/ + /// set all elements to be {0.0,0.0} + void zero_out (); + + /// Negates all the entries in the array + void negate (); + + /// set all elements to a random number whose real/image is between [-0.5,0.5). + void randomize (); + int + getBound1 () const + { + return bound1; + } + int + getBound2 () const + { + return bound2; + } + int + getBound3 () const + { + return bound3; + } + int + getBound4 () const + { + return bound4; + } + int + getSize () const + { + return bound1 * bound2 * bound3 * bound4; + } + + private: + int bound1, bound2, bound3, bound4; + void init (const int size); }; /// Scale a ComplexArray cd by real r -ComplexArray operator*(const double r, const ComplexArray &cd); +ComplexArray operator* (const double r, const ComplexArray& cd); /// Scale a ComplexArray cd by std::complex number c -ComplexArray operator*(const std::complex c, const ComplexArray &cd); +ComplexArray operator* (const std::complex c, const ComplexArray& cd); /// Sum of absolute squares of all elements in cd -double abs2(const ComplexArray &cd); +double abs2 (const ComplexArray& cd); // void add_scale_abs2(const std::complex &c, const ComplexArray & in, // ComplexArray &out); /// Take "dot-product" of two ComplexArray: sum of cd1(conjugate)[i] * cd2[i] -std::complex dot(const ComplexArray &cd1, const ComplexArray &cd2); +std::complex dot (const ComplexArray& cd1, const ComplexArray& cd2); /// Does cd2 += r * cd1 -void scale_accumulate(double r, const ComplexArray &cd1, ComplexArray &cd2); +void scale_accumulate (double r, const ComplexArray& cd1, ComplexArray& cd2); /// Does cd2 += c * cd1 -void scale_accumulate(std::complex c, const ComplexArray 
&cd1, ComplexArray &cd2); +void scale_accumulate (std::complex c, const ComplexArray& cd1, ComplexArray& cd2); /// Does cd3 = r1*cd1 + r2*cd2 -void scaled_sum(double r1, const ComplexArray &cd1, - double r2, const ComplexArray &cd2, - ComplexArray &cd3); +void scaled_sum (double r1, const ComplexArray& cd1, double r2, const ComplexArray& cd2, ComplexArray& cd3); /// Does cd3 = c1*cd1 + c2*cd2 -void scaled_sum(std::complex c1, const ComplexArray &cd1, - std::complex c2, const ComplexArray &cd2, - ComplexArray &cd3); +void scaled_sum (std::complex c1, + const ComplexArray& cd1, + std::complex c2, + const ComplexArray& cd2, + ComplexArray& cd3); /// out[i] = a1[i] * in2[i] -void point_mult(ComplexArray &a1, ComplexArray &in2, ComplexArray &out); +void point_mult (ComplexArray& a1, ComplexArray& in2, ComplexArray& out); /// set elements of u as zero which u is 1_d std::complex array template -void zeros(std::complex *u, int n) +void + zeros (std::complex* u, int n) { - if (n == 0 || u == 0) - { - std::cout << "\n error in zeros(),n or u = 0"; - return; - } - - for (int i = 0;i < n;i++) - { - u[i] = std::complex (0.0, 0.0); - } -} + if (n == 0 || u == 0) + { + std::cout << "\n error in zeros(),n or u = 0"; + return; + } + + for (int i = 0; i < n; i++) + { + u[i] = std::complex (0.0, 0.0); + } } +} // namespace ModuleBase #endif // COMPLEX_ARRAY_H diff --git a/source/source_base/complexmatrix.cpp b/source/source_base/complexmatrix.cpp index 217ebaf0dd7..478a0becc9d 100644 --- a/source/source_base/complexmatrix.cpp +++ b/source/source_base/complexmatrix.cpp @@ -13,445 +13,519 @@ namespace ModuleBase { // constructor with sizes -ComplexMatrix::ComplexMatrix(const int nrows, const int ncols, const bool flag_zero) - :nr(nrows), - nc(ncols), - size(nrows*ncols), - c(nullptr) +ComplexMatrix::ComplexMatrix (const int nrows, const int ncols, const bool flag_zero) + : nr (nrows), nc (ncols), size (nrows * ncols), c (nullptr) { - if( size ) - { - c = new std::complex[size]; - 
if(flag_zero) zero_out(); - } + if (size) + { + c = new std::complex[size]; + if (flag_zero) + { + zero_out (); + } + } } // zero out the ComplexMatrix -void ComplexMatrix::zero_out(void) +void + ComplexMatrix::zero_out () { - for (int i=0; i(0.0,0.0); + for (int i = 0; i < size; i++) + { + c[i] = std::complex (0.0, 0.0); + } } /* void need_more_memory() { - std::cout << "\n Sorry to crash... but the running need more momory! Exit." << std::endl; - exit(0); + std::cout << "\n Sorry to crash... but the running need more momory! Exit." << std::endl; + exit(0); } */ // Copy constructor -ComplexMatrix::ComplexMatrix(const ComplexMatrix &m1) - :nr(m1.nr), - nc(m1.nc), - size(m1.size), - c(nullptr) +ComplexMatrix::ComplexMatrix (const ComplexMatrix& m1) : nr (m1.nr), nc (m1.nc), size (m1.size), c (nullptr) { - if(size) - { - c = new std::complex[size]; - memcpy( c, m1.c, size*sizeof(std::complex) ); - } + if (size) + { + c = new std::complex[size]; + memcpy (c, m1.c, size * sizeof (std::complex)); + } } // Peize Lin add 2016-08-05 -ComplexMatrix::ComplexMatrix( ComplexMatrix && m1 ) - :nr(m1.nr), - nc(m1.nc), - size(m1.size), - c(m1.c) +ComplexMatrix::ComplexMatrix (ComplexMatrix&& m1) : nr (m1.nr), nc (m1.nc), size (m1.size), c (m1.c) { - m1.nr = m1.nc = m1.size = 0; - m1.c = nullptr; + m1.nr = m1.nc = m1.size = 0; + m1.c = nullptr; } // Peize Lin add 2017-03-29 -ComplexMatrix::ComplexMatrix(const matrix &m) - :nr(m.nr), - nc(m.nc), - size(m.nr*m.nc), - c(nullptr) -{ - if( size ) - { - c = new std::complex[size]; - for( int i=0; i[size]; + for (int i = 0; i < size; ++i) + { + c[i] = m.c[i]; + } + } } // deconstructor -ComplexMatrix::~ComplexMatrix() +ComplexMatrix::~ComplexMatrix () { - if(c) - { - delete[] c; - c = nullptr; - } + if (c) + { + delete[] c; + c = nullptr; + } } // reallocate memory for Complex Matrix -void ComplexMatrix::create(const int nr_in, const int nc_in, const bool flag_zero) -{ - if( nr_in && nc_in ) - { - if(c) - { - const int 
size_in=nr_in*nc_in; - if( size_in!=nr*nc ) - { - delete[] c; - c = new std::complex[size_in]; - } - } - else - { - c = new std::complex[nr_in * nc_in]; - } - - nr = nr_in; - nc = nc_in; - size = nr*nc; - if(flag_zero) zero_out(); - } - else - { - if(c) delete[] c; - c = nullptr; - nr = nr_in; - nc = nc_in; - size = nr*nc; - } -} - -void ComplexMatrix::set_as_identity_matrix(void) -{ - for(int i=0; i(1.0, 0.0); - else c[nc * i + j] = std::complex(0.0, 0.0); - } - } - return; +void + ComplexMatrix::create (const int nr_in, const int nc_in, const bool flag_zero) +{ + if (nr_in && nc_in) + { + if (c) + { + const int size_in = nr_in * nc_in; + if (size_in != nr * nc) + { + delete[] c; + c = new std::complex[size_in]; + } + } + else + { + c = new std::complex[nr_in * nc_in]; + } + + nr = nr_in; + nc = nc_in; + size = nr * nc; + if (flag_zero) + { + zero_out (); + } + } + else + { + if (c) + { + delete[] c; + } + c = nullptr; + nr = nr_in; + nc = nc_in; + size = nr * nc; + } +} + +void + ComplexMatrix::set_as_identity_matrix () +{ + for (int i = 0; i < nr; i++) + { + for (int j = 0; j < nc; j++) + { + if (i == j) + { + c[nc * i + j] = std::complex (1.0, 0.0); + } + else + { + c[nc * i + j] = std::complex (0.0, 0.0); + } + } + } + return; } // Adding matrices, as a friend -ComplexMatrix operator+(const ComplexMatrix &m1, const ComplexMatrix &m2) +ComplexMatrix + operator+ (const ComplexMatrix& m1, const ComplexMatrix& m2) { - assert(m1.nr == m2.nr); - assert(m2.nc == m2.nc); - - ComplexMatrix tm(m1); - tm+=m2; - return tm; + assert (m1.nr == m2.nr); + assert (m2.nc == m2.nc); + + ComplexMatrix tm (m1); + tm += m2; + return tm; } // Subtracting matrices, as a friend -ComplexMatrix operator-(const ComplexMatrix &m1, const ComplexMatrix &m2) +ComplexMatrix + operator- (const ComplexMatrix& m1, const ComplexMatrix& m2) { - assert(m1.nr == m2.nr); - assert(m2.nc == m2.nc); - - ComplexMatrix tm(m1); - tm-=m2; - return tm; + assert (m1.nr == m2.nr); + assert (m2.nc == m2.nc); + 
+ ComplexMatrix tm (m1); + tm -= m2; + return tm; } // Multiplying matrices, as a friend // mprod = m1 * m2 -ComplexMatrix operator*(const ComplexMatrix &m1, const ComplexMatrix &m2) +ComplexMatrix + operator* (const ComplexMatrix& m1, const ComplexMatrix& m2) { - assert(m1.nc == m2.nr); - ComplexMatrix mprod(m1.nr, m2.nc); + assert (m1.nc == m2.nr); + ComplexMatrix mprod (m1.nr, m2.nc); // mohan add 2021-04-05 #ifdef __NORMAL - std::complex z; - for (int i = 0;i < m1.nr;i++) - { - for (int j = 0;j < m2.nc;j++) - { - z = std::complex(0,0); - for (int k = 0;k < m1.nc;k++) - { - z += m1(i, k) * m2(k, j); - } - mprod(i, j) = z; - } - } + std::complex z; + for (int i = 0; i < m1.nr; i++) + { + for (int j = 0; j < m2.nc; j++) + { + z = std::complex (0, 0); + for (int k = 0; k < m1.nc; k++) + { + z += m1 (i, k) * m2 (k, j); + } + mprod (i, j) = z; + } + } #else - // Peize Lin accelerate 2017-10-27 - BlasConnector::gemm('N', 'N', m1.nr, m2.nc, m1.nc, - 1, m1.c, m1.nc, m2.c, m2.nc, - 0, mprod.c, mprod.nc); + // Peize Lin accelerate 2017-10-27 + BlasConnector::gemm ('N', 'N', m1.nr, m2.nc, m1.nc, 1, m1.c, m1.nc, m2.c, m2.nc, 0, mprod.c, mprod.nc); #endif - return mprod; + return mprod; } // Scale a ComplexMatrix -ComplexMatrix operator*(const std::complex &c,const ComplexMatrix &m) +ComplexMatrix + operator* (const std::complex& c, const ComplexMatrix& m) { - ComplexMatrix sm(m); - for (int i=0 ;i &c) +ComplexMatrix + operator* (const ComplexMatrix& m, const std::complex& c) { - ComplexMatrix sm(m); - for (int i = 0;i < m.size;i++) sm.c[i] *= c; - return sm; + ComplexMatrix sm (m); + for (int i = 0; i < m.size; i++) + { + sm.c[i] *= c; + } + return sm; } -ComplexMatrix operator*(const double &r,const ComplexMatrix &m) +ComplexMatrix + operator* (const double& r, const ComplexMatrix& m) { - ComplexMatrix sm(m); - for(int i=0; icreate(m.nr, m.nc, false); - memcpy( c, m.c, size*sizeof(std::complex) ); - return *this; + this->create (m.nr, m.nc, false); + memcpy (c, m.c, size * 
sizeof (std::complex)); + return *this; } // Peize Lin add 2016-08-05 -ComplexMatrix& ComplexMatrix::operator=( ComplexMatrix && m ) +ComplexMatrix& + ComplexMatrix::operator= (ComplexMatrix&& m) { - nr = m.nr; nc = m.nc; size = m.size; - if(c) delete[] c; - c = m.c; - m.nr = m.nc = m.size = 0; - m.c = nullptr; - return *this; + nr = m.nr; + nc = m.nc; + size = m.size; + if (c) + { + delete[] c; + } + c = m.c; + m.nr = m.nc = m.size = 0; + m.c = nullptr; + return *this; } -ComplexMatrix& ComplexMatrix::operator*=(const std::complex &s) +ComplexMatrix& + ComplexMatrix::operator*= (const std::complex& s) { - for (int i = 0;i < this->size;i++) c[i] *= s; - return *this; + for (int i = 0; i < this->size; i++) + { + c[i] *= s; + } + return *this; } // Accumulate to a ComplexMatrix in place -ComplexMatrix& ComplexMatrix::operator+=(const ComplexMatrix &m) +ComplexMatrix& + ComplexMatrix::operator+= (const ComplexMatrix& m) { - for(int i=0; ic[i] += m.c[i]; - return *this; + for (int i = 0; i < size; i++) + { + this->c[i] += m.c[i]; + } + return *this; } // decumulate to a ComplexMatrix in place -ComplexMatrix& ComplexMatrix::operator-=(const ComplexMatrix &m) +ComplexMatrix& + ComplexMatrix::operator-= (const ComplexMatrix& m) { - for(int i=0; ic[i] -= m.c[i]; - return *this; + for (int i = 0; i < size; i++) + { + this->c[i] -= m.c[i]; + } + return *this; } // Peize Lin add 2017-03-29 -matrix ComplexMatrix::real() const +matrix + ComplexMatrix::real () const { - matrix m(nr,nc,false); - for( int i=0; isize; ++i) m.c[i] = c[i].real(); - return m; + matrix m (nr, nc, false); + for (int i = 0; i < this->size; ++i) + { + m.c[i] = c[i].real (); + } + return m; } // Returns trace of ComplexMatrix -std::complex trace(const ComplexMatrix &m) +std::complex + trace (const ComplexMatrix& m) { - std::complex tr=std::complex(0,0); - assert(m.nr == m.nc); - for (int i=0; i tr = std::complex (0, 0); + assert (m.nr == m.nc); + for (int i = 0; i < m.nr; i++) + { + tr += m (i, i); + } + 
return tr; } // Do mout += s*min -void scale_accumulate(const std::complex &s, - const ComplexMatrix &min, - ComplexMatrix &mout) +void + scale_accumulate (const std::complex& s, const ComplexMatrix& min, ComplexMatrix& mout) { - assert(min.nr == mout.nr); - assert(min.nc == mout.nc); - for (int j=0; j &s, - ComplexMatrix **min, - ComplexMatrix **mout) +void + scale_accumulate (const int& nmat, const std::complex& s, ComplexMatrix** min, ComplexMatrix** mout) { - assert(nmat>=0); - for (int i=0; i= 0); + for (int i = 0; i < nmat; i++) + { + scale_accumulate (s, *min[i], *mout[i]); + } + return; } // Do mout = s1*m1 + s2*m2 -void scaled_sum(const std::complex &s1, - const ComplexMatrix &m1, - const std::complex &s2, - const ComplexMatrix &m2, - ComplexMatrix &mout) +void + scaled_sum (const std::complex& s1, + const ComplexMatrix& m1, + const std::complex& s2, + const ComplexMatrix& m2, + ComplexMatrix& mout) { - assert(m1.nr == m2.nr); - assert(m1.nr == mout.nr); - assert(m1.nc == m2.nc); - assert(m1.nc == mout.nc); + assert (m1.nr == m2.nr); + assert (m1.nr == mout.nr); + assert (m1.nc == m2.nc); + assert (m1.nc == mout.nc); - for(int i=0; i &s1, - ComplexMatrix **m1, - const std::complex &s2, - ComplexMatrix **m2, - ComplexMatrix **mout) -{ - assert(nmat>0); - for(int i=0; i z; - for(int ic=0;ic z; - for(int ir=0;ir& s1, + ComplexMatrix** m1, + const std::complex& s2, + ComplexMatrix** m2, + ComplexMatrix** mout) +{ + assert (nmat > 0); + for (int i = 0; i < nmat; i++) + { + scaled_sum (s1, *m1[i], s2, *m2[i], *mout[i]); + } + return; +} + +double + abs2_row (const ComplexMatrix& m, const int ir) +{ + double r = 0.0; + std::complex z; + for (int ic = 0; ic < m.nc; ic++) + { + z = m.c[m.nc * ir + ic]; + r += z.real () * z.real () + z.imag () * z.imag (); + } + return r; +} + +double + abs2_column (const ComplexMatrix& m, const int ic) +{ + double r = 0.0; + std::complex z; + for (int ir = 0; ir < m.nr; ir++) + { + z = m.c[m.nc * ir + ic]; + r += z.real () * z.real 
() + z.imag () * z.imag (); + } + return r; } // returns absolute square magnitude of sum of all ComplexMatrix elements -double abs2(const ComplexMatrix &m) +double + abs2 (const ComplexMatrix& m) { - double r=0.0; - std::complex z; + double r = 0.0; + std::complex z; - for (int i = 0;i < m.size;i++) - { - z = m.c[i]; - r += z.real() * z.real() + z.imag() * z.imag(); - } - return r; + for (int i = 0; i < m.size; i++) + { + z = m.c[i]; + r += z.real () * z.real () + z.imag () * z.imag (); + } + return r; } // Same for an array of matrices -double abs2(const int nmat, ComplexMatrix **m) -{ - double r = 0.0; - for (int i = 0;i < nmat;i++) - { - r += abs2(*m[i]); - } - return r; -} - -ComplexMatrix transpose(const ComplexMatrix &m, const bool &conjugate) -{ - ComplexMatrix tm(m.nc, m.nr, false); - if(conjugate) - for (int i = 0;i < m.nr;i++) - for (int j = 0;j < m.nc;j++) - tm(j, i) = conj ( m(i, j) ); - else - for (int i = 0;i < m.nr;i++) - for (int j = 0;j < m.nc;j++) - tm(j, i) = m(i, j); - return tm; -} - -ComplexMatrix conj(const ComplexMatrix &m) -{ - ComplexMatrix cm( m.nr, m.nc, false ); - for(int i=0; i!=m.size; ++i) - cm.c[i] = conj(m.c[i]); - return cm; +double + abs2 (const int nmat, ComplexMatrix** m) +{ + double r = 0.0; + for (int i = 0; i < nmat; i++) + { + r += abs2 (*m[i]); + } + return r; +} + +ComplexMatrix + transpose (const ComplexMatrix& m, const bool& conjugate) +{ + ComplexMatrix tm (m.nc, m.nr, false); + if (conjugate) + { + for (int i = 0; i < m.nr; i++) + { + for (int j = 0; j < m.nc; j++) + { + tm (j, i) = conj (m (i, j)); + } + } + } + else + { + for (int i = 0; i < m.nr; i++) + { + for (int j = 0; j < m.nc; j++) + { + tm (j, i) = m (i, j); + } + } + } + return tm; +} + +ComplexMatrix + conj (const ComplexMatrix& m) +{ + ComplexMatrix cm (m.nr, m.nc, false); + for (int i = 0; i != m.size; ++i) + { + cm.c[i] = conj (m.c[i]); + } + return cm; } // Peize Lin add 2021.09.08 -std::ostream & ComplexMatrix::print( std::ostream & os, const double 
threshold_abs, const double threshold_imag ) const -{ - for( int ir=0; ir!=this->nr; ++ir ) - { - for( int ic=0; ic!=this->nc; ++ic ) - { - const std::complex & data = (*this)(ir,ic); - if(std::abs(data)>threshold_abs) - { - if(std::abs(std::imag(data))>threshold_imag) - os<nr;i++) - { - for(int j=0;jnc;j++) - { - if(std::imag((*this)(i,j)) > tiny) - { - return 0; - } - } - } - return 1; -} - -} \ No newline at end of file +std::ostream& + ComplexMatrix::print (std::ostream& os, const double threshold_abs, const double threshold_imag) const +{ + for (int ir = 0; ir != this->nr; ++ir) + { + for (int ic = 0; ic != this->nc; ++ic) + { + const std::complex& data = (*this) (ir, ic); + if (std::abs (data) > threshold_abs) + { + if (std::abs (std::imag (data)) > threshold_imag) + { + os << data << "\t"; + } + else + { + os << std::real (data) << "\t"; + } + } + else + { + os << 0 << "\t"; + } + } + os << std::endl; + } + return os; +} + +bool + ComplexMatrix::checkreal () const +{ + const double tiny = 1e-12; + for (int i = 0; i < this->nr; i++) + { + for (int j = 0; j < this->nc; j++) + { + if (std::imag ((*this) (i, j)) > tiny) + { + return false; + } + } + } + return true; +} + +} // namespace ModuleBase \ No newline at end of file diff --git a/source/source_base/complexmatrix.h b/source/source_base/complexmatrix.h index df1585e4c18..5d47ccd8edf 100644 --- a/source/source_base/complexmatrix.h +++ b/source/source_base/complexmatrix.h @@ -12,114 +12,114 @@ namespace ModuleBase class ComplexMatrix { -public: - - int nr=0; - int nc=0; - int size=0; - std::complex *c=nullptr; - - ComplexMatrix(): nr(0), nc(0), size(0), c(nullptr){} - ComplexMatrix(const int nrows,const int ncols,const bool flag_zero=true); // Peize Lin add flag_zero 2019-05-13 - ComplexMatrix(const ComplexMatrix &m1); - ComplexMatrix(ComplexMatrix && m1); // Peize Lin add 2016-08-05 - explicit ComplexMatrix(const matrix &m); // Peize Lin add 2017-03-29 - ~ComplexMatrix(); - - void create(const int 
nrow,const int ncol,const bool flag_zero=true); // Peize Lin add flag_zero 2019-05-13 - ComplexMatrix& operator=(const ComplexMatrix &m); - ComplexMatrix& operator=(ComplexMatrix && m); // Peize Lin add 2016-08-05 - - //============ - // Operators - //============ - std::complex &operator()(const int ir,const int ic) - { - assert(ir>=0); assert(ir=0); assert(ic &operator()(const int ir,const int ic)const - { - assert(ir>=0); assert(ir=0); assert(ic &s); - ComplexMatrix& operator+=(const ComplexMatrix &m); - ComplexMatrix& operator-=(const ComplexMatrix &m); - //return a matrix whose element is the real part of element of the ComplexMatrix. - matrix real() const; // Peize Lin add 2017-03-29 - - //================== - // member function: - //================== - //set all elements to be std::complex {0.0,0.0} - void zero_out(void); - //set to be a unit matrix, - void set_as_identity_matrix(void); - - std::ostream & print( std::ostream & os, const double threshold_abs=0.0, const double threshold_imag=0.0 ) const; // Peize Lin add 2021.09.08 - - // check if all the elements are real - bool checkreal(void) const; - - using type=std::complex; // Peiae Lin add 2022.08.08 for template + public: + int nr = 0; + int nc = 0; + int size = 0; + std::complex* c = nullptr; + + ComplexMatrix () : nr (0), nc (0), size (0), c (nullptr) {} + ComplexMatrix (const int nrows, const int ncols, const bool flag_zero = true); // Peize Lin add flag_zero 2019-05-13 + ComplexMatrix (const ComplexMatrix& m1); + ComplexMatrix (ComplexMatrix&& m1); // Peize Lin add 2016-08-05 + explicit ComplexMatrix (const matrix& m); // Peize Lin add 2017-03-29 + ~ComplexMatrix (); + + void create (const int nrow, const int ncol, const bool flag_zero = true); // Peize Lin add flag_zero 2019-05-13 + ComplexMatrix& operator= (const ComplexMatrix& m); + ComplexMatrix& operator= (ComplexMatrix&& m); // Peize Lin add 2016-08-05 + + //============ + // Operators + //============ + std::complex& + operator() (const 
int ir, const int ic) + { + assert (ir >= 0); + assert (ir < nr); + assert (ic >= 0); + assert (ic < nc); + return c[ir * nc + ic]; // mohan modify in-line 2007-10-1 + } + const std::complex& + operator() (const int ir, const int ic) const + { + assert (ir >= 0); + assert (ir < nr); + assert (ic >= 0); + assert (ic < nc); + return c[ir * nc + ic]; // mohan modify in-line 2007-10-13 + } + + ComplexMatrix& operator*= (const std::complex& s); + ComplexMatrix& operator+= (const ComplexMatrix& m); + ComplexMatrix& operator-= (const ComplexMatrix& m); + // return a matrix whose element is the real part of element of the ComplexMatrix. + matrix real () const; // Peize Lin add 2017-03-29 + + //================== + // member function: + //================== + // set all elements to be std::complex {0.0,0.0} + void zero_out (); + // set to be a unit matrix, + void set_as_identity_matrix (); + + std::ostream& print (std::ostream& os, + const double threshold_abs = 0.0, + const double threshold_imag = 0.0) const; // Peize Lin add 2021.09.08 + + // check if all the elements are real + bool checkreal () const; + + using type = std::complex; // Peiae Lin add 2022.08.08 for template }; -ComplexMatrix operator+(const ComplexMatrix &m1, const ComplexMatrix &m2); -ComplexMatrix operator-(const ComplexMatrix &m1, const ComplexMatrix &m2); -ComplexMatrix operator*(const ComplexMatrix &m1, const ComplexMatrix &m2); -ComplexMatrix operator*(const std::complex &s, const ComplexMatrix &m); -ComplexMatrix operator*(const ComplexMatrix &m, const std::complex &s); -ComplexMatrix operator*(const double &s, const ComplexMatrix &m); -ComplexMatrix operator*(const ComplexMatrix &m, const double &s); +ComplexMatrix operator+ (const ComplexMatrix& m1, const ComplexMatrix& m2); +ComplexMatrix operator- (const ComplexMatrix& m1, const ComplexMatrix& m2); +ComplexMatrix operator* (const ComplexMatrix& m1, const ComplexMatrix& m2); +ComplexMatrix operator* (const std::complex& s, const ComplexMatrix& 
m); +ComplexMatrix operator* (const ComplexMatrix& m, const std::complex& s); +ComplexMatrix operator* (const double& s, const ComplexMatrix& m); +ComplexMatrix operator* (const ComplexMatrix& m, const double& s); -//calculate the trace -std::complex trace(const ComplexMatrix &m); +// calculate the trace +std::complex trace (const ComplexMatrix& m); -//calculate the sum of the square of the modulus of the elements in ir row. -double abs2_row(const ComplexMatrix &m,const int ir); // mohan add 2008-7-1 +// calculate the sum of the square of the modulus of the elements in ir row. +double abs2_row (const ComplexMatrix& m, const int ir); // mohan add 2008-7-1 -//calculate the sum of the square of the modulus of the elements in ic-th column. -double abs2_column(const ComplexMatrix &m,const int ic); // mohan add 2008-7-1 +// calculate the sum of the square of the modulus of the elements in ic-th column. +double abs2_column (const ComplexMatrix& m, const int ic); // mohan add 2008-7-1 // calculate the sum of the square of the modulus of all elements. -double abs2(const ComplexMatrix &m); +double abs2 (const ComplexMatrix& m); // calculate the sum of the square of the modulus of all elements of an array of ComplexMatrix. 
-double abs2(const int nmat, ComplexMatrix **m); +double abs2 (const int nmat, ComplexMatrix** m); -ComplexMatrix transpose(const ComplexMatrix &m, const bool &conjugate); -ComplexMatrix conj(const ComplexMatrix &m); // Peize Lin add 2019-05-13 +ComplexMatrix transpose (const ComplexMatrix& m, const bool& conjugate); +ComplexMatrix conj (const ComplexMatrix& m); // Peize Lin add 2019-05-13 -//do mout += s*min -void scale_accumulate( - const std::complex &s, - const ComplexMatrix &min, - ComplexMatrix &mout); +// do mout += s*min +void scale_accumulate (const std::complex& s, const ComplexMatrix& min, ComplexMatrix& mout); -//do (*mout[i]) += s * (*min[i]); int i &s, - ComplexMatrix **min, - ComplexMatrix **mout); +// do (*mout[i]) += s * (*min[i]); int i& s, ComplexMatrix** min, ComplexMatrix** mout); // Do mout = s1*m1 + s2*m2 -void scaled_sum( - const std::complex &s1, - const ComplexMatrix &m1, - const std::complex &s2, - const ComplexMatrix &m2, - ComplexMatrix &mout); +void scaled_sum (const std::complex& s1, + const ComplexMatrix& m1, + const std::complex& s2, + const ComplexMatrix& m2, + ComplexMatrix& mout); // Do (*mout[i]) = s1 * (*m1[i]) + s2 * (*m2[i]) -void scaled_sum( - const int &nmat, - const std::complex &s1, - ComplexMatrix **m1, - const std::complex &s2, - ComplexMatrix **m2, - ComplexMatrix **mout); -} +void scaled_sum (const int& nmat, + const std::complex& s1, + ComplexMatrix** m1, + const std::complex& s2, + ComplexMatrix** m2, + ComplexMatrix** mout); +} // namespace ModuleBase #endif diff --git a/source/source_base/constants.h b/source/source_base/constants.h index 2b8623d1cd6..9e604fcb8ae 100644 --- a/source/source_base/constants.h +++ b/source/source_base/constants.h @@ -3,7 +3,7 @@ #include //========================================================== -// GLOBAL CONSTANTS +// GLOBAL CONSTANTS //========================================================== //========================================================== @@ -16,85 +16,85 @@ 
//========================================================== namespace ModuleBase { -const double PI = 3.14159265358979323846; -const double PI_HALF = PI / 2.0; -const double TWO_PI = 2 * PI; -const double FOUR_PI = 4.0 * 3.14159265358979323846; -//const double SQRT_PI = 1.77245385090551602729; -//const double INVERSE_SQRT_PI = 1.0 / SQRT_PI; -const double INVERSE_FOUR_PI = 1.0/FOUR_PI; -const double SQRT_INVERSE_FOUR_PI = sqrt(INVERSE_FOUR_PI); +const double PI = 3.14159265358979323846; +const double PI_HALF = PI / 2.0; +const double TWO_PI = 2 * PI; +const double FOUR_PI = 4.0 * 3.14159265358979323846; +// const double SQRT_PI = 1.77245385090551602729; +// const double INVERSE_SQRT_PI = 1.0 / SQRT_PI; +const double INVERSE_FOUR_PI = 1.0 / FOUR_PI; +const double SQRT_INVERSE_FOUR_PI = sqrt (INVERSE_FOUR_PI); const double SQRT2 = 1.41421356237309504880; -//const double SQRT3 = 1.73205080756887729352; +// const double SQRT3 = 1.73205080756887729352; //========================================================== // EXPLAIN : std::complex constants //========================================================== -const std::complex ZERO(0.0, 0.0); -const std::complex ONE(1.0, 0.0); -const std::complex NEG_ONE(-1.0, 0.0); -const std::complex IMAG_UNIT(0.0,1.0); -const std::complex NEG_IMAG_UNIT(0.0,-1.0); +const std::complex ZERO (0.0, 0.0); +const std::complex ONE (1.0, 0.0); +const std::complex NEG_ONE (-1.0, 0.0); +const std::complex IMAG_UNIT (0.0, 1.0); +const std::complex NEG_IMAG_UNIT (0.0, -1.0); //========================================================== // EXPLAIN : physical constants //========================================================== -const double K_BOLTZMAN_SI = 1.3806504e-23;// J K^-1 -const double K_BOLTZMAN_AU = 3.1667e-6;// Hartree K^-1 -const double Hartree_to_K = 3.1577464e5; // Hartree to K -//const double K_BOLTZMAN_RY = 6.3335e-6;// Rydberg K^-1; mohan add 2010-09-03 -//const double K_BOLTZMAN_EV = 8.6173e-5; // eV; mohan add 2010-09-03 
-//const double K_BOLTZMAN_M1_AU = 315795.260;// Hartree^-1 K -//const double FACTEM = 315795.260;// 27.212d0*11605.d0 Hartree^-1 K +const double K_BOLTZMAN_SI = 1.3806504e-23; // J K^-1 +const double K_BOLTZMAN_AU = 3.1667e-6; // Hartree K^-1 +const double Hartree_to_K = 3.1577464e5; // Hartree to K +// const double K_BOLTZMAN_RY = 6.3335e-6;// Rydberg K^-1; mohan add 2010-09-03 +// const double K_BOLTZMAN_EV = 8.6173e-5; // eV; mohan add 2010-09-03 +// const double K_BOLTZMAN_M1_AU = 315795.260;// Hartree^-1 K +// const double FACTEM = 315795.260;// 27.212d0*11605.d0 Hartree^-1 K //========================================================== -// EXPLAIN : physical constants define the Atomic Units +// EXPLAIN : physical constants define the Atomic Units //========================================================== -const double BOHR_RADIUS_SI = 0.529177e-10; // m -//const double BOHR_RADIUS_CM = 0.529177e-8; // cm -const double BOHR_TO_A = 0.5291770; // angstrom -//const double ELECTRONMASS_SI = 9.10953e-31; // kg -//const double ELECTRONMASS_UMA = 5.4858e-4; // uma +const double BOHR_RADIUS_SI = 0.529177e-10; // m +// const double BOHR_RADIUS_CM = 0.529177e-8; // cm +const double BOHR_TO_A = 0.5291770; // angstrom +// const double ELECTRONMASS_SI = 9.10953e-31; // kg +// const double ELECTRONMASS_UMA = 5.4858e-4; // uma //========================================================== // EXPLAIN : units conversion factors //========================================================== -const double ELECTRONVOLT_SI = 1.6021892e-19; // J -//const double UMA_SI = 1.66057e-27; // Kg -const double ANGSTROM_AU = 1.8897270; // au -//const double AU_TO_OHMCMM1 = 46000.00; // (ohm cm)^-1 -//const double AU_KB = 294210.00; // Kbar -//const double KB_AU = 1.00 / 294210.00;// au -//const double AU_GPA = 29421.00; // GPa -//const double GPA_AU = 1.00 / 29421.00;// au -//const double SCMASS = 1822.890; // uma to au ( mass of a proton ) -//const double UMA_AU = 1822.890; // au -//const 
double AU_TERAHERTZ = 2.418e-5; // THz -//const double TERAHERTZ = 2.418e-5; // from au to THz -//const double AU_SEC = 2.4189e-17; // sec -const double AU_to_FS = 2.418884326505e-2; // from a.u. to fs -//const double rhothr = 1.0e-5; // tolerance -//const double gsmall = 1.0e-12; -const double e2 = 2.0; // the square of the electron charge -const double DEGSPIN = 2.0; // the number of spins per level -const double Hartree_to_eV = 27.211396;// slcbb // 27.21138344; // eV -const double Ry_to_eV = 13.605698; // 13.60569172; // conversion from Ry to eV -//const double eV_to_kelvin = 1.16044; // from ev to Kelvin -const double NA = 6.02214129e23; // mol +const double ELECTRONVOLT_SI = 1.6021892e-19; // J +// const double UMA_SI = 1.66057e-27; // Kg +const double ANGSTROM_AU = 1.8897270; // au +// const double AU_TO_OHMCMM1 = 46000.00; // (ohm cm)^-1 +// const double AU_KB = 294210.00; // Kbar +// const double KB_AU = 1.00 / 294210.00;// au +// const double AU_GPA = 29421.00; // GPa +// const double GPA_AU = 1.00 / 29421.00;// au +// const double SCMASS = 1822.890; // uma to au ( mass of a proton ) +// const double UMA_AU = 1822.890; // au +// const double AU_TERAHERTZ = 2.418e-5; // THz +// const double TERAHERTZ = 2.418e-5; // from au to THz +// const double AU_SEC = 2.4189e-17; // sec +const double AU_to_FS = 2.418884326505e-2; // from a.u. to fs +// const double rhothr = 1.0e-5; // tolerance +// const double gsmall = 1.0e-12; +const double e2 = 2.0; // the square of the electron charge +const double DEGSPIN = 2.0; // the number of spins per level +const double Hartree_to_eV = 27.211396; // slcbb // 27.21138344; // eV +const double Ry_to_eV = 13.605698; // 13.60569172; // conversion from Ry to eV +// const double eV_to_kelvin = 1.16044; // from ev to Kelvin +const double NA = 6.02214129e23; // mol const double EMASS_SI = 9.1093826e-31; // mass of electron (kg) -const double AU_to_MASS = NA*EMASS_SI*1e3; // mass a.u. 
to g/mol +const double AU_to_MASS = NA * EMASS_SI * 1e3; // mass a.u. to g/mol -const double HARTREE_SI = 4.35974394e-18; //J -const double RYDBERG_SI = HARTREE_SI/2.0; //J -//const double RY_TO_KELVIN = RYDBERG_SI / K_BOLTZMAN_SI; +const double HARTREE_SI = 4.35974394e-18; // J +const double RYDBERG_SI = HARTREE_SI / 2.0; // J +// const double RY_TO_KELVIN = RYDBERG_SI / K_BOLTZMAN_SI; -//const double AMCONV = 1.660538782e-27 / 9.10938215e-31 * 0.50; // mass conversion: a.m.u to a.u. (Ry) +// const double AMCONV = 1.660538782e-27 / 9.10938215e-31 * 0.50; // mass conversion: a.m.u to a.u. (Ry) -//const double uakbar = 147105.0; // pressure conversion from Ry/(a.u)^3 to K +// const double uakbar = 147105.0; // pressure conversion from Ry/(a.u)^3 to K // zero up to a given accuracy -//const double epsr = 1.0e-6; -const double threshold_wg = 1.0e-10; -} +// const double epsr = 1.0e-6; +const double threshold_wg = 1.0e-10; +} // namespace ModuleBase -#endif +#endif diff --git a/source/source_base/container_operator.h b/source/source_base/container_operator.h index ec1f3fbc237..f49fc3dce87 100644 --- a/source/source_base/container_operator.h +++ b/source/source_base/container_operator.h @@ -5,62 +5,80 @@ #include #include -template< typename T> -std::vector operator + ( const std::vector & x1, const std::vector & x2 ) +template +std::vector + operator+ (const std::vector& x1, const std::vector& x2) { - assert(x1.size()==x2.size()); - std::vector x; - for(std::size_t i=0; i!=x1.size(); ++i ) - x.push_back(x1[i]+x2[i]); - return x; + assert (x1.size () == x2.size ()); + std::vector x; + for (std::size_t i = 0; i != x1.size (); ++i) + { + x.push_back (x1[i] + x2[i]); + } + return x; } -template< typename T> -std::vector operator - ( const std::vector & x1, const std::vector & x2 ) +template +std::vector + operator- (const std::vector& x1, const std::vector& x2) { - assert(x1.size()==x2.size()); - std::vector x; - for(std::size_t i=0; i!=x1.size(); ++i ) - 
x.push_back(x1[i]-x2[i]); - return x; + assert (x1.size () == x2.size ()); + std::vector x; + for (std::size_t i = 0; i != x1.size (); ++i) + { + x.push_back (x1[i] - x2[i]); + } + return x; } -template< typename T1, typename T2 > -std::map operator + ( const std::map & x1, const std::map & x2 ) +template +std::map + operator+ (const std::map& x1, const std::map& x2) { - assert(x1.size()==x2.size()); - std::map x; - for( const auto &x1i : x1 ) - x.insert(std::make_pair( x1i.first, x1i.second + x2.at(x1i.first) )); - return x; + assert (x1.size () == x2.size ()); + std::map x; + for (const auto& x1i: x1) + { + x.insert (std::make_pair (x1i.first, x1i.second + x2.at (x1i.first))); + } + return x; } -template< typename T1, typename T2 > -std::map operator - ( const std::map & x1, const std::map & x2 ) +template +std::map + operator- (const std::map& x1, const std::map& x2) { - assert(x1.size()==x2.size()); - std::map x; - for( const auto &x1i : x1 ) - x.insert(std::make_pair( x1i.first, x1i.second - x2.at(x1i.first) )); - return x; + assert (x1.size () == x2.size ()); + std::map x; + for (const auto& x1i: x1) + { + x.insert (std::make_pair (x1i.first, x1i.second - x2.at (x1i.first))); + } + return x; } -template< typename T1, typename T2 > -std::vector operator * ( const T1 & x1, const std::vector & x2 ) +template +std::vector + operator* (const T1& x1, const std::vector& x2) { - std::vector x; - for(std::size_t i=0; i!=x2.size(); ++i ) - x.push_back(x1*x2[i]); - return x; + std::vector x; + for (std::size_t i = 0; i != x2.size (); ++i) + { + x.push_back (x1 * x2[i]); + } + return x; } -template< typename T1, typename T21, typename T22 > -std::map operator * ( const T1 & x1, const std::map & x2 ) +template +std::map + operator* (const T1& x1, const std::map& x2) { - std::map x; - for( const auto & x2i : x2 ) - x.insert(std::make_pair( x2i.first, x1*x2i.second )); - return x; + std::map x; + for (const auto& x2i: x2) + { + x.insert (std::make_pair (x2i.first, x1 * 
x2i.second)); + } + return x; } -#endif // CONTAINER_OPERATOR_H +#endif // CONTAINER_OPERATOR_H diff --git a/source/source_base/cubic_spline.cpp b/source/source_base/cubic_spline.cpp index cfd9f57943b..fc29d6c7df1 100644 --- a/source/source_base/cubic_spline.cpp +++ b/source/source_base/cubic_spline.cpp @@ -11,138 +11,117 @@ using ModuleBase::CubicSpline; extern "C" { // solve a tridiagonal linear system - void dgtsv_(int* N, int* NRHS, double* DL, double* D, double* DU, double* B, int* LDB, int* INFO); + void dgtsv_ (int* N, int* NRHS, double* DL, double* D, double* DU, double* B, int* LDB, int* INFO); }; - -CubicSpline::BoundaryCondition::BoundaryCondition(BoundaryType type) - : type(type) +CubicSpline::BoundaryCondition::BoundaryCondition (BoundaryType type) : type (type) { - assert(type == BoundaryType::periodic || type == BoundaryType::not_a_knot); + assert (type == BoundaryType::periodic || type == BoundaryType::not_a_knot); } - -CubicSpline::BoundaryCondition::BoundaryCondition(BoundaryType type, double val) - : type(type), val(val) +CubicSpline::BoundaryCondition::BoundaryCondition (BoundaryType type, double val) : type (type), val (val) { - assert(type == BoundaryType::first_deriv || type == BoundaryType::second_deriv); + assert (type == BoundaryType::first_deriv || type == BoundaryType::second_deriv); } - -CubicSpline::CubicSpline( - int n, - const double* x, - const double* y, - const BoundaryCondition& bc_start, - const BoundaryCondition& bc_end -): n_spline_(1), n_(n), xmin_(x[0]), xmax_(x[n - 1]), x_(x, x + n), y_(2 * n) +CubicSpline::CubicSpline (int n, + const double* x, + const double* y, + const BoundaryCondition& bc_start, + const BoundaryCondition& bc_end) + : n_spline_ (1), n_ (n), xmin_ (x[0]), xmax_ (x[n - 1]), x_ (x, x + n), y_ (2 * n) { - std::copy(y, y + n, y_.begin()); - build(n, x, y, bc_start, bc_end, &y_[n]); + std::copy (y, y + n, y_.begin ()); + build (n, x, y, bc_start, bc_end, &y_[n]); } - -CubicSpline::CubicSpline( - int n, - 
double x0, - double dx, - const double* y, - const BoundaryCondition& bc_start, - const BoundaryCondition& bc_end -): n_spline_(1), n_(n), xmin_(x0), xmax_(x0 + (n - 1) * dx), dx_(dx), y_(2 * n) +CubicSpline::CubicSpline (int n, + double x0, + double dx, + const double* y, + const BoundaryCondition& bc_start, + const BoundaryCondition& bc_end) + : n_spline_ (1), n_ (n), xmin_ (x0), xmax_ (x0 + (n - 1) * dx), dx_ (dx), y_ (2 * n) { - std::copy(y, y + n, y_.begin()); - build(n, dx, y, bc_start, bc_end, &y_[n]); + std::copy (y, y + n, y_.begin ()); + build (n, dx, y, bc_start, bc_end, &y_[n]); } - -CubicSpline::CubicSpline(int n, const double* x) - : n_spline_(0), n_(n), xmin_(x[0]), xmax_(x[n - 1]), x_(x, x + n) +CubicSpline::CubicSpline (int n, const double* x) : n_spline_ (0), n_ (n), xmin_ (x[0]), xmax_ (x[n - 1]), x_ (x, x + n) { } - -CubicSpline::CubicSpline(int n, double x0, double dx) - : n_spline_(0), n_(n), xmin_(x0), xmax_(x0 + (n - 1) * dx), dx_(dx) +CubicSpline::CubicSpline (int n, double x0, double dx) + : n_spline_ (0), n_ (n), xmin_ (x0), xmax_ (x0 + (n - 1) * dx), dx_ (dx) { } - -void CubicSpline::add( - const double* y, - const BoundaryCondition& bc_start, - const BoundaryCondition& bc_end -) +void + CubicSpline::add (const double* y, const BoundaryCondition& bc_start, const BoundaryCondition& bc_end) { int offset = n_spline_ * 2 * n_; - y_.resize(offset + 2 * n_); + y_.resize (offset + 2 * n_); - std::copy(y, y + n_, &y_[offset]); + std::copy (y, y + n_, &y_[offset]); double* dy = &y_[offset + n_]; - if (x_.empty()) // evenly spaced knots - { - build(n_, dx_, y, bc_start, bc_end, dy); - } + if (x_.empty ()) // evenly spaced knots + { + build (n_, dx_, y, bc_start, bc_end, dy); + } else - { - build(n_, x_.data(), y, bc_start, bc_end, dy); - } + { + build (n_, x_.data (), y, bc_start, bc_end, dy); + } ++n_spline_; } - -void CubicSpline::eval( - int n_interp, - const double* x_interp, - double* y_interp, - double* dy_interp, - double* d2y_interp, - int 
i_spline -) const +void + CubicSpline::eval (int n_interp, + const double* x_interp, + double* y_interp, + double* dy_interp, + double* d2y_interp, + int i_spline) const { - assert(0 <= i_spline && i_spline < n_spline_); + assert (0 <= i_spline && i_spline < n_spline_); const double* y = &y_[i_spline * 2 * n_]; const double* dy = y + n_; - if (x_.empty()) // evenly spaced knots - { - eval(n_, xmin_, dx_, y, dy, n_interp, x_interp, y_interp, dy_interp, d2y_interp); - } + if (x_.empty ()) // evenly spaced knots + { + eval (n_, xmin_, dx_, y, dy, n_interp, x_interp, y_interp, dy_interp, d2y_interp); + } else - { - eval(n_, x_.data(), y, dy, n_interp, x_interp, y_interp, dy_interp, d2y_interp); - } + { + eval (n_, x_.data (), y, dy, n_interp, x_interp, y_interp, dy_interp, d2y_interp); + } } - -void CubicSpline::multi_eval( - int n_spline, - const int* i_spline, - double x_interp, - double* y_interp, - double* dy_interp, - double* d2y_interp -) const +void + CubicSpline::multi_eval (int n_spline, + const int* i_spline, + double x_interp, + double* y_interp, + double* dy_interp, + double* d2y_interp) const { - assert(std::all_of(i_spline, i_spline + n_spline, - [this](int i) { return 0 <= i && i < n_spline_; })); - _validate_eval(n_, {xmin_, dx_}, x_.empty() ? nullptr : x_.data(), - y_.data(), &y_[n_], 1, &x_interp); + assert (std::all_of (i_spline, i_spline + n_spline, [this] (int i) { return 0 <= i && i < n_spline_; })); + _validate_eval (n_, {xmin_, dx_}, x_.empty () ? 
nullptr : x_.data (), y_.data (), &y_[n_], 1, &x_interp); int p = 0; - double dx = 0.0, r = 0.0; - if (x_.empty()) // evenly spaced knots - { - p = _index(n_, xmin_, dx_, x_interp); - dx = dx_; - r = (x_interp - xmin_) / dx - p; - } + double dx = 0.0, r = 0.0; + if (x_.empty ()) // evenly spaced knots + { + p = _index (n_, xmin_, dx_, x_interp); + dx = dx_; + r = (x_interp - xmin_) / dx - p; + } else - { - p = _index(n_, x_.data(), x_interp); - dx = x_[p + 1] - x_[p]; - r = (x_interp - x_[p]) / dx; - } + { + p = _index (n_, x_.data (), x_interp); + dx = x_[p + 1] - x_[p]; + r = (x_interp - x_[p]) / dx; + } const double r2 = r * r; const double r3 = r2 * r; @@ -150,154 +129,142 @@ void CubicSpline::multi_eval( int offset = 0; if (y_interp) - { - wy1 = 3.0 * r2 - 2.0 * r3; - wy0 = 1.0 - wy1; - ws0 = (r - 2.0 * r2 + r3) * dx; - ws1 = (r3 - r2) * dx; - for (int i = 0; i < n_spline; ++i) { - offset = i_spline[i] * 2 * n_ + p; - y_interp[i] = wy0 * y_[offset] + wy1 * y_[offset + 1] - + ws0 * y_[offset + n_] + ws1 * y_[offset + n_ + 1]; + wy1 = 3.0 * r2 - 2.0 * r3; + wy0 = 1.0 - wy1; + ws0 = (r - 2.0 * r2 + r3) * dx; + ws1 = (r3 - r2) * dx; + for (int i = 0; i < n_spline; ++i) + { + offset = i_spline[i] * 2 * n_ + p; + y_interp[i] + = wy0 * y_[offset] + wy1 * y_[offset + 1] + ws0 * y_[offset + n_] + ws1 * y_[offset + n_ + 1]; + } } - } if (dy_interp) - { - wy1 = 6.0 * (r - r2) / dx; // wy0 = -wy1 - ws0 = 3.0 * r2 - 4.0 * r + 1.0; - ws1 = 3.0 * r2 - 2.0 * r; - for (int i = 0; i < n_spline; ++i) { - offset = i_spline[i] * 2 * n_ + p; - dy_interp[i] = wy1 * (y_[offset + 1] - y_[offset]) - + ws0 * y_[offset + n_] + ws1 * y_[offset + n_ + 1]; + wy1 = 6.0 * (r - r2) / dx; // wy0 = -wy1 + ws0 = 3.0 * r2 - 4.0 * r + 1.0; + ws1 = 3.0 * r2 - 2.0 * r; + for (int i = 0; i < n_spline; ++i) + { + offset = i_spline[i] * 2 * n_ + p; + dy_interp[i] + = wy1 * (y_[offset + 1] - y_[offset]) + ws0 * y_[offset + n_] + ws1 * y_[offset + n_ + 1]; + } } - } if (d2y_interp) - { - wy1 = (6.0 - 12.0 
* r) / (dx * dx); // wy0 = -wy1 - ws0 = (6.0 * r - 4.0) / dx; - ws1 = (6.0 * r - 2.0) / dx; - for (int i = 0; i < n_spline; ++i) { - offset = i_spline[i] * 2 * n_ + p; - d2y_interp[i] = wy1 * (y_[offset + 1] - y_[offset]) - + ws0 * y_[offset + n_] + ws1 * y_[offset + n_ + 1]; + wy1 = (6.0 - 12.0 * r) / (dx * dx); // wy0 = -wy1 + ws0 = (6.0 * r - 4.0) / dx; + ws1 = (6.0 * r - 2.0) / dx; + for (int i = 0; i < n_spline; ++i) + { + offset = i_spline[i] * 2 * n_ + p; + d2y_interp[i] + = wy1 * (y_[offset + 1] - y_[offset]) + ws0 * y_[offset + n_] + ws1 * y_[offset + n_ + 1]; + } } - } } - -void CubicSpline::multi_eval( - double x, - double* y, - double* dy, - double* d2y -) const +void + CubicSpline::multi_eval (double x, double* y, double* dy, double* d2y) const { - std::vector i_spline(n_spline_); - std::iota(i_spline.begin(), i_spline.end(), 0); - multi_eval(i_spline.size(), i_spline.data(), x, y, dy, d2y); + std::vector i_spline (n_spline_); + std::iota (i_spline.begin (), i_spline.end (), 0); + multi_eval (i_spline.size (), i_spline.data (), x, y, dy, d2y); } - -void CubicSpline::build( - int n, - const double* x, - const double* y, - const BoundaryCondition& bc_start, - const BoundaryCondition& bc_end, - double* dy -) +void + CubicSpline::build (int n, + const double* x, + const double* y, + const BoundaryCondition& bc_start, + const BoundaryCondition& bc_end, + double* dy) { - std::vector dx(n); - std::adjacent_difference(x, x + n, dx.begin()); - _build(n, &dx[1], y, bc_start, bc_end, dy); + std::vector dx (n); + std::adjacent_difference (x, x + n, dx.begin ()); + _build (n, &dx[1], y, bc_start, bc_end, dy); } - -void CubicSpline::build( - int n, - double dx, - const double* y, - const BoundaryCondition& bc_start, - const BoundaryCondition& bc_end, - double* dy -) +void + CubicSpline::build (int n, + double dx, + const double* y, + const BoundaryCondition& bc_start, + const BoundaryCondition& bc_end, + double* dy) { - std::vector dx_(n - 1, dx); - _build(n, 
dx_.data(), y, bc_start, bc_end, dy); + std::vector dx_ (n - 1, dx); + _build (n, dx_.data (), y, bc_start, bc_end, dy); } - -void CubicSpline::eval( - int n, - const double* x, - const double* y, - const double* dy, - int n_interp, - const double* x_interp, - double* y_interp, - double* dy_interp, - double* d2y_interp -) +void + CubicSpline::eval (int n, + const double* x, + const double* y, + const double* dy, + int n_interp, + const double* x_interp, + double* y_interp, + double* dy_interp, + double* d2y_interp) { - _validate_eval(n, {}, x, y, dy, n_interp, x_interp); + _validate_eval (n, {}, x, y, dy, n_interp, x_interp); // indices of the polynomial segments that contain x_interp - std::vector _ind(n_interp); - std::transform(x_interp, x_interp + n_interp, _ind.begin(), - [n, x](double x_i) { return _index(n, x, x_i); }); + std::vector _ind (n_interp); + std::transform (x_interp, x_interp + n_interp, _ind.begin (), [n, x] (double x_i) { return _index (n, x, x_i); }); - std::vector buffer(n_interp * 5); - double* _w = buffer.data(); + std::vector buffer (n_interp * 5); + double* _w = buffer.data (); double* _c0 = _w + n_interp; double* _c1 = _c0 + n_interp; double* _c2 = _c1 + n_interp; double* _c3 = _c2 + n_interp; for (int i = 0; i < n_interp; ++i) - { - int p = _ind[i]; - double dx = x[p + 1] - x[p]; - double inv_dx = 1.0 / dx; - double dd = (y[p + 1] - y[p]) * inv_dx; - _w[i] = x_interp[i] - x[p]; - _c0[i] = y[p]; - _c1[i] = dy[p]; - _c3[i] = (_c1[i] + dy[p + 1] - 2.0 * dd) * inv_dx * inv_dx; - _c2[i] = (dd - _c1[i]) * inv_dx - _c3[i] * dx; - } - - _cubic(n_interp, _w, _c0, _c1, _c2, _c3, y_interp, dy_interp, d2y_interp); -} + { + int p = _ind[i]; + double dx = x[p + 1] - x[p]; + double inv_dx = 1.0 / dx; + double dd = (y[p + 1] - y[p]) * inv_dx; + _w[i] = x_interp[i] - x[p]; + _c0[i] = y[p]; + _c1[i] = dy[p]; + _c3[i] = (_c1[i] + dy[p + 1] - 2.0 * dd) * inv_dx * inv_dx; + _c2[i] = (dd - _c1[i]) * inv_dx - _c3[i] * dx; + } + _cubic (n_interp, _w, _c0, _c1, 
_c2, _c3, y_interp, dy_interp, d2y_interp); +} -void CubicSpline::eval( - int n, - double x0, - double dx, - const double* y, - const double* dy, - int n_interp, - const double* x_interp, - double* y_interp, - double* dy_interp, - double* d2y_interp -) +void + CubicSpline::eval (int n, + double x0, + double dx, + const double* y, + const double* dy, + int n_interp, + const double* x_interp, + double* y_interp, + double* dy_interp, + double* d2y_interp) { - _validate_eval(n, {x0, dx}, nullptr, y, dy, n_interp, x_interp); + _validate_eval (n, {x0, dx}, nullptr, y, dy, n_interp, x_interp); // indices of the polynomial segments that contain x_interp - std::vector _ind(n_interp); - std::transform(x_interp, x_interp + n_interp, _ind.begin(), - [n, x0, dx](double x_i) { return _index(n, x0, dx, x_i); }); - - std::vector buffer(n_interp * 5); - double* _w = buffer.data(); + std::vector _ind (n_interp); + std::transform (x_interp, + x_interp + n_interp, + _ind.begin (), + [n, x0, dx] (double x_i) { return _index (n, x0, dx, x_i); }); + + std::vector buffer (n_interp * 5); + double* _w = buffer.data (); double* _c0 = _w + n_interp; double* _c1 = _c0 + n_interp; double* _c2 = _c1 + n_interp; @@ -306,245 +273,235 @@ void CubicSpline::eval( double inv_dx = 1.0 / dx; double inv_dx2 = inv_dx * inv_dx; for (int i = 0; i < n_interp; ++i) - { - int p = _ind[i]; - double dd = (y[p + 1] - y[p]) * inv_dx; - _w[i] = x_interp[i] - x0 - p * dx; - _c0[i] = y[p]; - _c1[i] = dy[p]; - _c3[i] = (_c1[i] + dy[p + 1] - 2.0 * dd) * inv_dx2; - _c2[i] = (dd - _c1[i]) * inv_dx - _c3[i] * dx; - } - - _cubic(n_interp, _w, _c0, _c1, _c2, _c3, y_interp, dy_interp, d2y_interp); -} + { + int p = _ind[i]; + double dd = (y[p + 1] - y[p]) * inv_dx; + _w[i] = x_interp[i] - x0 - p * dx; + _c0[i] = y[p]; + _c1[i] = dy[p]; + _c3[i] = (_c1[i] + dy[p + 1] - 2.0 * dd) * inv_dx2; + _c2[i] = (dd - _c1[i]) * inv_dx - _c3[i] * dx; + } + _cubic (n_interp, _w, _c0, _c1, _c2, _c3, y_interp, dy_interp, d2y_interp); +} -void 
CubicSpline::_validate_build( - int n, - const double* dx, - const double* y, - const BoundaryCondition& bc_start, - const BoundaryCondition& bc_end -) +void + CubicSpline::_validate_build (int n, + const double* dx, + const double* y, + const BoundaryCondition& bc_start, + const BoundaryCondition& bc_end) { - assert(n > 1); + assert (n > 1); // if periodic boundary condition is specified, it must be applied to both ends - assert((bc_start.type == BoundaryType::periodic) - == (bc_end.type == BoundaryType::periodic)); + assert ((bc_start.type == BoundaryType::periodic) == (bc_end.type == BoundaryType::periodic)); // y[0] must equal y[n-1] for periodic boundary condition - assert(bc_start.type != BoundaryType::periodic || y[0] == y[n - 1]); + assert (bc_start.type != BoundaryType::periodic || y[0] == y[n - 1]); // not-a-knot boundary condition requires the existence of "internal" knot // so n must be at least 3 - assert((bc_start.type != BoundaryType::not_a_knot && - bc_end.type != BoundaryType::not_a_knot) || n > 2); + assert ((bc_start.type != BoundaryType::not_a_knot && bc_end.type != BoundaryType::not_a_knot) || n > 2); // knots must be strictly increasing - assert(std::all_of(dx, dx + n - 1, [](double d) { return d > 0.0; })); + assert (std::all_of (dx, dx + n - 1, [] (double d) { return d > 0.0; })); } - -void CubicSpline::_validate_eval( - int n, - const double (&u)[2], - const double* x, - const double* y, - const double* dy, - int n_interp, - const double* x_interp -) +void + CubicSpline::_validate_eval (int n, + const double (&u)[2], + const double* x, + const double* y, + const double* dy, + int n_interp, + const double* x_interp) { - assert(n > 1 && y && dy); - assert((x && std::is_sorted(x, x + n, std::less_equal())) || u[1] > 0.0); + assert (n > 1 && y && dy); + assert ((x && std::is_sorted (x, x + n, std::less_equal ())) || u[1] > 0.0); - assert((n_interp > 0 && x_interp) || n_interp == 0); + assert ((n_interp > 0 && x_interp) || n_interp == 0); double 
xmin = x ? x[0] : u[0]; double xmax = x ? x[n - 1] : u[0] + (n - 1) * u[1]; - assert(std::all_of(x_interp, x_interp + n_interp, - [xmin, xmax](double x_i) { return xmin <= x_i && x_i <= xmax; })); + assert ( + std::all_of (x_interp, x_interp + n_interp, [xmin, xmax] (double x_i) { return xmin <= x_i && x_i <= xmax; })); } - -void CubicSpline::_build( - int n, - const double* dx, - const double* y, - const BoundaryCondition& bc_start, - const BoundaryCondition& bc_end, - double* dy -) +void + CubicSpline::_build (int n, + const double* dx, + const double* y, + const BoundaryCondition& bc_start, + const BoundaryCondition& bc_end, + double* dy) { - _validate_build(n, dx, y, bc_start, bc_end); + _validate_build (n, dx, y, bc_start, bc_end); if (n == 2 && bc_start.type == BoundaryType::periodic) - { - dy[0] = dy[1] = 0.0; // the only possible solution: constant - } - else if (n == 3 && bc_start.type == BoundaryType::not_a_knot - && bc_end.type == BoundaryType::not_a_knot) - { - // in this case two conditions coincide - // simply build a parabola that passes through the three data points - double dd01 = (y[1] - y[0]) / dx[0]; // divided difference f[x0,x1] - double dd12 = (y[2] - y[1]) / dx[1]; // f[x1,x2] - double dd012 = (dd12 - dd01) / (dx[0] + dx[1]); // f[x0,x1,x2] - - dy[0] = dd01 - dd012 * dx[0]; - dy[1] = 2.0 * dd01 - dy[0]; - dy[2] = dd01 + dd012 * (dx[0] + 2.0 * dx[1]); - } - else - { - std::vector buffer(4 * n); - - double* dd = buffer.data(); // divided differences - std::adjacent_difference(y, y + n, dd); - dd += 1; // the first element computed by adjacent_difference is not a difference - std::transform(dd, dd + n - 1, dx, dd, std::divides()); - - // tridiagonal linear system (cyclic tridiagonal if periodic boundary condition) - double* d = buffer.data() + n; // main diagonal - double* l = buffer.data() + 2 * n; // subdiagonal - double* u = buffer.data() + 3 * n; // superdiagonal - - //*********************************************** - // common part of the 
tridiagonal linear system - //*********************************************** - std::copy(dx + 1, dx + n - 1, l); - std::copy(dx, dx + n - 2, u + 1); - - for (int i = 1; i != n - 1; ++i) { - d[i] = 2.0 * (dx[i - 1] + dx[i]); - dy[i] = 3.0 * (dd[i - 1] * dx[i] + dd[i] * dx[i - 1]); + dy[0] = dy[1] = 0.0; // the only possible solution: constant } - - //*********************************************** - // boundary-specific part - //*********************************************** - if (bc_start.type == BoundaryType::periodic) + else if (n == 3 && bc_start.type == BoundaryType::not_a_knot && bc_end.type == BoundaryType::not_a_knot) { - // exclude s[n-1] and solve a a cyclic tridiagonal linear system of size n-1 - d[0] = 2.0 * (dx[n - 2] + dx[0]); - u[0] = dx[n - 2]; - l[n - 2] = dx[0]; - dy[0] = 3.0 * (dd[0] * dx[n - 2] + dd[n - 2] * dx[0]); - _solve_cyctri(n - 1, d, u, l, dy); - dy[n - 1] = dy[0]; + // in this case two conditions coincide + // simply build a parabola that passes through the three data points + double dd01 = (y[1] - y[0]) / dx[0]; // divided difference f[x0,x1] + double dd12 = (y[2] - y[1]) / dx[1]; // f[x1,x2] + double dd012 = (dd12 - dd01) / (dx[0] + dx[1]); // f[x0,x1,x2] + + dy[0] = dd01 - dd012 * dx[0]; + dy[1] = 2.0 * dd01 - dy[0]; + dy[2] = dd01 + dd012 * (dx[0] + 2.0 * dx[1]); } - else + else { - switch (bc_start.type) - { - case BoundaryType::first_deriv: - d[0] = 1.0 * dx[0]; - u[0] = 0.0; - dy[0] = bc_start.val * dx[0]; - break; - case BoundaryType::second_deriv: - d[0] = 2.0 * dx[0]; - u[0] = 1.0 * dx[0]; - dy[0] = (3.0 * dd[0] - 0.5 * bc_start.val * dx[0]) * dx[0]; - break; - default: // BoundaryCondition::not_a_knot - d[0] = dx[1]; - u[0] = dx[0] + dx[1]; - dy[0] = (dd[0] * dx[1] * (dx[0] + 2 * u[0]) + dd[1] * dx[0] * dx[0]) / u[0]; - } - - switch (bc_end.type) - { - case BoundaryType::first_deriv: - d[n - 1] = 1.0 * dx[n - 2]; - l[n - 2] = 0.0; - dy[n - 1] = bc_end.val * dx[n - 2]; - break; - case BoundaryType::second_deriv: - d[n - 1] = 
2.0 * dx[n - 2]; - l[n - 2] = 1.0 * dx[n - 2]; - dy[n - 1] = (3.0 * dd[n - 2] + 0.5 * bc_end.val * dx[n - 2]) * dx[n - 2]; - break; - default: // BoundaryCondition::not_a_knot - d[n - 1] = dx[n - 3]; - l[n - 2] = dx[n - 3] + dx[n - 2]; - dy[n - 1] = (dd[n - 2] * dx[n - 3] * (dx[n - 2] + 2 * l[n - 2]) - + dd[n - 3] * dx[n - 2] * dx[n - 2]) / l[n - 2]; - } - - int nrhs = 1; - int ldb = n; - int info = 0; - dgtsv_(&n, &nrhs, l, d, u, dy, &ldb, &info); + std::vector buffer (4 * n); + + double* dd = buffer.data (); // divided differences + std::adjacent_difference (y, y + n, dd); + dd += 1; // the first element computed by adjacent_difference is not a difference + std::transform (dd, dd + n - 1, dx, dd, std::divides ()); + + // tridiagonal linear system (cyclic tridiagonal if periodic boundary condition) + double* d = buffer.data () + n; // main diagonal + double* l = buffer.data () + 2 * n; // subdiagonal + double* u = buffer.data () + 3 * n; // superdiagonal + + //*********************************************** + // common part of the tridiagonal linear system + //*********************************************** + std::copy (dx + 1, dx + n - 1, l); + std::copy (dx, dx + n - 2, u + 1); + + for (int i = 1; i != n - 1; ++i) + { + d[i] = 2.0 * (dx[i - 1] + dx[i]); + dy[i] = 3.0 * (dd[i - 1] * dx[i] + dd[i] * dx[i - 1]); + } + + //*********************************************** + // boundary-specific part + //*********************************************** + if (bc_start.type == BoundaryType::periodic) + { + // exclude s[n-1] and solve a a cyclic tridiagonal linear system of size n-1 + d[0] = 2.0 * (dx[n - 2] + dx[0]); + u[0] = dx[n - 2]; + l[n - 2] = dx[0]; + dy[0] = 3.0 * (dd[0] * dx[n - 2] + dd[n - 2] * dx[0]); + _solve_cyctri (n - 1, d, u, l, dy); + dy[n - 1] = dy[0]; + } + else + { + switch (bc_start.type) + { + case BoundaryType::first_deriv: + d[0] = 1.0 * dx[0]; + u[0] = 0.0; + dy[0] = bc_start.val * dx[0]; + break; + case BoundaryType::second_deriv: + d[0] = 2.0 * 
dx[0]; + u[0] = 1.0 * dx[0]; + dy[0] = (3.0 * dd[0] - 0.5 * bc_start.val * dx[0]) * dx[0]; + break; + default: // BoundaryCondition::not_a_knot + d[0] = dx[1]; + u[0] = dx[0] + dx[1]; + dy[0] = (dd[0] * dx[1] * (dx[0] + 2 * u[0]) + dd[1] * dx[0] * dx[0]) / u[0]; + } + + switch (bc_end.type) + { + case BoundaryType::first_deriv: + d[n - 1] = 1.0 * dx[n - 2]; + l[n - 2] = 0.0; + dy[n - 1] = bc_end.val * dx[n - 2]; + break; + case BoundaryType::second_deriv: + d[n - 1] = 2.0 * dx[n - 2]; + l[n - 2] = 1.0 * dx[n - 2]; + dy[n - 1] = (3.0 * dd[n - 2] + 0.5 * bc_end.val * dx[n - 2]) * dx[n - 2]; + break; + default: // BoundaryCondition::not_a_knot + d[n - 1] = dx[n - 3]; + l[n - 2] = dx[n - 3] + dx[n - 2]; + dy[n - 1] = (dd[n - 2] * dx[n - 3] * (dx[n - 2] + 2 * l[n - 2]) + + dd[n - 3] * dx[n - 2] * dx[n - 2]) + / l[n - 2]; + } + + int nrhs = 1; + int ldb = n; + int info = 0; + dgtsv_ (&n, &nrhs, l, d, u, dy, &ldb, &info); + } } - } } - -void CubicSpline::_cubic( - int n, - const double* w, - const double* c0, - const double* c1, - const double* c2, - const double* c3, - double* y, - double* dy, - double* d2y -) +void + CubicSpline::_cubic (int n, + const double* w, + const double* c0, + const double* c1, + const double* c2, + const double* c3, + double* y, + double* dy, + double* d2y) { if (y) - { - for (int i = 0; i < n; ++i) { - y[i] = ((c3[i] * w[i] + c2[i]) * w[i] + c1[i]) * w[i] + c0[i]; + for (int i = 0; i < n; ++i) + { + y[i] = ((c3[i] * w[i] + c2[i]) * w[i] + c1[i]) * w[i] + c0[i]; + } } - } if (dy) - { - for (int i = 0; i < n; ++i) { - dy[i] = (3.0 * c3[i] * w[i] + 2.0 * c2[i]) * w[i] + c1[i]; + for (int i = 0; i < n; ++i) + { + dy[i] = (3.0 * c3[i] * w[i] + 2.0 * c2[i]) * w[i] + c1[i]; + } } - } if (d2y) - { - for (int i = 0; i < n; ++i) { - d2y[i] = 6.0 * c3[i] * w[i] + 2.0 * c2[i]; + for (int i = 0; i < n; ++i) + { + d2y[i] = 6.0 * c3[i] * w[i] + 2.0 * c2[i]; + } } - } } - -int CubicSpline::_index(int n, const double* knots, double x) +int + 
CubicSpline::_index (int n, const double* knots, double x) { - int i = (std::upper_bound(knots, knots + n, x) - knots) - 1; + int i = (std::upper_bound (knots, knots + n, x) - knots) - 1; return i - (i == n - 1); } - -int CubicSpline::_index(int n, double x0, double dx, double x) +int + CubicSpline::_index (int n, double x0, double dx, double x) { int i = (x - x0) / dx; return i - (i == n - 1); } - -void CubicSpline::_solve_cyctri(int n, double* d, double* u, double* l, double* b) +void + CubicSpline::_solve_cyctri (int n, double* d, double* u, double* l, double* b) { // flexible non-zero parameters that can affect the condition number of the // tridiagonal linear system double alpha = 1.0; double beta = -d[0] / u[n - 1]; - std::vector bp(2 * n, 0.0); - std::copy(b, b + n, bp.begin()); + std::vector bp (2 * n, 0.0); + std::copy (b, b + n, bp.begin ()); bp[n] = 1. / alpha; bp[2 * n - 1] = 1. / beta; @@ -554,13 +511,14 @@ void CubicSpline::_solve_cyctri(int n, double* d, double* u, double* l, double* int nrhs = 2; int info = 0; int ldb = n; - dgtsv_(&n, &nrhs, l, d, u, bp.data(), &ldb, &info); + dgtsv_ (&n, &nrhs, l, d, u, bp.data (), &ldb, &info); double fac = (beta * u[n - 1] * bp[0] + alpha * l[n - 1] * bp[n - 1]) / (1. 
+ beta * u[n - 1] * bp[n] + alpha * l[n - 1] * bp[2 * n - 1]); - std::transform(bp.begin(), bp.begin() + n, bp.begin() + n, b, - [fac](double yi, double zi) { return yi - fac * zi; }); + std::transform (bp.begin (), + bp.begin () + n, + bp.begin () + n, + b, + [fac] (double yi, double zi) { return yi - fac * zi; }); } - - diff --git a/source/source_base/cubic_spline.h b/source/source_base/cubic_spline.h index 4455b080e9c..313995bf3ab 100644 --- a/source/source_base/cubic_spline.h +++ b/source/source_base/cubic_spline.h @@ -116,13 +116,12 @@ namespace ModuleBase * */ class CubicSpline -{ +{ //***************************************************************** // boundary condition //***************************************************************** -public: - + public: /** * @brief Types of cubic spline boundary conditions. * @@ -145,7 +144,6 @@ class CubicSpline periodic }; - /** * @brief Boundary condition for cubic spline interpolation. * @@ -156,31 +154,28 @@ class CubicSpline struct BoundaryCondition { // for not_a_knot and periodic - BoundaryCondition(BoundaryType type = BoundaryType::not_a_knot); + BoundaryCondition (BoundaryType type = BoundaryType::not_a_knot); // for first/second_deriv - BoundaryCondition(BoundaryType type, double val); + BoundaryCondition (BoundaryType type, double val); BoundaryType type; double val = 0.0; }; - //***************************************************************** // interpolant object //***************************************************************** -public: - - CubicSpline() = delete; - CubicSpline(CubicSpline const&) = default; - CubicSpline(CubicSpline &&) = default; + public: + CubicSpline () = delete; + CubicSpline (CubicSpline const&) = default; + CubicSpline (CubicSpline&&) = default; - CubicSpline& operator=(CubicSpline const&) = default; - CubicSpline& operator=(CubicSpline &&) = default; - - ~CubicSpline() = default; + CubicSpline& operator= (CubicSpline const&) = default; + CubicSpline& operator= (CubicSpline&&) 
= default; + ~CubicSpline () = default; /** * @brief Builds an interpolant object. @@ -196,14 +191,11 @@ class CubicSpline * @param[in] bc_end boundary condition at end * */ - CubicSpline( - int n, - const double* x, - const double* y, - const BoundaryCondition& bc_start = {}, - const BoundaryCondition& bc_end = {} - ); - + CubicSpline (int n, + const double* x, + const double* y, + const BoundaryCondition& bc_start = {}, + const BoundaryCondition& bc_end = {}); /** * @brief Builds an interpolant object with evenly-spaced knots. @@ -219,15 +211,12 @@ class CubicSpline * @param[in] bc_end boundary condition at end * */ - CubicSpline( - int n, - double x0, - double dx, - const double* y, - const BoundaryCondition& bc_start = {}, - const BoundaryCondition& bc_end = {} - ); - + CubicSpline (int n, + double x0, + double dx, + const double* y, + const BoundaryCondition& bc_start = {}, + const BoundaryCondition& bc_end = {}); /** * @brief Builds an empty object with specified knots only. @@ -235,13 +224,13 @@ class CubicSpline * An object of this class can hold multiple interpolants with the same knots. * This constructor allows the user to initialize the object with knots only, * so that interpolants can be added later. - * + * * @param[in] n number of knots * @param[in] x x coordinates of data points * ("knots", must be strictly increasing) * */ - CubicSpline(int n, const double* x); + CubicSpline (int n, const double* x); /** * @brief Builds an empty object with specified knots only. @@ -255,7 +244,7 @@ class CubicSpline * @param[in] dx spacing between knots (must be positive) * */ - CubicSpline(int n, double x0, double dx); + CubicSpline (int n, double x0, double dx); /** * @brief Adds an interpolant that shares the same knots. 
@@ -270,12 +259,7 @@ class CubicSpline * @param[in] bc_end boundary condition at end * */ - void add( - const double* y, - const BoundaryCondition& bc_start = {}, - const BoundaryCondition& bc_end = {} - ); - + void add (const double* y, const BoundaryCondition& bc_start = {}, const BoundaryCondition& bc_end = {}); /** * @brief Evaluates a single interpolant at multiple places. @@ -291,15 +275,12 @@ class CubicSpline * @note pass nullptr to any of the output would suppress the corresponding calculation * */ - void eval( - int n_interp, - const double* x_interp, - double* y_interp, - double* dy_interp = nullptr, - double* d2y_interp = nullptr, - int i_spline = 0 - ) const; - + void eval (int n_interp, + const double* x_interp, + double* y_interp, + double* dy_interp = nullptr, + double* d2y_interp = nullptr, + int i_spline = 0) const; /** * @brief Evaluates multiple interpolants at a single place. @@ -315,15 +296,12 @@ class CubicSpline * @note pass nullptr to any of the output would suppress the corresponding calculation * */ - void multi_eval( - int n_spline, - const int* i_spline, - double x_interp, - double* y_interp, - double* dy_interp = nullptr, - double* d2y_interp = nullptr - ) const; - + void multi_eval (int n_spline, + const int* i_spline, + double x_interp, + double* y_interp, + double* dy_interp = nullptr, + double* d2y_interp = nullptr) const; /** * @brief Evaluates all interpolants at a single place. @@ -337,13 +315,8 @@ class CubicSpline * @note pass nullptr to any of the output would suppress the corresponding calculation * */ - void multi_eval( - double x_interp, - double* y_interp, - double* dy_interp = nullptr, - double* d2y_interp = nullptr - ) const; - + void + multi_eval (double x_interp, double* y_interp, double* dy_interp = nullptr, double* d2y_interp = nullptr) const; /** * @brief Reserves memory for holding more interpolants. 
@@ -359,24 +332,41 @@ class CubicSpline * @param[in] n_spline expected total number of interpolants * */ - void reserve(int n_spline) { y_.reserve(n_spline * n_ * 2); } - + void + reserve (int n_spline) + { + y_.reserve (n_spline * n_ * 2); + } /// heap memory usage in bytes - size_t heap_usage() const { return (x_.capacity() + y_.capacity()) * sizeof(double); } + size_t + heap_usage () const + { + return (x_.capacity () + y_.capacity ()) * sizeof (double); + } /// first knot - double xmin() const { return xmin_; } + double + xmin () const + { + return xmin_; + } /// last knot - double xmax() const { return xmax_; } + double + xmax () const + { + return xmax_; + } /// number of interpolants held by this object - int n_spline() const { return n_spline_; } - - -private: + int + n_spline () const + { + return n_spline_; + } + private: /// number of cubic spline interpolants int n_spline_ = 0; @@ -398,13 +388,11 @@ class CubicSpline /// values and first derivatives at knots std::vector y_; - //***************************************************************** // static functions //***************************************************************** -public: - + public: /** * @brief Computes the first derivatives at knots for cubic spline * interpolation. 
@@ -418,15 +406,12 @@ class CubicSpline * @param[out] dy first derivatives at knots * */ - static void build( - int n, - const double* x, - const double* y, - const BoundaryCondition& bc_start, - const BoundaryCondition& bc_end, - double* dy - ); - + static void build (int n, + const double* x, + const double* y, + const BoundaryCondition& bc_start, + const BoundaryCondition& bc_end, + double* dy); /** * @brief Computes the first derivatives at evenly-spaced knots for @@ -440,15 +425,12 @@ class CubicSpline * @param[out] dy first derivatives at knots * */ - static void build( - int n, - double dx, - const double* y, - const BoundaryCondition& bc_start, - const BoundaryCondition& bc_end, - double* dy - ); - + static void build (int n, + double dx, + const double* y, + const BoundaryCondition& bc_start, + const BoundaryCondition& bc_end, + double* dy); /** * @brief Evaluates a cubic spline polynomial at multiple places. @@ -467,18 +449,15 @@ class CubicSpline * @note pass nullptr to any of the output would suppress the corresponding calculation * */ - static void eval( - int n, - const double* x, - const double* y, - const double* dy, - int n_interp, - const double* x_interp, - double* y_interp, - double* dy_interp = nullptr, - double* d2y_interp = nullptr - ); - + static void eval (int n, + const double* x, + const double* y, + const double* dy, + int n_interp, + const double* x_interp, + double* y_interp, + double* dy_interp = nullptr, + double* d2y_interp = nullptr); /** * @brief Evaluates a cubic spline polynomial with evenly spaced knots. 
@@ -498,32 +477,25 @@ class CubicSpline * @note pass nullptr to any of the output would suppress the corresponding calculation * */ - static void eval( - int n, - double x0, - double dx, - const double* y, - const double* dy, - int n_interp, - const double* x_interp, - double* y_interp, - double* dy_interp = nullptr, - double* d2y_interp = nullptr - ); - - -private: - + static void eval (int n, + double x0, + double dx, + const double* y, + const double* dy, + int n_interp, + const double* x_interp, + double* y_interp, + double* dy_interp = nullptr, + double* d2y_interp = nullptr); + + private: /// Computational routine for building cubic spline interpolant - static void _build( - int n, - const double* dx, - const double* y, - const BoundaryCondition& bc_start, - const BoundaryCondition& bc_end, - double* dy - ); - + static void _build (int n, + const double* dx, + const double* y, + const BoundaryCondition& bc_start, + const BoundaryCondition& bc_end, + double* dy); /** * @brief Segment index lookup. @@ -533,55 +505,44 @@ class CubicSpline * if target != x[n-1], or n-2 if t == x[n-1]. * */ - static inline int _index(int n, const double* x, double target); - + static inline int _index (int n, const double* x, double target); /// Segment index lookup (evenly spaced knots). - static inline int _index(int n, double x0, double dx, double target); - + static inline int _index (int n, double x0, double dx, double target); /// Evaluates a batch of cubic polynomials. - static inline void _cubic( - int n, - const double* w, - const double* c0, - const double* c1, - const double* c2, - const double* c3, - double* y, - double* dy, - double* d2y - ); - + static inline void _cubic (int n, + const double* w, + const double* c0, + const double* c1, + const double* c2, + const double* c3, + double* y, + double* dy, + double* d2y); /// Asserts that the input arguments are valid for constructing a cubic spline. 
- static void _validate_build( - int n, - const double* dx, - const double* y, - const BoundaryCondition& bc_start, - const BoundaryCondition& bc_end - ); - + static void _validate_build (int n, + const double* dx, + const double* y, + const BoundaryCondition& bc_start, + const BoundaryCondition& bc_end); /// Asserts that the input arguments are valid for interpolating a cubic spline. - static void _validate_eval( - int n, - const double (&u)[2], - const double* x, - const double* y, - const double* dy, - int n_interp, - const double* x_interp - ); - + static void _validate_eval (int n, + const double (&u)[2], + const double* x, + const double* y, + const double* dy, + int n_interp, + const double* x_interp); /** * @brief Solves a cyclic tridiagonal linear system. * * A cyclic tridiagonal linear system A*x=b where b is a vector and * - * -- -- + * -- -- * | d[0] u[0] l[n-1] | * | l[0] d[1] u[1] | * A = | l[1] d[2] u[2] | @@ -603,7 +564,7 @@ class CubicSpline * @note d, l, u are all overwritten in this function. 
* */ - static void _solve_cyctri(int n, double* d, double* u, double* l, double* b); + static void _solve_cyctri (int n, double* d, double* u, double* l, double* b); }; } // namespace ModuleBase diff --git a/source/source_base/element_basis_index.cpp b/source/source_base/element_basis_index.cpp index 32eafd01e1d..a45301f546c 100644 --- a/source/source_base/element_basis_index.cpp +++ b/source/source_base/element_basis_index.cpp @@ -9,32 +9,32 @@ namespace ModuleBase { Element_Basis_Index::IndexLNM -Element_Basis_Index::construct_index( const Range &range ) + Element_Basis_Index::construct_index (const Range& range) { - IndexLNM index; - index.resize( range.size() ); - for( std::size_t T=0; T!=range.size(); ++T ) - { - std::size_t count=0; - index[T].resize( range[T].size() ); - for( std::size_t L=0; L!=range[T].size(); ++L ) - { - index[T][L].resize( range[T][L].N ); - for( std::size_t N=0; N!=range[T][L].N; ++N ) - { - index[T][L][N].resize( range[T][L].M ); - for( std::size_t M=0; M!=range[T][L].M; ++M ) - { - index[T][L][N][M] = count; - ++count; - } - } - index[T][L].N = range[T][L].N; - index[T][L].M = range[T][L].M; - } - index[T].count_size = count; - } - return index; + IndexLNM index; + index.resize (range.size ()); + for (std::size_t T = 0; T != range.size (); ++T) + { + std::size_t count = 0; + index[T].resize (range[T].size ()); + for (std::size_t L = 0; L != range[T].size (); ++L) + { + index[T][L].resize (range[T][L].N); + for (std::size_t N = 0; N != range[T][L].N; ++N) + { + index[T][L][N].resize (range[T][L].M); + for (std::size_t M = 0; M != range[T][L].M; ++M) + { + index[T][L][N][M] = count; + ++count; + } + } + index[T][L].N = range[T][L].N; + index[T][L].M = range[T][L].M; + } + index[T].count_size = count; + } + return index; } -} \ No newline at end of file +} // namespace ModuleBase \ No newline at end of file diff --git a/source/source_base/element_basis_index.h b/source/source_base/element_basis_index.h index bb81ebf40e9..b4ce6dc432f 
100644 --- a/source/source_base/element_basis_index.h +++ b/source/source_base/element_basis_index.h @@ -14,36 +14,36 @@ namespace ModuleBase namespace Element_Basis_Index { - //private: +// private: - struct NM - { - public: - std::size_t N; - std::size_t M; - }; +struct NM +{ + public: + std::size_t N; + std::size_t M; +}; - class Index_TL: public std::vector> - { - public: - std::size_t N; - std::size_t M; - }; +class Index_TL : public std::vector> +{ + public: + std::size_t N; + std::size_t M; +}; - class Index_T: public std::vector - { - public: - std::size_t count_size; - }; +class Index_T : public std::vector +{ + public: + std::size_t count_size; +}; - //public: +// public: - typedef std::vector> Range; // range[T][L] - typedef std::vector IndexLNM; // index[T][L][N][M] +typedef std::vector> Range; // range[T][L] +typedef std::vector IndexLNM; // index[T][L][N][M] - extern IndexLNM construct_index( const Range &range ); -} +extern IndexLNM construct_index (const Range& range); +} // namespace Element_Basis_Index -} +} // namespace ModuleBase #endif \ No newline at end of file diff --git a/source/source_base/element_covalent_radius.h b/source/source_base/element_covalent_radius.h index 88f5afff3d5..84bed470fba 100644 --- a/source/source_base/element_covalent_radius.h +++ b/source/source_base/element_covalent_radius.h @@ -4,133 +4,132 @@ #include #include -//The covalent radius is from -//https://www.rsc.org/periodic-table/ -//unit in Angstrom +// The covalent radius is from +// https://www.rsc.org/periodic-table/ +// unit in Angstrom namespace ModuleBase { -const std::map CovalentRadius -= { -{"H" ,0.32}, //1 -{"He" ,0.37}, //2 -{"Li" ,1.30}, //3 -{"Be" ,0.99}, //4 -{"B" ,0.84}, //5 -{"C" ,0.75}, //6 -{"N" ,0.71}, //7 -{"O" ,0.64}, //8 -{"F" ,0.60}, //9 -{"Ne" ,0.62}, //10 -{"Na" ,1.60}, //11 -{"Mg" ,1.40}, //12 -{"Al" ,1.24}, //13 -{"Si" ,1.14}, //14 -{"P" ,1.09}, //15 -{"S" ,1.04}, //16 -{"Cl" ,1.00}, //17 -{"Ar" ,1.01}, //18 -{"K" ,2.00}, //19 -{"Ca" 
,1.74}, //20 -{"Sc" ,1.59}, //21 -{"Ti" ,1.48}, //22 -{"V" ,1.44}, //23 -{"Cr" ,1.30}, //24 -{"Mn" ,1.29}, //25 -{"Fe" ,1.24}, //26 -{"Co" ,1.18}, //27 -{"Ni" ,1.17}, //28 -{"Cu" ,1.22}, //29 -{"Zn" ,1.20}, //30 -{"Ga" ,1.23}, //31 -{"Ge" ,1.20}, //32 -{"As" ,1.20}, //33 -{"Se" ,1.18}, //34 -{"Br" ,1.17}, //35 -{"Kr" ,1.16}, //36 -{"Rb" ,2.15}, //37 -{"Sr" ,1.90}, //38 -{"Y" ,1.76}, //39 -{"Zr" ,1.64}, //40 -{"Nb" ,1.56}, //41 -{"Mo" ,1.46}, //42 -{"Tc" ,1.38}, //43 -{"Ru" ,1.36}, //44 -{"Rh" ,1.34}, //45 -{"Pd" ,1.30}, //46 -{"Ag" ,1.36}, //47 -{"Cd" ,1.40}, //48 -{"In" ,1.42}, //49 -{"Sn" ,1.40}, //50 -{"Sb" ,1.40}, //51 -{"Te" ,1.37}, //52 -{"I" ,1.36}, //53 -{"Xe" ,1.36}, //54 -{"Cs" ,2.38}, //55 -{"Ba" ,2.06}, //56 -{"La" ,1.94}, //57 -{"Ce" ,1.84}, //58 -{"Pr" ,1.90}, //59 -{"Nd" ,1.88}, //60 -{"Pm" ,1.86}, //61 -{"Sm" ,1.85}, //62 -{"Eu" ,1.83}, //63 -{"Gd" ,1.82}, //64 -{"Tb" ,1.81}, //65 -{"Dy" ,1.80}, //66 -{"Ho" ,1.79}, //67 -{"Er" ,1.77}, //68 -{"Tm" ,1.77}, //69 -{"Yb" ,1.78}, //70 -{"Lu" ,1.74}, //71 -{"Hf" ,1.64}, //72 -{"Ta" ,1.58}, //73 -{"W" ,1.50}, //74 -{"Re" ,1.41}, //75 -{"Os" ,1.36}, //76 -{"Ir" ,1.32}, //77 -{"Pt" ,1.30}, //78 -{"Au" ,1.30}, //79 -{"Hg" ,1.32}, //80 -{"Tl" ,1.44}, //81 -{"Pb" ,1.45}, //82 -{"Bi" ,1.50}, //83 -{"Po" ,1.42}, //84 -{"At" ,1.48}, //85 -{"Rn" ,1.46}, //86 -{"Fr" ,2.42}, //87 -{"Ra" ,2.11}, //88 -{"Ac" ,2.01}, //89 -{"Th" ,1.90}, //90 -{"Pa" ,1.84}, //91 -{"U" ,1.83}, //92 -{"Np" ,1.80}, //93 -{"Pu" ,1.80}, //94 -{"Am" ,1.73}, //95 -{"Cm" ,1.68}, //96 -{"Bk" ,1.68}, //97 -{"Cf" ,1.68}, //98 -{"Es" ,1.65}, //99 -{"Fm" ,1.67}, //100 -{"Md" ,1.73}, //101 -{"No" ,1.76}, //102 -{"Lr" ,1.61}, //103 -{"Rf" ,1.57}, //104 -{"Db" ,1.49}, //105 -{"Sg" ,1.43}, //106 -{"Bh" ,1.41}, //107 -{"Hs" ,1.34}, //108 -{"Mt" ,1.29}, //109 -{"Ds" ,1.28}, //110 -{"Rg" ,1.21}, //111 -{"Cn" ,1.22}, //112 -{"Nh" ,1.36}, //113 -{"Fl" ,1.43}, //114 -{"Mc" ,1.62}, //115 -{"Lv" ,1.75}, //116 -{"Ts" ,1.65}, //117 -{"Og" ,1.57} //118 +const 
std::map CovalentRadius = { + {"H", 0.32}, // 1 + {"He", 0.37}, // 2 + {"Li", 1.30}, // 3 + {"Be", 0.99}, // 4 + {"B", 0.84}, // 5 + {"C", 0.75}, // 6 + {"N", 0.71}, // 7 + {"O", 0.64}, // 8 + {"F", 0.60}, // 9 + {"Ne", 0.62}, // 10 + {"Na", 1.60}, // 11 + {"Mg", 1.40}, // 12 + {"Al", 1.24}, // 13 + {"Si", 1.14}, // 14 + {"P", 1.09}, // 15 + {"S", 1.04}, // 16 + {"Cl", 1.00}, // 17 + {"Ar", 1.01}, // 18 + {"K", 2.00}, // 19 + {"Ca", 1.74}, // 20 + {"Sc", 1.59}, // 21 + {"Ti", 1.48}, // 22 + {"V", 1.44}, // 23 + {"Cr", 1.30}, // 24 + {"Mn", 1.29}, // 25 + {"Fe", 1.24}, // 26 + {"Co", 1.18}, // 27 + {"Ni", 1.17}, // 28 + {"Cu", 1.22}, // 29 + {"Zn", 1.20}, // 30 + {"Ga", 1.23}, // 31 + {"Ge", 1.20}, // 32 + {"As", 1.20}, // 33 + {"Se", 1.18}, // 34 + {"Br", 1.17}, // 35 + {"Kr", 1.16}, // 36 + {"Rb", 2.15}, // 37 + {"Sr", 1.90}, // 38 + {"Y", 1.76}, // 39 + {"Zr", 1.64}, // 40 + {"Nb", 1.56}, // 41 + {"Mo", 1.46}, // 42 + {"Tc", 1.38}, // 43 + {"Ru", 1.36}, // 44 + {"Rh", 1.34}, // 45 + {"Pd", 1.30}, // 46 + {"Ag", 1.36}, // 47 + {"Cd", 1.40}, // 48 + {"In", 1.42}, // 49 + {"Sn", 1.40}, // 50 + {"Sb", 1.40}, // 51 + {"Te", 1.37}, // 52 + {"I", 1.36}, // 53 + {"Xe", 1.36}, // 54 + {"Cs", 2.38}, // 55 + {"Ba", 2.06}, // 56 + {"La", 1.94}, // 57 + {"Ce", 1.84}, // 58 + {"Pr", 1.90}, // 59 + {"Nd", 1.88}, // 60 + {"Pm", 1.86}, // 61 + {"Sm", 1.85}, // 62 + {"Eu", 1.83}, // 63 + {"Gd", 1.82}, // 64 + {"Tb", 1.81}, // 65 + {"Dy", 1.80}, // 66 + {"Ho", 1.79}, // 67 + {"Er", 1.77}, // 68 + {"Tm", 1.77}, // 69 + {"Yb", 1.78}, // 70 + {"Lu", 1.74}, // 71 + {"Hf", 1.64}, // 72 + {"Ta", 1.58}, // 73 + {"W", 1.50}, // 74 + {"Re", 1.41}, // 75 + {"Os", 1.36}, // 76 + {"Ir", 1.32}, // 77 + {"Pt", 1.30}, // 78 + {"Au", 1.30}, // 79 + {"Hg", 1.32}, // 80 + {"Tl", 1.44}, // 81 + {"Pb", 1.45}, // 82 + {"Bi", 1.50}, // 83 + {"Po", 1.42}, // 84 + {"At", 1.48}, // 85 + {"Rn", 1.46}, // 86 + {"Fr", 2.42}, // 87 + {"Ra", 2.11}, // 88 + {"Ac", 2.01}, // 89 + {"Th", 1.90}, // 90 + {"Pa", 
1.84}, // 91 + {"U", 1.83}, // 92 + {"Np", 1.80}, // 93 + {"Pu", 1.80}, // 94 + {"Am", 1.73}, // 95 + {"Cm", 1.68}, // 96 + {"Bk", 1.68}, // 97 + {"Cf", 1.68}, // 98 + {"Es", 1.65}, // 99 + {"Fm", 1.67}, // 100 + {"Md", 1.73}, // 101 + {"No", 1.76}, // 102 + {"Lr", 1.61}, // 103 + {"Rf", 1.57}, // 104 + {"Db", 1.49}, // 105 + {"Sg", 1.43}, // 106 + {"Bh", 1.41}, // 107 + {"Hs", 1.34}, // 108 + {"Mt", 1.29}, // 109 + {"Ds", 1.28}, // 110 + {"Rg", 1.21}, // 111 + {"Cn", 1.22}, // 112 + {"Nh", 1.36}, // 113 + {"Fl", 1.43}, // 114 + {"Mc", 1.62}, // 115 + {"Lv", 1.75}, // 116 + {"Ts", 1.65}, // 117 + {"Og", 1.57} // 118 }; } diff --git a/source/source_base/element_elec_config.h b/source/source_base/element_elec_config.h index b293a9b6668..bdaaa8384f2 100644 --- a/source/source_base/element_elec_config.h +++ b/source/source_base/element_elec_config.h @@ -7,372 +7,155 @@ namespace ModuleBase { -const std::map EleConfig -= { -{"H", "1s1"}, -{"He", "1s2"}, -{"Li", "[He] 2s1"}, -{"Be", "[He] 2s2"}, -{"B", "[He] 2s2 2p1"}, -{"C", "[He] 2s2 2p2"}, -{"N", "[He] 2s2 2p3"}, -{"O", "[He] 2s2 2p4"}, -{"F", "[He] 2s2 2p5"}, -{"Ne", "[He] 2s2 2p6"}, -{"Na", "[Ne] 3s1"}, -{"Mg", "[Ne] 3s2"}, -{"Al", "[Ne] 3s2 3p1"}, -{"Si", "[Ne] 3s2 3p2"}, -{"P", "[Ne] 3s2 3p3"}, -{"S", "[Ne] 3s2 3p4"}, -{"Cl", "[Ne] 3s2 3p5"}, -{"Ar", "[Ne] 3s2 3p6"}, -{"K", "[Ar] 4s1"}, -{"Ca", "[Ar] 4s2"}, -{"Sc", "[Ar] 3d1 4s2"}, -{"Ti", "[Ar] 3d2 4s2"}, -{"V", "[Ar] 3d3 4s2"}, -{"Cr", "[Ar] 3d5 4s1"}, -{"Mn", "[Ar] 3d5 4s2"}, -{"Fe", "[Ar] 3d6 4s2"}, -{"Co", "[Ar] 3d7 4s2"}, -{"Ni", "[Ar] 3d8 4s2"}, -{"Cu", "[Ar] 3d10 4s1"}, -{"Zn", "[Ar] 3d10 4s2"}, -{"Ga", "[Ar] 3d10 4s2 4p1"}, -{"Ge", "[Ar] 3d10 4s2 4p2"}, -{"As", "[Ar] 3d10 4s2 4p3"}, -{"Se", "[Ar] 3d10 4s2 4p4"}, -{"Br", "[Ar] 3d10 4s2 4p5"}, -{"Kr", "[Ar] 3d10 4s2 4p6"}, -{"Rb", "[Kr] 5s1"}, -{"Sr", "[Kr] 5s2"}, -{"Y", "[Kr] 4d1 5s2"}, -{"Zr", "[Kr] 4d2 5s2"}, -{"Nb", "[Kr] 4d4 5s1"}, -{"Mo", "[Kr] 4d5 5s1"}, -{"Tc", "[Kr] 4d5 5s2"}, -{"Ru", "[Kr] 4d7 
5s1"}, -{"Rh", "[Kr] 4d8 5s1"}, -{"Pd", "[Kr] 4d10"}, -{"Ag", "[Kr] 4d10 5s1"}, -{"Cd", "[Kr] 4d10 5s2"}, -{"In", "[Kr] 4d10 5s2 5p1"}, -{"Sn", "[Kr] 4d10 5s2 5p2"}, -{"Sb", "[Kr] 4d10 5s2 5p3"}, -{"Te", "[Kr] 4d10 5s2 5p4"}, -{"I", "[Kr] 4d10 5s2 5p5"}, -{"Xe", "[Kr] 4d10 5s2 5p6"}, -{"Cs", "[Xe] 6s1"}, -{"Ba", "[Xe] 6s2"}, -{"La", "[Xe] 5d1 6s2"}, -{"Ce", "[Xe] 4f1 5d1 6s2"}, -{"Pr", "[Xe] 4f3 6s2"}, -{"Nd", "[Xe] 4f4 6s2"}, -{"Pm", "[Xe] 4f5 6s2"}, -{"Sm", "[Xe] 4f6 6s2"}, -{"Eu", "[Xe] 4f7 6s2"}, -{"Gd", "[Xe] 4f7 5d1 6s2"}, -{"Tb", "[Xe] 4f9 6s2"}, -{"Dy", "[Xe] 4f10 6s2"}, -{"Ho", "[Xe] 4f11 6s2"}, -{"Er", "[Xe] 4f12 6s2"}, -{"Tm", "[Xe] 4f13 6s2"}, -{"Yb", "[Xe] 4f14 6s2"}, -{"Lu", "[Xe] 4f14 5d1 6s2"}, -{"Hf", "[Xe] 4f14 5d2 6s2"}, -{"Ta", "[Xe] 4f14 5d3 6s2"}, -{"W", "[Xe] 4f14 5d4 6s2"}, -{"Re", "[Xe] 4f14 5d5 6s2"}, -{"Os", "[Xe] 4f14 5d6 6s2"}, -{"Ir", "[Xe] 4f14 5d7 6s2"}, -{"Pt", "[Xe] 4f14 5d9 6s1"}, -{"Au", "[Xe] 4f14 5d10 6s1"}, -{"Hg", "[Xe] 4f14 5d10 6s2"}, -{"Tl", "[Xe] 4f14 5d10 6s2 6p1"}, -{"Pb", "[Xe] 4f14 5d10 6s2 6p2"}, -{"Bi", "[Xe] 4f14 5d10 6s2 6p3"}, -{"Po", "[Xe] 4f14 5d10 6s2 6p4"}, -{"At", "[Xe] 4f14 5d10 6s2 6p5"}, -{"Rn", "[Xe] 4f14 5d10 6s2 6p6"}, -{"Fr", "[Rn] 7s1"}, -{"Ra", "[Rn] 7s2"}, -{"Ac", "[Rn] 6d1 7s2"}, -{"Th", "[Rn] 6d2 7s2"}, -{"Pa", "[Rn] 5f2 6d1 7s2"}, -{"U" , "[Rn] 5f3 6d1 7s2"}, -{"Np", "[Rn] 5f4 6d1 7s2"}, -{"Pu", "[Rn] 5f6 7s2"}, -{"Am", "[Rn] 5f7 7s2"}, -{"Cm", "[Rn] 5f7 6d1 7s2"}, -{"Bk", "[Rn] 5f9 7s2"}, -{"Cf", "[Rn] 5f10 7s2"}, -{"Es", "[Rn] 5f11 7s2"}, -{"Fm", "[Rn] 5f12 7s2"}, -{"Md", "[Rn] 5f13 7s2"}, -{"No", "[Rn] 5f14 7s2"}, -{"Lr", "[Rn] 5f14 7s2 7p1"}, -{"Rf", "[Rn] 5f14 6d2 7s2"}, -{"Db", "[Rn] 5f14 6d3 7s2"}, -{"Sg", "[Rn] 5f14 6d4 7s2"}, -{"Bh", "[Rn] 5f14 6d5 7s2"}, -{"Hs", "[Rn] 5f14 6d6 7s2"}, -{"Mt", "[Rn] 5f14 6d7 7s2"}, -{"Ds", "[Rn] 5f14 6d8 7s2"}, -{"Rg", "[Rn] 5f14 6d10 7s1"}, -{"Cn", "[Rn] 5f14 6d10 7s2"}, -{"Nh", "[Rn] 5f14 6d10 7s2 7p1"}, -{"Fl", "[Rn] 5f14 6d10 7s2 7p2"}, -{"Mc", "[Rn] 
5f14 6d10 7s2 7p3"}, -{"Lv", "[Rn] 5f14 6d10 7s2 7p4"}, -{"Ts", "[Rn] 5f14 6d10 7s2 7p5"}, -{"Og", "[Rn] 5f14 6d10 7s2 7p6"} -}; +const std::map EleConfig = {{"H", "1s1"}, + {"He", "1s2"}, + {"Li", "[He] 2s1"}, + {"Be", "[He] 2s2"}, + {"B", "[He] 2s2 2p1"}, + {"C", "[He] 2s2 2p2"}, + {"N", "[He] 2s2 2p3"}, + {"O", "[He] 2s2 2p4"}, + {"F", "[He] 2s2 2p5"}, + {"Ne", "[He] 2s2 2p6"}, + {"Na", "[Ne] 3s1"}, + {"Mg", "[Ne] 3s2"}, + {"Al", "[Ne] 3s2 3p1"}, + {"Si", "[Ne] 3s2 3p2"}, + {"P", "[Ne] 3s2 3p3"}, + {"S", "[Ne] 3s2 3p4"}, + {"Cl", "[Ne] 3s2 3p5"}, + {"Ar", "[Ne] 3s2 3p6"}, + {"K", "[Ar] 4s1"}, + {"Ca", "[Ar] 4s2"}, + {"Sc", "[Ar] 3d1 4s2"}, + {"Ti", "[Ar] 3d2 4s2"}, + {"V", "[Ar] 3d3 4s2"}, + {"Cr", "[Ar] 3d5 4s1"}, + {"Mn", "[Ar] 3d5 4s2"}, + {"Fe", "[Ar] 3d6 4s2"}, + {"Co", "[Ar] 3d7 4s2"}, + {"Ni", "[Ar] 3d8 4s2"}, + {"Cu", "[Ar] 3d10 4s1"}, + {"Zn", "[Ar] 3d10 4s2"}, + {"Ga", "[Ar] 3d10 4s2 4p1"}, + {"Ge", "[Ar] 3d10 4s2 4p2"}, + {"As", "[Ar] 3d10 4s2 4p3"}, + {"Se", "[Ar] 3d10 4s2 4p4"}, + {"Br", "[Ar] 3d10 4s2 4p5"}, + {"Kr", "[Ar] 3d10 4s2 4p6"}, + {"Rb", "[Kr] 5s1"}, + {"Sr", "[Kr] 5s2"}, + {"Y", "[Kr] 4d1 5s2"}, + {"Zr", "[Kr] 4d2 5s2"}, + {"Nb", "[Kr] 4d4 5s1"}, + {"Mo", "[Kr] 4d5 5s1"}, + {"Tc", "[Kr] 4d5 5s2"}, + {"Ru", "[Kr] 4d7 5s1"}, + {"Rh", "[Kr] 4d8 5s1"}, + {"Pd", "[Kr] 4d10"}, + {"Ag", "[Kr] 4d10 5s1"}, + {"Cd", "[Kr] 4d10 5s2"}, + {"In", "[Kr] 4d10 5s2 5p1"}, + {"Sn", "[Kr] 4d10 5s2 5p2"}, + {"Sb", "[Kr] 4d10 5s2 5p3"}, + {"Te", "[Kr] 4d10 5s2 5p4"}, + {"I", "[Kr] 4d10 5s2 5p5"}, + {"Xe", "[Kr] 4d10 5s2 5p6"}, + {"Cs", "[Xe] 6s1"}, + {"Ba", "[Xe] 6s2"}, + {"La", "[Xe] 5d1 6s2"}, + {"Ce", "[Xe] 4f1 5d1 6s2"}, + {"Pr", "[Xe] 4f3 6s2"}, + {"Nd", "[Xe] 4f4 6s2"}, + {"Pm", "[Xe] 4f5 6s2"}, + {"Sm", "[Xe] 4f6 6s2"}, + {"Eu", "[Xe] 4f7 6s2"}, + {"Gd", "[Xe] 4f7 5d1 6s2"}, + {"Tb", "[Xe] 4f9 6s2"}, + {"Dy", "[Xe] 4f10 6s2"}, + {"Ho", "[Xe] 4f11 6s2"}, + {"Er", "[Xe] 4f12 6s2"}, + {"Tm", "[Xe] 4f13 6s2"}, + {"Yb", "[Xe] 4f14 6s2"}, + {"Lu", "[Xe] 4f14 
5d1 6s2"}, + {"Hf", "[Xe] 4f14 5d2 6s2"}, + {"Ta", "[Xe] 4f14 5d3 6s2"}, + {"W", "[Xe] 4f14 5d4 6s2"}, + {"Re", "[Xe] 4f14 5d5 6s2"}, + {"Os", "[Xe] 4f14 5d6 6s2"}, + {"Ir", "[Xe] 4f14 5d7 6s2"}, + {"Pt", "[Xe] 4f14 5d9 6s1"}, + {"Au", "[Xe] 4f14 5d10 6s1"}, + {"Hg", "[Xe] 4f14 5d10 6s2"}, + {"Tl", "[Xe] 4f14 5d10 6s2 6p1"}, + {"Pb", "[Xe] 4f14 5d10 6s2 6p2"}, + {"Bi", "[Xe] 4f14 5d10 6s2 6p3"}, + {"Po", "[Xe] 4f14 5d10 6s2 6p4"}, + {"At", "[Xe] 4f14 5d10 6s2 6p5"}, + {"Rn", "[Xe] 4f14 5d10 6s2 6p6"}, + {"Fr", "[Rn] 7s1"}, + {"Ra", "[Rn] 7s2"}, + {"Ac", "[Rn] 6d1 7s2"}, + {"Th", "[Rn] 6d2 7s2"}, + {"Pa", "[Rn] 5f2 6d1 7s2"}, + {"U", "[Rn] 5f3 6d1 7s2"}, + {"Np", "[Rn] 5f4 6d1 7s2"}, + {"Pu", "[Rn] 5f6 7s2"}, + {"Am", "[Rn] 5f7 7s2"}, + {"Cm", "[Rn] 5f7 6d1 7s2"}, + {"Bk", "[Rn] 5f9 7s2"}, + {"Cf", "[Rn] 5f10 7s2"}, + {"Es", "[Rn] 5f11 7s2"}, + {"Fm", "[Rn] 5f12 7s2"}, + {"Md", "[Rn] 5f13 7s2"}, + {"No", "[Rn] 5f14 7s2"}, + {"Lr", "[Rn] 5f14 7s2 7p1"}, + {"Rf", "[Rn] 5f14 6d2 7s2"}, + {"Db", "[Rn] 5f14 6d3 7s2"}, + {"Sg", "[Rn] 5f14 6d4 7s2"}, + {"Bh", "[Rn] 5f14 6d5 7s2"}, + {"Hs", "[Rn] 5f14 6d6 7s2"}, + {"Mt", "[Rn] 5f14 6d7 7s2"}, + {"Ds", "[Rn] 5f14 6d8 7s2"}, + {"Rg", "[Rn] 5f14 6d10 7s1"}, + {"Cn", "[Rn] 5f14 6d10 7s2"}, + {"Nh", "[Rn] 5f14 6d10 7s2 7p1"}, + {"Fl", "[Rn] 5f14 6d10 7s2 7p2"}, + {"Mc", "[Rn] 5f14 6d10 7s2 7p3"}, + {"Lv", "[Rn] 5f14 6d10 7s2 7p4"}, + {"Ts", "[Rn] 5f14 6d10 7s2 7p5"}, + {"Og", "[Rn] 5f14 6d10 7s2 7p6"}}; const std::map MinZval -= { -{"H", 1}, -{"He", 2}, -{"Li", 1}, -{"Be", 2}, -{"B", 3}, -{"C", 4}, -{"N", 5}, -{"O", 6}, -{"F", 7}, -{"Ne", 8}, -{"Na", 1}, -{"Mg", 2}, -{"Al", 3}, -{"Si", 4}, -{"P", 5}, -{"S", 6}, -{"Cl", 7}, -{"Ar", 8}, -{"K", 1}, -{"Ca", 2}, -{"Sc", 3}, -{"Ti", 4}, -{"V", 5}, -{"Cr", 6}, -{"Mn", 7}, -{"Fe", 8}, -{"Co", 9}, -{"Ni", 10}, -{"Cu", 11}, -{"Zn", 12}, -{"Ga", 3}, -{"Ge", 4}, -{"As", 5}, -{"Se", 6}, -{"Br", 7}, -{"Kr", 8}, -{"Rb", 1}, -{"Sr", 2}, -{"Y", 3}, -{"Zr", 4}, -{"Nb", 5}, -{"Mo", 6}, -{"Tc", 7}, 
-{"Ru", 8}, -{"Rh", 9}, -{"Pd", 10}, -{"Ag", 11}, -{"Cd", 12}, -{"In", 3}, -{"Sn", 4}, -{"Sb", 5}, -{"Te", 6}, -{"I", 7}, -{"Xe", 8}, -{"Cs", 1}, -{"Ba", 2}, -{"La", 3}, -{"Ce", 4}, -{"Pr", 5}, -{"Nd", 6}, -{"Pm", 7}, -{"Sm", 8}, -{"Eu", 9}, -{"Gd", 10}, -{"Tb", 11}, -{"Dy", 12}, -{"Ho", 13}, -{"Er", 14}, -{"Tm", 15}, -{"Yb", 16}, -{"Lu", 17}, -{"Hf", 4}, -{"Ta", 5}, -{"W", 6}, -{"Re", 7}, -{"Os", 8}, -{"Ir", 9}, -{"Pt", 10}, -{"Au", 11}, -{"Hg", 12}, -{"Tl", 3}, -{"Pb", 4}, -{"Bi", 5}, -{"Po", 6}, -{"At", 7}, -{"Rn", 8}, -{"Fr", 1}, -{"Ra", 2}, -{"Ac", 3}, -{"Th", 4}, -{"Pa", 5}, -{"U" , 6}, -{"Np", 7}, -{"Pu", 8}, -{"Am", 9}, -{"Cm", 10}, -{"Bk", 11}, -{"Cf", 12}, -{"Es", 13}, -{"Fm", 14}, -{"Md", 15}, -{"No", 16}, -{"Lr", 17}, -{"Rf", 18}, -{"Db", 19}, -{"Sg", 20}, -{"Bh", 21}, -{"Hs", 22}, -{"Mt", 23}, -{"Ds", 24}, -{"Rg", 25}, -{"Cn", 26}, -{"Nh", 27}, -{"Fl", 28}, -{"Mc", 29}, -{"Lv", 30}, -{"Ts", 31}, -{"Og", 32} -}; + = {{"H", 1}, {"He", 2}, {"Li", 1}, {"Be", 2}, {"B", 3}, {"C", 4}, {"N", 5}, {"O", 6}, {"F", 7}, + {"Ne", 8}, {"Na", 1}, {"Mg", 2}, {"Al", 3}, {"Si", 4}, {"P", 5}, {"S", 6}, {"Cl", 7}, {"Ar", 8}, + {"K", 1}, {"Ca", 2}, {"Sc", 3}, {"Ti", 4}, {"V", 5}, {"Cr", 6}, {"Mn", 7}, {"Fe", 8}, {"Co", 9}, + {"Ni", 10}, {"Cu", 11}, {"Zn", 12}, {"Ga", 3}, {"Ge", 4}, {"As", 5}, {"Se", 6}, {"Br", 7}, {"Kr", 8}, + {"Rb", 1}, {"Sr", 2}, {"Y", 3}, {"Zr", 4}, {"Nb", 5}, {"Mo", 6}, {"Tc", 7}, {"Ru", 8}, {"Rh", 9}, + {"Pd", 10}, {"Ag", 11}, {"Cd", 12}, {"In", 3}, {"Sn", 4}, {"Sb", 5}, {"Te", 6}, {"I", 7}, {"Xe", 8}, + {"Cs", 1}, {"Ba", 2}, {"La", 3}, {"Ce", 4}, {"Pr", 5}, {"Nd", 6}, {"Pm", 7}, {"Sm", 8}, {"Eu", 9}, + {"Gd", 10}, {"Tb", 11}, {"Dy", 12}, {"Ho", 13}, {"Er", 14}, {"Tm", 15}, {"Yb", 16}, {"Lu", 17}, {"Hf", 4}, + {"Ta", 5}, {"W", 6}, {"Re", 7}, {"Os", 8}, {"Ir", 9}, {"Pt", 10}, {"Au", 11}, {"Hg", 12}, {"Tl", 3}, + {"Pb", 4}, {"Bi", 5}, {"Po", 6}, {"At", 7}, {"Rn", 8}, {"Fr", 1}, {"Ra", 2}, {"Ac", 3}, {"Th", 4}, + {"Pa", 5}, {"U", 6}, {"Np", 7}, {"Pu", 8}, 
{"Am", 9}, {"Cm", 10}, {"Bk", 11}, {"Cf", 12}, {"Es", 13}, + {"Fm", 14}, {"Md", 15}, {"No", 16}, {"Lr", 17}, {"Rf", 18}, {"Db", 19}, {"Sg", 20}, {"Bh", 21}, {"Hs", 22}, + {"Mt", 23}, {"Ds", 24}, {"Rg", 25}, {"Cn", 26}, {"Nh", 27}, {"Fl", 28}, {"Mc", 29}, {"Lv", 30}, {"Ts", 31}, + {"Og", 32}}; const std::map IsTransMetal -= { -{"H", 0}, -{"He", 0}, -{"Li", 0}, -{"Be", 0}, -{"B", 0}, -{"C", 0}, -{"N", 0}, -{"O", 0}, -{"F", 0}, -{"Ne", 0}, -{"Na", 0}, -{"Mg", 0}, -{"Al", 0}, -{"Si", 0}, -{"P", 0}, -{"S", 0}, -{"Cl", 0}, -{"Ar", 0}, -{"K", 0}, -{"Ca", 0}, -{"Sc", 1}, -{"Ti", 1}, -{"V", 1}, -{"Cr", 1}, -{"Mn", 1}, -{"Fe", 1}, -{"Co", 1}, -{"Ni", 1}, -{"Cu", 1}, -{"Zn", 1}, -{"Ga", 0}, -{"Ge", 0}, -{"As", 0}, -{"Se", 0}, -{"Br", 0}, -{"Kr", 0}, -{"Rb", 0}, -{"Sr", 0}, -{"Y", 1}, -{"Zr", 1}, -{"Nb", 1}, -{"Mo", 1}, -{"Tc", 1}, -{"Ru", 1}, -{"Rh", 1}, -{"Pd", 1}, -{"Ag", 1}, -{"Cd", 1}, -{"In", 0}, -{"Sn", 0}, -{"Sb", 0}, -{"Te", 0}, -{"I", 0}, -{"Xe", 0}, -{"Cs", 0}, -{"Ba", 0}, -{"La", 1}, -{"Ce", 1}, -{"Pr", 1}, -{"Nd", 1}, -{"Pm", 1}, -{"Sm", 1}, -{"Eu", 1}, -{"Gd", 1}, -{"Tb", 1}, -{"Dy", 1}, -{"Ho", 1}, -{"Er", 1}, -{"Tm", 1}, -{"Yb", 1}, -{"Lu", 1}, -{"Hf", 1}, -{"Ta", 1}, -{"W", 1}, -{"Re", 1}, -{"Os", 1}, -{"Ir", 1}, -{"Pt", 1}, -{"Au", 1}, -{"Hg", 1}, -{"Tl", 0}, -{"Pb", 0}, -{"Bi", 0}, -{"Po", 0}, -{"At", 0}, -{"Rn", 0}, -{"Fr", 0}, -{"Ra", 0}, -{"Ac", 1}, -{"Th", 1}, -{"Pa", 1}, -{"U" , 1}, -{"Np", 1}, -{"Pu", 1}, -{"Am", 1}, -{"Cm", 1}, -{"Bk", 1}, -{"Cf", 1}, -{"Es", 1}, -{"Fm", 1}, -{"Md", 1}, -{"No", 1}, -{"Lr", 1}, -{"Rf", 1}, -{"Db", 1}, -{"Sg", 1}, -{"Bh", 1}, -{"Hs", 1}, -{"Mt", 1}, -{"Ds", 1}, -{"Rg", 1}, -{"Cn", 1}, -{"Nh", 0}, -{"Fl", 0}, -{"Mc", 0}, -{"Lv", 0}, -{"Ts", 0}, -{"Og", 0} -}; + = {{"H", 0}, {"He", 0}, {"Li", 0}, {"Be", 0}, {"B", 0}, {"C", 0}, {"N", 0}, {"O", 0}, {"F", 0}, {"Ne", 0}, + {"Na", 0}, {"Mg", 0}, {"Al", 0}, {"Si", 0}, {"P", 0}, {"S", 0}, {"Cl", 0}, {"Ar", 0}, {"K", 0}, {"Ca", 0}, + {"Sc", 1}, {"Ti", 1}, {"V", 1}, {"Cr", 1}, 
{"Mn", 1}, {"Fe", 1}, {"Co", 1}, {"Ni", 1}, {"Cu", 1}, {"Zn", 1}, + {"Ga", 0}, {"Ge", 0}, {"As", 0}, {"Se", 0}, {"Br", 0}, {"Kr", 0}, {"Rb", 0}, {"Sr", 0}, {"Y", 1}, {"Zr", 1}, + {"Nb", 1}, {"Mo", 1}, {"Tc", 1}, {"Ru", 1}, {"Rh", 1}, {"Pd", 1}, {"Ag", 1}, {"Cd", 1}, {"In", 0}, {"Sn", 0}, + {"Sb", 0}, {"Te", 0}, {"I", 0}, {"Xe", 0}, {"Cs", 0}, {"Ba", 0}, {"La", 1}, {"Ce", 1}, {"Pr", 1}, {"Nd", 1}, + {"Pm", 1}, {"Sm", 1}, {"Eu", 1}, {"Gd", 1}, {"Tb", 1}, {"Dy", 1}, {"Ho", 1}, {"Er", 1}, {"Tm", 1}, {"Yb", 1}, + {"Lu", 1}, {"Hf", 1}, {"Ta", 1}, {"W", 1}, {"Re", 1}, {"Os", 1}, {"Ir", 1}, {"Pt", 1}, {"Au", 1}, {"Hg", 1}, + {"Tl", 0}, {"Pb", 0}, {"Bi", 0}, {"Po", 0}, {"At", 0}, {"Rn", 0}, {"Fr", 0}, {"Ra", 0}, {"Ac", 1}, {"Th", 1}, + {"Pa", 1}, {"U", 1}, {"Np", 1}, {"Pu", 1}, {"Am", 1}, {"Cm", 1}, {"Bk", 1}, {"Cf", 1}, {"Es", 1}, {"Fm", 1}, + {"Md", 1}, {"No", 1}, {"Lr", 1}, {"Rf", 1}, {"Db", 1}, {"Sg", 1}, {"Bh", 1}, {"Hs", 1}, {"Mt", 1}, {"Ds", 1}, + {"Rg", 1}, {"Cn", 1}, {"Nh", 0}, {"Fl", 0}, {"Mc", 0}, {"Lv", 0}, {"Ts", 0}, {"Og", 0}}; -} +} // namespace ModuleBase #endif diff --git a/source/source_base/element_name.h b/source/source_base/element_name.h index 195c2a85f94..6407dab0c5c 100644 --- a/source/source_base/element_name.h +++ b/source/source_base/element_name.h @@ -10,126 +10,14 @@ namespace ModuleBase { -static const std::vector element_name = { -"H" , -"He" , -"Li" , -"Be" , -"B" , -"C" , -"N" , -"O" , -"F" , -"Ne" , -"Na" , -"Mg" , -"Al" , -"Si" , -"P" , -"S" , -"Cl" , -"Ar" , -"K" , -"Ca" , -"Sc" , -"Ti" , -"V" , -"Cr" , -"Mn" , -"Fe" , -"Co" , -"Ni" , -"Cu" , -"Zn" , -"Ga" , -"Ge" , -"As" , -"Se" , -"Br" , -"Kr" , -"Rb" , -"Sr" , -"Y" , -"Zr" , -"Nb" , -"Mo" , -"Tc" , -"Ru" , -"Rh" , -"Pd" , -"Ag" , -"Cd" , -"In" , -"Sn" , -"Sb" , -"Te" , -"I" , -"Xe" , -"Cs" , -"Ba" , -"La" , -"Ce" , -"Pr" , -"Nd" , -"Pm" , -"Sm" , -"Eu" , -"Gd" , -"Tb" , -"Dy" , -"Ho" , -"Er" , -"Tm" , -"Yb" , -"Lu" , -"Hf" , -"Ta" , -"W" , -"Re" , -"Os" , -"Ir" , -"Pt" , -"Au" , -"Hg" 
, -"Tl" , -"Pb" , -"Bi" , -"Po" , -"At" , -"Rn" , -"Fr" , -"Ra" , -"Ac" , -"Th" , -"Pa" , -"U" , -"Np" , -"Pu" , -"Am" , -"Cm" , -"Bk" , -"Cf" , -"Es" , -"Fm" , -"Md" , -"No" , -"Lr" , -"Rf" , -"Db" , -"Sg" , -"Bh" , -"Hs" , -"Mt" , -"Ds" , -"Rg" , -"Cn" , -"Nh" , -"Fl" , -"Mc" , -"Lv" , -"Ts" , -"Og" -}; +static const std::vector element_name + = {"H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg", "Al", "Si", "P", "S", "Cl", + "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se", + "Br", "Kr", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn", "Sb", + "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", + "Tm", "Yb", "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", + "Rn", "Fr", "Ra", "Ac", "Th", "Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm", "Md", "No", + "Lr", "Rf", "Db", "Sg", "Bh", "Hs", "Mt", "Ds", "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts", "Og"}; } diff --git a/source/source_base/export.cpp b/source/source_base/export.cpp index 413673a8495..701172e36ec 100644 --- a/source/source_base/export.cpp +++ b/source/source_base/export.cpp @@ -1,33 +1,30 @@ // ============================================================================= // C++ Header File -// Project: -// File: export.h +// Project: +// File: export.h // Author: mohan -// Comment: -// Warning: +// Comment: +// Warning: // Start time: 2008-9-3 -// Last modified: +// Last modified: // ============================================================================= #include "export.h" /* void ModuleBase::IF_MATCH(const std::string &name,const std::string &name2) { - if(name!=name2) - { - if(GlobalV::MY_RANK == 0) - { - std::cout<<"\n Can not match : "< void ModuleBase::GlobalFunc::AUTO_SET(std::ofstream &ofs,const std::string &name,const T &a) { - ofs<<" AUTO_SET "< void IF_MATCH(const T &a,const T &b) { - if(a!=b) - { 
- if(GlobalV::MY_RANK == 0) - { - std::cout<<"\n Can not match : "< #include "source_base/ndarray.h" /** - * @brief - * - * In C++20, the std::format library is introduced. However, it is not supported under restriction of ABACUS development that not later than C++11. Plus in ABACUS the formatting-output demands is not quite general but more specific, therefore, a simple alternative is proposed here. - * To use: + * @brief + * + * In C++20, the std::format library is introduced. However, it is not supported under restriction of ABACUS development + * that not later than C++11. Plus in ABACUS the formatting-output demands is not quite general but more specific, + * therefore, a simple alternative is proposed here. To use: * 1. Use the static function format() to format data like `FmtCore::format("%d", 1);` * 2. Use the class FmtCore to format data like `FmtCore fmt("%d"); fmt.format(1);`. - * The first way is more flexible while the second way is more efficient. The format string can be reset by reset() function. If empty, the format string is empty, otherwise it will be updated. + * The first way is more flexible while the second way is more efficient. The format string can be reset by reset() + * function. If empty, the format string is empty, otherwise it will be updated. */ class FmtCore { -public: - FmtCore(const std::string& fmt): fmt_(fmt) {}; - ~FmtCore() {}; + public: + FmtCore (const std::string& fmt) : fmt_ (fmt) {}; + ~FmtCore () {}; /** * @brief static function to format data - * + * * @tparam Ts datatype of the data * @param fmt format string * @param args data to format - * @return std::string + * @return std::string */ - template - static inline std::string format(const char* fmt, const Ts&... args) + template + static inline std::string + format (const char* fmt, const Ts&... args) { - const int size = snprintf(nullptr, 0, fmt, FmtCore::filter(args)...) 
+ 1; - std::string dst(size, ' '); - const int size_filled = snprintf(&dst[0], size, fmt, FmtCore::filter(args)...); - dst.resize(size_filled); + const int size = snprintf (nullptr, 0, fmt, FmtCore::filter (args)...) + 1; + std::string dst (size, ' '); + const int size_filled = snprintf (&dst[0], size, fmt, FmtCore::filter (args)...); + dst.resize (size_filled); return dst; } /** * @brief std::string overload of the varadic template function - * - * @param fmt - * @param arg - * @return std::string + * + * @param fmt + * @param arg + * @return std::string */ - template - std::string format(const Ts&... args) { return FmtCore::format(fmt_.c_str(), args...); } + template + std::string + format (const Ts&... args) + { + return FmtCore::format (fmt_.c_str (), args...); + } /** * @brief reset the format string (std::string overloads) - * - * @param fmt + * + * @param fmt */ - void reset(const std::string& fmt = "") { fmt_ = fmt; } + void + reset (const std::string& fmt = "") + { + fmt_ = fmt; + } /** * @brief get the format string - * - * @return std::string + * + * @return std::string */ - const std::string& fmt() const { return fmt_; } + const std::string& + fmt () const + { + return fmt_; + } /** * Python-style string functions will be implemented here as toolbox */ @@ -74,337 +89,428 @@ class FmtCore * @brief split a string with a delimiter, return uncollapse vector * @param in string to split * @param delim delimiter - * @return std::vector - */ - static std::vector split(const std::string& in, const std::string& delim) + * @return std::vector + */ + static std::vector + split (const std::string& in, const std::string& delim) { std::vector dst; std::string::size_type beg = 0, end; - while((end = in.find(delim, beg)) != std::string::npos) - { - dst.push_back(in.substr(beg, end - beg)); - beg = end + delim.size(); - } - dst.push_back(in.substr(beg)); + while ((end = in.find (delim, beg)) != std::string::npos) + { + dst.push_back (in.substr (beg, end - beg)); + beg = 
end + delim.size (); + } + dst.push_back (in.substr (beg)); return dst; } /** * @brief split a string with a delimiter, return only non-empty elements - * + * * @param in string to split - * @return std::vector + * @return std::vector */ - static std::vector split(const std::string& in) + static std::vector + split (const std::string& in) { std::vector dst; std::string::size_type beg = 0, end = 0; - while((beg = in.find_first_not_of(" ", end)) != std::string::npos) - { - end = in.find_first_of(" ", beg); - dst.push_back(in.substr(beg, end - beg)); - } + while ((beg = in.find_first_not_of (" ", end)) != std::string::npos) + { + end = in.find_first_of (" ", beg); + dst.push_back (in.substr (beg, end - beg)); + } return dst; } - static bool startswith(const std::string& in, const std::string& prefix) + static bool + startswith (const std::string& in, const std::string& prefix) { - return (in.size() >= prefix.size()) && (in.substr(0, prefix.size()) == prefix); + return (in.size () >= prefix.size ()) && (in.substr (0, prefix.size ()) == prefix); } - static bool endswith(const std::string& in, const std::string& suffix) + static bool + endswith (const std::string& in, const std::string& suffix) { - return (in.size() >= suffix.size()) && (in.substr(in.size() - suffix.size()) == suffix); + return (in.size () >= suffix.size ()) && (in.substr (in.size () - suffix.size ()) == suffix); } - static std::string strip(const std::string& in, const std::string& chars = " ") + static std::string + strip (const std::string& in, const std::string& chars = " ") { - std::string::size_type beg = in.find_first_not_of(chars); - return (beg == std::string::npos)? "": in.substr(beg, in.find_last_not_of(chars) - beg + 1); + std::string::size_type beg = in.find_first_not_of (chars); + return (beg == std::string::npos) ? 
"" : in.substr (beg, in.find_last_not_of (chars) - beg + 1); } - static std::string center(const std::string& in, const size_t& width, const char& fillchar = ' ') + static std::string + center (const std::string& in, const size_t& width, const char& fillchar = ' ') { - if (in.size() >= width) { - return in; - } - const size_t nwhitespaces = width - in.size(); + if (in.size () >= width) + { + return in; + } + const size_t nwhitespaces = width - in.size (); const size_t nleft = nwhitespaces / 2; const size_t nright = nwhitespaces - nleft; - return std::string(nleft, fillchar) + in + std::string(nright, fillchar); + return std::string (nleft, fillchar) + in + std::string (nright, fillchar); } - static std::string replace(const std::string& in, const std::string& old, const std::string& new_) + static std::string + replace (const std::string& in, const std::string& old, const std::string& new_) { std::string dst = in; - std::string::size_type pos = dst.find(old); - while(pos != std::string::npos) - { - dst.replace(pos, old.size(), new_); - pos = dst.find(old, pos + new_.size()); - } + std::string::size_type pos = dst.find (old); + while (pos != std::string::npos) + { + dst.replace (pos, old.size (), new_); + pos = dst.find (old, pos + new_.size ()); + } return dst; } - static std::string join(const std::string& delim, const std::vector& src) + static std::string + join (const std::string& delim, const std::vector& src) { - return (src.empty())? "": std::accumulate(src.begin() + 1, src.end(), src[0], - [&delim](const std::string& acc, const std::string& s) { return acc + delim + s; }); + return (src.empty ()) ? 
"" + : std::accumulate (src.begin () + 1, + src.end (), + src[0], + [&delim] (const std::string& acc, const std::string& s) + { return acc + delim + s; }); } - static std::string upper(const std::string& in) + static std::string + upper (const std::string& in) { std::string dst = in; - std::transform(dst.begin(), dst.end(), dst.begin(), ::toupper); + std::transform (dst.begin (), dst.end (), dst.begin (), ::toupper); return dst; } - static std::string lower(const std::string& in) + static std::string + lower (const std::string& in) { std::string dst = in; - std::transform(dst.begin(), dst.end(), dst.begin(), ::tolower); + std::transform (dst.begin (), dst.end (), dst.begin (), ::tolower); return dst; } -private: + private: std::string fmt_; - template - static typename std::enable_if::value, const char*>::type filter(const T& s) { return s.c_str(); } - template - static typename std::enable_if::value, const T&>::type filter(const T& s) { return s; } + template + static typename std::enable_if::value, const char*>::type + filter (const T& s) + { + return s.c_str (); + } + template + static typename std::enable_if::value, const T&>::type + filter (const T& s) + { + return s; + } }; class FmtTable { -public: - enum class Align{LEFT, RIGHT, CENTER}; -private: + public: + enum class Align + { + LEFT, + RIGHT, + CENTER + }; + + private: typedef FmtCore core; - struct Alignments{ - Alignments(const Align& val = Align::RIGHT, const Align& title = Align::CENTER): val_(val), title_(title) {}; + struct Alignments + { + Alignments (const Align& val = Align::RIGHT, const Align& title = Align::CENTER) + : val_ (val), title_ (title) {}; Align val_, title_; // value and title alignments } aligns_; - struct Frames{ - Frames(char up = '-', char mid = '-', char dw = '-', char l = ' ', char r = ' '): up_(up), mid_(mid), dw_(dw), l_(l), r_(r) {}; - char up_, mid_ , dw_, l_, r_; // up, middle, down, left, right frames. 
up: the frame above title, middle: the one between title and data - } frames_; // down: the frame below data, left: the frame at left, right: the frame at right - struct Delimiters{ - Delimiters(char h = '-', char v = ' '): h_(h), v_(v) {}; + struct Frames + { + Frames (char up = '-', char mid = '-', char dw = '-', char l = ' ', char r = ' ') + : up_ (up), mid_ (mid), dw_ (dw), l_ (l), r_ (r) {}; + char up_, mid_, dw_, l_, r_; // up, middle, down, left, right frames. up: the frame above title, middle: the one + // between title and data + } frames_; // down: the frame below data, left: the frame at left, right: the frame at right + struct Delimiters + { + Delimiters (char h = '-', char v = ' ') : h_ (h), v_ (v) {}; char h_, v_; // horizontal and vertical delimiters } delimiters_; -public: + + public: /** * @brief Construct a new Fmt Table object - * + * * @param titles titles, its size should be the same as the number of columns * @param nrows number of rows * @param fmts format strings for each column, its size should be the same as the number of columns * @param indent indent for each column, default is 0 - * @param aligns Alignments instance, for alignment of values and titles, e.g. {Align::LEFT, Align::RIGHT} for left alignment of values and right alignment of titles - * @param frames Frames instance, can be constructed with initializer_list like {'-', '-', '-', ' ', ' '}, for up, middle, down, left and right frames - * @param delimiters Delimiters instance, can be constructed with initializer_list like {'-', ' '}, for horizontal and vertical delimiters + * @param aligns Alignments instance, for alignment of values and titles, e.g. 
{Align::LEFT, Align::RIGHT} for left + * alignment of values and right alignment of titles + * @param frames Frames instance, can be constructed with initializer_list like {'-', '-', '-', ' ', ' '}, for + * up, middle, down, left and right frames + * @param delimiters Delimiters instance, can be constructed with initializer_list like {'-', ' '}, for + * horizontal and vertical delimiters */ - FmtTable(const std::vector& titles, - const size_t nrows, - const std::vector& fmts, - const size_t indent = 0, - const Alignments& aligns = {}, - const Frames& frames = {}, - const Delimiters& delimiters = {}): - titles_(titles), data_(nrows, titles.size()), // data - fmts_(fmts), indent_(indent), aligns_(aligns), frames_(frames), delimiters_(delimiters) // styles - { assert(titles.size() == fmts.size()||titles.size() == 0); }; - ~FmtTable() {}; + FmtTable (const std::vector& titles, + const size_t nrows, + const std::vector& fmts, + const size_t indent = 0, + const Alignments& aligns = {}, + const Frames& frames = {}, + const Delimiters& delimiters = {}) + : titles_ (titles), data_ (nrows, titles.size ()), // data + fmts_ (fmts), indent_ (indent), aligns_ (aligns), frames_ (frames), delimiters_ (delimiters) // styles + { + assert (titles.size () == fmts.size () || titles.size () == 0); + }; + ~FmtTable () {}; /** * @brief import data from std::vector - * + * * @tparam T datatype of the data * @param src source data * @return FmtTable& itself */ - template - FmtTable& operator<<(const std::vector& src) + template + FmtTable& + operator<< (const std::vector& src) { // create a copy of source data, then format - std::vector data(src.size()); - for(size_t i = 0UL; i < src.size(); i++) { data[i] = core::format(fmts_[j_].c_str(), src[i]); } - set_value(0, j_, 'v', data); - j_ = (j_ + 1) % titles_.size(); + std::vector data (src.size ()); + for (size_t i = 0UL; i < src.size (); i++) + { + data[i] = core::format (fmts_[j_].c_str (), src[i]); + } + set_value (0, j_, 'v', data); + j_ 
= (j_ + 1) % titles_.size (); return *this; } /** * @brief Set the value object - * + * * @tparam T datatype of the data * @param i row index * @param j col index * @param value value to set */ - template - void set_value(const size_t& i, const size_t& j, const T& value) { data_(i, j) = core::format(fmts_[j].c_str(), value); } + template + void + set_value (const size_t& i, const size_t& j, const T& value) + { + data_ (i, j) = core::format (fmts_[j].c_str (), value); + } /** * @brief adjust the width of each column - * + * * @param col col to relax, organized as std::vector * @param title title of the column * @param vlyot value layout, can be Align::LEFT, Align::RIGHT, Align::CENTER * @param tlyot title layout, can be Align::LEFT, Align::RIGHT, Align::CENTER * @return std::vector newly relaxed column */ - std::vector relax_col_width(const std::vector& col, - const std::string& title = "", - const Align valign = Align::RIGHT, // because enum type would be the smallest integral type, so it is safe to pass by value - const Align talign = Align::CENTER) + std::vector + relax_col_width ( + const std::vector& col, + const std::string& title = "", + const Align valign + = Align::RIGHT, // because enum type would be the smallest integral type, so it is safe to pass by value + const Align talign = Align::CENTER) { - size_t max_width = title.size(); - for(const std::string& s : col) { max_width = std::max(max_width, s.size()); } - std::vector new_col(col.size() + 1); // the first is column title - for(size_t i = 0; i < col.size() + 1; i++) - { - new_col[i] = (i == 0)? FmtCore::strip(title): FmtCore::strip(col[i - 1]); - const size_t nwhitespaces = max_width - new_col[i].size(); - switch((i == 0)? 
talign: valign) + size_t max_width = title.size (); + for (const std::string& s: col) + { + max_width = std::max (max_width, s.size ()); + } + std::vector new_col (col.size () + 1); // the first is column title + for (size_t i = 0; i < col.size () + 1; i++) { - case Align::RIGHT: new_col[i] = std::string(nwhitespaces, ' ') + new_col[i]; break; - case Align::LEFT: new_col[i] += std::string(nwhitespaces, ' '); break; - case Align::CENTER: new_col[i] = FmtCore::center(new_col[i], max_width); break; + new_col[i] = (i == 0) ? FmtCore::strip (title) : FmtCore::strip (col[i - 1]); + const size_t nwhitespaces = max_width - new_col[i].size (); + switch ((i == 0) ? talign : valign) + { + case Align::RIGHT: + new_col[i] = std::string (nwhitespaces, ' ') + new_col[i]; + break; + case Align::LEFT: + new_col[i] += std::string (nwhitespaces, ' '); + break; + case Align::CENTER: + new_col[i] = FmtCore::center (new_col[i], max_width); + break; + } } - } return new_col; } /** * @brief concatenate titles into a string - * + * * @param titles titles to concatenate - * @return std::string + * @return std::string */ - std::string concat_title(const std::vector& titles) const + std::string + concat_title (const std::vector& titles) const { std::string dst = ""; // first sum width of all titles - size_t width = std::accumulate(titles.begin(), titles.end(), 0, [](const size_t& acc, const std::string& s) { return acc + s.size(); }); + size_t width = std::accumulate (titles.begin (), + titles.end (), + 0, + [] (const size_t& acc, const std::string& s) { return acc + s.size (); }); // add width of delimiters - width += titles.size() - 1; + width += titles.size () - 1; // add width of left and right frames width += 2; - dst += std::string(indent_, ' ') + std::string(width, frames_.up_) + "\n"; // first line: the upper frame - dst += std::string(indent_, ' ') + std::string(1, frames_.l_); // second line: the left frame + titles + right frame - for(size_t i = 0; i < titles.size(); i++) - { - dst 
+= titles[i]; - if (i != titles.size() - 1) { - dst += delimiters_.v_; + dst += std::string (indent_, ' ') + std::string (width, frames_.up_) + "\n"; // first line: the upper frame + dst += std::string (indent_, ' ') + + std::string (1, frames_.l_); // second line: the left frame + titles + right frame + for (size_t i = 0; i < titles.size (); i++) + { + dst += titles[i]; + if (i != titles.size () - 1) + { + dst += delimiters_.v_; + } } - } - dst += std::string(1, frames_.r_) + "\n"; - dst += std::string(indent_, ' ') + std::string(width, frames_.mid_) + "\n"; // third line: the middle frame + dst += std::string (1, frames_.r_) + "\n"; + dst += std::string (indent_, ' ') + std::string (width, frames_.mid_) + "\n"; // third line: the middle frame return dst; } /** * @brief concatenate a row into a string - * + * * @param row row to concatenate * @param pos position, can be 't' for top, 'b' for bottom, 'n' for normal - * @return std::string + * @return std::string */ - std::string concat_row(const std::vector& row, const char& pos) const + std::string + concat_row (const std::vector& row, const char& pos) const { std::string dst = ""; // first sum width of all elements of the row - size_t width = std::accumulate(row.begin(), row.end(), 0, [](const size_t& acc, const std::string& s) { return acc + s.size(); }); + size_t width = std::accumulate (row.begin (), + row.end (), + 0, + [] (const size_t& acc, const std::string& s) { return acc + s.size (); }); // for the delimiters - width += row.size() - 1; + width += row.size () - 1; // for the left and right frame width += 2; - if (pos == 't') { // 't' for top - dst += std::string(indent_, ' ') + std::string(width, frames_.up_) + "\n"; - } - dst += std::string(indent_, ' ') + std::string(1, frames_.l_); - for(size_t i = 0; i < row.size(); i++) - { - dst += row[i]; - if (i != row.size() - 1) { - dst += delimiters_.v_; + if (pos == 't') + { // 't' for top + dst += std::string (indent_, ' ') + std::string (width, frames_.up_) 
+ "\n"; + } + dst += std::string (indent_, ' ') + std::string (1, frames_.l_); + for (size_t i = 0; i < row.size (); i++) + { + dst += row[i]; + if (i != row.size () - 1) + { + dst += delimiters_.v_; + } + } + dst += std::string (1, frames_.r_) + "\n"; + if (pos == 'b') + { // 'b' for bottom + dst += std::string (indent_, ' ') + std::string (width, frames_.dw_) + "\n"; // the last line } - } - dst += std::string(1, frames_.r_) + "\n"; - if (pos == 'b') { // 'b' for bottom - dst += std::string(indent_, ' ') + std::string(width, frames_.dw_) + "\n"; // the last line - } return dst; } /** * @brief to get the table as a string - * - * @return std::string + * + * @return std::string */ - std::string str() + std::string + str () { std::string dst = ""; - const size_t nrows = data_.shape()[0]; - const size_t ncols = data_.shape()[1]; + const size_t nrows = data_.shape ()[0]; + const size_t ncols = data_.shape ()[1]; // if not all titles are empty, then with_title boolean will be true bool with_title = false; - for (auto& title: titles_) { - if (!title.empty()) { - with_title = true; - break; + for (auto& title: titles_) + { + if (!title.empty ()) + { + with_title = true; + break; + } } - } // first to relax each column - for(size_t j = 0UL; j < ncols; j++) - { - std::vector col(nrows); - for (size_t i = 0UL; i < nrows; i++) { - col[i] = data_(i, j); + for (size_t j = 0UL; j < ncols; j++) + { + std::vector col (nrows); + for (size_t i = 0UL; i < nrows; i++) + { + col[i] = data_ (i, j); + } + col = relax_col_width (col, titles_[j], aligns_.val_, aligns_.title_); + titles_[j] = col[0UL]; + std::vector col_new (col.begin () + 1, col.end ()); + set_value (0UL, j, 'v', col_new); } - col = relax_col_width(col, titles_[j], aligns_.val_, aligns_.title_); - titles_[j] = col[0UL]; - std::vector col_new(col.begin() + 1, col.end()); - set_value(0UL, j, 'v', col_new); - } // then print titles - if (with_title) { - dst += concat_title(titles_); - } + if (with_title) + { + dst += 
concat_title (titles_); + } // then print contents - for(size_t i = 0UL; i < nrows; i++) - { - std::vector row(ncols); - for (size_t j = 0; j < ncols; j++) { - row[j] = data_(i, j); + for (size_t i = 0UL; i < nrows; i++) + { + std::vector row (ncols); + for (size_t j = 0; j < ncols; j++) + { + row[j] = data_ (i, j); + } + dst += concat_row (row, ((i == 0UL) && !with_title) ? 't' : (i == nrows - 1) ? 'b' : 'n'); } - dst += concat_row(row, ((i == 0UL)&&!with_title)? 't': (i == nrows - 1)? 'b': 'n'); - } return dst; } - void str(const std::string& s) {}; + void str (const std::string& s) {}; // reuse - void iter_set(const size_t val) { j_ = val; } -private: + void + iter_set (const size_t val) + { + j_ = val; + } + + private: /** * @brief Set the value object from std::vector - * + * * @tparam T datatype of the data * @param i row index * @param j column index * @param dir direction, if 'v' then vertical, if 'h' then horizontal * @param src source data */ - template - void set_value(const size_t& i, const size_t& j, const char& dir, const std::vector& src) + template + void + set_value (const size_t& i, const size_t& j, const char& dir, const std::vector& src) { - if (dir == 'v') { - for (size_t k = 0UL; k < src.size(); k++) { - data_(i + k, j) = src[k]; + if (dir == 'v') + { + for (size_t k = 0UL; k < src.size (); k++) + { + data_ (i + k, j) = src[k]; + } } - } else if (dir == 'h') { - for (size_t k = 0UL; k < src.size(); k++) { - data_(j, i + k) = src[k]; + else if (dir == 'h') + { + for (size_t k = 0UL; k < src.size (); k++) + { + data_ (j, i + k) = src[k]; + } } - } } // iterator support indices size_t j_ = 0; std::vector titles_; - NDArray data_; // data + NDArray data_; // data std::vector fmts_; // format strings for each column - size_t indent_ = 0; // indent for each column + size_t indent_ = 0; // indent for each column }; #endif \ No newline at end of file diff --git a/source/source_base/gather_math_lib_info.cpp b/source/source_base/gather_math_lib_info.cpp 
index 3cd89a94ebb..4418e5ceaf5 100644 --- a/source/source_base/gather_math_lib_info.cpp +++ b/source/source_base/gather_math_lib_info.cpp @@ -13,68 +13,93 @@ Results will output to OUT/math_info.log, see ModuleBase::Global_File::make_dir_ #include -void zgemm_i(const char *transa, - const char *transb, - const int *m, - const int *n, - const int *k, - const std::complex *alpha, - const std::complex *a, - const int *lda, - const std::complex *b, - const int *ldb, - const std::complex *beta, - std::complex *c, - const int *ldc) +void + zgemm_i (const char* transa, + const char* transb, + const int* m, + const int* n, + const int* k, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* b, + const int* ldb, + const std::complex* beta, + std::complex* c, + const int* ldc) { - GlobalV::ofs_info.unsetf(std::ios_base::floatfield); - GlobalV::ofs_info << "zgemm " << *transa << " " << *transb << " " << *m << " " << *n << " " - << *k << " " << *alpha << " " << *lda << " " << *ldb << " " << *beta << " " << *ldc << std::endl; - zgemm_(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + GlobalV::ofs_info.unsetf (std::ios_base::floatfield); + GlobalV::ofs_info << "zgemm " << *transa << " " << *transb << " " << *m << " " << *n << " " << *k << " " << *alpha + << " " << *lda << " " << *ldb << " " << *beta << " " << *ldc << std::endl; + zgemm_ (transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void zaxpy_i(const int *N, - const std::complex *alpha, - const std::complex *X, - const int *incX, - std::complex *Y, - const int *incY) +void + zaxpy_i (const int* N, + const std::complex* alpha, + const std::complex* X, + const int* incX, + std::complex* Y, + const int* incY) { // std::cout << "zaxpy " << *N << std::endl; // alpha is a coefficient // incX, incY is always 1 - zaxpy_(N, alpha, X, incX, Y, incY); + zaxpy_ (N, alpha, X, incX, Y, incY); } -void zhegvx_i(const int *itype, - const char *jobz, - const char *range, 
- const char *uplo, - const int *n, - std::complex *a, - const int *lda, - std::complex *b, - const int *ldb, - const double *vl, - const double *vu, - const int *il, - const int *iu, - const double *abstol, - int *m, - double *w, - std::complex *z, - const int *ldz, - std::complex *work, - const int *lwork, - double *rwork, - int *iwork, - int *ifail, - int *info) +void + zhegvx_i (const int* itype, + const char* jobz, + const char* range, + const char* uplo, + const int* n, + std::complex* a, + const int* lda, + std::complex* b, + const int* ldb, + const double* vl, + const double* vu, + const int* il, + const int* iu, + const double* abstol, + int* m, + double* w, + std::complex* z, + const int* ldz, + std::complex* work, + const int* lwork, + double* rwork, + int* iwork, + int* ifail, + int* info) { - GlobalV::ofs_info.unsetf(std::ios_base::floatfield); - GlobalV::ofs_info << "zhegvx " << *itype << " " << *jobz << " " << *range << " " << *uplo - << " " << *n << " " << *lda << " " << *ldb << " " << *vl << " " << *vu << " " << *il << " " << *iu - << " " << *abstol << " " << *m << " " << *lwork << " " << *info << std::endl; - zhegvx_(itype, jobz, range, uplo, n, a, lda, b, ldb, vl, vu, il, iu, abstol, m, w, z, ldz, work, lwork, rwork, - iwork, ifail, info); + GlobalV::ofs_info.unsetf (std::ios_base::floatfield); + GlobalV::ofs_info << "zhegvx " << *itype << " " << *jobz << " " << *range << " " << *uplo << " " << *n << " " + << *lda << " " << *ldb << " " << *vl << " " << *vu << " " << *il << " " << *iu << " " << *abstol + << " " << *m << " " << *lwork << " " << *info << std::endl; + zhegvx_ (itype, + jobz, + range, + uplo, + n, + a, + lda, + b, + ldb, + vl, + vu, + il, + iu, + abstol, + m, + w, + z, + ldz, + work, + lwork, + rwork, + iwork, + ifail, + info); } diff --git a/source/source_base/global_file.cpp b/source/source_base/global_file.cpp index ce3ce613c20..c12157af6f5 100644 --- a/source/source_base/global_file.cpp +++ b/source/source_base/global_file.cpp @@ 
-24,381 +24,408 @@ //---------------------------------------------------------- namespace ModuleBase { -void ModuleBase::Global_File::make_dir_out( - const std::string &suffix, - const std::string &calculation, - const bool &out_dir, - const bool &out_wfc_dir, - const int rank, - const bool &restart, - const bool out_alllog) +void + ModuleBase::Global_File::make_dir_out (const std::string& suffix, + const std::string& calculation, + const bool& out_dir, + const bool& out_wfc_dir, + const int rank, + const bool& restart, + const bool out_alllog) { -//---------------------------------------------------------- -// USE STL FUNCTION -// NAME : system -//---------------------------------------------------------- + //---------------------------------------------------------- + // USE STL FUNCTION + // NAME : system + //---------------------------------------------------------- #ifdef __MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier (MPI_COMM_WORLD); #endif int make_dir = 0; - // mohan update 2011-05-03 - //std::string command0 = "test -d " + PARAM.globalv.global_out_dir + " || mkdir " + PARAM.globalv.global_out_dir; - - int times = 0; - while(times0) {break; -} - ++times; - } - -#ifdef __MPI - if(make_dir==0) - { - std::cout << " CAN NOT MAKE THE OUT DIR......." << std::endl; - ModuleBase::QUIT(); - } - MPI_Barrier(MPI_COMM_WORLD); -#endif + // mohan update 2011-05-03 + // std::string command0 = "test -d " + PARAM.globalv.global_out_dir + " || mkdir " + PARAM.globalv.global_out_dir; - if(calculation == "md") - { - int make_dir_stru = 0; - //std::string command1 = "test -d " + PARAM.globalv.global_stru_dir + " || mkdir " + PARAM.globalv.global_stru_dir; - - times = 0; - while(times0) { break; -} + if (make_dir > 0) + { + break; + } ++times; } #ifdef __MPI - if(make_dir_stru==0) + if (make_dir == 0) { - std::cout << " CAN NOT MAKE THE STRU DIR......." << std::endl; - ModuleBase::QUIT(); + std::cout << " CAN NOT MAKE THE OUT DIR......." 
<< std::endl; + ModuleBase::QUIT (); } - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier (MPI_COMM_WORLD); #endif - } - - // make dir for HS matrix output in md calculation - if((out_dir) && calculation == "md") - { - int make_dir_matrix = 0; - //std::string command1 = "test -d " + PARAM.globalv.global_matrix_dir + " || mkdir " + PARAM.globalv.global_matrix_dir; - times = 0; - while(times 0) + { + break; + } + ++times; } - else + +#ifdef __MPI + if (make_dir_stru == 0) { - std::cout << " PROC " << rank << " CAN NOT MAKE THE MATRIX DIR !!! " << std::endl; - make_dir_matrix = 0; + std::cout << " CAN NOT MAKE THE STRU DIR......." << std::endl; + ModuleBase::QUIT (); } - } -#ifdef __MPI - Parallel_Reduce::reduce_all(make_dir_matrix); + MPI_Barrier (MPI_COMM_WORLD); #endif - if(make_dir_matrix>0) { break; -} - ++times; } -#ifdef __MPI - if(make_dir_matrix==0) + // make dir for HS matrix output in md calculation + if ((out_dir) && calculation == "md") { - std::cout << " CAN NOT MAKE THE MATRIX DIR......." << std::endl; - ModuleBase::QUIT(); - } - MPI_Barrier(MPI_COMM_WORLD); -#endif - } + int make_dir_matrix = 0; + // std::string command1 = "test -d " + PARAM.globalv.global_matrix_dir + " || mkdir " + + // PARAM.globalv.global_matrix_dir; - if(out_wfc_dir) - { - int make_dir_wfc = 0; - //std::string command1 = "test -d " + PARAM.globalv.global_wfc_dir + " || mkdir " + PARAM.globalv.global_wfc_dir; - - times = 0; - while(times 0) + { + break; + } + ++times; } - else + +#ifdef __MPI + if (make_dir_matrix == 0) { - std::cout << " PROC " << rank << " CAN NOT MAKE THE WFC DIR !!! " << std::endl; - make_dir_wfc = 0; + std::cout << " CAN NOT MAKE THE MATRIX DIR......." << std::endl; + ModuleBase::QUIT (); } - } -#ifdef __MPI - Parallel_Reduce::reduce_all(make_dir_wfc); + MPI_Barrier (MPI_COMM_WORLD); #endif - if(make_dir_wfc>0) { break; -} - ++times; } -#ifdef __MPI - if(make_dir_wfc==0) + if (out_wfc_dir) { - std::cout << " CAN NOT MAKE THE WFC DIR......." 
<< std::endl; - ModuleBase::QUIT(); - } - MPI_Barrier(MPI_COMM_WORLD); -#endif - } - - if(PARAM.inp.of_ml_gene_data == 1) - { - int make_dir_descrip = 0; - //std::string command1 = "test -d " + PARAM.globalv.global_mlkedf_descriptor_dir + " || mkdir " + PARAM.globalv.global_mlkedf_descriptor_dir; + int make_dir_wfc = 0; + // std::string command1 = "test -d " + PARAM.globalv.global_wfc_dir + " || mkdir " + + // PARAM.globalv.global_wfc_dir; - times = 0; - while(times 0) + { + break; + } + ++times; } - else + +#ifdef __MPI + if (make_dir_wfc == 0) { - std::cout << " PROC " << rank << " CAN NOT MAKE THE MLKEDF DESCRIPTOR DIR !!! " << std::endl; - make_dir_descrip = 0; + std::cout << " CAN NOT MAKE THE WFC DIR......." << std::endl; + ModuleBase::QUIT (); } - } -#ifdef __MPI - Parallel_Reduce::reduce_all(make_dir_descrip); + MPI_Barrier (MPI_COMM_WORLD); #endif - if(make_dir_descrip > 0) - { - break; - } - ++times; } -#ifdef __MPI - if(make_dir_descrip == 0) + if (PARAM.inp.of_ml_gene_data == 1) { - std::cout << " CAN NOT MAKE THE MLKEDF DESCRIPTOR DIR......." << std::endl; - ModuleBase::QUIT(); - } - MPI_Barrier(MPI_COMM_WORLD); + int make_dir_descrip = 0; + // std::string command1 = "test -d " + PARAM.globalv.global_mlkedf_descriptor_dir + " || mkdir " + + // PARAM.globalv.global_mlkedf_descriptor_dir; + + times = 0; + while (times < GlobalV::NPROC) + { + if (rank == times) + { + int ret = mkdir (PARAM.globalv.global_mlkedf_descriptor_dir.c_str (), 0755); + if (ret == 0 || errno == EEXIST) + { + std::cout << " MAKE THE MLKEDF DESCRIPTOR DIR : " + << PARAM.globalv.global_mlkedf_descriptor_dir << std::endl; + make_dir_descrip = 1; + } + else + { + std::cout << " PROC " << rank << " CAN NOT MAKE THE MLKEDF DESCRIPTOR DIR !!! 
" + << std::endl; + make_dir_descrip = 0; + } + } +#ifdef __MPI + Parallel_Reduce::reduce_all (make_dir_descrip); #endif - } + if (make_dir_descrip > 0) + { + break; + } + ++times; + } - if(PARAM.inp.deepks_out_freq_elec > 0) - { - int make_dir_deepks_elec = 0; - //std::string command1 = "test -d " + PARAM.globalv.global_deepks_label_elec_dir + " || mkdir " + PARAM.globalv.global_deepks_label_elec_dir; +#ifdef __MPI + if (make_dir_descrip == 0) + { + std::cout << " CAN NOT MAKE THE MLKEDF DESCRIPTOR DIR......." << std::endl; + ModuleBase::QUIT (); + } + MPI_Barrier (MPI_COMM_WORLD); +#endif + } - times = 0; - while(times 0) { - if(rank==times) - { - int ret = mkdir(PARAM.globalv.global_deepks_label_elec_dir.c_str(), 0755); - if ( ret == 0 || errno == EEXIST ) + int make_dir_deepks_elec = 0; + // std::string command1 = "test -d " + PARAM.globalv.global_deepks_label_elec_dir + " || mkdir " + + // PARAM.globalv.global_deepks_label_elec_dir; + + times = 0; + while (times < GlobalV::NPROC) { - std::cout << " MAKE THE DEEPKS LABELS (ELEC) DIR : " << PARAM.globalv.global_deepks_label_elec_dir << std::endl; - make_dir_deepks_elec = 1; + if (rank == times) + { + int ret = mkdir (PARAM.globalv.global_deepks_label_elec_dir.c_str (), 0755); + if (ret == 0 || errno == EEXIST) + { + std::cout << " MAKE THE DEEPKS LABELS (ELEC) DIR : " + << PARAM.globalv.global_deepks_label_elec_dir << std::endl; + make_dir_deepks_elec = 1; + } + else + { + std::cout << " PROC " << rank << " CAN NOT MAKE THE DEEPKS LABELS (ELEC) DIR !!! " + << std::endl; + make_dir_deepks_elec = 0; + } + } +#ifdef __MPI + Parallel_Reduce::reduce_all (make_dir_deepks_elec); +#endif + if (make_dir_deepks_elec > 0) + { + break; + } + ++times; } - else + +#ifdef __MPI + if (make_dir_deepks_elec == 0) { - std::cout << " PROC " << rank << " CAN NOT MAKE THE DEEPKS LABELS (ELEC) DIR !!! " << std::endl; - make_dir_deepks_elec = 0; + std::cout << " CAN NOT MAKE THE DEEPKS LABELS (ELEC) DIR......." 
<< std::endl; + ModuleBase::QUIT (); } - } -#ifdef __MPI - Parallel_Reduce::reduce_all(make_dir_deepks_elec); + MPI_Barrier (MPI_COMM_WORLD); #endif - if(make_dir_deepks_elec > 0) - { - break; - } - ++times; } -#ifdef __MPI - if(make_dir_deepks_elec == 0) + // mohan add 2010-09-12 + if (out_alllog) { - std::cout << " CAN NOT MAKE THE DEEPKS LABELS (ELEC) DIR......." << std::endl; - ModuleBase::QUIT(); + open_log (GlobalV::ofs_running, PARAM.globalv.log_file, calculation, restart); +#if defined(__CUDA) || defined(__ROCM) + open_log (GlobalV::ofs_device, "device" + std::to_string (rank) + ".log", calculation, restart); +#endif } - MPI_Barrier(MPI_COMM_WORLD); + else + { + if (rank == 0) + { + open_log (GlobalV::ofs_running, PARAM.globalv.log_file, calculation, restart); +#if defined(__CUDA) || defined(__ROCM) + open_log (GlobalV::ofs_device, "device.log", calculation, restart); #endif - } + } + } - // mohan add 2010-09-12 - if(out_alllog) - { - open_log(GlobalV::ofs_running, PARAM.globalv.log_file, calculation, restart); - #if defined(__CUDA) || defined(__ROCM) - open_log(GlobalV::ofs_device, "device" + std::to_string(rank) + ".log", calculation, restart); - #endif - } - else - { - if(rank==0) - { - open_log(GlobalV::ofs_running, PARAM.globalv.log_file, calculation, restart); - #if defined(__CUDA) || defined(__ROCM) - open_log(GlobalV::ofs_device, "device.log", calculation, restart); - #endif - } - } - - if(rank==0) - { - open_log(GlobalV::ofs_warning, "warning.log", calculation, restart); - } + if (rank == 0) + { + open_log (GlobalV::ofs_warning, "warning.log", calculation, restart); + } #ifdef GATHER_INFO - open_log(GlobalV::ofs_info, "math_info_" + std::to_string(rank) + ".log", calculation, restart); + open_log (GlobalV::ofs_info, "math_info_" + std::to_string (rank) + ".log", calculation, restart); #endif return; } -void ModuleBase::Global_File::make_dir_atom(const std::string &label) +void + ModuleBase::Global_File::make_dir_atom (const std::string& label) { 
-//---------------------------------------------------------- -// EXPLAIN : generate atom dir for each type of atom -//---------------------------------------------------------- + //---------------------------------------------------------- + // EXPLAIN : generate atom dir for each type of atom + //---------------------------------------------------------- std::stringstream ss; ss << PARAM.globalv.global_out_dir << label << "/"; - ModuleBase::GlobalFunc::MAKE_DIR(ss.str()); + ModuleBase::GlobalFunc::MAKE_DIR (ss.str ()); return; } -void ModuleBase::Global_File::open_log(std::ofstream &ofs, const std::string &fn, const std::string &calculation, const bool &restart) +void + ModuleBase::Global_File::open_log (std::ofstream& ofs, + const std::string& fn, + const std::string& calculation, + const bool& restart) { -//---------------------------------------------------------- -// USE GLOBAL VARIABLE : -// PARAM.globalv.global_out_dir : (default dir to store "*.log" file) -//---------------------------------------------------------- + //---------------------------------------------------------- + // USE GLOBAL VARIABLE : + // PARAM.globalv.global_out_dir : (default dir to store "*.log" file) + //---------------------------------------------------------- std::stringstream ss; ss << PARAM.globalv.global_out_dir << fn; - if(calculation == "md" && restart) - { - ofs.open(ss.str(), std::ios::app); - } + if (calculation == "md" && restart) + { + ofs.open (ss.str (), std::ios::app); + } else - { - ofs.open( ss.str() ); - } -// ofs << " WELCOME TO MESIA PROGRAM." << std::endl; -// ofs << " OPEN "< -void FUNC_EACH_2( - Ti & tA, - const Ti & tB, - std::function< void( Ti&, const Ti&, T_tail... ) > func, - const T_tail&... t_tail ) + +template +void + FUNC_EACH_2 (Ti& tA, const Ti& tB, std::function func, const T_tail&... t_tail) { - func( tA, tB, t_tail... 
); + func (tA, tB, t_tail...); } - -template -void FUNC_EACH_2( - std::vector & tA, - const std::vector & tB, - std::function< void( Ti&, const Ti&, T_tail... ) > func, - const T_tail&... t_tail ) +template +void + FUNC_EACH_2 (std::vector& tA, + const std::vector& tB, + std::function func, + const T_tail&... t_tail) { - for( size_t i=0; i!=tA.size(); ++i ) - { - FUNC_EACH_2( tA[i], tB[i], func, t_tail... ); - } + for (size_t i = 0; i != tA.size (); ++i) + { + FUNC_EACH_2 (tA[i], tB[i], func, t_tail...); + } } - -template -void FUNC_EACH_2( - std::map & tA, - const std::map & tB, - std::function< void( Ti&, const Ti&, T_tail... ) > func, - const T_tail&... t_tail ) +template +void + FUNC_EACH_2 (std::map& tA, + const std::map& tB, + std::function func, + const T_tail&... t_tail) { - for( auto & ta : tA ) - { - FUNC_EACH_2( ta.second, tB.at(ta.first), func, t_tail... ); - } + for (auto& ta: tA) + { + FUNC_EACH_2 (ta.second, tB.at (ta.first), func, t_tail...); + } } -} -} +} // namespace GlobalFunc +} // namespace ModuleBase #endif // FUNC_EACH_2_H diff --git a/source/source_base/global_function.cpp b/source/source_base/global_function.cpp index 98e6e1ebf9f..c7a170f1b25 100644 --- a/source/source_base/global_function.cpp +++ b/source/source_base/global_function.cpp @@ -26,22 +26,25 @@ namespace ModuleBase namespace GlobalFunc { -void NOTE(const std::string &words) +void + NOTE (const std::string& words) { return; if (GlobalV::ofs_running) - { - // GlobalV::ofs_running << " *********************************************************************************" - // << std::endl; - GlobalV::ofs_running << " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" - << std::endl; - GlobalV::ofs_running << " " << words << std::endl; - GlobalV::ofs_running << " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" 
- << std::endl; - } + { + // GlobalV::ofs_running << " + // *********************************************************************************" + // << std::endl; + GlobalV::ofs_running << " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" + << std::endl; + GlobalV::ofs_running << " " << words << std::endl; + GlobalV::ofs_running << " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" + << std::endl; + } } -void NEW_PART(const std::string &words) +void + NEW_PART (const std::string& words) { GlobalV::ofs_running << "\n ><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><" << std::endl; GlobalV::ofs_running << "\n " << words << std::endl; @@ -53,9 +56,10 @@ void NEW_PART(const std::string &words) // GLOBAL FUNCTION : // NAME : OUT( output date for checking ) //========================================================== -void OUT(std::ofstream &ofs, const std::string &name) +void + OUT (std::ofstream& ofs, const std::string& name) { - ofs << "\n" << std::setw(18) << name << std::endl; + ofs << "\n" << std::setw (18) << name << std::endl; return; } @@ -63,135 +67,135 @@ void OUT(std::ofstream &ofs, const std::string &name) // GLOBAL FUNCTION : // NAME : MAKE_DIR( make dir ,using system function) //========================================================== -void MAKE_DIR(const std::string &fn) +void + MAKE_DIR (const std::string& fn) { // ModuleBase::TITLE("global_function","MAKE_DIR"); if (GlobalV::MY_RANK == 0) - { - int ret = mkdir(fn.c_str(), 0755); - if (ret != 0 && errno != EEXIST) { - ModuleBase::WARNING_QUIT("MAKE_DIR", fn); + int ret = mkdir (fn.c_str (), 0755); + if (ret != 0 && errno != EEXIST) + { + ModuleBase::WARNING_QUIT ("MAKE_DIR", fn); + } } - } return; } -void DONE(std::ofstream &ofs, const std::string &description, const bool only_rank0) +void + DONE (std::ofstream& ofs, const std::string& description, const bool only_rank0) { if (only_rank0) - { - if (GlobalV::MY_RANK 
== 0) { - // ofs << " ---------------------------------------------------------------------------------\n"; + if (GlobalV::MY_RANK == 0) + { + // ofs << " + // ---------------------------------------------------------------------------------\n"; + ofs << " DONE : " << description; + ofs << " Time : " << ModuleBase::timer::print_until_now () << " (SEC)"; + ofs << std::endl << std::endl; + // ofs << "\n + // ---------------------------------------------------------------------------------\n"; + } + } + else + { + // ofs << " ---------------------------------------------------------------------------------\n"; ofs << " DONE : " << description; - ofs << " Time : " << ModuleBase::timer::print_until_now() << " (SEC)"; + ofs << " Time : " << ModuleBase::timer::print_until_now () << " (SEC)"; ofs << std::endl << std::endl; - // ofs << "\n ---------------------------------------------------------------------------------\n"; + // ofs << "\n ---------------------------------------------------------------------------------\n"; } - } - else - { - // ofs << " ---------------------------------------------------------------------------------\n"; - ofs << " DONE : " << description; - ofs << " Time : " << ModuleBase::timer::print_until_now() << " (SEC)"; - ofs << std::endl << std::endl; - // ofs << "\n ---------------------------------------------------------------------------------\n"; - } // std::cout << "\n---------------------------------------------------------------------------------\n"; - std::cout << " DONE(" << std::setw(10) << ModuleBase::timer::print_until_now() << " SEC) : " << description + std::cout << " DONE(" << std::setw (10) << ModuleBase::timer::print_until_now () << " SEC) : " << description << std::endl; // std::cout << "\n---------------------------------------------------------------------------------\n"; return; } - -bool SCAN_BEGIN(std::ifstream &ifs, - const std::string &TargetName, - const bool restart, - const bool ifwarn) +bool + SCAN_BEGIN 
(std::ifstream& ifs, const std::string& TargetName, const bool restart, const bool ifwarn) { std::string SearchName; bool find = false; if (restart) - { - ifs.clear(); - ifs.seekg(0); - } - ifs.rdstate(); - while (ifs.good()) - { - ifs >> SearchName; - if (SearchName == TargetName) { - find = true; - break; + ifs.clear (); + ifs.seekg (0); + } + ifs.rdstate (); + while (ifs.good ()) + { + ifs >> SearchName; + if (SearchName == TargetName) + { + find = true; + break; + } } - } if (!find && ifwarn) - { - GlobalV::ofs_warning << " In SCAN_BEGIN, can't find: " << TargetName << " block." << std::endl; - } + { + GlobalV::ofs_warning << " In SCAN_BEGIN, can't find: " << TargetName << " block." << std::endl; + } return find; } - -bool SCAN_LINE_BEGIN(std::ifstream &ifs, - const std::string &TargetName, - const bool restart, - const bool ifwarn) +bool + SCAN_LINE_BEGIN (std::ifstream& ifs, const std::string& TargetName, const bool restart, const bool ifwarn) { bool find = false; if (restart) - { - ifs.clear(); - ifs.seekg(0); - } - ifs.rdstate(); + { + ifs.clear (); + ifs.seekg (0); + } + ifs.rdstate (); std::string line; - while (std::getline(ifs,line)) - { - //! obtain the first character, should not be # - size_t first_char_pos = line.find_first_not_of(" \t"); - if (first_char_pos != std::string::npos && line[first_char_pos] == '#') + while (std::getline (ifs, line)) { - continue; - } + //! obtain the first character, should not be # + size_t first_char_pos = line.find_first_not_of (" \t"); + if (first_char_pos != std::string::npos && line[first_char_pos] == '#') + { + continue; + } - //! search in each line - std::istringstream iss(line); - std::string SearchName; - while (iss >> SearchName) - { - if (SearchName == TargetName) - { - find = true; - //std::cout << " search name = " << SearchName << std::endl; - return find; - } - } - } + //! 
search in each line + std::istringstream iss (line); + std::string SearchName; + while (iss >> SearchName) + { + if (SearchName == TargetName) + { + find = true; + // std::cout << " search name = " << SearchName << std::endl; + return find; + } + } + } if (!find && ifwarn) - { - GlobalV::ofs_warning << " In SCAN_LINE_BEGIN, can't find: " << TargetName << " block." << std::endl; - } + { + GlobalV::ofs_warning << " In SCAN_LINE_BEGIN, can't find: " << TargetName << " block." << std::endl; + } return find; } -void SCAN_END(std::ifstream &ifs, const std::string &TargetName, const bool ifwarn) +void + SCAN_END (std::ifstream& ifs, const std::string& TargetName, const bool ifwarn) { std::string SearchName; ifs >> SearchName; if (SearchName != TargetName && ifwarn) - { - GlobalV::ofs_warning << " In SCAN_END, can't find: " << TargetName << " block." << std::endl; - } + { + GlobalV::ofs_warning << " In SCAN_END, can't find: " << TargetName << " block." << std::endl; + } return; } -void BLOCK_HERE(const std::string &description) +void + BLOCK_HERE (const std::string& description) { // return; std::cout << "\n********************************************"; @@ -200,65 +204,72 @@ void BLOCK_HERE(const std::string &description) std::cout << "\n********************************************" << std::endl; bool go_on = false; if (GlobalV::MY_RANK == 0) - { - std::cin >> go_on; - } + { + std::cin >> go_on; + } #ifdef __MPI int swap = go_on; if (GlobalV::MY_RANK == 0) swap = go_on; - MPI_Bcast(&swap, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast (&swap, 1, MPI_INT, 0, MPI_COMM_WORLD); if (GlobalV::MY_RANK != 0) - go_on = static_cast(swap); + go_on = static_cast (swap); #endif if (go_on) - { - return; - } + { + return; + } else - { - ModuleBase::QUIT(); - } + { + ModuleBase::QUIT (); + } } -void OUT_TIME(const std::string &name, time_t &start, time_t &end) +void + OUT_TIME (const std::string& name, time_t& start, time_t& end) { - double mini = difftime(end, start) / 60.0; + double mini = 
difftime (end, start) / 60.0; if (mini > 0.1) - { - if(GlobalV::ofs_warning) - { - GlobalV::ofs_warning << std::setprecision(2); - GlobalV::ofs_warning << " -------------------------------------------------------" << std::endl; - GlobalV::ofs_warning << " NAME < " << name << " > = " << std::endl; - GlobalV::ofs_warning << " -> " << ctime(&start) << " -> " << ctime(&end); - GlobalV::ofs_warning << " TIME = " << mini << " [Minutes]" << std::endl; - GlobalV::ofs_warning << " -------------------------------------------------------" << std::endl; - GlobalV::ofs_warning << std::setprecision(6); - } - } + { + if (GlobalV::ofs_warning) + { + GlobalV::ofs_warning << std::setprecision (2); + GlobalV::ofs_warning << " -------------------------------------------------------" << std::endl; + GlobalV::ofs_warning << " NAME < " << name << " > = " << std::endl; + GlobalV::ofs_warning << " -> " << ctime (&start) << " -> " << ctime (&end); + GlobalV::ofs_warning << " TIME = " << mini << " [Minutes]" << std::endl; + GlobalV::ofs_warning << " -------------------------------------------------------" << std::endl; + GlobalV::ofs_warning << std::setprecision (6); + } + } } -size_t MemAvailable() +size_t + MemAvailable () { size_t mem_sum = 0; int i = 0; - std::ifstream ifs("/proc/meminfo"); - while (ifs.good()) - { - std::string label, size, kB; - ifs >> label >> size >> kB; - if (label == "MemAvailable:") - return std::stol(size); - else if (label == "MemFree:" || label == "Buffers:" || label == "Cached:") + std::ifstream ifs ("/proc/meminfo"); + while (ifs.good ()) { - mem_sum += std::stol(size); - ++i; + std::string label, size, kB; + ifs >> label >> size >> kB; + if (label == "MemAvailable:") + { + return std::stol (size); + } + else if (label == "MemFree:" || label == "Buffers:" || label == "Cached:") + { + mem_sum += std::stol (size); + ++i; + } + if (i == 3) + { + return mem_sum; + } } - if (i == 3) return mem_sum; - } - throw std::runtime_error("read /proc/meminfo error in " + 
TO_STRING(__FILE__) + " line " + TO_STRING(__LINE__)); + throw std::runtime_error ("read /proc/meminfo error in " + TO_STRING (__FILE__) + " line " + TO_STRING (__LINE__)); } } // namespace GlobalFunc diff --git a/source/source_base/global_function.h b/source/source_base/global_function.h index 3d6f1e35be5..77e2fdc0a84 100644 --- a/source/source_base/global_function.h +++ b/source/source_base/global_function.h @@ -23,8 +23,8 @@ namespace ModuleBase namespace GlobalFunc { -void NOTE(const std::string& words); -void NEW_PART(const std::string& words); +void NOTE (const std::string& words); +void NEW_PART (const std::string& words); //========================================================== // GLOBAL FUNCTION : @@ -32,62 +32,68 @@ void NEW_PART(const std::string& words); // NAME : OUT( output date into file "ofs") // NAME : OUTP( output parameters ) //========================================================== -void OUT(std::ofstream& ofs, const std::string& name); +void OUT (std::ofstream& ofs, const std::string& name); template -void OUT(std::ofstream& ofs, const std::string& name, const T& a) +void + OUT (std::ofstream& ofs, const std::string& name, const T& a) { std::stringstream name2; name2 << name; - ofs << " " << std::setw(40) << name2.str() << " = " << a << std::endl; + ofs << " " << std::setw (40) << name2.str () << " = " << a << std::endl; // ofs << " " << name << a << std::endl; return; } template -void OUT(std::ofstream& ofs, const std::string& name, const T& x, const T& y) +void + OUT (std::ofstream& ofs, const std::string& name, const T& x, const T& y) { - ofs << " " << std::setw(40) << name << " = [ " << x << ", " << y << " ]" << std::endl; + ofs << " " << std::setw (40) << name << " = [ " << x << ", " << y << " ]" << std::endl; // ofs << " " << name << a << std::endl; return; } template -void OUT(std::ofstream& ofs, const std::string& name, const T& x, const T& y, const T& z) +void + OUT (std::ofstream& ofs, const std::string& name, const T& x, const 
T& y, const T& z) { - ofs << " " << std::setw(40) << name << " = [ " << x << ", " << y << ", " << z << " ]" << std::endl; + ofs << " " << std::setw (40) << name << " = [ " << x << ", " << y << ", " << z << " ]" << std::endl; return; } // output parameters and explanations template -void OUTP(std::ofstream& ofs, const std::string& name, const T& a, const std::string& explanation = "") +void + OUTP (std::ofstream& ofs, const std::string& name, const T& a, const std::string& explanation = "") { - ofs << std::setw(30) << name << " " << a << " #" << explanation << std::endl; + ofs << std::setw (30) << name << " " << a << " #" << explanation << std::endl; } template -void OUT(const std::string& name, const T& a) +void + OUT (const std::string& name, const T& a) { - std::cout << " " << std::setw(40) << name << " = " << a << std::endl; + std::cout << " " << std::setw (40) << name << " = " << a << std::endl; // std::cout << " " << name << a << std::endl; return; } -void OUT_TIME(const std::string& name, time_t& start, time_t& end); +void OUT_TIME (const std::string& name, time_t& start, time_t& end); //========================================================== // GLOBAL FUNCTION : // NAME : MAKE_DIR( make dir ,using system function) //========================================================== -void MAKE_DIR(const std::string& file); +void MAKE_DIR (const std::string& file); //========================================================== // GLOBAL FUNCTION : // NAME : AUTO_SET( auto_set variables ) //========================================================== template -void AUTO_SET(const std::string& name, const T& a) +void + AUTO_SET (const std::string& name, const T& a) { GlobalV::ofs_warning << " AUTO_SET " << name << " to " << a << std::endl; return; @@ -98,7 +104,7 @@ void AUTO_SET(const std::string& name, const T& a) // NAME : DONE( ouput information(time) on screen and log) // we can regard it as a milestone. 
//========================================================== -void DONE(std::ofstream& ofs, const std::string& description, bool only_rank0 = false); +void DONE (std::ofstream& ofs, const std::string& description, bool only_rank0 = false); //========================================================== // GLOBAL FUNCTION : @@ -106,24 +112,26 @@ void DONE(std::ofstream& ofs, const std::string& description, bool only_rank0 = // set elements of u as zero which u is 1_d std::complex array //========================================================== template -inline void ZEROS(std::complex* u, const TI n) // Peize Lin change int to TI at 2020.03.03 +inline void + ZEROS (std::complex* u, const TI n) // Peize Lin change int to TI at 2020.03.03 { - assert(n >= 0); + assert (n >= 0); for (TI i = 0; i < n; i++) - { - u[i] = std::complex(0.0, 0.0); - } + { + u[i] = std::complex (0.0, 0.0); + } return; } template -inline void ZEROS(T* u, const TI n) // Peize Lin change int to TI at 2020.03.03 +inline void + ZEROS (T* u, const TI n) // Peize Lin change int to TI at 2020.03.03 { - assert(n >= 0); + assert (n >= 0); for (TI i = 0; i < n; i++) - { - u[i] = 0; - } + { + u[i] = 0; + } } //========================================================== @@ -131,74 +139,80 @@ inline void ZEROS(T* u, const TI n) // Peize Lin change int to TI at 2020.03.03 // NAME : TEST_LEVEL // control the test_level //========================================================== -void TEST_LEVEL(const std::string& name, bool disable); +void TEST_LEVEL (const std::string& name, bool disable); //========================================================== // GLOBAL FUNCTION : //========================================================== template -static void READ_VALUE(std::ifstream& ifs, T& v) +static void + READ_VALUE (std::ifstream& ifs, T& v) { ifs >> v; std::string line; - getline(ifs, line); + getline (ifs, line); return; } //------------------------------------------------------------- -//! 
The `SCAN_BEGIN` function efficiently searches -//! text files for specified keywords +//! The `SCAN_BEGIN` function efficiently searches +//! text files for specified keywords //------------------------------------------------------------- -bool SCAN_BEGIN(std::ifstream& ifs, - const std::string& TargetName, - const bool restart = true, - const bool ifwarn = true); +bool + SCAN_BEGIN (std::ifstream& ifs, const std::string& TargetName, const bool restart = true, const bool ifwarn = true); //------------------------------------------------------------- -// The `SCAN_LINE_BEGIN` function efficiently searches +// The `SCAN_LINE_BEGIN` function efficiently searches // text files for specified keywords while ignoring comment -// lines and whitespace. It skips any line starting with '#' +// lines and whitespace. It skips any line starting with '#' //------------------------------------------------------------- -bool SCAN_LINE_BEGIN(std::ifstream& ifs, - const std::string& TargetName, - const bool restart = true, - const bool ifwarn = true); +bool SCAN_LINE_BEGIN (std::ifstream& ifs, + const std::string& TargetName, + const bool restart = true, + const bool ifwarn = true); -void SCAN_END(std::ifstream& ifs, const std::string& TargetName, const bool ifwarn = true); +void SCAN_END (std::ifstream& ifs, const std::string& TargetName, const bool ifwarn = true); template -static inline void DCOPY(const T& a, T& b, const int& dim) +static inline void + DCOPY (const T& a, T& b, const int& dim) { - for (int i = 0; i < dim; ++i) { - b[i] = a[i]; - } + for (int i = 0; i < dim; ++i) + { + b[i] = a[i]; + } } template -inline void DCOPY(const T* a, T* b, const int& dim) { - for (int i = 0; i < dim; ++i) { - b[i] = a[i]; - } +inline void + DCOPY (const T* a, T* b, const int& dim) +{ + for (int i = 0; i < dim; ++i) + { + b[i] = a[i]; + } } template -inline void COPYARRAY(const T* a, T* b, const int dim); +inline void COPYARRAY (const T* a, T* b, const int dim); template <> -inline void 
COPYARRAY(const std::complex* a, std::complex* b, const int dim) +inline void + COPYARRAY (const std::complex* a, std::complex* b, const int dim) { const int one = 1; - zcopy_(&dim, a, &one, b, &one); + zcopy_ (&dim, a, &one, b, &one); } template <> -inline void COPYARRAY(const double* a, double* b, const int dim) +inline void + COPYARRAY (const double* a, double* b, const int dim) { const int one = 1; - dcopy_(&dim, a, &one, b, &one); + dcopy_ (&dim, a, &one, b, &one); } -void BLOCK_HERE(const std::string& description); +void BLOCK_HERE (const std::string& description); //========================================================== // GLOBAL FUNCTION : @@ -207,23 +221,27 @@ void BLOCK_HERE(const std::string& description); // Peize Lin add 2016-02-25 //========================================================== template -static inline T* VECTOR_TO_PTR(std::vector& v) +static inline T* + VECTOR_TO_PTR (std::vector& v) { return &(v[0]); } template -static inline T* VECTOR_TO_PTR(std::valarray& v) +static inline T* + VECTOR_TO_PTR (std::valarray& v) { return &(v[0]); } template -static inline const T* VECTOR_TO_PTR(const std::vector& v) +static inline const T* + VECTOR_TO_PTR (const std::vector& v) { return &(v[0]); } template -static inline const T* VECTOR_TO_PTR(const std::valarray& v) +static inline const T* + VECTOR_TO_PTR (const std::valarray& v) { return &(v[0]); } @@ -236,11 +254,12 @@ static inline const T* VECTOR_TO_PTR(const std::valarray& v) // Peize Lin add 2016-07-18 //========================================================== template -std::string TO_STRING(const T& t, const int n=20) // n=20 since LDBL_EPSILON is 1E-16 or 1E-19 +std::string + TO_STRING (const T& t, const int n = 20) // n=20 since LDBL_EPSILON is 1E-16 or 1E-19 { std::stringstream newstr; - newstr << std::setprecision(n) << t; - return newstr.str(); + newstr << std::setprecision (n) << t; + return newstr.str (); } //========================================================== @@ -252,43 
+271,51 @@ std::string TO_STRING(const T& t, const int n=20) // n=20 since LDBL_EPSILON is // Peize Lin add 2018-07-16 //========================================================== template -inline void* MAP_EXIST(T_map& ms, const T_key1& key1) +inline void* + MAP_EXIST (T_map& ms, const T_key1& key1) { - auto ms1 = ms.find(key1); - if (ms1 == ms.end()) { - return nullptr; - } - return static_cast(&ms1->second); + auto ms1 = ms.find (key1); + if (ms1 == ms.end ()) + { + return nullptr; + } + return static_cast (&ms1->second); } template -inline void* MAP_EXIST(T_map& ms, const T_key1& key1, const T_key_tail&... key_tail) +inline void* + MAP_EXIST (T_map& ms, const T_key1& key1, const T_key_tail&... key_tail) { - auto ms1 = ms.find(key1); - if (ms1 == ms.end()) { - return nullptr; - } - return MAP_EXIST(ms1->second, key_tail...); + auto ms1 = ms.find (key1); + if (ms1 == ms.end ()) + { + return nullptr; + } + return MAP_EXIST (ms1->second, key_tail...); } template -inline const void* MAP_EXIST(const T_map& ms, const T_key1& key1) +inline const void* + MAP_EXIST (const T_map& ms, const T_key1& key1) { - auto ms1 = ms.find(key1); - if (ms1 == ms.end()) { - return nullptr; - } - return static_cast(&ms1->second); + auto ms1 = ms.find (key1); + if (ms1 == ms.end ()) + { + return nullptr; + } + return static_cast (&ms1->second); } template -inline const void* MAP_EXIST(const T_map& ms, const T_key1& key1, const T_key_tail&... key_tail) +inline const void* + MAP_EXIST (const T_map& ms, const T_key1& key1, const T_key_tail&... 
key_tail) { - auto ms1 = ms.find(key1); - if (ms1 == ms.end()) { - return nullptr; - } - return MAP_EXIST(ms1->second, key_tail...); + auto ms1 = ms.find (key1); + if (ms1 == ms.end ()) + { + return nullptr; + } + return MAP_EXIST (ms1->second, key_tail...); } //========================================================== @@ -298,7 +325,7 @@ inline const void* MAP_EXIST(const T_map& ms, const T_key1& key1, const T_key_ta // unit: kB // Peize Lin add 2019-12-21 //========================================================== -size_t MemAvailable(); +size_t MemAvailable (); //========================================================== // GLOBAL FUNCTION : @@ -315,17 +342,20 @@ size_t MemAvailable(); // Peize Lin add 2021-05-09 //========================================================== template -static inline void DELETE_MUL_PTR(T_element* v) +static inline void + DELETE_MUL_PTR (T_element* v) { delete[] v; v = nullptr; } template -static inline void DELETE_MUL_PTR(T_element* v, const T_N_first N_first, const T_N_tail... N_tail) +static inline void + DELETE_MUL_PTR (T_element* v, const T_N_first N_first, const T_N_tail... N_tail) { - for (T_N_first i = 0; i < N_first; ++i) { - DELETE_MUL_PTR(v[i], N_tail...); - } + for (T_N_first i = 0; i < N_first; ++i) + { + DELETE_MUL_PTR (v[i], N_tail...); + } delete[] v; v = nullptr; } @@ -345,30 +375,34 @@ static inline void DELETE_MUL_PTR(T_element* v, const T_N_first N_first, const T // Peize Lin add 2021-05-09 //========================================================== template -static inline void FREE_MUL_PTR(T_element* v) +static inline void + FREE_MUL_PTR (T_element* v) { - free(v); + free (v); v = nullptr; } template -static inline void FREE_MUL_PTR(T_element* v, const T_N_first N_first, const T_N_tail... N_tail) +static inline void + FREE_MUL_PTR (T_element* v, const T_N_first N_first, const T_N_tail... 
N_tail) { - for (T_N_first i = 0; i < N_first; ++i) { - FREE_MUL_PTR(v[i], N_tail...); - } - free(v); + for (T_N_first i = 0; i < N_first; ++i) + { + FREE_MUL_PTR (v[i], N_tail...); + } + free (v); v = nullptr; } template -T ddot_real(const int& dim, const std::complex* psi_L, const std::complex* psi_R, const bool reduce = true); +T ddot_real (const int& dim, const std::complex* psi_L, const std::complex* psi_R, const bool reduce = true); //========================================================== // GLOBAL FUNCTION : // NAME : IS_COLUMN_MAJOR_KS_SOLVER // check ks_solver requires column major or not //========================================================== -static inline bool IS_COLUMN_MAJOR_KS_SOLVER(std::string ks_solver) +static inline bool + IS_COLUMN_MAJOR_KS_SOLVER (std::string ks_solver) { return ks_solver == "genelpa" || ks_solver == "elpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "cusolvermp" || ks_solver == "cg_in_lcao" || ks_solver == "pexsi" || ks_solver == "lapack"; diff --git a/source/source_base/global_function_ddotreal.cpp b/source/source_base/global_function_ddotreal.cpp index b38dabd1de2..64a9616316f 100644 --- a/source/source_base/global_function_ddotreal.cpp +++ b/source/source_base/global_function_ddotreal.cpp @@ -7,17 +7,14 @@ namespace ModuleBase namespace GlobalFunc { -template double ddot_real(const int& dim, - const std::complex* psi_L, - const std::complex* psi_R, - const bool reduce); -template float ddot_real(const int& dim, - const std::complex* psi_L, - const std::complex* psi_R, - const bool reduce); +template double + ddot_real (const int& dim, const std::complex* psi_L, const std::complex* psi_R, const bool reduce); +template float + ddot_real (const int& dim, const std::complex* psi_L, const std::complex* psi_R, const bool reduce); template -T ddot_real(const int& dim, const std::complex* psi_L, const std::complex* psi_R, const bool reduce) +T + ddot_real (const int& dim, const std::complex* 
psi_L, const std::complex* psi_R, const bool reduce) { //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // qianrui modify 2021-3-14 @@ -26,9 +23,11 @@ T ddot_real(const int& dim, const std::complex* psi_L, const std::complex* T *pL, *pR; pL = (T*)psi_L; pR = (T*)psi_R; - T result = BlasConnector::dot(dim2, pL, 1, pR, 1); + T result = BlasConnector::dot (dim2, pL, 1, pR, 1); if (reduce) - Parallel_Reduce::reduce_pool(result); + { + Parallel_Reduce::reduce_pool (result); + } return result; //====================================================================== /*std::complex result(0,0); diff --git a/source/source_base/global_variable.cpp b/source/source_base/global_variable.cpp index 71f41c1b4ba..b7cb9df916e 100644 --- a/source/source_base/global_variable.cpp +++ b/source/source_base/global_variable.cpp @@ -16,8 +16,8 @@ namespace GlobalV // EXPLAIN : Parallel information //---------------------------------------------------------- -int NPROC = 1; ///< global number of process -int KPAR = 1; ///< global number of pools +int NPROC = 1; ///< global number of process +int KPAR = 1; ///< global number of pools int MY_RANK = 0; ///< global index of process int MY_POOL = 0; ///< global index of pool (count in pool) int MY_BNDGROUP = 0; diff --git a/source/source_base/gram_schmidt_orth-inl.h b/source/source_base/gram_schmidt_orth-inl.h index 809f9fbb391..82c2b711cd6 100644 --- a/source/source_base/gram_schmidt_orth-inl.h +++ b/source/source_base/gram_schmidt_orth-inl.h @@ -14,94 +14,113 @@ namespace ModuleBase { -template -Gram_Schmidt_Orth::Gram_Schmidt_Orth( const std::vector &rab_in, const Coordinate &coordinate_in ) - :rab(rab_in), - coordinate(coordinate_in) -{ - if( Coordinate::Sphere == coordinate ) - { - std::vector radial( rab.size() ); - radial[0] = 0; - for( int ir=1; ir!=radial.size(); ++ir ) - radial[ir] = radial[ir-1] + rab[ir-1]; - this->radial_2 = Mathzone::Pointwise_Product( radial, radial ); - } +template 
+Gram_Schmidt_Orth::Gram_Schmidt_Orth (const std::vector& rab_in, + const Coordinate& coordinate_in) + : rab (rab_in), coordinate (coordinate_in) +{ + if (Coordinate::Sphere == coordinate) + { + std::vector radial (rab.size ()); + radial[0] = 0; + for (int ir = 1; ir != radial.size (); ++ir) + { + radial[ir] = radial[ir - 1] + rab[ir - 1]; + } + this->radial_2 = Mathzone::Pointwise_Product (radial, radial); + } } -template -std::vector> Gram_Schmidt_Orth::cal_orth( - const std::vector> &func, - const Func_Type norm_threshold ) +template +std::vector> + Gram_Schmidt_Orth::cal_orth (const std::vector>& func, + const Func_Type norm_threshold) { - // Schmidt: hn to en - // e1 = h1 / ||h1|| - // gn = hn - \sum{i=1 to n-1}(hn,ei)ei - // en = gn / ||gn|| - - std::vector> func_new; - - for( size_t if1=0; if1!=func.size(); ++if1 ) - { - //use CGS2 algorithm to do twice orthogonalization - //DOI 10.1007/s00211-005-0615-4 - std::vector func_try = func[if1]; - for(int niter=0;niter<3;niter++) - { - std::vector func_tmp = func_try; - for( size_t if_minus=0; if_minus!=func_new.size(); ++if_minus ) - { - // (hn,ei) - const std::vector && mul_func = Mathzone::Pointwise_Product( func_tmp, func_new[if_minus] ); - const Func_Type in_product = cal_norm(mul_func); + // Schmidt: hn to en + // e1 = h1 / ||h1|| + // gn = hn - \sum{i=1 to n-1}(hn,ei)ei + // en = gn / ||gn|| + + std::vector> func_new; + + for (size_t if1 = 0; if1 != func.size (); ++if1) + { + // use CGS2 algorithm to do twice orthogonalization + // DOI 10.1007/s00211-005-0615-4 + std::vector func_try = func[if1]; + for (int niter = 0; niter < 3; niter++) + { + std::vector func_tmp = func_try; + for (size_t if_minus = 0; if_minus != func_new.size (); ++if_minus) + { + // (hn,ei) + const std::vector&& mul_func + = Mathzone::Pointwise_Product (func_tmp, func_new[if_minus]); + const Func_Type in_product = cal_norm (mul_func); - // hn - (hn,ei)ei - BlasConnector::axpy( mul_func.size(), -in_product, 
ModuleBase::GlobalFunc::VECTOR_TO_PTR(func_new[if_minus]), 1, ModuleBase::GlobalFunc::VECTOR_TO_PTR(func_try), 1); - } - } - - // ||gn|| - const std::vector && func_2 = Mathzone::Pointwise_Product( func_try, func_try ); - const Func_Type norm = sqrt(cal_norm(func_2)); - - // en = gn / ||gn|| - // if ||gn|| too small, filter out - if( norm >= norm_threshold ) - { - BlasConnector::scal( func_try.size(), 1.0/norm, ModuleBase::GlobalFunc::VECTOR_TO_PTR(func_try), 1 ); - func_new.push_back( func_try ); - } - } - return func_new; + // hn - (hn,ei)ei + BlasConnector::axpy (mul_func.size (), + -in_product, + ModuleBase::GlobalFunc::VECTOR_TO_PTR (func_new[if_minus]), + 1, + ModuleBase::GlobalFunc::VECTOR_TO_PTR (func_try), + 1); + } + } + + // ||gn|| + const std::vector&& func_2 = Mathzone::Pointwise_Product (func_try, func_try); + const Func_Type norm = sqrt (cal_norm (func_2)); + + // en = gn / ||gn|| + // if ||gn|| too small, filter out + if (norm >= norm_threshold) + { + BlasConnector::scal (func_try.size (), + 1.0 / norm, + ModuleBase::GlobalFunc::VECTOR_TO_PTR (func_try), + 1); + func_new.push_back (func_try); + } + } + return func_new; } // cal ||f|| -template -Func_Type Gram_Schmidt_Orth::cal_norm( const std::vector &f ) +template +Func_Type + Gram_Schmidt_Orth::cal_norm (const std::vector& f) { - Func_Type norm = 0.0; - switch( this->coordinate ) - { - case Coordinate::Cartesian: - { - Integral::Simpson_Integral( f.size(), ModuleBase::GlobalFunc::VECTOR_TO_PTR(f), ModuleBase::GlobalFunc::VECTOR_TO_PTR(rab), norm); - break; - } - case Coordinate::Sphere: - { - const std::vector &&tmp_func = Mathzone::Pointwise_Product( f, radial_2 ); - Integral::Simpson_Integral( f.size(), ModuleBase::GlobalFunc::VECTOR_TO_PTR(tmp_func), ModuleBase::GlobalFunc::VECTOR_TO_PTR(rab), norm); - break; - } - default: - { - throw std::invalid_argument("coordinate must be Cartesian or Sphere "+std::string(__FILE__)+" line "+std::to_string(__LINE__)); - break; - } - } - return norm; + 
Func_Type norm = 0.0; + switch (this->coordinate) + { + case Coordinate::Cartesian: + { + Integral::Simpson_Integral (f.size (), + ModuleBase::GlobalFunc::VECTOR_TO_PTR (f), + ModuleBase::GlobalFunc::VECTOR_TO_PTR (rab), + norm); + break; + } + case Coordinate::Sphere: + { + const std::vector&& tmp_func = Mathzone::Pointwise_Product (f, radial_2); + Integral::Simpson_Integral (f.size (), + ModuleBase::GlobalFunc::VECTOR_TO_PTR (tmp_func), + ModuleBase::GlobalFunc::VECTOR_TO_PTR (rab), + norm); + break; + } + default: + { + throw std::invalid_argument ("coordinate must be Cartesian or Sphere " + std::string (__FILE__) + + " line " + std::to_string (__LINE__)); + break; + } + } + return norm; } -} +} // namespace ModuleBase -#endif // GRAM_SCHMIDT_ORTH_INL_H +#endif // GRAM_SCHMIDT_ORTH_INL_H diff --git a/source/source_base/gram_schmidt_orth.h b/source/source_base/gram_schmidt_orth.h index 897e3c7a98d..0c3ac1c2ccd 100644 --- a/source/source_base/gram_schmidt_orth.h +++ b/source/source_base/gram_schmidt_orth.h @@ -6,33 +6,34 @@ #ifndef GRAM_SCHMIDT_ORTH_H #define GRAM_SCHMIDT_ORTH_H -#include -#include +#include +#include namespace ModuleBase { -template +template class Gram_Schmidt_Orth { -public: + public: + enum class Coordinate + { + Cartesian, + Sphere + }; - enum class Coordinate { Cartesian, Sphere }; - - Gram_Schmidt_Orth( const std::vector &rab, const Coordinate &coordinate ); - - std::vector> cal_orth( - const std::vector> &func, - const Func_Type norm_threshold = std::numeric_limits::min() ); - -private: + Gram_Schmidt_Orth (const std::vector& rab, const Coordinate& coordinate); - const Coordinate coordinate; - const std::vector &rab; - std::vector radial_2; + std::vector> cal_orth (const std::vector>& func, + const Func_Type norm_threshold + = std::numeric_limits::min ()); - Func_Type cal_norm( const std::vector &f ); + private: + const Coordinate coordinate; + const std::vector& rab; + std::vector radial_2; + Func_Type cal_norm (const std::vector& f); }; 
-} -#endif // GRAM_SCHMIDT_ORTH_H \ No newline at end of file +} // namespace ModuleBase +#endif // GRAM_SCHMIDT_ORTH_H \ No newline at end of file diff --git a/source/source_base/intarray.cpp b/source/source_base/intarray.cpp index 7c9cc0e71d9..39cdc31a1ce 100644 --- a/source/source_base/intarray.cpp +++ b/source/source_base/intarray.cpp @@ -3,59 +3,60 @@ namespace ModuleBase { -void IntArrayAlloc() +void + IntArrayAlloc () { std::cout << "\n Allocation error for IntArray " << std::endl; - exit(0); + exit (0); } -IntArray::IntArray(const int d1,const int d2) +IntArray::IntArray (const int d1, const int d2) { dim = 2; bound1 = (d1 <= 0) ? 1 : d1; bound2 = (d2 <= 0) ? 1 : d2; bound3 = bound4 = bound5 = bound6 = 0; size = bound1 * bound2; - try - { - ptr = new int[size]; - zero_out(); - } + try + { + ptr = new int[size]; + zero_out (); + } catch (const std::bad_alloc& e) - { - std::cerr << "Allocation error for IntArray: " << e.what() << std::endl; - ptr = nullptr; - size = 0; - throw; - } - assert( ptr != nullptr); + { + std::cerr << "Allocation error for IntArray: " << e.what () << std::endl; + ptr = nullptr; + size = 0; + throw; + } + assert (ptr != nullptr); } -IntArray::IntArray(const int d1,const int d2,const int d3) +IntArray::IntArray (const int d1, const int d2, const int d3) { dim = 3; bound1 = (d1 <= 0) ? 1 : d1; bound2 = (d2 <= 0) ? 1 : d2; bound3 = (d3 <= 0) ? 
1 : d3; bound4 = bound5 = bound6 = 0; - //set_new_handler(IntArrayAlloc); - size = bound1 * bound2 * bound3 ; //* sizeof(float); - try - { - ptr = new int[size]; - zero_out(); - } + // set_new_handler(IntArrayAlloc); + size = bound1 * bound2 * bound3; //* sizeof(float); + try + { + ptr = new int[size]; + zero_out (); + } catch (const std::bad_alloc& e) - { - std::cerr << "Allocation error for IntArray: " << e.what() << std::endl; - ptr = nullptr; - size = 0; - throw; - } - assert(ptr != nullptr); + { + std::cerr << "Allocation error for IntArray: " << e.what () << std::endl; + ptr = nullptr; + size = 0; + throw; + } + assert (ptr != nullptr); } -IntArray::IntArray(const int d1,const int d2,const int d3,const int d4) +IntArray::IntArray (const int d1, const int d2, const int d3, const int d4) { dim = 4; bound1 = (d1 <= 0) ? 1 : d1; @@ -63,25 +64,24 @@ IntArray::IntArray(const int d1,const int d2,const int d3,const int d4) bound3 = (d3 <= 0) ? 1 : d3; bound4 = (d4 <= 0) ? 1 : d4; bound5 = bound6 = 0; - //set_new_handler(IntArrayAlloc); - size = bound1 * bound2 * bound3 * bound4 ; //* sizeof(float); - try - { - ptr = new int[size]; - zero_out(); - } + // set_new_handler(IntArrayAlloc); + size = bound1 * bound2 * bound3 * bound4; //* sizeof(float); + try + { + ptr = new int[size]; + zero_out (); + } catch (const std::bad_alloc& e) - { - std::cerr << "Allocation error for IntArray: " << e.what() << std::endl; - ptr = nullptr; - size = 0; - throw; - } - assert(ptr != nullptr); + { + std::cerr << "Allocation error for IntArray: " << e.what () << std::endl; + ptr = nullptr; + size = 0; + throw; + } + assert (ptr != nullptr); } -IntArray::IntArray(const int d1,const int d2,const int d3, - const int d4,const int d5) +IntArray::IntArray (const int d1, const int d2, const int d3, const int d4, const int d5) { dim = 5; bound1 = (d1 <= 0) ? 1 : d1; @@ -89,25 +89,24 @@ IntArray::IntArray(const int d1,const int d2,const int d3, bound3 = (d3 <= 0) ? 1 : d3; bound4 = (d4 <= 0) ? 
1 : d4; bound5 = (d5 <= 0) ? 1 : d5; - //set_new_handler(IntArrayAlloc); + // set_new_handler(IntArrayAlloc); size = bound1 * bound2 * bound3 * bound4 * bound5; - try - { - ptr = new int[size]; - zero_out(); - } + try + { + ptr = new int[size]; + zero_out (); + } catch (const std::bad_alloc& e) - { - std::cerr << "Allocation error for IntArray: " << e.what() << std::endl; - ptr = nullptr; - size = 0; - throw; - } - assert(ptr != nullptr); + { + std::cerr << "Allocation error for IntArray: " << e.what () << std::endl; + ptr = nullptr; + size = 0; + throw; + } + assert (ptr != nullptr); } -IntArray::IntArray(const int d1,const int d2,const int d3, - const int d4,const int d5,const int d6) +IntArray::IntArray (const int d1, const int d2, const int d3, const int d4, const int d5, const int d6) { dim = 6; bound1 = (d1 <= 0) ? 1 : d1; @@ -116,25 +115,25 @@ IntArray::IntArray(const int d1,const int d2,const int d3, bound4 = (d4 <= 0) ? 1 : d4; bound5 = (d5 <= 0) ? 1 : d5; bound6 = (d6 <= 0) ? 1 : d6; - //set_new_handler(IntArrayAlloc); + // set_new_handler(IntArrayAlloc); size = bound1 * bound2 * bound3 * bound4 * bound5 * bound6; - try - { - ptr = new int[size]; - zero_out(); - } + try + { + ptr = new int[size]; + zero_out (); + } catch (const std::bad_alloc& e) - { - std::cerr << "Allocation error for IntArray: " << e.what() << std::endl; - ptr = nullptr; - size = 0; - throw; - } - assert(ptr != nullptr); + { + std::cerr << "Allocation error for IntArray: " << e.what () << std::endl; + ptr = nullptr; + size = 0; + throw; + } + assert (ptr != nullptr); } // Copy constructor -IntArray::IntArray(const IntArray& other) +IntArray::IntArray (const IntArray& other) { size = other.size; dim = other.dim; @@ -144,35 +143,28 @@ IntArray::IntArray(const IntArray& other) bound4 = other.bound4; bound5 = other.bound5; bound6 = other.bound6; - try - { - ptr = new int[size]; - for (int i = 0; i < size; i++) + try { - ptr[i] = other.ptr[i]; + ptr = new int[size]; + for (int i = 0; i < 
size; i++) + { + ptr[i] = other.ptr[i]; + } } - } catch (const std::bad_alloc& e) - { - std::cerr << "Allocation error in IntArray copy constructor: " << e.what() << std::endl; - ptr = nullptr; - size = 0; - throw; - } - assert(ptr != nullptr); + { + std::cerr << "Allocation error in IntArray copy constructor: " << e.what () << std::endl; + ptr = nullptr; + size = 0; + throw; + } + assert (ptr != nullptr); } // Move constructor -IntArray::IntArray(IntArray&& other) noexcept - : size(other.size), - dim(other.dim), - bound1(other.bound1), - bound2(other.bound2), - bound3(other.bound3), - bound4(other.bound4), - bound5(other.bound5), - bound6(other.bound6), - ptr(other.ptr) +IntArray::IntArray (IntArray&& other) noexcept + : size (other.size), dim (other.dim), bound1 (other.bound1), bound2 (other.bound2), bound3 (other.bound3), + bound4 (other.bound4), bound5 (other.bound5), bound6 (other.bound6), ptr (other.ptr) { other.ptr = nullptr; other.size = 0; @@ -183,162 +175,189 @@ IntArray::IntArray(IntArray&& other) noexcept //******************************** // Destructor for class IntArray //******************************** -IntArray ::~IntArray() -{ - freemem(); -} +IntArray ::~IntArray () { freemem (); } -void IntArray::freemem() +void + IntArray::freemem () { if (ptr != nullptr) - { - delete[] ptr; - ptr = nullptr; - } + { + delete[] ptr; + ptr = nullptr; + } } // Move assignment operator -IntArray& IntArray::operator=(IntArray&& other) noexcept +IntArray& + IntArray::operator= (IntArray&& other) noexcept { if (this != &other) - { - freemem(); - size = other.size; - dim = other.dim; - bound1 = other.bound1; - bound2 = other.bound2; - bound3 = other.bound3; - bound4 = other.bound4; - bound5 = other.bound5; - bound6 = other.bound6; - ptr = other.ptr; - other.ptr = nullptr; - other.size = 0; - other.dim = 0; - other.bound1 = other.bound2 = other.bound3 = other.bound4 = other.bound5 = other.bound6 = 0; - } + { + freemem (); + size = other.size; + dim = other.dim; + bound1 
= other.bound1; + bound2 = other.bound2; + bound3 = other.bound3; + bound4 = other.bound4; + bound5 = other.bound5; + bound6 = other.bound6; + ptr = other.ptr; + other.ptr = nullptr; + other.size = 0; + other.dim = 0; + other.bound1 = other.bound2 = other.bound3 = other.bound4 = other.bound5 = other.bound6 = 0; + } return *this; } -void IntArray::create(const int d1,const int d2,const int d3,const int d4,const int d5,const int d6) +void + IntArray::create (const int d1, const int d2, const int d3, const int d4, const int d5, const int d6) { - size = d1 * d2 * d3 * d4 * d5 * d6;assert(size>0); + size = d1 * d2 * d3 * d4 * d5 * d6; + assert (size > 0); dim = 6; - bound1 = d1;bound2 = d2;bound3 = d3;bound4 = d4;bound5 = d5;bound6 = d6; + bound1 = d1; + bound2 = d2; + bound3 = d3; + bound4 = d4; + bound5 = d5; + bound6 = d6; int* new_ptr = nullptr; - try - { - new_ptr = new int[size]; - } + try + { + new_ptr = new int[size]; + } catch (const std::bad_alloc& e) - { - std::cerr << "Allocation error in IntArray::create: " << e.what() << std::endl; - assert(new_ptr != nullptr); - return; - } + { + std::cerr << "Allocation error in IntArray::create: " << e.what () << std::endl; + assert (new_ptr != nullptr); + return; + } delete[] ptr; ptr = new_ptr; - zero_out(); + zero_out (); } -void IntArray::create(const int d1,const int d2,const int d3,const int d4,const int d5) +void + IntArray::create (const int d1, const int d2, const int d3, const int d4, const int d5) { - size = d1 * d2 * d3 * d4 * d5;assert(size>0); + size = d1 * d2 * d3 * d4 * d5; + assert (size > 0); dim = 5; - bound1 = d1;bound2 = d2;bound3 = d3;bound4 = d4;bound5 = d5; + bound1 = d1; + bound2 = d2; + bound3 = d3; + bound4 = d4; + bound5 = d5; int* new_ptr = nullptr; - try - { - new_ptr = new int[size]; - } + try + { + new_ptr = new int[size]; + } catch (const std::bad_alloc& e) - { - std::cerr << "Allocation error in IntArray::create: " << e.what() << std::endl; - assert(new_ptr != nullptr); - return; - } + 
{ + std::cerr << "Allocation error in IntArray::create: " << e.what () << std::endl; + assert (new_ptr != nullptr); + return; + } delete[] ptr; ptr = new_ptr; - zero_out(); + zero_out (); } -void IntArray::create(const int d1,const int d2,const int d3,const int d4) +void + IntArray::create (const int d1, const int d2, const int d3, const int d4) { - size = d1 * d2 * d3 * d4;assert(size>0); + size = d1 * d2 * d3 * d4; + assert (size > 0); dim = 4; - bound1 = d1;bound2 = d2;bound3 = d3;bound4 = d4; + bound1 = d1; + bound2 = d2; + bound3 = d3; + bound4 = d4; int* new_ptr = nullptr; - try - { - new_ptr = new int[size]; - } + try + { + new_ptr = new int[size]; + } catch (const std::bad_alloc& e) - { - std::cerr << "Allocation error in IntArray::create: " << e.what() << std::endl; - assert(new_ptr != nullptr); - return; - } + { + std::cerr << "Allocation error in IntArray::create: " << e.what () << std::endl; + assert (new_ptr != nullptr); + return; + } delete[] ptr; ptr = new_ptr; - zero_out(); + zero_out (); } -void IntArray::create(const int d1,const int d2,const int d3) +void + IntArray::create (const int d1, const int d2, const int d3) { - size = d1 * d2 * d3;assert(size>0); + size = d1 * d2 * d3; + assert (size > 0); dim = 3; - bound1 = d1;bound2 = d2;bound3 = d3;bound4 = 1; + bound1 = d1; + bound2 = d2; + bound3 = d3; + bound4 = 1; int* new_ptr = nullptr; - try - { - new_ptr = new int[size]; - } + try + { + new_ptr = new int[size]; + } catch (const std::bad_alloc& e) - { - std::cerr << "Allocation error in IntArray::create: " << e.what() << std::endl; - assert(new_ptr != nullptr); - return; - } - delete [] ptr; + { + std::cerr << "Allocation error in IntArray::create: " << e.what () << std::endl; + assert (new_ptr != nullptr); + return; + } + delete[] ptr; ptr = new_ptr; - zero_out(); + zero_out (); } -void IntArray::create(const int d1, const int d2) +void + IntArray::create (const int d1, const int d2) { - size = d1 * d2;assert(size>0); + size = d1 * d2; + 
assert (size > 0); dim = 2; - bound1 = d1;bound2 = d2;bound3 = bound4 = 1; + bound1 = d1; + bound2 = d2; + bound3 = bound4 = 1; int* new_ptr = nullptr; - try - { - new_ptr = new int[size]; - } + try + { + new_ptr = new int[size]; + } catch (const std::bad_alloc& e) - { - std::cerr << "Allocation error in IntArray::create: " << e.what() << std::endl; - assert(new_ptr != nullptr); - return; - } + { + std::cerr << "Allocation error in IntArray::create: " << e.what () << std::endl; + assert (new_ptr != nullptr); + return; + } delete[] ptr; ptr = new_ptr; - zero_out(); + zero_out (); } //**************************** // zeroes out the whole array //**************************** -void IntArray::zero_out() +void + IntArray::zero_out () { if (size <= 0 || ptr == nullptr) - { - return; - } - for (int i = 0;i < size; i++) - { - ptr[i] = 0; - } + { + return; + } + for (int i = 0; i < size; i++) + { + ptr[i] = 0; + } return; } -} +} // namespace ModuleBase diff --git a/source/source_base/intarray.h b/source/source_base/intarray.h index c518ad3e670..6bd951b981a 100644 --- a/source/source_base/intarray.h +++ b/source/source_base/intarray.h @@ -16,7 +16,7 @@ namespace ModuleBase class IntArray { public: - int * ptr = nullptr; + int* ptr = nullptr; /** * @brief Construct a new Int Array object @@ -24,17 +24,17 @@ class IntArray * @param d1 The first dimension size * @param d2 The second dimension size */ - IntArray(const int d1 = 1, const int d2 = 1); - IntArray(const int d1, const int d2, const int d3); - IntArray(const int d1, const int d2, const int d3, const int d4); - IntArray(const int d1, const int d2, const int d3, const int d4, const int d5); - IntArray(const int d1, const int d2, const int d3, const int d4, const int d5, const int d6); + IntArray (const int d1 = 1, const int d2 = 1); + IntArray (const int d1, const int d2, const int d3); + IntArray (const int d1, const int d2, const int d3, const int d4); + IntArray (const int d1, const int d2, const int d3, const int d4, 
const int d5); + IntArray (const int d1, const int d2, const int d3, const int d4, const int d5, const int d6); // Copy constructor - IntArray(const IntArray& other); + IntArray (const IntArray& other); // Move constructor - IntArray(IntArray&& other) noexcept; + IntArray (IntArray&& other) noexcept; - ~IntArray(); + ~IntArray (); /** * @brief Create integer arrays @@ -42,11 +42,11 @@ class IntArray * @param[in] d1 * @param[in] d2 */ - void create(const int d1, const int d2); - void create(const int d1, const int d2, const int d3); - void create(const int d1, const int d2, const int d3, const int d4); - void create(const int d1, const int d2, const int d3, const int d4, const int d5); - void create(const int d1, const int d2, const int d3, const int d4, const int d5, const int d6); + void create (const int d1, const int d2); + void create (const int d1, const int d2, const int d3); + void create (const int d1, const int d2, const int d3, const int d4); + void create (const int d1, const int d2, const int d3, const int d4, const int d5); + void create (const int d1, const int d2, const int d3, const int d4, const int d5, const int d6); /** * @brief copy assignment @@ -54,41 +54,41 @@ class IntArray * @param right * @return const IntArray& */ - IntArray &operator=(const IntArray &other) + IntArray& + operator= (const IntArray& other) { - if(this != &other) - { - delete[] ptr; - size = other.size; - dim = other.dim; - bound1 = other.bound1; - bound2 = other.bound2; - bound3 = other.bound3; - bound4 = other.bound4; - bound5 = other.bound5; - bound6 = other.bound6; - try + if (this != &other) { - ptr = new int[size]; - for (int i = 0;i < size;i++) - { - ptr[i] = other.ptr[i]; - } + delete[] ptr; + size = other.size; + dim = other.dim; + bound1 = other.bound1; + bound2 = other.bound2; + bound3 = other.bound3; + bound4 = other.bound4; + bound5 = other.bound5; + bound6 = other.bound6; + try + { + ptr = new int[size]; + for (int i = 0; i < size; i++) + { + ptr[i] = 
other.ptr[i]; + } + } + catch (const std::bad_alloc& e) + { + std::cerr << "Allocation error in IntArray copy assignment: " << e.what () << std::endl; + ptr = nullptr; + size = 0; + throw; + } } - catch (const std::bad_alloc& e) - { - std::cerr << "Allocation error in IntArray copy assignment: " << e.what() << std::endl; - ptr = nullptr; - size = 0; - throw; - } - } return *this; } - - // Move assignment operator - IntArray& operator=(IntArray&& other) noexcept; + // Move assignment operator + IntArray& operator= (IntArray&& other) noexcept; /** * @brief Equal all elements of an IntArray to an @@ -97,15 +97,17 @@ class IntArray * @param right * @return const IntArray& */ - const IntArray &operator=(const int &right) + const IntArray& + operator= (const int& right) { - if (ptr != nullptr && size > 0) { - for (int i = 0;i < size;i++) + if (ptr != nullptr && size > 0) { - ptr[i] = right; + for (int i = 0; i < size; i++) + { + ptr[i] = right; + } } - } - return *this;// enables x = y = z; + return *this; // enables x = y = z; } /** @@ -115,45 +117,50 @@ class IntArray * @param d2 * @return int& */ - int &operator()(const int d1, const int d2) + int& + operator() (const int d1, const int d2) { - assert( d1 >= 0 && d1 < bound1 ); - assert( d2 >= 0 && d2 < bound2 ); - return ptr[ d1 * bound2 + d2 ]; + assert (d1 >= 0 && d1 < bound1); + assert (d2 >= 0 && d2 < bound2); + return ptr[d1 * bound2 + d2]; } - int &operator()(const int d1, const int d2, const int d3) + int& + operator() (const int d1, const int d2, const int d3) { - assert( d1 >= 0 && d1 < bound1 ); - assert( d2 >= 0 && d2 < bound2 ); - assert( d3 >= 0 && d3 < bound3 ); - return ptr[ (d1 * bound2 + d2) * bound3 + d3 ]; + assert (d1 >= 0 && d1 < bound1); + assert (d2 >= 0 && d2 < bound2); + assert (d3 >= 0 && d3 < bound3); + return ptr[(d1 * bound2 + d2) * bound3 + d3]; } - int &operator()(const int d1, const int d2, const int d3, const int d4) + int& + operator() (const int d1, const int d2, const int d3, const 
int d4) { - assert( d1 >= 0 && d1 < bound1 ); - assert( d2 >= 0 && d2 < bound2 ); - assert( d3 >= 0 && d3 < bound3 ); - assert( d4 >= 0 && d4 < bound4 ); - return ptr[ ((d1 * bound2 + d2) * bound3 + d3) * bound4 + d4 ]; + assert (d1 >= 0 && d1 < bound1); + assert (d2 >= 0 && d2 < bound2); + assert (d3 >= 0 && d3 < bound3); + assert (d4 >= 0 && d4 < bound4); + return ptr[((d1 * bound2 + d2) * bound3 + d3) * bound4 + d4]; } - int &operator()(const int d1, const int d2, const int d3, const int d4, const int d5) + int& + operator() (const int d1, const int d2, const int d3, const int d4, const int d5) { - assert( d1 >= 0 && d1 < bound1 ); - assert( d2 >= 0 && d2 < bound2 ); - assert( d3 >= 0 && d3 < bound3 ); - assert( d4 >= 0 && d4 < bound4 ); - assert( d5 >= 0 && d5 < bound5 ); - return ptr[ (((d1 * bound2 + d2) * bound3 + d3) * bound4 + d4) * bound5 + d5 ]; + assert (d1 >= 0 && d1 < bound1); + assert (d2 >= 0 && d2 < bound2); + assert (d3 >= 0 && d3 < bound3); + assert (d4 >= 0 && d4 < bound4); + assert (d5 >= 0 && d5 < bound5); + return ptr[(((d1 * bound2 + d2) * bound3 + d3) * bound4 + d4) * bound5 + d5]; } - int &operator()(const int d1, const int d2, const int d3, const int d4, const int d5, const int d6) + int& + operator() (const int d1, const int d2, const int d3, const int d4, const int d5, const int d6) { - assert( d1 >= 0 && d1 < bound1 ); - assert( d2 >= 0 && d2 < bound2 ); - assert( d3 >= 0 && d3 < bound3 ); - assert( d4 >= 0 && d4 < bound4 ); - assert( d5 >= 0 && d5 < bound5 ); - assert( d6 >= 0 && d6 < bound6 ); - return ptr[ ((((d1 * bound2 + d2) * bound3 + d3) * bound4 + d4) * bound5 + d5) * bound6 + d6 ]; + assert (d1 >= 0 && d1 < bound1); + assert (d2 >= 0 && d2 < bound2); + assert (d3 >= 0 && d3 < bound3); + assert (d4 >= 0 && d4 < bound4); + assert (d5 >= 0 && d5 < bound5); + assert (d6 >= 0 && d6 < bound6); + return ptr[((((d1 * bound2 + d2) * bound3 + d3) * bound4 + d4) * bound5 + d5) * bound6 + d6]; } /** @@ -164,96 +171,109 @@ class IntArray 
* @param d2 * @return const int& */ - const int &operator()(const int d1, const int d2) const + const int& + operator() (const int d1, const int d2) const { - assert( d1 >= 0 && d1 < bound1 ); - assert( d2 >= 0 && d2 < bound2 ); - return ptr[ d1 * bound2 + d2 ]; + assert (d1 >= 0 && d1 < bound1); + assert (d2 >= 0 && d2 < bound2); + return ptr[d1 * bound2 + d2]; } - const int &operator()(const int d1, const int d2, const int d3) const + const int& + operator() (const int d1, const int d2, const int d3) const { - assert( d1 >= 0 && d1 < bound1 ); - assert( d2 >= 0 && d2 < bound2 ); - assert( d3 >= 0 && d3 < bound3 ); - return ptr[ (d1 * bound2 + d2) * bound3 + d3 ]; + assert (d1 >= 0 && d1 < bound1); + assert (d2 >= 0 && d2 < bound2); + assert (d3 >= 0 && d3 < bound3); + return ptr[(d1 * bound2 + d2) * bound3 + d3]; } - const int &operator()(const int d1, const int d2, const int d3, const int d4) const + const int& + operator() (const int d1, const int d2, const int d3, const int d4) const { - assert( d1 >= 0 && d1 < bound1 ); - assert( d2 >= 0 && d2 < bound2 ); - assert( d3 >= 0 && d3 < bound3 ); - assert( d4 >= 0 && d4 < bound4 ); - return ptr[ ((d1 * bound2 + d2) * bound3 + d3) * bound4 + d4 ]; + assert (d1 >= 0 && d1 < bound1); + assert (d2 >= 0 && d2 < bound2); + assert (d3 >= 0 && d3 < bound3); + assert (d4 >= 0 && d4 < bound4); + return ptr[((d1 * bound2 + d2) * bound3 + d3) * bound4 + d4]; } - const int &operator()(const int d1, const int d2, const int d3, const int d4, const int d5) const + const int& + operator() (const int d1, const int d2, const int d3, const int d4, const int d5) const { - assert( d1 >= 0 && d1 < bound1 ); - assert( d2 >= 0 && d2 < bound2 ); - assert( d3 >= 0 && d3 < bound3 ); - assert( d4 >= 0 && d4 < bound4 ); - assert( d5 >= 0 && d5 < bound5 ); - return ptr[ (((d1 * bound2 + d2) * bound3 + d3) * bound4 + d4) * bound5 + d5 ]; + assert (d1 >= 0 && d1 < bound1); + assert (d2 >= 0 && d2 < bound2); + assert (d3 >= 0 && d3 < bound3); + 
assert (d4 >= 0 && d4 < bound4); + assert (d5 >= 0 && d5 < bound5); + return ptr[(((d1 * bound2 + d2) * bound3 + d3) * bound4 + d4) * bound5 + d5]; } - const int &operator()(const int d1, const int d2, const int d3, const int d4, const int d5, const int d6) const + const int& + operator() (const int d1, const int d2, const int d3, const int d4, const int d5, const int d6) const { - assert( d1 >= 0 && d1 < bound1 ); - assert( d2 >= 0 && d2 < bound2 ); - assert( d3 >= 0 && d3 < bound3 ); - assert( d4 >= 0 && d4 < bound4 ); - assert( d5 >= 0 && d5 < bound5 ); - assert( d6 >= 0 && d6 < bound6 ); - return ptr[ ((((d1 * bound2 + d2) * bound3 + d3) * bound4 + d4) * bound5 + d5) * bound6 + d6 ]; + assert (d1 >= 0 && d1 < bound1); + assert (d2 >= 0 && d2 < bound2); + assert (d3 >= 0 && d3 < bound3); + assert (d4 >= 0 && d4 < bound4); + assert (d5 >= 0 && d5 < bound5); + assert (d6 >= 0 && d6 < bound6); + return ptr[((((d1 * bound2 + d2) * bound3 + d3) * bound4 + d4) * bound5 + d5) * bound6 + d6]; } /** * @brief Set all elements of an IntArray to zero * */ - void zero_out(void); + void zero_out (); - int getSize() const + int + getSize () const { return size; } - int getDim() const + int + getDim () const { return dim; } - int getBound1() const + int + getBound1 () const { return bound1; } - int getBound2() const + int + getBound2 () const { return bound2; } - int getBound3() const + int + getBound3 () const { return bound3; } - int getBound4() const + int + getBound4 () const { return bound4; } - int getBound5() const + int + getBound5 () const { return bound5; } - int getBound6() const + int + getBound6 () const { return bound6; } private: - int size=0; - int dim=0; - int bound1=0; - int bound2=0; - int bound3=0; - int bound4=0; - int bound5=0; - int bound6=0; - void freemem(); + int size = 0; + int dim = 0; + int bound1 = 0; + int bound2 = 0; + int bound3 = 0; + int bound4 = 0; + int bound5 = 0; + int bound6 = 0; + void freemem (); }; } // namespace ModuleBase diff --git 
a/source/source_base/inverse_matrix.cpp b/source/source_base/inverse_matrix.cpp index 66ced1c46e9..b5d16366acb 100644 --- a/source/source_base/inverse_matrix.cpp +++ b/source/source_base/inverse_matrix.cpp @@ -7,74 +7,73 @@ namespace ModuleBase { -Inverse_Matrix_Complex::Inverse_Matrix_Complex() +Inverse_Matrix_Complex::Inverse_Matrix_Complex () { allocate = false; } +Inverse_Matrix_Complex::~Inverse_Matrix_Complex () { - allocate=false; -} -Inverse_Matrix_Complex::~Inverse_Matrix_Complex() -{ - if(allocate) - { - delete[] e; //mohan fix bug 2012-04-02 - delete[] work2; - delete[] rwork; - allocate=false; - } + if (allocate) + { + delete[] e; // mohan fix bug 2012-04-02 + delete[] work2; + delete[] rwork; + allocate = false; + } } -void Inverse_Matrix_Complex::init(const int &dim_in) +void + Inverse_Matrix_Complex::init (const int& dim_in) { -// GlobalV::ofs_running << " allocate=" << allocate << std::endl; - if(allocate) - { - delete[] e; //mohan fix bug 2012-04-02 - delete[] work2; - delete[] rwork; - allocate=false; - } + // GlobalV::ofs_running << " allocate=" << allocate << std::endl; + if (allocate) + { + delete[] e; // mohan fix bug 2012-04-02 + delete[] work2; + delete[] rwork; + allocate = false; + } - this->dim = dim_in; + this->dim = dim_in; - assert(dim>0); - this->e = new double[dim]; - this->lwork = 2*dim; + assert (dim > 0); + this->e = new double[dim]; + this->lwork = 2 * dim; - assert(lwork>0); - this->work2 = new std::complex[lwork]; + assert (lwork > 0); + this->work2 = new std::complex[lwork]; - assert(3*dim-2>0); - this->rwork = new double[3*dim-2]; - this->info = 0; - this->A.create(dim, dim); - this->EA.create(dim, dim); + assert (3 * dim - 2 > 0); + this->rwork = new double[3 * dim - 2]; + this->info = 0; + this->A.create (dim, dim); + this->EA.create (dim, dim); - this->allocate = true; + this->allocate = true; - return; + return; } - -void Inverse_Matrix_Complex::using_zheev( const ModuleBase::ComplexMatrix &Sin, ModuleBase::ComplexMatrix 
&Sout) +void + Inverse_Matrix_Complex::using_zheev (const ModuleBase::ComplexMatrix& Sin, ModuleBase::ComplexMatrix& Sout) { - ModuleBase::timer::start("Inverse","using_zheev"); - this->A = Sin; - - LapackConnector::zheev('V', 'U', dim, this->A, dim, e, work2, lwork, rwork, &info); - - for(int i=0; iA(j,i) ) / e[i] ; - } - } + ModuleBase::timer::start ("Inverse", "using_zheev"); + this->A = Sin; + + LapackConnector::zheev ('V', 'U', dim, this->A, dim, e, work2, lwork, rwork, &info); + + for (int i = 0; i < dim; i++) + { + for (int j = 0; j < dim; j++) + { + EA (i, j) = conj (this->A (j, i)) / e[i]; + } + } Sout = this->A * this->EA; - ModuleBase::timer::end("Inverse","using_zheev"); + ModuleBase::timer::end ("Inverse", "using_zheev"); return; } -void Inverse_Matrix_Real(const int dim, const double* in, double* out) +void + Inverse_Matrix_Real (const int dim, const double* in, double* out) { int info = 0; int lda = dim; @@ -83,27 +82,27 @@ void Inverse_Matrix_Real(const int dim, const double* in, double* out) double* work = new double[lwork]; for (int i = 0; i < dim; i++) - { - for (int j = 0; j < dim; j++) { - out[i * dim + j] = in[i * dim + j]; + for (int j = 0; j < dim; j++) + { + out[i * dim + j] = in[i * dim + j]; + } } - } - dgetrf_(&dim, &dim, out, &lda, ipiv, &info); + dgetrf_ (&dim, &dim, out, &lda, ipiv, &info); if (info != 0) - { - std::cout << "ERROR: LAPACK dgetrf error, info = " << info << std::endl; - exit(1); - } - dgetri_(&dim, out, &lda, ipiv, work, &lwork, &info); + { + std::cout << "ERROR: LAPACK dgetrf error, info = " << info << std::endl; + exit (1); + } + dgetri_ (&dim, out, &lda, ipiv, work, &lwork, &info); if (info != 0) - { - std::cout << "ERROR: LAPACK dgetri error, info = " << info << std::endl; - exit(1); - } + { + std::cout << "ERROR: LAPACK dgetri error, info = " << info << std::endl; + exit (1); + } delete[] ipiv; delete[] work; } -} \ No newline at end of file +} // namespace ModuleBase \ No newline at end of file diff --git 
a/source/source_base/inverse_matrix.h b/source/source_base/inverse_matrix.h index bac2e5b979a..6b7106eaa88 100644 --- a/source/source_base/inverse_matrix.h +++ b/source/source_base/inverse_matrix.h @@ -8,26 +8,25 @@ namespace ModuleBase class Inverse_Matrix_Complex { - public: + public: + Inverse_Matrix_Complex (); + ~Inverse_Matrix_Complex (); - Inverse_Matrix_Complex(); - ~Inverse_Matrix_Complex(); - - ModuleBase::ComplexMatrix A; + ModuleBase::ComplexMatrix A; - void using_zheev(const ModuleBase::ComplexMatrix &in, ModuleBase::ComplexMatrix &out); - void init( const int &dim_in); + void using_zheev (const ModuleBase::ComplexMatrix& in, ModuleBase::ComplexMatrix& out); + void init (const int& dim_in); - private: - int dim=0; - double *e=nullptr; - int lwork=0; - std::complex *work2=nullptr; - double* rwork=nullptr; - int info=0; - bool allocate=false; //mohan add 2012-04-02 + private: + int dim = 0; + double* e = nullptr; + int lwork = 0; + std::complex* work2 = nullptr; + double* rwork = nullptr; + int info = 0; + bool allocate = false; // mohan add 2012-04-02 - ModuleBase::ComplexMatrix EA; + ModuleBase::ComplexMatrix EA; }; /** @@ -38,6 +37,6 @@ class Inverse_Matrix_Complex * @param in [in] input matrix * @param out [out] output matrix */ -void Inverse_Matrix_Real(const int dim, const double* in, double* out); -} +void Inverse_Matrix_Real (const int dim, const double* in, double* out); +} // namespace ModuleBase #endif diff --git a/source/source_base/kernels/cuda/math_kernel_op.cu b/source/source_base/kernels/cuda/math_kernel_op.cu index c5b0648c49b..45ffb2c5d0b 100644 --- a/source/source_base/kernels/cuda/math_kernel_op.cu +++ b/source/source_base/kernels/cuda/math_kernel_op.cu @@ -9,43 +9,57 @@ #include #include #include -namespace ModuleBase { +namespace ModuleBase +{ template -struct GetTypeThrust { +struct GetTypeThrust +{ using type = T; }; template <> -struct GetTypeThrust> { +struct GetTypeThrust> +{ using type = thrust::complex; /**< The return type 
specialization for std::complex. */ }; template <> -struct GetTypeThrust> { +struct GetTypeThrust> +{ using type = thrust::complex; /**< The return type specialization for std::complex. */ }; static cublasHandle_t cublas_handle = nullptr; -void xdot_wrapper(const int &n, const float * x, const int &incx, const float * y, const int &incy, float &result) { - CHECK_CUBLAS(cublasSdot(cublas_handle, n, x, incx, y, incy, &result)); +void + xdot_wrapper (const int& n, const float* x, const int& incx, const float* y, const int& incy, float& result) +{ + CHECK_CUBLAS (cublasSdot (cublas_handle, n, x, incx, y, incy, &result)); } -void xdot_wrapper(const int &n, const double * x, const int &incx, const double * y, const int &incy, double &result) { - CHECK_CUBLAS(cublasDdot(cublas_handle, n, x, incx, y, incy, &result)); +void + xdot_wrapper (const int& n, const double* x, const int& incx, const double* y, const int& incy, double& result) +{ + CHECK_CUBLAS (cublasDdot (cublas_handle, n, x, incx, y, incy, &result)); } -void createGpuBlasHandle(){ - if (cublas_handle == nullptr) { - CHECK_CUBLAS(cublasCreate(&cublas_handle)); - } +void + createGpuBlasHandle () +{ + if (cublas_handle == nullptr) + { + CHECK_CUBLAS (cublasCreate (&cublas_handle)); + } } -void destoryBLAShandle(){ - if (cublas_handle != nullptr) { - CHECK_CUBLAS(cublasDestroy(cublas_handle)); - cublas_handle = nullptr; - } +void + destoryBLAShandle () +{ + if (cublas_handle != nullptr) + { + CHECK_CUBLAS (cublasDestroy (cublas_handle)); + cublas_handle = nullptr; + } } // template @@ -54,116 +68,130 @@ void destoryBLAShandle(){ // val += __shfl_down_sync(full_mask, val, offset); // } template <> -void scal_op::operator()(const int& N, +void + scal_op::operator() (const int& N, const std::complex* alpha, std::complex* X, const int& incx) { - CHECK_CUBLAS(cublasCscal(cublas_handle, N, (float2*)alpha, (float2*)X, incx)); + CHECK_CUBLAS (cublasCscal (cublas_handle, N, (float2*)alpha, (float2*)X, incx)); } template <> 
-void scal_op::operator()(const int& N, +void + scal_op::operator() (const int& N, const std::complex* alpha, std::complex* X, const int& incx) { - CHECK_CUBLAS(cublasZscal(cublas_handle, N, (double2*)alpha, (double2*)X, incx)); + CHECK_CUBLAS (cublasZscal (cublas_handle, N, (double2*)alpha, (double2*)X, incx)); } template <> -void axpy_op::operator()(const int& N, +void + axpy_op::operator() (const int& N, const double* alpha, const double* X, const int& incX, double* Y, const int& incY) { - CHECK_CUBLAS(cublasDaxpy(cublas_handle, N, alpha, X, incX, Y, incY)); + CHECK_CUBLAS (cublasDaxpy (cublas_handle, N, alpha, X, incX, Y, incY)); } template <> -void axpy_op, base_device::DEVICE_GPU>::operator()(const int& N, +void + axpy_op, base_device::DEVICE_GPU>::operator() (const int& N, const std::complex* alpha, const std::complex* X, const int& incX, std::complex* Y, const int& incY) { - CHECK_CUBLAS(cublasCaxpy(cublas_handle, N, (float2*)alpha, (float2*)X, incX, (float2*)Y, incY)); + CHECK_CUBLAS (cublasCaxpy (cublas_handle, N, (float2*)alpha, (float2*)X, incX, (float2*)Y, incY)); } template <> -void axpy_op, base_device::DEVICE_GPU>::operator()(const int& N, +void + axpy_op, base_device::DEVICE_GPU>::operator() (const int& N, const std::complex* alpha, const std::complex* X, const int& incX, std::complex* Y, const int& incY) { - CHECK_CUBLAS(cublasZaxpy(cublas_handle, N, (double2*)alpha, (double2*)X, incX, (double2*)Y, incY)); + CHECK_CUBLAS (cublasZaxpy (cublas_handle, N, (double2*)alpha, (double2*)X, incX, (double2*)Y, incY)); } - template -__global__ void matrix_transpose_kernel( - const int row, - const int col, - const T* in, - T* out) +__global__ void + matrix_transpose_kernel (const int row, const int col, const T* in, T* out) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < row) - { - for (int j = 0; j < col; j++) { - out[j * row + i] = in[i * col + j]; + for (int j = 0; j < col; j++) + { + out[j * row + i] = in[i * col + j]; + } } - } } template 
-__global__ void matrix_copy_kernel(const int n1, const int n2, const T* A, const int LDA, T* B, const int LDB) +__global__ void + matrix_copy_kernel (const int n1, const int n2, const T* A, const int LDA, T* B, const int LDB) { const int i = blockIdx.x * blockDim.x + threadIdx.x; const int j = blockIdx.y * blockDim.y + threadIdx.y; if (i < n1 && j < n2) - { - B[i * LDB + j] = A[i * LDA + j]; - } + { + B[i * LDB + j] = A[i * LDA + j]; + } } template -__global__ void matrix_multiply_vector_kernel(const int m, const int n, T *a, const int lda, const Real *b, const Real alpha, T *c, const int ldc){ +__global__ void + matrix_multiply_vector_kernel (const int m, + const int n, + T* a, + const int lda, + const Real* b, + const Real alpha, + T* c, + const int ldc) +{ int row = blockIdx.x * blockDim.x + threadIdx.x; int col = blockIdx.y * blockDim.y + threadIdx.y; - if (col >= n || row >= m) return; + if (col >= n || row >= m) + return; c[col * ldc + row] = a[col * lda + row] * b[col] * alpha; } -cublasOperation_t judge_trans_op(bool is_complex, const char& trans, const char* name) +cublasOperation_t + judge_trans_op (bool is_complex, const char& trans, const char* name) { if (trans == 'N') - { - return CUBLAS_OP_N; - } - else if(trans == 'T') - { - return CUBLAS_OP_T; - } - else if(is_complex && trans == 'C') - { - return CUBLAS_OP_C; - } + { + return CUBLAS_OP_N; + } + else if (trans == 'T') + { + return CUBLAS_OP_T; + } + else if (is_complex && trans == 'C') + { + return CUBLAS_OP_C; + } else - { - ModuleBase::WARNING_QUIT(name, std::string("Unknown trans type ") + trans + std::string(" !")); - } + { + ModuleBase::WARNING_QUIT (name, std::string ("Unknown trans type ") + trans + std::string (" !")); + } } template <> -void gemv_op::operator()(const char& trans, +void + gemv_op::operator() (const char& trans, const int& m, const int& n, const double* alpha, @@ -175,31 +203,31 @@ void gemv_op::operator()(const char& trans, double* Y, const int& incy) { - cublasOperation_t 
cutrans = judge_trans_op(false, trans, "gemv_op"); - CHECK_CUBLAS(cublasDgemv(cublas_handle, cutrans, m, n, alpha, A, lda, X, incx, beta, Y, incy)); + cublasOperation_t cutrans = judge_trans_op (false, trans, "gemv_op"); + CHECK_CUBLAS (cublasDgemv (cublas_handle, cutrans, m, n, alpha, A, lda, X, incx, beta, Y, incy)); } template <> -void gemv_op::operator()(const char& trans, - const int& m, - const int& n, - const float* alpha, - const float* A, - const int& lda, - const float* X, - const int& incx, - const float* beta, - float* Y, - const int& incy) +void + gemv_op::operator() (const char& trans, + const int& m, + const int& n, + const float* alpha, + const float* A, + const int& lda, + const float* X, + const int& incx, + const float* beta, + float* Y, + const int& incy) { - cublasOperation_t cutrans = judge_trans_op(false, trans, "gemv_op"); - CHECK_CUBLAS(cublasSgemv(cublas_handle, cutrans, m, n, alpha, A, lda, X, incx, beta, Y, incy)); + cublasOperation_t cutrans = judge_trans_op (false, trans, "gemv_op"); + CHECK_CUBLAS (cublasSgemv (cublas_handle, cutrans, m, n, alpha, A, lda, X, incx, beta, Y, incy)); } - - template <> -void gemv_op, base_device::DEVICE_GPU>::operator()(const char& trans, +void + gemv_op, base_device::DEVICE_GPU>::operator() (const char& trans, const int& m, const int& n, const std::complex* alpha_in, @@ -211,14 +239,26 @@ void gemv_op, base_device::DEVICE_GPU>::operator()(const cha std::complex* Y, const int& incy) { - cublasOperation_t cutrans = judge_trans_op(true, trans, "gemv_op"); - cuFloatComplex alpha = make_cuFloatComplex(alpha_in->real(), alpha_in->imag()); - cuFloatComplex beta = make_cuFloatComplex(beta_in->real(), beta_in->imag()); - CHECK_CUBLAS(cublasCgemv(cublas_handle, cutrans, m, n, &alpha, (cuFloatComplex*)A, lda, (cuFloatComplex*)X, incx, &beta, (cuFloatComplex*)Y, incy)); + cublasOperation_t cutrans = judge_trans_op (true, trans, "gemv_op"); + cuFloatComplex alpha = make_cuFloatComplex (alpha_in->real (), 
alpha_in->imag ()); + cuFloatComplex beta = make_cuFloatComplex (beta_in->real (), beta_in->imag ()); + CHECK_CUBLAS (cublasCgemv (cublas_handle, + cutrans, + m, + n, + &alpha, + (cuFloatComplex*)A, + lda, + (cuFloatComplex*)X, + incx, + &beta, + (cuFloatComplex*)Y, + incy)); } template <> -void gemv_op, base_device::DEVICE_GPU>::operator()(const char& trans, +void + gemv_op, base_device::DEVICE_GPU>::operator() (const char& trans, const int& m, const int& n, const std::complex* alpha_in, @@ -230,16 +270,28 @@ void gemv_op, base_device::DEVICE_GPU>::operator()(const ch std::complex* Y, const int& incy) { - cublasOperation_t cutrans = judge_trans_op(true, trans, "gemv_op"); - cuDoubleComplex alpha = make_cuDoubleComplex(alpha_in->real(), alpha_in->imag()); - cuDoubleComplex beta = make_cuDoubleComplex(beta_in->real(), beta_in->imag()); + cublasOperation_t cutrans = judge_trans_op (true, trans, "gemv_op"); + cuDoubleComplex alpha = make_cuDoubleComplex (alpha_in->real (), alpha_in->imag ()); + cuDoubleComplex beta = make_cuDoubleComplex (beta_in->real (), beta_in->imag ()); // icpc and nvcc have some compatible problems // We must use cuDoubleComplex instead of converting std::complex* to cuDoubleComplex* - CHECK_CUBLAS(cublasZgemv(cublas_handle, cutrans, m, n, &alpha, (cuDoubleComplex*)A, lda, (cuDoubleComplex*)X, incx, &beta, (cuDoubleComplex*)Y, incy)); + CHECK_CUBLAS (cublasZgemv (cublas_handle, + cutrans, + m, + n, + &alpha, + (cuDoubleComplex*)A, + lda, + (cuDoubleComplex*)X, + incx, + &beta, + (cuDoubleComplex*)Y, + incy)); } template <> -void gemm_op::operator()(const char& transa, +void + gemm_op::operator() (const char& transa, const char& transb, const int& m, const int& n, @@ -253,13 +305,14 @@ void gemm_op::operator()(const char& transa, float* c, const int& ldc) { - cublasOperation_t cutransA = judge_trans_op(false, transa, "gemm_op"); - cublasOperation_t cutransB = judge_trans_op(false, transb, "gemm_op"); - CHECK_CUBLAS(cublasSgemm(cublas_handle, 
cutransA, cutransB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); + cublasOperation_t cutransA = judge_trans_op (false, transa, "gemm_op"); + cublasOperation_t cutransB = judge_trans_op (false, transb, "gemm_op"); + CHECK_CUBLAS (cublasSgemm (cublas_handle, cutransA, cutransB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); } template <> -void gemm_op::operator()(const char& transa, +void + gemm_op::operator() (const char& transa, const char& transb, const int& m, const int& n, @@ -273,12 +326,13 @@ void gemm_op::operator()(const char& transa, double* c, const int& ldc) { - cublasOperation_t cutransA = judge_trans_op(false, transa, "gemm_op"); - cublasOperation_t cutransB = judge_trans_op(false, transb, "gemm_op"); - CHECK_CUBLAS(cublasDgemm(cublas_handle, cutransA, cutransB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); + cublasOperation_t cutransA = judge_trans_op (false, transa, "gemm_op"); + cublasOperation_t cutransB = judge_trans_op (false, transb, "gemm_op"); + CHECK_CUBLAS (cublasDgemm (cublas_handle, cutransA, cutransB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); } template <> -void gemm_op, base_device::DEVICE_GPU>::operator()(const char& transa, +void + gemm_op, base_device::DEVICE_GPU>::operator() (const char& transa, const char& transb, const int& m, const int& n, @@ -292,13 +346,27 @@ void gemm_op, base_device::DEVICE_GPU>::operator()(const cha std::complex* c, const int& ldc) { - cublasOperation_t cutransA = judge_trans_op(true, transa, "gemm_op"); - cublasOperation_t cutransB = judge_trans_op(true, transb, "gemm_op"); - CHECK_CUBLAS(cublasCgemm(cublas_handle, cutransA, cutransB, m, n ,k, (float2*)alpha, (float2*)a , lda, (float2*)b, ldb, (float2*)beta, (float2*)c, ldc)); + cublasOperation_t cutransA = judge_trans_op (true, transa, "gemm_op"); + cublasOperation_t cutransB = judge_trans_op (true, transb, "gemm_op"); + CHECK_CUBLAS (cublasCgemm (cublas_handle, + cutransA, + cutransB, + m, + n, + k, + (float2*)alpha, + (float2*)a, + lda, + 
(float2*)b, + ldb, + (float2*)beta, + (float2*)c, + ldc)); } template <> -void gemm_op, base_device::DEVICE_GPU>::operator()(const char& transa, +void + gemm_op, base_device::DEVICE_GPU>::operator() (const char& transa, const char& transb, const int& m, const int& n, @@ -312,189 +380,295 @@ void gemm_op, base_device::DEVICE_GPU>::operator()(const ch std::complex* c, const int& ldc) { - cublasOperation_t cutransA = judge_trans_op(true, transa, "gemm_op"); - cublasOperation_t cutransB = judge_trans_op(true, transb, "gemm_op"); - CHECK_CUBLAS(cublasZgemm(cublas_handle, cutransA, cutransB, m, n ,k, (double2*)alpha, (double2*)a , lda, (double2*)b, ldb, (double2*)beta, (double2*)c, ldc)); + cublasOperation_t cutransA = judge_trans_op (true, transa, "gemm_op"); + cublasOperation_t cutransB = judge_trans_op (true, transb, "gemm_op"); + CHECK_CUBLAS (cublasZgemm (cublas_handle, + cutransA, + cutransB, + m, + n, + k, + (double2*)alpha, + (double2*)a, + lda, + (double2*)b, + ldb, + (double2*)beta, + (double2*)c, + ldc)); } template <> -void matrixTranspose_op::operator()(const int& row, +void + matrixTranspose_op::operator() (const int& row, const int& col, const double* input_matrix, double* output_matrix) { double* device_temp = nullptr; - base_device::memory::resize_memory_op()(device_temp, row * col); + base_device::memory::resize_memory_op () (device_temp, row * col); if (row == col) - { - double ONE = 1.0, ZERO = 0.0; - - // use 'geam' API todo transpose. - CHECK_CUBLAS(cublasDgeam(cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, col, row, &ONE, input_matrix, col, &ZERO, input_matrix, col, device_temp, col)); - } + { + double ONE = 1.0, ZERO = 0.0; + + // use 'geam' API todo transpose. 
+ CHECK_CUBLAS (cublasDgeam (cublas_handle, + CUBLAS_OP_T, + CUBLAS_OP_N, + col, + row, + &ONE, + input_matrix, + col, + &ZERO, + input_matrix, + col, + device_temp, + col)); + } else - { - int thread = 1024; - int block = (row + col + thread - 1) / thread; - matrix_transpose_kernel <<>> (row, col, input_matrix, device_temp); + { + int thread = 1024; + int block = (row + col + thread - 1) / thread; + matrix_transpose_kernel<<>> (row, col, input_matrix, device_temp); - CHECK_CUDA_SYNC(); - } + CHECK_CUDA_SYNC (); + } - base_device::memory::synchronize_memory_op()( + base_device::memory::synchronize_memory_op () ( output_matrix, device_temp, row * col); - base_device::memory::delete_memory_op()(device_temp); + base_device::memory::delete_memory_op () (device_temp); } template <> -void matrixTranspose_op, base_device::DEVICE_GPU>::operator()( - const int& row, - const int& col, - const std::complex* input_matrix, - std::complex* output_matrix) +void + matrixTranspose_op, base_device::DEVICE_GPU>::operator() ( + const int& row, + const int& col, + const std::complex* input_matrix, + std::complex* output_matrix) { std::complex* device_temp = nullptr; - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(device_temp, row * col); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU> () (device_temp, row * col); if (row == col) - { - double2 ONE, ZERO; - ONE.x = 1.0; - ONE.y = 0.0; - ZERO.x = ZERO.y = 0.0; - - // use 'geam' API todo transpose. 
- CHECK_CUBLAS(cublasCgeam(cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, col, row, - reinterpret_cast(&ONE), (float2*)input_matrix, col, - reinterpret_cast(&ZERO), (float2*)input_matrix, col, (float2*)device_temp, col)); - } else - { - int thread = 1024; - int block = (row + col + thread - 1) / thread; - matrix_transpose_kernel> <<>> (row, col, (thrust::complex*)input_matrix, (thrust::complex*)device_temp); - - CHECK_CUDA_SYNC(); - } - - base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>()( - output_matrix, - device_temp, - row * col); + { + double2 ONE, ZERO; + ONE.x = 1.0; + ONE.y = 0.0; + ZERO.x = ZERO.y = 0.0; + + // use 'geam' API todo transpose. + CHECK_CUBLAS (cublasCgeam (cublas_handle, + CUBLAS_OP_T, + CUBLAS_OP_N, + col, + row, + reinterpret_cast (&ONE), + (float2*)input_matrix, + col, + reinterpret_cast (&ZERO), + (float2*)input_matrix, + col, + (float2*)device_temp, + col)); + } + else + { + int thread = 1024; + int block = (row + col + thread - 1) / thread; + matrix_transpose_kernel><<>> (row, + col, + (thrust::complex*)input_matrix, + (thrust::complex*)device_temp); + + CHECK_CUDA_SYNC (); + } - base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(device_temp); + base_device::memory::synchronize_memory_op, + base_device::DEVICE_GPU, + base_device::DEVICE_GPU> () (output_matrix, device_temp, row * col); - CHECK_CUDA_SYNC(); + base_device::memory::delete_memory_op, base_device::DEVICE_GPU> () (device_temp); + CHECK_CUDA_SYNC (); } template <> -void matrixTranspose_op, base_device::DEVICE_GPU>::operator()( - const int& row, - const int& col, - const std::complex* input_matrix, - std::complex* output_matrix) +void + matrixTranspose_op, base_device::DEVICE_GPU>::operator() ( + const int& row, + const int& col, + const std::complex* input_matrix, + std::complex* output_matrix) { std::complex* device_temp = nullptr; - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(device_temp, row * col); 
+ base_device::memory::resize_memory_op, base_device::DEVICE_GPU> () (device_temp, row * col); if (row == col) - { - double2 ONE, ZERO; - ONE.x = 1.0; - ONE.y = 0.0; - ZERO.x = ZERO.y = 0.0; - - // use 'geam' API todo transpose. - CHECK_CUBLAS(cublasZgeam(cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, col, row, &ONE, (double2*)input_matrix, col, &ZERO, (double2*)input_matrix, col, (double2*)device_temp, col)); - } else - { - int thread = 1024; - int block = (row + col + thread - 1) / thread; - matrix_transpose_kernel> <<>> (row, col, (thrust::complex*)input_matrix, (thrust::complex*)device_temp); - CHECK_CUDA_SYNC(); - } + { + double2 ONE, ZERO; + ONE.x = 1.0; + ONE.y = 0.0; + ZERO.x = ZERO.y = 0.0; + + // use 'geam' API todo transpose. + CHECK_CUBLAS (cublasZgeam (cublas_handle, + CUBLAS_OP_T, + CUBLAS_OP_N, + col, + row, + &ONE, + (double2*)input_matrix, + col, + &ZERO, + (double2*)input_matrix, + col, + (double2*)device_temp, + col)); + } + else + { + int thread = 1024; + int block = (row + col + thread - 1) / thread; + matrix_transpose_kernel><<>> (row, + col, + (thrust::complex*)input_matrix, + (thrust::complex*)device_temp); + CHECK_CUDA_SYNC (); + } base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, - base_device::DEVICE_GPU>()(output_matrix, device_temp, row * col); + base_device::DEVICE_GPU> () (output_matrix, device_temp, row * col); - base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(device_temp); + base_device::memory::delete_memory_op, base_device::DEVICE_GPU> () (device_temp); } template <> -void matrixCopy::operator()(const int& n1, +void + matrixCopy::operator() (const int& n1, const int& n2, const double* A, const int& LDA, double* B, const int& LDB) { - const dim3 blockSize(16, 16); - const dim3 gridSize((n1 + blockSize.x - 1) / blockSize.x, (n2 + blockSize.y - 1) / blockSize.y); - matrix_copy_kernel <<>> (n1, n2, A, LDA, B, LDB); - CHECK_CUDA_SYNC(); + const dim3 blockSize (16, 16); + const dim3 gridSize ((n1 + 
blockSize.x - 1) / blockSize.x, (n2 + blockSize.y - 1) / blockSize.y); + matrix_copy_kernel<<>> (n1, n2, A, LDA, B, LDB); + CHECK_CUDA_SYNC (); } template <> -void matrixCopy, base_device::DEVICE_GPU>::operator()(const int& n1, +void + matrixCopy, base_device::DEVICE_GPU>::operator() (const int& n1, const int& n2, const std::complex* A, const int& LDA, std::complex* B, const int& LDB) { - const dim3 blockSize(16, 16); - const dim3 gridSize((n1 + blockSize.x - 1) / blockSize.x, (n2 + blockSize.y - 1) / blockSize.y); - matrix_copy_kernel> <<>> (n1, n2, reinterpret_cast*>(A), LDA, reinterpret_cast*>(B), LDB); - CHECK_CUDA_SYNC(); - + const dim3 blockSize (16, 16); + const dim3 gridSize ((n1 + blockSize.x - 1) / blockSize.x, (n2 + blockSize.y - 1) / blockSize.y); + matrix_copy_kernel> + <<>> (n1, + n2, + reinterpret_cast*> (A), + LDA, + reinterpret_cast*> (B), + LDB); + CHECK_CUDA_SYNC (); } template <> -void matrixCopy, base_device::DEVICE_GPU>::operator()(const int& n1, +void + matrixCopy, base_device::DEVICE_GPU>::operator() (const int& n1, const int& n2, const std::complex* A, const int& LDA, std::complex* B, const int& LDB) { - const dim3 blockSize(16, 16); - const dim3 gridSize((n1 + blockSize.x - 1) / blockSize.x, (n2 + blockSize.y - 1) / blockSize.y); - matrix_copy_kernel> <<>> (n1, n2, reinterpret_cast*>(A), LDA, reinterpret_cast*>(B), LDB); - CHECK_CUDA_SYNC(); + const dim3 blockSize (16, 16); + const dim3 gridSize ((n1 + blockSize.x - 1) / blockSize.x, (n2 + blockSize.y - 1) / blockSize.y); + matrix_copy_kernel> + <<>> (n1, + n2, + reinterpret_cast*> (A), + LDA, + reinterpret_cast*> (B), + LDB); + CHECK_CUDA_SYNC (); } template <> -void matrix_mul_vector_op::operator()(const int &m, const int &n, - double *a, const int &lda, const double *b, const double alpha, double *c, const int &ldc){ - dim3 thread(16, 16, 1); - dim3 block((m + thread.x - 1) / thread.x, (n + thread.y - 1) / thread.y, 1); - matrix_multiply_vector_kernel <<>>(m, n, a, lda, - b, alpha, c, 
ldc); - CHECK_CUDA_SYNC(); +void + matrix_mul_vector_op::operator() (const int& m, + const int& n, + double* a, + const int& lda, + const double* b, + const double alpha, + double* c, + const int& ldc) +{ + dim3 thread (16, 16, 1); + dim3 block ((m + thread.x - 1) / thread.x, (n + thread.y - 1) / thread.y, 1); + matrix_multiply_vector_kernel<<>> (m, n, a, lda, b, alpha, c, ldc); + CHECK_CUDA_SYNC (); } template <> -void matrix_mul_vector_op, base_device::DEVICE_GPU>::operator()(const int &m, const int &n, - std::complex *a, const int &lda, const float *b, const float alpha, std::complex *c, const int &ldc){ - dim3 thread(16, 16, 1); - dim3 block((m + thread.x - 1) / thread.x, (n + thread.y - 1) / thread.y, 1); - matrix_multiply_vector_kernel, float> <<>>(m, n, reinterpret_cast*>(a), lda, - b, alpha, reinterpret_cast*>(c), ldc); - CHECK_CUDA_SYNC(); +void + matrix_mul_vector_op, base_device::DEVICE_GPU>::operator() (const int& m, + const int& n, + std::complex* a, + const int& lda, + const float* b, + const float alpha, + std::complex* c, + const int& ldc) +{ + dim3 thread (16, 16, 1); + dim3 block ((m + thread.x - 1) / thread.x, (n + thread.y - 1) / thread.y, 1); + matrix_multiply_vector_kernel, float> + <<>> (m, + n, + reinterpret_cast*> (a), + lda, + b, + alpha, + reinterpret_cast*> (c), + ldc); + CHECK_CUDA_SYNC (); } template <> -void matrix_mul_vector_op, base_device::DEVICE_GPU>::operator()(const int &m, const int &n, - std::complex *a, const int &lda, const double *b, const double alpha, std::complex *c, const int &ldc) -{ - dim3 thread(16, 16, 1); - dim3 block((m + thread.x - 1) / thread.x, (n + thread.y - 1) / thread.y, 1); - matrix_multiply_vector_kernel, double> <<>>(m, n, reinterpret_cast*>(a), lda, - b, alpha, reinterpret_cast*>(c), ldc); - CHECK_CUDA_SYNC(); +void + matrix_mul_vector_op, base_device::DEVICE_GPU>::operator() (const int& m, + const int& n, + std::complex* a, + const int& lda, + const double* b, + const double alpha, + std::complex* c, + 
const int& ldc) +{ + dim3 thread (16, 16, 1); + dim3 block ((m + thread.x - 1) / thread.x, (n + thread.y - 1) / thread.y, 1); + matrix_multiply_vector_kernel, double> + <<>> (m, + n, + reinterpret_cast*> (a), + lda, + b, + alpha, + reinterpret_cast*> (c), + ldc); + CHECK_CUDA_SYNC (); } // Explicitly instantiate functors for the types of functor registered. @@ -506,4 +680,4 @@ template struct matrixCopy, base_device::DEVICE_GPU>; template struct matrix_mul_vector_op, base_device::DEVICE_GPU>; template struct matrix_mul_vector_op; template struct matrix_mul_vector_op, base_device::DEVICE_GPU>; -} // namespace ModuleBase +} // namespace ModuleBase diff --git a/source/source_base/kernels/cuda/math_kernel_op_vec.cu b/source/source_base/kernels/cuda/math_kernel_op_vec.cu index f0edc47d101..05ef8e27a67 100644 --- a/source/source_base/kernels/cuda/math_kernel_op_vec.cu +++ b/source/source_base/kernels/cuda/math_kernel_op_vec.cu @@ -5,98 +5,100 @@ #include template <> -struct GetTypeReal> { +struct GetTypeReal> +{ using type = float; /**< The return type specialization for std::complex. */ }; template <> -struct GetTypeReal> { +struct GetTypeReal> +{ using type = double; /**< The return type specialization for std::complex. 
*/ }; namespace ModuleBase { const int thread_per_block = 256; -void xdot_wrapper(const int &n, const float * x, const int &incx, const float * y, const int &incy, float &result); -void xdot_wrapper(const int &n, const double * x, const int &incx, const double * y, const int &incy, double &result); +void xdot_wrapper (const int& n, const float* x, const int& incx, const float* y, const int& incy, float& result); +void xdot_wrapper (const int& n, const double* x, const int& incx, const double* y, const int& incy, double& result); // Define the CUDA kernel: template -__global__ void vector_mul_real_kernel(const int size, - T* result, - const T* vector, - const typename GetTypeReal::type constant) +__global__ void + vector_mul_real_kernel (const int size, T* result, const T* vector, const typename GetTypeReal::type constant) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < size) - { - result[i] = vector[i] * constant; - } + { + result[i] = vector[i] * constant; + } } template -__global__ void vector_mul_vector_kernel(const int size, - T* result, - const T* vector1, - const typename GetTypeReal::type* vector2, - const bool add) +__global__ void + vector_mul_vector_kernel (const int size, + T* result, + const T* vector1, + const typename GetTypeReal::type* vector2, + const bool add) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < size) - { - if (add) { - result[i] += vector1[i] * vector2[i]; + if (add) + { + result[i] += vector1[i] * vector2[i]; + } + else + { + result[i] = vector1[i] * vector2[i]; + } } - else - { - result[i] = vector1[i] * vector2[i]; - } - } } template -__global__ void vector_div_constant_kernel(const int size, - T* result, - const T* vector, - const typename GetTypeReal::type constant) +__global__ void + vector_div_constant_kernel (const int size, + T* result, + const T* vector, + const typename GetTypeReal::type constant) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < size) - { - result[i] = vector[i] / constant; - } + { 
+ result[i] = vector[i] / constant; + } } template -__global__ void vector_div_vector_kernel(const int size, - T* result, - const T* vector1, - const typename GetTypeReal::type* vector2) +__global__ void + vector_div_vector_kernel (const int size, T* result, const T* vector1, const typename GetTypeReal::type* vector2) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < size) - { - result[i] = vector1[i] / vector2[i]; - } + { + result[i] = vector1[i] / vector2[i]; + } } template -__global__ void constantvector_addORsub_constantVector_kernel(const int size, - T* result, - const T* vector1, - const Real constant1, - const T* vector2, - const Real constant2) +__global__ void + constantvector_addORsub_constantVector_kernel (const int size, + T* result, + const T* vector1, + const Real constant1, + const T* vector2, + const Real constant2) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < size) - { - result[i] = vector1[i] * constant1 + vector2[i] * constant2; - } + { + result[i] = vector1[i] * constant1 + vector2[i] * constant2; + } } // vector operator: result[i] = vector[i] * constant template <> -void vector_mul_real_op::operator()(const int dim, +void + vector_mul_real_op::operator() (const int dim, double* result, const double* vector, const double constant) @@ -104,95 +106,104 @@ void vector_mul_real_op::operator()(const int d // In small cases, 1024 threads per block will only utilize 17 blocks, much less than 40 int thread = thread_per_block; int block = (dim + thread - 1) / thread; - vector_mul_real_kernel<<>>(dim, result, vector, constant); + vector_mul_real_kernel<<>> (dim, result, vector, constant); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } template -inline void vector_mul_real_wrapper(const int dim, - std::complex* result, - const std::complex* vector, - const FPTYPE constant) +inline void + vector_mul_real_wrapper (const int dim, + std::complex* result, + const std::complex* vector, + const FPTYPE constant) { - thrust::complex* result_tmp = 
reinterpret_cast*>(result); - const thrust::complex* vector_tmp = reinterpret_cast*>(vector); + thrust::complex* result_tmp = reinterpret_cast*> (result); + const thrust::complex* vector_tmp = reinterpret_cast*> (vector); int thread = thread_per_block; int block = (dim + thread - 1) / thread; - vector_mul_real_kernel><<>>(dim, result_tmp, vector_tmp, constant); + vector_mul_real_kernel><<>> (dim, result_tmp, vector_tmp, constant); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } template <> -void vector_mul_real_op, base_device::DEVICE_GPU>::operator()(const int dim, +void + vector_mul_real_op, base_device::DEVICE_GPU>::operator() (const int dim, std::complex* result, const std::complex* vector, const float constant) { - vector_mul_real_wrapper(dim, result, vector, constant); + vector_mul_real_wrapper (dim, result, vector, constant); } template <> -void vector_mul_real_op, base_device::DEVICE_GPU>::operator()(const int dim, +void + vector_mul_real_op, base_device::DEVICE_GPU>::operator() (const int dim, std::complex* result, const std::complex* vector, const double constant) { - vector_mul_real_wrapper(dim, result, vector, constant); + vector_mul_real_wrapper (dim, result, vector, constant); } // vector operator: result[i] = vector[i] / constant template <> -void vector_div_constant_op::operator()(const int& dim, - double* result, - const double* vector, - const double constant) +void + vector_div_constant_op::operator() (const int& dim, + double* result, + const double* vector, + const double constant) { // In small cases, 1024 threads per block will only utilize 17 blocks, much less than 40 int thread = thread_per_block; int block = (dim + thread - 1) / thread; - vector_div_constant_kernel<<>>(dim, result, vector, constant); + vector_div_constant_kernel<<>> (dim, result, vector, constant); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } template -inline void vector_div_constant_wrapper(const int& dim, - std::complex* result, - const std::complex* vector, - const FPTYPE 
constant) +inline void + vector_div_constant_wrapper (const int& dim, + std::complex* result, + const std::complex* vector, + const FPTYPE constant) { - thrust::complex* result_tmp = reinterpret_cast*>(result); - const thrust::complex* vector_tmp = reinterpret_cast*>(vector); + thrust::complex* result_tmp = reinterpret_cast*> (result); + const thrust::complex* vector_tmp = reinterpret_cast*> (vector); int thread = thread_per_block; int block = (dim + thread - 1) / thread; - vector_div_constant_kernel><<>>(dim, result_tmp, vector_tmp, constant); + vector_div_constant_kernel><<>> (dim, result_tmp, vector_tmp, constant); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } template <> -void vector_div_constant_op, base_device::DEVICE_GPU>::operator()(const int& dim, - std::complex* result, - const std::complex* vector, - const float constant) +void + vector_div_constant_op, base_device::DEVICE_GPU>::operator() (const int& dim, + std::complex* result, + const std::complex* vector, + const float constant) { - vector_div_constant_wrapper(dim, result, vector, constant); + vector_div_constant_wrapper (dim, result, vector, constant); } template <> -void vector_div_constant_op, base_device::DEVICE_GPU>::operator()(const int& dim, - std::complex* result, - const std::complex* vector, - const double constant) +void + vector_div_constant_op, base_device::DEVICE_GPU>::operator() ( + const int& dim, + std::complex* result, + const std::complex* vector, + const double constant) { - vector_div_constant_wrapper(dim, result, vector, constant); + vector_div_constant_wrapper (dim, result, vector, constant); } // vector operator: result[i] = vector1[i](not complex) * vector2[i](not complex) template <> -void vector_mul_vector_op::operator()(const int& dim, +void + vector_mul_vector_op::operator() (const int& dim, double* result, const double* vector1, const double* vector2, @@ -200,168 +211,180 @@ void vector_mul_vector_op::operator()(const int { int thread = thread_per_block; int block = (dim 
+ thread - 1) / thread; - vector_mul_vector_kernel<<>>(dim, result, vector1, vector2, add); + vector_mul_vector_kernel<<>> (dim, result, vector1, vector2, add); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } // vector operator: result[i] = vector1[i](complex) * vector2[i](not complex) template -inline void vector_mul_vector_complex_wrapper(const int& dim, - std::complex* result, - const std::complex* vector1, - const FPTYPE* vector2, - const bool& add) +inline void + vector_mul_vector_complex_wrapper (const int& dim, + std::complex* result, + const std::complex* vector1, + const FPTYPE* vector2, + const bool& add) { - thrust::complex* result_tmp = reinterpret_cast*>(result); - const thrust::complex* vector1_tmp = reinterpret_cast*>(vector1); + thrust::complex* result_tmp = reinterpret_cast*> (result); + const thrust::complex* vector1_tmp = reinterpret_cast*> (vector1); int thread = thread_per_block; int block = (dim + thread - 1) / thread; - vector_mul_vector_kernel><<>>(dim, result_tmp, vector1_tmp, vector2, add); + vector_mul_vector_kernel><<>> (dim, result_tmp, vector1_tmp, vector2, add); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } template <> -void vector_mul_vector_op, base_device::DEVICE_GPU>::operator()(const int& dim, +void + vector_mul_vector_op, base_device::DEVICE_GPU>::operator() (const int& dim, std::complex* result, const std::complex* vector1, const float* vector2, const bool& add) { - vector_mul_vector_complex_wrapper(dim, result, vector1, vector2, add); + vector_mul_vector_complex_wrapper (dim, result, vector1, vector2, add); } template <> -void vector_mul_vector_op, base_device::DEVICE_GPU>::operator()( - const int& dim, - std::complex* result, - const std::complex* vector1, - const double* vector2, - const bool& add) +void + vector_mul_vector_op, base_device::DEVICE_GPU>::operator() ( + const int& dim, + std::complex* result, + const std::complex* vector1, + const double* vector2, + const bool& add) { - vector_mul_vector_complex_wrapper(dim, 
result, vector1, vector2, add); + vector_mul_vector_complex_wrapper (dim, result, vector1, vector2, add); } // vector operator: result[i] = vector1[i](not complex) / vector2[i](not complex) template <> -void vector_div_vector_op::operator()(const int& dim, +void + vector_div_vector_op::operator() (const int& dim, double* result, const double* vector1, const double* vector2) { int thread = thread_per_block; int block = (dim + thread - 1) / thread; - vector_div_vector_kernel<<>>(dim, result, vector1, vector2); + vector_div_vector_kernel<<>> (dim, result, vector1, vector2); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } // vector operator: result[i] = vector1[i](complex) / vector2[i](not complex) template -inline void vector_div_vector_complex_wrapper(const int& dim, - std::complex* result, - const std::complex* vector1, - const FPTYPE* vector2) +inline void + vector_div_vector_complex_wrapper (const int& dim, + std::complex* result, + const std::complex* vector1, + const FPTYPE* vector2) { - thrust::complex* result_tmp = reinterpret_cast*>(result); - const thrust::complex* vector1_tmp = reinterpret_cast*>(vector1); + thrust::complex* result_tmp = reinterpret_cast*> (result); + const thrust::complex* vector1_tmp = reinterpret_cast*> (vector1); int thread = thread_per_block; int block = (dim + thread - 1) / thread; - vector_div_vector_kernel><<>>(dim, result_tmp, vector1_tmp, vector2); + vector_div_vector_kernel><<>> (dim, result_tmp, vector1_tmp, vector2); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } template <> -void vector_div_vector_op, base_device::DEVICE_GPU>::operator()(const int& dim, +void + vector_div_vector_op, base_device::DEVICE_GPU>::operator() (const int& dim, std::complex* result, const std::complex* vector1, const float* vector2) { - vector_div_vector_complex_wrapper(dim, result, vector1, vector2); + vector_div_vector_complex_wrapper (dim, result, vector1, vector2); } template <> -void vector_div_vector_op, base_device::DEVICE_GPU>::operator()( - const 
int& dim, - std::complex* result, - const std::complex* vector1, - const double* vector2) +void + vector_div_vector_op, base_device::DEVICE_GPU>::operator() ( + const int& dim, + std::complex* result, + const std::complex* vector1, + const double* vector2) { - vector_div_vector_complex_wrapper(dim, result, vector1, vector2); + vector_div_vector_complex_wrapper (dim, result, vector1, vector2); } // vector operator: result[i] = vector1[i] * constant1 + vector2[i] * constant2 template -void vector_add_vector_op::operator()(const int& dim, - T* result, - const T* vector1, - const Real constant1, - const T* vector2, - const Real constant2) +void + vector_add_vector_op::operator() (const int& dim, + T* result, + const T* vector1, + const Real constant1, + const T* vector2, + const Real constant2) { using Type = typename GetTypeThrust::type; using Real = typename GetTypeReal::type; - auto result_tmp = reinterpret_cast(result); - auto vector1_tmp = reinterpret_cast(vector1); - auto vector2_tmp = reinterpret_cast(vector2); + auto result_tmp = reinterpret_cast (result); + auto vector1_tmp = reinterpret_cast (vector1); + auto vector2_tmp = reinterpret_cast (vector2); int thread = thread_per_block; int block = (dim + thread - 1) / thread; constantvector_addORsub_constantVector_kernel - <<>>(dim, result_tmp, vector1_tmp, constant1, vector2_tmp, constant2); + <<>> (dim, result_tmp, vector1_tmp, constant1, vector2_tmp, constant2); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } template <> -double dot_real_op::operator()(const int& dim, - const double* psi_L, - const double* psi_R, - const bool reduce) +double + dot_real_op::operator() (const int& dim, + const double* psi_L, + const double* psi_R, + const bool reduce) { double result = 0.0; - xdot_wrapper(dim, psi_L, 1, psi_R, 1, result); + xdot_wrapper (dim, psi_L, 1, psi_R, 1, result); if (reduce) - { - Parallel_Reduce::reduce_pool(result); - } + { + Parallel_Reduce::reduce_pool (result); + } return result; } // for this 
implementation, please check // https://thrust.github.io/doc/group__transformed__reductions_ga321192d85c5f510e52300ae762c7e995.html denghui modify // 2022-10-03 Note that ddot_(2*dim,a,1,b,1) = REAL( zdotc_(dim,a,1,b,1) ) GPU specialization of actual computation. template -inline FPTYPE dot_complex_wrapper(const int& dim, - const std::complex* psi_L, - const std::complex* psi_R, - const bool reduce) +inline FPTYPE + dot_complex_wrapper (const int& dim, + const std::complex* psi_L, + const std::complex* psi_R, + const bool reduce) { //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // denghui modify 2022-10-07 // Note that ddot_(2*dim,a,1,b,1) = REAL( zdotc_(dim,a,1,b,1) ) - const FPTYPE* pL = reinterpret_cast(psi_L); - const FPTYPE* pR = reinterpret_cast(psi_R); + const FPTYPE* pL = reinterpret_cast (psi_L); + const FPTYPE* pR = reinterpret_cast (psi_R); FPTYPE result = 0.0; - xdot_wrapper(dim * 2, pL, 1, pR, 1, result); + xdot_wrapper (dim * 2, pL, 1, pR, 1, result); if (reduce) - { - Parallel_Reduce::reduce_pool(result); - } + { + Parallel_Reduce::reduce_pool (result); + } return result; } template <> -float dot_real_op, base_device::DEVICE_GPU>::operator()(const int& dim, - const std::complex* psi_L, - const std::complex* psi_R, - const bool reduce) +float + dot_real_op, base_device::DEVICE_GPU>::operator() (const int& dim, + const std::complex* psi_L, + const std::complex* psi_R, + const bool reduce) { - return dot_complex_wrapper(dim, psi_L, psi_R, reduce); + return dot_complex_wrapper (dim, psi_L, psi_R, reduce); } template <> -double dot_real_op, base_device::DEVICE_GPU>::operator()(const int& dim, - const std::complex* psi_L, - const std::complex* psi_R, - const bool reduce) +double + dot_real_op, base_device::DEVICE_GPU>::operator() (const int& dim, + const std::complex* psi_L, + const std::complex* psi_R, + const bool reduce) { - return dot_complex_wrapper(dim, psi_L, psi_R, reduce); + return dot_complex_wrapper (dim, psi_L, psi_R, 
reduce); } // Explicitly instantiate functors for the types of functor registered. diff --git a/source/source_base/kernels/cuda/math_ylm_op.cu b/source/source_base/kernels/cuda/math_ylm_op.cu index 01b3676ff7c..cad42a85c35 100644 --- a/source/source_base/kernels/cuda/math_ylm_op.cu +++ b/source/source_base/kernels/cuda/math_ylm_op.cu @@ -3,127 +3,142 @@ #include -namespace ModuleBase { +namespace ModuleBase +{ #define THREADS_PER_BLOCK 256 template -__device__ __inline__ -FPTYPE __fact(const int n) { +__device__ __inline__ FPTYPE + __fact (const int n) +{ FPTYPE f = 1.0; - for (int i = n; i > 1; i--) { - f *= i; - } + for (int i = n; i > 1; i--) + { + f *= i; + } return f; } -__device__ __inline__ -int __semi_fact(const int n) +__device__ __inline__ int + __semi_fact (const int n) { int semif = 1; for (int i = n; i > 2; i -= 2) - { - semif *= i; - } + { + semif *= i; + } return semif; } template -__global__ void cal_ylm_real( - const int ng, - const int lmax, - const FPTYPE SQRT2, - const FPTYPE PI, - const FPTYPE PI_HALF, - const FPTYPE FOUR_PI, - const FPTYPE SQRT_INVERSE_FOUR_PI, - const FPTYPE *g, - FPTYPE * p, - FPTYPE * ylm) +__global__ void + cal_ylm_real (const int ng, + const int lmax, + const FPTYPE SQRT2, + const FPTYPE PI, + const FPTYPE PI_HALF, + const FPTYPE FOUR_PI, + const FPTYPE SQRT_INVERSE_FOUR_PI, + const FPTYPE* g, + FPTYPE* p, + FPTYPE* ylm) { int ig = blockIdx.x * blockDim.x + threadIdx.x; - if (ig >= ng) {return;} + if (ig >= ng) + { + return; + } FPTYPE cost = 0.0, phi = 0.0; //---------------------------------------------------------- // EXPLAIN : if lmax = 1,only use Y00 , output result. 
//---------------------------------------------------------- - if (lmax == 0) { - ylm[0 * ng + ig] = SQRT_INVERSE_FOUR_PI; - return; - } + if (lmax == 0) + { + ylm[0 * ng + ig] = SQRT_INVERSE_FOUR_PI; + return; + } //---------------------------------------------------------- // LOCAL VARIABLES : // NAME : cost = cos(theta),theta and phi are polar angles // NAME : phi //---------------------------------------------------------- - const FPTYPE gmod = sqrt(g[ig * 3 + 0] * g[ig * 3 + 0] + g[ig * 3 + 1] * g[ig * 3 + 1] + g[ig * 3 + 2] * g[ig * 3 + 2]); + const FPTYPE gmod + = sqrt (g[ig * 3 + 0] * g[ig * 3 + 0] + g[ig * 3 + 1] * g[ig * 3 + 1] + g[ig * 3 + 2] * g[ig * 3 + 2]); cost = gmod < 1.0e-9 ? 0.0 : g[ig * 3 + 2] / gmod; // beware the arc tan, it is defined modulo pi - if (g[ig * 3 + 0] > 1.0e-9) { - phi = atan(g[ig * 3 + 1] / g[ig * 3 + 0]); - } - else if (g[ig * 3 + 0] < -1.e-9) { - phi = atan(g[ig * 3 + 1] / g[ig * 3 + 0]) + PI; - } - else { - phi = PI_HALF * ((g[ig * 3 + 1] >= 0.0) ? 1.0 : -1.0); //HLX: modified on 10/13/2006 - } // end if + if (g[ig * 3 + 0] > 1.0e-9) + { + phi = atan (g[ig * 3 + 1] / g[ig * 3 + 0]); + } + else if (g[ig * 3 + 0] < -1.e-9) + { + phi = atan (g[ig * 3 + 1] / g[ig * 3 + 0]) + PI; + } + else + { + phi = PI_HALF * ((g[ig * 3 + 1] >= 0.0) ? 1.0 : -1.0); // HLX: modified on 10/13/2006 + } // end if //========================================================== // NAME : p(Legendre Polynomials) (0 <= m <= l) //========================================================== int lm = -1; - for (int l = 0; l <= lmax; l++) { - const FPTYPE c = sqrt((2 * l + 1) / FOUR_PI); - if (l == 0) { - p[0 * (lmax + 1) * ng + 0 * ng + ig] = 1.0; - } - else if (l == 1) { - p[0 * (lmax + 1) * ng + 1 * ng + ig] = cost; - FPTYPE var = (1.0 - cost * cost) > 0.0 ? 
(1.0 - cost * cost) : 0.0; - p[1 * (lmax + 1) * ng + 1 * ng + ig] = -sqrt(var); - } - else { - const int l1 = l - 1, - l2 = l - 2, - l3 = 2 * l - 1; - // recursion on l for P(:,l,m) - for (int m = 0; m <= l2; m++) { // do m = 0, l - 2//mohan modify 2007-10-13 - p[m * (lmax + 1) * ng + l * ng + ig] = - (cost * l3 * p[m * (lmax + 1) * ng + l1 * ng + ig] - - (l1 + m) * p[m * (lmax + 1) * ng + l2 * ng + ig]) / (l - m); - } // end do - p[l1 * (lmax + 1) * ng + l * ng + ig] = - cost * l3 * p[l1 * (lmax + 1) * ng + l1 * ng + ig]; - FPTYPE x2 = (1.0 - cost * cost) > 0.0 ? (1.0 - cost * cost) : 0.0; - p[l * (lmax + 1) * ng + l * ng + ig] = __semi_fact(l3) * pow(x2, static_cast(l) / 2.0);//mohan modify 2007-10-13 - if (l % 2 == 1) { - p[l * (lmax + 1) * ng + l * ng + ig] *= -1; - } - } // end if + for (int l = 0; l <= lmax; l++) + { + const FPTYPE c = sqrt ((2 * l + 1) / FOUR_PI); + if (l == 0) + { + p[0 * (lmax + 1) * ng + 0 * ng + ig] = 1.0; + } + else if (l == 1) + { + p[0 * (lmax + 1) * ng + 1 * ng + ig] = cost; + FPTYPE var = (1.0 - cost * cost) > 0.0 ? (1.0 - cost * cost) : 0.0; + p[1 * (lmax + 1) * ng + 1 * ng + ig] = -sqrt (var); + } + else + { + const int l1 = l - 1, l2 = l - 2, l3 = 2 * l - 1; + // recursion on l for P(:,l,m) + for (int m = 0; m <= l2; m++) + { // do m = 0, l - 2//mohan modify 2007-10-13 + p[m * (lmax + 1) * ng + l * ng + ig] = (cost * l3 * p[m * (lmax + 1) * ng + l1 * ng + ig] + - (l1 + m) * p[m * (lmax + 1) * ng + l2 * ng + ig]) + / (l - m); + } // end do + p[l1 * (lmax + 1) * ng + l * ng + ig] = cost * l3 * p[l1 * (lmax + 1) * ng + l1 * ng + ig]; + FPTYPE x2 = (1.0 - cost * cost) > 0.0 ? 
(1.0 - cost * cost) : 0.0; + p[l * (lmax + 1) * ng + l * ng + ig] + = __semi_fact (l3) * pow (x2, static_cast (l) / 2.0); // mohan modify 2007-10-13 + if (l % 2 == 1) + { + p[l * (lmax + 1) * ng + l * ng + ig] *= -1; + } + } // end if - // Y_lm, m = 0 - ++lm; - ylm[lm * ng + ig] = c * p[0 * (lmax + 1) * ng + l * ng + ig]; + // Y_lm, m = 0 + ++lm; + ylm[lm * ng + ig] = c * p[0 * (lmax + 1) * ng + l * ng + ig]; - for (int m = 1; m <= l; m++) { - // Y_lm, m > 0 - const FPTYPE same = - c * sqrt(__fact(l - m) / - __fact(l + m)) * SQRT2; + for (int m = 1; m <= l; m++) + { + // Y_lm, m > 0 + const FPTYPE same = c * sqrt (__fact (l - m) / __fact (l + m)) * SQRT2; - ++lm; - ylm[lm * ng + ig] = same * p[m * (lmax + 1) * ng + l * ng + ig] * cos(m * phi); + ++lm; + ylm[lm * ng + ig] = same * p[m * (lmax + 1) * ng + l * ng + ig] * cos (m * phi); - // Y_lm, m < 0 - ++lm; - ylm[lm * ng + ig] = same * p[m * (lmax + 1) * ng + l * ng + ig] * sin(m * phi); - } - }// end do + // Y_lm, m < 0 + ++lm; + ylm[lm * ng + ig] = same * p[m * (lmax + 1) * ng + l * ng + ig] * sin (m * phi); + } + } // end do } template -void cal_ylm_real_op::operator()(const base_device::DEVICE_GPU* ctx, +void + cal_ylm_real_op::operator() (const base_device::DEVICE_GPU* ctx, const int& ng, const int& lmax, const FPTYPE& SQRT2, @@ -136,22 +151,13 @@ void cal_ylm_real_op::operator()(const base_dev FPTYPE* ylm) { int block = (ng + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - cal_ylm_real<<>>( - ng, - lmax, - SQRT2, - PI, - PI_HALF, - FOUR_PI, - SQRT_INVERSE_FOUR_PI, - g, - p, - ylm); + cal_ylm_real + <<>> (ng, lmax, SQRT2, PI, PI_HALF, FOUR_PI, SQRT_INVERSE_FOUR_PI, g, p, ylm); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } template struct cal_ylm_real_op; template struct cal_ylm_real_op; -} // namespace ModuleBase +} // namespace ModuleBase diff --git a/source/source_base/kernels/cuda/sph_harm_gpu.cuh b/source/source_base/kernels/cuda/sph_harm_gpu.cuh index d4fa5f5666f..1e566b0035a 100644 --- 
a/source/source_base/kernels/cuda/sph_harm_gpu.cuh +++ b/source/source_base/kernels/cuda/sph_harm_gpu.cuh @@ -2,384 +2,383 @@ #include "source_base/ylmcoef.h" -namespace ModuleBase { +namespace ModuleBase +{ /// Spherical harmonics computation (table lookup method) /// Directly uses constexpr ylmcoef, compiler auto-inlines /// @param nwl Maximum angular momentum L (0 <= nwl <= 5) /// @param x,y,z Direction vector (need not be normalized, normalization is done internally) /// @param ylma Output array, size (nwl+1)^2 -__device__ static void sph_harm( - const int nwl, - const double x_in, - const double y_in, - const double z_in, - double* __restrict__ ylma) +__device__ static void + sph_harm (const int nwl, const double x_in, const double y_in, const double z_in, double* __restrict__ ylma) { - // Normalize the input direction vector - double r = sqrt(x_in * x_in + y_in * y_in + z_in * z_in); - double x, y, z; - if (r < 1e-10) - { - // At origin, default to z-axis direction - x = 0.0; - y = 0.0; - z = 1.0; - } - else - { - const double inv_r = 1.0 / r; - x = x_in * inv_r; - y = y_in * inv_r; - z = z_in * inv_r; - } - - /*************************** - L = 0 - ***************************/ - ylma[0] = ylmcoef[0]; // l=0, m=0 - double tmp0; - if (nwl == 0) - return; - - /*************************** - L = 1 - ***************************/ - ylma[1] = ylmcoef[1] * z; // l=1, m=0 - ylma[2] = -ylmcoef[1] * x; // l=1, m=1 - ylma[3] = -ylmcoef[1] * y; // l=1, m=-1 - if (nwl == 1) - return; - - /*************************** - L = 2 - ***************************/ - tmp0=ylmcoef[3] * ylma[0]; - ylma[4] = ylmcoef[2] * z * ylma[1] - tmp0 ; // l=2, m=0 - tmp0 = ylmcoef[4] * z; - ylma[5] = tmp0 * ylma[2]; // l=2,m=1 - ylma[6] = tmp0 * ylma[3]; // l=2,m=-1 - - tmp0 = ylmcoef[4] * x; - ylma[7] = ylmcoef[5] * ylma[4] - ylmcoef[6] * ylma[0] - - tmp0 * ylma[2]; // l=2,m=2 - ylma[8] = -tmp0 * ylma[3]; - if (nwl == 2) - return; - - /*************************** - L = 3 - 
***************************/ - tmp0=ylmcoef[8] * ylma[1]; - ylma[9] = ylmcoef[7] * z * ylma[4] - tmp0; // l=3, m=0 - - tmp0 = ylmcoef[9] * z; - ylma[10] = tmp0 * ylma[5] - ylmcoef[10] * ylma[2]; // l=3,m=1 - ylma[11] = tmp0 * ylma[6] - ylmcoef[10] * ylma[3]; // l=3,m=-1 - - tmp0 = ylmcoef[11] * z; - ylma[12] = tmp0 * ylma[7]; // l=3,m=2 - ylma[13] = tmp0 * ylma[8]; // l=3,m=-2 - - tmp0 = ylmcoef[14] * x; - ylma[14] = ylmcoef[12] * ylma[10] - ylmcoef[13] * ylma[2] - - tmp0 * ylma[7]; // l=3,m=3 - ylma[15] = ylmcoef[12] * ylma[11] - ylmcoef[13] * ylma[3] - - tmp0 * ylma[8]; // l=3,m=-3 - if (nwl == 3) - return; - - /*************************** - L = 4 - ***************************/ - tmp0=ylmcoef[16] * ylma[4]; - ylma[16] = ylmcoef[15] * z * ylma[9] - tmp0; // l=4,m=0 - - tmp0 = ylmcoef[17] * z; - ylma[17] = tmp0 * ylma[10] - ylmcoef[18] * ylma[5]; // l=4,m=1 - ylma[18] = tmp0 * ylma[11] - ylmcoef[18] * ylma[6]; // l=4,m=-1 - - tmp0 = ylmcoef[19] * z; - ylma[19] = tmp0 * ylma[12] - ylmcoef[20] * ylma[7]; // l=4,m=2 - ylma[20] = tmp0 * ylma[13] - ylmcoef[20] * ylma[8]; // l=4,m=-2 - - tmp0 = 3.0 * z; - ylma[21] = tmp0 * ylma[14]; // l=4,m=3 - ylma[22] = tmp0 * ylma[15]; // l=4,m=-3 - - tmp0 = ylmcoef[23] * x; - ylma[23] = ylmcoef[21] * ylma[19] - ylmcoef[22] * ylma[7] - - tmp0 * ylma[14]; // l=4,m=4 - ylma[24] = ylmcoef[21] * ylma[20] - ylmcoef[22] * ylma[8] - - tmp0 * ylma[15]; // l=4,m=-4 - if (nwl == 4) - return; - - /*************************** - L = 5 - ***************************/ - tmp0=ylmcoef[25] * ylma[9]; - ylma[25] - = ylmcoef[24] * z * ylma[16] - tmp0; // l=5,m=0 - - tmp0 = ylmcoef[26] * z; - ylma[26] = tmp0 * ylma[17] - ylmcoef[27] * ylma[10]; // l=5,m=1 - ylma[27] = tmp0 * ylma[18] - ylmcoef[27] * ylma[11]; // l=5,m=-1 - - tmp0 = ylmcoef[28] * z; - ylma[28] = tmp0 * ylma[19] - ylmcoef[29] * ylma[12]; // l=5,m=2 - ylma[29] = tmp0 * ylma[20] - ylmcoef[29] * ylma[13]; // l=5,m=-2 - - tmp0 = ylmcoef[30] * z; - ylma[30] = tmp0 * ylma[21] - ylmcoef[31] * 
ylma[14]; // l=5,m=3 - ylma[31] = tmp0 * ylma[22] - ylmcoef[31] * ylma[15]; // l=5,m=-3 - - tmp0 = ylmcoef[32] * z; - ylma[32] = tmp0 * ylma[23]; // l=5,m=4 - ylma[33] = tmp0 * ylma[24]; // l=5,m=-4 - - tmp0 = ylmcoef[35] * x; - ylma[34] = ylmcoef[33] * ylma[30] - ylmcoef[34] * ylma[14] - - tmp0 * ylma[23]; // l=5,m=5 - ylma[35] = ylmcoef[33] * ylma[31] - ylmcoef[34] * ylma[15] - - tmp0 * ylma[24]; // l=5,m=-5 - if (nwl == 5) - return; + // Normalize the input direction vector + double r = sqrt (x_in * x_in + y_in * y_in + z_in * z_in); + double x, y, z; + if (r < 1e-10) + { + // At origin, default to z-axis direction + x = 0.0; + y = 0.0; + z = 1.0; + } + else + { + const double inv_r = 1.0 / r; + x = x_in * inv_r; + y = y_in * inv_r; + z = z_in * inv_r; + } + + /*************************** + L = 0 + ***************************/ + ylma[0] = ylmcoef[0]; // l=0, m=0 + double tmp0; + if (nwl == 0) + return; + + /*************************** + L = 1 + ***************************/ + ylma[1] = ylmcoef[1] * z; // l=1, m=0 + ylma[2] = -ylmcoef[1] * x; // l=1, m=1 + ylma[3] = -ylmcoef[1] * y; // l=1, m=-1 + if (nwl == 1) + return; + + /*************************** + L = 2 + ***************************/ + tmp0 = ylmcoef[3] * ylma[0]; + ylma[4] = ylmcoef[2] * z * ylma[1] - tmp0; // l=2, m=0 + tmp0 = ylmcoef[4] * z; + ylma[5] = tmp0 * ylma[2]; // l=2,m=1 + ylma[6] = tmp0 * ylma[3]; // l=2,m=-1 + + tmp0 = ylmcoef[4] * x; + ylma[7] = ylmcoef[5] * ylma[4] - ylmcoef[6] * ylma[0] - tmp0 * ylma[2]; // l=2,m=2 + ylma[8] = -tmp0 * ylma[3]; + if (nwl == 2) + return; + + /*************************** + L = 3 + ***************************/ + tmp0 = ylmcoef[8] * ylma[1]; + ylma[9] = ylmcoef[7] * z * ylma[4] - tmp0; // l=3, m=0 + + tmp0 = ylmcoef[9] * z; + ylma[10] = tmp0 * ylma[5] - ylmcoef[10] * ylma[2]; // l=3,m=1 + ylma[11] = tmp0 * ylma[6] - ylmcoef[10] * ylma[3]; // l=3,m=-1 + + tmp0 = ylmcoef[11] * z; + ylma[12] = tmp0 * ylma[7]; // l=3,m=2 + ylma[13] = tmp0 * ylma[8]; // l=3,m=-2 + + 
tmp0 = ylmcoef[14] * x; + ylma[14] = ylmcoef[12] * ylma[10] - ylmcoef[13] * ylma[2] - tmp0 * ylma[7]; // l=3,m=3 + ylma[15] = ylmcoef[12] * ylma[11] - ylmcoef[13] * ylma[3] - tmp0 * ylma[8]; // l=3,m=-3 + if (nwl == 3) + return; + + /*************************** + L = 4 + ***************************/ + tmp0 = ylmcoef[16] * ylma[4]; + ylma[16] = ylmcoef[15] * z * ylma[9] - tmp0; // l=4,m=0 + + tmp0 = ylmcoef[17] * z; + ylma[17] = tmp0 * ylma[10] - ylmcoef[18] * ylma[5]; // l=4,m=1 + ylma[18] = tmp0 * ylma[11] - ylmcoef[18] * ylma[6]; // l=4,m=-1 + + tmp0 = ylmcoef[19] * z; + ylma[19] = tmp0 * ylma[12] - ylmcoef[20] * ylma[7]; // l=4,m=2 + ylma[20] = tmp0 * ylma[13] - ylmcoef[20] * ylma[8]; // l=4,m=-2 + + tmp0 = 3.0 * z; + ylma[21] = tmp0 * ylma[14]; // l=4,m=3 + ylma[22] = tmp0 * ylma[15]; // l=4,m=-3 + + tmp0 = ylmcoef[23] * x; + ylma[23] = ylmcoef[21] * ylma[19] - ylmcoef[22] * ylma[7] - tmp0 * ylma[14]; // l=4,m=4 + ylma[24] = ylmcoef[21] * ylma[20] - ylmcoef[22] * ylma[8] - tmp0 * ylma[15]; // l=4,m=-4 + if (nwl == 4) + return; + + /*************************** + L = 5 + ***************************/ + tmp0 = ylmcoef[25] * ylma[9]; + ylma[25] = ylmcoef[24] * z * ylma[16] - tmp0; // l=5,m=0 + + tmp0 = ylmcoef[26] * z; + ylma[26] = tmp0 * ylma[17] - ylmcoef[27] * ylma[10]; // l=5,m=1 + ylma[27] = tmp0 * ylma[18] - ylmcoef[27] * ylma[11]; // l=5,m=-1 + + tmp0 = ylmcoef[28] * z; + ylma[28] = tmp0 * ylma[19] - ylmcoef[29] * ylma[12]; // l=5,m=2 + ylma[29] = tmp0 * ylma[20] - ylmcoef[29] * ylma[13]; // l=5,m=-2 + + tmp0 = ylmcoef[30] * z; + ylma[30] = tmp0 * ylma[21] - ylmcoef[31] * ylma[14]; // l=5,m=3 + ylma[31] = tmp0 * ylma[22] - ylmcoef[31] * ylma[15]; // l=5,m=-3 + + tmp0 = ylmcoef[32] * z; + ylma[32] = tmp0 * ylma[23]; // l=5,m=4 + ylma[33] = tmp0 * ylma[24]; // l=5,m=-4 + + tmp0 = ylmcoef[35] * x; + ylma[34] = ylmcoef[33] * ylma[30] - ylmcoef[34] * ylma[14] - tmp0 * ylma[23]; // l=5,m=5 + ylma[35] = ylmcoef[33] * ylma[31] - ylmcoef[34] * ylma[15] - tmp0 * 
ylma[24]; // l=5,m=-5 + if (nwl == 5) + return; } /// Spherical harmonics and gradient computation -__device__ static void grad_rl_sph_harm( - const int nwl, - const double x, - const double y, - const double z, - double* __restrict__ rly, - double* __restrict__ grly) +__device__ static void + grad_rl_sph_harm (const int nwl, + const double x, + const double y, + const double z, + double* __restrict__ rly, + double* __restrict__ grly) { double r2 = x * x + y * y + z * z; double tx = x * 2; double ty = y * 2; double tz = z * 2; - //begin calculation - /*************************** - L = 0 - ***************************/ - rly[0] = ylmcoef[0]; //l=0, m=0 - grly[0] = grly[1] = grly[2] = 0.0; - if (nwl == 0) return; - - /*************************** - L = 1 - ***************************/ - rly[1] = ylmcoef[1]*z; //l=1, m=0 - grly[3] = grly[4] = 0.0; - grly[5] = ylmcoef[1]; - - rly[2] = -ylmcoef[1]*x; //l=1, m=1 - grly[7] = grly[8] = 0.0; - grly[6] = -ylmcoef[1]; - - rly[3] = -ylmcoef[1]*y; //l=1, m=-1 - grly[9] = grly[11] = 0.0; - grly[10] = -ylmcoef[1]; - - if (nwl == 1) return; - - /*************************** - L = 2 - ***************************/ - rly[4] = ylmcoef[2]*z*rly[1]-ylmcoef[3]*rly[0]*r2;//l=2, m=0 - grly[12] = ylmcoef[2]*z*grly[3]-ylmcoef[3]*(grly[0]*r2+rly[0]*tx);//l=2, m=0 - grly[13] = ylmcoef[2]*z*grly[4]-ylmcoef[3]*(grly[1]*r2+rly[0]*ty);//l=2, m=0 - grly[14] = ylmcoef[2]*(z*grly[5]+rly[1])-ylmcoef[3]*(grly[2]*r2+rly[0]*tz);//l=2, m=0 - - - double tmp0 = ylmcoef[4]*z; - rly[5] = tmp0*rly[2];//l=2,m=1 - grly[15] = tmp0*grly[6]; - grly[16] = tmp0*grly[7]; - grly[17] = ylmcoef[4]*(rly[2]+z*grly[8]); - - rly[6] = tmp0*rly[3];//l=2,m=-1 - grly[18] = tmp0*grly[9]; - grly[19] = tmp0*grly[10]; - grly[20] = ylmcoef[4]*(rly[3]+z*grly[11]); - - double tmp2 = ylmcoef[4]*x; - rly[7]= ylmcoef[5]*rly[4]-ylmcoef[6]*rly[0]*r2 - tmp2*rly[2];//l=2,m=2 - grly[21] = ylmcoef[5]*grly[12]-ylmcoef[6]*(rly[0]*tx+grly[0]*r2)-ylmcoef[4]*(x*grly[6]+rly[2]); - - grly[22] = 
ylmcoef[5]*grly[13]-ylmcoef[6]*(rly[0]*ty+grly[1]*r2)-tmp2*grly[7]; - grly[23] = ylmcoef[5]*grly[14]-ylmcoef[6]*(rly[0]*tz+grly[2]*r2)-tmp2*grly[8]; - - rly[8] = -tmp2*rly[3]; - grly[24] = -ylmcoef[4]*(rly[3]+x*grly[9]); - grly[25] = -tmp2*grly[10]; - grly[26] = -tmp2*grly[11]; - if (nwl == 2) return; - - /*************************** - L = 3 - ***************************/ - rly[9] = ylmcoef[7]*z*rly[4]-ylmcoef[8]*rly[1]*r2; //l=3, m=0 - grly[27] = ylmcoef[7]*z*grly[12]-ylmcoef[8]*(rly[1]*tx+grly[3]*r2); - grly[28] = ylmcoef[7]*z*grly[13]-ylmcoef[8]*(rly[1]*ty+grly[4]*r2); - grly[29] = ylmcoef[7]*(rly[4]+z*grly[14])-ylmcoef[8]*(rly[1]*tz+grly[5]*r2); - - double tmp3 = ylmcoef[9]*z; - rly[10] = tmp3*rly[5]-ylmcoef[10]*rly[2]*r2;//l=3,m=1 - grly[30] = tmp3*grly[15]-ylmcoef[10]*(grly[6]*r2+rly[2]*tx); - grly[31] = tmp3*grly[16]-ylmcoef[10]*(grly[7]*r2+rly[2]*ty); - grly[32] = ylmcoef[9]*(z*grly[17]+rly[5])-ylmcoef[10]*(grly[8]*r2+rly[2]*tz); - - rly[11] = tmp3*rly[6]-ylmcoef[10]*rly[3]*r2;//l=3,m=-1 - grly[33] = tmp3*grly[18]-ylmcoef[10]*(grly[9]*r2+rly[3]*tx); - grly[34] = tmp3*grly[19]-ylmcoef[10]*(grly[10]*r2+rly[3]*ty); - grly[35] = ylmcoef[9]*(z*grly[20]+rly[6])-ylmcoef[10]*(grly[11]*r2+rly[3]*tz); - - double tmp4 = ylmcoef[11]*z; - rly[12] = tmp4*rly[7];//l=3,m=2 - grly[36] = tmp4*grly[21]; - grly[37] = tmp4*grly[22]; - grly[38] = ylmcoef[11]*(z*grly[23]+rly[7]); - - rly[13] = tmp4*rly[8];//l=3,m=-2 - grly[39] = tmp4*grly[24]; - grly[40] = tmp4*grly[25]; - grly[41] = ylmcoef[11]*(z*grly[26]+rly[8]); - - double tmp5 = ylmcoef[14]*x; - rly[14] = ylmcoef[12]*rly[10]-ylmcoef[13]*rly[2]*r2-tmp5*rly[7];//l=3,m=3 - grly[42] = ylmcoef[12]*grly[30]-ylmcoef[13]*(rly[2]*tx+grly[6]*r2)-ylmcoef[14]*(rly[7]+x*grly[21]); - grly[43] = ylmcoef[12]*grly[31]-ylmcoef[13]*(rly[2]*ty+grly[7]*r2)-tmp5*grly[22]; - grly[44] = ylmcoef[12]*grly[32]-ylmcoef[13]*(rly[2]*tz+grly[8]*r2)-tmp5*grly[23]; - - rly[15] = ylmcoef[12]*rly[11]-ylmcoef[13]*rly[3]*r2-tmp5*rly[8];//l=3,m=-3 - grly[45] = 
ylmcoef[12]*grly[33]-ylmcoef[13]*(rly[3]*tx+grly[9]*r2)-ylmcoef[14]*(rly[8]+x*grly[24]); - grly[46] = ylmcoef[12]*grly[34]-ylmcoef[13]*(rly[3]*ty+grly[10]*r2)-tmp5*grly[25]; - grly[47] = ylmcoef[12]*grly[35]-ylmcoef[13]*(rly[3]*tz+grly[11]*r2)-tmp5*grly[26]; - if (nwl == 3) return; - - /*************************** - L = 4 - ***************************/ - rly[16] = ylmcoef[15]*z*rly[9]-ylmcoef[16]*rly[4]*r2;//l=4,m=0 - grly[48] = ylmcoef[15]*z*grly[27]-ylmcoef[16]*(rly[4]*tx+grly[12]*r2); - grly[49] = ylmcoef[15]*z*grly[28]-ylmcoef[16]*(rly[4]*ty+grly[13]*r2); - grly[50] = ylmcoef[15]*(z*grly[29]+rly[9])-ylmcoef[16]*(rly[4]*tz+grly[14]*r2); - - double tmp6 = ylmcoef[17]*z; - rly[17] = tmp6*rly[10]-ylmcoef[18]*rly[5]*r2;//l=4,m=1 - grly[51] = tmp6*grly[30]-ylmcoef[18]*(rly[5]*tx+grly[15]*r2); - grly[52] = tmp6*grly[31]-ylmcoef[18]*(rly[5]*ty+grly[16]*r2); - grly[53] = ylmcoef[17]*(z*grly[32]+rly[10])-ylmcoef[18]*(rly[5]*tz+grly[17]*r2); - - rly[18] = tmp6*rly[11]-ylmcoef[18]*rly[6]*r2;//l=4,m=-1 - grly[54] = tmp6*grly[33]-ylmcoef[18]*(rly[6]*tx+grly[18]*r2); - grly[55] = tmp6*grly[34]-ylmcoef[18]*(rly[6]*ty+grly[19]*r2); - grly[56] = ylmcoef[17]*(z*grly[35]+rly[11])-ylmcoef[18]*(rly[6]*tz+grly[20]*r2); - - double tmp7 = ylmcoef[19]*z; - rly[19] = tmp7*rly[12]-ylmcoef[20]*rly[7]*r2;//l=4,m=2 - grly[57] = tmp7*grly[36]-ylmcoef[20]*(rly[7]*tx+grly[21]*r2); - grly[58] = tmp7*grly[37]-ylmcoef[20]*(rly[7]*ty+grly[22]*r2); - grly[59] = ylmcoef[19]*(z*grly[38]+rly[12])-ylmcoef[20]*(rly[7]*tz+grly[23]*r2); - - rly[20] = tmp7*rly[13]-ylmcoef[20]*rly[8]*r2;//l=4,m=-2 - grly[60] = tmp7*grly[39]-ylmcoef[20]*(rly[8]*tx+grly[24]*r2); - grly[61] = tmp7*grly[40]-ylmcoef[20]*(rly[8]*ty+grly[25]*r2); - grly[62] = ylmcoef[19]*(z*grly[41]+rly[13])-ylmcoef[20]*(rly[8]*tz+grly[26]*r2); - - double tmp8 = 3.0*z; - rly[21] = tmp8*rly[14];//l=4,m=3 - grly[63] = tmp8*grly[42]; - grly[64] = tmp8*grly[43]; - grly[65] = 3.0*(z*grly[44]+rly[14]); - - - rly[22] = tmp8*rly[15];//l=4,m=-3 - grly[66] = 
tmp8*grly[45]; - grly[67] = tmp8*grly[46]; - grly[68] = 3.0*(z*grly[47]+rly[15]); - - double tmp9 = ylmcoef[23]*x; - rly[23] = ylmcoef[21]*rly[19]-ylmcoef[22]*rly[7]*r2-tmp9*rly[14];//l=4,m=4 - grly[69] = ylmcoef[21]*grly[57]-ylmcoef[22]*(rly[7]*tx+grly[21]*r2)-ylmcoef[23]*(x*grly[42]+rly[14]); - grly[70] = ylmcoef[21]*grly[58]-ylmcoef[22]*(rly[7]*ty+grly[22]*r2)-tmp9*grly[43]; - grly[71] = ylmcoef[21]*grly[59]-ylmcoef[22]*(rly[7]*tz+grly[23]*r2)-tmp9*grly[44]; - - rly[24] = ylmcoef[21]*rly[20]-ylmcoef[22]*rly[8]*r2-tmp9*rly[15];//l=4,m=-4 - grly[72] = ylmcoef[21]*grly[60]-ylmcoef[22]*(rly[8]*tx+grly[24]*r2)-ylmcoef[23]*(x*grly[45]+rly[15]); - grly[73] = ylmcoef[21]*grly[61]-ylmcoef[22]*(rly[8]*ty+grly[25]*r2)-tmp9*grly[46]; - grly[74] = ylmcoef[21]*grly[62]-ylmcoef[22]*(rly[8]*tz+grly[26]*r2)-tmp9*grly[47]; - - if (nwl == 4) return; - - /*************************** - L = 5 - ***************************/ - rly[25] = ylmcoef[24]*z*rly[16]-ylmcoef[25]*rly[9]*r2;//l=5,m=0 - grly[75] = ylmcoef[24]*z*grly[48]-ylmcoef[25]*(rly[9]*tx+grly[27]*r2); - grly[76] = ylmcoef[24]*z*grly[49]-ylmcoef[25]*(rly[9]*ty+grly[28]*r2); - grly[77] = ylmcoef[24]*(z*grly[50]+rly[16])-ylmcoef[25]*(rly[9]*tz+grly[29]*r2); - - double tmp10 = ylmcoef[26]*z; - rly[26] = tmp10*rly[17]-ylmcoef[27]*rly[10]*r2;//l=5,m=1 - grly[78] = tmp10*grly[51]-ylmcoef[27]*(rly[10]*tx+grly[30]*r2); - grly[79] = tmp10*grly[52]-ylmcoef[27]*(rly[10]*ty+grly[31]*r2); - grly[80] = ylmcoef[26]*(z*grly[53]+rly[17])-ylmcoef[27]*(rly[10]*tz+grly[32]*r2); - - rly[27] = tmp10*rly[18]-ylmcoef[27]*rly[11]*r2;//l=5,m=-1 - grly[81] = tmp10*grly[54]-ylmcoef[27]*(rly[11]*tx+grly[33]*r2); - grly[82] = tmp10*grly[55]-ylmcoef[27]*(rly[11]*ty+grly[34]*r2); - grly[83] = ylmcoef[26]*(z*grly[56]+rly[18])-ylmcoef[27]*(rly[11]*tz+grly[35]*r2); - - double tmp11 = ylmcoef[28]*z; - rly[28] = tmp11*rly[19]-ylmcoef[29]*rly[12]*r2;//l=5,m=2 - grly[84] = tmp11*grly[57]-ylmcoef[29]*(rly[12]*tx+grly[36]*r2); - grly[85] = 
tmp11*grly[58]-ylmcoef[29]*(rly[12]*ty+grly[37]*r2); - grly[86] = ylmcoef[28]*(z*grly[59]+rly[19])-ylmcoef[29]*(rly[12]*tz+grly[38]*r2); - - rly[29] = tmp11*rly[20]-ylmcoef[29]*rly[13]*r2;//l=5,m=-2 - grly[87] = tmp11*grly[60]-ylmcoef[29]*(rly[13]*tx+grly[39]*r2); - grly[88] = tmp11*grly[61]-ylmcoef[29]*(rly[13]*ty+grly[40]*r2); - grly[89] = ylmcoef[28]*(z*grly[62]+rly[20])-ylmcoef[29]*(rly[13]*tz+grly[41]*r2); - - double tmp12 = ylmcoef[30]*z; - rly[30] = tmp12*rly[21]-ylmcoef[31]*rly[14]*r2;//l=5,m=3 - grly[90] = tmp12*grly[63]-ylmcoef[31]*(grly[42]*r2+rly[14]*tx); - grly[91] = tmp12*grly[64]-ylmcoef[31]*(grly[43]*r2+rly[14]*ty); - grly[92] = ylmcoef[30]*(z*grly[65]+rly[21])-ylmcoef[31]*(grly[44]*r2+rly[14]*tz); - - rly[31] = tmp12*rly[22]-ylmcoef[31]*rly[15]*r2;//l=5,m=-3 - grly[93] = tmp12*grly[66]-ylmcoef[31]*(grly[45]*r2+rly[15]*tx); - grly[94] = tmp12*grly[67]-ylmcoef[31]*(grly[46]*r2+rly[15]*ty); - grly[95] = ylmcoef[30]*(z*grly[68]+rly[22])-ylmcoef[31]*(grly[47]*r2+rly[15]*tz); - - double tmp13 = ylmcoef[32]*z; - rly[32] = tmp13*rly[23];//l=5,m=4 - grly[96] = tmp13*grly[69]; - grly[97] = tmp13*grly[70]; - grly[98] = ylmcoef[32]*(rly[23]+z*grly[71]); - - rly[33] = tmp13*rly[24];//l=5,m=-4 - grly[99] = tmp13*grly[72]; - grly[100] = tmp13*grly[73]; - grly[101] = ylmcoef[32]*(rly[24]+z*grly[74]); - - double tmp14 = ylmcoef[35]*x; - rly[34] = ylmcoef[33]*rly[30]-ylmcoef[34]*rly[14]*r2-tmp14*rly[23];//l=5,m=5 - grly[102] = ylmcoef[33]*grly[90]-ylmcoef[34]*(rly[14]*tx+grly[42]*r2)-ylmcoef[35]*(x*grly[69]+rly[23]); - grly[103] = ylmcoef[33]*grly[91]-ylmcoef[34]*(rly[14]*ty+grly[43]*r2)-tmp14*grly[70]; - grly[104] = ylmcoef[33]*grly[92]-ylmcoef[34]*(rly[14]*tz+grly[44]*r2)-tmp14*grly[71]; - - rly[35] = ylmcoef[33]*rly[31]-ylmcoef[34]*rly[15]*r2-tmp14*rly[24];//l=5,m=-5 - grly[105] = ylmcoef[33]*grly[93]-ylmcoef[34]*(rly[15]*tx+grly[45]*r2)-ylmcoef[35]*(x*grly[72]+rly[24]); - grly[106] = ylmcoef[33]*grly[94]-ylmcoef[34]*(rly[15]*ty+grly[46]*r2)-tmp14*grly[73]; - 
grly[107] = ylmcoef[33]*grly[95]-ylmcoef[34]*(rly[15]*tz+grly[47]*r2)-tmp14*grly[74]; - - if (nwl == 5) return; + // begin calculation + /*************************** + L = 0 + ***************************/ + rly[0] = ylmcoef[0]; // l=0, m=0 + grly[0] = grly[1] = grly[2] = 0.0; + if (nwl == 0) + return; + + /*************************** + L = 1 + ***************************/ + rly[1] = ylmcoef[1] * z; // l=1, m=0 + grly[3] = grly[4] = 0.0; + grly[5] = ylmcoef[1]; + + rly[2] = -ylmcoef[1] * x; // l=1, m=1 + grly[7] = grly[8] = 0.0; + grly[6] = -ylmcoef[1]; + + rly[3] = -ylmcoef[1] * y; // l=1, m=-1 + grly[9] = grly[11] = 0.0; + grly[10] = -ylmcoef[1]; + + if (nwl == 1) + return; + + /*************************** + L = 2 + ***************************/ + rly[4] = ylmcoef[2] * z * rly[1] - ylmcoef[3] * rly[0] * r2; // l=2, m=0 + grly[12] = ylmcoef[2] * z * grly[3] - ylmcoef[3] * (grly[0] * r2 + rly[0] * tx); // l=2, m=0 + grly[13] = ylmcoef[2] * z * grly[4] - ylmcoef[3] * (grly[1] * r2 + rly[0] * ty); // l=2, m=0 + grly[14] = ylmcoef[2] * (z * grly[5] + rly[1]) - ylmcoef[3] * (grly[2] * r2 + rly[0] * tz); // l=2, m=0 + + double tmp0 = ylmcoef[4] * z; + rly[5] = tmp0 * rly[2]; // l=2,m=1 + grly[15] = tmp0 * grly[6]; + grly[16] = tmp0 * grly[7]; + grly[17] = ylmcoef[4] * (rly[2] + z * grly[8]); + + rly[6] = tmp0 * rly[3]; // l=2,m=-1 + grly[18] = tmp0 * grly[9]; + grly[19] = tmp0 * grly[10]; + grly[20] = ylmcoef[4] * (rly[3] + z * grly[11]); + + double tmp2 = ylmcoef[4] * x; + rly[7] = ylmcoef[5] * rly[4] - ylmcoef[6] * rly[0] * r2 - tmp2 * rly[2]; // l=2,m=2 + grly[21] = ylmcoef[5] * grly[12] - ylmcoef[6] * (rly[0] * tx + grly[0] * r2) - ylmcoef[4] * (x * grly[6] + rly[2]); + + grly[22] = ylmcoef[5] * grly[13] - ylmcoef[6] * (rly[0] * ty + grly[1] * r2) - tmp2 * grly[7]; + grly[23] = ylmcoef[5] * grly[14] - ylmcoef[6] * (rly[0] * tz + grly[2] * r2) - tmp2 * grly[8]; + + rly[8] = -tmp2 * rly[3]; + grly[24] = -ylmcoef[4] * (rly[3] + x * grly[9]); + grly[25] = -tmp2 * 
grly[10]; + grly[26] = -tmp2 * grly[11]; + if (nwl == 2) + return; + + /*************************** + L = 3 + ***************************/ + rly[9] = ylmcoef[7] * z * rly[4] - ylmcoef[8] * rly[1] * r2; // l=3, m=0 + grly[27] = ylmcoef[7] * z * grly[12] - ylmcoef[8] * (rly[1] * tx + grly[3] * r2); + grly[28] = ylmcoef[7] * z * grly[13] - ylmcoef[8] * (rly[1] * ty + grly[4] * r2); + grly[29] = ylmcoef[7] * (rly[4] + z * grly[14]) - ylmcoef[8] * (rly[1] * tz + grly[5] * r2); + + double tmp3 = ylmcoef[9] * z; + rly[10] = tmp3 * rly[5] - ylmcoef[10] * rly[2] * r2; // l=3,m=1 + grly[30] = tmp3 * grly[15] - ylmcoef[10] * (grly[6] * r2 + rly[2] * tx); + grly[31] = tmp3 * grly[16] - ylmcoef[10] * (grly[7] * r2 + rly[2] * ty); + grly[32] = ylmcoef[9] * (z * grly[17] + rly[5]) - ylmcoef[10] * (grly[8] * r2 + rly[2] * tz); + + rly[11] = tmp3 * rly[6] - ylmcoef[10] * rly[3] * r2; // l=3,m=-1 + grly[33] = tmp3 * grly[18] - ylmcoef[10] * (grly[9] * r2 + rly[3] * tx); + grly[34] = tmp3 * grly[19] - ylmcoef[10] * (grly[10] * r2 + rly[3] * ty); + grly[35] = ylmcoef[9] * (z * grly[20] + rly[6]) - ylmcoef[10] * (grly[11] * r2 + rly[3] * tz); + + double tmp4 = ylmcoef[11] * z; + rly[12] = tmp4 * rly[7]; // l=3,m=2 + grly[36] = tmp4 * grly[21]; + grly[37] = tmp4 * grly[22]; + grly[38] = ylmcoef[11] * (z * grly[23] + rly[7]); + + rly[13] = tmp4 * rly[8]; // l=3,m=-2 + grly[39] = tmp4 * grly[24]; + grly[40] = tmp4 * grly[25]; + grly[41] = ylmcoef[11] * (z * grly[26] + rly[8]); + + double tmp5 = ylmcoef[14] * x; + rly[14] = ylmcoef[12] * rly[10] - ylmcoef[13] * rly[2] * r2 - tmp5 * rly[7]; // l=3,m=3 + grly[42] + = ylmcoef[12] * grly[30] - ylmcoef[13] * (rly[2] * tx + grly[6] * r2) - ylmcoef[14] * (rly[7] + x * grly[21]); + grly[43] = ylmcoef[12] * grly[31] - ylmcoef[13] * (rly[2] * ty + grly[7] * r2) - tmp5 * grly[22]; + grly[44] = ylmcoef[12] * grly[32] - ylmcoef[13] * (rly[2] * tz + grly[8] * r2) - tmp5 * grly[23]; + + rly[15] = ylmcoef[12] * rly[11] - ylmcoef[13] * rly[3] * r2 - tmp5 * 
rly[8]; // l=3,m=-3 + grly[45] + = ylmcoef[12] * grly[33] - ylmcoef[13] * (rly[3] * tx + grly[9] * r2) - ylmcoef[14] * (rly[8] + x * grly[24]); + grly[46] = ylmcoef[12] * grly[34] - ylmcoef[13] * (rly[3] * ty + grly[10] * r2) - tmp5 * grly[25]; + grly[47] = ylmcoef[12] * grly[35] - ylmcoef[13] * (rly[3] * tz + grly[11] * r2) - tmp5 * grly[26]; + if (nwl == 3) + return; + + /*************************** + L = 4 + ***************************/ + rly[16] = ylmcoef[15] * z * rly[9] - ylmcoef[16] * rly[4] * r2; // l=4,m=0 + grly[48] = ylmcoef[15] * z * grly[27] - ylmcoef[16] * (rly[4] * tx + grly[12] * r2); + grly[49] = ylmcoef[15] * z * grly[28] - ylmcoef[16] * (rly[4] * ty + grly[13] * r2); + grly[50] = ylmcoef[15] * (z * grly[29] + rly[9]) - ylmcoef[16] * (rly[4] * tz + grly[14] * r2); + + double tmp6 = ylmcoef[17] * z; + rly[17] = tmp6 * rly[10] - ylmcoef[18] * rly[5] * r2; // l=4,m=1 + grly[51] = tmp6 * grly[30] - ylmcoef[18] * (rly[5] * tx + grly[15] * r2); + grly[52] = tmp6 * grly[31] - ylmcoef[18] * (rly[5] * ty + grly[16] * r2); + grly[53] = ylmcoef[17] * (z * grly[32] + rly[10]) - ylmcoef[18] * (rly[5] * tz + grly[17] * r2); + + rly[18] = tmp6 * rly[11] - ylmcoef[18] * rly[6] * r2; // l=4,m=-1 + grly[54] = tmp6 * grly[33] - ylmcoef[18] * (rly[6] * tx + grly[18] * r2); + grly[55] = tmp6 * grly[34] - ylmcoef[18] * (rly[6] * ty + grly[19] * r2); + grly[56] = ylmcoef[17] * (z * grly[35] + rly[11]) - ylmcoef[18] * (rly[6] * tz + grly[20] * r2); + + double tmp7 = ylmcoef[19] * z; + rly[19] = tmp7 * rly[12] - ylmcoef[20] * rly[7] * r2; // l=4,m=2 + grly[57] = tmp7 * grly[36] - ylmcoef[20] * (rly[7] * tx + grly[21] * r2); + grly[58] = tmp7 * grly[37] - ylmcoef[20] * (rly[7] * ty + grly[22] * r2); + grly[59] = ylmcoef[19] * (z * grly[38] + rly[12]) - ylmcoef[20] * (rly[7] * tz + grly[23] * r2); + + rly[20] = tmp7 * rly[13] - ylmcoef[20] * rly[8] * r2; // l=4,m=-2 + grly[60] = tmp7 * grly[39] - ylmcoef[20] * (rly[8] * tx + grly[24] * r2); + grly[61] = tmp7 * grly[40] - 
ylmcoef[20] * (rly[8] * ty + grly[25] * r2); + grly[62] = ylmcoef[19] * (z * grly[41] + rly[13]) - ylmcoef[20] * (rly[8] * tz + grly[26] * r2); + + double tmp8 = 3.0 * z; + rly[21] = tmp8 * rly[14]; // l=4,m=3 + grly[63] = tmp8 * grly[42]; + grly[64] = tmp8 * grly[43]; + grly[65] = 3.0 * (z * grly[44] + rly[14]); + + rly[22] = tmp8 * rly[15]; // l=4,m=-3 + grly[66] = tmp8 * grly[45]; + grly[67] = tmp8 * grly[46]; + grly[68] = 3.0 * (z * grly[47] + rly[15]); + + double tmp9 = ylmcoef[23] * x; + rly[23] = ylmcoef[21] * rly[19] - ylmcoef[22] * rly[7] * r2 - tmp9 * rly[14]; // l=4,m=4 + grly[69] + = ylmcoef[21] * grly[57] - ylmcoef[22] * (rly[7] * tx + grly[21] * r2) - ylmcoef[23] * (x * grly[42] + rly[14]); + grly[70] = ylmcoef[21] * grly[58] - ylmcoef[22] * (rly[7] * ty + grly[22] * r2) - tmp9 * grly[43]; + grly[71] = ylmcoef[21] * grly[59] - ylmcoef[22] * (rly[7] * tz + grly[23] * r2) - tmp9 * grly[44]; + + rly[24] = ylmcoef[21] * rly[20] - ylmcoef[22] * rly[8] * r2 - tmp9 * rly[15]; // l=4,m=-4 + grly[72] + = ylmcoef[21] * grly[60] - ylmcoef[22] * (rly[8] * tx + grly[24] * r2) - ylmcoef[23] * (x * grly[45] + rly[15]); + grly[73] = ylmcoef[21] * grly[61] - ylmcoef[22] * (rly[8] * ty + grly[25] * r2) - tmp9 * grly[46]; + grly[74] = ylmcoef[21] * grly[62] - ylmcoef[22] * (rly[8] * tz + grly[26] * r2) - tmp9 * grly[47]; + + if (nwl == 4) + return; + + /*************************** + L = 5 + ***************************/ + rly[25] = ylmcoef[24] * z * rly[16] - ylmcoef[25] * rly[9] * r2; // l=5,m=0 + grly[75] = ylmcoef[24] * z * grly[48] - ylmcoef[25] * (rly[9] * tx + grly[27] * r2); + grly[76] = ylmcoef[24] * z * grly[49] - ylmcoef[25] * (rly[9] * ty + grly[28] * r2); + grly[77] = ylmcoef[24] * (z * grly[50] + rly[16]) - ylmcoef[25] * (rly[9] * tz + grly[29] * r2); + + double tmp10 = ylmcoef[26] * z; + rly[26] = tmp10 * rly[17] - ylmcoef[27] * rly[10] * r2; // l=5,m=1 + grly[78] = tmp10 * grly[51] - ylmcoef[27] * (rly[10] * tx + grly[30] * r2); + grly[79] = tmp10 * 
grly[52] - ylmcoef[27] * (rly[10] * ty + grly[31] * r2); + grly[80] = ylmcoef[26] * (z * grly[53] + rly[17]) - ylmcoef[27] * (rly[10] * tz + grly[32] * r2); + + rly[27] = tmp10 * rly[18] - ylmcoef[27] * rly[11] * r2; // l=5,m=-1 + grly[81] = tmp10 * grly[54] - ylmcoef[27] * (rly[11] * tx + grly[33] * r2); + grly[82] = tmp10 * grly[55] - ylmcoef[27] * (rly[11] * ty + grly[34] * r2); + grly[83] = ylmcoef[26] * (z * grly[56] + rly[18]) - ylmcoef[27] * (rly[11] * tz + grly[35] * r2); + + double tmp11 = ylmcoef[28] * z; + rly[28] = tmp11 * rly[19] - ylmcoef[29] * rly[12] * r2; // l=5,m=2 + grly[84] = tmp11 * grly[57] - ylmcoef[29] * (rly[12] * tx + grly[36] * r2); + grly[85] = tmp11 * grly[58] - ylmcoef[29] * (rly[12] * ty + grly[37] * r2); + grly[86] = ylmcoef[28] * (z * grly[59] + rly[19]) - ylmcoef[29] * (rly[12] * tz + grly[38] * r2); + + rly[29] = tmp11 * rly[20] - ylmcoef[29] * rly[13] * r2; // l=5,m=-2 + grly[87] = tmp11 * grly[60] - ylmcoef[29] * (rly[13] * tx + grly[39] * r2); + grly[88] = tmp11 * grly[61] - ylmcoef[29] * (rly[13] * ty + grly[40] * r2); + grly[89] = ylmcoef[28] * (z * grly[62] + rly[20]) - ylmcoef[29] * (rly[13] * tz + grly[41] * r2); + + double tmp12 = ylmcoef[30] * z; + rly[30] = tmp12 * rly[21] - ylmcoef[31] * rly[14] * r2; // l=5,m=3 + grly[90] = tmp12 * grly[63] - ylmcoef[31] * (grly[42] * r2 + rly[14] * tx); + grly[91] = tmp12 * grly[64] - ylmcoef[31] * (grly[43] * r2 + rly[14] * ty); + grly[92] = ylmcoef[30] * (z * grly[65] + rly[21]) - ylmcoef[31] * (grly[44] * r2 + rly[14] * tz); + + rly[31] = tmp12 * rly[22] - ylmcoef[31] * rly[15] * r2; // l=5,m=-3 + grly[93] = tmp12 * grly[66] - ylmcoef[31] * (grly[45] * r2 + rly[15] * tx); + grly[94] = tmp12 * grly[67] - ylmcoef[31] * (grly[46] * r2 + rly[15] * ty); + grly[95] = ylmcoef[30] * (z * grly[68] + rly[22]) - ylmcoef[31] * (grly[47] * r2 + rly[15] * tz); + + double tmp13 = ylmcoef[32] * z; + rly[32] = tmp13 * rly[23]; // l=5,m=4 + grly[96] = tmp13 * grly[69]; + grly[97] = tmp13 * 
grly[70]; + grly[98] = ylmcoef[32] * (rly[23] + z * grly[71]); + + rly[33] = tmp13 * rly[24]; // l=5,m=-4 + grly[99] = tmp13 * grly[72]; + grly[100] = tmp13 * grly[73]; + grly[101] = ylmcoef[32] * (rly[24] + z * grly[74]); + + double tmp14 = ylmcoef[35] * x; + rly[34] = ylmcoef[33] * rly[30] - ylmcoef[34] * rly[14] * r2 - tmp14 * rly[23]; // l=5,m=5 + grly[102] = ylmcoef[33] * grly[90] - ylmcoef[34] * (rly[14] * tx + grly[42] * r2) + - ylmcoef[35] * (x * grly[69] + rly[23]); + grly[103] = ylmcoef[33] * grly[91] - ylmcoef[34] * (rly[14] * ty + grly[43] * r2) - tmp14 * grly[70]; + grly[104] = ylmcoef[33] * grly[92] - ylmcoef[34] * (rly[14] * tz + grly[44] * r2) - tmp14 * grly[71]; + + rly[35] = ylmcoef[33] * rly[31] - ylmcoef[34] * rly[15] * r2 - tmp14 * rly[24]; // l=5,m=-5 + grly[105] = ylmcoef[33] * grly[93] - ylmcoef[34] * (rly[15] * tx + grly[45] * r2) + - ylmcoef[35] * (x * grly[72] + rly[24]); + grly[106] = ylmcoef[33] * grly[94] - ylmcoef[34] * (rly[15] * ty + grly[46] * r2) - tmp14 * grly[73]; + grly[107] = ylmcoef[33] * grly[95] - ylmcoef[34] * (rly[15] * tz + grly[47] * r2) - tmp14 * grly[74]; + + if (nwl == 5) + return; } } // namespace ModuleBase diff --git a/source/source_base/kernels/dsp/dsp_connector.cpp b/source/source_base/kernels/dsp/dsp_connector.cpp index 7fa0f20ee7b..86f1b962f82 100644 --- a/source/source_base/kernels/dsp/dsp_connector.cpp +++ b/source/source_base/kernels/dsp/dsp_connector.cpp @@ -12,54 +12,58 @@ extern "C" } namespace mtfunc { -void dspInitHandle(int id) +void + dspInitHandle (int id) { - mt_blas_init(id); + mt_blas_init (id); std::cout << " ** DSP inited on cluster " << id << " **" << std::endl; } // Use this at the beginning of the program to start a dsp cluster -void dspDestoryHandle(int id) +void + dspDestoryHandle (int id) { - hthread_dev_close(id); + hthread_dev_close (id); std::cout << " ** DSP closed on cluster " << id << " **" << std::endl; } // Close dsp cluster at the end of the program -// MTBlas secretly removed 
its MTBLAS_TRANSPOSE data type and used the original CBLAS_TRANSPOSE. So this function is modified. +// MTBlas secretly removed its MTBLAS_TRANSPOSE data type and used the original CBLAS_TRANSPOSE. So this function is +// modified. -CBLAS_TRANSPOSE convertBLASTranspose(const char* blasTrans) +CBLAS_TRANSPOSE +convertBLASTranspose (const char* blasTrans) { switch (blasTrans[0]) - { - case 'N': - case 'n': - return CblasNoTrans; - case 'T': - case 't': - return CblasTrans; - case 'C': - case 'c': - return CblasConjTrans; - default: - std::cout << "Invalid BLAS transpose parameter!! Use default instead." << std::endl; - return CblasNoTrans; - } + { + case 'N': + case 'n': + return CblasNoTrans; + case 'T': + case 't': + return CblasTrans; + case 'C': + case 'c': + return CblasConjTrans; + default: + std::cout << "Invalid BLAS transpose parameter!! Use default instead." << std::endl; + return CblasNoTrans; + } } // Used to convert normal transpost char to cblas transpose flag -void* malloc_ht(size_t bytes, int cluster_id) +void* + malloc_ht (size_t bytes, int cluster_id) { - void* ptr = hthread_malloc((int)cluster_id, bytes, HT_MEM_RW); + void* ptr = hthread_malloc ((int)cluster_id, bytes, HT_MEM_RW); return ptr; } // Malloc on dsp. Used to replace original malloc - - -void free_ht(void* ptr) +void + free_ht (void* ptr) { - hthread_free(ptr); + hthread_free (ptr); } // Free on dsp. 
Used to replace original free - -void sgemm_mt_(const char* transa, +void + sgemm_mt_ (const char* transa, const char* transb, const int* m, const int* n, @@ -74,24 +78,25 @@ void sgemm_mt_(const char* transa, const int* ldc, int cluster_id) { - mtblas_sgemm(CBLAS_ORDER::CblasColMajor, - convertBLASTranspose(transa), - convertBLASTranspose(transb), - *m, - *n, - *k, - *alpha, - a, - *lda, - b, - *ldb, - *beta, - c, - *ldc, - cluster_id); + mtblas_sgemm (CBLAS_ORDER::CblasColMajor, + convertBLASTranspose (transa), + convertBLASTranspose (transb), + *m, + *n, + *k, + *alpha, + a, + *lda, + b, + *ldb, + *beta, + c, + *ldc, + cluster_id); } // zgemm that needn't malloc_ht or free_ht -void dgemm_mt_(const char* transa, +void + dgemm_mt_ (const char* transa, const char* transb, const int* m, const int* n, @@ -106,24 +111,25 @@ void dgemm_mt_(const char* transa, const int* ldc, int cluster_id) { - mtblas_dgemm(CBLAS_ORDER::CblasColMajor, - convertBLASTranspose(transa), - convertBLASTranspose(transb), - *m, - *n, - *k, - *alpha, - a, - *lda, - b, - *ldb, - *beta, - c, - *ldc, - cluster_id); + mtblas_dgemm (CBLAS_ORDER::CblasColMajor, + convertBLASTranspose (transa), + convertBLASTranspose (transb), + *m, + *n, + *k, + *alpha, + a, + *lda, + b, + *ldb, + *beta, + c, + *ldc, + cluster_id); } // cgemm that needn't malloc_ht or free_ht -void zgemm_mt_(const char* transa, +void + zgemm_mt_ (const char* transa, const char* transb, const int* m, const int* n, @@ -138,24 +144,25 @@ void zgemm_mt_(const char* transa, const int* ldc, int cluster_id) { - mtblas_zgemm(CBLAS_ORDER::CblasColMajor, - convertBLASTranspose(transa), - convertBLASTranspose(transb), - *m, - *n, - *k, - (const void*)alpha, - (const void*)a, - *lda, - (const void*)b, - *ldb, - (const void*)beta, - (void*)c, - *ldc, - cluster_id); + mtblas_zgemm (CBLAS_ORDER::CblasColMajor, + convertBLASTranspose (transa), + convertBLASTranspose (transb), + *m, + *n, + *k, + (const void*)alpha, + (const void*)a, + *lda, + (const 
void*)b, + *ldb, + (const void*)beta, + (void*)c, + *ldc, + cluster_id); } // zgemm that needn't malloc_ht or free_ht -void cgemm_mt_(const char* transa, +void + cgemm_mt_ (const char* transa, const char* transb, const int* m, const int* n, @@ -170,24 +177,25 @@ void cgemm_mt_(const char* transa, const int* ldc, int cluster_id) { - mtblas_cgemm(CBLAS_ORDER::CblasColMajor, - convertBLASTranspose(transa), - convertBLASTranspose(transb), - *m, - *n, - *k, - (const void*)alpha, - (const void*)a, - *lda, - (const void*)b, - *ldb, - (const void*)beta, - (void*)c, - *ldc, - cluster_id); + mtblas_cgemm (CBLAS_ORDER::CblasColMajor, + convertBLASTranspose (transa), + convertBLASTranspose (transb), + *m, + *n, + *k, + (const void*)alpha, + (const void*)a, + *lda, + (const void*)b, + *ldb, + (const void*)beta, + (void*)c, + *ldc, + cluster_id); } // cgemm that needn't malloc_ht or free_ht -void sgemv_mt_(const char* transa, +void + sgemv_mt_ (const char* transa, const int* m, const int* n, const float* alpha, @@ -200,22 +208,23 @@ void sgemv_mt_(const char* transa, const int* incy, int cluster_id) { - mtblas_sgemv(CBLAS_ORDER::CblasColMajor, - convertBLASTranspose(transa), - *m, - *n, - *alpha, - a, - *lda, - x, - *incx, - *beta, - y, - *incy, - cluster_id); + mtblas_sgemv (CBLAS_ORDER::CblasColMajor, + convertBLASTranspose (transa), + *m, + *n, + *alpha, + a, + *lda, + x, + *incx, + *beta, + y, + *incy, + cluster_id); } -void dgemv_mt_(const char* transa, +void + dgemv_mt_ (const char* transa, const int* m, const int* n, const double* alpha, @@ -228,22 +237,23 @@ void dgemv_mt_(const char* transa, const int* incy, int cluster_id) { - mtblas_dgemv(CBLAS_ORDER::CblasColMajor, - convertBLASTranspose(transa), - *m, - *n, - *alpha, - a, - *lda, - x, - *incx, - *beta, - y, - *incy, - cluster_id); + mtblas_dgemv (CBLAS_ORDER::CblasColMajor, + convertBLASTranspose (transa), + *m, + *n, + *alpha, + a, + *lda, + x, + *incx, + *beta, + y, + *incy, + cluster_id); } -void zgemv_mt_(const 
char* transa, +void + zgemv_mt_ (const char* transa, const int* m, const int* n, const std::complex* alpha, @@ -256,22 +266,23 @@ void zgemv_mt_(const char* transa, const int* incy, int cluster_id) { - mtblas_zgemv(CBLAS_ORDER::CblasColMajor, - convertBLASTranspose(transa), - *m, - *n, - (const void*)alpha, - (const void*)a, - *lda, - (const void*)x, - *incx, - (const void*)beta, - (void*)y, - *incy, - cluster_id); + mtblas_zgemv (CBLAS_ORDER::CblasColMajor, + convertBLASTranspose (transa), + *m, + *n, + (const void*)alpha, + (const void*)a, + *lda, + (const void*)x, + *incx, + (const void*)beta, + (void*)y, + *incy, + cluster_id); } -void cgemv_mt_(const char* transa, +void + cgemv_mt_ (const char* transa, const int* m, const int* n, const std::complex* alpha, @@ -284,24 +295,25 @@ void cgemv_mt_(const char* transa, const int* incy, int cluster_id) { - mtblas_cgemv(CBLAS_ORDER::CblasColMajor, - convertBLASTranspose(transa), - *m, - *n, - (const void*)alpha, - (const void*)a, - *lda, - (const void*)x, - *incx, - (const void*)beta, - (void*)y, - *incy, - cluster_id); + mtblas_cgemv (CBLAS_ORDER::CblasColMajor, + convertBLASTranspose (transa), + *m, + *n, + (const void*)alpha, + (const void*)a, + *lda, + (const void*)x, + *incx, + (const void*)beta, + (void*)y, + *incy, + cluster_id); } // Used to replace original free -void sgemm_mth_(const char* transa, +void + sgemm_mth_ (const char* transa, const char* transb, const int* m, const int* n, @@ -316,24 +328,25 @@ void sgemm_mth_(const char* transa, const int* ldc, int cluster_id) { - mt_hthread_sgemm(CBLAS_ORDER::CblasColMajor, - convertBLASTranspose(transa), - convertBLASTranspose(transb), - *m, - *n, - *k, - *alpha, - a, - *lda, - b, - *ldb, - *beta, - c, - *ldc, - cluster_id); + mt_hthread_sgemm (CBLAS_ORDER::CblasColMajor, + convertBLASTranspose (transa), + convertBLASTranspose (transb), + *m, + *n, + *k, + *alpha, + a, + *lda, + b, + *ldb, + *beta, + c, + *ldc, + cluster_id); } // zgemm that needn't malloc_ht or 
free_ht -void dgemm_mth_(const char* transa, +void + dgemm_mth_ (const char* transa, const char* transb, const int* m, const int* n, @@ -348,24 +361,25 @@ void dgemm_mth_(const char* transa, const int* ldc, int cluster_id) { - mt_hthread_dgemm(CBLAS_ORDER::CblasColMajor, - convertBLASTranspose(transa), - convertBLASTranspose(transb), - *m, - *n, - *k, - *alpha, - a, - *lda, - b, - *ldb, - *beta, - c, - *ldc, - cluster_id); + mt_hthread_dgemm (CBLAS_ORDER::CblasColMajor, + convertBLASTranspose (transa), + convertBLASTranspose (transb), + *m, + *n, + *k, + *alpha, + a, + *lda, + b, + *ldb, + *beta, + c, + *ldc, + cluster_id); } // cgemm that needn't malloc_ht or free_ht -void zgemm_mth_(const char* transa, +void + zgemm_mth_ (const char* transa, const char* transb, const int* m, const int* n, @@ -380,30 +394,31 @@ void zgemm_mth_(const char* transa, const int* ldc, int cluster_id) { - std::complex* alp = (std::complex*)malloc_ht(sizeof(std::complex), cluster_id); + std::complex* alp = (std::complex*)malloc_ht (sizeof (std::complex), cluster_id); *alp = *alpha; - std::complex* bet = (std::complex*)malloc_ht(sizeof(std::complex), cluster_id); + std::complex* bet = (std::complex*)malloc_ht (sizeof (std::complex), cluster_id); *bet = *beta; - mt_hthread_zgemm(CBLAS_ORDER::CblasColMajor, - convertBLASTranspose(transa), - convertBLASTranspose(transb), - *m, - *n, - *k, - alp, - a, - *lda, - b, - *ldb, - bet, - c, - *ldc, - cluster_id); - free_ht(alp); - free_ht(bet); + mt_hthread_zgemm (CBLAS_ORDER::CblasColMajor, + convertBLASTranspose (transa), + convertBLASTranspose (transb), + *m, + *n, + *k, + alp, + a, + *lda, + b, + *ldb, + bet, + c, + *ldc, + cluster_id); + free_ht (alp); + free_ht (bet); } // zgemm that needn't malloc_ht or free_ht -void zgemm_pack_mth_(const char* transa, +void + zgemm_pack_mth_ (const char* transa, const char* transb, const int* m, const int* n, @@ -420,50 +435,56 @@ void zgemm_pack_mth_(const char* transa, { const bool transa_not = (transa[0] 
== 'N' || transa[0] == 'n'); const bool transb_not = (transb[0] == 'N' || transb[0] == 'n'); - // const size_t a_elems = static_cast(*lda) * (transa_not ? static_cast(*k) : static_cast(*m)); - // const size_t b_elems = static_cast(*ldb) * (transb_not ? static_cast(*n) : static_cast(*k)); - const size_t c_elems = static_cast(*ldc) * static_cast(*n); - - // std::complex* A_dsp = static_cast*>(malloc_ht(a_elems * sizeof(std::complex), cluster_id)); - // std::complex* B_dsp = static_cast*>(malloc_ht(b_elems * sizeof(std::complex), cluster_id)); - std::complex* C_dsp = static_cast*>(malloc_ht(c_elems * sizeof(std::complex), cluster_id)); - std::complex* alp = static_cast*>(malloc_ht(sizeof(std::complex), cluster_id)); - std::complex* bet = static_cast*>(malloc_ht(sizeof(std::complex), cluster_id)); + // const size_t a_elems = static_cast(*lda) * (transa_not ? static_cast(*k) : + // static_cast(*m)); const size_t b_elems = static_cast(*ldb) * (transb_not ? + // static_cast(*n) : static_cast(*k)); + const size_t c_elems = static_cast (*ldc) * static_cast (*n); + + // std::complex* A_dsp = static_cast*>(malloc_ht(a_elems * + // sizeof(std::complex), cluster_id)); std::complex* B_dsp = + // static_cast*>(malloc_ht(b_elems * sizeof(std::complex), cluster_id)); + std::complex* C_dsp + = static_cast*> (malloc_ht (c_elems * sizeof (std::complex), cluster_id)); + std::complex* alp + = static_cast*> (malloc_ht (sizeof (std::complex), cluster_id)); + std::complex* bet + = static_cast*> (malloc_ht (sizeof (std::complex), cluster_id)); // memcpy(A_dsp, a, a_elems * sizeof(std::complex)); // memcpy(B_dsp, b, b_elems * sizeof(std::complex)); - memcpy(C_dsp, c, c_elems * sizeof(std::complex)); + memcpy (C_dsp, c, c_elems * sizeof (std::complex)); *alp = *alpha; *bet = *beta; - mt_hthread_zgemm(CBLAS_ORDER::CblasColMajor, - convertBLASTranspose(transa), - convertBLASTranspose(transb), - *m, - *n, - *k, - alp, - a, - // A_dsp, - *lda, - b, - // B_dsp, - *ldb, - bet, - // c, - C_dsp, - 
*ldc, - cluster_id); - memcpy(c, C_dsp, c_elems * sizeof(std::complex)); + mt_hthread_zgemm (CBLAS_ORDER::CblasColMajor, + convertBLASTranspose (transa), + convertBLASTranspose (transb), + *m, + *n, + *k, + alp, + a, + // A_dsp, + *lda, + b, + // B_dsp, + *ldb, + bet, + // c, + C_dsp, + *ldc, + cluster_id); + memcpy (c, C_dsp, c_elems * sizeof (std::complex)); // free_ht(A_dsp); // free_ht(B_dsp); - free_ht(C_dsp); - free_ht(alp); - free_ht(bet); + free_ht (C_dsp); + free_ht (alp); + free_ht (bet); } -void cgemm_mth_(const char* transa, +void + cgemm_mth_ (const char* transa, const char* transb, const int* m, const int* n, @@ -478,32 +499,33 @@ void cgemm_mth_(const char* transa, const int* ldc, int cluster_id) { - std::complex* alp = (std::complex*)malloc_ht(sizeof(std::complex), cluster_id); + std::complex* alp = (std::complex*)malloc_ht (sizeof (std::complex), cluster_id); *alp = *alpha; - std::complex* bet = (std::complex*)malloc_ht(sizeof(std::complex), cluster_id); + std::complex* bet = (std::complex*)malloc_ht (sizeof (std::complex), cluster_id); *bet = *beta; - mt_hthread_cgemm(CBLAS_ORDER::CblasColMajor, - convertBLASTranspose(transa), - convertBLASTranspose(transb), - *m, - *n, - *k, - (const void*)alp, - (const void*)a, - *lda, - (const void*)b, - *ldb, - (const void*)bet, - (void*)c, - *ldc, - cluster_id); - - free_ht(alp); - free_ht(bet); + mt_hthread_cgemm (CBLAS_ORDER::CblasColMajor, + convertBLASTranspose (transa), + convertBLASTranspose (transb), + *m, + *n, + *k, + (const void*)alp, + (const void*)a, + *lda, + (const void*)b, + *ldb, + (const void*)bet, + (void*)c, + *ldc, + cluster_id); + + free_ht (alp); + free_ht (bet); } // cgemm that needn't malloc_ht or free_ht -void cgemm_pack_mth_(const char* transa, +void + cgemm_pack_mth_ (const char* transa, const char* transb, const int* m, const int* n, @@ -520,48 +542,54 @@ void cgemm_pack_mth_(const char* transa, { const bool transa_not = (transa[0] == 'N' || transa[0] == 'n'); const bool transb_not 
= (transb[0] == 'N' || transb[0] == 'n'); - const size_t a_elems = static_cast(*lda) * (transa_not ? static_cast(*k) : static_cast(*m)); - const size_t b_elems = static_cast(*ldb) * (transb_not ? static_cast(*n) : static_cast(*k)); - const size_t c_elems = static_cast(*ldc) * static_cast(*n); - - std::complex* A_dsp = static_cast*>(malloc_ht(a_elems * sizeof(std::complex), cluster_id)); - std::complex* B_dsp = static_cast*>(malloc_ht(b_elems * sizeof(std::complex), cluster_id)); - std::complex* C_dsp = static_cast*>(malloc_ht(c_elems * sizeof(std::complex), cluster_id)); - std::complex* alp = static_cast*>(malloc_ht(sizeof(std::complex), cluster_id)); - std::complex* bet = static_cast*>(malloc_ht(sizeof(std::complex), cluster_id)); - - memcpy(A_dsp, a, a_elems * sizeof(std::complex)); - memcpy(B_dsp, b, b_elems * sizeof(std::complex)); - memcpy(C_dsp, c, c_elems * sizeof(std::complex)); + const size_t a_elems + = static_cast (*lda) * (transa_not ? static_cast (*k) : static_cast (*m)); + const size_t b_elems + = static_cast (*ldb) * (transb_not ? 
static_cast (*n) : static_cast (*k)); + const size_t c_elems = static_cast (*ldc) * static_cast (*n); + + std::complex* A_dsp + = static_cast*> (malloc_ht (a_elems * sizeof (std::complex), cluster_id)); + std::complex* B_dsp + = static_cast*> (malloc_ht (b_elems * sizeof (std::complex), cluster_id)); + std::complex* C_dsp + = static_cast*> (malloc_ht (c_elems * sizeof (std::complex), cluster_id)); + std::complex* alp = static_cast*> (malloc_ht (sizeof (std::complex), cluster_id)); + std::complex* bet = static_cast*> (malloc_ht (sizeof (std::complex), cluster_id)); + + memcpy (A_dsp, a, a_elems * sizeof (std::complex)); + memcpy (B_dsp, b, b_elems * sizeof (std::complex)); + memcpy (C_dsp, c, c_elems * sizeof (std::complex)); *alp = *alpha; *bet = *beta; - mt_hthread_cgemm(CBLAS_ORDER::CblasColMajor, - convertBLASTranspose(transa), - convertBLASTranspose(transb), - *m, - *n, - *k, - (const void*)alp, - (const void*)A_dsp, - *lda, - (const void*)B_dsp, - *ldb, - (const void*)bet, - (void*)C_dsp, - *ldc, - cluster_id); - - memcpy(c, C_dsp, c_elems * sizeof(std::complex)); - - free_ht(A_dsp); - free_ht(B_dsp); - free_ht(C_dsp); - free_ht(alp); - free_ht(bet); + mt_hthread_cgemm (CBLAS_ORDER::CblasColMajor, + convertBLASTranspose (transa), + convertBLASTranspose (transb), + *m, + *n, + *k, + (const void*)alp, + (const void*)A_dsp, + *lda, + (const void*)B_dsp, + *ldb, + (const void*)bet, + (void*)C_dsp, + *ldc, + cluster_id); + + memcpy (c, C_dsp, c_elems * sizeof (std::complex)); + + free_ht (A_dsp); + free_ht (B_dsp); + free_ht (C_dsp); + free_ht (alp); + free_ht (bet); } -void sgemv_mth_(const char* transa, +void + sgemv_mth_ (const char* transa, const int* m, const int* n, const float* alpha, @@ -574,22 +602,23 @@ void sgemv_mth_(const char* transa, const int* incy, int cluster_id) { - mt_hthread_sgemv(CBLAS_ORDER::CblasColMajor, - convertBLASTranspose(transa), - *m, - *n, - *alpha, - a, - *lda, - x, - *incx, - *beta, - y, - *incy, - cluster_id); + mt_hthread_sgemv 
(CBLAS_ORDER::CblasColMajor, + convertBLASTranspose (transa), + *m, + *n, + *alpha, + a, + *lda, + x, + *incx, + *beta, + y, + *incy, + cluster_id); } -void dgemv_mth_(const char* transa, +void + dgemv_mth_ (const char* transa, const int* m, const int* n, const double* alpha, @@ -602,22 +631,23 @@ void dgemv_mth_(const char* transa, const int* incy, int cluster_id) { - mt_hthread_dgemv(CBLAS_ORDER::CblasColMajor, - convertBLASTranspose(transa), - *m, - *n, - *alpha, - a, - *lda, - x, - *incx, - *beta, - y, - *incy, - cluster_id); + mt_hthread_dgemv (CBLAS_ORDER::CblasColMajor, + convertBLASTranspose (transa), + *m, + *n, + *alpha, + a, + *lda, + x, + *incx, + *beta, + y, + *incy, + cluster_id); } -void zgemv_mth_(const char* transa, +void + zgemv_mth_ (const char* transa, const int* m, const int* n, const std::complex* alpha, @@ -630,30 +660,31 @@ void zgemv_mth_(const char* transa, const int* incy, int cluster_id) { - std::complex* alp = (std::complex*)malloc_ht(sizeof(std::complex), cluster_id); + std::complex* alp = (std::complex*)malloc_ht (sizeof (std::complex), cluster_id); *alp = *alpha; - std::complex* bet = (std::complex*)malloc_ht(sizeof(std::complex), cluster_id); + std::complex* bet = (std::complex*)malloc_ht (sizeof (std::complex), cluster_id); *bet = *beta; - mt_hthread_zgemv(CBLAS_ORDER::CblasColMajor, - convertBLASTranspose(transa), - *m, - *n, - (const void*)alp, - (const void*)a, - *lda, - (const void*)x, - *incx, - (const void*)bet, - (void*)y, - *incy, - cluster_id); - - free_ht(alp); - free_ht(bet); + mt_hthread_zgemv (CBLAS_ORDER::CblasColMajor, + convertBLASTranspose (transa), + *m, + *n, + (const void*)alp, + (const void*)a, + *lda, + (const void*)x, + *incx, + (const void*)bet, + (void*)y, + *incy, + cluster_id); + + free_ht (alp); + free_ht (bet); } -void cgemv_mth_(const char* transa, +void + cgemv_mth_ (const char* transa, const int* m, const int* n, const std::complex* alpha, @@ -666,26 +697,26 @@ void cgemv_mth_(const char* transa, 
const int* incy, int cluster_id) { - std::complex* alp = (std::complex*)malloc_ht(sizeof(std::complex), cluster_id); + std::complex* alp = (std::complex*)malloc_ht (sizeof (std::complex), cluster_id); *alp = *alpha; - std::complex* bet = (std::complex*)malloc_ht(sizeof(std::complex), cluster_id); + std::complex* bet = (std::complex*)malloc_ht (sizeof (std::complex), cluster_id); *bet = *beta; - mt_hthread_cgemv(CBLAS_ORDER::CblasColMajor, - convertBLASTranspose(transa), - *m, - *n, - (const void*)alp, - (const void*)a, - *lda, - (const void*)x, - *incx, - (const void*)bet, - (void*)y, - *incy, - cluster_id); - - free_ht(alp); - free_ht(bet); + mt_hthread_cgemv (CBLAS_ORDER::CblasColMajor, + convertBLASTranspose (transa), + *m, + *n, + (const void*)alp, + (const void*)a, + *lda, + (const void*)x, + *incx, + (const void*)bet, + (void*)y, + *incy, + cluster_id); + + free_ht (alp); + free_ht (bet); } } // namespace mtfunc diff --git a/source/source_base/kernels/dsp/dsp_connector.h b/source/source_base/kernels/dsp/dsp_connector.h index 2b21eb70357..fe8aa4c6876 100644 --- a/source/source_base/kernels/dsp/dsp_connector.h +++ b/source/source_base/kernels/dsp/dsp_connector.h @@ -8,128 +8,14 @@ namespace mtfunc { // Base dsp functions -void dspInitHandle(int id); -void dspDestoryHandle(int id); -void* malloc_ht(size_t bytes, int cluster_id); -void free_ht(void* ptr); +void dspInitHandle (int id); +void dspDestoryHandle (int id); +void* malloc_ht (size_t bytes, int cluster_id); +void free_ht (void* ptr); // mtblas functions -void sgemm_mt_(const char* transa, - const char* transb, - const int* m, - const int* n, - const int* k, - const float* alpha, - const float* a, - const int* lda, - const float* b, - const int* ldb, - const float* beta, - float* c, - const int* ldc, - int cluster_id); - -void dgemm_mt_(const char* transa, - const char* transb, - const int* m, - const int* n, - const int* k, - const double* alpha, - const double* a, - const int* lda, - const double* b, - 
const int* ldb, - const double* beta, - double* c, - const int* ldc, - int cluster_id); - -void zgemm_mt_(const char* transa, - const char* transb, - const int* m, - const int* n, - const int* k, - const std::complex* alpha, - const std::complex* a, - const int* lda, - const std::complex* b, - const int* ldb, - const std::complex* beta, - std::complex* c, - const int* ldc, - int cluster_id); - -void cgemm_mt_(const char* transa, - const char* transb, - const int* m, - const int* n, - const int* k, - const std::complex* alpha, - const std::complex* a, - const int* lda, - const std::complex* b, - const int* ldb, - const std::complex* beta, - std::complex* c, - const int* ldc, - int cluster_id); - - - -void sgemv_mt_(const char* transa, - const int* m, - const int* n, - const float* alpha, - const float* a, - const int* lda, - const float* x, - const int* incx, - const float* beta, - float* y, - const int* incy, - int cluster_id); - -void dgemv_mt_(const char* transa, - const int* m, - const int* n, - const double* alpha, - const double* a, - const int* lda, - const double* x, - const int* incx, - const double* beta, - double* y, - const int* incy, - int cluster_id); - -void zgemv_mt_(const char* transa, - const int* m, - const int* n, - const std::complex* alpha, - const std::complex* a, - const int* lda, - const std::complex* x, - const int* incx, - const std::complex* beta, - std::complex* y, - const int* incy, - int cluster_id); - -void cgemv_mt_(const char* transa, - const int* m, - const int* n, - const std::complex* alpha, - const std::complex* a, - const int* lda, - const std::complex* x, - const int* incx, - const std::complex* beta, - std::complex* y, - const int* incy, - int cluster_id); - -void sgemm_mth_(const char* transa, +void sgemm_mt_ (const char* transa, const char* transb, const int* m, const int* n, @@ -144,7 +30,7 @@ void sgemm_mth_(const char* transa, const int* ldc, int cluster_id); -void dgemm_mth_(const char* transa, +void dgemm_mt_ (const 
char* transa, const char* transb, const int* m, const int* n, @@ -159,7 +45,7 @@ void dgemm_mth_(const char* transa, const int* ldc, int cluster_id); -void zgemm_mth_(const char* transa, +void zgemm_mt_ (const char* transa, const char* transb, const int* m, const int* n, @@ -174,22 +60,7 @@ void zgemm_mth_(const char* transa, const int* ldc, int cluster_id); -void zgemm_pack_mth_(const char* transa, - const char* transb, - const int* m, - const int* n, - const int* k, - const std::complex* alpha, - const std::complex* a, - const int* lda, - const std::complex* b, - const int* ldb, - const std::complex* beta, - std::complex* c, - const int* ldc, - int cluster_id); - -void cgemm_mth_(const char* transa, +void cgemm_mt_ (const char* transa, const char* transb, const int* m, const int* n, @@ -204,22 +75,7 @@ void cgemm_mth_(const char* transa, const int* ldc, int cluster_id); -void cgemm_pack_mth_(const char* transa, - const char* transb, - const int* m, - const int* n, - const int* k, - const std::complex* alpha, - const std::complex* a, - const int* lda, - const std::complex* b, - const int* ldb, - const std::complex* beta, - std::complex* c, - const int* ldc, - int cluster_id); - -void sgemv_mth_(const char* transa, +void sgemv_mt_ (const char* transa, const int* m, const int* n, const float* alpha, @@ -232,7 +88,7 @@ void sgemv_mth_(const char* transa, const int* incy, int cluster_id); -void dgemv_mth_(const char* transa, +void dgemv_mt_ (const char* transa, const int* m, const int* n, const double* alpha, @@ -245,7 +101,7 @@ void dgemv_mth_(const char* transa, const int* incy, int cluster_id); -void zgemv_mth_(const char* transa, +void zgemv_mt_ (const char* transa, const int* m, const int* n, const std::complex* alpha, @@ -258,7 +114,7 @@ void zgemv_mth_(const char* transa, const int* incy, int cluster_id); -void cgemv_mth_(const char* transa, +void cgemv_mt_ (const char* transa, const int* m, const int* n, const std::complex* alpha, @@ -271,12 +127,155 @@ void 
cgemv_mth_(const char* transa, const int* incy, int cluster_id); +void sgemm_mth_ (const char* transa, + const char* transb, + const int* m, + const int* n, + const int* k, + const float* alpha, + const float* a, + const int* lda, + const float* b, + const int* ldb, + const float* beta, + float* c, + const int* ldc, + int cluster_id); + +void dgemm_mth_ (const char* transa, + const char* transb, + const int* m, + const int* n, + const int* k, + const double* alpha, + const double* a, + const int* lda, + const double* b, + const int* ldb, + const double* beta, + double* c, + const int* ldc, + int cluster_id); + +void zgemm_mth_ (const char* transa, + const char* transb, + const int* m, + const int* n, + const int* k, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* b, + const int* ldb, + const std::complex* beta, + std::complex* c, + const int* ldc, + int cluster_id); + +void zgemm_pack_mth_ (const char* transa, + const char* transb, + const int* m, + const int* n, + const int* k, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* b, + const int* ldb, + const std::complex* beta, + std::complex* c, + const int* ldc, + int cluster_id); + +void cgemm_mth_ (const char* transa, + const char* transb, + const int* m, + const int* n, + const int* k, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* b, + const int* ldb, + const std::complex* beta, + std::complex* c, + const int* ldc, + int cluster_id); + +void cgemm_pack_mth_ (const char* transa, + const char* transb, + const int* m, + const int* n, + const int* k, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* b, + const int* ldb, + const std::complex* beta, + std::complex* c, + const int* ldc, + int cluster_id); + +void sgemv_mth_ (const char* transa, + const int* m, + const int* n, + const float* alpha, + const float* a, + const int* lda, + const 
float* x, + const int* incx, + const float* beta, + float* y, + const int* incy, + int cluster_id); + +void dgemv_mth_ (const char* transa, + const int* m, + const int* n, + const double* alpha, + const double* a, + const int* lda, + const double* x, + const int* incx, + const double* beta, + double* y, + const int* incy, + int cluster_id); + +void zgemv_mth_ (const char* transa, + const int* m, + const int* n, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* x, + const int* incx, + const std::complex* beta, + std::complex* y, + const int* incy, + int cluster_id); + +void cgemv_mth_ (const char* transa, + const int* m, + const int* n, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* x, + const int* incx, + const std::complex* beta, + std::complex* y, + const int* incy, + int cluster_id); + // #define zgemm_ zgemm_mt // The next is dsp utils. It may be moved to other files if this file get too huge template -void dsp_dav_subspace_reduce(T* hcc, T* scc, int nbase, int nbase_x, int notconv, MPI_Comm diag_comm) +void + dsp_dav_subspace_reduce (T* hcc, T* scc, int nbase, int nbase_x, int notconv, MPI_Comm diag_comm) { using syncmem_complex_op @@ -284,29 +283,29 @@ void dsp_dav_subspace_reduce(T* hcc, T* scc, int nbase, int nbase_x, int notconv auto* swap = new T[notconv * nbase_x]; auto* target = new T[notconv * nbase_x]; - syncmem_complex_op()(swap, hcc + nbase * nbase_x, notconv * nbase_x); - if (base_device::get_current_precision(swap) == "single") - { - MPI_Reduce(swap, target, notconv * nbase_x, MPI_COMPLEX, MPI_SUM, 0, diag_comm); - } + syncmem_complex_op () (swap, hcc + nbase * nbase_x, notconv * nbase_x); + if (base_device::get_current_precision (swap) == "single") + { + MPI_Reduce (swap, target, notconv * nbase_x, MPI_COMPLEX, MPI_SUM, 0, diag_comm); + } else - { - MPI_Reduce(swap, target, notconv * nbase_x, MPI_DOUBLE_COMPLEX, MPI_SUM, 0, diag_comm); - } + { + MPI_Reduce 
(swap, target, notconv * nbase_x, MPI_DOUBLE_COMPLEX, MPI_SUM, 0, diag_comm); + } - syncmem_complex_op()(hcc + nbase * nbase_x, target, notconv * nbase_x); - syncmem_complex_op()(swap, scc + nbase * nbase_x, notconv * nbase_x); + syncmem_complex_op () (hcc + nbase * nbase_x, target, notconv * nbase_x); + syncmem_complex_op () (swap, scc + nbase * nbase_x, notconv * nbase_x); - if (base_device::get_current_precision(swap) == "single") - { - MPI_Reduce(swap, target, notconv * nbase_x, MPI_COMPLEX, MPI_SUM, 0, diag_comm); - } + if (base_device::get_current_precision (swap) == "single") + { + MPI_Reduce (swap, target, notconv * nbase_x, MPI_COMPLEX, MPI_SUM, 0, diag_comm); + } else - { - MPI_Reduce(swap, target, notconv * nbase_x, MPI_DOUBLE_COMPLEX, MPI_SUM, 0, diag_comm); - } + { + MPI_Reduce (swap, target, notconv * nbase_x, MPI_DOUBLE_COMPLEX, MPI_SUM, 0, diag_comm); + } - syncmem_complex_op()(scc + nbase * nbase_x, target, notconv * nbase_x); + syncmem_complex_op () (scc + nbase * nbase_x, target, notconv * nbase_x); delete[] swap; delete[] target; } diff --git a/source/source_base/kernels/math_kernel_op.cpp b/source/source_base/kernels/math_kernel_op.cpp index 307da6c65d8..7611f6b791d 100644 --- a/source/source_base/kernels/math_kernel_op.cpp +++ b/source/source_base/kernels/math_kernel_op.cpp @@ -10,7 +10,8 @@ namespace ModuleBase template struct gemv_op { - void operator()(const char& trans, + void + operator() (const char& trans, const int& m, const int& n, const T* alpha, @@ -22,14 +23,15 @@ struct gemv_op T* Y, const int& incy) { - BlasConnector::gemv(trans, m, n, *alpha, A, lda, X, incx, *beta, Y, incy); + BlasConnector::gemv (trans, m, n, *alpha, A, lda, X, incx, *beta, Y, incy); } }; template struct gemm_op { - void operator()(const char& transa, + void + operator() (const char& transa, const char& transb, const int& m, const int& n, @@ -43,7 +45,7 @@ struct gemm_op T* c, const int& ldc) { - BlasConnector::gemm(transb, transa, n, m, k, *alpha, b, ldb, a, 
lda, *beta, c, ldc); + BlasConnector::gemm (transb, transa, n, m, k, *alpha, b, ldb, a, lda, *beta, c, ldc); } }; @@ -51,7 +53,8 @@ struct gemm_op template struct gemv_op_mt { - void operator()(const char& trans, + void + operator() (const char& trans, const int& m, const int& n, const T* alpha, @@ -63,14 +66,26 @@ struct gemv_op_mt T* Y, const int& incy) { - BlasConnector::gemv(trans, m, n, *alpha, A, lda, X, incx, *beta, Y, incy, base_device::AbacusDevice_t::DspDevice); + BlasConnector::gemv (trans, + m, + n, + *alpha, + A, + lda, + X, + incx, + *beta, + Y, + incy, + base_device::AbacusDevice_t::DspDevice); } }; template struct gemm_op_mt { - void operator()(const char& transa, + void + operator() (const char& transa, const char& transb, const int& m, const int& n, @@ -84,7 +99,20 @@ struct gemm_op_mt T* c, const int& ldc) { - BlasConnector::gemm(transb, transa, n, m, k, *alpha, b, ldb, a, lda, *beta, c, ldc, base_device::AbacusDevice_t::DspDevice); + BlasConnector::gemm (transb, + transa, + n, + m, + k, + *alpha, + b, + ldb, + a, + lda, + *beta, + c, + ldc, + base_device::AbacusDevice_t::DspDevice); } }; #endif @@ -92,71 +120,75 @@ struct gemm_op_mt template struct matrixTranspose_op { - void operator()(const int& row, - const int& col, - const T* input_matrix, - T* output_matrix) + void + operator() (const int& row, const int& col, const T* input_matrix, T* output_matrix) { T* temp = nullptr; - base_device::memory::resize_memory_op()(temp, row * col, "MTransOp"); + base_device::memory::resize_memory_op () (temp, row * col, "MTransOp"); #ifdef _OPENMP #pragma omp parallel for collapse(2) schedule(static) #endif for (int j = 0; j < col; j++) - { - for (int i = 0; i < row; i++) { - temp[j * row + i] = input_matrix[i * col + j]; + for (int i = 0; i < row; i++) + { + temp[j * row + i] = input_matrix[i * col + j]; + } } - } #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (int i = 0; i < row * col; i++) - { - output_matrix[i] = temp[i]; - } - 
base_device::memory::delete_memory_op()(temp); + { + output_matrix[i] = temp[i]; + } + base_device::memory::delete_memory_op () (temp); } }; template struct matrixCopy { - void operator()(const int& n1, const int& n2, const T* A, const int& LDA, T* B, const int& LDB) + void + operator() (const int& n1, const int& n2, const T* A, const int& LDA, T* B, const int& LDB) { #ifdef _OPENMP #pragma omp parallel for collapse(2) schedule(static) #endif for (int i = 0; i < n1; i++) - { - for (int j = 0; j < n2; j++) { - B[i * LDB + j] = A[i * LDA + j]; + for (int j = 0; j < n2; j++) + { + B[i * LDB + j] = A[i * LDA + j]; + } } - } } }; template -struct matrix_mul_vector_op { +struct matrix_mul_vector_op +{ using Real = typename GetTypeReal::type; - void operator()(const int& m, const int &n, - T *a, - const int &lda, - const Real *b, - const Real alpha, - T *c, - const int &ldc){ + void + operator() (const int& m, + const int& n, + T* a, + const int& lda, + const Real* b, + const Real alpha, + T* c, + const int& ldc) + { #ifdef _OPENMP #pragma omp parallel for collapse(2) schedule(static) #endif - for (int j = 0; j < n; j++){ - for (int i = 0; i < m; i++){ - c[j * ldc + i] = a[j * lda + i] * b[j] * alpha; + for (int j = 0; j < n; j++) + { + for (int i = 0; i < m; i++) + { + c[j * ldc + i] = a[j * lda + i] * b[j] * alpha; + } } - } - } }; diff --git a/source/source_base/kernels/math_kernel_op.h b/source/source_base/kernels/math_kernel_op.h index 3bff00f9e9f..32019dd5eb2 100644 --- a/source/source_base/kernels/math_kernel_op.h +++ b/source/source_base/kernels/math_kernel_op.h @@ -5,7 +5,6 @@ #include "source_base/macros.h" - #include "source_base/module_device/memory_op.h" #include "source_base/module_device/types.h" @@ -15,468 +14,582 @@ #include "cublas_v2.h" #endif //__CUDA || __UT_USE_CUDA -namespace ModuleBase { +namespace ModuleBase +{ //--------------------------------------------------------------------------------- //-----------------------------0. 
Tool Functions----------------------------------- //--------------------------------------------------------------------------------- -inline std::complex set_real_tocomplex(const std::complex &x) { - return {x.real(), 0.0}; +inline std::complex + set_real_tocomplex (const std::complex& x) +{ + return {x.real (), 0.0}; } -inline std::complex set_real_tocomplex(const std::complex &x) { - return {x.real(), 0.0}; +inline std::complex + set_real_tocomplex (const std::complex& x) +{ + return {x.real (), 0.0}; } -inline double set_real_tocomplex(const double &x) { return x; } - -inline float set_real_tocomplex(const float &x) { return x; } +inline double + set_real_tocomplex (const double& x) +{ + return x; +} -inline std::complex get_conj(const std::complex &x) { - return {x.real(), -x.imag()}; +inline float + set_real_tocomplex (const float& x) +{ + return x; } -inline std::complex get_conj(const std::complex &x) { - return {x.real(), -x.imag()}; +inline std::complex + get_conj (const std::complex& x) +{ + return {x.real (), -x.imag ()}; } -inline double get_conj(const double &x) { return x; } +inline std::complex + get_conj (const std::complex& x) +{ + return {x.real (), -x.imag ()}; +} -inline float get_conj(const float &x) { return x; } +inline double + get_conj (const double& x) +{ + return x; +} +inline float + get_conj (const float& x) +{ + return x; +} //--------------------------------------------------------------------------------- //-----------------------------1. 
Vector Operations-------------------------------- //--------------------------------------------------------------------------------- -template struct scal_op { - /// @brief x = alpha * x, where alpha and x are complex numbers - /// - /// Input Parameters - /// \param N : array size - /// \param alpha : input constant - /// \param X : input array - /// \param incx : computing strip of array X - /// - /// Output Parameters - /// \param X : output array - void operator()(const int &N, - const std::complex *alpha, std::complex *X, - const int &incx); +template +struct scal_op +{ + /// @brief x = alpha * x, where alpha and x are complex numbers + /// + /// Input Parameters + /// \param N : array size + /// \param alpha : input constant + /// \param X : input array + /// \param incx : computing strip of array X + /// + /// Output Parameters + /// \param X : output array + void operator() (const int& N, const std::complex* alpha, std::complex* X, const int& incx); }; -template struct vector_mul_real_op { - using Real = typename GetTypeReal::type; - /// @brief result[i] = vector[i] * constant, where vector is complex number and constant is real number。 - /// It is different from the scal_op, which is used to multiply a complex number by a complex number. - /// - /// Input Parameters - /// \param dim : array size - /// \param vector : input array - /// \param constant : input constant - /// - /// Output Parameters - /// \param result : output array - /// \note Use mulitple instead of divide. It is faster. - void operator()(const int dim, T* result, const T* vector, const Real constant); +template +struct vector_mul_real_op +{ + using Real = typename GetTypeReal::type; + /// @brief result[i] = vector[i] * constant, where vector is complex number and constant is real number。 + /// It is different from the scal_op, which is used to multiply a complex number by a complex number. 
+ /// + /// Input Parameters + /// \param dim : array size + /// \param vector : input array + /// \param constant : input constant + /// + /// Output Parameters + /// \param result : output array + /// \note Use mulitple instead of divide. It is faster. + void operator() (const int dim, T* result, const T* vector, const Real constant); }; // vector operator: result[i] = vector1[i](complex) * vector2[i](not complex) -template struct vector_mul_vector_op { - using Real = typename GetTypeReal::type; - /// @brief result[i] = vector1[i](complex) * vector2[i](not complex) - /// - /// Input Parameters - /// \param dim : array size - /// \param vector1 : input array A - /// \param vector2 : input array B - /// \param add : flag to control whether to add the result to the output array - /// - /// Output Parameters - /// \param result : output array - void operator()(const int& dim, T* result, const T* vector1, const Real* vector2, const bool& add = false); +template +struct vector_mul_vector_op +{ + using Real = typename GetTypeReal::type; + /// @brief result[i] = vector1[i](complex) * vector2[i](not complex) + /// + /// Input Parameters + /// \param dim : array size + /// \param vector1 : input array A + /// \param vector2 : input array B + /// \param add : flag to control whether to add the result to the output array + /// + /// Output Parameters + /// \param result : output array + void operator() (const int& dim, T* result, const T* vector1, const Real* vector2, const bool& add = false); }; // vector operator: result[i] = vector[i] / constant -template struct vector_div_constant_op { - using Real = typename GetTypeReal::type; - /// @brief result[i] = vector[i] / constant - /// - /// Input Parameters - /// \param dim : array size - /// \param vector : input array - /// \param constant : input constant - /// - /// Output Parameters - /// \param result : output array - void operator()(const int& dim, T* result, const T* vector, const Real constant); +template +struct 
vector_div_constant_op +{ + using Real = typename GetTypeReal::type; + /// @brief result[i] = vector[i] / constant + /// + /// Input Parameters + /// \param dim : array size + /// \param vector : input array + /// \param constant : input constant + /// + /// Output Parameters + /// \param result : output array + void operator() (const int& dim, T* result, const T* vector, const Real constant); }; // vector operator: result[i] = vector1[i](complex) / vector2[i](not complex) -template struct vector_div_vector_op { - using Real = typename GetTypeReal::type; - /// @brief result[i] = vector1[i](complex) / vector2[i](not complex) - /// - /// Input Parameters - /// \param dim : array size - /// \param vector1 : input array A - /// \param vector2 : input array B - /// - /// Output Parameters - /// \param result : output array - void operator()(const int &dim, T *result, const T *vector1, - const Real *vector2); +template +struct vector_div_vector_op +{ + using Real = typename GetTypeReal::type; + /// @brief result[i] = vector1[i](complex) / vector2[i](not complex) + /// + /// Input Parameters + /// \param dim : array size + /// \param vector1 : input array A + /// \param vector2 : input array B + /// + /// Output Parameters + /// \param result : output array + void operator() (const int& dim, T* result, const T* vector1, const Real* vector2); }; // compute Y = alpha * X + Y -template struct axpy_op { - /// @brief Y = alpha * X + Y - /// - /// Input Parameters - /// \param N : array size - /// \param alpha : input constant alpha - /// \param X : input array X - /// \param incX : computing strip of X - /// \param Y : computing strip of Y - /// \param incY : computing strip of Y - /// - /// Output Parameters - /// \param Y : output array Y - void operator()(const int &N, const T *alpha, const T *X, - const int &incX, T *Y, const int &incY); +template +struct axpy_op +{ + /// @brief Y = alpha * X + Y + /// + /// Input Parameters + /// \param N : array size + /// \param alpha : 
input constant alpha + /// \param X : input array X + /// \param incX : computing strip of X + /// \param Y : computing strip of Y + /// \param incY : computing strip of Y + /// + /// Output Parameters + /// \param Y : output array Y + void operator() (const int& N, const T* alpha, const T* X, const int& incX, T* Y, const int& incY); }; // vector operator: result[i] = vector1[i] * constant1 + vector2[i] * constant2 template -struct vector_add_vector_op { - using Real = typename GetTypeReal::type; - /// @brief result[i] = vector1[i] * constant1 + vector2[i] * constant2 - /// - /// Input Parameters - /// \param dim : array size - /// \param vector1 : input array A - /// \param constant1 : input constant a - /// \param vector2 : input array B - /// \param constant2 : input constant b - /// - /// Output Parameters - /// \param result : output array - void operator()(const int &dim, T *result, const T *vector1, - const Real constant1, const T *vector2, const Real constant2); +struct vector_add_vector_op +{ + using Real = typename GetTypeReal::type; + /// @brief result[i] = vector1[i] * constant1 + vector2[i] * constant2 + /// + /// Input Parameters + /// \param dim : array size + /// \param vector1 : input array A + /// \param constant1 : input constant a + /// \param vector2 : input array B + /// \param constant2 : input constant b + /// + /// Output Parameters + /// \param result : output array + void operator() (const int& dim, + T* result, + const T* vector1, + const Real constant1, + const T* vector2, + const Real constant2); }; -template struct dot_real_op { - using Real = typename GetTypeReal::type; - /// @brief dot_real_op computes the dot product of the given complex - /// arrays(treated as float arrays). And there's may have MPI communications - /// while enabling planewave parallization strategy. 
- /// - /// Input Parameters - /// \param dim : array size - /// \param psi_L : input array A - /// \param psi_R : input array B - /// \param reduce : flag to control whether to perform the MPI communications - /// - /// \return - /// FPTYPE : dot product result - Real operator()(const int &dim, const T *psi_L, - const T *psi_R, const bool reduce = true); +template +struct dot_real_op +{ + using Real = typename GetTypeReal::type; + /// @brief dot_real_op computes the dot product of the given complex + /// arrays(treated as float arrays). And there's may have MPI communications + /// while enabling planewave parallization strategy. + /// + /// Input Parameters + /// \param dim : array size + /// \param psi_L : input array A + /// \param psi_R : input array B + /// \param reduce : flag to control whether to perform the MPI communications + /// + /// \return + /// FPTYPE : dot product result + Real operator() (const int& dim, const T* psi_L, const T* psi_R, const bool reduce = true); }; - //--------------------------------------------------------------------------------- //-----------------------------2. 
Matrix Operations-------------------------------- //--------------------------------------------------------------------------------- // compute y = alpha * op(A) * x + beta * y -template struct gemv_op { - /// @brief y = alpha * op(A) * x + beta * y - /// - /// Input Parameters - /// \param trans : whether to transpose A - /// \param m : first dimension of matrix - /// \param n : second dimension of matrix - /// \param alpha : input constant alpha - /// \param A : input matrix A - /// \param lda : leading dimention of A - /// \param X : input array X - /// \param incx : computing strip of X - /// \param beta : input constant beta - /// \param Y : input array Y - /// \param incy : computing strip of Y - /// - /// Output Parameters - /// \param Y : output array Y - void operator()(const char &trans, const int &m, - const int &n, const T *alpha, const T *A, const int &lda, - const T *X, const int &incx, const T *beta, T *Y, - const int &incy); +template +struct gemv_op +{ + /// @brief y = alpha * op(A) * x + beta * y + /// + /// Input Parameters + /// \param trans : whether to transpose A + /// \param m : first dimension of matrix + /// \param n : second dimension of matrix + /// \param alpha : input constant alpha + /// \param A : input matrix A + /// \param lda : leading dimention of A + /// \param X : input array X + /// \param incx : computing strip of X + /// \param beta : input constant beta + /// \param Y : input array Y + /// \param incy : computing strip of Y + /// + /// Output Parameters + /// \param Y : output array Y + void operator() (const char& trans, + const int& m, + const int& n, + const T* alpha, + const T* A, + const int& lda, + const T* X, + const int& incx, + const T* beta, + T* Y, + const int& incy); }; // compute C = alpha * op(A) * op(B) + beta * C -template struct gemm_op { - /// @brief C = alpha * op(A) * op(B) + beta * C - /// - /// Input Parameters - /// \param transa : whether to transpose matrix A - /// \param transb : whether to 
transpose matrix B - /// \param m : first dimension of matrix mulplication - /// \param n : second dimension of matrix mulplication - /// \param k : third dimension of matrix mulplication - /// \param alpha : input constant alpha - /// \param a : input matrix A - /// \param lda : leading dimention of A - /// \param b : input matrix B - /// \param ldb : leading dimention of A - /// \param beta : input constant beta - /// \param c : input matrix C - /// \param ldc : leading dimention of C - /// - /// Output Parameters - /// \param c : output matrix C - void operator()(const char &transa, const char &transb, - const int &m, const int &n, const int &k, const T *alpha, - const T *a, const int &lda, const T *b, const int &ldb, - const T *beta, T *c, const int &ldc); +template +struct gemm_op +{ + /// @brief C = alpha * op(A) * op(B) + beta * C + /// + /// Input Parameters + /// \param transa : whether to transpose matrix A + /// \param transb : whether to transpose matrix B + /// \param m : first dimension of matrix mulplication + /// \param n : second dimension of matrix mulplication + /// \param k : third dimension of matrix mulplication + /// \param alpha : input constant alpha + /// \param a : input matrix A + /// \param lda : leading dimention of A + /// \param b : input matrix B + /// \param ldb : leading dimention of A + /// \param beta : input constant beta + /// \param c : input matrix C + /// \param ldc : leading dimention of C + /// + /// Output Parameters + /// \param c : output matrix C + void operator() (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const T* alpha, + const T* a, + const int& lda, + const T* b, + const int& ldb, + const T* beta, + T* c, + const int& ldc); }; #ifdef __DSP // compute Y = alpha * op(A) * X + beta * Y on DSP Hardware -template struct gemv_op_mt { - /// @brief Y = alpha * op(A) * X + beta * Y - /// - /// Input Parameters - /// \param trans : whether to transpose matrix A - /// \param 
m : row number of A - /// \param n : column number of A - /// \param alpha : input constant alpha - /// \param A : input matrix A - /// \param lda : leading dimension of A - /// \param X : input vector X - /// \param incx : increment of X - /// \param beta : input constant beta - /// \param Y : input vector Y - /// \param incy : increment of Y - /// - /// Output Parameters - /// \param Y : output vector Y - void operator()(const char &trans, const int &m, - const int &n, const T *alpha, const T *A, const int &lda, - const T *X, const int &incx, const T *beta, T *Y, - const int &incy); +template +struct gemv_op_mt +{ + /// @brief Y = alpha * op(A) * X + beta * Y + /// + /// Input Parameters + /// \param trans : whether to transpose matrix A + /// \param m : row number of A + /// \param n : column number of A + /// \param alpha : input constant alpha + /// \param A : input matrix A + /// \param lda : leading dimension of A + /// \param X : input vector X + /// \param incx : increment of X + /// \param beta : input constant beta + /// \param Y : input vector Y + /// \param incy : increment of Y + /// + /// Output Parameters + /// \param Y : output vector Y + void operator() (const char& trans, + const int& m, + const int& n, + const T* alpha, + const T* A, + const int& lda, + const T* X, + const int& incx, + const T* beta, + T* Y, + const int& incy); }; // compute C = alpha * op(A) * op(B) + beta * C on DSP Hardware -template struct gemm_op_mt { - /// @brief C = alpha * op(A) * op(B) + beta * C - /// - /// Input Parameters - /// \param transa : whether to transpose matrix A - /// \param transb : whether to transpose matrix B - /// \param m : first dimension of matrix mulplication - /// \param n : second dimension of matrix mulplication - /// \param k : third dimension of matrix mulplication - /// \param alpha : input constant alpha - /// \param a : input matrix A - /// \param lda : leading dimention of A - /// \param b : input matrix B - /// \param ldb : leading 
dimention of A - /// \param beta : input constant beta - /// \param c : input matrix C - /// \param ldc : leading dimention of C - /// - /// Output Parameters - /// \param c : output matrix C - void operator()(const char &transa, const char &transb, - const int &m, const int &n, const int &k, const T *alpha, - const T *a, const int &lda, const T *b, const int &ldb, - const T *beta, T *c, const int &ldc); +template +struct gemm_op_mt +{ + /// @brief C = alpha * op(A) * op(B) + beta * C + /// + /// Input Parameters + /// \param transa : whether to transpose matrix A + /// \param transb : whether to transpose matrix B + /// \param m : first dimension of matrix mulplication + /// \param n : second dimension of matrix mulplication + /// \param k : third dimension of matrix mulplication + /// \param alpha : input constant alpha + /// \param a : input matrix A + /// \param lda : leading dimention of A + /// \param b : input matrix B + /// \param ldb : leading dimention of A + /// \param beta : input constant beta + /// \param c : input matrix C + /// \param ldc : leading dimention of C + /// + /// Output Parameters + /// \param c : output matrix C + void operator() (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const T* alpha, + const T* a, + const int& lda, + const T* b, + const int& ldb, + const T* beta, + T* c, + const int& ldc); }; #endif -template struct matrixTranspose_op { - /// @brief transpose the input matrix - /// - /// Input Parameters - /// \param row : first dimension of matrix - /// \param col : second dimension of matrix - /// \param input_matrix : input matrix - /// - /// Output Parameters - /// \param output_matrix : output matrix - void operator()(const int &row, const int &col, - const T *input_matrix, T *output_matrix); +template +struct matrixTranspose_op +{ + /// @brief transpose the input matrix + /// + /// Input Parameters + /// \param row : first dimension of matrix + /// \param col : second dimension 
of matrix + /// \param input_matrix : input matrix + /// + /// Output Parameters + /// \param output_matrix : output matrix + void operator() (const int& row, const int& col, const T* input_matrix, T* output_matrix); }; -template struct matrixCopy { - /// @brief copy matrix A to B, they can have different leading dimensions - /// - /// Input Parameters - /// \param n1 : first dimension of matrix - /// \param n2 : second dimension of matrix - /// \param A : input matrix A - /// \param LDA : leading dimension of A - /// \param LDB : leading dimension of B - /// - /// Output Parameters - /// \param B : output matrix B - void operator()(const int& n1, const int& n2, const T* A, const int& LDA, T* B, const int& LDB); +template +struct matrixCopy +{ + /// @brief copy matrix A to B, they can have different leading dimensions + /// + /// Input Parameters + /// \param n1 : first dimension of matrix + /// \param n2 : second dimension of matrix + /// \param A : input matrix A + /// \param LDA : leading dimension of A + /// \param LDB : leading dimension of B + /// + /// Output Parameters + /// \param B : output matrix B + void operator() (const int& n1, const int& n2, const T* A, const int& LDA, T* B, const int& LDB); }; template -struct matrix_mul_vector_op { +struct matrix_mul_vector_op +{ using Real = typename GetTypeReal::type; - /// @brief a * b * beta by each column - /// - /// Input Parameters - /// \param m : row number - /// \param n : column number - /// \param a : input matrix - /// \param lda : leading dimension of matrix a - /// \param b : input vector - /// \param alpha : factor - /// \param ldc : leading dimension of matrix c - /// - /// Output Parameters - /// \param c : output matrix - void operator()(const int &m, const int &n, - T *a, - const int &lda, - const Real *b, - const Real alpha, - T *c, - const int &ldc); + /// @brief a * b * beta by each column + /// + /// Input Parameters + /// \param m : row number + /// \param n : column number + /// \param a 
: input matrix + /// \param lda : leading dimension of matrix a + /// \param b : input vector + /// \param alpha : factor + /// \param ldc : leading dimension of matrix c + /// + /// Output Parameters + /// \param c : output matrix + void operator() (const int& m, + const int& n, + T* a, + const int& lda, + const Real* b, + const Real alpha, + T* c, + const int& ldc); }; template -struct apply_eigenvalues_op { +struct apply_eigenvalues_op +{ using Real = typename GetTypeReal::type; - void operator()(const Device *d, const int &nbase, const int &nbase_x, const int ¬conv, - T *result, const T *vectors, const Real *eigenvalues); + void operator() (const Device* d, + const int& nbase, + const int& nbase_x, + const int& notconv, + T* result, + const T* vectors, + const Real* eigenvalues); }; template -struct precondition_op { +struct precondition_op +{ using Real = typename GetTypeReal::type; - void operator()(const Device* d, - const int& dim, - T* psi_iter, - const int& nbase, - const int& notconv, - const Real* precondition, - const Real* eigenvalues); + void operator() (const Device* d, + const int& dim, + T* psi_iter, + const int& nbase, + const int& notconv, + const Real* precondition, + const Real* eigenvalues); }; template -struct normalize_op { +struct normalize_op +{ using Real = typename GetTypeReal::type; - void operator()(const Device* d, - const int& dim, - T* psi_iter, - const int& nbase, - const int& notconv, - Real* psi_norm = nullptr); + void operator() (const Device* d, + const int& dim, + T* psi_iter, + const int& nbase, + const int& notconv, + Real* psi_norm = nullptr); }; template -struct normalize_op { +struct normalize_op +{ using Real = typename GetTypeReal::type; - void operator()(const base_device::DEVICE_GPU* d, - const int& dim, - T* psi_iter, - const int& nbase, - const int& notconv, - Real* psi_norm); + void operator() (const base_device::DEVICE_GPU* d, + const int& dim, + T* psi_iter, + const int& nbase, + const int& notconv, + Real* 
psi_norm); }; #if __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM // Partially specialize functor for base_device::GpuDevice. -template struct dot_real_op { - using Real = typename GetTypeReal::type; - Real operator()(const int &dim, - const T *psi_L, const T *psi_R, const bool reduce = true); +template +struct dot_real_op +{ + using Real = typename GetTypeReal::type; + Real operator() (const int& dim, const T* psi_L, const T* psi_R, const bool reduce = true); }; // vector operator: result[i] = vector[i] / constant template struct vector_mul_real_op { - using Real = typename GetTypeReal::type; - void operator()(const int dim, T* result, const T* vector, const Real constant); + using Real = typename GetTypeReal::type; + void operator() (const int dim, T* result, const T* vector, const Real constant); }; // vector operator: result[i] = vector1[i](complex) * vector2[i](not complex) -template struct vector_mul_vector_op { - using Real = typename GetTypeReal::type; - void operator()(const int& dim, T* result, const T* vector1, const Real* vector2, const bool& add = false); +template +struct vector_mul_vector_op +{ + using Real = typename GetTypeReal::type; + void operator() (const int& dim, T* result, const T* vector1, const Real* vector2, const bool& add = false); }; // vector operator: result[i] = vector[i] / constant -template struct vector_div_constant_op { - using Real = typename GetTypeReal::type; - void operator()(const int& dim, T* result, const T* vector, const Real constant); +template +struct vector_div_constant_op +{ + using Real = typename GetTypeReal::type; + void operator() (const int& dim, T* result, const T* vector, const Real constant); }; // vector operator: result[i] = vector1[i](complex) / vector2[i](not complex) -template struct vector_div_vector_op { - using Real = typename GetTypeReal::type; - void operator()(const int &dim, T *result, - const T *vector1, const Real *vector2); +template +struct vector_div_vector_op +{ + using Real = typename 
GetTypeReal::type; + void operator() (const int& dim, T* result, const T* vector1, const Real* vector2); }; // vector operator: result[i] = vector1[i] * constant1 + vector2[i] * constant2 template -struct vector_add_vector_op { - using Real = typename GetTypeReal::type; - void operator()(const int &dim, T *result, - const T *vector1, const Real constant1, const T *vector2, - const Real constant2); +struct vector_add_vector_op +{ + using Real = typename GetTypeReal::type; + void operator() (const int& dim, + T* result, + const T* vector1, + const Real constant1, + const T* vector2, + const Real constant2); }; -template struct matrixCopy { - void operator()(const int& n1, - const int& n2, - const T* A, // input - const int& LDA, - T* B, // output - const int& LDB); +template +struct matrixCopy +{ + void operator() (const int& n1, + const int& n2, + const T* A, // input + const int& LDA, + T* B, // output + const int& LDB); }; -template struct matrix_mul_vector_op { - using Real = typename GetTypeReal::type; - void operator()(const int &m, const int &n, - T *a, - const int &lda, - const Real *b, - const Real alpha, - T *c, - const int &ldc); +template +struct matrix_mul_vector_op +{ + using Real = typename GetTypeReal::type; + void operator() (const int& m, + const int& n, + T* a, + const int& lda, + const Real* b, + const Real alpha, + T* c, + const int& ldc); }; -void createGpuBlasHandle(); -void destoryBLAShandle(); +void createGpuBlasHandle (); +void destoryBLAShandle (); // vector operator: result[i] = -lambda[i] * vector[i] -template struct apply_eigenvalues_op { +template +struct apply_eigenvalues_op +{ using Real = typename GetTypeReal::type; - void operator()(const base_device::DEVICE_GPU *d, const int &nbase, const int &nbase_x, const int ¬conv, - T *result, const T *vectors, const Real *eigenvalues); + void operator() (const base_device::DEVICE_GPU* d, + const int& nbase, + const int& nbase_x, + const int& notconv, + T* result, + const T* vectors, + const 
Real* eigenvalues); }; template -struct precondition_op { +struct precondition_op +{ using Real = typename GetTypeReal::type; - void operator()(const base_device::DEVICE_GPU* d, - const int& dim, - T* psi_iter, - const int& nbase, - const int& notconv, - const Real* precondition, - const Real* eigenvalues); + void operator() (const base_device::DEVICE_GPU* d, + const int& dim, + T* psi_iter, + const int& nbase, + const int& notconv, + const Real* precondition, + const Real* eigenvalues); }; #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM -} // namespace hsolver +} // namespace ModuleBase #endif // MODULE_HSOLVER_MATH_KERNEL_H \ No newline at end of file diff --git a/source/source_base/kernels/math_kernel_op_vec.cpp b/source/source_base/kernels/math_kernel_op_vec.cpp index 8957a96ba11..38d41c99e69 100644 --- a/source/source_base/kernels/math_kernel_op_vec.cpp +++ b/source/source_base/kernels/math_kernel_op_vec.cpp @@ -2,19 +2,16 @@ #include "source_base/module_external/blas_connector.h" #include "source_base/parallel_reduce.h" - namespace ModuleBase { template struct scal_op { - void operator()(const int& N, - const std::complex* alpha, - std::complex* X, - const int& incx) + void + operator() (const int& N, const std::complex* alpha, std::complex* X, const int& incx) { - BlasConnector::scal(N, *alpha, X, incx); + BlasConnector::scal (N, *alpha, X, incx); } }; @@ -22,15 +19,16 @@ template struct vector_mul_real_op { using Real = typename GetTypeReal::type; - void operator()(const int dim, T* result, const T* vector, const Real constant) + void + operator() (const int dim, T* result, const T* vector, const Real constant) { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (int i = 0; i < dim; i++) - { - result[i] = vector[i] * constant; - } + { + result[i] = vector[i] * constant; + } } }; @@ -38,28 +36,29 @@ template struct vector_mul_vector_op { using Real = typename GetTypeReal::type; - void operator()(const int& dim, T* result, const 
T* vector1, const Real* vector2, const bool& add) + void + operator() (const int& dim, T* result, const T* vector1, const Real* vector2, const bool& add) { if (add) - { + { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int i = 0; i < dim; i++) - { - result[i] += vector1[i] * vector2[i]; + for (int i = 0; i < dim; i++) + { + result[i] += vector1[i] * vector2[i]; + } } - } else - { + { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int i = 0; i < dim; i++) - { - result[i] = vector1[i] * vector2[i]; + for (int i = 0; i < dim; i++) + { + result[i] = vector1[i] * vector2[i]; + } } - } } }; @@ -67,15 +66,16 @@ template struct vector_div_constant_op { using Real = typename GetTypeReal::type; - void operator()(const int& dim, T* result, const T* vector, const Real constant) + void + operator() (const int& dim, T* result, const T* vector, const Real constant) { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (int i = 0; i < dim; i++) - { - result[i] = vector[i] / constant; - } + { + result[i] = vector[i] / constant; + } } }; @@ -83,38 +83,35 @@ template struct vector_div_vector_op { using Real = typename GetTypeReal::type; - void operator()(const int& dim, T* result, const T* vector1, const Real* vector2) + void + operator() (const int& dim, T* result, const T* vector1, const Real* vector2) { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (int i = 0; i < dim; i++) - { - result[i] = vector1[i] / vector2[i]; - } + { + result[i] = vector1[i] / vector2[i]; + } } }; template struct axpy_op { - void operator()(const int& dim, - const T* alpha, - const T* X, - const int& incX, - T* Y, - const int& incY) + void + operator() (const int& dim, const T* alpha, const T* X, const int& incX, T* Y, const int& incY) { - BlasConnector::axpy(dim, *alpha, X, incX, Y, incY); + BlasConnector::axpy (dim, *alpha, X, incX, Y, incY); } }; - template struct vector_add_vector_op { using Real = typename 
GetTypeReal::type; - void operator()(const int& dim, + void + operator() (const int& dim, T* result, const T* vector1, const Real constant1, @@ -125,24 +122,23 @@ struct vector_add_vector_op #pragma omp parallel for schedule(static) #endif for (int i = 0; i < dim; i++) - { - result[i] = vector1[i] * constant1 + vector2[i] * constant2; - } + { + result[i] = vector1[i] * constant1 + vector2[i] * constant2; + } } }; - - template struct dot_real_op { - FPTYPE operator()(const int& dim, const FPTYPE* psi_L, const FPTYPE* psi_R, const bool reduce) + FPTYPE + operator() (const int& dim, const FPTYPE * psi_L, const FPTYPE * psi_R, const bool reduce) { - FPTYPE result = BlasConnector::dot(dim, psi_L, 1, psi_R, 1); + FPTYPE result = BlasConnector::dot (dim, psi_L, 1, psi_R, 1); if (reduce) - { - Parallel_Reduce::reduce_pool(result); - } + { + Parallel_Reduce::reduce_pool (result); + } return result; } }; @@ -150,19 +146,17 @@ struct dot_real_op template struct dot_real_op, base_device::DEVICE_CPU> { - FPTYPE operator()(const int& dim, - const std::complex* psi_L, - const std::complex* psi_R, - const bool reduce) + FPTYPE + operator() (const int& dim, const std::complex* psi_L, const std::complex* psi_R, const bool reduce) { // Note that ddot_(2*dim,a,1,b,1) = REAL( zdotc_(dim,a,1,b,1) ) - const FPTYPE* pL = reinterpret_cast(psi_L); - const FPTYPE* pR = reinterpret_cast(psi_R); - FPTYPE result = BlasConnector::dot(2 * dim, pL, 1, pR, 1); + const FPTYPE* pL = reinterpret_cast (psi_L); + const FPTYPE* pR = reinterpret_cast (psi_R); + FPTYPE result = BlasConnector::dot (2 * dim, pL, 1, pR, 1); if (reduce) - { - Parallel_Reduce::reduce_pool(result); - } + { + Parallel_Reduce::reduce_pool (result); + } return result; } }; diff --git a/source/source_base/kernels/math_ylm_op.cpp b/source/source_base/kernels/math_ylm_op.cpp index 833f84d4086..07641ae53c2 100644 --- a/source/source_base/kernels/math_ylm_op.cpp +++ b/source/source_base/kernels/math_ylm_op.cpp @@ -1,33 +1,37 @@ #include 
"source_base/kernels/math_ylm_op.h" #include "source_base/libm/libm.h" -namespace ModuleBase { +namespace ModuleBase +{ template -__inline__ -FPTYPE __fact(const int n) { +__inline__ FPTYPE + __fact (const int n) +{ FPTYPE f = 1.0; - for (int i = n; i > 1; i--) { - f *= i; - } + for (int i = n; i > 1; i--) + { + f *= i; + } return f; } -__inline__ -int __semi_fact(const int n) +__inline__ int + __semi_fact (const int n) { int semif = 1; for (int i = n; i > 2; i -= 2) - { - semif *= i; - } + { + semif *= i; + } return semif; } template struct cal_ylm_real_op { - void operator()(const base_device::DEVICE_CPU* ctx, + void + operator() (const base_device::DEVICE_CPU* ctx, const int& ng, const int& lmax, const FPTYPE& SQRT2, @@ -42,89 +46,100 @@ struct cal_ylm_real_op #ifdef _OPENMP #pragma omp parallel for #endif - for (int ig = 0; ig < ng; ig++) { - //---------------------------------------------------------- - // EXPLAIN : if lmax = 1,only use Y00 , output result. - //---------------------------------------------------------- - if (lmax == 0) { - ylm[0 * ng + ig] = SQRT_INVERSE_FOUR_PI; - continue; - } - //---------------------------------------------------------- - // LOCAL VARIABLES : - // NAME : cost = cos(theta),theta and phi are polar angles - // NAME : phi - //---------------------------------------------------------- - const FPTYPE gmod = sqrt(g[ig * 3 + 0] * g[ig * 3 + 0] + g[ig * 3 + 1] * g[ig * 3 + 1] + g[ig * 3 + 2] * g[ig * 3 + 2]); - FPTYPE cost = gmod < 1.0e-9 ? 0.0 : g[ig * 3 + 2] / gmod; - FPTYPE phi; - // beware the arc tan, it is defined modulo pi - if (g[ig * 3 + 0] > 1.0e-9) { - phi = atan(g[ig * 3 + 1] / g[ig * 3 + 0]); - } - else if (g[ig * 3 + 0] < -1.e-9) { - phi = atan(g[ig * 3 + 1] / g[ig * 3 + 0]) + PI; - } - else { - phi = PI_HALF * ((g[ig * 3 + 1] >= 0.0) ? 
1.0 : -1.0); //HLX: modified on 10/13/2006 - } // end if - //========================================================== - // NAME : p(Legendre Polynomials) (0 <= m <= l) - //========================================================== - int lm = -1; - for (int l = 0; l <= lmax; l++) { - const FPTYPE c = sqrt((2 * l + 1) / FOUR_PI); - if (l == 0) { - p[0 * (lmax + 1) * ng + 0 * ng + ig] = 1.0; - } - else if (l == 1) { - p[0 * (lmax + 1) * ng + 1 * ng + ig] = cost; - p[1 * (lmax + 1) * ng + 1 * ng + ig] = -sqrt(std::max(0.0, 1.0 - cost * cost)); - } - else { - const int l1 = l - 1, - l2 = l - 2, - l3 = 2 * l - 1; - // recursion on l for P(:,l,m) - for (int m = 0; m <= l2; m++) { // do m = 0, l - 2//mohan modify 2007-10-13 - p[m * (lmax + 1) * ng + l * ng + ig] = - (cost * l3 * p[m * (lmax + 1) * ng + l1 * ng + ig] - - (l1 + m) * p[m * (lmax + 1) * ng + l2 * ng + ig]) / (l - m); - } // end do - p[l1 * (lmax + 1) * ng + l * ng + ig] = - cost * l3 * p[l1 * (lmax + 1) * ng + l1 * ng + ig]; - FPTYPE x2 = std::max(0.0, 1.0 - cost * cost); - p[l * (lmax + 1) * ng + l * ng + ig] = __semi_fact(l3) * pow(x2, static_cast(l) / 2.0);//mohan modify 2007-10-13 - if (l % 2 == 1) { - p[l * (lmax + 1) * ng + l * ng + ig] *= -1; + for (int ig = 0; ig < ng; ig++) + { + //---------------------------------------------------------- + // EXPLAIN : if lmax = 1,only use Y00 , output result. + //---------------------------------------------------------- + if (lmax == 0) + { + ylm[0 * ng + ig] = SQRT_INVERSE_FOUR_PI; + continue; + } + //---------------------------------------------------------- + // LOCAL VARIABLES : + // NAME : cost = cos(theta),theta and phi are polar angles + // NAME : phi + //---------------------------------------------------------- + const FPTYPE gmod = sqrt (g[ig * 3 + 0] * g[ig * 3 + 0] + g[ig * 3 + 1] * g[ig * 3 + 1] + + g[ig * 3 + 2] * g[ig * 3 + 2]); + FPTYPE cost = gmod < 1.0e-9 ? 
0.0 : g[ig * 3 + 2] / gmod; + FPTYPE phi; + // beware the arc tan, it is defined modulo pi + if (g[ig * 3 + 0] > 1.0e-9) + { + phi = atan (g[ig * 3 + 1] / g[ig * 3 + 0]); + } + else if (g[ig * 3 + 0] < -1.e-9) + { + phi = atan (g[ig * 3 + 1] / g[ig * 3 + 0]) + PI; } - } // end if + else + { + phi = PI_HALF * ((g[ig * 3 + 1] >= 0.0) ? 1.0 : -1.0); // HLX: modified on 10/13/2006 + } // end if + //========================================================== + // NAME : p(Legendre Polynomials) (0 <= m <= l) + //========================================================== + int lm = -1; + for (int l = 0; l <= lmax; l++) + { + const FPTYPE c = sqrt ((2 * l + 1) / FOUR_PI); + if (l == 0) + { + p[0 * (lmax + 1) * ng + 0 * ng + ig] = 1.0; + } + else if (l == 1) + { + p[0 * (lmax + 1) * ng + 1 * ng + ig] = cost; + p[1 * (lmax + 1) * ng + 1 * ng + ig] = -sqrt (std::max (0.0, 1.0 - cost * cost)); + } + else + { + const int l1 = l - 1, l2 = l - 2, l3 = 2 * l - 1; + // recursion on l for P(:,l,m) + for (int m = 0; m <= l2; m++) + { // do m = 0, l - 2//mohan modify 2007-10-13 + p[m * (lmax + 1) * ng + l * ng + ig] + = (cost * l3 * p[m * (lmax + 1) * ng + l1 * ng + ig] + - (l1 + m) * p[m * (lmax + 1) * ng + l2 * ng + ig]) + / (l - m); + } // end do + p[l1 * (lmax + 1) * ng + l * ng + ig] + = cost * l3 * p[l1 * (lmax + 1) * ng + l1 * ng + ig]; + FPTYPE x2 = std::max (0.0, 1.0 - cost * cost); + p[l * (lmax + 1) * ng + l * ng + ig] + = __semi_fact (l3) + * pow (x2, static_cast (l) / 2.0); // mohan modify 2007-10-13 + if (l % 2 == 1) + { + p[l * (lmax + 1) * ng + l * ng + ig] *= -1; + } + } // end if - // Y_lm, m = 0 - ++lm; - ylm[lm * ng + ig] = c * p[0 * (lmax + 1) * ng + l * ng + ig]; + // Y_lm, m = 0 + ++lm; + ylm[lm * ng + ig] = c * p[0 * (lmax + 1) * ng + l * ng + ig]; - for (int m = 1; m <= l; m++) { - // Y_lm, m > 0 - const FPTYPE same = - c * sqrt(__fact(l - m) / - __fact(l + m)) * SQRT2; - FPTYPE sinp, cosp; - ModuleBase::libm::sincos(m * phi, &sinp, &cosp); - ++lm; - ylm[lm * 
ng + ig] = same * p[m * (lmax + 1) * ng + l * ng + ig] * cosp; + for (int m = 1; m <= l; m++) + { + // Y_lm, m > 0 + const FPTYPE same = c * sqrt (__fact (l - m) / __fact (l + m)) * SQRT2; + FPTYPE sinp, cosp; + ModuleBase::libm::sincos (m * phi, &sinp, &cosp); + ++lm; + ylm[lm * ng + ig] = same * p[m * (lmax + 1) * ng + l * ng + ig] * cosp; - // Y_lm, m < 0 - ++lm; - ylm[lm * ng + ig] = same * p[m * (lmax + 1) * ng + l * ng + ig] * sinp; - } - }// end do - } + // Y_lm, m < 0 + ++lm; + ylm[lm * ng + ig] = same * p[m * (lmax + 1) * ng + l * ng + ig] * sinp; + } + } // end do + } } }; template struct cal_ylm_real_op; template struct cal_ylm_real_op; -} // namespace ModuleBase - +} // namespace ModuleBase diff --git a/source/source_base/kernels/math_ylm_op.h b/source/source_base/kernels/math_ylm_op.h index ef32c572fc6..2021937718e 100644 --- a/source/source_base/kernels/math_ylm_op.h +++ b/source/source_base/kernels/math_ylm_op.h @@ -4,10 +4,12 @@ #include "source_base/module_device/types.h" #include -namespace ModuleBase { +namespace ModuleBase +{ template -struct cal_ylm_real_op { +struct cal_ylm_real_op +{ /// @brief YLM_REAL::Real spherical harmonics ylm(G) up to l=lmax /// Use Numerical recursive algorithm as given in Numerical Recipes /// @@ -25,37 +27,36 @@ struct cal_ylm_real_op { /// /// Output Parameters /// @param ylm - output array - void operator() ( - const Device *ctx, - const int &ng, - const int &lmax, - const FPTYPE &SQRT2, - const FPTYPE &PI, - const FPTYPE &PI_HALF, - const FPTYPE &FOUR_PI, - const FPTYPE &SQRT_INVERSE_FOUR_PI, - const FPTYPE *g, - FPTYPE * p, - FPTYPE * ylm); + void operator() (const Device* ctx, + const int& ng, + const int& lmax, + const FPTYPE& SQRT2, + const FPTYPE& PI, + const FPTYPE& PI_HALF, + const FPTYPE& FOUR_PI, + const FPTYPE& SQRT_INVERSE_FOUR_PI, + const FPTYPE* g, + FPTYPE* p, + FPTYPE* ylm); }; #if __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM template struct cal_ylm_real_op { - void operator()(const 
base_device::DEVICE_GPU* ctx, - const int& ng, - const int& lmax, - const FPTYPE& SQRT2, - const FPTYPE& PI, - const FPTYPE& PI_HALF, - const FPTYPE& FOUR_PI, - const FPTYPE& SQRT_INVERSE_FOUR_PI, - const FPTYPE* g, - FPTYPE* p, - FPTYPE* ylm); + void operator() (const base_device::DEVICE_GPU* ctx, + const int& ng, + const int& lmax, + const FPTYPE& SQRT2, + const FPTYPE& PI, + const FPTYPE& PI_HALF, + const FPTYPE& FOUR_PI, + const FPTYPE& SQRT_INVERSE_FOUR_PI, + const FPTYPE* g, + FPTYPE* p, + FPTYPE* ylm); }; #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM -} // namespace ModuleBase -#endif //MODULE_BASE_MATH_MULTI_DEVICE_H \ No newline at end of file +} // namespace ModuleBase +#endif // MODULE_BASE_MATH_MULTI_DEVICE_H \ No newline at end of file diff --git a/source/source_base/kernels/rocm/math_kernel_op.hip.cu b/source/source_base/kernels/rocm/math_kernel_op.hip.cu index 1b4f30d6b23..55baaf42764 100644 --- a/source/source_base/kernels/rocm/math_kernel_op.hip.cu +++ b/source/source_base/kernels/rocm/math_kernel_op.hip.cu @@ -8,174 +8,200 @@ #include #include template <> -struct GetTypeReal> { +struct GetTypeReal> +{ using type = float; /**< The return type specialization for std::complex. */ }; template <> -struct GetTypeReal> { +struct GetTypeReal> +{ using type = double; /**< The return type specialization for std::complex. */ }; -namespace ModuleBase { +namespace ModuleBase +{ template -struct GetTypeThrust { +struct GetTypeThrust +{ using type = T; }; template <> -struct GetTypeThrust> { +struct GetTypeThrust> +{ using type = thrust::complex; /**< The return type specialization for std::complex. */ }; template <> -struct GetTypeThrust> { +struct GetTypeThrust> +{ using type = thrust::complex; /**< The return type specialization for std::complex. 
*/ }; static hipblasHandle_t cublas_handle = nullptr; -void xdot_wrapper(const int &n, const float * x, const int &incx, const float * y, const int &incy, float &result) { - hipblasErrcheck(hipblasSdot(cublas_handle, n, x, incx, y, incy, &result)); +void + xdot_wrapper (const int& n, const float* x, const int& incx, const float* y, const int& incy, float& result) +{ + hipblasErrcheck (hipblasSdot (cublas_handle, n, x, incx, y, incy, &result)); } -void xdot_wrapper(const int &n, const double * x, const int &incx, const double * y, const int &incy, double &result) { - hipblasErrcheck(hipblasDdot(cublas_handle, n, x, incx, y, incy, &result)); +void + xdot_wrapper (const int& n, const double* x, const int& incx, const double* y, const int& incy, double& result) +{ + hipblasErrcheck (hipblasDdot (cublas_handle, n, x, incx, y, incy, &result)); } -void createGpuBlasHandle(){ - if (cublas_handle == nullptr) { - hipblasErrcheck(hipblasCreate(&cublas_handle)); - } +void + createGpuBlasHandle () +{ + if (cublas_handle == nullptr) + { + hipblasErrcheck (hipblasCreate (&cublas_handle)); + } } -void destoryBLAShandle(){ - if (cublas_handle != nullptr) { - hipblasErrcheck(hipblasDestroy(cublas_handle)); - cublas_handle = nullptr; - } +void + destoryBLAShandle () +{ + if (cublas_handle != nullptr) + { + hipblasErrcheck (hipblasDestroy (cublas_handle)); + cublas_handle = nullptr; + } } template <> -void scal_op::operator()(const int& N, +void + scal_op::operator() (const int& N, const std::complex* alpha, std::complex* X, const int& incx) { - hipblasErrcheck(hipblasCscal(cublas_handle, N, (hipblasComplex*)alpha, (hipblasComplex*)X, incx)); + hipblasErrcheck (hipblasCscal (cublas_handle, N, (hipblasComplex*)alpha, (hipblasComplex*)X, incx)); } template <> -void scal_op::operator()(const int& N, +void + scal_op::operator() (const int& N, const std::complex* alpha, std::complex* X, const int& incx) { - hipblasErrcheck(hipblasZscal(cublas_handle, N, (hipblasDoubleComplex*)alpha, 
(hipblasDoubleComplex*)X, incx)); + hipblasErrcheck (hipblasZscal (cublas_handle, N, (hipblasDoubleComplex*)alpha, (hipblasDoubleComplex*)X, incx)); } template <> -void axpy_op::operator()(const int& N, +void + axpy_op::operator() (const int& N, const double* alpha, const double* X, const int& incX, double* Y, const int& incY) { - hipblasErrcheck(hipblasDaxpy(cublas_handle, N, alpha, X, incX, Y, incY)); + hipblasErrcheck (hipblasDaxpy (cublas_handle, N, alpha, X, incX, Y, incY)); } template <> -void axpy_op, base_device::DEVICE_GPU>::operator()(const int& N, +void + axpy_op, base_device::DEVICE_GPU>::operator() (const int& N, const std::complex* alpha, const std::complex* X, const int& incX, std::complex* Y, const int& incY) { - hipblasErrcheck( - hipblasCaxpy(cublas_handle, N, (hipblasComplex*)alpha, (hipblasComplex*)X, incX, (hipblasComplex*)Y, incY)); + hipblasErrcheck ( + hipblasCaxpy (cublas_handle, N, (hipblasComplex*)alpha, (hipblasComplex*)X, incX, (hipblasComplex*)Y, incY)); } template <> -void axpy_op, base_device::DEVICE_GPU>::operator()(const int& N, +void + axpy_op, base_device::DEVICE_GPU>::operator() (const int& N, const std::complex* alpha, const std::complex* X, const int& incX, std::complex* Y, const int& incY) { - hipblasErrcheck(hipblasZaxpy(cublas_handle, - N, - (hipblasDoubleComplex*)alpha, - (hipblasDoubleComplex*)X, - incX, - (hipblasDoubleComplex*)Y, - incY)); + hipblasErrcheck (hipblasZaxpy (cublas_handle, + N, + (hipblasDoubleComplex*)alpha, + (hipblasDoubleComplex*)X, + incX, + (hipblasDoubleComplex*)Y, + incY)); } template -__launch_bounds__(1024) -__global__ void matrix_transpose_kernel( - const int row, - const int col, - const T* in, - T* out) +__launch_bounds__ (1024) __global__ void matrix_transpose_kernel (const int row, const int col, const T* in, T* out) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < row) - { - for (int j = 0; j < col; j++) { - out[j * row + i] = in[i * col + j]; + for (int j = 0; j < col; j++) + { + 
out[j * row + i] = in[i * col + j]; + } } - } } template -__launch_bounds__(1024) __global__ - void matrix_copy_kernel(const int n1, const int n2, const T* A, const int LDA, T* B, const int LDB) +__launch_bounds__ (1024) __global__ + void matrix_copy_kernel (const int n1, const int n2, const T* A, const int LDA, T* B, const int LDB) { const int i = blockIdx.x * blockDim.x + threadIdx.x; const int j = blockIdx.y * blockDim.y + threadIdx.y; if (i < n1 && j < n2) - { - B[i * LDB + j] = A[i * LDA + j]; - } + { + B[i * LDB + j] = A[i * LDA + j]; + } } template -__launch_bounds__(1024) __global__ -void matrix_multiply_vector_kernel(const int m, const int n, T *a, const int lda, const Real *b, const Real alpha, T *c, const int ldc){ +__launch_bounds__ (1024) __global__ void matrix_multiply_vector_kernel (const int m, + const int n, + T* a, + const int lda, + const Real* b, + const Real alpha, + T* c, + const int ldc) +{ int row = blockIdx.x * blockDim.x + threadIdx.x; int col = blockIdx.y * blockDim.y + threadIdx.y; - if (col >= n || row >= m) return; + if (col >= n || row >= m) + return; c[col * ldc + row] = a[col * lda + row] * b[col] * alpha; } -hipblasOperation_t judge_trans_op(bool is_complex, const char& trans, const char* name) +hipblasOperation_t + judge_trans_op (bool is_complex, const char& trans, const char* name) { if (trans == 'N') - { - return HIPBLAS_OP_N; - } - else if(trans == 'T') - { - return HIPBLAS_OP_T; - } - else if(is_complex && trans == 'C') - { - return HIPBLAS_OP_C; - } + { + return HIPBLAS_OP_N; + } + else if (trans == 'T') + { + return HIPBLAS_OP_T; + } + else if (is_complex && trans == 'C') + { + return HIPBLAS_OP_C; + } else - { - ModuleBase::WARNING_QUIT(name, std::string("Unknown trans type ") + trans + std::string(" !")); - } + { + ModuleBase::WARNING_QUIT (name, std::string ("Unknown trans type ") + trans + std::string (" !")); + } } template <> -void gemv_op::operator()(const char& trans, +void + gemv_op::operator() (const char& trans, 
const int& m, const int& n, const double* alpha, @@ -187,12 +213,13 @@ void gemv_op::operator()(const char& trans, double* Y, const int& incy) { - hipblasOperation_t cutrans = judge_trans_op(false, trans, "gemv_op"); - hipblasErrcheck(hipblasDgemv(cublas_handle, cutrans, m, n, alpha, A, lda, X, incx, beta, Y, incy)); + hipblasOperation_t cutrans = judge_trans_op (false, trans, "gemv_op"); + hipblasErrcheck (hipblasDgemv (cublas_handle, cutrans, m, n, alpha, A, lda, X, incx, beta, Y, incy)); } template <> -void gemv_op, base_device::DEVICE_GPU>::operator()(const char& trans, +void + gemv_op, base_device::DEVICE_GPU>::operator() (const char& trans, const int& m, const int& n, const std::complex* alpha, @@ -204,12 +231,24 @@ void gemv_op, base_device::DEVICE_GPU>::operator()(const cha std::complex* Y, const int& incy) { - hipblasOperation_t cutrans = judge_trans_op(true, trans, "gemv_op"); - hipblasErrcheck(hipblasCgemv(cublas_handle, cutrans, m, n, (hipblasComplex*)alpha, (hipblasComplex*)A, lda, (hipblasComplex*)X, incx, (hipblasComplex*)beta, (hipblasComplex*)Y, incy)); + hipblasOperation_t cutrans = judge_trans_op (true, trans, "gemv_op"); + hipblasErrcheck (hipblasCgemv (cublas_handle, + cutrans, + m, + n, + (hipblasComplex*)alpha, + (hipblasComplex*)A, + lda, + (hipblasComplex*)X, + incx, + (hipblasComplex*)beta, + (hipblasComplex*)Y, + incy)); } template <> -void gemv_op, base_device::DEVICE_GPU>::operator()(const char& trans, +void + gemv_op, base_device::DEVICE_GPU>::operator() (const char& trans, const int& m, const int& n, const std::complex* alpha, @@ -221,12 +260,24 @@ void gemv_op, base_device::DEVICE_GPU>::operator()(const ch std::complex* Y, const int& incy) { - hipblasOperation_t cutrans = judge_trans_op(true, trans, "gemv_op"); - hipblasErrcheck(hipblasZgemv(cublas_handle, cutrans, m, n, (hipblasDoubleComplex*)alpha, (hipblasDoubleComplex*)A, lda, (hipblasDoubleComplex*)X, incx, (hipblasDoubleComplex*)beta, (hipblasDoubleComplex*)Y, incy)); + 
hipblasOperation_t cutrans = judge_trans_op (true, trans, "gemv_op"); + hipblasErrcheck (hipblasZgemv (cublas_handle, + cutrans, + m, + n, + (hipblasDoubleComplex*)alpha, + (hipblasDoubleComplex*)A, + lda, + (hipblasDoubleComplex*)X, + incx, + (hipblasDoubleComplex*)beta, + (hipblasDoubleComplex*)Y, + incy)); } template <> -void gemm_op::operator()(const char& transa, +void + gemm_op::operator() (const char& transa, const char& transb, const int& m, const int& n, @@ -240,13 +291,14 @@ void gemm_op::operator()(const char& transa, float* c, const int& ldc) { - hipblasOperation_t cutransA = judge_trans_op(false, transa, "gemm_op"); - hipblasOperation_t cutransB = judge_trans_op(false, transb, "gemm_op"); - hipblasErrcheck(hipblasSgemm(cublas_handle, cutransA, cutransB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); + hipblasOperation_t cutransA = judge_trans_op (false, transa, "gemm_op"); + hipblasOperation_t cutransB = judge_trans_op (false, transb, "gemm_op"); + hipblasErrcheck (hipblasSgemm (cublas_handle, cutransA, cutransB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); } template <> -void gemm_op::operator()(const char& transa, +void + gemm_op::operator() (const char& transa, const char& transb, const int& m, const int& n, @@ -260,13 +312,14 @@ void gemm_op::operator()(const char& transa, double* c, const int& ldc) { - hipblasOperation_t cutransA = judge_trans_op(false, transa, "gemm_op"); - hipblasOperation_t cutransB = judge_trans_op(false, transb, "gemm_op"); - hipblasErrcheck(hipblasDgemm(cublas_handle, cutransA, cutransB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); + hipblasOperation_t cutransA = judge_trans_op (false, transa, "gemm_op"); + hipblasOperation_t cutransB = judge_trans_op (false, transb, "gemm_op"); + hipblasErrcheck (hipblasDgemm (cublas_handle, cutransA, cutransB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); } template <> -void gemm_op, base_device::DEVICE_GPU>::operator()(const char& transa, +void + gemm_op, 
base_device::DEVICE_GPU>::operator() (const char& transa, const char& transb, const int& m, const int& n, @@ -280,13 +333,27 @@ void gemm_op, base_device::DEVICE_GPU>::operator()(const cha std::complex* c, const int& ldc) { - hipblasOperation_t cutransA = judge_trans_op(true, transa, "gemm_op"); - hipblasOperation_t cutransB = judge_trans_op(true, transb, "gemm_op"); - hipblasErrcheck(hipblasCgemm(cublas_handle, cutransA, cutransB, m, n ,k, (hipblasComplex*)alpha, (hipblasComplex*)a , lda, (hipblasComplex*)b, ldb, (hipblasComplex*)beta, (hipblasComplex*)c, ldc)); + hipblasOperation_t cutransA = judge_trans_op (true, transa, "gemm_op"); + hipblasOperation_t cutransB = judge_trans_op (true, transb, "gemm_op"); + hipblasErrcheck (hipblasCgemm (cublas_handle, + cutransA, + cutransB, + m, + n, + k, + (hipblasComplex*)alpha, + (hipblasComplex*)a, + lda, + (hipblasComplex*)b, + ldb, + (hipblasComplex*)beta, + (hipblasComplex*)c, + ldc)); } template <> -void gemm_op, base_device::DEVICE_GPU>::operator()(const char& transa, +void + gemm_op, base_device::DEVICE_GPU>::operator() (const char& transa, const char& transb, const int& m, const int& n, @@ -300,187 +367,340 @@ void gemm_op, base_device::DEVICE_GPU>::operator()(const ch std::complex* c, const int& ldc) { - hipblasOperation_t cutransA = judge_trans_op(true, transa, "gemm_op"); - hipblasOperation_t cutransB = judge_trans_op(true, transb, "gemm_op"); - hipblasErrcheck(hipblasZgemm(cublas_handle, cutransA, cutransB, m, n ,k, (hipblasDoubleComplex*)alpha, (hipblasDoubleComplex*)a , lda, (hipblasDoubleComplex*)b, ldb, (hipblasDoubleComplex*)beta, (hipblasDoubleComplex*)c, ldc)); + hipblasOperation_t cutransA = judge_trans_op (true, transa, "gemm_op"); + hipblasOperation_t cutransB = judge_trans_op (true, transb, "gemm_op"); + hipblasErrcheck (hipblasZgemm (cublas_handle, + cutransA, + cutransB, + m, + n, + k, + (hipblasDoubleComplex*)alpha, + (hipblasDoubleComplex*)a, + lda, + (hipblasDoubleComplex*)b, + ldb, + 
(hipblasDoubleComplex*)beta, + (hipblasDoubleComplex*)c, + ldc)); } template <> -void matrixTranspose_op::operator()(const int& row, +void + matrixTranspose_op::operator() (const int& row, const int& col, const double* input_matrix, double* output_matrix) { double* device_temp = nullptr; - base_device::memory::resize_memory_op()(device_temp, row * col); + base_device::memory::resize_memory_op () (device_temp, row * col); if (row == col) - { - double ONE = 1.0, ZERO = 0.0; - // use 'geam' API todo transpose. - hipblasErrcheck(hipblasDgeam(cublas_handle, HIPBLAS_OP_T, HIPBLAS_OP_N, col, row, &ONE, input_matrix, col, &ZERO, input_matrix, col, device_temp, col)); - } + { + double ONE = 1.0, ZERO = 0.0; + // use 'geam' API todo transpose. + hipblasErrcheck (hipblasDgeam (cublas_handle, + HIPBLAS_OP_T, + HIPBLAS_OP_N, + col, + row, + &ONE, + input_matrix, + col, + &ZERO, + input_matrix, + col, + device_temp, + col)); + } else - { - int thread = 1024; - int block = (row + col + thread - 1) / thread; - hipLaunchKernelGGL(HIP_KERNEL_NAME(matrix_transpose_kernel), dim3(block), dim3(thread), 0, 0, row, col, input_matrix, device_temp); - hipCheckOnDebug(); - } - - base_device::memory::synchronize_memory_op()( + { + int thread = 1024; + int block = (row + col + thread - 1) / thread; + hipLaunchKernelGGL (HIP_KERNEL_NAME (matrix_transpose_kernel), + dim3 (block), + dim3 (thread), + 0, + 0, + row, + col, + input_matrix, + device_temp); + hipCheckOnDebug (); + } + + base_device::memory::synchronize_memory_op () ( output_matrix, device_temp, row * col); - base_device::memory::delete_memory_op()(device_temp); + base_device::memory::delete_memory_op () (device_temp); } template <> -void matrixTranspose_op, base_device::DEVICE_GPU>::operator()( - const int& row, - const int& col, - const std::complex* input_matrix, - std::complex* output_matrix) +void + matrixTranspose_op, base_device::DEVICE_GPU>::operator() ( + const int& row, + const int& col, + const std::complex* input_matrix, + 
std::complex* output_matrix) { std::complex* device_temp = nullptr; - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(device_temp, row * col); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU> () (device_temp, row * col); if (row == col) - { - float2 ONE, ZERO; - ONE.x = 1.0; - ONE.y = 0.0; - ZERO.x = ZERO.y = 0.0; - - // use 'geam' API todo transpose. - hipblasErrcheck(hipblasCgeam(cublas_handle, HIPBLAS_OP_T, HIPBLAS_OP_N, col, row, - reinterpret_cast(&ONE), (hipblasComplex*)input_matrix, col, - reinterpret_cast(&ZERO), (hipblasComplex*)input_matrix, col, (hipblasComplex*)device_temp, col)); - } else - { - int thread = 1024; - int block = (row + col + thread - 1) / thread; - hipLaunchKernelGGL(HIP_KERNEL_NAME(matrix_transpose_kernel>), dim3(block), dim3(thread), 0, 0, row, col, (thrust::complex*)input_matrix, (thrust::complex*)device_temp); - hipCheckOnDebug(); - } - - base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>()( - output_matrix, - device_temp, - row * col); + { + float2 ONE, ZERO; + ONE.x = 1.0; + ONE.y = 0.0; + ZERO.x = ZERO.y = 0.0; + + // use 'geam' API todo transpose. 
+ hipblasErrcheck (hipblasCgeam (cublas_handle, + HIPBLAS_OP_T, + HIPBLAS_OP_N, + col, + row, + reinterpret_cast (&ONE), + (hipblasComplex*)input_matrix, + col, + reinterpret_cast (&ZERO), + (hipblasComplex*)input_matrix, + col, + (hipblasComplex*)device_temp, + col)); + } + else + { + int thread = 1024; + int block = (row + col + thread - 1) / thread; + hipLaunchKernelGGL (HIP_KERNEL_NAME (matrix_transpose_kernel>), + dim3 (block), + dim3 (thread), + 0, + 0, + row, + col, + (thrust::complex*)input_matrix, + (thrust::complex*)device_temp); + hipCheckOnDebug (); + } - base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(device_temp); + base_device::memory::synchronize_memory_op, + base_device::DEVICE_GPU, + base_device::DEVICE_GPU> () (output_matrix, device_temp, row * col); + + base_device::memory::delete_memory_op, base_device::DEVICE_GPU> () (device_temp); } template <> -void matrixTranspose_op, base_device::DEVICE_GPU>::operator()( - const int& row, - const int& col, - const std::complex* input_matrix, - std::complex* output_matrix) +void + matrixTranspose_op, base_device::DEVICE_GPU>::operator() ( + const int& row, + const int& col, + const std::complex* input_matrix, + std::complex* output_matrix) { std::complex* device_temp = nullptr; - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(device_temp, row * col); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU> () (device_temp, row * col); if (row == col) - { - hipblasDoubleComplex ONE{1.0, 0.0}, ZERO{0.0, 0.0}; - // use 'geam' API todo transpose. 
- hipblasErrcheck(hipblasZgeam(cublas_handle, HIPBLAS_OP_T, HIPBLAS_OP_N, col, row, &ONE, (hipblasDoubleComplex*)input_matrix, col, &ZERO, (hipblasDoubleComplex*)input_matrix, col, (hipblasDoubleComplex*)device_temp, col)); - } else - { - int thread = 1024; - int block = (row + col + thread - 1) / thread; - hipLaunchKernelGGL(HIP_KERNEL_NAME(matrix_transpose_kernel>), dim3(block), dim3(thread), 0, 0, row, col, (thrust::complex*)input_matrix, (thrust::complex*)device_temp); - hipCheckOnDebug(); - } + { + hipblasDoubleComplex ONE{1.0, 0.0}, ZERO{0.0, 0.0}; + // use 'geam' API todo transpose. + hipblasErrcheck (hipblasZgeam (cublas_handle, + HIPBLAS_OP_T, + HIPBLAS_OP_N, + col, + row, + &ONE, + (hipblasDoubleComplex*)input_matrix, + col, + &ZERO, + (hipblasDoubleComplex*)input_matrix, + col, + (hipblasDoubleComplex*)device_temp, + col)); + } + else + { + int thread = 1024; + int block = (row + col + thread - 1) / thread; + hipLaunchKernelGGL (HIP_KERNEL_NAME (matrix_transpose_kernel>), + dim3 (block), + dim3 (thread), + 0, + 0, + row, + col, + (thrust::complex*)input_matrix, + (thrust::complex*)device_temp); + hipCheckOnDebug (); + } base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, - base_device::DEVICE_GPU>()(output_matrix, device_temp, row * col); + base_device::DEVICE_GPU> () (output_matrix, device_temp, row * col); - base_device::memory::delete_memory_op, base_device::DEVICE_GPU>()(device_temp); + base_device::memory::delete_memory_op, base_device::DEVICE_GPU> () (device_temp); } template <> -void matrixCopy::operator()(const int& n1, +void + matrixCopy::operator() (const int& n1, const int& n2, const double* A, const int& LDA, double* B, const int& LDB) { - const dim3 blockSize(16, 16); - const dim3 gridSize((n1 + blockSize.x - 1) / blockSize.x, (n2 + blockSize.y - 1) / blockSize.y); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(matrix_copy_kernel), gridSize, blockSize, 0, 0, n1, n2, A, LDA, B, LDB); - hipCheckOnDebug(); + const dim3 blockSize (16, 
16); + const dim3 gridSize ((n1 + blockSize.x - 1) / blockSize.x, (n2 + blockSize.y - 1) / blockSize.y); + + hipLaunchKernelGGL (HIP_KERNEL_NAME (matrix_copy_kernel), + gridSize, + blockSize, + 0, + 0, + n1, + n2, + A, + LDA, + B, + LDB); + hipCheckOnDebug (); } template <> -void matrixCopy, base_device::DEVICE_GPU>::operator()(const int& n1, +void + matrixCopy, base_device::DEVICE_GPU>::operator() (const int& n1, const int& n2, const std::complex* A, const int& LDA, std::complex* B, const int& LDB) { - const dim3 blockSize(16, 16); - const dim3 gridSize((n1 + blockSize.x - 1) / blockSize.x, (n2 + blockSize.y - 1) / blockSize.y); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(matrix_copy_kernel>), gridSize, blockSize, 0, 0, n1, n2, reinterpret_cast*>(A), LDA, reinterpret_cast*>(B), LDB); - hipCheckOnDebug(); + const dim3 blockSize (16, 16); + const dim3 gridSize ((n1 + blockSize.x - 1) / blockSize.x, (n2 + blockSize.y - 1) / blockSize.y); + + hipLaunchKernelGGL (HIP_KERNEL_NAME (matrix_copy_kernel>), + gridSize, + blockSize, + 0, + 0, + n1, + n2, + reinterpret_cast*> (A), + LDA, + reinterpret_cast*> (B), + LDB); + hipCheckOnDebug (); } template <> -void matrixCopy, base_device::DEVICE_GPU>::operator()(const int& n1, +void + matrixCopy, base_device::DEVICE_GPU>::operator() (const int& n1, const int& n2, const std::complex* A, const int& LDA, std::complex* B, const int& LDB) { - const dim3 blockSize(16, 16); - const dim3 gridSize((n1 + blockSize.x - 1) / blockSize.x, (n2 + blockSize.y - 1) / blockSize.y); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(matrix_copy_kernel>), gridSize, blockSize, 0, 0, n1, n2, reinterpret_cast*>(A), LDA, reinterpret_cast*>(B), LDB); - hipCheckOnDebug(); + const dim3 blockSize (16, 16); + const dim3 gridSize ((n1 + blockSize.x - 1) / blockSize.x, (n2 + blockSize.y - 1) / blockSize.y); + + hipLaunchKernelGGL (HIP_KERNEL_NAME (matrix_copy_kernel>), + gridSize, + blockSize, + 0, + 0, + n1, + n2, + reinterpret_cast*> (A), + LDA, + reinterpret_cast*> (B), + 
LDB); + hipCheckOnDebug (); } template <> -void matrix_mul_vector_op::operator()(const int &m, const int &n, - double *a, const int &lda, const double *b, const double alpha, double *c, const int &ldc){ - dim3 thread(16, 16, 1); - dim3 block((m + thread.x - 1) / thread.x, (n + thread.y - 1) / thread.y, 1); - hipLaunchKernelGGL(HIP_KERNEL_NAME(matrix_multiply_vector_kernel), dim3(block, thread), - m, n, a, lda, b, alpha, c, ldc); - hipCheckOnDebug(); +void + matrix_mul_vector_op::operator() (const int& m, + const int& n, + double* a, + const int& lda, + const double* b, + const double alpha, + double* c, + const int& ldc) +{ + dim3 thread (16, 16, 1); + dim3 block ((m + thread.x - 1) / thread.x, (n + thread.y - 1) / thread.y, 1); + hipLaunchKernelGGL (HIP_KERNEL_NAME (matrix_multiply_vector_kernel), + dim3 (block, thread), + m, + n, + a, + lda, + b, + alpha, + c, + ldc); + hipCheckOnDebug (); } template <> -void matrix_mul_vector_op, base_device::DEVICE_GPU>::operator()(const int &m, const int &n, - std::complex *a, const int &lda, const float *b, const float alpha, std::complex *c, const int &ldc){ - dim3 thread(16, 16, 1); - dim3 block((m + thread.x - 1) / thread.x, (n + thread.y - 1) / thread.y, 1); - hipLaunchKernelGGL(HIP_KERNEL_NAME(matrix_multiply_vector_kernel, float>), dim3(block, thread), - m, n, reinterpret_cast*>(a), lda, - b, alpha, reinterpret_cast*>(c), ldc); - hipCheckOnDebug(); +void + matrix_mul_vector_op, base_device::DEVICE_GPU>::operator() (const int& m, + const int& n, + std::complex* a, + const int& lda, + const float* b, + const float alpha, + std::complex* c, + const int& ldc) +{ + dim3 thread (16, 16, 1); + dim3 block ((m + thread.x - 1) / thread.x, (n + thread.y - 1) / thread.y, 1); + hipLaunchKernelGGL (HIP_KERNEL_NAME (matrix_multiply_vector_kernel, float>), + dim3 (block, thread), + m, + n, + reinterpret_cast*> (a), + lda, + b, + alpha, + reinterpret_cast*> (c), + ldc); + hipCheckOnDebug (); } template <> -void matrix_mul_vector_op, 
base_device::DEVICE_GPU>::operator()(const int &m, const int &n, - std::complex *a, const int &lda, const double *b, const double alpha, std::complex *c, const int &ldc) -{ - dim3 thread(16, 16, 1); - dim3 block((m + thread.x - 1) / thread.x, (n + thread.y - 1) / thread.y, 1); - hipLaunchKernelGGL(HIP_KERNEL_NAME(matrix_multiply_vector_kernel, double>), dim3(block, thread), - m, n, reinterpret_cast*>(a), lda, - b, alpha, reinterpret_cast*>(c), ldc); - hipCheckOnDebug(); +void + matrix_mul_vector_op, base_device::DEVICE_GPU>::operator() (const int& m, + const int& n, + std::complex* a, + const int& lda, + const double* b, + const double alpha, + std::complex* c, + const int& ldc) +{ + dim3 thread (16, 16, 1); + dim3 block ((m + thread.x - 1) / thread.x, (n + thread.y - 1) / thread.y, 1); + hipLaunchKernelGGL (HIP_KERNEL_NAME (matrix_multiply_vector_kernel, double>), + dim3 (block, thread), + m, + n, + reinterpret_cast*> (a), + lda, + b, + alpha, + reinterpret_cast*> (c), + ldc); + hipCheckOnDebug (); } // Explicitly instantiate functors for the types of functor registered. template struct matrixCopy; template struct matrixCopy, base_device::DEVICE_GPU>; template struct matrixCopy, base_device::DEVICE_GPU>; -} // namespace ModuleBase +} // namespace ModuleBase diff --git a/source/source_base/kernels/rocm/math_kernel_op_vec.hip.cu b/source/source_base/kernels/rocm/math_kernel_op_vec.hip.cu index 31c75f8fbd6..59e8b788837 100644 --- a/source/source_base/kernels/rocm/math_kernel_op_vec.hip.cu +++ b/source/source_base/kernels/rocm/math_kernel_op_vec.hip.cu @@ -3,224 +3,236 @@ #include #include template <> -struct GetTypeReal> { +struct GetTypeReal> +{ using type = float; /**< The return type specialization for std::complex. */ }; template <> -struct GetTypeReal> { +struct GetTypeReal> +{ using type = double; /**< The return type specialization for std::complex. 
*/ }; namespace ModuleBase { -void xdot_wrapper(const int &n, const float * x, const int &incx, const float * y, const int &incy, float &result); -void xdot_wrapper(const int &n, const double * x, const int &incx, const double * y, const int &incy, double &result); +void xdot_wrapper (const int& n, const float* x, const int& incx, const float* y, const int& incy, float& result); +void xdot_wrapper (const int& n, const double* x, const int& incx, const double* y, const int& incy, double& result); // Define the CUDA kernel: template -__launch_bounds__(1024) __global__ void vector_mul_real_kernel(const int size, - T* result, - const T* vector, - const typename GetTypeReal::type constant) +__launch_bounds__ (1024) __global__ void vector_mul_real_kernel (const int size, + T* result, + const T* vector, + const typename GetTypeReal::type constant) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < size) - { - result[i] = vector[i] * constant; - } + { + result[i] = vector[i] * constant; + } } template -__launch_bounds__(1024) __global__ void vector_mul_vector_kernel(const int size, - T* result, - const T* vector1, - const typename GetTypeReal::type* vector2, - const bool add) +__launch_bounds__ (1024) __global__ void vector_mul_vector_kernel (const int size, + T* result, + const T* vector1, + const typename GetTypeReal::type* vector2, + const bool add) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < size) - { - if (add) - { - result[i] += vector1[i] * vector2[i]; - } - else { - result[i] = vector1[i] * vector2[i]; + if (add) + { + result[i] += vector1[i] * vector2[i]; + } + else + { + result[i] = vector1[i] * vector2[i]; + } } - } } template -__launch_bounds__(1024) __global__ void vector_div_constant_kernel(const int size, - T* result, - const T* vector, - const typename GetTypeReal::type constant) +__launch_bounds__ (1024) __global__ void vector_div_constant_kernel (const int size, + T* result, + const T* vector, + const typename GetTypeReal::type 
constant) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < size) - { - result[i] = vector[i] / constant; - } + { + result[i] = vector[i] / constant; + } } template -__launch_bounds__(1024) __global__ void vector_div_vector_kernel(const int size, - T* result, - const T* vector1, - const typename GetTypeReal::type* vector2) +__launch_bounds__ (1024) __global__ void vector_div_vector_kernel (const int size, + T* result, + const T* vector1, + const typename GetTypeReal::type* vector2) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < size) - { - result[i] = vector1[i] / vector2[i]; - } + { + result[i] = vector1[i] / vector2[i]; + } } template -__launch_bounds__(1024) __global__ void constantvector_addORsub_constantVector_kernel(const int size, - T* result, - const T* vector1, - const Real constant1, - const T* vector2, - const Real constant2) +__launch_bounds__ (1024) __global__ void constantvector_addORsub_constantVector_kernel (const int size, + T* result, + const T* vector1, + const Real constant1, + const T* vector2, + const Real constant2) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < size) - { - result[i] = vector1[i] * constant1 + vector2[i] * constant2; - } + { + result[i] = vector1[i] * constant1 + vector2[i] * constant2; + } } // vector operator: result[i] = vector[i] * constant template <> -void vector_mul_real_op::operator()(const int dim, +void + vector_mul_real_op::operator() (const int dim, double* result, const double* vector, const double constant) { int thread = 1024; int block = (dim + thread - 1) / thread; - hipLaunchKernelGGL(HIP_KERNEL_NAME(vector_mul_real_kernel), - dim3(block), - dim3(thread), - 0, - 0, - dim, - result, - vector, - constant); - - hipCheckOnDebug(); + hipLaunchKernelGGL (HIP_KERNEL_NAME (vector_mul_real_kernel), + dim3 (block), + dim3 (thread), + 0, + 0, + dim, + result, + vector, + constant); + + hipCheckOnDebug (); } template -inline void vector_mul_real_wrapper(const int dim, - std::complex* result, - 
const std::complex* vector, - const FPTYPE constant) +inline void + vector_mul_real_wrapper (const int dim, + std::complex* result, + const std::complex* vector, + const FPTYPE constant) { - thrust::complex* result_tmp = reinterpret_cast*>(result); - const thrust::complex* vector_tmp = reinterpret_cast*>(vector); + thrust::complex* result_tmp = reinterpret_cast*> (result); + const thrust::complex* vector_tmp = reinterpret_cast*> (vector); int thread = 1024; int block = (dim + thread - 1) / thread; - hipLaunchKernelGGL(HIP_KERNEL_NAME(vector_mul_real_kernel>), - dim3(block), - dim3(thread), - 0, - 0, - dim, - result_tmp, - vector_tmp, - constant); - - hipCheckOnDebug(); + hipLaunchKernelGGL (HIP_KERNEL_NAME (vector_mul_real_kernel>), + dim3 (block), + dim3 (thread), + 0, + 0, + dim, + result_tmp, + vector_tmp, + constant); + + hipCheckOnDebug (); } template <> -void vector_mul_real_op, base_device::DEVICE_GPU>::operator()(const int dim, +void + vector_mul_real_op, base_device::DEVICE_GPU>::operator() (const int dim, std::complex* result, const std::complex* vector, const float constant) { - vector_mul_real_wrapper(dim, result, vector, constant); + vector_mul_real_wrapper (dim, result, vector, constant); - hipCheckOnDebug(); + hipCheckOnDebug (); } template <> -void vector_mul_real_op, base_device::DEVICE_GPU>::operator()(const int dim, +void + vector_mul_real_op, base_device::DEVICE_GPU>::operator() (const int dim, std::complex* result, const std::complex* vector, const double constant) { - vector_mul_real_wrapper(dim, result, vector, constant); + vector_mul_real_wrapper (dim, result, vector, constant); - hipCheckOnDebug(); + hipCheckOnDebug (); } // vector operator: result[i] = vector[i] / constant template <> -void vector_div_constant_op::operator()(const int& dim, - double* result, - const double* vector, - const double constant) +void + vector_div_constant_op::operator() (const int& dim, + double* result, + const double* vector, + const double constant) { int 
thread = 1024; int block = (dim + thread - 1) / thread; - hipLaunchKernelGGL(HIP_KERNEL_NAME(vector_div_constant_kernel), - dim3(block), - dim3(thread), - 0, - 0, - dim, - result, - vector, - constant); - - hipCheckOnDebug(); + hipLaunchKernelGGL (HIP_KERNEL_NAME (vector_div_constant_kernel), + dim3 (block), + dim3 (thread), + 0, + 0, + dim, + result, + vector, + constant); + + hipCheckOnDebug (); } template -inline void vector_div_constant_wrapper(const int& dim, - std::complex* result, - const std::complex* vector, - const FPTYPE constant) +inline void + vector_div_constant_wrapper (const int& dim, + std::complex* result, + const std::complex* vector, + const FPTYPE constant) { - thrust::complex* result_tmp = reinterpret_cast*>(result); - const thrust::complex* vector_tmp = reinterpret_cast*>(vector); + thrust::complex* result_tmp = reinterpret_cast*> (result); + const thrust::complex* vector_tmp = reinterpret_cast*> (vector); int thread = 1024; int block = (dim + thread - 1) / thread; - hipLaunchKernelGGL(HIP_KERNEL_NAME(vector_div_constant_kernel>), - dim3(block), - dim3(thread), - 0, - 0, - dim, - result_tmp, - vector_tmp, - constant); - - hipCheckOnDebug(); + hipLaunchKernelGGL (HIP_KERNEL_NAME (vector_div_constant_kernel>), + dim3 (block), + dim3 (thread), + 0, + 0, + dim, + result_tmp, + vector_tmp, + constant); + + hipCheckOnDebug (); } template <> -void vector_div_constant_op, base_device::DEVICE_GPU>::operator()(const int& dim, - std::complex* result, - const std::complex* vector, - const float constant) +void + vector_div_constant_op, base_device::DEVICE_GPU>::operator() (const int& dim, + std::complex* result, + const std::complex* vector, + const float constant) { - vector_div_constant_wrapper(dim, result, vector, constant); + vector_div_constant_wrapper (dim, result, vector, constant); } template <> -void vector_div_constant_op, base_device::DEVICE_GPU>::operator()(const int& dim, - std::complex* result, - const std::complex* vector, - const double 
constant) +void + vector_div_constant_op, base_device::DEVICE_GPU>::operator() ( + const int& dim, + std::complex* result, + const std::complex* vector, + const double constant) { - vector_div_constant_wrapper(dim, result, vector, constant); + vector_div_constant_wrapper (dim, result, vector, constant); } // vector operator: result[i] = vector1[i](not complex) * vector2[i](not complex) template <> -void vector_mul_vector_op::operator()(const int& dim, +void + vector_mul_vector_op::operator() (const int& dim, double* result, const double* vector1, const double* vector2, @@ -228,163 +240,172 @@ void vector_mul_vector_op::operator()(const int { int thread = 1024; int block = (dim + thread - 1) / thread; - hipLaunchKernelGGL(HIP_KERNEL_NAME(vector_mul_vector_kernel), - dim3(block), - dim3(thread), - 0, - 0, - dim, - result, - vector1, - vector2, - add); - - hipCheckOnDebug(); + hipLaunchKernelGGL (HIP_KERNEL_NAME (vector_mul_vector_kernel), + dim3 (block), + dim3 (thread), + 0, + 0, + dim, + result, + vector1, + vector2, + add); + + hipCheckOnDebug (); } // vector operator: result[i] = vector1[i](complex) * vector2[i](not complex) template -inline void vector_mul_vector_complex_wrapper(const int& dim, - std::complex* result, - const std::complex* vector1, - const FPTYPE* vector2, - const bool& add) +inline void + vector_mul_vector_complex_wrapper (const int& dim, + std::complex* result, + const std::complex* vector1, + const FPTYPE* vector2, + const bool& add) { - thrust::complex* result_tmp = reinterpret_cast*>(result); - const thrust::complex* vector1_tmp = reinterpret_cast*>(vector1); + thrust::complex* result_tmp = reinterpret_cast*> (result); + const thrust::complex* vector1_tmp = reinterpret_cast*> (vector1); int thread = 1024; int block = (dim + thread - 1) / thread; - hipLaunchKernelGGL(HIP_KERNEL_NAME(vector_mul_vector_kernel>), - dim3(block), - dim3(thread), - 0, - 0, - dim, - result_tmp, - vector1_tmp, - vector2, - add); - - hipCheckOnDebug(); + 
hipLaunchKernelGGL (HIP_KERNEL_NAME (vector_mul_vector_kernel>), + dim3 (block), + dim3 (thread), + 0, + 0, + dim, + result_tmp, + vector1_tmp, + vector2, + add); + + hipCheckOnDebug (); } template <> -void vector_mul_vector_op, base_device::DEVICE_GPU>::operator()(const int& dim, +void + vector_mul_vector_op, base_device::DEVICE_GPU>::operator() (const int& dim, std::complex* result, const std::complex* vector1, const float* vector2, const bool& add) { - vector_mul_vector_complex_wrapper(dim, result, vector1, vector2, add); + vector_mul_vector_complex_wrapper (dim, result, vector1, vector2, add); } template <> -void vector_mul_vector_op, base_device::DEVICE_GPU>::operator()( - const int& dim, - std::complex* result, - const std::complex* vector1, - const double* vector2, - const bool& add) +void + vector_mul_vector_op, base_device::DEVICE_GPU>::operator() ( + const int& dim, + std::complex* result, + const std::complex* vector1, + const double* vector2, + const bool& add) { - vector_mul_vector_complex_wrapper(dim, result, vector1, vector2, add); + vector_mul_vector_complex_wrapper (dim, result, vector1, vector2, add); } // vector operator: result[i] = vector1[i](complex) / vector2[i](not complex) template <> -void vector_div_vector_op::operator()(const int& dim, +void + vector_div_vector_op::operator() (const int& dim, double* result, const double* vector1, const double* vector2) { int thread = 1024; int block = (dim + thread - 1) / thread; - hipLaunchKernelGGL(HIP_KERNEL_NAME(vector_div_vector_kernel), - dim3(block), - dim3(thread), - 0, - 0, - dim, - result, - vector1, - vector2); - - hipCheckOnDebug(); + hipLaunchKernelGGL (HIP_KERNEL_NAME (vector_div_vector_kernel), + dim3 (block), + dim3 (thread), + 0, + 0, + dim, + result, + vector1, + vector2); + + hipCheckOnDebug (); } // vector operator: result[i] = vector1[i](complex) / vector2[i](not complex) template -inline void vector_div_vector_op_complex_wrapper(const int& dim, - std::complex* result, - const 
std::complex* vector1, - const FPTYPE* vector2) +inline void + vector_div_vector_op_complex_wrapper (const int& dim, + std::complex* result, + const std::complex* vector1, + const FPTYPE* vector2) { - thrust::complex* result_tmp = reinterpret_cast*>(result); - const thrust::complex* vector1_tmp = reinterpret_cast*>(vector1); + thrust::complex* result_tmp = reinterpret_cast*> (result); + const thrust::complex* vector1_tmp = reinterpret_cast*> (vector1); int thread = 1024; int block = (dim + thread - 1) / thread; - hipLaunchKernelGGL(HIP_KERNEL_NAME(vector_div_vector_kernel>), - dim3(block), - dim3(thread), - 0, - 0, - dim, - result_tmp, - vector1_tmp, - vector2); - - hipCheckOnDebug(); + hipLaunchKernelGGL (HIP_KERNEL_NAME (vector_div_vector_kernel>), + dim3 (block), + dim3 (thread), + 0, + 0, + dim, + result_tmp, + vector1_tmp, + vector2); + + hipCheckOnDebug (); } template <> -void vector_div_vector_op, base_device::DEVICE_GPU>::operator()(const int& dim, +void + vector_div_vector_op, base_device::DEVICE_GPU>::operator() (const int& dim, std::complex* result, const std::complex* vector1, const float* vector2) { - vector_div_vector_op_complex_wrapper(dim, result, vector1, vector2); + vector_div_vector_op_complex_wrapper (dim, result, vector1, vector2); } template <> -void vector_div_vector_op, base_device::DEVICE_GPU>::operator()( - const int& dim, - std::complex* result, - const std::complex* vector1, - const double* vector2) +void + vector_div_vector_op, base_device::DEVICE_GPU>::operator() ( + const int& dim, + std::complex* result, + const std::complex* vector1, + const double* vector2) { - vector_div_vector_op_complex_wrapper(dim, result, vector1, vector2); + vector_div_vector_op_complex_wrapper (dim, result, vector1, vector2); } // vector operator: result[i] = vector1[i] * constant1 + vector2[i] * constant2 template -void vector_add_vector_op::operator()(const int& dim, - T* result, - const T* vector1, - const Real constant1, - const T* vector2, - const Real 
constant2) +void + vector_add_vector_op::operator() (const int& dim, + T* result, + const T* vector1, + const Real constant1, + const T* vector2, + const Real constant2) { using Type = typename GetTypeThrust::type; using Real = typename GetTypeReal::type; - auto result_tmp = reinterpret_cast(result); - auto vector1_tmp = reinterpret_cast(vector1); - auto vector2_tmp = reinterpret_cast(vector2); + auto result_tmp = reinterpret_cast (result); + auto vector1_tmp = reinterpret_cast (vector1); + auto vector2_tmp = reinterpret_cast (vector2); int thread = 1024; int block = (dim + thread - 1) / thread; constantvector_addORsub_constantVector_kernel - <<>>(dim, result_tmp, vector1_tmp, constant1, vector2_tmp, constant2); + <<>> (dim, result_tmp, vector1_tmp, constant1, vector2_tmp, constant2); - hipCheckOnDebug(); + hipCheckOnDebug (); } template <> -double dot_real_op::operator()(const int& dim, - const double* psi_L, - const double* psi_R, - const bool reduce) +double + dot_real_op::operator() (const int& dim, + const double* psi_L, + const double* psi_R, + const bool reduce) { double result = 0.0; - xdot_wrapper(dim, psi_L, 1, psi_R, 1, result); + xdot_wrapper (dim, psi_L, 1, psi_R, 1, result); if (reduce) - { - Parallel_Reduce::reduce_pool(result); - } + { + Parallel_Reduce::reduce_pool (result); + } return result; } @@ -393,39 +414,42 @@ double dot_real_op::operator()(const int& dim, // 2022-10-03 Note that ddot_(2*dim,a,1, b,1) = REAL( zdotc_(dim,a,1,b,1) ) GPU specialization of actual // computation. 
template -inline FPTYPE dot_complex_wrapper(const int& dim, - const std::complex* psi_L, - const std::complex* psi_R, - const bool reduce) +inline FPTYPE + dot_complex_wrapper (const int& dim, + const std::complex* psi_L, + const std::complex* psi_R, + const bool reduce) { //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // denghui modify 2022-10-07 // Note that ddot_(2*dim,a,1,b,1) = REAL( zdotc_(dim,a,1,b,1) ) - const FPTYPE* pL = reinterpret_cast(psi_L); - const FPTYPE* pR = reinterpret_cast(psi_R); + const FPTYPE* pL = reinterpret_cast (psi_L); + const FPTYPE* pR = reinterpret_cast (psi_R); FPTYPE result = 0.0; - xdot_wrapper(dim * 2, pL, 1, pR, 1, result); + xdot_wrapper (dim * 2, pL, 1, pR, 1, result); if (reduce) - { - Parallel_Reduce::reduce_pool(result); - } + { + Parallel_Reduce::reduce_pool (result); + } return result; } template <> -float dot_real_op, base_device::DEVICE_GPU>::operator()(const int& dim, - const std::complex* psi_L, - const std::complex* psi_R, - const bool reduce) +float + dot_real_op, base_device::DEVICE_GPU>::operator() (const int& dim, + const std::complex* psi_L, + const std::complex* psi_R, + const bool reduce) { - return dot_complex_wrapper(dim, psi_L, psi_R, reduce); + return dot_complex_wrapper (dim, psi_L, psi_R, reduce); } template <> -double dot_real_op, base_device::DEVICE_GPU>::operator()(const int& dim, - const std::complex* psi_L, - const std::complex* psi_R, - const bool reduce) +double + dot_real_op, base_device::DEVICE_GPU>::operator() (const int& dim, + const std::complex* psi_L, + const std::complex* psi_R, + const bool reduce) { - return dot_complex_wrapper(dim, psi_L, psi_R, reduce); + return dot_complex_wrapper (dim, psi_L, psi_R, reduce); } // Explicitly instantiate functors for the types of functor registered. 
diff --git a/source/source_base/kernels/rocm/math_ylm_op.hip.cu b/source/source_base/kernels/rocm/math_ylm_op.hip.cu index db64d9c9de3..985b9dde8bb 100644 --- a/source/source_base/kernels/rocm/math_ylm_op.hip.cu +++ b/source/source_base/kernels/rocm/math_ylm_op.hip.cu @@ -3,127 +3,142 @@ #include #include -namespace ModuleBase { +namespace ModuleBase +{ #define THREADS_PER_BLOCK 256 template -__device__ __inline__ -FPTYPE __fact(const int n) { +__device__ __inline__ FPTYPE + __fact (const int n) +{ FPTYPE f = 1.0; - for (int i = n; i > 1; i--) { - f *= i; - } + for (int i = n; i > 1; i--) + { + f *= i; + } return f; } -__device__ __inline__ -int __semi_fact(const int n) +__device__ __inline__ int + __semi_fact (const int n) { int semif = 1; for (int i = n; i > 2; i -= 2) - { - semif *= i; - } + { + semif *= i; + } return semif; } template -__global__ void cal_ylm_real( - const int ng, - const int lmax, - const FPTYPE SQRT2, - const FPTYPE PI, - const FPTYPE PI_HALF, - const FPTYPE FOUR_PI, - const FPTYPE SQRT_INVERSE_FOUR_PI, - const FPTYPE *g, - FPTYPE * p, - FPTYPE * ylm) +__global__ void + cal_ylm_real (const int ng, + const int lmax, + const FPTYPE SQRT2, + const FPTYPE PI, + const FPTYPE PI_HALF, + const FPTYPE FOUR_PI, + const FPTYPE SQRT_INVERSE_FOUR_PI, + const FPTYPE* g, + FPTYPE* p, + FPTYPE* ylm) { int ig = blockIdx.x * blockDim.x + threadIdx.x; - if (ig >= ng) {return;} + if (ig >= ng) + { + return; + } FPTYPE cost = 0.0, phi = 0.0; //---------------------------------------------------------- // EXPLAIN : if lmax = 1,only use Y00 , output result. 
//---------------------------------------------------------- - if (lmax == 0) { - ylm[0 * ng + ig] = SQRT_INVERSE_FOUR_PI; - return; - } + if (lmax == 0) + { + ylm[0 * ng + ig] = SQRT_INVERSE_FOUR_PI; + return; + } //---------------------------------------------------------- // LOCAL VARIABLES : // NAME : cost = cos(theta),theta and phi are polar angles // NAME : phi //---------------------------------------------------------- - const FPTYPE gmod = sqrt(g[ig * 3 + 0] * g[ig * 3 + 0] + g[ig * 3 + 1] * g[ig * 3 + 1] + g[ig * 3 + 2] * g[ig * 3 + 2]); + const FPTYPE gmod + = sqrt (g[ig * 3 + 0] * g[ig * 3 + 0] + g[ig * 3 + 1] * g[ig * 3 + 1] + g[ig * 3 + 2] * g[ig * 3 + 2]); cost = gmod < 1.0e-9 ? 0.0 : g[ig * 3 + 2] / gmod; // beware the arc tan, it is defined modulo pi - if (g[ig * 3 + 0] > 1.0e-9) { - phi = atan(g[ig * 3 + 1] / g[ig * 3 + 0]); - } - else if (g[ig * 3 + 0] < -1.e-9) { - phi = atan(g[ig * 3 + 1] / g[ig * 3 + 0]) + PI; - } - else { - phi = PI_HALF * ((g[ig * 3 + 1] >= 0.0) ? 1.0 : -1.0); //HLX: modified on 10/13/2006 - } // end if + if (g[ig * 3 + 0] > 1.0e-9) + { + phi = atan (g[ig * 3 + 1] / g[ig * 3 + 0]); + } + else if (g[ig * 3 + 0] < -1.e-9) + { + phi = atan (g[ig * 3 + 1] / g[ig * 3 + 0]) + PI; + } + else + { + phi = PI_HALF * ((g[ig * 3 + 1] >= 0.0) ? 1.0 : -1.0); // HLX: modified on 10/13/2006 + } // end if //========================================================== // NAME : p(Legendre Polynomials) (0 <= m <= l) //========================================================== int lm = -1; - for (int l = 0; l <= lmax; l++) { - const FPTYPE c = sqrt((2 * l + 1) / FOUR_PI); - if (l == 0) { - p[0 * (lmax + 1) * ng + 0 * ng + ig] = 1.0; - } - else if (l == 1) { - p[0 * (lmax + 1) * ng + 1 * ng + ig] = cost; - FPTYPE var = (1.0 - cost * cost) > 0.0 ? 
(1.0 - cost * cost) : 0.0; - p[1 * (lmax + 1) * ng + 1 * ng + ig] = -sqrt(var); - } - else { - const int l1 = l - 1, - l2 = l - 2, - l3 = 2 * l - 1; - // recursion on l for P(:,l,m) - for (int m = 0; m <= l2; m++) { // do m = 0, l - 2//mohan modify 2007-10-13 - p[m * (lmax + 1) * ng + l * ng + ig] = - (cost * l3 * p[m * (lmax + 1) * ng + l1 * ng + ig] - - (l1 + m) * p[m * (lmax + 1) * ng + l2 * ng + ig]) / (l - m); - } // end do - p[l1 * (lmax + 1) * ng + l * ng + ig] = - cost * l3 * p[l1 * (lmax + 1) * ng + l1 * ng + ig]; - FPTYPE x2 = (1.0 - cost * cost) > 0.0 ? (1.0 - cost * cost) : 0.0; - p[l * (lmax + 1) * ng + l * ng + ig] = __semi_fact(l3) * pow(x2, static_cast(l) / 2.0);//mohan modify 2007-10-13 - if (l % 2 == 1) { - p[l * (lmax + 1) * ng + l * ng + ig] *= -1; - } - } // end if + for (int l = 0; l <= lmax; l++) + { + const FPTYPE c = sqrt ((2 * l + 1) / FOUR_PI); + if (l == 0) + { + p[0 * (lmax + 1) * ng + 0 * ng + ig] = 1.0; + } + else if (l == 1) + { + p[0 * (lmax + 1) * ng + 1 * ng + ig] = cost; + FPTYPE var = (1.0 - cost * cost) > 0.0 ? (1.0 - cost * cost) : 0.0; + p[1 * (lmax + 1) * ng + 1 * ng + ig] = -sqrt (var); + } + else + { + const int l1 = l - 1, l2 = l - 2, l3 = 2 * l - 1; + // recursion on l for P(:,l,m) + for (int m = 0; m <= l2; m++) + { // do m = 0, l - 2//mohan modify 2007-10-13 + p[m * (lmax + 1) * ng + l * ng + ig] = (cost * l3 * p[m * (lmax + 1) * ng + l1 * ng + ig] + - (l1 + m) * p[m * (lmax + 1) * ng + l2 * ng + ig]) + / (l - m); + } // end do + p[l1 * (lmax + 1) * ng + l * ng + ig] = cost * l3 * p[l1 * (lmax + 1) * ng + l1 * ng + ig]; + FPTYPE x2 = (1.0 - cost * cost) > 0.0 ? 
(1.0 - cost * cost) : 0.0; + p[l * (lmax + 1) * ng + l * ng + ig] + = __semi_fact (l3) * pow (x2, static_cast (l) / 2.0); // mohan modify 2007-10-13 + if (l % 2 == 1) + { + p[l * (lmax + 1) * ng + l * ng + ig] *= -1; + } + } // end if - // Y_lm, m = 0 - ++lm; - ylm[lm * ng + ig] = c * p[0 * (lmax + 1) * ng + l * ng + ig]; + // Y_lm, m = 0 + ++lm; + ylm[lm * ng + ig] = c * p[0 * (lmax + 1) * ng + l * ng + ig]; - for (int m = 1; m <= l; m++) { - // Y_lm, m > 0 - const FPTYPE same = - c * sqrt(__fact(l - m) / - __fact(l + m)) * SQRT2; + for (int m = 1; m <= l; m++) + { + // Y_lm, m > 0 + const FPTYPE same = c * sqrt (__fact (l - m) / __fact (l + m)) * SQRT2; - ++lm; - ylm[lm * ng + ig] = same * p[m * (lmax + 1) * ng + l * ng + ig] * cos(m * phi); + ++lm; + ylm[lm * ng + ig] = same * p[m * (lmax + 1) * ng + l * ng + ig] * cos (m * phi); - // Y_lm, m < 0 - ++lm; - ylm[lm * ng + ig] = same * p[m * (lmax + 1) * ng + l * ng + ig] * sin(m * phi); - } - }// end do + // Y_lm, m < 0 + ++lm; + ylm[lm * ng + ig] = same * p[m * (lmax + 1) * ng + l * ng + ig] * sin (m * phi); + } + } // end do } template -void cal_ylm_real_op::operator()(const base_device::DEVICE_GPU* ctx, +void + cal_ylm_real_op::operator() (const base_device::DEVICE_GPU* ctx, const int& ng, const int& lmax, const FPTYPE& SQRT2, @@ -136,22 +151,26 @@ void cal_ylm_real_op::operator()(const base_dev FPTYPE* ylm) { int block = (ng + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - hipLaunchKernelGGL(HIP_KERNEL_NAME(cal_ylm_real), dim3(block), dim3(THREADS_PER_BLOCK), 0, 0, - ng, - lmax, - SQRT2, - PI, - PI_HALF, - FOUR_PI, - SQRT_INVERSE_FOUR_PI, - g, - p, - ylm); + hipLaunchKernelGGL (HIP_KERNEL_NAME (cal_ylm_real), + dim3 (block), + dim3 (THREADS_PER_BLOCK), + 0, + 0, + ng, + lmax, + SQRT2, + PI, + PI_HALF, + FOUR_PI, + SQRT_INVERSE_FOUR_PI, + g, + p, + ylm); - hipCheckOnDebug(); + hipCheckOnDebug (); } template struct cal_ylm_real_op; template struct cal_ylm_real_op; -} // namespace ModuleBase +} // namespace 
ModuleBase diff --git a/source/source_base/kernels/test/math_kernel_test.cpp b/source/source_base/kernels/test/math_kernel_test.cpp index a756923f989..40453307bc2 100644 --- a/source/source_base/kernels/test/math_kernel_test.cpp +++ b/source/source_base/kernels/test/math_kernel_test.cpp @@ -12,42 +12,44 @@ class TestModuleHsolverMathKernel : public ::testing::Test protected: // xx = tf.random.uniform([100], minval=-4, maxval=4, dtype = tf.float64) std::vector> psi_L = {{-0.65412617, -0.74208893}, - {-2.21731157, 0.42540039}, - {3.36373004, -2.51647562}, - {-2.985111, -0.53251562}, - {0.37908265, 0.81605825}, - {1.66281318, 2.71761869}, - {2.2010268, 0.65498149}, - {1.51153638, 0.71501482}, - {0.53546578, 1.4564317}, - {-2.36701143, 1.23009056}, - {3.41302551, -2.3175205}, - {-0.27628221, -1.35701656}}; + {-2.21731157, 0.42540039}, + {3.36373004, -2.51647562}, + {-2.985111, -0.53251562}, + {0.37908265, 0.81605825}, + {1.66281318, 2.71761869}, + {2.2010268, 0.65498149}, + {1.51153638, 0.71501482}, + {0.53546578, 1.4564317}, + {-2.36701143, 1.23009056}, + {3.41302551, -2.3175205}, + {-0.27628221, -1.35701656}}; std::vector> psi_R = {{-1.67837557e-01, -1.70017454e-01}, - {-2.92128115e-02, 2.82765887e-01}, - {-8.71641062e-02, -1.15934278e-01}, - {3.36269232e+00, -1.44692661e-02}, - {-3.81342874e-03, -1.58276988e-01}, - {2.33504238e-01, -1.93195840e-03}, - {2.45520665e-01, 6.46854620e-01}, - {1.58255340e+00, 2.70915699e+00}, - {-1.66142311e-01, 6.27839507e-02}, - {2.17077193e+00, 4.87104731e-01}, - {1.41257916e+00, 5.45282609e-01}, - {-1.29333636e-01, -5.04228492e-03}}; - - const int dim = psi_L.size(); + {-2.92128115e-02, 2.82765887e-01}, + {-8.71641062e-02, -1.15934278e-01}, + {3.36269232e+00, -1.44692661e-02}, + {-3.81342874e-03, -1.58276988e-01}, + {2.33504238e-01, -1.93195840e-03}, + {2.45520665e-01, 6.46854620e-01}, + {1.58255340e+00, 2.70915699e+00}, + {-1.66142311e-01, 6.27839507e-02}, + {2.17077193e+00, 4.87104731e-01}, + {1.41257916e+00, 5.45282609e-01}, + 
{-1.29333636e-01, -5.04228492e-03}}; + + const int dim = psi_L.size (); const double expected_result = -5.0016151713691288; const base_device::DEVICE_CPU* cpu_ctx = {}; const base_device::DEVICE_GPU* gpu_ctx = {}; - void SetUp() override + void + SetUp () override { } - void TearDown() override + void + TearDown () override { } @@ -75,8 +77,7 @@ class TestModuleHsolverMathKernel : public ::testing::Test using vector_mul_real_op_cpu = ModuleBase::vector_mul_real_op, base_device::DEVICE_CPU>; using vector_mul_vector_op_cpu = ModuleBase::vector_mul_vector_op, base_device::DEVICE_CPU>; using vector_div_vector_op_cpu = ModuleBase::vector_div_vector_op, base_device::DEVICE_CPU>; - using vector_add_vector_op_cpu - = ModuleBase::vector_add_vector_op, base_device::DEVICE_CPU>; + using vector_add_vector_op_cpu = ModuleBase::vector_add_vector_op, base_device::DEVICE_CPU>; using axpy_op_cpu = ModuleBase::axpy_op, base_device::DEVICE_CPU>; using scal_op_cpu = ModuleBase::scal_op; using gemv_op_cpu = ModuleBase::gemv_op, base_device::DEVICE_CPU>; @@ -84,54 +85,53 @@ class TestModuleHsolverMathKernel : public ::testing::Test using vector_mul_real_op_gpu = ModuleBase::vector_mul_real_op, base_device::DEVICE_GPU>; using vector_mul_vector_op_gpu = ModuleBase::vector_mul_vector_op, base_device::DEVICE_GPU>; using vector_div_vector_op_gpu = ModuleBase::vector_div_vector_op, base_device::DEVICE_GPU>; - using vector_add_vector_op_gpu - = ModuleBase::vector_add_vector_op, base_device::DEVICE_GPU>; + using vector_add_vector_op_gpu = ModuleBase::vector_add_vector_op, base_device::DEVICE_GPU>; using axpy_op_gpu = ModuleBase::axpy_op, base_device::DEVICE_GPU>; using scal_op_gpu = ModuleBase::scal_op; using gemv_op_gpu = ModuleBase::gemv_op, base_device::DEVICE_GPU>; // haozhihan add std::vector> L = {{-0.65412617, -0.74208893}, - {-2.21731157, 0.42540039}, - {3.36373004, -2.51647562}, - {-2.985111, -0.53251562}, - {0.37908265, 0.81605825}, - {1.66281318, 2.71761869}, - {2.2010268, 
0.65498149}, - {1.51153638, 0.71501482}, - {0.53546578, 1.4564317}, - {-2.36701143, 1.23009056}, - {3.41302551, -2.3175205}, - {-0.27628221, -1.35701656}}; + {-2.21731157, 0.42540039}, + {3.36373004, -2.51647562}, + {-2.985111, -0.53251562}, + {0.37908265, 0.81605825}, + {1.66281318, 2.71761869}, + {2.2010268, 0.65498149}, + {1.51153638, 0.71501482}, + {0.53546578, 1.4564317}, + {-2.36701143, 1.23009056}, + {3.41302551, -2.3175205}, + {-0.27628221, -1.35701656}}; std::vector> R = {{-1.67837557e-01, -1.70017454e-01}, - {-2.92128115e-02, 2.82765887e-01}, - {-8.71641062e-02, -1.15934278e-01}, - {3.36269232e+00, -1.44692661e-02}, - {-3.81342874e-03, -1.58276988e-01}, - {2.33504238e-01, -1.93195840e-03}, - {2.45520665e-01, 6.46854620e-01}, - {1.58255340e+00, 2.70915699e+00}, - {-1.66142311e-01, 6.27839507e-02}, - {2.17077193e+00, 4.87104731e-01}, - {1.41257916e+00, 5.45282609e-01}, - {-1.29333636e-01, -5.04228492e-03}}; + {-2.92128115e-02, 2.82765887e-01}, + {-8.71641062e-02, -1.15934278e-01}, + {3.36269232e+00, -1.44692661e-02}, + {-3.81342874e-03, -1.58276988e-01}, + {2.33504238e-01, -1.93195840e-03}, + {2.45520665e-01, 6.46854620e-01}, + {1.58255340e+00, 2.70915699e+00}, + {-1.66142311e-01, 6.27839507e-02}, + {2.17077193e+00, 4.87104731e-01}, + {1.41257916e+00, 5.45282609e-01}, + {-1.29333636e-01, -5.04228492e-03}}; // (1) for test vector_mul_real_op const std::vector> input = L; const double constant = 5.5; const std::vector> output_vector_mul_real_op = {{-0.11893203, -0.13492526}, - {-0.40314756, 0.07734553}, - {0.61158728, -0.45754102}, - {-0.54274745, -0.09682102}, - {0.06892412, 0.14837423}, - {0.30232967, 0.49411249}, - {0.40018669, 0.11908754}, - {0.27482480, 0.13000269}, - {0.09735741, 0.26480576}, - {-0.43036571, 0.22365283}, - {0.62055009, -0.42136736}, - {-0.05023313, -0.24673028}}; + {-0.40314756, 0.07734553}, + {0.61158728, -0.45754102}, + {-0.54274745, -0.09682102}, + {0.06892412, 0.14837423}, + {0.30232967, 0.49411249}, + {0.40018669, 0.11908754}, + 
{0.27482480, 0.13000269}, + {0.09735741, 0.26480576}, + {-0.43036571, 0.22365283}, + {0.62055009, -0.42136736}, + {-0.05023313, -0.24673028}}; // (2) for test vector_mul_vector_op & vector_div_vector_op const std::vector input_double = { @@ -179,19 +179,18 @@ class TestModuleHsolverMathKernel : public ::testing::Test const double constant2 = 4.4; const std::vector> input1 = L; const std::vector> input2 = R; - const std::vector> output_vector_add_vector_op - = {{-5.05571797, -5.64586374}, - {-14.76279273, 4.05181248}, - {21.81709620, -17.11884992}, - {-4.90588639, -3.57826786}, - {2.48516640, 4.68956570}, - {12.00198564, 17.92778274}, - {15.60706781, 7.16903816}, - {16.93937507, 16.63938857}, - {2.80304798, 9.88869860}, - {-6.07087895, 10.26185851}, - {28.74131667, -12.89639182}, - {-2.39253058, -8.97849535}}; + const std::vector> output_vector_add_vector_op = {{-5.05571797, -5.64586374}, + {-14.76279273, 4.05181248}, + {21.81709620, -17.11884992}, + {-4.90588639, -3.57826786}, + {2.48516640, 4.68956570}, + {12.00198564, 17.92778274}, + {15.60706781, 7.16903816}, + {16.93937507, 16.63938857}, + {2.80304798, 9.88869860}, + {-6.07087895, 10.26185851}, + {28.74131667, -12.89639182}, + {-2.39253058, -8.97849535}}; // (4) for test axpy_op (compute Y = alpha * X + Y ) const std::complex alpha_axpy{-1.5, -2.5}; @@ -255,161 +254,156 @@ class TestModuleHsolverMathKernel : public ::testing::Test // template // FPTYPE zdot_real(const int &dim, const std::complex* psi_L, const std::complex* psi_R, const // base_device::AbacusDevice_t device = base_device::CpuDevice, const bool reduce = true); -TEST_F(TestModuleHsolverMathKernel, zdot_real_op_cpu) +TEST_F (TestModuleHsolverMathKernel, zdot_real_op_cpu) { - double result = zdot_real_cpu_op()(dim, psi_L.data(), psi_R.data(), false); - EXPECT_LT(fabs(result - expected_result), 1e-12); + double result = zdot_real_cpu_op () (dim, psi_L.data (), psi_R.data (), false); + EXPECT_LT (fabs (result - expected_result), 1e-12); } 
-TEST_F(TestModuleHsolverMathKernel, vector_mul_real_op_cpu) +TEST_F (TestModuleHsolverMathKernel, vector_mul_real_op_cpu) { - std::vector> output(input.size()); - vector_mul_real_op_cpu()(dim, output.data(), input.data(), 1.0 / constant); - for (int i = 0; i < input.size(); i++) - { - EXPECT_LT(fabs(output[i].imag() - output_vector_mul_real_op[i].imag()), 1e-8); - EXPECT_LT(fabs(output[i].real() - output_vector_mul_real_op[i].real()), 1e-8); - } + std::vector> output (input.size ()); + vector_mul_real_op_cpu () (dim, output.data (), input.data (), 1.0 / constant); + for (int i = 0; i < input.size (); i++) + { + EXPECT_LT (fabs (output[i].imag () - output_vector_mul_real_op[i].imag ()), 1e-8); + EXPECT_LT (fabs (output[i].real () - output_vector_mul_real_op[i].real ()), 1e-8); + } } -TEST_F(TestModuleHsolverMathKernel, vector_mul_vector_op_cpu) +TEST_F (TestModuleHsolverMathKernel, vector_mul_vector_op_cpu) { - std::vector> output(input.size()); - vector_mul_vector_op_cpu()(dim, output.data(), input.data(), input_double.data()); - for (int i = 0; i < input.size(); i++) - { - EXPECT_LT(fabs(output[i].imag() - output_vector_mul_vector_op[i].imag()), 1e-8); - EXPECT_LT(fabs(output[i].real() - output_vector_mul_vector_op[i].real()), 1e-8); - } + std::vector> output (input.size ()); + vector_mul_vector_op_cpu () (dim, output.data (), input.data (), input_double.data ()); + for (int i = 0; i < input.size (); i++) + { + EXPECT_LT (fabs (output[i].imag () - output_vector_mul_vector_op[i].imag ()), 1e-8); + EXPECT_LT (fabs (output[i].real () - output_vector_mul_vector_op[i].real ()), 1e-8); + } } -TEST_F(TestModuleHsolverMathKernel, vector_div_vector_op_cpu) +TEST_F (TestModuleHsolverMathKernel, vector_div_vector_op_cpu) { - std::vector> output(input.size()); - vector_div_vector_op_cpu()(dim, output.data(), input.data(), input_double.data()); - for (int i = 0; i < input.size(); i++) - { - EXPECT_LT(fabs(output[i].imag() - output_vector_div_vector_op[i].imag()), 1e-8); - 
EXPECT_LT(fabs(output[i].real() - output_vector_div_vector_op[i].real()), 1e-8); - } + std::vector> output (input.size ()); + vector_div_vector_op_cpu () (dim, output.data (), input.data (), input_double.data ()); + for (int i = 0; i < input.size (); i++) + { + EXPECT_LT (fabs (output[i].imag () - output_vector_div_vector_op[i].imag ()), 1e-8); + EXPECT_LT (fabs (output[i].real () - output_vector_div_vector_op[i].real ()), 1e-8); + } } -TEST_F(TestModuleHsolverMathKernel, vector_add_vector_op_cpu) +TEST_F (TestModuleHsolverMathKernel, vector_add_vector_op_cpu) { - std::vector> output(input.size()); - vector_add_vector_op_cpu()(dim, - output.data(), - input1.data(), - constant1, - input2.data(), - constant2); - for (int i = 0; i < input.size(); i++) - { - EXPECT_LT(fabs(output[i].imag() - output_vector_add_vector_op[i].imag()), 1e-8); - EXPECT_LT(fabs(output[i].real() - output_vector_add_vector_op[i].real()), 1e-8); - } + std::vector> output (input.size ()); + vector_add_vector_op_cpu () (dim, output.data (), input1.data (), constant1, input2.data (), constant2); + for (int i = 0; i < input.size (); i++) + { + EXPECT_LT (fabs (output[i].imag () - output_vector_add_vector_op[i].imag ()), 1e-8); + EXPECT_LT (fabs (output[i].real () - output_vector_add_vector_op[i].real ()), 1e-8); + } } -TEST_F(TestModuleHsolverMathKernel, axpy_op_cpu) +TEST_F (TestModuleHsolverMathKernel, axpy_op_cpu) { - axpy_op_cpu()(dim, &alpha_axpy, X_axpy.data(), 1, Y_axpy.data(), 1); - for (int i = 0; i < input.size(); i++) - { - EXPECT_LT(fabs(Y_axpy[i].imag() - output_axpy_op[i].imag()), 1e-8); - EXPECT_LT(fabs(Y_axpy[i].real() - output_axpy_op[i].real()), 1e-8); - } + axpy_op_cpu () (dim, &alpha_axpy, X_axpy.data (), 1, Y_axpy.data (), 1); + for (int i = 0; i < input.size (); i++) + { + EXPECT_LT (fabs (Y_axpy[i].imag () - output_axpy_op[i].imag ()), 1e-8); + EXPECT_LT (fabs (Y_axpy[i].real () - output_axpy_op[i].real ()), 1e-8); + } } -TEST_F(TestModuleHsolverMathKernel, scal_op_cpu) 
+TEST_F (TestModuleHsolverMathKernel, scal_op_cpu) { - scal_op_cpu()(dim, &alpha_scal, X_scal.data(), 1); - for (int i = 0; i < input.size(); i++) - { - EXPECT_LT(fabs(X_scal[i].imag() - output_scal_op[i].imag()), 1e-8); - EXPECT_LT(fabs(X_scal[i].real() - output_scal_op[i].real()), 1e-8); - } + scal_op_cpu () (dim, &alpha_scal, X_scal.data (), 1); + for (int i = 0; i < input.size (); i++) + { + EXPECT_LT (fabs (X_scal[i].imag () - output_scal_op[i].imag ()), 1e-8); + EXPECT_LT (fabs (X_scal[i].real () - output_scal_op[i].real ()), 1e-8); + } } -TEST_F(TestModuleHsolverMathKernel, gemv_op_cpu) +TEST_F (TestModuleHsolverMathKernel, gemv_op_cpu) { - gemv_op_cpu()('C', - 2, - 3, - &ModuleBase::ONE, - A_gemv.data(), - 2, - X_gemv.data(), - 1, - &ModuleBase::ONE, - Y_gemv.data(), - 1); + gemv_op_cpu () ('C', + 2, + 3, + &ModuleBase::ONE, + A_gemv.data (), + 2, + X_gemv.data (), + 1, + &ModuleBase::ONE, + Y_gemv.data (), + 1); char trans = 'C'; int inc = 1; int row = 2; int col = 3; - zgemv_(&trans, - &row, - &col, - &ModuleBase::ONE, - A_gemv.data(), - &row, - X_gemv.data(), - &inc, - &ModuleBase::ONE, - Y_test_gemv.data(), - &inc); - for (int i = 0; i < Y_gemv.size(); i++) - { - EXPECT_LT(fabs(Y_gemv[i].imag() - Y_test_gemv[i].imag()), 1e-12); - EXPECT_LT(fabs(Y_gemv[i].real() - Y_test_gemv[i].real()), 1e-12); - } + zgemv_ (&trans, + &row, + &col, + &ModuleBase::ONE, + A_gemv.data (), + &row, + X_gemv.data (), + &inc, + &ModuleBase::ONE, + Y_test_gemv.data (), + &inc); + for (int i = 0; i < Y_gemv.size (); i++) + { + EXPECT_LT (fabs (Y_gemv[i].imag () - Y_test_gemv[i].imag ()), 1e-12); + EXPECT_LT (fabs (Y_gemv[i].real () - Y_test_gemv[i].real ()), 1e-12); + } } #if __UT_USE_CUDA || __UT_USE_ROCM -TEST_F(TestModuleHsolverMathKernel, zdot_real_op_gpu) +TEST_F (TestModuleHsolverMathKernel, zdot_real_op_gpu) { std::complex*psi_L_dev = NULL, *psi_R_dev = NULL; - resize_memory_op()(psi_L_dev, psi_L.size()); - resize_memory_op()(psi_R_dev, psi_R.size()); - 
synchronize_memory_op()(psi_L_dev, psi_L.data(), psi_L.size()); - synchronize_memory_op()(psi_R_dev, psi_R.data(), psi_R.size()); - ModuleBase::createGpuBlasHandle(); - double result = zdot_real_gpu_op()(dim, psi_L_dev, psi_R_dev, false); - ModuleBase::destoryBLAShandle(); - EXPECT_LT(fabs(result - expected_result), 1e-12); - delete_memory_op()(psi_L_dev); - delete_memory_op()(psi_R_dev); + resize_memory_op () (psi_L_dev, psi_L.size ()); + resize_memory_op () (psi_R_dev, psi_R.size ()); + synchronize_memory_op () (psi_L_dev, psi_L.data (), psi_L.size ()); + synchronize_memory_op () (psi_R_dev, psi_R.data (), psi_R.size ()); + ModuleBase::createGpuBlasHandle (); + double result = zdot_real_gpu_op () (dim, psi_L_dev, psi_R_dev, false); + ModuleBase::destoryBLAShandle (); + EXPECT_LT (fabs (result - expected_result), 1e-12); + delete_memory_op () (psi_L_dev); + delete_memory_op () (psi_R_dev); } -TEST_F(TestModuleHsolverMathKernel, vector_mul_real_op_gpu) +TEST_F (TestModuleHsolverMathKernel, vector_mul_real_op_gpu) { // in CPU - std::vector> output(input.size()); + std::vector> output (input.size ()); // in GPU std::complex* input_dev = NULL; std::complex* output_dev = NULL; - resize_memory_op()(input_dev, input.size()); - resize_memory_op()(output_dev, input.size()); + resize_memory_op () (input_dev, input.size ()); + resize_memory_op () (output_dev, input.size ()); // syn the input data in CPU to GPU - synchronize_memory_op()(input_dev, input.data(), input.size()); + synchronize_memory_op () (input_dev, input.data (), input.size ()); // run - vector_mul_real_op_gpu()(dim, output_dev, input_dev, 1.0 / constant); + vector_mul_real_op_gpu () (dim, output_dev, input_dev, 1.0 / constant); // syn the output data in GPU to CPU - synchronize_memory_op_gpu()(output.data(), output_dev, output.size()); - - for (int i = 0; i < input.size(); i++) - { - EXPECT_LT(fabs(output[i].imag() - output_vector_mul_real_op[i].imag()), 1e-8); - EXPECT_LT(fabs(output[i].real() - 
output_vector_mul_real_op[i].real()), 1e-8); - } - delete_memory_op()(input_dev); - delete_memory_op()(output_dev); + synchronize_memory_op_gpu () (output.data (), output_dev, output.size ()); + + for (int i = 0; i < input.size (); i++) + { + EXPECT_LT (fabs (output[i].imag () - output_vector_mul_real_op[i].imag ()), 1e-8); + EXPECT_LT (fabs (output[i].real () - output_vector_mul_real_op[i].real ()), 1e-8); + } + delete_memory_op () (input_dev); + delete_memory_op () (output_dev); } -TEST_F(TestModuleHsolverMathKernel, vector_mul_vector_op_gpu) +TEST_F (TestModuleHsolverMathKernel, vector_mul_vector_op_gpu) { // in CPU - std::vector> output(input.size()); + std::vector> output (input.size ()); // in GPU std::complex* input_dev = NULL; @@ -417,35 +411,35 @@ TEST_F(TestModuleHsolverMathKernel, vector_mul_vector_op_gpu) std::complex* output_dev = NULL; // resize memory for values - resize_memory_op()(input_dev, input.size()); - resize_memory_op_double()(input_double_dev, input.size()); - resize_memory_op()(output_dev, input.size()); + resize_memory_op () (input_dev, input.size ()); + resize_memory_op_double () (input_double_dev, input.size ()); + resize_memory_op () (output_dev, input.size ()); // syn the input data in CPU to GPU - synchronize_memory_op()(input_dev, input.data(), input.size()); - synchronize_memory_op_double()(input_double_dev, input_double.data(), input.size()); + synchronize_memory_op () (input_dev, input.data (), input.size ()); + synchronize_memory_op_double () (input_double_dev, input_double.data (), input.size ()); // run - vector_mul_vector_op_gpu()(dim, output_dev, input_dev, input_double_dev); + vector_mul_vector_op_gpu () (dim, output_dev, input_dev, input_double_dev); // syn the output data in GPU to CPU - synchronize_memory_op_gpu()(output.data(), output_dev, output.size()); + synchronize_memory_op_gpu () (output.data (), output_dev, output.size ()); - for (int i = 0; i < input.size(); i++) - { - EXPECT_LT(fabs(output[i].imag() - 
output_vector_mul_vector_op[i].imag()), 1e-8); - EXPECT_LT(fabs(output[i].real() - output_vector_mul_vector_op[i].real()), 1e-8); - } + for (int i = 0; i < input.size (); i++) + { + EXPECT_LT (fabs (output[i].imag () - output_vector_mul_vector_op[i].imag ()), 1e-8); + EXPECT_LT (fabs (output[i].real () - output_vector_mul_vector_op[i].real ()), 1e-8); + } - delete_memory_op()(input_dev); - delete_memory_op_double()(input_double_dev); - delete_memory_op()(output_dev); + delete_memory_op () (input_dev); + delete_memory_op_double () (input_double_dev); + delete_memory_op () (output_dev); } -TEST_F(TestModuleHsolverMathKernel, vector_div_vector_op_gpu) +TEST_F (TestModuleHsolverMathKernel, vector_div_vector_op_gpu) { // in CPU - std::vector> output(input.size()); + std::vector> output (input.size ()); // in GPU std::complex* input_dev = NULL; @@ -453,35 +447,35 @@ TEST_F(TestModuleHsolverMathKernel, vector_div_vector_op_gpu) std::complex* output_dev = NULL; // resize memory for values in GPU - resize_memory_op()(input_dev, input.size()); - resize_memory_op_double()(input_double_dev, input.size()); - resize_memory_op()(output_dev, input.size()); + resize_memory_op () (input_dev, input.size ()); + resize_memory_op_double () (input_double_dev, input.size ()); + resize_memory_op () (output_dev, input.size ()); // syn the input data in CPU to GPU - synchronize_memory_op()(input_dev, input.data(), input.size()); - synchronize_memory_op_double()(input_double_dev, input_double.data(), input.size()); + synchronize_memory_op () (input_dev, input.data (), input.size ()); + synchronize_memory_op_double () (input_double_dev, input_double.data (), input.size ()); // run - vector_div_vector_op_gpu()(dim, output_dev, input_dev, input_double_dev); + vector_div_vector_op_gpu () (dim, output_dev, input_dev, input_double_dev); // syn the output data in GPU to CPU - synchronize_memory_op_gpu()(output.data(), output_dev, output.size()); + synchronize_memory_op_gpu () (output.data (), 
output_dev, output.size ()); - for (int i = 0; i < input.size(); i++) - { - EXPECT_LT(fabs(output[i].imag() - output_vector_div_vector_op[i].imag()), 1e-8); - EXPECT_LT(fabs(output[i].real() - output_vector_div_vector_op[i].real()), 1e-8); - } + for (int i = 0; i < input.size (); i++) + { + EXPECT_LT (fabs (output[i].imag () - output_vector_div_vector_op[i].imag ()), 1e-8); + EXPECT_LT (fabs (output[i].real () - output_vector_div_vector_op[i].real ()), 1e-8); + } - delete_memory_op()(input_dev); - delete_memory_op_double()(input_double_dev); - delete_memory_op()(output_dev); + delete_memory_op () (input_dev); + delete_memory_op_double () (input_double_dev); + delete_memory_op () (output_dev); } -TEST_F(TestModuleHsolverMathKernel, vector_add_vector_op_gpu) +TEST_F (TestModuleHsolverMathKernel, vector_add_vector_op_gpu) { // in CPU - std::vector> output(input.size()); + std::vector> output (input.size ()); // in GPU std::complex* input1_dev = NULL; @@ -489,96 +483,91 @@ TEST_F(TestModuleHsolverMathKernel, vector_add_vector_op_gpu) std::complex* output_dev = NULL; // resize memory for values in GPU - resize_memory_op()(input1_dev, input.size()); - resize_memory_op()(input2_dev, input.size()); - resize_memory_op()(output_dev, input.size()); + resize_memory_op () (input1_dev, input.size ()); + resize_memory_op () (input2_dev, input.size ()); + resize_memory_op () (output_dev, input.size ()); // syn the input data in CPU to GPU - synchronize_memory_op()(input1_dev, input1.data(), input.size()); - synchronize_memory_op()(input2_dev, input2.data(), input.size()); + synchronize_memory_op () (input1_dev, input1.data (), input.size ()); + synchronize_memory_op () (input2_dev, input2.data (), input.size ()); // run - vector_add_vector_op_gpu()(dim, - output_dev, - input1_dev, - constant1, - input2_dev, - constant2); + vector_add_vector_op_gpu () (dim, output_dev, input1_dev, constant1, input2_dev, constant2); // syn the output data in GPU to CPU - 
synchronize_memory_op_gpu()(output.data(), output_dev, output.size()); + synchronize_memory_op_gpu () (output.data (), output_dev, output.size ()); - for (int i = 0; i < input.size(); i++) - { - EXPECT_LT(fabs(output[i].imag() - output_vector_add_vector_op[i].imag()), 1e-8); - EXPECT_LT(fabs(output[i].real() - output_vector_add_vector_op[i].real()), 1e-8); - } + for (int i = 0; i < input.size (); i++) + { + EXPECT_LT (fabs (output[i].imag () - output_vector_add_vector_op[i].imag ()), 1e-8); + EXPECT_LT (fabs (output[i].real () - output_vector_add_vector_op[i].real ()), 1e-8); + } - delete_memory_op()(input1_dev); - delete_memory_op()(input2_dev); - delete_memory_op()(output_dev); + delete_memory_op () (input1_dev); + delete_memory_op () (input2_dev); + delete_memory_op () (output_dev); } -TEST_F(TestModuleHsolverMathKernel, axpy_op_gpu) +TEST_F (TestModuleHsolverMathKernel, axpy_op_gpu) { // in GPU std::complex* X_axpy_dev = NULL; std::complex* Y_axpy_dev = NULL; // resize memory for values in GPU - resize_memory_op()(X_axpy_dev, X_axpy.size()); - resize_memory_op()(Y_axpy_dev, Y_axpy.size()); + resize_memory_op () (X_axpy_dev, X_axpy.size ()); + resize_memory_op () (Y_axpy_dev, Y_axpy.size ()); // syn the input data in CPU to GPU - synchronize_memory_op()(X_axpy_dev, X_axpy.data(), X_axpy.size()); - synchronize_memory_op()(Y_axpy_dev, Y_axpy.data(), Y_axpy.size()); + synchronize_memory_op () (X_axpy_dev, X_axpy.data (), X_axpy.size ()); + synchronize_memory_op () (Y_axpy_dev, Y_axpy.data (), Y_axpy.size ()); // run - ModuleBase::createGpuBlasHandle(); - axpy_op_gpu()(dim, &alpha_axpy, X_axpy_dev, 1, Y_axpy_dev, 1); - ModuleBase::destoryBLAShandle(); + ModuleBase::createGpuBlasHandle (); + axpy_op_gpu () (dim, &alpha_axpy, X_axpy_dev, 1, Y_axpy_dev, 1); + ModuleBase::destoryBLAShandle (); // syn the output data in GPU to CPU - synchronize_memory_op_gpu()(Y_axpy.data(), Y_axpy_dev, Y_axpy.size()); + synchronize_memory_op_gpu () (Y_axpy.data (), Y_axpy_dev, 
Y_axpy.size ()); - for (int i = 0; i < input.size(); i++) - { - EXPECT_LT(fabs(Y_axpy[i].imag() - output_axpy_op[i].imag()), 1e-8); - EXPECT_LT(fabs(Y_axpy[i].real() - output_axpy_op[i].real()), 1e-8); - } + for (int i = 0; i < input.size (); i++) + { + EXPECT_LT (fabs (Y_axpy[i].imag () - output_axpy_op[i].imag ()), 1e-8); + EXPECT_LT (fabs (Y_axpy[i].real () - output_axpy_op[i].real ()), 1e-8); + } - delete_memory_op()(X_axpy_dev); - delete_memory_op()(Y_axpy_dev); + delete_memory_op () (X_axpy_dev); + delete_memory_op () (Y_axpy_dev); } -TEST_F(TestModuleHsolverMathKernel, scal_op_gpu) +TEST_F (TestModuleHsolverMathKernel, scal_op_gpu) { // in GPU std::complex* X_scal_dev = NULL; // resize memory for values in GPU - resize_memory_op()(X_scal_dev, X_scal.size()); + resize_memory_op () (X_scal_dev, X_scal.size ()); // syn the input data in CPU to GPU - synchronize_memory_op()(X_scal_dev, X_scal.data(), X_scal.size()); + synchronize_memory_op () (X_scal_dev, X_scal.data (), X_scal.size ()); // run - ModuleBase::createGpuBlasHandle(); - scal_op_gpu()(dim, &alpha_scal, X_scal_dev, 1); - ModuleBase::destoryBLAShandle(); + ModuleBase::createGpuBlasHandle (); + scal_op_gpu () (dim, &alpha_scal, X_scal_dev, 1); + ModuleBase::destoryBLAShandle (); // syn the output data in GPU to CPU - synchronize_memory_op_gpu()(X_scal.data(), X_scal_dev, X_scal.size()); - - for (int i = 0; i < input.size(); i++) - { - EXPECT_LT(fabs(X_scal[i].imag() - output_scal_op[i].imag()), 1e-8); - EXPECT_LT(fabs(X_scal[i].real() - output_scal_op[i].real()), 1e-8); - } - delete_memory_op()(X_scal_dev); + synchronize_memory_op_gpu () (X_scal.data (), X_scal_dev, X_scal.size ()); + + for (int i = 0; i < input.size (); i++) + { + EXPECT_LT (fabs (X_scal[i].imag () - output_scal_op[i].imag ()), 1e-8); + EXPECT_LT (fabs (X_scal[i].real () - output_scal_op[i].real ()), 1e-8); + } + delete_memory_op () (X_scal_dev); } -TEST_F(TestModuleHsolverMathKernel, gemv_op_gpu) +TEST_F (TestModuleHsolverMathKernel, 
gemv_op_gpu) { // in GPU std::complex* A_gemv_dev = NULL; @@ -586,51 +575,51 @@ TEST_F(TestModuleHsolverMathKernel, gemv_op_gpu) std::complex* Y_gemv_dev = NULL; // resize memory for values in GPU - resize_memory_op()(A_gemv_dev, A_gemv.size()); - resize_memory_op()(X_gemv_dev, X_gemv.size()); - resize_memory_op()(Y_gemv_dev, Y_gemv.size()); + resize_memory_op () (A_gemv_dev, A_gemv.size ()); + resize_memory_op () (X_gemv_dev, X_gemv.size ()); + resize_memory_op () (Y_gemv_dev, Y_gemv.size ()); // syn the input data in CPU to GPU - synchronize_memory_op()(A_gemv_dev, A_gemv.data(), A_gemv.size()); - synchronize_memory_op()(X_gemv_dev, X_gemv.data(), X_gemv.size()); - synchronize_memory_op()(Y_gemv_dev, Y_gemv.data(), Y_gemv.size()); + synchronize_memory_op () (A_gemv_dev, A_gemv.data (), A_gemv.size ()); + synchronize_memory_op () (X_gemv_dev, X_gemv.data (), X_gemv.size ()); + synchronize_memory_op () (Y_gemv_dev, Y_gemv.data (), Y_gemv.size ()); // run - ModuleBase::createGpuBlasHandle(); - gemv_op_gpu()('C', 2, 3, &ModuleBase::ONE, A_gemv_dev, 2, X_gemv_dev, 1, &ModuleBase::ONE, Y_gemv_dev, 1); - ModuleBase::destoryBLAShandle(); + ModuleBase::createGpuBlasHandle (); + gemv_op_gpu () ('C', 2, 3, &ModuleBase::ONE, A_gemv_dev, 2, X_gemv_dev, 1, &ModuleBase::ONE, Y_gemv_dev, 1); + ModuleBase::destoryBLAShandle (); // syn the output data in GPU to CPU - synchronize_memory_op_gpu()(Y_gemv.data(), Y_gemv_dev, Y_gemv.size()); + synchronize_memory_op_gpu () (Y_gemv.data (), Y_gemv_dev, Y_gemv.size ()); // cal right answer: Y_test_gemv char trans = 'C'; int inc = 1; int row = 2; int col = 3; - zgemv_(&trans, - &row, - &col, - &ModuleBase::ONE, - A_gemv.data(), - &row, - X_gemv.data(), - &inc, - &ModuleBase::ONE, - Y_test_gemv.data(), - &inc); - - for (int i = 0; i < Y_gemv.size(); i++) - { - EXPECT_LT(fabs(Y_gemv[i].imag() - Y_test_gemv[i].imag()), 1e-12); - EXPECT_LT(fabs(Y_gemv[i].real() - Y_test_gemv[i].real()), 1e-12); - } - - delete_memory_op()(A_gemv_dev); - 
delete_memory_op()(X_gemv_dev); - delete_memory_op()(Y_gemv_dev); + zgemv_ (&trans, + &row, + &col, + &ModuleBase::ONE, + A_gemv.data (), + &row, + X_gemv.data (), + &inc, + &ModuleBase::ONE, + Y_test_gemv.data (), + &inc); + + for (int i = 0; i < Y_gemv.size (); i++) + { + EXPECT_LT (fabs (Y_gemv[i].imag () - Y_test_gemv[i].imag ()), 1e-12); + EXPECT_LT (fabs (Y_gemv[i].real () - Y_test_gemv[i].real ()), 1e-12); + } + + delete_memory_op () (A_gemv_dev); + delete_memory_op () (X_gemv_dev); + delete_memory_op () (Y_gemv_dev); } -TEST_F(TestModuleHsolverMathKernel, matrixCopy_op_gpu) +TEST_F (TestModuleHsolverMathKernel, matrixCopy_op_gpu) { // const std::vector > expect_result = { // {-0.11893203,-0.13492526}, {-0.40314756, 0.07734553}, {0.06892412, 0.14837423}, {0.0, 0.0}, @@ -644,36 +633,41 @@ TEST_F(TestModuleHsolverMathKernel, matrixCopy_op_gpu) {-0.54274745, -0.09682102}, {0.30232967, 0.49411249}}; - const std::vector> B(8); + const std::vector> B (8); int n = 2; int LDA = 3; int LDB = 4; std::complex* device_A = nullptr; - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(device_A, A.size()); - base_device::memory:: - synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>()(device_A, - A.data(), - A.size()); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU> () (device_A, A.size ()); + base_device::memory::synchronize_memory_op, + base_device::DEVICE_GPU, + base_device::DEVICE_CPU> () (device_A, A.data (), A.size ()); std::complex* device_B = nullptr; - base_device::memory::resize_memory_op, base_device::DEVICE_GPU>()(device_B, B.size()); - base_device::memory:: - synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>()(device_B, - B.data(), - B.size()); + base_device::memory::resize_memory_op, base_device::DEVICE_GPU> () (device_B, B.size ()); + base_device::memory::synchronize_memory_op, + base_device::DEVICE_GPU, + base_device::DEVICE_CPU> () (device_B, B.data (), B.size ()); // run - 
ModuleBase::matrixCopy, base_device::DEVICE_GPU>()(n, LDA, device_A, LDA, device_B, LDB); + ModuleBase::matrixCopy, base_device::DEVICE_GPU> () (n, LDA, device_A, LDA, device_B, LDB); - std::vector> B_gpu2cpu(8); + std::vector> B_gpu2cpu (8); base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, - base_device::DEVICE_GPU>()(B_gpu2cpu.data(), device_B, B_gpu2cpu.size()); - - std::vector> B_cpu(8); - ModuleBase::matrixCopy, base_device::DEVICE_CPU>()(n, LDA, A.data(), LDA, B_cpu.data(), LDB); + base_device::DEVICE_GPU> () (B_gpu2cpu.data (), + device_B, + B_gpu2cpu.size ()); + + std::vector> B_cpu (8); + ModuleBase::matrixCopy, base_device::DEVICE_CPU> () (n, + LDA, + A.data (), + LDA, + B_cpu.data (), + LDB); // for (int i = 0; i < 4; i++) // { @@ -694,14 +688,14 @@ TEST_F(TestModuleHsolverMathKernel, matrixCopy_op_gpu) // std::cout << std::endl; // } - for (int i = 0; i < B_cpu.size(); i++) - { - EXPECT_LT(fabs(B_gpu2cpu[i].imag() - B_cpu[i].imag()), 1e-12); - EXPECT_LT(fabs(B_gpu2cpu[i].real() - B_cpu[i].real()), 1e-12); - } + for (int i = 0; i < B_cpu.size (); i++) + { + EXPECT_LT (fabs (B_gpu2cpu[i].imag () - B_cpu[i].imag ()), 1e-12); + EXPECT_LT (fabs (B_gpu2cpu[i].real () - B_cpu[i].real ()), 1e-12); + } - delete_memory_op()(device_A); - delete_memory_op()(device_B); + delete_memory_op () (device_A); + delete_memory_op () (device_B); } #endif // __UT_USE_CUDA || __UT_USE_ROCM diff --git a/source/source_base/kernels/test/math_ylm_op_test.cpp b/source/source_base/kernels/test/math_ylm_op_test.cpp index 52cee9bba9a..3def8d938d0 100644 --- a/source/source_base/kernels/test/math_ylm_op_test.cpp +++ b/source/source_base/kernels/test/math_ylm_op_test.cpp @@ -8,333 +8,338 @@ class TestModuleBaseMathMultiDevice : public ::testing::Test { -protected: + protected: // xx = tf.random.uniform([100], minval=-4, maxval=4, dtype = tf.float64) - const base_device::DEVICE_CPU* cpu_ctx = {}; - const base_device::DEVICE_GPU* gpu_ctx = {}; + const 
base_device::DEVICE_CPU* cpu_ctx = {}; + const base_device::DEVICE_GPU* gpu_ctx = {}; - int ng = 59, lmax = 1; + int ng = 59, lmax = 1; - double SQRT2 = 1.4142135623730951, PI = 3.1415926535897931, PI_HALF = 1.5707963267948966, - FOUR_PI = 12.566370614359172, SQRT_INVERSE_FOUR_PI = 0.28209479177387814; + double SQRT2 = 1.4142135623730951, PI = 3.1415926535897931, PI_HALF = 1.5707963267948966, + FOUR_PI = 12.566370614359172, SQRT_INVERSE_FOUR_PI = 0.28209479177387814; - std::vector g - = {2, -2, -2, 1, -1, -1, 0, 0, 0, -1, 1, 1, -2, 2, 2, 2, -2, 0, 1, -1, 1, 0, 0, 2, -1, 1, - 3, 2, -2, 2, 1, -1, 3, -1, 1, -3, -2, 2, -2, 1, -1, -3, 0, 0, -2, -1, 1, -1, -2, 2, 0, 2, - 0, -2, 1, 1, -1, 0, 2, 0, -1, 3, 1, 3, -1, -1, 2, 0, 0, 1, 1, 1, 0, 2, 2, 3, -1, 1, - 2, 0, 2, 1, 1, 3, 1, 1, -3, 0, 2, -2, -1, 3, -1, 2, 2, -2, 1, 3, -1, 3, 1, -1, 2, 2, - 0, 1, 3, 1, 3, 1, 1, 2, 2, 2, -1, -3, 1, -2, -2, 2, -2, -2, -2, -3, -1, -1, -1, -3, -1, -2, - -2, 0, -3, -1, 1, 1, -3, -1, 0, -2, 0, -1, -1, 1, -2, 0, 2, 1, -3, 1, 0, -2, 2, -1, -1, 3, - -1, -1, -3, -2, 0, -2, -3, 1, -1, 0, -2, -2, -1, -1, -1, -2, 0, 0, -3, 1, 1}; - std::vector expected_ylm = {0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - 0.282095, - -0.282095, - -0.282095, - 0, - 0.282095, - 0.282095, - 0, - 0.282095, - 0.488603, - 0.441958, - 0.282095, - 0.441958, - -0.441958, - -0.282095, - 
-0.441958, - -0.488603, - -0.282095, - 0, - -0.345494, - -0.282095, - 0, - 0.147319, - -0.147319, - 0, - 0.282095, - 0.345494, - 0.147319, - 0.345494, - 0.441958, - -0.441958, - -0.345494, - -0.147319, - -0.282095, - -0.147319, - -0.147319, - 0, - 0.147319, - 0.147319, - 0.282095, - 0.147319, - 0.282095, - -0.282095, - -0.147319, - -0.147319, - 0, - 0.147319, - -0.147319, - 0, - 0.282095, - 0.345494, - 0.147319, - 0.345494, - 0.441958, - -0.441958, - -0.345494, - -0.147319, - -0.345494, - -0.282095, - 0, - 0.147319, - -0.282095, - -0.282095, - -2.99183e-17, - 0.282095, - 0.282095, - -0.345494, - -0.282095, - -0, - 0.147319, - -0.282095, - -0.147319, - 0.147319, - 0.282095, - -0.147319, - -0, - 0.282095, - 0.345494, - -0.345494, - -0.282095, - -2.99183e-17, - 0.147319, - -0.441958, - -0.488603, - -0.282095, - -2.11554e-17, - -0.441958, - -0.345494, - -0.147319, - -0.147319, - -2.11554e-17, - 0.147319, - -0.282095, - -0.147319, - -0.441958, - -0.345494, - -0.147319, - -0.441958, - -0.282095, - 0.147319, - 0.282095, - 0.282095, - 0.441958, - 0.147319, - 0.345494, - 0.441958, - -0.147319, - -2.99183e-17, - 0.282095, - 0.345494, - -0.147319, - -2.11554e-17, - 0.147319, - 0.147319, - 0.345494, - 0.441958, - -2.11554e-17, - 0.282095, - 0.488603, - 0.441958, - 0.282095, - 0.282095, - -0.488603, - -0.282095, - -0.282095, - 0.345494, - 0.282095, - -0, - -0.147319, - 0.282095, - 0.147319, - -0.147319, - -0.282095, - 0.147319, - -0, - -0.282095, - -0.345494, - -0, - -0.282095, - -0.488603, - -0.441958, - 0.147319, - -0, - -0.282095, - -0.345494, - 0.147319, - -0, - -0.147319, - -0.147319, - -0.345494, - -0.441958, - -0.282095, - -0.441958, - -0.147319, - -0.345494, - -0.441958, - -0.147319, - -0.282095, - 0.441958, - 0.282095, - 0.282095, - 0.147319, - 0.441958, - 0.345494, - 0.147319, - 0.441958, - 0.488603, - 0.282095, - -4.23108e-17, - 0.441958, - 0.345494, - 0.147319, - 0.147319, - -4.23108e-17, - -0.147319, - 0.345494, - 0.282095, - -5.98366e-17, - -0.147319}; + 
std::vector g + = {2, -2, -2, 1, -1, -1, 0, 0, 0, -1, 1, 1, -2, 2, 2, 2, -2, 0, 1, -1, 1, 0, 0, 2, -1, 1, + 3, 2, -2, 2, 1, -1, 3, -1, 1, -3, -2, 2, -2, 1, -1, -3, 0, 0, -2, -1, 1, -1, -2, 2, 0, 2, + 0, -2, 1, 1, -1, 0, 2, 0, -1, 3, 1, 3, -1, -1, 2, 0, 0, 1, 1, 1, 0, 2, 2, 3, -1, 1, + 2, 0, 2, 1, 1, 3, 1, 1, -3, 0, 2, -2, -1, 3, -1, 2, 2, -2, 1, 3, -1, 3, 1, -1, 2, 2, + 0, 1, 3, 1, 3, 1, 1, 2, 2, 2, -1, -3, 1, -2, -2, 2, -2, -2, -2, -3, -1, -1, -1, -3, -1, -2, + -2, 0, -3, -1, 1, 1, -3, -1, 0, -2, 0, -1, -1, 1, -2, 0, 2, 1, -3, 1, 0, -2, 2, -1, -1, 3, + -1, -1, -3, -2, 0, -2, -3, 1, -1, 0, -2, -2, -1, -1, -1, -2, 0, 0, -3, 1, 1}; + std::vector expected_ylm = {0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + 0.282095, + -0.282095, + -0.282095, + 0, + 0.282095, + 0.282095, + 0, + 0.282095, + 0.488603, + 0.441958, + 0.282095, + 0.441958, + -0.441958, + -0.282095, + -0.441958, + -0.488603, + -0.282095, + 0, + -0.345494, + -0.282095, + 0, + 0.147319, + -0.147319, + 0, + 0.282095, + 0.345494, + 0.147319, + 0.345494, + 0.441958, + -0.441958, + -0.345494, + -0.147319, + -0.282095, + -0.147319, + -0.147319, + 0, + 0.147319, + 0.147319, + 0.282095, + 0.147319, + 0.282095, + -0.282095, + -0.147319, + -0.147319, + 0, + 0.147319, + -0.147319, + 0, + 0.282095, + 0.345494, + 0.147319, + 0.345494, + 0.441958, + -0.441958, + -0.345494, + 
-0.147319, + -0.345494, + -0.282095, + 0, + 0.147319, + -0.282095, + -0.282095, + -2.99183e-17, + 0.282095, + 0.282095, + -0.345494, + -0.282095, + -0, + 0.147319, + -0.282095, + -0.147319, + 0.147319, + 0.282095, + -0.147319, + -0, + 0.282095, + 0.345494, + -0.345494, + -0.282095, + -2.99183e-17, + 0.147319, + -0.441958, + -0.488603, + -0.282095, + -2.11554e-17, + -0.441958, + -0.345494, + -0.147319, + -0.147319, + -2.11554e-17, + 0.147319, + -0.282095, + -0.147319, + -0.441958, + -0.345494, + -0.147319, + -0.441958, + -0.282095, + 0.147319, + 0.282095, + 0.282095, + 0.441958, + 0.147319, + 0.345494, + 0.441958, + -0.147319, + -2.99183e-17, + 0.282095, + 0.345494, + -0.147319, + -2.11554e-17, + 0.147319, + 0.147319, + 0.345494, + 0.441958, + -2.11554e-17, + 0.282095, + 0.488603, + 0.441958, + 0.282095, + 0.282095, + -0.488603, + -0.282095, + -0.282095, + 0.345494, + 0.282095, + -0, + -0.147319, + 0.282095, + 0.147319, + -0.147319, + -0.282095, + 0.147319, + -0, + -0.282095, + -0.345494, + -0, + -0.282095, + -0.488603, + -0.441958, + 0.147319, + -0, + -0.282095, + -0.345494, + 0.147319, + -0, + -0.147319, + -0.147319, + -0.345494, + -0.441958, + -0.282095, + -0.441958, + -0.147319, + -0.345494, + -0.441958, + -0.147319, + -0.282095, + 0.441958, + 0.282095, + 0.282095, + 0.147319, + 0.441958, + 0.345494, + 0.147319, + 0.441958, + 0.488603, + 0.282095, + -4.23108e-17, + 0.441958, + 0.345494, + 0.147319, + 0.147319, + -4.23108e-17, + -0.147319, + 0.345494, + 0.282095, + -5.98366e-17, + -0.147319}; - using delmem_var_op = base_device::memory::delete_memory_op; - using resmem_var_op = base_device::memory::resize_memory_op; - using syncmem_var_h2d_op - = base_device::memory::synchronize_memory_op; - using syncmem_var_d2h_op - = base_device::memory::synchronize_memory_op; + using delmem_var_op = base_device::memory::delete_memory_op; + using resmem_var_op = base_device::memory::resize_memory_op; + using syncmem_var_h2d_op + = base_device::memory::synchronize_memory_op; + 
using syncmem_var_d2h_op + = base_device::memory::synchronize_memory_op; - void SetUp() override - { - } - void TearDown() override { + void + SetUp () override + { + } + void + TearDown () override + { } }; -TEST_F(TestModuleBaseMathMultiDevice, cal_ylm_real_op_cpu) +TEST_F (TestModuleBaseMathMultiDevice, cal_ylm_real_op_cpu) { - std::vector p((lmax + 1) * (lmax + 1) * ng, 0.0); - std::vector ylm(expected_ylm.size(), 0.0); - ModuleBase::cal_ylm_real_op()(cpu_ctx, - ng, - lmax, - SQRT2, - PI, - PI_HALF, - FOUR_PI, - SQRT_INVERSE_FOUR_PI, - g.data(), - p.data(), - ylm.data()); + std::vector p ((lmax + 1) * (lmax + 1) * ng, 0.0); + std::vector ylm (expected_ylm.size (), 0.0); + ModuleBase::cal_ylm_real_op () (cpu_ctx, + ng, + lmax, + SQRT2, + PI, + PI_HALF, + FOUR_PI, + SQRT_INVERSE_FOUR_PI, + g.data (), + p.data (), + ylm.data ()); - for (int ii = 0; ii < ylm.size(); ii++) { - EXPECT_LT(fabs(ylm[ii] - expected_ylm[ii]), 6e-5); - } + for (int ii = 0; ii < ylm.size (); ii++) + { + EXPECT_LT (fabs (ylm[ii] - expected_ylm[ii]), 6e-5); + } } #if __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM -TEST_F(TestModuleBaseMathMultiDevice, cal_ylm_real_op_gpu) +TEST_F (TestModuleBaseMathMultiDevice, cal_ylm_real_op_gpu) { - std::vector p((lmax + 1) * (lmax + 1) * ng, 0.0); - std::vector ylm(expected_ylm.size(), 0.0); - double * d_ylm = nullptr, * d_g = nullptr, * d_p = nullptr; + std::vector p ((lmax + 1) * (lmax + 1) * ng, 0.0); + std::vector ylm (expected_ylm.size (), 0.0); + double *d_ylm = nullptr, *d_g = nullptr, *d_p = nullptr; - resmem_var_op()(d_g, g.size()); - resmem_var_op()(d_p, p.size()); - resmem_var_op()(d_ylm, ylm.size()); + resmem_var_op () (d_g, g.size ()); + resmem_var_op () (d_p, p.size ()); + resmem_var_op () (d_ylm, ylm.size ()); - syncmem_var_h2d_op()(d_g, g.data(), g.size()); - syncmem_var_h2d_op()(d_p, p.data(), p.size()); - syncmem_var_h2d_op()(d_ylm, ylm.data(), ylm.size()); + syncmem_var_h2d_op () (d_g, g.data (), g.size ()); + syncmem_var_h2d_op () 
(d_p, p.data (), p.size ()); + syncmem_var_h2d_op () (d_ylm, ylm.data (), ylm.size ()); - ModuleBase::cal_ylm_real_op()(gpu_ctx, - ng, - lmax, - SQRT2, - PI, - PI_HALF, - FOUR_PI, - SQRT_INVERSE_FOUR_PI, - d_g, - d_p, - d_ylm); + ModuleBase::cal_ylm_real_op () (gpu_ctx, + ng, + lmax, + SQRT2, + PI, + PI_HALF, + FOUR_PI, + SQRT_INVERSE_FOUR_PI, + d_g, + d_p, + d_ylm); - syncmem_var_d2h_op()(ylm.data(), d_ylm, ylm.size()); + syncmem_var_d2h_op () (ylm.data (), d_ylm, ylm.size ()); - for (int ii = 0; ii < ylm.size(); ii++) { - EXPECT_LT(fabs(ylm[ii] - expected_ylm[ii]), 6e-5); - } + for (int ii = 0; ii < ylm.size (); ii++) + { + EXPECT_LT (fabs (ylm[ii] - expected_ylm[ii]), 6e-5); + } - delmem_var_op()(d_g); - delmem_var_op()(d_p); - delmem_var_op()(d_ylm); + delmem_var_op () (d_g); + delmem_var_op () (d_p); + delmem_var_op () (d_ylm); } #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM \ No newline at end of file diff --git a/source/source_base/libm/branred.cpp b/source/source_base/libm/branred.cpp index ef9bd0e485b..4b1986a3f48 100644 --- a/source/source_base/libm/branred.cpp +++ b/source/source_base/libm/branred.cpp @@ -3,7 +3,7 @@ // DATE : 2023-01-06 //========================================================== -#include +#include #include namespace ModuleBase @@ -12,70 +12,71 @@ namespace libm { typedef int int4; -typedef union { unsigned int u[2]; int4 i[2]; double x; double d; } mynumber; +typedef union +{ + unsigned int u[2]; + int4 i[2]; + double x; + double d; +} mynumber; -#define max(x, y) (((y) > (x)) ? (y) : (x)) -#define min(x, y) (((y) < (x)) ? (y) : (x)) +#define max(x, y) (((y) > (x)) ? (y) : (x)) +#define min(x, y) (((y) < (x)) ? 
(y) : (x)) #if (__BYTE_ORDER == __BIG_ENDIAN) #define HIGH_HALF 0 -#define LOW_HALF 1 +#define LOW_HALF 1 static const mynumber -/**/ t576 = {{0x63f00000, 0x00000000}}, /* 2 ^ 576 */ -/**/ tm600 = {{0x1a700000, 0x00000000}}, /* 2 ^- 600 */ -/**/ tm24 = {{0x3e700000, 0x00000000}}, /* 2 ^- 24 */ -/**/ big = {{0x43380000, 0x00000000}}, /* 6755399441055744 */ -/**/ big1 = {{0x43580000, 0x00000000}}, /* 27021597764222976 */ -/**/ hp0 = {{0x3FF921FB, 0x54442D18}} ,/* 1.5707963267948966 */ -/**/ hp1 = {{0x3C91A626, 0x33145C07}} ,/* 6.123233995736766e-17 */ -/**/ mp1 = {{0x3FF921FB, 0x58000000}}, /* 1.5707963407039642 */ -/**/ mp2 = {{0xBE4DDE97, 0x40000000}}; /*-1.3909067675399456e-08 */ + /**/ t576 = {{0x63f00000, 0x00000000}}, /* 2 ^ 576 */ + /**/ tm600 = {{0x1a700000, 0x00000000}}, /* 2 ^- 600 */ + /**/ tm24 = {{0x3e700000, 0x00000000}}, /* 2 ^- 24 */ + /**/ big = {{0x43380000, 0x00000000}}, /* 6755399441055744 */ + /**/ big1 = {{0x43580000, 0x00000000}}, /* 27021597764222976 */ + /**/ hp0 = {{0x3FF921FB, 0x54442D18}}, /* 1.5707963267948966 */ + /**/ hp1 = {{0x3C91A626, 0x33145C07}}, /* 6.123233995736766e-17 */ + /**/ mp1 = {{0x3FF921FB, 0x58000000}}, /* 1.5707963407039642 */ + /**/ mp2 = {{0xBE4DDE97, 0x40000000}}; /*-1.3909067675399456e-08 */ #endif #if (__BYTE_ORDER == __LITTLE_ENDIAN) #define HIGH_HALF 1 -#define LOW_HALF 0 +#define LOW_HALF 0 static const mynumber -/**/ t576 = {{0x00000000, 0x63f00000}}, /* 2 ^ 576 */ -/**/ tm600 = {{0x00000000, 0x1a700000}}, /* 2 ^- 600 */ -/**/ tm24 = {{0x00000000, 0x3e700000}}, /* 2 ^- 24 */ -/**/ big = {{0x00000000, 0x43380000}}, /* 6755399441055744 */ -/**/ big1 = {{0x00000000, 0x43580000}}, /* 27021597764222976 */ -/**/ hp0 = {{0x54442D18, 0x3FF921FB}}, /* 1.5707963267948966 */ -/**/ hp1 = {{0x33145C07, 0x3C91A626}}, /* 6.123233995736766e-17 */ -/**/ mp1 = {{0x58000000, 0x3FF921FB}}, /* 1.5707963407039642 */ -/**/ mp2 = {{0x40000000, 0xBE4DDE97}}; /*-1.3909067675399456e-08 */ + /**/ t576 = {{0x00000000, 0x63f00000}}, /* 2 ^ 
576 */ + /**/ tm600 = {{0x00000000, 0x1a700000}}, /* 2 ^- 600 */ + /**/ tm24 = {{0x00000000, 0x3e700000}}, /* 2 ^- 24 */ + /**/ big = {{0x00000000, 0x43380000}}, /* 6755399441055744 */ + /**/ big1 = {{0x00000000, 0x43580000}}, /* 27021597764222976 */ + /**/ hp0 = {{0x54442D18, 0x3FF921FB}}, /* 1.5707963267948966 */ + /**/ hp1 = {{0x33145C07, 0x3C91A626}}, /* 6.123233995736766e-17 */ + /**/ mp1 = {{0x58000000, 0x3FF921FB}}, /* 1.5707963407039642 */ + /**/ mp2 = {{0x40000000, 0xBE4DDE97}}; /*-1.3909067675399456e-08 */ #endif -static const double toverp[75] = { /* 2/ PI base 24*/ - 10680707.0, 7228996.0, 1387004.0, 2578385.0, 16069853.0, - 12639074.0, 9804092.0, 4427841.0, 16666979.0, 11263675.0, - 12935607.0, 2387514.0, 4345298.0, 14681673.0, 3074569.0, - 13734428.0, 16653803.0, 1880361.0, 10960616.0, 8533493.0, - 3062596.0, 8710556.0, 7349940.0, 6258241.0, 3772886.0, - 3769171.0, 3798172.0, 8675211.0, 12450088.0, 3874808.0, - 9961438.0, 366607.0, 15675153.0, 9132554.0, 7151469.0, - 3571407.0, 2607881.0, 12013382.0, 4155038.0, 6285869.0, - 7677882.0, 13102053.0, 15825725.0, 473591.0, 9065106.0, - 15363067.0, 6271263.0, 9264392.0, 5636912.0, 4652155.0, - 7056368.0, 13614112.0, 10155062.0, 1944035.0, 9527646.0, - 15080200.0, 6658437.0, 6231200.0, 6832269.0, 16767104.0, - 5075751.0, 3212806.0, 1398474.0, 7579849.0, 6349435.0, - 12618859.0, 4703257.0, 12806093.0, 14477321.0, 2786137.0, - 12875403.0, 9837734.0, 14528324.0, 13719321.0, 343717.0 }; +static const double toverp[75] + = {/* 2/ PI base 24*/ + 10680707.0, 7228996.0, 1387004.0, 2578385.0, 16069853.0, 12639074.0, 9804092.0, 4427841.0, 16666979.0, + 11263675.0, 12935607.0, 2387514.0, 4345298.0, 14681673.0, 3074569.0, 13734428.0, 16653803.0, 1880361.0, + 10960616.0, 8533493.0, 3062596.0, 8710556.0, 7349940.0, 6258241.0, 3772886.0, 3769171.0, 3798172.0, + 8675211.0, 12450088.0, 3874808.0, 9961438.0, 366607.0, 15675153.0, 9132554.0, 7151469.0, 3571407.0, + 2607881.0, 12013382.0, 4155038.0, 6285869.0, 7677882.0, 
13102053.0, 15825725.0, 473591.0, 9065106.0, + 15363067.0, 6271263.0, 9264392.0, 5636912.0, 4652155.0, 7056368.0, 13614112.0, 10155062.0, 1944035.0, + 9527646.0, 15080200.0, 6658437.0, 6231200.0, 6832269.0, 16767104.0, 5075751.0, 3212806.0, 1398474.0, + 7579849.0, 6349435.0, 12618859.0, 4703257.0, 12806093.0, 14477321.0, 2786137.0, 12875403.0, 9837734.0, + 14528324.0, 13719321.0, 343717.0}; /* CN = 1+2**27 = '41a0000002000000' IEEE double format. Use it to split a double for better accuracy. */ -#define CN 134217729.0 -static const double split = CN; /* 2^27 + 1 */ +#define CN 134217729.0 +static const double split = CN; /* 2^27 + 1 */ /*******************************************************************/ /* Routine branred() performs range reduction of a double number */ @@ -84,97 +85,119 @@ static const double split = CN; /* 2^27 + 1 */ /* Routine return integer (n mod 4) */ /*******************************************************************/ int -__branred(double x, double *a, double *aa) + __branred (double x, double* a, double* aa) { - int i=0,k=0; - mynumber u,gor; - double r[6],s,t,sum,b,bb,sum1,sum2,b1,bb1,b2,bb2,x1,x2,t1,t2; - - x*=tm600.x; - t=x*split; /* split x to two numbers */ - x1=t-(t-x); - x2=x-x1; - sum=0; - u.x = x1; - k = (u.i[HIGH_HALF]>>20)&2047; - k = (k-450)/24; - if (k<0) - k=0; - gor.x = t576.x; - gor.i[HIGH_HALF] -= ((k*24)<<20); - for (i=0;i<6;i++) - { r[i] = x1*toverp[k+i]*gor.x; gor.x *= tm24.x; } - for (i=0;i<3;i++) { - s=(r[i]+big.x)-big.x; - sum+=s; - r[i]-=s; - } - t=0; - for (i=0;i<6;i++) - t+=r[5-i]; - bb=(((((r[0]-t)+r[1])+r[2])+r[3])+r[4])+r[5]; - s=(t+big.x)-big.x; - sum+=s; - t-=s; - b=t+bb; - bb=(t-b)+bb; - s=(sum+big1.x)-big1.x; - sum-=s; - b1=b; - bb1=bb; - sum1=sum; - sum=0; - - u.x = x2; - k = (u.i[HIGH_HALF]>>20)&2047; - k = (k-450)/24; - if (k<0) - k=0; - gor.x = t576.x; - gor.i[HIGH_HALF] -= ((k*24)<<20); - for (i=0;i<6;i++) - { r[i] = x2*toverp[k+i]*gor.x; gor.x *= tm24.x; } - for (i=0;i<3;i++) { - 
s=(r[i]+big.x)-big.x; - sum+=s; - r[i]-=s; - } - t=0; - for (i=0;i<6;i++) - t+=r[5-i]; - bb=(((((r[0]-t)+r[1])+r[2])+r[3])+r[4])+r[5]; - s=(t+big.x)-big.x; - sum+=s; - t-=s; - b=t+bb; - bb=(t-b)+bb; - s=(sum+big1.x)-big1.x; - sum-=s; - - b2=b; - bb2=bb; - sum2=sum; - - sum=sum1+sum2; - b=b1+b2; - bb = (fabs(b1)>fabs(b2))? (b1-b)+b2 : (b2-b)+b1; - if (b > 0.5) - {b-=1.0; sum+=1.0;} - else if (b < -0.5) - {b+=1.0; sum-=1.0;} - s=b+(bb+bb1+bb2); - t=((b-s)+bb)+(bb1+bb2); - b=s*split; - t1=b-(b-s); - t2=s-t1; - b=s*hp0.x; - bb=(((t1*mp1.x-b)+t1*mp2.x)+t2*mp1.x)+(t2*mp2.x+s*hp1.x+t*hp0.x); - s=b+bb; - t=(b-s)+bb; - *a=s; - *aa=t; - return ((int) sum)&3; /* return quater of unit circle */ + int i = 0, k = 0; + mynumber u, gor; + double r[6], s, t, sum, b, bb, sum1, sum2, b1, bb1, b2, bb2, x1, x2, t1, t2; + + x *= tm600.x; + t = x * split; /* split x to two numbers */ + x1 = t - (t - x); + x2 = x - x1; + sum = 0; + u.x = x1; + k = (u.i[HIGH_HALF] >> 20) & 2047; + k = (k - 450) / 24; + if (k < 0) + { + k = 0; + } + gor.x = t576.x; + gor.i[HIGH_HALF] -= ((k * 24) << 20); + for (i = 0; i < 6; i++) + { + r[i] = x1 * toverp[k + i] * gor.x; + gor.x *= tm24.x; + } + for (i = 0; i < 3; i++) + { + s = (r[i] + big.x) - big.x; + sum += s; + r[i] -= s; + } + t = 0; + for (i = 0; i < 6; i++) + { + t += r[5 - i]; + } + bb = (((((r[0] - t) + r[1]) + r[2]) + r[3]) + r[4]) + r[5]; + s = (t + big.x) - big.x; + sum += s; + t -= s; + b = t + bb; + bb = (t - b) + bb; + s = (sum + big1.x) - big1.x; + sum -= s; + b1 = b; + bb1 = bb; + sum1 = sum; + sum = 0; + + u.x = x2; + k = (u.i[HIGH_HALF] >> 20) & 2047; + k = (k - 450) / 24; + if (k < 0) + { + k = 0; + } + gor.x = t576.x; + gor.i[HIGH_HALF] -= ((k * 24) << 20); + for (i = 0; i < 6; i++) + { + r[i] = x2 * toverp[k + i] * gor.x; + gor.x *= tm24.x; + } + for (i = 0; i < 3; i++) + { + s = (r[i] + big.x) - big.x; + sum += s; + r[i] -= s; + } + t = 0; + for (i = 0; i < 6; i++) + { + t += r[5 - i]; + } + bb = (((((r[0] - t) + r[1]) + r[2]) + r[3]) 
+ r[4]) + r[5]; + s = (t + big.x) - big.x; + sum += s; + t -= s; + b = t + bb; + bb = (t - b) + bb; + s = (sum + big1.x) - big1.x; + sum -= s; + + b2 = b; + bb2 = bb; + sum2 = sum; + + sum = sum1 + sum2; + b = b1 + b2; + bb = (fabs (b1) > fabs (b2)) ? (b1 - b) + b2 : (b2 - b) + b1; + if (b > 0.5) + { + b -= 1.0; + sum += 1.0; + } + else if (b < -0.5) + { + b += 1.0; + sum -= 1.0; + } + s = b + (bb + bb1 + bb2); + t = ((b - s) + bb) + (bb1 + bb2); + b = s * split; + t1 = b - (b - s); + t2 = s - t1; + b = s * hp0.x; + bb = (((t1 * mp1.x - b) + t1 * mp2.x) + t2 * mp1.x) + (t2 * mp2.x + s * hp1.x + t * hp0.x); + s = b + bb; + t = (b - s) + bb; + *a = s; + *aa = t; + return ((int)sum) & 3; /* return quater of unit circle */ } -}; -}; +}; // namespace libm +}; // namespace ModuleBase diff --git a/source/source_base/libm/cexp.cpp b/source/source_base/libm/cexp.cpp index 1677fd7dcdc..d7ee33b448a 100644 --- a/source/source_base/libm/cexp.cpp +++ b/source/source_base/libm/cexp.cpp @@ -4,9 +4,9 @@ //========================================================== #include -#include -#include -#include +#include +#include +#include #include namespace ModuleBase @@ -14,141 +14,164 @@ namespace ModuleBase namespace libm { -template void __msincos (FLOAT x, FLOAT *s, FLOAT *c); -template FLOAT __mexp (FLOAT x); -template FLOAT __mhugeval (); +template +void __msincos (FLOAT x, FLOAT* s, FLOAT* c); +template +FLOAT __mexp (FLOAT x); +template +FLOAT __mhugeval (); -void __sincos (double x, double *sinx, double *cosx); +void __sincos (double x, double* sinx, double* cosx); double __exp (double x); -template<> inline double __mexp (double x) { return __exp (x); } -template<> inline void __msincos (double x, double *s, double *c) { __sincos (x, s, c); } -template<> inline double __mhugeval () { return HUGE_VAL; } +template <> +inline double + __mexp (double x) +{ + return __exp (x); +} +template <> +inline void + __msincos (double x, double* s, double* c) +{ + __sincos (x, s, c); +} 
+template <> +inline double + __mhugeval () +{ + return HUGE_VAL; +} -template +template inline std::complex -__cexp_impl (const std::complex &x) + __cexp_impl (const std::complex& x) { - std::complex retval; - int rcls = fpclassify (x.real()); - int icls = fpclassify (x.imag()); - - if (rcls >= FP_ZERO) - { - /* Real part is finite. */ - if (icls >= FP_ZERO) - { - - /* Imaginary part is finite. */ - const int t = (int) ((std::numeric_limits::max_exponent - 1) * (FLOAT)(M_LN2)); - FLOAT sinix, cosix; - - if ((fabs (x.imag()) > std::numeric_limits::min())) - { - __msincos (x.imag(), &sinix, &cosix); - } - else - { - sinix = x.imag(); - cosix = 1; - } - - std::complex __x = x; - if (__x.real() > t) - { - FLOAT exp_t = __mexp ((FLOAT)t); - __x -= std::complex(t, 0); - sinix *= exp_t; - cosix *= exp_t; - if (__x.real() > t) - { - __x -= std::complex(t, 0); - sinix *= exp_t; - cosix *= exp_t; - } - } - if (__x.real() > t) - { - /* Overflow (original real part of x > 3t). */ - retval.real(std::numeric_limits::max() * cosix); - retval.imag(std::numeric_limits::max() * sinix); - } - else - { - FLOAT exp_val = __mexp (__x.real()); - retval.real(exp_val * cosix); - retval.imag(exp_val * sinix); - } - } - else - { - /* If the imaginary part is +-inf or NaN and the real part - is not +-inf the result is NaN + iNaN. */ - retval.real(std::numeric_limits::quiet_NaN()); - retval.imag(std::numeric_limits::quiet_NaN()); - } - } - else if (rcls == FP_INFINITE) - { - /* Real part is infinite. */ - if (icls >= FP_ZERO) - { - /* Imaginary part is finite. */ - FLOAT value = signbit (x.real()) ? 0 : __mhugeval(); - - if (icls == FP_ZERO) - { - /* Imaginary part is 0.0. 
*/ - retval.real(value); - retval.imag(x.imag()); - } - else - { - FLOAT sinix, cosix; - - if ((fabs (x.imag()) > std::numeric_limits::min())) - { - __msincos (x.imag(), &sinix, &cosix); - } - else - { - sinix = x.imag(); - cosix = 1; - } - - retval.real(copysign (value, cosix)); - retval.imag(copysign (value, sinix)); - } - } - else if (signbit (x.real()) == 0) - { - retval.real(__mhugeval()); - retval.imag(x.imag() - x.imag()); - } - else - { - retval.real(0); - retval.imag(copysign (0, x.imag())); - } - } - else - { - /* If the real part is NaN the result is NaN + iNaN unless the - imaginary part is zero. */ - retval.real(std::numeric_limits::quiet_NaN()); - if (icls == FP_ZERO) - retval.imag(x.imag()); - else - { - retval.imag(std::numeric_limits::quiet_NaN()); - } - } - - return retval; + std::complex retval; + int rcls = fpclassify (x.real ()); + int icls = fpclassify (x.imag ()); + + if (rcls >= FP_ZERO) + { + /* Real part is finite. */ + if (icls >= FP_ZERO) + { + + /* Imaginary part is finite. */ + const int t = (int)((std::numeric_limits::max_exponent - 1) * (FLOAT)(M_LN2)); + FLOAT sinix, cosix; + + if ((fabs (x.imag ()) > std::numeric_limits::min ())) + { + __msincos (x.imag (), &sinix, &cosix); + } + else + { + sinix = x.imag (); + cosix = 1; + } + + std::complex __x = x; + if (__x.real () > t) + { + FLOAT exp_t = __mexp ((FLOAT)t); + __x -= std::complex (t, 0); + sinix *= exp_t; + cosix *= exp_t; + if (__x.real () > t) + { + __x -= std::complex (t, 0); + sinix *= exp_t; + cosix *= exp_t; + } + } + if (__x.real () > t) + { + /* Overflow (original real part of x > 3t). */ + retval.real (std::numeric_limits::max () * cosix); + retval.imag (std::numeric_limits::max () * sinix); + } + else + { + FLOAT exp_val = __mexp (__x.real ()); + retval.real (exp_val * cosix); + retval.imag (exp_val * sinix); + } + } + else + { + /* If the imaginary part is +-inf or NaN and the real part + is not +-inf the result is NaN + iNaN. 
*/ + retval.real (std::numeric_limits::quiet_NaN ()); + retval.imag (std::numeric_limits::quiet_NaN ()); + } + } + else if (rcls == FP_INFINITE) + { + /* Real part is infinite. */ + if (icls >= FP_ZERO) + { + /* Imaginary part is finite. */ + FLOAT value = signbit (x.real ()) ? 0 : __mhugeval (); + + if (icls == FP_ZERO) + { + /* Imaginary part is 0.0. */ + retval.real (value); + retval.imag (x.imag ()); + } + else + { + FLOAT sinix, cosix; + + if ((fabs (x.imag ()) > std::numeric_limits::min ())) + { + __msincos (x.imag (), &sinix, &cosix); + } + else + { + sinix = x.imag (); + cosix = 1; + } + + retval.real (copysign (value, cosix)); + retval.imag (copysign (value, sinix)); + } + } + else if (signbit (x.real ()) == 0) + { + retval.real (__mhugeval ()); + retval.imag (x.imag () - x.imag ()); + } + else + { + retval.real (0); + retval.imag (copysign (0, x.imag ())); + } + } + else + { + /* If the real part is NaN the result is NaN + iNaN unless the + imaginary part is zero. */ + retval.real (std::numeric_limits::quiet_NaN ()); + if (icls == FP_ZERO) + { + retval.imag (x.imag ()); + } + else + { + retval.imag (std::numeric_limits::quiet_NaN ()); + } + } + + return retval; } std::complex -__cexp (const std::complex &x) { return __cexp_impl(x); } + __cexp (const std::complex& x) +{ + return __cexp_impl (x); +} -}; -}; +}; // namespace libm +}; // namespace ModuleBase diff --git a/source/source_base/libm/exp.cpp b/source/source_base/libm/exp.cpp index 8eaaf03b39d..819c36762fc 100644 --- a/source/source_base/libm/exp.cpp +++ b/source/source_base/libm/exp.cpp @@ -3,9 +3,9 @@ // DATE : 2023-01-06 //========================================================== -#include -#include -#include +#include +#include +#include namespace ModuleBase { @@ -13,10 +13,16 @@ namespace libm { typedef int int4; -typedef union { unsigned int u[2]; int4 i[2]; double x; double d; } mynumber; +typedef union +{ + unsigned int u[2]; + int4 i[2]; + double x; + double d; +} mynumber; -#define max(x, 
y) (((y) > (x)) ? (y) : (x)) -#define min(x, y) (((y) < (x)) ? (y) : (x)) +#define max(x, y) (((y) > (x)) ? (y) : (x)) +#define min(x, y) (((y) < (x)) ? (y) : (x)) #define EXP_TABLE_BITS 7 #define EXP_POLY_ORDER 5 @@ -25,209 +31,337 @@ typedef union { unsigned int u[2]; int4 i[2]; double x; double d; } mynumber; struct exp_data { - double invln2N; - double shift; - double negln2hiN; - double negln2loN; - double poly[4]; /* Last four coefficients. */ - double exp2_shift; - double exp2_poly[EXP2_POLY_ORDER]; - uint64_t tab[2*(1 << EXP_TABLE_BITS)]; + double invln2N; + double shift; + double negln2hiN; + double negln2loN; + double poly[4]; /* Last four coefficients. */ + double exp2_shift; + double exp2_poly[EXP2_POLY_ORDER]; + uint64_t tab[2 * (1 << EXP_TABLE_BITS)]; }; const exp_data __exp_data = { -// N/ln2 -1.4426950408889634 * N, -// Used for rounding when !TOINT_INTRINSICS -6755399441055744, -// -ln2/N --0.005415212348111709, --1.2864023111638346e-14, + // N/ln2 + 1.4426950408889634 * N, + // Used for rounding when !TOINT_INTRINSICS + 6755399441055744, + // -ln2/N + -0.005415212348111709, + -1.2864023111638346e-14, -// exp polynomial coefficients. -{ -// abs error: 1.555*2^-66 -// ulp error: 0.509 (0.511 without fma) -// if |x| < ln2/256+eps -// abs error if |x| < ln2/128: 1.7145*2^-56 -0.49999999999996786, -0.16666666666665886, -0.0416666808410674, -0.008333335853059549, -}, -6755399441055744 / N, -// exp2 polynomial coefficients. 
-{ -// abs error: 1.2195*2^-65 -// ulp error: 0.507 (0.511 without fma) -// if |x| < 1/256 -// abs error if |x| < 1/128: 1.9941*2^-56 -0.6931471805599453, -0.24022650695909065, -0.0555041086686087, -0.009618131975721055, -0.0013332074570119598, -}, -// 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N) -// tab[2*k] = asuint64(T[k]) -// tab[2*k+1] = asuint64(H[k]) - (k << 52)/N -{ -0x0, 0x3ff0000000000000, -0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335, -0xbc7160139cd8dc5d, 0x3fefec9a3e778061, -0xbc905e7a108766d1, 0x3fefe315e86e7f85, -0x3c8cd2523567f613, 0x3fefd9b0d3158574, -0xbc8bce8023f98efa, 0x3fefd06b29ddf6de, -0x3c60f74e61e6c861, 0x3fefc74518759bc8, -0x3c90a3e45b33d399, 0x3fefbe3ecac6f383, -0x3c979aa65d837b6d, 0x3fefb5586cf9890f, -0x3c8eb51a92fdeffc, 0x3fefac922b7247f7, -0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2, -0xbc6a033489906e0b, 0x3fef9b66affed31b, -0xbc9556522a2fbd0e, 0x3fef9301d0125b51, -0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc, -0xbc91c923b9d5f416, 0x3fef829aaea92de0, -0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51, -0xbc801b15eaa59348, 0x3fef72b83c7d517b, -0xbc8f1ff055de323d, 0x3fef6af9388c8dea, -0x3c8b898c3f1353bf, 0x3fef635beb6fcb75, -0xbc96d99c7611eb26, 0x3fef5be084045cd4, -0x3c9aecf73e3a2f60, 0x3fef54873168b9aa, -0xbc8fe782cb86389d, 0x3fef4d5022fcd91d, -0x3c8a6f4144a6c38d, 0x3fef463b88628cd6, -0x3c807a05b0e4047d, 0x3fef3f49917ddc96, -0x3c968efde3a8a894, 0x3fef387a6e756238, -0x3c875e18f274487d, 0x3fef31ce4fb2a63f, -0x3c80472b981fe7f2, 0x3fef2b4565e27cdd, -0xbc96b87b3f71085e, 0x3fef24dfe1f56381, -0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1, -0xbc3d219b1a6fbffa, 0x3fef187fd0dad990, -0x3c8b3782720c0ab4, 0x3fef1285a6e4030b, -0x3c6e149289cecb8f, 0x3fef0cafa93e2f56, -0x3c834d754db0abb6, 0x3fef06fe0a31b715, -0x3c864201e2ac744c, 0x3fef0170fc4cd831, -0x3c8fdd395dd3f84a, 0x3feefc08b26416ff, -0xbc86a3803b8e5b04, 0x3feef6c55f929ff1, -0xbc924aedcc4b5068, 0x3feef1a7373aa9cb, -0xbc9907f81b512d8e, 0x3feeecae6d05d866, -0xbc71d1e83e9436d2, 0x3feee7db34e59ff7, -0xbc991919b3ce1b15, 
0x3feee32dc313a8e5, -0x3c859f48a72a4c6d, 0x3feedea64c123422, -0xbc9312607a28698a, 0x3feeda4504ac801c, -0xbc58a78f4817895b, 0x3feed60a21f72e2a, -0xbc7c2c9b67499a1b, 0x3feed1f5d950a897, -0x3c4363ed60c2ac11, 0x3feece086061892d, -0x3c9666093b0664ef, 0x3feeca41ed1d0057, -0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0, -0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de, -0x3c7690cebb7aafb0, 0x3feebfdad5362a27, -0x3c931dbdeb54e077, 0x3feebcb299fddd0d, -0xbc8f94340071a38e, 0x3feeb9b2769d2ca7, -0xbc87deccdc93a349, 0x3feeb6daa2cf6642, -0xbc78dec6bd0f385f, 0x3feeb42b569d4f82, -0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f, -0x3c93350518fdd78e, 0x3feeaf4736b527da, -0x3c7b98b72f8a9b05, 0x3feead12d497c7fd, -0x3c9063e1e21c5409, 0x3feeab07dd485429, -0x3c34c7855019c6ea, 0x3feea9268a5946b7, -0x3c9432e62b64c035, 0x3feea76f15ad2148, -0xbc8ce44a6199769f, 0x3feea5e1b976dc09, -0xbc8c33c53bef4da8, 0x3feea47eb03a5585, -0xbc845378892be9ae, 0x3feea34634ccc320, -0xbc93cedd78565858, 0x3feea23882552225, -0x3c5710aa807e1964, 0x3feea155d44ca973, -0xbc93b3efbf5e2228, 0x3feea09e667f3bcd, -0xbc6a12ad8734b982, 0x3feea012750bdabf, -0xbc6367efb86da9ee, 0x3fee9fb23c651a2f, -0xbc80dc3d54e08851, 0x3fee9f7df9519484, -0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74, -0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174, -0xbc8619321e55e68a, 0x3fee9feb564267c9, -0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f, -0xbc7b32dcb94da51d, 0x3feea11473eb0187, -0x3c94ecfd5467c06b, 0x3feea1ed0130c132, -0x3c65ebe1abd66c55, 0x3feea2f336cf4e62, -0xbc88a1c52fb3cf42, 0x3feea427543e1a12, -0xbc9369b6f13b3734, 0x3feea589994cce13, -0xbc805e843a19ff1e, 0x3feea71a4623c7ad, -0xbc94d450d872576e, 0x3feea8d99b4492ed, -0x3c90ad675b0e8a00, 0x3feeaac7d98a6699, -0x3c8db72fc1f0eab4, 0x3feeace5422aa0db, -0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c, -0x3c7bf68359f35f44, 0x3feeb1ae99157736, -0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6, -0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5, -0xbc6c23f97c90b959, 0x3feeba44cbc8520f, -0xbc92434322f4f9aa, 0x3feebd829fde4e50, -0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba, 
-0x3c71affc2b91ce27, 0x3feec49182a3f090, -0x3c6dd235e10a73bb, 0x3feec86319e32323, -0xbc87c50422622263, 0x3feecc667b5de565, -0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33, -0xbc91bbd1d3bcbb15, 0x3feed503b23e255d, -0x3c90cc319cee31d2, 0x3feed99e1330b358, -0x3c8469846e735ab3, 0x3feede6b5579fdbf, -0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a, -0x3c8c1a7792cb3387, 0x3feee89f995ad3ad, -0xbc907b8f4ad1d9fa, 0x3feeee07298db666, -0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb, -0xbc90a40e3da6f640, 0x3feef9728de5593a, -0xbc68d6f438ad9334, 0x3feeff76f2fb5e47, -0xbc91eee26b588a35, 0x3fef05b030a1064a, -0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2, -0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09, -0x3c736eae30af0cb3, 0x3fef199bdd85529c, -0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a, -0x3c84e08fd10959ac, 0x3fef27f12e57d14b, -0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5, -0x3c676b2c6c921968, 0x3fef3720dcef9069, -0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa, -0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c, -0xbc900dae3875a949, 0x3fef4f87080d89f2, -0x3c74a385a63d07a7, 0x3fef5818dcfba487, -0xbc82919e2040220f, 0x3fef60e316c98398, -0x3c8e5a50d5c192ac, 0x3fef69e603db3285, -0x3c843a59ac016b4b, 0x3fef7321f301b460, -0xbc82d52107b43e1f, 0x3fef7c97337b9b5f, -0xbc892ab93b470dc9, 0x3fef864614f5a129, -0x3c74b604603a88d3, 0x3fef902ee78b3ff6, -0x3c83c5ec519d7271, 0x3fef9a51fbc74c83, -0xbc8ff7128fd391f0, 0x3fefa4afa2a490da, -0xbc8dae98e223747d, 0x3fefaf482d8e67f1, -0x3c8ec3bc41aa2008, 0x3fefba1bee615a27, -0x3c842b94c3a9eb32, 0x3fefc52b376bba97, -0x3c8a64a931d185ee, 0x3fefd0765b6e4540, -0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14, -0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8, -0x3c5305c14160cc89, 0x3feff3c22b8f71f1, -}, + // exp polynomial coefficients. + { + // abs error: 1.555*2^-66 + // ulp error: 0.509 (0.511 without fma) + // if |x| < ln2/256+eps + // abs error if |x| < ln2/128: 1.7145*2^-56 + 0.49999999999996786, + 0.16666666666665886, + 0.0416666808410674, + 0.008333335853059549, + }, + 6755399441055744 / N, + // exp2 polynomial coefficients. 
+ { + // abs error: 1.2195*2^-65 + // ulp error: 0.507 (0.511 without fma) + // if |x| < 1/256 + // abs error if |x| < 1/128: 1.9941*2^-56 + 0.6931471805599453, + 0.24022650695909065, + 0.0555041086686087, + 0.009618131975721055, + 0.0013332074570119598, + }, + // 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N) + // tab[2*k] = asuint64(T[k]) + // tab[2*k+1] = asuint64(H[k]) - (k << 52)/N + { + 0x0, + 0x3ff0000000000000, + 0x3c9b3b4f1a88bf6e, + 0x3feff63da9fb3335, + 0xbc7160139cd8dc5d, + 0x3fefec9a3e778061, + 0xbc905e7a108766d1, + 0x3fefe315e86e7f85, + 0x3c8cd2523567f613, + 0x3fefd9b0d3158574, + 0xbc8bce8023f98efa, + 0x3fefd06b29ddf6de, + 0x3c60f74e61e6c861, + 0x3fefc74518759bc8, + 0x3c90a3e45b33d399, + 0x3fefbe3ecac6f383, + 0x3c979aa65d837b6d, + 0x3fefb5586cf9890f, + 0x3c8eb51a92fdeffc, + 0x3fefac922b7247f7, + 0x3c3ebe3d702f9cd1, + 0x3fefa3ec32d3d1a2, + 0xbc6a033489906e0b, + 0x3fef9b66affed31b, + 0xbc9556522a2fbd0e, + 0x3fef9301d0125b51, + 0xbc5080ef8c4eea55, + 0x3fef8abdc06c31cc, + 0xbc91c923b9d5f416, + 0x3fef829aaea92de0, + 0x3c80d3e3e95c55af, + 0x3fef7a98c8a58e51, + 0xbc801b15eaa59348, + 0x3fef72b83c7d517b, + 0xbc8f1ff055de323d, + 0x3fef6af9388c8dea, + 0x3c8b898c3f1353bf, + 0x3fef635beb6fcb75, + 0xbc96d99c7611eb26, + 0x3fef5be084045cd4, + 0x3c9aecf73e3a2f60, + 0x3fef54873168b9aa, + 0xbc8fe782cb86389d, + 0x3fef4d5022fcd91d, + 0x3c8a6f4144a6c38d, + 0x3fef463b88628cd6, + 0x3c807a05b0e4047d, + 0x3fef3f49917ddc96, + 0x3c968efde3a8a894, + 0x3fef387a6e756238, + 0x3c875e18f274487d, + 0x3fef31ce4fb2a63f, + 0x3c80472b981fe7f2, + 0x3fef2b4565e27cdd, + 0xbc96b87b3f71085e, + 0x3fef24dfe1f56381, + 0x3c82f7e16d09ab31, + 0x3fef1e9df51fdee1, + 0xbc3d219b1a6fbffa, + 0x3fef187fd0dad990, + 0x3c8b3782720c0ab4, + 0x3fef1285a6e4030b, + 0x3c6e149289cecb8f, + 0x3fef0cafa93e2f56, + 0x3c834d754db0abb6, + 0x3fef06fe0a31b715, + 0x3c864201e2ac744c, + 0x3fef0170fc4cd831, + 0x3c8fdd395dd3f84a, + 0x3feefc08b26416ff, + 0xbc86a3803b8e5b04, + 0x3feef6c55f929ff1, + 0xbc924aedcc4b5068, + 
0x3feef1a7373aa9cb, + 0xbc9907f81b512d8e, + 0x3feeecae6d05d866, + 0xbc71d1e83e9436d2, + 0x3feee7db34e59ff7, + 0xbc991919b3ce1b15, + 0x3feee32dc313a8e5, + 0x3c859f48a72a4c6d, + 0x3feedea64c123422, + 0xbc9312607a28698a, + 0x3feeda4504ac801c, + 0xbc58a78f4817895b, + 0x3feed60a21f72e2a, + 0xbc7c2c9b67499a1b, + 0x3feed1f5d950a897, + 0x3c4363ed60c2ac11, + 0x3feece086061892d, + 0x3c9666093b0664ef, + 0x3feeca41ed1d0057, + 0x3c6ecce1daa10379, + 0x3feec6a2b5c13cd0, + 0x3c93ff8e3f0f1230, + 0x3feec32af0d7d3de, + 0x3c7690cebb7aafb0, + 0x3feebfdad5362a27, + 0x3c931dbdeb54e077, + 0x3feebcb299fddd0d, + 0xbc8f94340071a38e, + 0x3feeb9b2769d2ca7, + 0xbc87deccdc93a349, + 0x3feeb6daa2cf6642, + 0xbc78dec6bd0f385f, + 0x3feeb42b569d4f82, + 0xbc861246ec7b5cf6, + 0x3feeb1a4ca5d920f, + 0x3c93350518fdd78e, + 0x3feeaf4736b527da, + 0x3c7b98b72f8a9b05, + 0x3feead12d497c7fd, + 0x3c9063e1e21c5409, + 0x3feeab07dd485429, + 0x3c34c7855019c6ea, + 0x3feea9268a5946b7, + 0x3c9432e62b64c035, + 0x3feea76f15ad2148, + 0xbc8ce44a6199769f, + 0x3feea5e1b976dc09, + 0xbc8c33c53bef4da8, + 0x3feea47eb03a5585, + 0xbc845378892be9ae, + 0x3feea34634ccc320, + 0xbc93cedd78565858, + 0x3feea23882552225, + 0x3c5710aa807e1964, + 0x3feea155d44ca973, + 0xbc93b3efbf5e2228, + 0x3feea09e667f3bcd, + 0xbc6a12ad8734b982, + 0x3feea012750bdabf, + 0xbc6367efb86da9ee, + 0x3fee9fb23c651a2f, + 0xbc80dc3d54e08851, + 0x3fee9f7df9519484, + 0xbc781f647e5a3ecf, + 0x3fee9f75e8ec5f74, + 0xbc86ee4ac08b7db0, + 0x3fee9f9a48a58174, + 0xbc8619321e55e68a, + 0x3fee9feb564267c9, + 0x3c909ccb5e09d4d3, + 0x3feea0694fde5d3f, + 0xbc7b32dcb94da51d, + 0x3feea11473eb0187, + 0x3c94ecfd5467c06b, + 0x3feea1ed0130c132, + 0x3c65ebe1abd66c55, + 0x3feea2f336cf4e62, + 0xbc88a1c52fb3cf42, + 0x3feea427543e1a12, + 0xbc9369b6f13b3734, + 0x3feea589994cce13, + 0xbc805e843a19ff1e, + 0x3feea71a4623c7ad, + 0xbc94d450d872576e, + 0x3feea8d99b4492ed, + 0x3c90ad675b0e8a00, + 0x3feeaac7d98a6699, + 0x3c8db72fc1f0eab4, + 0x3feeace5422aa0db, + 0xbc65b6609cc5e7ff, + 0x3feeaf3216b5448c, 
+ 0x3c7bf68359f35f44, + 0x3feeb1ae99157736, + 0xbc93091fa71e3d83, + 0x3feeb45b0b91ffc6, + 0xbc5da9b88b6c1e29, + 0x3feeb737b0cdc5e5, + 0xbc6c23f97c90b959, + 0x3feeba44cbc8520f, + 0xbc92434322f4f9aa, + 0x3feebd829fde4e50, + 0xbc85ca6cd7668e4b, + 0x3feec0f170ca07ba, + 0x3c71affc2b91ce27, + 0x3feec49182a3f090, + 0x3c6dd235e10a73bb, + 0x3feec86319e32323, + 0xbc87c50422622263, + 0x3feecc667b5de565, + 0x3c8b1c86e3e231d5, + 0x3feed09bec4a2d33, + 0xbc91bbd1d3bcbb15, + 0x3feed503b23e255d, + 0x3c90cc319cee31d2, + 0x3feed99e1330b358, + 0x3c8469846e735ab3, + 0x3feede6b5579fdbf, + 0xbc82dfcd978e9db4, + 0x3feee36bbfd3f37a, + 0x3c8c1a7792cb3387, + 0x3feee89f995ad3ad, + 0xbc907b8f4ad1d9fa, + 0x3feeee07298db666, + 0xbc55c3d956dcaeba, + 0x3feef3a2b84f15fb, + 0xbc90a40e3da6f640, + 0x3feef9728de5593a, + 0xbc68d6f438ad9334, + 0x3feeff76f2fb5e47, + 0xbc91eee26b588a35, + 0x3fef05b030a1064a, + 0x3c74ffd70a5fddcd, + 0x3fef0c1e904bc1d2, + 0xbc91bdfbfa9298ac, + 0x3fef12c25bd71e09, + 0x3c736eae30af0cb3, + 0x3fef199bdd85529c, + 0x3c8ee3325c9ffd94, + 0x3fef20ab5fffd07a, + 0x3c84e08fd10959ac, + 0x3fef27f12e57d14b, + 0x3c63cdaf384e1a67, + 0x3fef2f6d9406e7b5, + 0x3c676b2c6c921968, + 0x3fef3720dcef9069, + 0xbc808a1883ccb5d2, + 0x3fef3f0b555dc3fa, + 0xbc8fad5d3ffffa6f, + 0x3fef472d4a07897c, + 0xbc900dae3875a949, + 0x3fef4f87080d89f2, + 0x3c74a385a63d07a7, + 0x3fef5818dcfba487, + 0xbc82919e2040220f, + 0x3fef60e316c98398, + 0x3c8e5a50d5c192ac, + 0x3fef69e603db3285, + 0x3c843a59ac016b4b, + 0x3fef7321f301b460, + 0xbc82d52107b43e1f, + 0x3fef7c97337b9b5f, + 0xbc892ab93b470dc9, + 0x3fef864614f5a129, + 0x3c74b604603a88d3, + 0x3fef902ee78b3ff6, + 0x3c83c5ec519d7271, + 0x3fef9a51fbc74c83, + 0xbc8ff7128fd391f0, + 0x3fefa4afa2a490da, + 0xbc8dae98e223747d, + 0x3fefaf482d8e67f1, + 0x3c8ec3bc41aa2008, + 0x3fefba1bee615a27, + 0x3c842b94c3a9eb32, + 0x3fefc52b376bba97, + 0x3c8a64a931d185ee, + 0x3fefd0765b6e4540, + 0xbc8e37bae43be3ed, + 0x3fefdbfdad9cbe14, + 0x3c77893b4d91cd9d, + 0x3fefe7c1819e90d8, + 
0x3c5305c14160cc89, + 0x3feff3c22b8f71f1, + }, }; #ifndef WANT_ROUNDING /* Correct special case results in non-nearest rounding modes. */ -# define WANT_ROUNDING 1 +#define WANT_ROUNDING 1 #endif static inline uint64_t -asuint64 (double f) + asuint64 (double f) { - union - { - double f; - uint64_t i; - } u = {f}; - return u.i; + union + { + double f; + uint64_t i; + } u = {f}; + return u.i; } static inline double -asdouble (uint64_t i) + asdouble (uint64_t i) { - union - { - uint64_t i; - double f; - } u = {i}; - return u.f; + union + { + uint64_t i; + double f; + } u = {i}; + return u.f; } #define InvLn2N __exp_data.invln2N @@ -248,125 +382,138 @@ asdouble (uint64_t i) adjustment of scale, positive k here means the result may overflow and negative k means the result may underflow. */ static inline double -specialcase (double_t tmp, uint64_t sbits, uint64_t ki) + specialcase (double_t tmp, uint64_t sbits, uint64_t ki) { - double_t scale, y; + double_t scale, y; - if ((ki & 0x80000000) == 0) - { - /* k > 0, the exponent of scale might have overflowed by <= 460. */ - sbits -= 1009ull << 52; - scale = asdouble (sbits); - y = 5.486124068793689e+303 * (scale + scale * tmp); - return y; - } - /* k < 0, need special care in the subnormal range. */ - sbits += 1022ull << 52; - scale = asdouble (sbits); - y = scale + scale * tmp; - if (y < 1.0) - { - /* Round y to the right precision before scaling it into the subnormal - range to avoid double rounding that can cause 0.5+E/2 ulp error where - E is the worst-case ulp error outside the subnormal range. So this - is only useful if the goal is better than 1 ulp worst-case error. */ - double_t hi, lo; - lo = scale - y + scale * tmp; - hi = 1.0 + y; - lo = 1.0 - hi + y + lo; - y = hi + lo - 1.0; - /* Avoid -0.0 with downward rounding. */ - if (WANT_ROUNDING && y == 0.0) - y = 0.0; - } - y = 2.2250738585072014e-308 * y; - return y; + if ((ki & 0x80000000) == 0) + { + /* k > 0, the exponent of scale might have overflowed by <= 460. 
*/ + sbits -= 1009ull << 52; + scale = asdouble (sbits); + y = 5.486124068793689e+303 * (scale + scale * tmp); + return y; + } + /* k < 0, need special care in the subnormal range. */ + sbits += 1022ull << 52; + scale = asdouble (sbits); + y = scale + scale * tmp; + if (y < 1.0) + { + /* Round y to the right precision before scaling it into the subnormal + range to avoid double rounding that can cause 0.5+E/2 ulp error where + E is the worst-case ulp error outside the subnormal range. So this + is only useful if the goal is better than 1 ulp worst-case error. */ + double_t hi, lo; + lo = scale - y + scale * tmp; + hi = 1.0 + y; + lo = 1.0 - hi + y + lo; + y = hi + lo - 1.0; + /* Avoid -0.0 with downward rounding. */ + if (WANT_ROUNDING && y == 0.0) + { + y = 0.0; + } + } + y = 2.2250738585072014e-308 * y; + return y; } /* Top 12 bits of a double (sign and exponent bits). */ static inline uint32_t -top12 (double x) + top12 (double x) { - return asuint64 (x) >> 52; + return asuint64 (x) >> 52; } static inline double -xflow (uint32_t sign, double y) + xflow (uint32_t sign, double y) { - y = (sign ? -y : y) * y; - return y; + y = (sign ? -y : y) * y; + return y; } static inline double -__math_uflow (uint32_t sign) + __math_uflow (uint32_t sign) { - return xflow (sign, 1.2882297539194267e-231); + return xflow (sign, 1.2882297539194267e-231); } static inline double -__math_oflow (uint32_t sign) + __math_oflow (uint32_t sign) { - return xflow (sign, 3.105036184601418e+231); + return xflow (sign, 3.105036184601418e+231); } double -__exp (double x) + __exp (double x) { - uint32_t abstop; - uint64_t ki, idx, top, sbits; - /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ - double_t kd, z, r, r2, scale, tail, tmp; + uint32_t abstop; + uint64_t ki, idx, top, sbits; + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. 
*/ + double_t kd, z, r, r2, scale, tail, tmp; - abstop = top12 (x) & 0x7ff; - if ((abstop - top12 (5.551115123125783e-17) - >= top12 (512.0) - top12 (5.551115123125783e-17))) - { - if (abstop - top12 (5.551115123125783e-17) >= 0x80000000) - /* Avoid spurious underflow for tiny x. */ - /* Note: 0 is common input. */ - return WANT_ROUNDING ? 1.0 + x : 1.0; - if (abstop >= top12 (1024.0)) - { - if (asuint64 (x) == asuint64 (-INFINITY)) - return 0.0; - if (abstop >= top12 (INFINITY)) - return 1.0 + x; - if (asuint64 (x) >> 63) - return __math_uflow (0); - else - return __math_oflow (0); - } - /* Large x is special cased below. */ - abstop = 0; - } + abstop = top12 (x) & 0x7ff; + if ((abstop - top12 (5.551115123125783e-17) >= top12 (512.0) - top12 (5.551115123125783e-17))) + { + if (abstop - top12 (5.551115123125783e-17) >= 0x80000000) + { + /* Avoid spurious underflow for tiny x. */ + /* Note: 0 is common input. */ + return WANT_ROUNDING ? 1.0 + x : 1.0; + } + if (abstop >= top12 (1024.0)) + { + if (asuint64 (x) == asuint64 (-INFINITY)) + { + return 0.0; + } + if (abstop >= top12 (INFINITY)) + { + return 1.0 + x; + } + if (asuint64 (x) >> 63) + { + return __math_uflow (0); + } + else + { + return __math_oflow (0); + } + } + /* Large x is special cased below. */ + abstop = 0; + } - /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ - /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ - z = InvLn2N * x; - /* z - kd is in [-1, 1] in non-nearest rounding modes. */ - kd = z + Shift; - ki = asuint64 (kd); - kd -= Shift; - r = x + kd * NegLn2hiN + kd * NegLn2loN; - /* 2^(k/N) ~= scale * (1 + tail). */ - idx = 2 * (ki % N); - top = ki << (52 - EXP_TABLE_BITS); - tail = asdouble (T[idx]); - /* This is only a valid scale when -1023*N < k < 1024*N. */ - sbits = T[idx + 1] + top; - /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */ - /* Evaluation is optimized assuming superscalar pipelined execution. 
*/ - r2 = r * r; - /* Without fma the worst case error is 0.25/N ulp larger. */ - /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */ - tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); - if (abstop == 0) - return specialcase (tmp, sbits, ki); - scale = asdouble (sbits); - /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-739, so there - is no spurious underflow here even without fma. */ - return scale + scale * tmp; + /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ + /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ + z = InvLn2N * x; + /* z - kd is in [-1, 1] in non-nearest rounding modes. */ + kd = z + Shift; + ki = asuint64 (kd); + kd -= Shift; + r = x + kd * NegLn2hiN + kd * NegLn2loN; + /* 2^(k/N) ~= scale * (1 + tail). */ + idx = 2 * (ki % N); + top = ki << (52 - EXP_TABLE_BITS); + tail = asdouble (T[idx]); + /* This is only a valid scale when -1023*N < k < 1024*N. */ + sbits = T[idx + 1] + top; + /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */ + /* Evaluation is optimized assuming superscalar pipelined execution. */ + r2 = r * r; + /* Without fma the worst case error is 0.25/N ulp larger. */ + /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */ + tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); + if (abstop == 0) + { + return specialcase (tmp, sbits, ki); + } + scale = asdouble (sbits); + /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-739, so there + is no spurious underflow here even without fma. 
*/ + return scale + scale * tmp; } -}; -}; +}; // namespace libm +}; // namespace ModuleBase diff --git a/source/source_base/libm/libm.h b/source/source_base/libm/libm.h index 13bf65de5c7..1dfe8c469dc 100644 --- a/source/source_base/libm/libm.h +++ b/source/source_base/libm/libm.h @@ -19,44 +19,139 @@ namespace libm double __exp (double x); double __cos (double x); double __sin (double x); -void __sincos (double x, double *sinx, double *cosx); -std::complex __cexp (const std::complex &x); +void __sincos (double x, double* sinx, double* cosx); +std::complex __cexp (const std::complex& x); #else -inline double __exp (double x) { return std::exp(x); }; -inline double __cos (double x) { return std::cos(x); }; -inline double __sin (double x) { return std::sin(x); }; -inline void __sincos (double x, double *sinx, double *cosx) { sincos(x, sinx, cosx); }; -inline std::complex __cexp (const std::complex &x) { return std::exp(x); } +inline double + __exp (double x) +{ + return std::exp (x); +}; +inline double + __cos (double x) +{ + return std::cos (x); +}; +inline double + __sin (double x) +{ + return std::sin (x); +}; +inline void + __sincos (double x, double* sinx, double* cosx) +{ + sincos (x, sinx, cosx); +}; +inline std::complex + __cexp (const std::complex& x) +{ + return std::exp (x); +} #endif -inline float __expf (float x) { return std::exp(x); }; -inline float __cosf (float x) { return std::cos(x); }; -inline float __sinf (float x) { return std::sin(x); }; -inline void __sincosf (float x, float *sinx, float *cosx) { sincosf(x, sinx, cosx); }; -inline std::complex __cexpf (const std::complex &x) { return std::exp(x); } +inline float + __expf (float x) +{ + return std::exp (x); +}; +inline float + __cosf (float x) +{ + return std::cos (x); +}; +inline float + __sinf (float x) +{ + return std::sin (x); +}; +inline void + __sincosf (float x, float* sinx, float* cosx) +{ + sincosf (x, sinx, cosx); +}; +inline std::complex + __cexpf (const std::complex& x) +{ + return 
std::exp (x); +} -template Tp exp(Tp x); -template Tp cos(Tp x); -template Tp sin(Tp x); -template void sincos(Tp x, Tp *s, Tp *c); -template std::complex exp(const std::complex &x); +template +Tp exp (Tp x); +template +Tp cos (Tp x); +template +Tp sin (Tp x); +template +void sincos (Tp x, Tp* s, Tp* c); +template +std::complex exp (const std::complex& x); -template<> inline double exp(double x) { return __exp(x); } -template<> inline double cos(double x) { return __cos(x); } -template<> inline double sin(double x) { return __sin(x); } -template<> inline void sincos(double x, double *s, double *c) { __sincos(x, s, c); } -template<> inline std::complex exp(const std::complex &x) { return __cexp(x); } +template <> +inline double + exp (double x) +{ + return __exp (x); +} +template <> +inline double + cos (double x) +{ + return __cos (x); +} +template <> +inline double + sin (double x) +{ + return __sin (x); +} +template <> +inline void + sincos (double x, double* s, double* c) +{ + __sincos (x, s, c); +} +template <> +inline std::complex + exp (const std::complex& x) +{ + return __cexp (x); +} -template<> inline float exp(float x) { return __expf(x); } -template<> inline float cos(float x) { return __cosf(x); } -template<> inline float sin(float x) { return __sinf(x); } -template<> inline void sincos(float x, float *s, float *c) { __sincosf(x, s, c); } -template<> inline std::complex exp(const std::complex &x) { return __cexpf(x); } +template <> +inline float + exp (float x) +{ + return __expf (x); +} +template <> +inline float + cos (float x) +{ + return __cosf (x); +} +template <> +inline float + sin (float x) +{ + return __sinf (x); +} +template <> +inline void + sincos (float x, float* s, float* c) +{ + __sincosf (x, s, c); +} +template <> +inline std::complex + exp (const std::complex& x) +{ + return __cexpf (x); +} -}; -}; +}; // namespace libm +}; // namespace ModuleBase #endif diff --git a/source/source_base/libm/sincos.cpp 
b/source/source_base/libm/sincos.cpp index 6252f848421..3c433c0c1df 100644 --- a/source/source_base/libm/sincos.cpp +++ b/source/source_base/libm/sincos.cpp @@ -3,7 +3,7 @@ // DATE : 2023-01-06 //========================================================== -#include +#include #include namespace ModuleBase @@ -11,930 +11,943 @@ namespace ModuleBase namespace libm { -int -__branred(double x, double *a, double *aa); +int __branred (double x, double* a, double* aa); typedef int int4; -typedef union { unsigned int u[2]; int4 i[2]; double x; double d; } mynumber; +typedef union +{ + unsigned int u[2]; + int4 i[2]; + double x; + double d; +} mynumber; -#define max(x, y) (((y) > (x)) ? (y) : (x)) -#define min(x, y) (((y) < (x)) ? (y) : (x)) +#define max(x, y) (((y) > (x)) ? (y) : (x)) +#define min(x, y) (((y) < (x)) ? (y) : (x)) #if (__BYTE_ORDER == __BIG_ENDIAN) #define HIGH_HALF 0 -#define LOW_HALF 1 - -static const union {unsigned int u[880]; int4 i[880]; double x[440];} __sincostab = { .u = { -/**/ 0x00000000, 0x00000000, -/**/ 0x00000000, 0x00000000, -/**/ 0x3FF00000, 0x00000000, -/**/ 0x00000000, 0x00000000, -/**/ 0x3F7FFFEA, 0xAAAEEEEF, -/**/ 0xBC1E45E2, 0xEC67B77C, -/**/ 0x3FEFFFC0, 0x00155552, -/**/ 0x3C8F4A01, 0xA0196DAE, -/**/ 0x3F8FFFAA, 0xAAEEEED5, -/**/ 0xBC02AB63, 0x9A9F0777, -/**/ 0x3FEFFF00, 0x0155549F, -/**/ 0x3C828A28, 0xA03A5EF3, -/**/ 0x3F97FF70, 0x01033255, -/**/ 0x3BFEFE2B, 0x51527336, -/**/ 0x3FEFFDC0, 0x06BFF7E6, -/**/ 0x3C8AE6DA, 0xE86977BD, -/**/ 0x3F9FFEAA, 0xAEEEE86F, -/**/ 0xBC3CD406, 0xFB224AE2, -/**/ 0x3FEFFC00, 0x155527D3, -/**/ 0xBC83B544, 0x92D89B5B, -/**/ 0x3FA3FEB2, 0xB12D45D5, -/**/ 0x3C34EC54, 0x203D1C11, -/**/ 0x3FEFF9C0, 0x3414A7BA, -/**/ 0x3C6991F4, 0xBE6C59BF, -/**/ 0x3FA7FDC0, 0x1032FBA9, -/**/ 0xBC4599BD, 0xF46E997A, -/**/ 0x3FEFF700, 0x6BFDF99F, -/**/ 0xBC78B3B5, 0x60648D5F, -/**/ 0x3FABFC6D, 0x78586DAC, -/**/ 0x3C18E4FD, 0x03DBF236, -/**/ 0x3FEFF3C0, 0xC8103A31, -/**/ 0x3C74856D, 0xBDDC0E66, -/**/ 0x3FAFFAAA, 0xEEED4EDB, -/**/ 
0xBC42D16D, 0x32684B69, -/**/ 0x3FEFF001, 0x5549F4D3, -/**/ 0x3C832838, 0x7B99426F, -/**/ 0x3FB1FC34, 0x3D808BEF, -/**/ 0xBC5F3D32, 0xE6F3BE4F, -/**/ 0x3FEFEBC2, 0x22A8EF9F, -/**/ 0x3C579349, 0x34F54C77, -/**/ 0x3FB3FACB, 0x12D1755B, -/**/ 0xBC592191, 0x5299468C, -/**/ 0x3FEFE703, 0x4129EF6F, -/**/ 0xBC6CBF43, 0x37C96F97, -/**/ 0x3FB5F911, 0xFD10B737, -/**/ 0xBC50184F, 0x02BE9102, -/**/ 0x3FEFE1C4, 0xC3C873EB, -/**/ 0xBC35A9C9, 0x057C4A02, -/**/ 0x3FB7F701, 0x032550E4, -/**/ 0x3C3AFC2D, 0x1800501A, -/**/ 0x3FEFDC06, 0xBF7E6B9B, -/**/ 0x3C831902, 0xB535F8DB, -/**/ 0x3FB9F490, 0x2D55D1F9, -/**/ 0x3C52696D, 0x7EAC1DC1, -/**/ 0x3FEFD5C9, 0x4B43E000, -/**/ 0xBC62E768, 0xCB4F92F9, -/**/ 0x3FBBF1B7, 0x8568391D, -/**/ 0x3C5E9184, 0x1DEA4CC8, -/**/ 0x3FEFCF0C, 0x800E99B1, -/**/ 0x3C6EA3D7, 0x86D186AC, -/**/ 0x3FBDEE6F, 0x16C1CCE6, -/**/ 0xBC450F8E, 0x2FB71673, -/**/ 0x3FEFC7D0, 0x78D1BC88, -/**/ 0x3C8075D2, 0x447DB685, -/**/ 0x3FBFEAAE, 0xEE86EE36, -/**/ 0xBC4AFCB2, 0xBCC6F03B, -/**/ 0x3FEFC015, 0x527D5BD3, -/**/ 0x3C8B68F3, 0x5094EFB8, -/**/ 0x3FC0F337, 0x8DDD71D1, -/**/ 0x3C6D8468, 0x724F0F9E, -/**/ 0x3FEFB7DB, 0x2BFE0695, -/**/ 0x3C821DAD, 0xF4F65AB1, -/**/ 0x3FC1F0D3, 0xD7AFCEAF, -/**/ 0xBC66EF95, 0x099769A5, -/**/ 0x3FEFAF22, 0x263C4BD3, -/**/ 0xBC552ACE, 0x133A2769, -/**/ 0x3FC2EE28, 0x5E4AB88F, -/**/ 0xBC6E4D0F, 0x05DEE058, -/**/ 0x3FEFA5EA, 0x641C36F2, -/**/ 0x3C404DA6, 0xED17CC7C, -/**/ 0x3FC3EB31, 0x2C5D66CB, -/**/ 0x3C647D66, 0x6B66CB91, -/**/ 0x3FEF9C34, 0x0A7CC428, -/**/ 0x3C8C5B6B, 0x063B7462, -/**/ 0x3FC4E7EA, 0x4DC5F27B, -/**/ 0x3C5949DB, 0x2AC072FC, -/**/ 0x3FEF91FF, 0x40374D01, -/**/ 0xBC67D03F, 0x4D3A9E4C, -/**/ 0x3FC5E44F, 0xCFA126F3, -/**/ 0xBC66F443, 0x063F89B6, -/**/ 0x3FEF874C, 0x2E1EECF6, -/**/ 0xBC8C6514, 0xE1332B16, -/**/ 0x3FC6E05D, 0xC05A4D4C, -/**/ 0xBBD32C5C, 0x8B81C940, -/**/ 0x3FEF7C1A, 0xFEFFDE24, -/**/ 0xBC78F55B, 0xC47540B1, -/**/ 0x3FC7DC10, 0x2FBAF2B5, -/**/ 0x3C45AB50, 0xE23C97C3, -/**/ 0x3FEF706B, 0xDF9ECE1C, -/**/ 0xBC8698C8, 
0x0C36DCB4, -/**/ 0x3FC8D763, 0x2EFAA944, -/**/ 0xBC620FA2, 0x62CBB953, -/**/ 0x3FEF643E, 0xFEB82ACD, -/**/ 0x3C76B00A, 0xC1FE28AC, -/**/ 0x3FC9D252, 0xD0CEC312, -/**/ 0x3C59C43D, 0x80B1137D, -/**/ 0x3FEF5794, 0x8CFF6797, -/**/ 0x3C6E3A0D, 0x3E03B1D5, -/**/ 0x3FCACCDB, 0x297A0765, -/**/ 0xBC59883B, 0x57D6CDEB, -/**/ 0x3FEF4A6C, 0xBD1E3A79, -/**/ 0x3C813DF0, 0xEDAEBB57, -/**/ 0x3FCBC6F8, 0x4EDC6199, -/**/ 0x3C69C1A5, 0x6A7B0CAB, -/**/ 0x3FEF3CC7, 0xC3B3D16E, -/**/ 0xBC621A3A, 0xD28A3494, -/**/ 0x3FCCC0A6, 0x588289A3, -/**/ 0xBC6868D0, 0x9BC87C6B, -/**/ 0x3FEF2EA5, 0xD753FFED, -/**/ 0x3C8CC421, 0x5F56D583, -/**/ 0x3FCDB9E1, 0x5FB5A5D0, -/**/ 0xBC632E20, 0xD6CC6FC2, -/**/ 0x3FEF2007, 0x3086649F, -/**/ 0x3C7B9404, 0x16C1984B, -/**/ 0x3FCEB2A5, 0x7F8AE5A3, -/**/ 0xBC60BE06, 0xAF572CEB, -/**/ 0x3FEF10EC, 0x09C5873B, -/**/ 0x3C8D9072, 0x762C1283, -/**/ 0x3FCFAAEE, 0xD4F31577, -/**/ 0xBC615D88, 0x508E32B8, -/**/ 0x3FEF0154, 0x9F7DEEA1, -/**/ 0x3C8D3C1E, 0x99E5CAFD, -/**/ 0x3FD0515C, 0xBF65155C, -/**/ 0xBC79B8C2, 0x9DFD8EC8, -/**/ 0x3FEEF141, 0x300D2F26, -/**/ 0xBC82AA1B, 0x08DED372, -/**/ 0x3FD0CD00, 0xCEF36436, -/**/ 0xBC79FB0A, 0x0C93E2B5, -/**/ 0x3FEEE0B1, 0xFBC0F11C, -/**/ 0xBC4BFD23, 0x80BBC3B1, -/**/ 0x3FD14861, 0xAA94DDEB, -/**/ 0xBC6BE881, 0xB5B615A4, -/**/ 0x3FEECFA7, 0x44D5EFA1, -/**/ 0xBC556D0A, 0x4AF541D0, -/**/ 0x3FD1C37D, 0x64C6B876, -/**/ 0x3C746076, 0xFE0DCFF5, -/**/ 0x3FEEBE21, 0x4F76EFA8, -/**/ 0xBC802F9F, 0x12BA543E, -/**/ 0x3FD23E52, 0x111AAF36, -/**/ 0xBC74F080, 0x334EFF18, -/**/ 0x3FEEAC20, 0x61BBAF4F, -/**/ 0x3C62C1D5, 0x3E94658D, -/**/ 0x3FD2B8DD, 0xC43EB49F, -/**/ 0x3C615538, 0x99F2D807, -/**/ 0x3FEE99A4, 0xC3A7CD83, -/**/ 0xBC82264B, 0x1BC53CE8, -/**/ 0x3FD3331E, 0x94049F87, -/**/ 0x3C7E0CB6, 0xB40C302C, -/**/ 0x3FEE86AE, 0xBF29A9ED, -/**/ 0x3C89397A, 0xFDBB58A7, -/**/ 0x3FD3AD12, 0x9769D3D8, -/**/ 0x3C003D55, 0x04878398, -/**/ 0x3FEE733E, 0xA0193D40, -/**/ 0xBC86428B, 0x3546CE13, -/**/ 0x3FD426B7, 0xE69EE697, -/**/ 0xBC7F09C7, 0x5705C59F, -/**/ 
0x3FEE5F54, 0xB436E9D0, -/**/ 0x3C87EB0F, 0xD02FC8BC, -/**/ 0x3FD4A00C, 0x9B0F3D20, -/**/ 0x3C7823BA, 0x6BB08EAD, -/**/ 0x3FEE4AF1, 0x4B2A449C, -/**/ 0xBC868CA0, 0x2E8A6833, -/**/ 0x3FD5190E, 0xCF68A77A, -/**/ 0x3C7B3571, 0x55EEF0F3, -/**/ 0x3FEE3614, 0xB680D6A5, -/**/ 0xBC727793, 0xAA015237, -/**/ 0x3FD591BC, 0x9FA2F597, -/**/ 0x3C67C74B, 0xAC3FE0CB, -/**/ 0x3FEE20BF, 0x49ACD6C1, -/**/ 0xBC5660AE, 0xC7EF636C, -/**/ 0x3FD60A14, 0x29078775, -/**/ 0x3C5B1FD8, 0x0BA89133, -/**/ 0x3FEE0AF1, 0x5A03DBCE, -/**/ 0x3C5FE8E7, 0x02771AE6, -/**/ 0x3FD68213, 0x8A38D7F7, -/**/ 0xBC7D8892, 0x02444AAD, -/**/ 0x3FEDF4AB, 0x3EBD875E, -/**/ 0xBC8E2D8A, 0x7E6736C4, -/**/ 0x3FD6F9B8, 0xE33A0255, -/**/ 0x3C742BC1, 0x4EE9DA0D, -/**/ 0x3FEDDDED, 0x50F228D6, -/**/ 0xBC6E80C8, 0xD42BA2BF, -/**/ 0x3FD77102, 0x55764214, -/**/ 0xBC66EAD7, 0x314BB6CE, -/**/ 0x3FEDC6B7, 0xEB995912, -/**/ 0x3C54B364, 0x776DCD35, -/**/ 0x3FD7E7EE, 0x03C86D4E, -/**/ 0xBC7B63BC, 0xDABF5AF2, -/**/ 0x3FEDAF0B, 0x6B888E83, -/**/ 0x3C8A249E, 0x2B5E5CEA, -/**/ 0x3FD85E7A, 0x12826949, -/**/ 0x3C78A40E, 0x9B5FACE0, -/**/ 0x3FED96E8, 0x2F71A9DC, -/**/ 0x3C8FF61B, 0xD5D2039D, -/**/ 0x3FD8D4A4, 0xA774992F, -/**/ 0x3C744A02, 0xEA766326, -/**/ 0x3FED7E4E, 0x97E17B4A, -/**/ 0xBC63B770, 0x352BED94, -/**/ 0x3FD94A6B, 0xE9F546C5, -/**/ 0xBC769CE1, 0x3E683F58, -/**/ 0x3FED653F, 0x073E4040, -/**/ 0xBC876236, 0x434BEC37, -/**/ 0x3FD9BFCE, 0x02E80510, -/**/ 0x3C709E39, 0xA320B0A4, -/**/ 0x3FED4BB9, 0xE1C619E0, -/**/ 0x3C8F34BB, 0x77858F61, -/**/ 0x3FDA34C9, 0x1CC50CCA, -/**/ 0xBC5A310E, 0x3B50CECD, -/**/ 0x3FED31BF, 0x8D8D7C06, -/**/ 0x3C7E60DD, 0x3089CBDD, -/**/ 0x3FDAA95B, 0x63A09277, -/**/ 0xBC66293E, 0xB13C0381, -/**/ 0x3FED1750, 0x727D94F0, -/**/ 0x3C80D52B, 0x1EC1A48E, -/**/ 0x3FDB1D83, 0x05321617, -/**/ 0xBC7AE242, 0xCB99F519, -/**/ 0x3FECFC6C, 0xFA52AD9F, -/**/ 0x3C88B5B5, 0x508F2A0D, -/**/ 0x3FDB913E, 0x30DBAC43, -/**/ 0xBC7E38AD, 0x2F6C3FF1, -/**/ 0x3FECE115, 0x909A82E5, -/**/ 0x3C81F139, 0xBB31109A, -/**/ 0x3FDC048B, 
0x17B140A3, -/**/ 0x3C619FE6, 0x757E9FA7, -/**/ 0x3FECC54A, 0xA2B2972E, -/**/ 0x3C64EE16, 0x2BA83A98, -/**/ 0x3FDC7767, 0xEC7FD19E, -/**/ 0xBC5EB14D, 0x1A3D5826, -/**/ 0x3FECA90C, 0x9FC67D0B, -/**/ 0xBC646A81, 0x485E3462, -/**/ 0x3FDCE9D2, 0xE3D4A51F, -/**/ 0xBC62FC8A, 0x12DAE298, -/**/ 0x3FEC8C5B, 0xF8CE1A84, -/**/ 0x3C7AB3D1, 0xA1590123, -/**/ 0x3FDD5BCA, 0x34047661, -/**/ 0x3C728A44, 0xA75FC29C, -/**/ 0x3FEC6F39, 0x208BE53B, -/**/ 0xBC8741DB, 0xFBAADB42, -/**/ 0x3FDDCD4C, 0x15329C9A, -/**/ 0x3C70D4C6, 0xE171FD9A, -/**/ 0x3FEC51A4, 0x8B8B175E, -/**/ 0xBC61BBB4, 0x3B9AA880, -/**/ 0x3FDE3E56, 0xC1582A69, -/**/ 0xBC50A482, 0x1099F88F, -/**/ 0x3FEC339E, 0xB01DDD81, -/**/ 0xBC8CAAF5, 0xEE82C5C0, -/**/ 0x3FDEAEE8, 0x744B05F0, -/**/ 0xBC5789B4, 0x3C9B027D, -/**/ 0x3FEC1528, 0x065B7D50, -/**/ 0xBC889211, 0x1312E828, -/**/ 0x3FDF1EFF, 0x6BC4F97B, -/**/ 0x3C717212, 0xF8A7525C, -/**/ 0x3FEBF641, 0x081E7536, -/**/ 0x3C8B7BD7, 0x1628A9A1, -/**/ 0x3FDF8E99, 0xE76ABC97, -/**/ 0x3C59D950, 0xAF2D00A3, -/**/ 0x3FEBD6EA, 0x310294F5, -/**/ 0x3C731BBC, 0xC88C109D, -/**/ 0x3FDFFDB6, 0x28D2F57A, -/**/ 0x3C6F4A99, 0x2E905B6A, -/**/ 0x3FEBB723, 0xFE630F32, -/**/ 0x3C772BD2, 0x452D0A39, -/**/ 0x3FE03629, 0x39C69955, -/**/ 0xBC82D8CD, 0x78397B01, -/**/ 0x3FEB96EE, 0xEF58840E, -/**/ 0x3C545A3C, 0xC78FADE0, -/**/ 0x3FE06D36, 0x86946E5B, -/**/ 0x3C83F5AE, 0x4538FF1B, -/**/ 0x3FEB764B, 0x84B704C2, -/**/ 0xBC8F5848, 0xC21B389B, -/**/ 0x3FE0A402, 0x1E9E1001, -/**/ 0xBC86F643, 0xA13914F6, -/**/ 0x3FEB553A, 0x410C104E, -/**/ 0x3C58FF79, 0x47027A16, -/**/ 0x3FE0DA8B, 0x26B5672E, -/**/ 0xBC8A58DE, 0xF0BEE909, -/**/ 0x3FEB33BB, 0xA89C8948, -/**/ 0x3C8EA6A5, 0x1D1F6CA9, -/**/ 0x3FE110D0, 0xC4B69C3B, -/**/ 0x3C8D9189, 0x98809981, -/**/ 0x3FEB11D0, 0x4162A4C6, -/**/ 0x3C71DD56, 0x1EFBC0C2, -/**/ 0x3FE146D2, 0x1F8B7F82, -/**/ 0x3C7BF953, 0x5E2739A8, -/**/ 0x3FEAEF78, 0x930BD275, -/**/ 0xBC7F8362, 0x79746F94, -/**/ 0x3FE17C8E, 0x5F2EEDB0, -/**/ 0x3C635E57, 0x102E2488, -/**/ 0x3FEACCB5, 0x26F69DE5, -/**/ 
0x3C88FB6A, 0x8DD6B6CC, -/**/ 0x3FE1B204, 0xACB02FDD, -/**/ 0xBC5F190C, 0x70CBB5FF, -/**/ 0x3FEAA986, 0x88308913, -/**/ 0xBC0B83D6, 0x07CD5070, -/**/ 0x3FE1E734, 0x3236574C, -/**/ 0x3C722A3F, 0xA4F41D5A, -/**/ 0x3FEA85ED, 0x4373E02D, -/**/ 0x3C69BE06, 0x385EC792, -/**/ 0x3FE21C1C, 0x1B0394CF, -/**/ 0x3C5E5B32, 0x4B23AA31, -/**/ 0x3FEA61E9, 0xE72586AF, -/**/ 0x3C858330, 0xE2FD453F, -/**/ 0x3FE250BB, 0x93788BBB, -/**/ 0x3C7EA3D0, 0x2457BCCE, -/**/ 0x3FEA3D7D, 0x0352BDCF, -/**/ 0xBC868DBA, 0xECA19669, -/**/ 0x3FE28511, 0xC917A067, -/**/ 0xBC801DF1, 0xD9A16B70, -/**/ 0x3FEA18A7, 0x29AEE445, -/**/ 0x3C395E25, 0x736C0358, -/**/ 0x3FE2B91D, 0xEA88421E, -/**/ 0xBC8FA371, 0xDB216AB0, -/**/ 0x3FE9F368, 0xED912F85, -/**/ 0xBC81D200, 0xC5791606, -/**/ 0x3FE2ECDF, 0x279A3082, -/**/ 0x3C8D3557, 0xE0E7E37E, -/**/ 0x3FE9CDC2, 0xE3F25E5C, -/**/ 0x3C83F991, 0x12993F62, -/**/ 0x3FE32054, 0xB148BC4F, -/**/ 0x3C8F6B42, 0x095A135B, -/**/ 0x3FE9A7B5, 0xA36A6514, -/**/ 0x3C8722CF, 0xCC9FA7A9, -/**/ 0x3FE3537D, 0xB9BE0367, -/**/ 0x3C6B327E, 0x7AF040F0, -/**/ 0x3FE98141, 0xC42E1310, -/**/ 0x3C8D1FF8, 0x0488F08D, -/**/ 0x3FE38659, 0x7456282B, -/**/ 0xBC710FAD, 0xA93B07A8, -/**/ 0x3FE95A67, 0xE00CB1FD, -/**/ 0xBC80BEFD, 0xA21F862D, -/**/ 0x3FE3B8E7, 0x15A2840A, -/**/ 0xBC797653, 0xA7D2F07B, -/**/ 0x3FE93328, 0x926D9E92, -/**/ 0xBC8BB770, 0x03600CDA, -/**/ 0x3FE3EB25, 0xD36CD53A, -/**/ 0xBC5BE570, 0xE1570FC0, -/**/ 0x3FE90B84, 0x784DDAF7, -/**/ 0xBC70FEB1, 0x0AB93B87, -/**/ 0x3FE41D14, 0xE4BA6790, -/**/ 0x3C84608F, 0xD287ECF5, -/**/ 0x3FE8E37C, 0x303D9AD1, -/**/ 0xBC6463A4, 0xB53D4BF8, -/**/ 0x3FE44EB3, 0x81CF386B, -/**/ 0xBC83ED6C, 0x1E6A5505, -/**/ 0x3FE8BB10, 0x5A5DC900, -/**/ 0x3C8863E0, 0x3E9474C1, -/**/ 0x3FE48000, 0xE431159F, -/**/ 0xBC8B194A, 0x7463ED10, -/**/ 0x3FE89241, 0x985D871F, -/**/ 0x3C8C48D9, 0xC413ED84, -/**/ 0x3FE4B0FC, 0x46AAB761, -/**/ 0x3C20DA05, 0x738CC59A, -/**/ 0x3FE86910, 0x8D77A6C6, -/**/ 0x3C7338FF, 0xE2BFE9DD, -/**/ 0x3FE4E1A4, 0xE54ED51B, -/**/ 0xBC8A492F, 
0x89B7C76A, -/**/ 0x3FE83F7D, 0xDE701CA0, -/**/ 0xBC4152CF, 0x609BC6E8, -/**/ 0x3FE511F9, 0xFD7B351C, -/**/ 0xBC85C0E8, 0x61C48831, -/**/ 0x3FE8158A, 0x31916D5D, -/**/ 0xBC6DE8B9, 0x0B8228DE, -/**/ 0x3FE541FA, 0xCDDBB724, -/**/ 0x3C7232C2, 0x8520D391, -/**/ 0x3FE7EB36, 0x2EAA1488, -/**/ 0x3C5A1D65, 0xA4A5959F, -/**/ 0x3FE571A6, 0x966D59B3, -/**/ 0x3C5C843B, 0x4D0FB198, -/**/ 0x3FE7C082, 0x7F09E54F, -/**/ 0xBC6C73D6, 0xD72AEE68, -/**/ 0x3FE5A0FC, 0x98813A12, -/**/ 0xBC8D82E2, 0xB7D4227B, -/**/ 0x3FE7956F, 0xCD7F6543, -/**/ 0xBC8AB276, 0xE9D45AE4, -/**/ 0x3FE5CFFC, 0x16BF8F0D, -/**/ 0x3C896CB3, 0x70EB578A, -/**/ 0x3FE769FE, 0xC655211F, -/**/ 0xBC6827D5, 0xCF8C68C5, -/**/ 0x3FE5FEA4, 0x552A9E57, -/**/ 0x3C80B6CE, 0xF7EE20B7, -/**/ 0x3FE73E30, 0x174EFBA1, -/**/ 0xBC65D3AE, 0x3D94AD5F, -/**/ 0x3FE62CF4, 0x9921AC79, -/**/ 0xBC8EDD98, 0x55B6241A, -/**/ 0x3FE71204, 0x6FA77678, -/**/ 0x3C8425B0, 0xA5029C81, -/**/ 0x3FE65AEC, 0x2963E755, -/**/ 0x3C8126F9, 0x6B71053C, -/**/ 0x3FE6E57C, 0x800CF55E, -/**/ 0x3C860286, 0xDEDBD0A6, -/**/ 0x3FE6888A, 0x4E134B2F, -/**/ 0xBC86B7D3, 0x7644D5E6, -/**/ 0x3FE6B898, 0xFA9EFB5D, -/**/ 0x3C715AC7, 0x86CCF4B2, -/**/ 0x3FE6B5CE, 0x50B7821A, -/**/ 0xBC65D515, 0x8F702E0F, -/**/ 0x3FE68B5A, 0x92EB6253, -/**/ 0xBC89A91A, 0xD985F89C, -/**/ 0x3FE6E2B7, 0x7C40BDE1, -/**/ 0xBC70E729, 0x857FAD53, -/**/ 0x3FE65DC1, 0xFDEB8CBA, -/**/ 0xBC597C1B, 0x47337C77, -/**/ 0x3FE70F45, 0x1D0A8C40, -/**/ 0x3C697EDE, 0x3885770D, -/**/ 0x3FE62FCF, 0xF20191C7, -/**/ 0x3C6D9143, 0x895756EF, -/**/ 0x3FE73B76, 0x80DEA578, -/**/ 0xBC722483, 0x06DC12A2, -/**/ 0x3FE60185, 0x26F563DF, -/**/ 0x3C846CA5, 0xE0E432D0, -/**/ 0x3FE7674A, 0xF6F7B524, -/**/ 0x3C7E9D3F, 0x94AC84A8, -/**/ 0x3FE5D2E2, 0x55F1F17A, -/**/ 0x3C803141, 0x04C8892B, -/**/ 0x3FE792C1, 0xD0041D52, -/**/ 0xBC8ABF05, 0xEEB354EB, -/**/ 0x3FE5A3E8, 0x39824077, -/**/ 0x3C8428AA, 0x2759BE62, -/**/ 0x3FE7BDDA, 0x5E28B3C2, -/**/ 0x3C4AD119, 0x7CCD0393, -/**/ 0x3FE57497, 0x8D8E83F2, -/**/ 0x3C8F4714, 0xAF282D23, -/**/ 
0x3FE7E893, 0xF5037959, -/**/ 0x3C80EEFB, 0xAA650C4C, -/**/ 0x3FE544F1, 0x0F592CA5, -/**/ 0xBC8E7AE8, 0xE6C7A62F, -/**/ 0x3FE812ED, 0xE9AE4BA4, -/**/ 0xBC87830A, 0xDF402DDA, -/**/ 0x3FE514F5, 0x7D7BF3DA, -/**/ 0x3C747A10, 0x8073C259 } }; +#define LOW_HALF 1 + +static const union +{ + unsigned int u[880]; + int4 i[880]; + double x[440]; +} __sincostab = {.u = {/**/ 0x00000000, 0x00000000, + /**/ 0x00000000, 0x00000000, + /**/ 0x3FF00000, 0x00000000, + /**/ 0x00000000, 0x00000000, + /**/ 0x3F7FFFEA, 0xAAAEEEEF, + /**/ 0xBC1E45E2, 0xEC67B77C, + /**/ 0x3FEFFFC0, 0x00155552, + /**/ 0x3C8F4A01, 0xA0196DAE, + /**/ 0x3F8FFFAA, 0xAAEEEED5, + /**/ 0xBC02AB63, 0x9A9F0777, + /**/ 0x3FEFFF00, 0x0155549F, + /**/ 0x3C828A28, 0xA03A5EF3, + /**/ 0x3F97FF70, 0x01033255, + /**/ 0x3BFEFE2B, 0x51527336, + /**/ 0x3FEFFDC0, 0x06BFF7E6, + /**/ 0x3C8AE6DA, 0xE86977BD, + /**/ 0x3F9FFEAA, 0xAEEEE86F, + /**/ 0xBC3CD406, 0xFB224AE2, + /**/ 0x3FEFFC00, 0x155527D3, + /**/ 0xBC83B544, 0x92D89B5B, + /**/ 0x3FA3FEB2, 0xB12D45D5, + /**/ 0x3C34EC54, 0x203D1C11, + /**/ 0x3FEFF9C0, 0x3414A7BA, + /**/ 0x3C6991F4, 0xBE6C59BF, + /**/ 0x3FA7FDC0, 0x1032FBA9, + /**/ 0xBC4599BD, 0xF46E997A, + /**/ 0x3FEFF700, 0x6BFDF99F, + /**/ 0xBC78B3B5, 0x60648D5F, + /**/ 0x3FABFC6D, 0x78586DAC, + /**/ 0x3C18E4FD, 0x03DBF236, + /**/ 0x3FEFF3C0, 0xC8103A31, + /**/ 0x3C74856D, 0xBDDC0E66, + /**/ 0x3FAFFAAA, 0xEEED4EDB, + /**/ 0xBC42D16D, 0x32684B69, + /**/ 0x3FEFF001, 0x5549F4D3, + /**/ 0x3C832838, 0x7B99426F, + /**/ 0x3FB1FC34, 0x3D808BEF, + /**/ 0xBC5F3D32, 0xE6F3BE4F, + /**/ 0x3FEFEBC2, 0x22A8EF9F, + /**/ 0x3C579349, 0x34F54C77, + /**/ 0x3FB3FACB, 0x12D1755B, + /**/ 0xBC592191, 0x5299468C, + /**/ 0x3FEFE703, 0x4129EF6F, + /**/ 0xBC6CBF43, 0x37C96F97, + /**/ 0x3FB5F911, 0xFD10B737, + /**/ 0xBC50184F, 0x02BE9102, + /**/ 0x3FEFE1C4, 0xC3C873EB, + /**/ 0xBC35A9C9, 0x057C4A02, + /**/ 0x3FB7F701, 0x032550E4, + /**/ 0x3C3AFC2D, 0x1800501A, + /**/ 0x3FEFDC06, 0xBF7E6B9B, + /**/ 0x3C831902, 0xB535F8DB, + /**/ 0x3FB9F490, 
0x2D55D1F9, + /**/ 0x3C52696D, 0x7EAC1DC1, + /**/ 0x3FEFD5C9, 0x4B43E000, + /**/ 0xBC62E768, 0xCB4F92F9, + /**/ 0x3FBBF1B7, 0x8568391D, + /**/ 0x3C5E9184, 0x1DEA4CC8, + /**/ 0x3FEFCF0C, 0x800E99B1, + /**/ 0x3C6EA3D7, 0x86D186AC, + /**/ 0x3FBDEE6F, 0x16C1CCE6, + /**/ 0xBC450F8E, 0x2FB71673, + /**/ 0x3FEFC7D0, 0x78D1BC88, + /**/ 0x3C8075D2, 0x447DB685, + /**/ 0x3FBFEAAE, 0xEE86EE36, + /**/ 0xBC4AFCB2, 0xBCC6F03B, + /**/ 0x3FEFC015, 0x527D5BD3, + /**/ 0x3C8B68F3, 0x5094EFB8, + /**/ 0x3FC0F337, 0x8DDD71D1, + /**/ 0x3C6D8468, 0x724F0F9E, + /**/ 0x3FEFB7DB, 0x2BFE0695, + /**/ 0x3C821DAD, 0xF4F65AB1, + /**/ 0x3FC1F0D3, 0xD7AFCEAF, + /**/ 0xBC66EF95, 0x099769A5, + /**/ 0x3FEFAF22, 0x263C4BD3, + /**/ 0xBC552ACE, 0x133A2769, + /**/ 0x3FC2EE28, 0x5E4AB88F, + /**/ 0xBC6E4D0F, 0x05DEE058, + /**/ 0x3FEFA5EA, 0x641C36F2, + /**/ 0x3C404DA6, 0xED17CC7C, + /**/ 0x3FC3EB31, 0x2C5D66CB, + /**/ 0x3C647D66, 0x6B66CB91, + /**/ 0x3FEF9C34, 0x0A7CC428, + /**/ 0x3C8C5B6B, 0x063B7462, + /**/ 0x3FC4E7EA, 0x4DC5F27B, + /**/ 0x3C5949DB, 0x2AC072FC, + /**/ 0x3FEF91FF, 0x40374D01, + /**/ 0xBC67D03F, 0x4D3A9E4C, + /**/ 0x3FC5E44F, 0xCFA126F3, + /**/ 0xBC66F443, 0x063F89B6, + /**/ 0x3FEF874C, 0x2E1EECF6, + /**/ 0xBC8C6514, 0xE1332B16, + /**/ 0x3FC6E05D, 0xC05A4D4C, + /**/ 0xBBD32C5C, 0x8B81C940, + /**/ 0x3FEF7C1A, 0xFEFFDE24, + /**/ 0xBC78F55B, 0xC47540B1, + /**/ 0x3FC7DC10, 0x2FBAF2B5, + /**/ 0x3C45AB50, 0xE23C97C3, + /**/ 0x3FEF706B, 0xDF9ECE1C, + /**/ 0xBC8698C8, 0x0C36DCB4, + /**/ 0x3FC8D763, 0x2EFAA944, + /**/ 0xBC620FA2, 0x62CBB953, + /**/ 0x3FEF643E, 0xFEB82ACD, + /**/ 0x3C76B00A, 0xC1FE28AC, + /**/ 0x3FC9D252, 0xD0CEC312, + /**/ 0x3C59C43D, 0x80B1137D, + /**/ 0x3FEF5794, 0x8CFF6797, + /**/ 0x3C6E3A0D, 0x3E03B1D5, + /**/ 0x3FCACCDB, 0x297A0765, + /**/ 0xBC59883B, 0x57D6CDEB, + /**/ 0x3FEF4A6C, 0xBD1E3A79, + /**/ 0x3C813DF0, 0xEDAEBB57, + /**/ 0x3FCBC6F8, 0x4EDC6199, + /**/ 0x3C69C1A5, 0x6A7B0CAB, + /**/ 0x3FEF3CC7, 0xC3B3D16E, + /**/ 0xBC621A3A, 0xD28A3494, + /**/ 0x3FCCC0A6, 0x588289A3, + 
/**/ 0xBC6868D0, 0x9BC87C6B, + /**/ 0x3FEF2EA5, 0xD753FFED, + /**/ 0x3C8CC421, 0x5F56D583, + /**/ 0x3FCDB9E1, 0x5FB5A5D0, + /**/ 0xBC632E20, 0xD6CC6FC2, + /**/ 0x3FEF2007, 0x3086649F, + /**/ 0x3C7B9404, 0x16C1984B, + /**/ 0x3FCEB2A5, 0x7F8AE5A3, + /**/ 0xBC60BE06, 0xAF572CEB, + /**/ 0x3FEF10EC, 0x09C5873B, + /**/ 0x3C8D9072, 0x762C1283, + /**/ 0x3FCFAAEE, 0xD4F31577, + /**/ 0xBC615D88, 0x508E32B8, + /**/ 0x3FEF0154, 0x9F7DEEA1, + /**/ 0x3C8D3C1E, 0x99E5CAFD, + /**/ 0x3FD0515C, 0xBF65155C, + /**/ 0xBC79B8C2, 0x9DFD8EC8, + /**/ 0x3FEEF141, 0x300D2F26, + /**/ 0xBC82AA1B, 0x08DED372, + /**/ 0x3FD0CD00, 0xCEF36436, + /**/ 0xBC79FB0A, 0x0C93E2B5, + /**/ 0x3FEEE0B1, 0xFBC0F11C, + /**/ 0xBC4BFD23, 0x80BBC3B1, + /**/ 0x3FD14861, 0xAA94DDEB, + /**/ 0xBC6BE881, 0xB5B615A4, + /**/ 0x3FEECFA7, 0x44D5EFA1, + /**/ 0xBC556D0A, 0x4AF541D0, + /**/ 0x3FD1C37D, 0x64C6B876, + /**/ 0x3C746076, 0xFE0DCFF5, + /**/ 0x3FEEBE21, 0x4F76EFA8, + /**/ 0xBC802F9F, 0x12BA543E, + /**/ 0x3FD23E52, 0x111AAF36, + /**/ 0xBC74F080, 0x334EFF18, + /**/ 0x3FEEAC20, 0x61BBAF4F, + /**/ 0x3C62C1D5, 0x3E94658D, + /**/ 0x3FD2B8DD, 0xC43EB49F, + /**/ 0x3C615538, 0x99F2D807, + /**/ 0x3FEE99A4, 0xC3A7CD83, + /**/ 0xBC82264B, 0x1BC53CE8, + /**/ 0x3FD3331E, 0x94049F87, + /**/ 0x3C7E0CB6, 0xB40C302C, + /**/ 0x3FEE86AE, 0xBF29A9ED, + /**/ 0x3C89397A, 0xFDBB58A7, + /**/ 0x3FD3AD12, 0x9769D3D8, + /**/ 0x3C003D55, 0x04878398, + /**/ 0x3FEE733E, 0xA0193D40, + /**/ 0xBC86428B, 0x3546CE13, + /**/ 0x3FD426B7, 0xE69EE697, + /**/ 0xBC7F09C7, 0x5705C59F, + /**/ 0x3FEE5F54, 0xB436E9D0, + /**/ 0x3C87EB0F, 0xD02FC8BC, + /**/ 0x3FD4A00C, 0x9B0F3D20, + /**/ 0x3C7823BA, 0x6BB08EAD, + /**/ 0x3FEE4AF1, 0x4B2A449C, + /**/ 0xBC868CA0, 0x2E8A6833, + /**/ 0x3FD5190E, 0xCF68A77A, + /**/ 0x3C7B3571, 0x55EEF0F3, + /**/ 0x3FEE3614, 0xB680D6A5, + /**/ 0xBC727793, 0xAA015237, + /**/ 0x3FD591BC, 0x9FA2F597, + /**/ 0x3C67C74B, 0xAC3FE0CB, + /**/ 0x3FEE20BF, 0x49ACD6C1, + /**/ 0xBC5660AE, 0xC7EF636C, + /**/ 0x3FD60A14, 0x29078775, + /**/ 
0x3C5B1FD8, 0x0BA89133, + /**/ 0x3FEE0AF1, 0x5A03DBCE, + /**/ 0x3C5FE8E7, 0x02771AE6, + /**/ 0x3FD68213, 0x8A38D7F7, + /**/ 0xBC7D8892, 0x02444AAD, + /**/ 0x3FEDF4AB, 0x3EBD875E, + /**/ 0xBC8E2D8A, 0x7E6736C4, + /**/ 0x3FD6F9B8, 0xE33A0255, + /**/ 0x3C742BC1, 0x4EE9DA0D, + /**/ 0x3FEDDDED, 0x50F228D6, + /**/ 0xBC6E80C8, 0xD42BA2BF, + /**/ 0x3FD77102, 0x55764214, + /**/ 0xBC66EAD7, 0x314BB6CE, + /**/ 0x3FEDC6B7, 0xEB995912, + /**/ 0x3C54B364, 0x776DCD35, + /**/ 0x3FD7E7EE, 0x03C86D4E, + /**/ 0xBC7B63BC, 0xDABF5AF2, + /**/ 0x3FEDAF0B, 0x6B888E83, + /**/ 0x3C8A249E, 0x2B5E5CEA, + /**/ 0x3FD85E7A, 0x12826949, + /**/ 0x3C78A40E, 0x9B5FACE0, + /**/ 0x3FED96E8, 0x2F71A9DC, + /**/ 0x3C8FF61B, 0xD5D2039D, + /**/ 0x3FD8D4A4, 0xA774992F, + /**/ 0x3C744A02, 0xEA766326, + /**/ 0x3FED7E4E, 0x97E17B4A, + /**/ 0xBC63B770, 0x352BED94, + /**/ 0x3FD94A6B, 0xE9F546C5, + /**/ 0xBC769CE1, 0x3E683F58, + /**/ 0x3FED653F, 0x073E4040, + /**/ 0xBC876236, 0x434BEC37, + /**/ 0x3FD9BFCE, 0x02E80510, + /**/ 0x3C709E39, 0xA320B0A4, + /**/ 0x3FED4BB9, 0xE1C619E0, + /**/ 0x3C8F34BB, 0x77858F61, + /**/ 0x3FDA34C9, 0x1CC50CCA, + /**/ 0xBC5A310E, 0x3B50CECD, + /**/ 0x3FED31BF, 0x8D8D7C06, + /**/ 0x3C7E60DD, 0x3089CBDD, + /**/ 0x3FDAA95B, 0x63A09277, + /**/ 0xBC66293E, 0xB13C0381, + /**/ 0x3FED1750, 0x727D94F0, + /**/ 0x3C80D52B, 0x1EC1A48E, + /**/ 0x3FDB1D83, 0x05321617, + /**/ 0xBC7AE242, 0xCB99F519, + /**/ 0x3FECFC6C, 0xFA52AD9F, + /**/ 0x3C88B5B5, 0x508F2A0D, + /**/ 0x3FDB913E, 0x30DBAC43, + /**/ 0xBC7E38AD, 0x2F6C3FF1, + /**/ 0x3FECE115, 0x909A82E5, + /**/ 0x3C81F139, 0xBB31109A, + /**/ 0x3FDC048B, 0x17B140A3, + /**/ 0x3C619FE6, 0x757E9FA7, + /**/ 0x3FECC54A, 0xA2B2972E, + /**/ 0x3C64EE16, 0x2BA83A98, + /**/ 0x3FDC7767, 0xEC7FD19E, + /**/ 0xBC5EB14D, 0x1A3D5826, + /**/ 0x3FECA90C, 0x9FC67D0B, + /**/ 0xBC646A81, 0x485E3462, + /**/ 0x3FDCE9D2, 0xE3D4A51F, + /**/ 0xBC62FC8A, 0x12DAE298, + /**/ 0x3FEC8C5B, 0xF8CE1A84, + /**/ 0x3C7AB3D1, 0xA1590123, + /**/ 0x3FDD5BCA, 0x34047661, + /**/ 0x3C728A44, 
0xA75FC29C, + /**/ 0x3FEC6F39, 0x208BE53B, + /**/ 0xBC8741DB, 0xFBAADB42, + /**/ 0x3FDDCD4C, 0x15329C9A, + /**/ 0x3C70D4C6, 0xE171FD9A, + /**/ 0x3FEC51A4, 0x8B8B175E, + /**/ 0xBC61BBB4, 0x3B9AA880, + /**/ 0x3FDE3E56, 0xC1582A69, + /**/ 0xBC50A482, 0x1099F88F, + /**/ 0x3FEC339E, 0xB01DDD81, + /**/ 0xBC8CAAF5, 0xEE82C5C0, + /**/ 0x3FDEAEE8, 0x744B05F0, + /**/ 0xBC5789B4, 0x3C9B027D, + /**/ 0x3FEC1528, 0x065B7D50, + /**/ 0xBC889211, 0x1312E828, + /**/ 0x3FDF1EFF, 0x6BC4F97B, + /**/ 0x3C717212, 0xF8A7525C, + /**/ 0x3FEBF641, 0x081E7536, + /**/ 0x3C8B7BD7, 0x1628A9A1, + /**/ 0x3FDF8E99, 0xE76ABC97, + /**/ 0x3C59D950, 0xAF2D00A3, + /**/ 0x3FEBD6EA, 0x310294F5, + /**/ 0x3C731BBC, 0xC88C109D, + /**/ 0x3FDFFDB6, 0x28D2F57A, + /**/ 0x3C6F4A99, 0x2E905B6A, + /**/ 0x3FEBB723, 0xFE630F32, + /**/ 0x3C772BD2, 0x452D0A39, + /**/ 0x3FE03629, 0x39C69955, + /**/ 0xBC82D8CD, 0x78397B01, + /**/ 0x3FEB96EE, 0xEF58840E, + /**/ 0x3C545A3C, 0xC78FADE0, + /**/ 0x3FE06D36, 0x86946E5B, + /**/ 0x3C83F5AE, 0x4538FF1B, + /**/ 0x3FEB764B, 0x84B704C2, + /**/ 0xBC8F5848, 0xC21B389B, + /**/ 0x3FE0A402, 0x1E9E1001, + /**/ 0xBC86F643, 0xA13914F6, + /**/ 0x3FEB553A, 0x410C104E, + /**/ 0x3C58FF79, 0x47027A16, + /**/ 0x3FE0DA8B, 0x26B5672E, + /**/ 0xBC8A58DE, 0xF0BEE909, + /**/ 0x3FEB33BB, 0xA89C8948, + /**/ 0x3C8EA6A5, 0x1D1F6CA9, + /**/ 0x3FE110D0, 0xC4B69C3B, + /**/ 0x3C8D9189, 0x98809981, + /**/ 0x3FEB11D0, 0x4162A4C6, + /**/ 0x3C71DD56, 0x1EFBC0C2, + /**/ 0x3FE146D2, 0x1F8B7F82, + /**/ 0x3C7BF953, 0x5E2739A8, + /**/ 0x3FEAEF78, 0x930BD275, + /**/ 0xBC7F8362, 0x79746F94, + /**/ 0x3FE17C8E, 0x5F2EEDB0, + /**/ 0x3C635E57, 0x102E2488, + /**/ 0x3FEACCB5, 0x26F69DE5, + /**/ 0x3C88FB6A, 0x8DD6B6CC, + /**/ 0x3FE1B204, 0xACB02FDD, + /**/ 0xBC5F190C, 0x70CBB5FF, + /**/ 0x3FEAA986, 0x88308913, + /**/ 0xBC0B83D6, 0x07CD5070, + /**/ 0x3FE1E734, 0x3236574C, + /**/ 0x3C722A3F, 0xA4F41D5A, + /**/ 0x3FEA85ED, 0x4373E02D, + /**/ 0x3C69BE06, 0x385EC792, + /**/ 0x3FE21C1C, 0x1B0394CF, + /**/ 0x3C5E5B32, 0x4B23AA31, + 
/**/ 0x3FEA61E9, 0xE72586AF, + /**/ 0x3C858330, 0xE2FD453F, + /**/ 0x3FE250BB, 0x93788BBB, + /**/ 0x3C7EA3D0, 0x2457BCCE, + /**/ 0x3FEA3D7D, 0x0352BDCF, + /**/ 0xBC868DBA, 0xECA19669, + /**/ 0x3FE28511, 0xC917A067, + /**/ 0xBC801DF1, 0xD9A16B70, + /**/ 0x3FEA18A7, 0x29AEE445, + /**/ 0x3C395E25, 0x736C0358, + /**/ 0x3FE2B91D, 0xEA88421E, + /**/ 0xBC8FA371, 0xDB216AB0, + /**/ 0x3FE9F368, 0xED912F85, + /**/ 0xBC81D200, 0xC5791606, + /**/ 0x3FE2ECDF, 0x279A3082, + /**/ 0x3C8D3557, 0xE0E7E37E, + /**/ 0x3FE9CDC2, 0xE3F25E5C, + /**/ 0x3C83F991, 0x12993F62, + /**/ 0x3FE32054, 0xB148BC4F, + /**/ 0x3C8F6B42, 0x095A135B, + /**/ 0x3FE9A7B5, 0xA36A6514, + /**/ 0x3C8722CF, 0xCC9FA7A9, + /**/ 0x3FE3537D, 0xB9BE0367, + /**/ 0x3C6B327E, 0x7AF040F0, + /**/ 0x3FE98141, 0xC42E1310, + /**/ 0x3C8D1FF8, 0x0488F08D, + /**/ 0x3FE38659, 0x7456282B, + /**/ 0xBC710FAD, 0xA93B07A8, + /**/ 0x3FE95A67, 0xE00CB1FD, + /**/ 0xBC80BEFD, 0xA21F862D, + /**/ 0x3FE3B8E7, 0x15A2840A, + /**/ 0xBC797653, 0xA7D2F07B, + /**/ 0x3FE93328, 0x926D9E92, + /**/ 0xBC8BB770, 0x03600CDA, + /**/ 0x3FE3EB25, 0xD36CD53A, + /**/ 0xBC5BE570, 0xE1570FC0, + /**/ 0x3FE90B84, 0x784DDAF7, + /**/ 0xBC70FEB1, 0x0AB93B87, + /**/ 0x3FE41D14, 0xE4BA6790, + /**/ 0x3C84608F, 0xD287ECF5, + /**/ 0x3FE8E37C, 0x303D9AD1, + /**/ 0xBC6463A4, 0xB53D4BF8, + /**/ 0x3FE44EB3, 0x81CF386B, + /**/ 0xBC83ED6C, 0x1E6A5505, + /**/ 0x3FE8BB10, 0x5A5DC900, + /**/ 0x3C8863E0, 0x3E9474C1, + /**/ 0x3FE48000, 0xE431159F, + /**/ 0xBC8B194A, 0x7463ED10, + /**/ 0x3FE89241, 0x985D871F, + /**/ 0x3C8C48D9, 0xC413ED84, + /**/ 0x3FE4B0FC, 0x46AAB761, + /**/ 0x3C20DA05, 0x738CC59A, + /**/ 0x3FE86910, 0x8D77A6C6, + /**/ 0x3C7338FF, 0xE2BFE9DD, + /**/ 0x3FE4E1A4, 0xE54ED51B, + /**/ 0xBC8A492F, 0x89B7C76A, + /**/ 0x3FE83F7D, 0xDE701CA0, + /**/ 0xBC4152CF, 0x609BC6E8, + /**/ 0x3FE511F9, 0xFD7B351C, + /**/ 0xBC85C0E8, 0x61C48831, + /**/ 0x3FE8158A, 0x31916D5D, + /**/ 0xBC6DE8B9, 0x0B8228DE, + /**/ 0x3FE541FA, 0xCDDBB724, + /**/ 0x3C7232C2, 0x8520D391, + /**/ 
0x3FE7EB36, 0x2EAA1488, + /**/ 0x3C5A1D65, 0xA4A5959F, + /**/ 0x3FE571A6, 0x966D59B3, + /**/ 0x3C5C843B, 0x4D0FB198, + /**/ 0x3FE7C082, 0x7F09E54F, + /**/ 0xBC6C73D6, 0xD72AEE68, + /**/ 0x3FE5A0FC, 0x98813A12, + /**/ 0xBC8D82E2, 0xB7D4227B, + /**/ 0x3FE7956F, 0xCD7F6543, + /**/ 0xBC8AB276, 0xE9D45AE4, + /**/ 0x3FE5CFFC, 0x16BF8F0D, + /**/ 0x3C896CB3, 0x70EB578A, + /**/ 0x3FE769FE, 0xC655211F, + /**/ 0xBC6827D5, 0xCF8C68C5, + /**/ 0x3FE5FEA4, 0x552A9E57, + /**/ 0x3C80B6CE, 0xF7EE20B7, + /**/ 0x3FE73E30, 0x174EFBA1, + /**/ 0xBC65D3AE, 0x3D94AD5F, + /**/ 0x3FE62CF4, 0x9921AC79, + /**/ 0xBC8EDD98, 0x55B6241A, + /**/ 0x3FE71204, 0x6FA77678, + /**/ 0x3C8425B0, 0xA5029C81, + /**/ 0x3FE65AEC, 0x2963E755, + /**/ 0x3C8126F9, 0x6B71053C, + /**/ 0x3FE6E57C, 0x800CF55E, + /**/ 0x3C860286, 0xDEDBD0A6, + /**/ 0x3FE6888A, 0x4E134B2F, + /**/ 0xBC86B7D3, 0x7644D5E6, + /**/ 0x3FE6B898, 0xFA9EFB5D, + /**/ 0x3C715AC7, 0x86CCF4B2, + /**/ 0x3FE6B5CE, 0x50B7821A, + /**/ 0xBC65D515, 0x8F702E0F, + /**/ 0x3FE68B5A, 0x92EB6253, + /**/ 0xBC89A91A, 0xD985F89C, + /**/ 0x3FE6E2B7, 0x7C40BDE1, + /**/ 0xBC70E729, 0x857FAD53, + /**/ 0x3FE65DC1, 0xFDEB8CBA, + /**/ 0xBC597C1B, 0x47337C77, + /**/ 0x3FE70F45, 0x1D0A8C40, + /**/ 0x3C697EDE, 0x3885770D, + /**/ 0x3FE62FCF, 0xF20191C7, + /**/ 0x3C6D9143, 0x895756EF, + /**/ 0x3FE73B76, 0x80DEA578, + /**/ 0xBC722483, 0x06DC12A2, + /**/ 0x3FE60185, 0x26F563DF, + /**/ 0x3C846CA5, 0xE0E432D0, + /**/ 0x3FE7674A, 0xF6F7B524, + /**/ 0x3C7E9D3F, 0x94AC84A8, + /**/ 0x3FE5D2E2, 0x55F1F17A, + /**/ 0x3C803141, 0x04C8892B, + /**/ 0x3FE792C1, 0xD0041D52, + /**/ 0xBC8ABF05, 0xEEB354EB, + /**/ 0x3FE5A3E8, 0x39824077, + /**/ 0x3C8428AA, 0x2759BE62, + /**/ 0x3FE7BDDA, 0x5E28B3C2, + /**/ 0x3C4AD119, 0x7CCD0393, + /**/ 0x3FE57497, 0x8D8E83F2, + /**/ 0x3C8F4714, 0xAF282D23, + /**/ 0x3FE7E893, 0xF5037959, + /**/ 0x3C80EEFB, 0xAA650C4C, + /**/ 0x3FE544F1, 0x0F592CA5, + /**/ 0xBC8E7AE8, 0xE6C7A62F, + /**/ 0x3FE812ED, 0xE9AE4BA4, + /**/ 0xBC87830A, 0xDF402DDA, + /**/ 0x3FE514F5, 
0x7D7BF3DA, + /**/ 0x3C747A10, 0x8073C259}}; #endif #if (__BYTE_ORDER == __LITTLE_ENDIAN) #define HIGH_HALF 1 -#define LOW_HALF 0 - -static const union {unsigned int u[880]; int4 i[880]; double x[440];} __sincostab = { .u = { -/**/ 0x00000000, 0x00000000, -/**/ 0x00000000, 0x00000000, -/**/ 0x00000000, 0x3FF00000, -/**/ 0x00000000, 0x00000000, -/**/ 0xAAAEEEEF, 0x3F7FFFEA, -/**/ 0xEC67B77C, 0xBC1E45E2, -/**/ 0x00155552, 0x3FEFFFC0, -/**/ 0xA0196DAE, 0x3C8F4A01, -/**/ 0xAAEEEED5, 0x3F8FFFAA, -/**/ 0x9A9F0777, 0xBC02AB63, -/**/ 0x0155549F, 0x3FEFFF00, -/**/ 0xA03A5EF3, 0x3C828A28, -/**/ 0x01033255, 0x3F97FF70, -/**/ 0x51527336, 0x3BFEFE2B, -/**/ 0x06BFF7E6, 0x3FEFFDC0, -/**/ 0xE86977BD, 0x3C8AE6DA, -/**/ 0xAEEEE86F, 0x3F9FFEAA, -/**/ 0xFB224AE2, 0xBC3CD406, -/**/ 0x155527D3, 0x3FEFFC00, -/**/ 0x92D89B5B, 0xBC83B544, -/**/ 0xB12D45D5, 0x3FA3FEB2, -/**/ 0x203D1C11, 0x3C34EC54, -/**/ 0x3414A7BA, 0x3FEFF9C0, -/**/ 0xBE6C59BF, 0x3C6991F4, -/**/ 0x1032FBA9, 0x3FA7FDC0, -/**/ 0xF46E997A, 0xBC4599BD, -/**/ 0x6BFDF99F, 0x3FEFF700, -/**/ 0x60648D5F, 0xBC78B3B5, -/**/ 0x78586DAC, 0x3FABFC6D, -/**/ 0x03DBF236, 0x3C18E4FD, -/**/ 0xC8103A31, 0x3FEFF3C0, -/**/ 0xBDDC0E66, 0x3C74856D, -/**/ 0xEEED4EDB, 0x3FAFFAAA, -/**/ 0x32684B69, 0xBC42D16D, -/**/ 0x5549F4D3, 0x3FEFF001, -/**/ 0x7B99426F, 0x3C832838, -/**/ 0x3D808BEF, 0x3FB1FC34, -/**/ 0xE6F3BE4F, 0xBC5F3D32, -/**/ 0x22A8EF9F, 0x3FEFEBC2, -/**/ 0x34F54C77, 0x3C579349, -/**/ 0x12D1755B, 0x3FB3FACB, -/**/ 0x5299468C, 0xBC592191, -/**/ 0x4129EF6F, 0x3FEFE703, -/**/ 0x37C96F97, 0xBC6CBF43, -/**/ 0xFD10B737, 0x3FB5F911, -/**/ 0x02BE9102, 0xBC50184F, -/**/ 0xC3C873EB, 0x3FEFE1C4, -/**/ 0x057C4A02, 0xBC35A9C9, -/**/ 0x032550E4, 0x3FB7F701, -/**/ 0x1800501A, 0x3C3AFC2D, -/**/ 0xBF7E6B9B, 0x3FEFDC06, -/**/ 0xB535F8DB, 0x3C831902, -/**/ 0x2D55D1F9, 0x3FB9F490, -/**/ 0x7EAC1DC1, 0x3C52696D, -/**/ 0x4B43E000, 0x3FEFD5C9, -/**/ 0xCB4F92F9, 0xBC62E768, -/**/ 0x8568391D, 0x3FBBF1B7, -/**/ 0x1DEA4CC8, 0x3C5E9184, -/**/ 0x800E99B1, 0x3FEFCF0C, 
-/**/ 0x86D186AC, 0x3C6EA3D7, -/**/ 0x16C1CCE6, 0x3FBDEE6F, -/**/ 0x2FB71673, 0xBC450F8E, -/**/ 0x78D1BC88, 0x3FEFC7D0, -/**/ 0x447DB685, 0x3C8075D2, -/**/ 0xEE86EE36, 0x3FBFEAAE, -/**/ 0xBCC6F03B, 0xBC4AFCB2, -/**/ 0x527D5BD3, 0x3FEFC015, -/**/ 0x5094EFB8, 0x3C8B68F3, -/**/ 0x8DDD71D1, 0x3FC0F337, -/**/ 0x724F0F9E, 0x3C6D8468, -/**/ 0x2BFE0695, 0x3FEFB7DB, -/**/ 0xF4F65AB1, 0x3C821DAD, -/**/ 0xD7AFCEAF, 0x3FC1F0D3, -/**/ 0x099769A5, 0xBC66EF95, -/**/ 0x263C4BD3, 0x3FEFAF22, -/**/ 0x133A2769, 0xBC552ACE, -/**/ 0x5E4AB88F, 0x3FC2EE28, -/**/ 0x05DEE058, 0xBC6E4D0F, -/**/ 0x641C36F2, 0x3FEFA5EA, -/**/ 0xED17CC7C, 0x3C404DA6, -/**/ 0x2C5D66CB, 0x3FC3EB31, -/**/ 0x6B66CB91, 0x3C647D66, -/**/ 0x0A7CC428, 0x3FEF9C34, -/**/ 0x063B7462, 0x3C8C5B6B, -/**/ 0x4DC5F27B, 0x3FC4E7EA, -/**/ 0x2AC072FC, 0x3C5949DB, -/**/ 0x40374D01, 0x3FEF91FF, -/**/ 0x4D3A9E4C, 0xBC67D03F, -/**/ 0xCFA126F3, 0x3FC5E44F, -/**/ 0x063F89B6, 0xBC66F443, -/**/ 0x2E1EECF6, 0x3FEF874C, -/**/ 0xE1332B16, 0xBC8C6514, -/**/ 0xC05A4D4C, 0x3FC6E05D, -/**/ 0x8B81C940, 0xBBD32C5C, -/**/ 0xFEFFDE24, 0x3FEF7C1A, -/**/ 0xC47540B1, 0xBC78F55B, -/**/ 0x2FBAF2B5, 0x3FC7DC10, -/**/ 0xE23C97C3, 0x3C45AB50, -/**/ 0xDF9ECE1C, 0x3FEF706B, -/**/ 0x0C36DCB4, 0xBC8698C8, -/**/ 0x2EFAA944, 0x3FC8D763, -/**/ 0x62CBB953, 0xBC620FA2, -/**/ 0xFEB82ACD, 0x3FEF643E, -/**/ 0xC1FE28AC, 0x3C76B00A, -/**/ 0xD0CEC312, 0x3FC9D252, -/**/ 0x80B1137D, 0x3C59C43D, -/**/ 0x8CFF6797, 0x3FEF5794, -/**/ 0x3E03B1D5, 0x3C6E3A0D, -/**/ 0x297A0765, 0x3FCACCDB, -/**/ 0x57D6CDEB, 0xBC59883B, -/**/ 0xBD1E3A79, 0x3FEF4A6C, -/**/ 0xEDAEBB57, 0x3C813DF0, -/**/ 0x4EDC6199, 0x3FCBC6F8, -/**/ 0x6A7B0CAB, 0x3C69C1A5, -/**/ 0xC3B3D16E, 0x3FEF3CC7, -/**/ 0xD28A3494, 0xBC621A3A, -/**/ 0x588289A3, 0x3FCCC0A6, -/**/ 0x9BC87C6B, 0xBC6868D0, -/**/ 0xD753FFED, 0x3FEF2EA5, -/**/ 0x5F56D583, 0x3C8CC421, -/**/ 0x5FB5A5D0, 0x3FCDB9E1, -/**/ 0xD6CC6FC2, 0xBC632E20, -/**/ 0x3086649F, 0x3FEF2007, -/**/ 0x16C1984B, 0x3C7B9404, -/**/ 0x7F8AE5A3, 0x3FCEB2A5, -/**/ 0xAF572CEB, 
0xBC60BE06, -/**/ 0x09C5873B, 0x3FEF10EC, -/**/ 0x762C1283, 0x3C8D9072, -/**/ 0xD4F31577, 0x3FCFAAEE, -/**/ 0x508E32B8, 0xBC615D88, -/**/ 0x9F7DEEA1, 0x3FEF0154, -/**/ 0x99E5CAFD, 0x3C8D3C1E, -/**/ 0xBF65155C, 0x3FD0515C, -/**/ 0x9DFD8EC8, 0xBC79B8C2, -/**/ 0x300D2F26, 0x3FEEF141, -/**/ 0x08DED372, 0xBC82AA1B, -/**/ 0xCEF36436, 0x3FD0CD00, -/**/ 0x0C93E2B5, 0xBC79FB0A, -/**/ 0xFBC0F11C, 0x3FEEE0B1, -/**/ 0x80BBC3B1, 0xBC4BFD23, -/**/ 0xAA94DDEB, 0x3FD14861, -/**/ 0xB5B615A4, 0xBC6BE881, -/**/ 0x44D5EFA1, 0x3FEECFA7, -/**/ 0x4AF541D0, 0xBC556D0A, -/**/ 0x64C6B876, 0x3FD1C37D, -/**/ 0xFE0DCFF5, 0x3C746076, -/**/ 0x4F76EFA8, 0x3FEEBE21, -/**/ 0x12BA543E, 0xBC802F9F, -/**/ 0x111AAF36, 0x3FD23E52, -/**/ 0x334EFF18, 0xBC74F080, -/**/ 0x61BBAF4F, 0x3FEEAC20, -/**/ 0x3E94658D, 0x3C62C1D5, -/**/ 0xC43EB49F, 0x3FD2B8DD, -/**/ 0x99F2D807, 0x3C615538, -/**/ 0xC3A7CD83, 0x3FEE99A4, -/**/ 0x1BC53CE8, 0xBC82264B, -/**/ 0x94049F87, 0x3FD3331E, -/**/ 0xB40C302C, 0x3C7E0CB6, -/**/ 0xBF29A9ED, 0x3FEE86AE, -/**/ 0xFDBB58A7, 0x3C89397A, -/**/ 0x9769D3D8, 0x3FD3AD12, -/**/ 0x04878398, 0x3C003D55, -/**/ 0xA0193D40, 0x3FEE733E, -/**/ 0x3546CE13, 0xBC86428B, -/**/ 0xE69EE697, 0x3FD426B7, -/**/ 0x5705C59F, 0xBC7F09C7, -/**/ 0xB436E9D0, 0x3FEE5F54, -/**/ 0xD02FC8BC, 0x3C87EB0F, -/**/ 0x9B0F3D20, 0x3FD4A00C, -/**/ 0x6BB08EAD, 0x3C7823BA, -/**/ 0x4B2A449C, 0x3FEE4AF1, -/**/ 0x2E8A6833, 0xBC868CA0, -/**/ 0xCF68A77A, 0x3FD5190E, -/**/ 0x55EEF0F3, 0x3C7B3571, -/**/ 0xB680D6A5, 0x3FEE3614, -/**/ 0xAA015237, 0xBC727793, -/**/ 0x9FA2F597, 0x3FD591BC, -/**/ 0xAC3FE0CB, 0x3C67C74B, -/**/ 0x49ACD6C1, 0x3FEE20BF, -/**/ 0xC7EF636C, 0xBC5660AE, -/**/ 0x29078775, 0x3FD60A14, -/**/ 0x0BA89133, 0x3C5B1FD8, -/**/ 0x5A03DBCE, 0x3FEE0AF1, -/**/ 0x02771AE6, 0x3C5FE8E7, -/**/ 0x8A38D7F7, 0x3FD68213, -/**/ 0x02444AAD, 0xBC7D8892, -/**/ 0x3EBD875E, 0x3FEDF4AB, -/**/ 0x7E6736C4, 0xBC8E2D8A, -/**/ 0xE33A0255, 0x3FD6F9B8, -/**/ 0x4EE9DA0D, 0x3C742BC1, -/**/ 0x50F228D6, 0x3FEDDDED, -/**/ 0xD42BA2BF, 0xBC6E80C8, -/**/ 
0x55764214, 0x3FD77102, -/**/ 0x314BB6CE, 0xBC66EAD7, -/**/ 0xEB995912, 0x3FEDC6B7, -/**/ 0x776DCD35, 0x3C54B364, -/**/ 0x03C86D4E, 0x3FD7E7EE, -/**/ 0xDABF5AF2, 0xBC7B63BC, -/**/ 0x6B888E83, 0x3FEDAF0B, -/**/ 0x2B5E5CEA, 0x3C8A249E, -/**/ 0x12826949, 0x3FD85E7A, -/**/ 0x9B5FACE0, 0x3C78A40E, -/**/ 0x2F71A9DC, 0x3FED96E8, -/**/ 0xD5D2039D, 0x3C8FF61B, -/**/ 0xA774992F, 0x3FD8D4A4, -/**/ 0xEA766326, 0x3C744A02, -/**/ 0x97E17B4A, 0x3FED7E4E, -/**/ 0x352BED94, 0xBC63B770, -/**/ 0xE9F546C5, 0x3FD94A6B, -/**/ 0x3E683F58, 0xBC769CE1, -/**/ 0x073E4040, 0x3FED653F, -/**/ 0x434BEC37, 0xBC876236, -/**/ 0x02E80510, 0x3FD9BFCE, -/**/ 0xA320B0A4, 0x3C709E39, -/**/ 0xE1C619E0, 0x3FED4BB9, -/**/ 0x77858F61, 0x3C8F34BB, -/**/ 0x1CC50CCA, 0x3FDA34C9, -/**/ 0x3B50CECD, 0xBC5A310E, -/**/ 0x8D8D7C06, 0x3FED31BF, -/**/ 0x3089CBDD, 0x3C7E60DD, -/**/ 0x63A09277, 0x3FDAA95B, -/**/ 0xB13C0381, 0xBC66293E, -/**/ 0x727D94F0, 0x3FED1750, -/**/ 0x1EC1A48E, 0x3C80D52B, -/**/ 0x05321617, 0x3FDB1D83, -/**/ 0xCB99F519, 0xBC7AE242, -/**/ 0xFA52AD9F, 0x3FECFC6C, -/**/ 0x508F2A0D, 0x3C88B5B5, -/**/ 0x30DBAC43, 0x3FDB913E, -/**/ 0x2F6C3FF1, 0xBC7E38AD, -/**/ 0x909A82E5, 0x3FECE115, -/**/ 0xBB31109A, 0x3C81F139, -/**/ 0x17B140A3, 0x3FDC048B, -/**/ 0x757E9FA7, 0x3C619FE6, -/**/ 0xA2B2972E, 0x3FECC54A, -/**/ 0x2BA83A98, 0x3C64EE16, -/**/ 0xEC7FD19E, 0x3FDC7767, -/**/ 0x1A3D5826, 0xBC5EB14D, -/**/ 0x9FC67D0B, 0x3FECA90C, -/**/ 0x485E3462, 0xBC646A81, -/**/ 0xE3D4A51F, 0x3FDCE9D2, -/**/ 0x12DAE298, 0xBC62FC8A, -/**/ 0xF8CE1A84, 0x3FEC8C5B, -/**/ 0xA1590123, 0x3C7AB3D1, -/**/ 0x34047661, 0x3FDD5BCA, -/**/ 0xA75FC29C, 0x3C728A44, -/**/ 0x208BE53B, 0x3FEC6F39, -/**/ 0xFBAADB42, 0xBC8741DB, -/**/ 0x15329C9A, 0x3FDDCD4C, -/**/ 0xE171FD9A, 0x3C70D4C6, -/**/ 0x8B8B175E, 0x3FEC51A4, -/**/ 0x3B9AA880, 0xBC61BBB4, -/**/ 0xC1582A69, 0x3FDE3E56, -/**/ 0x1099F88F, 0xBC50A482, -/**/ 0xB01DDD81, 0x3FEC339E, -/**/ 0xEE82C5C0, 0xBC8CAAF5, -/**/ 0x744B05F0, 0x3FDEAEE8, -/**/ 0x3C9B027D, 0xBC5789B4, -/**/ 0x065B7D50, 
0x3FEC1528, -/**/ 0x1312E828, 0xBC889211, -/**/ 0x6BC4F97B, 0x3FDF1EFF, -/**/ 0xF8A7525C, 0x3C717212, -/**/ 0x081E7536, 0x3FEBF641, -/**/ 0x1628A9A1, 0x3C8B7BD7, -/**/ 0xE76ABC97, 0x3FDF8E99, -/**/ 0xAF2D00A3, 0x3C59D950, -/**/ 0x310294F5, 0x3FEBD6EA, -/**/ 0xC88C109D, 0x3C731BBC, -/**/ 0x28D2F57A, 0x3FDFFDB6, -/**/ 0x2E905B6A, 0x3C6F4A99, -/**/ 0xFE630F32, 0x3FEBB723, -/**/ 0x452D0A39, 0x3C772BD2, -/**/ 0x39C69955, 0x3FE03629, -/**/ 0x78397B01, 0xBC82D8CD, -/**/ 0xEF58840E, 0x3FEB96EE, -/**/ 0xC78FADE0, 0x3C545A3C, -/**/ 0x86946E5B, 0x3FE06D36, -/**/ 0x4538FF1B, 0x3C83F5AE, -/**/ 0x84B704C2, 0x3FEB764B, -/**/ 0xC21B389B, 0xBC8F5848, -/**/ 0x1E9E1001, 0x3FE0A402, -/**/ 0xA13914F6, 0xBC86F643, -/**/ 0x410C104E, 0x3FEB553A, -/**/ 0x47027A16, 0x3C58FF79, -/**/ 0x26B5672E, 0x3FE0DA8B, -/**/ 0xF0BEE909, 0xBC8A58DE, -/**/ 0xA89C8948, 0x3FEB33BB, -/**/ 0x1D1F6CA9, 0x3C8EA6A5, -/**/ 0xC4B69C3B, 0x3FE110D0, -/**/ 0x98809981, 0x3C8D9189, -/**/ 0x4162A4C6, 0x3FEB11D0, -/**/ 0x1EFBC0C2, 0x3C71DD56, -/**/ 0x1F8B7F82, 0x3FE146D2, -/**/ 0x5E2739A8, 0x3C7BF953, -/**/ 0x930BD275, 0x3FEAEF78, -/**/ 0x79746F94, 0xBC7F8362, -/**/ 0x5F2EEDB0, 0x3FE17C8E, -/**/ 0x102E2488, 0x3C635E57, -/**/ 0x26F69DE5, 0x3FEACCB5, -/**/ 0x8DD6B6CC, 0x3C88FB6A, -/**/ 0xACB02FDD, 0x3FE1B204, -/**/ 0x70CBB5FF, 0xBC5F190C, -/**/ 0x88308913, 0x3FEAA986, -/**/ 0x07CD5070, 0xBC0B83D6, -/**/ 0x3236574C, 0x3FE1E734, -/**/ 0xA4F41D5A, 0x3C722A3F, -/**/ 0x4373E02D, 0x3FEA85ED, -/**/ 0x385EC792, 0x3C69BE06, -/**/ 0x1B0394CF, 0x3FE21C1C, -/**/ 0x4B23AA31, 0x3C5E5B32, -/**/ 0xE72586AF, 0x3FEA61E9, -/**/ 0xE2FD453F, 0x3C858330, -/**/ 0x93788BBB, 0x3FE250BB, -/**/ 0x2457BCCE, 0x3C7EA3D0, -/**/ 0x0352BDCF, 0x3FEA3D7D, -/**/ 0xECA19669, 0xBC868DBA, -/**/ 0xC917A067, 0x3FE28511, -/**/ 0xD9A16B70, 0xBC801DF1, -/**/ 0x29AEE445, 0x3FEA18A7, -/**/ 0x736C0358, 0x3C395E25, -/**/ 0xEA88421E, 0x3FE2B91D, -/**/ 0xDB216AB0, 0xBC8FA371, -/**/ 0xED912F85, 0x3FE9F368, -/**/ 0xC5791606, 0xBC81D200, -/**/ 0x279A3082, 0x3FE2ECDF, -/**/ 
0xE0E7E37E, 0x3C8D3557, -/**/ 0xE3F25E5C, 0x3FE9CDC2, -/**/ 0x12993F62, 0x3C83F991, -/**/ 0xB148BC4F, 0x3FE32054, -/**/ 0x095A135B, 0x3C8F6B42, -/**/ 0xA36A6514, 0x3FE9A7B5, -/**/ 0xCC9FA7A9, 0x3C8722CF, -/**/ 0xB9BE0367, 0x3FE3537D, -/**/ 0x7AF040F0, 0x3C6B327E, -/**/ 0xC42E1310, 0x3FE98141, -/**/ 0x0488F08D, 0x3C8D1FF8, -/**/ 0x7456282B, 0x3FE38659, -/**/ 0xA93B07A8, 0xBC710FAD, -/**/ 0xE00CB1FD, 0x3FE95A67, -/**/ 0xA21F862D, 0xBC80BEFD, -/**/ 0x15A2840A, 0x3FE3B8E7, -/**/ 0xA7D2F07B, 0xBC797653, -/**/ 0x926D9E92, 0x3FE93328, -/**/ 0x03600CDA, 0xBC8BB770, -/**/ 0xD36CD53A, 0x3FE3EB25, -/**/ 0xE1570FC0, 0xBC5BE570, -/**/ 0x784DDAF7, 0x3FE90B84, -/**/ 0x0AB93B87, 0xBC70FEB1, -/**/ 0xE4BA6790, 0x3FE41D14, -/**/ 0xD287ECF5, 0x3C84608F, -/**/ 0x303D9AD1, 0x3FE8E37C, -/**/ 0xB53D4BF8, 0xBC6463A4, -/**/ 0x81CF386B, 0x3FE44EB3, -/**/ 0x1E6A5505, 0xBC83ED6C, -/**/ 0x5A5DC900, 0x3FE8BB10, -/**/ 0x3E9474C1, 0x3C8863E0, -/**/ 0xE431159F, 0x3FE48000, -/**/ 0x7463ED10, 0xBC8B194A, -/**/ 0x985D871F, 0x3FE89241, -/**/ 0xC413ED84, 0x3C8C48D9, -/**/ 0x46AAB761, 0x3FE4B0FC, -/**/ 0x738CC59A, 0x3C20DA05, -/**/ 0x8D77A6C6, 0x3FE86910, -/**/ 0xE2BFE9DD, 0x3C7338FF, -/**/ 0xE54ED51B, 0x3FE4E1A4, -/**/ 0x89B7C76A, 0xBC8A492F, -/**/ 0xDE701CA0, 0x3FE83F7D, -/**/ 0x609BC6E8, 0xBC4152CF, -/**/ 0xFD7B351C, 0x3FE511F9, -/**/ 0x61C48831, 0xBC85C0E8, -/**/ 0x31916D5D, 0x3FE8158A, -/**/ 0x0B8228DE, 0xBC6DE8B9, -/**/ 0xCDDBB724, 0x3FE541FA, -/**/ 0x8520D391, 0x3C7232C2, -/**/ 0x2EAA1488, 0x3FE7EB36, -/**/ 0xA4A5959F, 0x3C5A1D65, -/**/ 0x966D59B3, 0x3FE571A6, -/**/ 0x4D0FB198, 0x3C5C843B, -/**/ 0x7F09E54F, 0x3FE7C082, -/**/ 0xD72AEE68, 0xBC6C73D6, -/**/ 0x98813A12, 0x3FE5A0FC, -/**/ 0xB7D4227B, 0xBC8D82E2, -/**/ 0xCD7F6543, 0x3FE7956F, -/**/ 0xE9D45AE4, 0xBC8AB276, -/**/ 0x16BF8F0D, 0x3FE5CFFC, -/**/ 0x70EB578A, 0x3C896CB3, -/**/ 0xC655211F, 0x3FE769FE, -/**/ 0xCF8C68C5, 0xBC6827D5, -/**/ 0x552A9E57, 0x3FE5FEA4, -/**/ 0xF7EE20B7, 0x3C80B6CE, -/**/ 0x174EFBA1, 0x3FE73E30, -/**/ 0x3D94AD5F, 
0xBC65D3AE, -/**/ 0x9921AC79, 0x3FE62CF4, -/**/ 0x55B6241A, 0xBC8EDD98, -/**/ 0x6FA77678, 0x3FE71204, -/**/ 0xA5029C81, 0x3C8425B0, -/**/ 0x2963E755, 0x3FE65AEC, -/**/ 0x6B71053C, 0x3C8126F9, -/**/ 0x800CF55E, 0x3FE6E57C, -/**/ 0xDEDBD0A6, 0x3C860286, -/**/ 0x4E134B2F, 0x3FE6888A, -/**/ 0x7644D5E6, 0xBC86B7D3, -/**/ 0xFA9EFB5D, 0x3FE6B898, -/**/ 0x86CCF4B2, 0x3C715AC7, -/**/ 0x50B7821A, 0x3FE6B5CE, -/**/ 0x8F702E0F, 0xBC65D515, -/**/ 0x92EB6253, 0x3FE68B5A, -/**/ 0xD985F89C, 0xBC89A91A, -/**/ 0x7C40BDE1, 0x3FE6E2B7, -/**/ 0x857FAD53, 0xBC70E729, -/**/ 0xFDEB8CBA, 0x3FE65DC1, -/**/ 0x47337C77, 0xBC597C1B, -/**/ 0x1D0A8C40, 0x3FE70F45, -/**/ 0x3885770D, 0x3C697EDE, -/**/ 0xF20191C7, 0x3FE62FCF, -/**/ 0x895756EF, 0x3C6D9143, -/**/ 0x80DEA578, 0x3FE73B76, -/**/ 0x06DC12A2, 0xBC722483, -/**/ 0x26F563DF, 0x3FE60185, -/**/ 0xE0E432D0, 0x3C846CA5, -/**/ 0xF6F7B524, 0x3FE7674A, -/**/ 0x94AC84A8, 0x3C7E9D3F, -/**/ 0x55F1F17A, 0x3FE5D2E2, -/**/ 0x04C8892B, 0x3C803141, -/**/ 0xD0041D52, 0x3FE792C1, -/**/ 0xEEB354EB, 0xBC8ABF05, -/**/ 0x39824077, 0x3FE5A3E8, -/**/ 0x2759BE62, 0x3C8428AA, -/**/ 0x5E28B3C2, 0x3FE7BDDA, -/**/ 0x7CCD0393, 0x3C4AD119, -/**/ 0x8D8E83F2, 0x3FE57497, -/**/ 0xAF282D23, 0x3C8F4714, -/**/ 0xF5037959, 0x3FE7E893, -/**/ 0xAA650C4C, 0x3C80EEFB, -/**/ 0x0F592CA5, 0x3FE544F1, -/**/ 0xE6C7A62F, 0xBC8E7AE8, -/**/ 0xE9AE4BA4, 0x3FE812ED, -/**/ 0xDF402DDA, 0xBC87830A, -/**/ 0x7D7BF3DA, 0x3FE514F5, -/**/ 0x8073C259, 0x3C747A10 } }; +#define LOW_HALF 0 + +static const union +{ + unsigned int u[880]; + int4 i[880]; + double x[440]; +} __sincostab = {.u = {/**/ 0x00000000, 0x00000000, + /**/ 0x00000000, 0x00000000, + /**/ 0x00000000, 0x3FF00000, + /**/ 0x00000000, 0x00000000, + /**/ 0xAAAEEEEF, 0x3F7FFFEA, + /**/ 0xEC67B77C, 0xBC1E45E2, + /**/ 0x00155552, 0x3FEFFFC0, + /**/ 0xA0196DAE, 0x3C8F4A01, + /**/ 0xAAEEEED5, 0x3F8FFFAA, + /**/ 0x9A9F0777, 0xBC02AB63, + /**/ 0x0155549F, 0x3FEFFF00, + /**/ 0xA03A5EF3, 0x3C828A28, + /**/ 0x01033255, 0x3F97FF70, + /**/ 0x51527336, 
0x3BFEFE2B, + /**/ 0x06BFF7E6, 0x3FEFFDC0, + /**/ 0xE86977BD, 0x3C8AE6DA, + /**/ 0xAEEEE86F, 0x3F9FFEAA, + /**/ 0xFB224AE2, 0xBC3CD406, + /**/ 0x155527D3, 0x3FEFFC00, + /**/ 0x92D89B5B, 0xBC83B544, + /**/ 0xB12D45D5, 0x3FA3FEB2, + /**/ 0x203D1C11, 0x3C34EC54, + /**/ 0x3414A7BA, 0x3FEFF9C0, + /**/ 0xBE6C59BF, 0x3C6991F4, + /**/ 0x1032FBA9, 0x3FA7FDC0, + /**/ 0xF46E997A, 0xBC4599BD, + /**/ 0x6BFDF99F, 0x3FEFF700, + /**/ 0x60648D5F, 0xBC78B3B5, + /**/ 0x78586DAC, 0x3FABFC6D, + /**/ 0x03DBF236, 0x3C18E4FD, + /**/ 0xC8103A31, 0x3FEFF3C0, + /**/ 0xBDDC0E66, 0x3C74856D, + /**/ 0xEEED4EDB, 0x3FAFFAAA, + /**/ 0x32684B69, 0xBC42D16D, + /**/ 0x5549F4D3, 0x3FEFF001, + /**/ 0x7B99426F, 0x3C832838, + /**/ 0x3D808BEF, 0x3FB1FC34, + /**/ 0xE6F3BE4F, 0xBC5F3D32, + /**/ 0x22A8EF9F, 0x3FEFEBC2, + /**/ 0x34F54C77, 0x3C579349, + /**/ 0x12D1755B, 0x3FB3FACB, + /**/ 0x5299468C, 0xBC592191, + /**/ 0x4129EF6F, 0x3FEFE703, + /**/ 0x37C96F97, 0xBC6CBF43, + /**/ 0xFD10B737, 0x3FB5F911, + /**/ 0x02BE9102, 0xBC50184F, + /**/ 0xC3C873EB, 0x3FEFE1C4, + /**/ 0x057C4A02, 0xBC35A9C9, + /**/ 0x032550E4, 0x3FB7F701, + /**/ 0x1800501A, 0x3C3AFC2D, + /**/ 0xBF7E6B9B, 0x3FEFDC06, + /**/ 0xB535F8DB, 0x3C831902, + /**/ 0x2D55D1F9, 0x3FB9F490, + /**/ 0x7EAC1DC1, 0x3C52696D, + /**/ 0x4B43E000, 0x3FEFD5C9, + /**/ 0xCB4F92F9, 0xBC62E768, + /**/ 0x8568391D, 0x3FBBF1B7, + /**/ 0x1DEA4CC8, 0x3C5E9184, + /**/ 0x800E99B1, 0x3FEFCF0C, + /**/ 0x86D186AC, 0x3C6EA3D7, + /**/ 0x16C1CCE6, 0x3FBDEE6F, + /**/ 0x2FB71673, 0xBC450F8E, + /**/ 0x78D1BC88, 0x3FEFC7D0, + /**/ 0x447DB685, 0x3C8075D2, + /**/ 0xEE86EE36, 0x3FBFEAAE, + /**/ 0xBCC6F03B, 0xBC4AFCB2, + /**/ 0x527D5BD3, 0x3FEFC015, + /**/ 0x5094EFB8, 0x3C8B68F3, + /**/ 0x8DDD71D1, 0x3FC0F337, + /**/ 0x724F0F9E, 0x3C6D8468, + /**/ 0x2BFE0695, 0x3FEFB7DB, + /**/ 0xF4F65AB1, 0x3C821DAD, + /**/ 0xD7AFCEAF, 0x3FC1F0D3, + /**/ 0x099769A5, 0xBC66EF95, + /**/ 0x263C4BD3, 0x3FEFAF22, + /**/ 0x133A2769, 0xBC552ACE, + /**/ 0x5E4AB88F, 0x3FC2EE28, + /**/ 0x05DEE058, 0xBC6E4D0F, + 
/**/ 0x641C36F2, 0x3FEFA5EA, + /**/ 0xED17CC7C, 0x3C404DA6, + /**/ 0x2C5D66CB, 0x3FC3EB31, + /**/ 0x6B66CB91, 0x3C647D66, + /**/ 0x0A7CC428, 0x3FEF9C34, + /**/ 0x063B7462, 0x3C8C5B6B, + /**/ 0x4DC5F27B, 0x3FC4E7EA, + /**/ 0x2AC072FC, 0x3C5949DB, + /**/ 0x40374D01, 0x3FEF91FF, + /**/ 0x4D3A9E4C, 0xBC67D03F, + /**/ 0xCFA126F3, 0x3FC5E44F, + /**/ 0x063F89B6, 0xBC66F443, + /**/ 0x2E1EECF6, 0x3FEF874C, + /**/ 0xE1332B16, 0xBC8C6514, + /**/ 0xC05A4D4C, 0x3FC6E05D, + /**/ 0x8B81C940, 0xBBD32C5C, + /**/ 0xFEFFDE24, 0x3FEF7C1A, + /**/ 0xC47540B1, 0xBC78F55B, + /**/ 0x2FBAF2B5, 0x3FC7DC10, + /**/ 0xE23C97C3, 0x3C45AB50, + /**/ 0xDF9ECE1C, 0x3FEF706B, + /**/ 0x0C36DCB4, 0xBC8698C8, + /**/ 0x2EFAA944, 0x3FC8D763, + /**/ 0x62CBB953, 0xBC620FA2, + /**/ 0xFEB82ACD, 0x3FEF643E, + /**/ 0xC1FE28AC, 0x3C76B00A, + /**/ 0xD0CEC312, 0x3FC9D252, + /**/ 0x80B1137D, 0x3C59C43D, + /**/ 0x8CFF6797, 0x3FEF5794, + /**/ 0x3E03B1D5, 0x3C6E3A0D, + /**/ 0x297A0765, 0x3FCACCDB, + /**/ 0x57D6CDEB, 0xBC59883B, + /**/ 0xBD1E3A79, 0x3FEF4A6C, + /**/ 0xEDAEBB57, 0x3C813DF0, + /**/ 0x4EDC6199, 0x3FCBC6F8, + /**/ 0x6A7B0CAB, 0x3C69C1A5, + /**/ 0xC3B3D16E, 0x3FEF3CC7, + /**/ 0xD28A3494, 0xBC621A3A, + /**/ 0x588289A3, 0x3FCCC0A6, + /**/ 0x9BC87C6B, 0xBC6868D0, + /**/ 0xD753FFED, 0x3FEF2EA5, + /**/ 0x5F56D583, 0x3C8CC421, + /**/ 0x5FB5A5D0, 0x3FCDB9E1, + /**/ 0xD6CC6FC2, 0xBC632E20, + /**/ 0x3086649F, 0x3FEF2007, + /**/ 0x16C1984B, 0x3C7B9404, + /**/ 0x7F8AE5A3, 0x3FCEB2A5, + /**/ 0xAF572CEB, 0xBC60BE06, + /**/ 0x09C5873B, 0x3FEF10EC, + /**/ 0x762C1283, 0x3C8D9072, + /**/ 0xD4F31577, 0x3FCFAAEE, + /**/ 0x508E32B8, 0xBC615D88, + /**/ 0x9F7DEEA1, 0x3FEF0154, + /**/ 0x99E5CAFD, 0x3C8D3C1E, + /**/ 0xBF65155C, 0x3FD0515C, + /**/ 0x9DFD8EC8, 0xBC79B8C2, + /**/ 0x300D2F26, 0x3FEEF141, + /**/ 0x08DED372, 0xBC82AA1B, + /**/ 0xCEF36436, 0x3FD0CD00, + /**/ 0x0C93E2B5, 0xBC79FB0A, + /**/ 0xFBC0F11C, 0x3FEEE0B1, + /**/ 0x80BBC3B1, 0xBC4BFD23, + /**/ 0xAA94DDEB, 0x3FD14861, + /**/ 0xB5B615A4, 0xBC6BE881, + /**/ 
0x44D5EFA1, 0x3FEECFA7, + /**/ 0x4AF541D0, 0xBC556D0A, + /**/ 0x64C6B876, 0x3FD1C37D, + /**/ 0xFE0DCFF5, 0x3C746076, + /**/ 0x4F76EFA8, 0x3FEEBE21, + /**/ 0x12BA543E, 0xBC802F9F, + /**/ 0x111AAF36, 0x3FD23E52, + /**/ 0x334EFF18, 0xBC74F080, + /**/ 0x61BBAF4F, 0x3FEEAC20, + /**/ 0x3E94658D, 0x3C62C1D5, + /**/ 0xC43EB49F, 0x3FD2B8DD, + /**/ 0x99F2D807, 0x3C615538, + /**/ 0xC3A7CD83, 0x3FEE99A4, + /**/ 0x1BC53CE8, 0xBC82264B, + /**/ 0x94049F87, 0x3FD3331E, + /**/ 0xB40C302C, 0x3C7E0CB6, + /**/ 0xBF29A9ED, 0x3FEE86AE, + /**/ 0xFDBB58A7, 0x3C89397A, + /**/ 0x9769D3D8, 0x3FD3AD12, + /**/ 0x04878398, 0x3C003D55, + /**/ 0xA0193D40, 0x3FEE733E, + /**/ 0x3546CE13, 0xBC86428B, + /**/ 0xE69EE697, 0x3FD426B7, + /**/ 0x5705C59F, 0xBC7F09C7, + /**/ 0xB436E9D0, 0x3FEE5F54, + /**/ 0xD02FC8BC, 0x3C87EB0F, + /**/ 0x9B0F3D20, 0x3FD4A00C, + /**/ 0x6BB08EAD, 0x3C7823BA, + /**/ 0x4B2A449C, 0x3FEE4AF1, + /**/ 0x2E8A6833, 0xBC868CA0, + /**/ 0xCF68A77A, 0x3FD5190E, + /**/ 0x55EEF0F3, 0x3C7B3571, + /**/ 0xB680D6A5, 0x3FEE3614, + /**/ 0xAA015237, 0xBC727793, + /**/ 0x9FA2F597, 0x3FD591BC, + /**/ 0xAC3FE0CB, 0x3C67C74B, + /**/ 0x49ACD6C1, 0x3FEE20BF, + /**/ 0xC7EF636C, 0xBC5660AE, + /**/ 0x29078775, 0x3FD60A14, + /**/ 0x0BA89133, 0x3C5B1FD8, + /**/ 0x5A03DBCE, 0x3FEE0AF1, + /**/ 0x02771AE6, 0x3C5FE8E7, + /**/ 0x8A38D7F7, 0x3FD68213, + /**/ 0x02444AAD, 0xBC7D8892, + /**/ 0x3EBD875E, 0x3FEDF4AB, + /**/ 0x7E6736C4, 0xBC8E2D8A, + /**/ 0xE33A0255, 0x3FD6F9B8, + /**/ 0x4EE9DA0D, 0x3C742BC1, + /**/ 0x50F228D6, 0x3FEDDDED, + /**/ 0xD42BA2BF, 0xBC6E80C8, + /**/ 0x55764214, 0x3FD77102, + /**/ 0x314BB6CE, 0xBC66EAD7, + /**/ 0xEB995912, 0x3FEDC6B7, + /**/ 0x776DCD35, 0x3C54B364, + /**/ 0x03C86D4E, 0x3FD7E7EE, + /**/ 0xDABF5AF2, 0xBC7B63BC, + /**/ 0x6B888E83, 0x3FEDAF0B, + /**/ 0x2B5E5CEA, 0x3C8A249E, + /**/ 0x12826949, 0x3FD85E7A, + /**/ 0x9B5FACE0, 0x3C78A40E, + /**/ 0x2F71A9DC, 0x3FED96E8, + /**/ 0xD5D2039D, 0x3C8FF61B, + /**/ 0xA774992F, 0x3FD8D4A4, + /**/ 0xEA766326, 0x3C744A02, + /**/ 0x97E17B4A, 
0x3FED7E4E, + /**/ 0x352BED94, 0xBC63B770, + /**/ 0xE9F546C5, 0x3FD94A6B, + /**/ 0x3E683F58, 0xBC769CE1, + /**/ 0x073E4040, 0x3FED653F, + /**/ 0x434BEC37, 0xBC876236, + /**/ 0x02E80510, 0x3FD9BFCE, + /**/ 0xA320B0A4, 0x3C709E39, + /**/ 0xE1C619E0, 0x3FED4BB9, + /**/ 0x77858F61, 0x3C8F34BB, + /**/ 0x1CC50CCA, 0x3FDA34C9, + /**/ 0x3B50CECD, 0xBC5A310E, + /**/ 0x8D8D7C06, 0x3FED31BF, + /**/ 0x3089CBDD, 0x3C7E60DD, + /**/ 0x63A09277, 0x3FDAA95B, + /**/ 0xB13C0381, 0xBC66293E, + /**/ 0x727D94F0, 0x3FED1750, + /**/ 0x1EC1A48E, 0x3C80D52B, + /**/ 0x05321617, 0x3FDB1D83, + /**/ 0xCB99F519, 0xBC7AE242, + /**/ 0xFA52AD9F, 0x3FECFC6C, + /**/ 0x508F2A0D, 0x3C88B5B5, + /**/ 0x30DBAC43, 0x3FDB913E, + /**/ 0x2F6C3FF1, 0xBC7E38AD, + /**/ 0x909A82E5, 0x3FECE115, + /**/ 0xBB31109A, 0x3C81F139, + /**/ 0x17B140A3, 0x3FDC048B, + /**/ 0x757E9FA7, 0x3C619FE6, + /**/ 0xA2B2972E, 0x3FECC54A, + /**/ 0x2BA83A98, 0x3C64EE16, + /**/ 0xEC7FD19E, 0x3FDC7767, + /**/ 0x1A3D5826, 0xBC5EB14D, + /**/ 0x9FC67D0B, 0x3FECA90C, + /**/ 0x485E3462, 0xBC646A81, + /**/ 0xE3D4A51F, 0x3FDCE9D2, + /**/ 0x12DAE298, 0xBC62FC8A, + /**/ 0xF8CE1A84, 0x3FEC8C5B, + /**/ 0xA1590123, 0x3C7AB3D1, + /**/ 0x34047661, 0x3FDD5BCA, + /**/ 0xA75FC29C, 0x3C728A44, + /**/ 0x208BE53B, 0x3FEC6F39, + /**/ 0xFBAADB42, 0xBC8741DB, + /**/ 0x15329C9A, 0x3FDDCD4C, + /**/ 0xE171FD9A, 0x3C70D4C6, + /**/ 0x8B8B175E, 0x3FEC51A4, + /**/ 0x3B9AA880, 0xBC61BBB4, + /**/ 0xC1582A69, 0x3FDE3E56, + /**/ 0x1099F88F, 0xBC50A482, + /**/ 0xB01DDD81, 0x3FEC339E, + /**/ 0xEE82C5C0, 0xBC8CAAF5, + /**/ 0x744B05F0, 0x3FDEAEE8, + /**/ 0x3C9B027D, 0xBC5789B4, + /**/ 0x065B7D50, 0x3FEC1528, + /**/ 0x1312E828, 0xBC889211, + /**/ 0x6BC4F97B, 0x3FDF1EFF, + /**/ 0xF8A7525C, 0x3C717212, + /**/ 0x081E7536, 0x3FEBF641, + /**/ 0x1628A9A1, 0x3C8B7BD7, + /**/ 0xE76ABC97, 0x3FDF8E99, + /**/ 0xAF2D00A3, 0x3C59D950, + /**/ 0x310294F5, 0x3FEBD6EA, + /**/ 0xC88C109D, 0x3C731BBC, + /**/ 0x28D2F57A, 0x3FDFFDB6, + /**/ 0x2E905B6A, 0x3C6F4A99, + /**/ 0xFE630F32, 0x3FEBB723, + 
/**/ 0x452D0A39, 0x3C772BD2, + /**/ 0x39C69955, 0x3FE03629, + /**/ 0x78397B01, 0xBC82D8CD, + /**/ 0xEF58840E, 0x3FEB96EE, + /**/ 0xC78FADE0, 0x3C545A3C, + /**/ 0x86946E5B, 0x3FE06D36, + /**/ 0x4538FF1B, 0x3C83F5AE, + /**/ 0x84B704C2, 0x3FEB764B, + /**/ 0xC21B389B, 0xBC8F5848, + /**/ 0x1E9E1001, 0x3FE0A402, + /**/ 0xA13914F6, 0xBC86F643, + /**/ 0x410C104E, 0x3FEB553A, + /**/ 0x47027A16, 0x3C58FF79, + /**/ 0x26B5672E, 0x3FE0DA8B, + /**/ 0xF0BEE909, 0xBC8A58DE, + /**/ 0xA89C8948, 0x3FEB33BB, + /**/ 0x1D1F6CA9, 0x3C8EA6A5, + /**/ 0xC4B69C3B, 0x3FE110D0, + /**/ 0x98809981, 0x3C8D9189, + /**/ 0x4162A4C6, 0x3FEB11D0, + /**/ 0x1EFBC0C2, 0x3C71DD56, + /**/ 0x1F8B7F82, 0x3FE146D2, + /**/ 0x5E2739A8, 0x3C7BF953, + /**/ 0x930BD275, 0x3FEAEF78, + /**/ 0x79746F94, 0xBC7F8362, + /**/ 0x5F2EEDB0, 0x3FE17C8E, + /**/ 0x102E2488, 0x3C635E57, + /**/ 0x26F69DE5, 0x3FEACCB5, + /**/ 0x8DD6B6CC, 0x3C88FB6A, + /**/ 0xACB02FDD, 0x3FE1B204, + /**/ 0x70CBB5FF, 0xBC5F190C, + /**/ 0x88308913, 0x3FEAA986, + /**/ 0x07CD5070, 0xBC0B83D6, + /**/ 0x3236574C, 0x3FE1E734, + /**/ 0xA4F41D5A, 0x3C722A3F, + /**/ 0x4373E02D, 0x3FEA85ED, + /**/ 0x385EC792, 0x3C69BE06, + /**/ 0x1B0394CF, 0x3FE21C1C, + /**/ 0x4B23AA31, 0x3C5E5B32, + /**/ 0xE72586AF, 0x3FEA61E9, + /**/ 0xE2FD453F, 0x3C858330, + /**/ 0x93788BBB, 0x3FE250BB, + /**/ 0x2457BCCE, 0x3C7EA3D0, + /**/ 0x0352BDCF, 0x3FEA3D7D, + /**/ 0xECA19669, 0xBC868DBA, + /**/ 0xC917A067, 0x3FE28511, + /**/ 0xD9A16B70, 0xBC801DF1, + /**/ 0x29AEE445, 0x3FEA18A7, + /**/ 0x736C0358, 0x3C395E25, + /**/ 0xEA88421E, 0x3FE2B91D, + /**/ 0xDB216AB0, 0xBC8FA371, + /**/ 0xED912F85, 0x3FE9F368, + /**/ 0xC5791606, 0xBC81D200, + /**/ 0x279A3082, 0x3FE2ECDF, + /**/ 0xE0E7E37E, 0x3C8D3557, + /**/ 0xE3F25E5C, 0x3FE9CDC2, + /**/ 0x12993F62, 0x3C83F991, + /**/ 0xB148BC4F, 0x3FE32054, + /**/ 0x095A135B, 0x3C8F6B42, + /**/ 0xA36A6514, 0x3FE9A7B5, + /**/ 0xCC9FA7A9, 0x3C8722CF, + /**/ 0xB9BE0367, 0x3FE3537D, + /**/ 0x7AF040F0, 0x3C6B327E, + /**/ 0xC42E1310, 0x3FE98141, + /**/ 
0x0488F08D, 0x3C8D1FF8, + /**/ 0x7456282B, 0x3FE38659, + /**/ 0xA93B07A8, 0xBC710FAD, + /**/ 0xE00CB1FD, 0x3FE95A67, + /**/ 0xA21F862D, 0xBC80BEFD, + /**/ 0x15A2840A, 0x3FE3B8E7, + /**/ 0xA7D2F07B, 0xBC797653, + /**/ 0x926D9E92, 0x3FE93328, + /**/ 0x03600CDA, 0xBC8BB770, + /**/ 0xD36CD53A, 0x3FE3EB25, + /**/ 0xE1570FC0, 0xBC5BE570, + /**/ 0x784DDAF7, 0x3FE90B84, + /**/ 0x0AB93B87, 0xBC70FEB1, + /**/ 0xE4BA6790, 0x3FE41D14, + /**/ 0xD287ECF5, 0x3C84608F, + /**/ 0x303D9AD1, 0x3FE8E37C, + /**/ 0xB53D4BF8, 0xBC6463A4, + /**/ 0x81CF386B, 0x3FE44EB3, + /**/ 0x1E6A5505, 0xBC83ED6C, + /**/ 0x5A5DC900, 0x3FE8BB10, + /**/ 0x3E9474C1, 0x3C8863E0, + /**/ 0xE431159F, 0x3FE48000, + /**/ 0x7463ED10, 0xBC8B194A, + /**/ 0x985D871F, 0x3FE89241, + /**/ 0xC413ED84, 0x3C8C48D9, + /**/ 0x46AAB761, 0x3FE4B0FC, + /**/ 0x738CC59A, 0x3C20DA05, + /**/ 0x8D77A6C6, 0x3FE86910, + /**/ 0xE2BFE9DD, 0x3C7338FF, + /**/ 0xE54ED51B, 0x3FE4E1A4, + /**/ 0x89B7C76A, 0xBC8A492F, + /**/ 0xDE701CA0, 0x3FE83F7D, + /**/ 0x609BC6E8, 0xBC4152CF, + /**/ 0xFD7B351C, 0x3FE511F9, + /**/ 0x61C48831, 0xBC85C0E8, + /**/ 0x31916D5D, 0x3FE8158A, + /**/ 0x0B8228DE, 0xBC6DE8B9, + /**/ 0xCDDBB724, 0x3FE541FA, + /**/ 0x8520D391, 0x3C7232C2, + /**/ 0x2EAA1488, 0x3FE7EB36, + /**/ 0xA4A5959F, 0x3C5A1D65, + /**/ 0x966D59B3, 0x3FE571A6, + /**/ 0x4D0FB198, 0x3C5C843B, + /**/ 0x7F09E54F, 0x3FE7C082, + /**/ 0xD72AEE68, 0xBC6C73D6, + /**/ 0x98813A12, 0x3FE5A0FC, + /**/ 0xB7D4227B, 0xBC8D82E2, + /**/ 0xCD7F6543, 0x3FE7956F, + /**/ 0xE9D45AE4, 0xBC8AB276, + /**/ 0x16BF8F0D, 0x3FE5CFFC, + /**/ 0x70EB578A, 0x3C896CB3, + /**/ 0xC655211F, 0x3FE769FE, + /**/ 0xCF8C68C5, 0xBC6827D5, + /**/ 0x552A9E57, 0x3FE5FEA4, + /**/ 0xF7EE20B7, 0x3C80B6CE, + /**/ 0x174EFBA1, 0x3FE73E30, + /**/ 0x3D94AD5F, 0xBC65D3AE, + /**/ 0x9921AC79, 0x3FE62CF4, + /**/ 0x55B6241A, 0xBC8EDD98, + /**/ 0x6FA77678, 0x3FE71204, + /**/ 0xA5029C81, 0x3C8425B0, + /**/ 0x2963E755, 0x3FE65AEC, + /**/ 0x6B71053C, 0x3C8126F9, + /**/ 0x800CF55E, 0x3FE6E57C, + /**/ 0xDEDBD0A6, 
0x3C860286, + /**/ 0x4E134B2F, 0x3FE6888A, + /**/ 0x7644D5E6, 0xBC86B7D3, + /**/ 0xFA9EFB5D, 0x3FE6B898, + /**/ 0x86CCF4B2, 0x3C715AC7, + /**/ 0x50B7821A, 0x3FE6B5CE, + /**/ 0x8F702E0F, 0xBC65D515, + /**/ 0x92EB6253, 0x3FE68B5A, + /**/ 0xD985F89C, 0xBC89A91A, + /**/ 0x7C40BDE1, 0x3FE6E2B7, + /**/ 0x857FAD53, 0xBC70E729, + /**/ 0xFDEB8CBA, 0x3FE65DC1, + /**/ 0x47337C77, 0xBC597C1B, + /**/ 0x1D0A8C40, 0x3FE70F45, + /**/ 0x3885770D, 0x3C697EDE, + /**/ 0xF20191C7, 0x3FE62FCF, + /**/ 0x895756EF, 0x3C6D9143, + /**/ 0x80DEA578, 0x3FE73B76, + /**/ 0x06DC12A2, 0xBC722483, + /**/ 0x26F563DF, 0x3FE60185, + /**/ 0xE0E432D0, 0x3C846CA5, + /**/ 0xF6F7B524, 0x3FE7674A, + /**/ 0x94AC84A8, 0x3C7E9D3F, + /**/ 0x55F1F17A, 0x3FE5D2E2, + /**/ 0x04C8892B, 0x3C803141, + /**/ 0xD0041D52, 0x3FE792C1, + /**/ 0xEEB354EB, 0xBC8ABF05, + /**/ 0x39824077, 0x3FE5A3E8, + /**/ 0x2759BE62, 0x3C8428AA, + /**/ 0x5E28B3C2, 0x3FE7BDDA, + /**/ 0x7CCD0393, 0x3C4AD119, + /**/ 0x8D8E83F2, 0x3FE57497, + /**/ 0xAF282D23, 0x3C8F4714, + /**/ 0xF5037959, 0x3FE7E893, + /**/ 0xAA650C4C, 0x3C80EEFB, + /**/ 0x0F592CA5, 0x3FE544F1, + /**/ 0xE6C7A62F, 0xBC8E7AE8, + /**/ 0xE9AE4BA4, 0x3FE812ED, + /**/ 0xDF402DDA, 0xBC87830A, + /**/ 0x7D7BF3DA, 0x3FE514F5, + /**/ 0x8073C259, 0x3C747A10}}; #endif -static const double s1 = /* -0x1.5555555555555p-3; */ -0.16666666666666666 ; -static const double s2 = /* 0x1.1111111110ECEp-7; */ 0.0083333333333323288 ; -static const double s3 = /* -0x1.A01A019DB08B8p-13; */ -0.00019841269834414642 ; -static const double s4 = /* 0x1.71DE27B9A7ED9p-19; */ 2.755729806860771e-06 ; -static const double s5 = /* -0x1.ADDFFC2FCDF59p-26; */ -2.5022014848318398e-08 ; -static const double aa = /* -0x1.5558000000000p-3; */ -0.1666717529296875 ; -static const double bb = /* 0x1.5555555556E24p-18; */ 5.0862630208387126e-06 ; -static const double big = /* 0x1.8000000000000p45; */ 52776558133248 ; -static const double hp0 = /* 0x1.921FB54442D18p0; */ 1.5707963267948966 ; -static const double hp1 = /* 
0x1.1A62633145C07p-54; */ 6.123233995736766e-17 ; -static const double mp1 = /* 0x1.921FB58000000p0; */ 1.5707963407039642 ; -static const double mp2 = /* -0x1.DDE973C000000p-27; */ -1.3909067564377153e-08 ; -static const double mp3 = /* -0x1.CB3B399D747F2p-55; */ -4.9789962505147994e-17 ; -static const double pp3 = /* -0x1.CB3B398000000p-55; */ -4.9789962314799099e-17 ; -static const double pp4 = /* -0x1.d747f23e32ed7p-83; */ -1.9034889620193266e-25 ; -static const double hpinv = /* 0x1.45F306DC9C883p-1; */ 0.63661977236758138 ; -static const double toint = /* 0x1.8000000000000p52; */ 6755399441055744 ; +static const double s1 = /* -0x1.5555555555555p-3; */ -0.16666666666666666; +static const double s2 = /* 0x1.1111111110ECEp-7; */ 0.0083333333333323288; +static const double s3 = /* -0x1.A01A019DB08B8p-13; */ -0.00019841269834414642; +static const double s4 = /* 0x1.71DE27B9A7ED9p-19; */ 2.755729806860771e-06; +static const double s5 = /* -0x1.ADDFFC2FCDF59p-26; */ -2.5022014848318398e-08; +static const double aa = /* -0x1.5558000000000p-3; */ -0.1666717529296875; +static const double bb = /* 0x1.5555555556E24p-18; */ 5.0862630208387126e-06; +static const double big = /* 0x1.8000000000000p45; */ 52776558133248; +static const double hp0 = /* 0x1.921FB54442D18p0; */ 1.5707963267948966; +static const double hp1 = /* 0x1.1A62633145C07p-54; */ 6.123233995736766e-17; +static const double mp1 = /* 0x1.921FB58000000p0; */ 1.5707963407039642; +static const double mp2 = /* -0x1.DDE973C000000p-27; */ -1.3909067564377153e-08; +static const double mp3 = /* -0x1.CB3B399D747F2p-55; */ -4.9789962505147994e-17; +static const double pp3 = /* -0x1.CB3B398000000p-55; */ -4.9789962314799099e-17; +static const double pp4 = /* -0x1.d747f23e32ed7p-83; */ -1.9034889620193266e-25; +static const double hpinv = /* 0x1.45F306DC9C883p-1; */ 0.63661977236758138; +static const double toint = /* 0x1.8000000000000p52; */ 6755399441055744; /* Helper macros to compute sin of the input values. 
*/ #define POLYNOMIAL2(xx) ((((s5 * (xx) + s4) * (xx) + s3) * (xx) + s2) * (xx)) @@ -948,53 +961,52 @@ static const double toint = /* 0x1.8000000000000p52; */ 6755399441055744 The constants s1, s2, s3, etc. are pre-computed values of 1/3!, 1/5! and so on. The result is returned to LHS. */ -#define TAYLOR_SIN(xx, x, dx) \ -({ \ - double t = ((POLYNOMIAL (xx) * (x) - 0.5 * (dx)) * (xx) + (dx)); \ - double res = (x) + t; \ - res; \ -}) - -#define SINCOS_TABLE_LOOKUP(u, sn, ssn, cs, ccs) \ -({ \ - int4 k = u.i[LOW_HALF] << 2; \ - sn = __sincostab.x[k]; \ - ssn = __sincostab.x[k + 1]; \ - cs = __sincostab.x[k + 2]; \ - ccs = __sincostab.x[k + 3]; \ -}) - -static const double - sn3 = -1.66666666666664880952546298448555E-01, - sn5 = 8.33333214285722277379541354343671E-03, - cs2 = 4.99999999999999999999950396842453E-01, - cs4 = -4.16666666666664434524222570944589E-02, - cs6 = 1.38888874007937613028114285595617E-03; - -int __branred (double x, double *a, double *aa); +#define TAYLOR_SIN(xx, x, dx) \ + ({ \ + double t = ((POLYNOMIAL (xx) * (x) - 0.5 * (dx)) * (xx) + (dx)); \ + double res = (x) + t; \ + res; \ + }) + +#define SINCOS_TABLE_LOOKUP(u, sn, ssn, cs, ccs) \ + ({ \ + int4 k = u.i[LOW_HALF] << 2; \ + sn = __sincostab.x[k]; \ + ssn = __sincostab.x[k + 1]; \ + cs = __sincostab.x[k + 2]; \ + ccs = __sincostab.x[k + 3]; \ + }) + +static const double sn3 = -1.66666666666664880952546298448555E-01, sn5 = 8.33333214285722277379541354343671E-03, + cs2 = 4.99999999999999999999950396842453E-01, cs4 = -4.16666666666664434524222570944589E-02, + cs6 = 1.38888874007937613028114285595617E-03; + +int __branred (double x, double* a, double* aa); /* Given a number partitioned into X and DX, this function computes the cosine of the number by combining the sin and cos of X (as computed by a variation of the Taylor series) with the values looked up from the sin/cos table to get the result. 
*/ static __always_inline double -do_cos (double x, double dx) + do_cos (double x, double dx) { - mynumber u; - - if (x < 0) - dx = -dx; - - u.x = big + fabs (x); - x = fabs (x) - (u.x - big) + dx; - - double xx, s, sn, ssn, c, cs, ccs, cor; - xx = x * x; - s = x + x * xx * (sn3 + xx * sn5); - c = xx * (cs2 + xx * (cs4 + xx * cs6)); - SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs); - cor = (ccs - s * ssn - cs * c) - sn * s; - return cs + cor; + mynumber u; + + if (x < 0) + { + dx = -dx; + } + + u.x = big + fabs (x); + x = fabs (x) - (u.x - big) + dx; + + double xx, s, sn, ssn, c, cs, ccs, cor; + xx = x * x; + s = x + x * xx * (sn3 + xx * sn5); + c = xx * (cs2 + xx * (cs4 + xx * cs6)); + SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs); + cor = (ccs - s * ssn - cs * c) - sn * s; + return cs + cor; } /* Given a number partitioned into X and DX, this function computes the sine of @@ -1002,27 +1014,31 @@ do_cos (double x, double dx) the Taylor series) with the values looked up from the sin/cos table to get the result. */ static __always_inline double -do_sin (double x, double dx) + do_sin (double x, double dx) { - double xold = x; - /* Max ULP is 0.501 if |x| < 0.126, otherwise ULP is 0.518. */ - if (fabs (x) < 0.126) - return TAYLOR_SIN (x * x, x, dx); - - mynumber u; - - if (x <= 0) - dx = -dx; - u.x = big + fabs (x); - x = fabs (x) - (u.x - big); - - double xx, s, sn, ssn, c, cs, ccs, cor; - xx = x * x; - s = x + (dx + x * xx * (sn3 + xx * sn5)); - c = x * dx + xx * (cs2 + xx * (cs4 + xx * cs6)); - SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs); - cor = (ssn + s * ccs - sn * c) + cs * s; - return copysign (sn + cor, xold); + double xold = x; + /* Max ULP is 0.501 if |x| < 0.126, otherwise ULP is 0.518. 
*/ + if (fabs (x) < 0.126) + { + return TAYLOR_SIN (x * x, x, dx); + } + + mynumber u; + + if (x <= 0) + { + dx = -dx; + } + u.x = big + fabs (x); + x = fabs (x) - (u.x - big); + + double xx, s, sn, ssn, c, cs, ccs, cor; + xx = x * x; + s = x + (dx + x * xx * (sn3 + xx * sn5)); + c = x * dx + xx * (cs2 + xx * (cs4 + xx * cs6)); + SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs); + cor = (ssn + s * ccs - sn * c) + cs * s; + return copysign (sn + cor, xold); } /* Reduce range of x to within PI/2 with abs (x) < 105414350. The high part @@ -1030,233 +1046,242 @@ do_sin (double x, double dx) bits so that when x is large and *a very close to zero, all 53 bits of *a are correct. */ static __always_inline int4 -reduce_sincos (double x, double *a, double *da) + reduce_sincos (double x, double* a, double* da) { - mynumber v; - - double t = (x * hpinv + toint); - double xn = t - toint; - v.x = t; - double y = (x - xn * mp1) - xn * mp2; - int4 n = v.i[LOW_HALF] & 3; - - double b, db, t1, t2; - t1 = xn * pp3; - t2 = y - t1; - db = (y - t2) - t1; - - t1 = xn * pp4; - b = t2 - t1; - db += (t2 - b) - t1; - - *a = b; - *da = db; - return n; + mynumber v; + + double t = (x * hpinv + toint); + double xn = t - toint; + v.x = t; + double y = (x - xn * mp1) - xn * mp2; + int4 n = v.i[LOW_HALF] & 3; + + double b, db, t1, t2; + t1 = xn * pp3; + t2 = y - t1; + db = (y - t2) - t1; + + t1 = xn * pp4; + b = t2 - t1; + db += (t2 - b) - t1; + + *a = b; + *da = db; + return n; } /* Compute sin or cos (A + DA) for the given quadrant N. */ static __always_inline double -do_sincos (double a, double da, int4 n) + do_sincos (double a, double da, int4 n) { - double retval; - - if (n & 1) - /* Max ULP is 0.513. */ - retval = do_cos (a, da); - else - /* Max ULP is 0.501 if xx < 0.01588, otherwise ULP is 0.518. */ - retval = do_sin (a, da); - - return (n & 2) ? -retval : retval; + double retval; + + if (n & 1) + { + /* Max ULP is 0.513. 
*/ + retval = do_cos (a, da); + } + else + { + /* Max ULP is 0.501 if xx < 0.01588, otherwise ULP is 0.518. */ + retval = do_sin (a, da); + } + + return (n & 2) ? -retval : retval; } - /*******************************************************************/ /* An ultimate sin routine. Given an IEEE double machine number x */ /* it computes the rounded value of sin(x). */ /*******************************************************************/ double -__sin (double x) + __sin (double x) { - double t, a, da; - mynumber u; - int4 k, m, n; - double retval = 0; - - u.x = x; - m = u.i[HIGH_HALF]; - k = 0x7fffffff & m; /* no sign */ - if (k < 0x3e500000) /* if x->0 =>sin(x)=x */ - { - retval = x; - } -/*--------------------------- 2^-26<|x|< 0.855469---------------------- */ - else if (k < 0x3feb6000) - { - /* Max ULP is 0.548. */ - retval = do_sin (x, 0); - } /* else if (k < 0x3feb6000) */ - -/*----------------------- 0.855469 <|x|<2.426265 ----------------------*/ - else if (k < 0x400368fd) - { - t = hp0 - fabs (x); - /* Max ULP is 0.51. 
*/ - retval = copysign (do_cos (t, hp1), x); - } /* else if (k < 0x400368fd) */ - -/*-------------------------- 2.426265<|x|< 105414350 ----------------------*/ - else if (k < 0x419921FB) - { - n = reduce_sincos (x, &a, &da); - retval = do_sincos (a, da, n); - } /* else if (k < 0x419921FB ) */ - -/* --------------------105414350 <|x| <2^1024------------------------------*/ - else if (k < 0x7ff00000) - { - n = __branred (x, &a, &da); - retval = do_sincos (a, da, n); - } -/*--------------------- |x| > 2^1024 ----------------------------------*/ - else - { - if (k == 0x7ff00000 && u.i[LOW_HALF] == 0) - retval = x / x; - } - - return retval; + double t, a, da; + mynumber u; + int4 k, m, n; + double retval = 0; + + u.x = x; + m = u.i[HIGH_HALF]; + k = 0x7fffffff & m; /* no sign */ + if (k < 0x3e500000) /* if x->0 =>sin(x)=x */ + { + retval = x; + } + /*--------------------------- 2^-26<|x|< 0.855469---------------------- */ + else if (k < 0x3feb6000) + { + /* Max ULP is 0.548. */ + retval = do_sin (x, 0); + } /* else if (k < 0x3feb6000) */ + + /*----------------------- 0.855469 <|x|<2.426265 ----------------------*/ + else if (k < 0x400368fd) + { + t = hp0 - fabs (x); + /* Max ULP is 0.51. */ + retval = copysign (do_cos (t, hp1), x); + } /* else if (k < 0x400368fd) */ + + /*-------------------------- 2.426265<|x|< 105414350 ----------------------*/ + else if (k < 0x419921FB) + { + n = reduce_sincos (x, &a, &da); + retval = do_sincos (a, da, n); + } /* else if (k < 0x419921FB ) */ + + /* --------------------105414350 <|x| <2^1024------------------------------*/ + else if (k < 0x7ff00000) + { + n = __branred (x, &a, &da); + retval = do_sincos (a, da, n); + } + /*--------------------- |x| > 2^1024 ----------------------------------*/ + else + { + if (k == 0x7ff00000 && u.i[LOW_HALF] == 0) + { + retval = x / x; + } + } + + return retval; } - /*******************************************************************/ /* An ultimate cos routine. 
Given an IEEE double machine number x */ /* it computes the rounded value of cos(x). */ /*******************************************************************/ double -__cos (double x) + __cos (double x) { - double y, a, da; - mynumber u; - int4 k, m, n; - - double retval = 0; - - u.x = x; - m = u.i[HIGH_HALF]; - k = 0x7fffffff & m; - - /* |x|<2^-27 => cos(x)=1 */ - if (k < 0x3e400000) - retval = 1.0; - - else if (k < 0x3feb6000) - { /* 2^-27 < |x| < 0.855469 */ - /* Max ULP is 0.51. */ - retval = do_cos (x, 0); - } /* else if (k < 0x3feb6000) */ - - else if (k < 0x400368fd) - { /* 0.855469 <|x|<2.426265 */ ; - y = hp0 - fabs (x); - a = y + hp1; - da = (y - a) + hp1; - /* Max ULP is 0.501 if xx < 0.01588 or 0.518 otherwise. - Range reduction uses 106 bits here which is sufficient. */ - retval = do_sin (a, da); - } /* else if (k < 0x400368fd) */ - - else if (k < 0x419921FB) - { /* 2.426265<|x|< 105414350 */ - n = reduce_sincos (x, &a, &da); - retval = do_sincos (a, da, n + 1); - } /* else if (k < 0x419921FB ) */ - - /* 105414350 <|x| <2^1024 */ - else if (k < 0x7ff00000) - { - n = __branred (x, &a, &da); - retval = do_sincos (a, da, n + 1); - } - - else - { - if (k == 0x7ff00000 && u.i[LOW_HALF] == 0) - retval = x / x; /* |x| > 2^1024 */ - } - - return retval; + double y, a, da; + mynumber u; + int4 k, m, n; + + double retval = 0; + + u.x = x; + m = u.i[HIGH_HALF]; + k = 0x7fffffff & m; + + /* |x|<2^-27 => cos(x)=1 */ + if (k < 0x3e400000) + { + retval = 1.0; + } + else if (k < 0x3feb6000) + { /* 2^-27 < |x| < 0.855469 */ + /* Max ULP is 0.51. */ + retval = do_cos (x, 0); + } /* else if (k < 0x3feb6000) */ + + else if (k < 0x400368fd) + { /* 0.855469 <|x|<2.426265 */ + ; + y = hp0 - fabs (x); + a = y + hp1; + da = (y - a) + hp1; + /* Max ULP is 0.501 if xx < 0.01588 or 0.518 otherwise. + Range reduction uses 106 bits here which is sufficient. 
*/ + retval = do_sin (a, da); + } /* else if (k < 0x400368fd) */ + + else if (k < 0x419921FB) + { /* 2.426265<|x|< 105414350 */ + n = reduce_sincos (x, &a, &da); + retval = do_sincos (a, da, n + 1); + } /* else if (k < 0x419921FB ) */ + + /* 105414350 <|x| <2^1024 */ + else if (k < 0x7ff00000) + { + n = __branred (x, &a, &da); + retval = do_sincos (a, da, n + 1); + } + + else + { + if (k == 0x7ff00000 && u.i[LOW_HALF] == 0) + { + retval = x / x; /* |x| > 2^1024 */ + } + } + + return retval; } - void -__sincos (double x, double *sinx, double *cosx) + __sincos (double x, double* sinx, double* cosx) { - mynumber u; - int k = 0; - - u.x = x; - k = u.i[HIGH_HALF] & 0x7fffffff; - - if (k < 0x400368fd) - { - double a, da, y; - /* |x| < 2^-27 => cos (x) = 1, sin (x) = x. */ - if (k < 0x3e400000) - { - if (k < 0x3e500000) - *sinx = x; - *cosx = 1.0; - return; - } - /* |x| < 0.855469. */ - else if (k < 0x3feb6000) - { - *sinx = do_sin (x, 0); - *cosx = do_cos (x, 0); - return; - } - - /* |x| < 2.426265. */ - y = hp0 - fabs (x); - a = y + hp1; - da = (y - a) + hp1; - *sinx = copysign (do_cos (a, da), x); - *cosx = do_sin (a, da); - return; - } - /* |x| < 2^1024. */ - if (k < 0x7ff00000) - { - double a = 0.0, da = 0.0, xx = 0.0; - unsigned int n = 0; - - /* If |x| < 105414350 use simple range reduction. */ - n = k < 0x419921FB ? reduce_sincos (x, &a, &da) : __branred (x, &a, &da); - n = n & 3; - - if (n == 1 || n == 2) - { - a = -a; - da = -da; - } - - if (n & 1) - { - double *temp = cosx; - cosx = sinx; - sinx = temp; - } - - *sinx = do_sin (a, da); - xx = do_cos (a, da); - *cosx = (n & 2) ? -xx : xx; - return; - } - - *sinx = *cosx = x / x; + mynumber u; + int k = 0; + + u.x = x; + k = u.i[HIGH_HALF] & 0x7fffffff; + + if (k < 0x400368fd) + { + double a, da, y; + /* |x| < 2^-27 => cos (x) = 1, sin (x) = x. */ + if (k < 0x3e400000) + { + if (k < 0x3e500000) + { + *sinx = x; + } + *cosx = 1.0; + return; + } + /* |x| < 0.855469. 
*/ + else if (k < 0x3feb6000) + { + *sinx = do_sin (x, 0); + *cosx = do_cos (x, 0); + return; + } + + /* |x| < 2.426265. */ + y = hp0 - fabs (x); + a = y + hp1; + da = (y - a) + hp1; + *sinx = copysign (do_cos (a, da), x); + *cosx = do_sin (a, da); + return; + } + /* |x| < 2^1024. */ + if (k < 0x7ff00000) + { + double a = 0.0, da = 0.0, xx = 0.0; + unsigned int n = 0; + + /* If |x| < 105414350 use simple range reduction. */ + n = k < 0x419921FB ? reduce_sincos (x, &a, &da) : __branred (x, &a, &da); + n = n & 3; + + if (n == 1 || n == 2) + { + a = -a; + da = -da; + } + + if (n & 1) + { + double* temp = cosx; + cosx = sinx; + sinx = temp; + } + + *sinx = do_sin (a, da); + xx = do_cos (a, da); + *cosx = (n & 2) ? -xx : xx; + return; + } + + *sinx = *cosx = x / x; } -}; -}; +}; // namespace libm +}; // namespace ModuleBase diff --git a/source/source_base/libm/test/libm_test.cpp b/source/source_base/libm/test/libm_test.cpp index 70ac62aa6a7..cda349ab2bc 100644 --- a/source/source_base/libm/test/libm_test.cpp +++ b/source/source_base/libm/test/libm_test.cpp @@ -2,161 +2,180 @@ #include #include #include -#include +#include #include "gtest/gtest.h" #include "../libm.h" -#define MY_EXPECT_DOUBLE_EQ(ds1, ds2) \ -do { \ - if (std::isnan(ds1) && std::isnan(ds2) && \ - std::signbit(ds1) == std::signbit(ds2)) \ - EXPECT_EQ(1, 1); \ - else EXPECT_DOUBLE_EQ(ds1, ds2); \ -} while (0) - -#define MY_EXPECT_COMPLEX_DOUBLE_EQ(ds1, ds2) \ - MY_EXPECT_DOUBLE_EQ(ds1.real(), ds2.real()); \ - MY_EXPECT_DOUBLE_EQ(ds1.imag(), ds2.imag()); \ - -TEST(base_libm, sincos_random) +#define MY_EXPECT_DOUBLE_EQ(ds1, ds2) \ + do \ + { \ + if (std::isnan (ds1) && std::isnan (ds2) && std::signbit (ds1) == std::signbit (ds2)) \ + EXPECT_EQ (1, 1); \ + else \ + EXPECT_DOUBLE_EQ (ds1, ds2); \ + } \ + while (0) + +#define MY_EXPECT_COMPLEX_DOUBLE_EQ(ds1, ds2) \ + MY_EXPECT_DOUBLE_EQ (ds1.real (), ds2.real ()); \ + MY_EXPECT_DOUBLE_EQ (ds1.imag (), ds2.imag ()); + +TEST (base_libm, sincos_random) { int len = 
50000; - std::vector da(len); - std::vector ds1(len*2); - std::vector ds2(len*2); + std::vector da (len); + std::vector ds1 (len * 2); + std::vector ds2 (len * 2); - std::uniform_real_distribution rnd(-50000, 50000); + std::uniform_real_distribution rnd (-50000, 50000); std::default_random_engine eng; - for (int i = 0; i < len; ++i) { - da[i] = rnd(eng); - } + for (int i = 0; i < len; ++i) + { + da[i] = rnd (eng); + } + + for (int i = 0; i < len; ++i) + { + sincos (da[i], &ds1[i * 2 + 0], &ds1[i * 2 + 1]); + ModuleBase::libm::sincos (da[i], &ds2[i * 2 + 0], &ds2[i * 2 + 1]); + } + + for (int i = 0; i < len * 2; i++) + { + MY_EXPECT_DOUBLE_EQ (ds1[i], ds2[i]); + } +} - for (int i = 0; i < len; ++i) { - sincos(da[i], &ds1[i * 2 + 0], &ds1[i * 2 + 1]); - ModuleBase::libm::sincos(da[i], &ds2[i * 2 + 0], &ds2[i * 2 + 1]); - } +TEST (base_libm, sincos_spec) +{ - for (int i = 0; i < len * 2; i++) { - MY_EXPECT_DOUBLE_EQ(ds1[i], ds2[i]); - } + double da[] = {-0.0, + +0.0, + -INFINITY, + +INFINITY, + -NAN, + +NAN, + -DBL_MIN, + +DBL_MIN, + -DBL_MAX, + +DBL_MAX, + -DBL_MIN * DBL_MIN, + +DBL_MIN * DBL_MIN, + -DBL_MAX * DBL_MAX, + +DBL_MAX * DBL_MAX, + -DBL_MAX / M_PI}; + + const int len = sizeof (da) / sizeof (double); + std::vector ds1 (len * 2); + std::vector ds2 (len * 2); + + for (int i = 0; i < len; ++i) + { + sincos (da[i], &ds1[i * 2 + 0], &ds1[i * 2 + 1]); + ModuleBase::libm::sincos (da[i], &ds2[i * 2 + 0], &ds2[i * 2 + 1]); + } + + for (int i = 0; i < len * 2; i++) + { + MY_EXPECT_DOUBLE_EQ (ds1[i], ds2[i]); + } } -TEST(base_libm, sincos_spec) -{ - - double da[] = { - -0.0, +0.0, - -INFINITY, +INFINITY, - -NAN, +NAN, - -DBL_MIN, +DBL_MIN, - -DBL_MAX, +DBL_MAX, - -DBL_MIN * DBL_MIN, +DBL_MIN * DBL_MIN, - -DBL_MAX * DBL_MAX, +DBL_MAX * DBL_MAX, - -DBL_MAX / M_PI}; - - const int len = sizeof(da) / sizeof(double); - std::vector ds1(len*2); - std::vector ds2(len*2); - - for (int i = 0; i < len; ++i) { - sincos(da[i], &ds1[i * 2 + 0], &ds1[i * 2 + 1]); - 
ModuleBase::libm::sincos(da[i], &ds2[i * 2 + 0], &ds2[i * 2 + 1]); +#define SINGLE_PARAM_FLOAT64_MATH_RANDOM_TEST_TEMPLATE(TNAME, FNAME, LENGTH, LOW, HIGH) \ + TEST (base_libm, TNAME) \ + { \ + int len = (LENGTH); \ + std::vector da (len); \ + std::vector ds1 (len); \ + std::vector ds2 (len); \ + std::uniform_real_distribution rnd (LOW, HIGH); \ + std::default_random_engine eng; \ + for (int i = 0; i < len; ++i) \ + da[i] = rnd (eng); \ + for (int i = 0; i < len; ++i) \ + { \ + ds1[i] = std::FNAME (da[i]); \ + ds2[i] = ModuleBase::libm::FNAME (da[i]); \ + } \ + for (int i = 0; i < len; i++) \ + MY_EXPECT_DOUBLE_EQ (ds1[i], ds2[i]); \ } - for (int i = 0; i < len * 2; i++) { - MY_EXPECT_DOUBLE_EQ(ds1[i], ds2[i]); +SINGLE_PARAM_FLOAT64_MATH_RANDOM_TEST_TEMPLATE (exp_random, exp, 50000, -1000, 1000) +SINGLE_PARAM_FLOAT64_MATH_RANDOM_TEST_TEMPLATE (sin_random, sin, 50000, -50000, 50000) +SINGLE_PARAM_FLOAT64_MATH_RANDOM_TEST_TEMPLATE (cos_random, cos, 50000, -50000, 50000) +SINGLE_PARAM_FLOAT64_MATH_RANDOM_TEST_TEMPLATE (sin_random2, sin, 100000, -DBL_MAX / M_PI, DBL_MAX / M_PI) +SINGLE_PARAM_FLOAT64_MATH_RANDOM_TEST_TEMPLATE (cos_random2, cos, 100000, -DBL_MAX / M_PI, DBL_MAX / M_PI) + +#define SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(TNAME, FNAME, VALUE) \ + TEST (base_libm, TNAME) \ + { \ + double ds1 = std::FNAME (VALUE); \ + double ds2 = ModuleBase::libm::FNAME (VALUE); \ + MY_EXPECT_DOUBLE_EQ (ds1, ds2); \ } -} -#define SINGLE_PARAM_FLOAT64_MATH_RANDOM_TEST_TEMPLATE(TNAME, FNAME, LENGTH, LOW, HIGH) \ -TEST(base_libm, TNAME) { \ - int len = (LENGTH); \ - std::vector da(len); \ - std::vector ds1(len); \ - std::vector ds2(len); \ - std::uniform_real_distribution rnd(LOW, HIGH); \ - std::default_random_engine eng; \ - for (int i = 0; i < len; ++i) \ - da[i] = rnd(eng); \ - for (int i = 0; i < len; ++i) { \ - ds1[i] = std::FNAME(da[i]); \ - ds2[i] = ModuleBase::libm::FNAME(da[i]); \ - } \ - for (int i = 0; i < len; i++) \ - MY_EXPECT_DOUBLE_EQ(ds1[i], ds2[i]); \ -} 
\ - -SINGLE_PARAM_FLOAT64_MATH_RANDOM_TEST_TEMPLATE(exp_random, exp, 50000, -1000, 1000) -SINGLE_PARAM_FLOAT64_MATH_RANDOM_TEST_TEMPLATE(sin_random, sin, 50000, -50000, 50000) -SINGLE_PARAM_FLOAT64_MATH_RANDOM_TEST_TEMPLATE(cos_random, cos, 50000, -50000, 50000) -SINGLE_PARAM_FLOAT64_MATH_RANDOM_TEST_TEMPLATE(sin_random2, sin, 100000, -DBL_MAX / M_PI, DBL_MAX / M_PI) -SINGLE_PARAM_FLOAT64_MATH_RANDOM_TEST_TEMPLATE(cos_random2, cos, 100000, -DBL_MAX / M_PI, DBL_MAX / M_PI) - -#define SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(TNAME, FNAME, VALUE) \ -TEST(base_libm, TNAME) { \ - double ds1 = std::FNAME(VALUE); \ - double ds2 = ModuleBase::libm::FNAME(VALUE); \ - MY_EXPECT_DOUBLE_EQ(ds1, ds2); \ -} \ - -#define SINGLE_PARAM_FLOAT64_MATH_CORNER_TEST_TEMPLATE(FNAME) \ -SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(FNAME ## _nz, FNAME, -0.0); \ -SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(FNAME ## _pz, FNAME, +0.0); \ -SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(FNAME ## _ninf, FNAME, -INFINITY); \ -SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(FNAME ## _pinf, FNAME, +INFINITY); \ -SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(FNAME ## _nnan, FNAME, -NAN); \ -SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(FNAME ## _pnan, FNAME, +NAN); \ -SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(FNAME ## _nmin, FNAME, -DBL_MIN); \ -SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(FNAME ## _nmax, FNAME, -DBL_MAX); \ -SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(FNAME ## _pmin, FNAME, +DBL_MIN); \ -SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(FNAME ## _pmax, FNAME, +DBL_MAX); \ -SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(FNAME ## _nmin2, FNAME, -DBL_MIN * DBL_MIN);\ -SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(FNAME ## _nmax2, FNAME, -DBL_MAX * DBL_MAX);\ -SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(FNAME ## _pmin2, FNAME, +DBL_MIN * DBL_MIN);\ -SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(FNAME ## _pmax2, FNAME, +DBL_MAX * DBL_MAX);\ -SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(FNAME 
## _nlarge, FNAME, -DBL_MAX / M_PI);\ -SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(FNAME ## _plarge, FNAME, +DBL_MAX / M_PI);\ -SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(FNAME ## _nlarge2, FNAME, -105414350.0 * 2.0);\ -SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE(FNAME ## _plarge2, FNAME, +105414350.0 * 2.0);\ - -SINGLE_PARAM_FLOAT64_MATH_CORNER_TEST_TEMPLATE(sin) -SINGLE_PARAM_FLOAT64_MATH_CORNER_TEST_TEMPLATE(cos) -SINGLE_PARAM_FLOAT64_MATH_CORNER_TEST_TEMPLATE(exp) - -TEST(base_libm, cexp_random) +#define SINGLE_PARAM_FLOAT64_MATH_CORNER_TEST_TEMPLATE(FNAME) \ + SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE (FNAME##_nz, FNAME, -0.0); \ + SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE (FNAME##_pz, FNAME, +0.0); \ + SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE (FNAME##_ninf, FNAME, -INFINITY); \ + SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE (FNAME##_pinf, FNAME, +INFINITY); \ + SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE (FNAME##_nnan, FNAME, -NAN); \ + SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE (FNAME##_pnan, FNAME, +NAN); \ + SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE (FNAME##_nmin, FNAME, -DBL_MIN); \ + SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE (FNAME##_nmax, FNAME, -DBL_MAX); \ + SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE (FNAME##_pmin, FNAME, +DBL_MIN); \ + SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE (FNAME##_pmax, FNAME, +DBL_MAX); \ + SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE (FNAME##_nmin2, FNAME, -DBL_MIN* DBL_MIN); \ + SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE (FNAME##_nmax2, FNAME, -DBL_MAX* DBL_MAX); \ + SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE (FNAME##_pmin2, FNAME, +DBL_MIN* DBL_MIN); \ + SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE (FNAME##_pmax2, FNAME, +DBL_MAX* DBL_MAX); \ + SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE (FNAME##_nlarge, FNAME, -DBL_MAX / M_PI); \ + SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE (FNAME##_plarge, FNAME, +DBL_MAX / M_PI); \ + SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE (FNAME##_nlarge2, FNAME, 
-105414350.0 * 2.0); \ + SINGLE_PARAM_FLOAT64_MATH_SPEC_TEST_TEMPLATE (FNAME##_plarge2, FNAME, +105414350.0 * 2.0); + +SINGLE_PARAM_FLOAT64_MATH_CORNER_TEST_TEMPLATE (sin) +SINGLE_PARAM_FLOAT64_MATH_CORNER_TEST_TEMPLATE (cos) +SINGLE_PARAM_FLOAT64_MATH_CORNER_TEST_TEMPLATE (exp) + +TEST (base_libm, cexp_random) { int len = 50000; - std::vector> da(len); - std::vector> ds1(len); - std::vector> ds2(len); + std::vector> da (len); + std::vector> ds1 (len); + std::vector> ds2 (len); - std::uniform_real_distribution rnd(-1000, 1000); + std::uniform_real_distribution rnd (-1000, 1000); std::default_random_engine eng; - for (int i = 0; i < len; ++i) { - da[i] = std::complex(rnd(eng), rnd(eng)); - } - - for (int i = 0; i < len; ++i) { - ds1[i] = std::exp(da[i]); - ds2[i] = ModuleBase::libm::exp(da[i]); - } - - for (int i = 0; i < len; i++) { - MY_EXPECT_COMPLEX_DOUBLE_EQ(ds1[i], ds2[i]); - } + for (int i = 0; i < len; ++i) + { + da[i] = std::complex (rnd (eng), rnd (eng)); + } + + for (int i = 0; i < len; ++i) + { + ds1[i] = std::exp (da[i]); + ds2[i] = ModuleBase::libm::exp (da[i]); + } + + for (int i = 0; i < len; i++) + { + MY_EXPECT_COMPLEX_DOUBLE_EQ (ds1[i], ds2[i]); + } } -TEST(base_libm, cexp_spec) +TEST (base_libm, cexp_spec) { std::vector> da = { {+INFINITY, +0.0}, // 1 {+INFINITY, -0.0}, {-INFINITY, +0.0}, {-INFINITY, -0.0}, - {+INFINITY, 2.0}, // 5 + {+INFINITY, 2.0}, // 5 {+INFINITY, 4.0}, {-INFINITY, 2.0}, {-INFINITY, 4.0}, @@ -205,16 +224,18 @@ TEST(base_libm, cexp_spec) {-DBL_MIN, +0.0}, }; - int len = da.size(); - std::vector> ds1(len); - std::vector> ds2(len); + int len = da.size (); + std::vector> ds1 (len); + std::vector> ds2 (len); - for (int i = 0; i < len; ++i) { - ds1[i] = std::exp(da[i]); - ds2[i] = ModuleBase::libm::exp(da[i]); - } + for (int i = 0; i < len; ++i) + { + ds1[i] = std::exp (da[i]); + ds2[i] = ModuleBase::libm::exp (da[i]); + } - for (int i = 0; i < len; i++) { - MY_EXPECT_COMPLEX_DOUBLE_EQ(ds1[i], ds2[i]); - } + for (int i = 0; i < 
len; i++) + { + MY_EXPECT_COMPLEX_DOUBLE_EQ (ds1[i], ds2[i]); + } } diff --git a/source/source_base/macros.h b/source/source_base/macros.h index 5fa9502072a..032d3cc94ca 100644 --- a/source/source_base/macros.h +++ b/source/source_base/macros.h @@ -4,7 +4,8 @@ #include template -struct GetTypeReal { +struct GetTypeReal +{ using type = T; /**< The return type based on the input type. */ }; @@ -14,7 +15,8 @@ struct GetTypeReal { * This specialization sets the return type to be float when the input type is std::complex. */ template <> -struct GetTypeReal> { +struct GetTypeReal> +{ using type = float; /**< The return type specialization for std::complex. */ }; @@ -24,7 +26,8 @@ struct GetTypeReal> { * This specialization sets the return type to be double when the input type is std::complex. */ template <> -struct GetTypeReal> { +struct GetTypeReal> +{ using type = double; /**< The return type specialization for std::complex. */ }; diff --git a/source/source_base/main.cpp b/source/source_base/main.cpp index 9a32f11d289..165a3059405 100644 --- a/source/source_base/main.cpp +++ b/source/source_base/main.cpp @@ -3,62 +3,62 @@ #include #include +void calculate (); -void calculate(); - -int main(int argc, char **argv) +int + main (int argc, char** argv) { - std::cout << "Hello, this is the 'base' module of ABACUS." << std::endl; + std::cout << "Hello, this is the 'base' module of ABACUS." << std::endl; - std::cout << "The module searchs for the neighboring atoms for a given atomic position" << std::endl; + std::cout << "The module searchs for the neighboring atoms for a given atomic position" << std::endl; - std::cout << "Right now, the module is still empty, soon we will have more tests." << std::endl; + std::cout << "Right now, the module is still empty, soon we will have more tests." 
<< std::endl; - calculate(); + calculate (); return 0; } - -void calculate() +void + calculate () { - std::ofstream ofs("log.txt"); + std::ofstream ofs ("log.txt"); -// ooo.set_orb_tables(); + // ooo.set_orb_tables(); - ofs.close(); + ofs.close (); - std::cout << "--------------------" << std::endl; - std::cout << " Have a great day! " << std::endl; - std::cout << "--------------------" << std::endl; + std::cout << "--------------------" << std::endl; + std::cout << " Have a great day! " << std::endl; + std::cout << "--------------------" << std::endl; -/* - time_t time_start = std::time(NULL); + /* + time_t time_start = std::time(NULL); -// ModuleBase::timer::start(); + // ModuleBase::timer::start(); - //---------------------------------------------------------- - // main program for doing electronic structure calculations - //---------------------------------------------------------- -// Driver DD; -// DD.init(); + //---------------------------------------------------------- + // main program for doing electronic structure calculations + //---------------------------------------------------------- + // Driver DD; + // DD.init(); - time_t time_finish= std::time(NULL); + time_t time_finish= std::time(NULL); - // print out information before ABACUS ends - std::cout << "\n START Time : " << ctime(&time_start); - std::cout << " FINISH Time : " << ctime(&time_finish); - std::cout << " TOTAL Time : " << difftime(time_finish, time_start) << std::endl; + // print out information before ABACUS ends + std::cout << "\n START Time : " << ctime(&time_start); + std::cout << " FINISH Time : " << ctime(&time_finish); + std::cout << " TOTAL Time : " << difftime(time_finish, time_start) << std::endl; - double total_time = difftime(time_finish, time_start); - int hour = total_time / 3600; - int mins = ( total_time - 3600 * hour ) / 60; - int secs = total_time - 3600 * hour - 60 * mins ; - std::cout << " Total Time : " << hour << " h " - << mins << " mins " - << secs << " secs "<< 
std::endl; -*/ + double total_time = difftime(time_finish, time_start); + int hour = total_time / 3600; + int mins = ( total_time - 3600 * hour ) / 60; + int secs = total_time - 3600 * hour - 60 * mins ; + std::cout << " Total Time : " << hour << " h " + << mins << " mins " + << secs << " secs "<< std::endl; + */ return; } diff --git a/source/source_base/math_bspline.cpp b/source/source_base/math_bspline.cpp index 1fb477ec19e..b7f033ba2ad 100644 --- a/source/source_base/math_bspline.cpp +++ b/source/source_base/math_bspline.cpp @@ -1,60 +1,60 @@ #include "math_bspline.h" -#include +#include #include namespace ModuleBase { - Bspline::Bspline() - { - bezier = nullptr; - norder = 0; - xi = 0; - Dx = 1.0; - } - Bspline::~Bspline() - { - delete[] bezier; - } +Bspline::Bspline () +{ + bezier = nullptr; + norder = 0; + xi = 0; + Dx = 1.0; +} +Bspline::~Bspline () { delete[] bezier; } - void Bspline::init(int norderin, double Dxin, double xiin) - { - this->xi = xiin; - this->Dx = Dxin; - this->norder = norderin; - assert(Dx > 0); - //norder must be a positive even number. - assert(norder > 0); - assert(norder % 2 == 0); - delete[] bezier; bezier = new double [this->norder+1]; - for(int i = 0 ; i < norder+1 ; ++i) +void + Bspline::init (int norderin, double Dxin, double xiin) +{ + this->xi = xiin; + this->Dx = Dxin; + this->norder = norderin; + assert (Dx > 0); + // norder must be a positive even number. 
+ assert (norder > 0); + assert (norder % 2 == 0); + delete[] bezier; + bezier = new double[this->norder + 1]; + for (int i = 0; i < norder + 1; ++i) { bezier[i] = 0; } - } +} - double Bspline::bezier_ele(int n) - { - return this->bezier[n]; - } +double + Bspline::bezier_ele (int n) +{ + return this->bezier[n]; +} - void Bspline::getbspline(double x) - { - bezier[0] = 1.0; - for(int k = 1 ; k <= norder ; ++k) +void + Bspline::getbspline (double x) +{ + bezier[0] = 1.0; + for (int k = 1; k <= norder; ++k) { - //for n>=1 - for(int n = k; n >= 1; --n ) - { - this->bezier[n] = ((x + n*this->Dx - this->xi)*this->bezier[n] + - (this->xi + (k-n+1)*Dx - x)*this->bezier[n-1])/(k*this->Dx); - } - - //for n = 0 - this->bezier[0] = (x - this->xi)*this->bezier[0] / (k*this->Dx); + // for n>=1 + for (int n = k; n >= 1; --n) + { + this->bezier[n] = ((x + n * this->Dx - this->xi) * this->bezier[n] + + (this->xi + (k - n + 1) * Dx - x) * this->bezier[n - 1]) + / (k * this->Dx); + } + + // for n = 0 + this->bezier[0] = (x - this->xi) * this->bezier[0] / (k * this->Dx); } - } } - - +} // namespace ModuleBase diff --git a/source/source_base/math_bspline.h b/source/source_base/math_bspline.h index 1d246d7fe2a..ed63965e708 100644 --- a/source/source_base/math_bspline.h +++ b/source/source_base/math_bspline.h @@ -24,7 +24,7 @@ namespace ModuleBase * x+n*Dx-xi xi+(k-n+1)*Dx-x * Bk[n] = -----------*B(k-1)[n] + -----------------*B(k-1)[n-1] * k*Dx k*Dx - * USAGE: + * USAGE: * ModuleBase::Bspline bp; * bp.init(10,0.7,2); //Dx = 0.7, xi = 2 * bp.getbslpine(0.5); //x = 0.5 @@ -34,24 +34,24 @@ namespace ModuleBase class Bspline { private: - int norder; // the order of bezier base; norder >= 0 - double Dx; // Dx: the interval of control node - double xi; // xi: the starting point - double * bezier = nullptr; // bezier[n] = Bk[n] + int norder; // the order of bezier base; norder >= 0 + double Dx; // Dx: the interval of control node + double xi; // xi: the starting point + double* bezier = 
nullptr; // bezier[n] = Bk[n] public: - Bspline(); - ~Bspline(); + Bspline (); + ~Bspline (); - void init(int norderin, double Dxin, double xiin); + void init (int norderin, double Dxin, double xiin); // Get the result of i-th bezier base functions for different input x+xi+n*Dx. // x should be in [0,Dx] // n-th result is stored in bezier[n]; - void getbspline(double x); + void getbspline (double x); // get the element of bezier - double bezier_ele(int n); + double bezier_ele (int n); }; } // namespace ModuleBase #endif diff --git a/source/source_base/math_chebyshev.cpp b/source/source_base/math_chebyshev.cpp index b7e59a89f98..d8031ea48f1 100644 --- a/source/source_base/math_chebyshev.cpp +++ b/source/source_base/math_chebyshev.cpp @@ -11,39 +11,41 @@ namespace ModuleBase { -FFTW::FFTW(const int norder2_in) +FFTW::FFTW (const int norder2_in) { - ccoef = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * norder2_in); - dcoef = (double*)fftw_malloc(sizeof(double) * norder2_in); - coef_plan = fftw_plan_dft_r2c_1d(norder2_in, dcoef, ccoef, FFTW_ESTIMATE); + ccoef = (fftw_complex*)fftw_malloc (sizeof (fftw_complex) * norder2_in); + dcoef = (double*)fftw_malloc (sizeof (double) * norder2_in); + coef_plan = fftw_plan_dft_r2c_1d (norder2_in, dcoef, ccoef, FFTW_ESTIMATE); } -FFTW::~FFTW() +FFTW::~FFTW () { - fftw_destroy_plan(coef_plan); - fftw_free(ccoef); - fftw_free(dcoef); + fftw_destroy_plan (coef_plan); + fftw_free (ccoef); + fftw_free (dcoef); } -void FFTW::execute_fftw() +void + FFTW::execute_fftw () { - fftw_execute(this->coef_plan); + fftw_execute (this->coef_plan); } #ifdef __ENABLE_FLOAT_FFTW -FFTW::FFTW(const int norder2_in) +FFTW::FFTW (const int norder2_in) { - ccoef = (fftwf_complex*)fftw_malloc(sizeof(fftwf_complex) * norder2_in); - dcoef = (float*)fftw_malloc(sizeof(float) * norder2_in); - coef_plan = fftwf_plan_dft_r2c_1d(norder2_in, dcoef, ccoef, FFTW_ESTIMATE); + ccoef = (fftwf_complex*)fftw_malloc (sizeof (fftwf_complex) * norder2_in); + dcoef = 
(float*)fftw_malloc (sizeof (float) * norder2_in); + coef_plan = fftwf_plan_dft_r2c_1d (norder2_in, dcoef, ccoef, FFTW_ESTIMATE); } -FFTW::~FFTW() +FFTW::~FFTW () { - fftwf_destroy_plan(coef_plan); - fftw_free(ccoef); - fftw_free(dcoef); + fftwf_destroy_plan (coef_plan); + fftw_free (ccoef); + fftw_free (dcoef); } -void FFTW::execute_fftw() +void + FFTW::execute_fftw () { - fftwf_execute(this->coef_plan); + fftwf_execute (this->coef_plan); } #endif @@ -51,26 +53,26 @@ void FFTW::execute_fftw() #define EXTEND 16 template -Chebyshev::Chebyshev(const int norder_in) : fftw(2 * EXTEND * norder_in) +Chebyshev::Chebyshev (const int norder_in) : fftw (2 * EXTEND * norder_in) { this->norder = norder_in; norder2 = 2 * norder * EXTEND; if (this->norder < 1) - { - ModuleBase::WARNING_QUIT("Chebyshev", "The Chebyshev expansion order should be at least 1!"); - } + { + ModuleBase::WARNING_QUIT ("Chebyshev", "The Chebyshev expansion order should be at least 1!"); + } coefr_cpu = new REAL[norder]; coefc_cpu = new std::complex[norder]; - if (base_device::get_device_type(this->ctx) == base_device::GpuDevice) - { - resmem_var_op()(this->coef_real, norder); - resmem_complex_op()(this->coef_complex, norder); - } + if (base_device::get_device_type (this->ctx) == base_device::GpuDevice) + { + resmem_var_op () (this->coef_real, norder); + resmem_complex_op () (this->coef_complex, norder); + } else - { - coef_real = coefr_cpu; - coef_complex = coefc_cpu; - } + { + coef_real = coefr_cpu; + coef_complex = coefc_cpu; + } polytrace = new REAL[norder]; // ndmin = ndmax = ndmax_in; @@ -79,42 +81,45 @@ Chebyshev::Chebyshev(const int norder_in) : fftw(2 * EXTEND * nord } template -Chebyshev::~Chebyshev() +Chebyshev::~Chebyshev () { delete[] polytrace; - if (base_device::get_device_type(this->ctx) == base_device::GpuDevice) - { - delmem_var_op()(this->coef_real); - delmem_complex_op()(this->coef_complex); - } + if (base_device::get_device_type (this->ctx) == base_device::GpuDevice) + { + 
delmem_var_op () (this->coef_real); + delmem_complex_op () (this->coef_complex); + } else - { - coef_real = nullptr; - coef_complex = nullptr; - } + { + coef_real = nullptr; + coef_complex = nullptr; + } delete[] coefr_cpu; delete[] coefc_cpu; } template -void Chebyshev::getpolyval(const REAL x, REAL* polyval, const int N) +void + Chebyshev::getpolyval (const REAL x, REAL* polyval, const int N) { polyval[0] = 1; polyval[1] = x; for (int i = 2; i < N; ++i) - { - polyval[i] = 2 * x * polyval[i - 1] - polyval[i - 2]; - } + { + polyval[i] = 2 * x * polyval[i - 1] - polyval[i - 2]; + } } template -inline REAL Chebyshev::recurs(const REAL x, const REAL Tn, REAL const Tn_1) +inline REAL + Chebyshev::recurs (const REAL x, const REAL Tn, REAL const Tn_1) { return 2 * x * Tn - Tn_1; } template -REAL Chebyshev::ddot_real(const std::complex* psi_L, +REAL + Chebyshev::ddot_real (const std::complex* psi_L, const std::complex* psi_R, const int N, const int LDA, @@ -123,41 +128,42 @@ REAL Chebyshev::ddot_real(const std::complex* psi_L, REAL result = 0; const base_device::DEVICE_CPU* cpu_ctx = {}; if (N == LDA || m == 1) - { - int dim2 = 2 * N * m; - REAL *pL, *pR; - pL = (REAL*)psi_L; - pR = (REAL*)psi_R; - REAL* dot_device = nullptr; - resmem_var_op()(dot_device, 1); - container::kernels::blas_dot()(dim2, pL, 1, pR, 1, dot_device); - syncmem_var_d2h_op()(&result, dot_device, 1); - delmem_var_op()(dot_device); - } + { + int dim2 = 2 * N * m; + REAL *pL, *pR; + pL = (REAL*)psi_L; + pR = (REAL*)psi_R; + REAL* dot_device = nullptr; + resmem_var_op () (dot_device, 1); + container::kernels::blas_dot () (dim2, pL, 1, pR, 1, dot_device); + syncmem_var_d2h_op () (&result, dot_device, 1); + delmem_var_op () (dot_device); + } else - { - REAL *pL, *pR; - pL = (REAL*)psi_L; - pR = (REAL*)psi_R; - REAL* dot_device = nullptr; - resmem_var_op()(dot_device, 1); - for (int i = 0; i < m; ++i) - { - int dim2 = 2 * N; - container::kernels::blas_dot()(dim2, pL, 1, pR, 1, dot_device); - REAL 
result_temp = 0; - syncmem_var_d2h_op()(&result_temp, dot_device, 1); - result += result_temp; - pL += 2 * LDA; - pR += 2 * LDA; - } - delmem_var_op()(dot_device); - } + { + REAL *pL, *pR; + pL = (REAL*)psi_L; + pR = (REAL*)psi_R; + REAL* dot_device = nullptr; + resmem_var_op () (dot_device, 1); + for (int i = 0; i < m; ++i) + { + int dim2 = 2 * N; + container::kernels::blas_dot () (dim2, pL, 1, pR, 1, dot_device); + REAL result_temp = 0; + syncmem_var_d2h_op () (&result_temp, dot_device, 1); + result += result_temp; + pL += 2 * LDA; + pR += 2 * LDA; + } + delmem_var_op () (dot_device); + } return result; } template -void Chebyshev::calcoef_real(std::function fun) +void + Chebyshev::calcoef_real (std::function fun) { std::complex* pcoef = (std::complex*)this->fftw.ccoef; @@ -166,60 +172,61 @@ void Chebyshev::calcoef_real(std::function fun) //(M)iddle point integral method part //----------------------------------------------- for (int i = 0; i < norder2; ++i) - { - this->fftw.dcoef[i] = fun((REAL)cos((i + 0.5) * ModuleBase::TWO_PI / norder2)); - } + { + this->fftw.dcoef[i] = fun ((REAL)cos ((i + 0.5) * ModuleBase::TWO_PI / norder2)); + } // this->fftw.dcoef --FFT--> fftw.pcoef - this->fftw.execute_fftw(); + this->fftw.execute_fftw (); for (int i = 0; i < norder; ++i) - { - REAL phi = i * ModuleBase::PI / norder2; - if (i == 0) { - coefr_cpu[i] = (cos(phi) * pcoef[i].real() + sin(phi) * pcoef[i].imag()) / norder2 * 2 / 3; + REAL phi = i * ModuleBase::PI / norder2; + if (i == 0) + { + coefr_cpu[i] = (cos (phi) * pcoef[i].real () + sin (phi) * pcoef[i].imag ()) / norder2 * 2 / 3; + } + else + { + coefr_cpu[i] = (cos (phi) * pcoef[i].real () + sin (phi) * pcoef[i].imag ()) / norder2 * 4 / 3; + } } - else - { - coefr_cpu[i] = (cos(phi) * pcoef[i].real() + sin(phi) * pcoef[i].imag()) / norder2 * 4 / 3; - } - } //----------------------------------------------- //(T)rapezoid integral method part //----------------------------------------------- for (int i = 0; i < norder2; 
++i) - { - this->fftw.dcoef[i] = fun(cos(i * ModuleBase::TWO_PI / norder2)); - } + { + this->fftw.dcoef[i] = fun (cos (i * ModuleBase::TWO_PI / norder2)); + } // this->fftw.dcoef --FFT--> fftw.pcoef - this->fftw.execute_fftw(); + this->fftw.execute_fftw (); for (int i = 0; i < norder; ++i) - { - if (i == 0) { - coefr_cpu[i] += real(pcoef[i]) / norder2 * 1 / 3; + if (i == 0) + { + coefr_cpu[i] += real (pcoef[i]) / norder2 * 1 / 3; + } + else + { + coefr_cpu[i] += real (pcoef[i]) / norder2 * 2 / 3; + } } - else + + if (base_device::get_device_type (this->ctx) == base_device::GpuDevice) { - coefr_cpu[i] += real(pcoef[i]) / norder2 * 2 / 3; + syncmem_var_h2d_op () (coef_real, coefr_cpu, norder); } - } - - if (base_device::get_device_type(this->ctx) == base_device::GpuDevice) - { - syncmem_var_h2d_op()(coef_real, coefr_cpu, norder); - } getcoef_real = true; return; } template -void Chebyshev::calcoef_complex(std::function(std::complex)> fun) +void + Chebyshev::calcoef_complex (std::function (std::complex)> fun) { std::complex* pcoef = (std::complex*)this->fftw.ccoef; @@ -228,88 +235,89 @@ void Chebyshev::calcoef_complex(std::function(s //(M)iddle point integral method part //----------------------------------------------- for (int i = 0; i < norder2; ++i) - { - this->fftw.dcoef[i] = fun(cos((i + 0.5) * ModuleBase::TWO_PI / norder2)).real(); - } - this->fftw.execute_fftw(); - for (int i = 0; i < norder; ++i) - { - REAL phi = i * ModuleBase::PI / norder2; - if (i == 0) { - coefc_cpu[i].real((cos(phi) * pcoef[i].real() + sin(phi) * pcoef[i].imag()) / norder2 * 2 / 3); + this->fftw.dcoef[i] = fun (cos ((i + 0.5) * ModuleBase::TWO_PI / norder2)).real (); } - else + this->fftw.execute_fftw (); + for (int i = 0; i < norder; ++i) { - coefc_cpu[i].real((cos(phi) * pcoef[i].real() + sin(phi) * pcoef[i].imag()) / norder2 * 4 / 3); + REAL phi = i * ModuleBase::PI / norder2; + if (i == 0) + { + coefc_cpu[i].real ((cos (phi) * pcoef[i].real () + sin (phi) * pcoef[i].imag ()) / 
norder2 * 2 / 3); + } + else + { + coefc_cpu[i].real ((cos (phi) * pcoef[i].real () + sin (phi) * pcoef[i].imag ()) / norder2 * 4 / 3); + } } - } for (int i = 0; i < norder2; ++i) - { - this->fftw.dcoef[i] = fun(cos((i + 0.5) * ModuleBase::TWO_PI / norder2)).imag(); - } - this->fftw.execute_fftw(); - for (int i = 0; i < norder; ++i) - { - REAL phi = i * ModuleBase::PI / norder2; - if (i == 0) { - coefc_cpu[i].imag((cos(phi) * pcoef[i].real() + sin(phi) * pcoef[i].imag()) / norder2 * 2 / 3); + this->fftw.dcoef[i] = fun (cos ((i + 0.5) * ModuleBase::TWO_PI / norder2)).imag (); } - else + this->fftw.execute_fftw (); + for (int i = 0; i < norder; ++i) { - coefc_cpu[i].imag((cos(phi) * pcoef[i].real() + sin(phi) * pcoef[i].imag()) / norder2 * 4 / 3); + REAL phi = i * ModuleBase::PI / norder2; + if (i == 0) + { + coefc_cpu[i].imag ((cos (phi) * pcoef[i].real () + sin (phi) * pcoef[i].imag ()) / norder2 * 2 / 3); + } + else + { + coefc_cpu[i].imag ((cos (phi) * pcoef[i].real () + sin (phi) * pcoef[i].imag ()) / norder2 * 4 / 3); + } } - } //----------------------------------------------- //(T)rapezoid integral method part //----------------------------------------------- for (int i = 0; i < norder2; ++i) - { - this->fftw.dcoef[i] = fun(cos(i * ModuleBase::TWO_PI / norder2)).real(); - } - this->fftw.execute_fftw(); - for (int i = 0; i < norder; ++i) - { - if (i == 0) { - coefc_cpu[i].real(real(coefc_cpu[i]) + real(pcoef[i]) / norder2 * 1 / 3); + this->fftw.dcoef[i] = fun (cos (i * ModuleBase::TWO_PI / norder2)).real (); } - else + this->fftw.execute_fftw (); + for (int i = 0; i < norder; ++i) { - coefc_cpu[i].real(real(coefc_cpu[i]) + real(pcoef[i]) / norder2 * 2 / 3); + if (i == 0) + { + coefc_cpu[i].real (real (coefc_cpu[i]) + real (pcoef[i]) / norder2 * 1 / 3); + } + else + { + coefc_cpu[i].real (real (coefc_cpu[i]) + real (pcoef[i]) / norder2 * 2 / 3); + } } - } for (int i = 0; i < norder2; ++i) - { - this->fftw.dcoef[i] = fun(cos(i * ModuleBase::TWO_PI / 
norder2)).imag(); - } - this->fftw.execute_fftw(); + { + this->fftw.dcoef[i] = fun (cos (i * ModuleBase::TWO_PI / norder2)).imag (); + } + this->fftw.execute_fftw (); for (int i = 0; i < norder; ++i) - { - if (i == 0) { - coefc_cpu[i].imag(imag(coefc_cpu[i]) + real(pcoef[i]) / norder2 * 1 / 3); + if (i == 0) + { + coefc_cpu[i].imag (imag (coefc_cpu[i]) + real (pcoef[i]) / norder2 * 1 / 3); + } + else + { + coefc_cpu[i].imag (imag (coefc_cpu[i]) + real (pcoef[i]) / norder2 * 2 / 3); + } } - else + if (base_device::get_device_type (this->ctx) == base_device::GpuDevice) { - coefc_cpu[i].imag(imag(coefc_cpu[i]) + real(pcoef[i]) / norder2 * 2 / 3); + syncmem_complex_h2d_op () (coef_complex, coefc_cpu, norder); } - } - if (base_device::get_device_type(this->ctx) == base_device::GpuDevice) - { - syncmem_complex_h2d_op()(coef_complex, coefc_cpu, norder); - } getcoef_complex = true; return; } template -void Chebyshev::calcoef_pair(std::function fun1, std::function fun2) +void + Chebyshev::calcoef_pair (std::function fun1, std::function fun2) { std::complex* pcoef = (std::complex*)this->fftw.ccoef; @@ -318,130 +326,131 @@ void Chebyshev::calcoef_pair(std::function fun1, std:: //(M)iddle point integral method part //----------------------------------------------- for (int i = 0; i < norder2; ++i) - { - this->fftw.dcoef[i] = fun1(cos((i + 0.5) * ModuleBase::TWO_PI / norder2)); - } - this->fftw.execute_fftw(); - for (int i = 0; i < norder; ++i) - { - REAL phi = i * ModuleBase::PI / norder2; - if (i == 0) { - coefc_cpu[i].real((cos(phi) * pcoef[i].real() + sin(phi) * pcoef[i].imag()) / norder2 * 2 / 3); + this->fftw.dcoef[i] = fun1 (cos ((i + 0.5) * ModuleBase::TWO_PI / norder2)); } - else + this->fftw.execute_fftw (); + for (int i = 0; i < norder; ++i) { - coefc_cpu[i].real((cos(phi) * pcoef[i].real() + sin(phi) * pcoef[i].imag()) / norder2 * 4 / 3); + REAL phi = i * ModuleBase::PI / norder2; + if (i == 0) + { + coefc_cpu[i].real ((cos (phi) * pcoef[i].real () + sin (phi) * 
pcoef[i].imag ()) / norder2 * 2 / 3); + } + else + { + coefc_cpu[i].real ((cos (phi) * pcoef[i].real () + sin (phi) * pcoef[i].imag ()) / norder2 * 4 / 3); + } } - } for (int i = 0; i < norder2; ++i) - { - this->fftw.dcoef[i] = fun2(cos((i + 0.5) * ModuleBase::TWO_PI / norder2)); - } - this->fftw.execute_fftw(); - for (int i = 0; i < norder; ++i) - { - REAL phi = i * ModuleBase::PI / norder2; - if (i == 0) { - coefc_cpu[i].imag((cos(phi) * pcoef[i].real() + sin(phi) * pcoef[i].imag()) / norder2 * 2 / 3); + this->fftw.dcoef[i] = fun2 (cos ((i + 0.5) * ModuleBase::TWO_PI / norder2)); } - else + this->fftw.execute_fftw (); + for (int i = 0; i < norder; ++i) { - coefc_cpu[i].imag((cos(phi) * pcoef[i].real() + sin(phi) * pcoef[i].imag()) / norder2 * 4 / 3); + REAL phi = i * ModuleBase::PI / norder2; + if (i == 0) + { + coefc_cpu[i].imag ((cos (phi) * pcoef[i].real () + sin (phi) * pcoef[i].imag ()) / norder2 * 2 / 3); + } + else + { + coefc_cpu[i].imag ((cos (phi) * pcoef[i].real () + sin (phi) * pcoef[i].imag ()) / norder2 * 4 / 3); + } } - } //----------------------------------------------- //(T)rapezoid integral method part //----------------------------------------------- for (int i = 0; i < norder2; ++i) - { - this->fftw.dcoef[i] = fun1(cos(i * ModuleBase::TWO_PI / norder2)); - } - this->fftw.execute_fftw(); - for (int i = 0; i < norder; ++i) - { - if (i == 0) { - coefc_cpu[i].real(real(coefc_cpu[i]) + real(pcoef[i]) / norder2 * 1 / 3); + this->fftw.dcoef[i] = fun1 (cos (i * ModuleBase::TWO_PI / norder2)); } - else + this->fftw.execute_fftw (); + for (int i = 0; i < norder; ++i) { - coefc_cpu[i].real(real(coefc_cpu[i]) + real(pcoef[i]) / norder2 * 2 / 3); + if (i == 0) + { + coefc_cpu[i].real (real (coefc_cpu[i]) + real (pcoef[i]) / norder2 * 1 / 3); + } + else + { + coefc_cpu[i].real (real (coefc_cpu[i]) + real (pcoef[i]) / norder2 * 2 / 3); + } } - } for (int i = 0; i < norder2; ++i) - { - this->fftw.dcoef[i] = fun2(cos(i * ModuleBase::TWO_PI / norder2)); - } - 
this->fftw.execute_fftw(); - for (int i = 0; i < norder; ++i) - { - if (i == 0) { - coefc_cpu[i].imag(imag(coefc_cpu[i]) + real(pcoef[i]) / norder2 * 1 / 3); + this->fftw.dcoef[i] = fun2 (cos (i * ModuleBase::TWO_PI / norder2)); } - else + this->fftw.execute_fftw (); + for (int i = 0; i < norder; ++i) { - coefc_cpu[i].imag(imag(coefc_cpu[i]) + real(pcoef[i]) / norder2 * 2 / 3); + if (i == 0) + { + coefc_cpu[i].imag (imag (coefc_cpu[i]) + real (pcoef[i]) / norder2 * 1 / 3); + } + else + { + coefc_cpu[i].imag (imag (coefc_cpu[i]) + real (pcoef[i]) / norder2 * 2 / 3); + } } - } - if (base_device::get_device_type(this->ctx) == base_device::GpuDevice) - { - syncmem_complex_h2d_op()(coef_complex, coefc_cpu, norder); - } + if (base_device::get_device_type (this->ctx) == base_device::GpuDevice) + { + syncmem_complex_h2d_op () (coef_complex, coefc_cpu, norder); + } getcoef_complex = true; return; } template -void Chebyshev::calfinalvec_real( - std::function*, std::complex*, const int)> funA, - std::complex* wavein, - std::complex* waveout, - const int N, - const int LDA, - const int m) +void + Chebyshev::calfinalvec_real ( + std::function*, std::complex*, const int)> funA, + std::complex* wavein, + std::complex* waveout, + const int N, + const int LDA, + const int m) { if (!getcoef_real) - { - ModuleBase::WARNING_QUIT("Chebyshev", "Please calculate coef_real first!"); - } + { + ModuleBase::WARNING_QUIT ("Chebyshev", "Please calculate coef_real first!"); + } std::complex* arraynp1 = nullptr; std::complex* arrayn = nullptr; std::complex* arrayn_1 = nullptr; - assert(N >= 0 && LDA >= N); + assert (N >= 0 && LDA >= N); int ndmxt = 0; if (m == 1) - { - ndmxt = N * m; - } + { + ndmxt = N * m; + } else - { - ndmxt = LDA * m; - } + { + ndmxt = LDA * m; + } - resmem_complex_op()(arraynp1, ndmxt); - resmem_complex_op()(arrayn, ndmxt); - resmem_complex_op()(arrayn_1, ndmxt); + resmem_complex_op () (arraynp1, ndmxt); + resmem_complex_op () (arrayn, ndmxt); + resmem_complex_op () 
(arrayn_1, ndmxt); - memcpy_complex_op()(arrayn_1, wavein, ndmxt); + memcpy_complex_op () (arrayn_1, wavein, ndmxt); // ModuleBase::GlobalFunc::DCOPY(wavein, arrayn_1, ndmxt); - funA(arrayn_1, arrayn, m); + funA (arrayn_1, arrayn, m); // 0- & 1-st order - setmem_complex_op()(waveout, 0, ndmxt); - std::complex coef0 = std::complex(coefr_cpu[0], 0); - container::kernels::blas_axpy, ct_Device>()(ndmxt, &coef0, arrayn_1, 1, waveout, 1); - std::complex coef1 = std::complex(coefr_cpu[1], 0); - container::kernels::blas_axpy, ct_Device>()(ndmxt, &coef1, arrayn, 1, waveout, 1); + setmem_complex_op () (waveout, 0, ndmxt); + std::complex coef0 = std::complex (coefr_cpu[0], 0); + container::kernels::blas_axpy, ct_Device> () (ndmxt, &coef0, arrayn_1, 1, waveout, 1); + std::complex coef1 = std::complex (coefr_cpu[1], 0); + container::kernels::blas_axpy, ct_Device> () (ndmxt, &coef1, arrayn, 1, waveout, 1); // for (int i = 0; i < ndmxt; ++i) // { // waveout[i] = coef_real[0] * arrayn_1[i] + coef_real[1] * arrayn[i]; @@ -449,65 +458,66 @@ void Chebyshev::calfinalvec_real( // more than 1-st orders for (int ior = 2; ior < norder; ++ior) - { - recurs_complex(funA, arraynp1, arrayn, arrayn_1, N, LDA, m); - std::complex coefior = std::complex(coefr_cpu[ior], 0); - container::kernels::blas_axpy, ct_Device>()(ndmxt, &coefior, arraynp1, 1, waveout, 1); - // for (int i = 0; i < ndmxt; ++i) - // { - // waveout[i] += coef_real[ior] * arraynp1[i]; - // } - std::complex* tem = arrayn_1; - arrayn_1 = arrayn; - arrayn = arraynp1; - arraynp1 = tem; - } - delmem_complex_op()(arraynp1); - delmem_complex_op()(arrayn); - delmem_complex_op()(arrayn_1); + { + recurs_complex (funA, arraynp1, arrayn, arrayn_1, N, LDA, m); + std::complex coefior = std::complex (coefr_cpu[ior], 0); + container::kernels::blas_axpy, ct_Device> () (ndmxt, &coefior, arraynp1, 1, waveout, 1); + // for (int i = 0; i < ndmxt; ++i) + // { + // waveout[i] += coef_real[ior] * arraynp1[i]; + // } + std::complex* tem = arrayn_1; + 
arrayn_1 = arrayn; + arrayn = arraynp1; + arraynp1 = tem; + } + delmem_complex_op () (arraynp1); + delmem_complex_op () (arrayn); + delmem_complex_op () (arrayn_1); return; } template -void Chebyshev::calfinalvec_complex( - std::function*, std::complex*, const int)> funA, - std::complex* wavein, - std::complex* waveout, - const int N, - const int LDA, - const int m) +void + Chebyshev::calfinalvec_complex ( + std::function*, std::complex*, const int)> funA, + std::complex* wavein, + std::complex* waveout, + const int N, + const int LDA, + const int m) { if (!getcoef_complex) - { - ModuleBase::WARNING_QUIT("Chebyshev", "Please calculate coef_complex first!"); - } + { + ModuleBase::WARNING_QUIT ("Chebyshev", "Please calculate coef_complex first!"); + } std::complex* arraynp1 = nullptr; std::complex* arrayn = nullptr; std::complex* arrayn_1 = nullptr; - assert(N >= 0 && LDA >= N); + assert (N >= 0 && LDA >= N); int ndmxt = 0; if (m == 1) - { - ndmxt = N * m; - } + { + ndmxt = N * m; + } else - { - ndmxt = LDA * m; - } + { + ndmxt = LDA * m; + } - resmem_complex_op()(arraynp1, ndmxt); - resmem_complex_op()(arrayn, ndmxt); - resmem_complex_op()(arrayn_1, ndmxt); + resmem_complex_op () (arraynp1, ndmxt); + resmem_complex_op () (arrayn, ndmxt); + resmem_complex_op () (arrayn_1, ndmxt); - memcpy_complex_op()(arrayn_1, wavein, ndmxt); + memcpy_complex_op () (arrayn_1, wavein, ndmxt); - funA(arrayn_1, arrayn, m); + funA (arrayn_1, arrayn, m); // 0- & 1-st order - setmem_complex_op()(waveout, 0, ndmxt); - container::kernels::blas_axpy, ct_Device>()(ndmxt, &coefc_cpu[0], arrayn_1, 1, waveout, 1); - container::kernels::blas_axpy, ct_Device>()(ndmxt, &coefc_cpu[1], arrayn, 1, waveout, 1); + setmem_complex_op () (waveout, 0, ndmxt); + container::kernels::blas_axpy, ct_Device> () (ndmxt, &coefc_cpu[0], arrayn_1, 1, waveout, 1); + container::kernels::blas_axpy, ct_Device> () (ndmxt, &coefc_cpu[1], arrayn, 1, waveout, 1); // for (int i = 0; i < ndmxt; ++i) // { // waveout[i] = 
coef_complex[0] * arrayn_1[i] + coef_complex[1] * arrayn[i]; @@ -515,35 +525,41 @@ void Chebyshev::calfinalvec_complex( // more than 1-st orders for (int ior = 2; ior < norder; ++ior) - { - recurs_complex(funA, arraynp1, arrayn, arrayn_1, N, LDA, m); - container::kernels::blas_axpy, ct_Device>()(ndmxt, &coefc_cpu[ior], arraynp1, 1, waveout, 1); - // for (int i = 0; i < ndmxt; ++i) - // { - // waveout[i] += coef_complex[ior] * arraynp1[i]; - // } - std::complex* tem = arrayn_1; - arrayn_1 = arrayn; - arrayn = arraynp1; - arraynp1 = tem; - } - delmem_complex_op()(arraynp1); - delmem_complex_op()(arrayn); - delmem_complex_op()(arrayn_1); + { + recurs_complex (funA, arraynp1, arrayn, arrayn_1, N, LDA, m); + container::kernels::blas_axpy, ct_Device> () (ndmxt, + &coefc_cpu[ior], + arraynp1, + 1, + waveout, + 1); + // for (int i = 0; i < ndmxt; ++i) + // { + // waveout[i] += coef_complex[ior] * arraynp1[i]; + // } + std::complex* tem = arrayn_1; + arrayn_1 = arrayn; + arrayn = arraynp1; + arraynp1 = tem; + } + delmem_complex_op () (arraynp1); + delmem_complex_op () (arrayn); + delmem_complex_op () (arrayn_1); return; } template -void Chebyshev::calpolyvec_complex( - std::function*, std::complex*, const int)> funA, - std::complex* wavein, - std::complex* polywaveout, - const int N, - const int LDA, - const int m) +void + Chebyshev::calpolyvec_complex ( + std::function*, std::complex*, const int)> funA, + std::complex* wavein, + std::complex* polywaveout, + const int N, + const int LDA, + const int m) { - assert(N >= 0 && LDA >= N); + assert (N >= 0 && LDA >= N); const int ndmxt = LDA * m; std::complex* arraynp1 = polywaveout + 2 * ndmxt; @@ -552,211 +568,214 @@ void Chebyshev::calpolyvec_complex( std::complex*tmpin = wavein, *tmpout = arrayn_1; for (int i = 0; i < m; ++i) - { - memcpy_complex_op()(tmpout, tmpin, N); - // ModuleBase::GlobalFunc::DCOPY(tmpin, tmpout, N); - tmpin += LDA; - tmpout += LDA; - } + { + memcpy_complex_op () (tmpout, tmpin, N); + // 
ModuleBase::GlobalFunc::DCOPY(tmpin, tmpout, N); + tmpin += LDA; + tmpout += LDA; + } // 1-st order - funA(arrayn_1, arrayn, m); + funA (arrayn_1, arrayn, m); // more than 1-st orders for (int ior = 2; ior < norder; ++ior) - { - recurs_complex(funA, arraynp1, arrayn, arrayn_1, N, LDA, m); - arrayn_1 += ndmxt; - arrayn += ndmxt; - arraynp1 += ndmxt; - } + { + recurs_complex (funA, arraynp1, arrayn, arrayn_1, N, LDA, m); + arrayn_1 += ndmxt; + arrayn += ndmxt; + arraynp1 += ndmxt; + } return; } template -void Chebyshev::tracepolyA( - std::function* in, std::complex* out, const int)> funA, - std::complex* wavein, - const int N, - const int LDA, - const int m) +void + Chebyshev::tracepolyA ( + std::function* in, std::complex* out, const int)> funA, + std::complex* wavein, + const int N, + const int LDA, + const int m) { std::complex* arraynp1 = nullptr; std::complex* arrayn = nullptr; std::complex* arrayn_1 = nullptr; - assert(N >= 0 && LDA >= N); + assert (N >= 0 && LDA >= N); int ndmxt = 0; if (m == 1) - { - ndmxt = N * m; - } + { + ndmxt = N * m; + } else - { - ndmxt = LDA * m; - } + { + ndmxt = LDA * m; + } - resmem_complex_op()(arraynp1, ndmxt); - resmem_complex_op()(arrayn, ndmxt); - resmem_complex_op()(arrayn_1, ndmxt); + resmem_complex_op () (arraynp1, ndmxt); + resmem_complex_op () (arrayn, ndmxt); + resmem_complex_op () (arrayn_1, ndmxt); - memcpy_complex_op()(arrayn_1, wavein, ndmxt); + memcpy_complex_op () (arrayn_1, wavein, ndmxt); // ModuleBase::GlobalFunc::DCOPY(wavein, arrayn_1, ndmxt); - funA(arrayn_1, arrayn, m); + funA (arrayn_1, arrayn, m); - polytrace[0] = this->ddot_real(wavein, wavein, N, LDA, m); - polytrace[1] = this->ddot_real(wavein, arrayn, N, LDA, m); + polytrace[0] = this->ddot_real (wavein, wavein, N, LDA, m); + polytrace[1] = this->ddot_real (wavein, arrayn, N, LDA, m); // more than 1-st orders for (int ior = 2; ior < norder; ++ior) - { - recurs_complex(funA, arraynp1, arrayn, arrayn_1, N, LDA, m); - polytrace[ior] = 
this->ddot_real(wavein, arraynp1, N, LDA, m); - std::complex* tem = arrayn_1; - arrayn_1 = arrayn; - arrayn = arraynp1; - arraynp1 = tem; - } - - delmem_complex_op()(arraynp1); - delmem_complex_op()(arrayn); - delmem_complex_op()(arrayn_1); + { + recurs_complex (funA, arraynp1, arrayn, arrayn_1, N, LDA, m); + polytrace[ior] = this->ddot_real (wavein, arraynp1, N, LDA, m); + std::complex* tem = arrayn_1; + arrayn_1 = arrayn; + arrayn = arraynp1; + arraynp1 = tem; + } + + delmem_complex_op () (arraynp1); + delmem_complex_op () (arrayn); + delmem_complex_op () (arrayn_1); return; } template -void Chebyshev::recurs_complex( - std::function* in, std::complex* out, const int)> funA, - std::complex* arraynp1, - std::complex* arrayn, - std::complex* arrayn_1, - const int N, - const int LDA, - const int m) +void + Chebyshev::recurs_complex ( + std::function* in, std::complex* out, const int)> funA, + std::complex* arraynp1, + std::complex* arrayn, + std::complex* arrayn_1, + const int N, + const int LDA, + const int m) { - funA(arrayn, arraynp1, m); + funA (arrayn, arraynp1, m); const std::complex two = 2.0; const std::complex invone = -1.0; for (int ib = 0; ib < m; ++ib) - { - container::kernels::blas_scal, ct_Device>()(N, &two, arraynp1 + ib * LDA, 1); - container::kernels::blas_axpy, ct_Device>()(N, - &invone, - arrayn_1 + ib * LDA, - 1, - arraynp1 + ib * LDA, - 1); - - // for (int i = 0; i < N; ++i) - // { - // arraynp1[i + ib * LDA] = REAL(2.0) * arraynp1[i + ib * LDA] - arrayn_1[i + ib * LDA]; - // } - } + { + container::kernels::blas_scal, ct_Device> () (N, &two, arraynp1 + ib * LDA, 1); + container::kernels::blas_axpy, ct_Device> () (N, + &invone, + arrayn_1 + ib * LDA, + 1, + arraynp1 + ib * LDA, + 1); + + // for (int i = 0; i < N; ++i) + // { + // arraynp1[i + ib * LDA] = REAL(2.0) * arraynp1[i + ib * LDA] - arrayn_1[i + ib * LDA]; + // } + } } template -bool Chebyshev::checkconverge( - std::function* in, std::complex* out, const int)> funA, - std::complex* 
wavein, - const int N, - const int LDA, - REAL& tmax, - REAL& tmin, - REAL stept) +bool + Chebyshev::checkconverge ( + std::function* in, std::complex* out, const int)> funA, + std::complex* wavein, + const int N, + const int LDA, + REAL& tmax, + REAL& tmin, + REAL stept) { bool converge = true; std::complex* arraynp1 = nullptr; std::complex* arrayn = nullptr; std::complex* arrayn_1 = nullptr; - resmem_complex_op()(arraynp1, LDA); - resmem_complex_op()(arrayn, LDA); - resmem_complex_op()(arrayn_1, LDA); + resmem_complex_op () (arraynp1, LDA); + resmem_complex_op () (arrayn, LDA); + resmem_complex_op () (arrayn_1, LDA); - memcpy_complex_op()(arrayn_1, wavein, N); + memcpy_complex_op () (arrayn_1, wavein, N); // ModuleBase::GlobalFunc::DCOPY(wavein, arrayn_1, N); if (tmin == tmax) - { - tmax += stept; - } + { + tmax += stept; + } - funA(arrayn_1, arrayn, 1); + funA (arrayn_1, arrayn, 1); REAL sum1, sum2; REAL t; - if (base_device::get_device_type(this->ctx) == base_device::GpuDevice) - { - sum1 = this->ddot_real(arrayn_1, arrayn_1, N); - sum2 = this->ddot_real(arrayn_1, arrayn, N); - } - else - { -#ifdef __MPI - sum1 = ModuleBase::GlobalFunc::ddot_real(N, arrayn_1, arrayn_1); - sum2 = ModuleBase::GlobalFunc::ddot_real(N, arrayn_1, arrayn); -#else - sum1 = this->ddot_real(arrayn_1, arrayn_1, N); - sum2 = this->ddot_real(arrayn_1, arrayn, N); -#endif - } - t = sum2 / sum1 * (tmax - tmin) / 2 + (tmax + tmin) / 2; - if (t < tmin || tmin == 0) - { - converge = false; - tmin = t - stept; - } - if (t > tmax) - { - converge = false; - tmax = t + stept; - } - - for (int ior = 2; ior < norder; ++ior) - { - funA(arrayn, arraynp1, 1); - if (base_device::get_device_type(this->ctx) == base_device::GpuDevice) + if (base_device::get_device_type (this->ctx) == base_device::GpuDevice) { - sum1 = this->ddot_real(arrayn, arrayn, N); - sum2 = this->ddot_real(arrayn, arraynp1, N); + sum1 = this->ddot_real (arrayn_1, arrayn_1, N); + sum2 = this->ddot_real (arrayn_1, arrayn, N); } - else + 
else { #ifdef __MPI - sum1 = ModuleBase::GlobalFunc::ddot_real(N, arrayn, arrayn); - sum2 = ModuleBase::GlobalFunc::ddot_real(N, arrayn, arraynp1); + sum1 = ModuleBase::GlobalFunc::ddot_real (N, arrayn_1, arrayn_1); + sum2 = ModuleBase::GlobalFunc::ddot_real (N, arrayn_1, arrayn); #else - sum1 = this->ddot_real(arrayn, arrayn, N); - sum2 = this->ddot_real(arrayn, arraynp1, N); + sum1 = this->ddot_real (arrayn_1, arrayn_1, N); + sum2 = this->ddot_real (arrayn_1, arrayn, N); #endif } - t = sum2 / sum1 * (tmax - tmin) / 2 + (tmax + tmin) / 2; - if (t < tmin) + t = sum2 / sum1 * (tmax - tmin) / 2 + (tmax + tmin) / 2; + if (t < tmin || tmin == 0) { converge = false; tmin = t - stept; } - else if (t > tmax) + if (t > tmax) { converge = false; tmax = t + stept; } - std::complex two = 2.0; - std::complex invone = -1.0; - container::kernels::blas_scal, ct_Device>()(N, &two, arraynp1, 1); - container::kernels::blas_axpy, ct_Device>()(N, &invone, arrayn_1, 1, arraynp1, 1); - // for (int i = 0; i < N; ++i) - // { - // arraynp1[i] = REAL(2.0) * arraynp1[i] - arrayn_1[i]; - // } - std::complex* tem = arrayn_1; - arrayn_1 = arrayn; - arrayn = arraynp1; - arraynp1 = tem; - } - - delmem_complex_op()(arraynp1); - delmem_complex_op()(arrayn); - delmem_complex_op()(arrayn_1); + + for (int ior = 2; ior < norder; ++ior) + { + funA (arrayn, arraynp1, 1); + if (base_device::get_device_type (this->ctx) == base_device::GpuDevice) + { + sum1 = this->ddot_real (arrayn, arrayn, N); + sum2 = this->ddot_real (arrayn, arraynp1, N); + } + else + { +#ifdef __MPI + sum1 = ModuleBase::GlobalFunc::ddot_real (N, arrayn, arrayn); + sum2 = ModuleBase::GlobalFunc::ddot_real (N, arrayn, arraynp1); +#else + sum1 = this->ddot_real (arrayn, arrayn, N); + sum2 = this->ddot_real (arrayn, arraynp1, N); +#endif + } + t = sum2 / sum1 * (tmax - tmin) / 2 + (tmax + tmin) / 2; + if (t < tmin) + { + converge = false; + tmin = t - stept; + } + else if (t > tmax) + { + converge = false; + tmax = t + stept; + } + 
std::complex two = 2.0; + std::complex invone = -1.0; + container::kernels::blas_scal, ct_Device> () (N, &two, arraynp1, 1); + container::kernels::blas_axpy, ct_Device> () (N, &invone, arrayn_1, 1, arraynp1, 1); + // for (int i = 0; i < N; ++i) + // { + // arraynp1[i] = REAL(2.0) * arraynp1[i] - arrayn_1[i]; + // } + std::complex* tem = arrayn_1; + arrayn_1 = arrayn; + arrayn = arraynp1; + arraynp1 = tem; + } + + delmem_complex_op () (arraynp1); + delmem_complex_op () (arrayn); + delmem_complex_op () (arrayn_1); return converge; } diff --git a/source/source_base/math_chebyshev.h b/source/source_base/math_chebyshev.h index 3d534b911c6..18b0e71b69a 100644 --- a/source/source_base/math_chebyshev.h +++ b/source/source_base/math_chebyshev.h @@ -85,20 +85,20 @@ class Chebyshev public: // constructor and deconstructor - Chebyshev(const int norder); - ~Chebyshev(); + Chebyshev (const int norder); + ~Chebyshev (); public: // I. // Calculate coefficients C_n[f], where f is a function of real number - void calcoef_real(std::function fun); + void calcoef_real (std::function fun); // Calculate coefficients C_n[g], where g is a function of complex number - void calcoef_complex(std::function(std::complex)> fun); + void calcoef_complex (std::function (std::complex)> fun); // Calculate coefficients C_n[g], where g is a general complex function g(x)=(g1(x), g2(x)) // e.g. exp(ix)=(cos(x),sin(x)) - void calcoef_pair(std::function fun1, std::function fun2); + void calcoef_pair (std::function fun1, std::function fun2); // II. // Calculate the final vector f(A)v = \sum_{n=0}^{norder-1} C_n[f]*v_n @@ -107,83 +107,83 @@ class Chebyshev // A[v1,...,vm] N is dimension of vector, and LDA is the distance between the first number of v_n and v_{n+1}. LDA // >= max(1, N). It is the same as the BLAS lib. calfinalvec_real uses C_n[f], where f is a function of real number // and A is a real Operator. 
- void calfinalvec_real(std::function funA, - REAL* wavein, - REAL* waveout, - const int N, - const int LDA = 1, - const int m = 1); // do not define yet + void calfinalvec_real (std::function funA, + REAL* wavein, + REAL* waveout, + const int N, + const int LDA = 1, + const int m = 1); // do not define yet // calfinalvec_real uses C_n[f], where f is a function of real number and A is a complex Operator. - void calfinalvec_real(std::function*, std::complex*, const int)> funA, - std::complex* wavein, - std::complex* waveout, - const int N, - const int LDA = 1, - const int m = 1); + void calfinalvec_real (std::function*, std::complex*, const int)> funA, + std::complex* wavein, + std::complex* waveout, + const int N, + const int LDA = 1, + const int m = 1); // calfinalvec_complex uses C_n[g], where g is a function of complex number and A is a complex Operator. - void calfinalvec_complex(std::function*, std::complex*, const int)> funA, - std::complex* wavein, - std::complex* waveout, - const int N, - const int LDA = 1, - const int m = 1); + void calfinalvec_complex (std::function*, std::complex*, const int)> funA, + std::complex* wavein, + std::complex* waveout, + const int N, + const int LDA = 1, + const int m = 1); // III. 
// \sum_i v_i^+f(A)v_i = \sum_{i,n=0}^{norder-1} C_n[f]*v_i^+v_{i,n} = \sum_{n=0}^{norder-1} C_n[f] * w_n // calculate the sum of diagonal elements (Trace) of T_n(A) in v-represent: w_n = \sum_i v_i^+ * T_n(A) * v_i // i = 1,2,...m - void tracepolyA(std::function* in, std::complex* out, const int)> funA, - std::complex* wavein, - const int N, - const int LDA = 1, - const int m = 1); + void tracepolyA (std::function* in, std::complex* out, const int)> funA, + std::complex* wavein, + const int N, + const int LDA = 1, + const int m = 1); // get T_n(x) - void getpolyval(REAL x, REAL* polyval, const int N); + void getpolyval (REAL x, REAL* polyval, const int N); // get each order of vector: {T_0(A)v, T_1(A)v, ..., T_n(A)v} // Note: use it carefully, it will cost a lot of memory! // calpolyvec_real: f(x) = \sum_n C_n*T_n(x), f is a real function - void calpolyvec_real(std::function funA, - REAL* wavein, - REAL* waveout, - const int N, - const int LDA = 1, - const int m = 1); // do not define yet + void calpolyvec_real (std::function funA, + REAL* wavein, + REAL* waveout, + const int N, + const int LDA = 1, + const int m = 1); // do not define yet // calpolyvec_complex: f(x) = \sum_n C_n*T_n(x), f is a complex function - void calpolyvec_complex(std::function* in, std::complex* out, const int)> funA, - std::complex* wavein, - std::complex* waveout, - const int N, - const int LDA = 1, - const int m = 1); + void calpolyvec_complex (std::function* in, std::complex* out, const int)> funA, + std::complex* wavein, + std::complex* waveout, + const int N, + const int LDA = 1, + const int m = 1); // IV. 
// recurs fomula: v_{n+1} = 2Av_n - v_{n-1} // get v_{n+1} from v_n and v_{n-1} // recurs_complex: A is a real operator - void recurs_real(std::function funA, - REAL* arraynp1, - REAL* arrayn, - REAL* arrayn_1, - const int N, - const int LDA = 1, - const int m = 1); + void recurs_real (std::function funA, + REAL* arraynp1, + REAL* arrayn, + REAL* arrayn_1, + const int N, + const int LDA = 1, + const int m = 1); // recurs_complex: A is a complex operator - void recurs_complex(std::function* in, std::complex* out, const int)> funA, - std::complex* arraynp1, - std::complex* arrayn, - std::complex* arrayn_1, - const int N, - const int LDA = 1, - const int m = 1); + void recurs_complex (std::function* in, std::complex* out, const int)> funA, + std::complex* arraynp1, + std::complex* arrayn, + std::complex* arrayn_1, + const int N, + const int LDA = 1, + const int m = 1); // return 2xTn-Tn_1 - REAL recurs(const REAL x, const REAL Tn, const REAL Tn_1); + REAL recurs (const REAL x, const REAL Tn, const REAL Tn_1); // V. // auxiliary function @@ -191,13 +191,13 @@ class Chebyshev // Thus \hat(a) = \frac{(A - (tmax+tmin)/2)}{(tmax-tmin)/2} // tmax >= all eigenvalues; tmin <= all eigenvalues // Here we check if the trial number tmax(tmin) is the upper(lower) bound of eigenvalues and return it. 
- bool checkconverge(std::function* in, std::complex* out, const int)> funA, - std::complex* wavein, - const int N, - const int LDA, - REAL& tmax, // trial number for upper bound - REAL& tmin, // trial number for lower bound - REAL stept); // tmax = max() + stept, tmin = min() - stept + bool checkconverge (std::function* in, std::complex* out, const int)> funA, + std::complex* wavein, + const int N, + const int LDA, + REAL& tmax, // trial number for upper bound + REAL& tmin, // trial number for lower bound + REAL stept); // tmax = max() + stept, tmin = min() - stept public: // Members: @@ -209,8 +209,8 @@ class Chebyshev REAL* coefr_cpu = nullptr; //[CPU] expansion coefficient of each order std::complex* coefc_cpu = nullptr; //[CPU] expansion coefficient of each order - FFTW fftw; // use for fftw - REAL* polytrace = nullptr; //[CPU] w_n = \sum_i v^+ * T_n(A) * v, only + FFTW fftw; // use for fftw + REAL* polytrace = nullptr; //[CPU] w_n = \sum_i v^+ * T_n(A) * v, only bool getcoef_real; // coef_real has been calculated bool getcoef_complex; // coef_complex has been calculated @@ -218,24 +218,26 @@ class Chebyshev public: // SI. 
// calculate dot product - REAL ddot_real(const std::complex* psi_L, - const std::complex* psi_R, - const int N, - const int LDA = 1, - const int m = 1); - + REAL ddot_real (const std::complex* psi_L, + const std::complex* psi_R, + const int N, + const int LDA = 1, + const int m = 1); + private: Device* ctx = {}; base_device::DEVICE_CPU* cpu_ctx = {}; using ct_Device = typename container::PsiToContainer::type; - using resmem_complex_op = base_device::memory::resize_memory_op, Device>; + using resmem_complex_op = base_device::memory::resize_memory_op, Device>; using resmem_var_op = base_device::memory::resize_memory_op; using delmem_complex_op = base_device::memory::delete_memory_op, Device>; using delmem_var_op = base_device::memory::delete_memory_op; using syncmem_var_h2d_op = base_device::memory::synchronize_memory_op; using syncmem_var_d2h_op = base_device::memory::synchronize_memory_op; - using syncmem_complex_h2d_op = base_device::memory::synchronize_memory_op, Device, base_device::DEVICE_CPU>; - using syncmem_complex_d2h_op = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, Device>; + using syncmem_complex_h2d_op + = base_device::memory::synchronize_memory_op, Device, base_device::DEVICE_CPU>; + using syncmem_complex_d2h_op + = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, Device>; using memcpy_var_op = base_device::memory::synchronize_memory_op; using memcpy_complex_op = base_device::memory::synchronize_memory_op, Device, Device>; using setmem_complex_op = base_device::memory::set_memory_op, Device>; @@ -245,9 +247,9 @@ template <> class FFTW { public: - FFTW(const int norder2_in); - ~FFTW(); - void execute_fftw(); + FFTW (const int norder2_in); + ~FFTW (); + void execute_fftw (); double* dcoef = nullptr; //[norder2] fftw_complex* ccoef = nullptr; fftw_plan coef_plan; @@ -258,9 +260,9 @@ template <> class FFTW { public: - FFTW(const int norder2_in); - ~FFTW(); - void execute_fftw(); + FFTW (const int norder2_in); + 
~FFTW (); + void execute_fftw (); float* dcoef = nullptr; //[norder2] fftwf_complex* ccoef = nullptr; fftwf_plan coef_plan; diff --git a/source/source_base/math_erf_complex.cpp b/source/source_base/math_erf_complex.cpp index 4c7385245af..b114d3e3ea2 100644 --- a/source/source_base/math_erf_complex.cpp +++ b/source/source_base/math_erf_complex.cpp @@ -4,328 +4,367 @@ #include #include -#define Inf std::numeric_limits::infinity() -#define NaN std::numeric_limits::quiet_NaN() +#define Inf std::numeric_limits::infinity () +#define NaN std::numeric_limits::quiet_NaN () namespace ModuleBase { -ErrorFunc::ErrorFunc() -{ -} -ErrorFunc::~ErrorFunc() -{ -} +ErrorFunc::ErrorFunc () {} +ErrorFunc::~ErrorFunc () {} -std::complex ErrorFunc::scaled_w(std::complex z, double relerr) +std::complex + ErrorFunc::scaled_w (std::complex z, double relerr) { - if (std::real(z) == 0.0) - return std::complex(erfcx(std::imag(z)), std::real(z)); - else if (std::imag(z) == 0.0) - return std::complex(std::exp(-std::real(z) * std::real(z)), scaled_w_im(std::real(z))); + if (std::real (z) == 0.0) + { + return std::complex (erfcx (std::imag (z)), std::real (z)); + } + else if (std::imag (z) == 0.0) + { + return std::complex (std::exp (-std::real (z) * std::real (z)), scaled_w_im (std::real (z))); + } double a, a2, c; if (relerr <= DBL_EPSILON) - { - relerr = DBL_EPSILON; - a = 0.518321480430085929872; // pi / sqrt(-log(eps*0.5)) - c = 0.329973702884629072537; // (2/pi) * a; - a2 = 0.268657157075235951582; // a^2 - } + { + relerr = DBL_EPSILON; + a = 0.518321480430085929872; // pi / sqrt(-log(eps*0.5)) + c = 0.329973702884629072537; // (2/pi) * a; + a2 = 0.268657157075235951582; // a^2 + } else - { - if (relerr > 0.1) - relerr = 0.1; // not sensible to compute < 1 digit - a = ModuleBase::PI / std::sqrt(-std::log(relerr * 0.5)); - c = (2 / ModuleBase::PI) * a; - a2 = a * a; - } - const double x = std::fabs(std::real(z)); - const double y = std::imag(z), ya = std::fabs(y); + { + if (relerr > 0.1) + { 
+ relerr = 0.1; // not sensible to compute < 1 digit + } + a = ModuleBase::PI / std::sqrt (-std::log (relerr * 0.5)); + c = (2 / ModuleBase::PI) * a; + a2 = a * a; + } + const double x = std::fabs (std::real (z)); + const double y = std::imag (z), ya = std::fabs (y); std::complex ret = 0.; // return value double sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0, sum5 = 0; if (ya > 7 || (x > 6 && (ya > 0.1 || (x > 8 && ya > 1e-10) || x > 28))) - { + { - const double ispi = 1 / std::sqrt(ModuleBase::PI); // 1 / sqrt(pi) - double xs = y < 0 ? -std::real(z) : std::real(z); // compute for -z if y < 0 - if (x + ya > 4000) - { // nu <= 2 - if (x + ya > 1e7) - { // nu == 1, w(z) = i/sqrt(pi) / z - // scale to avoid overflow - if (x > ya) - { - double yax = ya / xs; - double denom = ispi / (xs + yax * ya); - ret = std::complex(denom * yax, denom); + const double ispi = 1 / std::sqrt (ModuleBase::PI); // 1 / sqrt(pi) + double xs = y < 0 ? -std::real (z) : std::real (z); // compute for -z if y < 0 + if (x + ya > 4000) + { // nu <= 2 + if (x + ya > 1e7) + { // nu == 1, w(z) = i/sqrt(pi) / z + // scale to avoid overflow + if (x > ya) + { + double yax = ya / xs; + double denom = ispi / (xs + yax * ya); + ret = std::complex (denom * yax, denom); + } + else if (std::isinf (ya)) + { + return ((std::isnan (x) || y < 0) ? 
std::complex (NaN, NaN) + : std::complex (0, 0)); + } + else + { + double xya = xs / ya; + double denom = ispi / (xya * xs + ya); + ret = std::complex (denom, denom * xya); + } + } + else + { // nu == 2, w(z) = i/sqrt(pi) * z / (z*z - 0.5) + double dr = xs * xs - ya * ya - 0.5, di = 2 * xs * ya; + double denom = ispi / (dr * dr + di * di); + ret = std::complex (denom * (xs * di - ya * dr), denom * (xs * dr + ya * di)); + } + } + else + { // compute nu(z) estimate and do general continued fraction + const double c0 = 3.9, c1 = 11.398, c2 = 0.08254, c3 = 0.1421, c4 = 0.2023; // fit + double nu = std::floor (c0 + c1 / (c2 * x + c3 * ya + c4)); + double wr = xs, wi = ya; + for (nu = 0.5 * (nu - 1); nu > 0.4; nu -= 0.5) + { + // w <- z - nu/w: + double denom = nu / (wr * wr + wi * wi); + wr = xs - wr * denom; + wi = ya + wi * denom; + } + { // w(z) = i/sqrt(pi) / w: + double denom = ispi / (wr * wr + wi * wi); + ret = std::complex (denom * wi, denom * wr); + } } - else if (std::isinf(ya)) - return ((std::isnan(x) || y < 0) ? 
std::complex(NaN, NaN) : std::complex(0, 0)); - else + if (y < 0) { - double xya = xs / ya; - double denom = ispi / (xya * xs + ya); - ret = std::complex(denom, denom * xya); + // use w(z) = 2.0*exp(-z*z) - w(-z), + // but be careful of overflow in exp(-z*z) + // = exp(-(xs*xs-ya*ya) -2*i*xs*ya) + return 2.0 * std::exp (std::complex ((ya - xs) * (xs + ya), 2 * xs * y)) - ret; } - } else - { // nu == 2, w(z) = i/sqrt(pi) * z / (z*z - 0.5) - double dr = xs * xs - ya * ya - 0.5, di = 2 * xs * ya; - double denom = ispi / (dr * dr + di * di); - ret = std::complex(denom * (xs * di - ya * dr), denom * (xs * dr + ya * di)); - } - } - else - { // compute nu(z) estimate and do general continued fraction - const double c0 = 3.9, c1 = 11.398, c2 = 0.08254, c3 = 0.1421, c4 = 0.2023; // fit - double nu = std::floor(c0 + c1 / (c2 * x + c3 * ya + c4)); - double wr = xs, wi = ya; - for (nu = 0.5 * (nu - 1); nu > 0.4; nu -= 0.5) - { - // w <- z - nu/w: - double denom = nu / (wr * wr + wi * wi); - wr = xs - wr * denom; - wi = ya + wi * denom; - } - { // w(z) = i/sqrt(pi) / w: - double denom = ispi / (wr * wr + wi * wi); - ret = std::complex(denom * wi, denom * wr); - } - } - if (y < 0) - { - // use w(z) = 2.0*exp(-z*z) - w(-z), - // but be careful of overflow in exp(-z*z) - // = exp(-(xs*xs-ya*ya) -2*i*xs*ya) - return 2.0 * std::exp(std::complex((ya - xs) * (xs + ya), 2 * xs * y)) - ret; + { + return ret; + } } - else - return ret; - } else if (x < 10) - { - double prod2ax = 1, prodm2ax = 1; - double expx2 = 0.0; - - if (std::isnan(y)) - return std::complex(y, y); + { + double prod2ax = 1, prodm2ax = 1; + double expx2 = 0.0; - if (relerr == DBL_EPSILON) - { // use precomputed exp(-a2*(n*n)) table - if (x < 5e-4) - { // compute sum4 and sum5 together as sum5-sum4 - const double x2 = x * x; - expx2 = 1 - x2 * (1 - 0.5 * x2); // exp(-x*x) via Taylor - // compute exp(2*a*x) and exp(-2*a*x) via Taylor, to double precision - const double ax2 = 1.036642960860171859744 * x; // 2*a*x - const 
double exp2ax = 1 + ax2 * (1 + ax2 * (0.5 + 0.166666666666666666667 * ax2)); - const double expm2ax = 1 - ax2 * (1 - ax2 * (0.5 - 0.166666666666666666667 * ax2)); - for (int n = 1; 1; ++n) + if (std::isnan (y)) { - const double coef = expa2n2[n - 1] * expx2 / (a2 * (n * n) + y * y); - prod2ax *= exp2ax; - prodm2ax *= expm2ax; - sum1 += coef; - sum2 += coef * prodm2ax; - sum3 += coef * prod2ax; + return std::complex (y, y); + } + + if (relerr == DBL_EPSILON) + { // use precomputed exp(-a2*(n*n)) table + if (x < 5e-4) + { // compute sum4 and sum5 together as sum5-sum4 + const double x2 = x * x; + expx2 = 1 - x2 * (1 - 0.5 * x2); // exp(-x*x) via Taylor + // compute exp(2*a*x) and exp(-2*a*x) via Taylor, to double precision + const double ax2 = 1.036642960860171859744 * x; // 2*a*x + const double exp2ax = 1 + ax2 * (1 + ax2 * (0.5 + 0.166666666666666666667 * ax2)); + const double expm2ax = 1 - ax2 * (1 - ax2 * (0.5 - 0.166666666666666666667 * ax2)); + for (int n = 1; true; ++n) + { + const double coef = expa2n2[n - 1] * expx2 / (a2 * (n * n) + y * y); + prod2ax *= exp2ax; + prodm2ax *= expm2ax; + sum1 += coef; + sum2 += coef * prodm2ax; + sum3 += coef * prod2ax; - // really = sum5 - sum4 - sum5 += coef * (2 * a) * n * std::sinh((2 * a) * n * x); + // really = sum5 - sum4 + sum5 += coef * (2 * a) * n * std::sinh ((2 * a) * n * x); - // test convergence via sum3 - if (coef * prod2ax < relerr * sum3) - break; + // test convergence via sum3 + if (coef * prod2ax < relerr * sum3) + { + break; + } + } + } + else + { // x > 5e-4, compute sum4 and sum5 separately + expx2 = std::exp (-x * x); + const double exp2ax = std::exp ((2 * a) * x), expm2ax = 1 / exp2ax; + for (int n = 1; true; ++n) + { + const double coef = expa2n2[n - 1] * expx2 / (a2 * (n * n) + y * y); + prod2ax *= exp2ax; + prodm2ax *= expm2ax; + sum1 += coef; + sum2 += coef * prodm2ax; + sum4 += (coef * prodm2ax) * (a * n); + sum3 += coef * prod2ax; + sum5 += (coef * prod2ax) * (a * n); + // test convergence via 
sum5, since this sum has the slowest decay + if ((coef * prod2ax) * (a * n) < relerr * sum5) + { + break; + } + } + } } - } else - { // x > 5e-4, compute sum4 and sum5 separately - expx2 = std::exp(-x * x); - const double exp2ax = std::exp((2 * a) * x), expm2ax = 1 / exp2ax; - for (int n = 1; 1; ++n) - { - const double coef = expa2n2[n - 1] * expx2 / (a2 * (n * n) + y * y); - prod2ax *= exp2ax; - prodm2ax *= expm2ax; - sum1 += coef; - sum2 += coef * prodm2ax; - sum4 += (coef * prodm2ax) * (a * n); - sum3 += coef * prod2ax; - sum5 += (coef * prod2ax) * (a * n); - // test convergence via sum5, since this sum has the slowest decay - if ((coef * prod2ax) * (a * n) < relerr * sum5) - break; - } - } - } - else - { // relerr != DBL_EPSILON, compute exp(-a2*(n*n)) on the fly - const double exp2ax = std::exp((2 * a) * x), expm2ax = 1 / exp2ax; - if (x < 5e-4) - { // compute sum4 and sum5 together as sum5-sum4 - const double x2 = x * x; - expx2 = 1 - x2 * (1 - 0.5 * x2); // exp(-x*x) via Taylor - for (int n = 1; 1; ++n) - { - const double coef = exp(-a2 * (n * n)) * expx2 / (a2 * (n * n) + y * y); - prod2ax *= exp2ax; - prodm2ax *= expm2ax; - sum1 += coef; - sum2 += coef * prodm2ax; - sum3 += coef * prod2ax; + { // relerr != DBL_EPSILON, compute exp(-a2*(n*n)) on the fly + const double exp2ax = std::exp ((2 * a) * x), expm2ax = 1 / exp2ax; + if (x < 5e-4) + { // compute sum4 and sum5 together as sum5-sum4 + const double x2 = x * x; + expx2 = 1 - x2 * (1 - 0.5 * x2); // exp(-x*x) via Taylor + for (int n = 1; true; ++n) + { + const double coef = exp (-a2 * (n * n)) * expx2 / (a2 * (n * n) + y * y); + prod2ax *= exp2ax; + prodm2ax *= expm2ax; + sum1 += coef; + sum2 += coef * prodm2ax; + sum3 += coef * prod2ax; - // really = sum5 - sum4 - sum5 += coef * (2 * a) * n * std::sinh((2 * a) * n * x); + // really = sum5 - sum4 + sum5 += coef * (2 * a) * n * std::sinh ((2 * a) * n * x); - // test convergence via sum3 - if (coef * prod2ax < relerr * sum3) - break; + // test convergence 
via sum3 + if (coef * prod2ax < relerr * sum3) + { + break; + } + } + } + else + { // x > 5e-4, compute sum4 and sum5 separately + expx2 = std::exp (-x * x); + for (int n = 1; true; ++n) + { + const double coef = std::exp (-a2 * (n * n)) * expx2 / (a2 * (n * n) + y * y); + prod2ax *= exp2ax; + prodm2ax *= expm2ax; + sum1 += coef; + sum2 += coef * prodm2ax; + sum4 += (coef * prodm2ax) * (a * n); + sum3 += coef * prod2ax; + sum5 += (coef * prod2ax) * (a * n); + // test convergence via sum5, since this sum has the slowest decay + if ((coef * prod2ax) * (a * n) < relerr * sum5) + { + break; + } + } + } + } + const double expx2erfcxy = // avoid spurious overflow for large negative y + y > -6 // for y < -6, erfcx(y) = 2*exp(y*y) to double precision + ? expx2 * erfcx (y) + : 2 * std::exp (y * y - x * x); + if (y > 5) + { // imaginary terms cancel + const double sinxy = std::sin (x * y); + ret = (expx2erfcxy - c * y * sum1) * cos (2 * x * y) + + (c * x * expx2) * sinxy * sinc (x * y, sinxy); } - } else - { // x > 5e-4, compute sum4 and sum5 separately - expx2 = std::exp(-x * x); - for (int n = 1; 1; ++n) { - const double coef = std::exp(-a2 * (n * n)) * expx2 / (a2 * (n * n) + y * y); - prod2ax *= exp2ax; - prodm2ax *= expm2ax; - sum1 += coef; - sum2 += coef * prodm2ax; - sum4 += (coef * prodm2ax) * (a * n); - sum3 += coef * prod2ax; - sum5 += (coef * prod2ax) * (a * n); - // test convergence via sum5, since this sum has the slowest decay - if ((coef * prod2ax) * (a * n) < relerr * sum5) - break; + double xs = std::real (z); + const double sinxy = std::sin (xs * y); + const double sin2xy = std::sin (2 * xs * y), cos2xy = std::cos (2 * xs * y); + const double coef1 = expx2erfcxy - c * y * sum1; + const double coef2 = c * xs * expx2; + ret = std::complex (coef1 * cos2xy + coef2 * sinxy * sinc (xs * y, sinxy), + coef2 * sinc (2 * xs * y, sin2xy) - coef1 * sin2xy); } - } } - const double expx2erfcxy = // avoid spurious overflow for large negative y - y > -6 // for y < -6, 
erfcx(y) = 2*exp(y*y) to double precision - ? expx2 * erfcx(y) - : 2 * std::exp(y * y - x * x); - if (y > 5) - { // imaginary terms cancel - const double sinxy = std::sin(x * y); - ret = (expx2erfcxy - c * y * sum1) * cos(2 * x * y) + (c * x * expx2) * sinxy * sinc(x * y, sinxy); - } - else - { - double xs = std::real(z); - const double sinxy = std::sin(xs * y); - const double sin2xy = std::sin(2 * xs * y), cos2xy = std::cos(2 * xs * y); - const double coef1 = expx2erfcxy - c * y * sum1; - const double coef2 = c * xs * expx2; - ret = std::complex(coef1 * cos2xy + coef2 * sinxy * sinc(xs * y, sinxy), - coef2 * sinc(2 * xs * y, sin2xy) - coef1 * sin2xy); - } - } else - { // x large: only sum3 & sum5 contribute (see above note) - if (std::isnan(x)) - return std::complex(x, x); - if (std::isnan(y)) - return std::complex(y, y); + { // x large: only sum3 & sum5 contribute (see above note) + if (std::isnan (x)) + { + return std::complex (x, x); + } + if (std::isnan (y)) + { + return std::complex (y, y); + } - ret = std::exp(-x * x); // |y| < 1e-10, so we only need exp(-x*x) term - // (round instead of ceil as in original paper; note that x/a > 1 here) - double n0 = std::floor(x / a + 0.5); // sum in both directions, starting at n0 - double dx = a * n0 - x; - sum3 = std::exp(-dx * dx) / (a2 * (n0 * n0) + y * y); - sum5 = a * n0 * sum3; - double exp1 = std::exp(4 * a * dx), exp1dn = 1; - int dn = 0; - for (dn = 1; n0 - dn > 0; ++dn) - { // loop over n0-dn and n0+dn terms - double np = n0 + dn, nm = n0 - dn; - double tp = exp(-(a * dn + dx) * (a * dn + dx)); - double tm = tp * (exp1dn *= exp1); // trick to get tm from tp - tp /= (a2 * (np * np) + y * y); - tm /= (a2 * (nm * nm) + y * y); - sum3 += tp + tm; - sum5 += a * (np * tp + nm * tm); - if (a * (np * tp + nm * tm) < relerr * sum5) - return ret - + std::complex((0.5 * c) * y * (sum2 + sum3), - (0.5 * c) * copysign(sum5 - sum4, std::real(z))); - ; - } - while (1) - { // loop over n0+dn terms only (since n0-dn <= 0) - 
double np = n0 + dn++; - double tp = std::exp(-(a * dn + dx) * (a * dn + dx)) / (a2 * (np * np) + y * y); - sum3 += tp; - sum5 += a * np * tp; - if (a * np * tp < relerr * sum5) - return ret - + std::complex((0.5 * c) * y * (sum2 + sum3), - (0.5 * c) * copysign(sum5 - sum4, std::real(z))); - ; + ret = std::exp (-x * x); // |y| < 1e-10, so we only need exp(-x*x) term + // (round instead of ceil as in original paper; note that x/a > 1 here) + double n0 = std::floor (x / a + 0.5); // sum in both directions, starting at n0 + double dx = a * n0 - x; + sum3 = std::exp (-dx * dx) / (a2 * (n0 * n0) + y * y); + sum5 = a * n0 * sum3; + double exp1 = std::exp (4 * a * dx), exp1dn = 1; + int dn = 0; + for (dn = 1; n0 - dn > 0; ++dn) + { // loop over n0-dn and n0+dn terms + double np = n0 + dn, nm = n0 - dn; + double tp = exp (-(a * dn + dx) * (a * dn + dx)); + double tm = tp * (exp1dn *= exp1); // trick to get tm from tp + tp /= (a2 * (np * np) + y * y); + tm /= (a2 * (nm * nm) + y * y); + sum3 += tp + tm; + sum5 += a * (np * tp + nm * tm); + if (a * (np * tp + nm * tm) < relerr * sum5) + { + return ret + + std::complex ((0.5 * c) * y * (sum2 + sum3), + (0.5 * c) * copysign (sum5 - sum4, std::real (z))); + }; + } + while (true) + { // loop over n0+dn terms only (since n0-dn <= 0) + double np = n0 + dn++; + double tp = std::exp (-(a * dn + dx) * (a * dn + dx)) / (a2 * (np * np) + y * y); + sum3 += tp; + sum5 += a * np * tp; + if (a * np * tp < relerr * sum5) + { + return ret + + std::complex ((0.5 * c) * y * (sum2 + sum3), + (0.5 * c) * copysign (sum5 - sum4, std::real (z))); + }; + } } - } - return ret + std::complex((0.5 * c) * y * (sum2 + sum3), (0.5 * c) * copysign(sum5 - sum4, std::real(z))); + return ret + + std::complex ((0.5 * c) * y * (sum2 + sum3), (0.5 * c) * copysign (sum5 - sum4, std::real (z))); ; } -double ErrorFunc::scaled_w_im(double x) +double + ErrorFunc::scaled_w_im (double x) { if (x >= 0) - { - if (x > 45) - { // continued-fraction expansion is faster - 
const double ispi = 0.56418958354775628694807945156; // 1 / sqrt(pi) - if (x > 5e7) // 1-term expansion, important to avoid overflow - return ispi / x; - /* 5-term expansion (rely on compiler for CSE), simplified from: - ispi / (x-0.5/(x-1/(x-1.5/(x-2/x)))) */ - return ispi * ((x * x) * (x * x - 4.5) + 2) / (x * ((x * x) * (x * x - 5) + 3.75)); + { + if (x > 45) + { // continued-fraction expansion is faster + const double ispi = 0.56418958354775628694807945156; // 1 / sqrt(pi) + if (x > 5e7) + { // 1-term expansion, important to avoid overflow + return ispi / x; + } + /* 5-term expansion (rely on compiler for CSE), simplified from: + ispi / (x-0.5/(x-1/(x-1.5/(x-2/x)))) */ + return ispi * ((x * x) * (x * x - 4.5) + 2) / (x * ((x * x) * (x * x - 5) + 3.75)); + } + return w_im_y100 (100 / (1 + x), x); } - return w_im_y100(100 / (1 + x), x); - } else - { // = -FADDEEVA(w_im)(-x) - if (x < -45) - { // continued-fraction expansion is faster - const double ispi = 0.56418958354775628694807945156; // 1 / sqrt(pi) - if (x < -5e7) // 1-term expansion, important to avoid overflow - return ispi / x; - /* 5-term expansion (rely on compiler for CSE), simplified from: - ispi / (x-0.5/(x-1/(x-1.5/(x-2/x)))) */ - return ispi * ((x * x) * (x * x - 4.5) + 2) / (x * ((x * x) * (x * x - 5) + 3.75)); + { // = -FADDEEVA(w_im)(-x) + if (x < -45) + { // continued-fraction expansion is faster + const double ispi = 0.56418958354775628694807945156; // 1 / sqrt(pi) + if (x < -5e7) + { // 1-term expansion, important to avoid overflow + return ispi / x; + } + /* 5-term expansion (rely on compiler for CSE), simplified from: + ispi / (x-0.5/(x-1/(x-1.5/(x-2/x)))) */ + return ispi * ((x * x) * (x * x - 4.5) + 2) / (x * ((x * x) * (x * x - 5) + 3.75)); + } + return -w_im_y100 (100 / (1 - x), -x); } - return -w_im_y100(100 / (1 - x), -x); - } } -std::complex ErrorFunc::erf(std::complex z, double relerr) +std::complex + ErrorFunc::erf (std::complex z, double relerr) { - double x = std::real(z), y = 
std::imag(z); + double x = std::real (z), y = std::imag (z); if (y == 0) - return std::complex(std::erf(x), - y); // preserve sign of 0 - if (x == 0) // handle separately for speed & handling of y = Inf or NaN - return std::complex(x, // preserve sign of 0 - y * y > 720 ? (y > 0 ? Inf : -Inf) : std::exp(y * y) * scaled_w_im(y)); + { + return std::complex (std::erf (x), + y); // preserve sign of 0 + } + if (x == 0) + { // handle separately for speed & handling of y = Inf or NaN + return std::complex (x, // preserve sign of 0 + y * y > 720 ? (y > 0 ? Inf : -Inf) : std::exp (y * y) * scaled_w_im (y)); + } double mRe_z2 = (y - x) * (x + y); // Re(-z^2), being careful of overflow double mIm_z2 = -2 * x * y; // Im(-z^2) - if (mRe_z2 < -750) // underflow - return (x >= 0 ? 1.0 : -1.0); + if (mRe_z2 < -750) + { // underflow + return (x >= 0 ? 1.0 : -1.0); + } // Use Taylor series for small |z|, to avoid cancellation inaccuracy // erf(z) = 2/sqrt(pi) * z * (1 - z^2/3 + z^4/10 - z^6/42 + z^8/216 + ...) 
- auto taylor = [&]() -> std::complex { - std::complex mz2 = std::complex(mRe_z2, mIm_z2); // -z^2 - return z - * (1.1283791670955125739 - + mz2 - * (0.37612638903183752464 - + mz2 - * (0.11283791670955125739 - + mz2 * (0.026866170645131251760 + mz2 * 0.0052239776254421878422)))); - }; + auto taylor = [&] () -> std::complex + { + std::complex mz2 = std::complex (mRe_z2, mIm_z2); // -z^2 + return z + * (1.1283791670955125739 + + mz2 + * (0.37612638903183752464 + + mz2 + * (0.11283791670955125739 + + mz2 * (0.026866170645131251760 + mz2 * 0.0052239776254421878422)))); + }; /* for small |x| and small |xy|, use Taylor series to avoid cancellation inaccuracy: @@ -336,1577 +375,1705 @@ std::complex ErrorFunc::erf(std::complex z, double relerr) where: erf(iy) = exp(y^2) * Im[w(y)] */ - auto taylor_erfi = [&]() -> std::complex { - double x2 = x * x, y2 = y * y; - double expy2 = std::exp(y2); - return std::complex( - expy2 * x - * (1.1283791670955125739 - x2 * (0.37612638903183752464 + 0.75225277806367504925 * y2) - + x2 * x2 * (0.11283791670955125739 + y2 * (0.45135166683820502956 + 0.15045055561273500986 * y2))), - expy2 - * (scaled_w_im(y) - - x2 * y * (1.1283791670955125739 - x2 * (0.56418958354775628695 + 0.37612638903183752464 * y2)))); - }; + auto taylor_erfi = [&] () -> std::complex + { + double x2 = x * x, y2 = y * y; + double expy2 = std::exp (y2); + return std::complex ( + expy2 * x + * (1.1283791670955125739 - x2 * (0.37612638903183752464 + 0.75225277806367504925 * y2) + + x2 * x2 + * (0.11283791670955125739 + y2 * (0.45135166683820502956 + 0.15045055561273500986 * y2))), + expy2 + * (scaled_w_im (y) + - x2 * y + * (1.1283791670955125739 - x2 * (0.56418958354775628695 + 0.37612638903183752464 * y2)))); + }; /* Handle positive and negative x via different formulas, using the mirror symmetries of w, to avoid overflow/underflow problems from multiplying exponentially large and small quantities. 
*/ if (x >= 0) - { - if (x < 8e-2) { - if (std::fabs(y) < 1e-2) - return taylor(); - else if (std::fabs(mIm_z2) < 5e-3 && x < 5e-3) - return taylor_erfi(); + if (x < 8e-2) + { + if (std::fabs (y) < 1e-2) + { + return taylor (); + } + else if (std::fabs (mIm_z2) < 5e-3 && x < 5e-3) + { + return taylor_erfi (); + } + } + /* don't use complex exp function, since that will produce spurious NaN + values when multiplying w in an overflow situation. */ + return 1.0 + - std::exp (mRe_z2) + * (std::complex (std::cos (mIm_z2), std::sin (mIm_z2)) + * scaled_w (std::complex (-y, x), relerr)); } - /* don't use complex exp function, since that will produce spurious NaN - values when multiplying w in an overflow situation. */ - return 1.0 - - std::exp(mRe_z2) - * (std::complex(std::cos(mIm_z2), std::sin(mIm_z2)) - * scaled_w(std::complex(-y, x), relerr)); - } else - { // x < 0 - if (x > -8e-2) - { // duplicate from above to avoid fabs(x) call - if (std::fabs(y) < 1e-2) - return taylor(); - else if (std::fabs(mIm_z2) < 5e-3 && x > -5e-3) - return taylor_erfi(); + { // x < 0 + if (x > -8e-2) + { // duplicate from above to avoid fabs(x) call + if (std::fabs (y) < 1e-2) + { + return taylor (); + } + else if (std::fabs (mIm_z2) < 5e-3 && x > -5e-3) + { + return taylor_erfi (); + } + } + else if (std::isnan (x)) + { + return std::complex (NaN, y == 0 ? 0 : NaN); + } + /* don't use complex exp function, since that will produce spurious NaN + values when multiplying w in an overflow situation. */ + return std::exp (mRe_z2) + * (std::complex (std::cos (mIm_z2), std::sin (mIm_z2)) + * scaled_w (std::complex (y, -x), relerr)) + - 1.0; } - else if (std::isnan(x)) - return std::complex(NaN, y == 0 ? 0 : NaN); - /* don't use complex exp function, since that will produce spurious NaN - values when multiplying w in an overflow situation. 
*/ - return std::exp(mRe_z2) - * (std::complex(std::cos(mIm_z2), std::sin(mIm_z2)) - * scaled_w(std::complex(y, -x), relerr)) - - 1.0; - } } // erfi(x) = -i erf(ix) -double ErrorFunc::erfi(double x) +double + ErrorFunc::erfi (double x) { - return x * x > 720 ? (x > 0 ? Inf : -Inf) : std::exp(x * x) * scaled_w_im(x); + return x * x > 720 ? (x > 0 ? Inf : -Inf) : std::exp (x * x) * scaled_w_im (x); } // erfi(z) = -i erf(iz) -std::complex ErrorFunc::erfi(std::complex z, double relerr) +std::complex + ErrorFunc::erfi (std::complex z, double relerr) { - std::complex e = erf(std::complex(-std::imag(z), std::real(z)), relerr); - return std::complex(std::imag(e), -std::real(e)); + std::complex e = erf (std::complex (-std::imag (z), std::real (z)), relerr); + return std::complex (std::imag (e), -std::real (e)); } -double ErrorFunc::erfcx(double x) // exp(z^2) erfc(z) +double + ErrorFunc::erfcx (double x) // exp(z^2) erfc(z) { - return std::exp(x * x) * std::erfc(x); + return std::exp (x * x) * std::erfc (x); } -std::complex ErrorFunc::erfcx(std::complex z, double relerr) // exp(z^2) erfc(z) +std::complex + ErrorFunc::erfcx (std::complex z, double relerr) // exp(z^2) erfc(z) { - return scaled_w(std::complex(-std::imag(z), std::real(z)), relerr); + return scaled_w (std::complex (-std::imag (z), std::real (z)), relerr); } // erfc(z) = 1 - erf(z) -std::complex ErrorFunc::erfc(std::complex z, double relerr) +std::complex + ErrorFunc::erfc (std::complex z, double relerr) { - double x = std::real(z), y = std::imag(z); + double x = std::real (z), y = std::imag (z); if (x == 0.) - return std::complex(1, - /* handle y -> Inf limit manually, since - exp(y^2) -> Inf but Im[w(y)] -> 0, so - IEEE will give us a NaN when it should be Inf */ - y * y > 720 ? (y > 0 ? 
-Inf : Inf) : -std::exp(y * y) * scaled_w_im(y)); + { + return std::complex (1, + /* handle y -> Inf limit manually, since + exp(y^2) -> Inf but Im[w(y)] -> 0, so + IEEE will give us a NaN when it should be Inf */ + y * y > 720 ? (y > 0 ? -Inf : Inf) : -std::exp (y * y) * scaled_w_im (y)); + } if (y == 0.) - { - if (x * x > 750) // underflow - return std::complex(x >= 0 ? 0.0 : 2.0, - -y); // preserve sign of 0 - return std::complex(x >= 0 ? std::exp(-x * x) * erfcx(x) : 2. - std::exp(-x * x) * erfcx(-x), - -y); // preserve sign of zero - } + { + if (x * x > 750) + { // underflow + return std::complex (x >= 0 ? 0.0 : 2.0, + -y); // preserve sign of 0 + } + return std::complex (x >= 0 ? std::exp (-x * x) * erfcx (x) : 2. - std::exp (-x * x) * erfcx (-x), + -y); // preserve sign of zero + } double mRe_z2 = (y - x) * (x + y); // Re(-z^2), being careful of overflow double mIm_z2 = -2 * x * y; // Im(-z^2) - if (mRe_z2 < -750) // underflow - return (x >= 0 ? 0.0 : 2.0); + if (mRe_z2 < -750) + { // underflow + return (x >= 0 ? 
0.0 : 2.0); + } if (x >= 0) - return std::exp(std::complex(mRe_z2, mIm_z2)) * scaled_w(std::complex(-y, x), relerr); + { + return std::exp (std::complex (mRe_z2, mIm_z2)) * scaled_w (std::complex (-y, x), relerr); + } else - return 2.0 - std::exp(std::complex(mRe_z2, mIm_z2)) * scaled_w(std::complex(y, -x), relerr); + { + return 2.0 + - std::exp (std::complex (mRe_z2, mIm_z2)) * scaled_w (std::complex (y, -x), relerr); + } } -double ErrorFunc::w_im_y100(double y100, double x) +double + ErrorFunc::w_im_y100 (double y100, double x) { switch ((int)y100) - { - case 0: { - double t = 2 * y100 - 1; - return 0.28351593328822191546e-2 - + (0.28494783221378400759e-2 - + (0.14427470563276734183e-4 - + (0.10939723080231588129e-6 - + (0.92474307943275042045e-9 - + (0.89128907666450075245e-11 + 0.92974121935111111110e-13 * t) * t) - * t) - * t) - * t) - * t; - } - case 1: { - double t = 2 * y100 - 3; - return 0.85927161243940350562e-2 - + (0.29085312941641339862e-2 - + (0.15106783707725582090e-4 - + (0.11716709978531327367e-6 - + (0.10197387816021040024e-8 - + (0.10122678863073360769e-10 + 0.10917479678400000000e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 2: { - double t = 2 * y100 - 5; - return 0.14471159831187703054e-1 - + (0.29703978970263836210e-2 - + (0.15835096760173030976e-4 - + (0.12574803383199211596e-6 - + (0.11278672159518415848e-8 - + (0.11547462300333495797e-10 + 0.12894535335111111111e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 3: { - double t = 2 * y100 - 7; - return 0.20476320420324610618e-1 - + (0.30352843012898665856e-2 - + (0.16617609387003727409e-4 - + (0.13525429711163116103e-6 - + (0.12515095552507169013e-8 - + (0.13235687543603382345e-10 + 0.15326595042666666667e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 4: { - double t = 2 * y100 - 9; - return 0.26614461952489004566e-1 - + (0.31034189276234947088e-2 - + (0.17460268109986214274e-4 - + (0.14582130824485709573e-6 - + (0.13935959083809746345e-8 - + (0.15249438072998932900e-10 + 
0.18344741882133333333e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 5: { - double t = 2 * y100 - 11; - return 0.32892330248093586215e-1 - + (0.31750557067975068584e-2 - + (0.18369907582308672632e-4 - + (0.15761063702089457882e-6 - + (0.15577638230480894382e-8 - + (0.17663868462699097951e-10 - + (0.22126732680711111111e-12 + 0.30273474177737853668e-14 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 6: { - double t = 2 * y100 - 13; - return 0.39317207681134336024e-1 - + (0.32504779701937539333e-2 - + (0.19354426046513400534e-4 - + (0.17081646971321290539e-6 - + (0.17485733959327106250e-8 - + (0.20593687304921961410e-10 - + (0.26917401949155555556e-12 + 0.38562123837725712270e-14 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 7: { - double t = 2 * y100 - 15; - return 0.45896976511367738235e-1 - + (0.33300031273110976165e-2 - + (0.20423005398039037313e-4 - + (0.18567412470376467303e-6 - + (0.19718038363586588213e-8 - + (0.24175006536781219807e-10 - + (0.33059982791466666666e-12 + 0.49756574284439426165e-14 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 8: { - double t = 2 * y100 - 17; - return 0.52640192524848962855e-1 - + (0.34139883358846720806e-2 - + (0.21586390240603337337e-4 - + (0.20247136501568904646e-6 - + (0.22348696948197102935e-8 - + (0.28597516301950162548e-10 - + (0.41045502119111111110e-12 + 0.65151614515238361946e-14 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 9: { - double t = 2 * y100 - 19; - return 0.59556171228656770456e-1 - + (0.35028374386648914444e-2 - + (0.22857246150998562824e-4 - + (0.22156372146525190679e-6 - + (0.25474171590893813583e-8 - + (0.34122390890697400584e-10 - + (0.51593189879111111110e-12 + 0.86775076853908006938e-14 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 10: { - double t = 2 * y100 - 21; - return 0.66655089485108212551e-1 - + (0.35970095381271285568e-2 - + (0.24250626164318672928e-4 - + (0.24339561521785040536e-6 - + (0.29221990406518411415e-8 - + 
(0.41117013527967776467e-10 - + (0.65786450716444444445e-12 + 0.11791885745450623331e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 11: { - double t = 2 * y100 - 23; - return 0.73948106345519174661e-1 - + (0.36970297216569341748e-2 - + (0.25784588137312868792e-4 - + (0.26853012002366752770e-6 - + (0.33763958861206729592e-8 - + (0.50111549981376976397e-10 - + (0.85313857496888888890e-12 + 0.16417079927706899860e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 12: { - double t = 2 * y100 - 25; - return 0.81447508065002963203e-1 - + (0.38035026606492705117e-2 - + (0.27481027572231851896e-4 - + (0.29769200731832331364e-6 - + (0.39336816287457655076e-8 - + (0.61895471132038157624e-10 - + (0.11292303213511111111e-11 + 0.23558532213703884304e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 13: { - double t = 2 * y100 - 27; - return 0.89166884027582716628e-1 - + (0.39171301322438946014e-2 - + (0.29366827260422311668e-4 - + (0.33183204390350724895e-6 - + (0.46276006281647330524e-8 - + (0.77692631378169813324e-10 - + (0.15335153258844444444e-11 + 0.35183103415916026911e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 14: { - double t = 2 * y100 - 29; - return 0.97121342888032322019e-1 - + (0.40387340353207909514e-2 - + (0.31475490395950776930e-4 - + (0.37222714227125135042e-6 - + (0.55074373178613809996e-8 - + (0.99509175283990337944e-10 - + (0.21552645758222222222e-11 + 0.55728651431872687605e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 15: { - double t = 2 * y100 - 31; - return 0.10532778218603311137e0 - + (0.41692873614065380607e-2 - + (0.33849549774889456984e-4 - + (0.42064596193692630143e-6 - + (0.66494579697622432987e-8 - + (0.13094103581931802337e-9 - + (0.31896187409777777778e-11 + 0.97271974184476560742e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 16: { - double t = 2 * y100 - 33; - return 0.11380523107427108222e0 - + (0.43099572287871821013e-2 - + (0.36544324341565929930e-4 - + 
(0.47965044028581857764e-6 - + (0.81819034238463698796e-8 - + (0.17934133239549647357e-9 - + (0.50956666166186293627e-11 - + (0.18850487318190638010e-12 + 0.79697813173519853340e-14 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 17: { - double t = 2 * y100 - 35; - return 0.12257529703447467345e0 - + (0.44621675710026986366e-2 - + (0.39634304721292440285e-4 - + (0.55321553769873381819e-6 - + (0.10343619428848520870e-7 - + (0.26033830170470368088e-9 - + (0.87743837749108025357e-11 - + (0.34427092430230063401e-12 + 0.10205506615709843189e-13 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 18: { - double t = 2 * y100 - 37; - return 0.13166276955656699478e0 - + (0.46276970481783001803e-2 - + (0.43225026380496399310e-4 - + (0.64799164020016902656e-6 - + (0.13580082794704641782e-7 - + (0.39839800853954313927e-9 - + (0.14431142411840000000e-10 + 0.42193457308830027541e-12 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 19: { - double t = 2 * y100 - 39; - return 0.14109647869803356475e0 - + (0.48088424418545347758e-2 - + (0.47474504753352150205e-4 - + (0.77509866468724360352e-6 - + (0.18536851570794291724e-7 - + (0.60146623257887570439e-9 - + (0.18533978397305276318e-10 - + (0.41033845938901048380e-13 - 0.46160680279304825485e-13 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 20: { - double t = 2 * y100 - 41; - return 0.15091057940548936603e0 - + (0.50086864672004685703e-2 - + (0.52622482832192230762e-4 - + (0.95034664722040355212e-6 - + (0.25614261331144718769e-7 - + (0.80183196716888606252e-9 - + (0.12282524750534352272e-10 - + (-0.10531774117332273617e-11 - 0.86157181395039646412e-13 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 21: { - double t = 2 * y100 - 43; - return 0.16114648116017010770e0 - + (0.52314661581655369795e-2 - + (0.59005534545908331315e-4 - + (0.11885518333915387760e-5 - + (0.33975801443239949256e-7 - + (0.82111547144080388610e-9 - + (-0.12357674017312854138e-10 - + 
(-0.24355112256914479176e-11 - 0.75155506863572930844e-13 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 22: { - double t = 2 * y100 - 45; - return 0.17185551279680451144e0 - + (0.54829002967599420860e-2 - + (0.67013226658738082118e-4 - + (0.14897400671425088807e-5 - + (0.40690283917126153701e-7 - + (0.44060872913473778318e-9 - + (-0.52641873433280000000e-10 - 0.30940587864543343124e-11 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 23: { - double t = 2 * y100 - 47; - return 0.18310194559815257381e0 - + (0.57701559375966953174e-2 - + (0.76948789401735193483e-4 - + (0.18227569842290822512e-5 - + (0.41092208344387212276e-7 - + (-0.44009499965694442143e-9 - + (-0.92195414685628803451e-10 - + (-0.22657389705721753299e-11 + 0.10004784908106839254e-12 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 24: { - double t = 2 * y100 - 49; - return 0.19496527191546630345e0 - + (0.61010853144364724856e-2 - + (0.88812881056342004864e-4 - + (0.21180686746360261031e-5 - + (0.30652145555130049203e-7 - + (-0.16841328574105890409e-8 - + (-0.11008129460612823934e-9 - + (-0.12180794204544515779e-12 + 0.15703325634590334097e-12 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 25: { - double t = 2 * y100 - 51; - return 0.20754006813966575720e0 - + (0.64825787724922073908e-2 - + (0.10209599627522311893e-3 - + (0.22785233392557600468e-5 - + (0.73495224449907568402e-8 - + (-0.29442705974150112783e-8 - + (-0.94082603434315016546e-10 - + (0.23609990400179321267e-11 + 0.14141908654269023788e-12 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 26: { - double t = 2 * y100 - 53; - return 0.22093185554845172146e0 - + (0.69182878150187964499e-2 - + (0.11568723331156335712e-3 - + (0.22060577946323627739e-5 - + (-0.26929730679360840096e-7 - + (-0.38176506152362058013e-8 - + (-0.47399503861054459243e-10 - + (0.40953700187172127264e-11 + 0.69157730376118511127e-13 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 27: { - 
double t = 2 * y100 - 55; - return 0.23524827304057813918e0 - + (0.74063350762008734520e-2 - + (0.12796333874615790348e-3 - + (0.18327267316171054273e-5 - + (-0.66742910737957100098e-7 - + (-0.40204740975496797870e-8 - + (0.14515984139495745330e-10 - + (0.44921608954536047975e-11 - 0.18583341338983776219e-13 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 28: { - double t = 2 * y100 - 57; - return 0.25058626331812744775e0 - + (0.79377285151602061328e-2 - + (0.13704268650417478346e-3 - + (0.11427511739544695861e-5 - + (-0.10485442447768377485e-6 - + (-0.34850364756499369763e-8 - + (0.72656453829502179208e-10 - + (0.36195460197779299406e-11 - 0.84882136022200714710e-13 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 29: { - double t = 2 * y100 - 59; - return 0.26701724900280689785e0 - + (0.84959936119625864274e-2 - + (0.14112359443938883232e-3 - + (0.17800427288596909634e-6 - + (-0.13443492107643109071e-6 - + (-0.23512456315677680293e-8 - + (0.11245846264695936769e-9 - + (0.19850501334649565404e-11 - 0.11284666134635050832e-12 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 30: { - double t = 2 * y100 - 61; - return 0.28457293586253654144e0 - + (0.90581563892650431899e-2 - + (0.13880520331140646738e-3 - + (-0.97262302362522896157e-6 - + (-0.15077100040254187366e-6 - + (-0.88574317464577116689e-9 - + (0.12760311125637474581e-9 - + (0.20155151018282695055e-12 - 0.10514169375181734921e-12 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 31: { - double t = 2 * y100 - 63; - return 0.30323425595617385705e0 - + (0.95968346790597422934e-2 - + (0.12931067776725883939e-3 - + (-0.21938741702795543986e-5 - + (-0.15202888584907373963e-6 - + (0.61788350541116331411e-9 - + (0.11957835742791248256e-9 - + (-0.12598179834007710908e-11 - 0.75151817129574614194e-13 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 32: { - double t = 2 * y100 - 65; - return 0.32292521181517384379e0 - + (0.10082957727001199408e-1 - + 
(0.11257589426154962226e-3 - + (-0.33670890319327881129e-5 - + (-0.13910529040004008158e-6 - + (0.19170714373047512945e-8 - + (0.94840222377720494290e-10 - + (-0.21650018351795353201e-11 - 0.37875211678024922689e-13 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 33: { - double t = 2 * y100 - 67; - return 0.34351233557911753862e0 - + (0.10488575435572745309e-1 - + (0.89209444197248726614e-4 - + (-0.43893459576483345364e-5 - + (-0.11488595830450424419e-6 - + (0.28599494117122464806e-8 - + (0.61537542799857777779e-10 - 0.24935749227658002212e-11 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 34: { - double t = 2 * y100 - 69; - return 0.36480946642143669093e0 - + (0.10789304203431861366e-1 - + (0.60357993745283076834e-4 - + (-0.51855862174130669389e-5 - + (-0.83291664087289801313e-7 - + (0.33898011178582671546e-8 - + (0.27082948188277716482e-10 - + (-0.23603379397408694974e-11 + 0.19328087692252869842e-13 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 35: { - double t = 2 * y100 - 71; - return 0.38658679935694939199e0 - + (0.10966119158288804999e-1 - + (0.27521612041849561426e-4 - + (-0.57132774537670953638e-5 - + (-0.48404772799207914899e-7 - + (0.35268354132474570493e-8 - + (-0.32383477652514618094e-11 - + (-0.19334202915190442501e-11 + 0.32333189861286460270e-13 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 36: { - double t = 2 * y100 - 73; - return 0.40858275583808707870e0 - + (0.11006378016848466550e-1 - + (-0.76396376685213286033e-5 - + (-0.59609835484245791439e-5 - + (-0.13834610033859313213e-7 - + (0.33406952974861448790e-8 - + (-0.26474915974296612559e-10 - + (-0.13750229270354351983e-11 + 0.36169366979417390637e-13 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 37: { - double t = 2 * y100 - 75; - return 0.43051714914006682977e0 - + (0.10904106549500816155e-1 - + (-0.43477527256787216909e-4 - + (-0.59429739547798343948e-5 - + (0.17639200194091885949e-7 - + (0.29235991689639918688e-8 - + 
(-0.41718791216277812879e-10 - + (-0.81023337739508049606e-12 + 0.33618915934461994428e-13 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 38: { - double t = 2 * y100 - 77; - return 0.45210428135559607406e0 - + (0.10659670756384400554e-1 - + (-0.78488639913256978087e-4 - + (-0.56919860886214735936e-5 - + (0.44181850467477733407e-7 - + (0.23694306174312688151e-8 - + (-0.49492621596685443247e-10 - + (-0.31827275712126287222e-12 + 0.27494438742721623654e-13 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 39: { - double t = 2 * y100 - 79; - return 0.47306491195005224077e0 - + (0.10279006119745977570e-1 - + (-0.11140268171830478306e-3 - + (-0.52518035247451432069e-5 - + (0.64846898158889479518e-7 - + (0.17603624837787337662e-8 - + (-0.51129481592926104316e-10 - + (0.62674584974141049511e-13 + 0.20055478560829935356e-13 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 40: { - double t = 2 * y100 - 81; - return 0.49313638965719857647e0 - + (0.97725799114772017662e-2 - + (-0.14122854267291533334e-3 - + (-0.46707252568834951907e-5 - + (0.79421347979319449524e-7 - + (0.11603027184324708643e-8 - + (-0.48269605844397175946e-10 - + (0.32477251431748571219e-12 + 0.12831052634143527985e-13 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 41: { - double t = 2 * y100 - 83; - return 0.51208057433416004042e0 - + (0.91542422354009224951e-2 - + (-0.16726530230228647275e-3 - + (-0.39964621752527649409e-5 - + (0.88232252903213171454e-7 - + (0.61343113364949928501e-9 - + (-0.42516755603130443051e-10 - + (0.47910437172240209262e-12 + 0.66784341874437478953e-14 * t) * t) - * t) - * t) - * t) - * t) - * t) - * t; - } - case 42: { - double t = 2 * y100 - 85; - return 0.52968945458607484524e0 - + (0.84400880445116786088e-2 - + (-0.18908729783854258774e-3 - + (-0.32725905467782951931e-5 - + (0.91956190588652090659e-7 - + (0.14593989152420122909e-9 - + (-0.35239490687644444445e-10 + 0.54613829888448694898e-12 * t) * t) - * t) - * t) - * 
t) - * t) - * t; - } - case 43: { - double t = 2 * y100 - 87; - return 0.54578857454330070965e0 - + (0.76474155195880295311e-2 - + (-0.20651230590808213884e-3 - + (-0.25364339140543131706e-5 - + (0.91455367999510681979e-7 - + (-0.23061359005297528898e-9 - + (-0.27512928625244444444e-10 + 0.54895806008493285579e-12 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 44: { - double t = 2 * y100 - 89; - return 0.56023851910298493910e0 - + (0.67938321739997196804e-2 - + (-0.21956066613331411760e-3 - + (-0.18181127670443266395e-5 - + (0.87650335075416845987e-7 - + (-0.51548062050366615977e-9 - + (-0.20068462174044444444e-10 + 0.50912654909758187264e-12 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 45: { - double t = 2 * y100 - 91; - return 0.57293478057455721150e0 - + (0.58965321010394044087e-2 - + (-0.22841145229276575597e-3 - + (-0.11404605562013443659e-5 - + (0.81430290992322326296e-7 - + (-0.71512447242755357629e-9 - + (-0.13372664928000000000e-10 + 0.44461498336689298148e-12 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 46: { - double t = 2 * y100 - 93; - return 0.58380635448407827360e0 - + (0.49717469530842831182e-2 - + (-0.23336001540009645365e-3 - + (-0.51952064448608850822e-6 - + (0.73596577815411080511e-7 - + (-0.84020916763091566035e-9 - + (-0.76700972702222222221e-11 + 0.36914462807972467044e-12 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 47: { - double t = 2 * y100 - 95; - return 0.59281340237769489597e0 - + (0.40343592069379730568e-2 - + (-0.23477963738658326185e-3 - + (0.34615944987790224234e-7 - + (0.64832803248395814574e-7 - + (-0.90329163587627007971e-9 - + (-0.30421940400000000000e-11 + 0.29237386653743536669e-12 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 48: { - double t = 2 * y100 - 97; - return 0.59994428743114271918e0 - + (0.30976579788271744329e-2 - + (-0.23308875765700082835e-3 - + (0.51681681023846925160e-6 - + (0.55694594264948268169e-7 - + (-0.91719117313243464652e-9 - + 
(0.53982743680000000000e-12 + 0.22050829296187771142e-12 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 49: { - double t = 2 * y100 - 99; - return 0.60521224471819875444e0 - + (0.21732138012345456060e-2 - + (-0.22872428969625997456e-3 - + (0.92588959922653404233e-6 - + (0.46612665806531930684e-7 - + (-0.89393722514414153351e-9 - + (0.31718550353777777778e-11 + 0.15705458816080549117e-12 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 50: { - double t = 2 * y100 - 101; - return 0.60865189969791123620e0 - + (0.12708480848877451719e-2 - + (-0.22212090111534847166e-3 - + (0.12636236031532793467e-5 - + (0.37904037100232937574e-7 - + (-0.84417089968101223519e-9 - + (0.49843180828444444445e-11 + 0.10355439441049048273e-12 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 51: { - double t = 2 * y100 - 103; - return 0.61031580103499200191e0 - + (0.39867436055861038223e-3 - + (-0.21369573439579869291e-3 - + (0.15339402129026183670e-5 - + (0.29787479206646594442e-7 - + (-0.77687792914228632974e-9 - + (0.61192452741333333334e-11 + 0.60216691829459295780e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 52: { - double t = 2 * y100 - 105; - return 0.61027109047879835868e0 - + (-0.43680904508059878254e-3 - + (-0.20383783788303894442e-3 - + (0.17421743090883439959e-5 - + (0.22400425572175715576e-7 - + (-0.69934719320045128997e-9 - + (0.67152759655111111110e-11 + 0.26419960042578359995e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 53: { - double t = 2 * y100 - 107; - return 0.60859639489217430521e0 - + (-0.12305921390962936873e-2 - + (-0.19290150253894682629e-3 - + (0.18944904654478310128e-5 - + (0.15815530398618149110e-7 - + (-0.61726850580964876070e-9 + 0.68987888999111111110e-11 * t) * t) - * t) - * t) - * t) - * t; - } - case 54: { - double t = 2 * y100 - 109; - return 0.60537899426486075181e0 - + (-0.19790062241395705751e-2 - + (-0.18120271393047062253e-3 - + (0.19974264162313241405e-5 - + (0.10055795094298172492e-7 - + 
(-0.53491997919318263593e-9 - + (0.67794550295111111110e-11 - 0.17059208095741511603e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 55: { - double t = 2 * y100 - 111; - return 0.60071229457904110537e0 - + (-0.26795676776166354354e-2 - + (-0.16901799553627508781e-3 - + (0.20575498324332621581e-5 - + (0.51077165074461745053e-8 - + (-0.45536079828057221858e-9 - + (0.64488005516444444445e-11 - 0.29311677573152766338e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 56: { - double t = 2 * y100 - 113; - return 0.59469361520112714738e0 - + (-0.33308208190600993470e-2 - + (-0.15658501295912405679e-3 - + (0.20812116912895417272e-5 - + (0.93227468760614182021e-9 - + (-0.38066673740116080415e-9 - + (0.59806790359111111110e-11 - 0.36887077278950440597e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 57: { - double t = 2 * y100 - 115; - return 0.58742228631775388268e0 - + (-0.39321858196059227251e-2 - + (-0.14410441141450122535e-3 - + (0.20743790018404020716e-5 - + (-0.25261903811221913762e-8 - + (-0.31212416519526924318e-9 - + (0.54328422462222222221e-11 - 0.40864152484979815972e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 58: { - double t = 2 * y100 - 117; - return 0.57899804200033018447e0 - + (-0.44838157005618913447e-2 - + (-0.13174245966501437965e-3 - + (0.20425306888294362674e-5 - + (-0.53330296023875447782e-8 - + (-0.25041289435539821014e-9 - + (0.48490437205333333334e-11 - 0.42162206939169045177e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 59: { - double t = 2 * y100 - 119; - return 0.56951968796931245974e0 - + (-0.49864649488074868952e-2 - + (-0.11963416583477567125e-3 - + (0.19906021780991036425e-5 - + (-0.75580140299436494248e-8 - + (-0.19576060961919820491e-9 - + (0.42613011928888888890e-11 - 0.41539443304115604377e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 60: { - double t = 2 * y100 - 121; - return 0.55908401930063918964e0 - + (-0.54413711036826877753e-2 - + 
(-0.10788661102511914628e-3 - + (0.19229663322982839331e-5 - + (-0.92714731195118129616e-8 - + (-0.14807038677197394186e-9 - + (0.36920870298666666666e-11 - 0.39603726688419162617e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 61: { - double t = 2 * y100 - 123; - return 0.54778496152925675315e0 - + (-0.58501497933213396670e-2 - + (-0.96582314317855227421e-4 - + (0.18434405235069270228e-5 - + (-0.10541580254317078711e-7 - + (-0.10702303407788943498e-9 - + (0.31563175582222222222e-11 - 0.36829748079110481422e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 62: { - double t = 2 * y100 - 125; - return 0.53571290831682823999e0 - + (-0.62147030670760791791e-2 - + (-0.85782497917111760790e-4 - + (0.17553116363443470478e-5 - + (-0.11432547349815541084e-7 - + (-0.72157091369041330520e-10 - + (0.26630811607111111111e-11 - 0.33578660425893164084e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 63: { - double t = 2 * y100 - 127; - return 0.52295422962048434978e0 - + (-0.65371404367776320720e-2 - + (-0.75530164941473343780e-4 - + (0.16613725797181276790e-5 - + (-0.12003521296598910761e-7 - + (-0.42929753689181106171e-10 - + (0.22170894940444444444e-11 - 0.30117697501065110505e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 64: { - double t = 2 * y100 - 129; - return 0.50959092577577886140e0 - + (-0.68197117603118591766e-2 - + (-0.65852936198953623307e-4 - + (0.15639654113906716939e-5 - + (-0.12308007991056524902e-7 - + (-0.18761997536910939570e-10 - + (0.18198628922666666667e-11 - 0.26638355362285200932e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 65: { - double t = 2 * y100 - 131; - return 0.49570040481823167970e0 - + (-0.70647509397614398066e-2 - + (-0.56765617728962588218e-4 - + (0.14650274449141448497e-5 - + (-0.12393681471984051132e-7 - + (0.92904351801168955424e-12 - + (0.14706755960177777778e-11 - 0.23272455351266325318e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 66: { - double t = 2 * y100 - 
133; - return 0.48135536250935238066e0 - + (-0.72746293327402359783e-2 - + (-0.48272489495730030780e-4 - + (0.13661377309113939689e-5 - + (-0.12302464447599382189e-7 - + (0.16707760028737074907e-10 - + (0.11672928324444444444e-11 - 0.20105801424709924499e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 67: { - double t = 2 * y100 - 135; - return 0.46662374675511439448e0 - + (-0.74517177649528487002e-2 - + (-0.40369318744279128718e-4 - + (0.12685621118898535407e-5 - + (-0.12070791463315156250e-7 - + (0.29105507892605823871e-10 - + (0.90653314645333333334e-12 - 0.17189503312102982646e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 68: { - double t = 2 * y100 - 137; - return 0.45156879030168268778e0 - + (-0.75983560650033817497e-2 - + (-0.33045110380705139759e-4 - + (0.11732956732035040896e-5 - + (-0.11729986947158201869e-7 - + (0.38611905704166441308e-10 - + (0.68468768305777777779e-12 - 0.14549134330396754575e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 69: { - double t = 2 * y100 - 139; - return 0.43624909769330896904e0 - + (-0.77168291040309554679e-2 - + (-0.26283612321339907756e-4 - + (0.10811018836893550820e-5 - + (-0.11306707563739851552e-7 - + (0.45670446788529607380e-10 - + (0.49782492549333333334e-12 - 0.12191983967561779442e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 70: { - double t = 2 * y100 - 141; - return 0.42071877443548481181e0 - + (-0.78093484015052730097e-2 - + (-0.20064596897224934705e-4 - + (0.99254806680671890766e-6 - + (-0.10823412088884741451e-7 - + (0.50677203326904716247e-10 - + (0.34200547594666666666e-12 - 0.10112698698356194618e-13 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 71: { - double t = 2 * y100 - 143; - return 0.40502758809710844280e0 - + (-0.78780384460872937555e-2 - + (-0.14364940764532853112e-4 - + (0.90803709228265217384e-6 - + (-0.10298832847014466907e-7 - + (0.53981671221969478551e-10 - + (0.21342751381333333333e-12 - 0.82975901848387729274e-14 * t) * t) - * 
t) - * t) - * t) - * t) - * t; - } - case 72: { - double t = 2 * y100 - 145; - return 0.38922115269731446690e0 - + (-0.79249269708242064120e-2 - + (-0.91595258799106970453e-5 - + (0.82783535102217576495e-6 - + (-0.97484311059617744437e-8 - + (0.55889029041660225629e-10 - + (0.10851981336888888889e-12 - 0.67278553237853459757e-14 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 73: { - double t = 2 * y100 - 147; - return 0.37334112915460307335e0 - + (-0.79519385109223148791e-2 - + (-0.44219833548840469752e-5 - + (0.75209719038240314732e-6 - + (-0.91848251458553190451e-8 - + (0.56663266668051433844e-10 - + (0.23995894257777777778e-13 - 0.53819475285389344313e-14 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 74: { - double t = 2 * y100 - 149; - return 0.35742543583374223085e0 - + (-0.79608906571527956177e-2 - + (-0.12530071050975781198e-6 - + (0.68088605744900552505e-6 - + (-0.86181844090844164075e-8 - + (0.56530784203816176153e-10 - + (-0.43120012248888888890e-13 - 0.42372603392496813810e-14 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 75: { - double t = 2 * y100 - 151; - return 0.34150846431979618536e0 - + (-0.79534924968773806029e-2 - + (0.37576885610891515813e-5 - + (0.61419263633090524326e-6 - + (-0.80565865409945960125e-8 - + (0.55684175248749269411e-10 - + (-0.95486860764444444445e-13 - 0.32712946432984510595e-14 * t) * t) - * t) - * t) - * t) - * t) - * t; - } - case 76: { - double t = 2 * y100 - 153; - return 0.32562129649136346824e0 - + (-0.79313448067948884309e-2 - + (0.72539159933545300034e-5 - + (0.55195028297415503083e-6 - + (-0.75063365335570475258e-8 - + (0.54281686749699595941e-10 - 0.13545424295111111111e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 77: { - double t = 2 * y100 - 155; - return 0.30979191977078391864e0 - + (-0.78959416264207333695e-2 - + (0.10389774377677210794e-4 - + (0.49404804463196316464e-6 - + (-0.69722488229411164685e-8 - + (0.52469254655951393842e-10 - 0.16507860650666666667e-12 * t) * t) - * 
t) - * t) - * t) - * t; - } - case 78: { - double t = 2 * y100 - 157; - return 0.29404543811214459904e0 - + (-0.78486728990364155356e-2 - + (0.13190885683106990459e-4 - + (0.44034158861387909694e-6 - + (-0.64578942561562616481e-8 - + (0.50354306498006928984e-10 - 0.18614473550222222222e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 79: { - double t = 2 * y100 - 159; - return 0.27840427686253660515e0 - + (-0.77908279176252742013e-2 - + (0.15681928798708548349e-4 - + (0.39066226205099807573e-6 - + (-0.59658144820660420814e-8 - + (0.48030086420373141763e-10 - 0.20018995173333333333e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 80: { - double t = 2 * y100 - 161; - return 0.26288838011163800908e0 - + (-0.77235993576119469018e-2 - + (0.17886516796198660969e-4 - + (0.34482457073472497720e-6 - + (-0.54977066551955420066e-8 - + (0.45572749379147269213e-10 - 0.20852924954666666667e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 81: { - double t = 2 * y100 - 163; - return 0.24751539954181029717e0 - + (-0.76480877165290370975e-2 - + (0.19827114835033977049e-4 - + (0.30263228619976332110e-6 - + (-0.50545814570120129947e-8 - + (0.43043879374212005966e-10 - 0.21228012028444444444e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 82: { - double t = 2 * y100 - 165; - return 0.23230087411688914593e0 - + (-0.75653060136384041587e-2 - + (0.21524991113020016415e-4 - + (0.26388338542539382413e-6 - + (-0.46368974069671446622e-8 - + (0.40492715758206515307e-10 - 0.21238627815111111111e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 83: { - double t = 2 * y100 - 167; - return 0.21725840021297341931e0 - + (-0.74761846305979730439e-2 - + (0.23000194404129495243e-4 - + (0.22837400135642906796e-6 - + (-0.42446743058417541277e-8 - + (0.37958104071765923728e-10 - 0.20963978568888888889e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 84: { - double t = 2 * y100 - 169; - return 0.20239979200788191491e0 - + (-0.73815761980493466516e-2 - + (0.24271552727631854013e-4 - 
+ (0.19590154043390012843e-6 - + (-0.38775884642456551753e-8 - + (0.35470192372162901168e-10 - 0.20470131678222222222e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 85: { - double t = 2 * y100 - 171; - return 0.18773523211558098962e0 - + (-0.72822604530339834448e-2 - + (0.25356688567841293697e-4 - + (0.16626710297744290016e-6 - + (-0.35350521468015310830e-8 - + (0.33051896213898864306e-10 - 0.19811844544000000000e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 86: { - double t = 2 * y100 - 173; - return 0.17327341258479649442e0 - + (-0.71789490089142761950e-2 - + (0.26272046822383820476e-4 - + (0.13927732375657362345e-6 - + (-0.32162794266956859603e-8 - + (0.30720156036105652035e-10 - 0.19034196304000000000e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 87: { - double t = 2 * y100 - 175; - return 0.15902166648328672043e0 - + (-0.70722899934245504034e-2 - + (0.27032932310132226025e-4 - + (0.11474573347816568279e-6 - + (-0.29203404091754665063e-8 - + (0.28487010262547971859e-10 - 0.18174029063111111111e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 88: { - double t = 2 * y100 - 177; - return 0.14498609036610283865e0 - + (-0.69628725220045029273e-2 - + (0.27653554229160596221e-4 - + (0.92493727167393036470e-7 - + (-0.26462055548683583849e-8 - + (0.26360506250989943739e-10 - 0.17261211260444444444e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 89: { - double t = 2 * y100 - 179; - return 0.13117165798208050667e0 - + (-0.68512309830281084723e-2 - + (0.28147075431133863774e-4 - + (0.72351212437979583441e-7 - + (-0.23927816200314358570e-8 - + (0.24345469651209833155e-10 - 0.16319736960000000000e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 90: { - double t = 2 * y100 - 181; - return 0.11758232561160626306e0 - + (-0.67378491192463392927e-2 - + (0.28525664781722907847e-4 - + (0.54156999310046790024e-7 - + (-0.21589405340123827823e-8 - + (0.22444150951727334619e-10 - 0.15368675584000000000e-12 * t) * t) - * t) - * t) - * t) - * t; - } - 
case 91: { - double t = 2 * y100 - 183; - return 0.10422112945361673560e0 - + (-0.66231638959845581564e-2 - + (0.28800551216363918088e-4 - + (0.37758983397952149613e-7 - + (-0.19435423557038933431e-8 - + (0.20656766125421362458e-10 - 0.14422990012444444444e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 92: { - double t = 2 * y100 - 185; - return 0.91090275493541084785e-1 - + (-0.65075691516115160062e-2 - + (0.28982078385527224867e-4 - + (0.23014165807643012781e-7 - + (-0.17454532910249875958e-8 - + (0.18981946442680092373e-10 - 0.13494234691555555556e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 93: { - double t = 2 * y100 - 187; - return 0.78191222288771379358e-1 - + (-0.63914190297303976434e-2 - + (0.29079759021299682675e-4 - + (0.97885458059415717014e-8 - + (-0.15635596116134296819e-8 - + (0.17417110744051331974e-10 - 0.12591151763555555556e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 94: { - double t = 2 * y100 - 189; - return 0.65524757106147402224e-1 - + (-0.62750311956082444159e-2 - + (0.29102328354323449795e-4 - + (-0.20430838882727954582e-8 - + (-0.13967781903855367270e-8 - + (0.15958771833747057569e-10 - 0.11720175765333333333e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 95: { - double t = 2 * y100 - 191; - return 0.53091065838453612773e-1 - + (-0.61586898417077043662e-2 - + (0.29057796072960100710e-4 - + (-0.12597414620517987536e-7 - + (-0.12440642607426861943e-8 - + (0.14602787128447932137e-10 - 0.10885859114666666667e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 96: { - double t = 2 * y100 - 193; - return 0.40889797115352738582e-1 - + (-0.60426484889413678200e-2 - + (0.28953496450191694606e-4 - + (-0.21982952021823718400e-7 - + (-0.11044169117553026211e-8 - + (0.13344562332430552171e-10 - 0.10091231402844444444e-12 * t) * t) - * t) - * t) - * t) - * t; - } - case 97: - case 98: - case 99: - case 100: { // use Taylor expansion for small x (|x| <= 0.0309...) 
- // (2/sqrt(pi)) * (x - 2/3 x^3 + 4/15 x^5 - 8/105 x^7 + 16/945 x^9) - double x2 = x * x; - return x - * (1.1283791670955125739 - - x2 - * (0.75225277806367504925 - - x2 - * (0.30090111122547001970 - - x2 * (0.085971746064420005629 - x2 * 0.016931216931216931217)))); - } - } + { + case 0: + { + double t = 2 * y100 - 1; + return 0.28351593328822191546e-2 + + (0.28494783221378400759e-2 + + (0.14427470563276734183e-4 + + (0.10939723080231588129e-6 + + (0.92474307943275042045e-9 + + (0.89128907666450075245e-11 + 0.92974121935111111110e-13 * t) * t) + * t) + * t) + * t) + * t; + } + case 1: + { + double t = 2 * y100 - 3; + return 0.85927161243940350562e-2 + + (0.29085312941641339862e-2 + + (0.15106783707725582090e-4 + + (0.11716709978531327367e-6 + + (0.10197387816021040024e-8 + + (0.10122678863073360769e-10 + 0.10917479678400000000e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 2: + { + double t = 2 * y100 - 5; + return 0.14471159831187703054e-1 + + (0.29703978970263836210e-2 + + (0.15835096760173030976e-4 + + (0.12574803383199211596e-6 + + (0.11278672159518415848e-8 + + (0.11547462300333495797e-10 + 0.12894535335111111111e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 3: + { + double t = 2 * y100 - 7; + return 0.20476320420324610618e-1 + + (0.30352843012898665856e-2 + + (0.16617609387003727409e-4 + + (0.13525429711163116103e-6 + + (0.12515095552507169013e-8 + + (0.13235687543603382345e-10 + 0.15326595042666666667e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 4: + { + double t = 2 * y100 - 9; + return 0.26614461952489004566e-1 + + (0.31034189276234947088e-2 + + (0.17460268109986214274e-4 + + (0.14582130824485709573e-6 + + (0.13935959083809746345e-8 + + (0.15249438072998932900e-10 + 0.18344741882133333333e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 5: + { + double t = 2 * y100 - 11; + return 0.32892330248093586215e-1 + + (0.31750557067975068584e-2 + + (0.18369907582308672632e-4 + + (0.15761063702089457882e-6 + + (0.15577638230480894382e-8 + 
+ (0.17663868462699097951e-10 + + (0.22126732680711111111e-12 + 0.30273474177737853668e-14 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 6: + { + double t = 2 * y100 - 13; + return 0.39317207681134336024e-1 + + (0.32504779701937539333e-2 + + (0.19354426046513400534e-4 + + (0.17081646971321290539e-6 + + (0.17485733959327106250e-8 + + (0.20593687304921961410e-10 + + (0.26917401949155555556e-12 + 0.38562123837725712270e-14 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 7: + { + double t = 2 * y100 - 15; + return 0.45896976511367738235e-1 + + (0.33300031273110976165e-2 + + (0.20423005398039037313e-4 + + (0.18567412470376467303e-6 + + (0.19718038363586588213e-8 + + (0.24175006536781219807e-10 + + (0.33059982791466666666e-12 + 0.49756574284439426165e-14 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 8: + { + double t = 2 * y100 - 17; + return 0.52640192524848962855e-1 + + (0.34139883358846720806e-2 + + (0.21586390240603337337e-4 + + (0.20247136501568904646e-6 + + (0.22348696948197102935e-8 + + (0.28597516301950162548e-10 + + (0.41045502119111111110e-12 + 0.65151614515238361946e-14 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 9: + { + double t = 2 * y100 - 19; + return 0.59556171228656770456e-1 + + (0.35028374386648914444e-2 + + (0.22857246150998562824e-4 + + (0.22156372146525190679e-6 + + (0.25474171590893813583e-8 + + (0.34122390890697400584e-10 + + (0.51593189879111111110e-12 + 0.86775076853908006938e-14 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 10: + { + double t = 2 * y100 - 21; + return 0.66655089485108212551e-1 + + (0.35970095381271285568e-2 + + (0.24250626164318672928e-4 + + (0.24339561521785040536e-6 + + (0.29221990406518411415e-8 + + (0.41117013527967776467e-10 + + (0.65786450716444444445e-12 + 0.11791885745450623331e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 11: + { + double t = 2 * y100 - 23; + return 0.73948106345519174661e-1 + + (0.36970297216569341748e-2 + + (0.25784588137312868792e-4 + + 
(0.26853012002366752770e-6 + + (0.33763958861206729592e-8 + + (0.50111549981376976397e-10 + + (0.85313857496888888890e-12 + 0.16417079927706899860e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 12: + { + double t = 2 * y100 - 25; + return 0.81447508065002963203e-1 + + (0.38035026606492705117e-2 + + (0.27481027572231851896e-4 + + (0.29769200731832331364e-6 + + (0.39336816287457655076e-8 + + (0.61895471132038157624e-10 + + (0.11292303213511111111e-11 + 0.23558532213703884304e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 13: + { + double t = 2 * y100 - 27; + return 0.89166884027582716628e-1 + + (0.39171301322438946014e-2 + + (0.29366827260422311668e-4 + + (0.33183204390350724895e-6 + + (0.46276006281647330524e-8 + + (0.77692631378169813324e-10 + + (0.15335153258844444444e-11 + 0.35183103415916026911e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 14: + { + double t = 2 * y100 - 29; + return 0.97121342888032322019e-1 + + (0.40387340353207909514e-2 + + (0.31475490395950776930e-4 + + (0.37222714227125135042e-6 + + (0.55074373178613809996e-8 + + (0.99509175283990337944e-10 + + (0.21552645758222222222e-11 + 0.55728651431872687605e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 15: + { + double t = 2 * y100 - 31; + return 0.10532778218603311137e0 + + (0.41692873614065380607e-2 + + (0.33849549774889456984e-4 + + (0.42064596193692630143e-6 + + (0.66494579697622432987e-8 + + (0.13094103581931802337e-9 + + (0.31896187409777777778e-11 + 0.97271974184476560742e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 16: + { + double t = 2 * y100 - 33; + return 0.11380523107427108222e0 + + (0.43099572287871821013e-2 + + (0.36544324341565929930e-4 + + (0.47965044028581857764e-6 + + (0.81819034238463698796e-8 + + (0.17934133239549647357e-9 + + (0.50956666166186293627e-11 + + (0.18850487318190638010e-12 + 0.79697813173519853340e-14 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 17: + { + double t = 2 * y100 - 35; + 
return 0.12257529703447467345e0 + + (0.44621675710026986366e-2 + + (0.39634304721292440285e-4 + + (0.55321553769873381819e-6 + + (0.10343619428848520870e-7 + + (0.26033830170470368088e-9 + + (0.87743837749108025357e-11 + + (0.34427092430230063401e-12 + 0.10205506615709843189e-13 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 18: + { + double t = 2 * y100 - 37; + return 0.13166276955656699478e0 + + (0.46276970481783001803e-2 + + (0.43225026380496399310e-4 + + (0.64799164020016902656e-6 + + (0.13580082794704641782e-7 + + (0.39839800853954313927e-9 + + (0.14431142411840000000e-10 + 0.42193457308830027541e-12 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 19: + { + double t = 2 * y100 - 39; + return 0.14109647869803356475e0 + + (0.48088424418545347758e-2 + + (0.47474504753352150205e-4 + + (0.77509866468724360352e-6 + + (0.18536851570794291724e-7 + + (0.60146623257887570439e-9 + + (0.18533978397305276318e-10 + + (0.41033845938901048380e-13 - 0.46160680279304825485e-13 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 20: + { + double t = 2 * y100 - 41; + return 0.15091057940548936603e0 + + (0.50086864672004685703e-2 + + (0.52622482832192230762e-4 + + (0.95034664722040355212e-6 + + (0.25614261331144718769e-7 + + (0.80183196716888606252e-9 + + (0.12282524750534352272e-10 + + (-0.10531774117332273617e-11 - 0.86157181395039646412e-13 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 21: + { + double t = 2 * y100 - 43; + return 0.16114648116017010770e0 + + (0.52314661581655369795e-2 + + (0.59005534545908331315e-4 + + (0.11885518333915387760e-5 + + (0.33975801443239949256e-7 + + (0.82111547144080388610e-9 + + (-0.12357674017312854138e-10 + + (-0.24355112256914479176e-11 - 0.75155506863572930844e-13 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 22: + { + double t = 2 * y100 - 45; + return 0.17185551279680451144e0 + + (0.54829002967599420860e-2 + + (0.67013226658738082118e-4 + + (0.14897400671425088807e-5 + + 
(0.40690283917126153701e-7 + + (0.44060872913473778318e-9 + + (-0.52641873433280000000e-10 - 0.30940587864543343124e-11 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 23: + { + double t = 2 * y100 - 47; + return 0.18310194559815257381e0 + + (0.57701559375966953174e-2 + + (0.76948789401735193483e-4 + + (0.18227569842290822512e-5 + + (0.41092208344387212276e-7 + + (-0.44009499965694442143e-9 + + (-0.92195414685628803451e-10 + + (-0.22657389705721753299e-11 + 0.10004784908106839254e-12 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 24: + { + double t = 2 * y100 - 49; + return 0.19496527191546630345e0 + + (0.61010853144364724856e-2 + + (0.88812881056342004864e-4 + + (0.21180686746360261031e-5 + + (0.30652145555130049203e-7 + + (-0.16841328574105890409e-8 + + (-0.11008129460612823934e-9 + + (-0.12180794204544515779e-12 + 0.15703325634590334097e-12 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 25: + { + double t = 2 * y100 - 51; + return 0.20754006813966575720e0 + + (0.64825787724922073908e-2 + + (0.10209599627522311893e-3 + + (0.22785233392557600468e-5 + + (0.73495224449907568402e-8 + + (-0.29442705974150112783e-8 + + (-0.94082603434315016546e-10 + + (0.23609990400179321267e-11 + 0.14141908654269023788e-12 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 26: + { + double t = 2 * y100 - 53; + return 0.22093185554845172146e0 + + (0.69182878150187964499e-2 + + (0.11568723331156335712e-3 + + (0.22060577946323627739e-5 + + (-0.26929730679360840096e-7 + + (-0.38176506152362058013e-8 + + (-0.47399503861054459243e-10 + + (0.40953700187172127264e-11 + 0.69157730376118511127e-13 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 27: + { + double t = 2 * y100 - 55; + return 0.23524827304057813918e0 + + (0.74063350762008734520e-2 + + (0.12796333874615790348e-3 + + (0.18327267316171054273e-5 + + (-0.66742910737957100098e-7 + + (-0.40204740975496797870e-8 + + (0.14515984139495745330e-10 + + 
(0.44921608954536047975e-11 - 0.18583341338983776219e-13 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 28: + { + double t = 2 * y100 - 57; + return 0.25058626331812744775e0 + + (0.79377285151602061328e-2 + + (0.13704268650417478346e-3 + + (0.11427511739544695861e-5 + + (-0.10485442447768377485e-6 + + (-0.34850364756499369763e-8 + + (0.72656453829502179208e-10 + + (0.36195460197779299406e-11 - 0.84882136022200714710e-13 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 29: + { + double t = 2 * y100 - 59; + return 0.26701724900280689785e0 + + (0.84959936119625864274e-2 + + (0.14112359443938883232e-3 + + (0.17800427288596909634e-6 + + (-0.13443492107643109071e-6 + + (-0.23512456315677680293e-8 + + (0.11245846264695936769e-9 + + (0.19850501334649565404e-11 - 0.11284666134635050832e-12 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 30: + { + double t = 2 * y100 - 61; + return 0.28457293586253654144e0 + + (0.90581563892650431899e-2 + + (0.13880520331140646738e-3 + + (-0.97262302362522896157e-6 + + (-0.15077100040254187366e-6 + + (-0.88574317464577116689e-9 + + (0.12760311125637474581e-9 + + (0.20155151018282695055e-12 - 0.10514169375181734921e-12 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 31: + { + double t = 2 * y100 - 63; + return 0.30323425595617385705e0 + + (0.95968346790597422934e-2 + + (0.12931067776725883939e-3 + + (-0.21938741702795543986e-5 + + (-0.15202888584907373963e-6 + + (0.61788350541116331411e-9 + + (0.11957835742791248256e-9 + + (-0.12598179834007710908e-11 - 0.75151817129574614194e-13 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 32: + { + double t = 2 * y100 - 65; + return 0.32292521181517384379e0 + + (0.10082957727001199408e-1 + + (0.11257589426154962226e-3 + + (-0.33670890319327881129e-5 + + (-0.13910529040004008158e-6 + + (0.19170714373047512945e-8 + + (0.94840222377720494290e-10 + + (-0.21650018351795353201e-11 - 0.37875211678024922689e-13 * t) * t) + * t) + * t) + * 
t) + * t) + * t) + * t; + } + case 33: + { + double t = 2 * y100 - 67; + return 0.34351233557911753862e0 + + (0.10488575435572745309e-1 + + (0.89209444197248726614e-4 + + (-0.43893459576483345364e-5 + + (-0.11488595830450424419e-6 + + (0.28599494117122464806e-8 + + (0.61537542799857777779e-10 - 0.24935749227658002212e-11 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 34: + { + double t = 2 * y100 - 69; + return 0.36480946642143669093e0 + + (0.10789304203431861366e-1 + + (0.60357993745283076834e-4 + + (-0.51855862174130669389e-5 + + (-0.83291664087289801313e-7 + + (0.33898011178582671546e-8 + + (0.27082948188277716482e-10 + + (-0.23603379397408694974e-11 + 0.19328087692252869842e-13 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 35: + { + double t = 2 * y100 - 71; + return 0.38658679935694939199e0 + + (0.10966119158288804999e-1 + + (0.27521612041849561426e-4 + + (-0.57132774537670953638e-5 + + (-0.48404772799207914899e-7 + + (0.35268354132474570493e-8 + + (-0.32383477652514618094e-11 + + (-0.19334202915190442501e-11 + 0.32333189861286460270e-13 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 36: + { + double t = 2 * y100 - 73; + return 0.40858275583808707870e0 + + (0.11006378016848466550e-1 + + (-0.76396376685213286033e-5 + + (-0.59609835484245791439e-5 + + (-0.13834610033859313213e-7 + + (0.33406952974861448790e-8 + + (-0.26474915974296612559e-10 + + (-0.13750229270354351983e-11 + 0.36169366979417390637e-13 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 37: + { + double t = 2 * y100 - 75; + return 0.43051714914006682977e0 + + (0.10904106549500816155e-1 + + (-0.43477527256787216909e-4 + + (-0.59429739547798343948e-5 + + (0.17639200194091885949e-7 + + (0.29235991689639918688e-8 + + (-0.41718791216277812879e-10 + + (-0.81023337739508049606e-12 + 0.33618915934461994428e-13 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 38: + { + double t = 2 * y100 - 77; + return 0.45210428135559607406e0 + + 
(0.10659670756384400554e-1 + + (-0.78488639913256978087e-4 + + (-0.56919860886214735936e-5 + + (0.44181850467477733407e-7 + + (0.23694306174312688151e-8 + + (-0.49492621596685443247e-10 + + (-0.31827275712126287222e-12 + 0.27494438742721623654e-13 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 39: + { + double t = 2 * y100 - 79; + return 0.47306491195005224077e0 + + (0.10279006119745977570e-1 + + (-0.11140268171830478306e-3 + + (-0.52518035247451432069e-5 + + (0.64846898158889479518e-7 + + (0.17603624837787337662e-8 + + (-0.51129481592926104316e-10 + + (0.62674584974141049511e-13 + 0.20055478560829935356e-13 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 40: + { + double t = 2 * y100 - 81; + return 0.49313638965719857647e0 + + (0.97725799114772017662e-2 + + (-0.14122854267291533334e-3 + + (-0.46707252568834951907e-5 + + (0.79421347979319449524e-7 + + (0.11603027184324708643e-8 + + (-0.48269605844397175946e-10 + + (0.32477251431748571219e-12 + 0.12831052634143527985e-13 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 41: + { + double t = 2 * y100 - 83; + return 0.51208057433416004042e0 + + (0.91542422354009224951e-2 + + (-0.16726530230228647275e-3 + + (-0.39964621752527649409e-5 + + (0.88232252903213171454e-7 + + (0.61343113364949928501e-9 + + (-0.42516755603130443051e-10 + + (0.47910437172240209262e-12 + 0.66784341874437478953e-14 * t) * t) + * t) + * t) + * t) + * t) + * t) + * t; + } + case 42: + { + double t = 2 * y100 - 85; + return 0.52968945458607484524e0 + + (0.84400880445116786088e-2 + + (-0.18908729783854258774e-3 + + (-0.32725905467782951931e-5 + + (0.91956190588652090659e-7 + + (0.14593989152420122909e-9 + + (-0.35239490687644444445e-10 + 0.54613829888448694898e-12 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 43: + { + double t = 2 * y100 - 87; + return 0.54578857454330070965e0 + + (0.76474155195880295311e-2 + + (-0.20651230590808213884e-3 + + (-0.25364339140543131706e-5 + + 
(0.91455367999510681979e-7 + + (-0.23061359005297528898e-9 + + (-0.27512928625244444444e-10 + 0.54895806008493285579e-12 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 44: + { + double t = 2 * y100 - 89; + return 0.56023851910298493910e0 + + (0.67938321739997196804e-2 + + (-0.21956066613331411760e-3 + + (-0.18181127670443266395e-5 + + (0.87650335075416845987e-7 + + (-0.51548062050366615977e-9 + + (-0.20068462174044444444e-10 + 0.50912654909758187264e-12 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 45: + { + double t = 2 * y100 - 91; + return 0.57293478057455721150e0 + + (0.58965321010394044087e-2 + + (-0.22841145229276575597e-3 + + (-0.11404605562013443659e-5 + + (0.81430290992322326296e-7 + + (-0.71512447242755357629e-9 + + (-0.13372664928000000000e-10 + 0.44461498336689298148e-12 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 46: + { + double t = 2 * y100 - 93; + return 0.58380635448407827360e0 + + (0.49717469530842831182e-2 + + (-0.23336001540009645365e-3 + + (-0.51952064448608850822e-6 + + (0.73596577815411080511e-7 + + (-0.84020916763091566035e-9 + + (-0.76700972702222222221e-11 + 0.36914462807972467044e-12 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 47: + { + double t = 2 * y100 - 95; + return 0.59281340237769489597e0 + + (0.40343592069379730568e-2 + + (-0.23477963738658326185e-3 + + (0.34615944987790224234e-7 + + (0.64832803248395814574e-7 + + (-0.90329163587627007971e-9 + + (-0.30421940400000000000e-11 + 0.29237386653743536669e-12 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 48: + { + double t = 2 * y100 - 97; + return 0.59994428743114271918e0 + + (0.30976579788271744329e-2 + + (-0.23308875765700082835e-3 + + (0.51681681023846925160e-6 + + (0.55694594264948268169e-7 + + (-0.91719117313243464652e-9 + + (0.53982743680000000000e-12 + 0.22050829296187771142e-12 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 49: + { + double t = 2 * y100 - 99; + return 0.60521224471819875444e0 + + 
(0.21732138012345456060e-2 + + (-0.22872428969625997456e-3 + + (0.92588959922653404233e-6 + + (0.46612665806531930684e-7 + + (-0.89393722514414153351e-9 + + (0.31718550353777777778e-11 + 0.15705458816080549117e-12 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 50: + { + double t = 2 * y100 - 101; + return 0.60865189969791123620e0 + + (0.12708480848877451719e-2 + + (-0.22212090111534847166e-3 + + (0.12636236031532793467e-5 + + (0.37904037100232937574e-7 + + (-0.84417089968101223519e-9 + + (0.49843180828444444445e-11 + 0.10355439441049048273e-12 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 51: + { + double t = 2 * y100 - 103; + return 0.61031580103499200191e0 + + (0.39867436055861038223e-3 + + (-0.21369573439579869291e-3 + + (0.15339402129026183670e-5 + + (0.29787479206646594442e-7 + + (-0.77687792914228632974e-9 + + (0.61192452741333333334e-11 + 0.60216691829459295780e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 52: + { + double t = 2 * y100 - 105; + return 0.61027109047879835868e0 + + (-0.43680904508059878254e-3 + + (-0.20383783788303894442e-3 + + (0.17421743090883439959e-5 + + (0.22400425572175715576e-7 + + (-0.69934719320045128997e-9 + + (0.67152759655111111110e-11 + 0.26419960042578359995e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 53: + { + double t = 2 * y100 - 107; + return 0.60859639489217430521e0 + + (-0.12305921390962936873e-2 + + (-0.19290150253894682629e-3 + + (0.18944904654478310128e-5 + + (0.15815530398618149110e-7 + + (-0.61726850580964876070e-9 + 0.68987888999111111110e-11 * t) * t) + * t) + * t) + * t) + * t; + } + case 54: + { + double t = 2 * y100 - 109; + return 0.60537899426486075181e0 + + (-0.19790062241395705751e-2 + + (-0.18120271393047062253e-3 + + (0.19974264162313241405e-5 + + (0.10055795094298172492e-7 + + (-0.53491997919318263593e-9 + + (0.67794550295111111110e-11 - 0.17059208095741511603e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 55: + { + double t = 2 * y100 - 111; + 
return 0.60071229457904110537e0 + + (-0.26795676776166354354e-2 + + (-0.16901799553627508781e-3 + + (0.20575498324332621581e-5 + + (0.51077165074461745053e-8 + + (-0.45536079828057221858e-9 + + (0.64488005516444444445e-11 - 0.29311677573152766338e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 56: + { + double t = 2 * y100 - 113; + return 0.59469361520112714738e0 + + (-0.33308208190600993470e-2 + + (-0.15658501295912405679e-3 + + (0.20812116912895417272e-5 + + (0.93227468760614182021e-9 + + (-0.38066673740116080415e-9 + + (0.59806790359111111110e-11 - 0.36887077278950440597e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 57: + { + double t = 2 * y100 - 115; + return 0.58742228631775388268e0 + + (-0.39321858196059227251e-2 + + (-0.14410441141450122535e-3 + + (0.20743790018404020716e-5 + + (-0.25261903811221913762e-8 + + (-0.31212416519526924318e-9 + + (0.54328422462222222221e-11 - 0.40864152484979815972e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 58: + { + double t = 2 * y100 - 117; + return 0.57899804200033018447e0 + + (-0.44838157005618913447e-2 + + (-0.13174245966501437965e-3 + + (0.20425306888294362674e-5 + + (-0.53330296023875447782e-8 + + (-0.25041289435539821014e-9 + + (0.48490437205333333334e-11 - 0.42162206939169045177e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 59: + { + double t = 2 * y100 - 119; + return 0.56951968796931245974e0 + + (-0.49864649488074868952e-2 + + (-0.11963416583477567125e-3 + + (0.19906021780991036425e-5 + + (-0.75580140299436494248e-8 + + (-0.19576060961919820491e-9 + + (0.42613011928888888890e-11 - 0.41539443304115604377e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 60: + { + double t = 2 * y100 - 121; + return 0.55908401930063918964e0 + + (-0.54413711036826877753e-2 + + (-0.10788661102511914628e-3 + + (0.19229663322982839331e-5 + + (-0.92714731195118129616e-8 + + (-0.14807038677197394186e-9 + + (0.36920870298666666666e-11 - 0.39603726688419162617e-13 * t) * t) + * 
t) + * t) + * t) + * t) + * t; + } + case 61: + { + double t = 2 * y100 - 123; + return 0.54778496152925675315e0 + + (-0.58501497933213396670e-2 + + (-0.96582314317855227421e-4 + + (0.18434405235069270228e-5 + + (-0.10541580254317078711e-7 + + (-0.10702303407788943498e-9 + + (0.31563175582222222222e-11 - 0.36829748079110481422e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 62: + { + double t = 2 * y100 - 125; + return 0.53571290831682823999e0 + + (-0.62147030670760791791e-2 + + (-0.85782497917111760790e-4 + + (0.17553116363443470478e-5 + + (-0.11432547349815541084e-7 + + (-0.72157091369041330520e-10 + + (0.26630811607111111111e-11 - 0.33578660425893164084e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 63: + { + double t = 2 * y100 - 127; + return 0.52295422962048434978e0 + + (-0.65371404367776320720e-2 + + (-0.75530164941473343780e-4 + + (0.16613725797181276790e-5 + + (-0.12003521296598910761e-7 + + (-0.42929753689181106171e-10 + + (0.22170894940444444444e-11 - 0.30117697501065110505e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 64: + { + double t = 2 * y100 - 129; + return 0.50959092577577886140e0 + + (-0.68197117603118591766e-2 + + (-0.65852936198953623307e-4 + + (0.15639654113906716939e-5 + + (-0.12308007991056524902e-7 + + (-0.18761997536910939570e-10 + + (0.18198628922666666667e-11 - 0.26638355362285200932e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 65: + { + double t = 2 * y100 - 131; + return 0.49570040481823167970e0 + + (-0.70647509397614398066e-2 + + (-0.56765617728962588218e-4 + + (0.14650274449141448497e-5 + + (-0.12393681471984051132e-7 + + (0.92904351801168955424e-12 + + (0.14706755960177777778e-11 - 0.23272455351266325318e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 66: + { + double t = 2 * y100 - 133; + return 0.48135536250935238066e0 + + (-0.72746293327402359783e-2 + + (-0.48272489495730030780e-4 + + (0.13661377309113939689e-5 + + (-0.12302464447599382189e-7 + + 
(0.16707760028737074907e-10 + + (0.11672928324444444444e-11 - 0.20105801424709924499e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 67: + { + double t = 2 * y100 - 135; + return 0.46662374675511439448e0 + + (-0.74517177649528487002e-2 + + (-0.40369318744279128718e-4 + + (0.12685621118898535407e-5 + + (-0.12070791463315156250e-7 + + (0.29105507892605823871e-10 + + (0.90653314645333333334e-12 - 0.17189503312102982646e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 68: + { + double t = 2 * y100 - 137; + return 0.45156879030168268778e0 + + (-0.75983560650033817497e-2 + + (-0.33045110380705139759e-4 + + (0.11732956732035040896e-5 + + (-0.11729986947158201869e-7 + + (0.38611905704166441308e-10 + + (0.68468768305777777779e-12 - 0.14549134330396754575e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 69: + { + double t = 2 * y100 - 139; + return 0.43624909769330896904e0 + + (-0.77168291040309554679e-2 + + (-0.26283612321339907756e-4 + + (0.10811018836893550820e-5 + + (-0.11306707563739851552e-7 + + (0.45670446788529607380e-10 + + (0.49782492549333333334e-12 - 0.12191983967561779442e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 70: + { + double t = 2 * y100 - 141; + return 0.42071877443548481181e0 + + (-0.78093484015052730097e-2 + + (-0.20064596897224934705e-4 + + (0.99254806680671890766e-6 + + (-0.10823412088884741451e-7 + + (0.50677203326904716247e-10 + + (0.34200547594666666666e-12 - 0.10112698698356194618e-13 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 71: + { + double t = 2 * y100 - 143; + return 0.40502758809710844280e0 + + (-0.78780384460872937555e-2 + + (-0.14364940764532853112e-4 + + (0.90803709228265217384e-6 + + (-0.10298832847014466907e-7 + + (0.53981671221969478551e-10 + + (0.21342751381333333333e-12 - 0.82975901848387729274e-14 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 72: + { + double t = 2 * y100 - 145; + return 0.38922115269731446690e0 + + (-0.79249269708242064120e-2 + + 
(-0.91595258799106970453e-5 + + (0.82783535102217576495e-6 + + (-0.97484311059617744437e-8 + + (0.55889029041660225629e-10 + + (0.10851981336888888889e-12 - 0.67278553237853459757e-14 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 73: + { + double t = 2 * y100 - 147; + return 0.37334112915460307335e0 + + (-0.79519385109223148791e-2 + + (-0.44219833548840469752e-5 + + (0.75209719038240314732e-6 + + (-0.91848251458553190451e-8 + + (0.56663266668051433844e-10 + + (0.23995894257777777778e-13 - 0.53819475285389344313e-14 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 74: + { + double t = 2 * y100 - 149; + return 0.35742543583374223085e0 + + (-0.79608906571527956177e-2 + + (-0.12530071050975781198e-6 + + (0.68088605744900552505e-6 + + (-0.86181844090844164075e-8 + + (0.56530784203816176153e-10 + + (-0.43120012248888888890e-13 - 0.42372603392496813810e-14 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 75: + { + double t = 2 * y100 - 151; + return 0.34150846431979618536e0 + + (-0.79534924968773806029e-2 + + (0.37576885610891515813e-5 + + (0.61419263633090524326e-6 + + (-0.80565865409945960125e-8 + + (0.55684175248749269411e-10 + + (-0.95486860764444444445e-13 - 0.32712946432984510595e-14 * t) * t) + * t) + * t) + * t) + * t) + * t; + } + case 76: + { + double t = 2 * y100 - 153; + return 0.32562129649136346824e0 + + (-0.79313448067948884309e-2 + + (0.72539159933545300034e-5 + + (0.55195028297415503083e-6 + + (-0.75063365335570475258e-8 + + (0.54281686749699595941e-10 - 0.13545424295111111111e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 77: + { + double t = 2 * y100 - 155; + return 0.30979191977078391864e0 + + (-0.78959416264207333695e-2 + + (0.10389774377677210794e-4 + + (0.49404804463196316464e-6 + + (-0.69722488229411164685e-8 + + (0.52469254655951393842e-10 - 0.16507860650666666667e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 78: + { + double t = 2 * y100 - 157; + return 0.29404543811214459904e0 + + (-0.78486728990364155356e-2 
+ + (0.13190885683106990459e-4 + + (0.44034158861387909694e-6 + + (-0.64578942561562616481e-8 + + (0.50354306498006928984e-10 - 0.18614473550222222222e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 79: + { + double t = 2 * y100 - 159; + return 0.27840427686253660515e0 + + (-0.77908279176252742013e-2 + + (0.15681928798708548349e-4 + + (0.39066226205099807573e-6 + + (-0.59658144820660420814e-8 + + (0.48030086420373141763e-10 - 0.20018995173333333333e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 80: + { + double t = 2 * y100 - 161; + return 0.26288838011163800908e0 + + (-0.77235993576119469018e-2 + + (0.17886516796198660969e-4 + + (0.34482457073472497720e-6 + + (-0.54977066551955420066e-8 + + (0.45572749379147269213e-10 - 0.20852924954666666667e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 81: + { + double t = 2 * y100 - 163; + return 0.24751539954181029717e0 + + (-0.76480877165290370975e-2 + + (0.19827114835033977049e-4 + + (0.30263228619976332110e-6 + + (-0.50545814570120129947e-8 + + (0.43043879374212005966e-10 - 0.21228012028444444444e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 82: + { + double t = 2 * y100 - 165; + return 0.23230087411688914593e0 + + (-0.75653060136384041587e-2 + + (0.21524991113020016415e-4 + + (0.26388338542539382413e-6 + + (-0.46368974069671446622e-8 + + (0.40492715758206515307e-10 - 0.21238627815111111111e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 83: + { + double t = 2 * y100 - 167; + return 0.21725840021297341931e0 + + (-0.74761846305979730439e-2 + + (0.23000194404129495243e-4 + + (0.22837400135642906796e-6 + + (-0.42446743058417541277e-8 + + (0.37958104071765923728e-10 - 0.20963978568888888889e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 84: + { + double t = 2 * y100 - 169; + return 0.20239979200788191491e0 + + (-0.73815761980493466516e-2 + + (0.24271552727631854013e-4 + + (0.19590154043390012843e-6 + + (-0.38775884642456551753e-8 + + (0.35470192372162901168e-10 - 0.20470131678222222222e-12 * 
t) * t) + * t) + * t) + * t) + * t; + } + case 85: + { + double t = 2 * y100 - 171; + return 0.18773523211558098962e0 + + (-0.72822604530339834448e-2 + + (0.25356688567841293697e-4 + + (0.16626710297744290016e-6 + + (-0.35350521468015310830e-8 + + (0.33051896213898864306e-10 - 0.19811844544000000000e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 86: + { + double t = 2 * y100 - 173; + return 0.17327341258479649442e0 + + (-0.71789490089142761950e-2 + + (0.26272046822383820476e-4 + + (0.13927732375657362345e-6 + + (-0.32162794266956859603e-8 + + (0.30720156036105652035e-10 - 0.19034196304000000000e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 87: + { + double t = 2 * y100 - 175; + return 0.15902166648328672043e0 + + (-0.70722899934245504034e-2 + + (0.27032932310132226025e-4 + + (0.11474573347816568279e-6 + + (-0.29203404091754665063e-8 + + (0.28487010262547971859e-10 - 0.18174029063111111111e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 88: + { + double t = 2 * y100 - 177; + return 0.14498609036610283865e0 + + (-0.69628725220045029273e-2 + + (0.27653554229160596221e-4 + + (0.92493727167393036470e-7 + + (-0.26462055548683583849e-8 + + (0.26360506250989943739e-10 - 0.17261211260444444444e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 89: + { + double t = 2 * y100 - 179; + return 0.13117165798208050667e0 + + (-0.68512309830281084723e-2 + + (0.28147075431133863774e-4 + + (0.72351212437979583441e-7 + + (-0.23927816200314358570e-8 + + (0.24345469651209833155e-10 - 0.16319736960000000000e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 90: + { + double t = 2 * y100 - 181; + return 0.11758232561160626306e0 + + (-0.67378491192463392927e-2 + + (0.28525664781722907847e-4 + + (0.54156999310046790024e-7 + + (-0.21589405340123827823e-8 + + (0.22444150951727334619e-10 - 0.15368675584000000000e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 91: + { + double t = 2 * y100 - 183; + return 0.10422112945361673560e0 + + (-0.66231638959845581564e-2 + + 
(0.28800551216363918088e-4 + + (0.37758983397952149613e-7 + + (-0.19435423557038933431e-8 + + (0.20656766125421362458e-10 - 0.14422990012444444444e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 92: + { + double t = 2 * y100 - 185; + return 0.91090275493541084785e-1 + + (-0.65075691516115160062e-2 + + (0.28982078385527224867e-4 + + (0.23014165807643012781e-7 + + (-0.17454532910249875958e-8 + + (0.18981946442680092373e-10 - 0.13494234691555555556e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 93: + { + double t = 2 * y100 - 187; + return 0.78191222288771379358e-1 + + (-0.63914190297303976434e-2 + + (0.29079759021299682675e-4 + + (0.97885458059415717014e-8 + + (-0.15635596116134296819e-8 + + (0.17417110744051331974e-10 - 0.12591151763555555556e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 94: + { + double t = 2 * y100 - 189; + return 0.65524757106147402224e-1 + + (-0.62750311956082444159e-2 + + (0.29102328354323449795e-4 + + (-0.20430838882727954582e-8 + + (-0.13967781903855367270e-8 + + (0.15958771833747057569e-10 - 0.11720175765333333333e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 95: + { + double t = 2 * y100 - 191; + return 0.53091065838453612773e-1 + + (-0.61586898417077043662e-2 + + (0.29057796072960100710e-4 + + (-0.12597414620517987536e-7 + + (-0.12440642607426861943e-8 + + (0.14602787128447932137e-10 - 0.10885859114666666667e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 96: + { + double t = 2 * y100 - 193; + return 0.40889797115352738582e-1 + + (-0.60426484889413678200e-2 + + (0.28953496450191694606e-4 + + (-0.21982952021823718400e-7 + + (-0.11044169117553026211e-8 + + (0.13344562332430552171e-10 - 0.10091231402844444444e-12 * t) * t) + * t) + * t) + * t) + * t; + } + case 97: + case 98: + case 99: + case 100: + { // use Taylor expansion for small x (|x| <= 0.0309...) 
+ // (2/sqrt(pi)) * (x - 2/3 x^3 + 4/15 x^5 - 8/105 x^7 + 16/945 x^9) + double x2 = x * x; + return x + * (1.1283791670955125739 + - x2 + * (0.75225277806367504925 + - x2 + * (0.30090111122547001970 + - x2 * (0.085971746064420005629 - x2 * 0.016931216931216931217)))); + } + } /* Since 0 <= y100 < 101, this is only reached if x is NaN, in which case we should return NaN. */ return NaN; diff --git a/source/source_base/math_erf_complex.h b/source/source_base/math_erf_complex.h index eb6c48300e8..3b7fbdc26a2 100644 --- a/source/source_base/math_erf_complex.h +++ b/source/source_base/math_erf_complex.h @@ -10,8 +10,8 @@ namespace ModuleBase class ErrorFunc { public: - ErrorFunc(); - ~ErrorFunc(); + ErrorFunc (); + ~ErrorFunc (); /** * @brief A class of the error function of complex arguments based on Faddeeva algorithm. @@ -21,30 +21,32 @@ class ErrorFunc */ // compute w(z) = exp(-z^2) erfc(-iz) [ Faddeeva / scaled complex error func ] - static std::complex scaled_w(std::complex z, double relerr); - static double scaled_w_im(double x); // special-case code for Im[w(x)] of real x + static std::complex scaled_w (std::complex z, double relerr); + static double scaled_w_im (double x); // special-case code for Im[w(x)] of real x // compute erfcx(z) = exp(z^2) erfc(z) - static std::complex erfcx(std::complex z, double relerr = 0); - static double erfcx(double x); // special case for real x + static std::complex erfcx (std::complex z, double relerr = 0); + static double erfcx (double x); // special case for real x // compute erf(z), the error function of complex arguments - static std::complex erf(std::complex z, double relerr = 0); + static std::complex erf (std::complex z, double relerr = 0); // compute erfi(z) = -i erf(iz), the imaginary error function - static std::complex erfi(std::complex z, double relerr = 0); - static double erfi(double x); // special case for real x + static std::complex erfi (std::complex z, double relerr = 0); + static double erfi (double x); // 
special case for real x // compute erfc(z) = 1 - erf(z), the complementary error function - static std::complex erfc(std::complex z, double relerr = 0); + static std::complex erfc (std::complex z, double relerr = 0); private: - static double w_im_y100(double y100, double x); - static inline double sinc(double x, double sinx) + static double w_im_y100 (double y100, double x); + static inline double + sinc (double x, double sinx) { - return fabs(x) < 1e-4 ? 1 - (0.1666666666666666666667) * x * x : sinx / x; + return fabs (x) < 1e-4 ? 1 - (0.1666666666666666666667) * x * x : sinx / x; } - static inline double copysign(double x, double y) + static inline double + copysign (double x, double y) { return x < 0 != y < 0 ? -x : x; } diff --git a/source/source_base/math_integral.cpp b/source/source_base/math_integral.cpp index 0dd8434bc37..69067510b3b 100644 --- a/source/source_base/math_integral.cpp +++ b/source/source_base/math_integral.cpp @@ -1,5 +1,5 @@ #include "math_integral.h" -#include // use size_t +#include // use size_t #include #include #include @@ -9,10 +9,9 @@ namespace ModuleBase { -Integral::Integral(){} - -Integral::~Integral(){} +Integral::Integral () {} +Integral::~Integral () {} // Peize Lin accelerate 2017-10-02 /* @@ -34,7 +33,7 @@ void Integral::Simpson_Integral // r(i) = a(exp((i-1)*dx)-1) ==> rab(i)=(r(i)+a)*dx // Output in asum = \sum_i c_i f(i)*rab(i) = \int_0^\infty f(r) dr // where c_i are alternativaly 2/3, 4/3 except c_1 = c_mesh = 1/3 - + // simpson's rule integrator for function stored on the // radial logarithmic mesh // routine assumes that mesh is an odd number so run check @@ -60,15 +59,9 @@ void Integral::Simpson_Integral }// end subroutine simpson */ - // Peize Lin accelerate 2017-10-02 -void Integral::Simpson_Integral -( - const int mesh, - const double * const func, - const double * const rab, - double &asum -) +void + Integral::Simpson_Integral (const int mesh, const double* const func, const double* const rab, double& asum) { /* 
simpson's rule integration. On input: ! mesh = mhe number of grid points (should be odd) @@ -84,32 +77,26 @@ void Integral::Simpson_Integral // simpson's rule integrator for function stored on the // radial logarithmic mesh // routine assumes that mesh is an odd number so run check - assert(mesh&1); + assert (mesh & 1); asum = 0.00; - const size_t end = mesh-2; - for( size_t i=1; i!=end; i+=2 ) - { - const double f1 = func[i]*rab[i]; - asum += f1 + f1 + func[i+1]*rab[i+1]; - } - const double f1 = func[mesh-2]*rab[mesh-2]; - asum += f1+f1; - asum += asum; - asum += func[0]*rab[0] + func[mesh-1]*rab[mesh-1]; - asum /= 3.0; + const size_t end = mesh - 2; + for (size_t i = 1; i != end; i += 2) + { + const double f1 = func[i] * rab[i]; + asum += f1 + f1 + func[i + 1] * rab[i + 1]; + } + const double f1 = func[mesh - 2] * rab[mesh - 2]; + asum += f1 + f1; + asum += asum; + asum += func[0] * rab[0] + func[mesh - 1] * rab[mesh - 1]; + asum /= 3.0; return; -}// end subroutine simpson - +} // end subroutine simpson // Peize Lin accelerate 2017-10-02 -void Integral::Simpson_Integral -( - const int mesh, - const double * const func, - const double dr, - double &asum -) +void + Integral::Simpson_Integral (const int mesh, const double* const func, const double dr, double& asum) { /* simpson's rule integration. On input: ! 
mesh = mhe number of grid points (should be odd) @@ -125,53 +112,49 @@ void Integral::Simpson_Integral // simpson's rule integrator for function stored on the // radial logarithmic mesh // routine assumes that mesh is an odd number so run check - assert(mesh&1); + assert (mesh & 1); asum = 0.00; - const size_t end = mesh-2; - for(size_t i=1; i!=end; i+=2 ) - { - const double f1 = func[i]; - asum += f1 + f1 + func[i+1]; - } - const double f1 = func[mesh-2]; - asum += f1+f1; - asum += asum; - asum += func[0] + func[mesh-1]; - asum *= dr/3.0; + const size_t end = mesh - 2; + for (size_t i = 1; i != end; i += 2) + { + const double f1 = func[i]; + asum += f1 + f1 + func[i + 1]; + } + const double f1 = func[mesh - 2]; + asum += f1 + f1; + asum += asum; + asum += func[0] + func[mesh - 1]; + asum *= dr / 3.0; return; -}// end subroutine simpson - +} // end subroutine simpson // Peize Lin add 2016-02-14 -void Integral::Simpson_Integral_0toall -( - const int mesh, - const double * const func, - const double * const rab, - double * const asum -) +void + Integral::Simpson_Integral_0toall (const int mesh, + const double* const func, + const double* const rab, + double* const asum) { - // asum(r) = \int_{r'=0}^{r} dr' f(r') + // asum(r) = \int_{r'=0}^{r} dr' f(r') - const double r2=1.00/2.00, r3=1.00/3.00; + const double r2 = 1.00 / 2.00, r3 = 1.00 / 3.00; asum[0] = 0.00; - double f3 = func [0] * rab [0]; - for( int i=1; i0; i-=2) { - const double f3 = f1; + const double f3 = f1; if( i+3==mesh ) { const double f4 = func[mesh-1] * rab[mesh-1]; @@ -205,99 +188,100 @@ void Integral::Simpson_Integral_0toall return; }*/ - // Peize Lin add 2016-06-11 // a little lower -void Integral::Simpson_Integral_alltoinf -( - const int mesh, - const double * const func, - const double * const rab, - double * const asum -) +void + Integral::Simpson_Integral_alltoinf (const int mesh, + const double* const func, + const double* const rab, + double* const asum) { - Integral::Simpson_Integral_0toall( 
mesh, func, rab, asum ); - - const double asum_all = asum[mesh-1]; - for (int i = 0;i < mesh; ++i) - { - asum[i] = asum_all - asum[i]; - } - return; + Integral::Simpson_Integral_0toall (mesh, func, rab, asum); + + const double asum_all = asum[mesh - 1]; + for (int i = 0; i < mesh; ++i) + { + asum[i] = asum_all - asum[i]; + } + return; } -double Integral::simpson(const int n, const double* const f, const double dx) +double + Integral::simpson (const int n, const double* const f, const double dx) { - assert(n >= 2); + assert (n >= 2); if (n == 4) - { // Simpson's 3/8 rule - return 3.0 * dx / 8 * (f[0] + 3.0 * f[1] + 3.0 * f[2] + f[3]); - } + { // Simpson's 3/8 rule + return 3.0 * dx / 8 * (f[0] + 3.0 * f[1] + 3.0 * f[2] + f[3]); + } if (n == 2) - { - return 0.5 * dx * (f[0] + f[1]); - } + { + return 0.5 * dx * (f[0] + f[1]); + } if (n % 2 == 1) - { // composite Simpson's 1/3 rule - double sum = 0.0; - for (int i = 1; i != n-2; i += 2) - { - sum += 2.0 * f[i] + f[i+1]; + { // composite Simpson's 1/3 rule + double sum = 0.0; + for (int i = 1; i != n - 2; i += 2) + { + sum += 2.0 * f[i] + f[i + 1]; + } + sum += 2.0 * f[n - 2]; + sum *= 2.0; + sum += f[0] + f[n - 1]; + return sum * dx / 3.0; } - sum += 2.0 * f[n-2]; - sum *= 2.0; - sum += f[0] + f[n-1]; - return sum * dx / 3.0; - } else - { // composite Simpson's 1/3 rule for the first n-4 intervals plus Simpson's 3/8 rule for the last 3 intervals - return simpson(n-3, f, dx) + simpson(4, &f[n-4], dx); - } + { // composite Simpson's 1/3 rule for the first n-4 intervals plus Simpson's 3/8 rule for the last 3 intervals + return simpson (n - 3, f, dx) + simpson (4, &f[n - 4], dx); + } } -double Integral::simpson(const int n, const double* const f, const double* const h) -{ +double + Integral::simpson (const int n, const double* const f, const double* const h) +{ // Simpson's rule for irregularly-spaced grid // The treatment for even number of grid points is the same as that of the regularly-spaced grid case. 
- assert( n >= 2 ); - assert( std::all_of(h, h+(n-1), [](double x){return x > 0.0;}) ); + assert (n >= 2); + assert (std::all_of (h, h + (n - 1), [] (double x) { return x > 0.0; })); if (n == 4) - { - double w = h[0] + h[1] + h[2]; - return w / 12.0 * ( 2.0 + ((h[1]+h[2])/h[0]-1.0) * (h[2]/(h[0]+h[1])-1.0) ) * f[0] - + std::pow(w,3) / 12.0 * (h[0]+h[1]-h[2]) / (h[0]*h[1]*(h[1]+h[2])) * f[1] - + std::pow(w,3) / 12.0 * (h[2]+h[1]-h[0]) / (h[2]*h[1]*(h[1]+h[0])) * f[2] - + w / 12.0 * ( 2.0 + ((h[1]+h[0])/h[2]-1.0) * (h[0]/(h[2]+h[1])-1.0) ) * f[3]; - } + { + double w = h[0] + h[1] + h[2]; + return w / 12.0 * (2.0 + ((h[1] + h[2]) / h[0] - 1.0) * (h[2] / (h[0] + h[1]) - 1.0)) * f[0] + + std::pow (w, 3) / 12.0 * (h[0] + h[1] - h[2]) / (h[0] * h[1] * (h[1] + h[2])) * f[1] + + std::pow (w, 3) / 12.0 * (h[2] + h[1] - h[0]) / (h[2] * h[1] * (h[1] + h[0])) * f[2] + + w / 12.0 * (2.0 + ((h[1] + h[0]) / h[2] - 1.0) * (h[0] / (h[2] + h[1]) - 1.0)) * f[3]; + } if (n == 2) - { - return 0.5 * h[0] * (f[0] + f[1]); - } + { + return 0.5 * h[0] * (f[0] + f[1]); + } if (n % 2 == 1) - { - double sum = 0.0; - for (int i = 0; i < n/2; ++i) { - double hrp = h[2*i+1] / h[2*i]; - double hrm = h[2*i] / h[2*i+1]; - sum += (h[2*i+1] + h[2*i]) / 6.0 * ( (2.0-hrp)*f[2*i] + (2.0+hrp+hrm)*f[2*i+1] + (2.0-hrm) * f[2*i+2]); + double sum = 0.0; + for (int i = 0; i < n / 2; ++i) + { + double hrp = h[2 * i + 1] / h[2 * i]; + double hrm = h[2 * i] / h[2 * i + 1]; + sum += (h[2 * i + 1] + h[2 * i]) / 6.0 + * ((2.0 - hrp) * f[2 * i] + (2.0 + hrp + hrm) * f[2 * i + 1] + (2.0 - hrm) * f[2 * i + 2]); + } + return sum; } - return sum; - } else - { - return simpson(n-3, f, h) + simpson(4, &f[n-4], &h[n-4]); - } + { + return simpson (n - 3, f, h) + simpson (4, &f[n - 4], &h[n - 4]); + } } -void Integral::Gauss_Legendre_grid_and_weight(const int n, double *x, double *weights) +void + Integral::Gauss_Legendre_grid_and_weight (const int n, double* x, double* weights) { - assert( n >= 1 ); + assert (n >= 1); double 
z = 0.0; double z1 = 0.0; @@ -306,41 +290,48 @@ void Integral::Gauss_Legendre_grid_and_weight(const int n, double *x, double *we double p3 = 0.0; double pp = 0.0; - int half_grid_num = static_cast((n+1)/2); - for(int i = 1; i <= half_grid_num; i++) - { - z = cos(ModuleBase::PI * (i - 0.25) / (n + 0.5)); - - while(true) + int half_grid_num = static_cast ((n + 1) / 2); + for (int i = 1; i <= half_grid_num; i++) { - p1 = 1.0; - p2 = 0.0; - - for(int j = 1; j <= n; j++) - { - p3 = p2; - p2 = p1; - p1 = ((2.0 * j - 1.0) * z * p2 - (j - 1.0) * p3) / j; - } - - pp = n * (p2 - z * p1) / (1.0 - z*z); - z1 = z; - z = z1 - p1 / pp; - - if (std::abs(z - z1) < 1e-13) break; + z = cos (ModuleBase::PI * (i - 0.25) / (n + 0.5)); + + while (true) + { + p1 = 1.0; + p2 = 0.0; + + for (int j = 1; j <= n; j++) + { + p3 = p2; + p2 = p1; + p1 = ((2.0 * j - 1.0) * z * p2 - (j - 1.0) * p3) / j; + } + + pp = n * (p2 - z * p1) / (1.0 - z * z); + z1 = z; + z = z1 - p1 / pp; + + if (std::abs (z - z1) < 1e-13) + { + break; + } + } + + x[i - 1] = -z; + x[n - i] = z; + weights[i - 1] = 2.0 / ((1.0 - z * z) * pp * pp); + weights[n - i] = weights[i - 1]; } - - x[i-1] = -z; - x[n-i] = z; - weights[i-1] = 2.0 / ((1.0 - z * z) * pp * pp); - weights[n-i] = weights[i-1]; - } - } -void Integral::Gauss_Legendre_grid_and_weight(const double xmin, const double xmax, const int n, double *x, double *weights) +void + Integral::Gauss_Legendre_grid_and_weight (const double xmin, + const double xmax, + const int n, + double* x, + double* weights) { - assert( n >= 1 ); + assert (n >= 1); double xl = (xmax - xmin) * 0.5; double xmean = (xmax + xmin) * 0.5; @@ -352,103 +343,127 @@ void Integral::Gauss_Legendre_grid_and_weight(const double xmin, const double xm double p3 = 0.0; double pp = 0.0; - int half_grid_num = static_cast((n+1)/2); - for(int i = 1; i <= half_grid_num; i++) - { - z = cos(ModuleBase::PI * (i - 0.25) / (n + 0.5)); - - while(true) + int half_grid_num = static_cast ((n + 1) / 2); + for (int i = 1; i 
<= half_grid_num; i++) { - p1 = 1.0; - p2 = 0.0; - - for(int j = 1; j <= n; j++) - { - p3 = p2; - p2 = p1; - p1 = ((2.0 * j - 1.0) * z * p2 - (j - 1.0) * p3) / j; - } - - pp = n * (p2 - z * p1) / (1.0 - z*z); - z1 = z; - z = z1 - p1 / pp; - - if (std::abs(z - z1) < 1e-13) break; + z = cos (ModuleBase::PI * (i - 0.25) / (n + 0.5)); + + while (true) + { + p1 = 1.0; + p2 = 0.0; + + for (int j = 1; j <= n; j++) + { + p3 = p2; + p2 = p1; + p1 = ((2.0 * j - 1.0) * z * p2 - (j - 1.0) * p3) / j; + } + + pp = n * (p2 - z * p1) / (1.0 - z * z); + z1 = z; + z = z1 - p1 / pp; + + if (std::abs (z - z1) < 1e-13) + { + break; + } + } + + x[i - 1] = xmean - xl * z; + x[n - i] = xmean + xl * z; + weights[i - 1] = 2.0 * xl / ((1.0 - z * z) * pp * pp); + weights[n - i] = weights[i - 1]; } - - x[i-1] = xmean - xl * z; - x[n-i] = xmean + xl * z; - weights[i-1] = 2.0 * xl / ((1.0 - z * z) * pp * pp); - weights[n-i] = weights[i-1]; - } - } const double Integral::Lebedev_Laikov_grid110_x[110] = { - 1.000000000000000, -1.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.577350269189626, -0.577350269189626, - 0.577350269189626, -0.577350269189626, 0.577350269189626, -0.577350269189626, 0.577350269189626, -0.577350269189626, 0.185115635344736, -0.185115635344736, - 0.185115635344736, -0.185115635344736, 0.185115635344736, -0.185115635344736, 0.185115635344736, -0.185115635344736, 0.185115635344736, -0.185115635344736, - 0.185115635344736, -0.185115635344736, 0.185115635344736, -0.185115635344736, 0.185115635344736, -0.185115635344736, 0.965124035086594, -0.965124035086594, - 0.965124035086594, -0.965124035086594, 0.965124035086594, -0.965124035086594, 0.965124035086594, -0.965124035086594, 0.690421048382292, -0.690421048382292, - 0.690421048382292, -0.690421048382292, 0.690421048382292, -0.690421048382292, 0.690421048382292, -0.690421048382292, 0.690421048382292, -0.690421048382292, - 0.690421048382292, -0.690421048382292, 0.690421048382292, 
-0.690421048382292, 0.690421048382292, -0.690421048382292, 0.215957291845848, -0.215957291845848, - 0.215957291845848, -0.215957291845848, 0.215957291845848, -0.215957291845848, 0.215957291845848, -0.215957291845848, 0.395689473055942, -0.395689473055942, - 0.395689473055942, -0.395689473055942, 0.395689473055942, -0.395689473055942, 0.395689473055942, -0.395689473055942, 0.395689473055942, -0.395689473055942, - 0.395689473055942, -0.395689473055942, 0.395689473055942, -0.395689473055942, 0.395689473055942, -0.395689473055942, 0.828769981252592, -0.828769981252592, - 0.828769981252592, -0.828769981252592, 0.828769981252592, -0.828769981252592, 0.828769981252592, -0.828769981252592, 0.478369028812150, -0.478369028812150, - 0.478369028812150, -0.478369028812150, 0.878158910604066, -0.878158910604066, 0.878158910604066, -0.878158910604066, 0.478369028812150, -0.478369028812150, - 0.478369028812150, -0.478369028812150, 0.878158910604066, -0.878158910604066, 0.878158910604066, -0.878158910604066, 0.000000000000000, 0.000000000000000, - 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000 -}; - -const double Integral::Lebedev_Laikov_grid110_y[110] = { - 0.000000000000000, 0.000000000000000, 1.000000000000000, -1.000000000000000, 0.000000000000000, 0.000000000000000, 0.577350269189626, 0.577350269189626, - -0.577350269189626, -0.577350269189626, 0.577350269189626, 0.577350269189626, -0.577350269189626, -0.577350269189626, 0.185115635344736, 0.185115635344736, - -0.185115635344736, -0.185115635344736, 0.185115635344736, 0.185115635344736, -0.185115635344736, -0.185115635344736, 0.965124035086594, 0.965124035086594, - -0.965124035086594, -0.965124035086594, 0.965124035086594, 0.965124035086594, -0.965124035086594, -0.965124035086594, 0.185115635344736, 0.185115635344736, - -0.185115635344736, -0.185115635344736, 0.185115635344736, 0.185115635344736, -0.185115635344736, -0.185115635344736, 0.690421048382292, 
0.690421048382292, - -0.690421048382292, -0.690421048382292, 0.690421048382292, 0.690421048382292, -0.690421048382292, -0.690421048382292, 0.215957291845848, 0.215957291845848, - -0.215957291845848, -0.215957291845848, 0.215957291845848, 0.215957291845848, -0.215957291845848, -0.215957291845848, 0.690421048382292, 0.690421048382292, - -0.690421048382292, -0.690421048382292, 0.690421048382292, 0.690421048382292, -0.690421048382292, -0.690421048382292, 0.395689473055942, 0.395689473055942, - -0.395689473055942, -0.395689473055942, 0.395689473055942, 0.395689473055942, -0.395689473055942, -0.395689473055942, 0.828769981252592, 0.828769981252592, - -0.828769981252592, -0.828769981252592, 0.828769981252592, 0.828769981252592, -0.828769981252592, -0.828769981252592, 0.395689473055942, 0.395689473055942, - -0.395689473055942, -0.395689473055942, 0.395689473055942, 0.395689473055942, -0.395689473055942, -0.395689473055942, 0.878158910604066, 0.878158910604066, - -0.878158910604066, -0.878158910604066, 0.478369028812150, 0.478369028812150, -0.478369028812150, -0.478369028812150, 0.000000000000000, 0.000000000000000, - 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.478369028812150, -0.478369028812150, - 0.478369028812150, -0.478369028812150, 0.878158910604066, -0.878158910604066, 0.878158910604066, -0.878158910604066 -}; - -const double Integral::Lebedev_Laikov_grid110_z[110] = { - 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, 1.000000000000000, -1.000000000000000, 0.577350269189626, 0.577350269189626, - 0.577350269189626, 0.577350269189626, -0.577350269189626, -0.577350269189626, -0.577350269189626, -0.577350269189626, 0.965124035086594, 0.965124035086594, - 0.965124035086594, 0.965124035086594, -0.965124035086594, -0.965124035086594, -0.965124035086594, -0.965124035086594, 0.185115635344736, 0.185115635344736, - 0.185115635344736, 0.185115635344736, -0.185115635344736, 
-0.185115635344736, -0.185115635344736, -0.185115635344736, 0.185115635344736, 0.185115635344736, - 0.185115635344736, 0.185115635344736, -0.185115635344736, -0.185115635344736, -0.185115635344736, -0.185115635344736, 0.215957291845848, 0.215957291845848, - 0.215957291845848, 0.215957291845848, -0.215957291845848, -0.215957291845848, -0.215957291845848, -0.215957291845848, 0.690421048382292, 0.690421048382292, - 0.690421048382292, 0.690421048382292, -0.690421048382292, -0.690421048382292, -0.690421048382292, -0.690421048382292, 0.690421048382292, 0.690421048382292, - 0.690421048382292, 0.690421048382292, -0.690421048382292, -0.690421048382292, -0.690421048382292, -0.690421048382292, 0.828769981252592, 0.828769981252592, - 0.828769981252592, 0.828769981252592, -0.828769981252592, -0.828769981252592, -0.828769981252592, -0.828769981252592, 0.395689473055942, 0.395689473055942, - 0.395689473055942, 0.395689473055942, -0.395689473055942, -0.395689473055942, -0.395689473055942, -0.395689473055942, 0.395689473055942, 0.395689473055942, - 0.395689473055942, 0.395689473055942, -0.395689473055942, -0.395689473055942, -0.395689473055942, -0.395689473055942, 0.000000000000000, 0.000000000000000, - 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.878158910604066, 0.878158910604066, - -0.878158910604066, -0.878158910604066, 0.478369028812150, 0.478369028812150, -0.478369028812150, -0.478369028812150, 0.878158910604066, 0.878158910604066, - -0.878158910604066, -0.878158910604066, 0.478369028812150, 0.478369028812150, -0.478369028812150, -0.478369028812150 -}; - -const double Integral::Lebedev_Laikov_grid110_w[110] = { - 0.048107465851397, 0.048107465851397, 0.048107465851397, 0.048107465851397, 0.048107465851397, 0.048107465851397, 0.123071735281670, 0.123071735281670, - 0.123071735281670, 0.123071735281670, 0.123071735281670, 0.123071735281670, 0.123071735281670, 0.123071735281670, 0.103191734088330, 
0.103191734088330, - 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, - 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, - 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.124945096872513, 0.124945096872513, - 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, - 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, - 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.120580249028528, 0.120580249028528, - 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, - 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, - 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.121830917385521, 0.121830917385521, - 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, - 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, - 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521 -}; -} + 1.000000000000000, -1.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, + 0.577350269189626, -0.577350269189626, 
0.577350269189626, -0.577350269189626, 0.577350269189626, -0.577350269189626, + 0.577350269189626, -0.577350269189626, 0.185115635344736, -0.185115635344736, 0.185115635344736, -0.185115635344736, + 0.185115635344736, -0.185115635344736, 0.185115635344736, -0.185115635344736, 0.185115635344736, -0.185115635344736, + 0.185115635344736, -0.185115635344736, 0.185115635344736, -0.185115635344736, 0.185115635344736, -0.185115635344736, + 0.965124035086594, -0.965124035086594, 0.965124035086594, -0.965124035086594, 0.965124035086594, -0.965124035086594, + 0.965124035086594, -0.965124035086594, 0.690421048382292, -0.690421048382292, 0.690421048382292, -0.690421048382292, + 0.690421048382292, -0.690421048382292, 0.690421048382292, -0.690421048382292, 0.690421048382292, -0.690421048382292, + 0.690421048382292, -0.690421048382292, 0.690421048382292, -0.690421048382292, 0.690421048382292, -0.690421048382292, + 0.215957291845848, -0.215957291845848, 0.215957291845848, -0.215957291845848, 0.215957291845848, -0.215957291845848, + 0.215957291845848, -0.215957291845848, 0.395689473055942, -0.395689473055942, 0.395689473055942, -0.395689473055942, + 0.395689473055942, -0.395689473055942, 0.395689473055942, -0.395689473055942, 0.395689473055942, -0.395689473055942, + 0.395689473055942, -0.395689473055942, 0.395689473055942, -0.395689473055942, 0.395689473055942, -0.395689473055942, + 0.828769981252592, -0.828769981252592, 0.828769981252592, -0.828769981252592, 0.828769981252592, -0.828769981252592, + 0.828769981252592, -0.828769981252592, 0.478369028812150, -0.478369028812150, 0.478369028812150, -0.478369028812150, + 0.878158910604066, -0.878158910604066, 0.878158910604066, -0.878158910604066, 0.478369028812150, -0.478369028812150, + 0.478369028812150, -0.478369028812150, 0.878158910604066, -0.878158910604066, 0.878158910604066, -0.878158910604066, + 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, + 0.000000000000000, 
0.000000000000000}; + +const double Integral::Lebedev_Laikov_grid110_y[110] + = {0.000000000000000, 0.000000000000000, 1.000000000000000, -1.000000000000000, 0.000000000000000, + 0.000000000000000, 0.577350269189626, 0.577350269189626, -0.577350269189626, -0.577350269189626, + 0.577350269189626, 0.577350269189626, -0.577350269189626, -0.577350269189626, 0.185115635344736, + 0.185115635344736, -0.185115635344736, -0.185115635344736, 0.185115635344736, 0.185115635344736, + -0.185115635344736, -0.185115635344736, 0.965124035086594, 0.965124035086594, -0.965124035086594, + -0.965124035086594, 0.965124035086594, 0.965124035086594, -0.965124035086594, -0.965124035086594, + 0.185115635344736, 0.185115635344736, -0.185115635344736, -0.185115635344736, 0.185115635344736, + 0.185115635344736, -0.185115635344736, -0.185115635344736, 0.690421048382292, 0.690421048382292, + -0.690421048382292, -0.690421048382292, 0.690421048382292, 0.690421048382292, -0.690421048382292, + -0.690421048382292, 0.215957291845848, 0.215957291845848, -0.215957291845848, -0.215957291845848, + 0.215957291845848, 0.215957291845848, -0.215957291845848, -0.215957291845848, 0.690421048382292, + 0.690421048382292, -0.690421048382292, -0.690421048382292, 0.690421048382292, 0.690421048382292, + -0.690421048382292, -0.690421048382292, 0.395689473055942, 0.395689473055942, -0.395689473055942, + -0.395689473055942, 0.395689473055942, 0.395689473055942, -0.395689473055942, -0.395689473055942, + 0.828769981252592, 0.828769981252592, -0.828769981252592, -0.828769981252592, 0.828769981252592, + 0.828769981252592, -0.828769981252592, -0.828769981252592, 0.395689473055942, 0.395689473055942, + -0.395689473055942, -0.395689473055942, 0.395689473055942, 0.395689473055942, -0.395689473055942, + -0.395689473055942, 0.878158910604066, 0.878158910604066, -0.878158910604066, -0.878158910604066, + 0.478369028812150, 0.478369028812150, -0.478369028812150, -0.478369028812150, 0.000000000000000, + 0.000000000000000, 
0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, + 0.000000000000000, 0.000000000000000, 0.478369028812150, -0.478369028812150, 0.478369028812150, + -0.478369028812150, 0.878158910604066, -0.878158910604066, 0.878158910604066, -0.878158910604066}; + +const double Integral::Lebedev_Laikov_grid110_z[110] + = {0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, 1.000000000000000, + -1.000000000000000, 0.577350269189626, 0.577350269189626, 0.577350269189626, 0.577350269189626, + -0.577350269189626, -0.577350269189626, -0.577350269189626, -0.577350269189626, 0.965124035086594, + 0.965124035086594, 0.965124035086594, 0.965124035086594, -0.965124035086594, -0.965124035086594, + -0.965124035086594, -0.965124035086594, 0.185115635344736, 0.185115635344736, 0.185115635344736, + 0.185115635344736, -0.185115635344736, -0.185115635344736, -0.185115635344736, -0.185115635344736, + 0.185115635344736, 0.185115635344736, 0.185115635344736, 0.185115635344736, -0.185115635344736, + -0.185115635344736, -0.185115635344736, -0.185115635344736, 0.215957291845848, 0.215957291845848, + 0.215957291845848, 0.215957291845848, -0.215957291845848, -0.215957291845848, -0.215957291845848, + -0.215957291845848, 0.690421048382292, 0.690421048382292, 0.690421048382292, 0.690421048382292, + -0.690421048382292, -0.690421048382292, -0.690421048382292, -0.690421048382292, 0.690421048382292, + 0.690421048382292, 0.690421048382292, 0.690421048382292, -0.690421048382292, -0.690421048382292, + -0.690421048382292, -0.690421048382292, 0.828769981252592, 0.828769981252592, 0.828769981252592, + 0.828769981252592, -0.828769981252592, -0.828769981252592, -0.828769981252592, -0.828769981252592, + 0.395689473055942, 0.395689473055942, 0.395689473055942, 0.395689473055942, -0.395689473055942, + -0.395689473055942, -0.395689473055942, -0.395689473055942, 0.395689473055942, 0.395689473055942, + 0.395689473055942, 0.395689473055942, -0.395689473055942, 
-0.395689473055942, -0.395689473055942, + -0.395689473055942, 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, + 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.878158910604066, + 0.878158910604066, -0.878158910604066, -0.878158910604066, 0.478369028812150, 0.478369028812150, + -0.478369028812150, -0.478369028812150, 0.878158910604066, 0.878158910604066, -0.878158910604066, + -0.878158910604066, 0.478369028812150, 0.478369028812150, -0.478369028812150, -0.478369028812150}; + +const double Integral::Lebedev_Laikov_grid110_w[110] + = {0.048107465851397, 0.048107465851397, 0.048107465851397, 0.048107465851397, 0.048107465851397, 0.048107465851397, + 0.123071735281670, 0.123071735281670, 0.123071735281670, 0.123071735281670, 0.123071735281670, 0.123071735281670, + 0.123071735281670, 0.123071735281670, 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, + 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, + 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, + 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, 0.103191734088330, + 0.103191734088330, 0.103191734088330, 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, + 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, + 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, + 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, 0.124945096872513, + 0.124945096872513, 0.124945096872513, 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, + 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, + 
0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, + 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, 0.120580249028528, + 0.120580249028528, 0.120580249028528, 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, + 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, + 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, + 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, 0.121830917385521, + 0.121830917385521, 0.121830917385521}; +} // namespace ModuleBase diff --git a/source/source_base/math_integral.h b/source/source_base/math_integral.h index 4e32889e5f0..2bb8c20f9fc 100644 --- a/source/source_base/math_integral.h +++ b/source/source_base/math_integral.h @@ -8,14 +8,13 @@ namespace ModuleBase class Integral { - public: - - Integral(); - ~Integral(); + public: + Integral (); + ~Integral (); /** * @brief simpson integral. - * + * * @param mesh [in] number of grid points (should be odd) * @param func [in] function to be integrated * @param rab [in] a list of interval @@ -23,17 +22,11 @@ class Integral * @author Peize Lin * @date 2017-10-02 */ - static void Simpson_Integral - ( - const int mesh, - const double * const func, - const double * const rab, - double &asum - ); + static void Simpson_Integral (const int mesh, const double* const func, const double* const rab, double& asum); /** - * @brief simpson integral. - * + * @brief simpson integral. 
+ * * @param mesh [in] number of grid points (should be odd) * @param func [in] function to be integrated * @param dr [in] interval @@ -41,84 +34,74 @@ class Integral * @author Peize Lin * @date 2017-10-02 */ - static void Simpson_Integral - ( - const int mesh, - const double * const func, - const double dr, - double &asum - ); + static void Simpson_Integral (const int mesh, const double* const func, const double dr, double& asum); /** - * @brief simpson integral. - * - * - * @param mesh [in] number of grid points + * @brief simpson integral. + * + * + * @param mesh [in] number of grid points * @param func [in] function to be integrated * @param rab [in] a list of interval - * @param asum [out] a list of integral value. asum[i] = integral from 0 to i. The max index of asum is an even (mesh-1 or mesh). + * @param asum [out] a list of integral value. asum[i] = integral from 0 to i. The max index of asum is an even + * (mesh-1 or mesh). * @author Peize Lin * @date 2016-02-14 */ - static void Simpson_Integral_0toall - ( - const int mesh, - const double * const func, - const double * const rab, - double * const asum - ); + static void + Simpson_Integral_0toall (const int mesh, const double* const func, const double* const rab, double* const asum); /** - * @brief simpson integral. - * - * @param mesh [in] number of grid points - * @param func [in] function to be integrated - * @param rab [in] r(i) * dr(i)/di * di or (b-a)/2n - * @param asum [out] a list of integral value. sum[i] = integral from i to mesh-1. + * @brief simpson integral. + * + * @param mesh [in] number of grid points + * @param func [in] function to be integrated + * @param rab [in] r(i) * dr(i)/di * di or (b-a)/2n + * @param asum [out] a list of integral value. sum[i] = integral from i to mesh-1. 
* @author Peize Lin * @date 2016-02-14 */ - static void Simpson_Integral_alltoinf - ( - const int mesh, - const double * const func, - const double * const rab, - double * const asum - ); + static void Simpson_Integral_alltoinf (const int mesh, + const double* const func, + const double* const rab, + double* const asum); //! Numerical integration on an evenly-spaced grid using Simpson's rule - static double simpson(const int n, //!< number of grid points - const double* const f, //!< function values at grid points - const double dx //!< grid spacing + static double simpson (const int n, //!< number of grid points + const double* const f, //!< function values at grid points + const double dx //!< grid spacing ); //! Numerical integration on an irregularly-spaced grid using Simpson's rule - static double simpson(const int n, //!< number of grid points - const double* const f, //!< function values at grid points - const double* const h //!< grid spacing of length n-1, must be positive + static double simpson (const int n, //!< number of grid points + const double* const f, //!< function values at grid points + const double* const h //!< grid spacing of length n-1, must be positive ); - // Grid points and weights used to generate Gauss_Legendre integrals. Returns x in [-1, 1], the integration area is -1, 1. - // https://en.wikipedia.org/wiki/Gauss%E2%80%93Legendre_quadrature + // Grid points and weights used to generate Gauss_Legendre integrals. Returns x in [-1, 1], the integration area is + // -1, 1. https://en.wikipedia.org/wiki/Gauss%E2%80%93Legendre_quadrature // \int_{-1}^{1} f(x) dx \approx \sum_{i = 1}^{n} w_{i} f(x_{i}) // n is the number of sample points used, // w_{i} are quadrature weights // x_{i} are the roots of the n-th Legendre polynomial. 
- static void Gauss_Legendre_grid_and_weight(const int n, double *x, double *weights); + static void Gauss_Legendre_grid_and_weight (const int n, double* x, double* weights); - // Grid points and weights used to generate Gauss_Legendre integrals. Returns x in [xmin, xmax], the integration area is at xmin, xmax. + // Grid points and weights used to generate Gauss_Legendre integrals. Returns x in [xmin, xmax], the integration + // area is at xmin, xmax. // \int_{-1}^{1} f(x) dx \approx \sum_{i = 1}^{n} w_{i} f(x_{i}) // xl = (xmax - xmin) / 2 // xmean = (xmax + xmin) / 2 // \int_{xmin}^{xmax} f(x) dx = xl \int_{-1}^{1} f(xl * t + xmean) dt - static void Gauss_Legendre_grid_and_weight(const double xmin, const double xmax, const int n, double *x, double *weights); + static void + Gauss_Legendre_grid_and_weight (const double xmin, const double xmax, const int n, double* x, double* weights); - // Grid points and weights used to generate Gauss_Legendre integrals. can be generated with function Lebedev_laikov_grid in math_lebedev_laikov.cpp + // Grid points and weights used to generate Gauss_Legendre integrals. 
can be generated with function + // Lebedev_laikov_grid in math_lebedev_laikov.cpp static const double Lebedev_Laikov_grid110_x[110]; static const double Lebedev_Laikov_grid110_y[110]; static const double Lebedev_Laikov_grid110_z[110]; static const double Lebedev_Laikov_grid110_w[110]; }; -} +} // namespace ModuleBase #endif diff --git a/source/source_base/math_lebedev_laikov.cpp b/source/source_base/math_lebedev_laikov.cpp index 52f71dffdfc..2739bc7dccf 100644 --- a/source/source_base/math_lebedev_laikov.cpp +++ b/source/source_base/math_lebedev_laikov.cpp @@ -8,54 +8,56 @@ namespace ModuleBase { -Lebedev_laikov_grid::Lebedev_laikov_grid(int degree) +Lebedev_laikov_grid::Lebedev_laikov_grid (int degree) { - auto it = allowed_degree.find(degree); - if (it == allowed_degree.end()) - { - std::cerr << "In the Lebedev_laikov_grid class, the degree = " << degree << " is not within the allowed range." - << std::endl; - assert(false); - } + auto it = allowed_degree.find (degree); + if (it == allowed_degree.end ()) + { + std::cerr << "In the Lebedev_laikov_grid class, the degree = " << degree + << " is not within the allowed range." 
<< std::endl; + assert (false); + } this->degree = degree; grid_coor = new ModuleBase::Vector3[degree]; weight = new double[degree]; } -Lebedev_laikov_grid::~Lebedev_laikov_grid() +Lebedev_laikov_grid::~Lebedev_laikov_grid () { if (grid_coor) - { - delete[] grid_coor; - grid_coor = nullptr; - } + { + delete[] grid_coor; + grid_coor = nullptr; + } if (weight) - { - delete[] weight; - weight = nullptr; - } + { + delete[] weight; + weight = nullptr; + } } -void Lebedev_laikov_grid::print_grid_and_weight(std::string filename) +void + Lebedev_laikov_grid::print_grid_and_weight (std::string filename) { std::stringstream ss; ss << filename << "_degree" << degree; - std::ofstream ofs(ss.str().c_str()); + std::ofstream ofs (ss.str ().c_str ()); ofs << "# grid: x y z weight" << std::endl; for (int i = 0; i < degree; i++) - { - ofs << std::setw(20) << std::setprecision(15) << std::fixed << grid_coor[i].x; - ofs << std::setw(20) << std::setprecision(15) << std::fixed << grid_coor[i].y; - ofs << std::setw(20) << std::setprecision(15) << std::fixed << grid_coor[i].z; - ofs << std::setw(20) << std::setprecision(15) << std::fixed << weight[i] << std::endl; - } - - ofs.close(); + { + ofs << std::setw (20) << std::setprecision (15) << std::fixed << grid_coor[i].x; + ofs << std::setw (20) << std::setprecision (15) << std::fixed << grid_coor[i].y; + ofs << std::setw (20) << std::setprecision (15) << std::fixed << grid_coor[i].z; + ofs << std::setw (20) << std::setprecision (15) << std::fixed << weight[i] << std::endl; + } + + ofs.close (); } -void Lebedev_laikov_grid::generate_grid_points() +void + Lebedev_laikov_grid::generate_grid_points () { int start = 0; double a = 0.0; @@ -64,5336 +66,5337 @@ void Lebedev_laikov_grid::generate_grid_points() double v = 0.0; switch (degree) - { - case 6: - - v = 0.1666666666666667E+0; - start = getLebedevReccurencePoints(1, start, a, b, v); - - break; - - case 14: - - v = 0.6666666666666667E-1; - start = getLebedevReccurencePoints(1, start, a, b, 
v); - v = 0.7500000000000000E-1; - start = getLebedevReccurencePoints(3, start, a, b, v); - - break; - - case 26: - - v = 0.4761904761904762E-1; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.3809523809523810E-1; - start = getLebedevReccurencePoints(2, start, a, b, v); - v = 0.3214285714285714E-1; - start = getLebedevReccurencePoints(3, start, a, b, v); - - break; - - case 38: - - v = 0.9523809523809524E-2; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.3214285714285714E-1; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.4597008433809831E+0; - v = 0.2857142857142857E-1; - start = getLebedevReccurencePoints(5, start, a, b, v); - - break; - - case 50: - - v = 0.1269841269841270E-1; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.2257495590828924E-1; - start = getLebedevReccurencePoints(2, start, a, b, v); - v = 0.2109375000000000E-1; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.3015113445777636E+0; - v = 0.2017333553791887E-1; - start = getLebedevReccurencePoints(4, start, a, b, v); - - break; - - case 74: - - v = 0.5130671797338464E-3; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.1660406956574204E-1; - start = getLebedevReccurencePoints(2, start, a, b, v); - v = -0.2958603896103896E-1; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.4803844614152614E+0; - v = 0.2657620708215946E-1; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3207726489807764E+0; - v = 0.1652217099371571E-1; - start = getLebedevReccurencePoints(5, start, a, b, v); - - break; - - case 86: - - v = 0.1154401154401154E-1; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.1194390908585628E-1; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.3696028464541502E+0; - v = 0.1111055571060340E-1; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6943540066026664E+0; - v = 0.1187650129453714E-1; - start = 
getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3742430390903412E+0; - v = 0.1181230374690448E-1; - start = getLebedevReccurencePoints(5, start, a, b, v); - - break; - - case 110: - - v = 0.3828270494937162E-2; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.9793737512487512E-2; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.1851156353447362E+0; - v = 0.8211737283191111E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6904210483822922E+0; - v = 0.9942814891178103E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3956894730559419E+0; - v = 0.9595471336070963E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4783690288121502E+0; - v = 0.9694996361663028E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - - break; - - case 146: - - v = 0.5996313688621381E-3; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.7372999718620756E-2; - start = getLebedevReccurencePoints(2, start, a, b, v); - v = 0.7210515360144488E-2; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.6764410400114264E+0; - v = 0.7116355493117555E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4174961227965453E+0; - v = 0.6753829486314477E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1574676672039082E+0; - v = 0.7574394159054034E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1403553811713183E+0; - b = 0.4493328323269557E+0; - v = 0.6991087353303262E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 170: - - v = 0.5544842902037365E-2; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.6071332770670752E-2; - start = getLebedevReccurencePoints(2, start, a, b, v); - v = 0.6383674773515093E-2; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.2551252621114134E+0; - v = 0.5183387587747790E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a 
= 0.6743601460362766E+0; - v = 0.6317929009813725E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4318910696719410E+0; - v = 0.6201670006589077E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2613931360335988E+0; - v = 0.5477143385137348E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.4990453161796037E+0; - b = 0.1446630744325115E+0; - v = 0.5968383987681156E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 194: - - v = 0.1782340447244611E-2; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.5716905949977102E-2; - start = getLebedevReccurencePoints(2, start, a, b, v); - v = 0.5573383178848738E-2; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.6712973442695226E+0; - v = 0.5608704082587997E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2892465627575439E+0; - v = 0.5158237711805383E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4446933178717437E+0; - v = 0.5518771467273614E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1299335447650067E+0; - v = 0.4106777028169394E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3457702197611283E+0; - v = 0.5051846064614808E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.1590417105383530E+0; - b = 0.8360360154824589E+0; - v = 0.5530248916233094E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 230: - - v = -0.5522639919727325E-1; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.4450274607445226E-2; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.4492044687397611E+0; - v = 0.4496841067921404E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2520419490210201E+0; - v = 0.5049153450478750E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6981906658447242E+0; - v = 0.3976408018051883E-2; - start = 
getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6587405243460960E+0; - v = 0.4401400650381014E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4038544050097660E-1; - v = 0.1724544350544401E-1; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5823842309715585E+0; - v = 0.4231083095357343E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.3545877390518688E+0; - v = 0.5198069864064399E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.2272181808998187E+0; - b = 0.4864661535886647E+0; - v = 0.4695720972568883E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 266: - - v = -0.1313769127326952E-2; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = -0.2522728704859336E-2; - start = getLebedevReccurencePoints(2, start, a, b, v); - v = 0.4186853881700583E-2; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.7039373391585475E+0; - v = 0.5315167977810885E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1012526248572414E+0; - v = 0.4047142377086219E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4647448726420539E+0; - v = 0.4112482394406990E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3277420654971629E+0; - v = 0.3595584899758782E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6620338663699974E+0; - v = 0.4256131351428158E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.8506508083520399E+0; - v = 0.4229582700647240E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.3233484542692899E+0; - b = 0.1153112011009701E+0; - v = 0.4080914225780505E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2314790158712601E+0; - b = 0.5244939240922365E+0; - v = 0.4071467593830964E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 302: - - v = 0.8545911725128148E-3; - start = 
getLebedevReccurencePoints(1, start, a, b, v); - v = 0.3599119285025571E-2; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.3515640345570105E+0; - v = 0.3449788424305883E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6566329410219612E+0; - v = 0.3604822601419882E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4729054132581005E+0; - v = 0.3576729661743367E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.9618308522614784E-1; - v = 0.2352101413689164E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2219645236294178E+0; - v = 0.3108953122413675E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7011766416089545E+0; - v = 0.3650045807677255E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2644152887060663E+0; - v = 0.2982344963171804E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5718955891878961E+0; - v = 0.3600820932216460E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.2510034751770465E+0; - b = 0.8000727494073952E+0; - v = 0.3571540554273387E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1233548532583327E+0; - b = 0.4127724083168531E+0; - v = 0.3392312205006170E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 350: - - v = 0.3006796749453936E-2; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.3050627745650771E-2; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.7068965463912316E+0; - v = 0.1621104600288991E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4794682625712025E+0; - v = 0.3005701484901752E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1927533154878019E+0; - v = 0.2990992529653774E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6930357961327123E+0; - v = 0.2982170644107595E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); 
- a = 0.3608302115520091E+0; - v = 0.2721564237310992E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6498486161496169E+0; - v = 0.3033513795811141E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1932945013230339E+0; - v = 0.3007949555218533E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.3800494919899303E+0; - v = 0.2881964603055307E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.2899558825499574E+0; - b = 0.7934537856582316E+0; - v = 0.2958357626535696E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.9684121455103957E-1; - b = 0.8280801506686862E+0; - v = 0.3036020026407088E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1833434647041659E+0; - b = 0.9074658265305127E+0; - v = 0.2832187403926303E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 434: - - v = 0.5265897968224436E-3; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.2548219972002607E-2; - start = getLebedevReccurencePoints(2, start, a, b, v); - v = 0.2512317418927307E-2; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.6909346307509111E+0; - v = 0.2530403801186355E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1774836054609158E+0; - v = 0.2014279020918528E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4914342637784746E+0; - v = 0.2501725168402936E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6456664707424256E+0; - v = 0.2513267174597564E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2861289010307638E+0; - v = 0.2302694782227416E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7568084367178018E-1; - v = 0.1462495621594614E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3927259763368002E+0; - v = 0.2445373437312980E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 
0.8818132877794288E+0; - v = 0.2417442375638981E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.9776428111182649E+0; - v = 0.1910951282179532E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.2054823696403044E+0; - b = 0.8689460322872412E+0; - v = 0.2416930044324775E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5905157048925271E+0; - b = 0.7999278543857286E+0; - v = 0.2512236854563495E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5550152361076807E+0; - b = 0.7717462626915901E+0; - v = 0.2496644054553086E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.9371809858553722E+0; - b = 0.3344363145343455E+0; - v = 0.2236607760437849E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 590: - - v = 0.3095121295306187E-3; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.1852379698597489E-2; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.7040954938227469E+0; - v = 0.1871790639277744E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6807744066455243E+0; - v = 0.1858812585438317E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6372546939258752E+0; - v = 0.1852028828296213E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5044419707800358E+0; - v = 0.1846715956151242E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4215761784010967E+0; - v = 0.1818471778162769E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3317920736472123E+0; - v = 0.1749564657281154E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2384736701421887E+0; - v = 0.1617210647254411E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1459036449157763E+0; - v = 0.1384737234851692E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6095034115507196E-1; - v = 0.9764331165051050E-3; - start = 
getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6116843442009876E+0; - v = 0.1857161196774078E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.3964755348199858E+0; - v = 0.1705153996395864E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.1724782009907724E+0; - v = 0.1300321685886048E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5610263808622060E+0; - b = 0.3518280927733519E+0; - v = 0.1842866472905286E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4742392842551980E+0; - b = 0.2634716655937950E+0; - v = 0.1802658934377451E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5984126497885380E+0; - b = 0.1816640840360209E+0; - v = 0.1849830560443660E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3791035407695563E+0; - b = 0.1720795225656878E+0; - v = 0.1713904507106709E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2778673190586244E+0; - b = 0.8213021581932511E-1; - v = 0.1555213603396808E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5033564271075117E+0; - b = 0.8999205842074875E-1; - v = 0.1802239128008525E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 770: - - v = 0.2192942088181184E-3; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.1436433617319080E-2; - start = getLebedevReccurencePoints(2, start, a, b, v); - v = 0.1421940344335877E-2; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.5087204410502360E-1; - v = 0.6798123511050502E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1228198790178831E+0; - v = 0.9913184235294912E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2026890814408786E+0; - v = 0.1180207833238949E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2847745156464294E+0; - v = 0.1296599602080921E-2; - start = getLebedevReccurencePoints(4, start, a, b, 
v); - a = 0.3656719078978026E+0; - v = 0.1365871427428316E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4428264886713469E+0; - v = 0.1402988604775325E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5140619627249735E+0; - v = 0.1418645563595609E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6306401219166803E+0; - v = 0.1421376741851662E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6716883332022612E+0; - v = 0.1423996475490962E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6979792685336881E+0; - v = 0.1431554042178567E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1446865674195309E+0; - v = 0.9254401499865368E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.3390263475411216E+0; - v = 0.1250239995053509E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5335804651263506E+0; - v = 0.1394365843329230E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.6944024393349413E-1; - b = 0.2355187894242326E+0; - v = 0.1127089094671749E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2269004109529460E+0; - b = 0.4102182474045730E+0; - v = 0.1345753760910670E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.8025574607775339E-1; - b = 0.6214302417481605E+0; - v = 0.1424957283316783E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1467999527896572E+0; - b = 0.3245284345717394E+0; - v = 0.1261523341237750E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1571507769824727E+0; - b = 0.5224482189696630E+0; - v = 0.1392547106052696E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2365702993157246E+0; - b = 0.6017546634089558E+0; - v = 0.1418761677877656E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.7714815866765732E-1; - b = 0.4346575516141163E+0; - v = 0.1338366684479554E-2; - start = 
getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3062936666210730E+0; - b = 0.4908826589037616E+0; - v = 0.1393700862676131E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3822477379524787E+0; - b = 0.5648768149099500E+0; - v = 0.1415914757466932E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 974: - - v = 0.1438294190527431E-3; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.1125772288287004E-2; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.4292963545341347E-1; - v = 0.4948029341949241E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1051426854086404E+0; - v = 0.7357990109125470E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1750024867623087E+0; - v = 0.8889132771304384E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2477653379650257E+0; - v = 0.9888347838921435E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3206567123955957E+0; - v = 0.1053299681709471E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3916520749849983E+0; - v = 0.1092778807014578E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4590825874187624E+0; - v = 0.1114389394063227E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5214563888415861E+0; - v = 0.1123724788051555E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6253170244654199E+0; - v = 0.1125239325243814E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6637926744523170E+0; - v = 0.1126153271815905E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6910410398498301E+0; - v = 0.1130286931123841E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7052907007457760E+0; - v = 0.1134986534363955E-2; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1236686762657990E+0; - v = 0.6823367927109931E-3; - start = 
getLebedevReccurencePoints(5, start, a, b, v); - a = 0.2940777114468387E+0; - v = 0.9454158160447096E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.4697753849207649E+0; - v = 0.1074429975385679E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.6334563241139567E+0; - v = 0.1129300086569132E-2; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5974048614181342E-1; - b = 0.2029128752777523E+0; - v = 0.8436884500901954E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1375760408473636E+0; - b = 0.4602621942484054E+0; - v = 0.1075255720448885E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3391016526336286E+0; - b = 0.5030673999662036E+0; - v = 0.1108577236864462E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1271675191439820E+0; - b = 0.2817606422442134E+0; - v = 0.9566475323783357E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2693120740413512E+0; - b = 0.4331561291720157E+0; - v = 0.1080663250717391E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1419786452601918E+0; - b = 0.6256167358580814E+0; - v = 0.1126797131196295E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6709284600738255E-1; - b = 0.3798395216859157E+0; - v = 0.1022568715358061E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.7057738183256172E-1; - b = 0.5517505421423520E+0; - v = 0.1108960267713108E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2783888477882155E+0; - b = 0.6029619156159187E+0; - v = 0.1122790653435766E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1979578938917407E+0; - b = 0.3589606329589096E+0; - v = 0.1032401847117460E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2087307061103274E+0; - b = 0.5348666438135476E+0; - v = 0.1107249382283854E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 
0.4055122137872836E+0; - b = 0.5674997546074373E+0; - v = 0.1121780048519972E-2; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 1202: - - v = 0.1105189233267572E-3; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.9205232738090741E-3; - start = getLebedevReccurencePoints(2, start, a, b, v); - v = 0.9133159786443561E-3; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.3712636449657089E-1; - v = 0.3690421898017899E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.9140060412262223E-1; - v = 0.5603990928680660E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1531077852469906E+0; - v = 0.6865297629282609E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2180928891660612E+0; - v = 0.7720338551145630E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2839874532200175E+0; - v = 0.8301545958894795E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3491177600963764E+0; - v = 0.8686692550179628E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4121431461444309E+0; - v = 0.8927076285846890E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4718993627149127E+0; - v = 0.9060820238568219E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5273145452842337E+0; - v = 0.9119777254940867E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6209475332444019E+0; - v = 0.9128720138604181E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6569722711857291E+0; - v = 0.9130714935691735E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6841788309070143E+0; - v = 0.9152873784554116E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7012604330123631E+0; - v = 0.9187436274321654E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1072382215478166E+0; - v = 0.5176977312965694E-3; - start = 
getLebedevReccurencePoints(5, start, a, b, v); - a = 0.2582068959496968E+0; - v = 0.7331143682101417E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.4172752955306717E+0; - v = 0.8463232836379928E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5700366911792503E+0; - v = 0.9031122694253992E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.9827986018263947E+0; - b = 0.1771774022615325E+0; - v = 0.6485778453163257E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.9624249230326228E+0; - b = 0.2475716463426288E+0; - v = 0.7435030910982369E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.9402007994128811E+0; - b = 0.3354616289066489E+0; - v = 0.7998527891839054E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.9320822040143202E+0; - b = 0.3173615246611977E+0; - v = 0.8101731497468018E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.9043674199393299E+0; - b = 0.4090268427085357E+0; - v = 0.8483389574594331E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.8912407560074747E+0; - b = 0.3854291150669224E+0; - v = 0.8556299257311812E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.8676435628462708E+0; - b = 0.4932221184851285E+0; - v = 0.8803208679738260E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.8581979986041619E+0; - b = 0.4785320675922435E+0; - v = 0.8811048182425720E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.8396753624049856E+0; - b = 0.4507422593157064E+0; - v = 0.8850282341265444E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.8165288564022188E+0; - b = 0.5632123020762100E+0; - v = 0.9021342299040653E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.8015469370783529E+0; - b = 0.5434303569693900E+0; - v = 0.9010091677105086E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 
0.7773563069070351E+0; - b = 0.5123518486419871E+0; - v = 0.9022692938426915E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.7661621213900394E+0; - b = 0.6394279634749102E+0; - v = 0.9158016174693465E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.7553584143533510E+0; - b = 0.6269805509024392E+0; - v = 0.9131578003189435E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.7344305757559503E+0; - b = 0.6031161693096310E+0; - v = 0.9107813579482705E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.7043837184021765E+0; - b = 0.5693702498468441E+0; - v = 0.9105760258970126E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 1454: - - v = 0.7777160743261247E-4; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.7557646413004701E-3; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.3229290663413854E-1; - v = 0.2841633806090617E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.8036733271462222E-1; - v = 0.4374419127053555E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1354289960531653E+0; - v = 0.5417174740872172E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1938963861114426E+0; - v = 0.6148000891358593E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2537343715011275E+0; - v = 0.6664394485800705E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3135251434752570E+0; - v = 0.7025039356923220E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3721558339375338E+0; - v = 0.7268511789249627E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4286809575195696E+0; - v = 0.7422637534208629E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4822510128282994E+0; - v = 0.7509545035841214E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5320679333566263E+0; - v = 
0.7548535057718401E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6172998195394274E+0; - v = 0.7554088969774001E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6510679849127481E+0; - v = 0.7553147174442808E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6777315251687360E+0; - v = 0.7564767653292297E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6963109410648741E+0; - v = 0.7587991808518730E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7058935009831749E+0; - v = 0.7608261832033027E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.9955546194091857E+0; - v = 0.4021680447874916E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.9734115901794209E+0; - v = 0.5804871793945964E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.9275693732388626E+0; - v = 0.6792151955945159E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.8568022422795103E+0; - v = 0.7336741211286294E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.7623495553719372E+0; - v = 0.7581866300989608E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5707522908892223E+0; - b = 0.4387028039889501E+0; - v = 0.7538257859800743E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5196463388403083E+0; - b = 0.3858908414762617E+0; - v = 0.7483517247053123E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4646337531215351E+0; - b = 0.3301937372343854E+0; - v = 0.7371763661112059E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4063901697557691E+0; - b = 0.2725423573563777E+0; - v = 0.7183448895756934E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3456329466643087E+0; - b = 0.2139510237495250E+0; - v = 0.6895815529822191E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2831395121050332E+0; - b = 
0.1555922309786647E+0; - v = 0.6480105801792886E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2197682022925330E+0; - b = 0.9892878979686097E-1; - v = 0.5897558896594636E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1564696098650355E+0; - b = 0.4598642910675510E-1; - v = 0.5095708849247346E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6027356673721295E+0; - b = 0.3376625140173426E+0; - v = 0.7536906428909755E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5496032320255096E+0; - b = 0.2822301309727988E+0; - v = 0.7472505965575118E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4921707755234567E+0; - b = 0.2248632342592540E+0; - v = 0.7343017132279698E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4309422998598483E+0; - b = 0.1666224723456479E+0; - v = 0.7130871582177445E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3664108182313672E+0; - b = 0.1086964901822169E+0; - v = 0.6817022032112776E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2990189057758436E+0; - b = 0.5251989784120085E-1; - v = 0.6380941145604121E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6268724013144998E+0; - b = 0.2297523657550023E+0; - v = 0.7550381377920310E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5707324144834607E+0; - b = 0.1723080607093800E+0; - v = 0.7478646640144802E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5096360901960365E+0; - b = 0.1140238465390513E+0; - v = 0.7335918720601220E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4438729938312456E+0; - b = 0.5611522095882537E-1; - v = 0.7110120527658118E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6419978471082389E+0; - b = 0.1164174423140873E+0; - v = 0.7571363978689501E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 
0.5817218061802611E+0; - b = 0.5797589531445219E-1; - v = 0.7489908329079234E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 1730: - - v = 0.6309049437420976E-4; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.6398287705571748E-3; - start = getLebedevReccurencePoints(2, start, a, b, v); - v = 0.6357185073530720E-3; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.2860923126194662E-1; - v = 0.2221207162188168E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7142556767711522E-1; - v = 0.3475784022286848E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1209199540995559E+0; - v = 0.4350742443589804E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1738673106594379E+0; - v = 0.4978569136522127E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2284645438467734E+0; - v = 0.5435036221998053E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2834807671701512E+0; - v = 0.5765913388219542E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3379680145467339E+0; - v = 0.6001200359226003E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3911355454819537E+0; - v = 0.6162178172717512E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4422860353001403E+0; - v = 0.6265218152438485E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4907781568726057E+0; - v = 0.6323987160974212E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5360006153211468E+0; - v = 0.6350767851540569E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6142105973596603E+0; - v = 0.6354362775297107E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6459300387977504E+0; - v = 0.6352302462706235E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6718056125089225E+0; - v = 0.6358117881417972E-3; - start = 
getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6910888533186254E+0; - v = 0.6373101590310117E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7030467416823252E+0; - v = 0.6390428961368665E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.8354951166354646E-1; - v = 0.3186913449946576E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.2050143009099486E+0; - v = 0.4678028558591711E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.3370208290706637E+0; - v = 0.5538829697598626E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.4689051484233963E+0; - v = 0.6044475907190476E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5939400424557334E+0; - v = 0.6313575103509012E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.1394983311832261E+0; - b = 0.4097581162050343E-1; - v = 0.4078626431855630E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1967999180485014E+0; - b = 0.8851987391293348E-1; - v = 0.4759933057812725E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2546183732548967E+0; - b = 0.1397680182969819E+0; - v = 0.5268151186413440E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3121281074713875E+0; - b = 0.1929452542226526E+0; - v = 0.5643048560507316E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3685981078502492E+0; - b = 0.2467898337061562E+0; - v = 0.5914501076613073E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4233760321547856E+0; - b = 0.3003104124785409E+0; - v = 0.6104561257874195E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4758671236059246E+0; - b = 0.3526684328175033E+0; - v = 0.6230252860707806E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5255178579796463E+0; - b = 0.4031134861145713E+0; - v = 0.6305618761760796E-3; - start = getLebedevReccurencePoints(6, start, a, b, 
v); - a = 0.5718025633734589E+0; - b = 0.4509426448342351E+0; - v = 0.6343092767597889E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2686927772723415E+0; - b = 0.4711322502423248E-1; - v = 0.5176268945737826E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3306006819904809E+0; - b = 0.9784487303942695E-1; - v = 0.5564840313313692E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3904906850594983E+0; - b = 0.1505395810025273E+0; - v = 0.5856426671038980E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4479957951904390E+0; - b = 0.2039728156296050E+0; - v = 0.6066386925777091E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5027076848919780E+0; - b = 0.2571529941121107E+0; - v = 0.6208824962234458E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5542087392260217E+0; - b = 0.3092191375815670E+0; - v = 0.6296314297822907E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6020850887375187E+0; - b = 0.3593807506130276E+0; - v = 0.6340423756791859E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4019851409179594E+0; - b = 0.5063389934378671E-1; - v = 0.5829627677107342E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4635614567449800E+0; - b = 0.1032422269160612E+0; - v = 0.6048693376081110E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5215860931591575E+0; - b = 0.1566322094006254E+0; - v = 0.6202362317732461E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5758202499099271E+0; - b = 0.2098082827491099E+0; - v = 0.6299005328403779E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6259893683876795E+0; - b = 0.2618824114553391E+0; - v = 0.6347722390609353E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5313795124811891E+0; - b = 0.5263245019338556E-1; - v = 0.6203778981238834E-3; - start = 
getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5893317955931995E+0; - b = 0.1061059730982005E+0; - v = 0.6308414671239979E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6426246321215801E+0; - b = 0.1594171564034221E+0; - v = 0.6362706466959498E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6511904367376113E+0; - b = 0.5354789536565540E-1; - v = 0.6375414170333233E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 2030: - - v = 0.4656031899197431E-4; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.5421549195295507E-3; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.2540835336814348E-1; - v = 0.1778522133346553E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6399322800504915E-1; - v = 0.2811325405682796E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1088269469804125E+0; - v = 0.3548896312631459E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1570670798818287E+0; - v = 0.4090310897173364E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2071163932282514E+0; - v = 0.4493286134169965E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2578914044450844E+0; - v = 0.4793728447962723E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3085687558169623E+0; - v = 0.5015415319164265E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3584719706267024E+0; - v = 0.5175127372677937E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4070135594428709E+0; - v = 0.5285522262081019E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4536618626222638E+0; - v = 0.5356832703713962E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4979195686463577E+0; - v = 0.5397914736175170E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5393075111126999E+0; - v = 
0.5416899441599930E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6115617676843916E+0; - v = 0.5419308476889938E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6414308435160159E+0; - v = 0.5416936902030596E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6664099412721607E+0; - v = 0.5419544338703164E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6859161771214913E+0; - v = 0.5428983656630975E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6993625593503890E+0; - v = 0.5442286500098193E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7062393387719380E+0; - v = 0.5452250345057301E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7479028168349763E-1; - v = 0.2568002497728530E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.1848951153969366E+0; - v = 0.3827211700292145E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.3059529066581305E+0; - v = 0.4579491561917824E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.4285556101021362E+0; - v = 0.5042003969083574E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5468758653496526E+0; - v = 0.5312708889976025E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.6565821978343439E+0; - v = 0.5438401790747117E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.1253901572367117E+0; - b = 0.3681917226439641E-1; - v = 0.3316041873197344E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1775721510383941E+0; - b = 0.7982487607213301E-1; - v = 0.3899113567153771E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2305693358216114E+0; - b = 0.1264640966592335E+0; - v = 0.4343343327201309E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2836502845992063E+0; - b = 0.1751585683418957E+0; - v = 0.4679415262318919E-3; - start = 
getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3361794746232590E+0; - b = 0.2247995907632670E+0; - v = 0.4930847981631031E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3875979172264824E+0; - b = 0.2745299257422246E+0; - v = 0.5115031867540091E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4374019316999074E+0; - b = 0.3236373482441118E+0; - v = 0.5245217148457367E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4851275843340022E+0; - b = 0.3714967859436741E+0; - v = 0.5332041499895321E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5303391803806868E+0; - b = 0.4175353646321745E+0; - v = 0.5384583126021542E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5726197380596287E+0; - b = 0.4612084406355461E+0; - v = 0.5411067210798852E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2431520732564863E+0; - b = 0.4258040133043952E-1; - v = 0.4259797391468714E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3002096800895869E+0; - b = 0.8869424306722721E-1; - v = 0.4604931368460021E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3558554457457432E+0; - b = 0.1368811706510655E+0; - v = 0.4871814878255202E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4097782537048887E+0; - b = 0.1860739985015033E+0; - v = 0.5072242910074885E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4616337666067458E+0; - b = 0.2354235077395853E+0; - v = 0.5217069845235350E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5110707008417874E+0; - b = 0.2842074921347011E+0; - v = 0.5315785966280310E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5577415286163795E+0; - b = 0.3317784414984102E+0; - v = 0.5376833708758905E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6013060431366950E+0; - b = 0.3775299002040700E+0; - v = 
0.5408032092069521E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3661596767261781E+0; - b = 0.4599367887164592E-1; - v = 0.4842744917904866E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4237633153506581E+0; - b = 0.9404893773654421E-1; - v = 0.5048926076188130E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4786328454658452E+0; - b = 0.1431377109091971E+0; - v = 0.5202607980478373E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5305702076789774E+0; - b = 0.1924186388843570E+0; - v = 0.5309932388325743E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5793436224231788E+0; - b = 0.2411590944775190E+0; - v = 0.5377419770895208E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6247069017094747E+0; - b = 0.2886871491583605E+0; - v = 0.5411696331677717E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4874315552535204E+0; - b = 0.4804978774953206E-1; - v = 0.5197996293282420E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5427337322059053E+0; - b = 0.9716857199366665E-1; - v = 0.5311120836622945E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5943493747246700E+0; - b = 0.1465205839795055E+0; - v = 0.5384309319956951E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6421314033564943E+0; - b = 0.1953579449803574E+0; - v = 0.5421859504051886E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6020628374713980E+0; - b = 0.4916375015738108E-1; - v = 0.5390948355046314E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6529222529856881E+0; - b = 0.9861621540127005E-1; - v = 0.5433312705027845E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 2354: - - v = 0.3922616270665292E-4; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.4703831750854424E-3; - start = getLebedevReccurencePoints(2, start, 
a, b, v); - v = 0.4678202801282136E-3; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.2290024646530589E-1; - v = 0.1437832228979900E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5779086652271284E-1; - v = 0.2303572493577644E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.9863103576375984E-1; - v = 0.2933110752447454E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1428155792982185E+0; - v = 0.3402905998359838E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1888978116601463E+0; - v = 0.3759138466870372E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2359091682970210E+0; - v = 0.4030638447899798E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2831228833706171E+0; - v = 0.4236591432242211E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3299495857966693E+0; - v = 0.4390522656946746E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3758840802660796E+0; - v = 0.4502523466626247E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4204751831009480E+0; - v = 0.4580577727783541E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4633068518751051E+0; - v = 0.4631391616615899E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5039849474507313E+0; - v = 0.4660928953698676E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5421265793440747E+0; - v = 0.4674751807936953E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6092660230557310E+0; - v = 0.4676414903932920E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6374654204984869E+0; - v = 0.4674086492347870E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6615136472609892E+0; - v = 0.4674928539483207E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6809487285958127E+0; - v = 0.4680748979686447E-3; - 
start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6952980021665196E+0; - v = 0.4690449806389040E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7041245497695400E+0; - v = 0.4699877075860818E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6744033088306065E-1; - v = 0.2099942281069176E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.1678684485334166E+0; - v = 0.3172269150712804E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.2793559049539613E+0; - v = 0.3832051358546523E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.3935264218057639E+0; - v = 0.4252193818146985E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5052629268232558E+0; - v = 0.4513807963755000E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.6107905315437531E+0; - v = 0.4657797469114178E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.1135081039843524E+0; - b = 0.3331954884662588E-1; - v = 0.2733362800522836E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1612866626099378E+0; - b = 0.7247167465436538E-1; - v = 0.3235485368463559E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2100786550168205E+0; - b = 0.1151539110849745E+0; - v = 0.3624908726013453E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2592282009459942E+0; - b = 0.1599491097143677E+0; - v = 0.3925540070712828E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3081740561320203E+0; - b = 0.2058699956028027E+0; - v = 0.4156129781116235E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3564289781578164E+0; - b = 0.2521624953502911E+0; - v = 0.4330644984623263E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4035587288240703E+0; - b = 0.2982090785797674E+0; - v = 0.4459677725921312E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 
0.4491671196373903E+0; - b = 0.3434762087235733E+0; - v = 0.4551593004456795E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4928854782917489E+0; - b = 0.3874831357203437E+0; - v = 0.4613341462749918E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5343646791958988E+0; - b = 0.4297814821746926E+0; - v = 0.4651019618269806E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5732683216530990E+0; - b = 0.4699402260943537E+0; - v = 0.4670249536100625E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2214131583218986E+0; - b = 0.3873602040643895E-1; - v = 0.3549555576441708E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2741796504750071E+0; - b = 0.8089496256902013E-1; - v = 0.3856108245249010E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3259797439149485E+0; - b = 0.1251732177620872E+0; - v = 0.4098622845756882E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3765441148826891E+0; - b = 0.1706260286403185E+0; - v = 0.4286328604268950E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4255773574530558E+0; - b = 0.2165115147300408E+0; - v = 0.4427802198993945E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4727795117058430E+0; - b = 0.2622089812225259E+0; - v = 0.4530473511488561E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5178546895819012E+0; - b = 0.3071721431296201E+0; - v = 0.4600805475703138E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5605141192097460E+0; - b = 0.3508998998801138E+0; - v = 0.4644599059958017E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6004763319352512E+0; - b = 0.3929160876166931E+0; - v = 0.4667274455712508E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3352842634946949E+0; - b = 0.4202563457288019E-1; - v = 0.4069360518020356E-3; - start = getLebedevReccurencePoints(6, start, 
a, b, v); - a = 0.3891971629814670E+0; - b = 0.8614309758870850E-1; - v = 0.4260442819919195E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4409875565542281E+0; - b = 0.1314500879380001E+0; - v = 0.4408678508029063E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4904893058592484E+0; - b = 0.1772189657383859E+0; - v = 0.4518748115548597E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5375056138769549E+0; - b = 0.2228277110050294E+0; - v = 0.4595564875375116E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5818255708669969E+0; - b = 0.2677179935014386E+0; - v = 0.4643988774315846E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6232334858144959E+0; - b = 0.3113675035544165E+0; - v = 0.4668827491646946E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4489485354492058E+0; - b = 0.4409162378368174E-1; - v = 0.4400541823741973E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5015136875933150E+0; - b = 0.8939009917748489E-1; - v = 0.4514512890193797E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5511300550512623E+0; - b = 0.1351806029383365E+0; - v = 0.4596198627347549E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5976720409858000E+0; - b = 0.1808370355053196E+0; - v = 0.4648659016801781E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6409956378989354E+0; - b = 0.2257852192301602E+0; - v = 0.4675502017157673E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5581222330827514E+0; - b = 0.4532173421637160E-1; - v = 0.4598494476455523E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6074705984161695E+0; - b = 0.9117488031840314E-1; - v = 0.4654916955152048E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6532272537379033E+0; - b = 0.1369294213140155E+0; - v = 0.4684709779505137E-3; - start = 
getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6594761494500487E+0; - b = 0.4589901487275583E-1; - v = 0.4691445539106986E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 2702: - - v = 0.2998675149888161E-4; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.4077860529495355E-3; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.2065562538818703E-1; - v = 0.1185349192520667E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5250918173022379E-1; - v = 0.1913408643425751E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.8993480082038376E-1; - v = 0.2452886577209897E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1306023924436019E+0; - v = 0.2862408183288702E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1732060388531418E+0; - v = 0.3178032258257357E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2168727084820249E+0; - v = 0.3422945667633690E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2609528309173586E+0; - v = 0.3612790520235922E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3049252927938952E+0; - v = 0.3758638229818521E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3483484138084404E+0; - v = 0.3868711798859953E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3908321549106406E+0; - v = 0.3949429933189938E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4320210071894814E+0; - v = 0.4006068107541156E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4715824795890053E+0; - v = 0.4043192149672723E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5091984794078453E+0; - v = 0.4064947495808078E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5445580145650803E+0; - v = 0.4075245619813152E-3; - start = getLebedevReccurencePoints(4, start, a, b, 
v); - a = 0.6072575796841768E+0; - v = 0.4076423540893566E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6339484505755803E+0; - v = 0.4074280862251555E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6570718257486958E+0; - v = 0.4074163756012244E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6762557330090709E+0; - v = 0.4077647795071246E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6911161696923790E+0; - v = 0.4084517552782530E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7012841911659961E+0; - v = 0.4092468459224052E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7064559272410020E+0; - v = 0.4097872687240906E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6123554989894765E-1; - v = 0.1738986811745028E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.1533070348312393E+0; - v = 0.2659616045280191E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.2563902605244206E+0; - v = 0.3240596008171533E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.3629346991663361E+0; - v = 0.3621195964432943E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.4683949968987538E+0; - v = 0.3868838330760539E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5694479240657952E+0; - v = 0.4018911532693111E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.6634465430993955E+0; - v = 0.4089929432983252E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.1033958573552305E+0; - b = 0.3034544009063584E-1; - v = 0.2279907527706409E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1473521412414395E+0; - b = 0.6618803044247135E-1; - v = 0.2715205490578897E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1924552158705967E+0; - b = 0.1054431128987715E+0; - v = 0.3057917896703976E-3; - start = 
getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2381094362890328E+0; - b = 0.1468263551238858E+0; - v = 0.3326913052452555E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2838121707936760E+0; - b = 0.1894486108187886E+0; - v = 0.3537334711890037E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3291323133373415E+0; - b = 0.2326374238761579E+0; - v = 0.3700567500783129E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3736896978741460E+0; - b = 0.2758485808485768E+0; - v = 0.3825245372589122E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4171406040760013E+0; - b = 0.3186179331996921E+0; - v = 0.3918125171518296E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4591677985256915E+0; - b = 0.3605329796303794E+0; - v = 0.3984720419937579E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4994733831718418E+0; - b = 0.4012147253586509E+0; - v = 0.4029746003338211E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5377731830445096E+0; - b = 0.4403050025570692E+0; - v = 0.4057428632156627E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5737917830001331E+0; - b = 0.4774565904277483E+0; - v = 0.4071719274114857E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2027323586271389E+0; - b = 0.3544122504976147E-1; - v = 0.2990236950664119E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2516942375187273E+0; - b = 0.7418304388646328E-1; - v = 0.3262951734212878E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3000227995257181E+0; - b = 0.1150502745727186E+0; - v = 0.3482634608242413E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3474806691046342E+0; - b = 0.1571963371209364E+0; - v = 0.3656596681700892E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3938103180359209E+0; - b = 0.1999631877247100E+0; - v = 
0.3791740467794218E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4387519590455703E+0; - b = 0.2428073457846535E+0; - v = 0.3894034450156905E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4820503960077787E+0; - b = 0.2852575132906155E+0; - v = 0.3968600245508371E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5234573778475101E+0; - b = 0.3268884208674639E+0; - v = 0.4019931351420050E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5627318647235282E+0; - b = 0.3673033321675939E+0; - v = 0.4052108801278599E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5996390607156954E+0; - b = 0.4061211551830290E+0; - v = 0.4068978613940934E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3084780753791947E+0; - b = 0.3860125523100059E-1; - v = 0.3454275351319704E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3589988275920223E+0; - b = 0.7928938987104867E-1; - v = 0.3629963537007920E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4078628415881973E+0; - b = 0.1212614643030087E+0; - v = 0.3770187233889873E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4549287258889735E+0; - b = 0.1638770827382693E+0; - v = 0.3878608613694378E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5000278512957279E+0; - b = 0.2065965798260176E+0; - v = 0.3959065270221274E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5429785044928199E+0; - b = 0.2489436378852235E+0; - v = 0.4015286975463570E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5835939850491711E+0; - b = 0.2904811368946891E+0; - v = 0.4050866785614717E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6216870353444856E+0; - b = 0.3307941957666609E+0; - v = 0.4069320185051913E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4151104662709091E+0; - b = 
0.4064829146052554E-1; - v = 0.3760120964062763E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4649804275009218E+0; - b = 0.8258424547294755E-1; - v = 0.3870969564418064E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5124695757009662E+0; - b = 0.1251841962027289E+0; - v = 0.3955287790534055E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5574711100606224E+0; - b = 0.1679107505976331E+0; - v = 0.4015361911302668E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5998597333287227E+0; - b = 0.2102805057358715E+0; - v = 0.4053836986719548E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6395007148516600E+0; - b = 0.2518418087774107E+0; - v = 0.4073578673299117E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5188456224746252E+0; - b = 0.4194321676077518E-1; - v = 0.3954628379231406E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5664190707942778E+0; - b = 0.8457661551921499E-1; - v = 0.4017645508847530E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6110464353283153E+0; - b = 0.1273652932519396E+0; - v = 0.4059030348651293E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6526430302051563E+0; - b = 0.1698173239076354E+0; - v = 0.4080565809484880E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6167551880377548E+0; - b = 0.4266398851548864E-1; - v = 0.4063018753664651E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6607195418355383E+0; - b = 0.8551925814238349E-1; - v = 0.4087191292799671E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 3074: - - v = 0.2599095953754734E-4; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.3603134089687541E-3; - start = getLebedevReccurencePoints(2, start, a, b, v); - v = 0.3586067974412447E-3; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 
0.1886108518723392E-1; - v = 0.9831528474385880E-4; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4800217244625303E-1; - v = 0.1605023107954450E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.8244922058397242E-1; - v = 0.2072200131464099E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1200408362484023E+0; - v = 0.2431297618814187E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1595773530809965E+0; - v = 0.2711819064496707E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2002635973434064E+0; - v = 0.2932762038321116E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2415127590139982E+0; - v = 0.3107032514197368E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2828584158458477E+0; - v = 0.3243808058921213E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3239091015338138E+0; - v = 0.3349899091374030E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3643225097962194E+0; - v = 0.3430580688505218E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4037897083691802E+0; - v = 0.3490124109290343E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4420247515194127E+0; - v = 0.3532148948561955E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4787572538464938E+0; - v = 0.3559862669062833E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5137265251275234E+0; - v = 0.3576224317551411E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5466764056654611E+0; - v = 0.3584050533086076E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6054859420813535E+0; - v = 0.3584903581373224E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6308106701764562E+0; - v = 0.3582991879040586E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6530369230179584E+0; - v = 
0.3582371187963125E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6718609524611158E+0; - v = 0.3584353631122350E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6869676499894013E+0; - v = 0.3589120166517785E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6980467077240748E+0; - v = 0.3595445704531601E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7048241721250522E+0; - v = 0.3600943557111074E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5591105222058232E-1; - v = 0.1456447096742039E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.1407384078513916E+0; - v = 0.2252370188283782E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.2364035438976309E+0; - v = 0.2766135443474897E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.3360602737818170E+0; - v = 0.3110729491500851E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.4356292630054665E+0; - v = 0.3342506712303391E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5321569415256174E+0; - v = 0.3491981834026860E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.6232956305040554E+0; - v = 0.3576003604348932E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.9469870086838469E-1; - b = 0.2778748387309470E-1; - v = 0.1921921305788564E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1353170300568141E+0; - b = 0.6076569878628364E-1; - v = 0.2301458216495632E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1771679481726077E+0; - b = 0.9703072762711040E-1; - v = 0.2604248549522893E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2197066664231751E+0; - b = 0.1354112458524762E+0; - v = 0.2845275425870697E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2624783557374927E+0; - b = 0.1750996479744100E+0; - v = 
0.3036870897974840E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3050969521214442E+0; - b = 0.2154896907449802E+0; - v = 0.3188414832298066E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3472252637196021E+0; - b = 0.2560954625740152E+0; - v = 0.3307046414722089E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3885610219026360E+0; - b = 0.2965070050624096E+0; - v = 0.3398330969031360E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4288273776062765E+0; - b = 0.3363641488734497E+0; - v = 0.3466757899705373E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4677662471302948E+0; - b = 0.3753400029836788E+0; - v = 0.3516095923230054E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5051333589553359E+0; - b = 0.4131297522144286E+0; - v = 0.3549645184048486E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5406942145810492E+0; - b = 0.4494423776081795E+0; - v = 0.3570415969441392E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5742204122576457E+0; - b = 0.4839938958841502E+0; - v = 0.3581251798496118E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1865407027225188E+0; - b = 0.3259144851070796E-1; - v = 0.2543491329913348E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2321186453689432E+0; - b = 0.6835679505297343E-1; - v = 0.2786711051330776E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2773159142523882E+0; - b = 0.1062284864451989E+0; - v = 0.2985552361083679E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3219200192237254E+0; - b = 0.1454404409323047E+0; - v = 0.3145867929154039E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3657032593944029E+0; - b = 0.1854018282582510E+0; - v = 0.3273290662067609E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4084376778363622E+0; - b = 
0.2256297412014750E+0; - v = 0.3372705511943501E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4499004945751427E+0; - b = 0.2657104425000896E+0; - v = 0.3448274437851510E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4898758141326335E+0; - b = 0.3052755487631557E+0; - v = 0.3503592783048583E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5281547442266309E+0; - b = 0.3439863920645423E+0; - v = 0.3541854792663162E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5645346989813992E+0; - b = 0.3815229456121914E+0; - v = 0.3565995517909428E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5988181252159848E+0; - b = 0.4175752420966734E+0; - v = 0.3578802078302898E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2850425424471603E+0; - b = 0.3562149509862536E-1; - v = 0.2958644592860982E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3324619433027876E+0; - b = 0.7330318886871096E-1; - v = 0.3119548129116835E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3785848333076282E+0; - b = 0.1123226296008472E+0; - v = 0.3250745225005984E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4232891028562115E+0; - b = 0.1521084193337708E+0; - v = 0.3355153415935208E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4664287050829722E+0; - b = 0.1921844459223610E+0; - v = 0.3435847568549328E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5078458493735726E+0; - b = 0.2321360989678303E+0; - v = 0.3495786831622488E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5473779816204180E+0; - b = 0.2715886486360520E+0; - v = 0.3537767805534621E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5848617133811376E+0; - b = 0.3101924707571355E+0; - v = 0.3564459815421428E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 
0.6201348281584888E+0; - b = 0.3476121052890973E+0; - v = 0.3578464061225468E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3852191185387871E+0; - b = 0.3763224880035108E-1; - v = 0.3239748762836212E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4325025061073423E+0; - b = 0.7659581935637135E-1; - v = 0.3345491784174287E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4778486229734490E+0; - b = 0.1163381306083900E+0; - v = 0.3429126177301782E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5211663693009000E+0; - b = 0.1563890598752899E+0; - v = 0.3492420343097421E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5623469504853703E+0; - b = 0.1963320810149200E+0; - v = 0.3537399050235257E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6012718188659246E+0; - b = 0.2357847407258738E+0; - v = 0.3566209152659172E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6378179206390117E+0; - b = 0.2743846121244060E+0; - v = 0.3581084321919782E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4836936460214534E+0; - b = 0.3895902610739024E-1; - v = 0.3426522117591512E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5293792562683797E+0; - b = 0.7871246819312640E-1; - v = 0.3491848770121379E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5726281253100033E+0; - b = 0.1187963808202981E+0; - v = 0.3539318235231476E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6133658776169068E+0; - b = 0.1587914708061787E+0; - v = 0.3570231438458694E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6515085491865307E+0; - b = 0.1983058575227646E+0; - v = 0.3586207335051714E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5778692716064976E+0; - b = 0.3977209689791542E-1; - v = 0.3541196205164025E-3; - start = getLebedevReccurencePoints(6, start, 
a, b, v); - a = 0.6207904288086192E+0; - b = 0.7990157592981152E-1; - v = 0.3574296911573953E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6608688171046802E+0; - b = 0.1199671308754309E+0; - v = 0.3591993279818963E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6656263089489130E+0; - b = 0.4015955957805969E-1; - v = 0.3595855034661997E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 3470: - - v = 0.2040382730826330E-4; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.3178149703889544E-3; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.1721420832906233E-1; - v = 0.8288115128076110E-4; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4408875374981770E-1; - v = 0.1360883192522954E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7594680813878681E-1; - v = 0.1766854454542662E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1108335359204799E+0; - v = 0.2083153161230153E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1476517054388567E+0; - v = 0.2333279544657158E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1856731870860615E+0; - v = 0.2532809539930247E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2243634099428821E+0; - v = 0.2692472184211158E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2633006881662727E+0; - v = 0.2819949946811885E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3021340904916283E+0; - v = 0.2920953593973030E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3405594048030089E+0; - v = 0.2999889782948352E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3783044434007372E+0; - v = 0.3060292120496902E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4151194767407910E+0; - v = 0.3105109167522192E-3; - start = 
getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4507705766443257E+0; - v = 0.3136902387550312E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4850346056573187E+0; - v = 0.3157984652454632E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5176950817792470E+0; - v = 0.3170516518425422E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5485384240820989E+0; - v = 0.3176568425633755E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6039117238943308E+0; - v = 0.3177198411207062E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6279956655573113E+0; - v = 0.3175519492394733E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6493636169568952E+0; - v = 0.3174654952634756E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6677644117704504E+0; - v = 0.3175676415467654E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6829368572115624E+0; - v = 0.3178923417835410E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6946195818184121E+0; - v = 0.3183788287531909E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7025711542057026E+0; - v = 0.3188755151918807E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7066004767140119E+0; - v = 0.3191916889313849E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5132537689946062E-1; - v = 0.1231779611744508E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.1297994661331225E+0; - v = 0.1924661373839880E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.2188852049401307E+0; - v = 0.2380881867403424E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.3123174824903457E+0; - v = 0.2693100663037885E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.4064037620738195E+0; - v = 0.2908673382834366E-3; - start = getLebedevReccurencePoints(5, start, a, b, 
v); - a = 0.4984958396944782E+0; - v = 0.3053914619381535E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5864975046021365E+0; - v = 0.3143916684147777E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.6686711634580175E+0; - v = 0.3187042244055363E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.8715738780835950E-1; - b = 0.2557175233367578E-1; - v = 0.1635219535869790E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1248383123134007E+0; - b = 0.5604823383376681E-1; - v = 0.1968109917696070E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1638062693383378E+0; - b = 0.8968568601900765E-1; - v = 0.2236754342249974E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2035586203373176E+0; - b = 0.1254086651976279E+0; - v = 0.2453186687017181E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2436798975293774E+0; - b = 0.1624780150162012E+0; - v = 0.2627551791580541E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2838207507773806E+0; - b = 0.2003422342683208E+0; - v = 0.2767654860152220E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3236787502217692E+0; - b = 0.2385628026255263E+0; - v = 0.2879467027765895E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3629849554840691E+0; - b = 0.2767731148783578E+0; - v = 0.2967639918918702E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4014948081992087E+0; - b = 0.3146542308245309E+0; - v = 0.3035900684660351E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4389818379260225E+0; - b = 0.3519196415895088E+0; - v = 0.3087338237298308E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4752331143674377E+0; - b = 0.3883050984023654E+0; - v = 0.3124608838860167E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5100457318374018E+0; - b = 0.4235613423908649E+0; - v = 
0.3150084294226743E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5432238388954868E+0; - b = 0.4574484717196220E+0; - v = 0.3165958398598402E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5745758685072442E+0; - b = 0.4897311639255524E+0; - v = 0.3174320440957372E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1723981437592809E+0; - b = 0.3010630597881105E-1; - v = 0.2182188909812599E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2149553257844597E+0; - b = 0.6326031554204694E-1; - v = 0.2399727933921445E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2573256081247422E+0; - b = 0.9848566980258631E-1; - v = 0.2579796133514652E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2993163751238106E+0; - b = 0.1350835952384266E+0; - v = 0.2727114052623535E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3407238005148000E+0; - b = 0.1725184055442181E+0; - v = 0.2846327656281355E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3813454978483264E+0; - b = 0.2103559279730725E+0; - v = 0.2941491102051334E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4209848104423343E+0; - b = 0.2482278774554860E+0; - v = 0.3016049492136107E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4594519699996300E+0; - b = 0.2858099509982883E+0; - v = 0.3072949726175648E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4965640166185930E+0; - b = 0.3228075659915428E+0; - v = 0.3114768142886460E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5321441655571562E+0; - b = 0.3589459907204151E+0; - v = 0.3143823673666223E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5660208438582166E+0; - b = 0.3939630088864310E+0; - v = 0.3162269764661535E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5980264315964364E+0; - b = 
0.4276029922949089E+0; - v = 0.3172164663759821E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2644215852350733E+0; - b = 0.3300939429072552E-1; - v = 0.2554575398967435E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3090113743443063E+0; - b = 0.6803887650078501E-1; - v = 0.2701704069135677E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3525871079197808E+0; - b = 0.1044326136206709E+0; - v = 0.2823693413468940E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3950418005354029E+0; - b = 0.1416751597517679E+0; - v = 0.2922898463214289E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4362475663430163E+0; - b = 0.1793408610504821E+0; - v = 0.3001829062162428E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4760661812145854E+0; - b = 0.2170630750175722E+0; - v = 0.3062890864542953E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5143551042512103E+0; - b = 0.2545145157815807E+0; - v = 0.3108328279264746E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5509709026935597E+0; - b = 0.2913940101706601E+0; - v = 0.3140243146201245E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5857711030329428E+0; - b = 0.3274169910910705E+0; - v = 0.3160638030977130E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6186149917404392E+0; - b = 0.3623081329317265E+0; - v = 0.3171462882206275E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3586894569557064E+0; - b = 0.3497354386450040E-1; - v = 0.2812388416031796E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4035266610019441E+0; - b = 0.7129736739757095E-1; - v = 0.2912137500288045E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4467775312332510E+0; - b = 0.1084758620193165E+0; - v = 0.2993241256502206E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 
0.4883638346608543E+0; - b = 0.1460915689241772E+0; - v = 0.3057101738983822E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5281908348434601E+0; - b = 0.1837790832369980E+0; - v = 0.3105319326251432E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5661542687149311E+0; - b = 0.2212075390874021E+0; - v = 0.3139565514428167E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6021450102031452E+0; - b = 0.2580682841160985E+0; - v = 0.3161543006806366E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6360520783610050E+0; - b = 0.2940656362094121E+0; - v = 0.3172985960613294E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4521611065087196E+0; - b = 0.3631055365867002E-1; - v = 0.2989400336901431E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4959365651560963E+0; - b = 0.7348318468484350E-1; - v = 0.3054555883947677E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5376815804038283E+0; - b = 0.1111087643812648E+0; - v = 0.3104764960807702E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5773314480243768E+0; - b = 0.1488226085145408E+0; - v = 0.3141015825977616E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6148113245575056E+0; - b = 0.1862892274135151E+0; - v = 0.3164520621159896E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6500407462842380E+0; - b = 0.2231909701714456E+0; - v = 0.3176652305912204E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5425151448707213E+0; - b = 0.3718201306118944E-1; - v = 0.3105097161023939E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5841860556907931E+0; - b = 0.7483616335067346E-1; - v = 0.3143014117890550E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6234632186851500E+0; - b = 0.1125990834266120E+0; - v = 0.3168172866287200E-3; - start = getLebedevReccurencePoints(6, start, 
a, b, v); - a = 0.6602934551848843E+0; - b = 0.1501303813157619E+0; - v = 0.3181401865570968E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6278573968375105E+0; - b = 0.3767559930245720E-1; - v = 0.3170663659156037E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6665611711264577E+0; - b = 0.7548443301360158E-1; - v = 0.3185447944625510E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 3890: - - v = 0.1807395252196920E-4; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.2848008782238827E-3; - start = getLebedevReccurencePoints(2, start, a, b, v); - v = 0.2836065837530581E-3; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.1587876419858352E-1; - v = 0.7013149266673816E-4; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4069193593751206E-1; - v = 0.1162798021956766E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7025888115257997E-1; - v = 0.1518728583972105E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1027495450028704E+0; - v = 0.1798796108216934E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1371457730893426E+0; - v = 0.2022593385972785E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1727758532671953E+0; - v = 0.2203093105575464E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2091492038929037E+0; - v = 0.2349294234299855E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2458813281751915E+0; - v = 0.2467682058747003E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2826545859450066E+0; - v = 0.2563092683572224E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3191957291799622E+0; - v = 0.2639253896763318E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3552621469299578E+0; - v = 0.2699137479265108E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 
0.3906329503406230E+0; - v = 0.2745196420166739E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4251028614093031E+0; - v = 0.2779529197397593E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4584777520111870E+0; - v = 0.2803996086684265E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4905711358710193E+0; - v = 0.2820302356715842E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5212011669847385E+0; - v = 0.2830056747491068E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5501878488737995E+0; - v = 0.2834808950776839E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6025037877479342E+0; - v = 0.2835282339078929E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6254572689549016E+0; - v = 0.2833819267065800E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6460107179528248E+0; - v = 0.2832858336906784E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6639541138154251E+0; - v = 0.2833268235451244E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6790688515667495E+0; - v = 0.2835432677029253E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6911338580371512E+0; - v = 0.2839091722743049E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6999385956126490E+0; - v = 0.2843308178875841E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7053037748656896E+0; - v = 0.2846703550533846E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4732224387180115E-1; - v = 0.1051193406971900E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.1202100529326803E+0; - v = 0.1657871838796974E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.2034304820664855E+0; - v = 0.2064648113714232E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.2912285643573002E+0; - v = 
0.2347942745819741E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.3802361792726768E+0; - v = 0.2547775326597726E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.4680598511056146E+0; - v = 0.2686876684847025E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5528151052155599E+0; - v = 0.2778665755515867E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.6329386307803041E+0; - v = 0.2830996616782929E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.8056516651369069E-1; - b = 0.2363454684003124E-1; - v = 0.1403063340168372E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1156476077139389E+0; - b = 0.5191291632545936E-1; - v = 0.1696504125939477E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1520473382760421E+0; - b = 0.8322715736994519E-1; - v = 0.1935787242745390E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1892986699745931E+0; - b = 0.1165855667993712E+0; - v = 0.2130614510521968E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2270194446777792E+0; - b = 0.1513077167409504E+0; - v = 0.2289381265931048E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2648908185093273E+0; - b = 0.1868882025807859E+0; - v = 0.2418630292816186E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3026389259574136E+0; - b = 0.2229277629776224E+0; - v = 0.2523400495631193E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3400220296151384E+0; - b = 0.2590951840746235E+0; - v = 0.2607623973449605E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3768217953335510E+0; - b = 0.2951047291750847E+0; - v = 0.2674441032689209E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4128372900921884E+0; - b = 0.3307019714169930E+0; - v = 0.2726432360343356E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 
0.4478807131815630E+0; - b = 0.3656544101087634E+0; - v = 0.2765787685924545E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4817742034089257E+0; - b = 0.3997448951939695E+0; - v = 0.2794428690642224E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5143472814653344E+0; - b = 0.4327667110812024E+0; - v = 0.2814099002062895E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5454346213905650E+0; - b = 0.4645196123532293E+0; - v = 0.2826429531578994E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5748739313170252E+0; - b = 0.4948063555703345E+0; - v = 0.2832983542550884E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1599598738286342E+0; - b = 0.2792357590048985E-1; - v = 0.1886695565284976E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1998097412500951E+0; - b = 0.5877141038139065E-1; - v = 0.2081867882748234E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2396228952566202E+0; - b = 0.9164573914691377E-1; - v = 0.2245148680600796E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2792228341097746E+0; - b = 0.1259049641962687E+0; - v = 0.2380370491511872E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3184251107546741E+0; - b = 0.1610594823400863E+0; - v = 0.2491398041852455E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3570481164426244E+0; - b = 0.1967151653460898E+0; - v = 0.2581632405881230E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3949164710492144E+0; - b = 0.2325404606175168E+0; - v = 0.2653965506227417E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4318617293970503E+0; - b = 0.2682461141151439E+0; - v = 0.2710857216747087E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4677221009931678E+0; - b = 0.3035720116011973E+0; - v = 0.2754434093903659E-3; - start = getLebedevReccurencePoints(6, start, 
a, b, v); - a = 0.5023417939270955E+0; - b = 0.3382781859197439E+0; - v = 0.2786579932519380E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5355701836636128E+0; - b = 0.3721383065625942E+0; - v = 0.2809011080679474E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5672608451328771E+0; - b = 0.4049346360466055E+0; - v = 0.2823336184560987E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5972704202540162E+0; - b = 0.4364538098633802E+0; - v = 0.2831101175806309E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2461687022333596E+0; - b = 0.3070423166833368E-1; - v = 0.2221679970354546E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2881774566286831E+0; - b = 0.6338034669281885E-1; - v = 0.2356185734270703E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3293963604116978E+0; - b = 0.9742862487067941E-1; - v = 0.2469228344805590E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3697303822241377E+0; - b = 0.1323799532282290E+0; - v = 0.2562726348642046E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4090663023135127E+0; - b = 0.1678497018129336E+0; - v = 0.2638756726753028E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4472819355411712E+0; - b = 0.2035095105326114E+0; - v = 0.2699311157390862E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4842513377231437E+0; - b = 0.2390692566672091E+0; - v = 0.2746233268403837E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5198477629962928E+0; - b = 0.2742649818076149E+0; - v = 0.2781225674454771E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5539453011883145E+0; - b = 0.3088503806580094E+0; - v = 0.2805881254045684E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5864196762401251E+0; - b = 0.3425904245906614E+0; - v = 0.2821719877004913E-3; - start = 
getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6171484466668390E+0; - b = 0.3752562294789468E+0; - v = 0.2830222502333124E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3350337830565727E+0; - b = 0.3261589934634747E-1; - v = 0.2457995956744870E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3775773224758284E+0; - b = 0.6658438928081572E-1; - v = 0.2551474407503706E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4188155229848973E+0; - b = 0.1014565797157954E+0; - v = 0.2629065335195311E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4586805892009344E+0; - b = 0.1368573320843822E+0; - v = 0.2691900449925075E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4970895714224235E+0; - b = 0.1724614851951608E+0; - v = 0.2741275485754276E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5339505133960747E+0; - b = 0.2079779381416412E+0; - v = 0.2778530970122595E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5691665792531440E+0; - b = 0.2431385788322288E+0; - v = 0.2805010567646741E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6026387682680377E+0; - b = 0.2776901883049853E+0; - v = 0.2822055834031040E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6342676150163307E+0; - b = 0.3113881356386632E+0; - v = 0.2831016901243473E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4237951119537067E+0; - b = 0.3394877848664351E-1; - v = 0.2624474901131803E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4656918683234929E+0; - b = 0.6880219556291447E-1; - v = 0.2688034163039377E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5058857069185980E+0; - b = 0.1041946859721635E+0; - v = 0.2738932751287636E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5443204666713996E+0; - b = 0.1398039738736393E+0; - v = 
0.2777944791242523E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5809298813759742E+0; - b = 0.1753373381196155E+0; - v = 0.2806011661660987E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6156416039447128E+0; - b = 0.2105215793514010E+0; - v = 0.2824181456597460E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6483801351066604E+0; - b = 0.2450953312157051E+0; - v = 0.2833585216577828E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5103616577251688E+0; - b = 0.3485560643800719E-1; - v = 0.2738165236962878E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5506738792580681E+0; - b = 0.7026308631512033E-1; - v = 0.2778365208203180E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5889573040995292E+0; - b = 0.1059035061296403E+0; - v = 0.2807852940418966E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6251641589516930E+0; - b = 0.1414823925236026E+0; - v = 0.2827245949674705E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6592414921570178E+0; - b = 0.1767207908214530E+0; - v = 0.2837342344829828E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5930314017533384E+0; - b = 0.3542189339561672E-1; - v = 0.2809233907610981E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6309812253390175E+0; - b = 0.7109574040369549E-1; - v = 0.2829930809742694E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6666296011353230E+0; - b = 0.1067259792282730E+0; - v = 0.2841097874111479E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6703715271049922E+0; - b = 0.3569455268820809E-1; - v = 0.2843455206008783E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 4334: - - v = 0.1449063022537883E-4; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.2546377329828424E-3; - start = getLebedevReccurencePoints(3, start, 
a, b, v); - a = 0.1462896151831013E-1; - v = 0.6018432961087496E-4; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3769840812493139E-1; - v = 0.1002286583263673E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6524701904096891E-1; - v = 0.1315222931028093E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.9560543416134648E-1; - v = 0.1564213746876724E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1278335898929198E+0; - v = 0.1765118841507736E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1613096104466031E+0; - v = 0.1928737099311080E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1955806225745371E+0; - v = 0.2062658534263270E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2302935218498028E+0; - v = 0.2172395445953787E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2651584344113027E+0; - v = 0.2262076188876047E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2999276825183209E+0; - v = 0.2334885699462397E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3343828669718798E+0; - v = 0.2393355273179203E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3683265013750518E+0; - v = 0.2439559200468863E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4015763206518108E+0; - v = 0.2475251866060002E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4339612026399770E+0; - v = 0.2501965558158773E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4653180651114582E+0; - v = 0.2521081407925925E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4954893331080803E+0; - v = 0.2533881002388081E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5243207068924930E+0; - v = 0.2541582900848261E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5516590479041704E+0; - v = 
0.2545365737525860E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6012371927804176E+0; - v = 0.2545726993066799E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6231574466449819E+0; - v = 0.2544456197465555E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6429416514181271E+0; - v = 0.2543481596881064E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6604124272943595E+0; - v = 0.2543506451429194E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6753851470408250E+0; - v = 0.2544905675493763E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6876717970626160E+0; - v = 0.2547611407344429E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6970895061319234E+0; - v = 0.2551060375448869E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7034746912553310E+0; - v = 0.2554291933816039E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7067017217542295E+0; - v = 0.2556255710686343E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4382223501131123E-1; - v = 0.9041339695118195E-4; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.1117474077400006E+0; - v = 0.1438426330079022E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.1897153252911440E+0; - v = 0.1802523089820518E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.2724023009910331E+0; - v = 0.2060052290565496E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.3567163308709902E+0; - v = 0.2245002248967466E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.4404784483028087E+0; - v = 0.2377059847731150E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5219833154161411E+0; - v = 0.2468118955882525E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5998179868977553E+0; - v = 0.2525410872966528E-3; - start = 
getLebedevReccurencePoints(5, start, a, b, v); - a = 0.6727803154548222E+0; - v = 0.2553101409933397E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.7476563943166086E-1; - b = 0.2193168509461185E-1; - v = 0.1212879733668632E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1075341482001416E+0; - b = 0.4826419281533887E-1; - v = 0.1472872881270931E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1416344885203259E+0; - b = 0.7751191883575742E-1; - v = 0.1686846601010828E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1766325315388586E+0; - b = 0.1087558139247680E+0; - v = 0.1862698414660208E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2121744174481514E+0; - b = 0.1413661374253096E+0; - v = 0.2007430956991861E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2479669443408145E+0; - b = 0.1748768214258880E+0; - v = 0.2126568125394796E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2837600452294113E+0; - b = 0.2089216406612073E+0; - v = 0.2224394603372113E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3193344933193984E+0; - b = 0.2431987685545972E+0; - v = 0.2304264522673135E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3544935442438745E+0; - b = 0.2774497054377770E+0; - v = 0.2368854288424087E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3890571932288154E+0; - b = 0.3114460356156915E+0; - v = 0.2420352089461772E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4228581214259090E+0; - b = 0.3449806851913012E+0; - v = 0.2460597113081295E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4557387211304052E+0; - b = 0.3778618641248256E+0; - v = 0.2491181912257687E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4875487950541643E+0; - b = 0.4099086391698978E+0; - v = 0.2513528194205857E-3; - start = 
getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5181436529962997E+0; - b = 0.4409474925853973E+0; - v = 0.2528943096693220E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5473824095600661E+0; - b = 0.4708094517711291E+0; - v = 0.2538660368488136E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5751263398976174E+0; - b = 0.4993275140354637E+0; - v = 0.2543868648299022E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1489515746840028E+0; - b = 0.2599381993267017E-1; - v = 0.1642595537825183E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1863656444351767E+0; - b = 0.5479286532462190E-1; - v = 0.1818246659849308E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2238602880356348E+0; - b = 0.8556763251425254E-1; - v = 0.1966565649492420E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2612723375728160E+0; - b = 0.1177257802267011E+0; - v = 0.2090677905657991E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2984332990206190E+0; - b = 0.1508168456192700E+0; - v = 0.2193820409510504E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3351786584663333E+0; - b = 0.1844801892177727E+0; - v = 0.2278870827661928E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3713505522209120E+0; - b = 0.2184145236087598E+0; - v = 0.2348283192282090E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4067981098954663E+0; - b = 0.2523590641486229E+0; - v = 0.2404139755581477E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4413769993687534E+0; - b = 0.2860812976901373E+0; - v = 0.2448227407760734E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4749487182516394E+0; - b = 0.3193686757808996E+0; - v = 0.2482110455592573E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5073798105075426E+0; - b = 0.3520226949547602E+0; - v = 
0.2507192397774103E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5385410448878654E+0; - b = 0.3838544395667890E+0; - v = 0.2524765968534880E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5683065353670530E+0; - b = 0.4146810037640963E+0; - v = 0.2536052388539425E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5965527620663510E+0; - b = 0.4443224094681121E+0; - v = 0.2542230588033068E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2299227700856157E+0; - b = 0.2865757664057584E-1; - v = 0.1944817013047896E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2695752998553267E+0; - b = 0.5923421684485993E-1; - v = 0.2067862362746635E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3086178716611389E+0; - b = 0.9117817776057715E-1; - v = 0.2172440734649114E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3469649871659077E+0; - b = 0.1240593814082605E+0; - v = 0.2260125991723423E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3845153566319655E+0; - b = 0.1575272058259175E+0; - v = 0.2332655008689523E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4211600033403215E+0; - b = 0.1912845163525413E+0; - v = 0.2391699681532458E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4567867834329882E+0; - b = 0.2250710177858171E+0; - v = 0.2438801528273928E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4912829319232061E+0; - b = 0.2586521303440910E+0; - v = 0.2475370504260665E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5245364793303812E+0; - b = 0.2918112242865407E+0; - v = 0.2502707235640574E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5564369788915756E+0; - b = 0.3243439239067890E+0; - v = 0.2522031701054241E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5868757697775287E+0; - b = 
0.3560536787835351E+0; - v = 0.2534511269978784E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6157458853519617E+0; - b = 0.3867480821242581E+0; - v = 0.2541284914955151E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3138461110672113E+0; - b = 0.3051374637507278E-1; - v = 0.2161509250688394E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3542495872050569E+0; - b = 0.6237111233730755E-1; - v = 0.2248778513437852E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3935751553120181E+0; - b = 0.9516223952401907E-1; - v = 0.2322388803404617E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4317634668111147E+0; - b = 0.1285467341508517E+0; - v = 0.2383265471001355E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4687413842250821E+0; - b = 0.1622318931656033E+0; - v = 0.2432476675019525E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5044274237060283E+0; - b = 0.1959581153836453E+0; - v = 0.2471122223750674E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5387354077925727E+0; - b = 0.2294888081183837E+0; - v = 0.2500291752486870E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5715768898356105E+0; - b = 0.2626031152713945E+0; - v = 0.2521055942764682E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6028627200136111E+0; - b = 0.2950904075286713E+0; - v = 0.2534472785575503E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6325039812653463E+0; - b = 0.3267458451113286E+0; - v = 0.2541599713080121E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3981986708423407E+0; - b = 0.3183291458749821E-1; - v = 0.2317380975862936E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4382791182133300E+0; - b = 0.6459548193880908E-1; - v = 0.2378550733719775E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 
0.4769233057218166E+0; - b = 0.9795757037087952E-1; - v = 0.2428884456739118E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5140823911194238E+0; - b = 0.1316307235126655E+0; - v = 0.2469002655757292E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5496977833862983E+0; - b = 0.1653556486358704E+0; - v = 0.2499657574265851E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5837047306512727E+0; - b = 0.1988931724126510E+0; - v = 0.2521676168486082E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6160349566926879E+0; - b = 0.2320174581438950E+0; - v = 0.2535935662645334E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6466185353209440E+0; - b = 0.2645106562168662E+0; - v = 0.2543356743363214E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4810835158795404E+0; - b = 0.3275917807743992E-1; - v = 0.2427353285201535E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5199925041324341E+0; - b = 0.6612546183967181E-1; - v = 0.2468258039744386E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5571717692207494E+0; - b = 0.9981498331474143E-1; - v = 0.2500060956440310E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5925789250836378E+0; - b = 0.1335687001410374E+0; - v = 0.2523238365420979E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6261658523859670E+0; - b = 0.1671444402896463E+0; - v = 0.2538399260252846E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6578811126669331E+0; - b = 0.2003106382156076E+0; - v = 0.2546255927268069E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5609624612998100E+0; - b = 0.3337500940231335E-1; - v = 0.2500583360048449E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5979959659984670E+0; - b = 0.6708750335901803E-1; - v = 0.2524777638260203E-3; - start = getLebedevReccurencePoints(6, start, 
a, b, v); - a = 0.6330523711054002E+0; - b = 0.1008792126424850E+0; - v = 0.2540951193860656E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6660960998103972E+0; - b = 0.1345050343171794E+0; - v = 0.2549524085027472E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6365384364585819E+0; - b = 0.3372799460737052E-1; - v = 0.2542569507009158E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6710994302899275E+0; - b = 0.6755249309678028E-1; - v = 0.2552114127580376E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 4802: - - v = 0.9687521879420705E-4; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.2307897895367918E-3; - start = getLebedevReccurencePoints(2, start, a, b, v); - v = 0.2297310852498558E-3; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.2335728608887064E-1; - v = 0.7386265944001919E-4; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4352987836550653E-1; - v = 0.8257977698542210E-4; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6439200521088801E-1; - v = 0.9706044762057630E-4; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.9003943631993181E-1; - v = 0.1302393847117003E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1196706615548473E+0; - v = 0.1541957004600968E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1511715412838134E+0; - v = 0.1704459770092199E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1835982828503801E+0; - v = 0.1827374890942906E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2165081259155405E+0; - v = 0.1926360817436107E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2496208720417563E+0; - v = 0.2008010239494833E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2827200673567900E+0; - v = 0.2075635983209175E-3; - start = 
getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3156190823994346E+0; - v = 0.2131306638690909E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3481476793749115E+0; - v = 0.2176562329937335E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3801466086947226E+0; - v = 0.2212682262991018E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4114652119634011E+0; - v = 0.2240799515668565E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4419598786519751E+0; - v = 0.2261959816187525E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4714925949329543E+0; - v = 0.2277156368808855E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4999293972879466E+0; - v = 0.2287351772128336E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5271387221431248E+0; - v = 0.2293490814084085E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5529896780837761E+0; - v = 0.2296505312376273E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6000856099481712E+0; - v = 0.2296793832318756E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6210562192785175E+0; - v = 0.2295785443842974E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6401165879934240E+0; - v = 0.2295017931529102E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6571144029244334E+0; - v = 0.2295059638184868E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6718910821718863E+0; - v = 0.2296232343237362E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6842845591099010E+0; - v = 0.2298530178740771E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6941353476269816E+0; - v = 0.2301579790280501E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7012965242212991E+0; - v = 0.2304690404996513E-3; - start = getLebedevReccurencePoints(4, start, a, b, 
v); - a = 0.7056471428242644E+0; - v = 0.2307027995907102E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4595557643585895E-1; - v = 0.9312274696671092E-4; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.1049316742435023E+0; - v = 0.1199919385876926E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.1773548879549274E+0; - v = 0.1598039138877690E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.2559071411236127E+0; - v = 0.1822253763574900E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.3358156837985898E+0; - v = 0.1988579593655040E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.4155835743763893E+0; - v = 0.2112620102533307E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.4937894296167472E+0; - v = 0.2201594887699007E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5691569694793316E+0; - v = 0.2261622590895036E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.6405840854894251E+0; - v = 0.2296458453435705E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.7345133894143348E-1; - b = 0.2177844081486067E-1; - v = 0.1006006990267000E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1009859834044931E+0; - b = 0.4590362185775188E-1; - v = 0.1227676689635876E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1324289619748758E+0; - b = 0.7255063095690877E-1; - v = 0.1467864280270117E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1654272109607127E+0; - b = 0.1017825451960684E+0; - v = 0.1644178912101232E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1990767186776461E+0; - b = 0.1325652320980364E+0; - v = 0.1777664890718961E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2330125945523278E+0; - b = 0.1642765374496765E+0; - v = 0.1884825664516690E-3; - start = 
getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2670080611108287E+0; - b = 0.1965360374337889E+0; - v = 0.1973269246453848E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3008753376294316E+0; - b = 0.2290726770542238E+0; - v = 0.2046767775855328E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3344475596167860E+0; - b = 0.2616645495370823E+0; - v = 0.2107600125918040E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3675709724070786E+0; - b = 0.2941150728843141E+0; - v = 0.2157416362266829E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4001000887587812E+0; - b = 0.3262440400919066E+0; - v = 0.2197557816920721E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4318956350436028E+0; - b = 0.3578835350611916E+0; - v = 0.2229192611835437E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4628239056795531E+0; - b = 0.3888751854043678E+0; - v = 0.2253385110212775E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4927563229773636E+0; - b = 0.4190678003222840E+0; - v = 0.2271137107548774E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5215687136707969E+0; - b = 0.4483151836883852E+0; - v = 0.2283414092917525E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5491402346984905E+0; - b = 0.4764740676087880E+0; - v = 0.2291161673130077E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5753520160126075E+0; - b = 0.5034021310998277E+0; - v = 0.2295313908576598E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1388326356417754E+0; - b = 0.2435436510372806E-1; - v = 0.1438204721359031E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1743686900537244E+0; - b = 0.5118897057342652E-1; - v = 0.1607738025495257E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2099737037950268E+0; - b = 0.8014695048539634E-1; - v = 
0.1741483853528379E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2454492590908548E+0; - b = 0.1105117874155699E+0; - v = 0.1851918467519151E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2807219257864278E+0; - b = 0.1417950531570966E+0; - v = 0.1944628638070613E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3156842271975842E+0; - b = 0.1736604945719597E+0; - v = 0.2022495446275152E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3502090945177752E+0; - b = 0.2058466324693981E+0; - v = 0.2087462382438514E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3841684849519686E+0; - b = 0.2381284261195919E+0; - v = 0.2141074754818308E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4174372367906016E+0; - b = 0.2703031270422569E+0; - v = 0.2184640913748162E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4498926465011892E+0; - b = 0.3021845683091309E+0; - v = 0.2219309165220329E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4814146229807701E+0; - b = 0.3335993355165720E+0; - v = 0.2246123118340624E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5118863625734701E+0; - b = 0.3643833735518232E+0; - v = 0.2266062766915125E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5411947455119144E+0; - b = 0.3943789541958179E+0; - v = 0.2280072952230796E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5692301500357246E+0; - b = 0.4234320144403542E+0; - v = 0.2289082025202583E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5958857204139576E+0; - b = 0.4513897947419260E+0; - v = 0.2294012695120025E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2156270284785766E+0; - b = 0.2681225755444491E-1; - v = 0.1722434488736947E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2532385054909710E+0; - b = 
0.5557495747805614E-1; - v = 0.1830237421455091E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2902564617771537E+0; - b = 0.8569368062950249E-1; - v = 0.1923855349997633E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3266979823143256E+0; - b = 0.1167367450324135E+0; - v = 0.2004067861936271E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3625039627493614E+0; - b = 0.1483861994003304E+0; - v = 0.2071817297354263E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3975838937548699E+0; - b = 0.1803821503011405E+0; - v = 0.2128250834102103E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4318396099009774E+0; - b = 0.2124962965666424E+0; - v = 0.2174513719440102E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4651706555732742E+0; - b = 0.2445221837805913E+0; - v = 0.2211661839150214E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4974752649620969E+0; - b = 0.2762701224322987E+0; - v = 0.2240665257813102E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5286517579627517E+0; - b = 0.3075627775211328E+0; - v = 0.2262439516632620E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5586001195731895E+0; - b = 0.3382311089826877E+0; - v = 0.2277874557231869E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5872229902021319E+0; - b = 0.3681108834741399E+0; - v = 0.2287854314454994E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6144258616235123E+0; - b = 0.3970397446872839E+0; - v = 0.2293268499615575E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2951676508064861E+0; - b = 0.2867499538750441E-1; - v = 0.1912628201529828E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3335085485472725E+0; - b = 0.5867879341903510E-1; - v = 0.1992499672238701E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 
0.3709561760636381E+0; - b = 0.8961099205022284E-1; - v = 0.2061275533454027E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4074722861667498E+0; - b = 0.1211627927626297E+0; - v = 0.2119318215968572E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4429923648839117E+0; - b = 0.1530748903554898E+0; - v = 0.2167416581882652E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4774428052721736E+0; - b = 0.1851176436721877E+0; - v = 0.2206430730516600E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5107446539535904E+0; - b = 0.2170829107658179E+0; - v = 0.2237186938699523E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5428151370542935E+0; - b = 0.2487786689026271E+0; - v = 0.2260480075032884E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5735699292556964E+0; - b = 0.2800239952795016E+0; - v = 0.2277098884558542E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6029253794562866E+0; - b = 0.3106445702878119E+0; - v = 0.2287845715109671E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6307998987073145E+0; - b = 0.3404689500841194E+0; - v = 0.2293547268236294E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3752652273692719E+0; - b = 0.2997145098184479E-1; - v = 0.2056073839852528E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4135383879344028E+0; - b = 0.6086725898678011E-1; - v = 0.2114235865831876E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4506113885153907E+0; - b = 0.9238849548435643E-1; - v = 0.2163175629770551E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4864401554606072E+0; - b = 0.1242786603851851E+0; - v = 0.2203392158111650E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5209708076611709E+0; - b = 0.1563086731483386E+0; - v = 0.2235473176847839E-3; - start = getLebedevReccurencePoints(6, start, 
a, b, v); - a = 0.5541422135830122E+0; - b = 0.1882696509388506E+0; - v = 0.2260024141501235E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5858880915113817E+0; - b = 0.2199672979126059E+0; - v = 0.2277675929329182E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6161399390603444E+0; - b = 0.2512165482924867E+0; - v = 0.2289102112284834E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6448296482255090E+0; - b = 0.2818368701871888E+0; - v = 0.2295027954625118E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4544796274917948E+0; - b = 0.3088970405060312E-1; - v = 0.2161281589879992E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4919389072146628E+0; - b = 0.6240947677636835E-1; - v = 0.2201980477395102E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5279313026985183E+0; - b = 0.9430706144280313E-1; - v = 0.2234952066593166E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5624169925571135E+0; - b = 0.1263547818770374E+0; - v = 0.2260540098520838E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5953484627093287E+0; - b = 0.1583430788822594E+0; - v = 0.2279157981899988E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6266730715339185E+0; - b = 0.1900748462555988E+0; - v = 0.2291296918565571E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6563363204278871E+0; - b = 0.2213599519592567E+0; - v = 0.2297533752536649E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5314574716585696E+0; - b = 0.3152508811515374E-1; - v = 0.2234927356465995E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5674614932298185E+0; - b = 0.6343865291465561E-1; - v = 0.2261288012985219E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6017706004970264E+0; - b = 0.9551503504223951E-1; - v = 0.2280818160923688E-3; - start = 
getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6343471270264178E+0; - b = 0.1275440099801196E+0; - v = 0.2293773295180159E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6651494599127802E+0; - b = 0.1593252037671960E+0; - v = 0.2300528767338634E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6050184986005704E+0; - b = 0.3192538338496105E-1; - v = 0.2281893855065666E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6390163550880400E+0; - b = 0.6402824353962306E-1; - v = 0.2295720444840727E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6711199107088448E+0; - b = 0.9609805077002909E-1; - v = 0.2303227649026753E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6741354429572275E+0; - b = 0.3211853196273233E-1; - v = 0.2304831913227114E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 5294: - - v = 0.9080510764308163E-4; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.2084824361987793E-3; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.2303261686261450E-1; - v = 0.5011105657239616E-4; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3757208620162394E-1; - v = 0.5942520409683854E-4; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5821912033821852E-1; - v = 0.9564394826109721E-4; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.8403127529194872E-1; - v = 0.1185530657126338E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1122927798060578E+0; - v = 0.1364510114230331E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1420125319192987E+0; - v = 0.1505828825605415E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1726396437341978E+0; - v = 0.1619298749867023E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2038170058115696E+0; - v = 0.1712450504267789E-3; - start = 
getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2352849892876508E+0; - v = 0.1789891098164999E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2668363354312461E+0; - v = 0.1854474955629795E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2982941279900452E+0; - v = 0.1908148636673661E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3295002922087076E+0; - v = 0.1952377405281833E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3603094918363593E+0; - v = 0.1988349254282232E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3905857895173920E+0; - v = 0.2017079807160050E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4202005758160837E+0; - v = 0.2039473082709094E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4490310061597227E+0; - v = 0.2056360279288953E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4769586160311491E+0; - v = 0.2068525823066865E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5038679887049750E+0; - v = 0.2076724877534488E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5296454286519961E+0; - v = 0.2081694278237885E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5541776207164850E+0; - v = 0.2084157631219326E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5990467321921213E+0; - v = 0.2084381531128593E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6191467096294587E+0; - v = 0.2083476277129307E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6375251212901849E+0; - v = 0.2082686194459732E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6540514381131168E+0; - v = 0.2082475686112415E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6685899064391510E+0; - v = 0.2083139860289915E-3; - start = getLebedevReccurencePoints(4, start, a, b, 
v); - a = 0.6810013009681648E+0; - v = 0.2084745561831237E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6911469578730340E+0; - v = 0.2087091313375890E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6988956915141736E+0; - v = 0.2089718413297697E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7041335794868720E+0; - v = 0.2092003303479793E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7067754398018567E+0; - v = 0.2093336148263241E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3840368707853623E-1; - v = 0.7591708117365267E-4; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.9835485954117399E-1; - v = 0.1083383968169186E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.1665774947612998E+0; - v = 0.1403019395292510E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.2405702335362910E+0; - v = 0.1615970179286436E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.3165270770189046E+0; - v = 0.1771144187504911E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.3927386145645443E+0; - v = 0.1887760022988168E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.4678825918374656E+0; - v = 0.1973474670768214E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5408022024266935E+0; - v = 0.2033787661234659E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.6104967445752438E+0; - v = 0.2072343626517331E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.6760910702685738E+0; - v = 0.2091177834226918E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.6655644120217392E-1; - b = 0.1936508874588424E-1; - v = 0.9316684484675566E-4; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.9446246161270182E-1; - b = 0.4252442002115869E-1; - v = 0.1116193688682976E-3; - start = getLebedevReccurencePoints(6, 
start, a, b, v); - a = 0.1242651925452509E+0; - b = 0.6806529315354374E-1; - v = 0.1298623551559414E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1553438064846751E+0; - b = 0.9560957491205369E-1; - v = 0.1450236832456426E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1871137110542670E+0; - b = 0.1245931657452888E+0; - v = 0.1572719958149914E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2192612628836257E+0; - b = 0.1545385828778978E+0; - v = 0.1673234785867195E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2515682807206955E+0; - b = 0.1851004249723368E+0; - v = 0.1756860118725188E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2838535866287290E+0; - b = 0.2160182608272384E+0; - v = 0.1826776290439367E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3159578817528521E+0; - b = 0.2470799012277111E+0; - v = 0.1885116347992865E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3477370882791392E+0; - b = 0.2781014208986402E+0; - v = 0.1933457860170574E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3790576960890540E+0; - b = 0.3089172523515731E+0; - v = 0.1973060671902064E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4097938317810200E+0; - b = 0.3393750055472244E+0; - v = 0.2004987099616311E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4398256572859637E+0; - b = 0.3693322470987730E+0; - v = 0.2030170909281499E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4690384114718480E+0; - b = 0.3986541005609877E+0; - v = 0.2049461460119080E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4973216048301053E+0; - b = 0.4272112491408562E+0; - v = 0.2063653565200186E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5245681526132446E+0; - b = 0.4548781735309936E+0; - v = 0.2073507927381027E-3; - start = 
getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5506733911803888E+0; - b = 0.4815315355023251E+0; - v = 0.2079764593256122E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5755339829522475E+0; - b = 0.5070486445801855E+0; - v = 0.2083150534968778E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1305472386056362E+0; - b = 0.2284970375722366E-1; - v = 0.1262715121590664E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1637327908216477E+0; - b = 0.4812254338288384E-1; - v = 0.1414386128545972E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1972734634149637E+0; - b = 0.7531734457511935E-1; - v = 0.1538740401313898E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2308694653110130E+0; - b = 0.1039043639882017E+0; - v = 0.1642434942331432E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2643899218338160E+0; - b = 0.1334526587117626E+0; - v = 0.1729790609237496E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2977171599622171E+0; - b = 0.1636414868936382E+0; - v = 0.1803505190260828E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3307293903032310E+0; - b = 0.1942195406166568E+0; - v = 0.1865475350079657E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3633069198219073E+0; - b = 0.2249752879943753E+0; - v = 0.1917182669679069E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3953346955922727E+0; - b = 0.2557218821820032E+0; - v = 0.1959851709034382E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4267018394184914E+0; - b = 0.2862897925213193E+0; - v = 0.1994529548117882E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4573009622571704E+0; - b = 0.3165224536636518E+0; - v = 0.2022138911146548E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4870279559856109E+0; - b = 0.3462730221636496E+0; - v = 
0.2043518024208592E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5157819581450322E+0; - b = 0.3754016870282835E+0; - v = 0.2059450313018110E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5434651666465393E+0; - b = 0.4037733784993613E+0; - v = 0.2070685715318472E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5699823887764627E+0; - b = 0.4312557784139123E+0; - v = 0.2077955310694373E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5952403350947741E+0; - b = 0.4577175367122110E+0; - v = 0.2081980387824712E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2025152599210369E+0; - b = 0.2520253617719557E-1; - v = 0.1521318610377956E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2381066653274425E+0; - b = 0.5223254506119000E-1; - v = 0.1622772720185755E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2732823383651612E+0; - b = 0.8060669688588620E-1; - v = 0.1710498139420709E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3080137692611118E+0; - b = 0.1099335754081255E+0; - v = 0.1785911149448736E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3422405614587601E+0; - b = 0.1399120955959857E+0; - v = 0.1850125313687736E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3758808773890420E+0; - b = 0.1702977801651705E+0; - v = 0.1904229703933298E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4088458383438932E+0; - b = 0.2008799256601680E+0; - v = 0.1949259956121987E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4410450550841152E+0; - b = 0.2314703052180836E+0; - v = 0.1986161545363960E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4723879420561312E+0; - b = 0.2618972111375892E+0; - v = 0.2015790585641370E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5027843561874343E+0; - b = 
0.2920013195600270E+0; - v = 0.2038934198707418E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5321453674452458E+0; - b = 0.3216322555190551E+0; - v = 0.2056334060538251E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5603839113834030E+0; - b = 0.3506456615934198E+0; - v = 0.2068705959462289E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5874150706875146E+0; - b = 0.3789007181306267E+0; - v = 0.2076753906106002E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6131559381660038E+0; - b = 0.4062580170572782E+0; - v = 0.2081179391734803E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2778497016394506E+0; - b = 0.2696271276876226E-1; - v = 0.1700345216228943E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3143733562261912E+0; - b = 0.5523469316960465E-1; - v = 0.1774906779990410E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3501485810261827E+0; - b = 0.8445193201626464E-1; - v = 0.1839659377002642E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3851430322303653E+0; - b = 0.1143263119336083E+0; - v = 0.1894987462975169E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4193013979470415E+0; - b = 0.1446177898344475E+0; - v = 0.1941548809452595E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4525585960458567E+0; - b = 0.1751165438438091E+0; - v = 0.1980078427252384E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4848447779622947E+0; - b = 0.2056338306745660E+0; - v = 0.2011296284744488E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5160871208276894E+0; - b = 0.2359965487229226E+0; - v = 0.2035888456966776E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5462112185696926E+0; - b = 0.2660430223139146E+0; - v = 0.2054516325352142E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 
0.5751425068101757E+0; - b = 0.2956193664498032E+0; - v = 0.2067831033092635E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6028073872853596E+0; - b = 0.3245763905312779E+0; - v = 0.2076485320284876E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6291338275278409E+0; - b = 0.3527670026206972E+0; - v = 0.2081141439525255E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3541797528439391E+0; - b = 0.2823853479435550E-1; - v = 0.1834383015469222E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3908234972074657E+0; - b = 0.5741296374713106E-1; - v = 0.1889540591777677E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4264408450107590E+0; - b = 0.8724646633650199E-1; - v = 0.1936677023597375E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4609949666553286E+0; - b = 0.1175034422915616E+0; - v = 0.1976176495066504E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4944389496536006E+0; - b = 0.1479755652628428E+0; - v = 0.2008536004560983E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5267194884346086E+0; - b = 0.1784740659484352E+0; - v = 0.2034280351712291E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5577787810220990E+0; - b = 0.2088245700431244E+0; - v = 0.2053944466027758E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5875563763536670E+0; - b = 0.2388628136570763E+0; - v = 0.2068077642882360E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6159910016391269E+0; - b = 0.2684308928769185E+0; - v = 0.2077250949661599E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6430219602956268E+0; - b = 0.2973740761960252E+0; - v = 0.2082062440705320E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4300647036213646E+0; - b = 0.2916399920493977E-1; - v = 0.1934374486546626E-3; - start = getLebedevReccurencePoints(6, start, 
a, b, v); - a = 0.4661486308935531E+0; - b = 0.5898803024755659E-1; - v = 0.1974107010484300E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5009658555287261E+0; - b = 0.8924162698525409E-1; - v = 0.2007129290388658E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5344824270447704E+0; - b = 0.1197185199637321E+0; - v = 0.2033736947471293E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5666575997416371E+0; - b = 0.1502300756161382E+0; - v = 0.2054287125902493E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5974457471404752E+0; - b = 0.1806004191913564E+0; - v = 0.2069184936818894E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6267984444116886E+0; - b = 0.2106621764786252E+0; - v = 0.2078883689808782E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6546664713575417E+0; - b = 0.2402526932671914E+0; - v = 0.2083886366116359E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5042711004437253E+0; - b = 0.2982529203607657E-1; - v = 0.2006593275470817E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5392127456774380E+0; - b = 0.6008728062339922E-1; - v = 0.2033728426135397E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5726819437668618E+0; - b = 0.9058227674571398E-1; - v = 0.2055008781377608E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6046469254207278E+0; - b = 0.1211219235803400E+0; - v = 0.2070651783518502E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6350716157434952E+0; - b = 0.1515286404791580E+0; - v = 0.2080953335094320E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6639177679185454E+0; - b = 0.1816314681255552E+0; - v = 0.2086284998988521E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5757276040972253E+0; - b = 0.3026991752575440E-1; - v = 0.2055549387644668E-3; - start = 
getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6090265823139755E+0; - b = 0.6078402297870770E-1; - v = 0.2071871850267654E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6406735344387661E+0; - b = 0.9135459984176636E-1; - v = 0.2082856600431965E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6706397927793709E+0; - b = 0.1218024155966590E+0; - v = 0.2088705858819358E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6435019674426665E+0; - b = 0.3052608357660639E-1; - v = 0.2083995867536322E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6747218676375681E+0; - b = 0.6112185773983089E-1; - v = 0.2090509712889637E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - - case 5810: - - v = 0.9735347946175486E-5; - start = getLebedevReccurencePoints(1, start, a, b, v); - v = 0.1907581241803167E-3; - start = getLebedevReccurencePoints(2, start, a, b, v); - v = 0.1901059546737578E-3; - start = getLebedevReccurencePoints(3, start, a, b, v); - a = 0.1182361662400277E-1; - v = 0.3926424538919212E-4; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3062145009138958E-1; - v = 0.6667905467294382E-4; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5329794036834243E-1; - v = 0.8868891315019135E-4; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7848165532862220E-1; - v = 0.1066306000958872E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1054038157636201E+0; - v = 0.1214506743336128E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1335577797766211E+0; - v = 0.1338054681640871E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1625769955502252E+0; - v = 0.1441677023628504E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.1921787193412792E+0; - v = 0.1528880200826557E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 
0.2221340534690548E+0; - v = 0.1602330623773609E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2522504912791132E+0; - v = 0.1664102653445244E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.2823610860679697E+0; - v = 0.1715845854011323E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3123173966267560E+0; - v = 0.1758901000133069E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3419847036953789E+0; - v = 0.1794382485256736E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3712386456999758E+0; - v = 0.1823238106757407E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3999627649876828E+0; - v = 0.1846293252959976E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4280466458648093E+0; - v = 0.1864284079323098E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4553844360185711E+0; - v = 0.1877882694626914E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.4818736094437834E+0; - v = 0.1887716321852025E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5074138709260629E+0; - v = 0.1894381638175673E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5319061304570707E+0; - v = 0.1898454899533629E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5552514978677286E+0; - v = 0.1900497929577815E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.5981009025246183E+0; - v = 0.1900671501924092E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6173990192228116E+0; - v = 0.1899837555533510E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6351365239411131E+0; - v = 0.1899014113156229E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6512010228227200E+0; - v = 0.1898581257705106E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6654758363948120E+0; - v = 
0.1898804756095753E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6778410414853370E+0; - v = 0.1899793610426402E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6881760887484110E+0; - v = 0.1901464554844117E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.6963645267094598E+0; - v = 0.1903533246259542E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7023010617153579E+0; - v = 0.1905556158463228E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.7059004636628753E+0; - v = 0.1907037155663528E-3; - start = getLebedevReccurencePoints(4, start, a, b, v); - a = 0.3552470312472575E-1; - v = 0.5992997844249967E-4; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.9151176620841283E-1; - v = 0.9749059382456978E-4; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.1566197930068980E+0; - v = 0.1241680804599158E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.2265467599271907E+0; - v = 0.1437626154299360E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.2988242318581361E+0; - v = 0.1584200054793902E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.3717482419703886E+0; - v = 0.1694436550982744E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.4440094491758889E+0; - v = 0.1776617014018108E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5145337096756642E+0; - v = 0.1836132434440077E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.5824053672860230E+0; - v = 0.1876494727075983E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.6468283961043370E+0; - v = 0.1899906535336482E-3; - start = getLebedevReccurencePoints(5, start, a, b, v); - a = 0.6095964259104373E-1; - b = 0.1787828275342931E-1; - v = 0.8143252820767350E-4; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.8811962270959388E-1; - b = 
0.3953888740792096E-1; - v = 0.9998859890887728E-4; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1165936722428831E+0; - b = 0.6378121797722990E-1; - v = 0.1156199403068359E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1460232857031785E+0; - b = 0.8985890813745037E-1; - v = 0.1287632092635513E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1761197110181755E+0; - b = 0.1172606510576162E+0; - v = 0.1398378643365139E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2066471190463718E+0; - b = 0.1456102876970995E+0; - v = 0.1491876468417391E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2374076026328152E+0; - b = 0.1746153823011775E+0; - v = 0.1570855679175456E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2682305474337051E+0; - b = 0.2040383070295584E+0; - v = 0.1637483948103775E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2989653312142369E+0; - b = 0.2336788634003698E+0; - v = 0.1693500566632843E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3294762752772209E+0; - b = 0.2633632752654219E+0; - v = 0.1740322769393633E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3596390887276086E+0; - b = 0.2929369098051601E+0; - v = 0.1779126637278296E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3893383046398812E+0; - b = 0.3222592785275512E+0; - v = 0.1810908108835412E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4184653789358347E+0; - b = 0.3512004791195743E+0; - v = 0.1836529132600190E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4469172319076166E+0; - b = 0.3796385677684537E+0; - v = 0.1856752841777379E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4745950813276976E+0; - b = 0.4074575378263879E+0; - v = 0.1872270566606832E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 
0.5014034601410262E+0; - b = 0.4345456906027828E+0; - v = 0.1883722645591307E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5272493404551239E+0; - b = 0.4607942515205134E+0; - v = 0.1891714324525297E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5520413051846366E+0; - b = 0.4860961284181720E+0; - v = 0.1896827480450146E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5756887237503077E+0; - b = 0.5103447395342790E+0; - v = 0.1899628417059528E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1225039430588352E+0; - b = 0.2136455922655793E-1; - v = 0.1123301829001669E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1539113217321372E+0; - b = 0.4520926166137188E-1; - v = 0.1253698826711277E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1856213098637712E+0; - b = 0.7086468177864818E-1; - v = 0.1366266117678531E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2174998728035131E+0; - b = 0.9785239488772918E-1; - v = 0.1462736856106918E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2494128336938330E+0; - b = 0.1258106396267210E+0; - v = 0.1545076466685412E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2812321562143480E+0; - b = 0.1544529125047001E+0; - v = 0.1615096280814007E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3128372276456111E+0; - b = 0.1835433512202753E+0; - v = 0.1674366639741759E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3441145160177973E+0; - b = 0.2128813258619585E+0; - v = 0.1724225002437900E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3749567714853510E+0; - b = 0.2422913734880829E+0; - v = 0.1765810822987288E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4052621732015610E+0; - b = 0.2716163748391453E+0; - v = 0.1800104126010751E-3; - start = getLebedevReccurencePoints(6, start, 
a, b, v); - a = 0.4349335453522385E+0; - b = 0.3007127671240280E+0; - v = 0.1827960437331284E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4638776641524965E+0; - b = 0.3294470677216479E+0; - v = 0.1850140300716308E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4920046410462687E+0; - b = 0.3576932543699155E+0; - v = 0.1867333507394938E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5192273554861704E+0; - b = 0.3853307059757764E+0; - v = 0.1880178688638289E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5454609081136522E+0; - b = 0.4122425044452694E+0; - v = 0.1889278925654758E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5706220661424140E+0; - b = 0.4383139587781027E+0; - v = 0.1895213832507346E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5946286755181518E+0; - b = 0.4634312536300553E+0; - v = 0.1898548277397420E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.1905370790924295E+0; - b = 0.2371311537781979E-1; - v = 0.1349105935937341E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2242518717748009E+0; - b = 0.4917878059254806E-1; - v = 0.1444060068369326E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2577190808025936E+0; - b = 0.7595498960495142E-1; - v = 0.1526797390930008E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2908724534927187E+0; - b = 0.1036991083191100E+0; - v = 0.1598208771406474E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3236354020056219E+0; - b = 0.1321348584450234E+0; - v = 0.1659354368615331E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3559267359304543E+0; - b = 0.1610316571314789E+0; - v = 0.1711279910946440E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3876637123676956E+0; - b = 0.1901912080395707E+0; - v = 0.1754952725601440E-3; - start = 
getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4187636705218842E+0; - b = 0.2194384950137950E+0; - v = 0.1791247850802529E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4491449019883107E+0; - b = 0.2486155334763858E+0; - v = 0.1820954300877716E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4787270932425445E+0; - b = 0.2775768931812335E+0; - v = 0.1844788524548449E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5074315153055574E+0; - b = 0.3061863786591120E+0; - v = 0.1863409481706220E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5351810507738336E+0; - b = 0.3343144718152556E+0; - v = 0.1877433008795068E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5619001025975381E+0; - b = 0.3618362729028427E+0; - v = 0.1887444543705232E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5875144035268046E+0; - b = 0.3886297583620408E+0; - v = 0.1894009829375006E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6119507308734495E+0; - b = 0.4145742277792031E+0; - v = 0.1897683345035198E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2619733870119463E+0; - b = 0.2540047186389353E-1; - v = 0.1517327037467653E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.2968149743237949E+0; - b = 0.5208107018543989E-1; - v = 0.1587740557483543E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3310451504860488E+0; - b = 0.7971828470885599E-1; - v = 0.1649093382274097E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3646215567376676E+0; - b = 0.1080465999177927E+0; - v = 0.1701915216193265E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3974916785279360E+0; - b = 0.1368413849366629E+0; - v = 0.1746847753144065E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4295967403772029E+0; - b = 0.1659073184763559E+0; - v = 
0.1784555512007570E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4608742854473447E+0; - b = 0.1950703730454614E+0; - v = 0.1815687562112174E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4912598858949903E+0; - b = 0.2241721144376724E+0; - v = 0.1840864370663302E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5206882758945558E+0; - b = 0.2530655255406489E+0; - v = 0.1860676785390006E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5490940914019819E+0; - b = 0.2816118409731066E+0; - v = 0.1875690583743703E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5764123302025542E+0; - b = 0.3096780504593238E+0; - v = 0.1886453236347225E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6025786004213506E+0; - b = 0.3371348366394987E+0; - v = 0.1893501123329645E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6275291964794956E+0; - b = 0.3638547827694396E+0; - v = 0.1897366184519868E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3348189479861771E+0; - b = 0.2664841935537443E-1; - v = 0.1643908815152736E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.3699515545855295E+0; - b = 0.5424000066843495E-1; - v = 0.1696300350907768E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4042003071474669E+0; - b = 0.8251992715430854E-1; - v = 0.1741553103844483E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4375320100182624E+0; - b = 0.1112695182483710E+0; - v = 0.1780015282386092E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4699054490335947E+0; - b = 0.1402964116467816E+0; - v = 0.1812116787077125E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5012739879431952E+0; - b = 0.1694275117584291E+0; - v = 0.1838323158085421E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5315874883754966E+0; - b = 
0.1985038235312689E+0; - v = 0.1859113119837737E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5607937109622117E+0; - b = 0.2273765660020893E+0; - v = 0.1874969220221698E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5888393223495521E+0; - b = 0.2559041492849764E+0; - v = 0.1886375612681076E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6156705979160163E+0; - b = 0.2839497251976899E+0; - v = 0.1893819575809276E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6412338809078123E+0; - b = 0.3113791060500690E+0; - v = 0.1897794748256767E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4076051259257167E+0; - b = 0.2757792290858463E-1; - v = 0.1738963926584846E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4423788125791520E+0; - b = 0.5584136834984293E-1; - v = 0.1777442359873466E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.4760480917328258E+0; - b = 0.8457772087727143E-1; - v = 0.1810010815068719E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5085838725946297E+0; - b = 0.1135975846359248E+0; - v = 0.1836920318248129E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5399513637391218E+0; - b = 0.1427286904765053E+0; - v = 0.1858489473214328E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5701118433636380E+0; - b = 0.1718112740057635E+0; - v = 0.1875079342496592E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5990240530606021E+0; - b = 0.2006944855985351E+0; - v = 0.1887080239102310E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6266452685139695E+0; - b = 0.2292335090598907E+0; - v = 0.1894905752176822E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6529320971415942E+0; - b = 0.2572871512353714E+0; - v = 0.1898991061200695E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 
0.4791583834610126E+0; - b = 0.2826094197735932E-1; - v = 0.1809065016458791E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5130373952796940E+0; - b = 0.5699871359683649E-1; - v = 0.1836297121596799E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5456252429628476E+0; - b = 0.8602712528554394E-1; - v = 0.1858426916241869E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5768956329682385E+0; - b = 0.1151748137221281E+0; - v = 0.1875654101134641E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6068186944699046E+0; - b = 0.1442811654136362E+0; - v = 0.1888240751833503E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6353622248024907E+0; - b = 0.1731930321657680E+0; - v = 0.1896497383866979E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6624927035731797E+0; - b = 0.2017619958756061E+0; - v = 0.1900775530219121E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5484933508028488E+0; - b = 0.2874219755907391E-1; - v = 0.1858525041478814E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.5810207682142106E+0; - b = 0.5778312123713695E-1; - v = 0.1876248690077947E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6120955197181352E+0; - b = 0.8695262371439526E-1; - v = 0.1889404439064607E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6416944284294319E+0; - b = 0.1160893767057166E+0; - v = 0.1898168539265290E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6697926391731260E+0; - b = 0.1450378826743251E+0; - v = 0.1902779940661772E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6147594390585488E+0; - b = 0.2904957622341456E-1; - v = 0.1890125641731815E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6455390026356783E+0; - b = 0.5823809152617197E-1; - v = 0.1899434637795751E-3; - start = getLebedevReccurencePoints(6, start, 
a, b, v); - a = 0.6747258588365477E+0; - b = 0.8740384899884715E-1; - v = 0.1904520856831751E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - a = 0.6772135750395347E+0; - b = 0.2919946135808105E-1; - v = 0.1905534498734563E-3; - start = getLebedevReccurencePoints(6, start, a, b, v); - - break; - } + { + case 6: + + v = 0.1666666666666667E+0; + start = getLebedevReccurencePoints (1, start, a, b, v); + + break; + + case 14: + + v = 0.6666666666666667E-1; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.7500000000000000E-1; + start = getLebedevReccurencePoints (3, start, a, b, v); + + break; + + case 26: + + v = 0.4761904761904762E-1; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.3809523809523810E-1; + start = getLebedevReccurencePoints (2, start, a, b, v); + v = 0.3214285714285714E-1; + start = getLebedevReccurencePoints (3, start, a, b, v); + + break; + + case 38: + + v = 0.9523809523809524E-2; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.3214285714285714E-1; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.4597008433809831E+0; + v = 0.2857142857142857E-1; + start = getLebedevReccurencePoints (5, start, a, b, v); + + break; + + case 50: + + v = 0.1269841269841270E-1; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.2257495590828924E-1; + start = getLebedevReccurencePoints (2, start, a, b, v); + v = 0.2109375000000000E-1; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.3015113445777636E+0; + v = 0.2017333553791887E-1; + start = getLebedevReccurencePoints (4, start, a, b, v); + + break; + + case 74: + + v = 0.5130671797338464E-3; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.1660406956574204E-1; + start = getLebedevReccurencePoints (2, start, a, b, v); + v = -0.2958603896103896E-1; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.4803844614152614E+0; + v = 0.2657620708215946E-1; + start = 
getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3207726489807764E+0; + v = 0.1652217099371571E-1; + start = getLebedevReccurencePoints (5, start, a, b, v); + + break; + + case 86: + + v = 0.1154401154401154E-1; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.1194390908585628E-1; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.3696028464541502E+0; + v = 0.1111055571060340E-1; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6943540066026664E+0; + v = 0.1187650129453714E-1; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3742430390903412E+0; + v = 0.1181230374690448E-1; + start = getLebedevReccurencePoints (5, start, a, b, v); + + break; + + case 110: + + v = 0.3828270494937162E-2; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.9793737512487512E-2; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.1851156353447362E+0; + v = 0.8211737283191111E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6904210483822922E+0; + v = 0.9942814891178103E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3956894730559419E+0; + v = 0.9595471336070963E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4783690288121502E+0; + v = 0.9694996361663028E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + + break; + + case 146: + + v = 0.5996313688621381E-3; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.7372999718620756E-2; + start = getLebedevReccurencePoints (2, start, a, b, v); + v = 0.7210515360144488E-2; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.6764410400114264E+0; + v = 0.7116355493117555E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4174961227965453E+0; + v = 0.6753829486314477E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1574676672039082E+0; + v = 0.7574394159054034E-2; + start = getLebedevReccurencePoints (4, 
start, a, b, v); + a = 0.1403553811713183E+0; + b = 0.4493328323269557E+0; + v = 0.6991087353303262E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 170: + + v = 0.5544842902037365E-2; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.6071332770670752E-2; + start = getLebedevReccurencePoints (2, start, a, b, v); + v = 0.6383674773515093E-2; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.2551252621114134E+0; + v = 0.5183387587747790E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6743601460362766E+0; + v = 0.6317929009813725E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4318910696719410E+0; + v = 0.6201670006589077E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2613931360335988E+0; + v = 0.5477143385137348E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.4990453161796037E+0; + b = 0.1446630744325115E+0; + v = 0.5968383987681156E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 194: + + v = 0.1782340447244611E-2; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.5716905949977102E-2; + start = getLebedevReccurencePoints (2, start, a, b, v); + v = 0.5573383178848738E-2; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.6712973442695226E+0; + v = 0.5608704082587997E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2892465627575439E+0; + v = 0.5158237711805383E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4446933178717437E+0; + v = 0.5518771467273614E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1299335447650067E+0; + v = 0.4106777028169394E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3457702197611283E+0; + v = 0.5051846064614808E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.1590417105383530E+0; + b = 0.8360360154824589E+0; + v = 
0.5530248916233094E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 230: + + v = -0.5522639919727325E-1; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.4450274607445226E-2; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.4492044687397611E+0; + v = 0.4496841067921404E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2520419490210201E+0; + v = 0.5049153450478750E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6981906658447242E+0; + v = 0.3976408018051883E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6587405243460960E+0; + v = 0.4401400650381014E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4038544050097660E-1; + v = 0.1724544350544401E-1; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5823842309715585E+0; + v = 0.4231083095357343E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.3545877390518688E+0; + v = 0.5198069864064399E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.2272181808998187E+0; + b = 0.4864661535886647E+0; + v = 0.4695720972568883E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 266: + + v = -0.1313769127326952E-2; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = -0.2522728704859336E-2; + start = getLebedevReccurencePoints (2, start, a, b, v); + v = 0.4186853881700583E-2; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.7039373391585475E+0; + v = 0.5315167977810885E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1012526248572414E+0; + v = 0.4047142377086219E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4647448726420539E+0; + v = 0.4112482394406990E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3277420654971629E+0; + v = 0.3595584899758782E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 
0.6620338663699974E+0; + v = 0.4256131351428158E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.8506508083520399E+0; + v = 0.4229582700647240E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.3233484542692899E+0; + b = 0.1153112011009701E+0; + v = 0.4080914225780505E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2314790158712601E+0; + b = 0.5244939240922365E+0; + v = 0.4071467593830964E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 302: + + v = 0.8545911725128148E-3; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.3599119285025571E-2; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.3515640345570105E+0; + v = 0.3449788424305883E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6566329410219612E+0; + v = 0.3604822601419882E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4729054132581005E+0; + v = 0.3576729661743367E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.9618308522614784E-1; + v = 0.2352101413689164E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2219645236294178E+0; + v = 0.3108953122413675E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7011766416089545E+0; + v = 0.3650045807677255E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2644152887060663E+0; + v = 0.2982344963171804E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5718955891878961E+0; + v = 0.3600820932216460E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.2510034751770465E+0; + b = 0.8000727494073952E+0; + v = 0.3571540554273387E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1233548532583327E+0; + b = 0.4127724083168531E+0; + v = 0.3392312205006170E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 350: + + v = 0.3006796749453936E-2; + start 
= getLebedevReccurencePoints (1, start, a, b, v); + v = 0.3050627745650771E-2; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.7068965463912316E+0; + v = 0.1621104600288991E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4794682625712025E+0; + v = 0.3005701484901752E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1927533154878019E+0; + v = 0.2990992529653774E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6930357961327123E+0; + v = 0.2982170644107595E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3608302115520091E+0; + v = 0.2721564237310992E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6498486161496169E+0; + v = 0.3033513795811141E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1932945013230339E+0; + v = 0.3007949555218533E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.3800494919899303E+0; + v = 0.2881964603055307E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.2899558825499574E+0; + b = 0.7934537856582316E+0; + v = 0.2958357626535696E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.9684121455103957E-1; + b = 0.8280801506686862E+0; + v = 0.3036020026407088E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1833434647041659E+0; + b = 0.9074658265305127E+0; + v = 0.2832187403926303E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 434: + + v = 0.5265897968224436E-3; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.2548219972002607E-2; + start = getLebedevReccurencePoints (2, start, a, b, v); + v = 0.2512317418927307E-2; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.6909346307509111E+0; + v = 0.2530403801186355E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1774836054609158E+0; + v = 0.2014279020918528E-2; + start = getLebedevReccurencePoints 
(4, start, a, b, v); + a = 0.4914342637784746E+0; + v = 0.2501725168402936E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6456664707424256E+0; + v = 0.2513267174597564E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2861289010307638E+0; + v = 0.2302694782227416E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7568084367178018E-1; + v = 0.1462495621594614E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3927259763368002E+0; + v = 0.2445373437312980E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.8818132877794288E+0; + v = 0.2417442375638981E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.9776428111182649E+0; + v = 0.1910951282179532E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.2054823696403044E+0; + b = 0.8689460322872412E+0; + v = 0.2416930044324775E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5905157048925271E+0; + b = 0.7999278543857286E+0; + v = 0.2512236854563495E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5550152361076807E+0; + b = 0.7717462626915901E+0; + v = 0.2496644054553086E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.9371809858553722E+0; + b = 0.3344363145343455E+0; + v = 0.2236607760437849E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 590: + + v = 0.3095121295306187E-3; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.1852379698597489E-2; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.7040954938227469E+0; + v = 0.1871790639277744E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6807744066455243E+0; + v = 0.1858812585438317E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6372546939258752E+0; + v = 0.1852028828296213E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5044419707800358E+0; + v = 
0.1846715956151242E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4215761784010967E+0; + v = 0.1818471778162769E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3317920736472123E+0; + v = 0.1749564657281154E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2384736701421887E+0; + v = 0.1617210647254411E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1459036449157763E+0; + v = 0.1384737234851692E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6095034115507196E-1; + v = 0.9764331165051050E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6116843442009876E+0; + v = 0.1857161196774078E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.3964755348199858E+0; + v = 0.1705153996395864E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.1724782009907724E+0; + v = 0.1300321685886048E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5610263808622060E+0; + b = 0.3518280927733519E+0; + v = 0.1842866472905286E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4742392842551980E+0; + b = 0.2634716655937950E+0; + v = 0.1802658934377451E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5984126497885380E+0; + b = 0.1816640840360209E+0; + v = 0.1849830560443660E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3791035407695563E+0; + b = 0.1720795225656878E+0; + v = 0.1713904507106709E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2778673190586244E+0; + b = 0.8213021581932511E-1; + v = 0.1555213603396808E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5033564271075117E+0; + b = 0.8999205842074875E-1; + v = 0.1802239128008525E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 770: + + v = 0.2192942088181184E-3; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 
0.1436433617319080E-2; + start = getLebedevReccurencePoints (2, start, a, b, v); + v = 0.1421940344335877E-2; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.5087204410502360E-1; + v = 0.6798123511050502E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1228198790178831E+0; + v = 0.9913184235294912E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2026890814408786E+0; + v = 0.1180207833238949E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2847745156464294E+0; + v = 0.1296599602080921E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3656719078978026E+0; + v = 0.1365871427428316E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4428264886713469E+0; + v = 0.1402988604775325E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5140619627249735E+0; + v = 0.1418645563595609E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6306401219166803E+0; + v = 0.1421376741851662E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6716883332022612E+0; + v = 0.1423996475490962E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6979792685336881E+0; + v = 0.1431554042178567E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1446865674195309E+0; + v = 0.9254401499865368E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.3390263475411216E+0; + v = 0.1250239995053509E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5335804651263506E+0; + v = 0.1394365843329230E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.6944024393349413E-1; + b = 0.2355187894242326E+0; + v = 0.1127089094671749E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2269004109529460E+0; + b = 0.4102182474045730E+0; + v = 0.1345753760910670E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.8025574607775339E-1; + b 
= 0.6214302417481605E+0; + v = 0.1424957283316783E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1467999527896572E+0; + b = 0.3245284345717394E+0; + v = 0.1261523341237750E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1571507769824727E+0; + b = 0.5224482189696630E+0; + v = 0.1392547106052696E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2365702993157246E+0; + b = 0.6017546634089558E+0; + v = 0.1418761677877656E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.7714815866765732E-1; + b = 0.4346575516141163E+0; + v = 0.1338366684479554E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3062936666210730E+0; + b = 0.4908826589037616E+0; + v = 0.1393700862676131E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3822477379524787E+0; + b = 0.5648768149099500E+0; + v = 0.1415914757466932E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 974: + + v = 0.1438294190527431E-3; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.1125772288287004E-2; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.4292963545341347E-1; + v = 0.4948029341949241E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1051426854086404E+0; + v = 0.7357990109125470E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1750024867623087E+0; + v = 0.8889132771304384E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2477653379650257E+0; + v = 0.9888347838921435E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3206567123955957E+0; + v = 0.1053299681709471E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3916520749849983E+0; + v = 0.1092778807014578E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4590825874187624E+0; + v = 0.1114389394063227E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a 
= 0.5214563888415861E+0; + v = 0.1123724788051555E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6253170244654199E+0; + v = 0.1125239325243814E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6637926744523170E+0; + v = 0.1126153271815905E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6910410398498301E+0; + v = 0.1130286931123841E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7052907007457760E+0; + v = 0.1134986534363955E-2; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1236686762657990E+0; + v = 0.6823367927109931E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.2940777114468387E+0; + v = 0.9454158160447096E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.4697753849207649E+0; + v = 0.1074429975385679E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.6334563241139567E+0; + v = 0.1129300086569132E-2; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5974048614181342E-1; + b = 0.2029128752777523E+0; + v = 0.8436884500901954E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1375760408473636E+0; + b = 0.4602621942484054E+0; + v = 0.1075255720448885E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3391016526336286E+0; + b = 0.5030673999662036E+0; + v = 0.1108577236864462E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1271675191439820E+0; + b = 0.2817606422442134E+0; + v = 0.9566475323783357E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2693120740413512E+0; + b = 0.4331561291720157E+0; + v = 0.1080663250717391E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1419786452601918E+0; + b = 0.6256167358580814E+0; + v = 0.1126797131196295E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6709284600738255E-1; + b = 0.3798395216859157E+0; + v = 0.1022568715358061E-2; + 
start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.7057738183256172E-1; + b = 0.5517505421423520E+0; + v = 0.1108960267713108E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2783888477882155E+0; + b = 0.6029619156159187E+0; + v = 0.1122790653435766E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1979578938917407E+0; + b = 0.3589606329589096E+0; + v = 0.1032401847117460E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2087307061103274E+0; + b = 0.5348666438135476E+0; + v = 0.1107249382283854E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4055122137872836E+0; + b = 0.5674997546074373E+0; + v = 0.1121780048519972E-2; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 1202: + + v = 0.1105189233267572E-3; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.9205232738090741E-3; + start = getLebedevReccurencePoints (2, start, a, b, v); + v = 0.9133159786443561E-3; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.3712636449657089E-1; + v = 0.3690421898017899E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.9140060412262223E-1; + v = 0.5603990928680660E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1531077852469906E+0; + v = 0.6865297629282609E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2180928891660612E+0; + v = 0.7720338551145630E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2839874532200175E+0; + v = 0.8301545958894795E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3491177600963764E+0; + v = 0.8686692550179628E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4121431461444309E+0; + v = 0.8927076285846890E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4718993627149127E+0; + v = 0.9060820238568219E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + 
a = 0.5273145452842337E+0; + v = 0.9119777254940867E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6209475332444019E+0; + v = 0.9128720138604181E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6569722711857291E+0; + v = 0.9130714935691735E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6841788309070143E+0; + v = 0.9152873784554116E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7012604330123631E+0; + v = 0.9187436274321654E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1072382215478166E+0; + v = 0.5176977312965694E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.2582068959496968E+0; + v = 0.7331143682101417E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.4172752955306717E+0; + v = 0.8463232836379928E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5700366911792503E+0; + v = 0.9031122694253992E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.9827986018263947E+0; + b = 0.1771774022615325E+0; + v = 0.6485778453163257E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.9624249230326228E+0; + b = 0.2475716463426288E+0; + v = 0.7435030910982369E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.9402007994128811E+0; + b = 0.3354616289066489E+0; + v = 0.7998527891839054E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.9320822040143202E+0; + b = 0.3173615246611977E+0; + v = 0.8101731497468018E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.9043674199393299E+0; + b = 0.4090268427085357E+0; + v = 0.8483389574594331E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.8912407560074747E+0; + b = 0.3854291150669224E+0; + v = 0.8556299257311812E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.8676435628462708E+0; + b = 0.4932221184851285E+0; + v = 0.8803208679738260E-3; 
+ start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.8581979986041619E+0; + b = 0.4785320675922435E+0; + v = 0.8811048182425720E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.8396753624049856E+0; + b = 0.4507422593157064E+0; + v = 0.8850282341265444E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.8165288564022188E+0; + b = 0.5632123020762100E+0; + v = 0.9021342299040653E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.8015469370783529E+0; + b = 0.5434303569693900E+0; + v = 0.9010091677105086E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.7773563069070351E+0; + b = 0.5123518486419871E+0; + v = 0.9022692938426915E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.7661621213900394E+0; + b = 0.6394279634749102E+0; + v = 0.9158016174693465E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.7553584143533510E+0; + b = 0.6269805509024392E+0; + v = 0.9131578003189435E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.7344305757559503E+0; + b = 0.6031161693096310E+0; + v = 0.9107813579482705E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.7043837184021765E+0; + b = 0.5693702498468441E+0; + v = 0.9105760258970126E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 1454: + + v = 0.7777160743261247E-4; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.7557646413004701E-3; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.3229290663413854E-1; + v = 0.2841633806090617E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.8036733271462222E-1; + v = 0.4374419127053555E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1354289960531653E+0; + v = 0.5417174740872172E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1938963861114426E+0; + v = 0.6148000891358593E-3; + start = 
getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2537343715011275E+0; + v = 0.6664394485800705E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3135251434752570E+0; + v = 0.7025039356923220E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3721558339375338E+0; + v = 0.7268511789249627E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4286809575195696E+0; + v = 0.7422637534208629E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4822510128282994E+0; + v = 0.7509545035841214E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5320679333566263E+0; + v = 0.7548535057718401E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6172998195394274E+0; + v = 0.7554088969774001E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6510679849127481E+0; + v = 0.7553147174442808E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6777315251687360E+0; + v = 0.7564767653292297E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6963109410648741E+0; + v = 0.7587991808518730E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7058935009831749E+0; + v = 0.7608261832033027E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.9955546194091857E+0; + v = 0.4021680447874916E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.9734115901794209E+0; + v = 0.5804871793945964E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.9275693732388626E+0; + v = 0.6792151955945159E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.8568022422795103E+0; + v = 0.7336741211286294E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.7623495553719372E+0; + v = 0.7581866300989608E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5707522908892223E+0; + b = 0.4387028039889501E+0; + v = 0.7538257859800743E-3; + start 
= getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5196463388403083E+0; + b = 0.3858908414762617E+0; + v = 0.7483517247053123E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4646337531215351E+0; + b = 0.3301937372343854E+0; + v = 0.7371763661112059E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4063901697557691E+0; + b = 0.2725423573563777E+0; + v = 0.7183448895756934E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3456329466643087E+0; + b = 0.2139510237495250E+0; + v = 0.6895815529822191E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2831395121050332E+0; + b = 0.1555922309786647E+0; + v = 0.6480105801792886E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2197682022925330E+0; + b = 0.9892878979686097E-1; + v = 0.5897558896594636E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1564696098650355E+0; + b = 0.4598642910675510E-1; + v = 0.5095708849247346E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6027356673721295E+0; + b = 0.3376625140173426E+0; + v = 0.7536906428909755E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5496032320255096E+0; + b = 0.2822301309727988E+0; + v = 0.7472505965575118E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4921707755234567E+0; + b = 0.2248632342592540E+0; + v = 0.7343017132279698E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4309422998598483E+0; + b = 0.1666224723456479E+0; + v = 0.7130871582177445E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3664108182313672E+0; + b = 0.1086964901822169E+0; + v = 0.6817022032112776E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2990189057758436E+0; + b = 0.5251989784120085E-1; + v = 0.6380941145604121E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6268724013144998E+0; + b = 0.2297523657550023E+0; + v = 
0.7550381377920310E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5707324144834607E+0; + b = 0.1723080607093800E+0; + v = 0.7478646640144802E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5096360901960365E+0; + b = 0.1140238465390513E+0; + v = 0.7335918720601220E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4438729938312456E+0; + b = 0.5611522095882537E-1; + v = 0.7110120527658118E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6419978471082389E+0; + b = 0.1164174423140873E+0; + v = 0.7571363978689501E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5817218061802611E+0; + b = 0.5797589531445219E-1; + v = 0.7489908329079234E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 1730: + + v = 0.6309049437420976E-4; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.6398287705571748E-3; + start = getLebedevReccurencePoints (2, start, a, b, v); + v = 0.6357185073530720E-3; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.2860923126194662E-1; + v = 0.2221207162188168E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7142556767711522E-1; + v = 0.3475784022286848E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1209199540995559E+0; + v = 0.4350742443589804E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1738673106594379E+0; + v = 0.4978569136522127E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2284645438467734E+0; + v = 0.5435036221998053E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2834807671701512E+0; + v = 0.5765913388219542E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3379680145467339E+0; + v = 0.6001200359226003E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3911355454819537E+0; + v = 0.6162178172717512E-3; + start = 
getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4422860353001403E+0; + v = 0.6265218152438485E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4907781568726057E+0; + v = 0.6323987160974212E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5360006153211468E+0; + v = 0.6350767851540569E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6142105973596603E+0; + v = 0.6354362775297107E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6459300387977504E+0; + v = 0.6352302462706235E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6718056125089225E+0; + v = 0.6358117881417972E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6910888533186254E+0; + v = 0.6373101590310117E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7030467416823252E+0; + v = 0.6390428961368665E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.8354951166354646E-1; + v = 0.3186913449946576E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.2050143009099486E+0; + v = 0.4678028558591711E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.3370208290706637E+0; + v = 0.5538829697598626E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.4689051484233963E+0; + v = 0.6044475907190476E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5939400424557334E+0; + v = 0.6313575103509012E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.1394983311832261E+0; + b = 0.4097581162050343E-1; + v = 0.4078626431855630E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1967999180485014E+0; + b = 0.8851987391293348E-1; + v = 0.4759933057812725E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2546183732548967E+0; + b = 0.1397680182969819E+0; + v = 0.5268151186413440E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 
0.3121281074713875E+0; + b = 0.1929452542226526E+0; + v = 0.5643048560507316E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3685981078502492E+0; + b = 0.2467898337061562E+0; + v = 0.5914501076613073E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4233760321547856E+0; + b = 0.3003104124785409E+0; + v = 0.6104561257874195E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4758671236059246E+0; + b = 0.3526684328175033E+0; + v = 0.6230252860707806E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5255178579796463E+0; + b = 0.4031134861145713E+0; + v = 0.6305618761760796E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5718025633734589E+0; + b = 0.4509426448342351E+0; + v = 0.6343092767597889E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2686927772723415E+0; + b = 0.4711322502423248E-1; + v = 0.5176268945737826E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3306006819904809E+0; + b = 0.9784487303942695E-1; + v = 0.5564840313313692E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3904906850594983E+0; + b = 0.1505395810025273E+0; + v = 0.5856426671038980E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4479957951904390E+0; + b = 0.2039728156296050E+0; + v = 0.6066386925777091E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5027076848919780E+0; + b = 0.2571529941121107E+0; + v = 0.6208824962234458E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5542087392260217E+0; + b = 0.3092191375815670E+0; + v = 0.6296314297822907E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6020850887375187E+0; + b = 0.3593807506130276E+0; + v = 0.6340423756791859E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4019851409179594E+0; + b = 0.5063389934378671E-1; + v = 0.5829627677107342E-3; + start = 
getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4635614567449800E+0; + b = 0.1032422269160612E+0; + v = 0.6048693376081110E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5215860931591575E+0; + b = 0.1566322094006254E+0; + v = 0.6202362317732461E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5758202499099271E+0; + b = 0.2098082827491099E+0; + v = 0.6299005328403779E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6259893683876795E+0; + b = 0.2618824114553391E+0; + v = 0.6347722390609353E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5313795124811891E+0; + b = 0.5263245019338556E-1; + v = 0.6203778981238834E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5893317955931995E+0; + b = 0.1061059730982005E+0; + v = 0.6308414671239979E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6426246321215801E+0; + b = 0.1594171564034221E+0; + v = 0.6362706466959498E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6511904367376113E+0; + b = 0.5354789536565540E-1; + v = 0.6375414170333233E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 2030: + + v = 0.4656031899197431E-4; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.5421549195295507E-3; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.2540835336814348E-1; + v = 0.1778522133346553E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6399322800504915E-1; + v = 0.2811325405682796E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1088269469804125E+0; + v = 0.3548896312631459E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1570670798818287E+0; + v = 0.4090310897173364E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2071163932282514E+0; + v = 0.4493286134169965E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 
0.2578914044450844E+0; + v = 0.4793728447962723E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3085687558169623E+0; + v = 0.5015415319164265E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3584719706267024E+0; + v = 0.5175127372677937E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4070135594428709E+0; + v = 0.5285522262081019E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4536618626222638E+0; + v = 0.5356832703713962E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4979195686463577E+0; + v = 0.5397914736175170E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5393075111126999E+0; + v = 0.5416899441599930E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6115617676843916E+0; + v = 0.5419308476889938E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6414308435160159E+0; + v = 0.5416936902030596E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6664099412721607E+0; + v = 0.5419544338703164E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6859161771214913E+0; + v = 0.5428983656630975E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6993625593503890E+0; + v = 0.5442286500098193E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7062393387719380E+0; + v = 0.5452250345057301E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7479028168349763E-1; + v = 0.2568002497728530E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.1848951153969366E+0; + v = 0.3827211700292145E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.3059529066581305E+0; + v = 0.4579491561917824E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.4285556101021362E+0; + v = 0.5042003969083574E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5468758653496526E+0; + v 
= 0.5312708889976025E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.6565821978343439E+0; + v = 0.5438401790747117E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.1253901572367117E+0; + b = 0.3681917226439641E-1; + v = 0.3316041873197344E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1775721510383941E+0; + b = 0.7982487607213301E-1; + v = 0.3899113567153771E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2305693358216114E+0; + b = 0.1264640966592335E+0; + v = 0.4343343327201309E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2836502845992063E+0; + b = 0.1751585683418957E+0; + v = 0.4679415262318919E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3361794746232590E+0; + b = 0.2247995907632670E+0; + v = 0.4930847981631031E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3875979172264824E+0; + b = 0.2745299257422246E+0; + v = 0.5115031867540091E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4374019316999074E+0; + b = 0.3236373482441118E+0; + v = 0.5245217148457367E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4851275843340022E+0; + b = 0.3714967859436741E+0; + v = 0.5332041499895321E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5303391803806868E+0; + b = 0.4175353646321745E+0; + v = 0.5384583126021542E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5726197380596287E+0; + b = 0.4612084406355461E+0; + v = 0.5411067210798852E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2431520732564863E+0; + b = 0.4258040133043952E-1; + v = 0.4259797391468714E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3002096800895869E+0; + b = 0.8869424306722721E-1; + v = 0.4604931368460021E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3558554457457432E+0; + b = 0.1368811706510655E+0; + 
v = 0.4871814878255202E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4097782537048887E+0; + b = 0.1860739985015033E+0; + v = 0.5072242910074885E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4616337666067458E+0; + b = 0.2354235077395853E+0; + v = 0.5217069845235350E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5110707008417874E+0; + b = 0.2842074921347011E+0; + v = 0.5315785966280310E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5577415286163795E+0; + b = 0.3317784414984102E+0; + v = 0.5376833708758905E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6013060431366950E+0; + b = 0.3775299002040700E+0; + v = 0.5408032092069521E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3661596767261781E+0; + b = 0.4599367887164592E-1; + v = 0.4842744917904866E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4237633153506581E+0; + b = 0.9404893773654421E-1; + v = 0.5048926076188130E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4786328454658452E+0; + b = 0.1431377109091971E+0; + v = 0.5202607980478373E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5305702076789774E+0; + b = 0.1924186388843570E+0; + v = 0.5309932388325743E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5793436224231788E+0; + b = 0.2411590944775190E+0; + v = 0.5377419770895208E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6247069017094747E+0; + b = 0.2886871491583605E+0; + v = 0.5411696331677717E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4874315552535204E+0; + b = 0.4804978774953206E-1; + v = 0.5197996293282420E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5427337322059053E+0; + b = 0.9716857199366665E-1; + v = 0.5311120836622945E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5943493747246700E+0; 
+ b = 0.1465205839795055E+0; + v = 0.5384309319956951E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6421314033564943E+0; + b = 0.1953579449803574E+0; + v = 0.5421859504051886E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6020628374713980E+0; + b = 0.4916375015738108E-1; + v = 0.5390948355046314E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6529222529856881E+0; + b = 0.9861621540127005E-1; + v = 0.5433312705027845E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 2354: + + v = 0.3922616270665292E-4; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.4703831750854424E-3; + start = getLebedevReccurencePoints (2, start, a, b, v); + v = 0.4678202801282136E-3; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.2290024646530589E-1; + v = 0.1437832228979900E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5779086652271284E-1; + v = 0.2303572493577644E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.9863103576375984E-1; + v = 0.2933110752447454E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1428155792982185E+0; + v = 0.3402905998359838E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1888978116601463E+0; + v = 0.3759138466870372E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2359091682970210E+0; + v = 0.4030638447899798E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2831228833706171E+0; + v = 0.4236591432242211E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3299495857966693E+0; + v = 0.4390522656946746E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3758840802660796E+0; + v = 0.4502523466626247E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4204751831009480E+0; + v = 0.4580577727783541E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); 
+ a = 0.4633068518751051E+0; + v = 0.4631391616615899E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5039849474507313E+0; + v = 0.4660928953698676E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5421265793440747E+0; + v = 0.4674751807936953E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6092660230557310E+0; + v = 0.4676414903932920E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6374654204984869E+0; + v = 0.4674086492347870E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6615136472609892E+0; + v = 0.4674928539483207E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6809487285958127E+0; + v = 0.4680748979686447E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6952980021665196E+0; + v = 0.4690449806389040E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7041245497695400E+0; + v = 0.4699877075860818E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6744033088306065E-1; + v = 0.2099942281069176E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.1678684485334166E+0; + v = 0.3172269150712804E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.2793559049539613E+0; + v = 0.3832051358546523E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.3935264218057639E+0; + v = 0.4252193818146985E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5052629268232558E+0; + v = 0.4513807963755000E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.6107905315437531E+0; + v = 0.4657797469114178E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.1135081039843524E+0; + b = 0.3331954884662588E-1; + v = 0.2733362800522836E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1612866626099378E+0; + b = 0.7247167465436538E-1; + v = 0.3235485368463559E-3; + start = 
getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2100786550168205E+0; + b = 0.1151539110849745E+0; + v = 0.3624908726013453E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2592282009459942E+0; + b = 0.1599491097143677E+0; + v = 0.3925540070712828E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3081740561320203E+0; + b = 0.2058699956028027E+0; + v = 0.4156129781116235E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3564289781578164E+0; + b = 0.2521624953502911E+0; + v = 0.4330644984623263E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4035587288240703E+0; + b = 0.2982090785797674E+0; + v = 0.4459677725921312E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4491671196373903E+0; + b = 0.3434762087235733E+0; + v = 0.4551593004456795E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4928854782917489E+0; + b = 0.3874831357203437E+0; + v = 0.4613341462749918E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5343646791958988E+0; + b = 0.4297814821746926E+0; + v = 0.4651019618269806E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5732683216530990E+0; + b = 0.4699402260943537E+0; + v = 0.4670249536100625E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2214131583218986E+0; + b = 0.3873602040643895E-1; + v = 0.3549555576441708E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2741796504750071E+0; + b = 0.8089496256902013E-1; + v = 0.3856108245249010E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3259797439149485E+0; + b = 0.1251732177620872E+0; + v = 0.4098622845756882E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3765441148826891E+0; + b = 0.1706260286403185E+0; + v = 0.4286328604268950E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4255773574530558E+0; + b = 0.2165115147300408E+0; + v = 
0.4427802198993945E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4727795117058430E+0; + b = 0.2622089812225259E+0; + v = 0.4530473511488561E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5178546895819012E+0; + b = 0.3071721431296201E+0; + v = 0.4600805475703138E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5605141192097460E+0; + b = 0.3508998998801138E+0; + v = 0.4644599059958017E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6004763319352512E+0; + b = 0.3929160876166931E+0; + v = 0.4667274455712508E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3352842634946949E+0; + b = 0.4202563457288019E-1; + v = 0.4069360518020356E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3891971629814670E+0; + b = 0.8614309758870850E-1; + v = 0.4260442819919195E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4409875565542281E+0; + b = 0.1314500879380001E+0; + v = 0.4408678508029063E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4904893058592484E+0; + b = 0.1772189657383859E+0; + v = 0.4518748115548597E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5375056138769549E+0; + b = 0.2228277110050294E+0; + v = 0.4595564875375116E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5818255708669969E+0; + b = 0.2677179935014386E+0; + v = 0.4643988774315846E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6232334858144959E+0; + b = 0.3113675035544165E+0; + v = 0.4668827491646946E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4489485354492058E+0; + b = 0.4409162378368174E-1; + v = 0.4400541823741973E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5015136875933150E+0; + b = 0.8939009917748489E-1; + v = 0.4514512890193797E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5511300550512623E+0; + b 
= 0.1351806029383365E+0; + v = 0.4596198627347549E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5976720409858000E+0; + b = 0.1808370355053196E+0; + v = 0.4648659016801781E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6409956378989354E+0; + b = 0.2257852192301602E+0; + v = 0.4675502017157673E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5581222330827514E+0; + b = 0.4532173421637160E-1; + v = 0.4598494476455523E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6074705984161695E+0; + b = 0.9117488031840314E-1; + v = 0.4654916955152048E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6532272537379033E+0; + b = 0.1369294213140155E+0; + v = 0.4684709779505137E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6594761494500487E+0; + b = 0.4589901487275583E-1; + v = 0.4691445539106986E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 2702: + + v = 0.2998675149888161E-4; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.4077860529495355E-3; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.2065562538818703E-1; + v = 0.1185349192520667E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5250918173022379E-1; + v = 0.1913408643425751E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.8993480082038376E-1; + v = 0.2452886577209897E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1306023924436019E+0; + v = 0.2862408183288702E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1732060388531418E+0; + v = 0.3178032258257357E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2168727084820249E+0; + v = 0.3422945667633690E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2609528309173586E+0; + v = 0.3612790520235922E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a 
= 0.3049252927938952E+0; + v = 0.3758638229818521E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3483484138084404E+0; + v = 0.3868711798859953E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3908321549106406E+0; + v = 0.3949429933189938E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4320210071894814E+0; + v = 0.4006068107541156E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4715824795890053E+0; + v = 0.4043192149672723E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5091984794078453E+0; + v = 0.4064947495808078E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5445580145650803E+0; + v = 0.4075245619813152E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6072575796841768E+0; + v = 0.4076423540893566E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6339484505755803E+0; + v = 0.4074280862251555E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6570718257486958E+0; + v = 0.4074163756012244E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6762557330090709E+0; + v = 0.4077647795071246E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6911161696923790E+0; + v = 0.4084517552782530E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7012841911659961E+0; + v = 0.4092468459224052E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7064559272410020E+0; + v = 0.4097872687240906E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6123554989894765E-1; + v = 0.1738986811745028E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.1533070348312393E+0; + v = 0.2659616045280191E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.2563902605244206E+0; + v = 0.3240596008171533E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.3629346991663361E+0; + 
v = 0.3621195964432943E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.4683949968987538E+0; + v = 0.3868838330760539E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5694479240657952E+0; + v = 0.4018911532693111E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.6634465430993955E+0; + v = 0.4089929432983252E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.1033958573552305E+0; + b = 0.3034544009063584E-1; + v = 0.2279907527706409E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1473521412414395E+0; + b = 0.6618803044247135E-1; + v = 0.2715205490578897E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1924552158705967E+0; + b = 0.1054431128987715E+0; + v = 0.3057917896703976E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2381094362890328E+0; + b = 0.1468263551238858E+0; + v = 0.3326913052452555E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2838121707936760E+0; + b = 0.1894486108187886E+0; + v = 0.3537334711890037E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3291323133373415E+0; + b = 0.2326374238761579E+0; + v = 0.3700567500783129E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3736896978741460E+0; + b = 0.2758485808485768E+0; + v = 0.3825245372589122E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4171406040760013E+0; + b = 0.3186179331996921E+0; + v = 0.3918125171518296E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4591677985256915E+0; + b = 0.3605329796303794E+0; + v = 0.3984720419937579E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4994733831718418E+0; + b = 0.4012147253586509E+0; + v = 0.4029746003338211E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5377731830445096E+0; + b = 0.4403050025570692E+0; + v = 0.4057428632156627E-3; + start = 
getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5737917830001331E+0; + b = 0.4774565904277483E+0; + v = 0.4071719274114857E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2027323586271389E+0; + b = 0.3544122504976147E-1; + v = 0.2990236950664119E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2516942375187273E+0; + b = 0.7418304388646328E-1; + v = 0.3262951734212878E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3000227995257181E+0; + b = 0.1150502745727186E+0; + v = 0.3482634608242413E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3474806691046342E+0; + b = 0.1571963371209364E+0; + v = 0.3656596681700892E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3938103180359209E+0; + b = 0.1999631877247100E+0; + v = 0.3791740467794218E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4387519590455703E+0; + b = 0.2428073457846535E+0; + v = 0.3894034450156905E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4820503960077787E+0; + b = 0.2852575132906155E+0; + v = 0.3968600245508371E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5234573778475101E+0; + b = 0.3268884208674639E+0; + v = 0.4019931351420050E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5627318647235282E+0; + b = 0.3673033321675939E+0; + v = 0.4052108801278599E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5996390607156954E+0; + b = 0.4061211551830290E+0; + v = 0.4068978613940934E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3084780753791947E+0; + b = 0.3860125523100059E-1; + v = 0.3454275351319704E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3589988275920223E+0; + b = 0.7928938987104867E-1; + v = 0.3629963537007920E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4078628415881973E+0; + b = 0.1212614643030087E+0; + v = 
0.3770187233889873E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4549287258889735E+0; + b = 0.1638770827382693E+0; + v = 0.3878608613694378E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5000278512957279E+0; + b = 0.2065965798260176E+0; + v = 0.3959065270221274E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5429785044928199E+0; + b = 0.2489436378852235E+0; + v = 0.4015286975463570E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5835939850491711E+0; + b = 0.2904811368946891E+0; + v = 0.4050866785614717E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6216870353444856E+0; + b = 0.3307941957666609E+0; + v = 0.4069320185051913E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4151104662709091E+0; + b = 0.4064829146052554E-1; + v = 0.3760120964062763E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4649804275009218E+0; + b = 0.8258424547294755E-1; + v = 0.3870969564418064E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5124695757009662E+0; + b = 0.1251841962027289E+0; + v = 0.3955287790534055E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5574711100606224E+0; + b = 0.1679107505976331E+0; + v = 0.4015361911302668E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5998597333287227E+0; + b = 0.2102805057358715E+0; + v = 0.4053836986719548E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6395007148516600E+0; + b = 0.2518418087774107E+0; + v = 0.4073578673299117E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5188456224746252E+0; + b = 0.4194321676077518E-1; + v = 0.3954628379231406E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5664190707942778E+0; + b = 0.8457661551921499E-1; + v = 0.4017645508847530E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6110464353283153E+0; + b 
= 0.1273652932519396E+0; + v = 0.4059030348651293E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6526430302051563E+0; + b = 0.1698173239076354E+0; + v = 0.4080565809484880E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6167551880377548E+0; + b = 0.4266398851548864E-1; + v = 0.4063018753664651E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6607195418355383E+0; + b = 0.8551925814238349E-1; + v = 0.4087191292799671E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 3074: + + v = 0.2599095953754734E-4; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.3603134089687541E-3; + start = getLebedevReccurencePoints (2, start, a, b, v); + v = 0.3586067974412447E-3; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.1886108518723392E-1; + v = 0.9831528474385880E-4; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4800217244625303E-1; + v = 0.1605023107954450E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.8244922058397242E-1; + v = 0.2072200131464099E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1200408362484023E+0; + v = 0.2431297618814187E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1595773530809965E+0; + v = 0.2711819064496707E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2002635973434064E+0; + v = 0.2932762038321116E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2415127590139982E+0; + v = 0.3107032514197368E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2828584158458477E+0; + v = 0.3243808058921213E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3239091015338138E+0; + v = 0.3349899091374030E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3643225097962194E+0; + v = 0.3430580688505218E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a 
= 0.4037897083691802E+0; + v = 0.3490124109290343E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4420247515194127E+0; + v = 0.3532148948561955E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4787572538464938E+0; + v = 0.3559862669062833E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5137265251275234E+0; + v = 0.3576224317551411E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5466764056654611E+0; + v = 0.3584050533086076E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6054859420813535E+0; + v = 0.3584903581373224E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6308106701764562E+0; + v = 0.3582991879040586E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6530369230179584E+0; + v = 0.3582371187963125E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6718609524611158E+0; + v = 0.3584353631122350E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6869676499894013E+0; + v = 0.3589120166517785E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6980467077240748E+0; + v = 0.3595445704531601E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7048241721250522E+0; + v = 0.3600943557111074E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5591105222058232E-1; + v = 0.1456447096742039E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.1407384078513916E+0; + v = 0.2252370188283782E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.2364035438976309E+0; + v = 0.2766135443474897E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.3360602737818170E+0; + v = 0.3110729491500851E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.4356292630054665E+0; + v = 0.3342506712303391E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5321569415256174E+0; + 
v = 0.3491981834026860E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.6232956305040554E+0; + v = 0.3576003604348932E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.9469870086838469E-1; + b = 0.2778748387309470E-1; + v = 0.1921921305788564E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1353170300568141E+0; + b = 0.6076569878628364E-1; + v = 0.2301458216495632E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1771679481726077E+0; + b = 0.9703072762711040E-1; + v = 0.2604248549522893E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2197066664231751E+0; + b = 0.1354112458524762E+0; + v = 0.2845275425870697E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2624783557374927E+0; + b = 0.1750996479744100E+0; + v = 0.3036870897974840E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3050969521214442E+0; + b = 0.2154896907449802E+0; + v = 0.3188414832298066E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3472252637196021E+0; + b = 0.2560954625740152E+0; + v = 0.3307046414722089E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3885610219026360E+0; + b = 0.2965070050624096E+0; + v = 0.3398330969031360E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4288273776062765E+0; + b = 0.3363641488734497E+0; + v = 0.3466757899705373E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4677662471302948E+0; + b = 0.3753400029836788E+0; + v = 0.3516095923230054E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5051333589553359E+0; + b = 0.4131297522144286E+0; + v = 0.3549645184048486E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5406942145810492E+0; + b = 0.4494423776081795E+0; + v = 0.3570415969441392E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5742204122576457E+0; + b = 0.4839938958841502E+0; 
+ v = 0.3581251798496118E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1865407027225188E+0; + b = 0.3259144851070796E-1; + v = 0.2543491329913348E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2321186453689432E+0; + b = 0.6835679505297343E-1; + v = 0.2786711051330776E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2773159142523882E+0; + b = 0.1062284864451989E+0; + v = 0.2985552361083679E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3219200192237254E+0; + b = 0.1454404409323047E+0; + v = 0.3145867929154039E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3657032593944029E+0; + b = 0.1854018282582510E+0; + v = 0.3273290662067609E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4084376778363622E+0; + b = 0.2256297412014750E+0; + v = 0.3372705511943501E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4499004945751427E+0; + b = 0.2657104425000896E+0; + v = 0.3448274437851510E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4898758141326335E+0; + b = 0.3052755487631557E+0; + v = 0.3503592783048583E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5281547442266309E+0; + b = 0.3439863920645423E+0; + v = 0.3541854792663162E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5645346989813992E+0; + b = 0.3815229456121914E+0; + v = 0.3565995517909428E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5988181252159848E+0; + b = 0.4175752420966734E+0; + v = 0.3578802078302898E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2850425424471603E+0; + b = 0.3562149509862536E-1; + v = 0.2958644592860982E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3324619433027876E+0; + b = 0.7330318886871096E-1; + v = 0.3119548129116835E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 
0.3785848333076282E+0; + b = 0.1123226296008472E+0; + v = 0.3250745225005984E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4232891028562115E+0; + b = 0.1521084193337708E+0; + v = 0.3355153415935208E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4664287050829722E+0; + b = 0.1921844459223610E+0; + v = 0.3435847568549328E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5078458493735726E+0; + b = 0.2321360989678303E+0; + v = 0.3495786831622488E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5473779816204180E+0; + b = 0.2715886486360520E+0; + v = 0.3537767805534621E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5848617133811376E+0; + b = 0.3101924707571355E+0; + v = 0.3564459815421428E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6201348281584888E+0; + b = 0.3476121052890973E+0; + v = 0.3578464061225468E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3852191185387871E+0; + b = 0.3763224880035108E-1; + v = 0.3239748762836212E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4325025061073423E+0; + b = 0.7659581935637135E-1; + v = 0.3345491784174287E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4778486229734490E+0; + b = 0.1163381306083900E+0; + v = 0.3429126177301782E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5211663693009000E+0; + b = 0.1563890598752899E+0; + v = 0.3492420343097421E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5623469504853703E+0; + b = 0.1963320810149200E+0; + v = 0.3537399050235257E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6012718188659246E+0; + b = 0.2357847407258738E+0; + v = 0.3566209152659172E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6378179206390117E+0; + b = 0.2743846121244060E+0; + v = 0.3581084321919782E-3; + start = 
getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4836936460214534E+0; + b = 0.3895902610739024E-1; + v = 0.3426522117591512E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5293792562683797E+0; + b = 0.7871246819312640E-1; + v = 0.3491848770121379E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5726281253100033E+0; + b = 0.1187963808202981E+0; + v = 0.3539318235231476E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6133658776169068E+0; + b = 0.1587914708061787E+0; + v = 0.3570231438458694E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6515085491865307E+0; + b = 0.1983058575227646E+0; + v = 0.3586207335051714E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5778692716064976E+0; + b = 0.3977209689791542E-1; + v = 0.3541196205164025E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6207904288086192E+0; + b = 0.7990157592981152E-1; + v = 0.3574296911573953E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6608688171046802E+0; + b = 0.1199671308754309E+0; + v = 0.3591993279818963E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6656263089489130E+0; + b = 0.4015955957805969E-1; + v = 0.3595855034661997E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 3470: + + v = 0.2040382730826330E-4; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.3178149703889544E-3; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.1721420832906233E-1; + v = 0.8288115128076110E-4; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4408875374981770E-1; + v = 0.1360883192522954E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7594680813878681E-1; + v = 0.1766854454542662E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1108335359204799E+0; + v = 0.2083153161230153E-3; + start = getLebedevReccurencePoints 
(4, start, a, b, v); + a = 0.1476517054388567E+0; + v = 0.2333279544657158E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1856731870860615E+0; + v = 0.2532809539930247E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2243634099428821E+0; + v = 0.2692472184211158E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2633006881662727E+0; + v = 0.2819949946811885E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3021340904916283E+0; + v = 0.2920953593973030E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3405594048030089E+0; + v = 0.2999889782948352E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3783044434007372E+0; + v = 0.3060292120496902E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4151194767407910E+0; + v = 0.3105109167522192E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4507705766443257E+0; + v = 0.3136902387550312E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4850346056573187E+0; + v = 0.3157984652454632E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5176950817792470E+0; + v = 0.3170516518425422E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5485384240820989E+0; + v = 0.3176568425633755E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6039117238943308E+0; + v = 0.3177198411207062E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6279956655573113E+0; + v = 0.3175519492394733E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6493636169568952E+0; + v = 0.3174654952634756E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6677644117704504E+0; + v = 0.3175676415467654E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6829368572115624E+0; + v = 0.3178923417835410E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 
0.6946195818184121E+0; + v = 0.3183788287531909E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7025711542057026E+0; + v = 0.3188755151918807E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7066004767140119E+0; + v = 0.3191916889313849E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5132537689946062E-1; + v = 0.1231779611744508E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.1297994661331225E+0; + v = 0.1924661373839880E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.2188852049401307E+0; + v = 0.2380881867403424E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.3123174824903457E+0; + v = 0.2693100663037885E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.4064037620738195E+0; + v = 0.2908673382834366E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.4984958396944782E+0; + v = 0.3053914619381535E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5864975046021365E+0; + v = 0.3143916684147777E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.6686711634580175E+0; + v = 0.3187042244055363E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.8715738780835950E-1; + b = 0.2557175233367578E-1; + v = 0.1635219535869790E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1248383123134007E+0; + b = 0.5604823383376681E-1; + v = 0.1968109917696070E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1638062693383378E+0; + b = 0.8968568601900765E-1; + v = 0.2236754342249974E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2035586203373176E+0; + b = 0.1254086651976279E+0; + v = 0.2453186687017181E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2436798975293774E+0; + b = 0.1624780150162012E+0; + v = 0.2627551791580541E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a 
= 0.2838207507773806E+0; + b = 0.2003422342683208E+0; + v = 0.2767654860152220E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3236787502217692E+0; + b = 0.2385628026255263E+0; + v = 0.2879467027765895E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3629849554840691E+0; + b = 0.2767731148783578E+0; + v = 0.2967639918918702E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4014948081992087E+0; + b = 0.3146542308245309E+0; + v = 0.3035900684660351E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4389818379260225E+0; + b = 0.3519196415895088E+0; + v = 0.3087338237298308E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4752331143674377E+0; + b = 0.3883050984023654E+0; + v = 0.3124608838860167E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5100457318374018E+0; + b = 0.4235613423908649E+0; + v = 0.3150084294226743E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5432238388954868E+0; + b = 0.4574484717196220E+0; + v = 0.3165958398598402E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5745758685072442E+0; + b = 0.4897311639255524E+0; + v = 0.3174320440957372E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1723981437592809E+0; + b = 0.3010630597881105E-1; + v = 0.2182188909812599E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2149553257844597E+0; + b = 0.6326031554204694E-1; + v = 0.2399727933921445E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2573256081247422E+0; + b = 0.9848566980258631E-1; + v = 0.2579796133514652E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2993163751238106E+0; + b = 0.1350835952384266E+0; + v = 0.2727114052623535E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3407238005148000E+0; + b = 0.1725184055442181E+0; + v = 0.2846327656281355E-3; + start = 
getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3813454978483264E+0; + b = 0.2103559279730725E+0; + v = 0.2941491102051334E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4209848104423343E+0; + b = 0.2482278774554860E+0; + v = 0.3016049492136107E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4594519699996300E+0; + b = 0.2858099509982883E+0; + v = 0.3072949726175648E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4965640166185930E+0; + b = 0.3228075659915428E+0; + v = 0.3114768142886460E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5321441655571562E+0; + b = 0.3589459907204151E+0; + v = 0.3143823673666223E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5660208438582166E+0; + b = 0.3939630088864310E+0; + v = 0.3162269764661535E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5980264315964364E+0; + b = 0.4276029922949089E+0; + v = 0.3172164663759821E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2644215852350733E+0; + b = 0.3300939429072552E-1; + v = 0.2554575398967435E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3090113743443063E+0; + b = 0.6803887650078501E-1; + v = 0.2701704069135677E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3525871079197808E+0; + b = 0.1044326136206709E+0; + v = 0.2823693413468940E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3950418005354029E+0; + b = 0.1416751597517679E+0; + v = 0.2922898463214289E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4362475663430163E+0; + b = 0.1793408610504821E+0; + v = 0.3001829062162428E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4760661812145854E+0; + b = 0.2170630750175722E+0; + v = 0.3062890864542953E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5143551042512103E+0; + b = 0.2545145157815807E+0; + v = 
0.3108328279264746E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5509709026935597E+0; + b = 0.2913940101706601E+0; + v = 0.3140243146201245E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5857711030329428E+0; + b = 0.3274169910910705E+0; + v = 0.3160638030977130E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6186149917404392E+0; + b = 0.3623081329317265E+0; + v = 0.3171462882206275E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3586894569557064E+0; + b = 0.3497354386450040E-1; + v = 0.2812388416031796E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4035266610019441E+0; + b = 0.7129736739757095E-1; + v = 0.2912137500288045E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4467775312332510E+0; + b = 0.1084758620193165E+0; + v = 0.2993241256502206E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4883638346608543E+0; + b = 0.1460915689241772E+0; + v = 0.3057101738983822E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5281908348434601E+0; + b = 0.1837790832369980E+0; + v = 0.3105319326251432E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5661542687149311E+0; + b = 0.2212075390874021E+0; + v = 0.3139565514428167E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6021450102031452E+0; + b = 0.2580682841160985E+0; + v = 0.3161543006806366E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6360520783610050E+0; + b = 0.2940656362094121E+0; + v = 0.3172985960613294E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4521611065087196E+0; + b = 0.3631055365867002E-1; + v = 0.2989400336901431E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4959365651560963E+0; + b = 0.7348318468484350E-1; + v = 0.3054555883947677E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5376815804038283E+0; + b 
= 0.1111087643812648E+0; + v = 0.3104764960807702E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5773314480243768E+0; + b = 0.1488226085145408E+0; + v = 0.3141015825977616E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6148113245575056E+0; + b = 0.1862892274135151E+0; + v = 0.3164520621159896E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6500407462842380E+0; + b = 0.2231909701714456E+0; + v = 0.3176652305912204E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5425151448707213E+0; + b = 0.3718201306118944E-1; + v = 0.3105097161023939E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5841860556907931E+0; + b = 0.7483616335067346E-1; + v = 0.3143014117890550E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6234632186851500E+0; + b = 0.1125990834266120E+0; + v = 0.3168172866287200E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6602934551848843E+0; + b = 0.1501303813157619E+0; + v = 0.3181401865570968E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6278573968375105E+0; + b = 0.3767559930245720E-1; + v = 0.3170663659156037E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6665611711264577E+0; + b = 0.7548443301360158E-1; + v = 0.3185447944625510E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 3890: + + v = 0.1807395252196920E-4; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.2848008782238827E-3; + start = getLebedevReccurencePoints (2, start, a, b, v); + v = 0.2836065837530581E-3; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.1587876419858352E-1; + v = 0.7013149266673816E-4; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4069193593751206E-1; + v = 0.1162798021956766E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7025888115257997E-1; + v = 0.1518728583972105E-3; + 
start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1027495450028704E+0; + v = 0.1798796108216934E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1371457730893426E+0; + v = 0.2022593385972785E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1727758532671953E+0; + v = 0.2203093105575464E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2091492038929037E+0; + v = 0.2349294234299855E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2458813281751915E+0; + v = 0.2467682058747003E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2826545859450066E+0; + v = 0.2563092683572224E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3191957291799622E+0; + v = 0.2639253896763318E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3552621469299578E+0; + v = 0.2699137479265108E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3906329503406230E+0; + v = 0.2745196420166739E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4251028614093031E+0; + v = 0.2779529197397593E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4584777520111870E+0; + v = 0.2803996086684265E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4905711358710193E+0; + v = 0.2820302356715842E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5212011669847385E+0; + v = 0.2830056747491068E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5501878488737995E+0; + v = 0.2834808950776839E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6025037877479342E+0; + v = 0.2835282339078929E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6254572689549016E+0; + v = 0.2833819267065800E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6460107179528248E+0; + v = 0.2832858336906784E-3; + start = 
getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6639541138154251E+0; + v = 0.2833268235451244E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6790688515667495E+0; + v = 0.2835432677029253E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6911338580371512E+0; + v = 0.2839091722743049E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6999385956126490E+0; + v = 0.2843308178875841E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7053037748656896E+0; + v = 0.2846703550533846E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4732224387180115E-1; + v = 0.1051193406971900E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.1202100529326803E+0; + v = 0.1657871838796974E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.2034304820664855E+0; + v = 0.2064648113714232E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.2912285643573002E+0; + v = 0.2347942745819741E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.3802361792726768E+0; + v = 0.2547775326597726E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.4680598511056146E+0; + v = 0.2686876684847025E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5528151052155599E+0; + v = 0.2778665755515867E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.6329386307803041E+0; + v = 0.2830996616782929E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.8056516651369069E-1; + b = 0.2363454684003124E-1; + v = 0.1403063340168372E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1156476077139389E+0; + b = 0.5191291632545936E-1; + v = 0.1696504125939477E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1520473382760421E+0; + b = 0.8322715736994519E-1; + v = 0.1935787242745390E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 
0.1892986699745931E+0; + b = 0.1165855667993712E+0; + v = 0.2130614510521968E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2270194446777792E+0; + b = 0.1513077167409504E+0; + v = 0.2289381265931048E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2648908185093273E+0; + b = 0.1868882025807859E+0; + v = 0.2418630292816186E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3026389259574136E+0; + b = 0.2229277629776224E+0; + v = 0.2523400495631193E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3400220296151384E+0; + b = 0.2590951840746235E+0; + v = 0.2607623973449605E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3768217953335510E+0; + b = 0.2951047291750847E+0; + v = 0.2674441032689209E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4128372900921884E+0; + b = 0.3307019714169930E+0; + v = 0.2726432360343356E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4478807131815630E+0; + b = 0.3656544101087634E+0; + v = 0.2765787685924545E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4817742034089257E+0; + b = 0.3997448951939695E+0; + v = 0.2794428690642224E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5143472814653344E+0; + b = 0.4327667110812024E+0; + v = 0.2814099002062895E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5454346213905650E+0; + b = 0.4645196123532293E+0; + v = 0.2826429531578994E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5748739313170252E+0; + b = 0.4948063555703345E+0; + v = 0.2832983542550884E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1599598738286342E+0; + b = 0.2792357590048985E-1; + v = 0.1886695565284976E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1998097412500951E+0; + b = 0.5877141038139065E-1; + v = 0.2081867882748234E-3; + start = 
getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2396228952566202E+0; + b = 0.9164573914691377E-1; + v = 0.2245148680600796E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2792228341097746E+0; + b = 0.1259049641962687E+0; + v = 0.2380370491511872E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3184251107546741E+0; + b = 0.1610594823400863E+0; + v = 0.2491398041852455E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3570481164426244E+0; + b = 0.1967151653460898E+0; + v = 0.2581632405881230E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3949164710492144E+0; + b = 0.2325404606175168E+0; + v = 0.2653965506227417E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4318617293970503E+0; + b = 0.2682461141151439E+0; + v = 0.2710857216747087E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4677221009931678E+0; + b = 0.3035720116011973E+0; + v = 0.2754434093903659E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5023417939270955E+0; + b = 0.3382781859197439E+0; + v = 0.2786579932519380E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5355701836636128E+0; + b = 0.3721383065625942E+0; + v = 0.2809011080679474E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5672608451328771E+0; + b = 0.4049346360466055E+0; + v = 0.2823336184560987E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5972704202540162E+0; + b = 0.4364538098633802E+0; + v = 0.2831101175806309E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2461687022333596E+0; + b = 0.3070423166833368E-1; + v = 0.2221679970354546E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2881774566286831E+0; + b = 0.6338034669281885E-1; + v = 0.2356185734270703E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3293963604116978E+0; + b = 0.9742862487067941E-1; + v = 
0.2469228344805590E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3697303822241377E+0; + b = 0.1323799532282290E+0; + v = 0.2562726348642046E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4090663023135127E+0; + b = 0.1678497018129336E+0; + v = 0.2638756726753028E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4472819355411712E+0; + b = 0.2035095105326114E+0; + v = 0.2699311157390862E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4842513377231437E+0; + b = 0.2390692566672091E+0; + v = 0.2746233268403837E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5198477629962928E+0; + b = 0.2742649818076149E+0; + v = 0.2781225674454771E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5539453011883145E+0; + b = 0.3088503806580094E+0; + v = 0.2805881254045684E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5864196762401251E+0; + b = 0.3425904245906614E+0; + v = 0.2821719877004913E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6171484466668390E+0; + b = 0.3752562294789468E+0; + v = 0.2830222502333124E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3350337830565727E+0; + b = 0.3261589934634747E-1; + v = 0.2457995956744870E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3775773224758284E+0; + b = 0.6658438928081572E-1; + v = 0.2551474407503706E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4188155229848973E+0; + b = 0.1014565797157954E+0; + v = 0.2629065335195311E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4586805892009344E+0; + b = 0.1368573320843822E+0; + v = 0.2691900449925075E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4970895714224235E+0; + b = 0.1724614851951608E+0; + v = 0.2741275485754276E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5339505133960747E+0; + b 
= 0.2079779381416412E+0; + v = 0.2778530970122595E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5691665792531440E+0; + b = 0.2431385788322288E+0; + v = 0.2805010567646741E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6026387682680377E+0; + b = 0.2776901883049853E+0; + v = 0.2822055834031040E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6342676150163307E+0; + b = 0.3113881356386632E+0; + v = 0.2831016901243473E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4237951119537067E+0; + b = 0.3394877848664351E-1; + v = 0.2624474901131803E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4656918683234929E+0; + b = 0.6880219556291447E-1; + v = 0.2688034163039377E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5058857069185980E+0; + b = 0.1041946859721635E+0; + v = 0.2738932751287636E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5443204666713996E+0; + b = 0.1398039738736393E+0; + v = 0.2777944791242523E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5809298813759742E+0; + b = 0.1753373381196155E+0; + v = 0.2806011661660987E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6156416039447128E+0; + b = 0.2105215793514010E+0; + v = 0.2824181456597460E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6483801351066604E+0; + b = 0.2450953312157051E+0; + v = 0.2833585216577828E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5103616577251688E+0; + b = 0.3485560643800719E-1; + v = 0.2738165236962878E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5506738792580681E+0; + b = 0.7026308631512033E-1; + v = 0.2778365208203180E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5889573040995292E+0; + b = 0.1059035061296403E+0; + v = 0.2807852940418966E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + 
a = 0.6251641589516930E+0; + b = 0.1414823925236026E+0; + v = 0.2827245949674705E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6592414921570178E+0; + b = 0.1767207908214530E+0; + v = 0.2837342344829828E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5930314017533384E+0; + b = 0.3542189339561672E-1; + v = 0.2809233907610981E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6309812253390175E+0; + b = 0.7109574040369549E-1; + v = 0.2829930809742694E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6666296011353230E+0; + b = 0.1067259792282730E+0; + v = 0.2841097874111479E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6703715271049922E+0; + b = 0.3569455268820809E-1; + v = 0.2843455206008783E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 4334: + + v = 0.1449063022537883E-4; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.2546377329828424E-3; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.1462896151831013E-1; + v = 0.6018432961087496E-4; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3769840812493139E-1; + v = 0.1002286583263673E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6524701904096891E-1; + v = 0.1315222931028093E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.9560543416134648E-1; + v = 0.1564213746876724E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1278335898929198E+0; + v = 0.1765118841507736E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1613096104466031E+0; + v = 0.1928737099311080E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1955806225745371E+0; + v = 0.2062658534263270E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2302935218498028E+0; + v = 0.2172395445953787E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + 
a = 0.2651584344113027E+0; + v = 0.2262076188876047E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2999276825183209E+0; + v = 0.2334885699462397E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3343828669718798E+0; + v = 0.2393355273179203E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3683265013750518E+0; + v = 0.2439559200468863E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4015763206518108E+0; + v = 0.2475251866060002E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4339612026399770E+0; + v = 0.2501965558158773E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4653180651114582E+0; + v = 0.2521081407925925E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4954893331080803E+0; + v = 0.2533881002388081E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5243207068924930E+0; + v = 0.2541582900848261E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5516590479041704E+0; + v = 0.2545365737525860E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6012371927804176E+0; + v = 0.2545726993066799E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6231574466449819E+0; + v = 0.2544456197465555E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6429416514181271E+0; + v = 0.2543481596881064E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6604124272943595E+0; + v = 0.2543506451429194E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6753851470408250E+0; + v = 0.2544905675493763E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6876717970626160E+0; + v = 0.2547611407344429E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6970895061319234E+0; + v = 0.2551060375448869E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7034746912553310E+0; 
+ v = 0.2554291933816039E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7067017217542295E+0; + v = 0.2556255710686343E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4382223501131123E-1; + v = 0.9041339695118195E-4; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.1117474077400006E+0; + v = 0.1438426330079022E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.1897153252911440E+0; + v = 0.1802523089820518E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.2724023009910331E+0; + v = 0.2060052290565496E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.3567163308709902E+0; + v = 0.2245002248967466E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.4404784483028087E+0; + v = 0.2377059847731150E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5219833154161411E+0; + v = 0.2468118955882525E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5998179868977553E+0; + v = 0.2525410872966528E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.6727803154548222E+0; + v = 0.2553101409933397E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.7476563943166086E-1; + b = 0.2193168509461185E-1; + v = 0.1212879733668632E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1075341482001416E+0; + b = 0.4826419281533887E-1; + v = 0.1472872881270931E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1416344885203259E+0; + b = 0.7751191883575742E-1; + v = 0.1686846601010828E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1766325315388586E+0; + b = 0.1087558139247680E+0; + v = 0.1862698414660208E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2121744174481514E+0; + b = 0.1413661374253096E+0; + v = 0.2007430956991861E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 
0.2479669443408145E+0; + b = 0.1748768214258880E+0; + v = 0.2126568125394796E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2837600452294113E+0; + b = 0.2089216406612073E+0; + v = 0.2224394603372113E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3193344933193984E+0; + b = 0.2431987685545972E+0; + v = 0.2304264522673135E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3544935442438745E+0; + b = 0.2774497054377770E+0; + v = 0.2368854288424087E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3890571932288154E+0; + b = 0.3114460356156915E+0; + v = 0.2420352089461772E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4228581214259090E+0; + b = 0.3449806851913012E+0; + v = 0.2460597113081295E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4557387211304052E+0; + b = 0.3778618641248256E+0; + v = 0.2491181912257687E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4875487950541643E+0; + b = 0.4099086391698978E+0; + v = 0.2513528194205857E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5181436529962997E+0; + b = 0.4409474925853973E+0; + v = 0.2528943096693220E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5473824095600661E+0; + b = 0.4708094517711291E+0; + v = 0.2538660368488136E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5751263398976174E+0; + b = 0.4993275140354637E+0; + v = 0.2543868648299022E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1489515746840028E+0; + b = 0.2599381993267017E-1; + v = 0.1642595537825183E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1863656444351767E+0; + b = 0.5479286532462190E-1; + v = 0.1818246659849308E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2238602880356348E+0; + b = 0.8556763251425254E-1; + v = 0.1966565649492420E-3; + start = 
getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2612723375728160E+0; + b = 0.1177257802267011E+0; + v = 0.2090677905657991E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2984332990206190E+0; + b = 0.1508168456192700E+0; + v = 0.2193820409510504E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3351786584663333E+0; + b = 0.1844801892177727E+0; + v = 0.2278870827661928E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3713505522209120E+0; + b = 0.2184145236087598E+0; + v = 0.2348283192282090E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4067981098954663E+0; + b = 0.2523590641486229E+0; + v = 0.2404139755581477E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4413769993687534E+0; + b = 0.2860812976901373E+0; + v = 0.2448227407760734E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4749487182516394E+0; + b = 0.3193686757808996E+0; + v = 0.2482110455592573E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5073798105075426E+0; + b = 0.3520226949547602E+0; + v = 0.2507192397774103E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5385410448878654E+0; + b = 0.3838544395667890E+0; + v = 0.2524765968534880E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5683065353670530E+0; + b = 0.4146810037640963E+0; + v = 0.2536052388539425E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5965527620663510E+0; + b = 0.4443224094681121E+0; + v = 0.2542230588033068E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2299227700856157E+0; + b = 0.2865757664057584E-1; + v = 0.1944817013047896E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2695752998553267E+0; + b = 0.5923421684485993E-1; + v = 0.2067862362746635E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3086178716611389E+0; + b = 0.9117817776057715E-1; + v = 
0.2172440734649114E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3469649871659077E+0; + b = 0.1240593814082605E+0; + v = 0.2260125991723423E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3845153566319655E+0; + b = 0.1575272058259175E+0; + v = 0.2332655008689523E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4211600033403215E+0; + b = 0.1912845163525413E+0; + v = 0.2391699681532458E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4567867834329882E+0; + b = 0.2250710177858171E+0; + v = 0.2438801528273928E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4912829319232061E+0; + b = 0.2586521303440910E+0; + v = 0.2475370504260665E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5245364793303812E+0; + b = 0.2918112242865407E+0; + v = 0.2502707235640574E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5564369788915756E+0; + b = 0.3243439239067890E+0; + v = 0.2522031701054241E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5868757697775287E+0; + b = 0.3560536787835351E+0; + v = 0.2534511269978784E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6157458853519617E+0; + b = 0.3867480821242581E+0; + v = 0.2541284914955151E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3138461110672113E+0; + b = 0.3051374637507278E-1; + v = 0.2161509250688394E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3542495872050569E+0; + b = 0.6237111233730755E-1; + v = 0.2248778513437852E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3935751553120181E+0; + b = 0.9516223952401907E-1; + v = 0.2322388803404617E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4317634668111147E+0; + b = 0.1285467341508517E+0; + v = 0.2383265471001355E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4687413842250821E+0; + b 
= 0.1622318931656033E+0; + v = 0.2432476675019525E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5044274237060283E+0; + b = 0.1959581153836453E+0; + v = 0.2471122223750674E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5387354077925727E+0; + b = 0.2294888081183837E+0; + v = 0.2500291752486870E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5715768898356105E+0; + b = 0.2626031152713945E+0; + v = 0.2521055942764682E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6028627200136111E+0; + b = 0.2950904075286713E+0; + v = 0.2534472785575503E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6325039812653463E+0; + b = 0.3267458451113286E+0; + v = 0.2541599713080121E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3981986708423407E+0; + b = 0.3183291458749821E-1; + v = 0.2317380975862936E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4382791182133300E+0; + b = 0.6459548193880908E-1; + v = 0.2378550733719775E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4769233057218166E+0; + b = 0.9795757037087952E-1; + v = 0.2428884456739118E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5140823911194238E+0; + b = 0.1316307235126655E+0; + v = 0.2469002655757292E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5496977833862983E+0; + b = 0.1653556486358704E+0; + v = 0.2499657574265851E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5837047306512727E+0; + b = 0.1988931724126510E+0; + v = 0.2521676168486082E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6160349566926879E+0; + b = 0.2320174581438950E+0; + v = 0.2535935662645334E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6466185353209440E+0; + b = 0.2645106562168662E+0; + v = 0.2543356743363214E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + 
a = 0.4810835158795404E+0; + b = 0.3275917807743992E-1; + v = 0.2427353285201535E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5199925041324341E+0; + b = 0.6612546183967181E-1; + v = 0.2468258039744386E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5571717692207494E+0; + b = 0.9981498331474143E-1; + v = 0.2500060956440310E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5925789250836378E+0; + b = 0.1335687001410374E+0; + v = 0.2523238365420979E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6261658523859670E+0; + b = 0.1671444402896463E+0; + v = 0.2538399260252846E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6578811126669331E+0; + b = 0.2003106382156076E+0; + v = 0.2546255927268069E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5609624612998100E+0; + b = 0.3337500940231335E-1; + v = 0.2500583360048449E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5979959659984670E+0; + b = 0.6708750335901803E-1; + v = 0.2524777638260203E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6330523711054002E+0; + b = 0.1008792126424850E+0; + v = 0.2540951193860656E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6660960998103972E+0; + b = 0.1345050343171794E+0; + v = 0.2549524085027472E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6365384364585819E+0; + b = 0.3372799460737052E-1; + v = 0.2542569507009158E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6710994302899275E+0; + b = 0.6755249309678028E-1; + v = 0.2552114127580376E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 4802: + + v = 0.9687521879420705E-4; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.2307897895367918E-3; + start = getLebedevReccurencePoints (2, start, a, b, v); + v = 0.2297310852498558E-3; + start = 
getLebedevReccurencePoints (3, start, a, b, v); + a = 0.2335728608887064E-1; + v = 0.7386265944001919E-4; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4352987836550653E-1; + v = 0.8257977698542210E-4; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6439200521088801E-1; + v = 0.9706044762057630E-4; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.9003943631993181E-1; + v = 0.1302393847117003E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1196706615548473E+0; + v = 0.1541957004600968E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1511715412838134E+0; + v = 0.1704459770092199E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1835982828503801E+0; + v = 0.1827374890942906E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2165081259155405E+0; + v = 0.1926360817436107E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2496208720417563E+0; + v = 0.2008010239494833E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2827200673567900E+0; + v = 0.2075635983209175E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3156190823994346E+0; + v = 0.2131306638690909E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3481476793749115E+0; + v = 0.2176562329937335E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3801466086947226E+0; + v = 0.2212682262991018E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4114652119634011E+0; + v = 0.2240799515668565E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4419598786519751E+0; + v = 0.2261959816187525E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4714925949329543E+0; + v = 0.2277156368808855E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4999293972879466E+0; + v = 0.2287351772128336E-3; + start = getLebedevReccurencePoints 
(4, start, a, b, v); + a = 0.5271387221431248E+0; + v = 0.2293490814084085E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5529896780837761E+0; + v = 0.2296505312376273E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6000856099481712E+0; + v = 0.2296793832318756E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6210562192785175E+0; + v = 0.2295785443842974E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6401165879934240E+0; + v = 0.2295017931529102E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6571144029244334E+0; + v = 0.2295059638184868E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6718910821718863E+0; + v = 0.2296232343237362E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6842845591099010E+0; + v = 0.2298530178740771E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6941353476269816E+0; + v = 0.2301579790280501E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7012965242212991E+0; + v = 0.2304690404996513E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7056471428242644E+0; + v = 0.2307027995907102E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4595557643585895E-1; + v = 0.9312274696671092E-4; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.1049316742435023E+0; + v = 0.1199919385876926E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.1773548879549274E+0; + v = 0.1598039138877690E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.2559071411236127E+0; + v = 0.1822253763574900E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.3358156837985898E+0; + v = 0.1988579593655040E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.4155835743763893E+0; + v = 0.2112620102533307E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 
0.4937894296167472E+0; + v = 0.2201594887699007E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5691569694793316E+0; + v = 0.2261622590895036E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.6405840854894251E+0; + v = 0.2296458453435705E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.7345133894143348E-1; + b = 0.2177844081486067E-1; + v = 0.1006006990267000E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1009859834044931E+0; + b = 0.4590362185775188E-1; + v = 0.1227676689635876E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1324289619748758E+0; + b = 0.7255063095690877E-1; + v = 0.1467864280270117E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1654272109607127E+0; + b = 0.1017825451960684E+0; + v = 0.1644178912101232E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1990767186776461E+0; + b = 0.1325652320980364E+0; + v = 0.1777664890718961E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2330125945523278E+0; + b = 0.1642765374496765E+0; + v = 0.1884825664516690E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2670080611108287E+0; + b = 0.1965360374337889E+0; + v = 0.1973269246453848E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3008753376294316E+0; + b = 0.2290726770542238E+0; + v = 0.2046767775855328E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3344475596167860E+0; + b = 0.2616645495370823E+0; + v = 0.2107600125918040E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3675709724070786E+0; + b = 0.2941150728843141E+0; + v = 0.2157416362266829E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4001000887587812E+0; + b = 0.3262440400919066E+0; + v = 0.2197557816920721E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4318956350436028E+0; + b = 0.3578835350611916E+0; + v 
= 0.2229192611835437E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4628239056795531E+0; + b = 0.3888751854043678E+0; + v = 0.2253385110212775E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4927563229773636E+0; + b = 0.4190678003222840E+0; + v = 0.2271137107548774E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5215687136707969E+0; + b = 0.4483151836883852E+0; + v = 0.2283414092917525E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5491402346984905E+0; + b = 0.4764740676087880E+0; + v = 0.2291161673130077E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5753520160126075E+0; + b = 0.5034021310998277E+0; + v = 0.2295313908576598E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1388326356417754E+0; + b = 0.2435436510372806E-1; + v = 0.1438204721359031E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1743686900537244E+0; + b = 0.5118897057342652E-1; + v = 0.1607738025495257E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2099737037950268E+0; + b = 0.8014695048539634E-1; + v = 0.1741483853528379E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2454492590908548E+0; + b = 0.1105117874155699E+0; + v = 0.1851918467519151E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2807219257864278E+0; + b = 0.1417950531570966E+0; + v = 0.1944628638070613E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3156842271975842E+0; + b = 0.1736604945719597E+0; + v = 0.2022495446275152E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3502090945177752E+0; + b = 0.2058466324693981E+0; + v = 0.2087462382438514E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3841684849519686E+0; + b = 0.2381284261195919E+0; + v = 0.2141074754818308E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4174372367906016E+0; + 
b = 0.2703031270422569E+0; + v = 0.2184640913748162E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4498926465011892E+0; + b = 0.3021845683091309E+0; + v = 0.2219309165220329E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4814146229807701E+0; + b = 0.3335993355165720E+0; + v = 0.2246123118340624E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5118863625734701E+0; + b = 0.3643833735518232E+0; + v = 0.2266062766915125E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5411947455119144E+0; + b = 0.3943789541958179E+0; + v = 0.2280072952230796E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5692301500357246E+0; + b = 0.4234320144403542E+0; + v = 0.2289082025202583E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5958857204139576E+0; + b = 0.4513897947419260E+0; + v = 0.2294012695120025E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2156270284785766E+0; + b = 0.2681225755444491E-1; + v = 0.1722434488736947E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2532385054909710E+0; + b = 0.5557495747805614E-1; + v = 0.1830237421455091E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2902564617771537E+0; + b = 0.8569368062950249E-1; + v = 0.1923855349997633E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3266979823143256E+0; + b = 0.1167367450324135E+0; + v = 0.2004067861936271E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3625039627493614E+0; + b = 0.1483861994003304E+0; + v = 0.2071817297354263E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3975838937548699E+0; + b = 0.1803821503011405E+0; + v = 0.2128250834102103E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4318396099009774E+0; + b = 0.2124962965666424E+0; + v = 0.2174513719440102E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); 
+ a = 0.4651706555732742E+0; + b = 0.2445221837805913E+0; + v = 0.2211661839150214E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4974752649620969E+0; + b = 0.2762701224322987E+0; + v = 0.2240665257813102E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5286517579627517E+0; + b = 0.3075627775211328E+0; + v = 0.2262439516632620E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5586001195731895E+0; + b = 0.3382311089826877E+0; + v = 0.2277874557231869E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5872229902021319E+0; + b = 0.3681108834741399E+0; + v = 0.2287854314454994E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6144258616235123E+0; + b = 0.3970397446872839E+0; + v = 0.2293268499615575E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2951676508064861E+0; + b = 0.2867499538750441E-1; + v = 0.1912628201529828E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3335085485472725E+0; + b = 0.5867879341903510E-1; + v = 0.1992499672238701E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3709561760636381E+0; + b = 0.8961099205022284E-1; + v = 0.2061275533454027E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4074722861667498E+0; + b = 0.1211627927626297E+0; + v = 0.2119318215968572E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4429923648839117E+0; + b = 0.1530748903554898E+0; + v = 0.2167416581882652E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4774428052721736E+0; + b = 0.1851176436721877E+0; + v = 0.2206430730516600E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5107446539535904E+0; + b = 0.2170829107658179E+0; + v = 0.2237186938699523E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5428151370542935E+0; + b = 0.2487786689026271E+0; + v = 0.2260480075032884E-3; + start = 
getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5735699292556964E+0; + b = 0.2800239952795016E+0; + v = 0.2277098884558542E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6029253794562866E+0; + b = 0.3106445702878119E+0; + v = 0.2287845715109671E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6307998987073145E+0; + b = 0.3404689500841194E+0; + v = 0.2293547268236294E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3752652273692719E+0; + b = 0.2997145098184479E-1; + v = 0.2056073839852528E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4135383879344028E+0; + b = 0.6086725898678011E-1; + v = 0.2114235865831876E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4506113885153907E+0; + b = 0.9238849548435643E-1; + v = 0.2163175629770551E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4864401554606072E+0; + b = 0.1242786603851851E+0; + v = 0.2203392158111650E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5209708076611709E+0; + b = 0.1563086731483386E+0; + v = 0.2235473176847839E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5541422135830122E+0; + b = 0.1882696509388506E+0; + v = 0.2260024141501235E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5858880915113817E+0; + b = 0.2199672979126059E+0; + v = 0.2277675929329182E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6161399390603444E+0; + b = 0.2512165482924867E+0; + v = 0.2289102112284834E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6448296482255090E+0; + b = 0.2818368701871888E+0; + v = 0.2295027954625118E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4544796274917948E+0; + b = 0.3088970405060312E-1; + v = 0.2161281589879992E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4919389072146628E+0; + b = 0.6240947677636835E-1; + v = 
0.2201980477395102E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5279313026985183E+0; + b = 0.9430706144280313E-1; + v = 0.2234952066593166E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5624169925571135E+0; + b = 0.1263547818770374E+0; + v = 0.2260540098520838E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5953484627093287E+0; + b = 0.1583430788822594E+0; + v = 0.2279157981899988E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6266730715339185E+0; + b = 0.1900748462555988E+0; + v = 0.2291296918565571E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6563363204278871E+0; + b = 0.2213599519592567E+0; + v = 0.2297533752536649E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5314574716585696E+0; + b = 0.3152508811515374E-1; + v = 0.2234927356465995E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5674614932298185E+0; + b = 0.6343865291465561E-1; + v = 0.2261288012985219E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6017706004970264E+0; + b = 0.9551503504223951E-1; + v = 0.2280818160923688E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6343471270264178E+0; + b = 0.1275440099801196E+0; + v = 0.2293773295180159E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6651494599127802E+0; + b = 0.1593252037671960E+0; + v = 0.2300528767338634E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6050184986005704E+0; + b = 0.3192538338496105E-1; + v = 0.2281893855065666E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6390163550880400E+0; + b = 0.6402824353962306E-1; + v = 0.2295720444840727E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6711199107088448E+0; + b = 0.9609805077002909E-1; + v = 0.2303227649026753E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6741354429572275E+0; + b 
= 0.3211853196273233E-1; + v = 0.2304831913227114E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 5294: + + v = 0.9080510764308163E-4; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.2084824361987793E-3; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.2303261686261450E-1; + v = 0.5011105657239616E-4; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3757208620162394E-1; + v = 0.5942520409683854E-4; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5821912033821852E-1; + v = 0.9564394826109721E-4; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.8403127529194872E-1; + v = 0.1185530657126338E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1122927798060578E+0; + v = 0.1364510114230331E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1420125319192987E+0; + v = 0.1505828825605415E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1726396437341978E+0; + v = 0.1619298749867023E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2038170058115696E+0; + v = 0.1712450504267789E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2352849892876508E+0; + v = 0.1789891098164999E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2668363354312461E+0; + v = 0.1854474955629795E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2982941279900452E+0; + v = 0.1908148636673661E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3295002922087076E+0; + v = 0.1952377405281833E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3603094918363593E+0; + v = 0.1988349254282232E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3905857895173920E+0; + v = 0.2017079807160050E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4202005758160837E+0; + v = 0.2039473082709094E-3; + 
start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4490310061597227E+0; + v = 0.2056360279288953E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4769586160311491E+0; + v = 0.2068525823066865E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5038679887049750E+0; + v = 0.2076724877534488E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5296454286519961E+0; + v = 0.2081694278237885E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5541776207164850E+0; + v = 0.2084157631219326E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5990467321921213E+0; + v = 0.2084381531128593E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6191467096294587E+0; + v = 0.2083476277129307E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6375251212901849E+0; + v = 0.2082686194459732E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6540514381131168E+0; + v = 0.2082475686112415E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6685899064391510E+0; + v = 0.2083139860289915E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6810013009681648E+0; + v = 0.2084745561831237E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6911469578730340E+0; + v = 0.2087091313375890E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6988956915141736E+0; + v = 0.2089718413297697E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7041335794868720E+0; + v = 0.2092003303479793E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7067754398018567E+0; + v = 0.2093336148263241E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3840368707853623E-1; + v = 0.7591708117365267E-4; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.9835485954117399E-1; + v = 0.1083383968169186E-3; + start = 
getLebedevReccurencePoints (5, start, a, b, v); + a = 0.1665774947612998E+0; + v = 0.1403019395292510E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.2405702335362910E+0; + v = 0.1615970179286436E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.3165270770189046E+0; + v = 0.1771144187504911E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.3927386145645443E+0; + v = 0.1887760022988168E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.4678825918374656E+0; + v = 0.1973474670768214E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5408022024266935E+0; + v = 0.2033787661234659E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.6104967445752438E+0; + v = 0.2072343626517331E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.6760910702685738E+0; + v = 0.2091177834226918E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.6655644120217392E-1; + b = 0.1936508874588424E-1; + v = 0.9316684484675566E-4; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.9446246161270182E-1; + b = 0.4252442002115869E-1; + v = 0.1116193688682976E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1242651925452509E+0; + b = 0.6806529315354374E-1; + v = 0.1298623551559414E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1553438064846751E+0; + b = 0.9560957491205369E-1; + v = 0.1450236832456426E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1871137110542670E+0; + b = 0.1245931657452888E+0; + v = 0.1572719958149914E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2192612628836257E+0; + b = 0.1545385828778978E+0; + v = 0.1673234785867195E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2515682807206955E+0; + b = 0.1851004249723368E+0; + v = 0.1756860118725188E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 
0.2838535866287290E+0; + b = 0.2160182608272384E+0; + v = 0.1826776290439367E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3159578817528521E+0; + b = 0.2470799012277111E+0; + v = 0.1885116347992865E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3477370882791392E+0; + b = 0.2781014208986402E+0; + v = 0.1933457860170574E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3790576960890540E+0; + b = 0.3089172523515731E+0; + v = 0.1973060671902064E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4097938317810200E+0; + b = 0.3393750055472244E+0; + v = 0.2004987099616311E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4398256572859637E+0; + b = 0.3693322470987730E+0; + v = 0.2030170909281499E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4690384114718480E+0; + b = 0.3986541005609877E+0; + v = 0.2049461460119080E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4973216048301053E+0; + b = 0.4272112491408562E+0; + v = 0.2063653565200186E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5245681526132446E+0; + b = 0.4548781735309936E+0; + v = 0.2073507927381027E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5506733911803888E+0; + b = 0.4815315355023251E+0; + v = 0.2079764593256122E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5755339829522475E+0; + b = 0.5070486445801855E+0; + v = 0.2083150534968778E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1305472386056362E+0; + b = 0.2284970375722366E-1; + v = 0.1262715121590664E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1637327908216477E+0; + b = 0.4812254338288384E-1; + v = 0.1414386128545972E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1972734634149637E+0; + b = 0.7531734457511935E-1; + v = 0.1538740401313898E-3; + start = 
getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2308694653110130E+0; + b = 0.1039043639882017E+0; + v = 0.1642434942331432E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2643899218338160E+0; + b = 0.1334526587117626E+0; + v = 0.1729790609237496E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2977171599622171E+0; + b = 0.1636414868936382E+0; + v = 0.1803505190260828E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3307293903032310E+0; + b = 0.1942195406166568E+0; + v = 0.1865475350079657E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3633069198219073E+0; + b = 0.2249752879943753E+0; + v = 0.1917182669679069E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3953346955922727E+0; + b = 0.2557218821820032E+0; + v = 0.1959851709034382E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4267018394184914E+0; + b = 0.2862897925213193E+0; + v = 0.1994529548117882E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4573009622571704E+0; + b = 0.3165224536636518E+0; + v = 0.2022138911146548E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4870279559856109E+0; + b = 0.3462730221636496E+0; + v = 0.2043518024208592E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5157819581450322E+0; + b = 0.3754016870282835E+0; + v = 0.2059450313018110E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5434651666465393E+0; + b = 0.4037733784993613E+0; + v = 0.2070685715318472E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5699823887764627E+0; + b = 0.4312557784139123E+0; + v = 0.2077955310694373E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5952403350947741E+0; + b = 0.4577175367122110E+0; + v = 0.2081980387824712E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2025152599210369E+0; + b = 0.2520253617719557E-1; + v = 
0.1521318610377956E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2381066653274425E+0; + b = 0.5223254506119000E-1; + v = 0.1622772720185755E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2732823383651612E+0; + b = 0.8060669688588620E-1; + v = 0.1710498139420709E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3080137692611118E+0; + b = 0.1099335754081255E+0; + v = 0.1785911149448736E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3422405614587601E+0; + b = 0.1399120955959857E+0; + v = 0.1850125313687736E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3758808773890420E+0; + b = 0.1702977801651705E+0; + v = 0.1904229703933298E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4088458383438932E+0; + b = 0.2008799256601680E+0; + v = 0.1949259956121987E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4410450550841152E+0; + b = 0.2314703052180836E+0; + v = 0.1986161545363960E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4723879420561312E+0; + b = 0.2618972111375892E+0; + v = 0.2015790585641370E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5027843561874343E+0; + b = 0.2920013195600270E+0; + v = 0.2038934198707418E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5321453674452458E+0; + b = 0.3216322555190551E+0; + v = 0.2056334060538251E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5603839113834030E+0; + b = 0.3506456615934198E+0; + v = 0.2068705959462289E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5874150706875146E+0; + b = 0.3789007181306267E+0; + v = 0.2076753906106002E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6131559381660038E+0; + b = 0.4062580170572782E+0; + v = 0.2081179391734803E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2778497016394506E+0; + b 
= 0.2696271276876226E-1; + v = 0.1700345216228943E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3143733562261912E+0; + b = 0.5523469316960465E-1; + v = 0.1774906779990410E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3501485810261827E+0; + b = 0.8445193201626464E-1; + v = 0.1839659377002642E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3851430322303653E+0; + b = 0.1143263119336083E+0; + v = 0.1894987462975169E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4193013979470415E+0; + b = 0.1446177898344475E+0; + v = 0.1941548809452595E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4525585960458567E+0; + b = 0.1751165438438091E+0; + v = 0.1980078427252384E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4848447779622947E+0; + b = 0.2056338306745660E+0; + v = 0.2011296284744488E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5160871208276894E+0; + b = 0.2359965487229226E+0; + v = 0.2035888456966776E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5462112185696926E+0; + b = 0.2660430223139146E+0; + v = 0.2054516325352142E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5751425068101757E+0; + b = 0.2956193664498032E+0; + v = 0.2067831033092635E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6028073872853596E+0; + b = 0.3245763905312779E+0; + v = 0.2076485320284876E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6291338275278409E+0; + b = 0.3527670026206972E+0; + v = 0.2081141439525255E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3541797528439391E+0; + b = 0.2823853479435550E-1; + v = 0.1834383015469222E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3908234972074657E+0; + b = 0.5741296374713106E-1; + v = 0.1889540591777677E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + 
a = 0.4264408450107590E+0; + b = 0.8724646633650199E-1; + v = 0.1936677023597375E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4609949666553286E+0; + b = 0.1175034422915616E+0; + v = 0.1976176495066504E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4944389496536006E+0; + b = 0.1479755652628428E+0; + v = 0.2008536004560983E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5267194884346086E+0; + b = 0.1784740659484352E+0; + v = 0.2034280351712291E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5577787810220990E+0; + b = 0.2088245700431244E+0; + v = 0.2053944466027758E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5875563763536670E+0; + b = 0.2388628136570763E+0; + v = 0.2068077642882360E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6159910016391269E+0; + b = 0.2684308928769185E+0; + v = 0.2077250949661599E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6430219602956268E+0; + b = 0.2973740761960252E+0; + v = 0.2082062440705320E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4300647036213646E+0; + b = 0.2916399920493977E-1; + v = 0.1934374486546626E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4661486308935531E+0; + b = 0.5898803024755659E-1; + v = 0.1974107010484300E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5009658555287261E+0; + b = 0.8924162698525409E-1; + v = 0.2007129290388658E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5344824270447704E+0; + b = 0.1197185199637321E+0; + v = 0.2033736947471293E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5666575997416371E+0; + b = 0.1502300756161382E+0; + v = 0.2054287125902493E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5974457471404752E+0; + b = 0.1806004191913564E+0; + v = 0.2069184936818894E-3; + start = 
getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6267984444116886E+0; + b = 0.2106621764786252E+0; + v = 0.2078883689808782E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6546664713575417E+0; + b = 0.2402526932671914E+0; + v = 0.2083886366116359E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5042711004437253E+0; + b = 0.2982529203607657E-1; + v = 0.2006593275470817E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5392127456774380E+0; + b = 0.6008728062339922E-1; + v = 0.2033728426135397E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5726819437668618E+0; + b = 0.9058227674571398E-1; + v = 0.2055008781377608E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6046469254207278E+0; + b = 0.1211219235803400E+0; + v = 0.2070651783518502E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6350716157434952E+0; + b = 0.1515286404791580E+0; + v = 0.2080953335094320E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6639177679185454E+0; + b = 0.1816314681255552E+0; + v = 0.2086284998988521E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5757276040972253E+0; + b = 0.3026991752575440E-1; + v = 0.2055549387644668E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6090265823139755E+0; + b = 0.6078402297870770E-1; + v = 0.2071871850267654E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6406735344387661E+0; + b = 0.9135459984176636E-1; + v = 0.2082856600431965E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6706397927793709E+0; + b = 0.1218024155966590E+0; + v = 0.2088705858819358E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6435019674426665E+0; + b = 0.3052608357660639E-1; + v = 0.2083995867536322E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6747218676375681E+0; + b = 0.6112185773983089E-1; + v = 
0.2090509712889637E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + + case 5810: + + v = 0.9735347946175486E-5; + start = getLebedevReccurencePoints (1, start, a, b, v); + v = 0.1907581241803167E-3; + start = getLebedevReccurencePoints (2, start, a, b, v); + v = 0.1901059546737578E-3; + start = getLebedevReccurencePoints (3, start, a, b, v); + a = 0.1182361662400277E-1; + v = 0.3926424538919212E-4; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3062145009138958E-1; + v = 0.6667905467294382E-4; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5329794036834243E-1; + v = 0.8868891315019135E-4; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7848165532862220E-1; + v = 0.1066306000958872E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1054038157636201E+0; + v = 0.1214506743336128E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1335577797766211E+0; + v = 0.1338054681640871E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1625769955502252E+0; + v = 0.1441677023628504E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.1921787193412792E+0; + v = 0.1528880200826557E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2221340534690548E+0; + v = 0.1602330623773609E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2522504912791132E+0; + v = 0.1664102653445244E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.2823610860679697E+0; + v = 0.1715845854011323E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3123173966267560E+0; + v = 0.1758901000133069E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3419847036953789E+0; + v = 0.1794382485256736E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3712386456999758E+0; + v = 0.1823238106757407E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 
0.3999627649876828E+0; + v = 0.1846293252959976E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4280466458648093E+0; + v = 0.1864284079323098E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4553844360185711E+0; + v = 0.1877882694626914E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.4818736094437834E+0; + v = 0.1887716321852025E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5074138709260629E+0; + v = 0.1894381638175673E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5319061304570707E+0; + v = 0.1898454899533629E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5552514978677286E+0; + v = 0.1900497929577815E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.5981009025246183E+0; + v = 0.1900671501924092E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6173990192228116E+0; + v = 0.1899837555533510E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6351365239411131E+0; + v = 0.1899014113156229E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6512010228227200E+0; + v = 0.1898581257705106E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6654758363948120E+0; + v = 0.1898804756095753E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6778410414853370E+0; + v = 0.1899793610426402E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6881760887484110E+0; + v = 0.1901464554844117E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.6963645267094598E+0; + v = 0.1903533246259542E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7023010617153579E+0; + v = 0.1905556158463228E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.7059004636628753E+0; + v = 0.1907037155663528E-3; + start = getLebedevReccurencePoints (4, start, a, b, v); + a = 0.3552470312472575E-1; + v 
= 0.5992997844249967E-4; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.9151176620841283E-1; + v = 0.9749059382456978E-4; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.1566197930068980E+0; + v = 0.1241680804599158E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.2265467599271907E+0; + v = 0.1437626154299360E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.2988242318581361E+0; + v = 0.1584200054793902E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.3717482419703886E+0; + v = 0.1694436550982744E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.4440094491758889E+0; + v = 0.1776617014018108E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5145337096756642E+0; + v = 0.1836132434440077E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.5824053672860230E+0; + v = 0.1876494727075983E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.6468283961043370E+0; + v = 0.1899906535336482E-3; + start = getLebedevReccurencePoints (5, start, a, b, v); + a = 0.6095964259104373E-1; + b = 0.1787828275342931E-1; + v = 0.8143252820767350E-4; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.8811962270959388E-1; + b = 0.3953888740792096E-1; + v = 0.9998859890887728E-4; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1165936722428831E+0; + b = 0.6378121797722990E-1; + v = 0.1156199403068359E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1460232857031785E+0; + b = 0.8985890813745037E-1; + v = 0.1287632092635513E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1761197110181755E+0; + b = 0.1172606510576162E+0; + v = 0.1398378643365139E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2066471190463718E+0; + b = 0.1456102876970995E+0; + v = 0.1491876468417391E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + 
a = 0.2374076026328152E+0; + b = 0.1746153823011775E+0; + v = 0.1570855679175456E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2682305474337051E+0; + b = 0.2040383070295584E+0; + v = 0.1637483948103775E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2989653312142369E+0; + b = 0.2336788634003698E+0; + v = 0.1693500566632843E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3294762752772209E+0; + b = 0.2633632752654219E+0; + v = 0.1740322769393633E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3596390887276086E+0; + b = 0.2929369098051601E+0; + v = 0.1779126637278296E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3893383046398812E+0; + b = 0.3222592785275512E+0; + v = 0.1810908108835412E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4184653789358347E+0; + b = 0.3512004791195743E+0; + v = 0.1836529132600190E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4469172319076166E+0; + b = 0.3796385677684537E+0; + v = 0.1856752841777379E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4745950813276976E+0; + b = 0.4074575378263879E+0; + v = 0.1872270566606832E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5014034601410262E+0; + b = 0.4345456906027828E+0; + v = 0.1883722645591307E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5272493404551239E+0; + b = 0.4607942515205134E+0; + v = 0.1891714324525297E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5520413051846366E+0; + b = 0.4860961284181720E+0; + v = 0.1896827480450146E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5756887237503077E+0; + b = 0.5103447395342790E+0; + v = 0.1899628417059528E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1225039430588352E+0; + b = 0.2136455922655793E-1; + v = 0.1123301829001669E-3; + start = 
getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1539113217321372E+0; + b = 0.4520926166137188E-1; + v = 0.1253698826711277E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1856213098637712E+0; + b = 0.7086468177864818E-1; + v = 0.1366266117678531E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2174998728035131E+0; + b = 0.9785239488772918E-1; + v = 0.1462736856106918E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2494128336938330E+0; + b = 0.1258106396267210E+0; + v = 0.1545076466685412E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2812321562143480E+0; + b = 0.1544529125047001E+0; + v = 0.1615096280814007E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3128372276456111E+0; + b = 0.1835433512202753E+0; + v = 0.1674366639741759E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3441145160177973E+0; + b = 0.2128813258619585E+0; + v = 0.1724225002437900E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3749567714853510E+0; + b = 0.2422913734880829E+0; + v = 0.1765810822987288E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4052621732015610E+0; + b = 0.2716163748391453E+0; + v = 0.1800104126010751E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4349335453522385E+0; + b = 0.3007127671240280E+0; + v = 0.1827960437331284E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4638776641524965E+0; + b = 0.3294470677216479E+0; + v = 0.1850140300716308E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4920046410462687E+0; + b = 0.3576932543699155E+0; + v = 0.1867333507394938E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5192273554861704E+0; + b = 0.3853307059757764E+0; + v = 0.1880178688638289E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5454609081136522E+0; + b = 0.4122425044452694E+0; + v = 
0.1889278925654758E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5706220661424140E+0; + b = 0.4383139587781027E+0; + v = 0.1895213832507346E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5946286755181518E+0; + b = 0.4634312536300553E+0; + v = 0.1898548277397420E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.1905370790924295E+0; + b = 0.2371311537781979E-1; + v = 0.1349105935937341E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2242518717748009E+0; + b = 0.4917878059254806E-1; + v = 0.1444060068369326E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2577190808025936E+0; + b = 0.7595498960495142E-1; + v = 0.1526797390930008E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2908724534927187E+0; + b = 0.1036991083191100E+0; + v = 0.1598208771406474E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3236354020056219E+0; + b = 0.1321348584450234E+0; + v = 0.1659354368615331E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3559267359304543E+0; + b = 0.1610316571314789E+0; + v = 0.1711279910946440E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3876637123676956E+0; + b = 0.1901912080395707E+0; + v = 0.1754952725601440E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4187636705218842E+0; + b = 0.2194384950137950E+0; + v = 0.1791247850802529E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4491449019883107E+0; + b = 0.2486155334763858E+0; + v = 0.1820954300877716E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4787270932425445E+0; + b = 0.2775768931812335E+0; + v = 0.1844788524548449E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5074315153055574E+0; + b = 0.3061863786591120E+0; + v = 0.1863409481706220E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5351810507738336E+0; + b 
= 0.3343144718152556E+0; + v = 0.1877433008795068E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5619001025975381E+0; + b = 0.3618362729028427E+0; + v = 0.1887444543705232E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5875144035268046E+0; + b = 0.3886297583620408E+0; + v = 0.1894009829375006E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6119507308734495E+0; + b = 0.4145742277792031E+0; + v = 0.1897683345035198E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2619733870119463E+0; + b = 0.2540047186389353E-1; + v = 0.1517327037467653E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.2968149743237949E+0; + b = 0.5208107018543989E-1; + v = 0.1587740557483543E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3310451504860488E+0; + b = 0.7971828470885599E-1; + v = 0.1649093382274097E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3646215567376676E+0; + b = 0.1080465999177927E+0; + v = 0.1701915216193265E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3974916785279360E+0; + b = 0.1368413849366629E+0; + v = 0.1746847753144065E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4295967403772029E+0; + b = 0.1659073184763559E+0; + v = 0.1784555512007570E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4608742854473447E+0; + b = 0.1950703730454614E+0; + v = 0.1815687562112174E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4912598858949903E+0; + b = 0.2241721144376724E+0; + v = 0.1840864370663302E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5206882758945558E+0; + b = 0.2530655255406489E+0; + v = 0.1860676785390006E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5490940914019819E+0; + b = 0.2816118409731066E+0; + v = 0.1875690583743703E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + 
a = 0.5764123302025542E+0; + b = 0.3096780504593238E+0; + v = 0.1886453236347225E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6025786004213506E+0; + b = 0.3371348366394987E+0; + v = 0.1893501123329645E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6275291964794956E+0; + b = 0.3638547827694396E+0; + v = 0.1897366184519868E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3348189479861771E+0; + b = 0.2664841935537443E-1; + v = 0.1643908815152736E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.3699515545855295E+0; + b = 0.5424000066843495E-1; + v = 0.1696300350907768E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4042003071474669E+0; + b = 0.8251992715430854E-1; + v = 0.1741553103844483E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4375320100182624E+0; + b = 0.1112695182483710E+0; + v = 0.1780015282386092E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4699054490335947E+0; + b = 0.1402964116467816E+0; + v = 0.1812116787077125E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5012739879431952E+0; + b = 0.1694275117584291E+0; + v = 0.1838323158085421E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5315874883754966E+0; + b = 0.1985038235312689E+0; + v = 0.1859113119837737E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5607937109622117E+0; + b = 0.2273765660020893E+0; + v = 0.1874969220221698E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5888393223495521E+0; + b = 0.2559041492849764E+0; + v = 0.1886375612681076E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6156705979160163E+0; + b = 0.2839497251976899E+0; + v = 0.1893819575809276E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6412338809078123E+0; + b = 0.3113791060500690E+0; + v = 0.1897794748256767E-3; + start = 
getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4076051259257167E+0; + b = 0.2757792290858463E-1; + v = 0.1738963926584846E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4423788125791520E+0; + b = 0.5584136834984293E-1; + v = 0.1777442359873466E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4760480917328258E+0; + b = 0.8457772087727143E-1; + v = 0.1810010815068719E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5085838725946297E+0; + b = 0.1135975846359248E+0; + v = 0.1836920318248129E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5399513637391218E+0; + b = 0.1427286904765053E+0; + v = 0.1858489473214328E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5701118433636380E+0; + b = 0.1718112740057635E+0; + v = 0.1875079342496592E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5990240530606021E+0; + b = 0.2006944855985351E+0; + v = 0.1887080239102310E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6266452685139695E+0; + b = 0.2292335090598907E+0; + v = 0.1894905752176822E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6529320971415942E+0; + b = 0.2572871512353714E+0; + v = 0.1898991061200695E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.4791583834610126E+0; + b = 0.2826094197735932E-1; + v = 0.1809065016458791E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5130373952796940E+0; + b = 0.5699871359683649E-1; + v = 0.1836297121596799E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5456252429628476E+0; + b = 0.8602712528554394E-1; + v = 0.1858426916241869E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5768956329682385E+0; + b = 0.1151748137221281E+0; + v = 0.1875654101134641E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6068186944699046E+0; + b = 0.1442811654136362E+0; + v = 
0.1888240751833503E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6353622248024907E+0; + b = 0.1731930321657680E+0; + v = 0.1896497383866979E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6624927035731797E+0; + b = 0.2017619958756061E+0; + v = 0.1900775530219121E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5484933508028488E+0; + b = 0.2874219755907391E-1; + v = 0.1858525041478814E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.5810207682142106E+0; + b = 0.5778312123713695E-1; + v = 0.1876248690077947E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6120955197181352E+0; + b = 0.8695262371439526E-1; + v = 0.1889404439064607E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6416944284294319E+0; + b = 0.1160893767057166E+0; + v = 0.1898168539265290E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6697926391731260E+0; + b = 0.1450378826743251E+0; + v = 0.1902779940661772E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6147594390585488E+0; + b = 0.2904957622341456E-1; + v = 0.1890125641731815E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6455390026356783E+0; + b = 0.5823809152617197E-1; + v = 0.1899434637795751E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6747258588365477E+0; + b = 0.8740384899884715E-1; + v = 0.1904520856831751E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + a = 0.6772135750395347E+0; + b = 0.2919946135808105E-1; + v = 0.1905534498734563E-3; + start = getLebedevReccurencePoints (6, start, a, b, v); + + break; + } } -int Lebedev_laikov_grid::getLebedevReccurencePoints(int type, int start, double a, double b, double v) +int + Lebedev_laikov_grid::getLebedevReccurencePoints (int type, int start, double a, double b, double v) { double pi = ModuleBase::PI; double c = 0.0; switch (type) - { - case 1: - a = 1.0; - - 
grid_coor[start].x = a; - grid_coor[start].y = 0.0; - grid_coor[start].z = 0.0; - weight[start] = 4.0 * pi * v; - - grid_coor[start + 1].x = -a; - grid_coor[start + 1].y = 0.0; - grid_coor[start + 1].z = 0.0; - weight[start + 1] = 4.0 * pi * v; - - grid_coor[start + 2].x = 0.0; - grid_coor[start + 2].y = a; - grid_coor[start + 2].z = 0.0; - weight[start + 2] = 4.0 * pi * v; - - grid_coor[start + 3].x = 0.0; - grid_coor[start + 3].y = -a; - grid_coor[start + 3].z = 0.0; - weight[start + 3] = 4.0 * pi * v; - - grid_coor[start + 4].x = 0.0; - grid_coor[start + 4].y = 0.0; - grid_coor[start + 4].z = a; - weight[start + 4] = 4.0 * pi * v; - - grid_coor[start + 5].x = 0.0; - grid_coor[start + 5].y = 0.0; - grid_coor[start + 5].z = -a; - weight[start + 5] = 4.0 * pi * v; - - start = start + 6; - - break; - - case 2: - a = std::sqrt(0.5); - - grid_coor[start].x = 0.0; - grid_coor[start].y = a; - grid_coor[start].z = a; - weight[start] = 4.0 * pi * v; - - grid_coor[start + 1].x = 0.0; - grid_coor[start + 1].y = -a; - grid_coor[start + 1].z = a; - weight[start + 1] = 4.0 * pi * v; - - grid_coor[start + 2].x = 0.0; - grid_coor[start + 2].y = a; - grid_coor[start + 2].z = -a; - weight[start + 2] = 4.0 * pi * v; - - grid_coor[start + 3].x = 0.0; - grid_coor[start + 3].y = -a; - grid_coor[start + 3].z = -a; - weight[start + 3] = 4.0 * pi * v; - - grid_coor[start + 4].x = a; - grid_coor[start + 4].y = 0.0; - grid_coor[start + 4].z = a; - weight[start + 4] = 4.0 * pi * v; - - grid_coor[start + 5].x = a; - grid_coor[start + 5].y = 0.0; - grid_coor[start + 5].z = -a; - weight[start + 5] = 4.0 * pi * v; - - grid_coor[start + 6].x = -a; - grid_coor[start + 6].y = 0.0; - grid_coor[start + 6].z = a; - weight[start + 6] = 4.0 * pi * v; - - grid_coor[start + 7].x = -a; - grid_coor[start + 7].y = 0.0; - grid_coor[start + 7].z = -a; - weight[start + 7] = 4.0 * pi * v; - - grid_coor[start + 8].x = a; - grid_coor[start + 8].y = a; - grid_coor[start + 8].z = 0.0; - weight[start + 8] = 4.0 * pi 
* v; - - grid_coor[start + 9].x = -a; - grid_coor[start + 9].y = a; - grid_coor[start + 9].z = 0.0; - weight[start + 9] = 4.0 * pi * v; - - grid_coor[start + 10].x = a; - grid_coor[start + 10].y = -a; - grid_coor[start + 10].z = 0.0; - weight[start + 10] = 4.0 * pi * v; - - grid_coor[start + 11].x = -a; - grid_coor[start + 11].y = -a; - grid_coor[start + 11].z = 0.0; - weight[start + 11] = 4.0 * pi * v; - - start = start + 12; - - break; - - case 3: - a = sqrt(1.0 / 3.0); - - grid_coor[start].x = a; - grid_coor[start].y = a; - grid_coor[start].z = a; - weight[start] = 4.0 * pi * v; - - grid_coor[start + 1].x = -a; - grid_coor[start + 1].y = a; - grid_coor[start + 1].z = a; - weight[start + 1] = 4.0 * pi * v; - - grid_coor[start + 2].x = a; - grid_coor[start + 2].y = -a; - grid_coor[start + 2].z = a; - weight[start + 2] = 4.0 * pi * v; - - grid_coor[start + 3].x = a; - grid_coor[start + 3].y = a; - grid_coor[start + 3].z = -a; - weight[start + 3] = 4.0 * pi * v; - - grid_coor[start + 4].x = -a; - grid_coor[start + 4].y = -a; - grid_coor[start + 4].z = a; - weight[start + 4] = 4.0 * pi * v; - - grid_coor[start + 5].x = a; - grid_coor[start + 5].y = -a; - grid_coor[start + 5].z = -a; - weight[start + 5] = 4.0 * pi * v; - - grid_coor[start + 6].x = -a; - grid_coor[start + 6].y = a; - grid_coor[start + 6].z = -a; - weight[start + 6] = 4.0 * pi * v; - - grid_coor[start + 7].x = -a; - grid_coor[start + 7].y = -a; - grid_coor[start + 7].z = -a; - weight[start + 7] = 4.0 * pi * v; - - start = start + 8; - - break; - - case 4: - /* In this case a is inputed */ - b = sqrt(1.0 - 2.0 * a * a); - - grid_coor[start].x = a; - grid_coor[start].y = a; - grid_coor[start].z = b; - weight[start] = 4.0 * pi * v; - - grid_coor[start + 1].x = -a; - grid_coor[start + 1].y = a; - grid_coor[start + 1].z = b; - weight[start + 1] = 4.0 * pi * v; - - grid_coor[start + 2].x = a; - grid_coor[start + 2].y = -a; - grid_coor[start + 2].z = b; - weight[start + 2] = 4.0 * pi * v; - - grid_coor[start + 
3].x = a; - grid_coor[start + 3].y = a; - grid_coor[start + 3].z = -b; - weight[start + 3] = 4.0 * pi * v; - - grid_coor[start + 4].x = -a; - grid_coor[start + 4].y = -a; - grid_coor[start + 4].z = b; - weight[start + 4] = 4.0 * pi * v; - - grid_coor[start + 5].x = -a; - grid_coor[start + 5].y = a; - grid_coor[start + 5].z = -b; - weight[start + 5] = 4.0 * pi * v; - - grid_coor[start + 6].x = a; - grid_coor[start + 6].y = -a; - grid_coor[start + 6].z = -b; - weight[start + 6] = 4.0 * pi * v; - - grid_coor[start + 7].x = -a; - grid_coor[start + 7].y = -a; - grid_coor[start + 7].z = -b; - weight[start + 7] = 4.0 * pi * v; - - grid_coor[start + 8].x = -a; - grid_coor[start + 8].y = b; - grid_coor[start + 8].z = a; - weight[start + 8] = 4.0 * pi * v; - - grid_coor[start + 9].x = a; - grid_coor[start + 9].y = -b; - grid_coor[start + 9].z = a; - weight[start + 9] = 4.0 * pi * v; - - grid_coor[start + 10].x = a; - grid_coor[start + 10].y = b; - grid_coor[start + 10].z = -a; - weight[start + 10] = 4.0 * pi * v; - - grid_coor[start + 11].x = -a; - grid_coor[start + 11].y = -b; - grid_coor[start + 11].z = a; - weight[start + 11] = 4.0 * pi * v; - - grid_coor[start + 12].x = -a; - grid_coor[start + 12].y = b; - grid_coor[start + 12].z = -a; - weight[start + 12] = 4.0 * pi * v; - - grid_coor[start + 13].x = a; - grid_coor[start + 13].y = -b; - grid_coor[start + 13].z = -a; - weight[start + 13] = 4.0 * pi * v; - - grid_coor[start + 14].x = -a; - grid_coor[start + 14].y = -b; - grid_coor[start + 14].z = -a; - weight[start + 14] = 4.0 * pi * v; - - grid_coor[start + 15].x = a; - grid_coor[start + 15].y = b; - grid_coor[start + 15].z = a; - weight[start + 15] = 4.0 * pi * v; - - grid_coor[start + 16].x = b; - grid_coor[start + 16].y = a; - grid_coor[start + 16].z = a; - weight[start + 16] = 4.0 * pi * v; - - grid_coor[start + 17].x = -b; - grid_coor[start + 17].y = a; - grid_coor[start + 17].z = a; - weight[start + 17] = 4.0 * pi * v; - - grid_coor[start + 18].x = b; - 
grid_coor[start + 18].y = -a; - grid_coor[start + 18].z = a; - weight[start + 18] = 4.0 * pi * v; - - grid_coor[start + 19].x = b; - grid_coor[start + 19].y = a; - grid_coor[start + 19].z = -a; - weight[start + 19] = 4.0 * pi * v; - - grid_coor[start + 20].x = -b; - grid_coor[start + 20].y = -a; - grid_coor[start + 20].z = a; - weight[start + 20] = 4.0 * pi * v; - - grid_coor[start + 21].x = -b; - grid_coor[start + 21].y = a; - grid_coor[start + 21].z = -a; - weight[start + 21] = 4.0 * pi * v; - - grid_coor[start + 22].x = b; - grid_coor[start + 22].y = -a; - grid_coor[start + 22].z = -a; - weight[start + 22] = 4.0 * pi * v; - - grid_coor[start + 23].x = -b; - grid_coor[start + 23].y = -a; - grid_coor[start + 23].z = -a; - weight[start + 23] = 4.0 * pi * v; - - start = start + 24; - - break; - - case 5: - /* a is inputed in this case as well*/ - b = sqrt(1 - a * a); - - grid_coor[start].x = a; - grid_coor[start].y = b; - grid_coor[start].z = 0.0; - weight[start] = 4.0 * pi * v; - - grid_coor[start + 1].x = -a; - grid_coor[start + 1].y = b; - grid_coor[start + 1].z = 0.0; - weight[start + 1] = 4.0 * pi * v; - - grid_coor[start + 2].x = a; - grid_coor[start + 2].y = -b; - grid_coor[start + 2].z = 0.0; - weight[start + 2] = 4.0 * pi * v; - - grid_coor[start + 3].x = -a; - grid_coor[start + 3].y = -b; - grid_coor[start + 3].z = 0.0; - weight[start + 3] = 4.0 * pi * v; - - grid_coor[start + 4].x = b; - grid_coor[start + 4].y = a; - grid_coor[start + 4].z = 0.0; - weight[start + 4] = 4.0 * pi * v; - - grid_coor[start + 5].x = -b; - grid_coor[start + 5].y = a; - grid_coor[start + 5].z = 0.0; - weight[start + 5] = 4.0 * pi * v; - - grid_coor[start + 6].x = b; - grid_coor[start + 6].y = -a; - grid_coor[start + 6].z = 0.0; - weight[start + 6] = 4.0 * pi * v; - - grid_coor[start + 7].x = -b; - grid_coor[start + 7].y = -a; - grid_coor[start + 7].z = 0.0; - weight[start + 7] = 4.0 * pi * v; - - grid_coor[start + 8].x = a; - grid_coor[start + 8].y = 0.0; - grid_coor[start + 8].z 
= b; - weight[start + 8] = 4.0 * pi * v; - - grid_coor[start + 9].x = -a; - grid_coor[start + 9].y = 0.0; - grid_coor[start + 9].z = b; - weight[start + 9] = 4.0 * pi * v; - - grid_coor[start + 10].x = a; - grid_coor[start + 10].y = 0.0; - grid_coor[start + 10].z = -b; - weight[start + 10] = 4.0 * pi * v; - - grid_coor[start + 11].x = -a; - grid_coor[start + 11].y = 0.0; - grid_coor[start + 11].z = -b; - weight[start + 11] = 4.0 * pi * v; - - grid_coor[start + 12].x = b; - grid_coor[start + 12].y = 0.0; - grid_coor[start + 12].z = a; - weight[start + 12] = 4.0 * pi * v; - - grid_coor[start + 13].x = -b; - grid_coor[start + 13].y = 0.0; - grid_coor[start + 13].z = a; - weight[start + 13] = 4.0 * pi * v; - - grid_coor[start + 14].x = b; - grid_coor[start + 14].y = 0.0; - grid_coor[start + 14].z = -a; - weight[start + 14] = 4.0 * pi * v; - - grid_coor[start + 15].x = -b; - grid_coor[start + 15].y = 0.0; - grid_coor[start + 15].z = -a; - weight[start + 15] = 4.0 * pi * v; - - grid_coor[start + 16].x = 0.0; - grid_coor[start + 16].y = a; - grid_coor[start + 16].z = b; - weight[start + 16] = 4.0 * pi * v; - - grid_coor[start + 17].x = 0.0; - grid_coor[start + 17].y = -a; - grid_coor[start + 17].z = b; - weight[start + 17] = 4.0 * pi * v; - - grid_coor[start + 18].x = 0.0; - grid_coor[start + 18].y = a; - grid_coor[start + 18].z = -b; - weight[start + 18] = 4.0 * pi * v; - - grid_coor[start + 19].x = 0.0; - grid_coor[start + 19].y = -a; - grid_coor[start + 19].z = -b; - weight[start + 19] = 4.0 * pi * v; - - grid_coor[start + 20].x = 0.0; - grid_coor[start + 20].y = b; - grid_coor[start + 20].z = a; - weight[start + 20] = 4.0 * pi * v; - - grid_coor[start + 21].x = 0.0; - grid_coor[start + 21].y = -b; - grid_coor[start + 21].z = a; - weight[start + 21] = 4.0 * pi * v; - - grid_coor[start + 22].x = 0.0; - grid_coor[start + 22].y = b; - grid_coor[start + 22].z = -a; - weight[start + 22] = 4.0 * pi * v; - - grid_coor[start + 23].x = 0.0; - grid_coor[start + 23].y = -b; - 
grid_coor[start + 23].z = -a; - weight[start + 23] = 4.0 * pi * v; - - start = start + 24; - - break; - - case 6: - /* both a and b are inputed in this case */ - c = sqrt(1.0 - a * a - b * b); - - grid_coor[start].x = a; - grid_coor[start].y = b; - grid_coor[start].z = c; - weight[start] = 4.0 * pi * v; - - grid_coor[start + 1].x = -a; - grid_coor[start + 1].y = b; - grid_coor[start + 1].z = c; - weight[start + 1] = 4.0 * pi * v; - - grid_coor[start + 2].x = a; - grid_coor[start + 2].y = -b; - grid_coor[start + 2].z = c; - weight[start + 2] = 4.0 * pi * v; - - grid_coor[start + 3].x = a; - grid_coor[start + 3].y = b; - grid_coor[start + 3].z = -c; - weight[start + 3] = 4.0 * pi * v; - - grid_coor[start + 4].x = -a; - grid_coor[start + 4].y = -b; - grid_coor[start + 4].z = c; - weight[start + 4] = 4.0 * pi * v; - - grid_coor[start + 5].x = a; - grid_coor[start + 5].y = -b; - grid_coor[start + 5].z = -c; - weight[start + 5] = 4.0 * pi * v; - - grid_coor[start + 6].x = -a; - grid_coor[start + 6].y = b; - grid_coor[start + 6].z = -c; - weight[start + 6] = 4.0 * pi * v; - - grid_coor[start + 7].x = -a; - grid_coor[start + 7].y = -b; - grid_coor[start + 7].z = -c; - weight[start + 7] = 4.0 * pi * v; - - grid_coor[start + 8].x = b; - grid_coor[start + 8].y = a; - grid_coor[start + 8].z = c; - weight[start + 8] = 4.0 * pi * v; - - grid_coor[start + 9].x = -b; - grid_coor[start + 9].y = a; - grid_coor[start + 9].z = c; - weight[start + 9] = 4.0 * pi * v; - - grid_coor[start + 10].x = b; - grid_coor[start + 10].y = -a; - grid_coor[start + 10].z = c; - weight[start + 10] = 4.0 * pi * v; - - grid_coor[start + 11].x = b; - grid_coor[start + 11].y = a; - grid_coor[start + 11].z = -c; - weight[start + 11] = 4.0 * pi * v; - - grid_coor[start + 12].x = -b; - grid_coor[start + 12].y = -a; - grid_coor[start + 12].z = c; - weight[start + 12] = 4.0 * pi * v; - - grid_coor[start + 13].x = b; - grid_coor[start + 13].y = -a; - grid_coor[start + 13].z = -c; - weight[start + 13] = 4.0 * pi 
* v; - - grid_coor[start + 14].x = -b; - grid_coor[start + 14].y = a; - grid_coor[start + 14].z = -c; - weight[start + 14] = 4.0 * pi * v; - - grid_coor[start + 15].x = -b; - grid_coor[start + 15].y = -a; - grid_coor[start + 15].z = -c; - weight[start + 15] = 4.0 * pi * v; - - grid_coor[start + 16].x = c; - grid_coor[start + 16].y = a; - grid_coor[start + 16].z = b; - weight[start + 16] = 4.0 * pi * v; - - grid_coor[start + 17].x = -c; - grid_coor[start + 17].y = a; - grid_coor[start + 17].z = b; - weight[start + 17] = 4.0 * pi * v; - - grid_coor[start + 18].x = c; - grid_coor[start + 18].y = -a; - grid_coor[start + 18].z = b; - weight[start + 18] = 4.0 * pi * v; - - grid_coor[start + 19].x = c; - grid_coor[start + 19].y = a; - grid_coor[start + 19].z = -b; - weight[start + 19] = 4.0 * pi * v; - - grid_coor[start + 20].x = -c; - grid_coor[start + 20].y = -a; - grid_coor[start + 20].z = b; - weight[start + 20] = 4.0 * pi * v; - - grid_coor[start + 21].x = c; - grid_coor[start + 21].y = -a; - grid_coor[start + 21].z = -b; - weight[start + 21] = 4.0 * pi * v; - - grid_coor[start + 22].x = -c; - grid_coor[start + 22].y = a; - grid_coor[start + 22].z = -b; - weight[start + 22] = 4.0 * pi * v; - - grid_coor[start + 23].x = -c; - grid_coor[start + 23].y = -a; - grid_coor[start + 23].z = -b; - weight[start + 23] = 4.0 * pi * v; - - grid_coor[start + 24].x = c; - grid_coor[start + 24].y = b; - grid_coor[start + 24].z = a; - weight[start + 24] = 4.0 * pi * v; - - grid_coor[start + 25].x = -c; - grid_coor[start + 25].y = b; - grid_coor[start + 25].z = a; - weight[start + 25] = 4.0 * pi * v; - - grid_coor[start + 26].x = c; - grid_coor[start + 26].y = -b; - grid_coor[start + 26].z = a; - weight[start + 26] = 4.0 * pi * v; - - grid_coor[start + 27].x = c; - grid_coor[start + 27].y = b; - grid_coor[start + 27].z = -a; - weight[start + 27] = 4.0 * pi * v; - - grid_coor[start + 28].x = -c; - grid_coor[start + 28].y = -b; - grid_coor[start + 28].z = a; - weight[start + 28] = 4.0 * 
pi * v; - - grid_coor[start + 29].x = c; - grid_coor[start + 29].y = -b; - grid_coor[start + 29].z = -a; - weight[start + 29] = 4.0 * pi * v; - - grid_coor[start + 30].x = -c; - grid_coor[start + 30].y = b; - grid_coor[start + 30].z = -a; - weight[start + 30] = 4.0 * pi * v; - - grid_coor[start + 31].x = -c; - grid_coor[start + 31].y = -b; - grid_coor[start + 31].z = -a; - weight[start + 31] = 4.0 * pi * v; - - grid_coor[start + 32].x = a; - grid_coor[start + 32].y = c; - grid_coor[start + 32].z = b; - weight[start + 32] = 4.0 * pi * v; - - grid_coor[start + 33].x = -a; - grid_coor[start + 33].y = c; - grid_coor[start + 33].z = b; - weight[start + 33] = 4.0 * pi * v; - - grid_coor[start + 34].x = a; - grid_coor[start + 34].y = -c; - grid_coor[start + 34].z = b; - weight[start + 34] = 4.0 * pi * v; - - grid_coor[start + 35].x = a; - grid_coor[start + 35].y = c; - grid_coor[start + 35].z = -b; - weight[start + 35] = 4.0 * pi * v; - - grid_coor[start + 36].x = -a; - grid_coor[start + 36].y = -c; - grid_coor[start + 36].z = b; - weight[start + 36] = 4.0 * pi * v; - - grid_coor[start + 37].x = a; - grid_coor[start + 37].y = -c; - grid_coor[start + 37].z = -b; - weight[start + 37] = 4.0 * pi * v; - - grid_coor[start + 38].x = -a; - grid_coor[start + 38].y = c; - grid_coor[start + 38].z = -b; - weight[start + 38] = 4.0 * pi * v; - - grid_coor[start + 39].x = -a; - grid_coor[start + 39].y = -c; - grid_coor[start + 39].z = -b; - weight[start + 39] = 4.0 * pi * v; - - grid_coor[start + 40].x = b; - grid_coor[start + 40].y = c; - grid_coor[start + 40].z = a; - weight[start + 40] = 4.0 * pi * v; - - grid_coor[start + 41].x = -b; - grid_coor[start + 41].y = c; - grid_coor[start + 41].z = a; - weight[start + 41] = 4.0 * pi * v; - - grid_coor[start + 42].x = b; - grid_coor[start + 42].y = -c; - grid_coor[start + 42].z = a; - weight[start + 42] = 4.0 * pi * v; - - grid_coor[start + 43].x = b; - grid_coor[start + 43].y = c; - grid_coor[start + 43].z = -a; - weight[start + 43] = 4.0 
* pi * v; - - grid_coor[start + 44].x = -b; - grid_coor[start + 44].y = -c; - grid_coor[start + 44].z = a; - weight[start + 44] = 4.0 * pi * v; - - grid_coor[start + 45].x = b; - grid_coor[start + 45].y = -c; - grid_coor[start + 45].z = -a; - weight[start + 45] = 4.0 * pi * v; - - grid_coor[start + 46].x = -b; - grid_coor[start + 46].y = c; - grid_coor[start + 46].z = -a; - weight[start + 46] = 4.0 * pi * v; - - grid_coor[start + 47].x = -b; - grid_coor[start + 47].y = -c; - grid_coor[start + 47].z = -a; - weight[start + 47] = 4.0 * pi * v; - - start = start + 48; - - break; - } + { + case 1: + a = 1.0; + + grid_coor[start].x = a; + grid_coor[start].y = 0.0; + grid_coor[start].z = 0.0; + weight[start] = 4.0 * pi * v; + + grid_coor[start + 1].x = -a; + grid_coor[start + 1].y = 0.0; + grid_coor[start + 1].z = 0.0; + weight[start + 1] = 4.0 * pi * v; + + grid_coor[start + 2].x = 0.0; + grid_coor[start + 2].y = a; + grid_coor[start + 2].z = 0.0; + weight[start + 2] = 4.0 * pi * v; + + grid_coor[start + 3].x = 0.0; + grid_coor[start + 3].y = -a; + grid_coor[start + 3].z = 0.0; + weight[start + 3] = 4.0 * pi * v; + + grid_coor[start + 4].x = 0.0; + grid_coor[start + 4].y = 0.0; + grid_coor[start + 4].z = a; + weight[start + 4] = 4.0 * pi * v; + + grid_coor[start + 5].x = 0.0; + grid_coor[start + 5].y = 0.0; + grid_coor[start + 5].z = -a; + weight[start + 5] = 4.0 * pi * v; + + start = start + 6; + + break; + + case 2: + a = std::sqrt (0.5); + + grid_coor[start].x = 0.0; + grid_coor[start].y = a; + grid_coor[start].z = a; + weight[start] = 4.0 * pi * v; + + grid_coor[start + 1].x = 0.0; + grid_coor[start + 1].y = -a; + grid_coor[start + 1].z = a; + weight[start + 1] = 4.0 * pi * v; + + grid_coor[start + 2].x = 0.0; + grid_coor[start + 2].y = a; + grid_coor[start + 2].z = -a; + weight[start + 2] = 4.0 * pi * v; + + grid_coor[start + 3].x = 0.0; + grid_coor[start + 3].y = -a; + grid_coor[start + 3].z = -a; + weight[start + 3] = 4.0 * pi * v; + + grid_coor[start + 4].x = a; 
+ grid_coor[start + 4].y = 0.0; + grid_coor[start + 4].z = a; + weight[start + 4] = 4.0 * pi * v; + + grid_coor[start + 5].x = a; + grid_coor[start + 5].y = 0.0; + grid_coor[start + 5].z = -a; + weight[start + 5] = 4.0 * pi * v; + + grid_coor[start + 6].x = -a; + grid_coor[start + 6].y = 0.0; + grid_coor[start + 6].z = a; + weight[start + 6] = 4.0 * pi * v; + + grid_coor[start + 7].x = -a; + grid_coor[start + 7].y = 0.0; + grid_coor[start + 7].z = -a; + weight[start + 7] = 4.0 * pi * v; + + grid_coor[start + 8].x = a; + grid_coor[start + 8].y = a; + grid_coor[start + 8].z = 0.0; + weight[start + 8] = 4.0 * pi * v; + + grid_coor[start + 9].x = -a; + grid_coor[start + 9].y = a; + grid_coor[start + 9].z = 0.0; + weight[start + 9] = 4.0 * pi * v; + + grid_coor[start + 10].x = a; + grid_coor[start + 10].y = -a; + grid_coor[start + 10].z = 0.0; + weight[start + 10] = 4.0 * pi * v; + + grid_coor[start + 11].x = -a; + grid_coor[start + 11].y = -a; + grid_coor[start + 11].z = 0.0; + weight[start + 11] = 4.0 * pi * v; + + start = start + 12; + + break; + + case 3: + a = sqrt (1.0 / 3.0); + + grid_coor[start].x = a; + grid_coor[start].y = a; + grid_coor[start].z = a; + weight[start] = 4.0 * pi * v; + + grid_coor[start + 1].x = -a; + grid_coor[start + 1].y = a; + grid_coor[start + 1].z = a; + weight[start + 1] = 4.0 * pi * v; + + grid_coor[start + 2].x = a; + grid_coor[start + 2].y = -a; + grid_coor[start + 2].z = a; + weight[start + 2] = 4.0 * pi * v; + + grid_coor[start + 3].x = a; + grid_coor[start + 3].y = a; + grid_coor[start + 3].z = -a; + weight[start + 3] = 4.0 * pi * v; + + grid_coor[start + 4].x = -a; + grid_coor[start + 4].y = -a; + grid_coor[start + 4].z = a; + weight[start + 4] = 4.0 * pi * v; + + grid_coor[start + 5].x = a; + grid_coor[start + 5].y = -a; + grid_coor[start + 5].z = -a; + weight[start + 5] = 4.0 * pi * v; + + grid_coor[start + 6].x = -a; + grid_coor[start + 6].y = a; + grid_coor[start + 6].z = -a; + weight[start + 6] = 4.0 * pi * v; + + 
grid_coor[start + 7].x = -a; + grid_coor[start + 7].y = -a; + grid_coor[start + 7].z = -a; + weight[start + 7] = 4.0 * pi * v; + + start = start + 8; + + break; + + case 4: + /* In this case a is inputed */ + b = sqrt (1.0 - 2.0 * a * a); + + grid_coor[start].x = a; + grid_coor[start].y = a; + grid_coor[start].z = b; + weight[start] = 4.0 * pi * v; + + grid_coor[start + 1].x = -a; + grid_coor[start + 1].y = a; + grid_coor[start + 1].z = b; + weight[start + 1] = 4.0 * pi * v; + + grid_coor[start + 2].x = a; + grid_coor[start + 2].y = -a; + grid_coor[start + 2].z = b; + weight[start + 2] = 4.0 * pi * v; + + grid_coor[start + 3].x = a; + grid_coor[start + 3].y = a; + grid_coor[start + 3].z = -b; + weight[start + 3] = 4.0 * pi * v; + + grid_coor[start + 4].x = -a; + grid_coor[start + 4].y = -a; + grid_coor[start + 4].z = b; + weight[start + 4] = 4.0 * pi * v; + + grid_coor[start + 5].x = -a; + grid_coor[start + 5].y = a; + grid_coor[start + 5].z = -b; + weight[start + 5] = 4.0 * pi * v; + + grid_coor[start + 6].x = a; + grid_coor[start + 6].y = -a; + grid_coor[start + 6].z = -b; + weight[start + 6] = 4.0 * pi * v; + + grid_coor[start + 7].x = -a; + grid_coor[start + 7].y = -a; + grid_coor[start + 7].z = -b; + weight[start + 7] = 4.0 * pi * v; + + grid_coor[start + 8].x = -a; + grid_coor[start + 8].y = b; + grid_coor[start + 8].z = a; + weight[start + 8] = 4.0 * pi * v; + + grid_coor[start + 9].x = a; + grid_coor[start + 9].y = -b; + grid_coor[start + 9].z = a; + weight[start + 9] = 4.0 * pi * v; + + grid_coor[start + 10].x = a; + grid_coor[start + 10].y = b; + grid_coor[start + 10].z = -a; + weight[start + 10] = 4.0 * pi * v; + + grid_coor[start + 11].x = -a; + grid_coor[start + 11].y = -b; + grid_coor[start + 11].z = a; + weight[start + 11] = 4.0 * pi * v; + + grid_coor[start + 12].x = -a; + grid_coor[start + 12].y = b; + grid_coor[start + 12].z = -a; + weight[start + 12] = 4.0 * pi * v; + + grid_coor[start + 13].x = a; + grid_coor[start + 13].y = -b; + 
grid_coor[start + 13].z = -a; + weight[start + 13] = 4.0 * pi * v; + + grid_coor[start + 14].x = -a; + grid_coor[start + 14].y = -b; + grid_coor[start + 14].z = -a; + weight[start + 14] = 4.0 * pi * v; + + grid_coor[start + 15].x = a; + grid_coor[start + 15].y = b; + grid_coor[start + 15].z = a; + weight[start + 15] = 4.0 * pi * v; + + grid_coor[start + 16].x = b; + grid_coor[start + 16].y = a; + grid_coor[start + 16].z = a; + weight[start + 16] = 4.0 * pi * v; + + grid_coor[start + 17].x = -b; + grid_coor[start + 17].y = a; + grid_coor[start + 17].z = a; + weight[start + 17] = 4.0 * pi * v; + + grid_coor[start + 18].x = b; + grid_coor[start + 18].y = -a; + grid_coor[start + 18].z = a; + weight[start + 18] = 4.0 * pi * v; + + grid_coor[start + 19].x = b; + grid_coor[start + 19].y = a; + grid_coor[start + 19].z = -a; + weight[start + 19] = 4.0 * pi * v; + + grid_coor[start + 20].x = -b; + grid_coor[start + 20].y = -a; + grid_coor[start + 20].z = a; + weight[start + 20] = 4.0 * pi * v; + + grid_coor[start + 21].x = -b; + grid_coor[start + 21].y = a; + grid_coor[start + 21].z = -a; + weight[start + 21] = 4.0 * pi * v; + + grid_coor[start + 22].x = b; + grid_coor[start + 22].y = -a; + grid_coor[start + 22].z = -a; + weight[start + 22] = 4.0 * pi * v; + + grid_coor[start + 23].x = -b; + grid_coor[start + 23].y = -a; + grid_coor[start + 23].z = -a; + weight[start + 23] = 4.0 * pi * v; + + start = start + 24; + + break; + + case 5: + /* a is inputed in this case as well*/ + b = sqrt (1 - a * a); + + grid_coor[start].x = a; + grid_coor[start].y = b; + grid_coor[start].z = 0.0; + weight[start] = 4.0 * pi * v; + + grid_coor[start + 1].x = -a; + grid_coor[start + 1].y = b; + grid_coor[start + 1].z = 0.0; + weight[start + 1] = 4.0 * pi * v; + + grid_coor[start + 2].x = a; + grid_coor[start + 2].y = -b; + grid_coor[start + 2].z = 0.0; + weight[start + 2] = 4.0 * pi * v; + + grid_coor[start + 3].x = -a; + grid_coor[start + 3].y = -b; + grid_coor[start + 3].z = 0.0; + 
weight[start + 3] = 4.0 * pi * v; + + grid_coor[start + 4].x = b; + grid_coor[start + 4].y = a; + grid_coor[start + 4].z = 0.0; + weight[start + 4] = 4.0 * pi * v; + + grid_coor[start + 5].x = -b; + grid_coor[start + 5].y = a; + grid_coor[start + 5].z = 0.0; + weight[start + 5] = 4.0 * pi * v; + + grid_coor[start + 6].x = b; + grid_coor[start + 6].y = -a; + grid_coor[start + 6].z = 0.0; + weight[start + 6] = 4.0 * pi * v; + + grid_coor[start + 7].x = -b; + grid_coor[start + 7].y = -a; + grid_coor[start + 7].z = 0.0; + weight[start + 7] = 4.0 * pi * v; + + grid_coor[start + 8].x = a; + grid_coor[start + 8].y = 0.0; + grid_coor[start + 8].z = b; + weight[start + 8] = 4.0 * pi * v; + + grid_coor[start + 9].x = -a; + grid_coor[start + 9].y = 0.0; + grid_coor[start + 9].z = b; + weight[start + 9] = 4.0 * pi * v; + + grid_coor[start + 10].x = a; + grid_coor[start + 10].y = 0.0; + grid_coor[start + 10].z = -b; + weight[start + 10] = 4.0 * pi * v; + + grid_coor[start + 11].x = -a; + grid_coor[start + 11].y = 0.0; + grid_coor[start + 11].z = -b; + weight[start + 11] = 4.0 * pi * v; + + grid_coor[start + 12].x = b; + grid_coor[start + 12].y = 0.0; + grid_coor[start + 12].z = a; + weight[start + 12] = 4.0 * pi * v; + + grid_coor[start + 13].x = -b; + grid_coor[start + 13].y = 0.0; + grid_coor[start + 13].z = a; + weight[start + 13] = 4.0 * pi * v; + + grid_coor[start + 14].x = b; + grid_coor[start + 14].y = 0.0; + grid_coor[start + 14].z = -a; + weight[start + 14] = 4.0 * pi * v; + + grid_coor[start + 15].x = -b; + grid_coor[start + 15].y = 0.0; + grid_coor[start + 15].z = -a; + weight[start + 15] = 4.0 * pi * v; + + grid_coor[start + 16].x = 0.0; + grid_coor[start + 16].y = a; + grid_coor[start + 16].z = b; + weight[start + 16] = 4.0 * pi * v; + + grid_coor[start + 17].x = 0.0; + grid_coor[start + 17].y = -a; + grid_coor[start + 17].z = b; + weight[start + 17] = 4.0 * pi * v; + + grid_coor[start + 18].x = 0.0; + grid_coor[start + 18].y = a; + grid_coor[start + 18].z = -b; + 
weight[start + 18] = 4.0 * pi * v; + + grid_coor[start + 19].x = 0.0; + grid_coor[start + 19].y = -a; + grid_coor[start + 19].z = -b; + weight[start + 19] = 4.0 * pi * v; + + grid_coor[start + 20].x = 0.0; + grid_coor[start + 20].y = b; + grid_coor[start + 20].z = a; + weight[start + 20] = 4.0 * pi * v; + + grid_coor[start + 21].x = 0.0; + grid_coor[start + 21].y = -b; + grid_coor[start + 21].z = a; + weight[start + 21] = 4.0 * pi * v; + + grid_coor[start + 22].x = 0.0; + grid_coor[start + 22].y = b; + grid_coor[start + 22].z = -a; + weight[start + 22] = 4.0 * pi * v; + + grid_coor[start + 23].x = 0.0; + grid_coor[start + 23].y = -b; + grid_coor[start + 23].z = -a; + weight[start + 23] = 4.0 * pi * v; + + start = start + 24; + + break; + + case 6: + /* both a and b are inputed in this case */ + c = sqrt (1.0 - a * a - b * b); + + grid_coor[start].x = a; + grid_coor[start].y = b; + grid_coor[start].z = c; + weight[start] = 4.0 * pi * v; + + grid_coor[start + 1].x = -a; + grid_coor[start + 1].y = b; + grid_coor[start + 1].z = c; + weight[start + 1] = 4.0 * pi * v; + + grid_coor[start + 2].x = a; + grid_coor[start + 2].y = -b; + grid_coor[start + 2].z = c; + weight[start + 2] = 4.0 * pi * v; + + grid_coor[start + 3].x = a; + grid_coor[start + 3].y = b; + grid_coor[start + 3].z = -c; + weight[start + 3] = 4.0 * pi * v; + + grid_coor[start + 4].x = -a; + grid_coor[start + 4].y = -b; + grid_coor[start + 4].z = c; + weight[start + 4] = 4.0 * pi * v; + + grid_coor[start + 5].x = a; + grid_coor[start + 5].y = -b; + grid_coor[start + 5].z = -c; + weight[start + 5] = 4.0 * pi * v; + + grid_coor[start + 6].x = -a; + grid_coor[start + 6].y = b; + grid_coor[start + 6].z = -c; + weight[start + 6] = 4.0 * pi * v; + + grid_coor[start + 7].x = -a; + grid_coor[start + 7].y = -b; + grid_coor[start + 7].z = -c; + weight[start + 7] = 4.0 * pi * v; + + grid_coor[start + 8].x = b; + grid_coor[start + 8].y = a; + grid_coor[start + 8].z = c; + weight[start + 8] = 4.0 * pi * v; + + 
grid_coor[start + 9].x = -b; + grid_coor[start + 9].y = a; + grid_coor[start + 9].z = c; + weight[start + 9] = 4.0 * pi * v; + + grid_coor[start + 10].x = b; + grid_coor[start + 10].y = -a; + grid_coor[start + 10].z = c; + weight[start + 10] = 4.0 * pi * v; + + grid_coor[start + 11].x = b; + grid_coor[start + 11].y = a; + grid_coor[start + 11].z = -c; + weight[start + 11] = 4.0 * pi * v; + + grid_coor[start + 12].x = -b; + grid_coor[start + 12].y = -a; + grid_coor[start + 12].z = c; + weight[start + 12] = 4.0 * pi * v; + + grid_coor[start + 13].x = b; + grid_coor[start + 13].y = -a; + grid_coor[start + 13].z = -c; + weight[start + 13] = 4.0 * pi * v; + + grid_coor[start + 14].x = -b; + grid_coor[start + 14].y = a; + grid_coor[start + 14].z = -c; + weight[start + 14] = 4.0 * pi * v; + + grid_coor[start + 15].x = -b; + grid_coor[start + 15].y = -a; + grid_coor[start + 15].z = -c; + weight[start + 15] = 4.0 * pi * v; + + grid_coor[start + 16].x = c; + grid_coor[start + 16].y = a; + grid_coor[start + 16].z = b; + weight[start + 16] = 4.0 * pi * v; + + grid_coor[start + 17].x = -c; + grid_coor[start + 17].y = a; + grid_coor[start + 17].z = b; + weight[start + 17] = 4.0 * pi * v; + + grid_coor[start + 18].x = c; + grid_coor[start + 18].y = -a; + grid_coor[start + 18].z = b; + weight[start + 18] = 4.0 * pi * v; + + grid_coor[start + 19].x = c; + grid_coor[start + 19].y = a; + grid_coor[start + 19].z = -b; + weight[start + 19] = 4.0 * pi * v; + + grid_coor[start + 20].x = -c; + grid_coor[start + 20].y = -a; + grid_coor[start + 20].z = b; + weight[start + 20] = 4.0 * pi * v; + + grid_coor[start + 21].x = c; + grid_coor[start + 21].y = -a; + grid_coor[start + 21].z = -b; + weight[start + 21] = 4.0 * pi * v; + + grid_coor[start + 22].x = -c; + grid_coor[start + 22].y = a; + grid_coor[start + 22].z = -b; + weight[start + 22] = 4.0 * pi * v; + + grid_coor[start + 23].x = -c; + grid_coor[start + 23].y = -a; + grid_coor[start + 23].z = -b; + weight[start + 23] = 4.0 * pi * v; + + 
grid_coor[start + 24].x = c; + grid_coor[start + 24].y = b; + grid_coor[start + 24].z = a; + weight[start + 24] = 4.0 * pi * v; + + grid_coor[start + 25].x = -c; + grid_coor[start + 25].y = b; + grid_coor[start + 25].z = a; + weight[start + 25] = 4.0 * pi * v; + + grid_coor[start + 26].x = c; + grid_coor[start + 26].y = -b; + grid_coor[start + 26].z = a; + weight[start + 26] = 4.0 * pi * v; + + grid_coor[start + 27].x = c; + grid_coor[start + 27].y = b; + grid_coor[start + 27].z = -a; + weight[start + 27] = 4.0 * pi * v; + + grid_coor[start + 28].x = -c; + grid_coor[start + 28].y = -b; + grid_coor[start + 28].z = a; + weight[start + 28] = 4.0 * pi * v; + + grid_coor[start + 29].x = c; + grid_coor[start + 29].y = -b; + grid_coor[start + 29].z = -a; + weight[start + 29] = 4.0 * pi * v; + + grid_coor[start + 30].x = -c; + grid_coor[start + 30].y = b; + grid_coor[start + 30].z = -a; + weight[start + 30] = 4.0 * pi * v; + + grid_coor[start + 31].x = -c; + grid_coor[start + 31].y = -b; + grid_coor[start + 31].z = -a; + weight[start + 31] = 4.0 * pi * v; + + grid_coor[start + 32].x = a; + grid_coor[start + 32].y = c; + grid_coor[start + 32].z = b; + weight[start + 32] = 4.0 * pi * v; + + grid_coor[start + 33].x = -a; + grid_coor[start + 33].y = c; + grid_coor[start + 33].z = b; + weight[start + 33] = 4.0 * pi * v; + + grid_coor[start + 34].x = a; + grid_coor[start + 34].y = -c; + grid_coor[start + 34].z = b; + weight[start + 34] = 4.0 * pi * v; + + grid_coor[start + 35].x = a; + grid_coor[start + 35].y = c; + grid_coor[start + 35].z = -b; + weight[start + 35] = 4.0 * pi * v; + + grid_coor[start + 36].x = -a; + grid_coor[start + 36].y = -c; + grid_coor[start + 36].z = b; + weight[start + 36] = 4.0 * pi * v; + + grid_coor[start + 37].x = a; + grid_coor[start + 37].y = -c; + grid_coor[start + 37].z = -b; + weight[start + 37] = 4.0 * pi * v; + + grid_coor[start + 38].x = -a; + grid_coor[start + 38].y = c; + grid_coor[start + 38].z = -b; + weight[start + 38] = 4.0 * pi * v; + 
+ grid_coor[start + 39].x = -a; + grid_coor[start + 39].y = -c; + grid_coor[start + 39].z = -b; + weight[start + 39] = 4.0 * pi * v; + + grid_coor[start + 40].x = b; + grid_coor[start + 40].y = c; + grid_coor[start + 40].z = a; + weight[start + 40] = 4.0 * pi * v; + + grid_coor[start + 41].x = -b; + grid_coor[start + 41].y = c; + grid_coor[start + 41].z = a; + weight[start + 41] = 4.0 * pi * v; + + grid_coor[start + 42].x = b; + grid_coor[start + 42].y = -c; + grid_coor[start + 42].z = a; + weight[start + 42] = 4.0 * pi * v; + + grid_coor[start + 43].x = b; + grid_coor[start + 43].y = c; + grid_coor[start + 43].z = -a; + weight[start + 43] = 4.0 * pi * v; + + grid_coor[start + 44].x = -b; + grid_coor[start + 44].y = -c; + grid_coor[start + 44].z = a; + weight[start + 44] = 4.0 * pi * v; + + grid_coor[start + 45].x = b; + grid_coor[start + 45].y = -c; + grid_coor[start + 45].z = -a; + weight[start + 45] = 4.0 * pi * v; + + grid_coor[start + 46].x = -b; + grid_coor[start + 46].y = c; + grid_coor[start + 46].z = -a; + weight[start + 46] = 4.0 * pi * v; + + grid_coor[start + 47].x = -b; + grid_coor[start + 47].y = -c; + grid_coor[start + 47].z = -a; + weight[start + 47] = 4.0 * pi * v; + + start = start + 48; + + break; + } return start; } diff --git a/source/source_base/math_lebedev_laikov.h b/source/source_base/math_lebedev_laikov.h index e13ef929497..af4d144d53d 100644 --- a/source/source_base/math_lebedev_laikov.h +++ b/source/source_base/math_lebedev_laikov.h @@ -10,43 +10,42 @@ namespace ModuleBase class Lebedev_laikov_grid { -public: - Lebedev_laikov_grid(int degree); - ~Lebedev_laikov_grid(); + public: + Lebedev_laikov_grid (int degree); + ~Lebedev_laikov_grid (); + void generate_grid_points (); - void generate_grid_points(); - - const ModuleBase::Vector3* get_grid_coor() const + const ModuleBase::Vector3* + get_grid_coor () const { return grid_coor; }; - const double* get_weight() const + const double* + get_weight () const { return weight; }; - void 
print_grid_and_weight(std::string filename); + void print_grid_and_weight (std::string filename); // degree: can only take the following values - // degree = { 6, 14, 26, 38, 50, 74, 86, 110, 146, 170, 194, 230, 266, 302, 350, 434, 590, 770, 974, + // degree = { 6, 14, 26, 38, 50, 74, 86, 110, 146, 170, 194, 230, 266, 302, 350, 434, 590, 770, 974, // 1202, 1454, 1730, 2030, 2354, 2702, 3074, 3470, 3890, 4334, 4802, 5294, 5810}; int degree = 6; -private: - int getLebedevReccurencePoints(int type, int start, double a, double b, double v); + private: + int getLebedevReccurencePoints (int type, int start, double a, double b, double v); - std::set allowed_degree = { - 6, 14, 26, 38, 50, 74, 86, 110, 146, 170, 194, 230, 266, - 302, 350, 434, 590, 770, 974, 1202, 1454, 1730, 2030, - 2354, 2702, 3074, 3470, 3890, 4334, 4802, 5294, 5810 - }; + std::set allowed_degree + = {6, 14, 26, 38, 50, 74, 86, 110, 146, 170, 194, 230, 266, 302, 350, 434, + 590, 770, 974, 1202, 1454, 1730, 2030, 2354, 2702, 3074, 3470, 3890, 4334, 4802, 5294, 5810}; - ModuleBase::Vector3 *grid_coor = nullptr; + ModuleBase::Vector3* grid_coor = nullptr; double* weight = nullptr; }; -} +} // namespace ModuleBase #endif \ No newline at end of file diff --git a/source/source_base/math_polyint.cpp b/source/source_base/math_polyint.cpp index ca62ebb72c2..505942d9a8a 100644 --- a/source/source_base/math_polyint.cpp +++ b/source/source_base/math_polyint.cpp @@ -3,237 +3,225 @@ namespace ModuleBase { -void PolyInt::Polynomial_Interpolation -( - const ModuleBase::realArray &table, - const int &dim1, - const int &dim2, - ModuleBase::realArray &y, - const int &dim_y, - const int &table_length, - const double &table_interval, - const double &x // input value -) +void + PolyInt::Polynomial_Interpolation (const ModuleBase::realArray& table, + const int& dim1, + const int& dim2, + ModuleBase::realArray& y, + const int& dim_y, + const int& table_length, + const double& table_interval, + const double& x // input value + 
) { - ModuleBase::timer::start("PolyInt","Poly_Interpo_1"); - assert(table_interval>0.0); + ModuleBase::timer::start ("PolyInt", "Poly_Interpo_1"); + assert (table_interval > 0.0); const double position = x / table_interval; - const int iq = static_cast(position); - if(iq>=table_length-4) - { - std::cout << "\n x = " << x; - std::cout << "\n iq = " << iq << " table_length = " << table_length << std::endl; - } - assert(iq < table_length-4); - - const double x0 = position - static_cast(iq); + const int iq = static_cast (position); + if (iq >= table_length - 4) + { + std::cout << "\n x = " << x; + std::cout << "\n iq = " << iq << " table_length = " << table_length << std::endl; + } + assert (iq < table_length - 4); + + const double x0 = position - static_cast (iq); const double x1 = 1.0 - x0; const double x2 = 2.0 - x0; const double x3 = 3.0 - x0; - y(dim1, dim2, dim_y)= - table(dim1, dim2, iq) * x1 * x2 * x3 / 6.0 + - table(dim1, dim2, iq+1) * x0 * x2 * x3 / 2.0 - - table(dim1, dim2, iq+2) * x1 * x0 * x3 / 2.0 + - table(dim1, dim2, iq+3) * x1 * x2 * x0 / 6.0 ; + y (dim1, dim2, dim_y) + = table (dim1, dim2, iq) * x1 * x2 * x3 / 6.0 + table (dim1, dim2, iq + 1) * x0 * x2 * x3 / 2.0 + - table (dim1, dim2, iq + 2) * x1 * x0 * x3 / 2.0 + table (dim1, dim2, iq + 3) * x1 * x2 * x0 / 6.0; - ModuleBase::timer::end("PolyInt","Poly_Interpo_1"); + ModuleBase::timer::end ("PolyInt", "Poly_Interpo_1"); return; } -double PolyInt::Polynomial_Interpolation -( - const ModuleBase::realArray &table, - const int &dim1, - const int &dim2, - const int &table_length, - const double &table_interval, - const double &x // input value -) +double + PolyInt::Polynomial_Interpolation (const ModuleBase::realArray& table, + const int& dim1, + const int& dim2, + const int& table_length, + const double& table_interval, + const double& x // input value + ) { -// ModuleBase::timer::start("PolyInt","Poly_Interpo_2"); - assert(table_interval>0.0); + // ModuleBase::timer::start("PolyInt","Poly_Interpo_2"); 
+ assert (table_interval > 0.0); const double position = x / table_interval; - const int iq = static_cast(position); + const int iq = static_cast (position); if (iq > table_length - 4) - { - std::cout << "\n x = " << x; - std::cout << "\n table_interval = " << table_interval; - std::cout << "\n iq=" << iq << " table_length = " << table_length << std::endl; - std::cout << "\n Not enough space allocated for radial FFT." << std::endl; - std::cout << " It is due to the rapid change of the size of cell:" << std::endl; - std::cout << " Try reseting a larger cell_factor parameter in INPUT" << std::endl; // LiuXh add 20180619 - std::cout << " Or try reseting a smaller relax_scale_force parameter in INPUT\n" << std::endl; - exit(0); - } - - const double x0 = position - static_cast(iq); + { + std::cout << "\n x = " << x; + std::cout << "\n table_interval = " << table_interval; + std::cout << "\n iq=" << iq << " table_length = " << table_length << std::endl; + std::cout << "\n Not enough space allocated for radial FFT." 
<< std::endl; + std::cout << " It is due to the rapid change of the size of cell:" << std::endl; + std::cout << " Try reseting a larger cell_factor parameter in INPUT" << std::endl; // LiuXh add 20180619 + std::cout << " Or try reseting a smaller relax_scale_force parameter in INPUT\n" << std::endl; + exit (0); + } + + const double x0 = position - static_cast (iq); const double x1 = 1.0 - x0; const double x2 = 2.0 - x0; const double x3 = 3.0 - x0; - const double y= - table(dim1, dim2, iq) * x1 * x2 * x3 / 6.0 + - table(dim1, dim2, iq+1) * x0 * x2 * x3 / 2.0 - - table(dim1, dim2, iq+2) * x1 * x0 * x3 / 2.0 + - table(dim1, dim2, iq+3) * x1 * x2 * x0 / 6.0 ; + const double y = table (dim1, dim2, iq) * x1 * x2 * x3 / 6.0 + table (dim1, dim2, iq + 1) * x0 * x2 * x3 / 2.0 + - table (dim1, dim2, iq + 2) * x1 * x0 * x3 / 2.0 + + table (dim1, dim2, iq + 3) * x1 * x2 * x0 / 6.0; -// ModuleBase::timer::end("PolyInt","Poly_Interpo_2"); + // ModuleBase::timer::end("PolyInt","Poly_Interpo_2"); return y; } -double PolyInt::Polynomial_Interpolation // pengfei Li 2018-3-23 -( - const ModuleBase::realArray &table, - const int &dim1, - const int &dim2, - const int &dim3, - const int &table_length, - const double &table_interval, - const double &x // input value -) +double PolyInt::Polynomial_Interpolation // pengfei Li 2018-3-23 + (const ModuleBase::realArray& table, + const int& dim1, + const int& dim2, + const int& dim3, + const int& table_length, + const double& table_interval, + const double& x // input value + ) { -// ModuleBase::timer::start("PolyInt","Poly_Interpo_3"); - assert(table_interval>0.0); + // ModuleBase::timer::start("PolyInt","Poly_Interpo_3"); + assert (table_interval > 0.0); const double position = x / table_interval; - const int iq = static_cast(position); - - if(iq>table_length-4) - { - std::cout << "\n x = " << x; - std::cout << "\n table_interval = " << table_interval; - std::cout << "\n iq=" << iq << " table_length = " << table_length << std::endl; - } - 
assert(iq < table_length-4); - const double x0 = position - static_cast(iq); + const int iq = static_cast (position); + + if (iq > table_length - 4) + { + std::cout << "\n x = " << x; + std::cout << "\n table_interval = " << table_interval; + std::cout << "\n iq=" << iq << " table_length = " << table_length << std::endl; + } + assert (iq < table_length - 4); + const double x0 = position - static_cast (iq); const double x1 = 1.0 - x0; const double x2 = 2.0 - x0; const double x3 = 3.0 - x0; - const double y= - table(dim1, dim2, dim3, iq) * x1 * x2 * x3 / 6.0 + - table(dim1, dim2, dim3, iq+1) * x0 * x2 * x3 / 2.0 - - table(dim1, dim2, dim3, iq+2) * x1 * x0 * x3 / 2.0 + - table(dim1, dim2, dim3, iq+3) * x1 * x2 * x0 / 6.0 ; + const double y = table (dim1, dim2, dim3, iq) * x1 * x2 * x3 / 6.0 + + table (dim1, dim2, dim3, iq + 1) * x0 * x2 * x3 / 2.0 + - table (dim1, dim2, dim3, iq + 2) * x1 * x0 * x3 / 2.0 + + table (dim1, dim2, dim3, iq + 3) * x1 * x2 * x0 / 6.0; -// ModuleBase::timer::end("PolyInt","Poly_Interpo_3"); + // ModuleBase::timer::end("PolyInt","Poly_Interpo_3"); return y; } -double PolyInt::Polynomial_Interpolation -( - const double *table, - const int &table_length, - const double &table_interval, - const double &x // input value -) +double + PolyInt::Polynomial_Interpolation (const double* table, + const int& table_length, + const double& table_interval, + const double& x // input value + ) { -// assert(table_interval>0); + // assert(table_interval>0); const double position = x / table_interval; - const int iq = static_cast(position); -// if(iq >= table_length-4) -// std::cout << "\n iq = " << iq << " table_length = " << table_length; - - if(iq > table_length-4) - { - return 0.0; - } -// assert(iq < table_length-4); - const double x0 = position - static_cast(iq); + const int iq = static_cast (position); + // if(iq >= table_length-4) + // std::cout << "\n iq = " << iq << " table_length = " << table_length; + + if (iq > table_length - 4) + { + return 0.0; + 
} + // assert(iq < table_length-4); + const double x0 = position - static_cast (iq); const double x1 = 1.0 - x0; const double x2 = 2.0 - x0; const double x3 = 3.0 - x0; /* const double y= - table[iq] * x1 * x2 * x3 / 6.0 + - table[iq+1] * x0 * x2 * x3 / 2.0 - - table[iq+2] * x1 * x0 * x3 / 2.0 + - table[iq+3] * x1 * x2 * x0 / 6.0 ; - */ - - return x1*x2*(table[iq]*x3+table[iq+3]*x0)/6.0 - + x0*x3*(table[iq+1]*x2-table[iq+2]*x1)/2.0; + table[iq] * x1 * x2 * x3 / 6.0 + + table[iq+1] * x0 * x2 * x3 / 2.0 - + table[iq+2] * x1 * x0 * x3 / 2.0 + + table[iq+3] * x1 * x2 * x0 / 6.0 ; + */ + + return x1 * x2 * (table[iq] * x3 + table[iq + 3] * x0) / 6.0 + + x0 * x3 * (table[iq + 1] * x2 - table[iq + 2] * x1) / 2.0; } -double PolyInt::Polynomial_Interpolation_xy -( - const double *xpoint, - const double *ypoint, - const int table_length, - const double &x // input value -) +double + PolyInt::Polynomial_Interpolation_xy (const double* xpoint, + const double* ypoint, + const int table_length, + const double& x // input value + ) { int position = -1; if (x < xpoint[0]) - { - return ypoint[0]; - } + { + return ypoint[0]; + } // ModuleBase::timer::start("PolyInt","Poly_Inter_xy"); for (int ik = 0; ik < table_length; ik++) - { - if (x < xpoint[ik]) - { - break; - } - else { - position ++; + if (x < xpoint[ik]) + { + break; + } + else + { + position++; + } } - } - assert(position >= 0); - assert(position <= table_length-1); + assert (position >= 0); + assert (position <= table_length - 1); if (position + 6 < table_length) - { - double dx1, dx2, dx3, dx4, dx5, dx6; - dx1 = x - xpoint[position]; - dx2 = x - xpoint[position+1]; - dx3 = x - xpoint[position+2]; - dx4 = x - xpoint[position+3]; - dx5 = x - xpoint[position+4]; - dx6 = x - xpoint[position+5]; - - - double x12, x13, x14, x15, x16, x23, x24, x25, x26, x34, x35, x36, x45, x46, x56; - x12 = xpoint[position] - xpoint[position+1]; - x13 = xpoint[position] - xpoint[position+2]; - x14 = xpoint[position] - xpoint[position+3]; - x15 
= xpoint[position] - xpoint[position+4]; - x16 = xpoint[position] - xpoint[position+5]; - - - x23 = xpoint[position+1] - xpoint[position+2]; - x24 = xpoint[position+1] - xpoint[position+3]; - x25 = xpoint[position+1] - xpoint[position+4]; - x26 = xpoint[position+1] - xpoint[position+5]; - - x34 = xpoint[position+2] - xpoint[position+3]; - x35 = xpoint[position+2] - xpoint[position+4]; - x36 = xpoint[position+2] - xpoint[position+5]; - - x45 = xpoint[position+3] - xpoint[position+4]; - x46 = xpoint[position+3] - xpoint[position+5]; - - x56 = xpoint[position+4] - xpoint[position+5]; - - double part1, part2, part3, part4, part5, part6; - part1 = dx2 * dx3 * dx4 * dx5 * dx6 / x12 / x13 / x14 / x15 / x16 * ypoint[position]; - part2 = dx1 * dx3 * dx4 * dx5 * dx6 / (-x12) / x23 / x24 / x25 / x26 * ypoint[position+1]; - part3 = dx1 * dx2 * dx4 * dx5 * dx6 / (-x13) / (-x23) / x34 / x35 / x36 * ypoint[position+2]; - part4 = dx1 * dx2 * dx3 * dx5 * dx6 / (-x14) / (-x24) / (-x34) / x45 / x46 * ypoint[position+3]; - part5 = dx1 * dx2 * dx3 * dx4 * dx6 / (-x15) / (-x25) / (-x35) / (-x45) / x56 * ypoint[position+4]; - part6 = dx1 * dx2 * dx3 * dx4 * dx5 / (-x16) / (-x26) / (-x36) / (-x46) / (-x56) * ypoint[position+5]; - - // ModuleBase::timer::end("PolyInt","Poly_Inter_xy"); - return part1 + part2 + part3 + part4 + part5 + part6; - } + { + double dx1, dx2, dx3, dx4, dx5, dx6; + dx1 = x - xpoint[position]; + dx2 = x - xpoint[position + 1]; + dx3 = x - xpoint[position + 2]; + dx4 = x - xpoint[position + 3]; + dx5 = x - xpoint[position + 4]; + dx6 = x - xpoint[position + 5]; + + double x12, x13, x14, x15, x16, x23, x24, x25, x26, x34, x35, x36, x45, x46, x56; + x12 = xpoint[position] - xpoint[position + 1]; + x13 = xpoint[position] - xpoint[position + 2]; + x14 = xpoint[position] - xpoint[position + 3]; + x15 = xpoint[position] - xpoint[position + 4]; + x16 = xpoint[position] - xpoint[position + 5]; + + x23 = xpoint[position + 1] - xpoint[position + 2]; + x24 = xpoint[position + 1] 
- xpoint[position + 3]; + x25 = xpoint[position + 1] - xpoint[position + 4]; + x26 = xpoint[position + 1] - xpoint[position + 5]; + + x34 = xpoint[position + 2] - xpoint[position + 3]; + x35 = xpoint[position + 2] - xpoint[position + 4]; + x36 = xpoint[position + 2] - xpoint[position + 5]; + + x45 = xpoint[position + 3] - xpoint[position + 4]; + x46 = xpoint[position + 3] - xpoint[position + 5]; + + x56 = xpoint[position + 4] - xpoint[position + 5]; + + double part1, part2, part3, part4, part5, part6; + part1 = dx2 * dx3 * dx4 * dx5 * dx6 / x12 / x13 / x14 / x15 / x16 * ypoint[position]; + part2 = dx1 * dx3 * dx4 * dx5 * dx6 / (-x12) / x23 / x24 / x25 / x26 * ypoint[position + 1]; + part3 = dx1 * dx2 * dx4 * dx5 * dx6 / (-x13) / (-x23) / x34 / x35 / x36 * ypoint[position + 2]; + part4 = dx1 * dx2 * dx3 * dx5 * dx6 / (-x14) / (-x24) / (-x34) / x45 / x46 * ypoint[position + 3]; + part5 = dx1 * dx2 * dx3 * dx4 * dx6 / (-x15) / (-x25) / (-x35) / (-x45) / x56 * ypoint[position + 4]; + part6 = dx1 * dx2 * dx3 * dx4 * dx5 / (-x16) / (-x26) / (-x36) / (-x46) / (-x56) * ypoint[position + 5]; + + // ModuleBase::timer::end("PolyInt","Poly_Inter_xy"); + return part1 + part2 + part3 + part4 + part5 + part6; + } else - { - // ModuleBase::timer::start("PolyInt","Poly_Inter_xy"); - return ypoint[position]; - } + { + // ModuleBase::timer::start("PolyInt","Poly_Inter_xy"); + return ypoint[position]; + } } -} +} // namespace ModuleBase diff --git a/source/source_base/math_polyint.h b/source/source_base/math_polyint.h index b15f50678c3..77b276d1565 100644 --- a/source/source_base/math_polyint.h +++ b/source/source_base/math_polyint.h @@ -10,10 +10,9 @@ namespace ModuleBase class PolyInt { - public: - - PolyInt(); - ~PolyInt(); + public: + PolyInt (); + ~PolyInt (); //======================================================== // Polynomial_Interpolation @@ -21,7 +20,7 @@ class PolyInt /** * @brief Lagrange interpolation - * + * * @param table [in] three dimension matrix, the data in 3rd 
dimension is used to do prediction * @param dim1 [in] index of 1st dimension of table/y * @param dim2 [in] index of 2nd dimension of table/y @@ -31,21 +30,18 @@ class PolyInt * @param table_interval [in] interval of 3rd dimension of table * @param x [in] the position in 3rd dimension to be predicted */ - static void Polynomial_Interpolation - ( - const ModuleBase::realArray &table, - const int &dim1, - const int &dim2, - ModuleBase::realArray &y, - const int &dim_y, - const int &table_length, - const double &table_interval, - const double &x - ); + static void Polynomial_Interpolation (const ModuleBase::realArray& table, + const int& dim1, + const int& dim2, + ModuleBase::realArray& y, + const int& dim_y, + const int& table_length, + const double& table_interval, + const double& x); /** * @brief Lagrange interpolation - * + * * @param table [in] three dimension matrix, the data in 3rd dimension is used to do prediction * @param dim1 [in] index of 1st dimension of table * @param dim2 [in] index of 2nd dimension of table @@ -54,23 +50,20 @@ class PolyInt * @param x [in] the position in 3rd dimension to be predicted * @return double the predicted value */ - static double Polynomial_Interpolation - ( - const ModuleBase::realArray &table, - const int &dim1, - const int &dim2, - const int &table_length, - const double &table_interval, - const double &x - ); + static double Polynomial_Interpolation (const ModuleBase::realArray& table, + const int& dim1, + const int& dim2, + const int& table_length, + const double& table_interval, + const double& x); /** * @brief Lagrange interpolation - * + * * @param table [in] four dimension matrix, the data in 4th dimension is used to do prediction * @param dim1 [in] index of 1st dimension of table - * @param dim2 [in] index of 2nd dimension of table - * @param dim3 [in] index of 3rd dimension of table + * @param dim2 [in] index of 2nd dimension of table + * @param dim3 [in] index of 3rd dimension of table * @param table_length [in] 
length of 4th dimension of table * @param table_interval [in] interval of 4th dimension of table * @param x [in] the position in 4th dimension to be predicted @@ -78,51 +71,41 @@ class PolyInt * @author pengfei Li * @date 2018-3-23 */ - static double Polynomial_Interpolation - ( - const ModuleBase::realArray &table, - const int &dim1, - const int &dim2, - const int &dim3, - const int &table_length, - const double &table_interval, - const double &x - ); + static double Polynomial_Interpolation (const ModuleBase::realArray& table, + const int& dim1, + const int& dim2, + const int& dim3, + const int& table_length, + const double& table_interval, + const double& x); /** * @brief Lagrange interpolation - * + * * @param table [in] the data used to do prediction * @param table_length [in] length of table * @param table_interval [in] interval of table * @param x [in] the position to be predicted * @return double the predicted value */ - static double Polynomial_Interpolation - ( - const double *table, - const int &table_length, - const double &table_interval, - const double &x - ); + static double Polynomial_Interpolation (const double* table, + const int& table_length, + const double& table_interval, + const double& x); /** * @brief Lagrange interpolation - * + * * @param xpoint [in] array of postion * @param ypoint [in] array of data to do prediction * @param table_length [in] length of xpoint * @param x [in] position to be predicted * @return double predicted value */ - static double Polynomial_Interpolation_xy - ( - const double *xpoint, - const double *ypoint, - const int table_length, - const double &x - ); - + static double Polynomial_Interpolation_xy (const double* xpoint, + const double* ypoint, + const int table_length, + const double& x); }; -} +} // namespace ModuleBase #endif diff --git a/source/source_base/math_sphbes.cpp b/source/source_base/math_sphbes.cpp index 43b65073de2..d44ac7ec37b 100644 --- a/source/source_base/math_sphbes.cpp +++ 
b/source/source_base/math_sphbes.cpp @@ -8,10 +8,11 @@ namespace ModuleBase { -Sphbes::Sphbes(){} -Sphbes::~Sphbes(){} +Sphbes::Sphbes () {} +Sphbes::~Sphbes () {} -void Sphbes::BESSJY(double x, double xnu, double *rj, double *ry, double *rjp, double *ryp) +void + Sphbes::BESSJY (double x, double xnu, double* rj, double* ry, double* rjp, double* ryp) { const int XMIN = 2.0; const double FPMIN = 1.0e-30; @@ -40,14 +41,13 @@ void Sphbes::BESSJY(double x, double xnu, double *rj, double *ry, double *rjp, d double x2 = 0.0, xi = 0.0, xi2 = 0.0; if (x <= 0.0 || xnu < 0.0) - { - std::cout << "Sphbes::BESSJY, bad arguments" << std::endl; - //ModuleBase::WARNING_QUIT("Sphbes::BESSJY","bad arguments"); - exit(0); // mohan add 2021-05-06 - } - + { + std::cout << "Sphbes::BESSJY, bad arguments" << std::endl; + // ModuleBase::WARNING_QUIT("Sphbes::BESSJY","bad arguments"); + exit (0); // mohan add 2021-05-06 + } - nl = (x < XMIN ? (int)(xnu + 0.5) : IMAX(0, (int)(xnu - x + 1.5))); + nl = (x < XMIN ? 
(int)(xnu + 0.5) : IMAX (0, (int)(xnu - x + 1.5))); const double xmu = xnu - nl; const double xmu2 = xmu * xmu; xi = 1.0 / x; @@ -57,9 +57,9 @@ void Sphbes::BESSJY(double x, double xnu, double *rj, double *ry, double *rjp, d h = xnu * xi; if (h < FPMIN) - { - h = FPMIN; - } + { + h = FPMIN; + } b = xi2 * xnu; @@ -67,32 +67,44 @@ void Sphbes::BESSJY(double x, double xnu, double *rj, double *ry, double *rjp, d c = h; - for (i = 1;i <= MAXIT;i++) - { - b += xi2; - d = b - d; + for (i = 1; i <= MAXIT; i++) + { + b += xi2; + d = b - d; - if (std::fabs(d) < FPMIN) d = FPMIN; + if (std::fabs (d) < FPMIN) + { + d = FPMIN; + } - c = b - 1.0 / c; + c = b - 1.0 / c; - if (std::fabs(c) < FPMIN) c = FPMIN; + if (std::fabs (c) < FPMIN) + { + c = FPMIN; + } - d = 1.0 / d; + d = 1.0 / d; - del = c * d; + del = c * d; - h = del * h; + h = del * h; - if (d < 0.0) isign = -isign; + if (d < 0.0) + { + isign = -isign; + } - if (std::fabs(del - 1.0) < EPS) break; - } + if (std::fabs (del - 1.0) < EPS) + { + break; + } + } if (i > MAXIT) - { - std::cout << "x too large in bessjy; try asymptotic expansion" << std::endl; - } + { + std::cout << "x too large in bessjy; try asymptotic expansion" << std::endl; + } rjl = isign * FPMIN; @@ -104,617 +116,657 @@ void Sphbes::BESSJY(double x, double xnu, double *rj, double *ry, double *rjp, d fact = xnu * xi; - for (l = nl;l >= 1;l--) - { - rjtemp = fact * rjl + rjpl; - fact -= xi; - rjpl = fact * rjtemp - rjl; - rjl = rjtemp; - } + for (l = nl; l >= 1; l--) + { + rjtemp = fact * rjl + rjpl; + fact -= xi; + rjpl = fact * rjtemp - rjl; + rjl = rjtemp; + } if (rjl == 0.0) - { - rjl = EPS; - } + { + rjl = EPS; + } f = rjpl / rjl; if (x < XMIN) - { - x2 = 0.5 * x; - pimu = ModuleBase::PI * xmu; - fact = (std::fabs(pimu) < EPS ? 1.0 : pimu / std::sin(pimu)); - d = -log(x2); - e = xmu * d; - fact2 = (std::fabs(e) < EPS ? 
1.0 : std::sinh(e) / e); - // call BESCHB - BESCHB(xmu, &gam1, &gam2, &gampl, &gammi); - ff = 2.0 / ModuleBase::PI * fact * (gam1 * std::cosh(e) + gam2 * fact2 * d); - e = std::exp(e); - p = e / (gampl * ModuleBase::PI); - q = 1.0 / (e * ModuleBase::PI * gammi); - pimu2 = 0.5 * pimu; - fact3 = (std::fabs(pimu2) < EPS ? 1.0 : std::sin(pimu2) / pimu2); - r = ModuleBase::PI * pimu2 * fact3 * fact3; - c = 1.0; - d = -x2 * x2; - sum = ff + r * q; - sum1 = p; - - for (i = 1;i <= MAXIT;i++) - { - ff = (i * ff + p + q) / (i * i - xmu2); - c *= (d / i); - p /= (i - xmu); - q /= (i + xmu); - del = c * (ff + r * q); - sum += del; - del1 = c * p - i * del; - sum1 += del1; - - if (std::fabs(del) < (1.0 + std::fabs(sum))*EPS) break; - } - - if (i > MAXIT) std::cout << "bessy series failed to converge"; - - rymu = -sum; - - ry1 = -sum1 * xi2; - - rymup = xmu * xi * rymu - ry1; - - rjmu = w / (rymup - f * rymu); - } + { + x2 = 0.5 * x; + pimu = ModuleBase::PI * xmu; + fact = (std::fabs (pimu) < EPS ? 1.0 : pimu / std::sin (pimu)); + d = -log (x2); + e = xmu * d; + fact2 = (std::fabs (e) < EPS ? 1.0 : std::sinh (e) / e); + // call BESCHB + BESCHB (xmu, &gam1, &gam2, &gampl, &gammi); + ff = 2.0 / ModuleBase::PI * fact * (gam1 * std::cosh (e) + gam2 * fact2 * d); + e = std::exp (e); + p = e / (gampl * ModuleBase::PI); + q = 1.0 / (e * ModuleBase::PI * gammi); + pimu2 = 0.5 * pimu; + fact3 = (std::fabs (pimu2) < EPS ? 
1.0 : std::sin (pimu2) / pimu2); + r = ModuleBase::PI * pimu2 * fact3 * fact3; + c = 1.0; + d = -x2 * x2; + sum = ff + r * q; + sum1 = p; + + for (i = 1; i <= MAXIT; i++) + { + ff = (i * ff + p + q) / (i * i - xmu2); + c *= (d / i); + p /= (i - xmu); + q /= (i + xmu); + del = c * (ff + r * q); + sum += del; + del1 = c * p - i * del; + sum1 += del1; + + if (std::fabs (del) < (1.0 + std::fabs (sum)) * EPS) + { + break; + } + } - else - { - a = 0.25 - xmu2; - p = -0.5 * xi; - q = 1.0; - br = 2.0 * x; - bi = 2.0; - fact = a * xi / (p * p + q * q); - cr = br + q * fact; - ci = bi + p * fact; - den = br * br + bi * bi; - dr = br / den; - di = -bi / den; - dlr = cr * dr - ci * di; - dli = cr * di + ci * dr; - temp = p * dlr - q * dli; - q = p * dli + q * dlr; - p = temp; + if (i > MAXIT) + { + std::cout << "bessy series failed to converge"; + } + + rymu = -sum; + + ry1 = -sum1 * xi2; + + rymup = xmu * xi * rymu - ry1; - for (i = 2;i <= MAXIT;i++) + rjmu = w / (rymup - f * rymu); + } + + else { - a += 2 * (i - 1); - bi += 2.0; - dr = a * dr + br; - di = a * di + bi; + a = 0.25 - xmu2; + p = -0.5 * xi; + q = 1.0; + br = 2.0 * x; + bi = 2.0; + fact = a * xi / (p * p + q * q); + cr = br + q * fact; + ci = bi + p * fact; + den = br * br + bi * bi; + dr = br / den; + di = -bi / den; + dlr = cr * dr - ci * di; + dli = cr * di + ci * dr; + temp = p * dlr - q * dli; + q = p * dli + q * dlr; + p = temp; - if (std::fabs(dr) + std::fabs(di) < FPMIN) dr = FPMIN; + for (i = 2; i <= MAXIT; i++) + { + a += 2 * (i - 1); + bi += 2.0; + dr = a * dr + br; + di = a * di + bi; - fact = a / (cr * cr + ci * ci); + if (std::fabs (dr) + std::fabs (di) < FPMIN) + { + dr = FPMIN; + } - cr = br + cr * fact; + fact = a / (cr * cr + ci * ci); - ci = bi - ci * fact; + cr = br + cr * fact; - if (std::fabs(cr) + std::fabs(ci) < FPMIN) cr = FPMIN; + ci = bi - ci * fact; - den = dr * dr + di * di; + if (std::fabs (cr) + std::fabs (ci) < FPMIN) + { + cr = FPMIN; + } - dr /= den; + den = dr * dr + di * di; - 
di /= -den; + dr /= den; - dlr = cr * dr - ci * di; + di /= -den; - dli = cr * di + ci * dr; + dlr = cr * dr - ci * di; - temp = p * dlr - q * dli; + dli = cr * di + ci * dr; - q = p * dli + q * dlr; + temp = p * dlr - q * dli; - p = temp; + q = p * dli + q * dlr; - if (std::fabs(dlr - 1.0) + std::fabs(dli) < EPS) break; - } + p = temp; - if (i > MAXIT) std::cout << "cf2 failed in bessjy"; + if (std::fabs (dlr - 1.0) + std::fabs (dli) < EPS) + { + break; + } + } - gam = (p - f) / q; + if (i > MAXIT) + { + std::cout << "cf2 failed in bessjy"; + } - rjmu = std::sqrt(w / ((p - f) * gam + q)); + gam = (p - f) / q; - if (rjl >=0 ) rjmu = std::fabs(rjmu); - else rjmu = -std::fabs(rjmu); + rjmu = std::sqrt (w / ((p - f) * gam + q)); - rymu = rjmu * gam; + if (rjl >= 0) + { + rjmu = std::fabs (rjmu); + } + else + { + rjmu = -std::fabs (rjmu); + } + + rymu = rjmu * gam; - rymup = rymu * (p + q / gam); + rymup = rymu * (p + q / gam); - ry1 = xmu * xi * rymu - rymup; - } + ry1 = xmu * xi * rymu - rymup; + } fact = rjmu / rjl; *rj = rjl1 * fact; *rjp = rjp1 * fact; - for (i = 1;i <= nl;i++) - { - rytemp = (xmu + i) * xi2 * ry1 - rymu; - rymu = ry1; - ry1 = rytemp; - } + for (i = 1; i <= nl; i++) + { + rytemp = (xmu + i) * xi2 * ry1 - rymu; + rymu = ry1; + ry1 = rytemp; + } *ry = rymu; *ryp = xnu * xi * rymu - ry1; } - -int Sphbes::IMAX(int a, int b) +int + Sphbes::IMAX (int a, int b) { - if (a > b) return a; - else return b; + if (a > b) + { + return a; + } + else + { + return b; + } } - -void Sphbes::BESCHB(double x, double *gam1, double *gam2, double *gampl, double *gammi) +void + Sphbes::BESCHB (double x, double* gam1, double* gam2, double* gampl, double* gammi) { const int NUSE1 = 7; const int NUSE2 = 8; double xx = 0; - static double c1[] = { -1.142022680371168e0, 6.5165112670737e-3, - 3.087090173086e-4, -3.4706269649e-6, - 6.9437664e-9, 3.67795e-11, -1.356e-13 - }; - static double c2[] = { 1.843740587300905e0, -7.68528408447867e-2, - 1.2719271366546e-3, -4.9717367042e-6, 
-3.31261198e-8, - 2.423096e-10, -1.702e-13, -1.49e-15 - }; - xx = 8.0 * x * x - 1.0; //Multiply x by 2 to make range be .1 to 1,and then apply transformation for evaluating even Chebyshev series. - *gam1 = CHEBEV(-1.0, 1.0, c1, NUSE1, xx); - *gam2 = CHEBEV(-1.0, 1.0, c2, NUSE2, xx); + static double c1[] = {-1.142022680371168e0, + 6.5165112670737e-3, + 3.087090173086e-4, + -3.4706269649e-6, + 6.9437664e-9, + 3.67795e-11, + -1.356e-13}; + static double c2[] = {1.843740587300905e0, + -7.68528408447867e-2, + 1.2719271366546e-3, + -4.9717367042e-6, + -3.31261198e-8, + 2.423096e-10, + -1.702e-13, + -1.49e-15}; + xx = 8.0 * x * x - 1.0; // Multiply x by 2 to make range be .1 to 1,and then apply transformation for evaluating + // even Chebyshev series. + *gam1 = CHEBEV (-1.0, 1.0, c1, NUSE1, xx); + *gam2 = CHEBEV (-1.0, 1.0, c2, NUSE2, xx); *gampl = *gam2 - x * (*gam1); *gammi = *gam2 + x * (*gam1); } -double Sphbes::CHEBEV(double a, double b, double c[], int m, double x) +double + Sphbes::CHEBEV (double a, double b, double c[], int m, double x) { double d = 0.0; - double dd = 0.0; - double sv = 0.0; - double y = 0.0; - double y2 = 0.0; - int j=0; + double dd = 0.0; + double sv = 0.0; + double y = 0.0; + double y2 = 0.0; + int j = 0; - if ((x - a)*(x - b) > 0.0) - { - std::cout << "x not in range in routine chebev" << std::endl; - } + if ((x - a) * (x - b) > 0.0) + { + std::cout << "x not in range in routine chebev" << std::endl; + } y2 = 2.0 * (y = (2.0 * x - a - b) / (b - a)); - for (j = m - 1;j >= 1;j--) - { - sv = d; - d = y2 * d - dd + c[j]; - dd = sv; - } + for (j = m - 1; j >= 1; j--) + { + sv = d; + d = y2 * d - dd + c[j]; + dd = sv; + } - return y*d - dd + 0.5*c[0]; + return y * d - dd + 0.5 * c[0]; } - -double Sphbes::Spherical_Bessel_7(const int n, const double &x) +double + Sphbes::Spherical_Bessel_7 (const int n, const double& x) { - if (x==0) - { - if (n!=0) return 0; - if (n==0) return 1; - } + if (x == 0) + { + if (n != 0) + { + return 0; + } + if (n == 0) 
+ { + return 1; + } + } double order = 0.0, rj = 0.0, rjp = 0.0, ry = 0.0, ryp = 0.0; if (n < 0 || x <= 0.0) - { - std::cout << "Spherical_Bessel_7, bad arguments in sphbes" << std::endl; - //ModuleBase::WARNING_QUIT("Sphbes::Spherical_Bessel_7","bad arguments in sphbes"); - exit(0); - } + { + std::cout << "Spherical_Bessel_7, bad arguments in sphbes" << std::endl; + // ModuleBase::WARNING_QUIT("Sphbes::Spherical_Bessel_7","bad arguments in sphbes"); + exit (0); + } order = n + 0.5; // call BESSSJY - BESSJY(x, order, &rj, &ry, &rjp, &ryp); + BESSJY (x, order, &rj, &ry, &rjp, &ryp); - const double RTPIO2=1.2533141; + const double RTPIO2 = 1.2533141; - const double factor = RTPIO2 / std::sqrt(x); + const double factor = RTPIO2 / std::sqrt (x); - return factor*rj; + return factor * rj; } - -void Sphbes::Spherical_Bessel_Roots -( - const int &num, - const int &l, - const double &epsilon, - double* eigenvalue, - const double &rcut -) +void + Sphbes::Spherical_Bessel_Roots (const int& num, + const int& l, + const double& epsilon, + double* eigenvalue, + const double& rcut) { - //ModuleBase::TITLE("Sphbes","Spherical_Bessel_Roots"); - if (num<=0) - { - std::cout << "Spherical_Bessel_Roots, num<=0" << std::endl; - //ModuleBase::WARNING_QUIT("Sphbes::Spherical_Bessel_Roots","num<=0"); - exit(0); - } - if (rcut<=0.0) - { - std::cout << "Spherical_Bessel_Roots, rcut<=0" << std::endl; - //ModuleBase::WARNING_QUIT("Sphbes::Spherical_Bessel_Roots","rcut<=0.0"); - exit(0); - } + // ModuleBase::TITLE("Sphbes","Spherical_Bessel_Roots"); + if (num <= 0) + { + std::cout << "Spherical_Bessel_Roots, num<=0" << std::endl; + // ModuleBase::WARNING_QUIT("Sphbes::Spherical_Bessel_Roots","num<=0"); + exit (0); + } + if (rcut <= 0.0) + { + std::cout << "Spherical_Bessel_Roots, rcut<=0" << std::endl; + // ModuleBase::WARNING_QUIT("Sphbes::Spherical_Bessel_Roots","rcut<=0.0"); + exit (0); + } double min = 0.0; - double max = 2*ModuleBase::PI + (num + (l+0.5)/2 + 0.75)*ModuleBase::PI/2 + - 
std::sqrt((num + (l+0.5)/2+0.75)*(num + (l+0.5)/2+0.75)*ModuleBase::PI*ModuleBase::PI/4-(l+0.5)*(l+0.5)/2); + double max + = 2 * ModuleBase::PI + (num + (l + 0.5) / 2 + 0.75) * ModuleBase::PI / 2 + + std::sqrt ((num + (l + 0.5) / 2 + 0.75) * (num + (l + 0.5) / 2 + 0.75) * ModuleBase::PI * ModuleBase::PI / 4 + - (l + 0.5) * (l + 0.5) / 2); // magic number !! // guess : only need to > 1 const int msh = 10 * num; -// std::cout<<"\n msh = "< 1.0e-8) - { - ir0 = 0;//mohan modify 2007-10-13 - } - else + if (std::fabs (q) < 1.0e-8) { if (l == -1) - { - std::cout << "\n sph_bes, j_{-1}(0) ?//?"; - } + { + std::cout << "\n sph_bes, j_{-1}(0) ????"; + } else if (l == 0) - { - jl [0] = 1.0;//mohan modify 2007-10-13 - } + { + for (i = 0; i < msh; i++) + { + jl[i] = 1.0; + } + } else - { - jl [0] = 0.0;//mohan modify 2007-10-13 - } - ir0 = 1;//mohan modify 2007-10-13 - } - if (l == - 1) - { - for (ir = ir0;ir < msh; ir++) - { - x1 = q * r[ir]; - jl [ir] = std::cos(x1) / x1; - } - } - else if (l == 0) - { - for (ir = ir0;ir < msh;ir++) - { - x1 = q * r[ir]; - jl [ir] = std::sin(x1) / x1; - } - } - else if (l == 1) - { - for (ir = ir0;ir < msh;ir++) - { - x1 = q * r[ir]; - const double sinx = std::sin(x1); - const double cosx = std::cos(x1); - jl [ir] = (sinx / x1 - cosx) / x1; - } - } - else if (l == 2) - { - for (ir = ir0;ir < msh;ir++) - { - const double x1 = q * r[ir]; - const double sinx = std::sin(x1); - const double cosx = std::cos(x1); - jl [ir] = ((3.0 / x1 - x1) * sinx - - 3.0 * cosx) / (x1 * x1); - } - } - else if (l == 3) - { - for (ir = ir0;ir < msh;ir++) - { - x1 = q * r[ir]; - jl [ir] = (std::sin(x1) * (15.0 / x1 - 6.0 * x1) + - std::cos(x1) * (x1 * x1 - 15.0)) / std::pow(x1, 3);//mohan modify 2007-10-13 - } - } - else if (l == 4) - { - for (ir = ir0;ir < msh;ir++) - { - const double x1 = q * r[ir]; - const double x2 = x1 * x1; - const double x3 = x1 * x2; - const double x4 = x1 * x3; - const double x5 = x1 * x4; - jl [ir] = (std::sin(x1) * (105.0 - 45.0 * x2 + x4) 
+ - std::cos(x1) * (10.0 * x3 - 105.0 * x1)) / x5; // mohan modify 2007-10-13 - } + { + for (i = 0; i < msh; i++) + { + jl[i] = 0.0; + } + } } - else if (l == 5) + else { - for (ir = ir0;ir < msh;ir++) - { - x1 = q * r[ir]; - - if (x1 < 0.14) + if (std::fabs (q * r[0]) > 1.0e-8) { - jl[ir] = 0;//mohan add 2007-10-15 + ir0 = 0; // mohan modify 2007-10-13 } - else + else { - double cx1 = std::cos(x1); - double sx1 = std::sin(x1); - jl [ir] = (-cx1 - - (945.0 * cx1) / std::pow(x1, 4) + - (105.0 * cx1) / (x1 * x1) + - (945.0 * sx1) / std::pow(x1, 5) - - (420.0 * sx1) / std::pow(x1, 3) + - (15.0 * sx1) / x1) / x1; - + if (l == -1) + { + std::cout << "\n sph_bes, j_{-1}(0) ?//?"; + } + else if (l == 0) + { + jl[0] = 1.0; // mohan modify 2007-10-13 + } + else + { + jl[0] = 0.0; // mohan modify 2007-10-13 + } + ir0 = 1; // mohan modify 2007-10-13 } - } - } - else if (l == 6) - { - for (ir = ir0;ir < msh;ir++) - { - x1 = q * r[ir]; - - if (x1 < 0.29) + if (l == -1) { - jl[ir] = 0;//mohan add 2007-10-15 + for (ir = ir0; ir < msh; ir++) + { + x1 = q * r[ir]; + jl[ir] = std::cos (x1) / x1; + } } - else + else if (l == 0) { - double cx1 = std::cos(x1); - double sx1 = std::sin(x1); - jl [ir] = ((-10395.0 * cx1) / std::pow(x1, 5) + - (1260.0 * cx1) / std::pow(x1, 3) - - (21.0 * cx1) / x1 - sx1 + - (10395.0 * sx1) / std::pow(x1, 6) - - (4725.0 * sx1) / std::pow(x1, 4) + - (210.0 * sx1) / (x1 * x1)) / x1; + for (ir = ir0; ir < msh; ir++) + { + x1 = q * r[ir]; + jl[ir] = std::sin (x1) / x1; + } + } + else if (l == 1) + { + for (ir = ir0; ir < msh; ir++) + { + x1 = q * r[ir]; + const double sinx = std::sin (x1); + const double cosx = std::cos (x1); + jl[ir] = (sinx / x1 - cosx) / x1; + } + } + else if (l == 2) + { + for (ir = ir0; ir < msh; ir++) + { + const double x1 = q * r[ir]; + const double sinx = std::sin (x1); + const double cosx = std::cos (x1); + jl[ir] = ((3.0 / x1 - x1) * sinx - 3.0 * cosx) / (x1 * x1); + } + } + else if (l == 3) + { + for (ir = ir0; ir < msh; ir++) + { + 
x1 = q * r[ir]; + jl[ir] = (std::sin (x1) * (15.0 / x1 - 6.0 * x1) + std::cos (x1) * (x1 * x1 - 15.0)) + / std::pow (x1, 3); // mohan modify 2007-10-13 + } + } + else if (l == 4) + { + for (ir = ir0; ir < msh; ir++) + { + const double x1 = q * r[ir]; + const double x2 = x1 * x1; + const double x3 = x1 * x2; + const double x4 = x1 * x3; + const double x5 = x1 * x4; + jl[ir] + = (std::sin (x1) * (105.0 - 45.0 * x2 + x4) + std::cos (x1) * (10.0 * x3 - 105.0 * x1)) + / x5; // mohan modify 2007-10-13 + } + } + else if (l == 5) + { + for (ir = ir0; ir < msh; ir++) + { + x1 = q * r[ir]; + + if (x1 < 0.14) + { + jl[ir] = 0; // mohan add 2007-10-15 + } + else + { + double cx1 = std::cos (x1); + double sx1 = std::sin (x1); + jl[ir] = (-cx1 - (945.0 * cx1) / std::pow (x1, 4) + (105.0 * cx1) / (x1 * x1) + + (945.0 * sx1) / std::pow (x1, 5) - (420.0 * sx1) / std::pow (x1, 3) + + (15.0 * sx1) / x1) + / x1; + } + } + } + else if (l == 6) + { + for (ir = ir0; ir < msh; ir++) + { + x1 = q * r[ir]; + + if (x1 < 0.29) + { + jl[ir] = 0; // mohan add 2007-10-15 + } + else + { + double cx1 = std::cos (x1); + double sx1 = std::sin (x1); + jl[ir] = ((-10395.0 * cx1) / std::pow (x1, 5) + (1260.0 * cx1) / std::pow (x1, 3) + - (21.0 * cx1) / x1 - sx1 + (10395.0 * sx1) / std::pow (x1, 6) + - (4725.0 * sx1) / std::pow (x1, 4) + (210.0 * sx1) / (x1 * x1)) + / x1; + } + } + } // mohan modify 2007-11-20 reduce cos , sin , q*r[ir] times; + else + { + std::cout << "\n error in sph_bes, l out of {-1 ... 6},l = " << l; + exit (0); } - } - }//mohan modify 2007-11-20 reduce cos , sin , q*r[ir] times; - else - { - std::cout << "\n error in sph_bes, l out of {-1 ... 
6},l = " << l ; - exit(0); } - } return; } - -void Sphbes::Spherical_Bessel -( - const int &msh, //number of grid points - const double *r,//radial grid - const double &q, // - const int &l, //angular momentum - double *sj, //jl(1:msh) = j_l(q*r(i)),spherical bessel function - double *sjp -) +void + Sphbes::Spherical_Bessel (const int& msh, // number of grid points + const double* r, // radial grid + const double& q, // + const int& l, // angular momentum + double* sj, // jl(1:msh) = j_l(q*r(i)),spherical bessel function + double* sjp) { - //calculate jlx first - Spherical_Bessel (msh, r, q, l, sj); + // calculate jlx first + Spherical_Bessel (msh, r, q, l, sj); - for (int ir = 0; ir < msh; ir++) - { - sjp[ir] = 1.0; - } - return; + for (int ir = 0; ir < msh; ir++) + { + sjp[ir] = 1.0; + } + return; } -void Sphbes::dSpherical_Bessel_dx -( - const int &msh, // number of grid points - const double *r, // radial grid - const double &q, // wave std::vector - const int &l, // angular momentum - double *djl // jl(1:msh) = j_l(q*r(i)),spherical bessel function -) +void + Sphbes::dSpherical_Bessel_dx (const int& msh, // number of grid points + const double* r, // radial grid + const double& q, // wave std::vector + const int& l, // angular momentum + double* djl // jl(1:msh) = j_l(q*r(i)),spherical bessel function + ) { - if (l < 0 ) - { - std::cout << "We temporarily only calculate derivative of l >= 0." << std::endl; - exit(0); - } + if (l < 0) + { + std::cout << "We temporarily only calculate derivative of l >= 0." 
<< std::endl; + exit (0); + } double djl0 = 0; - if(l == 1) - { - djl0 = 1.0/3.0; - } - - if(l == 0 ) - { - for (int ir = 0;ir < msh; ir++) - { - double x1 = q * r[ir]; - if(x1 < 1e-8) - { - djl[ir] = djl0; - } - else - { - djl[ir] = (x1 * std::cos(x1) - std::sin(x1)) / (x1*x1); - } + if (l == 1) + { + djl0 = 1.0 / 3.0; + } + if (l == 0) + { + for (int ir = 0; ir < msh; ir++) + { + double x1 = q * r[ir]; + if (x1 < 1e-8) + { + djl[ir] = djl0; + } + else + { + djl[ir] = (x1 * std::cos (x1) - std::sin (x1)) / (x1 * x1); + } + } } - } else - { - double *jl = new double [msh]; - Spherical_Bessel (msh, r, q, l-1, jl); - Spherical_Bessel (msh, r, q, l, djl); - for (int ir = 0;ir < msh; ir++) - { - double x1 = q * r[ir]; - if(x1 < 1e-8) - { - djl[ir] = djl0; - } - else - { - djl[ir] = jl[ir] - double(l+1)/x1 * djl[ir]; - } + { + double* jl = new double[msh]; + Spherical_Bessel (msh, r, q, l - 1, jl); + Spherical_Bessel (msh, r, q, l, djl); + for (int ir = 0; ir < msh; ir++) + { + double x1 = q * r[ir]; + if (x1 < 1e-8) + { + djl[ir] = djl0; + } + else + { + djl[ir] = jl[ir] - double (l + 1) / x1 * djl[ir]; + } + } + delete[] jl; } - delete[] jl; - } return; } -double Sphbes::_sphbesj_ascending_recurrence(int l, double x) { +double + Sphbes::_sphbesj_ascending_recurrence (int l, double x) +{ // should be used when x > l && l > 0 double invx = 1.0 / x; - double j0 = std::sin(x) * invx; - double j1 = ( j0 - std::cos(x) ) * invx; + double j0 = std::sin (x) * invx; + double j1 = (j0 - std::cos (x)) * invx; double jl = 0.0; - for (int i = 2; i <= l; ++i) { - jl = (2*i-1) * invx * j1 - j0; - j0 = j1; - j1 = jl; - } + for (int i = 2; i <= l; ++i) + { + jl = (2 * i - 1) * invx * j1 - j0; + j0 = j1; + j1 = jl; + } return j1; // at the end of the loop j1 == jl } -double Sphbes::_sphbesj_series(int l, double x) { +double + Sphbes::_sphbesj_series (int l, double x) +{ // should be used when x < l @@ -728,100 +780,101 @@ double Sphbes::_sphbesj_series(int l, double x) { // zeroth order 
term: x^l / (2l+1)!! int k = 0; double kth_term = 1.0; - for (int i = 1; i <= l; ++i) { - kth_term *= x / (2 * i + 1); - } + for (int i = 1; i <= l; ++i) + { + kth_term *= x / (2 * i + 1); + } double x_sqr_half = 0.5 * x * x; - do { - jl += kth_term; - k += 1; - kth_term *= -x_sqr_half / ( k * (2*(l+k)+1) ); - } while ( std::abs(kth_term) > std::abs(eps * jl) ); + do + { + jl += kth_term; + k += 1; + kth_term *= -x_sqr_half / (k * (2 * (l + k) + 1)); + } + while (std::abs (kth_term) > std::abs (eps * jl)); return jl; } -double Sphbes::sphbesj(const int l, const double x) +double + Sphbes::sphbesj (const int l, const double x) { - assert( l >= 0 ); - assert( x >= 0 ); + assert (l >= 0); + assert (x >= 0); // j_l(0) - if ( x == 0 ) - { - return l ? 0.0 : 1.0; - } - - if ( x < l ) - { - return _sphbesj_series(l, x); - } + if (x == 0) + { + return l ? 0.0 : 1.0; + } + + if (x < l) + { + return _sphbesj_series (l, x); + } else - { - double invx = 1.0 / x; - switch (l) - { - case 0: - return std::sin(x) * invx; - case 1: - return ( std::sin(x) * invx - std::cos(x) ) * invx; - // NOTE: the following explicit expressions are not necessarily faster than ascending recurrence, - // but we keep them just in case we need them in the future. 
- //case 2: - // return ( (3.0 * invx - x) * std::sin(x) - 3.0 * std::cos(x) ) * (invx * invx); - //case 3: - // return ( std::sin(x) * (15.0 * invx - 6.0 * x) + std::cos(x) * (x * x - 15.0) ) * std::pow(invx, 3); - //case 4: - // return ( std::sin(x) * (std::pow(x,3) - 45.0 * x + 105.0 * invx) - // + std::cos(x) * (10.0 * x * x - 105.0) ) * std::pow(invx, 4); - //case 5: - // return ( std::sin(x) * (15.0 * std::pow(x,3) - 420.0 * x + 945.0 * invx) - // + std::cos(x) * (-std::pow(x, 4) + 105.0 * x * x - 945.0) ) * std::pow(invx, 5); - //case 6: - // return ( std::sin(x) * (-std::pow(x, 5) + 210.0 * std::pow(x, 3) - 4725.0 * x + 10395.0 * invx) - // + std::cos(x) * (-21.0 * std::pow(x, 4) + 1260.0 * x * x - 10395.0) ) * std::pow(invx, 6); - default: - return _sphbesj_ascending_recurrence(l, x); - } - } + { + double invx = 1.0 / x; + switch (l) + { + case 0: + return std::sin (x) * invx; + case 1: + return (std::sin (x) * invx - std::cos (x)) * invx; + // NOTE: the following explicit expressions are not necessarily faster than ascending recurrence, + // but we keep them just in case we need them in the future. 
+ // case 2: + // return ( (3.0 * invx - x) * std::sin(x) - 3.0 * std::cos(x) ) * (invx * invx); + // case 3: + // return ( std::sin(x) * (15.0 * invx - 6.0 * x) + std::cos(x) * (x * x - 15.0) ) * std::pow(invx, + // 3); + // case 4: + // return ( std::sin(x) * (std::pow(x,3) - 45.0 * x + 105.0 * invx) + // + std::cos(x) * (10.0 * x * x - 105.0) ) * std::pow(invx, 4); + // case 5: + // return ( std::sin(x) * (15.0 * std::pow(x,3) - 420.0 * x + 945.0 * invx) + // + std::cos(x) * (-std::pow(x, 4) + 105.0 * x * x - 945.0) ) * std::pow(invx, 5); + // case 6: + // return ( std::sin(x) * (-std::pow(x, 5) + 210.0 * std::pow(x, 3) - 4725.0 * x + 10395.0 * invx) + // + std::cos(x) * (-21.0 * std::pow(x, 4) + 1260.0 * x * x - 10395.0) ) * std::pow(invx, 6); + default: + return _sphbesj_ascending_recurrence (l, x); + } + } } -double Sphbes::dsphbesj(const int l, const double x) +double + Sphbes::dsphbesj (const int l, const double x) { - assert( l >= 0 ); - assert( x >= 0 ); - return l == 0 ? -sphbesj(1, x) : ( l * sphbesj(l - 1, x) - (l + 1) * sphbesj(l + 1, x) ) / (2 * l + 1); + assert (l >= 0); + assert (x >= 0); + return l == 0 ? 
-sphbesj (1, x) : (l * sphbesj (l - 1, x) - (l + 1) * sphbesj (l + 1, x)) / (2 * l + 1); } -void Sphbes::sphbesj(const int n, - const double* const r, - const double q, - const int l, - double* const jl) +void + Sphbes::sphbesj (const int n, const double* const r, const double q, const int l, double* const jl) { for (int i = 0; i != n; ++i) - { - jl[i] = Sphbes::sphbesj(l, q * r[i]); - } + { + jl[i] = Sphbes::sphbesj (l, q * r[i]); + } } -void Sphbes::dsphbesj(const int n, - const double* const r, - const double q, - const int l, - double* const djl) +void + Sphbes::dsphbesj (const int n, const double* const r, const double q, const int l, double* const djl) { for (int i = 0; i != n; ++i) - { - djl[i] = Sphbes::dsphbesj(l, q * r[i]); - } + { + djl[i] = Sphbes::dsphbesj (l, q * r[i]); + } } -void Sphbes::sphbes_zeros(const int l, const int n, double* const zeros, const bool return_all) +void + Sphbes::sphbes_zeros (const int l, const int n, double* const zeros, const bool return_all) { - assert( n > 0 ); - assert( l >= 0 ); + assert (n > 0); + assert (l >= 0); // The zeros of j_l and j_{l-1} are interlaced; // So do the zeros of j_l and j_{l-2}. @@ -843,88 +896,94 @@ void Sphbes::sphbes_zeros(const int l, const int n, double* const zeros, const b // If return_all is true, one needs to start with n+l zeros of j_0 // to ensure n zeros of j_l; otherwise with a stride of 2 one only // needs to start with n+(l+1)/2 zeros of j_0 - int nz = n + ( return_all ? l : (l+1)/2 ); + int nz = n + (return_all ? 
l : (l + 1) / 2); double* buffer = new double[nz]; // zeros of j_0 = sin(x)/x is just n*pi - double PI = std::acos(-1.0); + double PI = std::acos (-1.0); for (int i = 0; i < nz; i++) - { - buffer[i] = (i+1) * PI; - } + { + buffer[i] = (i + 1) * PI; + } int ll = 0; // active l - auto jl = [&ll] (double x) { return sphbesj(ll, x); }; + auto jl = [&ll] (double x) { return sphbesj (ll, x); }; int stride = 0; - std::function copy_if_needed; + std::function copy_if_needed; int offset = 0; // keeps track of the position in zeros for next copy (used when return_all == true) if (return_all) - { - copy_if_needed = [&](){ std::copy(buffer, buffer + n, zeros + offset); offset += n; }; - stride = 1; - ll = 1; - } + { + copy_if_needed = [&] () + { + std::copy (buffer, buffer + n, zeros + offset); + offset += n; + }; + stride = 1; + ll = 1; + } else - { - copy_if_needed = [](){}; - stride = 2; - ll = 2 - l % 2; - } + { + copy_if_needed = [] () {}; + stride = 2; + ll = 2 - l % 2; + } for (; ll <= l; ll += stride, --nz) - { - copy_if_needed(); - for (int i = 0; i < nz-1; i++) { - buffer[i] = illinois(jl, buffer[i], buffer[i+1], 1e-15, 50); + copy_if_needed (); + for (int i = 0; i < nz - 1; i++) + { + buffer[i] = illinois (jl, buffer[i], buffer[i + 1], 1e-15, 50); + } } - } - std::copy(buffer, buffer + n, zeros + offset); + std::copy (buffer, buffer + n, zeros + offset); delete[] buffer; } -double Sphbes::illinois(std::function func, double x0, double x1, const double tol, const int max_iter) +double + Sphbes::illinois (std::function func, double x0, double x1, const double tol, const int max_iter) { - assert(tol > 0.0 && max_iter > 0); + assert (tol > 0.0 && max_iter > 0); - double f0 = func(x0); - double f1 = func(x1); - assert(f0 * f1 <= 0); + double f0 = func (x0); + double f1 = func (x1); + assert (f0 * f1 <= 0); - if (std::abs(f0) < std::abs(f1)) { - std::swap(x0, x1); - std::swap(f0, f1); - } + if (std::abs (f0) < std::abs (f1)) + { + std::swap (x0, x1); + std::swap (f0, f1); 
+ } int iter = 0; double x = 0.0, f = 0.0; - while (++iter <= max_iter && std::abs(f1) > tol) - { - // regula falsi - x = (x0 * f1 - x1 * f0) / (f1 - f0); - f = func(x); - - // Illinois anti-stalling variant - if (f * f1 < 0) - { - x0 = x1; - f0 = f1; - } - else + while (++iter <= max_iter && std::abs (f1) > tol) { - f0 *= 0.5; + // regula falsi + x = (x0 * f1 - x1 * f0) / (f1 - f0); + f = func (x); + + // Illinois anti-stalling variant + if (f * f1 < 0) + { + x0 = x1; + f0 = f1; + } + else + { + f0 *= 0.5; + } + x1 = x; + f1 = f; } - x1 = x; - f1 = f; - } if (iter > max_iter) - { - std::cout << "Maximum number of iterations reached in illinois." << std::endl; - } + { + std::cout << "Maximum number of iterations reached in illinois." << std::endl; + } return x1; } -} +} // namespace ModuleBase diff --git a/source/source_base/math_sphbes.h b/source/source_base/math_sphbes.h index 7aa9c78a48c..21eacf807ea 100644 --- a/source/source_base/math_sphbes.h +++ b/source/source_base/math_sphbes.h @@ -9,52 +9,37 @@ namespace ModuleBase { -class Sphbes +class Sphbes { - public: - - Sphbes(); - ~Sphbes(); + public: + Sphbes (); + ~Sphbes (); /** * @brief spherical bessel jl(qr) - * + * * @param msh [in] number of grid points * @param r [in] radial grid * @param q [in] k_radial * @param l [in] angular momentum * @param jl [out] jl spherical bessel function */ - static void Spherical_Bessel - ( - const int &msh, - const double *r, - const double &q, - const int &l, - double *jl - ); + static void Spherical_Bessel (const int& msh, const double* r, const double& q, const int& l, double* jl); /** * @brief derivative of spherical bessel djl(qr)/d(qr) - * + * * @param msh [in] number of grid points * @param r [in] radial grid * @param q [in] k_radial * @param l [in] angular momentum * @param jl [out] jl spherical bessel function */ - static void dSpherical_Bessel_dx - ( - const int &msh, - const double *r, - const double &q, - const int &l, - double *jl - ); + static void 
dSpherical_Bessel_dx (const int& msh, const double* r, const double& q, const int& l, double* jl); /** * @brief spherical bessel - * + * * @param msh [in] number of grid points * @param r [in] radial grid * @param q [in] k_radial @@ -62,69 +47,59 @@ class Sphbes * @param jl [out] jl spherical bessel function * @param sjp [out] sjp[i] is assigned to be 1.0. i < msh. */ - static void Spherical_Bessel - ( - const int &msh, - const double *r, - const double &q, - const int &l, - double *sj, - double *sjp - ); - - /** - * @brief return num eigenvalues of spherical bessel function - * - * @param num [in] the number of eigenvalues - * @param l [in] angular number - * @param epsilon [in] the accuracy - * @param eigenvalue [out] the calculated eigenvalues - * @param rcut [in] the cutoff the radial function - */ - static void Spherical_Bessel_Roots - ( - const int &num, - const int &l, - const double &epsilon, - double* eigenvalue, - const double &rcut - ); + static void + Spherical_Bessel (const int& msh, const double* r, const double& q, const int& l, double* sj, double* sjp); + + /** + * @brief return num eigenvalues of spherical bessel function + * + * @param num [in] the number of eigenvalues + * @param l [in] angular number + * @param epsilon [in] the accuracy + * @param eigenvalue [out] the calculated eigenvalues + * @param rcut [in] the cutoff the radial function + */ + static void Spherical_Bessel_Roots (const int& num, + const int& l, + const double& epsilon, + double* eigenvalue, + const double& rcut); //! spherical Bessel function of the first kind /*! * This function computes j_l(x) by series expansion for x < l * and by ascending recurrence for x >= l. * */ - static double sphbesj(const int l, //!< [in] order - const double x //!< [in] argument + static double sphbesj (const int l, //!< [in] order + const double x //!< [in] argument ); //! 
derivative of spherical Bessel function - static double dsphbesj(const int l, //!< [in] order - const double x //!< [in] argument + static double dsphbesj (const int l, //!< [in] order + const double x //!< [in] argument ); //! computes the values of l-th order spherical Bessel function at q*r[ir] - static void sphbesj(const int n, //!< [in] number of r grid points - const double* const r, //!< [in] r grid - const double q, //!< [in] wave vector - const int l, //!< [in] order of the spherical Bessel function - double* const jl //!< [out] results + static void sphbesj (const int n, //!< [in] number of r grid points + const double* const r, //!< [in] r grid + const double q, //!< [in] wave vector + const int l, //!< [in] order of the spherical Bessel function + double* const jl //!< [out] results ); //! computes the derivative of l-th order spherical Bessel function at q*r[ir] - static void dsphbesj(const int n, //!< [in] number of r grid points - const double* const r, //!< [in] r grid - const double q, //!< [in] wave vector - const int l, //!< [in] order of the spherical Bessel function - double* const djl //!< [out] results + static void dsphbesj (const int n, //!< [in] number of r grid points + const double* const r, //!< [in] r grid + const double q, //!< [in] wave vector + const int l, //!< [in] order of the spherical Bessel function + double* const djl //!< [out] results ); - /** + /** * @brief Zeros of spherical Bessel functions. * * This function computes the first n positive zeros of the l-th order - * spherical Bessel function of the first kind. + * spherical Bessel function of the first kind. * * @param[in] l (maximum) order of the spherical Bessel function * @param[in] n number of zeros to be computed (for each j_l if return_all is true) @@ -134,37 +109,32 @@ class Sphbes * * @note The size of array "zeros" must be at least (l+1)*n if return_all is true, and n otherwise. 
*/ - static void sphbes_zeros(const int l, - const int n, - double* const zeros, - bool return_all = false - ); - -private: + static void sphbes_zeros (const int l, const int n, double* const zeros, bool return_all = false); - static double Spherical_Bessel_7(const int n, const double &x); + private: + static double Spherical_Bessel_7 (const int n, const double& x); - // Peize Lin change double to void 2019-05-01 - static void BESSJY(double x, double xnu, double *rj, double *ry, double *rjp, double *ryp); + // Peize Lin change double to void 2019-05-01 + static void BESSJY (double x, double xnu, double* rj, double* ry, double* rjp, double* ryp); - static void BESCHB(double x, double *gam1, double *gam2, double *gampl, double *gammi); + static void BESCHB (double x, double* gam1, double* gam2, double* gampl, double* gammi); - static double CHEBEV(double a, double b, double c[], int m, double x); + static double CHEBEV (double a, double b, double c[], int m, double x); - static int IMAX(int a, int b); + static int IMAX (int a, int b); // utility functions for sphbesj - static double _sphbesj_ascending_recurrence(int l, double x); - static double _sphbesj_series(int l, double x); + static double _sphbesj_ascending_recurrence (int l, double x); + static double _sphbesj_series (int l, double x); // Regula falsi with Illinois anti-stalling variation - static double illinois(std::function func, - double x0, - double x1, - const double tol = 1e-12, - const int max_iter = 50); + static double illinois (std::function func, + double x0, + double x1, + const double tol = 1e-12, + const int max_iter = 50); }; -} +} // namespace ModuleBase #endif diff --git a/source/source_base/math_ylmreal.cpp b/source/source_base/math_ylmreal.cpp index 47415de44d1..976e9eb538d 100644 --- a/source/source_base/math_ylmreal.cpp +++ b/source/source_base/math_ylmreal.cpp @@ -15,280 +15,279 @@ namespace ModuleBase { -YlmReal::YlmReal(){} -YlmReal::~YlmReal(){} - -void YlmReal::rlylm -( - const int 
lmax, - const double& x, - const double& y, - const double& z, // g_cartesian_vec(x,y,z) - double* rly // output -) +YlmReal::YlmReal () {} +YlmReal::~YlmReal () {} + +void + YlmReal::rlylm (const int lmax, + const double& x, + const double& y, + const double& z, // g_cartesian_vec(x,y,z) + double* rly // output + ) { - ModuleBase::timer::start("YlmReal","rlylm"); - - assert(lmax >= 0); - - //get xy_dependence - assert(lmax <= 19); - - double Am[20]; - double Bm[20]; - - // mohan add 2021-05-07 - for(int i=0; i<20; ++i) - { - Am[i]=0.0; - Bm[i]=0.0; - } - - //ZEROS(Am, 20); - //ZEROS(Bm, 20); - - double x2, x3, x4, x5; - double y2, y3, y4, y5; - - x2 = x * x; - x3 = x2 * x; - x4 = x3 * x; - x5 = x4 * x; - - y2 = y * y; - y3 = y2 * y; - y4 = y3 * y; - y5 = y4 * y; - - //x-y dependence - //Am - //Bm - for(int im = 0; im < lmax+1; im++) - { - if(im == 0) - { - Am[0] = 1.0; - Bm[0] = 0.0; - } - else if(im == 1) - { - Am[1] = x; - Bm[1] = y; - } - else if(im == 2) - { - Am[2] = x2- y2; - Bm[2] = 2.0 * x * y; - } - else if(im == 3) - { - Am[3] = x3 - 3.0 * x * y2; - Bm[3] = 3.0 * x2 * y - y3; - } - else if(im == 4) - { - Am[4] = x4 - 6.0 * x2 * y2 + y4; - Bm[4] = 4.0 * (x3 * y - x * y3); - } - else if(im == 5) - { - Am[5] = x5 - 10.0 * x3 * y2 + 5.0 * x * y4; - Bm[5] = 5.0 * x4 * y - 10.0 * x2 * y3 + y5; - } - else - { - for(int ip = 0; ip <= im; ip++) - { - double aux = Fact(im) / Fact(ip) / Fact(im - ip); - Am[im] += aux * pow(x, ip) * pow(y, im-ip) * cos( (im-ip) * ModuleBase::PI / 2.0 ); - Bm[im] += aux * pow(x, ip) * pow(y, im-ip) * sin( (im-ip) * ModuleBase::PI / 2.0 ); - } - } - } - - //z dependence - double zdep[20][20]; - - for(int il = 0; il < 20; il++) - { - for(int jl=0; jl < 20; jl++) - { - zdep[il][jl]=0.0; // mohan add 2021-05-07 - } -// ZEROS(zdep[il], 20); - } - - double z2 = z * z; - double z3 = z2 * z; - double z4 = z3 * z; - //double z5 = z4 * z; - - double r = sqrt(x*x + y*y + z*z); - double r2 = r * r; - double r3 = r2 * r; - double r4 = r3 * r; - - 
for(int il = 0; il < lmax + 1; il++) - { - if(il == 0) - { - zdep[0][0] = 1.0; - } - else if(il == 1) - { - zdep[1][0] = z; - zdep[1][1] = 1.0; - } - else if(il == 2) - { - zdep[2][0] = 0.5 * (3.0 * z2 - r2); - zdep[2][1] = sqrt(3.0) * z; - zdep[2][2] = sqrt(3.0) * 0.5; - } - else if(il == 3) - { - zdep[3][0] = 2.5 * z3 - 1.5 * z * r2; - zdep[3][1] = 0.25 * sqrt(6.0) * (5.0 * z2 - r2); - zdep[3][2] = 0.5 * sqrt(15.0) * z; - zdep[3][3] = 0.25 * sqrt(10.0); - } - else if(il == 4) - { - zdep[4][0] = 0.125 * (35.0 * z4 - 30.0 * r2 * z2 + 3.0 * r4); - zdep[4][1] = sqrt(10.0) * 0.25 * z * (7.0 * z2 - 3.0 * r2); - zdep[4][2] = sqrt(5.0) * 0.25 * (7.0 * z2 - r2); - zdep[4][3] = sqrt(70.0) * 0.25 * z; - zdep[4][4] = sqrt(35.0) * 0.125; - } - else if(il == 5) - { - zdep[5][0] = 0.125 * z *( 63.0 * z4 - 70.0 * z2 * r2 + 15.0 * r4); - zdep[5][1] = 0.125 * sqrt(15.0) * (21.0 * z4 - 14.0 * z2 * r2 + r4); - zdep[5][2] = 0.25 * sqrt(105.0) * z * (3.0 * z2 - r2); - zdep[5][3] = 0.0625 * sqrt(70.0) * (9.0 * z2 - r2); - zdep[5][4] = 0.375 * sqrt(35.0) * z; - zdep[5][5] = 0.1875 * sqrt(14.0); - } - else - { - for(int im = 0; im <= il; im++) - { - int kmax = static_cast( (il - im) / 2 ); - for(int ik = 0; ik <= kmax; ik++) - { - int twok = 2 * ik; - - double gamma = 0.0; - double aux0, aux1, aux2, aux3; - - aux0 = pow(-1.0, ik) * pow(2.0, -il); - aux1 = Fact(il) / Fact(ik) / Fact(il-ik); - aux2 = Fact(2*il - twok) / Fact(il) / Fact(il - twok); - aux3 = Fact(il - twok) / Fact(il - twok - im); - - gamma = aux0 * aux1 * aux2 * aux3; - - assert(il - twok - im >= 0); - zdep[il][im] += pow(r, twok) * pow(z, il-twok-im) * gamma; - } - - if(im >= 1) - { - zdep[il][im] *= sqrt(2 * Fact(il - im) / Fact(il + im)); - - } - } - } - } - - //calc - int ic = 0; - - //special case for r=0 - double rpi = r; - const double tiny = 1.0E-10; - if (rpi < tiny) rpi += tiny; - - for(int il = 0; il <= lmax; il++) - { - double fac = sqrt( (2.0 * il + 1.0) / ModuleBase::FOUR_PI ); - - double rl = pow(rpi, il); - 
- //m=0 - rly[ic] = Am[0] * zdep[il][0] * fac / rl; - - ic++; - - //m ! = 0 - for(int im = 1; im <= il; im++) - { - //m>0 - rly[ic] = Am[im] * zdep[il][im] * pow(-1.0, im) * fac / rl; - - ic++; - - //m<0 - rly[ic] = Bm[im] * zdep[il][im] * pow(-1.0, im) * fac / rl; - - ic++; - } - } - - ModuleBase::timer::end("YlmReal","rlylm"); - return; -} + ModuleBase::timer::start ("YlmReal", "rlylm"); + + assert (lmax >= 0); + + // get xy_dependence + assert (lmax <= 19); + + double Am[20]; + double Bm[20]; + + // mohan add 2021-05-07 + for (int i = 0; i < 20; ++i) + { + Am[i] = 0.0; + Bm[i] = 0.0; + } + + // ZEROS(Am, 20); + // ZEROS(Bm, 20); + + double x2, x3, x4, x5; + double y2, y3, y4, y5; + + x2 = x * x; + x3 = x2 * x; + x4 = x3 * x; + x5 = x4 * x; + + y2 = y * y; + y3 = y2 * y; + y4 = y3 * y; + y5 = y4 * y; + + // x-y dependence + // Am + // Bm + for (int im = 0; im < lmax + 1; im++) + { + if (im == 0) + { + Am[0] = 1.0; + Bm[0] = 0.0; + } + else if (im == 1) + { + Am[1] = x; + Bm[1] = y; + } + else if (im == 2) + { + Am[2] = x2 - y2; + Bm[2] = 2.0 * x * y; + } + else if (im == 3) + { + Am[3] = x3 - 3.0 * x * y2; + Bm[3] = 3.0 * x2 * y - y3; + } + else if (im == 4) + { + Am[4] = x4 - 6.0 * x2 * y2 + y4; + Bm[4] = 4.0 * (x3 * y - x * y3); + } + else if (im == 5) + { + Am[5] = x5 - 10.0 * x3 * y2 + 5.0 * x * y4; + Bm[5] = 5.0 * x4 * y - 10.0 * x2 * y3 + y5; + } + else + { + for (int ip = 0; ip <= im; ip++) + { + double aux = Fact (im) / Fact (ip) / Fact (im - ip); + Am[im] += aux * pow (x, ip) * pow (y, im - ip) * cos ((im - ip) * ModuleBase::PI / 2.0); + Bm[im] += aux * pow (x, ip) * pow (y, im - ip) * sin ((im - ip) * ModuleBase::PI / 2.0); + } + } + } + + // z dependence + double zdep[20][20]; + + for (int il = 0; il < 20; il++) + { + for (int jl = 0; jl < 20; jl++) + { + zdep[il][jl] = 0.0; // mohan add 2021-05-07 + } + // ZEROS(zdep[il], 20); + } + + double z2 = z * z; + double z3 = z2 * z; + double z4 = z3 * z; + // double z5 = z4 * z; + double r = sqrt (x * x + y * 
y + z * z); + double r2 = r * r; + double r3 = r2 * r; + double r4 = r3 * r; -void YlmReal::Ylm_Real2 -( - const int lmax2, // lmax2 = (lmax+1)^2 - const int ng, // - const ModuleBase::Vector3 *g, // g_cartesian_vec(x,y,z) - matrix &ylm // output -) + for (int il = 0; il < lmax + 1; il++) + { + if (il == 0) + { + zdep[0][0] = 1.0; + } + else if (il == 1) + { + zdep[1][0] = z; + zdep[1][1] = 1.0; + } + else if (il == 2) + { + zdep[2][0] = 0.5 * (3.0 * z2 - r2); + zdep[2][1] = sqrt (3.0) * z; + zdep[2][2] = sqrt (3.0) * 0.5; + } + else if (il == 3) + { + zdep[3][0] = 2.5 * z3 - 1.5 * z * r2; + zdep[3][1] = 0.25 * sqrt (6.0) * (5.0 * z2 - r2); + zdep[3][2] = 0.5 * sqrt (15.0) * z; + zdep[3][3] = 0.25 * sqrt (10.0); + } + else if (il == 4) + { + zdep[4][0] = 0.125 * (35.0 * z4 - 30.0 * r2 * z2 + 3.0 * r4); + zdep[4][1] = sqrt (10.0) * 0.25 * z * (7.0 * z2 - 3.0 * r2); + zdep[4][2] = sqrt (5.0) * 0.25 * (7.0 * z2 - r2); + zdep[4][3] = sqrt (70.0) * 0.25 * z; + zdep[4][4] = sqrt (35.0) * 0.125; + } + else if (il == 5) + { + zdep[5][0] = 0.125 * z * (63.0 * z4 - 70.0 * z2 * r2 + 15.0 * r4); + zdep[5][1] = 0.125 * sqrt (15.0) * (21.0 * z4 - 14.0 * z2 * r2 + r4); + zdep[5][2] = 0.25 * sqrt (105.0) * z * (3.0 * z2 - r2); + zdep[5][3] = 0.0625 * sqrt (70.0) * (9.0 * z2 - r2); + zdep[5][4] = 0.375 * sqrt (35.0) * z; + zdep[5][5] = 0.1875 * sqrt (14.0); + } + else + { + for (int im = 0; im <= il; im++) + { + int kmax = static_cast ((il - im) / 2); + for (int ik = 0; ik <= kmax; ik++) + { + int twok = 2 * ik; + + double gamma = 0.0; + double aux0, aux1, aux2, aux3; + + aux0 = pow (-1.0, ik) * pow (2.0, -il); + aux1 = Fact (il) / Fact (ik) / Fact (il - ik); + aux2 = Fact (2 * il - twok) / Fact (il) / Fact (il - twok); + aux3 = Fact (il - twok) / Fact (il - twok - im); + + gamma = aux0 * aux1 * aux2 * aux3; + + assert (il - twok - im >= 0); + zdep[il][im] += pow (r, twok) * pow (z, il - twok - im) * gamma; + } + + if (im >= 1) + { + zdep[il][im] *= sqrt (2 * Fact (il - im) / Fact 
(il + im)); + } + } + } + } + + // calc + int ic = 0; + + // special case for r=0 + double rpi = r; + const double tiny = 1.0E-10; + if (rpi < tiny) + { + rpi += tiny; + } + + for (int il = 0; il <= lmax; il++) + { + double fac = sqrt ((2.0 * il + 1.0) / ModuleBase::FOUR_PI); + + double rl = pow (rpi, il); + + // m=0 + rly[ic] = Am[0] * zdep[il][0] * fac / rl; + + ic++; + + // m ! = 0 + for (int im = 1; im <= il; im++) + { + // m>0 + rly[ic] = Am[im] * zdep[il][im] * pow (-1.0, im) * fac / rl; + + ic++; + + // m<0 + rly[ic] = Bm[im] * zdep[il][im] * pow (-1.0, im) * fac / rl; + + ic++; + } + } + + ModuleBase::timer::end ("YlmReal", "rlylm"); + return; +} + +void + YlmReal::Ylm_Real2 (const int lmax2, // lmax2 = (lmax+1)^2 + const int ng, // + const ModuleBase::Vector3* g, // g_cartesian_vec(x,y,z) + matrix& ylm // output + ) { - if (ng<1 || lmax2<1) - { - ModuleBase::WARNING("YLM_REAL","ng<1 or lmax2<1"); - return; - } - -//---------------------------------------------------------- -// EXPLAIN : find out lmax -//---------------------------------------------------------- + if (ng < 1 || lmax2 < 1) + { + ModuleBase::WARNING ("YLM_REAL", "ng<1 or lmax2<1"); + return; + } + + //---------------------------------------------------------- + // EXPLAIN : find out lmax + //---------------------------------------------------------- bool out_of_range = true; int lmax = 0; - for (int l= 0; l< 30; l++) - { - if ((l+1)*(l+1) == lmax2) + for (int l = 0; l < 30; l++) { - lmax = l; - out_of_range = false; - break; + if ((l + 1) * (l + 1) == lmax2) + { + lmax = l; + out_of_range = false; + break; + } } - } if (out_of_range) - { - ModuleBase::WARNING_QUIT("YLM_REAL","l>30 or l<0"); - } - -//---------------------------------------------------------- -// Start CALC -//---------------------------------------------------------- - std::vector rly(lmax2); - - for (int ig = 0; ig < ng; ig++) - { - rlylm (lmax, g[ig].x, g[ig].y, g[ig].z, rly.data()); - - for (int lm = 0; lm < lmax2; lm++) - 
{ - ylm (lm, ig) = rly[lm]; - } - } - - return; + { + ModuleBase::WARNING_QUIT ("YLM_REAL", "l>30 or l<0"); + } + + //---------------------------------------------------------- + // Start CALC + //---------------------------------------------------------- + std::vector rly (lmax2); + + for (int ig = 0; ig < ng; ig++) + { + rlylm (lmax, g[ig].x, g[ig].y, g[ig].z, rly.data ()); + + for (int lm = 0; lm < lmax2; lm++) + { + ylm (lm, ig) = rly[lm]; + } + } + + return; } //========================================================== @@ -299,51 +298,55 @@ void YlmReal::Ylm_Real2 // from ylmr2.f90 template -void YlmReal::Ylm_Real(Device * ctx, const int lmax2, const int ng, const FPTYPE *g, FPTYPE * ylm) +void + YlmReal::Ylm_Real (Device* ctx, const int lmax2, const int ng, const FPTYPE* g, FPTYPE* ylm) { using resmem_var_op = base_device::memory::resize_memory_op; using delmem_var_op = base_device::memory::delete_memory_op; using cal_ylm_real_op = ModuleBase::cal_ylm_real_op; - if (ng < 1 || lmax2 < 1) { - ModuleBase::WARNING("YLM_REAL","ng<1 or lmax2<1"); - return; - } + if (ng < 1 || lmax2 < 1) + { + ModuleBase::WARNING ("YLM_REAL", "ng<1 or lmax2<1"); + return; + } -//---------------------------------------------------------- -// EXPLAIN : find out lmax -//---------------------------------------------------------- + //---------------------------------------------------------- + // EXPLAIN : find out lmax + //---------------------------------------------------------- bool out_of_range = true; int lmax = 0; - for (int l = 0; l < 30; l++) { - if ((l + 1) * (l + 1) == lmax2) { - lmax = l; - out_of_range = false; - break; + for (int l = 0; l < 30; l++) + { + if ((l + 1) * (l + 1) == lmax2) + { + lmax = l; + out_of_range = false; + break; + } } - } - if (out_of_range) { - ModuleBase::WARNING_QUIT("YLM_REAL","l>30 or l<0"); - } - FPTYPE * p = nullptr, * phi = nullptr, * cost = nullptr; - resmem_var_op()(p, (lmax + 1) * (lmax + 1) * ng, "YlmReal::Ylm_Real"); - - 
cal_ylm_real_op()( - ctx, - ng, - lmax, - ModuleBase::SQRT2, - ModuleBase::PI, - ModuleBase::PI_HALF, - ModuleBase::FOUR_PI, - ModuleBase::SQRT_INVERSE_FOUR_PI, - g, - p, - ylm); - - delmem_var_op()(p); - delmem_var_op()(phi); - delmem_var_op()(cost); + if (out_of_range) + { + ModuleBase::WARNING_QUIT ("YLM_REAL", "l>30 or l<0"); + } + FPTYPE *p = nullptr, *phi = nullptr, *cost = nullptr; + resmem_var_op () (p, (lmax + 1) * (lmax + 1) * ng, "YlmReal::Ylm_Real"); + + cal_ylm_real_op () (ctx, + ng, + lmax, + ModuleBase::SQRT2, + ModuleBase::PI, + ModuleBase::PI_HALF, + ModuleBase::FOUR_PI, + ModuleBase::SQRT_INVERSE_FOUR_PI, + g, + p, + ylm); + + delmem_var_op () (p); + delmem_var_op () (phi); + delmem_var_op () (cost); } // end subroutine ylmr2 //========================================================== @@ -352,247 +355,240 @@ void YlmReal::Ylm_Real(Device * ctx, const int lmax2, const int ng, const FPTYPE // Use Numerical recursive algorithm as given in Numerical Recipes //========================================================== // from ylmr2.f90 -void YlmReal::Ylm_Real -( - const int lmax2, // lmax2 = (lmax+1)^2 - const int ng, // - const ModuleBase::Vector3 *g, // g_cartesian_vec(x,y,z) - matrix &ylm // output -) +void + YlmReal::Ylm_Real (const int lmax2, // lmax2 = (lmax+1)^2 + const int ng, // + const ModuleBase::Vector3* g, // g_cartesian_vec(x,y,z) + matrix& ylm // output + ) { - if (ng<1 || lmax2<1) - { - ModuleBase::WARNING("YLM_REAL","ng<1 or lmax2<1"); - return; - } + if (ng < 1 || lmax2 < 1) + { + ModuleBase::WARNING ("YLM_REAL", "ng<1 or lmax2<1"); + return; + } -//---------------------------------------------------------- -// EXPLAIN : find out lmax -//---------------------------------------------------------- + //---------------------------------------------------------- + // EXPLAIN : find out lmax + //---------------------------------------------------------- bool out_of_range = true; int lmax = 0; - for (int l= 0; l< 30; l++) - { - if 
((l+1)*(l+1) == lmax2) + for (int l = 0; l < 30; l++) { - lmax = l; - out_of_range = false; - break; + if ((l + 1) * (l + 1) == lmax2) + { + lmax = l; + out_of_range = false; + break; + } } - } if (out_of_range) - { - ModuleBase::WARNING_QUIT("YLM_REAL","l>30 or l<0"); - } + { + ModuleBase::WARNING_QUIT ("YLM_REAL", "l>30 or l<0"); + } -//---------------------------------------------------------- -// EXPLAIN : if lmax = 1,only use Y00 , output result. -//---------------------------------------------------------- + //---------------------------------------------------------- + // EXPLAIN : if lmax = 1,only use Y00 , output result. + //---------------------------------------------------------- if (lmax == 0) - { + { #ifdef _OPENMP #pragma omp parallel for #endif - for (int i=0;i cost(ng); - std::vector phi(ng); + //---------------------------------------------------------- + // LOCAL VARIABLES : + // NAME : cost = cos(theta),theta and phi are polar angles + // NAME : phi + //---------------------------------------------------------- + std::vector cost (ng); + std::vector phi (ng); #ifdef _OPENMP #pragma omp parallel for #endif - for (int ig = 0;ig < ng;ig++) - { - const double gmod = g[ig].norm(); - if (gmod < 1.0e-9) - { - cost[ig] = 0.0; - } - else - { - cost[ig] = g[ig].z / gmod; - }// endif - - // beware the arc tan, it is defined modulo pi - if (g[ig].x > 1.0e-9) - { - phi[ig] = atan(g[ig].y / g[ig].x); - } - else if (g[ig].x < -1.e-9) - { - phi[ig] = atan(g[ig].y / g[ig].x) + ModuleBase::PI; - } - else + for (int ig = 0; ig < ng; ig++) { - phi[ig] = ModuleBase::PI_HALF * ((g[ig].y >= 0.0) ? 
1.0 : -1.0); //HLX: modified on 10/13/2006 - } // end if - } // enddo + const double gmod = g[ig].norm (); + if (gmod < 1.0e-9) + { + cost[ig] = 0.0; + } + else + { + cost[ig] = g[ig].z / gmod; + } // endif -//========================================================== -// NAME : p(Legendre Polynomials) (0 <= m <= l) -//========================================================== - ModuleBase::realArray p(lmax+1,lmax+1,ng); + // beware the arc tan, it is defined modulo pi + if (g[ig].x > 1.0e-9) + { + phi[ig] = atan (g[ig].y / g[ig].x); + } + else if (g[ig].x < -1.e-9) + { + phi[ig] = atan (g[ig].y / g[ig].x) + ModuleBase::PI; + } + else + { + phi[ig] = ModuleBase::PI_HALF * ((g[ig].y >= 0.0) ? 1.0 : -1.0); // HLX: modified on 10/13/2006 + } // end if + } // enddo + + //========================================================== + // NAME : p(Legendre Polynomials) (0 <= m <= l) + //========================================================== + ModuleBase::realArray p (lmax + 1, lmax + 1, ng); int lm = -1; - for (int l=0; l<=lmax; l++) - { - const double c = sqrt((2*l+1) / ModuleBase::FOUR_PI); - if (l == 0) + for (int l = 0; l <= lmax; l++) { + const double c = sqrt ((2 * l + 1) / ModuleBase::FOUR_PI); + if (l == 0) + { #ifdef _OPENMP #pragma omp parallel for #endif - for (int i=0;i(l) / 2.0) ;//mohan modify 2007-10-13 - if (l%2 == 1) - { - p(l, l, i) = -p(l, l, i); - } - } - } // end if - - // Y_lm, m = 0 - ++lm; + for (int i = 0; i < ng; i++) + { + p (l1, l, i) = cost[i] * l3 * p (l1, l1, i); + auto x2 = 1.0 - cost[i] * cost[i]; + x2 = std::max (0.0, x2); + p (l, l, i) + = Semi_Fact (l3) * pow (x2, static_cast (l) / 2.0); // mohan modify 2007-10-13 + if (l % 2 == 1) + { + p (l, l, i) = -p (l, l, i); + } + } + } // end if + + // Y_lm, m = 0 + ++lm; #ifdef _OPENMP #pragma omp parallel for #endif - for (int i=0;i 0 - const double same = c * sqrt - ( - static_cast(Fact(l - m)) / - static_cast(Fact(l + m)) - ) - *ModuleBase::SQRT2; + for (int m = 1; m <= l; m++) + { + // 
Y_lm, m > 0 + const double same = c + * sqrt (static_cast (Fact (l - m)) / static_cast (Fact (l + m))) + * ModuleBase::SQRT2; - ++lm; + ++lm; #ifdef _OPENMP #pragma omp parallel for #endif - for (int i=0;i *g, - matrix &ylm, - matrix &dylmx, - matrix &dylmy, - matrix &dylmz - ) +void + YlmReal::grad_Ylm_Real (const int lmax2, + const int ng, + const ModuleBase::Vector3* g, + matrix& ylm, + matrix& dylmx, + matrix& dylmy, + matrix& dylmz) { - ModuleBase::Ylm::set_coefficients(); - const int lmax = int(sqrt( double(lmax2) ) + 0.1) - 1; - std::vector tmpylm((lmax2+1) * (lmax2+1)); - std::vector tmpgylm((lmax2+1) * (lmax2+1) * 3); - - for (int ig = 0;ig < ng;ig++) - { - ModuleBase::Vector3 gg = g[ig]; - double gmod = gg.norm(); - if (gmod < 1.0e-9) + ModuleBase::Ylm::set_coefficients (); + const int lmax = int (sqrt (double (lmax2)) + 0.1) - 1; + std::vector tmpylm ((lmax2 + 1) * (lmax2 + 1)); + std::vector tmpgylm ((lmax2 + 1) * (lmax2 + 1) * 3); + + for (int ig = 0; ig < ng; ig++) { - for(int lm = 0 ; lm < lmax2 ; ++lm) - { - if(lm == 0) - ylm(lm,ig) = ModuleBase::SQRT_INVERSE_FOUR_PI; - else - ylm(lm,ig) = 0; - dylmx(lm,ig) = dylmy(lm,ig) = dylmz(lm,ig) = 0; - } - } - else - { - Ylm::grad_rl_sph_harm(lmax2, gg.x, gg.y, gg.z, tmpylm.data(), tmpgylm.data()); - int lm = 0; - for(int il = 0 ; il <= lmax ; ++il) - { - for(int im = 0; im < 2*il+1; ++im, ++lm) - { - double rlylm = tmpylm[lm]; - ylm(lm,ig) = rlylm / pow(gmod,il); - dylmx(lm,ig) = ( tmpgylm[lm*3] - il*rlylm * gg.x / pow(gmod,2) )/pow(gmod,il); - dylmy(lm,ig) = ( tmpgylm[lm*3 + 1] - il*rlylm * gg.y / pow(gmod,2) )/pow(gmod,il); - dylmz(lm,ig) = ( tmpgylm[lm*3 + 2] - il*rlylm * gg.z / pow(gmod,2) )/pow(gmod,il); - } - } - - } - } - return; + ModuleBase::Vector3 gg = g[ig]; + double gmod = gg.norm (); + if (gmod < 1.0e-9) + { + for (int lm = 0; lm < lmax2; ++lm) + { + if (lm == 0) + { + ylm (lm, ig) = ModuleBase::SQRT_INVERSE_FOUR_PI; + } + else + { + ylm (lm, ig) = 0; + } + dylmx (lm, ig) = dylmy (lm, ig) = 
dylmz (lm, ig) = 0; + } + } + else + { + Ylm::grad_rl_sph_harm (lmax2, gg.x, gg.y, gg.z, tmpylm.data (), tmpgylm.data ()); + int lm = 0; + for (int il = 0; il <= lmax; ++il) + { + for (int im = 0; im < 2 * il + 1; ++im, ++lm) + { + double rlylm = tmpylm[lm]; + ylm (lm, ig) = rlylm / pow (gmod, il); + dylmx (lm, ig) + = (tmpgylm[lm * 3] - il * rlylm * gg.x / pow (gmod, 2)) / pow (gmod, il); + dylmy (lm, ig) + = (tmpgylm[lm * 3 + 1] - il * rlylm * gg.y / pow (gmod, 2)) / pow (gmod, il); + dylmz (lm, ig) + = (tmpgylm[lm * 3 + 2] - il * rlylm * gg.z / pow (gmod, 2)) / pow (gmod, il); + } + } + } + } + return; } - //========================================================== // MEMBER FUNCTION : // NAME : Fact ( n! ) // NAME : Semi_Fact ( n!! ) //========================================================== -long double YlmReal::Fact(const int n) +long double + YlmReal::Fact (const int n) { long double f = 1; - for (int i=n; i>1; i--) - { - f *= i; - } + for (int i = n; i > 1; i--) + { + f *= i; + } return f; } -int YlmReal::Semi_Fact(const int n) +int + YlmReal::Semi_Fact (const int n) { int semif = 1; - for (int i=n; i>2; i -= 2) - { - semif *= i; - } + for (int i = n; i > 2; i -= 2) + { + semif *= i; + } return semif; } -template void YlmReal::Ylm_Real(base_device::DEVICE_CPU*, - int, - int, - const float*, - float*); -template void YlmReal::Ylm_Real(base_device::DEVICE_CPU*, - int, - int, - const double*, - double*); +template void + YlmReal::Ylm_Real (base_device::DEVICE_CPU*, int, int, const float*, float*); +template void + YlmReal::Ylm_Real (base_device::DEVICE_CPU*, int, int, const double*, double*); #if ((defined __CUDA) || (defined __ROCM)) -template void YlmReal::Ylm_Real(base_device::DEVICE_GPU*, - int, - int, - const float*, - float*); -template void YlmReal::Ylm_Real(base_device::DEVICE_GPU*, - int, - int, - const double*, - double*); +template void + YlmReal::Ylm_Real (base_device::DEVICE_GPU*, int, int, const float*, float*); +template void + 
YlmReal::Ylm_Real (base_device::DEVICE_GPU*, int, int, const double*, double*); #endif -} // namespace ModuleBase +} // namespace ModuleBase diff --git a/source/source_base/math_ylmreal.h b/source/source_base/math_ylmreal.h index 9643196a21e..4cf6ab2e8d3 100644 --- a/source/source_base/math_ylmreal.h +++ b/source/source_base/math_ylmreal.h @@ -7,101 +7,81 @@ namespace ModuleBase { -class YlmReal +class YlmReal { - public: - - YlmReal(); - ~YlmReal(); - - /** - * @brief spherical harmonic function (real form) an array of vectors - * - * @param lmax2 [in] lmax2 = (lmax + 1)^2 ; lmax = angular quantum number - * @param ng [in] the number of vectors - * @param g [in] an array of vectors - * @param ylm [out] Ylm; column index represent vector, row index represent Y00, Y10, Y11, Y1-1, Y20,Y21,Y2-1,Y22.Y2-2,...; - */ - static void Ylm_Real - ( - const int lmax2, - const int ng, - const ModuleBase::Vector3 *g, - matrix &ylm - ); + public: + YlmReal (); + ~YlmReal (); /** - * @brief spherical harmonic function (real form) an array - * - * @param lmax2 [in] lmax2 = (lmax + 1)^2 ; lmax = angular quantum number - * @param ng [in] the number of vectors - * @param g [in] an array of vectors - * @param ylm [out] Ylm; column index represent vector, row index represent Y00, Y10, Y11, Y1-1, Y20,Y21,Y2-1,Y22.Y2-2,...; - */ - template - static void Ylm_Real(Device * ctx, const int lmax2, const int ng, const FPTYPE *g, FPTYPE * ylm); + * @brief spherical harmonic function (real form) an array of vectors + * + * @param lmax2 [in] lmax2 = (lmax + 1)^2 ; lmax = angular quantum number + * @param ng [in] the number of vectors + * @param g [in] an array of vectors + * @param ylm [out] Ylm; column index represent vector, row index represent Y00, Y10, Y11, Y1-1, + * Y20,Y21,Y2-1,Y22.Y2-2,...; + */ + static void Ylm_Real (const int lmax2, const int ng, const ModuleBase::Vector3* g, matrix& ylm); - /** - * @brief gradient of spherical harmonic function (real form) an array of vectors - * - * 
@param lmax2 [in] lmax2 = (lmax + 1)^2 ; lmax = angular quantum number - * @param ng [in] the number of vectors - * @param g [in] an array of vectors - * @param ylm [out] Ylm; column index represent vector, row index represent Y00, Y10, Y11, Y1-1, Y20,Y21,Y2-1,Y22.Y2-2,...; - * @param dylmx/dylmy/dylmz [out] \nabla Ylm; column index represent vector, row index represent dY00/dxyz, dY10/dxyz,...; - */ - static void grad_Ylm_Real - ( - const int lmax2, - const int ng, - const ModuleBase::Vector3 *g, - matrix &ylm, - matrix &dylmx, - matrix &dylmy, - matrix &dylmz - ); - - /** - * @brief spherical harmonic function (Herglotz generating form) of an array of vectors - * - * @param lmax2 [in] lmax2 = (lmax + 1)^2 ; lmax = angular quantum number - * @param ng [in] the number of vectors - * @param g [in] an array of vectors - * @param ylm [out] Ylm; column index represent vector, row index represent Y00, Y10, Y11, Y1-1, Y20,Y21,Y2-1,Y22.Y2-2,...; - */ - static void Ylm_Real2 - ( - const int lmax2, - const int ng, - const ModuleBase::Vector3 *g, - matrix &ylm - ); + /** + * @brief spherical harmonic function (real form) an array + * + * @param lmax2 [in] lmax2 = (lmax + 1)^2 ; lmax = angular quantum number + * @param ng [in] the number of vectors + * @param g [in] an array of vectors + * @param ylm [out] Ylm; column index represent vector, row index represent Y00, Y10, Y11, Y1-1, + * Y20,Y21,Y2-1,Y22.Y2-2,...; + */ + template + static void Ylm_Real (Device* ctx, const int lmax2, const int ng, const FPTYPE* g, FPTYPE* ylm); - /** - * @brief spherical harmonic function (Herglotz generating form) of a vector - * - * @param lmax [in] maximum angular quantum number - * @param x [in] x part of the vector - * @param y [in] y part of the vector - * @param z [in] z part of the vector - * @param rly [in] Ylm, Y00, Y10, Y11, Y1-1, Y20,Y21,Y2-1,Y22.Y2-2,... 
- */ - static void rlylm - ( - const int lmax, - const double& x, - const double& y, - const double& z, - double* rly - ); + /** + * @brief gradient of spherical harmonic function (real form) an array of vectors + * + * @param lmax2 [in] lmax2 = (lmax + 1)^2 ; lmax = angular quantum number + * @param ng [in] the number of vectors + * @param g [in] an array of vectors + * @param ylm [out] Ylm; column index represent vector, row index represent Y00, Y10, Y11, Y1-1, + * Y20,Y21,Y2-1,Y22.Y2-2,...; + * @param dylmx/dylmy/dylmz [out] \nabla Ylm; column index represent vector, row index represent dY00/dxyz, + * dY10/dxyz,...; + */ + static void grad_Ylm_Real (const int lmax2, + const int ng, + const ModuleBase::Vector3* g, + matrix& ylm, + matrix& dylmx, + matrix& dylmy, + matrix& dylmz); - private: + /** + * @brief spherical harmonic function (Herglotz generating form) of an array of vectors + * + * @param lmax2 [in] lmax2 = (lmax + 1)^2 ; lmax = angular quantum number + * @param ng [in] the number of vectors + * @param g [in] an array of vectors + * @param ylm [out] Ylm; column index represent vector, row index represent Y00, Y10, Y11, Y1-1, + * Y20,Y21,Y2-1,Y22.Y2-2,...; + */ + static void Ylm_Real2 (const int lmax2, const int ng, const ModuleBase::Vector3* g, matrix& ylm); - static long double Fact(const int n); - static int Semi_Fact(const int n); + /** + * @brief spherical harmonic function (Herglotz generating form) of a vector + * + * @param lmax [in] maximum angular quantum number + * @param x [in] x part of the vector + * @param y [in] y part of the vector + * @param z [in] z part of the vector + * @param rly [in] Ylm, Y00, Y10, Y11, Y1-1, Y20,Y21,Y2-1,Y22.Y2-2,... 
+ */ + static void rlylm (const int lmax, const double& x, const double& y, const double& z, double* rly); + private: + static long double Fact (const int n); + static int Semi_Fact (const int n); }; -} +} // namespace ModuleBase #endif diff --git a/source/source_base/mathzone.h b/source/source_base/mathzone.h index 80239d58a18..cc373f37fdb 100644 --- a/source/source_base/mathzone.h +++ b/source/source_base/mathzone.h @@ -21,8 +21,8 @@ namespace ModuleBase class Mathzone { public: - Mathzone(); - ~Mathzone(); + Mathzone (); + ~Mathzone (); public: /** @@ -35,12 +35,15 @@ class Mathzone * @author Peize Lin (2016-08-03) */ template - static std::vector Pointwise_Product(const std::vector &f1, const std::vector &f2) + static std::vector + Pointwise_Product (const std::vector& f1, const std::vector& f2) { - assert(f1.size() == f2.size()); - std::vector f(f1.size()); - for (int ir = 0; ir != f.size(); ++ir) - f[ir] = f1[ir] * f2[ir]; + assert (f1.size () == f2.size ()); + std::vector f (f1.size ()); + for (int ir = 0; ir != f.size (); ++ir) + { + f[ir] = f1[ir] * f2[ir]; + } return f; } @@ -62,23 +65,24 @@ class Mathzone * @param[in] R33 * @param[out] cx Cartesian coordinats * @param[out] cy - * @param[out] cz + * @param[out] cz */ - static inline void Direct_to_Cartesian(const double &dx, - const double &dy, - const double &dz, - const double &R11, - const double &R12, - const double &R13, - const double &R21, - const double &R22, - const double &R23, - const double &R31, - const double &R32, - const double &R33, - double &cx, - double &cy, - double &cz) + static inline void + Direct_to_Cartesian (const double& dx, + const double& dy, + const double& dz, + const double& R11, + const double& R12, + const double& R13, + const double& R21, + const double& R22, + const double& R23, + const double& R31, + const double& R32, + const double& R33, + double& cx, + double& cy, + double& cz) { ModuleBase::Matrix3 lattice_vector; ModuleBase::Vector3 direct_vec, cartesian_vec; @@ 
-123,21 +127,22 @@ class Mathzone * @param[out] dy * @param[out] dz */ - static inline void Cartesian_to_Direct(const double &cx, - const double &cy, - const double &cz, - const double &R11, - const double &R12, - const double &R13, - const double &R21, - const double &R22, - const double &R23, - const double &R31, - const double &R32, - const double &R33, - double &dx, - double &dy, - double &dz) + static inline void + Cartesian_to_Direct (const double& cx, + const double& cy, + const double& cz, + const double& R11, + const double& R12, + const double& R13, + const double& R21, + const double& R22, + const double& R23, + const double& R31, + const double& R32, + const double& R33, + double& dx, + double& dy, + double& dz) { ModuleBase::Matrix3 lattice_vector, inv_lat; lattice_vector.e11 = R11; @@ -150,7 +155,7 @@ class Mathzone lattice_vector.e32 = R32; lattice_vector.e33 = R33; - inv_lat = lattice_vector.Inverse(); + inv_lat = lattice_vector.Inverse (); ModuleBase::Vector3 direct_vec, cartesian_vec; cartesian_vec.x = cx; @@ -163,15 +168,16 @@ class Mathzone return; } - template - static ModuleBase::Vector3 latvec_projection(const std::array,3> &latvec) + template + static ModuleBase::Vector3 + latvec_projection (const std::array, 3>& latvec) { ModuleBase::Vector3 proj; - proj.x = std::abs( latvec[0] * (latvec[1] ^ latvec[2]).normalize() ); - proj.y = std::abs( latvec[1] * (latvec[2] ^ latvec[0]).normalize() ); - proj.z = std::abs( latvec[2] * (latvec[0] ^ latvec[1]).normalize() ); + proj.x = std::abs (latvec[0] * (latvec[1] ^ latvec[2]).normalize ()); + proj.y = std::abs (latvec[1] * (latvec[2] ^ latvec[0]).normalize ()); + proj.z = std::abs (latvec[2] * (latvec[0] ^ latvec[1]).normalize ()); return proj; - } + } }; } // namespace ModuleBase diff --git a/source/source_base/mathzone_add1.cpp b/source/source_base/mathzone_add1.cpp index 7f05961265e..ecae090c35a 100644 --- a/source/source_base/mathzone_add1.cpp +++ b/source/source_base/mathzone_add1.cpp @@ -13,14 
+13,14 @@ #define c_re(c) ((c)[0]) #define c_im(c) ((c)[1]) #else -#include -//#include "fftw3-mpi_mkl.h" +#include +// #include "fftw3-mpi_mkl.h" #define c_re(c) ((c)[0]) #define c_im(c) ((c)[1]) #endif typedef fftw_complex FFTW_COMPLEX; -//#include +// #include #ifdef _OPENMP #include #endif @@ -31,385 +31,373 @@ namespace ModuleBase double** Mathzone_Add1::c_ln_c = nullptr; double** Mathzone_Add1::c_ln_s = nullptr; -Mathzone_Add1::Mathzone_Add1() -{} +Mathzone_Add1::Mathzone_Add1 () {} -Mathzone_Add1::~Mathzone_Add1() -{} +Mathzone_Add1::~Mathzone_Add1 () {} - -double Mathzone_Add1::factorial (const int& l) +double + Mathzone_Add1::factorial (const int& l) { - if (l == 0 || l == 1) return 1.0; - else return l*factorial(l-1); + if (l == 0 || l == 1) + return 1.0; + else + return l * factorial (l - 1); } -double Mathzone_Add1::dualfac (const int& l) +double + Mathzone_Add1::dualfac (const int& l) { - if (l == -1 || l == 0) return 1.0; - else return l * dualfac (l-2); + if (l == -1 || l == 0) + return 1.0; + else + return l * dualfac (l - 2); } - void Mathzone_Add1::SplineD2 // modified by pengfei 13-8-8 add second derivative as a condition -( - const double *rad, - const double *rad_f, - const int& mesh, - const double &yp1, // if yp1 > ypmax, consider the second derivative - const double &ypn, - double* y2 -) + (const double* rad, + const double* rad_f, + const int& mesh, + const double& yp1, // if yp1 > ypmax, consider the second derivative + const double& ypn, + double* y2) { - ModuleBase::timer::start("Mathzone_Add1","SplineD2"); - - double dx1 = 0.0, dx2 = 0.0, dy1 = 0.0, dy2 = 0.0, p = 0.0, qn = 0.0, sig = 0.0, un = 0.0; - double * u = nullptr; - - u = new double[mesh-1]; - const double ypmax = 99999.00; - - if (yp1 > ypmax) - { - y2[0] = 0.0; - u[0] = 0.0; - } - else - { - y2[0] = -0.5; - - dx1 = rad[1] - rad[0]; - dy1 = rad_f[1] - rad_f[0]; - - u[0] = ( 3.0 / dx1 ) * (dy1 / dx1 - yp1); - } - - for(int i = 1; i < mesh-1 ; i++) - { - dx1 = rad[i] - rad[i-1]; 
- dx2 = rad[i+1] - rad[i-1]; - dy1 = rad_f[i+1] - rad_f[i]; - dy2 = rad_f[i] - rad_f[i-1]; - - sig = dx1 / dx2; - - p = sig * y2[i-1] + 2.0; - y2[i] = (sig - 1.0) / p; - - u[i] = dy1 / (dx2 - dx1) - dy2 / dx1; - u[i] = (6.0 * u[i] / dx2 - sig * u[i-1]) / p; - } - - if (ypn > ypmax) - { - qn = un = 0.0; - } - else - { - dx1 = rad[mesh-1] - rad[mesh-2]; - dy1 = rad_f[mesh-1] - rad_f[mesh-2]; - - qn = 0.5; - un = 3.0 / dx1 * (ypn - dy1 / dx1); - } - - y2[mesh-1] = (un - qn * u[mesh-2]) / (qn * y2[mesh-2] + 1.0); - - for(int i = mesh-2; i >= 0; i--) - { - y2[i] = y2[i] * y2[i+1] + u[i]; - } - - delete[] u; - - ModuleBase::timer::end("Mathzone_Add1","SplineD2"); + ModuleBase::timer::start ("Mathzone_Add1", "SplineD2"); + + double dx1 = 0.0, dx2 = 0.0, dy1 = 0.0, dy2 = 0.0, p = 0.0, qn = 0.0, sig = 0.0, un = 0.0; + double* u = nullptr; + + u = new double[mesh - 1]; + const double ypmax = 99999.00; + + if (yp1 > ypmax) + { + y2[0] = 0.0; + u[0] = 0.0; + } + else + { + y2[0] = -0.5; + + dx1 = rad[1] - rad[0]; + dy1 = rad_f[1] - rad_f[0]; + + u[0] = (3.0 / dx1) * (dy1 / dx1 - yp1); + } + + for (int i = 1; i < mesh - 1; i++) + { + dx1 = rad[i] - rad[i - 1]; + dx2 = rad[i + 1] - rad[i - 1]; + dy1 = rad_f[i + 1] - rad_f[i]; + dy2 = rad_f[i] - rad_f[i - 1]; + + sig = dx1 / dx2; + + p = sig * y2[i - 1] + 2.0; + y2[i] = (sig - 1.0) / p; + + u[i] = dy1 / (dx2 - dx1) - dy2 / dx1; + u[i] = (6.0 * u[i] / dx2 - sig * u[i - 1]) / p; + } + + if (ypn > ypmax) + { + qn = un = 0.0; + } + else + { + dx1 = rad[mesh - 1] - rad[mesh - 2]; + dy1 = rad_f[mesh - 1] - rad_f[mesh - 2]; + + qn = 0.5; + un = 3.0 / dx1 * (ypn - dy1 / dx1); + } + + y2[mesh - 1] = (un - qn * u[mesh - 2]) / (qn * y2[mesh - 2] + 1.0); + + for (int i = mesh - 2; i >= 0; i--) + { + y2[i] = y2[i] * y2[i + 1] + u[i]; + } + + delete[] u; + + ModuleBase::timer::end ("Mathzone_Add1", "SplineD2"); } -// Peize Lin add openmp 2019-12-13 -void Mathzone_Add1::Cubic_Spline_Interpolation -( - const double * const rad, - const double * 
const rad_f, - const double * const y2, - const int& mesh, - const double * const r, - const int& rsize, - double * const y, - double * const dy -) -{ - ModuleBase::timer::start("Mathzone","cubic_spline"); +// Peize Lin add openmp 2019-12-13 +void + Mathzone_Add1::Cubic_Spline_Interpolation (const double* const rad, + const double* const rad_f, + const double* const y2, + const int& mesh, + const double* const r, + const int& rsize, + double* const y, + double* const dy) +{ + ModuleBase::timer::start ("Mathzone", "cubic_spline"); #ifdef _OPENMP - #pragma omp parallel for schedule(static) +#pragma omp parallel for schedule(static) #endif - for(int m = 0; m < rsize ; m++) - { - int klo = 0; - int khi = mesh-1; - while (khi - klo > 1) - { - const int k = (khi + klo) / 2 ; - if(rad[k] > r[m]) khi = k; - else klo = k; - } - - const double h = rad[khi] - rad[klo]; - if(h == 0.0) - { - std::cout << "Cubic_Spline_Interpolation, h == 0.0 so that cannot be divided" << std::endl; - //ModuleBase::WARNING_QUIT("Cubic_Spline_Interpolation","h == 0.0 so that cannot be divided"); - exit(0); - } - - const double a = (rad[khi] - r[m]) / h; - const double b = (r[m] - rad[klo]) / h; - - const double dy_tmp = (rad_f[khi] - rad_f[klo]) / h - - (3.0 * a * a - 1.0) / 6.0 * h * y2[klo] + - ( 3.0 * b * b - 1.0) / 6.0 * h * y2[khi]; - dy[m] = dy_tmp; - const double y_tmp = a * rad_f[klo] + b * rad_f[khi] + ((a*a*a - a) * y2[klo] + (b*b*b - b) * y2[khi]) * (h*h) / 6.0; - y[m] = y_tmp; - //const double ddy_tmp = a * y2[klo] + b * y2 [khi]; - //ddy[m] = ddy_tmp; - } - - ModuleBase::timer::end("Mathzone","cubic_spline"); + for (int m = 0; m < rsize; m++) + { + int klo = 0; + int khi = mesh - 1; + while (khi - klo > 1) + { + const int k = (khi + klo) / 2; + if (rad[k] > r[m]) + khi = k; + else + klo = k; + } + + const double h = rad[khi] - rad[klo]; + if (h == 0.0) + { + std::cout << "Cubic_Spline_Interpolation, h == 0.0 so that cannot be divided" << std::endl; + // 
ModuleBase::WARNING_QUIT("Cubic_Spline_Interpolation","h == 0.0 so that cannot be divided"); + exit (0); + } + + const double a = (rad[khi] - r[m]) / h; + const double b = (r[m] - rad[klo]) / h; + + const double dy_tmp = (rad_f[khi] - rad_f[klo]) / h - (3.0 * a * a - 1.0) / 6.0 * h * y2[klo] + + (3.0 * b * b - 1.0) / 6.0 * h * y2[khi]; + dy[m] = dy_tmp; + const double y_tmp = a * rad_f[klo] + b * rad_f[khi] + + ((a * a * a - a) * y2[klo] + (b * b * b - b) * y2[khi]) * (h * h) / 6.0; + y[m] = y_tmp; + // const double ddy_tmp = a * y2[klo] + b * y2 [khi]; + // ddy[m] = ddy_tmp; + } + + ModuleBase::timer::end ("Mathzone", "cubic_spline"); } /// Interpolation for Numerical Orbitals -double Mathzone_Add1::Uni_RadialF -( - const double* old_phi, - const int& msh, - const double& dr, - const double& newr -) +double + Mathzone_Add1::Uni_RadialF (const double* old_phi, const int& msh, const double& dr, const double& newr) { - double h1 = 0.0, h2 = 0.0, h3 = 0.0, f1 = 0.0, f2 = 0.0, f3 = 0.0, f4 = 0.0; - double g1 = 0.0, g2 = 0.0, x1 = 0.0, x2 = 0.0, y1 = 0.0, y2 = 0.0, f = 0.0; - double result = 0.0; - double rmax = (msh-1) * dr; - - if (newr < 0.0) - { - throw std::runtime_error("newr should >= 0. "+std::string(__FILE__)+" line "+std::to_string(__LINE__)); - - } - else if ( rmax <= newr ) - { - f = 0.0; - } - else - { - // the old position. 
- const int m = static_cast (newr / dr) + 1; - - // Spline like interpolation - if (m == 1) - { - h2 = dr; - h3 = dr; - - f2 = old_phi[m-1]; - f3 = old_phi[m]; - f4 = old_phi[m+1]; - - h1 = -(h2+h3); - f1 = f4; - } - else if (m == (msh-1)) - { - h1 = dr; - h2 = dr; - - f1 = old_phi[m-2]; - f2 = old_phi[m-1]; - f3 = old_phi[m]; - - h3 = -(h1+h2); - f4 = f1; - } - else - { - h1 = dr; - h2 = dr; - h3 = dr; - - f1 = old_phi[m-2]; - f2 = old_phi[m-1]; - f3 = old_phi[m]; - f4 = old_phi[m+1]; - } - - // Calculate the value at newr - - g1 = ((f3-f2)*h1/h2 + (f2-f1)*h2/h1)/(h1+h2); - g2 = ((f4-f3)*h2/h3 + (f3-f2)*h3/h2)/(h2+h3); - - x1 = newr - (m-1)*dr; - x2 = newr - m*dr; - y1 = x1/h2; - y2 = x2/h2; - - f = y2*y2*(3.0*f2 + h2*g1 + (2.0*f2 + h2*g1)*y2) - + y1*y1*(3.0*f3 - h2*g2 - (2.0*f3 - h2*g2)*y1); - } - result = f; - - return result; + double h1 = 0.0, h2 = 0.0, h3 = 0.0, f1 = 0.0, f2 = 0.0, f3 = 0.0, f4 = 0.0; + double g1 = 0.0, g2 = 0.0, x1 = 0.0, x2 = 0.0, y1 = 0.0, y2 = 0.0, f = 0.0; + double result = 0.0; + double rmax = (msh - 1) * dr; + + if (newr < 0.0) + { + throw std::runtime_error ("newr should >= 0. " + std::string (__FILE__) + " line " + + std::to_string (__LINE__)); + } + else if (rmax <= newr) + { + f = 0.0; + } + else + { + // the old position. 
+ const int m = static_cast (newr / dr) + 1; + + // Spline like interpolation + if (m == 1) + { + h2 = dr; + h3 = dr; + + f2 = old_phi[m - 1]; + f3 = old_phi[m]; + f4 = old_phi[m + 1]; + + h1 = -(h2 + h3); + f1 = f4; + } + else if (m == (msh - 1)) + { + h1 = dr; + h2 = dr; + + f1 = old_phi[m - 2]; + f2 = old_phi[m - 1]; + f3 = old_phi[m]; + + h3 = -(h1 + h2); + f4 = f1; + } + else + { + h1 = dr; + h2 = dr; + h3 = dr; + + f1 = old_phi[m - 2]; + f2 = old_phi[m - 1]; + f3 = old_phi[m]; + f4 = old_phi[m + 1]; + } + + // Calculate the value at newr + + g1 = ((f3 - f2) * h1 / h2 + (f2 - f1) * h2 / h1) / (h1 + h2); + g2 = ((f4 - f3) * h2 / h3 + (f3 - f2) * h3 / h2) / (h2 + h3); + + x1 = newr - (m - 1) * dr; + x2 = newr - m * dr; + y1 = x1 / h2; + y2 = x2 / h2; + + f = y2 * y2 * (3.0 * f2 + h2 * g1 + (2.0 * f2 + h2 * g1) * y2) + + y1 * y1 * (3.0 * f3 - h2 * g2 - (2.0 * f3 - h2 * g2) * y1); + } + result = f; + + return result; } -void Mathzone_Add1::Uni_Deriv_Phi -( - const double *radf, - const int &mesh, - const double &dr, - const int &nd, - double *phind -) +void + Mathzone_Add1::Uni_Deriv_Phi (const double* radf, const int& mesh, const double& dr, const int& nd, double* phind) { - ModuleBase::timer::start("Mathzone_Add1", "Uni_Deriv_Phi"); - int FFT_NR = 2*mesh-1; // FFT_NR = 16019 - // std::cout << "\n mesh=" << mesh << ", radf[8010]=" << radf[8010] << ", radf[8009]=" << radf[8009] ; - // mesh=8010, radf[8010]=4.396478951532926e-01, radf[8009]=0.000000000000000e+00 - - fftw_complex *fft_phir = new fftw_complex[FFT_NR]; - fftw_complex *fft_phik = new fftw_complex[FFT_NR]; - fftw_complex *fft_ndphik = new fftw_complex[FFT_NR]; - fftw_complex *fft_ndphir = new fftw_complex[FFT_NR]; - fftw_plan p1; - fftw_plan p2; - - ////CAREFUL: POINT 0 is OF GOOD IMPORTANCE - //for (int ir = 0; ir < FFT_NR/2; ++ir) - //{ - //fft_phir[ir].re = radf[ir]; - //fft_phir[ir].im = 0.0; - //} - - //for (int ir = FFT_NR/2; ir < FFT_NR; ++ir) - //{ - //int jr = FFT_NR - ir; - //fft_phir[ir].re = 
radf[jr]; - //fft_phir[ir].im = 0.0; - //} - // - // second call: different value at [8010]; FFT_NR = 16019, FFT_NR/2=8009 :: - // CAREFUL: POINT 0 is OF GOOD IMPORTANCE - for (int ir = 0; ir < FFT_NR/2; ++ir) // ik = 0 1 ... 8008 - { - c_re(fft_phir[ir]) = radf[ir]; - c_im(fft_phir[ir]) = 0.0; - } - - for (int ir = FFT_NR/2; ir < FFT_NR; ++ir) // ir = 8009 8010 ... 16018 - { - //int jr = FFT_NR - ir ; // jr = 8010 8009 ... 1 - int jr = FFT_NR - ir -1 ; // -> 8009 8008 ... 0 - c_re(fft_phir[ir]) = radf[jr]; - c_im(fft_phir[ir]) = 0.0; - } - - // FFTW + ModuleBase::timer::start ("Mathzone_Add1", "Uni_Deriv_Phi"); + int FFT_NR = 2 * mesh - 1; // FFT_NR = 16019 + // std::cout << "\n mesh=" << mesh << ", radf[8010]=" << radf[8010] << ", radf[8009]=" << radf[8009] ; + // mesh=8010, radf[8010]=4.396478951532926e-01, radf[8009]=0.000000000000000e+00 + + fftw_complex* fft_phir = new fftw_complex[FFT_NR]; + fftw_complex* fft_phik = new fftw_complex[FFT_NR]; + fftw_complex* fft_ndphik = new fftw_complex[FFT_NR]; + fftw_complex* fft_ndphir = new fftw_complex[FFT_NR]; + fftw_plan p1; + fftw_plan p2; + + ////CAREFUL: POINT 0 is OF GOOD IMPORTANCE + // for (int ir = 0; ir < FFT_NR/2; ++ir) + //{ + // fft_phir[ir].re = radf[ir]; + // fft_phir[ir].im = 0.0; + //} + + // for (int ir = FFT_NR/2; ir < FFT_NR; ++ir) + //{ + // int jr = FFT_NR - ir; + // fft_phir[ir].re = radf[jr]; + // fft_phir[ir].im = 0.0; + //} + // + // second call: different value at [8010]; FFT_NR = 16019, FFT_NR/2=8009 :: + // CAREFUL: POINT 0 is OF GOOD IMPORTANCE + for (int ir = 0; ir < FFT_NR / 2; ++ir) // ik = 0 1 ... 8008 + { + c_re (fft_phir[ir]) = radf[ir]; + c_im (fft_phir[ir]) = 0.0; + } + + for (int ir = FFT_NR / 2; ir < FFT_NR; ++ir) // ir = 8009 8010 ... 16018 + { + // int jr = FFT_NR - ir ; // jr = 8010 8009 ... 1 + int jr = FFT_NR - ir - 1; // -> 8009 8008 ... 
0 + c_re (fft_phir[ir]) = radf[jr]; + c_im (fft_phir[ir]) = 0.0; + } + + // FFTW #if defined __FFTW3 - //std::cout << "\n Call FFTW3 "; - p1 = fftw_plan_dft_1d(FFT_NR, fft_phir, fft_phik, FFTW_FORWARD, FFTW_ESTIMATE); - fftw_execute(p1); - //fftw_destroy_plan(p1); + // std::cout << "\n Call FFTW3 "; + p1 = fftw_plan_dft_1d (FFT_NR, fft_phir, fft_phik, FFTW_FORWARD, FFTW_ESTIMATE); + fftw_execute (p1); + // fftw_destroy_plan(p1); #elif defined __FFTW2 - //std::cout << "\n Call FFTW2 "; - p1 = fftw_create_plan(FFT_NR, FFTW_FORWARD, FFTW_ESTIMATE); - fftw_one(p1, fft_phir, fft_phik); - //fftw_destroy_plan(p1); + // std::cout << "\n Call FFTW2 "; + p1 = fftw_create_plan (FFT_NR, FFTW_FORWARD, FFTW_ESTIMATE); + fftw_one (p1, fft_phir, fft_phik); + // fftw_destroy_plan(p1); #endif - - double dk_uniform = ModuleBase::TWO_PI / FFT_NR / dr; - - //for (int ik = 0; ik < FFT_NR/2; ik++) - //{ - //double kp = ik * dk_uniform; - //fft_ndphik[ik].re = pow(kp, nd) * fft_phik[ik].re; - //fft_ndphik[ik].im = 0.0; - //} - - //for (int ik = FFT_NR/2; ik < FFT_NR; ik++) - //{ - //double kp = -(FFT_NR - ik)* dk_uniform; - //fft_ndphik[ik].re = pow(kp, nd) * fft_phik[ik].re; - //fft_ndphik[ik].im = 0.0; - //} - - for (int ik = 0; ik < FFT_NR/2; ik++) // ik = 0 1 ... 8008 - { - double kp = ik * dk_uniform; - c_re(fft_ndphik[ik]) = pow(kp, nd) * c_re(fft_phik[ik]); - c_im(fft_ndphik[ik]) = 0.0; - } - for (int ik = FFT_NR/2; ik < FFT_NR; ik++) // ik = 8009 8010 ... 16018 - { - //double kp = -(FFT_NR - ik )* dk_uniform; //(...) = 8010 8009 ... 1 - double kp = -(FFT_NR - ik -1)* dk_uniform; //(...) -> 8009 8008 ... 
0 - c_re(fft_ndphik[ik]) = pow(kp, nd) * c_re(fft_phik[ik]); - c_im(fft_ndphik[ik]) = 0.0; - } + double dk_uniform = ModuleBase::TWO_PI / FFT_NR / dr; + + // for (int ik = 0; ik < FFT_NR/2; ik++) + //{ + // double kp = ik * dk_uniform; + // fft_ndphik[ik].re = pow(kp, nd) * fft_phik[ik].re; + // fft_ndphik[ik].im = 0.0; + //} + + // for (int ik = FFT_NR/2; ik < FFT_NR; ik++) + //{ + // double kp = -(FFT_NR - ik)* dk_uniform; + // fft_ndphik[ik].re = pow(kp, nd) * fft_phik[ik].re; + // fft_ndphik[ik].im = 0.0; + //} + + for (int ik = 0; ik < FFT_NR / 2; ik++) // ik = 0 1 ... 8008 + { + double kp = ik * dk_uniform; + c_re (fft_ndphik[ik]) = pow (kp, nd) * c_re (fft_phik[ik]); + c_im (fft_ndphik[ik]) = 0.0; + } + for (int ik = FFT_NR / 2; ik < FFT_NR; ik++) // ik = 8009 8010 ... 16018 + { + // double kp = -(FFT_NR - ik )* dk_uniform; //(...) = 8010 8009 ... 1 + double kp = -(FFT_NR - ik - 1) * dk_uniform; //(...) -> 8009 8008 ... 0 + c_re (fft_ndphik[ik]) = pow (kp, nd) * c_re (fft_phik[ik]); + c_im (fft_ndphik[ik]) = 0.0; + } #if defined __FFTW3 - p2 = fftw_plan_dft_1d(FFT_NR, fft_ndphik, fft_ndphir, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(p2); - //fftw_destroy_plan(p2); + p2 = fftw_plan_dft_1d (FFT_NR, fft_ndphik, fft_ndphir, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (p2); + // fftw_destroy_plan(p2); #elif defined __FFTW2 - p2 = fftw_create_plan(FFT_NR, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_one(p2, fft_ndphik, fft_ndphir); - //fftw_destroy_plan(p2); + p2 = fftw_create_plan (FFT_NR, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_one (p2, fft_ndphik, fft_ndphir); + // fftw_destroy_plan(p2); #endif - bool is_re = true; - double fac = 0.0; - if (nd % 4 == 0) - { - is_re = true; - fac = 1.0; - } - else if (nd % 4 == 1) - { - is_re = false; - fac = -1.0; - } - else if (nd % 4 == 2) - { - is_re = true; - fac = -1.0; - } - else - { - is_re = false; - fac = 1.0; - } - - for (int ir = 0; ir < mesh; ir++) - { - if (is_re) - { - phind[ir] = fac * c_re(fft_ndphir[ir]) / FFT_NR; 
- } - else - { - phind[ir] = fac * c_im(fft_ndphir[ir]) / FFT_NR; - } - } - - fftw_destroy_plan (p1); - fftw_destroy_plan (p2); - - delete [] fft_phir; - delete [] fft_phik; - delete [] fft_ndphik; - delete [] fft_ndphir; - - ModuleBase::timer::end("Mathzone_Add1", "Uni_Deriv_Phi"); + bool is_re = true; + double fac = 0.0; + if (nd % 4 == 0) + { + is_re = true; + fac = 1.0; + } + else if (nd % 4 == 1) + { + is_re = false; + fac = -1.0; + } + else if (nd % 4 == 2) + { + is_re = true; + fac = -1.0; + } + else + { + is_re = false; + fac = 1.0; + } + + for (int ir = 0; ir < mesh; ir++) + { + if (is_re) + { + phind[ir] = fac * c_re (fft_ndphir[ir]) / FFT_NR; + } + else + { + phind[ir] = fac * c_im (fft_ndphir[ir]) / FFT_NR; + } + } + + fftw_destroy_plan (p1); + fftw_destroy_plan (p2); + + delete[] fft_phir; + delete[] fft_phik; + delete[] fft_ndphik; + delete[] fft_ndphir; + + ModuleBase::timer::end ("Mathzone_Add1", "Uni_Deriv_Phi"); } -} +} // namespace ModuleBase diff --git a/source/source_base/mathzone_add1.h b/source/source_base/mathzone_add1.h index b0f82344672..0fb33c084f0 100644 --- a/source/source_base/mathzone_add1.h +++ b/source/source_base/mathzone_add1.h @@ -14,11 +14,11 @@ LiaoChen add @ 2010/03/09 to add efficient functions in LCAO calculation class Mathzone_Add1 { public: - Mathzone_Add1(); - ~Mathzone_Add1(); + Mathzone_Add1 (); + ~Mathzone_Add1 (); - static double dualfac(const int& l); - static double factorial(const int& l); + static double dualfac (const int& l); + static double factorial (const int& l); /** * @brief calculate second derivatives for cubic * spline interpolation @@ -30,12 +30,12 @@ class Mathzone_Add1 * @param[in] ypn f'(n) boundary condition * @param[out] y2 f''(x) */ - static void SplineD2(const double* rad, - const double* rad_f, - const int& mesh, - const double& yp1, - const double& ypn, - double* y2); + static void SplineD2 (const double* rad, + const double* rad_f, + const int& mesh, + const double& yp1, + const double& ypn, + 
double* y2); /** * @brief cubic spline interpolation @@ -50,14 +50,14 @@ class Mathzone_Add1 * @param[out] dy f'(x) after interpolation * @param[out] ddy f''(x) after interpolation */ - static void Cubic_Spline_Interpolation(const double* const rad, - const double* const rad_f, - const double* const y2, - const int& mesh, - const double* const r, - const int& rsize, - double* const y, - double* const dy); + static void Cubic_Spline_Interpolation (const double* const rad, + const double* const rad_f, + const double* const y2, + const int& mesh, + const double* const r, + const int& rsize, + double* const y, + double* const dy); /** * @brief "spline like interpolation" of a uniform @@ -69,9 +69,9 @@ class Mathzone_Add1 * @param R f(R) is to be calculated * @return double f(R) */ - static double Uni_RadialF(const double* rad_f, const int& msh, const double& dr, const double& R); + static double Uni_RadialF (const double* rad_f, const int& msh, const double& dr, const double& R); - static void Uni_Deriv_Phi(const double* radf, const int& mesh, const double& dr, const int& nd, double* phind); + static void Uni_Deriv_Phi (const double* radf, const int& mesh, const double& dr, const int& nd, double* phind); private: const static int sph_lmax = 20; diff --git a/source/source_base/matrix-inl.h b/source/source_base/matrix-inl.h index 6fab28ee547..12623a3e7d9 100644 --- a/source/source_base/matrix-inl.h +++ b/source/source_base/matrix-inl.h @@ -9,73 +9,86 @@ namespace ModuleBase { -inline matrix::matrix( const int nrows, const int ncols, const bool flag_zero ) - :nr(nrows), nc(ncols), c(nullptr) +inline matrix::matrix (const int nrows, const int ncols, const bool flag_zero) : nr (nrows), nc (ncols), c (nullptr) { -std::cout<<__FILE__<<__LINE__<zero_out(); - } + std::cout << __FILE__ << __LINE__ << std::endl; + if (nr && nc) + { + // auto handler_old = set_new_handler(matrixAlloc); + c = new double[nr * nc]; + // set_new_handler(handler_old); + if (flag_zero) + this->zero_out 
(); + } } -inline matrix::matrix( const matrix &m_in ) +inline matrix::matrix (const matrix& m_in) { -std::cout<<__FILE__<<__LINE__<create( m_in.nr, m_in.nc, false ); -std::cout<<__FILE__<<__LINE__<create (m_in.nr, m_in.nc, false); + std::cout << __FILE__ << __LINE__ << std::endl; + memcpy (c, m_in.c, nr * nc * sizeof (double)); + std::cout << __FILE__ << __LINE__ << std::endl; + return *this; } // Peize Lin add 2016-08-05 -inline matrix& matrix::operator=( matrix && m_in ) +inline matrix& + matrix::operator= (matrix&& m_in) { -std::cout<<__FILE__<<__LINE__<=0); assert(ir=0); assert(ic= 0); + assert (ir < nr); + assert (ic >= 0); + assert (ic < nc); + return c[ir * nc + ic]; } -inline const double & matrix::operator()(const int ir,const int ic) const +inline const double& + matrix::operator() (const int ir, const int ic) const { - assert(ir>=0); assert(ir=0); assert(ic= 0); + assert (ir < nr); + assert (ic >= 0); + assert (ic < nc); + return c[ir * nc + ic]; } +} // namespace ModuleBase #endif \ No newline at end of file diff --git a/source/source_base/matrix.cpp b/source/source_base/matrix.cpp index eaa7c304061..06670ac235e 100644 --- a/source/source_base/matrix.cpp +++ b/source/source_base/matrix.cpp @@ -22,85 +22,87 @@ // All constructors call init() // ******************************************************** -//int matrix::mCount = 0; +// int matrix::mCount = 0; namespace ModuleBase { -void matrixAlloc() +void + matrixAlloc () { std::cerr << "Allocation error for Matrix" << std::endl; - std::exit(EXIT_FAILURE); + std::exit (EXIT_FAILURE); } /*create a matrix with nrows*ncols size */ -matrix::matrix( const int nrows, const int ncols, const bool flag_zero ) - :nr(nrows), - nc(ncols), - c(nullptr) -{ - if( nr && nc ) - { - auto handler_old = std::set_new_handler(matrixAlloc); - c = new double[nr*nc]; - std::set_new_handler(handler_old); - if(flag_zero) this->zero_out(); - } +matrix::matrix (const int nrows, const int ncols, const bool flag_zero) : nr (nrows), nc 
(ncols), c (nullptr) +{ + if (nr && nc) + { + auto handler_old = std::set_new_handler (matrixAlloc); + c = new double[nr * nc]; + std::set_new_handler (handler_old); + if (flag_zero) + { + this->zero_out (); + } + } } -matrix::matrix( const matrix &m_in ) - :nr(m_in.nr), - nc(m_in.nc), - c(nullptr) -{ - if( nr && nc ) - { - auto handler_old = std::set_new_handler(matrixAlloc); - c = new double[nr*nc]; - std::set_new_handler(handler_old); - memcpy( c, m_in.c, nr*nc*sizeof(double) ); - } +matrix::matrix (const matrix& m_in) : nr (m_in.nr), nc (m_in.nc), c (nullptr) +{ + if (nr && nc) + { + auto handler_old = std::set_new_handler (matrixAlloc); + c = new double[nr * nc]; + std::set_new_handler (handler_old); + memcpy (c, m_in.c, nr * nc * sizeof (double)); + } } // Peize Lin add 2016-08-05 -matrix::matrix( matrix && m_in ) - :nr(m_in.nr), - nc(m_in.nc) +matrix::matrix (matrix&& m_in) : nr (m_in.nr), nc (m_in.nc) { - c = m_in.c; - m_in.nr = m_in.nc = 0; - m_in.c = nullptr; + c = m_in.c; + m_in.nr = m_in.nc = 0; + m_in.c = nullptr; } // Peize Lin change 2018-07-02 -matrix& matrix::operator=( const matrix & m_in ) +matrix& + matrix::operator= (const matrix& m_in) { - this->create( m_in.nr, m_in.nc, false ); - memcpy( c, m_in.c, nr*nc*sizeof(double) ); - return *this; + this->create (m_in.nr, m_in.nc, false); + memcpy (c, m_in.c, nr * nc * sizeof (double)); + return *this; } // Peize Lin add 2016-08-05 -matrix& matrix::operator=( matrix && m_in ) -{ - nr = m_in.nr; nc = m_in.nc; - if(c) delete[] c; - c = m_in.c; - m_in.nr = m_in.nc = 0; - m_in.c = nullptr; - return *this; +matrix& + matrix::operator= (matrix&& m_in) +{ + nr = m_in.nr; + nc = m_in.nc; + if (c) + { + delete[] c; + } + c = m_in.c; + m_in.nr = m_in.nc = 0; + m_in.c = nullptr; + return *this; } /* double & matrix::operator()(const int ir,const int ic) { - assert(ir>=0); assert(ir=0); assert(ic=0); assert(ir=0); assert(ic=0); assert(ir=0); assert(ic=0); assert(ir=0); assert(ic zero_out(); + if (flag_zero) + { + 
this->zero_out (); + } } -double trace_on(const matrix &A, const matrix &B) +double + trace_on (const matrix& A, const matrix& B) { - assert(A.nr == B.nc); - assert(A.nc == B.nr); + assert (A.nr == B.nc); + assert (A.nc == B.nr); double tr = 0.0; for (int i = 0; i < A.nr; ++i) - for (int k = 0; k < A.nc; ++k) - tr += A(i,k) * B(k, i); + { + for (int k = 0; k < A.nc; ++k) + { + tr += A (i, k) * B (k, i); + } + } return tr; } -double mdot(const matrix &A, const matrix &B) +double + mdot (const matrix& A, const matrix& B) { assert (A.nr == B.nr); assert (A.nc == B.nc); @@ -368,7 +404,9 @@ double mdot(const matrix &A, const matrix &B) double sum = 0.0; for (int i = 0; i < size; ++i) - sum += A.c[i] * B.c[i]; + { + sum += A.c[i] * B.c[i]; + } return sum; } @@ -376,86 +414,97 @@ double mdot(const matrix &A, const matrix &B) // Peize Lin add 2016-09-08 std::ostream & operator<<( std::ostream & os, const matrix & m ) { - for( int ir=0; ir!=m.nr; ++ir ) - { - for( int ic=0; ic!=m.nc; ++ic ) - { - if(std::abs(m(ir,ic))>1E-10) - os<1E-10) + os<nr; ++ir ) - { - for( int ic=0; ic!=this->nc; ++ic ) - { - if(std::abs((*this)(ir,ic))>threshold) - os<<(*this)(ir,ic)<<"\t"; - else - os<<0<<"\t"; - } - os<nr; ++ir) + { + for (int ic = 0; ic != this->nc; ++ic) + { + if (std::abs ((*this) (ir, ic)) > threshold) + { + os << (*this) (ir, ic) << "\t"; + } + else + { + os << 0 << "\t"; + } + } + os << std::endl; + } + return os; } // Peize Lin add 2016-09-08 -double matrix::max() const +double + matrix::max () const { - double value = std::numeric_limits::min(); - const int size = nr * nc; - for( int i=0; i::min (); + const int size = nr * nc; + for (int i = 0; i < size; ++i) + { + value = std::max (value, c[i]); + } + return value; } // Peize Lin add 2016-09-08 -double matrix::min() const -{ - double value = std::numeric_limits::max(); - const int size = nr * nc; - for( int i=0; i::max (); + const int size = nr * nc; + for (int i = 0; i < size; ++i) + { + value = std::min (value, c[i]); + 
} + return value; } // Peize Lin add 2018-07-02 -double matrix::absmax() const -{ - double value = 0; - const int size = nr * nc; - for( int i=0; i -#include +#include #include // test @@ -16,85 +16,91 @@ namespace ModuleBase class matrix { - /* data */ -public: + /* data */ + public: + int nr = 0; + int nc = 0; /* Number of rows and columns */ + double* c = nullptr; /* Holds the data */ - int nr=0; - int nc=0; /* Number of rows and columns */ - double *c=nullptr; /* Holds the data */ + /* Constructors and destructor */ + matrix () : nr (0), nc (0), c (nullptr) {} + matrix (const int nrows, const int ncols, const bool flag_zero = true); // Peize Lin add flag_zero 2018-07-02 + matrix (const matrix& m1); /* copy constructor */ + matrix (matrix&& m1); // Peize Lin add 2016-08-05 + ~matrix (); - /* Constructors and destructor */ - matrix(): nr(0), nc(0), c(nullptr){} - matrix( const int nrows, const int ncols, const bool flag_zero=true ); // Peize Lin add flag_zero 2018-07-02 - matrix( const matrix &m1 ); /* copy constructor */ - matrix( matrix && m1 ); // Peize Lin add 2016-08-05 - ~matrix(); + void create (const int nrow, const int ncol, const bool flag_zero = true); // Peize Lin add flag_zero 2018-07-02 + matrix& operator= (const matrix& m1); // Peize Lin change 2018-03-12 + matrix& operator= (matrix&& m1); // Peize Lin add 2016-08-05 - void create( const int nrow, const int ncol, const bool flag_zero=true ); // Peize Lin add flag_zero 2018-07-02 - matrix& operator=(const matrix &m1); // Peize Lin change 2018-03-12 - matrix& operator=( matrix && m1 ); // Peize Lin add 2016-08-05 + double& + operator() (const int ir, const int ic) + { + assert (ir >= 0); + assert (ir < nr); + assert (ic >= 0); + assert (ic < nc); + return c[ir * nc + ic]; + } - double &operator()(const int ir,const int ic) - { - assert(ir>=0); assert(ir=0); assert(ic= 0); + assert (ir < nr); + assert (ic >= 0); + assert (ic < nc); + return c[ir * nc + ic]; + } - const double &operator()(const int 
ir,const int ic) const - { - assert(ir>=0); assert(ir=0); assert(icDet(); + double d = this->Det (); - if(d == 0) - { - d = 1; - } + if (d == 0) + { + d = 1; + } - return Matrix3((e22*e33 - e23*e32) / d, - -(e12*e33 - e13*e32) / d, - (e12*e23 - e13*e22) / d, - -(e21*e33 - e23*e31) / d, - (e11*e33 - e13*e31) / d, - -(e11*e23 - e13*e21) / d, - (e21*e32 - e22*e31) / d, - -(e11*e32 - e12*e31) / d, - (e11*e22 - e12*e21) / d); + return Matrix3 ((e22 * e33 - e23 * e32) / d, + -(e12 * e33 - e13 * e32) / d, + (e12 * e23 - e13 * e22) / d, + -(e21 * e33 - e23 * e31) / d, + (e11 * e33 - e13 * e31) / d, + -(e11 * e23 - e13 * e21) / d, + (e21 * e32 - e22 * e31) / d, + -(e11 * e32 - e12 * e31) / d, + (e11 * e22 - e12 * e21) / d); } -Matrix3& Matrix3::operator = (const Matrix3 &m) +Matrix3& + Matrix3::operator= (const Matrix3& m) { - e11 = m.e11; e12 = m.e12; e13 = m.e13; - e21 = m.e21; e22 = m.e22; e23 = m.e23; - e31 = m.e31; e32 = m.e32; e33 = m.e33; + e11 = m.e11; + e12 = m.e12; + e13 = m.e13; + e21 = m.e21; + e22 = m.e22; + e23 = m.e23; + e31 = m.e31; + e32 = m.e32; + e33 = m.e33; return *this; } -Matrix3& Matrix3::operator +=(const Matrix3 &m) +Matrix3& + Matrix3::operator+= (const Matrix3& m) { - e11 += m.e11; e12 += m.e12; e13 += m.e13; - e21 += m.e21; e22 += m.e22; e23 += m.e23; - e31 += m.e31; e32 += m.e32; e33 += m.e33; + e11 += m.e11; + e12 += m.e12; + e13 += m.e13; + e21 += m.e21; + e22 += m.e22; + e23 += m.e23; + e31 += m.e31; + e32 += m.e32; + e33 += m.e33; return *this; } -Matrix3& Matrix3::operator -=(const Matrix3 &m) +Matrix3& + Matrix3::operator-= (const Matrix3& m) { - e11 -= m.e11; e12 -= m.e12; e13 -= m.e13; - e21 -= m.e21; e22 -= m.e22; e23 -= m.e23; - e31 -= m.e31; e32 -= m.e32; e33 -= m.e33; + e11 -= m.e11; + e12 -= m.e12; + e13 -= m.e13; + e21 -= m.e21; + e22 -= m.e22; + e23 -= m.e23; + e31 -= m.e31; + e32 -= m.e32; + e33 -= m.e33; return *this; } -Matrix3& Matrix3::operator *=(const double &s) +Matrix3& + Matrix3::operator*= (const double& s) { - e11 *= 
s; e12 *= s; e13 *= s; - e21 *= s; e22 *= s; e23 *= s; - e31 *= s; e32 *= s; e33 *= s; + e11 *= s; + e12 *= s; + e13 *= s; + e21 *= s; + e22 *= s; + e23 *= s; + e31 *= s; + e32 *= s; + e33 *= s; return *this; } -Matrix3& Matrix3::operator /=(const double &s) +Matrix3& + Matrix3::operator/= (const double& s) { - e11 /= s; e12 /= s; e13 /= s; - e21 /= s; e22 /= s; e23 /= s; - e31 /= s; e32 /= s; e33 /= s; + e11 /= s; + e12 /= s; + e13 /= s; + e21 /= s; + e22 /= s; + e23 /= s; + e31 /= s; + e32 /= s; + e33 /= s; return *this; } -//m1+m2 -Matrix3 operator +(const Matrix3 &m1, const Matrix3 &m2) +// m1+m2 +Matrix3 + operator+ (const Matrix3& m1, const Matrix3& m2) { - return Matrix3(m1.e11 + m2.e11, - m1.e12 + m2.e12, - m1.e13 + m2.e13, - m1.e21 + m2.e21, - m1.e22 + m2.e22, - m1.e23 + m2.e23, - m1.e31 + m2.e31, - m1.e32 + m2.e32, - m1.e33 + m2.e33); + return Matrix3 (m1.e11 + m2.e11, + m1.e12 + m2.e12, + m1.e13 + m2.e13, + m1.e21 + m2.e21, + m1.e22 + m2.e22, + m1.e23 + m2.e23, + m1.e31 + m2.e31, + m1.e32 + m2.e32, + m1.e33 + m2.e33); } -//m1-m2 -Matrix3 operator -(const Matrix3 &m1, const Matrix3 &m2) +// m1-m2 +Matrix3 + operator- (const Matrix3& m1, const Matrix3& m2) { - return Matrix3(m1.e11 - m2.e11, // Zujian Dai fix bug 2019-01-21 - m1.e12 - m2.e12, - m1.e13 - m2.e13, - m1.e21 - m2.e21, - m1.e22 - m2.e22, - m1.e23 - m2.e23, - m1.e31 - m2.e31, - m1.e32 - m2.e32, - m1.e33 - m2.e33); + return Matrix3 (m1.e11 - m2.e11, // Zujian Dai fix bug 2019-01-21 + m1.e12 - m2.e12, + m1.e13 - m2.e13, + m1.e21 - m2.e21, + m1.e22 - m2.e22, + m1.e23 - m2.e23, + m1.e31 - m2.e31, + m1.e32 - m2.e32, + m1.e33 - m2.e33); } -//m/s -Matrix3 operator /(const Matrix3 &m, const double &s) +// m/s +Matrix3 + operator/ (const Matrix3& m, const double& s) { - return Matrix3(m.e11 / s, m.e12 / s, m.e13 / s, - m.e21 / s, m.e22 / s, m.e23 / s, - m.e31 / s, m.e32 / s, m.e33 / s); + return Matrix3 (m.e11 / s, m.e12 / s, m.e13 / s, m.e21 / s, m.e22 / s, m.e23 / s, m.e31 / s, m.e32 / s, m.e33 / s); } 
-//m1*m2 -Matrix3 operator *(const Matrix3 &m1, const Matrix3 &m2) +// m1*m2 +Matrix3 + operator* (const Matrix3& m1, const Matrix3& m2) { - return Matrix3(m1.e11*m2.e11 + m1.e12*m2.e21 + m1.e13*m2.e31, - m1.e11*m2.e12 + m1.e12*m2.e22 + m1.e13*m2.e32, - m1.e11*m2.e13 + m1.e12*m2.e23 + m1.e13*m2.e33, - m1.e21*m2.e11 + m1.e22*m2.e21 + m1.e23*m2.e31, - m1.e21*m2.e12 + m1.e22*m2.e22 + m1.e23*m2.e32, - m1.e21*m2.e13 + m1.e22*m2.e23 + m1.e23*m2.e33, - m1.e31*m2.e11 + m1.e32*m2.e21 + m1.e33*m2.e31, - m1.e31*m2.e12 + m1.e32*m2.e22 + m1.e33*m2.e32, - m1.e31*m2.e13 + m1.e32*m2.e23 + m1.e33*m2.e33); + return Matrix3 (m1.e11 * m2.e11 + m1.e12 * m2.e21 + m1.e13 * m2.e31, + m1.e11 * m2.e12 + m1.e12 * m2.e22 + m1.e13 * m2.e32, + m1.e11 * m2.e13 + m1.e12 * m2.e23 + m1.e13 * m2.e33, + m1.e21 * m2.e11 + m1.e22 * m2.e21 + m1.e23 * m2.e31, + m1.e21 * m2.e12 + m1.e22 * m2.e22 + m1.e23 * m2.e32, + m1.e21 * m2.e13 + m1.e22 * m2.e23 + m1.e23 * m2.e33, + m1.e31 * m2.e11 + m1.e32 * m2.e21 + m1.e33 * m2.e31, + m1.e31 * m2.e12 + m1.e32 * m2.e22 + m1.e33 * m2.e32, + m1.e31 * m2.e13 + m1.e32 * m2.e23 + m1.e33 * m2.e33); } -//m*s -Matrix3 operator *(const Matrix3 &m,const double &s) +// m*s +Matrix3 + operator* (const Matrix3& m, const double& s) { - return Matrix3(m.e11*s, m.e12*s, m.e13*s, - m.e21*s, m.e22*s, m.e23*s, - m.e31*s, m.e32*s, m.e33*s); + return Matrix3 (m.e11 * s, m.e12 * s, m.e13 * s, m.e21 * s, m.e22 * s, m.e23 * s, m.e31 * s, m.e32 * s, m.e33 * s); } -//s*m -Matrix3 operator *(const double &s, const Matrix3 &m) +// s*m +Matrix3 + operator* (const double& s, const Matrix3& m) { - return Matrix3(m.e11*s, m.e12*s, m.e13*s, - m.e21*s, m.e22*s, m.e23*s, - m.e31*s, m.e32*s, m.e33*s); + return Matrix3 (m.e11 * s, m.e12 * s, m.e13 * s, m.e21 * s, m.e22 * s, m.e23 * s, m.e31 * s, m.e32 * s, m.e33 * s); } // whether m1==m2 -bool operator==(const Matrix3 &m1, const Matrix3 &m2) -{ - if(m1.e11 == m2.e11 && - m1.e12 == m2.e12 && - m1.e13 == m2.e13 && - m1.e21 == m2.e21 && - m1.e22 == m2.e22 
&& - m1.e23 == m2.e23 && - m1.e31 == m2.e31 && - m1.e32 == m2.e32 && - m1.e33 == m2.e33) - { - return true; - } +bool + operator== (const Matrix3& m1, const Matrix3& m2) +{ + if (m1.e11 == m2.e11 && m1.e12 == m2.e12 && m1.e13 == m2.e13 && m1.e21 == m2.e21 && m1.e22 == m2.e22 + && m1.e23 == m2.e23 && m1.e31 == m2.e31 && m1.e32 == m2.e32 && m1.e33 == m2.e33) + { + return true; + } return false; } -//whether m1 != m2 -bool operator!=(const Matrix3 &m1, const Matrix3 &m2) +// whether m1 != m2 +bool + operator!= (const Matrix3& m1, const Matrix3& m2) { return !(m1 == m2); //!= defined in terms of operator == } - -void Matrix3::print(void) const +void + Matrix3::print () const { - std::cout << std::setw(15) << e11 << std::setw(15) << e12 << std::setw(15) << e13 << std::endl; - std::cout << std::setw(15) << e21 << std::setw(15) << e22 << std::setw(15) << e23 << std::endl; - std::cout << std::setw(15) << e31 << std::setw(15) << e32 << std::setw(15) << e33 << std::endl; + std::cout << std::setw (15) << e11 << std::setw (15) << e12 << std::setw (15) << e13 << std::endl; + std::cout << std::setw (15) << e21 << std::setw (15) << e22 << std::setw (15) << e23 << std::endl; + std::cout << std::setw (15) << e31 << std::setw (15) << e32 << std::setw (15) << e33 << std::endl; return; } -ModuleBase::matrix Matrix3::to_matrix(void)const // Peize Lin add 2021.03.09 +ModuleBase::matrix + Matrix3::to_matrix () const // Peize Lin add 2021.03.09 { - ModuleBase::matrix m(3,3); - m(0,0)=e11; m(0,1)=e12; m(0,2)=e13; - m(1,0)=e21; m(1,1)=e22; m(1,2)=e23; - m(2,0)=e31; m(2,1)=e32; m(2,2)=e33; + ModuleBase::matrix m (3, 3); + m (0, 0) = e11; + m (0, 1) = e12; + m (0, 2) = e13; + m (1, 0) = e21; + m (1, 1) = e22; + m (1, 2) = e23; + m (2, 0) = e31; + m (2, 1) = e32; + m (2, 2) = e33; return m; } -} \ No newline at end of file +} // namespace ModuleBase \ No newline at end of file diff --git a/source/source_base/matrix3.h b/source/source_base/matrix3.h index cfd08972716..47845ae5194 100644 --- 
a/source/source_base/matrix3.h +++ b/source/source_base/matrix3.h @@ -30,10 +30,7 @@ class Matrix3 * to Identity matrix * */ - Matrix3() - { - Identity(); - } + Matrix3 () { Identity (); } /** * @brief Construct a new Matrix 3 object @@ -48,48 +45,48 @@ class Matrix3 * @param r32 * @param r33 */ - Matrix3(const double &r11, - const double &r12, - const double &r13, - const double &r21, - const double &r22, - const double &r23, - const double &r31, - const double &r32, - const double &r33); + Matrix3 (const double& r11, + const double& r12, + const double& r13, + const double& r21, + const double& r22, + const double& r23, + const double& r31, + const double& r32, + const double& r33); /** * @brief Set a 3x3 matrix to identity matrix * */ - void Identity(void); + void Identity (); /** * @brief Set all elements of a 3x3 matrix to zero * */ - void Zero(void); + void Zero (); /** * @brief Calculate the determinant of a 3x3 matrix * * @return double */ - double Det(void) const; + double Det () const; /** * @brief Transpose a 3x3 matrix * * @return Matrix3 */ - Matrix3 Transpose(void) const; + Matrix3 Transpose () const; /** * @brief Inverse a 3x3 matrix * * @return Matrix3 */ - Matrix3 Inverse(void) const; + Matrix3 Inverse () const; /** * @brief Overload operator "=" for 3x3 matrices @@ -98,7 +95,7 @@ class Matrix3 * @param m * @return Matrix3& */ - Matrix3 &operator=(const Matrix3 &m); + Matrix3& operator= (const Matrix3& m); /** * @brief Overload operator "+=" for 3x3 matrices @@ -107,7 +104,7 @@ class Matrix3 * @param m * @return Matrix3& */ - Matrix3 &operator+=(const Matrix3 &m); + Matrix3& operator+= (const Matrix3& m); /** * @brief Overload operator "-=" for 3x3 matrices @@ -116,7 +113,7 @@ class Matrix3 * @param m * @return Matrix3& */ - Matrix3 &operator-=(const Matrix3 &m); + Matrix3& operator-= (const Matrix3& m); /** * @brief Overload operator "*=" for 3x3 matrix and @@ -126,7 +123,7 @@ class Matrix3 * @param s The scalar * @return Matrix3& */ - Matrix3 
&operator*=(const double &s); + Matrix3& operator*= (const double& s); /** * @brief Overload operator "/=" for 3x3 matrix and @@ -136,13 +133,13 @@ class Matrix3 * @param s The scalar * @return Matrix3& */ - Matrix3 &operator/=(const double &s); + Matrix3& operator/= (const double& s); /** * @brief Print a 3x3 matrix on screening * */ - void print(void) const; + void print () const; /** * @brief Change the form of a 3x3 matrix from that of @@ -150,7 +147,7 @@ class Matrix3 * * @return ModuleBase::matrix */ - ModuleBase::matrix to_matrix(void) const; + ModuleBase::matrix to_matrix () const; }; /** @@ -161,7 +158,7 @@ class Matrix3 * @param m2 * @return Matrix3 */ -Matrix3 operator+(const Matrix3 &m1, const Matrix3 &m2); +Matrix3 operator+ (const Matrix3& m1, const Matrix3& m2); /** * @brief Overload operator "-" for two 3x3 matrices m1 and m2, @@ -171,7 +168,7 @@ Matrix3 operator+(const Matrix3 &m1, const Matrix3 &m2); * @param m2 * @return Matrix3 */ -Matrix3 operator-(const Matrix3 &m1, const Matrix3 &m2); +Matrix3 operator- (const Matrix3& m1, const Matrix3& m2); /** * @brief Overload operator "/" for a (Matrix3)/(scalar) @@ -181,7 +178,7 @@ Matrix3 operator-(const Matrix3 &m1, const Matrix3 &m2); * @param s The scalar * @return Matrix3 */ -Matrix3 operator/(const Matrix3 &m, const double &s); +Matrix3 operator/ (const Matrix3& m, const double& s); /** * @brief Overload operator "*" for two 3x3 matrices m1 and m2 @@ -191,7 +188,7 @@ Matrix3 operator/(const Matrix3 &m, const double &s); * @param m2 * @return Matrix3 */ -Matrix3 operator*(const Matrix3 &m1, const Matrix3 &m2); +Matrix3 operator* (const Matrix3& m1, const Matrix3& m2); /** * @brief Overload operator "*" for (Matrix3)*(scalar) @@ -201,7 +198,7 @@ Matrix3 operator*(const Matrix3 &m1, const Matrix3 &m2); * @param s The scalar * @return Matrix3 */ -Matrix3 operator*(const Matrix3 &m, const double &s); +Matrix3 operator* (const Matrix3& m, const double& s); /** * @brief Overload operator "*" for 
(scalar)*(Matrix3) @@ -211,7 +208,7 @@ Matrix3 operator*(const Matrix3 &m, const double &s); * @param m The 3x3 matrix * @return Matrix3 */ -Matrix3 operator*(const double &s, const Matrix3 &m); +Matrix3 operator* (const double& s, const Matrix3& m); /** * @brief Overload operator "*" for (Matrix3)*(Vector3) @@ -222,7 +219,8 @@ Matrix3 operator*(const double &s, const Matrix3 &m); * @return ModuleBase::Vector3 * @author Peize Lin */ -template ModuleBase::Vector3 operator*(const Matrix3 &m, const ModuleBase::Vector3 &u); +template +ModuleBase::Vector3 operator* (const Matrix3& m, const ModuleBase::Vector3& u); /** * @brief Overload operator "*" for (Vector3)*(Matrix3) @@ -232,7 +230,8 @@ template ModuleBase::Vector3 operator*(const Matrix3 &m, co * @param m The 3x3 matrix * @return ModuleBase::Vector3 */ -template ModuleBase::Vector3 operator*(const ModuleBase::Vector3 &u, const Matrix3 &m); +template +ModuleBase::Vector3 operator* (const ModuleBase::Vector3& u, const Matrix3& m); /** * @brief Overload operator "==" to assert @@ -243,7 +242,7 @@ template ModuleBase::Vector3 operator*(const ModuleBase::Ve * @return true: if two matrices equal each other * @return false: if they do not equal */ -bool operator==(const Matrix3 &m1, const Matrix3 &m2); +bool operator== (const Matrix3& m1, const Matrix3& m2); /** * @brief Overload operator "!=" to assert @@ -254,22 +253,26 @@ bool operator==(const Matrix3 &m1, const Matrix3 &m2); * @return true: if two matrices are inequal * @return false: if they equal */ -bool operator!=(const Matrix3 &m1, const Matrix3 &m2); // whethor m1 != m2 +bool operator!= (const Matrix3& m1, const Matrix3& m2); // whethor m1 != m2 // m*u -template ModuleBase::Vector3 operator*(const Matrix3 &m, const ModuleBase::Vector3 &u) +template +ModuleBase::Vector3 + operator* (const Matrix3& m, const ModuleBase::Vector3& u) { - return ModuleBase::Vector3(m.e11 * u.x + m.e12 * u.y + m.e13 * u.z, - m.e21 * u.x + m.e22 * u.y + m.e23 * u.z, - m.e31 * u.x + 
m.e32 * u.y + m.e33 * u.z); + return ModuleBase::Vector3 (m.e11 * u.x + m.e12 * u.y + m.e13 * u.z, + m.e21 * u.x + m.e22 * u.y + m.e23 * u.z, + m.e31 * u.x + m.e32 * u.y + m.e33 * u.z); } // u*m -template ModuleBase::Vector3 operator*(const ModuleBase::Vector3 &u, const Matrix3 &m) +template +ModuleBase::Vector3 + operator* (const ModuleBase::Vector3& u, const Matrix3& m) { - return ModuleBase::Vector3(u.x * m.e11 + u.y * m.e21 + u.z * m.e31, - u.x * m.e12 + u.y * m.e22 + u.z * m.e32, - u.x * m.e13 + u.y * m.e23 + u.z * m.e33); + return ModuleBase::Vector3 (u.x * m.e11 + u.y * m.e21 + u.z * m.e31, + u.x * m.e12 + u.y * m.e22 + u.z * m.e32, + u.x * m.e13 + u.y * m.e23 + u.z * m.e33); } } // namespace ModuleBase diff --git a/source/source_base/matrix_wrapper.h b/source/source_base/matrix_wrapper.h index 018bc22ab7a..8b1eb6318a1 100644 --- a/source/source_base/matrix_wrapper.h +++ b/source/source_base/matrix_wrapper.h @@ -18,72 +18,93 @@ namespace ModuleBase class Matrix_Wrapper { -public: - int nr; - int nc; - double * c = nullptr; - bool flag_delete_c; + public: + int nr; + int nc; + double* c = nullptr; + bool flag_delete_c; - Matrix_Wrapper(): nr(0), nc(0), c(nullptr), flag_delete_c(false){} - Matrix_Wrapper( const matrix &m ): nr(m.nr), nc(m.nc), c(m.c), flag_delete_c(false){} - inline void create( const int nr_in, const int nc_in, const bool flag_zero ); - inline matrix to_matrix(); - ~Matrix_Wrapper(){ if(flag_delete_c) delete[]c; } - Matrix_Wrapper( const Matrix_Wrapper &m )=delete; - Matrix_Wrapper( Matrix_Wrapper &m )=delete; - inline Matrix_Wrapper( Matrix_Wrapper &&m ); - inline Matrix_Wrapper&operator=( const Matrix_Wrapper&m ); - inline Matrix_Wrapper&operator=( Matrix_Wrapper&&m ); + Matrix_Wrapper () : nr (0), nc (0), c (nullptr), flag_delete_c (false) {} + Matrix_Wrapper (const matrix& m) : nr (m.nr), nc (m.nc), c (m.c), flag_delete_c (false) {} + inline void create (const int nr_in, const int nc_in, const bool flag_zero); + inline matrix to_matrix 
(); + ~Matrix_Wrapper () + { + if (flag_delete_c) + { + delete[] c; + } + } + Matrix_Wrapper (const Matrix_Wrapper& m) = delete; + Matrix_Wrapper (Matrix_Wrapper& m) = delete; + inline Matrix_Wrapper (Matrix_Wrapper&& m); + inline Matrix_Wrapper& operator= (const Matrix_Wrapper& m); + inline Matrix_Wrapper& operator= (Matrix_Wrapper&& m); }; - -inline void Matrix_Wrapper::create( const int nr_in, const int nc_in, const bool flag_zero ) +inline void + Matrix_Wrapper::create (const int nr_in, const int nc_in, const bool flag_zero) { - nr = nr_in; nc = nc_in; - if(flag_delete_c) - delete[] c; - c = new double[nr*nc]; - flag_delete_c = true; - if(flag_zero) - memset( c, 0, sizeof(double)*nr*nc ); + nr = nr_in; + nc = nc_in; + if (flag_delete_c) + { + delete[] c; + } + c = new double[nr * nc]; + flag_delete_c = true; + if (flag_zero) + { + memset (c, 0, sizeof (double) * nr * nc); + } } -inline matrix Matrix_Wrapper::to_matrix() +inline matrix + Matrix_Wrapper::to_matrix () { - assert( flag_delete_c==true ); - flag_delete_c = false; - matrix m; - m.nr = nr; m.nc = nc; - m.c = c; - return m; + assert (flag_delete_c == true); + flag_delete_c = false; + matrix m; + m.nr = nr; + m.nc = nc; + m.c = c; + return m; } -inline Matrix_Wrapper::Matrix_Wrapper( Matrix_Wrapper &&m ) - :nr(m.nr), - nc(m.nc), - c(m.c), - flag_delete_c(m.flag_delete_c) +inline Matrix_Wrapper::Matrix_Wrapper (Matrix_Wrapper&& m) + : nr (m.nr), nc (m.nc), c (m.c), flag_delete_c (m.flag_delete_c) { - m.nr = m.nc = 0; - m.c = nullptr; - m.flag_delete_c = false; + m.nr = m.nc = 0; + m.c = nullptr; + m.flag_delete_c = false; } -inline Matrix_Wrapper& Matrix_Wrapper::operator=( const Matrix_Wrapper&m ) +inline Matrix_Wrapper& + Matrix_Wrapper::operator= (const Matrix_Wrapper& m) { - assert( !m.flag_delete_c ); - nr = m.nr; nc = m.nc; c = m.c; flag_delete_c = m.flag_delete_c; - return *this; + assert (!m.flag_delete_c); + nr = m.nr; + nc = m.nc; + c = m.c; + flag_delete_c = m.flag_delete_c; + return *this; } 
-inline Matrix_Wrapper& Matrix_Wrapper::operator=( Matrix_Wrapper&&m ) +inline Matrix_Wrapper& + Matrix_Wrapper::operator= (Matrix_Wrapper&& m) { - nr = m.nr; nc = m.nc; c = m.c; flag_delete_c = m.flag_delete_c; - m.nr = 0; m.nc = 0; m.c = nullptr; m.flag_delete_c = false; - return *this; + nr = m.nr; + nc = m.nc; + c = m.c; + flag_delete_c = m.flag_delete_c; + m.nr = 0; + m.nc = 0; + m.c = nullptr; + m.flag_delete_c = false; + return *this; } -} +} // namespace ModuleBase #endif diff --git a/source/source_base/matrix_wrapper_tianhe2.h b/source/source_base/matrix_wrapper_tianhe2.h index b3b94e2a05e..a8272bb71fc 100644 --- a/source/source_base/matrix_wrapper_tianhe2.h +++ b/source/source_base/matrix_wrapper_tianhe2.h @@ -15,41 +15,67 @@ namespace ModuleBase class Matrix_Wrapper { -public: - double * c = nullptr; - int nr; - int nc; - - Matrix_Wrapper(): nr(0), nc(0), c(nullptr){} - Matrix_Wrapper( const matrix &m ): nr(m.nr), nc(m.nc), c(m.c){} - inline void create( const int nr_in, const int nc_in, const bool flag_zero ); - inline matrix to_matrix(); - Matrix_Wrapper( const Matrix_Wrapper &m ): nr(m.nr), nc(m.nc), c(m.c){} - Matrix_Wrapper( Matrix_Wrapper &m ): nr(m.nr), nc(m.nc), c(m.c){} - Matrix_Wrapper( Matrix_Wrapper &&m ): nr(m.nr), nc(m.nc), c(m.c){} - inline Matrix_Wrapper&operator=( const Matrix_Wrapper&m ){ nr=m.nr; nc=m.nc; c=m.c; return *this; }; - inline Matrix_Wrapper&operator=( Matrix_Wrapper&m ){ nr=m.nr; nc=m.nc; c=m.c; return *this; }; - inline Matrix_Wrapper&operator=( Matrix_Wrapper&&m ){ nr=m.nr; nc=m.nc; c=m.c; return *this; }; -}; + public: + double* c = nullptr; + int nr; + int nc; + Matrix_Wrapper () : nr (0), nc (0), c (nullptr) {} + Matrix_Wrapper (const matrix& m) : nr (m.nr), nc (m.nc), c (m.c) {} + inline void create (const int nr_in, const int nc_in, const bool flag_zero); + inline matrix to_matrix (); + Matrix_Wrapper (const Matrix_Wrapper& m) : nr (m.nr), nc (m.nc), c (m.c) {} + Matrix_Wrapper (Matrix_Wrapper& m) : nr (m.nr), nc 
(m.nc), c (m.c) {} + Matrix_Wrapper (Matrix_Wrapper&& m) : nr (m.nr), nc (m.nc), c (m.c) {} + inline Matrix_Wrapper& + operator= (const Matrix_Wrapper& m) + { + nr = m.nr; + nc = m.nc; + c = m.c; + return *this; + }; + inline Matrix_Wrapper& + operator= (Matrix_Wrapper& m) + { + nr = m.nr; + nc = m.nc; + c = m.c; + return *this; + }; + inline Matrix_Wrapper& + operator= (Matrix_Wrapper&& m) + { + nr = m.nr; + nc = m.nc; + c = m.c; + return *this; + }; +}; -inline void Matrix_Wrapper::create( const int nr_in, const int nc_in, const bool flag_zero ) +inline void + Matrix_Wrapper::create (const int nr_in, const int nc_in, const bool flag_zero) { - nr = nr_in; nc = nc_in; - c = new double[nr*nc]; - if(flag_zero) - memset( c, 0, sizeof(double)*nr*nc ); + nr = nr_in; + nc = nc_in; + c = new double[nr * nc]; + if (flag_zero) + { + memset (c, 0, sizeof (double) * nr * nc); + } } -inline matrix Matrix_Wrapper::to_matrix() +inline matrix + Matrix_Wrapper::to_matrix () { - matrix m; - m.nr = nr; m.nc = nc; - m.c = c; - nr = nc = 0; - c = nullptr; - return m; + matrix m; + m.nr = nr; + m.nc = nc; + m.c = c; + nr = nc = 0; + c = nullptr; + return m; } -} +} // namespace ModuleBase #endif diff --git a/source/source_base/mcd.c b/source/source_base/mcd.c index 3ec039270f5..c1eb4d89af0 100644 --- a/source/source_base/mcd.c +++ b/source/source_base/mcd.c @@ -12,873 +12,900 @@ * http://prj.softpixel.com/licenses/#bsd */ -#ifndef _MCD_GONE //if mcd is gone, don't do anything +#ifndef _MCD_GONE // if mcd is gone, don't do anything #ifdef _MCD_CHECK -#undef _MCD_CHECK //don't want trace allocations in this file =) +#undef _MCD_CHECK // don't want trace allocations in this file =) #endif #include #ifndef __USE_GNU -#define __USE_GNU // activate strndup in GNU headers +#define __USE_GNU // activate strndup in GNU headers #endif #include #include #include -//#include // fixes the vscanf warnings, - // but kills the va_* macros... 
+// #include // fixes the vscanf warnings, +// but kills the va_* macros... -#define MCD_C_FILE_BUILD //flag some stuff in mcd.h +#define MCD_C_FILE_BUILD // flag some stuff in mcd.h #include "mcd.h" #ifdef WIN32 - #include +#include #else - #include - #include +#include +#include #endif -#define CHUNK_MAGIC 0x1337C0DE +#define CHUNK_MAGIC 0x1337C0DE #ifndef MCD_FASTFREE -#define TOP_TO_BOTTOM 0x00 //search like old style -#define BOTTOM_TO_TOP 0xff //search from LastChunk up +#define TOP_TO_BOTTOM 0x00 // search like old style +#define BOTTOM_TO_TOP 0xff // search from LastChunk up -#define TRACK_LENGTH 5 //how many previous biases to use +#define TRACK_LENGTH 5 // how many previous biases to use #endif /* Number of bytes to add to each allocation to check for padding */ -//#define OVERFLOW_PAD 4 // unimplimented +// #define OVERFLOW_PAD 4 // unimplimented /* all allocations are accounted for with 'chunnks' */ typedef struct ChunkS { #ifdef MCD_FASTFREE - int magic; //help make sure the chunk is really a chunk + int magic; // help make sure the chunk is really a chunk #endif - int size; //size of allocation in bytes - int line; //line it was alocated on + int size; // size of allocation in bytes + int line; // line it was alocated on #ifdef WIN32 - LONGLONG id; // 64bit allocation ID + LONGLONG id; // 64bit allocation ID #else - long long id; // 64bit allocation ID -#endif - char * function = nullptr; //creating function - char * file = nullptr; //file function is in - void * ptr = nullptr; //pointer to allocation - struct ChunkS *next, //next chunk (null if nonw) - *prev; //previous chunk (null if nonw) -}Chunk; + long long id; // 64bit allocation ID +#endif + char* function = nullptr; // creating function + char* file = nullptr; // file function is in + void* ptr = nullptr; // pointer to allocation + struct ChunkS *next, // next chunk (null if nonw) + *prev; // previous chunk (null if nonw) +} Chunk; /*note: *ptr+SOC is actually the allocation. 
with fastfree, ptr points to the chunk itself, and SOC adds the offset where the actual data is */ - /* MemoryChunks stores the first entry to the allocation chunks. Lastchunk stores the last one, to save list traversals. */ -Chunk *MemoryChunks=NULL,*LastChunk=NULL; +Chunk *MemoryChunks = NULL, *LastChunk = NULL; -/* - smallest = smallest allocation - largest = largest allocation - current = amount currently allocated (for peak tracking) +/* + smallest = smallest allocation + largest = largest allocation + current = amount currently allocated (for peak tracking) */ -int smallest=0,largest=0,maximum=0,current=0; +int smallest = 0, largest = 0, maximum = 0, current = 0; /* - totalAlloc = total allocations - totalFree = total frees - peakAlloc = maximum number of outstanging allocations - totalOverflow = number of pointers that were overflowed - (unimplimented) + totalAlloc = total allocations + totalFree = total frees + peakAlloc = maximum number of outstanging allocations + totalOverflow = number of pointers that were overflowed + (unimplimented) */ -int totalAlloc=0, totalFree=0, peakAlloc=0, totalOverflow=0; +int totalAlloc = 0, totalFree = 0, peakAlloc = 0, totalOverflow = 0; /* Our log files (Null causes default behaviour) */ -FILE *RealTimeLog=NULL; -FILE *MemStatLog=NULL; +FILE* RealTimeLog = NULL; +FILE* MemStatLog = NULL; -//return realtimelog if it's set, otherwise stderr -#define RTL (RealTimeLog?RealTimeLog:stderr) +// return realtimelog if it's set, otherwise stderr +#define RTL (RealTimeLog ? RealTimeLog : stderr) /* SOC (SizeOf Chunk) is used in verbose mode to print correct pointers */ #ifdef MCD_FASTFREE -#define SOC sizeof(Chunk) //fastfree pointer offset +#define SOC sizeof (Chunk) // fastfree pointer offset #else -#define SOC 0 //no fastfree implies raw pointers (no offset) +#define SOC 0 // no fastfree implies raw pointers (no offset) #endif /* idbase is the id variable. each new allocation increments it. 
- to keep it useful, it is defined to be as long as possible - (64 bits) to keep it from wrapping (unless your program does - a _lot_ of allocating :) + to keep it useful, it is defined to be as long as possible + (64 bits) to keep it from wrapping (unless your program does + a _lot_ of allocating :) */ #ifdef WIN32 -LONGLONG idbase=0; +LONGLONG idbase = 0; #else -long long idbase=0; +long long idbase = 0; #endif /* chunklock is our simple lock variable to keep the chunk list thread-safe */ -char chunkLock=0; +char chunkLock = 0; -/* TrackBias is used to record the weight of the previous locations - TrackPos keeps our current position. each free increments - and wraps TrackPos. +/* TrackBias is used to record the weight of the previous locations + TrackPos keeps our current position. each free increments + and wraps TrackPos. */ #ifndef MCD_FASTFREE -unsigned int TrackBias[TRACK_LENGTH]={0x7f}; //start in middle -int TrackPos=0; +unsigned int TrackBias[TRACK_LENGTH] = {0x7f}; // start in middle +int TrackPos = 0; #endif /* Here, *p is the raw pointer. 
we stick our chunk pointer there for easy lookups */ -void addChunk(void*p,int size,char*fun,char*file,int line) +void + addChunk (void* p, int size, char* fun, char* file, int line) { #ifdef MCD_FASTFREE - Chunk *c=(Chunk*)p; //extra space for chunk is accounted for -#else //not FASTFREE - Chunk *c=(Chunk*)malloc(sizeof(Chunk)); + Chunk* c = (Chunk*)p; // extra space for chunk is accounted for +#else // not FASTFREE + Chunk* c = (Chunk*)malloc (sizeof (Chunk)); #endif - while(chunkLock) + while (chunkLock) #ifdef WIN32 - Sleep(1); + Sleep (1); #else - usleep(1); -#endif - ++chunkLock; - #ifdef MCD_FASTFREE - c->magic=CHUNK_MAGIC; - #endif - c->size=size; - c->line=line; - c->function=fun; - c->file=file; - c->ptr=p; - c->next=NULL; - c->prev=NULL; - c->id=idbase++; - -//overflow padding checks (incomplete) -// pad=p+size; -// for(po=0;pomaximum) - maximum=current; - if(size>largest) - largest=size; - if(sizepeakAlloc) - peakAlloc=totalAlloc-totalFree; - - if(!MemoryChunks) //first chunk - { - MemoryChunks=c; - LastChunk=c; - } - else //new chunk - { - c->prev=LastChunk; - LastChunk->next=c; - LastChunk=c; - } - --chunkLock; - return; + usleep (1); +#endif + ++chunkLock; +#ifdef MCD_FASTFREE + c->magic = CHUNK_MAGIC; +#endif + c->size = size; + c->line = line; + c->function = fun; + c->file = file; + c->ptr = p; + c->next = NULL; + c->prev = NULL; + c->id = idbase++; + + // overflow padding checks (incomplete) + // pad=p+size; + // for(po=0;po maximum) + maximum = current; + if (size > largest) + largest = size; + if (size < smallest || smallest == 0) + smallest = size; + ++totalAlloc; + if ((totalAlloc - totalFree) > peakAlloc) + peakAlloc = totalAlloc - totalFree; + + if (!MemoryChunks) // first chunk + { + MemoryChunks = c; + LastChunk = c; + } + else // new chunk + { + c->prev = LastChunk; + LastChunk->next = c; + LastChunk = c; + } + --chunkLock; + return; } /* here, *p is the pointer, adjusted to be the chunk*/ -int delChunk(void*p/*,char*fun,char*file,int 
line*/) +int + delChunk (void* p /*,char*fun,char*file,int line*/) { - Chunk *s; + Chunk* s; #ifndef MCD_FASTFREE - unsigned int bias,i; - int depth=0; + unsigned int bias, i; + int depth = 0; #endif - //should be handled in MCD_free now - //if(!p) - // return 0; // null pointer. list walker would crash + // should be handled in MCD_free now + // if(!p) + // return 0; // null pointer. list walker would crash - while(chunkLock) + while (chunkLock) #ifdef WIN32 - Sleep(1); + Sleep (1); #else - usleep(1); + usleep (1); #endif - ++chunkLock; //0wn the list + ++chunkLock; // 0wn the list #ifdef MCD_FASTFREE - s=(Chunk*)p; - if(s->magic!=CHUNK_MAGIC) - { - /*freeing illegal pointer, or wrote out of bounds*/ - fprintf(RTL,"** Magic is [%x], which is wrong...[ptr:%x]\n",s->magic,(int)p); - fprintf(RTL,"** Either some buffer overflowed, or this is a bad pointer\n"); - fprintf(RTL,"** I'm just going to free it and see what happens... might segfault.\n"); - fflush(RTL); - /*because this is broken, we're not really sure what to do - with it... so, we opt to just free it*/ - --chunkLock; - return 1; - } - //pointer is to previous chunk, so use that as a basis, plus some pointer acrobatics - - if(s->prev==NULL) //first chunk - { - if(MemoryChunks->next) //next chunk gets promoted to MemoryChunks - { - current-=MemoryChunks->size; - //prev=MemoryChunks; - MemoryChunks=MemoryChunks->next; - MemoryChunks->prev=NULL; - } - else //only chunk left (empty list) - { - current-=MemoryChunks->size; - MemoryChunks=NULL; - LastChunk=NULL; - } - } - else if(s->next==NULL) //not first, is it last? - { - current-=s->size; - LastChunk=s->prev; - LastChunk->next=NULL; //last - } - else //must be in the middle somewhere... - { - - s->prev->next=s->next; - s->next->prev=s->prev; - current-=s->size; - } - ++totalFree; -#else //not FASTFREE - //non fastfree version. here, we expect the chunk to be freed upon exit. 
- - for(bias=0,i=0;iptr==p) - { - current-=s->size; - - if(s==MemoryChunks) //first in list - { - MemoryChunks=MemoryChunks->next; //set next as first - free(s); - if(MemoryChunks) //if there is a next, set prev to none - MemoryChunks->prev=NULL; - --chunkLock; - ++totalFree; - return 0; - } - else if(s==LastChunk) //last guy... - { - LastChunk=LastChunk->prev; - free(s); - if(LastChunk) //LastChunk could be first as well (no next), dont assume anything. - LastChunk->next=NULL; - --chunkLock; - ++totalFree; - return 0; - } - else // somewhere in the middle... - { - // s should have a next and first (handled by previous cases) - s->prev->next=s->next; - s->next->prev=s->prev; - free(s); - --chunkLock; - ++totalFree; - return 0; - } - } - else - { - if(bias==BOTTOM_TO_TOP) - s=s->prev; - else - s=s->next; - } - } - //hmm, didnt find it in the list, must be foreign or error... - #ifdef MCD_VERBOSE - --chunkLock; - fprintf(RTL,"** Pointer [%08X] wasn't in list...\n",(int)p); - fprintf(RTL,"** Either it came from an external library, or it's a bad pointer\n"); - fprintf(RTL,"** I'm just going to free it and see. might segfault\n"); - fflush(RTL); //mohan - #endif - //return 1; //error goes unnoticed for now -#endif //fastfree - --chunkLock; - return 0; + s = (Chunk*)p; + if (s->magic != CHUNK_MAGIC) + { + /*freeing illegal pointer, or wrote out of bounds*/ + fprintf (RTL, "** Magic is [%x], which is wrong...[ptr:%x]\n", s->magic, (int)p); + fprintf (RTL, "** Either some buffer overflowed, or this is a bad pointer\n"); + fprintf (RTL, "** I'm just going to free it and see what happens... might segfault.\n"); + fflush (RTL); + /*because this is broken, we're not really sure what to do + with it... 
so, we opt to just free it*/ + --chunkLock; + return 1; + } + // pointer is to previous chunk, so use that as a basis, plus some pointer acrobatics + + if (s->prev == NULL) // first chunk + { + if (MemoryChunks->next) // next chunk gets promoted to MemoryChunks + { + current -= MemoryChunks->size; + // prev=MemoryChunks; + MemoryChunks = MemoryChunks->next; + MemoryChunks->prev = NULL; + } + else // only chunk left (empty list) + { + current -= MemoryChunks->size; + MemoryChunks = NULL; + LastChunk = NULL; + } + } + else if (s->next == NULL) // not first, is it last? + { + current -= s->size; + LastChunk = s->prev; + LastChunk->next = NULL; // last + } + else // must be in the middle somewhere... + { + + s->prev->next = s->next; + s->next->prev = s->prev; + current -= s->size; + } + ++totalFree; +#else // not FASTFREE + // non fastfree version. here, we expect the chunk to be freed upon exit. + + for (bias = 0, i = 0; i < TRACK_LENGTH; ++i) + bias += TrackBias[i]; + if (bias / TRACK_LENGTH < 0x7f) + bias = TOP_TO_BOTTOM; + else + bias = BOTTOM_TO_TOP; + if (bias == BOTTOM_TO_TOP) + s = LastChunk; // bottom up! + else + s = MemoryChunks; // top down! + + while (s) + { + if (depth == (totalAlloc - totalFree) / 2) // uhoh, passed mid point. adapt trend + { + if (bias == TOP_TO_BOTTOM) + TrackBias[TrackPos] = BOTTOM_TO_TOP; + else + TrackBias[TrackPos] = TOP_TO_BOTTOM; + ++TrackPos; + TrackPos %= TRACK_LENGTH; + } + ++depth; + + if (s->ptr == p) + { + current -= s->size; + + if (s == MemoryChunks) // first in list + { + MemoryChunks = MemoryChunks->next; // set next as first + free (s); + if (MemoryChunks) // if there is a next, set prev to none + MemoryChunks->prev = NULL; + --chunkLock; + ++totalFree; + return 0; + } + else if (s == LastChunk) // last guy... + { + LastChunk = LastChunk->prev; + free (s); + if (LastChunk) // LastChunk could be first as well (no next), dont assume anything. 
+ LastChunk->next = NULL; + --chunkLock; + ++totalFree; + return 0; + } + else // somewhere in the middle... + { + // s should have a next and first (handled by previous cases) + s->prev->next = s->next; + s->next->prev = s->prev; + free (s); + --chunkLock; + ++totalFree; + return 0; + } + } + else + { + if (bias == BOTTOM_TO_TOP) + s = s->prev; + else + s = s->next; + } + } +// hmm, didnt find it in the list, must be foreign or error... +#ifdef MCD_VERBOSE + --chunkLock; + fprintf (RTL, "** Pointer [%08X] wasn't in list...\n", (int)p); + fprintf (RTL, "** Either it came from an external library, or it's a bad pointer\n"); + fprintf (RTL, "** I'm just going to free it and see. might segfault\n"); + fflush (RTL); // mohan +#endif + // return 1; //error goes unnoticed for now +#endif // fastfree + --chunkLock; + return 0; } -/* (FastFree) because we use fake pointers, we must add sizeof(Chunk) to all of - them for proper pointer hiding. +/* (FastFree) because we use fake pointers, we must add sizeof(Chunk) to all of + them for proper pointer hiding. 
*/ #ifndef WIN32 -inline //inline to make it a bit snappier +inline // inline to make it a bit snappier #endif -void phex(FILE*f, //print our 64bit id in a generic way + void + phex (FILE* f, // print our 64bit id in a generic way #ifdef WIN32 - LONGLONG v + LONGLONG v #else - long long v + long long v #endif -) + ) { - int i; - char t[16]={'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'}; - for(i=0;i<64;i+=4) - { - fprintf(f,"%c",t[(v>>(60-i))&0xf]); - } - fprintf(f,"]\n"); - fflush(f); - return; + int i; + char t[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; + for (i = 0; i < 64; i += 4) + { + fprintf (f, "%c", t[(v >> (60 - i)) & 0xf]); + } + fprintf (f, "]\n"); + fflush (f); + return; } -void *MCD_malloc(int size,char*fun,char*file,int line) +void* + MCD_malloc (int size, char* fun, char* file, int line) { #ifdef MCD_FASTFREE - void*p=malloc(size+sizeof(Chunk)); -#else //not FASTFREE - void*p=malloc(size); + void* p = malloc (size + sizeof (Chunk)); +#else // not FASTFREE + void* p = malloc (size); #endif - + #ifdef MCD_VERBOSE - fprintf(RTL,"malloc\t[%X][%i bytes]\tfrom %s:%s, line %i [id ", - (int)p+SOC,size,file,fun,line); - phex(RTL,idbase); + fprintf (RTL, "malloc\t[%X][%i bytes]\tfrom %s:%s, line %i [id ", (int)p + SOC, size, file, fun, line); + phex (RTL, idbase); #endif - addChunk(p,size,fun,file,line); + addChunk (p, size, fun, file, line); - return p + return p #ifdef MCD_FASTFREE - +sizeof(Chunk) + + sizeof (Chunk) #endif - ; + ; } - -void *MCD_calloc(int size,char*fun,char*file,int line) +void* + MCD_calloc (int size, char* fun, char* file, int line) { #ifdef MCD_FASTFREE - void*p=calloc(size+sizeof(Chunk),1); + void* p = calloc (size + sizeof (Chunk), 1); #else - void*p=calloc(size,1); + void* p = calloc (size, 1); #endif #ifdef MCD_VERBOSE - fprintf(RTL,"calloc\t[%X][%i bytes]\tfrom %s:%s, line %i [id ", - (int)p+SOC,size,file,fun,line); - phex(RTL,idbase); + fprintf (RTL, "calloc\t[%X][%i 
bytes]\tfrom %s:%s, line %i [id ", (int)p + SOC, size, file, fun, line); + phex (RTL, idbase); #endif - addChunk(p,size,fun,file,line); - return p + addChunk (p, size, fun, file, line); + return p #ifdef MCD_FASTFREE - +sizeof(Chunk) + + sizeof (Chunk) #endif - ; + ; } -/* instead of really reallocing, we just allocate a new area, and copy +/* instead of really reallocing, we just allocate a new area, and copy this shouldn't matter, but there may be a better way */ -void *MCD_realloc(void *p,int size,char*fun,char*file,int line) +void* + MCD_realloc (void* p, int size, char* fun, char* file, int line) { #ifdef MCD_FASTFREE - Chunk *New,*Old; - - void*n=malloc(size+sizeof(Chunk)); - New=(Chunk*)n; - Old=(Chunk*)p-sizeof(Chunk); - - if(Old->size==size) //no change?? - return p; - - if(size>Old->size) //allocated more room, just copy old stuff - memcpy(n,p-sizeof(Chunk),Old->size+sizeof(Chunk)); - else //allocated less room (truncate) - memcpy(n,p-sizeof(Chunk),Old->size+sizeof(Chunk)); - - -#ifdef MCD_VERBOSE - fprintf(RTL,"realloc\t[%X]->[%X][%i bytes]\tfrom %s:%s, line %i [id ", - (int)p+SOC,(int)n+SOC,size,file,fun,line); - phex(RTL,idbase); -#endif + Chunk *New, *Old; + + void* n = malloc (size + sizeof (Chunk)); + New = (Chunk*)n; + Old = (Chunk*)p - sizeof (Chunk); + + if (Old->size == size) // no change?? 
+ return p; + + if (size > Old->size) // allocated more room, just copy old stuff + memcpy (n, p - sizeof (Chunk), Old->size + sizeof (Chunk)); + else // allocated less room (truncate) + memcpy (n, p - sizeof (Chunk), Old->size + sizeof (Chunk)); - delChunk(p/*,fun,file,line*/); - free(p-sizeof(Chunk)); - addChunk(n,size,fun,file,line); - return n+sizeof(Chunk); -#else //non-fastfree - void*n=realloc(p,size); #ifdef MCD_VERBOSE - fprintf(RTL,"realloc [%X]->[%X][%ib] from %s:%s, line %i [id ", - (int)p+SOC,(int)n+SOC,size,file,fun,line); - phex(RTL,idbase); -#endif - delChunk(p/*,fun,file,line*/); - addChunk(n,size,fun,file,line); - return n; + fprintf (RTL, + "realloc\t[%X]->[%X][%i bytes]\tfrom %s:%s, line %i [id ", + (int)p + SOC, + (int)n + SOC, + size, + file, + fun, + line); + phex (RTL, idbase); +#endif + + delChunk (p /*,fun,file,line*/); + free (p - sizeof (Chunk)); + addChunk (n, size, fun, file, line); + return n + sizeof (Chunk); +#else // non-fastfree + void* n = realloc (p, size); +#ifdef MCD_VERBOSE + fprintf (RTL, + "realloc [%X]->[%X][%ib] from %s:%s, line %i [id ", + (int)p + SOC, + (int)n + SOC, + size, + file, + fun, + line); + phex (RTL, idbase); +#endif + delChunk (p /*,fun,file,line*/); + addChunk (n, size, fun, file, line); + return n; #endif } // getcwd isn't implimented in fastfree yet #ifndef MCD_FASTFREE -#ifndef WIN32 //win32's getcwd doesn't allocate anyway -char *MCD_getcwd(char *p,int size, char*fun,char*file,int line) +#ifndef WIN32 // win32's getcwd doesn't allocate anyway +char* + MCD_getcwd (char* p, int size, char* fun, char* file, int line) { - int olderrno,wsize=256; - char *tmp=NULL; - - //according to man page: - // if p is null, size bytes are allocated - // if p is null and size is zero, strlen(wd) bytes are allocated - - if(p!=NULL) //ok, do nothing - { - return getcwd(p,size); - } - else // ok, interesting stuff - { - if(!size) //alloc as big as needed - { - while(tmp==NULL) - { - tmp=(char*)malloc(wsize); - 
olderrno=errno; - if(getcwd(tmp,0)==NULL) - { //too small, let's try again - wsize*=2; - free(tmp); - } - errno=olderrno; //restore old errno, just in case - } - p=(char*)malloc(strlen(tmp)+1); - memcpy(p,tmp,strlen(tmp)+1); - free(tmp); - addChunk(p, strlen(tmp)+1, file, fun, line); - } - else //alloc size bytes - { - // TODO: see if failure condition here still allocs p - p=(char*)malloc(size); - addChunk(p, size, file, fun, line); - getcwd(p,size); - } - return p; - } + int olderrno, wsize = 256; + char* tmp = NULL; + + // according to man page: + // if p is null, size bytes are allocated + // if p is null and size is zero, strlen(wd) bytes are allocated + + if (p != NULL) // ok, do nothing + { + return getcwd (p, size); + } + else // ok, interesting stuff + { + if (!size) // alloc as big as needed + { + while (tmp == NULL) + { + tmp = (char*)malloc (wsize); + olderrno = errno; + if (getcwd (tmp, 0) == NULL) + { // too small, let's try again + wsize *= 2; + free (tmp); + } + errno = olderrno; // restore old errno, just in case + } + p = (char*)malloc (strlen (tmp) + 1); + memcpy (p, tmp, strlen (tmp) + 1); + free (tmp); + addChunk (p, strlen (tmp) + 1, file, fun, line); + } + else // alloc size bytes + { + // TODO: see if failure condition here still allocs p + p = (char*)malloc (size); + addChunk (p, size, file, fun, line); + getcwd (p, size); + } + return p; + } } -#endif //win32 -#endif //fastfree +#endif // win32 +#endif // fastfree /*(in fastfree) can't make this simple due to pointer stuffing :( */ #ifdef __GNUC__ -char *MCD_strdup(const char*s,char*fun,char*file,int line) +char* + MCD_strdup (const char* s, char* fun, char* file, int line) #else -char *MCD_strdup(char*s,char*fun,char*file,int line) +char* + MCD_strdup (char* s, char* fun, char* file, int line) #endif { - char * n = nullptr; + char* n = nullptr; #ifdef MCD_FASTFREE - n=(char*)malloc(sizeof(char)*strlen(s)+1+sizeof(Chunk)); - memcpy(n+sizeof(Chunk),s,strlen(s)); -#else //non-fastfree - 
n=strdup(s); //unpointer'd version + n = (char*)malloc (sizeof (char) * strlen (s) + 1 + sizeof (Chunk)); + memcpy (n + sizeof (Chunk), s, strlen (s)); +#else // non-fastfree + n = strdup (s); // unpointer'd version #endif #ifdef MCD_VERBOSE - fprintf(RTL,"strdup\t[%X][%ib]\t\tfrom %s:%s, line %i [id ", - (int)n+SOC,(int)strlen(s),file,fun,line); - phex(RTL,idbase); + fprintf (RTL, "strdup\t[%X][%ib]\t\tfrom %s:%s, line %i [id ", (int)n + SOC, (int)strlen (s), file, fun, line); + phex (RTL, idbase); #endif - addChunk(n,strlen(s)+1,fun,file,line); - return n + addChunk (n, strlen (s) + 1, fun, file, line); + return n #ifdef MCD_FASTFREE - +sizeof(Chunk) + + sizeof (Chunk) #endif - ; + ; } -#ifndef WIN32 //win32 lacks strndup completely +#ifndef WIN32 // win32 lacks strndup completely #ifdef __GNUC__ -char *MCD_strndup(const char*s, int z,char*fun,char*file,int line) +char* + MCD_strndup (const char* s, int z, char* fun, char* file, int line) #else -char *MCD_strndup(char*s, int z,char*fun,char*file,int line) +char* + MCD_strndup (char* s, int z, char* fun, char* file, int line) #endif { - char * n = nullptr; - int size; - if((signed)strlen(s)>z) - size=z; - else - size=strlen(s); + char* n = nullptr; + int size; + if ((signed)strlen (s) > z) + size = z; + else + size = strlen (s); #ifdef MCD_FASTFREE - n=(char*)malloc(sizeof(char)*size+1+sizeof(Chunk)); - memcpy(n+sizeof(Chunk),s,size); -#else //non-fastfree - n=(char*)strndup(s,z); //unpointer'd version + n = (char*)malloc (sizeof (char) * size + 1 + sizeof (Chunk)); + memcpy (n + sizeof (Chunk), s, size); +#else // non-fastfree + n = (char*)strndup (s, z); // unpointer'd version #endif #ifdef MCD_VERBOSE - fprintf(RTL,"strndup\t[%X][%ib]\t\tfrom %s:%s, line %i [id ", - (int)n+SOC,(int)size,file,fun,line); - phex(RTL,idbase); + fprintf (RTL, "strndup\t[%X][%ib]\t\tfrom %s:%s, line %i [id ", (int)n + SOC, (int)size, file, fun, line); + phex (RTL, idbase); #endif - addChunk(n,size+1,fun,file,line); - return n + 
addChunk (n, size + 1, fun, file, line); + return n #ifdef MCD_FASTFREE - +sizeof(Chunk) + + sizeof (Chunk) #endif - ; + ; } /* asprintf/vasprintf based on a patch from Stephen Lee */ #ifdef _GNU_SOURCE -int MCD_asprintf(char **ptr,const char *fmt,char *fun, char*file, int line,...) +int + MCD_asprintf (char** ptr, const char* fmt, char* fun, char* file, int line, ...) { - int retval; - va_list argptr; + int retval; + va_list argptr; #ifdef MCD_FASTFREE - void * fc = nullptr; -#endif - - va_start(argptr,line); - if((retval=vasprintf(ptr, fmt, argptr))<0) - { - #ifdef MCD_VERBOSE - fprintf(RTL,"asprintf failure %s:%s, line %i [id ", - file,fun,line); - #endif - return retval; - } - va_end(argptr); - + void* fc = nullptr; +#endif + + va_start (argptr, line); + if ((retval = vasprintf (ptr, fmt, argptr)) < 0) + { +#ifdef MCD_VERBOSE + fprintf (RTL, "asprintf failure %s:%s, line %i [id ", file, fun, line); +#endif + return retval; + } + va_end (argptr); + #ifdef MCD_FASTFREE - fc=malloc(sizeof(Ghunk)+retval+1); //make string+chunk - memcpy(fc+sizeof(Chunk),ptr,retval); //hack in string - free(ptr); //clean up string - ptr=fc+sizeof(Chunk); //hack pointer - addChunk(fc,retval+1,fun,file,line); -#else //non-fastfree - addChunk(*ptr, retval+1, fun, file, line); -#endif - #ifdef MCD_VERBOSE - fprintf(RTL,"asprintf\t[%X][%ib]\t\tfrom %s:%s, line %i [id ", - (int)*ptr+SOC,retval+1,file,fun,line); - phex(RTL,idbase); - #endif - return retval; + fc = malloc (sizeof (Ghunk) + retval + 1); // make string+chunk + memcpy (fc + sizeof (Chunk), ptr, retval); // hack in string + free (ptr); // clean up string + ptr = fc + sizeof (Chunk); // hack pointer + addChunk (fc, retval + 1, fun, file, line); +#else // non-fastfree + addChunk (*ptr, retval + 1, fun, file, line); +#endif +#ifdef MCD_VERBOSE + fprintf (RTL, "asprintf\t[%X][%ib]\t\tfrom %s:%s, line %i [id ", (int)*ptr + SOC, retval + 1, file, fun, line); + phex (RTL, idbase); +#endif + return retval; } -int MCD_vasprintf(char 
**ptr,const char *fmt,va_list argptr,char *fun, char*file, int line) +int + MCD_vasprintf (char** ptr, const char* fmt, va_list argptr, char* fun, char* file, int line) { - int retval; + int retval; #ifdef MCD_FASTFREE - void * fc = nullptr; -#endif - - if((retval=vasprintf(ptr, fmt, argptr))<0) - { - #ifdef MCD_VERBOSE - fprintf(RTL,"vasprintf failure %s:%s, line %i [id ", - file,fun,line); - #endif - return retval; - } + void* fc = nullptr; +#endif + + if ((retval = vasprintf (ptr, fmt, argptr)) < 0) + { +#ifdef MCD_VERBOSE + fprintf (RTL, "vasprintf failure %s:%s, line %i [id ", file, fun, line); +#endif + return retval; + } #ifdef MCD_FASTFREE - fc=malloc(sizeof(Chunk)+retval+1); - memcpy(fc+sizeof(Chunk),ptr,retval); - free(ptr); - ptr=fc+sizeof(Chunk); - addChunk(fc,retval+1,fun,file,line); -#else //non-fastfree - addChunk(*ptr, retval+1, fun, file, line); -#endif - #ifdef MCD_VERBOSE - fprintf(RTL,"vasprintf\t[%X][%ib]\t\tfrom %s:%s, line %i [id ", - (int)*ptr+SOC,retval+1,file,fun,line); - phex(RTL,idbase); - #endif - return retval; + fc = malloc (sizeof (Chunk) + retval + 1); + memcpy (fc + sizeof (Chunk), ptr, retval); + free (ptr); + ptr = fc + sizeof (Chunk); + addChunk (fc, retval + 1, fun, file, line); +#else // non-fastfree + addChunk (*ptr, retval + 1, fun, file, line); +#endif +#ifdef MCD_VERBOSE + fprintf (RTL, "vasprintf\t[%X][%ib]\t\tfrom %s:%s, line %i [id ", (int)*ptr + SOC, retval + 1, file, fun, line); + phex (RTL, idbase); +#endif + return retval; } -#endif // _GNU_SOURCE -#endif // WIN32 +#endif // _GNU_SOURCE +#endif // WIN32 /* scanf family patch provided by Stephen Lee */ -void scan_args(const char*fmt,va_list argptr,char*fun,char*file,int line); +void scan_args (const char* fmt, va_list argptr, char* fun, char* file, int line); -#ifndef __cplusplus // c++ doesn't seem to compile when vscanf etc are implicit -#ifndef WIN32 //win32 doesn't link vscanf stuff nicely -int MCD_scanf(const char *fmt,char*fun,char*file,int line,...) 
+#ifndef __cplusplus // c++ doesn't seem to compile when vscanf etc are implicit +#ifndef WIN32 // win32 doesn't link vscanf stuff nicely +int + MCD_scanf (const char* fmt, char* fun, char* file, int line, ...) { - int retval; - va_list argptr; - - va_start(argptr,line); - if((retval=vscanf(fmt,argptr))<1) { - /* no args, so let's return */ - va_end(argptr); - return retval; - } - va_end(argptr); - va_start(argptr,line); - scan_args(fmt,argptr,fun,file,line); - va_end(argptr); - return retval; + int retval; + va_list argptr; + + va_start (argptr, line); + if ((retval = vscanf (fmt, argptr)) < 1) + { + /* no args, so let's return */ + va_end (argptr); + return retval; + } + va_end (argptr); + va_start (argptr, line); + scan_args (fmt, argptr, fun, file, line); + va_end (argptr); + return retval; } -int MCD_fscanf(FILE *stream,const char *fmt,char*fun,char*file,int line,...) +int + MCD_fscanf (FILE* stream, const char* fmt, char* fun, char* file, int line, ...) { - int retval; - va_list argptr; - - va_start(argptr,line); - if((retval=vfscanf(stream,fmt,argptr))<1) { - /* no args, so let's return */ - va_end(argptr); - return retval; - } - va_end(argptr); - va_start(argptr,line); - scan_args(fmt,argptr,fun,file,line); - va_end(argptr); - return retval; + int retval; + va_list argptr; + + va_start (argptr, line); + if ((retval = vfscanf (stream, fmt, argptr)) < 1) + { + /* no args, so let's return */ + va_end (argptr); + return retval; + } + va_end (argptr); + va_start (argptr, line); + scan_args (fmt, argptr, fun, file, line); + va_end (argptr); + return retval; } -int MCD_sscanf(const char *str,const char *fmt,char*fun,char*file,int line,...) +int + MCD_sscanf (const char* str, const char* fmt, char* fun, char* file, int line, ...) 
{ - int retval; - va_list argptr; - - va_start(argptr,line); - if((retval=vsscanf(str,fmt,argptr))<1) { - /* no args, so let's return */ - va_end(argptr); - return retval; - } - va_end(argptr); - va_start(argptr,line); - scan_args(fmt,argptr,fun,file,line); - va_end(argptr); - return retval; + int retval; + va_list argptr; + + va_start (argptr, line); + if ((retval = vsscanf (str, fmt, argptr)) < 1) + { + /* no args, so let's return */ + va_end (argptr); + return retval; + } + va_end (argptr); + va_start (argptr, line); + scan_args (fmt, argptr, fun, file, line); + va_end (argptr); + return retval; } -#endif //Win32 link +#endif // Win32 link #endif //__cplusplus (implicit blah blah) /* scanf etc helper function */ -void scan_args(const char *fmt,va_list argptr,char*fun,char*file,int line) +void + scan_args (const char* fmt, va_list argptr, char* fun, char* file, int line) { - char ** ptr = nullptr; - void * dummy = nullptr; // clear up the unused warning - - for(;*fmt;fmt++) { - if(*fmt!='%') - continue; - switch(*(++fmt)) { - case 'a': /* malloc'd string */ - ptr=(char **)va_arg(argptr,char *); - addChunk(*ptr,strlen(*ptr)+1,fun,file,line); - break; - case '%': - break; - default: /* next arg */ - dummy=va_arg(argptr,void *); - break; - } - } - return; + char** ptr = nullptr; + void* dummy = nullptr; // clear up the unused warning + + for (; *fmt; fmt++) + { + if (*fmt != '%') + continue; + switch (*(++fmt)) + { + case 'a': /* malloc'd string */ + ptr = (char**)va_arg (argptr, char*); + addChunk (*ptr, strlen (*ptr) + 1, fun, file, line); + break; + case '%': + break; + default: /* next arg */ + dummy = va_arg (argptr, void*); + break; + } + } + return; } -void MCD_free(void *p,char*fun,char*file,int line) +void + MCD_free (void* p, char* fun, char* file, int line) { - //quiet the unused argument warnings when MCD_VERBOSE isn't defined + // quiet the unused argument warnings when MCD_VERBOSE isn't defined #ifndef MCD_VERBOSE - void *dummy=&fun; - dummy=&file; - 
dummy=(char*)&line; -#endif //MCD_VERBOSE - - if(!p) - { + void* dummy = &fun; + dummy = &file; + dummy = (char*)&line; +#endif // MCD_VERBOSE + + if (!p) + { #ifdef MCD_VERBOSE - fprintf(RTL,"** Call to free with NULL argument in %s:%s, line %i\n", - file,fun,line); -#endif //mcd_verbose - /* If they want native free(null), do it */ - /* in case free(null) does something bad, */ - /* rather than let them think it's ok */ - /* Must be enabled, off by default. */ - #ifdef MCD_FREE_NULL - free(p); //native free(null) for same behaviour - #endif - return; //we do nothing - } + fprintf (RTL, "** Call to free with NULL argument in %s:%s, line %i\n", file, fun, line); +#endif // mcd_verbose +/* If they want native free(null), do it */ +/* in case free(null) does something bad, */ +/* rather than let them think it's ok */ +/* Must be enabled, off by default. */ +#ifdef MCD_FREE_NULL + free (p); // native free(null) for same behaviour +#endif + return; // we do nothing + } #ifdef MCD_VERBOSE - fprintf(RTL,"free\t[%X]\t\tfrom %s:%s, line %i\n", - (int)p,file,fun,line); + fprintf (RTL, "free\t[%X]\t\tfrom %s:%s, line %i\n", (int)p, file, fun, line); #endif - //should find id, but that's delchunk's job... + // should find id, but that's delchunk's job... 
#ifdef MCD_FASTFREE - // if delChunk returns something, it means the pointer is crazy - if(!delChunk(p-sizeof(Chunk)/*,file,fun,line*/)) - free(p-sizeof(Chunk)); - else - { - fprintf(RTL,"** Bad or Foreign pointer [%08X] from %s:%s line %i\n", - (int)p,file,fun,line); - fflush(RTL); - free(p); //assume it's just a foreign pointer (could die) - } -#else //not fastfree - - delChunk(p); //return value here doesn't matter, but 1 is still 'error' - // error behaviour is simply the same (free and see) - free(p); -#endif - return; + // if delChunk returns something, it means the pointer is crazy + if (!delChunk (p - sizeof (Chunk) /*,file,fun,line*/)) + free (p - sizeof (Chunk)); + else + { + fprintf (RTL, "** Bad or Foreign pointer [%08X] from %s:%s line %i\n", (int)p, file, fun, line); + fflush (RTL); + free (p); // assume it's just a foreign pointer (could die) + } +#else // not fastfree + + delChunk (p); // return value here doesn't matter, but 1 is still 'error' + // error behaviour is simply the same (free and see) + free (p); +#endif + return; } -void showMemStats() +void + showMemStats () { - Chunk*c=MemoryChunks; - int total=0; - - FILE * o = nullptr; - - if(MemStatLog) - o=MemStatLog; - else - o=stdout; - - fprintf(o,"Memory Stats:\n"); - while(c) - { - fprintf(o,"%12i bytes allocated in [%s:%s], line %7i [%08X][id ", - c->size,c->function,c->file,c->line,(int)c->ptr+SOC); - phex(o,c->id); - total+=c->size; - c=c->next; - fflush(o); - } - fprintf(o,"\t\tTotal unfreed bytes: %12i\n",total); - //fprintf(o,"\t\tOverflows[approx.]: %12i\n",totalOverflow); - fprintf(o,"\t\tPeak memory usage: %12i\n",maximum); - fprintf(o,"\t\tLargest allocation: %12i\n",largest); - fprintf(o,"\t\tSmallest allocation: %12i\n",smallest); - fprintf(o,"\t\tTotal Allocations: %12i\n",totalAlloc); - fprintf(o,"\t\tTotal Frees: %12i\n",totalFree); - fprintf(o,"\t\tPeak Allocations: %12i\n",peakAlloc); - fprintf(o,"\n"); - return; + Chunk* c = MemoryChunks; + int total = 0; + + FILE* o = 
nullptr; + + if (MemStatLog) + o = MemStatLog; + else + o = stdout; + + fprintf (o, "Memory Stats:\n"); + while (c) + { + fprintf (o, + "%12i bytes allocated in [%s:%s], line %7i [%08X][id ", + c->size, + c->function, + c->file, + c->line, + (int)c->ptr + SOC); + phex (o, c->id); + total += c->size; + c = c->next; + fflush (o); + } + fprintf (o, "\t\tTotal unfreed bytes: %12i\n", total); + // fprintf(o,"\t\tOverflows[approx.]: %12i\n",totalOverflow); + fprintf (o, "\t\tPeak memory usage: %12i\n", maximum); + fprintf (o, "\t\tLargest allocation: %12i\n", largest); + fprintf (o, "\t\tSmallest allocation: %12i\n", smallest); + fprintf (o, "\t\tTotal Allocations: %12i\n", totalAlloc); + fprintf (o, "\t\tTotal Frees: %12i\n", totalFree); + fprintf (o, "\t\tPeak Allocations: %12i\n", peakAlloc); + fprintf (o, "\n"); + return; } -void _MCD_RealTimeLog(FILE*f) +void + _MCD_RealTimeLog (FILE* f) { - RealTimeLog=f; - return; + RealTimeLog = f; + return; } -void _MCD_MemStatLog(FILE*f) +void + _MCD_MemStatLog (FILE* f) { - MemStatLog=f; - return; + MemStatLog = f; + return; } #ifdef __cplusplus -char *_MCD_LastSetFile,*_MCD_LastSetFun; -int _MCD_LastSetLine; - +char *_MCD_LastSetFile, *_MCD_LastSetFun; +int _MCD_LastSetLine; /*void* operator new (unsigned int size,char *file, char*fun) { - return MCD_malloc(size,file,fun,5); + return MCD_malloc(size,file,fun,5); } void* operator new[] (unsigned int size,char*file, char*fun) { - return MCD_malloc(size,file,fun,5); + return MCD_malloc(size,file,fun,5); }*/ - /*void* operator new (size_t size, char* file, char*fun, int line) { - return MCD_malloc(size, file, fun, line); + return MCD_malloc(size, file, fun, line); } void* operator new[] (size_t size, char* file, char* fun, int line) { - return MCD_malloc(size, file, fun, line); + return MCD_malloc(size, file, fun, line); } void operator delete(void* buffer, char*file, char*fun, int line) { - MCD_free(buffer, file, fun, line); + MCD_free(buffer, file, fun, line); void operator 
delete[] (void* buffer, char*file, char*fun, int line) { - MCD_free(buffer, file, fun, line); + MCD_free(buffer, file, fun, line); }*/ /* void* operator new (size_t size) { - return MCD_malloc(size, __FILE__, __FUNCTION__, __LINE__); + return MCD_malloc(size, __FILE__, __FUNCTION__, __LINE__); } void* operator new[] (size_t size) { - return MCD_malloc(size, __FILE__, __FUNCTION__, __LINE__); + return MCD_malloc(size, __FILE__, __FUNCTION__, __LINE__); } void operator delete(void* buffer) { - MCD_free(buffer, __FILE__, __FUNCTION__, __LINE__); + MCD_free(buffer, __FILE__, __FUNCTION__, __LINE__); } void operator delete[](void* buffer) { - MCD_free(buffer, __FILE__, __FUNCTION__, __LINE__); + MCD_free(buffer, __FILE__, __FUNCTION__, __LINE__); }*/ #endif // __cplusplus -#endif //_MCD_GONE +#endif //_MCD_GONE diff --git a/source/source_base/mcd.h b/source/source_base/mcd.h index ee53c2a5eeb..36d82026d19 100644 --- a/source/source_base/mcd.h +++ b/source/source_base/mcd.h @@ -9,16 +9,16 @@ * http://prj.softpixel.com/licenses/#bsd */ - #ifndef MCD_H #define MCD_H -#include //for file redirection +#include //for file redirection -#define MCD_VERSION 0x010201 +#define MCD_VERSION 0x010201 #ifdef __cplusplus -extern "C" { +extern "C" +{ #endif #ifndef _MCD_GONE @@ -28,258 +28,267 @@ extern "C" { #include #ifdef WIN32 - #define __FUNCTION__ __FILE__ +#define __FUNCTION__ __FILE__ #endif #ifndef MCD_QUIET - #ifndef WIN32 - #warning - Building with memory checking. - #warning expect lower performance. - - #endif +#ifndef WIN32 +#warning - Building with memory checking. +#warning expect lower performance. 
- +#endif #endif -//warn about redefinitions (sometimes these can be #defined) + // warn about redefinitions (sometimes these can be #defined) #ifdef malloc #undef malloc - #ifndef WIN32 - #warning ------ Redefining malloc() ------ - #endif +#ifndef WIN32 +#warning ------ Redefining malloc() ------ +#endif #endif #ifdef calloc #undef calloc - #ifndef WIN32 - #warning ------ Redefining calloc() ------ - #endif +#ifndef WIN32 +#warning ------ Redefining calloc() ------ +#endif #endif #ifdef realloc #undef realloc - #ifndef WIN32 - #warning ------ Redefining realloc() ------ - #endif +#ifndef WIN32 +#warning ------ Redefining realloc() ------ +#endif #endif #ifdef free #undef free - #ifndef WIN32 - #warning ------ Redefining free() ------ - #endif +#ifndef WIN32 +#warning ------ Redefining free() ------ +#endif #endif #ifdef strdup #undef strdup - #ifndef WIN32 - #warning ------- Redefining strdup() -------- - #endif +#ifndef WIN32 +#warning ------- Redefining strdup() -------- +#endif #endif #ifdef strndup #undef strndup - #ifndef WIN32 - #warning ------- Redefining strndup() ------- - #endif +#ifndef WIN32 +#warning ------- Redefining strndup() ------- +#endif #endif #ifdef asprintf #undef asprintf - #ifndef WIN32 - #warning ------ Redefining asprintf() ------ - #endif +#ifndef WIN32 +#warning ------ Redefining asprintf() ------ +#endif #endif #ifdef vasprintf #undef vasprintf - #ifndef WIN32 - #warning ------ Redefining vasprintf() ------ - #endif +#ifndef WIN32 +#warning ------ Redefining vasprintf() ------ +#endif #endif #ifdef scanf #undef scanf - #ifndef WIN32 - #warning ------ Redefining scanf() ------ - #endif +#ifndef WIN32 +#warning ------ Redefining scanf() ------ +#endif #endif #ifdef fscanf #undef fscanf - #ifndef WIN32 - #warning ------ Redefining fscanf() ------ - #endif +#ifndef WIN32 +#warning ------ Redefining fscanf() ------ +#endif #endif #ifdef sscanf #undef sscanf - #ifndef WIN32 - #warning ------ Redefining sscanf() ------ - #endif +#ifndef 
WIN32 +#warning ------ Redefining sscanf() ------ +#endif #endif #ifdef getcwd #undef getcwd - #ifndef WIN32 - #warning ------ Redefining getcwd() ------ - #endif +#ifndef WIN32 +#warning ------ Redefining getcwd() ------ +#endif #endif -#define strdup(p) MCD_strdup(p,__FUNCTION__,__FILE__,__LINE__) -#ifndef WIN32 //no strndup in win32 -#define strndup(p,n) MCD_strndup(p,n,__FUNCTION__,__FILE__,__LINE__) +#define strdup(p) MCD_strdup (p, __FUNCTION__, __FILE__, __LINE__) +#ifndef WIN32 // no strndup in win32 +#define strndup(p, n) MCD_strndup (p, n, __FUNCTION__, __FILE__, __LINE__) #endif -#define malloc(size) MCD_malloc(size,__FUNCTION__,__FILE__,__LINE__) -#define calloc(n,s) MCD_calloc(s*n,__FUNCTION__,__FILE__,__LINE__) -#define realloc(p,s) MCD_realloc(p,s,__FUNCTION__,__FILE__,__LINE__) +#define malloc(size) MCD_malloc (size, __FUNCTION__, __FILE__, __LINE__) +#define calloc(n, s) MCD_calloc (s* n, __FUNCTION__, __FILE__, __LINE__) +#define realloc(p, s) MCD_realloc (p, s, __FUNCTION__, __FILE__, __LINE__) #ifdef _GNU_SOURCE -#define asprintf(p,f,args...) MCD_asprintf(p,f,__FUNCTION__,__FILE__,__LINE__, ## args) -#define vasprintf(p,f,ap) MCD_vasprintf(p,f,ap,__FUNCTION__,__FILE__,__LINE__) +#define asprintf(p, f, args...) MCD_asprintf (p, f, __FUNCTION__, __FILE__, __LINE__, ##args) +#define vasprintf(p, f, ap) MCD_vasprintf (p, f, ap, __FUNCTION__, __FILE__, __LINE__) #endif #ifndef WIN32 /* windows doesn't like variable arguments in #define's */ -#ifndef MCD_FASTFREE //fastfree isnt supported here yet -#define scanf(f,args...) MCD_scanf(f,__FUNCTION__,__FILE__,__LINE__, ## args) -#define fscanf(s,f,args...) MCD_fscanf(s,f,__FUNCTION__,__FILE__,__LINE__, ## args) -#define sscanf(s,f,args...) MCD_sscanf(s,f,__FUNCTION__,__FILE__,__LINE__, ## args) -#define getcwd(p,s) MCD_getcwd(p,s,__FUNCTION__,__FILE__,__LINE__) -#endif //fastfree -#endif //win32 - -#define free(p) MCD_free(p,__FUNCTION__,__FILE__,__LINE__) - -#endif // _MCD_CHECK - - -/* !!! 
These are called by the defines only. Do NOT use directly. !!! */ -void *MCD_malloc(int size, char*, char*, int); -void *MCD_calloc(int size, char*, char*, int); -void *MCD_realloc(void *p, int size, char*, char*, int); -char *MCD_getcwd (char *p, int size, char*, char*, int); +#ifndef MCD_FASTFREE // fastfree isnt supported here yet +#define scanf(f, args...) MCD_scanf (f, __FUNCTION__, __FILE__, __LINE__, ##args) +#define fscanf(s, f, args...) MCD_fscanf (s, f, __FUNCTION__, __FILE__, __LINE__, ##args) +#define sscanf(s, f, args...) MCD_sscanf (s, f, __FUNCTION__, __FILE__, __LINE__, ##args) +#define getcwd(p, s) MCD_getcwd (p, s, __FUNCTION__, __FILE__, __LINE__) +#endif // fastfree +#endif // win32 + +#define free(p) MCD_free (p, __FUNCTION__, __FILE__, __LINE__) + +#endif // _MCD_CHECK + + /* !!! These are called by the defines only. Do NOT use directly. !!! */ + void* MCD_malloc (int size, char*, char*, int); + void* MCD_calloc (int size, char*, char*, int); + void* MCD_realloc (void* p, int size, char*, char*, int); + char* MCD_getcwd (char* p, int size, char*, char*, int); #ifdef __GNUC__ -char *MCD_strdup (const char*s, char*, char*, int); -char *MCD_strndup(const char*s, int n, char*, char*, int); + char* MCD_strdup (const char* s, char*, char*, int); + char* MCD_strndup (const char* s, int n, char*, char*, int); #else -char *MCD_strdup (char*s, char*, char*, int); -char *MCD_strndup(char*s, int n, char*, char*, int); + char* MCD_strdup (char* s, char*, char*, int); + char* MCD_strndup (char* s, int n, char*, char*, int); #endif -int MCD_scanf (const char *fmt, char*fun, char*file, int line,...); -int MCD_fscanf(FILE *stream,const char *fmt,char*fun,char*file,int line,...); -int MCD_sscanf(const char *str,const char *fmt,char*fun,char*file,int line,...); + int MCD_scanf (const char* fmt, char* fun, char* file, int line, ...); + int MCD_fscanf (FILE* stream, const char* fmt, char* fun, char* file, int line, ...); + int MCD_sscanf (const char* str, const 
char* fmt, char* fun, char* file, int line, ...); // Private MCD function, no need to be in public namespace -//void scan_args(const char*fmt,va_list argptr,char*fun,char*file,int line); +// void scan_args(const char*fmt,va_list argptr,char*fun,char*file,int line); #ifdef _GNU_SOURCE -int MCD_asprintf(char **ptr, const char*fmt,char*,char*,int,...); -int MCD_vasprintf(char **ptr, const char*fmt,va_list argptr,char*,char*,int); -#endif // _GNU_SOURCE + int MCD_asprintf (char** ptr, const char* fmt, char*, char*, int, ...); + int MCD_vasprintf (char** ptr, const char* fmt, va_list argptr, char*, char*, int); +#endif // _GNU_SOURCE -void MCD_free(void *p,char*,char*,int); + void MCD_free (void* p, char*, char*, int); -/* --- call this for memory stats --- */ -void showMemStats(void); + /* --- call this for memory stats --- */ + void showMemStats (void); -/* --- to send realtime stats somewhere other than stderr, - put an opened fp in here --- */ -void _MCD_RealTimeLog(FILE*); + /* --- to send realtime stats somewhere other than stderr, + put an opened fp in here --- */ + void _MCD_RealTimeLog (FILE*); -/* --- to send showMemStats() somewhere other than stdout, - put an opened fp in here --- */ -void _MCD_MemStatLog(FILE*); + /* --- to send showMemStats() somewhere other than stdout, + put an opened fp in here --- */ + void _MCD_MemStatLog (FILE*); -#else //MCD is gone +#else // MCD is gone /* define functions so source will compile without modification */ #define showMemStats() #define _MCD_RealTimeLog(x) #define _MCD_MemStatLog(x) -#endif //_MCD_GONE +#endif //_MCD_GONE #ifdef __cplusplus } /* extern "C" */ #endif -#ifndef _MCD_GONE // check again, outside of extern "C" mode -#ifdef __cplusplus /* Some C++ new/delete operator overloading */ +#ifndef _MCD_GONE // check again, outside of extern "C" mode +#ifdef __cplusplus /* Some C++ new/delete operator overloading */ #ifndef WIN32 - #warning C++ Extentions Enabled +#warning C++ Extentions Enabled #endif #ifdef new - 
#undef new +#undef new #endif #ifdef delete - #undef delete +#undef delete #endif -extern char *_MCD_LastSetFile,*_MCD_LastSetFun; -extern int _MCD_LastSetLine; +extern char *_MCD_LastSetFile, *_MCD_LastSetFun; +extern int _MCD_LastSetLine; -inline void setFileFunLineState(char*file,char*fun,int line) +inline void + setFileFunLineState (char* file, char* fun, int line) { - _MCD_LastSetLine=line; - _MCD_LastSetFile=file; - _MCD_LastSetFun=fun; + _MCD_LastSetLine = line; + _MCD_LastSetFile = file; + _MCD_LastSetFun = fun; } -inline void* operator new (unsigned int size,char *file, -char*fun,int line) +inline void* + operator new (unsigned int size, char* file, char* fun, int line) { - return MCD_malloc(size,file,fun,line); + return MCD_malloc (size, file, fun, line); } -inline void* operator new[] (unsigned int size,char*file, -char*fun,int line) +inline void* + operator new[] (unsigned int size, char* file, char* fun, int line) { - return MCD_malloc(size,file,fun,line); + return MCD_malloc (size, file, fun, line); } // currently, passing args to delete operator is not working at all... -#ifndef WIN32 //win32 doesn't like default params to delete -inline void operator delete (void * buf,char*file=__FILE__, +#ifndef WIN32 // win32 doesn't like default params to delete +inline void + operator delete (void* buf, + char* file = __FILE__, #ifdef WIN32 -char*fun=__FILE__,int line=__LINE__) + char* fun = __FILE__, + int line = __LINE__) #else -char*fun=__FUNCTION__,int line=__LINE__) + char* fun = __FUNCTION__, + int line = __LINE__) #endif { - MCD_free(buf,_MCD_LastSetFile,_MCD_LastSetFun,_MCD_LastSetLine); + MCD_free (buf, _MCD_LastSetFile, _MCD_LastSetFun, _MCD_LastSetLine); } // ...so we have these here for the day they work, which isn't today. 
-inline void operator delete[] (void * buf,char*file=__FILE__, +inline void + operator delete[] (void* buf, + char* file = __FILE__, #ifdef WIN32 -char*fun=__FILE__,int line=__LINE__) + char* fun = __FILE__, + int line = __LINE__) #else -char*fun=__FUNCTION__,int line=__LINE__) + char* fun = __FUNCTION__, + int line = __LINE__) #endif { - MCD_free(buf,_MCD_LastSetFile,_MCD_LastSetFun,_MCD_LastSetLine); + MCD_free (buf, _MCD_LastSetFile, _MCD_LastSetFun, _MCD_LastSetLine); } -#endif // win32 default delete params +#endif // win32 default delete params -inline void operator delete (void * buf) +inline void + operator delete (void* buf) { - MCD_free(buf,0,0,0); + MCD_free (buf, 0, 0, 0); } -inline void operator delete[] (void * buf) +inline void + operator delete[] (void* buf) { - MCD_free(buf,0,0,0); + MCD_free (buf, 0, 0, 0); } #ifdef WIN32 -#define new new(__FILE__,__FILE__,__LINE__) +#define new new (__FILE__, __FILE__, __LINE__) #else -#define new new(__FILE__,__FUNCTION__,__LINE__) +#define new new (__FILE__, __FUNCTION__, __LINE__) #endif -//#define delete setFileFunLineState(__FILE__,__FUNCTION__,__LINE__);delete -//#define delete delete(__FILE__,__FUNCTION__,__LINE__) +// #define delete setFileFunLineState(__FILE__,__FUNCTION__,__LINE__);delete +// #define delete delete(__FILE__,__FUNCTION__,__LINE__) #endif // __cplusplus #endif // _MCD_GONE - -#endif // MCD_H +#endif // MCD_H diff --git a/source/source_base/memory.cpp b/source/source_base/memory.cpp index 7d7bd6bcc5a..8be9482b9dd 100644 --- a/source/source_base/memory.cpp +++ b/source/source_base/memory.cpp @@ -14,12 +14,12 @@ namespace ModuleBase // 1024 KB = 1 MB // 1024 MB = 1 GB double Memory::total = 0.0; -int Memory::complex_matrix_memory = 2*sizeof(double); // 16 byte -int Memory::double_memory = sizeof(double); // 8 byte -int Memory::int_memory = sizeof(int); // 4.0 Byte -int Memory::bool_memory = sizeof(bool); // 1.0 Byte -int Memory::float_memory = sizeof(float); // 4.0 Byte -int 
Memory::short_memory = sizeof(short); // 2.0 Byte +int Memory::complex_matrix_memory = 2 * sizeof (double); // 16 byte +int Memory::double_memory = sizeof (double); // 8 byte +int Memory::int_memory = sizeof (int); // 4.0 Byte +int Memory::bool_memory = sizeof (bool); // 1.0 Byte +int Memory::float_memory = sizeof (float); // 4.0 Byte +int Memory::short_memory = sizeof (short); // 2.0 Byte int Memory::n_memory = 1000; int Memory::n_now = 0; @@ -31,472 +31,453 @@ double Memory::total_gpu = 0.0; int Memory::n_now_gpu = 0; bool Memory::init_flag_gpu = false; -std::string *Memory::name_gpu; -std::string *Memory::class_name_gpu; -double *Memory::consume_gpu; +std::string* Memory::name_gpu; +std::string* Memory::class_name_gpu; +double* Memory::consume_gpu; #endif +std::string* Memory::name; +std::string* Memory::class_name; +double* Memory::consume; -std::string *Memory::name; -std::string *Memory::class_name; -double *Memory::consume; +Memory::Memory () {} -Memory::Memory() -{ -} +Memory::~Memory () {} -Memory::~Memory() +double + Memory::calculate_mem (const long& n_in, const std::string& type) { + double n = static_cast (n_in); + double mem = 0.0; + + double factor = 1.0 / 1024.0 / 1024.0; + double complex_matrix_mem = complex_matrix_memory * factor; + double double_mem = double_memory * factor; + double int_mem = int_memory * factor; + double bool_mem = bool_memory * factor; + double float_mem = float_memory * factor; + double short_mem = short_memory * factor; + + if (type == "ModuleBase::ComplexMatrix" || type == "complexmatrix" || type == "cdouble") + { + mem = complex_matrix_mem; + } + else if (type == "real" || type == "double") + { + mem = double_mem; + } + else if (type == "int") + { + mem = int_mem; + } + else if (type == "bool") + { + mem = bool_mem; + } + else if (type == "short") + { + mem = short_mem; + } + else if (type == "float") + { + mem = float_mem; + } + else if (type == "AtomLink") + { + mem = int_mem * 2 + double_mem * 3; + } + else if (type == 
"ModuleBase::Vector3") + { + mem = 3 * double_mem; + } + else + { + std::cout << "not this type in memory storage : " << type << std::endl; + } + total += n * mem; + return n * mem; } -double Memory::calculate_mem(const long &n_in,const std::string &type) -{ - double n = static_cast(n_in); - double mem = 0.0; - - double factor = 1.0/1024.0/1024.0; - double complex_matrix_mem = complex_matrix_memory * factor; - double double_mem = double_memory * factor; - double int_mem = int_memory * factor; - double bool_mem = bool_memory * factor; - double float_mem = float_memory * factor; - double short_mem = short_memory * factor; - - if(type=="ModuleBase::ComplexMatrix" || type=="complexmatrix" || type=="cdouble") - { - mem = complex_matrix_mem; - } - else if(type=="real" || type=="double") - { - mem = double_mem; - } - else if(type=="int") - { - mem = int_mem; - } - else if(type=="bool") - { - mem = bool_mem; - } - else if(type=="short") - { - mem = short_mem; - } - else if(type=="float") - { - mem = float_mem; - } - else if(type=="AtomLink") - { - mem = int_mem * 2 + double_mem * 3; - } - else if(type=="ModuleBase::Vector3") - { - mem = 3 * double_mem; - } - else - { - std::cout<<"not this type in memory storage : "<= n_memory) - { - std::cout<<" Error! Too many memories required."; - return 0.0; - } - - consume[find] = Memory::calculate_mem(n_in,type); - - if(consume[find] > 5) - { - print(find); - } - return consume[find]; + if (!Memory::init_flag) + { + name = new std::string[n_memory]; + class_name = new std::string[n_memory]; + consume = new double[n_memory]; + for (int i = 0; i < n_memory; i++) + { + consume[i] = 0.0; + } + Memory::init_flag = true; + } + + int find = 0; + for (find = 0; find < n_now; find++) + { + if (name_in == name[find]) + { + break; + } + } + + // find == n_now : found a new record. + if (find == n_now) + { + n_now++; + name[find] = name_in; + class_name[find] = class_name_in; + } + if (n_now >= n_memory) + { + std::cout << " Error! 
Too many memories required."; + return 0.0; + } + + consume[find] = Memory::calculate_mem (n_in, type); + + if (consume[find] > 5) + { + print (find); + } + return consume[find]; } -void Memory::record -( - const std::string &name_in, - const long &n_in, - const bool accumulate -) +void + Memory::record (const std::string& name_in, const long& n_in, const bool accumulate) { - if(!Memory::init_flag) - { - name = new std::string[n_memory]; - class_name = new std::string[n_memory]; - consume = new double[n_memory]; - for(int i=0;i= n_memory) - { - std::cout<<" Error! Too many memories has been recorded."; - return; - } - - const double factor = 1.0/1024.0/1024.0; - double size_mb = n_in * factor; - - if(accumulate) - { - consume[find] += size_mb; - Memory::total += size_mb; - } - else - { - if(consume[find] < size_mb) - { - Memory::total += size_mb - consume[find]; - consume[find] = size_mb; - if(consume[find] > 5) - { - print(find); - } - } - } - - return; + if (!Memory::init_flag) + { + name = new std::string[n_memory]; + class_name = new std::string[n_memory]; + consume = new double[n_memory]; + for (int i = 0; i < n_memory; i++) + { + consume[i] = 0.0; + } + Memory::init_flag = true; + } + + int find = 0; + for (find = 0; find < n_now; find++) + { + if (name_in == name[find]) + { + break; + } + } + + // find == n_now : found a new record. + if (find == n_now) + { + n_now++; + name[find] = name_in; + class_name[find] = ""; + } + if (n_now >= n_memory) + { + std::cout << " Error! 
Too many memories has been recorded."; + return; + } + + const double factor = 1.0 / 1024.0 / 1024.0; + double size_mb = n_in * factor; + + if (accumulate) + { + consume[find] += size_mb; + Memory::total += size_mb; + } + else + { + if (consume[find] < size_mb) + { + Memory::total += size_mb - consume[find]; + consume[find] = size_mb; + if (consume[find] > 5) + { + print (find); + } + } + } + + return; } #if defined(__CUDA) || defined(__ROCM) -double Memory::record_gpu -( - const std::string &class_name_in, - const std::string &name_in, - const long &n_in, - const std::string &type, - const bool accumulate -) +double + Memory::record_gpu (const std::string& class_name_in, + const std::string& name_in, + const long& n_in, + const std::string& type, + const bool accumulate) { - if(!Memory::init_flag_gpu) - { - name_gpu = new std::string[n_memory]; - class_name_gpu = new std::string[n_memory]; - consume_gpu = new double[n_memory]; - for(int i=0;i= n_memory) - { - std::cout<<" Error! Too many gpu memories required."; - return 0.0; - } - - consume_gpu[find] = Memory::calculate_mem(n_in,type); - - if(consume_gpu[find] > 5) - { - print(find); - } - return consume_gpu[find]; + if (!Memory::init_flag_gpu) + { + name_gpu = new std::string[n_memory]; + class_name_gpu = new std::string[n_memory]; + consume_gpu = new double[n_memory]; + for (int i = 0; i < n_memory; i++) + { + consume_gpu[i] = 0.0; + } + Memory::init_flag_gpu = true; + } + + int find = 0; + for (find = 0; find < n_now_gpu; find++) + { + if (name_in == name_gpu[find]) + { + break; + } + } + + // find == n_now : found a new record. + if (find == n_now_gpu) + { + n_now_gpu++; + name_gpu[find] = name_in; + class_name_gpu[find] = class_name_in; + } + if (n_now_gpu >= n_memory) + { + std::cout << " Error! 
Too many gpu memories required."; + return 0.0; + } + + consume_gpu[find] = Memory::calculate_mem (n_in, type); + + if (consume_gpu[find] > 5) + { + print (find); + } + return consume_gpu[find]; } -void Memory::record_gpu -( - const std::string &name_in, - const size_t &n_in, - const bool accumulate -) +void + Memory::record_gpu (const std::string& name_in, const size_t& n_in, const bool accumulate) { - if(!Memory::init_flag_gpu) - { - name_gpu = new std::string[n_memory]; - class_name_gpu = new std::string[n_memory]; - consume_gpu = new double[n_memory]; - for(int i=0;i= n_memory) - { - std::cout<<" Error! Too many gpu memories has been recorded."; - return; - } - - const double factor = 1.0/1024.0/1024.0; - double size_mb = n_in * factor; - - if(accumulate) - { - consume_gpu[find] += size_mb; - Memory::total_gpu += size_mb; - } - else - { - if(consume_gpu[find] < size_mb) - { - Memory::total_gpu += size_mb - consume_gpu[find]; - consume_gpu[find] = size_mb; - if(consume_gpu[find] > 5) - { - print(find); - } - } - } - return; + if (!Memory::init_flag_gpu) + { + name_gpu = new std::string[n_memory]; + class_name_gpu = new std::string[n_memory]; + consume_gpu = new double[n_memory]; + for (int i = 0; i < n_memory; i++) + { + consume_gpu[i] = 0.0; + } + Memory::init_flag_gpu = true; + } + + int find = 0; + for (find = 0; find < n_now_gpu; find++) + { + if (name_in == name_gpu[find]) + { + break; + } + } + + // find == n_now : found a new record. + if (find == n_now_gpu) + { + n_now_gpu++; + name_gpu[find] = name_in; + class_name_gpu[find] = ""; + } + if (n_now_gpu >= n_memory) + { + std::cout << " Error! 
Too many gpu memories has been recorded."; + return; + } + + const double factor = 1.0 / 1024.0 / 1024.0; + double size_mb = n_in * factor; + + if (accumulate) + { + consume_gpu[find] += size_mb; + Memory::total_gpu += size_mb; + } + else + { + if (consume_gpu[find] < size_mb) + { + Memory::total_gpu += size_mb - consume_gpu[find]; + consume_gpu[find] = size_mb; + if (consume_gpu[find] > 5) + { + print (find); + } + } + } + return; } #endif -void Memory::print(const int find) +void + Memory::print (const int find) { - GlobalV::ofs_running <<"\n Warning_Memory_Consuming allocated: " - <<" "<0); + ofs << std::setw (30) << "total" << std::setw (15) << std::setprecision (4) << Memory::total << std::endl; - bool *print_flag = new bool[n_memory]; + assert (n_memory > 0); - for(int i=0; i0); - - bool *print_flag_gpu = new bool[n_memory]; - - for(int i=0; i 0); + + bool* print_flag_gpu = new bool[n_memory]; + + for (int i = 0; i < n_memory; i++) + { + print_flag_gpu[i] = false; + } + + for (int i = 0; i < n_memory; i++) + { #ifdef __MPI - Parallel_Reduce::reduce_all(consume_gpu[i]); + Parallel_Reduce::reduce_all (consume_gpu[i]); #endif - } - - for (int i=0; i 1 MB) in a file * * @param ofs The output file stream for print out memory records */ - static void print_all(std::ofstream &ofs); + static void print_all (std::ofstream& ofs); - static void print(const int find_in); + static void print (const int find_in); /** * @brief Calculate memory requirements for various @@ -106,29 +99,29 @@ class Memory * @param type The type of data * @return double */ - static double calculate_mem(const long &n, const std::string &type); + static double calculate_mem (const long& n, const std::string& type); private: static double total; - static std::string *name; - static std::string *class_name; - static double *consume; + static std::string* name; + static std::string* class_name; + static double* consume; static int n_memory; static int n_now; static bool init_flag; #if defined(__CUDA) 
|| defined(__ROCM) static double total_gpu; - static std::string *name_gpu; - static std::string *class_name_gpu; - static double *consume_gpu; + static std::string* name_gpu; + static std::string* class_name_gpu; + static double* consume_gpu; static int n_now_gpu; static bool init_flag_gpu; #endif static int complex_matrix_memory; //(16 Byte) - static int double_memory; //(8 Byte) - static int int_memory; //(4 Byte) + static int double_memory; //(8 Byte) + static int int_memory; //(4 Byte) static int bool_memory; static int short_memory; //(2 Byte) static int float_memory; //(4 Byte) diff --git a/source/source_base/module_container/ATen/core/tensor.cpp b/source/source_base/module_container/ATen/core/tensor.cpp index 92babb361c9..936d540524f 100644 --- a/source/source_base/module_container/ATen/core/tensor.cpp +++ b/source/source_base/module_container/ATen/core/tensor.cpp @@ -5,44 +5,44 @@ #if defined(__CUDA) || defined(__ROCM) #include #endif // __CUDA || __ROCM -namespace container { +namespace container +{ -Tensor::Tensor() : Tensor(DataType::DT_FLOAT) {} +Tensor::Tensor () : Tensor (DataType::DT_FLOAT) {} -Tensor::Tensor(DataType data_type) : Tensor(data_type, TensorShape({1})) {} +Tensor::Tensor (DataType data_type) : Tensor (data_type, TensorShape ({1})) {} // Constructor that creates a tensor with the given data type and shape using the default allocator. -Tensor::Tensor(DataType data_type, const TensorShape& shape) - : Tensor(GetAllocator(DeviceType::CpuDevice), data_type, DeviceType::CpuDevice, shape) {} +Tensor::Tensor (DataType data_type, const TensorShape& shape) + : Tensor (GetAllocator (DeviceType::CpuDevice), data_type, DeviceType::CpuDevice, shape) +{ +} // Construct a new Tensor object with the given data type and shape. 
-Tensor::Tensor(DataType data_type, DeviceType device, const TensorShape& shape) - : Tensor(GetAllocator(device), data_type, device, shape) {} - -Tensor::Tensor(base::core::Allocator* a, DataType data_type, DeviceType device, const TensorShape& shape) - : data_type_(data_type), - device_(device), - shape_(shape), - buffer_(new TensorBuffer(a, shape_.NumElements() * SizeOfType(data_type_))) {} +Tensor::Tensor (DataType data_type, DeviceType device, const TensorShape& shape) + : Tensor (GetAllocator (device), data_type, device, shape) +{ +} -// Construct a new Tensor object by copying another Tensor. -Tensor::Tensor(const Tensor& other) - : data_type_(other.data_type_), - shape_(other.shape_), - device_(other.device_), - buffer_(new TensorBuffer(GetAllocator(device_), shape_.NumElements() * SizeOfType(data_type_))) +Tensor::Tensor (base::core::Allocator* a, DataType data_type, DeviceType device, const TensorShape& shape) + : data_type_ (data_type), device_ (device), shape_ (shape), + buffer_ (new TensorBuffer (a, shape_.NumElements () * SizeOfType (data_type_))) { - TEMPLATE_ALL_2(data_type_, device_, - kernels::synchronize_memory()( - this->data(), other.data(), this->NumElements())) } -// Construct a new Tensor object by moving another Tensor. -Tensor::Tensor(Tensor&& other) noexcept - : data_type_(other.data_type_), - device_(other.device_), - shape_(other.shape_), - buffer_(other.buffer_) +// Construct a new Tensor object by copying another Tensor. +Tensor::Tensor (const Tensor& other) + : data_type_ (other.data_type_), shape_ (other.shape_), device_ (other.device_), + buffer_ (new TensorBuffer (GetAllocator (device_), shape_.NumElements () * SizeOfType (data_type_))){ + TEMPLATE_ALL_2 (data_type_, + device_, + kernels::synchronize_memory () (this->data (), + other.data (), + this->NumElements ()))} + + // Construct a new Tensor object by moving another Tensor. 
+ Tensor::Tensor (Tensor && other) noexcept + : data_type_ (other.data_type_), device_ (other.device_), shape_ (other.shape_), buffer_ (other.buffer_) { // Reset the other object. other.buffer_ = nullptr; @@ -52,358 +52,488 @@ Tensor::Tensor(Tensor&& other) noexcept // Note: If you have a class with virtual functions, it requires a virtual destructor. // However, Our subclass TensorMap, etc., do not own resources. // So, we do not need to declare a virtual destructor here. -Tensor::~Tensor() { - if (buffer_) { buffer_->unref(); -} +Tensor::~Tensor () +{ + if (buffer_) + { + buffer_->unref (); + } } // Get the data type of the tensor. -DataType Tensor::data_type() const { return data_type_; } +DataType + Tensor::data_type () const +{ + return data_type_; +} // Get the device type of the tensor. -DeviceType Tensor::device_type() const { return device_; } +DeviceType + Tensor::device_type () const +{ + return device_; +} // Get the shape of the tensor. -const TensorShape& Tensor::shape() const { return shape_; } +const TensorShape& + Tensor::shape () const +{ + return shape_; +} // Get the total number of elements in the tensor. -int64_t Tensor::NumElements() const { return shape_.NumElements(); } +int64_t + Tensor::NumElements () const +{ + return shape_.NumElements (); +} // Get a pointer to the data buffer of the tensor. -void* Tensor::data() const { return buffer_->data(); } +void* + Tensor::data () const +{ + return buffer_->data (); +} // Get the TensorBuffer object that holds the data of the tensor. -const TensorBuffer& Tensor::buffer() const { return *buffer_; } +const TensorBuffer& + Tensor::buffer () const +{ + return *buffer_; +} // Get the Allocator object according to the given device type. 
-base::core::Allocator* Tensor::GetAllocator(DeviceType device) { - base::core::Allocator * allocator; - if (device == DeviceType::CpuDevice) { - allocator = new base::core::CPUAllocator(); - } +base::core::Allocator* + Tensor::GetAllocator (DeviceType device) +{ + base::core::Allocator* allocator; + if (device == DeviceType::CpuDevice) + { + allocator = new base::core::CPUAllocator (); + } #if defined(__CUDA) || defined(__ROCM) - else if (device == DeviceType::GpuDevice) { - allocator = new base::core::GPUAllocator(); - } + else if (device == DeviceType::GpuDevice) + { + allocator = new base::core::GPUAllocator (); + } #endif // __CUDA || __ROCM - else { - std::cerr << "Tensor device type " << device << " does not match requested type." << std::endl; - exit(EXIT_FAILURE); - } + else + { + std::cerr << "Tensor device type " << device << " does not match requested type." << std::endl; + exit (EXIT_FAILURE); + } return allocator; } // Set the tensor to zero -void Tensor::zero() { - TEMPLATE_ALL_2(this->data_type_, this->device_, - kernels::set_memory()(this->data(), 0, this->NumElements())) +void + Tensor::zero () +{ + TEMPLATE_ALL_2 (this->data_type_, + this->device_, + kernels::set_memory () (this->data (), 0, this->NumElements ())) } // Reshape the current tensor -void Tensor::reshape(TensorShape shape) { +void + Tensor::reshape (TensorShape shape) +{ // check the -1 dimension int64_t num = 1; int auto_shape = 0, dim_count = -1, dim_idx = -1; - for (auto dim : shape.dims()) { - dim_count++; - if (dim < 1 && dim != -1) { - throw std::invalid_argument("Invalid shape, dim of tensor must >= 1 or equal to -1(auto shape)."); + for (auto dim: shape.dims ()) + { + dim_count++; + if (dim < 1 && dim != -1) + { + throw std::invalid_argument ("Invalid shape, dim of tensor must >= 1 or equal to -1(auto shape)."); + } + if (dim == -1) + { + auto_shape++; + dim_idx = dim_count; + } + num *= dim; } - if (dim == -1) { - auto_shape++; - dim_idx = dim_count; - } - num *= dim; - } // 
more than one -1 dimension. - if (auto_shape > 1) { - throw std::invalid_argument("Invalid shape, there can be only one -1 dim in TensorShape object."); - } + if (auto_shape > 1) + { + throw std::invalid_argument ("Invalid shape, there can be only one -1 dim in TensorShape object."); + } // auto reshape - if (auto_shape == 1) { - int dim_ = static_cast(this->NumElements() / (-num)); - if (dim_ < 1 || -dim_ * num != this->NumElements()) { - throw std::invalid_argument("Invalid shape, total number of elements does not match!"); - } - shape.set_dim_size(dim_idx, dim_); - } - else { - if (num != this->NumElements()) { - throw std::invalid_argument("Invalid shape, total number of elements does not match!"); - } - } + if (auto_shape == 1) + { + int dim_ = static_cast (this->NumElements () / (-num)); + if (dim_ < 1 || -dim_ * num != this->NumElements ()) + { + throw std::invalid_argument ("Invalid shape, total number of elements does not match!"); + } + shape.set_dim_size (dim_idx, dim_); + } + else + { + if (num != this->NumElements ()) + { + throw std::invalid_argument ("Invalid shape, total number of elements does not match!"); + } + } this->shape_ = shape; } -Tensor Tensor::shaped(const TensorShape& shape) const { +Tensor + Tensor::shaped (const TensorShape& shape) const +{ Tensor output; - REQUIRES_OK(output.CopyFrom(*this, this->shape()), "Invalid shaped operation.") - output.reshape(shape); - return std::move(output); + REQUIRES_OK (output.CopyFrom (*this, this->shape ()), "Invalid shaped operation.") + output.reshape (shape); + return std::move (output); } // Slice the current tensor object. 
-Tensor Tensor::slice(const std::vector &start, const std::vector &size) const { +Tensor + Tensor::slice (const std::vector& start, const std::vector& size) const +{ // check the ndim of input shape - if (start.size() > 3 || size.size() > 3) { - throw std::invalid_argument("TensorSlice: The slice method only supports tensor ranks that are less than or equal to 2."); - } + if (start.size () > 3 || size.size () > 3) + { + throw std::invalid_argument ( + "TensorSlice: The slice method only supports tensor ranks that are less than or equal to 2."); + } // check the dimension size - if (start.size() != shape_.ndim() || size.size() != shape_.ndim()) { - throw std::invalid_argument("TensorSlice: start and size vectors must have same length as number of dimensions"); - } + if (start.size () != shape_.ndim () || size.size () != shape_.ndim ()) + { + throw std::invalid_argument ( + "TensorSlice: start and size vectors must have same length as number of dimensions"); + } // check the boundary - for (int i = 0; i < start.size(); i++) { - if (start[i] < 0 || start[i] >= shape_.dim_size(i)) { - throw std::invalid_argument("TensorSlice: start index is out of bounds"); - } - if (size[i] < 0 || start[i] + size[i] > shape_.dim_size(i)) { - throw std::invalid_argument("TensorSlice: size is out of bounds"); + for (int i = 0; i < start.size (); i++) + { + if (start[i] < 0 || start[i] >= shape_.dim_size (i)) + { + throw std::invalid_argument ("TensorSlice: start index is out of bounds"); + } + if (size[i] < 0 || start[i] + size[i] > shape_.dim_size (i)) + { + throw std::invalid_argument ("TensorSlice: size is out of bounds"); + } } - } // set the output shape of the current tensor. 
TensorShape output_shape = shape_; - for (int i = 0; i < start.size(); i++) { - output_shape.set_dim_size(i, size[i]); - } - Tensor output(this->data_type_, this->device_, output_shape); + for (int i = 0; i < start.size (); i++) + { + output_shape.set_dim_size (i, size[i]); + } + Tensor output (this->data_type_, this->device_, output_shape); // TODO: implement the data copy. // copy the data from the input tensor to the output tensor - unsigned int ndim = shape_.ndim(); - if (ndim == 1) { - TEMPLATE_ALL_2(this->data_type_, this->device_, - kernels::synchronize_memory()( - output.data(), this->data() + start[0], size[0])) - } - else if (ndim == 2) { - for (int i = 0; i < size[0]; i++) { - int offset = static_cast((start[0] + i) * shape_.dim_size(1) + start[1]); - int offset_out = i * size[1]; - TEMPLATE_ALL_2(this->data_type_, this->device_, - kernels::synchronize_memory()( - output.data() + offset_out, this->data() + offset, size[1])) - } - } - else if (ndim == 3) { - for (int i = 0; i < size[0]; i++) { - for (int j = 0; j < size[1]; j++) { - int offset = static_cast((i + start[0]) * shape_.dim_size(1) * shape_.dim_size(2) + - (j + start[1]) * shape_.dim_size(2) + start[2]); - int offset_out = i * size[1] * size[2] + j * size[2]; - TEMPLATE_ALL_2(this->data_type_, this->device_, - kernels::synchronize_memory()( - output.data() + offset_out, this->data() + offset, size[1])) - } + unsigned int ndim = shape_.ndim (); + if (ndim == 1) + { + TEMPLATE_ALL_2 (this->data_type_, + this->device_, + kernels::synchronize_memory () (output.data (), + this->data () + start[0], + size[0])) + } + else if (ndim == 2) + { + for (int i = 0; i < size[0]; i++) + { + int offset = static_cast ((start[0] + i) * shape_.dim_size (1) + start[1]); + int offset_out = i * size[1]; + TEMPLATE_ALL_2 ( + this->data_type_, + this->device_, + kernels::synchronize_memory () (output.data () + offset_out, + this->data () + offset, + size[1])) + } } - } - return std::move(output); + else if (ndim == 3) 
+ { + for (int i = 0; i < size[0]; i++) + { + for (int j = 0; j < size[1]; j++) + { + int offset = static_cast ((i + start[0]) * shape_.dim_size (1) * shape_.dim_size (2) + + (j + start[1]) * shape_.dim_size (2) + start[2]); + int offset_out = i * size[1] * size[2] + j * size[2]; + TEMPLATE_ALL_2 ( + this->data_type_, + this->device_, + kernels::synchronize_memory () (output.data () + offset_out, + this->data () + offset, + size[1])) + } + } + } + return std::move (output); } // Resize tensor object with the given tensor_shape -void Tensor::resize(const TensorShape& new_shape) { - if (shape_ == new_shape) { - return; - } - REQUIRES_OK(buffer_->OwnsMemory() || this->NumElements() == 0, - "Cannot resize a tensor that mapped from a given data buffer") - if (buffer_ && buffer_->GetAllocatedBytes() < new_shape.NumElements() * SizeOfType(data_type_)) { - buffer_->unref(); - this->buffer_ = new TensorBuffer(GetAllocator(device_), new_shape.NumElements() * SizeOfType(data_type_)); - } +void + Tensor::resize (const TensorShape& new_shape) +{ + if (shape_ == new_shape) + { + return; + } + REQUIRES_OK (buffer_->OwnsMemory () || this->NumElements () == 0, + "Cannot resize a tensor that mapped from a given data buffer") + if (buffer_ && buffer_->GetAllocatedBytes () < new_shape.NumElements () * SizeOfType (data_type_)) + { + buffer_->unref (); + this->buffer_ + = new TensorBuffer (GetAllocator (device_), new_shape.NumElements () * SizeOfType (data_type_)); + } shape_ = new_shape; } -Tensor& Tensor::operator=(const Tensor& other) { - if (this == &other) { - return *this; - } +Tensor& + Tensor::operator= (const Tensor& other) +{ + if (this == &other) + { + return *this; + } this->device_ = other.device_; this->data_type_ = other.data_type_; this->shape_ = other.shape_; - if (buffer_) { buffer_->unref(); -} + if (buffer_) + { + buffer_->unref (); + } - this->buffer_ = new TensorBuffer(GetAllocator(device_), shape_.NumElements() * SizeOfType(data_type_)); + this->buffer_ = new 
TensorBuffer (GetAllocator (device_), shape_.NumElements () * SizeOfType (data_type_)); - TEMPLATE_ALL_2(this->data_type_, this->device_, - kernels::synchronize_memory()( - this->data(), other.data(), this->NumElements())) + TEMPLATE_ALL_2 (this->data_type_, + this->device_, + kernels::synchronize_memory () (this->data (), + other.data (), + this->NumElements ())) return *this; } -Tensor& Tensor::operator=(Tensor&& other) noexcept { - if (this == &other) { - return *this; - } +Tensor& + Tensor::operator= (Tensor&& other) noexcept +{ + if (this == &other) + { + return *this; + } this->device_ = other.device_; this->data_type_ = other.data_type_; this->shape_ = other.shape_; - - if (buffer_) { buffer_->unref(); // Release current resource -} + + if (buffer_) + { + buffer_->unref (); // Release current resource + } this->buffer_ = other.buffer_; - other.buffer_ = nullptr; // Reset the other TensorBuffer. + other.buffer_ = nullptr; // Reset the other TensorBuffer. return *this; } -bool Tensor::operator==(const Tensor& other) const { - if (this->data_type_ != other.data_type_ || - this->device_ != other.device_ || - this->shape_ != other.shape_) - { - return false; - } +bool + Tensor::operator== (const Tensor& other) const +{ + if (this->data_type_ != other.data_type_ || this->device_ != other.device_ || this->shape_ != other.shape_) + { + return false; + } bool result = false; - if (this->device_ != DeviceType::CpuDevice) { - Tensor tmpA = this->to_device(); - Tensor tmpB = other.to_device(); - TEMPLATE_ALL_2(tmpA.data_type(), tmpA.device_type(), - result = std::equal(tmpA.data(), tmpA.data() + tmpA.NumElements(), tmpB.data(), element_compare::type)>)) - return result; - } - TEMPLATE_ALL_2(this->data_type_, this->device_, - result = std::equal(this->data(), this->data() + this->NumElements(), other.data(), element_compare::type)>)) + if (this->device_ != DeviceType::CpuDevice) + { + Tensor tmpA = this->to_device (); + Tensor tmpB = other.to_device (); + TEMPLATE_ALL_2 
(tmpA.data_type (), + tmpA.device_type (), + result = std::equal (tmpA.data (), + tmpA.data () + tmpA.NumElements (), + tmpB.data (), + element_compare::type)>)) + return result; + } + TEMPLATE_ALL_2 (this->data_type_, + this->device_, + result = std::equal (this->data (), + this->data () + this->NumElements (), + other.data (), + element_compare::type)>)) return result; } -bool Tensor::CopyFrom(const Tensor& other) { - CopyFromInternal(other, other.shape()); +bool + Tensor::CopyFrom (const Tensor& other) +{ + CopyFromInternal (other, other.shape ()); return true; } -bool Tensor::CopyFrom(const Tensor& other, const TensorShape& shape) { - if (other.NumElements() == shape.NumElements()) { - CopyFromInternal(other, shape); - return true; - } +bool + Tensor::CopyFrom (const Tensor& other, const TensorShape& shape) +{ + if (other.NumElements () == shape.NumElements ()) + { + CopyFromInternal (other, shape); + return true; + } return false; } -bool Tensor::AllocateFrom(const Tensor& other, const TensorShape& shape) { +bool + Tensor::AllocateFrom (const Tensor& other, const TensorShape& shape) +{ data_type_ = other.data_type_; device_ = other.device_; shape_ = shape; - if (buffer_) { buffer_->unref(); -} - buffer_ = new TensorBuffer(GetAllocator(device_), shape_.NumElements() * SizeOfType(data_type_)); + if (buffer_) + { + buffer_->unref (); + } + buffer_ = new TensorBuffer (GetAllocator (device_), shape_.NumElements () * SizeOfType (data_type_)); return true; } -void Tensor::sync(const Tensor& rhs) { - REQUIRES_OK(this->data_type_ == rhs.data_type_ - && this->device_ == rhs.device_) - - if (this->shape_ == rhs.shape_) { - TEMPLATE_ALL_2(data_type_, device_, - kernels::synchronize_memory()( - this->data(), rhs.data(), this->NumElements())) - } - else { - TEMPLATE_ALL_2(data_type_, device_, - kernels::synchronize_memory_stride()( - this->data(), rhs.data(), this->shape().dims(), rhs.shape().dims())) - } +void + Tensor::sync (const Tensor& rhs) +{ + REQUIRES_OK 
(this->data_type_ == rhs.data_type_ && this->device_ == rhs.device_) + + if (this->shape_ == rhs.shape_) + { + TEMPLATE_ALL_2 (data_type_, + device_, + kernels::synchronize_memory () (this->data (), + rhs.data (), + this->NumElements ())) + } + else + { + TEMPLATE_ALL_2 (data_type_, + device_, + kernels::synchronize_memory_stride () (this->data (), + rhs.data (), + this->shape ().dims (), + rhs.shape ().dims ())) + } } -Tensor Tensor::operator[](const int& index) const { - REQUIRES_OK( - index >= 0 && index < shape_.dim_size(0), - "Tensor index is out of bounds.") +Tensor + Tensor::operator[] (const int& index) const +{ + REQUIRES_OK (index >= 0 && index < shape_.dim_size (0), "Tensor index is out of bounds.") TensorShape output_shape = this->shape_; - output_shape.remove_dim(0); - if (output_shape.ndim() == 0) { - // If the output shape is empty, we need to add a dimension of size 1 - output_shape.add_dim(1); - } - auto data_ = reinterpret_cast(this->data()) + index * shape_.strides()[0] * SizeOfType(this->data_type_); - - return TensorMap(data_, this->data_type_, this->device_, output_shape); + output_shape.remove_dim (0); + if (output_shape.ndim () == 0) + { + // If the output shape is empty, we need to add a dimension of size 1 + output_shape.add_dim (1); + } + auto data_ = reinterpret_cast (this->data ()) + index * shape_.strides ()[0] * SizeOfType (this->data_type_); + + return TensorMap (data_, this->data_type_, this->device_, output_shape); } // Overloaded operator<< for the Tensor class. 
-std::ostream& operator<<(std::ostream& os, const Tensor& tensor) { - std::ios::fmtflags flag(os.flags()); - std::streamsize precision = os.precision(); // save the current precision - const int64_t num_elements = tensor.NumElements(); - const DataType data_type = tensor.data_type(); - const DeviceType device_type = tensor.device_type(); - const TensorShape& shape = tensor.shape(); +std::ostream& + operator<< (std::ostream& os, const Tensor& tensor) +{ + std::ios::fmtflags flag (os.flags ()); + std::streamsize precision = os.precision (); // save the current precision + const int64_t num_elements = tensor.NumElements (); + const DataType data_type = tensor.data_type (); + const DeviceType device_type = tensor.device_type (); + const TensorShape& shape = tensor.shape (); // Copy the data from device to host for output - auto * data_ = tensor.data(); + auto* data_ = tensor.data (); #if __CUDA || __ROCM - if (device_type != DeviceType::CpuDevice) { - data_ = malloc(num_elements * Tensor::SizeOfType(data_type)); - // Copy data to a specified device - TEMPLATE_ALL_2(data_type, device_type, - kernels::synchronize_memory()( - reinterpret_cast(data_), tensor.data(), num_elements)) - } + if (device_type != DeviceType::CpuDevice) + { + data_ = malloc (num_elements * Tensor::SizeOfType (data_type)); + // Copy data to a specified device + TEMPLATE_ALL_2 (data_type, + device_type, + kernels::synchronize_memory () (reinterpret_cast (data_), + tensor.data (), + num_elements)) + } #endif os << "Tensor("; os << "shape=["; - for (int i = 0; i < shape.ndim(); ++i) { - os << shape.dim_size(i); - if (i < shape.ndim() - 1) { - os << ","; + for (int i = 0; i < shape.ndim (); ++i) + { + os << shape.dim_size (i); + if (i < shape.ndim () - 1) + { + os << ","; + } } - } os << "], data_type=" << data_type; os << ", device_type=" << device_type; - os << ", owns_memory=" << tensor.buffer().OwnsMemory(); + os << ", owns_memory=" << tensor.buffer ().OwnsMemory (); os << ", buffer=\narray("; - 
switch (data_type) { - case DataType::DT_FLOAT: { - const auto* data = static_cast(data_); - _internal_output(os, data, shape, num_elements); - break; - } - case DataType::DT_DOUBLE: { - const auto* data = static_cast(data_); - _internal_output(os, data, shape, num_elements); - break; - } - case DataType::DT_INT: { - const auto* data = static_cast(data_); - _internal_output(os, data, shape, num_elements); - break; - } - case DataType::DT_INT64: { - const auto* data = static_cast(data_); - _internal_output(os, data, shape, num_elements); - break; - } - case DataType::DT_COMPLEX: { - const auto* data = static_cast*>(data_); - _internal_output(os, data, shape, num_elements); - break; - } - case DataType::DT_COMPLEX_DOUBLE: { - const auto* data = static_cast*>(data_); - _internal_output(os, data, shape, num_elements); - break; - } + switch (data_type) + { + case DataType::DT_FLOAT: + { + const auto* data = static_cast (data_); + _internal_output (os, data, shape, num_elements); + break; + } + case DataType::DT_DOUBLE: + { + const auto* data = static_cast (data_); + _internal_output (os, data, shape, num_elements); + break; + } + case DataType::DT_INT: + { + const auto* data = static_cast (data_); + _internal_output (os, data, shape, num_elements); + break; + } + case DataType::DT_INT64: + { + const auto* data = static_cast (data_); + _internal_output (os, data, shape, num_elements); + break; + } + case DataType::DT_COMPLEX: + { + const auto* data = static_cast*> (data_); + _internal_output (os, data, shape, num_elements); + break; + } + case DataType::DT_COMPLEX_DOUBLE: + { + const auto* data = static_cast*> (data_); + _internal_output (os, data, shape, num_elements); + break; + } default: os << "unknown"; break; - } + } os << "))\n"; #if __CUDA || __ROCM // delete the temporary data - if (device_type != DeviceType::CpuDevice) { - free(data_); - } + if (device_type != DeviceType::CpuDevice) + { + free (data_); + } #endif // restore the os settings - os.flags(flag); - 
os.precision(precision); // restore the precision + os.flags (flag); + os.precision (precision); // restore the precision return os; } diff --git a/source/source_base/module_container/ATen/core/tensor.h b/source/source_base/module_container/ATen/core/tensor.h index 3faf820dbd7..b574881f52b 100644 --- a/source/source_base/module_container/ATen/core/tensor.h +++ b/source/source_base/module_container/ATen/core/tensor.h @@ -11,14 +11,15 @@ #include -// TODO: +// TODO: // 1. add log system // 2. add exception system // 3. refact cmake system, use cmake parant scope to construct the libraries namespace ct = container; -namespace container { +namespace container +{ /** * @brief A multi-dimensional array of elements of a single data type. @@ -29,27 +30,27 @@ namespace container { * This class is not thread-safe and should not be accessed by multiple threads * concurrently. */ -class Tensor { +class Tensor +{ public: - /** * @brief Creates a 1-dimentional, 0-element float tensor. - * + * * This constructor creates a new Tensor object. It can be used to initialize a tensor with * default values or to create an empty tensor. */ - Tensor(); + Tensor (); /** * @brief Explicit constructor for the Tensor class. - * + * * This constructor creates a new Tensor object with the specified data type. * The constructor is marked as explicit, which means it requires an explicit * call and cannot be used for implicit type conversions. - * + * * @param data_type The data type of the tensor elements. */ - explicit Tensor(DataType data_type); + explicit Tensor (DataType data_type); /** * @brief Constructor that creates a tensor with the given data type and shape using the default allocator. @@ -57,7 +58,7 @@ class Tensor { * @param data_type The data type of the tensor. * @param shape The shape of the tensor. 
*/ - Tensor(DataType data_type, const TensorShape& shape); + Tensor (DataType data_type, const TensorShape& shape); /** * @brief Construct a new Tensor object with the given data type, shape and device type. @@ -68,9 +69,9 @@ class Tensor { * @param shape The shape of the tensor. * @param device The data type of the tensor. */ - Tensor(DataType data_type, DeviceType device, const TensorShape& shape); + Tensor (DataType data_type, DeviceType device, const TensorShape& shape); - Tensor(base::core::Allocator* a, DataType data_type, DeviceType device, const TensorShape& shape); + Tensor (base::core::Allocator* a, DataType data_type, DeviceType device, const TensorShape& shape); /** * @brief Construct a new Tensor object by copying another Tensor. @@ -79,7 +80,7 @@ class Tensor { * * @param other The tensor to copy from. */ - Tensor(const Tensor& other); + Tensor (const Tensor& other); /** * @brief Move constructor for the Tensor class. @@ -90,9 +91,9 @@ class Tensor { * * @param other The rvalue reference to the source Tensor object to be moved. */ - Tensor(Tensor&& other) noexcept; - - ~Tensor(); + Tensor (Tensor&& other) noexcept; + + ~Tensor (); /** * @brief Constructor for the Tensor class using an initializer list of values. * @@ -104,48 +105,49 @@ class Tensor { * @param values The initializer list containing the values to populate the Tensor with. * @param device The device type where the Tensor will be allocated (default is CPU). */ - template - Tensor(std::initializer_list values, DeviceType device = DeviceType::CpuDevice) : - Tensor(DataTypeToEnum::value, device, TensorShape({static_cast(values.size())})) { - TEMPLATE_ALL_2(this->data_type_, this->device_, - kernels::synchronize_memory()( - this->data(), values.begin(), this->NumElements())) - } - - /** - * @brief Get the data type of the tensor. - * - * @return The data type of the tensor. 
- */ - DataType data_type() const; + template + Tensor (std::initializer_list values, DeviceType device = DeviceType::CpuDevice) + : Tensor (DataTypeToEnum::value, device, TensorShape ({static_cast (values.size ())})){ + TEMPLATE_ALL_2 (this->data_type_, + this->device_, + kernels::synchronize_memory () (this->data (), + values.begin (), + this->NumElements ()))} + + /** + * @brief Get the data type of the tensor. + * + * @return The data type of the tensor. + */ + DataType data_type () const; /** * @brief Get the data type of the tensor. * * @return The data type of the tensor. */ - DeviceType device_type() const; + DeviceType device_type () const; /** * @brief Get the shape of the tensor. * * @return The shape of the tensor. */ - const TensorShape& shape() const; + const TensorShape& shape () const; /** * @brief Get the total number of elements in the tensor. * * @return The total number of elements in the tensor. */ - int64_t NumElements() const; + int64_t NumElements () const; /** * @brief Get a pointer to the data buffer of the tensor. * * @return A void pointer to the data buffer of the tensor. */ - void* data() const; + void* data () const; /** * @brief Get a typed pointer to the data buffer of the tensor. @@ -161,18 +163,20 @@ class Tensor { * of the tensor. If the tensor is empty, the behavior is undefined. */ template - T* data() const { - if ((std::is_same::value && data_type_ != DataType::DT_FLOAT) || - (std::is_same::value && data_type_ != DataType::DT_INT) || - (std::is_same::value && data_type_ != DataType::DT_INT64) || - (std::is_same::value && data_type_ != DataType::DT_DOUBLE) || - (std::is_same>::value && data_type_ != DataType::DT_COMPLEX) || - (std::is_same>::value && data_type_ != DataType::DT_COMPLEX_DOUBLE)) - { - std::cerr << "Tensor data type does not match requested type." 
<< std::endl; - exit(EXIT_FAILURE); - } - return buffer_->base(); + T* + data () const + { + if ((std::is_same::value && data_type_ != DataType::DT_FLOAT) + || (std::is_same::value && data_type_ != DataType::DT_INT) + || (std::is_same::value && data_type_ != DataType::DT_INT64) + || (std::is_same::value && data_type_ != DataType::DT_DOUBLE) + || (std::is_same>::value && data_type_ != DataType::DT_COMPLEX) + || (std::is_same>::value && data_type_ != DataType::DT_COMPLEX_DOUBLE)) + { + std::cerr << "Tensor data type does not match requested type." << std::endl; + exit (EXIT_FAILURE); + } + return buffer_->base (); } /** @@ -193,24 +197,27 @@ class Tensor { * DT_COMPLEX: 8 bytes (2 floats) * DT_COMPLEX_DOUBLE: 16 bytes (2 doubles) */ - static size_t SizeOfType(DataType data_type) { - switch (data_type) { + static size_t + SizeOfType (DataType data_type) + { + switch (data_type) + { case DataType::DT_FLOAT: - return sizeof(float); + return sizeof (float); case DataType::DT_INT: - return sizeof(int32_t); + return sizeof (int32_t); case DataType::DT_INT64: - return sizeof(int64_t); + return sizeof (int64_t); case DataType::DT_DOUBLE: - return sizeof(double); + return sizeof (double); case DataType::DT_COMPLEX: - return sizeof(std::complex); + return sizeof (std::complex); case DataType::DT_COMPLEX_DOUBLE: - return sizeof(std::complex); + return sizeof (std::complex); default: std::cerr << "Unsupported data type!" << std::endl; - exit(EXIT_FAILURE); - } + exit (EXIT_FAILURE); + } } /** @@ -218,7 +225,7 @@ class Tensor { * * @return The TensorBuffer object that holds the data of the tensor. 
*/ - const TensorBuffer& buffer() const; + const TensorBuffer& buffer () const; /** * @brief Method to transform data from a given tensor object to the output tensor with a given device type @@ -228,54 +235,62 @@ class Tensor { * @return Tensor A tensor object with data transformed to the output tensor */ template - Tensor to_device() const { - if (this->device_ == DeviceTypeToEnum::value) { - return *this; - } + Tensor + to_device () const + { + if (this->device_ == DeviceTypeToEnum::value) + { + return *this; + } // Create output tensor on device - Tensor output(this->data_type_, DeviceTypeToEnum::value, this->shape_); + Tensor output (this->data_type_, DeviceTypeToEnum::value, this->shape_); // Copy data to a specified device // TODO: move the memory operator into the tensor_buff class. - TEMPLATE_ALL_2(this->data_type_, this->device_, - kernels::synchronize_memory()( - output.data(), this->data(), this->NumElements())) + TEMPLATE_ALL_2 (this->data_type_, + this->device_, + kernels::synchronize_memory () (output.data (), + this->data (), + this->NumElements ())) return output; } /** * @brief Copies data from a given device to the current tensor object. - * + * * This function is designed to copy a given number of elements from a device-specific memory location * to the memory associated with this object. It ensures that the size of the data being copied does not exceed * the size of the destination tensor. * * @tparam DEVICE The device type from which the data will be copied. * @tparam T The data type of the elements being copied. - * + * * @param data Pointer to the data array in the device memory that needs to be copied. * @param num_elements The number of elements to copy. - * - * @pre The number of elements to copy (`num_elements`) must be less than or equal to the number of elements + * + * @pre The number of elements to copy (`num_elements`) must be less than or equal to the number of elements * in the destination tensor (`this->shape_.num_elements()`). 
If this condition is not met, the function * will trigger an error through `REQUIRES_OK`. - * + * * @note The function uses a template specialization `TEMPLATE_CZ_2` to handle the copying of memory * based on the data type `T` and the device type `DEVICE`. It utilizes the `kernels::cast_memory` * method to perform the actual memory copy operation. */ template - void copy_from_device(const T* data, int64_t num_elements = -1) { - if (num_elements == -1) { - num_elements = this->NumElements(); - } - REQUIRES_OK(this->shape_.NumElements() >= num_elements, - "The number of elements of the input data must match the number of elements of the tensor.") - - TEMPLATE_CZ_2(this->data_type_, this->device_, - kernels::cast_memory()( - this->data(), data, num_elements)) + void + copy_from_device (const T* data, int64_t num_elements = -1) + { + if (num_elements == -1) + { + num_elements = this->NumElements (); + } + REQUIRES_OK (this->shape_.NumElements () >= num_elements, + "The number of elements of the input data must match the number of elements of the tensor.") + + TEMPLATE_CZ_2 (this->data_type_, + this->device_, + kernels::cast_memory () (this->data (), data, num_elements)) } /** @@ -286,16 +301,20 @@ class Tensor { * @return Tensor A tensor object with data transformed to the output tensor */ template - Tensor cast() const { + Tensor + cast () const + { // Create output tensor on device - Tensor output(DataTypeToEnum::value, this->device_, this->shape_); + Tensor output (DataTypeToEnum::value, this->device_, this->shape_); // TODO: error handle of cast memory // TODO: move the memory operator into the tensor_buff class. 
// Copy data to a specified device - TEMPLATE_CZ_2(this->data_type_, this->device_, - kernels::cast_memory()( - output.data(), this->data(), this->NumElements())) + TEMPLATE_CZ_2 (this->data_type_, + this->device_, + kernels::cast_memory () (output.data (), + this->data (), + this->NumElements ())) return output; } @@ -303,7 +322,7 @@ class Tensor { /** * @brief Set all elements in current tensor object to zero. */ - void zero(); + void zero (); /** * @brief Set all elements in current tensor object to zero. @@ -312,7 +331,7 @@ class Tensor { * * @note There can be one -1 dimension in the input shape, indicates the auto reshape. */ - void reshape(TensorShape shape); + void reshape (TensorShape shape); /** * @brief Set all elements in current tensor object to zero. @@ -321,7 +340,7 @@ class Tensor { * * @note There can be one -1 dimension in the input shape, indicates the auto reshape. */ - Tensor shaped(const TensorShape& shape) const; + Tensor shaped (const TensorShape& shape) const; /** * @brief Return a new Tensor slice starting at the specified indices with the given size. @@ -333,7 +352,7 @@ class Tensor { * * @note Currently, this method only supports tensors with a ndim of less than or equal to 3. */ - Tensor slice(const std::vector& start, const std::vector& size) const; + Tensor slice (const std::vector& start, const std::vector& size) const; /** * @brief Resize the tensor to the new shape. @@ -348,7 +367,7 @@ class Tensor { * * @note This method will automatically zero the resized tensor object. */ - void resize(const TensorShape& new_shape); + void resize (const TensorShape& new_shape); /** * @brief Get the Allocator object according to the given device type. @@ -358,7 +377,7 @@ class Tensor { * @return The related Allocator class pointer. */ // TODO: Delete this function, and use a singleton allocator class. 
- static base::core::Allocator* GetAllocator(DeviceType device); + static base::core::Allocator* GetAllocator (DeviceType device); /** * @brief Get the element at the specified indices. @@ -371,16 +390,19 @@ class Tensor { * If the indices are out of bounds, the behavior is undefined. */ template - T& get_value(Indices... indices) const { - if (sizeof...(Indices) != shape_.ndim()) { - throw std::invalid_argument("Incorrect number of indices."); - } + T& + get_value (Indices... indices) const + { + if (sizeof...(Indices) != shape_.ndim ()) + { + throw std::invalid_argument ("Incorrect number of indices."); + } // Calculate the linear index corresponding to the given indices - size_t linearIndex = calculateLinearIndex(indices...); + size_t linearIndex = calculateLinearIndex (indices...); // Access the element at the calculated linear index - return *reinterpret_cast(data() + linearIndex); + return *reinterpret_cast (data () + linearIndex); } /** @@ -395,20 +417,25 @@ class Tensor { * If the row index is out of bounds, the behavior is undefined. 
*/ template - T* inner_most_ptr(const int &index) const { - if (shape_.ndim() > 2) { - throw std::invalid_argument("Invalid call, inner_most_ptr only support tensor rank <= 2!"); - } - if (index > shape_.dim_size(static_cast(shape_.ndim() - 2))) { - throw std::invalid_argument("Invalid index, index of the inner-most must less than the inner-most shape size!"); - } - if (shape_.ndim() == 1) { - return data() + index; - } - return data() + index * shape_.dim_size(static_cast(shape_.ndim()) - 1); + T* + inner_most_ptr (const int& index) const + { + if (shape_.ndim () > 2) + { + throw std::invalid_argument ("Invalid call, inner_most_ptr only support tensor rank <= 2!"); + } + if (index > shape_.dim_size (static_cast (shape_.ndim () - 2))) + { + throw std::invalid_argument ( + "Invalid index, index of the inner-most must less than the inner-most shape size!"); + } + if (shape_.ndim () == 1) + { + return data () + index; + } + return data () + index * shape_.dim_size (static_cast (shape_.ndim ()) - 1); } - /** * @brief Equality comparison operator for tensors. * @@ -418,7 +445,7 @@ class Tensor { * @param other The tensor to compare with. * @return True if the tensors are equal, otherwise false. */ - bool operator==(const Tensor& other) const; + bool operator== (const Tensor& other) const; /** * @brief Assignment operator overload for the Tensor class. @@ -429,7 +456,7 @@ class Tensor { * @param other The source Tensor object whose values will be assigned. * @return A reference to the current Tensor object after the assignment. */ - Tensor& operator=(const Tensor& other); + Tensor& operator= (const Tensor& other); /** * @brief Move assignment operator overload for the Tensor class. @@ -442,7 +469,7 @@ class Tensor { * @return A reference to the current Tensor object after the move assignment. * @note This function is declared as noexcept, indicating that it does not throw exceptions. 
*/ - Tensor& operator=(Tensor&& other) noexcept; + Tensor& operator= (Tensor&& other) noexcept; /** * @brief Copy the data from another tensor into this tensor. @@ -454,7 +481,7 @@ class Tensor { * @return Returns true if the copy was successful, false otherwise. * @note The current tensor will share the same underlying storage as the source tensor. */ - bool CopyFrom(const Tensor& other); + bool CopyFrom (const Tensor& other); /** * @brief Copy the data from another tensor into this tensor while reshaping it. @@ -470,7 +497,7 @@ class Tensor { * @note The function returns true if the number of elements in `other.shape()` matches the * number of elements in the given `shape`. */ - bool CopyFrom(const Tensor& other, const TensorShape& shape); + bool CopyFrom (const Tensor& other, const TensorShape& shape); /** * @brief Copies data from another Tensor with memory allocation and specified shape. @@ -483,32 +510,35 @@ class Tensor { * @param shape The TensorShape specifying the shape of the newly allocated memory. * @return Returns true if the copy and allocation were successful, false otherwise. */ - bool AllocateFrom(const Tensor& other, const TensorShape& shape); + bool AllocateFrom (const Tensor& other, const TensorShape& shape); /** * @brief Accessor function for a multi-dimensional tensor. - * + * * This function provides read-only access to the data of a tensor with a specific rank. * It performs checks to ensure that the rank of the tensor matches the rank of the accessor. - * + * * @tparam T The data type of the elements. * @tparam N The number of dimensions. - * + * * @return A TensorAccessor object for accessing the tensor's data. 
*/ template - TensorAccessor accessor() const& { + TensorAccessor + accessor () const& + { // Check if the tensor's rank is greater than 0 - static_assert(N > 0, - "Accessor is used to access the data of a tensor with rank > 0, for scalars use *data()"); + static_assert (N > 0, + "Accessor is used to access the data of a tensor with rank > 0, for scalars use *data()"); // Check if the rank of the tensor matches the rank of the accessor - REQUIRES_OK(this->shape_.ndim() == N, - "The rank of the tensor must match the rank of the accessor.") + REQUIRES_OK (this->shape_.ndim () == N, "The rank of the tensor must match the rank of the accessor.") // Create and return a TensorAccessor object - return TensorAccessor(this->data(), this->shape_.dims().data(), this->shape_.strides().data()); + return TensorAccessor (this->data (), + this->shape_.dims ().data (), + this->shape_.strides ().data ()); } - template - TensorAccessor accessor() && = delete; + template + TensorAccessor accessor () && = delete; /** * @brief Synchronize the current Tensor with another Tensor. @@ -517,7 +547,7 @@ class Tensor { * * @param rhs The Tensor to synchronize with. */ - void sync(const Tensor& rhs); + void sync (const Tensor& rhs); /** * @brief Access a sub-Tensor based on an index. @@ -529,18 +559,22 @@ class Tensor { */ Tensor operator[] (const int& index) const; - explicit operator bool() const { - return this->NumElements() > 0; + explicit + operator bool () const + { + return this->NumElements () > 0; } - template - void set_value(T value) { - TEMPLATE_ALL_2(this->data_type_, this->device_, - kernels::set_memory()(this->data(), value, this->NumElements())) + template + void + set_value (T value) + { + TEMPLATE_ALL_2 (this->data_type_, + this->device_, + kernels::set_memory () (this->data (), value, this->NumElements ())) } -protected: - + protected: /** * @brief The data type of the tensor. */ @@ -574,38 +608,45 @@ class Tensor { * multiplying each index by the corresponding stride. 
*/ template - size_t calculateLinearIndex(Indices... indices) const { + size_t + calculateLinearIndex (Indices... indices) const + { size_t stride = 1; size_t linearIndex = 0; - size_t indexArray[] = { static_cast(indices)... }; + size_t indexArray[] = {static_cast (indices)...}; - for (int ii = static_cast(shape_.ndim()) - 1; ii >= 0; --ii) { - linearIndex += indexArray[ii] * stride; - stride *= shape_.dim_size(ii); - } + for (int ii = static_cast (shape_.ndim ()) - 1; ii >= 0; --ii) + { + linearIndex += indexArray[ii] * stride; + stride *= shape_.dim_size (ii); + } return linearIndex; } - // This function is used to copy data and properties from another Tensor instance, 'other', into the current Tensor instance. - // The 'shape' parameter specifies the new shape for the current Tensor. - inline void CopyFromInternal(const Tensor& other, const TensorShape& shape) { + // This function is used to copy data and properties from another Tensor instance, 'other', into the current Tensor + // instance. The 'shape' parameter specifies the new shape for the current Tensor. + inline void + CopyFromInternal (const Tensor& other, const TensorShape& shape) + { // Copy the data type and device from the 'other' Tensor. data_type_ = other.data_type_; device_ = other.device_; // Set the shape of the current Tensor to the provided 'shape'. shape_ = shape; // Check if the buffer of the current Tensor is different from the buffer of the 'other' Tensor. - if (buffer_ != other.buffer_) { - // If the current Tensor has a buffer, decrease its reference count. - // Note this could indicate a deleted of current buffer_ - if (buffer_) buffer_->unref(); - // Assign the buffer of the 'other' Tensor to the current Tensor's buffer. - buffer_ = other.buffer_; - // Increase the reference count of the buffer to indicate shared ownership. - if (buffer_) buffer_->ref(); - } + if (buffer_ != other.buffer_) + { + // If the current Tensor has a buffer, decrease its reference count. 
+ // Note this could indicate a deleted of current buffer_ + if (buffer_) + buffer_->unref (); + // Assign the buffer of the 'other' Tensor to the current Tensor's buffer. + buffer_ = other.buffer_; + // Increase the reference count of the buffer to indicate shared ownership. + if (buffer_) + buffer_->ref (); + } } - }; /** @@ -618,7 +659,7 @@ class Tensor { * * @return The output stream. */ -std::ostream& operator<<(std::ostream& os, const Tensor& tensor); +std::ostream& operator<< (std::ostream& os, const Tensor& tensor); } // namespace container diff --git a/source/source_base/module_container/ATen/core/tensor_accessor.h b/source/source_base/module_container/ATen/core/tensor_accessor.h index 9fbea6f0b81..a8dad80a0b0 100644 --- a/source/source_base/module_container/ATen/core/tensor_accessor.h +++ b/source/source_base/module_container/ATen/core/tensor_accessor.h @@ -6,54 +6,68 @@ #include #include -namespace container { +namespace container +{ template -struct DefaultPtrTraits { +struct DefaultPtrTraits +{ using PtrType = T*; }; #if defined(__CUDACC__) || defined(__HIPCC__) template -struct RestrictPtrTraits { +struct RestrictPtrTraits +{ using PtrType = T* __restrict__; }; #endif -template class PtrTraits = DefaultPtrTraits> -class TensorAccessorBase { +template class PtrTraits = DefaultPtrTraits> +class TensorAccessorBase +{ public: - using PtrType = typename PtrTraits::PtrType; - AT_HOST_DEVICE TensorAccessorBase( - PtrType data, - const index_t* sizes, - const index_t* strides) - : data_(data), sizes_(sizes), strides_(strides) {} - - AT_HOST int_array_ref sizes() const { + AT_HOST_DEVICE + TensorAccessorBase (PtrType data, const index_t* sizes, const index_t* strides) + : data_ (data), sizes_ (sizes), strides_ (strides) + { + } + + AT_HOST int_array_ref + sizes () const + { return {sizes_, N}; } - AT_HOST int_array_ref strides() const { + AT_HOST int_array_ref + strides () const + { return {strides_, N}; } - AT_HOST_DEVICE index_t stride(index_t idx) const { + 
AT_HOST_DEVICE index_t + stride (index_t idx) const + { return strides_[idx]; } - AT_HOST_DEVICE index_t size(index_t idx) const { + AT_HOST_DEVICE index_t + size (index_t idx) const + { return sizes_[idx]; } - AT_HOST_DEVICE PtrType data() { + AT_HOST_DEVICE PtrType + data () + { return data_; } - AT_HOST_DEVICE const PtrType data() const { + AT_HOST_DEVICE const PtrType + data () const + { return data_; } @@ -63,42 +77,59 @@ class TensorAccessorBase { const index_t* strides_ = nullptr; }; -template class PtrTraits = DefaultPtrTraits> -class TensorAccessor : public TensorAccessorBase { +template class PtrTraits = DefaultPtrTraits> +class TensorAccessor : public TensorAccessorBase +{ public: using PtrType = typename PtrTraits::PtrType; - AT_HOST_DEVICE TensorAccessor(PtrType data, const index_t* sizes, const index_t* strides) - : TensorAccessorBase(data, sizes, strides) {} + AT_HOST_DEVICE + TensorAccessor (PtrType data, const index_t* sizes, const index_t* strides) + : TensorAccessorBase (data, sizes, strides) + { + } - AT_HOST_DEVICE TensorAccessor operator[](index_t idx) { - return TensorAccessor(this->data_ + idx * this->strides_[0], this->sizes_ + 1, this->strides_ + 1); + AT_HOST_DEVICE TensorAccessor + operator[] (index_t idx) + { + return TensorAccessor (this->data_ + idx * this->strides_[0], + this->sizes_ + 1, + this->strides_ + 1); } - AT_HOST_DEVICE const TensorAccessor operator[](index_t idx) const { - return TensorAccessor(this->data_ + idx * this->strides_[0], this->sizes_ + 1, this->strides_ + 1); + AT_HOST_DEVICE const TensorAccessor + operator[] (index_t idx) const + { + return TensorAccessor (this->data_ + idx * this->strides_[0], + this->sizes_ + 1, + this->strides_ + 1); } }; -template class PtrTraits> -class TensorAccessor : public TensorAccessorBase { +template class PtrTraits> +class TensorAccessor : public TensorAccessorBase +{ public: using PtrType = typename PtrTraits::PtrType; - AT_HOST_DEVICE TensorAccessor(T* data, const index_t* 
sizes, const index_t* strides) - : TensorAccessorBase(data, sizes, strides) {} + AT_HOST_DEVICE + TensorAccessor (T* data, const index_t* sizes, const index_t* strides) + : TensorAccessorBase (data, sizes, strides) + { + } - AT_HOST_DEVICE T& operator[](index_t idx) { + AT_HOST_DEVICE T& + operator[] (index_t idx) + { return this->data_[idx * this->strides_[0]]; } - AT_HOST_DEVICE const T& operator[](index_t idx) const { + AT_HOST_DEVICE const T& + operator[] (index_t idx) const + { return this->data_[idx * this->strides_[0]]; } }; - } // namespace container #endif // ATEN_CORE_TENSOR_ACCESSOR_H_ \ No newline at end of file diff --git a/source/source_base/module_container/ATen/core/tensor_buffer.cpp b/source/source_base/module_container/ATen/core/tensor_buffer.cpp index d840e57d439..861891718aa 100644 --- a/source/source_base/module_container/ATen/core/tensor_buffer.cpp +++ b/source/source_base/module_container/ATen/core/tensor_buffer.cpp @@ -7,32 +7,36 @@ #include #endif -namespace container { +namespace container +{ // Construct a new TensorBuffer object. -TensorBuffer::TensorBuffer(base::core::Allocator* alloc, void* data_ptr) : alloc_(alloc), data_(data_ptr), owns_memory_(true) {} +TensorBuffer::TensorBuffer (base::core::Allocator* alloc, void* data_ptr) + : alloc_ (alloc), data_ (data_ptr), owns_memory_ (true) +{ +} // Construct a new TensorBuffer object. // Note, this is a reference TensorBuffer, does not own memory itself. -TensorBuffer::TensorBuffer(void* data_ptr) : alloc_(), data_(data_ptr), owns_memory_(false) {} +TensorBuffer::TensorBuffer (void* data_ptr) : alloc_ (), data_ (data_ptr), owns_memory_ (false) {} -// Class members are initialized in the order of their declaration, +// Class members are initialized in the order of their declaration, // rather than the order they appear in the initialization list! 
-TensorBuffer::TensorBuffer(base::core::Allocator* alloc, size_t size) { - alloc_ = alloc; - if (size > 0) { - data_ = alloc_->allocate(size); - owns_memory_ = true; - allocated_bytes_ = size; - } +TensorBuffer::TensorBuffer (base::core::Allocator* alloc, size_t size) +{ + alloc_ = alloc; + if (size > 0) + { + data_ = alloc_->allocate (size); + owns_memory_ = true; + allocated_bytes_ = size; + } } // Move constructor. -TensorBuffer::TensorBuffer(TensorBuffer&& other) noexcept - : alloc_(other.alloc_), - data_(other.data_), - owns_memory_(other.owns_memory_), - allocated_bytes_(other.allocated_bytes_) +TensorBuffer::TensorBuffer (TensorBuffer&& other) noexcept + : alloc_ (other.alloc_), data_ (other.data_), owns_memory_ (other.owns_memory_), + allocated_bytes_ (other.allocated_bytes_) { // Reset the other TensorBuffer. other.data_ = nullptr; @@ -41,85 +45,117 @@ TensorBuffer::TensorBuffer(TensorBuffer&& other) noexcept } // Destroy the TensorBuffer object. -TensorBuffer::~TensorBuffer() { - if (this->OwnsMemory() && data_ != nullptr) { - alloc_->free(data_); - } - if (alloc_ != nullptr) { - delete alloc_; - } +TensorBuffer::~TensorBuffer () +{ + if (this->OwnsMemory () && data_ != nullptr) + { + alloc_->free (data_); + } + if (alloc_ != nullptr) + { + delete alloc_; + } } // Get the raw data pointer. -void* TensorBuffer::data() const { return data_; } +void* + TensorBuffer::data () const +{ + return data_; +} // Get the total number of bytes allocated for the buffer. // This method returns the total number of bytes allocated for the buffer by the allocator // associated with the TensorBuffer. If the buffer is not yet allocated, the function returns 0. -size_t TensorBuffer::GetAllocatedBytes() const { +size_t + TensorBuffer::GetAllocatedBytes () const +{ return allocated_bytes_; } // Get the root TensorBuffer object. // If this TensorBuffer is a sub-buffer of another TensorBuffer, returns that // TensorBuffer. Otherwise, returns this. 
-TensorBuffer* TensorBuffer::root_buffer() { return this; } // Implementation goes here. +TensorBuffer* + TensorBuffer::root_buffer () +{ + return this; +} // Implementation goes here. // Get the Allocator object used in this class. -base::core::Allocator* TensorBuffer::allocator() const { +base::core::Allocator* + TensorBuffer::allocator () const +{ return alloc_; } // Check whether this TensorBuffer owns the underlying memory. -bool TensorBuffer::OwnsMemory() const { return this->owns_memory_; } +bool + TensorBuffer::OwnsMemory () const +{ + return this->owns_memory_; +} // Get the type of device used by the TensorBuffer. -DeviceType TensorBuffer::GetDeviceType() const { - if (alloc_ != nullptr) { - return alloc_->GetDeviceType(); - } +DeviceType + TensorBuffer::GetDeviceType () const +{ + if (alloc_ != nullptr) + { + return alloc_->GetDeviceType (); + } return DeviceType::UnKnown; } -void TensorBuffer::resize(size_t size) { +void + TensorBuffer::resize (size_t size) +{ // Allocate a new buffer. - void* new_data = this->alloc_->allocate(size); + void* new_data = this->alloc_->allocate (size); // Free the old buffer. - if (this->OwnsMemory()) { - this->alloc_->free(data_); - } + if (this->OwnsMemory ()) + { + this->alloc_->free (data_); + } // Update the internal state. 
this->data_ = new_data; this->owns_memory_ = true; } -TensorBuffer& TensorBuffer::operator=(const TensorBuffer& other) { - if (this->OwnsMemory()) { - this->alloc_->free(data_); - } +TensorBuffer& + TensorBuffer::operator= (const TensorBuffer& other) +{ + if (this->OwnsMemory ()) + { + this->alloc_->free (data_); + } delete this->alloc_; - if (other.GetDeviceType() == DeviceType::CpuDevice) { - this->alloc_ = new base::core::CPUAllocator(); - } - #if defined(__CUDA) || defined(__ROCM) - else if (other.GetDeviceType() == DeviceType::GpuDevice) { - this->alloc_ = new base::core::GPUAllocator(); - } - #endif // __CUDA || __ROCM - + if (other.GetDeviceType () == DeviceType::CpuDevice) + { + this->alloc_ = new base::core::CPUAllocator (); + } +#if defined(__CUDA) || defined(__ROCM) + else if (other.GetDeviceType () == DeviceType::GpuDevice) + { + this->alloc_ = new base::core::GPUAllocator (); + } +#endif // __CUDA || __ROCM - this->data_ = this->alloc_->allocate(other.GetAllocatedBytes()); + this->data_ = this->alloc_->allocate (other.GetAllocatedBytes ()); this->owns_memory_ = true; return *this; } -TensorBuffer& TensorBuffer::operator=(TensorBuffer&& other) noexcept { - if (this->OwnsMemory()) { - this->alloc_->free(data_); - } +TensorBuffer& + TensorBuffer::operator= (TensorBuffer&& other) noexcept +{ + if (this->OwnsMemory ()) + { + this->alloc_->free (data_); + } delete this->alloc_; this->alloc_ = other.alloc_; this->data_ = other.data_; @@ -131,4 +167,4 @@ TensorBuffer& TensorBuffer::operator=(TensorBuffer&& other) noexcept { return *this; } -} // namespace container +} // namespace container diff --git a/source/source_base/module_container/ATen/core/tensor_buffer.h b/source/source_base/module_container/ATen/core/tensor_buffer.h index 52b393901d5..b2e6280d838 100644 --- a/source/source_base/module_container/ATen/core/tensor_buffer.h +++ b/source/source_base/module_container/ATen/core/tensor_buffer.h @@ -5,31 +5,33 @@ #include #include -namespace container { 
+namespace container +{ /** * @brief Interface to access the raw ref-counted data buffer. */ -class TensorBuffer : public base::core::counted_base { - public: +class TensorBuffer : public base::core::counted_base +{ + public: /** * @brief Construct a new TensorBuffer object. * * @param alloc Pointer to the allocator to use for memory allocation. * @param data_ptr Pointer to the underlying data buffer. */ - TensorBuffer(base::core::Allocator* alloc, void* data_ptr); + TensorBuffer (base::core::Allocator* alloc, void* data_ptr); - TensorBuffer(base::core::Allocator* alloc, size_t size); + TensorBuffer (base::core::Allocator* alloc, size_t size); /** - * @brief Construct a new TensorBuffer object. - * - * This is a reference TensorBuffer, does not owns memory itself. - * - * @param data_ptr Pointer to the given data buffer. - */ - explicit TensorBuffer(void* data_ptr); + * @brief Construct a new TensorBuffer object. + * + * This is a reference TensorBuffer, does not owns memory itself. + * + * @param data_ptr Pointer to the given data buffer. + */ + explicit TensorBuffer (void* data_ptr); /** * @brief Move constructor for the TensorBuffer class. @@ -41,31 +43,31 @@ class TensorBuffer : public base::core::counted_base { * @param other The rvalue reference to the source TensorBuffer object to be moved. * @note This function is declared as noexcept, indicating that it does not throw exceptions. */ - TensorBuffer(TensorBuffer&& other) noexcept; + TensorBuffer (TensorBuffer&& other) noexcept; /** * @brief Destroy the TensorBuffer object. */ - ~TensorBuffer(); + ~TensorBuffer (); /** * @brief Get the raw data pointer. * * @return void* Pointer to the underlying data buffer. */ - void* data() const; + void* data () const; /** * @brief resize the tensor buffer */ - void resize(size_t size); + void resize (size_t size); /** * @brief Get the size of the buffer. * * @return size_t The size of the buffer in bytes. 
*/ - size_t GetAllocatedBytes() const; + size_t GetAllocatedBytes () const; /** * @brief Get the root TensorBuffer object. @@ -75,14 +77,14 @@ class TensorBuffer : public base::core::counted_base { * * @return TensorBuffer* Pointer to the root TensorBuffer object. */ - TensorBuffer* root_buffer(); + TensorBuffer* root_buffer (); /** * @brief Get the Allocator object used in this class. * * @return Allocator* Pointer to the Allocator object. */ - base::core::Allocator* allocator() const; + base::core::Allocator* allocator () const; /** * @brief Reinterpret the buffer as an array of type T. @@ -91,8 +93,10 @@ class TensorBuffer : public base::core::counted_base { * @return T* Pointer to the underlying buffer reinterpreted as type T. */ template - T* base() const { - return reinterpret_cast(data()); + T* + base () const + { + return reinterpret_cast (data ()); } /** @@ -101,14 +105,14 @@ class TensorBuffer : public base::core::counted_base { * @return true If the TensorBuffer owns the underlying memory. * @return false If the TensorBuffer does not own the underlying memory. */ - virtual bool OwnsMemory() const; + virtual bool OwnsMemory () const; /** * @brief Get the type of device used by the TensorBuffer. * * @return MemoryType The type of memory used by the TensorBuffer. */ - DeviceType GetDeviceType() const; + DeviceType GetDeviceType () const; /** * @brief Assignment operator overload for the TensorBuffer class. @@ -119,7 +123,7 @@ class TensorBuffer : public base::core::counted_base { * @param other The source TensorBuffer object whose values will be assigned. * @return A reference to the current TensorBuffer object after the assignment. */ - TensorBuffer& operator=(const TensorBuffer& other); + TensorBuffer& operator= (const TensorBuffer& other); /** * @brief Move assignment operator overload for the TensorBuffer class. 
@@ -132,16 +136,15 @@ class TensorBuffer : public base::core::counted_base { * @return A reference to the current TensorBuffer object after the move assignment. * @note This function is declared as noexcept, indicating that it does not throw exceptions. */ - TensorBuffer& operator=(TensorBuffer&& other) noexcept; - + TensorBuffer& operator= (TensorBuffer&& other) noexcept; private: base::core::Allocator* alloc_ = nullptr; ///< Pointer to the allocator used for memory allocation. - void *data_ = nullptr; ///< Pointer to the underlying data buffer. - bool owns_memory_ = false; ///< Bool to indicate whether this tensor owns it's memory. - int64_t allocated_bytes_ = 0; ///< The number of bytes allocated for this buffer. + void* data_ = nullptr; ///< Pointer to the underlying data buffer. + bool owns_memory_ = false; ///< Bool to indicate whether this tensor owns it's memory. + int64_t allocated_bytes_ = 0; ///< The number of bytes allocated for this buffer. }; -} // namespace container +} // namespace container -#endif // ATEN_CORE_TENSOR_BUFFER_H_ +#endif // ATEN_CORE_TENSOR_BUFFER_H_ diff --git a/source/source_base/module_container/ATen/core/tensor_map.cpp b/source/source_base/module_container/ATen/core/tensor_map.cpp index d169f9add6e..9715d79c5e7 100644 --- a/source/source_base/module_container/ATen/core/tensor_map.cpp +++ b/source/source_base/module_container/ATen/core/tensor_map.cpp @@ -1,42 +1,42 @@ #include -namespace container { +namespace container +{ // Constructor that creates a tensor with the given data pointer, data type, device type and shape. 
-TensorMap::TensorMap(void *data, DataType data_type, DeviceType device, const TensorShape &shape) - : Tensor(data_type, device, {}) +TensorMap::TensorMap (void* data, DataType data_type, DeviceType device, const TensorShape& shape) + : Tensor (data_type, device, {}) { this->shape_ = shape; - if(this->buffer_ != nullptr) - { - this->buffer_->unref(); - } - this->buffer_ = new TensorBuffer(data); + if (this->buffer_ != nullptr) + { + this->buffer_->unref (); + } + this->buffer_ = new TensorBuffer (data); } -TensorMap::TensorMap(void* data, const Tensor& other, const TensorShape &shape) - : Tensor(other.data_type(), other.device_type(), {}) +TensorMap::TensorMap (void* data, const Tensor& other, const TensorShape& shape) + : Tensor (other.data_type (), other.device_type (), {}) { this->shape_ = shape; - if(this->buffer_ != nullptr) - { - this->buffer_->unref(); - } - this->buffer_ = new TensorBuffer(data); + if (this->buffer_ != nullptr) + { + this->buffer_->unref (); + } + this->buffer_ = new TensorBuffer (data); } -TensorMap::TensorMap(void *data, const Tensor& other) - : Tensor(other.data_type(), other.device_type(), {}) +TensorMap::TensorMap (void* data, const Tensor& other) : Tensor (other.data_type (), other.device_type (), {}) { - this->shape_ = other.shape(); - if(this->buffer_ != nullptr) - { - this->buffer_->unref(); - } - this->buffer_ = new TensorBuffer(data); + this->shape_ = other.shape (); + if (this->buffer_ != nullptr) + { + this->buffer_->unref (); + } + this->buffer_ = new TensorBuffer (data); } -//void TensorMap::resize(const TensorShape &new_shape) { -// throw std::logic_error("TensorMap object does not support the resize method."); -//} +// void TensorMap::resize(const TensorShape &new_shape) { +// throw std::logic_error("TensorMap object does not support the resize method."); +// } } // namespace container \ No newline at end of file diff --git a/source/source_base/module_container/ATen/core/tensor_map.h 
b/source/source_base/module_container/ATen/core/tensor_map.h index 724c0afd513..9e4cbc0f50a 100644 --- a/source/source_base/module_container/ATen/core/tensor_map.h +++ b/source/source_base/module_container/ATen/core/tensor_map.h @@ -5,7 +5,8 @@ #include -namespace container { +namespace container +{ /** * @brief A multi-dimensional reference array of elements of a single data type. @@ -16,9 +17,9 @@ namespace container { * This class is not thread-safe and should not be accessed by multiple threads * concurrently. */ - class TensorMap : public Tensor { - public: - +class TensorMap : public Tensor +{ + public: /** * @brief Constructor that map the given data pointer to a tensor object with the given * data type, device type and shape. @@ -30,7 +31,7 @@ namespace container { * @param device The data type of the tensor. * @param shape The shape of the tensor. */ - TensorMap(void *data, DataType data_type, DeviceType device, const TensorShape &shape); + TensorMap (void* data, DataType data_type, DeviceType device, const TensorShape& shape); /** * @brief Constructs a TensorMap from the provided data, with the specified shape. @@ -43,7 +44,7 @@ namespace container { * @param other The Tensor object to be associated with the TensorMap. * @param shape The shape of the data referenced by the TensorMap. */ - TensorMap(void *data, const Tensor& other, const TensorShape& shape); + TensorMap (void* data, const Tensor& other, const TensorShape& shape); /** * @brief Constructs a TensorMap from the provided data, using the shape of the provided Tensor. @@ -56,8 +57,7 @@ namespace container { * @param data A pointer to the data to be referenced by the TensorMap. * @param other The Tensor object to be associated with the TensorMap, which defines the shape. 
*/ - TensorMap(void *data, const Tensor& other); - + TensorMap (void* data, const Tensor& other); }; } // namespace container diff --git a/source/source_base/module_container/ATen/core/tensor_shape.cpp b/source/source_base/module_container/ATen/core/tensor_shape.cpp index 313da1b834c..21e79f23df8 100644 --- a/source/source_base/module_container/ATen/core/tensor_shape.cpp +++ b/source/source_base/module_container/ATen/core/tensor_shape.cpp @@ -1,114 +1,152 @@ #include -namespace container { +namespace container +{ /** * @brief Namespace containing constants for default constructor */ -namespace { - /** - * @brief Default size of a dimension - */ - constexpr int kDefaultDimSize = 0; +namespace +{ +/** + * @brief Default size of a dimension + */ +constexpr int kDefaultDimSize = 0; } // namespace // Default constructor for TensorShape class // Initializes TensorShape with default dimensions -TensorShape::TensorShape() : dims_(kDefaultDimSize), strides_(1) {} +TensorShape::TensorShape () : dims_ (kDefaultDimSize), strides_ (1) {} // Constructor for TensorShape class -TensorShape::TensorShape(std::initializer_list dims) : dims_(dims), strides_(std::move(get_strides_(dims))) {} +TensorShape::TensorShape (std::initializer_list dims) + : dims_ (dims), strides_ (std::move (get_strides_ (dims))) +{ +} // Constructor for TensorShape class -TensorShape::TensorShape(const std::vector& dims) : dims_(dims), strides_(std::move(get_strides_(dims))) {} +TensorShape::TensorShape (const std::vector& dims) : dims_ (dims), strides_ (std::move (get_strides_ (dims))) +{ +} // Copy constructor for TensorShape class -TensorShape::TensorShape(const TensorShape& other) { - dims_ = other.dims(); - strides_ = std::move(get_strides_(dims_)); +TensorShape::TensorShape (const TensorShape& other) +{ + dims_ = other.dims (); + strides_ = std::move (get_strides_ (dims_)); } // Get size of a specific dimension in the tensor -int64_t TensorShape::dim_size(int dim) const { +int64_t + 
TensorShape::dim_size (int dim) const +{ return dims_[dim]; } // Get all dimension sizes in the tensor -const std::vector& TensorShape::dims() const { +const std::vector& + TensorShape::dims () const +{ return dims_; } // Get all dimension strides in the tensor -const std::vector& TensorShape::strides() const { +const std::vector& + TensorShape::strides () const +{ return strides_; } // Get ndim of the tensor, i.e., number of dimensions -unsigned int TensorShape::ndim() const { - return dims_.size(); +unsigned int + TensorShape::ndim () const +{ + return dims_.size (); } // Returns the total number of elements in the shape. -int64_t TensorShape::NumElements() const { +int64_t + TensorShape::NumElements () const +{ int64_t num_elements = 1; - for (int i = 0; i < this->ndim(); ++i) { - num_elements *= dims_[i]; - } - return this->ndim() ? num_elements : 0; + for (int i = 0; i < this->ndim (); ++i) + { + num_elements *= dims_[i]; + } + return this->ndim () ? num_elements : 0; } // Modify size of a specific dimension in the tensor -void TensorShape::set_dim_size(int dim, int64_t size) { +void + TensorShape::set_dim_size (int dim, int64_t size) +{ dims_[dim] = size; - strides_ = std::move(get_strides_(dims_)); + strides_ = std::move (get_strides_ (dims_)); } // Add a new dimension to the tensor -void TensorShape::add_dim(int64_t size) { - dims_.push_back(size); - strides_ = std::move(get_strides_(dims_)); +void + TensorShape::add_dim (int64_t size) +{ + dims_.push_back (size); + strides_ = std::move (get_strides_ (dims_)); } // Remove a dimension from the tensor -void TensorShape::remove_dim(int dim) { - if (dim < 0 && dim >= dims_.size()) { - throw std::runtime_error("Invalid axis to remove."); - } - dims_.erase(dims_.begin() + dim); - strides_ = std::move(get_strides_(dims_)); +void + TensorShape::remove_dim (int dim) +{ + if (dim < 0 && dim >= dims_.size ()) + { + throw std::runtime_error ("Invalid axis to remove."); + } + dims_.erase (dims_.begin () + dim); + 
strides_ = std::move (get_strides_ (dims_)); } // Overload the == operator to compare two TensorShape objects -bool TensorShape::operator==(const TensorShape& other) const { +bool + TensorShape::operator== (const TensorShape& other) const +{ return dims_ == other.dims_; } // Overload the != operator to compare two TensorShape objects -bool TensorShape::operator!=(const TensorShape& other) const { +bool + TensorShape::operator!= (const TensorShape& other) const +{ return dims_ != other.dims_; } -std::vector TensorShape::get_strides_(const std::vector& dim) { +std::vector + TensorShape::get_strides_ (const std::vector& dim) +{ std::vector strides{}; - if (dim.empty()) { - return strides; - } - strides.resize(dim.size()); - strides.back() = 1; - for (int ii = dim.size() - 2; ii >= 0; ii--) { - strides[ii] = strides[ii + 1] * dim[ii + 1]; - } + if (dim.empty ()) + { + return strides; + } + strides.resize (dim.size ()); + strides.back () = 1; + for (int ii = dim.size () - 2; ii >= 0; ii--) + { + strides[ii] = strides[ii + 1] * dim[ii + 1]; + } return strides; } // Overload the << operator to print the tensor shape -std::ostream& operator<<(std::ostream& os, const TensorShape& shape) { +std::ostream& + operator<< (std::ostream& os, const TensorShape& shape) +{ os << "["; - for (int i = 0; i < shape.ndim(); ++i) { - os << shape.dims()[i]; - if (i < shape.ndim() - 1) { - os << ","; + for (int i = 0; i < shape.ndim (); ++i) + { + os << shape.dims ()[i]; + if (i < shape.ndim () - 1) + { + os << ","; + } } - } os << "]"; return os; } diff --git a/source/source_base/module_container/ATen/core/tensor_shape.h b/source/source_base/module_container/ATen/core/tensor_shape.h index 9085beb7e59..4bf01182f3d 100644 --- a/source/source_base/module_container/ATen/core/tensor_shape.h +++ b/source/source_base/module_container/ATen/core/tensor_shape.h @@ -5,112 +5,114 @@ #include #include -namespace container { +namespace container +{ /** * @brief A class for representing the shape of a 
tensor. */ -class TensorShape { -public: +class TensorShape +{ + public: /** * @brief Default constructor. */ - TensorShape(); + TensorShape (); /** * @brief Constructor with an initializer list of integers. * @param dims An initializer list of integers representing the dimensions of the tensor. */ - TensorShape(std::initializer_list dims); + TensorShape (std::initializer_list dims); /** * @brief Constructor with a vector of integers. * @param dims A vector of integers representing the dimensions of the tensor. */ - TensorShape(const std::vector& dims); + TensorShape (const std::vector& dims); /** * @brief Copy constructor. * @param other The TensorShape object to be copied. */ - TensorShape(const TensorShape& other); + TensorShape (const TensorShape& other); /** * @brief Get the size of a dimension in the tensor. * @param dim The index of the dimension. * @return The size of the specified dimension. */ - int64_t dim_size(int dim) const; + int64_t dim_size (int dim) const; /** * @brief Get all dimension sizes in the tensor. * @return A const reference to the vector of dimension sizes. */ - const std::vector& dims() const; + const std::vector& dims () const; - const std::vector& strides() const; + const std::vector& strides () const; /** * @brief Get the ndim of the tensor. * @return The number of dimensions in the tensor. */ - unsigned int ndim() const; + unsigned int ndim () const; /** * @brief Modify the size of a dimension in the tensor. * @param dim The index of the dimension to be modified. * @param size The new size of the dimension. */ - void set_dim_size(int dim, int64_t size); + void set_dim_size (int dim, int64_t size); /** * @brief Add a new dimension to the tensor. * @param size The size of the new dimension. */ - void add_dim(int64_t size); + void add_dim (int64_t size); /** * @brief Remove a dimension from the tensor. * @param dim The index of the dimension to be removed. 
*/ - void remove_dim(int dim); + void remove_dim (int dim); /** - * @brief Returns the total number of elements in the shape. - * - * @return int64_t The number of elements. - */ - int64_t NumElements() const; + * @brief Returns the total number of elements in the shape. + * + * @return int64_t The number of elements. + */ + int64_t NumElements () const; /** * @brief Overload the == operator to compare two TensorShape objects. * @param other The other TensorShape object to be compared. * @return True if the two objects have the same dimensions, false otherwise. */ - bool operator==(const TensorShape& other) const; + bool operator== (const TensorShape& other) const; /** * @brief Overload the != operator to compare two TensorShape objects. * @param other The other TensorShape object to be compared. * @return True if the two objects have different dimensions, false otherwise. */ - bool operator!=(const TensorShape& other) const; + bool operator!= (const TensorShape& other) const; -private: - std::vector dims_ = {}; // Save dimension sizes of the tensor + private: + std::vector dims_ = {}; // Save dimension sizes of the tensor // Note: strides are not always equals to the dimension sizes. // The strides specifies the number of elements to step in each dimension when traversing a tensor. // There could be some sparse region in the tensor, and the strides will be larger than the dimension sizes. - // For example, given a 2D tensor with shape [3, 4], + // For example, given a 2D tensor with shape [3, 4], // and the strides could be [6, 1] if the actual data is stored in a 1D array with size 18 [3, 6]. // The strides could also be [12, 3] if the actual data is stored in a 1D array with size 36 [3, 12]. // strides can only be modified by the TensorMap object. - std::vector strides_ = {}; // Save dimension strides of the tensor + std::vector strides_ = {}; // Save dimension strides of the tensor /** * @brief Compute the strides of the tensor. 
*/ - std::vector get_strides_(const std::vector& dim); + std::vector get_strides_ (const std::vector& dim); }; /** @@ -119,8 +121,8 @@ class TensorShape { * @param shape The TensorShape object to be printed. * @return A reference to the output stream. */ -std::ostream& operator<<(std::ostream& os, const TensorShape& shape); +std::ostream& operator<< (std::ostream& os, const TensorShape& shape); -} // container +} // namespace container -#endif // ATEN_CORE_TENSOR_SHAPE_H_ +#endif // ATEN_CORE_TENSOR_SHAPE_H_ diff --git a/source/source_base/module_container/ATen/core/tensor_types.cpp b/source/source_base/module_container/ATen/core/tensor_types.cpp index a57519c91f4..0dabc540cf6 100644 --- a/source/source_base/module_container/ATen/core/tensor_types.cpp +++ b/source/source_base/module_container/ATen/core/tensor_types.cpp @@ -1,11 +1,15 @@ #include -namespace container { +namespace container +{ // Overloaded operator<< for the Tensor class. // Prints the data type of the enum type DataType. -std::ostream& operator<<(std::ostream& os, const DataType& data_type) { - switch (data_type) { +std::ostream& + operator<< (std::ostream& os, const DataType& data_type) +{ + switch (data_type) + { case DataType::DT_FLOAT: os << "float"; break; @@ -27,26 +31,29 @@ std::ostream& operator<<(std::ostream& os, const DataType& data_type) { default: os << "unknown"; break; - } + } return os; } // Overloaded operator<< for the Tensor class. // Prints the memory type of the enum type DeviceType. 
-std::ostream& operator<<(std::ostream& os, const DeviceType& device_type) { - switch (device_type) { +std::ostream& + operator<< (std::ostream& os, const DeviceType& device_type) +{ + switch (device_type) + { case DeviceType::CpuDevice: os << "cpu"; break; - #if __CUDA || __ROCM +#if __CUDA || __ROCM case DeviceType::GpuDevice: os << "gpu"; break; - #endif +#endif default: os << "unknown"; break; - } + } return os; } diff --git a/source/source_base/module_container/ATen/core/tensor_types.h b/source/source_base/module_container/ATen/core/tensor_types.h index b87907825ac..3e8d9eb3a0d 100644 --- a/source/source_base/module_container/ATen/core/tensor_types.h +++ b/source/source_base/module_container/ATen/core/tensor_types.h @@ -26,19 +26,25 @@ #include #endif // defined(__CUDACC__) || defined(__HIPCC__) -namespace container { +namespace container +{ template -static inline bool element_compare(T& a, T& b) { - if (Accuracy <= 4) { - return (a == b) || (std::norm(a - b) < 1e-7); - } - else if (Accuracy <= 8) { - return (a == b) || (std::norm(a - b) < 1e-15); - } - else { - return (a == b); - } +static inline bool + element_compare (T& a, T& b) +{ + if (Accuracy <= 4) + { + return (a == b) || (std::norm (a - b) < 1e-7); + } + else if (Accuracy <= 8) + { + return (a == b) || (std::norm (a - b) < 1e-15); + } + else + { + return (a == b); + } } /** @@ -47,33 +53,39 @@ The DataType enum lists the supported data types for tensors. Each data type is identified by a unique value. The DT_INVALID value is reserved for invalid data types. 
*/ -enum class DataType { - DT_INVALID = 0, ///< Invalid data type */ - DT_FLOAT = 1, ///< Single-precision floating point */ - DT_DOUBLE = 2, ///< Double-precision floating point */ - DT_INT = 3, ///< 32-bit integer */ - DT_INT64 = 4, ///< 64-bit integer */ - DT_COMPLEX = 5, ///< 32-bit complex */ +enum class DataType +{ + DT_INVALID = 0, ///< Invalid data type */ + DT_FLOAT = 1, ///< Single-precision floating point */ + DT_DOUBLE = 2, ///< Double-precision floating point */ + DT_INT = 3, ///< 32-bit integer */ + DT_INT64 = 4, ///< 64-bit integer */ + DT_COMPLEX = 5, ///< 32-bit complex */ DT_COMPLEX_DOUBLE = 6, /**< 64-bit complex */ -// ... other data types + // ... other data types }; /** *@struct DEVICE_CPU, DEVICE_GPU *@brief A tag type for identifying CPU and GPU devices. -*/ + */ struct DEVICE_CPU; struct DEVICE_GPU; -struct DEVICE_CPU {}; -struct DEVICE_GPU {}; +struct DEVICE_CPU +{ +}; +struct DEVICE_GPU +{ +}; /** * @brief The type of memory used by an allocator. */ -enum class DeviceType { - UnKnown = 0, ///< Memory type is unknown. - CpuDevice = 1, ///< Memory type is CPU. - GpuDevice = 2, ///< Memory type is GPU(CUDA or ROCm). +enum class DeviceType +{ + UnKnown = 0, ///< Memory type is unknown. + CpuDevice = 1, ///< Memory type is CPU. + GpuDevice = 2, ///< Memory type is GPU(CUDA or ROCm). }; /** @@ -85,7 +97,8 @@ enum class DeviceType { * @tparam T The input type for which the return type needs to be determined. */ template -struct GetTypeReal { +struct GetTypeReal +{ using type = T; /**< The return type based on the input type. */ }; @@ -95,7 +108,8 @@ struct GetTypeReal { * This specialization sets the return type to be float when the input type is std::complex. */ template <> -struct GetTypeReal> { +struct GetTypeReal> +{ using type = float; /**< The return type specialization for std::complex. */ }; @@ -105,12 +119,14 @@ struct GetTypeReal> { * This specialization sets the return type to be double when the input type is std::complex. 
*/ template <> -struct GetTypeReal> { +struct GetTypeReal> +{ using type = double; /**< The return type specialization for std::complex. */ }; -template -struct PsiToContainer { +template +struct PsiToContainer +{ using type = T; /**< The return type based on the input type. */ }; @@ -126,22 +142,24 @@ struct PsiToContainer using type = container::DEVICE_GPU; /**< The return type specialization for std::complex. */ }; -template -struct ContainerToPsi { +template +struct ContainerToPsi +{ using type = T; /**< The return type based on the input type. */ }; template <> -struct ContainerToPsi { +struct ContainerToPsi +{ using type = base_device::DEVICE_CPU; /**< The return type specialization for std::complex. */ }; template <> -struct ContainerToPsi { +struct ContainerToPsi +{ using type = base_device::DEVICE_GPU; /**< The return type specialization for std::complex. */ }; - /** * @brief Template struct for mapping a Device Type to its corresponding enum value. * @@ -155,16 +173,19 @@ struct ContainerToPsi { * DataTypeToEnum::value; // Returns DataType::DT_FLOAT */ template -struct DeviceTypeToEnum { +struct DeviceTypeToEnum +{ static constexpr DeviceType value = {}; }; // Specializations of DeviceTypeToEnum for supported devices. template <> -struct DeviceTypeToEnum { +struct DeviceTypeToEnum +{ static constexpr DeviceType value = DeviceType::CpuDevice; }; template <> -struct DeviceTypeToEnum { +struct DeviceTypeToEnum +{ static constexpr DeviceType value = DeviceType::GpuDevice; }; template <> @@ -191,43 +212,52 @@ struct DeviceTypeToEnum * DataTypeToEnum::value; // Returns DataType::DT_FLOAT */ template -struct DataTypeToEnum { +struct DataTypeToEnum +{ static constexpr DataType value = {}; }; // Specializations of DataTypeToEnum for supported types. 
template <> -struct DataTypeToEnum { +struct DataTypeToEnum +{ static constexpr DataType value = DataType::DT_INT; }; template <> -struct DataTypeToEnum { +struct DataTypeToEnum +{ static constexpr DataType value = DataType::DT_FLOAT; }; template <> -struct DataTypeToEnum { +struct DataTypeToEnum +{ static constexpr DataType value = DataType::DT_DOUBLE; }; template <> -struct DataTypeToEnum { +struct DataTypeToEnum +{ static constexpr DataType value = DataType::DT_INT64; }; template <> -struct DataTypeToEnum> { +struct DataTypeToEnum> +{ static constexpr DataType value = DataType::DT_COMPLEX; }; template <> -struct DataTypeToEnum> { +struct DataTypeToEnum> +{ static constexpr DataType value = DataType::DT_COMPLEX_DOUBLE; }; #if defined(__CUDACC__) || defined(__HIPCC__) template <> -struct DataTypeToEnum> { +struct DataTypeToEnum> +{ static constexpr DataType value = DataType::DT_COMPLEX; }; template <> -struct DataTypeToEnum> { +struct DataTypeToEnum> +{ static constexpr DataType value = DataType::DT_COMPLEX_DOUBLE; }; #endif // defined(__CUDACC__) || defined(__HIPCC__) @@ -242,7 +272,7 @@ struct DataTypeToEnum> { * * @return The output stream. */ -std::ostream& operator<<(std::ostream& os, const DataType& data_type); +std::ostream& operator<< (std::ostream& os, const DataType& data_type); /** * @brief Overloaded operator<< for the Tensor class. @@ -254,7 +284,7 @@ std::ostream& operator<<(std::ostream& os, const DataType& data_type); * * @return The output stream. 
*/ -std::ostream& operator<<(std::ostream& os, const DeviceType& memory_type); +std::ostream& operator<< (std::ostream& os, const DeviceType& memory_type); } // namespace container #endif // ATEN_CORE_TENSOR_TYPES_H_ \ No newline at end of file diff --git a/source/source_base/module_container/ATen/core/tensor_utils.h b/source/source_base/module_container/ATen/core/tensor_utils.h index 115ddfedb73..42043222210 100644 --- a/source/source_base/module_container/ATen/core/tensor_utils.h +++ b/source/source_base/module_container/ATen/core/tensor_utils.h @@ -4,7 +4,8 @@ #include #include -namespace container { +namespace container +{ /** * @@ -16,16 +17,19 @@ namespace container { * * @return A string without trailing zeros. */ -__inline__ -std::string removeTrailingZeros(std::string str) { - int i = static_cast(str.length()) - 1; - while (i >= 0 && str[i] == '0') { - i--; - } - if (i == -1) { - return "0"; - } - return str.substr(0, i + 1); +__inline__ std::string + removeTrailingZeros (std::string str) +{ + int i = static_cast (str.length ()) - 1; + while (i >= 0 && str[i] == '0') + { + i--; + } + if (i == -1) + { + return "0"; + } + return str.substr (0, i + 1); } /** @@ -44,49 +48,53 @@ std::string removeTrailingZeros(std::string str) { * @return The total length of the longest integer and fractional part. 
*/ -template -__inline__ -int _get_digit_places( - const T* arr, - int size, - int& integer_count, - int& fraction_count) +template +__inline__ int + _get_digit_places (const T* arr, int size, int& integer_count, int& fraction_count) { integer_count = 0; fraction_count = 0; - for (int i = 0; i < size; i++) { - int digits = 0; - if (arr[i] < 0) { - digits = log10(-arr[i]) + 1; - if (digits + 1 > integer_count) { - integer_count = digits + 1; - } - } - else { - digits = log10(arr[i]) + 1; - if (digits > integer_count) { - integer_count = digits; - } - } + for (int i = 0; i < size; i++) + { + int digits = 0; + if (arr[i] < 0) + { + digits = log10 (-arr[i]) + 1; + if (digits + 1 > integer_count) + { + integer_count = digits + 1; + } + } + else + { + digits = log10 (arr[i]) + 1; + if (digits > integer_count) + { + integer_count = digits; + } + } - T fraction = arr[i] - std::floor(arr[i]); - if (fraction == 0) { - continue; - } - std::string str = removeTrailingZeros(std::to_string(fraction)); - digits = str.length() - str.find('.'); - if (digits > fraction_count) { - fraction_count = digits; + T fraction = arr[i] - std::floor (arr[i]); + if (fraction == 0) + { + continue; + } + std::string str = removeTrailingZeros (std::to_string (fraction)); + digits = str.length () - str.find ('.'); + if (digits > fraction_count) + { + fraction_count = digits; + } } - } return integer_count + fraction_count; } /** * - * @brief Overloaded function to calculate the length of the longest integer and fractional part of an array of complex numbers. + * @brief Overloaded function to calculate the length of the longest integer and fractional part of an array of complex + * numbers. * * This function is an overloaded version of _get_digit_places for an array of complex numbers. * @@ -99,15 +107,11 @@ int _get_digit_places( * * @return The total length of the longest integer and fractional part. 
*/ -template -__inline__ -int _get_digit_places( - const std::complex* arr, - int size, - int& integer_count, - int& fraction_count) +template +__inline__ int + _get_digit_places (const std::complex* arr, int size, int& integer_count, int& fraction_count) { - return _get_digit_places(reinterpret_cast(arr), size * 2, integer_count, fraction_count); + return _get_digit_places (reinterpret_cast (arr), size * 2, integer_count, fraction_count); } /** @@ -122,15 +126,10 @@ int _get_digit_places( * @param fraction_count The number of digits to display after the decimal point. */ template -__inline__ -void _output_wrapper( - std::ostream& os, - const T data, - const int& digit_width, - const int& fraction_count) +__inline__ void + _output_wrapper (std::ostream& os, const T data, const int& digit_width, const int& fraction_count) { - os << std::setw(digit_width) \ - << std::setprecision(fraction_count) << std::fixed << data; + os << std::setw (digit_width) << std::setprecision (fraction_count) << std::fixed << data; } /** @@ -145,23 +144,15 @@ void _output_wrapper( * @param fraction_count The number of digits to display after the decimal point. */ template -__inline__ -void _output_wrapper( - std::ostream& os, - const std::complex data, - const int& digit_width, - const int& fraction_count) +__inline__ void + _output_wrapper (std::ostream& os, const std::complex data, const int& digit_width, const int& fraction_count) { // Write the real and imaginary parts of the complex value to the output stream // with the specified formatting. 
os << "{"; - os << std::setw(digit_width) \ - << std::setprecision(fraction_count) << std::fixed - << data.real(); + os << std::setw (digit_width) << std::setprecision (fraction_count) << std::fixed << data.real (); os << ", "; - os << std::setw(digit_width) \ - << std::setprecision(fraction_count) << std::fixed - << data.imag(); + os << std::setw (digit_width) << std::setprecision (fraction_count) << std::fixed << data.imag (); os << "}"; } @@ -175,15 +166,10 @@ void _output_wrapper( * @param fraction_count The number of digits to display after the decimal point. */ template <> -__inline__ -void _output_wrapper( - std::ostream& os, - const int data, - const int& digit_width, - const int& fraction_count) +__inline__ void + _output_wrapper (std::ostream& os, const int data, const int& digit_width, const int& fraction_count) { - os << std::setw(digit_width - 1) \ - << std::setprecision(fraction_count) << std::fixed << data; + os << std::setw (digit_width - 1) << std::setprecision (fraction_count) << std::fixed << data; } /** @@ -199,87 +185,115 @@ void _output_wrapper( * @param data A pointer to the tensor data. * @param shape The shape of the tensor. * @param num_elements The total number of elements in the tensor. 
-*/ + */ template -__inline__ -void _internal_output( - std::ostream& os, - const T * data, - const TensorShape& shape, - const int64_t& num_elements) +__inline__ void + _internal_output (std::ostream& os, const T* data, const TensorShape& shape, const int64_t& num_elements) { int integer_count = 0, fraction_count = 0; - int digit_width = _get_digit_places(data, num_elements, integer_count, fraction_count) + 1; - if (shape.ndim() == 1) { - os << "["; - for (int i = 0; i < num_elements; ++i) { - _output_wrapper(os, data[i], digit_width, fraction_count); - if (i != num_elements - 1) { - os << ","; - } + int digit_width = _get_digit_places (data, num_elements, integer_count, fraction_count) + 1; + if (shape.ndim () == 1) + { + os << "["; + for (int i = 0; i < num_elements; ++i) + { + _output_wrapper (os, data[i], digit_width, fraction_count); + if (i != num_elements - 1) + { + os << ","; + } + } + os << "]"; } - os << "]"; - } - else if (shape.ndim() == 2) { - os << "["; - for (int i = 0; i < shape.dim_size(0); ++i) { - if (i != 0) os << " "; + else if (shape.ndim () == 2) + { os << "["; - for (int j = 0; j < shape.dim_size(1); ++j) { - _output_wrapper(os, data[i * shape.dim_size(1) + j], digit_width, fraction_count); - if (j != shape.dim_size(1) - 1) { - os << ", "; + for (int i = 0; i < shape.dim_size (0); ++i) + { + if (i != 0) + os << " "; + os << "["; + for (int j = 0; j < shape.dim_size (1); ++j) + { + _output_wrapper (os, data[i * shape.dim_size (1) + j], digit_width, fraction_count); + if (j != shape.dim_size (1) - 1) + { + os << ", "; + } + } + os << "]"; + if (i != shape.dim_size (0) - 1) + os << ",\n"; } - } os << "]"; - if (i != shape.dim_size(0) - 1) os << ",\n"; } - os << "]"; - } - else if (shape.ndim() == 3) { - os << "["; - for (int i = 0; i < shape.dim_size(0); ++i) { - if (i != 0) os << " "; + else if (shape.ndim () == 3) + { os << "["; - for (int j = 0; j < shape.dim_size(1); ++j) { - if (j != 0) os << " "; - os << "["; - for (int k = 0; k < 
shape.dim_size(2); ++k) { - _output_wrapper(os, data[i * shape.dim_size(1) * shape.dim_size(2) + j * shape.dim_size(2) + k], digit_width, fraction_count); - if (k != shape.dim_size(2) - 1) { - os << ", "; - } + for (int i = 0; i < shape.dim_size (0); ++i) + { + if (i != 0) + os << " "; + os << "["; + for (int j = 0; j < shape.dim_size (1); ++j) + { + if (j != 0) + os << " "; + os << "["; + for (int k = 0; k < shape.dim_size (2); ++k) + { + _output_wrapper ( + os, + data[i * shape.dim_size (1) * shape.dim_size (2) + j * shape.dim_size (2) + k], + digit_width, + fraction_count); + if (k != shape.dim_size (2) - 1) + { + os << ", "; + } + } + os << "]"; + if (j != shape.dim_size (1) - 1) + os << ",\n"; + } + os << "]"; + if (i != shape.dim_size (0) - 1) + os << ",\n\n"; } - os << "]"; - if (j != shape.dim_size(1) - 1) os << ",\n"; - } os << "]"; - if (i != shape.dim_size(0) - 1) os << ",\n\n"; } - os << "]"; - } - else { - for (int64_t i = 0; i < num_elements; ++i) { - _output_wrapper(os, data[i], 0, 0); - if (i < num_elements - 1) { - os << ", "; - } + else + { + for (int64_t i = 0; i < num_elements; ++i) + { + _output_wrapper (os, data[i], 0, 0); + if (i < num_elements - 1) + { + os << ", "; + } + } } - } } template -T extract(const container::Tensor& tensor) { - if (tensor.device_type() == DeviceType::CpuDevice) { - return reinterpret_cast(tensor.data())[0]; - } - else { - T result = 0; - TEMPLATE_ALL_2(tensor.data_type(), tensor.device_type(), - kernels::synchronize_memory()( - &result, reinterpret_cast(tensor.data()), 1)) - return result; - } +T + extract (const container::Tensor& tensor) +{ + if (tensor.device_type () == DeviceType::CpuDevice) + { + return reinterpret_cast (tensor.data ())[0]; + } + else + { + T result = 0; + TEMPLATE_ALL_2 ( + tensor.data_type (), + tensor.device_type (), + kernels::synchronize_memory () (&result, + reinterpret_cast (tensor.data ()), + 1)) + return result; + } } } // namespace container diff --git 
a/source/source_base/module_container/ATen/kernels/blas.cpp b/source/source_base/module_container/ATen/kernels/blas.cpp index 5935ed7c282..a5daf8e5553 100644 --- a/source/source_base/module_container/ATen/kernels/blas.cpp +++ b/source/source_base/module_container/ATen/kernels/blas.cpp @@ -1,260 +1,282 @@ #include -namespace container { -namespace kernels { - +namespace container +{ +namespace kernels +{ template -struct blas_copy { - void operator()( - const int n, - const T *x, - const int incx, - T *y, - const int incy) +struct blas_copy +{ + void + operator() (const int n, const T* x, const int incx, T* y, const int incy) { - BlasConnector::copy(n, x, incx, y, incy); + BlasConnector::copy (n, x, incx, y, incy); } }; template -struct blas_nrm2 { +struct blas_nrm2 +{ using Real = typename GetTypeReal::type; - Real operator()( - const int n, - const T *x, - const int incx) + Real + operator() (const int n, const T* x, const int incx) { - return BlasConnector::nrm2(n, x, incx); + return BlasConnector::nrm2 (n, x, incx); } }; template -struct blas_dot { - void operator()( - const int& n, - const T* x, - const int& incx, - const T* y, - const int& incy, - T* result) +struct blas_dot +{ + void + operator() (const int& n, const T* x, const int& incx, const T* y, const int& incy, T* result) { - *result = BlasConnector::dot(n, x, incx, y, incy); + *result = BlasConnector::dot (n, x, incx, y, incy); } }; template -struct blas_scal { - void operator()( - const int& n, - const T* alpha, - T* x, - const int& incx) +struct blas_scal +{ + void + operator() (const int& n, const T* alpha, T* x, const int& incx) { - BlasConnector::scal(n, *alpha, x, incx); + BlasConnector::scal (n, *alpha, x, incx); } }; template -struct blas_axpy { - void operator()( - const int& n, - const T* alpha, - const T* x, - const int& incx, - T* y, - const int& incy) +struct blas_axpy +{ + void + operator() (const int& n, const T* alpha, const T* x, const int& incx, T* y, const int& incy) { - 
BlasConnector::axpy(n, *alpha, x, incx, y, incy); + BlasConnector::axpy (n, *alpha, x, incx, y, incy); } }; template -struct blas_gemv { - void operator()( - const char& trans, - const int& m, - const int& n, - const T* alpha, - const T* A, - const int& lda, - const T* x, - const int& incx, - const T* beta, - T* y, - const int& incy) +struct blas_gemv +{ + void + operator() (const char& trans, + const int& m, + const int& n, + const T* alpha, + const T* A, + const int& lda, + const T* x, + const int& incx, + const T* beta, + T* y, + const int& incy) { - BlasConnector::gemv(trans, m, n, *alpha, A, lda, x, incx, *beta, y, incy); + BlasConnector::gemv (trans, m, n, *alpha, A, lda, x, incx, *beta, y, incy); } }; template -struct blas_gemv_batched { - void operator()( - const char& trans, - const int& m, - const int& n, - const T* alpha, - T** A, - const int& lda, - T** x, - const int& incx, - const T* beta, - T** y, - const int& incy, - const int& batch_size) +struct blas_gemv_batched +{ + void + operator() (const char& trans, + const int& m, + const int& n, + const T* alpha, + T** A, + const int& lda, + T** x, + const int& incx, + const T* beta, + T** y, + const int& incy, + const int& batch_size) { - BlasConnector::gemv_batched(trans, m, n, *alpha, A, lda, x, incx, *beta, y, incy, batch_size); + BlasConnector::gemv_batched (trans, m, n, *alpha, A, lda, x, incx, *beta, y, incy, batch_size); } }; - template -struct blas_gemv_batched_strided { - void operator()( - const char& trans, - const int& m, - const int& n, - const T* alpha, - const T* A, - const int& lda, - const int64_t& stride_a, - const T* x, - const int& incx, - const int64_t& stride_x, - const T* beta, - T* y, - const int& incy, - const int64_t& stride_y, - const int& batch_size) +struct blas_gemv_batched_strided +{ + void + operator() (const char& trans, + const int& m, + const int& n, + const T* alpha, + const T* A, + const int& lda, + const int64_t& stride_a, + const T* x, + const int& incx, + const 
int64_t& stride_x, + const T* beta, + T* y, + const int& incy, + const int64_t& stride_y, + const int& batch_size) { - BlasConnector::gemv_batched_strided(trans, m, n, *alpha, A, lda, stride_a, x, incx, stride_x, *beta, y, incy, stride_y, batch_size); + BlasConnector::gemv_batched_strided (trans, + m, + n, + *alpha, + A, + lda, + stride_a, + x, + incx, + stride_x, + *beta, + y, + incy, + stride_y, + batch_size); } }; template -struct blas_gemm { - void operator()( - const char& transa, - const char& transb, - const int& m, - const int& n, - const int& k, - const T* alpha, - const T* A, - const int& lda, - const T* B, - const int& ldb, - const T* beta, - T* C, - const int& ldc) +struct blas_gemm +{ + void + operator() (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const T* alpha, + const T* A, + const int& lda, + const T* B, + const int& ldb, + const T* beta, + T* C, + const int& ldc) { - BlasConnector::gemm(transa, transb, m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc); + BlasConnector::gemm (transa, transb, m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc); } }; template -struct blas_gemm_batched { - void operator()( - const char& transa, - const char& transb, - const int& m, - const int& n, - const int& k, - const T* alpha, - T** A, - const int& lda, - T** B, - const int& ldb, - const T* beta, - T** C, - const int& ldc, - const int& batch_size) +struct blas_gemm_batched +{ + void + operator() (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const T* alpha, + T** A, + const int& lda, + T** B, + const int& ldb, + const T* beta, + T** C, + const int& ldc, + const int& batch_size) { - BlasConnector::gemm_batched(transa, transb, m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc, batch_size); + BlasConnector::gemm_batched (transa, transb, m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc, batch_size); } }; template -struct blas_gemm_batched_strided { - void operator()( - const char& 
transa, - const char& transb, - const int& m, - const int& n, - const int& k, - const T* alpha, - const T* A, - const int& lda, - const int& stride_a, - const T* B, - const int& ldb, - const int& stride_b, - const T* beta, - T* C, - const int& ldc, - const int& stride_c, - const int& batch_size) +struct blas_gemm_batched_strided +{ + void + operator() (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const T* alpha, + const T* A, + const int& lda, + const int& stride_a, + const T* B, + const int& ldb, + const int& stride_b, + const T* beta, + T* C, + const int& ldc, + const int& stride_c, + const int& batch_size) { - BlasConnector::gemm_batched_strided(transa, transb, m, n, k, *alpha, A, lda, stride_a, B, ldb, stride_b, *beta, C, ldc, stride_c, batch_size); + BlasConnector::gemm_batched_strided (transa, + transb, + m, + n, + k, + *alpha, + A, + lda, + stride_a, + B, + ldb, + stride_b, + *beta, + C, + ldc, + stride_c, + batch_size); } }; // Explicitly instantiate functors for the types of functor registered. 
-template struct blas_copy; +template struct blas_copy; template struct blas_copy; -template struct blas_copy, DEVICE_CPU>; +template struct blas_copy, DEVICE_CPU>; template struct blas_copy, DEVICE_CPU>; -template struct blas_nrm2; +template struct blas_nrm2; template struct blas_nrm2; -template struct blas_nrm2, DEVICE_CPU>; +template struct blas_nrm2, DEVICE_CPU>; template struct blas_nrm2, DEVICE_CPU>; -template struct blas_dot; +template struct blas_dot; template struct blas_dot; -template struct blas_dot, DEVICE_CPU>; +template struct blas_dot, DEVICE_CPU>; template struct blas_dot, DEVICE_CPU>; -template struct blas_scal; +template struct blas_scal; template struct blas_scal; -template struct blas_scal, DEVICE_CPU>; +template struct blas_scal, DEVICE_CPU>; template struct blas_scal, DEVICE_CPU>; -template struct blas_axpy; +template struct blas_axpy; template struct blas_axpy; -template struct blas_axpy, DEVICE_CPU>; +template struct blas_axpy, DEVICE_CPU>; template struct blas_axpy, DEVICE_CPU>; -template struct blas_gemv; +template struct blas_gemv; template struct blas_gemv; -template struct blas_gemv, DEVICE_CPU>; +template struct blas_gemv, DEVICE_CPU>; template struct blas_gemv, DEVICE_CPU>; -template struct blas_gemv_batched; +template struct blas_gemv_batched; template struct blas_gemv_batched; -template struct blas_gemv_batched, DEVICE_CPU>; +template struct blas_gemv_batched, DEVICE_CPU>; template struct blas_gemv_batched, DEVICE_CPU>; -template struct blas_gemv_batched_strided; +template struct blas_gemv_batched_strided; template struct blas_gemv_batched_strided; -template struct blas_gemv_batched_strided, DEVICE_CPU>; +template struct blas_gemv_batched_strided, DEVICE_CPU>; template struct blas_gemv_batched_strided, DEVICE_CPU>; -template struct blas_gemm; +template struct blas_gemm; template struct blas_gemm; -template struct blas_gemm, DEVICE_CPU>; +template struct blas_gemm, DEVICE_CPU>; template struct blas_gemm, DEVICE_CPU>; -template struct 
blas_gemm_batched; +template struct blas_gemm_batched; template struct blas_gemm_batched; -template struct blas_gemm_batched, DEVICE_CPU>; +template struct blas_gemm_batched, DEVICE_CPU>; template struct blas_gemm_batched, DEVICE_CPU>; -template struct blas_gemm_batched_strided; +template struct blas_gemm_batched_strided; template struct blas_gemm_batched_strided; -template struct blas_gemm_batched_strided, DEVICE_CPU>; +template struct blas_gemm_batched_strided, DEVICE_CPU>; template struct blas_gemm_batched_strided, DEVICE_CPU>; } // namespace kernels diff --git a/source/source_base/module_container/ATen/kernels/blas.h b/source/source_base/module_container/ATen/kernels/blas.h index 550caa2f79f..c55e69e7d81 100644 --- a/source/source_base/module_container/ATen/kernels/blas.h +++ b/source/source_base/module_container/ATen/kernels/blas.h @@ -6,184 +6,159 @@ #include -namespace container { -namespace kernels { +namespace container +{ +namespace kernels +{ template -struct blas_copy { +struct blas_copy +{ // DCOPY copies a vector, x, to a vector, y. 
- void operator()( - const int n, - const T *x, - const int incx, - T *y, - const int incy); + void operator() (const int n, const T* x, const int incx, T* y, const int incy); }; template -struct blas_nrm2 { +struct blas_nrm2 +{ using Real = typename GetTypeReal::type; - Real operator()( - const int n, - const T *x, - const int incx); + Real operator() (const int n, const T* x, const int incx); }; template -struct blas_dot { - void operator()( - const int& n, - const T* x, - const int& incx, - const T* y, - const int& incy, - T* result); +struct blas_dot +{ + void operator() (const int& n, const T* x, const int& incx, const T* y, const int& incy, T* result); }; - template -struct blas_scal { - void operator()( - const int& n, - const T* alpha, - T* x, - const int& incx); +struct blas_scal +{ + void operator() (const int& n, const T* alpha, T* x, const int& incx); }; - template -struct blas_axpy { - void operator()( - const int& n, - const T* alpha, - const T* x, - const int& incx, - T* y, - const int& incy); +struct blas_axpy +{ + void operator() (const int& n, const T* alpha, const T* x, const int& incx, T* y, const int& incy); }; - template -struct blas_gemv { - void operator()( - const char& trans, - const int& m, - const int& n, - const T* alpha, - const T* A, - const int& lda, - const T* x, - const int& incx, - const T* beta, - T* y, - const int& incy); +struct blas_gemv +{ + void operator() (const char& trans, + const int& m, + const int& n, + const T* alpha, + const T* A, + const int& lda, + const T* x, + const int& incx, + const T* beta, + T* y, + const int& incy); }; - template -struct blas_gemv_batched { - void operator()( - const char& trans, - const int& m, - const int& n, - const T* alpha, - T** A, - const int& lda, - T** x, - const int& incx, - const T* beta, - T** y, - const int& incy, - const int& batch_size); +struct blas_gemv_batched +{ + void operator() (const char& trans, + const int& m, + const int& n, + const T* alpha, + T** A, + const int& 
lda, + T** x, + const int& incx, + const T* beta, + T** y, + const int& incy, + const int& batch_size); }; - template -struct blas_gemv_batched_strided { - void operator()( - const char& trans, - const int& m, - const int& n, - const T* alpha, - const T* A, - const int& lda, - const int64_t& stride_a, - const T* x, - const int& incx, - const int64_t& stride_x, - const T* beta, - T* y, - const int& incy, - const int64_t& stride_y, - const int& batch_size); +struct blas_gemv_batched_strided +{ + void operator() (const char& trans, + const int& m, + const int& n, + const T* alpha, + const T* A, + const int& lda, + const int64_t& stride_a, + const T* x, + const int& incx, + const int64_t& stride_x, + const T* beta, + T* y, + const int& incy, + const int64_t& stride_y, + const int& batch_size); }; - template -struct blas_gemm { - void operator()( - const char& transa, - const char& transb, - const int& m, - const int& n, - const int& k, - const T* alpha, - const T* A, - const int& lda, - const T* B, - const int& ldb, - const T* beta, - T* C, - const int& ldc); +struct blas_gemm +{ + void operator() (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const T* alpha, + const T* A, + const int& lda, + const T* B, + const int& ldb, + const T* beta, + T* C, + const int& ldc); }; - template -struct blas_gemm_batched { - void operator()( - const char& transa, - const char& transb, - const int& m, - const int& n, - const int& k, - const T* alpha, - T** A, - const int& lda, - T** B, - const int& ldb, - const T* beta, - T** C, - const int& ldc, - const int& batch_size); +struct blas_gemm_batched +{ + void operator() (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const T* alpha, + T** A, + const int& lda, + T** B, + const int& ldb, + const T* beta, + T** C, + const int& ldc, + const int& batch_size); }; - template -struct blas_gemm_batched_strided { - void operator()( - const char& transa, - 
const char& transb, - const int& m, - const int& n, - const int& k, - const T* alpha, - const T* A, - const int& lda, - const int& stride_a, - const T* B, - const int& ldb, - const int& stride_b, - const T* beta, - T* C, - const int& ldc, - const int& stride_c, - const int& batch_size); +struct blas_gemm_batched_strided +{ + void operator() (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const T* alpha, + const T* A, + const int& lda, + const int& stride_a, + const T* B, + const int& ldb, + const int& stride_b, + const T* beta, + T* C, + const int& ldc, + const int& stride_c, + const int& batch_size); }; #if __CUDA || __ROCM -void createGpuBlasHandle(); // create blas handle -void destroyGpuBlasHandle(); // destory blas handle -#endif // __CUDA || __UT_USE_CUDA +void createGpuBlasHandle (); // create blas handle +void destroyGpuBlasHandle (); // destory blas handle +#endif // __CUDA || __UT_USE_CUDA } // namespace kernels } // namespace container diff --git a/source/source_base/module_container/ATen/kernels/cuda/blas.cu b/source/source_base/module_container/ATen/kernels/cuda/blas.cu index 7f794768ad5..890ef654f17 100644 --- a/source/source_base/module_container/ATen/kernels/cuda/blas.cu +++ b/source/source_base/module_container/ATen/kernels/cuda/blas.cu @@ -4,280 +4,309 @@ #include #include -namespace container { -namespace kernels { +namespace container +{ +namespace kernels +{ static cublasHandle_t cublas_handle = nullptr; -void createGpuBlasHandle() { - if (cublas_handle == nullptr) { - CHECK_CUBLAS(cublasCreate(&cublas_handle)); - } +void + createGpuBlasHandle () +{ + if (cublas_handle == nullptr) + { + CHECK_CUBLAS (cublasCreate (&cublas_handle)); + } } -void destroyGpuBlasHandle() { - if (cublas_handle != nullptr) { - CHECK_CUBLAS(cublasDestroy(cublas_handle)); - cublas_handle = nullptr; - } +void + destroyGpuBlasHandle () +{ + if (cublas_handle != nullptr) + { + CHECK_CUBLAS (cublasDestroy (cublas_handle)); + 
cublas_handle = nullptr; + } } template -struct blas_nrm2 { +struct blas_nrm2 +{ using Real = typename GetTypeReal::type; - Real operator()( - const int n, - const T *x, - const int incx) + Real + operator() (const int n, const T* x, const int incx) { Real result; - cuBlasConnector::nrm2(cublas_handle, n, x, incx, &result); + cuBlasConnector::nrm2 (cublas_handle, n, x, incx, &result); return result; } }; template -struct blas_copy { - void operator()( - const int n, - const T * x, - const int incx, - T *y, - const int incy) +struct blas_copy +{ + void + operator() (const int n, const T* x, const int incx, T* y, const int incy) { - cuBlasConnector::copy(cublas_handle, n, x, incx, y, incy); + cuBlasConnector::copy (cublas_handle, n, x, incx, y, incy); } }; template -struct blas_dot { - void operator()( - const int& n, - const T* x, - const int& incx, - const T* y, - const int& incy, - T* result) +struct blas_dot +{ + void + operator() (const int& n, const T* x, const int& incx, const T* y, const int& incy, T* result) { - cuBlasConnector::dot(cublas_handle, n, x, incx, y, incy, result); + cuBlasConnector::dot (cublas_handle, n, x, incx, y, incy, result); } }; template -struct blas_scal { - void operator()( - const int& n, - const T* alpha, - T* x, - const int& incx) +struct blas_scal +{ + void + operator() (const int& n, const T* alpha, T* x, const int& incx) { - cuBlasConnector::scal(cublas_handle, n, *alpha, x, incx); + cuBlasConnector::scal (cublas_handle, n, *alpha, x, incx); } }; template -struct blas_axpy { - void operator()( - const int& n, - const T* alpha, - const T* x, - const int& incx, - T* y, - const int& incy) +struct blas_axpy +{ + void + operator() (const int& n, const T* alpha, const T* x, const int& incx, T* y, const int& incy) { - cuBlasConnector::axpy(cublas_handle, n, *alpha, x, incx, y, incy); + cuBlasConnector::axpy (cublas_handle, n, *alpha, x, incx, y, incy); } }; template -struct blas_gemv { - void operator()( - const char& trans, - const 
int& m, - const int& n, - const T* alpha, - const T* A, - const int& lda, - const T* x, - const int& incx, - const T* beta, - T* y, - const int& incy) +struct blas_gemv +{ + void + operator() (const char& trans, + const int& m, + const int& n, + const T* alpha, + const T* A, + const int& lda, + const T* x, + const int& incx, + const T* beta, + T* y, + const int& incy) { - cuBlasConnector::gemv(cublas_handle, trans, m, n, *alpha, A, lda, x, incx, *beta, y, incy); + cuBlasConnector::gemv (cublas_handle, trans, m, n, *alpha, A, lda, x, incx, *beta, y, incy); } }; - template -struct blas_gemv_batched { - void operator()( - const char& trans, - const int& m, - const int& n, - const T* alpha, - T** A, - const int& lda, - T** x, - const int& incx, - const T* beta, - T** y, - const int& incy, - const int& batch_size) +struct blas_gemv_batched +{ + void + operator() (const char& trans, + const int& m, + const int& n, + const T* alpha, + T** A, + const int& lda, + T** x, + const int& incx, + const T* beta, + T** y, + const int& incy, + const int& batch_size) { - cuBlasConnector::gemv_batched(cublas_handle, trans, m, n, *alpha, A, lda, x, incx, *beta, y, incy, batch_size); + cuBlasConnector::gemv_batched (cublas_handle, trans, m, n, *alpha, A, lda, x, incx, *beta, y, incy, batch_size); } }; - template -struct blas_gemv_batched_strided { - void operator()( - const char& trans, - const int& m, - const int& n, - const T* alpha, - const T* A, - const int& lda, - const int64_t& stride_a, - const T* x, - const int& incx, - const int64_t& stride_x, - const T* beta, - T* y, - const int& incy, - const int64_t& stride_y, - const int& batch_size) +struct blas_gemv_batched_strided +{ + void + operator() (const char& trans, + const int& m, + const int& n, + const T* alpha, + const T* A, + const int& lda, + const int64_t& stride_a, + const T* x, + const int& incx, + const int64_t& stride_x, + const T* beta, + T* y, + const int& incy, + const int64_t& stride_y, + const int& batch_size) { - 
cuBlasConnector::gemv_batched_strided(cublas_handle, trans, m, n, *alpha, A, lda, stride_a, x, incx, stride_x, *beta, y, incy, stride_y, batch_size); + cuBlasConnector::gemv_batched_strided (cublas_handle, + trans, + m, + n, + *alpha, + A, + lda, + stride_a, + x, + incx, + stride_x, + *beta, + y, + incy, + stride_y, + batch_size); } }; template -struct blas_gemm { - void operator()( - const char& transa, - const char& transb, - const int& m, - const int& n, - const int& k, - const T* alpha, - const T* A, - const int& lda, - const T* B, - const int& ldb, - const T* beta, - T* C, - const int& ldc) +struct blas_gemm +{ + void + operator() (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const T* alpha, + const T* A, + const int& lda, + const T* B, + const int& ldb, + const T* beta, + T* C, + const int& ldc) { - cuBlasConnector::gemm(cublas_handle, transa, transb, m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc); + cuBlasConnector::gemm (cublas_handle, transa, transb, m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc); } }; template -struct blas_gemm_batched { - void operator()( - const char& transa, - const char& transb, - const int& m, - const int& n, - const int& k, - const T* alpha, - T** A, - const int& lda, - T** B, - const int& ldb, - const T* beta, - T** C, - const int& ldc, - const int& batch_size) +struct blas_gemm_batched +{ + void + operator() (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const T* alpha, + T** A, + const int& lda, + T** B, + const int& ldb, + const T* beta, + T** C, + const int& ldc, + const int& batch_size) { - cuBlasConnector::gemm_batched(cublas_handle, transa, transb, m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc, batch_size); + cuBlasConnector:: + gemm_batched (cublas_handle, transa, transb, m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc, batch_size); } }; template -struct blas_gemm_batched_strided { - void operator()( - const char& transa, - const 
char& transb, - const int& m, - const int& n, - const int& k, - const T* alpha, - const T* A, - const int& lda, - const int& stride_a, - const T* B, - const int& ldb, - const int& stride_b, - const T* beta, - T* C, - const int& ldc, - const int& stride_c, - const int& batch_size) +struct blas_gemm_batched_strided +{ + void + operator() (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const T* alpha, + const T* A, + const int& lda, + const int& stride_a, + const T* B, + const int& ldb, + const int& stride_b, + const T* beta, + T* C, + const int& ldc, + const int& stride_c, + const int& batch_size) { - cuBlasConnector::gemm_batched_strided(cublas_handle, transa, transb, m, n, k, *alpha, A, lda, stride_a, B, ldb, stride_b, *beta, C, ldc, stride_c, batch_size); + cuBlasConnector::gemm_batched_strided (cublas_handle, + transa, + transb, + m, + n, + k, + *alpha, + A, + lda, + stride_a, + B, + ldb, + stride_b, + *beta, + C, + ldc, + stride_c, + batch_size); } }; // Explicitly instantiate functors for the types of functor registered. 
- - -template struct blas_copy; +template struct blas_copy; template struct blas_copy; -template struct blas_copy , DEVICE_GPU>; +template struct blas_copy, DEVICE_GPU>; template struct blas_copy, DEVICE_GPU>; -template struct blas_nrm2; +template struct blas_nrm2; template struct blas_nrm2; -template struct blas_nrm2 , DEVICE_GPU>; +template struct blas_nrm2, DEVICE_GPU>; template struct blas_nrm2, DEVICE_GPU>; -template struct blas_dot; +template struct blas_dot; template struct blas_dot; -template struct blas_dot , DEVICE_GPU>; +template struct blas_dot, DEVICE_GPU>; template struct blas_dot, DEVICE_GPU>; -template struct blas_scal; +template struct blas_scal; template struct blas_scal; -template struct blas_scal , DEVICE_GPU>; +template struct blas_scal, DEVICE_GPU>; template struct blas_scal, DEVICE_GPU>; -template struct blas_axpy; +template struct blas_axpy; template struct blas_axpy; -template struct blas_axpy , DEVICE_GPU>; +template struct blas_axpy, DEVICE_GPU>; template struct blas_axpy, DEVICE_GPU>; -template struct blas_gemv; +template struct blas_gemv; template struct blas_gemv; -template struct blas_gemv, DEVICE_GPU>; +template struct blas_gemv, DEVICE_GPU>; template struct blas_gemv, DEVICE_GPU>; -template struct blas_gemv_batched; +template struct blas_gemv_batched; template struct blas_gemv_batched; -template struct blas_gemv_batched, DEVICE_GPU>; +template struct blas_gemv_batched, DEVICE_GPU>; template struct blas_gemv_batched, DEVICE_GPU>; -template struct blas_gemv_batched_strided; +template struct blas_gemv_batched_strided; template struct blas_gemv_batched_strided; -template struct blas_gemv_batched_strided, DEVICE_GPU>; +template struct blas_gemv_batched_strided, DEVICE_GPU>; template struct blas_gemv_batched_strided, DEVICE_GPU>; -template struct blas_gemm; +template struct blas_gemm; template struct blas_gemm; -template struct blas_gemm, DEVICE_GPU>; +template struct blas_gemm, DEVICE_GPU>; template struct blas_gemm, DEVICE_GPU>; 
-template struct blas_gemm_batched; +template struct blas_gemm_batched; template struct blas_gemm_batched; -template struct blas_gemm_batched, DEVICE_GPU>; +template struct blas_gemm_batched, DEVICE_GPU>; template struct blas_gemm_batched, DEVICE_GPU>; -template struct blas_gemm_batched_strided; +template struct blas_gemm_batched_strided; template struct blas_gemm_batched_strided; -template struct blas_gemm_batched_strided, DEVICE_GPU>; +template struct blas_gemm_batched_strided, DEVICE_GPU>; template struct blas_gemm_batched_strided, DEVICE_GPU>; } // namespace kernels diff --git a/source/source_base/module_container/ATen/kernels/cuda/lapack.cu b/source/source_base/module_container/ATen/kernels/cuda/lapack.cu index 4c69973b6be..8a6c723fc2f 100644 --- a/source/source_base/module_container/ATen/kernels/cuda/lapack.cu +++ b/source/source_base/module_container/ATen/kernels/cuda/lapack.cu @@ -10,138 +10,127 @@ #include "source_base/module_device/device_check.h" - -namespace container { -namespace kernels { - +namespace container +{ +namespace kernels +{ static cusolverDnHandle_t cusolver_handle = nullptr; -void createGpuSolverHandle() { - if (cusolver_handle == nullptr) { - CHECK_CUSOLVER(cusolverDnCreate(&cusolver_handle)); - } +void + createGpuSolverHandle () +{ + if (cusolver_handle == nullptr) + { + CHECK_CUSOLVER (cusolverDnCreate (&cusolver_handle)); + } } -void destroyGpuSolverHandle() { - if (cusolver_handle != nullptr) { - CHECK_CUSOLVER(cusolverDnDestroy(cusolver_handle)); - cusolver_handle = nullptr; - } +void + destroyGpuSolverHandle () +{ + if (cusolver_handle != nullptr) + { + CHECK_CUSOLVER (cusolverDnDestroy (cusolver_handle)); + cusolver_handle = nullptr; + } } template -__global__ void set_matrix_kernel( - const char uplo, - T* A, - const int dim) +__global__ void + set_matrix_kernel (const char uplo, T* A, const int dim) { int bid = blockIdx.x; int tid = threadIdx.x; - for (int ii = tid; ii < bid + 1; ii += THREADS_PER_BLOCK) { - if (uplo == 'L') { - 
A[ii * dim + bid + 1] = static_cast(0); - } - else { - A[(bid + 1) * dim + ii] = static_cast(0); + for (int ii = tid; ii < bid + 1; ii += THREADS_PER_BLOCK) + { + if (uplo == 'L') + { + A[ii * dim + bid + 1] = static_cast (0); + } + else + { + A[(bid + 1) * dim + ii] = static_cast (0); + } } - } } template -struct set_matrix { +struct set_matrix +{ using Type = typename GetTypeThrust::type; - void operator() ( - const char& uplo, - T* A, - const int& dim) + void + operator() (const char& uplo, T* A, const int& dim) { - set_matrix_kernel<<>>( - uplo, reinterpret_cast(A), dim); + set_matrix_kernel<<>> (uplo, reinterpret_cast (A), dim); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } }; - - // --- 1. Matrix Decomposition --- template -struct lapack_trtri { - void operator()( - const char& uplo, - const char& diag, - const int& dim, - T* Mat, - const int& lda) +struct lapack_trtri +{ + void + operator() (const char& uplo, const char& diag, const int& dim, T* Mat, const int& lda) { // TODO: trtri is not implemented in this method yet // Cause the trtri in cuSolver is not stable for ABACUS! 
- cuSolverConnector::trtri(cusolver_handle, uplo, diag, dim, Mat, lda); + cuSolverConnector::trtri (cusolver_handle, uplo, diag, dim, Mat, lda); // cuSolverConnector::potri(cusolver_handle, uplo, diag, dim, Mat, lda); } }; template -struct lapack_potrf { - void operator()( - const char& uplo, - const int& dim, - T* Mat, - const int& lda) +struct lapack_potrf +{ + void + operator() (const char& uplo, const int& dim, T* Mat, const int& lda) { - cuSolverConnector::potrf(cusolver_handle, uplo, dim, Mat, dim); + cuSolverConnector::potrf (cusolver_handle, uplo, dim, Mat, dim); } }; template -struct lapack_getrf { - void operator()( - const int& m, - const int& n, - T* Mat, - const int& lda, - int* ipiv) +struct lapack_getrf +{ + void + operator() (const int& m, const int& n, T* Mat, const int& lda, int* ipiv) { - cuSolverConnector::getrf(cusolver_handle, m, n, Mat, lda, ipiv); + cuSolverConnector::getrf (cusolver_handle, m, n, Mat, lda, ipiv); } }; template -struct lapack_getri { - void operator()( - const int& n, - T* Mat, - const int& lda, - const int* ipiv, - T* work, - const int& lwork) +struct lapack_getri +{ + void + operator() (const int& n, T* Mat, const int& lda, const int* ipiv, T* work, const int& lwork) { - throw std::runtime_error("cuSOLVER does not provide LU-based matrix inversion interface (getri). To compute the inverse on GPU, use getrs instead."); + throw std::runtime_error ("cuSOLVER does not provide LU-based matrix inversion interface (getri). 
To compute " + "the inverse on GPU, use getrs instead."); } }; - template -struct lapack_geqrf_inplace { - void operator()( - const int m, - const int n, - T *d_A, - const int lda) +struct lapack_geqrf_inplace +{ + void + operator() (const int m, const int n, T* d_A, const int lda) { - const int k = std::min(m, n); + const int k = std::min (m, n); // Allocate tau on device - T *d_tau; - CHECK_CUDA(cudaMalloc(&d_tau, sizeof(T) * k)); + T* d_tau; + CHECK_CUDA (cudaMalloc (&d_tau, sizeof (T) * k)); - cuSolverConnector::geqrf(cusolver_handle, m, n, d_A, lda, d_tau); + cuSolverConnector::geqrf (cusolver_handle, m, n, d_A, lda, d_tau); - cuSolverConnector::orgqr(cusolver_handle, m, n, k, d_A, lda, d_tau); + cuSolverConnector::orgqr (cusolver_handle, m, n, k, d_A, lda, d_tau); - CHECK_CUDA(cudaFree(d_tau)); + CHECK_CUDA (cudaFree (d_tau)); // // geqrf: workspace query @@ -212,112 +201,93 @@ struct lapack_geqrf_inplace { // --- 2. Linear System Solvers --- template -struct lapack_getrs { - void operator()( - const char& trans, - const int& n, - const int& nrhs, - T* A, - const int& lda, - const int* ipiv, - T* B, - const int& ldb) +struct lapack_getrs +{ + void + operator() (const char& trans, + const int& n, + const int& nrhs, + T* A, + const int& lda, + const int* ipiv, + T* B, + const int& ldb) { - cuSolverConnector::getrs(cusolver_handle, trans, n, nrhs, A, lda, ipiv, B, ldb); + cuSolverConnector::getrs (cusolver_handle, trans, n, nrhs, A, lda, ipiv, B, ldb); } }; - // --- 3. 
Standard & Generalized Eigenvalue --- template -struct lapack_heevd { +struct lapack_heevd +{ using Real = typename GetTypeReal::type; - void operator()( - const int dim, - T* Mat, - const int lda, - Real* eigen_val) + void + operator() (const int dim, T* Mat, const int lda, Real* eigen_val) { - char jobz = 'V'; // Compute eigenvalues and eigenvectors + char jobz = 'V'; // Compute eigenvalues and eigenvectors char uplo = 'U'; - cuSolverConnector::heevd(cusolver_handle, jobz, uplo, dim, Mat, lda, eigen_val); + cuSolverConnector::heevd (cusolver_handle, jobz, uplo, dim, Mat, lda, eigen_val); } }; template -struct lapack_heevx { +struct lapack_heevx +{ using Real = typename GetTypeReal::type; - void operator()( - const int n, - const int lda, - const T *d_Mat, - const int neig, - Real *d_eigen_val, - T *d_eigen_vec) + void + operator() (const int n, const int lda, const T* d_Mat, const int neig, Real* d_eigen_val, T* d_eigen_vec) { - assert(n <= lda); + assert (n <= lda); // copy d_Mat to d_eigen_vec, and results will be overwritten into d_eigen_vec // by cuSolver - CHECK_CUDA(cudaMemcpy(d_eigen_vec, d_Mat, sizeof(T) * n * lda, cudaMemcpyDeviceToDevice)); + CHECK_CUDA (cudaMemcpy (d_eigen_vec, d_Mat, sizeof (T) * n * lda, cudaMemcpyDeviceToDevice)); int meig = 0; - cuSolverConnector::heevdx( - cusolver_handle, - n, - lda, - d_eigen_vec, - 'V', // jobz: compute vectors - 'L', // uplo: lower triangle - 'I', // range: by index - 1, neig, // il, iu - Real(0), Real(0), // vl, vu (unused) - d_eigen_val, - &meig - ); - + cuSolverConnector::heevdx (cusolver_handle, + n, + lda, + d_eigen_vec, + 'V', // jobz: compute vectors + 'L', // uplo: lower triangle + 'I', // range: by index + 1, + neig, // il, iu + Real (0), + Real (0), // vl, vu (unused) + d_eigen_val, + &meig); } }; template -struct lapack_hegvd { +struct lapack_hegvd +{ using Real = typename GetTypeReal::type; - void operator()( - const int dim, - const int lda, - T* Mat_A, - T* Mat_B, - Real* eigen_val, - T 
*eigen_vec) + void + operator() (const int dim, const int lda, T* Mat_A, T* Mat_B, Real* eigen_val, T* eigen_vec) { const int itype = 1; const char jobz = 'V'; const char uplo = 'U'; - CHECK_CUDA(cudaMemcpy(eigen_vec, Mat_A, sizeof(T) * dim * lda, cudaMemcpyDeviceToDevice)); + CHECK_CUDA (cudaMemcpy (eigen_vec, Mat_A, sizeof (T) * dim * lda, cudaMemcpyDeviceToDevice)); // prevent B from being overwritten by Cholesky - T *d_B_backup = nullptr; - CHECK_CUDA(cudaMalloc(&d_B_backup, sizeof(T) * dim * lda)); - CHECK_CUDA(cudaMemcpy(d_B_backup, Mat_B, sizeof(T) * dim * lda, cudaMemcpyDeviceToDevice)); - - cuSolverConnector::hegvd(cusolver_handle, itype, jobz, uplo, dim, - eigen_vec, lda, - d_B_backup, lda, - eigen_val); - CHECK_CUDA(cudaFree(d_B_backup)); + T* d_B_backup = nullptr; + CHECK_CUDA (cudaMalloc (&d_B_backup, sizeof (T) * dim * lda)); + CHECK_CUDA (cudaMemcpy (d_B_backup, Mat_B, sizeof (T) * dim * lda, cudaMemcpyDeviceToDevice)); + + cuSolverConnector::hegvd (cusolver_handle, itype, jobz, uplo, dim, eigen_vec, lda, d_B_backup, lda, eigen_val); + CHECK_CUDA (cudaFree (d_B_backup)); } }; template -struct lapack_hegvx { +struct lapack_hegvx +{ using Real = typename GetTypeReal::type; - void operator()( - const int n, - const int lda, - T *A, - T *B, - const int m, - Real *eigen_val, - T *eigen_vec) + void + operator() (const int n, const int lda, T* A, T* B, const int m, Real* eigen_val, T* eigen_vec) { const int itype = 1; const char jobz = 'V'; @@ -327,42 +297,48 @@ struct lapack_hegvx { // this hegvdx will protect the input A, B from being overwritten // and write the eigenvectors into eigen_vec. 
- cuSolverConnector::hegvdx(cusolver_handle, - itype, jobz, range, uplo, - n, lda, A, B, - Real(0), Real(0), - 1, m, &meig, - eigen_val, eigen_vec); + cuSolverConnector::hegvdx (cusolver_handle, + itype, + jobz, + range, + uplo, + n, + lda, + A, + B, + Real (0), + Real (0), + 1, + m, + &meig, + eigen_val, + eigen_vec); } }; - - -template struct set_matrix; +template struct set_matrix; template struct set_matrix; -template struct set_matrix, DEVICE_GPU>; +template struct set_matrix, DEVICE_GPU>; template struct set_matrix, DEVICE_GPU>; -template struct lapack_trtri; +template struct lapack_trtri; template struct lapack_trtri; -template struct lapack_trtri, DEVICE_GPU>; +template struct lapack_trtri, DEVICE_GPU>; template struct lapack_trtri, DEVICE_GPU>; -template struct lapack_potrf; +template struct lapack_potrf; template struct lapack_potrf; -template struct lapack_potrf, DEVICE_GPU>; +template struct lapack_potrf, DEVICE_GPU>; template struct lapack_potrf, DEVICE_GPU>; - -template struct lapack_getrs; +template struct lapack_getrs; template struct lapack_getrs; -template struct lapack_getrs, DEVICE_GPU>; +template struct lapack_getrs, DEVICE_GPU>; template struct lapack_getrs, DEVICE_GPU>; - -template struct lapack_heevd; +template struct lapack_heevd; template struct lapack_heevd; -template struct lapack_heevd, DEVICE_GPU>; +template struct lapack_heevd, DEVICE_GPU>; template struct lapack_heevd, DEVICE_GPU>; template struct lapack_heevx; @@ -370,29 +346,29 @@ template struct lapack_heevx; template struct lapack_heevx, DEVICE_GPU>; template struct lapack_heevx, DEVICE_GPU>; -template struct lapack_hegvd; +template struct lapack_hegvd; template struct lapack_hegvd; -template struct lapack_hegvd, DEVICE_GPU>; +template struct lapack_hegvd, DEVICE_GPU>; template struct lapack_hegvd, DEVICE_GPU>; -template struct lapack_hegvx; +template struct lapack_hegvx; template struct lapack_hegvx; -template struct lapack_hegvx, DEVICE_GPU>; +template struct lapack_hegvx, 
DEVICE_GPU>; template struct lapack_hegvx, DEVICE_GPU>; -template struct lapack_getrf; +template struct lapack_getrf; template struct lapack_getrf; -template struct lapack_getrf, DEVICE_GPU>; +template struct lapack_getrf, DEVICE_GPU>; template struct lapack_getrf, DEVICE_GPU>; -template struct lapack_getri; +template struct lapack_getri; template struct lapack_getri; -template struct lapack_getri, DEVICE_GPU>; +template struct lapack_getri, DEVICE_GPU>; template struct lapack_getri, DEVICE_GPU>; -template struct lapack_geqrf_inplace; +template struct lapack_geqrf_inplace; template struct lapack_geqrf_inplace; -template struct lapack_geqrf_inplace, DEVICE_GPU>; +template struct lapack_geqrf_inplace, DEVICE_GPU>; template struct lapack_geqrf_inplace, DEVICE_GPU>; } // namespace kernels diff --git a/source/source_base/module_container/ATen/kernels/cuda/linalg.cu b/source/source_base/module_container/ATen/kernels/cuda/linalg.cu index 9fc1f15c518..ddea4cbaac3 100644 --- a/source/source_base/module_container/ATen/kernels/cuda/linalg.cu +++ b/source/source_base/module_container/ATen/kernels/cuda/linalg.cu @@ -6,433 +6,486 @@ #include #include -namespace container { -namespace kernels { +namespace container +{ +namespace kernels +{ template -__device__ static inline -T conj(T& in) { +__device__ static inline T + conj (T& in) +{ return in; } template -__device__ static inline -thrust::complex conj(thrust::complex& in) { - return thrust::conj(in); +__device__ static inline thrust::complex + conj (thrust::complex& in) +{ + return thrust::conj (in); } template -__global__ void do_add_kernel( - const int num_element, - const T alpha, - const T* x, - const T beta, - const T* y, - T* z) +__global__ void + do_add_kernel (const int num_element, const T alpha, const T* x, const T beta, const T* y, T* z) { // Perform add operation for the specified range [begin, end) in the output Tensor. 
- for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) { - // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'. - z[o_idx] = alpha * x[o_idx] + beta * y[o_idx]; - } + for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) + { + // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index + // 'o_idx'. + z[o_idx] = alpha * x[o_idx] + beta * y[o_idx]; + } } template -__global__ void do_mul_kernel( - const int num_element, - const T alpha, - const T* x, - T* y) +__global__ void + do_mul_kernel (const int num_element, const T alpha, const T* x, T* y) { - for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) { - // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'. - y[o_idx] = alpha * x[o_idx]; - } + for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) + { + // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index + // 'o_idx'. + y[o_idx] = alpha * x[o_idx]; + } } template -__global__ void do_mul_kernel( - const int num_element, - const T alpha, - const T* x, - const T* y, - T* z) +__global__ void + do_mul_kernel (const int num_element, const T alpha, const T* x, const T* y, T* z) { - for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) { - // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'. - z[o_idx] = alpha * x[o_idx] * y[o_idx]; - } + for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) + { + // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index + // 'o_idx'. 
+ z[o_idx] = alpha * x[o_idx] * y[o_idx]; + } } template -__global__ void do_div_kernel( - const int num_element, - const T alpha, - const T* x, - const T* y, - T* z) +__global__ void + do_div_kernel (const int num_element, const T alpha, const T* x, const T* y, T* z) { - for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) { - // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'. - z[o_idx] = alpha * x[o_idx] / y[o_idx]; - } + for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) + { + // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index + // 'o_idx'. + z[o_idx] = alpha * x[o_idx] / y[o_idx]; + } } template -__global__ void do_fma_kernel( - const int num_element, - const T alpha, - const T* x, - const T* y, - const T beta, - const T* z, - T* out) +__global__ void + do_fma_kernel (const int num_element, const T alpha, const T* x, const T* y, const T beta, const T* z, T* out) { - for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) { - // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'. - out[o_idx] = alpha * x[o_idx] * y[o_idx] + beta * z[o_idx]; - } + for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) + { + // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index + // 'o_idx'. 
+ out[o_idx] = alpha * x[o_idx] * y[o_idx] + beta * z[o_idx]; + } } template -__global__ void do_transpose_kernel( - int ndim, - int64_t num_elements, - const T* p, - const int* perm, - const int64_t* in_strides, - const int64_t* out_strides, - T* q) +__global__ void + do_transpose_kernel (int ndim, + int64_t num_elements, + const T* p, + const int* perm, + const int64_t* in_strides, + const int64_t* out_strides, + T* q) { - for (int64_t o_idx = 0; o_idx < num_elements; o_idx++) { - int64_t i_idx = 0; // Initialize the index for the input Tensor element. - int64_t current_o_idx = o_idx; // Calculate the index for the output Tensor element. - - // Iterate over each dimension of the output Tensor. - for (int ii = 0; ii < ndim; ++ii) { - // Calculate the ratio of the current output Tensor index 'current_o_idx' in the current dimension. - const int64_t ratio = current_o_idx / out_strides[ii]; - // Update the output Tensor index 'current_o_idx' by removing the offset in the current dimension. - current_o_idx -= ratio * out_strides[ii]; - // Calculate the offset for the corresponding index position in the input Tensor and accumulate it in 'i_idx'. - i_idx += ratio * in_strides[perm[ii]]; - } - // Check if conjugation is needed. - if (Conjugate) { - // Assign the conjugate value of the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. - q[o_idx] = kernels::conj(p[i_idx]); - } else { - // Assign the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. - q[o_idx] = p[i_idx]; + for (int64_t o_idx = 0; o_idx < num_elements; o_idx++) + { + int64_t i_idx = 0; // Initialize the index for the input Tensor element. + int64_t current_o_idx = o_idx; // Calculate the index for the output Tensor element. + + // Iterate over each dimension of the output Tensor. + for (int ii = 0; ii < ndim; ++ii) + { + // Calculate the ratio of the current output Tensor index 'current_o_idx' in the current dimension. 
+ const int64_t ratio = current_o_idx / out_strides[ii]; + // Update the output Tensor index 'current_o_idx' by removing the offset in the current dimension. + current_o_idx -= ratio * out_strides[ii]; + // Calculate the offset for the corresponding index position in the input Tensor and accumulate it + // in 'i_idx'. + i_idx += ratio * in_strides[perm[ii]]; + } + // Check if conjugation is needed. + if (Conjugate) + { + // Assign the conjugate value of the input Tensor element at index 'i_idx' to the output Tensor + // element at index 'o_idx'. + q[o_idx] = kernels::conj (p[i_idx]); + } + else + { + // Assign the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. + q[o_idx] = p[i_idx]; + } } - } } template -__global__ void do_stride_kernel( - int ndim, - int64_t size, - const T* p, - const int64_t* stride, - const int64_t* in_strides, - const int64_t* out_strides, - T* q) +__global__ void + do_stride_kernel (int ndim, + int64_t size, + const T* p, + const int64_t* stride, + const int64_t* in_strides, + const int64_t* out_strides, + T* q) { // Perform stride operation for the specified range [begin, end) in the output Tensor. - for (int64_t o_idx = threadIdx.x; o_idx < size; o_idx += blockDim.x) { - int64_t i_idx = 0; // Initialize the index for the input Tensor element. - int64_t current_o_idx = o_idx; // Calculate the index for the output Tensor element. - // Iterate over each dimension of the output Tensor. - for (int ii = 0; ii < ndim; ++ii) { - // Calculate the index in the current dimension. - // It is natural to view a tensor as a multi-dimentional array. - const int64_t current_dim_idx = current_o_idx / out_strides[ii]; - // Update the output Tensor index 'current_o_idx' by removing the offset in the current dimension. - current_o_idx -= current_dim_idx * out_strides[ii]; - // Calculate the offset for the corresponding index position in the input Tensor and accumulate it in 'i_idx'. 
- i_idx += current_dim_idx * stride[ii] * in_strides[ii]; + for (int64_t o_idx = threadIdx.x; o_idx < size; o_idx += blockDim.x) + { + int64_t i_idx = 0; // Initialize the index for the input Tensor element. + int64_t current_o_idx = o_idx; // Calculate the index for the output Tensor element. + // Iterate over each dimension of the output Tensor. + for (int ii = 0; ii < ndim; ++ii) + { + // Calculate the index in the current dimension. + // It is natural to view a tensor as a multi-dimentional array. + const int64_t current_dim_idx = current_o_idx / out_strides[ii]; + // Update the output Tensor index 'current_o_idx' by removing the offset in the current dimension. + current_o_idx -= current_dim_idx * out_strides[ii]; + // Calculate the offset for the corresponding index position in the input Tensor and accumulate it + // in 'i_idx'. + i_idx += current_dim_idx * stride[ii] * in_strides[ii]; + } + // Assign the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. + q[o_idx] = p[i_idx]; } - // Assign the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. - q[o_idx] = p[i_idx]; - } } template -__global__ void do_inflate_kernel( - int ndim, - int64_t size, - const T* p, - const int64_t* stride, - const int64_t* in_strides, - const int64_t* out_strides, - T* q) +__global__ void + do_inflate_kernel (int ndim, + int64_t size, + const T* p, + const int64_t* stride, + const int64_t* in_strides, + const int64_t* out_strides, + T* q) { // Perform stride operation for the specified range [begin, end) in the output Tensor. - for (int64_t o_idx = threadIdx.x; o_idx < size; o_idx += blockDim.x) { - int64_t i_idx = 0; // Initialize the index for the input Tensor element. - int64_t current_o_idx = o_idx; // Calculate the index for the output Tensor element. - bool valid = true; - // Iterate over each dimension of the output Tensor. 
- for (int ii = 0; ii < ndim; ++ii) { - // Calculte the ratio of the current output Tensor index 'current_o_idx' in the current dimension. - const int64_t current_dim_idx = current_o_idx / out_strides[ii]; - // Update the output Tensor index 'current_o_idx' by removing the offset in the current dimension. - current_o_idx -= current_dim_idx * out_strides[ii]; - // Calculate the offset for the corresponding index position in the input Tensor and accumulate it in 'i_idx'. - if (current_dim_idx % stride[ii] == 0) { - i_idx += (current_dim_idx / stride[ii]) * in_strides[ii]; - } - else { - valid = false; - break; - } + for (int64_t o_idx = threadIdx.x; o_idx < size; o_idx += blockDim.x) + { + int64_t i_idx = 0; // Initialize the index for the input Tensor element. + int64_t current_o_idx = o_idx; // Calculate the index for the output Tensor element. + bool valid = true; + // Iterate over each dimension of the output Tensor. + for (int ii = 0; ii < ndim; ++ii) + { + // Calculte the ratio of the current output Tensor index 'current_o_idx' in the current dimension. + const int64_t current_dim_idx = current_o_idx / out_strides[ii]; + // Update the output Tensor index 'current_o_idx' by removing the offset in the current dimension. + current_o_idx -= current_dim_idx * out_strides[ii]; + // Calculate the offset for the corresponding index position in the input Tensor and accumulate it + // in 'i_idx'. + if (current_dim_idx % stride[ii] == 0) + { + i_idx += (current_dim_idx / stride[ii]) * in_strides[ii]; + } + else + { + valid = false; + break; + } + } + // Assign the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. + q[o_idx] = p[i_idx] * static_cast (valid ? 1.0 : 0.0); } - // Assign the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. - q[o_idx] = p[i_idx] * static_cast(valid ? 
1.0 : 0.0); - } } template -__global__ void do_reduce_kernel( - int64_t size, - int64_t inner_most_dim, - const T* p, - T* q) +__global__ void + do_reduce_kernel (int64_t size, int64_t inner_most_dim, const T* p, T* q) { - for (int64_t o_idx = threadIdx.x; o_idx < size; o_idx += blockDim.x) { - T sum = 0; - for (int64_t i_idx = o_idx * inner_most_dim; i_idx < inner_most_dim + o_idx * inner_most_dim; i_idx++) { - sum += p[i_idx]; + for (int64_t o_idx = threadIdx.x; o_idx < size; o_idx += blockDim.x) + { + T sum = 0; + for (int64_t i_idx = o_idx * inner_most_dim; i_idx < inner_most_dim + o_idx * inner_most_dim; i_idx++) + { + sum += p[i_idx]; + } + q[o_idx] = sum; } - q[o_idx] = sum; - } } template -static std::vector compute_stride(const std::vector& shape) { - int ndims = shape.size(); - std::vector strides(ndims); +static std::vector + compute_stride (const std::vector& shape) +{ + int ndims = shape.size (); + std::vector strides (ndims); T stride = 1; - auto it = shape.end(); // Start from the last element - for (int ii = ndims - 1; ii >= 0; ii--) { - it--; - strides[ii] = stride; - stride *= static_cast(*it); - } - return std::move(strides); + auto it = shape.end (); // Start from the last element + for (int ii = ndims - 1; ii >= 0; ii--) + { + it--; + strides[ii] = stride; + stride *= static_cast (*it); + } + return std::move (strides); } -template -void add::operator()(const int& num_element, const T& alpha, const T* x, const T& beta, const T* y, T* z) { +template +void + add::operator() (const int& num_element, const T& alpha, const T* x, const T& beta, const T* y, T* z) +{ using Type = typename GetTypeThrust::type; - auto alpha_ = *reinterpret_cast(&alpha); - auto beta_ = *reinterpret_cast(&beta); + auto alpha_ = *reinterpret_cast (&alpha); + auto beta_ = *reinterpret_cast (&beta); const int block = (num_element + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - do_add_kernel<<>> ( - num_element, alpha_, reinterpret_cast(x), - beta_, reinterpret_cast(y), 
reinterpret_cast(z)); - - CHECK_CUDA_SYNC(); + do_add_kernel<<>> (num_element, + alpha_, + reinterpret_cast (x), + beta_, + reinterpret_cast (y), + reinterpret_cast (z)); + + CHECK_CUDA_SYNC (); } -template -void mul::operator()(const int& num_element, const T& alpha, const T* x, T* y) { +template +void + mul::operator() (const int& num_element, const T& alpha, const T* x, T* y) +{ using Type = typename GetTypeThrust::type; - auto alpha_ = *reinterpret_cast(&alpha); + auto alpha_ = *reinterpret_cast (&alpha); const int block = (num_element + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - do_mul_kernel<<>> ( - num_element, alpha_, - reinterpret_cast(x), reinterpret_cast(y)); + do_mul_kernel<<>> (num_element, + alpha_, + reinterpret_cast (x), + reinterpret_cast (y)); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } -template -void mul::operator()(const int& num_element, const T& alpha, const T* x, const T* y, T* z) { +template +void + mul::operator() (const int& num_element, const T& alpha, const T* x, const T* y, T* z) +{ using Type = typename GetTypeThrust::type; - auto alpha_ = *reinterpret_cast(&alpha); + auto alpha_ = *reinterpret_cast (&alpha); const int block = (num_element + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - do_mul_kernel<<>> ( - num_element, alpha_, - reinterpret_cast(x), reinterpret_cast(y), reinterpret_cast(z)); + do_mul_kernel<<>> (num_element, + alpha_, + reinterpret_cast (x), + reinterpret_cast (y), + reinterpret_cast (z)); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } -template -void div::operator()(const int& num_element, const T& alpha, const T* x, const T* y, T* z) { +template +void + div::operator() (const int& num_element, const T& alpha, const T* x, const T* y, T* z) +{ using Type = typename GetTypeThrust::type; - auto alpha_ = *reinterpret_cast(&alpha); + auto alpha_ = *reinterpret_cast (&alpha); const int block = (num_element + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - do_div_kernel<<>> ( - num_element, alpha_, reinterpret_cast(x), 
reinterpret_cast(y), reinterpret_cast(z)); + do_div_kernel<<>> (num_element, + alpha_, + reinterpret_cast (x), + reinterpret_cast (y), + reinterpret_cast (z)); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } -template -void fma::operator()(const int& num_element, const T& alpha, const T* x, const T* y, const T& beta, const T* z, T* out) { +template +void + fma::operator() (const int& num_element, + const T& alpha, + const T* x, + const T* y, + const T& beta, + const T* z, + T* out) +{ using Type = typename GetTypeThrust::type; - auto alpha_ = *reinterpret_cast(&alpha); - auto beta_ = *reinterpret_cast(&beta); + auto alpha_ = *reinterpret_cast (&alpha); + auto beta_ = *reinterpret_cast (&beta); const int block = (num_element + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - do_fma_kernel<<>> ( - num_element, alpha_, reinterpret_cast(x), reinterpret_cast(y), - beta_, reinterpret_cast(z), reinterpret_cast(out)); - - CHECK_CUDA_SYNC(); + do_fma_kernel<<>> (num_element, + alpha_, + reinterpret_cast (x), + reinterpret_cast (y), + beta_, + reinterpret_cast (z), + reinterpret_cast (out)); + + CHECK_CUDA_SYNC (); } -template -void transpose::operator()( - const std::vector &perm, - const std::vector &p_shape, - const std::vector &q_shape, - const T *p, - T *q) +template +void + transpose::operator() (const std::vector& perm, + const std::vector& p_shape, + const std::vector& q_shape, + const T* p, + T* q) { using Type = typename GetTypeThrust::type; - REQUIRES_OK(p_shape.size() == q_shape.size(), - "transpose: p and q must have the same number of dimensions"); - const int ndim = static_cast(p_shape.size()); - auto in_strides = compute_stride(p_shape); - auto out_strides = compute_stride(q_shape); + REQUIRES_OK (p_shape.size () == q_shape.size (), "transpose: p and q must have the same number of dimensions"); + const int ndim = static_cast (p_shape.size ()); + auto in_strides = compute_stride (p_shape); + auto out_strides = compute_stride (q_shape); int num_elements = 1; - for 
(int ii = 0; ii < ndim; ++ii) { - num_elements *= static_cast(q_shape[ii]); - } + for (int ii = 0; ii < ndim; ++ii) + { + num_elements *= static_cast (q_shape[ii]); + } num_elements = ndim ? num_elements : 0; - Tensor t_perm(DataType::DT_INT, DeviceType::GpuDevice, {ndim}); - Tensor t_in_strides(DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); - Tensor t_out_strides(DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); + Tensor t_perm (DataType::DT_INT, DeviceType::GpuDevice, {ndim}); + Tensor t_in_strides (DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); + Tensor t_out_strides (DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); - kernels::synchronize_memory()( - t_perm.data(), perm.data(), perm.size()); - kernels::synchronize_memory()( - t_in_strides.data(), in_strides.data(), in_strides.size()); - kernels::synchronize_memory()( - t_out_strides.data(), out_strides.data(), out_strides.size()); + kernels::synchronize_memory () (t_perm.data (), perm.data (), perm.size ()); + kernels::synchronize_memory () (t_in_strides.data (), + in_strides.data (), + in_strides.size ()); + kernels::synchronize_memory () (t_out_strides.data (), + out_strides.data (), + out_strides.size ()); - const Type* p_ = reinterpret_cast(p); - Type* q_ = reinterpret_cast((q)); + const Type* p_ = reinterpret_cast (p); + Type* q_ = reinterpret_cast ((q)); const int block = (num_elements + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - do_transpose_kernel<<>> ( - ndim, num_elements, p_, t_perm.data(), - t_in_strides.data(), t_out_strides.data(), q_); - - CHECK_CUDA_SYNC(); + do_transpose_kernel<<>> (ndim, + num_elements, + p_, + t_perm.data (), + t_in_strides.data (), + t_out_strides.data (), + q_); + + CHECK_CUDA_SYNC (); } -template -void stride::operator()( - const std::vector &stride, - const std::vector &p_shape, - const std::vector &q_shape, - const T *p, - T *q) +template +void + stride::operator() (const std::vector& stride, + const std::vector& p_shape, + const std::vector& q_shape, + 
const T* p, + T* q) { using Type = typename GetTypeThrust::type; - REQUIRES_OK(p_shape.size() == q_shape.size(), - "transpose: p and q must have the same number of dimensions"); - const int ndim = static_cast(p_shape.size()); - auto in_strides = compute_stride(p_shape); - auto out_strides = compute_stride(q_shape); + REQUIRES_OK (p_shape.size () == q_shape.size (), "transpose: p and q must have the same number of dimensions"); + const int ndim = static_cast (p_shape.size ()); + auto in_strides = compute_stride (p_shape); + auto out_strides = compute_stride (q_shape); int num_elements = 1; - for (int ii = 0; ii < ndim; ++ii) { - num_elements *= static_cast(q_shape[ii]); - } + for (int ii = 0; ii < ndim; ++ii) + { + num_elements *= static_cast (q_shape[ii]); + } num_elements = ndim ? num_elements : 0; - Tensor t_stride(DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); - Tensor t_in_strides(DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); - Tensor t_out_strides(DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); + Tensor t_stride (DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); + Tensor t_in_strides (DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); + Tensor t_out_strides (DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); - kernels::synchronize_memory()( - t_stride.data(), stride.data(), stride.size()); - kernels::synchronize_memory()( - t_in_strides.data(), in_strides.data(), in_strides.size()); - kernels::synchronize_memory()( - t_out_strides.data(), out_strides.data(), out_strides.size()); + kernels::synchronize_memory () (t_stride.data (), + stride.data (), + stride.size ()); + kernels::synchronize_memory () (t_in_strides.data (), + in_strides.data (), + in_strides.size ()); + kernels::synchronize_memory () (t_out_strides.data (), + out_strides.data (), + out_strides.size ()); - const Type* p_ = reinterpret_cast(p); - Type* q_ = reinterpret_cast((q)); + const Type* p_ = reinterpret_cast (p); + Type* q_ = reinterpret_cast ((q)); const int block = 
(num_elements + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - do_stride_kernel<<>> ( - ndim, num_elements, p_, t_stride.data(), t_in_strides.data(), t_out_strides.data(), q_); - - CHECK_CUDA_SYNC(); + do_stride_kernel<<>> (ndim, + num_elements, + p_, + t_stride.data (), + t_in_strides.data (), + t_out_strides.data (), + q_); + + CHECK_CUDA_SYNC (); } - -template -void inflate::operator()( - const std::vector &inflate, - const std::vector &p_shape, - const std::vector &q_shape, - const T *p, - T *q) +template +void + inflate::operator() (const std::vector& inflate, + const std::vector& p_shape, + const std::vector& q_shape, + const T* p, + T* q) { using Type = typename GetTypeThrust::type; - REQUIRES_OK(p_shape.size() == q_shape.size(), - "transpose: p and q must have the same number of dimensions"); - const int ndim = static_cast(p_shape.size()); - auto in_strides = compute_stride(p_shape); - auto out_strides = compute_stride(q_shape); + REQUIRES_OK (p_shape.size () == q_shape.size (), "transpose: p and q must have the same number of dimensions"); + const int ndim = static_cast (p_shape.size ()); + auto in_strides = compute_stride (p_shape); + auto out_strides = compute_stride (q_shape); int num_elements = 1; - for (int ii = 0; ii < ndim; ++ii) { - num_elements *= static_cast(q_shape[ii]); - } + for (int ii = 0; ii < ndim; ++ii) + { + num_elements *= static_cast (q_shape[ii]); + } num_elements = ndim ? 
num_elements : 0; - Tensor t_stride(DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); - Tensor t_in_strides(DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); - Tensor t_out_strides(DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); + Tensor t_stride (DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); + Tensor t_in_strides (DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); + Tensor t_out_strides (DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); - kernels::synchronize_memory()( - t_stride.data(), inflate.data(), inflate.size()); - kernels::synchronize_memory()( - t_in_strides.data(), in_strides.data(), in_strides.size()); - kernels::synchronize_memory()( - t_out_strides.data(), out_strides.data(), out_strides.size()); + kernels::synchronize_memory () (t_stride.data (), + inflate.data (), + inflate.size ()); + kernels::synchronize_memory () (t_in_strides.data (), + in_strides.data (), + in_strides.size ()); + kernels::synchronize_memory () (t_out_strides.data (), + out_strides.data (), + out_strides.size ()); - const Type* p_ = reinterpret_cast(p); - Type* q_ = reinterpret_cast((q)); + const Type* p_ = reinterpret_cast (p); + Type* q_ = reinterpret_cast ((q)); const int block = (num_elements + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - do_inflate_kernel<<>> ( - ndim, num_elements, p_, t_stride.data(), t_in_strides.data(), t_out_strides.data(), q_); - - CHECK_CUDA_SYNC(); + do_inflate_kernel<<>> (ndim, + num_elements, + p_, + t_stride.data (), + t_in_strides.data (), + t_out_strides.data (), + q_); + + CHECK_CUDA_SYNC (); } - -template -void reduce::operator()( - const int64_t &num_element, - const int64_t &inner_most_dim, - const T *p, - T *q) +template +void + reduce::operator() (const int64_t& num_element, const int64_t& inner_most_dim, const T* p, T* q) { using Type = typename GetTypeThrust::type; - const Type* p_ = reinterpret_cast(p); - Type* q_ = reinterpret_cast((q)); + const Type* p_ = reinterpret_cast (p); + Type* q_ = reinterpret_cast ((q)); - 
const int block = (static_cast(num_element) + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - do_reduce_kernel<<>> ( - num_element, inner_most_dim, p_, q_); + const int block = (static_cast (num_element) + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + do_reduce_kernel<<>> (num_element, inner_most_dim, p_, q_); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } template struct add; diff --git a/source/source_base/module_container/ATen/kernels/cuda/memory.cu b/source/source_base/module_container/ATen/kernels/cuda/memory.cu index 1012b351eab..4b68b9c768c 100644 --- a/source/source_base/module_container/ATen/kernels/cuda/memory.cu +++ b/source/source_base/module_container/ATen/kernels/cuda/memory.cu @@ -4,154 +4,148 @@ #include #include -namespace container { -namespace kernels { +namespace container +{ +namespace kernels +{ template -__global__ void do_set_memory( - T* out, - const T var, - const size_t size) +__global__ void + do_set_memory (T* out, const T var, const size_t size) { unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= size) {return;} + if (idx >= size) + { + return; + } out[idx] = var; } template -__global__ void do_cast_memory( - T_out* out, - const T_in* in, - const int size) +__global__ void + do_cast_memory (T_out* out, const T_in* in, const int size) { unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= size) {return;} - out[idx] = static_cast(in[idx]); + if (idx >= size) + { + return; + } + out[idx] = static_cast (in[idx]); } template -__global__ void do_cast_memory( - std::complex* out, - const std::complex* in, - const int size) +__global__ void + do_cast_memory (std::complex* out, const std::complex* in, const int size) { unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= size) {return;} - auto* _out = reinterpret_cast*>(out); - const auto* _in = reinterpret_cast*>(in); - _out[idx] = static_cast>(_in[idx]); + if (idx >= size) + { + return; + } + auto* _out = reinterpret_cast*> (out); + const 
auto* _in = reinterpret_cast*> (in); + _out[idx] = static_cast> (_in[idx]); } template -void resize_memory::operator()( - T*& arr, - const size_t& size, - const char* record_in) +void + resize_memory::operator() (T*& arr, const size_t& size, const char* record_in) { - if (arr != nullptr) { - delete_memory()(arr); - } - CHECK_CUDA(cudaMalloc((void **)&arr, sizeof(T) * size)); + if (arr != nullptr) + { + delete_memory () (arr); + } + CHECK_CUDA (cudaMalloc ((void**)&arr, sizeof (T) * size)); } template -void set_memory::operator()( - T* arr, - const T& var, - const size_t& size) +void + set_memory::operator() (T* arr, const T& var, const size_t& size) { - const int block = static_cast((size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK); - do_set_memory<<>>(arr, var, size); - CHECK_CUDA_SYNC(); + const int block = static_cast ((size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK); + do_set_memory<<>> (arr, var, size); + CHECK_CUDA_SYNC (); } template -struct synchronize_memory { - void operator()( - T *arr_out, - const T *arr_in, - const size_t& size) +struct synchronize_memory +{ + void + operator() (T* arr_out, const T* arr_in, const size_t& size) { - CHECK_CUDA(cudaMemcpy(arr_out, arr_in, sizeof(T) * size, cudaMemcpyDeviceToHost)); + CHECK_CUDA (cudaMemcpy (arr_out, arr_in, sizeof (T) * size, cudaMemcpyDeviceToHost)); } }; template -struct synchronize_memory { - void operator()( - T *arr_out, - const T *arr_in, - const size_t& size) +struct synchronize_memory +{ + void + operator() (T* arr_out, const T* arr_in, const size_t& size) { - CHECK_CUDA(cudaMemcpy(arr_out, arr_in, sizeof(T) * size, cudaMemcpyHostToDevice)); + CHECK_CUDA (cudaMemcpy (arr_out, arr_in, sizeof (T) * size, cudaMemcpyHostToDevice)); } }; template -struct synchronize_memory { - void operator()( - T *arr_out, - const T *arr_in, - const size_t& size) +struct synchronize_memory +{ + void + operator() (T* arr_out, const T* arr_in, const size_t& size) { - CHECK_CUDA(cudaMemcpy(arr_out, arr_in, sizeof(T) 
* size, cudaMemcpyHostToDevice)); + CHECK_CUDA (cudaMemcpy (arr_out, arr_in, sizeof (T) * size, cudaMemcpyHostToDevice)); } }; - template -struct cast_memory { - void operator()( - T_out* arr_out, - const T_in* arr_in, - const size_t& size) +struct cast_memory +{ + void + operator() (T_out* arr_out, const T_in* arr_in, const size_t& size) { - const int block = static_cast((size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK); - do_cast_memory<<>>(arr_out, arr_in, size); - CHECK_CUDA_SYNC(); + const int block = static_cast ((size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK); + do_cast_memory<<>> (arr_out, arr_in, size); + CHECK_CUDA_SYNC (); } }; - template -struct cast_memory { - void operator()( - T_out* arr_out, - const T_in* arr_in, - const size_t& size) +struct cast_memory +{ + void + operator() (T_out* arr_out, const T_in* arr_in, const size_t& size) { - T_in * arr = nullptr; - CHECK_CUDA(cudaMalloc((void **)&arr, sizeof(T_in) * size)); - CHECK_CUDA(cudaMemcpy(arr, arr_in, sizeof(T_in) * size, cudaMemcpyHostToDevice)); - const int block = static_cast((size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK); - do_cast_memory<<>>(arr_out, arr, size); - CHECK_CUDA_SYNC(); - CHECK_CUDA(cudaFree(arr)); + T_in* arr = nullptr; + CHECK_CUDA (cudaMalloc ((void**)&arr, sizeof (T_in) * size)); + CHECK_CUDA (cudaMemcpy (arr, arr_in, sizeof (T_in) * size, cudaMemcpyHostToDevice)); + const int block = static_cast ((size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK); + do_cast_memory<<>> (arr_out, arr, size); + CHECK_CUDA_SYNC (); + CHECK_CUDA (cudaFree (arr)); } }; - template -struct cast_memory { - void operator()( - T_out* arr_out, - const T_in* arr_in, - const size_t& size) +struct cast_memory +{ + void + operator() (T_out* arr_out, const T_in* arr_in, const size_t& size) { - auto * arr = (T_in*) malloc(sizeof(T_in) * size); - CHECK_CUDA(cudaMemcpy(arr, arr_in, sizeof(T_in) * size, cudaMemcpyDeviceToHost)); - for (int ii = 0; ii < size; ii++) { - arr_out[ii] = 
static_cast(arr[ii]); - } - free(arr); + auto* arr = (T_in*)malloc (sizeof (T_in) * size); + CHECK_CUDA (cudaMemcpy (arr, arr_in, sizeof (T_in) * size, cudaMemcpyDeviceToHost)); + for (int ii = 0; ii < size; ii++) + { + arr_out[ii] = static_cast (arr[ii]); + } + free (arr); } }; template -void delete_memory::operator() ( - T* arr) +void + delete_memory::operator() (T* arr) { - CHECK_CUDA(cudaFree(arr)); + CHECK_CUDA (cudaFree (arr)); } template struct resize_memory; @@ -162,7 +156,7 @@ template struct resize_memory, container::DEVICE_GPU>; template struct resize_memory, container::DEVICE_GPU>; template struct set_memory; -template struct set_memory; +template struct set_memory; template struct set_memory; template struct set_memory; template struct set_memory, container::DEVICE_GPU>; diff --git a/source/source_base/module_container/ATen/kernels/lapack.cpp b/source/source_base/module_container/ATen/kernels/lapack.cpp index 2ab02f35c81..b3be955073f 100644 --- a/source/source_base/module_container/ATen/kernels/lapack.cpp +++ b/source/source_base/module_container/ATen/kernels/lapack.cpp @@ -8,244 +8,267 @@ #include #include -namespace container { -namespace kernels { - -inline double get_real(const std::complex &x) { return x.real(); } -inline float get_real(const std::complex &x) { return x.real(); } -inline double get_real(const double &x) { return x; } -inline float get_real(const float &x) { return x; } +namespace container +{ +namespace kernels +{ + +inline double + get_real (const std::complex& x) +{ + return x.real (); +} +inline float + get_real (const std::complex& x) +{ + return x.real (); +} +inline double + get_real (const double& x) +{ + return x; +} +inline float + get_real (const float& x) +{ + return x; +} template -struct set_matrix { - void operator() ( - const char& uplo, - T* A, - const int& dim) +struct set_matrix +{ + void + operator() (const char& uplo, T* A, const int& dim) { - if (uplo == 'L') { - for (int ii = 0; ii < dim; ii++) { - for (int 
jj = ii + 1; jj < dim; jj++) { - A[ii * dim + jj] = 0; - } + if (uplo == 'L') + { + for (int ii = 0; ii < dim; ii++) + { + for (int jj = ii + 1; jj < dim; jj++) + { + A[ii * dim + jj] = 0; + } + } } - } - else if (uplo == 'U') { - for (int ii = 0; ii < dim; ii++) { - for (int jj = 0; jj < ii; jj++) { - A[ii * dim + jj] = 0; - } + else if (uplo == 'U') + { + for (int ii = 0; ii < dim; ii++) + { + for (int jj = 0; jj < ii; jj++) + { + A[ii * dim + jj] = 0; + } + } } - } } }; // --- 1. Matrix Decomposition --- template -struct lapack_trtri { - void operator()( - const char& uplo, - const char& diag, - const int& dim, - T* Mat, - const int& lda) +struct lapack_trtri +{ + void + operator() (const char& uplo, const char& diag, const int& dim, T* Mat, const int& lda) { int info = 0; - lapackConnector::trtri(uplo, diag, dim, Mat, lda, info); - if (info != 0) { - throw std::runtime_error("potrf failed with info = " + std::to_string(info)); - } + lapackConnector::trtri (uplo, diag, dim, Mat, lda, info); + if (info != 0) + { + throw std::runtime_error ("potrf failed with info = " + std::to_string (info)); + } } }; template -struct lapack_potrf { - void operator()( - const char& uplo, - const int& dim, - T* Mat, - const int& lda) +struct lapack_potrf +{ + void + operator() (const char& uplo, const int& dim, T* Mat, const int& lda) { int info = 0; - lapackConnector::potrf(uplo, dim, Mat, dim, info); - if (info != 0) { - throw std::runtime_error("potrf failed with info = " + std::to_string(info)); - } + lapackConnector::potrf (uplo, dim, Mat, dim, info); + if (info != 0) + { + throw std::runtime_error ("potrf failed with info = " + std::to_string (info)); + } } }; - template -struct lapack_getrf { - void operator()( - const int& m, - const int& n, - T* Mat, - const int& lda, - int* ipiv) +struct lapack_getrf +{ + void + operator() (const int& m, const int& n, T* Mat, const int& lda, int* ipiv) { int info = 0; - lapackConnector::getrf(m, n, Mat, lda, ipiv, info); - if (info != 0) 
{ - throw std::runtime_error("getrf failed with info = " + std::to_string(info)); - } + lapackConnector::getrf (m, n, Mat, lda, ipiv, info); + if (info != 0) + { + throw std::runtime_error ("getrf failed with info = " + std::to_string (info)); + } } }; template -struct lapack_getri { - void operator()( - const int& n, - T* Mat, - const int& lda, - const int* ipiv, - T* work, - const int& lwork) +struct lapack_getri +{ + void + operator() (const int& n, T* Mat, const int& lda, const int* ipiv, T* work, const int& lwork) { int info = 0; - lapackConnector::getri(n, Mat, lda, ipiv, work, lwork, info); - if (info != 0) { - throw std::runtime_error("getri failed with info = " + std::to_string(info)); - } + lapackConnector::getri (n, Mat, lda, ipiv, work, lwork, info); + if (info != 0) + { + throw std::runtime_error ("getri failed with info = " + std::to_string (info)); + } } }; template -struct lapack_geqrf_inplace { - void operator()( - const int m, - const int n, - T *A, - const int lda) +struct lapack_geqrf_inplace +{ + void + operator() (const int m, const int n, T* A, const int lda) { // Tensor or vector? // 1. tau for storing the Householder reflectors // tau should be dimension min(m, n) - int k = std::min(m, n); - Tensor tau(DataTypeToEnum::value, DeviceType::CpuDevice, {k}); - tau.zero(); + int k = std::min (m, n); + Tensor tau (DataTypeToEnum::value, DeviceType::CpuDevice, {k}); + tau.zero (); int info = 0; // 2. 
query for workspace size int lwork = -1; T work_query; - lapackConnector::geqrf(m, n, A, lda, tau.data(), &work_query, lwork, info); - if (info != 0) { - throw std::runtime_error("geqrf workspace query failed with info = " + std::to_string(info)); - } + lapackConnector::geqrf (m, n, A, lda, tau.data (), &work_query, lwork, info); + if (info != 0) + { + throw std::runtime_error ("geqrf workspace query failed with info = " + std::to_string (info)); + } // allocate workspace - lwork = static_cast(get_real(work_query)); - Tensor work(DataTypeToEnum::value, DeviceType::CpuDevice, {lwork}); - work.zero(); + lwork = static_cast (get_real (work_query)); + Tensor work (DataTypeToEnum::value, DeviceType::CpuDevice, {lwork}); + work.zero (); // 3. perform QR decomposition // and A is overwritten with upper R. // Lower A + tau => Q - lapackConnector::geqrf(m, n, A, lda, tau.data(), work.data(), lwork, info); - if (info != 0) { - throw std::runtime_error("geqrf failed with info = " + std::to_string(info)); - } + lapackConnector::geqrf (m, n, A, lda, tau.data (), work.data (), lwork, info); + if (info != 0) + { + throw std::runtime_error ("geqrf failed with info = " + std::to_string (info)); + } // 4. 
use orgqr to compute Q // workspace query lwork = -1; - lapackConnector::orgqr(m, n, k, A, lda, tau.data(), &work_query, lwork, info); - if (info != 0) { - throw std::runtime_error("orgqr workspace query failed with info = " + std::to_string(info)); - } + lapackConnector::orgqr (m, n, k, A, lda, tau.data (), &work_query, lwork, info); + if (info != 0) + { + throw std::runtime_error ("orgqr workspace query failed with info = " + std::to_string (info)); + } // allocate workspace - lwork = static_cast(get_real(work_query)); - work.resize({lwork}); + lwork = static_cast (get_real (work_query)); + work.resize ({lwork}); // compute Q - lapackConnector::orgqr(m, n, k, A, lda, tau.data(), work.data(), lwork, info); - if (info != 0) { - throw std::runtime_error("orgqr failed with info = " + std::to_string(info)); - } + lapackConnector::orgqr (m, n, k, A, lda, tau.data (), work.data (), lwork, info); + if (info != 0) + { + throw std::runtime_error ("orgqr failed with info = " + std::to_string (info)); + } // now, A should be overwritten with Q, columns orthogonal - } }; // --- 2. Linear System Solvers --- template -struct lapack_getrs { - void operator()( - const char& trans, - const int& n, - const int& nrhs, - T* A, - const int& lda, - const int* ipiv, - T* B, - const int& ldb) +struct lapack_getrs +{ + void + operator() (const char& trans, + const int& n, + const int& nrhs, + T* A, + const int& lda, + const int* ipiv, + T* B, + const int& ldb) { int info = 0; - lapackConnector::getrs(trans, n, nrhs, A, lda, ipiv, B, ldb, info); - if (info != 0) { - throw std::runtime_error("getrs failed with info = " + std::to_string(info)); - } + lapackConnector::getrs (trans, n, nrhs, A, lda, ipiv, B, ldb, info); + if (info != 0) + { + throw std::runtime_error ("getrs failed with info = " + std::to_string (info)); + } } }; - // --- 3. 
Standard & Generalized Eigenvalue --- template -struct lapack_heevd { +struct lapack_heevd +{ using Real = typename GetTypeReal::type; - void operator()( - const int dim, - T* Mat, - const int lda, - Real* eigen_val) + void + operator() (const int dim, T* Mat, const int lda, Real* eigen_val) { - char jobz = 'V'; // Compute eigenvalues and eigenvectors + char jobz = 'V'; // Compute eigenvalues and eigenvectors char uplo = 'U'; int info = 0; - int lwork = std::max(2 * dim + dim * dim, 1 + 6 * dim + 2 * dim * dim); - Tensor work(DataTypeToEnum::value, DeviceType::CpuDevice, {lwork}); - work.zero(); + int lwork = std::max (2 * dim + dim * dim, 1 + 6 * dim + 2 * dim * dim); + Tensor work (DataTypeToEnum::value, DeviceType::CpuDevice, {lwork}); + work.zero (); int lrwork = 1 + 5 * dim + 2 * dim * dim; - Tensor rwork(DataTypeToEnum::value, DeviceType::CpuDevice, {lrwork}); - rwork.zero(); + Tensor rwork (DataTypeToEnum::value, DeviceType::CpuDevice, {lrwork}); + rwork.zero (); int liwork = 3 + 5 * dim; - Tensor iwork(DataTypeToEnum::value, DeviceType::CpuDevice, {liwork}); - iwork.zero(); - - lapackConnector::heevd(jobz, uplo, dim, Mat, lda, eigen_val, work.data(), lwork, rwork.data(), lrwork, iwork.data(), liwork, info); - if (info != 0) { - throw std::runtime_error("heevd failed with info = " + std::to_string(info)); - } + Tensor iwork (DataTypeToEnum::value, DeviceType::CpuDevice, {liwork}); + iwork.zero (); + + lapackConnector::heevd (jobz, + uplo, + dim, + Mat, + lda, + eigen_val, + work.data (), + lwork, + rwork.data (), + lrwork, + iwork.data (), + liwork, + info); + if (info != 0) + { + throw std::runtime_error ("heevd failed with info = " + std::to_string (info)); + } } }; template -struct lapack_heevx { +struct lapack_heevx +{ using Real = typename GetTypeReal::type; - void operator()( - const int n, - const int lda, - const T *Mat, - const int neig, - Real *eigen_val, - T *eigen_vec) + void + operator() (const int n, const int lda, const T* Mat, const int neig, 
Real* eigen_val, T* eigen_vec) { // copy Mat to aux, solve heevx(aux, eigen_val, eigen_vec) // input Mat is not referenced in actual heevx LAPACK routines, and aux is destroyed. - Tensor aux(DataTypeToEnum::value, DeviceType::CpuDevice, {n * lda}); + Tensor aux (DataTypeToEnum::value, DeviceType::CpuDevice, {n * lda}); // Copy Mat to aux since heevx will destroy it // aux = Mat - std::copy(Mat, Mat + n * lda, aux.data()); + std::copy (Mat, Mat + n * lda, aux.data ()); - char jobz = 'V'; // Compute eigenvalues and eigenvectors - char range = 'I'; // Find eigenvalues in index range [il, iu] - char uplo = 'L'; // Use Lower triangle + char jobz = 'V'; // Compute eigenvalues and eigenvectors + char range = 'I'; // Find eigenvalues in index range [il, iu] + char uplo = 'L'; // Use Lower triangle int info = 0; - int found = 0; // Number of eigenvalues found + int found = 0; // Number of eigenvalues found // found should be iu - il + 1, i.e. found = neig const int il = 1; const int iu = neig; @@ -260,129 +283,152 @@ struct lapack_heevx { // Dummy call to get optimal workspace size // when lwork = -1 - lapackConnector::heevx( - jobz, range, uplo, n, - aux.data(), lda, - 0.0, 0.0, il, iu, // vl, vu not used when range='I' - abstol, - found, - eigen_val, - eigen_vec, lda, - &work_query, lwork, - &rwork_query, - &iwork_query, - &ifail_query, - info); - - if (info != 0) { - throw std::runtime_error("heevx workspace query failed with info = " + std::to_string(info)); - } - - lwork = static_cast(get_real(work_query)); + lapackConnector::heevx (jobz, + range, + uplo, + n, + aux.data (), + lda, + 0.0, + 0.0, + il, + iu, // vl, vu not used when range='I' + abstol, + found, + eigen_val, + eigen_vec, + lda, + &work_query, + lwork, + &rwork_query, + &iwork_query, + &ifail_query, + info); + + if (info != 0) + { + throw std::runtime_error ("heevx workspace query failed with info = " + std::to_string (info)); + } + + lwork = static_cast (get_real (work_query)); // Allocate buffers using 
Tensor (RAII) - Tensor work(DataTypeToEnum::value, DeviceType::CpuDevice, {lwork}); - work.zero(); + Tensor work (DataTypeToEnum::value, DeviceType::CpuDevice, {lwork}); + work.zero (); - Tensor rwork(DataTypeToEnum::value, DeviceType::CpuDevice, {7 * n}); - rwork.zero(); + Tensor rwork (DataTypeToEnum::value, DeviceType::CpuDevice, {7 * n}); + rwork.zero (); - Tensor iwork(DataType::DT_INT, DeviceType::CpuDevice, {5 * n}); - iwork.zero(); + Tensor iwork (DataType::DT_INT, DeviceType::CpuDevice, {5 * n}); + iwork.zero (); - Tensor ifail(DataType::DT_INT, DeviceType::CpuDevice, {n}); - ifail.zero(); + Tensor ifail (DataType::DT_INT, DeviceType::CpuDevice, {n}); + ifail.zero (); // Actual call to heevx - lapackConnector::heevx( - jobz, range, uplo, n, - aux.data(), lda, - 0.0, 0.0, il, iu, - abstol, - found, - eigen_val, - eigen_vec, lda, - work.data(), lwork, - rwork.data(), - iwork.data(), - ifail.data(), - info); - - if (info != 0) { - throw std::runtime_error("heevx failed with info = " + std::to_string(info)); - } - + lapackConnector::heevx (jobz, + range, + uplo, + n, + aux.data (), + lda, + 0.0, + 0.0, + il, + iu, + abstol, + found, + eigen_val, + eigen_vec, + lda, + work.data (), + lwork, + rwork.data (), + iwork.data (), + ifail.data (), + info); + + if (info != 0) + { + throw std::runtime_error ("heevx failed with info = " + std::to_string (info)); + } } }; template -struct lapack_hegvd { +struct lapack_hegvd +{ using Real = typename GetTypeReal::type; - void operator()( - const int dim, - const int lda, - T *Mat_A, - T *Mat_B, - Real *eigen_val, - T *eigen_vec) + void + operator() (const int dim, const int lda, T* Mat_A, T* Mat_B, Real* eigen_val, T* eigen_vec) { // first copy Mat_A to eigen_vec // then pass as argument "A" in lapack hegvd // and this block of memory will be overwritten by eigenvectors // eigen_vec = Mat_A - std::copy(Mat_A, Mat_A + dim*lda, eigen_vec); + std::copy (Mat_A, Mat_A + dim * lda, eigen_vec); - Tensor 
aux_B(DataTypeToEnum::value, DeviceType::CpuDevice, {dim * lda}); - std::copy(Mat_B, Mat_B + dim * lda, aux_B.data()); + Tensor aux_B (DataTypeToEnum::value, DeviceType::CpuDevice, {dim * lda}); + std::copy (Mat_B, Mat_B + dim * lda, aux_B.data ()); const int itype = 1; const char jobz = 'V'; const char uplo = 'L'; int info = 0; - int lwork = std::max(2 * dim + dim * dim, 1 + 6 * dim + 2 * dim * dim); - Tensor work(DataTypeToEnum::value, DeviceType::CpuDevice, {lwork}); - work.zero(); + int lwork = std::max (2 * dim + dim * dim, 1 + 6 * dim + 2 * dim * dim); + Tensor work (DataTypeToEnum::value, DeviceType::CpuDevice, {lwork}); + work.zero (); int lrwork = 1 + 5 * dim + 2 * dim * dim; - Tensor rwork(DataTypeToEnum::value, DeviceType::CpuDevice, {lrwork}); - rwork.zero(); + Tensor rwork (DataTypeToEnum::value, DeviceType::CpuDevice, {lrwork}); + rwork.zero (); int liwork = 3 + 5 * dim; - Tensor iwork(DataType::DT_INT, DeviceType::CpuDevice, {liwork}); - iwork.zero(); + Tensor iwork (DataType::DT_INT, DeviceType::CpuDevice, {liwork}); + iwork.zero (); // After this, eigen_vec will contain the matrix Z of eigenvectors - lapackConnector::hegvd(itype, jobz, uplo, dim, eigen_vec, lda, aux_B.data(), lda, eigen_val, work.data(), lwork, rwork.data(), lrwork, iwork.data(), liwork, info); - if (info != 0) { - throw std::runtime_error("hegvd failed with info = " + std::to_string(info)); - } + lapackConnector::hegvd (itype, + jobz, + uplo, + dim, + eigen_vec, + lda, + aux_B.data (), + lda, + eigen_val, + work.data (), + lwork, + rwork.data (), + lrwork, + iwork.data (), + liwork, + info); + if (info != 0) + { + throw std::runtime_error ("hegvd failed with info = " + std::to_string (info)); + } } }; - template -struct lapack_hegvx { +struct lapack_hegvx +{ using Real = typename GetTypeReal::type; - void operator()( - const int n, - const int lda, - T *Mat_A, - T *Mat_B, - const int m, - Real *eigen_val, - T *eigen_vec) + void + operator() (const int n, const int lda, T* Mat_A, 
T* Mat_B, const int m, Real* eigen_val, T* eigen_vec) { // first copy Mat_A and Mat_B to auxiliary memory // to avoid the origin block being overwritten by hegvx - Tensor aux_A(DataTypeToEnum::value, DeviceType::CpuDevice, {n * lda}); - std::copy(Mat_A, Mat_A + n * lda, aux_A.data()); - Tensor aux_B(DataTypeToEnum::value, DeviceType::CpuDevice, {n * lda}); - std::copy(Mat_B, Mat_B + n * lda, aux_B.data()); + Tensor aux_A (DataTypeToEnum::value, DeviceType::CpuDevice, {n * lda}); + std::copy (Mat_A, Mat_A + n * lda, aux_A.data ()); + Tensor aux_B (DataTypeToEnum::value, DeviceType::CpuDevice, {n * lda}); + std::copy (Mat_B, Mat_B + n * lda, aux_B.data ()); const int itype = 1; // ITYPE = 1: A*x = (lambda)*B*x - const char jobz = 'V';// JOBZ = 'V': Compute eigenvalues and eigenvectors. + const char jobz = 'V'; // JOBZ = 'V': Compute eigenvalues and eigenvectors. const char range = 'I'; // RANGE = 'I': the IL-th through IU-th eigenvalues will be found. - const char uplo = 'L'; // UPLO = 'L': Lower triangles of A and B are stored. + const char uplo = 'L'; // UPLO = 'L': Lower triangles of A and B are stored. 
const int il = 1; const int iu = m; @@ -395,94 +441,106 @@ struct lapack_hegvx { Real rwork_query; // set lwork = -1 to query optimal work size - lapackConnector::hegvx( - itype, jobz, range, uplo, - n, - aux_A.data(), lda, // A (in/out) - aux_B.data(), lda, // B (in/out) - 0.0, 0.0, // VL, VU (not used) - il, iu, // IL, IU - Real(0.0), // ABSTOL - found, // M (output) - eigen_val, // W (output) - eigen_vec, lda, // Z (output) - &work_query, // WORK (query) - lwork, - &rwork_query, // RWORK (query) - static_cast(nullptr), // IWORK (query) - static_cast(nullptr), // IFAIL (query) - info); + lapackConnector::hegvx (itype, + jobz, + range, + uplo, + n, + aux_A.data (), + lda, // A (in/out) + aux_B.data (), + lda, // B (in/out) + 0.0, + 0.0, // VL, VU (not used) + il, + iu, // IL, IU + Real (0.0), // ABSTOL + found, // M (output) + eigen_val, // W (output) + eigen_vec, + lda, // Z (output) + &work_query, // WORK (query) + lwork, + &rwork_query, // RWORK (query) + static_cast (nullptr), // IWORK (query) + static_cast (nullptr), // IFAIL (query) + info); // !> If LWORK = -1, then a workspace query is assumed; the routine // !> only calculates the optimal size of the WORK array, returns // !> this value as the first entry of the WORK array. 
- lwork = static_cast(get_real(work_query)); - lwork = std::max(lwork, 1); + lwork = static_cast (get_real (work_query)); + lwork = std::max (lwork, 1); // work space - Tensor work(DataTypeToEnum::value, DeviceType::CpuDevice, {lwork}); - work.zero(); + Tensor work (DataTypeToEnum::value, DeviceType::CpuDevice, {lwork}); + work.zero (); const int lrwork = 7 * n; - Tensor rwork(DataTypeToEnum::value, DeviceType::CpuDevice, {lrwork}); - rwork.zero(); + Tensor rwork (DataTypeToEnum::value, DeviceType::CpuDevice, {lrwork}); + rwork.zero (); const int liwork = 5 * n; - Tensor iwork(DataType::DT_INT, DeviceType::CpuDevice, {liwork}); - iwork.zero(); - - std::vector ifail(n); - - lapackConnector::hegvx( - itype, jobz, range, uplo, - n, - aux_A.data(), lda, // A - aux_B.data(), lda, // B - 0.0, 0.0, // VL, VU - il, iu, // IL, IU - Real(0.0), // ABSTOL - found, // M (output) - eigen_val, // W - eigen_vec, lda, // Z (output) - work.data(), // WORK - lwork, - rwork.data(), // RWORK - iwork.data(), // IWORK - ifail.data(), // IFAIL - info); - - if (info < 0) { - throw std::runtime_error("hegvx failed: illegal argument #" + std::to_string(-info)); - } - if (info > 0) { - throw std::runtime_error("hegvx failed to converge. Number of converged eigenvalues: " + std::to_string(info)); - } + Tensor iwork (DataType::DT_INT, DeviceType::CpuDevice, {liwork}); + iwork.zero (); + + std::vector ifail (n); + + lapackConnector::hegvx (itype, + jobz, + range, + uplo, + n, + aux_A.data (), + lda, // A + aux_B.data (), + lda, // B + 0.0, + 0.0, // VL, VU + il, + iu, // IL, IU + Real (0.0), // ABSTOL + found, // M (output) + eigen_val, // W + eigen_vec, + lda, // Z (output) + work.data (), // WORK + lwork, + rwork.data (), // RWORK + iwork.data (), // IWORK + ifail.data (), // IFAIL + info); + + if (info < 0) + { + throw std::runtime_error ("hegvx failed: illegal argument #" + std::to_string (-info)); + } + if (info > 0) + { + throw std::runtime_error ("hegvx failed to converge. 
Number of converged eigenvalues: " + + std::to_string (info)); + } } }; - - - - -template struct set_matrix; +template struct set_matrix; template struct set_matrix; -template struct set_matrix, DEVICE_CPU>; +template struct set_matrix, DEVICE_CPU>; template struct set_matrix, DEVICE_CPU>; -template struct lapack_potrf; +template struct lapack_potrf; template struct lapack_potrf; -template struct lapack_potrf, DEVICE_CPU>; +template struct lapack_potrf, DEVICE_CPU>; template struct lapack_potrf, DEVICE_CPU>; -template struct lapack_trtri; +template struct lapack_trtri; template struct lapack_trtri; -template struct lapack_trtri, DEVICE_CPU>; +template struct lapack_trtri, DEVICE_CPU>; template struct lapack_trtri, DEVICE_CPU>; - -template struct lapack_getrf; +template struct lapack_getrf; template struct lapack_getrf; -template struct lapack_getrf, DEVICE_CPU>; +template struct lapack_getrf, DEVICE_CPU>; template struct lapack_getrf, DEVICE_CPU>; template struct lapack_getri; @@ -490,20 +548,19 @@ template struct lapack_getri; template struct lapack_getri, DEVICE_CPU>; template struct lapack_getri, DEVICE_CPU>; - template struct lapack_getrs; template struct lapack_getrs; template struct lapack_getrs, DEVICE_CPU>; template struct lapack_getrs, DEVICE_CPU>; -template struct lapack_geqrf_inplace; +template struct lapack_geqrf_inplace; template struct lapack_geqrf_inplace; -template struct lapack_geqrf_inplace, DEVICE_CPU>; +template struct lapack_geqrf_inplace, DEVICE_CPU>; template struct lapack_geqrf_inplace, DEVICE_CPU>; -template struct lapack_heevd; +template struct lapack_heevd; template struct lapack_heevd; -template struct lapack_heevd, DEVICE_CPU>; +template struct lapack_heevd, DEVICE_CPU>; template struct lapack_heevd, DEVICE_CPU>; template struct lapack_heevx; @@ -511,14 +568,14 @@ template struct lapack_heevx; template struct lapack_heevx, DEVICE_CPU>; template struct lapack_heevx, DEVICE_CPU>; -template struct lapack_hegvd; +template struct 
lapack_hegvd; template struct lapack_hegvd; -template struct lapack_hegvd, DEVICE_CPU>; +template struct lapack_hegvd, DEVICE_CPU>; template struct lapack_hegvd, DEVICE_CPU>; -template struct lapack_hegvx; +template struct lapack_hegvx; template struct lapack_hegvx; -template struct lapack_hegvx, DEVICE_CPU>; +template struct lapack_hegvx, DEVICE_CPU>; template struct lapack_hegvx, DEVICE_CPU>; } // namespace kernels diff --git a/source/source_base/module_container/ATen/kernels/lapack.h b/source/source_base/module_container/ATen/kernels/lapack.h index 117f8ef24ba..276e1876766 100644 --- a/source/source_base/module_container/ATen/kernels/lapack.h +++ b/source/source_base/module_container/ATen/kernels/lapack.h @@ -7,66 +7,47 @@ #include -namespace container { -namespace kernels { - +namespace container +{ +namespace kernels +{ template -struct set_matrix { - void operator() ( - const char& uplo, - T* A, - const int& dim); +struct set_matrix +{ + void operator() (const char& uplo, T* A, const int& dim); }; - // --- 1. 
Matrix Decomposition --- template -struct lapack_trtri { - void operator()( - const char& uplo, - const char& diag, - const int& dim, - T* Mat, - const int& lda); +struct lapack_trtri +{ + void operator() (const char& uplo, const char& diag, const int& dim, T* Mat, const int& lda); }; - template -struct lapack_potrf { - void operator()( - const char& uplo, - const int& dim, - T* Mat, - const int& lda); +struct lapack_potrf +{ + void operator() (const char& uplo, const int& dim, T* Mat, const int& lda); }; template -struct lapack_getrf { - void operator()( - const int& m, - const int& n, - T* Mat, - const int& lda, - int* ipiv); +struct lapack_getrf +{ + void operator() (const int& m, const int& n, T* Mat, const int& lda, int* ipiv); }; - template -struct lapack_getri { - void operator()( - const int& n, - T* Mat, - const int& lda, - const int* ipiv, - T* work, - const int& lwork); +struct lapack_getri +{ + void operator() (const int& n, T* Mat, const int& lda, const int* ipiv, T* work, const int& lwork); }; // This is QR factorization in-place // that will change input Mat A to orthogonal/unitary matrix Q template -struct lapack_geqrf_inplace { +struct lapack_geqrf_inplace +{ /** * @brief Perform in-place QR factorization of a matrix using LAPACK's geqrf function. * @@ -81,11 +62,7 @@ struct lapack_geqrf_inplace { * @param A Pointer to the matrix A to be factorized. On exit, contains the QR factorization * @param lda The leading dimension of the matrix A. lda >= max(1, m) */ - void operator()( - const int m, - const int n, - T *A, - const int lda); + void operator() (const int m, const int n, T* A, const int lda); }; // This is QR factorization @@ -112,23 +89,20 @@ struct lapack_geqrf_inplace { // T *tau); // }; - // --- 2. 
Linear System Solvers --- template -struct lapack_getrs { - void operator()( - const char& trans, - const int& n, - const int& nrhs, - T* A, - const int& lda, - const int* ipiv, - T* B, - const int& ldb); +struct lapack_getrs +{ + void operator() (const char& trans, + const int& n, + const int& nrhs, + T* A, + const int& lda, + const int* ipiv, + T* B, + const int& ldb); }; - - // --- 3. Standard & Generalized Eigenvalue --- // ============================================================================ @@ -144,7 +118,8 @@ struct lapack_getrs { // // ============================================================================ template -struct lapack_heevd { +struct lapack_heevd +{ // !> ZHEEVD computes all eigenvalues and, optionally, eigenvectors of a // !> complex Hermitian matrix A. If eigenvectors are desired, it uses a // !> divide and conquer algorithm. @@ -172,15 +147,12 @@ struct lapack_heevd { * to the actual implementation). */ using Real = typename GetTypeReal::type; - void operator()( - const int dim, - T* Mat, - const int lda, - Real* eigen_val); + void operator() (const int dim, T* Mat, const int lda, Real* eigen_val); }; template -struct lapack_heevx { +struct lapack_heevx +{ using Real = typename GetTypeReal::type; /** * @brief Computes selected eigenvalues and, optionally, eigenvectors of a complex Hermitian matrix. @@ -203,16 +175,9 @@ struct lapack_heevx { * See LAPACK ZHEEVX or CHEEVX documentation for more details. * This routine allocates auxiliary memory inside to prevent input matrix from being destroyed. 
*/ - void operator()( - const int dim, - const int lda, - const T *Mat, - const int neig, - Real *eigen_val, - T *eigen_vec); + void operator() (const int dim, const int lda, const T* Mat, const int neig, Real* eigen_val, T* eigen_vec); }; - // ============================================================================ // Generalized Hermitian-definite Eigenvalue Problem Solvers // ============================================================================ @@ -228,12 +193,15 @@ struct lapack_heevx { // ============================================================================ template -struct lapack_hegvd { +struct lapack_hegvd +{ using Real = typename GetTypeReal::type; /** - * @brief Computes all the eigenvalues and, optionally, the eigenvectors of a complex generalized Hermitian-definite eigenproblem. + * @brief Computes all the eigenvalues and, optionally, the eigenvectors of a complex generalized Hermitian-definite + * eigenproblem. * - * This function solves the problem A*x = lambda*B*x, where A and B are Hermitian matrices, and B is also positive definite. + * This function solves the problem A*x = lambda*B*x, where A and B are Hermitian matrices, and B is also positive + * definite. * * @param n The order of the matrices Mat_A and Mat_B. n >= 0. * @param lda The leading dimension of the arrays Mat_A and Mat_B. lda >= max(1, n). @@ -247,17 +215,12 @@ struct lapack_hegvd { * This function assumes that A and B have the same leading dimensions, lda. * This function copies B to auxiliary memory to avoid being overwritten. 
*/ - void operator()( - const int n, - const int lda, - T *Mat_A, - T *Mat_B, - Real *eigen_val, - T *eigen_vec); + void operator() (const int n, const int lda, T* Mat_A, T* Mat_B, Real* eigen_val, T* eigen_vec); }; template -struct lapack_hegvx { +struct lapack_hegvx +{ using Real = typename GetTypeReal::type; /** * @ brief hegvx computes the first m eigenvalues and their corresponding eigenvectors of @@ -271,33 +234,27 @@ struct lapack_hegvx { * @param n The order of the matrices A and B. n >= 0. * @param lda The leading dimension of the array A and B. lda >= max(1, n). * @param A On entry, the Hermitian matrix A. On exit, if info = 0, A contains the matrix Z of eigenvectors. - * @param B On entry, the Hermitian positive definite matrix B. On exit, the triangular factor from the Cholesky factorization of B. + * @param B On entry, the Hermitian positive definite matrix B. On exit, the triangular factor from the Cholesky + * factorization of B. * @param m The number of eigenvalues and eigenvectors to be found. 0 < m <= n. * @param eigen_val The first m eigenvalues in ascending order. - * @param eigen_vec The first m columns contain the orthonormal eigenvectors of the matrix A corresponding to the selected eigenvalues. + * @param eigen_vec The first m columns contain the orthonormal eigenvectors of the matrix A corresponding to the + * selected eigenvalues. * * @note * See LAPACK ZHEGVX doc for more details. * This routine allocates auxiliary memory inside to prevent input matrix from being destroyed. 
*/ - void operator()( - const int n, - const int lda, - T *Mat_A, - T *Mat_B, - const int m, - Real *eigen_val, - T *eigen_vec); + void operator() (const int n, const int lda, T* Mat_A, T* Mat_B, const int m, Real* eigen_val, T* eigen_vec); }; - #if defined(__CUDA) || defined(__ROCM) // TODO: Use C++ singleton to manage the GPU handles -void createGpuSolverHandle(); // create cusolver handle -void destroyGpuSolverHandle(); // destroy cusolver handle +void createGpuSolverHandle (); // create cusolver handle +void destroyGpuSolverHandle (); // destroy cusolver handle #endif -} // namespace container } // namespace kernels +} // namespace container #endif // ATEN_KERNELS_LAPACK_H_ diff --git a/source/source_base/module_container/ATen/kernels/linalg.cpp b/source/source_base/module_container/ATen/kernels/linalg.cpp index e78ed9b4a5c..367d3c7b0a6 100644 --- a/source/source_base/module_container/ATen/kernels/linalg.cpp +++ b/source/source_base/module_container/ATen/kernels/linalg.cpp @@ -1,234 +1,279 @@ #include -namespace container { -namespace kernels { +namespace container +{ +namespace kernels +{ template -static inline T conj(T& in) { +static inline T + conj (T& in) +{ return in; } template -static inline std::complex conj(std::complex& in) { - return std::conj(in); +static inline std::complex + conj (std::complex& in) +{ + return std::conj (in); } template -static std::vector ComputeStride(const std::vector& shape) { - int ndims = shape.size(); - std::vector strides(ndims); +static std::vector + ComputeStride (const std::vector& shape) +{ + int ndims = shape.size (); + std::vector strides (ndims); T stride = 1; - auto it = shape.end(); // Start from the last element - for (int ii = ndims - 1; ii >= 0; ii--) { - it--; - strides[ii] = stride; - stride *= static_cast(*it); - } - return std::move(strides); + auto it = shape.end (); // Start from the last element + for (int ii = ndims - 1; ii >= 0; ii--) + { + it--; + strides[ii] = stride; + stride *= static_cast (*it); 
+ } + return std::move (strides); } -template -void add::operator()(const int& num_element, const T& alpha, const T* x, const T& beta, const T* y, T* z) { +template +void + add::operator() (const int& num_element, const T& alpha, const T* x, const T& beta, const T* y, T* z) +{ // Define a lambda expression 'add_fn' to implement add operation. // Perform add operation for the specified range [begin, end) in the output Tensor. - for (int o_idx = 0; o_idx < num_element; o_idx++) { - // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'. - z[o_idx] = alpha * x[o_idx] + beta * y[o_idx]; - } + for (int o_idx = 0; o_idx < num_element; o_idx++) + { + // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index + // 'o_idx'. + z[o_idx] = alpha * x[o_idx] + beta * y[o_idx]; + } } -template -void mul::operator()(const int& num_element, const T& alpha, const T* x, T* y) { +template +void + mul::operator() (const int& num_element, const T& alpha, const T* x, T* y) +{ // Define a lambda expression 'mul_fn' to implement mul operation. // Perform mul operation for the specified range [begin, end) in the output Tensor. - for (int o_idx = 0; o_idx < num_element; o_idx++) { - // Assign the product of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'. - y[o_idx] = alpha * x[o_idx]; - } + for (int o_idx = 0; o_idx < num_element; o_idx++) + { + // Assign the product of the input Tensor elements at index 'o_idx' to the output Tensor element at index + // 'o_idx'. + y[o_idx] = alpha * x[o_idx]; + } } -template -void mul::operator()(const int& num_element, const T& alpha, const T* x, const T* y, T* z) { +template +void + mul::operator() (const int& num_element, const T& alpha, const T* x, const T* y, T* z) +{ // Define a lambda expression 'mul_fn' to implement mul operation. 
// Perform mul operation for the specified range [begin, end) in the output Tensor. - for (int o_idx = 0; o_idx < num_element; o_idx++) { - // Assign the product of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'. - z[o_idx] = alpha * x[o_idx] * y[o_idx]; - } + for (int o_idx = 0; o_idx < num_element; o_idx++) + { + // Assign the product of the input Tensor elements at index 'o_idx' to the output Tensor element at index + // 'o_idx'. + z[o_idx] = alpha * x[o_idx] * y[o_idx]; + } } -template -void div::operator()(const int& num_element, const T& alpha, const T* x, const T* y, T* z) { +template +void + div::operator() (const int& num_element, const T& alpha, const T* x, const T* y, T* z) +{ // Define a lambda expression 'div_fn' to implement div operation. // Perform div operation for the specified range [begin, end) in the output Tensor. - for (int o_idx = 0; o_idx < num_element; o_idx++) { - // Assign the quotient of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'. - z[o_idx] = alpha * x[o_idx] / y[o_idx]; - } + for (int o_idx = 0; o_idx < num_element; o_idx++) + { + // Assign the quotient of the input Tensor elements at index 'o_idx' to the output Tensor element at index + // 'o_idx'. + z[o_idx] = alpha * x[o_idx] / y[o_idx]; + } } -template -void fma::operator()(const int& num_element, const T& alpha, const T* x, const T* y, const T& beta, const T* z, T* out) { +template +void + fma::operator() (const int& num_element, + const T& alpha, + const T* x, + const T* y, + const T& beta, + const T* z, + T* out) +{ // Define a lambda expression 'fma_fn' to implement fma operation. // Perform fma operation for the specified range [begin, end) in the output Tensor. - for (int o_idx = 0; o_idx < num_element; o_idx++) { - // Assign the sum of the product of the input Tensor elements at index 'o_idx' and the corresponding coefficients to the output Tensor element at index 'o_idx'. 
- out[o_idx] = alpha * x[o_idx] * y[o_idx] + beta * z[o_idx]; - } + for (int o_idx = 0; o_idx < num_element; o_idx++) + { + // Assign the sum of the product of the input Tensor elements at index 'o_idx' and the corresponding + // coefficients to the output Tensor element at index 'o_idx'. + out[o_idx] = alpha * x[o_idx] * y[o_idx] + beta * z[o_idx]; + } } -template -void transpose::operator()( - const std::vector &perm, - const std::vector &p_shape, - const std::vector &q_shape, - const T *p, - T *q) +template +void + transpose::operator() (const std::vector& perm, + const std::vector& p_shape, + const std::vector& q_shape, + const T* p, + T* q) { - REQUIRES_OK(p_shape.size() == q_shape.size(), - "transpose: p and q must have the same number of dimensions"); - const int ndim = static_cast(p_shape.size()); - auto in_strides = ComputeStride(p_shape); - auto out_strides = ComputeStride(q_shape); + REQUIRES_OK (p_shape.size () == q_shape.size (), "transpose: p and q must have the same number of dimensions"); + const int ndim = static_cast (p_shape.size ()); + auto in_strides = ComputeStride (p_shape); + auto out_strides = ComputeStride (q_shape); int64_t num_elements = 1; - for (int ii = 0; ii < ndim; ++ii) { - num_elements *= q_shape[ii]; - } + for (int ii = 0; ii < ndim; ++ii) + { + num_elements *= q_shape[ii]; + } num_elements = ndim ? num_elements : 0; // Define a lambda expression 'transpose_fn' to implement transpose operation. // Perform transpose operation for the specified range [begin, end) in the output Tensor. - for (int64_t o_idx = 0; o_idx < num_elements; o_idx++) { - int64_t i_idx = 0; // Initialize the index for the input Tensor element. - int64_t t = o_idx; // Calculate the index for the output Tensor element. + for (int64_t o_idx = 0; o_idx < num_elements; o_idx++) + { + int64_t i_idx = 0; // Initialize the index for the input Tensor element. + int64_t t = o_idx; // Calculate the index for the output Tensor element. 
- // Iterate over each dimension of the output Tensor. - for (int ii = 0; ii < ndim; ++ii) { - // Calculate the ratio of the current output Tensor index 't' in the current dimension. - const int64_t ratio = t / out_strides[ii]; - // Update the output Tensor index 't' by removing the offset in the current dimension. - t -= ratio * out_strides[ii]; - // Calculate the offset for the corresponding index position in the input Tensor and accumulate it in 'i_idx'. - i_idx += ratio * in_strides[perm[ii]]; + // Iterate over each dimension of the output Tensor. + for (int ii = 0; ii < ndim; ++ii) + { + // Calculate the ratio of the current output Tensor index 't' in the current dimension. + const int64_t ratio = t / out_strides[ii]; + // Update the output Tensor index 't' by removing the offset in the current dimension. + t -= ratio * out_strides[ii]; + // Calculate the offset for the corresponding index position in the input Tensor and accumulate it + // in 'i_idx'. + i_idx += ratio * in_strides[perm[ii]]; + } + // Check if conjugation is needed. + if (Conjugate) + { + // Assign the conjugate value of the input Tensor element at index 'i_idx' to the output Tensor + // element at index 'o_idx'. + q[o_idx] = kernels::conj (p[i_idx]); + } + else + { + // Assign the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. + q[o_idx] = p[i_idx]; + } } - // Check if conjugation is needed. - if (Conjugate) { - // Assign the conjugate value of the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. - q[o_idx] = kernels::conj(p[i_idx]); - } else { - // Assign the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. 
- q[o_idx] = p[i_idx]; - } - } } - -template -void stride::operator()( - const std::vector &stride, - const std::vector &p_shape, - const std::vector &q_shape, - const T *p, - T *q) +template +void + stride::operator() (const std::vector& stride, + const std::vector& p_shape, + const std::vector& q_shape, + const T* p, + T* q) { - REQUIRES_OK(p_shape.size() == q_shape.size() , - "stride: p and q must match the number of dimensions"); - const int ndim = static_cast(p_shape.size()); - auto in_strides = ComputeStride(p_shape); - auto out_strides = ComputeStride(q_shape); + REQUIRES_OK (p_shape.size () == q_shape.size (), "stride: p and q must match the number of dimensions"); + const int ndim = static_cast (p_shape.size ()); + auto in_strides = ComputeStride (p_shape); + auto out_strides = ComputeStride (q_shape); int64_t num_elements = 1; - for (int ii = 0; ii < ndim; ++ii) { - num_elements *= q_shape[ii]; - } + for (int ii = 0; ii < ndim; ++ii) + { + num_elements *= q_shape[ii]; + } num_elements = ndim ? num_elements : 0; // Define a lambda expression 'stride_fn' to implement stride operation. // Perform stride operation for the specified range [begin, end) in the output Tensor. // Perform stride operation for the specified range [begin, end) in the output Tensor. - for (int64_t o_idx = 0; o_idx < num_elements; o_idx++) { - int64_t i_idx = 0; // Initialize the index for the input Tensor element. - int64_t current_o_idx = o_idx; // Calculate the index for the output Tensor element. - // Iterate over each dimension of the output Tensor. - for (int ii = 0; ii < ndim; ++ii) { - // Calculate the index in the current dimension. - // It is natural to view a tensor as a multi-dimentional array. - const int64_t current_dim_idx = current_o_idx / out_strides[ii]; - // Update the output Tensor index 'current_o_idx' by removing the offset in the current dimension. 
- current_o_idx -= current_dim_idx * out_strides[ii]; - // Calculate the offset for the corresponding index position in the input Tensor and accumulate it in 'i_idx'. - i_idx += (current_dim_idx * stride[ii]) * in_strides[ii]; + for (int64_t o_idx = 0; o_idx < num_elements; o_idx++) + { + int64_t i_idx = 0; // Initialize the index for the input Tensor element. + int64_t current_o_idx = o_idx; // Calculate the index for the output Tensor element. + // Iterate over each dimension of the output Tensor. + for (int ii = 0; ii < ndim; ++ii) + { + // Calculate the index in the current dimension. + // It is natural to view a tensor as a multi-dimentional array. + const int64_t current_dim_idx = current_o_idx / out_strides[ii]; + // Update the output Tensor index 'current_o_idx' by removing the offset in the current dimension. + current_o_idx -= current_dim_idx * out_strides[ii]; + // Calculate the offset for the corresponding index position in the input Tensor and accumulate it + // in 'i_idx'. + i_idx += (current_dim_idx * stride[ii]) * in_strides[ii]; + } + // Assign the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. + q[o_idx] = p[i_idx]; } - // Assign the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. 
- q[o_idx] = p[i_idx]; - } } - -template -void inflate::operator()( - const std::vector &inflate, - const std::vector &p_shape, - const std::vector &q_shape, - const T *p, - T *q) +template +void + inflate::operator() (const std::vector& inflate, + const std::vector& p_shape, + const std::vector& q_shape, + const T* p, + T* q) { - REQUIRES_OK(p_shape.size() == q_shape.size(), - "transpose: p and q must have the same number of dimensions"); - const int ndim = static_cast(p_shape.size()); - auto in_strides = ComputeStride(p_shape); - auto out_strides = ComputeStride(q_shape); + REQUIRES_OK (p_shape.size () == q_shape.size (), "transpose: p and q must have the same number of dimensions"); + const int ndim = static_cast (p_shape.size ()); + auto in_strides = ComputeStride (p_shape); + auto out_strides = ComputeStride (q_shape); int64_t num_elements = 1; - for (int ii = 0; ii < ndim; ++ii) { - num_elements *= q_shape[ii]; - } + for (int ii = 0; ii < ndim; ++ii) + { + num_elements *= q_shape[ii]; + } num_elements = ndim ? num_elements : 0; // Define a lambda expression 'inflate_fn' to implement inflate operation. // Perform inflate operation for the specified range [begin, end) in the output Tensor. - for (int64_t o_idx = 0; o_idx < num_elements; o_idx++) { - int64_t i_idx = 0; // Initialize the index for the input Tensor element. - int64_t current_o_idx = o_idx; // Calculate the index for the output Tensor element. - bool valid = true; - // Iterate over each dimension of the output Tensor. - for (int ii = 0; ii < ndim; ++ii) { - // Calculte the ratio of the current output Tensor index 'current_o_idx' in the current dimension. - const int64_t current_dim_idx = current_o_idx / out_strides[ii]; - // Update the output Tensor index 'current_o_idx' by removing the offset in the current dimension. - current_o_idx -= current_dim_idx * out_strides[ii]; - // Calculate the offset for the corresponding index position in the input Tensor and accumulate it in 'i_idx'. 
- if (current_dim_idx % inflate[ii] == 0) { - i_idx += (current_dim_idx / inflate[ii]) * in_strides[ii]; - } - else { - valid = false; - break; - } + for (int64_t o_idx = 0; o_idx < num_elements; o_idx++) + { + int64_t i_idx = 0; // Initialize the index for the input Tensor element. + int64_t current_o_idx = o_idx; // Calculate the index for the output Tensor element. + bool valid = true; + // Iterate over each dimension of the output Tensor. + for (int ii = 0; ii < ndim; ++ii) + { + // Calculte the ratio of the current output Tensor index 'current_o_idx' in the current dimension. + const int64_t current_dim_idx = current_o_idx / out_strides[ii]; + // Update the output Tensor index 'current_o_idx' by removing the offset in the current dimension. + current_o_idx -= current_dim_idx * out_strides[ii]; + // Calculate the offset for the corresponding index position in the input Tensor and accumulate it + // in 'i_idx'. + if (current_dim_idx % inflate[ii] == 0) + { + i_idx += (current_dim_idx / inflate[ii]) * in_strides[ii]; + } + else + { + valid = false; + break; + } + } + // Assign the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. + q[o_idx] = p[i_idx] * static_cast (valid ? 1.0 : 0.0); } - // Assign the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. - q[o_idx] = p[i_idx] * static_cast(valid ? 1.0 : 0.0); - } } - -template -void reduce::operator()( - const int64_t &num_element, - const int64_t &inner_most_dim, - const T *p, - T *q) +template +void + reduce::operator() (const int64_t& num_element, const int64_t& inner_most_dim, const T* p, T* q) { // It's just so simple to implement the reduce operation. 
- for (int64_t o_idx = 0; o_idx < num_element; o_idx++) { - T sum = 0; - for (int64_t i_idx = o_idx * inner_most_dim; i_idx < inner_most_dim + o_idx * inner_most_dim; i_idx++) { - sum += p[i_idx]; + for (int64_t o_idx = 0; o_idx < num_element; o_idx++) + { + T sum = 0; + for (int64_t i_idx = o_idx * inner_most_dim; i_idx < inner_most_dim + o_idx * inner_most_dim; i_idx++) + { + sum += p[i_idx]; + } + q[o_idx] = sum; } - q[o_idx] = sum; - } } - template struct add; template struct add; template struct add; diff --git a/source/source_base/module_container/ATen/kernels/linalg.h b/source/source_base/module_container/ATen/kernels/linalg.h index 985d1958e13..bce50ee2946 100644 --- a/source/source_base/module_container/ATen/kernels/linalg.h +++ b/source/source_base/module_container/ATen/kernels/linalg.h @@ -5,104 +5,77 @@ #include -namespace container { -namespace kernels { +namespace container +{ +namespace kernels +{ template -struct add { +struct add +{ // z = alpha * x + beta * y - void operator()( - const int& num_element, - const T& alpha, - const T* x, - const T& beta, - const T* y, - T* z); + void operator() (const int& num_element, const T& alpha, const T* x, const T& beta, const T* y, T* z); }; template -struct mul { - void operator()( - const int& num_element, - const T& alpha, - const T* x, - T* y); +struct mul +{ + void operator() (const int& num_element, const T& alpha, const T* x, T* y); // z = alpha * x * y - void operator()( - const int& num_element, - const T& alpha, - const T* x, - const T* y, - T* z); + void operator() (const int& num_element, const T& alpha, const T* x, const T* y, T* z); }; template -struct div { +struct div +{ // z = alpha * x / y - void operator()( - const int& num_element, - const T& alpha, - const T* x, - const T* y, - T* z); + void operator() (const int& num_element, const T& alpha, const T* x, const T* y, T* z); }; template -struct fma { +struct fma +{ // out = alpha * x * y + beta * z - void operator()( - const int& 
num_element, - const T& alpha, - const T* x, - const T* y, - const T& beta, - const T* z, - T* out); + void operator() (const int& num_element, const T& alpha, const T* x, const T* y, const T& beta, const T* z, T* out); }; template -struct transpose { - void operator()( - const std::vector& perm, - const std::vector& p_shape, - const std::vector& q_shape, - const T* p, - T* q); +struct transpose +{ + void operator() (const std::vector& perm, + const std::vector& p_shape, + const std::vector& q_shape, + const T* p, + T* q); }; - template -struct stride { - void operator()( - const std::vector& stride, - const std::vector& p_shape, - const std::vector& q_shape, - const T* p, - T* q); +struct stride +{ + void operator() (const std::vector& stride, + const std::vector& p_shape, + const std::vector& q_shape, + const T* p, + T* q); }; template -struct inflate { - void operator()( - const std::vector& inflate, - const std::vector& p_shape, - const std::vector& q_shape, - const T* p, - T* q); +struct inflate +{ + void operator() (const std::vector& inflate, + const std::vector& p_shape, + const std::vector& q_shape, + const T* p, + T* q); }; - template -struct reduce { - void operator()( - const int64_t& num_element, - const int64_t& inner_most_dim, - const T* p, - T* q); +struct reduce +{ + void operator() (const int64_t& num_element, const int64_t& inner_most_dim, const T* p, T* q); }; - -} // namespace op +} // namespace kernels } // namespace container #endif // ATEN_KERNELS_LINALG_H_ \ No newline at end of file diff --git a/source/source_base/module_container/ATen/kernels/memory.h b/source/source_base/module_container/ATen/kernels/memory.h index da079d7a8c4..f5abc730493 100644 --- a/source/source_base/module_container/ATen/kernels/memory.h +++ b/source/source_base/module_container/ATen/kernels/memory.h @@ -7,8 +7,10 @@ #include #include -namespace container { -namespace kernels { +namespace container +{ +namespace kernels +{ /** * @brief A functor to resize memory 
allocation. @@ -16,7 +18,8 @@ namespace kernels { * @tparam Device Device type where the memory will be allocated. */ template -struct resize_memory { +struct resize_memory +{ /** * @brief Resize memory allocation. * @@ -25,7 +28,7 @@ struct resize_memory { * @param size New size of the allocated memory. * @param record_in Optional message to record the resize operation. */ - void operator()(T*& arr, const size_t& size, const char* record_in = nullptr); + void operator() (T*& arr, const size_t& size, const char* record_in = nullptr); }; /** @@ -34,7 +37,8 @@ struct resize_memory { * @tparam Device Device type where the memory is allocated. */ template -struct set_memory { +struct set_memory +{ /** * @brief Set memory to a constant value. * @@ -42,7 +46,7 @@ struct set_memory { * @param var Constant value to set. * @param size Size of the memory to set. */ - void operator()(T* arr, const T& var, const size_t& size); + void operator() (T* arr, const T& var, const size_t& size); }; /** @@ -55,7 +59,8 @@ struct set_memory { * @tparam Device_in The input device. */ template -struct synchronize_memory { +struct synchronize_memory +{ /** * @brief Synchronizes memory between devices. * @@ -67,29 +72,32 @@ struct synchronize_memory { * @param arr_in The input array. * @param size The size of the array. 
*/ - void operator()( - T* arr_out, - const T* arr_in, - const size_t& size); + void operator() (T* arr_out, const T* arr_in, const size_t& size); }; template -struct synchronize_memory_stride { - void operator()( - T* arr_out, - const T* arr_in, - const std::vector& out_size, - const std::vector& in_size) +struct synchronize_memory_stride +{ + void + operator() (T* arr_out, + const T* arr_in, + const std::vector& out_size, + const std::vector& in_size) { - REQUIRES_OK(in_size.size() == out_size.size() && in_size.size() <= 2); - if (in_size.size() == 1) { - synchronize_memory()(arr_out, arr_in, in_size[0]); - } - else { - for (int64_t ii = 0; ii < out_size[0]; ii++) { - synchronize_memory()(arr_out + ii * out_size[1], arr_in + ii * in_size[1], in_size[1]); + REQUIRES_OK (in_size.size () == out_size.size () && in_size.size () <= 2); + if (in_size.size () == 1) + { + synchronize_memory () (arr_out, arr_in, in_size[0]); + } + else + { + for (int64_t ii = 0; ii < out_size[0]; ii++) + { + synchronize_memory () (arr_out + ii * out_size[1], + arr_in + ii * in_size[1], + in_size[1]); + } } - } } }; @@ -104,7 +112,8 @@ struct synchronize_memory_stride { * @tparam Device_in The input device. */ template -struct cast_memory { +struct cast_memory +{ /** * @brief Casts memory between devices. * @@ -116,13 +125,9 @@ struct cast_memory { * @param arr_in The input array. * @param size The size of the array. */ - void operator()( - T_out* arr_out, - const T_in* arr_in, - const size_t& size); + void operator() (T_out* arr_out, const T_in* arr_in, const size_t& size); }; - /** * @brief Deletes memory on a device. * @@ -132,7 +137,8 @@ struct cast_memory { * @tparam Device The device. */ template -struct delete_memory { +struct delete_memory +{ /** * @brief Deletes memory on a device. * @@ -141,7 +147,7 @@ struct delete_memory { * @param dev The device. * @param arr The array to be deleted. 
*/ - void operator()(T* arr); + void operator() (T* arr); }; } // namespace kernels diff --git a/source/source_base/module_container/ATen/kernels/memory_impl.cpp b/source/source_base/module_container/ATen/kernels/memory_impl.cpp index e48c89be00b..ce5323c311a 100644 --- a/source/source_base/module_container/ATen/kernels/memory_impl.cpp +++ b/source/source_base/module_container/ATen/kernels/memory_impl.cpp @@ -4,56 +4,68 @@ #include -namespace container { -namespace kernels { +namespace container +{ +namespace kernels +{ template -struct resize_memory { - void operator()(T*& arr, const size_t& size, const char* /*record_in*/) { - if (arr != nullptr) { - free(arr); - } - arr = (T*) malloc(sizeof(T) * size); +struct resize_memory +{ + void + operator() (T*& arr, const size_t& size, const char* /*record_in*/) + { + if (arr != nullptr) + { + free (arr); + } + arr = (T*)malloc (sizeof (T) * size); } }; template -struct set_memory { - void operator()(T* arr, const T& var, const size_t& size) { - for (size_t ii = 0; ii < size; ii++) { - arr[ii] = var; - } +struct set_memory +{ + void + operator() (T* arr, const T& var, const size_t& size) + { + for (size_t ii = 0; ii < size; ii++) + { + arr[ii] = var; + } } }; template -struct synchronize_memory { - void operator()( - T* arr_out, - const T* arr_in, - const size_t& size) +struct synchronize_memory +{ + void + operator() (T* arr_out, const T* arr_in, const size_t& size) { - memcpy(arr_out, arr_in, sizeof(T) * size); + memcpy (arr_out, arr_in, sizeof (T) * size); } }; template -struct cast_memory { - void operator()( - T_out* arr_out, - const T_in* arr_in, - const size_t& size) - { - for (int ii = 0; ii < size; ii++) { - arr_out[ii] = static_cast(arr_in[ii]); +struct cast_memory +{ + void + operator() (T_out* arr_out, const T_in* arr_in, const size_t& size) + { + for (int ii = 0; ii < size; ii++) + { + arr_out[ii] = static_cast (arr_in[ii]); } - } + } }; template -struct delete_memory { - void operator()(T* arr) { - 
free(arr); +struct delete_memory +{ + void + operator() (T* arr) + { + free (arr); } }; @@ -96,48 +108,84 @@ template struct delete_memory, DEVICE_CPU>; #if !(defined(__CUDA) || defined(__ROCM)) template -struct resize_memory { - void operator()(T*& arr, const size_t& size, const char* record_in = nullptr) {} +struct resize_memory +{ + void + operator() (T*& arr, const size_t& size, const char* record_in = nullptr) + { + } }; template -struct set_memory { - void operator()(T* arr, const int var, const size_t& size) {} +struct set_memory +{ + void + operator() (T* arr, const int var, const size_t& size) + { + } }; template -struct synchronize_memory { - void operator()(T* arr_out, const T* arr_in, const size_t& size) {} +struct synchronize_memory +{ + void + operator() (T* arr_out, const T* arr_in, const size_t& size) + { + } }; template -struct synchronize_memory { - void operator()(T* arr_out, const T* arr_in, const size_t& size) {} +struct synchronize_memory +{ + void + operator() (T* arr_out, const T* arr_in, const size_t& size) + { + } }; template -struct synchronize_memory { - void operator()(T* arr_out, const T* arr_in, const size_t& size) {} +struct synchronize_memory +{ + void + operator() (T* arr_out, const T* arr_in, const size_t& size) + { + } }; template -struct cast_memory { - void operator()(T_out* arr_out, const T_in* arr_in, const size_t& size) {} +struct cast_memory +{ + void + operator() (T_out* arr_out, const T_in* arr_in, const size_t& size) + { + } }; template -struct cast_memory { - void operator()(T_out* arr_out, const T_in* arr_in, const size_t& size) {} +struct cast_memory +{ + void + operator() (T_out* arr_out, const T_in* arr_in, const size_t& size) + { + } }; template -struct cast_memory { - void operator()(T_out* arr_out, const T_in* arr_in, const size_t& size) {} +struct cast_memory +{ + void + operator() (T_out* arr_out, const T_in* arr_in, const size_t& size) + { + } }; template -struct delete_memory { - void operator()(T* arr) {} 
+struct delete_memory +{ + void + operator() (T* arr) + { + } }; template struct resize_memory; diff --git a/source/source_base/module_container/ATen/kernels/rocm/blas.hip.cu b/source/source_base/module_container/ATen/kernels/rocm/blas.hip.cu index 9fa7f63f08b..471ff72166f 100644 --- a/source/source_base/module_container/ATen/kernels/rocm/blas.hip.cu +++ b/source/source_base/module_container/ATen/kernels/rocm/blas.hip.cu @@ -4,259 +4,304 @@ #include #include -namespace container { -namespace kernels { +namespace container +{ +namespace kernels +{ static hipblasHandle_t hipblas_handle = nullptr; -void createGpuBlasHandle() { - if (hipblas_handle == nullptr) { - hipblasErrcheck(hipblasCreate(&hipblas_handle)); - } +void + createGpuBlasHandle () +{ + if (hipblas_handle == nullptr) + { + hipblasErrcheck (hipblasCreate (&hipblas_handle)); + } } -void destroyGpuBlasHandle() { - if (hipblas_handle != nullptr) { - hipblasErrcheck(hipblasDestroy(hipblas_handle)); - hipblas_handle = nullptr; - } +void + destroyGpuBlasHandle () +{ + if (hipblas_handle != nullptr) + { + hipblasErrcheck (hipblasDestroy (hipblas_handle)); + hipblas_handle = nullptr; + } } - template -struct blas_nrm2 { - T operator()( - const int n, - const T *x, - const int incx) +struct blas_nrm2 +{ + T + operator() (const int n, const T* x, const int incx) { T result; - hipBlasConnector::nrm2(hipblas_handle, n, x, incx, &result); + hipBlasConnector::nrm2 (hipblas_handle, n, x, incx, &result); return result; } }; template -struct blas_dot { - void operator()( - const int& n, - const T* x, - const int& incx, - const T* y, - const int& incy, - T* result) +struct blas_dot +{ + void + operator() (const int& n, const T* x, const int& incx, const T* y, const int& incy, T* result) { - hipBlasConnector::dot(hipblas_handle, n, x, incx, y, incy, result); + hipBlasConnector::dot (hipblas_handle, n, x, incx, y, incy, result); } }; template -struct blas_scal { - void operator()( - const int& n, - const T* alpha, - T* x, - 
const int& incx) +struct blas_scal +{ + void + operator() (const int& n, const T* alpha, T* x, const int& incx) { - hipBlasConnector::scal(hipblas_handle, n, *alpha, x, incx); + hipBlasConnector::scal (hipblas_handle, n, *alpha, x, incx); } }; template -struct blas_axpy { - void operator()( - const int& n, - const T* alpha, - const T* x, - const int& incx, - T* y, - const int& incy) +struct blas_axpy +{ + void + operator() (const int& n, const T* alpha, const T* x, const int& incx, T* y, const int& incy) { - hipBlasConnector::axpy(hipblas_handle, n, *alpha, x, incx, y, incy); + hipBlasConnector::axpy (hipblas_handle, n, *alpha, x, incx, y, incy); } }; template -struct blas_gemv { - void operator()( - const char& trans, - const int& m, - const int& n, - const T* alpha, - const T* A, - const int& lda, - const T* x, - const int& incx, - const T* beta, - T* y, - const int& incy) +struct blas_gemv +{ + void + operator() (const char& trans, + const int& m, + const int& n, + const T* alpha, + const T* A, + const int& lda, + const T* x, + const int& incx, + const T* beta, + T* y, + const int& incy) { - hipBlasConnector::gemv(hipblas_handle, trans, m, n, *alpha, A, lda, x, incx, *beta, y, incy); + hipBlasConnector::gemv (hipblas_handle, trans, m, n, *alpha, A, lda, x, incx, *beta, y, incy); } }; - template -struct blas_gemv_batched { - void operator()( - const char& trans, - const int& m, - const int& n, - const T* alpha, - T** A, - const int& lda, - T** x, - const int& incx, - const T* beta, - T** y, - const int& incy, - const int& batch_size) +struct blas_gemv_batched +{ + void + operator() (const char& trans, + const int& m, + const int& n, + const T* alpha, + T** A, + const int& lda, + T** x, + const int& incx, + const T* beta, + T** y, + const int& incy, + const int& batch_size) { - hipBlasConnector::gemv_batched(hipblas_handle, trans, m, n, *alpha, A, lda, x, incx, *beta, y, incy, batch_size); + hipBlasConnector::gemv_batched (hipblas_handle, + trans, + m, + n, + 
*alpha, + A, + lda, + x, + incx, + *beta, + y, + incy, + batch_size); } }; - template -struct blas_gemv_batched_strided { - void operator()( - const char& trans, - const int& m, - const int& n, - const T* alpha, - const T* A, - const int& lda, - const int64_t& stride_a, - const T* x, - const int& incx, - const int64_t& stride_x, - const T* beta, - T* y, - const int& incy, - const int64_t& stride_y, - const int& batch_size) +struct blas_gemv_batched_strided +{ + void + operator() (const char& trans, + const int& m, + const int& n, + const T* alpha, + const T* A, + const int& lda, + const int64_t& stride_a, + const T* x, + const int& incx, + const int64_t& stride_x, + const T* beta, + T* y, + const int& incy, + const int64_t& stride_y, + const int& batch_size) { - hipBlasConnector::gemv_batched_strided(hipblas_handle, trans, m, n, *alpha, A, lda, stride_a, x, incx, stride_x, *beta, y, incy, stride_y, batch_size); + hipBlasConnector::gemv_batched_strided (hipblas_handle, + trans, + m, + n, + *alpha, + A, + lda, + stride_a, + x, + incx, + stride_x, + *beta, + y, + incy, + stride_y, + batch_size); } }; template -struct blas_gemm { - void operator()( - const char& transa, - const char& transb, - const int& m, - const int& n, - const int& k, - const T* alpha, - const T* A, - const int& lda, - const T* B, - const int& ldb, - const T* beta, - T* C, - const int& ldc) +struct blas_gemm +{ + void + operator() (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const T* alpha, + const T* A, + const int& lda, + const T* B, + const int& ldb, + const T* beta, + T* C, + const int& ldc) { - hipBlasConnector::gemm(hipblas_handle, transa, transb, m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc); + hipBlasConnector::gemm (hipblas_handle, transa, transb, m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc); } }; template -struct blas_gemm_batched { - void operator()( - const char& transa, - const char& transb, - const int& m, - const int& n, - const 
int& k, - const T* alpha, - T** A, - const int& lda, - T** B, - const int& ldb, - const T* beta, - T** C, - const int& ldc, - const int& batch_size) +struct blas_gemm_batched +{ + void + operator() (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const T* alpha, + T** A, + const int& lda, + T** B, + const int& ldb, + const T* beta, + T** C, + const int& ldc, + const int& batch_size) { - hipBlasConnector::gemm_batched(hipblas_handle, transa, transb, m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc, batch_size); + hipBlasConnector:: + gemm_batched (hipblas_handle, transa, transb, m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc, batch_size); } }; template -struct blas_gemm_batched_strided { - void operator()( - const char& transa, - const char& transb, - const int& m, - const int& n, - const int& k, - const T* alpha, - const T* A, - const int& lda, - const int& stride_a, - const T* B, - const int& ldb, - const int& stride_b, - const T* beta, - T* C, - const int& ldc, - const int& stride_c, - const int& batch_size) +struct blas_gemm_batched_strided +{ + void + operator() (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const T* alpha, + const T* A, + const int& lda, + const int& stride_a, + const T* B, + const int& ldb, + const int& stride_b, + const T* beta, + T* C, + const int& ldc, + const int& stride_c, + const int& batch_size) { - hipBlasConnector::gemm_batched_strided(hipblas_handle, transa, transb, m, n, k, *alpha, A, lda, stride_a, B, ldb, stride_b, *beta, C, ldc, stride_c, batch_size); + hipBlasConnector::gemm_batched_strided (hipblas_handle, + transa, + transb, + m, + n, + k, + *alpha, + A, + lda, + stride_a, + B, + ldb, + stride_b, + *beta, + C, + ldc, + stride_c, + batch_size); } }; // Explicitly instantiate functors for the types of functor registered. 
-template struct blas_nrm2; +template struct blas_nrm2; template struct blas_nrm2; -template struct blas_nrm2 , DEVICE_GPU>; +template struct blas_nrm2, DEVICE_GPU>; template struct blas_nrm2, DEVICE_GPU>; -template struct blas_dot; +template struct blas_dot; template struct blas_dot; -template struct blas_dot , DEVICE_GPU>; +template struct blas_dot, DEVICE_GPU>; template struct blas_dot, DEVICE_GPU>; -template struct blas_scal; +template struct blas_scal; template struct blas_scal; -template struct blas_scal , DEVICE_GPU>; +template struct blas_scal, DEVICE_GPU>; template struct blas_scal, DEVICE_GPU>; -template struct blas_axpy; +template struct blas_axpy; template struct blas_axpy; -template struct blas_axpy , DEVICE_GPU>; +template struct blas_axpy, DEVICE_GPU>; template struct blas_axpy, DEVICE_GPU>; -template struct blas_gemv; +template struct blas_gemv; template struct blas_gemv; -template struct blas_gemv, DEVICE_GPU>; +template struct blas_gemv, DEVICE_GPU>; template struct blas_gemv, DEVICE_GPU>; -template struct blas_gemv_batched; +template struct blas_gemv_batched; template struct blas_gemv_batched; -template struct blas_gemv_batched, DEVICE_GPU>; +template struct blas_gemv_batched, DEVICE_GPU>; template struct blas_gemv_batched, DEVICE_GPU>; -template struct blas_gemv_batched_strided; +template struct blas_gemv_batched_strided; template struct blas_gemv_batched_strided; -template struct blas_gemv_batched_strided, DEVICE_GPU>; +template struct blas_gemv_batched_strided, DEVICE_GPU>; template struct blas_gemv_batched_strided, DEVICE_GPU>; -template struct blas_gemm; +template struct blas_gemm; template struct blas_gemm; -template struct blas_gemm, DEVICE_GPU>; +template struct blas_gemm, DEVICE_GPU>; template struct blas_gemm, DEVICE_GPU>; -template struct blas_gemm_batched; +template struct blas_gemm_batched; template struct blas_gemm_batched; -template struct blas_gemm_batched, DEVICE_GPU>; +template struct blas_gemm_batched, DEVICE_GPU>; template 
struct blas_gemm_batched, DEVICE_GPU>; -template struct blas_gemm_batched_strided; +template struct blas_gemm_batched_strided; template struct blas_gemm_batched_strided; -template struct blas_gemm_batched_strided, DEVICE_GPU>; +template struct blas_gemm_batched_strided, DEVICE_GPU>; template struct blas_gemm_batched_strided, DEVICE_GPU>; } // namespace kernels diff --git a/source/source_base/module_container/ATen/kernels/rocm/lapack.hip.cu b/source/source_base/module_container/ATen/kernels/rocm/lapack.hip.cu index 07572a657ab..c0d34cae9cf 100644 --- a/source/source_base/module_container/ATen/kernels/rocm/lapack.hip.cu +++ b/source/source_base/module_container/ATen/kernels/rocm/lapack.hip.cu @@ -6,156 +6,150 @@ #include #include -namespace container { -namespace kernels { - +namespace container +{ +namespace kernels +{ static hipsolverHandle_t hipsolver_handle = nullptr; -void createGpuSolverHandle() { - if (hipsolver_handle == nullptr) { - hipsolverErrcheck(hipsolverCreate(&hipsolver_handle)); - } +void + createGpuSolverHandle () +{ + if (hipsolver_handle == nullptr) + { + hipsolverErrcheck (hipsolverCreate (&hipsolver_handle)); + } } -void destroyGpuSolverHandle() { - if (hipsolver_handle != nullptr) { - hipsolverErrcheck(hipsolverDestroy(hipsolver_handle)); - hipsolver_handle = nullptr; - } +void + destroyGpuSolverHandle () +{ + if (hipsolver_handle != nullptr) + { + hipsolverErrcheck (hipsolverDestroy (hipsolver_handle)); + hipsolver_handle = nullptr; + } } template -__global__ void set_matrix_kernel( - const char uplo, - T* A, - const int dim) +__global__ void + set_matrix_kernel (const char uplo, T* A, const int dim) { int bid = blockIdx.x; int tid = threadIdx.x; - for (int ii = tid; ii < bid + 1; ii += THREADS_PER_BLOCK) { - if (uplo == 'L') { - A[ii * dim + bid + 1] = static_cast(0); - } - else { - A[(bid + 1) * dim + ii] = static_cast(0); + for (int ii = tid; ii < bid + 1; ii += THREADS_PER_BLOCK) + { + if (uplo == 'L') + { + A[ii * dim + bid + 1] = 
static_cast (0); + } + else + { + A[(bid + 1) * dim + ii] = static_cast (0); + } } - } } template -struct set_matrix { +struct set_matrix +{ using Type = typename GetTypeThrust::type; - void operator() ( - const char& uplo, - T* A, - const int& dim) + void + operator() (const char& uplo, T* A, const int& dim) { - set_matrix_kernel<<>>( - uplo, reinterpret_cast(A), dim); + set_matrix_kernel<<>> (uplo, reinterpret_cast (A), dim); } }; template -struct lapack_trtri { - void operator()( - const char& uplo, - const char& diag, - const int& dim, - T* Mat, - const int& lda) +struct lapack_trtri +{ + void + operator() (const char& uplo, const char& diag, const int& dim, T* Mat, const int& lda) { // TODO: trtri is not implemented in this method yet // Cause the trtri in cuSolver is not stable for ABACUS! // hipSolverConnector::trtri(hipsolver_handle, uplo, diag, dim, Mat, lda); // hipSolverConnector::potri(hipsolver_handle, uplo, diag, dim, Mat, lda); - std::vector H_Mat(dim * dim, static_cast(0.0)); - hipMemcpy(H_Mat.data(), Mat, sizeof(T) * H_Mat.size(), hipMemcpyDeviceToHost); - lapack_trtri()(uplo, diag, dim, H_Mat.data(), lda); - hipMemcpy(Mat, H_Mat.data(), sizeof(T) * H_Mat.size(), hipMemcpyHostToDevice); + std::vector H_Mat (dim * dim, static_cast (0.0)); + hipMemcpy (H_Mat.data (), Mat, sizeof (T) * H_Mat.size (), hipMemcpyDeviceToHost); + lapack_trtri () (uplo, diag, dim, H_Mat.data (), lda); + hipMemcpy (Mat, H_Mat.data (), sizeof (T) * H_Mat.size (), hipMemcpyHostToDevice); } }; template -struct lapack_potrf { - void operator()( - const char& uplo, - const int& dim, - T* Mat, - const int& lda) +struct lapack_potrf +{ + void + operator() (const char& uplo, const int& dim, T* Mat, const int& lda) { // hipSolverConnector::potrf(hipsolver_handle, uplo, dim, Mat, dim); - std::vector H_Mat(dim * dim, static_cast(0.0)); - hipMemcpy(H_Mat.data(), Mat, sizeof(T) * H_Mat.size(), hipMemcpyDeviceToHost); - lapack_potrf()(uplo, dim, H_Mat.data(), lda); - hipMemcpy(Mat, 
H_Mat.data(), sizeof(T) * H_Mat.size(), hipMemcpyHostToDevice); + std::vector H_Mat (dim * dim, static_cast (0.0)); + hipMemcpy (H_Mat.data (), Mat, sizeof (T) * H_Mat.size (), hipMemcpyDeviceToHost); + lapack_potrf () (uplo, dim, H_Mat.data (), lda); + hipMemcpy (Mat, H_Mat.data (), sizeof (T) * H_Mat.size (), hipMemcpyHostToDevice); } }; template -struct lapack_heevd { +struct lapack_heevd +{ using Real = typename GetTypeReal::type; - void operator()( - const char& jobz, - const char& uplo, - T* Mat, - const int& dim, - Real* eigen_val) + void + operator() (const char& jobz, const char& uplo, T* Mat, const int& dim, Real* eigen_val) { // hipSolverConnector::heevd(hipsolver_handle, jobz, uplo, dim, Mat, dim, eigen_val); - std::vector H_Mat(dim * dim, static_cast(0.0)); - std::vector H_eigen_val(dim, static_cast(0.0)); - hipMemcpy(H_Mat.data(), Mat, sizeof(T) * H_Mat.size(), hipMemcpyDeviceToHost); - hipMemcpy(H_eigen_val.data(), eigen_val, sizeof(Real) * H_eigen_val.size(), hipMemcpyDeviceToHost); - lapack_heevd()(jobz, uplo, H_Mat.data(), dim, H_eigen_val.data()); - hipMemcpy(Mat, H_Mat.data(), sizeof(T) * H_Mat.size(), hipMemcpyHostToDevice); - hipMemcpy(eigen_val, H_eigen_val.data(), sizeof(Real) * H_eigen_val.size(), hipMemcpyHostToDevice); + std::vector H_Mat (dim * dim, static_cast (0.0)); + std::vector H_eigen_val (dim, static_cast (0.0)); + hipMemcpy (H_Mat.data (), Mat, sizeof (T) * H_Mat.size (), hipMemcpyDeviceToHost); + hipMemcpy (H_eigen_val.data (), eigen_val, sizeof (Real) * H_eigen_val.size (), hipMemcpyDeviceToHost); + lapack_heevd () (jobz, uplo, H_Mat.data (), dim, H_eigen_val.data ()); + hipMemcpy (Mat, H_Mat.data (), sizeof (T) * H_Mat.size (), hipMemcpyHostToDevice); + hipMemcpy (eigen_val, H_eigen_val.data (), sizeof (Real) * H_eigen_val.size (), hipMemcpyHostToDevice); } }; template -struct lapack_hegvd { +struct lapack_hegvd +{ using Real = typename GetTypeReal::type; - void operator()( - const int dim, - const int lda, - T* Mat_A, - T* 
Mat_B, - Real* eigen_val, - T *eigen_vec) + void + operator() (const int dim, const int lda, T* Mat_A, T* Mat_B, Real* eigen_val, T* eigen_vec) { const int itype = 1; const char jobz = 'V'; const char uplo = 'U'; - hipErrcheck(hipMemcpy(eigen_vec, Mat_A, sizeof(T) * dim * lda, hipMemcpyDeviceToDevice)); - hipSolverConnector::hegvd(hipsolver_handle, itype, jobz, uplo, dim, Mat_A, lda, Mat_B, lda, eigen_val); + hipErrcheck (hipMemcpy (eigen_vec, Mat_A, sizeof (T) * dim * lda, hipMemcpyDeviceToDevice)); + hipSolverConnector::hegvd (hipsolver_handle, itype, jobz, uplo, dim, Mat_A, lda, Mat_B, lda, eigen_val); } }; -template struct set_matrix; +template struct set_matrix; template struct set_matrix; -template struct set_matrix, DEVICE_GPU>; +template struct set_matrix, DEVICE_GPU>; template struct set_matrix, DEVICE_GPU>; -template struct lapack_trtri; +template struct lapack_trtri; template struct lapack_trtri; -template struct lapack_trtri, DEVICE_GPU>; +template struct lapack_trtri, DEVICE_GPU>; template struct lapack_trtri, DEVICE_GPU>; -template struct lapack_potrf; +template struct lapack_potrf; template struct lapack_potrf; -template struct lapack_potrf, DEVICE_GPU>; +template struct lapack_potrf, DEVICE_GPU>; template struct lapack_potrf, DEVICE_GPU>; -template struct lapack_heevd; +template struct lapack_heevd; template struct lapack_heevd; -template struct lapack_heevd, DEVICE_GPU>; +template struct lapack_heevd, DEVICE_GPU>; template struct lapack_heevd, DEVICE_GPU>; -template struct lapack_hegvd; +template struct lapack_hegvd; template struct lapack_hegvd; -template struct lapack_hegvd, DEVICE_GPU>; +template struct lapack_hegvd, DEVICE_GPU>; template struct lapack_hegvd, DEVICE_GPU>; } // namespace kernels diff --git a/source/source_base/module_container/ATen/kernels/rocm/linalg.hip.cu b/source/source_base/module_container/ATen/kernels/rocm/linalg.hip.cu index ef43a5408cc..008f768f020 100644 --- 
a/source/source_base/module_container/ATen/kernels/rocm/linalg.hip.cu +++ b/source/source_base/module_container/ATen/kernels/rocm/linalg.hip.cu @@ -6,413 +6,468 @@ #include #include -namespace container { -namespace kernels { +namespace container +{ +namespace kernels +{ template -__device__ static inline -T conj(T& in) { +__device__ static inline T + conj (T& in) +{ return in; } template -__device__ static inline -thrust::complex conj(thrust::complex& in) { - return thrust::conj(in); +__device__ static inline thrust::complex + conj (thrust::complex& in) +{ + return thrust::conj (in); } template -__global__ void do_add_kernel( - const int num_element, - const T alpha, - const T* x, - const T beta, - const T* y, - T* z) +__global__ void + do_add_kernel (const int num_element, const T alpha, const T* x, const T beta, const T* y, T* z) { // Perform add operation for the specified range [begin, end) in the output Tensor. - for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) { - // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'. - z[o_idx] = alpha * x[o_idx] + beta * y[o_idx]; - } + for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) + { + // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index + // 'o_idx'. + z[o_idx] = alpha * x[o_idx] + beta * y[o_idx]; + } } template -__global__ void do_mul_kernel( - const int num_element, - const T alpha, - const T* x, - T* y) +__global__ void + do_mul_kernel (const int num_element, const T alpha, const T* x, T* y) { - for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) { - // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'. 
- y[o_idx] = alpha * x[o_idx]; - } + for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) + { + // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index + // 'o_idx'. + y[o_idx] = alpha * x[o_idx]; + } } template -__global__ void do_mul_kernel( - const int num_element, - const T alpha, - const T* x, - const T* y, - T* z) +__global__ void + do_mul_kernel (const int num_element, const T alpha, const T* x, const T* y, T* z) { - for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) { - // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'. - z[o_idx] = alpha * x[o_idx] * y[o_idx]; - } + for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) + { + // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index + // 'o_idx'. + z[o_idx] = alpha * x[o_idx] * y[o_idx]; + } } template -__global__ void do_div_kernel( - const int num_element, - const T alpha, - const T* x, - const T* y, - T* z) +__global__ void + do_div_kernel (const int num_element, const T alpha, const T* x, const T* y, T* z) { - for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) { - // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'. - z[o_idx] = alpha * x[o_idx] / y[o_idx]; - } + for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) + { + // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index + // 'o_idx'. 
+ z[o_idx] = alpha * x[o_idx] / y[o_idx]; + } } template -__global__ void do_fma_kernel( - const int num_element, - const T alpha, - const T* x, - const T* y, - const T beta, - const T* z, - T* out) +__global__ void + do_fma_kernel (const int num_element, const T alpha, const T* x, const T* y, const T beta, const T* z, T* out) { - for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) { - // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'. - out[o_idx] = alpha * x[o_idx] * y[o_idx] + beta * z[o_idx]; - } + for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) + { + // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index + // 'o_idx'. + out[o_idx] = alpha * x[o_idx] * y[o_idx] + beta * z[o_idx]; + } } template -__global__ void do_transpose_kernel( - int ndim, - int64_t num_elements, - const T* p, - const int* perm, - const int64_t* in_strides, - const int64_t* out_strides, - T* q) +__global__ void + do_transpose_kernel (int ndim, + int64_t num_elements, + const T* p, + const int* perm, + const int64_t* in_strides, + const int64_t* out_strides, + T* q) { - for (int64_t o_idx = 0; o_idx < num_elements; o_idx++) { - int64_t i_idx = 0; // Initialize the index for the input Tensor element. - int64_t current_o_idx = o_idx; // Calculate the index for the output Tensor element. - - // Iterate over each dimension of the output Tensor. - for (int ii = 0; ii < ndim; ++ii) { - // Calculate the ratio of the current output Tensor index 'current_o_idx' in the current dimension. - const int64_t ratio = current_o_idx / out_strides[ii]; - // Update the output Tensor index 'current_o_idx' by removing the offset in the current dimension. - current_o_idx -= ratio * out_strides[ii]; - // Calculate the offset for the corresponding index position in the input Tensor and accumulate it in 'i_idx'. 
- i_idx += ratio * in_strides[perm[ii]]; - } - // Check if conjugation is needed. - if (Conjugate) { - // Assign the conjugate value of the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. - q[o_idx] = kernels::conj(p[i_idx]); - } else { - // Assign the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. - q[o_idx] = p[i_idx]; + for (int64_t o_idx = 0; o_idx < num_elements; o_idx++) + { + int64_t i_idx = 0; // Initialize the index for the input Tensor element. + int64_t current_o_idx = o_idx; // Calculate the index for the output Tensor element. + + // Iterate over each dimension of the output Tensor. + for (int ii = 0; ii < ndim; ++ii) + { + // Calculate the ratio of the current output Tensor index 'current_o_idx' in the current dimension. + const int64_t ratio = current_o_idx / out_strides[ii]; + // Update the output Tensor index 'current_o_idx' by removing the offset in the current dimension. + current_o_idx -= ratio * out_strides[ii]; + // Calculate the offset for the corresponding index position in the input Tensor and accumulate it + // in 'i_idx'. + i_idx += ratio * in_strides[perm[ii]]; + } + // Check if conjugation is needed. + if (Conjugate) + { + // Assign the conjugate value of the input Tensor element at index 'i_idx' to the output Tensor + // element at index 'o_idx'. + q[o_idx] = kernels::conj (p[i_idx]); + } + else + { + // Assign the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. + q[o_idx] = p[i_idx]; + } } - } } template -__global__ void do_stride_kernel( - int ndim, - int64_t size, - const T* p, - const int64_t* stride, - const int64_t* in_strides, - const int64_t* out_strides, - T* q) +__global__ void + do_stride_kernel (int ndim, + int64_t size, + const T* p, + const int64_t* stride, + const int64_t* in_strides, + const int64_t* out_strides, + T* q) { // Perform stride operation for the specified range [begin, end) in the output Tensor. 
- for (int64_t o_idx = threadIdx.x; o_idx < size; o_idx += blockDim.x) { - int64_t i_idx = 0; // Initialize the index for the input Tensor element. - int64_t current_o_idx = o_idx; // Calculate the index for the output Tensor element. - // Iterate over each dimension of the output Tensor. - for (int ii = 0; ii < ndim; ++ii) { - // Calculate the index in the current dimension. - // It is natural to view a tensor as a multi-dimentional array. - const int64_t current_dim_idx = current_o_idx / out_strides[ii]; - // Update the output Tensor index 'current_o_idx' by removing the offset in the current dimension. - current_o_idx -= current_dim_idx * out_strides[ii]; - // Calculate the offset for the corresponding index position in the input Tensor and accumulate it in 'i_idx'. - i_idx += current_dim_idx * stride[ii] * in_strides[ii]; + for (int64_t o_idx = threadIdx.x; o_idx < size; o_idx += blockDim.x) + { + int64_t i_idx = 0; // Initialize the index for the input Tensor element. + int64_t current_o_idx = o_idx; // Calculate the index for the output Tensor element. + // Iterate over each dimension of the output Tensor. + for (int ii = 0; ii < ndim; ++ii) + { + // Calculate the index in the current dimension. + // It is natural to view a tensor as a multi-dimentional array. + const int64_t current_dim_idx = current_o_idx / out_strides[ii]; + // Update the output Tensor index 'current_o_idx' by removing the offset in the current dimension. + current_o_idx -= current_dim_idx * out_strides[ii]; + // Calculate the offset for the corresponding index position in the input Tensor and accumulate it + // in 'i_idx'. + i_idx += current_dim_idx * stride[ii] * in_strides[ii]; + } + // Assign the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. + q[o_idx] = p[i_idx]; } - // Assign the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. 
- q[o_idx] = p[i_idx]; - } } template -__global__ void do_inflate_kernel( - int ndim, - int64_t size, - const T* p, - const int64_t* stride, - const int64_t* in_strides, - const int64_t* out_strides, - T* q) +__global__ void + do_inflate_kernel (int ndim, + int64_t size, + const T* p, + const int64_t* stride, + const int64_t* in_strides, + const int64_t* out_strides, + T* q) { // Perform stride operation for the specified range [begin, end) in the output Tensor. - for (int64_t o_idx = threadIdx.x; o_idx < size; o_idx += blockDim.x) { - int64_t i_idx = 0; // Initialize the index for the input Tensor element. - int64_t current_o_idx = o_idx; // Calculate the index for the output Tensor element. - bool valid = true; - // Iterate over each dimension of the output Tensor. - for (int ii = 0; ii < ndim; ++ii) { - // Calculte the ratio of the current output Tensor index 'current_o_idx' in the current dimension. - const int64_t current_dim_idx = current_o_idx / out_strides[ii]; - // Update the output Tensor index 'current_o_idx' by removing the offset in the current dimension. - current_o_idx -= current_dim_idx * out_strides[ii]; - // Calculate the offset for the corresponding index position in the input Tensor and accumulate it in 'i_idx'. - if (current_dim_idx % stride[ii] == 0) { - i_idx += (current_dim_idx / stride[ii]) * in_strides[ii]; - } - else { - valid = false; - break; - } + for (int64_t o_idx = threadIdx.x; o_idx < size; o_idx += blockDim.x) + { + int64_t i_idx = 0; // Initialize the index for the input Tensor element. + int64_t current_o_idx = o_idx; // Calculate the index for the output Tensor element. + bool valid = true; + // Iterate over each dimension of the output Tensor. + for (int ii = 0; ii < ndim; ++ii) + { + // Calculte the ratio of the current output Tensor index 'current_o_idx' in the current dimension. 
+ const int64_t current_dim_idx = current_o_idx / out_strides[ii]; + // Update the output Tensor index 'current_o_idx' by removing the offset in the current dimension. + current_o_idx -= current_dim_idx * out_strides[ii]; + // Calculate the offset for the corresponding index position in the input Tensor and accumulate it + // in 'i_idx'. + if (current_dim_idx % stride[ii] == 0) + { + i_idx += (current_dim_idx / stride[ii]) * in_strides[ii]; + } + else + { + valid = false; + break; + } + } + // Assign the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. + q[o_idx] = p[i_idx] * static_cast (valid ? 1.0 : 0.0); } - // Assign the input Tensor element at index 'i_idx' to the output Tensor element at index 'o_idx'. - q[o_idx] = p[i_idx] * static_cast(valid ? 1.0 : 0.0); - } } template -__global__ void do_reduce_kernel( - int64_t size, - int64_t inner_most_dim, - const T* p, - T* q) +__global__ void + do_reduce_kernel (int64_t size, int64_t inner_most_dim, const T* p, T* q) { - for (int64_t o_idx = threadIdx.x; o_idx < size; o_idx += blockDim.x) { - T sum = 0; - for (int64_t i_idx = o_idx * inner_most_dim; i_idx < inner_most_dim + o_idx * inner_most_dim; i_idx++) { - sum += p[i_idx]; + for (int64_t o_idx = threadIdx.x; o_idx < size; o_idx += blockDim.x) + { + T sum = 0; + for (int64_t i_idx = o_idx * inner_most_dim; i_idx < inner_most_dim + o_idx * inner_most_dim; i_idx++) + { + sum += p[i_idx]; + } + q[o_idx] = sum; } - q[o_idx] = sum; - } } template -static std::vector compute_stride(const std::vector& shape) { - int ndims = shape.size(); - std::vector strides(ndims); +static std::vector + compute_stride (const std::vector& shape) +{ + int ndims = shape.size (); + std::vector strides (ndims); T stride = 1; - auto it = shape.end(); // Start from the last element - for (int ii = ndims - 1; ii >= 0; ii--) { - it--; - strides[ii] = stride; - stride *= static_cast(*it); - } - return std::move(strides); + auto it = shape.end (); // Start 
from the last element + for (int ii = ndims - 1; ii >= 0; ii--) + { + it--; + strides[ii] = stride; + stride *= static_cast (*it); + } + return std::move (strides); } -template -void add::operator()(const int& num_element, const T& alpha, const T* x, const T& beta, const T* y, T* z) { +template +void + add::operator() (const int& num_element, const T& alpha, const T* x, const T& beta, const T* y, T* z) +{ using Type = typename GetTypeThrust::type; - auto alpha_ = *reinterpret_cast(&alpha); - auto beta_ = *reinterpret_cast(&beta); + auto alpha_ = *reinterpret_cast (&alpha); + auto beta_ = *reinterpret_cast (&beta); const int block = (num_element + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - do_add_kernel<<>> ( - num_element, alpha_, reinterpret_cast(x), - beta_, reinterpret_cast(y), reinterpret_cast(z)); + do_add_kernel<<>> (num_element, + alpha_, + reinterpret_cast (x), + beta_, + reinterpret_cast (y), + reinterpret_cast (z)); } -template -void mul::operator()(const int& num_element, const T& alpha, const T* x, T* y) { +template +void + mul::operator() (const int& num_element, const T& alpha, const T* x, T* y) +{ using Type = typename GetTypeThrust::type; - auto alpha_ = *reinterpret_cast(&alpha); + auto alpha_ = *reinterpret_cast (&alpha); const int block = (num_element + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - do_mul_kernel<<>> ( - num_element, alpha_, - reinterpret_cast(x), reinterpret_cast(y)); + do_mul_kernel<<>> (num_element, + alpha_, + reinterpret_cast (x), + reinterpret_cast (y)); } -template -void mul::operator()(const int& num_element, const T& alpha, const T* x, const T* y, T* z) { +template +void + mul::operator() (const int& num_element, const T& alpha, const T* x, const T* y, T* z) +{ using Type = typename GetTypeThrust::type; - auto alpha_ = *reinterpret_cast(&alpha); + auto alpha_ = *reinterpret_cast (&alpha); const int block = (num_element + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - do_mul_kernel<<>> ( - num_element, alpha_, - 
reinterpret_cast(x), reinterpret_cast(y), reinterpret_cast(z)); + do_mul_kernel<<>> (num_element, + alpha_, + reinterpret_cast (x), + reinterpret_cast (y), + reinterpret_cast (z)); } -template -void div::operator()(const int& num_element, const T& alpha, const T* x, const T* y, T* z) { +template +void + div::operator() (const int& num_element, const T& alpha, const T* x, const T* y, T* z) +{ using Type = typename GetTypeThrust::type; - auto alpha_ = *reinterpret_cast(&alpha); + auto alpha_ = *reinterpret_cast (&alpha); const int block = (num_element + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - do_div_kernel<<>> ( - num_element, alpha_, reinterpret_cast(x), reinterpret_cast(y), reinterpret_cast(z)); + do_div_kernel<<>> (num_element, + alpha_, + reinterpret_cast (x), + reinterpret_cast (y), + reinterpret_cast (z)); } -template -void fma::operator()(const int& num_element, const T& alpha, const T* x, const T* y, const T& beta, const T* z, T* out) { +template +void + fma::operator() (const int& num_element, + const T& alpha, + const T* x, + const T* y, + const T& beta, + const T* z, + T* out) +{ using Type = typename GetTypeThrust::type; - auto alpha_ = *reinterpret_cast(&alpha); - auto beta_ = *reinterpret_cast(&beta); + auto alpha_ = *reinterpret_cast (&alpha); + auto beta_ = *reinterpret_cast (&beta); const int block = (num_element + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - do_fma_kernel<<>> ( - num_element, alpha_, reinterpret_cast(x), reinterpret_cast(y), - beta_, reinterpret_cast(z), reinterpret_cast(out)); + do_fma_kernel<<>> (num_element, + alpha_, + reinterpret_cast (x), + reinterpret_cast (y), + beta_, + reinterpret_cast (z), + reinterpret_cast (out)); } -template -void transpose::operator()( - const std::vector &perm, - const std::vector &p_shape, - const std::vector &q_shape, - const T *p, - T *q) +template +void + transpose::operator() (const std::vector& perm, + const std::vector& p_shape, + const std::vector& q_shape, + const T* p, + T* q) { 
using Type = typename GetTypeThrust::type; - REQUIRES_OK(p_shape.size() == q_shape.size(), - "transpose: p and q must have the same number of dimensions"); - const int ndim = static_cast(p_shape.size()); - auto in_strides = compute_stride(p_shape); - auto out_strides = compute_stride(q_shape); + REQUIRES_OK (p_shape.size () == q_shape.size (), "transpose: p and q must have the same number of dimensions"); + const int ndim = static_cast (p_shape.size ()); + auto in_strides = compute_stride (p_shape); + auto out_strides = compute_stride (q_shape); int num_elements = 1; - for (int ii = 0; ii < ndim; ++ii) { - num_elements *= static_cast(q_shape[ii]); - } + for (int ii = 0; ii < ndim; ++ii) + { + num_elements *= static_cast (q_shape[ii]); + } num_elements = ndim ? num_elements : 0; - Tensor t_perm(DataType::DT_INT, DeviceType::GpuDevice, {ndim}); - Tensor t_in_strides(DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); - Tensor t_out_strides(DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); + Tensor t_perm (DataType::DT_INT, DeviceType::GpuDevice, {ndim}); + Tensor t_in_strides (DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); + Tensor t_out_strides (DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); - kernels::synchronize_memory()( - t_perm.data(), perm.data(), perm.size()); - kernels::synchronize_memory()( - t_in_strides.data(), in_strides.data(), in_strides.size()); - kernels::synchronize_memory()( - t_out_strides.data(), out_strides.data(), out_strides.size()); + kernels::synchronize_memory () (t_perm.data (), perm.data (), perm.size ()); + kernels::synchronize_memory () (t_in_strides.data (), + in_strides.data (), + in_strides.size ()); + kernels::synchronize_memory () (t_out_strides.data (), + out_strides.data (), + out_strides.size ()); - const Type* p_ = reinterpret_cast(p); - Type* q_ = reinterpret_cast((q)); + const Type* p_ = reinterpret_cast (p); + Type* q_ = reinterpret_cast ((q)); const int block = (num_elements + THREADS_PER_BLOCK - 1) / 
THREADS_PER_BLOCK; - do_transpose_kernel<<>> ( - ndim, num_elements, p_, t_perm.data(), - t_in_strides.data(), t_out_strides.data(), q_); + do_transpose_kernel<<>> (ndim, + num_elements, + p_, + t_perm.data (), + t_in_strides.data (), + t_out_strides.data (), + q_); } -template -void stride::operator()( - const std::vector &stride, - const std::vector &p_shape, - const std::vector &q_shape, - const T *p, - T *q) +template +void + stride::operator() (const std::vector& stride, + const std::vector& p_shape, + const std::vector& q_shape, + const T* p, + T* q) { using Type = typename GetTypeThrust::type; - REQUIRES_OK(p_shape.size() == q_shape.size(), - "transpose: p and q must have the same number of dimensions"); - const int ndim = static_cast(p_shape.size()); - auto in_strides = compute_stride(p_shape); - auto out_strides = compute_stride(q_shape); + REQUIRES_OK (p_shape.size () == q_shape.size (), "transpose: p and q must have the same number of dimensions"); + const int ndim = static_cast (p_shape.size ()); + auto in_strides = compute_stride (p_shape); + auto out_strides = compute_stride (q_shape); int num_elements = 1; - for (int ii = 0; ii < ndim; ++ii) { - num_elements *= static_cast(q_shape[ii]); - } + for (int ii = 0; ii < ndim; ++ii) + { + num_elements *= static_cast (q_shape[ii]); + } num_elements = ndim ? 
num_elements : 0; - Tensor t_stride(DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); - Tensor t_in_strides(DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); - Tensor t_out_strides(DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); + Tensor t_stride (DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); + Tensor t_in_strides (DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); + Tensor t_out_strides (DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); - kernels::synchronize_memory()( - t_stride.data(), stride.data(), stride.size()); - kernels::synchronize_memory()( - t_in_strides.data(), in_strides.data(), in_strides.size()); - kernels::synchronize_memory()( - t_out_strides.data(), out_strides.data(), out_strides.size()); + kernels::synchronize_memory () (t_stride.data (), + stride.data (), + stride.size ()); + kernels::synchronize_memory () (t_in_strides.data (), + in_strides.data (), + in_strides.size ()); + kernels::synchronize_memory () (t_out_strides.data (), + out_strides.data (), + out_strides.size ()); - const Type* p_ = reinterpret_cast(p); - Type* q_ = reinterpret_cast((q)); + const Type* p_ = reinterpret_cast (p); + Type* q_ = reinterpret_cast ((q)); const int block = (num_elements + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - do_stride_kernel<<>> ( - ndim, num_elements, p_, t_stride.data(), t_in_strides.data(), t_out_strides.data(), q_); + do_stride_kernel<<>> (ndim, + num_elements, + p_, + t_stride.data (), + t_in_strides.data (), + t_out_strides.data (), + q_); } -template -void inflate::operator()( - const std::vector &inflate, - const std::vector &p_shape, - const std::vector &q_shape, - const T *p, - T *q) +template +void + inflate::operator() (const std::vector& inflate, + const std::vector& p_shape, + const std::vector& q_shape, + const T* p, + T* q) { using Type = typename GetTypeThrust::type; - REQUIRES_OK(p_shape.size() == q_shape.size(), - "transpose: p and q must have the same number of dimensions"); - const int ndim = 
static_cast(p_shape.size()); - auto in_strides = compute_stride(p_shape); - auto out_strides = compute_stride(q_shape); + REQUIRES_OK (p_shape.size () == q_shape.size (), "transpose: p and q must have the same number of dimensions"); + const int ndim = static_cast (p_shape.size ()); + auto in_strides = compute_stride (p_shape); + auto out_strides = compute_stride (q_shape); int num_elements = 1; - for (int ii = 0; ii < ndim; ++ii) { - num_elements *= static_cast(q_shape[ii]); - } + for (int ii = 0; ii < ndim; ++ii) + { + num_elements *= static_cast (q_shape[ii]); + } num_elements = ndim ? num_elements : 0; - Tensor t_stride(DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); - Tensor t_in_strides(DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); - Tensor t_out_strides(DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); + Tensor t_stride (DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); + Tensor t_in_strides (DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); + Tensor t_out_strides (DataType::DT_INT64, DeviceType::GpuDevice, {ndim}); - kernels::synchronize_memory()( - t_stride.data(), inflate.data(), inflate.size()); - kernels::synchronize_memory()( - t_in_strides.data(), in_strides.data(), in_strides.size()); - kernels::synchronize_memory()( - t_out_strides.data(), out_strides.data(), out_strides.size()); + kernels::synchronize_memory () (t_stride.data (), + inflate.data (), + inflate.size ()); + kernels::synchronize_memory () (t_in_strides.data (), + in_strides.data (), + in_strides.size ()); + kernels::synchronize_memory () (t_out_strides.data (), + out_strides.data (), + out_strides.size ()); - const Type* p_ = reinterpret_cast(p); - Type* q_ = reinterpret_cast((q)); + const Type* p_ = reinterpret_cast (p); + Type* q_ = reinterpret_cast ((q)); const int block = (num_elements + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - do_inflate_kernel<<>> ( - ndim, num_elements, p_, t_stride.data(), t_in_strides.data(), t_out_strides.data(), q_); + 
do_inflate_kernel<<>> (ndim, + num_elements, + p_, + t_stride.data (), + t_in_strides.data (), + t_out_strides.data (), + q_); } -template -void reduce::operator()( - const int64_t &num_element, - const int64_t &inner_most_dim, - const T *p, - T *q) +template +void + reduce::operator() (const int64_t& num_element, const int64_t& inner_most_dim, const T* p, T* q) { using Type = typename GetTypeThrust::type; - const Type* p_ = reinterpret_cast(p); - Type* q_ = reinterpret_cast((q)); + const Type* p_ = reinterpret_cast (p); + Type* q_ = reinterpret_cast ((q)); - const int block = (static_cast(num_element) + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - do_reduce_kernel<<>> ( - num_element, inner_most_dim, p_, q_); + const int block = (static_cast (num_element) + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + do_reduce_kernel<<>> (num_element, inner_most_dim, p_, q_); } template struct add; diff --git a/source/source_base/module_container/ATen/kernels/rocm/memory.hip.cu b/source/source_base/module_container/ATen/kernels/rocm/memory.hip.cu index b45a7cfd568..e6cdc3171d1 100644 --- a/source/source_base/module_container/ATen/kernels/rocm/memory.hip.cu +++ b/source/source_base/module_container/ATen/kernels/rocm/memory.hip.cu @@ -4,150 +4,145 @@ #include #include -namespace container { -namespace kernels { +namespace container +{ +namespace kernels +{ template -__global__ void do_set_memory( - T* out, - const T var, - const size_t size) +__global__ void + do_set_memory (T* out, const T var, const size_t size) { int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= size) {return;} + if (idx >= size) + { + return; + } out[idx] = var; } template -__global__ void do_cast_memory( - T_out* out, - const T_in* in, - const int size) +__global__ void + do_cast_memory (T_out* out, const T_in* in, const int size) { unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= size) {return;} - out[idx] = static_cast(in[idx]); + if (idx >= size) + { + return; + } + 
out[idx] = static_cast (in[idx]); } template -__global__ void do_cast_memory( - std::complex* out, - const std::complex* in, - const int size) +__global__ void + do_cast_memory (std::complex* out, const std::complex* in, const int size) { unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= size) {return;} - auto* _out = reinterpret_cast*>(out); - const auto* _in = reinterpret_cast*>(in); - _out[idx] = static_cast>(_in[idx]); + if (idx >= size) + { + return; + } + auto* _out = reinterpret_cast*> (out); + const auto* _in = reinterpret_cast*> (in); + _out[idx] = static_cast> (_in[idx]); } template -void resize_memory::operator()( - T*& arr, - const size_t& size, - const char* record_in) +void + resize_memory::operator() (T*& arr, const size_t& size, const char* record_in) { - if (arr != nullptr) { - delete_memory()(arr); - } - hipMalloc((void **)&arr, sizeof(T) * size); + if (arr != nullptr) + { + delete_memory () (arr); + } + hipMalloc ((void**)&arr, sizeof (T) * size); } template -void set_memory::operator()( - T* arr, - const T& var, - const size_t& size) +void + set_memory::operator() (T* arr, const T& var, const size_t& size) { const int block = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - do_set_memory<<>>(arr, var, size); + do_set_memory<<>> (arr, var, size); } template -struct synchronize_memory { - void operator()( - T *arr_out, - const T *arr_in, - const size_t& size) +struct synchronize_memory +{ + void + operator() (T* arr_out, const T* arr_in, const size_t& size) { - hipMemcpy(arr_out, arr_in, sizeof(T) * size, hipMemcpyDeviceToHost); + hipMemcpy (arr_out, arr_in, sizeof (T) * size, hipMemcpyDeviceToHost); } }; template -struct synchronize_memory { - void operator()( - T *arr_out, - const T *arr_in, - const size_t& size) +struct synchronize_memory +{ + void + operator() (T* arr_out, const T* arr_in, const size_t& size) { - hipMemcpy(arr_out, arr_in, sizeof(T) * size, hipMemcpyHostToDevice); + hipMemcpy (arr_out, arr_in, sizeof 
(T) * size, hipMemcpyHostToDevice); } }; template -struct synchronize_memory { - void operator()( - T *arr_out, - const T *arr_in, - const size_t& size) +struct synchronize_memory +{ + void + operator() (T* arr_out, const T* arr_in, const size_t& size) { - hipMemcpy(arr_out, arr_in, sizeof(T) * size, hipMemcpyDeviceToDevice); + hipMemcpy (arr_out, arr_in, sizeof (T) * size, hipMemcpyDeviceToDevice); } }; template -struct cast_memory { - void operator()( - T_out* arr_out, - const T_in* arr_in, - const size_t& size) +struct cast_memory +{ + void + operator() (T_out* arr_out, const T_in* arr_in, const size_t& size) { - const int block = static_cast((size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK); - do_cast_memory<<>>(arr_out, arr_in, size); + const int block = static_cast ((size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK); + do_cast_memory<<>> (arr_out, arr_in, size); } }; - template -struct cast_memory { - void operator()( - T_out* arr_out, - const T_in* arr_in, - const size_t& size) +struct cast_memory +{ + void + operator() (T_out* arr_out, const T_in* arr_in, const size_t& size) { - T_in * arr = nullptr; - hipMalloc((void **)&arr, sizeof(T_in) * size); - hipMemcpy(arr, arr_in, sizeof(T_in) * size, hipMemcpyHostToDevice); - const int block = static_cast((size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK); - do_cast_memory<<>>(arr_out, arr, size); - hipFree(arr); + T_in* arr = nullptr; + hipMalloc ((void**)&arr, sizeof (T_in) * size); + hipMemcpy (arr, arr_in, sizeof (T_in) * size, hipMemcpyHostToDevice); + const int block = static_cast ((size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK); + do_cast_memory<<>> (arr_out, arr, size); + hipFree (arr); } }; - template -struct cast_memory { - void operator()( - T_out* arr_out, - const T_in* arr_in, - const size_t& size) +struct cast_memory +{ + void + operator() (T_out* arr_out, const T_in* arr_in, const size_t& size) { - auto * arr = (T_in*) malloc(sizeof(T_in) * size); - hipMemcpy(arr, arr_in, sizeof(T_in) * 
size, hipMemcpyDeviceToHost); - for (int ii = 0; ii < size; ii++) { - arr_out[ii] = static_cast(arr[ii]); - } - free(arr); + auto* arr = (T_in*)malloc (sizeof (T_in) * size); + hipMemcpy (arr, arr_in, sizeof (T_in) * size, hipMemcpyDeviceToHost); + for (int ii = 0; ii < size; ii++) + { + arr_out[ii] = static_cast (arr[ii]); + } + free (arr); } }; template -void delete_memory::operator() ( - T* arr) +void + delete_memory::operator() (T* arr) { - hipFree(arr); + hipFree (arr); } template struct resize_memory; @@ -158,7 +153,7 @@ template struct resize_memory, container::DEVICE_GPU>; template struct resize_memory, container::DEVICE_GPU>; template struct set_memory; -template struct set_memory; +template struct set_memory; template struct set_memory; template struct set_memory; template struct set_memory, container::DEVICE_GPU>; diff --git a/source/source_base/module_container/ATen/kernels/test/blas_test.cpp b/source/source_base/module_container/ATen/kernels/test/blas_test.cpp index d0c53422d46..341704c22a0 100644 --- a/source/source_base/module_container/ATen/kernels/test/blas_test.cpp +++ b/source/source_base/module_container/ATen/kernels/test/blas_test.cpp @@ -4,133 +4,154 @@ #include #include -namespace container { -namespace kernels { +namespace container +{ +namespace kernels +{ template -class BlasTest : public testing::Test { -public: - BlasTest() { - base::utils::init_blas_handle(); - } - ~BlasTest() override { - base::utils::delete_blas_handle(); - } +class BlasTest : public testing::Test +{ + public: + BlasTest () { base::utils::init_blas_handle (); } + ~BlasTest () override { base::utils::delete_blas_handle (); } }; -TYPED_TEST_SUITE(BlasTest, base::utils::Types); +TYPED_TEST_SUITE (BlasTest, base::utils::Types); -TYPED_TEST(BlasTest, Copy) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (BlasTest, Copy) +{ + using Type = typename 
std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; blas_copy copyCalculator; const int n = 3; - const Tensor x = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0)}).to_device()); - Tensor y = std::move(Tensor({static_cast(0.0), static_cast(0.0), static_cast(0.0)}).to_device()); + const Tensor x = std::move ( + Tensor ({static_cast (1.0), static_cast (2.0), static_cast (3.0)}).to_device ()); + Tensor y = std::move ( + Tensor ({static_cast (0.0), static_cast (0.0), static_cast (0.0)}).to_device ()); - copyCalculator(n, x.data(), 1, y.data(), 1); - const Tensor expected = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0)}).to_device()); + copyCalculator (n, x.data (), 1, y.data (), 1); + const Tensor expected = std::move ( + Tensor ({static_cast (1.0), static_cast (2.0), static_cast (3.0)}).to_device ()); - EXPECT_EQ(y, expected); + EXPECT_EQ (y, expected); } -TYPED_TEST(BlasTest, Nrm2) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (BlasTest, Nrm2) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; blas_nrm2 nrm2Calculator; const int n = 3; - const Tensor x = std::move(Tensor({static_cast(3.0), static_cast(4.0), static_cast(0.0)}).to_device()); + const Tensor x = std::move ( + Tensor ({static_cast (3.0), static_cast (4.0), static_cast (0.0)}).to_device ()); using Real = typename GetTypeReal::type; Real result = {}; - result = nrm2Calculator(n, x.data(), 1); - const Real expected = static_cast(5.0); + result = nrm2Calculator (n, x.data (), 1); + const Real expected = static_cast (5.0); - EXPECT_NEAR(result, expected, static_cast(1e-6)); + EXPECT_NEAR (result, expected, static_cast (1e-6)); } -TYPED_TEST(BlasTest, Dot) 
{ - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (BlasTest, Dot) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; blas_dot dotCalculator; const int n = 3; - const Tensor x = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0)}).to_device()); - const Tensor y = std::move(Tensor({static_cast(4.0), static_cast(5.0), static_cast(6.0)}).to_device()); + const Tensor x = std::move ( + Tensor ({static_cast (1.0), static_cast (2.0), static_cast (3.0)}).to_device ()); + const Tensor y = std::move ( + Tensor ({static_cast (4.0), static_cast (5.0), static_cast (6.0)}).to_device ()); Type result = {}; - dotCalculator(n, x.data(), 1, y.data(), 1, &result); - const Type expected = static_cast(32.0); + dotCalculator (n, x.data (), 1, y.data (), 1, &result); + const Type expected = static_cast (32.0); - EXPECT_EQ(result, expected); + EXPECT_EQ (result, expected); } -TYPED_TEST(BlasTest, Scal) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (BlasTest, Scal) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; blas_scal scalCalculator; const int n = 3; - const Type alpha = static_cast(2.0); - Tensor x = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0)}).to_device()); + const Type alpha = static_cast (2.0); + Tensor x = std::move ( + Tensor ({static_cast (1.0), static_cast (2.0), static_cast (3.0)}).to_device ()); - scalCalculator(n, &alpha, x.data(), 1); - const Tensor expected = std::move(Tensor({static_cast(2.0), static_cast(4.0), static_cast(6.0)}).to_device()); + scalCalculator (n, &alpha, 
x.data (), 1); + const Tensor expected = std::move ( + Tensor ({static_cast (2.0), static_cast (4.0), static_cast (6.0)}).to_device ()); - EXPECT_EQ(x, expected); + EXPECT_EQ (x, expected); } - -TYPED_TEST(BlasTest, Axpy) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (BlasTest, Axpy) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; blas_axpy axpyCalculator; const int n = 3; - const Type alpha = static_cast(2.0); - const Tensor x = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0)}).to_device()); - Tensor y = std::move(Tensor({static_cast(4.0), static_cast(5.0), static_cast(6.0)}).to_device()); + const Type alpha = static_cast (2.0); + const Tensor x = std::move ( + Tensor ({static_cast (1.0), static_cast (2.0), static_cast (3.0)}).to_device ()); + Tensor y = std::move ( + Tensor ({static_cast (4.0), static_cast (5.0), static_cast (6.0)}).to_device ()); - axpyCalculator(n, &alpha, x.data(), 1, y.data(), 1); - const Tensor expected = std::move(Tensor({static_cast(6.0), static_cast(9.0), static_cast(12.0)}).to_device()); + axpyCalculator (n, &alpha, x.data (), 1, y.data (), 1); + const Tensor expected = std::move ( + Tensor ({static_cast (6.0), static_cast (9.0), static_cast (12.0)}).to_device ()); - EXPECT_EQ(y, expected); + EXPECT_EQ (y, expected); } - -TYPED_TEST(BlasTest, Gemv) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (BlasTest, Gemv) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; blas_gemv gemvCalculator; const char trans = 'N'; const int m = 3; const int n = 2; - const Type 
alpha = static_cast(2.0); - const Type beta = static_cast(3.0); - const Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(4.0), static_cast(5.0), static_cast(6.0)}).to_device()); - const Tensor x = std::move(Tensor({static_cast(1.0), static_cast(2.0)}).to_device()); - Tensor y = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0)}).to_device()); - - gemvCalculator(trans, m, n, &alpha, A.data(), m, x.data(), 1, &beta, y.data(), 1); - const Tensor expected = std::move(Tensor({static_cast(21.0), static_cast(30.0), static_cast(39.0)}).to_device()); - - EXPECT_EQ(y, expected); + const Type alpha = static_cast (2.0); + const Type beta = static_cast (3.0); + const Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0)}) + .to_device ()); + const Tensor x = std::move (Tensor ({static_cast (1.0), static_cast (2.0)}).to_device ()); + Tensor y = std::move ( + Tensor ({static_cast (1.0), static_cast (2.0), static_cast (3.0)}).to_device ()); + + gemvCalculator (trans, m, n, &alpha, A.data (), m, x.data (), 1, &beta, y.data (), 1); + const Tensor expected = std::move ( + Tensor ({static_cast (21.0), static_cast (30.0), static_cast (39.0)}).to_device ()); + + EXPECT_EQ (y, expected); } - -TYPED_TEST(BlasTest, GemvBatched) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (BlasTest, GemvBatched) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; blas_gemv gemvCalculator; blas_gemv_batched gemvBatchedCalculator; @@ -139,48 +160,60 @@ TYPED_TEST(BlasTest, GemvBatched) { const int m = 3; const int n = 2; const int batch_size = 2; - const Type alpha = static_cast(2.0); - const Type beta = 
static_cast(3.0); + const Type alpha = static_cast (2.0); + const Type beta = static_cast (3.0); std::vector A = {}; std::vector x = {}; std::vector y = {}; - const Tensor _A = std::move(Tensor({ - static_cast(1.0), static_cast(2.0), - static_cast(3.0), static_cast(4.0), - static_cast(5.0), static_cast(6.0), - - static_cast(7.0), static_cast(8.0), - static_cast(9.0), static_cast(10.0), - static_cast(11.0),static_cast(12.0)}).to_device()); - - A.push_back(_A.data()); - A.push_back(_A.data() + m * n); - - const Tensor _x = std::move(Tensor({static_cast(1.0), static_cast(2.0)}).to_device()); - x.push_back(_x.data()); - x.push_back(_x.data()); - - Tensor _y1 = std::move(Tensor({static_cast(4.0), static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), static_cast(9.0)}).to_device()); + const Tensor _A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + + static_cast (7.0), + static_cast (8.0), + static_cast (9.0), + static_cast (10.0), + static_cast (11.0), + static_cast (12.0)}) + .to_device ()); + + A.push_back (_A.data ()); + A.push_back (_A.data () + m * n); + + const Tensor _x = std::move (Tensor ({static_cast (1.0), static_cast (2.0)}).to_device ()); + x.push_back (_x.data ()); + x.push_back (_x.data ()); + + Tensor _y1 = std::move (Tensor ({static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (9.0)}) + .to_device ()); Tensor _y2 = _y1; - y.push_back(_y1.data()); - y.push_back(_y1.data() + m); + y.push_back (_y1.data ()); + y.push_back (_y1.data () + m); - gemvBatchedCalculator(trans, m, n, &alpha, A.data(), m, x.data(), 1, &beta, y.data(), 1, batch_size); + gemvBatchedCalculator (trans, m, n, &alpha, A.data (), m, x.data (), 1, &beta, y.data (), 1, batch_size); - for (int ii = 0; ii < batch_size; ++ii) { - gemvCalculator(trans, m, n, &alpha, A[ii], m, x[ii], 1, &beta, _y2.data() + ii * m, 
1); - } + for (int ii = 0; ii < batch_size; ++ii) + { + gemvCalculator (trans, m, n, &alpha, A[ii], m, x[ii], 1, &beta, _y2.data () + ii * m, 1); + } - EXPECT_EQ(_y1, _y2); + EXPECT_EQ (_y1, _y2); } - -TYPED_TEST(BlasTest, GemvBatchedStrided) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (BlasTest, GemvBatchedStrided) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; blas_gemv gemvCalculator; blas_gemv_batched_strided gemvBatchedStridedCalculator; @@ -189,70 +222,89 @@ TYPED_TEST(BlasTest, GemvBatchedStrided) { const int m = 3; const int n = 2; const int batch_size = 2; - const Type alpha = static_cast(2.0); - const Type beta = static_cast(3.0); + const Type alpha = static_cast (2.0); + const Type beta = static_cast (3.0); std::vector A = {}; std::vector x = {}; std::vector y = {}; - const Tensor _A = std::move(Tensor({ - static_cast(1.0), static_cast(2.0), - static_cast(3.0), static_cast(4.0), - static_cast(5.0), static_cast(6.0), - - static_cast(7.0), static_cast(8.0), - static_cast(9.0), static_cast(10.0), - static_cast(11.0),static_cast(12.0)}).to_device()); - - A.push_back(_A.data()); - A.push_back(_A.data() + m * n); - - const Tensor _x = std::move(Tensor({static_cast(1.0), static_cast(2.0)}).to_device()); - x.push_back(_x.data()); - x.push_back(_x.data()); - - Tensor _y1 = std::move(Tensor({static_cast(4.0), static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), static_cast(9.0)}).to_device()); + const Tensor _A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + + static_cast (7.0), + static_cast (8.0), + static_cast (9.0), + static_cast (10.0), + static_cast (11.0), + static_cast (12.0)}) + .to_device ()); + + 
A.push_back (_A.data ()); + A.push_back (_A.data () + m * n); + + const Tensor _x = std::move (Tensor ({static_cast (1.0), static_cast (2.0)}).to_device ()); + x.push_back (_x.data ()); + x.push_back (_x.data ()); + + Tensor _y1 = std::move (Tensor ({static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (9.0)}) + .to_device ()); Tensor _y2 = _y1; - y.push_back(_y1.data()); - y.push_back(_y1.data() + m); + y.push_back (_y1.data ()); + y.push_back (_y1.data () + m); - gemvBatchedStridedCalculator(trans, m, n, &alpha, A[0], m, m * n, x[0], 1, 0, &beta, y[0], 1, m, batch_size); + gemvBatchedStridedCalculator (trans, m, n, &alpha, A[0], m, m * n, x[0], 1, 0, &beta, y[0], 1, m, batch_size); - for (int ii = 0; ii < batch_size; ++ii) { - gemvCalculator(trans, m, n, &alpha, A[ii], m, x[ii], 1, &beta, _y2.data() + ii * m, 1); - } - EXPECT_EQ(_y1, _y2); + for (int ii = 0; ii < batch_size; ++ii) + { + gemvCalculator (trans, m, n, &alpha, A[ii], m, x[ii], 1, &beta, _y2.data () + ii * m, 1); + } + EXPECT_EQ (_y1, _y2); } - -TYPED_TEST(BlasTest, Gemm) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (BlasTest, Gemm) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; blas_gemm gemmCalculator; const char trans = 'N'; const int m = 3; const int n = 2; - const Type alpha = static_cast(2.0); - const Type beta = static_cast(3.0); - const Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(4.0), static_cast(5.0), static_cast(6.0)}).to_device()); - const Tensor x = std::move(Tensor({static_cast(1.0), static_cast(2.0)}).to_device()); - Tensor y = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0)}).to_device()); - - gemmCalculator(trans, 
trans, m, 1, n, &alpha, A.data(), m, x.data(), n, &beta, y.data(), m); - const Tensor expected = std::move(Tensor({static_cast(21.0), static_cast(30.0), static_cast(39.0)}).to_device()); - - EXPECT_EQ(y, expected); + const Type alpha = static_cast (2.0); + const Type beta = static_cast (3.0); + const Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0)}) + .to_device ()); + const Tensor x = std::move (Tensor ({static_cast (1.0), static_cast (2.0)}).to_device ()); + Tensor y = std::move ( + Tensor ({static_cast (1.0), static_cast (2.0), static_cast (3.0)}).to_device ()); + + gemmCalculator (trans, trans, m, 1, n, &alpha, A.data (), m, x.data (), n, &beta, y.data (), m); + const Tensor expected = std::move ( + Tensor ({static_cast (21.0), static_cast (30.0), static_cast (39.0)}).to_device ()); + + EXPECT_EQ (y, expected); } - -TYPED_TEST(BlasTest, GemmBatched) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (BlasTest, GemmBatched) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; blas_gemv_batched gemvBatchedCalculator; blas_gemm_batched gemmBatchedCalculator; @@ -261,48 +313,59 @@ TYPED_TEST(BlasTest, GemmBatched) { const int m = 3; const int n = 2; const int batch_size = 2; - const Type alpha = static_cast(2.0); - const Type beta = static_cast(3.0); + const Type alpha = static_cast (2.0); + const Type beta = static_cast (3.0); std::vector A = {}; std::vector x = {}; std::vector y1 = {}; std::vector y2 = {}; - const Tensor _A = std::move(Tensor({ - static_cast(1.0), static_cast(2.0), - static_cast(3.0), static_cast(4.0), - static_cast(5.0), static_cast(6.0), - - static_cast(7.0), static_cast(8.0), - static_cast(9.0), static_cast(10.0), - 
static_cast(11.0),static_cast(12.0)}).to_device()); - - A.push_back(_A.data()); - A.push_back(_A.data() + m * n); - - const Tensor _x = std::move(Tensor({static_cast(1.0), static_cast(2.0)}).to_device()); - x.push_back(_x.data()); - x.push_back(_x.data()); - - Tensor _y1 = std::move(Tensor({static_cast(4.0), static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), static_cast(9.0)}).to_device()); + const Tensor _A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + + static_cast (7.0), + static_cast (8.0), + static_cast (9.0), + static_cast (10.0), + static_cast (11.0), + static_cast (12.0)}) + .to_device ()); + + A.push_back (_A.data ()); + A.push_back (_A.data () + m * n); + + const Tensor _x = std::move (Tensor ({static_cast (1.0), static_cast (2.0)}).to_device ()); + x.push_back (_x.data ()); + x.push_back (_x.data ()); + + Tensor _y1 = std::move (Tensor ({static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (9.0)}) + .to_device ()); Tensor _y2 = _y1; - y1.push_back(_y1.data()); - y1.push_back(_y1.data() + m); - y2.push_back(_y2.data()); - y2.push_back(_y2.data() + m); + y1.push_back (_y1.data ()); + y1.push_back (_y1.data () + m); + y2.push_back (_y2.data ()); + y2.push_back (_y2.data () + m); - gemvBatchedCalculator(trans, m, n, &alpha, A.data(), m, x.data(), 1, &beta, y1.data(), 1, batch_size); - gemmBatchedCalculator(trans, trans, m, 1, n, &alpha, A.data(), m, x.data(), n, &beta, y2.data(), m, batch_size); + gemvBatchedCalculator (trans, m, n, &alpha, A.data (), m, x.data (), 1, &beta, y1.data (), 1, batch_size); + gemmBatchedCalculator (trans, trans, m, 1, n, &alpha, A.data (), m, x.data (), n, &beta, y2.data (), m, batch_size); - EXPECT_EQ(_y1, _y2); + EXPECT_EQ (_y1, _y2); } - -TYPED_TEST(BlasTest, GemmBatchedStrided) { - using Type = typename std::tuple_element<0, 
decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (BlasTest, GemmBatchedStrided) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; blas_gemv_batched_strided gemvBatchedStridedCalculator; blas_gemm_batched_strided gemmBatchedStridedCalculator; @@ -311,48 +374,77 @@ TYPED_TEST(BlasTest, GemmBatchedStrided) { const int m = 3; const int n = 2; const int batch_size = 2; - const Type alpha = static_cast(2.0); - const Type beta = static_cast(3.0); + const Type alpha = static_cast (2.0); + const Type beta = static_cast (3.0); std::vector A = {}; std::vector x = {}; std::vector y1 = {}; std::vector y2 = {}; - const Tensor _A = std::move(Tensor({ - static_cast(1.0), static_cast(2.0), - static_cast(3.0), static_cast(4.0), - static_cast(5.0), static_cast(6.0), - - static_cast(7.0), static_cast(8.0), - static_cast(9.0), static_cast(10.0), - static_cast(11.0),static_cast(12.0)}).to_device()); - - A.push_back(_A.data()); - A.push_back(_A.data() + m * n); - - const Tensor _x = std::move(Tensor({static_cast(1.0), static_cast(2.0)}).to_device()); - x.push_back(_x.data()); - x.push_back(_x.data()); - - Tensor _y1 = std::move(Tensor({static_cast(4.0), static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), static_cast(9.0)}).to_device()); + const Tensor _A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + + static_cast (7.0), + static_cast (8.0), + static_cast (9.0), + static_cast (10.0), + static_cast (11.0), + static_cast (12.0)}) + .to_device ()); + + A.push_back (_A.data ()); + A.push_back (_A.data () + m * n); + + const Tensor _x = std::move (Tensor ({static_cast (1.0), static_cast (2.0)}).to_device ()); + x.push_back (_x.data ()); + x.push_back (_x.data ()); + + Tensor _y1 = std::move 
(Tensor ({static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (9.0)}) + .to_device ()); Tensor _y2 = _y1; - y1.push_back(_y1.data()); - y1.push_back(_y1.data() + m); - y2.push_back(_y2.data()); - y2.push_back(_y2.data() + m); - - gemvBatchedStridedCalculator(trans, m, n, &alpha, A[0], m, m * n, x[0], 1, 0, &beta, y1[0], 1, m, batch_size); - gemmBatchedStridedCalculator(trans, trans, m, 1, n, &alpha, A[0], m, m * n, x[0], n, 0, &beta, y2[0], m, m, batch_size); - - EXPECT_EQ(_y1, _y2); + y1.push_back (_y1.data ()); + y1.push_back (_y1.data () + m); + y2.push_back (_y2.data ()); + y2.push_back (_y2.data () + m); + + gemvBatchedStridedCalculator (trans, m, n, &alpha, A[0], m, m * n, x[0], 1, 0, &beta, y1[0], 1, m, batch_size); + gemmBatchedStridedCalculator (trans, + trans, + m, + 1, + n, + &alpha, + A[0], + m, + m * n, + x[0], + n, + 0, + &beta, + y2[0], + m, + m, + batch_size); + + EXPECT_EQ (_y1, _y2); } -} // namespace op +} // namespace kernels } // namespace container -int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); +int + main (int argc, char** argv) +{ + ::testing::InitGoogleTest (&argc, argv); + return RUN_ALL_TESTS (); } diff --git a/source/source_base/module_container/ATen/kernels/test/lapack_test.cpp b/source/source_base/module_container/ATen/kernels/test/lapack_test.cpp index 5524ca6c50e..b2a4e8f8eb3 100644 --- a/source/source_base/module_container/ATen/kernels/test/lapack_test.cpp +++ b/source/source_base/module_container/ATen/kernels/test/lapack_test.cpp @@ -4,134 +4,189 @@ #include #include -namespace container { -namespace kernels { +namespace container +{ +namespace kernels +{ template -class LapackTest : public testing::Test { -public: - LapackTest() { - base::utils::init_blas_handle(); - base::utils::init_cusolver_handle(); +class LapackTest : public testing::Test +{ + public: + LapackTest () + { + base::utils::init_blas_handle (); 
+ base::utils::init_cusolver_handle (); } - ~LapackTest() override { - base::utils::delete_blas_handle(); - base::utils::delete_cusolver_handle(); + ~LapackTest () override + { + base::utils::delete_blas_handle (); + base::utils::delete_cusolver_handle (); } }; -TYPED_TEST_SUITE(LapackTest, base::utils::Types); +TYPED_TEST_SUITE (LapackTest, base::utils::Types); -TYPED_TEST(LapackTest, Trtri) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; +TYPED_TEST (LapackTest, Trtri) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; using Device = DEVICE_CPU; blas_gemm gemmCalculator; lapack_trtri trtriCalculator; const int dim = 3; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); - - Tensor I = std::move(Tensor({static_cast(1.0), static_cast(0.0), static_cast(0.0), - static_cast(0.0), static_cast(1.0), static_cast(0.0), - static_cast(0.0), static_cast(0.0), static_cast(1.0)}).to_device()); + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); + + Tensor I = std::move (Tensor ({static_cast (1.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (1.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (1.0)}) + .to_device ()); Tensor B = A; Tensor C = B; - C.zero(); + C.zero (); const char trans = 'N'; const int m = 3; const int n = 3; const int k = 3; - const Type alpha = static_cast(1.0); - const Type beta = static_cast(0.0); + const Type alpha = static_cast (1.0); + const Type beta = static_cast (0.0); // Note all blas and lapack operators within container are column major! 
// For this reason, we should employ 'L' instead of 'U' in the subsequent line. - trtriCalculator('L', 'N', dim, B.data(), dim); - gemmCalculator(trans, trans, m, n, k, &alpha, B.data(), k, A.data(), n, &beta, C.data(), n); + trtriCalculator ('L', 'N', dim, B.data (), dim); + gemmCalculator (trans, trans, m, n, k, &alpha, B.data (), k, A.data (), n, &beta, C.data (), n); - EXPECT_EQ(C, I); + EXPECT_EQ (C, I); } -TYPED_TEST(LapackTest, Potrf) { +TYPED_TEST (LapackTest, Potrf) +{ return; - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; blas_gemm gemmCalculator; lapack_potrf potrfCalculator; set_matrix setMatrixCalculator; const int dim = 3; - Tensor A = std::move(Tensor({static_cast(4.0), static_cast(1.0), static_cast(2.0), - static_cast(1.0), static_cast(5.0), static_cast(3.0), - static_cast(2.0), static_cast(3.0), static_cast(6.0)}).to_device()); + Tensor A = std::move (Tensor ({static_cast (4.0), + static_cast (1.0), + static_cast (2.0), + static_cast (1.0), + static_cast (5.0), + static_cast (3.0), + static_cast (2.0), + static_cast (3.0), + static_cast (6.0)}) + .to_device ()); Tensor B = A; Tensor C = B; - C.zero(); + C.zero (); const char transa = 'N'; const char transb = 'C'; const int m = 3; const int n = 3; const int k = 3; - const Type alpha = static_cast(1.0); - const Type beta = static_cast(0.0); + const Type alpha = static_cast (1.0); + const Type beta = static_cast (0.0); // Note all blas and lapack operators within container are column major! // For this reason, we should employ 'L' instead of 'U' in the subsequent line. 
- potrfCalculator('L', dim, B.data(), dim); + potrfCalculator ('L', dim, B.data (), dim); // Keep the upper triangle of B - setMatrixCalculator('U', B.data(), dim); + setMatrixCalculator ('U', B.data (), dim); // A = U**T * U - gemmCalculator(transa, transb, m, n, k, &alpha, B.to_device().data(), k, B.to_device().data(), n, &beta, C.to_device().data(), n); - - EXPECT_EQ(A, C); + gemmCalculator (transa, + transb, + m, + n, + k, + &alpha, + B.to_device ().data (), + k, + B.to_device ().data (), + n, + &beta, + C.to_device ().data (), + n); + + EXPECT_EQ (A, C); } // lapack_geqrf_inplace, // check that QtQ = I -TYPED_TEST(LapackTest, GeqrfInPlace) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (LapackTest, GeqrfInPlace) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; lapack_geqrf_inplace geqrfCalculator; const int m = 4; - const int n = 3; // m >= n,Q is m x n column-orthogonal matrix + const int n = 3; // m >= n,Q is m x n column-orthogonal matrix const int lda = m; - Tensor A_input = std::move(Tensor({ - static_cast(1.0), static_cast(2.0), static_cast(3.0), static_cast(4.0), - static_cast(5.0), static_cast(6.0), static_cast(7.0), static_cast(8.0), - static_cast(9.0), static_cast(10.0), static_cast(11.0), static_cast(12.0) - }).to_device()); + Tensor A_input = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (9.0), + static_cast (10.0), + static_cast (11.0), + static_cast (12.0)}) + .to_device ()); Tensor A = A_input; // will be overwritten as Q // do geqrf -> get orthogonal Q - geqrfCalculator(m, n, A.data(), lda); + geqrfCalculator (m, n, A.data (), lda); // check on CPU - Tensor Q = 
A.to_device(); - const Type* Q_data = Q.data(); + Tensor Q = A.to_device (); + const Type* Q_data = Q.data (); // compute QtQ = Q^T * Q (n x n) Tensor QtQ = Q; // std::move(Tensor(std::vector(n * n, static_cast(0.0))).to_device()); - const Type alpha = static_cast(1.0); - const Type beta = static_cast(0.0); + const Type alpha = static_cast (1.0); + const Type beta = static_cast (0.0); blas_gemm gemm; - gemm('C', 'N', // Q^T * Q - n, n, m, // n x n - &alpha, - Q_data, lda, // Q^T - Q_data, lda, // Q - &beta, - QtQ.data(), n); + gemm ('C', + 'N', // Q^T * Q + n, + n, + m, // n x n + &alpha, + Q_data, + lda, // Q^T + Q_data, + lda, // Q + &beta, + QtQ.data (), + n); // To print value: first to_device CPU, then print // // Test code: print A @@ -160,229 +215,369 @@ TYPED_TEST(LapackTest, GeqrfInPlace) { // } // check QtQ - for (int i = 0; i < n; ++i) { - for (int j = 0; j < n; ++j) { - Type expected = (i == j) ? static_cast(1.0) : static_cast(0.0); - EXPECT_NEAR(std::abs(QtQ.data()[i + j * n]), std::abs(expected), 1e-5) - << "Q^T * Q not identity at (" << i << "," << j << ")"; + for (int i = 0; i < n; ++i) + { + for (int j = 0; j < n; ++j) + { + Type expected = (i == j) ? 
static_cast (1.0) : static_cast (0.0); + EXPECT_NEAR (std::abs (QtQ.data ()[i + j * n]), std::abs (expected), 1e-5) + << "Q^T * Q not identity at (" << i << "," << j << ")"; + } } - } } // Test for lapack_heevd and lapack_heevx: // Solve a standard eigenvalue problem // and check that A*V = V*E -TYPED_TEST(LapackTest, heevd) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; +TYPED_TEST (LapackTest, heevd) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; using Real = typename GetTypeReal::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; blas_gemm gemmCalculator; blas_axpy axpyCalculator; lapack_heevd heevdCalculator; const int dim = 3; - Tensor A = std::move(Tensor({static_cast(4.0), static_cast(1.0), static_cast(1.0), - static_cast(1.0), static_cast(5.0), static_cast(3.0), - static_cast(1.0), static_cast(3.0), static_cast(6.0)}).to_device()); - - Tensor E = std::move(Tensor({static_cast(0.0), static_cast(0.0), static_cast(0.0)}).to_device()); + Tensor A = std::move (Tensor ({static_cast (4.0), + static_cast (1.0), + static_cast (1.0), + static_cast (1.0), + static_cast (5.0), + static_cast (3.0), + static_cast (1.0), + static_cast (3.0), + static_cast (6.0)}) + .to_device ()); + + Tensor E = std::move ( + Tensor ({static_cast (0.0), static_cast (0.0), static_cast (0.0)}).to_device ()); Tensor B = A; Tensor expected_C1 = A; Tensor expected_C2 = A; - expected_C1.zero(); - expected_C2.zero(); + expected_C1.zero (); + expected_C2.zero (); const char trans = 'N'; const int m = 3; const int n = 3; const int k = 3; - const Type alpha = static_cast(1.0); - const Type beta = static_cast(0.0); + const Type alpha = static_cast (1.0); + const Type beta = static_cast (0.0); // Note all blas and lapack operators within container are column major! 
// For this reason, we should employ 'L' instead of 'U' in the subsequent line. // heevdCalculator('V', 'U', B.data(), dim, E.data()); - heevdCalculator(dim, B.data(), dim, E.data()); + heevdCalculator (dim, B.data (), dim, E.data ()); - E = E.to_device(); - const Tensor Alpha = std::move(Tensor({ - static_cast(E.data()[0]), - static_cast(E.data()[1]), - static_cast(E.data()[2])})); + E = E.to_device (); + const Tensor Alpha = std::move (Tensor ({static_cast (E.data ()[0]), + static_cast (E.data ()[1]), + static_cast (E.data ()[2])})); // Check the eigenvalues and eigenvectors // A * x = lambda * x - gemmCalculator(trans, trans, m, n, k, &alpha, A.data(), m, B.data(), k, &beta, expected_C1.data(), m); - for (int ii = 0; ii < dim; ii++) { - axpyCalculator(dim, Alpha.data() + ii, B.data() + ii * dim, 1, expected_C2.data() + ii * dim, 1); - } - EXPECT_EQ(expected_C1, expected_C2); + gemmCalculator (trans, + trans, + m, + n, + k, + &alpha, + A.data (), + m, + B.data (), + k, + &beta, + expected_C1.data (), + m); + for (int ii = 0; ii < dim; ii++) + { + axpyCalculator (dim, + Alpha.data () + ii, + B.data () + ii * dim, + 1, + expected_C2.data () + ii * dim, + 1); + } + EXPECT_EQ (expected_C1, expected_C2); } -TYPED_TEST(LapackTest, heevx) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; +TYPED_TEST (LapackTest, heevx) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; using Real = typename GetTypeReal::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; blas_gemm gemmCalculator; blas_axpy axpyCalculator; lapack_heevx heevxCalculator; const int dim = 3; - const int neig = 2; // Compute first 2 eigenvalues - - Tensor A = std::move(Tensor({static_cast(4.0), static_cast(1.0), static_cast(1.0), - static_cast(1.0), static_cast(5.0), static_cast(3.0), - static_cast(1.0), static_cast(3.0), 
static_cast(6.0)}).to_device()); - - Tensor E = std::move(Tensor({static_cast(0.0), static_cast(0.0)}).to_device()); + const int neig = 2; // Compute first 2 eigenvalues + + Tensor A = std::move (Tensor ({static_cast (4.0), + static_cast (1.0), + static_cast (1.0), + static_cast (1.0), + static_cast (5.0), + static_cast (3.0), + static_cast (1.0), + static_cast (3.0), + static_cast (6.0)}) + .to_device ()); + + Tensor E = std::move (Tensor ({static_cast (0.0), static_cast (0.0)}).to_device ()); Tensor V = A; - Tensor expected_C1 = std::move(Tensor({static_cast(0.0), static_cast(0.0), static_cast(0.0), - static_cast(0.0), static_cast(0.0), static_cast(0.0)}).to_device()); + Tensor expected_C1 = std::move (Tensor ({static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0)}) + .to_device ()); Tensor expected_C2 = expected_C1; - expected_C1.zero(); - expected_C2.zero(); + expected_C1.zero (); + expected_C2.zero (); const char trans = 'N'; const int m = 3; const int n = neig; const int k = 3; - const Type alpha = static_cast(1.0); - const Type beta = static_cast(0.0); + const Type alpha = static_cast (1.0); + const Type beta = static_cast (0.0); // Compute first neig eigenvalues and eigenvectors using heevx - heevxCalculator(dim, dim, A.data(), neig, E.data(), V.data()); + heevxCalculator (dim, dim, A.data (), neig, E.data (), V.data ()); - E = E.to_device(); - const Tensor Alpha = std::move(Tensor({ - static_cast(E.data()[0]), - static_cast(E.data()[1])})); + E = E.to_device (); + const Tensor Alpha + = std::move (Tensor ({static_cast (E.data ()[0]), static_cast (E.data ()[1])})); // Check the eigenvalues and eigenvectors // A * x = lambda * x for the first neig eigenvectors // check that A * V = V * E // get A * V - gemmCalculator(trans, trans, m, n, k, &alpha, A.data(), m, V.data(), k, &beta, expected_C1.data(), m); + gemmCalculator (trans, + trans, + m, + n, + k, + &alpha, + A.data (), + m, + V.data 
(), + k, + &beta, + expected_C1.data (), + m); // get V * E - for (int ii = 0; ii < neig; ii++) { - axpyCalculator(dim, Alpha.data() + ii, V.data() + ii * dim, 1, expected_C2.data() + ii * dim, 1); - } + for (int ii = 0; ii < neig; ii++) + { + axpyCalculator (dim, + Alpha.data () + ii, + V.data () + ii * dim, + 1, + expected_C2.data () + ii * dim, + 1); + } - EXPECT_EQ(expected_C1, expected_C2); + EXPECT_EQ (expected_C1, expected_C2); } // Test for lapack_hegvd and lapack_hegvx // Solve a generalized eigenvalue problem // and check that A * v = e * B * v -TYPED_TEST(LapackTest, hegvd) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; +TYPED_TEST (LapackTest, hegvd) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; using Real = typename GetTypeReal::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; blas_gemm gemmCalculator; blas_axpy axpyCalculator; lapack_hegvd hegvdCalculator; const int dim = 3; - Tensor A = std::move(Tensor({static_cast(4.0), static_cast(1.0), static_cast(1.0), - static_cast(1.0), static_cast(5.0), static_cast(3.0), - static_cast(1.0), static_cast(3.0), static_cast(6.0)}).to_device()); - - Tensor I = std::move(Tensor({static_cast(1.0), static_cast(0.0), static_cast(0.0), - static_cast(0.0), static_cast(1.0), static_cast(0.0), - static_cast(0.0), static_cast(0.0), static_cast(1.0)}).to_device()); - - Tensor E = std::move(Tensor({static_cast(0.0), static_cast(0.0), static_cast(0.0)}).to_device()); + Tensor A = std::move (Tensor ({static_cast (4.0), + static_cast (1.0), + static_cast (1.0), + static_cast (1.0), + static_cast (5.0), + static_cast (3.0), + static_cast (1.0), + static_cast (3.0), + static_cast (6.0)}) + .to_device ()); + + Tensor I = std::move (Tensor ({static_cast (1.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (1.0), + 
static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (1.0)}) + .to_device ()); + + Tensor E = std::move ( + Tensor ({static_cast (0.0), static_cast (0.0), static_cast (0.0)}).to_device ()); Tensor B = A; Tensor expected_C1 = A; Tensor expected_C2 = A; - expected_C1.zero(); - expected_C2.zero(); + expected_C1.zero (); + expected_C2.zero (); const char trans = 'N'; const int m = 3; const int n = 3; const int k = 3; - const Type alpha = static_cast(1.0); - const Type beta = static_cast(0.0); + const Type alpha = static_cast (1.0); + const Type beta = static_cast (0.0); // Note all blas and lapack operators within container are column major! // For this reason, we should employ 'L' instead of 'U' in the subsequent line. - hegvdCalculator(dim, dim, A.data(), I.data(), E.data(), B.data()); + hegvdCalculator (dim, dim, A.data (), I.data (), E.data (), B.data ()); - E = E.to_device(); - const Tensor Alpha = std::move(Tensor({ - static_cast(E.data()[0]), - static_cast(E.data()[1]), - static_cast(E.data()[2])})); + E = E.to_device (); + const Tensor Alpha = std::move (Tensor ({static_cast (E.data ()[0]), + static_cast (E.data ()[1]), + static_cast (E.data ()[2])})); // Check the eigenvalues and eigenvectors // A * x = lambda * x - gemmCalculator(trans, trans, m, n, k, &alpha, A.data(), m, B.data(), k, &beta, expected_C1.data(), m); - for (int ii = 0; ii < dim; ii++) { - axpyCalculator(dim, Alpha.data() + ii, B.data() + ii * dim, 1, expected_C2.data() + ii * dim, 1); - } - EXPECT_EQ(expected_C1, expected_C2); + gemmCalculator (trans, + trans, + m, + n, + k, + &alpha, + A.data (), + m, + B.data (), + k, + &beta, + expected_C1.data (), + m); + for (int ii = 0; ii < dim; ii++) + { + axpyCalculator (dim, + Alpha.data () + ii, + B.data () + ii * dim, + 1, + expected_C2.data () + ii * dim, + 1); + } + EXPECT_EQ (expected_C1, expected_C2); } -TYPED_TEST(LapackTest, hegvx) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; +TYPED_TEST 
(LapackTest, hegvx) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; using Real = typename GetTypeReal::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; blas_gemm gemmCalculator; blas_axpy axpyCalculator; lapack_hegvx hegvxCalculator; const int dim = 3; - const int neig = 2; // Compute first 2 eigenvalues - - Tensor A = std::move(Tensor({static_cast(4.0), static_cast(1.0), static_cast(1.0), - static_cast(1.0), static_cast(5.0), static_cast(3.0), - static_cast(1.0), static_cast(3.0), static_cast(6.0)}).to_device()); - - Tensor B = std::move(Tensor({static_cast(2.0), static_cast(0.0), static_cast(0.0), - static_cast(0.0), static_cast(2.0), static_cast(0.0), - static_cast(0.0), static_cast(0.0), static_cast(2.0)}).to_device()); - - Tensor E = std::move(Tensor({static_cast(0.0), static_cast(0.0)}).to_device()); + const int neig = 2; // Compute first 2 eigenvalues + + Tensor A = std::move (Tensor ({static_cast (4.0), + static_cast (1.0), + static_cast (1.0), + static_cast (1.0), + static_cast (5.0), + static_cast (3.0), + static_cast (1.0), + static_cast (3.0), + static_cast (6.0)}) + .to_device ()); + + Tensor B = std::move (Tensor ({static_cast (2.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (2.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (2.0)}) + .to_device ()); + + Tensor E = std::move (Tensor ({static_cast (0.0), static_cast (0.0)}).to_device ()); Tensor V = A; - Tensor expected_C1 = std::move(Tensor({static_cast(0.0), static_cast(0.0), static_cast(0.0), - static_cast(0.0), static_cast(0.0), static_cast(0.0)}).to_device()); + Tensor expected_C1 = std::move (Tensor ({static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0)}) + .to_device ()); Tensor expected_C2 = expected_C1; Tensor 
C_temp = expected_C1; - expected_C1.zero(); - expected_C2.zero(); + expected_C1.zero (); + expected_C2.zero (); const char trans = 'N'; const int m = 3; const int n = neig; const int k = 3; - const Type alpha = static_cast(1.0); - const Type beta = static_cast(0.0); + const Type alpha = static_cast (1.0); + const Type beta = static_cast (0.0); // Compute first neig eigenvalues and eigenvectors using hegvx - hegvxCalculator(dim, dim, A.data(), B.data(), neig, E.data(), V.data()); + hegvxCalculator (dim, dim, A.data (), B.data (), neig, E.data (), V.data ()); - E = E.to_device(); - const Tensor Alpha = std::move(Tensor({ - static_cast(E.data()[0]), - static_cast(E.data()[1])})); + E = E.to_device (); + const Tensor Alpha + = std::move (Tensor ({static_cast (E.data ()[0]), static_cast (E.data ()[1])})); // Check the eigenvalues and eigenvectors // A * x = lambda * B * x for the first neig eigenvectors // check that A * V = E * B * V // get A * V - gemmCalculator(trans, trans, m, n, k, &alpha, A.data(), m, V.data(), k, &beta, expected_C1.data(), m); + gemmCalculator (trans, + trans, + m, + n, + k, + &alpha, + A.data (), + m, + V.data (), + k, + &beta, + expected_C1.data (), + m); // get E * B * V // where B is 2 * eye(3,3) // get C_temp = B * V first - gemmCalculator(trans, trans, m, n, k, &alpha, B.data(), m, V.data(), k, &beta, C_temp.data(), m); + gemmCalculator (trans, + trans, + m, + n, + k, + &alpha, + B.data (), + m, + V.data (), + k, + &beta, + C_temp.data (), + m); // then compute C2 = E * B * V - for (int ii = 0; ii < neig; ii++) { - axpyCalculator(dim, Alpha.data() + ii, C_temp.data() + ii * dim, 1, expected_C2.data() + ii * dim, 1); - } + for (int ii = 0; ii < neig; ii++) + { + axpyCalculator (dim, + Alpha.data () + ii, + C_temp.data () + ii * dim, + 1, + expected_C2.data () + ii * dim, + 1); + } - EXPECT_EQ(expected_C1, expected_C2); + EXPECT_EQ (expected_C1, expected_C2); } } // namespace kernels diff --git 
a/source/source_base/module_container/ATen/kernels/test/linalg_test.cpp b/source/source_base/module_container/ATen/kernels/test/linalg_test.cpp index 8b0afe634fb..ae4fa25d7c3 100644 --- a/source/source_base/module_container/ATen/kernels/test/linalg_test.cpp +++ b/source/source_base/module_container/ATen/kernels/test/linalg_test.cpp @@ -4,203 +4,342 @@ #include #include -namespace container { -namespace kernels { - -template -class LinalgTest : public testing::Test { -public: - LinalgTest() = default; - - ~LinalgTest() override = default; +namespace container +{ +namespace kernels +{ + +template +class LinalgTest : public testing::Test +{ + public: + LinalgTest () = default; + + ~LinalgTest () override = default; }; -TYPED_TEST_SUITE(LinalgTest, base::utils::Types); - -TYPED_TEST(LinalgTest, Add) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; - - Tensor A = std::move( - Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(4.0), static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), static_cast(9.0)}).to_device()); - Tensor B = std::move( - Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(4.0), static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), static_cast(9.0)}).to_device()); - - Tensor expected = std::move( - Tensor({static_cast(3.0), static_cast(6.0), static_cast(9.0), - static_cast(12.0), static_cast(15.0), static_cast(18.0), - static_cast(21.0), static_cast(24.0), static_cast(27.0)}).to_device()); - Tensor result = Tensor(expected.data_type(), expected.device_type(), expected.shape()); - kernels::add()( - A.NumElements(), static_cast(2.0), A.data(), static_cast(1.0), B.data(), result.data()); - EXPECT_EQ(result, expected); +TYPED_TEST_SUITE (LinalgTest, base::utils::Types); + +TYPED_TEST (LinalgTest, Add) +{ + using Type = typename std::tuple_element<0, decltype 
(TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; + + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (9.0)}) + .to_device ()); + Tensor B = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (9.0)}) + .to_device ()); + + Tensor expected = std::move (Tensor ({static_cast (3.0), + static_cast (6.0), + static_cast (9.0), + static_cast (12.0), + static_cast (15.0), + static_cast (18.0), + static_cast (21.0), + static_cast (24.0), + static_cast (27.0)}) + .to_device ()); + Tensor result = Tensor (expected.data_type (), expected.device_type (), expected.shape ()); + kernels::add () (A.NumElements (), + static_cast (2.0), + A.data (), + static_cast (1.0), + B.data (), + result.data ()); + EXPECT_EQ (result, expected); } -TYPED_TEST(LinalgTest, Mul) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; - - Tensor A = std::move( - Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(4.0), static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), static_cast(9.0)}).to_device()); - Tensor B = std::move( - Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(4.0), static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), static_cast(9.0)}).to_device()); - - Tensor expected = std::move( - Tensor({static_cast(1.0), static_cast(4.0), static_cast(9.0), - static_cast(16.0), static_cast(25.0), static_cast(36.0), - static_cast(49.0), static_cast(64.0), static_cast(81.0)}).to_device()); - Tensor result = Tensor(expected.data_type(), 
expected.device_type(), expected.shape()); - kernels::mul()( - A.NumElements(), static_cast(1.0), A.data(), B.data(), result.data()); - EXPECT_EQ(result, expected); +TYPED_TEST (LinalgTest, Mul) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; + + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (9.0)}) + .to_device ()); + Tensor B = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (9.0)}) + .to_device ()); + + Tensor expected = std::move (Tensor ({static_cast (1.0), + static_cast (4.0), + static_cast (9.0), + static_cast (16.0), + static_cast (25.0), + static_cast (36.0), + static_cast (49.0), + static_cast (64.0), + static_cast (81.0)}) + .to_device ()); + Tensor result = Tensor (expected.data_type (), expected.device_type (), expected.shape ()); + kernels::mul () (A.NumElements (), + static_cast (1.0), + A.data (), + B.data (), + result.data ()); + EXPECT_EQ (result, expected); } -TYPED_TEST(LinalgTest, Div) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; - - Tensor A = std::move( - Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(4.0), static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), static_cast(9.0)}).to_device()); - Tensor B = std::move( - Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(4.0), static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), static_cast(9.0)}).to_device()); - - Tensor expected = std::move( - Tensor({static_cast(1.0), 
static_cast(1.0), static_cast(1.0), - static_cast(1.0), static_cast(1.0), static_cast(1.0), - static_cast(1.0), static_cast(1.0), static_cast(1.0)}).to_device()); - - Tensor result = Tensor(expected.data_type(), expected.device_type(), expected.shape()); - kernels::div()( - A.NumElements(), static_cast(1.0), A.data(), B.data(), result.data()); - EXPECT_EQ(result, expected); +TYPED_TEST (LinalgTest, Div) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; + + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (9.0)}) + .to_device ()); + Tensor B = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (9.0)}) + .to_device ()); + + Tensor expected = std::move (Tensor ({static_cast (1.0), + static_cast (1.0), + static_cast (1.0), + static_cast (1.0), + static_cast (1.0), + static_cast (1.0), + static_cast (1.0), + static_cast (1.0), + static_cast (1.0)}) + .to_device ()); + + Tensor result = Tensor (expected.data_type (), expected.device_type (), expected.shape ()); + kernels::div () (A.NumElements (), + static_cast (1.0), + A.data (), + B.data (), + result.data ()); + EXPECT_EQ (result, expected); } -TYPED_TEST(LinalgTest, Fma) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; - - Tensor A = std::move( - Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(4.0), static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), static_cast(9.0)}).to_device()); - Tensor B = std::move( - Tensor({static_cast(1.0), 
static_cast(2.0), static_cast(3.0), - static_cast(4.0), static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), static_cast(9.0)}).to_device()); - Tensor C = std::move( - Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(4.0), static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), static_cast(9.0)}).to_device()); - - Tensor expected = std::move( - Tensor({static_cast(5.0), static_cast(14.0), static_cast(27.0), - static_cast(44.0), static_cast(65.0), static_cast(90.0), - static_cast(119.0), static_cast(152.0), static_cast(189.0)}).to_device()); - - Tensor result = Tensor(expected.data_type(), expected.device_type(), expected.shape()); - kernels::fma()( - A.NumElements(), static_cast(2.0), A.data(), B.data(), static_cast(3.0), C.data(), result.data()); - EXPECT_EQ(result, expected); +TYPED_TEST (LinalgTest, Fma) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; + + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (9.0)}) + .to_device ()); + Tensor B = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (9.0)}) + .to_device ()); + Tensor C = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (9.0)}) + .to_device ()); + + Tensor expected = std::move (Tensor ({static_cast (5.0), + static_cast (14.0), + static_cast (27.0), + static_cast (44.0), + static_cast (65.0), + static_cast (90.0), + static_cast (119.0), + static_cast (152.0), + static_cast 
(189.0)}) + .to_device ()); + + Tensor result = Tensor (expected.data_type (), expected.device_type (), expected.shape ()); + kernels::fma () (A.NumElements (), + static_cast (2.0), + A.data (), + B.data (), + static_cast (3.0), + C.data (), + result.data ()); + EXPECT_EQ (result, expected); } -TYPED_TEST(LinalgTest, Transpose) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; - - Tensor A = std::move( - Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(4.0), static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), static_cast(9.0)}).to_device()); - A.reshape({3, 3}); +TYPED_TEST (LinalgTest, Transpose) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; + + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (9.0)}) + .to_device ()); + A.reshape ({3, 3}); Tensor A_transpose = A; - Tensor expected = std::move( - Tensor({static_cast(1.0), static_cast(4.0), static_cast(7.0), - static_cast(2.0), static_cast(5.0), static_cast(8.0), - static_cast(3.0), static_cast(6.0), static_cast(9.0)}).to_device()); - expected.reshape({3, 3}); + Tensor expected = std::move (Tensor ({static_cast (1.0), + static_cast (4.0), + static_cast (7.0), + static_cast (2.0), + static_cast (5.0), + static_cast (8.0), + static_cast (3.0), + static_cast (6.0), + static_cast (9.0)}) + .to_device ()); + expected.reshape ({3, 3}); std::vector perm = {1, 0}; - kernels::transpose()( - perm, A.shape().dims(), A_transpose.shape().dims(), A.data(), A_transpose.data()); - EXPECT_EQ(A_transpose, expected); + kernels::transpose () (perm, + A.shape ().dims (), + A_transpose.shape ().dims (), + A.data 
(), + A_transpose.data ()); + EXPECT_EQ (A_transpose, expected); } - -TYPED_TEST(LinalgTest, Stride) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; - - Tensor A = std::move( - Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(4.0), static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), static_cast(9.0)}).to_device()); - A.reshape({-1}); - Tensor expected = std::move( - Tensor({static_cast(1.0), static_cast(5.0), static_cast(9.0)}).to_device()); - expected.reshape({-1}); +TYPED_TEST (LinalgTest, Stride) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; + + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (9.0)}) + .to_device ()); + A.reshape ({-1}); + Tensor expected = std::move ( + Tensor ({static_cast (1.0), static_cast (5.0), static_cast (9.0)}).to_device ()); + expected.reshape ({-1}); Tensor A_stride = expected; - A_stride.zero(); + A_stride.zero (); std::vector stride = {4}; - kernels::stride()( - stride, A.shape().dims(), A_stride.shape().dims(), A.data(), A_stride.data()); - EXPECT_EQ(A_stride, expected); + kernels::stride () (stride, + A.shape ().dims (), + A_stride.shape ().dims (), + A.data (), + A_stride.data ()); + EXPECT_EQ (A_stride, expected); } - -TYPED_TEST(LinalgTest, Inflate) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; - - Tensor expected = std::move( - Tensor({static_cast(1.0), static_cast(0.0), static_cast(0.0), - static_cast(0.0), static_cast(5.0), static_cast(0.0), - static_cast(0.0), static_cast(0.0), 
static_cast(9.0)}).to_device()); - expected.reshape({-1}); - Tensor A = std::move( - Tensor({static_cast(1.0), static_cast(5.0), static_cast(9.0)}).to_device()); - A.reshape({-1}); +TYPED_TEST (LinalgTest, Inflate) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; + + Tensor expected = std::move (Tensor ({static_cast (1.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (9.0)}) + .to_device ()); + expected.reshape ({-1}); + Tensor A = std::move ( + Tensor ({static_cast (1.0), static_cast (5.0), static_cast (9.0)}).to_device ()); + A.reshape ({-1}); Tensor A_inflate = expected; - A_inflate.zero(); + A_inflate.zero (); std::vector inflate = {4}; - kernels::inflate()( - inflate, A.shape().dims(), A_inflate.shape().dims(), A.data(), A_inflate.data()); - EXPECT_EQ(A_inflate, expected); + kernels::inflate () (inflate, + A.shape ().dims (), + A_inflate.shape ().dims (), + A.data (), + A_inflate.data ()); + EXPECT_EQ (A_inflate, expected); } - -TYPED_TEST(LinalgTest, Reduce) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; - - Tensor A = std::move( - Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(4.0), static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), static_cast(9.0)}).to_device()); - A.reshape({3, 3}); - Tensor expected = std::move( - Tensor({static_cast(6.0), static_cast(15.0), static_cast(24.0)}).to_device()); - expected.reshape({-1}); +TYPED_TEST (LinalgTest, Reduce) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; + + Tensor A = std::move (Tensor ({static_cast (1.0), + 
static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (9.0)}) + .to_device ()); + A.reshape ({3, 3}); + Tensor expected = std::move ( + Tensor ({static_cast (6.0), static_cast (15.0), static_cast (24.0)}).to_device ()); + expected.reshape ({-1}); Tensor A_reduce = expected; - A_reduce.zero(); + A_reduce.zero (); int64_t inner_most_dim = 3; - kernels::reduce()( - A_reduce.NumElements(), inner_most_dim, A.data(), A_reduce.data()); - EXPECT_EQ(A_reduce, expected); + kernels::reduce () (A_reduce.NumElements (), inner_most_dim, A.data (), A_reduce.data ()); + EXPECT_EQ (A_reduce, expected); } } // namespace kernels diff --git a/source/source_base/module_container/ATen/kernels/test/memory_test.cpp b/source/source_base/module_container/ATen/kernels/test/memory_test.cpp index e8943ae610d..64c33181303 100644 --- a/source/source_base/module_container/ATen/kernels/test/memory_test.cpp +++ b/source/source_base/module_container/ATen/kernels/test/memory_test.cpp @@ -5,72 +5,82 @@ #include #include -namespace container { -namespace kernels { +namespace container +{ +namespace kernels +{ template -class MemoryTest : public testing::Test { -public: - MemoryTest() = default; - ~MemoryTest() override = default; +class MemoryTest : public testing::Test +{ + public: + MemoryTest () = default; + ~MemoryTest () override = default; }; -TYPED_TEST_SUITE(MemoryTest, base::utils::Types); +TYPED_TEST_SUITE (MemoryTest, base::utils::Types); -TYPED_TEST(MemoryTest, ResizeAndSynchronizeMemory) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (MemoryTest, ResizeAndSynchronizeMemory) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; kernels::resize_memory resizeMemory; 
kernels::synchronize_memory syncMemoryDeviceToHost; kernels::synchronize_memory syncMemoryHostToDevice; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0)}).to_device()); + Tensor A = std::move ( + Tensor ({static_cast (1.0), static_cast (2.0), static_cast (3.0)}).to_device ()); Type* d_B = nullptr; - resizeMemory(d_B, 3, "B"); - Tensor B = std::move(TensorMap(d_B, A.data_type(), A.device_type(), {3})); - B.zero(); - - syncMemoryDeviceToHost(B.data(), A.data(), 3); - EXPECT_EQ(A, B); - - A.zero(); - syncMemoryHostToDevice(A.data(), B.data(), 3); - EXPECT_EQ(A, B); + resizeMemory (d_B, 3, "B"); + Tensor B = std::move (TensorMap (d_B, A.data_type (), A.device_type (), {3})); + B.zero (); + + syncMemoryDeviceToHost (B.data (), A.data (), 3); + EXPECT_EQ (A, B); + + A.zero (); + syncMemoryHostToDevice (A.data (), B.data (), 3); + EXPECT_EQ (A, B); } -TYPED_TEST(MemoryTest, SetMemory) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (MemoryTest, SetMemory) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; kernels::set_memory setMemory; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0)}).to_device()); + Tensor A = std::move ( + Tensor ({static_cast (1.0), static_cast (2.0), static_cast (3.0)}).to_device ()); Tensor B = A; - - A.zero(); - setMemory(B.data(), 0, 3); - EXPECT_EQ(A, B); + + A.zero (); + setMemory (B.data (), 0, 3); + EXPECT_EQ (A, B); } -TYPED_TEST(MemoryTest, CastAndDeleteMemory) { - using Type = std::complex::type>::type>; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (MemoryTest, CastAndDeleteMemory) +{ + using Type + = std::complex::type>::type>; + using Device = typename std::tuple_element<1, decltype (TypeParam 
())>::type; kernels::delete_memory, DEVICE_CPU> deleteMemory; kernels::cast_memory, Type, DEVICE_CPU, Device> castMemory_H2D_D2S; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0)}).to_device()); - Tensor B = A.to_device().cast>(); - - auto * d_A = (std::complex*)malloc(sizeof(std::complex) * 3); - castMemory_H2D_D2S(d_A, A.data(), 3); - Tensor C = std::move(TensorMap(d_A, B.data_type(), B.device_type(), {3})); + Tensor A = std::move ( + Tensor ({static_cast (1.0), static_cast (2.0), static_cast (3.0)}).to_device ()); + Tensor B = A.to_device ().cast> (); + + auto* d_A = (std::complex*)malloc (sizeof (std::complex) * 3); + castMemory_H2D_D2S (d_A, A.data (), 3); + Tensor C = std::move (TensorMap (d_A, B.data_type (), B.device_type (), {3})); - EXPECT_EQ(B, C); - deleteMemory(d_A); + EXPECT_EQ (B, C); + deleteMemory (d_A); } -} // namespace op +} // namespace kernels } // namespace container diff --git a/source/source_base/module_container/ATen/ops/einsum_op.cpp b/source/source_base/module_container/ATen/ops/einsum_op.cpp index 40440db9407..b63e1bddaaa 100644 --- a/source/source_base/module_container/ATen/ops/einsum_op.cpp +++ b/source/source_base/module_container/ATen/ops/einsum_op.cpp @@ -7,12 +7,14 @@ #include #include -namespace container { -namespace einsum_utils { +namespace container +{ +namespace einsum_utils +{ -struct BCast { +struct BCast +{ public: - bool valid = true; bool requires_broadcast = false; @@ -27,63 +29,69 @@ struct BCast { std::vector x_bcast_shape = {}; // input a bcast shape std::vector y_bcast_shape = {}; // input b bcast shape - static void reverse(std::vector& vec) { - std::reverse(vec.begin(), vec.end()); + static void + reverse (std::vector& vec) + { + std::reverse (vec.begin (), vec.end ()); } }; // Do some initialization work for bcast dimensions -static BCast prepare_bcast( - const std::vector& x_, - const std::vector& y_) +static BCast + prepare_bcast (const std::vector& x_, const std::vector& y_) 
{ - const std::vector x(x_.begin(), x_.end() - 2); - const std::vector y(y_.begin(), y_.end() - 2); + const std::vector x (x_.begin (), x_.end () - 2); + const std::vector y (y_.begin (), y_.end () - 2); // Safely multiplies dimensions taking into account symbolic shapes. - auto mul_dims = [](int64_t dim1, int64_t dim2) -> int64_t { - if (dim1 != 0 && dim2 != 0 && (dim1 < 0 || dim2 < 0)) { - return -1; - } - return dim1 * dim2; - }; + auto mul_dims = [] (int64_t dim1, int64_t dim2) -> int64_t + { + if (dim1 != 0 && dim2 != 0 && (dim1 < 0 || dim2 < 0)) + { + return -1; + } + return dim1 * dim2; + }; BCast bcast = {}; bool all_equal = x == y; - size_t lagest_batch_rank = std::max(x.size(), y.size()); + size_t lagest_batch_rank = std::max (x.size (), y.size ()); // calculate the all_equal and lagest_rank // There can be at most two operands, so we can use a 2 for loop size - if (all_equal) { - bcast.requires_broadcast = false; - // Fast path for common case of identical shapes. - int64_t batch_size = 1; - const int rank = x.size(); - for (int ii = 0; ii < rank; ++ii) { - bcast.z_batch_shape.resize(rank); - for (int ii = 0; ii < x.size(); ii++) { - bcast.z_batch_shape[ii] = x[ii]; - batch_size = mul_dims(batch_size, x[ii]); - } + if (all_equal) + { + bcast.requires_broadcast = false; + // Fast path for common case of identical shapes. 
+ int64_t batch_size = 1; + const int rank = x.size (); + for (int ii = 0; ii < rank; ++ii) + { + bcast.z_batch_shape.resize (rank); + for (int ii = 0; ii < x.size (); ii++) + { + bcast.z_batch_shape[ii] = x[ii]; + batch_size = mul_dims (batch_size, x[ii]); + } + } + bcast.x_bcast_shape.push_back (1); + bcast.y_bcast_shape.push_back (1); + bcast.x_batch_shape.push_back (batch_size); + bcast.y_batch_shape.push_back (batch_size); + bcast.z_batch_size = batch_size; + bcast.x_batch_size = TensorShape (bcast.x_batch_shape).NumElements (); + bcast.y_batch_size = TensorShape (bcast.y_batch_shape).NumElements (); + return std::move (bcast); } - bcast.x_bcast_shape.push_back(1); - bcast.y_bcast_shape.push_back(1); - bcast.x_batch_shape.push_back(batch_size); - bcast.y_batch_shape.push_back(batch_size); - bcast.z_batch_size = batch_size; - bcast.x_batch_size = TensorShape(bcast.x_batch_shape).NumElements(); - bcast.y_batch_size = TensorShape(bcast.y_batch_shape).NumElements(); - return std::move(bcast); - } - + std::vector inv_x = x; std::vector inv_y = y; - BCast::reverse(inv_x); - BCast::reverse(inv_y); + BCast::reverse (inv_x); + BCast::reverse (inv_y); // 1-extend and align all vectors. - inv_x.resize(lagest_batch_rank, 1); - inv_y.resize(lagest_batch_rank, 1); + inv_x.resize (lagest_batch_rank, 1); + inv_y.resize (lagest_batch_rank, 1); // going through each dimension starting from the inner-most // dimension, compares dimension of x and y. 
They are compatible if @@ -94,161 +102,184 @@ static BCast prepare_bcast( bool x_prev_is_one = false, y_prev_is_one = false; bool x_current_is_one = false, y_current_is_one = false; bool output_dim_set = false, none_is_one = true, set_one = false; - for (int ii = 0; ii < lagest_batch_rank; ii++) { - // Pre condition setting - output_dim = -1; - output_dim_set = false; - none_is_one = true; - if (inv_x[ii] == 1) { - none_is_one = false; - x_current_is_one = true; - } - else { - x_current_is_one = false; - output_dim = inv_x[ii]; - output_dim_set = true; + for (int ii = 0; ii < lagest_batch_rank; ii++) + { + // Pre condition setting + output_dim = -1; + output_dim_set = false; + none_is_one = true; + if (inv_x[ii] == 1) + { + none_is_one = false; + x_current_is_one = true; + } + else + { + x_current_is_one = false; + output_dim = inv_x[ii]; + output_dim_set = true; + } + if (inv_x[ii] == 1) + { + none_is_one = false; + y_current_is_one = true; + } + else + { + y_current_is_one = false; + if (!output_dim_set || output_dim == inv_y[ii]) + { + output_dim = inv_y[ii]; + output_dim_set = true; + } + else + { + bcast.valid = false; + return std::move (bcast); + } + } + bcast.z_batch_shape.push_back (output_dim_set ? output_dim : 1); + bcast.z_batch_size = mul_dims (bcast.z_batch_size, bcast.z_batch_shape.back ()); + + // All dimensions are 1 + if (!output_dim_set) + { + // This will skip updating the previous state to the current one. We'll + // explain why this is safe below. + // Consider the previous state P, current state C and the next state N. + // In the case where N also is all ones (N == C), we'll do the same + // optimization here (push back one dimensions if we need to), which is + // safe and is expected. + // + // When N != C, we'll continue as usual. However, we might trigger the + // next block if N == P (because we didn't update the previous state). + // We trigger the next block if `fewer_dims_optimization` is true. 
+ // This means that we did not modify and broadcast / reshapes in this + // block (we skipped updating, since the one dimensions can be ignored). + // In essence, we only need to check whether the previous non-one state is + // equal to the current non-one state. + continue; + } + else if (x_current_is_one == x_prev_is_one && y_current_is_one == y_prev_is_one && set_one) + { + // fewer_dims_optimization + // If the previous state is the same as the current state, we can skip + // broadcasting / reshaping. This is because we can ignore dimensions of + // size 1. This is safe because we know that the previous state is not + // all ones (otherwise we would have continued in the previous block). + + // It is a run of the same broadcasting case as last time. + // We can reshape the input so that fewer dimensions + // are involved in the intermediate computation. + bcast.x_batch_shape.back () = mul_dims (bcast.x_batch_shape.back (), inv_x[ii]); + bcast.y_batch_shape.back () = mul_dims (bcast.y_batch_shape.back (), inv_y[ii]); + + bcast.x_bcast_shape.back () + = mul_dims (bcast.x_bcast_shape.back (), x_current_is_one ? output_dim : 1); + bcast.y_bcast_shape.back () + = mul_dims (bcast.y_bcast_shape.back (), y_current_is_one ? output_dim : 1); + } + else + { + bcast.x_batch_shape.push_back (inv_x[ii]); + bcast.y_batch_shape.push_back (inv_y[ii]); + + bcast.x_bcast_shape.push_back (x_current_is_one ? output_dim : 1); + bcast.y_bcast_shape.push_back (y_current_is_one ? 
output_dim : 1); + } + set_one = true; + x_prev_is_one = x_current_is_one; + y_prev_is_one = y_current_is_one; } - if (inv_x[ii] == 1) { - none_is_one = false; - y_current_is_one = true; + if (bcast.x_batch_shape.empty ()) + { + bcast.x_batch_shape.push_back (1); + bcast.x_bcast_shape.push_back (1); } - else { - y_current_is_one = false; - if (!output_dim_set || output_dim == inv_y[ii]) { - output_dim = inv_y[ii]; - output_dim_set = true; - } - else { - bcast.valid = false; - return std::move(bcast); - } + if (bcast.y_batch_shape.empty ()) + { + bcast.y_batch_shape.push_back (1); + bcast.y_bcast_shape.push_back (1); } - bcast.z_batch_shape.push_back(output_dim_set ? output_dim : 1); - bcast.z_batch_size = mul_dims(bcast.z_batch_size, bcast.z_batch_shape.back()); - - // All dimensions are 1 - if (!output_dim_set) { - // This will skip updating the previous state to the current one. We'll - // explain why this is safe below. - // Consider the previous state P, current state C and the next state N. - // In the case where N also is all ones (N == C), we'll do the same - // optimization here (push back one dimensions if we need to), which is - // safe and is expected. - // - // When N != C, we'll continue as usual. However, we might trigger the - // next block if N == P (because we didn't update the previous state). - // We trigger the next block if `fewer_dims_optimization` is true. - // This means that we did not modify and broadcast / reshapes in this - // block (we skipped updating, since the one dimensions can be ignored). - // In essence, we only need to check whether the previous non-one state is - // equal to the current non-one state. - continue; - } - else if (x_current_is_one == x_prev_is_one && y_current_is_one == y_prev_is_one && set_one) { - // fewer_dims_optimization - // If the previous state is the same as the current state, we can skip - // broadcasting / reshaping. This is because we can ignore dimensions of - // size 1. 
This is safe because we know that the previous state is not - // all ones (otherwise we would have continued in the previous block). - - // It is a run of the same broadcasting case as last time. - // We can reshape the input so that fewer dimensions - // are involved in the intermediate computation. - bcast.x_batch_shape.back() = mul_dims(bcast.x_batch_shape.back(), inv_x[ii]); - bcast.y_batch_shape.back() = mul_dims(bcast.y_batch_shape.back(), inv_y[ii]); - - bcast.x_bcast_shape.back() = mul_dims(bcast.x_bcast_shape.back(), x_current_is_one ? output_dim : 1); - bcast.y_bcast_shape.back() = mul_dims(bcast.y_bcast_shape.back(), y_current_is_one ? output_dim : 1); - } - else { - bcast.x_batch_shape.push_back(inv_x[ii]); - bcast.y_batch_shape.push_back(inv_y[ii]); - - bcast.x_bcast_shape.push_back(x_current_is_one ? output_dim : 1); - bcast.y_bcast_shape.push_back(y_current_is_one ? output_dim : 1); - } - set_one = true; - x_prev_is_one = x_current_is_one; - y_prev_is_one = y_current_is_one; - } - if (bcast.x_batch_shape.empty()) { - bcast.x_batch_shape.push_back(1); - bcast.x_bcast_shape.push_back(1); - } - if (bcast.y_batch_shape.empty()) { - bcast.y_batch_shape.push_back(1); - bcast.y_bcast_shape.push_back(1); - } // Do something about batches - BCast::reverse(bcast.x_batch_shape); - BCast::reverse(bcast.x_bcast_shape); - BCast::reverse(bcast.y_batch_shape); - BCast::reverse(bcast.y_bcast_shape); - BCast::reverse(bcast.z_batch_shape); + BCast::reverse (bcast.x_batch_shape); + BCast::reverse (bcast.x_bcast_shape); + BCast::reverse (bcast.y_batch_shape); + BCast::reverse (bcast.y_bcast_shape); + BCast::reverse (bcast.z_batch_shape); // Init batch_sizes - bcast.x_batch_size = TensorShape(bcast.x_batch_shape).NumElements(); - bcast.y_batch_size = TensorShape(bcast.y_batch_shape).NumElements(); - return std::move(bcast); + bcast.x_batch_size = TensorShape (bcast.x_batch_shape).NumElements (); + bcast.y_batch_size = TensorShape (bcast.y_batch_shape).NumElements (); + 
return std::move (bcast); } -static inline int64_t IPow(int64_t base, int64_t exponent) { +static inline int64_t + IPow (int64_t base, int64_t exponent) +{ int64_t result = 1; - for (int64_t ii = 0; ii < exponent; ++ii) { - result *= base; - } + for (int64_t ii = 0; ii < exponent; ++ii) + { + result *= base; + } return result; } // Returns a reshaped input Tensor. The underlying buffer is not copied. -// Note: This method will not allocate memory for output tensor, +// Note: This method will not allocate memory for output tensor, // intead use the reference of the input buffer. -static inline bool CopyFrom(const Tensor& input, const TensorShape& shape, Tensor* output) { - return output->CopyFrom(input, shape); +static inline bool + CopyFrom (const Tensor& input, const TensorShape& shape, Tensor* output) +{ + return output->CopyFrom (input, shape); } // Note: This method will allocate memory with the given shape for output tensor, // also keep the data_type, device_type consistant with the input tensor. -static inline bool CopyFromWithAllocate(const Tensor& input, const TensorShape& shape, Tensor* output) { - return output->AllocateFrom(input, shape); +static inline bool + CopyFromWithAllocate (const Tensor& input, const TensorShape& shape, Tensor* output) +{ + return output->AllocateFrom (input, shape); } // Reshapes a Tensor of shape [b0,b1...bk,N,M] to [prod(b0,b1...bk),N,M]. 
-static bool ReshapeToRank3(Tensor& input, int batch_size, Tensor& output) +static bool + ReshapeToRank3 (Tensor& input, int batch_size, Tensor& output) { - const int rank = input.shape().ndim(); - TensorShape output_shape = {batch_size, input.shape().dim_size(rank - 2), - input.shape().dim_size(rank - 1)}; - return CopyFrom(input, output_shape, &output); + const int rank = input.shape ().ndim (); + TensorShape output_shape = {batch_size, input.shape ().dim_size (rank - 2), input.shape ().dim_size (rank - 1)}; + return CopyFrom (input, output_shape, &output); } template -static inline bool all_of( - const std::vector& vec, - const std::function& predicate) +static inline bool + all_of (const std::vector& vec, const std::function& predicate) { - for (const auto& element : vec) { - if (!predicate(element)) { - return false; + for (const auto& element: vec) + { + if (!predicate (element)) + { + return false; + } } - } return true; } // If there are repeated labels in either the input or output, then this // strides the input (e.g. iii->i) or inflates it (e.g. i->iii), respectively. -static bool StrideOrInflateOperand( - Tensor& input, - const std::vector& labels, - const std::vector& label_counts, - const bool should_inflate, - Tensor& output) // output is the result of stride or inflate +static bool + StrideOrInflateOperand (Tensor& input, + const std::vector& labels, + const std::vector& label_counts, + const bool should_inflate, + Tensor& output) // output is the result of stride or inflate { // Return early if there are no repeated indices. - if (all_of(label_counts, [](int var) {return var <= 1;})) { - return CopyFrom(input, input.shape(), &output); - } + if (all_of (label_counts, [] (int var) { return var <= 1; })) + { + return CopyFrom (input, input.shape (), &output); + } // We reshape so that each repeated label is compressed to one dimension. // E.g. 
For iiij -> ij, The shape [3, 3, 3, 5] would be compressed to [27, @@ -261,89 +292,92 @@ static bool StrideOrInflateOperand( // false). E.g. they are [3, 5] and [3, 3, 3, 5] in the above example. std::vector strided_shape = {}; std::vector inflated_shape = {}; - for (int label : labels) { - const int count = label_counts[label]; - const int current_axis = static_cast( - should_inflate ? strided_shape.size() : inflated_shape.size()); - const int64_t dim = input.shape().dim_size(current_axis); - strided_shape.push_back(dim); - inflated_shape.insert(inflated_shape.end(), count, dim); - const int64_t reshape_dim = IPow(dim, count); - reshape.add_dim(reshape_dim); - // While taking the d-diagonal in a rank k Tensor, we take d - // equally-spaced elements including the first and last element. Then, (k - // - 1) * stride = d^k - 1, or, stride = (d^k - 1)/(d - 1). - const int64_t stride = - (dim > 1 && count > 1) ? (reshape_dim - 1) / (dim - 1) : 1; - strides.add_dim(stride); - } + for (int label: labels) + { + const int count = label_counts[label]; + const int current_axis = static_cast (should_inflate ? strided_shape.size () : inflated_shape.size ()); + const int64_t dim = input.shape ().dim_size (current_axis); + strided_shape.push_back (dim); + inflated_shape.insert (inflated_shape.end (), count, dim); + const int64_t reshape_dim = IPow (dim, count); + reshape.add_dim (reshape_dim); + // While taking the d-diagonal in a rank k Tensor, we take d + // equally-spaced elements including the first and last element. Then, (k + // - 1) * stride = d^k - 1, or, stride = (d^k - 1)/(d - 1). + const int64_t stride = (dim > 1 && count > 1) ? (reshape_dim - 1) / (dim - 1) : 1; + strides.add_dim (stride); + } - TensorShape output_shape = - TensorShape(should_inflate ? inflated_shape : strided_shape); + TensorShape output_shape = TensorShape (should_inflate ? inflated_shape : strided_shape); // Also allocate memory for the output tensor with the given shape. 
- CopyFromWithAllocate(input, output_shape, &output); - - if (should_inflate) { - Tensor output_reshaped = output.shaped(reshape); - op::inflate_op()(input.shaped(strided_shape), strides.dims(), output_reshaped); - } - else { - op::stride_op()(input.shaped(reshape), strides.dims(), output); - } + CopyFromWithAllocate (input, output_shape, &output); - return true; + if (should_inflate) + { + Tensor output_reshaped = output.shaped (reshape); + op::inflate_op () (input.shaped (strided_shape), strides.dims (), output_reshaped); + } + else + { + op::stride_op () (input.shaped (reshape), strides.dims (), output); + } + + return true; } // Permutes the labels according to the given permutation. -static void PermuteLabels( - const std::vector& permutation, - std::vector& labels) +static void + PermuteLabels (const std::vector& permutation, std::vector& labels) { - auto num_labels = labels.size(); - std::vector permuted_labels(num_labels, 0); - for (int ii = 0; ii < num_labels; ii++) { - permuted_labels[ii] = labels[permutation[ii]]; - } - labels.swap(permuted_labels); + auto num_labels = labels.size (); + std::vector permuted_labels (num_labels, 0); + for (int ii = 0; ii < num_labels; ii++) + { + permuted_labels[ii] = labels[permutation[ii]]; + } + labels.swap (permuted_labels); } // Returns whether transposing would be a no-op; whether input has rank < 2 or // the permutation is the identity permutation. -static bool ShouldTranspose( - const TensorShape& input_shape, - const std::vector& permutation) +static bool + ShouldTranspose (const TensorShape& input_shape, const std::vector& permutation) { - if (input_shape.ndim() < 2) return false; - for (int ii = 0; ii < permutation.size(); ++ii) { - if (permutation[ii] != ii) return true; - } + if (input_shape.ndim () < 2) + return false; + for (int ii = 0; ii < permutation.size (); ++ii) + { + if (permutation[ii] != ii) + return true; + } return false; } // Transpose the input given a permutation. 
Returns a reference to the input // if transposing is not necessary. -static bool TransposeOperand( - const Tensor& input, - const std::vector& permutation, - Tensor& output) +static bool + TransposeOperand (const Tensor& input, const std::vector& permutation, Tensor& output) { - if (!ShouldTranspose(input.shape(), permutation)) { - return CopyFrom(input, input.shape(), &output); - } + if (!ShouldTranspose (input.shape (), permutation)) + { + return CopyFrom (input, input.shape (), &output); + } TensorShape transposed_shape; - for (int ii = 0; ii < input.shape().ndim(); ++ii) { - transposed_shape.add_dim(input.shape().dim_size(permutation[ii])); - } + for (int ii = 0; ii < input.shape ().ndim (); ++ii) + { + transposed_shape.add_dim (input.shape ().dim_size (permutation[ii])); + } // For empty Tensors, just change the shape. E.g. we may need to transpose // from shape [1, 0, 5] to [5, 1, 0]. - if (input.NumElements() == 0) { - return CopyFrom(input, input.shape(), &output); - } + if (input.NumElements () == 0) + { + return CopyFrom (input, input.shape (), &output); + } // Note: Allocate memory for the output tensor first. - CopyFromWithAllocate(input, transposed_shape, &output); + CopyFromWithAllocate (input, transposed_shape, &output); // Then transpose the input tensor. - op::transpose_op()(input, permutation, output); + op::transpose_op () (input, permutation, output); return true; } @@ -351,51 +385,51 @@ static bool TransposeOperand( // Returns true if the input dimensions are already sorted in the order // [broadcasting, batch, contract, free, reduce]. Used to implement an optimization to avoid // an extra transpose and instead uses (conj_x and conj_y) in BatchMatMul. 
-static bool ShouldSwapFreeAndContract( - const std::vector& labels, - const std::vector& label_types) +static bool + ShouldSwapFreeAndContract (const std::vector& labels, const std::vector& label_types) { // Check that ordering is according to dimension type, with the role of // free and contract dimensions swapped. std::array remap = {0, 1, 3, 2, 4}; - for (int ii = 0; ii < labels.size() - 1; ii++) { - const int dimtype_a = remap[label_types[labels[ii]]]; - const int dimtype_b = remap[label_types[labels[ii + 1]]]; - if (dimtype_a > dimtype_b || (dimtype_a == dimtype_b && labels[ii] > labels[ii + 1])) { - return false; + for (int ii = 0; ii < labels.size () - 1; ii++) + { + const int dimtype_a = remap[label_types[labels[ii]]]; + const int dimtype_b = remap[label_types[labels[ii + 1]]]; + if (dimtype_a > dimtype_b || (dimtype_a == dimtype_b && labels[ii] > labels[ii + 1])) + { + return false; + } } - } return true; } // Insert new (unnamed) broadcasting labels at the location of ellipsis. -static void InsertBroadcastLabels( - int num_bcast_labels, - int num_named_labels, - int ellipsis_idx, - std::vector& labels, - std::vector& label_counts) +static void + InsertBroadcastLabels (int num_bcast_labels, + int num_named_labels, + int ellipsis_idx, + std::vector& labels, + std::vector& label_counts) { - labels.erase(labels.begin() + ellipsis_idx); - labels.insert(labels.begin() + ellipsis_idx, num_bcast_labels, 0); + labels.erase (labels.begin () + ellipsis_idx); + labels.insert (labels.begin () + ellipsis_idx, num_bcast_labels, 0); // Does the padding ellipsis overlap with any named labels? // Insert new labels at the location of ellipsis. // Now I understand finally! 
// Start from the num_named_labels, and insert num_bcast_labels // These broadcasting labels are not overlapped with the named labels - std::iota(labels.begin() + ellipsis_idx, - labels.begin() + ellipsis_idx + num_bcast_labels, - num_named_labels); + std::iota (labels.begin () + ellipsis_idx, labels.begin () + ellipsis_idx + num_bcast_labels, num_named_labels); - label_counts.resize(num_named_labels + num_bcast_labels, 1); + label_counts.resize (num_named_labels + num_bcast_labels, 1); } // Returns the EinsumDimensionType given whether the corresponding label is // present in exactly one input subscript (is_unique) and whether it is absent // from the output subscripts (is_removed). Does not handle broadcasting // dimensions. -static EinsumDimensionType GetDimensionType(bool is_removed, bool is_unique) +static EinsumDimensionType + GetDimensionType (bool is_removed, bool is_unique) { if (!is_removed && !is_unique) return kBatch; @@ -403,332 +437,378 @@ static EinsumDimensionType GetDimensionType(bool is_removed, bool is_unique) return kFree; else if (is_removed && !is_unique) return kContract; - else // is_removed && is_unique + else // is_removed && is_unique return kReduce; } // Maps the character labels to consecutive integers. -static void MapToLabels(const std::string& subscript, std::vector& labels, - std::unordered_map& label_mapping) +static void + MapToLabels (const std::string& subscript, std::vector& labels, std::unordered_map& label_mapping) { - for (int ii = 0; ii < subscript.size(); ii++) { - const char label_char = subscript[ii]; - if (label_char == '.') { - // Ellipsis is a special case. - if (subscript[ii + 1] != '.' || subscript[ii + 2] != '.') { - throw std::invalid_argument("Invalid ellipsis in subscript: " + subscript); - } - labels.push_back(kEllipsisLabel); - ii += 2; // Skip next 2 characters as well. - continue; - } - // Check that the label is a valid character. - // Don't worry about the ellipsis character as it is handled above. 
- if (label_mapping.find(label_char) == label_mapping.end()) { - const int next_label = label_mapping.size(); - label_mapping[label_char] = next_label; + for (int ii = 0; ii < subscript.size (); ii++) + { + const char label_char = subscript[ii]; + if (label_char == '.') + { + // Ellipsis is a special case. + if (subscript[ii + 1] != '.' || subscript[ii + 2] != '.') + { + throw std::invalid_argument ("Invalid ellipsis in subscript: " + subscript); + } + labels.push_back (kEllipsisLabel); + ii += 2; // Skip next 2 characters as well. + continue; + } + // Check that the label is a valid character. + // Don't worry about the ellipsis character as it is handled above. + if (label_mapping.find (label_char) == label_mapping.end ()) + { + const int next_label = label_mapping.size (); + label_mapping[label_char] = next_label; + } + // Map the label to an integer. + const int mapped_label = label_mapping[label_char]; + labels.push_back (mapped_label); } - // Map the label to an integer. - const int mapped_label = label_mapping[label_char]; - labels.push_back(mapped_label); - } // Check the number of ellipsis. 
- if (std::count(labels.begin(), labels.end(), kEllipsisLabel) > 1) { - throw std::invalid_argument("More than one ellipsis in subscript: " + subscript); - } + if (std::count (labels.begin (), labels.end (), kEllipsisLabel) > 1) + { + throw std::invalid_argument ("More than one ellipsis in subscript: " + subscript); + } } /// Check the validation of the input equations -bool ValidateEinsumEquation( - const std::string& equation, - std::vector& input_subscripts, - std::string& output_subscript) +bool + ValidateEinsumEquation (const std::string& equation, + std::vector& input_subscripts, + std::string& output_subscript) { // Part 1: Check the equation's validation - if (equation.empty()) { - throw std::invalid_argument("Empty einsum equation"); - } + if (equation.empty ()) + { + throw std::invalid_argument ("Empty einsum equation"); + } // Part 2: Remove the white space in the equation std::string equation_no_space; - for (const char c : equation) { - if (c != ' ') { - equation_no_space.push_back(c); + for (const char c: equation) + { + if (c != ' ') + { + equation_no_space.push_back (c); + } } - } // Part 3: Check the "->" flag std::vector inputs_and_output_subscripts; - auto delimiter_pos = equation_no_space.find("->"); - if (delimiter_pos == std::string::npos) { - throw std::invalid_argument("No '->' in einsum equation: " + equation_no_space); - } - else if (equation_no_space.find("->", delimiter_pos + 1) != std::string::npos) { - throw std::invalid_argument("Expecting exactly one '->' in einsum equation: " + equation_no_space); - } - inputs_and_output_subscripts.push_back(equation_no_space.substr(0, delimiter_pos)); - inputs_and_output_subscripts.push_back(equation_no_space.substr(delimiter_pos + 2)); + auto delimiter_pos = equation_no_space.find ("->"); + if (delimiter_pos == std::string::npos) + { + throw std::invalid_argument ("No '->' in einsum equation: " + equation_no_space); + } + else if (equation_no_space.find ("->", delimiter_pos + 1) != 
std::string::npos) + { + throw std::invalid_argument ("Expecting exactly one '->' in einsum equation: " + equation_no_space); + } + inputs_and_output_subscripts.push_back (equation_no_space.substr (0, delimiter_pos)); + inputs_and_output_subscripts.push_back (equation_no_space.substr (delimiter_pos + 2)); - output_subscript = std::move(inputs_and_output_subscripts[1]); + output_subscript = std::move (inputs_and_output_subscripts[1]); // Part 4: Address the comma in the input subscripts - auto comma_pos = inputs_and_output_subscripts[0].find(','); - while (comma_pos != std::string::npos) { - input_subscripts.push_back(inputs_and_output_subscripts[0].substr(0, comma_pos)); - inputs_and_output_subscripts[0] = inputs_and_output_subscripts[0].substr(comma_pos + 1); - comma_pos = inputs_and_output_subscripts[0].find(','); - } - input_subscripts.push_back(inputs_and_output_subscripts[0]); + auto comma_pos = inputs_and_output_subscripts[0].find (','); + while (comma_pos != std::string::npos) + { + input_subscripts.push_back (inputs_and_output_subscripts[0].substr (0, comma_pos)); + inputs_and_output_subscripts[0] = inputs_and_output_subscripts[0].substr (comma_pos + 1); + comma_pos = inputs_and_output_subscripts[0].find (','); + } + input_subscripts.push_back (inputs_and_output_subscripts[0]); - if (input_subscripts.size() != 1 && input_subscripts.size() != 2) { - throw std::invalid_argument("Expecting 1 or 2 input subscripts in equation '" + equation_no_space + - "' but got: " + std::to_string(input_subscripts.size())); - } + if (input_subscripts.size () != 1 && input_subscripts.size () != 2) + { + throw std::invalid_argument ("Expecting 1 or 2 input subscripts in equation '" + equation_no_space + + "' but got: " + std::to_string (input_subscripts.size ())); + } // Part 5: Check the characters in the equation - std::regex pattern("[^a-zA-Z\\.]+"); - if (input_subscripts.size() == 2) { - if (std::regex_search(input_subscripts[0] + input_subscripts[1] + output_subscript, 
pattern)) { - throw std::invalid_argument("Invalid character in einsum equation: " + equation); + std::regex pattern ("[^a-zA-Z\\.]+"); + if (input_subscripts.size () == 2) + { + if (std::regex_search (input_subscripts[0] + input_subscripts[1] + output_subscript, pattern)) + { + throw std::invalid_argument ("Invalid character in einsum equation: " + equation); + } } - } - else if (input_subscripts.size() == 1) { - if (std::regex_search(input_subscripts[0] + output_subscript, pattern)) { - throw std::invalid_argument("Invalid character in einsum equation: " + equation); + else if (input_subscripts.size () == 1) + { + if (std::regex_search (input_subscripts[0] + output_subscript, pattern)) + { + throw std::invalid_argument ("Invalid character in einsum equation: " + equation); + } + } + else + { + throw std::invalid_argument ("Invalid einsum equation: " + equation); } - } - else { - throw std::invalid_argument("Invalid einsum equation: " + equation); - } return true; } // Preprocessing for the input equation expr -bool ParseEinsumEquation( - const std::string& equation, - std::vector& label_types, - std::vector>& input_labels, - std::vector& output_labels, - std::vector>& input_label_counts, - std::vector& output_label_counts, - std::vector& input_has_ellipsis, - bool& output_has_ellipsis) +bool + ParseEinsumEquation (const std::string& equation, + std::vector& label_types, + std::vector>& input_labels, + std::vector& output_labels, + std::vector>& input_label_counts, + std::vector& output_label_counts, + std::vector& input_has_ellipsis, + bool& output_has_ellipsis) { // Check the equation's validation std::vector input_str; std::string output_str; - if (!ValidateEinsumEquation(equation, input_str, output_str)) { - return false; - } + if (!ValidateEinsumEquation (equation, input_str, output_str)) + { + return false; + } std::unordered_map label_mapping; - int num_inputs = input_str.size(); - input_labels.resize(num_inputs); + int num_inputs = input_str.size (); + 
input_labels.resize (num_inputs); // Map from single characters to integer labels. - // Labels that are identical in the output equation and distinct input equations are assigned the same integer mapping. - for (int ii = 0; ii < num_inputs; ii++) { - MapToLabels(input_str[ii], input_labels[ii], label_mapping); - } - MapToLabels(output_str, output_labels, label_mapping); + // Labels that are identical in the output equation and distinct input equations are assigned the same integer + // mapping. + for (int ii = 0; ii < num_inputs; ii++) + { + MapToLabels (input_str[ii], input_labels[ii], label_mapping); + } + MapToLabels (output_str, output_labels, label_mapping); // Compute counts for input and output labels. - int num_labels = label_mapping.size(); - input_label_counts.resize(num_inputs); - input_has_ellipsis.resize(num_inputs); - for (int ii = 0; ii < num_inputs; ii++) { - input_label_counts[ii].resize(num_labels, 0); - input_has_ellipsis[ii] = false; - for (const int label : input_labels[ii]) { - if (label != kEllipsisLabel) { - input_label_counts[ii][label] += 1; - } else { - input_has_ellipsis[ii] = true; - } + int num_labels = label_mapping.size (); + input_label_counts.resize (num_inputs); + input_has_ellipsis.resize (num_inputs); + for (int ii = 0; ii < num_inputs; ii++) + { + input_label_counts[ii].resize (num_labels, 0); + input_has_ellipsis[ii] = false; + for (const int label: input_labels[ii]) + { + if (label != kEllipsisLabel) + { + input_label_counts[ii][label] += 1; + } + else + { + input_has_ellipsis[ii] = true; + } + } } - } - output_label_counts.resize(num_labels, 0); + output_label_counts.resize (num_labels, 0); output_has_ellipsis = false; - for (const int label : output_labels) { - if (label != kEllipsisLabel) { - output_label_counts[label] += 1; - } else { - output_has_ellipsis = true; + for (const int label: output_labels) + { + if (label != kEllipsisLabel) + { + output_label_counts[label] += 1; + } + else + { + output_has_ellipsis = true; + 
} } - } // Map each label to a unique EinsumDimensionType. - label_types.resize(num_labels); - for (int label = 0; label < num_labels; label++) { - // if (label == kEllipsisLabel) continue; Not necessary here. - bool removed = output_label_counts[label] == 0; - bool unique = num_inputs == 1 || input_label_counts[0][label] == 0 || - input_label_counts[1][label] == 0; - label_types[label] = GetDimensionType(removed, unique); - } + label_types.resize (num_labels); + for (int label = 0; label < num_labels; label++) + { + // if (label == kEllipsisLabel) continue; Not necessary here. + bool removed = output_label_counts[label] == 0; + bool unique = num_inputs == 1 || input_label_counts[0][label] == 0 || input_label_counts[1][label] == 0; + label_types[label] = GetDimensionType (removed, unique); + } return true; } // Records the dimension size for the given label. Checks that the dimension -bool RecordLabelToDimension(const int label, const int axis, const Tensor& input, - std::unordered_map& label_to_dim_sizes) { - const int64_t input_dim = input.shape().dim_size(axis); +bool + RecordLabelToDimension (const int label, + const int axis, + const Tensor& input, + std::unordered_map& label_to_dim_sizes) +{ + const int64_t input_dim = input.shape ().dim_size (axis); auto& label_dim = label_to_dim_sizes[label]; - if (label_dim != 0 && label_dim != input_dim) { - throw std::invalid_argument( - "Expected dimension " + std::to_string(label_to_dim_sizes[label]) + " at axis " + - std::to_string(axis) + " of the input shaped " + - " but got dimension " + std::to_string(input_dim)); - } + if (label_dim != 0 && label_dim != input_dim) + { + throw std::invalid_argument ("Expected dimension " + std::to_string (label_to_dim_sizes[label]) + + " at axis " + std::to_string (axis) + " of the input shaped " + + " but got dimension " + std::to_string (input_dim)); + } label_to_dim_sizes[label] = input_dim; return true; } // Validate input dimensions and populate unnamed labels and their label 
counts. // Also populate the mapping from named labels to their dimension sizes. -bool ProcessDimensions( - const std::vector& inputs, - std::vector& label_types, - std::vector>& input_labels, - std::vector& output_labels, - std::vector>& input_label_counts, - std::vector& output_label_counts, - const std::vector& input_has_ellipsis, - const bool output_has_ellipsis, - std::unordered_map& label_to_dim_sizes) +bool + ProcessDimensions (const std::vector& inputs, + std::vector& label_types, + std::vector>& input_labels, + std::vector& output_labels, + std::vector>& input_label_counts, + std::vector& output_label_counts, + const std::vector& input_has_ellipsis, + const bool output_has_ellipsis, + std::unordered_map& label_to_dim_sizes) { - const int num_inputs = inputs.size(); - const int num_labels = label_types.size(); + const int num_inputs = inputs.size (); + const int num_labels = label_types.size (); int max_bcast_dims = 0; // Check that the number of dimensions match for each label. - for (int ii = 0; ii < num_inputs; ii++) { - const Tensor& input = *inputs[ii]; - const int num_dims = input.shape().ndim(); - const std::vector& labels = input_labels[ii]; - const std::vector& label_counts = input_label_counts[ii]; - bool has_ellipsis = input_has_ellipsis[ii]; - // We infer the number of broadcasting dimensions by taking the maximum rank - // among the broadcasting subshapes of the input. - if (!has_ellipsis) { - // If there is no ellipsis, the number of dimensions must match the number - if (num_dims != labels.size()) { - throw std::invalid_argument( - "Input " + std::to_string(ii) + " has " + std::to_string(num_dims) + - " dimensions but got" + std::to_string(num_labels) + " labels"); - } - for (int label_idx = 0; label_idx < labels.size(); label_idx++) { - const int label = labels[label_idx]; - // if (label == kEllipsisLabel) continue; - // Double counting of labels is allowed. No need to check. 
- // if (label_counts[label] > 1) { - // throw std::invalid_argument( - // "Label " + std::to_string(label) + " appears more than once in input " + - // std::to_string(ii)); - // } - RecordLabelToDimension(label, label_idx, input, label_to_dim_sizes); - } - continue; - } - - // Input has an ellipsis. - // There are two cases: - // 1. The ellipsis shadows at least one label: num_dims >= labels.size(). - // 2. The ellipsis shadows no labels: num_dims == labels.size() - 1. - // In both cases, num_dims >= labels.size() - 1. - if (num_dims < labels.size() - 1) { - throw std::invalid_argument( - "Input " + std::to_string(ii) + " has " + std::to_string(num_dims) + - " dimensions but " + std::to_string(num_labels) + " labels"); - } - int ellipsis_idx = -1; - // Number of shadowed labels = num_dims - (labels.size() - 1). - const int num_bcast_labels = num_dims - labels.size() + 1; - for (int label_idx = 0; label_idx < labels.size(); label_idx++) { - const int label = labels[label_idx]; - // Find the ellipsis axis. - if (label == kEllipsisLabel) { - ellipsis_idx = label_idx; - continue; - } - // Current label is not an ellipsis. + for (int ii = 0; ii < num_inputs; ii++) + { + const Tensor& input = *inputs[ii]; + const int num_dims = input.shape ().ndim (); + const std::vector& labels = input_labels[ii]; + const std::vector& label_counts = input_label_counts[ii]; + bool has_ellipsis = input_has_ellipsis[ii]; + // We infer the number of broadcasting dimensions by taking the maximum rank + // among the broadcasting subshapes of the input. 
+ if (!has_ellipsis) + { + // If there is no ellipsis, the number of dimensions must match the number + if (num_dims != labels.size ()) + { + throw std::invalid_argument ("Input " + std::to_string (ii) + " has " + + std::to_string (num_dims) + " dimensions but got" + + std::to_string (num_labels) + " labels"); + } + for (int label_idx = 0; label_idx < labels.size (); label_idx++) + { + const int label = labels[label_idx]; + // if (label == kEllipsisLabel) continue; + // Double counting of labels is allowed. No need to check. + // if (label_counts[label] > 1) { + // throw std::invalid_argument( + // "Label " + std::to_string(label) + " appears more than once in input " + + // std::to_string(ii)); + // } + RecordLabelToDimension (label, label_idx, input, label_to_dim_sizes); + } + continue; + } + + // Input has an ellipsis. // There are two cases: - // 1. The ellipsis axis is not found yet: axis = label_idx. - // 2. The ellipsis axis is found: axis = label_idx - 1 + num_bcast_labels. - const int axis = label_idx + (ellipsis_idx == -1 ? 0 : num_bcast_labels - 1); - RecordLabelToDimension(label, axis, input, label_to_dim_sizes); + // 1. The ellipsis shadows at least one label: num_dims >= labels.size(). + // 2. The ellipsis shadows no labels: num_dims == labels.size() - 1. + // In both cases, num_dims >= labels.size() - 1. + if (num_dims < labels.size () - 1) + { + throw std::invalid_argument ("Input " + std::to_string (ii) + " has " + std::to_string (num_dims) + + " dimensions but " + std::to_string (num_labels) + " labels"); + } + int ellipsis_idx = -1; + // Number of shadowed labels = num_dims - (labels.size() - 1). + const int num_bcast_labels = num_dims - labels.size () + 1; + for (int label_idx = 0; label_idx < labels.size (); label_idx++) + { + const int label = labels[label_idx]; + // Find the ellipsis axis. + if (label == kEllipsisLabel) + { + ellipsis_idx = label_idx; + continue; + } + // Current label is not an ellipsis. + // There are two cases: + // 1. 
The ellipsis axis is not found yet: axis = label_idx. + // 2. The ellipsis axis is found: axis = label_idx - 1 + num_bcast_labels. + const int axis = label_idx + (ellipsis_idx == -1 ? 0 : num_bcast_labels - 1); + RecordLabelToDimension (label, axis, input, label_to_dim_sizes); + } + // Found an ellipsis. Replace it with the appropriate number of broadcasting + // labels. + if (ellipsis_idx != -1) + { + InsertBroadcastLabels (num_bcast_labels, + num_labels, + ellipsis_idx, + input_labels[ii], + input_label_counts[ii]); + max_bcast_dims = std::max (max_bcast_dims, num_bcast_labels); + } } - // Found an ellipsis. Replace it with the appropriate number of broadcasting - // labels. - if (ellipsis_idx != -1) { - InsertBroadcastLabels(num_bcast_labels, num_labels, ellipsis_idx, input_labels[ii], - input_label_counts[ii]); - max_bcast_dims = std::max(max_bcast_dims, num_bcast_labels); + if (!input_has_ellipsis[0] && (input_has_ellipsis.size () == 1 || !input_has_ellipsis[1]) && !output_has_ellipsis) + { + return true; } - } - if (!input_has_ellipsis[0] - && (input_has_ellipsis.size() == 1 || !input_has_ellipsis[1]) - && !output_has_ellipsis) - { - return true; - } // Insert broadcasting labels into the output labels. - auto it = std::find(output_labels.begin(), output_labels.end(), kEllipsisLabel); - if (it != output_labels.end()) { - const int ellipsis_idx = it - output_labels.begin(); - InsertBroadcastLabels(max_bcast_dims, num_labels, ellipsis_idx, - output_labels, output_label_counts); - } else if (max_bcast_dims > 0) { - throw std::invalid_argument( - "Output has no ellipsis but input has ellipsis. 
Cannot insert broadcasting labels."); - } + auto it = std::find (output_labels.begin (), output_labels.end (), kEllipsisLabel); + if (it != output_labels.end ()) + { + const int ellipsis_idx = it - output_labels.begin (); + InsertBroadcastLabels (max_bcast_dims, num_labels, ellipsis_idx, output_labels, output_label_counts); + } + else if (max_bcast_dims > 0) + { + throw std::invalid_argument ( + "Output has no ellipsis but input has ellipsis. Cannot insert broadcasting labels."); + } // Polupate EinsumDimensionType for the new broadcasting labels. - label_types.resize(num_labels + max_bcast_dims, EinsumDimensionType::kBroadcasting); + label_types.resize (num_labels + max_bcast_dims, EinsumDimensionType::kBroadcasting); return true; } - -bool ReduceOperand( - const Tensor& input, - const std::vector& label_types, - std::vector& labels, - const std::vector& label_counts, - std::vector& free_labels, - int& swap_free_and_contract, - Tensor& output) +bool + ReduceOperand (const Tensor& input, + const std::vector& label_types, + std::vector& labels, + const std::vector& label_counts, + std::vector& free_labels, + int& swap_free_and_contract, + Tensor& output) { // Find the permutation to transpose the input dimensions in the order of // EinsumDimensionType; i.e. batch, free, contract and reduce dimensions. // This makes it more convenient to invoke Reduce/Contract operations. - std::vector permutation(input.shape().ndim(), 0); - std::iota(permutation.begin(), permutation.end(), 0); + std::vector permutation (input.shape ().ndim (), 0); + std::iota (permutation.begin (), permutation.end (), 0); Tensor input_transposed; // Check if we can avoid the transpose. We need to flip the conj_x (or conj_y) // flag during BatchMatMul. This is an extra optimization not necessary for // correctness. 
- if(ShouldSwapFreeAndContract(labels, label_types)) { - swap_free_and_contract = 1; - } - else { - std::sort(permutation.begin(), permutation.end(), [&](int ii, int jj) { - int label_ii = labels[ii]; - int label_jj = labels[jj]; - return std::tie(label_types[label_ii], label_ii) < - std::tie(label_types[label_jj], label_jj); - }); - } + if (ShouldSwapFreeAndContract (labels, label_types)) + { + swap_free_and_contract = 1; + } + else + { + std::sort (permutation.begin (), + permutation.end (), + [&] (int ii, int jj) + { + int label_ii = labels[ii]; + int label_jj = labels[jj]; + return std::tie (label_types[label_ii], label_ii) + < std::tie (label_types[label_jj], label_jj); + }); + } // Transpose the input so that EinsumDimensionTypes are in order. - TransposeOperand(input, permutation, input_transposed); + TransposeOperand (input, permutation, input_transposed); // Permutes labels - PermuteLabels(permutation, labels); + PermuteLabels (permutation, labels); // Take the generalized diagonal for dimensions with repeated axis labels. // This is necessary for the Reduce/Contract operations. Tensor input_deduped; - labels.erase(std::unique(labels.begin(), labels.end()), labels.end()); + labels.erase (std::unique (labels.begin (), labels.end ()), labels.end ()); - - StrideOrInflateOperand(input_transposed, labels, label_counts, false, input_deduped); + StrideOrInflateOperand (input_transposed, labels, label_counts, false, input_deduped); // Reshape denotes the rank-5 shape [broadcast, batch, free, contract, // reduce] where we've compacted the dimensions of each EinsumDimensionType. @@ -736,289 +816,358 @@ bool ReduceOperand( // The output shape is [batch shape] + [free size, contract size] // That is, the batch shape is preserved (for broadcasting while // contracting) while the free dims and contract dims are compressed to one - // dimension each. + // dimension each. 
TensorShape output_shape; - for (int label_idx = 0; label_idx < labels.size(); label_idx++) { - const int label = labels[label_idx]; - int64_t dim = input_deduped.shape().dim_size(label_idx); - if (label_types[label] == EinsumDimensionType::kBroadcasting || - label_types[label] == EinsumDimensionType::kBatch) { - output_shape.add_dim(dim); - } - else if (label_types[label] == EinsumDimensionType::kFree) { - free_labels.push_back(label); + for (int label_idx = 0; label_idx < labels.size (); label_idx++) + { + const int label = labels[label_idx]; + int64_t dim = input_deduped.shape ().dim_size (label_idx); + if (label_types[label] == EinsumDimensionType::kBroadcasting + || label_types[label] == EinsumDimensionType::kBatch) + { + output_shape.add_dim (dim); + } + else if (label_types[label] == EinsumDimensionType::kFree) + { + free_labels.push_back (label); + } + // All together, the reshape is [broadcast, batch, free, contract, reduce] + reshape[label_types[label]] *= dim; } - // All together, the reshape is [broadcast, batch, free, contract, reduce] - reshape[label_types[label]] *= dim; - } - - if (swap_free_and_contract) { - std::swap(reshape[EinsumDimensionType::kFree], reshape[EinsumDimensionType::kContract]); - } - output_shape.add_dim(reshape[EinsumDimensionType::kFree]); - output_shape.add_dim(reshape[EinsumDimensionType::kContract]); + if (swap_free_and_contract) + { + std::swap (reshape[EinsumDimensionType::kFree], reshape[EinsumDimensionType::kContract]); + } + output_shape.add_dim (reshape[EinsumDimensionType::kFree]); + output_shape.add_dim (reshape[EinsumDimensionType::kContract]); - if (reshape[EinsumDimensionType::kReduce] == - 1) { // No need to actually reduce. - return CopyFrom(input_deduped, output_shape, &output); - } + if (reshape[EinsumDimensionType::kReduce] == 1) + { // No need to actually reduce. 
+ return CopyFrom (input_deduped, output_shape, &output); + } // This command will actually allocate memory for the output tensor - CopyFromWithAllocate(input_deduped, output_shape, &output); - Tensor output_shaped = output.shaped({-1}); + CopyFromWithAllocate (input_deduped, output_shape, &output); + Tensor output_shaped = output.shaped ({-1}); - op::reduce_op()( - input_deduped.shaped({-1, reshape[EinsumDimensionType::kReduce]}), - reshape[EinsumDimensionType::kReduce], output_shaped); + op::reduce_op () (input_deduped.shaped ({-1, reshape[EinsumDimensionType::kReduce]}), + reshape[EinsumDimensionType::kReduce], + output_shaped); return true; } template -static void DoContract( - const Tensor& in_x, - const Tensor& in_y, - const EinsumOption& option, - const bool& trans_x, - const bool& trans_y, - const einsum_utils::BCast& bcast, - Tensor& out_z) +static void + DoContract (const Tensor& in_x, + const Tensor& in_y, + const EinsumOption& option, + const bool& trans_x, + const bool& trans_y, + const einsum_utils::BCast& bcast, + Tensor& out_z) { - const T alpha = static_cast(option.alpha); - const T beta = static_cast(option.beta); - const int m = in_x.shape().dim_size(option.conj_x || trans_x ? 2 : 1); - const int k = in_x.shape().dim_size(option.conj_x || trans_x ? 1 : 2); - const int n = in_y.shape().dim_size(option.conj_y || trans_y ? 1 : 2); + const T alpha = static_cast (option.alpha); + const T beta = static_cast (option.beta); + const int m = in_x.shape ().dim_size (option.conj_x || trans_x ? 2 : 1); + const int k = in_x.shape ().dim_size (option.conj_x || trans_x ? 1 : 2); + const int n = in_y.shape ().dim_size (option.conj_y || trans_y ? 
1 : 2); const int64_t batch_size = bcast.z_batch_size; - std::vector x_device_memory_ptrs = {}; x_device_memory_ptrs.reserve(batch_size); - std::vector y_device_memory_ptrs = {}; y_device_memory_ptrs.reserve(batch_size); - std::vector z_device_memory_ptrs = {}; z_device_memory_ptrs.reserve(batch_size); + std::vector x_device_memory_ptrs = {}; + x_device_memory_ptrs.reserve (batch_size); + std::vector y_device_memory_ptrs = {}; + y_device_memory_ptrs.reserve (batch_size); + std::vector z_device_memory_ptrs = {}; + z_device_memory_ptrs.reserve (batch_size); - auto* x_base_ptr = in_x.data(); - auto* y_base_ptr = in_y.data(); - auto* z_base_ptr = out_z.data(); + auto* x_base_ptr = in_x.data (); + auto* y_base_ptr = in_y.data (); + auto* z_base_ptr = out_z.data (); int64_t x_stride = 0; int64_t y_stride = 0; int64_t z_stride = 0; - bool is_full_broadcast = - std::min(bcast.x_batch_size, bcast.y_batch_size) == 1; + bool is_full_broadcast = std::min (bcast.x_batch_size, bcast.y_batch_size) == 1; - bool use_strided_batched = - (!bcast.requires_broadcast || is_full_broadcast) && batch_size > 1; - - if (use_strided_batched) { - x_stride = bcast.x_batch_size != 1 ? m * k : 0; - y_stride = bcast.y_batch_size != 1 ? 
k * n : 0; - z_stride = m * n; + bool use_strided_batched = (!bcast.requires_broadcast || is_full_broadcast) && batch_size > 1; - x_device_memory_ptrs.push_back(x_base_ptr); - y_device_memory_ptrs.push_back(y_base_ptr); - z_device_memory_ptrs.push_back(z_base_ptr); - } - else if (!bcast.requires_broadcast) { - for (int ii = 0; ii < batch_size; ii++) { - x_device_memory_ptrs.push_back(x_base_ptr + ii * m * k); - y_device_memory_ptrs.push_back(y_base_ptr + ii * k * n); - z_device_memory_ptrs.push_back(z_base_ptr + ii * m * n); - } - } - else { - std::vector x_device_memory = {}; - std::vector y_device_memory = {}; - for (int ii = 0; ii < bcast.x_batch_size; ii++) { - x_device_memory.push_back(x_base_ptr + ii * m * k); + if (use_strided_batched) + { + x_stride = bcast.x_batch_size != 1 ? m * k : 0; + y_stride = bcast.y_batch_size != 1 ? k * n : 0; + z_stride = m * n; + + x_device_memory_ptrs.push_back (x_base_ptr); + y_device_memory_ptrs.push_back (y_base_ptr); + z_device_memory_ptrs.push_back (z_base_ptr); } - for (int ii = 0; ii < bcast.y_batch_size; ii++) { - y_device_memory.push_back(y_base_ptr + ii * k * n); + else if (!bcast.requires_broadcast) + { + for (int ii = 0; ii < batch_size; ii++) + { + x_device_memory_ptrs.push_back (x_base_ptr + ii * m * k); + y_device_memory_ptrs.push_back (y_base_ptr + ii * k * n); + z_device_memory_ptrs.push_back (z_base_ptr + ii * m * n); + } } - for (int ii = 0; ii < bcast.z_batch_size; ii++) { - x_device_memory_ptrs.push_back(x_device_memory[bcast.x_batch_shape[ii]]); - y_device_memory_ptrs.push_back(y_device_memory[bcast.y_batch_shape[ii]]); - z_device_memory_ptrs.push_back(z_base_ptr + ii * m * n); + else + { + std::vector x_device_memory = {}; + std::vector y_device_memory = {}; + for (int ii = 0; ii < bcast.x_batch_size; ii++) + { + x_device_memory.push_back (x_base_ptr + ii * m * k); + } + for (int ii = 0; ii < bcast.y_batch_size; ii++) + { + y_device_memory.push_back (y_base_ptr + ii * k * n); + } + for (int ii = 0; ii < 
bcast.z_batch_size; ii++) + { + x_device_memory_ptrs.push_back (x_device_memory[bcast.x_batch_shape[ii]]); + y_device_memory_ptrs.push_back (y_device_memory[bcast.y_batch_shape[ii]]); + z_device_memory_ptrs.push_back (z_base_ptr + ii * m * n); + } } - } // Do GEMM operations finally! // where A, B and C are assumed to be in column major. // We want the output to be in row-major, so we can compute // C' = B' x A', where ' stands for transpose (not adjoint). - if (batch_size == 1) { - // Dot product - if (m == 1 && n == 1 && option.conj_x != true && option.conj_y != true) { + if (batch_size == 1) + { // Dot product - // TODO: implement the Conjugate version of Dot product. - kernels::blas_dot()(k, x_device_memory_ptrs[0], 1, y_device_memory_ptrs[0], 1, z_device_memory_ptrs[0]); + if (m == 1 && n == 1 && option.conj_x != true && option.conj_y != true) + { + // Dot product + // TODO: implement the Conjugate version of Dot product. + kernels::blas_dot () (k, + x_device_memory_ptrs[0], + 1, + y_device_memory_ptrs[0], + 1, + z_device_memory_ptrs[0]); + } + // Gemv + else if (n == 1 && option.conj_x != true) + { + // This is a matrix*vector multiply so use GEMV to compute A * x. + // Here we are multiplying in the natural order, so we have to flip + // the transposition flag to compensate for the tensor being stored + // row-major. Since GEMV doesn't provide a way to just conjugate an + // argument, we have to defer those cases to GEMM below. + kernels::blas_gemv () (trans_x ? 'N' : 'T', + trans_x ? m : k, + trans_x ? k : m, + &alpha, + x_device_memory_ptrs[0], + trans_x ? m : k, + y_device_memory_ptrs[0], + 1, + &beta, + z_device_memory_ptrs[0], + 1); + } + // Gemm + else + { + // Call the column-major Blas library + kernels::blas_gemm () (option.conj_y ? 'C' + : trans_y ? 'T' + : 'N', + option.conj_x ? 'C' + : trans_x ? 'T' + : 'N', + n, + m, + k, + &alpha, + y_device_memory_ptrs[0], + option.conj_y || trans_y ? 
k : n, + x_device_memory_ptrs[0], + option.conj_x || trans_x ? m : k, + &beta, + z_device_memory_ptrs[0], + n); + } + return; } - // Gemv - else if (n == 1 && option.conj_x != true) { - // This is a matrix*vector multiply so use GEMV to compute A * x. - // Here we are multiplying in the natural order, so we have to flip - // the transposition flag to compensate for the tensor being stored - // row-major. Since GEMV doesn't provide a way to just conjugate an - // argument, we have to defer those cases to GEMM below. - kernels::blas_gemv()( - trans_x ? 'N' : 'T', - trans_x ? m : k, - trans_x ? k : m, - &alpha, - x_device_memory_ptrs[0], trans_x ? m : k, - y_device_memory_ptrs[0], 1, - &beta, - z_device_memory_ptrs[0], 1); + else if (use_strided_batched) + { + kernels::blas_gemm_batched_strided () (option.conj_y ? 'C' + : trans_y ? 'T' + : 'N', + option.conj_x ? 'C' + : trans_x ? 'T' + : 'N', + n, + m, + k, + &alpha, + y_device_memory_ptrs[0], + option.conj_y || trans_y ? k : n, + y_stride, + x_device_memory_ptrs[0], + option.conj_x || trans_x ? m : k, + x_stride, + &beta, + z_device_memory_ptrs[0], + n, + z_stride, + batch_size); } - // Gemm - else { - // Call the column-major Blas library - kernels::blas_gemm()( - option.conj_y ? 'C' : trans_y ? 'T' : 'N', - option.conj_x ? 'C' : trans_x ? 'T' : 'N', - n, m, k, - &alpha, - y_device_memory_ptrs[0], option.conj_y || trans_y ? k : n, - x_device_memory_ptrs[0], option.conj_x || trans_x ? m : k, - &beta, - z_device_memory_ptrs[0], n); + else + { + kernels::blas_gemm_batched () (option.conj_y ? 'C' + : trans_y ? 'T' + : 'N', + option.conj_x ? 'C' + : trans_x ? 'T' + : 'N', + n, + m, + k, + &alpha, + y_device_memory_ptrs.data (), + option.conj_y || trans_y ? k : n, + x_device_memory_ptrs.data (), + option.conj_x || trans_x ? m : k, + &beta, + z_device_memory_ptrs.data (), + n, + batch_size); } - return; - } - else if (use_strided_batched) { - kernels::blas_gemm_batched_strided()( - option.conj_y ? 'C' : trans_y ? 
'T' : 'N', - option.conj_x ? 'C' : trans_x ? 'T' : 'N', - n, m, k, - &alpha, - y_device_memory_ptrs[0], option.conj_y || trans_y ? k : n, y_stride, - x_device_memory_ptrs[0], option.conj_x || trans_x ? m : k, x_stride, - &beta, - z_device_memory_ptrs[0], n, z_stride, - batch_size); - } - else { - kernels::blas_gemm_batched()( - option.conj_y ? 'C' : trans_y ? 'T' : 'N', - option.conj_x ? 'C' : trans_x ? 'T' : 'N', - n, m, k, - &alpha, - y_device_memory_ptrs.data(), option.conj_y || trans_y ? k : n, - x_device_memory_ptrs.data(), option.conj_x || trans_x ? m : k, - &beta, - z_device_memory_ptrs.data(), n, - batch_size); - } } // Contracts the inputs along the last axis (or the second last if the // corresponding value of swap_free_and_contract is true). The batch // dimensions are broadcast to the output shape. -bool ContractOperands( - std::vector& inputs, - const std::vector& swap_free_and_contract, - const EinsumOption& option, - Tensor& output) +bool + ContractOperands (std::vector& inputs, + const std::vector& swap_free_and_contract, + const EinsumOption& option, + Tensor& output) { - if (inputs.size() == 1) { - return CopyFrom(inputs[0], inputs[0].shape(), &output); - } - BCast bcast = prepare_bcast(inputs[0].shape().dims(), inputs[1].shape().dims()); + if (inputs.size () == 1) + { + return CopyFrom (inputs[0], inputs[0].shape (), &output); + } + BCast bcast = prepare_bcast (inputs[0].shape ().dims (), inputs[1].shape ().dims ()); - if (bcast.valid == false) { - throw std::invalid_argument("Invalid broadcast shape"); - } + if (bcast.valid == false) + { + throw std::invalid_argument ("Invalid broadcast shape"); + } Tensor lhs, rhs; - ReshapeToRank3(inputs[0], bcast.x_batch_size, lhs); - ReshapeToRank3(inputs[1], bcast.y_batch_size, rhs); + ReshapeToRank3 (inputs[0], bcast.x_batch_size, lhs); + ReshapeToRank3 (inputs[1], bcast.y_batch_size, rhs); TensorShape output_shape = bcast.z_batch_shape; - for (int ii = 0; ii < inputs.size(); ii++) { - const int64_t 
free_axis = - inputs[ii].shape().ndim() - (swap_free_and_contract[ii] ? 1 : 2); - output_shape.add_dim(inputs[ii].shape().dim_size(free_axis)); - } + for (int ii = 0; ii < inputs.size (); ii++) + { + const int64_t free_axis = inputs[ii].shape ().ndim () - (swap_free_and_contract[ii] ? 1 : 2); + output_shape.add_dim (inputs[ii].shape ().dim_size (free_axis)); + } bool trans_x = swap_free_and_contract[0]; bool trans_y = !swap_free_and_contract[1]; - if (option.out != nullptr) { - if (output_shape.NumElements() != option.out->NumElements()) { - throw std::invalid_argument("Invalid option: output shape mismatch the requested shape"); + if (option.out != nullptr) + { + if (output_shape.NumElements () != option.out->NumElements ()) + { + throw std::invalid_argument ("Invalid option: output shape mismatch the requested shape"); + } + CopyFrom (*option.out, output_shape, &output); + } + else + { + CopyFromWithAllocate (inputs[0], output_shape, &output); + } + if (lhs.NumElements () == 0 || rhs.NumElements () == 0) + { + output.zero (); + return true; } - CopyFrom(*option.out, output_shape, &output); - } - else { - CopyFromWithAllocate(inputs[0], output_shape, &output); - } - if (lhs.NumElements() == 0 || rhs.NumElements() == 0) { - output.zero(); - return true; - } Tensor output_reshaped; - ReshapeToRank3(output, bcast.z_batch_size, output_reshaped); + ReshapeToRank3 (output, bcast.z_batch_size, output_reshaped); - TEMPLATE_BLAS_2(output_reshaped.data_type(), output_reshaped.device_type(), - einsum_utils::DoContract(lhs, rhs, option, trans_x, trans_y, bcast, output_reshaped)) + TEMPLATE_BLAS_2 (output_reshaped.data_type (), + output_reshaped.device_type (), + einsum_utils::DoContract (lhs, rhs, option, trans_x, trans_y, bcast, output_reshaped)) return true; } -void ProcessOutput( - const Tensor& input, - const std::vector& label_types, - const std::vector>& free_labels, - std::unordered_map& label_to_dim_sizes, - const std::vector& output_labels, - const std::vector& 
output_label_counts, - Tensor& output) +void + ProcessOutput (const Tensor& input, + const std::vector& label_types, + const std::vector>& free_labels, + std::unordered_map& label_to_dim_sizes, + const std::vector& output_labels, + const std::vector& output_label_counts, + Tensor& output) { - TensorShape result_shape = input.shape(); - result_shape.remove_dim(result_shape.ndim() - 1); - result_shape.remove_dim(result_shape.ndim() - 1); + TensorShape result_shape = input.shape (); + result_shape.remove_dim (result_shape.ndim () - 1); + result_shape.remove_dim (result_shape.ndim () - 1); - int num_labels = label_types.size(); + int num_labels = label_types.size (); std::vector result_labels = {}; // All batch dimensions should be present in the contracted result. First // the broadcasting dimensions, then the named batch dimensions. - for (int label = 0; label < num_labels; ++label) { - if (label_types[label] == EinsumDimensionType::kBroadcasting || label_types[label] == EinsumDimensionType::kBatch) { - result_labels.push_back(label); + for (int label = 0; label < num_labels; ++label) + { + if (label_types[label] == EinsumDimensionType::kBroadcasting + || label_types[label] == EinsumDimensionType::kBatch) + { + result_labels.push_back (label); + } } - } - for (int ii = 0; ii < free_labels.size(); ii++) { - for (int label : free_labels[ii]) { - result_labels.push_back(label); - result_shape.add_dim(label_to_dim_sizes[label]); + for (int ii = 0; ii < free_labels.size (); ii++) + { + for (int label: free_labels[ii]) + { + result_labels.push_back (label); + result_shape.add_dim (label_to_dim_sizes[label]); + } } - } // If the output is a zero dimensional scalar, use a 1 dimention vector instead. // TODO: Use a scalar constructor in Tensor Object. 
- if (result_shape.ndim() == 0 && input.NumElements() == 1) { - result_shape.add_dim(1); - } + if (result_shape.ndim () == 0 && input.NumElements () == 1) + { + result_shape.add_dim (1); + } // Reshape the contraction (or reduction) result to its expanded shape: // [(broadcasted) batch shape] + [free shape 0] + [free shape 1]. Tensor contraction_output; - CopyFrom(input, result_shape, &contraction_output); + CopyFrom (input, result_shape, &contraction_output); // Inflate the output if necessary. (E.g. for the equation 'i->iii' which // may arise while computing gradient of a regular Einsum). Tensor output_inflated; - StrideOrInflateOperand(contraction_output, - result_labels, output_label_counts, true /* should_inflate */, output_inflated); - - if (output_inflated.shape().ndim() > contraction_output.shape().ndim()) { - // We inflated the output. Modify result labels accordingly. - std::vector inflated_labels = {}; - for (int label : result_labels) { - inflated_labels.insert(inflated_labels.end(), output_label_counts[label], label); + StrideOrInflateOperand (contraction_output, + result_labels, + output_label_counts, + true /* should_inflate */, + output_inflated); + + if (output_inflated.shape ().ndim () > contraction_output.shape ().ndim ()) + { + // We inflated the output. Modify result labels accordingly. + std::vector inflated_labels = {}; + for (int label: result_labels) + { + inflated_labels.insert (inflated_labels.end (), output_label_counts[label], label); + } + result_labels.swap (inflated_labels); } - result_labels.swap(inflated_labels); - } // Find the permutation to map the result labels to the output labels. Note // that both the result and the final output may have the repeated labels, @@ -1026,23 +1175,26 @@ void ProcessOutput( // E.g. if result labels are [0, 0, 1] and output is [0, l, 0] then the // permutation should be [0, 2, 1]. We also use the fact that repeated // labels in the result are adjacent to each other. 
- std::vector output_permutation(output_labels.size()); - std::vector label_to_position(num_labels, -1); - - for (int ii = 0; ii < result_labels.size(); ii++) { - // Remember the position of only the leftmost result label. - if (label_to_position[result_labels[ii]] == -1) { - label_to_position[result_labels[ii]] = ii; + std::vector output_permutation (output_labels.size ()); + std::vector label_to_position (num_labels, -1); + + for (int ii = 0; ii < result_labels.size (); ii++) + { + // Remember the position of only the leftmost result label. + if (label_to_position[result_labels[ii]] == -1) + { + label_to_position[result_labels[ii]] = ii; + } + } + for (int ii = 0; ii < output_labels.size (); ii++) + { + output_permutation[ii] = label_to_position[output_labels[ii]]; + // We have found the leftmost occurrence. The next one would be adjacent. + label_to_position[output_labels[ii]] += 1; } - } - for (int ii = 0; ii < output_labels.size(); ii++) { - output_permutation[ii] = label_to_position[output_labels[ii]]; - // We have found the leftmost occurrence. The next one would be adjacent. 
- label_to_position[output_labels[ii]] += 1; - } - TransposeOperand(output_inflated, output_permutation, output); + TransposeOperand (output_inflated, output_permutation, output); } -} // namespace utils -} // namespace container +} // namespace einsum_utils +} // namespace container diff --git a/source/source_base/module_container/ATen/ops/einsum_op.h b/source/source_base/module_container/ATen/ops/einsum_op.h index 6850f0445cb..40e8d406e95 100644 --- a/source/source_base/module_container/ATen/ops/einsum_op.h +++ b/source/source_base/module_container/ATen/ops/einsum_op.h @@ -4,20 +4,29 @@ #include #include -namespace container { +namespace container +{ -struct EinsumOption { +struct EinsumOption +{ bool conj_x = false; bool conj_y = false; float alpha = 1.0; - float beta = 0.0; + float beta = 0.0; Tensor* out = nullptr; - EinsumOption(bool conj_x_ = false, bool conj_y_ = false, float alpha_ = 1.0, float beta_ = 0.0, Tensor* out_ = nullptr) - : conj_x(conj_x_), conj_y(conj_y_), alpha(alpha_), beta(beta_), out(out_) {} + EinsumOption (bool conj_x_ = false, + bool conj_y_ = false, + float alpha_ = 1.0, + float beta_ = 0.0, + Tensor* out_ = nullptr) + : conj_x (conj_x_), conj_y (conj_y_), alpha (alpha_), beta (beta_), out (out_) + { + } }; -namespace einsum_utils { +namespace einsum_utils +{ struct BCast; // Dummy axis label used to denote an ellipsis in an input or output subscript. @@ -26,7 +35,8 @@ constexpr int kEllipsisLabel = -1; // Each dimension is categorized into exactly one of five types based on // whether its corresponding label is present in the input and/or the output // subscripts. -enum EinsumDimensionType { +enum EinsumDimensionType +{ // Batch dimensions are those present in two inputs as well as the output. // They are part of the batch dimensions during Tensor contraction. 
Such // dimensions may be broadcasting dimensions (those mapping to ellipsis) @@ -45,53 +55,48 @@ enum EinsumDimensionType { }; // Parses and validates an einsum equation in explicit form. -bool ValidateEinsumEquation( - const std::string& equation, - std::vector& input_subscripts, - std::string& output_subscript); +bool ValidateEinsumEquation (const std::string& equation, + std::vector& input_subscripts, + std::string& output_subscript); // Parses and validates the equation and the input shapes. Single character // labels are integerized, and we populate input and output label subscripts // and corresponding counts. Also create the mapping from (named) labels to // their EinsumDimensionType. -bool ParseEinsumEquation( - const std::string& equation, - std::vector& label_types, - std::vector>& input_labels, - std::vector& output_labels, - std::vector>& input_label_counts, - std::vector& output_label_counts, - std::vector& input_has_ellipsis, - bool& output_has_ellipsis); - -bool ProcessDimensions( - const std::vector& inputs, - std::vector& label_types, - std::vector>& input_labels, - std::vector& output_labels, - std::vector>& input_label_counts, - std::vector& output_label_counts, - const std::vector& input_has_ellipsis, - const bool output_has_ellipsis, - std::unordered_map& label_to_dim_sizes); +bool ParseEinsumEquation (const std::string& equation, + std::vector& label_types, + std::vector>& input_labels, + std::vector& output_labels, + std::vector>& input_label_counts, + std::vector& output_label_counts, + std::vector& input_has_ellipsis, + bool& output_has_ellipsis); + +bool ProcessDimensions (const std::vector& inputs, + std::vector& label_types, + std::vector>& input_labels, + std::vector& output_labels, + std::vector>& input_label_counts, + std::vector& output_label_counts, + const std::vector& input_has_ellipsis, + const bool output_has_ellipsis, + std::unordered_map& label_to_dim_sizes); // This function records the mapping of a label to its 
corresponding dimension for a specific axis in the input tensor. -// It also validates that the label and dimension mapping is consistent with previous recordings, ensuring that the +// It also validates that the label and dimension mapping is consistent with previous recordings, ensuring that the // same label is not mapped to different dimensions along different axes. -bool RecordLabelToDimension( - const int label, - const int axis, - const Tensor& input, - std::unordered_map& label_to_dim_sizes); - -bool ReduceOperand( - const Tensor& input, - const std::vector& label_types, - std::vector& labels, - const std::vector& label_counts, - std::vector& free_labels, - int& swap_free_and_contract, - Tensor& output); +bool RecordLabelToDimension (const int label, + const int axis, + const Tensor& input, + std::unordered_map& label_to_dim_sizes); + +bool ReduceOperand (const Tensor& input, + const std::vector& label_types, + std::vector& labels, + const std::vector& label_counts, + std::vector& free_labels, + int& swap_free_and_contract, + Tensor& output); /** * @brief A function to perform contraction operation on multiple Tensors. @@ -102,24 +107,23 @@ bool ReduceOperand( * specifies whether each input Tensor should be contracted or simply copied to the output. 
* */ -bool ContractOperands( - std::vector& inputs, - const std::vector& swap_free_and_contract, - const EinsumOption& option, - Tensor& output); - -void ProcessOutput( - const Tensor& input, - const std::vector& label_types, - const std::vector>& free_labels, - std::unordered_map& label_to_dim_sizes, - const std::vector& output_labels, - const std::vector& output_label_counts, - Tensor& output); +bool ContractOperands (std::vector& inputs, + const std::vector& swap_free_and_contract, + const EinsumOption& option, + Tensor& output); + +void ProcessOutput (const Tensor& input, + const std::vector& label_types, + const std::vector>& free_labels, + std::unordered_map& label_to_dim_sizes, + const std::vector& output_labels, + const std::vector& output_label_counts, + Tensor& output); } // namespace einsum_utils -namespace op { +namespace op +{ // TODO: implement this method this week! // piapia pat face @@ -139,16 +143,16 @@ namespace op { * @throws std::runtime_error if an error occurs while performing the summation operation. */ template -typename std::enable_if::type, Tensor>::value, Tensor>::type - einsum_impl(const std::string& equation, const EinsumOption& option, const Tensors&... tensors) +typename std::enable_if::type, Tensor>::value, Tensor>::type + einsum_impl (const std::string& equation, const EinsumOption& option, const Tensors&... 
tensors) { // Check the input dimension constexpr int num_inputs = sizeof...(Tensors); - if (num_inputs > 2) { - throw std::invalid_argument("Einstein notation only support two or less tensors!"); - } - const std::vector inputs{reinterpret_cast(&tensors)...}; + if (num_inputs > 2) + { + throw std::invalid_argument ("Einstein notation only support two or less tensors!"); + } + const std::vector inputs{reinterpret_cast (&tensors)...}; // Init the input and output labels std::vector> input_labels = {}; std::vector output_labels = {}; @@ -158,65 +162,79 @@ typename std::enable_if input_has_ellipsis = {}; bool output_has_ellipsis = {}; - einsum_utils::ParseEinsumEquation( - equation, label_types, - input_labels, output_labels, - input_label_counts, output_label_counts, - input_has_ellipsis, output_has_ellipsis); - - if (input_labels.size() != num_inputs) { - throw std::runtime_error("The number of input tensors does not match the number of input labels!"); - } - + einsum_utils::ParseEinsumEquation (equation, + label_types, + input_labels, + output_labels, + input_label_counts, + output_label_counts, + input_has_ellipsis, + output_has_ellipsis); + + if (input_labels.size () != num_inputs) + { + throw std::runtime_error ("The number of input tensors does not match the number of input labels!"); + } + std::unordered_map label_to_dim_sizes = {}; - einsum_utils::ProcessDimensions( - inputs, label_types, - input_labels, output_labels, - input_label_counts, output_label_counts, - input_has_ellipsis, output_has_ellipsis, - label_to_dim_sizes); - - std::vector> free_labels(num_inputs); - std::vector swap_free_and_contract(num_inputs); - std::vector inputs_reduced(num_inputs, Tensor(DataType::DT_FLOAT, {})); - - for (int ii = 0; ii < num_inputs; ++ii) { - einsum_utils::ReduceOperand( - *inputs[ii], label_types, - input_labels[ii], input_label_counts[ii], - free_labels[ii], swap_free_and_contract[ii], - inputs_reduced[ii]); - } + einsum_utils::ProcessDimensions (inputs, + 
label_types, + input_labels, + output_labels, + input_label_counts, + output_label_counts, + input_has_ellipsis, + output_has_ellipsis, + label_to_dim_sizes); + + std::vector> free_labels (num_inputs); + std::vector swap_free_and_contract (num_inputs); + std::vector inputs_reduced (num_inputs, Tensor (DataType::DT_FLOAT, {})); + + for (int ii = 0; ii < num_inputs; ++ii) + { + einsum_utils::ReduceOperand (*inputs[ii], + label_types, + input_labels[ii], + input_label_counts[ii], + free_labels[ii], + swap_free_and_contract[ii], + inputs_reduced[ii]); + } // After reduction, the inputs should be reshaped to Tensors suitable for // contraction. If num_inputs is 1, the reduced input is simply forwarded to // the output. Tensor contraction_output_reshaped; - einsum_utils::ContractOperands( - inputs_reduced, swap_free_and_contract, - option, contraction_output_reshaped); - + einsum_utils::ContractOperands (inputs_reduced, swap_free_and_contract, option, contraction_output_reshaped); + Tensor output; // Copy the batch labels from the contraction output. Recover the batch // shape, which may have been broadcasted. - einsum_utils::ProcessOutput( - contraction_output_reshaped, label_types, - free_labels, label_to_dim_sizes, - output_labels, output_label_counts, - output); - - return std::move(output); + einsum_utils::ProcessOutput (contraction_output_reshaped, + label_types, + free_labels, + label_to_dim_sizes, + output_labels, + output_label_counts, + output); + + return std::move (output); } // Make the conj params only works for the matmul equations. 
-inline static Tensor einsum(const std::string& equation, const Tensor& A) { +inline static Tensor + einsum (const std::string& equation, const Tensor& A) +{ const EinsumOption& option = {}; - return std::move(op::einsum_impl(equation, option, A)); + return std::move (op::einsum_impl (equation, option, A)); } -inline static Tensor einsum(const std::string& equation, const Tensor& A, const Tensor& B, const EinsumOption& option = {}) { - return std::move(op::einsum_impl(equation, option, A, B)); +inline static Tensor + einsum (const std::string& equation, const Tensor& A, const Tensor& B, const EinsumOption& option = {}) +{ + return std::move (op::einsum_impl (equation, option, A, B)); } } // namespace op diff --git a/source/source_base/module_container/ATen/ops/linalg_op.cpp b/source/source_base/module_container/ATen/ops/linalg_op.cpp index e647bbbe89b..a53b0c801fa 100644 --- a/source/source_base/module_container/ATen/ops/linalg_op.cpp +++ b/source/source_base/module_container/ATen/ops/linalg_op.cpp @@ -5,238 +5,303 @@ #include -namespace container { -namespace op { +namespace container +{ +namespace op +{ -void add_op::operator()(const Tensor &x, const Tensor &y, Tensor &z) { +void + add_op::operator() (const Tensor& x, const Tensor& y, Tensor& z) +{ // check the shape - REQUIRES_OK(x.shape() == y.shape() && x.shape() == z.shape(), - "add: the shape of the two input Tensors must be the same") - REQUIRES_OK(x.data_type() == y.data_type() && x.data_type() == z.data_type(), - "add: the data type of the two input Tensors must be the same") - REQUIRES_OK(x.device_type() == y.device_type() && x.device_type() == z.device_type(), - "add: the device type of the two input Tensors must be the same") + REQUIRES_OK (x.shape () == y.shape () && x.shape () == z.shape (), + "add: the shape of the two input Tensors must be the same") + REQUIRES_OK (x.data_type () == y.data_type () && x.data_type () == z.data_type (), + "add: the data type of the two input Tensors must be the same") 
+ REQUIRES_OK (x.device_type () == y.device_type () && x.device_type () == z.device_type (), + "add: the device type of the two input Tensors must be the same") // allocate memory for the result - TEMPLATE_ALL_LAMBDA_2(x.data_type(), x.device_type(), [&](){ - T_ alpha = static_cast(1); - T_ beta = static_cast(1); - kernels::add()( - x.NumElements(), alpha, x.data(), beta, y.data(), z.data()); - }) + TEMPLATE_ALL_LAMBDA_2 (x.data_type (), + x.device_type (), + [&] () + { + T_ alpha = static_cast (1); + T_ beta = static_cast (1); + kernels::add () (x.NumElements (), + alpha, + x.data (), + beta, + y.data (), + z.data ()); + }) } -template -void add_op::operator()(const T& alpha, const Tensor &x, const T& beta, const Tensor &y, Tensor &z) { +template +void + add_op::operator() (const T& alpha, const Tensor& x, const T& beta, const Tensor& y, Tensor& z) +{ // check the shape - REQUIRES_OK(x.shape() == y.shape() && x.shape() == z.shape(), - "add: the shape of the two input Tensors must be the same") - REQUIRES_OK(x.data_type() == y.data_type() && x.data_type() == z.data_type(), - "add: the data type of the two input Tensors must be the same") - REQUIRES_OK(x.device_type() == y.device_type() && x.device_type() == z.device_type(), - "add: the device type of the two input Tensors must be the same") + REQUIRES_OK (x.shape () == y.shape () && x.shape () == z.shape (), + "add: the shape of the two input Tensors must be the same") + REQUIRES_OK (x.data_type () == y.data_type () && x.data_type () == z.data_type (), + "add: the data type of the two input Tensors must be the same") + REQUIRES_OK (x.device_type () == y.device_type () && x.device_type () == z.device_type (), + "add: the device type of the two input Tensors must be the same") // allocate memory for the result - TEMPLATE_ALL_LAMBDA_2(x.data_type(), x.device_type(), [&](){ - kernels::add()( - x.NumElements(), alpha, x.data(), beta, y.data(), z.data()); - }) + TEMPLATE_ALL_LAMBDA_2 ( + x.data_type (), + x.device_type 
(), + [&] () + { kernels::add () (x.NumElements (), alpha, x.data (), beta, y.data (), z.data ()); }) } -void mul_op::operator()(const container::Tensor &x, const container::Tensor &y, container::Tensor &z) { +void + mul_op::operator() (const container::Tensor& x, const container::Tensor& y, container::Tensor& z) +{ // check the shape - REQUIRES_OK(x.shape() == y.shape() && x.shape() == z.shape(), - "mul: the shape of the two input Tensors must be the same") - REQUIRES_OK(x.data_type() == y.data_type() && x.data_type() == z.data_type(), - "mul: the data type of the two input Tensors must be the same") - REQUIRES_OK(x.device_type() == y.device_type() && x.device_type() == z.device_type(), - "mul: the device type of the two input Tensors must be the same") + REQUIRES_OK (x.shape () == y.shape () && x.shape () == z.shape (), + "mul: the shape of the two input Tensors must be the same") + REQUIRES_OK (x.data_type () == y.data_type () && x.data_type () == z.data_type (), + "mul: the data type of the two input Tensors must be the same") + REQUIRES_OK (x.device_type () == y.device_type () && x.device_type () == z.device_type (), + "mul: the device type of the two input Tensors must be the same") // allocate memory for the result - TEMPLATE_ALL_LAMBDA_2(x.data_type(), x.device_type(), [&](){ - T_ alpha = static_cast(1); - kernels::mul()( - x.NumElements(), alpha, x.data(), y.data(), z.data()); - }) + TEMPLATE_ALL_LAMBDA_2 ( + x.data_type (), + x.device_type (), + [&] () + { + T_ alpha = static_cast (1); + kernels::mul () (x.NumElements (), alpha, x.data (), y.data (), z.data ()); + }) } -template -void mul_op::operator()(const T& alpha, const container::Tensor &x, container::Tensor &y) { +template +void + mul_op::operator() (const T& alpha, const container::Tensor& x, container::Tensor& y) +{ // check the shape - REQUIRES_OK(x.shape() == y.shape(), - "mul: the shape of the two input Tensors must be the same") - REQUIRES_OK(x.data_type() == y.data_type(), - "mul: the data 
type of the two input Tensors must be the same") - REQUIRES_OK(x.device_type() == y.device_type(), - "mul: the device type of the two input Tensors must be the same") + REQUIRES_OK (x.shape () == y.shape (), "mul: the shape of the two input Tensors must be the same") + REQUIRES_OK (x.data_type () == y.data_type (), "mul: the data type of the two input Tensors must be the same") + REQUIRES_OK (x.device_type () == y.device_type (), "mul: the device type of the two input Tensors must be the same") // allocate memory for the result - TEMPLATE_ALL_LAMBDA_2(x.data_type(), x.device_type(), [&](){ - kernels::mul()( - x.NumElements(), alpha, x.data(), y.data()); - }) -} - -void div_op::operator()(const container::Tensor &x, const container::Tensor &y, container::Tensor &z) { -// check the shape - REQUIRES_OK(x.shape() == y.shape() && x.shape() == z.shape(), - "div: the shape of the two input Tensors must be the same") - REQUIRES_OK(x.data_type() == y.data_type() && x.data_type() == z.data_type(), - "div: the data type of the two input Tensors must be the same") - REQUIRES_OK(x.device_type() == y.device_type() && x.device_type() == z.device_type(), - "div: the device type of the two input Tensors must be the same") - // allocate memory for the result - TEMPLATE_ALL_LAMBDA_2(x.data_type(), x.device_type(), [&](){ - T_ alpha = static_cast(1); - kernels::div()( - x.NumElements(), alpha, x.data(), y.data(), z.data()); - }) + TEMPLATE_ALL_LAMBDA_2 (x.data_type (), + x.device_type (), + [&] () + { kernels::mul () (x.NumElements (), alpha, x.data (), y.data ()); }) } - -template -void transpose_op::operator()( - const Tensor& input, - const std::vector& perm, - Tensor& output) +void + div_op::operator() (const container::Tensor& x, const container::Tensor& y, container::Tensor& z) { - TEMPLATE_ALL_2(input.data_type(), input.device_type(), - kernels::transpose()( - perm, input.shape().dims(), output.shape().dims(), input.data(), output.data())) + // check the shape + REQUIRES_OK 
(x.shape () == y.shape () && x.shape () == z.shape (), + "div: the shape of the two input Tensors must be the same") + REQUIRES_OK (x.data_type () == y.data_type () && x.data_type () == z.data_type (), + "div: the data type of the two input Tensors must be the same") + REQUIRES_OK (x.device_type () == y.device_type () && x.device_type () == z.device_type (), + "div: the device type of the two input Tensors must be the same") + // allocate memory for the result + TEMPLATE_ALL_LAMBDA_2 ( + x.data_type (), + x.device_type (), + [&] () + { + T_ alpha = static_cast (1); + kernels::div () (x.NumElements (), alpha, x.data (), y.data (), z.data ()); + }) } - -void stride_op::operator()( - const Tensor& input, - const std::vector& stride, - Tensor& output) +template +void + transpose_op::operator() (const Tensor& input, const std::vector& perm, Tensor& output) { - TEMPLATE_ALL_2(input.data_type(), input.device_type(), - kernels::stride()( - stride, input.shape().dims(), output.shape().dims(), input.data(), output.data())) + TEMPLATE_ALL_2 (input.data_type (), + input.device_type (), + kernels::transpose () (perm, + input.shape ().dims (), + output.shape ().dims (), + input.data (), + output.data ())) } - -void inflate_op::operator()( - const Tensor& input, - const std::vector& inflate, - Tensor& output) +void + stride_op::operator() (const Tensor& input, const std::vector& stride, Tensor& output) { - TEMPLATE_ALL_2(input.data_type(), input.device_type(), - kernels::inflate()( - inflate, input.shape().dims(), output.shape().dims(), input.data(), output.data())) + TEMPLATE_ALL_2 (input.data_type (), + input.device_type (), + kernels::stride () (stride, + input.shape ().dims (), + output.shape ().dims (), + input.data (), + output.data ())) } +void + inflate_op::operator() (const Tensor& input, const std::vector& inflate, Tensor& output) +{ + TEMPLATE_ALL_2 (input.data_type (), + input.device_type (), + kernels::inflate () (inflate, + input.shape ().dims (), + output.shape 
().dims (), + input.data (), + output.data ())) +} -void reduce_op::operator()( - const Tensor &input, - const int64_t &inner_most_dim, - Tensor &output) +void + reduce_op::operator() (const Tensor& input, const int64_t& inner_most_dim, Tensor& output) { - TEMPLATE_ALL_2(input.data_type(), input.device_type(), - kernels::reduce()( - output.NumElements(), inner_most_dim, input.data(), output.data())) + TEMPLATE_ALL_2 ( + input.data_type (), + input.device_type (), + kernels::reduce () (output.NumElements (), inner_most_dim, input.data (), output.data ())) } template struct transpose_op; -template void add_op::operator()(const float&, const container::Tensor&, const float&, const container::Tensor&, container::Tensor&); -template void add_op::operator()(const double&, const container::Tensor&, const double&, const container::Tensor&, container::Tensor&); -template void add_op::operator() >(const std::complex&, const container::Tensor&, const std::complex&, const container::Tensor&, container::Tensor&); -template void add_op::operator()>(const std::complex&, const container::Tensor&, const std::complex&, const container::Tensor&, container::Tensor&); +template void add_op::operator() (const float&, + const container::Tensor&, + const float&, + const container::Tensor&, + container::Tensor&); +template void add_op::operator() (const double&, + const container::Tensor&, + const double&, + const container::Tensor&, + container::Tensor&); +template void add_op::operator()> (const std::complex&, + const container::Tensor&, + const std::complex&, + const container::Tensor&, + container::Tensor&); +template void add_op::operator()> (const std::complex&, + const container::Tensor&, + const std::complex&, + const container::Tensor&, + container::Tensor&); -template void mul_op::operator()(const float&, const container::Tensor&, container::Tensor&); -template void mul_op::operator()(const double&, const container::Tensor&, container::Tensor&); -template void mul_op::operator() 
>(const std::complex&, const container::Tensor&, container::Tensor&); -template void mul_op::operator()>(const std::complex&, const container::Tensor&, container::Tensor&); +template void mul_op::operator() (const float&, const container::Tensor&, container::Tensor&); +template void mul_op::operator() (const double&, const container::Tensor&, container::Tensor&); +template void + mul_op::operator()> (const std::complex&, const container::Tensor&, container::Tensor&); +template void mul_op::operator()> (const std::complex&, + const container::Tensor&, + container::Tensor&); -} // namespace kernels +} // namespace op } // namespace container -ct::Tensor operator+(const ct::Tensor& self, const ct::Tensor& other) { +ct::Tensor + operator+ (const ct::Tensor& self, const ct::Tensor& other) +{ // check the shape - REQUIRES_OK(self.shape() == other.shape(), - "add: the shape of the two input Tensors must be the same") + REQUIRES_OK (self.shape () == other.shape (), "add: the shape of the two input Tensors must be the same") // allocate memory for the result - ct::Tensor result = ct::Tensor(self.data_type(), self.device_type(), self.shape()); - ct::op::add_op()(self, other, result); + ct::Tensor result = ct::Tensor (self.data_type (), self.device_type (), self.shape ()); + ct::op::add_op () (self, other, result); return result; } -ct::Tensor operator-(const ct::Tensor& self, const ct::Tensor& other) { +ct::Tensor + operator- (const ct::Tensor& self, const ct::Tensor& other) +{ // check the shape - REQUIRES_OK(self.shape() == other.shape(), - "add: the shape of the two input Tensors must be the same") - REQUIRES_OK(self.data_type() == other.data_type(), - "add: the data type of the two input Tensors must be the same") - REQUIRES_OK(self.device_type() == other.device_type(), - "add: the device type of the two input Tensors must be the same") - ct::Tensor result = ct::Tensor(self.data_type(), self.device_type(), self.shape()); + REQUIRES_OK (self.shape () == other.shape (), 
"add: the shape of the two input Tensors must be the same") + REQUIRES_OK (self.data_type () == other.data_type (), + "add: the data type of the two input Tensors must be the same") + REQUIRES_OK (self.device_type () == other.device_type (), + "add: the device type of the two input Tensors must be the same") + ct::Tensor result = ct::Tensor (self.data_type (), self.device_type (), self.shape ()); // allocate memory for the result - TEMPLATE_ALL_LAMBDA_2(self.data_type(), self.device_type(), [&](){ - T_ alpha = static_cast(1.0); - T_ beta = static_cast(-1.0); - ct::kernels::add()( - self.NumElements(), alpha, self.data(), beta, other.data(), result.data()); - }) + TEMPLATE_ALL_LAMBDA_2 (self.data_type (), + self.device_type (), + [&] () + { + T_ alpha = static_cast (1.0); + T_ beta = static_cast (-1.0); + ct::kernels::add () (self.NumElements (), + alpha, + self.data (), + beta, + other.data (), + result.data ()); + }) return result; } -ct::Tensor operator*(const ct::Tensor& self, const ct::Tensor& other) { +ct::Tensor + operator* (const ct::Tensor& self, const ct::Tensor& other) +{ // check the shape - REQUIRES_OK(self.shape() == other.shape(), - "mul: the shape of the two input Tensors must be the same") + REQUIRES_OK (self.shape () == other.shape (), "mul: the shape of the two input Tensors must be the same") // allocate memory for the result - ct::Tensor result = ct::Tensor(self.data_type(), self.device_type(), self.shape()); - ct::op::mul_op()(self, other, result); + ct::Tensor result = ct::Tensor (self.data_type (), self.device_type (), self.shape ()); + ct::op::mul_op () (self, other, result); return result; } -ct::Tensor operator/(const ct::Tensor& self, const ct::Tensor& other) { +ct::Tensor + operator/ (const ct::Tensor& self, const ct::Tensor& other) +{ // check the shape - REQUIRES_OK(self.shape() == other.shape(), - "div: the shape of the two input Tensors must be the same") + REQUIRES_OK (self.shape () == other.shape (), "div: the shape of the two 
input Tensors must be the same") // allocate memory for the result - ct::Tensor result = ct::Tensor(self.data_type(), self.device_type(), self.shape()); - ct::op::div_op()(self, other, result); + ct::Tensor result = ct::Tensor (self.data_type (), self.device_type (), self.shape ()); + ct::op::div_op () (self, other, result); return result; } -ct::Tensor& operator+=(ct::Tensor& self, const ct::Tensor& other) { +ct::Tensor& + operator+= (ct::Tensor& self, const ct::Tensor& other) +{ // check the shape - REQUIRES_OK(self.shape() == other.shape(), - "add: the shape of the two input Tensors must be the same") - ct::op::add_op()(self, other, self); + REQUIRES_OK (self.shape () == other.shape (), "add: the shape of the two input Tensors must be the same") + ct::op::add_op () (self, other, self); return self; } -ct::Tensor& operator-=(ct::Tensor& self, const ct::Tensor& other) { +ct::Tensor& + operator-= (ct::Tensor& self, const ct::Tensor& other) +{ // check the shape - REQUIRES_OK(self.shape() == other.shape(), - "add: the shape of the two input Tensors must be the same") - REQUIRES_OK(self.data_type() == other.data_type(), - "add: the data type of the two input Tensors must be the same") - REQUIRES_OK(self.device_type() == other.device_type(), - "add: the device type of the two input Tensors must be the same") + REQUIRES_OK (self.shape () == other.shape (), "add: the shape of the two input Tensors must be the same") + REQUIRES_OK (self.data_type () == other.data_type (), + "add: the data type of the two input Tensors must be the same") + REQUIRES_OK (self.device_type () == other.device_type (), + "add: the device type of the two input Tensors must be the same") // allocate memory for the result - TEMPLATE_ALL_LAMBDA_2(self.data_type(), self.device_type(), [&](){ - T_ alpha = static_cast(1.0); - T_ beta = static_cast(-1.0); - ct::kernels::add()( - self.NumElements(), alpha, self.data(), beta, other.data(), self.data()); - }) + TEMPLATE_ALL_LAMBDA_2 (self.data_type (), + 
self.device_type (), + [&] () + { + T_ alpha = static_cast (1.0); + T_ beta = static_cast (-1.0); + ct::kernels::add () (self.NumElements (), + alpha, + self.data (), + beta, + other.data (), + self.data ()); + }) return self; } -ct::Tensor& operator*=(ct::Tensor& self, const ct::Tensor& other) { +ct::Tensor& + operator*= (ct::Tensor& self, const ct::Tensor& other) +{ // check the shape - REQUIRES_OK(self.shape() == other.shape(), - "mul: the shape of the two input Tensors must be the same") - ct::op::mul_op()(self, other, self); + REQUIRES_OK (self.shape () == other.shape (), "mul: the shape of the two input Tensors must be the same") + ct::op::mul_op () (self, other, self); return self; } -ct::Tensor& operator/=(ct::Tensor& self, const ct::Tensor& other) { +ct::Tensor& + operator/= (ct::Tensor& self, const ct::Tensor& other) +{ // check the shape - REQUIRES_OK(self.shape() == other.shape(), - "div: the shape of the two input Tensors must be the same") - ct::op::div_op()(self, other, self); + REQUIRES_OK (self.shape () == other.shape (), "div: the shape of the two input Tensors must be the same") + ct::op::div_op () (self, other, self); return self; } \ No newline at end of file diff --git a/source/source_base/module_container/ATen/ops/linalg_op.h b/source/source_base/module_container/ATen/ops/linalg_op.h index f397fc32298..f75d2c1dd80 100644 --- a/source/source_base/module_container/ATen/ops/linalg_op.h +++ b/source/source_base/module_container/ATen/ops/linalg_op.h @@ -5,8 +5,10 @@ #include -namespace container { -namespace op { +namespace container +{ +namespace op +{ /** * @brief A functor to perform add operation on a Tensor. @@ -14,7 +16,8 @@ namespace op { * This functor adds two Tensors element-wise, resulting in a new Tensor with the same * shape as the input Tensors. */ -struct add_op { +struct add_op +{ /** * @brief Perform add operation on the input Tensors. 
* @@ -26,45 +29,31 @@ struct add_op { * @param z The output Tensor that will hold the result of the add operation. * It must have the same shape as the input Tensors. */ - void operator()( - const Tensor& x, - const Tensor& y, - Tensor& z); + void operator() (const Tensor& x, const Tensor& y, Tensor& z); template - void operator()( - const T& alpha, - const Tensor& x, - const T& beta, - const Tensor& y, - Tensor& z); + void operator() (const T& alpha, const Tensor& x, const T& beta, const Tensor& y, Tensor& z); }; -struct mul_op { +struct mul_op +{ // z = x * y - void operator()( - const Tensor& x, - const Tensor& y, - Tensor& z); + void operator() (const Tensor& x, const Tensor& y, Tensor& z); // y = alpha * x template - void operator()( - const T& alpha, - const Tensor& x, - Tensor& y); + void operator() (const T& alpha, const Tensor& x, Tensor& y); }; -struct div_op { +struct div_op +{ // z = x / y - void operator()( - const Tensor& x, - const Tensor& y, - Tensor& z); + void operator() (const Tensor& x, const Tensor& y, Tensor& z); }; template -struct transpose_op { +struct transpose_op +{ /** * @brief Perform the transpose operation on the input tensor. * @@ -84,10 +73,7 @@ struct transpose_op { * the output tensor is not pre-allocated with the correct shape, the * function will return false. */ - void operator()( - const Tensor& input, - const std::vector& permutation, - Tensor& output); + void operator() (const Tensor& input, const std::vector& permutation, Tensor& output); }; /** @@ -100,7 +86,8 @@ struct transpose_op { * @tparam T The data type of the Tensor. * @tparam Device The execution device (e.g., CPU or GPU). */ -struct stride_op { +struct stride_op +{ /** * @brief Perform stride operation on the input Tensor. * @@ -114,10 +101,7 @@ struct stride_op { * @param output The output Tensor that will hold the result of the stride operation. * It must have the appropriate size to store the selected elements. 
*/ - void operator()( - const Tensor& input, - const std::vector& stride, - Tensor& output); + void operator() (const Tensor& input, const std::vector& stride, Tensor& output); }; /** @@ -125,7 +109,8 @@ struct stride_op { * * This struct defines a functor that can be used to inflate a tensor using the specified stride. */ -struct inflate_op { +struct inflate_op +{ /** * @brief Inflate the input tensor. * @@ -135,30 +120,24 @@ struct inflate_op { * @param stride The stride to use for inflation. * @param output The output tensor where the inflated data will be stored. */ - void operator()( - const Tensor& input, - const std::vector& stride, - Tensor& output); + void operator() (const Tensor& input, const std::vector& stride, Tensor& output); }; - -struct reduce_op { - void operator()( - const Tensor& input, - const int64_t& inner_most_dim, - Tensor& output); +struct reduce_op +{ + void operator() (const Tensor& input, const int64_t& inner_most_dim, Tensor& output); }; } // namespace op } // namespace container -ct::Tensor operator+(const ct::Tensor& self, const ct::Tensor& other); -ct::Tensor operator-(const ct::Tensor& self, const ct::Tensor& other); -ct::Tensor operator*(const ct::Tensor& self, const ct::Tensor& other); -ct::Tensor operator/(const ct::Tensor& self, const ct::Tensor& other); -ct::Tensor& operator+=(ct::Tensor& self, const ct::Tensor& other); -ct::Tensor& operator-=(ct::Tensor& self, const ct::Tensor& other); -ct::Tensor& operator*=(ct::Tensor& self, const ct::Tensor& other); -ct::Tensor& operator/=(ct::Tensor& self, const ct::Tensor& other); +ct::Tensor operator+ (const ct::Tensor& self, const ct::Tensor& other); +ct::Tensor operator- (const ct::Tensor& self, const ct::Tensor& other); +ct::Tensor operator* (const ct::Tensor& self, const ct::Tensor& other); +ct::Tensor operator/ (const ct::Tensor& self, const ct::Tensor& other); +ct::Tensor& operator+= (ct::Tensor& self, const ct::Tensor& other); +ct::Tensor& operator-= (ct::Tensor& self, const 
ct::Tensor& other); +ct::Tensor& operator*= (ct::Tensor& self, const ct::Tensor& other); +ct::Tensor& operator/= (ct::Tensor& self, const ct::Tensor& other); #endif // ATEN_OPS_LINALG_H_ \ No newline at end of file diff --git a/source/source_base/module_container/ATen/ops/test/einsum_op_test.cpp b/source/source_base/module_container/ATen/ops/test/einsum_op_test.cpp index efec0072f0c..2dc511c51cc 100644 --- a/source/source_base/module_container/ATen/ops/test/einsum_op_test.cpp +++ b/source/source_base/module_container/ATen/ops/test/einsum_op_test.cpp @@ -4,359 +4,545 @@ #include #include - -namespace container { -namespace op { +namespace container +{ +namespace op +{ template -class EinsumOpTest : public testing::Test { -public: - EinsumOpTest() { - base::utils::init_blas_handle(); - base::utils::init_cusolver_handle(); +class EinsumOpTest : public testing::Test +{ + public: + EinsumOpTest () + { + base::utils::init_blas_handle (); + base::utils::init_cusolver_handle (); } - ~EinsumOpTest() override { - base::utils::delete_blas_handle(); - base::utils::delete_cusolver_handle(); + ~EinsumOpTest () override + { + base::utils::delete_blas_handle (); + base::utils::delete_cusolver_handle (); } }; -TYPED_TEST_SUITE(EinsumOpTest, base::utils::Types); +TYPED_TEST_SUITE (EinsumOpTest, base::utils::Types); -TYPED_TEST(EinsumOpTest, Transform) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (EinsumOpTest, Transform) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; const int dim = 3; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); - A.reshape({-1, dim}); - Tensor expected = 
std::move(Tensor( - {static_cast(1.0), static_cast(0.0), static_cast(0.0), - static_cast(2.0), static_cast(4.0), static_cast(0.0), - static_cast(3.0), static_cast(5.0), static_cast(6.0)}).to_device()); - expected.reshape({-1, dim}); + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); + A.reshape ({-1, dim}); + Tensor expected = std::move (Tensor ({static_cast (1.0), + static_cast (0.0), + static_cast (0.0), + static_cast (2.0), + static_cast (4.0), + static_cast (0.0), + static_cast (3.0), + static_cast (5.0), + static_cast (6.0)}) + .to_device ()); + expected.reshape ({-1, dim}); // const Tensor expected = std::move(Tensor({static_cast(21.0)}).to_device()); - Tensor A_transformed = op::einsum("ij->ji", A); - EXPECT_EQ(A_transformed, expected); + Tensor A_transformed = op::einsum ("ij->ji", A); + EXPECT_EQ (A_transformed, expected); } -TYPED_TEST(EinsumOpTest, Reduce) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (EinsumOpTest, Reduce) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; const int dim = 3; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); - A.reshape({-1, dim}); - Tensor expected_1 = std::move(Tensor( - {static_cast(6.0), static_cast(9.0), static_cast(6.0)}).to_device()); - Tensor expected_2 = std::move(Tensor( - {static_cast(1.0), static_cast(6.0), static_cast(14.0)}).to_device()); + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + 
static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); + A.reshape ({-1, dim}); + Tensor expected_1 = std::move ( + Tensor ({static_cast (6.0), static_cast (9.0), static_cast (6.0)}).to_device ()); + Tensor expected_2 = std::move ( + Tensor ({static_cast (1.0), static_cast (6.0), static_cast (14.0)}).to_device ()); // const Tensor expected = std::move(Tensor({static_cast(21.0)}).to_device()); // Case 1: Normal reduction - Tensor A_reduced = op::einsum("ij->i", A); - EXPECT_EQ(A_reduced, expected_1); + Tensor A_reduced = op::einsum ("ij->i", A); + EXPECT_EQ (A_reduced, expected_1); // Case 2: Transpose reduction - A_reduced = op::einsum("ij->j", A); - EXPECT_EQ(A_reduced, expected_2); + A_reduced = op::einsum ("ij->j", A); + EXPECT_EQ (A_reduced, expected_2); // Case 3: All reduction - A_reduced = op::einsum("ij->", A); - EXPECT_EQ(A_reduced, Tensor({static_cast(21.0)}).to_device()); + A_reduced = op::einsum ("ij->", A); + EXPECT_EQ (A_reduced, Tensor ({static_cast (21.0)}).to_device ()); } -TYPED_TEST(EinsumOpTest, Stride) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (EinsumOpTest, Stride) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; const int dim = 3; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); - A.reshape({-1, dim}); - Tensor expected = std::move(Tensor( - {static_cast(1.0), static_cast(4.0), static_cast(6.0)}).to_device()); - - Tensor A_strided = op::einsum("ii->i", A); - EXPECT_EQ(A_strided, expected); + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + 
static_cast (3.0), + static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); + A.reshape ({-1, dim}); + Tensor expected = std::move ( + Tensor ({static_cast (1.0), static_cast (4.0), static_cast (6.0)}).to_device ()); + + Tensor A_strided = op::einsum ("ii->i", A); + EXPECT_EQ (A_strided, expected); } -TYPED_TEST(EinsumOpTest, Inflate) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (EinsumOpTest, Inflate) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; const int dim = 3; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(4.0), static_cast(6.0)}).to_device()); - Tensor expected = std::move(Tensor( - {static_cast(1.0), static_cast(0.0), static_cast(0.0), - static_cast(0.0), static_cast(4.0), static_cast(0.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); - expected.reshape({-1, dim}); - - Tensor A_inflated = op::einsum("i->ii", A); - EXPECT_EQ(A_inflated, expected); + Tensor A = std::move ( + Tensor ({static_cast (1.0), static_cast (4.0), static_cast (6.0)}).to_device ()); + Tensor expected = std::move (Tensor ({static_cast (1.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (4.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); + expected.reshape ({-1, dim}); + + Tensor A_inflated = op::einsum ("i->ii", A); + EXPECT_EQ (A_inflated, expected); } -TYPED_TEST(EinsumOpTest, ContractDot) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (EinsumOpTest, ContractDot) +{ + using Type = typename std::tuple_element<0, decltype 
(TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; const int n = 4; - const Tensor x = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), static_cast(4.0)}).to_device()); - const Tensor y = std::move(Tensor({static_cast(4.0), static_cast(3.0), static_cast(2.0), static_cast(1.0)}).to_device()); - - const Tensor expected = std::move(Tensor({static_cast(20.0)}).to_device()); + const Tensor x = std::move ( + Tensor ({static_cast (1.0), static_cast (2.0), static_cast (3.0), static_cast (4.0)}) + .to_device ()); + const Tensor y = std::move ( + Tensor ({static_cast (4.0), static_cast (3.0), static_cast (2.0), static_cast (1.0)}) + .to_device ()); + + const Tensor expected = std::move (Tensor ({static_cast (20.0)}).to_device ()); - Tensor z = op::einsum("i,i->", x, y); - EXPECT_EQ(z, expected); + Tensor z = op::einsum ("i,i->", x, y); + EXPECT_EQ (z, expected); } -TYPED_TEST(EinsumOpTest, ContractGemv) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (EinsumOpTest, ContractGemv) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; const int m = 2, n = 4; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), static_cast(4.0), - static_cast(5.0), static_cast(6.0), static_cast(7.0), static_cast(8.0)}).to_device()); - A.reshape({m, n}); - const Tensor x1 = std::move(Tensor( - {static_cast(4.0), static_cast(3.0), static_cast(2.0), static_cast(1.0)}).to_device()); - const Tensor x2 = std::move(Tensor( - {static_cast(1.0), static_cast(2.0)}).to_device()); - - const Tensor expected_1 = std::move(Tensor( - {static_cast(20.0),static_cast(60.0)}).to_device()); - const Tensor expected_2 = std::move(Tensor( - 
{static_cast(11.0),static_cast(14.0),static_cast(17.0), static_cast(20.0)}).to_device()); - - Tensor y = op::einsum("ij,j->i", A, x1); - EXPECT_EQ(y, expected_1); - y = op::einsum("ij,i->j", A, x2); - EXPECT_EQ(y, expected_2); + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0)}) + .to_device ()); + A.reshape ({m, n}); + const Tensor x1 = std::move ( + Tensor ({static_cast (4.0), static_cast (3.0), static_cast (2.0), static_cast (1.0)}) + .to_device ()); + const Tensor x2 = std::move (Tensor ({static_cast (1.0), static_cast (2.0)}).to_device ()); + + const Tensor expected_1 + = std::move (Tensor ({static_cast (20.0), static_cast (60.0)}).to_device ()); + const Tensor expected_2 = std::move ( + Tensor ( + {static_cast (11.0), static_cast (14.0), static_cast (17.0), static_cast (20.0)}) + .to_device ()); + + Tensor y = op::einsum ("ij,j->i", A, x1); + EXPECT_EQ (y, expected_1); + y = op::einsum ("ij,i->j", A, x2); + EXPECT_EQ (y, expected_2); } -TYPED_TEST(EinsumOpTest, ContractGemm) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (EinsumOpTest, ContractGemm) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; const int m = 2, k = 4, n = 2; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), static_cast(4.0), - static_cast(5.0), static_cast(6.0), static_cast(7.0), static_cast(8.0)}).to_device()); - A.reshape({m, k}); - Tensor B = std::move(Tensor({static_cast(1.0), static_cast(2.0), - static_cast(3.0), static_cast(4.0), - static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0)}).to_device()); - B.reshape({k, n}); - Tensor expected_1 = 
std::move(Tensor( - {static_cast(50.0), static_cast(60.0), - static_cast(114.0),static_cast(140.0)}).to_device()); - expected_1.reshape({m, n}); - Tensor expected_2 = std::move(Tensor( - {static_cast(11.0), static_cast(23.0), static_cast(35.0), static_cast(47.0), - static_cast(14.0), static_cast(30.0), static_cast(46.0), static_cast(62.0), - static_cast(17.0), static_cast(37.0), static_cast(57.0), static_cast(77.0), - static_cast(20.0), static_cast(44.0), static_cast(68.0), static_cast(92.0)}).to_device()); - expected_2.reshape({k, k}); - - Tensor C = op::einsum("ij,jk->ik", A, B); - EXPECT_EQ(C, expected_1); - C = op::einsum("ij,ki->jk", A, B); - EXPECT_EQ(C, expected_2); + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0)}) + .to_device ()); + A.reshape ({m, k}); + Tensor B = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0)}) + .to_device ()); + B.reshape ({k, n}); + Tensor expected_1 = std::move ( + Tensor ( + {static_cast (50.0), static_cast (60.0), static_cast (114.0), static_cast (140.0)}) + .to_device ()); + expected_1.reshape ({m, n}); + Tensor expected_2 = std::move (Tensor ({static_cast (11.0), + static_cast (23.0), + static_cast (35.0), + static_cast (47.0), + static_cast (14.0), + static_cast (30.0), + static_cast (46.0), + static_cast (62.0), + static_cast (17.0), + static_cast (37.0), + static_cast (57.0), + static_cast (77.0), + static_cast (20.0), + static_cast (44.0), + static_cast (68.0), + static_cast (92.0)}) + .to_device ()); + expected_2.reshape ({k, k}); + + Tensor C = op::einsum ("ij,jk->ik", A, B); + EXPECT_EQ (C, expected_1); + C = op::einsum ("ij,ki->jk", A, B); + EXPECT_EQ (C, expected_2); } -TYPED_TEST(EinsumOpTest, TransformEllipsis) { - using Type = typename 
std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (EinsumOpTest, TransformEllipsis) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; const int dim = 3; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0), - static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); - A.reshape({-1, dim, dim}); - Tensor expected = std::move(Tensor( - {static_cast(1.0), static_cast(0.0), static_cast(0.0), - static_cast(2.0), static_cast(4.0), static_cast(0.0), - static_cast(3.0), static_cast(5.0), static_cast(6.0), - static_cast(1.0), static_cast(0.0), static_cast(0.0), - static_cast(2.0), static_cast(4.0), static_cast(0.0), - static_cast(3.0), static_cast(5.0), static_cast(6.0)}).to_device()); - expected.reshape({-1, dim, dim}); - Tensor expected_ellipsis = std::move(Tensor( - {static_cast(1.0), static_cast(1.0), - static_cast(0.0), static_cast(0.0), - static_cast(0.0), static_cast(0.0), - static_cast(2.0), static_cast(2.0), - static_cast(4.0), static_cast(4.0), - static_cast(0.0), static_cast(0.0), - static_cast(3.0), static_cast(3.0), - static_cast(5.0), static_cast(5.0), - static_cast(6.0), static_cast(6.0)}).to_device()); - expected_ellipsis.reshape({dim, dim, -1}); - - Tensor A_transformed = op::einsum("ijk->ikj", A); - EXPECT_EQ(A_transformed, expected); - Tensor A_transformed_ellipsis = op::einsum("...ij->...ji", A); - EXPECT_EQ(A_transformed_ellipsis, expected); - A_transformed_ellipsis = op::einsum("i...j->j...i", A); - EXPECT_EQ(A_transformed_ellipsis, expected_ellipsis); + Tensor A = std::move (Tensor 
({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0), + static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); + A.reshape ({-1, dim, dim}); + Tensor expected = std::move (Tensor ({static_cast (1.0), + static_cast (0.0), + static_cast (0.0), + static_cast (2.0), + static_cast (4.0), + static_cast (0.0), + static_cast (3.0), + static_cast (5.0), + static_cast (6.0), + static_cast (1.0), + static_cast (0.0), + static_cast (0.0), + static_cast (2.0), + static_cast (4.0), + static_cast (0.0), + static_cast (3.0), + static_cast (5.0), + static_cast (6.0)}) + .to_device ()); + expected.reshape ({-1, dim, dim}); + Tensor expected_ellipsis = std::move (Tensor ({static_cast (1.0), + static_cast (1.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (2.0), + static_cast (2.0), + static_cast (4.0), + static_cast (4.0), + static_cast (0.0), + static_cast (0.0), + static_cast (3.0), + static_cast (3.0), + static_cast (5.0), + static_cast (5.0), + static_cast (6.0), + static_cast (6.0)}) + .to_device ()); + expected_ellipsis.reshape ({dim, dim, -1}); + + Tensor A_transformed = op::einsum ("ijk->ikj", A); + EXPECT_EQ (A_transformed, expected); + Tensor A_transformed_ellipsis = op::einsum ("...ij->...ji", A); + EXPECT_EQ (A_transformed_ellipsis, expected); + A_transformed_ellipsis = op::einsum ("i...j->j...i", A); + EXPECT_EQ (A_transformed_ellipsis, expected_ellipsis); } -TYPED_TEST(EinsumOpTest, ReduceEllipsis) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (EinsumOpTest, ReduceEllipsis) +{ + using Type = typename 
std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; const int dim = 3; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), static_cast(9.0), - static_cast(0.0), static_cast(10.0),static_cast(11.0), - static_cast(0.0), static_cast(0.0), static_cast(12.0)}).to_device()); - A.reshape({-1, dim, dim}); - Tensor expected_1 = std::move(Tensor( - {static_cast(6.0), static_cast(9.0), static_cast(6.0), - static_cast(24.0),static_cast(21.0),static_cast(12.0)}).to_device()); - expected_1.reshape({-1, dim}); - Tensor expected_2 = std::move(Tensor( - {static_cast(1.0), static_cast(6.0), static_cast(14.0), - static_cast(7.0), static_cast(18.0),static_cast(32.0)}).to_device()); - expected_2.reshape({-1, dim}); + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (9.0), + static_cast (0.0), + static_cast (10.0), + static_cast (11.0), + static_cast (0.0), + static_cast (0.0), + static_cast (12.0)}) + .to_device ()); + A.reshape ({-1, dim, dim}); + Tensor expected_1 = std::move (Tensor ({static_cast (6.0), + static_cast (9.0), + static_cast (6.0), + static_cast (24.0), + static_cast (21.0), + static_cast (12.0)}) + .to_device ()); + expected_1.reshape ({-1, dim}); + Tensor expected_2 = std::move (Tensor ({static_cast (1.0), + static_cast (6.0), + static_cast (14.0), + static_cast (7.0), + static_cast (18.0), + static_cast (32.0)}) + .to_device ()); + expected_2.reshape ({-1, dim}); // Case 1: Normal reduction - Tensor A_reduced = op::einsum("ijk->ij", A); - EXPECT_EQ(A_reduced, expected_1); - Tensor 
A_reduced_ellipsis = op::einsum("...i->...", A); - EXPECT_EQ(A_reduced_ellipsis, expected_1); + Tensor A_reduced = op::einsum ("ijk->ij", A); + EXPECT_EQ (A_reduced, expected_1); + Tensor A_reduced_ellipsis = op::einsum ("...i->...", A); + EXPECT_EQ (A_reduced_ellipsis, expected_1); // Case 2: Transpose reduction - A_reduced = op::einsum("ijk->ik", A); - EXPECT_EQ(A_reduced, expected_2); - A_reduced_ellipsis = op::einsum("...jk->...k", A); - EXPECT_EQ(A_reduced_ellipsis, expected_2); + A_reduced = op::einsum ("ijk->ik", A); + EXPECT_EQ (A_reduced, expected_2); + A_reduced_ellipsis = op::einsum ("...jk->...k", A); + EXPECT_EQ (A_reduced_ellipsis, expected_2); // Case 3: All reduction - A_reduced = op::einsum("ijk->", A); - EXPECT_EQ(A_reduced, Tensor({static_cast(78.0)}).to_device()); + A_reduced = op::einsum ("ijk->", A); + EXPECT_EQ (A_reduced, Tensor ({static_cast (78.0)}).to_device ()); // Not available // A_reduced = op::einsum("...->", A); } -TYPED_TEST(EinsumOpTest, StrideEllipsis) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (EinsumOpTest, StrideEllipsis) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; const int dim = 3; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), static_cast(9.0), - static_cast(0.0), static_cast(10.0),static_cast(11.0), - static_cast(0.0), static_cast(0.0), static_cast(12.0), - static_cast(13.0),static_cast(14.0),static_cast(15.0), - static_cast(0.0), static_cast(16.0),static_cast(17.0), - static_cast(0.0), static_cast(0.0), static_cast(18.0)}).to_device()); - A.reshape({-1, dim, dim}); - Tensor expected_1 = 
std::move(Tensor( - {static_cast(1.0), static_cast(4.0), static_cast(6.0), - static_cast(7.0), static_cast(10.0),static_cast(12.0), - static_cast(13.0),static_cast(16.0),static_cast(18.0)}).to_device()); - expected_1.reshape({-1, dim}); - Tensor expected_2 = std::move(Tensor( - {static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(10.0),static_cast(11.0), - static_cast(0.0), static_cast(0.0), static_cast(18.0)}).to_device()); - expected_2.reshape({-1, dim}); + Tensor A = std::move ( + Tensor ({static_cast (1.0), static_cast (2.0), static_cast (3.0), static_cast (0.0), + static_cast (4.0), static_cast (5.0), static_cast (0.0), static_cast (0.0), + static_cast (6.0), static_cast (7.0), static_cast (8.0), static_cast (9.0), + static_cast (0.0), static_cast (10.0), static_cast (11.0), static_cast (0.0), + static_cast (0.0), static_cast (12.0), static_cast (13.0), static_cast (14.0), + static_cast (15.0), static_cast (0.0), static_cast (16.0), static_cast (17.0), + static_cast (0.0), static_cast (0.0), static_cast (18.0)}) + .to_device ()); + A.reshape ({-1, dim, dim}); + Tensor expected_1 = std::move (Tensor ({static_cast (1.0), + static_cast (4.0), + static_cast (6.0), + static_cast (7.0), + static_cast (10.0), + static_cast (12.0), + static_cast (13.0), + static_cast (16.0), + static_cast (18.0)}) + .to_device ()); + expected_1.reshape ({-1, dim}); + Tensor expected_2 = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (0.0), + static_cast (10.0), + static_cast (11.0), + static_cast (0.0), + static_cast (0.0), + static_cast (18.0)}) + .to_device ()); + expected_2.reshape ({-1, dim}); // Case 1: - Tensor A_strided = op::einsum("ijj->ij", A); - EXPECT_EQ(A_strided, expected_1); - Tensor A_strided_ellipsis = op::einsum("...jj->...j", A); - EXPECT_EQ(A_strided_ellipsis, expected_1); + Tensor A_strided = op::einsum ("ijj->ij", A); + EXPECT_EQ (A_strided, expected_1); + Tensor A_strided_ellipsis 
= op::einsum ("...jj->...j", A); + EXPECT_EQ (A_strided_ellipsis, expected_1); // Case 2: - A_strided = op::einsum("iij->ij", A); - EXPECT_EQ(A_strided, expected_2); - A_strided_ellipsis = op::einsum("ii...->i...", A); - EXPECT_EQ(A_strided_ellipsis, expected_2); - A_strided_ellipsis = op::einsum("iij...->ij...", A); - EXPECT_EQ(A_strided_ellipsis, expected_2); - A_strided_ellipsis = op::einsum("...iij->ij...", A); - EXPECT_EQ(A_strided_ellipsis, expected_2); + A_strided = op::einsum ("iij->ij", A); + EXPECT_EQ (A_strided, expected_2); + A_strided_ellipsis = op::einsum ("ii...->i...", A); + EXPECT_EQ (A_strided_ellipsis, expected_2); + A_strided_ellipsis = op::einsum ("iij...->ij...", A); + EXPECT_EQ (A_strided_ellipsis, expected_2); + A_strided_ellipsis = op::einsum ("...iij->ij...", A); + EXPECT_EQ (A_strided_ellipsis, expected_2); } -TYPED_TEST(EinsumOpTest, InflateEllipsis) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (EinsumOpTest, InflateEllipsis) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; const int dim = 3; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(4.0), static_cast(6.0)}).to_device()); - Tensor expected = std::move(Tensor( - {static_cast(1.0), static_cast(0.0), static_cast(0.0), - static_cast(0.0), static_cast(0.0), static_cast(0.0), - static_cast(0.0), static_cast(0.0), static_cast(0.0), - static_cast(0.0), static_cast(0.0), static_cast(0.0), - static_cast(0.0), static_cast(4.0), static_cast(0.0), - static_cast(0.0), static_cast(0.0), static_cast(0.0), - static_cast(0.0), static_cast(0.0), static_cast(0.0), - static_cast(0.0), static_cast(0.0), static_cast(0.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); - expected.reshape({-1, dim, dim}); - - Tensor A_inflated = 
op::einsum("i->iii", A); - EXPECT_EQ(A_inflated, expected); - Tensor A_inflated_ellipsis = op::einsum("...i->...iii", A); - EXPECT_EQ(A_inflated_ellipsis, expected); + Tensor A = std::move ( + Tensor ({static_cast (1.0), static_cast (4.0), static_cast (6.0)}).to_device ()); + Tensor expected = std::move ( + Tensor ({static_cast (1.0), static_cast (0.0), static_cast (0.0), static_cast (0.0), + static_cast (0.0), static_cast (0.0), static_cast (0.0), static_cast (0.0), + static_cast (0.0), static_cast (0.0), static_cast (0.0), static_cast (0.0), + static_cast (0.0), static_cast (4.0), static_cast (0.0), static_cast (0.0), + static_cast (0.0), static_cast (0.0), static_cast (0.0), static_cast (0.0), + static_cast (0.0), static_cast (0.0), static_cast (0.0), static_cast (0.0), + static_cast (0.0), static_cast (0.0), static_cast (6.0)}) + .to_device ()); + expected.reshape ({-1, dim, dim}); + + Tensor A_inflated = op::einsum ("i->iii", A); + EXPECT_EQ (A_inflated, expected); + Tensor A_inflated_ellipsis = op::einsum ("...i->...iii", A); + EXPECT_EQ (A_inflated_ellipsis, expected); } -TYPED_TEST(EinsumOpTest, ContractGemmEllipsis) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (EinsumOpTest, ContractGemmEllipsis) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; const int m = 2, k = 4, n = 2, batch_size = 2; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), static_cast(4.0), - static_cast(5.0), static_cast(6.0), static_cast(7.0), static_cast(8.0), - static_cast(1.0), static_cast(2.0), static_cast(3.0), static_cast(4.0), - static_cast(5.0), static_cast(6.0), static_cast(7.0), static_cast(8.0)}).to_device()); - A.reshape({batch_size, m, k}); - Tensor B = std::move(Tensor({static_cast(1.0), static_cast(2.0), - 
static_cast(3.0), static_cast(4.0), - static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0)}).to_device()); - B.reshape({k, n}); - Tensor expected = std::move(Tensor( - {static_cast(50.0), static_cast(60.0), - static_cast(114.0),static_cast(140.0), - static_cast(50.0), static_cast(60.0), - static_cast(114.0),static_cast(140.0)}).to_device()); - expected.reshape({batch_size, m, n}); - - Tensor C = op::einsum("ijk,...kl->i...jl", A, B); - EXPECT_EQ(C, expected); - - B = std::move(Tensor({ static_cast(1.0), static_cast(2.0), - static_cast(3.0), static_cast(4.0), - static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0), - static_cast(1.0), static_cast(2.0), - static_cast(3.0), static_cast(4.0), - static_cast(5.0), static_cast(6.0), - static_cast(7.0), static_cast(8.0)}).to_device()); - - B.reshape({batch_size, k, n}); - C = op::einsum("ijk,ikl->ijl", A, B); - EXPECT_EQ(C, expected); - C = op::einsum("...jk,...kl->...jl", A, B); - EXPECT_EQ(C, expected); + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0)}) + .to_device ()); + A.reshape ({batch_size, m, k}); + Tensor B = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0)}) + .to_device ()); + B.reshape ({k, n}); + Tensor expected = std::move (Tensor ({static_cast (50.0), + static_cast (60.0), + static_cast (114.0), + static_cast (140.0), + static_cast (50.0), + static_cast (60.0), + static_cast (114.0), + static_cast (140.0)}) + .to_device ()); + expected.reshape ({batch_size, m, n}); + + Tensor C = op::einsum ("ijk,...kl->i...jl", A, B); + 
EXPECT_EQ (C, expected); + + B = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0), + static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (4.0), + static_cast (5.0), + static_cast (6.0), + static_cast (7.0), + static_cast (8.0)}) + .to_device ()); + + B.reshape ({batch_size, k, n}); + C = op::einsum ("ijk,ikl->ijl", A, B); + EXPECT_EQ (C, expected); + C = op::einsum ("...jk,...kl->...jl", A, B); + EXPECT_EQ (C, expected); } } // namespace op diff --git a/source/source_base/module_container/ATen/ops/test/linalg_op_test.cpp b/source/source_base/module_container/ATen/ops/test/linalg_op_test.cpp index 610daec816f..386b45ce600 100644 --- a/source/source_base/module_container/ATen/ops/test/linalg_op_test.cpp +++ b/source/source_base/module_container/ATen/ops/test/linalg_op_test.cpp @@ -4,271 +4,434 @@ #include #include -namespace container { -namespace op { +namespace container +{ +namespace op +{ template -class LinalgOpTest : public testing::Test { -public: - LinalgOpTest() = default; - ~LinalgOpTest() override = default; +class LinalgOpTest : public testing::Test +{ + public: + LinalgOpTest () = default; + ~LinalgOpTest () override = default; }; -TYPED_TEST_SUITE(LinalgOpTest, base::utils::Types); +TYPED_TEST_SUITE (LinalgOpTest, base::utils::Types); -TYPED_TEST(LinalgOpTest, Add) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (LinalgOpTest, Add) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; op::add_op addCalculator; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), 
static_cast(0.0), static_cast(6.0)}).to_device()); - Tensor B = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); + Tensor B = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); Tensor C = A; - C.zero(); - - Tensor expected = std::move(Tensor( - {static_cast(2.0), static_cast(4.0), static_cast(6.0), - static_cast(0.0), static_cast(8.0), static_cast(10.0), - static_cast(0.0), static_cast(0.0), static_cast(12.0)}).to_device()); - addCalculator(A, B, C); - EXPECT_EQ(C, expected); - - C.zero(); + C.zero (); + + Tensor expected = std::move (Tensor ({static_cast (2.0), + static_cast (4.0), + static_cast (6.0), + static_cast (0.0), + static_cast (8.0), + static_cast (10.0), + static_cast (0.0), + static_cast (0.0), + static_cast (12.0)}) + .to_device ()); + addCalculator (A, B, C); + EXPECT_EQ (C, expected); + + C.zero (); C = A + B; - EXPECT_EQ(C, expected); + EXPECT_EQ (C, expected); A += B; - EXPECT_EQ(A, expected); + EXPECT_EQ (A, expected); - C.zero(); + C.zero (); C = expected - B; C -= B; - expected.zero(); - EXPECT_EQ(C, expected); + expected.zero (); + EXPECT_EQ (C, expected); } -TYPED_TEST(LinalgOpTest, Sub) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (LinalgOpTest, Sub) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename 
std::tuple_element<1, decltype (TypeParam ())>::type; op::add_op addCalculator; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); - Tensor B = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); + Tensor B = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); Tensor C = A; - C.zero(); - - Tensor expected = std::move(Tensor( - {static_cast(2.0), static_cast(4.0), static_cast(6.0), - static_cast(0.0), static_cast(8.0), static_cast(10.0), - static_cast(0.0), static_cast(0.0), static_cast(12.0)}).to_device()); + C.zero (); + + Tensor expected = std::move (Tensor ({static_cast (2.0), + static_cast (4.0), + static_cast (6.0), + static_cast (0.0), + static_cast (8.0), + static_cast (10.0), + static_cast (0.0), + static_cast (0.0), + static_cast (12.0)}) + .to_device ()); C = expected - B; C -= B; - expected.zero(); - EXPECT_EQ(C, expected); + expected.zero (); + EXPECT_EQ (C, expected); } -TYPED_TEST(LinalgOpTest, AddScalar) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (LinalgOpTest, AddScalar) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam 
())>::type; op::add_op addCalculator; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); - Tensor B = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); + Tensor B = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); Tensor C = A; - C.zero(); - - Tensor expected = std::move(Tensor({static_cast(-1.0), static_cast(-2.0), static_cast(-3.0), - static_cast(0.0), static_cast(-4.0), static_cast(-5.0), - static_cast(0.0), static_cast(0.0), static_cast(-6.0)}).to_device()); - - auto alpha = static_cast(2.0); - auto beta = static_cast(-3.0); - - addCalculator(alpha, A, beta, B, C); - EXPECT_EQ(C, expected); + C.zero (); + + Tensor expected = std::move (Tensor ({static_cast (-1.0), + static_cast (-2.0), + static_cast (-3.0), + static_cast (0.0), + static_cast (-4.0), + static_cast (-5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (-6.0)}) + .to_device ()); + + auto alpha = static_cast (2.0); + auto beta = static_cast (-3.0); + + addCalculator (alpha, A, beta, B, C); + EXPECT_EQ (C, expected); } -TYPED_TEST(LinalgOpTest, Mul) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (LinalgOpTest, Mul) +{ + using Type = typename 
std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; op::mul_op mulCalculator; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); - - Tensor B = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); + + Tensor B = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); Tensor C = A; - C.zero(); - Tensor expected = std::move(Tensor({ - static_cast(1.0), static_cast(4.0), static_cast(9.0), - static_cast(0.0), static_cast(16.0),static_cast(25.0), - static_cast(0.0), static_cast(0.0), static_cast(36.0)}).to_device()); - - mulCalculator(A, B, C); - EXPECT_EQ(C, expected); - - C.zero(); + C.zero (); + Tensor expected = std::move (Tensor ({static_cast (1.0), + static_cast (4.0), + static_cast (9.0), + static_cast (0.0), + static_cast (16.0), + static_cast (25.0), + static_cast (0.0), + static_cast (0.0), + static_cast (36.0)}) + .to_device ()); + + mulCalculator (A, B, C); + EXPECT_EQ (C, expected); + + C.zero (); C = A * B; - EXPECT_EQ(C, expected); + EXPECT_EQ (C, expected); A *= B; - EXPECT_EQ(A, expected); + EXPECT_EQ (A, expected); } -TYPED_TEST(LinalgOpTest, MulScalar) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename 
std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (LinalgOpTest, MulScalar) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; op::mul_op mulCalculator; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); Tensor C = A; - C.zero(); - Tensor expected = std::move(Tensor({ - static_cast(2.0), static_cast(4.0), static_cast(6.0), - static_cast(0.0), static_cast(8.0),static_cast(10.0), - static_cast(0.0), static_cast(0.0), static_cast(12.0)}).to_device()); - - auto alpha = static_cast(2.0); - mulCalculator(alpha, A, C); - EXPECT_EQ(C, expected); + C.zero (); + Tensor expected = std::move (Tensor ({static_cast (2.0), + static_cast (4.0), + static_cast (6.0), + static_cast (0.0), + static_cast (8.0), + static_cast (10.0), + static_cast (0.0), + static_cast (0.0), + static_cast (12.0)}) + .to_device ()); + + auto alpha = static_cast (2.0); + mulCalculator (alpha, A, C); + EXPECT_EQ (C, expected); } -TYPED_TEST(LinalgOpTest, Div) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (LinalgOpTest, Div) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; op::div_op divCalculator; - Tensor A = std::move(Tensor({static_cast(2.0), static_cast(4.0), static_cast(6.0), - static_cast(0.0), static_cast(8.0), static_cast(10.0), - static_cast(0.0), 
static_cast(0.0), static_cast(12.0)}).to_device()); - - Tensor B = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(1.0), static_cast(4.0), static_cast(5.0), - static_cast(1.0), static_cast(1.0), static_cast(6.0)}).to_device()); + Tensor A = std::move (Tensor ({static_cast (2.0), + static_cast (4.0), + static_cast (6.0), + static_cast (0.0), + static_cast (8.0), + static_cast (10.0), + static_cast (0.0), + static_cast (0.0), + static_cast (12.0)}) + .to_device ()); + + Tensor B = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (1.0), + static_cast (4.0), + static_cast (5.0), + static_cast (1.0), + static_cast (1.0), + static_cast (6.0)}) + .to_device ()); Tensor C = A; - C.zero(); - Tensor expected = std::move(Tensor({ - static_cast(2.0), static_cast(2.0), static_cast(2.0), - static_cast(0.0), static_cast(2.0), static_cast(2.0), - static_cast(0.0), static_cast(0.0), static_cast(2.0)}).to_device()); - - divCalculator(A, B, C); - EXPECT_EQ(C, expected); - - C.zero(); + C.zero (); + Tensor expected = std::move (Tensor ({static_cast (2.0), + static_cast (2.0), + static_cast (2.0), + static_cast (0.0), + static_cast (2.0), + static_cast (2.0), + static_cast (0.0), + static_cast (0.0), + static_cast (2.0)}) + .to_device ()); + + divCalculator (A, B, C); + EXPECT_EQ (C, expected); + + C.zero (); C = A / B; - EXPECT_EQ(C, expected); + EXPECT_EQ (C, expected); A /= B; - EXPECT_EQ(A, expected); + EXPECT_EQ (A, expected); } -TYPED_TEST(LinalgOpTest, Transpose) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (LinalgOpTest, Transpose) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; op::transpose_op transposeCalculator; const int dim = 3; - Tensor A = 
std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); - A.reshape({-1, 3}); + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); + A.reshape ({-1, 3}); Tensor A_transpose = A; - A_transpose.zero(); - - Tensor expected = std::move(Tensor({static_cast(1.0), static_cast(0.0), static_cast(0.0), - static_cast(2.0), static_cast(4.0), static_cast(0.0), - static_cast(3.0), static_cast(5.0), static_cast(6.0)}).to_device()); - expected.reshape({-1, 3}); - - transposeCalculator(A, {1, 0}, A_transpose); - - EXPECT_EQ(A_transpose, expected); + A_transpose.zero (); + + Tensor expected = std::move (Tensor ({static_cast (1.0), + static_cast (0.0), + static_cast (0.0), + static_cast (2.0), + static_cast (4.0), + static_cast (0.0), + static_cast (3.0), + static_cast (5.0), + static_cast (6.0)}) + .to_device ()); + expected.reshape ({-1, 3}); + + transposeCalculator (A, {1, 0}, A_transpose); + + EXPECT_EQ (A_transpose, expected); } -TYPED_TEST(LinalgOpTest, Stride) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (LinalgOpTest, Stride) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; op::stride_op strideCalculator; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); - - Tensor expected = std::move(Tensor({static_cast(1.0), static_cast(4.0), static_cast(6.0)}).to_device()); + 
Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); + + Tensor expected = std::move ( + Tensor ({static_cast (1.0), static_cast (4.0), static_cast (6.0)}).to_device ()); Tensor A_stride = expected; - A_stride.zero(); + A_stride.zero (); - strideCalculator(A, {4}, A_stride); + strideCalculator (A, {4}, A_stride); - EXPECT_EQ(A_stride, expected); + EXPECT_EQ (A_stride, expected); } -TYPED_TEST(LinalgOpTest, Inflate) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (LinalgOpTest, Inflate) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; op::inflate_op inflateCalculator; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(0.0), static_cast(0.0), - static_cast(0.0), static_cast(4.0), static_cast(0.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (4.0), + static_cast (0.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); Tensor A_inflate = A; - A_inflate.zero(); - Tensor expected = std::move(Tensor({static_cast(1.0), static_cast(4.0), static_cast(6.0)}).to_device()); + A_inflate.zero (); + Tensor expected = std::move ( + Tensor ({static_cast (1.0), static_cast (4.0), static_cast (6.0)}).to_device ()); + inflateCalculator (expected, {4}, A_inflate); - inflateCalculator(expected, {4}, A_inflate); - - EXPECT_EQ(A, A_inflate); + EXPECT_EQ (A, A_inflate); } -TYPED_TEST(LinalgOpTest, Reduce) { - using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; - 
using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; +TYPED_TEST (LinalgOpTest, Reduce) +{ + using Type = typename std::tuple_element<0, decltype (TypeParam ())>::type; + using Device = typename std::tuple_element<1, decltype (TypeParam ())>::type; op::reduce_op reduceCalculator; - Tensor A = std::move(Tensor({static_cast(1.0), static_cast(2.0), static_cast(3.0), - static_cast(0.0), static_cast(4.0), static_cast(5.0), - static_cast(0.0), static_cast(0.0), static_cast(6.0)}).to_device()); - A.reshape({-1, 3}); - - Tensor expected = std::move( - Tensor({static_cast(6.0), static_cast(9.0), static_cast(6.0)}).to_device()); + Tensor A = std::move (Tensor ({static_cast (1.0), + static_cast (2.0), + static_cast (3.0), + static_cast (0.0), + static_cast (4.0), + static_cast (5.0), + static_cast (0.0), + static_cast (0.0), + static_cast (6.0)}) + .to_device ()); + A.reshape ({-1, 3}); + + Tensor expected = std::move ( + Tensor ({static_cast (6.0), static_cast (9.0), static_cast (6.0)}).to_device ()); Tensor A_reduce = expected; - A_reduce.zero(); + A_reduce.zero (); - reduceCalculator(A, 3, A_reduce); + reduceCalculator (A, 3, A_reduce); - EXPECT_EQ(A_reduce, expected); + EXPECT_EQ (A_reduce, expected); } } // namespace op diff --git a/source/source_base/module_container/base/core/allocator.h b/source/source_base/module_container/base/core/allocator.h index 76c14d8b6bb..c96a519d693 100644 --- a/source/source_base/module_container/base/core/allocator.h +++ b/source/source_base/module_container/base/core/allocator.h @@ -3,8 +3,10 @@ #include -namespace base { -namespace core { +namespace base +{ +namespace core +{ /** * @brief An abstract base class for memory allocators. * @@ -14,7 +16,8 @@ namespace core { * All memory allocated by an Allocator must be freed using the same allocator that * allocated it. */ -class Allocator { +class Allocator +{ public: /** * @brief Allocate a block of memory with the given size and default alignment. 
@@ -23,7 +26,7 @@ class Allocator { * * @return A pointer to the allocated memory block, or nullptr if the allocation fails. */ - virtual void* allocate(size_t size) = 0; + virtual void* allocate (size_t size) = 0; /** * @brief Allocate a block of memory with the given size and alignment. @@ -33,14 +36,14 @@ class Allocator { * * @return A pointer to the allocated memory block, or nullptr if the allocation fails. */ - virtual void* allocate(size_t size, size_t alignment) = 0; + virtual void* allocate (size_t size, size_t alignment) = 0; /** * @brief Free a block of memory that was previously allocated by this allocator. * * @param ptr A pointer to the memory block to free. */ - virtual void free(void* ptr) = 0; + virtual void free (void* ptr) = 0; /** * @brief Get the allocated size of a given pointer. @@ -48,7 +51,9 @@ class Allocator { * @param ptr The pointer to get the allocated size of. * @return size_t The size of the allocated block of memory, in bytes. */ - virtual size_t AllocatedSize(void* ptr) { + virtual size_t + AllocatedSize (void* ptr) + { return allocated_size_; } @@ -57,9 +62,9 @@ class Allocator { * * @return MemoryType The type of memory used by the TensorBuffer. */ - virtual container::DeviceType GetDeviceType() = 0; + virtual container::DeviceType GetDeviceType () = 0; - virtual ~Allocator() = default; + virtual ~Allocator () = default; protected: /** diff --git a/source/source_base/module_container/base/core/bfc_allocator.h b/source/source_base/module_container/base/core/bfc_allocator.h index ac1384f2cdd..27d3fec2a36 100644 --- a/source/source_base/module_container/base/core/bfc_allocator.h +++ b/source/source_base/module_container/base/core/bfc_allocator.h @@ -3,25 +3,30 @@ #include -namespace base { -namespace core { +namespace base +{ +namespace core +{ /** * @brief An allocator that allocates memory on a GPU device. * * This class provides an implementation of the Allocator interface that allocates memory * on a GPU device using CUDA APIs. 
*/ -class BFCAllocator : public Allocator { -public: - - struct Options { +class BFCAllocator : public Allocator +{ + public: + struct Options + { bool allow_growth = true; double fragment_fraction = 0.0; }; - BFCAllocator(std::unique_ptr sub_alloc, const size_t& total_memory, const Options& options = Options()); + BFCAllocator (std::unique_ptr sub_alloc, + const size_t& total_memory, + const Options& options = Options ()); - ~BFCAllocator(); + ~BFCAllocator (); /** * @brief Allocate a block of memory with the given size and default alignment on GPU. * @@ -29,7 +34,7 @@ class BFCAllocator : public Allocator { * * @return A pointer to the allocated memory block, or nullptr if the allocation fails. */ - void* allocate(size_t size) override; + void* allocate (size_t size) override; /** * @brief Allocate a block of memory with the given size and alignment on GPU. @@ -39,24 +44,23 @@ class BFCAllocator : public Allocator { * * @return A pointer to the allocated memory block, or nullptr if the allocation fails. */ - void* allocate(size_t size, size_t alignment) override; + void* allocate (size_t size, size_t alignment) override; /** * @brief Free a block of GPU memory that was previously allocated by this allocator. * * @param ptr A pointer to the memory block to free. */ - void free(void* ptr) override; + void free (void* ptr) override; /** * @brief Get the type of memory used by the TensorBuffer. * * @return MemoryType The type of memory used by the TensorBuffer. */ - DeviceType GetDeviceType() override; - - private: + DeviceType GetDeviceType () override; + private: // The sub allocator to use for extending the BFC's memory pool. std::unique_ptr sub_alloc_; @@ -73,7 +77,8 @@ class BFCAllocator : public Allocator { // The following means that the largest bin'd chunk size is 256 << 21 = 512MB. static constexpr int kNumBins = 21; - struct chunk { + struct chunk + { // The size of the chunk in bytes. size_t size = 0; // The bin index of the chunk. 
@@ -89,15 +94,20 @@ class BFCAllocator : public Allocator { // the parent allocator. int64_t allocation_id = -1; // pointer to granted subbuffer. - void* ptr = nullptr; + void* ptr = nullptr; chunk_handle_t next_chunk_handle = kInvalidChunkHandle; // The handle of the previous chunk in the bin. chunk_handle_t prev_chunk_handle = kInvalidChunkHandle; // Whether the chunk is allocated. - bool allocated() const { return allocation_id > 0; } + bool + allocated () const + { + return allocation_id > 0; + } }; - struct bin { + struct bin + { // The size of the chunks in this bin. size_t bin_size = 0; // The number of chunks in this bin. @@ -107,28 +117,29 @@ class BFCAllocator : public Allocator { // The handle of the last chunk in the bin. chunk_handle_t last_chunk_handle = kInvalidChunkHandle; - class chunk_comparator { + class chunk_comparator + { public: - explicit chunk_comparator(BFCAllocator* allocator) : allocator_(allocator) {} + explicit chunk_comparator (BFCAllocator* allocator) : allocator_ (allocator) {} // Sort first by size and then use pointer address as a tie breaker. 
- bool operator()(const chunk_handle_t ha, - const chunk_handle_t hb) const { - const chunk* a = allocator_->chunk_from_handle(ha); - const chunk* b = allocator_->chunk_from_handle(hb); - if (a->size != b->size) { - return a->size < b->size; - } + bool + operator() (const chunk_handle_t ha, const chunk_handle_t hb) const + { + const chunk* a = allocator_->chunk_from_handle (ha); + const chunk* b = allocator_->chunk_from_handle (hb); + if (a->size != b->size) + { + return a->size < b->size; + } return a->ptr < b->ptr; } private: - BFCAllocator* allocator_ = nullptr; // The parent allocator + BFCAllocator* allocator_ = nullptr; // The parent allocator }; using free_chunk_set_t = std::set; }; - - }; } // namespace core diff --git a/source/source_base/module_container/base/core/cpu_allocator.cpp b/source/source_base/module_container/base/core/cpu_allocator.cpp index 2027402430c..c8986bdef27 100644 --- a/source/source_base/module_container/base/core/cpu_allocator.cpp +++ b/source/source_base/module_container/base/core/cpu_allocator.cpp @@ -1,32 +1,43 @@ #include -namespace base { -namespace core { +namespace base +{ +namespace core +{ // Allocate a block of CPU memory with the given size and default alignment. -void *CPUAllocator::allocate(size_t size) { +void* + CPUAllocator::allocate (size_t size) +{ this->allocated_size_ = size; - return ::operator new(size); + return ::operator new (size); } // Allocate a block of CPU memory with the given size and alignment. -void *CPUAllocator::allocate(size_t size, size_t alignment) { +void* + CPUAllocator::allocate (size_t size, size_t alignment) +{ this->allocated_size_ = size; - void *ptr = nullptr; - if (posix_memalign(&ptr, alignment, size) != 0) { - ptr = nullptr; - } + void* ptr = nullptr; + if (posix_memalign (&ptr, alignment, size) != 0) + { + ptr = nullptr; + } return ptr; } // Free a block of CPU memory that was previously allocated by this allocator. 
-void CPUAllocator::free(void *ptr) { +void + CPUAllocator::free (void* ptr) +{ this->allocated_size_ = 0; - ::operator delete(ptr); + ::operator delete (ptr); } // Get the type of device used by the TensorBuffer. -container::DeviceType CPUAllocator::GetDeviceType() { +container::DeviceType + CPUAllocator::GetDeviceType () +{ return container::DeviceType::CpuDevice; } diff --git a/source/source_base/module_container/base/core/cpu_allocator.h b/source/source_base/module_container/base/core/cpu_allocator.h index 020234a5633..95b96154b7b 100644 --- a/source/source_base/module_container/base/core/cpu_allocator.h +++ b/source/source_base/module_container/base/core/cpu_allocator.h @@ -3,8 +3,10 @@ #include -namespace base { -namespace core { +namespace base +{ +namespace core +{ /** * @brief An Allocator subclass for CPU memory. @@ -13,9 +15,9 @@ namespace core { * uses the standard library functions std::malloc, std::free, and std::aligned_alloc * to allocate and deallocate memory blocks. */ -class CPUAllocator : public Allocator { -public: - +class CPUAllocator : public Allocator +{ + public: /** * @brief Allocate a block of CPU memory with the given size and default alignment. * @@ -23,7 +25,7 @@ class CPUAllocator : public Allocator { * * @return A pointer to the allocated memory block, or nullptr if the allocation fails. */ - void *allocate(size_t size) override; + void* allocate (size_t size) override; /** * @brief Allocate a block of CPU memory with the given size and alignment. @@ -33,22 +35,21 @@ class CPUAllocator : public Allocator { * * @return A pointer to the allocated memory block, or nullptr if the allocation fails. */ - void *allocate(size_t size, size_t alignment) override; + void* allocate (size_t size, size_t alignment) override; /** * @brief Free a block of CPU memory that was previously allocated by this allocator. * * @param ptr A pointer to the memory block to free. 
*/ - void free(void *ptr) override; + void free (void* ptr) override; /** * @brief Get the type of device used by the TensorBuffer. * * @return MemoryType The type of memory used by the TensorBuffer. */ - container::DeviceType GetDeviceType() override; - + container::DeviceType GetDeviceType () override; }; } // namespace core diff --git a/source/source_base/module_container/base/core/gpu_allocator.cpp b/source/source_base/module_container/base/core/gpu_allocator.cpp index e63043c5379..fda0cd3fefe 100644 --- a/source/source_base/module_container/base/core/gpu_allocator.cpp +++ b/source/source_base/module_container/base/core/gpu_allocator.cpp @@ -12,40 +12,52 @@ #define device_free hipFree #define device_result_t hipError_t #define device_success hipSuccess -#endif +#endif -namespace base { -namespace core { +namespace base +{ +namespace core +{ // Allocate a block of memory with the given size and default alignment on GPU. -void *GPUAllocator::allocate(size_t size) { - void * ptr = nullptr; - device_result_t result = device_malloc(&ptr, size); - if (result != device_success) { - return nullptr; - } +void* + GPUAllocator::allocate (size_t size) +{ + void* ptr = nullptr; + device_result_t result = device_malloc (&ptr, size); + if (result != device_success) + { + return nullptr; + } this->allocated_size_ = size; return ptr; } // Allocate a block of CPU memory with the given size and alignment. -void *GPUAllocator::allocate(size_t size, size_t alignment) { - void * ptr = nullptr; - device_result_t result = device_malloc(&ptr, size); - if (result != device_success) { - return nullptr; - } +void* + GPUAllocator::allocate (size_t size, size_t alignment) +{ + void* ptr = nullptr; + device_result_t result = device_malloc (&ptr, size); + if (result != device_success) + { + return nullptr; + } this->allocated_size_ = size; return ptr; } // Free a block of CPU memory that was previously allocated by this allocator. 
-void GPUAllocator::free(void *ptr) { - device_free(ptr); +void + GPUAllocator::free (void* ptr) +{ + device_free (ptr); this->allocated_size_ = 0; } // Get the type of device used by the TensorBuffer. -container::DeviceType GPUAllocator::GetDeviceType() { +container::DeviceType + GPUAllocator::GetDeviceType () +{ return container::DeviceType::GpuDevice; } diff --git a/source/source_base/module_container/base/core/gpu_allocator.h b/source/source_base/module_container/base/core/gpu_allocator.h index 446f57d0ce6..d38e4bb9c5f 100644 --- a/source/source_base/module_container/base/core/gpu_allocator.h +++ b/source/source_base/module_container/base/core/gpu_allocator.h @@ -3,16 +3,19 @@ #include -namespace base { -namespace core { +namespace base +{ +namespace core +{ /** * @brief An allocator that allocates memory on a GPU device. * * This class provides an implementation of the Allocator interface that allocates memory * on a GPU device using CUDA APIs. */ -class GPUAllocator : public Allocator { -public: +class GPUAllocator : public Allocator +{ + public: /** * @brief Allocate a block of memory with the given size and default alignment on GPU. * @@ -20,7 +23,7 @@ class GPUAllocator : public Allocator { * * @return A pointer to the allocated memory block, or nullptr if the allocation fails. */ - void *allocate(size_t size) override; + void* allocate (size_t size) override; /** * @brief Allocate a block of memory with the given size and alignment on GPU. @@ -30,24 +33,24 @@ class GPUAllocator : public Allocator { * * @return A pointer to the allocated memory block, or nullptr if the allocation fails. */ - void *allocate(size_t size, size_t alignment) override; + void* allocate (size_t size, size_t alignment) override; /** * @brief Free a block of GPU memory that was previously allocated by this allocator. * * @param ptr A pointer to the memory block to free. 
*/ - void free(void *ptr) override; + void free (void* ptr) override; /** * @brief Get the type of memory used by the TensorBuffer. * * @return MemoryType The type of memory used by the TensorBuffer. */ - container::DeviceType GetDeviceType() override; + container::DeviceType GetDeviceType () override; }; -} // namespace base } // namespace core +} // namespace base #endif // BASE_CORE_GPU_ALLOCATOR_H_ diff --git a/source/source_base/module_container/base/core/refcount.cpp b/source/source_base/module_container/base/core/refcount.cpp index 98d9e352b62..bf03f213d1b 100644 --- a/source/source_base/module_container/base/core/refcount.cpp +++ b/source/source_base/module_container/base/core/refcount.cpp @@ -1,29 +1,40 @@ #include #include "refcount.h" -namespace base { -namespace core { +namespace base +{ +namespace core +{ -counted_base::counted_base() : ref_(1) {} +counted_base::counted_base () : ref_ (1) {} -void counted_base::ref() const { - ref_.fetch_add(1, std::memory_order_relaxed); +void + counted_base::ref () const +{ + ref_.fetch_add (1, std::memory_order_relaxed); } -bool counted_base::unref() const { - if (ref_.fetch_sub(1, std::memory_order_acq_rel) == 1) { - delete this; - return true; - } +bool + counted_base::unref () const +{ + if (ref_.fetch_sub (1, std::memory_order_acq_rel) == 1) + { + delete this; + return true; + } return false; } -int_fast32_t counted_base::ref_count() const { - return ref_.load(std::memory_order_acquire); +int_fast32_t + counted_base::ref_count () const +{ + return ref_.load (std::memory_order_acquire); } -bool counted_base::ref_count_is_one() const { - return ref_count() == 1; +bool + counted_base::ref_count_is_one () const +{ + return ref_count () == 1; } } // namespace core diff --git a/source/source_base/module_container/base/core/refcount.h b/source/source_base/module_container/base/core/refcount.h index 14abafcf35e..5835f13aa80 100644 --- a/source/source_base/module_container/base/core/refcount.h +++ 
b/source/source_base/module_container/base/core/refcount.h @@ -5,65 +5,71 @@ #include #include -namespace base { -namespace core { +namespace base +{ +namespace core +{ /** * @brief The base class for reference-counted objects. */ -class counted_base { - public: +class counted_base +{ + public: /** * @brief Default constructor. Initializes the reference count to one. */ - counted_base(); + counted_base (); /** * @brief Increases the reference count by one. */ - void ref() const; + void ref () const; /** * @brief Decreases the reference count by one. * @return True if the object is deleted, otherwise false. */ - bool unref() const; + bool unref () const; /** * @brief Gets the current reference count. * @return The current reference count. */ - int_fast32_t ref_count() const; + int_fast32_t ref_count () const; /** * @brief Checks if the reference count is one. * @return True if the reference count is one, otherwise false. */ - bool ref_count_is_one() const; + bool ref_count_is_one () const; - protected: + protected: /** * @brief Virtual destructor. * @details The destructor is protected to prevent the explicit initialization of the base class. */ - virtual ~counted_base() {} + virtual ~counted_base () {} - private: + private: mutable std::atomic_int_fast32_t ref_; - counted_base(const counted_base&) = delete; - void operator=(const counted_base&) = delete; + counted_base (const counted_base&) = delete; + void operator= (const counted_base&) = delete; }; /** * @brief A deleter functor for creating std::unique_ptr that unrefs objects. */ -struct ref_count_deleter { +struct ref_count_deleter +{ /** * @brief Calls unref on the object. * @param o Pointer to the object. */ - void operator()(const counted_base* o) const { - o->unref(); + void + operator() (const counted_base* o) const + { + o->unref (); } }; @@ -81,15 +87,17 @@ class ref_count_ptr; * @return A smart pointer holding the reference to the object. 
*/ template -std::unique_ptr get_new_ref(T* ptr) { - static_assert(std::is_base_of::value, - "T must be derived from counted_base"); - - if (ptr == nullptr) { - return std::unique_ptr(); - } - ptr->ref(); - return std::unique_ptr(ptr); +std::unique_ptr + get_new_ref (T* ptr) +{ + static_assert (std::is_base_of::value, "T must be derived from counted_base"); + + if (ptr == nullptr) + { + return std::unique_ptr (); + } + ptr->ref (); + return std::unique_ptr (ptr); } /** @@ -97,20 +105,24 @@ std::unique_ptr get_new_ref(T* ptr) { * @tparam T Type of the object. */ template -class ref_count_ptr : public std::unique_ptr { - public: +class ref_count_ptr : public std::unique_ptr +{ + public: using std::unique_ptr::unique_ptr; /** * @brief Adds a new reference to the owned object. * @return A smart pointer holding the reference to the object. */ - std::unique_ptr get_new_ref() const { - if (this->get() == nullptr) { - return std::unique_ptr(); - } - this->get()->ref(); - return std::unique_ptr(this->get()); + std::unique_ptr + get_new_ref () const + { + if (this->get () == nullptr) + { + return std::unique_ptr (); + } + this->get ()->ref (); + return std::unique_ptr (this->get ()); } }; diff --git a/source/source_base/module_container/base/macros/cuda.h b/source/source_base/module_container/base/macros/cuda.h index 572eecdffd0..920dffaad8e 100644 --- a/source/source_base/module_container/base/macros/cuda.h +++ b/source/source_base/module_container/base/macros/cuda.h @@ -28,21 +28,22 @@ struct GetTypeThrust> using type = thrust::complex; /**< The return type specialization for std::complex. 
*/ }; -static inline cublasOperation_t GetCublasOperation(const char& trans) +static inline cublasOperation_t + GetCublasOperation (const char& trans) { cublasOperation_t cutrans = {}; if (trans == 'N') - { - cutrans = CUBLAS_OP_N; - } + { + cutrans = CUBLAS_OP_N; + } else if (trans == 'T') - { - cutrans = CUBLAS_OP_T; - } + { + cutrans = CUBLAS_OP_T; + } else if (trans == 'C') - { - cutrans = CUBLAS_OP_C; - } + { + cutrans = CUBLAS_OP_C; + } return cutrans; } @@ -83,44 +84,48 @@ struct GetTypeCuda> static constexpr cudaDataType cuda_data_type = cudaDataType::CUDA_C_64F; }; -static inline cublasFillMode_t cublas_fill_mode(const char& uplo) +static inline cublasFillMode_t + cublas_fill_mode (const char& uplo) { if (uplo == 'U' || uplo == 'u') return CUBLAS_FILL_MODE_UPPER; else if (uplo == 'L' || uplo == 'l') return CUBLAS_FILL_MODE_LOWER; else - throw std::runtime_error("cublas_fill_mode: unknown uplo"); + throw std::runtime_error ("cublas_fill_mode: unknown uplo"); } -static inline cublasDiagType_t cublas_diag_type(const char& diag) +static inline cublasDiagType_t + cublas_diag_type (const char& diag) { if (diag == 'U' || diag == 'u') return CUBLAS_DIAG_UNIT; else if (diag == 'N' || diag == 'n') return CUBLAS_DIAG_NON_UNIT; else - throw std::runtime_error("cublas_diag_type: unknown diag"); + throw std::runtime_error ("cublas_diag_type: unknown diag"); } -static inline cusolverEigMode_t cublas_eig_mode(const char& jobz) +static inline cusolverEigMode_t + cublas_eig_mode (const char& jobz) { if (jobz == 'N' || jobz == 'n') return CUSOLVER_EIG_MODE_NOVECTOR; else if (jobz == 'V' || jobz == 'v') return CUSOLVER_EIG_MODE_VECTOR; else - throw std::runtime_error("cublas_eig_mode: unknown diag"); + throw std::runtime_error ("cublas_eig_mode: unknown diag"); } -static inline cusolverEigType_t cublas_eig_type(const int& itype) +static inline cusolverEigType_t + cublas_eig_type (const int& itype) { if (itype == 1) return CUSOLVER_EIG_TYPE_1; else if (itype == 2) return 
CUSOLVER_EIG_TYPE_2; else - throw std::runtime_error("cublas_eig_mode: unknown diag"); + throw std::runtime_error ("cublas_eig_mode: unknown diag"); } /** @@ -134,7 +139,8 @@ static inline cusolverEigType_t cublas_eig_type(const int& itype) * @return Corresponding cusolverEigRange_t enum value * @throws std::runtime_error if character is invalid */ -static inline cusolverEigRange_t cublas_eig_range(const char& range) +static inline cusolverEigRange_t + cublas_eig_range (const char& range) { if (range == 'A' || range == 'a') return CUSOLVER_EIG_RANGE_ALL; @@ -143,7 +149,7 @@ static inline cusolverEigRange_t cublas_eig_range(const char& range) else if (range == 'I' || range == 'i') return CUSOLVER_EIG_RANGE_I; else - throw std::runtime_error("cublas_eig_range: unknown range '" + std::string(1, range) + "'"); + throw std::runtime_error ("cublas_eig_range: unknown range '" + std::string (1, range) + "'"); } #endif // BASE_MACROS_CUDA_H_ diff --git a/source/source_base/module_container/base/macros/macros.h b/source/source_base/module_container/base/macros/macros.h index 645c6aee896..8a24aaf6643 100644 --- a/source/source_base/module_container/base/macros/macros.h +++ b/source/source_base/module_container/base/macros/macros.h @@ -5,28 +5,28 @@ #include #if __CUDA -#include +#include #elif __ROCM #include #endif #include -#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&) = delete; \ - void operator=(const TypeName&) = delete +#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName (const TypeName&) = delete; \ + void operator= (const TypeName&) = delete -#define DISALLOW_MOVE_AND_ASSIGN(TypeName) \ - TypeName(TypeName&&) = delete; \ - void operator=(TypeName&&) = delete +#define DISALLOW_MOVE_AND_ASSIGN(TypeName) \ + TypeName (TypeName&&) = delete; \ + void operator= (TypeName&&) = delete -#define DISALLOW_COPY_MOVE_AND_ASSIGN(TypeName) \ - DISALLOW_COPY_AND_ASSIGN(TypeName); \ - DISALLOW_MOVE_AND_ASSIGN(TypeName) +#define 
DISALLOW_COPY_MOVE_AND_ASSIGN(TypeName) \ + DISALLOW_COPY_AND_ASSIGN (TypeName); \ + DISALLOW_MOVE_AND_ASSIGN (TypeName) -#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ - TypeName() = delete; \ - DISALLOW_COPY_MOVE_AND_ASSIGN(TypeName) +#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ + TypeName () = delete; \ + DISALLOW_COPY_MOVE_AND_ASSIGN (TypeName) #define MAX_SIZE_T UINT64_MAX @@ -41,198 +41,199 @@ #endif // defined(__CUDACC__) || defined(__HIPCC__) #if defined(__GNUC__) || defined(__ICL) || defined(__clang__) -#define PREDICT_TRUE(expr) (__builtin_expect(static_cast(expr), 1)) -#define PREDICT_FALSE(expr) (__builtin_expect(static_cast(expr), 0)) +#define PREDICT_TRUE(expr) (__builtin_expect (static_cast (expr), 1)) +#define PREDICT_FALSE(expr) (__builtin_expect (static_cast (expr), 0)) #else #define PREDICT_TRUE(expr) (expr) #define PREDICT_FALSE(expr) (expr) #endif -#define CHECK_MSG(expr, ...) \ - (::base::utils::check_msg_impl( \ - "Expected " #expr \ - " to be true, but got false. " \ - __VA_ARGS__ " " \ - "(Could this error message be improved? If so, " \ - "please report an enhancement request to Container)" \ - )) - -#define REQUIRES_OK(expr, ...) \ - if(PREDICT_FALSE(!(expr))) { \ - ::base::utils::check_exit_impl( \ - __func__, \ - __FILE__, \ - static_cast(__LINE__), \ - CHECK_MSG(expr, ##__VA_ARGS__)); \ - } +#define CHECK_MSG(expr, ...) \ + (::base::utils::check_msg_impl ("Expected " #expr " to be true, but got false. " __VA_ARGS__ " " \ + "(Could this error message be improved? If so, " \ + "please report an enhancement request to Container)")) + +#define REQUIRES_OK(expr, ...) \ + if (PREDICT_FALSE (!(expr))) \ + { \ + ::base::utils::check_exit_impl (__func__, \ + __FILE__, \ + static_cast (__LINE__), \ + CHECK_MSG (expr, ##__VA_ARGS__)); \ + } // The macro TEMPLATE_1() expands to a switch statement conditioned on // TYPE_ENUM. Each case expands the STMTS after a typedef for T. #define SINGLE_ARG(...) 
__VA_ARGS__ -#define CASE_2(TYPE, DEVICE, STMTS) \ - case (int(ct::DataTypeToEnum::value) * 10 + \ - int(ct::DeviceTypeToEnum::value)): { \ - typedef TYPE T_; \ - typedef DEVICE DEVICE_; \ - STMTS; \ - break; \ - } - -#define CASE_LAMBDA_2(TYPE, DEVICE, FUNC) \ - case (int(ct::DataTypeToEnum::value) * 10 + \ - int(ct::DeviceTypeToEnum::value)): { \ - typedef TYPE T_; \ - typedef DEVICE DEVICE_; \ - FUNC(); \ - break; \ - } - -#define CASES_ALL_WITH_DEFAULT_2(TYPE_ENUM, DEVICE_ENUM, STMTS, DEFAULT) \ - switch (int(TYPE_ENUM) * 10 + int(DEVICE_ENUM)) { \ - CASE_2(float, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(double, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(int, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(int64_t, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(std::complex, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(std::complex, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - default: \ - DEFAULT; \ - break; \ - } - -#define CASES_ALL_LAMBDA_WITH_DEFAULT_2(TYPE_ENUM, DEVICE_ENUM, FUNC, DEFAULT) \ - switch (int(TYPE_ENUM) * 10 + int(DEVICE_ENUM)) { \ - CASE_LAMBDA_2(float, ct::DEVICE_CPU, FUNC) \ - CASE_LAMBDA_2(double, ct::DEVICE_CPU, FUNC) \ - CASE_LAMBDA_2(int, ct::DEVICE_CPU, FUNC) \ - CASE_LAMBDA_2(int64_t, ct::DEVICE_CPU, FUNC) \ - CASE_LAMBDA_2(std::complex, ct::DEVICE_CPU, FUNC) \ - CASE_LAMBDA_2(std::complex, ct::DEVICE_CPU, FUNC) \ - default: \ - DEFAULT; \ - break; \ - } - -#define CASES_BLAS_WITH_DEFAULT_2(TYPE_ENUM, DEVICE_ENUM, STMTS, DEFAULT) \ - switch (int(TYPE_ENUM) * 10 + int(DEVICE_ENUM)) { \ - CASE_2(float, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(double, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(std::complex, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(std::complex, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - default: \ - DEFAULT; \ - break; \ - } - -#define CASES_ALL_WITH_DEFAULT_2_GPU(TYPE_ENUM, DEVICE_ENUM, STMTS, DEFAULT) \ - switch (int(TYPE_ENUM) * 10 + int(DEVICE_ENUM)) { \ - CASE_2(float, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ 
- CASE_2(float, ct::DEVICE_GPU, SINGLE_ARG(STMTS)) \ - CASE_2(double, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(double, ct::DEVICE_GPU, SINGLE_ARG(STMTS)) \ - CASE_2(int, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(int, ct::DEVICE_GPU, SINGLE_ARG(STMTS)) \ - CASE_2(int64_t, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(int64_t, ct::DEVICE_GPU, SINGLE_ARG(STMTS)) \ - CASE_2(std::complex, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(std::complex, ct::DEVICE_GPU, SINGLE_ARG(STMTS)) \ - CASE_2(std::complex, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(std::complex, ct::DEVICE_GPU, SINGLE_ARG(STMTS)) \ - default: \ - DEFAULT; \ - break; \ - } - -#define CASES_ALL_LAMBDA_WITH_DEFAULT_2_GPU(TYPE_ENUM, DEVICE_ENUM, FUNC, DEFAULT) \ - switch (int(TYPE_ENUM) * 10 + int(DEVICE_ENUM)) { \ - CASE_LAMBDA_2(float, ct::DEVICE_CPU, FUNC) \ - CASE_LAMBDA_2(float, ct::DEVICE_GPU, FUNC) \ - CASE_LAMBDA_2(double, ct::DEVICE_CPU, FUNC) \ - CASE_LAMBDA_2(double, ct::DEVICE_GPU, FUNC) \ - CASE_LAMBDA_2(int, ct::DEVICE_CPU, FUNC) \ - CASE_LAMBDA_2(int, ct::DEVICE_GPU, FUNC) \ - CASE_LAMBDA_2(int64_t, ct::DEVICE_CPU, FUNC) \ - CASE_LAMBDA_2(int64_t, ct::DEVICE_GPU, FUNC) \ - CASE_LAMBDA_2(std::complex, ct::DEVICE_CPU, FUNC) \ - CASE_LAMBDA_2(std::complex, ct::DEVICE_GPU, FUNC) \ - CASE_LAMBDA_2(std::complex, ct::DEVICE_CPU, FUNC) \ - CASE_LAMBDA_2(std::complex, ct::DEVICE_GPU, FUNC) \ - default: \ - DEFAULT; \ - break; \ - } - -#define CASES_BLAS_WITH_DEFAULT_2_GPU(TYPE_ENUM, DEVICE_ENUM, STMTS, DEFAULT) \ - switch (int(TYPE_ENUM) * 10 + int(DEVICE_ENUM)) { \ - CASE_2(float, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(float, ct::DEVICE_GPU, SINGLE_ARG(STMTS)) \ - CASE_2(double, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(double, ct::DEVICE_GPU, SINGLE_ARG(STMTS)) \ - CASE_2(std::complex, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(std::complex, ct::DEVICE_GPU, SINGLE_ARG(STMTS)) \ - CASE_2(std::complex, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(std::complex, 
ct::DEVICE_GPU, SINGLE_ARG(STMTS)) \ - default: \ - DEFAULT; \ - break; \ - } - -#define CASES_CZ_WITH_DEFAULT_2(TYPE_ENUM, DEVICE_ENUM, STMTS, DEFAULT) \ - switch (int(TYPE_ENUM) * 10 + int(DEVICE_ENUM)) { \ - CASE_2(std::complex, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(std::complex, ct::DEVICE_GPU, SINGLE_ARG(STMTS)) \ - CASE_2(std::complex, ct::DEVICE_CPU, SINGLE_ARG(STMTS)) \ - CASE_2(std::complex, ct::DEVICE_GPU, SINGLE_ARG(STMTS)) \ - default: \ - DEFAULT; \ - break; \ - } - +#define CASE_2(TYPE, DEVICE, STMTS) \ + case (int (ct::DataTypeToEnum::value) * 10 + int (ct::DeviceTypeToEnum::value)): \ + { \ + typedef TYPE T_; \ + typedef DEVICE DEVICE_; \ + STMTS; \ + break; \ + } + +#define CASE_LAMBDA_2(TYPE, DEVICE, FUNC) \ + case (int (ct::DataTypeToEnum::value) * 10 + int (ct::DeviceTypeToEnum::value)): \ + { \ + typedef TYPE T_; \ + typedef DEVICE DEVICE_; \ + FUNC (); \ + break; \ + } + +#define CASES_ALL_WITH_DEFAULT_2(TYPE_ENUM, DEVICE_ENUM, STMTS, DEFAULT) \ + switch (int (TYPE_ENUM) * 10 + int (DEVICE_ENUM)) \ + { \ + CASE_2 (float, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (double, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (int, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (int64_t, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (std::complex, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (std::complex, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + default: \ + DEFAULT; \ + break; \ + } + +#define CASES_ALL_LAMBDA_WITH_DEFAULT_2(TYPE_ENUM, DEVICE_ENUM, FUNC, DEFAULT) \ + switch (int (TYPE_ENUM) * 10 + int (DEVICE_ENUM)) \ + { \ + CASE_LAMBDA_2 (float, ct::DEVICE_CPU, FUNC) \ + CASE_LAMBDA_2 (double, ct::DEVICE_CPU, FUNC) \ + CASE_LAMBDA_2 (int, ct::DEVICE_CPU, FUNC) \ + CASE_LAMBDA_2 (int64_t, ct::DEVICE_CPU, FUNC) \ + CASE_LAMBDA_2 (std::complex, ct::DEVICE_CPU, FUNC) \ + CASE_LAMBDA_2 (std::complex, ct::DEVICE_CPU, FUNC) \ + default: \ + DEFAULT; \ + break; \ + } + +#define CASES_BLAS_WITH_DEFAULT_2(TYPE_ENUM, DEVICE_ENUM, STMTS, 
DEFAULT) \ + switch (int (TYPE_ENUM) * 10 + int (DEVICE_ENUM)) \ + { \ + CASE_2 (float, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (double, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (std::complex, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (std::complex, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + default: \ + DEFAULT; \ + break; \ + } + +#define CASES_ALL_WITH_DEFAULT_2_GPU(TYPE_ENUM, DEVICE_ENUM, STMTS, DEFAULT) \ + switch (int (TYPE_ENUM) * 10 + int (DEVICE_ENUM)) \ + { \ + CASE_2 (float, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (float, ct::DEVICE_GPU, SINGLE_ARG (STMTS)) \ + CASE_2 (double, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (double, ct::DEVICE_GPU, SINGLE_ARG (STMTS)) \ + CASE_2 (int, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (int, ct::DEVICE_GPU, SINGLE_ARG (STMTS)) \ + CASE_2 (int64_t, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (int64_t, ct::DEVICE_GPU, SINGLE_ARG (STMTS)) \ + CASE_2 (std::complex, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (std::complex, ct::DEVICE_GPU, SINGLE_ARG (STMTS)) \ + CASE_2 (std::complex, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (std::complex, ct::DEVICE_GPU, SINGLE_ARG (STMTS)) \ + default: \ + DEFAULT; \ + break; \ + } + +#define CASES_ALL_LAMBDA_WITH_DEFAULT_2_GPU(TYPE_ENUM, DEVICE_ENUM, FUNC, DEFAULT) \ + switch (int (TYPE_ENUM) * 10 + int (DEVICE_ENUM)) \ + { \ + CASE_LAMBDA_2 (float, ct::DEVICE_CPU, FUNC) \ + CASE_LAMBDA_2 (float, ct::DEVICE_GPU, FUNC) \ + CASE_LAMBDA_2 (double, ct::DEVICE_CPU, FUNC) \ + CASE_LAMBDA_2 (double, ct::DEVICE_GPU, FUNC) \ + CASE_LAMBDA_2 (int, ct::DEVICE_CPU, FUNC) \ + CASE_LAMBDA_2 (int, ct::DEVICE_GPU, FUNC) \ + CASE_LAMBDA_2 (int64_t, ct::DEVICE_CPU, FUNC) \ + CASE_LAMBDA_2 (int64_t, ct::DEVICE_GPU, FUNC) \ + CASE_LAMBDA_2 (std::complex, ct::DEVICE_CPU, FUNC) \ + CASE_LAMBDA_2 (std::complex, ct::DEVICE_GPU, FUNC) \ + CASE_LAMBDA_2 (std::complex, ct::DEVICE_CPU, FUNC) \ + CASE_LAMBDA_2 (std::complex, ct::DEVICE_GPU, FUNC) \ + default: \ + DEFAULT; \ 
+ break; \ + } + +#define CASES_BLAS_WITH_DEFAULT_2_GPU(TYPE_ENUM, DEVICE_ENUM, STMTS, DEFAULT) \ + switch (int (TYPE_ENUM) * 10 + int (DEVICE_ENUM)) \ + { \ + CASE_2 (float, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (float, ct::DEVICE_GPU, SINGLE_ARG (STMTS)) \ + CASE_2 (double, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (double, ct::DEVICE_GPU, SINGLE_ARG (STMTS)) \ + CASE_2 (std::complex, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (std::complex, ct::DEVICE_GPU, SINGLE_ARG (STMTS)) \ + CASE_2 (std::complex, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (std::complex, ct::DEVICE_GPU, SINGLE_ARG (STMTS)) \ + default: \ + DEFAULT; \ + break; \ + } + +#define CASES_CZ_WITH_DEFAULT_2(TYPE_ENUM, DEVICE_ENUM, STMTS, DEFAULT) \ + switch (int (TYPE_ENUM) * 10 + int (DEVICE_ENUM)) \ + { \ + CASE_2 (std::complex, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (std::complex, ct::DEVICE_GPU, SINGLE_ARG (STMTS)) \ + CASE_2 (std::complex, ct::DEVICE_CPU, SINGLE_ARG (STMTS)) \ + CASE_2 (std::complex, ct::DEVICE_GPU, SINGLE_ARG (STMTS)) \ + default: \ + DEFAULT; \ + break; \ + } #if __CUDA || __ROCM -#define TEMPLATE_ALL_LAMBDA_2(TYPE_ENUM, DEVICE_ENUM, ...) \ -CASES_ALL_LAMBDA_WITH_DEFAULT_2_GPU(TYPE_ENUM, DEVICE_ENUM, (__VA_ARGS__), \ - std::cerr << "Unexpected type: " << TYPE_ENUM; exit(EXIT_FAILURE)); +#define TEMPLATE_ALL_LAMBDA_2(TYPE_ENUM, DEVICE_ENUM, ...) \ + CASES_ALL_LAMBDA_WITH_DEFAULT_2_GPU ( \ + TYPE_ENUM, DEVICE_ENUM, (__VA_ARGS__), std::cerr << "Unexpected type: " << TYPE_ENUM; exit (EXIT_FAILURE)); -#define TEMPLATE_ALL_2(TYPE_ENUM, DEVICE_ENUM, ...) \ -CASES_ALL_WITH_DEFAULT_2_GPU(TYPE_ENUM, DEVICE_ENUM, (__VA_ARGS__), \ - std::cerr << "Unexpected type: " << TYPE_ENUM; exit(EXIT_FAILURE)); +#define TEMPLATE_ALL_2(TYPE_ENUM, DEVICE_ENUM, ...) \ + CASES_ALL_WITH_DEFAULT_2_GPU ( \ + TYPE_ENUM, DEVICE_ENUM, (__VA_ARGS__), std::cerr << "Unexpected type: " << TYPE_ENUM; exit (EXIT_FAILURE)); -#define TEMPLATE_BLAS_2(TYPE_ENUM, DEVICE_ENUM, ...) 
\ -CASES_BLAS_WITH_DEFAULT_2_GPU(TYPE_ENUM, DEVICE_ENUM, (__VA_ARGS__), \ - std::cerr << "Unexpected type: " << TYPE_ENUM; exit(EXIT_FAILURE)); +#define TEMPLATE_BLAS_2(TYPE_ENUM, DEVICE_ENUM, ...) \ + CASES_BLAS_WITH_DEFAULT_2_GPU ( \ + TYPE_ENUM, DEVICE_ENUM, (__VA_ARGS__), std::cerr << "Unexpected type: " << TYPE_ENUM; exit (EXIT_FAILURE)); -#define TEMPLATE_ALL_CALC_2(TYPE_ENUM, DEVICE_ENUM, ...) \ -CASES_ALL_CALC_WITH_DEFAULT_2_GPU(TYPE_ENUM, DEVICE_ENUM, (__VA_ARGS__), \ - std::cerr << "Unexpected type: " << TYPE_ENUM; exit(EXIT_FAILURE)); +#define TEMPLATE_ALL_CALC_2(TYPE_ENUM, DEVICE_ENUM, ...) \ + CASES_ALL_CALC_WITH_DEFAULT_2_GPU ( \ + TYPE_ENUM, DEVICE_ENUM, (__VA_ARGS__), std::cerr << "Unexpected type: " << TYPE_ENUM; exit (EXIT_FAILURE)); #else -#define TEMPLATE_ALL_LAMBDA_2(TYPE_ENUM, DEVICE_ENUM, ...) \ -CASES_ALL_LAMBDA_WITH_DEFAULT_2(TYPE_ENUM, DEVICE_ENUM, (__VA_ARGS__), \ - std::cerr << "Unexpected type: " << TYPE_ENUM; exit(EXIT_FAILURE)); +#define TEMPLATE_ALL_LAMBDA_2(TYPE_ENUM, DEVICE_ENUM, ...) \ + CASES_ALL_LAMBDA_WITH_DEFAULT_2 ( \ + TYPE_ENUM, DEVICE_ENUM, (__VA_ARGS__), std::cerr << "Unexpected type: " << TYPE_ENUM; exit (EXIT_FAILURE)); -#define TEMPLATE_ALL_2(TYPE_ENUM, DEVICE_ENUM, ...) \ -CASES_ALL_WITH_DEFAULT_2(TYPE_ENUM, DEVICE_ENUM, (__VA_ARGS__), \ - std::cerr << "Unexpected type: " << TYPE_ENUM; exit(EXIT_FAILURE)); +#define TEMPLATE_ALL_2(TYPE_ENUM, DEVICE_ENUM, ...) \ + CASES_ALL_WITH_DEFAULT_2 (TYPE_ENUM, DEVICE_ENUM, (__VA_ARGS__), std::cerr << "Unexpected type: " << TYPE_ENUM; \ + exit (EXIT_FAILURE)); -#define TEMPLATE_BLAS_2(TYPE_ENUM, DEVICE_ENUM, ...) \ -CASES_BLAS_WITH_DEFAULT_2(TYPE_ENUM, DEVICE_ENUM, (__VA_ARGS__), \ - std::cerr << "Unexpected type: " << TYPE_ENUM; exit(EXIT_FAILURE)); +#define TEMPLATE_BLAS_2(TYPE_ENUM, DEVICE_ENUM, ...) 
\ + CASES_BLAS_WITH_DEFAULT_2 (TYPE_ENUM, DEVICE_ENUM, (__VA_ARGS__), std::cerr << "Unexpected type: " << TYPE_ENUM; \ + exit (EXIT_FAILURE)); -#define TEMPLATE_ALL_CALC_2(TYPE_ENUM, DEVICE_ENUM, ...) \ -CASES_ALL_WITH_DEFAULT_2(TYPE_ENUM, DEVICE_ENUM, (__VA_ARGS__), \ - std::cerr << "Unexpected type: " << TYPE_ENUM; exit(EXIT_FAILURE)); -#endif - -#define TEMPLATE_CZ_2(TYPE_ENUM, DEVICE_ENUM, ...) \ -CASES_CZ_WITH_DEFAULT_2(TYPE_ENUM, DEVICE_ENUM, (__VA_ARGS__), \ - std::cerr << "Unexpected type: " << TYPE_ENUM; exit(EXIT_FAILURE)); +#define TEMPLATE_ALL_CALC_2(TYPE_ENUM, DEVICE_ENUM, ...) \ + CASES_ALL_WITH_DEFAULT_2 (TYPE_ENUM, DEVICE_ENUM, (__VA_ARGS__), std::cerr << "Unexpected type: " << TYPE_ENUM; \ + exit (EXIT_FAILURE)); +#endif +#define TEMPLATE_CZ_2(TYPE_ENUM, DEVICE_ENUM, ...) \ + CASES_CZ_WITH_DEFAULT_2 (TYPE_ENUM, DEVICE_ENUM, (__VA_ARGS__), std::cerr << "Unexpected type: " << TYPE_ENUM; \ + exit (EXIT_FAILURE)); #if defined(_MSC_VER) #define AT_ALWAYS_INLINE __forceinline #elif __has_attribute(always_inline) || defined(__GNUC__) -#define AT_ALWAYS_INLINE __attribute__((__always_inline__)) inline +#define AT_ALWAYS_INLINE __attribute__ ((__always_inline__)) inline #else #define AT_ALWAYS_INLINE inline #endif diff --git a/source/source_base/module_container/base/macros/rocm.h b/source/source_base/module_container/base/macros/rocm.h index af015b2c40f..1139d3d10bb 100644 --- a/source/source_base/module_container/base/macros/rocm.h +++ b/source/source_base/module_container/base/macros/rocm.h @@ -32,26 +32,27 @@ struct GetTypeThrust> }; #endif // defined(__HCC__) || defined(__HIP__) -static inline hipblasOperation_t GetHipblasOperation(const char& trans) +static inline hipblasOperation_t + GetHipblasOperation (const char& trans) { hipblasOperation_t hip_trans = {}; if (trans == 'N') - { - hip_trans = HIPBLAS_OP_N; - } + { + hip_trans = HIPBLAS_OP_N; + } else if (trans == 'T') - { - hip_trans = HIPBLAS_OP_T; - } + { + hip_trans = HIPBLAS_OP_T; + } else if 
(trans == 'C') - { - hip_trans = HIPBLAS_OP_C; - } + { + hip_trans = HIPBLAS_OP_C; + } else - { - // Handle invalid input or provide a default behavior. - hip_trans = HIPBLAS_OP_N; - } + { + // Handle invalid input or provide a default behavior. + hip_trans = HIPBLAS_OP_N; + } return hip_trans; } @@ -93,155 +94,168 @@ struct GetTypeRocm> static constexpr hipDataType hip_data_type = HIP_C_64F; }; -static inline hipblasFillMode_t hipblas_fill_mode(const char& uplo) +static inline hipblasFillMode_t + hipblas_fill_mode (const char& uplo) { if (uplo == 'U' || uplo == 'u') return HIPBLAS_FILL_MODE_UPPER; else if (uplo == 'L' || uplo == 'l') return HIPBLAS_FILL_MODE_LOWER; else - throw std::runtime_error("hipblas_fill_mode: unknown uplo"); + throw std::runtime_error ("hipblas_fill_mode: unknown uplo"); } -static inline hipblasDiagType_t hipblas_diag_type(const char& diag) +static inline hipblasDiagType_t + hipblas_diag_type (const char& diag) { if (diag == 'U' || diag == 'u') return HIPBLAS_DIAG_UNIT; else if (diag == 'N' || diag == 'n') return HIPBLAS_DIAG_NON_UNIT; else - throw std::runtime_error("hipblas_diag_type: unknown diag"); + throw std::runtime_error ("hipblas_diag_type: unknown diag"); } -static inline hipsolverEigMode_t hipblas_eig_mode(const char& jobz) +static inline hipsolverEigMode_t + hipblas_eig_mode (const char& jobz) { if (jobz == 'N' || jobz == 'n') return HIPSOLVER_EIG_MODE_NOVECTOR; else if (jobz == 'V' || jobz == 'v') return HIPSOLVER_EIG_MODE_VECTOR; else - throw std::runtime_error("hipblas_eig_mode: unknown diag"); + throw std::runtime_error ("hipblas_eig_mode: unknown diag"); } -static inline hipsolverEigType_t hipblas_eig_type(const int& itype) +static inline hipsolverEigType_t + hipblas_eig_type (const int& itype) { if (itype == 1) return HIPSOLVER_EIG_TYPE_1; else if (itype == 2) return HIPSOLVER_EIG_TYPE_2; else - throw std::runtime_error("hipblas_eig_mode: unknown diag"); + throw std::runtime_error ("hipblas_eig_mode: unknown diag"); } 
-static inline hipsolverFillMode_t hipsolver_fill_mode(const char& uplo) +static inline hipsolverFillMode_t + hipsolver_fill_mode (const char& uplo) { if (uplo == 'U' || uplo == 'u') return HIPSOLVER_FILL_MODE_UPPER; else if (uplo == 'L' || uplo == 'l') return HIPSOLVER_FILL_MODE_LOWER; else - throw std::runtime_error("hipsolver_fill_mode: unknown uplo"); + throw std::runtime_error ("hipsolver_fill_mode: unknown uplo"); } // hipSOLVER API errors -static const char* hipsolverGetErrorEnum(hipsolverStatus_t error) +static const char* + hipsolverGetErrorEnum (hipsolverStatus_t error) { switch (error) - { - case HIPSOLVER_STATUS_SUCCESS: - return "HIPSOLVER_STATUS_SUCCESS"; - case HIPSOLVER_STATUS_NOT_INITIALIZED: - return "HIPSOLVER_STATUS_NOT_INITIALIZED"; - case HIPSOLVER_STATUS_ALLOC_FAILED: - return "HIPSOLVER_STATUS_ALLOC_FAILED"; - case HIPSOLVER_STATUS_INVALID_VALUE: - return "HIPSOLVER_STATUS_INVALID_VALUE"; - case HIPSOLVER_STATUS_ARCH_MISMATCH: - return "HIPSOLVER_STATUS_ARCH_MISMATCH"; - case HIPSOLVER_STATUS_MAPPING_ERROR: - return "HIPSOLVER_STATUS_MAPPING_ERROR"; - case HIPSOLVER_STATUS_EXECUTION_FAILED: - return "HIPSOLVER_STATUS_EXECUTION_FAILED"; - case HIPSOLVER_STATUS_INTERNAL_ERROR: - return "HIPSOLVER_STATUS_INTERNAL_ERROR"; - case HIPSOLVER_STATUS_NOT_SUPPORTED: - return "HIPSOLVER_STATUS_NOT_SUPPORTED "; - case HIPSOLVER_STATUS_INVALID_ENUM: - return "HIPSOLVER_STATUS_INVALID_ENUM"; - default: - return "Unknown hipsolverStatus_t message"; - } + { + case HIPSOLVER_STATUS_SUCCESS: + return "HIPSOLVER_STATUS_SUCCESS"; + case HIPSOLVER_STATUS_NOT_INITIALIZED: + return "HIPSOLVER_STATUS_NOT_INITIALIZED"; + case HIPSOLVER_STATUS_ALLOC_FAILED: + return "HIPSOLVER_STATUS_ALLOC_FAILED"; + case HIPSOLVER_STATUS_INVALID_VALUE: + return "HIPSOLVER_STATUS_INVALID_VALUE"; + case HIPSOLVER_STATUS_ARCH_MISMATCH: + return "HIPSOLVER_STATUS_ARCH_MISMATCH"; + case HIPSOLVER_STATUS_MAPPING_ERROR: + return "HIPSOLVER_STATUS_MAPPING_ERROR"; + case 
HIPSOLVER_STATUS_EXECUTION_FAILED: + return "HIPSOLVER_STATUS_EXECUTION_FAILED"; + case HIPSOLVER_STATUS_INTERNAL_ERROR: + return "HIPSOLVER_STATUS_INTERNAL_ERROR"; + case HIPSOLVER_STATUS_NOT_SUPPORTED: + return "HIPSOLVER_STATUS_NOT_SUPPORTED "; + case HIPSOLVER_STATUS_INVALID_ENUM: + return "HIPSOLVER_STATUS_INVALID_ENUM"; + default: + return "Unknown hipsolverStatus_t message"; + } } -inline void hipsolverAssert(hipsolverStatus_t code, const char* file, int line, bool abort = true) +inline void + hipsolverAssert (hipsolverStatus_t code, const char* file, int line, bool abort = true) { if (code != HIPSOLVER_STATUS_SUCCESS) - { - fprintf(stderr, "hipSOLVER Assert: %s %s %d\n", hipsolverGetErrorEnum(code), file, line); - if (abort) - exit(code); - } + { + fprintf (stderr, "hipSOLVER Assert: %s %s %d\n", hipsolverGetErrorEnum (code), file, line); + if (abort) + exit (code); + } } // hipSOLVER API errors -static const char* hipblasGetErrorEnum(hipblasStatus_t error) +static const char* + hipblasGetErrorEnum (hipblasStatus_t error) { switch (error) - { - case HIPBLAS_STATUS_SUCCESS: - return "HIPBLAS_STATUS_SUCCESS"; - case HIPBLAS_STATUS_NOT_INITIALIZED: - return "HIPBLAS_STATUS_NOT_INITIALIZED"; - case HIPBLAS_STATUS_ALLOC_FAILED: - return "HIPBLAS_STATUS_ALLOC_FAILED"; - case HIPBLAS_STATUS_INVALID_VALUE: - return "HIPBLAS_STATUS_INVALID_VALUE"; - case HIPBLAS_STATUS_ARCH_MISMATCH: - return "HIPBLAS_STATUS_ARCH_MISMATCH"; - case HIPBLAS_STATUS_MAPPING_ERROR: - return "HIPBLAS_STATUS_MAPPING_ERROR"; - case HIPBLAS_STATUS_EXECUTION_FAILED: - return "HIPBLAS_STATUS_EXECUTION_FAILED"; - case HIPBLAS_STATUS_INTERNAL_ERROR: - return "HIPBLAS_STATUS_INTERNAL_ERROR"; - default: - return "Unknown"; - } + { + case HIPBLAS_STATUS_SUCCESS: + return "HIPBLAS_STATUS_SUCCESS"; + case HIPBLAS_STATUS_NOT_INITIALIZED: + return "HIPBLAS_STATUS_NOT_INITIALIZED"; + case HIPBLAS_STATUS_ALLOC_FAILED: + return "HIPBLAS_STATUS_ALLOC_FAILED"; + case HIPBLAS_STATUS_INVALID_VALUE: + return 
"HIPBLAS_STATUS_INVALID_VALUE"; + case HIPBLAS_STATUS_ARCH_MISMATCH: + return "HIPBLAS_STATUS_ARCH_MISMATCH"; + case HIPBLAS_STATUS_MAPPING_ERROR: + return "HIPBLAS_STATUS_MAPPING_ERROR"; + case HIPBLAS_STATUS_EXECUTION_FAILED: + return "HIPBLAS_STATUS_EXECUTION_FAILED"; + case HIPBLAS_STATUS_INTERNAL_ERROR: + return "HIPBLAS_STATUS_INTERNAL_ERROR"; + default: + return "Unknown"; + } } -inline void hipblasAssert(hipblasStatus_t code, const char* file, int line, bool abort = true) +inline void + hipblasAssert (hipblasStatus_t code, const char* file, int line, bool abort = true) { if (code != HIPBLAS_STATUS_SUCCESS) - { - fprintf(stderr, "Unexpected hipBLAS Error: %s %s %d\n", hipblasGetErrorEnum(code), file, line); - if (abort) - exit(code); - } + { + fprintf (stderr, "Unexpected hipBLAS Error: %s %s %d\n", hipblasGetErrorEnum (code), file, line); + if (abort) + exit (code); + } } #define hipsolverErrcheck(res) \ { \ - hipsolverAssert((res), __FILE__, __LINE__); \ + hipsolverAssert ((res), __FILE__, __LINE__); \ } #define hipblasErrcheck(res) \ { \ - hipblasAssert((res), __FILE__, __LINE__); \ + hipblasAssert ((res), __FILE__, __LINE__); \ } // ROCM API errors #define hipErrcheck(res) \ { \ if (res != hipSuccess) \ - { \ - fprintf(stderr, " Unexpected Device Error %s:%d: %s, %s\n", __FILE__, __LINE__, hipGetErrorName(res), \ - hipGetErrorString(res)); \ - exit(res); \ - } \ + { \ + fprintf (stderr, \ + " Unexpected Device Error %s:%d: %s, %s\n", \ + __FILE__, \ + __LINE__, \ + hipGetErrorName (res), \ + hipGetErrorString (res)); \ + exit (res); \ + } \ } #ifdef __DEBUG -#define hipCheckOnDebug() hipErrcheck(hipDeviceSynchronize()) +#define hipCheckOnDebug() hipErrcheck (hipDeviceSynchronize ()) #else #define hipCheckOnDebug() #endif diff --git a/source/source_base/module_container/base/third_party/blas.h b/source/source_base/module_container/base/third_party/blas.h index 1fdbac67b2b..79ac65490f2 100644 --- 
a/source/source_base/module_container/base/third_party/blas.h +++ b/source/source_base/module_container/base/third_party/blas.h @@ -11,374 +11,644 @@ extern "C" { -// level 1: std::vector-std::vector operations, O(n) data and O(n) work. - -// Peize Lin add ?scal 2016-08-04, to compute x=a*x -void sscal_(const int *N, const float *alpha, float *x, const int *incx); -void dscal_(const int *N, const double *alpha, double *x, const int *incx); -void cscal_(const int *N, const std::complex *alpha, std::complex *x, const int *incx); -void zscal_(const int *N, const std::complex *alpha, std::complex *x, const int *incx); - -// Peize Lin add ?axpy 2016-08-04, to compute y=a*x+y -void saxpy_(const int *N, const float *alpha, const float *x, const int *incx, float *y, const int *incy); -void daxpy_(const int *N, const double *alpha, const double *x, const int *incx, double *y, const int *incy); -void caxpy_(const int *N, const std::complex *alpha, const std::complex *x, const int *incx, std::complex *y, const int *incy); -void zaxpy_(const int *N, const std::complex *alpha, const std::complex *x, const int *incx, std::complex *y, const int *incy); - -void scopy_(const int *n, const float *a, const int *incx, float *b, int const *incy); -void dcopy_(const int *n, const double *a, const int *incx, double *b, int const *incy); -void ccopy_(const int *n, const std::complex *a, const int *incx, std::complex *b, int const *incy); -void zcopy_(const int *n, const std::complex *a, const int *incx, std::complex *b, int const *incy); - - -//reason for passing results as argument instead of returning it: -//see https://www.numbercrunch.de/blog/2014/07/lost-in-translation/ -void cdotc_(const int *n, const std::complex *zx, const int *incx, - const std::complex *zy, const int *incy, std::complex *result); -void zdotc_(const int *n, const std::complex *zx, const int *incx, - const std::complex *zy, const int *incy, std::complex *result); -// Peize Lin add ?dot 2017-10-27, to compute d=x*y 
-float sdot_(const int *N, const float *x, const int *incx, const float *y, const int *incy); -double ddot_(const int *N, const double *x, const int *incx, const double *y, const int *incy); - -// Peize Lin add ?nrm2 2018-06-12, to compute out = ||x||_2 = \sqrt{ \sum_i x_i**2 } -float snrm2_( const int *n, const float *x, const int *incx ); -double dnrm2_( const int *n, const double *x, const int *incx ); -float scnrm2_( const int *n, const std::complex *x, const int *incx ); -double dznrm2_( const int *n, const std::complex *x, const int *incx ); - -// level 2: matrix-std::vector operations, O(n^2) data and O(n^2) work. -void sgemv_(const char*const transa, const int*const m, const int*const n, - const float*const alpha, const float*const a, const int*const lda, const float*const x, const int*const incx, - const float*const eta, float*const y, const int*const incy); -void dgemv_(const char*const transa, const int*const m, const int*const n, - const double*const alpha, const double*const a, const int*const lda, const double*const x, const int*const incx, - const double*const beta, double*const y, const int*const incy); - -void cgemv_(const char *trans, const int *m, const int *n, const std::complex *alpha, - const std::complex *a, const int *lda, const std::complex *x, const int *incx, - const std::complex *beta, std::complex *y, const int *incy); - -void zgemv_(const char *trans, const int *m, const int *n, const std::complex *alpha, - const std::complex *a, const int *lda, const std::complex *x, const int *incx, - const std::complex *beta, std::complex *y, const int *incy); - -void dsymv_(const char *uplo, const int *n, - const double *alpha, const double *a, const int *lda, - const double *x, const int *incx, - const double *beta, double *y, const int *incy); - -// A := alpha x * y.T + A -void dger_(const int* m, - const int* n, - const double* alpha, - const double* x, - const int* incx, - const double* y, - const int* incy, - double* a, - const int* lda); 
-void zgerc_(const int* m, - const int* n, - const std::complex* alpha, - const std::complex* x, - const int* incx, - const std::complex* y, - const int* incy, - std::complex* a, - const int* lda); - -// level 3: matrix-matrix operations, O(n^2) data and O(n^3) work. - -// Peize Lin add ?gemm 2017-10-27, to compute C = a * A.? * B.? + b * C -// A is general -void sgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const float *alpha, const float *a, const int *lda, const float *b, const int *ldb, - const float *beta, float *c, const int *ldc); -void dgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, - const double *beta, double *c, const int *ldc); -void cgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, const std::complex *b, const int *ldb, - const std::complex *beta, std::complex *c, const int *ldc); -void zgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, const std::complex *b, const int *ldb, - const std::complex *beta, std::complex *c, const int *ldc); - - -//a is symmetric -void dsymm_(const char *side, const char *uplo, const int *m, const int *n, - const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, - const double *beta, double *c, const int *ldc); -//a is hermitian -void zhemm_(const char *side, const char *uplo, - const int *m, const int *n, - const std::complex *alpha, - const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, - const std::complex *beta, - std::complex *c, const int *ldc); - -//solving triangular matrix with multiple right hand sides -void dtrsm_(const char *side, const char *uplo, const char *transa, const 
char *diag, - const int *m, const int *n, - const double *alpha, - const double *a, const int *lda, - double *b, const int *ldb); - -void ztrsm_(const char *side, const char *uplo, const char *transa, const char *diag, - const int *m, const int *n, - const std::complex *alpha, - const std::complex *a, const int *lda, - std::complex *b, const int *ldb); - -} - -namespace container { + // level 1: std::vector-std::vector operations, O(n) data and O(n) work. + + // Peize Lin add ?scal 2016-08-04, to compute x=a*x + void sscal_ (const int* N, const float* alpha, float* x, const int* incx); + void dscal_ (const int* N, const double* alpha, double* x, const int* incx); + void cscal_ (const int* N, const std::complex* alpha, std::complex* x, const int* incx); + void zscal_ (const int* N, const std::complex* alpha, std::complex* x, const int* incx); + + // Peize Lin add ?axpy 2016-08-04, to compute y=a*x+y + void saxpy_ (const int* N, const float* alpha, const float* x, const int* incx, float* y, const int* incy); + void daxpy_ (const int* N, const double* alpha, const double* x, const int* incx, double* y, const int* incy); + void caxpy_ (const int* N, + const std::complex* alpha, + const std::complex* x, + const int* incx, + std::complex* y, + const int* incy); + void zaxpy_ (const int* N, + const std::complex* alpha, + const std::complex* x, + const int* incx, + std::complex* y, + const int* incy); + + void scopy_ (const int* n, const float* a, const int* incx, float* b, int const* incy); + void dcopy_ (const int* n, const double* a, const int* incx, double* b, int const* incy); + void ccopy_ (const int* n, const std::complex* a, const int* incx, std::complex* b, int const* incy); + void + zcopy_ (const int* n, const std::complex* a, const int* incx, std::complex* b, int const* incy); + + // reason for passing results as argument instead of returning it: + // see https://www.numbercrunch.de/blog/2014/07/lost-in-translation/ + void cdotc_ (const int* n, + const 
std::complex* zx, + const int* incx, + const std::complex* zy, + const int* incy, + std::complex* result); + void zdotc_ (const int* n, + const std::complex* zx, + const int* incx, + const std::complex* zy, + const int* incy, + std::complex* result); + // Peize Lin add ?dot 2017-10-27, to compute d=x*y + float sdot_ (const int* N, const float* x, const int* incx, const float* y, const int* incy); + double ddot_ (const int* N, const double* x, const int* incx, const double* y, const int* incy); + + // Peize Lin add ?nrm2 2018-06-12, to compute out = ||x||_2 = \sqrt{ \sum_i x_i**2 } + float snrm2_ (const int* n, const float* x, const int* incx); + double dnrm2_ (const int* n, const double* x, const int* incx); + float scnrm2_ (const int* n, const std::complex* x, const int* incx); + double dznrm2_ (const int* n, const std::complex* x, const int* incx); + + // level 2: matrix-std::vector operations, O(n^2) data and O(n^2) work. + void sgemv_ (const char* const transa, + const int* const m, + const int* const n, + const float* const alpha, + const float* const a, + const int* const lda, + const float* const x, + const int* const incx, + const float* const eta, + float* const y, + const int* const incy); + void dgemv_ (const char* const transa, + const int* const m, + const int* const n, + const double* const alpha, + const double* const a, + const int* const lda, + const double* const x, + const int* const incx, + const double* const beta, + double* const y, + const int* const incy); + + void cgemv_ (const char* trans, + const int* m, + const int* n, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* x, + const int* incx, + const std::complex* beta, + std::complex* y, + const int* incy); + + void zgemv_ (const char* trans, + const int* m, + const int* n, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* x, + const int* incx, + const std::complex* beta, + std::complex* y, + const int* 
incy); + + void dsymv_ (const char* uplo, + const int* n, + const double* alpha, + const double* a, + const int* lda, + const double* x, + const int* incx, + const double* beta, + double* y, + const int* incy); + + // A := alpha x * y.T + A + void dger_ (const int* m, + const int* n, + const double* alpha, + const double* x, + const int* incx, + const double* y, + const int* incy, + double* a, + const int* lda); + void zgerc_ (const int* m, + const int* n, + const std::complex* alpha, + const std::complex* x, + const int* incx, + const std::complex* y, + const int* incy, + std::complex* a, + const int* lda); + + // level 3: matrix-matrix operations, O(n^2) data and O(n^3) work. + + // Peize Lin add ?gemm 2017-10-27, to compute C = a * A.? * B.? + b * C + // A is general + void sgemm_ (const char* transa, + const char* transb, + const int* m, + const int* n, + const int* k, + const float* alpha, + const float* a, + const int* lda, + const float* b, + const int* ldb, + const float* beta, + float* c, + const int* ldc); + void dgemm_ (const char* transa, + const char* transb, + const int* m, + const int* n, + const int* k, + const double* alpha, + const double* a, + const int* lda, + const double* b, + const int* ldb, + const double* beta, + double* c, + const int* ldc); + void cgemm_ (const char* transa, + const char* transb, + const int* m, + const int* n, + const int* k, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* b, + const int* ldb, + const std::complex* beta, + std::complex* c, + const int* ldc); + void zgemm_ (const char* transa, + const char* transb, + const int* m, + const int* n, + const int* k, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* b, + const int* ldb, + const std::complex* beta, + std::complex* c, + const int* ldc); + + // a is symmetric + void dsymm_ (const char* side, + const char* uplo, + const int* m, + const int* n, + const double* alpha, + const 
double* a, + const int* lda, + const double* b, + const int* ldb, + const double* beta, + double* c, + const int* ldc); + // a is hermitian + void zhemm_ (const char* side, + const char* uplo, + const int* m, + const int* n, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* b, + const int* ldb, + const std::complex* beta, + std::complex* c, + const int* ldc); + + // solving triangular matrix with multiple right hand sides + void dtrsm_ (const char* side, + const char* uplo, + const char* transa, + const char* diag, + const int* m, + const int* n, + const double* alpha, + const double* a, + const int* lda, + double* b, + const int* ldb); + + void ztrsm_ (const char* side, + const char* uplo, + const char* transa, + const char* diag, + const int* m, + const int* n, + const std::complex* alpha, + const std::complex* a, + const int* lda, + std::complex* b, + const int* ldb); +} + +namespace container +{ // Class BlasConnector provide the connector to fortran lapack routine. // The entire function in this class are static and inline function. // Usage example: BlasConnector::functionname(parameter list). 
-namespace BlasConnector { +namespace BlasConnector +{ -static inline -void axpy( const int& n, const float& alpha, const float *x, const int& incx, float *y, const int& incy) +static inline void + axpy (const int& n, const float& alpha, const float* x, const int& incx, float* y, const int& incy) { - saxpy_(&n, &alpha, x, &incx, y, &incy); + saxpy_ (&n, &alpha, x, &incx, y, &incy); } -static inline -void axpy( const int& n, const double& alpha, const double *x, const int& incx, double *y, const int& incy) +static inline void + axpy (const int& n, const double& alpha, const double* x, const int& incx, double* y, const int& incy) { - daxpy_(&n, &alpha, x, &incx, y, &incy); + daxpy_ (&n, &alpha, x, &incx, y, &incy); } -static inline -void axpy( const int& n, const std::complex& alpha, const std::complex *x, const int& incx, std::complex *y, const int& incy) +static inline void + axpy (const int& n, + const std::complex& alpha, + const std::complex* x, + const int& incx, + std::complex* y, + const int& incy) { - caxpy_(&n, &alpha, x, &incx, y, &incy); + caxpy_ (&n, &alpha, x, &incx, y, &incy); } -static inline -void axpy( const int& n, const std::complex& alpha, const std::complex *x, const int& incx, std::complex *y, const int& incy) +static inline void + axpy (const int& n, + const std::complex& alpha, + const std::complex* x, + const int& incx, + std::complex* y, + const int& incy) { - zaxpy_(&n, &alpha, x, &incx, y, &incy); + zaxpy_ (&n, &alpha, x, &incx, y, &incy); } // Peize Lin add 2016-08-04 // x=a*x -static inline -void scal( const int& n, const float& alpha, float *x, const int& incx) +static inline void + scal (const int& n, const float& alpha, float* x, const int& incx) { - sscal_(&n, &alpha, x, &incx); + sscal_ (&n, &alpha, x, &incx); } -static inline -void scal( const int& n, const double& alpha, double *x, const int& incx) +static inline void + scal (const int& n, const double& alpha, double* x, const int& incx) { - dscal_(&n, &alpha, x, &incx); + dscal_ 
(&n, &alpha, x, &incx); } -static inline -void scal( const int& n, const std::complex& alpha, std::complex *x, const int& incx) +static inline void + scal (const int& n, const std::complex& alpha, std::complex* x, const int& incx) { - cscal_(&n, &alpha, x, &incx); + cscal_ (&n, &alpha, x, &incx); } -static inline -void scal( const int& n, const std::complex& alpha, std::complex *x, const int& incx) +static inline void + scal (const int& n, const std::complex& alpha, std::complex* x, const int& incx) { - zscal_(&n, &alpha, x, &incx); + zscal_ (&n, &alpha, x, &incx); } // Peize Lin add 2017-10-27 // d=x*y -static inline -float dot( const int& n, const float *x, const int& incx, const float *y, const int& incy) +static inline float + dot (const int& n, const float* x, const int& incx, const float* y, const int& incy) { - return sdot_(&n, x, &incx, y, &incy); + return sdot_ (&n, x, &incx, y, &incy); } -static inline -double dot( const int& n, const double *x, const int& incx, const double *y, const int& incy) +static inline double + dot (const int& n, const double* x, const int& incx, const double* y, const int& incy) { - return ddot_(&n, x, &incx, y, &incy); + return ddot_ (&n, x, &incx, y, &incy); } // Denghui Lu add 2023-8-01 -static inline -std::complex dot(const int& n, const std::complex *x, const int& incx, const std::complex *y, const int& incy) +static inline std::complex + dot (const int& n, const std::complex* x, const int& incx, const std::complex* y, const int& incy) { std::complex result = {0, 0}; // cdotc_(&n, x, &incx, y, &incy, &result); - for (int ii = 0; ii < n; ii++) { - result += std::conj(x[ii * incx]) * y[ii * incy]; - } + for (int ii = 0; ii < n; ii++) + { + result += std::conj (x[ii * incx]) * y[ii * incy]; + } return result; } -static inline -std::complex dot(const int& n, const std::complex *x, const int& incx, const std::complex *y, const int& incy) +static inline std::complex + dot (const int& n, const std::complex* x, const int& incx, 
const std::complex* y, const int& incy) { std::complex result = {0, 0}; // zdotc_(&n, x, &incx, y, &incy, &result); - for (int ii = 0; ii < n; ii++) { - result += std::conj(x[ii * incx]) * y[ii * incy]; - } + for (int ii = 0; ii < n; ii++) + { + result += std::conj (x[ii * incx]) * y[ii * incy]; + } return result; } // Peize Lin add 2017-10-27, fix bug trans 2019-01-17 // C = a * A.? * B.? + b * C -static inline -void gemm(const char& transa, const char& transb, const int& m, const int& n, const int& k, - const float& alpha, const float* A, const int& lda, const float* B, const int& ldb, - const float& beta, float* C, const int& ldc) -{ - sgemm_(&transa, &transb, &m, &n, &k, - &alpha, A, &lda, B, &ldb, - &beta, C, &ldc); -} -static inline -void gemm(const char& transa, const char& transb, const int& m, const int& n, const int& k, - const double& alpha, const double* A, const int& lda, const double* B, const int& ldb, - const double& beta, double* C, const int& ldc) -{ - dgemm_(&transa, &transb, &m, &n, &k, - &alpha, A, &lda, B, &ldb, - &beta, C, &ldc); -} -static inline -void gemm(const char& transa, const char& transb, const int& m, const int& n, const int& k, - const std::complex& alpha, const std::complex* A, const int& lda, const std::complex* B, const int& ldb, - const std::complex& beta, std::complex* C, const int& ldc) -{ - cgemm_(&transa, &transb, &m, &n, &k, - &alpha, A, &lda, B, &ldb, - &beta, C, &ldc); -} -static inline -void gemm(const char& transa, const char& transb, const int& m, const int& n, const int& k, - const std::complex& alpha, const std::complex* A, const int& lda, const std::complex* B, const int& ldb, - const std::complex& beta, std::complex* C, const int& ldc) -{ - zgemm_(&transa, &transb, &m, &n, &k, - &alpha, A, &lda, B, &ldb, - &beta, C, &ldc); +static inline void + gemm (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const float& alpha, + const float* A, + const int& lda, + const float* B, 
+ const int& ldb, + const float& beta, + float* C, + const int& ldc) +{ + sgemm_ (&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); +} +static inline void + gemm (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const double& alpha, + const double* A, + const int& lda, + const double* B, + const int& ldb, + const double& beta, + double* C, + const int& ldc) +{ + dgemm_ (&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); +} +static inline void + gemm (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const std::complex& alpha, + const std::complex* A, + const int& lda, + const std::complex* B, + const int& ldb, + const std::complex& beta, + std::complex* C, + const int& ldc) +{ + cgemm_ (&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); +} +static inline void + gemm (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const std::complex& alpha, + const std::complex* A, + const int& lda, + const std::complex* B, + const int& ldb, + const std::complex& beta, + std::complex* C, + const int& ldc) +{ + zgemm_ (&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } template -static inline -void gemm_batched(const char& transa, const char& transb, const int& m, const int& n, const int& k, - const T& alpha, T** A, const int& lda, T** B, const int& ldb, - const T& beta, T** C, const int& ldc, const int& batch_size) -{ - for (int ii = 0; ii < batch_size; ++ii) { - // Call the single GEMV for each pair of matrix A[ii] and vector x[ii] - BlasConnector::gemm(transa, transb, m, n, k, alpha, A[ii], lda, B[ii], ldb, beta, C[ii], ldc); - } +static inline void + gemm_batched (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const T& alpha, + T** A, + const int& lda, + T** B, + const int& ldb, + const T& beta, + T** C, + const int& 
ldc, + const int& batch_size) +{ + for (int ii = 0; ii < batch_size; ++ii) + { + // Call the single GEMV for each pair of matrix A[ii] and vector x[ii] + BlasConnector::gemm (transa, transb, m, n, k, alpha, A[ii], lda, B[ii], ldb, beta, C[ii], ldc); + } } template -static inline -void gemm_batched_strided(const char& transa, const char& transb, const int& m, const int& n, const int& k, - const T& alpha, const T* A, const int& lda, const int& stride_a, const T* B, const int& ldb, const int& stride_b, - const T& beta, T* C, const int& ldc, const int& stride_c, const int& batch_size) -{ - for (int ii = 0; ii < batch_size; ii++) { - // Call the single GEMV for each pair of matrix A[ii] and vector x[ii] - BlasConnector::gemm(transa, transb, m, n, k, alpha, A + ii * stride_a, lda, B + ii * stride_b, ldb, beta, C + ii * stride_c, ldc); - } -} - -static inline -void gemv(const char& trans, const int& m, const int& n, - const float& alpha, const float *A, const int& lda, const float *x, const int& incx, - const float& beta, float *y, const int& incy) -{ - sgemv_(&trans, &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); -} -static inline -void gemv(const char& trans, const int& m, const int& n, - const double& alpha, const double *A, const int& lda, const double *x, const int& incx, - const double& beta, double *y, const int& incy) -{ - dgemv_(&trans, &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); -} -static inline -void gemv(const char& trans, const int& m, const int& n, - const std::complex& alpha, const std::complex *A, const int& lda, const std::complex *x, const int& incx, - const std::complex& beta, std::complex *y, const int& incy) -{ - cgemv_(&trans, &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); -} -static inline -void gemv(const char& trans, const int& m, const int& n, - const std::complex& alpha, const std::complex *A, const int& lda, const std::complex *x, const int& incx, - const std::complex& beta, std::complex *y, const int& incy) -{ - 
zgemv_(&trans, &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); +static inline void + gemm_batched_strided (const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const T& alpha, + const T* A, + const int& lda, + const int& stride_a, + const T* B, + const int& ldb, + const int& stride_b, + const T& beta, + T* C, + const int& ldc, + const int& stride_c, + const int& batch_size) +{ + for (int ii = 0; ii < batch_size; ii++) + { + // Call the single GEMV for each pair of matrix A[ii] and vector x[ii] + BlasConnector::gemm (transa, + transb, + m, + n, + k, + alpha, + A + ii * stride_a, + lda, + B + ii * stride_b, + ldb, + beta, + C + ii * stride_c, + ldc); + } +} + +static inline void + gemv (const char& trans, + const int& m, + const int& n, + const float& alpha, + const float* A, + const int& lda, + const float* x, + const int& incx, + const float& beta, + float* y, + const int& incy) +{ + sgemv_ (&trans, &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); +} +static inline void + gemv (const char& trans, + const int& m, + const int& n, + const double& alpha, + const double* A, + const int& lda, + const double* x, + const int& incx, + const double& beta, + double* y, + const int& incy) +{ + dgemv_ (&trans, &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); +} +static inline void + gemv (const char& trans, + const int& m, + const int& n, + const std::complex& alpha, + const std::complex* A, + const int& lda, + const std::complex* x, + const int& incx, + const std::complex& beta, + std::complex* y, + const int& incy) +{ + cgemv_ (&trans, &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); +} +static inline void + gemv (const char& trans, + const int& m, + const int& n, + const std::complex& alpha, + const std::complex* A, + const int& lda, + const std::complex* x, + const int& incx, + const std::complex& beta, + std::complex* y, + const int& incy) +{ + zgemv_ (&trans, &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); } 
template -static inline -void gemv_batched(const char& trans, const int& m, const int& n, - const T& alpha, T** A, const int& lda, T** x, const int& incx, - const T& beta, T** y, const int& incy, const int& batch_size) -{ - for (int ii = 0; ii < batch_size; ++ii) { - // Call the single GEMV for each pair of matrix A[ii] and vector x[ii] - BlasConnector::gemv(trans, m, n, alpha, A[ii], lda, x[ii], incy, beta, y[ii], incy); - } +static inline void + gemv_batched (const char& trans, + const int& m, + const int& n, + const T& alpha, + T** A, + const int& lda, + T** x, + const int& incx, + const T& beta, + T** y, + const int& incy, + const int& batch_size) +{ + for (int ii = 0; ii < batch_size; ++ii) + { + // Call the single GEMV for each pair of matrix A[ii] and vector x[ii] + BlasConnector::gemv (trans, m, n, alpha, A[ii], lda, x[ii], incy, beta, y[ii], incy); + } } template -static inline -void gemv_batched_strided(const char& transa, const int& m, const int& n, - const T& alpha, const T* A, const int& lda, const int& stride_a, const T* x, const int& incx, const int& stride_x, - const T& beta, T* y, const int& incy, const int& stride_y, const int& batch_size) -{ - for (int ii = 0; ii < batch_size; ii++) { - // Call the single GEMV for each pair of matrix A[ii] and vector x[ii] - BlasConnector::gemv(transa, m, n, alpha, A + ii * stride_a, lda, x + ii * stride_x, incx, beta, y + ii * stride_y, incy); - } +static inline void + gemv_batched_strided (const char& transa, + const int& m, + const int& n, + const T& alpha, + const T* A, + const int& lda, + const int& stride_a, + const T* x, + const int& incx, + const int& stride_x, + const T& beta, + T* y, + const int& incy, + const int& stride_y, + const int& batch_size) +{ + for (int ii = 0; ii < batch_size; ii++) + { + // Call the single GEMV for each pair of matrix A[ii] and vector x[ii] + BlasConnector::gemv (transa, + m, + n, + alpha, + A + ii * stride_a, + lda, + x + ii * stride_x, + incx, + beta, + y + ii * stride_y, 
+ incy); + } } // Peize Lin add 2018-06-12 // out = ||x||_2 -static inline -float nrm2( const int n, const float *x, const int incx ) +static inline float + nrm2 (const int n, const float* x, const int incx) { - return snrm2_( &n, x, &incx ); + return snrm2_ (&n, x, &incx); } -static inline -double nrm2( const int n, const double *x, const int incx ) +static inline double + nrm2 (const int n, const double* x, const int incx) { - return dnrm2_( &n, x, &incx ); + return dnrm2_ (&n, x, &incx); } -static inline -double nrm2( const int n, const std::complex *x, const int incx ) +static inline double + nrm2 (const int n, const std::complex* x, const int incx) { - return scnrm2_( &n, x, &incx ); + return scnrm2_ (&n, x, &incx); } -static inline -double nrm2( const int n, const std::complex *x, const int incx ) +static inline double + nrm2 (const int n, const std::complex* x, const int incx) { - return dznrm2_( &n, x, &incx ); + return dznrm2_ (&n, x, &incx); } // copies a into b -static inline -void copy(const int n, const float *a, const int incx, float *b, const int incy) +static inline void + copy (const int n, const float* a, const int incx, float* b, const int incy) { - scopy_(&n, a, &incx, b, &incy); + scopy_ (&n, a, &incx, b, &incy); } -static inline -void copy(const int n, const double *a, const int incx, double *b, const int incy) +static inline void + copy (const int n, const double* a, const int incx, double* b, const int incy) { - dcopy_(&n, a, &incx, b, &incy); + dcopy_ (&n, a, &incx, b, &incy); } -static inline -void copy(const int n, const std::complex *a, const int incx, std::complex *b, const int incy) +static inline void + copy (const int n, const std::complex* a, const int incx, std::complex* b, const int incy) { - ccopy_(&n, a, &incx, b, &incy); + ccopy_ (&n, a, &incx, b, &incy); } -static inline -void copy(const int n, const std::complex *a, const int incx, std::complex *b, const int incy) +static inline void + copy (const int n, const std::complex* 
a, const int incx, std::complex* b, const int incy) { - zcopy_(&n, a, &incx, b, &incy); + zcopy_ (&n, a, &incx, b, &incy); } } // namespace BlasConnector diff --git a/source/source_base/module_container/base/third_party/cublas.h b/source/source_base/module_container/base/third_party/cublas.h index fabc32e9836..f7db83f4ebc 100644 --- a/source/source_base/module_container/base/third_party/cublas.h +++ b/source/source_base/module_container/base/third_party/cublas.h @@ -5,356 +5,811 @@ #include #include -namespace container { -namespace cuBlasConnector { - -static inline -void copy(cublasHandle_t& handle, const int& n, const float *x, const int& incx, float *y, const int& incy) -{ - CHECK_CUBLAS(cublasScopy(handle, n, x, incx, y, incy)); -} -static inline -void copy(cublasHandle_t& handle, const int& n, const double *x, const int& incx, double *y, const int& incy) -{ - CHECK_CUBLAS(cublasDcopy(handle, n, x, incx, y, incy)); -} -static inline -void copy(cublasHandle_t& handle, const int& n, const std::complex *x, const int& incx, std::complex *y, const int& incy) +namespace container { - CHECK_CUBLAS(cublasCcopy(handle, n, reinterpret_cast(x), incx, reinterpret_cast(y), incy)); -} -static inline -void copy(cublasHandle_t& handle, const int& n, const std::complex *x, const int& incx, std::complex *y, const int& incy) +namespace cuBlasConnector { - CHECK_CUBLAS(cublasZcopy(handle, n, reinterpret_cast(x), incx, reinterpret_cast(y), incy)); -} -static inline -void nrm2(cublasHandle_t& handle, const int& n, const float *x, const int& incx, float* result) -{ - CHECK_CUBLAS(cublasSnrm2(handle, n, x, incx, result)); -} -static inline -void nrm2(cublasHandle_t& handle, const int& n, const double *x, const int& incx, double* result) -{ - CHECK_CUBLAS(cublasDnrm2(handle, n, x, incx, result)); -} -static inline -void nrm2(cublasHandle_t& handle, const int& n, const std::complex *x, const int& incx, float* result) -{ - CHECK_CUBLAS(cublasScnrm2(handle, n, reinterpret_cast(x), incx, 
result)); -} -static inline -void nrm2(cublasHandle_t& handle, const int& n, const std::complex *x, const int& incx, double* result) -{ - CHECK_CUBLAS(cublasDznrm2(handle, n, reinterpret_cast(x), incx, result)); +static inline void + copy (cublasHandle_t& handle, const int& n, const float* x, const int& incx, float* y, const int& incy) +{ + CHECK_CUBLAS (cublasScopy (handle, n, x, incx, y, incy)); +} +static inline void + copy (cublasHandle_t& handle, const int& n, const double* x, const int& incx, double* y, const int& incy) +{ + CHECK_CUBLAS (cublasDcopy (handle, n, x, incx, y, incy)); +} +static inline void + copy (cublasHandle_t& handle, + const int& n, + const std::complex* x, + const int& incx, + std::complex* y, + const int& incy) +{ + CHECK_CUBLAS ( + cublasCcopy (handle, n, reinterpret_cast (x), incx, reinterpret_cast (y), incy)); +} +static inline void + copy (cublasHandle_t& handle, + const int& n, + const std::complex* x, + const int& incx, + std::complex* y, + const int& incy) +{ + CHECK_CUBLAS (cublasZcopy (handle, + n, + reinterpret_cast (x), + incx, + reinterpret_cast (y), + incy)); } -static inline -void dot(cublasHandle_t& handle, const int& n, const float *x, const int& incx, const float *y, const int& incy, float* result) +static inline void + nrm2 (cublasHandle_t& handle, const int& n, const float* x, const int& incx, float* result) { - CHECK_CUBLAS(cublasSdot(handle, n, x, incx, y, incy, result)); + CHECK_CUBLAS (cublasSnrm2 (handle, n, x, incx, result)); } -static inline -void dot(cublasHandle_t& handle, const int& n, const double *x, const int& incx, const double *y, const int& incy, double* result) +static inline void + nrm2 (cublasHandle_t& handle, const int& n, const double* x, const int& incx, double* result) { - CHECK_CUBLAS(cublasDdot(handle, n, x, incx, y, incy, result)); + CHECK_CUBLAS (cublasDnrm2 (handle, n, x, incx, result)); } -static inline -void dot(cublasHandle_t& handle, const int& n, const std::complex *x, const int& incx, 
const std::complex *y, const int& incy, std::complex* result) +static inline void + nrm2 (cublasHandle_t& handle, const int& n, const std::complex* x, const int& incx, float* result) { - CHECK_CUBLAS(cublasCdotc(handle, n, reinterpret_cast(x), incx, reinterpret_cast(y), incy, reinterpret_cast(result))); + CHECK_CUBLAS (cublasScnrm2 (handle, n, reinterpret_cast (x), incx, result)); } -static inline -void dot(cublasHandle_t& handle, const int& n, const std::complex *x, const int& incx, const std::complex *y, const int& incy, std::complex* result) +static inline void + nrm2 (cublasHandle_t& handle, const int& n, const std::complex* x, const int& incx, double* result) { - CHECK_CUBLAS(cublasZdotc(handle, n, reinterpret_cast(x), incx, reinterpret_cast(y), incy, reinterpret_cast(result))); + CHECK_CUBLAS (cublasDznrm2 (handle, n, reinterpret_cast (x), incx, result)); } -static inline -void axpy(cublasHandle_t& handle, const int& n, const float& alpha, const float *x, const int& incx, float *y, const int& incy) -{ - CHECK_CUBLAS(cublasSaxpy(handle, n, &alpha, x, incx, y, incy)); +static inline void + dot (cublasHandle_t& handle, + const int& n, + const float* x, + const int& incx, + const float* y, + const int& incy, + float* result) +{ + CHECK_CUBLAS (cublasSdot (handle, n, x, incx, y, incy, result)); +} +static inline void + dot (cublasHandle_t& handle, + const int& n, + const double* x, + const int& incx, + const double* y, + const int& incy, + double* result) +{ + CHECK_CUBLAS (cublasDdot (handle, n, x, incx, y, incy, result)); +} +static inline void + dot (cublasHandle_t& handle, + const int& n, + const std::complex* x, + const int& incx, + const std::complex* y, + const int& incy, + std::complex* result) +{ + CHECK_CUBLAS (cublasCdotc (handle, + n, + reinterpret_cast (x), + incx, + reinterpret_cast (y), + incy, + reinterpret_cast (result))); +} +static inline void + dot (cublasHandle_t& handle, + const int& n, + const std::complex* x, + const int& incx, + const 
std::complex* y, + const int& incy, + std::complex* result) +{ + CHECK_CUBLAS (cublasZdotc (handle, + n, + reinterpret_cast (x), + incx, + reinterpret_cast (y), + incy, + reinterpret_cast (result))); } -static inline -void axpy(cublasHandle_t& handle, const int& n, const double& alpha, const double *x, const int& incx, double *y, const int& incy) -{ - CHECK_CUBLAS(cublasDaxpy(handle, n, &alpha, x, incx, y, incy)); -} -static inline -void axpy(cublasHandle_t& handle, const int& n, const std::complex& alpha, const std::complex *x, const int& incx, std::complex *y, const int& incy) -{ - CHECK_CUBLAS(cublasCaxpy(handle, n, reinterpret_cast(&alpha), reinterpret_cast(x), incx, reinterpret_cast(y), incy)); -} -static inline -void axpy(cublasHandle_t& handle, const int& n, const std::complex& alpha, const std::complex *x, const int& incx, std::complex *y, const int& incy) -{ - CHECK_CUBLAS(cublasZaxpy(handle, n, reinterpret_cast(&alpha), reinterpret_cast(x), incx, reinterpret_cast(y), incy)); + +static inline void + axpy (cublasHandle_t& handle, + const int& n, + const float& alpha, + const float* x, + const int& incx, + float* y, + const int& incy) +{ + CHECK_CUBLAS (cublasSaxpy (handle, n, &alpha, x, incx, y, incy)); +} +static inline void + axpy (cublasHandle_t& handle, + const int& n, + const double& alpha, + const double* x, + const int& incx, + double* y, + const int& incy) +{ + CHECK_CUBLAS (cublasDaxpy (handle, n, &alpha, x, incx, y, incy)); +} +static inline void + axpy (cublasHandle_t& handle, + const int& n, + const std::complex& alpha, + const std::complex* x, + const int& incx, + std::complex* y, + const int& incy) +{ + CHECK_CUBLAS (cublasCaxpy (handle, + n, + reinterpret_cast (&alpha), + reinterpret_cast (x), + incx, + reinterpret_cast (y), + incy)); +} +static inline void + axpy (cublasHandle_t& handle, + const int& n, + const std::complex& alpha, + const std::complex* x, + const int& incx, + std::complex* y, + const int& incy) +{ + CHECK_CUBLAS 
(cublasZaxpy (handle, + n, + reinterpret_cast (&alpha), + reinterpret_cast (x), + incx, + reinterpret_cast (y), + incy)); } -static inline -void scal(cublasHandle_t& handle, const int& n, const float& alpha, float *x, const int& incx) +static inline void + scal (cublasHandle_t& handle, const int& n, const float& alpha, float* x, const int& incx) { - CHECK_CUBLAS(cublasSscal(handle, n, &alpha, x, incx)); + CHECK_CUBLAS (cublasSscal (handle, n, &alpha, x, incx)); } -static inline -void scal(cublasHandle_t& handle, const int& n, const double& alpha, double *x, const int& incx) +static inline void + scal (cublasHandle_t& handle, const int& n, const double& alpha, double* x, const int& incx) { - CHECK_CUBLAS(cublasDscal(handle, n, &alpha, x, incx)); + CHECK_CUBLAS (cublasDscal (handle, n, &alpha, x, incx)); } -static inline -void scal(cublasHandle_t& handle, const int& n, const std::complex& alpha, std::complex *x, const int& incx) +static inline void + scal (cublasHandle_t& handle, + const int& n, + const std::complex& alpha, + std::complex* x, + const int& incx) { - CHECK_CUBLAS(cublasCscal(handle, n, reinterpret_cast(&alpha), reinterpret_cast(x), incx)); + CHECK_CUBLAS ( + cublasCscal (handle, n, reinterpret_cast (&alpha), reinterpret_cast (x), incx)); } -static inline -void scal(cublasHandle_t& handle, const int& n, const std::complex& alpha, std::complex *x, const int& incx) +static inline void + scal (cublasHandle_t& handle, + const int& n, + const std::complex& alpha, + std::complex* x, + const int& incx) { - CHECK_CUBLAS(cublasZscal(handle, n, reinterpret_cast(&alpha), reinterpret_cast(x), incx)); + CHECK_CUBLAS (cublasZscal (handle, + n, + reinterpret_cast (&alpha), + reinterpret_cast (x), + incx)); } -static inline -void gemv(cublasHandle_t& handle, const char& trans, const int& m, const int& n, - const float& alpha, const float *A, const int& lda, const float *x, const int& incx, - const float& beta, float *y, const int& incy) -{ - 
CHECK_CUBLAS(cublasSgemv(handle, GetCublasOperation(trans), m, n, &alpha, A, lda, x, incx, &beta, y, incy)); -} -static inline -void gemv(cublasHandle_t& handle, const char& trans, const int& m, const int& n, - const double& alpha, const double *A, const int& lda, const double *x, const int& incx, - const double& beta, double *y, const int& incy) -{ - CHECK_CUBLAS(cublasDgemv(handle, GetCublasOperation(trans), m, n, &alpha, A, lda, x, incx, &beta, y, incy)); -} -static inline -void gemv(cublasHandle_t& handle, const char& trans, const int& m, const int& n, - const std::complex& alpha, const std::complex *A, const int& lda, const std::complex *x, const int& incx, - const std::complex& beta, std::complex *y, const int& incy) -{ - CHECK_CUBLAS(cublasCgemv(handle, GetCublasOperation(trans), m, n, reinterpret_cast(&alpha), - reinterpret_cast(A), lda, reinterpret_cast(x), incx, reinterpret_cast(&beta), reinterpret_cast(y), incy)); -} -static inline -void gemv(cublasHandle_t& handle, const char& trans, const int& m, const int& n, - const std::complex& alpha, const std::complex *A, const int& lda, const std::complex *x, const int& incx, - const std::complex& beta, std::complex *y, const int& incy) -{ - CHECK_CUBLAS(cublasZgemv(handle, GetCublasOperation(trans), m, n, reinterpret_cast(&alpha), - reinterpret_cast(A), lda, reinterpret_cast(x), incx, reinterpret_cast(&beta), reinterpret_cast(y), incy)); +static inline void + gemv (cublasHandle_t& handle, + const char& trans, + const int& m, + const int& n, + const float& alpha, + const float* A, + const int& lda, + const float* x, + const int& incx, + const float& beta, + float* y, + const int& incy) +{ + CHECK_CUBLAS (cublasSgemv (handle, GetCublasOperation (trans), m, n, &alpha, A, lda, x, incx, &beta, y, incy)); +} +static inline void + gemv (cublasHandle_t& handle, + const char& trans, + const int& m, + const int& n, + const double& alpha, + const double* A, + const int& lda, + const double* x, + const int& incx, + const 
double& beta, + double* y, + const int& incy) +{ + CHECK_CUBLAS (cublasDgemv (handle, GetCublasOperation (trans), m, n, &alpha, A, lda, x, incx, &beta, y, incy)); +} +static inline void + gemv (cublasHandle_t& handle, + const char& trans, + const int& m, + const int& n, + const std::complex& alpha, + const std::complex* A, + const int& lda, + const std::complex* x, + const int& incx, + const std::complex& beta, + std::complex* y, + const int& incy) +{ + CHECK_CUBLAS (cublasCgemv (handle, + GetCublasOperation (trans), + m, + n, + reinterpret_cast (&alpha), + reinterpret_cast (A), + lda, + reinterpret_cast (x), + incx, + reinterpret_cast (&beta), + reinterpret_cast (y), + incy)); +} +static inline void + gemv (cublasHandle_t& handle, + const char& trans, + const int& m, + const int& n, + const std::complex& alpha, + const std::complex* A, + const int& lda, + const std::complex* x, + const int& incx, + const std::complex& beta, + std::complex* y, + const int& incy) +{ + CHECK_CUBLAS (cublasZgemv (handle, + GetCublasOperation (trans), + m, + n, + reinterpret_cast (&alpha), + reinterpret_cast (A), + lda, + reinterpret_cast (x), + incx, + reinterpret_cast (&beta), + reinterpret_cast (y), + incy)); } template -static inline -void gemv_batched(cublasHandle_t& handle, const char& trans, const int& m, const int& n, - const T& alpha, T** A, const int& lda, T** x, const int& incx, - const T& beta, T** y, const int& incy, const int& batch_size) -{ - for (int ii = 0; ii < batch_size; ++ii) { - // Call the single GEMV for each pair of matrix A[ii] and vector x[ii] - cuBlasConnector::gemv(handle, trans, m, n, alpha, A[ii], lda, x[ii], incx, beta, y[ii], incy); - } +static inline void + gemv_batched (cublasHandle_t& handle, + const char& trans, + const int& m, + const int& n, + const T& alpha, + T** A, + const int& lda, + T** x, + const int& incx, + const T& beta, + T** y, + const int& incy, + const int& batch_size) +{ + for (int ii = 0; ii < batch_size; ++ii) + { + // Call the 
single GEMV for each pair of matrix A[ii] and vector x[ii] + cuBlasConnector::gemv (handle, trans, m, n, alpha, A[ii], lda, x[ii], incx, beta, y[ii], incy); + } } template -static inline -void gemv_batched_strided(cublasHandle_t& handle, const char& transa, const int& m, const int& n, - const T& alpha, const T* A, const int& lda, const int& stride_a, const T* x, const int& incx, const int& stride_x, - const T& beta, T* y, const int& incy, const int& stride_y, const int& batch_size) -{ - for (int ii = 0; ii < batch_size; ii++) { - // Call the single GEMV for each pair of matrix A[ii] and vector x[ii] - cuBlasConnector::gemv(handle, transa, m, n, alpha, A + ii * stride_a, lda, x + ii * stride_x, incx, beta, y + ii * stride_y, incy); - } +static inline void + gemv_batched_strided (cublasHandle_t& handle, + const char& transa, + const int& m, + const int& n, + const T& alpha, + const T* A, + const int& lda, + const int& stride_a, + const T* x, + const int& incx, + const int& stride_x, + const T& beta, + T* y, + const int& incy, + const int& stride_y, + const int& batch_size) +{ + for (int ii = 0; ii < batch_size; ii++) + { + // Call the single GEMV for each pair of matrix A[ii] and vector x[ii] + cuBlasConnector::gemv (handle, + transa, + m, + n, + alpha, + A + ii * stride_a, + lda, + x + ii * stride_x, + incx, + beta, + y + ii * stride_y, + incy); + } } -static inline -void gemm(cublasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const float& alpha, const float* A, const int& lda, const float* B, const int& ldb, - const float& beta, float* C, const int& ldc) -{ - CHECK_CUBLAS(cublasSgemm(handle, GetCublasOperation(transa), GetCublasOperation(transb), - m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc)); -} -static inline -void gemm(cublasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const double& alpha, const double* A, const int& lda, const double* B, const 
int& ldb, - const double& beta, double* C, const int& ldc) -{ - CHECK_CUBLAS(cublasDgemm(handle, GetCublasOperation(transa), GetCublasOperation(transb), - m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc)); -} -static inline -void gemm(cublasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const std::complex& alpha, const std::complex* A, const int& lda, const std::complex* B, const int& ldb, - const std::complex& beta, std::complex* C, const int& ldc) -{ - CHECK_CUBLAS(cublasCgemm(handle, GetCublasOperation(transa), GetCublasOperation(transb), - m, n, k, - reinterpret_cast(&alpha), - reinterpret_cast(A), lda, - reinterpret_cast(B), ldb, - reinterpret_cast(&beta), - reinterpret_cast(C), ldc)); -} -static inline -void gemm(cublasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const std::complex& alpha, const std::complex* A, const int& lda, const std::complex* B, const int& ldb, - const std::complex& beta, std::complex* C, const int& ldc) -{ - CHECK_CUBLAS(cublasZgemm(handle, GetCublasOperation(transa), GetCublasOperation(transb), - m, n, k, - reinterpret_cast(&alpha), - reinterpret_cast(A), lda, - reinterpret_cast(B), ldb, - reinterpret_cast(&beta), - reinterpret_cast(C), ldc)); +static inline void + gemm (cublasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const float& alpha, + const float* A, + const int& lda, + const float* B, + const int& ldb, + const float& beta, + float* C, + const int& ldc) +{ + CHECK_CUBLAS (cublasSgemm (handle, + GetCublasOperation (transa), + GetCublasOperation (transb), + m, + n, + k, + &alpha, + A, + lda, + B, + ldb, + &beta, + C, + ldc)); +} +static inline void + gemm (cublasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const double& alpha, + const double* A, + const int& lda, + const double* B, + const 
int& ldb, + const double& beta, + double* C, + const int& ldc) +{ + CHECK_CUBLAS (cublasDgemm (handle, + GetCublasOperation (transa), + GetCublasOperation (transb), + m, + n, + k, + &alpha, + A, + lda, + B, + ldb, + &beta, + C, + ldc)); +} +static inline void + gemm (cublasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const std::complex& alpha, + const std::complex* A, + const int& lda, + const std::complex* B, + const int& ldb, + const std::complex& beta, + std::complex* C, + const int& ldc) +{ + CHECK_CUBLAS (cublasCgemm (handle, + GetCublasOperation (transa), + GetCublasOperation (transb), + m, + n, + k, + reinterpret_cast (&alpha), + reinterpret_cast (A), + lda, + reinterpret_cast (B), + ldb, + reinterpret_cast (&beta), + reinterpret_cast (C), + ldc)); +} +static inline void + gemm (cublasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const std::complex& alpha, + const std::complex* A, + const int& lda, + const std::complex* B, + const int& ldb, + const std::complex& beta, + std::complex* C, + const int& ldc) +{ + CHECK_CUBLAS (cublasZgemm (handle, + GetCublasOperation (transa), + GetCublasOperation (transb), + m, + n, + k, + reinterpret_cast (&alpha), + reinterpret_cast (A), + lda, + reinterpret_cast (B), + ldb, + reinterpret_cast (&beta), + reinterpret_cast (C), + ldc)); } template -static inline -T** allocate_(T** in, const int& batch_size) +static inline T** + allocate_ (T** in, const int& batch_size) { T** out = nullptr; - CHECK_CUDA(cudaMalloc(reinterpret_cast(&out), sizeof(T*) * batch_size)); - CHECK_CUDA(cudaMemcpy(out, in, sizeof(T*) * batch_size, cudaMemcpyHostToDevice)); + CHECK_CUDA (cudaMalloc (reinterpret_cast (&out), sizeof (T*) * batch_size)); + CHECK_CUDA (cudaMemcpy (out, in, sizeof (T*) * batch_size, cudaMemcpyHostToDevice)); return out; } -static inline -void gemm_batched(cublasHandle_t& handle, const char& 
transa, const char& transb, const int& m, const int& n, const int& k, - const float& alpha, float** A, const int& lda, float** B, const int& ldb, - const float& beta, float** C, const int& ldc, const int& batch_size) -{ - float** d_A = allocate_(A, batch_size); - float** d_B = allocate_(B, batch_size); - float** d_C = allocate_(C, batch_size); - CHECK_CUBLAS(cublasSgemmBatched(handle, GetCublasOperation(transa), GetCublasOperation(transb), - m, n, k, &alpha, d_A, lda, d_B, ldb, &beta, d_C, ldc, batch_size)); - CHECK_CUDA(cudaFree(d_A)); - CHECK_CUDA(cudaFree(d_B)); - CHECK_CUDA(cudaFree(d_C)); -} -static inline -void gemm_batched(cublasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const double& alpha, double** A, const int& lda, double** B, const int& ldb, - const double& beta, double** C, const int& ldc, const int& batch_size) -{ - double** d_A = allocate_(A, batch_size); - double** d_B = allocate_(B, batch_size); - double** d_C = allocate_(C, batch_size); - CHECK_CUBLAS(cublasDgemmBatched(handle, GetCublasOperation(transa), GetCublasOperation(transb), - m, n, k, &alpha, d_A, lda, d_B, ldb, &beta, d_C, ldc, batch_size)); - CHECK_CUDA(cudaFree(d_A)); - CHECK_CUDA(cudaFree(d_B)); - CHECK_CUDA(cudaFree(d_C)); -} -static inline -void gemm_batched(cublasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const std::complex& alpha, std::complex** A, const int& lda, std::complex** B, const int& ldb, - const std::complex& beta, std::complex** C, const int& ldc, const int& batch_size) -{ - std::complex** d_A = allocate_(A, batch_size); - std::complex** d_B = allocate_(B, batch_size); - std::complex** d_C = allocate_(C, batch_size); - CHECK_CUBLAS(cublasCgemmBatched(handle, GetCublasOperation(transa), GetCublasOperation(transb), - m, n, k, - reinterpret_cast(&alpha), - reinterpret_cast(d_A), lda, - reinterpret_cast(d_B), ldb, - reinterpret_cast(&beta), - 
reinterpret_cast(d_C), ldc, batch_size)); - CHECK_CUDA(cudaFree(d_A)); - CHECK_CUDA(cudaFree(d_B)); - CHECK_CUDA(cudaFree(d_C)); -} -static inline -void gemm_batched(cublasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const std::complex& alpha, std::complex** A, const int& lda, std::complex** B, const int& ldb, - const std::complex& beta, std::complex** C, const int& ldc, const int& batch_size) -{ - std::complex** d_A = allocate_(A, batch_size); - std::complex** d_B = allocate_(B, batch_size); - std::complex** d_C = allocate_(C, batch_size); - CHECK_CUBLAS(cublasZgemmBatched(handle, GetCublasOperation(transa), GetCublasOperation(transb), - m, n, k, - reinterpret_cast(&alpha), - reinterpret_cast(d_A), lda, - reinterpret_cast(d_B), ldb, - reinterpret_cast(&beta), - reinterpret_cast(d_C), ldc, batch_size)); - CHECK_CUDA(cudaFree(d_A)); - CHECK_CUDA(cudaFree(d_B)); - CHECK_CUDA(cudaFree(d_C)); +static inline void + gemm_batched (cublasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const float& alpha, + float** A, + const int& lda, + float** B, + const int& ldb, + const float& beta, + float** C, + const int& ldc, + const int& batch_size) +{ + float** d_A = allocate_ (A, batch_size); + float** d_B = allocate_ (B, batch_size); + float** d_C = allocate_ (C, batch_size); + CHECK_CUBLAS (cublasSgemmBatched (handle, + GetCublasOperation (transa), + GetCublasOperation (transb), + m, + n, + k, + &alpha, + d_A, + lda, + d_B, + ldb, + &beta, + d_C, + ldc, + batch_size)); + CHECK_CUDA (cudaFree (d_A)); + CHECK_CUDA (cudaFree (d_B)); + CHECK_CUDA (cudaFree (d_C)); +} +static inline void + gemm_batched (cublasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const double& alpha, + double** A, + const int& lda, + double** B, + const int& ldb, + const double& beta, + double** C, + const int& ldc, + const 
int& batch_size) +{ + double** d_A = allocate_ (A, batch_size); + double** d_B = allocate_ (B, batch_size); + double** d_C = allocate_ (C, batch_size); + CHECK_CUBLAS (cublasDgemmBatched (handle, + GetCublasOperation (transa), + GetCublasOperation (transb), + m, + n, + k, + &alpha, + d_A, + lda, + d_B, + ldb, + &beta, + d_C, + ldc, + batch_size)); + CHECK_CUDA (cudaFree (d_A)); + CHECK_CUDA (cudaFree (d_B)); + CHECK_CUDA (cudaFree (d_C)); +} +static inline void + gemm_batched (cublasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const std::complex& alpha, + std::complex** A, + const int& lda, + std::complex** B, + const int& ldb, + const std::complex& beta, + std::complex** C, + const int& ldc, + const int& batch_size) +{ + std::complex** d_A = allocate_ (A, batch_size); + std::complex** d_B = allocate_ (B, batch_size); + std::complex** d_C = allocate_ (C, batch_size); + CHECK_CUBLAS (cublasCgemmBatched (handle, + GetCublasOperation (transa), + GetCublasOperation (transb), + m, + n, + k, + reinterpret_cast (&alpha), + reinterpret_cast (d_A), + lda, + reinterpret_cast (d_B), + ldb, + reinterpret_cast (&beta), + reinterpret_cast (d_C), + ldc, + batch_size)); + CHECK_CUDA (cudaFree (d_A)); + CHECK_CUDA (cudaFree (d_B)); + CHECK_CUDA (cudaFree (d_C)); +} +static inline void + gemm_batched (cublasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const std::complex& alpha, + std::complex** A, + const int& lda, + std::complex** B, + const int& ldb, + const std::complex& beta, + std::complex** C, + const int& ldc, + const int& batch_size) +{ + std::complex** d_A = allocate_ (A, batch_size); + std::complex** d_B = allocate_ (B, batch_size); + std::complex** d_C = allocate_ (C, batch_size); + CHECK_CUBLAS (cublasZgemmBatched (handle, + GetCublasOperation (transa), + GetCublasOperation (transb), + m, + n, + k, + reinterpret_cast (&alpha), + 
reinterpret_cast (d_A), + lda, + reinterpret_cast (d_B), + ldb, + reinterpret_cast (&beta), + reinterpret_cast (d_C), + ldc, + batch_size)); + CHECK_CUDA (cudaFree (d_A)); + CHECK_CUDA (cudaFree (d_B)); + CHECK_CUDA (cudaFree (d_C)); } -static inline -void gemm_batched_strided(cublasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const float& alpha, const float* A, const int& lda, const int& stride_a, const float* B, const int& ldb, const int& stride_b, - const float& beta, float* C, const int& ldc, const int& stride_c, const int& batch_size) -{ - CHECK_CUBLAS(cublasSgemmStridedBatched( - handle, - GetCublasOperation(transa), - GetCublasOperation(transb), - m, n, k, - &alpha, - A, lda, stride_a, - B, ldb, stride_b, - &beta, - C, ldc, stride_c, - batch_size)); -} -static inline -void gemm_batched_strided(cublasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const double& alpha, const double* A, const int& lda, const int& stride_a, const double* B, const int& ldb, const int& stride_b, - const double& beta, double* C, const int& ldc, const int& stride_c, const int& batch_size) -{ - CHECK_CUBLAS(cublasDgemmStridedBatched( - handle, - GetCublasOperation(transa), - GetCublasOperation(transb), - m, n, k, - &alpha, - A, lda, stride_a, - B, ldb, stride_b, - &beta, - C, ldc, stride_c, - batch_size)); -} -static inline -void gemm_batched_strided(cublasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const std::complex& alpha, const std::complex* A, const int& lda, const int& stride_a, const std::complex* B, const int& ldb, const int& stride_b, - const std::complex& beta, std::complex* C, const int& ldc, const int& stride_c, const int& batch_size) -{ - CHECK_CUBLAS(cublasCgemmStridedBatched( - handle, - GetCublasOperation(transa), - GetCublasOperation(transb), - m, n, k, - reinterpret_cast(&alpha), - 
reinterpret_cast(A), lda, stride_a, - reinterpret_cast(B), ldb, stride_b, - reinterpret_cast(&beta), - reinterpret_cast(C), ldc, stride_c, - batch_size)); -} -static inline -void gemm_batched_strided(cublasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const std::complex& alpha, const std::complex* A, const int& lda, const int& stride_a, const std::complex* B, const int& ldb, const int& stride_b, - const std::complex& beta, std::complex* C, const int& ldc, const int& stride_c, const int& batch_size) -{ - CHECK_CUBLAS(cublasZgemmStridedBatched( - handle, - GetCublasOperation(transa), - GetCublasOperation(transb), - m, n, k, - reinterpret_cast(&alpha), - reinterpret_cast(A), lda, stride_a, - reinterpret_cast(B), ldb, stride_b, - reinterpret_cast(&beta), - reinterpret_cast(C), ldc, stride_c, - batch_size)); +static inline void + gemm_batched_strided (cublasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const float& alpha, + const float* A, + const int& lda, + const int& stride_a, + const float* B, + const int& ldb, + const int& stride_b, + const float& beta, + float* C, + const int& ldc, + const int& stride_c, + const int& batch_size) +{ + CHECK_CUBLAS (cublasSgemmStridedBatched (handle, + GetCublasOperation (transa), + GetCublasOperation (transb), + m, + n, + k, + &alpha, + A, + lda, + stride_a, + B, + ldb, + stride_b, + &beta, + C, + ldc, + stride_c, + batch_size)); +} +static inline void + gemm_batched_strided (cublasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const double& alpha, + const double* A, + const int& lda, + const int& stride_a, + const double* B, + const int& ldb, + const int& stride_b, + const double& beta, + double* C, + const int& ldc, + const int& stride_c, + const int& batch_size) +{ + CHECK_CUBLAS (cublasDgemmStridedBatched (handle, + GetCublasOperation (transa), 
+ GetCublasOperation (transb), + m, + n, + k, + &alpha, + A, + lda, + stride_a, + B, + ldb, + stride_b, + &beta, + C, + ldc, + stride_c, + batch_size)); +} +static inline void + gemm_batched_strided (cublasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const std::complex& alpha, + const std::complex* A, + const int& lda, + const int& stride_a, + const std::complex* B, + const int& ldb, + const int& stride_b, + const std::complex& beta, + std::complex* C, + const int& ldc, + const int& stride_c, + const int& batch_size) +{ + CHECK_CUBLAS (cublasCgemmStridedBatched (handle, + GetCublasOperation (transa), + GetCublasOperation (transb), + m, + n, + k, + reinterpret_cast (&alpha), + reinterpret_cast (A), + lda, + stride_a, + reinterpret_cast (B), + ldb, + stride_b, + reinterpret_cast (&beta), + reinterpret_cast (C), + ldc, + stride_c, + batch_size)); +} +static inline void + gemm_batched_strided (cublasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const std::complex& alpha, + const std::complex* A, + const int& lda, + const int& stride_a, + const std::complex* B, + const int& ldb, + const int& stride_b, + const std::complex& beta, + std::complex* C, + const int& ldc, + const int& stride_c, + const int& batch_size) +{ + CHECK_CUBLAS (cublasZgemmStridedBatched (handle, + GetCublasOperation (transa), + GetCublasOperation (transb), + m, + n, + k, + reinterpret_cast (&alpha), + reinterpret_cast (A), + lda, + stride_a, + reinterpret_cast (B), + ldb, + stride_b, + reinterpret_cast (&beta), + reinterpret_cast (C), + ldc, + stride_c, + batch_size)); } } // namespace cuBlasConnector diff --git a/source/source_base/module_container/base/third_party/cusolver.h b/source/source_base/module_container/base/third_party/cusolver.h index 529109823df..f05d78d243b 100644 --- a/source/source_base/module_container/base/third_party/cusolver.h +++ 
b/source/source_base/module_container/base/third_party/cusolver.h @@ -16,592 +16,941 @@ #include -namespace container { -namespace cuSolverConnector { +namespace container +{ +namespace cuSolverConnector +{ template -static inline -void trtri (cusolverDnHandle_t& cusolver_handle, const char& uplo, const char& diag, const int& n, T* A, const int& lda) +static inline void + trtri (cusolverDnHandle_t& cusolver_handle, const char& uplo, const char& diag, const int& n, T* A, const int& lda) { size_t d_lwork = 0, h_lwork = 0; using Type = typename GetTypeThrust::type; - CHECK_CUSOLVER(cusolverDnXtrtri_bufferSize(cusolver_handle, cublas_fill_mode(uplo), cublas_diag_type(diag), n, GetTypeCuda::cuda_data_type, reinterpret_cast(A), lda, &d_lwork, &h_lwork)); - void* d_work = nullptr, *h_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_work, d_lwork)); - if (h_lwork) { - h_work = malloc(h_lwork); - if (h_work == nullptr) { - throw std::bad_alloc(); + CHECK_CUSOLVER (cusolverDnXtrtri_bufferSize (cusolver_handle, + cublas_fill_mode (uplo), + cublas_diag_type (diag), + n, + GetTypeCuda::cuda_data_type, + reinterpret_cast (A), + lda, + &d_lwork, + &h_lwork)); + void *d_work = nullptr, *h_work = nullptr; + CHECK_CUDA (cudaMalloc ((void**)&d_work, d_lwork)); + if (h_lwork) + { + h_work = malloc (h_lwork); + if (h_work == nullptr) + { + throw std::bad_alloc (); + } } - } int h_info = 0; int* d_info = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); // Perform Cholesky decomposition - CHECK_CUSOLVER(cusolverDnXtrtri(cusolver_handle, cublas_fill_mode(uplo), cublas_diag_type(diag), n, GetTypeCuda::cuda_data_type, reinterpret_cast(A), n, d_work, d_lwork, h_work, h_lwork, d_info)); - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("trtri: failed to invert matrix"); - } - free(h_work); - CHECK_CUDA(cudaFree(d_work)); - 
CHECK_CUDA(cudaFree(d_info)); + CHECK_CUSOLVER (cusolverDnXtrtri (cusolver_handle, + cublas_fill_mode (uplo), + cublas_diag_type (diag), + n, + GetTypeCuda::cuda_data_type, + reinterpret_cast (A), + n, + d_work, + d_lwork, + h_work, + h_lwork, + d_info)); + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("trtri: failed to invert matrix"); + } + free (h_work); + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); } -static inline -void potri (cusolverDnHandle_t& cusolver_handle, const char& uplo, const char& diag, const int& n, float * A, const int& lda) +static inline void + potri (cusolverDnHandle_t& cusolver_handle, + const char& uplo, + const char& diag, + const int& n, + float* A, + const int& lda) { int lwork; - CHECK_CUSOLVER(cusolverDnSpotri_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, A, n, &lwork)); + CHECK_CUSOLVER (cusolverDnSpotri_bufferSize (cusolver_handle, cublas_fill_mode (uplo), n, A, n, &lwork)); float* work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&work, lwork * sizeof(float))); + CHECK_CUDA (cudaMalloc ((void**)&work, lwork * sizeof (float))); // Perform Cholesky decomposition - CHECK_CUSOLVER(cusolverDnSpotri(cusolver_handle, cublas_fill_mode(uplo), n, A, n, work, lwork, nullptr)); - CHECK_CUDA(cudaFree(work)); + CHECK_CUSOLVER (cusolverDnSpotri (cusolver_handle, cublas_fill_mode (uplo), n, A, n, work, lwork, nullptr)); + CHECK_CUDA (cudaFree (work)); } -static inline -void potri (cusolverDnHandle_t& cusolver_handle, const char& uplo, const char& diag, const int& n, double * A, const int& lda) +static inline void + potri (cusolverDnHandle_t& cusolver_handle, + const char& uplo, + const char& diag, + const int& n, + double* A, + const int& lda) { int lwork; - CHECK_CUSOLVER(cusolverDnDpotri_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, A, n, &lwork)); + CHECK_CUSOLVER (cusolverDnDpotri_bufferSize (cusolver_handle, 
cublas_fill_mode (uplo), n, A, n, &lwork)); double* work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&work, lwork * sizeof(double))); + CHECK_CUDA (cudaMalloc ((void**)&work, lwork * sizeof (double))); // Perform Cholesky decomposition - CHECK_CUSOLVER(cusolverDnDpotri(cusolver_handle, cublas_fill_mode(uplo), n, A, n, work, lwork, nullptr)); - CHECK_CUDA(cudaFree(work)); + CHECK_CUSOLVER (cusolverDnDpotri (cusolver_handle, cublas_fill_mode (uplo), n, A, n, work, lwork, nullptr)); + CHECK_CUDA (cudaFree (work)); } -static inline -void potri (cusolverDnHandle_t& cusolver_handle, const char& uplo, const char& diag, const int& n, std::complex * A, const int& lda) +static inline void + potri (cusolverDnHandle_t& cusolver_handle, + const char& uplo, + const char& diag, + const int& n, + std::complex* A, + const int& lda) { int lwork; - CHECK_CUSOLVER(cusolverDnCpotri_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, reinterpret_cast(A), n, &lwork)); + CHECK_CUSOLVER (cusolverDnCpotri_bufferSize (cusolver_handle, + cublas_fill_mode (uplo), + n, + reinterpret_cast (A), + n, + &lwork)); cuComplex* work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&work, lwork * sizeof(cuComplex))); + CHECK_CUDA (cudaMalloc ((void**)&work, lwork * sizeof (cuComplex))); // Perform Cholesky decomposition - CHECK_CUSOLVER(cusolverDnCpotri(cusolver_handle, cublas_fill_mode(uplo), n, reinterpret_cast(A), n, work, lwork, nullptr)); - CHECK_CUDA(cudaFree(work)); + CHECK_CUSOLVER (cusolverDnCpotri (cusolver_handle, + cublas_fill_mode (uplo), + n, + reinterpret_cast (A), + n, + work, + lwork, + nullptr)); + CHECK_CUDA (cudaFree (work)); } -static inline -void potri (cusolverDnHandle_t& cusolver_handle, const char& uplo, const char& diag, const int& n, std::complex * A, const int& lda) +static inline void + potri (cusolverDnHandle_t& cusolver_handle, + const char& uplo, + const char& diag, + const int& n, + std::complex* A, + const int& lda) { int lwork; - 
CHECK_CUSOLVER(cusolverDnZpotri_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, reinterpret_cast(A), n, &lwork)); + CHECK_CUSOLVER (cusolverDnZpotri_bufferSize (cusolver_handle, + cublas_fill_mode (uplo), + n, + reinterpret_cast (A), + n, + &lwork)); cuDoubleComplex* work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&work, lwork * sizeof(cuDoubleComplex))); + CHECK_CUDA (cudaMalloc ((void**)&work, lwork * sizeof (cuDoubleComplex))); // Perform Cholesky decomposition - CHECK_CUSOLVER(cusolverDnZpotri(cusolver_handle, cublas_fill_mode(uplo), n, reinterpret_cast(A), n, work, lwork, nullptr)); - CHECK_CUDA(cudaFree(work)); + CHECK_CUSOLVER (cusolverDnZpotri (cusolver_handle, + cublas_fill_mode (uplo), + n, + reinterpret_cast (A), + n, + work, + lwork, + nullptr)); + CHECK_CUDA (cudaFree (work)); } - -static inline -void potrf (cusolverDnHandle_t& cusolver_handle, const char& uplo, const int& n, float * A, const int& lda) +static inline void + potrf (cusolverDnHandle_t& cusolver_handle, const char& uplo, const int& n, float* A, const int& lda) { int lwork; - int *info = nullptr; - CHECK_CUDA(cudaMalloc((void**)&info, 1 * sizeof(int))); - CHECK_CUSOLVER(cusolverDnSpotrf_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, A, n, &lwork)); + int* info = nullptr; + CHECK_CUDA (cudaMalloc ((void**)&info, 1 * sizeof (int))); + CHECK_CUSOLVER (cusolverDnSpotrf_bufferSize (cusolver_handle, cublas_fill_mode (uplo), n, A, n, &lwork)); float* work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&work, lwork * sizeof(float))); + CHECK_CUDA (cudaMalloc ((void**)&work, lwork * sizeof (float))); // Perform Cholesky decomposition - CHECK_CUSOLVER(cusolverDnSpotrf(cusolver_handle, cublas_fill_mode(uplo), n, A, n, work, lwork, info)); - CHECK_CUDA(cudaFree(work)); - CHECK_CUDA(cudaFree(info)); + CHECK_CUSOLVER (cusolverDnSpotrf (cusolver_handle, cublas_fill_mode (uplo), n, A, n, work, lwork, info)); + CHECK_CUDA (cudaFree (work)); + CHECK_CUDA (cudaFree (info)); } -static inline -void 
potrf (cusolverDnHandle_t& cusolver_handle, const char& uplo, const int& n, double * A, const int& lda) +static inline void + potrf (cusolverDnHandle_t& cusolver_handle, const char& uplo, const int& n, double* A, const int& lda) { int lwork; - int *info = nullptr; - CHECK_CUDA(cudaMalloc((void**)&info, 1 * sizeof(int))); - CHECK_CUSOLVER(cusolverDnDpotrf_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, A, n, &lwork)); + int* info = nullptr; + CHECK_CUDA (cudaMalloc ((void**)&info, 1 * sizeof (int))); + CHECK_CUSOLVER (cusolverDnDpotrf_bufferSize (cusolver_handle, cublas_fill_mode (uplo), n, A, n, &lwork)); double* work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&work, lwork * sizeof(double))); + CHECK_CUDA (cudaMalloc ((void**)&work, lwork * sizeof (double))); // Perform Cholesky decomposition - CHECK_CUSOLVER(cusolverDnDpotrf(cusolver_handle, cublas_fill_mode(uplo), n, A, n, work, lwork, info)); - CHECK_CUDA(cudaFree(work)); - CHECK_CUDA(cudaFree(info)); + CHECK_CUSOLVER (cusolverDnDpotrf (cusolver_handle, cublas_fill_mode (uplo), n, A, n, work, lwork, info)); + CHECK_CUDA (cudaFree (work)); + CHECK_CUDA (cudaFree (info)); } -static inline -void potrf (cusolverDnHandle_t& cusolver_handle, const char& uplo, const int& n, std::complex * A, const int& lda) +static inline void + potrf (cusolverDnHandle_t& cusolver_handle, const char& uplo, const int& n, std::complex* A, const int& lda) { int lwork; - int *info = nullptr; - CHECK_CUDA(cudaMalloc((void**)&info, 1 * sizeof(int))); - CHECK_CUSOLVER(cusolverDnCpotrf_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, reinterpret_cast(A), lda, &lwork)); + int* info = nullptr; + CHECK_CUDA (cudaMalloc ((void**)&info, 1 * sizeof (int))); + CHECK_CUSOLVER (cusolverDnCpotrf_bufferSize (cusolver_handle, + cublas_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + &lwork)); cuComplex* work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&work, lwork * sizeof(cuComplex))); + CHECK_CUDA (cudaMalloc ((void**)&work, lwork * 
sizeof (cuComplex))); // Perform Cholesky decomposition - CHECK_CUSOLVER(cusolverDnCpotrf(cusolver_handle, cublas_fill_mode(uplo), n, reinterpret_cast(A), lda, work, lwork, info)); - CHECK_CUDA(cudaFree(work)); - CHECK_CUDA(cudaFree(info)); + CHECK_CUSOLVER (cusolverDnCpotrf (cusolver_handle, + cublas_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + work, + lwork, + info)); + CHECK_CUDA (cudaFree (work)); + CHECK_CUDA (cudaFree (info)); } -static inline -void potrf (cusolverDnHandle_t& cusolver_handle, const char& uplo, const int& n, std::complex * A, const int& lda) +static inline void + potrf (cusolverDnHandle_t& cusolver_handle, const char& uplo, const int& n, std::complex* A, const int& lda) { int lwork; - int *info = nullptr; - CHECK_CUDA(cudaMalloc((void**)&info, 1 * sizeof(int))); - CHECK_CUSOLVER(cusolverDnZpotrf_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, reinterpret_cast(A), lda, &lwork)); + int* info = nullptr; + CHECK_CUDA (cudaMalloc ((void**)&info, 1 * sizeof (int))); + CHECK_CUSOLVER (cusolverDnZpotrf_bufferSize (cusolver_handle, + cublas_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + &lwork)); cuDoubleComplex* work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&work, lwork * sizeof(cuDoubleComplex))); + CHECK_CUDA (cudaMalloc ((void**)&work, lwork * sizeof (cuDoubleComplex))); // Perform Cholesky decomposition - CHECK_CUSOLVER(cusolverDnZpotrf(cusolver_handle, cublas_fill_mode(uplo), n, reinterpret_cast(A), lda, work, lwork, info)); - CHECK_CUDA(cudaFree(work)); - CHECK_CUDA(cudaFree(info)); + CHECK_CUSOLVER (cusolverDnZpotrf (cusolver_handle, + cublas_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + work, + lwork, + info)); + CHECK_CUDA (cudaFree (work)); + CHECK_CUDA (cudaFree (info)); } - -static inline -void heevd (cusolverDnHandle_t& cusolver_handle, const char& jobz, const char& uplo, const int& n, float* A, const int& lda, float * W) +static inline void + heevd (cusolverDnHandle_t& cusolver_handle, + const char& 
jobz, + const char& uplo, + const int& n, + float* A, + const int& lda, + float* W) { // prepare some values for cusolverDnSsyevd_bufferSize - int lwork = 0; + int lwork = 0; int h_info = 0; - int* d_info = nullptr; + int* d_info = nullptr; float* d_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. - CHECK_CUSOLVER(cusolverDnSsyevd_bufferSize(cusolver_handle, cublas_eig_mode(jobz), cublas_fill_mode(uplo), - n, A, lda, W, &lwork)); + CHECK_CUSOLVER (cusolverDnSsyevd_bufferSize (cusolver_handle, + cublas_eig_mode (jobz), + cublas_fill_mode (uplo), + n, + A, + lda, + W, + &lwork)); // allocate memory - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(float) * lwork)); + CHECK_CUDA (cudaMalloc ((void**)&d_work, sizeof (float) * lwork)); // compute eigenvalues and eigenvectors. - CHECK_CUSOLVER(cusolverDnSsyevd(cusolver_handle, cublas_eig_mode(jobz), cublas_fill_mode(uplo), - n, A, lda, W, d_work, lwork, d_info)); - - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("heevd: failed to invert matrix"); - } - CHECK_CUDA(cudaFree(d_info)); - CHECK_CUDA(cudaFree(d_work)); + CHECK_CUSOLVER (cusolverDnSsyevd (cusolver_handle, + cublas_eig_mode (jobz), + cublas_fill_mode (uplo), + n, + A, + lda, + W, + d_work, + lwork, + d_info)); + + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("heevd: failed to invert matrix"); + } + CHECK_CUDA (cudaFree (d_info)); + CHECK_CUDA (cudaFree (d_work)); } -static inline -void heevd (cusolverDnHandle_t& cusolver_handle, const char& jobz, const char& uplo, const int& n, double* A, const int& lda, double * W) +static inline void + heevd (cusolverDnHandle_t& cusolver_handle, + const char& jobz, + const char& uplo, + const int& n, + double* A, + const int& 
lda, + double* W) { // prepare some values for cusolverDnDsyevd_bufferSize - int lwork = 0; + int lwork = 0; int h_info = 0; - int* d_info = nullptr; + int* d_info = nullptr; double* d_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. - CHECK_CUSOLVER(cusolverDnDsyevd_bufferSize(cusolver_handle, cublas_eig_mode(jobz), cublas_fill_mode(uplo), - n, A, lda, W, &lwork)); + CHECK_CUSOLVER (cusolverDnDsyevd_bufferSize (cusolver_handle, + cublas_eig_mode (jobz), + cublas_fill_mode (uplo), + n, + A, + lda, + W, + &lwork)); // allocate memory - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(double) * lwork)); + CHECK_CUDA (cudaMalloc ((void**)&d_work, sizeof (double) * lwork)); // compute eigenvalues and eigenvectors. - CHECK_CUSOLVER(cusolverDnDsyevd(cusolver_handle, cublas_eig_mode(jobz), cublas_fill_mode(uplo), - n, A, lda, W, d_work, lwork, d_info)); - - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("heevd: failed to invert matrix"); - } - CHECK_CUDA(cudaFree(d_info)); - CHECK_CUDA(cudaFree(d_work)); + CHECK_CUSOLVER (cusolverDnDsyevd (cusolver_handle, + cublas_eig_mode (jobz), + cublas_fill_mode (uplo), + n, + A, + lda, + W, + d_work, + lwork, + d_info)); + + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("heevd: failed to invert matrix"); + } + CHECK_CUDA (cudaFree (d_info)); + CHECK_CUDA (cudaFree (d_work)); } -static inline -void heevd (cusolverDnHandle_t& cusolver_handle, const char& jobz, const char& uplo, const int& n, std::complex* A, const int& lda, float * W) +static inline void + heevd (cusolverDnHandle_t& cusolver_handle, + const char& jobz, + const char& uplo, + const int& n, + std::complex* A, + const int& lda, + float* W) { // prepare some values for 
cusolverDnCheevd_bufferSize - int lwork = 0; + int lwork = 0; int h_info = 0; - int* d_info = nullptr; + int* d_info = nullptr; cuComplex* d_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. - CHECK_CUSOLVER(cusolverDnCheevd_bufferSize(cusolver_handle, cublas_eig_mode(jobz), cublas_fill_mode(uplo), - n, reinterpret_cast(A), lda, W, &lwork)); + CHECK_CUSOLVER (cusolverDnCheevd_bufferSize (cusolver_handle, + cublas_eig_mode (jobz), + cublas_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + W, + &lwork)); // allocate memory - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(cuComplex) * lwork)); + CHECK_CUDA (cudaMalloc ((void**)&d_work, sizeof (cuComplex) * lwork)); // compute eigenvalues and eigenvectors. - CHECK_CUSOLVER(cusolverDnCheevd(cusolver_handle, cublas_eig_mode(jobz), cublas_fill_mode(uplo), - n, reinterpret_cast(A), lda, W, d_work, lwork, d_info)); - - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("heevd: failed to invert matrix"); - } - CHECK_CUDA(cudaFree(d_info)); - CHECK_CUDA(cudaFree(d_work)); + CHECK_CUSOLVER (cusolverDnCheevd (cusolver_handle, + cublas_eig_mode (jobz), + cublas_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + W, + d_work, + lwork, + d_info)); + + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("heevd: failed to invert matrix"); + } + CHECK_CUDA (cudaFree (d_info)); + CHECK_CUDA (cudaFree (d_work)); } -static inline -void heevd (cusolverDnHandle_t& cusolver_handle, const char& jobz, const char& uplo, const int& n, std::complex* A, const int& lda, double* W) +static inline void + heevd (cusolverDnHandle_t& cusolver_handle, + const char& jobz, + const char& uplo, + const int& n, + std::complex* A, + const int& lda, + double* W) { 
// prepare some values for cusolverDnZheevd_bufferSize - int lwork = 0; + int lwork = 0; int h_info = 0; - int* d_info = nullptr; + int* d_info = nullptr; cuDoubleComplex* d_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. - CHECK_CUSOLVER(cusolverDnZheevd_bufferSize(cusolver_handle, cublas_eig_mode(jobz), cublas_fill_mode(uplo), - n, reinterpret_cast(A), lda, W, &lwork)); + CHECK_CUSOLVER (cusolverDnZheevd_bufferSize (cusolver_handle, + cublas_eig_mode (jobz), + cublas_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + W, + &lwork)); // allocate memory - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(cuDoubleComplex) * lwork)); + CHECK_CUDA (cudaMalloc ((void**)&d_work, sizeof (cuDoubleComplex) * lwork)); // compute eigenvalues and eigenvectors. - CHECK_CUSOLVER(cusolverDnZheevd(cusolver_handle, cublas_eig_mode(jobz), cublas_fill_mode(uplo), - n, reinterpret_cast(A), lda, W, d_work, lwork, d_info)); - - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("heevd: failed to invert matrix"); - } - CHECK_CUDA(cudaFree(d_info)); - CHECK_CUDA(cudaFree(d_work)); + CHECK_CUSOLVER (cusolverDnZheevd (cusolver_handle, + cublas_eig_mode (jobz), + cublas_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + W, + d_work, + lwork, + d_info)); + + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("heevd: failed to invert matrix"); + } + CHECK_CUDA (cudaFree (d_info)); + CHECK_CUDA (cudaFree (d_work)); } // ===================================================================================================== // heevdx: Compute eigenvalues and eigenvectors of symmetric/Hermitian matrix // ===================================================================================================== 
// --- float --- -static inline -void heevdx(cusolverDnHandle_t& cusolver_handle, - const int n, - const int lda, - float* d_A, - const char jobz, - const char uplo, - const char range, - const int il, const int iu, - const float vl, const float vu, - float* d_eigen_val, - int* h_meig) +static inline void + heevdx (cusolverDnHandle_t& cusolver_handle, + const int n, + const int lda, + float* d_A, + const char jobz, + const char uplo, + const char range, + const int il, + const int iu, + const float vl, + const float vu, + float* d_eigen_val, + int* h_meig) { int lwork = 0; int* d_info = nullptr; float* d_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); - - cusolverEigMode_t jobz_t = cublas_eig_mode(jobz); - cublasFillMode_t uplo_t = cublas_fill_mode(uplo); - cusolverEigRange_t range_t = cublas_eig_range(range); - - CHECK_CUSOLVER(cusolverDnSsyevdx_bufferSize( - cusolver_handle, - jobz_t, range_t, uplo_t, - n, d_A, lda, - vl, vu, il, iu, - h_meig, // ← int* output: number of eigenvalues found - d_eigen_val, // ← const float* W (used for query, can be dummy) - &lwork // ← int* lwork (output) - )); - - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(float) * lwork)); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); + + cusolverEigMode_t jobz_t = cublas_eig_mode (jobz); + cublasFillMode_t uplo_t = cublas_fill_mode (uplo); + cusolverEigRange_t range_t = cublas_eig_range (range); + + CHECK_CUSOLVER (cusolverDnSsyevdx_bufferSize (cusolver_handle, + jobz_t, + range_t, + uplo_t, + n, + d_A, + lda, + vl, + vu, + il, + iu, + h_meig, // ← int* output: number of eigenvalues found + d_eigen_val, // ← const float* W (used for query, can be dummy) + &lwork // ← int* lwork (output) + )); + + CHECK_CUDA (cudaMalloc ((void**)&d_work, sizeof (float) * lwork)); // Main call - CHECK_CUSOLVER(cusolverDnSsyevdx( - cusolver_handle, - jobz_t, range_t, uplo_t, - n, - d_A, lda, - vl, vu, il, iu, - h_meig, // ← int* output - d_eigen_val, // ← float* W: 
eigenvalues - d_work, lwork, - d_info - )); + CHECK_CUSOLVER (cusolverDnSsyevdx (cusolver_handle, + jobz_t, + range_t, + uplo_t, + n, + d_A, + lda, + vl, + vu, + il, + iu, + h_meig, // ← int* output + d_eigen_val, // ← float* W: eigenvalues + d_work, + lwork, + d_info)); int h_info = 0; - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - cudaFree(d_info); cudaFree(d_work); - throw std::runtime_error("heevdx (float) failed with info = " + std::to_string(h_info)); - } - - cudaFree(d_info); - cudaFree(d_work); + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + cudaFree (d_info); + cudaFree (d_work); + throw std::runtime_error ("heevdx (float) failed with info = " + std::to_string (h_info)); + } + + cudaFree (d_info); + cudaFree (d_work); } // --- double --- -static inline -void heevdx(cusolverDnHandle_t& cusolver_handle, - const int n, - const int lda, - double* d_A, - const char jobz, - const char uplo, - const char range, - const int il, const int iu, - const double vl, const double vu, - double* d_eigen_val, - int* h_meig) +static inline void + heevdx (cusolverDnHandle_t& cusolver_handle, + const int n, + const int lda, + double* d_A, + const char jobz, + const char uplo, + const char range, + const int il, + const int iu, + const double vl, + const double vu, + double* d_eigen_val, + int* h_meig) { int lwork = 0; int* d_info = nullptr; double* d_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); - - cusolverEigMode_t jobz_t = cublas_eig_mode(jobz); - cublasFillMode_t uplo_t = cublas_fill_mode(uplo); - cusolverEigRange_t range_t = cublas_eig_range(range); - - CHECK_CUSOLVER(cusolverDnDsyevdx_bufferSize( - cusolver_handle, - jobz_t, range_t, uplo_t, - n, d_A, lda, - vl, vu, il, iu, - h_meig, - d_eigen_val, - &lwork - )); - - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(double) * lwork)); - - CHECK_CUSOLVER(cusolverDnDsyevdx( - 
cusolver_handle, - jobz_t, range_t, uplo_t, - n, - d_A, lda, - vl, vu, il, iu, - h_meig, - d_eigen_val, - d_work, lwork, - d_info - )); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); + + cusolverEigMode_t jobz_t = cublas_eig_mode (jobz); + cublasFillMode_t uplo_t = cublas_fill_mode (uplo); + cusolverEigRange_t range_t = cublas_eig_range (range); + + CHECK_CUSOLVER (cusolverDnDsyevdx_bufferSize (cusolver_handle, + jobz_t, + range_t, + uplo_t, + n, + d_A, + lda, + vl, + vu, + il, + iu, + h_meig, + d_eigen_val, + &lwork)); + + CHECK_CUDA (cudaMalloc ((void**)&d_work, sizeof (double) * lwork)); + + CHECK_CUSOLVER (cusolverDnDsyevdx (cusolver_handle, + jobz_t, + range_t, + uplo_t, + n, + d_A, + lda, + vl, + vu, + il, + iu, + h_meig, + d_eigen_val, + d_work, + lwork, + d_info)); int h_info = 0; - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - cudaFree(d_info); cudaFree(d_work); - throw std::runtime_error("heevdx (double) failed with info = " + std::to_string(h_info)); - } - - cudaFree(d_info); - cudaFree(d_work); + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + cudaFree (d_info); + cudaFree (d_work); + throw std::runtime_error ("heevdx (double) failed with info = " + std::to_string (h_info)); + } + + cudaFree (d_info); + cudaFree (d_work); } // --- complex --- -static inline -void heevdx(cusolverDnHandle_t& cusolver_handle, - const int n, - const int lda, - std::complex* d_A, - const char jobz, - const char uplo, - const char range, - const int il, const int iu, - const float vl, const float vu, - float* d_eigen_val, - int* h_meig) +static inline void + heevdx (cusolverDnHandle_t& cusolver_handle, + const int n, + const int lda, + std::complex* d_A, + const char jobz, + const char uplo, + const char range, + const int il, + const int iu, + const float vl, + const float vu, + float* d_eigen_val, + int* h_meig) { int lwork = 0; int* d_info = nullptr; 
cuComplex* d_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); - - cusolverEigMode_t jobz_t = cublas_eig_mode(jobz); - cublasFillMode_t uplo_t = cublas_fill_mode(uplo); - cusolverEigRange_t range_t = cublas_eig_range(range); - - CHECK_CUSOLVER(cusolverDnCheevdx_bufferSize( - cusolver_handle, - jobz_t, range_t, uplo_t, - n, - reinterpret_cast(d_A), lda, - vl, vu, il, iu, - h_meig, - d_eigen_val, - &lwork - )); - - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(cuComplex) * lwork)); - - CHECK_CUSOLVER(cusolverDnCheevdx( - cusolver_handle, - jobz_t, range_t, uplo_t, - n, - reinterpret_cast(d_A), lda, - vl, vu, il, iu, - h_meig, - d_eigen_val, - d_work, lwork, - d_info - )); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); + + cusolverEigMode_t jobz_t = cublas_eig_mode (jobz); + cublasFillMode_t uplo_t = cublas_fill_mode (uplo); + cusolverEigRange_t range_t = cublas_eig_range (range); + + CHECK_CUSOLVER (cusolverDnCheevdx_bufferSize (cusolver_handle, + jobz_t, + range_t, + uplo_t, + n, + reinterpret_cast (d_A), + lda, + vl, + vu, + il, + iu, + h_meig, + d_eigen_val, + &lwork)); + + CHECK_CUDA (cudaMalloc ((void**)&d_work, sizeof (cuComplex) * lwork)); + + CHECK_CUSOLVER (cusolverDnCheevdx (cusolver_handle, + jobz_t, + range_t, + uplo_t, + n, + reinterpret_cast (d_A), + lda, + vl, + vu, + il, + iu, + h_meig, + d_eigen_val, + d_work, + lwork, + d_info)); int h_info = 0; - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - cudaFree(d_info); cudaFree(d_work); - throw std::runtime_error("heevdx (complex) failed with info = " + std::to_string(h_info)); - } - - cudaFree(d_info); - cudaFree(d_work); + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + cudaFree (d_info); + cudaFree (d_work); + throw std::runtime_error ("heevdx (complex) failed with info = " + std::to_string (h_info)); + } + + cudaFree (d_info); + cudaFree (d_work); } // --- 
complex --- -static inline -void heevdx(cusolverDnHandle_t& cusolver_handle, - const int n, - const int lda, - std::complex* d_A, - const char jobz, - const char uplo, - const char range, - const int il, const int iu, - const double vl, const double vu, - double* d_eigen_val, - int* h_meig) +static inline void + heevdx (cusolverDnHandle_t& cusolver_handle, + const int n, + const int lda, + std::complex* d_A, + const char jobz, + const char uplo, + const char range, + const int il, + const int iu, + const double vl, + const double vu, + double* d_eigen_val, + int* h_meig) { int lwork = 0; int* d_info = nullptr; cuDoubleComplex* d_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); - - cusolverEigMode_t jobz_t = cublas_eig_mode(jobz); - cublasFillMode_t uplo_t = cublas_fill_mode(uplo); - cusolverEigRange_t range_t = cublas_eig_range(range); - - CHECK_CUSOLVER(cusolverDnZheevdx_bufferSize( - cusolver_handle, - jobz_t, range_t, uplo_t, - n, - reinterpret_cast(d_A), lda, - vl, vu, il, iu, - h_meig, - d_eigen_val, - &lwork - )); - - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(cuDoubleComplex) * lwork)); - - CHECK_CUSOLVER(cusolverDnZheevdx( - cusolver_handle, - jobz_t, range_t, uplo_t, - n, - reinterpret_cast(d_A), lda, - vl, vu, il, iu, - h_meig, - d_eigen_val, - d_work, lwork, - d_info - )); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); + + cusolverEigMode_t jobz_t = cublas_eig_mode (jobz); + cublasFillMode_t uplo_t = cublas_fill_mode (uplo); + cusolverEigRange_t range_t = cublas_eig_range (range); + + CHECK_CUSOLVER (cusolverDnZheevdx_bufferSize (cusolver_handle, + jobz_t, + range_t, + uplo_t, + n, + reinterpret_cast (d_A), + lda, + vl, + vu, + il, + iu, + h_meig, + d_eigen_val, + &lwork)); + + CHECK_CUDA (cudaMalloc ((void**)&d_work, sizeof (cuDoubleComplex) * lwork)); + + CHECK_CUSOLVER (cusolverDnZheevdx (cusolver_handle, + jobz_t, + range_t, + uplo_t, + n, + reinterpret_cast (d_A), + lda, + vl, + vu, + il, + iu, + h_meig, + 
d_eigen_val, + d_work, + lwork, + d_info)); int h_info = 0; - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - cudaFree(d_info); cudaFree(d_work); - throw std::runtime_error("heevdx (complex) failed with info = " + std::to_string(h_info)); - } - - cudaFree(d_info); - cudaFree(d_work); + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + cudaFree (d_info); + cudaFree (d_work); + throw std::runtime_error ("heevdx (complex) failed with info = " + std::to_string (h_info)); + } + + cudaFree (d_info); + cudaFree (d_work); } -static inline -void hegvd (cusolverDnHandle_t& cusolver_handle, const int& itype, const char& jobz, const char& uplo, const int& n, float* A, const int& lda, float* B, const int& ldb, float * W) +static inline void + hegvd (cusolverDnHandle_t& cusolver_handle, + const int& itype, + const char& jobz, + const char& uplo, + const int& n, + float* A, + const int& lda, + float* B, + const int& ldb, + float* W) { // prepare some values for cusolverDnSsygvd_bufferSize - int lwork = 0; + int lwork = 0; int h_info = 0; - int* d_info = nullptr; + int* d_info = nullptr; float* d_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. - CHECK_CUSOLVER(cusolverDnSsygvd_bufferSize(cusolver_handle, cublas_eig_type(itype), cublas_eig_mode(jobz), cublas_fill_mode(uplo), - n, A, lda, B, ldb, W, &lwork)); + CHECK_CUSOLVER (cusolverDnSsygvd_bufferSize (cusolver_handle, + cublas_eig_type (itype), + cublas_eig_mode (jobz), + cublas_fill_mode (uplo), + n, + A, + lda, + B, + ldb, + W, + &lwork)); // allocate memory - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(float) * lwork)); + CHECK_CUDA (cudaMalloc ((void**)&d_work, sizeof (float) * lwork)); // compute eigenvalues and eigenvectors. 
- CHECK_CUSOLVER(cusolverDnSsygvd(cusolver_handle, cublas_eig_type(itype), cublas_eig_mode(jobz), cublas_fill_mode(uplo), - n, A, lda, B, ldb, W, d_work, lwork, d_info)); - - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("heevd: failed to invert matrix"); - } - CHECK_CUDA(cudaFree(d_info)); - CHECK_CUDA(cudaFree(d_work)); + CHECK_CUSOLVER (cusolverDnSsygvd (cusolver_handle, + cublas_eig_type (itype), + cublas_eig_mode (jobz), + cublas_fill_mode (uplo), + n, + A, + lda, + B, + ldb, + W, + d_work, + lwork, + d_info)); + + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("heevd: failed to invert matrix"); + } + CHECK_CUDA (cudaFree (d_info)); + CHECK_CUDA (cudaFree (d_work)); } -static inline -void hegvd (cusolverDnHandle_t& cusolver_handle, const int& itype, const char& jobz, const char& uplo, const int& n, double* A, const int& lda, double* B, const int& ldb, double * W) +static inline void + hegvd (cusolverDnHandle_t& cusolver_handle, + const int& itype, + const char& jobz, + const char& uplo, + const int& n, + double* A, + const int& lda, + double* B, + const int& ldb, + double* W) { // prepare some values for cusolverDnDsygvd_bufferSize - int lwork = 0; + int lwork = 0; int h_info = 0; - int* d_info = nullptr; + int* d_info = nullptr; double* d_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. 
- CHECK_CUSOLVER(cusolverDnDsygvd_bufferSize(cusolver_handle, cublas_eig_type(itype), cublas_eig_mode(jobz), cublas_fill_mode(uplo), - n, A, lda, B, ldb, W, &lwork)); + CHECK_CUSOLVER (cusolverDnDsygvd_bufferSize (cusolver_handle, + cublas_eig_type (itype), + cublas_eig_mode (jobz), + cublas_fill_mode (uplo), + n, + A, + lda, + B, + ldb, + W, + &lwork)); // allocate memory - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(double) * lwork)); + CHECK_CUDA (cudaMalloc ((void**)&d_work, sizeof (double) * lwork)); // compute eigenvalues and eigenvectors. - CHECK_CUSOLVER(cusolverDnDsygvd(cusolver_handle, cublas_eig_type(itype), cublas_eig_mode(jobz), cublas_fill_mode(uplo), - n, A, lda, B, ldb, W, d_work, lwork, d_info)); - - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("heevd: failed to invert matrix"); - } - CHECK_CUDA(cudaFree(d_info)); - CHECK_CUDA(cudaFree(d_work)); + CHECK_CUSOLVER (cusolverDnDsygvd (cusolver_handle, + cublas_eig_type (itype), + cublas_eig_mode (jobz), + cublas_fill_mode (uplo), + n, + A, + lda, + B, + ldb, + W, + d_work, + lwork, + d_info)); + + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("heevd: failed to invert matrix"); + } + CHECK_CUDA (cudaFree (d_info)); + CHECK_CUDA (cudaFree (d_work)); } -static inline -void hegvd (cusolverDnHandle_t& cusolver_handle, const int& itype, const char& jobz, const char& uplo, const int& n, std::complex* A, const int& lda, std::complex* B, const int& ldb, float* W) +static inline void + hegvd (cusolverDnHandle_t& cusolver_handle, + const int& itype, + const char& jobz, + const char& uplo, + const int& n, + std::complex* A, + const int& lda, + std::complex* B, + const int& ldb, + float* W) { // prepare some values for cusolverDnChegvd_bufferSize - int lwork = 0; + int lwork = 0; int h_info = 0; - int* d_info = nullptr; + int* d_info = nullptr; 
cuComplex* d_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. - CHECK_CUSOLVER(cusolverDnChegvd_bufferSize(cusolver_handle, cublas_eig_type(itype), cublas_eig_mode(jobz), cublas_fill_mode(uplo), - n, reinterpret_cast(A), lda, reinterpret_cast(B), ldb, W, &lwork)); + CHECK_CUSOLVER (cusolverDnChegvd_bufferSize (cusolver_handle, + cublas_eig_type (itype), + cublas_eig_mode (jobz), + cublas_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + reinterpret_cast (B), + ldb, + W, + &lwork)); // allocate memory - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(cuComplex) * lwork)); + CHECK_CUDA (cudaMalloc ((void**)&d_work, sizeof (cuComplex) * lwork)); // compute eigenvalues and eigenvectors. - CHECK_CUSOLVER(cusolverDnChegvd(cusolver_handle, cublas_eig_type(itype), cublas_eig_mode(jobz), cublas_fill_mode(uplo), - n, reinterpret_cast(A), lda, reinterpret_cast(B), ldb, W, d_work, lwork, d_info)); - - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("heevd: failed to invert matrix"); - } - CHECK_CUDA(cudaFree(d_info)); - CHECK_CUDA(cudaFree(d_work)); + CHECK_CUSOLVER (cusolverDnChegvd (cusolver_handle, + cublas_eig_type (itype), + cublas_eig_mode (jobz), + cublas_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + reinterpret_cast (B), + ldb, + W, + d_work, + lwork, + d_info)); + + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("heevd: failed to invert matrix"); + } + CHECK_CUDA (cudaFree (d_info)); + CHECK_CUDA (cudaFree (d_work)); } -static inline -void hegvd (cusolverDnHandle_t& cusolver_handle, const int& itype, const char& jobz, const char& uplo, const int& n, std::complex* A, const int& lda, std::complex* B, const int& ldb, double* W) +static inline void + hegvd 
(cusolverDnHandle_t& cusolver_handle, + const int& itype, + const char& jobz, + const char& uplo, + const int& n, + std::complex* A, + const int& lda, + std::complex* B, + const int& ldb, + double* W) { // prepare some values for cusolverDnZhegvd_bufferSize - int lwork = 0; + int lwork = 0; int h_info = 0; - int* d_info = nullptr; + int* d_info = nullptr; cuDoubleComplex* d_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. - CHECK_CUSOLVER(cusolverDnZhegvd_bufferSize(cusolver_handle, cublas_eig_type(itype), cublas_eig_mode(jobz), cublas_fill_mode(uplo), - n, reinterpret_cast(A), lda, reinterpret_cast(B), ldb, W, &lwork)); + CHECK_CUSOLVER (cusolverDnZhegvd_bufferSize (cusolver_handle, + cublas_eig_type (itype), + cublas_eig_mode (jobz), + cublas_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + reinterpret_cast (B), + ldb, + W, + &lwork)); // allocate memory - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(cuDoubleComplex) * lwork)); + CHECK_CUDA (cudaMalloc ((void**)&d_work, sizeof (cuDoubleComplex) * lwork)); // compute eigenvalues and eigenvectors. 
- CHECK_CUSOLVER(cusolverDnZhegvd(cusolver_handle, cublas_eig_type(itype), cublas_eig_mode(jobz), cublas_fill_mode(uplo), - n, reinterpret_cast(A), lda, reinterpret_cast(B), ldb, W, d_work, lwork, d_info)); - - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("heevd: failed to invert matrix"); - } - CHECK_CUDA(cudaFree(d_info)); - CHECK_CUDA(cudaFree(d_work)); + CHECK_CUSOLVER (cusolverDnZhegvd (cusolver_handle, + cublas_eig_type (itype), + cublas_eig_mode (jobz), + cublas_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + reinterpret_cast (B), + ldb, + W, + d_work, + lwork, + d_info)); + + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("heevd: failed to invert matrix"); + } + CHECK_CUDA (cudaFree (d_info)); + CHECK_CUDA (cudaFree (d_work)); } // ===================================================================================================== @@ -610,541 +959,713 @@ void hegvd (cusolverDnHandle_t& cusolver_handle, const int& itype, const char& j // ===================================================================================================== // --- float --- -static inline -void hegvdx( - cusolverDnHandle_t& cusolver_handle, - const int itype, // 1: A*x = lambda*B*x - const char jobz, // 'V' or 'N' - const char range, // 'I', 'V', 'A' - const char uplo, // 'U' or 'L' - const int n, - const int lda, - float* d_A, // Input matrix A (device) - float* d_B, // Input matrix B (device) - const float vl, // for RANGE='V' - const float vu, - const int il, // for RANGE='I' - const int iu, - int* h_meig, // output: number of eigenvalues found - float* d_eigen_val, // output: eigenvalues - float* d_eigen_vec // output: eigenvectors (if jobz='V'), size ldz × m -) { +static inline void + hegvdx (cusolverDnHandle_t& cusolver_handle, + const int itype, // 1: A*x = lambda*B*x + const char jobz, // 'V' or 
'N' + const char range, // 'I', 'V', 'A' + const char uplo, // 'U' or 'L' + const int n, + const int lda, + float* d_A, // Input matrix A (device) + float* d_B, // Input matrix B (device) + const float vl, // for RANGE='V' + const float vu, + const int il, // for RANGE='I' + const int iu, + int* h_meig, // output: number of eigenvalues found + float* d_eigen_val, // output: eigenvalues + float* d_eigen_vec // output: eigenvectors (if jobz='V'), size ldz × m + ) +{ int lwork = 0; - int *d_info = nullptr; - float *d_work = nullptr; + int* d_info = nullptr; + float* d_work = nullptr; // Allocate device info - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); // Copy A and B to temporary buffers since sygvdx may modify them float *d_A_copy = nullptr, *d_B_copy = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_A_copy, sizeof(float) * n * lda)); - CHECK_CUDA(cudaMalloc((void**)&d_B_copy, sizeof(float) * n * lda)); - CHECK_CUDA(cudaMemcpy(d_A_copy, d_A, sizeof(float) * n * lda, cudaMemcpyDeviceToDevice)); - CHECK_CUDA(cudaMemcpy(d_B_copy, d_B, sizeof(float) * n * lda, cudaMemcpyDeviceToDevice)); + CHECK_CUDA (cudaMalloc ((void**)&d_A_copy, sizeof (float) * n * lda)); + CHECK_CUDA (cudaMalloc ((void**)&d_B_copy, sizeof (float) * n * lda)); + CHECK_CUDA (cudaMemcpy (d_A_copy, d_A, sizeof (float) * n * lda, cudaMemcpyDeviceToDevice)); + CHECK_CUDA (cudaMemcpy (d_B_copy, d_B, sizeof (float) * n * lda, cudaMemcpyDeviceToDevice)); // Set parameters - cusolverEigType_t itype_t = cublas_eig_type(itype); - cusolverEigMode_t jobz_t = cublas_eig_mode(jobz); - cusolverEigRange_t range_t = cublas_eig_range(range); - cublasFillMode_t uplo_t = cublas_fill_mode(uplo); + cusolverEigType_t itype_t = cublas_eig_type (itype); + cusolverEigMode_t jobz_t = cublas_eig_mode (jobz); + cusolverEigRange_t range_t = cublas_eig_range (range); + cublasFillMode_t uplo_t = cublas_fill_mode (uplo); // Query workspace size - 
CHECK_CUSOLVER(cusolverDnSsygvdx_bufferSize( - cusolver_handle, - itype_t, jobz_t, range_t, uplo_t, - n, - d_A_copy, lda, - d_B_copy, lda, - vl, vu, il, iu, - h_meig, - d_eigen_val, - &lwork - )); + CHECK_CUSOLVER (cusolverDnSsygvdx_bufferSize (cusolver_handle, + itype_t, + jobz_t, + range_t, + uplo_t, + n, + d_A_copy, + lda, + d_B_copy, + lda, + vl, + vu, + il, + iu, + h_meig, + d_eigen_val, + &lwork)); // Allocate workspace - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(float) * lwork)); + CHECK_CUDA (cudaMalloc ((void**)&d_work, sizeof (float) * lwork)); // Main call - CHECK_CUSOLVER(cusolverDnSsygvdx( - cusolver_handle, - itype_t, jobz_t, range_t, uplo_t, - n, - d_A_copy, lda, - d_B_copy, lda, - vl, vu, il, iu, - h_meig, - d_eigen_val, - d_work, lwork, - d_info - )); + CHECK_CUSOLVER (cusolverDnSsygvdx (cusolver_handle, + itype_t, + jobz_t, + range_t, + uplo_t, + n, + d_A_copy, + lda, + d_B_copy, + lda, + vl, + vu, + il, + iu, + h_meig, + d_eigen_val, + d_work, + lwork, + d_info)); // Check result int h_info = 0; - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info < 0) { - throw std::runtime_error("hegvdx (float): illegal argument #" + std::to_string(-h_info)); - } else if (h_info > 0) { - // If h_info <= n: convergence issue in tridiag solver (no vec) OR - // If h_info > n: B's leading minor of order (h_info - n) is not positive definite - if (jobz_t == CUSOLVER_EIG_MODE_NOVECTOR && h_info <= n) { - throw std::runtime_error("hegvdx (float): failed to converge, " + std::to_string(h_info) + " off-diagonal elements didn't converge"); - } else if (h_info > n) { - throw std::runtime_error("hegvdx (float): leading minor of order " + std::to_string(h_info - n) + " of B is not positive definite"); + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info < 0) + { + throw std::runtime_error ("hegvdx (float): illegal argument #" + std::to_string (-h_info)); + } + else if (h_info > 0) + { + 
// If h_info <= n: convergence issue in tridiag solver (no vec) OR + // If h_info > n: B's leading minor of order (h_info - n) is not positive definite + if (jobz_t == CUSOLVER_EIG_MODE_NOVECTOR && h_info <= n) + { + throw std::runtime_error ("hegvdx (float): failed to converge, " + std::to_string (h_info) + + " off-diagonal elements didn't converge"); + } + else if (h_info > n) + { + throw std::runtime_error ("hegvdx (float): leading minor of order " + std::to_string (h_info - n) + + " of B is not positive definite"); + } } - } // If jobz == 'V', copy eigenvectors from A (which now contains Z) to output - if (jobz == 'V') { - const int m = (*h_meig); // number of eigenvectors computed - CHECK_CUDA(cudaMemcpy(d_eigen_vec, d_A_copy, sizeof(float) * n * m, cudaMemcpyDeviceToDevice)); - } + if (jobz == 'V') + { + const int m = (*h_meig); // number of eigenvectors computed + CHECK_CUDA (cudaMemcpy (d_eigen_vec, d_A_copy, sizeof (float) * n * m, cudaMemcpyDeviceToDevice)); + } // Cleanup - cudaFree(d_info); - cudaFree(d_work); - cudaFree(d_A_copy); - cudaFree(d_B_copy); + cudaFree (d_info); + cudaFree (d_work); + cudaFree (d_A_copy); + cudaFree (d_B_copy); } - // --- double --- -static inline -void hegvdx( - cusolverDnHandle_t& cusolver_handle, - const int itype, - const char jobz, - const char range, - const char uplo, - const int n, - const int lda, - double* d_A, - double* d_B, - const double vl, - const double vu, - const int il, - const int iu, - int* h_meig, - double* d_eigen_val, - double* d_eigen_vec -) { +static inline void + hegvdx (cusolverDnHandle_t& cusolver_handle, + const int itype, + const char jobz, + const char range, + const char uplo, + const int n, + const int lda, + double* d_A, + double* d_B, + const double vl, + const double vu, + const int il, + const int iu, + int* h_meig, + double* d_eigen_val, + double* d_eigen_vec) +{ int lwork = 0; - int *d_info = nullptr; - double *d_work = nullptr; + int* d_info = nullptr; + double* d_work = nullptr; - 
CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); double *d_A_copy = nullptr, *d_B_copy = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_A_copy, sizeof(double) * n * lda)); - CHECK_CUDA(cudaMalloc((void**)&d_B_copy, sizeof(double) * n * lda)); - CHECK_CUDA(cudaMemcpy(d_A_copy, d_A, sizeof(double) * n * lda, cudaMemcpyDeviceToDevice)); - CHECK_CUDA(cudaMemcpy(d_B_copy, d_B, sizeof(double) * n * lda, cudaMemcpyDeviceToDevice)); - - cusolverEigType_t itype_t = cublas_eig_type(itype); - cusolverEigMode_t jobz_t = cublas_eig_mode(jobz); - cusolverEigRange_t range_t = cublas_eig_range(range); - cublasFillMode_t uplo_t = cublas_fill_mode(uplo); - - CHECK_CUSOLVER(cusolverDnDsygvdx_bufferSize( - cusolver_handle, - itype_t, jobz_t, range_t, uplo_t, - n, - d_A_copy, lda, - d_B_copy, lda, - vl, vu, il, iu, - h_meig, - d_eigen_val, - &lwork - )); - - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(double) * lwork)); - - CHECK_CUSOLVER(cusolverDnDsygvdx( - cusolver_handle, - itype_t, jobz_t, range_t, uplo_t, - n, - d_A_copy, lda, - d_B_copy, lda, - vl, vu, il, iu, - h_meig, - d_eigen_val, - d_work, lwork, - d_info - )); + CHECK_CUDA (cudaMalloc ((void**)&d_A_copy, sizeof (double) * n * lda)); + CHECK_CUDA (cudaMalloc ((void**)&d_B_copy, sizeof (double) * n * lda)); + CHECK_CUDA (cudaMemcpy (d_A_copy, d_A, sizeof (double) * n * lda, cudaMemcpyDeviceToDevice)); + CHECK_CUDA (cudaMemcpy (d_B_copy, d_B, sizeof (double) * n * lda, cudaMemcpyDeviceToDevice)); + + cusolverEigType_t itype_t = cublas_eig_type (itype); + cusolverEigMode_t jobz_t = cublas_eig_mode (jobz); + cusolverEigRange_t range_t = cublas_eig_range (range); + cublasFillMode_t uplo_t = cublas_fill_mode (uplo); + + CHECK_CUSOLVER (cusolverDnDsygvdx_bufferSize (cusolver_handle, + itype_t, + jobz_t, + range_t, + uplo_t, + n, + d_A_copy, + lda, + d_B_copy, + lda, + vl, + vu, + il, + iu, + h_meig, + d_eigen_val, + &lwork)); + + CHECK_CUDA (cudaMalloc 
((void**)&d_work, sizeof (double) * lwork)); + + CHECK_CUSOLVER (cusolverDnDsygvdx (cusolver_handle, + itype_t, + jobz_t, + range_t, + uplo_t, + n, + d_A_copy, + lda, + d_B_copy, + lda, + vl, + vu, + il, + iu, + h_meig, + d_eigen_val, + d_work, + lwork, + d_info)); int h_info = 0; - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info < 0) { - throw std::runtime_error("hegvdx (double): illegal argument #" + std::to_string(-h_info)); - } else if (h_info > 0) { - if (jobz_t == CUSOLVER_EIG_MODE_NOVECTOR && h_info <= n) { - throw std::runtime_error("hegvdx (double): failed to converge, " + std::to_string(h_info) + " off-diagonal elements didn't converge"); - } else if (h_info > n) { - throw std::runtime_error("hegvdx (double): leading minor of order " + std::to_string(h_info - n) + " of B is not positive definite"); + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info < 0) + { + throw std::runtime_error ("hegvdx (double): illegal argument #" + std::to_string (-h_info)); + } + else if (h_info > 0) + { + if (jobz_t == CUSOLVER_EIG_MODE_NOVECTOR && h_info <= n) + { + throw std::runtime_error ("hegvdx (double): failed to converge, " + std::to_string (h_info) + + " off-diagonal elements didn't converge"); + } + else if (h_info > n) + { + throw std::runtime_error ("hegvdx (double): leading minor of order " + std::to_string (h_info - n) + + " of B is not positive definite"); + } } - } - if (jobz == 'V') { - const int m = (*h_meig); - CHECK_CUDA(cudaMemcpy(d_eigen_vec, d_A_copy, sizeof(double) * n * m, cudaMemcpyDeviceToDevice)); - } + if (jobz == 'V') + { + const int m = (*h_meig); + CHECK_CUDA (cudaMemcpy (d_eigen_vec, d_A_copy, sizeof (double) * n * m, cudaMemcpyDeviceToDevice)); + } - cudaFree(d_info); - cudaFree(d_work); - cudaFree(d_A_copy); - cudaFree(d_B_copy); + cudaFree (d_info); + cudaFree (d_work); + cudaFree (d_A_copy); + cudaFree (d_B_copy); } - // --- complex --- -static inline 
-void hegvdx( - cusolverDnHandle_t& cusolver_handle, - const int itype, - const char jobz, - const char range, - const char uplo, - const int n, - const int lda, - std::complex* d_A, - std::complex* d_B, - const float vl, - const float vu, - const int il, - const int iu, - int* h_meig, - float* d_eigen_val, - std::complex* d_eigen_vec -) { +static inline void + hegvdx (cusolverDnHandle_t& cusolver_handle, + const int itype, + const char jobz, + const char range, + const char uplo, + const int n, + const int lda, + std::complex* d_A, + std::complex* d_B, + const float vl, + const float vu, + const int il, + const int iu, + int* h_meig, + float* d_eigen_val, + std::complex* d_eigen_vec) +{ int lwork = 0; - int *d_info = nullptr; - cuComplex *d_work = nullptr; + int* d_info = nullptr; + cuComplex* d_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); cuComplex *d_A_copy = nullptr, *d_B_copy = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_A_copy, sizeof(cuComplex) * n * lda)); - CHECK_CUDA(cudaMalloc((void**)&d_B_copy, sizeof(cuComplex) * n * lda)); - CHECK_CUDA(cudaMemcpy(d_A_copy, reinterpret_cast(d_A), sizeof(cuComplex) * n * lda, cudaMemcpyDeviceToDevice)); - CHECK_CUDA(cudaMemcpy(d_B_copy, reinterpret_cast(d_B), sizeof(cuComplex) * n * lda, cudaMemcpyDeviceToDevice)); - - cusolverEigType_t itype_t = cublas_eig_type(itype); - cusolverEigMode_t jobz_t = cublas_eig_mode(jobz); - cusolverEigRange_t range_t = cublas_eig_range(range); - cublasFillMode_t uplo_t = cublas_fill_mode(uplo); - - CHECK_CUSOLVER(cusolverDnChegvdx_bufferSize( - cusolver_handle, - itype_t, jobz_t, range_t, uplo_t, - n, - d_A_copy, lda, - d_B_copy, lda, - vl, vu, il, iu, - h_meig, - d_eigen_val, - &lwork - )); - - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(cuComplex) * lwork)); - - CHECK_CUSOLVER(cusolverDnChegvdx( - cusolver_handle, - itype_t, jobz_t, range_t, uplo_t, - n, - d_A_copy, lda, - d_B_copy, lda, - vl, vu, 
il, iu, - h_meig, - d_eigen_val, - d_work, lwork, - d_info - )); + CHECK_CUDA (cudaMalloc ((void**)&d_A_copy, sizeof (cuComplex) * n * lda)); + CHECK_CUDA (cudaMalloc ((void**)&d_B_copy, sizeof (cuComplex) * n * lda)); + CHECK_CUDA (cudaMemcpy (d_A_copy, + reinterpret_cast (d_A), + sizeof (cuComplex) * n * lda, + cudaMemcpyDeviceToDevice)); + CHECK_CUDA (cudaMemcpy (d_B_copy, + reinterpret_cast (d_B), + sizeof (cuComplex) * n * lda, + cudaMemcpyDeviceToDevice)); + + cusolverEigType_t itype_t = cublas_eig_type (itype); + cusolverEigMode_t jobz_t = cublas_eig_mode (jobz); + cusolverEigRange_t range_t = cublas_eig_range (range); + cublasFillMode_t uplo_t = cublas_fill_mode (uplo); + + CHECK_CUSOLVER (cusolverDnChegvdx_bufferSize (cusolver_handle, + itype_t, + jobz_t, + range_t, + uplo_t, + n, + d_A_copy, + lda, + d_B_copy, + lda, + vl, + vu, + il, + iu, + h_meig, + d_eigen_val, + &lwork)); + + CHECK_CUDA (cudaMalloc ((void**)&d_work, sizeof (cuComplex) * lwork)); + + CHECK_CUSOLVER (cusolverDnChegvdx (cusolver_handle, + itype_t, + jobz_t, + range_t, + uplo_t, + n, + d_A_copy, + lda, + d_B_copy, + lda, + vl, + vu, + il, + iu, + h_meig, + d_eigen_val, + d_work, + lwork, + d_info)); int h_info = 0; - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info < 0) { - throw std::runtime_error("hegvdx (complex): illegal argument #" + std::to_string(-h_info)); - } else if (h_info > 0) { - if (jobz_t == CUSOLVER_EIG_MODE_NOVECTOR && h_info <= n) { - throw std::runtime_error("hegvdx (complex): failed to converge, " + std::to_string(h_info) + " off-diagonal elements didn't converge"); - } else if (h_info > n) { - throw std::runtime_error("hegvdx (complex): leading minor of order " + std::to_string(h_info - n) + " of B is not positive definite"); + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info < 0) + { + throw std::runtime_error ("hegvdx (complex): illegal argument #" + std::to_string (-h_info)); + 
} + else if (h_info > 0) + { + if (jobz_t == CUSOLVER_EIG_MODE_NOVECTOR && h_info <= n) + { + throw std::runtime_error ("hegvdx (complex): failed to converge, " + std::to_string (h_info) + + " off-diagonal elements didn't converge"); + } + else if (h_info > n) + { + throw std::runtime_error ("hegvdx (complex): leading minor of order " + + std::to_string (h_info - n) + " of B is not positive definite"); + } } - } - if (jobz == 'V') { - const int m = (*h_meig); - CHECK_CUDA(cudaMemcpy(reinterpret_cast(d_eigen_vec), d_A_copy, sizeof(cuComplex) * n * m, cudaMemcpyDeviceToDevice)); - } + if (jobz == 'V') + { + const int m = (*h_meig); + CHECK_CUDA (cudaMemcpy (reinterpret_cast (d_eigen_vec), + d_A_copy, + sizeof (cuComplex) * n * m, + cudaMemcpyDeviceToDevice)); + } - cudaFree(d_info); - cudaFree(d_work); - cudaFree(d_A_copy); - cudaFree(d_B_copy); + cudaFree (d_info); + cudaFree (d_work); + cudaFree (d_A_copy); + cudaFree (d_B_copy); } - // --- complex --- -static inline -void hegvdx( - cusolverDnHandle_t& cusolver_handle, - const int itype, - const char jobz, - const char range, - const char uplo, - const int n, - const int lda, - std::complex* d_A, - std::complex* d_B, - const double vl, - const double vu, - const int il, - const int iu, - int* h_meig, - double* d_eigen_val, - std::complex* d_eigen_vec -) { +static inline void + hegvdx (cusolverDnHandle_t& cusolver_handle, + const int itype, + const char jobz, + const char range, + const char uplo, + const int n, + const int lda, + std::complex* d_A, + std::complex* d_B, + const double vl, + const double vu, + const int il, + const int iu, + int* h_meig, + double* d_eigen_val, + std::complex* d_eigen_vec) +{ int lwork = 0; - int *d_info = nullptr; - cuDoubleComplex *d_work = nullptr; + int* d_info = nullptr; + cuDoubleComplex* d_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); cuDoubleComplex *d_A_copy = nullptr, *d_B_copy = nullptr; - 
CHECK_CUDA(cudaMalloc((void**)&d_A_copy, sizeof(cuDoubleComplex) * n * lda)); - CHECK_CUDA(cudaMalloc((void**)&d_B_copy, sizeof(cuDoubleComplex) * n * lda)); - CHECK_CUDA(cudaMemcpy(d_A_copy, reinterpret_cast(d_A), sizeof(cuDoubleComplex) * n * lda, cudaMemcpyDeviceToDevice)); - CHECK_CUDA(cudaMemcpy(d_B_copy, reinterpret_cast(d_B), sizeof(cuDoubleComplex) * n * lda, cudaMemcpyDeviceToDevice)); - - cusolverEigType_t itype_t = cublas_eig_type(itype); - cusolverEigMode_t jobz_t = cublas_eig_mode(jobz); - cusolverEigRange_t range_t = cublas_eig_range(range); - cublasFillMode_t uplo_t = cublas_fill_mode(uplo); - - CHECK_CUSOLVER(cusolverDnZhegvdx_bufferSize( - cusolver_handle, - itype_t, jobz_t, range_t, uplo_t, - n, - d_A_copy, lda, - d_B_copy, lda, - vl, vu, il, iu, - h_meig, - d_eigen_val, - &lwork - )); - - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(cuDoubleComplex) * lwork)); - - CHECK_CUSOLVER(cusolverDnZhegvdx( - cusolver_handle, - itype_t, jobz_t, range_t, uplo_t, - n, - d_A_copy, lda, - d_B_copy, lda, - vl, vu, il, iu, - h_meig, - d_eigen_val, - d_work, lwork, - d_info - )); + CHECK_CUDA (cudaMalloc ((void**)&d_A_copy, sizeof (cuDoubleComplex) * n * lda)); + CHECK_CUDA (cudaMalloc ((void**)&d_B_copy, sizeof (cuDoubleComplex) * n * lda)); + CHECK_CUDA (cudaMemcpy (d_A_copy, + reinterpret_cast (d_A), + sizeof (cuDoubleComplex) * n * lda, + cudaMemcpyDeviceToDevice)); + CHECK_CUDA (cudaMemcpy (d_B_copy, + reinterpret_cast (d_B), + sizeof (cuDoubleComplex) * n * lda, + cudaMemcpyDeviceToDevice)); + + cusolverEigType_t itype_t = cublas_eig_type (itype); + cusolverEigMode_t jobz_t = cublas_eig_mode (jobz); + cusolverEigRange_t range_t = cublas_eig_range (range); + cublasFillMode_t uplo_t = cublas_fill_mode (uplo); + + CHECK_CUSOLVER (cusolverDnZhegvdx_bufferSize (cusolver_handle, + itype_t, + jobz_t, + range_t, + uplo_t, + n, + d_A_copy, + lda, + d_B_copy, + lda, + vl, + vu, + il, + iu, + h_meig, + d_eigen_val, + &lwork)); + + CHECK_CUDA (cudaMalloc 
((void**)&d_work, sizeof (cuDoubleComplex) * lwork)); + + CHECK_CUSOLVER (cusolverDnZhegvdx (cusolver_handle, + itype_t, + jobz_t, + range_t, + uplo_t, + n, + d_A_copy, + lda, + d_B_copy, + lda, + vl, + vu, + il, + iu, + h_meig, + d_eigen_val, + d_work, + lwork, + d_info)); int h_info = 0; - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info < 0) { - throw std::runtime_error("hegvdx (complex): illegal argument #" + std::to_string(-h_info)); - } else if (h_info > 0) { - if (jobz_t == CUSOLVER_EIG_MODE_NOVECTOR && h_info <= n) { - throw std::runtime_error("hegvdx (complex): failed to converge, " + std::to_string(h_info) + " off-diagonal elements didn't converge"); - } else if (h_info > n) { - throw std::runtime_error("hegvdx (complex): leading minor of order " + std::to_string(h_info - n) + " of B is not positive definite"); + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info < 0) + { + throw std::runtime_error ("hegvdx (complex): illegal argument #" + std::to_string (-h_info)); + } + else if (h_info > 0) + { + if (jobz_t == CUSOLVER_EIG_MODE_NOVECTOR && h_info <= n) + { + throw std::runtime_error ("hegvdx (complex): failed to converge, " + std::to_string (h_info) + + " off-diagonal elements didn't converge"); + } + else if (h_info > n) + { + throw std::runtime_error ("hegvdx (complex): leading minor of order " + + std::to_string (h_info - n) + " of B is not positive definite"); + } } - } - if (jobz == 'V') { - const int m = (*h_meig); - CHECK_CUDA(cudaMemcpy(reinterpret_cast(d_eigen_vec), d_A_copy, sizeof(cuDoubleComplex) * n * m, cudaMemcpyDeviceToDevice)); - } + if (jobz == 'V') + { + const int m = (*h_meig); + CHECK_CUDA (cudaMemcpy (reinterpret_cast (d_eigen_vec), + d_A_copy, + sizeof (cuDoubleComplex) * n * m, + cudaMemcpyDeviceToDevice)); + } - cudaFree(d_info); - cudaFree(d_work); - cudaFree(d_A_copy); - cudaFree(d_B_copy); + cudaFree (d_info); + cudaFree (d_work); + cudaFree 
(d_A_copy); + cudaFree (d_B_copy); } - // --- getrf -static inline -void getrf(cusolverDnHandle_t& cusolver_handle, const int& m, const int& n, float* A, const int& lda, int* ipiv) +static inline void + getrf (cusolverDnHandle_t& cusolver_handle, const int& m, const int& n, float* A, const int& lda, int* ipiv) { // prepare some values for cusolverDnSgetrf_bufferSize int lwork = 0; int h_info = 0; int* d_info = nullptr; float* d_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. - CHECK_CUSOLVER(cusolverDnSgetrf_bufferSize(cusolver_handle, m, n, A, lda, &lwork)); + CHECK_CUSOLVER (cusolverDnSgetrf_bufferSize (cusolver_handle, m, n, A, lda, &lwork)); // allocate memory - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(float) * lwork)); + CHECK_CUDA (cudaMalloc ((void**)&d_work, sizeof (float) * lwork)); // Perform LU decomposition - CHECK_CUSOLVER(cusolverDnSgetrf(cusolver_handle, m, n, A, lda, d_work, ipiv, d_info)); + CHECK_CUSOLVER (cusolverDnSgetrf (cusolver_handle, m, n, A, lda, d_work, ipiv, d_info)); - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("getrf: failed to compute LU factorization"); - } + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("getrf: failed to compute LU factorization"); + } - CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); } -static inline -void getrf(cusolverDnHandle_t& cusolver_handle, const int& m, const int& n, double* A, const int& lda, int* ipiv) +static inline void + getrf (cusolverDnHandle_t& cusolver_handle, const int& m, const int& n, double* A, const int& lda, int* ipiv) { // prepare some values for cusolverDnDgetrf_bufferSize int lwork = 0; int h_info = 
0; int* d_info = nullptr; double* d_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. - CHECK_CUSOLVER(cusolverDnDgetrf_bufferSize(cusolver_handle, m, n, A, lda, &lwork)); + CHECK_CUSOLVER (cusolverDnDgetrf_bufferSize (cusolver_handle, m, n, A, lda, &lwork)); // allocate memory - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(double) * lwork)); + CHECK_CUDA (cudaMalloc ((void**)&d_work, sizeof (double) * lwork)); // Perform LU decomposition - CHECK_CUSOLVER(cusolverDnDgetrf(cusolver_handle, m, n, A, lda, d_work, ipiv, d_info)); + CHECK_CUSOLVER (cusolverDnDgetrf (cusolver_handle, m, n, A, lda, d_work, ipiv, d_info)); - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("getrf: failed to compute LU factorization"); - } + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("getrf: failed to compute LU factorization"); + } - CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); } -static inline -void getrf(cusolverDnHandle_t& cusolver_handle, const int& m, const int& n, std::complex* A, const int& lda, int* ipiv) +static inline void + getrf (cusolverDnHandle_t& cusolver_handle, + const int& m, + const int& n, + std::complex* A, + const int& lda, + int* ipiv) { // prepare some values for cusolverDnCgetrf_bufferSize int lwork = 0; int h_info = 0; int* d_info = nullptr; cuComplex* d_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. 
- CHECK_CUSOLVER(cusolverDnCgetrf_bufferSize(cusolver_handle, m, n, reinterpret_cast(A), lda, &lwork)); + CHECK_CUSOLVER (cusolverDnCgetrf_bufferSize (cusolver_handle, m, n, reinterpret_cast (A), lda, &lwork)); // allocate memory - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(cuComplex) * lwork)); + CHECK_CUDA (cudaMalloc ((void**)&d_work, sizeof (cuComplex) * lwork)); // Perform LU decomposition - CHECK_CUSOLVER(cusolverDnCgetrf(cusolver_handle, m, n, reinterpret_cast(A), lda, d_work, ipiv, d_info)); + CHECK_CUSOLVER ( + cusolverDnCgetrf (cusolver_handle, m, n, reinterpret_cast (A), lda, d_work, ipiv, d_info)); - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("getrf: failed to compute LU factorization"); - } + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("getrf: failed to compute LU factorization"); + } - CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); } -static inline -void getrf(cusolverDnHandle_t& cusolver_handle, const int& m, const int& n, std::complex* A, const int& lda, int* ipiv) +static inline void + getrf (cusolverDnHandle_t& cusolver_handle, + const int& m, + const int& n, + std::complex* A, + const int& lda, + int* ipiv) { // prepare some values for cusolverDnZgetrf_bufferSize int lwork = 0; int h_info = 0; int* d_info = nullptr; cuDoubleComplex* d_work = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. 
- CHECK_CUSOLVER(cusolverDnZgetrf_bufferSize(cusolver_handle, m, n, reinterpret_cast(A), lda, &lwork)); + CHECK_CUSOLVER ( + cusolverDnZgetrf_bufferSize (cusolver_handle, m, n, reinterpret_cast (A), lda, &lwork)); // allocate memory - CHECK_CUDA(cudaMalloc((void**)&d_work, sizeof(cuDoubleComplex) * lwork)); + CHECK_CUDA (cudaMalloc ((void**)&d_work, sizeof (cuDoubleComplex) * lwork)); // Perform LU decomposition - CHECK_CUSOLVER(cusolverDnZgetrf(cusolver_handle, m, n, reinterpret_cast(A), lda, d_work, ipiv, d_info)); + CHECK_CUSOLVER ( + cusolverDnZgetrf (cusolver_handle, m, n, reinterpret_cast (A), lda, d_work, ipiv, d_info)); - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("getrf: failed to compute LU factorization"); - } + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("getrf: failed to compute LU factorization"); + } - CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); } -static inline -void getrs(cusolverDnHandle_t& cusolver_handle, const char& trans, const int& n, const int& nrhs, float* A, const int& lda, const int* ipiv, float* B, const int& ldb) +static inline void + getrs (cusolverDnHandle_t& cusolver_handle, + const char& trans, + const int& n, + const int& nrhs, + float* A, + const int& lda, + const int* ipiv, + float* B, + const int& ldb) { int h_info = 0; int* d_info = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); - CHECK_CUSOLVER(cusolverDnSgetrs(cusolver_handle, GetCublasOperation(trans), n, nrhs, A, lda, ipiv, B, ldb, d_info)); + CHECK_CUSOLVER ( + cusolverDnSgetrs (cusolver_handle, GetCublasOperation (trans), n, nrhs, A, lda, ipiv, B, ldb, d_info)); - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - 
if (h_info != 0) { - throw std::runtime_error("getrs: failed to solve the linear system"); - } + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("getrs: failed to solve the linear system"); + } - CHECK_CUDA(cudaFree(d_info)); + CHECK_CUDA (cudaFree (d_info)); } -static inline -void getrs(cusolverDnHandle_t& cusolver_handle, const char& trans, const int& n, const int& nrhs, double* A, const int& lda, const int* ipiv, double* B, const int& ldb) +static inline void + getrs (cusolverDnHandle_t& cusolver_handle, + const char& trans, + const int& n, + const int& nrhs, + double* A, + const int& lda, + const int* ipiv, + double* B, + const int& ldb) { int h_info = 0; int* d_info = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); - CHECK_CUSOLVER(cusolverDnDgetrs(cusolver_handle, GetCublasOperation(trans), n, nrhs, A, lda, ipiv, B, ldb, d_info)); + CHECK_CUSOLVER ( + cusolverDnDgetrs (cusolver_handle, GetCublasOperation (trans), n, nrhs, A, lda, ipiv, B, ldb, d_info)); - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("getrs: failed to solve the linear system"); - } + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("getrs: failed to solve the linear system"); + } - CHECK_CUDA(cudaFree(d_info)); + CHECK_CUDA (cudaFree (d_info)); } -static inline -void getrs(cusolverDnHandle_t& cusolver_handle, const char& trans, const int& n, const int& nrhs, std::complex* A, const int& lda, const int* ipiv, std::complex* B, const int& ldb) +static inline void + getrs (cusolverDnHandle_t& cusolver_handle, + const char& trans, + const int& n, + const int& nrhs, + std::complex* A, + const int& lda, + const int* ipiv, + std::complex* B, + const int& ldb) { int h_info = 0; int* 
d_info = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); - - CHECK_CUSOLVER(cusolverDnCgetrs(cusolver_handle, GetCublasOperation(trans), n, nrhs, reinterpret_cast(A), lda, ipiv, reinterpret_cast(B), ldb, d_info)); - - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("getrs: failed to solve the linear system"); - } + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); + + CHECK_CUSOLVER (cusolverDnCgetrs (cusolver_handle, + GetCublasOperation (trans), + n, + nrhs, + reinterpret_cast (A), + lda, + ipiv, + reinterpret_cast (B), + ldb, + d_info)); + + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("getrs: failed to solve the linear system"); + } - CHECK_CUDA(cudaFree(d_info)); + CHECK_CUDA (cudaFree (d_info)); } -static inline -void getrs(cusolverDnHandle_t& cusolver_handle, const char& trans, const int& n, const int& nrhs, std::complex* A, const int& lda, const int* ipiv, std::complex* B, const int& ldb) +static inline void + getrs (cusolverDnHandle_t& cusolver_handle, + const char& trans, + const int& n, + const int& nrhs, + std::complex* A, + const int& lda, + const int* ipiv, + std::complex* B, + const int& ldb) { int h_info = 0; int* d_info = nullptr; - CHECK_CUDA(cudaMalloc((void**)&d_info, sizeof(int))); - - CHECK_CUSOLVER(cusolverDnZgetrs(cusolver_handle, GetCublasOperation(trans), n, nrhs, reinterpret_cast(A), lda, ipiv, reinterpret_cast(B), ldb, d_info)); - - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("getrs: failed to solve the linear system"); - } + CHECK_CUDA (cudaMalloc ((void**)&d_info, sizeof (int))); + + CHECK_CUSOLVER (cusolverDnZgetrs (cusolver_handle, + GetCublasOperation (trans), + n, + nrhs, + reinterpret_cast (A), + lda, + ipiv, + reinterpret_cast (B), + ldb, + d_info)); + + 
CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("getrs: failed to solve the linear system"); + } - CHECK_CUDA(cudaFree(d_info)); + CHECK_CUDA (cudaFree (d_info)); } // QR decomposition @@ -1240,339 +1761,366 @@ void getrs(cusolverDnHandle_t& cusolver_handle, const char& trans, const int& n, // geqrf // --- float --- -static inline void geqrf( - cusolverDnHandle_t& cusolver_handle, - const int m, - const int n, - float* d_A, - const int lda, - float* d_tau -) { +static inline void + geqrf (cusolverDnHandle_t& cusolver_handle, const int m, const int n, float* d_A, const int lda, float* d_tau) +{ int lwork = 0; - CHECK_CUSOLVER(cusolverDnSgeqrf_bufferSize( - cusolver_handle, m, n, d_A, lda, &lwork)); + CHECK_CUSOLVER (cusolverDnSgeqrf_bufferSize (cusolver_handle, m, n, d_A, lda, &lwork)); float* d_work = nullptr; - int* d_info = nullptr; + int* d_info = nullptr; - if (lwork > 0) { - CHECK_CUDA(cudaMalloc(reinterpret_cast(&d_work), sizeof(float) * lwork)); - } - CHECK_CUDA(cudaMalloc(reinterpret_cast(&d_info), sizeof(int))); + if (lwork > 0) + { + CHECK_CUDA (cudaMalloc (reinterpret_cast (&d_work), sizeof (float) * lwork)); + } + CHECK_CUDA (cudaMalloc (reinterpret_cast (&d_info), sizeof (int))); - CHECK_CUSOLVER(cusolverDnSgeqrf( - cusolver_handle, m, n, d_A, lda, d_tau, d_work, lwork, d_info)); + CHECK_CUSOLVER (cusolverDnSgeqrf (cusolver_handle, m, n, d_A, lda, d_tau, d_work, lwork, d_info)); int h_info = 0; - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - std::cout << "geqrf (S): info = " << h_info << std::endl; - if (d_work) CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); - throw std::runtime_error("geqrf (S): QR factorization failed"); - } - - if (d_work) CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 
0) + { + std::cout << "geqrf (S): info = " << h_info << std::endl; + if (d_work) + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); + throw std::runtime_error ("geqrf (S): QR factorization failed"); + } + + if (d_work) + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); } // --- double --- -static inline void geqrf( - cusolverDnHandle_t& cusolver_handle, - const int m, - const int n, - double* d_A, - const int lda, - double* d_tau -) { +static inline void + geqrf (cusolverDnHandle_t& cusolver_handle, const int m, const int n, double* d_A, const int lda, double* d_tau) +{ int lwork = 0; - CHECK_CUSOLVER(cusolverDnDgeqrf_bufferSize( - cusolver_handle, m, n, d_A, lda, &lwork)); + CHECK_CUSOLVER (cusolverDnDgeqrf_bufferSize (cusolver_handle, m, n, d_A, lda, &lwork)); double* d_work = nullptr; - int* d_info = nullptr; + int* d_info = nullptr; - if (lwork > 0) { - CHECK_CUDA(cudaMalloc(reinterpret_cast(&d_work), sizeof(double) * lwork)); - } - CHECK_CUDA(cudaMalloc(reinterpret_cast(&d_info), sizeof(int))); + if (lwork > 0) + { + CHECK_CUDA (cudaMalloc (reinterpret_cast (&d_work), sizeof (double) * lwork)); + } + CHECK_CUDA (cudaMalloc (reinterpret_cast (&d_info), sizeof (int))); - CHECK_CUSOLVER(cusolverDnDgeqrf( - cusolver_handle, m, n, d_A, lda, d_tau, d_work, lwork, d_info)); + CHECK_CUSOLVER (cusolverDnDgeqrf (cusolver_handle, m, n, d_A, lda, d_tau, d_work, lwork, d_info)); int h_info = 0; - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - std::cout << "geqrf (D): info = " << h_info << std::endl; - if (d_work) CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); - throw std::runtime_error("geqrf (D): QR factorization failed"); - } - - if (d_work) CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + std::cout << "geqrf (D): info = " << h_info << std::endl; + if 
(d_work) + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); + throw std::runtime_error ("geqrf (D): QR factorization failed"); + } + + if (d_work) + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); } // --- std::complex --- -static inline void geqrf( - cusolverDnHandle_t& cusolver_handle, - const int m, - const int n, - std::complex* d_A, - const int lda, - std::complex* d_tau -) { +static inline void + geqrf (cusolverDnHandle_t& cusolver_handle, + const int m, + const int n, + std::complex* d_A, + const int lda, + std::complex* d_tau) +{ int lwork = 0; - CHECK_CUSOLVER(cusolverDnCgeqrf_bufferSize( - cusolver_handle, m, n, - reinterpret_cast(d_A), - lda, - &lwork // ← 这里才是 lwork 的地址! - )); + CHECK_CUSOLVER (cusolverDnCgeqrf_bufferSize (cusolver_handle, + m, + n, + reinterpret_cast (d_A), + lda, + &lwork // ← 这里才是 lwork 的地址! + )); cuComplex* d_work = nullptr; - int* d_info = nullptr; - - if (lwork > 0) { - CHECK_CUDA(cudaMalloc(reinterpret_cast(&d_work), sizeof(cuComplex) * lwork)); - } - CHECK_CUDA(cudaMalloc(reinterpret_cast(&d_info), sizeof(int))); + int* d_info = nullptr; - CHECK_CUSOLVER(cusolverDnCgeqrf( - cusolver_handle, m, n, - reinterpret_cast(d_A), - lda, - reinterpret_cast(d_tau), // ← 这里才是 d_tau - d_work, lwork, d_info)); + if (lwork > 0) + { + CHECK_CUDA (cudaMalloc (reinterpret_cast (&d_work), sizeof (cuComplex) * lwork)); + } + CHECK_CUDA (cudaMalloc (reinterpret_cast (&d_info), sizeof (int))); + + CHECK_CUSOLVER (cusolverDnCgeqrf (cusolver_handle, + m, + n, + reinterpret_cast (d_A), + lda, + reinterpret_cast (d_tau), // ← 这里才是 d_tau + d_work, + lwork, + d_info)); int h_info = 0; - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - std::cout << "geqrf (C): info = " << h_info << std::endl; - if (d_work) CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); - throw std::runtime_error("geqrf (C): QR factorization failed"); - } - - if (d_work) 
CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + std::cout << "geqrf (C): info = " << h_info << std::endl; + if (d_work) + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); + throw std::runtime_error ("geqrf (C): QR factorization failed"); + } + + if (d_work) + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); } // --- std::complex --- -static inline void geqrf( - cusolverDnHandle_t& cusolver_handle, - const int m, - const int n, - std::complex* d_A, - const int lda, - std::complex* d_tau -) { +static inline void + geqrf (cusolverDnHandle_t& cusolver_handle, + const int m, + const int n, + std::complex* d_A, + const int lda, + std::complex* d_tau) +{ int lwork = 0; - CHECK_CUSOLVER(cusolverDnZgeqrf_bufferSize( - cusolver_handle, m, n, - reinterpret_cast(d_A), - lda, - &lwork - )); + CHECK_CUSOLVER ( + cusolverDnZgeqrf_bufferSize (cusolver_handle, m, n, reinterpret_cast (d_A), lda, &lwork)); cuDoubleComplex* d_work = nullptr; - int* d_info = nullptr; - - if (lwork > 0) { - CHECK_CUDA(cudaMalloc(reinterpret_cast(&d_work), sizeof(cuDoubleComplex) * lwork)); - } - CHECK_CUDA(cudaMalloc(reinterpret_cast(&d_info), sizeof(int))); + int* d_info = nullptr; - CHECK_CUSOLVER(cusolverDnZgeqrf( - cusolver_handle, m, n, - reinterpret_cast(d_A), - lda, - reinterpret_cast(d_tau), - d_work, lwork, d_info)); + if (lwork > 0) + { + CHECK_CUDA (cudaMalloc (reinterpret_cast (&d_work), sizeof (cuDoubleComplex) * lwork)); + } + CHECK_CUDA (cudaMalloc (reinterpret_cast (&d_info), sizeof (int))); + + CHECK_CUSOLVER (cusolverDnZgeqrf (cusolver_handle, + m, + n, + reinterpret_cast (d_A), + lda, + reinterpret_cast (d_tau), + d_work, + lwork, + d_info)); int h_info = 0; - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - std::cout << "geqrf (Z): info = " << h_info << std::endl; - if (d_work) 
CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); - throw std::runtime_error("geqrf (Z): QR factorization failed"); - } - - if (d_work) CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); -} + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + std::cout << "geqrf (Z): info = " << h_info << std::endl; + if (d_work) + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); + throw std::runtime_error ("geqrf (Z): QR factorization failed"); + } + if (d_work) + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); +} // --- float --- -static inline void orgqr( - cusolverDnHandle_t& cusolver_handle, - const int m, - const int n, - const int k, - float* d_A, - const int lda, - float* d_tau -) { +static inline void + orgqr (cusolverDnHandle_t& cusolver_handle, + const int m, + const int n, + const int k, + float* d_A, + const int lda, + float* d_tau) +{ int lwork = 0; - CHECK_CUSOLVER(cusolverDnSorgqr_bufferSize( - cusolver_handle, m, n, k, d_A, lda, d_tau, &lwork)); + CHECK_CUSOLVER (cusolverDnSorgqr_bufferSize (cusolver_handle, m, n, k, d_A, lda, d_tau, &lwork)); float* d_work = nullptr; - int* d_info = nullptr; + int* d_info = nullptr; - if (lwork > 0) { - CHECK_CUDA(cudaMalloc(reinterpret_cast(&d_work), sizeof(float) * lwork)); - } - CHECK_CUDA(cudaMalloc(reinterpret_cast(&d_info), sizeof(int))); + if (lwork > 0) + { + CHECK_CUDA (cudaMalloc (reinterpret_cast (&d_work), sizeof (float) * lwork)); + } + CHECK_CUDA (cudaMalloc (reinterpret_cast (&d_info), sizeof (int))); - CHECK_CUSOLVER(cusolverDnSorgqr( - cusolver_handle, m, n, k, d_A, lda, d_tau, d_work, lwork, d_info)); + CHECK_CUSOLVER (cusolverDnSorgqr (cusolver_handle, m, n, k, d_A, lda, d_tau, d_work, lwork, d_info)); int h_info = 0; - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - std::cout << "orgqr (S): info = " << h_info << " (failure at parameter " << -h_info 
<< ")" << std::endl; - if (d_work) CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); - throw std::runtime_error("orgqr (S): failed to generate Q matrix"); - } + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + std::cout << "orgqr (S): info = " << h_info << " (failure at parameter " << -h_info << ")" << std::endl; + if (d_work) + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); + throw std::runtime_error ("orgqr (S): failed to generate Q matrix"); + } // clean workspace - if (d_work) CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); + if (d_work) + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); } // --- double --- -static inline void orgqr( - cusolverDnHandle_t& cusolver_handle, - const int m, - const int n, - const int k, - double* d_A, - const int lda, - double* d_tau -) { +static inline void + orgqr (cusolverDnHandle_t& cusolver_handle, + const int m, + const int n, + const int k, + double* d_A, + const int lda, + double* d_tau) +{ int lwork = 0; - CHECK_CUSOLVER(cusolverDnDorgqr_bufferSize( - cusolver_handle, m, n, k, d_A, lda, d_tau, &lwork)); + CHECK_CUSOLVER (cusolverDnDorgqr_bufferSize (cusolver_handle, m, n, k, d_A, lda, d_tau, &lwork)); double* d_work = nullptr; - int* d_info = nullptr; + int* d_info = nullptr; - if (lwork > 0) { - CHECK_CUDA(cudaMalloc(reinterpret_cast(&d_work), sizeof(double) * lwork)); - } - CHECK_CUDA(cudaMalloc(reinterpret_cast(&d_info), sizeof(int))); + if (lwork > 0) + { + CHECK_CUDA (cudaMalloc (reinterpret_cast (&d_work), sizeof (double) * lwork)); + } + CHECK_CUDA (cudaMalloc (reinterpret_cast (&d_info), sizeof (int))); - CHECK_CUSOLVER(cusolverDnDorgqr( - cusolver_handle, m, n, k, d_A, lda, d_tau, d_work, lwork, d_info)); + CHECK_CUSOLVER (cusolverDnDorgqr (cusolver_handle, m, n, k, d_A, lda, d_tau, d_work, lwork, d_info)); int h_info = 0; - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), 
cudaMemcpyDeviceToHost)); - if (h_info != 0) { - std::cout << "orgqr (D): info = " << h_info << std::endl; - if (d_work) CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); - throw std::runtime_error("orgqr (D): failed to generate Q matrix"); - } - - if (d_work) CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + std::cout << "orgqr (D): info = " << h_info << std::endl; + if (d_work) + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); + throw std::runtime_error ("orgqr (D): failed to generate Q matrix"); + } + + if (d_work) + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); } // --- std::complex --- -static inline void orgqr( - cusolverDnHandle_t& cusolver_handle, - const int m, - const int n, - const int k, - std::complex* d_A, - const int lda, - std::complex* d_tau -) { +static inline void + orgqr (cusolverDnHandle_t& cusolver_handle, + const int m, + const int n, + const int k, + std::complex* d_A, + const int lda, + std::complex* d_tau) +{ int lwork = 0; - CHECK_CUSOLVER(cusolverDnCungqr_bufferSize( - cusolver_handle, m, n, k, - reinterpret_cast(d_A), - lda, - reinterpret_cast(d_tau), - &lwork)); + CHECK_CUSOLVER (cusolverDnCungqr_bufferSize (cusolver_handle, + m, + n, + k, + reinterpret_cast (d_A), + lda, + reinterpret_cast (d_tau), + &lwork)); cuComplex* d_work = nullptr; - int* d_info = nullptr; - - if (lwork > 0) { - CHECK_CUDA(cudaMalloc(reinterpret_cast(&d_work), sizeof(cuComplex) * lwork)); - } - CHECK_CUDA(cudaMalloc(reinterpret_cast(&d_info), sizeof(int))); + int* d_info = nullptr; - CHECK_CUSOLVER(cusolverDnCungqr( - cusolver_handle, m, n, k, - reinterpret_cast(d_A), - lda, - reinterpret_cast(d_tau), - d_work, lwork, d_info)); + if (lwork > 0) + { + CHECK_CUDA (cudaMalloc (reinterpret_cast (&d_work), sizeof (cuComplex) * lwork)); + } + CHECK_CUDA (cudaMalloc (reinterpret_cast (&d_info), sizeof 
(int))); + + CHECK_CUSOLVER (cusolverDnCungqr (cusolver_handle, + m, + n, + k, + reinterpret_cast (d_A), + lda, + reinterpret_cast (d_tau), + d_work, + lwork, + d_info)); int h_info = 0; - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - std::cout << "orgqr (C): info = " << h_info << std::endl; - if (d_work) CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); - throw std::runtime_error("orgqr (C): failed to generate Q matrix"); - } - - if (d_work) CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + std::cout << "orgqr (C): info = " << h_info << std::endl; + if (d_work) + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); + throw std::runtime_error ("orgqr (C): failed to generate Q matrix"); + } + + if (d_work) + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); } // --- std::complex --- -static inline void orgqr( - cusolverDnHandle_t& cusolver_handle, - const int m, - const int n, - const int k, - std::complex* d_A, - const int lda, - std::complex* d_tau -) { +static inline void + orgqr (cusolverDnHandle_t& cusolver_handle, + const int m, + const int n, + const int k, + std::complex* d_A, + const int lda, + std::complex* d_tau) +{ int lwork = 0; - CHECK_CUSOLVER(cusolverDnZungqr_bufferSize( - cusolver_handle, m, n, k, - reinterpret_cast(d_A), - lda, - reinterpret_cast(d_tau), - &lwork)); + CHECK_CUSOLVER (cusolverDnZungqr_bufferSize (cusolver_handle, + m, + n, + k, + reinterpret_cast (d_A), + lda, + reinterpret_cast (d_tau), + &lwork)); cuDoubleComplex* d_work = nullptr; - int* d_info = nullptr; - - if (lwork > 0) { - CHECK_CUDA(cudaMalloc(reinterpret_cast(&d_work), sizeof(cuDoubleComplex) * lwork)); - } - CHECK_CUDA(cudaMalloc(reinterpret_cast(&d_info), sizeof(int))); + int* d_info = nullptr; - CHECK_CUSOLVER(cusolverDnZungqr( - cusolver_handle, m, n, k, - 
reinterpret_cast(d_A), - lda, - reinterpret_cast(d_tau), - d_work, lwork, d_info)); + if (lwork > 0) + { + CHECK_CUDA (cudaMalloc (reinterpret_cast (&d_work), sizeof (cuDoubleComplex) * lwork)); + } + CHECK_CUDA (cudaMalloc (reinterpret_cast (&d_info), sizeof (int))); + + CHECK_CUSOLVER (cusolverDnZungqr (cusolver_handle, + m, + n, + k, + reinterpret_cast (d_A), + lda, + reinterpret_cast (d_tau), + d_work, + lwork, + d_info)); int h_info = 0; - CHECK_CUDA(cudaMemcpy(&h_info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); - if (h_info != 0) { - std::cout << "orgqr (Z): info = " << h_info << std::endl; - if (d_work) CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); - throw std::runtime_error("orgqr (Z): failed to generate Q matrix"); - } - - if (d_work) CHECK_CUDA(cudaFree(d_work)); - CHECK_CUDA(cudaFree(d_info)); -} + CHECK_CUDA (cudaMemcpy (&h_info, d_info, sizeof (int), cudaMemcpyDeviceToHost)); + if (h_info != 0) + { + std::cout << "orgqr (Z): info = " << h_info << std::endl; + if (d_work) + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); + throw std::runtime_error ("orgqr (Z): failed to generate Q matrix"); + } + if (d_work) + CHECK_CUDA (cudaFree (d_work)); + CHECK_CUDA (cudaFree (d_info)); +} } // namespace cuSolverConnector } // namespace container diff --git a/source/source_base/module_container/base/third_party/hipblas.h b/source/source_base/module_container/base/third_party/hipblas.h index 739fbd96b23..0cabf49ea72 100644 --- a/source/source_base/module_container/base/third_party/hipblas.h +++ b/source/source_base/module_container/base/third_party/hipblas.h @@ -5,314 +5,756 @@ #include #include -namespace container { -namespace hipBlasConnector { +namespace container +{ +namespace hipBlasConnector +{ -static inline -void dot(hipblasHandle_t& handle, const int& n, const float *x, const int& incx, const float *y, const int& incy, float* result) +static inline void + dot (hipblasHandle_t& handle, + const int& n, + const float* x, 
+ const int& incx, + const float* y, + const int& incy, + float* result) { - hipblasErrcheck(hipblasSdot(handle, n, x, incx, y, incy, result)); + hipblasErrcheck (hipblasSdot (handle, n, x, incx, y, incy, result)); } -static inline -void dot(hipblasHandle_t& handle, const int& n, const double *x, const int& incx, const double *y, const int& incy, double* result) +static inline void + dot (hipblasHandle_t& handle, + const int& n, + const double* x, + const int& incx, + const double* y, + const int& incy, + double* result) { - hipblasErrcheck(hipblasDdot(handle, n, x, incx, y, incy, result)); + hipblasErrcheck (hipblasDdot (handle, n, x, incx, y, incy, result)); } -static inline -void dot(hipblasHandle_t& handle, const int& n, const std::complex *x, const int& incx, const std::complex *y, const int& incy, std::complex* result) +static inline void + dot (hipblasHandle_t& handle, + const int& n, + const std::complex* x, + const int& incx, + const std::complex* y, + const int& incy, + std::complex* result) { - hipblasErrcheck(hipblasCdotc(handle, n, reinterpret_cast(x), incx, reinterpret_cast(y), incy, reinterpret_cast(result))); + hipblasErrcheck (hipblasCdotc (handle, + n, + reinterpret_cast (x), + incx, + reinterpret_cast (y), + incy, + reinterpret_cast (result))); } -static inline -void dot(hipblasHandle_t& handle, const int& n, const std::complex *x, const int& incx, const std::complex *y, const int& incy, std::complex* result) +static inline void + dot (hipblasHandle_t& handle, + const int& n, + const std::complex* x, + const int& incx, + const std::complex* y, + const int& incy, + std::complex* result) { - hipblasErrcheck(hipblasZdotc(handle, n, reinterpret_cast(x), incx, reinterpret_cast(y), incy, reinterpret_cast(result))); + hipblasErrcheck (hipblasZdotc (handle, + n, + reinterpret_cast (x), + incx, + reinterpret_cast (y), + incy, + reinterpret_cast (result))); } -static inline -void axpy(hipblasHandle_t& handle, const int& n, const float& alpha, const float 
*x, const int& incx, float *y, const int& incy) +static inline void + axpy (hipblasHandle_t& handle, + const int& n, + const float& alpha, + const float* x, + const int& incx, + float* y, + const int& incy) { - hipblasErrcheck(hipblasSaxpy(handle, n, &alpha, x, incx, y, incy)); + hipblasErrcheck (hipblasSaxpy (handle, n, &alpha, x, incx, y, incy)); } -static inline -void axpy(hipblasHandle_t& handle, const int& n, const double& alpha, const double *x, const int& incx, double *y, const int& incy) +static inline void + axpy (hipblasHandle_t& handle, + const int& n, + const double& alpha, + const double* x, + const int& incx, + double* y, + const int& incy) { - hipblasErrcheck(hipblasDaxpy(handle, n, &alpha, x, incx, y, incy)); + hipblasErrcheck (hipblasDaxpy (handle, n, &alpha, x, incx, y, incy)); } -static inline -void axpy(hipblasHandle_t& handle, const int& n, const std::complex& alpha, const std::complex *x, const int& incx, std::complex *y, const int& incy) +static inline void + axpy (hipblasHandle_t& handle, + const int& n, + const std::complex& alpha, + const std::complex* x, + const int& incx, + std::complex* y, + const int& incy) { - hipblasErrcheck(hipblasCaxpy(handle, n, reinterpret_cast(&alpha), reinterpret_cast(x), incx, reinterpret_cast(y), incy)); + hipblasErrcheck (hipblasCaxpy (handle, + n, + reinterpret_cast (&alpha), + reinterpret_cast (x), + incx, + reinterpret_cast (y), + incy)); } -static inline -void axpy(hipblasHandle_t& handle, const int& n, const std::complex& alpha, const std::complex *x, const int& incx, std::complex *y, const int& incy) +static inline void + axpy (hipblasHandle_t& handle, + const int& n, + const std::complex& alpha, + const std::complex* x, + const int& incx, + std::complex* y, + const int& incy) { - hipblasErrcheck(hipblasZaxpy(handle, n, reinterpret_cast(&alpha), reinterpret_cast(x), incx, reinterpret_cast(y), incy)); + hipblasErrcheck (hipblasZaxpy (handle, + n, + reinterpret_cast (&alpha), + reinterpret_cast (x), + 
incx, + reinterpret_cast (y), + incy)); } -static inline -void scal(hipblasHandle_t& handle, const int& n, const float& alpha, float *x, const int& incx) +static inline void + scal (hipblasHandle_t& handle, const int& n, const float& alpha, float* x, const int& incx) { - hipblasErrcheck(hipblasSscal(handle, n, &alpha, x, incx)); + hipblasErrcheck (hipblasSscal (handle, n, &alpha, x, incx)); } -static inline -void scal(hipblasHandle_t& handle, const int& n, const double& alpha, double *x, const int& incx) +static inline void + scal (hipblasHandle_t& handle, const int& n, const double& alpha, double* x, const int& incx) { - hipblasErrcheck(hipblasDscal(handle, n, &alpha, x, incx)); + hipblasErrcheck (hipblasDscal (handle, n, &alpha, x, incx)); } -static inline -void scal(hipblasHandle_t& handle, const int& n, const std::complex& alpha, std::complex *x, const int& incx) +static inline void + scal (hipblasHandle_t& handle, + const int& n, + const std::complex& alpha, + std::complex* x, + const int& incx) { - hipblasErrcheck(hipblasCscal(handle, n, reinterpret_cast(&alpha), reinterpret_cast(x), incx)); + hipblasErrcheck (hipblasCscal (handle, + n, + reinterpret_cast (&alpha), + reinterpret_cast (x), + incx)); } -static inline -void scal(hipblasHandle_t& handle, const int& n, const std::complex& alpha, std::complex *x, const int& incx) +static inline void + scal (hipblasHandle_t& handle, + const int& n, + const std::complex& alpha, + std::complex* x, + const int& incx) { - hipblasErrcheck(hipblasZscal(handle, n, reinterpret_cast(&alpha), reinterpret_cast(x), incx)); + hipblasErrcheck (hipblasZscal (handle, + n, + reinterpret_cast (&alpha), + reinterpret_cast (x), + incx)); } -static inline -void gemv(hipblasHandle_t& handle, const char& trans, const int& m, const int& n, - const float& alpha, const float *A, const int& lda, const float *x, const int& incx, - const float& beta, float *y, const int& incy) +static inline void + gemv (hipblasHandle_t& handle, + const char& 
trans, + const int& m, + const int& n, + const float& alpha, + const float* A, + const int& lda, + const float* x, + const int& incx, + const float& beta, + float* y, + const int& incy) { - hipblasErrcheck(hipblasSgemv(handle, GetHipblasOperation(trans), m, n, &alpha, A, lda, x, incx, &beta, y, incy)); + hipblasErrcheck (hipblasSgemv (handle, GetHipblasOperation (trans), m, n, &alpha, A, lda, x, incx, &beta, y, incy)); } -static inline -void gemv(hipblasHandle_t& handle, const char& trans, const int& m, const int& n, - const double& alpha, const double *A, const int& lda, const double *x, const int& incx, - const double& beta, double *y, const int& incy) +static inline void + gemv (hipblasHandle_t& handle, + const char& trans, + const int& m, + const int& n, + const double& alpha, + const double* A, + const int& lda, + const double* x, + const int& incx, + const double& beta, + double* y, + const int& incy) { - hipblasErrcheck(hipblasDgemv(handle, GetHipblasOperation(trans), m, n, &alpha, A, lda, x, incx, &beta, y, incy)); + hipblasErrcheck (hipblasDgemv (handle, GetHipblasOperation (trans), m, n, &alpha, A, lda, x, incx, &beta, y, incy)); } -static inline -void gemv(hipblasHandle_t& handle, const char& trans, const int& m, const int& n, - const std::complex& alpha, const std::complex *A, const int& lda, const std::complex *x, const int& incx, - const std::complex& beta, std::complex *y, const int& incy) +static inline void + gemv (hipblasHandle_t& handle, + const char& trans, + const int& m, + const int& n, + const std::complex& alpha, + const std::complex* A, + const int& lda, + const std::complex* x, + const int& incx, + const std::complex& beta, + std::complex* y, + const int& incy) { - hipblasErrcheck(hipblasCgemv(handle, GetHipblasOperation(trans), m, n, reinterpret_cast(&alpha), - reinterpret_cast(A), lda, reinterpret_cast(x), incx, reinterpret_cast(&beta), reinterpret_cast(y), incy)); + hipblasErrcheck (hipblasCgemv (handle, + GetHipblasOperation (trans), + 
m, + n, + reinterpret_cast (&alpha), + reinterpret_cast (A), + lda, + reinterpret_cast (x), + incx, + reinterpret_cast (&beta), + reinterpret_cast (y), + incy)); } -static inline -void gemv(hipblasHandle_t& handle, const char& trans, const int& m, const int& n, - const std::complex& alpha, const std::complex *A, const int& lda, const std::complex *x, const int& incx, - const std::complex& beta, std::complex *y, const int& incy) +static inline void + gemv (hipblasHandle_t& handle, + const char& trans, + const int& m, + const int& n, + const std::complex& alpha, + const std::complex* A, + const int& lda, + const std::complex* x, + const int& incx, + const std::complex& beta, + std::complex* y, + const int& incy) { - hipblasErrcheck(hipblasZgemv(handle, GetHipblasOperation(trans), m, n, reinterpret_cast(&alpha), - reinterpret_cast(A), lda, reinterpret_cast(x), incx, reinterpret_cast(&beta), reinterpret_cast(y), incy)); + hipblasErrcheck (hipblasZgemv (handle, + GetHipblasOperation (trans), + m, + n, + reinterpret_cast (&alpha), + reinterpret_cast (A), + lda, + reinterpret_cast (x), + incx, + reinterpret_cast (&beta), + reinterpret_cast (y), + incy)); } template -static inline -void gemv_batched(hipblasHandle_t& handle, const char& trans, const int& m, const int& n, - const T& alpha, T** A, const int& lda, T** x, const int& incx, - const T& beta, T** y, const int& incy, const int& batch_size) -{ - for (int ii = 0; ii < batch_size; ++ii) { - // Call the single GEMV for each pair of matrix A[ii] and vector x[ii] - hipBlasConnector::gemv(handle, trans, m, n, alpha, A[ii], lda, x[ii], incy, beta, y[ii], incy); - } +static inline void + gemv_batched (hipblasHandle_t& handle, + const char& trans, + const int& m, + const int& n, + const T& alpha, + T** A, + const int& lda, + T** x, + const int& incx, + const T& beta, + T** y, + const int& incy, + const int& batch_size) +{ + for (int ii = 0; ii < batch_size; ++ii) + { + // Call the single GEMV for each pair of matrix A[ii] and 
vector x[ii] + hipBlasConnector::gemv (handle, trans, m, n, alpha, A[ii], lda, x[ii], incy, beta, y[ii], incy); + } } template -static inline -void gemv_batched_strided(hipblasHandle_t& handle, const char& transa, const int& m, const int& n, - const T& alpha, const T* A, const int& lda, const int& stride_a, const T* x, const int& incx, const int& stride_x, - const T& beta, T* y, const int& incy, const int& stride_y, const int& batch_size) -{ - for (int ii = 0; ii < batch_size; ii++) { - // Call the single GEMV for each pair of matrix A[ii] and vector x[ii] - hipBlasConnector::gemv(handle, transa, m, n, alpha, A + ii * stride_a, lda, x + ii * stride_x, incx, beta, y + ii * stride_y, incy); - } +static inline void + gemv_batched_strided (hipblasHandle_t& handle, + const char& transa, + const int& m, + const int& n, + const T& alpha, + const T* A, + const int& lda, + const int& stride_a, + const T* x, + const int& incx, + const int& stride_x, + const T& beta, + T* y, + const int& incy, + const int& stride_y, + const int& batch_size) +{ + for (int ii = 0; ii < batch_size; ii++) + { + // Call the single GEMV for each pair of matrix A[ii] and vector x[ii] + hipBlasConnector::gemv (handle, + transa, + m, + n, + alpha, + A + ii * stride_a, + lda, + x + ii * stride_x, + incx, + beta, + y + ii * stride_y, + incy); + } } -static inline -void gemm(hipblasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const float& alpha, const float* A, const int& lda, const float* B, const int& ldb, - const float& beta, float* C, const int& ldc) +static inline void + gemm (hipblasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const float& alpha, + const float* A, + const int& lda, + const float* B, + const int& ldb, + const float& beta, + float* C, + const int& ldc) { - hipblasErrcheck(hipblasSgemm(handle, GetHipblasOperation(transa), GetHipblasOperation(transb), - m, n, k, 
&alpha, A, lda, B, ldb, &beta, C, ldc)); + hipblasErrcheck (hipblasSgemm (handle, + GetHipblasOperation (transa), + GetHipblasOperation (transb), + m, + n, + k, + &alpha, + A, + lda, + B, + ldb, + &beta, + C, + ldc)); } -static inline -void gemm(hipblasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const double& alpha, const double* A, const int& lda, const double* B, const int& ldb, - const double& beta, double* C, const int& ldc) +static inline void + gemm (hipblasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const double& alpha, + const double* A, + const int& lda, + const double* B, + const int& ldb, + const double& beta, + double* C, + const int& ldc) { - hipblasErrcheck(hipblasDgemm(handle, GetHipblasOperation(transa), GetHipblasOperation(transb), - m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc)); + hipblasErrcheck (hipblasDgemm (handle, + GetHipblasOperation (transa), + GetHipblasOperation (transb), + m, + n, + k, + &alpha, + A, + lda, + B, + ldb, + &beta, + C, + ldc)); } -static inline -void gemm(hipblasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const std::complex& alpha, const std::complex* A, const int& lda, const std::complex* B, const int& ldb, - const std::complex& beta, std::complex* C, const int& ldc) -{ - hipblasErrcheck(hipblasCgemm(handle, GetHipblasOperation(transa), GetHipblasOperation(transb), - m, n, k, - reinterpret_cast(&alpha), - reinterpret_cast(A), lda, - reinterpret_cast(B), ldb, - reinterpret_cast(&beta), - reinterpret_cast(C), ldc)); +static inline void + gemm (hipblasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const std::complex& alpha, + const std::complex* A, + const int& lda, + const std::complex* B, + const int& ldb, + const std::complex& beta, + std::complex* C, + const int& ldc) +{ + 
hipblasErrcheck (hipblasCgemm (handle, + GetHipblasOperation (transa), + GetHipblasOperation (transb), + m, + n, + k, + reinterpret_cast (&alpha), + reinterpret_cast (A), + lda, + reinterpret_cast (B), + ldb, + reinterpret_cast (&beta), + reinterpret_cast (C), + ldc)); } -static inline -void gemm(hipblasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const std::complex& alpha, const std::complex* A, const int& lda, const std::complex* B, const int& ldb, - const std::complex& beta, std::complex* C, const int& ldc) -{ - hipblasErrcheck(hipblasZgemm(handle, GetHipblasOperation(transa), GetHipblasOperation(transb), - m, n, k, - reinterpret_cast(&alpha), - reinterpret_cast(A), lda, - reinterpret_cast(B), ldb, - reinterpret_cast(&beta), - reinterpret_cast(C), ldc)); +static inline void + gemm (hipblasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const std::complex& alpha, + const std::complex* A, + const int& lda, + const std::complex* B, + const int& ldb, + const std::complex& beta, + std::complex* C, + const int& ldc) +{ + hipblasErrcheck (hipblasZgemm (handle, + GetHipblasOperation (transa), + GetHipblasOperation (transb), + m, + n, + k, + reinterpret_cast (&alpha), + reinterpret_cast (A), + lda, + reinterpret_cast (B), + ldb, + reinterpret_cast (&beta), + reinterpret_cast (C), + ldc)); } template -static inline -T** allocate_(T** in, const int& batch_size) +static inline T** + allocate_ (T** in, const int& batch_size) { T** out = nullptr; - hipErrcheck(hipMalloc(reinterpret_cast(&out), sizeof(T*) * batch_size)); - hipErrcheck(hipMemcpy(out, in, sizeof(T*) * batch_size, hipMemcpyHostToDevice)); + hipErrcheck (hipMalloc (reinterpret_cast (&out), sizeof (T*) * batch_size)); + hipErrcheck (hipMemcpy (out, in, sizeof (T*) * batch_size, hipMemcpyHostToDevice)); return out; } -static inline -void gemm_batched(hipblasHandle_t& handle, const char& transa, 
const char& transb, const int& m, const int& n, const int& k, - const float& alpha, float** A, const int& lda, float** B, const int& ldb, - const float& beta, float** C, const int& ldc, const int& batch_size) -{ - float** d_A = allocate_(A, batch_size); - float** d_B = allocate_(B, batch_size); - float** d_C = allocate_(C, batch_size); - hipblasErrcheck(hipblasSgemmBatched(handle, GetHipblasOperation(transa), GetHipblasOperation(transb), - m, n, k, &alpha, d_A, lda, d_B, ldb, &beta, d_C, ldc, batch_size)); - hipErrcheck(hipFree(d_A)); - hipErrcheck(hipFree(d_B)); - hipErrcheck(hipFree(d_C)); +static inline void + gemm_batched (hipblasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const float& alpha, + float** A, + const int& lda, + float** B, + const int& ldb, + const float& beta, + float** C, + const int& ldc, + const int& batch_size) +{ + float** d_A = allocate_ (A, batch_size); + float** d_B = allocate_ (B, batch_size); + float** d_C = allocate_ (C, batch_size); + hipblasErrcheck (hipblasSgemmBatched (handle, + GetHipblasOperation (transa), + GetHipblasOperation (transb), + m, + n, + k, + &alpha, + d_A, + lda, + d_B, + ldb, + &beta, + d_C, + ldc, + batch_size)); + hipErrcheck (hipFree (d_A)); + hipErrcheck (hipFree (d_B)); + hipErrcheck (hipFree (d_C)); } -static inline -void gemm_batched(hipblasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const double& alpha, double** A, const int& lda, double** B, const int& ldb, - const double& beta, double** C, const int& ldc, const int& batch_size) -{ - double** d_A = allocate_(A, batch_size); - double** d_B = allocate_(B, batch_size); - double** d_C = allocate_(C, batch_size); - hipblasErrcheck(hipblasDgemmBatched(handle, GetHipblasOperation(transa), GetHipblasOperation(transb), - m, n, k, &alpha, d_A, lda, d_B, ldb, &beta, d_C, ldc, batch_size)); - hipErrcheck(hipFree(d_A)); - 
hipErrcheck(hipFree(d_B)); - hipErrcheck(hipFree(d_C)); +static inline void + gemm_batched (hipblasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const double& alpha, + double** A, + const int& lda, + double** B, + const int& ldb, + const double& beta, + double** C, + const int& ldc, + const int& batch_size) +{ + double** d_A = allocate_ (A, batch_size); + double** d_B = allocate_ (B, batch_size); + double** d_C = allocate_ (C, batch_size); + hipblasErrcheck (hipblasDgemmBatched (handle, + GetHipblasOperation (transa), + GetHipblasOperation (transb), + m, + n, + k, + &alpha, + d_A, + lda, + d_B, + ldb, + &beta, + d_C, + ldc, + batch_size)); + hipErrcheck (hipFree (d_A)); + hipErrcheck (hipFree (d_B)); + hipErrcheck (hipFree (d_C)); } -static inline -void gemm_batched(hipblasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const std::complex& alpha, std::complex** A, const int& lda, std::complex** B, const int& ldb, - const std::complex& beta, std::complex** C, const int& ldc, const int& batch_size) -{ - std::complex** d_A = allocate_(A, batch_size); - std::complex** d_B = allocate_(B, batch_size); - std::complex** d_C = allocate_(C, batch_size); - hipblasErrcheck(hipblasCgemmBatched(handle, GetHipblasOperation(transa), GetHipblasOperation(transb), - m, n, k, - reinterpret_cast(&alpha), - reinterpret_cast(d_A), lda, - reinterpret_cast(d_B), ldb, - reinterpret_cast(&beta), - reinterpret_cast(d_C), ldc, batch_size)); - hipErrcheck(hipFree(d_A)); - hipErrcheck(hipFree(d_B)); - hipErrcheck(hipFree(d_C)); +static inline void + gemm_batched (hipblasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const std::complex& alpha, + std::complex** A, + const int& lda, + std::complex** B, + const int& ldb, + const std::complex& beta, + std::complex** C, + const int& ldc, + const int& batch_size) +{ + 
std::complex** d_A = allocate_ (A, batch_size); + std::complex** d_B = allocate_ (B, batch_size); + std::complex** d_C = allocate_ (C, batch_size); + hipblasErrcheck (hipblasCgemmBatched (handle, + GetHipblasOperation (transa), + GetHipblasOperation (transb), + m, + n, + k, + reinterpret_cast (&alpha), + reinterpret_cast (d_A), + lda, + reinterpret_cast (d_B), + ldb, + reinterpret_cast (&beta), + reinterpret_cast (d_C), + ldc, + batch_size)); + hipErrcheck (hipFree (d_A)); + hipErrcheck (hipFree (d_B)); + hipErrcheck (hipFree (d_C)); } -static inline -void gemm_batched(hipblasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const std::complex& alpha, std::complex** A, const int& lda, std::complex** B, const int& ldb, - const std::complex& beta, std::complex** C, const int& ldc, const int& batch_size) -{ - std::complex** d_A = allocate_(A, batch_size); - std::complex** d_B = allocate_(B, batch_size); - std::complex** d_C = allocate_(C, batch_size); - hipblasErrcheck(hipblasZgemmBatched(handle, GetHipblasOperation(transa), GetHipblasOperation(transb), - m, n, k, - reinterpret_cast(&alpha), - reinterpret_cast(d_A), lda, - reinterpret_cast(d_B), ldb, - reinterpret_cast(&beta), - reinterpret_cast(d_C), ldc, batch_size)); - hipErrcheck(hipFree(d_A)); - hipErrcheck(hipFree(d_B)); - hipErrcheck(hipFree(d_C)); +static inline void + gemm_batched (hipblasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const std::complex& alpha, + std::complex** A, + const int& lda, + std::complex** B, + const int& ldb, + const std::complex& beta, + std::complex** C, + const int& ldc, + const int& batch_size) +{ + std::complex** d_A = allocate_ (A, batch_size); + std::complex** d_B = allocate_ (B, batch_size); + std::complex** d_C = allocate_ (C, batch_size); + hipblasErrcheck (hipblasZgemmBatched (handle, + GetHipblasOperation (transa), + GetHipblasOperation (transb), + m, + n, 
+ k, + reinterpret_cast (&alpha), + reinterpret_cast (d_A), + lda, + reinterpret_cast (d_B), + ldb, + reinterpret_cast (&beta), + reinterpret_cast (d_C), + ldc, + batch_size)); + hipErrcheck (hipFree (d_A)); + hipErrcheck (hipFree (d_B)); + hipErrcheck (hipFree (d_C)); } -static inline -void gemm_batched_strided(hipblasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const float& alpha, const float* A, const int& lda, const int& stride_a, const float* B, const int& ldb, const int& stride_b, - const float& beta, float* C, const int& ldc, const int& stride_c, const int& batch_size) -{ - hipblasErrcheck(hipblasSgemmStridedBatched( - handle, - GetHipblasOperation(transa), - GetHipblasOperation(transb), - m, n, k, - &alpha, - A, lda, stride_a, - B, ldb, stride_b, - &beta, - C, ldc, stride_c, - batch_size)); +static inline void + gemm_batched_strided (hipblasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const float& alpha, + const float* A, + const int& lda, + const int& stride_a, + const float* B, + const int& ldb, + const int& stride_b, + const float& beta, + float* C, + const int& ldc, + const int& stride_c, + const int& batch_size) +{ + hipblasErrcheck (hipblasSgemmStridedBatched (handle, + GetHipblasOperation (transa), + GetHipblasOperation (transb), + m, + n, + k, + &alpha, + A, + lda, + stride_a, + B, + ldb, + stride_b, + &beta, + C, + ldc, + stride_c, + batch_size)); } -static inline -void gemm_batched_strided(hipblasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const double& alpha, const double* A, const int& lda, const int& stride_a, const double* B, const int& ldb, const int& stride_b, - const double& beta, double* C, const int& ldc, const int& stride_c, const int& batch_size) -{ - hipblasErrcheck(hipblasDgemmStridedBatched( - handle, - GetHipblasOperation(transa), - 
GetHipblasOperation(transb), - m, n, k, - &alpha, - A, lda, stride_a, - B, ldb, stride_b, - &beta, - C, ldc, stride_c, - batch_size)); +static inline void + gemm_batched_strided (hipblasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const double& alpha, + const double* A, + const int& lda, + const int& stride_a, + const double* B, + const int& ldb, + const int& stride_b, + const double& beta, + double* C, + const int& ldc, + const int& stride_c, + const int& batch_size) +{ + hipblasErrcheck (hipblasDgemmStridedBatched (handle, + GetHipblasOperation (transa), + GetHipblasOperation (transb), + m, + n, + k, + &alpha, + A, + lda, + stride_a, + B, + ldb, + stride_b, + &beta, + C, + ldc, + stride_c, + batch_size)); } -static inline -void gemm_batched_strided(hipblasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const std::complex& alpha, const std::complex* A, const int& lda, const int& stride_a, const std::complex* B, const int& ldb, const int& stride_b, - const std::complex& beta, std::complex* C, const int& ldc, const int& stride_c, const int& batch_size) -{ - hipblasErrcheck(hipblasCgemmStridedBatched( - handle, - GetHipblasOperation(transa), - GetHipblasOperation(transb), - m, n, k, - reinterpret_cast(&alpha), - reinterpret_cast(A), lda, stride_a, - reinterpret_cast(B), ldb, stride_b, - reinterpret_cast(&beta), - reinterpret_cast(C), ldc, stride_c, - batch_size)); +static inline void + gemm_batched_strided (hipblasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const std::complex& alpha, + const std::complex* A, + const int& lda, + const int& stride_a, + const std::complex* B, + const int& ldb, + const int& stride_b, + const std::complex& beta, + std::complex* C, + const int& ldc, + const int& stride_c, + const int& batch_size) +{ + hipblasErrcheck (hipblasCgemmStridedBatched (handle, 
+ GetHipblasOperation (transa), + GetHipblasOperation (transb), + m, + n, + k, + reinterpret_cast (&alpha), + reinterpret_cast (A), + lda, + stride_a, + reinterpret_cast (B), + ldb, + stride_b, + reinterpret_cast (&beta), + reinterpret_cast (C), + ldc, + stride_c, + batch_size)); } -static inline -void gemm_batched_strided(hipblasHandle_t& handle, const char& transa, const char& transb, const int& m, const int& n, const int& k, - const std::complex& alpha, const std::complex* A, const int& lda, const int& stride_a, const std::complex* B, const int& ldb, const int& stride_b, - const std::complex& beta, std::complex* C, const int& ldc, const int& stride_c, const int& batch_size) -{ - hipblasErrcheck(hipblasZgemmStridedBatched( - handle, - GetHipblasOperation(transa), - GetHipblasOperation(transb), - m, n, k, - reinterpret_cast(&alpha), - reinterpret_cast(A), lda, stride_a, - reinterpret_cast(B), ldb, stride_b, - reinterpret_cast(&beta), - reinterpret_cast(C), ldc, stride_c, - batch_size)); +static inline void + gemm_batched_strided (hipblasHandle_t& handle, + const char& transa, + const char& transb, + const int& m, + const int& n, + const int& k, + const std::complex& alpha, + const std::complex* A, + const int& lda, + const int& stride_a, + const std::complex* B, + const int& ldb, + const int& stride_b, + const std::complex& beta, + std::complex* C, + const int& ldc, + const int& stride_c, + const int& batch_size) +{ + hipblasErrcheck (hipblasZgemmStridedBatched (handle, + GetHipblasOperation (transa), + GetHipblasOperation (transb), + m, + n, + k, + reinterpret_cast (&alpha), + reinterpret_cast (A), + lda, + stride_a, + reinterpret_cast (B), + ldb, + stride_b, + reinterpret_cast (&beta), + reinterpret_cast (C), + ldc, + stride_c, + batch_size)); } } // namespace hipBlasConnector diff --git a/source/source_base/module_container/base/third_party/hipsolver.h b/source/source_base/module_container/base/third_party/hipsolver.h index af8911be438..62496dd3cd4 100644 --- 
a/source/source_base/module_container/base/third_party/hipsolver.h +++ b/source/source_base/module_container/base/third_party/hipsolver.h @@ -7,337 +7,631 @@ #include #include -namespace container { -namespace hipSolverConnector { +namespace container +{ +namespace hipSolverConnector +{ template -static inline -void trtri (hipsolverHandle_t& hipsolver_handle, const char& uplo, const char& diag, const int& n, T* A, const int& lda) +static inline void + trtri (hipsolverHandle_t& hipsolver_handle, const char& uplo, const char& diag, const int& n, T* A, const int& lda) { size_t d_lwork = 0, h_lwork = 0; - hipsolverErrcheck(hipsolverDnXtrtri_bufferSize(hipsolver_handle, hipsolver_fill_mode(uplo), hipblas_diag_type(diag), n, GetTypeRocm::cuda_data_type, A, lda, &d_lwork, &h_lwork)); - void* d_work = nullptr, *h_work = nullptr; - hipErrcheck(hipMalloc((void**)&d_work, d_lwork)); - if (h_lwork) { - h_work = malloc(h_lwork); - if (h_work == nullptr) { - throw std::bad_alloc(); + hipsolverErrcheck (hipsolverDnXtrtri_bufferSize (hipsolver_handle, + hipsolver_fill_mode (uplo), + hipblas_diag_type (diag), + n, + GetTypeRocm::cuda_data_type, + A, + lda, + &d_lwork, + &h_lwork)); + void *d_work = nullptr, *h_work = nullptr; + hipErrcheck (hipMalloc ((void**)&d_work, d_lwork)); + if (h_lwork) + { + h_work = malloc (h_lwork); + if (h_work == nullptr) + { + throw std::bad_alloc (); + } } - } int h_info = 0; int* d_info = nullptr; - hipErrcheck(hipMalloc((void**)&d_info, sizeof(int))); + hipErrcheck (hipMalloc ((void**)&d_info, sizeof (int))); // Perform Cholesky decomposition - hipsolverErrcheck(hipsolverDnXtrtri(hipsolver_handle, hipsolver_fill_mode(uplo), hipblas_diag_type(diag), n, GetTypeRocm::cuda_data_type, A, n, d_work, d_lwork, h_work, h_lwork, d_info)); - hipErrcheck(hipMemcpy(&h_info, d_info, sizeof(int), hipMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("trtri: failed to invert matrix"); - } - free(h_work); - hipErrcheck(hipFree(d_work)); - 
hipErrcheck(hipFree(d_info)); + hipsolverErrcheck (hipsolverDnXtrtri (hipsolver_handle, + hipsolver_fill_mode (uplo), + hipblas_diag_type (diag), + n, + GetTypeRocm::cuda_data_type, + A, + n, + d_work, + d_lwork, + h_work, + h_lwork, + d_info)); + hipErrcheck (hipMemcpy (&h_info, d_info, sizeof (int), hipMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("trtri: failed to invert matrix"); + } + free (h_work); + hipErrcheck (hipFree (d_work)); + hipErrcheck (hipFree (d_info)); } -static inline -void potri (hipsolverHandle_t& hipsolver_handle, const char& uplo, const char& diag, const int& n, float * A, const int& lda) +static inline void + potri (hipsolverHandle_t& hipsolver_handle, + const char& uplo, + const char& diag, + const int& n, + float* A, + const int& lda) { int lwork; - hipsolverErrcheck(hipsolverDnSpotri_bufferSize(hipsolver_handle, hipsolver_fill_mode(uplo), n, A, n, &lwork)); + hipsolverErrcheck (hipsolverDnSpotri_bufferSize (hipsolver_handle, hipsolver_fill_mode (uplo), n, A, n, &lwork)); float* work = nullptr; - hipErrcheck(hipMalloc((void**)&work, lwork * sizeof(float))); + hipErrcheck (hipMalloc ((void**)&work, lwork * sizeof (float))); // Perform Cholesky decomposition - hipsolverErrcheck(hipsolverDnSpotri(hipsolver_handle, hipsolver_fill_mode(uplo), n, A, n, work, lwork, nullptr)); - hipErrcheck(hipFree(work)); + hipsolverErrcheck (hipsolverDnSpotri (hipsolver_handle, hipsolver_fill_mode (uplo), n, A, n, work, lwork, nullptr)); + hipErrcheck (hipFree (work)); } -static inline -void potri (hipsolverHandle_t& hipsolver_handle, const char& uplo, const char& diag, const int& n, double * A, const int& lda) +static inline void + potri (hipsolverHandle_t& hipsolver_handle, + const char& uplo, + const char& diag, + const int& n, + double* A, + const int& lda) { int lwork; - hipsolverErrcheck(hipsolverDnDpotri_bufferSize(hipsolver_handle, hipsolver_fill_mode(uplo), n, A, n, &lwork)); + hipsolverErrcheck (hipsolverDnDpotri_bufferSize 
(hipsolver_handle, hipsolver_fill_mode (uplo), n, A, n, &lwork)); double* work = nullptr; - hipErrcheck(hipMalloc((void**)&work, lwork * sizeof(double))); + hipErrcheck (hipMalloc ((void**)&work, lwork * sizeof (double))); // Perform Cholesky decomposition - hipsolverErrcheck(hipsolverDnDpotri(hipsolver_handle, hipsolver_fill_mode(uplo), n, A, n, work, lwork, nullptr)); - hipErrcheck(hipFree(work)); + hipsolverErrcheck (hipsolverDnDpotri (hipsolver_handle, hipsolver_fill_mode (uplo), n, A, n, work, lwork, nullptr)); + hipErrcheck (hipFree (work)); } -static inline -void potri (hipsolverHandle_t& hipsolver_handle, const char& uplo, const char& diag, const int& n, std::complex * A, const int& lda) +static inline void + potri (hipsolverHandle_t& hipsolver_handle, + const char& uplo, + const char& diag, + const int& n, + std::complex* A, + const int& lda) { int lwork; - hipsolverErrcheck(hipsolverDnCpotri_bufferSize(hipsolver_handle, hipsolver_fill_mode(uplo), n, reinterpret_cast(A), n, &lwork)); + hipsolverErrcheck (hipsolverDnCpotri_bufferSize (hipsolver_handle, + hipsolver_fill_mode (uplo), + n, + reinterpret_cast (A), + n, + &lwork)); hipFloatComplex* work = nullptr; - hipErrcheck(hipMalloc((void**)&work, lwork * sizeof(hipFloatComplex))); + hipErrcheck (hipMalloc ((void**)&work, lwork * sizeof (hipFloatComplex))); // Perform Cholesky decomposition - hipsolverErrcheck(hipsolverDnCpotri(hipsolver_handle, hipsolver_fill_mode(uplo), n, reinterpret_cast(A), n, work, lwork, nullptr)); - hipErrcheck(hipFree(work)); + hipsolverErrcheck (hipsolverDnCpotri (hipsolver_handle, + hipsolver_fill_mode (uplo), + n, + reinterpret_cast (A), + n, + work, + lwork, + nullptr)); + hipErrcheck (hipFree (work)); } -static inline -void potri (hipsolverHandle_t& hipsolver_handle, const char& uplo, const char& diag, const int& n, std::complex * A, const int& lda) +static inline void + potri (hipsolverHandle_t& hipsolver_handle, + const char& uplo, + const char& diag, + const int& n, + 
std::complex* A, + const int& lda) { int lwork; - hipsolverErrcheck(hipsolverDnZpotri_bufferSize(hipsolver_handle, hipsolver_fill_mode(uplo), n, reinterpret_cast(A), n, &lwork)); + hipsolverErrcheck (hipsolverDnZpotri_bufferSize (hipsolver_handle, + hipsolver_fill_mode (uplo), + n, + reinterpret_cast (A), + n, + &lwork)); hipDoubleComplex* work = nullptr; - hipErrcheck(hipMalloc((void**)&work, lwork * sizeof(hipDoubleComplex))); + hipErrcheck (hipMalloc ((void**)&work, lwork * sizeof (hipDoubleComplex))); // Perform Cholesky decomposition - hipsolverErrcheck(hipsolverDnZpotri(hipsolver_handle, hipsolver_fill_mode(uplo), n, reinterpret_cast(A), n, work, lwork, nullptr)); - hipErrcheck(hipFree(work)); + hipsolverErrcheck (hipsolverDnZpotri (hipsolver_handle, + hipsolver_fill_mode (uplo), + n, + reinterpret_cast (A), + n, + work, + lwork, + nullptr)); + hipErrcheck (hipFree (work)); } - -static inline -void potrf (hipsolverHandle_t& hipsolver_handle, const char& uplo, const int& n, float * A, const int& lda) +static inline void + potrf (hipsolverHandle_t& hipsolver_handle, const char& uplo, const int& n, float* A, const int& lda) { int lwork; - hipsolverErrcheck(hipsolverDnSpotrf_bufferSize(hipsolver_handle, hipsolver_fill_mode(uplo), n, A, n, &lwork)); + hipsolverErrcheck (hipsolverDnSpotrf_bufferSize (hipsolver_handle, hipsolver_fill_mode (uplo), n, A, n, &lwork)); float* work = nullptr; - hipErrcheck(hipMalloc((void**)&work, lwork * sizeof(float))); + hipErrcheck (hipMalloc ((void**)&work, lwork * sizeof (float))); // Perform Cholesky decomposition - hipsolverErrcheck(hipsolverDnSpotrf(hipsolver_handle, hipsolver_fill_mode(uplo), n, A, n, work, lwork, nullptr)); - hipErrcheck(hipFree(work)); + hipsolverErrcheck (hipsolverDnSpotrf (hipsolver_handle, hipsolver_fill_mode (uplo), n, A, n, work, lwork, nullptr)); + hipErrcheck (hipFree (work)); } -static inline -void potrf (hipsolverHandle_t& hipsolver_handle, const char& uplo, const int& n, double * A, const int& lda) 
+static inline void + potrf (hipsolverHandle_t& hipsolver_handle, const char& uplo, const int& n, double* A, const int& lda) { int lwork; - hipsolverErrcheck(hipsolverDnDpotrf_bufferSize(hipsolver_handle, hipsolver_fill_mode(uplo), n, A, n, &lwork)); + hipsolverErrcheck (hipsolverDnDpotrf_bufferSize (hipsolver_handle, hipsolver_fill_mode (uplo), n, A, n, &lwork)); double* work = nullptr; - hipErrcheck(hipMalloc((void**)&work, lwork * sizeof(double))); + hipErrcheck (hipMalloc ((void**)&work, lwork * sizeof (double))); // Perform Cholesky decomposition - hipsolverErrcheck(hipsolverDnDpotrf(hipsolver_handle, hipsolver_fill_mode(uplo), n, A, n, work, lwork, nullptr)); - hipErrcheck(hipFree(work)); + hipsolverErrcheck (hipsolverDnDpotrf (hipsolver_handle, hipsolver_fill_mode (uplo), n, A, n, work, lwork, nullptr)); + hipErrcheck (hipFree (work)); } -static inline -void potrf (hipsolverHandle_t& hipsolver_handle, const char& uplo, const int& n, std::complex * A, const int& lda) +static inline void + potrf (hipsolverHandle_t& hipsolver_handle, const char& uplo, const int& n, std::complex* A, const int& lda) { int lwork; - hipsolverErrcheck(hipsolverDnCpotrf_bufferSize(hipsolver_handle, hipsolver_fill_mode(uplo), n, reinterpret_cast(A), n, &lwork)); + hipsolverErrcheck (hipsolverDnCpotrf_bufferSize (hipsolver_handle, + hipsolver_fill_mode (uplo), + n, + reinterpret_cast (A), + n, + &lwork)); hipFloatComplex* work = nullptr; - hipErrcheck(hipMalloc((void**)&work, lwork * sizeof(hipFloatComplex))); + hipErrcheck (hipMalloc ((void**)&work, lwork * sizeof (hipFloatComplex))); // Perform Cholesky decomposition - hipsolverErrcheck(hipsolverDnCpotrf(hipsolver_handle, hipsolver_fill_mode(uplo), n, reinterpret_cast(A), n, work, lwork, nullptr)); - hipErrcheck(hipFree(work)); + hipsolverErrcheck (hipsolverDnCpotrf (hipsolver_handle, + hipsolver_fill_mode (uplo), + n, + reinterpret_cast (A), + n, + work, + lwork, + nullptr)); + hipErrcheck (hipFree (work)); } -static inline -void 
potrf (hipsolverHandle_t& hipsolver_handle, const char& uplo, const int& n, std::complex * A, const int& lda) +static inline void + potrf (hipsolverHandle_t& hipsolver_handle, const char& uplo, const int& n, std::complex* A, const int& lda) { int lwork; - hipsolverErrcheck(hipsolverDnZpotrf_bufferSize(hipsolver_handle, hipsolver_fill_mode(uplo), n, reinterpret_cast(A), n, &lwork)); + hipsolverErrcheck (hipsolverDnZpotrf_bufferSize (hipsolver_handle, + hipsolver_fill_mode (uplo), + n, + reinterpret_cast (A), + n, + &lwork)); hipDoubleComplex* work = nullptr; - hipErrcheck(hipMalloc((void**)&work, lwork * sizeof(hipDoubleComplex))); + hipErrcheck (hipMalloc ((void**)&work, lwork * sizeof (hipDoubleComplex))); // Perform Cholesky decomposition - hipsolverErrcheck(hipsolverDnZpotrf(hipsolver_handle, hipsolver_fill_mode(uplo), n, reinterpret_cast(A), n, work, lwork, nullptr)); - hipErrcheck(hipFree(work)); + hipsolverErrcheck (hipsolverDnZpotrf (hipsolver_handle, + hipsolver_fill_mode (uplo), + n, + reinterpret_cast (A), + n, + work, + lwork, + nullptr)); + hipErrcheck (hipFree (work)); } - -static inline -void heevd (hipsolverHandle_t& hipsolver_handle, const char& jobz, const char& uplo, const int& n, float* A, const int& lda, float * W) +static inline void + heevd (hipsolverHandle_t& hipsolver_handle, + const char& jobz, + const char& uplo, + const int& n, + float* A, + const int& lda, + float* W) { // prepare some values for hipsolverDnZhegvd_bufferSize - int lwork = 0; - int h_info = 0; - int* d_info = nullptr; + int lwork = 0; + int h_info = 0; + int* d_info = nullptr; float* d_work = nullptr; - hipErrcheck(hipMalloc((void**)&d_info, sizeof(int))); + hipErrcheck (hipMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. 
- hipsolverErrcheck(hipsolverDnSsyevd_bufferSize(hipsolver_handle, hipblas_eig_mode(jobz), hipsolver_fill_mode(uplo), - n, A, lda, W, &lwork)); + hipsolverErrcheck (hipsolverDnSsyevd_bufferSize (hipsolver_handle, + hipblas_eig_mode (jobz), + hipsolver_fill_mode (uplo), + n, + A, + lda, + W, + &lwork)); // allocate memery - hipErrcheck(hipMalloc((void**)&d_work, sizeof(float) * lwork)); + hipErrcheck (hipMalloc ((void**)&d_work, sizeof (float) * lwork)); // compute eigenvalues and eigenvectors. - hipsolverErrcheck(hipsolverDnSsyevd(hipsolver_handle, hipblas_eig_mode(jobz), hipsolver_fill_mode(uplo), - n, A, lda, W, d_work, lwork, d_info)); + hipsolverErrcheck (hipsolverDnSsyevd (hipsolver_handle, + hipblas_eig_mode (jobz), + hipsolver_fill_mode (uplo), + n, + A, + lda, + W, + d_work, + lwork, + d_info)); - hipErrcheck(hipMemcpy(&h_info, d_info, sizeof(int), hipMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("heevd: failed to invert matrix"); - } - hipErrcheck(hipFree(d_info)); - hipErrcheck(hipFree(d_work)); + hipErrcheck (hipMemcpy (&h_info, d_info, sizeof (int), hipMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("heevd: failed to invert matrix"); + } + hipErrcheck (hipFree (d_info)); + hipErrcheck (hipFree (d_work)); } -static inline -void heevd (hipsolverHandle_t& hipsolver_handle, const char& jobz, const char& uplo, const int& n, double* A, const int& lda, double * W) +static inline void + heevd (hipsolverHandle_t& hipsolver_handle, + const char& jobz, + const char& uplo, + const int& n, + double* A, + const int& lda, + double* W) { // prepare some values for hipsolverDnZhegvd_bufferSize - int lwork = 0; - int h_info = 0; - int* d_info = nullptr; + int lwork = 0; + int h_info = 0; + int* d_info = nullptr; double* d_work = nullptr; - hipErrcheck(hipMalloc((void**)&d_info, sizeof(int))); + hipErrcheck (hipMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. 
- hipsolverErrcheck(hipsolverDnDsyevd_bufferSize(hipsolver_handle, hipblas_eig_mode(jobz), hipsolver_fill_mode(uplo), - n, A, lda, W, &lwork)); + hipsolverErrcheck (hipsolverDnDsyevd_bufferSize (hipsolver_handle, + hipblas_eig_mode (jobz), + hipsolver_fill_mode (uplo), + n, + A, + lda, + W, + &lwork)); // allocate memery - hipErrcheck(hipMalloc((void**)&d_work, sizeof(double) * lwork)); + hipErrcheck (hipMalloc ((void**)&d_work, sizeof (double) * lwork)); // compute eigenvalues and eigenvectors. - hipsolverErrcheck(hipsolverDnDsyevd(hipsolver_handle, hipblas_eig_mode(jobz), hipsolver_fill_mode(uplo), - n, A, lda, W, d_work, lwork, d_info)); + hipsolverErrcheck (hipsolverDnDsyevd (hipsolver_handle, + hipblas_eig_mode (jobz), + hipsolver_fill_mode (uplo), + n, + A, + lda, + W, + d_work, + lwork, + d_info)); - hipErrcheck(hipMemcpy(&h_info, d_info, sizeof(int), hipMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("heevd: failed to invert matrix"); - } - hipErrcheck(hipFree(d_info)); - hipErrcheck(hipFree(d_work)); + hipErrcheck (hipMemcpy (&h_info, d_info, sizeof (int), hipMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("heevd: failed to invert matrix"); + } + hipErrcheck (hipFree (d_info)); + hipErrcheck (hipFree (d_work)); } -static inline -void heevd (hipsolverHandle_t& hipsolver_handle, const char& jobz, const char& uplo, const int& n, std::complex* A, const int& lda, float * W) +static inline void + heevd (hipsolverHandle_t& hipsolver_handle, + const char& jobz, + const char& uplo, + const int& n, + std::complex* A, + const int& lda, + float* W) { // prepare some values for hipsolverDnZhegvd_bufferSize - int lwork = 0; - int h_info = 0; - int* d_info = nullptr; + int lwork = 0; + int h_info = 0; + int* d_info = nullptr; hipFloatComplex* d_work = nullptr; - hipErrcheck(hipMalloc((void**)&d_info, sizeof(int))); + hipErrcheck (hipMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated 
buffer. - hipsolverErrcheck(hipsolverDnCheevd_bufferSize(hipsolver_handle, hipblas_eig_mode(jobz), hipsolver_fill_mode(uplo), - n, reinterpret_cast(A), lda, W, &lwork)); + hipsolverErrcheck (hipsolverDnCheevd_bufferSize (hipsolver_handle, + hipblas_eig_mode (jobz), + hipsolver_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + W, + &lwork)); // allocate memery - hipErrcheck(hipMalloc((void**)&d_work, sizeof(hipFloatComplex) * lwork)); + hipErrcheck (hipMalloc ((void**)&d_work, sizeof (hipFloatComplex) * lwork)); // compute eigenvalues and eigenvectors. - hipsolverErrcheck(hipsolverDnCheevd(hipsolver_handle, hipblas_eig_mode(jobz), hipsolver_fill_mode(uplo), - n, reinterpret_cast(A), lda, W, d_work, lwork, d_info)); + hipsolverErrcheck (hipsolverDnCheevd (hipsolver_handle, + hipblas_eig_mode (jobz), + hipsolver_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + W, + d_work, + lwork, + d_info)); - hipErrcheck(hipMemcpy(&h_info, d_info, sizeof(int), hipMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("heevd: failed to invert matrix"); - } - hipErrcheck(hipFree(d_info)); - hipErrcheck(hipFree(d_work)); + hipErrcheck (hipMemcpy (&h_info, d_info, sizeof (int), hipMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("heevd: failed to invert matrix"); + } + hipErrcheck (hipFree (d_info)); + hipErrcheck (hipFree (d_work)); } -static inline -void heevd (hipsolverHandle_t& hipsolver_handle, const char& jobz, const char& uplo, const int& n, std::complex* A, const int& lda, double* W) +static inline void + heevd (hipsolverHandle_t& hipsolver_handle, + const char& jobz, + const char& uplo, + const int& n, + std::complex* A, + const int& lda, + double* W) { // prepare some values for hipsolverDnZhegvd_bufferSize - int lwork = 0; - int h_info = 0; - int* d_info = nullptr; + int lwork = 0; + int h_info = 0; + int* d_info = nullptr; hipDoubleComplex* d_work = nullptr; - hipErrcheck(hipMalloc((void**)&d_info, sizeof(int))); + 
hipErrcheck (hipMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. - hipsolverErrcheck(hipsolverDnZheevd_bufferSize(hipsolver_handle, hipblas_eig_mode(jobz), hipsolver_fill_mode(uplo), - n, reinterpret_cast(A), lda, W, &lwork)); + hipsolverErrcheck (hipsolverDnZheevd_bufferSize (hipsolver_handle, + hipblas_eig_mode (jobz), + hipsolver_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + W, + &lwork)); // allocate memery - hipErrcheck(hipMalloc((void**)&d_work, sizeof(hipDoubleComplex) * lwork)); + hipErrcheck (hipMalloc ((void**)&d_work, sizeof (hipDoubleComplex) * lwork)); // compute eigenvalues and eigenvectors. - hipsolverErrcheck(hipsolverDnZheevd(hipsolver_handle, hipblas_eig_mode(jobz), hipsolver_fill_mode(uplo), - n, reinterpret_cast(A), lda, W, d_work, lwork, d_info)); + hipsolverErrcheck (hipsolverDnZheevd (hipsolver_handle, + hipblas_eig_mode (jobz), + hipsolver_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + W, + d_work, + lwork, + d_info)); - hipErrcheck(hipMemcpy(&h_info, d_info, sizeof(int), hipMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("heevd: failed to invert matrix"); - } - hipErrcheck(hipFree(d_info)); - hipErrcheck(hipFree(d_work)); + hipErrcheck (hipMemcpy (&h_info, d_info, sizeof (int), hipMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("heevd: failed to invert matrix"); + } + hipErrcheck (hipFree (d_info)); + hipErrcheck (hipFree (d_work)); } -static inline -void hegvd (hipsolverHandle_t& hipsolver_handle, const int& itype, const char& jobz, const char& uplo, const int& n, float* A, const int& lda, float* B, const int& ldb, float * W) +static inline void + hegvd (hipsolverHandle_t& hipsolver_handle, + const int& itype, + const char& jobz, + const char& uplo, + const int& n, + float* A, + const int& lda, + float* B, + const int& ldb, + float* W) { // prepare some values for hipsolverDnZhegvd_bufferSize - int lwork = 0; - int h_info = 0; 
- int* d_info = nullptr; + int lwork = 0; + int h_info = 0; + int* d_info = nullptr; float* d_work = nullptr; - hipErrcheck(hipMalloc((void**)&d_info, sizeof(int))); + hipErrcheck (hipMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. - hipsolverErrcheck(hipsolverDnSsygvd_bufferSize(hipsolver_handle, hipblas_eig_type(itype), hipblas_eig_mode(jobz), hipsolver_fill_mode(uplo), - n, A, lda, B, ldb, W, &lwork)); + hipsolverErrcheck (hipsolverDnSsygvd_bufferSize (hipsolver_handle, + hipblas_eig_type (itype), + hipblas_eig_mode (jobz), + hipsolver_fill_mode (uplo), + n, + A, + lda, + B, + ldb, + W, + &lwork)); // allocate memery - hipErrcheck(hipMalloc((void**)&d_work, sizeof(float) * lwork)); + hipErrcheck (hipMalloc ((void**)&d_work, sizeof (float) * lwork)); // compute eigenvalues and eigenvectors. - hipsolverErrcheck(hipsolverDnSsygvd(hipsolver_handle, hipblas_eig_type(itype), hipblas_eig_mode(jobz), hipsolver_fill_mode(uplo), - n, A, lda, B, ldb, W, d_work, lwork, d_info)); + hipsolverErrcheck (hipsolverDnSsygvd (hipsolver_handle, + hipblas_eig_type (itype), + hipblas_eig_mode (jobz), + hipsolver_fill_mode (uplo), + n, + A, + lda, + B, + ldb, + W, + d_work, + lwork, + d_info)); - hipErrcheck(hipMemcpy(&h_info, d_info, sizeof(int), hipMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("heevd: failed to invert matrix"); - } - hipErrcheck(hipFree(d_info)); - hipErrcheck(hipFree(d_work)); + hipErrcheck (hipMemcpy (&h_info, d_info, sizeof (int), hipMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("heevd: failed to invert matrix"); + } + hipErrcheck (hipFree (d_info)); + hipErrcheck (hipFree (d_work)); } -static inline -void hegvd (hipsolverHandle_t& hipsolver_handle, const int& itype, const char& jobz, const char& uplo, const int& n, double* A, const int& lda, double* B, const int& ldb, double * W) +static inline void + hegvd (hipsolverHandle_t& hipsolver_handle, + const int& itype, 
+ const char& jobz, + const char& uplo, + const int& n, + double* A, + const int& lda, + double* B, + const int& ldb, + double* W) { // prepare some values for hipsolverDnZhegvd_bufferSize - int lwork = 0; - int h_info = 0; - int* d_info = nullptr; + int lwork = 0; + int h_info = 0; + int* d_info = nullptr; double* d_work = nullptr; - hipErrcheck(hipMalloc((void**)&d_info, sizeof(int))); + hipErrcheck (hipMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. - hipsolverErrcheck(hipsolverDnDsygvd_bufferSize(hipsolver_handle, hipblas_eig_type(itype), hipblas_eig_mode(jobz), hipsolver_fill_mode(uplo), - n, A, lda, B, ldb, W, &lwork)); + hipsolverErrcheck (hipsolverDnDsygvd_bufferSize (hipsolver_handle, + hipblas_eig_type (itype), + hipblas_eig_mode (jobz), + hipsolver_fill_mode (uplo), + n, + A, + lda, + B, + ldb, + W, + &lwork)); // allocate memery - hipErrcheck(hipMalloc((void**)&d_work, sizeof(double) * lwork)); + hipErrcheck (hipMalloc ((void**)&d_work, sizeof (double) * lwork)); // compute eigenvalues and eigenvectors. 
- hipsolverErrcheck(hipsolverDnDsygvd(hipsolver_handle, hipblas_eig_type(itype), hipblas_eig_mode(jobz), hipsolver_fill_mode(uplo), - n, A, lda, B, ldb, W, d_work, lwork, d_info)); + hipsolverErrcheck (hipsolverDnDsygvd (hipsolver_handle, + hipblas_eig_type (itype), + hipblas_eig_mode (jobz), + hipsolver_fill_mode (uplo), + n, + A, + lda, + B, + ldb, + W, + d_work, + lwork, + d_info)); - hipErrcheck(hipMemcpy(&h_info, d_info, sizeof(int), hipMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("heevd: failed to invert matrix"); - } - hipErrcheck(hipFree(d_info)); - hipErrcheck(hipFree(d_work)); + hipErrcheck (hipMemcpy (&h_info, d_info, sizeof (int), hipMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("heevd: failed to invert matrix"); + } + hipErrcheck (hipFree (d_info)); + hipErrcheck (hipFree (d_work)); } -static inline -void hegvd (hipsolverHandle_t& hipsolver_handle, const int& itype, const char& jobz, const char& uplo, const int& n, std::complex* A, const int& lda, std::complex* B, const int& ldb, float* W) +static inline void + hegvd (hipsolverHandle_t& hipsolver_handle, + const int& itype, + const char& jobz, + const char& uplo, + const int& n, + std::complex* A, + const int& lda, + std::complex* B, + const int& ldb, + float* W) { // prepare some values for hipsolverDnZhegvd_bufferSize - int lwork = 0; - int h_info = 0; - int* d_info = nullptr; + int lwork = 0; + int h_info = 0; + int* d_info = nullptr; hipFloatComplex* d_work = nullptr; - hipErrcheck(hipMalloc((void**)&d_info, sizeof(int))); + hipErrcheck (hipMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. 
- hipsolverErrcheck(hipsolverDnChegvd_bufferSize(hipsolver_handle, hipblas_eig_type(itype), hipblas_eig_mode(jobz), hipsolver_fill_mode(uplo), - n, reinterpret_cast(A), lda, reinterpret_cast(B), ldb, W, &lwork)); + hipsolverErrcheck (hipsolverDnChegvd_bufferSize (hipsolver_handle, + hipblas_eig_type (itype), + hipblas_eig_mode (jobz), + hipsolver_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + reinterpret_cast (B), + ldb, + W, + &lwork)); // allocate memery - hipErrcheck(hipMalloc((void**)&d_work, sizeof(hipFloatComplex) * lwork)); + hipErrcheck (hipMalloc ((void**)&d_work, sizeof (hipFloatComplex) * lwork)); // compute eigenvalues and eigenvectors. - hipsolverErrcheck(hipsolverDnChegvd(hipsolver_handle, hipblas_eig_type(itype), hipblas_eig_mode(jobz), hipsolver_fill_mode(uplo), - n, reinterpret_cast(A), lda, reinterpret_cast(B), ldb, W, d_work, lwork, d_info)); + hipsolverErrcheck (hipsolverDnChegvd (hipsolver_handle, + hipblas_eig_type (itype), + hipblas_eig_mode (jobz), + hipsolver_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + reinterpret_cast (B), + ldb, + W, + d_work, + lwork, + d_info)); - hipErrcheck(hipMemcpy(&h_info, d_info, sizeof(int), hipMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("heevd: failed to invert matrix"); - } - hipErrcheck(hipFree(d_info)); - hipErrcheck(hipFree(d_work)); + hipErrcheck (hipMemcpy (&h_info, d_info, sizeof (int), hipMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("heevd: failed to invert matrix"); + } + hipErrcheck (hipFree (d_info)); + hipErrcheck (hipFree (d_work)); } -static inline -void hegvd (hipsolverHandle_t& hipsolver_handle, const int& itype, const char& jobz, const char& uplo, const int& n, std::complex* A, const int& lda, std::complex* B, const int& ldb, double* W) +static inline void + hegvd (hipsolverHandle_t& hipsolver_handle, + const int& itype, + const char& jobz, + const char& uplo, + const int& n, + std::complex* A, + const int& lda, + 
std::complex* B, + const int& ldb, + double* W) { // prepare some values for hipsolverDnZhegvd_bufferSize - int lwork = 0; - int h_info = 0; - int* d_info = nullptr; + int lwork = 0; + int h_info = 0; + int* d_info = nullptr; hipDoubleComplex* d_work = nullptr; - hipErrcheck(hipMalloc((void**)&d_info, sizeof(int))); + hipErrcheck (hipMalloc ((void**)&d_info, sizeof (int))); // calculate the sizes needed for pre-allocated buffer. - hipsolverErrcheck(hipsolverDnZhegvd_bufferSize(hipsolver_handle, hipblas_eig_type(itype), hipblas_eig_mode(jobz), hipsolver_fill_mode(uplo), - n, reinterpret_cast(A), lda, reinterpret_cast(B), ldb, W, &lwork)); + hipsolverErrcheck (hipsolverDnZhegvd_bufferSize (hipsolver_handle, + hipblas_eig_type (itype), + hipblas_eig_mode (jobz), + hipsolver_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + reinterpret_cast (B), + ldb, + W, + &lwork)); // allocate memery - hipErrcheck(hipMalloc((void**)&d_work, sizeof(hipDoubleComplex) * lwork)); + hipErrcheck (hipMalloc ((void**)&d_work, sizeof (hipDoubleComplex) * lwork)); // compute eigenvalues and eigenvectors. 
- hipsolverErrcheck(hipsolverDnZhegvd(hipsolver_handle, hipblas_eig_type(itype), hipblas_eig_mode(jobz), hipsolver_fill_mode(uplo), - n, reinterpret_cast(A), lda, reinterpret_cast(B), ldb, W, d_work, lwork, d_info)); + hipsolverErrcheck (hipsolverDnZhegvd (hipsolver_handle, + hipblas_eig_type (itype), + hipblas_eig_mode (jobz), + hipsolver_fill_mode (uplo), + n, + reinterpret_cast (A), + lda, + reinterpret_cast (B), + ldb, + W, + d_work, + lwork, + d_info)); - hipErrcheck(hipMemcpy(&h_info, d_info, sizeof(int), hipMemcpyDeviceToHost)); - if (h_info != 0) { - throw std::runtime_error("heevd: failed to invert matrix"); - } - hipErrcheck(hipFree(d_info)); - hipErrcheck(hipFree(d_work)); + hipErrcheck (hipMemcpy (&h_info, d_info, sizeof (int), hipMemcpyDeviceToHost)); + if (h_info != 0) + { + throw std::runtime_error ("heevd: failed to invert matrix"); + } + hipErrcheck (hipFree (d_info)); + hipErrcheck (hipFree (d_work)); } } // namespace hipSolverConnector diff --git a/source/source_base/module_container/base/third_party/lapack.h b/source/source_base/module_container/base/third_party/lapack.h index 34881055fd1..d7c20b193b2 100644 --- a/source/source_base/module_container/base/third_party/lapack.h +++ b/source/source_base/module_container/base/third_party/lapack.h @@ -18,7 +18,6 @@ #include - #if defined(__CUDA) #include #elif defined(__ROCM) @@ -29,253 +28,583 @@ /// Direct wrapping of standard LAPACK routines. (column major, fortran style) /// with some slight modification to fit the C++ style for overloading purpose. 
-//Naming convention of lapack subroutines : ammxxx, where +// Naming convention of lapack subroutines : ammxxx, where //"a" specifies the data type: -// - d stands for double -// - z stands for complex double +// - d stands for double +// - z stands for complex double //"mm" specifies the type of matrix, for example: -// - he stands for hermitian -// - sy stands for symmetric +// - he stands for hermitian +// - sy stands for symmetric //"xxx" specifies the type of problem, for example: -// - gv stands for generalized eigenvalue +// - gv stands for generalized eigenvalue extern "C" { -// ILAENV - environment inquiry -int ilaenv_(const int* ispec, const char* name, const char* opts, - const int* n1, const int* n2, const int* n3, const int* n4); - -// Generalized symmetric-definite eigenproblems (divide-and-conquer) -void ssygvd_(const int* itype, const char* jobz, const char* uplo, const int* n, - float* a, const int* lda, - float* b, const int* ldb, - float* w, - float* work, const int* lwork, - int* iwork, const int* liwork, - int* info); - -void dsygvd_(const int* itype, const char* jobz, const char* uplo, const int* n, - double* a, const int* lda, - double* b, const int* ldb, - double* w, - double* work, const int* lwork, - int* iwork, const int* liwork, - int* info); - -void chegvd_(const int* itype, const char* jobz, const char* uplo, const int* n, - std::complex* a, const int* lda, - std::complex* b, const int* ldb, - float* w, - std::complex* work, const int* lwork, - float* rwork, const int* lrwork, - int* iwork, const int* liwork, - int* info); - -void zhegvd_(const int* itype, const char* jobz, const char* uplo, const int* n, - std::complex* a, const int* lda, - std::complex* b, const int* ldb, - double* w, - std::complex* work, const int* lwork, - double* rwork, const int* lrwork, - int* iwork, const int* liwork, - int* info); - -// Generalized symmetric-definite eigenproblems (selected eigenvalues/vectors) -void ssygvx_(const int* itype, const char* 
jobz, const char* range, const char* uplo, - const int* n, float* A, const int* lda, float* B, const int* ldb, - const float* vl, const float* vu, const int* il, const int* iu, - const float* abstol, int* m, float* w, float* Z, const int* ldz, - float* work, const int* lwork, int* iwork, int* ifail, int* info); - -void dsygvx_(const int* itype, const char* jobz, const char* range, const char* uplo, - const int* n, double* A, const int* lda, double* B, const int* ldb, - const double* vl, const double* vu, const int* il, const int* iu, - const double* abstol, int* m, double* w, double* Z, const int* ldz, - double* work, const int* lwork, int* iwork, int* ifail, int* info); - -void chegvx_(const int* itype, const char* jobz, const char* range, const char* uplo, - const int* n, std::complex* A, const int* lda, std::complex* B, const int* ldb, - const float* vl, const float* vu, const int* il, const int* iu, - const float* abstol, int* m, float* w, std::complex* Z, const int* ldz, - std::complex* work, const int* lwork, float* rwork, int* iwork, int* ifail, int* info); - -void zhegvx_(const int* itype, const char* jobz, const char* range, const char* uplo, - const int* n, std::complex* A, const int* lda, std::complex* B, const int* ldb, - const double* vl, const double* vu, const int* il, const int* iu, - const double* abstol, int* m, double* w, std::complex* Z, const int* ldz, - std::complex* work, const int* lwork, double* rwork, int* iwork, int* ifail, int* info); - -// Standard symmetric eigenproblems (selected) -void ssyevx_(const char* jobz, const char* range, const char* uplo, const int* n, - float* a, const int* lda, - const float* vl, const float* vu, const int* il, const int* iu, - const float* abstol, int* m, float* w, float* z, const int* ldz, - float* work, const int* lwork, int* iwork, int* ifail, int* info); - -void dsyevx_(const char* jobz, const char* range, const char* uplo, const int* n, - double* a, const int* lda, - const double* vl, const double* 
vu, const int* il, const int* iu, - const double* abstol, int* m, double* w, double* z, const int* ldz, - double* work, const int* lwork, int* iwork, int* ifail, int* info); - -void cheevx_(const char* jobz, const char* range, const char* uplo, const int* n, - std::complex* a, const int* lda, - const float* vl, const float* vu, const int* il, const int* iu, - const float* abstol, int* m, float* w, std::complex* z, const int* ldz, - std::complex* work, const int* lwork, float* rwork, int* iwork, int* ifail, int* info); - -void zheevx_(const char* jobz, const char* range, const char* uplo, const int* n, - std::complex* a, const int* lda, - const double* vl, const double* vu, const int* il, const int* iu, - const double* abstol, int* m, double* w, std::complex* z, const int* ldz, - std::complex* work, const int* lwork, double* rwork, int* iwork, int* ifail, int* info); - -// Standard symmetric eigenproblems (divide-and-conquer) -void ssyevd_(const char* jobz, const char* uplo, const int* n, - float* a, const int* lda, float* w, - float* work, const int* lwork, - int* iwork, const int* liwork, int* info); - -void dsyevd_(const char* jobz, const char* uplo, const int* n, - double* a, const int* lda, double* w, - double* work, const int* lwork, - int* iwork, const int* liwork, int* info); - -void cheevd_(const char* jobz, const char* uplo, const int* n, - std::complex* a, const int* lda, float* w, - std::complex* work, const int* lwork, float* rwork, const int* lrwork, - int* iwork, const int* liwork, int* info); - -void zheevd_(const char* jobz, const char* uplo, const int* n, - std::complex* a, const int* lda, double* w, - std::complex* work, const int* lwork, double* rwork, const int* lrwork, - int* iwork, const int* liwork, int* info); - -// Cholesky factorization -void spotrf_(const char* uplo, const int* n, float* A, const int* lda, int* info); -void dpotrf_(const char* uplo, const int* n, double* A, const int* lda, int* info); -void cpotrf_(const char* uplo, const 
int* n, std::complex* A, const int* lda, int* info); -void zpotrf_(const char* uplo, const int* n, std::complex* A, const int* lda, int* info); - -// Inverse using Cholesky factorization -void spotri_(const char* uplo, const int* n, float* A, const int* lda, int* info); -void dpotri_(const char* uplo, const int* n, double* A, const int* lda, int* info); -void cpotri_(const char* uplo, const int* n, std::complex* A, const int* lda, int* info); -void zpotri_(const char* uplo, const int* n, std::complex* A, const int* lda, int* info); - -// Inverse of triangular matrix -void strtri_(const char* uplo, const char* diag, const int* n, float* a, const int* lda, int* info); -void dtrtri_(const char* uplo, const char* diag, const int* n, double* a, const int* lda, int* info); -void ctrtri_(const char* uplo, const char* diag, const int* n, std::complex* a, const int* lda, int* info); -void ztrtri_(const char* uplo, const char* diag, const int* n, std::complex* a, const int* lda, int* info); - -// LU factorization -void sgetrf_(const int* m, const int* n, float* a, const int* lda, int* ipiv, int* info); -void dgetrf_(const int* m, const int* n, double* a, const int* lda, int* ipiv, int* info); -void cgetrf_(const int* m, const int* n, std::complex* a, const int* lda, int* ipiv, int* info); -void zgetrf_(const int* m, const int* n, std::complex* a, const int* lda, int* ipiv, int* info); - -// Inverse using LU factorization -void sgetri_(const int* n, float* A, const int* lda, const int* ipiv, float* work, const int* lwork, int* info); -void dgetri_(const int* n, double* A, const int* lda, const int* ipiv, double* work, const int* lwork, int* info); -void cgetri_(const int* n, std::complex* A, const int* lda, const int* ipiv, std::complex* work, const int* lwork, int* info); -void zgetri_(const int* n, std::complex* A, const int* lda, const int* ipiv, std::complex* work, const int* lwork, int* info); - -// Solve linear system using LU factorization -void sgetrs_(const char* 
trans, const int* n, const int* nrhs, - const float* A, const int* lda, const int* ipiv, - float* B, const int* ldb, int* info); -void dgetrs_(const char* trans, const int* n, const int* nrhs, - const double* A, const int* lda, const int* ipiv, - double* B, const int* ldb, int* info); -void cgetrs_(const char* trans, const int* n, const int* nrhs, - const std::complex* A, const int* lda, const int* ipiv, - std::complex* B, const int* ldb, int* info); -void zgetrs_(const char* trans, const int* n, const int* nrhs, - const std::complex* A, const int* lda, const int* ipiv, - std::complex* B, const int* ldb, int* info); - -// QR factorization -// build R and Householder -void sgeqrf_(const int* m, const int* n, float* A, const int* lda, float* tau, float *work, const int* lwork, int* info); -void dgeqrf_(const int* m, const int* n, double* A, const int* lda, double* tau, double *work, const int* lwork, int* info); -void cgeqrf_(const int* m, const int* n, std::complex* A, const int* lda, std::complex* tau, std::complex *work, const int* lwork, int* info); -void zgeqrf_(const int* m, const int* n, std::complex* A, const int* lda, std::complex* tau, std::complex *work, const int* lwork, int* info); -// make explicit Q -void sorgqr_(const int* m, const int* n, const int* k, float* A, const int* lda, const float* tau, float* work, const int* lwork, int* info); -void dorgqr_(const int* m, const int* n, const int* k, double* A, const int* lda, const double* tau, double* work, const int* lwork, int* info); -void cungqr_(const int* m, const int* n, const int* k, std::complex* A, const int* lda, const std::complex* tau, std::complex *work, const int* lwork, int* info); -void zungqr_(const int* m, const int* n, const int* k, std::complex* A, const int* lda, const std::complex* tau, std::complex *work, const int* lwork, int* info); + // ILAENV - environment inquiry + int ilaenv_ (const int* ispec, + const char* name, + const char* opts, + const int* n1, + const int* n2, + const 
int* n3, + const int* n4); + + // Generalized symmetric-definite eigenproblems (divide-and-conquer) + void ssygvd_ (const int* itype, + const char* jobz, + const char* uplo, + const int* n, + float* a, + const int* lda, + float* b, + const int* ldb, + float* w, + float* work, + const int* lwork, + int* iwork, + const int* liwork, + int* info); + + void dsygvd_ (const int* itype, + const char* jobz, + const char* uplo, + const int* n, + double* a, + const int* lda, + double* b, + const int* ldb, + double* w, + double* work, + const int* lwork, + int* iwork, + const int* liwork, + int* info); + + void chegvd_ (const int* itype, + const char* jobz, + const char* uplo, + const int* n, + std::complex* a, + const int* lda, + std::complex* b, + const int* ldb, + float* w, + std::complex* work, + const int* lwork, + float* rwork, + const int* lrwork, + int* iwork, + const int* liwork, + int* info); + + void zhegvd_ (const int* itype, + const char* jobz, + const char* uplo, + const int* n, + std::complex* a, + const int* lda, + std::complex* b, + const int* ldb, + double* w, + std::complex* work, + const int* lwork, + double* rwork, + const int* lrwork, + int* iwork, + const int* liwork, + int* info); + + // Generalized symmetric-definite eigenproblems (selected eigenvalues/vectors) + void ssygvx_ (const int* itype, + const char* jobz, + const char* range, + const char* uplo, + const int* n, + float* A, + const int* lda, + float* B, + const int* ldb, + const float* vl, + const float* vu, + const int* il, + const int* iu, + const float* abstol, + int* m, + float* w, + float* Z, + const int* ldz, + float* work, + const int* lwork, + int* iwork, + int* ifail, + int* info); + + void dsygvx_ (const int* itype, + const char* jobz, + const char* range, + const char* uplo, + const int* n, + double* A, + const int* lda, + double* B, + const int* ldb, + const double* vl, + const double* vu, + const int* il, + const int* iu, + const double* abstol, + int* m, + double* w, + double* Z, 
+ const int* ldz, + double* work, + const int* lwork, + int* iwork, + int* ifail, + int* info); + + void chegvx_ (const int* itype, + const char* jobz, + const char* range, + const char* uplo, + const int* n, + std::complex* A, + const int* lda, + std::complex* B, + const int* ldb, + const float* vl, + const float* vu, + const int* il, + const int* iu, + const float* abstol, + int* m, + float* w, + std::complex* Z, + const int* ldz, + std::complex* work, + const int* lwork, + float* rwork, + int* iwork, + int* ifail, + int* info); + + void zhegvx_ (const int* itype, + const char* jobz, + const char* range, + const char* uplo, + const int* n, + std::complex* A, + const int* lda, + std::complex* B, + const int* ldb, + const double* vl, + const double* vu, + const int* il, + const int* iu, + const double* abstol, + int* m, + double* w, + std::complex* Z, + const int* ldz, + std::complex* work, + const int* lwork, + double* rwork, + int* iwork, + int* ifail, + int* info); + + // Standard symmetric eigenproblems (selected) + void ssyevx_ (const char* jobz, + const char* range, + const char* uplo, + const int* n, + float* a, + const int* lda, + const float* vl, + const float* vu, + const int* il, + const int* iu, + const float* abstol, + int* m, + float* w, + float* z, + const int* ldz, + float* work, + const int* lwork, + int* iwork, + int* ifail, + int* info); + + void dsyevx_ (const char* jobz, + const char* range, + const char* uplo, + const int* n, + double* a, + const int* lda, + const double* vl, + const double* vu, + const int* il, + const int* iu, + const double* abstol, + int* m, + double* w, + double* z, + const int* ldz, + double* work, + const int* lwork, + int* iwork, + int* ifail, + int* info); + + void cheevx_ (const char* jobz, + const char* range, + const char* uplo, + const int* n, + std::complex* a, + const int* lda, + const float* vl, + const float* vu, + const int* il, + const int* iu, + const float* abstol, + int* m, + float* w, + std::complex* z, 
+ const int* ldz, + std::complex* work, + const int* lwork, + float* rwork, + int* iwork, + int* ifail, + int* info); + + void zheevx_ (const char* jobz, + const char* range, + const char* uplo, + const int* n, + std::complex* a, + const int* lda, + const double* vl, + const double* vu, + const int* il, + const int* iu, + const double* abstol, + int* m, + double* w, + std::complex* z, + const int* ldz, + std::complex* work, + const int* lwork, + double* rwork, + int* iwork, + int* ifail, + int* info); + + // Standard symmetric eigenproblems (divide-and-conquer) + void ssyevd_ (const char* jobz, + const char* uplo, + const int* n, + float* a, + const int* lda, + float* w, + float* work, + const int* lwork, + int* iwork, + const int* liwork, + int* info); + + void dsyevd_ (const char* jobz, + const char* uplo, + const int* n, + double* a, + const int* lda, + double* w, + double* work, + const int* lwork, + int* iwork, + const int* liwork, + int* info); + + void cheevd_ (const char* jobz, + const char* uplo, + const int* n, + std::complex* a, + const int* lda, + float* w, + std::complex* work, + const int* lwork, + float* rwork, + const int* lrwork, + int* iwork, + const int* liwork, + int* info); + + void zheevd_ (const char* jobz, + const char* uplo, + const int* n, + std::complex* a, + const int* lda, + double* w, + std::complex* work, + const int* lwork, + double* rwork, + const int* lrwork, + int* iwork, + const int* liwork, + int* info); + + // Cholesky factorization + void spotrf_ (const char* uplo, const int* n, float* A, const int* lda, int* info); + void dpotrf_ (const char* uplo, const int* n, double* A, const int* lda, int* info); + void cpotrf_ (const char* uplo, const int* n, std::complex* A, const int* lda, int* info); + void zpotrf_ (const char* uplo, const int* n, std::complex* A, const int* lda, int* info); + + // Inverse using Cholesky factorization + void spotri_ (const char* uplo, const int* n, float* A, const int* lda, int* info); + void dpotri_ 
(const char* uplo, const int* n, double* A, const int* lda, int* info); + void cpotri_ (const char* uplo, const int* n, std::complex* A, const int* lda, int* info); + void zpotri_ (const char* uplo, const int* n, std::complex* A, const int* lda, int* info); + // Inverse of triangular matrix + void strtri_ (const char* uplo, const char* diag, const int* n, float* a, const int* lda, int* info); + void dtrtri_ (const char* uplo, const char* diag, const int* n, double* a, const int* lda, int* info); + void ctrtri_ (const char* uplo, const char* diag, const int* n, std::complex* a, const int* lda, int* info); + void ztrtri_ (const char* uplo, const char* diag, const int* n, std::complex* a, const int* lda, int* info); + + // LU factorization + void sgetrf_ (const int* m, const int* n, float* a, const int* lda, int* ipiv, int* info); + void dgetrf_ (const int* m, const int* n, double* a, const int* lda, int* ipiv, int* info); + void cgetrf_ (const int* m, const int* n, std::complex* a, const int* lda, int* ipiv, int* info); + void zgetrf_ (const int* m, const int* n, std::complex* a, const int* lda, int* ipiv, int* info); + + // Inverse using LU factorization + void sgetri_ (const int* n, float* A, const int* lda, const int* ipiv, float* work, const int* lwork, int* info); + void dgetri_ (const int* n, double* A, const int* lda, const int* ipiv, double* work, const int* lwork, int* info); + void cgetri_ (const int* n, + std::complex* A, + const int* lda, + const int* ipiv, + std::complex* work, + const int* lwork, + int* info); + void zgetri_ (const int* n, + std::complex* A, + const int* lda, + const int* ipiv, + std::complex* work, + const int* lwork, + int* info); + + // Solve linear system using LU factorization + void sgetrs_ (const char* trans, + const int* n, + const int* nrhs, + const float* A, + const int* lda, + const int* ipiv, + float* B, + const int* ldb, + int* info); + void dgetrs_ (const char* trans, + const int* n, + const int* nrhs, + const double* A, + 
const int* lda, + const int* ipiv, + double* B, + const int* ldb, + int* info); + void cgetrs_ (const char* trans, + const int* n, + const int* nrhs, + const std::complex* A, + const int* lda, + const int* ipiv, + std::complex* B, + const int* ldb, + int* info); + void zgetrs_ (const char* trans, + const int* n, + const int* nrhs, + const std::complex* A, + const int* lda, + const int* ipiv, + std::complex* B, + const int* ldb, + int* info); + + // QR factorization + // build R and Householder + void sgeqrf_ (const int* m, + const int* n, + float* A, + const int* lda, + float* tau, + float* work, + const int* lwork, + int* info); + void dgeqrf_ (const int* m, + const int* n, + double* A, + const int* lda, + double* tau, + double* work, + const int* lwork, + int* info); + void cgeqrf_ (const int* m, + const int* n, + std::complex* A, + const int* lda, + std::complex* tau, + std::complex* work, + const int* lwork, + int* info); + void zgeqrf_ (const int* m, + const int* n, + std::complex* A, + const int* lda, + std::complex* tau, + std::complex* work, + const int* lwork, + int* info); + // make explicit Q + void sorgqr_ (const int* m, + const int* n, + const int* k, + float* A, + const int* lda, + const float* tau, + float* work, + const int* lwork, + int* info); + void dorgqr_ (const int* m, + const int* n, + const int* k, + double* A, + const int* lda, + const double* tau, + double* work, + const int* lwork, + int* info); + void cungqr_ (const int* m, + const int* n, + const int* k, + std::complex* A, + const int* lda, + const std::complex* tau, + std::complex* work, + const int* lwork, + int* info); + void zungqr_ (const int* m, + const int* n, + const int* k, + std::complex* A, + const int* lda, + const std::complex* tau, + std::complex* work, + const int* lwork, + int* info); } // Class LapackConnector provide the connector to fortran lapack routine. // The entire function in this class are static and inline function. 
// Usage example: LapackConnector::functionname(parameter list). -namespace container { +namespace container +{ namespace lapackConnector { -static inline -int ilaenv( int ispec, const char *name,const char *opts,const int n1,const int n2, - const int n3,const int n4) +static inline int + ilaenv (int ispec, const char* name, const char* opts, const int n1, const int n2, const int n3, const int n4) { - const int nb = ilaenv_(&ispec, name, opts, &n1, &n2, &n3, &n4); + const int nb = ilaenv_ (&ispec, name, opts, &n1, &n2, &n3, &n4); return nb; } // wrap function of fortran lapack routine zhegvd. (pointer version) -static inline -void hegvd(const int itype, const char jobz, const char uplo, const int n, - float* a, const int lda, - float* b, const int ldb, float* w, - float* work, int lwork, float* rwork, int lrwork, - int* iwork, int liwork, int info) +static inline void + hegvd (const int itype, + const char jobz, + const char uplo, + const int n, + float* a, + const int lda, + float* b, + const int ldb, + float* w, + float* work, + int lwork, + float* rwork, + int lrwork, + int* iwork, + int liwork, + int info) { // call the fortran routine - ssygvd_(&itype, &jobz, &uplo, &n, - a, &lda, b, &ldb, w, - work, &lwork, - iwork, &liwork, &info); + ssygvd_ (&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work, &lwork, iwork, &liwork, &info); } // wrap function of fortran lapack routine zhegvd. 
-static inline -void hegvd(const int itype, const char jobz, const char uplo, const int n, - double* a, const int lda, - double* b, const int ldb, double* w, - double* work, int lwork, double* rwork, int lrwork, - int* iwork, int liwork, int info) +static inline void + hegvd (const int itype, + const char jobz, + const char uplo, + const int n, + double* a, + const int lda, + double* b, + const int ldb, + double* w, + double* work, + int lwork, + double* rwork, + int lrwork, + int* iwork, + int liwork, + int info) { // call the fortran routine - dsygvd_(&itype, &jobz, &uplo, &n, - a, &lda, b, &ldb, w, - work, &lwork, - iwork, &liwork, &info); -} -static inline -void hegvd(const int itype, const char jobz, const char uplo, const int n, - std::complex* a, const int lda, - std::complex* b, const int ldb, float* w, - std::complex* work, int lwork, float* rwork, int lrwork, - int* iwork, int liwork, int info) + dsygvd_ (&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work, &lwork, iwork, &liwork, &info); +} +static inline void + hegvd (const int itype, + const char jobz, + const char uplo, + const int n, + std::complex* a, + const int lda, + std::complex* b, + const int ldb, + float* w, + std::complex* work, + int lwork, + float* rwork, + int lrwork, + int* iwork, + int liwork, + int info) { // call the fortran routine - chegvd_(&itype, &jobz, &uplo, &n, - a, &lda, b, &ldb, w, - work, &lwork, rwork, &lrwork, - iwork, &liwork, &info); + chegvd_ (&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work, &lwork, rwork, &lrwork, iwork, &liwork, &info); } // wrap function of fortran lapack routine zhegvd. 
-static inline -void hegvd(const int itype, const char jobz, const char uplo, const int n, - std::complex* a, const int lda, - std::complex* b, const int ldb, double* w, - std::complex* work, int lwork, double* rwork, int lrwork, - int* iwork, int liwork, int info) +static inline void + hegvd (const int itype, + const char jobz, + const char uplo, + const int n, + std::complex* a, + const int lda, + std::complex* b, + const int ldb, + double* w, + std::complex* work, + int lwork, + double* rwork, + int lrwork, + int* iwork, + int liwork, + int info) { // call the fortran routine - zhegvd_(&itype, &jobz, &uplo, &n, - a, &lda, b, &ldb, w, - work, &lwork, rwork, &lrwork, - iwork, &liwork, &info); + zhegvd_ (&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work, &lwork, rwork, &lrwork, iwork, &liwork, &info); } // Note @@ -283,315 +612,708 @@ void hegvd(const int itype, const char jobz, const char uplo, const int n, // and we include rwork in the function parameter list // for simplicity of function overloading // and unification of function parameter list -static inline -void hegvx(const int itype, const char jobz, const char range, const char uplo, const int n, - float* a, const int lda, float* b, const int ldb, - const float vl, const float vu, const int il, const int iu, const float abstol, - int m, float* w, float* z, const int ldz, - float* work, const int lwork, float* rwork, int* iwork, int* ifail, int& info) -{ - ssygvx_(&itype, &jobz, &range, &uplo, &n, - a, &lda, b, &ldb, - &vl, &vu, &il, &iu, - &abstol, &m, w, z, &ldz, - work, &lwork, iwork, ifail, &info); -} - -static inline -void hegvx(const int itype, const char jobz, const char range, const char uplo, const int n, - double* a, const int lda, double* b, const int ldb, - const double vl, const double vu, const int il, const int iu, const double abstol, - int m, double* w, double* z, const int ldz, - double* work, const int lwork, double* rwork, int* iwork, int* ifail, int& info) -{ - dsygvx_(&itype, &jobz, 
&range, &uplo, &n, - a, &lda, b, &ldb, - &vl, &vu, &il, &iu, - &abstol, &m, w, z, &ldz, - work, &lwork, iwork, ifail, &info); -} - -static inline -void hegvx(const int itype, const char jobz, const char range, const char uplo, const int n, - std::complex* a, const int lda, std::complex* b, const int ldb, - const float vl, const float vu, const int il, const int iu, const float abstol, - int m, float* w, std::complex* z, const int ldz, - std::complex* work, const int lwork, float* rwork, int* iwork, int* ifail, int& info) -{ - chegvx_(&itype, &jobz, &range, &uplo, &n, - a, &lda, b, &ldb, - &vl, &vu, &il, &iu, - &abstol, &m, w, z, &ldz, - work, &lwork, rwork, iwork, ifail, &info); -} - -static inline -void hegvx(const int itype, const char jobz, const char range, const char uplo, const int n, - std::complex* a, const int lda, std::complex* b, const int ldb, - const double vl, const double vu, const int il, const int iu, const double abstol, - int m, double* w, std::complex* z, const int ldz, - std::complex* work, const int lwork, double* rwork, int* iwork, int* ifail, int& info) -{ - zhegvx_(&itype, &jobz, &range, &uplo, &n, - a, &lda, b, &ldb, - &vl, &vu, &il, &iu, - &abstol, &m, w, z, &ldz, - work, &lwork, rwork, iwork, ifail, &info); +static inline void + hegvx (const int itype, + const char jobz, + const char range, + const char uplo, + const int n, + float* a, + const int lda, + float* b, + const int ldb, + const float vl, + const float vu, + const int il, + const int iu, + const float abstol, + int m, + float* w, + float* z, + const int ldz, + float* work, + const int lwork, + float* rwork, + int* iwork, + int* ifail, + int& info) +{ + ssygvx_ (&itype, + &jobz, + &range, + &uplo, + &n, + a, + &lda, + b, + &ldb, + &vl, + &vu, + &il, + &iu, + &abstol, + &m, + w, + z, + &ldz, + work, + &lwork, + iwork, + ifail, + &info); } +static inline void + hegvx (const int itype, + const char jobz, + const char range, + const char uplo, + const int n, + double* a, + const int 
lda, + double* b, + const int ldb, + const double vl, + const double vu, + const int il, + const int iu, + const double abstol, + int m, + double* w, + double* z, + const int ldz, + double* work, + const int lwork, + double* rwork, + int* iwork, + int* ifail, + int& info) +{ + dsygvx_ (&itype, + &jobz, + &range, + &uplo, + &n, + a, + &lda, + b, + &ldb, + &vl, + &vu, + &il, + &iu, + &abstol, + &m, + w, + z, + &ldz, + work, + &lwork, + iwork, + ifail, + &info); +} + +static inline void + hegvx (const int itype, + const char jobz, + const char range, + const char uplo, + const int n, + std::complex* a, + const int lda, + std::complex* b, + const int ldb, + const float vl, + const float vu, + const int il, + const int iu, + const float abstol, + int m, + float* w, + std::complex* z, + const int ldz, + std::complex* work, + const int lwork, + float* rwork, + int* iwork, + int* ifail, + int& info) +{ + chegvx_ (&itype, + &jobz, + &range, + &uplo, + &n, + a, + &lda, + b, + &ldb, + &vl, + &vu, + &il, + &iu, + &abstol, + &m, + w, + z, + &ldz, + work, + &lwork, + rwork, + iwork, + ifail, + &info); +} + +static inline void + hegvx (const int itype, + const char jobz, + const char range, + const char uplo, + const int n, + std::complex* a, + const int lda, + std::complex* b, + const int ldb, + const double vl, + const double vu, + const int il, + const int iu, + const double abstol, + int m, + double* w, + std::complex* z, + const int ldz, + std::complex* work, + const int lwork, + double* rwork, + int* iwork, + int* ifail, + int& info) +{ + zhegvx_ (&itype, + &jobz, + &range, + &uplo, + &n, + a, + &lda, + b, + &ldb, + &vl, + &vu, + &il, + &iu, + &abstol, + &m, + w, + z, + &ldz, + work, + &lwork, + rwork, + iwork, + ifail, + &info); +} // wrap function of fortran lapack routine zheevx. 
-static inline -void heevx(const char jobz, const char range, const char uplo, const int n, - float* a, const int lda, - const float vl, const float vu, const int il, const int iu, const float abstol, - int m, float* w, float* z, const int ldz, - float* work, const int lwork, float* rwork, int* iwork, int* ifail, int info) -{ - ssyevx_(&jobz, &range, &uplo, &n, - a, &lda, &vl, &vu, &il, &iu, - &abstol, &m, w, z, &ldz, - work, &lwork, iwork, ifail, &info); +static inline void + heevx (const char jobz, + const char range, + const char uplo, + const int n, + float* a, + const int lda, + const float vl, + const float vu, + const int il, + const int iu, + const float abstol, + int m, + float* w, + float* z, + const int ldz, + float* work, + const int lwork, + float* rwork, + int* iwork, + int* ifail, + int info) +{ + ssyevx_ (&jobz, + &range, + &uplo, + &n, + a, + &lda, + &vl, + &vu, + &il, + &iu, + &abstol, + &m, + w, + z, + &ldz, + work, + &lwork, + iwork, + ifail, + &info); } // wrap function of fortran lapack routine zheevx. 
-static inline -void heevx(const char jobz, const char range, const char uplo, const int n, - double* a, const int lda, - const double vl, const double vu, const int il, const int iu, const double abstol, - int m, double* w, double* z, const int ldz, - double* work, const int lwork, double* rwork, int* iwork, int* ifail, int info) -{ - dsyevx_(&jobz, &range, &uplo, &n, - a, &lda, &vl, &vu, &il, &iu, - &abstol, &m, w, z, &ldz, - work, &lwork, iwork, ifail, &info); -} -static inline -void heevx(const char jobz, const char range, const char uplo, const int n, - std::complex* a, const int lda, - const float vl, const float vu, const int il, const int iu, const float abstol, - int m, float* w, std::complex* z, const int ldz, - std::complex* work, const int lwork, float* rwork, int* iwork, int* ifail, int info) -{ - cheevx_(&jobz, &range, &uplo, &n, - a, &lda, &vl, &vu, &il, &iu, - &abstol, &m, w, z, &ldz, - work, &lwork, rwork, iwork, ifail, &info); +static inline void + heevx (const char jobz, + const char range, + const char uplo, + const int n, + double* a, + const int lda, + const double vl, + const double vu, + const int il, + const int iu, + const double abstol, + int m, + double* w, + double* z, + const int ldz, + double* work, + const int lwork, + double* rwork, + int* iwork, + int* ifail, + int info) +{ + dsyevx_ (&jobz, + &range, + &uplo, + &n, + a, + &lda, + &vl, + &vu, + &il, + &iu, + &abstol, + &m, + w, + z, + &ldz, + work, + &lwork, + iwork, + ifail, + &info); +} +static inline void + heevx (const char jobz, + const char range, + const char uplo, + const int n, + std::complex* a, + const int lda, + const float vl, + const float vu, + const int il, + const int iu, + const float abstol, + int m, + float* w, + std::complex* z, + const int ldz, + std::complex* work, + const int lwork, + float* rwork, + int* iwork, + int* ifail, + int info) +{ + cheevx_ (&jobz, + &range, + &uplo, + &n, + a, + &lda, + &vl, + &vu, + &il, + &iu, + &abstol, + &m, + w, + z, + &ldz, 
+ work, + &lwork, + rwork, + iwork, + ifail, + &info); } // wrap function of fortran lapack routine zheevx. -static inline -void heevx(const char jobz, const char range, const char uplo, const int n, - std::complex* a, const int lda, - const double vl, const double vu, const int il, const int iu, const double abstol, - int m, double* w, std::complex* z, const int ldz, - std::complex* work, const int lwork, double* rwork, int* iwork, int* ifail, int info) +static inline void + heevx (const char jobz, + const char range, + const char uplo, + const int n, + std::complex* a, + const int lda, + const double vl, + const double vu, + const int il, + const int iu, + const double abstol, + int m, + double* w, + std::complex* z, + const int ldz, + std::complex* work, + const int lwork, + double* rwork, + int* iwork, + int* ifail, + int info) { - zheevx_(&jobz, &range, &uplo, &n, - a, &lda, &vl, &vu, &il, &iu, - &abstol, &m, w, z, &ldz, - work, &lwork, rwork, iwork, ifail, &info); + zheevx_ (&jobz, + &range, + &uplo, + &n, + a, + &lda, + &vl, + &vu, + &il, + &iu, + &abstol, + &m, + w, + z, + &ldz, + work, + &lwork, + rwork, + iwork, + ifail, + &info); } -static inline -void heevd(const char jobz, const char uplo, const int n, - float* a, const int lda, float* w, - float* work, int lwork, float* rwork, int lrwork, - int* iwork, int liwork, int& info) +static inline void + heevd (const char jobz, + const char uplo, + const int n, + float* a, + const int lda, + float* w, + float* work, + int lwork, + float* rwork, + int lrwork, + int* iwork, + int liwork, + int& info) { // call the fortran routine - ssyevd_( &jobz, &uplo, &n, - a, &lda, w, - work, &lwork, - iwork, &liwork, &info); + ssyevd_ (&jobz, &uplo, &n, a, &lda, w, work, &lwork, iwork, &liwork, &info); } // wrap function of fortran lapack routine zhegvd. 
-static inline -void heevd(const char jobz, const char uplo, const int n, - double* a, const int lda, double* w, - double* work, int lwork, double* rwork, int lrwork, - int* iwork, int liwork, int& info) +static inline void + heevd (const char jobz, + const char uplo, + const int n, + double* a, + const int lda, + double* w, + double* work, + int lwork, + double* rwork, + int lrwork, + int* iwork, + int liwork, + int& info) { // call the fortran routine - dsyevd_( &jobz, &uplo, &n, - a, &lda, w, - work, &lwork, - iwork, &liwork, &info); + dsyevd_ (&jobz, &uplo, &n, a, &lda, w, work, &lwork, iwork, &liwork, &info); } -static inline -void heevd(const char jobz, const char uplo, const int n, - std::complex* a, const int lda, float* w, - std::complex* work, int lwork, float* rwork, int lrwork, - int* iwork, int liwork, int& info) +static inline void + heevd (const char jobz, + const char uplo, + const int n, + std::complex* a, + const int lda, + float* w, + std::complex* work, + int lwork, + float* rwork, + int lrwork, + int* iwork, + int liwork, + int& info) { // call the fortran routine - cheevd_( &jobz, &uplo, &n, - a, &lda, w, - work, &lwork, rwork, &lrwork, - iwork, &liwork, &info); + cheevd_ (&jobz, &uplo, &n, a, &lda, w, work, &lwork, rwork, &lrwork, iwork, &liwork, &info); } // wrap function of fortran lapack routine zhegvd. 
-static inline -void heevd(const char jobz, const char uplo, const int n, - std::complex* a, const int lda, double* w, - std::complex* work, int lwork, double* rwork, int lrwork, - int* iwork, int liwork, int& info) +static inline void + heevd (const char jobz, + const char uplo, + const int n, + std::complex* a, + const int lda, + double* w, + std::complex* work, + int lwork, + double* rwork, + int lrwork, + int* iwork, + int liwork, + int& info) { // call the fortran routine - zheevd_( &jobz, &uplo, &n, - a, &lda, w, - work, &lwork, rwork, &lrwork, - iwork, &liwork, &info); + zheevd_ (&jobz, &uplo, &n, a, &lda, w, work, &lwork, rwork, &lrwork, iwork, &liwork, &info); } -static inline -void potrf( const char &uplo, const int &n, float* A, const int &lda, int &info ) +static inline void + potrf (const char& uplo, const int& n, float* A, const int& lda, int& info) { - spotrf_(&uplo, &n, A, &lda, &info ); + spotrf_ (&uplo, &n, A, &lda, &info); } -static inline -void potrf( const char &uplo, const int &n, double* A, const int &lda, int &info ) +static inline void + potrf (const char& uplo, const int& n, double* A, const int& lda, int& info) { - dpotrf_(&uplo, &n, A, &lda, &info ); + dpotrf_ (&uplo, &n, A, &lda, &info); } -static inline -void potrf( const char &uplo, const int &n, std::complex* A, const int &lda, int &info ) +static inline void + potrf (const char& uplo, const int& n, std::complex* A, const int& lda, int& info) { - cpotrf_(&uplo, &n, A, &lda, &info ); + cpotrf_ (&uplo, &n, A, &lda, &info); } -static inline -void potrf( const char &uplo, const int &n, std::complex* A, const int &lda, int &info ) +static inline void + potrf (const char& uplo, const int& n, std::complex* A, const int& lda, int& info) { - zpotrf_( &uplo, &n, A, &lda, &info ); + zpotrf_ (&uplo, &n, A, &lda, &info); } -static inline -void trtri( const char &uplo, const char &diag, const int &n, float* A, const int &lda, int &info ) +static inline void + trtri (const char& uplo, const char& 
diag, const int& n, float* A, const int& lda, int& info) { - strtri_( &uplo, &diag, &n, A, &lda, &info); + strtri_ (&uplo, &diag, &n, A, &lda, &info); } -static inline -void trtri( const char &uplo, const char &diag, const int &n, double* A, const int &lda, int &info) +static inline void + trtri (const char& uplo, const char& diag, const int& n, double* A, const int& lda, int& info) { - dtrtri_( &uplo, &diag, &n, A, &lda, &info); + dtrtri_ (&uplo, &diag, &n, A, &lda, &info); } -static inline -void trtri( const char &uplo, const char &diag, const int &n, std::complex* A, const int &lda, int &info ) +static inline void + trtri (const char& uplo, const char& diag, const int& n, std::complex* A, const int& lda, int& info) { - ctrtri_( &uplo, &diag, &n, A, &lda, &info); + ctrtri_ (&uplo, &diag, &n, A, &lda, &info); } -static inline -void trtri( const char &uplo, const char &diag, const int &n, std::complex* A, const int &lda, int &info) +static inline void + trtri (const char& uplo, const char& diag, const int& n, std::complex* A, const int& lda, int& info) { - ztrtri_( &uplo, &diag, &n, A, &lda, &info); + ztrtri_ (&uplo, &diag, &n, A, &lda, &info); } -static inline -void getrf(const int m, const int n, float* A, const int lda, int* ipiv, int &info) +static inline void + getrf (const int m, const int n, float* A, const int lda, int* ipiv, int& info) { - sgetrf_(&m, &n, A, &lda, ipiv, &info); + sgetrf_ (&m, &n, A, &lda, ipiv, &info); } -static inline -void getrf(const int m, const int n, double* A, const int lda, int* ipiv, int &info) +static inline void + getrf (const int m, const int n, double* A, const int lda, int* ipiv, int& info) { - dgetrf_(&m, &n, A, &lda, ipiv, &info); + dgetrf_ (&m, &n, A, &lda, ipiv, &info); } -static inline -void getrf(const int m, const int n, std::complex* A, const int lda, int* ipiv, int &info) +static inline void + getrf (const int m, const int n, std::complex* A, const int lda, int* ipiv, int& info) { - cgetrf_(&m, &n, A, &lda, ipiv, 
&info); + cgetrf_ (&m, &n, A, &lda, ipiv, &info); } -static inline -void getrf(const int m, const int n, std::complex* A, const int lda, int* ipiv, int &info) +static inline void + getrf (const int m, const int n, std::complex* A, const int lda, int* ipiv, int& info) { - zgetrf_(&m, &n, A, &lda, ipiv, &info); + zgetrf_ (&m, &n, A, &lda, ipiv, &info); } -static inline -void getri(const int n, float* A, const int lda, const int* ipiv, float* work, const int lwork, int& info) +static inline void + getri (const int n, float* A, const int lda, const int* ipiv, float* work, const int lwork, int& info) { - sgetri_(&n, A, &lda, ipiv, work, &lwork, &info); + sgetri_ (&n, A, &lda, ipiv, work, &lwork, &info); } -static inline -void getri(const int n, double* A, const int lda, const int* ipiv, double* work, const int lwork, int& info) +static inline void + getri (const int n, double* A, const int lda, const int* ipiv, double* work, const int lwork, int& info) { - dgetri_(&n, A, &lda, ipiv, work, &lwork, &info); + dgetri_ (&n, A, &lda, ipiv, work, &lwork, &info); } -static inline -void getri(const int n, std::complex* A, const int lda, const int* ipiv, std::complex* work, const int lwork, int& info) +static inline void + getri (const int n, + std::complex* A, + const int lda, + const int* ipiv, + std::complex* work, + const int lwork, + int& info) { - cgetri_(&n, A, &lda, ipiv, work, &lwork, &info); + cgetri_ (&n, A, &lda, ipiv, work, &lwork, &info); } -static inline -void getri(const int n, std::complex* A, const int lda, const int* ipiv, std::complex* work, const int lwork, int& info) +static inline void + getri (const int n, + std::complex* A, + const int lda, + const int* ipiv, + std::complex* work, + const int lwork, + int& info) { - zgetri_(&n, A, &lda, ipiv, work, &lwork, &info); + zgetri_ (&n, A, &lda, ipiv, work, &lwork, &info); } -static inline -void getrs(const char& trans, const int n, const int nrhs, float* A, const int lda, const int* ipiv, float* B, const int 
ldb, int& info) +static inline void + getrs (const char& trans, + const int n, + const int nrhs, + float* A, + const int lda, + const int* ipiv, + float* B, + const int ldb, + int& info) { - sgetrs_(&trans, &n, &nrhs, A, &lda, ipiv, B, &ldb, &info); + sgetrs_ (&trans, &n, &nrhs, A, &lda, ipiv, B, &ldb, &info); } -static inline -void getrs(const char& trans, const int n, const int nrhs, double* A, const int lda, const int* ipiv, double* B, const int ldb, int& info) +static inline void + getrs (const char& trans, + const int n, + const int nrhs, + double* A, + const int lda, + const int* ipiv, + double* B, + const int ldb, + int& info) { - dgetrs_(&trans, &n, &nrhs, A, &lda, ipiv, B, &ldb, &info); + dgetrs_ (&trans, &n, &nrhs, A, &lda, ipiv, B, &ldb, &info); } -static inline -void getrs(const char& trans, const int n, const int nrhs, std::complex* A, const int lda, const int* ipiv, std::complex* B, const int ldb, int& info) +static inline void + getrs (const char& trans, + const int n, + const int nrhs, + std::complex* A, + const int lda, + const int* ipiv, + std::complex* B, + const int ldb, + int& info) { - cgetrs_(&trans, &n, &nrhs, A, &lda, ipiv, B, &ldb, &info); + cgetrs_ (&trans, &n, &nrhs, A, &lda, ipiv, B, &ldb, &info); } -static inline -void getrs(const char& trans, const int n, const int nrhs, std::complex* A, const int lda, const int* ipiv, std::complex* B, const int ldb, int& info) +static inline void + getrs (const char& trans, + const int n, + const int nrhs, + std::complex* A, + const int lda, + const int* ipiv, + std::complex* B, + const int ldb, + int& info) { - zgetrs_(&trans, &n, &nrhs, A, &lda, ipiv, B, &ldb, &info); + zgetrs_ (&trans, &n, &nrhs, A, &lda, ipiv, B, &ldb, &info); } // LAPACK routines for QR decomposition -static inline -void geqrf(const int m, const int n, float* A, const int lda, float* tau, float* work, const int lwork, int& info) +static inline void + geqrf (const int m, const int n, float* A, const int lda, float* tau, float* 
work, const int lwork, int& info) { - sgeqrf_(&m, &n, A, &lda, tau, work, &lwork, &info); + sgeqrf_ (&m, &n, A, &lda, tau, work, &lwork, &info); } -static inline -void geqrf(const int m, const int n, double* A, const int lda, double* tau, double* work, const int lwork, int& info) +static inline void + geqrf (const int m, const int n, double* A, const int lda, double* tau, double* work, const int lwork, int& info) { - dgeqrf_(&m, &n, A, &lda, tau, work, &lwork, &info); + dgeqrf_ (&m, &n, A, &lda, tau, work, &lwork, &info); } -static inline -void geqrf(const int m, const int n, std::complex* A, const int lda, std::complex* tau, std::complex* work, const int lwork, int& info) +static inline void + geqrf (const int m, + const int n, + std::complex* A, + const int lda, + std::complex* tau, + std::complex* work, + const int lwork, + int& info) { - cgeqrf_(&m, &n, A, &lda, tau, work, &lwork, &info); + cgeqrf_ (&m, &n, A, &lda, tau, work, &lwork, &info); } -static inline -void geqrf(const int m, const int n, std::complex* A, const int lda, std::complex* tau, std::complex* work, const int lwork, int& info) +static inline void + geqrf (const int m, + const int n, + std::complex* A, + const int lda, + std::complex* tau, + std::complex* work, + const int lwork, + int& info) { - zgeqrf_(&m, &n, A, &lda, tau, work, &lwork, &info); + zgeqrf_ (&m, &n, A, &lda, tau, work, &lwork, &info); } // these routines generate the orthogonal matrix Q from the QR decomposition -static inline -void orgqr(const int m, const int n, const int k, float* A, const int lda, const float* tau, float* work, const int lwork, int& info) +static inline void + orgqr (const int m, + const int n, + const int k, + float* A, + const int lda, + const float* tau, + float* work, + const int lwork, + int& info) { - sorgqr_(&m, &n, &k, A, &lda, tau, work, &lwork, &info); + sorgqr_ (&m, &n, &k, A, &lda, tau, work, &lwork, &info); } -static inline -void orgqr(const int m, const int n, const int k, double* A, const int 
lda, const double* tau, double* work, const int lwork, int& info) +static inline void + orgqr (const int m, + const int n, + const int k, + double* A, + const int lda, + const double* tau, + double* work, + const int lwork, + int& info) { - dorgqr_(&m, &n, &k, A, &lda, tau, work, &lwork, &info); + dorgqr_ (&m, &n, &k, A, &lda, tau, work, &lwork, &info); } -static inline -void orgqr(const int m, const int n, const int k, std::complex* A, const int lda, const std::complex* tau, std::complex* work, const int lwork, int& info) +static inline void + orgqr (const int m, + const int n, + const int k, + std::complex* A, + const int lda, + const std::complex* tau, + std::complex* work, + const int lwork, + int& info) { - cungqr_(&m, &n, &k, A, &lda, tau, work, &lwork, &info); + cungqr_ (&m, &n, &k, A, &lda, tau, work, &lwork, &info); } -static inline -void orgqr(const int m, const int n, const int k, std::complex* A, const int lda, const std::complex* tau, std::complex* work, const int lwork, int& info) +static inline void + orgqr (const int m, + const int n, + const int k, + std::complex* A, + const int lda, + const std::complex* tau, + std::complex* work, + const int lwork, + int& info) { - zungqr_(&m, &n, &k, A, &lda, tau, work, &lwork, &info); + zungqr_ (&m, &n, &k, A, &lda, tau, work, &lwork, &info); } } // namespace lapackConnector } // namespace container -#endif // BASE_THIRD_PARTY_LAPACK_H_ +#endif // BASE_THIRD_PARTY_LAPACK_H_ diff --git a/source/source_base/module_container/base/utils/array_ref.h b/source/source_base/module_container/base/utils/array_ref.h index 735a05c41b6..d56383ea0c0 100644 --- a/source/source_base/module_container/base/utils/array_ref.h +++ b/source/source_base/module_container/base/utils/array_ref.h @@ -5,107 +5,142 @@ #include #include -namespace base { -namespace utils { +namespace base +{ +namespace utils +{ template -class array_ref final { +class array_ref final +{ private: T* data_ = nullptr; size_t length_; public: - /* implicit */ 
constexpr array_ref() : data_(nullptr), length_(0) {} - /* implicit */ constexpr array_ref(T* data, size_t length) : data_(data), length_(length) {} - /* implicit */ constexpr array_ref(T* begin, T* end) : data_(begin), length_(end - begin) {} - explicit constexpr array_ref(const T& item) : data_(&item), length_(1) {} + /* implicit */ constexpr array_ref () : data_ (nullptr), length_ (0) {} + /* implicit */ constexpr array_ref (T* data, size_t length) : data_ (data), length_ (length) {} + /* implicit */ constexpr array_ref (T* begin, T* end) : data_ (begin), length_ (end - begin) {} + explicit constexpr array_ref (const T& item) : data_ (&item), length_ (1) {} // Construct from a std::vector. template - /* implicit */ array_ref(const std::vector& vec) - : data_(vec.data()), length_(vec.size()) { - static_assert( - !std::is_same::value, - "array_ref cannot be constructed from a std::vector bitfield."); + /* implicit */ array_ref (const std::vector& vec) : data_ (vec.data ()), length_ (vec.size ()) + { + static_assert (!std::is_same::value, + "array_ref cannot be constructed from a std::vector bitfield."); } // Construct from a std::array. template - /* implicit */ constexpr array_ref(const std::array& arr) : data_(arr.data()), length_(N) {} + /* implicit */ constexpr array_ref (const std::array& arr) : data_ (arr.data ()), length_ (N) + { + } // Construct from a std::initializer_list. 
- /* implicit */ constexpr array_ref(const std::initializer_list& list) - : data_(list.begin()), length_(list.size()) {} + /* implicit */ constexpr array_ref (const std::initializer_list& list) + : data_ (list.begin ()), length_ (list.size ()) + { + } - constexpr const T* begin() const { return data_; } - constexpr const T* end() const { return data_ + length_; } + constexpr const T* + begin () const + { + return data_; + } + constexpr const T* + end () const + { + return data_ + length_; + } - constexpr bool empty() const { + constexpr bool + empty () const + { return length_ == 0; } - constexpr const T* data() const { + constexpr const T* + data () const + { return data_; } - constexpr size_t size() const { + constexpr size_t + size () const + { return length_; } - constexpr const T& front() const { + constexpr const T& + front () const + { return data_[0]; } - constexpr const T& back() const { + constexpr const T& + back () const + { return data_[length_ - 1]; } - constexpr bool equals(const array_ref& rhs) const { - return length_ == rhs.size() && std::equal(begin(), end(), rhs.begin()); + constexpr bool + equals (const array_ref& rhs) const + { + return length_ == rhs.size () && std::equal (begin (), end (), rhs.begin ()); } - constexpr const T& operator[](size_t index) const { + constexpr const T& + operator[] (size_t index) const + { return data_[index]; } template - typename std::enable_if::value, array_ref>::type& - operator=(U&& Temporary) = delete; + typename std::enable_if::value, array_ref>::type& operator= (U&& Temporary) = delete; template - typename std::enable_if::value, array_ref>::type& - operator=(std::initializer_list) = delete; + typename std::enable_if::value, array_ref>::type& operator= (std::initializer_list) + = delete; - std::vector vec() const { - return std::vector(data_, data_ + length_); + std::vector + vec () const + { + return std::vector (data_, data_ + length_); } }; template -std::ostream& operator<<(std::ostream& out, array_ref 
arr) { +std::ostream& + operator<< (std::ostream& out, array_ref arr) +{ int ii = 0; out << "["; - for (const auto& item : arr) { - if (ii++ > 0) - out << ", "; - out << item; - } + for (const auto& item: arr) + { + if (ii++ > 0) + out << ", "; + out << item; + } out << "]"; return out; } template -bool operator==(array_ref a1, array_ref a2) { - return a1.equals(a2); +bool + operator== (array_ref a1, array_ref a2) +{ + return a1.equals (a2); } template -bool operator!=(array_ref a1, array_ref a2) { - return !a1.equals(a2); +bool + operator!= (array_ref a1, array_ref a2) +{ + return !a1.equals (a2); } - } // namespace utils } // namespace base diff --git a/source/source_base/module_container/base/utils/gtest.h b/source/source_base/module_container/base/utils/gtest.h index 4ec7ecb1181..d25feab2eba 100644 --- a/source/source_base/module_container/base/utils/gtest.h +++ b/source/source_base/module_container/base/utils/gtest.h @@ -4,51 +4,63 @@ #include #include -namespace base { -namespace utils { +namespace base +{ +namespace utils +{ #if __CUDA || __ROCM -using ComplexTypes = ::testing::Types< - std::tuple, ct::DEVICE_CPU>, std::tuple, ct::DEVICE_GPU>, - std::tuple, ct::DEVICE_CPU>, std::tuple, ct::DEVICE_GPU>>; -using Types = ::testing::Types< - std::tuple, std::tuple, - std::tuple, std::tuple, - std::tuple, ct::DEVICE_CPU>, std::tuple, ct::DEVICE_GPU>, - std::tuple, ct::DEVICE_CPU>, std::tuple, ct::DEVICE_GPU>>; -#else -using ComplexTypes = ::testing::Types< - std::tuple, ct::DEVICE_CPU>, - std::tuple, ct::DEVICE_CPU>>; -using Types = ::testing::Types< - std::tuple, - std::tuple, - std::tuple, ct::DEVICE_CPU>, - std::tuple, ct::DEVICE_CPU>>; -#endif - -static inline void init_blas_handle() { - #if __CUDA || __ROCM - ct::kernels::createGpuBlasHandle(); - #endif +using ComplexTypes = ::testing::Types, ct::DEVICE_CPU>, + std::tuple, ct::DEVICE_GPU>, + std::tuple, ct::DEVICE_CPU>, + std::tuple, ct::DEVICE_GPU>>; +using Types = ::testing::Types, + std::tuple, + 
std::tuple, + std::tuple, + std::tuple, ct::DEVICE_CPU>, + std::tuple, ct::DEVICE_GPU>, + std::tuple, ct::DEVICE_CPU>, + std::tuple, ct::DEVICE_GPU>>; +#else +using ComplexTypes = ::testing::Types, ct::DEVICE_CPU>, + std::tuple, ct::DEVICE_CPU>>; +using Types = ::testing::Types, + std::tuple, + std::tuple, ct::DEVICE_CPU>, + std::tuple, ct::DEVICE_CPU>>; +#endif + +static inline void + init_blas_handle () +{ +#if __CUDA || __ROCM + ct::kernels::createGpuBlasHandle (); +#endif } -static inline void delete_blas_handle() { - #if __CUDA || __ROCM - ct::kernels::destroyGpuBlasHandle(); - #endif +static inline void + delete_blas_handle () +{ +#if __CUDA || __ROCM + ct::kernels::destroyGpuBlasHandle (); +#endif } -static inline void init_cusolver_handle() { - #if __CUDA || __ROCM - ct::kernels::createGpuSolverHandle(); - #endif +static inline void + init_cusolver_handle () +{ +#if __CUDA || __ROCM + ct::kernels::createGpuSolverHandle (); +#endif } -static inline void delete_cusolver_handle() { - #if __CUDA || __ROCM - ct::kernels::destroyGpuSolverHandle(); - #endif +static inline void + delete_cusolver_handle () +{ +#if __CUDA || __ROCM + ct::kernels::destroyGpuSolverHandle (); +#endif } } // namespace utils diff --git a/source/source_base/module_container/base/utils/logging.h b/source/source_base/module_container/base/utils/logging.h index 7d45105774e..c9ac70d35e2 100644 --- a/source/source_base/module_container/base/utils/logging.h +++ b/source/source_base/module_container/base/utils/logging.h @@ -3,20 +3,26 @@ #include -namespace base { -namespace utils { +namespace base +{ +namespace utils +{ -// Note while in the calling situation of check_msg_impl and check_exit_impl, +// Note while in the calling situation of check_msg_impl and check_exit_impl, // the check has been failed, so we don't need to release the char* msg -inline static const char* check_msg_impl(const char* msg) { - return msg; +inline static const char* + check_msg_impl (const char* msg) +{ + return 
msg; } -inline static void check_exit_impl(const char* func, const char* file, uint32_t line, const char* msg) { - fprintf(stderr, "Fatal error in function %s, file %s, line %u, \nwith message: \n\t\t%s\n", func, file, line, msg); - std::abort(); +inline static void + check_exit_impl (const char* func, const char* file, uint32_t line, const char* msg) +{ + fprintf (stderr, "Fatal error in function %s, file %s, line %u, \nwith message: \n\t\t%s\n", func, file, line, msg); + std::abort (); } -} // namespace logging +} // namespace utils } // namespace base #endif // BASE_CORE_LOGGING_H_ \ No newline at end of file diff --git a/source/source_base/module_container/test/allocator_test.cpp b/source/source_base/module_container/test/allocator_test.cpp index 980a5e80716..8a490fdaf5c 100644 --- a/source/source_base/module_container/test/allocator_test.cpp +++ b/source/source_base/module_container/test/allocator_test.cpp @@ -5,34 +5,35 @@ #include #include +TEST (CPUAllocator, AllocateAndFree) +{ + base::core::CPUAllocator alloc; + // Allocate memory of size 100. + void* ptr = alloc.allocate (100); + EXPECT_NE (nullptr, ptr); + alloc.free (ptr); -TEST(CPUAllocator, AllocateAndFree) { - base::core::CPUAllocator alloc; - // Allocate memory of size 100. - void* ptr = alloc.allocate(100); - EXPECT_NE(nullptr, ptr); - alloc.free(ptr); + // Allocate memory of size 200 with alignment 16. + ptr = alloc.allocate (200, 16); + EXPECT_NE (nullptr, ptr); + alloc.free (ptr); - // Allocate memory of size 200 with alignment 16. - ptr = alloc.allocate(200, 16); - EXPECT_NE(nullptr, ptr); - alloc.free(ptr); - - // Allocate memory of size 200 with alignment 16. - ptr = alloc.allocate(0, 0); - EXPECT_EQ(nullptr, ptr); + // Allocate memory of size 200 with alignment 16. + ptr = alloc.allocate (0, 0); + EXPECT_EQ (nullptr, ptr); } -TEST(CPUAllocator, AllocatedSize) { - base::core::CPUAllocator alloc; - // Allocate memory of size 100 and check its size. 
- void* ptr = alloc.allocate(100); - EXPECT_NE(nullptr, ptr); - alloc.free(ptr); +TEST (CPUAllocator, AllocatedSize) +{ + base::core::CPUAllocator alloc; + // Allocate memory of size 100 and check its size. + void* ptr = alloc.allocate (100); + EXPECT_NE (nullptr, ptr); + alloc.free (ptr); } -TEST(CPUAllocator, GetDeviceType) { - base::core::CPUAllocator alloc; - EXPECT_EQ(container::DeviceType::CpuDevice, - alloc.GetDeviceType()); +TEST (CPUAllocator, GetDeviceType) +{ + base::core::CPUAllocator alloc; + EXPECT_EQ (container::DeviceType::CpuDevice, alloc.GetDeviceType ()); } \ No newline at end of file diff --git a/source/source_base/module_container/test/tensor_accessor_test.cpp b/source/source_base/module_container/test/tensor_accessor_test.cpp index f04918e43f6..3922c7073cf 100644 --- a/source/source_base/module_container/test/tensor_accessor_test.cpp +++ b/source/source_base/module_container/test/tensor_accessor_test.cpp @@ -1,43 +1,46 @@ #include #include // Include the header file you provided -namespace container { +namespace container +{ // Test fixture to set up common data for tests -class TensorAccessorTest : public testing::Test { -protected: +class TensorAccessorTest : public testing::Test +{ + protected: // Common setup code - TensorAccessorTest() = default; + TensorAccessorTest () = default; // Common cleanup code - virtual ~TensorAccessorTest() = default; + virtual ~TensorAccessorTest () = default; }; // Test the TensorAccessor class -TEST_F(TensorAccessorTest, TensorAccessorTest) { +TEST_F (TensorAccessorTest, TensorAccessorTest) +{ // Test data int data[6] = {1, 2, 3, 4, 5, 6}; int sizes[3] = {2, 3, 1}; int strides[3] = {3, 1, 1}; - TensorAccessor accessor(data, sizes, strides); + TensorAccessor accessor (data, sizes, strides); // Test operator[] for 1D TensorAccessor - EXPECT_EQ(accessor[0][0][0], 1); - EXPECT_EQ(accessor[0][1][0], 2); - EXPECT_EQ(accessor[0][2][0], 3); - EXPECT_EQ(accessor[1][0][0], 4); - EXPECT_EQ(accessor[1][1][0], 5); - 
EXPECT_EQ(accessor[1][2][0], 6); + EXPECT_EQ (accessor[0][0][0], 1); + EXPECT_EQ (accessor[0][1][0], 2); + EXPECT_EQ (accessor[0][2][0], 3); + EXPECT_EQ (accessor[1][0][0], 4); + EXPECT_EQ (accessor[1][1][0], 5); + EXPECT_EQ (accessor[1][2][0], 6); // Test operator[] for 2D TensorAccessor auto sub_accessor_1 = accessor[1]; - EXPECT_EQ(sub_accessor_1[0][0], 4); - EXPECT_EQ(sub_accessor_1[1][0], 5); - EXPECT_EQ(sub_accessor_1[2][0], 6); + EXPECT_EQ (sub_accessor_1[0][0], 4); + EXPECT_EQ (sub_accessor_1[1][0], 5); + EXPECT_EQ (sub_accessor_1[2][0], 6); auto sub_accessor_2 = accessor[1][0]; - EXPECT_EQ(sub_accessor_2[0], 4); + EXPECT_EQ (sub_accessor_2[0], 4); } } // namespace container diff --git a/source/source_base/module_container/test/tensor_buffer_test.cpp b/source/source_base/module_container/test/tensor_buffer_test.cpp index a5336709117..b982e81513d 100644 --- a/source/source_base/module_container/test/tensor_buffer_test.cpp +++ b/source/source_base/module_container/test/tensor_buffer_test.cpp @@ -4,42 +4,44 @@ #include // Test the GetAllocatedBytes() method. -TEST(TensorBuffer, GetAllocatedBytes) { +TEST (TensorBuffer, GetAllocatedBytes) +{ // Create an allocator and allocate memory for a TensorBuffer. - base::core::Allocator* alloc = new base::core::CPUAllocator(); + base::core::Allocator* alloc = new base::core::CPUAllocator (); const size_t buffer_size = 100; // Create a TensorBuffer. - container::TensorBuffer tensor_buffer(alloc, 100); + container::TensorBuffer tensor_buffer (alloc, 100); - // Check the allocator - EXPECT_EQ(alloc, tensor_buffer.allocator()); + // Check the allocator + EXPECT_EQ (alloc, tensor_buffer.allocator ()); // Check the DeviceType - EXPECT_EQ(container::DeviceType::CpuDevice, tensor_buffer.GetDeviceType()); + EXPECT_EQ (container::DeviceType::CpuDevice, tensor_buffer.GetDeviceType ()); // Check the size of the buffer. 
- EXPECT_EQ(buffer_size, tensor_buffer.GetAllocatedBytes()); + EXPECT_EQ (buffer_size, tensor_buffer.GetAllocatedBytes ()); } // Test the resize() method. -TEST(TensorBuffer, resize) { +TEST (TensorBuffer, resize) +{ // Create an allocator and allocate memory for a TensorBuffer. - base::core::Allocator* alloc = new base::core::CPUAllocator(); + base::core::Allocator* alloc = new base::core::CPUAllocator (); const size_t initial_buffer_size = 100; // Create a TensorBuffer. - container::TensorBuffer tensor_buffer(alloc, initial_buffer_size); + container::TensorBuffer tensor_buffer (alloc, initial_buffer_size); - // Check the allocator - EXPECT_EQ(alloc, tensor_buffer.allocator()); + // Check the allocator + EXPECT_EQ (alloc, tensor_buffer.allocator ()); // Check the DeviceType - EXPECT_EQ(container::DeviceType::CpuDevice, tensor_buffer.GetDeviceType()); + EXPECT_EQ (container::DeviceType::CpuDevice, tensor_buffer.GetDeviceType ()); // Resize the buffer. const size_t new_buffer_size = 200; - tensor_buffer.resize(new_buffer_size); + tensor_buffer.resize (new_buffer_size); // Free the memory. // auto free by the destructor @@ -47,39 +49,41 @@ TEST(TensorBuffer, resize) { } // Test the root_buffer() method. -TEST(TensorBuffer, root_buffer) { +TEST (TensorBuffer, root_buffer) +{ // Create an allocator and allocate memory for a TensorBuffer. - base::core::Allocator* alloc = new base::core::CPUAllocator(); + base::core::Allocator* alloc = new base::core::CPUAllocator (); const size_t buffer_size = 100; // Create a root TensorBuffer. 
- container::TensorBuffer root_buffer(alloc, buffer_size); + container::TensorBuffer root_buffer (alloc, buffer_size); - // Check the allocator - EXPECT_EQ(alloc, root_buffer.allocator()); + // Check the allocator + EXPECT_EQ (alloc, root_buffer.allocator ()); // Check the DeviceType - EXPECT_EQ(container::DeviceType::CpuDevice, root_buffer.GetDeviceType()); + EXPECT_EQ (container::DeviceType::CpuDevice, root_buffer.GetDeviceType ()); // Check that the sub-buffer's root buffer is correct. - EXPECT_EQ(&root_buffer, root_buffer.root_buffer()); + EXPECT_EQ (&root_buffer, root_buffer.root_buffer ()); // Free the memory. // alloc.free(buffer); } -TEST(TensorBuffer, empty_allocator) { +TEST (TensorBuffer, empty_allocator) +{ // Create an allocator and allocate memory for a TensorBuffer. base::core::CPUAllocator alloc; const size_t buffer_size = 100; - void* buffer = alloc.allocate(buffer_size); + void* buffer = alloc.allocate (buffer_size); // Create a root TensorBuffer. - container::TensorBuffer root_buffer(buffer); + container::TensorBuffer root_buffer (buffer); // Check the DeviceType - EXPECT_EQ(container::DeviceType::UnKnown, root_buffer.GetDeviceType()); + EXPECT_EQ (container::DeviceType::UnKnown, root_buffer.GetDeviceType ()); // Free the memory. 
- alloc.free(buffer); + alloc.free (buffer); } \ No newline at end of file diff --git a/source/source_base/module_container/test/tensor_map_test.cpp b/source/source_base/module_container/test/tensor_map_test.cpp index 77a2a608974..46745d25f9c 100644 --- a/source/source_base/module_container/test/tensor_map_test.cpp +++ b/source/source_base/module_container/test/tensor_map_test.cpp @@ -3,27 +3,31 @@ #include -TEST(TensorMap, Constructor) { +TEST (TensorMap, Constructor) +{ // Test reference constructor std::vector vec{1.0, 2.0, 3.0}; - container::TensorMap t1(&vec[0], container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, container::TensorShape({1, 3})); - EXPECT_EQ(t1.data_type(), container::DataType::DT_FLOAT); - EXPECT_EQ(t1.device_type(), container::DeviceType::CpuDevice); - EXPECT_EQ(t1.shape().dims(), std::vector({1, 3})); - EXPECT_EQ(t1.NumElements(), 3); - EXPECT_EQ(t1.data(), vec.data()); + container::TensorMap t1 (&vec[0], + container::DataType::DT_FLOAT, + container::DeviceType::CpuDevice, + container::TensorShape ({1, 3})); + EXPECT_EQ (t1.data_type (), container::DataType::DT_FLOAT); + EXPECT_EQ (t1.device_type (), container::DeviceType::CpuDevice); + EXPECT_EQ (t1.shape ().dims (), std::vector ({1, 3})); + EXPECT_EQ (t1.NumElements (), 3); + EXPECT_EQ (t1.data (), vec.data ()); - container::TensorMap t2(&vec[0], t1, container::TensorShape({1, 3})); - EXPECT_EQ(t2.data_type(), container::DataType::DT_FLOAT); - EXPECT_EQ(t2.device_type(), container::DeviceType::CpuDevice); - EXPECT_EQ(t2.shape().dims(), std::vector({1, 3})); - EXPECT_EQ(t2.NumElements(), 3); - EXPECT_EQ(t2.data(), vec.data()); + container::TensorMap t2 (&vec[0], t1, container::TensorShape ({1, 3})); + EXPECT_EQ (t2.data_type (), container::DataType::DT_FLOAT); + EXPECT_EQ (t2.device_type (), container::DeviceType::CpuDevice); + EXPECT_EQ (t2.shape ().dims (), std::vector ({1, 3})); + EXPECT_EQ (t2.NumElements (), 3); + EXPECT_EQ (t2.data (), vec.data ()); - container::TensorMap 
t3(&vec[0], t2); - EXPECT_EQ(t3.data_type(), container::DataType::DT_FLOAT); - EXPECT_EQ(t3.device_type(), container::DeviceType::CpuDevice); - EXPECT_EQ(t3.shape().dims(), std::vector({1, 3})); - EXPECT_EQ(t3.NumElements(), 3); - EXPECT_EQ(t3.data(), vec.data()); + container::TensorMap t3 (&vec[0], t2); + EXPECT_EQ (t3.data_type (), container::DataType::DT_FLOAT); + EXPECT_EQ (t3.device_type (), container::DeviceType::CpuDevice); + EXPECT_EQ (t3.shape ().dims (), std::vector ({1, 3})); + EXPECT_EQ (t3.NumElements (), 3); + EXPECT_EQ (t3.data (), vec.data ()); } \ No newline at end of file diff --git a/source/source_base/module_container/test/tensor_shape_test.cpp b/source/source_base/module_container/test/tensor_shape_test.cpp index 14944d9c7b6..0445be97476 100644 --- a/source/source_base/module_container/test/tensor_shape_test.cpp +++ b/source/source_base/module_container/test/tensor_shape_test.cpp @@ -2,79 +2,82 @@ #include - /** * @brief Test cases for constructors of container::TensorShape class. 
*/ -TEST(TensorShape, Constructor) { +TEST (TensorShape, Constructor) +{ // Test default constructor container::TensorShape shape1; - EXPECT_EQ(shape1.ndim(), 0); + EXPECT_EQ (shape1.ndim (), 0); // Test initializer_list constructor - container::TensorShape shape2({2, 3, 4}); - EXPECT_EQ(shape2.ndim(), 3); - EXPECT_EQ(shape2.dim_size(0), 2); - EXPECT_EQ(shape2.dim_size(1), 3); - EXPECT_EQ(shape2.dim_size(2), 4); + container::TensorShape shape2 ({2, 3, 4}); + EXPECT_EQ (shape2.ndim (), 3); + EXPECT_EQ (shape2.dim_size (0), 2); + EXPECT_EQ (shape2.dim_size (1), 3); + EXPECT_EQ (shape2.dim_size (2), 4); // Test vector constructor std::vector dims = {5, 6}; - container::TensorShape shape3(dims); - EXPECT_EQ(shape3.ndim(), 2); - EXPECT_EQ(shape3.dim_size(0), 5); - EXPECT_EQ(shape3.dim_size(1), 6); + container::TensorShape shape3 (dims); + EXPECT_EQ (shape3.ndim (), 2); + EXPECT_EQ (shape3.dim_size (0), 5); + EXPECT_EQ (shape3.dim_size (1), 6); } /** * @brief Test cases for size manipulation functions of container::TensorShape class. 
*/ -TEST(TensorShape, SizeManipulation) { +TEST (TensorShape, SizeManipulation) +{ // Test add_dim and dim_size - container::TensorShape shape({2, 3}); - shape.add_dim(4); - EXPECT_EQ(shape.ndim(), 3); - EXPECT_EQ(shape.dim_size(0), 2); - EXPECT_EQ(shape.dim_size(1), 3); - EXPECT_EQ(shape.dim_size(2), 4); + container::TensorShape shape ({2, 3}); + shape.add_dim (4); + EXPECT_EQ (shape.ndim (), 3); + EXPECT_EQ (shape.dim_size (0), 2); + EXPECT_EQ (shape.dim_size (1), 3); + EXPECT_EQ (shape.dim_size (2), 4); // Test remove_dim - shape.remove_dim(1); - EXPECT_EQ(shape.ndim(), 2); - EXPECT_EQ(shape.dim_size(0), 2); - EXPECT_EQ(shape.dim_size(1), 4); + shape.remove_dim (1); + EXPECT_EQ (shape.ndim (), 2); + EXPECT_EQ (shape.dim_size (0), 2); + EXPECT_EQ (shape.dim_size (1), 4); // Test set_dim_size - shape.set_dim_size(1, 5); - EXPECT_EQ(shape.dim_size(1), 5); + shape.set_dim_size (1, 5); + EXPECT_EQ (shape.dim_size (1), 5); // Test NumElements - EXPECT_EQ(shape.NumElements(), 10); + EXPECT_EQ (shape.NumElements (), 10); } /** * @brief Test cases for comparison operators of container::TensorShape class. */ -TEST(TensorShape, Comparison) { - container::TensorShape shape1({2, 3, 4}); - container::TensorShape shape2({2, 3, 4}); - container::TensorShape shape3({3, 3, 4}); +TEST (TensorShape, Comparison) +{ + container::TensorShape shape1 ({2, 3, 4}); + container::TensorShape shape2 ({2, 3, 4}); + container::TensorShape shape3 ({3, 3, 4}); // Test == operator - EXPECT_EQ(shape1, shape2); - EXPECT_NE(shape1, shape3); + EXPECT_EQ (shape1, shape2); + EXPECT_NE (shape1, shape3); // Test != operator - EXPECT_NE(shape1, shape3); - EXPECT_NE(shape2, shape3); + EXPECT_NE (shape1, shape3); + EXPECT_NE (shape2, shape3); } /** * @brief Test cases for output stream operator of container::TensorShape class. 
*/ -TEST(TensorShape, Output) { - container::TensorShape shape({2, 3, 4}); +TEST (TensorShape, Output) +{ + container::TensorShape shape ({2, 3, 4}); std::stringstream ss; ss << shape; - EXPECT_EQ(ss.str(), "[2,3,4]"); + EXPECT_EQ (ss.str (), "[2,3,4]"); } diff --git a/source/source_base/module_container/test/tensor_test.cpp b/source/source_base/module_container/test/tensor_test.cpp index 803ac3d3820..cb11e3065f5 100644 --- a/source/source_base/module_container/test/tensor_test.cpp +++ b/source/source_base/module_container/test/tensor_test.cpp @@ -4,533 +4,570 @@ #include #include -namespace container { +namespace container +{ -TEST(Tensor, Constructor) { +TEST (Tensor, Constructor) +{ // Test constructor with default allocator - container::Tensor t1(container::DataType::DT_FLOAT, container::TensorShape({2, 3})); - EXPECT_EQ(t1.data_type(), container::DataType::DT_FLOAT); - EXPECT_EQ(t1.device_type(), container::DeviceType::CpuDevice); + container::Tensor t1 (container::DataType::DT_FLOAT, container::TensorShape ({2, 3})); + EXPECT_EQ (t1.data_type (), container::DataType::DT_FLOAT); + EXPECT_EQ (t1.device_type (), container::DeviceType::CpuDevice); // EXPECT_EQ(t1.shape().dims(), std::vector({2, 3})); - EXPECT_EQ(t1.NumElements(), 6); + EXPECT_EQ (t1.NumElements (), 6); #if __CUDA || __ROCM // Test constructor with specified device type - container::Tensor t2(container::DataType::DT_DOUBLE, container::DeviceType::GpuDevice, - container::TensorShape({3, 4})); - EXPECT_EQ(t2.data_type(), container::DataType::DT_DOUBLE); - EXPECT_EQ(t2.device_type(), container::DeviceType::GpuDevice); - EXPECT_EQ(t2.shape().dims(), std::vector({3, 4})); - EXPECT_EQ(t2.NumElements(), 12); + container::Tensor t2 (container::DataType::DT_DOUBLE, + container::DeviceType::GpuDevice, + container::TensorShape ({3, 4})); + EXPECT_EQ (t2.data_type (), container::DataType::DT_DOUBLE); + EXPECT_EQ (t2.device_type (), container::DeviceType::GpuDevice); + EXPECT_EQ (t2.shape ().dims (), 
std::vector ({3, 4})); + EXPECT_EQ (t2.NumElements (), 12); #endif // Test copy constructor container::Tensor t3 = t1; - EXPECT_EQ(t3.data_type(), container::DataType::DT_FLOAT); - EXPECT_EQ(t3.device_type(), container::DeviceType::CpuDevice); + EXPECT_EQ (t3.data_type (), container::DataType::DT_FLOAT); + EXPECT_EQ (t3.device_type (), container::DeviceType::CpuDevice); // EXPECT_EQ(t3.shape().dims(), std::vector({2, 3})); - EXPECT_EQ(t3.NumElements(), 6); - EXPECT_NE(t3.data(), t1.data()); + EXPECT_EQ (t3.NumElements (), 6); + EXPECT_NE (t3.data (), t1.data ()); // Test reference constructor std::vector vec{1.0, 2.0, 3.0}; - container::TensorMap t4(&vec[0], container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, - container::TensorShape({1, 3})); - EXPECT_EQ(t4.data_type(), container::DataType::DT_FLOAT); - EXPECT_EQ(t4.device_type(), container::DeviceType::CpuDevice); + container::TensorMap t4 (&vec[0], + container::DataType::DT_FLOAT, + container::DeviceType::CpuDevice, + container::TensorShape ({1, 3})); + EXPECT_EQ (t4.data_type (), container::DataType::DT_FLOAT); + EXPECT_EQ (t4.device_type (), container::DeviceType::CpuDevice); // EXPECT_EQ(t4.shape().dims(), std::vector({1, 3})); - EXPECT_EQ(t4.NumElements(), 3); - EXPECT_EQ(t4.data(), vec.data()); + EXPECT_EQ (t4.NumElements (), 3); + EXPECT_EQ (t4.data (), vec.data ()); } - -TEST(Tensor, GetDataPointer) { +TEST (Tensor, GetDataPointer) +{ // Create a 1x1 float tensor with data [1.0, 2.0, 3.0, 4.0]. 
- container::Tensor t1(container::DataType::DT_INT, container::TensorShape({1, 1})); - container::Tensor t2(container::DataType::DT_INT64, container::TensorShape({1, 1})); - container::Tensor t3(container::DataType::DT_FLOAT, container::TensorShape({1, 1})); - container::Tensor t4(container::DataType::DT_DOUBLE, container::TensorShape({1, 1})); - container::Tensor t5(container::DataType::DT_COMPLEX, container::TensorShape({1, 1})); - container::Tensor t6(container::DataType::DT_COMPLEX_DOUBLE, container::TensorShape({1, 1})); - t1.data()[0] = 1; - t2.data()[0] = 1; - t3.data()[0] = 1.0f; - t4.data()[0] = 1.0f; - t5.data>()[0] = {1.0f, 0.0f}; - t6.data>()[0] = {1.0f, 0.0f}; + container::Tensor t1 (container::DataType::DT_INT, container::TensorShape ({1, 1})); + container::Tensor t2 (container::DataType::DT_INT64, container::TensorShape ({1, 1})); + container::Tensor t3 (container::DataType::DT_FLOAT, container::TensorShape ({1, 1})); + container::Tensor t4 (container::DataType::DT_DOUBLE, container::TensorShape ({1, 1})); + container::Tensor t5 (container::DataType::DT_COMPLEX, container::TensorShape ({1, 1})); + container::Tensor t6 (container::DataType::DT_COMPLEX_DOUBLE, container::TensorShape ({1, 1})); + t1.data ()[0] = 1; + t2.data ()[0] = 1; + t3.data ()[0] = 1.0f; + t4.data ()[0] = 1.0f; + t5.data> ()[0] = {1.0f, 0.0f}; + t6.data> ()[0] = {1.0f, 0.0f}; // Get a pointer to the data buffer. - void *ptr1 = t1.data(); - void *ptr2 = t2.data(); - void *ptr3 = t3.data(); - void *ptr4 = t4.data(); - void *ptr5 = t5.data(); - void *ptr6 = t6.data(); + void* ptr1 = t1.data (); + void* ptr2 = t2.data (); + void* ptr3 = t3.data (); + void* ptr4 = t4.data (); + void* ptr5 = t5.data (); + void* ptr6 = t6.data (); // Ensure that the returned pointer is not null and points to the expected data. 
- EXPECT_NE(ptr1, nullptr); - EXPECT_NE(ptr2, nullptr); - EXPECT_NE(ptr3, nullptr); - EXPECT_NE(ptr4, nullptr); - EXPECT_NE(ptr5, nullptr); - EXPECT_NE(ptr6, nullptr); - EXPECT_EQ(static_cast(ptr1)[0], 1); - EXPECT_EQ(static_cast(ptr2)[0], 1); - EXPECT_EQ(static_cast(ptr3)[0], 1.0f); - EXPECT_EQ(static_cast(ptr4)[0], 1.0f); - - EXPECT_EQ(static_cast *>(ptr5)[0].real(), 1.0); - EXPECT_EQ(static_cast *>(ptr5)[0].imag(), 0.0); - EXPECT_EQ(static_cast *>(ptr6)[0].real(), 1.0); - EXPECT_EQ(static_cast *>(ptr6)[0].imag(), 0.0); + EXPECT_NE (ptr1, nullptr); + EXPECT_NE (ptr2, nullptr); + EXPECT_NE (ptr3, nullptr); + EXPECT_NE (ptr4, nullptr); + EXPECT_NE (ptr5, nullptr); + EXPECT_NE (ptr6, nullptr); + EXPECT_EQ (static_cast (ptr1)[0], 1); + EXPECT_EQ (static_cast (ptr2)[0], 1); + EXPECT_EQ (static_cast (ptr3)[0], 1.0f); + EXPECT_EQ (static_cast (ptr4)[0], 1.0f); + + EXPECT_EQ (static_cast*> (ptr5)[0].real (), 1.0); + EXPECT_EQ (static_cast*> (ptr5)[0].imag (), 0.0); + EXPECT_EQ (static_cast*> (ptr6)[0].real (), 1.0); + EXPECT_EQ (static_cast*> (ptr6)[0].imag (), 0.0); } - -TEST(Tensor, GetDataPointerDeathTest) { +TEST (Tensor, GetDataPointerDeathTest) +{ ::testing::FLAGS_gtest_death_test_style = "threadsafe"; // Try to get a typed pointer with a type that does not match the tensor's data type. // This should cause an error message to be printed and the program to exit with failure. - container::Tensor tensor(container::DataType::DT_FLOAT, container::TensorShape({1, 1})); + container::Tensor tensor (container::DataType::DT_FLOAT, container::TensorShape ({1, 1})); // Verify that requesting data with an unsupported data type causes the program to exit. - ASSERT_EXIT( - tensor.data(), // Unsupported data type - ::testing::ExitedWithCode(EXIT_FAILURE), - "Tensor data type does not match requested type." 
- ); + ASSERT_EXIT (tensor.data (), // Unsupported data type + ::testing::ExitedWithCode (EXIT_FAILURE), + "Tensor data type does not match requested type."); } -TEST(Tensor, SizeOfType) { +TEST (Tensor, SizeOfType) +{ // Test DT_FLOAT - EXPECT_EQ(container::Tensor::SizeOfType(container::DataType::DT_FLOAT), sizeof(float)); + EXPECT_EQ (container::Tensor::SizeOfType (container::DataType::DT_FLOAT), sizeof (float)); // Test DT_INT - EXPECT_EQ(container::Tensor::SizeOfType(container::DataType::DT_INT), sizeof(int32_t)); + EXPECT_EQ (container::Tensor::SizeOfType (container::DataType::DT_INT), sizeof (int32_t)); // Test DT_INT64 - EXPECT_EQ(container::Tensor::SizeOfType(container::DataType::DT_INT64), sizeof(int64_t)); + EXPECT_EQ (container::Tensor::SizeOfType (container::DataType::DT_INT64), sizeof (int64_t)); // Test DT_DOUBLE - EXPECT_EQ(container::Tensor::SizeOfType(container::DataType::DT_DOUBLE), sizeof(double)); + EXPECT_EQ (container::Tensor::SizeOfType (container::DataType::DT_DOUBLE), sizeof (double)); // Test DT_COMPLEX - EXPECT_EQ(container::Tensor::SizeOfType(container::DataType::DT_COMPLEX), sizeof(std::complex)); + EXPECT_EQ (container::Tensor::SizeOfType (container::DataType::DT_COMPLEX), sizeof (std::complex)); // Test DT_COMPLEX_DOUBLE - EXPECT_EQ(container::Tensor::SizeOfType(container::DataType::DT_COMPLEX_DOUBLE), sizeof(std::complex)); - + EXPECT_EQ (container::Tensor::SizeOfType (container::DataType::DT_COMPLEX_DOUBLE), sizeof (std::complex)); } -TEST(Tensor, SizeOfTypeDeathTest) { +TEST (Tensor, SizeOfTypeDeathTest) +{ ::testing::FLAGS_gtest_death_test_style = "threadsafe"; // Verify that requesting data with an unsupported data type causes the program to exit. - ASSERT_EXIT( - container::Tensor::SizeOfType(container::DataType::DT_INVALID), - ::testing::ExitedWithCode(EXIT_FAILURE), - "Unsupported data type!" 
- ); + ASSERT_EXIT (container::Tensor::SizeOfType (container::DataType::DT_INVALID), + ::testing::ExitedWithCode (EXIT_FAILURE), + "Unsupported data type!"); } -TEST(Tensor, ToDeviceAndSetZero) { +TEST (Tensor, ToDeviceAndSetZero) +{ // Create tensor on CPU - container::Tensor tensor(container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3}); + container::Tensor tensor (container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3}); // Set zero test - tensor.zero(); + tensor.zero (); // Create tensor on GPU - container::Tensor cpu_tensor = tensor.to_device(); + container::Tensor cpu_tensor = tensor.to_device (); // Check device type - EXPECT_EQ(cpu_tensor.device_type(), container::DeviceType::CpuDevice); + EXPECT_EQ (cpu_tensor.device_type (), container::DeviceType::CpuDevice); // Check data type - EXPECT_EQ(cpu_tensor.data_type(), container::DataType::DT_FLOAT); + EXPECT_EQ (cpu_tensor.data_type (), container::DataType::DT_FLOAT); // Check shape - EXPECT_EQ(cpu_tensor.shape(), tensor.shape()); + EXPECT_EQ (cpu_tensor.shape (), tensor.shape ()); // Check data - for (int ii = 0; ii < cpu_tensor.NumElements(); ii++) { - EXPECT_EQ(cpu_tensor.data()[ii], 0.0); - } + for (int ii = 0; ii < cpu_tensor.NumElements (); ii++) + { + EXPECT_EQ (cpu_tensor.data ()[ii], 0.0); + } } -TEST(Tensor, Cast) { +TEST (Tensor, Cast) +{ // Create a tensor object with float data type and device CPU - container::Tensor t(container::DataType::DT_COMPLEX_DOUBLE, container::DeviceType::CpuDevice, {2, 3}); - t.data>()[0] = {1.0, 0.0}; - t.data>()[1] = {2.0, 0.0}; - t.data>()[2] = {3.0, 0.0}; - t.data>()[3] = {4.0, 0.0}; - t.data>()[4] = {5.0, 0.0}; - t.data>()[5] = {6.0, 0.0}; + container::Tensor t (container::DataType::DT_COMPLEX_DOUBLE, container::DeviceType::CpuDevice, {2, 3}); + t.data> ()[0] = {1.0, 0.0}; + t.data> ()[1] = {2.0, 0.0}; + t.data> ()[2] = {3.0, 0.0}; + t.data> ()[3] = {4.0, 0.0}; + t.data> ()[4] = {5.0, 0.0}; + t.data> ()[5] = {6.0, 0.0}; // Cast the 
tensor to integer data type - container::Tensor t_float = t.cast>(); + container::Tensor t_float = t.cast> (); // Check that the data type and device of the output tensor are correct - EXPECT_EQ(t_float.data_type(), container::DataType::DT_COMPLEX); - EXPECT_EQ(t_float.device_type(), container::DeviceType::CpuDevice); + EXPECT_EQ (t_float.data_type (), container::DataType::DT_COMPLEX); + EXPECT_EQ (t_float.device_type (), container::DeviceType::CpuDevice); // Check that the shape of the output tensor is correct - EXPECT_EQ(t_float.shape().dims(), std::vector({2, 3})); + EXPECT_EQ (t_float.shape ().dims (), std::vector ({2, 3})); // Check that the data of the output tensor is correct - EXPECT_EQ(t_float.data>()[0].real(), 1.0); - EXPECT_EQ(t_float.data>()[0].imag(), 0.0); - EXPECT_EQ(t_float.data>()[1].real(), 2.0); - EXPECT_EQ(t_float.data>()[1].imag(), 0.0); - EXPECT_EQ(t_float.data>()[2].real(), 3.0); - EXPECT_EQ(t_float.data>()[2].imag(), 0.0); - EXPECT_EQ(t_float.data>()[3].real(), 4.0); - EXPECT_EQ(t_float.data>()[3].imag(), 0.0); - EXPECT_EQ(t_float.data>()[4].real(), 5.0); - EXPECT_EQ(t_float.data>()[4].imag(), 0.0); - EXPECT_EQ(t_float.data>()[5].real(), 6.0); - EXPECT_EQ(t_float.data>()[5].imag(), 0.0); + EXPECT_EQ (t_float.data> ()[0].real (), 1.0); + EXPECT_EQ (t_float.data> ()[0].imag (), 0.0); + EXPECT_EQ (t_float.data> ()[1].real (), 2.0); + EXPECT_EQ (t_float.data> ()[1].imag (), 0.0); + EXPECT_EQ (t_float.data> ()[2].real (), 3.0); + EXPECT_EQ (t_float.data> ()[2].imag (), 0.0); + EXPECT_EQ (t_float.data> ()[3].real (), 4.0); + EXPECT_EQ (t_float.data> ()[3].imag (), 0.0); + EXPECT_EQ (t_float.data> ()[4].real (), 5.0); + EXPECT_EQ (t_float.data> ()[4].imag (), 0.0); + EXPECT_EQ (t_float.data> ()[5].real (), 6.0); + EXPECT_EQ (t_float.data> ()[5].imag (), 0.0); } // Tests the reshape() function of the Tensor class. 
-TEST(Tensor, Reshape) { - container::Tensor t(container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3, 4}); - container::TensorShape new_shape({-1, 8}); - ASSERT_NO_THROW(t.reshape(new_shape)); - EXPECT_EQ(t.shape().ndim(), 2); - EXPECT_EQ(t.shape().dim_size(0), 3); - EXPECT_EQ(t.shape().dim_size(1), 8); - - container::Tensor t1(container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3, 4}); - container::TensorShape new_shape1({2, 3, 4}); - ASSERT_NO_THROW(t1.reshape(new_shape1)); - EXPECT_EQ(t1.shape(), new_shape1); +TEST (Tensor, Reshape) +{ + container::Tensor t (container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3, 4}); + container::TensorShape new_shape ({-1, 8}); + ASSERT_NO_THROW (t.reshape (new_shape)); + EXPECT_EQ (t.shape ().ndim (), 2); + EXPECT_EQ (t.shape ().dim_size (0), 3); + EXPECT_EQ (t.shape ().dim_size (1), 8); + + container::Tensor t1 (container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3, 4}); + container::TensorShape new_shape1 ({2, 3, 4}); + ASSERT_NO_THROW (t1.reshape (new_shape1)); + EXPECT_EQ (t1.shape (), new_shape1); } -TEST(Tensor, GetValueAndInnerMostPtr) { - container::Tensor t(container::DataType::DT_INT, container::DeviceType::CpuDevice, {2, 2, 4}); +TEST (Tensor, GetValueAndInnerMostPtr) +{ + container::Tensor t (container::DataType::DT_INT, container::DeviceType::CpuDevice, {2, 2, 4}); std::vector vec = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - EXPECT_EQ(t.shape().NumElements(), vec.size()); - memcpy(t.data(), vec.data(), sizeof(int) * vec.size()); - EXPECT_EQ(t.get_value(0, 0, 1), 2); - EXPECT_EQ(t.get_value(1, 1, 2), 15); - t.reshape({4, 4}); + EXPECT_EQ (t.shape ().NumElements (), vec.size ()); + memcpy (t.data (), vec.data (), sizeof (int) * vec.size ()); + EXPECT_EQ (t.get_value (0, 0, 1), 2); + EXPECT_EQ (t.get_value (1, 1, 2), 15); + t.reshape ({4, 4}); // check the inner_most_ptr meshod - auto row_ptr = t.inner_most_ptr(2); - 
EXPECT_EQ(row_ptr[0], 9); - EXPECT_EQ(row_ptr[3], 12); + auto row_ptr = t.inner_most_ptr (2); + EXPECT_EQ (row_ptr[0], 9); + EXPECT_EQ (row_ptr[3], 12); } -TEST(Tensor, ReshapeDeathTest) { +TEST (Tensor, ReshapeDeathTest) +{ ::testing::FLAGS_gtest_death_test_style = "threadsafe"; - container::Tensor t(container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3, 4}); - container::TensorShape new_shape({-1, 8}); - new_shape.set_dim_size(1, -2); - EXPECT_THROW(t.reshape(new_shape), std::invalid_argument); - - container::Tensor t1(container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3, 4}); - container::TensorShape new_shape1({-1, 5}); - EXPECT_THROW(t1.reshape(new_shape1), std::invalid_argument); - - container::Tensor t2(container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3, 4}); - container::TensorShape new_shape2({-1, -1}); - EXPECT_THROW(t2.reshape(new_shape2), std::invalid_argument); - - container::Tensor t3(container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3, 4}); - container::TensorShape new_shape3({2, 7}); - EXPECT_THROW(t3.reshape(new_shape3), std::invalid_argument); + container::Tensor t (container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3, 4}); + container::TensorShape new_shape ({-1, 8}); + new_shape.set_dim_size (1, -2); + EXPECT_THROW (t.reshape (new_shape), std::invalid_argument); + + container::Tensor t1 (container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3, 4}); + container::TensorShape new_shape1 ({-1, 5}); + EXPECT_THROW (t1.reshape (new_shape1), std::invalid_argument); + + container::Tensor t2 (container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3, 4}); + container::TensorShape new_shape2 ({-1, -1}); + EXPECT_THROW (t2.reshape (new_shape2), std::invalid_argument); + + container::Tensor t3 (container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3, 4}); + container::TensorShape new_shape3 ({2, 7}); + EXPECT_THROW 
(t3.reshape (new_shape3), std::invalid_argument); } // Tests the slice() function of the Tensor class. -TEST(Tensor, Slice) { - container::Tensor t(container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3}); +TEST (Tensor, Slice) +{ + container::Tensor t (container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3}); // fill with test data - for (int ii = 0; ii < t.NumElements(); ++ii) { - t.data()[ii] = ii; - } + for (int ii = 0; ii < t.NumElements (); ++ii) + { + t.data ()[ii] = ii; + } // test the slice() function - container::Tensor output = t.slice({0, 0}, {2, 2}); - EXPECT_EQ(output.shape().ndim(), 2); - EXPECT_EQ(output.shape().dim_size(0), 2); - EXPECT_EQ(output.shape().dim_size(1), 2); - EXPECT_EQ(output.data()[0], 0.0f); - EXPECT_EQ(output.data()[1], 1.0f); - EXPECT_EQ(output.data()[2], 3.0f); - EXPECT_EQ(output.data()[3], 4.0f); + container::Tensor output = t.slice ({0, 0}, {2, 2}); + EXPECT_EQ (output.shape ().ndim (), 2); + EXPECT_EQ (output.shape ().dim_size (0), 2); + EXPECT_EQ (output.shape ().dim_size (1), 2); + EXPECT_EQ (output.data ()[0], 0.0f); + EXPECT_EQ (output.data ()[1], 1.0f); + EXPECT_EQ (output.data ()[2], 3.0f); + EXPECT_EQ (output.data ()[3], 4.0f); // test error handling - container::Tensor t2(container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3}); - EXPECT_THROW(t2.slice({-1, 0}, {2, 2}), std::invalid_argument); - EXPECT_THROW(t2.slice({0, 0}, {2, 4}), std::invalid_argument); - EXPECT_THROW(t2.slice({0, 0, 0}, {2, 4, 3}), std::invalid_argument); - EXPECT_THROW(t2.slice({0, 0, 0, 0}, {2, 4, 3, 6}), std::invalid_argument); + container::Tensor t2 (container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3}); + EXPECT_THROW (t2.slice ({-1, 0}, {2, 2}), std::invalid_argument); + EXPECT_THROW (t2.slice ({0, 0}, {2, 4}), std::invalid_argument); + EXPECT_THROW (t2.slice ({0, 0, 0}, {2, 4, 3}), std::invalid_argument); + EXPECT_THROW (t2.slice ({0, 0, 0, 0}, {2, 4, 3, 6}), 
std::invalid_argument); - container::Tensor t3(container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {3}); + container::Tensor t3 (container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {3}); // fill with test data - for (int ii = 0; ii < t3.NumElements(); ++ii) { - t3.data()[ii] = ii; - } + for (int ii = 0; ii < t3.NumElements (); ++ii) + { + t3.data ()[ii] = ii; + } // test the slice() function - container::Tensor output3 = t3.slice({0}, {1}); - EXPECT_EQ(output3.shape().ndim(), 1); - EXPECT_EQ(output3.shape().dim_size(0), 1); - EXPECT_EQ(output3.data()[0], 0.0f); + container::Tensor output3 = t3.slice ({0}, {1}); + EXPECT_EQ (output3.shape ().ndim (), 1); + EXPECT_EQ (output3.shape ().dim_size (0), 1); + EXPECT_EQ (output3.data ()[0], 0.0f); - container::Tensor t4(container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {3}); + container::Tensor t4 (container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {3}); // fill with test data - for (int ii = 0; ii < t4.NumElements(); ++ii) { - t4.data()[ii] = ii; - } - t4.reshape({1, 1, 3}); + for (int ii = 0; ii < t4.NumElements (); ++ii) + { + t4.data ()[ii] = ii; + } + t4.reshape ({1, 1, 3}); // test the slice() function - container::Tensor output4 = t4.slice({0, 0, 0}, {1, 1, 2}); - EXPECT_EQ(output4.shape().ndim(), 3); - EXPECT_EQ(output4.shape().dim_size(2), 2); - EXPECT_EQ(output4.data()[0], 0.0f); + container::Tensor output4 = t4.slice ({0, 0, 0}, {1, 1, 2}); + EXPECT_EQ (output4.shape ().ndim (), 3); + EXPECT_EQ (output4.shape ().dim_size (2), 2); + EXPECT_EQ (output4.data ()[0], 0.0f); } -TEST(Tensor, Buffer) { +TEST (Tensor, Buffer) +{ // create a tensor of shape (2, 3) - container::TensorShape shape({2, 3}); - container::Tensor tensor(container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, shape); + container::TensorShape shape ({2, 3}); + container::Tensor tensor (container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, shape); // fill the tensor with 
some values - auto *data_ptr = tensor.data(); - for (int ii = 0; ii < tensor.NumElements(); ii++) { - data_ptr[ii] = static_cast(ii); - } + auto* data_ptr = tensor.data (); + for (int ii = 0; ii < tensor.NumElements (); ii++) + { + data_ptr[ii] = static_cast (ii); + } // get the tensor buffer - const container::TensorBuffer &buffer = tensor.buffer(); + const container::TensorBuffer& buffer = tensor.buffer (); // check if the data pointer is the same as the tensor data pointer - assert(buffer.data() == static_cast(data_ptr)); + assert (buffer.data () == static_cast (data_ptr)); } -TEST(Tensor, Resize) { - container::Tensor t1(container::DataType::DT_FLOAT, container::TensorShape({2, 2})); - const float *data_ptr1 = t1.data(); +TEST (Tensor, Resize) +{ + container::Tensor t1 (container::DataType::DT_FLOAT, container::TensorShape ({2, 2})); + const float* data_ptr1 = t1.data (); - container::TensorShape new_shape({3, 3}); - t1.resize(new_shape); - t1.zero(); + container::TensorShape new_shape ({3, 3}); + t1.resize (new_shape); + t1.zero (); // Check if the data type remains the same after resize - EXPECT_EQ(t1.data_type(), container::DataType::DT_FLOAT); + EXPECT_EQ (t1.data_type (), container::DataType::DT_FLOAT); // Check if the shape of the tensor object is updated - EXPECT_EQ(t1.shape(), new_shape); + EXPECT_EQ (t1.shape (), new_shape); // Check if the data buffer of the tensor object is reallocated - EXPECT_NE(t1.data(), data_ptr1); + EXPECT_NE (t1.data (), data_ptr1); // Check if the data buffer is correctly zeroed - const float *data_ptr2 = t1.data(); - for (int ii = 0; ii < new_shape.NumElements(); ++ii) { - EXPECT_FLOAT_EQ(data_ptr2[ii], 0.0); - } - - container::Tensor t2(container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3, 4}); - container::TensorShape new_shape2({2, 3, 4}); - ASSERT_NO_THROW(t2.resize(new_shape2)); + const float* data_ptr2 = t1.data (); + for (int ii = 0; ii < new_shape.NumElements (); ++ii) + { + EXPECT_FLOAT_EQ 
(data_ptr2[ii], 0.0); + } + + container::Tensor t2 (container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, {2, 3, 4}); + container::TensorShape new_shape2 ({2, 3, 4}); + ASSERT_NO_THROW (t2.resize (new_shape2)); } -TEST(Tensor, GetAllocatorDeathTest) { +TEST (Tensor, GetAllocatorDeathTest) +{ ::testing::FLAGS_gtest_death_test_style = "threadsafe"; - container::Tensor t1(container::DataType::DT_FLOAT, container::TensorShape({2, 2})); - ASSERT_EXIT( - base::core::Allocator *alloc = container::Tensor::GetAllocator(container::DeviceType::UnKnown), - ::testing::ExitedWithCode(EXIT_FAILURE), - "Tensor device type unknown does not match requested type." - ); + container::Tensor t1 (container::DataType::DT_FLOAT, container::TensorShape ({2, 2})); + ASSERT_EXIT (base::core::Allocator* alloc = container::Tensor::GetAllocator (container::DeviceType::UnKnown), + ::testing::ExitedWithCode (EXIT_FAILURE), + "Tensor device type unknown does not match requested type."); } -TEST(Tensor, OutputOperator) { +TEST (Tensor, OutputOperator) +{ // Create a tensor of shape [2, 2] with random values const int64_t num_elements = 4; - int *data1 = new int[num_elements]; - auto *data2 = new int64_t[num_elements]; - auto *data3 = new float[num_elements]; - auto *data4 = new double[num_elements]; - auto *data5 = new std::complex[num_elements]; - auto *data6 = new std::complex[num_elements]; - for (int ii = 0; ii < num_elements; ++ii) { - data1[ii] = static_cast(ii); - data2[ii] = static_cast(ii); - data3[ii] = static_cast(ii); - data4[ii] = static_cast(ii); - data5[ii] = std::complex{static_cast(ii), static_cast(ii)}; - data6[ii] = std::complex{static_cast(ii), static_cast(ii)}; - } - const container::TensorShape shape({2, 2}); - const container::TensorMap t1(data1, container::DataType::DT_INT, container::DeviceType::CpuDevice, shape); - const container::TensorMap t2(data2, container::DataType::DT_INT64, container::DeviceType::CpuDevice, shape); - const container::TensorMap t3(data3, 
container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, shape); - const container::TensorMap t4(data4, container::DataType::DT_DOUBLE, container::DeviceType::CpuDevice, shape); - const container::TensorMap t5(data5, container::DataType::DT_COMPLEX, container::DeviceType::CpuDevice, shape); - const container::TensorMap t6(data6, container::DataType::DT_COMPLEX_DOUBLE, container::DeviceType::CpuDevice, - shape); + int* data1 = new int[num_elements]; + auto* data2 = new int64_t[num_elements]; + auto* data3 = new float[num_elements]; + auto* data4 = new double[num_elements]; + auto* data5 = new std::complex[num_elements]; + auto* data6 = new std::complex[num_elements]; + for (int ii = 0; ii < num_elements; ++ii) + { + data1[ii] = static_cast (ii); + data2[ii] = static_cast (ii); + data3[ii] = static_cast (ii); + data4[ii] = static_cast (ii); + data5[ii] = std::complex{static_cast (ii), static_cast (ii)}; + data6[ii] = std::complex{static_cast (ii), static_cast (ii)}; + } + const container::TensorShape shape ({2, 2}); + const container::TensorMap t1 (data1, container::DataType::DT_INT, container::DeviceType::CpuDevice, shape); + const container::TensorMap t2 (data2, container::DataType::DT_INT64, container::DeviceType::CpuDevice, shape); + const container::TensorMap t3 (data3, container::DataType::DT_FLOAT, container::DeviceType::CpuDevice, shape); + const container::TensorMap t4 (data4, container::DataType::DT_DOUBLE, container::DeviceType::CpuDevice, shape); + const container::TensorMap t5 (data5, container::DataType::DT_COMPLEX, container::DeviceType::CpuDevice, shape); + const container::TensorMap t6 (data6, + container::DataType::DT_COMPLEX_DOUBLE, + container::DeviceType::CpuDevice, + shape); // Test if the output operator produces the expected output std::ostringstream oss; oss << t1 << t2 << t3 << t4 << t5 << t6; const std::string expected_output = "Tensor(shape=[2,2], data_type=int32, device_type=cpu, owns_memory=0, buffer="; - 
EXPECT_TRUE(oss.str().find(expected_output) == 0); + EXPECT_TRUE (oss.str ().find (expected_output) == 0); delete[] data1; } // Test constructor and basic operations -TEST(Tensor, CopyFrom) { +TEST (Tensor, CopyFrom) +{ // Create two tensors for testing - container::Tensor sourceTensor(DataType::DT_INT, DeviceType::CpuDevice, TensorShape({3, 2})); - container::Tensor destTensor(DataType::DT_FLOAT, DeviceType::CpuDevice, TensorShape({2, 3})); + container::Tensor sourceTensor (DataType::DT_INT, DeviceType::CpuDevice, TensorShape ({3, 2})); + container::Tensor destTensor (DataType::DT_FLOAT, DeviceType::CpuDevice, TensorShape ({2, 3})); // Initialize data in the source tensor - int *sourceData = sourceTensor.data(); - for (int ii = 0; ii < sourceTensor.NumElements(); ++ii) { - sourceData[ii] = ii; - } + int* sourceData = sourceTensor.data (); + for (int ii = 0; ii < sourceTensor.NumElements (); ++ii) + { + sourceData[ii] = ii; + } // Perform the CopyFrom operation - bool result = destTensor.CopyFrom(sourceTensor); + bool result = destTensor.CopyFrom (sourceTensor); // Verify that the CopyFrom operation was successful - EXPECT_TRUE(result); + EXPECT_TRUE (result); // Check the properties of the destination tensor - EXPECT_EQ(destTensor.data_type(), DataType::DT_INT); - EXPECT_EQ(destTensor.device_type(), DeviceType::CpuDevice); - EXPECT_EQ(destTensor.shape(), TensorShape({3, 2})); - EXPECT_EQ(destTensor.NumElements(), 6); + EXPECT_EQ (destTensor.data_type (), DataType::DT_INT); + EXPECT_EQ (destTensor.device_type (), DeviceType::CpuDevice); + EXPECT_EQ (destTensor.shape (), TensorShape ({3, 2})); + EXPECT_EQ (destTensor.NumElements (), 6); // Check that the data in the destination tensor matches the source tensor - int *destData = destTensor.data(); - for (int ii = 0; ii < destTensor.NumElements(); ++ii) { - EXPECT_EQ(destData[ii], ii); - } + int* destData = destTensor.data (); + for (int ii = 0; ii < destTensor.NumElements (); ++ii) + { + EXPECT_EQ (destData[ii], ii); 
+ } } // Test constructor and basic operations -TEST(Tensor, CopyFromWithReshape) { +TEST (Tensor, CopyFromWithReshape) +{ // Create two tensors for testing - container::Tensor sourceTensor(DataType::DT_INT, DeviceType::CpuDevice, TensorShape({2, 3})); - container::Tensor destTensor(DataType::DT_FLOAT, DeviceType::CpuDevice, TensorShape({3, 2})); + container::Tensor sourceTensor (DataType::DT_INT, DeviceType::CpuDevice, TensorShape ({2, 3})); + container::Tensor destTensor (DataType::DT_FLOAT, DeviceType::CpuDevice, TensorShape ({3, 2})); // Initialize data in the source tensor - int* sourceData = sourceTensor.data(); - for (int ii = 0; ii < sourceTensor.NumElements(); ++ii) { - sourceData[ii] = ii; - } + int* sourceData = sourceTensor.data (); + for (int ii = 0; ii < sourceTensor.NumElements (); ++ii) + { + sourceData[ii] = ii; + } // Perform the CopyFrom with reshaping operation - bool result = destTensor.CopyFrom(sourceTensor, TensorShape({3, 2})); + bool result = destTensor.CopyFrom (sourceTensor, TensorShape ({3, 2})); // Verify that the CopyFrom with reshaping operation was successful - EXPECT_TRUE(result); + EXPECT_TRUE (result); // Check the properties of the destination tensor - EXPECT_EQ(destTensor.data_type(), DataType::DT_INT); - EXPECT_EQ(destTensor.device_type(), DeviceType::CpuDevice); - EXPECT_EQ(destTensor.shape(), TensorShape({3, 2})); - EXPECT_EQ(destTensor.NumElements(), 6); + EXPECT_EQ (destTensor.data_type (), DataType::DT_INT); + EXPECT_EQ (destTensor.device_type (), DeviceType::CpuDevice); + EXPECT_EQ (destTensor.shape (), TensorShape ({3, 2})); + EXPECT_EQ (destTensor.NumElements (), 6); // Check that the data in the destination tensor matches the source tensor - int* destData = destTensor.data(); - for (int ii = 0; ii < destTensor.NumElements(); ++ii) { - EXPECT_EQ(destData[ii], ii); - } + int* destData = destTensor.data (); + for (int ii = 0; ii < destTensor.NumElements (); ++ii) + { + EXPECT_EQ (destData[ii], ii); + } } // Test 
AllocateFrom function -TEST(Tensor, AllocateFrom) { +TEST (Tensor, AllocateFrom) +{ // Create source and destination tensors - container::Tensor sourceTensor(DataType::DT_INT, DeviceType::CpuDevice, TensorShape({2, 2})); + container::Tensor sourceTensor (DataType::DT_INT, DeviceType::CpuDevice, TensorShape ({2, 2})); container::Tensor destTensor = {}; // Initialize data in the source tensor - int* sourceData = sourceTensor.data(); - for (int ii = 0; ii < sourceTensor.NumElements(); ++ii) { - sourceData[ii] = ii; - } + int* sourceData = sourceTensor.data (); + for (int ii = 0; ii < sourceTensor.NumElements (); ++ii) + { + sourceData[ii] = ii; + } // Perform the AllocateFrom operation - bool result = destTensor.AllocateFrom(sourceTensor, sourceTensor.shape()); + bool result = destTensor.AllocateFrom (sourceTensor, sourceTensor.shape ()); // Verify that the AllocateFrom operation was successful - EXPECT_TRUE(result); + EXPECT_TRUE (result); // Check the properties of the destination tensor - EXPECT_EQ(destTensor.data_type(), DataType::DT_INT); - EXPECT_EQ(destTensor.device_type(), DeviceType::CpuDevice); - EXPECT_EQ(destTensor.shape(), TensorShape({2, 2})); - EXPECT_EQ(destTensor.NumElements(), 4); + EXPECT_EQ (destTensor.data_type (), DataType::DT_INT); + EXPECT_EQ (destTensor.device_type (), DeviceType::CpuDevice); + EXPECT_EQ (destTensor.shape (), TensorShape ({2, 2})); + EXPECT_EQ (destTensor.NumElements (), 4); } // Test sync function -TEST(Tensor, Sync) { +TEST (Tensor, Sync) +{ // Create two tensors with the same data type, device type, and shape - container::Tensor tensor1(DataType::DT_INT, DeviceType::CpuDevice, TensorShape({2, 2})); - container::Tensor tensor2(DataType::DT_INT, DeviceType::CpuDevice, TensorShape({2, 2})); + container::Tensor tensor1 (DataType::DT_INT, DeviceType::CpuDevice, TensorShape ({2, 2})); + container::Tensor tensor2 (DataType::DT_INT, DeviceType::CpuDevice, TensorShape ({2, 2})); // Initialize data in the source tensor (tensor2) - 
int* tensor2Data = tensor2.data(); - for (int ii = 0; ii < tensor2.NumElements(); ++ii) { - tensor2Data[ii] = ii; - } + int* tensor2Data = tensor2.data (); + for (int ii = 0; ii < tensor2.NumElements (); ++ii) + { + tensor2Data[ii] = ii; + } // Sync the data from tensor2 to tensor1 - tensor1.sync(tensor2); + tensor1.sync (tensor2); // Check that the data in tensor1 now matches tensor2 - int* tensor1Data = tensor1.data(); - for (int ii = 0; ii < tensor1.NumElements(); ++ii) { - EXPECT_EQ(tensor1Data[ii], ii); - } + int* tensor1Data = tensor1.data (); + for (int ii = 0; ii < tensor1.NumElements (); ++ii) + { + EXPECT_EQ (tensor1Data[ii], ii); + } } -TEST(Tensor, SubTensor) { +TEST (Tensor, SubTensor) +{ // Create a tensor with some data - container::Tensor tensor(DataType::DT_INT, DeviceType::CpuDevice, TensorShape({2, 3})); - int* tensorData = tensor.data(); + container::Tensor tensor (DataType::DT_INT, DeviceType::CpuDevice, TensorShape ({2, 3})); + int* tensorData = tensor.data (); // Initialize data in the tensor - for (int ii = 0; ii < tensor.NumElements(); ++ii) { - tensorData[ii] = ii; - } + for (int ii = 0; ii < tensor.NumElements (); ++ii) + { + tensorData[ii] = ii; + } // Access a sub-tensor based on the provided index - container::Tensor subTensor = tensor[1]; // Get the second row + container::Tensor subTensor = tensor[1]; // Get the second row // Check the properties of the sub-tensor - EXPECT_EQ(subTensor.data_type(), DataType::DT_INT); - EXPECT_EQ(subTensor.device_type(), DeviceType::CpuDevice); - EXPECT_EQ(subTensor.shape().ndim(), 1); // Sub-tensor should be 1D - EXPECT_EQ(subTensor.shape().dim_size(0), 3); // Sub-tensor should have 3 elements + EXPECT_EQ (subTensor.data_type (), DataType::DT_INT); + EXPECT_EQ (subTensor.device_type (), DeviceType::CpuDevice); + EXPECT_EQ (subTensor.shape ().ndim (), 1); // Sub-tensor should be 1D + EXPECT_EQ (subTensor.shape ().dim_size (0), 3); // Sub-tensor should have 3 elements // Check the data in the 
sub-tensor - int* subTensorData = subTensor.data(); - for (int ii = 0; ii < subTensor.NumElements(); ++ii) { - EXPECT_EQ(subTensorData[ii], ii + 3); // Offset by 3 elements (second row) - } + int* subTensorData = subTensor.data (); + for (int ii = 0; ii < subTensor.NumElements (); ++ii) + { + EXPECT_EQ (subTensorData[ii], ii + 3); // Offset by 3 elements (second row) + } } // Test accessor function -TEST(Tensor, Accessor) { +TEST (Tensor, Accessor) +{ // Create a tensor with some data - container::Tensor tensor(DataType::DT_INT, DeviceType::CpuDevice, TensorShape({2, 3})); - int* tensorData = tensor.data(); + container::Tensor tensor (DataType::DT_INT, DeviceType::CpuDevice, TensorShape ({2, 3})); + int* tensorData = tensor.data (); // Initialize data in the tensor - for (int ii = 0; ii < tensor.NumElements(); ++ii) { - tensorData[ii] = ii; - } + for (int ii = 0; ii < tensor.NumElements (); ++ii) + { + tensorData[ii] = ii; + } // Access a 2D tensor accessor - container::TensorAccessor accessor2D = tensor.accessor(); + container::TensorAccessor accessor2D = tensor.accessor (); // Check the data in the accessor - for (int ii = 0; ii < 2; ++ii) { - for (int jj = 0; jj < 3; ++jj) { - EXPECT_EQ(accessor2D[ii][jj], ii * 3 + jj); + for (int ii = 0; ii < 2; ++ii) + { + for (int jj = 0; jj < 3; ++jj) + { + EXPECT_EQ (accessor2D[ii][jj], ii * 3 + jj); + } } - } } } // namespace container \ No newline at end of file diff --git a/source/source_base/module_container/test/tensor_utils_test.cpp b/source/source_base/module_container/test/tensor_utils_test.cpp index 4d99aaa1aa5..2c94b24c8c1 100644 --- a/source/source_base/module_container/test/tensor_utils_test.cpp +++ b/source/source_base/module_container/test/tensor_utils_test.cpp @@ -2,36 +2,36 @@ #include - -TEST(TensorUtils, _get_digit_places) { +TEST (TensorUtils, _get_digit_places) +{ const int size = 6; float arr[size] = {-1.0, 2.5, 3.0, -0.5, 0.0, 1.234567}; int int_count = 0, frac_count = 0; // Test for float type - int 
total_digits = container::_get_digit_places(arr, size, int_count, frac_count); - EXPECT_EQ(total_digits, 9); - EXPECT_EQ(int_count, 2); - EXPECT_EQ(frac_count, 7); + int total_digits = container::_get_digit_places (arr, size, int_count, frac_count); + EXPECT_EQ (total_digits, 9); + EXPECT_EQ (int_count, 2); + EXPECT_EQ (frac_count, 7); } -TEST(TensorUtils, _internal_output) { +TEST (TensorUtils, _internal_output) +{ const int num_elements = 8; float* data = new float[num_elements]; - for (int ii = 0; ii < 8; ii++) { - data[ii] = ii; - } - container::TensorShape shape1 {8}, shape2{2, 4}, shape3{2, 2, 2}, shape4{1, 2, 2, 2}; + for (int ii = 0; ii < 8; ii++) + { + data[ii] = ii; + } + container::TensorShape shape1{8}, shape2{2, 4}, shape3{2, 2, 2}, shape4{1, 2, 2, 2}; // Test if the output operator produces the expected output std::ostringstream oss; - container::_internal_output(oss, data, shape1, num_elements); - container::_internal_output(oss, data, shape2, num_elements); - container::_internal_output(oss, data, shape3, num_elements); - container::_internal_output(oss, data, shape4, num_elements); + container::_internal_output (oss, data, shape1, num_elements); + container::_internal_output (oss, data, shape2, num_elements); + container::_internal_output (oss, data, shape3, num_elements); + container::_internal_output (oss, data, shape4, num_elements); const std::string expected_output = "[ 0, 1, 2, 3, 4, 5, 6, 7]"; - EXPECT_TRUE(oss.str().find(expected_output) == 0); + EXPECT_TRUE (oss.str ().find (expected_output) == 0); } -TEST(TensorUtils, removeTrailingZeros) { - EXPECT_EQ(container::removeTrailingZeros(""), "0"); -} \ No newline at end of file +TEST (TensorUtils, removeTrailingZeros) { EXPECT_EQ (container::removeTrailingZeros (""), "0"); } \ No newline at end of file diff --git a/source/source_base/module_device/cuda/memory_op.cu b/source/source_base/module_device/cuda/memory_op.cu index ccb52ae719e..79195bab82c 100644 --- 
a/source/source_base/module_device/cuda/memory_op.cu +++ b/source/source_base/module_device/cuda/memory_op.cu @@ -16,218 +16,254 @@ namespace memory { template -__global__ void cast_memory(FPTYPE_out* out, const FPTYPE_in* in, const int size) +__global__ void + cast_memory (FPTYPE_out* out, const FPTYPE_in* in, const int size) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx >= size) - { - return; - } - out[idx] = static_cast(in[idx]); + { + return; + } + out[idx] = static_cast (in[idx]); } template -__global__ void cast_memory(std::complex* out, const std::complex* in, const int size) +__global__ void + cast_memory (std::complex* out, const std::complex* in, const int size) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx >= size) - { - return; - } - auto* _out = reinterpret_cast*>(out); - const auto* _in = reinterpret_cast*>(in); - _out[idx] = static_cast>(_in[idx]); + { + return; + } + auto* _out = reinterpret_cast*> (out); + const auto* _in = reinterpret_cast*> (in); + _out[idx] = static_cast> (_in[idx]); } template -__global__ void cast_memory(std::complex* out, const FPTYPE_in* in, const int size) +__global__ void + cast_memory (std::complex* out, const FPTYPE_in* in, const int size) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx >= size) - { - return; - } - auto* _out = reinterpret_cast*>(out); - _out[idx] = static_cast>(in[idx]); + { + return; + } + auto* _out = reinterpret_cast*> (out); + _out[idx] = static_cast> (in[idx]); } template -void resize_memory_op::operator()(FPTYPE*& arr, +void + resize_memory_op::operator() (FPTYPE*& arr, const size_t size, const char* record_in) { if (arr != nullptr) - { - delete_memory_op()(arr); - } - CHECK_CUDA(cudaMalloc((void**)&arr, sizeof(FPTYPE) * size)); + { + delete_memory_op () (arr); + } + CHECK_CUDA (cudaMalloc ((void**)&arr, sizeof (FPTYPE) * size)); std::string record_string; if (record_in != nullptr) - { - record_string = record_in; - } + { + record_string = record_in; + } else - 
{ - record_string = "no_record"; - } + { + record_string = "no_record"; + } if (record_string != "no_record") - { - ModuleBase::Memory::record_gpu(record_string, sizeof(FPTYPE) * size); - } + { + ModuleBase::Memory::record_gpu (record_string, sizeof (FPTYPE) * size); + } } template -void set_memory_op::operator()(FPTYPE* arr, - const int var, - const size_t size) +void + set_memory_op::operator() (FPTYPE* arr, const int var, const size_t size) { - CHECK_CUDA(cudaMemset(arr, var, sizeof(FPTYPE) * size)); + CHECK_CUDA (cudaMemset (arr, var, sizeof (FPTYPE) * size)); } template -void set_memory_2d_op::operator()(FPTYPE* arr, +void + set_memory_2d_op::operator() (FPTYPE* arr, const size_t pitch, const int var, const size_t width, const size_t height) { - CHECK_CUDA(cudaMemset2D(arr, sizeof(FPTYPE) * pitch , var, sizeof(FPTYPE) * width, height)); + CHECK_CUDA (cudaMemset2D (arr, sizeof (FPTYPE) * pitch, var, sizeof (FPTYPE) * width, height)); } template -void synchronize_memory_op::operator()( - FPTYPE* arr_out, - const FPTYPE* arr_in, - const size_t size) +void + synchronize_memory_op::operator() (FPTYPE* arr_out, + const FPTYPE* arr_in, + const size_t size) { - CHECK_CUDA(cudaMemcpy(arr_out, arr_in, sizeof(FPTYPE) * size, cudaMemcpyDeviceToHost)); + CHECK_CUDA (cudaMemcpy (arr_out, arr_in, sizeof (FPTYPE) * size, cudaMemcpyDeviceToHost)); } template -void synchronize_memory_op::operator()( - FPTYPE* arr_out, - const FPTYPE* arr_in, - const size_t size) +void + synchronize_memory_op::operator() (FPTYPE* arr_out, + const FPTYPE* arr_in, + const size_t size) { - CHECK_CUDA(cudaMemcpy(arr_out, arr_in, sizeof(FPTYPE) * size, cudaMemcpyHostToDevice)); + CHECK_CUDA (cudaMemcpy (arr_out, arr_in, sizeof (FPTYPE) * size, cudaMemcpyHostToDevice)); } template -void synchronize_memory_op::operator()( - FPTYPE* arr_out, - const FPTYPE* arr_in, - const size_t size) +void + synchronize_memory_op::operator() (FPTYPE* arr_out, + const FPTYPE* arr_in, + const size_t size) { - 
CHECK_CUDA(cudaMemcpy(arr_out, arr_in, sizeof(FPTYPE) * size, cudaMemcpyDeviceToDevice)); + CHECK_CUDA (cudaMemcpy (arr_out, arr_in, sizeof (FPTYPE) * size, cudaMemcpyDeviceToDevice)); } template -void synchronize_memory_2d_op::operator()( - FPTYPE* arr_out, - const size_t dpitch, - const FPTYPE* arr_in, - const size_t spitch, - const size_t width, - const size_t height) +void + synchronize_memory_2d_op::operator() ( + FPTYPE* arr_out, + const size_t dpitch, + const FPTYPE* arr_in, + const size_t spitch, + const size_t width, + const size_t height) { - CHECK_CUDA(cudaMemcpy2D(arr_out, dpitch * sizeof(FPTYPE), arr_in, spitch * sizeof(FPTYPE), width * sizeof(FPTYPE), height, cudaMemcpyDeviceToHost)); + CHECK_CUDA (cudaMemcpy2D (arr_out, + dpitch * sizeof (FPTYPE), + arr_in, + spitch * sizeof (FPTYPE), + width * sizeof (FPTYPE), + height, + cudaMemcpyDeviceToHost)); } template -void synchronize_memory_2d_op::operator()( - FPTYPE* arr_out, - const size_t dpitch, - const FPTYPE* arr_in, - const size_t spitch, - const size_t width, - const size_t height) +void + synchronize_memory_2d_op::operator() ( + FPTYPE* arr_out, + const size_t dpitch, + const FPTYPE* arr_in, + const size_t spitch, + const size_t width, + const size_t height) { - CHECK_CUDA(cudaMemcpy2D(arr_out, dpitch * sizeof(FPTYPE), arr_in, spitch * sizeof(FPTYPE), width * sizeof(FPTYPE), height, cudaMemcpyHostToDevice)); + CHECK_CUDA (cudaMemcpy2D (arr_out, + dpitch * sizeof (FPTYPE), + arr_in, + spitch * sizeof (FPTYPE), + width * sizeof (FPTYPE), + height, + cudaMemcpyHostToDevice)); } template -void synchronize_memory_2d_op::operator()( - FPTYPE* arr_out, - const size_t dpitch, - const FPTYPE* arr_in, - const size_t spitch, - const size_t width, - const size_t height) +void + synchronize_memory_2d_op::operator() ( + FPTYPE* arr_out, + const size_t dpitch, + const FPTYPE* arr_in, + const size_t spitch, + const size_t width, + const size_t height) { - CHECK_CUDA(cudaMemcpy2D(arr_out, dpitch * sizeof(FPTYPE), 
arr_in, spitch * sizeof(FPTYPE), width * sizeof(FPTYPE), height, cudaMemcpyDeviceToDevice)); + CHECK_CUDA (cudaMemcpy2D (arr_out, + dpitch * sizeof (FPTYPE), + arr_in, + spitch * sizeof (FPTYPE), + width * sizeof (FPTYPE), + height, + cudaMemcpyDeviceToDevice)); } template struct cast_memory_op { - void operator()(FPTYPE_out* arr_out, - const FPTYPE_in* arr_in, - const size_t size) + void + operator() (FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) { if (size == 0) - { - return; - } + { + return; + } const int block = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - cast_memory<<>>(arr_out, arr_in, size); + cast_memory<<>> (arr_out, arr_in, size); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } }; template -struct cast_memory_op { - void operator()(FPTYPE_out* arr_out, - const FPTYPE_in* arr_in, - const size_t size) { +struct cast_memory_op +{ + void + operator() (FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) + { - if (size == 0) {return;} + if (size == 0) + { + return; + } // No need to cast the memory if the data types are the same. 
if (std::is_same::value) - { - synchronize_memory_op()(arr_out, - reinterpret_cast(arr_in), - size); - return; - } - FPTYPE_in * arr = nullptr; - CHECK_CUDA(cudaMalloc((void **)&arr, sizeof(FPTYPE_in) * size)); - CHECK_CUDA(cudaMemcpy(arr, arr_in, sizeof(FPTYPE_in) * size, cudaMemcpyHostToDevice)); + { + synchronize_memory_op () ( + arr_out, + reinterpret_cast (arr_in), + size); + return; + } + FPTYPE_in* arr = nullptr; + CHECK_CUDA (cudaMalloc ((void**)&arr, sizeof (FPTYPE_in) * size)); + CHECK_CUDA (cudaMemcpy (arr, arr_in, sizeof (FPTYPE_in) * size, cudaMemcpyHostToDevice)); const int block = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - cast_memory<<>>(arr_out, arr, size); - CHECK_CUDA_SYNC(); - CHECK_CUDA(cudaFree(arr)); + cast_memory<<>> (arr_out, arr, size); + CHECK_CUDA_SYNC (); + CHECK_CUDA (cudaFree (arr)); } }; template -struct cast_memory_op { - void operator()(FPTYPE_out* arr_out, - const FPTYPE_in* arr_in, - const size_t size) { - if (size == 0) {return;} +struct cast_memory_op +{ + void + operator() (FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) + { + if (size == 0) + { + return; + } // No need to cast the memory if the data types are the same. 
if (std::is_same::value) - { - synchronize_memory_op()(arr_out, - reinterpret_cast(arr_in), - size); - return; - } - auto * arr = (FPTYPE_in*) malloc(sizeof(FPTYPE_in) * size); - CHECK_CUDA(cudaMemcpy(arr, arr_in, sizeof(FPTYPE_in) * size, cudaMemcpyDeviceToHost)); - for (int ii = 0; ii < size; ii++) { - arr_out[ii] = static_cast(arr[ii]); - } - free(arr); + { + synchronize_memory_op () ( + arr_out, + reinterpret_cast (arr_in), + size); + return; + } + auto* arr = (FPTYPE_in*)malloc (sizeof (FPTYPE_in) * size); + CHECK_CUDA (cudaMemcpy (arr, arr_in, sizeof (FPTYPE_in) * size, cudaMemcpyDeviceToHost)); + for (int ii = 0; ii < size; ii++) + { + arr_out[ii] = static_cast (arr[ii]); + } + free (arr); } }; template -void delete_memory_op::operator()(FPTYPE* arr) +void + delete_memory_op::operator() (FPTYPE* arr) { - CHECK_CUDA(cudaFree(arr)); + CHECK_CUDA (cudaFree (arr)); } template struct resize_memory_op; diff --git a/source/source_base/module_device/cuda_compat.cpp b/source/source_base/module_device/cuda_compat.cpp index a4a6844797d..725d11b2e5c 100644 --- a/source/source_base/module_device/cuda_compat.cpp +++ b/source/source_base/module_device/cuda_compat.cpp @@ -1,64 +1,67 @@ #include "cuda_compat.h" -namespace ModuleBase { -namespace cuda_compat { +namespace ModuleBase +{ +namespace cuda_compat +{ //--------------------------------------------------------------------------- // Implementation of printDeprecatedDeviceInfo and printComputeModeInfo //--------------------------------------------------------------------------- -void printDeprecatedDeviceInfo(std::ostream& ofs_device, const cudaDeviceProp& deviceProp) +void + printDeprecatedDeviceInfo (std::ostream& ofs_device, const cudaDeviceProp& deviceProp) { #if defined(CUDA_VERSION) && CUDA_VERSION < 13000 - char msg[1024]; - sprintf(msg, - " GPU Max Clock rate: %.0f MHz (%0.2f " - "GHz)\n", - deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f); - ofs_device << msg << std::endl; - // This is supported in 
CUDA 5.0 (runtime API device properties) - sprintf(msg, " Memory Clock rate: %.0f Mhz\n", - deviceProp.memoryClockRate * 1e-3f); - ofs_device << msg << std::endl; + char msg[1024]; + sprintf (msg, + " GPU Max Clock rate: %.0f MHz (%0.2f " + "GHz)\n", + deviceProp.clockRate * 1e-3f, + deviceProp.clockRate * 1e-6f); + ofs_device << msg << std::endl; + // This is supported in CUDA 5.0 (runtime API device properties) + sprintf (msg, " Memory Clock rate: %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f); + ofs_device << msg << std::endl; + + sprintf (msg, " Memory Bus Width: %d-bit\n", deviceProp.memoryBusWidth); + ofs_device << msg << std::endl; - sprintf(msg, " Memory Bus Width: %d-bit\n", - deviceProp.memoryBusWidth); - ofs_device << msg << std::endl; - - sprintf(msg, - " Concurrent copy and kernel execution: %s with %d copy " - "engine(s)\n", - (deviceProp.deviceOverlap ? "Yes" : "No"), - deviceProp.asyncEngineCount); - ofs_device << msg << std::endl; - sprintf(msg, " Run time limit on kernels: %s\n", - deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No"); - ofs_device << msg << std::endl; + sprintf (msg, + " Concurrent copy and kernel execution: %s with %d copy " + "engine(s)\n", + (deviceProp.deviceOverlap ? "Yes" : "No"), + deviceProp.asyncEngineCount); + ofs_device << msg << std::endl; + sprintf (msg, + " Run time limit on kernels: %s\n", + deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No"); + ofs_device << msg << std::endl; #endif } -void printComputeModeInfo(std::ostream& ofs_device, const cudaDeviceProp& deviceProp) +void + printComputeModeInfo (std::ostream& ofs_device, const cudaDeviceProp& deviceProp) { #if defined(CUDA_VERSION) && CUDA_VERSION < 13000 - char msg[1024]; - sprintf(msg, " Supports MultiDevice Co-op Kernel Launch: %s\n", - deviceProp.cooperativeMultiDeviceLaunch ? "Yes" : "No"); - ofs_device << msg << std::endl; + char msg[1024]; + sprintf (msg, + " Supports MultiDevice Co-op Kernel Launch: %s\n", + deviceProp.cooperativeMultiDeviceLaunch ? 
"Yes" : "No"); + ofs_device << msg << std::endl; - const char *sComputeMode[] = { - "Default (multiple host threads can use ::cudaSetDevice() with device " - "simultaneously)", - "Exclusive (only one host thread in one process is able to use " - "::cudaSetDevice() with this device)", - "Prohibited (no host thread can use ::cudaSetDevice() with this " - "device)", - "Exclusive Process (many threads in one process is able to use " - "::cudaSetDevice() with this device)", - "Unknown", - NULL}; - sprintf(msg, " Compute Mode:\n"); - ofs_device << msg << std::endl; - ofs_device << " " << sComputeMode[deviceProp.computeMode] << std::endl - << std::endl; + const char* sComputeMode[] = {"Default (multiple host threads can use ::cudaSetDevice() with device " + "simultaneously)", + "Exclusive (only one host thread in one process is able to use " + "::cudaSetDevice() with this device)", + "Prohibited (no host thread can use ::cudaSetDevice() with this " + "device)", + "Exclusive Process (many threads in one process is able to use " + "::cudaSetDevice() with this device)", + "Unknown", + NULL}; + sprintf (msg, " Compute Mode:\n"); + ofs_device << msg << std::endl; + ofs_device << " " << sComputeMode[deviceProp.computeMode] << std::endl << std::endl; #endif } diff --git a/source/source_base/module_device/cuda_compat.h b/source/source_base/module_device/cuda_compat.h index acb2fca3948..d95615dd060 100644 --- a/source/source_base/module_device/cuda_compat.h +++ b/source/source_base/module_device/cuda_compat.h @@ -12,13 +12,12 @@ #ifndef CUDA_COMPAT_H_ #define CUDA_COMPAT_H_ -#include // For std::ostream +#include // For std::ostream #include // For std::invalid_argument -#include // defines CUDA_VERSION +#include // defines CUDA_VERSION #include #include - // NVTX header for CUDA versions prior to 12.9 vs. 12.9+ // This block ensures the correct NVTX header path is used based on CUDA_VERSION. // - For CUDA Toolkit < 12.9, the legacy header "nvToolsExt.h" is included. 
@@ -30,17 +29,19 @@ // https://docs.nvidia.com/cuda/archive/12.9.0/cuda-toolkit-release-notes/index.html#id4 #if defined(__CUDA) && defined(__USE_NVTX) #if CUDA_VERSION < 12090 - #include "nvToolsExt.h" +#include "nvToolsExt.h" #else - #include "nvtx3/nvToolsExt.h" +#include "nvtx3/nvToolsExt.h" #endif #endif //------------------------------------------------------------------------------------------------- // Compatibility Layer Declarations //------------------------------------------------------------------------------------------------- -namespace ModuleBase { -namespace cuda_compat { +namespace ModuleBase +{ +namespace cuda_compat +{ /** * @brief Prints device information that was deprecated or removed in CUDA 13.0. @@ -51,7 +52,7 @@ namespace cuda_compat { * @param os The output stream (e.g., std::cout, std::ofstream). * @param prop The cudaDeviceProp structure containing device properties. */ -void printDeprecatedDeviceInfo(std::ostream& os, const cudaDeviceProp& prop); +void printDeprecatedDeviceInfo (std::ostream& os, const cudaDeviceProp& prop); /** * @brief Prints the device's compute mode using a legacy string mapping. @@ -62,7 +63,7 @@ void printDeprecatedDeviceInfo(std::ostream& os, const cudaDeviceProp& prop); * @param os The output stream (e.g., std::cout, std::ofstream). * @param prop The cudaDeviceProp structure containing device properties. 
*/ -void printComputeModeInfo(std::ostream& os, const cudaDeviceProp& prop); +void printComputeModeInfo (std::ostream& os, const cudaDeviceProp& prop); } // namespace cuda_compat } // namespace ModuleBase diff --git a/source/source_base/module_device/device.cpp b/source/source_base/module_device/device.cpp index fb95e94f491..2d7c1ea4760 100644 --- a/source/source_base/module_device/device.cpp +++ b/source/source_base/module_device/device.cpp @@ -18,41 +18,48 @@ #include #endif -namespace base_device { +namespace base_device +{ -namespace information { +namespace information +{ #if __MPI -int get_node_rank_with_mpi_shared(const MPI_Comm mpi_comm) { - // 20240530 zhanghaochong - // The main difference between this function and the above is that it does not - // use hostname, but uses MPI's built-in function to achieve similar - // functions. - MPI_Comm localComm; - int localMpiRank; - MPI_Comm_split_type(mpi_comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, - &localComm); - MPI_Comm_rank(localComm, &localMpiRank); - MPI_Comm_free(&localComm); - return localMpiRank; +int + get_node_rank_with_mpi_shared (const MPI_Comm mpi_comm) +{ + // 20240530 zhanghaochong + // The main difference between this function and the above is that it does not + // use hostname, but uses MPI's built-in function to achieve similar + // functions. 
+ MPI_Comm localComm; + int localMpiRank; + MPI_Comm_split_type (mpi_comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &localComm); + MPI_Comm_rank (localComm, &localMpiRank); + MPI_Comm_free (&localComm); + return localMpiRank; } #endif -bool probe_gpu_availability() { +bool + probe_gpu_availability () +{ #if defined(__CUDA) int device_count = 0; // Directly call cudaGetDeviceCount without CHECK_CUDA to prevent program exit - cudaError_t error_id = cudaGetDeviceCount(&device_count); - if (error_id == cudaSuccess && device_count > 0) { - return true; - } + cudaError_t error_id = cudaGetDeviceCount (&device_count); + if (error_id == cudaSuccess && device_count > 0) + { + return true; + } return false; #elif defined(__ROCM) int device_count = 0; - hipError_t error_id = hipGetDeviceCount(&device_count); - if (error_id == hipSuccess && device_count > 0) { - return true; - } + hipError_t error_id = hipGetDeviceCount (&device_count); + if (error_id == hipSuccess && device_count > 0) + { + return true; + } return false; #else // If not compiled with GPU support, GPU is not available @@ -60,44 +67,62 @@ bool probe_gpu_availability() { #endif } -std::string get_device_flag(const std::string &device, - const std::string &basis_type) { +std::string + get_device_flag (const std::string& device, const std::string& basis_type) +{ // 1. Validate input string - if (device != "cpu" && device != "gpu" && device != "auto") { - ModuleBase::WARNING_QUIT("device", "Parameter \"device\" can only be set to \"cpu\", \"gpu\", or \"auto\"!"); - } - + if (device != "cpu" && device != "gpu" && device != "auto") + { + ModuleBase::WARNING_QUIT ("device", + "Parameter \"device\" can only be set to \"cpu\", \"gpu\", or \"auto\"!"); + } + // NOTE: This function is called only on rank 0 during input parsing. // The result will be broadcast to other ranks via the standard bcast mechanism. // DO NOT use MPI_Bcast here as other ranks are not in this code path. 
- + std::string result = "cpu"; - - if (device == "gpu") { - if (probe_gpu_availability()) { - result = "gpu"; - // std::cout << " INFO: 'device=gpu' specified. GPU will be used." << std::endl; - } else { - ModuleBase::WARNING_QUIT("device", "Device is set to 'gpu', but no available GPU was found. Please check your hardware/drivers or set 'device=cpu'."); + + if (device == "gpu") + { + if (probe_gpu_availability ()) + { + result = "gpu"; + // std::cout << " INFO: 'device=gpu' specified. GPU will be used." << std::endl; + } + else + { + ModuleBase::WARNING_QUIT ("device", + "Device is set to 'gpu', but no available GPU was found. Please check " + "your hardware/drivers or set 'device=cpu'."); + } + } + else if (device == "auto") + { + if (probe_gpu_availability ()) + { + result = "gpu"; + // std::cout << " INFO: 'device=auto' specified. GPU detected and will be used." << std::endl; + } + else + { + result = "cpu"; + // std::cout << " WARNING: 'device=auto' specified, but no GPU was found. Falling back to CPU." << + // std::endl; std::cout << " To suppress this warning, please explicitly set 'device=cpu' + // in your input." << std::endl; + } } - } else if (device == "auto") { - if (probe_gpu_availability()) { - result = "gpu"; - // std::cout << " INFO: 'device=auto' specified. GPU detected and will be used." << std::endl; - } else { + else + { // device == "cpu" result = "cpu"; - // std::cout << " WARNING: 'device=auto' specified, but no GPU was found. Falling back to CPU." << std::endl; - // std::cout << " To suppress this warning, please explicitly set 'device=cpu' in your input." << std::endl; + // std::cout << " INFO: 'device=cpu' specified. CPU will be used." << std::endl; } - } else { // device == "cpu" - result = "cpu"; - // std::cout << " INFO: 'device=cpu' specified. CPU will be used." << std::endl; - } // 2. 
Final check for incompatible basis type - if (result == "gpu" && basis_type == "lcao_in_pw") { - ModuleBase::WARNING_QUIT("device", "The GPU currently does not support the basis type \"lcao_in_pw\"!"); - } + if (result == "gpu" && basis_type == "lcao_in_pw") + { + ModuleBase::WARNING_QUIT ("device", "The GPU currently does not support the basis type \"lcao_in_pw\"!"); + } // 3. Return the final decision return result; @@ -109,19 +134,24 @@ std::string get_device_flag(const std::string &device, // DeviceContext singleton implementation // ============================================================================ -DeviceContext& DeviceContext::instance() { +DeviceContext& + DeviceContext::instance () +{ static DeviceContext instance; return instance; } -void DeviceContext::init() { +void + DeviceContext::init () +{ // Thread-safe initialization using mutex - std::lock_guard lock(init_mutex_); + std::lock_guard lock (init_mutex_); // If already initialized, do nothing (idempotent) - if (initialized_) { - return; - } + if (initialized_) + { + return; + } #if defined(__CUDA) || defined(__ROCM) @@ -130,46 +160,50 @@ void DeviceContext::init() { // This is the modern and recommended way to get node-local rank // Use MPI_COMM_WORLD as the default communicator MPI_Comm local_comm; - MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &local_comm); - MPI_Comm_rank(local_comm, &local_rank_); - MPI_Comm_free(&local_comm); + MPI_Comm_split_type (MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &local_comm); + MPI_Comm_rank (local_comm, &local_rank_); + MPI_Comm_free (&local_comm); #else local_rank_ = 0; #endif // Get the number of available GPU devices #if defined(__CUDA) - cudaError_t err = cudaGetDeviceCount(&device_count_); - if (err != cudaSuccess || device_count_ <= 0) { - ModuleBase::WARNING_QUIT("DeviceContext::init", - "No CUDA-capable GPU device found! 
Please check your hardware/drivers."); - return; - } + cudaError_t err = cudaGetDeviceCount (&device_count_); + if (err != cudaSuccess || device_count_ <= 0) + { + ModuleBase::WARNING_QUIT ("DeviceContext::init", + "No CUDA-capable GPU device found! Please check your hardware/drivers."); + return; + } // Bind to GPU device based on local rank device_id_ = local_rank_ % device_count_; - err = cudaSetDevice(device_id_); - if (err != cudaSuccess) { - ModuleBase::WARNING_QUIT("DeviceContext::init", - "cudaSetDevice failed! Device ID: " + std::to_string(device_id_)); - return; - } + err = cudaSetDevice (device_id_); + if (err != cudaSuccess) + { + ModuleBase::WARNING_QUIT ("DeviceContext::init", + "cudaSetDevice failed! Device ID: " + std::to_string (device_id_)); + return; + } #elif defined(__ROCM) - hipError_t err = hipGetDeviceCount(&device_count_); - if (err != hipSuccess || device_count_ <= 0) { - ModuleBase::WARNING_QUIT("DeviceContext::init", - "No ROCm-capable GPU device found! Please check your hardware/drivers."); - return; - } + hipError_t err = hipGetDeviceCount (&device_count_); + if (err != hipSuccess || device_count_ <= 0) + { + ModuleBase::WARNING_QUIT ("DeviceContext::init", + "No ROCm-capable GPU device found! Please check your hardware/drivers."); + return; + } // Bind to GPU device based on local rank device_id_ = local_rank_ % device_count_; - err = hipSetDevice(device_id_); - if (err != hipSuccess) { - ModuleBase::WARNING_QUIT("DeviceContext::init", - "hipSetDevice failed! Device ID: " + std::to_string(device_id_)); - return; - } + err = hipSetDevice (device_id_); + if (err != hipSuccess) + { + ModuleBase::WARNING_QUIT ("DeviceContext::init", + "hipSetDevice failed! 
Device ID: " + std::to_string (device_id_)); + return; + } #endif gpu_enabled_ = true; diff --git a/source/source_base/module_device/device.h b/source/source_base/module_device/device.h index 453c304bb27..7b8ea757499 100644 --- a/source/source_base/module_device/device.h +++ b/source/source_base/module_device/device.h @@ -20,13 +20,13 @@ namespace information * @brief Get the device name * for source_esolver */ -std::string get_device_name(std::string device_flag); +std::string get_device_name (std::string device_flag); /** * @brief Get the device number * for source_esolver */ -int get_device_num(std::string device_flag); +int get_device_num (std::string device_flag); /** * @brief Output the device information @@ -34,20 +34,19 @@ int get_device_num(std::string device_flag); * @param output output stream. * @param device device flag, "cpu" / "gpu" / "dsp". */ -void output_device_info(std::ostream& output, const std::string& device); +void output_device_info (std::ostream& output, const std::string& device); /** * @brief Safely probes for GPU availability without exiting on error. * @return True if at least one GPU is found and usable, false otherwise. 
*/ -bool probe_gpu_availability(); +bool probe_gpu_availability (); /** * @brief Get the device flag object * for source_io PARAM.inp.device */ -std::string get_device_flag(const std::string& device, - const std::string& basis_type); +std::string get_device_flag (const std::string& device, const std::string& basis_type); #if __MPI /** @@ -55,27 +54,32 @@ std::string get_device_flag(const std::string& device, * @param mpi_comm MPI communicator (default: MPI_COMM_WORLD) * @return Local rank within the node */ -int get_node_rank_with_mpi_shared(const MPI_Comm mpi_comm = MPI_COMM_WORLD); +int get_node_rank_with_mpi_shared (const MPI_Comm mpi_comm = MPI_COMM_WORLD); #endif template -void print_device_info(const Device* dev, std::ofstream& ofs_device) +void + print_device_info (const Device* dev, std::ofstream& ofs_device) { return; } template -void record_device_memory(const Device* dev, std::ofstream& ofs_device, std::string str, size_t size) +void + record_device_memory (const Device* dev, std::ofstream& ofs_device, std::string str, size_t size) { return; } #if defined(__CUDA) || defined(__ROCM) template <> -void print_device_info(const base_device::DEVICE_GPU *ctx, std::ofstream &ofs_device); +void print_device_info (const base_device::DEVICE_GPU* ctx, std::ofstream& ofs_device); template <> -void record_device_memory(const base_device::DEVICE_GPU* dev, std::ofstream& ofs_device, std::string str, size_t size); +void record_device_memory (const base_device::DEVICE_GPU* dev, + std::ofstream& ofs_device, + std::string str, + size_t size); #endif } // end of namespace information @@ -95,13 +99,14 @@ void record_device_memory(const base_device::DEVICE_GPU * // Query device info * int dev_id = DeviceContext::instance().get_device_id(); */ -class DeviceContext { -public: +class DeviceContext +{ + public: /** * @brief Get the singleton instance of DeviceContext * @return Reference to the singleton instance */ - static DeviceContext& instance(); + static DeviceContext& 
instance (); /** * @brief Initialize GPU device binding. @@ -115,75 +120,115 @@ class DeviceContext { * @note This function should only be called when device=gpu is confirmed. * @note In MPI builds, uses MPI_COMM_WORLD internally. */ - void init(); + void init (); /** * @brief Check if the DeviceContext has been initialized * @return true if init() has been called successfully */ - bool is_initialized() const { return initialized_; } + bool + is_initialized () const + { + return initialized_; + } /** * @brief Check if GPU is enabled and available * @return true if GPU device is bound and usable */ - bool is_gpu_enabled() const { return gpu_enabled_; } + bool + is_gpu_enabled () const + { + return gpu_enabled_; + } /** * @brief Get the bound GPU device ID * @return Device ID (0-based), or -1 if not initialized */ - int get_device_id() const { return device_id_; } + int + get_device_id () const + { + return device_id_; + } /** * @brief Get the total number of GPU devices on this node * @return Number of GPU devices, or 0 if not initialized */ - int get_device_count() const { return device_count_; } + int + get_device_count () const + { + return device_count_; + } /** * @brief Get the local MPI rank within the node * @return Local rank, or 0 if not initialized */ - int get_local_rank() const { return local_rank_; } + int + get_local_rank () const + { + return local_rank_; + } /** * @brief Set the device type (CpuDevice, GpuDevice, or DspDevice) * @param type The device type */ - void set_device_type(AbacusDevice_t type) { device_type_ = type; } + void + set_device_type (AbacusDevice_t type) + { + device_type_ = type; + } /** * @brief Get the device type * @return AbacusDevice_t The device type */ - AbacusDevice_t get_device_type() const { return device_type_; } + AbacusDevice_t + get_device_type () const + { + return device_type_; + } /** * @brief Check if the device is CPU * @return true if the device is CPU */ - bool is_cpu() const { return device_type_ == 
CpuDevice; } + bool + is_cpu () const + { + return device_type_ == CpuDevice; + } /** * @brief Check if the device is GPU * @return true if the device is GPU */ - bool is_gpu() const { return device_type_ == GpuDevice; } + bool + is_gpu () const + { + return device_type_ == GpuDevice; + } /** * @brief Check if the device is DSP * @return true if the device is DSP */ - bool is_dsp() const { return device_type_ == DspDevice; } + bool + is_dsp () const + { + return device_type_ == DspDevice; + } // Disable copy and assignment - DeviceContext(const DeviceContext&) = delete; - DeviceContext& operator=(const DeviceContext&) = delete; + DeviceContext (const DeviceContext&) = delete; + DeviceContext& operator= (const DeviceContext&) = delete; -private: - DeviceContext() = default; - ~DeviceContext() = default; + private: + DeviceContext () = default; + ~DeviceContext () = default; bool initialized_ = false; bool gpu_enabled_ = false; @@ -200,9 +245,10 @@ class DeviceContext { * @param ctx Pointer to DeviceContext * @return AbacusDevice_t enum value */ -inline AbacusDevice_t get_device_type(const DeviceContext* ctx) +inline AbacusDevice_t + get_device_type (const DeviceContext* ctx) { - return ctx->get_device_type(); + return ctx->get_device_type (); } } // end of namespace base_device diff --git a/source/source_base/module_device/device_check.h b/source/source_base/module_device/device_check.h index 92ab5b4d5db..f6e93ba70bb 100644 --- a/source/source_base/module_device/device_check.h +++ b/source/source_base/module_device/device_check.h @@ -10,213 +10,251 @@ #include "cusolverDn.h" #include -static const char* _cublasGetErrorString(cublasStatus_t error) +static const char* + _cublasGetErrorString (cublasStatus_t error) { switch (error) - { - case CUBLAS_STATUS_SUCCESS: - return "CUBLAS_STATUS_SUCCESS"; - case CUBLAS_STATUS_NOT_INITIALIZED: - return "CUBLAS_STATUS_NOT_INITIALIZED"; - case CUBLAS_STATUS_ALLOC_FAILED: - return "CUBLAS_STATUS_ALLOC_FAILED"; - case 
CUBLAS_STATUS_INVALID_VALUE: - return "CUBLAS_STATUS_INVALID_VALUE"; - case CUBLAS_STATUS_ARCH_MISMATCH: - return "CUBLAS_STATUS_ARCH_MISMATCH"; - case CUBLAS_STATUS_MAPPING_ERROR: - return "CUBLAS_STATUS_MAPPING_ERROR"; - case CUBLAS_STATUS_EXECUTION_FAILED: - return "CUBLAS_STATUS_EXECUTION_FAILED"; - case CUBLAS_STATUS_INTERNAL_ERROR: - return "CUBLAS_STATUS_INTERNAL_ERROR"; - case CUBLAS_STATUS_NOT_SUPPORTED: - return "CUBLAS_STATUS_NOT_SUPPORTED"; - case CUBLAS_STATUS_LICENSE_ERROR: - return "CUBLAS_STATUS_LICENSE_ERROR"; - default: - return ""; - } + { + case CUBLAS_STATUS_SUCCESS: + return "CUBLAS_STATUS_SUCCESS"; + case CUBLAS_STATUS_NOT_INITIALIZED: + return "CUBLAS_STATUS_NOT_INITIALIZED"; + case CUBLAS_STATUS_ALLOC_FAILED: + return "CUBLAS_STATUS_ALLOC_FAILED"; + case CUBLAS_STATUS_INVALID_VALUE: + return "CUBLAS_STATUS_INVALID_VALUE"; + case CUBLAS_STATUS_ARCH_MISMATCH: + return "CUBLAS_STATUS_ARCH_MISMATCH"; + case CUBLAS_STATUS_MAPPING_ERROR: + return "CUBLAS_STATUS_MAPPING_ERROR"; + case CUBLAS_STATUS_EXECUTION_FAILED: + return "CUBLAS_STATUS_EXECUTION_FAILED"; + case CUBLAS_STATUS_INTERNAL_ERROR: + return "CUBLAS_STATUS_INTERNAL_ERROR"; + case CUBLAS_STATUS_NOT_SUPPORTED: + return "CUBLAS_STATUS_NOT_SUPPORTED"; + case CUBLAS_STATUS_LICENSE_ERROR: + return "CUBLAS_STATUS_LICENSE_ERROR"; + default: + return ""; + } } -static const char* _cusolverGetErrorString(cusolverStatus_t error) +static const char* + _cusolverGetErrorString (cusolverStatus_t error) { switch (error) - { - case CUSOLVER_STATUS_SUCCESS: - return "CUSOLVER_STATUS_SUCCESS"; - case CUSOLVER_STATUS_NOT_INITIALIZED: - return "CUSOLVER_STATUS_NOT_INITIALIZED"; - case CUSOLVER_STATUS_ALLOC_FAILED: - return "CUSOLVER_STATUS_ALLOC_FAILED"; - case CUSOLVER_STATUS_INVALID_VALUE: - return "CUSOLVER_STATUS_INVALID_VALUE"; - case CUSOLVER_STATUS_ARCH_MISMATCH: - return "CUSOLVER_STATUS_ARCH_MISMATCH"; - case CUSOLVER_STATUS_MAPPING_ERROR: - return "CUSOLVER_STATUS_MAPPING_ERROR"; - case 
CUSOLVER_STATUS_EXECUTION_FAILED: - return "CUSOLVER_STATUS_EXECUTION_FAILED"; - case CUSOLVER_STATUS_INTERNAL_ERROR: - return "CUSOLVER_STATUS_INTERNAL_ERROR"; - case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: - return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; - case CUSOLVER_STATUS_NOT_SUPPORTED: - return "CUSOLVER_STATUS_NOT_SUPPORTED"; - case CUSOLVER_STATUS_ZERO_PIVOT: - return "CUSOLVER_STATUS_ZERO_PIVOT"; - case CUSOLVER_STATUS_INVALID_LICENSE: - return "CUSOLVER_STATUS_INVALID_LICENSE"; - case CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED: - return "CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED"; - case CUSOLVER_STATUS_IRS_PARAMS_INVALID: - return "CUSOLVER_STATUS_IRS_PARAMS_INVALID"; - case CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC: - return "CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC"; - case CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE: - return "CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE"; - case CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER: - return "CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER"; - case CUSOLVER_STATUS_IRS_INTERNAL_ERROR: - return "CUSOLVER_STATUS_IRS_INTERNAL_ERROR"; - case CUSOLVER_STATUS_IRS_NOT_SUPPORTED: - return "CUSOLVER_STATUS_IRS_NOT_SUPPORTED"; - case CUSOLVER_STATUS_IRS_OUT_OF_RANGE: - return "CUSOLVER_STATUS_IRS_OUT_OF_RANGE"; - case CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES: - return "CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES"; - case CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED: - return "CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED"; - case CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED: - return "CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED"; - case CUSOLVER_STATUS_IRS_MATRIX_SINGULAR: - return "CUSOLVER_STATUS_IRS_MATRIX_SINGULAR"; - case CUSOLVER_STATUS_INVALID_WORKSPACE: - return "CUSOLVER_STATUS_INVALID_WORKSPACE"; - default: - return ""; - } + { + case CUSOLVER_STATUS_SUCCESS: + return "CUSOLVER_STATUS_SUCCESS"; + case CUSOLVER_STATUS_NOT_INITIALIZED: + return "CUSOLVER_STATUS_NOT_INITIALIZED"; + case 
CUSOLVER_STATUS_ALLOC_FAILED: + return "CUSOLVER_STATUS_ALLOC_FAILED"; + case CUSOLVER_STATUS_INVALID_VALUE: + return "CUSOLVER_STATUS_INVALID_VALUE"; + case CUSOLVER_STATUS_ARCH_MISMATCH: + return "CUSOLVER_STATUS_ARCH_MISMATCH"; + case CUSOLVER_STATUS_MAPPING_ERROR: + return "CUSOLVER_STATUS_MAPPING_ERROR"; + case CUSOLVER_STATUS_EXECUTION_FAILED: + return "CUSOLVER_STATUS_EXECUTION_FAILED"; + case CUSOLVER_STATUS_INTERNAL_ERROR: + return "CUSOLVER_STATUS_INTERNAL_ERROR"; + case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: + return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; + case CUSOLVER_STATUS_NOT_SUPPORTED: + return "CUSOLVER_STATUS_NOT_SUPPORTED"; + case CUSOLVER_STATUS_ZERO_PIVOT: + return "CUSOLVER_STATUS_ZERO_PIVOT"; + case CUSOLVER_STATUS_INVALID_LICENSE: + return "CUSOLVER_STATUS_INVALID_LICENSE"; + case CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED: + return "CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED"; + case CUSOLVER_STATUS_IRS_PARAMS_INVALID: + return "CUSOLVER_STATUS_IRS_PARAMS_INVALID"; + case CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC: + return "CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC"; + case CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE: + return "CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE"; + case CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER: + return "CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER"; + case CUSOLVER_STATUS_IRS_INTERNAL_ERROR: + return "CUSOLVER_STATUS_IRS_INTERNAL_ERROR"; + case CUSOLVER_STATUS_IRS_NOT_SUPPORTED: + return "CUSOLVER_STATUS_IRS_NOT_SUPPORTED"; + case CUSOLVER_STATUS_IRS_OUT_OF_RANGE: + return "CUSOLVER_STATUS_IRS_OUT_OF_RANGE"; + case CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES: + return "CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES"; + case CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED: + return "CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED"; + case CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED: + return "CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED"; + case CUSOLVER_STATUS_IRS_MATRIX_SINGULAR: + return 
"CUSOLVER_STATUS_IRS_MATRIX_SINGULAR"; + case CUSOLVER_STATUS_INVALID_WORKSPACE: + return "CUSOLVER_STATUS_INVALID_WORKSPACE"; + default: + return ""; + } } -static const char* _cufftGetErrorString(cufftResult_t error) +static const char* + _cufftGetErrorString (cufftResult_t error) { switch (error) - { - case CUFFT_SUCCESS: - return "CUFFT_SUCCESS"; - case CUFFT_INVALID_PLAN: - return "CUFFT_INVALID_PLAN"; - case CUFFT_ALLOC_FAILED: - return "CUFFT_ALLOC_FAILED"; - case CUFFT_INVALID_TYPE: - return "CUFFT_INVALID_TYPE"; - case CUFFT_INVALID_VALUE: - return "CUFFT_INVALID_VALUE"; - case CUFFT_INTERNAL_ERROR: - return "CUFFT_INTERNAL_ERROR"; - case CUFFT_EXEC_FAILED: - return "CUFFT_EXEC_FAILED"; - case CUFFT_SETUP_FAILED: - return "CUFFT_SETUP_FAILED"; - case CUFFT_INVALID_SIZE: - return "CUFFT_INVALID_SIZE"; - case CUFFT_UNALIGNED_DATA: - return "CUFFT_UNALIGNED_DATA"; - case CUFFT_INVALID_DEVICE: - return "CUFFT_INVALID_DEVICE"; - case CUFFT_NO_WORKSPACE: - return "CUFFT_NO_WORKSPACE"; - case CUFFT_NOT_IMPLEMENTED: - return "CUFFT_NOT_IMPLEMENTED"; - case CUFFT_NOT_SUPPORTED: - return "CUFFT_NOT_SUPPORTED"; + { + case CUFFT_SUCCESS: + return "CUFFT_SUCCESS"; + case CUFFT_INVALID_PLAN: + return "CUFFT_INVALID_PLAN"; + case CUFFT_ALLOC_FAILED: + return "CUFFT_ALLOC_FAILED"; + case CUFFT_INVALID_TYPE: + return "CUFFT_INVALID_TYPE"; + case CUFFT_INVALID_VALUE: + return "CUFFT_INVALID_VALUE"; + case CUFFT_INTERNAL_ERROR: + return "CUFFT_INTERNAL_ERROR"; + case CUFFT_EXEC_FAILED: + return "CUFFT_EXEC_FAILED"; + case CUFFT_SETUP_FAILED: + return "CUFFT_SETUP_FAILED"; + case CUFFT_INVALID_SIZE: + return "CUFFT_INVALID_SIZE"; + case CUFFT_UNALIGNED_DATA: + return "CUFFT_UNALIGNED_DATA"; + case CUFFT_INVALID_DEVICE: + return "CUFFT_INVALID_DEVICE"; + case CUFFT_NO_WORKSPACE: + return "CUFFT_NO_WORKSPACE"; + case CUFFT_NOT_IMPLEMENTED: + return "CUFFT_NOT_IMPLEMENTED"; + case CUFFT_NOT_SUPPORTED: + return "CUFFT_NOT_SUPPORTED"; #if defined(CUDA_VERSION) && CUDA_VERSION < 
13000 - case CUFFT_INCOMPLETE_PARAMETER_LIST: - return "CUFFT_INCOMPLETE_PARAMETER_LIST"; - case CUFFT_PARSE_ERROR: - return "CUFFT_PARSE_ERROR"; - case CUFFT_LICENSE_ERROR: - return "CUFFT_LICENSE_ERROR"; + case CUFFT_INCOMPLETE_PARAMETER_LIST: + return "CUFFT_INCOMPLETE_PARAMETER_LIST"; + case CUFFT_PARSE_ERROR: + return "CUFFT_PARSE_ERROR"; + case CUFFT_LICENSE_ERROR: + return "CUFFT_LICENSE_ERROR"; #endif - default: - return ""; - } + default: + return ""; + } } #define CHECK_CUDA(func) \ do \ - { \ - cudaError_t status = (func); \ - if (status != cudaSuccess) \ { \ - fprintf(stderr, "In File %s : CUDA API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \ - cudaGetErrorString(status), status); \ - exit(EXIT_FAILURE); \ + cudaError_t status = (func); \ + if (status != cudaSuccess) \ + { \ + fprintf (stderr, \ + "In File %s : CUDA API failed at line %d with error: %s (%d)\n", \ + __FILE__, \ + __LINE__, \ + cudaGetErrorString (status), \ + status); \ + exit (EXIT_FAILURE); \ + } \ } \ - } while (0) + while (0) #define CHECK_CUBLAS(func) \ do \ - { \ - cublasStatus_t status = (func); \ - if (status != CUBLAS_STATUS_SUCCESS) \ { \ - fprintf(stderr, "In File %s : CUBLAS API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \ - _cublasGetErrorString(status), status); \ - exit(EXIT_FAILURE); \ + cublasStatus_t status = (func); \ + if (status != CUBLAS_STATUS_SUCCESS) \ + { \ + fprintf (stderr, \ + "In File %s : CUBLAS API failed at line %d with error: %s (%d)\n", \ + __FILE__, \ + __LINE__, \ + _cublasGetErrorString (status), \ + status); \ + exit (EXIT_FAILURE); \ + } \ } \ - } while (0) + while (0) #define CHECK_CUSOLVER(func) \ do \ - { \ - cusolverStatus_t status = (func); \ - if (status != CUSOLVER_STATUS_SUCCESS) \ { \ - fprintf(stderr, "In File %s : CUSOLVER API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \ - _cusolverGetErrorString(status), status); \ - exit(EXIT_FAILURE); \ + cusolverStatus_t status = (func); \ + if 
(status != CUSOLVER_STATUS_SUCCESS) \ + { \ + fprintf (stderr, \ + "In File %s : CUSOLVER API failed at line %d with error: %s (%d)\n", \ + __FILE__, \ + __LINE__, \ + _cusolverGetErrorString (status), \ + status); \ + exit (EXIT_FAILURE); \ + } \ } \ - } while (0) + while (0) #define CHECK_CUFFT(func) \ do \ - { \ - cufftResult_t status = (func); \ - if (status != CUFFT_SUCCESS) \ { \ - fprintf(stderr, "In File %s : CUFFT API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \ - _cufftGetErrorString(status), status); \ - exit(EXIT_FAILURE); \ + cufftResult_t status = (func); \ + if (status != CUFFT_SUCCESS) \ + { \ + fprintf (stderr, \ + "In File %s : CUFFT API failed at line %d with error: %s (%d)\n", \ + __FILE__, \ + __LINE__, \ + _cufftGetErrorString (status), \ + status); \ + exit (EXIT_FAILURE); \ + } \ } \ - } while (0) + while (0) #define CHECK_LAST_CUDA_ERROR(msg) \ do \ - { \ - cudaError_t status = cudaGetLastError(); \ - if (status != cudaSuccess) \ { \ - fprintf(stderr, "%s(%d) : CUDA error : %s : (%d) %s.\n", __FILE__, __LINE__, msg, \ - static_cast(status), cudaGetErrorString(status)); \ - exit(EXIT_FAILURE); \ + cudaError_t status = cudaGetLastError (); \ + if (status != cudaSuccess) \ + { \ + fprintf (stderr, \ + "%s(%d) : CUDA error : %s : (%d) %s.\n", \ + __FILE__, \ + __LINE__, \ + msg, \ + static_cast (status), \ + cudaGetErrorString (status)); \ + exit (EXIT_FAILURE); \ + } \ } \ - } while (0) + while (0) #ifdef __DEBUG #define CHECK_CUDA_SYNC() \ do \ - { \ - cudaError_t status = cudaDeviceSynchronize(); \ - if (status != cudaSuccess) \ { \ - fprintf(stderr, "In File %s : CUDA sync failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \ - cudaGetErrorString(status), status); \ - exit(EXIT_FAILURE); \ + cudaError_t status = cudaDeviceSynchronize (); \ + if (status != cudaSuccess) \ + { \ + fprintf (stderr, \ + "In File %s : CUDA sync failed at line %d with error: %s (%d)\n", \ + __FILE__, \ + __LINE__, \ + 
cudaGetErrorString (status), \ + status); \ + exit (EXIT_FAILURE); \ + } \ } \ - } while (0) + while (0) #else -#define CHECK_CUDA_SYNC() do {} while (0) +#define CHECK_CUDA_SYNC() \ + do \ + { \ + } \ + while (0) #endif // NCCL check macro: shared by cuSOLVER MP (non-CAL path) and parallel device @@ -225,15 +263,20 @@ static const char* _cufftGetErrorString(cufftResult_t error) #define CHECK_NCCL(func) \ do \ - { \ - ncclResult_t status = (func); \ - if (status != ncclSuccess) \ { \ - fprintf(stderr, "In File %s : NCCL API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \ - ncclGetErrorString(status), status); \ - exit(EXIT_FAILURE); \ + ncclResult_t status = (func); \ + if (status != ncclSuccess) \ + { \ + fprintf (stderr, \ + "In File %s : NCCL API failed at line %d with error: %s (%d)\n", \ + __FILE__, \ + __LINE__, \ + ncclGetErrorString (status), \ + status); \ + exit (EXIT_FAILURE); \ + } \ } \ - } while (0) + while (0) #endif // cuSOLVER MP support @@ -243,42 +286,48 @@ static const char* _cufftGetErrorString(cufftResult_t error) #ifdef __USE_CAL #include -static const char* _calGetErrorString(calError_t error) +static const char* + _calGetErrorString (calError_t error) { switch (error) - { - case CAL_OK: - return "CAL_OK"; - case CAL_ERROR: - return "CAL_ERROR"; - case CAL_ERROR_INVALID_PARAMETER: - return "CAL_ERROR_INVALID_PARAMETER"; - case CAL_ERROR_INTERNAL: - return "CAL_ERROR_INTERNAL"; - case CAL_ERROR_CUDA: - return "CAL_ERROR_CUDA"; - case CAL_ERROR_UCC: - return "CAL_ERROR_UCC"; - case CAL_ERROR_NOT_SUPPORTED: - return "CAL_ERROR_NOT_SUPPORTED"; - case CAL_ERROR_INPROGRESS: - return "CAL_ERROR_INPROGRESS"; - default: - return ""; - } + { + case CAL_OK: + return "CAL_OK"; + case CAL_ERROR: + return "CAL_ERROR"; + case CAL_ERROR_INVALID_PARAMETER: + return "CAL_ERROR_INVALID_PARAMETER"; + case CAL_ERROR_INTERNAL: + return "CAL_ERROR_INTERNAL"; + case CAL_ERROR_CUDA: + return "CAL_ERROR_CUDA"; + case CAL_ERROR_UCC: + return 
"CAL_ERROR_UCC"; + case CAL_ERROR_NOT_SUPPORTED: + return "CAL_ERROR_NOT_SUPPORTED"; + case CAL_ERROR_INPROGRESS: + return "CAL_ERROR_INPROGRESS"; + default: + return ""; + } } #define CHECK_CAL(func) \ do \ - { \ - calError_t status = (func); \ - if (status != CAL_OK) \ { \ - fprintf(stderr, "In File %s : CAL API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \ - _calGetErrorString(status), status); \ - exit(EXIT_FAILURE); \ + calError_t status = (func); \ + if (status != CAL_OK) \ + { \ + fprintf (stderr, \ + "In File %s : CAL API failed at line %d with error: %s (%d)\n", \ + __FILE__, \ + __LINE__, \ + _calGetErrorString (status), \ + status); \ + exit (EXIT_FAILURE); \ + } \ } \ - } while (0) + while (0) #endif // __USE_CAL #endif // __CUSOLVERMP @@ -291,183 +340,221 @@ static const char* _calGetErrorString(calError_t error) #include #include -static const char* _hipblasGetErrorString(hipblasStatus_t error) +static const char* + _hipblasGetErrorString (hipblasStatus_t error) { switch (error) - { - case HIPBLAS_STATUS_SUCCESS: - return "HIPBLAS_STATUS_SUCCESS"; - case HIPBLAS_STATUS_NOT_INITIALIZED: - return "HIPBLAS_STATUS_NOT_INITIALIZED"; - case HIPBLAS_STATUS_ALLOC_FAILED: - return "HIPBLAS_STATUS_ALLOC_FAILED"; - case HIPBLAS_STATUS_INVALID_VALUE: - return "HIPBLAS_STATUS_INVALID_VALUE"; - case HIPBLAS_STATUS_ARCH_MISMATCH: - return "HIPBLAS_STATUS_ARCH_MISMATCH"; - case HIPBLAS_STATUS_MAPPING_ERROR: - return "HIPBLAS_STATUS_MAPPING_ERROR"; - case HIPBLAS_STATUS_EXECUTION_FAILED: - return "HIPBLAS_STATUS_EXECUTION_FAILED"; - case HIPBLAS_STATUS_INTERNAL_ERROR: - return "HIPBLAS_STATUS_INTERNAL_ERROR"; - case HIPBLAS_STATUS_NOT_SUPPORTED: - return "HIPBLAS_STATUS_NOT_SUPPORTED"; - case HIPBLAS_STATUS_HANDLE_IS_NULLPTR: - return "HIPBLAS_STATUS_HANDLE_IS_NULLPTR"; - default: - return ""; - } + { + case HIPBLAS_STATUS_SUCCESS: + return "HIPBLAS_STATUS_SUCCESS"; + case HIPBLAS_STATUS_NOT_INITIALIZED: + return "HIPBLAS_STATUS_NOT_INITIALIZED"; + 
case HIPBLAS_STATUS_ALLOC_FAILED: + return "HIPBLAS_STATUS_ALLOC_FAILED"; + case HIPBLAS_STATUS_INVALID_VALUE: + return "HIPBLAS_STATUS_INVALID_VALUE"; + case HIPBLAS_STATUS_ARCH_MISMATCH: + return "HIPBLAS_STATUS_ARCH_MISMATCH"; + case HIPBLAS_STATUS_MAPPING_ERROR: + return "HIPBLAS_STATUS_MAPPING_ERROR"; + case HIPBLAS_STATUS_EXECUTION_FAILED: + return "HIPBLAS_STATUS_EXECUTION_FAILED"; + case HIPBLAS_STATUS_INTERNAL_ERROR: + return "HIPBLAS_STATUS_INTERNAL_ERROR"; + case HIPBLAS_STATUS_NOT_SUPPORTED: + return "HIPBLAS_STATUS_NOT_SUPPORTED"; + case HIPBLAS_STATUS_HANDLE_IS_NULLPTR: + return "HIPBLAS_STATUS_HANDLE_IS_NULLPTR"; + default: + return ""; + } } -static const char* _hipfftGetErrorString(hipfftResult_t error) +static const char* + _hipfftGetErrorString (hipfftResult_t error) { switch (error) - { - case HIPFFT_SUCCESS: - return "HIPFFT_SUCCESS"; - case HIPFFT_INVALID_PLAN: - return "HIPFFT_INVALID_PLAN"; - case HIPFFT_ALLOC_FAILED: - return "HIPFFT_ALLOC_FAILED"; - case HIPFFT_INVALID_TYPE: - return "HIPFFT_INVALID_TYPE"; - case HIPFFT_INVALID_VALUE: - return "HIPFFT_INVALID_VALUE"; - case HIPFFT_INTERNAL_ERROR: - return "HIPFFT_INTERNAL_ERROR"; - case HIPFFT_EXEC_FAILED: - return "HIPFFT_EXEC_FAILED"; - case HIPFFT_SETUP_FAILED: - return "HIPFFT_SETUP_FAILED"; - case HIPFFT_INVALID_SIZE: - return "HIPFFT_INVALID_SIZE"; - case HIPFFT_UNALIGNED_DATA: - return "HIPFFT_UNALIGNED_DATA"; - case HIPFFT_INCOMPLETE_PARAMETER_LIST: - return "HIPFFT_INCOMPLETE_PARAMETER_LIST"; - case HIPFFT_INVALID_DEVICE: - return "HIPFFT_INVALID_DEVICE"; - case HIPFFT_PARSE_ERROR: - return "HIPFFT_PARSE_ERROR"; - case HIPFFT_NO_WORKSPACE: - return "HIPFFT_NO_WORKSPACE"; - case HIPFFT_NOT_IMPLEMENTED: - return "HIPFFT_NOT_IMPLEMENTED"; - case HIPFFT_NOT_SUPPORTED: - return "HIPFFT_NOT_SUPPORTED"; - default: - return ""; - } + { + case HIPFFT_SUCCESS: + return "HIPFFT_SUCCESS"; + case HIPFFT_INVALID_PLAN: + return "HIPFFT_INVALID_PLAN"; + case HIPFFT_ALLOC_FAILED: + return 
"HIPFFT_ALLOC_FAILED"; + case HIPFFT_INVALID_TYPE: + return "HIPFFT_INVALID_TYPE"; + case HIPFFT_INVALID_VALUE: + return "HIPFFT_INVALID_VALUE"; + case HIPFFT_INTERNAL_ERROR: + return "HIPFFT_INTERNAL_ERROR"; + case HIPFFT_EXEC_FAILED: + return "HIPFFT_EXEC_FAILED"; + case HIPFFT_SETUP_FAILED: + return "HIPFFT_SETUP_FAILED"; + case HIPFFT_INVALID_SIZE: + return "HIPFFT_INVALID_SIZE"; + case HIPFFT_UNALIGNED_DATA: + return "HIPFFT_UNALIGNED_DATA"; + case HIPFFT_INCOMPLETE_PARAMETER_LIST: + return "HIPFFT_INCOMPLETE_PARAMETER_LIST"; + case HIPFFT_INVALID_DEVICE: + return "HIPFFT_INVALID_DEVICE"; + case HIPFFT_PARSE_ERROR: + return "HIPFFT_PARSE_ERROR"; + case HIPFFT_NO_WORKSPACE: + return "HIPFFT_NO_WORKSPACE"; + case HIPFFT_NOT_IMPLEMENTED: + return "HIPFFT_NOT_IMPLEMENTED"; + case HIPFFT_NOT_SUPPORTED: + return "HIPFFT_NOT_SUPPORTED"; + default: + return ""; + } } -static const char* _hipsolverGetErrorString(hipsolverStatus_t error) +static const char* + _hipsolverGetErrorString (hipsolverStatus_t error) { switch (error) - { - case HIPSOLVER_STATUS_SUCCESS: - return "HIPSOLVER_STATUS_SUCCESS"; - case HIPSOLVER_STATUS_NOT_INITIALIZED: - return "HIPSOLVER_STATUS_NOT_INITIALIZED"; - case HIPSOLVER_STATUS_ALLOC_FAILED: - return "HIPSOLVER_STATUS_ALLOC_FAILED"; - case HIPSOLVER_STATUS_INVALID_VALUE: - return "HIPSOLVER_STATUS_INVALID_VALUE"; - case HIPSOLVER_STATUS_MAPPING_ERROR: - return "HIPSOLVER_STATUS_MAPPING_ERROR"; - case HIPSOLVER_STATUS_EXECUTION_FAILED: - return "HIPSOLVER_STATUS_EXECUTION_FAILED"; - case HIPSOLVER_STATUS_INTERNAL_ERROR: - return "HIPSOLVER_STATUS_INTERNAL_ERROR"; - case HIPSOLVER_STATUS_NOT_SUPPORTED: - return "HIPSOLVER_STATUS_NOT_SUPPORTED"; - case HIPSOLVER_STATUS_ARCH_MISMATCH: - return "HIPSOLVER_STATUS_ARCH_MISMATCH"; - case HIPSOLVER_STATUS_HANDLE_IS_NULLPTR: - return "HIPSOLVER_STATUS_HANDLE_IS_NULLPTR"; - case HIPSOLVER_STATUS_INVALID_ENUM: - return "HIPSOLVER_STATUS_INVALID_ENUM"; - case HIPSOLVER_STATUS_UNKNOWN: - return 
"HIPSOLVER_STATUS_UNKNOWN"; - default: - return ""; - } + { + case HIPSOLVER_STATUS_SUCCESS: + return "HIPSOLVER_STATUS_SUCCESS"; + case HIPSOLVER_STATUS_NOT_INITIALIZED: + return "HIPSOLVER_STATUS_NOT_INITIALIZED"; + case HIPSOLVER_STATUS_ALLOC_FAILED: + return "HIPSOLVER_STATUS_ALLOC_FAILED"; + case HIPSOLVER_STATUS_INVALID_VALUE: + return "HIPSOLVER_STATUS_INVALID_VALUE"; + case HIPSOLVER_STATUS_MAPPING_ERROR: + return "HIPSOLVER_STATUS_MAPPING_ERROR"; + case HIPSOLVER_STATUS_EXECUTION_FAILED: + return "HIPSOLVER_STATUS_EXECUTION_FAILED"; + case HIPSOLVER_STATUS_INTERNAL_ERROR: + return "HIPSOLVER_STATUS_INTERNAL_ERROR"; + case HIPSOLVER_STATUS_NOT_SUPPORTED: + return "HIPSOLVER_STATUS_NOT_SUPPORTED"; + case HIPSOLVER_STATUS_ARCH_MISMATCH: + return "HIPSOLVER_STATUS_ARCH_MISMATCH"; + case HIPSOLVER_STATUS_HANDLE_IS_NULLPTR: + return "HIPSOLVER_STATUS_HANDLE_IS_NULLPTR"; + case HIPSOLVER_STATUS_INVALID_ENUM: + return "HIPSOLVER_STATUS_INVALID_ENUM"; + case HIPSOLVER_STATUS_UNKNOWN: + return "HIPSOLVER_STATUS_UNKNOWN"; + default: + return ""; + } } #define CHECK_CUDA(func) \ do \ - { \ - hipError_t status = (func); \ - if (status != hipSuccess) \ { \ - fprintf(stderr, "In File %s : HIP API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \ - hipGetErrorString(status), status); \ - exit(EXIT_FAILURE); \ + hipError_t status = (func); \ + if (status != hipSuccess) \ + { \ + fprintf (stderr, \ + "In File %s : HIP API failed at line %d with error: %s (%d)\n", \ + __FILE__, \ + __LINE__, \ + hipGetErrorString (status), \ + status); \ + exit (EXIT_FAILURE); \ + } \ } \ - } while (0) + while (0) #define CHECK_CUBLAS(func) \ do \ - { \ - hipblasStatus_t status = (func); \ - if (status != HIPBLAS_STATUS_SUCCESS) \ { \ - fprintf(stderr, "In File %s : HIPBLAS API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \ - _hipblasGetErrorString(status), status); \ - exit(EXIT_FAILURE); \ + hipblasStatus_t status = (func); \ + if (status != 
HIPBLAS_STATUS_SUCCESS) \ + { \ + fprintf (stderr, \ + "In File %s : HIPBLAS API failed at line %d with error: %s (%d)\n", \ + __FILE__, \ + __LINE__, \ + _hipblasGetErrorString (status), \ + status); \ + exit (EXIT_FAILURE); \ + } \ } \ - } while (0) + while (0) #define CHECK_CUSOLVER(func) \ do \ - { \ - hipsolverStatus_t status = (func); \ - if (status != HIPSOLVER_STATUS_SUCCESS) \ { \ - fprintf(stderr, "In File %s : HIPSOLVER API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \ - _hipsolverGetErrorString(status), status); \ - exit(EXIT_FAILURE); \ + hipsolverStatus_t status = (func); \ + if (status != HIPSOLVER_STATUS_SUCCESS) \ + { \ + fprintf (stderr, \ + "In File %s : HIPSOLVER API failed at line %d with error: %s (%d)\n", \ + __FILE__, \ + __LINE__, \ + _hipsolverGetErrorString (status), \ + status); \ + exit (EXIT_FAILURE); \ + } \ } \ - } while (0) + while (0) #define CHECK_CUFFT(func) \ do \ - { \ - hipfftResult_t status = (func); \ - if (status != HIPFFT_SUCCESS) \ { \ - fprintf(stderr, "In File %s : HIPFFT API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \ - _hipfftGetErrorString(status), status); \ - exit(EXIT_FAILURE); \ + hipfftResult_t status = (func); \ + if (status != HIPFFT_SUCCESS) \ + { \ + fprintf (stderr, \ + "In File %s : HIPFFT API failed at line %d with error: %s (%d)\n", \ + __FILE__, \ + __LINE__, \ + _hipfftGetErrorString (status), \ + status); \ + exit (EXIT_FAILURE); \ + } \ } \ - } while (0) + while (0) #define CHECK_LAST_CUDA_ERROR(msg) \ do \ - { \ - hipError_t status = hipGetLastError(); \ - if (status != hipSuccess) \ { \ - fprintf(stderr, "%s(%d) : HIP error : %s : (%d) %s.\n", __FILE__, __LINE__, msg, \ - static_cast(status), hipGetErrorString(status)); \ - exit(EXIT_FAILURE); \ + hipError_t status = hipGetLastError (); \ + if (status != hipSuccess) \ + { \ + fprintf (stderr, \ + "%s(%d) : HIP error : %s : (%d) %s.\n", \ + __FILE__, \ + __LINE__, \ + msg, \ + static_cast (status), \ + 
hipGetErrorString (status)); \ + exit (EXIT_FAILURE); \ + } \ } \ - } while (0) + while (0) #ifdef __DEBUG #define CHECK_CUDA_SYNC() \ do \ - { \ - hipError_t status = hipDeviceSynchronize(); \ - if (status != hipSuccess) \ { \ - fprintf(stderr, "In File %s : HIP sync failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \ - hipGetErrorString(status), status); \ - exit(EXIT_FAILURE); \ + hipError_t status = hipDeviceSynchronize (); \ + if (status != hipSuccess) \ + { \ + fprintf (stderr, \ + "In File %s : HIP sync failed at line %d with error: %s (%d)\n", \ + __FILE__, \ + __LINE__, \ + hipGetErrorString (status), \ + status); \ + exit (EXIT_FAILURE); \ + } \ } \ - } while (0) + while (0) #else -#define CHECK_CUDA_SYNC() do {} while (0) +#define CHECK_CUDA_SYNC() \ + do \ + { \ + } \ + while (0) #endif #endif // __ROCM diff --git a/source/source_base/module_device/device_helpers.cpp b/source/source_base/module_device/device_helpers.cpp index 0b5d5a1693a..58e95fe8bac 100644 --- a/source/source_base/module_device/device_helpers.cpp +++ b/source/source_base/module_device/device_helpers.cpp @@ -5,25 +5,29 @@ namespace base_device // Precision specializations template <> -std::string get_current_precision(const float* var) +std::string + get_current_precision (const float* var) { return "single"; } template <> -std::string get_current_precision(const double* var) +std::string + get_current_precision (const double* var) { return "double"; } template <> -std::string get_current_precision>(const std::complex* var) +std::string + get_current_precision> (const std::complex* var) { return "single"; } template <> -std::string get_current_precision>(const std::complex* var) +std::string + get_current_precision> (const std::complex* var) { return "double"; } diff --git a/source/source_base/module_device/device_helpers.h b/source/source_base/module_device/device_helpers.h index 2870eea2d78..799c11d8c10 100644 --- a/source/source_base/module_device/device_helpers.h +++ 
b/source/source_base/module_device/device_helpers.h @@ -26,7 +26,7 @@ class DeviceContext; * @param ctx Pointer to DeviceContext * @return AbacusDevice_t enum value */ -inline AbacusDevice_t get_device_type(const DeviceContext* ctx); +inline AbacusDevice_t get_device_type (const DeviceContext* ctx); /** * @brief Get the device type enum for a given device type (compile-time version). @@ -35,12 +35,25 @@ inline AbacusDevice_t get_device_type(const DeviceContext* ctx); * @return AbacusDevice_t enum value */ template -AbacusDevice_t get_device_type(const Device* dev) +AbacusDevice_t + get_device_type (const Device* dev) { - if (std::is_same::value) return CpuDevice; - else if (std::is_same::value) return GpuDevice; - else if (std::is_same::value) return DspDevice; - else return UnKnown; + if (std::is_same::value) + { + return CpuDevice; + } + else if (std::is_same::value) + { + return GpuDevice; + } + else if (std::is_same::value) + { + return DspDevice; + } + else + { + return UnKnown; + } } /** @@ -50,20 +63,20 @@ AbacusDevice_t get_device_type(const Device* dev) * @return "single" or "double" */ template -std::string get_current_precision(const T* var); +std::string get_current_precision (const T* var); // Template specialization declarations template <> -std::string get_current_precision(const float* var); +std::string get_current_precision (const float* var); template <> -std::string get_current_precision(const double* var); +std::string get_current_precision (const double* var); template <> -std::string get_current_precision>(const std::complex* var); +std::string get_current_precision> (const std::complex* var); template <> -std::string get_current_precision>(const std::complex* var); +std::string get_current_precision> (const std::complex* var); } // end of namespace base_device diff --git a/source/source_base/module_device/gpu_runtime.h b/source/source_base/module_device/gpu_runtime.h index ce759c0d98c..dd7efc6b361 100644 --- 
a/source/source_base/module_device/gpu_runtime.h +++ b/source/source_base/module_device/gpu_runtime.h @@ -19,52 +19,52 @@ #include // Error handling -#define gpuError_t cudaError_t -#define gpuSuccess cudaSuccess -#define gpuGetErrorString cudaGetErrorString +#define gpuError_t cudaError_t +#define gpuSuccess cudaSuccess +#define gpuGetErrorString cudaGetErrorString // Device management -#define gpuGetDeviceCount cudaGetDeviceCount -#define gpuGetDevice cudaGetDevice -#define gpuSetDevice cudaSetDevice -#define gpuGetDeviceProperties cudaGetDeviceProperties -#define gpuDeviceProp_t cudaDeviceProp +#define gpuGetDeviceCount cudaGetDeviceCount +#define gpuGetDevice cudaGetDevice +#define gpuSetDevice cudaSetDevice +#define gpuGetDeviceProperties cudaGetDeviceProperties +#define gpuDeviceProp_t cudaDeviceProp // Version info -#define gpuDriverGetVersion cudaDriverGetVersion -#define gpuRuntimeGetVersion cudaRuntimeGetVersion +#define gpuDriverGetVersion cudaDriverGetVersion +#define gpuRuntimeGetVersion cudaRuntimeGetVersion // Peer access -#define gpuDeviceCanAccessPeer cudaDeviceCanAccessPeer +#define gpuDeviceCanAccessPeer cudaDeviceCanAccessPeer // Error check macro -#define gpuErrcheck CHECK_CUDA +#define gpuErrcheck CHECK_CUDA #elif defined(__ROCM) #include // Error handling -#define gpuError_t hipError_t -#define gpuSuccess hipSuccess -#define gpuGetErrorString hipGetErrorString +#define gpuError_t hipError_t +#define gpuSuccess hipSuccess +#define gpuGetErrorString hipGetErrorString // Device management -#define gpuGetDeviceCount hipGetDeviceCount -#define gpuGetDevice hipGetDevice -#define gpuSetDevice hipSetDevice -#define gpuGetDeviceProperties hipGetDeviceProperties -#define gpuDeviceProp_t hipDeviceProp_t +#define gpuGetDeviceCount hipGetDeviceCount +#define gpuGetDevice hipGetDevice +#define gpuSetDevice hipSetDevice +#define gpuGetDeviceProperties hipGetDeviceProperties +#define gpuDeviceProp_t hipDeviceProp_t // Version info -#define 
gpuDriverGetVersion hipDriverGetVersion -#define gpuRuntimeGetVersion hipRuntimeGetVersion +#define gpuDriverGetVersion hipDriverGetVersion +#define gpuRuntimeGetVersion hipRuntimeGetVersion // Peer access -#define gpuDeviceCanAccessPeer hipDeviceCanAccessPeer +#define gpuDeviceCanAccessPeer hipDeviceCanAccessPeer // Error check macro -#define gpuErrcheck CHECK_CUDA +#define gpuErrcheck CHECK_CUDA #endif // __CUDA / __ROCM diff --git a/source/source_base/module_device/kernel_compat.h b/source/source_base/module_device/kernel_compat.h index 13c0f11c613..eae7510afd6 100644 --- a/source/source_base/module_device/kernel_compat.h +++ b/source/source_base/module_device/kernel_compat.h @@ -16,17 +16,19 @@ // atomicAdd for double precision - required for CUDA architectures < 600 (pre-Pascal) #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600 && !defined(__CUDA_ON_DCU) -static __inline__ __device__ double atomicAdd(double* address, double val) +static __inline__ __device__ double + atomicAdd (double* address, double val) { unsigned long long int* address_as_ull = (unsigned long long int*)address; unsigned long long int old = *address_as_ull, assumed; do - { - assumed = old; - old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); - // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) - } while (assumed != old); - return __longlong_as_double(old); + { + assumed = old; + old = atomicCAS (address_as_ull, assumed, __double_as_longlong (val + __longlong_as_double (assumed))); + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } + while (assumed != old); + return __longlong_as_double (old); } #endif diff --git a/source/source_base/module_device/memory_op.cpp b/source/source_base/module_device/memory_op.cpp index d31d29c422e..a40dfc6082f 100644 --- a/source/source_base/module_device/memory_op.cpp +++ b/source/source_base/module_device/memory_op.cpp @@ -17,110 +17,130 @@ 
namespace memory template struct resize_memory_op { - void operator()(FPTYPE*& arr, const size_t size, const char* record_in) + void + operator() (FPTYPE*& arr, const size_t size, const char* record_in) { if (arr != nullptr) - { - free(arr); - } - arr = (FPTYPE*)malloc(sizeof(FPTYPE) * size); + { + free (arr); + } + arr = (FPTYPE*)malloc (sizeof (FPTYPE) * size); std::string record_string; if (record_in != nullptr) - { - record_string = record_in; - } + { + record_string = record_in; + } else - { - record_string = "no_record"; - } + { + record_string = "no_record"; + } if (record_string != "no_record") - { - ModuleBase::Memory::record(record_string, sizeof(FPTYPE) * size); - } + { + ModuleBase::Memory::record (record_string, sizeof (FPTYPE) * size); + } } }; template struct set_memory_op { - void operator()(FPTYPE* arr, const int var, const size_t size) + void + operator() (FPTYPE* arr, const int var, const size_t size) { - ModuleBase::OMP_PARALLEL([&](int num_thread, int thread_id) { - int beg = 0, len = 0; - ModuleBase::BLOCK_TASK_DIST_1D(num_thread, thread_id, size, (size_t)4096 / sizeof(FPTYPE), beg, len); - memset(arr + beg, var, sizeof(FPTYPE) * len); - }); + ModuleBase::OMP_PARALLEL ( + [&] (int num_thread, int thread_id) + { + int beg = 0, len = 0; + ModuleBase::BLOCK_TASK_DIST_1D (num_thread, + thread_id, + size, + (size_t)4096 / sizeof (FPTYPE), + beg, + len); + memset (arr + beg, var, sizeof (FPTYPE) * len); + }); } }; template struct set_memory_2d_op { - void operator()(FPTYPE* arr, const size_t pitch, const int var, const size_t width, const size_t height) + void + operator() (FPTYPE* arr, const size_t pitch, const int var, const size_t width, const size_t height) { - for (size_t i = 0; i < height; i++){ - set_memory_op()(arr + i * pitch, var, width); - } + for (size_t i = 0; i < height; i++) + { + set_memory_op () (arr + i * pitch, var, width); + } } }; - template struct synchronize_memory_op { - void operator()(FPTYPE* arr_out, - const FPTYPE* 
arr_in, - const size_t size) + void + operator() (FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size) { - ModuleBase::OMP_PARALLEL([&](int num_thread, int thread_id) { - int beg = 0, len = 0; - ModuleBase::BLOCK_TASK_DIST_1D(num_thread, thread_id, size, (size_t)4096 / sizeof(FPTYPE), beg, len); - memcpy(arr_out + beg, arr_in + beg, sizeof(FPTYPE) * len); - }); + ModuleBase::OMP_PARALLEL ( + [&] (int num_thread, int thread_id) + { + int beg = 0, len = 0; + ModuleBase::BLOCK_TASK_DIST_1D (num_thread, + thread_id, + size, + (size_t)4096 / sizeof (FPTYPE), + beg, + len); + memcpy (arr_out + beg, arr_in + beg, sizeof (FPTYPE) * len); + }); } }; template struct synchronize_memory_2d_op { - void operator()(FPTYPE* arr_out, + void + operator() (FPTYPE* arr_out, const size_t dpitch, const FPTYPE* arr_in, const size_t spitch, const size_t width, const size_t height) { - for (int i = 0; i < height; i++){ - synchronize_memory_op()( - arr_out + i * dpitch, arr_in + i * spitch, width); - } + for (int i = 0; i < height; i++) + { + synchronize_memory_op () (arr_out + + i * dpitch, + arr_in + i * spitch, + width); + } } }; template struct cast_memory_op { - void operator()(FPTYPE_out* arr_out, - const FPTYPE_in* arr_in, - const size_t size) + void + operator() (FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (int ii = 0; ii < size; ii++) - { - arr_out[ii] = static_cast(arr_in[ii]); - } + { + arr_out[ii] = static_cast (arr_in[ii]); + } } }; template struct delete_memory_op { - void operator()(FPTYPE* arr) + void + operator() (FPTYPE* arr) { - free(arr); + free (arr); } }; @@ -192,9 +212,8 @@ template struct delete_memory_op*, base_device::DEVICE_CPU> template struct resize_memory_op { - void operator()(FPTYPE*& arr, - const size_t size, - const char* record_in = nullptr) + void + operator() (FPTYPE*& arr, const size_t size, const char* record_in = nullptr) { } }; @@ -202,7 +221,8 @@ struct 
resize_memory_op template struct set_memory_op { - void operator()(FPTYPE* arr, const int var, const size_t size) + void + operator() (FPTYPE* arr, const int var, const size_t size) { } }; @@ -210,7 +230,8 @@ struct set_memory_op template struct set_memory_2d_op { - void operator()(FPTYPE* arr, const size_t pitch, const int var, const size_t width, const size_t height) + void + operator() (FPTYPE* arr, const size_t pitch, const int var, const size_t width, const size_t height) { } }; @@ -218,9 +239,8 @@ struct set_memory_2d_op template struct synchronize_memory_op { - void operator()(FPTYPE* arr_out, - const FPTYPE* arr_in, - const size_t size) + void + operator() (FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size) { } }; @@ -228,9 +248,8 @@ struct synchronize_memory_op struct synchronize_memory_op { - void operator()(FPTYPE* arr_out, - const FPTYPE* arr_in, - const size_t size) + void + operator() (FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size) { } }; @@ -238,9 +257,8 @@ struct synchronize_memory_op struct synchronize_memory_op { - void operator()(FPTYPE* arr_out, - const FPTYPE* arr_in, - const size_t size) + void + operator() (FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size) { } }; @@ -248,7 +266,8 @@ struct synchronize_memory_op struct synchronize_memory_2d_op { - void operator()(FPTYPE* arr_out, + void + operator() (FPTYPE* arr_out, const size_t dpitch, const FPTYPE* arr_in, const size_t spitch, @@ -261,7 +280,8 @@ struct synchronize_memory_2d_op struct synchronize_memory_2d_op { - void operator()(FPTYPE* arr_out, + void + operator() (FPTYPE* arr_out, const size_t dpitch, const FPTYPE* arr_in, const size_t spitch, @@ -274,7 +294,8 @@ struct synchronize_memory_2d_op struct synchronize_memory_2d_op { - void operator()(FPTYPE* arr_out, + void + operator() (FPTYPE* arr_out, const size_t dpitch, const FPTYPE* arr_in, const size_t spitch, @@ -287,9 +308,8 @@ struct synchronize_memory_2d_op struct cast_memory_op { - void 
operator()(FPTYPE_out* arr_out, - const FPTYPE_in* arr_in, - const size_t size) + void + operator() (FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) { } }; @@ -297,9 +317,8 @@ struct cast_memory_op struct cast_memory_op { - void operator()(FPTYPE_out* arr_out, - const FPTYPE_in* arr_in, - const size_t size) + void + operator() (FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) { } }; @@ -307,9 +326,8 @@ struct cast_memory_op struct cast_memory_op { - void operator()(FPTYPE_out* arr_out, - const FPTYPE_in* arr_in, - const size_t size) + void + operator() (FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) { } }; @@ -317,7 +335,8 @@ struct cast_memory_op struct delete_memory_op { - void operator()(FPTYPE* arr) + void + operator() (FPTYPE* arr) { } }; @@ -447,7 +466,8 @@ namespace int g_dsp_cluster_id = 0; } -void set_dsp_cluster_id(int id) +void + set_dsp_cluster_id (int id) { g_dsp_cluster_id = id; } @@ -455,53 +475,62 @@ void set_dsp_cluster_id(int id) template struct resize_memory_op_mt { - void operator()(FPTYPE*& arr, const size_t size, const char* record_in) + void + operator() (FPTYPE*& arr, const size_t size, const char* record_in) { if (arr != nullptr) - { - mtfunc::free_ht(arr); - } - arr = (FPTYPE*)mtfunc::malloc_ht(sizeof(FPTYPE) * size, g_dsp_cluster_id); + { + mtfunc::free_ht (arr); + } + arr = (FPTYPE*)mtfunc::malloc_ht (sizeof (FPTYPE) * size, g_dsp_cluster_id); std::string record_string; if (record_in != nullptr) - { - record_string = record_in; - } + { + record_string = record_in; + } else - { - record_string = "no_record"; - } + { + record_string = "no_record"; + } if (record_string != "no_record") - { - ModuleBase::Memory::record(record_string, sizeof(FPTYPE) * size); - } + { + ModuleBase::Memory::record (record_string, sizeof (FPTYPE) * size); + } } }; template struct set_memory_op_mt { - void operator()(FPTYPE* arr, const int var, const size_t size) + void + operator() (FPTYPE* arr, const int var, 
const size_t size) { - ModuleBase::OMP_PARALLEL([&](int num_thread, int thread_id) { - int beg = 0, len = 0; - ModuleBase::BLOCK_TASK_DIST_1D(num_thread, thread_id, size, (size_t)4096 / sizeof(FPTYPE), beg, len); - memset(arr + beg, var, sizeof(FPTYPE) * len); - }); + ModuleBase::OMP_PARALLEL ( + [&] (int num_thread, int thread_id) + { + int beg = 0, len = 0; + ModuleBase::BLOCK_TASK_DIST_1D (num_thread, + thread_id, + size, + (size_t)4096 / sizeof (FPTYPE), + beg, + len); + memset (arr + beg, var, sizeof (FPTYPE) * len); + }); } }; template struct delete_memory_op_mt { - void operator()(FPTYPE* arr) + void + operator() (FPTYPE* arr) { - mtfunc::free_ht(arr); + mtfunc::free_ht (arr); } }; - template struct resize_memory_op_mt; template struct resize_memory_op_mt; template struct resize_memory_op_mt; @@ -522,68 +551,105 @@ template struct delete_memory_op_mt, base_device::DEVICE_CP #endif template -void resize_memory(FPTYPE* arr, const size_t size, base_device::AbacusDevice_t device_type) +void + resize_memory (FPTYPE* arr, const size_t size, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice){ - resize_memory_op()(arr, size); - } - else if (device_type == base_device::AbacusDevice_t::GpuDevice){ - resize_memory_op()(arr, size); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + resize_memory_op () (arr, size); + } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + resize_memory_op () (arr, size); + } } template -void set_memory(FPTYPE* arr, const int var, const size_t size, base_device::AbacusDevice_t device_type){ - if (device_type == base_device::AbacusDevice_t::CpuDevice){ - set_memory_op()(arr, var, size); - } - else if (device_type == base_device::AbacusDevice_t::GpuDevice){ - set_memory_op()(arr, var, size); - } +void + set_memory (FPTYPE* arr, const int var, const size_t size, base_device::AbacusDevice_t device_type) +{ + if (device_type == 
base_device::AbacusDevice_t::CpuDevice) + { + set_memory_op () (arr, var, size); + } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + set_memory_op () (arr, var, size); + } } template -void synchronize_memory(FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size, base_device::AbacusDevice_t device_type_out, base_device::AbacusDevice_t device_type_in){ - if (device_type_out == base_device::AbacusDevice_t::CpuDevice || device_type_in == base_device::AbacusDevice_t::CpuDevice){ - synchronize_memory_op()(arr_out, arr_in, size); - } - else if (device_type_out == base_device::AbacusDevice_t::CpuDevice || device_type_in == base_device::AbacusDevice_t::GpuDevice){ - synchronize_memory_op()(arr_out, arr_in, size); - } - else if (device_type_out == base_device::AbacusDevice_t::GpuDevice || device_type_in == base_device::AbacusDevice_t::CpuDevice){ - synchronize_memory_op()(arr_out, arr_in, size); - } - else if (device_type_out == base_device::AbacusDevice_t::GpuDevice || device_type_in == base_device::AbacusDevice_t::GpuDevice){ - synchronize_memory_op()(arr_out, arr_in, size); - } +void + synchronize_memory (FPTYPE* arr_out, + const FPTYPE* arr_in, + const size_t size, + base_device::AbacusDevice_t device_type_out, + base_device::AbacusDevice_t device_type_in) +{ + if (device_type_out == base_device::AbacusDevice_t::CpuDevice + || device_type_in == base_device::AbacusDevice_t::CpuDevice) + { + synchronize_memory_op () (arr_out, arr_in, size); + } + else if (device_type_out == base_device::AbacusDevice_t::CpuDevice + || device_type_in == base_device::AbacusDevice_t::GpuDevice) + { + synchronize_memory_op () (arr_out, arr_in, size); + } + else if (device_type_out == base_device::AbacusDevice_t::GpuDevice + || device_type_in == base_device::AbacusDevice_t::CpuDevice) + { + synchronize_memory_op () (arr_out, arr_in, size); + } + else if (device_type_out == base_device::AbacusDevice_t::GpuDevice + || device_type_in == 
base_device::AbacusDevice_t::GpuDevice) + { + synchronize_memory_op () (arr_out, arr_in, size); + } } template -void cast_memory(FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size, base_device::AbacusDevice_t device_type_out, base_device::AbacusDevice_t device_type_in) +void + cast_memory (FPTYPE_out* arr_out, + const FPTYPE_in* arr_in, + const size_t size, + base_device::AbacusDevice_t device_type_out, + base_device::AbacusDevice_t device_type_in) { - if (device_type_out == base_device::AbacusDevice_t::CpuDevice || device_type_in == base_device::AbacusDevice_t::CpuDevice){ - cast_memory_op()(arr_out, arr_in, size); - } - else if (device_type_out == base_device::AbacusDevice_t::CpuDevice || device_type_in == base_device::AbacusDevice_t::GpuDevice){ - cast_memory_op()(arr_out, arr_in, size); - } - else if (device_type_out == base_device::AbacusDevice_t::GpuDevice || device_type_in == base_device::AbacusDevice_t::CpuDevice){ - cast_memory_op()(arr_out, arr_in, size); - } - else if (device_type_out == base_device::AbacusDevice_t::GpuDevice || device_type_in == base_device::AbacusDevice_t::GpuDevice){ - cast_memory_op()(arr_out, arr_in, size); - } + if (device_type_out == base_device::AbacusDevice_t::CpuDevice + || device_type_in == base_device::AbacusDevice_t::CpuDevice) + { + cast_memory_op () (arr_out, arr_in, size); + } + else if (device_type_out == base_device::AbacusDevice_t::CpuDevice + || device_type_in == base_device::AbacusDevice_t::GpuDevice) + { + cast_memory_op () (arr_out, arr_in, size); + } + else if (device_type_out == base_device::AbacusDevice_t::GpuDevice + || device_type_in == base_device::AbacusDevice_t::CpuDevice) + { + cast_memory_op () (arr_out, arr_in, size); + } + else if (device_type_out == base_device::AbacusDevice_t::GpuDevice + || device_type_in == base_device::AbacusDevice_t::GpuDevice) + { + cast_memory_op () (arr_out, arr_in, size); + } } template -void delete_memory(FPTYPE* arr, base_device::AbacusDevice_t device_type) 
+void + delete_memory (FPTYPE* arr, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice){ - delete_memory_op()(arr); - } - else if (device_type == base_device::AbacusDevice_t::GpuDevice){ - delete_memory_op()(arr); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + delete_memory_op () (arr); + } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + delete_memory_op () (arr); + } } } // namespace memory diff --git a/source/source_base/module_device/memory_op.h b/source/source_base/module_device/memory_op.h index d9e95c7adb0..b910f832e32 100644 --- a/source/source_base/module_device/memory_op.h +++ b/source/source_base/module_device/memory_op.h @@ -23,7 +23,7 @@ struct resize_memory_op /// /// Output Parameters /// \param arr : allocated array - void operator()(FPTYPE*& arr, const size_t size, const char* record_in = nullptr); + void operator() (FPTYPE*& arr, const size_t size, const char* record_in = nullptr); }; template @@ -37,7 +37,7 @@ struct set_memory_op /// /// Output Parameters /// \param arr : output array initialized by the input value - void operator()(FPTYPE* arr, const int var, const size_t size); + void operator() (FPTYPE* arr, const int var, const size_t size); }; template @@ -53,7 +53,7 @@ struct set_memory_2d_op /// /// Output Parameters /// \param arr : output array initialized by the input value - void operator()(FPTYPE* arr, const size_t pitch, const int var, const size_t width, const size_t height); + void operator() (FPTYPE* arr, const size_t pitch, const int var, const size_t width, const size_t height); }; template @@ -67,9 +67,7 @@ struct synchronize_memory_op /// /// Output Parameters /// \param arr_out : output array initialized by the input array - void operator()(FPTYPE* arr_out, - const FPTYPE* arr_in, - const size_t size); + void operator() (FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size); }; template @@ -86,12 +84,12 @@ struct 
synchronize_memory_2d_op /// /// Output Parameters /// \param arr_out : output array initialized by the input array - void operator()(FPTYPE* arr_out, - const size_t dpitch, - const FPTYPE* arr_in, - const size_t spitch, - const size_t width, - const size_t height); + void operator() (FPTYPE* arr_out, + const size_t dpitch, + const FPTYPE* arr_in, + const size_t spitch, + const size_t width, + const size_t height); }; template @@ -105,9 +103,7 @@ struct cast_memory_op /// /// Output Parameters /// \param arr_out : output array initialized by the input array - void operator()(FPTYPE_out* arr_out, - const FPTYPE_in* arr_in, - const size_t size); + void operator() (FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size); }; template @@ -117,104 +113,108 @@ struct delete_memory_op /// /// Input Parameters /// \param arr : the input array - void operator()(FPTYPE* arr); + void operator() (FPTYPE* arr); }; template -void resize_memory(FPTYPE* arr, const size_t size, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); +void resize_memory (FPTYPE* arr, + const size_t size, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); template -void set_memory(FPTYPE* arr, const int var, const size_t size, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); +void set_memory (FPTYPE* arr, + const int var, + const size_t size, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); template -void synchronize_memory(FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size, base_device::AbacusDevice_t device_type_out, base_device::AbacusDevice_t device_type_in); +void synchronize_memory (FPTYPE* arr_out, + const FPTYPE* arr_in, + const size_t size, + base_device::AbacusDevice_t device_type_out, + base_device::AbacusDevice_t device_type_in); template -void cast_memory(FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size, 
base_device::AbacusDevice_t device_type_out, base_device::AbacusDevice_t device_type_in); +void cast_memory (FPTYPE_out* arr_out, + const FPTYPE_in* arr_in, + const size_t size, + base_device::AbacusDevice_t device_type_out, + base_device::AbacusDevice_t device_type_in); template -void delete_memory(FPTYPE* arr, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); +void delete_memory (FPTYPE* arr, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); #if __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM // Partially specialize operator for base_device::GpuDevice. template struct resize_memory_op { - void operator()(FPTYPE*& arr, - const size_t size, - const char* record_in = nullptr); + void operator() (FPTYPE*& arr, const size_t size, const char* record_in = nullptr); }; template struct set_memory_op { - void operator()(FPTYPE* arr, const int var, const size_t size); + void operator() (FPTYPE* arr, const int var, const size_t size); }; template struct set_memory_2d_op { - void operator()(FPTYPE* arr, const size_t pitch, const int var, const size_t width, const size_t height); + void operator() (FPTYPE* arr, const size_t pitch, const int var, const size_t width, const size_t height); }; template struct synchronize_memory_op { - void operator()(FPTYPE* arr_out, - const FPTYPE* arr_in, - const size_t size); + void operator() (FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size); }; template struct synchronize_memory_op { - void operator()(FPTYPE* arr_out, - const FPTYPE* arr_in, - const size_t size); + void operator() (FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size); }; template struct synchronize_memory_op { - void operator()(FPTYPE* arr_out, - const FPTYPE* arr_in, - const size_t size); - + void operator() (FPTYPE* arr_out, const FPTYPE* arr_in, const size_t size); }; template struct synchronize_memory_2d_op { - void operator()(FPTYPE* arr_out, - const size_t dpitch, - const FPTYPE* arr_in, 
- const size_t spitch, - const size_t width, - const size_t height); + void operator() (FPTYPE* arr_out, + const size_t dpitch, + const FPTYPE* arr_in, + const size_t spitch, + const size_t width, + const size_t height); }; template struct synchronize_memory_2d_op { - void operator()(FPTYPE* arr_out, - const size_t dpitch, - const FPTYPE* arr_in, - const size_t spitch, - const size_t width, - const size_t height); + void operator() (FPTYPE* arr_out, + const size_t dpitch, + const FPTYPE* arr_in, + const size_t spitch, + const size_t width, + const size_t height); }; template struct synchronize_memory_2d_op { - void operator()(FPTYPE* arr_out, - const size_t dpitch, - const FPTYPE* arr_in, - const size_t spitch, - const size_t width, - const size_t height); + void operator() (FPTYPE* arr_out, + const size_t dpitch, + const FPTYPE* arr_in, + const size_t spitch, + const size_t width, + const size_t height); }; template struct delete_memory_op { - void operator()(FPTYPE* arr); + void operator() (FPTYPE* arr); }; #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM @@ -223,7 +223,7 @@ struct delete_memory_op /// @brief Inject the DSP cluster id used by mt-allocator (mtfunc::malloc_ht). /// Caller-injected (typically once after input parameters are read). /// Defaults to 0 if never set. 
-void set_dsp_cluster_id(int id); +void set_dsp_cluster_id (int id); template struct resize_memory_op_mt @@ -236,7 +236,7 @@ struct resize_memory_op_mt /// /// Output Parameters /// \param arr : allocated array - void operator()(FPTYPE*& arr, const size_t size, const char* record_in = nullptr); + void operator() (FPTYPE*& arr, const size_t size, const char* record_in = nullptr); }; template @@ -250,7 +250,7 @@ struct set_memory_op_mt /// /// Output Parameters /// \param arr : output array initialized by the input value - void operator()(FPTYPE* arr, const int var, const size_t size); + void operator() (FPTYPE* arr, const int var, const size_t size); }; template @@ -260,7 +260,7 @@ struct delete_memory_op_mt /// /// Input Parameters /// \param arr : the input array - void operator()(FPTYPE* arr); + void operator() (FPTYPE* arr); }; #endif // __DSP @@ -327,12 +327,12 @@ using syncmem_c2c_h2d_op = base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; using syncmem_c2c_d2h_op = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; -using syncmem_z2z_h2h_op - = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; -using syncmem_z2z_h2d_op - = base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; -using syncmem_z2z_d2h_op - = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; +using syncmem_z2z_h2h_op = base_device::memory:: + synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; +using syncmem_z2z_h2d_op = base_device::memory:: + synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; +using syncmem_z2z_d2h_op = base_device::memory:: + synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; using syncmem_c2c_h2h_op = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; 
@@ -340,25 +340,25 @@ using syncmem_c2c_h2d_op = base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; using syncmem_c2c_d2h_op = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; -using syncmem_z2z_h2h_op - = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; -using syncmem_z2z_h2d_op - = base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; -using syncmem_z2z_d2h_op - = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; - -using syncmem_c2c_h2h_2d_op - = base_device::memory::synchronize_memory_2d_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; -using syncmem_c2c_h2d_2d_op - = base_device::memory::synchronize_memory_2d_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; -using syncmem_c2c_d2h_2d_op - = base_device::memory::synchronize_memory_2d_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; -using syncmem_z2z_h2h_2d_op - = base_device::memory::synchronize_memory_2d_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; -using syncmem_z2z_h2d_2d_op - = base_device::memory::synchronize_memory_2d_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; -using syncmem_z2z_d2h_2d_op - = base_device::memory::synchronize_memory_2d_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; +using syncmem_z2z_h2h_op = base_device::memory:: + synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; +using syncmem_z2z_h2d_op = base_device::memory:: + synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; +using syncmem_z2z_d2h_op = base_device::memory:: + synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; + +using syncmem_c2c_h2h_2d_op = base_device::memory:: + synchronize_memory_2d_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; +using syncmem_c2c_h2d_2d_op = base_device::memory:: + 
synchronize_memory_2d_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; +using syncmem_c2c_d2h_2d_op = base_device::memory:: + synchronize_memory_2d_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; +using syncmem_z2z_h2h_2d_op = base_device::memory:: + synchronize_memory_2d_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; +using syncmem_z2z_h2d_2d_op = base_device::memory:: + synchronize_memory_2d_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; +using syncmem_z2z_d2h_2d_op = base_device::memory:: + synchronize_memory_2d_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; using castmem_s2d_h2h_op = base_device::memory::cast_memory_op; diff --git a/source/source_base/module_device/output_device.cpp b/source/source_base/module_device/output_device.cpp index 3d4587d4a99..920bd2ea35d 100644 --- a/source/source_base/module_device/output_device.cpp +++ b/source/source_base/module_device/output_device.cpp @@ -20,148 +20,164 @@ namespace base_device { namespace information { -std::string get_device_name(std::string device_flag) { - std::string device_info = "Unknown"; +std::string + get_device_name (std::string device_flag) +{ + std::string device_info = "Unknown"; #if defined(__CUDA) || defined(__ROCM) - if (device_flag == "gpu") { - int dev = 0; - gpuDeviceProp_t deviceProp; - gpuErrcheck(gpuGetDeviceProperties(&deviceProp, dev)); - device_info = deviceProp.name; - } + if (device_flag == "gpu") + { + int dev = 0; + gpuDeviceProp_t deviceProp; + gpuErrcheck (gpuGetDeviceProperties (&deviceProp, dev)); + device_info = deviceProp.name; + } #endif - if (device_flag == "cpu") { - std::ifstream cpuinfo("/proc/cpuinfo"); - std::string line = "", cpu_name = ""; - - while (std::getline(cpuinfo, line)) { - if (line.find("model name") != std::string::npos) { - // Extract the CPU name from the line - size_t colonPos = line.find(":"); - if (colonPos != std::string::npos) { - cpu_name = line.substr(colonPos + 2); // Skip the colon and space - break; // Stop 
after the first match + if (device_flag == "cpu") + { + std::ifstream cpuinfo ("/proc/cpuinfo"); + std::string line = "", cpu_name = ""; + + while (std::getline (cpuinfo, line)) + { + if (line.find ("model name") != std::string::npos) + { + // Extract the CPU name from the line + size_t colonPos = line.find (":"); + if (colonPos != std::string::npos) + { + cpu_name = line.substr (colonPos + 2); // Skip the colon and space + break; // Stop after the first match + } + } + } + if (cpu_name != "") + { + device_info = cpu_name; + } + cpuinfo.close (); } - } - } - if (cpu_name != "") { - device_info = cpu_name; - } - cpuinfo.close(); - } - return device_info; + return device_info; } -int get_device_num(std::string device_flag) +int + get_device_num (std::string device_flag) { - if (device_flag == "gpu") { - int count = 0; - #if defined(__CUDA) || defined(__ROCM) - gpuErrcheck(gpuGetDeviceCount(&count)); - #endif - return count; - } - if(device_flag == "cpu") - { - std::ifstream file("/proc/cpuinfo"); - if (!file.is_open()) { - return 1; // fallback to 1 if cannot read - } - - std::string line; - std::set physical_ids; // Use set to avoid duplicates - - while (std::getline(file, line)) { - if (line.substr(0, 11) == "physical id") { - size_t pos = line.find(':'); - if (pos != std::string::npos) { - std::string value = line.substr(pos + 1); - std::stringstream ss(value); - int socket_id; - if (ss >> socket_id) { - physical_ids.insert(socket_id); + if (device_flag == "gpu") + { + int count = 0; +#if defined(__CUDA) || defined(__ROCM) + gpuErrcheck (gpuGetDeviceCount (&count)); +#endif + return count; + } + if (device_flag == "cpu") + { + std::ifstream file ("/proc/cpuinfo"); + if (!file.is_open ()) + { + return 1; // fallback to 1 if cannot read + } + + std::string line; + std::set physical_ids; // Use set to avoid duplicates + + while (std::getline (file, line)) + { + if (line.substr (0, 11) == "physical id") + { + size_t pos = line.find (':'); + if (pos != 
std::string::npos) + { + std::string value = line.substr (pos + 1); + std::stringstream ss (value); + int socket_id; + if (ss >> socket_id) + { + physical_ids.insert (socket_id); + } + } + } } - } + file.close (); + return (physical_ids.size () > 0) ? static_cast (physical_ids.size ()) : 1; } - } - file.close(); - return (physical_ids.size() > 0) ? static_cast(physical_ids.size()) : 1; - } - return 0; + return 0; } -void output_device_info(std::ostream &output, const std::string& device) +void + output_device_info (std::ostream& output, const std::string& device) { #ifdef __MPI int world_rank, world_size; - MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); - MPI_Comm_size(MPI_COMM_WORLD, &world_size); + MPI_Comm_rank (MPI_COMM_WORLD, &world_rank); + MPI_Comm_size (MPI_COMM_WORLD, &world_size); // rank in the node - int local_rank = get_node_rank_with_mpi_shared(MPI_COMM_WORLD); + int local_rank = get_node_rank_with_mpi_shared (MPI_COMM_WORLD); // Get local hardware info int local_gpu_count = 0; - #if defined(__CUDA) || defined(__ROCM) - if(device == "gpu" && local_rank == 0) - { - local_gpu_count = get_device_num("gpu"); - } - #endif - int local_cpu_sockets = local_rank == 0 ? get_device_num("cpu") : 0; +#if defined(__CUDA) || defined(__ROCM) + if (device == "gpu" && local_rank == 0) + { + local_gpu_count = get_device_num ("gpu"); + } +#endif + int local_cpu_sockets = local_rank == 0 ? 
get_device_num ("cpu") : 0; // Prepare vectors to gather data from all ranks - std::vector all_gpu_counts(world_size); - std::vector all_cpu_sockets(world_size); + std::vector all_gpu_counts (world_size); + std::vector all_cpu_sockets (world_size); // Gather GPU and CPU socket counts from all MPI ranks - MPI_Gather(&local_gpu_count, 1, MPI_INT, all_gpu_counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD); - MPI_Gather(&local_cpu_sockets, 1, MPI_INT, all_cpu_sockets.data(), 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Gather (&local_gpu_count, 1, MPI_INT, all_gpu_counts.data (), 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Gather (&local_cpu_sockets, 1, MPI_INT, all_cpu_sockets.data (), 1, MPI_INT, 0, MPI_COMM_WORLD); // Only rank 0 prints the full summary - if (world_rank == 0) { - int total_gpus = std::accumulate(all_gpu_counts.begin(), all_gpu_counts.end(), 0); - int total_cpus = std::accumulate(all_cpu_sockets.begin(), all_cpu_sockets.end(), 0); - - // Get device model names (from rank 0 node) - std::string cpu_name = get_device_name("cpu"); - std::string gpu_name; - #if defined(__CUDA) || defined(__ROCM) - if(device == "gpu" && total_gpus > 0) + if (world_rank == 0) { - gpu_name = get_device_name("gpu"); - } - #endif + int total_gpus = std::accumulate (all_gpu_counts.begin (), all_gpu_counts.end (), 0); + int total_cpus = std::accumulate (all_cpu_sockets.begin (), all_cpu_sockets.end (), 0); - // Output all collected information - output << " RUNNING WITH DEVICE : " << "CPU" << " / " - << cpu_name << " (x" << total_cpus << ")" << std::endl; - #if defined(__CUDA) || defined(__ROCM) - if(device == "gpu" && total_gpus > 0) - { - output << " " << "GPU" << " / " - << gpu_name << " (x" << total_gpus << ")" << std::endl; + // Get device model names (from rank 0 node) + std::string cpu_name = get_device_name ("cpu"); + std::string gpu_name; +#if defined(__CUDA) || defined(__ROCM) + if (device == "gpu" && total_gpus > 0) + { + gpu_name = get_device_name ("gpu"); + } +#endif + + // Output 
all collected information + output << " RUNNING WITH DEVICE : " << "CPU" << " / " << cpu_name << " (x" << total_cpus << ")" + << std::endl; +#if defined(__CUDA) || defined(__ROCM) + if (device == "gpu" && total_gpus > 0) + { + output << " " << "GPU" << " / " << gpu_name << " (x" << total_gpus << ")" + << std::endl; + } +#endif } - #endif - } #else - int cpu_sockets = get_device_num("cpu"); - std::string cpu_name = get_device_name("cpu"); - output << " RUNNING WITH DEVICE : " << "CPU" << " / " - << cpu_name << " (x" << cpu_sockets << ")" << std::endl; - #if defined(__CUDA) || defined(__ROCM) - if(device == "gpu") + int cpu_sockets = get_device_num ("cpu"); + std::string cpu_name = get_device_name ("cpu"); + output << " RUNNING WITH DEVICE : " << "CPU" << " / " << cpu_name << " (x" << cpu_sockets << ")" << std::endl; +#if defined(__CUDA) || defined(__ROCM) + if (device == "gpu") { - int gpu_count = get_device_num("gpu"); - if(gpu_count > 0) - { - std::string gpu_name = get_device_name("gpu"); - output << " " << "GPU" << " / " - << gpu_name << " (x" << gpu_count << ")" << std::endl; - } + int gpu_count = get_device_num ("gpu"); + if (gpu_count > 0) + { + std::string gpu_name = get_device_name ("gpu"); + output << " " << "GPU" << " / " << gpu_name << " (x" << gpu_count << ")" + << std::endl; + } } - #endif +#endif #endif } @@ -170,273 +186,288 @@ void output_device_info(std::ostream &output, const std::string& device) static bool is_init = false; template <> -void print_device_info( - const base_device::DEVICE_GPU *ctx, std::ofstream &ofs_device) { - if (is_init) { - return; - } - int deviceCount = 0; - gpuError_t error_id = gpuGetDeviceCount(&deviceCount); - if (error_id != gpuSuccess) { - ofs_device << "gpuGetDeviceCount returned " << static_cast(error_id) - << "\n-> " << gpuGetErrorString(error_id) << std::endl; - ModuleBase::WARNING_QUIT("device", "GPU returned is without gpuSuccess"); - } - // This function call returns 0 if there are no GPU capable devices. 
- if (deviceCount == 0) { - ofs_device << "There are no available device(s) that support GPU\n"; - } else { - ofs_device << "Detected " << deviceCount << " GPU Capable device(s)\n"; - } - int dev = 0, driverVersion = 0, runtimeVersion = 0; - gpuErrcheck(gpuGetDevice(&dev)); - gpuDeviceProp_t deviceProp; - gpuErrcheck(gpuGetDeviceProperties(&deviceProp, dev)); - ofs_device << "\nDevice " << dev << ":\t " << deviceProp.name << std::endl; - // Console log - gpuErrcheck(gpuDriverGetVersion(&driverVersion)); - gpuErrcheck(gpuRuntimeGetVersion(&runtimeVersion)); - char msg[1024]; - sprintf(msg, - " GPU Driver Version / Runtime Version %d.%d / %d.%d\n", - driverVersion / 1000, (driverVersion % 100) / 10, - runtimeVersion / 1000, (runtimeVersion % 100) / 10); - ofs_device << msg << std::endl; - sprintf(msg, " GPU Capability Major/Minor version number: %d.%d\n", - deviceProp.major, deviceProp.minor); - ofs_device << msg << std::endl; +void + print_device_info (const base_device::DEVICE_GPU* ctx, std::ofstream& ofs_device) +{ + if (is_init) + { + return; + } + int deviceCount = 0; + gpuError_t error_id = gpuGetDeviceCount (&deviceCount); + if (error_id != gpuSuccess) + { + ofs_device << "gpuGetDeviceCount returned " << static_cast (error_id) << "\n-> " + << gpuGetErrorString (error_id) << std::endl; + ModuleBase::WARNING_QUIT ("device", "GPU returned is without gpuSuccess"); + } + // This function call returns 0 if there are no GPU capable devices. 
+ if (deviceCount == 0) + { + ofs_device << "There are no available device(s) that support GPU\n"; + } + else + { + ofs_device << "Detected " << deviceCount << " GPU Capable device(s)\n"; + } + int dev = 0, driverVersion = 0, runtimeVersion = 0; + gpuErrcheck (gpuGetDevice (&dev)); + gpuDeviceProp_t deviceProp; + gpuErrcheck (gpuGetDeviceProperties (&deviceProp, dev)); + ofs_device << "\nDevice " << dev << ":\t " << deviceProp.name << std::endl; + // Console log + gpuErrcheck (gpuDriverGetVersion (&driverVersion)); + gpuErrcheck (gpuRuntimeGetVersion (&runtimeVersion)); + char msg[1024]; + sprintf (msg, + " GPU Driver Version / Runtime Version %d.%d / %d.%d\n", + driverVersion / 1000, + (driverVersion % 100) / 10, + runtimeVersion / 1000, + (runtimeVersion % 100) / 10); + ofs_device << msg << std::endl; + sprintf (msg, " GPU Capability Major/Minor version number: %d.%d\n", deviceProp.major, deviceProp.minor); + ofs_device << msg << std::endl; #if defined(__ROCM) - // ROCm-specific: clock rates - sprintf(msg, - " GPU Max Clock rate: %.0f MHz (%0.2f " - "GHz)\n", - deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f); - ofs_device << msg << std::endl; - sprintf(msg, " Memory Clock rate: %.0f Mhz\n", - deviceProp.memoryClockRate * 1e-3f); - ofs_device << msg << std::endl; - sprintf(msg, " Memory Bus Width: %d-bit\n", - deviceProp.memoryBusWidth); - ofs_device << msg << std::endl; + // ROCm-specific: clock rates + sprintf (msg, + " GPU Max Clock rate: %.0f MHz (%0.2f " + "GHz)\n", + deviceProp.clockRate * 1e-3f, + deviceProp.clockRate * 1e-6f); + ofs_device << msg << std::endl; + sprintf (msg, " Memory Clock rate: %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f); + ofs_device << msg << std::endl; + sprintf (msg, " Memory Bus Width: %d-bit\n", deviceProp.memoryBusWidth); + ofs_device << msg << std::endl; #endif - // Common properties - sprintf(msg, - " Maximum Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d, " - "%d), 3D=(%d, %d, %d)\n", - deviceProp.maxTexture1D, 
deviceProp.maxTexture2D[0], - deviceProp.maxTexture2D[1], deviceProp.maxTexture3D[0], - deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]); - ofs_device << msg << std::endl; + // Common properties + sprintf (msg, + " Maximum Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d, " + "%d), 3D=(%d, %d, %d)\n", + deviceProp.maxTexture1D, + deviceProp.maxTexture2D[0], + deviceProp.maxTexture2D[1], + deviceProp.maxTexture3D[0], + deviceProp.maxTexture3D[1], + deviceProp.maxTexture3D[2]); + ofs_device << msg << std::endl; #if defined(__CUDA) - // CUDA-specific: layered textures - sprintf( - msg, - " Maximum Layered 1D Texture Size, (num) layers 1D=(%d), %d layers\n", - deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]); - ofs_device << msg << std::endl; - sprintf(msg, - " Maximum Layered 2D Texture Size, (num) layers 2D=(%d, %d), %d " - "layers\n", - deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1], - deviceProp.maxTexture2DLayered[2]); - ofs_device << msg << std::endl; + // CUDA-specific: layered textures + sprintf (msg, + " Maximum Layered 1D Texture Size, (num) layers 1D=(%d), %d layers\n", + deviceProp.maxTexture1DLayered[0], + deviceProp.maxTexture1DLayered[1]); + ofs_device << msg << std::endl; + sprintf (msg, + " Maximum Layered 2D Texture Size, (num) layers 2D=(%d, %d), %d " + "layers\n", + deviceProp.maxTexture2DLayered[0], + deviceProp.maxTexture2DLayered[1], + deviceProp.maxTexture2DLayered[2]); + ofs_device << msg << std::endl; #endif - sprintf(msg, " Total amount of constant memory: %zu bytes\n", - deviceProp.totalConstMem); - ofs_device << msg << std::endl; - sprintf(msg, " Total amount of shared memory per block: %zu bytes\n", - deviceProp.sharedMemPerBlock); - ofs_device << msg << std::endl; + sprintf (msg, " Total amount of constant memory: %zu bytes\n", deviceProp.totalConstMem); + ofs_device << msg << std::endl; + sprintf (msg, " Total amount of shared memory per block: %zu bytes\n", deviceProp.sharedMemPerBlock); + 
ofs_device << msg << std::endl; #if defined(__CUDA) - sprintf(msg, " Total shared memory per multiprocessor: %zu bytes\n", - deviceProp.sharedMemPerMultiprocessor); - ofs_device << msg << std::endl; + sprintf (msg, + " Total shared memory per multiprocessor: %zu bytes\n", + deviceProp.sharedMemPerMultiprocessor); + ofs_device << msg << std::endl; #endif - sprintf(msg, " Total number of registers available per block: %d\n", - deviceProp.regsPerBlock); - ofs_device << msg << std::endl; - sprintf(msg, " Warp size: %d\n", - deviceProp.warpSize); - ofs_device << msg << std::endl; - sprintf(msg, " Maximum number of threads per multiprocessor: %d\n", - deviceProp.maxThreadsPerMultiProcessor); - ofs_device << msg << std::endl; - sprintf(msg, " Maximum number of threads per block: %d\n", - deviceProp.maxThreadsPerBlock); - ofs_device << msg << std::endl; - sprintf(msg, " Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n", - deviceProp.maxThreadsDim[0], deviceProp.maxThreadsDim[1], - deviceProp.maxThreadsDim[2]); - ofs_device << msg << std::endl; - sprintf(msg, " Max dimension size of a grid size (x,y,z): (%d, %d, %d)\n", - deviceProp.maxGridSize[0], deviceProp.maxGridSize[1], - deviceProp.maxGridSize[2]); - ofs_device << msg << std::endl; - sprintf(msg, " Maximum memory pitch: %zu bytes\n", - deviceProp.memPitch); - ofs_device << msg << std::endl; - sprintf(msg, " Texture alignment: %zu bytes\n", - deviceProp.textureAlignment); - ofs_device << msg << std::endl; + sprintf (msg, " Total number of registers available per block: %d\n", deviceProp.regsPerBlock); + ofs_device << msg << std::endl; + sprintf (msg, " Warp size: %d\n", deviceProp.warpSize); + ofs_device << msg << std::endl; + sprintf (msg, " Maximum number of threads per multiprocessor: %d\n", deviceProp.maxThreadsPerMultiProcessor); + ofs_device << msg << std::endl; + sprintf (msg, " Maximum number of threads per block: %d\n", deviceProp.maxThreadsPerBlock); + ofs_device << msg << std::endl; + sprintf 
(msg, + " Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n", + deviceProp.maxThreadsDim[0], + deviceProp.maxThreadsDim[1], + deviceProp.maxThreadsDim[2]); + ofs_device << msg << std::endl; + sprintf (msg, + " Max dimension size of a grid size (x,y,z): (%d, %d, %d)\n", + deviceProp.maxGridSize[0], + deviceProp.maxGridSize[1], + deviceProp.maxGridSize[2]); + ofs_device << msg << std::endl; + sprintf (msg, " Maximum memory pitch: %zu bytes\n", deviceProp.memPitch); + ofs_device << msg << std::endl; + sprintf (msg, " Texture alignment: %zu bytes\n", deviceProp.textureAlignment); + ofs_device << msg << std::endl; #if defined(__ROCM) - sprintf(msg, " Run time limit on kernels: %s\n", - deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No"); - ofs_device << msg << std::endl; + sprintf (msg, + " Run time limit on kernels: %s\n", + deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No"); + ofs_device << msg << std::endl; #endif - sprintf(msg, " Integrated GPU sharing Host Memory: %s\n", - deviceProp.integrated ? "Yes" : "No"); - ofs_device << msg << std::endl; - sprintf(msg, " Support host page-locked memory mapping: %s\n", - deviceProp.canMapHostMemory ? "Yes" : "No"); - ofs_device << msg << std::endl; + sprintf (msg, " Integrated GPU sharing Host Memory: %s\n", deviceProp.integrated ? "Yes" : "No"); + ofs_device << msg << std::endl; + sprintf (msg, " Support host page-locked memory mapping: %s\n", deviceProp.canMapHostMemory ? "Yes" : "No"); + ofs_device << msg << std::endl; #if defined(__CUDA) - sprintf(msg, " Alignment requirement for Surfaces: %s\n", - deviceProp.surfaceAlignment ? "Yes" : "No"); - ofs_device << msg << std::endl; + sprintf (msg, " Alignment requirement for Surfaces: %s\n", deviceProp.surfaceAlignment ? "Yes" : "No"); + ofs_device << msg << std::endl; #endif - sprintf(msg, " Device has ECC support: %s\n", - deviceProp.ECCEnabled ? 
"Enabled" : "Disabled"); - ofs_device << msg << std::endl; + sprintf (msg, + " Device has ECC support: %s\n", + deviceProp.ECCEnabled ? "Enabled" : "Disabled"); + ofs_device << msg << std::endl; #if defined(__CUDA) - sprintf(msg, " Device supports Unified Addressing (UVA): %s\n", - deviceProp.unifiedAddressing ? "Yes" : "No"); - ofs_device << msg << std::endl; + sprintf (msg, " Device supports Unified Addressing (UVA): %s\n", deviceProp.unifiedAddressing ? "Yes" : "No"); + ofs_device << msg << std::endl; #endif - sprintf(msg, " Device supports Managed Memory: %s\n", - deviceProp.managedMemory ? "Yes" : "No"); - ofs_device << msg << std::endl; + sprintf (msg, " Device supports Managed Memory: %s\n", deviceProp.managedMemory ? "Yes" : "No"); + ofs_device << msg << std::endl; #if defined(__CUDA) - sprintf(msg, " Device supports Compute Preemption: %s\n", - deviceProp.computePreemptionSupported ? "Yes" : "No"); - ofs_device << msg << std::endl; + sprintf (msg, + " Device supports Compute Preemption: %s\n", + deviceProp.computePreemptionSupported ? "Yes" : "No"); + ofs_device << msg << std::endl; #endif - sprintf(msg, " Supports Cooperative Kernel Launch: %s\n", - deviceProp.cooperativeLaunch ? "Yes" : "No"); - ofs_device << msg << std::endl; + sprintf (msg, " Supports Cooperative Kernel Launch: %s\n", deviceProp.cooperativeLaunch ? "Yes" : "No"); + ofs_device << msg << std::endl; #if defined(__ROCM) - sprintf(msg, " Supports MultiDevice Co-op Kernel Launch: %s\n", - deviceProp.cooperativeMultiDeviceLaunch ? "Yes" : "No"); - ofs_device << msg << std::endl; + sprintf (msg, + " Supports MultiDevice Co-op Kernel Launch: %s\n", + deviceProp.cooperativeMultiDeviceLaunch ? 
"Yes" : "No"); + ofs_device << msg << std::endl; #endif - sprintf(msg, - " Device PCI Domain ID / Bus ID / location ID: %d / %d / %d\n", - deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID); - ofs_device << msg << std::endl; + sprintf (msg, + " Device PCI Domain ID / Bus ID / location ID: %d / %d / %d\n", + deviceProp.pciDomainID, + deviceProp.pciBusID, + deviceProp.pciDeviceID); + ofs_device << msg << std::endl; #if defined(__CUDA) - ModuleBase::cuda_compat::printDeprecatedDeviceInfo(ofs_device, deviceProp); - ModuleBase::cuda_compat::printComputeModeInfo(ofs_device, deviceProp); + ModuleBase::cuda_compat::printDeprecatedDeviceInfo (ofs_device, deviceProp); + ModuleBase::cuda_compat::printComputeModeInfo (ofs_device, deviceProp); #elif defined(__ROCM) - const char *sComputeMode[] = { - "Default (multiple host threads can use ::gpuSetDevice() with device " - "simultaneously)", - "Exclusive (only one host thread in one process is able to use " - "::gpuSetDevice() with this device)", - "Prohibited (no host thread can use ::gpuSetDevice() with this " - "device)", - "Exclusive Process (many threads in one process is able to use " - "::gpuSetDevice() with this device)", - "Unknown", - NULL}; - sprintf(msg, " Compute Mode:\n"); - ofs_device << msg << std::endl; - ofs_device << " " << sComputeMode[deviceProp.computeMode] << std::endl - << std::endl; + const char* sComputeMode[] = {"Default (multiple host threads can use ::gpuSetDevice() with device " + "simultaneously)", + "Exclusive (only one host thread in one process is able to use " + "::gpuSetDevice() with this device)", + "Prohibited (no host thread can use ::gpuSetDevice() with this " + "device)", + "Exclusive Process (many threads in one process is able to use " + "::gpuSetDevice() with this device)", + "Unknown", + NULL}; + sprintf (msg, " Compute Mode:\n"); + ofs_device << msg << std::endl; + ofs_device << " " << sComputeMode[deviceProp.computeMode] << std::endl << std::endl; #endif - // If 
there are 2 or more GPUs, query to determine whether RDMA is supported - if (deviceCount >= 2) { - gpuDeviceProp_t prop[64]; - int gpuid[64]; // we want to find the first two GPUs that can support P2P - int gpu_p2p_count = 0; - - for (int i = 0; i < deviceCount; i++) { - gpuErrcheck(gpuGetDeviceProperties(&prop[i], i)); - - // Only boards based on Fermi or later can support P2P - if (prop[i].major >= 2) { - // This is an array of P2P capable GPUs - gpuid[gpu_p2p_count++] = i; - } - } - - // Show all the combinations of support P2P GPUs - int can_access_peer; - - if (gpu_p2p_count >= 2) { - for (int i = 0; i < gpu_p2p_count; i++) { - for (int j = 0; j < gpu_p2p_count; j++) { - if (gpuid[i] == gpuid[j]) { - continue; - } - gpuErrcheck( - gpuDeviceCanAccessPeer(&can_access_peer, gpuid[i], gpuid[j])); - sprintf(msg, "> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", - prop[gpuid[i]].name, gpuid[i], prop[gpuid[j]].name, gpuid[j], - can_access_peer ? "Yes" : "No"); - ofs_device << msg << std::endl; + // If there are 2 or more GPUs, query to determine whether RDMA is supported + if (deviceCount >= 2) + { + gpuDeviceProp_t prop[64]; + int gpuid[64]; // we want to find the first two GPUs that can support P2P + int gpu_p2p_count = 0; + + for (int i = 0; i < deviceCount; i++) + { + gpuErrcheck (gpuGetDeviceProperties (&prop[i], i)); + + // Only boards based on Fermi or later can support P2P + if (prop[i].major >= 2) + { + // This is an array of P2P capable GPUs + gpuid[gpu_p2p_count++] = i; + } + } + + // Show all the combinations of support P2P GPUs + int can_access_peer; + + if (gpu_p2p_count >= 2) + { + for (int i = 0; i < gpu_p2p_count; i++) + { + for (int j = 0; j < gpu_p2p_count; j++) + { + if (gpuid[i] == gpuid[j]) + { + continue; + } + gpuErrcheck (gpuDeviceCanAccessPeer (&can_access_peer, gpuid[i], gpuid[j])); + sprintf (msg, + "> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", + prop[gpuid[i]].name, + gpuid[i], + prop[gpuid[j]].name, + gpuid[j], + 
can_access_peer ? "Yes" : "No"); + ofs_device << msg << std::endl; + } + } + } } - } - } - } - - // csv masterlog info - // ***************************** - // exe and GPU driver name - std::string sProfileString = "deviceQuery, GPU Driver = GPURT"; - char cTemp[16]; - - // driver version - sProfileString += ", GPU Driver Version = "; - - snprintf(cTemp, sizeof(cTemp), "%d.%d", driverVersion / 1000, - (driverVersion % 100) / 10); - sProfileString += cTemp; - - // Runtime version - sProfileString += ", GPU Runtime Version = "; - snprintf(cTemp, sizeof(cTemp), "%d.%d", runtimeVersion / 1000, - (runtimeVersion % 100) / 10); - sProfileString += cTemp; - - // Device count - sProfileString += ", NumDevs = "; - snprintf(cTemp, sizeof(cTemp), "%d", deviceCount); - sProfileString += cTemp; - sProfileString += "\n"; - - ofs_device << sProfileString.c_str() << std::endl; - is_init = true; - ofs_device << "End of device informations." << std::endl << std::endl; + + // csv masterlog info + // ***************************** + // exe and GPU driver name + std::string sProfileString = "deviceQuery, GPU Driver = GPURT"; + char cTemp[16]; + + // driver version + sProfileString += ", GPU Driver Version = "; + + snprintf (cTemp, sizeof (cTemp), "%d.%d", driverVersion / 1000, (driverVersion % 100) / 10); + sProfileString += cTemp; + + // Runtime version + sProfileString += ", GPU Runtime Version = "; + snprintf (cTemp, sizeof (cTemp), "%d.%d", runtimeVersion / 1000, (runtimeVersion % 100) / 10); + sProfileString += cTemp; + + // Device count + sProfileString += ", NumDevs = "; + snprintf (cTemp, sizeof (cTemp), "%d", deviceCount); + sProfileString += cTemp; + sProfileString += "\n"; + + ofs_device << sProfileString.c_str () << std::endl; + is_init = true; + ofs_device << "End of device informations." 
<< std::endl << std::endl; } template <> -void record_device_memory( - const base_device::DEVICE_GPU *ctx, std::ofstream &ofs_device, - std::string str, size_t size) { - ofs_device << "Allocate " << static_cast(size) / 8 / 1024 / 1024 - << " \tMB device memory\t" - << "from " << str << std::endl - << std::endl; +void + record_device_memory (const base_device::DEVICE_GPU* ctx, + std::ofstream& ofs_device, + std::string str, + size_t size) +{ + ofs_device << "Allocate " << static_cast (size) / 8 / 1024 / 1024 << " \tMB device memory\t" + << "from " << str << std::endl + << std::endl; } #endif // defined(__CUDA) || defined(__ROCM) -} -} +} // namespace information +} // namespace base_device diff --git a/source/source_base/module_device/rocm/memory_op.hip.cu b/source/source_base/module_device/rocm/memory_op.hip.cu index 56eb6f06b70..9df8ad7307b 100644 --- a/source/source_base/module_device/rocm/memory_op.hip.cu +++ b/source/source_base/module_device/rocm/memory_op.hip.cu @@ -15,142 +15,160 @@ namespace memory { template -__global__ void cast_memory(FPTYPE_out* out, const FPTYPE_in* in, const int size) +__global__ void + cast_memory (FPTYPE_out* out, const FPTYPE_in* in, const int size) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx >= size) - { - return; - } - out[idx] = static_cast(in[idx]); + { + return; + } + out[idx] = static_cast (in[idx]); } template -__global__ void cast_memory(std::complex* out, const std::complex* in, const int size) +__global__ void + cast_memory (std::complex* out, const std::complex* in, const int size) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx >= size) - { - return; - } - auto* _out = reinterpret_cast*>(out); - const auto* _in = reinterpret_cast*>(in); - _out[idx] = static_cast>(_in[idx]); + { + return; + } + auto* _out = reinterpret_cast*> (out); + const auto* _in = reinterpret_cast*> (in); + _out[idx] = static_cast> (_in[idx]); } template -void resize_memory_op::operator()(FPTYPE*& arr, +void + 
resize_memory_op::operator() (FPTYPE*& arr, const size_t size, const char* record_in) { if (arr != nullptr) - { - delete_memory_op()(arr); - } - hipErrcheck(hipMalloc((void**)&arr, sizeof(FPTYPE) * size)); + { + delete_memory_op () (arr); + } + hipErrcheck (hipMalloc ((void**)&arr, sizeof (FPTYPE) * size)); } template -void set_memory_op::operator()(FPTYPE* arr, - const int var, - const size_t size) +void + set_memory_op::operator() (FPTYPE* arr, const int var, const size_t size) { - hipErrcheck(hipMemset(arr, var, sizeof(FPTYPE) * size)); + hipErrcheck (hipMemset (arr, var, sizeof (FPTYPE) * size)); } template -void synchronize_memory_op::operator()( - FPTYPE* arr_out, - const FPTYPE* arr_in, - const size_t size) +void + synchronize_memory_op::operator() (FPTYPE* arr_out, + const FPTYPE* arr_in, + const size_t size) { - hipErrcheck(hipMemcpy(arr_out, arr_in, sizeof(FPTYPE) * size, hipMemcpyDeviceToHost)); + hipErrcheck (hipMemcpy (arr_out, arr_in, sizeof (FPTYPE) * size, hipMemcpyDeviceToHost)); } template -void synchronize_memory_op::operator()( - FPTYPE* arr_out, - const FPTYPE* arr_in, - const size_t size) +void + synchronize_memory_op::operator() (FPTYPE* arr_out, + const FPTYPE* arr_in, + const size_t size) { - hipErrcheck(hipMemcpy(arr_out, arr_in, sizeof(FPTYPE) * size, hipMemcpyHostToDevice)); + hipErrcheck (hipMemcpy (arr_out, arr_in, sizeof (FPTYPE) * size, hipMemcpyHostToDevice)); } template -void synchronize_memory_op::operator()( - FPTYPE* arr_out, - const FPTYPE* arr_in, - const size_t size) +void + synchronize_memory_op::operator() (FPTYPE* arr_out, + const FPTYPE* arr_in, + const size_t size) { - hipErrcheck(hipMemcpy(arr_out, arr_in, sizeof(FPTYPE) * size, hipMemcpyDeviceToDevice)); + hipErrcheck (hipMemcpy (arr_out, arr_in, sizeof (FPTYPE) * size, hipMemcpyDeviceToDevice)); } template -struct cast_memory_op { - void operator()(FPTYPE_out* arr_out, - const FPTYPE_in* arr_in, - const size_t size) { +struct cast_memory_op +{ + void + operator() 
(FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) + { - if (size == 0) {return;} + if (size == 0) + { + return; + } const int block = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - hipLaunchKernelGGL(cast_memory, dim3(block), dim3(THREADS_PER_BLOCK), 0, 0, arr_out, arr_in, size); - hipCheckOnDebug(); + hipLaunchKernelGGL (cast_memory, dim3 (block), dim3 (THREADS_PER_BLOCK), 0, 0, arr_out, arr_in, size); + hipCheckOnDebug (); } }; template -struct cast_memory_op { - void operator()(FPTYPE_out* arr_out, - const FPTYPE_in* arr_in, - const size_t size) { +struct cast_memory_op +{ + void + operator() (FPTYPE_out* arr_out, const FPTYPE_in* arr_in, const size_t size) + { - if (size == 0) {return;} + if (size == 0) + { + return; + } // No need to cast the memory if the data types are the same. if (std::is_same::value) - { - synchronize_memory_op()(arr_out, - reinterpret_cast(arr_in), - size); - return; - } - FPTYPE_in * arr = nullptr; - hipErrcheck(hipMalloc((void **)&arr, sizeof(FPTYPE_in) * size)); - hipErrcheck(hipMemcpy(arr, arr_in, sizeof(FPTYPE_in) * size, hipMemcpyHostToDevice)); + { + synchronize_memory_op () ( + arr_out, + reinterpret_cast (arr_in), + size); + return; + } + FPTYPE_in* arr = nullptr; + hipErrcheck (hipMalloc ((void**)&arr, sizeof (FPTYPE_in) * size)); + hipErrcheck (hipMemcpy (arr, arr_in, sizeof (FPTYPE_in) * size, hipMemcpyHostToDevice)); const int block = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - hipLaunchKernelGGL(cast_memory, dim3(block), dim3(THREADS_PER_BLOCK), 0, 0, arr_out, arr, size); - hipCheckOnDebug(); - hipErrcheck(hipFree(arr)); + hipLaunchKernelGGL (cast_memory, dim3 (block), dim3 (THREADS_PER_BLOCK), 0, 0, arr_out, arr, size); + hipCheckOnDebug (); + hipErrcheck (hipFree (arr)); } }; template -struct cast_memory_op { - void operator()(FPTYPE_out* arr_out, - const FPTYPE_in* arr_in, - const size_t size) { +struct cast_memory_op +{ + void + operator() (FPTYPE_out* arr_out, const FPTYPE_in* arr_in, 
const size_t size) + { - if (size == 0) {return;} + if (size == 0) + { + return; + } // No need to cast the memory if the data types are the same. if (std::is_same::value) - { - synchronize_memory_op()(arr_out, - reinterpret_cast(arr_in), - size); - return; - } - auto * arr = (FPTYPE_in*) malloc(sizeof(FPTYPE_in) * size); - hipErrcheck(hipMemcpy(arr, arr_in, sizeof(FPTYPE_in) * size, hipMemcpyDeviceToHost)); - for (int ii = 0; ii < size; ii++) { - arr_out[ii] = static_cast(arr[ii]); - } - free(arr); + { + synchronize_memory_op () ( + arr_out, + reinterpret_cast (arr_in), + size); + return; + } + auto* arr = (FPTYPE_in*)malloc (sizeof (FPTYPE_in) * size); + hipErrcheck (hipMemcpy (arr, arr_in, sizeof (FPTYPE_in) * size, hipMemcpyDeviceToHost)); + for (int ii = 0; ii < size; ii++) + { + arr_out[ii] = static_cast (arr[ii]); + } + free (arr); } }; template -void delete_memory_op::operator()(FPTYPE* arr) +void + delete_memory_op::operator() (FPTYPE* arr) { - hipErrcheck(hipFree(arr)); + hipErrcheck (hipFree (arr)); } template struct resize_memory_op; diff --git a/source/source_base/module_device/test/device_test.cpp b/source/source_base/module_device/test/device_test.cpp index faf083c721a..880ccc8a031 100644 --- a/source/source_base/module_device/test/device_test.cpp +++ b/source/source_base/module_device/test/device_test.cpp @@ -10,24 +10,26 @@ class TestModulePsiDevice : public ::testing::Test const base_device::DEVICE_CPU* cpu_ctx = {}; const base_device::DEVICE_GPU* gpu_ctx = {}; - void SetUp() override + void + SetUp () override { } - void TearDown() override + void + TearDown () override { } }; -TEST_F(TestModulePsiDevice, get_device_type_cpu) +TEST_F (TestModulePsiDevice, get_device_type_cpu) { - base_device::AbacusDevice_t device = base_device::get_device_type(cpu_ctx); - EXPECT_EQ(device, base_device::CpuDevice); + base_device::AbacusDevice_t device = base_device::get_device_type (cpu_ctx); + EXPECT_EQ (device, base_device::CpuDevice); } #if __UT_USE_CUDA || 
__UT_USE_ROCM -TEST_F(TestModulePsiDevice, get_device_type_gpu) +TEST_F (TestModulePsiDevice, get_device_type_gpu) { - base_device::AbacusDevice_t device = base_device::get_device_type(gpu_ctx); - EXPECT_EQ(device, base_device::GpuDevice); + base_device::AbacusDevice_t device = base_device::get_device_type (gpu_ctx); + EXPECT_EQ (device, base_device::GpuDevice); } #endif // __UT_USE_CUDA || __UT_USE_ROCM diff --git a/source/source_base/module_device/test/memory_test.cpp b/source/source_base/module_device/test/memory_test.cpp index a963c68c532..4a86a4e3964 100644 --- a/source/source_base/module_device/test/memory_test.cpp +++ b/source/source_base/module_device/test/memory_test.cpp @@ -37,15 +37,17 @@ class TestModulePsiMemory : public ::testing::Test {3.41302551, -2.3175205}, {-0.27628221, -1.35701656}}; - const int z_dim = z_xx.size(); + const int z_dim = z_xx.size (); const base_device::DEVICE_CPU* cpu_ctx = {}; const base_device::DEVICE_GPU* gpu_ctx = {}; - void SetUp() override + void + SetUp () override { } - void TearDown() override + void + TearDown () override { } @@ -57,8 +59,8 @@ class TestModulePsiMemory : public ::testing::Test = base_device::memory::resize_memory_op, base_device::DEVICE_CPU>; using synchronize_memory_double_cpu_to_cpu_op = base_device::memory::synchronize_memory_op; - using synchronize_memory_complex_double_cpu_to_cpu_op - = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; + using synchronize_memory_complex_double_cpu_to_cpu_op = base_device::memory:: + synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; using delete_memory_double_cpu_op = base_device::memory::delete_memory_op; using delete_memory_complex_double_cpu_op = base_device::memory::delete_memory_op, base_device::DEVICE_CPU>; @@ -76,275 +78,273 @@ class TestModulePsiMemory : public ::testing::Test = base_device::memory::synchronize_memory_op; using synchronize_memory_double_gpu_to_gpu_op = 
base_device::memory::synchronize_memory_op; - using synchronize_memory_complex_double_cpu_to_gpu_op - = base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; - using synchronize_memory_complex_double_gpu_to_cpu_op - = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; - using synchronize_memory_complex_double_gpu_to_gpu_op - = base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>; + using synchronize_memory_complex_double_cpu_to_gpu_op = base_device::memory:: + synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; + using synchronize_memory_complex_double_gpu_to_cpu_op = base_device::memory:: + synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; + using synchronize_memory_complex_double_gpu_to_gpu_op = base_device::memory:: + synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>; using delete_memory_double_gpu_op = base_device::memory::delete_memory_op; using delete_memory_complex_double_gpu_op = base_device::memory::delete_memory_op, base_device::DEVICE_GPU>; #endif // __UT_USE_CUDA || __UT_USE_ROCM }; -TEST_F(TestModulePsiMemory, set_memory_op_double_cpu) +TEST_F (TestModulePsiMemory, set_memory_op_double_cpu) { std::vector v_xx = xx; - set_memory_double_cpu_op()(v_xx.data(), 0, xx.size()); - for (int ii = 0; ii < xx.size(); ii++) - { - EXPECT_EQ(v_xx[ii], 0.0); - } + set_memory_double_cpu_op () (v_xx.data (), 0, xx.size ()); + for (int ii = 0; ii < xx.size (); ii++) + { + EXPECT_EQ (v_xx[ii], 0.0); + } } -TEST_F(TestModulePsiMemory, set_memory_op_complex_double_cpu) +TEST_F (TestModulePsiMemory, set_memory_op_complex_double_cpu) { std::vector> vz_xx = z_xx; - set_memory_complex_double_cpu_op()(vz_xx.data(), 0, z_xx.size()); - for (int ii = 0; ii < z_xx.size(); ii++) - { - EXPECT_EQ(vz_xx[ii], std::complex(0.0, 0.0)); - } + set_memory_complex_double_cpu_op () (vz_xx.data (), 0, 
z_xx.size ()); + for (int ii = 0; ii < z_xx.size (); ii++) + { + EXPECT_EQ (vz_xx[ii], std::complex (0.0, 0.0)); + } } -TEST_F(TestModulePsiMemory, resize_memory_op_double_cpu) +TEST_F (TestModulePsiMemory, resize_memory_op_double_cpu) { - double* xx_tmp = NULL; - resize_memory_double_cpu_op()(xx_tmp, xx.size()); - for (int ii = 0; ii < xx.size(); ii++) - { - xx_tmp[ii] = xx[ii]; - } - for (int ii = 0; ii < xx.size(); ii++) - { - EXPECT_EQ(xx_tmp[ii], xx[ii]); - } - free(xx_tmp); + double* xx_tmp = nullptr; + resize_memory_double_cpu_op () (xx_tmp, xx.size ()); + for (int ii = 0; ii < xx.size (); ii++) + { + xx_tmp[ii] = xx[ii]; + } + for (int ii = 0; ii < xx.size (); ii++) + { + EXPECT_EQ (xx_tmp[ii], xx[ii]); + } + free (xx_tmp); } -TEST_F(TestModulePsiMemory, resize_memory_op_comlex_double_cpu) +TEST_F (TestModulePsiMemory, resize_memory_op_comlex_double_cpu) { - std::complex* z_xx_tmp = NULL; - resize_memory_comlex_double_cpu_op()(z_xx_tmp, z_xx.size()); - for (int ii = 0; ii < z_xx.size(); ii++) - { - z_xx_tmp[ii] = z_xx[ii]; - } - for (int ii = 0; ii < z_xx.size(); ii++) - { - EXPECT_EQ(z_xx_tmp[ii], z_xx[ii]); - } - free(z_xx_tmp); + std::complex* z_xx_tmp = nullptr; + resize_memory_comlex_double_cpu_op () (z_xx_tmp, z_xx.size ()); + for (int ii = 0; ii < z_xx.size (); ii++) + { + z_xx_tmp[ii] = z_xx[ii]; + } + for (int ii = 0; ii < z_xx.size (); ii++) + { + EXPECT_EQ (z_xx_tmp[ii], z_xx[ii]); + } + free (z_xx_tmp); } -TEST_F(TestModulePsiMemory, synchronize_memory_op_double_cpu_to_cpu) +TEST_F (TestModulePsiMemory, synchronize_memory_op_double_cpu_to_cpu) { - std::vector h_xx(xx.size(), 0); - synchronize_memory_double_cpu_to_cpu_op()(h_xx.data(), xx.data(), xx.size()); - for (int ii = 0; ii < z_xx.size(); ii++) - { - EXPECT_EQ(h_xx[ii], xx[ii]); - } + std::vector h_xx (xx.size (), 0); + synchronize_memory_double_cpu_to_cpu_op () (h_xx.data (), xx.data (), xx.size ()); + for (int ii = 0; ii < z_xx.size (); ii++) + { + EXPECT_EQ (h_xx[ii], xx[ii]); + } } 
-TEST_F(TestModulePsiMemory, synchronize_memory_op_complex_double_cpu_to_cpu) +TEST_F (TestModulePsiMemory, synchronize_memory_op_complex_double_cpu_to_cpu) { - std::vector> hz_xx(z_xx.size(), std::complex(0, 0)); - synchronize_memory_complex_double_cpu_to_cpu_op()(hz_xx.data(), z_xx.data(), z_xx.size()); - for (int ii = 0; ii < z_xx.size(); ii++) - { - EXPECT_EQ(hz_xx[ii], z_xx[ii]); - } + std::vector> hz_xx (z_xx.size (), std::complex (0, 0)); + synchronize_memory_complex_double_cpu_to_cpu_op () (hz_xx.data (), z_xx.data (), z_xx.size ()); + for (int ii = 0; ii < z_xx.size (); ii++) + { + EXPECT_EQ (hz_xx[ii], z_xx[ii]); + } } -TEST_F(TestModulePsiMemory, delete_memory_op_double_cpu) +TEST_F (TestModulePsiMemory, delete_memory_op_double_cpu) { - double* h_xx = (double*)malloc(sizeof(double) * xx.size()); - delete_memory_double_cpu_op()(h_xx); + double* h_xx = (double*)malloc (sizeof (double) * xx.size ()); + delete_memory_double_cpu_op () (h_xx); } -TEST_F(TestModulePsiMemory, delete_memory_op_complex_double_cpu) +TEST_F (TestModulePsiMemory, delete_memory_op_complex_double_cpu) { - std::complex* hz_xx = (std::complex*)malloc(sizeof(std::complex) * z_xx.size()); - delete_memory_complex_double_cpu_op()(hz_xx); + std::complex* hz_xx = (std::complex*)malloc (sizeof (std::complex) * z_xx.size ()); + delete_memory_complex_double_cpu_op () (hz_xx); } #if __UT_USE_CUDA || __UT_USE_ROCM -TEST_F(TestModulePsiMemory, set_memory_op_double_gpu) +TEST_F (TestModulePsiMemory, set_memory_op_double_gpu) { - thrust::device_ptr d_xx = thrust::device_malloc(xx.size()); - thrust::copy(xx.begin(), xx.end(), d_xx); - set_memory_double_gpu_op()(thrust::raw_pointer_cast(d_xx), 0, xx.size()); - thrust::host_vector h_xx(xx.size()); - thrust::copy(d_xx, d_xx + xx.size(), h_xx.begin()); - for (int ii = 0; ii < xx.size(); ii++) - { - EXPECT_EQ(h_xx[ii], 0.0); - } + thrust::device_ptr d_xx = thrust::device_malloc (xx.size ()); + thrust::copy (xx.begin (), xx.end (), d_xx); + 
set_memory_double_gpu_op () (thrust::raw_pointer_cast (d_xx), 0, xx.size ()); + thrust::host_vector h_xx (xx.size ()); + thrust::copy (d_xx, d_xx + xx.size (), h_xx.begin ()); + for (int ii = 0; ii < xx.size (); ii++) + { + EXPECT_EQ (h_xx[ii], 0.0); + } } -TEST_F(TestModulePsiMemory, set_memory_op_complex_double_gpu) +TEST_F (TestModulePsiMemory, set_memory_op_complex_double_gpu) { - thrust::device_ptr> dz_xx = thrust::device_malloc>(z_xx.size()); - thrust::copy(z_xx.begin(), z_xx.end(), dz_xx); - set_memory_complex_double_gpu_op()(thrust::raw_pointer_cast(dz_xx), 0, z_xx.size()); - thrust::host_vector> h_xx(z_xx.size()); - thrust::copy(dz_xx, dz_xx + z_xx.size(), h_xx.begin()); - for (int ii = 0; ii < z_xx.size(); ii++) - { - EXPECT_EQ(h_xx[ii], std::complex(0.0, 0.0)); - } + thrust::device_ptr> dz_xx = thrust::device_malloc> (z_xx.size ()); + thrust::copy (z_xx.begin (), z_xx.end (), dz_xx); + set_memory_complex_double_gpu_op () (thrust::raw_pointer_cast (dz_xx), 0, z_xx.size ()); + thrust::host_vector> h_xx (z_xx.size ()); + thrust::copy (dz_xx, dz_xx + z_xx.size (), h_xx.begin ()); + for (int ii = 0; ii < z_xx.size (); ii++) + { + EXPECT_EQ (h_xx[ii], std::complex (0.0, 0.0)); + } } -TEST_F(TestModulePsiMemory, resize_memory_op_double_gpu) +TEST_F (TestModulePsiMemory, resize_memory_op_double_gpu) { double* xx_tmp = NULL; - resize_memory_double_gpu_op()(xx_tmp, xx.size()); - - thrust::device_ptr d_xx(xx_tmp); - thrust::copy(xx.begin(), xx.end(), d_xx); - - thrust::host_vector h_xx(xx.size()); - thrust::copy(d_xx, d_xx + xx.size(), h_xx.begin()); - for (int ii = 0; ii < xx.size(); ii++) - { - EXPECT_EQ(h_xx[ii], xx[ii]); - } - thrust::device_free(d_xx); + resize_memory_double_gpu_op () (xx_tmp, xx.size ()); + + thrust::device_ptr d_xx (xx_tmp); + thrust::copy (xx.begin (), xx.end (), d_xx); + + thrust::host_vector h_xx (xx.size ()); + thrust::copy (d_xx, d_xx + xx.size (), h_xx.begin ()); + for (int ii = 0; ii < xx.size (); ii++) + { + EXPECT_EQ (h_xx[ii], 
xx[ii]); + } + thrust::device_free (d_xx); } -TEST_F(TestModulePsiMemory, resize_memory_op_complex_double_gpu) +TEST_F (TestModulePsiMemory, resize_memory_op_complex_double_gpu) { std::complex* z_xx_tmp = NULL; - resize_memory_comlex_double_gpu_op()(z_xx_tmp, z_xx.size()); - - thrust::device_ptr> dz_xx(z_xx_tmp); - thrust::copy(z_xx.begin(), z_xx.end(), dz_xx); - - thrust::host_vector> h_z_xx(z_xx.size()); - thrust::copy(dz_xx, dz_xx + z_xx.size(), h_z_xx.begin()); - for (int ii = 0; ii < z_xx.size(); ii++) - { - EXPECT_EQ(h_z_xx[ii], z_xx[ii]); - } - thrust::device_free(dz_xx); + resize_memory_comlex_double_gpu_op () (z_xx_tmp, z_xx.size ()); + + thrust::device_ptr> dz_xx (z_xx_tmp); + thrust::copy (z_xx.begin (), z_xx.end (), dz_xx); + + thrust::host_vector> h_z_xx (z_xx.size ()); + thrust::copy (dz_xx, dz_xx + z_xx.size (), h_z_xx.begin ()); + for (int ii = 0; ii < z_xx.size (); ii++) + { + EXPECT_EQ (h_z_xx[ii], z_xx[ii]); + } + thrust::device_free (dz_xx); } -TEST_F(TestModulePsiMemory, synchronize_memory_op_double_cpu_to_gpu) +TEST_F (TestModulePsiMemory, synchronize_memory_op_double_cpu_to_gpu) { - thrust::device_ptr d_xx = thrust::device_malloc(xx.size()); - std::vector hv_xx(xx.size(), 0); - thrust::copy(hv_xx.begin(), hv_xx.end(), d_xx); - synchronize_memory_double_cpu_to_gpu_op()(thrust::raw_pointer_cast(d_xx), xx.data(), xx.size()); - - thrust::host_vector h_xx(xx.size()); - thrust::copy(d_xx, d_xx + xx.size(), h_xx.begin()); - for (int ii = 0; ii < xx.size(); ii++) - { - EXPECT_EQ(h_xx[ii], xx[ii]); - } - thrust::device_free(d_xx); + thrust::device_ptr d_xx = thrust::device_malloc (xx.size ()); + std::vector hv_xx (xx.size (), 0); + thrust::copy (hv_xx.begin (), hv_xx.end (), d_xx); + synchronize_memory_double_cpu_to_gpu_op () (thrust::raw_pointer_cast (d_xx), xx.data (), xx.size ()); + + thrust::host_vector h_xx (xx.size ()); + thrust::copy (d_xx, d_xx + xx.size (), h_xx.begin ()); + for (int ii = 0; ii < xx.size (); ii++) + { + EXPECT_EQ (h_xx[ii], 
xx[ii]); + } + thrust::device_free (d_xx); } -TEST_F(TestModulePsiMemory, synchronize_memory_op_double_gpu_to_cpu) +TEST_F (TestModulePsiMemory, synchronize_memory_op_double_gpu_to_cpu) { - thrust::device_ptr d_xx = thrust::device_malloc(xx.size()); - thrust::copy(xx.begin(), xx.end(), d_xx); - thrust::host_vector h_xx(xx.size()); - synchronize_memory_double_gpu_to_cpu_op()(thrust::raw_pointer_cast(h_xx.data()), - thrust::raw_pointer_cast(d_xx), - xx.size()); - - for (int ii = 0; ii < xx.size(); ii++) - { - EXPECT_EQ(h_xx[ii], xx[ii]); - } - thrust::device_free(d_xx); + thrust::device_ptr d_xx = thrust::device_malloc (xx.size ()); + thrust::copy (xx.begin (), xx.end (), d_xx); + thrust::host_vector h_xx (xx.size ()); + synchronize_memory_double_gpu_to_cpu_op () (thrust::raw_pointer_cast (h_xx.data ()), + thrust::raw_pointer_cast (d_xx), + xx.size ()); + + for (int ii = 0; ii < xx.size (); ii++) + { + EXPECT_EQ (h_xx[ii], xx[ii]); + } + thrust::device_free (d_xx); } -TEST_F(TestModulePsiMemory, synchronize_memory_op_double_gpu_to_gpu) +TEST_F (TestModulePsiMemory, synchronize_memory_op_double_gpu_to_gpu) { - thrust::device_ptr d1_xx = thrust::device_malloc(xx.size()); - thrust::device_ptr d2_xx = thrust::device_malloc(xx.size()); - thrust::copy(xx.begin(), xx.end(), d1_xx); - synchronize_memory_double_gpu_to_gpu_op()(thrust::raw_pointer_cast(d2_xx), - thrust::raw_pointer_cast(d1_xx), - xx.size()); - - thrust::host_vector h_xx(xx.size()); - thrust::copy(d2_xx, d2_xx + xx.size(), h_xx.begin()); - for (int ii = 0; ii < xx.size(); ii++) - { - EXPECT_EQ(h_xx[ii], xx[ii]); - } - thrust::device_free(thrust::device_ptr(d1_xx)); - thrust::device_free(thrust::device_ptr(d2_xx)); + thrust::device_ptr d1_xx = thrust::device_malloc (xx.size ()); + thrust::device_ptr d2_xx = thrust::device_malloc (xx.size ()); + thrust::copy (xx.begin (), xx.end (), d1_xx); + synchronize_memory_double_gpu_to_gpu_op () (thrust::raw_pointer_cast (d2_xx), + thrust::raw_pointer_cast (d1_xx), + 
xx.size ()); + + thrust::host_vector h_xx (xx.size ()); + thrust::copy (d2_xx, d2_xx + xx.size (), h_xx.begin ()); + for (int ii = 0; ii < xx.size (); ii++) + { + EXPECT_EQ (h_xx[ii], xx[ii]); + } + thrust::device_free (thrust::device_ptr (d1_xx)); + thrust::device_free (thrust::device_ptr (d2_xx)); } -TEST_F(TestModulePsiMemory, synchronize_memory_op_complex_double_cpu_to_gpu) +TEST_F (TestModulePsiMemory, synchronize_memory_op_complex_double_cpu_to_gpu) { - thrust::device_ptr> dz_xx = thrust::device_malloc>(z_xx.size()); - std::vector> hvz_xx(z_xx.size(), 0); - thrust::copy(hvz_xx.begin(), hvz_xx.end(), dz_xx); - synchronize_memory_complex_double_cpu_to_gpu_op()(thrust::raw_pointer_cast(dz_xx), - z_xx.data(), - z_xx.size()); - - thrust::host_vector> hz_xx(z_xx.size()); - thrust::copy(dz_xx, dz_xx + z_xx.size(), hz_xx.begin()); - for (int ii = 0; ii < z_xx.size(); ii++) - { - EXPECT_EQ(hz_xx[ii], z_xx[ii]); - } - thrust::device_free(dz_xx); + thrust::device_ptr> dz_xx = thrust::device_malloc> (z_xx.size ()); + std::vector> hvz_xx (z_xx.size (), 0); + thrust::copy (hvz_xx.begin (), hvz_xx.end (), dz_xx); + synchronize_memory_complex_double_cpu_to_gpu_op () (thrust::raw_pointer_cast (dz_xx), z_xx.data (), z_xx.size ()); + + thrust::host_vector> hz_xx (z_xx.size ()); + thrust::copy (dz_xx, dz_xx + z_xx.size (), hz_xx.begin ()); + for (int ii = 0; ii < z_xx.size (); ii++) + { + EXPECT_EQ (hz_xx[ii], z_xx[ii]); + } + thrust::device_free (dz_xx); } -TEST_F(TestModulePsiMemory, synchronize_memory_op_complex_double_gpu_to_cpu) +TEST_F (TestModulePsiMemory, synchronize_memory_op_complex_double_gpu_to_cpu) { - thrust::device_ptr> dz_xx = thrust::device_malloc>(z_xx.size()); - thrust::copy(z_xx.begin(), z_xx.end(), dz_xx); - thrust::host_vector> hz_xx(z_xx.size()); - synchronize_memory_complex_double_gpu_to_cpu_op()(thrust::raw_pointer_cast(hz_xx.data()), - thrust::raw_pointer_cast(dz_xx), - z_xx.size()); - - for (int ii = 0; ii < z_xx.size(); ii++) - { - 
EXPECT_EQ(hz_xx[ii], z_xx[ii]); - } - thrust::device_free(dz_xx); + thrust::device_ptr> dz_xx = thrust::device_malloc> (z_xx.size ()); + thrust::copy (z_xx.begin (), z_xx.end (), dz_xx); + thrust::host_vector> hz_xx (z_xx.size ()); + synchronize_memory_complex_double_gpu_to_cpu_op () (thrust::raw_pointer_cast (hz_xx.data ()), + thrust::raw_pointer_cast (dz_xx), + z_xx.size ()); + + for (int ii = 0; ii < z_xx.size (); ii++) + { + EXPECT_EQ (hz_xx[ii], z_xx[ii]); + } + thrust::device_free (dz_xx); } -TEST_F(TestModulePsiMemory, synchronize_memory_op_complex_double_gpu_to_gpu) +TEST_F (TestModulePsiMemory, synchronize_memory_op_complex_double_gpu_to_gpu) { - thrust::device_ptr> dz1_xx = thrust::device_malloc>(z_xx.size()); - thrust::device_ptr> dz2_xx = thrust::device_malloc>(z_xx.size()); - thrust::copy(z_xx.begin(), z_xx.end(), dz1_xx); - synchronize_memory_complex_double_gpu_to_gpu_op()(thrust::raw_pointer_cast(dz2_xx), - thrust::raw_pointer_cast(dz1_xx), - z_xx.size()); - - thrust::host_vector> h_xx(z_xx.size()); - thrust::copy(dz2_xx, dz2_xx + z_xx.size(), h_xx.begin()); - for (int ii = 0; ii < z_xx.size(); ii++) - { - EXPECT_EQ(h_xx[ii], z_xx[ii]); - } - thrust::device_free(thrust::device_ptr>(dz1_xx)); - thrust::device_free(thrust::device_ptr>(dz2_xx)); + thrust::device_ptr> dz1_xx = thrust::device_malloc> (z_xx.size ()); + thrust::device_ptr> dz2_xx = thrust::device_malloc> (z_xx.size ()); + thrust::copy (z_xx.begin (), z_xx.end (), dz1_xx); + synchronize_memory_complex_double_gpu_to_gpu_op () (thrust::raw_pointer_cast (dz2_xx), + thrust::raw_pointer_cast (dz1_xx), + z_xx.size ()); + + thrust::host_vector> h_xx (z_xx.size ()); + thrust::copy (dz2_xx, dz2_xx + z_xx.size (), h_xx.begin ()); + for (int ii = 0; ii < z_xx.size (); ii++) + { + EXPECT_EQ (h_xx[ii], z_xx[ii]); + } + thrust::device_free (thrust::device_ptr> (dz1_xx)); + thrust::device_free (thrust::device_ptr> (dz2_xx)); } -TEST_F(TestModulePsiMemory, delete_memory_op_double_gpu) +TEST_F 
(TestModulePsiMemory, delete_memory_op_double_gpu) { - thrust::device_ptr d_xx = thrust::device_malloc(xx.size()); - delete_memory_double_gpu_op()(thrust::raw_pointer_cast(d_xx)); + thrust::device_ptr d_xx = thrust::device_malloc (xx.size ()); + delete_memory_double_gpu_op () (thrust::raw_pointer_cast (d_xx)); } -TEST_F(TestModulePsiMemory, delete_memory_op_complex_double_gpu) +TEST_F (TestModulePsiMemory, delete_memory_op_complex_double_gpu) { - thrust::device_ptr> dz_xx = thrust::device_malloc>(z_xx.size()); - delete_memory_complex_double_gpu_op()(thrust::raw_pointer_cast(dz_xx)); + thrust::device_ptr> dz_xx = thrust::device_malloc> (z_xx.size ()); + delete_memory_complex_double_gpu_op () (thrust::raw_pointer_cast (dz_xx)); } #endif // __UT_USE_CUDA || __UT_USE_ROCM diff --git a/source/source_base/module_external/blacs_connector.h b/source/source_base/module_external/blacs_connector.h index 61c67324e87..2ee3c813889 100644 --- a/source/source_base/module_external/blacs_connector.h +++ b/source/source_base/module_external/blacs_connector.h @@ -1,7 +1,7 @@ //------------------------------------>8====================================== // Copyright (c) 2016, Yu Shen (shenyu@ustc.edu.cn) // All rights reserved. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // * Redistributions of source code must retain the above copyright @@ -12,7 +12,7 @@ // * Neither the name of the nor the // names of its contributors may be used to endorse or promote products // derived from this software without specific prior written permission. -// +// // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE @@ -25,7 +25,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
//====================================8<---------------------------------------- // blacs - // Initialization +// Initialization #ifndef BLACS_CONNECTOR_H #define BLACS_CONNECTOR_H @@ -33,98 +33,117 @@ extern "C" { - void Cblacs_pinfo(int *myid, int *nprocs); - void Cblacs_get(int icontxt, int what, int *val); - void Cblacs_gridmap(int* icontxt, int *usermap, int ldumap, int nprow, int npcol); - // Informational and Miscellaneous - void Cblacs_gridinfo(int icontxt, int* nprow, int *npcol, int *myprow, int *mypcol); - void Cblacs_gridinit(int* icontxt, char* layout, int nprow, int npcol); - void Cblacs_gridexit(int icontxt); - int Cblacs_pnum(int icontxt, int prow, int pcol); - void Cblacs_pcoord(int icontxt, int pnum, int *prow, int *pcol); - void Cblacs_exit(int icontxt); + void Cblacs_pinfo (int* myid, int* nprocs); + void Cblacs_get (int icontxt, int what, int* val); + void Cblacs_gridmap (int* icontxt, int* usermap, int ldumap, int nprow, int npcol); + // Informational and Miscellaneous + void Cblacs_gridinfo (int icontxt, int* nprow, int* npcol, int* myprow, int* mypcol); + void Cblacs_gridinit (int* icontxt, char* layout, int nprow, int npcol); + void Cblacs_gridexit (int icontxt); + int Cblacs_pnum (int icontxt, int prow, int pcol); + void Cblacs_pcoord (int icontxt, int pnum, int* prow, int* pcol); + void Cblacs_exit (int icontxt); // broadcast (send/recv) - void Cigebs2d(int ConTxt, char *scope, char *top, int m, int n, int *A, int lda); - void Cigebr2d(int ConTxt, char *scope, char *top, int m, int n, int *A, int lda, int rsrc, int csrc); + void Cigebs2d (int ConTxt, char* scope, char* top, int m, int n, int* A, int lda); + void Cigebr2d (int ConTxt, char* scope, char* top, int m, int n, int* A, int lda, int rsrc, int csrc); - void Csgebs2d(int ConTxt, char *scope, char *top, int m, int n, float *A, int lda); - void Csgebr2d(int ConTxt, char *scope, char *top, int m, int n, float *A, int lda, int rsrc, int csrc); + void Csgebs2d (int ConTxt, char* scope, 
char* top, int m, int n, float* A, int lda); + void Csgebr2d (int ConTxt, char* scope, char* top, int m, int n, float* A, int lda, int rsrc, int csrc); - void Cdgebs2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda); - void Cdgebr2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda, int rsrc, int csrc); + void Cdgebs2d (int ConTxt, char* scope, char* top, int m, int n, double* A, int lda); + void Cdgebr2d (int ConTxt, char* scope, char* top, int m, int n, double* A, int lda, int rsrc, int csrc); - void Ccgebs2d(int ConTxt, char *scope, char *top, int m, int n, std::complex *A, int lda); - void Ccgebr2d(int ConTxt, char *scope, char *top, int m, int n, std::complex *A, int lda, int rsrc, int csrc); + void Ccgebs2d (int ConTxt, char* scope, char* top, int m, int n, std::complex* A, int lda); + void Ccgebr2d (int ConTxt, + char* scope, + char* top, + int m, + int n, + std::complex* A, + int lda, + int rsrc, + int csrc); - void Czgebs2d(int ConTxt, char *scope, char *top, int m, int n, std::complex *A, int lda); - void Czgebr2d(int ConTxt, char *scope, char *top, int m, int n, std::complex *A, int lda, int rsrc, int csrc); + void Czgebs2d (int ConTxt, char* scope, char* top, int m, int n, std::complex* A, int lda); + void Czgebr2d (int ConTxt, + char* scope, + char* top, + int m, + int n, + std::complex* A, + int lda, + int rsrc, + int csrc); } // unified interface for broadcast template -void Cxgebs2d(int ConTxt, char *scope, char *top, int m, int n, T *A, int lda) +void + Cxgebs2d (int ConTxt, char* scope, char* top, int m, int n, T* A, int lda) { - static_assert( - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same>::value || - std::is_same>::value, - "Type not supported"); + static_assert (std::is_same::value || std::is_same::value || std::is_same::value + || std::is_same>::value || std::is_same>::value, + "Type not supported"); - if (std::is_same::value) { - Cigebs2d(ConTxt, scope, top, 
m, n, reinterpret_cast(A), lda); - } - if (std::is_same::value) { - Csgebs2d(ConTxt, scope, top, m, n, reinterpret_cast(A), lda); - } - if (std::is_same::value) { - Cdgebs2d(ConTxt, scope, top, m, n, reinterpret_cast(A), lda); - } - if (std::is_same>::value) { - Ccgebs2d(ConTxt, scope, top, m, n, reinterpret_cast*>(A), lda); - } - if (std::is_same>::value) { - Czgebs2d(ConTxt, scope, top, m, n, reinterpret_cast*>(A), lda); - } + if (std::is_same::value) + { + Cigebs2d (ConTxt, scope, top, m, n, reinterpret_cast (A), lda); + } + if (std::is_same::value) + { + Csgebs2d (ConTxt, scope, top, m, n, reinterpret_cast (A), lda); + } + if (std::is_same::value) + { + Cdgebs2d (ConTxt, scope, top, m, n, reinterpret_cast (A), lda); + } + if (std::is_same>::value) + { + Ccgebs2d (ConTxt, scope, top, m, n, reinterpret_cast*> (A), lda); + } + if (std::is_same>::value) + { + Czgebs2d (ConTxt, scope, top, m, n, reinterpret_cast*> (A), lda); + } } template -void Cxgebr2d(int ConTxt, char *scope, char *top, int m, int n, T *A, int lda, int rsrc, int csrc) +void + Cxgebr2d (int ConTxt, char* scope, char* top, int m, int n, T* A, int lda, int rsrc, int csrc) { - static_assert( - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same>::value || - std::is_same>::value, - "Type not supported"); + static_assert (std::is_same::value || std::is_same::value || std::is_same::value + || std::is_same>::value || std::is_same>::value, + "Type not supported"); - if (std::is_same::value) { - Cigebr2d(ConTxt, scope, top, m, n, reinterpret_cast(A), lda, rsrc, csrc); - } - if (std::is_same::value) { - Csgebr2d(ConTxt, scope, top, m, n, reinterpret_cast(A), lda, rsrc, csrc); - } - if (std::is_same::value) { - Cdgebr2d(ConTxt, scope, top, m, n, reinterpret_cast(A), lda, rsrc, csrc); - } - if (std::is_same>::value) { - Ccgebr2d(ConTxt, scope, top, m, n, reinterpret_cast*>(A), lda, rsrc, csrc); - } - if (std::is_same>::value) { - Czgebr2d(ConTxt, scope, top, m, n, 
reinterpret_cast*>(A), lda, rsrc, csrc); - } + if (std::is_same::value) + { + Cigebr2d (ConTxt, scope, top, m, n, reinterpret_cast (A), lda, rsrc, csrc); + } + if (std::is_same::value) + { + Csgebr2d (ConTxt, scope, top, m, n, reinterpret_cast (A), lda, rsrc, csrc); + } + if (std::is_same::value) + { + Cdgebr2d (ConTxt, scope, top, m, n, reinterpret_cast (A), lda, rsrc, csrc); + } + if (std::is_same>::value) + { + Ccgebr2d (ConTxt, scope, top, m, n, reinterpret_cast*> (A), lda, rsrc, csrc); + } + if (std::is_same>::value) + { + Czgebr2d (ConTxt, scope, top, m, n, reinterpret_cast*> (A), lda, rsrc, csrc); + } } - #ifdef __MPI #include extern "C" { - int Csys2blacs_handle(MPI_Comm SysCtxt); - MPI_Comm Cblacs2sys_handle(int BlacsCtxt); + int Csys2blacs_handle (MPI_Comm SysCtxt); + MPI_Comm Cblacs2sys_handle (int BlacsCtxt); } #endif // __MPI diff --git a/source/source_base/module_external/blas_connector.h b/source/source_base/module_external/blas_connector.h index dd3edd3e43c..6a2c4c5598a 100644 --- a/source/source_base/module_external/blas_connector.h +++ b/source/source_base/module_external/blas_connector.h @@ -12,188 +12,317 @@ // If you need a BLAS function that is not included here, feel free to add its declaration as needed. 
extern "C" { -// Level 1 BLAS -void sscal_(const int *N, const float *alpha, float *X, const int *incX); -void dscal_(const int *N, const double *alpha, double *X, const int *incX); -void cscal_(const int *N, const std::complex *alpha, std::complex *X, const int *incX); -void zscal_(const int *N, const std::complex *alpha, std::complex *X, const int *incX); - -void saxpy_(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY); -void daxpy_(const int *N, const double *alpha, const double *X, const int *incX, double *Y, const int *incY); -void caxpy_(const int *N, const std::complex *alpha, const std::complex *X, const int *incX, std::complex *Y, const int *incY); -void zaxpy_(const int *N, const std::complex *alpha, const std::complex *X, const int *incX, std::complex *Y, const int *incY); - -void scopy_(const int *n, const float *a, const int *incx, float *b, const int *incy); -void dcopy_(const int *n, const double *a, const int *incx, double *b, const int *incy); -void ccopy_(const int *n, const std::complex *a, const int *incx, std::complex *b, const int *incy); -void zcopy_(const int *n, const std::complex *a, const int *incx, std::complex *b, const int *incy); - -// Note: sdot_/ddot_ return by value -float sdot_(const int *N, const float *X, const int *incX, const float *Y, const int *incY); -double ddot_(const int *N, const double *X, const int *incX, const double *Y, const int *incY); - -float snrm2_(const int *n, const float *X, const int *incX); -double dnrm2_(const int *n, const double *X, const int *incX); -float scnrm2_(const int *n, const std::complex *X, const int *incX); -double dznrm2_(const int *n, const std::complex *X, const int *incX); - -// Level 2 BLAS - -void sgemv_(const char *transa, const int *m, const int *n, - const float *alpha, const float *a, const int *lda, - const float *x, const int *incx, - const float *beta, float *y, const int *incy); - -void dgemv_(const char *transa, const int *m, const 
int *n, - const double *alpha, const double *a, const int *lda, - const double *x, const int *incx, - const double *beta, double *y, const int *incy); - -void cgemv_(const char *trans, const int *m, const int *n, - const std::complex *alpha, - const std::complex *a, const int *lda, - const std::complex *x, const int *incx, - const std::complex *beta, - std::complex *y, const int *incy); - -void zgemv_(const char *trans, const int *m, const int *n, - const std::complex *alpha, - const std::complex *a, const int *lda, - const std::complex *x, const int *incx, - const std::complex *beta, - std::complex *y, const int *incy); - -void dsymv_(const char *uplo, const int *n, - const double *alpha, const double *a, const int *lda, - const double *x, const int *incx, - const double *beta, double *y, const int *incy); - -void dger_(const int *m, const int *n, - const double *alpha, - const double *x, const int *incx, - const double *y, const int *incy, - double *a, const int *lda); - -void zgerc_(const int *m, const int *n, - const std::complex *alpha, - const std::complex *x, const int *incx, - const std::complex *y, const int *incy, - std::complex *a, const int *lda); - -// Level 3 BLAS - -void sgemm_(const char *transa, const char *transb, - const int *m, const int *n, const int *k, - const float *alpha, - const float *a, const int *lda, - const float *b, const int *ldb, - const float *beta, - float *c, const int *ldc); - -void dgemm_(const char *transa, const char *transb, - const int *m, const int *n, const int *k, - const double *alpha, - const double *a, const int *lda, - const double *b, const int *ldb, - const double *beta, - double *c, const int *ldc); - -void cgemm_(const char *transa, const char *transb, - const int *m, const int *n, const int *k, - const std::complex *alpha, - const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, - const std::complex *beta, - std::complex *c, const int *ldc); - -void zgemm_(const char *transa, const char 
*transb, - const int *m, const int *n, const int *k, - const std::complex *alpha, - const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, - const std::complex *beta, - std::complex *c, const int *ldc); - -void ssymm_(const char *side, const char *uplo, - const int *m, const int *n, - const float *alpha, - const float *a, const int *lda, - const float *b, const int *ldb, - const float *beta, - float *c, const int *ldc); - -void dsymm_(const char *side, const char *uplo, - const int *m, const int *n, - const double *alpha, - const double *a, const int *lda, - const double *b, const int *ldb, - const double *beta, - double *c, const int *ldc); - -void csymm_(const char *side, const char *uplo, - const int *m, const int *n, - const std::complex *alpha, - const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, - const std::complex *beta, - std::complex *c, const int *ldc); - -void zsymm_(const char *side, const char *uplo, - const int *m, const int *n, - const std::complex *alpha, - const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, - const std::complex *beta, - std::complex *c, const int *ldc); - -void chemm_(const char *side, const char *uplo, - const int *m, const int *n, - const std::complex *alpha, - const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, - const std::complex *beta, - std::complex *c, const int *ldc); - -void zhemm_(const char *side, const char *uplo, - const int *m, const int *n, - const std::complex *alpha, - const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, - const std::complex *beta, - std::complex *c, const int *ldc); - -void dtrsm_(const char *side, const char *uplo, const char *transa, const char *diag, - const int *m, const int *n, - const double *alpha, - const double *a, const int *lda, - double *b, const int *ldb); - -void ztrsm_(const char *side, const char *uplo, const char *transa, const char *diag, - 
const int *m, const int *n, - const std::complex *alpha, - const std::complex *a, const int *lda, - std::complex *b, const int *ldb); - -// === Hermitian rank-k update === -void cherk_(const char* uplo, const char* trans, const int* n, const int* k, - const float* alpha, - const std::complex* a, const int* lda, - const float* beta, - std::complex* c, const int* ldc); - -void zherk_(const char* uplo, const char* trans, const int* n, const int* k, - const double* alpha, - const std::complex* a, const int* lda, - const double* beta, - std::complex* c, const int* ldc); - -// === Symmetric rank-k update === -void dsyrk_(const char* uplo, const char* trans, const int* n, const int* k, - const double* alpha, - const double* a, const int* lda, - const double* beta, - double* c, - const int* ldc); + // Level 1 BLAS + void sscal_ (const int* N, const float* alpha, float* X, const int* incX); + void dscal_ (const int* N, const double* alpha, double* X, const int* incX); + void cscal_ (const int* N, const std::complex* alpha, std::complex* X, const int* incX); + void zscal_ (const int* N, const std::complex* alpha, std::complex* X, const int* incX); + + void saxpy_ (const int* N, const float* alpha, const float* X, const int* incX, float* Y, const int* incY); + void daxpy_ (const int* N, const double* alpha, const double* X, const int* incX, double* Y, const int* incY); + void caxpy_ (const int* N, + const std::complex* alpha, + const std::complex* X, + const int* incX, + std::complex* Y, + const int* incY); + void zaxpy_ (const int* N, + const std::complex* alpha, + const std::complex* X, + const int* incX, + std::complex* Y, + const int* incY); + + void scopy_ (const int* n, const float* a, const int* incx, float* b, const int* incy); + void dcopy_ (const int* n, const double* a, const int* incx, double* b, const int* incy); + void ccopy_ (const int* n, const std::complex* a, const int* incx, std::complex* b, const int* incy); + void + zcopy_ (const int* n, const 
std::complex* a, const int* incx, std::complex* b, const int* incy); + + // Note: sdot_/ddot_ return by value + float sdot_ (const int* N, const float* X, const int* incX, const float* Y, const int* incY); + double ddot_ (const int* N, const double* X, const int* incX, const double* Y, const int* incY); + + float snrm2_ (const int* n, const float* X, const int* incX); + double dnrm2_ (const int* n, const double* X, const int* incX); + float scnrm2_ (const int* n, const std::complex* X, const int* incX); + double dznrm2_ (const int* n, const std::complex* X, const int* incX); + + // Level 2 BLAS + + void sgemv_ (const char* transa, + const int* m, + const int* n, + const float* alpha, + const float* a, + const int* lda, + const float* x, + const int* incx, + const float* beta, + float* y, + const int* incy); + + void dgemv_ (const char* transa, + const int* m, + const int* n, + const double* alpha, + const double* a, + const int* lda, + const double* x, + const int* incx, + const double* beta, + double* y, + const int* incy); + + void cgemv_ (const char* trans, + const int* m, + const int* n, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* x, + const int* incx, + const std::complex* beta, + std::complex* y, + const int* incy); + + void zgemv_ (const char* trans, + const int* m, + const int* n, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* x, + const int* incx, + const std::complex* beta, + std::complex* y, + const int* incy); + + void dsymv_ (const char* uplo, + const int* n, + const double* alpha, + const double* a, + const int* lda, + const double* x, + const int* incx, + const double* beta, + double* y, + const int* incy); + + void dger_ (const int* m, + const int* n, + const double* alpha, + const double* x, + const int* incx, + const double* y, + const int* incy, + double* a, + const int* lda); + + void zgerc_ (const int* m, + const int* n, + const std::complex* alpha, 
+ const std::complex* x, + const int* incx, + const std::complex* y, + const int* incy, + std::complex* a, + const int* lda); + + // Level 3 BLAS + + void sgemm_ (const char* transa, + const char* transb, + const int* m, + const int* n, + const int* k, + const float* alpha, + const float* a, + const int* lda, + const float* b, + const int* ldb, + const float* beta, + float* c, + const int* ldc); + + void dgemm_ (const char* transa, + const char* transb, + const int* m, + const int* n, + const int* k, + const double* alpha, + const double* a, + const int* lda, + const double* b, + const int* ldb, + const double* beta, + double* c, + const int* ldc); + + void cgemm_ (const char* transa, + const char* transb, + const int* m, + const int* n, + const int* k, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* b, + const int* ldb, + const std::complex* beta, + std::complex* c, + const int* ldc); + + void zgemm_ (const char* transa, + const char* transb, + const int* m, + const int* n, + const int* k, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* b, + const int* ldb, + const std::complex* beta, + std::complex* c, + const int* ldc); + + void ssymm_ (const char* side, + const char* uplo, + const int* m, + const int* n, + const float* alpha, + const float* a, + const int* lda, + const float* b, + const int* ldb, + const float* beta, + float* c, + const int* ldc); + + void dsymm_ (const char* side, + const char* uplo, + const int* m, + const int* n, + const double* alpha, + const double* a, + const int* lda, + const double* b, + const int* ldb, + const double* beta, + double* c, + const int* ldc); + + void csymm_ (const char* side, + const char* uplo, + const int* m, + const int* n, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* b, + const int* ldb, + const std::complex* beta, + std::complex* c, + const int* ldc); + + void zsymm_ (const 
char* side, + const char* uplo, + const int* m, + const int* n, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* b, + const int* ldb, + const std::complex* beta, + std::complex* c, + const int* ldc); + + void chemm_ (const char* side, + const char* uplo, + const int* m, + const int* n, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* b, + const int* ldb, + const std::complex* beta, + std::complex* c, + const int* ldc); + + void zhemm_ (const char* side, + const char* uplo, + const int* m, + const int* n, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* b, + const int* ldb, + const std::complex* beta, + std::complex* c, + const int* ldc); + + void dtrsm_ (const char* side, + const char* uplo, + const char* transa, + const char* diag, + const int* m, + const int* n, + const double* alpha, + const double* a, + const int* lda, + double* b, + const int* ldb); + + void ztrsm_ (const char* side, + const char* uplo, + const char* transa, + const char* diag, + const int* m, + const int* n, + const std::complex* alpha, + const std::complex* a, + const int* lda, + std::complex* b, + const int* ldb); + + // === Hermitian rank-k update === + void cherk_ (const char* uplo, + const char* trans, + const int* n, + const int* k, + const float* alpha, + const std::complex* a, + const int* lda, + const float* beta, + std::complex* c, + const int* ldc); + + void zherk_ (const char* uplo, + const char* trans, + const int* n, + const int* k, + const double* alpha, + const std::complex* a, + const int* lda, + const double* beta, + std::complex* c, + const int* ldc); + + // === Symmetric rank-k update === + void dsyrk_ (const char* uplo, + const char* trans, + const int* n, + const int* k, + const double* alpha, + const double* a, + const int* lda, + const double* beta, + double* c, + const int* ldc); } // Class BlasConnector provide the connector to 
fortran lapack routine. @@ -201,240 +330,541 @@ void dsyrk_(const char* uplo, const char* trans, const int* n, const int* k, // Usage example: BlasConnector::functionname(parameter list). class BlasConnector { -public: - - // Peize Lin add 2016-08-04 - // y=a*x+y - static - void axpy( const int n, const float alpha, const float *X, const int incX, float *Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void axpy( const int n, const double alpha, const double *X, const int incX, double *Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - - // Peize Lin add 2016-08-04 - // x=a*x - static - void scal( const int n, const float alpha, float *X, const int incX, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void scal( const int n, const double alpha, double *X, const int incX, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void scal( const int n, const std::complex alpha, std::complex *X, const int incX, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void scal( const int n, const std::complex alpha, std::complex *X, const int incX, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - - // Peize Lin add 2017-10-27 - // d=x*y - static - float dot( const int n, const float*const X, const int incX, const float*const Y, const int incY, 
base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - double dot( const int n, const double*const X, const int incX, const double*const Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - // d=x*y - static - float dotu( const int n, const float*const X, const int incX, const float*const Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - double dotu( const int n, const double*const X, const int incX, const double*const Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - std::complex dotu( const int n, const std::complex*const X, const int incX, const std::complex*const Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - std::complex dotu( const int n, const std::complex*const X, const int incX, const std::complex*const Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - // d=x.conj()*y - static - float dotc( const int n, const float*const X, const int incX, const float*const Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - double dotc( const int n, const double*const X, const int incX, const double*const Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - std::complex dotc( const int n, const std::complex*const X, const int incX, const std::complex*const Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - std::complex dotc( const int n, const std::complex*const X, const int incX, const std::complex*const Y, const int incY, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - // Peize Lin add 2017-10-27, fix bug 
trans 2019-01-17 - // C = a * A.? * B.? + b * C - // Row Major by default - static - void gemm(const char transa, const char transb, const int m, const int n, const int k, - const float alpha, const float *a, const int lda, const float *b, const int ldb, - const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void gemm(const char transa, const char transb, const int m, const int n, const int k, - const double alpha, const double *a, const int lda, const double *b, const int ldb, - const double beta, double *c, const int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void gemm(const char transa, const char transb, const int m, const int n, const int k, - const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, - const std::complex beta, std::complex *c, const int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void gemm(const char transa, const char transb, const int m, const int n, const int k, - const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, - const std::complex beta, std::complex *c, const int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - // Col-Major if you need to use it - - static - void gemm_cm(const char transa, const char transb, const int m, const int n, const int k, - const float alpha, const float *a, const int lda, const float *b, const int ldb, - const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void gemm_cm(const char transa, const char transb, const int m, const int n, const int k, - const double alpha, const double *a, const int lda, const double *b, const int ldb, - const double beta, double *c, const int ldc, 
base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void gemm_cm(const char transa, const char transb, const int m, const int n, const int k, - const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, - const std::complex beta, std::complex *c, const int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void gemm_cm(const char transa, const char transb, const int m, const int n, const int k, - const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, - const std::complex beta, std::complex *c, const int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - // side=='L': C = a * A * B + b * C. - // side=='R': C = a * B * A + b * C. - // A == A^T - // Because you cannot pack symm or hemm into a row-major kernel by exchanging parameters, so only col-major functions are provided. 
- static - void symm_cm(const char side, const char uplo, const int m, const int n, - const float alpha, const float *a, const int lda, const float *b, const int ldb, - const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void symm_cm(const char side, const char uplo, const int m, const int n, - const double alpha, const double *a, const int lda, const double *b, const int ldb, - const double beta, double *c, const int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void symm_cm(const char side, const char uplo, const int m, const int n, - const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, - const std::complex beta, std::complex *c, const int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void symm_cm(const char side, const char uplo, const int m, const int n, - const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, - const std::complex beta, std::complex *c, const int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - // side=='L': C = a * A * B + b * C. - // side=='R': C = a * B * A + b * C. 
- // A == A^H - static - void hemm_cm(const char side, const char uplo, const int m, const int n, - const float alpha, const float *a, const int lda, const float *b, const int ldb, - const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void hemm_cm(const char side, const char uplo, const int m, const int n, - const double alpha, const double *a, const int lda, const double *b, const int ldb, - const double beta, double *c, const int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void hemm_cm(char side, char uplo, int m, int n, - std::complex alpha, std::complex *a, int lda, std::complex *b, int ldb, - std::complex beta, std::complex *c, int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void hemm_cm(char side, char uplo, int m, int n, - std::complex alpha, std::complex *a, int lda, std::complex *b, int ldb, - std::complex beta, std::complex *c, int ldc, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - // y = A*x + beta*y - static - void gemv(const char trans, const int m, const int n, - const float alpha, const float* A, const int lda, const float* X, const int incx, - const float beta, float* Y, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void gemv(const char trans, const int m, const int n, - const double alpha, const double* A, const int lda, const double* X, const int incx, - const double beta, double* Y, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void gemv(const char trans, const int m, const int n, - const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, - const std::complex beta, std::complex *Y, const int incy, base_device::AbacusDevice_t 
device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void gemv(const char trans, const int m, const int n, - const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, - const std::complex beta, std::complex *Y, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - // Peize Lin add 2018-06-12 - // out = ||x||_2 - static - float nrm2( const int n, const float *X, const int, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice ); - - static - double nrm2( const int n, const double *X, const int incX, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice ); - - static - double nrm2( const int n, const std::complex *X, const int incX, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice ); - - - // copies a into b - static - void copy(const int n, const double *a, const int incx, double *b, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void copy(const int n, const float *a, const int incx, float *b, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void copy(const int n, const std::complex *a, const int incx, std::complex *b, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void copy(const int n, const std::complex *a, const int incx, std::complex *b, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - // There is some other operators needed, so implemented manually here - template - static - void vector_mul_vector(const int& dim, T* result, const T* vector1, const T* vector2, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - template - static - void vector_div_vector(const int& dim, T* result, const T* 
vector1, const T* vector2, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - // y = alpha * x + beta * y - static - void vector_add_vector(const int& dim, float *result, const float *vector1, const float constant1, const float *vector2, const float constant2, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void vector_add_vector(const int& dim, double *result, const double *vector1, const double constant1, const double *vector2, const double constant2, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void vector_add_vector(const int& dim, std::complex *result, const std::complex *vector1, const float constant1, const std::complex *vector2, const float constant2, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); - - static - void vector_add_vector(const int& dim, std::complex *result, const std::complex *vector1, const double constant1, const std::complex *vector2, const double constant2, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + public: + // Peize Lin add 2016-08-04 + // y=a*x+y + static void axpy (const int n, + const float alpha, + const float* X, + const int incX, + float* Y, + const int incY, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void axpy (const int n, + const double alpha, + const double* X, + const int incX, + double* Y, + const int incY, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void axpy (const int n, + const std::complex alpha, + const std::complex* X, + const int incX, + std::complex* Y, + const int incY, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void axpy (const int n, + const std::complex alpha, + const std::complex* X, + const int incX, + std::complex* Y, + const int incY, + 
base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + // Peize Lin add 2016-08-04 + // x=a*x + static void scal (const int n, + const float alpha, + float* X, + const int incX, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void scal (const int n, + const double alpha, + double* X, + const int incX, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void scal (const int n, + const std::complex alpha, + std::complex* X, + const int incX, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void scal (const int n, + const std::complex alpha, + std::complex* X, + const int incX, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + // Peize Lin add 2017-10-27 + // d=x*y + static float dot (const int n, + const float* const X, + const int incX, + const float* const Y, + const int incY, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static double dot (const int n, + const double* const X, + const int incX, + const double* const Y, + const int incY, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + // d=x*y + static float dotu (const int n, + const float* const X, + const int incX, + const float* const Y, + const int incY, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static double dotu (const int n, + const double* const X, + const int incX, + const double* const Y, + const int incY, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static std::complex dotu (const int n, + const std::complex* const X, + const int incX, + const std::complex* const Y, + const int incY, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static std::complex dotu (const int n, + const 
std::complex* const X, + const int incX, + const std::complex* const Y, + const int incY, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + // d=x.conj()*y + static float dotc (const int n, + const float* const X, + const int incX, + const float* const Y, + const int incY, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static double dotc (const int n, + const double* const X, + const int incX, + const double* const Y, + const int incY, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static std::complex dotc (const int n, + const std::complex* const X, + const int incX, + const std::complex* const Y, + const int incY, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static std::complex dotc (const int n, + const std::complex* const X, + const int incX, + const std::complex* const Y, + const int incY, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + // Peize Lin add 2017-10-27, fix bug trans 2019-01-17 + // C = a * A.? * B.? 
+ b * C + // Row Major by default + static void gemm (const char transa, + const char transb, + const int m, + const int n, + const int k, + const float alpha, + const float* a, + const int lda, + const float* b, + const int ldb, + const float beta, + float* c, + const int ldc, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void gemm (const char transa, + const char transb, + const int m, + const int n, + const int k, + const double alpha, + const double* a, + const int lda, + const double* b, + const int ldb, + const double beta, + double* c, + const int ldc, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void gemm (const char transa, + const char transb, + const int m, + const int n, + const int k, + const std::complex alpha, + const std::complex* a, + const int lda, + const std::complex* b, + const int ldb, + const std::complex beta, + std::complex* c, + const int ldc, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void gemm (const char transa, + const char transb, + const int m, + const int n, + const int k, + const std::complex alpha, + const std::complex* a, + const int lda, + const std::complex* b, + const int ldb, + const std::complex beta, + std::complex* c, + const int ldc, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + // Col-Major if you need to use it + + static void gemm_cm (const char transa, + const char transb, + const int m, + const int n, + const int k, + const float alpha, + const float* a, + const int lda, + const float* b, + const int ldb, + const float beta, + float* c, + const int ldc, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void gemm_cm (const char transa, + const char transb, + const int m, + const int n, + const int k, + const double alpha, + const double* a, + const int lda, + const double* b, + 
const int ldb, + const double beta, + double* c, + const int ldc, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void gemm_cm (const char transa, + const char transb, + const int m, + const int n, + const int k, + const std::complex alpha, + const std::complex* a, + const int lda, + const std::complex* b, + const int ldb, + const std::complex beta, + std::complex* c, + const int ldc, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void gemm_cm (const char transa, + const char transb, + const int m, + const int n, + const int k, + const std::complex alpha, + const std::complex* a, + const int lda, + const std::complex* b, + const int ldb, + const std::complex beta, + std::complex* c, + const int ldc, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + // side=='L': C = a * A * B + b * C. + // side=='R': C = a * B * A + b * C. + // A == A^T + // Because you cannot pack symm or hemm into a row-major kernel by exchanging parameters, so only col-major + // functions are provided. 
+ static void symm_cm (const char side, + const char uplo, + const int m, + const int n, + const float alpha, + const float* a, + const int lda, + const float* b, + const int ldb, + const float beta, + float* c, + const int ldc, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void symm_cm (const char side, + const char uplo, + const int m, + const int n, + const double alpha, + const double* a, + const int lda, + const double* b, + const int ldb, + const double beta, + double* c, + const int ldc, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void symm_cm (const char side, + const char uplo, + const int m, + const int n, + const std::complex alpha, + const std::complex* a, + const int lda, + const std::complex* b, + const int ldb, + const std::complex beta, + std::complex* c, + const int ldc, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void symm_cm (const char side, + const char uplo, + const int m, + const int n, + const std::complex alpha, + const std::complex* a, + const int lda, + const std::complex* b, + const int ldb, + const std::complex beta, + std::complex* c, + const int ldc, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + // side=='L': C = a * A * B + b * C. + // side=='R': C = a * B * A + b * C. 
+ // A == A^H + static void hemm_cm (const char side, + const char uplo, + const int m, + const int n, + const float alpha, + const float* a, + const int lda, + const float* b, + const int ldb, + const float beta, + float* c, + const int ldc, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void hemm_cm (const char side, + const char uplo, + const int m, + const int n, + const double alpha, + const double* a, + const int lda, + const double* b, + const int ldb, + const double beta, + double* c, + const int ldc, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void hemm_cm (char side, + char uplo, + int m, + int n, + std::complex alpha, + std::complex* a, + int lda, + std::complex* b, + int ldb, + std::complex beta, + std::complex* c, + int ldc, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void hemm_cm (char side, + char uplo, + int m, + int n, + std::complex alpha, + std::complex* a, + int lda, + std::complex* b, + int ldb, + std::complex beta, + std::complex* c, + int ldc, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + // y = A*x + beta*y + static void gemv (const char trans, + const int m, + const int n, + const float alpha, + const float* A, + const int lda, + const float* X, + const int incx, + const float beta, + float* Y, + const int incy, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void gemv (const char trans, + const int m, + const int n, + const double alpha, + const double* A, + const int lda, + const double* X, + const int incx, + const double beta, + double* Y, + const int incy, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void gemv (const char trans, + const int m, + const int n, + const std::complex alpha, + const std::complex* A, + const int lda, + const 
std::complex* X, + const int incx, + const std::complex beta, + std::complex* Y, + const int incy, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void gemv (const char trans, + const int m, + const int n, + const std::complex alpha, + const std::complex* A, + const int lda, + const std::complex* X, + const int incx, + const std::complex beta, + std::complex* Y, + const int incy, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + // Peize Lin add 2018-06-12 + // out = ||x||_2 + static float nrm2 (const int n, + const float* X, + const int, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static double nrm2 (const int n, + const double* X, + const int incX, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static double nrm2 (const int n, + const std::complex* X, + const int incX, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + // copies a into b + static void copy (const int n, + const double* a, + const int incx, + double* b, + const int incy, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void copy (const int n, + const float* a, + const int incx, + float* b, + const int incy, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void copy (const int n, + const std::complex* a, + const int incx, + std::complex* b, + const int incy, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void copy (const int n, + const std::complex* a, + const int incx, + std::complex* b, + const int incy, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + // There is some other operators needed, so implemented manually here + template + static void vector_mul_vector (const int& dim, + T* result, + const T* 
vector1, + const T* vector2, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + template + static void vector_div_vector (const int& dim, + T* result, + const T* vector1, + const T* vector2, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + // y = alpha * x + beta * y + static void vector_add_vector (const int& dim, + float* result, + const float* vector1, + const float constant1, + const float* vector2, + const float constant2, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void vector_add_vector (const int& dim, + double* result, + const double* vector1, + const double constant1, + const double* vector2, + const double constant2, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void vector_add_vector (const int& dim, + std::complex* result, + const std::complex* vector1, + const float constant1, + const std::complex* vector2, + const float constant2, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + + static void vector_add_vector (const int& dim, + std::complex* result, + const std::complex* vector1, + const double constant1, + const std::complex* vector2, + const double constant2, + base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); #ifdef __DSP - /// @brief Inject the DSP cluster id used by mt-allocator BLAS kernels. - /// Caller-injected (typically once after input parameters are read). - /// Defaults to 0 if never set. - static void set_dsp_cluster_id(int id); + /// @brief Inject the DSP cluster id used by mt-allocator BLAS kernels. + /// Caller-injected (typically once after input parameters are read). + /// Defaults to 0 if never set. 
+ static void set_dsp_cluster_id (int id); -private: - static int dsp_cluster_id_; + private: + static int dsp_cluster_id_; #endif }; @@ -446,21 +876,24 @@ class BlasConnector // If you want to use cublas, you need these functions to create and destroy the cublas/hipblas handle. // You also need to use these functions to translate the transpose parameter into cublas/hipblas datatype. -namespace BlasUtils{ +namespace BlasUtils +{ - static cublasHandle_t cublas_handle = nullptr; +static cublasHandle_t cublas_handle = nullptr; - void createGpuBlasHandle(); // Create a cublas/hipblas handle. +void createGpuBlasHandle (); // Create a cublas/hipblas handle. - void destoryBLAShandle(); // Destroy the cublas/hipblas handle. Do this when the software is about to end. +void destoryBLAShandle (); // Destroy the cublas/hipblas handle. Do this when the software is about to end. - cublasOperation_t judge_trans(bool is_complex, const char& trans, const char* name); // Translate a normal transpose parameter to a cublas/hipblas type. +cublasOperation_t judge_trans (bool is_complex, + const char& trans, + const char* name); // Translate a normal transpose parameter to a cublas/hipblas type. - cublasSideMode_t judge_side(const char& trans); // Translate a normal side parameter to a cublas/hipblas type. +cublasSideMode_t judge_side (const char& trans); // Translate a normal side parameter to a cublas/hipblas type. - cublasFillMode_t judge_fill(const char& trans); // Translate a normal fill parameter to a cublas/hipblas type. +cublasFillMode_t judge_fill (const char& trans); // Translate a normal fill parameter to a cublas/hipblas type. 
-} +} // namespace BlasUtils #endif @@ -470,27 +903,27 @@ namespace BlasUtils{ #ifdef GATHER_INFO #define zgemm_ zgemm_i -void zgemm_i(const char *transa, - const char *transb, - const int *m, - const int *n, - const int *k, - const std::complex *alpha, - const std::complex *a, - const int *lda, - const std::complex *b, - const int *ldb, - const std::complex *beta, - std::complex *c, - const int *ldc); - -#define zaxpy_ zaxpy_i -void zaxpy_i(const int *N, - const std::complex *alpha, - const std::complex *X, - const int *incX, - std::complex *Y, - const int *incY); +void zgemm_i (const char* transa, + const char* transb, + const int* m, + const int* n, + const int* k, + const std::complex* alpha, + const std::complex* a, + const int* lda, + const std::complex* b, + const int* ldb, + const std::complex* beta, + std::complex* c, + const int* ldc); + +#define zaxpy_ zaxpy_i +void zaxpy_i (const int* N, + const std::complex* alpha, + const std::complex* X, + const int* incX, + std::complex* Y, + const int* incY); /* #define zgemv_ zgemv_i diff --git a/source/source_base/module_external/blas_connector_base.cpp b/source/source_base/module_external/blas_connector_base.cpp index b07f8a3a780..e964151f77f 100644 --- a/source/source_base/module_external/blas_connector_base.cpp +++ b/source/source_base/module_external/blas_connector_base.cpp @@ -8,69 +8,77 @@ #include "source_base/kernels/math_kernel_op.h" #include "source_base/module_device/memory_op.h" +namespace BlasUtils +{ -namespace BlasUtils{ +void + createGpuBlasHandle () +{ + if (cublas_handle == nullptr) + { + CHECK_CUBLAS (cublasCreate (&cublas_handle)); + } +} - void createGpuBlasHandle(){ - if (cublas_handle == nullptr) { - CHECK_CUBLAS(cublasCreate(&cublas_handle)); - } - } +void + destoryBLAShandle () +{ + if (cublas_handle != nullptr) + { + CHECK_CUBLAS (cublasDestroy (cublas_handle)); + cublas_handle = nullptr; + } +} - void destoryBLAShandle(){ - if (cublas_handle != nullptr) { - 
CHECK_CUBLAS(cublasDestroy(cublas_handle)); - cublas_handle = nullptr; - } - } +cublasOperation_t + judge_trans (bool is_complex, const char& trans, const char* name) +{ + if (trans == 'N') + { + return CUBLAS_OP_N; + } + else if (trans == 'T') + { + return CUBLAS_OP_T; + } + else if (is_complex && trans == 'C') + { + return CUBLAS_OP_C; + } + return CUBLAS_OP_N; +} +cublasSideMode_t + judge_side (const char& trans) +{ + if (trans == 'L') + { + return CUBLAS_SIDE_LEFT; + } + else if (trans == 'R') + { + return CUBLAS_SIDE_RIGHT; + } + return CUBLAS_SIDE_LEFT; +} - cublasOperation_t judge_trans(bool is_complex, const char& trans, const char* name) - { - if (trans == 'N') - { - return CUBLAS_OP_N; - } - else if(trans == 'T') - { - return CUBLAS_OP_T; - } - else if(is_complex && trans == 'C') - { - return CUBLAS_OP_C; - } - return CUBLAS_OP_N; - } - - cublasSideMode_t judge_side(const char& trans) - { - if (trans == 'L') - { - return CUBLAS_SIDE_LEFT; - } - else if (trans == 'R') - { - return CUBLAS_SIDE_RIGHT; - } - return CUBLAS_SIDE_LEFT; - } - - cublasFillMode_t judge_fill(const char& trans) - { - if (trans == 'F') - { - return CUBLAS_FILL_MODE_FULL; - } - else if (trans == 'U') - { - return CUBLAS_FILL_MODE_UPPER; - } - else if (trans == 'D') - { - return CUBLAS_FILL_MODE_LOWER; - } - return CUBLAS_FILL_MODE_FULL; - } +cublasFillMode_t + judge_fill (const char& trans) +{ + if (trans == 'F') + { + return CUBLAS_FILL_MODE_FULL; + } + else if (trans == 'U') + { + return CUBLAS_FILL_MODE_UPPER; + } + else if (trans == 'D') + { + return CUBLAS_FILL_MODE_LOWER; + } + return CUBLAS_FILL_MODE_FULL; +} } // namespace BlasUtils diff --git a/source/source_base/module_external/blas_connector_matrix.cpp b/source/source_base/module_external/blas_connector_matrix.cpp index 1e819d56ae0..87a8d35b314 100644 --- a/source/source_base/module_external/blas_connector_matrix.cpp +++ b/source/source_base/module_external/blas_connector_matrix.cpp @@ -6,7 +6,8 @@ int 
BlasConnector::dsp_cluster_id_ = 0; -void BlasConnector::set_dsp_cluster_id(int id) +void + BlasConnector::set_dsp_cluster_id (int id) { dsp_cluster_id_ = id; } @@ -20,38 +21,77 @@ void BlasConnector::set_dsp_cluster_id(int id) #include "source_base/module_device/memory_op.h" #endif - // C = a * A.? * B.? + b * C // Row-Major part -void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, - const float alpha, const float *a, const int lda, const float *b, const int ldb, - const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type) +void + BlasConnector::gemm (const char transa, + const char transb, + const int m, + const int n, + const int k, + const float alpha, + const float* a, + const int lda, + const float* b, + const int ldb, + const float beta, + float* c, + const int ldc, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - sgemm_(&transb, &transa, &n, &m, &k, - &alpha, b, &ldb, a, &lda, - &beta, c, &ldc); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + sgemm_ (&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); + } #ifdef __DSP - else if (device_type == base_device::AbacusDevice_t::DspDevice){ - mtfunc::sgemm_mth_(&transb, &transa, &n, &m, &k, - &alpha, b, &ldb, a, &lda, - &beta, c, &ldc, BlasConnector::dsp_cluster_id_); - } + else if (device_type == base_device::AbacusDevice_t::DspDevice) + { + mtfunc::sgemm_mth_ (&transb, + &transa, + &n, + &m, + &k, + &alpha, + b, + &ldb, + a, + &lda, + &beta, + c, + &ldc, + BlasConnector::dsp_cluster_id_); + } #endif #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); - cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); - CHECK_CUBLAS(cublasSgemm(BlasUtils::cublas_handle, cutransA, cutransB, n, m, k, &alpha, b, ldb, 
a, lda, &beta, c, ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + cublasOperation_t cutransA = BlasUtils::judge_trans (false, transa, "gemm_op"); + cublasOperation_t cutransB = BlasUtils::judge_trans (false, transb, "gemm_op"); + CHECK_CUBLAS (cublasSgemm (BlasUtils::cublas_handle, + cutransA, + cutransB, + n, + m, + k, + &alpha, + b, + ldb, + a, + lda, + &beta, + c, + ldc)); + } +#endif + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::gemm(const char transa, +void + BlasConnector::gemm (const char transa, const char transb, const int m, const int n, @@ -67,30 +107,58 @@ void BlasConnector::gemm(const char transa, base_device::AbacusDevice_t device_type) { if (device_type == base_device::AbacusDevice_t::CpuDevice) - { - dgemm_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); - } + { + dgemm_ (&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); + } #ifdef __DSP else if (device_type == base_device::AbacusDevice_t::DspDevice) - { - mtfunc::dgemm_mth_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc, BlasConnector::dsp_cluster_id_); - } + { + mtfunc::dgemm_mth_ (&transb, + &transa, + &n, + &m, + &k, + &alpha, + b, + &ldb, + a, + &lda, + &beta, + c, + &ldc, + BlasConnector::dsp_cluster_id_); + } #endif else if (device_type == base_device::AbacusDevice_t::GpuDevice) - { + { #ifdef __CUDA - cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); - cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); - CHECK_CUBLAS( - cublasDgemm(BlasUtils::cublas_handle, cutransA, cutransB, n, m, k, &alpha, b, ldb, a, lda, &beta, 
c, ldc)); -#endif - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + cublasOperation_t cutransA = BlasUtils::judge_trans (false, transa, "gemm_op"); + cublasOperation_t cutransB = BlasUtils::judge_trans (false, transb, "gemm_op"); + CHECK_CUBLAS (cublasDgemm (BlasUtils::cublas_handle, + cutransA, + cutransB, + n, + m, + k, + &alpha, + b, + ldb, + a, + lda, + &beta, + c, + ldc)); +#endif + } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::gemm(const char transa, +void + BlasConnector::gemm (const char transa, const char transb, const int m, const int n, @@ -106,44 +174,60 @@ void BlasConnector::gemm(const char transa, base_device::AbacusDevice_t device_type) { if (device_type == base_device::AbacusDevice_t::CpuDevice) - { - cgemm_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); - } + { + cgemm_ (&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); + } #ifdef __DSP else if (device_type == base_device::AbacusDevice_t::DspDevice) - { - mtfunc::cgemm_pack_mth_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc, BlasConnector::dsp_cluster_id_); - // cgemm_mth_ for raw dsp mth; - // cgemm_pack_mth_ for dsp mth with memcpy to DSP buffer - } + { + mtfunc::cgemm_pack_mth_ (&transb, + &transa, + &n, + &m, + &k, + &alpha, + b, + &ldb, + a, + &lda, + &beta, + c, + &ldc, + BlasConnector::dsp_cluster_id_); + // cgemm_mth_ for raw dsp mth; + // cgemm_pack_mth_ for dsp mth with memcpy to DSP buffer + } #endif else if (device_type == base_device::AbacusDevice_t::GpuDevice) - { + { #ifdef __CUDA - cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); - cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); - 
CHECK_CUBLAS(cublasCgemm(BlasUtils::cublas_handle, - cutransA, - cutransB, - n, - m, - k, - (float2*)&alpha, - (float2*)b, - ldb, - (float2*)a, - lda, - (float2*)&beta, - (float2*)c, - ldc)); -#endif - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + cublasOperation_t cutransA = BlasUtils::judge_trans (false, transa, "gemm_op"); + cublasOperation_t cutransB = BlasUtils::judge_trans (false, transb, "gemm_op"); + CHECK_CUBLAS (cublasCgemm (BlasUtils::cublas_handle, + cutransA, + cutransB, + n, + m, + k, + (float2*)&alpha, + (float2*)b, + ldb, + (float2*)a, + lda, + (float2*)&beta, + (float2*)c, + ldc)); +#endif + } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::gemm(const char transa, +void + BlasConnector::gemm (const char transa, const char transb, const int m, const int n, @@ -159,73 +243,128 @@ void BlasConnector::gemm(const char transa, base_device::AbacusDevice_t device_type) { if (device_type == base_device::AbacusDevice_t::CpuDevice) - { - zgemm_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); - } + { + zgemm_ (&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); + } #ifdef __DSP else if (device_type == base_device::AbacusDevice_t::DspDevice) - { - mtfunc::zgemm_pack_mth_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc, BlasConnector::dsp_cluster_id_); - // zgemm_mth_ for raw dsp mth; - // zgemm_pack_mth_ for dsp mth with memcpy to DSP buffer - } + { + mtfunc::zgemm_pack_mth_ (&transb, + &transa, + &n, + &m, + &k, + &alpha, + b, + &ldb, + a, + &lda, + &beta, + c, + &ldc, + BlasConnector::dsp_cluster_id_); + // zgemm_mth_ for raw dsp mth; + // zgemm_pack_mth_ for dsp mth with memcpy to DSP buffer + } #endif else if (device_type == 
base_device::AbacusDevice_t::GpuDevice) - { + { #ifdef __CUDA - cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); - cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); - CHECK_CUBLAS(cublasZgemm(BlasUtils::cublas_handle, - cutransA, - cutransB, - n, - m, - k, - (double2*)&alpha, - (double2*)b, - ldb, - (double2*)a, - lda, - (double2*)&beta, - (double2*)c, - ldc)); -#endif - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + cublasOperation_t cutransA = BlasUtils::judge_trans (false, transa, "gemm_op"); + cublasOperation_t cutransB = BlasUtils::judge_trans (false, transb, "gemm_op"); + CHECK_CUBLAS (cublasZgemm (BlasUtils::cublas_handle, + cutransA, + cutransB, + n, + m, + k, + (double2*)&alpha, + (double2*)b, + ldb, + (double2*)a, + lda, + (double2*)&beta, + (double2*)c, + ldc)); +#endif + } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } // Col-Major part -void BlasConnector::gemm_cm(const char transa, const char transb, const int m, const int n, const int k, - const float alpha, const float *a, const int lda, const float *b, const int ldb, - const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type) +void + BlasConnector::gemm_cm (const char transa, + const char transb, + const int m, + const int n, + const int k, + const float alpha, + const float* a, + const int lda, + const float* b, + const int ldb, + const float beta, + float* c, + const int ldc, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - sgemm_(&transa, &transb, &m, &n, &k, - &alpha, a, &lda, b, &ldb, - &beta, c, &ldc); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + sgemm_ (&transa, &transb, &m, &n, 
&k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); + } #ifdef __DSP - else if (device_type == base_device::AbacusDevice_t::DspDevice){ - mtfunc::sgemm_mth_(&transb, &transa, &m, &n, &k, - &alpha, a, &lda, b, &ldb, - &beta, c, &ldc, BlasConnector::dsp_cluster_id_); - } + else if (device_type == base_device::AbacusDevice_t::DspDevice) + { + mtfunc::sgemm_mth_ (&transb, + &transa, + &m, + &n, + &k, + &alpha, + a, + &lda, + b, + &ldb, + &beta, + c, + &ldc, + BlasConnector::dsp_cluster_id_); + } #endif #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); - cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); - CHECK_CUBLAS(cublasSgemm(BlasUtils::cublas_handle, cutransA, cutransB, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + cublasOperation_t cutransA = BlasUtils::judge_trans (false, transa, "gemm_op"); + cublasOperation_t cutransB = BlasUtils::judge_trans (false, transb, "gemm_op"); + CHECK_CUBLAS (cublasSgemm (BlasUtils::cublas_handle, + cutransA, + cutransB, + m, + n, + k, + &alpha, + a, + lda, + b, + ldb, + &beta, + c, + ldc)); + } +#endif + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::gemm_cm(const char transa, +void + BlasConnector::gemm_cm (const char transa, const char transb, const int m, const int n, @@ -241,30 +380,58 @@ void BlasConnector::gemm_cm(const char transa, base_device::AbacusDevice_t device_type) { if (device_type == base_device::AbacusDevice_t::CpuDevice) - { - dgemm_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, 
&beta, c, &ldc); - } + { + dgemm_ (&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); + } #ifdef __DSP else if (device_type == base_device::AbacusDevice_t::DspDevice) - { - mtfunc::dgemm_mth_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc, BlasConnector::dsp_cluster_id_); - } + { + mtfunc::dgemm_mth_ (&transa, + &transb, + &m, + &n, + &k, + &alpha, + a, + &lda, + b, + &ldb, + &beta, + c, + &ldc, + BlasConnector::dsp_cluster_id_); + } #endif #ifdef __CUDA else if (device_type == base_device::AbacusDevice_t::GpuDevice) - { - cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); - cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); - CHECK_CUBLAS( - cublasDgemm(BlasUtils::cublas_handle, cutransA, cutransB, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + { + cublasOperation_t cutransA = BlasUtils::judge_trans (false, transa, "gemm_op"); + cublasOperation_t cutransB = BlasUtils::judge_trans (false, transb, "gemm_op"); + CHECK_CUBLAS (cublasDgemm (BlasUtils::cublas_handle, + cutransA, + cutransB, + m, + n, + k, + &alpha, + a, + lda, + b, + ldb, + &beta, + c, + ldc)); + } +#endif + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::gemm_cm(const char transa, +void + BlasConnector::gemm_cm (const char transa, const char transb, const int m, const int n, @@ -280,44 +447,60 @@ void BlasConnector::gemm_cm(const char transa, base_device::AbacusDevice_t device_type) { if (device_type == base_device::AbacusDevice_t::CpuDevice) - { - cgemm_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); - } + { + cgemm_ (&transa, &transb, &m, &n, &k, &alpha, a, 
&lda, b, &ldb, &beta, c, &ldc); + } #ifdef __DSP else if (device_type == base_device::AbacusDevice_t::DspDevice) - { - mtfunc::cgemm_pack_mth_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc, BlasConnector::dsp_cluster_id_); - // cgemm_mth_ for raw dsp mth; - // cgemm_pack_mth_ for dsp mth with memcpy to DSP buffer - } + { + mtfunc::cgemm_pack_mth_ (&transa, + &transb, + &m, + &n, + &k, + &alpha, + a, + &lda, + b, + &ldb, + &beta, + c, + &ldc, + BlasConnector::dsp_cluster_id_); + // cgemm_mth_ for raw dsp mth; + // cgemm_pack_mth_ for dsp mth with memcpy to DSP buffer + } #endif #ifdef __CUDA else if (device_type == base_device::AbacusDevice_t::GpuDevice) - { - cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); - cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); - CHECK_CUBLAS(cublasCgemm(BlasUtils::cublas_handle, - cutransA, - cutransB, - m, - n, - k, - (float2*)&alpha, - (float2*)a, - lda, - (float2*)b, - ldb, - (float2*)&beta, - (float2*)c, - ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + { + cublasOperation_t cutransA = BlasUtils::judge_trans (false, transa, "gemm_op"); + cublasOperation_t cutransB = BlasUtils::judge_trans (false, transb, "gemm_op"); + CHECK_CUBLAS (cublasCgemm (BlasUtils::cublas_handle, + cutransA, + cutransB, + m, + n, + k, + (float2*)&alpha, + (float2*)a, + lda, + (float2*)b, + ldb, + (float2*)&beta, + (float2*)c, + ldc)); + } +#endif + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::gemm_cm(const char transa, +void + BlasConnector::gemm_cm (const char transa, const char transb, const int m, const int n, @@ -333,321 +516,592 @@ void BlasConnector::gemm_cm(const char transa, 
base_device::AbacusDevice_t device_type) { if (device_type == base_device::AbacusDevice_t::CpuDevice) - { - zgemm_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); - } + { + zgemm_ (&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); + } #ifdef __DSP else if (device_type == base_device::AbacusDevice_t::DspDevice) - { - mtfunc::zgemm_pack_mth_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc, BlasConnector::dsp_cluster_id_); - // zgemm_mth_ for raw dsp mth; - // zgemm_pack_mth_ for dsp mth with memcpy to DSP buffer - } + { + mtfunc::zgemm_pack_mth_ (&transa, + &transb, + &m, + &n, + &k, + &alpha, + a, + &lda, + b, + &ldb, + &beta, + c, + &ldc, + BlasConnector::dsp_cluster_id_); + // zgemm_mth_ for raw dsp mth; + // zgemm_pack_mth_ for dsp mth with memcpy to DSP buffer + } #endif #ifdef __CUDA else if (device_type == base_device::AbacusDevice_t::GpuDevice) - { - cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); - cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); - CHECK_CUBLAS(cublasZgemm(BlasUtils::cublas_handle, - cutransA, - cutransB, - m, - n, - k, - (double2*)&alpha, - (double2*)a, - lda, - (double2*)b, - ldb, - (double2*)&beta, - (double2*)c, - ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + { + cublasOperation_t cutransA = BlasUtils::judge_trans (false, transa, "gemm_op"); + cublasOperation_t cutransB = BlasUtils::judge_trans (false, transb, "gemm_op"); + CHECK_CUBLAS (cublasZgemm (BlasUtils::cublas_handle, + cutransA, + cutransB, + m, + n, + k, + (double2*)&alpha, + (double2*)a, + lda, + (double2*)b, + ldb, + (double2*)&beta, + (double2*)c, + ldc)); + } +#endif + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + 
std::to_string (__LINE__)); + } } // Symm and Hemm part. Only col-major is supported. -void BlasConnector::symm_cm(const char side, const char uplo, const int m, const int n, - const float alpha, const float *a, const int lda, const float *b, const int ldb, - const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type) +void + BlasConnector::symm_cm (const char side, + const char uplo, + const int m, + const int n, + const float alpha, + const float* a, + const int lda, + const float* b, + const int ldb, + const float beta, + float* c, + const int ldc, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - ssymm_(&side, &uplo, &m, &n, - &alpha, a, &lda, b, &ldb, - &beta, c, &ldc); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + ssymm_ (&side, &uplo, &m, &n, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasSideMode_t sideMode = BlasUtils::judge_side(side); - cublasFillMode_t fillMode = BlasUtils::judge_fill(uplo); - CHECK_CUBLAS(cublasSsymm(BlasUtils::cublas_handle, sideMode, fillMode, m, n, &alpha, a, lda, b, ldb, &beta, c, ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + cublasSideMode_t sideMode = BlasUtils::judge_side (side); + cublasFillMode_t fillMode = BlasUtils::judge_fill (uplo); + CHECK_CUBLAS (cublasSsymm (BlasUtils::cublas_handle, + sideMode, + fillMode, + m, + n, + &alpha, + a, + lda, + b, + ldb, + &beta, + c, + ldc)); + } +#endif + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::symm_cm(const char side, const char uplo, 
const int m, const int n, - const double alpha, const double *a, const int lda, const double *b, const int ldb, - const double beta, double *c, const int ldc, base_device::AbacusDevice_t device_type) +void + BlasConnector::symm_cm (const char side, + const char uplo, + const int m, + const int n, + const double alpha, + const double* a, + const int lda, + const double* b, + const int ldb, + const double beta, + double* c, + const int ldc, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - dsymm_(&side, &uplo, &m, &n, - &alpha, a, &lda, b, &ldb, - &beta, c, &ldc); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + dsymm_ (&side, &uplo, &m, &n, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasSideMode_t sideMode = BlasUtils::judge_side(side); - cublasFillMode_t fillMode = BlasUtils::judge_fill(uplo); - CHECK_CUBLAS(cublasDsymm(BlasUtils::cublas_handle, sideMode, fillMode, m, n, &alpha, a, lda, b, ldb, &beta, c, ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + cublasSideMode_t sideMode = BlasUtils::judge_side (side); + cublasFillMode_t fillMode = BlasUtils::judge_fill (uplo); + CHECK_CUBLAS (cublasDsymm (BlasUtils::cublas_handle, + sideMode, + fillMode, + m, + n, + &alpha, + a, + lda, + b, + ldb, + &beta, + c, + ldc)); + } +#endif + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::symm_cm(const char side, const char uplo, const int m, const int n, - const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, - const 
std::complex beta, std::complex *c, const int ldc, base_device::AbacusDevice_t device_type) +void + BlasConnector::symm_cm (const char side, + const char uplo, + const int m, + const int n, + const std::complex alpha, + const std::complex* a, + const int lda, + const std::complex* b, + const int ldb, + const std::complex beta, + std::complex* c, + const int ldc, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - csymm_(&side, &uplo, &m, &n, - &alpha, a, &lda, b, &ldb, - &beta, c, &ldc); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + csymm_ (&side, &uplo, &m, &n, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasSideMode_t sideMode = BlasUtils::judge_side(side); - cublasFillMode_t fillMode = BlasUtils::judge_fill(uplo); - CHECK_CUBLAS(cublasCsymm(BlasUtils::cublas_handle, sideMode, fillMode, m, n, (float2*)&alpha, (float2*)a, lda, (float2*)b, ldb, (float2*)&beta, (float2*)c, ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + cublasSideMode_t sideMode = BlasUtils::judge_side (side); + cublasFillMode_t fillMode = BlasUtils::judge_fill (uplo); + CHECK_CUBLAS (cublasCsymm (BlasUtils::cublas_handle, + sideMode, + fillMode, + m, + n, + (float2*)&alpha, + (float2*)a, + lda, + (float2*)b, + ldb, + (float2*)&beta, + (float2*)c, + ldc)); + } +#endif + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::symm_cm(const char side, const char uplo, const int m, const int n, - const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, - 
const std::complex beta, std::complex *c, const int ldc, base_device::AbacusDevice_t device_type) +void + BlasConnector::symm_cm (const char side, + const char uplo, + const int m, + const int n, + const std::complex alpha, + const std::complex* a, + const int lda, + const std::complex* b, + const int ldb, + const std::complex beta, + std::complex* c, + const int ldc, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - zsymm_(&side, &uplo, &m, &n, - &alpha, a, &lda, b, &ldb, - &beta, c, &ldc); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + zsymm_ (&side, &uplo, &m, &n, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasSideMode_t sideMode = BlasUtils::judge_side(side); - cublasFillMode_t fillMode = BlasUtils::judge_fill(uplo); - CHECK_CUBLAS(cublasZsymm(BlasUtils::cublas_handle, sideMode, fillMode, m, n, (double2*)&alpha, (double2*)a, lda, (double2*)b, ldb, (double2*)&beta, (double2*)c, ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + cublasSideMode_t sideMode = BlasUtils::judge_side (side); + cublasFillMode_t fillMode = BlasUtils::judge_fill (uplo); + CHECK_CUBLAS (cublasZsymm (BlasUtils::cublas_handle, + sideMode, + fillMode, + m, + n, + (double2*)&alpha, + (double2*)a, + lda, + (double2*)b, + ldb, + (double2*)&beta, + (double2*)c, + ldc)); + } +#endif + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::hemm_cm(const char side, const char uplo, const int m, const int n, - const float alpha, const float *a, const int lda, const float *b, const int ldb, - 
const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type) +void + BlasConnector::hemm_cm (const char side, + const char uplo, + const int m, + const int n, + const float alpha, + const float* a, + const int lda, + const float* b, + const int ldb, + const float beta, + float* c, + const int ldc, + base_device::AbacusDevice_t device_type) { - symm_cm(side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc, device_type); + symm_cm (side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc, device_type); } -void BlasConnector::hemm_cm(const char side, const char uplo, const int m, const int n, - const double alpha, const double *a, const int lda, const double *b, const int ldb, - const double beta, double *c, const int ldc, base_device::AbacusDevice_t device_type) +void + BlasConnector::hemm_cm (const char side, + const char uplo, + const int m, + const int n, + const double alpha, + const double* a, + const int lda, + const double* b, + const int ldb, + const double beta, + double* c, + const int ldc, + base_device::AbacusDevice_t device_type) { - symm_cm(side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc, device_type); + symm_cm (side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc, device_type); } -void BlasConnector::hemm_cm(char side, char uplo, int m, int n, - std::complex alpha, std::complex *a, int lda, std::complex *b, int ldb, - std::complex beta, std::complex *c, int ldc, base_device::AbacusDevice_t device_type) +void + BlasConnector::hemm_cm (char side, + char uplo, + int m, + int n, + std::complex alpha, + std::complex* a, + int lda, + std::complex* b, + int ldb, + std::complex beta, + std::complex* c, + int ldc, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - chemm_(&side, &uplo, &m, &n, - &alpha, a, &lda, b, &ldb, - &beta, c, &ldc); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + chemm_ (&side, &uplo, &m, &n, &alpha, a, &lda, b, &ldb, &beta, c, 
&ldc); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasSideMode_t sideMode = BlasUtils::judge_side(side); - cublasFillMode_t fillMode = BlasUtils::judge_fill(uplo); - CHECK_CUBLAS(cublasChemm(BlasUtils::cublas_handle, sideMode, fillMode, m, n, (float2*)&alpha, (float2*)a, lda, (float2*)b, ldb, (float2*)&beta, (float2*)c, ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + cublasSideMode_t sideMode = BlasUtils::judge_side (side); + cublasFillMode_t fillMode = BlasUtils::judge_fill (uplo); + CHECK_CUBLAS (cublasChemm (BlasUtils::cublas_handle, + sideMode, + fillMode, + m, + n, + (float2*)&alpha, + (float2*)a, + lda, + (float2*)b, + ldb, + (float2*)&beta, + (float2*)c, + ldc)); + } +#endif + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::hemm_cm(char side, char uplo, int m, int n, - std::complex alpha, std::complex *a, int lda, std::complex *b, int ldb, - std::complex beta, std::complex *c, int ldc, base_device::AbacusDevice_t device_type) +void + BlasConnector::hemm_cm (char side, + char uplo, + int m, + int n, + std::complex alpha, + std::complex* a, + int lda, + std::complex* b, + int ldb, + std::complex beta, + std::complex* c, + int ldc, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - zhemm_(&side, &uplo, &m, &n, - &alpha, a, &lda, b, &ldb, - &beta, c, &ldc); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + zhemm_ (&side, &uplo, &m, &n, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasSideMode_t sideMode = 
BlasUtils::judge_side(side); - cublasFillMode_t fillMode = BlasUtils::judge_fill(uplo); - CHECK_CUBLAS(cublasZhemm(BlasUtils::cublas_handle, sideMode, fillMode, m, n, (double2*)&alpha, (double2*)a, lda, (double2*)b, ldb, (double2*)&beta, (double2*)c, ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + cublasSideMode_t sideMode = BlasUtils::judge_side (side); + cublasFillMode_t fillMode = BlasUtils::judge_fill (uplo); + CHECK_CUBLAS (cublasZhemm (BlasUtils::cublas_handle, + sideMode, + fillMode, + m, + n, + (double2*)&alpha, + (double2*)a, + lda, + (double2*)b, + ldb, + (double2*)&beta, + (double2*)c, + ldc)); + } +#endif + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::gemv(const char trans, const int m, const int n, - const float alpha, const float* A, const int lda, const float* X, const int incx, - const float beta, float* Y, const int incy, base_device::AbacusDevice_t device_type) +void + BlasConnector::gemv (const char trans, + const int m, + const int n, + const float alpha, + const float* A, + const int lda, + const float* X, + const int incx, + const float beta, + float* Y, + const int incy, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - sgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + sgemv_ (&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); + } #ifdef __DSP - else if (device_type == base_device::AbacusDevice_t::DspDevice) { - mtfunc::sgemv_mth_(&trans, - &m, - &n, - &alpha, - A, - &lda, - X, - &incx, - &beta, - Y, - &incy, - 
BlasConnector::dsp_cluster_id_); - } + else if (device_type == base_device::AbacusDevice_t::DspDevice) + { + mtfunc::sgemv_mth_ (&trans, + &m, + &n, + &alpha, + A, + &lda, + X, + &incx, + &beta, + Y, + &incy, + BlasConnector::dsp_cluster_id_); + } #endif #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasOperation_t cutransA = BlasUtils::judge_trans(false, trans, "gemv_op"); - CHECK_CUBLAS(cublasSgemv(BlasUtils::cublas_handle, cutransA, m, n, &alpha, A, lda, X, incx, &beta, Y, incy)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + cublasOperation_t cutransA = BlasUtils::judge_trans (false, trans, "gemv_op"); + CHECK_CUBLAS ( + cublasSgemv (BlasUtils::cublas_handle, cutransA, m, n, &alpha, A, lda, X, incx, &beta, Y, incy)); + } +#endif + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::gemv(const char trans, const int m, const int n, - const double alpha, const double* A, const int lda, const double* X, const int incx, - const double beta, double* Y, const int incy, base_device::AbacusDevice_t device_type) +void + BlasConnector::gemv (const char trans, + const int m, + const int n, + const double alpha, + const double* A, + const int lda, + const double* X, + const int incx, + const double beta, + double* Y, + const int incy, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - dgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + dgemv_ (&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); + } #ifdef __DSP - else if (device_type == 
base_device::AbacusDevice_t::DspDevice) { - mtfunc::dgemv_mth_(&trans, - &m, - &n, - &alpha, - A, - &lda, - X, - &incx, - &beta, - Y, - &incy, - BlasConnector::dsp_cluster_id_); - } + else if (device_type == base_device::AbacusDevice_t::DspDevice) + { + mtfunc::dgemv_mth_ (&trans, + &m, + &n, + &alpha, + A, + &lda, + X, + &incx, + &beta, + Y, + &incy, + BlasConnector::dsp_cluster_id_); + } #endif #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasOperation_t cutransA = BlasUtils::judge_trans(false, trans, "gemv_op"); - CHECK_CUBLAS(cublasDgemv(BlasUtils::cublas_handle, cutransA, m, n, &alpha, A, lda, X, incx, &beta, Y, incy)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + cublasOperation_t cutransA = BlasUtils::judge_trans (false, trans, "gemv_op"); + CHECK_CUBLAS ( + cublasDgemv (BlasUtils::cublas_handle, cutransA, m, n, &alpha, A, lda, X, incx, &beta, Y, incy)); + } +#endif + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::gemv(const char trans, const int m, const int n, - const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, - const std::complex beta, std::complex *Y, const int incy, base_device::AbacusDevice_t device_type) +void + BlasConnector::gemv (const char trans, + const int m, + const int n, + const std::complex alpha, + const std::complex* A, + const int lda, + const std::complex* X, + const int incx, + const std::complex beta, + std::complex* Y, + const int incy, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - cgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, 
Y, &incy); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + cgemv_ (&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); + } #ifdef __DSP - else if (device_type == base_device::AbacusDevice_t::DspDevice) { - mtfunc::cgemv_mth_(&trans, - &m, - &n, - &alpha, - A, - &lda, - X, - &incx, - &beta, - Y, - &incy, - BlasConnector::dsp_cluster_id_); - } + else if (device_type == base_device::AbacusDevice_t::DspDevice) + { + mtfunc::cgemv_mth_ (&trans, + &m, + &n, + &alpha, + A, + &lda, + X, + &incx, + &beta, + Y, + &incy, + BlasConnector::dsp_cluster_id_); + } #endif #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cuFloatComplex alpha_cu = make_cuFloatComplex(alpha.real(), alpha.imag()); - cuFloatComplex beta_cu = make_cuFloatComplex(beta.real(), beta.imag()); - cublasOperation_t cutransA = BlasUtils::judge_trans(true, trans, "gemv_op"); - CHECK_CUBLAS(cublasCgemv(BlasUtils::cublas_handle, cutransA, m, n, &alpha_cu, (cuFloatComplex*)A, lda, (cuFloatComplex*)X, incx, &beta_cu, (cuFloatComplex*)Y, incy)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + cuFloatComplex alpha_cu = make_cuFloatComplex (alpha.real (), alpha.imag ()); + cuFloatComplex beta_cu = make_cuFloatComplex (beta.real (), beta.imag ()); + cublasOperation_t cutransA = BlasUtils::judge_trans (true, trans, "gemv_op"); + CHECK_CUBLAS (cublasCgemv (BlasUtils::cublas_handle, + cutransA, + m, + n, + &alpha_cu, + (cuFloatComplex*)A, + lda, + (cuFloatComplex*)X, + incx, + &beta_cu, + (cuFloatComplex*)Y, + incy)); + } +#endif + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::gemv(const char trans, const int m, const 
int n, - const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, - const std::complex beta, std::complex *Y, const int incy, base_device::AbacusDevice_t device_type) +void + BlasConnector::gemv (const char trans, + const int m, + const int n, + const std::complex alpha, + const std::complex* A, + const int lda, + const std::complex* X, + const int incx, + const std::complex beta, + std::complex* Y, + const int incy, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - zgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + zgemv_ (&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); + } #ifdef __DSP - else if (device_type == base_device::AbacusDevice_t::DspDevice) { - mtfunc::zgemv_mth_(&trans, - &m, - &n, - &alpha, - A, - &lda, - X, - &incx, - &beta, - Y, - &incy, - BlasConnector::dsp_cluster_id_); - } + else if (device_type == base_device::AbacusDevice_t::DspDevice) + { + mtfunc::zgemv_mth_ (&trans, + &m, + &n, + &alpha, + A, + &lda, + X, + &incx, + &beta, + Y, + &incy, + BlasConnector::dsp_cluster_id_); + } #endif #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cuDoubleComplex alpha_cu = make_cuDoubleComplex(alpha.real(), alpha.imag()); - cuDoubleComplex beta_cu = make_cuDoubleComplex(beta.real(), beta.imag()); - cublasOperation_t cutransA = BlasUtils::judge_trans(true, trans, "gemv_op"); - CHECK_CUBLAS(cublasZgemv(BlasUtils::cublas_handle, cutransA, m, n, &alpha_cu, (cuDoubleComplex*)A, lda, (cuDoubleComplex*)X, incx, &beta_cu, (cuDoubleComplex*)Y, incy)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + cuDoubleComplex alpha_cu = 
make_cuDoubleComplex (alpha.real (), alpha.imag ()); + cuDoubleComplex beta_cu = make_cuDoubleComplex (beta.real (), beta.imag ()); + cublasOperation_t cutransA = BlasUtils::judge_trans (true, trans, "gemv_op"); + CHECK_CUBLAS (cublasZgemv (BlasUtils::cublas_handle, + cutransA, + m, + n, + &alpha_cu, + (cuDoubleComplex*)A, + lda, + (cuDoubleComplex*)X, + incx, + &beta_cu, + (cuDoubleComplex*)Y, + incy)); + } +#endif + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } \ No newline at end of file diff --git a/source/source_base/module_external/blas_connector_vector.cpp b/source/source_base/module_external/blas_connector_vector.cpp index db89fc07fb5..ba13bd8f193 100644 --- a/source/source_base/module_external/blas_connector_vector.cpp +++ b/source/source_base/module_external/blas_connector_vector.cpp @@ -14,480 +14,777 @@ #include "source_base/module_device/memory_op.h" #endif - -void BlasConnector::axpy( const int n, const float alpha, const float *X, const int incX, float *Y, const int incY, base_device::AbacusDevice_t device_type) +void + BlasConnector::axpy (const int n, + const float alpha, + const float* X, + const int incX, + float* Y, + const int incY, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - saxpy_(&n, &alpha, X, &incX, Y, &incY); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + saxpy_ (&n, &alpha, X, &incX, Y, &incY); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - CHECK_CUBLAS(cublasSaxpy(BlasUtils::cublas_handle, n, &alpha, X, incX, Y, incY)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + CHECK_CUBLAS (cublasSaxpy (BlasUtils::cublas_handle, n, &alpha, X, incX, Y, incY)); + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + 
std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::axpy( const int n, const double alpha, const double *X, const int incX, double *Y, const int incY, base_device::AbacusDevice_t device_type) +void + BlasConnector::axpy (const int n, + const double alpha, + const double* X, + const int incX, + double* Y, + const int incY, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - daxpy_(&n, &alpha, X, &incX, Y, &incY); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + daxpy_ (&n, &alpha, X, &incX, Y, &incY); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - CHECK_CUBLAS(cublasDaxpy(BlasUtils::cublas_handle, n, &alpha, X, incX, Y, incY)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + CHECK_CUBLAS (cublasDaxpy (BlasUtils::cublas_handle, n, &alpha, X, incX, Y, incY)); + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY, base_device::AbacusDevice_t device_type) +void + BlasConnector::axpy (const int n, + const std::complex alpha, + const std::complex* X, + const int incX, + std::complex* Y, + const int incY, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - caxpy_(&n, &alpha, X, &incX, Y, &incY); - } + if (device_type == 
base_device::AbacusDevice_t::CpuDevice) + { + caxpy_ (&n, &alpha, X, &incX, Y, &incY); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - CHECK_CUBLAS(cublasCaxpy(BlasUtils::cublas_handle, n, (float2*)&alpha, (float2*)X, incX, (float2*)Y, incY)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + CHECK_CUBLAS ( + cublasCaxpy (BlasUtils::cublas_handle, n, (float2*)&alpha, (float2*)X, incX, (float2*)Y, incY)); + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY, base_device::AbacusDevice_t device_type) +void + BlasConnector::axpy (const int n, + const std::complex alpha, + const std::complex* X, + const int incX, + std::complex* Y, + const int incY, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - zaxpy_(&n, &alpha, X, &incX, Y, &incY); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + zaxpy_ (&n, &alpha, X, &incX, Y, &incY); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - CHECK_CUBLAS(cublasZaxpy(BlasUtils::cublas_handle, n, (double2*)&alpha, (double2*)X, incX, (double2*)Y, incY)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + CHECK_CUBLAS ( + cublasZaxpy (BlasUtils::cublas_handle, n, (double2*)&alpha, (double2*)X, incX, (double2*)Y, incY)); + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else + { + throw 
std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } - // x=a*x -void BlasConnector::scal( const int n, const float alpha, float *X, const int incX, base_device::AbacusDevice_t device_type) +void + BlasConnector::scal (const int n, + const float alpha, + float* X, + const int incX, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - sscal_(&n, &alpha, X, &incX); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + sscal_ (&n, &alpha, X, &incX); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - CHECK_CUBLAS(cublasSscal(BlasUtils::cublas_handle, n, &alpha, X, incX)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + CHECK_CUBLAS (cublasSscal (BlasUtils::cublas_handle, n, &alpha, X, incX)); + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::scal( const int n, const double alpha, double *X, const int incX, base_device::AbacusDevice_t device_type) +void + BlasConnector::scal (const int n, + const double alpha, + double* X, + const int incX, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - dscal_(&n, &alpha, X, &incX); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + dscal_ (&n, &alpha, X, &incX); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - CHECK_CUBLAS(cublasDscal(BlasUtils::cublas_handle, n, &alpha, X, incX)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + CHECK_CUBLAS 
(cublasDscal (BlasUtils::cublas_handle, n, &alpha, X, incX)); + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::scal( const int n, const std::complex alpha, std::complex *X, const int incX, base_device::AbacusDevice_t device_type) +void + BlasConnector::scal (const int n, + const std::complex alpha, + std::complex* X, + const int incX, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - cscal_(&n, &alpha, X, &incX); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + cscal_ (&n, &alpha, X, &incX); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - CHECK_CUBLAS(cublasCscal(BlasUtils::cublas_handle, n, (float2*)&alpha, (float2*)X, incX)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + CHECK_CUBLAS (cublasCscal (BlasUtils::cublas_handle, n, (float2*)&alpha, (float2*)X, incX)); + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::scal( const int n, const std::complex alpha, std::complex *X, const int incX, base_device::AbacusDevice_t device_type) +void + BlasConnector::scal (const int n, + const std::complex alpha, + std::complex* X, + const int incX, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - zscal_(&n, &alpha, X, &incX); - } + if 
(device_type == base_device::AbacusDevice_t::CpuDevice) + { + zscal_ (&n, &alpha, X, &incX); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - CHECK_CUBLAS(cublasZscal(BlasUtils::cublas_handle, n, (double2*)&alpha, (double2*)X, incX)); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + CHECK_CUBLAS (cublasZscal (BlasUtils::cublas_handle, n, (double2*)&alpha, (double2*)X, incX)); + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } - // d=x*y -float BlasConnector::dot( const int n, const float*const X, const int incX, const float*const Y, const int incY, base_device::AbacusDevice_t device_type) +float + BlasConnector::dot (const int n, + const float* const X, + const int incX, + const float* const Y, + const int incY, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - return sdot_(&n, X, &incX, Y, &incY); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + return sdot_ (&n, X, &incX, Y, &incY); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice){ - float result = 0.0; - CHECK_CUBLAS(cublasSdot(BlasUtils::cublas_handle, n, X, incX, Y, incY, &result)); - return result; - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + float result = 0.0; + CHECK_CUBLAS (cublasSdot (BlasUtils::cublas_handle, n, X, incX, Y, incY, &result)); + return result; + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else + { + throw std::invalid_argument ("device_type = " + std::to_string 
(device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -double BlasConnector::dot( const int n, const double*const X, const int incX, const double*const Y, const int incY, base_device::AbacusDevice_t device_type) +double + BlasConnector::dot (const int n, + const double* const X, + const int incX, + const double* const Y, + const int incY, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - return ddot_(&n, X, &incX, Y, &incY); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + return ddot_ (&n, X, &incX, Y, &incY); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice){ - double result = 0.0; - CHECK_CUBLAS(cublasDdot(BlasUtils::cublas_handle, n, X, incX, Y, incY, &result)); - return result; - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + double result = 0.0; + CHECK_CUBLAS (cublasDdot (BlasUtils::cublas_handle, n, X, incX, Y, incY, &result)); + return result; + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } // d=x*y -float BlasConnector::dotu(const int n, const float*const X, const int incX, const float*const Y, const int incY, base_device::AbacusDevice_t device_type) +float + BlasConnector::dotu (const int n, + const float* const X, + const int incX, + const float* const Y, + const int incY, + base_device::AbacusDevice_t device_type) { - return BlasConnector::dot(n, X, incX, Y, incY, device_type); + return BlasConnector::dot (n, X, incX, Y, incY, device_type); } -double BlasConnector::dotu(const int n, const double*const X, const int incX, const double*const Y, const int incY, 
base_device::AbacusDevice_t device_type) +double + BlasConnector::dotu (const int n, + const double* const X, + const int incX, + const double* const Y, + const int incY, + base_device::AbacusDevice_t device_type) { - return BlasConnector::dot(n, X, incX, Y, incY, device_type); + return BlasConnector::dot (n, X, incX, Y, incY, device_type); } -std::complex BlasConnector::dotu(const int n, const std::complex*const X, const int incX, const std::complex*const Y, const int incY, base_device::AbacusDevice_t device_type) +std::complex + BlasConnector::dotu (const int n, + const std::complex* const X, + const int incX, + const std::complex* const Y, + const int incY, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - const int incX2 = 2 * incX; - const int incY2 = 2 * incY; - const float*const x = reinterpret_cast(X); - const float*const y = reinterpret_cast(Y); - //Re(result)=Re(x)*Re(y)-Im(x)*Im(y) - //Im(result)=Re(x)*Im(y)+Im(x)*Re(y) - return std::complex( - BlasConnector::dot(n, x, incX2, y, incY2, device_type) - dot(n, x+1, incX2, y+1, incY2, device_type), - BlasConnector::dot(n, x, incX2, y+1, incY2, device_type) + dot(n, x+1, incX2, y, incY2, device_type)); - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + const int incX2 = 2 * incX; + const int incY2 = 2 * incY; + const float* const x = reinterpret_cast (X); + const float* const y = reinterpret_cast (Y); + // Re(result)=Re(x)*Re(y)-Im(x)*Im(y) + // Im(result)=Re(x)*Im(y)+Im(x)*Re(y) + return std::complex (BlasConnector::dot (n, x, incX2, y, incY2, device_type) + - dot (n, x + 1, incX2, y + 1, incY2, device_type), + BlasConnector::dot (n, x, incX2, y + 1, incY2, device_type) + + dot (n, x + 1, incX2, y, incY2, device_type)); + } + else + { + throw std::invalid_argument 
("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -std::complex BlasConnector::dotu(const int n, const std::complex*const X, const int incX, const std::complex*const Y, const int incY, base_device::AbacusDevice_t device_type) +std::complex + BlasConnector::dotu (const int n, + const std::complex* const X, + const int incX, + const std::complex* const Y, + const int incY, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - const int incX2 = 2 * incX; - const int incY2 = 2 * incY; - const double*const x = reinterpret_cast(X); - const double*const y = reinterpret_cast(Y); - //Re(result)=Re(x)*Re(y)-Im(x)*Im(y) - //Im(result)=Re(x)*Im(y)+Im(x)*Re(y) - return std::complex( - BlasConnector::dot(n, x, incX2, y, incY2, device_type) - dot(n, x+1, incX2, y+1, incY2, device_type), - BlasConnector::dot(n, x, incX2, y+1, incY2, device_type) + dot(n, x+1, incX2, y, incY2, device_type)); - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + const int incX2 = 2 * incX; + const int incY2 = 2 * incY; + const double* const x = reinterpret_cast (X); + const double* const y = reinterpret_cast (Y); + // Re(result)=Re(x)*Re(y)-Im(x)*Im(y) + // Im(result)=Re(x)*Im(y)+Im(x)*Re(y) + return std::complex (BlasConnector::dot (n, x, incX2, y, incY2, device_type) + - dot (n, x + 1, incX2, y + 1, incY2, device_type), + BlasConnector::dot (n, x, incX2, y + 1, incY2, device_type) + + dot (n, x + 1, incX2, y, incY2, device_type)); + } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } // d = x.conj() * Vy -float BlasConnector::dotc(const int n, const float*const X, 
const int incX, const float*const Y, const int incY, base_device::AbacusDevice_t device_type) +float + BlasConnector::dotc (const int n, + const float* const X, + const int incX, + const float* const Y, + const int incY, + base_device::AbacusDevice_t device_type) { - return BlasConnector::dot(n, X, incX, Y, incY, device_type); + return BlasConnector::dot (n, X, incX, Y, incY, device_type); } -double BlasConnector::dotc(const int n, const double*const X, const int incX, const double*const Y, const int incY, base_device::AbacusDevice_t device_type) +double + BlasConnector::dotc (const int n, + const double* const X, + const int incX, + const double* const Y, + const int incY, + base_device::AbacusDevice_t device_type) { - return BlasConnector::dot(n, X, incX, Y, incY, device_type); + return BlasConnector::dot (n, X, incX, Y, incY, device_type); } -std::complex BlasConnector::dotc(const int n, const std::complex*const X, const int incX, const std::complex*const Y, const int incY, base_device::AbacusDevice_t device_type) +std::complex + BlasConnector::dotc (const int n, + const std::complex* const X, + const int incX, + const std::complex* const Y, + const int incY, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - const int incX2 = 2 * incX; - const int incY2 = 2 * incY; - const float*const x = reinterpret_cast(X); - const float*const y = reinterpret_cast(Y); - // Re(result)=Re(X)*Re(Y)+Im(X)*Im(Y) - // Im(result)=Re(X)*Im(Y)-Im(X)*Re(Y) - return std::complex( - BlasConnector::dot(n, x, incX2, y, incY2, device_type) + dot(n, x+1, incX2, y+1, incY2, device_type), - BlasConnector::dot(n, x, incX2, y+1, incY2, device_type) - dot(n, x+1, incX2, y, incY2, device_type)); - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + const int 
incX2 = 2 * incX; + const int incY2 = 2 * incY; + const float* const x = reinterpret_cast (X); + const float* const y = reinterpret_cast (Y); + // Re(result)=Re(X)*Re(Y)+Im(X)*Im(Y) + // Im(result)=Re(X)*Im(Y)-Im(X)*Re(Y) + return std::complex (BlasConnector::dot (n, x, incX2, y, incY2, device_type) + + dot (n, x + 1, incX2, y + 1, incY2, device_type), + BlasConnector::dot (n, x, incX2, y + 1, incY2, device_type) + - dot (n, x + 1, incX2, y, incY2, device_type)); + } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -std::complex BlasConnector::dotc(const int n, const std::complex*const X, const int incX, const std::complex*const Y, const int incY, base_device::AbacusDevice_t device_type) +std::complex + BlasConnector::dotc (const int n, + const std::complex* const X, + const int incX, + const std::complex* const Y, + const int incY, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - const int incX2 = 2 * incX; - const int incY2 = 2 * incY; - const double*const x = reinterpret_cast(X); - const double*const y = reinterpret_cast(Y); - // Re(result)=Re(X)*Re(Y)+Im(X)*Im(Y) - // Im(result)=Re(X)*Im(Y)-Im(X)*Re(Y) - return std::complex( - BlasConnector::dot(n, x, incX2, y, incY2, device_type) + dot(n, x+1, incX2, y+1, incY2, device_type), - BlasConnector::dot(n, x, incX2, y+1, incY2, device_type) - dot(n, x+1, incX2, y, incY2, device_type)); - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + const int incX2 = 2 * incX; + const int incY2 = 2 * incY; + const double* const x = reinterpret_cast (X); + const double* const y = reinterpret_cast (Y); + // Re(result)=Re(X)*Re(Y)+Im(X)*Im(Y) + // 
Im(result)=Re(X)*Im(Y)-Im(X)*Re(Y) + return std::complex (BlasConnector::dot (n, x, incX2, y, incY2, device_type) + + dot (n, x + 1, incX2, y + 1, incY2, device_type), + BlasConnector::dot (n, x, incX2, y + 1, incY2, device_type) + - dot (n, x + 1, incX2, y, incY2, device_type)); + } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } // out = ||x||_2 -float BlasConnector::nrm2( const int n, const float *X, const int incX, base_device::AbacusDevice_t device_type ) +float + BlasConnector::nrm2 (const int n, const float* X, const int incX, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - return snrm2_( &n, X, &incX ); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + return snrm2_ (&n, X, &incX); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice){ - float result = 0.0; - CHECK_CUBLAS(cublasSnrm2(BlasUtils::cublas_handle, n, X, incX, &result)); - return result; - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + float result = 0.0; + CHECK_CUBLAS (cublasSnrm2 (BlasUtils::cublas_handle, n, X, incX, &result)); + return result; + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } - -double BlasConnector::nrm2( const int n, const double *X, const int incX, base_device::AbacusDevice_t device_type ) +double + BlasConnector::nrm2 (const int n, const double* X, const int incX, base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - return dnrm2_( &n, X, &incX ); - } + if (device_type 
== base_device::AbacusDevice_t::CpuDevice) + { + return dnrm2_ (&n, X, &incX); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice){ - double result = 0.0; - CHECK_CUBLAS(cublasDnrm2(BlasUtils::cublas_handle, n, X, incX, &result)); - return result; - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + double result = 0.0; + CHECK_CUBLAS (cublasDnrm2 (BlasUtils::cublas_handle, n, X, incX, &result)); + return result; + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } - -double BlasConnector::nrm2( const int n, const std::complex *X, const int incX, base_device::AbacusDevice_t device_type ) +double + BlasConnector::nrm2 (const int n, + const std::complex* X, + const int incX, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - return dznrm2_( &n, X, &incX ); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + return dznrm2_ (&n, X, &incX); + } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice){ - double result = 0.0; - CHECK_CUBLAS(cublasDznrm2(BlasUtils::cublas_handle, n, (double2*)X, incX, &result)); - return result; - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + double result = 0.0; + CHECK_CUBLAS (cublasDznrm2 (BlasUtils::cublas_handle, n, (double2*)X, incX, &result)); + return result; + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line 
" + std::to_string (__LINE__)); + } } // copies a into b -void BlasConnector::copy(const int n, const float *a, const int incx, float *b, const int incy, base_device::AbacusDevice_t device_type) +void + BlasConnector::copy (const int n, + const float* a, + const int incx, + float* b, + const int incy, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - scopy_(&n, a, &incx, b, &incy); - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + scopy_ (&n, a, &incx, b, &incy); + } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::copy(const int n, const double *a, const int incx, double *b, const int incy, base_device::AbacusDevice_t device_type) +void + BlasConnector::copy (const int n, + const double* a, + const int incx, + double* b, + const int incy, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - dcopy_(&n, a, &incx, b, &incy); - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + dcopy_ (&n, a, &incx, b, &incy); + } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::copy(const int n, const std::complex *a, const int incx, std::complex *b, const int incy, base_device::AbacusDevice_t device_type) +void + BlasConnector::copy (const int n, + const std::complex* a, + const int incx, + std::complex* b, + const int incy, 
+ base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - ccopy_(&n, a, &incx, b, &incy); - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + ccopy_ (&n, a, &incx, b, &incy); + } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void BlasConnector::copy(const int n, const std::complex *a, const int incx, std::complex *b, const int incy, base_device::AbacusDevice_t device_type) +void + BlasConnector::copy (const int n, + const std::complex* a, + const int incx, + std::complex* b, + const int incy, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - zcopy_(&n, a, &incx, b, &incy); - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + zcopy_ (&n, a, &incx, b, &incy); + } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } - template -void vector_mul_vector(const int& dim, T* result, const T* vector1, const T* vector2, base_device::AbacusDevice_t device_type){ - using Real = typename GetTypeReal::type; - if (device_type == base_device::AbacusDevice_t::CpuDevice) { +void + vector_mul_vector (const int& dim, + T* result, + const T* vector1, + const T* vector2, + base_device::AbacusDevice_t device_type) +{ + using Real = typename GetTypeReal::type; + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { #ifdef _OPENMP #pragma omp parallel for schedule(static) 
#endif - for (int i = 0; i < dim; i++) - { - result[i] = vector1[i] * vector2[i]; + for (int i = 0; i < dim; i++) + { + result[i] = vector1[i] * vector2[i]; + } } - } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - ModuleBase::vector_mul_vector_op()(dim, result, vector1, vector2); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + ModuleBase::vector_mul_vector_op () (dim, result, vector1, vector2); + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } - template -void vector_div_vector(const int& dim, T* result, const T* vector1, const T* vector2, base_device::AbacusDevice_t device_type){ - using Real = typename GetTypeReal::type; - if (device_type == base_device::AbacusDevice_t::CpuDevice) { +void + vector_div_vector (const int& dim, + T* result, + const T* vector1, + const T* vector2, + base_device::AbacusDevice_t device_type) +{ + using Real = typename GetTypeReal::type; + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int i = 0; i < dim; i++) - { - result[i] = vector1[i] / vector2[i]; + for (int i = 0; i < dim; i++) + { + result[i] = vector1[i] / vector2[i]; + } } - } #ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - ModuleBase::vector_div_vector_op()(dim, result, vector1, vector2); - } + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + ModuleBase::vector_div_vector_op () (dim, result, vector1, vector2); + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } 
+ else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void vector_add_vector(const int& dim, float *result, const float *vector1, const float constant1, const float *vector2, const float constant2, base_device::AbacusDevice_t device_type) +void + vector_add_vector (const int& dim, + float* result, + const float* vector1, + const float constant1, + const float* vector2, + const float constant2, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::CpuDevice){ + if (device_type == base_device::CpuDevice) + { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int i = 0; i < dim; i++) - { - result[i] = vector1[i] * constant1 + vector2[i] * constant2; + for (int i = 0; i < dim; i++) + { + result[i] = vector1[i] * constant1 + vector2[i] * constant2; + } } - } #ifdef __CUDA - else if (device_type == base_device::GpuDevice) { - ModuleBase::vector_add_vector_op()(dim, result, vector1, constant1, vector2, constant2); - } + else if (device_type == base_device::GpuDevice) + { + ModuleBase::vector_add_vector_op () (dim, + result, + vector1, + constant1, + vector2, + constant2); + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void vector_add_vector(const int& dim, double *result, const double *vector1, const double constant1, const double *vector2, const double constant2, base_device::AbacusDevice_t device_type) +void + vector_add_vector (const int& dim, + double* result, + const double* vector1, + const double constant1, + const double* vector2, + const double constant2, + base_device::AbacusDevice_t device_type) { - if 
(device_type == base_device::CpuDevice){ + if (device_type == base_device::CpuDevice) + { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int i = 0; i < dim; i++) - { - result[i] = vector1[i] * constant1 + vector2[i] * constant2; + for (int i = 0; i < dim; i++) + { + result[i] = vector1[i] * constant1 + vector2[i] * constant2; + } } - } #ifdef __CUDA - else if (device_type == base_device::GpuDevice) { - ModuleBase::vector_add_vector_op()(dim, result, vector1, constant1, vector2, constant2); - } + else if (device_type == base_device::GpuDevice) + { + ModuleBase::vector_add_vector_op () (dim, + result, + vector1, + constant1, + vector2, + constant2); + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void vector_add_vector(const int& dim, std::complex *result, const std::complex *vector1, const float constant1, const std::complex *vector2, const float constant2, base_device::AbacusDevice_t device_type) +void + vector_add_vector (const int& dim, + std::complex* result, + const std::complex* vector1, + const float constant1, + const std::complex* vector2, + const float constant2, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::CpuDevice){ + if (device_type == base_device::CpuDevice) + { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int i = 0; i < dim; i++) - { - result[i] = vector1[i] * constant1 + vector2[i] * constant2; + for (int i = 0; i < dim; i++) + { + result[i] = vector1[i] * constant1 + vector2[i] * constant2; + } } - } #ifdef __CUDA - else if (device_type == base_device::GpuDevice) { - ModuleBase::vector_add_vector_op, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, 
constant2); - } + else if (device_type == base_device::GpuDevice) + { + ModuleBase::vector_add_vector_op, base_device::DEVICE_GPU> () (dim, + result, + vector1, + constant1, + vector2, + constant2); + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } -void vector_add_vector(const int& dim, std::complex *result, const std::complex *vector1, const double constant1, const std::complex *vector2, const double constant2, base_device::AbacusDevice_t device_type) +void + vector_add_vector (const int& dim, + std::complex* result, + const std::complex* vector1, + const double constant1, + const std::complex* vector2, + const double constant2, + base_device::AbacusDevice_t device_type) { - if (device_type == base_device::CpuDevice){ + if (device_type == base_device::CpuDevice) + { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int i = 0; i < dim; i++) - { - result[i] = vector1[i] * constant1 + vector2[i] * constant2; + for (int i = 0; i < dim; i++) + { + result[i] = vector1[i] * constant1 + vector2[i] * constant2; + } } - } #ifdef __CUDA - else if (device_type == base_device::GpuDevice) { - ModuleBase::vector_add_vector_op, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2); - } + else if (device_type == base_device::GpuDevice) + { + ModuleBase::vector_add_vector_op, base_device::DEVICE_GPU> () (dim, + result, + vector1, + constant1, + vector2, + constant2); + } #endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } + else + { + throw std::invalid_argument ("device_type = " + std::to_string (device_type) + " in " + + 
std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } } \ No newline at end of file diff --git a/source/source_base/module_external/lapack_connector.h b/source/source_base/module_external/lapack_connector.h index 5007aedabf4..c9da6bb476b 100644 --- a/source/source_base/module_external/lapack_connector.h +++ b/source/source_base/module_external/lapack_connector.h @@ -1,14 +1,14 @@ /** * @file lapack_connector.h - * + * * @brief This is a wrapper of some LAPACK routines. * \b Row-Major version. - * + * * @warning MAY BE DEPRECATED IN THE FUTURE. * @warning For Column-major version, please refer to \c source/source_base/module_container/base/third_party/lapack.h. - * - * @note - * !!! Note that + * + * @note + * !!! Note that * This wrapper is a C++ style wrapper of LAPACK routines, * i.e., assuming that the input matrices are in \b row-major order. * The data layout in C++ is row-major, C style, @@ -16,7 +16,7 @@ * (ModuleBase::ComplexMatrix is in row-major order) * The wrapper will do the data transformation between * row-major and column-major order automatically. - * + * */ #ifndef LAPACK_CONNECTOR_HPP @@ -30,305 +30,459 @@ #include "../complexmatrix.h" #include "../global_function.h" -//Naming convention of lapack subroutines : ammxxx, where +// Naming convention of lapack subroutines : ammxxx, where //"a" specifies the data type: -// - s stands for float -// - d stands for double -// - c stands for complex float -// - z stands for complex double +// - s stands for float +// - d stands for double +// - c stands for complex float +// - z stands for complex double //"mm" specifies the type of matrix, for example: -// - he stands for hermitian -// - sy stands for symmetric +// - he stands for hermitian +// - sy stands for symmetric //"xxx" specifies the type of problem, for example: -// - gv stands for generalized eigenvalue +// - gv stands for generalized eigenvalue // The following declarations cover only a subset of LAPACK routines. 
// If you need a LAPACK function that is not included here, feel free to add its declaration as needed. extern "C" { -// === Generalized Hermitian-definite eigenproblems === -void dsygvd_(const int* itype, const char* jobz, const char* uplo, const int* n, - double* a, const int* lda, - double* b, const int* ldb, - double* w, - double* work, const int* lwork, - int* iwork, const int* liwork, - int* info); - -void chegvd_(const int* itype, const char* jobz, const char* uplo, const int* n, - std::complex* a, const int* lda, - std::complex* b, const int* ldb, - float* w, - std::complex* work, const int* lwork, - float* rwork, const int* lrwork, - int* iwork, const int* liwork, - int* info); - -void zhegvd_(const int* itype, const char* jobz, const char* uplo, const int* n, - std::complex* a, const int* lda, - std::complex* b, const int* ldb, - double* w, - std::complex* work, const int* lwork, - double* rwork, const int* lrwork, - int* iwork, const int* liwork, - int* info); - -// === Selected eigenvalues/vectors: standard Hermitian === - -void dsyevx_(const char* jobz, const char* range, const char* uplo, const int* n, - double* a, const int* lda, - const double* vl, const double* vu, - const int* il, const int* iu, - const double* abstol, - int* m, double* w, double* z, const int* ldz, - double* work, const int* lwork, - int* iwork, int* ifail, - int* info); - -void cheevx_(const char* jobz, const char* range, const char* uplo, const int* n, - std::complex* a, const int* lda, - const float* vl, const float* vu, - const int* il, const int* iu, - const float* abstol, - int* m, float* w, std::complex* z, const int* ldz, - std::complex* work, const int* lwork, - float* rwork, int* iwork, int* ifail, - int* info); - -void zheevx_(const char* jobz, const char* range, const char* uplo, const int* n, - std::complex* a, const int* lda, - const double* vl, const double* vu, - const int* il, const int* iu, - const double* abstol, - int* m, double* w, std::complex* z, const int* 
ldz, - std::complex* work, const int* lwork, - double* rwork, int* iwork, int* ifail, - int* info); - -// === Selected eigenvalues/vectors: generalized Hermitian === - -void dsygvx_(const int* itype, const char* jobz, const char* range, const char* uplo, - const int* n, - double* a, const int* lda, - double* b, const int* ldb, - const double* vl, const double* vu, - const int* il, const int* iu, - const double* abstol, - int* m, double* w, double* z, const int* ldz, - double* work, const int* lwork, - int* iwork, int* ifail, - int* info); - -void chegvx_(const int* itype, const char* jobz, const char* range, const char* uplo, - const int* n, - std::complex* a, const int* lda, - std::complex* b, const int* ldb, - const float* vl, const float* vu, - const int* il, const int* iu, - const float* abstol, - int* m, float* w, std::complex* z, const int* ldz, - std::complex* work, const int* lwork, - float* rwork, int* iwork, int* ifail, - int* info); - -void zhegvx_(const int* itype, const char* jobz, const char* range, const char* uplo, - const int* n, - std::complex* a, const int* lda, - std::complex* b, const int* ldb, - const double* vl, const double* vu, - const int* il, const int* iu, - const double* abstol, - int* m, double* w, std::complex* z, const int* ldz, - std::complex* work, const int* lwork, - double* rwork, int* iwork, int* ifail, - int* info); - -// === Generalized Hermitian: all eigenvalues (simple driver) === - -void dsygv_(const int* itype, const char* jobz, const char* uplo, const int* n, - double* a, const int* lda, - double* b, const int* ldb, - double* w, - double* work, const int* lwork, - int* info); - -void chegv_(const int* itype, const char* jobz, const char* uplo, const int* n, - std::complex* a, const int* lda, - std::complex* b, const int* ldb, - float* w, - std::complex* work, const int* lwork, - float* rwork, - int* info); - -void zhegv_(const int* itype, const char* jobz, const char* uplo, const int* n, - std::complex* a, const int* lda, 
- std::complex* b, const int* ldb, - double* w, - std::complex* work, const int* lwork, - double* rwork, - int* info); - -// === Standard Hermitian eigenproblem === - - void ssyev_(const char* jobz, - const char* uplo, - const int* n, - float* a, - const int* lda, - float* w, - float* work, - const int* lwork, - int* info); - void dsyev_(const char* jobz, - const char* uplo, - const int* n, - double* a, - const int* lda, - double* w, - double* work, - const int* lwork, - int* info); - -void cheev_(const char* jobz, const char* uplo, const int* n, - std::complex* a, const int* lda, - float* w, - std::complex* work, const int* lwork, - float* rwork, - int* info); - -void zheev_(const char* jobz, const char* uplo, const int* n, - std::complex* a, const int* lda, - double* w, - std::complex* work, const int* lwork, - double* rwork, - int* info); - -// === General (non-Hermitian) eigenproblem === - -void dgeev_(const char* jobvl, const char* jobvr, const int* n, - double* a, const int* lda, - double* wr, double* wi, - double* vl, const int* ldvl, - double* vr, const int* ldvr, - double* work, const int* lwork, - int* info); - -void zgeev_(const char* jobvl, const char* jobvr, const int* n, - std::complex* a, const int* lda, - std::complex* w, - std::complex* vl, const int* ldvl, - std::complex* vr, const int* ldvr, - std::complex* work, const int* lwork, - double* rwork, - int* info); - -// === Matrix inversion (LU) === - -void dgetrf_(const int* m, const int* n, double* a, const int* lda, - int* ipiv, int* info); - -void dgetri_(const int* n, double* a, const int* lda, - const int* ipiv, - double* work, const int* lwork, - int* info); - -// === Symmetric indefinite inversion (Bunch-Kaufman) === - -void dsytrf_(const char* uplo, const int* n, double* a, const int* lda, - int* ipiv, double* work, const int* lwork, int* info); - -void dsytri_(const char* uplo, const int* n, double* a, const int* lda, - const int* ipiv, double* work, int* info); - -// === Cholesky 
factorization & inversion === - -void spotrf_(const char* uplo, const int* n, float* a, const int* lda, int* info); -void dpotrf_(const char* uplo, const int* n, double* a, const int* lda, int* info); -void cpotrf_(const char* uplo, const int* n, std::complex* a, const int* lda, int* info); -void zpotrf_(const char* uplo, const int* n, std::complex* a, const int* lda, int* info); - -void spotri_(const char* uplo, const int* n, float* a, const int* lda, int* info); -void dpotri_(const char* uplo, const int* n, double* a, const int* lda, int* info); -void cpotri_(const char* uplo, const int* n, std::complex* a, const int* lda, int* info); -void zpotri_(const char* uplo, const int* n, std::complex* a, const int* lda, int* info); - -// === Complex LU inversion === - -void zgetrf_(const int* m, const int* n, std::complex* a, const int* lda, - int* ipiv, int* info); - -void zgetri_(const int* n, std::complex* a, const int* lda, - const int* ipiv, - std::complex* work, const int* lwork, - int* info); - - -// === Tridiagonal eigen solvers === - -void dsterf_(const int* n, double* d, double* e, int* info); - -void dstein_(const int* n, const double* d, const double* e, - const int* m, const double* w, - const int* iblock, const int* isplit, - double* z, const int* ldz, - double* work, int* iwork, int* ifail, - int* info); - -void zstein_(const int* n, const double* d, const double* e, - const int* m, const double* w, - const int* iblock, const int* isplit, - std::complex* z, const int* ldz, - double* work, int* iwork, int* ifail, - int* info); - -// === Unblocked Cholesky (level 2 BLAS) === - -void dpotf2_(const char* uplo, const int* n, double* a, const int* lda, int* info); -void zpotf2_(const char* uplo, const int* n, std::complex* a, const int* lda, int* info); - -// === Tridiagonal solver === - -void dgtsv_(const int* n, const int* nrhs, - double* dl, double* d, double* du, - double* b, const int* ldb, - int* info); - -// === Symmetric indefinite linear solver === - 
-void dsysv_(const char* uplo, const int* n, const int* nrhs, - double* a, const int* lda, - int* ipiv, - double* b, const int* ldb, - double* work, const int* lwork, - int* info); -} // extern "C" + // === Generalized Hermitian-definite eigenproblems === + void dsygvd_ (const int* itype, + const char* jobz, + const char* uplo, + const int* n, + double* a, + const int* lda, + double* b, + const int* ldb, + double* w, + double* work, + const int* lwork, + int* iwork, + const int* liwork, + int* info); + + void chegvd_ (const int* itype, + const char* jobz, + const char* uplo, + const int* n, + std::complex* a, + const int* lda, + std::complex* b, + const int* ldb, + float* w, + std::complex* work, + const int* lwork, + float* rwork, + const int* lrwork, + int* iwork, + const int* liwork, + int* info); + + void zhegvd_ (const int* itype, + const char* jobz, + const char* uplo, + const int* n, + std::complex* a, + const int* lda, + std::complex* b, + const int* ldb, + double* w, + std::complex* work, + const int* lwork, + double* rwork, + const int* lrwork, + int* iwork, + const int* liwork, + int* info); + + // === Selected eigenvalues/vectors: standard Hermitian === + + void dsyevx_ (const char* jobz, + const char* range, + const char* uplo, + const int* n, + double* a, + const int* lda, + const double* vl, + const double* vu, + const int* il, + const int* iu, + const double* abstol, + int* m, + double* w, + double* z, + const int* ldz, + double* work, + const int* lwork, + int* iwork, + int* ifail, + int* info); + + void cheevx_ (const char* jobz, + const char* range, + const char* uplo, + const int* n, + std::complex* a, + const int* lda, + const float* vl, + const float* vu, + const int* il, + const int* iu, + const float* abstol, + int* m, + float* w, + std::complex* z, + const int* ldz, + std::complex* work, + const int* lwork, + float* rwork, + int* iwork, + int* ifail, + int* info); + + void zheevx_ (const char* jobz, + const char* range, + const char* uplo, 
+ const int* n, + std::complex* a, + const int* lda, + const double* vl, + const double* vu, + const int* il, + const int* iu, + const double* abstol, + int* m, + double* w, + std::complex* z, + const int* ldz, + std::complex* work, + const int* lwork, + double* rwork, + int* iwork, + int* ifail, + int* info); + + // === Selected eigenvalues/vectors: generalized Hermitian === + + void dsygvx_ (const int* itype, + const char* jobz, + const char* range, + const char* uplo, + const int* n, + double* a, + const int* lda, + double* b, + const int* ldb, + const double* vl, + const double* vu, + const int* il, + const int* iu, + const double* abstol, + int* m, + double* w, + double* z, + const int* ldz, + double* work, + const int* lwork, + int* iwork, + int* ifail, + int* info); + + void chegvx_ (const int* itype, + const char* jobz, + const char* range, + const char* uplo, + const int* n, + std::complex* a, + const int* lda, + std::complex* b, + const int* ldb, + const float* vl, + const float* vu, + const int* il, + const int* iu, + const float* abstol, + int* m, + float* w, + std::complex* z, + const int* ldz, + std::complex* work, + const int* lwork, + float* rwork, + int* iwork, + int* ifail, + int* info); + + void zhegvx_ (const int* itype, + const char* jobz, + const char* range, + const char* uplo, + const int* n, + std::complex* a, + const int* lda, + std::complex* b, + const int* ldb, + const double* vl, + const double* vu, + const int* il, + const int* iu, + const double* abstol, + int* m, + double* w, + std::complex* z, + const int* ldz, + std::complex* work, + const int* lwork, + double* rwork, + int* iwork, + int* ifail, + int* info); + + // === Generalized Hermitian: all eigenvalues (simple driver) === + + void dsygv_ (const int* itype, + const char* jobz, + const char* uplo, + const int* n, + double* a, + const int* lda, + double* b, + const int* ldb, + double* w, + double* work, + const int* lwork, + int* info); + + void chegv_ (const int* itype, + const 
char* jobz, + const char* uplo, + const int* n, + std::complex* a, + const int* lda, + std::complex* b, + const int* ldb, + float* w, + std::complex* work, + const int* lwork, + float* rwork, + int* info); + + void zhegv_ (const int* itype, + const char* jobz, + const char* uplo, + const int* n, + std::complex* a, + const int* lda, + std::complex* b, + const int* ldb, + double* w, + std::complex* work, + const int* lwork, + double* rwork, + int* info); + + // === Standard Hermitian eigenproblem === + + void ssyev_ (const char* jobz, + const char* uplo, + const int* n, + float* a, + const int* lda, + float* w, + float* work, + const int* lwork, + int* info); + void dsyev_ (const char* jobz, + const char* uplo, + const int* n, + double* a, + const int* lda, + double* w, + double* work, + const int* lwork, + int* info); + + void cheev_ (const char* jobz, + const char* uplo, + const int* n, + std::complex* a, + const int* lda, + float* w, + std::complex* work, + const int* lwork, + float* rwork, + int* info); + + void zheev_ (const char* jobz, + const char* uplo, + const int* n, + std::complex* a, + const int* lda, + double* w, + std::complex* work, + const int* lwork, + double* rwork, + int* info); + + // === General (non-Hermitian) eigenproblem === + + void dgeev_ (const char* jobvl, + const char* jobvr, + const int* n, + double* a, + const int* lda, + double* wr, + double* wi, + double* vl, + const int* ldvl, + double* vr, + const int* ldvr, + double* work, + const int* lwork, + int* info); + + void zgeev_ (const char* jobvl, + const char* jobvr, + const int* n, + std::complex* a, + const int* lda, + std::complex* w, + std::complex* vl, + const int* ldvl, + std::complex* vr, + const int* ldvr, + std::complex* work, + const int* lwork, + double* rwork, + int* info); + + // === Matrix inversion (LU) === + + void dgetrf_ (const int* m, const int* n, double* a, const int* lda, int* ipiv, int* info); + + void dgetri_ (const int* n, double* a, const int* lda, const int* 
ipiv, double* work, const int* lwork, int* info); + + // === Symmetric indefinite inversion (Bunch-Kaufman) === + + void dsytrf_ (const char* uplo, + const int* n, + double* a, + const int* lda, + int* ipiv, + double* work, + const int* lwork, + int* info); + + void dsytri_ (const char* uplo, const int* n, double* a, const int* lda, const int* ipiv, double* work, int* info); + + // === Cholesky factorization & inversion === + + void spotrf_ (const char* uplo, const int* n, float* a, const int* lda, int* info); + void dpotrf_ (const char* uplo, const int* n, double* a, const int* lda, int* info); + void cpotrf_ (const char* uplo, const int* n, std::complex* a, const int* lda, int* info); + void zpotrf_ (const char* uplo, const int* n, std::complex* a, const int* lda, int* info); + + void spotri_ (const char* uplo, const int* n, float* a, const int* lda, int* info); + void dpotri_ (const char* uplo, const int* n, double* a, const int* lda, int* info); + void cpotri_ (const char* uplo, const int* n, std::complex* a, const int* lda, int* info); + void zpotri_ (const char* uplo, const int* n, std::complex* a, const int* lda, int* info); + + // === Complex LU inversion === + + void zgetrf_ (const int* m, const int* n, std::complex* a, const int* lda, int* ipiv, int* info); + + void zgetri_ (const int* n, + std::complex* a, + const int* lda, + const int* ipiv, + std::complex* work, + const int* lwork, + int* info); + + // === Tridiagonal eigen solvers === + + void dsterf_ (const int* n, double* d, double* e, int* info); + + void dstein_ (const int* n, + const double* d, + const double* e, + const int* m, + const double* w, + const int* iblock, + const int* isplit, + double* z, + const int* ldz, + double* work, + int* iwork, + int* ifail, + int* info); + + void zstein_ (const int* n, + const double* d, + const double* e, + const int* m, + const double* w, + const int* iblock, + const int* isplit, + std::complex* z, + const int* ldz, + double* work, + int* iwork, + int* 
ifail, + int* info); + + // === Unblocked Cholesky (level 2 BLAS) === + + void dpotf2_ (const char* uplo, const int* n, double* a, const int* lda, int* info); + void zpotf2_ (const char* uplo, const int* n, std::complex* a, const int* lda, int* info); + + // === Tridiagonal solver === + + void + dgtsv_ (const int* n, const int* nrhs, double* dl, double* d, double* du, double* b, const int* ldb, int* info); + + // === Symmetric indefinite linear solver === + + void dsysv_ (const char* uplo, + const int* n, + const int* nrhs, + double* a, + const int* lda, + int* ipiv, + double* b, + const int* ldb, + double* work, + const int* lwork, + int* info); +} // extern "C" #ifdef GATHER_INFO #define zhegvx_ zhegvx_i -void zhegvx_i(const int* itype, - const char* jobz, - const char* range, - const char* uplo, - const int* n, - std::complex* a, - const int* lda, - std::complex* b, - const int* ldb, - const double* vl, - const double* vu, - const int* il, - const int* iu, - const double* abstol, - int* m, - double* w, - std::complex* z, - const int* ldz, - std::complex* work, - const int* lwork, - double* rwork, - int* iwork, - int* ifail, - int* info); +void zhegvx_i (const int* itype, + const char* jobz, + const char* range, + const char* uplo, + const int* n, + std::complex* a, + const int* lda, + std::complex* b, + const int* ldb, + const double* vl, + const double* vu, + const int* il, + const int* iu, + const double* abstol, + int* m, + double* w, + std::complex* z, + const int* ldz, + std::complex* work, + const int* lwork, + double* rwork, + int* iwork, + int* ifail, + int* info); #endif // GATHER_INFO // Class LapackConnector provide the connector to fortran lapack routine. @@ -336,250 +490,269 @@ void zhegvx_i(const int* itype, // Usage example: LapackConnector::functionname(parameter list). namespace LapackConnector { - // Transpose the std::complex matrix to the fortran-form real-std::complex array. 
- static inline - std::complex* transpose(const ModuleBase::ComplexMatrix& a, const int n, const int lda) - { - std::complex* aux = new std::complex[lda*n]; - for (int i = 0; i < n; ++i) +// Transpose the std::complex matrix to the fortran-form real-std::complex array. +static inline std::complex* + transpose (const ModuleBase::ComplexMatrix& a, const int n, const int lda) +{ + std::complex* aux = new std::complex[lda * n]; + for (int i = 0; i < n; ++i) { for (int j = 0; j < lda; ++j) - { - aux[i*lda+j] = a(j,i); // aux[i*lda+j] means aux[i][j] in semantic, not in syntax! - } + { + aux[i * lda + j] = a (j, i); // aux[i*lda+j] means aux[i][j] in semantic, not in syntax! + } } - return aux; - } - - static inline - std::complex* transpose(const std::complex* a, const int n, const int lda, const int nbase_x) - { - std::complex* aux = new std::complex[lda*n]; - for (int i = 0; i < n; ++i) + return aux; +} + +static inline std::complex* + transpose (const std::complex* a, const int n, const int lda, const int nbase_x) +{ + std::complex* aux = new std::complex[lda * n]; + for (int i = 0; i < n; ++i) { for (int j = 0; j < lda; ++j) - { - aux[j * n + i] = a[i * nbase_x + j]; - } + { + aux[j * n + i] = a[i * nbase_x + j]; + } } - return aux; - } - - static inline - std::complex* transpose(const std::complex* a, const int n, const int lda, const int nbase_x) - { - std::complex* aux = new std::complex[lda*n]; - for (int i = 0; i < n; ++i) + return aux; +} + +static inline std::complex* + transpose (const std::complex* a, const int n, const int lda, const int nbase_x) +{ + std::complex* aux = new std::complex[lda * n]; + for (int i = 0; i < n; ++i) { for (int j = 0; j < lda; ++j) - { - aux[j * n + i] = a[i * nbase_x + j]; - } + { + aux[j * n + i] = a[i * nbase_x + j]; + } } - return aux; - } + return aux; +} - // Transpose the fortran-form real-std::complex array to the std::complex matrix. 
- static inline - void transpose(const std::complex* aux, ModuleBase::ComplexMatrix& a, const int n, const int lda) - { - for (int i = 0; i < n; ++i) +// Transpose the fortran-form real-std::complex array to the std::complex matrix. +static inline void + transpose (const std::complex* aux, ModuleBase::ComplexMatrix& a, const int n, const int lda) +{ + for (int i = 0; i < n; ++i) { for (int j = 0; j < lda; ++j) - { - a(j, i) = aux[i*lda+j]; // aux[i*lda+j] means aux[i][j] in semantic, not in syntax! - } + { + a (j, i) = aux[i * lda + j]; // aux[i*lda+j] means aux[i][j] in semantic, not in syntax! + } } - } +} - // Transpose the fortran-form real-std::complex array to the std::complex matrix. - static inline - void transpose(const std::complex* aux, std::complex* a, const int n, const int lda, const int nbase_x) - { - for (int i = 0; i < n; ++i) +// Transpose the fortran-form real-std::complex array to the std::complex matrix. +static inline void + transpose (const std::complex* aux, std::complex* a, const int n, const int lda, const int nbase_x) +{ + for (int i = 0; i < n; ++i) { for (int j = 0; j < lda; ++j) - { - a[j * nbase_x + i] = aux[i * lda + j]; // aux[i*lda+j] means aux[i][j] in semantic, not in syntax! - } + { + a[j * nbase_x + i] = aux[i * lda + j]; // aux[i*lda+j] means aux[i][j] in semantic, not in syntax! + } } - } +} - // Transpose the fortran-form real-std::complex array to the std::complex matrix. - static inline - void transpose(const std::complex* aux, std::complex* a, const int n, const int lda, const int nbase_x) - { - for (int i = 0; i < n; ++i) +// Transpose the fortran-form real-std::complex array to the std::complex matrix. +static inline void + transpose (const std::complex* aux, std::complex* a, const int n, const int lda, const int nbase_x) +{ + for (int i = 0; i < n; ++i) { for (int j = 0; j < lda; ++j) - { - a[j * nbase_x + i] = aux[i * lda + j]; // aux[i*lda+j] means aux[i][j] in semantic, not in syntax! 
- } + { + a[j * nbase_x + i] = aux[i * lda + j]; // aux[i*lda+j] means aux[i][j] in semantic, not in syntax! + } + } +} + +// Peize Lin add 2015-12-27 +static inline char + change_uplo (const char& uplo) +{ + switch (uplo) + { + case 'U': + return 'L'; + case 'L': + return 'U'; + default: + throw std::invalid_argument ("uplo must be 'U' or 'L'"); + } +} + +// Peize Lin add 2019-04-14 +static inline char + change_trans_NC (const char& trans) +{ + switch (trans) + { + case 'N': + return 'C'; + case 'C': + return 'N'; + default: + throw std::invalid_argument ("trans must be 'N' or 'C'"); } - } - - // Peize Lin add 2015-12-27 - static inline - char change_uplo(const char &uplo) - { - switch(uplo) - { - case 'U': return 'L'; - case 'L': return 'U'; - default: throw std::invalid_argument("uplo must be 'U' or 'L'"); - } - } - - // Peize Lin add 2019-04-14 - static inline - char change_trans_NC(const char &trans) - { - switch(trans) - { - case 'N': return 'C'; - case 'C': return 'N'; - default: throw std::invalid_argument("trans must be 'N' or 'C'"); - } - } - - // wrap function of fortran lapack routine zheev. - static inline - void zheev( const char jobz, - const char uplo, - const int n, - ModuleBase::ComplexMatrix& a, - const int lda, - double* w, - std::complex< double >* work, - const int lwork, - double* rwork, - int *info ) - { // Transpose the std::complex matrix to the fortran-form real-std::complex array. - std::complex *aux = LapackConnector::transpose(a, n, lda); - // call the fortran routine - zheev_(&jobz, &uplo, &n, aux, &lda, w, work, &lwork, rwork, info); - // Transpose the fortran-form real-std::complex array to the std::complex matrix. - LapackConnector::transpose(aux, a, n, lda); - // free the memory. 
- delete[] aux; - } - - static inline - void zgetrf(int m, int n, ModuleBase::ComplexMatrix &a, const int lda, int *ipiv, int *info) - { - std::complex *aux = LapackConnector::transpose(a, n, lda); - zgetrf_( &m, &n, aux, &lda, ipiv, info); - LapackConnector::transpose(aux, a, n, lda); - delete[] aux; - return; - } - static inline - void zgetri(int n, ModuleBase::ComplexMatrix &a, int lda, int *ipiv, std::complex * work, int lwork, int *info) - { - std::complex *aux = LapackConnector::transpose(a, n, lda); - zgetri_( &n, aux, &lda, ipiv, work, &lwork, info); - LapackConnector::transpose(aux, a, n, lda); - delete[] aux; - return; - } - - // Peize Lin add 2016-07-09 - static inline - void potrf( const char &uplo, const int &n, float*const A, const int &lda, int &info ) - { - const char uplo_changed = change_uplo(uplo); - spotrf_( &uplo_changed, &n, A, &lda, &info ); - } - static inline - void potrf( const char &uplo, const int &n, double*const A, const int &lda, int &info ) - { - const char uplo_changed = change_uplo(uplo); - dpotrf_( &uplo_changed, &n, A, &lda, &info ); - } - static inline - void potrf( const char &uplo, const int &n, std::complex*const A, const int &lda, int &info ) - { - const char uplo_changed = change_uplo(uplo); - cpotrf_( &uplo_changed, &n, A, &lda, &info ); - } - static inline - void potrf( const char &uplo, const int &n, std::complex*const A, const int &lda, int &info ) - { - const char uplo_changed = change_uplo(uplo); - zpotrf_( &uplo_changed, &n, A, &lda, &info ); - } - - - // Peize Lin add 2016-07-09 - static inline - void potri( const char &uplo, const int &n, float*const A, const int &lda, int &info ) - { - const char uplo_changed = change_uplo(uplo); - spotri_( &uplo_changed, &n, A, &lda, &info); - } - static inline - void potri( const char &uplo, const int &n, double*const A, const int &lda, int &info ) - { - const char uplo_changed = change_uplo(uplo); - dpotri_( &uplo_changed, &n, A, &lda, &info); - } - static inline - void potri( 
const char &uplo, const int &n, std::complex*const A, const int &lda, int &info ) - { - const char uplo_changed = change_uplo(uplo); - cpotri_( &uplo_changed, &n, A, &lda, &info); - } - static inline - void potri( const char &uplo, const int &n, std::complex*const A, const int &lda, int &info ) - { - const char uplo_changed = change_uplo(uplo); - zpotri_( &uplo_changed, &n, A, &lda, &info); - } - - // Peize Lin add 2016-07-09 - static inline - void potrf( const char &uplo, const int &n, ModuleBase::matrix &A, const int &lda, int &info ) - { - potrf( uplo, n, A.c, lda, info ); - } - static inline - void potrf( const char &uplo, const int &n, ModuleBase::ComplexMatrix &A, const int &lda, int &info ) - { - potrf( uplo, n, A.c, lda, info ); - } - - // Peize Lin add 2016-07-09 - static inline - void potri( const char &uplo, const int &n, ModuleBase::matrix &A, const int &lda, int &info ) - { - potri( uplo, n, A.c, lda, info); - } - static inline - void potri( const char &uplo, const int &n, ModuleBase::ComplexMatrix &A, const int &lda, int &info ) - { - potri( uplo, n, A.c, lda, info); - } - - // Peize Lin add 2019-04-14 - // if trans=='N': C = a * A * A.H + b * C - // if trans=='C': C = a * A.H * A + b * C - static inline - void herk(const char uplo, const char trans, const int n, const int k, - const double alpha, const std::complex *A, const int lda, - const double beta, std::complex *C, const int ldc) - { - const char uplo_changed = change_uplo(uplo); - const char trans_changed = change_trans_NC(trans); - zherk_(&uplo_changed, &trans_changed, &n, &k, &alpha, A, &lda, &beta, C, &ldc); - } - static inline - void herk(const char uplo, const char trans, const int n, const int k, - const float alpha, const std::complex* A, const int lda, - const float beta, std::complex* C, const int ldc) - { - const char uplo_changed = change_uplo(uplo); - const char trans_changed = change_trans_NC(trans); - cherk_(&uplo_changed, &trans_changed, &n, &k, &alpha, A, &lda, &beta, C, &ldc); 
- } +} + +// wrap function of fortran lapack routine zheev. +static inline void + zheev (const char jobz, + const char uplo, + const int n, + ModuleBase::ComplexMatrix& a, + const int lda, + double* w, + std::complex* work, + const int lwork, + double* rwork, + int* info) +{ // Transpose the std::complex matrix to the fortran-form real-std::complex array. + std::complex* aux = LapackConnector::transpose (a, n, lda); + // call the fortran routine + zheev_ (&jobz, &uplo, &n, aux, &lda, w, work, &lwork, rwork, info); + // Transpose the fortran-form real-std::complex array to the std::complex matrix. + LapackConnector::transpose (aux, a, n, lda); + // free the memory. + delete[] aux; +} + +static inline void + zgetrf (int m, int n, ModuleBase::ComplexMatrix& a, const int lda, int* ipiv, int* info) +{ + std::complex* aux = LapackConnector::transpose (a, n, lda); + zgetrf_ (&m, &n, aux, &lda, ipiv, info); + LapackConnector::transpose (aux, a, n, lda); + delete[] aux; + return; +} +static inline void + zgetri (int n, ModuleBase::ComplexMatrix& a, int lda, int* ipiv, std::complex* work, int lwork, int* info) +{ + std::complex* aux = LapackConnector::transpose (a, n, lda); + zgetri_ (&n, aux, &lda, ipiv, work, &lwork, info); + LapackConnector::transpose (aux, a, n, lda); + delete[] aux; + return; +} + +// Peize Lin add 2016-07-09 +static inline void + potrf (const char& uplo, const int& n, float* const A, const int& lda, int& info) +{ + const char uplo_changed = change_uplo (uplo); + spotrf_ (&uplo_changed, &n, A, &lda, &info); +} +static inline void + potrf (const char& uplo, const int& n, double* const A, const int& lda, int& info) +{ + const char uplo_changed = change_uplo (uplo); + dpotrf_ (&uplo_changed, &n, A, &lda, &info); +} +static inline void + potrf (const char& uplo, const int& n, std::complex* const A, const int& lda, int& info) +{ + const char uplo_changed = change_uplo (uplo); + cpotrf_ (&uplo_changed, &n, A, &lda, &info); +} +static inline void + potrf 
(const char& uplo, const int& n, std::complex* const A, const int& lda, int& info) +{ + const char uplo_changed = change_uplo (uplo); + zpotrf_ (&uplo_changed, &n, A, &lda, &info); +} + +// Peize Lin add 2016-07-09 +static inline void + potri (const char& uplo, const int& n, float* const A, const int& lda, int& info) +{ + const char uplo_changed = change_uplo (uplo); + spotri_ (&uplo_changed, &n, A, &lda, &info); +} +static inline void + potri (const char& uplo, const int& n, double* const A, const int& lda, int& info) +{ + const char uplo_changed = change_uplo (uplo); + dpotri_ (&uplo_changed, &n, A, &lda, &info); +} +static inline void + potri (const char& uplo, const int& n, std::complex* const A, const int& lda, int& info) +{ + const char uplo_changed = change_uplo (uplo); + cpotri_ (&uplo_changed, &n, A, &lda, &info); +} +static inline void + potri (const char& uplo, const int& n, std::complex* const A, const int& lda, int& info) +{ + const char uplo_changed = change_uplo (uplo); + zpotri_ (&uplo_changed, &n, A, &lda, &info); +} + +// Peize Lin add 2016-07-09 +static inline void + potrf (const char& uplo, const int& n, ModuleBase::matrix& A, const int& lda, int& info) +{ + potrf (uplo, n, A.c, lda, info); +} +static inline void + potrf (const char& uplo, const int& n, ModuleBase::ComplexMatrix& A, const int& lda, int& info) +{ + potrf (uplo, n, A.c, lda, info); +} + +// Peize Lin add 2016-07-09 +static inline void + potri (const char& uplo, const int& n, ModuleBase::matrix& A, const int& lda, int& info) +{ + potri (uplo, n, A.c, lda, info); +} +static inline void + potri (const char& uplo, const int& n, ModuleBase::ComplexMatrix& A, const int& lda, int& info) +{ + potri (uplo, n, A.c, lda, info); +} + +// Peize Lin add 2019-04-14 +// if trans=='N': C = a * A * A.H + b * C +// if trans=='C': C = a * A.H * A + b * C +static inline void + herk (const char uplo, + const char trans, + const int n, + const int k, + const double alpha, + const std::complex* A, + 
const int lda, + const double beta, + std::complex* C, + const int ldc) +{ + const char uplo_changed = change_uplo (uplo); + const char trans_changed = change_trans_NC (trans); + zherk_ (&uplo_changed, &trans_changed, &n, &k, &alpha, A, &lda, &beta, C, &ldc); +} +static inline void + herk (const char uplo, + const char trans, + const int n, + const int k, + const float alpha, + const std::complex* A, + const int lda, + const float beta, + std::complex* C, + const int ldc) +{ + const char uplo_changed = change_uplo (uplo); + const char trans_changed = change_trans_NC (trans); + cherk_ (&uplo_changed, &trans_changed, &n, &k, &alpha, A, &lda, &beta, C, &ldc); +} } // namespace LapackConnector -#endif // LAPACK_CONNECTOR_HPP +#endif // LAPACK_CONNECTOR_HPP diff --git a/source/source_base/module_external/scalapack_connector.h b/source/source_base/module_external/scalapack_connector.h index 25eb3db59fa..4b0b6035094 100644 --- a/source/source_base/module_external/scalapack_connector.h +++ b/source/source_base/module_external/scalapack_connector.h @@ -7,463 +7,950 @@ extern "C" { - int numroc_( const int *n, const int *nb, const int *iproc, const int *srcproc, const int *nprocs ); - void descinit_( - int *desc, - const int *m, const int *n, const int *mb, const int *nb, const int *irsrc, const int *icsrc, - const int *ictxt, const int *lld, int *info); - - void pddot_(int* n, double* dot, double* x, int* ix, int* jx, int* descx, int* incx, - double* y, int* iy, int* jy, int* descy, int* incy); - void pzdotc_(int* n, std::complex* dot, std::complex* x, int* ix, int* jx, int* descx, int* incx, - std::complex* y, int* iy, int* jy, int* descy, int* incy); - - void pdpotrf_(char *uplo, int *n, double *a, int *ia, int *ja, int *desca, int *info); -// void pzpotrf_(char *uplo, int *n, double _Complex *a, int *ia, int *ja, int *desca, int *info); - void pzpotrf_(char *uplo, int *n, std::complex *a, int *ia, int *ja, int *desca, int *info); - - void pdtran_(const int* m, const int* 
n, - const double* alpha, const double* a, const int* ia, const int* ja, const int* desca, - const double* beta, double* c, const int* ic, const int* jc, const int* descc); - - void pztranu_(const int *m,const int*n, - const std::complex* alpha, const std::complex* a, const int* ia, const int* ja, const int* desca, - const std::complex *beta , std::complex *c , const int *ic ,const int *jc ,const int *descc); - - void pzgemv_( - const char *transa, - const int *M, const int *N, - const double *alpha, - const std::complex *A, const int *IA, const int *JA, const int *DESCA, - const std::complex *B, const int *IB, const int *JB, const int *DESCB, const int *K, - const double *beta, std::complex *C, const int *IC, const int *JC, const int *DESCC,const int *L); - void pdgemv_( - const char *transa, - const int *M, const int *N, - const double *alpha, - const double *A, const int *IA, const int *JA, const int *DESCA, - const double *B, const int *IB, const int *JB, const int *DESCB, const int *K, - const double *beta, double *C, const int *IC, const int *JC, const int *DESCC,const int *L); - // C = a * A.? * B.? 
+ b * C - void pdgemm_( - const char *transa, const char *transb, - const int *M, const int *N, const int *K, - const double *alpha, - const double *A, const int *IA, const int *JA, const int *DESCA, - const double *B, const int *IB, const int *JB, const int *DESCB, - const double *beta, - double *C, const int *IC, const int *JC, const int *DESCC); - void pzgemm_( - const char *transa, const char *transb, - const int *M, const int *N, const int *K, - const std::complex *alpha, - const std::complex *A, const int *IA, const int *JA, const int *DESCA, - const std::complex *B, const int *IB, const int *JB, const int *DESCB, - const std::complex *beta, - std::complex *C, const int *IC, const int *JC, const int *DESCC); - void pdsymm_(char *side , char *uplo , int *m , int *n , - double *alpha , double *a , int *ia , int *ja , int *desca , - double *b , int *ib , int *jb , int *descb , - double *beta , double *c , int *ic , int *jc , int *descc ); - void pdtrmm_(char *side , char *uplo , char *transa , char *diag , int *m , int *n , - double *alpha , double *a , int *ia , int *ja , int *desca , - double *b , int *ib , int *jb , int *descb ); -// void pztrmm_(char *side , char *uplo , char *transa , char *diag , int *m , int *n , -// double *alpha , double _Complex *a , int *ia , int *ja , int *desca , -// double _Complex *b , int *ib , int *jb , int *descb ); - void pztrmm_(char *side , char *uplo , char *transa , char *diag , int *m , int *n , - std::complex *alpha , std::complex *a , int *ia , int *ja , int *desca , - std::complex *b , int *ib , int *jb , int *descb ); - void pzhemm_(char* side , char* uplo , int* m , int* n , - std::complex* alpha , std::complex* a , int* ia , int* ja , int* desca , - std::complex* b , int* ib , int* jb , int* descb , - std::complex* beta , std::complex* c , int* ic , int* jc , int* descc ); - void pzgetrf_( - const int *M, const int *N, - std::complex *A, const int *IA, const int *JA, const int *DESCA, - int *ipiv, int *info); - - 
void pzgesv_( - const int *n, const int *nrhs, - const std::complex *A, const int *ia, const int *ja, const int *desca, - int *ipiv, std::complex* B, const int* ib, const int* jb, const int*descb, const int *info - ); - - void pdsygvx_(const int* itype, const char* jobz, const char* range, const char* uplo, - const int* n, double* A, const int* ia, const int* ja, const int*desca, double* B, const int* ib, const int* jb, const int*descb, - const double* vl, const double* vu, const int* il, const int* iu, - const double* abstol, int* m, int* nz, double* w, const double*orfac, double* Z, const int* iz, const int* jz, const int*descz, - double* work, int* lwork, int*iwork, int*liwork, int* ifail, int*iclustr, double*gap, int* info); - - void pzhegvx_(const int* itype, const char* jobz, const char* range, const char* uplo, - const int* n, std::complex* A, const int* ia, const int* ja, const int*desca, std::complex* B, const int* ib, const int* jb, const int*descb, - const double* vl, const double* vu, const int* il, const int* iu, - const double* abstol, int* m, int* nz, double* w, const double*orfac, std::complex* Z, const int* iz, const int* jz, const int*descz, - std::complex* work, int* lwork, double* rwork, int* lrwork, int*iwork, int*liwork, int* ifail, int*iclustr, double*gap, int* info); - - void pssygvx_(const int* itype, const char* jobz, const char* range, const char* uplo, - const int* n, float* A, const int* ia, const int* ja, const int*desca, float* B, const int* ib, const int* jb, const int*descb, - const float* vl, const float* vu, const int* il, const int* iu, - const float* abstol, int* m, int* nz, float* w, const float*orfac, float* Z, const int* iz, const int* jz, const int*descz, - float* work, int* lwork, int*iwork, int*liwork, int* ifail, int*iclustr, float*gap, int* info); - - void pchegvx_(const int* itype, const char* jobz, const char* range, const char* uplo, - const int* n, std::complex* A, const int* ia, const int* ja, const int*desca, 
std::complex* B, const int* ib, const int* jb, const int*descb, - const float* vl, const float* vu, const int* il, const int* iu, - const float* abstol, int* m, int* nz, float* w, const float*orfac, std::complex* Z, const int* iz, const int* jz, const int*descz, - std::complex* work, int* lwork, float* rwork, int* lrwork, int*iwork, int*liwork, int* ifail, int*iclustr, float*gap, int* info); - - - void pzgetri_( - const int *n, - const std::complex *A, const int *ia, const int *ja, const int *desca, - int *ipiv, const std::complex *work, const int *lwork, const int *iwork, const int *liwork, const int *info); - - void pzgeadd_( - const char *transa, - const int *m, const int *n, - const std::complex *alpha, - const std::complex *a, const int *ia, const int *ja, const int *desca, - const std::complex *beta, - const std::complex *c, const int *ic, const int *jc, const int *descc); - - void pztranc_( - const int *M, const int *N, - const std::complex *alpha, - const std::complex *A, const int *IA, const int *JA, const int *DESCA, - const std::complex *beta, - std::complex *C, const int *IC, const int *JC, const int *DESCC); - - void pdgemr2d_(const int *M, const int *N, - double *A, const int *IA, const int *JA, const int *DESCA, - double *B, const int *IB, const int *JB, const int *DESCB, - const int *ICTXT); - - void pzgemr2d_(const int *M, const int *N, - std::complex *A, const int *IA, const int *JA, const int *DESCA, - std::complex *B, const int *IB, const int *JB, const int *DESCB, - const int *ICTXT); - - // Scalapack wrappers to copy 2D blocks of data - // more info: - // https://netlib.org/scalapack/explore-html/da/db5/pigemr_8c.html - // https://netlib.org/scalapack/explore-html/dd/dcd/pdgemr_8c.html - // https://netlib.org/scalapack/explore-html/d5/dd4/pzgemr_8c.html - // https://netlib.org/scalapack/explore-html/d5/deb/psgemr_8c.html - // https://netlib.org/scalapack/explore-html/d4/dad/pcgemr_8c.html - void Cpigemr2d (int m, int n, int *ptrmyblock, int 
ia, int ja, int *ma, int *ptrmynewblock, int ib, int jb, int *mb, int globcontext); - void Cpdgemr2d (int m, int n, double *ptrmyblock, int ia, int ja, int *ma, double *ptrmynewblock, int ib, int jb, int *mb, int globcontext); - void Cpzgemr2d (int m, int n, std::complex *ptrmyblock, int ia, int ja, int *ma, std::complex *ptrmynewblock, int ib, int jb, int *mb, int globcontext); - void Cpsgemr2d (int m, int n, float *ptrmyblock, int ia, int ja, int *ma, float *ptrmynewblock, int ib, int jb, int *mb, int globcontext); - void Cpcgemr2d (int m, int n, std::complex *ptrmyblock, int ia, int ja, int *ma, std::complex *ptrmynewblock, int ib, int jb, int *mb, int globcontext); + int numroc_ (const int* n, const int* nb, const int* iproc, const int* srcproc, const int* nprocs); + void descinit_ (int* desc, + const int* m, + const int* n, + const int* mb, + const int* nb, + const int* irsrc, + const int* icsrc, + const int* ictxt, + const int* lld, + int* info); + + void pddot_ (int* n, + double* dot, + double* x, + int* ix, + int* jx, + int* descx, + int* incx, + double* y, + int* iy, + int* jy, + int* descy, + int* incy); + void pzdotc_ (int* n, + std::complex* dot, + std::complex* x, + int* ix, + int* jx, + int* descx, + int* incx, + std::complex* y, + int* iy, + int* jy, + int* descy, + int* incy); + + void pdpotrf_ (char* uplo, int* n, double* a, int* ia, int* ja, int* desca, int* info); + // void pzpotrf_(char *uplo, int *n, double _Complex *a, int *ia, int *ja, int *desca, int *info); + void pzpotrf_ (char* uplo, int* n, std::complex* a, int* ia, int* ja, int* desca, int* info); + + void pdtran_ (const int* m, + const int* n, + const double* alpha, + const double* a, + const int* ia, + const int* ja, + const int* desca, + const double* beta, + double* c, + const int* ic, + const int* jc, + const int* descc); + + void pztranu_ (const int* m, + const int* n, + const std::complex* alpha, + const std::complex* a, + const int* ia, + const int* ja, + const int* desca, + 
const std::complex* beta, + std::complex* c, + const int* ic, + const int* jc, + const int* descc); + + void pzgemv_ (const char* transa, + const int* M, + const int* N, + const double* alpha, + const std::complex* A, + const int* IA, + const int* JA, + const int* DESCA, + const std::complex* B, + const int* IB, + const int* JB, + const int* DESCB, + const int* K, + const double* beta, + std::complex* C, + const int* IC, + const int* JC, + const int* DESCC, + const int* L); + void pdgemv_ (const char* transa, + const int* M, + const int* N, + const double* alpha, + const double* A, + const int* IA, + const int* JA, + const int* DESCA, + const double* B, + const int* IB, + const int* JB, + const int* DESCB, + const int* K, + const double* beta, + double* C, + const int* IC, + const int* JC, + const int* DESCC, + const int* L); + // C = a * A.? * B.? + b * C + void pdgemm_ (const char* transa, + const char* transb, + const int* M, + const int* N, + const int* K, + const double* alpha, + const double* A, + const int* IA, + const int* JA, + const int* DESCA, + const double* B, + const int* IB, + const int* JB, + const int* DESCB, + const double* beta, + double* C, + const int* IC, + const int* JC, + const int* DESCC); + void pzgemm_ (const char* transa, + const char* transb, + const int* M, + const int* N, + const int* K, + const std::complex* alpha, + const std::complex* A, + const int* IA, + const int* JA, + const int* DESCA, + const std::complex* B, + const int* IB, + const int* JB, + const int* DESCB, + const std::complex* beta, + std::complex* C, + const int* IC, + const int* JC, + const int* DESCC); + void pdsymm_ (char* side, + char* uplo, + int* m, + int* n, + double* alpha, + double* a, + int* ia, + int* ja, + int* desca, + double* b, + int* ib, + int* jb, + int* descb, + double* beta, + double* c, + int* ic, + int* jc, + int* descc); + void pdtrmm_ (char* side, + char* uplo, + char* transa, + char* diag, + int* m, + int* n, + double* alpha, + double* a, + 
int* ia, + int* ja, + int* desca, + double* b, + int* ib, + int* jb, + int* descb); + // void pztrmm_(char *side , char *uplo , char *transa , char *diag , int *m , int *n , + // double *alpha , double _Complex *a , int *ia , int *ja , int *desca , + // double _Complex *b , int *ib , int *jb , int *descb ); + void pztrmm_ (char* side, + char* uplo, + char* transa, + char* diag, + int* m, + int* n, + std::complex* alpha, + std::complex* a, + int* ia, + int* ja, + int* desca, + std::complex* b, + int* ib, + int* jb, + int* descb); + void pzhemm_ (char* side, + char* uplo, + int* m, + int* n, + std::complex* alpha, + std::complex* a, + int* ia, + int* ja, + int* desca, + std::complex* b, + int* ib, + int* jb, + int* descb, + std::complex* beta, + std::complex* c, + int* ic, + int* jc, + int* descc); + void pzgetrf_ (const int* M, + const int* N, + std::complex* A, + const int* IA, + const int* JA, + const int* DESCA, + int* ipiv, + int* info); + + void pzgesv_ (const int* n, + const int* nrhs, + const std::complex* A, + const int* ia, + const int* ja, + const int* desca, + int* ipiv, + std::complex* B, + const int* ib, + const int* jb, + const int* descb, + const int* info); + + void pdsygvx_ (const int* itype, + const char* jobz, + const char* range, + const char* uplo, + const int* n, + double* A, + const int* ia, + const int* ja, + const int* desca, + double* B, + const int* ib, + const int* jb, + const int* descb, + const double* vl, + const double* vu, + const int* il, + const int* iu, + const double* abstol, + int* m, + int* nz, + double* w, + const double* orfac, + double* Z, + const int* iz, + const int* jz, + const int* descz, + double* work, + int* lwork, + int* iwork, + int* liwork, + int* ifail, + int* iclustr, + double* gap, + int* info); + + void pzhegvx_ (const int* itype, + const char* jobz, + const char* range, + const char* uplo, + const int* n, + std::complex* A, + const int* ia, + const int* ja, + const int* desca, + std::complex* B, + const int* 
ib, + const int* jb, + const int* descb, + const double* vl, + const double* vu, + const int* il, + const int* iu, + const double* abstol, + int* m, + int* nz, + double* w, + const double* orfac, + std::complex* Z, + const int* iz, + const int* jz, + const int* descz, + std::complex* work, + int* lwork, + double* rwork, + int* lrwork, + int* iwork, + int* liwork, + int* ifail, + int* iclustr, + double* gap, + int* info); + + void pssygvx_ (const int* itype, + const char* jobz, + const char* range, + const char* uplo, + const int* n, + float* A, + const int* ia, + const int* ja, + const int* desca, + float* B, + const int* ib, + const int* jb, + const int* descb, + const float* vl, + const float* vu, + const int* il, + const int* iu, + const float* abstol, + int* m, + int* nz, + float* w, + const float* orfac, + float* Z, + const int* iz, + const int* jz, + const int* descz, + float* work, + int* lwork, + int* iwork, + int* liwork, + int* ifail, + int* iclustr, + float* gap, + int* info); + + void pchegvx_ (const int* itype, + const char* jobz, + const char* range, + const char* uplo, + const int* n, + std::complex* A, + const int* ia, + const int* ja, + const int* desca, + std::complex* B, + const int* ib, + const int* jb, + const int* descb, + const float* vl, + const float* vu, + const int* il, + const int* iu, + const float* abstol, + int* m, + int* nz, + float* w, + const float* orfac, + std::complex* Z, + const int* iz, + const int* jz, + const int* descz, + std::complex* work, + int* lwork, + float* rwork, + int* lrwork, + int* iwork, + int* liwork, + int* ifail, + int* iclustr, + float* gap, + int* info); + + void pzgetri_ (const int* n, + const std::complex* A, + const int* ia, + const int* ja, + const int* desca, + int* ipiv, + const std::complex* work, + const int* lwork, + const int* iwork, + const int* liwork, + const int* info); + + void pzgeadd_ (const char* transa, + const int* m, + const int* n, + const std::complex* alpha, + const std::complex* a, 
+ const int* ia, + const int* ja, + const int* desca, + const std::complex* beta, + const std::complex* c, + const int* ic, + const int* jc, + const int* descc); + + void pztranc_ (const int* M, + const int* N, + const std::complex* alpha, + const std::complex* A, + const int* IA, + const int* JA, + const int* DESCA, + const std::complex* beta, + std::complex* C, + const int* IC, + const int* JC, + const int* DESCC); + + void pdgemr2d_ (const int* M, + const int* N, + double* A, + const int* IA, + const int* JA, + const int* DESCA, + double* B, + const int* IB, + const int* JB, + const int* DESCB, + const int* ICTXT); + + void pzgemr2d_ (const int* M, + const int* N, + std::complex* A, + const int* IA, + const int* JA, + const int* DESCA, + std::complex* B, + const int* IB, + const int* JB, + const int* DESCB, + const int* ICTXT); + + // Scalapack wrappers to copy 2D blocks of data + // more info: + // https://netlib.org/scalapack/explore-html/da/db5/pigemr_8c.html + // https://netlib.org/scalapack/explore-html/dd/dcd/pdgemr_8c.html + // https://netlib.org/scalapack/explore-html/d5/dd4/pzgemr_8c.html + // https://netlib.org/scalapack/explore-html/d5/deb/psgemr_8c.html + // https://netlib.org/scalapack/explore-html/d4/dad/pcgemr_8c.html + void Cpigemr2d (int m, + int n, + int* ptrmyblock, + int ia, + int ja, + int* ma, + int* ptrmynewblock, + int ib, + int jb, + int* mb, + int globcontext); + void Cpdgemr2d (int m, + int n, + double* ptrmyblock, + int ia, + int ja, + int* ma, + double* ptrmynewblock, + int ib, + int jb, + int* mb, + int globcontext); + void Cpzgemr2d (int m, + int n, + std::complex* ptrmyblock, + int ia, + int ja, + int* ma, + std::complex* ptrmynewblock, + int ib, + int jb, + int* mb, + int globcontext); + void Cpsgemr2d (int m, + int n, + float* ptrmyblock, + int ia, + int ja, + int* ma, + float* ptrmynewblock, + int ib, + int jb, + int* mb, + int globcontext); + void Cpcgemr2d (int m, + int n, + std::complex* ptrmyblock, + int ia, + int ja, + 
int* ma, + std::complex* ptrmynewblock, + int ib, + int jb, + int* mb, + int globcontext); } - template - struct block2d_data_type - { - constexpr static bool value = std::is_same::value || std::is_same>::value || std::is_same::value || std::is_same>::value || std::is_same::value; - }; - - - /** - * Copies a 2D block of data from matrix A to matrix B using the Scalapack library. - * This function supports different data types: double, std::complex, float, std::complex, and int. - * - * @tparam T The data type of the matrices A and B. - * @param M The number of rows of matrix A. - * @param N The number of columns of matrix A. - * @param A Pointer to the source matrix A. - * @param IA The starting row index of the block in matrix A. - * @param JA The starting column index of the block in matrix A. - * @param DESCA Descriptor array for matrix A. - * @param B Pointer to the destination matrix B. - * @param IB The starting row index of the block in matrix B. - * @param JB The starting column index of the block in matrix B. - * @param DESCB Descriptor array for matrix B. - * @param ICTXT The context identifier. 
- */ - template - typename std::enable_if::value,void>::type Cpxgemr2d(int M, int N, T *A, int IA, int JA, int *DESCA, T *B, int IB, int JB, int *DESCB, int ICTXT) - { - if (std::is_same::value) Cpdgemr2d(M, N, reinterpret_cast(A),IA, JA, DESCA,reinterpret_cast(B),IB,JB, DESCB,ICTXT); - if (std::is_same>::value) Cpzgemr2d(M, N, reinterpret_cast*>(A),IA, JA, DESCA,reinterpret_cast*>(B),IB,JB, DESCB,ICTXT); - if (std::is_same::value) Cpsgemr2d(M, N, reinterpret_cast(A),IA, JA, DESCA,reinterpret_cast(B),IB,JB, DESCB,ICTXT); - if (std::is_same>::value) Cpcgemr2d(M, N, reinterpret_cast*>(A),IA, JA, DESCA,reinterpret_cast*>(B),IB,JB, DESCB,ICTXT); - if (std::is_same::value) Cpigemr2d(M, N, reinterpret_cast(A),IA, JA, DESCA,reinterpret_cast(B),IB,JB, DESCB,ICTXT); - }; - +template +struct block2d_data_type +{ + constexpr static bool value = std::is_same::value || std::is_same>::value + || std::is_same::value || std::is_same>::value + || std::is_same::value; +}; + +/** + * Copies a 2D block of data from matrix A to matrix B using the Scalapack library. + * This function supports different data types: double, std::complex, float, std::complex, and int. + * + * @tparam T The data type of the matrices A and B. + * @param M The number of rows of matrix A. + * @param N The number of columns of matrix A. + * @param A Pointer to the source matrix A. + * @param IA The starting row index of the block in matrix A. + * @param JA The starting column index of the block in matrix A. + * @param DESCA Descriptor array for matrix A. + * @param B Pointer to the destination matrix B. + * @param IB The starting row index of the block in matrix B. + * @param JB The starting column index of the block in matrix B. + * @param DESCB Descriptor array for matrix B. + * @param ICTXT The context identifier. 
+ */ +template +typename std::enable_if::value, void>::type + Cpxgemr2d (int M, int N, T* A, int IA, int JA, int* DESCA, T* B, int IB, int JB, int* DESCB, int ICTXT) +{ + if (std::is_same::value) + Cpdgemr2d (M, + N, + reinterpret_cast (A), + IA, + JA, + DESCA, + reinterpret_cast (B), + IB, + JB, + DESCB, + ICTXT); + if (std::is_same>::value) + Cpzgemr2d (M, + N, + reinterpret_cast*> (A), + IA, + JA, + DESCA, + reinterpret_cast*> (B), + IB, + JB, + DESCB, + ICTXT); + if (std::is_same::value) + Cpsgemr2d (M, + N, + reinterpret_cast (A), + IA, + JA, + DESCA, + reinterpret_cast (B), + IB, + JB, + DESCB, + ICTXT); + if (std::is_same>::value) + Cpcgemr2d (M, + N, + reinterpret_cast*> (A), + IA, + JA, + DESCA, + reinterpret_cast*> (B), + IB, + JB, + DESCB, + ICTXT); + if (std::is_same::value) + Cpigemr2d (M, N, reinterpret_cast (A), IA, JA, DESCA, reinterpret_cast (B), IB, JB, DESCB, ICTXT); +}; class ScalapackConnector { -public: - static inline - void geadd( - const char transa, - const int m, const int n, - const std::complex alpha, - const std::complex *a, const int ia, const int ja, const int *desca, - const std::complex beta, - const std::complex *c, const int ic, const int jc, const int *descc) - { - pzgeadd_(&transa, &m, &n, &alpha, a, &ia, &ja, desca, &beta, c, &ic, &jc, descc); - } - - static inline - void dot(int n, - double& dot, - double* a, - int ia, - int ja, - int inca, - double* b, - int ib, - int jb, - int incb, - int* desc) - { - pddot_(&n, &dot, a, &ia, &ja, desc, &inca, b, &ib, &jb, desc, &incb); - } - - static inline - void dot(int n, - std::complex& dotc, - std::complex* a, - int ia, - int ja, - int inca, - std::complex* b, - int ib, - int jb, - int incb, - int* desc) - { - pzdotc_(&n, &dotc, a, &ia, &ja, desc, &inca, b, &ib, &jb, desc, &incb); - } - - static inline - void gemm( - const char transa, const char transb, - const int M, const int N, const int K, - const double alpha, - const double* A, const int IA, const int JA, const int* DESCA, - 
const double* B, const int IB, const int JB, const int* DESCB, - const double beta, - double* C, const int IC, const int JC, const int* DESCC) + public: + static inline void + geadd (const char transa, + const int m, + const int n, + const std::complex alpha, + const std::complex* a, + const int ia, + const int ja, + const int* desca, + const std::complex beta, + const std::complex* c, + const int ic, + const int jc, + const int* descc) + { + pzgeadd_ (&transa, &m, &n, &alpha, a, &ia, &ja, desca, &beta, c, &ic, &jc, descc); + } + + static inline void + dot (int n, double& dot, double* a, int ia, int ja, int inca, double* b, int ib, int jb, int incb, int* desc) + { + pddot_ (&n, &dot, a, &ia, &ja, desc, &inca, b, &ib, &jb, desc, &incb); + } + + static inline void + dot (int n, + std::complex& dotc, + std::complex* a, + int ia, + int ja, + int inca, + std::complex* b, + int ib, + int jb, + int incb, + int* desc) + { + pzdotc_ (&n, &dotc, a, &ia, &ja, desc, &inca, b, &ib, &jb, desc, &incb); + } + + static inline void + gemm (const char transa, + const char transb, + const int M, + const int N, + const int K, + const double alpha, + const double* A, + const int IA, + const int JA, + const int* DESCA, + const double* B, + const int IB, + const int JB, + const int* DESCB, + const double beta, + double* C, + const int IC, + const int JC, + const int* DESCC) + { + pdgemm_ (&transa, + &transb, + &M, + &N, + &K, + &alpha, + A, + &IA, + &JA, + DESCA, + B, + &IB, + &JB, + DESCB, + &beta, + C, + &IC, + &JC, + DESCC); + } + + static inline void + gemm (const char transa, + const char transb, + const int M, + const int N, + const int K, + const std::complex alpha, + const std::complex* A, + const int IA, + const int JA, + const int* DESCA, + const std::complex* B, + const int IB, + const int JB, + const int* DESCB, + const std::complex beta, + std::complex* C, + const int IC, + const int JC, + const int* DESCC) + { + pzgemm_ (&transa, + &transb, + &M, + &N, + &K, + &alpha, + A, + 
&IA, + &JA, + DESCA, + B, + &IB, + &JB, + DESCB, + &beta, + C, + &IC, + &JC, + DESCC); + } + + static inline void + gemm (char transa, + char transb, + int M, + int N, + int K, + double alpha, + double* A, + double* B, + double beta, + double* C, + int* DESC) + { + int isrc = 1; + pdgemm_ (&transa, + &transb, + &M, + &N, + &K, + &alpha, + A, + &isrc, + &isrc, + DESC, + B, + &isrc, + &isrc, + DESC, + &beta, + C, + &isrc, + &isrc, + DESC); + } + + static inline void + gemm (char transa, + char transb, + int M, + int N, + int K, + std::complex alpha, + std::complex* A, + std::complex* B, + std::complex beta, + std::complex* C, + int* DESC) + { + + int isrc = 1; + pzgemm_ (&transa, + &transb, + &M, + &N, + &K, + &alpha, + A, + &isrc, + &isrc, + DESC, + B, + &isrc, + &isrc, + DESC, + &beta, + C, + &isrc, + &isrc, + DESC); + } + + static inline void + symm (char side, char uplo, int m, int n, double alpha, double* a, double* b, double beta, double* c, int* desc) { - pdgemm_(&transa, &transb, &M, &N, &K, &alpha, A, &IA, &JA, DESCA, - B, &IB, &JB, DESCB, &beta, C, &IC, &JC, DESCC); + int isrc = 1; + pdsymm_ (&side, + &uplo, + &m, + &n, + &alpha, + a, + &isrc, + &isrc, + desc, + b, + &isrc, + &isrc, + desc, + &beta, + c, + &isrc, + &isrc, + desc); } - static inline - void gemm( - const char transa, const char transb, - const int M, const int N, const int K, - const std::complex alpha, - const std::complex *A, const int IA, const int JA, const int *DESCA, - const std::complex *B, const int IB, const int JB, const int *DESCB, - const std::complex beta, - std::complex *C, const int IC, const int JC, const int *DESCC) - { - pzgemm_(&transa, &transb, &M, &N, &K, &alpha, A, &IA, &JA, DESCA, - B, &IB, &JB, DESCB, &beta, C, &IC, &JC, DESCC); - } - - static inline - void gemm(char transa, char transb, int M, int N, int K, - double alpha, - double* A, - double* B, - double beta, - double* C, - int* DESC) - { - int isrc = 1; - pdgemm_(&transa, - &transb, - &M, - &N, - &K, - &alpha, - 
A, - &isrc, - &isrc, - DESC, - B, - &isrc, - &isrc, - DESC, - &beta, - C, - &isrc, - &isrc, - DESC); - } - - static inline - void gemm(char transa, char transb, int M, int N, int K, - std::complex alpha, - std::complex* A, - std::complex* B, - std::complex beta, - std::complex* C, - int* DESC) - { - - int isrc = 1; - pzgemm_(&transa, - &transb, - &M, - &N, - &K, - &alpha, - A, - &isrc, - &isrc, - DESC, - B, - &isrc, - &isrc, - DESC, - &beta, - C, - &isrc, - &isrc, - DESC); - } - - static inline - void symm(char side, - char uplo, - int m, - int n, - double alpha, - double* a, - double* b, - double beta, - double* c, - int* desc) - { - int isrc = 1; - pdsymm_(&side, &uplo, &m, &n, &alpha, a, &isrc, &isrc, desc, b, &isrc, &isrc, desc, &beta, c, &isrc, &isrc, desc); - } - - static inline - void getrf( - const int M, const int N, - std::complex *A, const int IA, const int JA, const int *DESCA, - int *ipiv, int *info) //fix a bug: info is output and we must use int* - { - pzgetrf_(&M, &N, A, &IA, &JA, DESCA, ipiv, info); - } - - static inline - void getri( - const int n, - const std::complex *A, const int ia, const int ja, const int *desca, int *ipiv, - const std::complex *work, const int *lwork, const int *iwork, const int *liwork, int *info) - { - pzgetri_(&n, A, &ia, &ja, desca, ipiv, work, lwork, iwork, liwork, info); - } - - static inline - void gesv( - const int n, const int nrhs, - const std::complex *A, const int ia, const int ja, const int *desca, - int *ipiv, std::complex* B, const int ib, const int jb, const int*descb, int *info) - { - pzgesv_(&n, &nrhs, A, &ia, &ja, desca, ipiv, B, &ib, &jb, descb, info); - } - - static inline - void tranu( - const int m, const int n, - const std::complex alpha , std::complex *a , const int ia , const int ja , const int *desca, - const std::complex beta , std::complex *c , const int ic , const int jc , const int *descc) - { - pztranu_(&m, &n, &alpha, a, &ia, &ja, desca, &beta, c, &ic, &jc, descc); - } - - static inline - int 
potrf(char uplo, int na, double* U, int* desc) - { - int isrc = 1; - int info; - pdpotrf_(&uplo, &na, U, &isrc, &isrc, desc, &info); - return info; - } - - static inline - int potrf(char uplo, int na, std::complex* U, int* desc) - { - int isrc = 1; - int info; - pzpotrf_(&uplo, &na, U, &isrc, &isrc, desc, &info); - return info; - } - - static inline - void trmm(char side, - char uplo, - char trans, - char diag, - int m, - int n, - double alpha, - double* a, - double* b, - int* desc) - { - int isrc = 1; - pdtrmm_(&side, &uplo, &trans, &diag, &m, &n, &alpha, a, &isrc, &isrc, desc, b, &isrc, &isrc, desc); - } - - static inline - void trmm(char side, - char uplo, - char trans, - char diag, - int m, - int n, - std::complex alpha, - std::complex* a, - std::complex* b, - int* desc) - { - int isrc = 1; - pztrmm_(&side, &uplo, &trans, &diag, &m, &n, &alpha, a, &isrc, &isrc, desc, b, &isrc, &isrc, desc); - } - - static inline - void hemm(char side, - char uplo, - int na, - std::complex alpha, - std::complex* a, - std::complex* b, - std::complex beta, - std::complex* c, - int* desc) - { - int isrc = 1; - pzhemm_(&side, - &uplo, - &na, - &na, - &alpha, - a, - &isrc, - &isrc, - desc, - b, - &isrc, - &isrc, - desc, - &beta, - c, - &isrc, - &isrc, - desc); - } + static inline void + getrf (const int M, + const int N, + std::complex* A, + const int IA, + const int JA, + const int* DESCA, + int* ipiv, + int* info) // fix a bug: info is output and we must use int* + { + pzgetrf_ (&M, &N, A, &IA, &JA, DESCA, ipiv, info); + } + + static inline void + getri (const int n, + const std::complex* A, + const int ia, + const int ja, + const int* desca, + int* ipiv, + const std::complex* work, + const int* lwork, + const int* iwork, + const int* liwork, + int* info) + { + pzgetri_ (&n, A, &ia, &ja, desca, ipiv, work, lwork, iwork, liwork, info); + } + + static inline void + gesv (const int n, + const int nrhs, + const std::complex* A, + const int ia, + const int ja, + const int* desca, + int* 
ipiv, + std::complex* B, + const int ib, + const int jb, + const int* descb, + int* info) + { + pzgesv_ (&n, &nrhs, A, &ia, &ja, desca, ipiv, B, &ib, &jb, descb, info); + } + + static inline void + tranu (const int m, + const int n, + const std::complex alpha, + std::complex* a, + const int ia, + const int ja, + const int* desca, + const std::complex beta, + std::complex* c, + const int ic, + const int jc, + const int* descc) + { + pztranu_ (&m, &n, &alpha, a, &ia, &ja, desca, &beta, c, &ic, &jc, descc); + } + + static inline int + potrf (char uplo, int na, double* U, int* desc) + { + int isrc = 1; + int info; + pdpotrf_ (&uplo, &na, U, &isrc, &isrc, desc, &info); + return info; + } + + static inline int + potrf (char uplo, int na, std::complex* U, int* desc) + { + int isrc = 1; + int info; + pzpotrf_ (&uplo, &na, U, &isrc, &isrc, desc, &info); + return info; + } + + static inline void + trmm (char side, char uplo, char trans, char diag, int m, int n, double alpha, double* a, double* b, int* desc) + { + int isrc = 1; + pdtrmm_ (&side, &uplo, &trans, &diag, &m, &n, &alpha, a, &isrc, &isrc, desc, b, &isrc, &isrc, desc); + } + + static inline void + trmm (char side, + char uplo, + char trans, + char diag, + int m, + int n, + std::complex alpha, + std::complex* a, + std::complex* b, + int* desc) + { + int isrc = 1; + pztrmm_ (&side, &uplo, &trans, &diag, &m, &n, &alpha, a, &isrc, &isrc, desc, b, &isrc, &isrc, desc); + } + + static inline void + hemm (char side, + char uplo, + int na, + std::complex alpha, + std::complex* a, + std::complex* b, + std::complex beta, + std::complex* c, + int* desc) + { + int isrc = 1; + pzhemm_ (&side, + &uplo, + &na, + &na, + &alpha, + a, + &isrc, + &isrc, + desc, + b, + &isrc, + &isrc, + desc, + &beta, + c, + &isrc, + &isrc, + desc); + } }; #endif // __MPI diff --git a/source/source_base/module_fft/fft_base.h b/source/source_base/module_fft/fft_base.h index 1fcbc514129..3edcbc4939e 100644 --- a/source/source_base/module_fft/fft_base.h 
+++ b/source/source_base/module_fft/fft_base.h @@ -8,26 +8,26 @@ template class FFT_BASE { public: - FFT_BASE() {}; - virtual ~FFT_BASE() {}; + FFT_BASE () {}; + virtual ~FFT_BASE () {}; /** * @brief Initialize the fft parameters as virtual function. * * The function is used to initialize the fft parameters. */ - virtual __attribute__((weak)) void initfft(int nx_in, - int ny_in, - int nz_in, - int lixy_in, - int rixy_in, - int ns_in, - int nplane_in, - int nproc_in, - bool gamma_only_in, - bool xprime_in = true); - - virtual __attribute__((weak)) void initfft(int nx_in, int ny_in, int nz_in); + virtual __attribute__ ((weak)) void initfft (int nx_in, + int ny_in, + int nz_in, + int lixy_in, + int rixy_in, + int ns_in, + int nplane_in, + int nproc_in, + bool gamma_only_in, + bool xprime_in = true); + + virtual __attribute__ ((weak)) void initfft (int nx_in, int ny_in, int nz_in); /** * @brief Setup the fft plan and data as pure virtual function. @@ -36,7 +36,7 @@ class FFT_BASE * override the function in the derived class.In the derived * class, the function is used to setup the fft plan and data. */ - virtual void setupFFT() = 0; + virtual void setupFFT () = 0; /** * @brief Clean the fft plan as pure virtual function. @@ -45,7 +45,7 @@ class FFT_BASE * override the function in the derived class.In the derived * class, the function is used to clean the fft plan. */ - virtual void cleanFFT() = 0; + virtual void cleanFFT () = 0; /** * @brief Clear the fft data as pure virtual function. @@ -54,17 +54,17 @@ class FFT_BASE * override the function in the derived class.In the derived * class, the function is used to clear the fft data. */ - virtual void clear() = 0; + virtual void clear () = 0; /** * @brief Allocate and destory the resoure in FFT running time, * Now it only used in the DSP mode. 
- * + * * The function is set as pure virtual function.In order to * override the function in the derived class.In the derived * class, the function is used to allocate and destory the * resoure in FFT running time. */ - virtual void resource_handler(const int flag) const {}; + virtual void resource_handler (const int flag) const {}; /** * @brief Get the real space data in cpu-like fft * @@ -72,11 +72,11 @@ class FFT_BASE * FFT_BASE is an abstract class,the function will be override, * The attribute weak is used to avoid define the function. */ - virtual __attribute__((weak)) FPTYPE* get_rspace_data() const; + virtual __attribute__ ((weak)) FPTYPE* get_rspace_data () const; - virtual __attribute__((weak)) std::complex* get_auxr_data() const; + virtual __attribute__ ((weak)) std::complex* get_auxr_data () const; - virtual __attribute__((weak)) std::complex* get_auxg_data() const; + virtual __attribute__ ((weak)) std::complex* get_auxg_data () const; /** * @brief Get the auxiliary real space data in 3D @@ -85,7 +85,7 @@ class FFT_BASE * While the FFT_BASE is an abstract class,the function will be override, * The attribute weak is used to avoid define the function. */ - virtual __attribute__((weak)) std::complex* get_auxr_3d_data() const; + virtual __attribute__ ((weak)) std::complex* get_auxr_3d_data () const; // forward fft in x-y direction @@ -100,11 +100,9 @@ class FFT_BASE * determined by the xprime flag).Notably, the Y axis operates in * "many-many-FFT" mode. */ - virtual __attribute__((weak)) void fftxyfor(std::complex* in, - std::complex* out) const; + virtual __attribute__ ((weak)) void fftxyfor (std::complex* in, std::complex* out) const; - virtual __attribute__((weak)) void fftxybac(std::complex* in, - std::complex* out) const; + virtual __attribute__ ((weak)) void fftxybac (std::complex* in, std::complex* out) const; /** * @brief Forward FFT in z direction @@ -115,11 +113,9 @@ class FFT_BASE * It involves only one axis, z. The FFT is applied only once. 
* Notably, the Z axis operates in many FFT with nz*ns. */ - virtual __attribute__((weak)) void fftzfor(std::complex* in, - std::complex* out) const; + virtual __attribute__ ((weak)) void fftzfor (std::complex* in, std::complex* out) const; - virtual __attribute__((weak)) void fftzbac(std::complex* in, - std::complex* out) const; + virtual __attribute__ ((weak)) void fftzbac (std::complex* in, std::complex* out) const; /** * @brief Forward FFT in x-y direction with real to complex @@ -129,11 +125,9 @@ class FFT_BASE * This function performs the forward FFT in the x-y direction * with real to complex.There is no difference between fftxyfor. */ - virtual __attribute__((weak)) void fftxyr2c(FPTYPE* in, - std::complex* out) const; + virtual __attribute__ ((weak)) void fftxyr2c (FPTYPE* in, std::complex* out) const; - virtual __attribute__((weak)) void fftxyc2r(std::complex* in, - FPTYPE* out) const; + virtual __attribute__ ((weak)) void fftxyc2r (std::complex* in, FPTYPE* out) const; /** * @brief Forward FFT in 3D @@ -144,11 +138,9 @@ class FFT_BASE * It involves three axes, x, y, and z. The FFT is applied multiple times * for fft3D_forward. 
*/ - virtual __attribute__((weak)) void fft3D_forward(std::complex* in, - std::complex* out) const; + virtual __attribute__ ((weak)) void fft3D_forward (std::complex* in, std::complex* out) const; - virtual __attribute__((weak)) void fft3D_backward(std::complex* in, - std::complex* out) const; + virtual __attribute__ ((weak)) void fft3D_backward (std::complex* in, std::complex* out) const; protected: int nx = 0; @@ -156,9 +148,9 @@ class FFT_BASE int nz = 0; }; -template FFT_BASE::FFT_BASE(); -template FFT_BASE::FFT_BASE(); -template FFT_BASE::~FFT_BASE(); -template FFT_BASE::~FFT_BASE(); +template FFT_BASE::FFT_BASE (); +template FFT_BASE::FFT_BASE (); +template FFT_BASE::~FFT_BASE (); +template FFT_BASE::~FFT_BASE (); } // namespace ModuleBase #endif // FFT_BASE_H diff --git a/source/source_base/module_fft/fft_bundle.cpp b/source/source_base/module_fft/fft_bundle.cpp index a1292c34e41..bcf014b6539 100644 --- a/source/source_base/module_fft/fft_bundle.cpp +++ b/source/source_base/module_fft/fft_bundle.cpp @@ -15,24 +15,24 @@ #include "fft_dsp.h" #endif template -std::unique_ptr make_unique(Args&&... args) +std::unique_ptr + make_unique (Args&&... 
args) { - return std::unique_ptr(new FFT_BASE(std::forward(args)...)); + return std::unique_ptr (new FFT_BASE (std::forward (args)...)); } namespace ModuleBase { -FFT_Bundle::~FFT_Bundle() -{ - this->clear(); -} +FFT_Bundle::~FFT_Bundle () { this->clear (); } -void FFT_Bundle::setfft(std::string device_in, std::string precision_in) +void + FFT_Bundle::setfft (std::string device_in, std::string precision_in) { this->device = device_in; this->precision = precision_in; } -void FFT_Bundle::initfft(int nx_in, +void + FFT_Bundle::initfft (int nx_in, int ny_in, int nz_in, int lixy_in, @@ -44,258 +44,304 @@ void FFT_Bundle::initfft(int nx_in, bool xprime_in, bool mpifft_in) { - assert(this->device == "cpu" || this->device == "gpu" || this->device == "dsp"); - assert(this->precision == "single" || this->precision == "double" || this->precision == "mixing"); + assert (this->device == "cpu" || this->device == "gpu" || this->device == "dsp"); + assert (this->precision == "single" || this->precision == "double" || this->precision == "mixing"); if (this->precision == "single" || this->precision == "mixing") - { - float_flag = true; - if (this->precision == "mixing") + { + float_flag = true; + if (this->precision == "mixing") + { + double_flag = true; + } +#if not defined(__ENABLE_FLOAT_FFTW) + if (this->device == "cpu") + { + ModuleBase::WARNING_QUIT ("FFT_Bundle", "Please enable float fftw in the cmake to use float fft"); + } +#endif + } + else if (this->precision == "double") { double_flag = true; } -#if not defined(__ENABLE_FLOAT_FFTW) - if (this->device == "cpu") + else { - ModuleBase::WARNING_QUIT("FFT_Bundle", "Please enable float fftw in the cmake to use float fft"); + ModuleBase::WARNING_QUIT ("FFT_Bundle", "Please set the precision to single or double or mixing"); } -#endif - } - else if (this->precision == "double") - { - double_flag = true; - }else{ - ModuleBase::WARNING_QUIT("FFT_Bundle", "Please set the precision to single or double or mixing"); - } #if 
defined(__DSP) if (device == "dsp") - { - if (float_flag) { - ModuleBase::WARNING_QUIT("device", "now dsp fft is not supported for the float type"); + if (float_flag) + { + ModuleBase::WARNING_QUIT ("device", "now dsp fft is not supported for the float type"); + } + auto dsp_fft = make_unique> (); + dsp_fft->cluster_id = this->dsp_cluster_id_; + fft_double = std::move (dsp_fft); + fft_double->initfft (nx_in, ny_in, nz_in); } - auto dsp_fft = make_unique>(); - dsp_fft->cluster_id = this->dsp_cluster_id_; - fft_double = std::move(dsp_fft); - fft_double->initfft(nx_in, ny_in, nz_in); - }else + else #endif - if (device == "cpu") - { - if (float_flag) + if (device == "cpu") { - fft_float = make_unique>(this->fft_mode); - fft_float - ->initfft(nx_in, ny_in, nz_in, lixy_in, rixy_in, ns_in, nplane_in, nproc_in, gamma_only_in, xprime_in); + if (float_flag) + { + fft_float = make_unique> (this->fft_mode); + fft_float->initfft (nx_in, + ny_in, + nz_in, + lixy_in, + rixy_in, + ns_in, + nplane_in, + nproc_in, + gamma_only_in, + xprime_in); + } + if (double_flag) + { + fft_double = make_unique> (this->fft_mode); + fft_double->initfft (nx_in, + ny_in, + nz_in, + lixy_in, + rixy_in, + ns_in, + nplane_in, + nproc_in, + gamma_only_in, + xprime_in); + } } - if (double_flag) + else if (device == "gpu") { - fft_double = make_unique>(this->fft_mode); - fft_double - ->initfft(nx_in, ny_in, nz_in, lixy_in, rixy_in, ns_in, nplane_in, nproc_in, gamma_only_in, xprime_in); - } - }else if (device == "gpu") - { #if defined(__ROCM) - fft_float = make_unique>(); - fft_float->initfft(nx_in, ny_in, nz_in); - fft_double = make_unique>(); - fft_double->initfft(nx_in, ny_in, nz_in); + fft_float = make_unique> (); + fft_float->initfft (nx_in, ny_in, nz_in); + fft_double = make_unique> (); + fft_double->initfft (nx_in, ny_in, nz_in); #elif defined(__CUDA) - fft_float = make_unique>(); - fft_float->initfft(nx_in, ny_in, nz_in); - fft_double = make_unique>(); - fft_double->initfft(nx_in, ny_in, nz_in); + 
fft_float = make_unique> (); + fft_float->initfft (nx_in, ny_in, nz_in); + fft_double = make_unique> (); + fft_double->initfft (nx_in, ny_in, nz_in); #endif - }else{ - ModuleBase::WARNING_QUIT("FFT_Bundle", "Please set the device to cpu or gpu or dsp"); - } + } + else + { + ModuleBase::WARNING_QUIT ("FFT_Bundle", "Please set the device to cpu or gpu or dsp"); + } } -void FFT_Bundle::setupFFT() +void + FFT_Bundle::setupFFT () { if (double_flag) - { - fft_double->setupFFT(); - } + { + fft_double->setupFFT (); + } if (float_flag) - { - fft_float->setupFFT(); - } + { + fft_float->setupFFT (); + } } -void FFT_Bundle::clearFFT() +void + FFT_Bundle::clearFFT () { if (double_flag) - { - fft_double->cleanFFT(); - } + { + fft_double->cleanFFT (); + } if (float_flag) - { - fft_float->cleanFFT(); - } + { + fft_float->cleanFFT (); + } } -void FFT_Bundle::clear() +void + FFT_Bundle::clear () { - this->clearFFT(); + this->clearFFT (); if (double_flag) - { - fft_double->clear(); - } + { + fft_double->clear (); + } if (float_flag) - { - fft_float->clear(); - } + { + fft_float->clear (); + } } -void FFT_Bundle::resource_handler(const int flag) const +void + FFT_Bundle::resource_handler (const int flag) const { - if (this->device=="dsp") - { - if (double_flag) - { - fft_double->resource_handler(flag); - } - if (float_flag) + if (this->device == "dsp") { - fft_float->resource_handler(flag); + if (double_flag) + { + fft_double->resource_handler (flag); + } + if (float_flag) + { + fft_float->resource_handler (flag); + } } - } } template <> -void FFT_Bundle::fftxyfor(std::complex* in, std::complex* out) const +void + FFT_Bundle::fftxyfor (std::complex* in, std::complex* out) const { - fft_float->fftxyfor(in, out); + fft_float->fftxyfor (in, out); } template <> -void FFT_Bundle::fftxyfor(std::complex* in, std::complex* out) const +void + FFT_Bundle::fftxyfor (std::complex* in, std::complex* out) const { - fft_double->fftxyfor(in, out); + fft_double->fftxyfor (in, out); } template <> -void 
FFT_Bundle::fftzfor(std::complex* in, std::complex* out) const +void + FFT_Bundle::fftzfor (std::complex* in, std::complex* out) const { - fft_float->fftzfor(in, out); + fft_float->fftzfor (in, out); } template <> -void FFT_Bundle::fftzfor(std::complex* in, std::complex* out) const +void + FFT_Bundle::fftzfor (std::complex* in, std::complex* out) const { - fft_double->fftzfor(in, out); + fft_double->fftzfor (in, out); } template <> -void FFT_Bundle::fftxybac(std::complex* in, std::complex* out) const +void + FFT_Bundle::fftxybac (std::complex* in, std::complex* out) const { - fft_float->fftxybac(in, out); + fft_float->fftxybac (in, out); } template <> -void FFT_Bundle::fftxybac(std::complex* in, std::complex* out) const +void + FFT_Bundle::fftxybac (std::complex* in, std::complex* out) const { - fft_double->fftxybac(in, out); + fft_double->fftxybac (in, out); } template <> -void FFT_Bundle::fftzbac(std::complex* in, std::complex* out) const +void + FFT_Bundle::fftzbac (std::complex* in, std::complex* out) const { - fft_float->fftzbac(in, out); + fft_float->fftzbac (in, out); } template <> -void FFT_Bundle::fftzbac(std::complex* in, std::complex* out) const +void + FFT_Bundle::fftzbac (std::complex* in, std::complex* out) const { - fft_double->fftzbac(in, out); + fft_double->fftzbac (in, out); } template <> -void FFT_Bundle::fftxyr2c(float* in, std::complex* out) const +void + FFT_Bundle::fftxyr2c (float* in, std::complex* out) const { - fft_float->fftxyr2c(in, out); + fft_float->fftxyr2c (in, out); } template <> -void FFT_Bundle::fftxyr2c(double* in, std::complex* out) const +void + FFT_Bundle::fftxyr2c (double* in, std::complex* out) const { - fft_double->fftxyr2c(in, out); + fft_double->fftxyr2c (in, out); } template <> -void FFT_Bundle::fftxyc2r(std::complex* in, float* out) const +void + FFT_Bundle::fftxyc2r (std::complex* in, float* out) const { - fft_float->fftxyc2r(in, out); + fft_float->fftxyc2r (in, out); } template <> -void 
FFT_Bundle::fftxyc2r(std::complex* in, double* out) const +void + FFT_Bundle::fftxyc2r (std::complex* in, double* out) const { - fft_double->fftxyc2r(in, out); + fft_double->fftxyc2r (in, out); } template <> -void FFT_Bundle::fft3D_forward(std::complex* in, - std::complex* out) const +void + FFT_Bundle::fft3D_forward (std::complex* in, std::complex* out) const { - fft_float->fft3D_forward(in, out); + fft_float->fft3D_forward (in, out); } template <> -void FFT_Bundle::fft3D_forward(std::complex* in, - std::complex* out) const +void + FFT_Bundle::fft3D_forward (std::complex* in, std::complex* out) const { - fft_double->fft3D_forward(in, out); + fft_double->fft3D_forward (in, out); } template <> -void FFT_Bundle::fft3D_backward(std::complex* in, - std::complex* out) const +void + FFT_Bundle::fft3D_backward (std::complex* in, std::complex* out) const { - fft_float->fft3D_backward(in, out); + fft_float->fft3D_backward (in, out); } template <> -void FFT_Bundle::fft3D_backward(std::complex* in, - std::complex* out) const +void + FFT_Bundle::fft3D_backward (std::complex* in, std::complex* out) const { - fft_double->fft3D_backward(in, out); + fft_double->fft3D_backward (in, out); } // access the real space data template <> -float* FFT_Bundle::get_rspace_data() const +float* + FFT_Bundle::get_rspace_data () const { - return fft_float->get_rspace_data(); + return fft_float->get_rspace_data (); } template <> -double* FFT_Bundle::get_rspace_data() const +double* + FFT_Bundle::get_rspace_data () const { - return fft_double->get_rspace_data(); + return fft_double->get_rspace_data (); } template <> -std::complex* FFT_Bundle::get_auxr_data() const +std::complex* + FFT_Bundle::get_auxr_data () const { - return fft_float->get_auxr_data(); + return fft_float->get_auxr_data (); } template <> -std::complex* FFT_Bundle::get_auxr_data() const +std::complex* + FFT_Bundle::get_auxr_data () const { - return fft_double->get_auxr_data(); + return fft_double->get_auxr_data (); } template <> 
-std::complex* FFT_Bundle::get_auxg_data() const +std::complex* + FFT_Bundle::get_auxg_data () const { - return fft_float->get_auxg_data(); + return fft_float->get_auxg_data (); } template <> -std::complex* FFT_Bundle::get_auxg_data() const +std::complex* + FFT_Bundle::get_auxg_data () const { - return fft_double->get_auxg_data(); + return fft_double->get_auxg_data (); } template <> -std::complex* FFT_Bundle::get_auxr_3d_data() const +std::complex* + FFT_Bundle::get_auxr_3d_data () const { - return fft_float->get_auxr_3d_data(); + return fft_float->get_auxr_3d_data (); } template <> -std::complex* FFT_Bundle::get_auxr_3d_data() const +std::complex* + FFT_Bundle::get_auxr_3d_data () const { - return fft_double->get_auxr_3d_data(); + return fft_double->get_auxr_3d_data (); } } // namespace ModuleBase \ No newline at end of file diff --git a/source/source_base/module_fft/fft_bundle.h b/source/source_base/module_fft/fft_bundle.h index 21e5067a6b2..36ae74c0ff7 100644 --- a/source/source_base/module_fft/fft_bundle.h +++ b/source/source_base/module_fft/fft_bundle.h @@ -10,8 +10,8 @@ namespace ModuleBase class FFT_Bundle { public: - FFT_Bundle() {}; - ~FFT_Bundle(); + FFT_Bundle () {}; + ~FFT_Bundle (); /** * @brief Constructor with device and precision. * @param device_in device type, cpu or gpu. @@ -20,7 +20,7 @@ class FFT_Bundle * the function will check the input device and precision, * and set the device and precision. */ - FFT_Bundle(std::string device_in, std::string precision_in) : device(device_in), precision(precision_in) {}; + FFT_Bundle (std::string device_in, std::string precision_in) : device (device_in), precision (precision_in) {}; /** * @brief Set device and precision. @@ -30,7 +30,7 @@ class FFT_Bundle * the function will check the input device and precision, * and set the device and precision. 
*/ - void setfft(std::string device_in, std::string precision_in); + void setfft (std::string device_in, std::string precision_in); /** * @brief Set the DSP cluster id for the FFT_DSP backend. @@ -38,7 +38,11 @@ class FFT_Bundle * * Caller-injected DSP routing info; only used when device == "dsp". */ - void set_dsp_cluster_id(int id) { this->dsp_cluster_id_ = id; } + void + set_dsp_cluster_id (int id) + { + this->dsp_cluster_id_ = id; + } /** * @brief Initialize the fft parameters. @@ -59,17 +63,17 @@ class FFT_Bundle * the function will initialize the many-fft parameters * Wheatley in cpu or gpu device. */ - void initfft(int nx_in, - int ny_in, - int nz_in, - int lixy_in, - int rixy_in, - int ns_in, - int nplane_in, - int nproc_in, - bool gamma_only_in, - bool xprime_in = true, - bool mpifft_in = false); + void initfft (int nx_in, + int ny_in, + int nz_in, + int lixy_in, + int rixy_in, + int ns_in, + int nplane_in, + int nproc_in, + bool gamma_only_in, + bool xprime_in = true, + bool mpifft_in = false); /** * @brief Initialize the fft mode. @@ -78,18 +82,19 @@ class FFT_Bundle * the function will initialize the fft mode. */ - void initfftmode(int fft_mode_in) + void + initfftmode (int fft_mode_in) { this->fft_mode = fft_mode_in; } - void setupFFT(); + void setupFFT (); - void clearFFT(); + void clearFFT (); - void clear(); + void clear (); - void resource_handler(const int flag) const; + void resource_handler (const int flag) const; /** * @brief Get the real space data. * @return FPTYPE* the real space data. @@ -98,7 +103,7 @@ class FFT_Bundle * which is used in the cpu-like fft. */ template - FPTYPE* get_rspace_data() const; + FPTYPE* get_rspace_data () const; /** * @brief Get the auxr data. * @return std::complex* the auxr data. @@ -107,7 +112,7 @@ class FFT_Bundle * which is used in the cpu-like fft. */ template - std::complex* get_auxr_data() const; + std::complex* get_auxr_data () const; /** * @brief Get the auxg data. * @return std::complex* the auxg data. 
@@ -116,7 +121,7 @@ class FFT_Bundle * which is used in the cpu-like fft. */ template - std::complex* get_auxg_data() const; + std::complex* get_auxg_data () const; /** * @brief Get the auxr 3d data. * @return std::complex* the auxr 3d data. @@ -125,7 +130,7 @@ class FFT_Bundle * which is used in the gpu-like fft. */ template - std::complex* get_auxr_3d_data() const; + std::complex* get_auxr_3d_data () const; /** * @brief Forward fft in z direction. @@ -138,7 +143,7 @@ class FFT_Bundle * which is used in the cpu-like fft. */ template - void fftzfor(std::complex* in, std::complex* out) const; + void fftzfor (std::complex* in, std::complex* out) const; /** * @brief Forward fft in x-y direction. * @param in input data. @@ -149,7 +154,7 @@ class FFT_Bundle * the function will call the fftxyfor in the accurate fft class. */ template - void fftxyfor(std::complex* in, std::complex* out) const; + void fftxyfor (std::complex* in, std::complex* out) const; /** * @brief Backward fft in z direction. * @param in input data. @@ -160,7 +165,7 @@ class FFT_Bundle * the function will call the fftzbac in the accurate fft class. */ template - void fftzbac(std::complex* in, std::complex* out) const; + void fftzbac (std::complex* in, std::complex* out) const; /** * @brief Backward fft in x-y direction. * @param in input data. @@ -171,7 +176,7 @@ class FFT_Bundle * the function will call the fftxybac in the accurate fft class. */ template - void fftxybac(std::complex* in, std::complex* out) const; + void fftxybac (std::complex* in, std::complex* out) const; /** * @brief Real to complex fft in x-y direction. @@ -183,7 +188,7 @@ class FFT_Bundle * the function will call the fftxyr2c in the accurate fft class. */ template - void fftxyr2c(FPTYPE* in, std::complex* out) const; + void fftxyr2c (FPTYPE* in, std::complex* out) const; /** * @brief Complex to real fft in x-y direction. * @param in input data. 
@@ -194,12 +199,12 @@ class FFT_Bundle * the function will call the fftxyc2r in the accurate fft class. */ template - void fftxyc2r(std::complex* in, FPTYPE* out) const; + void fftxyc2r (std::complex* in, FPTYPE* out) const; template - void fft3D_forward(std::complex* in, std::complex* out) const; + void fft3D_forward (std::complex* in, std::complex* out) const; template - void fft3D_backward(std::complex* in, std::complex* out) const; + void fft3D_backward (std::complex* in, std::complex* out) const; private: int fft_mode = 0; @@ -212,18 +217,14 @@ class FFT_Bundle std::string precision = "double"; int dsp_cluster_id_ = 0; }; -// Use RAII (Resource Acquisition Is Initialization) to +// Use RAII (Resource Acquisition Is Initialization) to // control the resources used by hthread when setting the DSP struct FFT_Guard - { - const FFT_Bundle& fft_; - FFT_Guard(const FFT_Bundle& fft) : fft_(fft) - {fft_.resource_handler(1);} - ~FFT_Guard() - { - fft_.resource_handler(0); - } - }; +{ + const FFT_Bundle& fft_; + FFT_Guard (const FFT_Bundle& fft) : fft_ (fft) { fft_.resource_handler (1); } + ~FFT_Guard () { fft_.resource_handler (0); } +}; } // namespace ModuleBase #endif // FFT_H diff --git a/source/source_base/module_fft/fft_cpu.cpp b/source/source_base/module_fft/fft_cpu.cpp index f50f6e9e868..de348c9fbc7 100644 --- a/source/source_base/module_fft/fft_cpu.cpp +++ b/source/source_base/module_fft/fft_cpu.cpp @@ -4,15 +4,16 @@ namespace ModuleBase { template -void FFT_CPU::initfft(int nx_in, - int ny_in, - int nz_in, - int lixy_in, - int rixy_in, - int ns_in, - int nplane_in, - int nproc_in, - bool gamma_only_in, +void + FFT_CPU::initfft (int nx_in, + int ny_in, + int nz_in, + int lixy_in, + int rixy_in, + int ns_in, + int nplane_in, + int nproc_in, + bool gamma_only_in, bool xprime_in) { this->gamma_only = gamma_only_in; @@ -20,13 +21,16 @@ void FFT_CPU::initfft(int nx_in, this->fftnx = this->nx = nx_in; this->fftny = this->ny = ny_in; if (this->gamma_only) - { - if 
(xprime) { - this->fftnx = int(this->nx / 2) + 1; - } else { - this->fftny = int(this->ny / 2) + 1; + { + if (xprime) + { + this->fftnx = int (this->nx / 2) + 1; + } + else + { + this->fftny = int (this->ny / 2) + 1; + } } - } this->nz = nz_in; this->ns = ns_in; this->lixy = lixy_in; @@ -40,57 +44,58 @@ void FFT_CPU::initfft(int nx_in, this->maxgrids = (nsz > nrxx) ? nsz : nrxx; } template <> -void FFT_CPU::setupFFT() +void + FFT_CPU::setupFFT () { - + unsigned int flag = FFTW_ESTIMATE; switch (this->fft_mode) - { - case 0: - flag = FFTW_ESTIMATE; - break; - case 1: - flag = FFTW_MEASURE; - break; - case 2: - flag = FFTW_PATIENT; - break; - case 3: - flag = FFTW_EXHAUSTIVE; - break; - default: - break; - } - z_auxg = (std::complex*)fftw_malloc(sizeof(fftw_complex) * this->maxgrids); - z_auxr = (std::complex*)fftw_malloc(sizeof(fftw_complex) * this->maxgrids); + { + case 0: + flag = FFTW_ESTIMATE; + break; + case 1: + flag = FFTW_MEASURE; + break; + case 2: + flag = FFTW_PATIENT; + break; + case 3: + flag = FFTW_EXHAUSTIVE; + break; + default: + break; + } + z_auxg = (std::complex*)fftw_malloc (sizeof (fftw_complex) * this->maxgrids); + z_auxr = (std::complex*)fftw_malloc (sizeof (fftw_complex) * this->maxgrids); d_rspace = (double*)z_auxg; - this->planzfor = fftw_plan_many_dft(1, - &this->nz, - this->ns, - (fftw_complex*)z_auxg, - &this->nz, - 1, - this->nz, - (fftw_complex*)z_auxg, - &this->nz, - 1, - this->nz, - FFTW_FORWARD, - flag); + this->planzfor = fftw_plan_many_dft (1, + &this->nz, + this->ns, + (fftw_complex*)z_auxg, + &this->nz, + 1, + this->nz, + (fftw_complex*)z_auxg, + &this->nz, + 1, + this->nz, + FFTW_FORWARD, + flag); - this->planzbac = fftw_plan_many_dft(1, - &this->nz, - this->ns, - (fftw_complex*)z_auxg, - &this->nz, - 1, - this->nz, - (fftw_complex*)z_auxg, - &this->nz, - 1, - this->nz, - FFTW_BACKWARD, - flag); + this->planzbac = fftw_plan_many_dft (1, + &this->nz, + this->ns, + (fftw_complex*)z_auxg, + &this->nz, + 1, + this->nz, + 
(fftw_complex*)z_auxg, + &this->nz, + 1, + this->nz, + FFTW_BACKWARD, + flag); //--------------------------------------------------------- // 2 D - XY @@ -100,377 +105,399 @@ void FFT_CPU::setupFFT() int* embed = nullptr; int npy = this->nplane * this->ny; if (this->xprime) - { - this->planyfor = fftw_plan_many_dft(1, - &this->ny, - this->nplane, - (fftw_complex*)z_auxr, - embed, - this->nplane, - 1, - (fftw_complex*)z_auxr, - embed, - this->nplane, - 1, - FFTW_FORWARD, - flag); - this->planybac = fftw_plan_many_dft(1, - &this->ny, - this->nplane, - (fftw_complex*)z_auxr, - embed, - this->nplane, - 1, - (fftw_complex*)z_auxr, - embed, - this->nplane, - 1, - FFTW_BACKWARD, - flag); - if (this->gamma_only) - { - this->planxr2c = fftw_plan_many_dft_r2c(1, - &this->nx, - npy, - d_rspace, - embed, - npy, - 1, - (fftw_complex*)z_auxr, - embed, - npy, - 1, - flag); - this->planxc2r = fftw_plan_many_dft_c2r(1, - &this->nx, - npy, - (fftw_complex*)z_auxr, - embed, - npy, - 1, - d_rspace, - embed, - npy, - 1, - flag); - } - else { - this->planxfor1 = fftw_plan_many_dft(1, - &this->nx, - npy, - (fftw_complex*)z_auxr, - embed, - npy, + this->planyfor = fftw_plan_many_dft (1, + &this->ny, + this->nplane, + (fftw_complex*)z_auxr, + embed, + this->nplane, 1, - (fftw_complex*)z_auxr, - embed, - npy, - 1, - FFTW_FORWARD, + (fftw_complex*)z_auxr, + embed, + this->nplane, + 1, + FFTW_FORWARD, flag); - this->planxbac1 = fftw_plan_many_dft(1, - &this->nx, - npy, - (fftw_complex*)z_auxr, - embed, - npy, + this->planybac = fftw_plan_many_dft (1, + &this->ny, + this->nplane, + (fftw_complex*)z_auxr, + embed, + this->nplane, 1, - (fftw_complex*)z_auxr, - embed, - npy, - 1, - FFTW_BACKWARD, - flag); + (fftw_complex*)z_auxr, + embed, + this->nplane, + 1, + FFTW_BACKWARD, + flag); + if (this->gamma_only) + { + this->planxr2c = fftw_plan_many_dft_r2c (1, + &this->nx, + npy, + d_rspace, + embed, + npy, + 1, + (fftw_complex*)z_auxr, + embed, + npy, + 1, + flag); + this->planxc2r = 
fftw_plan_many_dft_c2r (1, + &this->nx, + npy, + (fftw_complex*)z_auxr, + embed, + npy, + 1, + d_rspace, + embed, + npy, + 1, + flag); + } + else + { + this->planxfor1 = fftw_plan_many_dft (1, + &this->nx, + npy, + (fftw_complex*)z_auxr, + embed, + npy, + 1, + (fftw_complex*)z_auxr, + embed, + npy, + 1, + FFTW_FORWARD, + flag); + this->planxbac1 = fftw_plan_many_dft (1, + &this->nx, + npy, + (fftw_complex*)z_auxr, + embed, + npy, + 1, + (fftw_complex*)z_auxr, + embed, + npy, + 1, + FFTW_BACKWARD, + flag); + } } - } else - { - this->planxfor1 = fftw_plan_many_dft(1, - &this->nx, - this->nplane * (this->lixy + 1), - (fftw_complex*)z_auxr, - embed, - npy, - 1, - (fftw_complex*)z_auxr, - embed, - npy, - 1, - FFTW_FORWARD, - flag); - this->planxbac1 = fftw_plan_many_dft(1, - &this->nx, - this->nplane * (this->lixy + 1), - (fftw_complex*)z_auxr, - embed, - npy, - 1, - (fftw_complex*)z_auxr, - embed, - npy, - 1, - FFTW_BACKWARD, - flag); - if (this->gamma_only) { - this->planyr2c = fftw_plan_many_dft_r2c(1, - &this->ny, - this->nplane, - d_rspace, - embed, - this->nplane, - 1, - (fftw_complex*)z_auxr, - embed, - this->nplane, - 1, - flag); - this->planyc2r = fftw_plan_many_dft_c2r(1, - &this->ny, - this->nplane, - (fftw_complex*)z_auxr, - embed, - this->nplane, - 1, - d_rspace, - embed, - this->nplane, - 1, - flag); + this->planxfor1 = fftw_plan_many_dft (1, + &this->nx, + this->nplane * (this->lixy + 1), + (fftw_complex*)z_auxr, + embed, + npy, + 1, + (fftw_complex*)z_auxr, + embed, + npy, + 1, + FFTW_FORWARD, + flag); + this->planxbac1 = fftw_plan_many_dft (1, + &this->nx, + this->nplane * (this->lixy + 1), + (fftw_complex*)z_auxr, + embed, + npy, + 1, + (fftw_complex*)z_auxr, + embed, + npy, + 1, + FFTW_BACKWARD, + flag); + if (this->gamma_only) + { + this->planyr2c = fftw_plan_many_dft_r2c (1, + &this->ny, + this->nplane, + d_rspace, + embed, + this->nplane, + 1, + (fftw_complex*)z_auxr, + embed, + this->nplane, + 1, + flag); + this->planyc2r = fftw_plan_many_dft_c2r 
(1, + &this->ny, + this->nplane, + (fftw_complex*)z_auxr, + embed, + this->nplane, + 1, + d_rspace, + embed, + this->nplane, + 1, + flag); + } + else + { + this->planxfor2 = fftw_plan_many_dft (1, + &this->nx, + this->nplane * (this->ny - this->rixy), + (fftw_complex*)z_auxr, + embed, + npy, + 1, + (fftw_complex*)z_auxr, + embed, + npy, + 1, + FFTW_FORWARD, + flag); + this->planxbac2 = fftw_plan_many_dft (1, + &this->nx, + this->nplane * (this->ny - this->rixy), + (fftw_complex*)z_auxr, + embed, + npy, + 1, + (fftw_complex*)z_auxr, + embed, + npy, + 1, + FFTW_BACKWARD, + flag); + this->planyfor = fftw_plan_many_dft (1, + &this->ny, + this->nplane, + (fftw_complex*)z_auxr, + embed, + this->nplane, + 1, + (fftw_complex*)z_auxr, + embed, + this->nplane, + 1, + FFTW_FORWARD, + flag); + this->planybac = fftw_plan_many_dft (1, + &this->ny, + this->nplane, + (fftw_complex*)z_auxr, + embed, + this->nplane, + 1, + (fftw_complex*)z_auxr, + embed, + this->nplane, + 1, + FFTW_BACKWARD, + flag); + } } - else - { - this->planxfor2 = fftw_plan_many_dft(1, - &this->nx, - this->nplane * (this->ny - this->rixy), - (fftw_complex*)z_auxr, - embed, - npy, - 1, (fftw_complex*)z_auxr, - embed, - npy, - 1, - FFTW_FORWARD, - flag); - this->planxbac2 = fftw_plan_many_dft(1, - &this->nx, - this->nplane * (this->ny - this->rixy), - (fftw_complex*)z_auxr, - embed, - npy, - 1, - (fftw_complex*)z_auxr, - embed, - npy, - 1, - FFTW_BACKWARD, - flag); - this->planyfor = fftw_plan_many_dft(1, - &this->ny, - this->nplane, - (fftw_complex*)z_auxr, - embed, - this->nplane, - 1, - (fftw_complex*)z_auxr, - embed, - this->nplane, - 1, - FFTW_FORWARD, - flag); - this->planybac = fftw_plan_many_dft(1, - &this->ny, - this->nplane, - (fftw_complex*)z_auxr, - embed, - this->nplane, - 1, - (fftw_complex*)z_auxr, - embed, - this->nplane, - 1, - FFTW_BACKWARD, - flag); - } - } return; } template <> -void FFT_CPU::clearfft(fftw_plan& plan) +void + FFT_CPU::clearfft (fftw_plan& plan) { if (plan) - { - 
fftw_destroy_plan(plan); - plan = nullptr; - } + { + fftw_destroy_plan (plan); + plan = nullptr; + } } template <> -void FFT_CPU::cleanFFT() +void + FFT_CPU::cleanFFT () { - clearfft(planzfor); - clearfft(planzbac); - clearfft(planxfor1); - clearfft(planxbac1); - clearfft(planxfor2); - clearfft(planxbac2); - clearfft(planyfor); - clearfft(planybac); - clearfft(planxr2c); - clearfft(planxc2r); - clearfft(planyr2c); - clearfft(planyc2r); + clearfft (planzfor); + clearfft (planzbac); + clearfft (planxfor1); + clearfft (planxbac1); + clearfft (planxfor2); + clearfft (planxbac2); + clearfft (planyfor); + clearfft (planybac); + clearfft (planxr2c); + clearfft (planxc2r); + clearfft (planyr2c); + clearfft (planyc2r); } template <> -void FFT_CPU::clear() +void + FFT_CPU::clear () { - this->cleanFFT(); + this->cleanFFT (); if (z_auxg != nullptr) - { - fftw_free(z_auxg); - z_auxg = nullptr; - } + { + fftw_free (z_auxg); + z_auxg = nullptr; + } if (z_auxr != nullptr) - { - fftw_free(z_auxr); - z_auxr = nullptr; - } + { + fftw_free (z_auxr); + z_auxr = nullptr; + } d_rspace = nullptr; } template <> -void FFT_CPU::fftxyfor(std::complex* in, std::complex* out) const +void + FFT_CPU::fftxyfor (std::complex* in, std::complex* out) const { int npy = this->nplane * this->ny; if (this->xprime) - { - - fftw_execute_dft(this->planxfor1, (fftw_complex*)in, (fftw_complex*)out); - #pragma omp parallel for - for (int i = 0; i < this->lixy + 1; ++i) { - fftw_execute_dft(this->planyfor, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); - } - #pragma omp parallel for - for (int i = rixy; i < this->nx; ++i) - { - fftw_execute_dft(this->planyfor, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); + + fftw_execute_dft (this->planxfor1, (fftw_complex*)in, (fftw_complex*)out); +#pragma omp parallel for + for (int i = 0; i < this->lixy + 1; ++i) + { + fftw_execute_dft (this->planyfor, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); + } +#pragma omp parallel for + for 
(int i = rixy; i < this->nx; ++i) + { + fftw_execute_dft (this->planyfor, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); + } } - } else - { - #pragma omp parallel for - for (int i = 0; i < this->nx; ++i) { - fftw_execute_dft(this->planyfor, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); +#pragma omp parallel for + for (int i = 0; i < this->nx; ++i) + { + fftw_execute_dft (this->planyfor, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); + } + fftw_execute_dft (this->planxfor1, (fftw_complex*)in, (fftw_complex*)out); + fftw_execute_dft (this->planxfor2, (fftw_complex*)&in[rixy * nplane], (fftw_complex*)&out[rixy * nplane]); } - fftw_execute_dft(this->planxfor1, (fftw_complex*)in, (fftw_complex*)out); - fftw_execute_dft(this->planxfor2, (fftw_complex*)&in[rixy * nplane], (fftw_complex*)&out[rixy * nplane]); - } } template <> -void FFT_CPU::fftxybac(std::complex* in,std::complex* out) const +void + FFT_CPU::fftxybac (std::complex* in, std::complex* out) const { int npy = this->nplane * this->ny; if (this->xprime) - { - #pragma omp parallel for - for (int i = 0; i < this->lixy + 1; ++i) - { - fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); - } - #pragma omp parallel for - for (int i = rixy; i < this->nx; ++i) { - fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); +#pragma omp parallel for + for (int i = 0; i < this->lixy + 1; ++i) + { + fftw_execute_dft (this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); + } +#pragma omp parallel for + for (int i = rixy; i < this->nx; ++i) + { + fftw_execute_dft (this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); + } + fftw_execute_dft (this->planxbac1, (fftw_complex*)in, (fftw_complex*)out); } - fftw_execute_dft(this->planxbac1, (fftw_complex*)in, (fftw_complex*)out); - } else - { - fftw_execute_dft(this->planxbac1, (fftw_complex*)in, (fftw_complex*)out); - 
fftw_execute_dft(this->planxbac2, (fftw_complex*)&in[rixy * nplane], (fftw_complex*)&out[rixy * nplane]); - #pragma omp parallel for - for (int i = 0; i < this->nx; ++i) { - fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); + fftw_execute_dft (this->planxbac1, (fftw_complex*)in, (fftw_complex*)out); + fftw_execute_dft (this->planxbac2, (fftw_complex*)&in[rixy * nplane], (fftw_complex*)&out[rixy * nplane]); +#pragma omp parallel for + for (int i = 0; i < this->nx; ++i) + { + fftw_execute_dft (this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); + } } - } } template <> -void FFT_CPU::fftzfor(std::complex* in, std::complex* out) const +void + FFT_CPU::fftzfor (std::complex* in, std::complex* out) const { - fftw_execute_dft(this->planzfor, (fftw_complex*)in, (fftw_complex*)out); + fftw_execute_dft (this->planzfor, (fftw_complex*)in, (fftw_complex*)out); } template <> -void FFT_CPU::fftzbac(std::complex* in, std::complex* out) const +void + FFT_CPU::fftzbac (std::complex* in, std::complex* out) const { - fftw_execute_dft(this->planzbac, (fftw_complex*)in, (fftw_complex*)out); + fftw_execute_dft (this->planzbac, (fftw_complex*)in, (fftw_complex*)out); } template <> -void FFT_CPU::fftxyr2c(double* in, std::complex* out) const +void + FFT_CPU::fftxyr2c (double* in, std::complex* out) const { int npy = this->nplane * this->ny; if (this->xprime) - { - fftw_execute_dft_r2c(this->planxr2c, in, (fftw_complex*)out); - #pragma omp parallel for - for (int i = 0; i < this->lixy + 1; ++i) { - fftw_execute_dft(this->planyfor, (fftw_complex*)&out[i * npy], (fftw_complex*)&out[i * npy]); + fftw_execute_dft_r2c (this->planxr2c, in, (fftw_complex*)out); +#pragma omp parallel for + for (int i = 0; i < this->lixy + 1; ++i) + { + fftw_execute_dft (this->planyfor, (fftw_complex*)&out[i * npy], (fftw_complex*)&out[i * npy]); + } } - } else - { - #pragma omp parallel for - for (int i = 0; i < this->nx; ++i) { - 
fftw_execute_dft_r2c(this->planyr2c, &in[i * npy], (fftw_complex*)&out[i * npy]); +#pragma omp parallel for + for (int i = 0; i < this->nx; ++i) + { + fftw_execute_dft_r2c (this->planyr2c, &in[i * npy], (fftw_complex*)&out[i * npy]); + } + fftw_execute_dft (this->planxfor1, (fftw_complex*)out, (fftw_complex*)out); } - fftw_execute_dft(this->planxfor1, (fftw_complex*)out, (fftw_complex*)out); - } } template <> -void FFT_CPU::fftxyc2r(std::complex *in,double *out) const +void + FFT_CPU::fftxyc2r (std::complex* in, double* out) const { int npy = this->nplane * this->ny; if (this->xprime) - { - #pragma omp parallel for - for (int i = 0; i < this->lixy + 1; ++i) { - fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&in[i * npy]); +#pragma omp parallel for + for (int i = 0; i < this->lixy + 1; ++i) + { + fftw_execute_dft (this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&in[i * npy]); + } + fftw_execute_dft_c2r (this->planxc2r, (fftw_complex*)in, out); } - fftw_execute_dft_c2r(this->planxc2r, (fftw_complex*)in, out); - } else - { - fftw_execute_dft(this->planxbac1, (fftw_complex*)in, (fftw_complex*)in); - #pragma omp parallel for - for (int i = 0; i < this->nx; ++i) { - fftw_execute_dft_c2r(this->planyc2r, (fftw_complex*)&in[i * npy], &out[i * npy]); + fftw_execute_dft (this->planxbac1, (fftw_complex*)in, (fftw_complex*)in); +#pragma omp parallel for + for (int i = 0; i < this->nx; ++i) + { + fftw_execute_dft_c2r (this->planyc2r, (fftw_complex*)&in[i * npy], &out[i * npy]); + } } - } } -template <> double* -FFT_CPU::get_rspace_data() const {return d_rspace;} -template <> std::complex* -FFT_CPU::get_auxr_data() const {return z_auxr;} -template <> std::complex* -FFT_CPU::get_auxg_data() const {return z_auxg;} +template <> +double* + FFT_CPU::get_rspace_data () const +{ + return d_rspace; +} +template <> +std::complex* + FFT_CPU::get_auxr_data () const +{ + return z_auxr; +} +template <> +std::complex* + FFT_CPU::get_auxg_data () const 
+{ + return z_auxg; +} -template FFT_CPU::FFT_CPU(); -template FFT_CPU::~FFT_CPU(); -template FFT_CPU::FFT_CPU(); -template FFT_CPU::~FFT_CPU(); -} \ No newline at end of file +template FFT_CPU::FFT_CPU (); +template FFT_CPU::~FFT_CPU (); +template FFT_CPU::FFT_CPU (); +template FFT_CPU::~FFT_CPU (); +} // namespace ModuleBase \ No newline at end of file diff --git a/source/source_base/module_fft/fft_cpu.h b/source/source_base/module_fft/fft_cpu.h index f33fecd74b8..da7f88588b6 100644 --- a/source/source_base/module_fft/fft_cpu.h +++ b/source/source_base/module_fft/fft_cpu.h @@ -8,10 +8,10 @@ namespace ModuleBase template class FFT_CPU : public FFT_BASE { - public: - FFT_CPU(){}; - FFT_CPU(const int fft_mode_in):fft_mode(fft_mode_in){}; - ~FFT_CPU(){}; + public: + FFT_CPU () {}; + FFT_CPU (const int fft_mode_in) : fft_mode (fft_mode_in) {}; + ~FFT_CPU () {}; /** * @brief Initialize the fft parameters. @@ -29,141 +29,124 @@ class FFT_CPU : public FFT_BASE * @param gamma_only_in whether only gamma point is used. * @param xprime_in whether xprime is used. */ - void initfft(int nx_in, - int ny_in, - int nz_in, - int lixy_in, - int rixy_in, - int ns_in, - int nplane_in, - int nproc_in, - bool gamma_only_in, - bool xprime_in = true) override; - - __attribute__((weak)) - void setupFFT() override; - - // void initplan(const unsigned int& flag = 0); - __attribute__((weak)) - void cleanFFT() override; - - __attribute__((weak)) - void clear() override; + void initfft (int nx_in, + int ny_in, + int nz_in, + int lixy_in, + int rixy_in, + int ns_in, + int nplane_in, + int nproc_in, + bool gamma_only_in, + bool xprime_in = true) override; + + __attribute__ ((weak)) void setupFFT () override; + + // void initplan(const unsigned int& flag = 0); + __attribute__ ((weak)) void cleanFFT () override; + + __attribute__ ((weak)) void clear () override; /** * @brief Get the real space data the CPU FFT. * @return FPTYPE* the real space data. 
- * + * * the function will return the real space data, * which is used in the CPU fft.Use the weak attribute * to avoid defining float while without flag ENABLE_FLOAT_FFTW. */ - __attribute__((weak)) - FPTYPE* get_rspace_data() const override; + __attribute__ ((weak)) FPTYPE* get_rspace_data () const override; - __attribute__((weak)) - std::complex* get_auxr_data() const override; + __attribute__ ((weak)) std::complex* get_auxr_data () const override; - __attribute__((weak)) - std::complex* get_auxg_data() const override; + __attribute__ ((weak)) std::complex* get_auxg_data () const override; /** * @brief Forward FFT in x-y direction * @param in input data * @param out output data - * + * * The function details can be found in FFT_BASE, * and the function interfaces can be found in FFT_BUNDLE. */ - __attribute__((weak)) - void fftxyfor(std::complex* in, - std::complex* out) const override; - - __attribute__((weak)) - void fftxybac(std::complex* in, - std::complex* out) const override; - - __attribute__((weak)) - void fftzfor(std::complex* in, - std::complex* out) const override; - - __attribute__((weak)) - void fftzbac(std::complex* in, - std::complex* out) const override; - - __attribute__((weak)) - void fftxyr2c(FPTYPE* in, - std::complex* out) const override; - - __attribute__((weak)) - void fftxyc2r(std::complex* in, - FPTYPE* out) const override; - private: - void clearfft(fftw_plan& plan); - void clearfft(fftwf_plan& plan); - - fftw_plan planzfor = NULL; - fftw_plan planzbac = NULL; - fftw_plan planxfor1 = NULL; - fftw_plan planxbac1 = NULL; - fftw_plan planxfor2 = NULL; - fftw_plan planxbac2 = NULL; - fftw_plan planyfor = NULL; - fftw_plan planybac = NULL; - fftw_plan planxr2c = NULL; - fftw_plan planxc2r = NULL; - fftw_plan planyr2c = NULL; - fftw_plan planyc2r = NULL; - - fftwf_plan planfzfor = NULL; - fftwf_plan planfzbac = NULL; - fftwf_plan planfxfor1= NULL; - fftwf_plan planfxbac1= NULL; - fftwf_plan planfxfor2= NULL; - fftwf_plan planfxbac2= NULL; - 
fftwf_plan planfyfor = NULL; - fftwf_plan planfybac = NULL; - fftwf_plan planfxr2c = NULL; - fftwf_plan planfxc2r = NULL; - fftwf_plan planfyr2c = NULL; - fftwf_plan planfyc2r = NULL; - - std::complex*c_auxg = nullptr; - std::complex*c_auxr = nullptr; // fft space - std::complex*z_auxg = nullptr; - std::complex*z_auxr = nullptr; // fft space - - float* s_rspace = nullptr; // real number space for r, [nplane * nx *ny] - double* d_rspace = nullptr; // real number space for r, [nplane * nx *ny] - int fftnx=0; - int fftny=0; - int fftnxy=0; - int nxy=0; - int nplane=0; - int ns=0; //number of sticks - int nproc=1; // number of proc. - int maxgrids = 0; - bool gamma_only = false; - - /** - * @brief lixy: the left edge of the pw ball in the y direction - */ - int lixy=0; - - /** - * @brief rixy: the right edge of the pw ball in the x or y direction - */ - int rixy=0; - /** - * @brief xprime: whether xprime is used,when do recip2real, x-fft will - * be done last and when doing real2recip, x-fft will be done first; - * false: y-fft For gamma_only, true: we use half x; false: we use half y - */ - bool xprime = true; - /** - * @brief fft_mode: fftw mode 0: estimate, 1: measure, 2: patient, 3: exhaustive - */ - int fft_mode = 0; + __attribute__ ((weak)) void fftxyfor (std::complex* in, std::complex* out) const override; + + __attribute__ ((weak)) void fftxybac (std::complex* in, std::complex* out) const override; + + __attribute__ ((weak)) void fftzfor (std::complex* in, std::complex* out) const override; + + __attribute__ ((weak)) void fftzbac (std::complex* in, std::complex* out) const override; + + __attribute__ ((weak)) void fftxyr2c (FPTYPE* in, std::complex* out) const override; + + __attribute__ ((weak)) void fftxyc2r (std::complex* in, FPTYPE* out) const override; + + private: + void clearfft (fftw_plan& plan); + void clearfft (fftwf_plan& plan); + + fftw_plan planzfor = nullptr; + fftw_plan planzbac = nullptr; + fftw_plan planxfor1 = nullptr; + fftw_plan planxbac1 = 
nullptr; + fftw_plan planxfor2 = nullptr; + fftw_plan planxbac2 = nullptr; + fftw_plan planyfor = nullptr; + fftw_plan planybac = nullptr; + fftw_plan planxr2c = nullptr; + fftw_plan planxc2r = nullptr; + fftw_plan planyr2c = nullptr; + fftw_plan planyc2r = nullptr; + + fftwf_plan planfzfor = nullptr; + fftwf_plan planfzbac = nullptr; + fftwf_plan planfxfor1 = nullptr; + fftwf_plan planfxbac1 = nullptr; + fftwf_plan planfxfor2 = nullptr; + fftwf_plan planfxbac2 = nullptr; + fftwf_plan planfyfor = nullptr; + fftwf_plan planfybac = nullptr; + fftwf_plan planfxr2c = nullptr; + fftwf_plan planfxc2r = nullptr; + fftwf_plan planfyr2c = nullptr; + fftwf_plan planfyc2r = nullptr; + + std::complex* c_auxg = nullptr; + std::complex* c_auxr = nullptr; // fft space + std::complex* z_auxg = nullptr; + std::complex* z_auxr = nullptr; // fft space + + float* s_rspace = nullptr; // real number space for r, [nplane * nx *ny] + double* d_rspace = nullptr; // real number space for r, [nplane * nx *ny] + int fftnx = 0; + int fftny = 0; + int fftnxy = 0; + int nxy = 0; + int nplane = 0; + int ns = 0; // number of sticks + int nproc = 1; // number of proc. 
+ int maxgrids = 0; + bool gamma_only = false; + + /** + * @brief lixy: the left edge of the pw ball in the y direction + */ + int lixy = 0; + + /** + * @brief rixy: the right edge of the pw ball in the x or y direction + */ + int rixy = 0; + /** + * @brief xprime: whether xprime is used,when do recip2real, x-fft will + * be done last and when doing real2recip, x-fft will be done first; + * false: y-fft For gamma_only, true: we use half x; false: we use half y + */ + bool xprime = true; + /** + * @brief fft_mode: fftw mode 0: estimate, 1: measure, 2: patient, 3: exhaustive + */ + int fft_mode = 0; }; -} +} // namespace ModuleBase #endif // FFT_CPU_H \ No newline at end of file diff --git a/source/source_base/module_fft/fft_cpu_float.cpp b/source/source_base/module_fft/fft_cpu_float.cpp index 9d1265fd795..4858ccf1309 100644 --- a/source/source_base/module_fft/fft_cpu_float.cpp +++ b/source/source_base/module_fft/fft_cpu_float.cpp @@ -3,66 +3,67 @@ namespace ModuleBase { template <> -void FFT_CPU::setupFFT() +void + FFT_CPU::setupFFT () { unsigned int flag = FFTW_ESTIMATE; switch (this->fft_mode) - { - case 0: - flag = FFTW_ESTIMATE; - break; - case 1: - flag = FFTW_MEASURE; - break; - case 2: - flag = FFTW_PATIENT; - break; - case 3: - flag = FFTW_EXHAUSTIVE; - break; - default: - break; - } - c_auxg = (std::complex*)fftwf_malloc(sizeof(fftwf_complex) * this->maxgrids); - c_auxr = (std::complex*)fftwf_malloc(sizeof(fftwf_complex) * this->maxgrids); + { + case 0: + flag = FFTW_ESTIMATE; + break; + case 1: + flag = FFTW_MEASURE; + break; + case 2: + flag = FFTW_PATIENT; + break; + case 3: + flag = FFTW_EXHAUSTIVE; + break; + default: + break; + } + c_auxg = (std::complex*)fftwf_malloc (sizeof (fftwf_complex) * this->maxgrids); + c_auxr = (std::complex*)fftwf_malloc (sizeof (fftwf_complex) * this->maxgrids); s_rspace = (float*)c_auxg; //--------------------------------------------------------- // 1 D //--------------------------------------------------------- - // 
fftw_plan_many_dft(int rank, + // fftw_plan_many_dft(int rank, // const int *n, int howmany, // fftw_complex *in, const int *inembed, int istride, int idist, - // fftw_complex *out, const int *onembed, int ostride, int odist, int sign, unsigned - //flags); + // fftw_complex *out, const int *onembed, int ostride, int odist, int sign, + // unsigned flags); - this->planfzfor = fftwf_plan_many_dft(1, - &this->nz, - this->ns, - (fftwf_complex*)c_auxg, - &this->nz, - 1, - this->nz, - (fftwf_complex*)c_auxg, - &this->nz, - 1, - this->nz, - FFTW_FORWARD, - flag); + this->planfzfor = fftwf_plan_many_dft (1, + &this->nz, + this->ns, + (fftwf_complex*)c_auxg, + &this->nz, + 1, + this->nz, + (fftwf_complex*)c_auxg, + &this->nz, + 1, + this->nz, + FFTW_FORWARD, + flag); - this->planfzbac = fftwf_plan_many_dft(1, - &this->nz, - this->ns, - (fftwf_complex*)c_auxg, - &this->nz, - 1, - this->nz, - (fftwf_complex*)c_auxg, - &this->nz, - 1, - this->nz, - FFTW_BACKWARD, - flag); + this->planfzbac = fftwf_plan_many_dft (1, + &this->nz, + this->ns, + (fftwf_complex*)c_auxg, + &this->nz, + 1, + this->nz, + (fftwf_complex*)c_auxg, + &this->nz, + 1, + this->nz, + FFTW_BACKWARD, + flag); //--------------------------------------------------------- // 2 D //--------------------------------------------------------- @@ -70,364 +71,389 @@ void FFT_CPU::setupFFT() int* embed = nullptr; int npy = this->nplane * this->ny; if (this->xprime) - { - this->planfyfor = fftwf_plan_many_dft(1, - &this->ny, - this->nplane, - (fftwf_complex*)c_auxr, - embed, - nplane, - 1, - (fftwf_complex*)c_auxr, - embed, - nplane, - 1, - FFTW_FORWARD, - flag); - this->planfybac = fftwf_plan_many_dft(1, - &this->ny, - this->nplane, - (fftwf_complex*)c_auxr, - embed, - nplane, - 1, - (fftwf_complex*)c_auxr, - embed, nplane, - 1, - FFTW_BACKWARD, - flag); - if (this->gamma_only) - { - this->planfxr2c = fftwf_plan_many_dft_r2c(1, - &this->nx, - npy, - s_rspace, - embed, - npy, - 1, - (fftwf_complex*)c_auxr, - embed, npy, - 1, 
- flag); - this->planfxc2r = fftwf_plan_many_dft_c2r(1, - &this->nx, - npy, - (fftwf_complex*)c_auxr, - embed, - npy, - 1, - s_rspace, - embed, - npy, - 1, - flag); - } - else { - this->planfxfor1 = fftwf_plan_many_dft(1, - &this->nx, - npy, - (fftwf_complex*)c_auxr, - embed, - npy, + this->planfyfor = fftwf_plan_many_dft (1, + &this->ny, + this->nplane, + (fftwf_complex*)c_auxr, + embed, + nplane, + 1, + (fftwf_complex*)c_auxr, + embed, + nplane, 1, - (fftwf_complex*)c_auxr, - embed, - npy, - 1, - FFTW_FORWARD, + FFTW_FORWARD, flag); - this->planfxbac1 = fftwf_plan_many_dft(1, - &this->nx, - npy, - (fftwf_complex*)c_auxr, - embed, - npy, + this->planfybac = fftwf_plan_many_dft (1, + &this->ny, + this->nplane, + (fftwf_complex*)c_auxr, + embed, + nplane, + 1, + (fftwf_complex*)c_auxr, + embed, + nplane, 1, - (fftwf_complex*)c_auxr, - embed, - npy, - 1, - FFTW_BACKWARD, + FFTW_BACKWARD, flag); + if (this->gamma_only) + { + this->planfxr2c = fftwf_plan_many_dft_r2c (1, + &this->nx, + npy, + s_rspace, + embed, + npy, + 1, + (fftwf_complex*)c_auxr, + embed, + npy, + 1, + flag); + this->planfxc2r = fftwf_plan_many_dft_c2r (1, + &this->nx, + npy, + (fftwf_complex*)c_auxr, + embed, + npy, + 1, + s_rspace, + embed, + npy, + 1, + flag); + } + else + { + this->planfxfor1 = fftwf_plan_many_dft (1, + &this->nx, + npy, + (fftwf_complex*)c_auxr, + embed, + npy, + 1, + (fftwf_complex*)c_auxr, + embed, + npy, + 1, + FFTW_FORWARD, + flag); + this->planfxbac1 = fftwf_plan_many_dft (1, + &this->nx, + npy, + (fftwf_complex*)c_auxr, + embed, + npy, + 1, + (fftwf_complex*)c_auxr, + embed, + npy, + 1, + FFTW_BACKWARD, + flag); + } } - } else - { - this->planfxfor1 = fftwf_plan_many_dft(1, - &this->nx, - this->nplane * (lixy + 1), - (fftwf_complex*)c_auxr, - embed, - npy, - 1, - (fftwf_complex*)c_auxr, - embed, - npy, - 1, - FFTW_FORWARD, - flag); - this->planfxbac1 = fftwf_plan_many_dft(1, - &this->nx, - this->nplane * (lixy + 1), - (fftwf_complex*)c_auxr, - embed, - npy, - 1, - 
(fftwf_complex*)c_auxr, - embed, - npy, - 1, - FFTW_BACKWARD, - flag); - if (this->gamma_only) { - this->planfyr2c = fftwf_plan_many_dft_r2c(1, - &this->ny, - this->nplane, - s_rspace, - embed, - this->nplane, - 1, - (fftwf_complex*)c_auxr, - embed, - this->nplane, - 1, - flag); - this->planfyc2r = fftwf_plan_many_dft_c2r(1, - &this->ny, - this->nplane, - (fftwf_complex*)c_auxr, - embed, - this->nplane, - 1, - s_rspace, - embed, - this->nplane, - 1, - flag); - } - else - { - this->planfxfor2 = fftwf_plan_many_dft(1, - &this->nx, - this->nplane * (this->ny - rixy), - (fftwf_complex*)c_auxr, - embed, - npy, - 1, - (fftwf_complex*)c_auxr, - embed, - npy, - 1, - FFTW_FORWARD, - flag); - this->planfxbac2 = fftwf_plan_many_dft(1, - &this->nx, - this->nplane * (this->ny - rixy), - (fftwf_complex*)c_auxr, - embed, - npy, - 1, - (fftwf_complex*)c_auxr, - embed, - npy, - 1, - FFTW_BACKWARD, - flag); - this->planfyfor = fftwf_plan_many_dft(1, - &this->ny, - this->nplane, - (fftwf_complex*)c_auxr, - embed, - this->nplane, - 1, - (fftwf_complex*)c_auxr, - embed, - this->nplane, - 1, - FFTW_FORWARD, - flag); - this->planfybac = fftwf_plan_many_dft(1, - &this->ny, - this->nplane, - (fftwf_complex*)c_auxr, - embed, - this->nplane, - 1, - (fftwf_complex*)c_auxr, - embed, - this->nplane, - 1, - FFTW_BACKWARD, - flag); + this->planfxfor1 = fftwf_plan_many_dft (1, + &this->nx, + this->nplane * (lixy + 1), + (fftwf_complex*)c_auxr, + embed, + npy, + 1, + (fftwf_complex*)c_auxr, + embed, + npy, + 1, + FFTW_FORWARD, + flag); + this->planfxbac1 = fftwf_plan_many_dft (1, + &this->nx, + this->nplane * (lixy + 1), + (fftwf_complex*)c_auxr, + embed, + npy, + 1, + (fftwf_complex*)c_auxr, + embed, + npy, + 1, + FFTW_BACKWARD, + flag); + if (this->gamma_only) + { + this->planfyr2c = fftwf_plan_many_dft_r2c (1, + &this->ny, + this->nplane, + s_rspace, + embed, + this->nplane, + 1, + (fftwf_complex*)c_auxr, + embed, + this->nplane, + 1, + flag); + this->planfyc2r = fftwf_plan_many_dft_c2r (1, + 
&this->ny, + this->nplane, + (fftwf_complex*)c_auxr, + embed, + this->nplane, + 1, + s_rspace, + embed, + this->nplane, + 1, + flag); + } + else + { + this->planfxfor2 = fftwf_plan_many_dft (1, + &this->nx, + this->nplane * (this->ny - rixy), + (fftwf_complex*)c_auxr, + embed, + npy, + 1, + (fftwf_complex*)c_auxr, + embed, + npy, + 1, + FFTW_FORWARD, + flag); + this->planfxbac2 = fftwf_plan_many_dft (1, + &this->nx, + this->nplane * (this->ny - rixy), + (fftwf_complex*)c_auxr, + embed, + npy, + 1, + (fftwf_complex*)c_auxr, + embed, + npy, + 1, + FFTW_BACKWARD, + flag); + this->planfyfor = fftwf_plan_many_dft (1, + &this->ny, + this->nplane, + (fftwf_complex*)c_auxr, + embed, + this->nplane, + 1, + (fftwf_complex*)c_auxr, + embed, + this->nplane, + 1, + FFTW_FORWARD, + flag); + this->planfybac = fftwf_plan_many_dft (1, + &this->ny, + this->nplane, + (fftwf_complex*)c_auxr, + embed, + this->nplane, + 1, + (fftwf_complex*)c_auxr, + embed, + this->nplane, + 1, + FFTW_BACKWARD, + flag); + } } - } return; } template <> -void FFT_CPU::clearfft(fftwf_plan& plan) +void + FFT_CPU::clearfft (fftwf_plan& plan) { if (plan) - { - fftwf_destroy_plan(plan); - plan = nullptr; - } + { + fftwf_destroy_plan (plan); + plan = nullptr; + } } template <> -void FFT_CPU::cleanFFT() +void + FFT_CPU::cleanFFT () { - clearfft(planfzfor); - clearfft(planfzbac); - clearfft(planfxfor1); - clearfft(planfxbac1); - clearfft(planfxfor2); - clearfft(planfxbac2); - clearfft(planfyfor); - clearfft(planfybac); - clearfft(planfxr2c); - clearfft(planfxc2r); - clearfft(planfyr2c); - clearfft(planfyc2r); + clearfft (planfzfor); + clearfft (planfzbac); + clearfft (planfxfor1); + clearfft (planfxbac1); + clearfft (planfxfor2); + clearfft (planfxbac2); + clearfft (planfyfor); + clearfft (planfybac); + clearfft (planfxr2c); + clearfft (planfxc2r); + clearfft (planfyr2c); + clearfft (planfyc2r); } - template <> -void FFT_CPU::clear() +void + FFT_CPU::clear () { - this->cleanFFT(); + this->cleanFFT (); if (c_auxg 
!= nullptr) - { - fftw_free(c_auxg); - c_auxg = nullptr; - } + { + fftw_free (c_auxg); + c_auxg = nullptr; + } if (c_auxr != nullptr) - { - fftw_free(c_auxr); - c_auxr = nullptr; - } + { + fftw_free (c_auxr); + c_auxr = nullptr; + } s_rspace = nullptr; } - template <> -void FFT_CPU::fftxyfor(std::complex* in, std::complex* out) const +void + FFT_CPU::fftxyfor (std::complex* in, std::complex* out) const { int npy = this->nplane * this->ny; if (this->xprime) - { - fftwf_execute_dft(this->planfxfor1, (fftwf_complex*)in, (fftwf_complex*)out); - - for (int i = 0; i < this->lixy + 1; ++i) { - fftwf_execute_dft(this->planfyfor, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&out[i * npy]); - } - for (int i = rixy; i < this->nx; ++i) - { - fftwf_execute_dft(this->planfyfor, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&out[i * npy]); + fftwf_execute_dft (this->planfxfor1, (fftwf_complex*)in, (fftwf_complex*)out); + + for (int i = 0; i < this->lixy + 1; ++i) + { + fftwf_execute_dft (this->planfyfor, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&out[i * npy]); + } + for (int i = rixy; i < this->nx; ++i) + { + fftwf_execute_dft (this->planfyfor, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&out[i * npy]); + } } - } else - { - for (int i = 0; i < this->nx; ++i) { - fftwf_execute_dft(this->planfyfor, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&out[i * npy]); - } + for (int i = 0; i < this->nx; ++i) + { + fftwf_execute_dft (this->planfyfor, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&out[i * npy]); + } - fftwf_execute_dft(this->planfxfor1, (fftwf_complex*)in, (fftwf_complex*)out); - fftwf_execute_dft(this->planfxfor2, (fftwf_complex*)&in[rixy * nplane], (fftwf_complex*)&out[rixy * nplane]); - } + fftwf_execute_dft (this->planfxfor1, (fftwf_complex*)in, (fftwf_complex*)out); + fftwf_execute_dft (this->planfxfor2, + (fftwf_complex*)&in[rixy * nplane], + (fftwf_complex*)&out[rixy * nplane]); + } } template <> -void FFT_CPU::fftxybac(std::complex* in,std::complex * out) 
const +void + FFT_CPU::fftxybac (std::complex* in, std::complex* out) const { int npy = this->nplane * this->ny; if (this->xprime) - { - for (int i = 0; i < this->lixy + 1; ++i) - { - fftwf_execute_dft(this->planfybac, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&out[i * npy]); - } - for (int i = rixy; i < this->nx; ++i) { - fftwf_execute_dft(this->planfybac, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&out[i * npy]); - } + for (int i = 0; i < this->lixy + 1; ++i) + { + fftwf_execute_dft (this->planfybac, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&out[i * npy]); + } + for (int i = rixy; i < this->nx; ++i) + { + fftwf_execute_dft (this->planfybac, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&out[i * npy]); + } - fftwf_execute_dft(this->planfxbac1, (fftwf_complex*)in, (fftwf_complex*)out); - } + fftwf_execute_dft (this->planfxbac1, (fftwf_complex*)in, (fftwf_complex*)out); + } else - { - fftwf_execute_dft(this->planfxbac1, (fftwf_complex*)in, (fftwf_complex*)out); - fftwf_execute_dft(this->planfxbac2, (fftwf_complex*)&in[rixy * nplane], (fftwf_complex*)&out[rixy * nplane]); - - for (int i = 0; i < this->nx; ++i) { - fftwf_execute_dft(this->planfybac, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&out[i * npy]); + fftwf_execute_dft (this->planfxbac1, (fftwf_complex*)in, (fftwf_complex*)out); + fftwf_execute_dft (this->planfxbac2, + (fftwf_complex*)&in[rixy * nplane], + (fftwf_complex*)&out[rixy * nplane]); + + for (int i = 0; i < this->nx; ++i) + { + fftwf_execute_dft (this->planfybac, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&out[i * npy]); + } } - } } template <> -void FFT_CPU::fftzfor(std::complex* in, std::complex* out) const +void + FFT_CPU::fftzfor (std::complex* in, std::complex* out) const { - fftwf_execute_dft(this->planfzfor, (fftwf_complex*)in, (fftwf_complex*)out); + fftwf_execute_dft (this->planfzfor, (fftwf_complex*)in, (fftwf_complex*)out); } template <> -void FFT_CPU::fftzbac(std::complex* in, std::complex* out) const +void + 
FFT_CPU::fftzbac (std::complex* in, std::complex* out) const { - fftwf_execute_dft(this->planfzbac, (fftwf_complex*)in, (fftwf_complex*)out); + fftwf_execute_dft (this->planfzbac, (fftwf_complex*)in, (fftwf_complex*)out); } template <> -void FFT_CPU::fftxyr2c(float* in, std::complex* out) const +void + FFT_CPU::fftxyr2c (float* in, std::complex* out) const { int npy = this->nplane * this->ny; if (this->xprime) - { - fftwf_execute_dft_r2c(this->planfxr2c, in, (fftwf_complex*)out); - - for (int i = 0; i < this->lixy + 1; ++i) { - fftwf_execute_dft(this->planfyfor, (fftwf_complex*)&out[i * npy], (fftwf_complex*)&out[i * npy]); + fftwf_execute_dft_r2c (this->planfxr2c, in, (fftwf_complex*)out); + + for (int i = 0; i < this->lixy + 1; ++i) + { + fftwf_execute_dft (this->planfyfor, (fftwf_complex*)&out[i * npy], (fftwf_complex*)&out[i * npy]); + } } - } else - { - for (int i = 0; i < this->nx; ++i) { - fftwf_execute_dft_r2c(this->planfyr2c, &in[i * npy], (fftwf_complex*)&out[i * npy]); - } + for (int i = 0; i < this->nx; ++i) + { + fftwf_execute_dft_r2c (this->planfyr2c, &in[i * npy], (fftwf_complex*)&out[i * npy]); + } - fftwf_execute_dft(this->planfxfor1, (fftwf_complex*)out, (fftwf_complex*)out); - } + fftwf_execute_dft (this->planfxfor1, (fftwf_complex*)out, (fftwf_complex*)out); + } } template <> -void FFT_CPU::fftxyc2r(std::complex* in, float* out) const +void + FFT_CPU::fftxyc2r (std::complex* in, float* out) const { int npy = this->nplane * this->ny; if (this->xprime) - { - for (int i = 0; i < this->lixy + 1; ++i) { - fftwf_execute_dft(this->planfybac, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&in[i * npy]); - } + for (int i = 0; i < this->lixy + 1; ++i) + { + fftwf_execute_dft (this->planfybac, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&in[i * npy]); + } - fftwf_execute_dft_c2r(this->planfxc2r, (fftwf_complex*)in, out); - } + fftwf_execute_dft_c2r (this->planfxc2r, (fftwf_complex*)in, out); + } else - { - fftwf_execute_dft(this->planfxbac1, 
(fftwf_complex*)in, (fftwf_complex*)in); - - for (int i = 0; i < this->nx; ++i) { - fftwf_execute_dft_c2r(this->planfyc2r, (fftwf_complex*)&in[i * npy], &out[i * npy]); + fftwf_execute_dft (this->planfxbac1, (fftwf_complex*)in, (fftwf_complex*)in); + + for (int i = 0; i < this->nx; ++i) + { + fftwf_execute_dft_c2r (this->planfyc2r, (fftwf_complex*)&in[i * npy], &out[i * npy]); + } } - } } -template <> float* -FFT_CPU::get_rspace_data() const {return s_rspace;} -template <> std::complex* -FFT_CPU::get_auxr_data() const {return c_auxr;} -template <> std::complex* -FFT_CPU::get_auxg_data() const {return c_auxg;} -} \ No newline at end of file +template <> +float* + FFT_CPU::get_rspace_data () const +{ + return s_rspace; +} +template <> +std::complex* + FFT_CPU::get_auxr_data () const +{ + return c_auxr; +} +template <> +std::complex* + FFT_CPU::get_auxg_data () const +{ + return c_auxg; +} +} // namespace ModuleBase \ No newline at end of file diff --git a/source/source_base/module_fft/fft_cuda.cpp b/source/source_base/module_fft/fft_cuda.cpp index e33fd203118..4ba86093e66 100644 --- a/source/source_base/module_fft/fft_cuda.cpp +++ b/source/source_base/module_fft/fft_cuda.cpp @@ -5,109 +5,122 @@ namespace ModuleBase { template -void FFT_CUDA::initfft(int nx_in, int ny_in, int nz_in) +void + FFT_CUDA::initfft (int nx_in, int ny_in, int nz_in) { this->nx = nx_in; this->ny = ny_in; this->nz = nz_in; } template <> -void FFT_CUDA::setupFFT() +void + FFT_CUDA::setupFFT () { - cufftPlan3d(&c_handle, this->nx, this->ny, this->nz, CUFFT_C2C); - resmem_cd_op()(this->c_auxr_3d, this->nx * this->ny * this->nz); + cufftPlan3d (&c_handle, this->nx, this->ny, this->nz, CUFFT_C2C); + resmem_cd_op () (this->c_auxr_3d, this->nx * this->ny * this->nz); } template <> -void FFT_CUDA::setupFFT() +void + FFT_CUDA::setupFFT () { - cufftPlan3d(&z_handle, this->nx, this->ny, this->nz, CUFFT_Z2Z); - resmem_zd_op()(this->z_auxr_3d, this->nx * this->ny * this->nz); + cufftPlan3d (&z_handle, 
this->nx, this->ny, this->nz, CUFFT_Z2Z); + resmem_zd_op () (this->z_auxr_3d, this->nx * this->ny * this->nz); } template <> -void FFT_CUDA::cleanFFT() +void + FFT_CUDA::cleanFFT () { if (c_handle) - { - cufftDestroy(c_handle); - c_handle = {}; - } + { + cufftDestroy (c_handle); + c_handle = {}; + } } template <> -void FFT_CUDA::cleanFFT() +void + FFT_CUDA::cleanFFT () { if (z_handle) - { - cufftDestroy(z_handle); - z_handle = {}; - } + { + cufftDestroy (z_handle); + z_handle = {}; + } } template <> -void FFT_CUDA::clear() +void + FFT_CUDA::clear () { - this->cleanFFT(); + this->cleanFFT (); if (c_auxr_3d != nullptr) - { - delmem_cd_op()(c_auxr_3d); - c_auxr_3d = nullptr; - } + { + delmem_cd_op () (c_auxr_3d); + c_auxr_3d = nullptr; + } } template <> -void FFT_CUDA::clear() +void + FFT_CUDA::clear () { - this->cleanFFT(); + this->cleanFFT (); if (z_auxr_3d != nullptr) - { - delmem_zd_op()(z_auxr_3d); - z_auxr_3d = nullptr; - } + { + delmem_zd_op () (z_auxr_3d); + z_auxr_3d = nullptr; + } } template <> -void FFT_CUDA::fft3D_forward(std::complex* in, std::complex* out) const +void + FFT_CUDA::fft3D_forward (std::complex* in, std::complex* out) const { - CHECK_CUFFT(cufftExecC2C(this->c_handle, - reinterpret_cast(in), - reinterpret_cast(out), - CUFFT_FORWARD)); + CHECK_CUFFT (cufftExecC2C (this->c_handle, + reinterpret_cast (in), + reinterpret_cast (out), + CUFFT_FORWARD)); } template <> -void FFT_CUDA::fft3D_forward(std::complex* in, std::complex* out) const +void + FFT_CUDA::fft3D_forward (std::complex* in, std::complex* out) const { - CHECK_CUFFT(cufftExecZ2Z(this->z_handle, - reinterpret_cast(in), - reinterpret_cast(out), - CUFFT_FORWARD)); + CHECK_CUFFT (cufftExecZ2Z (this->z_handle, + reinterpret_cast (in), + reinterpret_cast (out), + CUFFT_FORWARD)); } template <> -void FFT_CUDA::fft3D_backward(std::complex* in, std::complex* out) const +void + FFT_CUDA::fft3D_backward (std::complex* in, std::complex* out) const { - CHECK_CUFFT(cufftExecC2C(this->c_handle, - 
reinterpret_cast(in), - reinterpret_cast(out), - CUFFT_INVERSE)); + CHECK_CUFFT (cufftExecC2C (this->c_handle, + reinterpret_cast (in), + reinterpret_cast (out), + CUFFT_INVERSE)); } template <> -void FFT_CUDA::fft3D_backward(std::complex* in, std::complex* out) const +void + FFT_CUDA::fft3D_backward (std::complex* in, std::complex* out) const { - CHECK_CUFFT(cufftExecZ2Z(this->z_handle, - reinterpret_cast(in), - reinterpret_cast(out), - CUFFT_INVERSE)); + CHECK_CUFFT (cufftExecZ2Z (this->z_handle, + reinterpret_cast (in), + reinterpret_cast (out), + CUFFT_INVERSE)); } template <> -std::complex* FFT_CUDA::get_auxr_3d_data() const +std::complex* + FFT_CUDA::get_auxr_3d_data () const { return this->c_auxr_3d; } template <> -std::complex* FFT_CUDA::get_auxr_3d_data() const +std::complex* + FFT_CUDA::get_auxr_3d_data () const { return this->z_auxr_3d; } -template FFT_CUDA::FFT_CUDA(); -template FFT_CUDA::~FFT_CUDA(); -template FFT_CUDA::FFT_CUDA(); -template FFT_CUDA::~FFT_CUDA(); +template FFT_CUDA::FFT_CUDA (); +template FFT_CUDA::~FFT_CUDA (); +template FFT_CUDA::FFT_CUDA (); +template FFT_CUDA::~FFT_CUDA (); } // namespace ModuleBase diff --git a/source/source_base/module_fft/fft_cuda.h b/source/source_base/module_fft/fft_cuda.h index 7734caffa9d..314ef1dbe79 100644 --- a/source/source_base/module_fft/fft_cuda.h +++ b/source/source_base/module_fft/fft_cuda.h @@ -9,58 +9,54 @@ namespace ModuleBase template class FFT_CUDA : public FFT_BASE { - public: - FFT_CUDA(){}; - ~FFT_CUDA(){}; - - void setupFFT() override; + public: + FFT_CUDA () {}; + ~FFT_CUDA () {}; - void clear() override; + void setupFFT () override; - void cleanFFT() override; + void clear () override; - /** - * @brief Initialize the fft parameters - * @param nx_in number of grid points in x direction - * @param ny_in number of grid points in y direction - * @param nz_in number of grid points in z direction - * - */ - void initfft(int nx_in, - int ny_in, - int nz_in) override; - - /** - * @brief Get the 
real space data - * @return real space data - */ - std::complex* get_auxr_3d_data() const override; - - /** - * @brief Forward FFT in 3D - * @param in input data, complex FPTYPE - * @param out output data, complex FPTYPE - * - * This function performs the forward FFT in 3D. - */ - void fft3D_forward(std::complex* in, - std::complex* out) const override; - /** - * @brief Backward FFT in 3D - * @param in input data, complex FPTYPE - * @param out output data, complex FPTYPE - * - * This function performs the backward FFT in 3D. - */ - void fft3D_backward(std::complex* in, - std::complex* out) const override; - private: - cufftHandle c_handle = {}; - cufftHandle z_handle = {}; - - std::complex* c_auxr_3d = nullptr; // fft space - std::complex* z_auxr_3d = nullptr; // fft space + void cleanFFT () override; + /** + * @brief Initialize the fft parameters + * @param nx_in number of grid points in x direction + * @param ny_in number of grid points in y direction + * @param nz_in number of grid points in z direction + * + */ + void initfft (int nx_in, int ny_in, int nz_in) override; + + /** + * @brief Get the real space data + * @return real space data + */ + std::complex* get_auxr_3d_data () const override; + + /** + * @brief Forward FFT in 3D + * @param in input data, complex FPTYPE + * @param out output data, complex FPTYPE + * + * This function performs the forward FFT in 3D. + */ + void fft3D_forward (std::complex* in, std::complex* out) const override; + /** + * @brief Backward FFT in 3D + * @param in input data, complex FPTYPE + * @param out output data, complex FPTYPE + * + * This function performs the backward FFT in 3D. 
+ */ + void fft3D_backward (std::complex* in, std::complex* out) const override; + + private: + cufftHandle c_handle = {}; + cufftHandle z_handle = {}; + + std::complex* c_auxr_3d = nullptr; // fft space + std::complex* z_auxr_3d = nullptr; // fft space }; } // namespace ModuleBase diff --git a/source/source_base/module_fft/fft_dsp.cpp b/source/source_base/module_fft/fft_dsp.cpp index d9dad611526..77b7aaaffa9 100644 --- a/source/source_base/module_fft/fft_dsp.cpp +++ b/source/source_base/module_fft/fft_dsp.cpp @@ -8,7 +8,8 @@ namespace ModuleBase { template <> -void FFT_DSP::initfft(int nx_in, int ny_in, int nz_in) +void + FFT_DSP::initfft (int nx_in, int ny_in, int nz_in) { this->nx = nx_in; this->ny = ny_in; @@ -16,21 +17,22 @@ void FFT_DSP::initfft(int nx_in, int ny_in, int nz_in) nxyz = this->nx * this->ny * this->nz; } template <> -void FFT_DSP::setupFFT() +void + FFT_DSP::setupFFT () { PROBLEM pbm_forward; PROBLEM pbm_backward; PLAN* ptr_plan_forward = nullptr; PLAN* ptr_plan_backward = nullptr; INT num_thread = 8; - INT size=0; - hthread_dat_load(cluster_id, FFT_DAT_DIR); + INT size = 0; + hthread_dat_load (cluster_id, FFT_DAT_DIR); // compute the size of and malloc thread - size = nx * ny * nz * 2 * sizeof(E); - forward_in = (E*)hthread_malloc((int)cluster_id, size, HT_MEM_RW); + size = nx * ny * nz * 2 * sizeof (E); + forward_in = (E*)hthread_malloc ((int)cluster_id, size, HT_MEM_RW); - //init 3d fft problem + // init 3d fft problem pbm_forward.num_dim = 3; pbm_forward.n[0] = nx; pbm_forward.n[1] = ny; @@ -39,8 +41,8 @@ void FFT_DSP::setupFFT() pbm_forward.in = forward_in; pbm_forward.out = forward_in; - //make ptr plan - make_plan(&pbm_forward, &ptr_plan_forward, cluster_id, num_thread); + // make ptr plan + make_plan (&pbm_forward, &ptr_plan_forward, cluster_id, num_thread); ptr_plan_forward->in = forward_in; ptr_plan_forward->out = forward_in; args_for[1] = (unsigned long)ptr_plan_forward; @@ -54,73 +56,81 @@ void FFT_DSP::setupFFT() pbm_backward.in = 
forward_in; pbm_backward.out = forward_in; - make_plan(&pbm_backward, &ptr_plan_backward, cluster_id, num_thread); + make_plan (&pbm_backward, &ptr_plan_backward, cluster_id, num_thread); ptr_plan_backward->in = forward_in; ptr_plan_backward->out = forward_in; args_back[1] = (unsigned long)ptr_plan_backward; } template <> -void FFT_DSP::resource_handler(const int flag) const +void + FFT_DSP::resource_handler (const int flag) const { if (flag == 0) - { - hthread_barrier_destroy(b_id); - hthread_group_destroy(thread_id_for); - } - else if (flag==1) - { - INT num_thread = 8; - thread_id_for = hthread_group_create(cluster_id, num_thread, NULL, 0, 0, NULL); - // create b_id for the barrier - b_id = hthread_barrier_create(cluster_id); - args_for[0] = b_id; - args_back[0] = b_id; - }else{ - ModuleBase::WARNING_QUIT("FFT_DSP", "Error use of fft resource handle"); - } + { + hthread_barrier_destroy (b_id); + hthread_group_destroy (thread_id_for); + } + else if (flag == 1) + { + INT num_thread = 8; + thread_id_for = hthread_group_create (cluster_id, num_thread, NULL, 0, 0, NULL); + // create b_id for the barrier + b_id = hthread_barrier_create (cluster_id); + args_for[0] = b_id; + args_back[0] = b_id; + } + else + { + ModuleBase::WARNING_QUIT ("FFT_DSP", "Error use of fft resource handle"); + } } template <> -void FFT_DSP::fft3D_forward(std::complex* in, std::complex* out) const +void + FFT_DSP::fft3D_forward (std::complex* in, std::complex* out) const { - hthread_group_exec(thread_id_for, "execute_mtfft_3d", 1, 1, args_for); - hthread_group_wait(thread_id_for); + hthread_group_exec (thread_id_for, "execute_mtfft_3d", 1, 1, args_for); + hthread_group_wait (thread_id_for); } template <> -void FFT_DSP::fft3D_backward(std::complex* in, std::complex* out) const +void + FFT_DSP::fft3D_backward (std::complex* in, std::complex* out) const { - hthread_group_exec(thread_id_for, "execute_mtfft_3d", 1, 1, args_back); - hthread_group_wait(thread_id_for); + hthread_group_exec 
(thread_id_for, "execute_mtfft_3d", 1, 1, args_back); + hthread_group_wait (thread_id_for); } template <> -void FFT_DSP::cleanFFT() +void + FFT_DSP::cleanFFT () { if (ptr_plan_forward != nullptr) - { - destroy_plan(ptr_plan_forward); - ptr_plan_forward = nullptr; - } + { + destroy_plan (ptr_plan_forward); + ptr_plan_forward = nullptr; + } if (ptr_plan_backward != nullptr) - { - destroy_plan(ptr_plan_backward); - ptr_plan_backward = nullptr; - } + { + destroy_plan (ptr_plan_backward); + ptr_plan_backward = nullptr; + } } template <> -void FFT_DSP::clear() +void + FFT_DSP::clear () { - this->cleanFFT(); - hthread_free(forward_in); + this->cleanFFT (); + hthread_free (forward_in); } template <> -std::complex* FFT_DSP::get_auxr_3d_data() const +std::complex* + FFT_DSP::get_auxr_3d_data () const { - return reinterpret_cast*>(this->forward_in); + return reinterpret_cast*> (this->forward_in); } -template FFT_DSP::FFT_DSP(); -template FFT_DSP::~FFT_DSP(); -template FFT_DSP::FFT_DSP(); -template FFT_DSP::~FFT_DSP(); +template FFT_DSP::FFT_DSP (); +template FFT_DSP::~FFT_DSP (); +template FFT_DSP::FFT_DSP (); +template FFT_DSP::~FFT_DSP (); } // namespace ModuleBase \ No newline at end of file diff --git a/source/source_base/module_fft/fft_dsp.h b/source/source_base/module_fft/fft_dsp.h index a735fd1c500..e7c834c6573 100644 --- a/source/source_base/module_fft/fft_dsp.h +++ b/source/source_base/module_fft/fft_dsp.h @@ -12,77 +12,71 @@ namespace ModuleBase { - + template class FFT_DSP : public FFT_BASE { - public: - FFT_DSP(){}; - ~FFT_DSP(){}; - - void setupFFT() override; + public: + FFT_DSP () {}; + ~FFT_DSP () {}; + + void setupFFT () override; + + void clear () override; + + void cleanFFT () override; + /** + * @brief Control the allocation or deallocation of hthread + * resource + * @param flag 0: deallocate, 1: allocate + */ + void resource_handler (const int flag) const override; + /** + * @brief Initialize the fft parameters + * @param nx_in number of grid points in x 
direction + * @param ny_in number of grid points in y direction + * @param nz_in number of grid points in z direction + * + */ + virtual __attribute__ ((weak)) void initfft (int nx_in, int ny_in, int nz_in) override; - void clear() override; + /** + * @brief Get the real space data + * @return real space data + */ + virtual __attribute__ ((weak)) std::complex* get_auxr_3d_data () const override; - void cleanFFT() override; - /** - * @brief Control the allocation or deallocation of hthread - * resource - * @param flag 0: deallocate, 1: allocate - */ - void resource_handler(const int flag) const override; - /** - * @brief Initialize the fft parameters - * @param nx_in number of grid points in x direction - * @param ny_in number of grid points in y direction - * @param nz_in number of grid points in z direction - * - */ - virtual __attribute__((weak)) - void initfft(int nx_in, - int ny_in, - int nz_in) override; - - /** - * @brief Get the real space data - * @return real space data - */ - virtual __attribute__((weak)) - std::complex* get_auxr_3d_data() const override; - - /** - * @brief Forward FFT in 3D - * @param in input data, complex FPTYPE - * @param out output data, complex FPTYPE - * - * This function performs the forward FFT in 3D. - */ - virtual __attribute__((weak)) - void fft3D_forward(std::complex* in, - std::complex* out) const override; - /** - * @brief Backward FFT in 3D - * @param in input data, complex FPTYPE - * @param out output data, complex FPTYPE - * - * This function performs the backward FFT in 3D. 
- */ - virtual __attribute__((weak)) - void fft3D_backward(std::complex* in, - std::complex* out) const override; - public: - int nxyz=0; - INT cluster_id=0; - mutable INT b_id=0; - mutable INT thread_id_for=0; - PLAN* ptr_plan_forward=nullptr; - PLAN* ptr_plan_backward=nullptr; - mutable unsigned long args_for[2]; - mutable unsigned long args_back[2]; - E * forward_in=nullptr; - std::complex* c_auxr_3d = nullptr; // fft space - std::complex* z_auxr_3d = nullptr; // fft space + /** + * @brief Forward FFT in 3D + * @param in input data, complex FPTYPE + * @param out output data, complex FPTYPE + * + * This function performs the forward FFT in 3D. + */ + virtual __attribute__ ((weak)) void fft3D_forward (std::complex* in, + std::complex* out) const override; + /** + * @brief Backward FFT in 3D + * @param in input data, complex FPTYPE + * @param out output data, complex FPTYPE + * + * This function performs the backward FFT in 3D. + */ + virtual __attribute__ ((weak)) void fft3D_backward (std::complex* in, + std::complex* out) const override; + public: + int nxyz = 0; + INT cluster_id = 0; + mutable INT b_id = 0; + mutable INT thread_id_for = 0; + PLAN* ptr_plan_forward = nullptr; + PLAN* ptr_plan_backward = nullptr; + mutable unsigned long args_for[2]; + mutable unsigned long args_back[2]; + E* forward_in = nullptr; + std::complex* c_auxr_3d = nullptr; // fft space + std::complex* z_auxr_3d = nullptr; // fft space }; } // namespace ModuleBase #endif \ No newline at end of file diff --git a/source/source_base/module_fft/fft_dsp_float.cpp b/source/source_base/module_fft/fft_dsp_float.cpp index 79efa4f000a..51c05e2135e 100644 --- a/source/source_base/module_fft/fft_dsp_float.cpp +++ b/source/source_base/module_fft/fft_dsp_float.cpp @@ -1,25 +1,25 @@ #include "fft_dsp.h" namespace ModuleBase { - -template<> -void FFT_DSP::setupFFT() -{ -} -template<> -void FFT_DSP::clear() +template <> +void + FFT_DSP::setupFFT () { - } -template<> -void FFT_DSP::cleanFFT() +template <> 
+void + FFT_DSP::clear () +{ +} +template <> +void + FFT_DSP::cleanFFT () { - } -template<> -void FFT_DSP::resource_handler(const int flag) const +template <> +void + FFT_DSP::resource_handler (const int flag) const { - } -} \ No newline at end of file +} // namespace ModuleBase \ No newline at end of file diff --git a/source/source_base/module_fft/fft_rocm.cpp b/source/source_base/module_fft/fft_rocm.cpp index 1730a0cdda5..aab156a26d4 100644 --- a/source/source_base/module_fft/fft_rocm.cpp +++ b/source/source_base/module_fft/fft_rocm.cpp @@ -5,107 +5,119 @@ namespace ModuleBase { template -void FFT_ROCM::initfft(int nx_in, - int ny_in, - int nz_in) +void + FFT_ROCM::initfft (int nx_in, int ny_in, int nz_in) { this->nx = nx_in; this->ny = ny_in; this->nz = nz_in; } template <> -void FFT_ROCM::setupFFT() +void + FFT_ROCM::setupFFT () { - hipfftPlan3d(&c_handle, this->nx, this->ny, this->nz, HIPFFT_C2C); - resmem_cd_op()(this->c_auxr_3d, this->nx * this->ny * this->nz); - + hipfftPlan3d (&c_handle, this->nx, this->ny, this->nz, HIPFFT_C2C); + resmem_cd_op () (this->c_auxr_3d, this->nx * this->ny * this->nz); } -template <> -void FFT_ROCM::setupFFT() +template <> +void + FFT_ROCM::setupFFT () { - hipfftPlan3d(&z_handle, this->nx, this->ny, this->nz, HIPFFT_Z2Z); - resmem_zd_op()(this->z_auxr_3d, this->nx * this->ny * this->nz); + hipfftPlan3d (&z_handle, this->nx, this->ny, this->nz, HIPFFT_Z2Z); + resmem_zd_op () (this->z_auxr_3d, this->nx * this->ny * this->nz); } template <> -void FFT_ROCM::cleanFFT() +void + FFT_ROCM::cleanFFT () { if (c_handle) - { - hipfftDestroy(c_handle); - c_handle = {}; - } + { + hipfftDestroy (c_handle); + c_handle = {}; + } } template <> -void FFT_ROCM::cleanFFT() +void + FFT_ROCM::cleanFFT () { if (z_handle) - { - hipfftDestroy(z_handle); - z_handle = {}; - } + { + hipfftDestroy (z_handle); + z_handle = {}; + } } template <> -void FFT_ROCM::clear() +void + FFT_ROCM::clear () { - this->cleanFFT(); + this->cleanFFT (); if (c_auxr_3d != 
nullptr) - { - delmem_cd_op()(c_auxr_3d); - c_auxr_3d = nullptr; - } + { + delmem_cd_op () (c_auxr_3d); + c_auxr_3d = nullptr; + } } template <> -void FFT_ROCM::clear() +void + FFT_ROCM::clear () { - this->cleanFFT(); + this->cleanFFT (); if (z_auxr_3d != nullptr) - { - delmem_zd_op()(z_auxr_3d); - z_auxr_3d = nullptr; - } + { + delmem_zd_op () (z_auxr_3d); + z_auxr_3d = nullptr; + } +} +template <> +void + FFT_ROCM::fft3D_forward (std::complex* in, std::complex* out) const +{ + CHECK_CUFFT (hipfftExecC2C (this->c_handle, + reinterpret_cast (in), + reinterpret_cast (out), + HIPFFT_FORWARD)); +} +template <> +void + FFT_ROCM::fft3D_forward (std::complex* in, std::complex* out) const +{ + CHECK_CUFFT (hipfftExecZ2Z (this->z_handle, + reinterpret_cast (in), + reinterpret_cast (out), + HIPFFT_FORWARD)); } template <> -void FFT_ROCM::fft3D_forward(std::complex* in, - std::complex* out) const +void + FFT_ROCM::fft3D_backward (std::complex* in, std::complex* out) const { - CHECK_CUFFT(hipfftExecC2C(this->c_handle, - reinterpret_cast(in), - reinterpret_cast(out), - HIPFFT_FORWARD)); + CHECK_CUFFT (hipfftExecC2C (this->c_handle, + reinterpret_cast (in), + reinterpret_cast (out), + HIPFFT_BACKWARD)); } template <> -void FFT_ROCM::fft3D_forward(std::complex* in, - std::complex* out) const +void + FFT_ROCM::fft3D_backward (std::complex* in, std::complex* out) const { - CHECK_CUFFT(hipfftExecZ2Z(this->z_handle, - reinterpret_cast(in), - reinterpret_cast(out), - HIPFFT_FORWARD)); + CHECK_CUFFT (hipfftExecZ2Z (this->z_handle, + reinterpret_cast (in), + reinterpret_cast (out), + HIPFFT_BACKWARD)); } template <> -void FFT_ROCM::fft3D_backward(std::complex* in, - std::complex* out) const +std::complex* + FFT_ROCM::get_auxr_3d_data () const { - CHECK_CUFFT(hipfftExecC2C(this->c_handle, - reinterpret_cast(in), - reinterpret_cast(out), - HIPFFT_BACKWARD)); + return this->c_auxr_3d; } template <> -void FFT_ROCM::fft3D_backward(std::complex* in, - std::complex* out) const +std::complex* 
+ FFT_ROCM::get_auxr_3d_data () const { - CHECK_CUFFT(hipfftExecZ2Z(this->z_handle, - reinterpret_cast(in), - reinterpret_cast(out), - HIPFFT_BACKWARD)); + return this->z_auxr_3d; } -template <> std::complex* -FFT_ROCM::get_auxr_3d_data() const {return this->c_auxr_3d;} -template <> std::complex* -FFT_ROCM::get_auxr_3d_data() const {return this->z_auxr_3d;} -template FFT_ROCM::FFT_ROCM(); -template FFT_ROCM::~FFT_ROCM(); -template FFT_ROCM::FFT_ROCM(); -template FFT_ROCM::~FFT_ROCM(); -}// namespace ModuleBase +template FFT_ROCM::FFT_ROCM (); +template FFT_ROCM::~FFT_ROCM (); +template FFT_ROCM::FFT_ROCM (); +template FFT_ROCM::~FFT_ROCM (); +} // namespace ModuleBase diff --git a/source/source_base/module_fft/fft_rocm.h b/source/source_base/module_fft/fft_rocm.h index 96c4cde091b..2ee6b9f338e 100644 --- a/source/source_base/module_fft/fft_rocm.h +++ b/source/source_base/module_fft/fft_rocm.h @@ -10,54 +10,50 @@ namespace ModuleBase template class FFT_ROCM : public FFT_BASE { - public: - FFT_ROCM(){}; - ~FFT_ROCM(){}; - - void setupFFT() override; - - void clear() override; - - void cleanFFT() override; - - /** - * @brief Initialize the fft parameters for ROCM - * @param nx_in number of grid points in x direction - * @param ny_in number of grid points in y direction - * @param nz_in number of grid points in z direction - * - */ - void initfft(int nx_in, - int ny_in, - int nz_in) override; - - /** - * @brief Get the real space data - * @return real space data - */ - std::complex* get_auxr_3d_data() const override; - - /** - * @brief Forward FFT in 3D for ROCM - * @param in input data, complex FPTYPE - * @param out output data, complex FPTYPE - */ - void fft3D_forward(std::complex* in, - std::complex* out) const override; - - /** - * @brief Backward FFT in 3D for ROCM - * @param in input data, complex FPTYPE - * @param out output data, complex FPTYPE - */ - void fft3D_backward(std::complex* in, - std::complex* out) const override; - private: - hipfftHandle c_handle = 
{}; - hipfftHandle z_handle = {}; - mutable std::complex* c_auxr_3d = nullptr; // fft space - mutable std::complex* z_auxr_3d = nullptr; // fft space + public: + FFT_ROCM () {}; + ~FFT_ROCM () {}; + void setupFFT () override; + + void clear () override; + + void cleanFFT () override; + + /** + * @brief Initialize the fft parameters for ROCM + * @param nx_in number of grid points in x direction + * @param ny_in number of grid points in y direction + * @param nz_in number of grid points in z direction + * + */ + void initfft (int nx_in, int ny_in, int nz_in) override; + + /** + * @brief Get the real space data + * @return real space data + */ + std::complex* get_auxr_3d_data () const override; + + /** + * @brief Forward FFT in 3D for ROCM + * @param in input data, complex FPTYPE + * @param out output data, complex FPTYPE + */ + void fft3D_forward (std::complex* in, std::complex* out) const override; + + /** + * @brief Backward FFT in 3D for ROCM + * @param in input data, complex FPTYPE + * @param out output data, complex FPTYPE + */ + void fft3D_backward (std::complex* in, std::complex* out) const override; + + private: + hipfftHandle c_handle = {}; + hipfftHandle z_handle = {}; + mutable std::complex* c_auxr_3d = nullptr; // fft space + mutable std::complex* z_auxr_3d = nullptr; // fft space }; -}// namespace ModuleBase +} // namespace ModuleBase #endif diff --git a/source/source_base/module_grid/batch.cpp b/source/source_base/module_grid/batch.cpp index 718d9277d36..996df3b4838 100644 --- a/source/source_base/module_grid/batch.cpp +++ b/source/source_base/module_grid/batch.cpp @@ -6,7 +6,8 @@ #include "source_base/module_external/lapack_connector.h" -namespace { +namespace +{ /** * @brief Divide a set of points into two subsets by the "MaxMin" algorithm. @@ -37,86 +38,85 @@ namespace { * @return The number of points in the first subset within idx. 
* */ -int _maxmin_divide(const double* grid, int* idx, int m) { - assert(m > 1); - if (m == 2) { - return 1; - } - - std::vector centroid(3, 0.0); - for (int i = 0; i < m; ++i) { - int j = idx[i]; - centroid[0] += grid[3*j ]; - centroid[1] += grid[3*j + 1]; - centroid[2] += grid[3*j + 2]; - } +int + _maxmin_divide (const double* grid, int* idx, int m) +{ + assert (m > 1); + if (m == 2) + { + return 1; + } + + std::vector centroid (3, 0.0); + for (int i = 0; i < m; ++i) + { + int j = idx[i]; + centroid[0] += grid[3 * j]; + centroid[1] += grid[3 * j + 1]; + centroid[2] += grid[3 * j + 2]; + } centroid[0] /= m; centroid[1] /= m; centroid[2] /= m; // positions w.r.t. the centroid - std::vector R(3*m, 0.0); - for (int i = 0; i < m; ++i) { - int j = idx[i]; - R[3*i ] = grid[3*j ] - centroid[0]; - R[3*i + 1] = grid[3*j + 1] - centroid[1]; - R[3*i + 2] = grid[3*j + 2] - centroid[2]; - } + std::vector R (3 * m, 0.0); + for (int i = 0; i < m; ++i) + { + int j = idx[i]; + R[3 * i] = grid[3 * j] - centroid[0]; + R[3 * i + 1] = grid[3 * j + 1] - centroid[1]; + R[3 * i + 2] = grid[3 * j + 2] - centroid[2]; + } // The normal vector of the cut plane is taken to be the eigenvector // corresponding to the largest eigenvalue of the 3x3 matrix A = R*R^T. 
- std::vector A(9, 0.0); + std::vector A (9, 0.0); int i3 = 3, i1 = 1; double d0 = 0.0, d1 = 1.0; - dsyrk_("U", "N", &i3, &m, &d1, R.data(), &i3, &d0, A.data(), &i3); + dsyrk_ ("U", "N", &i3, &m, &d1, R.data (), &i3, &d0, A.data (), &i3); int info = 0, lwork = 102 /* determined by a work space query */; - std::vector e(3), work(lwork); - dsyev_("V", "U", &i3, A.data(), &i3, e.data(), work.data(), &lwork, &info); - double* n = A.data() + 6; // normal vector of the cut plane + std::vector e (3), work (lwork); + dsyev_ ("V", "U", &i3, A.data (), &i3, e.data (), work.data (), &lwork, &info); + double* n = A.data () + 6; // normal vector of the cut plane // Rearrange the indices to put points in each subset together by // examining the signed distances of points to the cut plane (R^T*n). - std::vector dist(m); - dgemv_("T", &i3, &m, &d1, R.data(), &i3, n, &i1, &d0, dist.data(), &i1); - - int *head = idx; - std::reverse_iterator tail(idx + m), rend(idx); - auto is_negative = [&dist, &idx](int& j) { return dist[&j - idx] < 0; }; - while ( ( head = std::find_if(head, idx + m, is_negative) ) < - ( tail = std::find_if_not(tail, rend, is_negative) ).base() ) { - std::swap(*head, *tail); - std::swap(dist[head - idx], dist[tail.base() - idx - 1]); - ++head; - ++tail; - } + std::vector dist (m); + dgemv_ ("T", &i3, &m, &d1, R.data (), &i3, n, &i1, &d0, dist.data (), &i1); + + int* head = idx; + std::reverse_iterator tail (idx + m), rend (idx); + auto is_negative = [&dist, &idx] (int& j) { return dist[&j - idx] < 0; }; + while ((head = std::find_if (head, idx + m, is_negative)) + < (tail = std::find_if_not (tail, rend, is_negative)).base ()) + { + std::swap (*head, *tail); + std::swap (dist[head - idx], dist[tail.base () - idx - 1]); + ++head; + ++tail; + } return head - idx; } } // end of anonymous namespace +std::vector + Grid::Batch::maxmin (const double* grid, int* idx, int m, int m_thr) +{ + if (m <= m_thr) + { + return std::vector{0}; + } -std::vector Grid::Batch::maxmin( - 
const double* grid, - int* idx, - int m, - int m_thr -) { - if (m <= m_thr) { - return std::vector{0}; - } + int m_left = _maxmin_divide (grid, idx, m); - int m_left = _maxmin_divide(grid, idx, m); + std::vector left = maxmin (grid, idx, m_left, m_thr); + std::vector right = maxmin (grid, idx + m_left, m - m_left, m_thr); + std::for_each (right.begin (), right.end (), [m_left] (int& x) { x += m_left; }); - std::vector left = maxmin(grid, idx, m_left, m_thr); - std::vector right = maxmin(grid, idx + m_left, m - m_left, m_thr); - std::for_each(right.begin(), right.end(), - [m_left](int& x) { x += m_left; } - ); - - left.insert(left.end(), right.begin(), right.end()); + left.insert (left.end (), right.begin (), right.end ()); return left; } - - diff --git a/source/source_base/module_grid/batch.h b/source/source_base/module_grid/batch.h index 8a6ef733e13..3666d2b3dda 100644 --- a/source/source_base/module_grid/batch.h +++ b/source/source_base/module_grid/batch.h @@ -3,8 +3,10 @@ #include -namespace Grid { -namespace Batch { +namespace Grid +{ +namespace Batch +{ /** * @brief Divide a set of points into batches by the "MaxMin" algorithm. @@ -46,7 +48,7 @@ namespace Batch { * {0, 1, 4, 5}, {8, 9, 12, 13}, {2, 3, 6, 7}, {10, 11, 14, 15}. 
* */ -std::vector maxmin(const double* grid, int* idx, int m, int m_thr); +std::vector maxmin (const double* grid, int* idx, int m, int m_thr); } // end of namespace Batch } // end of namespace Grid diff --git a/source/source_base/module_grid/delley.cpp b/source/source_base/module_grid/delley.cpp index 3c4431542ae..38b3fef5c01 100644 --- a/source/source_base/module_grid/delley.cpp +++ b/source/source_base/module_grid/delley.cpp @@ -5,9 +5,11 @@ #include #include -namespace { +namespace +{ -struct DelleyTable { +struct DelleyTable +{ const int lmax_; const int ngrid_; const int ntype_[6]; @@ -15,202 +17,177 @@ struct DelleyTable { }; // Delley's table from the original article -const std::vector delley_table = { - { - 17, 110, {1, 1, 0, 3, 1, 0}, +const std::vector delley_table + = {{17, + 110, + {1, 1, 0, 3, 1, 0}, { - 0.00000000000000000, 0.00000000000000000, 0.0038282704949371616, - 0.57735026918962576, 0.57735026918962576, 0.0097937375124875125, - 0.18511563534473617, 0.18511563534473617, 0.0082117372831911110, - 0.39568947305594191, 0.39568947305594191, 0.0095954713360709628, - 0.69042104838229218, 0.21595729184584883, 0.0099428148911781033, - 0.47836902881215020, 0.00000000000000000, 0.0096949963616630283, - } - }, - { - 23, 194, {1, 1, 1, 4, 1, 1}, + 0.00000000000000000, + 0.00000000000000000, + 0.0038282704949371616, + 0.57735026918962576, + 0.57735026918962576, + 0.0097937375124875125, + 0.18511563534473617, + 0.18511563534473617, + 0.0082117372831911110, + 0.39568947305594191, + 0.39568947305594191, + 0.0095954713360709628, + 0.69042104838229218, + 0.21595729184584883, + 0.0099428148911781033, + 0.47836902881215020, + 0.00000000000000000, + 0.0096949963616630283, + }}, + {23, + 194, + {1, 1, 1, 4, 1, 1}, { - 0.00000000000000000, 0.00000000000000000, 0.0017823404472446112, - 0.57735026918962576, 0.57735026918962576, 0.0055733831788487380, - 0.70710678118654752, 0.00000000000000000, 0.0057169059499771019, - 0.44469331787174373, 0.44469331787174373, 
0.0055187714672736137, - 0.28924656275754386, 0.28924656275754386, 0.0051582377118053831, - 0.67129734426952263, 0.31419699418258608, 0.0056087040825879968, - 0.12993354476500669, 0.12993354476500669, 0.0041067770281693941, - 0.34577021976112827, 0.00000000000000000, 0.0050518460646148085, - 0.52511857244364202, 0.15904171053835295, 0.0055302489162330937, - } - }, - { - 29, 302, {1, 1, 0, 6, 2, 2}, + 0.00000000000000000, 0.00000000000000000, 0.0017823404472446112, 0.57735026918962576, + 0.57735026918962576, 0.0055733831788487380, 0.70710678118654752, 0.00000000000000000, + 0.0057169059499771019, 0.44469331787174373, 0.44469331787174373, 0.0055187714672736137, + 0.28924656275754386, 0.28924656275754386, 0.0051582377118053831, 0.67129734426952263, + 0.31419699418258608, 0.0056087040825879968, 0.12993354476500669, 0.12993354476500669, + 0.0041067770281693941, 0.34577021976112827, 0.00000000000000000, 0.0050518460646148085, + 0.52511857244364202, 0.15904171053835295, 0.0055302489162330937, + }}, + {29, + 302, + {1, 1, 0, 6, 2, 2}, { - 0.00000000000000000, 0.00000000000000000, 0.0008545911725128148, - 0.57735026918962576, 0.57735026918962576, 0.0035991192850255715, - 0.70117664160895449, 0.12923867271051493, 0.0036500458076772554, - 0.65663294102196118, 0.37103417838482119, 0.0036048226014198817, - 0.47290541325810046, 0.47290541325810046, 0.0035767296617433671, - 0.35156403455701051, 0.35156403455701051, 0.0034497884243058833, - 0.22196452362941784, 0.22196452362941784, 0.0031089531224136753, - 0.09618308522614784, 0.09618308522614784, 0.0023521014136891644, - 0.57189558918789607, 0.00000000000000000, 0.0036008209322164603, - 0.26441528870606625, 0.00000000000000000, 0.0029823449631718039, - 0.54486773725807738, 0.25100347517704651, 0.0035715405542733871, - 0.41277240831685310, 0.12335485325833274, 0.0033923122050061702, - } - }, - { - 35, 434, {1, 1, 1, 7, 2, 4}, + 0.00000000000000000, 0.00000000000000000, 0.0008545911725128148, 0.57735026918962576, + 
0.57735026918962576, 0.0035991192850255715, 0.70117664160895449, 0.12923867271051493, + 0.0036500458076772554, 0.65663294102196118, 0.37103417838482119, 0.0036048226014198817, + 0.47290541325810046, 0.47290541325810046, 0.0035767296617433671, 0.35156403455701051, + 0.35156403455701051, 0.0034497884243058833, 0.22196452362941784, 0.22196452362941784, + 0.0031089531224136753, 0.09618308522614784, 0.09618308522614784, 0.0023521014136891644, + 0.57189558918789607, 0.00000000000000000, 0.0036008209322164603, 0.26441528870606625, + 0.00000000000000000, 0.0029823449631718039, 0.54486773725807738, 0.25100347517704651, + 0.0035715405542733871, 0.41277240831685310, 0.12335485325833274, 0.0033923122050061702, + }}, + {35, + 434, + {1, 1, 1, 7, 2, 4}, { - 0.00000000000000000, 0.00000000000000000, 0.0005265897968224436, - 0.57735026918962576, 0.57735026918962576, 0.0025123174189273072, - 0.70710678118654752, 0.00000000000600000, 0.0025482199720026072, - 0.69093463075091106, 0.21264682470755207, 0.0025304038011863550, - 0.64566647074242561, 0.40771266489776951, 0.0025132671745975644, - 0.49143426377847465, 0.49143426377847465, 0.0025017251684029361, - 0.39272597633680022, 0.39272597633680022, 0.0024453734373129800, - 0.28612890103076384, 0.28612890103076384, 0.0023026947822274158, - 0.17748360546091578, 0.17748360546091578, 0.0020142790209185282, - 0.07568084367178018, 0.07568084367178018, 0.0014624956215946138, - 0.21027252285730696, 0.00000000000000000, 0.0019109512821795323, - 0.47159869115131592, 0.00000000000000000, 0.0024174423756389808, - 0.33443631453434549, 0.09921769636429237, 0.0022366077604378487, - 0.45023303825826254, 0.20548236964030437, 0.0024169300443247753, - 0.55501523610768072, 0.31042840351665415, 0.0024966440545530860, - 0.59051570489252711, 0.10680182607580483, 0.0025122368545634951, - } - }, - { - 41, 590, {1, 1, 0, 8, 4, 6}, + 0.00000000000000000, 0.00000000000000000, 0.0005265897968224436, 0.57735026918962576, + 0.57735026918962576, 
0.0025123174189273072, 0.70710678118654752, 0.00000000000600000, + 0.0025482199720026072, 0.69093463075091106, 0.21264682470755207, 0.0025304038011863550, + 0.64566647074242561, 0.40771266489776951, 0.0025132671745975644, 0.49143426377847465, + 0.49143426377847465, 0.0025017251684029361, 0.39272597633680022, 0.39272597633680022, + 0.0024453734373129800, 0.28612890103076384, 0.28612890103076384, 0.0023026947822274158, + 0.17748360546091578, 0.17748360546091578, 0.0020142790209185282, 0.07568084367178018, + 0.07568084367178018, 0.0014624956215946138, 0.21027252285730696, 0.00000000000000000, + 0.0019109512821795323, 0.47159869115131592, 0.00000000000000000, 0.0024174423756389808, + 0.33443631453434549, 0.09921769636429237, 0.0022366077604378487, 0.45023303825826254, + 0.20548236964030437, 0.0024169300443247753, 0.55501523610768072, 0.31042840351665415, + 0.0024966440545530860, 0.59051570489252711, 0.10680182607580483, 0.0025122368545634951, + }}, + {41, + 590, + {1, 1, 0, 8, 4, 6}, { - 0.00000000000000000, 0.00000000000000000, 0.0001009005753378758, - 0.57735026918962576, 0.57735026918962576, 0.0018514016873890461, - 0.70404760433146996, 0.09291900596883211, 0.0018686219518306975, - 0.68084561988024238, 0.26999719217017240, 0.0018648696345606001, - 0.63723669159418917, 0.43342680786054810, 0.0018497643975168892, - 0.50447558060926046, 0.50447558060926046, 0.0018450277740822388, - 0.42175447334398773, 0.42175447334398773, 0.0018164174988262214, - 0.33201962086729379, 0.33201962086729379, 0.0017449464690023229, - 0.23917494336556047, 0.23917494336556047, 0.0016278016126848035, - 0.14024070738935403, 0.14024070738935403, 0.0015576827519901693, - 0.09161634328605240, 0.00000000000000000, 0.0012680968886048433, - 0.20326292518419433, 0.00000000000000000, 0.0011183965414769017, - 0.39364042372978295, 0.00000000000000000, 0.0017287035120530033, - 0.61262355812929648, 0.00000000000000000, 0.0018551905629473527, - 0.28114771623428322, 0.08959875911893791, 
0.0014697353123693616, - 0.38175470908581117, 0.17327600238498666, 0.0016819651914742022, - 0.47452376478986998, 0.26422260656245780, 0.0017876372876796954, - 0.56127905075920534, 0.35189965873835832, 0.0018400735685528423, - 0.50324791996964975, 0.08886791018186295, 0.0018072536817113700, - 0.59768324320748616, 0.18154345643517542, 0.0018527289739424312, - } - }, - { - 47, 770, {1, 1, 1, 9, 4, 9}, + 0.00000000000000000, 0.00000000000000000, 0.0001009005753378758, 0.57735026918962576, + 0.57735026918962576, 0.0018514016873890461, 0.70404760433146996, 0.09291900596883211, + 0.0018686219518306975, 0.68084561988024238, 0.26999719217017240, 0.0018648696345606001, + 0.63723669159418917, 0.43342680786054810, 0.0018497643975168892, 0.50447558060926046, + 0.50447558060926046, 0.0018450277740822388, 0.42175447334398773, 0.42175447334398773, + 0.0018164174988262214, 0.33201962086729379, 0.33201962086729379, 0.0017449464690023229, + 0.23917494336556047, 0.23917494336556047, 0.0016278016126848035, 0.14024070738935403, + 0.14024070738935403, 0.0015576827519901693, 0.09161634328605240, 0.00000000000000000, + 0.0012680968886048433, 0.20326292518419433, 0.00000000000000000, 0.0011183965414769017, + 0.39364042372978295, 0.00000000000000000, 0.0017287035120530033, 0.61262355812929648, + 0.00000000000000000, 0.0018551905629473527, 0.28114771623428322, 0.08959875911893791, + 0.0014697353123693616, 0.38175470908581117, 0.17327600238498666, 0.0016819651914742022, + 0.47452376478986998, 0.26422260656245780, 0.0017876372876796954, 0.56127905075920534, + 0.35189965873835832, 0.0018400735685528423, 0.50324791996964975, 0.08886791018186295, + 0.0018072536817113700, 0.59768324320748616, 0.18154345643517542, 0.0018527289739424312, + }}, + {47, + 770, + {1, 1, 1, 9, 4, 9}, { - 0.00000000000000000, 0.00000000000000000, 0.0011685335608691628, - 0.57735026918962576, 0.57735026918962576, 0.0014121215930643264, - 0.70710678118654752, 0.00000000000000000, 0.0014468645950992776, - 0.11441365123336336, 
0.11441365123336336, 0.0010478418864629224, - 0.19944675708548970, 0.19944675708548970, 0.0012392547584848484, - 0.28401278368259530, 0.28401278368259530, 0.0013259295792415379, - 0.36646411416548296, 0.36646411416548296, 0.0013756097758625958, - 0.44356118052513995, 0.44356118052513995, 0.0013999348863558624, - 0.51435709575333968, 0.51435709575333968, 0.0014096221218822673, - 0.63052081196671812, 0.45264446462279973, 0.0014108746499638577, - 0.67164784337293865, 0.31269529735024947, 0.0014134887639034478, - 0.69812332010174177, 0.15889512220405632, 0.0014366946685816802, - 0.12047667931264991, 0.00000000000000000, 0.0010901543574180667, - 0.30940302315480606, 0.00000000000000000, 0.0001869137844803852, - 0.34884276430183016, 0.00000000000000000, 0.0011284267652336505, - 0.53224214285417946, 0.00000000000000000, 0.0013844558026568455, - 0.23249923409267532, 0.06616159933437003, 0.0011853923885095502, - 0.32477344409682044, 0.14568618765136356, 0.0012949021664637693, - 0.41056989039349425, 0.22832839132127622, 0.0013525857420363760, - 0.49213658085114203, 0.30714431901543855, 0.0013925025908786082, - 0.56548849812588755, 0.38271180625074657, 0.0014073257894372725, - 0.43713473693946563, 0.07970715187939190, 0.0013128954307755017, - 0.52320749473197761, 0.15892620239864833, 0.0013784632898490457, - 0.60283033994386521, 0.23667220253873893, 0.0014125450609821936, - 0.62037164721742807, 0.07982328826030880, 0.0014289835314095131, - } - }, - { - 53, 974, {1, 1, 0, 12, 4, 12}, + 0.00000000000000000, 0.00000000000000000, 0.0011685335608691628, 0.57735026918962576, + 0.57735026918962576, 0.0014121215930643264, 0.70710678118654752, 0.00000000000000000, + 0.0014468645950992776, 0.11441365123336336, 0.11441365123336336, 0.0010478418864629224, + 0.19944675708548970, 0.19944675708548970, 0.0012392547584848484, 0.28401278368259530, + 0.28401278368259530, 0.0013259295792415379, 0.36646411416548296, 0.36646411416548296, + 0.0013756097758625958, 0.44356118052513995, 
0.44356118052513995, 0.0013999348863558624, + 0.51435709575333968, 0.51435709575333968, 0.0014096221218822673, 0.63052081196671812, + 0.45264446462279973, 0.0014108746499638577, 0.67164784337293865, 0.31269529735024947, + 0.0014134887639034478, 0.69812332010174177, 0.15889512220405632, 0.0014366946685816802, + 0.12047667931264991, 0.00000000000000000, 0.0010901543574180667, 0.30940302315480606, + 0.00000000000000000, 0.0001869137844803852, 0.34884276430183016, 0.00000000000000000, + 0.0011284267652336505, 0.53224214285417946, 0.00000000000000000, 0.0013844558026568455, + 0.23249923409267532, 0.06616159933437003, 0.0011853923885095502, 0.32477344409682044, + 0.14568618765136356, 0.0012949021664637693, 0.41056989039349425, 0.22832839132127622, + 0.0013525857420363760, 0.49213658085114203, 0.30714431901543855, 0.0013925025908786082, + 0.56548849812588755, 0.38271180625074657, 0.0014073257894372725, 0.43713473693946563, + 0.07970715187939190, 0.0013128954307755017, 0.52320749473197761, 0.15892620239864833, + 0.0013784632898490457, 0.60283033994386521, 0.23667220253873893, 0.0014125450609821936, + 0.62037164721742807, 0.07982328826030880, 0.0014289835314095131, + }}, + {53, + 974, + {1, 1, 0, 12, 4, 12}, { - 0.00000000000000000, 0.00000000800000000, 0.0001438294190527431, - 0.57735026918962576, 0.57735026918962576, 0.0011257722882870041, - 0.04292963545341347, 0.04292963545341347, 0.0004948029341949241, - 0.10514268540864042, 0.10514268540864042, 0.0007357990109125470, - 0.17500248676230874, 0.17500248676230874, 0.0008889132771304384, - 0.24776533796502568, 0.24776533796502568, 0.0009888347838921435, - 0.32065671239559574, 0.32065671239559574, 0.0010532996817094706, - 0.39165207498499835, 0.39165207498499835, 0.0010927788070145785, - 0.45908258741876237, 0.45908258741876237, 0.0011143893940632272, - 0.52145638884158605, 0.52145638884158605, 0.0011237247880515553, - 0.62531702446541989, 0.46685890569574328, 0.0011252393252438136, - 0.66379267445231699, 
0.34461365423743822, 0.0011261532718159050, - 0.69104103984983007, 0.21195415185018465, 0.0011302869311238408, - 0.70529070074577603, 0.07162440144995566, 0.0011349865343639549, - 0.12366867626579899, 0.00000000000000000, 0.0006823367927109931, - 0.29407771144683870, 0.00000000000000000, 0.0009454158160447096, - 0.46977538492076491, 0.00000000000000000, 0.0010744299753856791, - 0.63345632411395669, 0.00000000000000000, 0.0011293000865691317, - 0.20291287527775228, 0.05974048614181342, 0.0008436884500901954, - 0.46026219424840539, 0.13757604084736365, 0.0010752557204488846, - 0.50306739996620357, 0.33910165263362857, 0.0011085772368644620, - 0.28176064224421343, 0.12716751914398195, 0.0009566475323783357, - 0.43315612917201574, 0.26931207404135125, 0.0010806632507173907, - 0.62561673585808142, 0.14197864526019183, 0.0011267971311962946, - 0.37983952168591567, 0.06709284600738255, 0.0010225687153580612, - 0.55175054214235205, 0.07057738183256172, 0.0011089602677131075, - 0.60296191561591869, 0.27838884778821546, 0.0011227906534357658, - 0.35896063295890958, 0.19795789389174069, 0.0010324018471174598, - 0.53486664381354765, 0.20873070611032740, 0.0011072493822838539, - 0.56749975460743735, 0.40551221378728359, 0.0011217800485199721, - } - }, - { - 59, 1202, {1, 1, 1, 13, 4, 16}, + 0.00000000000000000, 0.00000000800000000, 0.0001438294190527431, 0.57735026918962576, + 0.57735026918962576, 0.0011257722882870041, 0.04292963545341347, 0.04292963545341347, + 0.0004948029341949241, 0.10514268540864042, 0.10514268540864042, 0.0007357990109125470, + 0.17500248676230874, 0.17500248676230874, 0.0008889132771304384, 0.24776533796502568, + 0.24776533796502568, 0.0009888347838921435, 0.32065671239559574, 0.32065671239559574, + 0.0010532996817094706, 0.39165207498499835, 0.39165207498499835, 0.0010927788070145785, + 0.45908258741876237, 0.45908258741876237, 0.0011143893940632272, 0.52145638884158605, + 0.52145638884158605, 0.0011237247880515553, 0.62531702446541989, 
0.46685890569574328, + 0.0011252393252438136, 0.66379267445231699, 0.34461365423743822, 0.0011261532718159050, + 0.69104103984983007, 0.21195415185018465, 0.0011302869311238408, 0.70529070074577603, + 0.07162440144995566, 0.0011349865343639549, 0.12366867626579899, 0.00000000000000000, + 0.0006823367927109931, 0.29407771144683870, 0.00000000000000000, 0.0009454158160447096, + 0.46977538492076491, 0.00000000000000000, 0.0010744299753856791, 0.63345632411395669, + 0.00000000000000000, 0.0011293000865691317, 0.20291287527775228, 0.05974048614181342, + 0.0008436884500901954, 0.46026219424840539, 0.13757604084736365, 0.0010752557204488846, + 0.50306739996620357, 0.33910165263362857, 0.0011085772368644620, 0.28176064224421343, + 0.12716751914398195, 0.0009566475323783357, 0.43315612917201574, 0.26931207404135125, + 0.0010806632507173907, 0.62561673585808142, 0.14197864526019183, 0.0011267971311962946, + 0.37983952168591567, 0.06709284600738255, 0.0010225687153580612, 0.55175054214235205, + 0.07057738183256172, 0.0011089602677131075, 0.60296191561591869, 0.27838884778821546, + 0.0011227906534357658, 0.35896063295890958, 0.19795789389174069, 0.0010324018471174598, + 0.53486664381354765, 0.20873070611032740, 0.0011072493822838539, 0.56749975460743735, + 0.40551221378728359, 0.0011217800485199721, + }}, + {59, + 1202, + {1, 1, 1, 13, 4, 16}, { - 0.00006000000000000, 0.00000000000000000, 0.0001105189233267572, - 0.57735026918962576, 0.57735026918962576, 0.0009133159786443561, - 0.70710678118654752, 0.00000000000000000, 0.0009205232738090741, - 0.03712636449657089, 0.03712636449657089, 0.0003690421898017899, - 0.09140060412262223, 0.09140060412262223, 0.0005603990928680660, - 0.15310778524699062, 0.15310778524699062, 0.0006865297629282609, - 0.21809288916606116, 0.21809288916606116, 0.0007720338551145630, - 0.28398745322001746, 0.28398745322001746, 0.0008301545958894795, - 0.34911776009637644, 0.34911776009637644, 0.0008686692550179628, - 0.41214314614443092, 
0.41214314614443092, 0.0008927076285846890, - 0.47189936271491266, 0.47189936271491266, 0.0009060820238568219, - 0.52731454528423366, 0.52731454528423366, 0.0009119777254940867, - 0.62094753324440192, 0.47838093807695216, 0.0009128720138604181, - 0.65697227118572905, 0.36983086645942597, 0.0009130714935691735, - 0.68417883090701434, 0.25258395570071777, 0.0009152873784554116, - 0.70126043301236308, 0.12832618665972300, 0.0009187436274321654, - 0.10723822154781661, 0.00000000000000000, 0.0005176977312965694, - 0.25820689594969680, 0.00006000000000000, 0.0007331143682101417, - 0.41727529553067168, 0.00000000000000000, 0.0008463232836379928, - 0.57003669117925033, 0.00000000000000000, 0.0009031122694253992, - 0.17717740226153253, 0.05210639477011284, 0.0006485778453163257, - 0.24757164634262876, 0.11156409571564867, 0.0007435030910982369, - 0.31736152466119767, 0.17465516775786261, 0.0008101731497468018, - 0.38542911506692237, 0.23902784793817240, 0.0008556299257311812, - 0.45074225931570644, 0.30294669735289819, 0.0008850282341265444, - 0.51235184864198708, 0.36498322605976536, 0.0009022692938426915, - 0.56937024984684411, 0.42386447815223403, 0.0009105760258970126, - 0.33546162890664885, 0.05905888853235508, 0.0007998527891839054, - 0.40902684270853572, 0.12172350510959870, 0.0008483389574594331, - 0.47853206759224352, 0.18575051945473351, 0.0008811048182425720, - 0.54343035696939004, 0.24941121623622365, 0.0009010091677105086, - 0.60311616930963100, 0.31122759471496082, 0.0009107813579482705, - 0.49322211848512846, 0.06266250624154169, 0.0008803208679738260, - 0.56321230207620997, 0.12677748006842827, 0.0009021342299040653, - 0.62698055090243917, 0.19060182227792370, 0.0009131578003189435, - 0.63942796347491023, 0.06424549224220589, 0.0009158016174693465, - } - } -}; // end of the definition of "delley_table" + 0.00006000000000000, 0.00000000000000000, 0.0001105189233267572, 0.57735026918962576, + 0.57735026918962576, 0.0009133159786443561, 0.70710678118654752, 
0.00000000000000000, + 0.0009205232738090741, 0.03712636449657089, 0.03712636449657089, 0.0003690421898017899, + 0.09140060412262223, 0.09140060412262223, 0.0005603990928680660, 0.15310778524699062, + 0.15310778524699062, 0.0006865297629282609, 0.21809288916606116, 0.21809288916606116, + 0.0007720338551145630, 0.28398745322001746, 0.28398745322001746, 0.0008301545958894795, + 0.34911776009637644, 0.34911776009637644, 0.0008686692550179628, 0.41214314614443092, + 0.41214314614443092, 0.0008927076285846890, 0.47189936271491266, 0.47189936271491266, + 0.0009060820238568219, 0.52731454528423366, 0.52731454528423366, 0.0009119777254940867, + 0.62094753324440192, 0.47838093807695216, 0.0009128720138604181, 0.65697227118572905, + 0.36983086645942597, 0.0009130714935691735, 0.68417883090701434, 0.25258395570071777, + 0.0009152873784554116, 0.70126043301236308, 0.12832618665972300, 0.0009187436274321654, + 0.10723822154781661, 0.00000000000000000, 0.0005176977312965694, 0.25820689594969680, + 0.00006000000000000, 0.0007331143682101417, 0.41727529553067168, 0.00000000000000000, + 0.0008463232836379928, 0.57003669117925033, 0.00000000000000000, 0.0009031122694253992, + 0.17717740226153253, 0.05210639477011284, 0.0006485778453163257, 0.24757164634262876, + 0.11156409571564867, 0.0007435030910982369, 0.31736152466119767, 0.17465516775786261, + 0.0008101731497468018, 0.38542911506692237, 0.23902784793817240, 0.0008556299257311812, + 0.45074225931570644, 0.30294669735289819, 0.0008850282341265444, 0.51235184864198708, + 0.36498322605976536, 0.0009022692938426915, 0.56937024984684411, 0.42386447815223403, + 0.0009105760258970126, 0.33546162890664885, 0.05905888853235508, 0.0007998527891839054, + 0.40902684270853572, 0.12172350510959870, 0.0008483389574594331, 0.47853206759224352, + 0.18575051945473351, 0.0008811048182425720, 0.54343035696939004, 0.24941121623622365, + 0.0009010091677105086, 0.60311616930963100, 0.31122759471496082, 0.0009107813579482705, + 0.49322211848512846, 
0.06266250624154169, 0.0008803208679738260, 0.56321230207620997, + 0.12677748006842827, 0.0009021342299040653, 0.62698055090243917, 0.19060182227792370, + 0.0009131578003189435, 0.63942796347491023, 0.06424549224220589, 0.0009158016174693465, + }}}; // end of the definition of "delley_table" // size of each group of points with octahedral symmetry // 6: (1, 0, 0) x sign x permutation (vertices) @@ -221,157 +198,190 @@ const std::vector delley_table = { // 48: (u, v, sqrt(1-u^2-v^2)) x sign x permutation const int group_size[] = {6, 8, 12, 24, 24, 48}; -using Fill_t = std::function; +using Fill_t = std::function; // functors that fill the grid group-wise const std::vector fill = { // (1, 0, 0) x sign x permutation (vertices) - [](double* grid, double, double) { - for (int i = 0; i < 3; ++i) { - for (double one : {-1.0, 1.0}) { - grid[i] = one; - grid[(i+1)%3] = 0.0; - grid[(i+2)%3] = 0.0; - std::advance(grid, 3); - } - } - }, + [] (double* grid, double, double) + { + for (int i = 0; i < 3; ++i) + { + for (double one: {-1.0, 1.0}) + { + grid[i] = one; + grid[(i + 1) % 3] = 0.0; + grid[(i + 2) % 3] = 0.0; + std::advance (grid, 3); + } + } + }, // (sqrt(1/3), sqrt(1/3), sqrt(1/3)) x sign (face centers) - [](double* grid, double, double) { - const double a = std::sqrt(3) / 3.0; - for (int xsign : {-1, 1}) { - for (int ysign : {-1, 1}) { - for (int zsign : {-1, 1}) { - grid[0] = xsign * a; - grid[1] = ysign * a; - grid[2] = zsign * a; - std::advance(grid, 3); + [] (double* grid, double, double) + { + const double a = std::sqrt (3) / 3.0; + for (int xsign: {-1, 1}) + { + for (int ysign: {-1, 1}) + { + for (int zsign: {-1, 1}) + { + grid[0] = xsign * a; + grid[1] = ysign * a; + grid[2] = zsign * a; + std::advance (grid, 3); + } + } } - } - } - }, + }, // (sqrt(2)/2, sqrt(2)/2, 0) x sign x permutation (edge centers) - [](double* grid, double, double) { - const double a = std::sqrt(2) / 2.0; - for (int i = 0; i < 3; ++i) { - for (int sign1 : {-1, 1}) { - for (int sign2 : 
{-1, 1}) { - grid[i] = 0; - grid[(i+1)%3] = sign1 * a; - grid[(i+2)%3] = sign2 * a; - std::advance(grid, 3); + [] (double* grid, double, double) + { + const double a = std::sqrt (2) / 2.0; + for (int i = 0; i < 3; ++i) + { + for (int sign1: {-1, 1}) + { + for (int sign2: {-1, 1}) + { + grid[i] = 0; + grid[(i + 1) % 3] = sign1 * a; + grid[(i + 2) % 3] = sign2 * a; + std::advance (grid, 3); + } + } } - } - } - }, - + }, + // (u, u, sqrt(1-2u^2)) x sign x permutation - [](double* grid, double x, double y) { - double u = x == y ? x : std::sqrt(1.0 - x * x - y * y); - double v = std::sqrt(1.0 - 2.0 * u * u); - for (int i = 0; i < 3; ++i) { - for (int sign1 : {-1, 1}) { - for (int sign2 : {-1, 1}) { - for (int sign3 : {-1, 1}) { - grid[i] = sign1 * u; - grid[(i+1)%3] = sign2 * u; - grid[(i+2)%3] = sign3 * v; - std::advance(grid, 3); - } + [] (double* grid, double x, double y) + { + double u = x == y ? x : std::sqrt (1.0 - x * x - y * y); + double v = std::sqrt (1.0 - 2.0 * u * u); + for (int i = 0; i < 3; ++i) + { + for (int sign1: {-1, 1}) + { + for (int sign2: {-1, 1}) + { + for (int sign3: {-1, 1}) + { + grid[i] = sign1 * u; + grid[(i + 1) % 3] = sign2 * u; + grid[(i + 2) % 3] = sign3 * v; + std::advance (grid, 3); + } + } + } } - } - } - }, + }, // (u, 0, sqrt(1-u^2)) x sign x permutation - [](double* grid, double x, double y) { - double u = x > 0 ? x : y; - double v = std::sqrt(1.0 - u * u); - for (int i0 = 0; i0 < 3; ++i0) { - for (int iu0 : {1, 2}) { - for (int sign_u : {-1, 1}) { - for (int sign_v : {-1, 1}) { - grid[i0] = 0; - grid[(i0+iu0)%3] = sign_u * u; - grid[(i0-iu0+3)%3] = sign_v * v; - std::advance(grid, 3); - } + [] (double* grid, double x, double y) + { + double u = x > 0 ? 
x : y; + double v = std::sqrt (1.0 - u * u); + for (int i0 = 0; i0 < 3; ++i0) + { + for (int iu0: {1, 2}) + { + for (int sign_u: {-1, 1}) + { + for (int sign_v: {-1, 1}) + { + grid[i0] = 0; + grid[(i0 + iu0) % 3] = sign_u * u; + grid[(i0 - iu0 + 3) % 3] = sign_v * v; + std::advance (grid, 3); + } + } + } } - } - } - }, + }, // (u, v, sqrt(1-u^2-v^2)) x sign x permutation - [](double* grid, double x, double y) { - double r = x; - double s = y; - double t = std::sqrt(1.0 - r * r - s * s); - for (int ir = 0; ir < 3; ++ir) { - for (int irs : {1, 2}) { - for (int sign_r : {-1, 1}) { - for (int sign_s : {-1, 1}) { - for (int sign_t : {-1, 1}) { - grid[ir] = sign_r * r; - grid[(ir+irs)%3] = sign_s * s; - grid[(ir-irs+3)%3] = sign_t * t; - std::advance(grid, 3); + [] (double* grid, double x, double y) + { + double r = x; + double s = y; + double t = std::sqrt (1.0 - r * r - s * s); + for (int ir = 0; ir < 3; ++ir) + { + for (int irs: {1, 2}) + { + for (int sign_r: {-1, 1}) + { + for (int sign_s: {-1, 1}) + { + for (int sign_t: {-1, 1}) + { + grid[ir] = sign_r * r; + grid[(ir + irs) % 3] = sign_s * s; + grid[(ir - irs + 3) % 3] = sign_t * t; + std::advance (grid, 3); + } + } + } } - } } - } - } - }, + }, }; // end of the definition of "fill" -const DelleyTable* _find_delley(int& lmax) { +const DelleyTable* + _find_delley (int& lmax) +{ // NOTE: this function assumes elements in "delley_table" are // arranged such that their members "lmax_" are in ascending order. - auto tab = std::find_if(delley_table.begin(), delley_table.end(), - [lmax](const DelleyTable& t) { return t.lmax_ >= lmax; }); - return tab == delley_table.end() ? nullptr : (lmax = tab->lmax_, &(*tab)); + auto tab = std::find_if (delley_table.begin (), + delley_table.end (), + [lmax] (const DelleyTable& t) { return t.lmax_ >= lmax; }); + return tab == delley_table.end () ? 
nullptr : (lmax = tab->lmax_, &(*tab)); } -void _delley(const DelleyTable* tab, double* grid, double* weight) { - assert(tab); +void + _delley (const DelleyTable* tab, double* grid, double* weight) +{ + assert (tab); const double* ptr = &tab->data_[0]; - for (int itype = 0; itype < 6; ++itype) { - int stride = group_size[itype]; - for (int i = 0; i < tab->ntype_[itype]; ++i, ptr += 3, - grid += 3*stride, weight += stride) { - fill[itype](grid, ptr[0], ptr[1]); - std::fill(weight, weight + stride, ptr[2]); + for (int itype = 0; itype < 6; ++itype) + { + int stride = group_size[itype]; + for (int i = 0; i < tab->ntype_[itype]; ++i, ptr += 3, grid += 3 * stride, weight += stride) + { + fill[itype](grid, ptr[0], ptr[1]); + std::fill (weight, weight + stride, ptr[2]); + } } - } } } // end of anonymous namespace - -int Grid::Angular::ngrid_delley(int& lmax) { - auto tab = _find_delley(lmax); +int + Grid::Angular::ngrid_delley (int& lmax) +{ + auto tab = _find_delley (lmax); return tab ? tab->ngrid_ : -1; } - -int Grid::Angular::delley(int& lmax, double* grid, double* weight) { - auto tab = _find_delley(lmax); - return tab ? _delley(tab, grid, weight), 0 : -1; +int + Grid::Angular::delley (int& lmax, double* grid, double* weight) +{ + auto tab = _find_delley (lmax); + return tab ? 
_delley (tab, grid, weight), 0 : -1; } - -int Grid::Angular::delley( - int& lmax, - std::vector& grid, - std::vector& weight -) { - auto tab = _find_delley(lmax); - if (!tab) { - return -1; - } - grid.resize(3 * tab->ngrid_); - weight.resize(tab->ngrid_); - _delley(tab, grid.data(), weight.data()); +int + Grid::Angular::delley (int& lmax, std::vector& grid, std::vector& weight) +{ + auto tab = _find_delley (lmax); + if (!tab) + { + return -1; + } + grid.resize (3 * tab->ngrid_); + weight.resize (tab->ngrid_); + _delley (tab, grid.data (), weight.data ()); return 0; } diff --git a/source/source_base/module_grid/delley.h b/source/source_base/module_grid/delley.h index 25f5fd35bed..7d8acf27f53 100644 --- a/source/source_base/module_grid/delley.h +++ b/source/source_base/module_grid/delley.h @@ -3,8 +3,10 @@ #include -namespace Grid { -namespace Angular { +namespace Grid +{ +namespace Angular +{ /** * @brief Number of Delley's grid points for a certain order of accuracy. @@ -18,8 +20,7 @@ namespace Angular { * lmax will be set to 23. * */ -int ngrid_delley(int& lmax); - +int ngrid_delley (int& lmax); /** * @brief Delley's quadrature grid and weights. @@ -40,10 +41,10 @@ int ngrid_delley(int& lmax); * Delley, B. (1996). High order integration schemes on the unit sphere. * Journal of computational chemistry, 17(9), 1152-1155. 
*/ -int delley(int& lmax, double* grid, double* weight); +int delley (int& lmax, double* grid, double* weight); // a handy wrapper doing the same as above -int delley(int& lmax, std::vector& grid, std::vector& weight); +int delley (int& lmax, std::vector& grid, std::vector& weight); } // end of namespace Angular } // end of namespace Grid diff --git a/source/source_base/module_grid/partition.cpp b/source/source_base/module_grid/partition.cpp index 9147d39b1fb..1bcf9021206 100644 --- a/source/source_base/module_grid/partition.cpp +++ b/source/source_base/module_grid/partition.cpp @@ -8,37 +8,37 @@ #include #include -namespace Grid { -namespace Partition { +namespace Grid +{ +namespace Partition +{ const double stratmann_a = 0.64; -double w_becke( - int nR0, - const double* drR, - const double* dRR, - int nR, - const int* iR, - int c -) { - assert(nR > 0 && nR0 >= nR); - std::vector P(nR, 1.0); - for (int i = 0; i < nR; ++i) { - int I = iR[i]; - for (int j = i + 1; j < nR; ++j) { - int J = iR[j]; - double mu = (drR[I] - drR[J]) / dRR[I*nR0 + J]; - double s = s_becke(mu); - P[I] *= s; - P[J] *= (1.0 - s); // s(-mu) = 1 - s(mu) +double + w_becke (int nR0, const double* drR, const double* dRR, int nR, const int* iR, int c) +{ + assert (nR > 0 && nR0 >= nR); + std::vector P (nR, 1.0); + for (int i = 0; i < nR; ++i) + { + int I = iR[i]; + for (int j = i + 1; j < nR; ++j) + { + int J = iR[j]; + double mu = (drR[I] - drR[J]) / dRR[I * nR0 + J]; + double s = s_becke (mu); + P[I] *= s; + P[J] *= (1.0 - s); // s(-mu) = 1 - s(mu) + } } - } - return P[c] / std::accumulate(P.begin(), P.end(), 0.0); + return P[c] / std::accumulate (P.begin (), P.end (), 0.0); } - -double s_becke(double mu) { - /* +double + s_becke (double mu) +{ + /* * Becke's iterated polynomials (3rd order) * * s(mu) = 0.5 * (1 - p(p(p(mu)))) @@ -46,32 +46,27 @@ double s_becke(double mu) { * p(x) = 0.5 * x * (3 - x^2) * */ - double p = 0.5 * mu * (3.0 - mu*mu); - p = 0.5 * p * (3.0 - p*p); - p = 0.5 * p * (3.0 - 
p*p); + double p = 0.5 * mu * (3.0 - mu * mu); + p = 0.5 * p * (3.0 - p * p); + p = 0.5 * p * (3.0 - p * p); return 0.5 * (1.0 - p); } - -double w_stratmann( - int nR0, - const double* drR, - const double* dRR, - const double* drR_thr, - int nR, - int* iR, - int c -) { - assert(nR > 0 && nR0 >= nR); +double + w_stratmann (int nR0, const double* drR, const double* dRR, const double* drR_thr, int nR, int* iR, int c) +{ + assert (nR > 0 && nR0 >= nR); int I = iR[c], J = 0; // If r falls within the exclusive zone of a center, return immediately. - for (int j = 0; j < nR; ++j) { - J = iR[j]; - if (drR[J] <= drR_thr[J]) { - return static_cast(I == J); + for (int j = 0; j < nR; ++j) + { + J = iR[j]; + if (drR[J] <= drR_thr[J]) + { + return static_cast (I == J); + } } - } // Even if the grid point does not fall within the exclusive zone of any // center, the normalized weight could still be 0 or 1, and this can be @@ -79,43 +74,47 @@ double w_stratmann( // Swap the grid center to the first position in iteration for convenience. // Restore the original order before return. - std::swap(iR[0], iR[c]); - - std::vector P(nR); - for (int j = 1; j < nR; ++j) { - J = iR[j]; - double mu = (drR[I] - drR[J]) / dRR[I*nR0 + J]; - P[j] = s_stratmann(mu); - } - P[0] = std::accumulate(P.begin() + 1, P.end(), 1.0, - std::multiplies()); - - if (P[0] == 0.0 || P[0] == 1.0) { - std::swap(iR[0], iR[c]); // restore the original order - return P[0]; - } + std::swap (iR[0], iR[c]); + + std::vector P (nR); + for (int j = 1; j < nR; ++j) + { + J = iR[j]; + double mu = (drR[I] - drR[J]) / dRR[I * nR0 + J]; + P[j] = s_stratmann (mu); + } + P[0] = std::accumulate (P.begin () + 1, P.end (), 1.0, std::multiplies ()); + + if (P[0] == 0.0 || P[0] == 1.0) + { + std::swap (iR[0], iR[c]); // restore the original order + return P[0]; + } // If it passes all the screening above, all unnormalized weights // have to be calculated in order to get the normalized weight. 
- std::for_each(P.begin() + 1, P.end(), [](double& s) { s = 1.0 - s; }); - for (int i = 1; i < nR; ++i) { - I = iR[i]; - for (int j = i + 1; j < nR; ++j) { - J = iR[j]; - double mu = (drR[I] - drR[J]) / dRR[I*nR0 + J]; - double s = s_stratmann(mu); - P[i] *= s; - P[j] *= (1.0 - s); // s(-mu) = 1 - s(mu) + std::for_each (P.begin () + 1, P.end (), [] (double& s) { s = 1.0 - s; }); + for (int i = 1; i < nR; ++i) + { + I = iR[i]; + for (int j = i + 1; j < nR; ++j) + { + J = iR[j]; + double mu = (drR[I] - drR[J]) / dRR[I * nR0 + J]; + double s = s_stratmann (mu); + P[i] *= s; + P[j] *= (1.0 - s); // s(-mu) = 1 - s(mu) + } } - } - std::swap(iR[0], iR[c]); // restore the original order - return P[0] / std::accumulate(P.begin(), P.end(), 0.0); + std::swap (iR[0], iR[c]); // restore the original order + return P[0] / std::accumulate (P.begin (), P.end (), 0.0); } - -double s_stratmann(double mu) { +double + s_stratmann (double mu) +{ /* * Stratmann's piecewise cell function * @@ -132,11 +131,10 @@ double s_stratmann(double mu) { double x2 = x * x; double h = 0.0625 * x * (35 + x2 * (-35 + x2 * (21 - 5 * x2))); - bool mid = std::abs(x) < 1; - double g = !mid * (1 - 2 * std::signbit(x)) + mid * h; + bool mid = std::abs (x) < 1; + double g = !mid * (1 - 2 * std::signbit (x)) + mid * h; return 0.5 * (1.0 - g); } - } // end of namespace Partition } // end of namespace Grid diff --git a/source/source_base/module_grid/partition.h b/source/source_base/module_grid/partition.h index cc4879e3a0d..6066256affa 100644 --- a/source/source_base/module_grid/partition.h +++ b/source/source_base/module_grid/partition.h @@ -1,10 +1,13 @@ #ifndef GRID_PARTITION_H #define GRID_PARTITION_H -namespace Grid { -namespace Partition { +namespace Grid +{ +namespace Partition +{ -enum class Type { +enum class Type +{ Becke, Stratmann, }; @@ -35,18 +38,10 @@ extern const double stratmann_a; * The Journal of chemical physics, 88(4), 2547-2553. 
* */ -double w_becke( - int nR0, - const double* drR, - const double* dRR, - int nR, - const int* iR, - int c -); +double w_becke (int nR0, const double* drR, const double* dRR, int nR, const int* iR, int c); // Becke's cell function (iterated polynomial) -double s_becke(double mu); - +double s_becke (double mu); /** * @brief Becke's partition weight with Stratmann's scheme. @@ -65,18 +60,10 @@ double s_becke(double mu); * Chemical physics letters, 257(3-4), 213-223. * */ -double w_stratmann( - int nR0, - const double* drR, - const double* dRR, - const double* drR_thr, - int nR, - int* iR, - int c -); +double w_stratmann (int nR0, const double* drR, const double* dRR, const double* drR_thr, int nR, int* iR, int c); // Stratmann's piecewise cell function -double s_stratmann(double mu); +double s_stratmann (double mu); } // end of namespace Partition } // end of namespace Grid diff --git a/source/source_base/module_grid/radial.cpp b/source/source_base/module_grid/radial.cpp index 943e61e9bb7..35a49a3330b 100644 --- a/source/source_base/module_grid/radial.cpp +++ b/source/source_base/module_grid/radial.cpp @@ -2,63 +2,71 @@ #include -namespace Grid { -namespace Radial { +namespace Grid +{ +namespace Radial +{ -void baker(int nbase, double R, double* r, double* w, int mult) { - int n = (nbase+1) * mult - 1; - double r0 = -R / std::log((2.0 * nbase + 1.0) / ((nbase+1)*(nbase+1))); - for (int i = 1; i <= n; ++i) { - r[i-1] = -r0 * std::log(1.0 - static_cast(i)*i/((n+1)*(n+1))); - w[i-1] = 2.0 * i * r0 * r[i-1] * r[i-1] / ((n+1+i)*(n+1-i)); - } +void + baker (int nbase, double R, double* r, double* w, int mult) +{ + int n = (nbase + 1) * mult - 1; + double r0 = -R / std::log ((2.0 * nbase + 1.0) / ((nbase + 1) * (nbase + 1))); + for (int i = 1; i <= n; ++i) + { + r[i - 1] = -r0 * std::log (1.0 - static_cast (i) * i / ((n + 1) * (n + 1))); + w[i - 1] = 2.0 * i * r0 * r[i - 1] * r[i - 1] / ((n + 1 + i) * (n + 1 - i)); + } } - -void baker(int nbase, double R, std::vector& r, 
- std::vector& w, int mult) { - int n = (nbase+1) * mult - 1; - r.resize(n); - w.resize(n); - baker(nbase, R, r.data(), w.data(), mult); +void + baker (int nbase, double R, std::vector& r, std::vector& w, int mult) +{ + int n = (nbase + 1) * mult - 1; + r.resize (n); + w.resize (n); + baker (nbase, R, r.data (), w.data (), mult); } - -void murray(int n, double R, double* r, double* w) { - for (int i = 1; i <= n; ++i) { - double x = static_cast(i) / (n + 1); - r[i-1] = std::pow(x / (1.0 - x), 2) * R; - w[i-1] = 2.0 / (n + 1) * std::pow(R, 3) * std::pow(x, 5) - / std::pow(1.0 - x, 7); - } +void + murray (int n, double R, double* r, double* w) +{ + for (int i = 1; i <= n; ++i) + { + double x = static_cast (i) / (n + 1); + r[i - 1] = std::pow (x / (1.0 - x), 2) * R; + w[i - 1] = 2.0 / (n + 1) * std::pow (R, 3) * std::pow (x, 5) / std::pow (1.0 - x, 7); + } } +void + treutler_m4 (int n, double R, double* r, double* w, double alpha) +{ + const double pi = std::acos (-1.0); + const double inv_ln2 = 1.0 / std::log (2.0); -void treutler_m4(int n, double R, double* r, double* w, double alpha) { - const double pi = std::acos(-1.0); - const double inv_ln2 = 1.0 / std::log(2.0); - - for (int i = 1; i <= n; ++i) { - double x = std::cos(i * pi / (n + 1)); - double beta = std::sqrt((1.0 + x) / (1.0 - x)); - double gamma = std::log((1.0 - x) / 2.0); - double delta = std::pow(1.0 + x, alpha); - r[i-1] = -R * inv_ln2 * delta * gamma; - w[i-1] = pi / (n + 1) * std::pow(delta * R * inv_ln2, 3) - * gamma * gamma * (beta - alpha / beta * gamma); - } + for (int i = 1; i <= n; ++i) + { + double x = std::cos (i * pi / (n + 1)); + double beta = std::sqrt ((1.0 + x) / (1.0 - x)); + double gamma = std::log ((1.0 - x) / 2.0); + double delta = std::pow (1.0 + x, alpha); + r[i - 1] = -R * inv_ln2 * delta * gamma; + w[i - 1] = pi / (n + 1) * std::pow (delta * R * inv_ln2, 3) * gamma * gamma * (beta - alpha / beta * gamma); + } } - -void mura(int n, double R, double* r, double* w) { - for (int i = 
1; i <= n; ++i) { - double x = static_cast(i) / (n + 1); - double alpha = 1.0 - x * x * x; - r[i-1] = -R * std::log(alpha); - w[i-1] = 3.0 * R * std::pow(x * r[i-1], 2) / ((n+1) * alpha); - } +void + mura (int n, double R, double* r, double* w) +{ + for (int i = 1; i <= n; ++i) + { + double x = static_cast (i) / (n + 1); + double alpha = 1.0 - x * x * x; + r[i - 1] = -R * std::log (alpha); + w[i - 1] = 3.0 * R * std::pow (x * r[i - 1], 2) / ((n + 1) * alpha); + } } - } // end of namespace Radial } // end of namespace Grid diff --git a/source/source_base/module_grid/radial.h b/source/source_base/module_grid/radial.h index b8378f58340..e235386ebe1 100644 --- a/source/source_base/module_grid/radial.h +++ b/source/source_base/module_grid/radial.h @@ -3,8 +3,10 @@ #include -namespace Grid { -namespace Radial { +namespace Grid +{ +namespace Radial +{ /** * @brief Radial quadratures. @@ -33,18 +35,15 @@ namespace Radial { * before applying the "radial multiplier" introduced by Zhang et al. * The true number of grid points is (nbase+1) * mult - 1. */ -void baker(int nbase, double R, double* r, double* w, int mult = 1); -void baker(int nbase, double R, std::vector& r, - std::vector& w, int mult = 1); - +void baker (int nbase, double R, double* r, double* w, int mult = 1); +void baker (int nbase, double R, std::vector& r, std::vector& w, int mult = 1); /** * Murray, C. W., Handy, N. C., & Laming, G. J. (1993). * Quadrature schemes for integrals of density functional theory. * Molecular Physics, 78(4), 997-1014. */ -void murray(int n, double R, double* r, double* w); - +void murray (int n, double R, double* r, double* w); /** * Treutler, O., & Ahlrichs, R. (1995). @@ -53,8 +52,7 @@ void murray(int n, double R, double* r, double* w); * * @note M4 reduces to M3 at alpha = 0. */ -void treutler_m4(int n, double R, double* r, double* w, double alpha = 0.6); - +void treutler_m4 (int n, double R, double* r, double* w, double alpha = 0.6); /** * Mura, M. E., & Knowles, P. J. (1996). 
@@ -62,7 +60,7 @@ void treutler_m4(int n, double R, double* r, double* w, double alpha = 0.6); * density‐functional calculations. * The Journal of chemical physics, 104(24), 9848-9858. */ -void mura(int n, double R, double* r, double* w); +void mura (int n, double R, double* r, double* w); } // end of namespace Radial } // end of namespace Grid diff --git a/source/source_base/module_grid/test/test_batch.cpp b/source/source_base/module_grid/test/test_batch.cpp index 8f727602c77..975d6aa3184 100644 --- a/source/source_base/module_grid/test/test_batch.cpp +++ b/source/source_base/module_grid/test/test_batch.cpp @@ -3,7 +3,7 @@ #include "gtest/gtest.h" #include #include -//#include +// #include #ifdef __MPI #include @@ -11,11 +11,9 @@ using namespace Grid::Batch; - -class BatchTest: public ::testing::Test +class BatchTest : public ::testing::Test { -protected: - + protected: std::vector grid_; std::vector idx_; @@ -33,7 +31,6 @@ class BatchTest: public ::testing::Test // plane are equivalent in terms of the maxmin optimization problem. // This means eigenvectors are arbitrary in this case. - // parameters for a random cluster const int n_grid_rand_ = 1000; const int n_batch_rand_ = 200; @@ -43,191 +40,191 @@ class BatchTest: public ::testing::Test const double zc_ = 2.0; }; - -void gen_random( - int ngrid, - double xc, - double yc, - double zc, - double width, - std::vector& grid, - std::vector& idx -) { +void + gen_random (int ngrid, + double xc, + double yc, + double zc, + double width, + std::vector& grid, + std::vector& idx) +{ // Generates a set of points centered around (xc, yc, zc). 
std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution dis(-width, width); - - grid.resize(3 * ngrid); - for (int i = 0; i < ngrid; ++i) { - grid[3*i ] = xc + dis(gen); - grid[3*i + 1] = yc + dis(gen); - grid[3*i + 2] = zc + dis(gen); - } - - idx.resize(ngrid); - std::iota(idx.begin(), idx.end(), 0); - std::shuffle(idx.begin(), idx.end(), gen); -} + std::mt19937 gen (rd ()); + std::uniform_real_distribution dis (-width, width); + + grid.resize (3 * ngrid); + for (int i = 0; i < ngrid; ++i) + { + grid[3 * i] = xc + dis (gen); + grid[3 * i + 1] = yc + dis (gen); + grid[3 * i + 2] = zc + dis (gen); + } + idx.resize (ngrid); + std::iota (idx.begin (), idx.end (), 0); + std::shuffle (idx.begin (), idx.end (), gen); +} -void gen_octant( - int n_each, - double offset_x, - double offset_y, - double offset_z, - double width, - std::vector& grid, - std::vector& idx -) { +void + gen_octant (int n_each, + double offset_x, + double offset_y, + double offset_z, + double width, + std::vector& grid, + std::vector& idx) +{ // Generates a set of points consisting of 8 well-separated, equal-sized // clusters located in individual octants. 
std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution dis(-width, width); + std::mt19937 gen (rd ()); + std::uniform_real_distribution dis (-width, width); int ngrid = 8 * n_each; - grid.resize(3 * ngrid); + grid.resize (3 * ngrid); int I = 0; - for (int sign_x : {-1, 1}) { - for (int sign_y : {-1, 1}) { - for (int sign_z : {-1, 1}) { - for (int i = 0; i < n_each; ++i, ++I) { - grid[3*I ] = sign_x * offset_x + dis(gen); - grid[3*I + 1] = sign_y * offset_y + dis(gen); - grid[3*I + 2] = sign_z * offset_z + dis(gen); + for (int sign_x: {-1, 1}) + { + for (int sign_y: {-1, 1}) + { + for (int sign_z: {-1, 1}) + { + for (int i = 0; i < n_each; ++i, ++I) + { + grid[3 * I] = sign_x * offset_x + dis (gen); + grid[3 * I + 1] = sign_y * offset_y + dis (gen); + grid[3 * I + 2] = sign_z * offset_z + dis (gen); + } + } } - } } - } - idx.resize(ngrid); - std::iota(idx.begin(), idx.end(), 0); - std::shuffle(idx.begin(), idx.end(), gen); + idx.resize (ngrid); + std::iota (idx.begin (), idx.end (), 0); + std::shuffle (idx.begin (), idx.end (), gen); } - -bool is_same_octant(int ngrid, const double* grid) { - if (ngrid == 0) { - return true; - } +bool + is_same_octant (int ngrid, const double* grid) +{ + if (ngrid == 0) + { + return true; + } const bool is_positive_x = grid[0] > 0; const bool is_positive_y = grid[1] > 0; const bool is_positive_z = grid[2] > 0; const double* end = grid + 3 * ngrid; - for (; grid != end; grid += 3) { - if ( is_positive_x != (grid[0] > 0) || - is_positive_y != (grid[1] > 0) || - is_positive_z != (grid[2] > 0) ) { - return false; + for (; grid != end; grid += 3) + { + if (is_positive_x != (grid[0] > 0) || is_positive_y != (grid[1] > 0) || is_positive_z != (grid[2] > 0)) + { + return false; + } } - } return true; } - -bool good_batch_size( - const std::vector& idx, - const std::vector& delim, - int n_batch_thr -) { +bool + good_batch_size (const std::vector& idx, const std::vector& delim, int n_batch_thr) +{ // checks if the 
sizes of batches are within the specified limit bool flag = (delim[0] == 0); size_t i = 1; - while (flag && i < delim.size()) { - int sz_batch = delim[i] - delim[i-1]; - flag = flag && (sz_batch > 0) && (sz_batch <= n_batch_thr); - ++i; - } + while (flag && i < delim.size ()) + { + int sz_batch = delim[i] - delim[i - 1]; + flag = flag && (sz_batch > 0) && (sz_batch <= n_batch_thr); + ++i; + } - return flag && ( ((int)idx.size() - delim.back()) < n_batch_thr ); + return flag && (((int)idx.size () - delim.back ()) < n_batch_thr); } - -TEST_F(BatchTest, MaxMinRandom) +TEST_F (BatchTest, MaxMinRandom) { // This test verifies that the sizes of batches produced by maxmin // do not exceed the specified limit. - gen_random(n_grid_rand_, xc_, yc_, zc_, width_rand_, grid_, idx_); + gen_random (n_grid_rand_, xc_, yc_, zc_, width_rand_, grid_, idx_); - std::vector delim = - maxmin(grid_.data(), idx_.data(), idx_.size(), n_batch_rand_); + std::vector delim = maxmin (grid_.data (), idx_.data (), idx_.size (), n_batch_rand_); - EXPECT_TRUE(good_batch_size(idx_, delim, n_batch_rand_)); + EXPECT_TRUE (good_batch_size (idx_, delim, n_batch_rand_)); // write grid, idx & delim to file - //FILE* fp = fopen("grid.dat", "w"); - //for (size_t i = 0; i < grid_.size() / 3; ++i) { + // FILE* fp = fopen("grid.dat", "w"); + // for (size_t i = 0; i < grid_.size() / 3; ++i) { // std::fprintf(fp, "% 12.6f % 12.6f % 12.6f\n", // grid_[3*i], grid_[3*i + 1], grid_[3*i + 2]); //} - //fclose(fp); - - //fp = fopen("idx.dat", "w"); - //for (size_t i = 0; i < idx_.size(); ++i) { - // std::fprintf(fp, "%d\n", idx_[i]); - //} - //fclose(fp); - - //fp = fopen("delim.dat", "w"); - //for (size_t i = 0; i < delim.size(); ++i) { - // std::fprintf(fp, "%d\n", delim[i]); - //} - //fclose(fp); + // fclose(fp); + + // fp = fopen("idx.dat", "w"); + // for (size_t i = 0; i < idx_.size(); ++i) { + // std::fprintf(fp, "%d\n", idx_[i]); + // } + // fclose(fp); + + // fp = fopen("delim.dat", "w"); + // for (size_t i = 0; 
i < delim.size(); ++i) { + // std::fprintf(fp, "%d\n", delim[i]); + // } + // fclose(fp); } - -TEST_F(BatchTest, MaxMinOctant) +TEST_F (BatchTest, MaxMinOctant) { // This test applies maxmin to a set of points consisting of 8 // well-separated, equal-sized clusters located in individual octants. // The resulting batches should be able to recover this structure. - gen_octant(n_batch_oct_, offset_x_, offset_y_, offset_z_, width_oct_, - grid_, idx_); + gen_octant (n_batch_oct_, offset_x_, offset_y_, offset_z_, width_oct_, grid_, idx_); - std::vector delim = - maxmin(grid_.data(), idx_.data(), idx_.size(), n_batch_oct_); + std::vector delim = maxmin (grid_.data (), idx_.data (), idx_.size (), n_batch_oct_); - EXPECT_EQ(delim.size(), 8); + EXPECT_EQ (delim.size (), 8); - std::vector grid_batch(3 * n_batch_oct_); - for (int i = 0; i < 8; ++i) { + std::vector grid_batch (3 * n_batch_oct_); + for (int i = 0; i < 8; ++i) + { - EXPECT_EQ(delim[i], i * n_batch_oct_); + EXPECT_EQ (delim[i], i * n_batch_oct_); - // collect points within the present batch - for (int j = 0; j < n_batch_oct_; ++j) { - int ig = idx_[delim[i] + j]; - grid_batch[3*j ] = grid_[3*ig ]; - grid_batch[3*j + 1] = grid_[3*ig + 1]; - grid_batch[3*j + 2] = grid_[3*ig + 2]; - } + // collect points within the present batch + for (int j = 0; j < n_batch_oct_; ++j) + { + int ig = idx_[delim[i] + j]; + grid_batch[3 * j] = grid_[3 * ig]; + grid_batch[3 * j + 1] = grid_[3 * ig + 1]; + grid_batch[3 * j + 2] = grid_[3 * ig + 2]; + } - // verify that points in a batch reside in the same octant - EXPECT_TRUE(is_same_octant(n_batch_oct_, grid_batch.data())); - } + // verify that points in a batch reside in the same octant + EXPECT_TRUE (is_same_octant (n_batch_oct_, grid_batch.data ())); + } } - -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + 
testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; diff --git a/source/source_base/module_grid/test/test_delley.cpp b/source/source_base/module_grid/test/test_delley.cpp index 8de247a5335..6abff75cbe7 100644 --- a/source/source_base/module_grid/test/test_delley.cpp +++ b/source/source_base/module_grid/test/test_delley.cpp @@ -10,71 +10,80 @@ using namespace Grid::Angular; // mock the function to prevent unnecessary dependency -namespace ModuleBase { -void WARNING_QUIT(const std::string&, const std::string&) {} +namespace ModuleBase +{ +void + WARNING_QUIT (const std::string&, const std::string&) +{ } +} // namespace ModuleBase -class DelleyTest: public ::testing::Test { -protected: - void randgen(int lmax, std::vector& coef); +class DelleyTest : public ::testing::Test +{ + protected: + void randgen (int lmax, std::vector& coef); const double tol = 1e-12; }; - -void DelleyTest::randgen(int lmax, std::vector& coef) { - coef.resize((lmax + 1) * (lmax + 1)); +void + DelleyTest::randgen (int lmax, std::vector& coef) +{ + coef.resize ((lmax + 1) * (lmax + 1)); // fill coef with uniformly distributed random numbers std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution dis(0.0, 1.0); - for (size_t i = 0; i < coef.size(); ++i) { - coef[i] = dis(gen); - } + std::mt19937 gen (rd ()); + std::uniform_real_distribution dis (0.0, 1.0); + for (size_t i = 0; i < coef.size (); ++i) + { + coef[i] = dis (gen); + } // normalize the coefficients double fac = 0.0; - for (size_t i = 0; i < coef.size(); ++i) { - fac += coef[i] * coef[i]; - } - - fac = 1.0 / std::sqrt(fac); - for (size_t i = 0; i < coef.size(); ++i) { - coef[i] *= fac; - } -} + for (size_t i = 0; i < coef.size (); ++i) + { + fac += coef[i] * coef[i]; + } + fac = 1.0 / std::sqrt (fac); + for (size_t i = 0; i < coef.size (); ++i) + { + coef[i] *= fac; + } +} -TEST_F(DelleyTest, NumGrid) { +TEST_F 
(DelleyTest, NumGrid) +{ int lmax = 5; - int ngrid = ngrid_delley(lmax); - EXPECT_EQ(lmax, 17); - EXPECT_EQ(ngrid, 110); + int ngrid = ngrid_delley (lmax); + EXPECT_EQ (lmax, 17); + EXPECT_EQ (ngrid, 110); lmax = 17; - ngrid = ngrid_delley(lmax); - EXPECT_EQ(lmax, 17); - EXPECT_EQ(ngrid, 110); + ngrid = ngrid_delley (lmax); + EXPECT_EQ (lmax, 17); + EXPECT_EQ (ngrid, 110); lmax = 20; - ngrid = ngrid_delley(lmax); - EXPECT_EQ(lmax, 23); - EXPECT_EQ(ngrid, 194); + ngrid = ngrid_delley (lmax); + EXPECT_EQ (lmax, 23); + EXPECT_EQ (ngrid, 194); lmax = 59; - ngrid = ngrid_delley(lmax); - EXPECT_EQ(lmax, 59); - EXPECT_EQ(ngrid, 1202); + ngrid = ngrid_delley (lmax); + EXPECT_EQ (lmax, 59); + EXPECT_EQ (ngrid, 1202); lmax = 60; - ngrid = ngrid_delley(lmax); - EXPECT_EQ(lmax, 60); - EXPECT_EQ(ngrid, -1); + ngrid = ngrid_delley (lmax); + EXPECT_EQ (lmax, 60); + EXPECT_EQ (ngrid, -1); } - -TEST_F(DelleyTest, Accuracy) { - /* +TEST_F (DelleyTest, Accuracy) +{ + /* * Given * * f = c[0]*Y00 + c[1]*Y10 + c[2]*Y11 + ..., @@ -90,48 +99,51 @@ TEST_F(DelleyTest, Accuracy) { */ std::vector grid, weight, coef; - for (int grid_lmax = 17; grid_lmax < 60; grid_lmax +=6) { - delley(grid_lmax, grid, weight); - int func_lmax = grid_lmax / 2; - randgen(func_lmax, coef); - - double val = 0.0; - std::vector ylm_real; - for (size_t i = 0; i < weight.size(); i++) { - ModuleBase::Ylm::sph_harm(func_lmax, - grid[3*i], grid[3*i+1], grid[3*i+2], ylm_real); - double tmp = 0.0; - for (size_t j = 0; j < coef.size(); ++j) { - tmp += coef[j] * ylm_real[j]; - } - val += weight[i] * tmp * tmp; + for (int grid_lmax = 17; grid_lmax < 60; grid_lmax += 6) + { + delley (grid_lmax, grid, weight); + int func_lmax = grid_lmax / 2; + randgen (func_lmax, coef); + + double val = 0.0; + std::vector ylm_real; + for (size_t i = 0; i < weight.size (); i++) + { + ModuleBase::Ylm::sph_harm (func_lmax, grid[3 * i], grid[3 * i + 1], grid[3 * i + 2], ylm_real); + double tmp = 0.0; + for (size_t j = 0; j < coef.size (); ++j) + { 
+ tmp += coef[j] * ylm_real[j]; + } + val += weight[i] * tmp * tmp; + } + val *= 4.0 * std::acos (-1.0); + + double val_ref = 0.0; + for (size_t i = 0; i < coef.size (); ++i) + { + val_ref += coef[i] * coef[i]; + } + + double abs_diff = std::abs (val - val_ref); + EXPECT_LT (abs_diff, tol); + // printf("order = %2i val_ref = %8.5f abs_diff = %8.5e\n", + // grid_lmax, val_ref, abs_diff); } - val *= 4.0 * std::acos(-1.0); - - double val_ref = 0.0; - for (size_t i = 0; i < coef.size(); ++i) { - val_ref += coef[i] * coef[i]; - } - - double abs_diff = std::abs(val - val_ref); - EXPECT_LT(abs_diff, tol); - //printf("order = %2i val_ref = %8.5f abs_diff = %8.5e\n", - // grid_lmax, val_ref, abs_diff); - } } - -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; diff --git a/source/source_base/module_grid/test/test_partition.cpp b/source/source_base/module_grid/test/test_partition.cpp index 0615dc44637..29f00eeafbf 100644 --- a/source/source_base/module_grid/test/test_partition.cpp +++ b/source/source_base/module_grid/test/test_partition.cpp @@ -22,122 +22,131 @@ using iclock = std::chrono::high_resolution_clock; iclock::time_point start; std::chrono::duration dur; -double norm(const Vec3& v) { - return std::sqrt(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]); +double + norm (const Vec3& v) +{ + return std::sqrt (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]); } -Vec3 operator-(const Vec3& v1, const Vec3& v2) { +Vec3 + operator- (const Vec3& v1, const Vec3& v2) +{ return {v1[0] - v2[0], v1[1] - v2[1], v1[2] - v2[2]}; } -Vec3 operator+(const Vec3& v1, const Vec3& v2) { +Vec3 + operator+ (const Vec3& v1, const Vec3& v2) +{ return {v1[0] + v2[0], v1[1] + v2[1], v1[2] + v2[2]}; } // |r|^n * 
exp(-a*|r|^2) -double func_core(const Vec3& r, double a, double n) { - double rabs = norm(r); - return std::pow(rabs, n) * std::exp(-a * rabs * rabs); +double + func_core (const Vec3& r, double a, double n) +{ + double rabs = norm (r); + return std::pow (rabs, n) * std::exp (-a * rabs * rabs); } // func_core integrated over all space -double ref_core(double a, double n) { +double + ref_core (double a, double n) +{ double p = 0.5 * (n + 3); - return 2.0 * PI * std::pow(a, -p) * std::tgamma(p); + return 2.0 * PI * std::pow (a, -p) * std::tgamma (p); } // the test function is a combination of several func_core -double func( - const Vec3& r, - const std::vector& R, - const std::vector& a, - const std::vector& n -) { +double + func (const Vec3& r, const std::vector& R, const std::vector& a, const std::vector& n) +{ double val = 0.0; - for (size_t i = 0; i < R.size(); i++) { - val += func_core(r - R[i], a[i], n[i]); - } + for (size_t i = 0; i < R.size (); i++) + { + val += func_core (r - R[i], a[i], n[i]); + } return val; } -double ref(const std::vector& a, const std::vector& n) { +double + ref (const std::vector& a, const std::vector& n) +{ double val = 0.0; - for (size_t i = 0; i < a.size(); i++) { - val += ref_core(a[i], n[i]); - } + for (size_t i = 0; i < a.size (); i++) + { + val += ref_core (a[i], n[i]); + } return val; } // A Param object specifies a test function -struct Param { +struct Param +{ std::vector R; std::vector a; std::vector n; }; std::vector test_params = { - { - { - {0.0, 0.0, 0.0}, - {0.0, 0.0, 2.0}, - }, - {0.5, 2.0}, - {0, 0} - }, - { - { - {0.0, 0.0, 0.0}, - {0.0, 0.0, 2.0}, - {0.0, 3.0, 0.0}, - }, - {0.5, 2.0, 1.5}, - {1, 2, 0.5} - }, - { - { - {0.0, 0.0, 0.0}, - {0.0, 0.0, 3.0}, - {0.0, 3.0, 0.0}, - {9.0, 0.0, 0.0}, - }, - {1.0, 2.0, 1.5, 2.0}, - {2.5, 2, 0.5, 1} - }, - { - { - {0.0, 0.0, 0.0}, - {0.0, 0.0, 3.0}, - {0.0, 3.0, 0.0}, - {9.0, 0.0, 0.0}, - {1.0, 1.0, 1.0}, - {2.0, 2.0, 2.0}, - {3.0, 3.0, 3.0}, - {4.0, 4.0, 4.0}, - {5.0, 5.0, 5.0}, 
- {6.0, 6.0, 6.0}, - }, - {1.0, 2.0, 1.5, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0}, - {2.5, 2.0, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0} - }, + {{ + {0.0, 0.0, 0.0}, + {0.0, 0.0, 2.0}, + }, + {0.5, 2.0}, + {0, 0}}, + {{ + {0.0, 0.0, 0.0}, + {0.0, 0.0, 2.0}, + {0.0, 3.0, 0.0}, + }, + {0.5, 2.0, 1.5}, + {1, 2, 0.5}}, + {{ + {0.0, 0.0, 0.0}, + {0.0, 0.0, 3.0}, + {0.0, 3.0, 0.0}, + {9.0, 0.0, 0.0}, + }, + {1.0, 2.0, 1.5, 2.0}, + {2.5, 2, 0.5, 1}}, + {{ + {0.0, 0.0, 0.0}, + {0.0, 0.0, 3.0}, + {0.0, 3.0, 0.0}, + {9.0, 0.0, 0.0}, + {1.0, 1.0, 1.0}, + {2.0, 2.0, 2.0}, + {3.0, 3.0, 3.0}, + {4.0, 4.0, 4.0}, + {5.0, 5.0, 5.0}, + {6.0, 6.0, 6.0}, + }, + {1.0, 2.0, 1.5, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0}, + {2.5, 2.0, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}}, }; -std::vector dist_R_R(const std::vector& R) { +std::vector + dist_R_R (const std::vector& R) +{ // tabulate dRR[I,J] = || R[I] - R[J] || - size_t nR = R.size(); - std::vector dRR(nR*nR, 0.0); - for (size_t I = 0; I < nR; I++) { - for (size_t J = I + 1; J < nR; J++) { - double d = norm(R[I] - R[J]); - dRR[I*nR + J] = d; - dRR[J*nR + I] = d; + size_t nR = R.size (); + std::vector dRR (nR * nR, 0.0); + for (size_t I = 0; I < nR; I++) + { + for (size_t J = I + 1; J < nR; J++) + { + double d = norm (R[I] - R[J]); + dRR[I * nR + J] = d; + dRR[J * nR + I] = d; + } } - } return dRR; } -class PartitionTest: public ::testing::Test { -protected: - PartitionTest(); +class PartitionTest : public ::testing::Test +{ + protected: + PartitionTest (); // grid & weight for one-center integration std::vector r; @@ -146,149 +155,168 @@ class PartitionTest: public ::testing::Test { const double tol = 1e-5; }; -PartitionTest::PartitionTest() { +PartitionTest::PartitionTest () +{ // angular grid & weight std::vector r_ang, w_ang; int lmax = 25; - Grid::Angular::delley(lmax, r_ang, w_ang); + Grid::Angular::delley (lmax, r_ang, w_ang); // radial grid & weight std::vector r_rad, w_rad; int nrad = 60; int Rcut = 7.0; int mult = 2; - 
Grid::Radial::baker(nrad, Rcut, r_rad, w_rad, mult); + Grid::Radial::baker (nrad, Rcut, r_rad, w_rad, mult); // complete grid & weight for one-center integration - size_t ngrid = w_rad.size() * w_ang.size(); - r.resize(3*ngrid); - w.resize(ngrid); + size_t ngrid = w_rad.size () * w_ang.size (); + r.resize (3 * ngrid); + w.resize (ngrid); size_t ir = 0; - for (size_t i = 0; i < w_rad.size(); i++) { - for (size_t j = 0; j < w_ang.size(); j++) { - r[3*ir] = r_rad[i] * r_ang[3*j]; - r[3*ir+1] = r_rad[i] * r_ang[3*j+1]; - r[3*ir+2] = r_rad[i] * r_ang[3*j+2]; - w[ir] = w_rad[i] * w_ang[j] * 4.0 * PI; - ++ir; + for (size_t i = 0; i < w_rad.size (); i++) + { + for (size_t j = 0; j < w_ang.size (); j++) + { + r[3 * ir] = r_rad[i] * r_ang[3 * j]; + r[3 * ir + 1] = r_rad[i] * r_ang[3 * j + 1]; + r[3 * ir + 2] = r_rad[i] * r_ang[3 * j + 2]; + w[ir] = w_rad[i] * w_ang[j] * 4.0 * PI; + ++ir; + } } - } } - -TEST_F(PartitionTest, Becke) { - dur = dur.zero(); - for (const Param& param : test_params) { - double val = 0.0; - double val_ref = ref(param.a, param.n); - - // tabulate || R[I] - R[J] || - std::vector dRR(dist_R_R(param.R)); - - // all centers are involved - size_t nR = param.R.size(); - std::vector iR(nR); - std::iota(iR.begin(), iR.end(), 0); - - std::random_device rd; - std::mt19937 g(rd()); - std::shuffle(iR.begin(), iR.end(), g); - - for (size_t I = 0; I < nR; ++I) { // for each center - for (size_t i = 0; i < w.size(); i++) { - Vec3 ri = Vec3{r[3*i], r[3*i+1], r[3*i+2]} + param.R[I]; - - // tabulate || r - R[J] || - std::vector drR(nR); - for (size_t J = 0; J < nR; ++J) { - drR[J] = norm(ri - param.R[J]); +TEST_F (PartitionTest, Becke) +{ + dur = dur.zero (); + for (const Param& param: test_params) + { + double val = 0.0; + double val_ref = ref (param.a, param.n); + + // tabulate || R[I] - R[J] || + std::vector dRR (dist_R_R (param.R)); + + // all centers are involved + size_t nR = param.R.size (); + std::vector iR (nR); + std::iota (iR.begin (), iR.end (), 0); + + 
std::random_device rd; + std::mt19937 g (rd ()); + std::shuffle (iR.begin (), iR.end (), g); + + for (size_t I = 0; I < nR; ++I) + { // for each center + for (size_t i = 0; i < w.size (); i++) + { + Vec3 ri = Vec3{r[3 * i], r[3 * i + 1], r[3 * i + 2]} + param.R[I]; + + // tabulate || r - R[J] || + std::vector drR (nR); + for (size_t J = 0; J < nR; ++J) + { + drR[J] = norm (ri - param.R[J]); + } + + // partition weight for this grid point + start = iclock::now (); + double w_part = Grid::Partition::w_becke (drR.size (), + drR.data (), + dRR.data (), + iR.size (), + iR.data (), + I); + dur += iclock::now () - start; + + val += w_part * w[i] * func (ri, param.R, param.a, param.n); + } } - // partition weight for this grid point - start = iclock::now(); - double w_part = Grid::Partition::w_becke( - drR.size(), drR.data(), dRR.data(), - iR.size(), iR.data(), I - ); - dur += iclock::now() - start; - - val += w_part * w[i] * func(ri, param.R, param.a, param.n); - } + EXPECT_NEAR (val, val_ref, tol); } - - EXPECT_NEAR(val, val_ref, tol); - } - printf("time elapsed = %8.3e seconds\n", dur.count()); + printf ("time elapsed = %8.3e seconds\n", dur.count ()); } +TEST_F (PartitionTest, Stratmann) +{ + dur = dur.zero (); -TEST_F(PartitionTest, Stratmann) { - dur = dur.zero(); - - for (const Param& param : test_params) { - double val = 0.0; - double val_ref = ref(param.a, param.n); - - // tabulate || R[I] - R[J] || - std::vector dRR(dist_R_R(param.R)); - - // all centers are involved - size_t nR = param.R.size(); - std::vector iR(nR); - std::iota(iR.begin(), iR.end(), 0); - - // radii of exclusive zone - std::vector drR_thr(nR); - for (size_t I = 0; I < nR; ++I) { - double dRRmin = 1e100; - for (size_t J = 0; J < nR; ++J) { - if (J != I) { - dRRmin = std::min(dRRmin, dRR[I*nR + J]); + for (const Param& param: test_params) + { + double val = 0.0; + double val_ref = ref (param.a, param.n); + + // tabulate || R[I] - R[J] || + std::vector dRR (dist_R_R (param.R)); + + // all centers 
are involved + size_t nR = param.R.size (); + std::vector iR (nR); + std::iota (iR.begin (), iR.end (), 0); + + // radii of exclusive zone + std::vector drR_thr (nR); + for (size_t I = 0; I < nR; ++I) + { + double dRRmin = 1e100; + for (size_t J = 0; J < nR; ++J) + { + if (J != I) + { + dRRmin = std::min (dRRmin, dRR[I * nR + J]); + } + } + drR_thr[I] = 0.5 * (1.0 - Grid::Partition::stratmann_a) * dRRmin; } - } - drR_thr[I] = 0.5 * (1.0 - Grid::Partition::stratmann_a) * dRRmin; - } - - for (size_t I = 0; I < nR; ++I) { // for each center - for (size_t i = 0; i < w.size(); i++) { - Vec3 ri = Vec3{r[3*i], r[3*i+1], r[3*i+2]} + param.R[I]; - // tabulate || r - R[J] || - std::vector drR(nR); - for (size_t J = 0; J < nR; ++J) { - drR[J] = norm(ri - param.R[J]); + for (size_t I = 0; I < nR; ++I) + { // for each center + for (size_t i = 0; i < w.size (); i++) + { + Vec3 ri = Vec3{r[3 * i], r[3 * i + 1], r[3 * i + 2]} + param.R[I]; + + // tabulate || r - R[J] || + std::vector drR (nR); + for (size_t J = 0; J < nR; ++J) + { + drR[J] = norm (ri - param.R[J]); + } + + // partition weight for this grid point + start = iclock::now (); + double w_part = Grid::Partition::w_stratmann (drR.size (), + drR.data (), + dRR.data (), + drR_thr.data (), + iR.size (), + iR.data (), + I); + dur += iclock::now () - start; + + val += w_part * w[i] * func (ri, param.R, param.a, param.n); + } } - // partition weight for this grid point - start = iclock::now(); - double w_part = Grid::Partition::w_stratmann( - drR.size(), drR.data(), dRR.data(), drR_thr.data(), - iR.size(), iR.data(), I - ); - dur += iclock::now() - start; - - val += w_part * w[i] * func(ri, param.R, param.a, param.n); - } + EXPECT_NEAR (val, val_ref, tol); } - - EXPECT_NEAR(val, val_ref, tol); - } - printf("time elapsed = %8.3e seconds\n", dur.count()); + printf ("time elapsed = %8.3e seconds\n", dur.count ()); } - -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + 
MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; diff --git a/source/source_base/module_grid/test/test_radial.cpp b/source/source_base/module_grid/test/test_radial.cpp index abd91931f5e..a137e1b932d 100644 --- a/source/source_base/module_grid/test/test_radial.cpp +++ b/source/source_base/module_grid/test/test_radial.cpp @@ -8,7 +8,7 @@ #endif using namespace Grid::Radial; -using Func_t = std::function; +using Func_t = std::function; /** * This test briefly checks various radial quadrature schemes by comparing @@ -19,7 +19,7 @@ using Func_t = std::function; * */ -const double pi = std::acos(-1.0); +const double pi = std::acos (-1.0); // test functions f(r) and their analytical integrals // @@ -28,37 +28,27 @@ const double pi = std::acos(-1.0); // / 0 // std::vector> test_func_ref = { - { - [](double r) { - return std::exp(-0.3 * r * r) + std::exp(-3.0 * r * r); - }, - 0.25 * std::sqrt(pi) * (std::pow(0.3, -1.5) + std::pow(3.0, -1.5)) - }, - { - [](double r) { - return r * (std::exp(-0.3 * r * r) + std::exp(-3.0 * r * r)); - }, - 0.5 / (0.3 * 0.3) + 0.5 / (3.0 * 3.0) - }, - { - [](double r) { - return r * r * (std::exp(-0.3 * r * r) + std::exp(-3.0 * r * r)); - }, - 0.375 * std::sqrt(pi) * (std::pow(0.3, -2.5) + std::pow(3.0, -2.5)) - }, + {[] (double r) { return std::exp (-0.3 * r * r) + std::exp (-3.0 * r * r); }, + 0.25 * std::sqrt (pi) * (std::pow (0.3, -1.5) + std::pow (3.0, -1.5))}, + {[] (double r) { return r * (std::exp (-0.3 * r * r) + std::exp (-3.0 * r * r)); }, + 0.5 / (0.3 * 0.3) + 0.5 / (3.0 * 3.0)}, + {[] (double r) { return r * r * (std::exp (-0.3 * r * r) + std::exp (-3.0 * r * r)); }, + 0.375 * std::sqrt (pi) * (std::pow (0.3, -2.5) + std::pow (3.0, -2.5))}, }; - -double quadrature(const Func_t& f, int n, double* r, double* w) { +double + quadrature 
(const Func_t& f, int n, double* r, double* w) +{ double res = 0.0; - for (int i = 0; i < n; i++) { - res += w[i] * f(r[i]); - } + for (int i = 0; i < n; i++) + { + res += w[i] * f (r[i]); + } return res; } - -TEST(RadialTest, Baker) { +TEST (RadialTest, Baker) +{ // R should be large enough to cover the range of the function. // For mult = 1, R is the cutoff radius; for mult > 1, there // are (mult - 1) grid points extend beyond R. @@ -66,70 +56,75 @@ TEST(RadialTest, Baker) { int mult = 2; double R = 7.0; std::vector r, w; - baker(nbase, R, r, w, mult); + baker (nbase, R, r, w, mult); - EXPECT_EQ(r.size(), (nbase + 1) * mult - 1); + EXPECT_EQ (r.size (), (nbase + 1) * mult - 1); - for (auto& t : test_func_ref) { - double res = quadrature(t.first, r.size(), r.data(), w.data()); - EXPECT_NEAR(res, t.second, 1.0e-6); - } + for (auto& t: test_func_ref) + { + double res = quadrature (t.first, r.size (), r.data (), w.data ()); + EXPECT_NEAR (res, t.second, 1.0e-6); + } } - -TEST(RadialTest, Murray) { +TEST (RadialTest, Murray) +{ int n = 40; double R = 7.0; - std::vector r(n), w(n); - murray(n, R, r.data(), w.data()); + std::vector r (n), w (n); + murray (n, R, r.data (), w.data ()); - for (auto& t : test_func_ref) { - double res = quadrature(t.first, r.size(), r.data(), w.data()); - EXPECT_NEAR(res, t.second, 1.0e-6); - } + for (auto& t: test_func_ref) + { + double res = quadrature (t.first, r.size (), r.data (), w.data ()); + EXPECT_NEAR (res, t.second, 1.0e-6); + } } - -TEST(RadialTest, Treutler) { +TEST (RadialTest, Treutler) +{ int n = 40; double R = 7.0; - std::vector r(n), w(n); + std::vector r (n), w (n); - for (auto alpha : {0.0, 0.6, 1.0}) { - treutler_m4(n, R, r.data(), w.data(), alpha); + for (auto alpha: {0.0, 0.6, 1.0}) + { + treutler_m4 (n, R, r.data (), w.data (), alpha); - for (auto& t : test_func_ref) { - double res = quadrature(t.first, r.size(), r.data(), w.data()); - EXPECT_NEAR(res, t.second, 1.0e-6); + for (auto& t: test_func_ref) + { + double 
res = quadrature (t.first, r.size (), r.data (), w.data ()); + EXPECT_NEAR (res, t.second, 1.0e-6); + } } - } } - -TEST(RadialTest, Mura) { +TEST (RadialTest, Mura) +{ int n = 40; double R = 7.0; - std::vector r(n), w(n); - mura(n, R, r.data(), w.data()); + std::vector r (n), w (n); + mura (n, R, r.data (), w.data ()); - for (auto& t : test_func_ref) { - double res = quadrature(t.first, r.size(), r.data(), w.data()); - EXPECT_NEAR(res, t.second, 1.0e-6); - } + for (auto& t: test_func_ref) + { + double res = quadrature (t.first, r.size (), r.data (), w.data ()); + EXPECT_NEAR (res, t.second, 1.0e-6); + } } - -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; diff --git a/source/source_base/module_mixing/broyden_mixing.cpp b/source/source_base/module_mixing/broyden_mixing.cpp index 04852d7437e..f8e7456841c 100644 --- a/source/source_base/module_mixing/broyden_mixing.cpp +++ b/source/source_base/module_mixing/broyden_mixing.cpp @@ -7,197 +7,199 @@ #include "source_base/tool_title.h" namespace Base_Mixing { -template void Broyden_Mixing::tem_push_data(Mixing_Data& mdata, - const double* data_in, - const double* data_out, - std::function screen, - std::function mix, - const bool& need_calcoef); -template void Broyden_Mixing::tem_push_data( +template void Broyden_Mixing::tem_push_data (Mixing_Data& mdata, + const double* data_in, + const double* data_out, + std::function screen, + std::function mix, + const bool& need_calcoef); +template void Broyden_Mixing::tem_push_data ( Mixing_Data& mdata, const std::complex* data_in, const std::complex* data_out, - std::function*)> screen, - std::function*, const std::complex*, const std::complex*)> mix, + std::function*)> 
screen, + std::function*, const std::complex*, const std::complex*)> mix, const bool& need_calcoef); template -void Broyden_Mixing::tem_push_data(Mixing_Data& mdata, +void + Broyden_Mixing::tem_push_data (Mixing_Data& mdata, const FPTYPE* data_in, const FPTYPE* data_out, - std::function screen, - std::function mix, + std::function screen, + std::function mix, const bool& need_calcoef) { const size_t length = mdata.length; - std::vector F_tmp(length); + std::vector F_tmp (length); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (int i = 0; i < length; ++i) - { - F_tmp[i] = data_out[i] - data_in[i]; - } + { + F_tmp[i] = data_out[i] - data_in[i]; + } // get screened F if (screen != nullptr) - screen(F_tmp.data()); + screen (F_tmp.data ()); // container::Tensor data = data_in + mixing_beta * F; - std::vector data(length); - mix(data.data(), data_in, F_tmp.data()); + std::vector data (length); + mix (data.data (), data_in, F_tmp.data ()); - mdata.push(data.data()); + mdata.push (data.data ()); if (!need_calcoef) return; if (address != &mdata && address != nullptr) - ModuleBase::WARNING_QUIT( + ModuleBase::WARNING_QUIT ( "Broyden_Mixing", "One Broyden_Mixing object can only bind one Mixing_Data object to calculate coefficients"); - FPTYPE* FP_dF = static_cast(dF); - FPTYPE* FP_F = static_cast(F); + FPTYPE* FP_dF = static_cast (dF); + FPTYPE* FP_F = static_cast (F); if (mdata.ndim_use == 1) - { - address = &mdata; - // allocate - if (F != nullptr) - free(F); - F = malloc(sizeof(FPTYPE) * length); - FP_F = static_cast(F); - if (dF != nullptr) - free(dF); - dF = malloc(sizeof(FPTYPE) * length * mixing_ndim); - FP_dF = static_cast(dF); + { + address = &mdata; + // allocate + if (F != nullptr) + free (F); + F = malloc (sizeof (FPTYPE) * length); + FP_F = static_cast (F); + if (dF != nullptr) + free (dF); + dF = malloc (sizeof (FPTYPE) * length * mixing_ndim); + FP_dF = static_cast (dF); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for 
(int i = 0; i < length; ++i) - { - FP_F[i] = F_tmp[i]; + for (int i = 0; i < length; ++i) + { + FP_F[i] = F_tmp[i]; + } } - } else - { - this->ndim_cal_dF = std::min(this->ndim_cal_dF + 1, this->mixing_ndim); - start_dF = (this->start_dF + 1) % this->mixing_ndim; + { + this->ndim_cal_dF = std::min (this->ndim_cal_dF + 1, this->mixing_ndim); + start_dF = (this->start_dF + 1) % this->mixing_ndim; #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int i = 0; i < length; ++i) - { - FP_F[i] = F_tmp[i]; - // dF{n} = F{n-1} - F{n} = -(F{n} - F{n-1}) - FP_dF[start_dF * length + i] -= FP_F[i]; + for (int i = 0; i < length; ++i) + { + FP_F[i] = F_tmp[i]; + // dF{n} = F{n-1} - F{n} = -(F{n} - F{n-1}) + FP_dF[start_dF * length + i] -= FP_F[i]; + } } - } }; -template void Broyden_Mixing::tem_cal_coef(const Mixing_Data& mdata, - std::function inner_product); -template void Broyden_Mixing::tem_cal_coef( - const Mixing_Data& mdata, - std::function*, std::complex*)> inner_product); +template void Broyden_Mixing::tem_cal_coef (const Mixing_Data& mdata, + std::function inner_product); +template void + Broyden_Mixing::tem_cal_coef (const Mixing_Data& mdata, + std::function*, std::complex*)> inner_product); template -void Broyden_Mixing::tem_cal_coef(const Mixing_Data& mdata, std::function inner_product) +void + Broyden_Mixing::tem_cal_coef (const Mixing_Data& mdata, std::function inner_product) { - ModuleBase::TITLE("Broyden_Mixing", "Simplified_Broyden_mixing"); - ModuleBase::timer::start("Broyden_Mixing", "tem_cal_coef"); + ModuleBase::TITLE ("Broyden_Mixing", "Simplified_Broyden_mixing"); + ModuleBase::timer::start ("Broyden_Mixing", "tem_cal_coef"); - if (address != &mdata && address != nullptr) - { - ModuleBase::WARNING_QUIT( - "Broyden_mixing", - "One Broyden_Mixing object can only bind one Mixing_Data object to calculate coefficients"); - } + if (address != &mdata && address != nullptr) + { + ModuleBase::WARNING_QUIT ( + "Broyden_mixing", + "One 
Broyden_Mixing object can only bind one Mixing_Data object to calculate coefficients"); + } - const int length = mdata.length; - FPTYPE* FP_dF = static_cast(dF); - FPTYPE* FP_F = static_cast(F); + const int length = mdata.length; + FPTYPE* FP_dF = static_cast (dF); + FPTYPE* FP_F = static_cast (F); if (ndim_cal_dF > 0) - { - ModuleBase::matrix beta_tmp(ndim_cal_dF, ndim_cal_dF); - // beta(i, j) = - for (int i = 0; i < ndim_cal_dF; ++i) { - FPTYPE* dFi = FP_dF + i * length; - for (int j = i; j < ndim_cal_dF; ++j) - { - if (i != start_dF && j != start_dF) + ModuleBase::matrix beta_tmp (ndim_cal_dF, ndim_cal_dF); + // beta(i, j) = + for (int i = 0; i < ndim_cal_dF; ++i) { - beta_tmp(i, j) = beta(i, j); + FPTYPE* dFi = FP_dF + i * length; + for (int j = i; j < ndim_cal_dF; ++j) + { + if (i != start_dF && j != start_dF) + { + beta_tmp (i, j) = beta (i, j); + } + else + { + FPTYPE* dFj = FP_dF + j * length; + beta (i, j) = beta_tmp (i, j) = inner_product (dFi, dFj); + } + if (j != i) + { + beta_tmp (j, i) = beta_tmp (i, j); + } + } } - else + double* work = new double[ndim_cal_dF]; // workspace + int* iwork = new int[ndim_cal_dF]; // ipiv + char uu = 'U'; + int info = 0; + int m = 1; + // gamma means the coeficients for mixing + // but now gamma store , namely c + std::vector gamma (ndim_cal_dF); + for (int i = 0; i < ndim_cal_dF; ++i) { - FPTYPE* dFj = FP_dF + j * length; - beta(i, j) = beta_tmp(i, j) = inner_product(dFi, dFj); + FPTYPE* dFi = FP_dF + i * length; + gamma[i] = inner_product (dFi, FP_F); } - if (j != i) + + // solve aG = c + dsysv_ (&uu, + &ndim_cal_dF, + &m, + beta_tmp.c, + &ndim_cal_dF, + iwork, + gamma.data (), + &ndim_cal_dF, + work, + &ndim_cal_dF, + &info); + + if (info != 0) { - beta_tmp(j, i) = beta_tmp(i, j); + ModuleBase::WARNING_QUIT ("Charge_Mixing", "Error when DSYSV."); } - } - } - double* work = new double[ndim_cal_dF]; // workspace - int* iwork = new int[ndim_cal_dF]; // ipiv - char uu = 'U'; - int info = 0; - int m = 1; - // gamma means 
the coeficients for mixing - // but now gamma store , namely c - std::vector gamma(ndim_cal_dF); - for (int i = 0; i < ndim_cal_dF; ++i) - { - FPTYPE* dFi = FP_dF + i * length; - gamma[i] = inner_product(dFi, FP_F); - } - // solve aG = c - dsysv_(&uu, - &ndim_cal_dF, - &m, - beta_tmp.c, - &ndim_cal_dF, - iwork, - gamma.data(), - &ndim_cal_dF, - work, - &ndim_cal_dF, - &info); - - if (info != 0) - { - ModuleBase::WARNING_QUIT("Charge_Mixing", "Error when DSYSV."); - } - - // after solving, gamma store the coeficients for mixing - coef[mdata.start] = 1 + gamma[dFindex_move(0)]; - for (int i = 1; i < ndim_cal_dF; ++i) - { - coef[mdata.index_move(-i)] = gamma[dFindex_move(-i)] - gamma[dFindex_move(-i + 1)]; - } - coef[mdata.index_move(-ndim_cal_dF)] = -gamma[dFindex_move(-ndim_cal_dF + 1)]; + // after solving, gamma store the coeficients for mixing + coef[mdata.start] = 1 + gamma[dFindex_move (0)]; + for (int i = 1; i < ndim_cal_dF; ++i) + { + coef[mdata.index_move (-i)] = gamma[dFindex_move (-i)] - gamma[dFindex_move (-i + 1)]; + } + coef[mdata.index_move (-ndim_cal_dF)] = -gamma[dFindex_move (-ndim_cal_dF + 1)]; - delete[] work; - delete[] iwork; - } + delete[] work; + delete[] iwork; + } else - { - coef[0] = 1.0; - } + { + coef[0] = 1.0; + } - FPTYPE* dFnext = FP_dF + dFindex_move(1) * length; + FPTYPE* dFnext = FP_dF + dFindex_move (1) * length; #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (int i = 0; i < length; ++i) - { - dFnext[i] = FP_F[i]; - } - ModuleBase::timer::end("Broyden_Mixing", "tem_cal_coef"); + { + dFnext[i] = FP_F[i]; + } + ModuleBase::timer::end ("Broyden_Mixing", "tem_cal_coef"); }; } // namespace Base_Mixing diff --git a/source/source_base/module_mixing/broyden_mixing.h b/source/source_base/module_mixing/broyden_mixing.h index dc0486aca87..7f6104eee83 100644 --- a/source/source_base/module_mixing/broyden_mixing.h +++ b/source/source_base/module_mixing/broyden_mixing.h @@ -25,29 +25,30 @@ namespace Base_Mixing class 
Broyden_Mixing : public Mixing { public: - Broyden_Mixing(const int& mixing_ndim) + Broyden_Mixing (const int& mixing_ndim) { this->mixing_ndim = mixing_ndim; this->data_ndim = mixing_ndim + 1; - this->coef = std::vector(mixing_ndim + 1); - this->beta = ModuleBase::matrix(mixing_ndim, mixing_ndim, true); + this->coef = std::vector (mixing_ndim + 1); + this->beta = ModuleBase::matrix (mixing_ndim, mixing_ndim, true); } - Broyden_Mixing(const int& mixing_ndim, const double& mixing_beta) : Broyden_Mixing(mixing_ndim) + Broyden_Mixing (const int& mixing_ndim, const double& mixing_beta) : Broyden_Mixing (mixing_ndim) { this->mixing_beta = mixing_beta; } - virtual ~Broyden_Mixing() override + virtual ~Broyden_Mixing () override { if (F != nullptr) - free(F); + free (F); if (dF != nullptr) - free(dF); + free (dF); }; /** * @brief reset mixing * */ - virtual void reset() override + virtual void + reset () override { this->ndim_cal_dF = 0; this->start_dF = -1; @@ -69,24 +70,26 @@ class Broyden_Mixing : public Mixing * @param need_calcoef whether need to calculate the coef * */ - virtual void push_data(Mixing_Data& mdata, - const double* data_in, - const double* data_out, - std::function screen, - std::function mix, - const bool& need_calcoef) override + virtual void + push_data (Mixing_Data& mdata, + const double* data_in, + const double* data_out, + std::function screen, + std::function mix, + const bool& need_calcoef) override { - this->tem_push_data(mdata, data_in, data_out, screen, mix, need_calcoef); + this->tem_push_data (mdata, data_in, data_out, screen, mix, need_calcoef); }; - virtual void push_data( - Mixing_Data& mdata, - const std::complex* data_in, - const std::complex* data_out, - std::function*)> screen, - std::function*, const std::complex*, const std::complex*)> mix, - const bool& need_calcoef) override + virtual void + push_data ( + Mixing_Data& mdata, + const std::complex* data_in, + const std::complex* data_out, + std::function*)> screen, + 
std::function*, const std::complex*, const std::complex*)> mix, + const bool& need_calcoef) override { - this->tem_push_data(mdata, data_in, data_out, screen, mix, need_calcoef); + this->tem_push_data (mdata, data_in, data_out, screen, mix, need_calcoef); }; /** @@ -95,14 +98,16 @@ class Broyden_Mixing : public Mixing * @param mdata Mixing_Data * @param inner_product pointer to the inner dot function */ - virtual void cal_coef(const Mixing_Data& mdata, std::function inner_product) override + virtual void + cal_coef (const Mixing_Data& mdata, std::function inner_product) override { - tem_cal_coef(mdata, inner_product); + tem_cal_coef (mdata, inner_product); } - virtual void cal_coef(const Mixing_Data& mdata, - std::function*, std::complex*)> inner_product) override + virtual void + cal_coef (const Mixing_Data& mdata, + std::function*, std::complex*)> inner_product) override { - tem_cal_coef(mdata, inner_product); + tem_cal_coef (mdata, inner_product); } private: @@ -118,12 +123,12 @@ class Broyden_Mixing : public Mixing * */ template - void tem_push_data(Mixing_Data& mdata, - const FPTYPE* data_in, - const FPTYPE* data_out, - std::function screen, - std::function mix, - const bool& need_calcoef); + void tem_push_data (Mixing_Data& mdata, + const FPTYPE* data_in, + const FPTYPE* data_out, + std::function screen, + std::function mix, + const bool& need_calcoef); /** * @brief calculate coeficients for mixing @@ -132,7 +137,7 @@ class Broyden_Mixing : public Mixing * @param inner_product pointer to the inner dot function */ template - void tem_cal_coef(const Mixing_Data& mdata, std::function inner_product); + void tem_cal_coef (const Mixing_Data& mdata, std::function inner_product); private: // F = data_out - data_in @@ -149,7 +154,8 @@ class Broyden_Mixing : public Mixing // start index for dF int start_dF = -1; // get the index of i-th dF vector - int dFindex_move(const int& index) + int + dFindex_move (const int& index) { return (start_dF + index + mixing_ndim) % 
mixing_ndim; } diff --git a/source/source_base/module_mixing/mixing.cpp b/source/source_base/module_mixing/mixing.cpp index a252587ac67..1db3e894bb0 100644 --- a/source/source_base/module_mixing/mixing.cpp +++ b/source/source_base/module_mixing/mixing.cpp @@ -4,110 +4,116 @@ namespace Base_Mixing { -void Mixing::push_data(Mixing_Data& mdata, +void + Mixing::push_data (Mixing_Data& mdata, const double* data_in, const double* data_out, - std::function screen, + std::function screen, const bool& need_calcoef) { const size_t length = mdata.length; - this->push_data( + this->push_data ( mdata, data_in, data_out, screen, - [this, length](double* out, const double* in, const double* sres) { + [this, length] (double* out, const double* in, const double* sres) + { #ifdef _OPENMP #pragma omp parallel for schedule(static, 512) #endif - for (int i = 0; i < length; ++i) - { - out[i] = in[i] + this->mixing_beta * sres[i]; - } - }, + for (int i = 0; i < length; ++i) + { + out[i] = in[i] + this->mixing_beta * sres[i]; + } + }, need_calcoef); return; } -void Mixing::push_data(Mixing_Data& mdata, +void + Mixing::push_data (Mixing_Data& mdata, const std::complex* data_in, const std::complex* data_out, - std::function*)> screen, + std::function*)> screen, const bool& need_calcoef) { const size_t length = mdata.length; - this->push_data( + this->push_data ( mdata, data_in, data_out, screen, - [this, length](std::complex* out, const std::complex* in, const std::complex* sres) { + [this, length] (std::complex* out, const std::complex* in, const std::complex* sres) + { #ifdef _OPENMP #pragma omp parallel for schedule(static, 256) #endif - for (int i = 0; i < length; ++i) - { - out[i] = in[i] + this->mixing_beta * sres[i]; - } - }, + for (int i = 0; i < length; ++i) + { + out[i] = in[i] + this->mixing_beta * sres[i]; + } + }, need_calcoef); return; } -void Mixing::mix_data(const Mixing_Data& mdata, double* data_mix) +void + Mixing::mix_data (const Mixing_Data& mdata, double* data_mix) { if 
(mdata.length <= 0) return; - double* FP_data = static_cast(mdata.data); + double* FP_data = static_cast (mdata.data); if (mdata.ndim_use == 1) - { + { #ifdef _OPENMP #pragma omp parallel for schedule(static, 512) #endif - for (int i = 0; i < mdata.length; ++i) - data_mix[i] = FP_data[i]; - return; - } - container::BlasConnector::gemv('N', - mdata.length, - mdata.ndim_use, - 1.0, - FP_data, - mdata.length, - coef.data(), - 1, - 0.0, - data_mix, - 1); + for (int i = 0; i < mdata.length; ++i) + data_mix[i] = FP_data[i]; + return; + } + container::BlasConnector::gemv ('N', + mdata.length, + mdata.ndim_use, + 1.0, + FP_data, + mdata.length, + coef.data (), + 1, + 0.0, + data_mix, + 1); } -void Mixing::mix_data(const Mixing_Data& mdata, std::complex* data_mix) +void + Mixing::mix_data (const Mixing_Data& mdata, std::complex* data_mix) { if (mdata.length <= 0) return; - std::complex* FP_data = static_cast*>(mdata.data); + std::complex* FP_data = static_cast*> (mdata.data); if (mdata.ndim_use == 1) - { + { #ifdef _OPENMP #pragma omp parallel for schedule(static, 256) #endif - for (int i = 0; i < mdata.length; ++i) - data_mix[i] = FP_data[i]; - return; - } + for (int i = 0; i < mdata.length; ++i) + data_mix[i] = FP_data[i]; + return; + } // conver coef to complex - std::vector> coef_complex(coef.size()); - for (int i = 0; i < coef.size(); ++i) + std::vector> coef_complex (coef.size ()); + for (int i = 0; i < coef.size (); ++i) coef_complex[i] = coef[i]; - container::BlasConnector::gemv('N', - mdata.length, - mdata.ndim_use, - 1.0, - FP_data, - mdata.length, - coef_complex.data(), - 1, - 0.0, - data_mix, - 1); + container::BlasConnector::gemv ('N', + mdata.length, + mdata.ndim_use, + 1.0, + FP_data, + mdata.length, + coef_complex.data (), + 1, + 0.0, + data_mix, + 1); } } // namespace Base_Mixing \ No newline at end of file diff --git a/source/source_base/module_mixing/mixing.h b/source/source_base/module_mixing/mixing.h index ba0226ee790..ee78165bd4c 100644 --- 
a/source/source_base/module_mixing/mixing.h +++ b/source/source_base/module_mixing/mixing.h @@ -18,8 +18,8 @@ namespace Base_Mixing class Mixing { public: - Mixing(){}; - virtual ~Mixing(){}; + Mixing () {}; + virtual ~Mixing () {}; /** * @brief init mixing data @@ -29,9 +29,10 @@ class Mixing * @param type_size size of type * */ - virtual void init_mixing_data(Mixing_Data& mdata, const int& length, const size_t& type_size) const + virtual void + init_mixing_data (Mixing_Data& mdata, const int& length, const size_t& type_size) const { - mdata.resize(data_ndim, length, type_size); + mdata.resize (data_ndim, length, type_size); } /** @@ -45,21 +46,19 @@ class Mixing * @param need_calcoef whether need to calculate the coef * */ - virtual void push_data(Mixing_Data& mdata, - const double* data_in, - const double* data_out, - std::function screen, - std::function mix, - const bool& need_calcoef) - = 0; - virtual void push_data( + virtual void push_data (Mixing_Data& mdata, + const double* data_in, + const double* data_out, + std::function screen, + std::function mix, + const bool& need_calcoef) = 0; + virtual void push_data ( Mixing_Data& mdata, const std::complex* data_in, const std::complex* data_out, - std::function*)> screen, - std::function*, const std::complex*, const std::complex*)> mix, - const bool& need_calcoef) - = 0; + std::function*)> screen, + std::function*, const std::complex*, const std::complex*)> mix, + const bool& need_calcoef) = 0; /** * @brief @@ -71,16 +70,16 @@ class Mixing * @param need_calcoef whether need to calculate the coef * */ - virtual void push_data(Mixing_Data& mdata, - const double* data_in, - const double* data_out, - std::function screen, - const bool& need_calcoef); - virtual void push_data(Mixing_Data& mdata, - const std::complex* data_in, - const std::complex* data_out, - std::function*)> screen, - const bool& need_calcoef); + virtual void push_data (Mixing_Data& mdata, + const double* data_in, + const double* data_out, + 
std::function screen, + const bool& need_calcoef); + virtual void push_data (Mixing_Data& mdata, + const std::complex* data_in, + const std::complex* data_out, + std::function*)> screen, + const bool& need_calcoef); /** * @brief calculate coeficients for mixing @@ -88,10 +87,9 @@ class Mixing * @param mdata Mixing_Data * @param inner_product pointer to the inner dot function */ - virtual void cal_coef(const Mixing_Data& mdata, std::function inner_product) = 0; - virtual void cal_coef(const Mixing_Data& mdata, - std::function*, std::complex*)> inner_product) - = 0; + virtual void cal_coef (const Mixing_Data& mdata, std::function inner_product) = 0; + virtual void cal_coef (const Mixing_Data& mdata, + std::function*, std::complex*)> inner_product) = 0; /** * @brief calculate the mixing data @@ -99,14 +97,14 @@ class Mixing * @param mdata Mixing_Data * @param data_mix output data */ - void mix_data(const Mixing_Data& mdata, double* data_mix); - void mix_data(const Mixing_Data& mdata, std::complex* data_mix); + void mix_data (const Mixing_Data& mdata, double* data_mix); + void mix_data (const Mixing_Data& mdata, std::complex* data_mix); /** * @brief reset mixing * */ - virtual void reset() = 0; + virtual void reset () = 0; public: // mixing_beta from INPUT diff --git a/source/source_base/module_mixing/mixing_data.cpp b/source/source_base/module_mixing/mixing_data.cpp index 27d3292a727..e73fbc3d8d3 100644 --- a/source/source_base/module_mixing/mixing_data.cpp +++ b/source/source_base/module_mixing/mixing_data.cpp @@ -3,34 +3,37 @@ namespace Base_Mixing { -Mixing_Data::Mixing_Data(const int& ndim, const std::size_t& length, const size_t& type_size) +Mixing_Data::Mixing_Data (const int& ndim, const std::size_t& length, const size_t& type_size) { this->ndim_tot = ndim; this->length = length; if (ndim * length > 0) - { - this->data = malloc(ndim * length * type_size); - } + { + this->data = malloc (ndim * length * type_size); + } } -Mixing_Data::~Mixing_Data() 
+Mixing_Data::~Mixing_Data () { - if (this->data != nullptr) { - free(this->data); -} + if (this->data != nullptr) + { + free (this->data); + } } -void Mixing_Data::resize(const int& ndim, const std::size_t& length, const size_t& type_size) +void + Mixing_Data::resize (const int& ndim, const std::size_t& length, const size_t& type_size) { this->ndim_tot = ndim; this->length = length; - if (this->data != nullptr) { - free(this->data); -} + if (this->data != nullptr) + { + free (this->data); + } if (ndim * length > 0) - { - this->data = malloc(ndim * length * type_size); - } + { + this->data = malloc (ndim * length * type_size); + } this->start = -1; this->ndim_use = 0; this->ndim_history = 0; diff --git a/source/source_base/module_mixing/mixing_data.h b/source/source_base/module_mixing/mixing_data.h index df5ff4dc24f..766923eaf63 100644 --- a/source/source_base/module_mixing/mixing_data.h +++ b/source/source_base/module_mixing/mixing_data.h @@ -13,7 +13,7 @@ namespace Base_Mixing class Mixing_Data { public: - Mixing_Data() = default; + Mixing_Data () = default; /** * @brief Construct a new Mixing_Data object * @@ -22,13 +22,13 @@ class Mixing_Data * @param type_size size of type * */ - Mixing_Data(const int& ndim, const std::size_t& length, const size_t& type_size); + Mixing_Data (const int& ndim, const std::size_t& length, const size_t& type_size); /** * @brief Destroy the Mixing_Data object * */ - ~Mixing_Data(); + ~Mixing_Data (); /** * @brief resize the data @@ -38,33 +38,35 @@ class Mixing_Data * @param type_size size of type * */ - void resize(const int& ndim, const std::size_t& length, const size_t& type_size); + void resize (const int& ndim, const std::size_t& length, const size_t& type_size); /** * @brief push data to the tensor * */ template - void push(const FPTYPE* data_in) + void + push (const FPTYPE* data_in) { this->start = (this->start + 1) % this->ndim_tot; - this->ndim_use = std::min(this->ndim_use + 1, this->ndim_tot); + this->ndim_use = std::min 
(this->ndim_use + 1, this->ndim_tot); ++this->ndim_history; - FPTYPE* FP_startdata = static_cast(this->data) + this->start * this->length; + FPTYPE* FP_startdata = static_cast (this->data) + this->start * this->length; #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (std::size_t i = 0; i < length; ++i) - { - FP_startdata[i] = data_in[i]; - } + { + FP_startdata[i] = data_in[i]; + } } /** * @brief reset mixing * */ - void reset() + void + reset () { this->ndim_use = 0; this->ndim_history = 0; @@ -75,7 +77,8 @@ class Mixing_Data * @brief get the index of i-th vector * */ - int index_move(const int& n) const + int + index_move (const int& n) const { return (n + this->start + ndim_tot) % ndim_tot; } diff --git a/source/source_base/module_mixing/plain_mixing.cpp b/source/source_base/module_mixing/plain_mixing.cpp index 591519e79f0..d7f08eddbae 100644 --- a/source/source_base/module_mixing/plain_mixing.cpp +++ b/source/source_base/module_mixing/plain_mixing.cpp @@ -5,95 +5,97 @@ #include "source_base/tool_title.h" namespace Base_Mixing { -template void Plain_Mixing::tem_push_data(Mixing_Data& mdata, - const double* data_in, - const double* data_out, - std::function screen, - std::function mix, - const bool& need_calcoef); -template void Plain_Mixing::tem_push_data( +template void Plain_Mixing::tem_push_data (Mixing_Data& mdata, + const double* data_in, + const double* data_out, + std::function screen, + std::function mix, + const bool& need_calcoef); +template void Plain_Mixing::tem_push_data ( Mixing_Data& mdata, const std::complex* data_in, const std::complex* data_out, - std::function*)> screen, - std::function*, const std::complex*, const std::complex*)> mix, + std::function*)> screen, + std::function*, const std::complex*, const std::complex*)> mix, const bool& need_calcoef); template -void Plain_Mixing::tem_push_data(Mixing_Data& mdata, +void + Plain_Mixing::tem_push_data (Mixing_Data& mdata, const FPTYPE* data_in, const FPTYPE* data_out, - 
std::function screen, - std::function mix, + std::function screen, + std::function mix, const bool& need_calcoef) { const size_t length = mdata.length; - std::vector F_tmp(length); + std::vector F_tmp (length); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (int i = 0; i < length; ++i) - { - F_tmp[i] = data_out[i] - data_in[i]; - } + { + F_tmp[i] = data_out[i] - data_in[i]; + } // get screened F if (screen != nullptr) - screen(F_tmp.data()); + screen (F_tmp.data ()); // container::Tensor data = data_in + mixing_beta * F; - std::vector data(length); - mix(data.data(), data_in, F_tmp.data()); + std::vector data (length); + mix (data.data (), data_in, F_tmp.data ()); - mdata.push(data.data()); + mdata.push (data.data ()); }; -template void Plain_Mixing::simple_mix(double* data_new, - const double* data_in, - const double* data_out, - const int& length, - std::function screen); -template void Plain_Mixing::simple_mix(std::complex* data_new, - const std::complex* data_in, - const std::complex* data_out, - const int& length, - std::function*)> screen); +template void Plain_Mixing::simple_mix (double* data_new, + const double* data_in, + const double* data_out, + const int& length, + std::function screen); +template void Plain_Mixing::simple_mix (std::complex* data_new, + const std::complex* data_in, + const std::complex* data_out, + const int& length, + std::function*)> screen); template -void Plain_Mixing::simple_mix(FPTYPE* data_new, +void + Plain_Mixing::simple_mix (FPTYPE* data_new, const FPTYPE* data_in, const FPTYPE* data_out, const int& length, - std::function screen) + std::function screen) { if (screen == nullptr) - { + { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int ig = 0; ig < length; ig++) - { - data_new[ig] = data_in[ig] + this->mixing_beta * (data_out[ig] - data_in[ig]); + for (int ig = 0; ig < length; ig++) + { + data_new[ig] = data_in[ig] + this->mixing_beta * (data_out[ig] - data_in[ig]); + } } - } else 
- { - std::vector F_tmp(length); + { + std::vector F_tmp (length); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int i = 0; i < length; ++i) - { - F_tmp[i] = data_out[i] - data_in[i]; - } - screen(F_tmp.data()); + for (int i = 0; i < length; ++i) + { + F_tmp[i] = data_out[i] - data_in[i]; + } + screen (F_tmp.data ()); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int i = 0; i < length; ++i) - { - data_new[i] = data_in[i] + this->mixing_beta * F_tmp[i]; + for (int i = 0; i < length; ++i) + { + data_new[i] = data_in[i] + this->mixing_beta * F_tmp[i]; + } } - } } } // namespace Base_Mixing \ No newline at end of file diff --git a/source/source_base/module_mixing/plain_mixing.h b/source/source_base/module_mixing/plain_mixing.h index 88cb280ca23..14760a10045 100644 --- a/source/source_base/module_mixing/plain_mixing.h +++ b/source/source_base/module_mixing/plain_mixing.h @@ -11,21 +11,16 @@ namespace Base_Mixing class Plain_Mixing : public Mixing { public: - Plain_Mixing() - { - this->coef = std::vector(1, 1.0); - } - Plain_Mixing(const double& mixing_beta) : Plain_Mixing() - { - this->mixing_beta = mixing_beta; - } - virtual ~Plain_Mixing() override{}; + Plain_Mixing () { this->coef = std::vector (1, 1.0); } + Plain_Mixing (const double& mixing_beta) : Plain_Mixing () { this->mixing_beta = mixing_beta; } + virtual ~Plain_Mixing () override {}; /** * @brief reset mixing * */ - virtual void reset() override + virtual void + reset () override { return; } @@ -45,24 +40,26 @@ class Plain_Mixing : public Mixing * @param need_calcoef whether need to calculate the coef * */ - virtual void push_data(Mixing_Data& mdata, - const double* data_in, - const double* data_out, - std::function screen, - std::function mix, - const bool& need_calcoef) override + virtual void + push_data (Mixing_Data& mdata, + const double* data_in, + const double* data_out, + std::function screen, + std::function mix, + const bool& need_calcoef) override { 
- this->tem_push_data(mdata, data_in, data_out, screen, mix, need_calcoef); + this->tem_push_data (mdata, data_in, data_out, screen, mix, need_calcoef); }; - virtual void push_data( - Mixing_Data& mdata, - const std::complex* data_in, - const std::complex* data_out, - std::function*)> screen, - std::function*, const std::complex*, const std::complex*)> mix, - const bool& need_calcoef) override + virtual void + push_data ( + Mixing_Data& mdata, + const std::complex* data_in, + const std::complex* data_out, + std::function*)> screen, + std::function*, const std::complex*, const std::complex*)> mix, + const bool& need_calcoef) override { - this->tem_push_data(mdata, data_in, data_out, screen, mix, need_calcoef); + this->tem_push_data (mdata, data_in, data_out, screen, mix, need_calcoef); }; /** @@ -71,12 +68,14 @@ class Plain_Mixing : public Mixing * @param mdata Mixing_Data * @param inner_product pointer to the inner dot function */ - virtual void cal_coef(const Mixing_Data& mdata, std::function inner_product) override + virtual void + cal_coef (const Mixing_Data& mdata, std::function inner_product) override { return; } - virtual void cal_coef(const Mixing_Data& mdata, - std::function*, std::complex*)> inner_product) override + virtual void + cal_coef (const Mixing_Data& mdata, + std::function*, std::complex*)> inner_product) override { return; } @@ -87,21 +86,23 @@ class Plain_Mixing : public Mixing * * @param data_new can be the same as data_in or data_out */ - void plain_mix(double* data_new, - const double* data_in, - const double* data_out, - const int& length, - std::function screen) + void + plain_mix (double* data_new, + const double* data_in, + const double* data_out, + const int& length, + std::function screen) { - this->simple_mix(data_new, data_in, data_out, length, screen); + this->simple_mix (data_new, data_in, data_out, length, screen); } - void plain_mix(std::complex* data_new, - const std::complex* data_in, - const std::complex* data_out, - const 
int& length, - std::function*)> screen) + void + plain_mix (std::complex* data_new, + const std::complex* data_in, + const std::complex* data_out, + const int& length, + std::function*)> screen) { - this->simple_mix(data_new, data_in, data_out, length, screen); + this->simple_mix (data_new, data_in, data_out, length, screen); } private: @@ -117,12 +118,12 @@ class Plain_Mixing : public Mixing * */ template - void tem_push_data(Mixing_Data& mdata, - const FPTYPE* data_in, - const FPTYPE* data_out, - std::function screen, - std::function mix, - const bool& need_calcoef); + void tem_push_data (Mixing_Data& mdata, + const FPTYPE* data_in, + const FPTYPE* data_out, + std::function screen, + std::function mix, + const bool& need_calcoef); /** * @brief Simple plain mixing @@ -131,11 +132,11 @@ class Plain_Mixing : public Mixing * @param data_new can be the same as data_in or data_out */ template - void simple_mix(FPTYPE* data_new, - const FPTYPE* data_in, - const FPTYPE* data_out, - const int& length, - std::function screen); + void simple_mix (FPTYPE* data_new, + const FPTYPE* data_in, + const FPTYPE* data_out, + const int& length, + std::function screen); }; } // namespace Base_Mixing #endif \ No newline at end of file diff --git a/source/source_base/module_mixing/pulay_mixing.cpp b/source/source_base/module_mixing/pulay_mixing.cpp index b342c0b5f35..e91dac4a1a5 100644 --- a/source/source_base/module_mixing/pulay_mixing.cpp +++ b/source/source_base/module_mixing/pulay_mixing.cpp @@ -6,177 +6,179 @@ #include "source_base/tool_title.h" namespace Base_Mixing { -template void Pulay_Mixing::tem_push_data(Mixing_Data& mdata, - const double* data_in, - const double* data_out, - std::function screen, - std::function mix, - const bool& need_calcoef); -template void Pulay_Mixing::tem_push_data( +template void Pulay_Mixing::tem_push_data (Mixing_Data& mdata, + const double* data_in, + const double* data_out, + std::function screen, + std::function mix, + const bool& need_calcoef); 
+template void Pulay_Mixing::tem_push_data ( Mixing_Data& mdata, const std::complex* data_in, const std::complex* data_out, - std::function*)> screen, - std::function*, const std::complex*, const std::complex*)> mix, + std::function*)> screen, + std::function*, const std::complex*, const std::complex*)> mix, const bool& need_calcoef); template -void Pulay_Mixing::tem_push_data(Mixing_Data& mdata, +void + Pulay_Mixing::tem_push_data (Mixing_Data& mdata, const FPTYPE* data_in, const FPTYPE* data_out, - std::function screen, - std::function mix, + std::function screen, + std::function mix, const bool& need_calcoef) { const std::size_t length = mdata.length; - std::vector F_tmp(length); + std::vector F_tmp (length); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (std::size_t i = 0; i < length; ++i) - { - F_tmp[i] = data_out[i] - data_in[i]; - } + { + F_tmp[i] = data_out[i] - data_in[i]; + } // get screened F if (screen != nullptr) - screen(F_tmp.data()); + screen (F_tmp.data ()); // container::Tensor data = data_in + mixing_beta * F; - std::vector data(length); - mix(data.data(), data_in, F_tmp.data()); + std::vector data (length); + mix (data.data (), data_in, F_tmp.data ()); - mdata.push(data.data()); + mdata.push (data.data ()); if (!need_calcoef) return; if (address != &mdata && address != nullptr) - ModuleBase::WARNING_QUIT( + ModuleBase::WARNING_QUIT ( "Pulay_Mixing", "One Pulay_Mixing object can only bind one Mixing_Data object to calculate coefficients"); - FPTYPE* FP_F = static_cast(F); + FPTYPE* FP_F = static_cast (F); if (mdata.ndim_use == 1) - { - address = &mdata; - // allocate - if (F != nullptr) - free(F); - F = malloc(sizeof(FPTYPE) * length * mixing_ndim); - FP_F = static_cast(F); + { + address = &mdata; + // allocate + if (F != nullptr) + free (F); + F = malloc (sizeof (FPTYPE) * length * mixing_ndim); + FP_F = static_cast (F); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (std::size_t i = 0; i < length; 
++i) - { - FP_F[i] = F_tmp[i]; + for (std::size_t i = 0; i < length; ++i) + { + FP_F[i] = F_tmp[i]; + } } - } else - { - start_F = (this->start_F + 1) % this->mixing_ndim; - FPTYPE* FP_startF = FP_F + start_F * length; + { + start_F = (this->start_F + 1) % this->mixing_ndim; + FPTYPE* FP_startF = FP_F + start_F * length; #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (std::size_t i = 0; i < length; ++i) - { - FP_startF[i] = F_tmp[i]; + for (std::size_t i = 0; i < length; ++i) + { + FP_startF[i] = F_tmp[i]; + } } - } }; -template void Pulay_Mixing::tem_cal_coef(const Mixing_Data& mdata, - std::function inner_product); -template void Pulay_Mixing::tem_cal_coef( - const Mixing_Data& mdata, - std::function*, std::complex*)> inner_product); +template void Pulay_Mixing::tem_cal_coef (const Mixing_Data& mdata, + std::function inner_product); +template void + Pulay_Mixing::tem_cal_coef (const Mixing_Data& mdata, + std::function*, std::complex*)> inner_product); template -void Pulay_Mixing::tem_cal_coef(const Mixing_Data& mdata, std::function inner_product) +void + Pulay_Mixing::tem_cal_coef (const Mixing_Data& mdata, std::function inner_product) { - ModuleBase::TITLE("Charge_Mixing", "Pulay_mixing"); - ModuleBase::timer::start("Charge", "Pulay_mixing"); + ModuleBase::TITLE ("Charge_Mixing", "Pulay_mixing"); + ModuleBase::timer::start ("Charge", "Pulay_mixing"); if (address != &mdata && address != nullptr) - ModuleBase::WARNING_QUIT( + ModuleBase::WARNING_QUIT ( "Pulay_mixing", "One Pulay_Mixing object can only bind one Mixing_Data object to calculate coefficients"); const std::size_t length = mdata.length; - FPTYPE* FP_F = static_cast(F); + FPTYPE* FP_F = static_cast (F); if (mdata.ndim_use > 1) - { - const int ndim_use = mdata.ndim_use; - ModuleBase::matrix beta_tmp(ndim_use, ndim_use); - // beta(i, j) = - for (int i = 0; i < ndim_use; ++i) { - FPTYPE* Fi = FP_F + i * length; - for (int j = i; j < ndim_use; ++j) - { - if (i != start_F && j != 
start_F) + const int ndim_use = mdata.ndim_use; + ModuleBase::matrix beta_tmp (ndim_use, ndim_use); + // beta(i, j) = + for (int i = 0; i < ndim_use; ++i) + { + FPTYPE* Fi = FP_F + i * length; + for (int j = i; j < ndim_use; ++j) + { + if (i != start_F && j != start_F) + { + beta_tmp (i, j) = beta (i, j); + } + else + { + FPTYPE* Fj = FP_F + j * length; + beta (i, j) = beta_tmp (i, j) = inner_product (Fi, Fj); + } + if (j != i) + { + beta (j, i) = beta_tmp (j, i) = beta_tmp (i, j); + } + } + } + + double* work = new double[ndim_use]; + int* iwork = new int[ndim_use]; + char uu = 'U'; + int info; + dsytrf_ (&uu, &ndim_use, beta_tmp.c, &ndim_use, iwork, work, &ndim_use, &info); + if (info != 0) + ModuleBase::WARNING_QUIT ("Charge_Mixing", "Error when factorizing beta."); + dsytri_ (&uu, &ndim_use, beta_tmp.c, &ndim_use, iwork, work, &info); + if (info != 0) + ModuleBase::WARNING_QUIT ("Charge_Mixing", "Error when DSYTRI beta."); + for (int i = 0; i < ndim_use; ++i) { - beta_tmp(i, j) = beta(i, j); + for (int j = i + 1; j < ndim_use; ++j) + { + beta_tmp (i, j) = beta_tmp (j, i); + } } - else + + // coef{i} = \sum_j beta{ij} / \sum_k \sum_j beta{kj} + double sum_beta = 0.; + for (int i = 0; i < ndim_use; ++i) { - FPTYPE* Fj = FP_F + j * length; - beta(i, j) = beta_tmp(i, j) = inner_product(Fi, Fj); + for (int j = 0; j < ndim_use; ++j) + { + sum_beta += beta_tmp (j, i); + } } - if (j != i) + for (int i = 0; i < ndim_use; ++i) { - beta(j, i) = beta_tmp(j, i) = beta_tmp(i, j); + coef[i] = 0.; + for (int j = 0; j < ndim_use; ++j) + { + coef[i] += beta_tmp (i, j); + } + coef[i] /= sum_beta; } - } - } - - double* work = new double[ndim_use]; - int* iwork = new int[ndim_use]; - char uu = 'U'; - int info; - dsytrf_(&uu, &ndim_use, beta_tmp.c, &ndim_use, iwork, work, &ndim_use, &info); - if (info != 0) - ModuleBase::WARNING_QUIT("Charge_Mixing", "Error when factorizing beta."); - dsytri_(&uu, &ndim_use, beta_tmp.c, &ndim_use, iwork, work, &info); - if (info != 0) - 
ModuleBase::WARNING_QUIT("Charge_Mixing", "Error when DSYTRI beta."); - for (int i = 0; i < ndim_use; ++i) - { - for (int j = i + 1; j < ndim_use; ++j) - { - beta_tmp(i, j) = beta_tmp(j, i); - } + delete[] work; + delete[] iwork; } - - // coef{i} = \sum_j beta{ij} / \sum_k \sum_j beta{kj} - double sum_beta = 0.; - for (int i = 0; i < ndim_use; ++i) - { - for (int j = 0; j < ndim_use; ++j) - { - sum_beta += beta_tmp(j, i); - } - } - for (int i = 0; i < ndim_use; ++i) + else { - coef[i] = 0.; - for (int j = 0; j < ndim_use; ++j) - { - coef[i] += beta_tmp(i, j); - } - coef[i] /= sum_beta; + beta (0, 0) = inner_product (FP_F, FP_F); + coef[0] = 1.0; } - delete[] work; - delete[] iwork; - } - else - { - beta(0, 0) = inner_product(FP_F, FP_F); - coef[0] = 1.0; - } - ModuleBase::timer::end("Charge", "Pulay_mixing"); + ModuleBase::timer::end ("Charge", "Pulay_mixing"); }; } // namespace Base_Mixing diff --git a/source/source_base/module_mixing/pulay_mixing.h b/source/source_base/module_mixing/pulay_mixing.h index c336959dc93..30a6743adac 100644 --- a/source/source_base/module_mixing/pulay_mixing.h +++ b/source/source_base/module_mixing/pulay_mixing.h @@ -19,27 +19,28 @@ namespace Base_Mixing class Pulay_Mixing : public Mixing { public: - Pulay_Mixing(const int& mixing_ndim) + Pulay_Mixing (const int& mixing_ndim) { this->mixing_ndim = mixing_ndim; this->data_ndim = mixing_ndim; - this->coef = std::vector(mixing_ndim); - this->beta = ModuleBase::matrix(mixing_ndim, mixing_ndim, true); + this->coef = std::vector (mixing_ndim); + this->beta = ModuleBase::matrix (mixing_ndim, mixing_ndim, true); } - Pulay_Mixing(const int& mixing_ndim, const double& mixing_beta) : Pulay_Mixing(mixing_ndim) + Pulay_Mixing (const int& mixing_ndim, const double& mixing_beta) : Pulay_Mixing (mixing_ndim) { this->mixing_beta = mixing_beta; } - virtual ~Pulay_Mixing() override + virtual ~Pulay_Mixing () override { if (F != nullptr) - free(F); + free (F); } /** * @brief reset mixing * */ - virtual 
void reset() override + virtual void + reset () override { this->start_F = 0; this->address = nullptr; @@ -56,24 +57,26 @@ class Pulay_Mixing : public Mixing * @param need_calcoef whether need to calculate the coef * */ - virtual void push_data(Mixing_Data& mdata, - const double* data_in, - const double* data_out, - std::function screen, - std::function mix, - const bool& need_calcoef) override + virtual void + push_data (Mixing_Data& mdata, + const double* data_in, + const double* data_out, + std::function screen, + std::function mix, + const bool& need_calcoef) override { - this->tem_push_data(mdata, data_in, data_out, screen, mix, need_calcoef); + this->tem_push_data (mdata, data_in, data_out, screen, mix, need_calcoef); }; - virtual void push_data( - Mixing_Data& mdata, - const std::complex* data_in, - const std::complex* data_out, - std::function*)> screen, - std::function*, const std::complex*, const std::complex*)> mix, - const bool& need_calcoef) override + virtual void + push_data ( + Mixing_Data& mdata, + const std::complex* data_in, + const std::complex* data_out, + std::function*)> screen, + std::function*, const std::complex*, const std::complex*)> mix, + const bool& need_calcoef) override { - this->tem_push_data(mdata, data_in, data_out, screen, mix, need_calcoef); + this->tem_push_data (mdata, data_in, data_out, screen, mix, need_calcoef); }; /** @@ -82,14 +85,16 @@ class Pulay_Mixing : public Mixing * @param mdata Mixing_Data * @param inner_product pointer to the inner dot function */ - virtual void cal_coef(const Mixing_Data& mdata, std::function inner_product) override + virtual void + cal_coef (const Mixing_Data& mdata, std::function inner_product) override { - tem_cal_coef(mdata, inner_product); + tem_cal_coef (mdata, inner_product); } - virtual void cal_coef(const Mixing_Data& mdata, - std::function*, std::complex*)> inner_product) override + virtual void + cal_coef (const Mixing_Data& mdata, + std::function*, std::complex*)> inner_product) 
override { - tem_cal_coef(mdata, inner_product); + tem_cal_coef (mdata, inner_product); } private: @@ -105,12 +110,12 @@ class Pulay_Mixing : public Mixing * */ template - void tem_push_data(Mixing_Data& mdata, - const FPTYPE* data_in, - const FPTYPE* data_out, - std::function screen, - std::function mix, - const bool& need_calcoef); + void tem_push_data (Mixing_Data& mdata, + const FPTYPE* data_in, + const FPTYPE* data_out, + std::function screen, + std::function mix, + const bool& need_calcoef); /** * @brief calculate coeficients for mixing @@ -119,7 +124,7 @@ class Pulay_Mixing : public Mixing * @param inner_product pointer to the inner dot function */ template - void tem_cal_coef(const Mixing_Data& mdata, std::function inner_product); + void tem_cal_coef (const Mixing_Data& mdata, std::function inner_product); // F = data_out - data_in void* F = nullptr; diff --git a/source/source_base/module_mixing/test/mixing_test.cpp b/source/source_base/module_mixing/test/mixing_test.cpp index cd9340cdf32..06abf805416 100644 --- a/source/source_base/module_mixing/test/mixing_test.cpp +++ b/source/source_base/module_mixing/test/mixing_test.cpp @@ -9,20 +9,16 @@ #include "gtest/gtest.h" #define DOUBLETHRESHOLD 1e-8 -double ext_inner_product_mock(double* x1, double* x2) +double + ext_inner_product_mock (double* x1, double* x2) { return 0.0; } class Mixing_Test : public testing::Test { protected: - Mixing_Test() - { - } - ~Mixing_Test() - { - delete this->mixing; - } + Mixing_Test () {} + ~Mixing_Test () { delete this->mixing; } const double mixing_beta = 0.6; const int mixing_ndim = 3; Base_Mixing::Mixing_Data xdata; @@ -31,28 +27,26 @@ class Mixing_Test : public testing::Test int niter = 0; int maxiter = 10; std::vector xd_ref = {0.0, 0.0, 0.0}; - std::vector> xc_ref = { - {0.0, 1.0}, - {1.0, 0.0}, - 0.0 - }; - void init_method(std::string method) + std::vector> xc_ref = {{0.0, 1.0}, {1.0, 0.0}, 0.0}; + void + init_method (std::string method) { if (method == "broyden") - { - 
this->mixing = new Base_Mixing::Broyden_Mixing(this->mixing_ndim, this->mixing_beta); - } + { + this->mixing = new Base_Mixing::Broyden_Mixing (this->mixing_ndim, this->mixing_beta); + } else if (method == "pulay") - { - this->mixing = new Base_Mixing::Pulay_Mixing(this->mixing_ndim, this->mixing_beta); - } + { + this->mixing = new Base_Mixing::Pulay_Mixing (this->mixing_ndim, this->mixing_beta); + } else if (method == "plain") - { - this->mixing = new Base_Mixing::Plain_Mixing(this->mixing_beta); - } + { + this->mixing = new Base_Mixing::Plain_Mixing (this->mixing_beta); + } } - void clear() + void + clear () { delete this->mixing; this->mixing = nullptr; @@ -69,241 +63,246 @@ class Mixing_Test : public testing::Test * [x3] [-6/12 -3/12 36/12][x3] */ template - void solve_linear_eq(FPTYPE* x_in, FPTYPE* x_out, bool diff_beta = false) + void + solve_linear_eq (FPTYPE* x_in, FPTYPE* x_out, bool diff_beta = false) { - this->mixing->init_mixing_data(xdata, 3, sizeof(FPTYPE)); - std::vector delta_x(3); + this->mixing->init_mixing_data (xdata, 3, sizeof (FPTYPE)); + std::vector delta_x (3); - auto screen = std::bind(&Mixing_Test::Kerker_mock, this, std::placeholders::_1); + auto screen = std::bind (&Mixing_Test::Kerker_mock, this, std::placeholders::_1); auto inner_product - = std::bind(static_cast(&Mixing_Test::inner_product_mock), - this, - std::placeholders::_1, - std::placeholders::_2); + = std::bind (static_cast (&Mixing_Test::inner_product_mock), + this, + std::placeholders::_1, + std::placeholders::_2); double residual = 10.; this->niter = 0; while (niter < maxiter) - { - x_out[0] = (3. * x_in[1] - 2. * x_in[2] + 20.) / 8.; - x_out[1] = (-4. * x_out[0] + 1. * x_in[2] + 33.) / 11.; - x_out[2] = (-6. * x_out[0] - 3. * x_out[1] + 36.) / 12.; - - niter++; - - for (int i = 0; i < 3; ++i) - { - delta_x[i] = x_out[i] - x_in[i]; - } - residual = this->inner_product_mock(delta_x.data(), delta_x.data()); - if (residual <= thr) { - break; + x_out[0] = (3. * x_in[1] - 2. 
* x_in[2] + 20.) / 8.; + x_out[1] = (-4. * x_out[0] + 1. * x_in[2] + 33.) / 11.; + x_out[2] = (-6. * x_out[0] - 3. * x_out[1] + 36.) / 12.; + + niter++; + + for (int i = 0; i < 3; ++i) + { + delta_x[i] = x_out[i] - x_in[i]; + } + residual = this->inner_product_mock (delta_x.data (), delta_x.data ()); + if (residual <= thr) + { + break; + } + if (diff_beta) + { + this->mixing->push_data ( + this->xdata, + x_in, + x_out, + screen, + // mixing can use different mixing_beta for one vector + [] (FPTYPE* out, const FPTYPE* in, const FPTYPE* sres) + { + out[0] = in[0] + 0.5 * sres[0]; + out[1] = in[1] + 0.6 * sres[1]; + out[2] = in[2] + 0.5 * sres[2]; + }, + true); + } + else + { + this->mixing->push_data (this->xdata, x_in, x_out, screen, true); + } + + this->mixing->cal_coef (this->xdata, inner_product); + + this->mixing->mix_data (this->xdata, x_in); } - if (diff_beta) - { - this->mixing->push_data( - this->xdata, - x_in, - x_out, - screen, - // mixing can use different mixing_beta for one vector - [](FPTYPE* out, const FPTYPE* in, const FPTYPE* sres) { - out[0] = in[0] + 0.5 * sres[0]; - out[1] = in[1] + 0.6 * sres[1]; - out[2] = in[2] + 0.5 * sres[2]; - }, - true); - } - else - { - this->mixing->push_data(this->xdata, x_in, x_out, screen, true); - } - - this->mixing->cal_coef(this->xdata, inner_product); - - this->mixing->mix_data(this->xdata, x_in); - } } template - void Kerker_mock(FPTYPE* drho) + void + Kerker_mock (FPTYPE* drho) { } - double inner_product_mock(double* x1, double* x2) + double + inner_product_mock (double* x1, double* x2) { double xnorm = 0.0; for (int ir = 0; ir < 3; ++ir) - { - xnorm += x1[ir] * x2[ir]; - } + { + xnorm += x1[ir] * x2[ir]; + } return xnorm; } - double inner_product_mock(std::complex* x1, std::complex* x2) + double + inner_product_mock (std::complex* x1, std::complex* x2) { double xnorm = 0.0; for (int ir = 0; ir < 3; ++ir) - { - xnorm += x1[ir].real() * x2[ir].real() + x1[ir].imag() * x2[ir].imag(); - } + { + xnorm += x1[ir].real 
() * x2[ir].real () + x1[ir].imag () * x2[ir].imag (); + } return xnorm; } }; -TEST_F(Mixing_Test, BroydenSolveLinearEq) +TEST_F (Mixing_Test, BroydenSolveLinearEq) { #ifdef _OPENMP - omp_set_num_threads(1); + omp_set_num_threads (1); #endif - init_method("broyden"); + init_method ("broyden"); std::vector x_in = xd_ref; - std::vector x_out(3); - solve_linear_eq(x_in.data(), x_out.data(), true); - EXPECT_NEAR(x_out[0], 3.0, DOUBLETHRESHOLD); - EXPECT_NEAR(x_out[1], 2.0, DOUBLETHRESHOLD); - EXPECT_NEAR(x_out[2], 1.0, DOUBLETHRESHOLD); - ASSERT_EQ(niter, 5); + std::vector x_out (3); + solve_linear_eq (x_in.data (), x_out.data (), true); + EXPECT_NEAR (x_out[0], 3.0, DOUBLETHRESHOLD); + EXPECT_NEAR (x_out[1], 2.0, DOUBLETHRESHOLD); + EXPECT_NEAR (x_out[2], 1.0, DOUBLETHRESHOLD); + ASSERT_EQ (niter, 5); - this->mixing->reset(); - xdata.reset(); + this->mixing->reset (); + xdata.reset (); std::vector> xc_in = xc_ref; - std::vector> xc_out(3); - solve_linear_eq>(xc_in.data(), xc_out.data(), true); - EXPECT_NEAR(xc_out[0].real(), 3.0, DOUBLETHRESHOLD); - EXPECT_NEAR(xc_out[1].real(), 2.0, DOUBLETHRESHOLD); - EXPECT_NEAR(xc_out[2].real(), 1.0, DOUBLETHRESHOLD); - ASSERT_EQ(niter, 5); + std::vector> xc_out (3); + solve_linear_eq> (xc_in.data (), xc_out.data (), true); + EXPECT_NEAR (xc_out[0].real (), 3.0, DOUBLETHRESHOLD); + EXPECT_NEAR (xc_out[1].real (), 2.0, DOUBLETHRESHOLD); + EXPECT_NEAR (xc_out[2].real (), 1.0, DOUBLETHRESHOLD); + ASSERT_EQ (niter, 5); std::string output; Base_Mixing::Mixing_Data testdata; - this->mixing->init_mixing_data(testdata, 3, sizeof(double)); - - testing::internal::CaptureStdout(); - EXPECT_EXIT(this->mixing->push_data(testdata, x_in.data(), x_out.data(), nullptr, true), - ::testing::ExitedWithCode(1), - ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT( - output, - testing::HasSubstr("One Broyden_Mixing object can only bind one Mixing_Data object to calculate coefficients")); - - testing::internal::CaptureStdout(); - 
EXPECT_EXIT(this->mixing->cal_coef(testdata, ext_inner_product_mock), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT( - output, - testing::HasSubstr("One Broyden_Mixing object can only bind one Mixing_Data object to calculate coefficients")); - - clear(); + this->mixing->init_mixing_data (testdata, 3, sizeof (double)); + + testing::internal::CaptureStdout (); + EXPECT_EXIT (this->mixing->push_data (testdata, x_in.data (), x_out.data (), nullptr, true), + ::testing::ExitedWithCode (1), + ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, + testing::HasSubstr ( + "One Broyden_Mixing object can only bind one Mixing_Data object to calculate coefficients")); + + testing::internal::CaptureStdout (); + EXPECT_EXIT (this->mixing->cal_coef (testdata, ext_inner_product_mock), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, + testing::HasSubstr ( + "One Broyden_Mixing object can only bind one Mixing_Data object to calculate coefficients")); + + clear (); } -TEST_F(Mixing_Test, PulaySolveLinearEq) +TEST_F (Mixing_Test, PulaySolveLinearEq) { #ifdef _OPENMP - omp_set_num_threads(1); + omp_set_num_threads (1); #endif - init_method("pulay"); + init_method ("pulay"); std::vector x_in = xd_ref; - std::vector x_out(3); - solve_linear_eq(x_in.data(), x_out.data()); - EXPECT_NEAR(x_out[0], 2.9999959638248037, DOUBLETHRESHOLD); - EXPECT_NEAR(x_out[1], 2.0000002552633349, DOUBLETHRESHOLD); - EXPECT_NEAR(x_out[2], 1.0000019542717642, DOUBLETHRESHOLD); - ASSERT_EQ(niter, 6); + std::vector x_out (3); + solve_linear_eq (x_in.data (), x_out.data ()); + EXPECT_NEAR (x_out[0], 2.9999959638248037, DOUBLETHRESHOLD); + EXPECT_NEAR (x_out[1], 2.0000002552633349, DOUBLETHRESHOLD); + EXPECT_NEAR (x_out[2], 1.0000019542717642, DOUBLETHRESHOLD); + ASSERT_EQ (niter, 6); - this->mixing->reset(); - xdata.reset(); + this->mixing->reset (); + xdata.reset (); 
std::vector> xc_in = xc_ref; - std::vector> xc_out(3); - solve_linear_eq>(xc_in.data(), xc_out.data()); - EXPECT_NEAR(xc_out[0].real(), 3.0000063220482565, DOUBLETHRESHOLD); - EXPECT_NEAR(xc_out[1].real(), 1.9999939191147462, DOUBLETHRESHOLD); - EXPECT_NEAR(xc_out[2].real(), 0.99999835919718549, DOUBLETHRESHOLD); - ASSERT_EQ(niter, 6); + std::vector> xc_out (3); + solve_linear_eq> (xc_in.data (), xc_out.data ()); + EXPECT_NEAR (xc_out[0].real (), 3.0000063220482565, DOUBLETHRESHOLD); + EXPECT_NEAR (xc_out[1].real (), 1.9999939191147462, DOUBLETHRESHOLD); + EXPECT_NEAR (xc_out[2].real (), 0.99999835919718549, DOUBLETHRESHOLD); + ASSERT_EQ (niter, 6); std::string output; Base_Mixing::Mixing_Data testdata; - this->mixing->init_mixing_data(testdata, 3, sizeof(double)); - - testing::internal::CaptureStdout(); - EXPECT_EXIT(this->mixing->push_data(testdata, x_in.data(), x_out.data(), nullptr, true), - ::testing::ExitedWithCode(1), - ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT( + this->mixing->init_mixing_data (testdata, 3, sizeof (double)); + + testing::internal::CaptureStdout (); + EXPECT_EXIT (this->mixing->push_data (testdata, x_in.data (), x_out.data (), nullptr, true), + ::testing::ExitedWithCode (1), + ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT ( output, - testing::HasSubstr("One Pulay_Mixing object can only bind one Mixing_Data object to calculate coefficients")); + testing::HasSubstr ("One Pulay_Mixing object can only bind one Mixing_Data object to calculate coefficients")); - testing::internal::CaptureStdout(); - EXPECT_EXIT(this->mixing->cal_coef(testdata, ext_inner_product_mock), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT( + testing::internal::CaptureStdout (); + EXPECT_EXIT (this->mixing->cal_coef (testdata, ext_inner_product_mock), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT ( output, - 
testing::HasSubstr("One Pulay_Mixing object can only bind one Mixing_Data object to calculate coefficients")); + testing::HasSubstr ("One Pulay_Mixing object can only bind one Mixing_Data object to calculate coefficients")); - clear(); + clear (); } -TEST_F(Mixing_Test, PlainSolveLinearEq) +TEST_F (Mixing_Test, PlainSolveLinearEq) { #ifdef _OPENMP - omp_set_num_threads(1); + omp_set_num_threads (1); #endif - init_method("plain"); + init_method ("plain"); std::vector x_in = xd_ref; - std::vector x_out(3); - solve_linear_eq(x_in.data(), x_out.data()); - EXPECT_NEAR(x_out[0], 2.9999613068687698, DOUBLETHRESHOLD); - EXPECT_NEAR(x_out[1], 2.0000472873362103, DOUBLETHRESHOLD); - EXPECT_NEAR(x_out[2], 1.0000075247315625, DOUBLETHRESHOLD); - ASSERT_EQ(niter, 10); + std::vector x_out (3); + solve_linear_eq (x_in.data (), x_out.data ()); + EXPECT_NEAR (x_out[0], 2.9999613068687698, DOUBLETHRESHOLD); + EXPECT_NEAR (x_out[1], 2.0000472873362103, DOUBLETHRESHOLD); + EXPECT_NEAR (x_out[2], 1.0000075247315625, DOUBLETHRESHOLD); + ASSERT_EQ (niter, 10); - this->mixing->reset(); - xdata.reset(); + this->mixing->reset (); + xdata.reset (); std::vector> xc_in = xc_ref; - std::vector> xc_out(3); - solve_linear_eq>(xc_in.data(), xc_out.data()); - EXPECT_NEAR(xc_out[0].real(), 2.9999418982632711, DOUBLETHRESHOLD); - EXPECT_NEAR(xc_out[1].real(), 2.0000317031363761, DOUBLETHRESHOLD); - EXPECT_NEAR(xc_out[2].real(), 1.0000211250842703, DOUBLETHRESHOLD); - ASSERT_EQ(niter, 10); + std::vector> xc_out (3); + solve_linear_eq> (xc_in.data (), xc_out.data ()); + EXPECT_NEAR (xc_out[0].real (), 2.9999418982632711, DOUBLETHRESHOLD); + EXPECT_NEAR (xc_out[1].real (), 2.0000317031363761, DOUBLETHRESHOLD); + EXPECT_NEAR (xc_out[2].real (), 1.0000211250842703, DOUBLETHRESHOLD); + ASSERT_EQ (niter, 10); // test mix_data of plain_mixing - std::vector x_tmp(3); - this->mixing->push_data(this->xdata, x_in.data(), x_out.data(), nullptr, true); - this->mixing->mix_data(this->xdata, x_tmp.data()); - 
Base_Mixing::Plain_Mixing plain_mix(mixing_beta); - plain_mix.plain_mix(x_in.data(), x_in.data(), x_out.data(), 3, [](double* x) {}); - EXPECT_NEAR(x_tmp[0], x_in[0], DOUBLETHRESHOLD); - EXPECT_NEAR(x_tmp[1], x_in[1], DOUBLETHRESHOLD); - EXPECT_NEAR(x_tmp[2], x_in[2], DOUBLETHRESHOLD); - - std::vector> xc_tmp(3); - this->mixing->push_data(this->xdata, xc_in.data(), xc_out.data(), nullptr, true); - this->mixing->mix_data(this->xdata, xc_tmp.data()); - plain_mix.plain_mix(xc_in.data(), xc_in.data(), xc_out.data(), 3, nullptr); - EXPECT_NEAR(xc_tmp[0].real(), xc_in[0].real(), DOUBLETHRESHOLD); - EXPECT_NEAR(xc_tmp[1].real(), xc_in[1].real(), DOUBLETHRESHOLD); - EXPECT_NEAR(xc_tmp[2].real(), xc_in[2].real(), DOUBLETHRESHOLD); - - this->mixing->reset(); - - clear(); + std::vector x_tmp (3); + this->mixing->push_data (this->xdata, x_in.data (), x_out.data (), nullptr, true); + this->mixing->mix_data (this->xdata, x_tmp.data ()); + Base_Mixing::Plain_Mixing plain_mix (mixing_beta); + plain_mix.plain_mix (x_in.data (), x_in.data (), x_out.data (), 3, [] (double* x) {}); + EXPECT_NEAR (x_tmp[0], x_in[0], DOUBLETHRESHOLD); + EXPECT_NEAR (x_tmp[1], x_in[1], DOUBLETHRESHOLD); + EXPECT_NEAR (x_tmp[2], x_in[2], DOUBLETHRESHOLD); + + std::vector> xc_tmp (3); + this->mixing->push_data (this->xdata, xc_in.data (), xc_out.data (), nullptr, true); + this->mixing->mix_data (this->xdata, xc_tmp.data ()); + plain_mix.plain_mix (xc_in.data (), xc_in.data (), xc_out.data (), 3, nullptr); + EXPECT_NEAR (xc_tmp[0].real (), xc_in[0].real (), DOUBLETHRESHOLD); + EXPECT_NEAR (xc_tmp[1].real (), xc_in[1].real (), DOUBLETHRESHOLD); + EXPECT_NEAR (xc_tmp[2].real (), xc_in[2].real (), DOUBLETHRESHOLD); + + this->mixing->reset (); + + clear (); } -TEST_F(Mixing_Test, OtherCover) +TEST_F (Mixing_Test, OtherCover) { - this->mixing = new Base_Mixing::Broyden_Mixing(2, 0.7); + this->mixing = new Base_Mixing::Broyden_Mixing (2, 0.7); Base_Mixing::Mixing_Data nodata; - 
this->mixing->init_mixing_data(nodata, 0, sizeof(double)); - this->mixing->push_data(nodata, (double*)nullptr, (double*)nullptr, nullptr, false); - this->mixing->push_data(nodata, (double*)nullptr, (double*)nullptr, nullptr, false); - this->mixing->mix_data(nodata, (double*)nullptr); - this->mixing->mix_data(nodata, (std::complex*)nullptr); - EXPECT_EQ(nodata.length, 0); - - clear(); + this->mixing->init_mixing_data (nodata, 0, sizeof (double)); + this->mixing->push_data (nodata, (double*)nullptr, (double*)nullptr, nullptr, false); + this->mixing->push_data (nodata, (double*)nullptr, (double*)nullptr, nullptr, false); + this->mixing->mix_data (nodata, (double*)nullptr); + this->mixing->mix_data (nodata, (std::complex*)nullptr); + EXPECT_EQ (nodata.length, 0); + + clear (); } \ No newline at end of file diff --git a/source/source_base/mymath.cpp b/source/source_base/mymath.cpp index fd4b4d0168a..095a6398c3a 100644 --- a/source/source_base/mymath.cpp +++ b/source/source_base/mymath.cpp @@ -5,7 +5,8 @@ namespace ModuleBase { -void heapAjust(double *r, int *ind, int s, int m) +void + heapAjust (double* r, int* ind, int s, int m) { int j = 0, ic = 0; double rc = 0.0; @@ -13,19 +14,23 @@ void heapAjust(double *r, int *ind, int s, int m) ic = ind[s]; for (j = 2 * s; j <= m; j *= 2) - { - if (j < m && (r[j] < r[j + 1])) - j++; + { + if (j < m && (r[j] < r[j + 1])) + { + j++; + } - if (!(rc < r[j])) - break; + if (!(rc < r[j])) + { + break; + } - r[s] = r[j]; + r[s] = r[j]; - ind[s] = ind[j]; + ind[s] = ind[j]; - s = j; - } + s = j; + } r[s] = rc; @@ -33,36 +38,37 @@ void heapAjust(double *r, int *ind, int s, int m) return; } -void heapsort(const int n, double *r, int *ind) +void + heapsort (const int n, double* r, int* ind) { - ModuleBase::timer::start("mymath", "heapsort"); + ModuleBase::timer::start ("mymath", "heapsort"); int i = 0, ic = 0; double rc = 0.0; if (ind[0] == 0) - { - for (i = 0; i < n; i++) { - ind[i] = i; + for (i = 0; i < n; i++) + { + ind[i] = i; + } } - 
} for (i = n / 2; i >= 0; i--) - { - heapAjust(r, ind, i, n - 1); - } + { + heapAjust (r, ind, i, n - 1); + } for (i = n - 1; i > 0; i--) - { - rc = r[0]; - r[0] = r[i]; - r[i] = rc; - ic = ind[0]; - ind[0] = ind[i]; - ind[i] = ic; - heapAjust(r, ind, 0, i - 1); - } - ModuleBase::timer::end("mymath", "heapsort"); + { + rc = r[0]; + r[0] = r[i]; + r[i] = rc; + ic = ind[0]; + ind[0] = ind[i]; + ind[i] = ic; + heapAjust (r, ind, 0, i - 1); + } + ModuleBase::timer::end ("mymath", "heapsort"); return; } @@ -87,94 +93,105 @@ c adapted from Numerical Recipes pg. 329 (new edition) *********************************************************************/ // from hpsort.f90 -void hpsort(int n, double *ra, int *ind) +void + hpsort (int n, double* ra, int* ind) { int i = 0, ir = 0, j = 0, k = 0, iind = 0; double rra = 0.0; if (ind[0] == 0) - { - for (i = 1; i <= n; i++) - ind[i - 1] = i; - } + { + for (i = 1; i <= n; i++) + { + ind[i - 1] = i; + } + } if (n < 2) - return; // nothing to order + { + return; // nothing to order + } k = n / 2; ir = n - 1; while (true) - { - if (k > 0) // still in hiring phase - { - k = k - 1; - rra = ra[k]; - iind = ind[k]; - } - else // in retirement-promotion phase. { - rra = ra[ir]; // clear a space at the end of the array - iind = ind[ir]; // - ra[ir] = ra[0]; // retire the top of the heap into it - ind[ir] = ind[0]; // - ir = ir - 1; // decrease the size of the corporation - - if (ir == 0) // done with the last promotion - { - ra[0] = rra; // the least competent worker at all // - ind[0] = iind; // - return; - } - } - - i = k; // wheter in hiring or promotion phase, we - - j = k + k + 1; // set up to place rra in its proper level - - while (j <= ir) - { - if (j < ir) - { - if (ra[j] < ra[j + 1]) // compare to better underling + if (k > 0) // still in hiring phase { - j = j + 1; + k = k - 1; + rra = ra[k]; + iind = ind[k]; } - else if (ra[j] == ra[j + 1]) + else // in retirement-promotion phase. 
{ - if (ind[j] < ind[j + 1]) - j = j + 1; + rra = ra[ir]; // clear a space at the end of the array + iind = ind[ir]; // + ra[ir] = ra[0]; // retire the top of the heap into it + ind[ir] = ind[0]; // + ir = ir - 1; // decrease the size of the corporation + + if (ir == 0) // done with the last promotion + { + ra[0] = rra; // the least competent worker at all // + ind[0] = iind; // + return; + } } - } - - if (rra < ra[j]) // demote rra - { - ra[i] = ra[j]; - ind[i] = ind[j]; - i = j; - j = j + j + 1; - } - else if (rra == ra[j]) - { - if (iind < ind[j]) // demote rra + + i = k; // wheter in hiring or promotion phase, we + + j = k + k + 1; // set up to place rra in its proper level + + while (j <= ir) { - ra[i] = ra[j]; - ind[i] = ind[j]; - i = j; - j = j + j + 1; + if (j < ir) + { + if (ra[j] < ra[j + 1]) // compare to better underling + { + j = j + 1; + } + else if (ra[j] == ra[j + 1]) + { + if (ind[j] < ind[j + 1]) + { + j = j + 1; + } + } + } + + if (rra < ra[j]) // demote rra + { + ra[i] = ra[j]; + ind[i] = ind[j]; + i = j; + j = j + j + 1; + } + else if (rra == ra[j]) + { + if (iind < ind[j]) // demote rra + { + ra[i] = ra[j]; + ind[i] = ind[j]; + i = j; + j = j + j + 1; + } + else + { + j = ir + 1; // set j to terminate do-while loop + } + } + else + { // this is the right place for rra + j = ir + 1; // set j to terminate do-while loop + } } - else - j = ir + 1; // set j to terminate do-while loop - } - else // this is the right place for rra - j = ir + 1; // set j to terminate do-while loop - } - ra[i] = rra; + ra[i] = rra; - ind[i] = iind; - } + ind[i] = iind; + } } } // namespace ModuleBase diff --git a/source/source_base/mymath.h b/source/source_base/mymath.h index 206380b8719..e595dc0fddc 100644 --- a/source/source_base/mymath.h +++ b/source/source_base/mymath.h @@ -3,11 +3,9 @@ namespace ModuleBase { -void heapsort(int n, double *r, int *ind); -void heapAjust(double r[], int ind[], int s, int m);//not be used now! 
-void hpsort(int n, double *ra, int *ind); -} +void heapsort (int n, double* r, int* ind); +void heapAjust (double r[], int ind[], int s, int m); // not be used now! +void hpsort (int n, double* ra, int* ind); +} // namespace ModuleBase #endif // MYMATH_H - - diff --git a/source/source_base/name_angular.h b/source/source_base/name_angular.h index 48b9dae1520..5f6e142dd70 100644 --- a/source/source_base/name_angular.h +++ b/source/source_base/name_angular.h @@ -3,14 +3,12 @@ namespace ModuleBase { - const std::string Name_Angular[5][11] = - { - {"s"}, - {"pz", "px", "py"}, - {"dz^2", "dxz", "dyz", "dx^2-y^2", "dxy"}, - {"fz^3", "fxz^2", "fyz^2", "fzx^2-zy^2", "fxyz", "fx^3-3*xy^2", "f3yx^2-y^3"}, - {"g1", "g2", "g3", "g4", "g5", "g6", "g7", "g8", "g9"} - }; // name of atomic orbital jiyy add 2022-05-10 +const std::string Name_Angular[5][11] + = {{"s"}, + {"pz", "px", "py"}, + {"dz^2", "dxz", "dyz", "dx^2-y^2", "dxy"}, + {"fz^3", "fxz^2", "fyz^2", "fzx^2-zy^2", "fxyz", "fx^3-3*xy^2", "f3yx^2-y^3"}, + {"g1", "g2", "g3", "g4", "g5", "g6", "g7", "g8", "g9"}}; // name of atomic orbital jiyy add 2022-05-10 } #endif diff --git a/source/source_base/ndarray.h b/source/source_base/ndarray.h index 5f76fb9e282..124c3a8e29b 100644 --- a/source/source_base/ndarray.h +++ b/source/source_base/ndarray.h @@ -1,12 +1,15 @@ /** * @file NDArray.h * @author your name (you@domain.com) - * @brief under the restriction of C++11, a simple alternative to std::vector + std::mdspan. In source_base/module_container/ATen/tensor.h, tensor class provides a cross-device container, but std::string is not supported. Therefore, this class is to provide a general (but CPU-only) container for multi-dimensional data. It can easily convert to ontainer::Tensor. + * @brief under the restriction of C++11, a simple alternative to std::vector + std::mdspan. In + * source_base/module_container/ATen/tensor.h, tensor class provides a cross-device container, but std::string is not + * supported. 
Therefore, this class is to provide a general (but CPU-only) container for multi-dimensional data. It can + * easily convert to ontainer::Tensor. * @version 0.1 * @date 2024-04-24 - * + * * @copyright Copyright (c) 2024 - * + * */ #ifndef NDARRAY_H @@ -19,14 +22,17 @@ #include #include // for heterogeneous computing, we can use ATen::Tensor -//#include "./module_container/ATen/tensor.h" +// #include "./module_container/ATen/tensor.h" /** - * @brief under the restriction of C++11, a simple alternative to std::vector + std::mdspan. In source_base/module_container/ATen/tensor.h, tensor class provides a cross-device container, but std::string is not supported. Therefore, this class is to provide a general (but CPU-only) container for multi-dimensional data. It can easily convert to container::Tensor. - * - * @tparam T + * @brief under the restriction of C++11, a simple alternative to std::vector + std::mdspan. In + * source_base/module_container/ATen/tensor.h, tensor class provides a cross-device container, but std::string is not + * supported. Therefore, this class is to provide a general (but CPU-only) container for multi-dimensional data. It can + * easily convert to container::Tensor. 
+ * + * @tparam T */ -template +template class NDArray { // align with STL container implementation, there are several functions compulsory to be implemented @@ -37,165 +43,266 @@ class NDArray // element access: [], at, front, back, data // modifiers: clear, insert, erase, push_back, pop_back, resize, swap // allocator: get_allocator -public: + public: // constructors /** * @brief Construct a new NDArray object - * + * */ - NDArray()= delete; + NDArray () = delete; // initializer_list constructor - NDArray(std::initializer_list il) : shape_(il), data_(std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies())) {} - NDArray(std::initializer_list il) : shape_(il.begin(), il.end()), data_(std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies())) {} + NDArray (std::initializer_list il) + : shape_ (il), data_ (std::accumulate (shape_.begin (), shape_.end (), 1, std::multiplies ())) + { + } + NDArray (std::initializer_list il) + : shape_ (il.begin (), il.end ()), + data_ (std::accumulate (shape_.begin (), shape_.end (), 1, std::multiplies ())) + { + } // variadic template constructor, (delegate constructor) - template NDArray(const size_t idx, Args... args) : NDArray({idx, static_cast(args)...}) {} - template NDArray(const int& idx, Args... args) : NDArray({idx, static_cast(args)...}) {} // not happy with this because size_t can have larger range + template + NDArray (const size_t idx, Args... args) : NDArray ({idx, static_cast (args)...}) + { + } + template + NDArray (const int& idx, Args... 
args) : NDArray ({idx, static_cast (args)...}) + { + } // not happy with this because size_t can have larger range // copy constructor - NDArray(const NDArray& other) : data_(other.data_), shape_(other.shape_) {} + NDArray (const NDArray& other) : data_ (other.data_), shape_ (other.shape_) {} // move constructor - NDArray(NDArray&& other) : data_(std::move(other.data_)), shape_(std::move(other.shape_)) {} + NDArray (NDArray&& other) : data_ (std::move (other.data_)), shape_ (std::move (other.shape_)) {} // destructor - ~NDArray() {} + ~NDArray () {} // operators /** * @brief = operator, copy assignment - * - * @param other - * @return NDArray& + * + * @param other + * @return NDArray& */ - NDArray& operator=(const NDArray& other) + NDArray& + operator= (const NDArray& other) { if (this != &other) - { - data_ = other.data_; - shape_ = other.shape_; - } + { + data_ = other.data_; + shape_ = other.shape_; + } return *this; } /** * @brief = operator, move assignment - */ - NDArray& operator=(NDArray&& other) + */ + NDArray& + operator= (NDArray&& other) { if (this != &other) - { - data_ = std::move(other.data_); - shape_ = std::move(other.shape_); - } + { + data_ = std::move (other.data_); + shape_ = std::move (other.shape_); + } return *this; } /** * @brief == operator - * - * @param other + * + * @param other * @return true if the data and shape are the same * @return false otherwise */ - bool operator==(const NDArray& other) const { return data_ == other.data_ && shape_ == other.shape_; } + bool + operator== (const NDArray& other) const + { + return data_ == other.data_ && shape_ == other.shape_; + } /** * @brief != operator - * - * @param other + * + * @param other * @return true if the data and shape are different * @return false otherwise */ - bool operator!=(const NDArray& other) const { return !(*this == other); } + bool + operator!= (const NDArray& other) const + { + return !(*this == other); + } // other operators are not generally supported // element access 
/** * @brief at function - * - * @tparam Args + * + * @tparam Args * @param args indices of the element * @return T& or const T& */ - template T& at(const size_t idx, Args... args) { return data_[index(idx, args...)]; } - template const T& at(const size_t idx, Args... args) const { return data_[index(idx, args...)]; } + template + T& + at (const size_t idx, Args... args) + { + return data_[index (idx, args...)]; + } + template + const T& + at (const size_t idx, Args... args) const + { + return data_[index (idx, args...)]; + } /** * @brief [] operator - * - * @tparam Args + * + * @tparam Args * @param args indices of the element * @return T& or const T& */ - template T& operator()(const size_t idx, Args... args) { return data_[index(idx, args...)]; } - template const T& operator()(const size_t idx, Args... args) const { return data_[index(idx, args...)]; } + template + T& + operator() (const size_t idx, Args... args) + { + return data_[index (idx, args...)]; + } + template + const T& + operator() (const size_t idx, Args... 
args) const + { + return data_[index (idx, args...)]; + } // front - T& front() { return data_.front(); } - const T& front() const { return data_.front(); } + T& + front () + { + return data_.front (); + } + const T& + front () const + { + return data_.front (); + } // back - T& back() { return data_.back(); } - const T& back() const { return data_.back(); } + T& + back () + { + return data_.back (); + } + const T& + back () const + { + return data_.back (); + } // data - T* data() { return data_.data(); } - const T* data() const { return data_.data(); } + T* + data () + { + return data_.data (); + } + const T* + data () const + { + return data_.data (); + } // iterators // iterators on the whole data - T* begin() { return data_.data(); } - T* end() { return data_.data() + data_.size(); } - const T* cbegin() const { return data_.data(); } - const T* cend() const { return data_.data() + data_.size(); } + T* + begin () + { + return data_.data (); + } + T* + end () + { + return data_.data () + data_.size (); + } + const T* + cbegin () const + { + return data_.data (); + } + const T* + cend () const + { + return data_.data () + data_.size (); + } // iterators on different dimensions - + // capacity // size - size_t size() const { return data_.size(); } - size_t size(const size_t& dim) const { return shape_.at(dim); } + size_t + size () const + { + return data_.size (); + } + size_t + size (const size_t& dim) const + { + return shape_.at (dim); + } // empty - bool empty() const { return data_.empty(); } + bool + empty () const + { + return data_.empty (); + } // multi-dimensional specific // shape - const std::vector& shape() const { return shape_; } + const std::vector& + shape () const + { + return shape_; + } // reshape - template - void reshape(Args... args) + template + void + reshape (Args... args) { // DEVELP WARNING: what if arg = -2? 
:) // save args into a vector - //std::vector dims = {static_cast(args)...}; + // std::vector dims = {static_cast(args)...}; std::vector dims = {args...}; // assert number of -1 in dims is at most 1 // -1 is not type-safe!!! - size_t count = std::count_if(dims.begin(), dims.end(), [](size_t i) { return i == -1; }); - assert(count <= 1); + size_t count = std::count_if (dims.begin (), dims.end (), [] (size_t i) { return i == -1; }); + assert (count <= 1); // if there is -1, calculate the size if (count == 1) - { - size_t size = 1; - for (size_t i = 0; i < dims.size(); ++i) { - if (dims[i] != -1) - { - size *= dims[i]; - } + size_t size = 1; + for (size_t i = 0; i < dims.size (); ++i) + { + if (dims[i] != -1) + { + size *= dims[i]; + } + } + size_t idx = std::find (dims.begin (), dims.end (), -1) - dims.begin (); + dims[idx] = data_.size () / size; } - size_t idx = std::find(dims.begin(), dims.end(), -1) - dims.begin(); - dims[idx] = data_.size() / size; - } // calculate the size - size_t size = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies()); + size_t size = std::accumulate (dims.begin (), dims.end (), 1, std::multiplies ()); // assert size is the same - assert(size == data_.size()); + assert (size == data_.size ()); // assign dims to shape_ - std::copy(dims.begin(), dims.end(), shape_.begin()); + std::copy (dims.begin (), dims.end (), shape_.begin ()); } // interface to ATen::Tensor, but constraint to int, double, float, std::complex, std::complex /** - * @brief SFINAE (Substitution Failure Is Not An Error) to_tensor function, only if T is int, double, float, std::complex, std::complex, otherwise there is no such function - * + * @brief SFINAE (Substitution Failure Is Not An Error) to_tensor function, only if T is int, double, float, + * std::complex, std::complex, otherwise there is no such function + * * @return container::Tensor, only if T is int, double, float, std::complex, std::complex */ // std::enable_if< - // std::is_same::value - // || 
std::is_same::value - // || std::is_same::value - // || std::is_same>::value + // std::is_same::value + // || std::is_same::value + // || std::is_same::value + // || std::is_same>::value // || std::is_same>::value, container::Tensor // >::type to_tensor() const // { @@ -204,20 +311,23 @@ class NDArray // std::memcpy(result.data(), data_.data(), data_.size() * sizeof(T)); // return result; // } - template - size_t index(const size_t idx, Args... args) const + template + size_t + index (const size_t idx, Args... args) const { - assert(sizeof...(args) == shape_.size() - 1); // assert the indices are the same as the shape - size_t indices[] = {idx, static_cast(args)...}; + assert (sizeof...(args) == shape_.size () - 1); // assert the indices are the same as the shape + size_t indices[] = {idx, static_cast (args)...}; size_t index = 0; - for (size_t i = 0; i < shape_.size(); ++i) - { - index += indices[i] * std::accumulate(shape_.begin() + i + 1, shape_.end(), 1, std::multiplies()); - } - assert(index < data_.size()); // assert the index is within the data + for (size_t i = 0; i < shape_.size (); ++i) + { + index += indices[i] + * std::accumulate (shape_.begin () + i + 1, shape_.end (), 1, std::multiplies ()); + } + assert (index < data_.size ()); // assert the index is within the data return index; } -private: + + private: std::vector shape_; // for GPU-compatible data container, will be replaced by raw pointer std::vector data_; diff --git a/source/source_base/opt_CG.cpp b/source/source_base/opt_CG.cpp index aa289da44db..aab881d47da 100644 --- a/source/source_base/opt_CG.cpp +++ b/source/source_base/opt_CG.cpp @@ -3,11 +3,9 @@ namespace ModuleBase { -Opt_CG::Opt_CG() -{ -} +Opt_CG::Opt_CG () {} -Opt_CG::~Opt_CG() +Opt_CG::~Opt_CG () { delete[] this->pb_; delete[] this->pdirect_old_; @@ -19,13 +17,18 @@ Opt_CG::~Opt_CG() * * @param pinp_b b in the linear equation Ax = b */ -void Opt_CG::init_b(double* pinp_b) +void + Opt_CG::init_b (double* pinp_b) { if (this->pb_ != 
nullptr) - delete[] this->pb_; + { + delete[] this->pb_; + } this->pb_ = new double[this->nx_]; for (int i = 0; i < this->nx_; ++i) - this->pb_[i] = pinp_b[i]; + { + this->pb_[i] = pinp_b[i]; + } } /** @@ -33,18 +36,20 @@ void Opt_CG::init_b(double* pinp_b) * * @param nx length of the solution array x */ -void Opt_CG::allocate(int nx) +void + Opt_CG::allocate (int nx) { this->nx_ = nx; delete[] this->pdirect_old_; delete[] this->pgradient_old_; this->pdirect_old_ = new double[this->nx_]; this->pgradient_old_ = new double[this->nx_]; - ModuleBase::GlobalFunc::ZEROS(this->pdirect_old_, this->nx_); - ModuleBase::GlobalFunc::ZEROS(this->pgradient_old_, this->nx_); + ModuleBase::GlobalFunc::ZEROS (this->pdirect_old_, this->nx_); + ModuleBase::GlobalFunc::ZEROS (this->pgradient_old_, this->nx_); } -void Opt_CG::set_para(double dV) +void + Opt_CG::set_para (double dV) { this->dV_ = dV; } @@ -56,22 +61,25 @@ void Opt_CG::set_para(double dV) * @param nx_new length of new x, default 0 means the length doesn't change * @param pinp_b new b in Ax = b, default nullptr means we are dealing with general case */ -void Opt_CG::refresh(int nx_new, double* pinp_b) +void + Opt_CG::refresh (int nx_new, double* pinp_b) { this->iter_ = 0; this->alpha_ = 0.; this->beta_ = 0.; if (nx_new != 0) - { - this->allocate(nx_new); - } + { + this->allocate (nx_new); + } else - { - ModuleBase::GlobalFunc::ZEROS(this->pdirect_old_, this->nx_); - ModuleBase::GlobalFunc::ZEROS(this->pgradient_old_, this->nx_); - } + { + ModuleBase::GlobalFunc::ZEROS (this->pdirect_old_, this->nx_); + ModuleBase::GlobalFunc::ZEROS (this->pgradient_old_, this->nx_); + } if (pinp_b != nullptr) - this->init_b(pinp_b); + { + this->init_b (pinp_b); + } } /** @@ -82,42 +90,43 @@ void Opt_CG::refresh(int nx_new, double* pinp_b) * @param [in, out] rdirect the next optimization direction * */ -void Opt_CG::next_direct(double* pgradient, int label, double* rdirect) +void + Opt_CG::next_direct (double* pgradient, int label, double* 
rdirect) { if (label == 0) // standard CG to solve Ap=x - { - this->stantard_CGdirect(pgradient, rdirect); - } - else if (label == 1 or label == 2) // FR formula or HZ form - { - if (this->iter_ == 0) // if iter == 0, d = -g { - for (int i = 0; i < this->nx_; ++i) - { - rdirect[i] = -pgradient[i]; - this->pgradient_old_[i] = pgradient[i]; - this->pdirect_old_[i] = rdirect[i]; - } + this->stantard_CGdirect (pgradient, rdirect); } - else // d = -g + beta * d + else if (label == 1 or label == 2) // FR formula or HZ form { - if (label == 1) - { - this->PR_beta(pgradient); - } - else if (label == 2) - { - this->HZ_beta(pgradient); - } - for (int i = 0; i < this->nx_; ++i) - { - rdirect[i] = -pgradient[i] + this->beta_ * this->pdirect_old_[i]; - this->pgradient_old_[i] = pgradient[i]; - this->pdirect_old_[i] = rdirect[i]; - } + if (this->iter_ == 0) // if iter == 0, d = -g + { + for (int i = 0; i < this->nx_; ++i) + { + rdirect[i] = -pgradient[i]; + this->pgradient_old_[i] = pgradient[i]; + this->pdirect_old_[i] = rdirect[i]; + } + } + else // d = -g + beta * d + { + if (label == 1) + { + this->PR_beta (pgradient); + } + else if (label == 2) + { + this->HZ_beta (pgradient); + } + for (int i = 0; i < this->nx_; ++i) + { + rdirect[i] = -pgradient[i] + this->beta_ * this->pdirect_old_[i]; + this->pgradient_old_[i] = pgradient[i]; + this->pdirect_old_[i] = rdirect[i]; + } + } + this->iter_++; } - this->iter_++; - } } /** @@ -128,28 +137,29 @@ void Opt_CG::next_direct(double* pgradient, int label, double* rdirect) * @param ifPD 0 if positive definite, -1, -2 when not * @return the step length alpha */ -double Opt_CG::step_length(double* pAd, double* pdirect, int& ifPD) +double + Opt_CG::step_length (double* pAd, double* pdirect, int& ifPD) { - double dAd = this->inner_product(pdirect, pAd, this->nx_); - Parallel_Reduce::reduce_all(dAd); + double dAd = this->inner_product (pdirect, pAd, this->nx_); + Parallel_Reduce::reduce_all (dAd); ifPD = 0; // check for 
positive-definiteness, very important for convergence if (dAd == 0) - { - this->alpha_ = 0; - return 0; - } - else if (dAd < 0) - { - if (this->iter_ == 1) { - ifPD = -1; + this->alpha_ = 0; + return 0; } - else + else if (dAd < 0) { - ifPD = -2; + if (this->iter_ == 1) + { + ifPD = -1; + } + else + { + ifPD = -2; + } } - } this->alpha_ = this->gg_ / dAd; return this->alpha_; } @@ -160,35 +170,36 @@ double Opt_CG::step_length(double* pAd, double* pdirect, int& ifPD) * @param [in] pAd Ad for Ax=b * @param [out] rdirect the next direction */ -void Opt_CG::stantard_CGdirect(double* pAd, double* rdirect) +void + Opt_CG::stantard_CGdirect (double* pAd, double* rdirect) { if (this->iter_ == 0) - { - for (int i = 0; i < this->nx_; ++i) { - this->pgradient_old_[i] = -this->pb_[i]; - rdirect[i] = this->pb_[i]; - this->pdirect_old_[i] = this->pb_[i]; + for (int i = 0; i < this->nx_; ++i) + { + this->pgradient_old_[i] = -this->pb_[i]; + rdirect[i] = this->pb_[i]; + this->pdirect_old_[i] = this->pb_[i]; + } } - } else - { - std::vector temp_gradient(this->nx_); - for (int i = 0; i < this->nx_; ++i) { - temp_gradient[i] = this->pgradient_old_[i] + this->alpha_ * pAd[i]; - } - this->beta_ = this->inner_product(temp_gradient.data(), temp_gradient.data(), this->nx_) / this->gg_; - Parallel_Reduce::reduce_all(this->beta_); - for (int i = 0; i < this->nx_; ++i) - { - this->pgradient_old_[i] = temp_gradient[i]; - rdirect[i] = -this->pgradient_old_[i] + this->beta_ * this->pdirect_old_[i]; - this->pdirect_old_[i] = rdirect[i]; + std::vector temp_gradient (this->nx_); + for (int i = 0; i < this->nx_; ++i) + { + temp_gradient[i] = this->pgradient_old_[i] + this->alpha_ * pAd[i]; + } + this->beta_ = this->inner_product (temp_gradient.data (), temp_gradient.data (), this->nx_) / this->gg_; + Parallel_Reduce::reduce_all (this->beta_); + for (int i = 0; i < this->nx_; ++i) + { + this->pgradient_old_[i] = temp_gradient[i]; + rdirect[i] = -this->pgradient_old_[i] + this->beta_ * 
this->pdirect_old_[i]; + this->pdirect_old_[i] = rdirect[i]; + } } - } - this->gg_ = this->inner_product(this->pgradient_old_, this->pgradient_old_, this->nx_); - Parallel_Reduce::reduce_all(this->gg_); + this->gg_ = this->inner_product (this->pgradient_old_, this->pgradient_old_, this->nx_); + Parallel_Reduce::reduce_all (this->gg_); this->iter_++; } @@ -199,17 +210,18 @@ void Opt_CG::stantard_CGdirect(double* pAd, double* rdirect) * * @param pgradient df(x)/dx */ -void Opt_CG::PR_beta(double* pgradient) +void + Opt_CG::PR_beta (double* pgradient) { double temp_beta = 0.; - temp_beta = this->inner_product(pgradient, pgradient, this->nx_); - temp_beta -= this->inner_product(pgradient, this->pgradient_old_, this->nx_); - Parallel_Reduce::reduce_all(temp_beta); - double gg_old = this->inner_product(this->pgradient_old_, this->pgradient_old_, this->nx_); - Parallel_Reduce::reduce_all(gg_old); + temp_beta = this->inner_product (pgradient, pgradient, this->nx_); + temp_beta -= this->inner_product (pgradient, this->pgradient_old_, this->nx_); + Parallel_Reduce::reduce_all (temp_beta); + double gg_old = this->inner_product (this->pgradient_old_, this->pgradient_old_, this->nx_); + Parallel_Reduce::reduce_all (gg_old); // temp_beta /= this->inner_product(this->pgradient_old_, this->pgradient_old_, this->nx_); temp_beta /= gg_old; - this->beta_ = std::max(0., temp_beta); + this->beta_ = std::max (0., temp_beta); } /** @@ -219,29 +231,32 @@ void Opt_CG::PR_beta(double* pgradient) * * @param pgradient df(x)/dx */ -void Opt_CG::HZ_beta(double* pgradient) +void + Opt_CG::HZ_beta (double* pgradient) { double* y = new double[this->nx_]; for (int i = 0; i < this->nx_; ++i) - y[i] = pgradient[i] - this->pgradient_old_[i]; + { + y[i] = pgradient[i] - this->pgradient_old_[i]; + } - double py = this->inner_product(this->pdirect_old_, y, this->nx_); - Parallel_Reduce::reduce_all(py); - double yy = this->inner_product(y, y, this->nx_); - Parallel_Reduce::reduce_all(yy); - double pg = 
this->inner_product(this->pdirect_old_, pgradient, this->nx_); - Parallel_Reduce::reduce_all(pg); - double yg = this->inner_product(y, pgradient, this->nx_); - Parallel_Reduce::reduce_all(yg); + double py = this->inner_product (this->pdirect_old_, y, this->nx_); + Parallel_Reduce::reduce_all (py); + double yy = this->inner_product (y, y, this->nx_); + Parallel_Reduce::reduce_all (yy); + double pg = this->inner_product (this->pdirect_old_, pgradient, this->nx_); + Parallel_Reduce::reduce_all (pg); + double yg = this->inner_product (y, pgradient, this->nx_); + Parallel_Reduce::reduce_all (yg); double temp_beta = (yg - 2 * pg * yy / py) / py; - double pp = this->inner_product(this->pdirect_old_, this->pdirect_old_, this->nx_); - Parallel_Reduce::reduce_all(pp); - double gg = this->inner_product(this->pgradient_old_, this->pgradient_old_, this->nx_); - Parallel_Reduce::reduce_all(gg); - double temp_eta = -1 / (sqrt(pp) * std::min(this->eta_, sqrt(gg))); + double pp = this->inner_product (this->pdirect_old_, this->pdirect_old_, this->nx_); + Parallel_Reduce::reduce_all (pp); + double gg = this->inner_product (this->pgradient_old_, this->pgradient_old_, this->nx_); + Parallel_Reduce::reduce_all (gg); + double temp_eta = -1 / (sqrt (pp) * std::min (this->eta_, sqrt (gg))); - this->beta_ = std::max(temp_beta, temp_eta); + this->beta_ = std::max (temp_beta, temp_eta); delete[] y; } diff --git a/source/source_base/opt_CG.h b/source/source_base/opt_CG.h index ee1218168e3..388f60fd1c4 100644 --- a/source/source_base/opt_CG.h +++ b/source/source_base/opt_CG.h @@ -1,7 +1,7 @@ #ifndef OPT_CG_H #define OPT_CG_H -#include +#include #include @@ -24,32 +24,34 @@ namespace ModuleBase class Opt_CG { public: - Opt_CG(); - ~Opt_CG(); + Opt_CG (); + ~Opt_CG (); - void init_b(double* pinp_b // b in the linear equation Ax = b + void init_b (double* pinp_b // b in the linear equation Ax = b ); - void allocate(int nx // length of the solution array x + void allocate (int nx // length of the 
solution array x ); - void set_para(double dV); - void refresh(int nx_new = 0, // length of new x, default 0 means the length doesn't change - double* pinp_b = nullptr // new b in Ax = b, default nullptr means we are dealing with general case + void set_para (double dV); + void refresh (int nx_new = 0, // length of new x, default 0 means the length doesn't change + double* pinp_b = nullptr // new b in Ax = b, default nullptr means we are dealing with general case ); - void next_direct(double* pgradient, // Ad for linear equaiont Ax=b, and gradient for general case - int label, // 0 for solve Ax=b, 1 for PR form, 2 for HZ form - double* rdirect // next direct + void next_direct (double* pgradient, // Ad for linear equaiont Ax=b, and gradient for general case + int label, // 0 for solve Ax=b, 1 for PR form, 2 for HZ form + double* rdirect // next direct ); - double step_length(double* pAd, // Ad for Ax=b - double* pdirect, // direct - int& ifPD // if postive definit + double step_length (double* pAd, // Ad for Ax=b + double* pdirect, // direct + int& ifPD // if postive definit ); - double get_residual() const + double + get_residual () const { - return sqrt(this->gg_); + return sqrt (this->gg_); }; - int get_iter() const + int + get_iter () const { return this->iter_; } @@ -73,16 +75,17 @@ class Opt_CG double alpha_ = 0.; // step length in standard CG double* pb_ = nullptr; // b in Ax=b, only for standard CG - void stantard_CGdirect(double* pAd, // Ad for Ax=b - double* rdirect // next direct + void stantard_CGdirect (double* pAd, // Ad for Ax=b + double* rdirect // next direct ); - void PR_beta(double* pgradient // df(x)/dx + void PR_beta (double* pgradient // df(x)/dx ); - void HZ_beta(double* pgradient // df(x)/dx + void HZ_beta (double* pgradient // df(x)/dx ); - double inner_product(double* pa, double* pb, int length) + double + inner_product (double* pa, double* pb, int length) { - double innerproduct = BlasConnector::dot(length, pa, 1, pb, 1); + double 
innerproduct = BlasConnector::dot (length, pa, 1, pb, 1); innerproduct *= this->dV_; return innerproduct; } diff --git a/source/source_base/opt_DCsrch.cpp b/source/source_base/opt_DCsrch.cpp index 924e0dcc229..f9247d27eb1 100644 --- a/source/source_base/opt_DCsrch.cpp +++ b/source/source_base/opt_DCsrch.cpp @@ -1,7 +1,7 @@ #include "opt_DCsrch.h" -#include -#include +#include +#include // This file is translated from fortran codes dcstep.f of scipy. // The structure and all annotation of the original file have been retained. @@ -10,17 +10,18 @@ namespace ModuleBase { -int dcsrch(double& stp, - double& f, - double& g, - double& ftol, - double& gtol, - double& xtol, - char* task, - double& stpmin, - double& stpmax, - int* isave, - double* dsave) +int + dcsrch (double& stp, + double& f, + double& g, + double& ftol, + double& gtol, + double& xtol, + char* task, + double& stpmin, + double& stpmax, + int* isave, + double* dsave) { // c ********** // c @@ -171,235 +172,239 @@ int dcsrch(double& stp, double ginit = 0.0, gtest = 0.0, gm = 0.0, gx = 0.0, gxm = 0.0, gy = 0.0, gym = 0.0; double stx = 0.0, sty = 0.0, stmin = 0.0, stmax = 0.0, width = 0.0, width1 = 0.0; - extern /* Subroutine */ void dcstep(double&, - double&, - double&, - double&, - double&, - double&, - double&, - double&, - double&, - bool&, - double&, - double&); + extern /* Subroutine */ void dcstep (double&, + double&, + double&, + double&, + double&, + double&, + double&, + double&, + double&, + bool&, + double&, + double&); // c Initialization block. - if (strncmp(task, "START", 5) == 0) - { - // c Check the input arguments for errors. - if (stp < stpmin) + if (strncmp (task, "START", 5) == 0) { - strcpy(task, "ERROR: STP .LT. STPMIN"); - } - if (stp > stpmax) - { - strcpy(task, "ERROR: STP .GT. STPMAX"); - } - if (g >= 0.) - { - strcpy(task, "ERROR: INITIAL G .GE. ZERO"); - } - if (ftol < 0.) - { - strcpy(task, "ERROR: FTOL .LT. ZERO"); - } - if (gtol < 0.) - { - strcpy(task, "ERROR: GTOL .LT. 
ZERO"); - } - if (xtol < 0.) - { - strcpy(task, "ERROR: XTOL .LT. ZERO"); - } - if (stpmin < 0.) - { - strcpy(task, "ERROR: STPMIN .LT. ZERO"); - } - if (stpmax < stpmin) - { - strcpy(task, "ERROR: STPMAX .LT. STPMIN"); - } - - // c Exit if there are errors on input. - - if (strncmp(task, "ERROR", 5) == 0) - { - return 0; + // c Check the input arguments for errors. + if (stp < stpmin) + { + strcpy (task, "ERROR: STP .LT. STPMIN"); + } + if (stp > stpmax) + { + strcpy (task, "ERROR: STP .GT. STPMAX"); + } + if (g >= 0.) + { + strcpy (task, "ERROR: INITIAL G .GE. ZERO"); + } + if (ftol < 0.) + { + strcpy (task, "ERROR: FTOL .LT. ZERO"); + } + if (gtol < 0.) + { + strcpy (task, "ERROR: GTOL .LT. ZERO"); + } + if (xtol < 0.) + { + strcpy (task, "ERROR: XTOL .LT. ZERO"); + } + if (stpmin < 0.) + { + strcpy (task, "ERROR: STPMIN .LT. ZERO"); + } + if (stpmax < stpmin) + { + strcpy (task, "ERROR: STPMAX .LT. STPMIN"); + } + + // c Exit if there are errors on input. + + if (strncmp (task, "ERROR", 5) == 0) + { + return 0; + } + // c Initialize local variables. + brackt = false; + stage = 1; + finit = f; + ginit = g; + gtest = ftol * ginit; + width = stpmax - stpmin; + width1 = width / p5; + + // c The variables stx, fx, gx contain the values of the step, + // c function, and derivative at the best step. + // c The variables sty, fy, gy contain the value of the step, + // c function, and derivative at sty. + // c The variables stp, f, g contain the values of the step, + // c function, and derivative at stp. + + stx = zero; + fx = finit; + gx = ginit; + sty = zero; + fy = finit; + gy = ginit; + stmin = zero; + stmax = stp + stp * xtrapu; + strcpy (task, "FG"); + goto L10; } - // c Initialize local variables. - brackt = false; - stage = 1; - finit = f; - ginit = g; - gtest = ftol * ginit; - width = stpmax - stpmin; - width1 = width / p5; - - // c The variables stx, fx, gx contain the values of the step, - // c function, and derivative at the best step. 
- // c The variables sty, fy, gy contain the value of the step, - // c function, and derivative at sty. - // c The variables stp, f, g contain the values of the step, - // c function, and derivative at stp. - - stx = zero; - fx = finit; - gx = ginit; - sty = zero; - fy = finit; - gy = ginit; - stmin = zero; - stmax = stp + stp * xtrapu; - strcpy(task, "FG"); - goto L10; - } else - { + { - // c Restore local variables. + // c Restore local variables. - if (isave[1] == 1) - { - brackt = true; - } - else - { - brackt = false; + if (isave[1] == 1) + { + brackt = true; + } + else + { + brackt = false; + } + stage = isave[2]; + ginit = dsave[1]; + gtest = dsave[2]; + gx = dsave[3]; + gy = dsave[4]; + finit = dsave[5]; + fx = dsave[6]; + fy = dsave[7]; + stx = dsave[8]; + sty = dsave[9]; + stmin = dsave[10]; + stmax = dsave[11]; + width = dsave[12]; + width1 = dsave[13]; } - stage = isave[2]; - ginit = dsave[1]; - gtest = dsave[2]; - gx = dsave[3]; - gy = dsave[4]; - finit = dsave[5]; - fx = dsave[6]; - fy = dsave[7]; - stx = dsave[8]; - sty = dsave[9]; - stmin = dsave[10]; - stmax = dsave[11]; - width = dsave[12]; - width1 = dsave[13]; - } // c If psi(stp) <= 0 and f'(stp) >= 0 for some step, then the // c algorithm enters the second stage. ftest = finit + stp * gtest; if (stage == 1 && f <= ftest && g >= zero) - stage = 2; + { + stage = 2; + } // c Test for warnings. 
if (brackt && (stp <= stmin || stp >= stmax)) - { - strcpy(task, "WARNING: ROUNDING ERRORS PREVENT PROGRESS"); - } + { + strcpy (task, "WARNING: ROUNDING ERRORS PREVENT PROGRESS"); + } if (brackt && stmax - stmin <= xtol * stmax) - { - strcpy(task, "WARNING: XTOL TEST SATISFIED"); - } + { + strcpy (task, "WARNING: XTOL TEST SATISFIED"); + } if (stp == stpmax && f <= ftest && g <= gtest) - { - strcpy(task, "WARNING: STP = STPMAX"); - } + { + strcpy (task, "WARNING: STP = STPMAX"); + } if (stp == stpmin && (f > ftest || g >= gtest)) - { - strcpy(task, "WARNING: STP = STPMIN"); - } + { + strcpy (task, "WARNING: STP = STPMIN"); + } // c Test for convergence. - if (f <= ftest && std::abs(g) <= gtol * (-ginit)) - { - strcpy(task, "CONVERGENCE"); - // strcpy(task, "CONVERGENCE", 11); - } + if (f <= ftest && std::abs (g) <= gtol * (-ginit)) + { + strcpy (task, "CONVERGENCE"); + // strcpy(task, "CONVERGENCE", 11); + } // c Test for termination. - if (strncmp(task, "WARN", 4) == 0 || strncmp(task, "CONV", 4) == 0) - { - goto L10; - } + if (strncmp (task, "WARN", 4) == 0 || strncmp (task, "CONV", 4) == 0) + { + goto L10; + } // c A modified function is used to predict the step during the // c first stage if a lower function value has been obtained but // c the decrease is not sufficient. if (stage == 1 && f <= fx && f > ftest) - { + { - // c Define the modified function and derivative values. + // c Define the modified function and derivative values. - fm = f - stp * gtest; - fxm = fx - stx * gtest; - fym = fy - sty * gtest; - gm = g - gtest; - gxm = gx - gtest; - gym = gy - gtest; + fm = f - stp * gtest; + fxm = fx - stx * gtest; + fym = fy - sty * gtest; + gm = g - gtest; + gxm = gx - gtest; + gym = gy - gtest; - // c Call dcstep to update stx, sty, and to compute the new step. + // c Call dcstep to update stx, sty, and to compute the new step. 
- dcstep(stx, fxm, gxm, sty, fym, gym, stp, fm, gm, brackt, stmin, stmax); + dcstep (stx, fxm, gxm, sty, fym, gym, stp, fm, gm, brackt, stmin, stmax); - // c Reset the function and derivative values for f. + // c Reset the function and derivative values for f. - fx = fxm + stx * gtest; - fy = fym + sty * gtest; - gx = gxm + gtest; - gy = gym + gtest; - } + fx = fxm + stx * gtest; + fy = fym + sty * gtest; + gx = gxm + gtest; + gy = gym + gtest; + } else - { + { - // c Call dcstep to update stx, sty, and to compute the new step. + // c Call dcstep to update stx, sty, and to compute the new step. - dcstep(stx, fx, gx, sty, fy, gy, stp, f, g, brackt, stmin, stmax); - } + dcstep (stx, fx, gx, sty, fy, gy, stp, f, g, brackt, stmin, stmax); + } // c Decide if a bisection step is needed. if (brackt) - { - if (std::abs(sty - stx) >= p66 * width1) - stp = stx + p5 * (sty - stx); - width1 = width; - width = std::abs(sty - stx); - } + { + if (std::abs (sty - stx) >= p66 * width1) + { + stp = stx + p5 * (sty - stx); + } + width1 = width; + width = std::abs (sty - stx); + } // c Set the minimum and maximum steps allowed for stp. if (brackt) - { - stmin = std::min(stx, sty); - stmax = std::max(stx, sty); - } + { + stmin = std::min (stx, sty); + stmax = std::max (stx, sty); + } else - { - stmin = stp + xtrapl * (stp - stx); - stmax = stp + xtrapu * (stp - stx); - } + { + stmin = stp + xtrapl * (stp - stx); + stmax = stp + xtrapu * (stp - stx); + } // c Force the step to be within the bounds stpmax and stpmin. - stp = std::max(stp, stpmin); - stp = std::min(stp, stpmax); + stp = std::max (stp, stpmin); + stp = std::min (stp, stpmax); // c If further progress is not possible, let stp be the best // c point obtained during the search. if ((brackt && (stp <= stmin || stp >= stmax)) || (brackt && stmax - stmin <= xtol * stmax)) - { - stp = stx; - } + { + stp = stx; + } // c Obtain another function and derivative. 
- strcpy(task, "FG"); + strcpy (task, "FG"); L10: // c Save local variables. if (brackt) - { - isave[1] = 1; - } + { + isave[1] = 1; + } else - { - isave[1] = 0; - } + { + isave[1] = 0; + } isave[2] = stage; dsave[1] = ginit; dsave[2] = gtest; @@ -417,18 +422,19 @@ int dcsrch(double& stp, return 0; } -/* Subroutine */ void dcstep(double& stx, - double& fx, - double& dx, - double& sty, - double& fy, - double& dy, - double& stp, - double& fp, - double& dp, - bool& brackt, - double& stpmin, - double& stpmax) +/* Subroutine */ void + dcstep (double& stx, + double& fx, + double& dx, + double& sty, + double& fy, + double& dy, + double& stp, + double& fp, + double& dp, + bool& brackt, + double& stpmin, + double& stpmax) { // c ********** // c @@ -529,7 +535,7 @@ int dcsrch(double& stp, double gamma, p, q, r, s, sgnd, stpc, stpf, stpq, theta; - sgnd = dp * (dx / std::abs(dx)); + sgnd = dp * (dx / std::abs (dx)); // c First case: A higher function value. The minimum is bracketed. // c If the cubic step is closer to stx than the quadratic step, the @@ -537,28 +543,31 @@ int dcsrch(double& stp, // c quadratic steps is taken. 
if (fp > fx) - { - theta = three * (fx - fp) / (stp - stx) + dx + dp; - double temps = std::max(std::abs(theta), std::abs(dx)); // get max(std::abs(theta),std::abs(dx),std::abs(dp)) - s = std::max(temps, std::abs(dp)); - gamma = s * sqrt(pow(theta / s, 2) - (dx / s) * (dp / s)); - if (stp < stx) - gamma = -gamma; - p = (gamma - dx) + theta; - q = ((gamma - dx) + gamma) + dp; - r = p / q; - stpc = stx + r * (stp - stx); - stpq = stx + ((dx / ((fx - fp) / (stp - stx) + dx)) / two) * (stp - stx); - if (std::abs(stpc - stx) < std::abs(stpq - stx)) - { - stpf = stpc; - } - else { - stpf = stpc + (stpq - stpc) / two; + theta = three * (fx - fp) / (stp - stx) + dx + dp; + double temps + = std::max (std::abs (theta), std::abs (dx)); // get max(std::abs(theta),std::abs(dx),std::abs(dp)) + s = std::max (temps, std::abs (dp)); + gamma = s * sqrt (pow (theta / s, 2) - (dx / s) * (dp / s)); + if (stp < stx) + { + gamma = -gamma; + } + p = (gamma - dx) + theta; + q = ((gamma - dx) + gamma) + dp; + r = p / q; + stpc = stx + r * (stp - stx); + stpq = stx + ((dx / ((fx - fp) / (stp - stx) + dx)) / two) * (stp - stx); + if (std::abs (stpc - stx) < std::abs (stpq - stx)) + { + stpf = stpc; + } + else + { + stpf = stpc + (stpq - stpc) / two; + } + brackt = true; } - brackt = true; - } // c Second case: A lower function value and derivatives of opposite // c sign. The minimum is bracketed. If the cubic step is farther from @@ -566,167 +575,176 @@ int dcsrch(double& stp, // c secant step is taken. 
else if (sgnd < zero) - { - theta = three * (fx - fp) / (stp - stx) + dx + dp; - double temps = std::max(std::abs(theta), std::abs(dx)); // get max(std::abs(theta),std::abs(dx),std::abs(dp)) - s = std::max(temps, std::abs(dp)); - gamma = s * sqrt(pow(theta / s, 2) - (dx / s) * (dp / s)); - if (stp > stx) - gamma = -gamma; - p = (gamma - dp) + theta; - q = ((gamma - dp) + gamma) + dx; - r = p / q; - stpc = stp + r * (stx - stp); - stpq = stp + (dp / (dp - dx)) * (stx - stp); - if (std::abs(stpc - stp) > std::abs(stpq - stp)) - { - stpf = stpc; - } - else { - stpf = stpq; + theta = three * (fx - fp) / (stp - stx) + dx + dp; + double temps + = std::max (std::abs (theta), std::abs (dx)); // get max(std::abs(theta),std::abs(dx),std::abs(dp)) + s = std::max (temps, std::abs (dp)); + gamma = s * sqrt (pow (theta / s, 2) - (dx / s) * (dp / s)); + if (stp > stx) + { + gamma = -gamma; + } + p = (gamma - dp) + theta; + q = ((gamma - dp) + gamma) + dx; + r = p / q; + stpc = stp + r * (stx - stp); + stpq = stp + (dp / (dp - dx)) * (stx - stp); + if (std::abs (stpc - stp) > std::abs (stpq - stp)) + { + stpf = stpc; + } + else + { + stpf = stpq; + } + brackt = true; } - brackt = true; - } // c Third case: A lower function value, derivatives of the same sign, // c and the magnitude of the derivative decreases. - else if (std::abs(dp) < std::abs(dx)) - { - // c The cubic step is computed only if the cubic tends to infinity - // c in the direction of the step or if the minimum of the cubic - // c is beyond stp. Otherwise the cubic step is defined to be the - // c secant step. - theta = three * (fx - fp) / (stp - stx) + dx + dp; - double temps = std::max(std::abs(theta), std::abs(dx)); // get max(std::abs(theta),std::abs(dx),std::abs(dp)) - s = std::max(temps, std::abs(dp)); - // c The case gamma = 0 only arises if the cubic does not tend - // c to infinity in the direction of the step. 
- gamma = s * sqrt(std::max(zero, pow(theta / s, 2) - (dx / s) * (dp / s))); - if (stp > stx) - gamma = -gamma; - p = (gamma - dp) + theta; - q = (gamma + (dx - dp)) + gamma; - r = p / q; - if (r < zero && gamma != zero) + else if (std::abs (dp) < std::abs (dx)) { - stpc = stp + r * (stx - stp); - } - else if (stp > stx) - { - stpc = stpmax; - } - else - { - stpc = stpmin; - } - stpq = stp + (dp / (dp - dx)) * (stx - stp); - - if (brackt) - { - // c A minimizer has been bracketed. If the cubic step is - // c closer to stp than the secant step, the cubic step is - // c taken, otherwise the secant step is taken. - if (std::abs(stpc - stp) < std::abs(stpq - stp)) - { - stpf = stpc; - } - else - { - stpf = stpq; - } + // c The cubic step is computed only if the cubic tends to infinity + // c in the direction of the step or if the minimum of the cubic + // c is beyond stp. Otherwise the cubic step is defined to be the + // c secant step. + theta = three * (fx - fp) / (stp - stx) + dx + dp; + double temps + = std::max (std::abs (theta), std::abs (dx)); // get max(std::abs(theta),std::abs(dx),std::abs(dp)) + s = std::max (temps, std::abs (dp)); + // c The case gamma = 0 only arises if the cubic does not tend + // c to infinity in the direction of the step. + gamma = s * sqrt (std::max (zero, pow (theta / s, 2) - (dx / s) * (dp / s))); if (stp > stx) - { - stpf = std::min(stp + p66 * (sty - stp), stpf); - } + { + gamma = -gamma; + } + p = (gamma - dp) + theta; + q = (gamma + (dx - dp)) + gamma; + r = p / q; + if (r < zero && gamma != zero) + { + stpc = stp + r * (stx - stp); + } + else if (stp > stx) + { + stpc = stpmax; + } else - { - stpf = std::max(stp + p66 * (sty - stp), stpf); - } - } - else - { - // c A minimizer has not been bracketed. If the cubic step is - // c farther from stp than the secant step, the cubic step is - // c taken, otherwise the secant step is taken. 
- if (std::abs(stpc - stp) > std::abs(stpq - stp)) - { - stpf = stpc; - } + { + stpc = stpmin; + } + stpq = stp + (dp / (dp - dx)) * (stx - stp); + + if (brackt) + { + // c A minimizer has been bracketed. If the cubic step is + // c closer to stp than the secant step, the cubic step is + // c taken, otherwise the secant step is taken. + if (std::abs (stpc - stp) < std::abs (stpq - stp)) + { + stpf = stpc; + } + else + { + stpf = stpq; + } + if (stp > stx) + { + stpf = std::min (stp + p66 * (sty - stp), stpf); + } + else + { + stpf = std::max (stp + p66 * (sty - stp), stpf); + } + } else - { - stpf = stpq; - } - stpf = std::min(stpmax, stpf); - stpf = std::max(stpmin, stpf); + { + // c A minimizer has not been bracketed. If the cubic step is + // c farther from stp than the secant step, the cubic step is + // c taken, otherwise the secant step is taken. + if (std::abs (stpc - stp) > std::abs (stpq - stp)) + { + stpf = stpc; + } + else + { + stpf = stpq; + } + stpf = std::min (stpmax, stpf); + stpf = std::max (stpmin, stpf); + } } - } // c Fourth case: A lower function value, derivatives of the same sign, // c and the magnitude of the derivative does not decrease. If the // c minimum is not bracketed, the step is either stpmin or stpmax, // c otherwise the cubic step is taken. 
else - { - if (brackt) - { - theta = three * (fp - fy) / (sty - stp) + dy + dp; - double temps - = std::max(std::abs(theta), std::abs(dy)); // get max(std::abs(theta),std::abs(dy),std::abs(dp)) - s = std::max(temps, std::abs(dp)); - gamma = s * sqrt(pow(theta / s, 2) - (dy / s) * (dp / s)); - if (stp > sty) - gamma = -gamma; - p = (gamma - dp) + theta; - q = ((gamma - dp) + gamma) + dy; - r = p / q; - stpc = stp + r * (sty - stp); - stpf = stpc; - } - else if (stp > stx) { - stpf = stpmax; - } - else - { - stpf = stpmin; + if (brackt) + { + theta = three * (fp - fy) / (sty - stp) + dy + dp; + double temps = std::max (std::abs (theta), + std::abs (dy)); // get max(std::abs(theta),std::abs(dy),std::abs(dp)) + s = std::max (temps, std::abs (dp)); + gamma = s * sqrt (pow (theta / s, 2) - (dy / s) * (dp / s)); + if (stp > sty) + { + gamma = -gamma; + } + p = (gamma - dp) + theta; + q = ((gamma - dp) + gamma) + dy; + r = p / q; + stpc = stp + r * (sty - stp); + stpf = stpc; + } + else if (stp > stx) + { + stpf = stpmax; + } + else + { + stpf = stpmin; + } } - } // c Update the interval which contains a minimizer. if (fp > fx) - { - sty = stp; - fy = fp; - dy = dp; - } + { + sty = stp; + fy = fp; + dy = dp; + } else - { - if (sgnd < zero) { - sty = stx; - fy = fx; - dy = dx; + if (sgnd < zero) + { + sty = stx; + fy = fx; + dy = dx; + } + stx = stp; + fx = fp; + dx = dp; } - stx = stp; - fx = fp; - dx = dp; - } // c Compute the new step. 
stp = stpf; } -void Opt_DCsrch::dcSrch(double& f, double& g, double& rstp, char* rtask) +void + Opt_DCsrch::dcSrch (double& f, double& g, double& rstp, char* rtask) { - dcsrch(rstp, - f, - g, - this->ftol_, - this->gtol_, - this->xtol_, - rtask, - this->stpmin_, - this->stpmax_, - this->isave_, - this->dsave_); + dcsrch (rstp, + f, + g, + this->ftol_, + this->gtol_, + this->xtol_, + rtask, + this->stpmin_, + this->stpmax_, + this->isave_, + this->dsave_); } } // namespace ModuleBase diff --git a/source/source_base/opt_DCsrch.h b/source/source_base/opt_DCsrch.h index 6432833350c..53b0e61593a 100644 --- a/source/source_base/opt_DCsrch.h +++ b/source/source_base/opt_DCsrch.h @@ -15,12 +15,12 @@ namespace ModuleBase class Opt_DCsrch { public: - Opt_DCsrch() + Opt_DCsrch () { this->isave_ = new int[3]; this->dsave_ = new double[14]; } - ~Opt_DCsrch() + ~Opt_DCsrch () { delete[] this->isave_; delete[] this->dsave_; @@ -43,7 +43,8 @@ class Opt_DCsrch * @param stpmin nonnegative lower bound for the step. * @param stpmax nonnegative upper bound for the step. */ - void set_paras(double ftol = 1e-4, + void + set_paras (double ftol = 1e-4, double gtol = 2e-1, double xtol = 1e-12, double stpmin = 0., @@ -73,7 +74,7 @@ class Opt_DCsrch * The exit value of stp contains the best point found during the search. * If task(1:5) = 'ERROR' then there is an error in the input arguments. */ - void dcSrch(double& f, double& g, double& rstp, char* rtask); + void dcSrch (double& f, double& g, double& rstp, char* rtask); private: double ftol_ = 1e-4; // nonnegative tolerance for the sufficient decrease condition. 
diff --git a/source/source_base/opt_TN.hpp b/source/source_base/opt_TN.hpp index aad30b0f600..53b780a3e75 100644 --- a/source/source_base/opt_TN.hpp +++ b/source/source_base/opt_TN.hpp @@ -21,27 +21,29 @@ namespace ModuleBase class Opt_TN { public: - Opt_TN() + Opt_TN () { - this->mach_prec_ = std::numeric_limits::epsilon(); // get machine precise + this->mach_prec_ = std::numeric_limits::epsilon (); // get machine precise } - ~Opt_TN() {}; + ~Opt_TN () {}; /** * @brief Allocate the space for the arrays in cg_. * * @param nx length of the solution array x */ - void allocate(int nx) + void + allocate (int nx) { this->nx_ = nx; - this->cg_.allocate(this->nx_); + this->cg_.allocate (this->nx_); } - void set_para(double dV) + void + set_para (double dV) { this->dV_ = dV; - this->cg_.set_para(this->dV_); + this->cg_.set_para (this->dV_); } /** @@ -50,29 +52,31 @@ class Opt_TN * * @param nx_new length of new x, default 0 means the length doesn't change */ - void refresh(int nx_new = 0) + void + refresh (int nx_new = 0) { this->iter_ = 0; if (nx_new != 0) - { - this->nx_ = nx_new; - } - this->cg_.refresh(nx_new); + { + this->nx_ = nx_new; + } + this->cg_.refresh (nx_new); } template - void next_direct( + void next_direct ( double* px, // current x double* pgradient, // df(x)/dx int& flag, // record which truncated condition was triggered, 0 for cond.1, 1 for cond.2, and 2 for cond.3 double* rdirect, // next optimization direction T* t, // point of class T, which contains the gradient function - void (T::*p_calGradient)( + void (T::*p_calGradient) ( double* ptemp_x, double* rtemp_gradient) // a function point, which calculates the gradient at provided x ); - int get_iter() + int + get_iter () { return this->iter_; } @@ -85,9 +89,10 @@ class Opt_TN int iter_ = 0; // number of the iteration double mach_prec_ = 0.; // machine precision - double inner_product(double* pa, double* pb, int length) + double + inner_product (double* pa, double* pb, int length) { - double 
innerproduct = BlasConnector::dot(length, pa, 1, pb, 1); + double innerproduct = BlasConnector::dot (length, pa, 1, pb, 1); innerproduct *= this->dV_; return innerproduct; } @@ -100,14 +105,15 @@ class Opt_TN * @param pcg_direction the direction of cg_ * @return epsilon */ - double get_epsilon(double* px, double* pcg_direction) + double + get_epsilon (double* px, double* pcg_direction) { double epsilon = 0.; - double xx = this->inner_product(px, px, this->nx_); - Parallel_Reduce::reduce_all(xx); - double dd = this->inner_product(pcg_direction, pcg_direction, this->nx_); - Parallel_Reduce::reduce_all(dd); - epsilon = 2 * sqrt(this->mach_prec_) * (1 + sqrt(xx)) / sqrt(dd); + double xx = this->inner_product (px, px, this->nx_); + Parallel_Reduce::reduce_all (xx); + double dd = this->inner_product (pcg_direction, pcg_direction, this->nx_); + Parallel_Reduce::reduce_all (dd); + epsilon = 2 * sqrt (this->mach_prec_) * (1 + sqrt (xx)) / sqrt (dd); // epsilon = 2 * sqrt(this->mach_prec_) * (1 + sqrt(this->inner_product(px, px, this->nx_))) // / sqrt(this->inner_product(pcg_direction, pcg_direction, this->nx_)); return epsilon; @@ -126,15 +132,16 @@ class Opt_TN * @param [in] p_calGradient a function pointer, which calculates gradient at provided x. 
*/ template -void Opt_TN::next_direct(double* px, +void + Opt_TN::next_direct (double* px, double* pgradient, int& flag, double* rdirect, T* t, - void (T::*p_calGradient)(double* px, double* rgradient)) + void (T::*p_calGradient) (double* px, double* rgradient)) { // initialize arrays and parameters - ModuleBase::GlobalFunc::ZEROS(rdirect, this->nx_); // very important + ModuleBase::GlobalFunc::ZEROS (rdirect, this->nx_); // very important double* minus_gradient = new double[this->nx_]; // b=-g, which will be used in CG double* temp_x = new double[this->nx_]; // temp_x = x + step * cg_direct, used in interpolation @@ -142,15 +149,15 @@ void Opt_TN::next_direct(double* px, double* cg_direct = new double[this->nx_]; // rdirect += cg_alpha * cg_direct at each step double* temp_Hcgd = new double[this->nx_]; // Hessian * cg_direct for (int i = 0; i < this->nx_; ++i) - { - minus_gradient[i] = -pgradient[i]; - } - ModuleBase::GlobalFunc::ZEROS(cg_direct, this->nx_); - ModuleBase::GlobalFunc::ZEROS(temp_x, this->nx_); - ModuleBase::GlobalFunc::ZEROS(temp_gradient, this->nx_); - ModuleBase::GlobalFunc::ZEROS(temp_Hcgd, this->nx_); + { + minus_gradient[i] = -pgradient[i]; + } + ModuleBase::GlobalFunc::ZEROS (cg_direct, this->nx_); + ModuleBase::GlobalFunc::ZEROS (temp_x, this->nx_); + ModuleBase::GlobalFunc::ZEROS (temp_gradient, this->nx_); + ModuleBase::GlobalFunc::ZEROS (temp_Hcgd, this->nx_); - cg_.refresh(0, minus_gradient); + cg_.refresh (0, minus_gradient); int cg_iter = 0; int cg_ifPD = 0; @@ -161,73 +168,73 @@ void Opt_TN::next_direct(double* px, double curr_residual = 0.; // current residual of CG while (true) - { - cg_.next_direct(temp_Hcgd, 0, cg_direct); - - // get temp_Hcgd with interpolation - // Hcgd = (df(temp_x)/dx - df(x)/x) / epsilon, where temp_x = x + step * cg_direct - epsilon = this->get_epsilon(px, cg_direct); - // epsilon = 1e-9; - for (int i = 0; i < this->nx_; ++i) - { - temp_x[i] = px[i] + epsilon * cg_direct[i]; - } - (t->*p_calGradient)(temp_x, 
temp_gradient); - for (int i = 0; i < this->nx_; ++i) { - temp_Hcgd[i] = (temp_gradient[i] - pgradient[i]) / epsilon; - } + cg_.next_direct (temp_Hcgd, 0, cg_direct); - // get CG step length and update rdirect - cg_alpha = cg_.step_length(temp_Hcgd, cg_direct, cg_ifPD); - if (cg_ifPD == -1) // Hessian is not positive definite, and cgiter = 1. - { + // get temp_Hcgd with interpolation + // Hcgd = (df(temp_x)/dx - df(x)/x) / epsilon, where temp_x = x + step * cg_direct + epsilon = this->get_epsilon (px, cg_direct); + // epsilon = 1e-9; for (int i = 0; i < this->nx_; ++i) - { - rdirect[i] += cg_alpha * cg_direct[i]; - } - flag = -1; - break; - } - else if (cg_ifPD == -2) // Hessian is not positive definite, and cgiter > 1. - { - flag = -2; - break; - } - - for (int i = 0; i < this->nx_; ++i) - { - rdirect[i] += cg_alpha * cg_direct[i]; - } - - // store residuals used in truncated conditions - last_residual = curr_residual; - curr_residual = cg_.get_residual(); - cg_iter = cg_.get_iter(); - if (cg_iter == 1) - { - init_residual = curr_residual; - } + { + temp_x[i] = px[i] + epsilon * cg_direct[i]; + } + (t->*p_calGradient) (temp_x, temp_gradient); + for (int i = 0; i < this->nx_; ++i) + { + temp_Hcgd[i] = (temp_gradient[i] - pgradient[i]) / epsilon; + } + + // get CG step length and update rdirect + cg_alpha = cg_.step_length (temp_Hcgd, cg_direct, cg_ifPD); + if (cg_ifPD == -1) // Hessian is not positive definite, and cgiter = 1. + { + for (int i = 0; i < this->nx_; ++i) + { + rdirect[i] += cg_alpha * cg_direct[i]; + } + flag = -1; + break; + } + else if (cg_ifPD == -2) // Hessian is not positive definite, and cgiter > 1. 
+ { + flag = -2; + break; + } - // check truncated conditions - // if (curr_residual < 1e-12) - if (curr_residual < 0.1 * init_residual) - { - flag = 0; - // std::cout << "cg_ iter_ = " << cg_iter << "\n"; - break; - } - else if (cg_iter > 50) - { - flag = 1; - break; - } - else if ((fabs(curr_residual - last_residual) / curr_residual) < 0.01 && cg_iter > 9) - { - flag = 2; - break; + for (int i = 0; i < this->nx_; ++i) + { + rdirect[i] += cg_alpha * cg_direct[i]; + } + + // store residuals used in truncated conditions + last_residual = curr_residual; + curr_residual = cg_.get_residual (); + cg_iter = cg_.get_iter (); + if (cg_iter == 1) + { + init_residual = curr_residual; + } + + // check truncated conditions + // if (curr_residual < 1e-12) + if (curr_residual < 0.1 * init_residual) + { + flag = 0; + // std::cout << "cg_ iter_ = " << cg_iter << "\n"; + break; + } + else if (cg_iter > 50) + { + flag = 1; + break; + } + else if ((fabs (curr_residual - last_residual) / curr_residual) < 0.01 && cg_iter > 9) + { + flag = 2; + break; + } } - } this->iter_++; delete[] minus_gradient; delete[] temp_gradient; diff --git a/source/source_base/para_gemm.cpp b/source/source_base/para_gemm.cpp index 3e56aa83ac2..41743fc9578 100644 --- a/source/source_base/para_gemm.cpp +++ b/source/source_base/para_gemm.cpp @@ -6,41 +6,42 @@ namespace ModuleBase { template -PGemmCN::PGemmCN() +PGemmCN::PGemmCN () { } template -PGemmCN::~PGemmCN() +PGemmCN::~PGemmCN () { #ifdef __MPI - delmem_dev_op()(C_local_tmp_); - delmem_dev_op()(A_tmp_device_); - delmem_dev_op()(B_tmp_device_); + delmem_dev_op () (C_local_tmp_); + delmem_dev_op () (A_tmp_device_); + delmem_dev_op () (B_tmp_device_); #endif } template -void PGemmCN::set_dimension( +void + PGemmCN::set_dimension ( #ifdef __MPI - MPI_Comm comm_col, - MPI_Comm comm_row, + MPI_Comm comm_col, + MPI_Comm comm_row, #endif - const int ncolA_in, - const int LDA_in, - const int ncolB_in, - const int LDB_in, - const int nrow_in, - const int LDC_in, - 
const int mode) + const int ncolA_in, + const int LDA_in, + const int ncolB_in, + const int LDB_in, + const int nrow_in, + const int LDC_in, + const int mode) { #ifdef __MPI - MPI_Comm_rank(comm_col, &col_rank); - MPI_Comm_size(comm_col, &col_nproc); + MPI_Comm_rank (comm_col, &col_rank); + MPI_Comm_size (comm_col, &col_nproc); if (comm_row != MPI_COMM_NULL) - { - MPI_Comm_rank(comm_row, &row_rank); - MPI_Comm_size(comm_row, &row_nproc); - } + { + MPI_Comm_rank (comm_row, &row_rank); + MPI_Comm_size (comm_row, &row_nproc); + } col_world = comm_col; row_world = comm_row; #endif @@ -52,123 +53,125 @@ void PGemmCN::set_dimension( this->nrow = nrow_in; #ifdef __MPI switch (mode) - { - case 1: - gatherC = true; - divideCrow = false; - break; - case 2: - gatherC = false; - divideCrow = false; - break; - case 3: - gatherC = false; - divideCrow = true; - break; - default: - break; - } - - if(col_nproc > 1) - { - requests.resize(col_nproc); - if (this->divideCrow) { - colB_loc.resize(col_nproc); - MPI_Allgather(&ncolB, 1, MPI_INT, colB_loc.data(), 1, MPI_INT, col_world); - int sum = 0; - for (int ip = 0; ip < col_nproc; ip++) - { - max_colB = std::max(max_colB, colB_loc[ip]); - sum += colB_loc[ip]; - } - size_C_local = sum * LDC; - - // allocate temperory memory - if (std::is_same::value) - { - resmem_dev_op()(B_tmp_device_, max_colB * LDB); - } - B_tmp_.resize(max_colB * LDB); + case 1: + gatherC = true; + divideCrow = false; + break; + case 2: + gatherC = false; + divideCrow = false; + break; + case 3: + gatherC = false; + divideCrow = true; + break; + default: + break; } - else + + if (col_nproc > 1) { - colA_loc.resize(col_nproc); - MPI_Allgather(&ncolA, 1, MPI_INT, colA_loc.data(), 1, MPI_INT, col_world); - for (int ip = 0; ip < col_nproc; ip++) - { - max_colA = std::max(max_colA, colA_loc[ip]); - } - size_C_local = ncolB * LDC; + requests.resize (col_nproc); + if (this->divideCrow) + { + colB_loc.resize (col_nproc); + MPI_Allgather (&ncolB, 1, MPI_INT, colB_loc.data 
(), 1, MPI_INT, col_world); + int sum = 0; + for (int ip = 0; ip < col_nproc; ip++) + { + max_colB = std::max (max_colB, colB_loc[ip]); + sum += colB_loc[ip]; + } + size_C_local = sum * LDC; - // allocate temperory memory - if (std::is_same::value) - { - resmem_dev_op()(A_tmp_device_, max_colA * LDA); + // allocate temperory memory + if (std::is_same::value) + { + resmem_dev_op () (B_tmp_device_, max_colB * LDB); + } + B_tmp_.resize (max_colB * LDB); + } + else + { + colA_loc.resize (col_nproc); + MPI_Allgather (&ncolA, 1, MPI_INT, colA_loc.data (), 1, MPI_INT, col_world); + for (int ip = 0; ip < col_nproc; ip++) + { + max_colA = std::max (max_colA, colA_loc[ip]); + } + size_C_local = ncolB * LDC; + + // allocate temperory memory + if (std::is_same::value) + { + resmem_dev_op () (A_tmp_device_, max_colA * LDA); #ifndef __CUDA_MPI - isend_tmp_.resize(max_colA * LDA); + isend_tmp_.resize (max_colA * LDA); #endif - } - A_tmp_.resize(max_colA * LDA); - } + } + A_tmp_.resize (max_colA * LDA); + } - if (this->gatherC) - { - colB_loc.resize(col_nproc); - recv_counts.resize(col_nproc); - displs.resize(col_nproc); - MPI_Allgather(&ncolB, 1, MPI_INT, colB_loc.data(), 1, MPI_INT, col_world); - for (int ip = 0; ip < col_nproc; ip++) - { - recv_counts[ip] = LDC * colB_loc[ip]; - } - displs[0] = 0; - for (int ip = 1; ip < col_nproc; ip++) - { - displs[ip] = displs[ip - 1] + recv_counts[ip - 1]; - } - size_C_global = displs[col_nproc - 1] + recv_counts[col_nproc - 1]; + if (this->gatherC) + { + colB_loc.resize (col_nproc); + recv_counts.resize (col_nproc); + displs.resize (col_nproc); + MPI_Allgather (&ncolB, 1, MPI_INT, colB_loc.data (), 1, MPI_INT, col_world); + for (int ip = 0; ip < col_nproc; ip++) + { + recv_counts[ip] = LDC * colB_loc[ip]; + } + displs[0] = 0; + for (int ip = 1; ip < col_nproc; ip++) + { + displs[ip] = displs[ip - 1] + recv_counts[ip - 1]; + } + size_C_global = displs[col_nproc - 1] + recv_counts[col_nproc - 1]; - // allocate temperory memory - if 
(std::is_same::value) - { - resmem_dev_op()(C_local_tmp_, size_C_local); + // allocate temperory memory + if (std::is_same::value) + { + resmem_dev_op () (C_local_tmp_, size_C_local); #if !defined(__CUDA_MPI) && !defined(__NCCL_PARALLEL_DEVICE) - C_global_tmp_.resize(size_C_global); + C_global_tmp_.resize (size_C_global); #endif - } - C_tmp_.resize(size_C_local); + } + C_tmp_.resize (size_C_local); + } } - } #endif } template -void PGemmCN::multiply(const T alpha, const T* A, const T* B, const T beta, T* C) +void + PGemmCN::multiply (const T alpha, const T* A, const T* B, const T beta, T* C) { - ModuleBase::timer::start("PGemmCN", "multiply"); + ModuleBase::timer::start ("PGemmCN", "multiply"); #ifdef __MPI if (this->col_nproc > 1) - { - if (this->divideCrow) - { - multiply_row(alpha, A, B, beta, C); - } - else { - multiply_col(alpha, A, B, beta, C); + if (this->divideCrow) + { + multiply_row (alpha, A, B, beta, C); + } + else + { + multiply_col (alpha, A, B, beta, C); + } } - } else #endif - { - multiply_single(alpha, A, B, beta, C); - } - ModuleBase::timer::end("PGemmCN", "multiply"); + { + multiply_single (alpha, A, B, beta, C); + } + ModuleBase::timer::end ("PGemmCN", "multiply"); } template -void PGemmCN::multiply_single(const T alpha, const T* A, const T* B, const T beta, T* C) +void + PGemmCN::multiply_single (const T alpha, const T* A, const T* B, const T beta, T* C) { const Device* ctx = {}; #ifdef __MPI @@ -176,214 +179,215 @@ void PGemmCN::multiply_single(const T alpha, const T* A, const T* B, #else T real_beta = beta; #endif - ModuleBase::gemm_op()('C', 'N', ncolA, ncolB, nrow, &alpha, A, LDA, B, LDB, &real_beta, C, LDC); + ModuleBase::gemm_op () ('C', 'N', ncolA, ncolB, nrow, &alpha, A, LDA, B, LDB, &real_beta, C, LDC); #ifdef __MPI if (this->row_nproc > 1) - { - const int size = ncolB * LDC; - Parallel_Common::reduce_dev(C, size, row_world); - } + { + const int size = ncolB * LDC; + Parallel_Common::reduce_dev (C, size, row_world); + } #endif } #ifdef 
__MPI template -void PGemmCN::multiply_col(const T alpha, const T* A, const T* B, const T beta, T* C) +void + PGemmCN::multiply_col (const T alpha, const T* A, const T* B, const T beta, T* C) { const Device* ctx = {}; // send A to other procs - T* isend_tmp = isend_tmp_.data(); + T* isend_tmp = isend_tmp_.data (); for (int ip = 0; ip < col_nproc; ip++) - { - if (col_rank != ip) { - int size = ncolA * LDA; - Parallel_Common::isend_dev(A, size, ip, 0, col_world, &requests[ip], isend_tmp); + if (col_rank != ip) + { + int size = ncolA * LDA; + Parallel_Common::isend_dev (A, size, ip, 0, col_world, &requests[ip], isend_tmp); + } } - } - - //init pointers + // init pointers T* C_local = C; if (this->gatherC) - { - if (std::is_same::value) { - C_local = C_local_tmp_; - } - else - { - C_local = C_tmp_.data(); + if (std::is_same::value) + { + C_local = C_local_tmp_; + } + else + { + C_local = C_tmp_.data (); + } + syncmem_dev_op () (C_local, C + displs[col_rank], size_C_local); } - syncmem_dev_op()(C_local, C + displs[col_rank], size_C_local); - } T* Atmp_device = nullptr; if (std::is_same::value) - { - Atmp_device = A_tmp_device_; - } + { + Atmp_device = A_tmp_device_; + } else - { - Atmp_device = A_tmp_.data(); - } + { + Atmp_device = A_tmp_.data (); + } // multiply int shift = 0; T real_beta = row_rank == 0 ? 
beta : 0; for (int ip = 0; ip < col_nproc; ip++) - { - T* C_start = C_local + shift; - if (col_rank == ip) - { - ModuleBase::gemm_op()('C', - 'N', - ncolA, - ncolB, - nrow, - &alpha, - A, - LDA, - B, - LDB, - &real_beta, - C_start, - LDC); - shift += ncolA; - } - else { - int m = colA_loc[ip]; - int size = m * LDA; - MPI_Status status; - Parallel_Common::recv_dev(Atmp_device, size, ip, 0, col_world, &status, A_tmp_.data()); - MPI_Wait(&requests[ip], &status); - ModuleBase::gemm_op()('C', - 'N', - m, - ncolB, - nrow, - &alpha, - Atmp_device, - LDA, - B, - LDB, - &real_beta, - C_start, - LDC); - shift += m; + T* C_start = C_local + shift; + if (col_rank == ip) + { + ModuleBase::gemm_op () ('C', + 'N', + ncolA, + ncolB, + nrow, + &alpha, + A, + LDA, + B, + LDB, + &real_beta, + C_start, + LDC); + shift += ncolA; + } + else + { + int m = colA_loc[ip]; + int size = m * LDA; + MPI_Status status; + Parallel_Common::recv_dev (Atmp_device, size, ip, 0, col_world, &status, A_tmp_.data ()); + MPI_Wait (&requests[ip], &status); + ModuleBase::gemm_op () ('C', + 'N', + m, + ncolB, + nrow, + &alpha, + Atmp_device, + LDA, + B, + LDB, + &real_beta, + C_start, + LDC); + shift += m; + } } - } if (this->gatherC) - { - T* reduce_tmp = nullptr; - T* gather_tmp = nullptr; -#if !defined(__CUDA_MPI) && !defined(__NCCL_PARALLEL_DEVICE) - if (std::is_same::value) { - reduce_tmp = C_tmp_.data(); - gather_tmp = C_global_tmp_.data(); - } + T* reduce_tmp = nullptr; + T* gather_tmp = nullptr; +#if !defined(__CUDA_MPI) && !defined(__NCCL_PARALLEL_DEVICE) + if (std::is_same::value) + { + reduce_tmp = C_tmp_.data (); + gather_tmp = C_global_tmp_.data (); + } #endif - if (this->row_nproc > 1) - { - Parallel_Common::reduce_dev(C_local, size_C_local, row_world, reduce_tmp); + if (this->row_nproc > 1) + { + Parallel_Common::reduce_dev (C_local, size_C_local, row_world, reduce_tmp); + } + Parallel_Common::gatherv_dev (C_local, + size_C_local, + C, + recv_counts.data (), + displs.data (), + col_world, + 
reduce_tmp, + gather_tmp); } - Parallel_Common::gatherv_dev(C_local, - size_C_local, - C, - recv_counts.data(), - displs.data(), - col_world, - reduce_tmp, - gather_tmp); - } else - { - if (this->row_nproc > 1) { - Parallel_Common::reduce_dev(C, size_C_local, row_world); + if (this->row_nproc > 1) + { + Parallel_Common::reduce_dev (C, size_C_local, row_world); + } } - } } template -void PGemmCN::multiply_row(const T alpha, const T* A, const T* B, const T beta, T* C) +void + PGemmCN::multiply_row (const T alpha, const T* A, const T* B, const T beta, T* C) { const Device* ctx = {}; // Send B to other procs for (int ip = 0; ip < col_nproc; ip++) - { - if (col_rank != ip) { - int size = ncolB * LDB; - Parallel_Common::isend_dev(B, size, ip, 0, col_world, &requests[ip], B_tmp_.data()); + if (col_rank != ip) + { + int size = ncolB * LDB; + Parallel_Common::isend_dev (B, size, ip, 0, col_world, &requests[ip], B_tmp_.data ()); + } } - } // init pointers T* Btmp_device = nullptr; if (std::is_same::value) - { - Btmp_device = B_tmp_device_; - } + { + Btmp_device = B_tmp_device_; + } else - { - Btmp_device = B_tmp_.data(); - } + { + Btmp_device = B_tmp_.data (); + } // multiply int shift = 0; T real_beta = row_rank == 0 ? 
beta : 0; for (int ip = 0; ip < col_nproc; ip++) - { - T* C_start = C + shift; - if (col_rank == ip) { - ModuleBase::gemm_op()('C', - 'N', - ncolA, - ncolB, - nrow, - &alpha, - A, - LDA, - B, - LDB, - &real_beta, - C_start, - LDC); - shift += ncolB * LDC; + T* C_start = C + shift; + if (col_rank == ip) + { + ModuleBase::gemm_op () ('C', + 'N', + ncolA, + ncolB, + nrow, + &alpha, + A, + LDA, + B, + LDB, + &real_beta, + C_start, + LDC); + shift += ncolB * LDC; + } + else + { + int m = colB_loc[ip]; + int size = m * LDB; + MPI_Status status; + Parallel_Common::recv_dev (Btmp_device, size, ip, 0, col_world, &status, B_tmp_.data ()); + MPI_Wait (&requests[ip], &status); + ModuleBase::gemm_op () ('C', + 'N', + ncolA, + m, + nrow, + &alpha, + A, + LDA, + Btmp_device, + LDB, + &real_beta, + C_start, + LDC); + shift += m * LDC; + } } - else + if (this->row_nproc > 1) { - int m = colB_loc[ip]; - int size = m * LDB; - MPI_Status status; - Parallel_Common::recv_dev(Btmp_device, size, ip, 0, col_world, &status, B_tmp_.data()); - MPI_Wait(&requests[ip], &status); - ModuleBase::gemm_op()('C', - 'N', - ncolA, - m, - nrow, - &alpha, - A, - LDA, - Btmp_device, - LDB, - &real_beta, - C_start, - LDC); - shift += m * LDC; + Parallel_Common::reduce_dev (C, size_C_local, row_world); } - } - if (this->row_nproc > 1) - { - Parallel_Common::reduce_dev(C, size_C_local, row_world); - } } #endif diff --git a/source/source_base/para_gemm.h b/source/source_base/para_gemm.h index 6cf391d56d3..d3cd60089b6 100644 --- a/source/source_base/para_gemm.h +++ b/source/source_base/para_gemm.h @@ -24,8 +24,8 @@ template class PGemmCN { public: - PGemmCN(); - ~PGemmCN(); + PGemmCN (); + ~PGemmCN (); /** * @brief set the dimension of A, B, and C @@ -38,7 +38,7 @@ class PGemmCN * @param LDC leading dimension of C. 
C can be C_local or C_global * @param mode 1: gather C_local to C_global, 2:C_local(nrow * ncol_loc), 3:C_global(nrow_loc * ncol) */ - void set_dimension( + void set_dimension ( #ifdef __MPI MPI_Comm comm_col, MPI_Comm comm_row, @@ -55,7 +55,7 @@ class PGemmCN * @brief calculate C = alpha * A^H * B + beta * C * */ - void multiply(const T alpha, const T* A, const T* B, const T beta, T* C); + void multiply (const T alpha, const T* A, const T* B, const T beta, T* C); #ifdef __MPI MPI_Comm col_world = MPI_COMM_NULL; ///< column communicator world MPI_Comm row_world = MPI_COMM_NULL; ///< row communicator world @@ -86,12 +86,12 @@ class PGemmCN int LDC = 0; ///< leading dimension of C, which can be C_local or C_global private: /// @brief for col_nproc == 1 - void multiply_single(const T alpha, const T* A, const T* B, const T beta, T* C); + void multiply_single (const T alpha, const T* A, const T* B, const T beta, T* C); #ifdef __MPI /// @brief for mode = 1 or 2 - void multiply_col(const T alpha, const T* A, const T* B, const T beta, T* C); + void multiply_col (const T alpha, const T* A, const T* B, const T beta, T* C); /// @brief for mode = 3 - void multiply_row(const T alpha, const T* A, const T* B, const T beta, T* C); + void multiply_row (const T alpha, const T* A, const T* B, const T beta, T* C); #endif using resmem_dev_op = base_device::memory::resize_memory_op; using delmem_dev_op = base_device::memory::delete_memory_op; @@ -101,17 +101,15 @@ class PGemmCN #ifdef __MPI private: - std::vector isend_tmp_; ///< temperory memory for sending data - std::vector A_tmp_; ///< temperory memory for A - std::vector B_tmp_; ///< temperory memory for B - std::vector C_tmp_; ///< temperory memory for C + std::vector isend_tmp_; ///< temperory memory for sending data + std::vector A_tmp_; ///< temperory memory for A + std::vector B_tmp_; ///< temperory memory for B + std::vector C_tmp_; ///< temperory memory for C std::vector C_global_tmp_; ///< temperory memory for C_global T* 
C_local_tmp_ = nullptr; ///< temperory memory for C_local T* A_tmp_device_ = nullptr; ///< temperory memory for A T* B_tmp_device_ = nullptr; ///< temperory memory for B #endif - - }; } // namespace ModuleBase #endif \ No newline at end of file diff --git a/source/source_base/parallel_2d.cpp b/source/source_base/parallel_2d.cpp index b5bab96694b..c433683ea8c 100644 --- a/source/source_base/parallel_2d.cpp +++ b/source/source_base/parallel_2d.cpp @@ -6,136 +6,145 @@ #include #include -bool Parallel_2D::in_this_processor(const int iw1_all, const int iw2_all) const +bool + Parallel_2D::in_this_processor (const int iw1_all, const int iw2_all) const { - return global2local_row(iw1_all) != -1 && global2local_col(iw2_all) != -1; + return global2local_row (iw1_all) != -1 && global2local_col (iw2_all) != -1; } -int Parallel_2D::get_global_row_size() const +int + Parallel_2D::get_global_row_size () const { if (!is_serial) - { + { #ifdef __MPI - return desc[2]; + return desc[2]; #endif - } + } return nrow; } -int Parallel_2D::get_global_col_size() const +int + Parallel_2D::get_global_col_size () const { if (!is_serial) - { + { #ifdef __MPI - return desc[3]; + return desc[3]; #endif - } + } return ncol; } #ifdef __MPI -MPI_Comm Parallel_2D::comm() const +MPI_Comm + Parallel_2D::comm () const { // it is an error to call blacs_get with an invalid BLACS context if (blacs_ctxt < 0) - { - return MPI_COMM_NULL; - } + { + return MPI_COMM_NULL; + } int sys_ctxt = 0; - Cblacs_get(blacs_ctxt, 10, &sys_ctxt); + Cblacs_get (blacs_ctxt, 10, &sys_ctxt); // blacs_get with "what" = 10 takes a BLACS context and returns the index // of the associated system context (MPI communicator) that can be used by // blacs2sys_handle to get the MPI communicator. 
- return Cblacs2sys_handle(sys_ctxt); + return Cblacs2sys_handle (sys_ctxt); } -void Parallel_2D::_init_proc_grid(const MPI_Comm comm, const bool mode) +void + Parallel_2D::_init_proc_grid (const MPI_Comm comm, const bool mode) { // determine the number of rows and columns of the process grid // by factorizing n = p * q such that p, q are closest and p <= q int num_proc = 0; - MPI_Comm_size(comm, &num_proc); - dim0 = static_cast(std::sqrt(num_proc + 0.5)); + MPI_Comm_size (comm, &num_proc); + dim0 = static_cast (std::sqrt (num_proc + 0.5)); while (dim1 = num_proc / dim0, dim0 * dim1 != num_proc) - { - --dim0; - } + { + --dim0; + } if (mode) - { - std::swap(dim0, dim1); - } + { + std::swap (dim0, dim1); + } // initialize the BLACS grid accordingly - blacs_ctxt = Csys2blacs_handle(comm); + blacs_ctxt = Csys2blacs_handle (comm); char order = 'R'; // row-major - Cblacs_gridinit(&blacs_ctxt, &order, dim0, dim1); - Cblacs_gridinfo(blacs_ctxt, &dim0, &dim1, &coord[0], &coord[1]); + Cblacs_gridinit (&blacs_ctxt, &order, dim0, dim1); + Cblacs_gridinfo (blacs_ctxt, &dim0, &dim1, &coord[0], &coord[1]); } -void Parallel_2D::_set_dist_info(const int mg, const int ng, const int nb) +void + Parallel_2D::_set_dist_info (const int mg, const int ng, const int nb) { this->nb = nb; // number of local rows and columns const int zero = 0; - nrow = numroc_(&mg, &nb, &coord[0], &zero, &dim0); - ncol = numroc_(&ng, &nb, &coord[1], &zero, &dim1); - nloc = static_cast(nrow) * ncol; + nrow = numroc_ (&mg, &nb, &coord[0], &zero, &dim0); + ncol = numroc_ (&ng, &nb, &coord[1], &zero, &dim1); + nloc = static_cast (nrow) * ncol; // initialize the ScaLAPACK descriptor - int info = 0, lld = std::max(nrow, 1); - descinit_(desc, &mg, &ng, &nb, &nb, &zero, &zero, &blacs_ctxt, &lld, &info); + int info = 0, lld = std::max (nrow, 1); + descinit_ (desc, &mg, &ng, &nb, &nb, &zero, &zero, &blacs_ctxt, &lld, &info); // generate the global-to-local and local-to-global index maps - 
local2global_row_.resize(nrow); - global2local_row_ = std::vector(mg, -1); + local2global_row_.resize (nrow); + global2local_row_ = std::vector (mg, -1); for (int i = 0; i < nrow; ++i) - { - local2global_row_[i] = (i / nb * dim0 + coord[0]) * nb + i % nb; - global2local_row_[local2global_row_[i]] = i; - } + { + local2global_row_[i] = (i / nb * dim0 + coord[0]) * nb + i % nb; + global2local_row_[local2global_row_[i]] = i; + } - local2global_col_.resize(ncol); - global2local_col_ = std::vector(ng, -1); + local2global_col_.resize (ncol); + global2local_col_ = std::vector (ng, -1); for (int j = 0; j < ncol; ++j) - { - local2global_col_[j] = (j / nb * dim1 + coord[1]) * nb + j % nb; - global2local_col_[local2global_col_[j]] = j; - } + { + local2global_col_[j] = (j / nb * dim1 + coord[1]) * nb + j % nb; + global2local_col_[local2global_col_[j]] = j; + } } -int Parallel_2D::init(const int mg, const int ng, const int nb, const MPI_Comm comm, const bool mode) +int + Parallel_2D::init (const int mg, const int ng, const int nb, const MPI_Comm comm, const bool mode) { - _init_proc_grid(comm, mode); - _set_dist_info(mg, ng, nb); + _init_proc_grid (comm, mode); + _set_dist_info (mg, ng, nb); return nrow == 0 || ncol == 0; } -int Parallel_2D::set(const int mg, const int ng, const int nb, const int blacs_ctxt) +int + Parallel_2D::set (const int mg, const int ng, const int nb, const int blacs_ctxt) { this->blacs_ctxt = blacs_ctxt; - Cblacs_gridinfo(blacs_ctxt, &dim0, &dim1, &coord[0], &coord[1]); - _set_dist_info(mg, ng, nb); + Cblacs_gridinfo (blacs_ctxt, &dim0, &dim1, &coord[0], &coord[1]); + _set_dist_info (mg, ng, nb); return nrow == 0 || ncol == 0; } #endif -void Parallel_2D::set_serial(const int mg, const int ng) +void + Parallel_2D::set_serial (const int mg, const int ng) { - assert(mg > 0 && ng > 0); + assert (mg > 0 && ng > 0); nb = 1; dim0 = dim1 = 1; coord[0] = coord[1] = 0; nrow = mg; ncol = ng; - nloc = static_cast(nrow) * ncol; - local2global_row_.resize(nrow); - 
local2global_col_.resize(ncol); - std::iota(local2global_row_.begin(), local2global_row_.end(), 0); - std::iota(local2global_col_.begin(), local2global_col_.end(), 0); + nloc = static_cast (nrow) * ncol; + local2global_row_.resize (nrow); + local2global_col_.resize (ncol); + std::iota (local2global_row_.begin (), local2global_row_.end (), 0); + std::iota (local2global_col_.begin (), local2global_col_.end (), 0); global2local_row_ = local2global_row_; global2local_col_ = local2global_col_; is_serial = true; diff --git a/source/source_base/parallel_2d.h b/source/source_base/parallel_2d.h index c179dd18e79..3bd875abd53 100644 --- a/source/source_base/parallel_2d.h +++ b/source/source_base/parallel_2d.h @@ -11,65 +11,73 @@ class Parallel_2D { public: - Parallel_2D() = default; - ~Parallel_2D() = default; + Parallel_2D () = default; + ~Parallel_2D () = default; - Parallel_2D& operator=(Parallel_2D&& rhs) = default; - Parallel_2D(Parallel_2D&& rhs) = default; + Parallel_2D& operator= (Parallel_2D&& rhs) = default; + Parallel_2D (Parallel_2D&& rhs) = default; /// number of local rows - int get_row_size() const + int + get_row_size () const { return nrow; }; /// number of local columns - int get_col_size() const + int + get_col_size () const { return ncol; }; /// number of global rows - int get_global_row_size() const; + int get_global_row_size () const; /// number of global columns - int get_global_col_size() const; + int get_global_col_size () const; /// number of local matrix elements - int64_t get_local_size() const + int64_t + get_local_size () const { return nloc; }; /// get the local index of a global index (row) - int global2local_row(const int igr) const + int + global2local_row (const int igr) const { return global2local_row_[igr]; } /// get the local index of a global index (col) - int global2local_col(const int igc) const + int + global2local_col (const int igc) const { return global2local_col_[igc]; } /// get the global index of a local index (row) - int 
local2global_row(const int ilr) const + int + local2global_row (const int ilr) const { return local2global_row_[ilr]; } /// get the global index of a local index (col) - int local2global_col(const int ilc) const + int + local2global_col (const int ilc) const { return local2global_col_[ilc]; } /// check whether a global index is in this process - bool in_this_processor(const int iw1_all, const int iw2_all) const; + bool in_this_processor (const int iw1_all, const int iw2_all) const; /// side length of 2d square block - int get_block_size() const + int + get_block_size () const { return nb; }; @@ -80,21 +88,21 @@ class Parallel_2D * and set up the info of a block-cyclic distribution. * */ - int init(const int mg, - const int ng, - const int nb, // square block is assumed - const MPI_Comm comm, - bool mode = false); + int init (const int mg, + const int ng, + const int nb, // square block is assumed + const MPI_Comm comm, + bool mode = false); /** * @brief Set up the info of a block-cyclic distribution using given * BLACS context. * */ - int set(const int mg, - const int ng, - const int nb, // square block is assumed - const int blacs_ctxt); + int set (const int mg, + const int ng, + const int nb, // square block is assumed + const int blacs_ctxt); /// BLACS context int blacs_ctxt = -1; @@ -102,10 +110,10 @@ class Parallel_2D /// ScaLAPACK descriptor int desc[9] = {}; - MPI_Comm comm() const; + MPI_Comm comm () const; #endif - void set_serial(const int mg, const int ng); + void set_serial (const int mg, const int ng); // FIXME the following variables should be private, but they are // widely used in the code currently. 
Public visibility is kept @@ -142,8 +150,8 @@ class Parallel_2D std::vector local2global_col_; #ifdef __MPI - void _init_proc_grid(const MPI_Comm comm, const bool mode); - void _set_dist_info(const int mg, const int ng, const int nb); + void _init_proc_grid (const MPI_Comm comm, const bool mode); + void _set_dist_info (const int mg, const int ng, const int nb); #endif }; #endif diff --git a/source/source_base/parallel_comm.cpp b/source/source_base/parallel_comm.cpp index 5d03447b5aa..7b9b35c9763 100644 --- a/source/source_base/parallel_comm.cpp +++ b/source/source_base/parallel_comm.cpp @@ -3,53 +3,57 @@ #include "mpi.h" #include "parallel_global.h" -MPI_Comm POOL_WORLD; //groups for different plane waves. In this group, only plane waves are different. K-points and bands are the same. -MPI_Comm KP_WORLD; // groups for differnt k. In this group, only k-points are different. Bands and plane waves are the same. -MPI_Comm BP_WORLD; // groups for differnt bands. In this group, only bands are different. K-points and plane waves are the same. -MPI_Comm INT_BGROUP; // internal comm groups for same bands. In this group, only bands are the same. K-points and plane waves are different. +MPI_Comm POOL_WORLD; // groups for different plane waves. In this group, only plane waves are different. K-points and + // bands are the same. +MPI_Comm + KP_WORLD; // groups for differnt k. In this group, only k-points are different. Bands and plane waves are the same. +MPI_Comm BP_WORLD; // groups for differnt bands. In this group, only bands are different. K-points and plane waves are + // the same. +MPI_Comm INT_BGROUP; // internal comm groups for same bands. In this group, only bands are the same. K-points and plane + // waves are different. 
MPI_Comm GRID_WORLD; // mohan add 2012-01-13 MPI_Comm DIAG_WORLD; // mohan add 2012-01-13 -MPICommGroup::MPICommGroup(MPI_Comm parent_comm) - : parent_comm(parent_comm) +MPICommGroup::MPICommGroup (MPI_Comm parent_comm) : parent_comm (parent_comm) { - MPI_Comm_size(parent_comm, &this->gsize); - MPI_Comm_rank(parent_comm, &this->grank); + MPI_Comm_size (parent_comm, &this->gsize); + MPI_Comm_rank (parent_comm, &this->grank); } -MPICommGroup::~MPICommGroup() +MPICommGroup::~MPICommGroup () { if (group_comm != MPI_COMM_NULL) - { - MPI_Comm_free(&group_comm); - } + { + MPI_Comm_free (&group_comm); + } if (inter_comm != MPI_COMM_NULL) - { - MPI_Comm_free(&inter_comm); - } + { + MPI_Comm_free (&inter_comm); + } } -void MPICommGroup::divide_group_comm(const int& ngroup, const bool assert_even) +void + MPICommGroup::divide_group_comm (const int& ngroup, const bool assert_even) { this->ngroups = ngroup; - Parallel_Global::divide_mpi_groups(this->gsize, - ngroup, - this->grank, - this->nprocs_in_group, - this->my_group, - this->rank_in_group, - assert_even); - - MPI_Comm_split(parent_comm, my_group, rank_in_group, &group_comm); - if(this->gsize % ngroup == 0) - { - this->is_even = true; - } + Parallel_Global::divide_mpi_groups (this->gsize, + ngroup, + this->grank, + this->nprocs_in_group, + this->my_group, + this->rank_in_group, + assert_even); + + MPI_Comm_split (parent_comm, my_group, rank_in_group, &group_comm); + if (this->gsize % ngroup == 0) + { + this->is_even = true; + } if (this->is_even) - { - MPI_Comm_split(parent_comm, my_inter, rank_in_inter, &inter_comm); - } + { + MPI_Comm_split (parent_comm, my_inter, rank_in_inter, &inter_comm); + } } #endif \ No newline at end of file diff --git a/source/source_base/parallel_comm.h b/source/source_base/parallel_comm.h index 127fcd72ca5..ea799956a60 100644 --- a/source/source_base/parallel_comm.h +++ b/source/source_base/parallel_comm.h @@ -10,34 +10,32 @@ extern MPI_Comm BP_WORLD; extern MPI_Comm GRID_WORLD; // mohan add 
2012-01-13 extern MPI_Comm DIAG_WORLD; // mohan add 2012-01-13 - class MPICommGroup { - public: - - MPICommGroup(MPI_Comm parent_comm); - ~MPICommGroup(); + public: + MPICommGroup (MPI_Comm parent_comm); + ~MPICommGroup (); - void divide_group_comm(const int& ngroup, const bool assert_even = true); + void divide_group_comm (const int& ngroup, const bool assert_even = true); - bool is_even = false; ///< whether the group is even + bool is_even = false; ///< whether the group is even - MPI_Comm parent_comm = MPI_COMM_NULL; ///< parent communicator - int gsize = 0; ///< size of parent communicator - int grank = 0; ///< rank of parent communicator + MPI_Comm parent_comm = MPI_COMM_NULL; ///< parent communicator + int gsize = 0; ///< size of parent communicator + int grank = 0; ///< rank of parent communicator - MPI_Comm group_comm = MPI_COMM_NULL; ///< group communicator - int ngroups = 0; ///< number of groups - int nprocs_in_group = 0; ///< number of processes in the group - int my_group = 0; ///< the group index - int rank_in_group = 0; ///< the rank in the group + MPI_Comm group_comm = MPI_COMM_NULL; ///< group communicator + int ngroups = 0; ///< number of groups + int nprocs_in_group = 0; ///< number of processes in the group + int my_group = 0; ///< the group index + int rank_in_group = 0; ///< the rank in the group - MPI_Comm inter_comm = MPI_COMM_NULL; ///< inter communicator - bool has_inter_comm = false; ///< whether has inter communicator - int& nprocs_in_inter = ngroups; ///< number of processes in the inter communicator - int& my_inter = rank_in_group; ///< the rank in the inter communicator - int& rank_in_inter = my_group; ///< the inter group index + MPI_Comm inter_comm = MPI_COMM_NULL; ///< inter communicator + bool has_inter_comm = false; ///< whether has inter communicator + int& nprocs_in_inter = ngroups; ///< number of processes in the inter communicator + int& my_inter = rank_in_group; ///< the rank in the inter communicator + int& rank_in_inter = 
my_group; ///< the inter group index }; #endif diff --git a/source/source_base/parallel_common.cpp b/source/source_base/parallel_common.cpp index 6f8ce79fbcb..15bb1f16310 100644 --- a/source/source_base/parallel_common.cpp +++ b/source/source_base/parallel_common.cpp @@ -7,75 +7,85 @@ #include #ifdef __MPI -void Parallel_Common::bcast_string(std::string& object) // Peize Lin fix bug 2019-03-18 +void + Parallel_Common::bcast_string (std::string& object) // Peize Lin fix bug 2019-03-18 { - int size = object.size(); - MPI_Bcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD); - + int size = object.size (); + MPI_Bcast (&size, 1, MPI_INT, 0, MPI_COMM_WORLD); + int my_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); - + MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); + if (0 != my_rank) - { - object.resize(size); - } + { + object.resize (size); + } - MPI_Bcast(&object[0], size, MPI_CHAR, 0, MPI_COMM_WORLD); + MPI_Bcast (&object[0], size, MPI_CHAR, 0, MPI_COMM_WORLD); return; } -void Parallel_Common::bcast_string(std::string* object, const int n) // Peize Lin fix bug 2019-03-18 +void + Parallel_Common::bcast_string (std::string* object, const int n) // Peize Lin fix bug 2019-03-18 { for (int i = 0; i < n; i++) - bcast_string(object[i]); + bcast_string (object[i]); return; } -void Parallel_Common::bcast_complex_double(std::complex& object) +void + Parallel_Common::bcast_complex_double (std::complex& object) { - MPI_Bcast(&object, 1, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast (&object, 1, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); } -void Parallel_Common::bcast_complex_double(std::complex* object, const int n) +void + Parallel_Common::bcast_complex_double (std::complex* object, const int n) { - MPI_Bcast(object, n, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast (object, n, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); } -void Parallel_Common::bcast_double(double& object) +void + Parallel_Common::bcast_double (double& object) { - MPI_Bcast(&object, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + 
MPI_Bcast (&object, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); } -void Parallel_Common::bcast_double(double* object, const int n) +void + Parallel_Common::bcast_double (double* object, const int n) { - MPI_Bcast(object, n, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast (object, n, MPI_DOUBLE, 0, MPI_COMM_WORLD); } -void Parallel_Common::bcast_int(int& object) +void + Parallel_Common::bcast_int (int& object) { - MPI_Bcast(&object, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast (&object, 1, MPI_INT, 0, MPI_COMM_WORLD); } -void Parallel_Common::bcast_int(int* object, const int n) +void + Parallel_Common::bcast_int (int* object, const int n) { - MPI_Bcast(object, n, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast (object, n, MPI_INT, 0, MPI_COMM_WORLD); } -void Parallel_Common::bcast_bool(bool& object) +void + Parallel_Common::bcast_bool (bool& object) { int swap = object; int my_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); if (my_rank == 0) swap = object; - MPI_Bcast(&swap, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast (&swap, 1, MPI_INT, 0, MPI_COMM_WORLD); if (my_rank != 0) - object = static_cast(swap); + object = static_cast (swap); } -void Parallel_Common::bcast_char(char* object, const int n) +void + Parallel_Common::bcast_char (char* object, const int n) { - MPI_Bcast(object, n, MPI_CHAR, 0, MPI_COMM_WORLD); + MPI_Bcast (object, n, MPI_CHAR, 0, MPI_COMM_WORLD); } #endif diff --git a/source/source_base/parallel_common.h b/source/source_base/parallel_common.h index 26054dbe171..eae8f7b4232 100644 --- a/source/source_base/parallel_common.h +++ b/source/source_base/parallel_common.h @@ -10,18 +10,18 @@ namespace Parallel_Common { //(1) bcast array -void bcast_complex_double(std::complex* object, const int n); -void bcast_string(std::string* object, const int n); -void bcast_double(double* object, const int n); -void bcast_int(int* object, const int n); -void bcast_char(char* object, const int n); +void bcast_complex_double (std::complex* 
object, const int n); +void bcast_string (std::string* object, const int n); +void bcast_double (double* object, const int n); +void bcast_int (int* object, const int n); +void bcast_char (char* object, const int n); //(2) bcast single -void bcast_complex_double(std::complex& object); -void bcast_string(std::string& object); -void bcast_double(double& object); -void bcast_int(int& object); -void bcast_bool(bool& object); +void bcast_complex_double (std::complex& object); +void bcast_string (std::string& object); +void bcast_double (double& object); +void bcast_int (int& object); +void bcast_bool (bool& object); } // namespace Parallel_Common diff --git a/source/source_base/parallel_device.cpp b/source/source_base/parallel_device.cpp index 8ba8c3ae8ed..cc7dd32a0e1 100644 --- a/source/source_base/parallel_device.cpp +++ b/source/source_base/parallel_device.cpp @@ -31,46 +31,48 @@ struct NcclCommContext class NcclCommRegistry { public: - ~NcclCommRegistry() + ~NcclCommRegistry () { - for (std::map::iterator it = contexts_.begin(); it != contexts_.end(); ++it) - { - if (it->second.comm != nullptr) + for (std::map::iterator it = contexts_.begin (); it != contexts_.end (); ++it) { - ncclCommDestroy(it->second.comm); + if (it->second.comm != nullptr) + { + ncclCommDestroy (it->second.comm); + } } - } } - NcclCommContext& get(MPI_Comm comm) + NcclCommContext& + get (MPI_Comm comm) { - const MPI_Fint key = MPI_Comm_c2f(comm); - std::lock_guard lock(mutex_); - std::map::iterator found = contexts_.find(key); - if (found != contexts_.end()) - { - return found->second; - } + const MPI_Fint key = MPI_Comm_c2f (comm); + std::lock_guard lock (mutex_); + std::map::iterator found = contexts_.find (key); + if (found != contexts_.end ()) + { + return found->second; + } int rank = 0; int size = 0; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &size); + MPI_Comm_rank (comm, &rank); + MPI_Comm_size (comm, &size); NcclCommContext ctx; ctx.size = size; if (size > 1) - { - 
ncclUniqueId id; - if (rank == 0) { - CHECK_NCCL(ncclGetUniqueId(&id)); + ncclUniqueId id; + if (rank == 0) + { + CHECK_NCCL (ncclGetUniqueId (&id)); + } + MPI_Bcast (&id, sizeof (id), MPI_BYTE, 0, comm); + CHECK_NCCL (ncclCommInitRank (&ctx.comm, size, id, rank)); } - MPI_Bcast(&id, sizeof(id), MPI_BYTE, 0, comm); - CHECK_NCCL(ncclCommInitRank(&ctx.comm, size, id, rank)); - } - std::pair::iterator, bool> inserted = contexts_.insert(std::make_pair(key, ctx)); + std::pair::iterator, bool> inserted + = contexts_.insert (std::make_pair (key, ctx)); return inserted.first->second; } @@ -79,276 +81,356 @@ class NcclCommRegistry std::mutex mutex_; }; -NcclCommRegistry& get_nccl_registry() +NcclCommRegistry& + get_nccl_registry () { static NcclCommRegistry registry; return registry; } template -void nccl_bcast_impl(T* object, const int n, MPI_Comm& comm, ncclDataType_t datatype, int root = 0, const int count_scale = 1) -{ - NcclCommContext& ctx = get_nccl_registry().get(comm); +void + nccl_bcast_impl (T* object, + const int n, + MPI_Comm& comm, + ncclDataType_t datatype, + int root = 0, + const int count_scale = 1) +{ + NcclCommContext& ctx = get_nccl_registry ().get (comm); if (ctx.size <= 1 || n <= 0) - { - return; - } - CHECK_NCCL(ncclBroadcast(object, object, static_cast(n) * count_scale, datatype, root, ctx.comm, ctx.stream)); - CHECK_CUDA(cudaStreamSynchronize(ctx.stream)); + { + return; + } + CHECK_NCCL ( + ncclBroadcast (object, object, static_cast (n) * count_scale, datatype, root, ctx.comm, ctx.stream)); + CHECK_CUDA (cudaStreamSynchronize (ctx.stream)); } template -void nccl_reduce_impl(T* object, const int n, MPI_Comm& comm, ncclDataType_t datatype, const int count_scale = 1) +void + nccl_reduce_impl (T* object, const int n, MPI_Comm& comm, ncclDataType_t datatype, const int count_scale = 1) { - NcclCommContext& ctx = get_nccl_registry().get(comm); + NcclCommContext& ctx = get_nccl_registry ().get (comm); if (ctx.size <= 1 || n <= 0) - { - return; - } - 
CHECK_NCCL(ncclAllReduce(object, object, static_cast(n) * count_scale, datatype, ncclSum, ctx.comm, ctx.stream)); - CHECK_CUDA(cudaStreamSynchronize(ctx.stream)); + { + return; + } + CHECK_NCCL ( + ncclAllReduce (object, object, static_cast (n) * count_scale, datatype, ncclSum, ctx.comm, ctx.stream)); + CHECK_CUDA (cudaStreamSynchronize (ctx.stream)); } template -void nccl_gatherv_impl(const T* sendbuf, +void + nccl_gatherv_impl (const T* sendbuf, const int sendcount, T* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm) { - NcclCommContext& ctx = get_nccl_registry().get(comm); + NcclCommContext& ctx = get_nccl_registry ().get (comm); if (ctx.size <= 1) - { - if (sendbuf != recvbuf && sendcount > 0) { - CHECK_CUDA(cudaMemcpy(recvbuf, sendbuf, static_cast(sendcount) * sizeof(T), cudaMemcpyDeviceToDevice)); + if (sendbuf != recvbuf && sendcount > 0) + { + CHECK_CUDA (cudaMemcpy (recvbuf, + sendbuf, + static_cast (sendcount) * sizeof (T), + cudaMemcpyDeviceToDevice)); + } + return; } - return; - } int chunk_count = 0; int rank = 0; - MPI_Comm_rank(comm, &rank); + MPI_Comm_rank (comm, &rank); for (int i = 0; i < ctx.size; ++i) - { - if (recvcounts[i] > chunk_count) { - chunk_count = recvcounts[i]; + if (recvcounts[i] > chunk_count) + { + chunk_count = recvcounts[i]; + } } - } if (recvcounts[rank] != sendcount) - { - throw std::runtime_error("nccl_gatherv_data: sendcount does not match recvcounts[rank]"); - } + { + throw std::runtime_error ("nccl_gatherv_data: sendcount does not match recvcounts[rank]"); + } if (chunk_count <= 0) - { - return; - } + { + return; + } - const size_t chunk_bytes = static_cast(chunk_count) * sizeof(T); + const size_t chunk_bytes = static_cast (chunk_count) * sizeof (T); const size_t recv_bytes = chunk_bytes * ctx.size; unsigned char* staged_send = nullptr; unsigned char* staged_recv = nullptr; - CHECK_CUDA(cudaMalloc(&staged_send, chunk_bytes)); - CHECK_CUDA(cudaMalloc(&staged_recv, recv_bytes)); - 
CHECK_CUDA(cudaMemsetAsync(staged_send, 0, chunk_bytes, ctx.stream)); + CHECK_CUDA (cudaMalloc (&staged_send, chunk_bytes)); + CHECK_CUDA (cudaMalloc (&staged_recv, recv_bytes)); + CHECK_CUDA (cudaMemsetAsync (staged_send, 0, chunk_bytes, ctx.stream)); if (sendcount > 0) - { - CHECK_CUDA(cudaMemcpyAsync(staged_send, - sendbuf, - static_cast(sendcount) * sizeof(T), - cudaMemcpyDeviceToDevice, - ctx.stream)); - } + { + CHECK_CUDA (cudaMemcpyAsync (staged_send, + sendbuf, + static_cast (sendcount) * sizeof (T), + cudaMemcpyDeviceToDevice, + ctx.stream)); + } - CHECK_NCCL(ncclAllGather(staged_send, staged_recv, chunk_bytes, ncclUint8, ctx.comm, ctx.stream)); + CHECK_NCCL (ncclAllGather (staged_send, staged_recv, chunk_bytes, ncclUint8, ctx.comm, ctx.stream)); for (int i = 0; i < ctx.size; ++i) - { - if (recvcounts[i] > 0) { - CHECK_CUDA(cudaMemcpyAsync(recvbuf + displs[i], - staged_recv + static_cast(i) * chunk_bytes, - static_cast(recvcounts[i]) * sizeof(T), - cudaMemcpyDeviceToDevice, - ctx.stream)); + if (recvcounts[i] > 0) + { + CHECK_CUDA (cudaMemcpyAsync (recvbuf + displs[i], + staged_recv + static_cast (i) * chunk_bytes, + static_cast (recvcounts[i]) * sizeof (T), + cudaMemcpyDeviceToDevice, + ctx.stream)); + } } - } - CHECK_CUDA(cudaStreamSynchronize(ctx.stream)); - CHECK_CUDA(cudaFree(staged_send)); - CHECK_CUDA(cudaFree(staged_recv)); + CHECK_CUDA (cudaStreamSynchronize (ctx.stream)); + CHECK_CUDA (cudaFree (staged_send)); + CHECK_CUDA (cudaFree (staged_recv)); } } // namespace -void nccl_bcast_data(double* object, const int& n, MPI_Comm& comm, int root) +void + nccl_bcast_data (double* object, const int& n, MPI_Comm& comm, int root) { - nccl_bcast_impl(object, n, comm, ncclDouble, root); + nccl_bcast_impl (object, n, comm, ncclDouble, root); } -void nccl_bcast_data(std::complex* object, const int& n, MPI_Comm& comm, int root) +void + nccl_bcast_data (std::complex* object, const int& n, MPI_Comm& comm, int root) { - nccl_bcast_impl(reinterpret_cast(object), 
n, comm, ncclDouble, root, 2); + nccl_bcast_impl (reinterpret_cast (object), n, comm, ncclDouble, root, 2); } -void nccl_bcast_data(float* object, const int& n, MPI_Comm& comm, int root) +void + nccl_bcast_data (float* object, const int& n, MPI_Comm& comm, int root) { - nccl_bcast_impl(object, n, comm, ncclFloat, root); + nccl_bcast_impl (object, n, comm, ncclFloat, root); } -void nccl_bcast_data(std::complex* object, const int& n, MPI_Comm& comm, int root) +void + nccl_bcast_data (std::complex* object, const int& n, MPI_Comm& comm, int root) { - nccl_bcast_impl(reinterpret_cast(object), n, comm, ncclFloat, root, 2); + nccl_bcast_impl (reinterpret_cast (object), n, comm, ncclFloat, root, 2); } -void nccl_reduce_data(double* object, const int& n, MPI_Comm& comm) +void + nccl_reduce_data (double* object, const int& n, MPI_Comm& comm) { - nccl_reduce_impl(object, n, comm, ncclDouble); + nccl_reduce_impl (object, n, comm, ncclDouble); } -void nccl_reduce_data(std::complex* object, const int& n, MPI_Comm& comm) +void + nccl_reduce_data (std::complex* object, const int& n, MPI_Comm& comm) { - nccl_reduce_impl(reinterpret_cast(object), n, comm, ncclDouble, 2); + nccl_reduce_impl (reinterpret_cast (object), n, comm, ncclDouble, 2); } -void nccl_reduce_data(float* object, const int& n, MPI_Comm& comm) +void + nccl_reduce_data (float* object, const int& n, MPI_Comm& comm) { - nccl_reduce_impl(object, n, comm, ncclFloat); + nccl_reduce_impl (object, n, comm, ncclFloat); } -void nccl_reduce_data(std::complex* object, const int& n, MPI_Comm& comm) +void + nccl_reduce_data (std::complex* object, const int& n, MPI_Comm& comm) { - nccl_reduce_impl(reinterpret_cast(object), n, comm, ncclFloat, 2); + nccl_reduce_impl (reinterpret_cast (object), n, comm, ncclFloat, 2); } -void nccl_gatherv_data(const double* sendbuf, int sendcount, double* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm) +void + nccl_gatherv_data (const double* sendbuf, + int sendcount, + double* 
recvbuf, + const int* recvcounts, + const int* displs, + MPI_Comm& comm) { - nccl_gatherv_impl(sendbuf, sendcount, recvbuf, recvcounts, displs, comm); + nccl_gatherv_impl (sendbuf, sendcount, recvbuf, recvcounts, displs, comm); } -void nccl_gatherv_data(const std::complex* sendbuf, +void + nccl_gatherv_data (const std::complex* sendbuf, int sendcount, std::complex* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm) { - nccl_gatherv_impl(sendbuf, sendcount, recvbuf, recvcounts, displs, comm); + nccl_gatherv_impl (sendbuf, sendcount, recvbuf, recvcounts, displs, comm); } -void nccl_gatherv_data(const float* sendbuf, int sendcount, float* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm) +void + nccl_gatherv_data (const float* sendbuf, + int sendcount, + float* recvbuf, + const int* recvcounts, + const int* displs, + MPI_Comm& comm) { - nccl_gatherv_impl(sendbuf, sendcount, recvbuf, recvcounts, displs, comm); + nccl_gatherv_impl (sendbuf, sendcount, recvbuf, recvcounts, displs, comm); } -void nccl_gatherv_data(const std::complex* sendbuf, +void + nccl_gatherv_data (const std::complex* sendbuf, int sendcount, std::complex* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm) { - nccl_gatherv_impl(sendbuf, sendcount, recvbuf, recvcounts, displs, comm); + nccl_gatherv_impl (sendbuf, sendcount, recvbuf, recvcounts, displs, comm); } #endif -void isend_data(const double* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request) +void + isend_data (const double* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request) { - MPI_Isend(buf, count, MPI_DOUBLE, dest, tag, comm, request); + MPI_Isend (buf, count, MPI_DOUBLE, dest, tag, comm, request); } -void isend_data(const std::complex* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request) +void + isend_data (const std::complex* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request) { - MPI_Isend(buf, count, 
MPI_DOUBLE_COMPLEX, dest, tag, comm, request); + MPI_Isend (buf, count, MPI_DOUBLE_COMPLEX, dest, tag, comm, request); } -void isend_data(const float* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request) +void + isend_data (const float* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request) { - MPI_Isend(buf, count, MPI_FLOAT, dest, tag, comm, request); + MPI_Isend (buf, count, MPI_FLOAT, dest, tag, comm, request); } -void isend_data(const std::complex* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request) +void + isend_data (const std::complex* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request) { - MPI_Isend(buf, count, MPI_COMPLEX, dest, tag, comm, request); + MPI_Isend (buf, count, MPI_COMPLEX, dest, tag, comm, request); } -void send_data(const double* buf, int count, int dest, int tag, MPI_Comm& comm) +void + send_data (const double* buf, int count, int dest, int tag, MPI_Comm& comm) { - MPI_Send(buf, count, MPI_DOUBLE, dest, tag, comm); + MPI_Send (buf, count, MPI_DOUBLE, dest, tag, comm); } -void send_data(const std::complex* buf, int count, int dest, int tag, MPI_Comm& comm) +void + send_data (const std::complex* buf, int count, int dest, int tag, MPI_Comm& comm) { - MPI_Send(buf, count, MPI_DOUBLE_COMPLEX, dest, tag, comm); + MPI_Send (buf, count, MPI_DOUBLE_COMPLEX, dest, tag, comm); } -void send_data(const float* buf, int count, int dest, int tag, MPI_Comm& comm) +void + send_data (const float* buf, int count, int dest, int tag, MPI_Comm& comm) { - MPI_Send(buf, count, MPI_FLOAT, dest, tag, comm); + MPI_Send (buf, count, MPI_FLOAT, dest, tag, comm); } -void send_data(const std::complex* buf, int count, int dest, int tag, MPI_Comm& comm) +void + send_data (const std::complex* buf, int count, int dest, int tag, MPI_Comm& comm) { - MPI_Send(buf, count, MPI_COMPLEX, dest, tag, comm); + MPI_Send (buf, count, MPI_COMPLEX, dest, tag, comm); } -void recv_data(double* buf, int count, 
int source, int tag, MPI_Comm& comm, MPI_Status* status) +void + recv_data (double* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status) { - MPI_Recv(buf, count, MPI_DOUBLE, source, tag, comm, status); + MPI_Recv (buf, count, MPI_DOUBLE, source, tag, comm, status); } -void recv_data(std::complex* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status) +void + recv_data (std::complex* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status) { - MPI_Recv(buf, count, MPI_DOUBLE_COMPLEX, source, tag, comm, status); + MPI_Recv (buf, count, MPI_DOUBLE_COMPLEX, source, tag, comm, status); } -void recv_data(float* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status) +void + recv_data (float* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status) { - MPI_Recv(buf, count, MPI_FLOAT, source, tag, comm, status); + MPI_Recv (buf, count, MPI_FLOAT, source, tag, comm, status); } -void recv_data(std::complex* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status) +void + recv_data (std::complex* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status) { - MPI_Recv(buf, count, MPI_COMPLEX, source, tag, comm, status); + MPI_Recv (buf, count, MPI_COMPLEX, source, tag, comm, status); } -void bcast_data(std::complex* object, const int& n, const MPI_Comm& comm, int root) +void + bcast_data (std::complex* object, const int& n, const MPI_Comm& comm, int root) { - MPI_Bcast(object, n * 2, MPI_DOUBLE, root, comm); + MPI_Bcast (object, n * 2, MPI_DOUBLE, root, comm); } -void bcast_data(std::complex* object, const int& n, const MPI_Comm& comm, int root) +void + bcast_data (std::complex* object, const int& n, const MPI_Comm& comm, int root) { - MPI_Bcast(object, n * 2, MPI_FLOAT, root, comm); + MPI_Bcast (object, n * 2, MPI_FLOAT, root, comm); } -void bcast_data(double* object, const int& n, const MPI_Comm& comm, int root) +void + bcast_data (double* object, const int& 
n, const MPI_Comm& comm, int root) { - MPI_Bcast(object, n, MPI_DOUBLE, root, comm); + MPI_Bcast (object, n, MPI_DOUBLE, root, comm); } -void bcast_data(float* object, const int& n, const MPI_Comm& comm, int root) +void + bcast_data (float* object, const int& n, const MPI_Comm& comm, int root) { - MPI_Bcast(object, n, MPI_FLOAT, root, comm); + MPI_Bcast (object, n, MPI_FLOAT, root, comm); } -void reduce_data(std::complex* object, const int& n, const MPI_Comm& comm) +void + reduce_data (std::complex* object, const int& n, const MPI_Comm& comm) { - MPI_Allreduce(MPI_IN_PLACE, object, n * 2, MPI_DOUBLE, MPI_SUM, comm); + MPI_Allreduce (MPI_IN_PLACE, object, n * 2, MPI_DOUBLE, MPI_SUM, comm); } -void reduce_data(std::complex* object, const int& n, const MPI_Comm& comm) +void + reduce_data (std::complex* object, const int& n, const MPI_Comm& comm) { - MPI_Allreduce(MPI_IN_PLACE, object, n * 2, MPI_FLOAT, MPI_SUM, comm); + MPI_Allreduce (MPI_IN_PLACE, object, n * 2, MPI_FLOAT, MPI_SUM, comm); } -void reduce_data(double* object, const int& n, const MPI_Comm& comm) +void + reduce_data (double* object, const int& n, const MPI_Comm& comm) { - MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_DOUBLE, MPI_SUM, comm); + MPI_Allreduce (MPI_IN_PLACE, object, n, MPI_DOUBLE, MPI_SUM, comm); } -void reduce_data(float* object, const int& n, const MPI_Comm& comm) +void + reduce_data (float* object, const int& n, const MPI_Comm& comm) { - MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_FLOAT, MPI_SUM, comm); + MPI_Allreduce (MPI_IN_PLACE, object, n, MPI_FLOAT, MPI_SUM, comm); } -void gatherv_data(const double* sendbuf, int sendcount, double* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm) +void + gatherv_data (const double* sendbuf, + int sendcount, + double* recvbuf, + const int* recvcounts, + const int* displs, + MPI_Comm& comm) { - MPI_Allgatherv(sendbuf, sendcount, MPI_DOUBLE, recvbuf, recvcounts, displs, MPI_DOUBLE, comm); + MPI_Allgatherv (sendbuf, sendcount, MPI_DOUBLE, 
recvbuf, recvcounts, displs, MPI_DOUBLE, comm); } -void gatherv_data(const std::complex* sendbuf, int sendcount, std::complex* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm) +void + gatherv_data (const std::complex* sendbuf, + int sendcount, + std::complex* recvbuf, + const int* recvcounts, + const int* displs, + MPI_Comm& comm) { - MPI_Allgatherv(sendbuf, sendcount, MPI_DOUBLE_COMPLEX, recvbuf, recvcounts, displs, MPI_DOUBLE_COMPLEX, comm); + MPI_Allgatherv (sendbuf, sendcount, MPI_DOUBLE_COMPLEX, recvbuf, recvcounts, displs, MPI_DOUBLE_COMPLEX, comm); } -void gatherv_data(const float* sendbuf, int sendcount, float* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm) +void + gatherv_data (const float* sendbuf, + int sendcount, + float* recvbuf, + const int* recvcounts, + const int* displs, + MPI_Comm& comm) { - MPI_Allgatherv(sendbuf, sendcount, MPI_FLOAT, recvbuf, recvcounts, displs, MPI_FLOAT, comm); + MPI_Allgatherv (sendbuf, sendcount, MPI_FLOAT, recvbuf, recvcounts, displs, MPI_FLOAT, comm); } -void gatherv_data(const std::complex* sendbuf, int sendcount, std::complex* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm) +void + gatherv_data (const std::complex* sendbuf, + int sendcount, + std::complex* recvbuf, + const int* recvcounts, + const int* displs, + MPI_Comm& comm) { - MPI_Allgatherv(sendbuf, sendcount, MPI_COMPLEX, recvbuf, recvcounts, displs, MPI_COMPLEX, comm); + MPI_Allgatherv (sendbuf, sendcount, MPI_COMPLEX, recvbuf, recvcounts, displs, MPI_COMPLEX, comm); } #ifndef __CUDA_MPI @@ -356,49 +438,54 @@ template struct object_cpu_point { bool alloc = false; - T* get_buffer(const T* object, const int& n, T* tmp_space = nullptr) + T* + get_buffer (const T* object, const int& n, T* tmp_space = nullptr) { T* object_cpu = nullptr; alloc = false; if (tmp_space == nullptr) - { - base_device::memory::resize_memory_op()(object_cpu, n); - alloc = true; - } + { + base_device::memory::resize_memory_op () 
(object_cpu, n); + alloc = true; + } else - { - object_cpu = tmp_space; - } + { + object_cpu = tmp_space; + } return object_cpu; } - T* get(const T* object, const int& n, T* tmp_space = nullptr) + T* + get (const T* object, const int& n, T* tmp_space = nullptr) { - T* object_cpu = get_buffer(object, n, tmp_space); - base_device::memory::synchronize_memory_op()(object_cpu, - object, - n); + T* object_cpu = get_buffer (object, n, tmp_space); + base_device::memory::synchronize_memory_op () (object_cpu, + object, + n); return object_cpu; } - void sync_h2d(T* object, const T* object_cpu, const int& n) + void + sync_h2d (T* object, const T* object_cpu, const int& n) { - base_device::memory::synchronize_memory_op()(object, - object_cpu, - n); + base_device::memory::synchronize_memory_op () (object, + object_cpu, + n); } - void sync_d2h(T* object_cpu, const T* object, const int& n) + void + sync_d2h (T* object_cpu, const T* object, const int& n) { - base_device::memory::synchronize_memory_op()(object_cpu, - object, - n); + base_device::memory::synchronize_memory_op () (object_cpu, + object, + n); } - void del(T* object_cpu) + void + del (T* object_cpu) { if (alloc) - { - base_device::memory::delete_memory_op()(object_cpu); - } + { + base_device::memory::delete_memory_op () (object_cpu); + } } }; @@ -406,21 +493,26 @@ template struct object_cpu_point { bool alloc = false; - T* get_buffer(const T* object, const int& n, T* tmp_space = nullptr) + T* + get_buffer (const T* object, const int& n, T* tmp_space = nullptr) { - return const_cast(object); + return const_cast (object); } - T* get(const T* object, const int& n, T* tmp_space = nullptr) + T* + get (const T* object, const int& n, T* tmp_space = nullptr) { - return const_cast(object); + return const_cast (object); } - void sync_h2d(T* object, const T* object_cpu, const int& n) + void + sync_h2d (T* object, const T* object_cpu, const int& n) { } - void sync_d2h(T* object_cpu, const T* object, const int& n) + void + sync_d2h 
(T* object_cpu, const T* object, const int& n) { } - void del(T* object_cpu) + void + del (T* object_cpu) { } }; diff --git a/source/source_base/parallel_device.h b/source/source_base/parallel_device.h index 7826e4b4653..ca02db58fc0 100644 --- a/source/source_base/parallel_device.h +++ b/source/source_base/parallel_device.h @@ -7,73 +7,114 @@ #include "source_base/module_device/types.h" namespace Parallel_Common { -void isend_data(const double* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request); -void isend_data(const std::complex* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request); -void isend_data(const float* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request); -void isend_data(const std::complex* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request); -void send_data(const double* buf, int count, int dest, int tag, MPI_Comm& comm); -void send_data(const std::complex* buf, int count, int dest, int tag, MPI_Comm& comm); -void send_data(const float* buf, int count, int dest, int tag, MPI_Comm& comm); -void send_data(const std::complex* buf, int count, int dest, int tag, MPI_Comm& comm); -void recv_data(double* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status); -void recv_data(std::complex* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status); -void recv_data(float* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status); -void recv_data(std::complex* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status); -void bcast_data(std::complex* object, const int& n, const MPI_Comm& comm, int root = 0); -void bcast_data(std::complex* object, const int& n, const MPI_Comm& comm, int root = 0); -void bcast_data(double* object, const int& n, const MPI_Comm& comm, int root = 0); -void bcast_data(float* object, const int& n, const MPI_Comm& comm, int root = 0); -void reduce_data(std::complex* object, const int& n, const 
MPI_Comm& comm); -void reduce_data(std::complex* object, const int& n, const MPI_Comm& comm); -void reduce_data(double* object, const int& n, const MPI_Comm& comm); -void reduce_data(float* object, const int& n, const MPI_Comm& comm); -void gatherv_data(const double* sendbuf, int sendcount, double* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm); -void gatherv_data(const std::complex* sendbuf, int sendcount, std::complex* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm); -void gatherv_data(const float* sendbuf, int sendcount, float* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm); -void gatherv_data(const std::complex* sendbuf, int sendcount, std::complex* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm); +void isend_data (const double* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request); +void isend_data (const std::complex* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request); +void isend_data (const float* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request); +void isend_data (const std::complex* buf, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request); +void send_data (const double* buf, int count, int dest, int tag, MPI_Comm& comm); +void send_data (const std::complex* buf, int count, int dest, int tag, MPI_Comm& comm); +void send_data (const float* buf, int count, int dest, int tag, MPI_Comm& comm); +void send_data (const std::complex* buf, int count, int dest, int tag, MPI_Comm& comm); +void recv_data (double* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status); +void recv_data (std::complex* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status); +void recv_data (float* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status); +void recv_data (std::complex* buf, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status); +void bcast_data 
(std::complex* object, const int& n, const MPI_Comm& comm, int root = 0); +void bcast_data (std::complex* object, const int& n, const MPI_Comm& comm, int root = 0); +void bcast_data (double* object, const int& n, const MPI_Comm& comm, int root = 0); +void bcast_data (float* object, const int& n, const MPI_Comm& comm, int root = 0); +void reduce_data (std::complex* object, const int& n, const MPI_Comm& comm); +void reduce_data (std::complex* object, const int& n, const MPI_Comm& comm); +void reduce_data (double* object, const int& n, const MPI_Comm& comm); +void reduce_data (float* object, const int& n, const MPI_Comm& comm); +void gatherv_data (const double* sendbuf, + int sendcount, + double* recvbuf, + const int* recvcounts, + const int* displs, + MPI_Comm& comm); +void gatherv_data (const std::complex* sendbuf, + int sendcount, + std::complex* recvbuf, + const int* recvcounts, + const int* displs, + MPI_Comm& comm); +void gatherv_data (const float* sendbuf, + int sendcount, + float* recvbuf, + const int* recvcounts, + const int* displs, + MPI_Comm& comm); +void gatherv_data (const std::complex* sendbuf, + int sendcount, + std::complex* recvbuf, + const int* recvcounts, + const int* displs, + MPI_Comm& comm); #if defined(__NCCL_PARALLEL_DEVICE) -void nccl_bcast_data(double* object, const int& n, MPI_Comm& comm, int root = 0); -void nccl_bcast_data(std::complex* object, const int& n, MPI_Comm& comm, int root = 0); -void nccl_bcast_data(float* object, const int& n, MPI_Comm& comm, int root = 0); -void nccl_bcast_data(std::complex* object, const int& n, MPI_Comm& comm, int root = 0); -void nccl_reduce_data(double* object, const int& n, MPI_Comm& comm); -void nccl_reduce_data(std::complex* object, const int& n, MPI_Comm& comm); -void nccl_reduce_data(float* object, const int& n, MPI_Comm& comm); -void nccl_reduce_data(std::complex* object, const int& n, MPI_Comm& comm); -void nccl_gatherv_data(const double* sendbuf, int sendcount, double* recvbuf, const int* 
recvcounts, const int* displs, MPI_Comm& comm); -void nccl_gatherv_data(const std::complex* sendbuf, int sendcount, std::complex* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm); -void nccl_gatherv_data(const float* sendbuf, int sendcount, float* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm); -void nccl_gatherv_data(const std::complex* sendbuf, int sendcount, std::complex* recvbuf, const int* recvcounts, const int* displs, MPI_Comm& comm); +void nccl_bcast_data (double* object, const int& n, MPI_Comm& comm, int root = 0); +void nccl_bcast_data (std::complex* object, const int& n, MPI_Comm& comm, int root = 0); +void nccl_bcast_data (float* object, const int& n, MPI_Comm& comm, int root = 0); +void nccl_bcast_data (std::complex* object, const int& n, MPI_Comm& comm, int root = 0); +void nccl_reduce_data (double* object, const int& n, MPI_Comm& comm); +void nccl_reduce_data (std::complex* object, const int& n, MPI_Comm& comm); +void nccl_reduce_data (float* object, const int& n, MPI_Comm& comm); +void nccl_reduce_data (std::complex* object, const int& n, MPI_Comm& comm); +void nccl_gatherv_data (const double* sendbuf, + int sendcount, + double* recvbuf, + const int* recvcounts, + const int* displs, + MPI_Comm& comm); +void nccl_gatherv_data (const std::complex* sendbuf, + int sendcount, + std::complex* recvbuf, + const int* recvcounts, + const int* displs, + MPI_Comm& comm); +void nccl_gatherv_data (const float* sendbuf, + int sendcount, + float* recvbuf, + const int* recvcounts, + const int* displs, + MPI_Comm& comm); +void nccl_gatherv_data (const std::complex* sendbuf, + int sendcount, + std::complex* recvbuf, + const int* recvcounts, + const int* displs, + MPI_Comm& comm); #endif #ifndef __CUDA_MPI -template +template struct object_cpu_point { bool alloc = false; - T* get_buffer(const T* object, const int& n, T* tmp_space = nullptr); - T* get(const T* object, const int& n, T* tmp_space = nullptr); - void del(T* object); - 
void sync_d2h(T* object_cpu, const T* object, const int& n); - void sync_h2d(T* object, const T* object_cpu, const int& n); + T* get_buffer (const T* object, const int& n, T* tmp_space = nullptr); + T* get (const T* object, const int& n, T* tmp_space = nullptr); + void del (T* object); + void sync_d2h (T* object_cpu, const T* object, const int& n); + void sync_h2d (T* object, const T* object_cpu, const int& n); }; #endif /** * @brief send data in Device - * + * */ template -void send_dev(const T* object, int count, int dest, int tag, MPI_Comm& comm, T* tmp_space = nullptr) +void + send_dev (const T* object, int count, int dest, int tag, MPI_Comm& comm, T* tmp_space = nullptr) { #ifdef __CUDA_MPI - send_data(object, count, dest, tag, comm); + send_data (object, count, dest, tag, comm); #else - object_cpu_point o; - T* object_cpu = o.get(object, count, tmp_space); - send_data(object_cpu, count, dest, tag, comm); - o.del(object_cpu); + object_cpu_point o; + T* object_cpu = o.get (object, count, tmp_space); + send_data (object_cpu, count, dest, tag, comm); + o.del (object_cpu); #endif return; } @@ -81,46 +122,48 @@ void send_dev(const T* object, int count, int dest, int tag, MPI_Comm& comm, T* /** * @brief isend data in Device * @note before the date in send_space is recieved, it should not be modified - * + * */ template -void isend_dev(const T* object, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request, T* send_space) +void + isend_dev (const T* object, int count, int dest, int tag, MPI_Comm& comm, MPI_Request* request, T* send_space) { #ifdef __CUDA_MPI - isend_data(object, count, dest, tag, comm, request); + isend_data (object, count, dest, tag, comm, request); #else - object_cpu_point o; - T* object_cpu = o.get(object, count, send_space); - isend_data(object_cpu, count, dest, tag, comm, request); - o.del(object_cpu); + object_cpu_point o; + T* object_cpu = o.get (object, count, send_space); + isend_data (object_cpu, count, dest, tag, comm, 
request); + o.del (object_cpu); #endif return; } /** * @brief recv data in Device - * + * */ template -void recv_dev(T* object, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status, T* tmp_space = nullptr) +void + recv_dev (T* object, int count, int source, int tag, MPI_Comm& comm, MPI_Status* status, T* tmp_space = nullptr) { #ifdef __CUDA_MPI - recv_data(object, count, source, tag, comm, status); + recv_data (object, count, source, tag, comm, status); #else - object_cpu_point o; - T* object_cpu = o.get_buffer(object, count, tmp_space); - recv_data(object_cpu, count, source, tag, comm, status); - o.sync_h2d(object, object_cpu, count); - o.del(object_cpu); + object_cpu_point o; + T* object_cpu = o.get_buffer (object, count, tmp_space); + recv_data (object_cpu, count, source, tag, comm, status); + o.sync_h2d (object, object_cpu, count); + o.del (object_cpu); #endif return; } /** * @brief broadcast data in Device - * + * * @tparam T: float, double, std::complex, std::complex - * @tparam Device + * @tparam Device * @param object arrays in Device * @param n the size of array * @param comm MPI_Comm @@ -128,56 +171,59 @@ void recv_dev(T* object, int count, int source, int tag, MPI_Comm& comm, MPI_Sta * @param tmp_space optional tmp space in CPU (default nullptr) */ template -void bcast_dev(T* object, const int& n, const MPI_Comm& comm, int root = 0, T* tmp_space = nullptr) +void + bcast_dev (T* object, const int& n, const MPI_Comm& comm, int root = 0, T* tmp_space = nullptr) { #if defined(__NCCL_PARALLEL_DEVICE) if (std::is_same::value) - { - nccl_bcast_data(object, n, const_cast(comm), root); - return; - } + { + nccl_bcast_data (object, n, const_cast (comm), root); + return; + } #endif #ifdef __CUDA_MPI - bcast_data(object, n, comm, root); + bcast_data (object, n, comm, root); #else - object_cpu_point o; + object_cpu_point o; int rank = 0; - MPI_Comm_rank(comm, &rank); - T* object_cpu = rank == root ? 
o.get(object, n, tmp_space) : o.get_buffer(object, n, tmp_space); - bcast_data(object_cpu, n, comm, root); + MPI_Comm_rank (comm, &rank); + T* object_cpu = rank == root ? o.get (object, n, tmp_space) : o.get_buffer (object, n, tmp_space); + bcast_data (object_cpu, n, comm, root); if (rank != root) - { - o.sync_h2d(object, object_cpu, n); - } - o.del(object_cpu); + { + o.sync_h2d (object, object_cpu, n); + } + o.del (object_cpu); #endif return; } template -void reduce_dev(T* object, const int& n, const MPI_Comm& comm, T* tmp_space = nullptr) +void + reduce_dev (T* object, const int& n, const MPI_Comm& comm, T* tmp_space = nullptr) { #if defined(__NCCL_PARALLEL_DEVICE) if (std::is_same::value) - { - nccl_reduce_data(object, n, const_cast(comm)); - return; - } + { + nccl_reduce_data (object, n, const_cast (comm)); + return; + } #endif #ifdef __CUDA_MPI - reduce_data(object, n, comm); + reduce_data (object, n, comm); #else - object_cpu_point o; - T* object_cpu = o.get(object, n, tmp_space); - reduce_data(object_cpu, n, comm); - o.sync_h2d(object, object_cpu, n); - o.del(object_cpu); + object_cpu_point o; + T* object_cpu = o.get (object, n, tmp_space); + reduce_data (object_cpu, n, comm); + o.sync_h2d (object, object_cpu, n); + o.del (object_cpu); #endif return; } template -void gatherv_dev(const T* sendbuf, +void + gatherv_dev (const T* sendbuf, int sendcount, T* recvbuf, const int* recvcounts, @@ -188,30 +234,29 @@ void gatherv_dev(const T* sendbuf, { #if defined(__NCCL_PARALLEL_DEVICE) if (std::is_same::value) - { - nccl_gatherv_data(sendbuf, sendcount, recvbuf, recvcounts, displs, comm); - return; - } + { + nccl_gatherv_data (sendbuf, sendcount, recvbuf, recvcounts, displs, comm); + return; + } #endif #ifdef __CUDA_MPI - gatherv_data(sendbuf, sendcount, recvbuf, recvcounts, displs, comm); + gatherv_data (sendbuf, sendcount, recvbuf, recvcounts, displs, comm); #else - object_cpu_point o1, o2; + object_cpu_point o1, o2; int size = 0; - MPI_Comm_size(comm, &size); + 
MPI_Comm_size (comm, &size); int gather_space = displs[size - 1] + recvcounts[size - 1]; - T* sendbuf_cpu = o1.get(sendbuf, sendcount, tmp_sspace); - T* recvbuf_cpu = o2.get_buffer(recvbuf, gather_space, tmp_rspace); - gatherv_data(sendbuf_cpu, sendcount, recvbuf_cpu, recvcounts, displs, comm); - o2.sync_h2d(recvbuf, recvbuf_cpu, gather_space); - o1.del(sendbuf_cpu); - o2.del(recvbuf_cpu); + T* sendbuf_cpu = o1.get (sendbuf, sendcount, tmp_sspace); + T* recvbuf_cpu = o2.get_buffer (recvbuf, gather_space, tmp_rspace); + gatherv_data (sendbuf_cpu, sendcount, recvbuf_cpu, recvcounts, displs, comm); + o2.sync_h2d (recvbuf, recvbuf_cpu, gather_space); + o1.del (sendbuf_cpu); + o2.del (recvbuf_cpu); #endif return; } -} - +} // namespace Parallel_Common #endif #endif diff --git a/source/source_base/parallel_global.cpp b/source/source_base/parallel_global.cpp index 67eaff4235c..4d581eea2b8 100644 --- a/source/source_base/parallel_global.cpp +++ b/source/source_base/parallel_global.cpp @@ -27,24 +27,26 @@ int mpi_number = 0; int omp_number = 0; } // namespace Parallel_Global -void Parallel_Global::myProd(std::complex* in, std::complex* inout, int* len, MPI_Datatype* dptr) +void + Parallel_Global::myProd (std::complex* in, std::complex* inout, int* len, MPI_Datatype* dptr) { for (int i = 0; i < *len; i++) - { - // (*inout).real()=(*inout).real()+(*in).real(); - // (*inout).imag()=(*inout).imag()+(*in).imag(); + { + // (*inout).real()=(*inout).real()+(*in).real(); + // (*inout).imag()=(*inout).imag()+(*in).imag(); - // mohan updat 2011-09-21 - (*inout) = std::complex((*inout).real() + (*in).real(), (*inout).imag() + (*in).imag()); + // mohan updat 2011-09-21 + (*inout) = std::complex ((*inout).real () + (*in).real (), (*inout).imag () + (*in).imag ()); - in++; - inout++; - } + in++; + inout++; + } return; } #endif -void Parallel_Global::split_diag_world(const int& diag_np, +void + Parallel_Global::split_diag_world (const int& diag_np, const int& nproc, const int& my_rank, 
int& drank, @@ -52,14 +54,14 @@ void Parallel_Global::split_diag_world(const int& diag_np, int& dcolor) { #ifdef __MPI - assert(diag_np > 0); + assert (diag_np > 0); int group_grid_np = -1; int color = -1; int key = -1; - divide_mpi_groups(nproc, diag_np, my_rank, group_grid_np, key, color); - MPI_Comm_split(MPI_COMM_WORLD, color, key, &DIAG_WORLD); - MPI_Comm_rank(DIAG_WORLD, &drank); - MPI_Comm_size(DIAG_WORLD, &dsize); + divide_mpi_groups (nproc, diag_np, my_rank, group_grid_np, key, color); + MPI_Comm_split (MPI_COMM_WORLD, color, key, &DIAG_WORLD); + MPI_Comm_rank (DIAG_WORLD, &drank); + MPI_Comm_size (DIAG_WORLD, &dsize); dcolor = color; #else dcolor = 0; // mohan fix bug 2012-02-04 @@ -69,17 +71,18 @@ void Parallel_Global::split_diag_world(const int& diag_np, return; } -void Parallel_Global::split_grid_world(const int diag_np, const int& nproc, const int& my_rank, int& grank, int& gsize) +void + Parallel_Global::split_grid_world (const int diag_np, const int& nproc, const int& my_rank, int& grank, int& gsize) { #ifdef __MPI - assert(diag_np > 0); + assert (diag_np > 0); int group_grid_np = -1; int color = -1; int key = -1; - divide_mpi_groups(nproc, diag_np, my_rank, group_grid_np, color, key); - MPI_Comm_split(MPI_COMM_WORLD, color, key, &GRID_WORLD); - MPI_Comm_rank(GRID_WORLD, &grank); - MPI_Comm_size(GRID_WORLD, &gsize); + divide_mpi_groups (nproc, diag_np, my_rank, group_grid_np, color, key); + MPI_Comm_split (MPI_COMM_WORLD, color, key, &GRID_WORLD); + MPI_Comm_rank (GRID_WORLD, &grank); + MPI_Comm_size (GRID_WORLD, &gsize); #else grank = 0; // mohan fix bug 2012-02-04 gsize = 1; @@ -88,40 +91,37 @@ void Parallel_Global::split_grid_world(const int diag_np, const int& nproc, cons } // changed from read_mpi_parameters in 2024-1018 -void Parallel_Global::read_pal_param(int argc, - char** argv, - int& NPROC, - int& NTHREAD_PER_PROC, - int& MY_RANK) +void + Parallel_Global::read_pal_param (int argc, char** argv, int& NPROC, int& NTHREAD_PER_PROC, int& 
MY_RANK) { #ifdef __MPI #ifdef _OPENMP int provided = 0; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); + MPI_Init_thread (&argc, &argv, MPI_THREAD_MULTIPLE, &provided); if (provided != MPI_THREAD_MULTIPLE) - { - std::cerr << "MPI_Init_thread request " << MPI_THREAD_MULTIPLE << " but provide " << provided << std::endl; - } - // Peize Lin change 2022.08.08 - // MPI_THREAD_FUNNELED is enough for ABACUS. Using MPI_THREAD_SERIALIZED for elpa, using MPI_THREAD_MULTIPLE for - // libRI. + { + std::cerr << "MPI_Init_thread request " << MPI_THREAD_MULTIPLE << " but provide " << provided << std::endl; + } + // Peize Lin change 2022.08.08 + // MPI_THREAD_FUNNELED is enough for ABACUS. Using MPI_THREAD_SERIALIZED for elpa, using MPI_THREAD_MULTIPLE for + // libRI. #else - MPI_Init(&argc, &argv); // Peize Lin change 2018-07-12 + MPI_Init (&argc, &argv); // Peize Lin change 2018-07-12 #endif //_OPENMP // KPAR = atoi(argv[1]); // mohan abandon 2010-06-09 // get world size --> NPROC // get global rank --> MY_RANK - MPI_Comm_size(MPI_COMM_WORLD, &NPROC); - MPI_Comm_rank(MPI_COMM_WORLD, &MY_RANK); + MPI_Comm_size (MPI_COMM_WORLD, &NPROC); + MPI_Comm_rank (MPI_COMM_WORLD, &MY_RANK); int process_num = 0; // number of processes in the current node int local_rank = 0; // rank of the process in the current node MPI_Comm shmcomm; - MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm); - MPI_Comm_size(shmcomm, &process_num); - MPI_Comm_rank(shmcomm, &local_rank); - MPI_Comm_free(&shmcomm); + MPI_Comm_split_type (MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm); + MPI_Comm_size (shmcomm, &process_num); + MPI_Comm_rank (shmcomm, &local_rank); + MPI_Comm_free (&shmcomm); // Determining appropriate thread number for OpenMP: // 1. If the number of threads is set by the user by `OMP_NUM_THREADS`, use it. 
@@ -132,82 +132,85 @@ void Parallel_Global::read_pal_param(int argc, // CAVEAT: The user should set the number of threads properly to avoid oversubscribing. // This mechanism only handles the worst case for the default setting (not setting number of threads at all, causing // oversubscribing and extremely slow performance), not guaranteed to be optimal. - const int max_thread_num = std::thread::hardware_concurrency(); // Consider Hyperthreading disabled. + const int max_thread_num = std::thread::hardware_concurrency (); // Consider Hyperthreading disabled. #ifdef _OPENMP - int current_thread_num = omp_get_max_threads(); // Get the number of threads set by the user. + int current_thread_num = omp_get_max_threads (); // Get the number of threads set by the user. if (current_thread_num == max_thread_num && process_num >= 1) // Avoid oversubscribing on the number of threads not set. - { - current_thread_num = max_thread_num / process_num; - omp_set_num_threads(current_thread_num); - } + { + current_thread_num = max_thread_num / process_num; + omp_set_num_threads (current_thread_num); + } #else int current_thread_num = 1; #endif mpi_number = process_num; omp_number = current_thread_num; if (current_thread_num * process_num > max_thread_num && local_rank == 0) - { - std::stringstream mess; - mess << "WARNING: Total thread number(" << current_thread_num * process_num << ") " - << "is larger than hardware availability(" << max_thread_num << ")." << std::endl - << "The results may be INCORRECT. Please set the environment variable OMP_NUM_THREADS to a proper value." - << std::endl; - std::cerr << mess.str() << std::endl; - // the user may take their own risk by set the OMP_NUM_THREADS env var. - if (std::getenv("OMP_NUM_THREADS") == nullptr) { - // usage of WARNING_QUIT need source_base/tool_quit.cpp - // lead to undefined error in unit_test building - // ModuleBase::WARNING_QUIT( "Parallel_Global::read_pal_param","OMP_NUM_THREADS setting is invalid. 
Please set it to a proper value."); - std::cerr << "ERROR: OMP_NUM_THREADS setting is invalid. Please set it to a proper value." << std::endl; - exit(1); + std::stringstream mess; + mess << "WARNING: Total thread number(" << current_thread_num * process_num << ") " + << "is larger than hardware availability(" << max_thread_num << ")." << std::endl + << "The results may be INCORRECT. Please set the environment variable OMP_NUM_THREADS to a proper " + "value." + << std::endl; + std::cerr << mess.str () << std::endl; + // the user may take their own risk by set the OMP_NUM_THREADS env var. + if (std::getenv ("OMP_NUM_THREADS") == nullptr) + { + // usage of WARNING_QUIT need source_base/tool_quit.cpp + // lead to undefined error in unit_test building + // ModuleBase::WARNING_QUIT( "Parallel_Global::read_pal_param","OMP_NUM_THREADS setting is invalid. + // Please set it to a proper value."); + std::cerr << "ERROR: OMP_NUM_THREADS setting is invalid. Please set it to a proper value." + << std::endl; + exit (1); + } } - } else if (current_thread_num * process_num < max_thread_num && local_rank == 0) - { - // only output info in local rank 0 - std::cerr << "Info: Local MPI proc number: " << process_num << "," - << "OpenMP thread number: " << current_thread_num << "," - << "Total thread number: " << current_thread_num * process_num << "," - << "Local thread limit: " << max_thread_num << std::endl; - } + { + // only output info in local rank 0 + std::cerr << "Info: Local MPI proc number: " << process_num << "," + << "OpenMP thread number: " << current_thread_num << "," + << "Total thread number: " << current_thread_num * process_num << "," + << "Local thread limit: " << max_thread_num << std::endl; + } NTHREAD_PER_PROC = current_thread_num; if (MY_RANK == 0) - { + { #ifdef VERSION - const char* version = VERSION; + const char* version = VERSION; #else - const char* version = "unknown"; + const char* version = "unknown"; #endif #ifdef COMMIT_INFO #include "commit.h" - const 
char* commit = COMMIT; + const char* commit = COMMIT; #else - const char* commit = "unknown"; + const char* commit = "unknown"; #endif - std::cout << " " - << std::endl - << " ABACUS " << version << std::endl - << std::endl - << " Atomic-orbital Based Ab-initio Computation at UStc " - << std::endl - << std::endl - << " Website: http://abacus.ustc.edu.cn/ " - << std::endl - << " Documentation: https://abacus.deepmodeling.com/ " - << std::endl - << " Repository: https://github.com/abacusmodeling/abacus-develop " - << std::endl - << " https://github.com/deepmodeling/abacus-develop " - << std::endl - << " Commit: " << commit << std::endl - << std::endl; - time_t time_now = time(nullptr); - std::cout << " " << ctime(&time_now); - } + std::cout << " " + << std::endl + << " ABACUS " << version << std::endl + << std::endl + << " Atomic-orbital Based Ab-initio Computation at UStc " + << std::endl + << std::endl + << " Website: http://abacus.ustc.edu.cn/ " + << std::endl + << " Documentation: https://abacus.deepmodeling.com/ " + << std::endl + << " Repository: https://github.com/abacusmodeling/abacus-develop " + << std::endl + << " https://github.com/deepmodeling/abacus-develop " + << std::endl + << " Commit: " << commit << std::endl + << std::endl; + time_t time_now = time (nullptr); + std::cout << " " << ctime (&time_now); + } // for test /* @@ -224,32 +227,34 @@ void Parallel_Global::read_pal_param(int argc, // This section can be chosen !! 
// mohan 2011-03-15 if (MY_RANK != 0) - { - // std::cout.rdbuf(NULL); - std::cout.setstate(std::ios::failbit); // qianrui modify 2020-10-14 - } - // end test + { + // std::cout.rdbuf(NULL); + std::cout.setstate (std::ios::failbit); // qianrui modify 2020-10-14 + } + // end test #endif //__MPI return; } #ifdef __MPI -void Parallel_Global::finalize_mpi() +void + Parallel_Global::finalize_mpi () { - MPI_Comm_free(&POOL_WORLD); + MPI_Comm_free (&POOL_WORLD); if (KP_WORLD != MPI_COMM_NULL) - { - MPI_Comm_free(&KP_WORLD); - } - MPI_Comm_free(&INT_BGROUP); - MPI_Comm_free(&BP_WORLD); - MPI_Comm_free(&GRID_WORLD); - MPI_Comm_free(&DIAG_WORLD); - MPI_Finalize(); + { + MPI_Comm_free (&KP_WORLD); + } + MPI_Comm_free (&INT_BGROUP); + MPI_Comm_free (&BP_WORLD); + MPI_Comm_free (&GRID_WORLD); + MPI_Comm_free (&DIAG_WORLD); + MPI_Finalize (); } #endif -void Parallel_Global::init_pools(const int& NPROC, +void + Parallel_Global::init_pools (const int& NPROC, const int& MY_RANK, const int& BNDPAR, const int& KPAR, @@ -264,16 +269,16 @@ void Parallel_Global::init_pools(const int& NPROC, //---------------------------------------------------------- // CALL Function : divide_pools //---------------------------------------------------------- - Parallel_Global::divide_pools(NPROC, - MY_RANK, - BNDPAR, - KPAR, - NPROC_IN_BNDGROUP, - RANK_IN_BPGROUP, - MY_BNDGROUP, - NPROC_IN_POOL, - RANK_IN_POOL, - MY_POOL); + Parallel_Global::divide_pools (NPROC, + MY_RANK, + BNDPAR, + KPAR, + NPROC_IN_BNDGROUP, + RANK_IN_BPGROUP, + MY_BNDGROUP, + NPROC_IN_POOL, + RANK_IN_POOL, + MY_POOL); // for test // turn on when you want to check the index of pools. 
@@ -312,7 +317,8 @@ void Parallel_Global::init_pools(const int& NPROC, } #ifdef __MPI -void Parallel_Global::divide_pools(const int& NPROC, +void + Parallel_Global::divide_pools (const int& NPROC, const int& MY_RANK, const int& BNDPAR, const int& KPAR, @@ -326,20 +332,21 @@ void Parallel_Global::divide_pools(const int& NPROC, // note: the order of k-point parallelization and band parallelization is important // The order will not change the behavior of KP_WORLD or BP_WORLD, and MY_POOL // and MY_BNDGROUP will be the same as well. - if(BNDPAR > 1 && NPROC %(BNDPAR * KPAR) != 0) - { - std::cout << "Error: When BNDPAR = " << BNDPAR << " > 1, number of processes (" << NPROC - << ") must be divisible by the number of groups (" << BNDPAR * KPAR << ")." << std::endl; - ModuleBase::WARNING_QUIT("ParallelGlobal::divide_pools", - "When BNDPAR > 1, number of processes NPROC must be divisible by the number of groups BNDPAR * KPAR."); - } + if (BNDPAR > 1 && NPROC % (BNDPAR * KPAR) != 0) + { + std::cout << "Error: When BNDPAR = " << BNDPAR << " > 1, number of processes (" << NPROC + << ") must be divisible by the number of groups (" << BNDPAR * KPAR << ")." << std::endl; + ModuleBase::WARNING_QUIT ( + "ParallelGlobal::divide_pools", + "When BNDPAR > 1, number of processes NPROC must be divisible by the number of groups BNDPAR * KPAR."); + } // k-point parallelization - MPICommGroup kpar_group(MPI_COMM_WORLD); - kpar_group.divide_group_comm(KPAR, false); + MPICommGroup kpar_group (MPI_COMM_WORLD); + kpar_group.divide_group_comm (KPAR, false); // band parallelization - MPICommGroup bndpar_group(kpar_group.group_comm); - bndpar_group.divide_group_comm(BNDPAR, true); + MPICommGroup bndpar_group (kpar_group.group_comm); + bndpar_group.divide_group_comm (BNDPAR, true); // Set parallel index. // In previous versions, the order of k-point parallelization and band parallelization is reversed. 
@@ -347,36 +354,37 @@ void Parallel_Global::divide_pools(const int& NPROC, NPROC_IN_POOL = bndpar_group.nprocs_in_group; RANK_IN_POOL = bndpar_group.rank_in_group; MY_POOL = kpar_group.my_group; - MPI_Comm_dup(bndpar_group.group_comm, &POOL_WORLD); - if(kpar_group.inter_comm != MPI_COMM_NULL) - { - MPI_Comm_dup(kpar_group.inter_comm, &KP_WORLD); - } + MPI_Comm_dup (bndpar_group.group_comm, &POOL_WORLD); + if (kpar_group.inter_comm != MPI_COMM_NULL) + { + MPI_Comm_dup (kpar_group.inter_comm, &KP_WORLD); + } else - { - KP_WORLD = MPI_COMM_NULL; - } + { + KP_WORLD = MPI_COMM_NULL; + } - if(BNDPAR > 1) - { - NPROC_IN_BNDGROUP = kpar_group.ngroups * bndpar_group.nprocs_in_group; - RANK_IN_BPGROUP = kpar_group.my_group * bndpar_group.nprocs_in_group + bndpar_group.rank_in_group; - MY_BNDGROUP = bndpar_group.my_group; - MPI_Comm_split(MPI_COMM_WORLD, MY_BNDGROUP, RANK_IN_BPGROUP, &INT_BGROUP); - MPI_Comm_dup(bndpar_group.inter_comm, &BP_WORLD); - } + if (BNDPAR > 1) + { + NPROC_IN_BNDGROUP = kpar_group.ngroups * bndpar_group.nprocs_in_group; + RANK_IN_BPGROUP = kpar_group.my_group * bndpar_group.nprocs_in_group + bndpar_group.rank_in_group; + MY_BNDGROUP = bndpar_group.my_group; + MPI_Comm_split (MPI_COMM_WORLD, MY_BNDGROUP, RANK_IN_BPGROUP, &INT_BGROUP); + MPI_Comm_dup (bndpar_group.inter_comm, &BP_WORLD); + } else - { - NPROC_IN_BNDGROUP = NPROC; - RANK_IN_BPGROUP = MY_RANK; - MY_BNDGROUP = 0; - MPI_Comm_dup(MPI_COMM_WORLD, &INT_BGROUP); - MPI_Comm_split(MPI_COMM_WORLD, MY_RANK, 0, &BP_WORLD); - } + { + NPROC_IN_BNDGROUP = NPROC; + RANK_IN_BPGROUP = MY_RANK; + MY_BNDGROUP = 0; + MPI_Comm_dup (MPI_COMM_WORLD, &INT_BGROUP); + MPI_Comm_split (MPI_COMM_WORLD, MY_RANK, 0, &BP_WORLD); + } return; } -void Parallel_Global::divide_mpi_groups(const int& procs, +void + Parallel_Global::divide_mpi_groups (const int& procs, const int& num_groups, const int& rank, int& procs_in_group, @@ -385,45 +393,41 @@ void Parallel_Global::divide_mpi_groups(const int& procs, const bool even) { 
if (num_groups == 0) - { - ModuleBase::WARNING_QUIT( - "Parallel_Global::divide_mpi_groups", - "Number of groups must be greater than 0." - ); - } + { + ModuleBase::WARNING_QUIT ("Parallel_Global::divide_mpi_groups", "Number of groups must be greater than 0."); + } if (procs < num_groups) - { - std::cout << "Error: Number of processes (" << procs << ") must be greater than the number of groups (" - << num_groups << ")." << std::endl; - ModuleBase::WARNING_QUIT( - "Parallel_Global::divide_mpi_groups", - "Number of processes must be greater than the number of groups." - ); - } + { + std::cout << "Error: Number of processes (" << procs << ") must be greater than the number of groups (" + << num_groups << ")." << std::endl; + ModuleBase::WARNING_QUIT ("Parallel_Global::divide_mpi_groups", + "Number of processes must be greater than the number of groups."); + } // Calculate the distribution of processes among pools. procs_in_group = procs / num_groups; int extra_procs = procs % num_groups; if (even && extra_procs != 0) - { - std::cout << "Error: Number of processes (" << procs << ") must be evenly divisible by the number of groups (" - << num_groups << " in the even partition case)." << std::endl; - exit(1); - } + { + std::cout << "Error: Number of processes (" << procs + << ") must be evenly divisible by the number of groups (" << num_groups + << " in the even partition case)." << std::endl; + exit (1); + } - if(rank < extra_procs * (procs_in_group + 1)) - { - // The first extra_procs groups have procs_in_group + 1 processes. - procs_in_group++; - my_group = rank / procs_in_group; - rank_in_group = rank % procs_in_group; - } + if (rank < extra_procs * (procs_in_group + 1)) + { + // The first extra_procs groups have procs_in_group + 1 processes. + procs_in_group++; + my_group = rank / procs_in_group; + rank_in_group = rank % procs_in_group; + } else - { - // The remaining groups have procs_in_group processes. 
- my_group = (rank - extra_procs) / procs_in_group; - rank_in_group = (rank - extra_procs) % procs_in_group; - } + { + // The remaining groups have procs_in_group processes. + my_group = (rank - extra_procs) / procs_in_group; + rank_in_group = (rank - extra_procs) % procs_in_group; + } } #endif diff --git a/source/source_base/parallel_global.h b/source/source_base/parallel_global.h index 71e933a33e7..38111485cf5 100644 --- a/source/source_base/parallel_global.h +++ b/source/source_base/parallel_global.h @@ -20,9 +20,9 @@ extern int omp_number; //--------------------------- // changed from read_mpi_parameters in 2024-1018 -void read_pal_param(int argc, char** argv, int& NPROC, int& NTHREAD_PER_PROC, int& MY_RANK); +void read_pal_param (int argc, char** argv, int& NPROC, int& NTHREAD_PER_PROC, int& MY_RANK); #ifdef __MPI -void myProd(std::complex* in, std::complex* inout, int* len, MPI_Datatype* dptr); +void myProd (std::complex* in, std::complex* inout, int* len, MPI_Datatype* dptr); #endif /**------------------------------------------- @@ -37,34 +37,34 @@ void myProd(std::complex* in, std::complex* inout, int* len, MPI * leads to the 'diag world', diag * is only carried out using those 4 proc. 
*/ -void split_diag_world(const int& diag_np, const int& nproc, const int& my_rank, int& drank, int& dsize, int& dcolor); -void split_grid_world(const int diag_np, const int& nproc, const int& my_rank, int& grank, int& gsize); +void split_diag_world (const int& diag_np, const int& nproc, const int& my_rank, int& drank, int& dsize, int& dcolor); +void split_grid_world (const int diag_np, const int& nproc, const int& my_rank, int& grank, int& gsize); /** * @brief An interface function to call "Parallel_Global::divide_pools()" * */ -void init_pools(const int& NPROC, - const int& MY_RANK, - const int& BNDPAR, - const int& KPAR, - int& NPROC_IN_BNDGROUP, - int& RANK_IN_BPGROUP, - int& MY_BNDGROUP, - int& NPROC_IN_POOL, - int& RANK_IN_POOL, - int& MY_POOL); +void init_pools (const int& NPROC, + const int& MY_RANK, + const int& BNDPAR, + const int& KPAR, + int& NPROC_IN_BNDGROUP, + int& RANK_IN_BPGROUP, + int& MY_BNDGROUP, + int& NPROC_IN_POOL, + int& RANK_IN_POOL, + int& MY_POOL); -void divide_pools(const int& NPROC, - const int& MY_RANK, - const int& BNDPAR, - const int& KPAR, - int& NPROC_IN_BNDGROUP, - int& RANK_IN_BPGROUP, - int& MY_BNDGROUP, - int& NPROC_IN_POOL, - int& RANK_IN_POOL, - int& MY_POOL); +void divide_pools (const int& NPROC, + const int& MY_RANK, + const int& BNDPAR, + const int& KPAR, + int& NPROC_IN_BNDGROUP, + int& RANK_IN_BPGROUP, + int& MY_BNDGROUP, + int& NPROC_IN_POOL, + int& RANK_IN_POOL, + int& MY_POOL); /** * @brief Divide MPI processes into groups @@ -76,20 +76,20 @@ void divide_pools(const int& NPROC, * @param[out] rank_in_group Rank of the process in the group * @param[in] even If true, require the number of processes in each group is the same */ -void divide_mpi_groups(const int& procs, - const int& num_groups, - const int& rank, - int& procs_in_group, - int& my_group, - int& rank_in_group, - const bool even = false); +void divide_mpi_groups (const int& procs, + const int& num_groups, + const int& rank, + int& procs_in_group, + int& 
my_group, + int& rank_in_group, + const bool even = false); /** * @brief Release MPI communicator and resources * */ #ifdef __MPI -void finalize_mpi(); +void finalize_mpi (); #endif } // namespace Parallel_Global diff --git a/source/source_base/parallel_reduce.cpp b/source/source_base/parallel_reduce.cpp index 36095f6d033..4b83b1fc71c 100644 --- a/source/source_base/parallel_reduce.cpp +++ b/source/source_base/parallel_reduce.cpp @@ -5,301 +5,332 @@ #include template <> -void Parallel_Reduce::reduce_all(int& object) +void + Parallel_Reduce::reduce_all (int& object) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce (MPI_IN_PLACE, &object, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); #endif return; } template <> -void Parallel_Reduce::reduce_all(long long& object) +void + Parallel_Reduce::reduce_all (long long& object) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce (MPI_IN_PLACE, &object, 1, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); #endif return; } -void Parallel_Reduce::reduce_int_diag(int& object) +void + Parallel_Reduce::reduce_int_diag (int& object) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_INT, MPI_SUM, DIAG_WORLD); + MPI_Allreduce (MPI_IN_PLACE, &object, 1, MPI_INT, MPI_SUM, DIAG_WORLD); #endif return; } template <> -void Parallel_Reduce::reduce_all(double& object) +void + Parallel_Reduce::reduce_all (double& object) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce (MPI_IN_PLACE, &object, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #endif return; } template <> -void Parallel_Reduce::reduce_all(float& object) +void + Parallel_Reduce::reduce_all (float& object) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce (MPI_IN_PLACE, &object, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD); #endif return; } template <> -void 
Parallel_Reduce::reduce_all(int* object, const int n) +void + Parallel_Reduce::reduce_all (int* object, const int n) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce (MPI_IN_PLACE, object, n, MPI_INT, MPI_SUM, MPI_COMM_WORLD); #endif return; } template <> -void Parallel_Reduce::reduce_all(long long* object, const int n) +void + Parallel_Reduce::reduce_all (long long* object, const int n) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce (MPI_IN_PLACE, object, n, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); #endif return; } -void Parallel_Reduce::reduce_int_grid(int* object, const int n) +void + Parallel_Reduce::reduce_int_grid (int* object, const int n) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_INT, MPI_SUM, GRID_WORLD); + MPI_Allreduce (MPI_IN_PLACE, object, n, MPI_INT, MPI_SUM, GRID_WORLD); #endif return; } template <> -void Parallel_Reduce::reduce_all(double* object, const int n) +void + Parallel_Reduce::reduce_all (double* object, const int n) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce (MPI_IN_PLACE, object, n, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #endif return; } -void Parallel_Reduce::reduce_double_grid(double* object, const int n) +void + Parallel_Reduce::reduce_double_grid (double* object, const int n) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_DOUBLE, MPI_SUM, GRID_WORLD); + MPI_Allreduce (MPI_IN_PLACE, object, n, MPI_DOUBLE, MPI_SUM, GRID_WORLD); #endif return; } -void Parallel_Reduce::reduce_double_diag(double* object, const int n) +void + Parallel_Reduce::reduce_double_diag (double* object, const int n) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_DOUBLE, MPI_SUM, DIAG_WORLD); + MPI_Allreduce (MPI_IN_PLACE, object, n, MPI_DOUBLE, MPI_SUM, DIAG_WORLD); #endif return; } template <> -void Parallel_Reduce::reduce_pool(float& 
object) +void + Parallel_Reduce::reduce_pool (float& object) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_FLOAT, MPI_SUM, POOL_WORLD); + MPI_Allreduce (MPI_IN_PLACE, &object, 1, MPI_FLOAT, MPI_SUM, POOL_WORLD); #endif return; } template <> -void Parallel_Reduce::reduce_pool(double& object) +void + Parallel_Reduce::reduce_pool (double& object) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_DOUBLE, MPI_SUM, POOL_WORLD); + MPI_Allreduce (MPI_IN_PLACE, &object, 1, MPI_DOUBLE, MPI_SUM, POOL_WORLD); #endif return; } template <> -void Parallel_Reduce::reduce_pool(int* object, const int n) +void + Parallel_Reduce::reduce_pool (int* object, const int n) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_INT, MPI_SUM, POOL_WORLD); + MPI_Allreduce (MPI_IN_PLACE, object, n, MPI_INT, MPI_SUM, POOL_WORLD); #endif } template <> -void Parallel_Reduce::reduce_pool(double* object, const int n) +void + Parallel_Reduce::reduce_pool (double* object, const int n) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_DOUBLE, MPI_SUM, POOL_WORLD); + MPI_Allreduce (MPI_IN_PLACE, object, n, MPI_DOUBLE, MPI_SUM, POOL_WORLD); #endif return; } // (1) the value is same in each pool. // (2) we need to reduce the value from different pool. -void Parallel_Reduce::reduce_double_allpool(const int& npool, const int& nproc_in_pool, double& object) +void + Parallel_Reduce::reduce_double_allpool (const int& npool, const int& nproc_in_pool, double& object) { - if (npool == 1) - { - return; - } + if (npool == 1) + { + return; + } #ifdef __MPI double swap = object / nproc_in_pool; - MPI_Allreduce(&swap, &object, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce (&swap, &object, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #endif } // (1) the value is same in each pool. // (2) we need to reduce the value from different pool. 
-void Parallel_Reduce::reduce_double_allpool(const int& npool, const int& nproc_in_pool, double* object, const int n) +void + Parallel_Reduce::reduce_double_allpool (const int& npool, const int& nproc_in_pool, double* object, const int n) { - if (npool == 1) - { - return; - } + if (npool == 1) + { + return; + } #ifdef __MPI - std::vector swap(n, 0.0); + std::vector swap (n, 0.0); for (int i = 0; i < n; i++) - { - swap[i] = object[i] / nproc_in_pool; - } - MPI_Allreduce(swap.data(), object, n, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + { + swap[i] = object[i] / nproc_in_pool; + } + MPI_Allreduce (swap.data (), object, n, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #endif } template <> -void Parallel_Reduce::reduce_all>(std::complex& object) +void + Parallel_Reduce::reduce_all> (std::complex& object) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce (MPI_IN_PLACE, &object, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD); #endif return; } // LiuXh add 2019-07-16 template <> -void Parallel_Reduce::reduce_all>(std::complex* object, const int n) +void + Parallel_Reduce::reduce_all> (std::complex* object, const int n) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce (MPI_IN_PLACE, object, n, MPI_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD); #endif return; } - template <> -void Parallel_Reduce::reduce_all>(std::complex& object) +void + Parallel_Reduce::reduce_all> (std::complex& object) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_C_FLOAT_COMPLEX, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce (MPI_IN_PLACE, &object, 1, MPI_C_FLOAT_COMPLEX, MPI_SUM, MPI_COMM_WORLD); #endif return; } // LiuXh add 2019-07-16 template <> -void Parallel_Reduce::reduce_all>(std::complex* object, const int n) +void + Parallel_Reduce::reduce_all> (std::complex* object, const int n) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_C_FLOAT_COMPLEX, 
MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce (MPI_IN_PLACE, object, n, MPI_C_FLOAT_COMPLEX, MPI_SUM, MPI_COMM_WORLD); #endif return; } template <> -void Parallel_Reduce::reduce_pool>(std::complex& object) +void + Parallel_Reduce::reduce_pool> (std::complex& object) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, &object, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, POOL_WORLD); + MPI_Allreduce (MPI_IN_PLACE, &object, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, POOL_WORLD); #endif return; } template <> -void Parallel_Reduce::reduce_pool>(std::complex* object, const int n) +void + Parallel_Reduce::reduce_pool> (std::complex* object, const int n) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_C_FLOAT_COMPLEX, MPI_SUM, POOL_WORLD); + MPI_Allreduce (MPI_IN_PLACE, object, n, MPI_C_FLOAT_COMPLEX, MPI_SUM, POOL_WORLD); #endif return; } template <> -void Parallel_Reduce::reduce_pool>(std::complex* object, const int n) +void + Parallel_Reduce::reduce_pool> (std::complex* object, const int n) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, object, n, MPI_DOUBLE_COMPLEX, MPI_SUM, POOL_WORLD); + MPI_Allreduce (MPI_IN_PLACE, object, n, MPI_DOUBLE_COMPLEX, MPI_SUM, POOL_WORLD); #endif return; } -void Parallel_Reduce::gather_int_all(int& v, int* all) +void + Parallel_Reduce::gather_int_all (int& v, int* all) { #ifdef __MPI - assert(all != nullptr); - MPI_Allgather(&v, 1, MPI_INT, all, 1, MPI_INT, MPI_COMM_WORLD); + assert (all != nullptr); + MPI_Allgather (&v, 1, MPI_INT, all, 1, MPI_INT, MPI_COMM_WORLD); #endif return; } template <> -void Parallel_Reduce::reduce_min(int& v) +void + Parallel_Reduce::reduce_min (int& v) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, &v, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce (MPI_IN_PLACE, &v, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); #endif } template <> -void Parallel_Reduce::reduce_min(float& v) +void + Parallel_Reduce::reduce_min (float& v) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, &v, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce 
(MPI_IN_PLACE, &v, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD); #endif } template <> -void Parallel_Reduce::reduce_min(double& v) +void + Parallel_Reduce::reduce_min (double& v) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, &v, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce (MPI_IN_PLACE, &v, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); #endif } template <> -void Parallel_Reduce::reduce_max(float& v) +void + Parallel_Reduce::reduce_max (float& v) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, &v, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce (MPI_IN_PLACE, &v, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD); #endif } template <> -void Parallel_Reduce::reduce_max(double& v) +void + Parallel_Reduce::reduce_max (double& v) { #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, &v, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce (MPI_IN_PLACE, &v, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); #endif } template <> -void Parallel_Reduce::reduce_max_pool(const int& nproc_in_pool, double& v) +void + Parallel_Reduce::reduce_max_pool (const int& nproc_in_pool, double& v) { #ifdef __MPI - if (nproc_in_pool == 1) - { - return; - } - MPI_Allreduce(MPI_IN_PLACE, &v, 1, MPI_DOUBLE, MPI_MAX, POOL_WORLD); + if (nproc_in_pool == 1) + { + return; + } + MPI_Allreduce (MPI_IN_PLACE, &v, 1, MPI_DOUBLE, MPI_MAX, POOL_WORLD); #endif } template <> -void Parallel_Reduce::reduce_min_pool(const int& nproc_in_pool, double& v) +void + Parallel_Reduce::reduce_min_pool (const int& nproc_in_pool, double& v) { #ifdef __MPI - if (nproc_in_pool == 1) - { - return; - } - MPI_Allreduce(MPI_IN_PLACE, &v, 1, MPI_DOUBLE, MPI_MIN, POOL_WORLD); + if (nproc_in_pool == 1) + { + return; + } + MPI_Allreduce (MPI_IN_PLACE, &v, 1, MPI_DOUBLE, MPI_MIN, POOL_WORLD); #endif } \ No newline at end of file diff --git a/source/source_base/parallel_reduce.h b/source/source_base/parallel_reduce.h index a7819899516..3f4598f162b 100644 --- a/source/source_base/parallel_reduce.h +++ b/source/source_base/parallel_reduce.h @@ -14,57 
+14,59 @@ namespace Parallel_Reduce { /// reduce in all process template -void reduce_all(T& object); +void reduce_all (T& object); template -void reduce_all(T* object, const int n); +void reduce_all (T* object, const int n); template -void reduce_pool(T& object); +void reduce_pool (T& object); template -void reduce_pool(T* object, const int n); +void reduce_pool (T* object, const int n); template -void reduce_min(T& v); +void reduce_min (T& v); template -void reduce_max(T& v); +void reduce_max (T& v); template -void reduce_min_pool(const int& nproc_in_pool, T& v); +void reduce_min_pool (const int& nproc_in_pool, T& v); template -void reduce_max_pool(const int& nproc_in_pool, T& v); +void reduce_max_pool (const int& nproc_in_pool, T& v); -void reduce_int_diag(int& object); // mohan add 2012-01-12 +void reduce_int_diag (int& object); // mohan add 2012-01-12 -void reduce_int_grid(int* object, const int n); // mohan add 2012-01-12 +void reduce_int_grid (int* object, const int n); // mohan add 2012-01-12 // reduce double only in this pool // (each pool contain different k points) -void reduce_double_grid(double* object, const int n); -void reduce_double_diag(double* object, const int n); +void reduce_double_grid (double* object, const int n); +void reduce_double_diag (double* object, const int n); -void reduce_double_allpool(const int& npool, const int& nproc_in_pool, double& object); -void reduce_double_allpool(const int& npool, const int& nproc_in_pool, double* object, const int n); +void reduce_double_allpool (const int& npool, const int& nproc_in_pool, double& object); +void reduce_double_allpool (const int& npool, const int& nproc_in_pool, double* object, const int n); -void gather_int_all(int& v, int* all); +void gather_int_all (int& v, int* all); -bool check_if_equal(double& v); // mohan add 2009-11-11 +bool check_if_equal (double& v); // mohan add 2009-11-11 template -inline void ZEROS(std::complex* u, const TI n) +inline void + ZEROS (std::complex* u, const TI 
n) { - assert(n >= 0); + assert (n >= 0); for (TI i = 0; i < n; i++) - { - u[i] = std::complex(0.0, 0.0); - } + { + u[i] = std::complex (0.0, 0.0); + } return; } template -inline void ZEROS(T* u, const TI n) +inline void + ZEROS (T* u, const TI n) { - assert(n >= 0); + assert (n >= 0); for (TI i = 0; i < n; i++) - { - u[i] = 0; - } + { + u[i] = 0; + } } } // namespace Parallel_Reduce diff --git a/source/source_base/projgen.cpp b/source/source_base/projgen.cpp index 681bc2a6f06..08ef1d45375 100644 --- a/source/source_base/projgen.cpp +++ b/source/source_base/projgen.cpp @@ -12,7 +12,8 @@ using namespace ModuleBase; -void projgen(const int l, +void + projgen (const int l, const int nr, const double* r, const double* chi, @@ -20,176 +21,196 @@ void projgen(const int l, const int nbes, std::vector& alpha) { - assert(rcut < r[nr - 1]); - assert(std::is_sorted(r, r + nr)); + assert (rcut < r[nr - 1]); + assert (std::is_sorted (r, r + nr)); - std::vector dr(nr); - std::adjacent_difference(r, r + nr, dr.begin()); + std::vector dr (nr); + std::adjacent_difference (r, r + nr, dr.begin ()); // lower_bound returns the first element that is equal or larger than rcut - int nr_proj = std::distance(r, std::lower_bound(r, r + nr, rcut)) + 1; + int nr_proj = std::distance (r, std::lower_bound (r, r + nr, rcut)) + 1; // zeros of spherical Bessel function - std::vector theta(nbes); - Sphbes::sphbes_zeros(l, nbes, theta.data()); + std::vector theta (nbes); + Sphbes::sphbes_zeros (l, nbes, theta.data ()); // z & w vectors (see notes) - std::vector z(nbes); - std::vector w(nbes); + std::vector z (nbes); + std::vector w (nbes); - std::transform(theta.begin(), theta.end(), z.begin(), [rcut, l](double theta_p) { - return 0.5 * std::pow(rcut, 3) * std::pow(Sphbes::sphbesj(l + 1, theta_p), 2); - }); + std::transform (theta.begin (), + theta.end (), + z.begin (), + [rcut, l] (double theta_p) + { return 0.5 * std::pow (rcut, 3) * std::pow (Sphbes::sphbesj (l + 1, theta_p), 2); }); // r^2 * chi 
(independent from p) - std::vector tmp(nr_proj); - std::transform(r, r + nr_proj, chi, tmp.begin(), [](double r_i, double chi_i) { return r_i * r_i * chi_i; }); + std::vector tmp (nr_proj); + std::transform (r, r + nr_proj, chi, tmp.begin (), [] (double r_i, double chi_i) { return r_i * r_i * chi_i; }); // r^2 * chi * j_l(theta[p] * r / rcut) (dependent on p) - std::vector integrand(nr_proj); + std::vector integrand (nr_proj); for (int p = 0; p < nbes; ++p) - { - std::transform(r, r + nr_proj, tmp.begin(), integrand.begin(), [theta, p, rcut, l](double r_i, double tmp_i) { - return tmp_i * Sphbes::sphbesj(l, theta[p] * r_i / rcut); - }); - w[p] = Integral::simpson(nr_proj, integrand.data(), &dr[1]); - } + { + std::transform (r, + r + nr_proj, + tmp.begin (), + integrand.begin (), + [theta, p, rcut, l] (double r_i, double tmp_i) + { return tmp_i * Sphbes::sphbesj (l, theta[p] * r_i / rcut); }); + w[p] = Integral::simpson (nr_proj, integrand.data (), &dr[1]); + } // optimal coefficients - std::vector c(nbes, 0.0); - std::transform(w.begin(), w.end(), z.begin(), c.begin(), [](double w_p, double z_p) { return w_p * w_p / z_p; }); - double prefac = 1.0 / std::sqrt(std::accumulate(c.begin(), c.end(), 0.0)); - std::transform(w.begin(), w.end(), z.begin(), c.begin(), [prefac](double w_p, double z_p) { - return prefac * w_p / z_p; - }); + std::vector c (nbes, 0.0); + std::transform (w.begin (), + w.end (), + z.begin (), + c.begin (), + [] (double w_p, double z_p) { return w_p * w_p / z_p; }); + double prefac = 1.0 / std::sqrt (std::accumulate (c.begin (), c.end (), 0.0)); + std::transform (w.begin (), + w.end (), + z.begin (), + c.begin (), + [prefac] (double w_p, double z_p) { return prefac * w_p / z_p; }); // new radial function - alpha.resize(nr_proj); - std::fill(alpha.begin(), alpha.end(), 0.0); + alpha.resize (nr_proj); + std::fill (alpha.begin (), alpha.end (), 0.0); for (int i = 0; i < nr_proj; ++i) - { - for (int p = 0; p < nbes; ++p) { - alpha[i] += c[p] * 
Sphbes::sphbesj(l, theta[p] * r[i] / rcut); + for (int p = 0; p < nbes; ++p) + { + alpha[i] += c[p] * Sphbes::sphbesj (l, theta[p] * r[i] / rcut); + } } - } } -void smoothgen(const int nr, const double* r, const double* chi, const double rcut, std::vector& alpha) +void + smoothgen (const int nr, const double* r, const double* chi, const double rcut, std::vector& alpha) { // lambda function for generate the new radial function - assert(rcut < r[nr - 1]); - assert(std::is_sorted(r, r + nr)); + assert (rcut < r[nr - 1]); + assert (std::is_sorted (r, r + nr)); - std::vector dr(nr); - std::adjacent_difference(r, r + nr, dr.begin()); + std::vector dr (nr); + std::adjacent_difference (r, r + nr, dr.begin ()); // lower_bound returns the first element that is equal or larger than rcut - int nr_proj = std::distance(r, std::lower_bound(r, r + nr, rcut)) + 1; - alpha.resize(nr_proj); - auto smooth_sigma = [&](double sigma_in) { - for (int i = 0; i < nr_proj; i++) + int nr_proj = std::distance (r, std::lower_bound (r, r + nr, rcut)) + 1; + alpha.resize (nr_proj); + auto smooth_sigma = [&] (double sigma_in) { - alpha[i] = chi[i] * (1 - std::exp(-std::pow((r[i] - rcut), 2) / 2 / sigma_in / sigma_in)); - } - // r^2 * chi (independent from p) - std::vector tmp(nr_proj); - std::transform(r, r + nr_proj, alpha.data(), tmp.begin(), [](double r_i, double chi_i) { - return r_i * r_i * chi_i; - }); - - // r^2 * chi * chi - std::vector integrand(nr_proj); - - std::transform(alpha.data(), - alpha.data() + nr_proj, - tmp.begin(), - integrand.begin(), - [](double chi_i, double tmp_i) { return tmp_i * chi_i; }); - double overlap = ModuleBase::Integral::simpson(nr_proj, integrand.data(), &dr[1]); - for (int i = 0; i < nr_proj; i++) - { - alpha[i] /= std::sqrt(overlap); - } - return; - }; + for (int i = 0; i < nr_proj; i++) + { + alpha[i] = chi[i] * (1 - std::exp (-std::pow ((r[i] - rcut), 2) / 2 / sigma_in / sigma_in)); + } + // r^2 * chi (independent from p) + std::vector tmp (nr_proj); + 
std::transform (r, + r + nr_proj, + alpha.data (), + tmp.begin (), + [] (double r_i, double chi_i) { return r_i * r_i * chi_i; }); + + // r^2 * chi * chi + std::vector integrand (nr_proj); + + std::transform (alpha.data (), + alpha.data () + nr_proj, + tmp.begin (), + integrand.begin (), + [] (double chi_i, double tmp_i) { return tmp_i * chi_i; }); + double overlap = ModuleBase::Integral::simpson (nr_proj, integrand.data (), &dr[1]); + for (int i = 0; i < nr_proj; i++) + { + alpha[i] /= std::sqrt (overlap); + } + return; + }; // cubic spline interpolation - ModuleBase::CubicSpline cubspl(nr_proj, r, chi); - std::vector dchi(nr_proj); - cubspl.eval(nr_proj, r, nullptr, dchi.data()); + ModuleBase::CubicSpline cubspl (nr_proj, r, chi); + std::vector dchi (nr_proj); + cubspl.eval (nr_proj, r, nullptr, dchi.data ()); // function for calculating the overlap between dalpha and dchi - auto overlap_dalpha_dchi = [&]() { - // calculate dalpha first - ModuleBase::CubicSpline cubspl_alpha(nr_proj, r, alpha.data()); - std::vector dalpha(nr_proj); - cubspl_alpha.eval(nr_proj, r, nullptr, dalpha.data()); - for (int i = 0; i < nr_proj; i++) - dalpha[i] -= dchi[i]; - // r^2 * dchi (independent from p) - std::vector tmp(nr_proj); - std::transform(r, r + nr_proj, dalpha.data(), tmp.begin(), [](double r_i, double dalpha_i) { - return r_i * r_i * dalpha_i; - }); - - // r^2 * dalpha * dchi - std::vector integrand(nr_proj); - - std::transform(dalpha.data(), - dalpha.data() + nr_proj, - tmp.begin(), - integrand.begin(), - [](double dalpha_i, double tmp_i) { return tmp_i * dalpha_i; }); - return ModuleBase::Integral::simpson(nr_proj, integrand.data(), &dr[1]); - }; + auto overlap_dalpha_dchi = [&] () + { + // calculate dalpha first + ModuleBase::CubicSpline cubspl_alpha (nr_proj, r, alpha.data ()); + std::vector dalpha (nr_proj); + cubspl_alpha.eval (nr_proj, r, nullptr, dalpha.data ()); + for (int i = 0; i < nr_proj; i++) + { + dalpha[i] -= dchi[i]; + } + // r^2 * dchi (independent from 
p) + std::vector tmp (nr_proj); + std::transform (r, + r + nr_proj, + dalpha.data (), + tmp.begin (), + [] (double r_i, double dalpha_i) { return r_i * r_i * dalpha_i; }); + + // r^2 * dalpha * dchi + std::vector integrand (nr_proj); + + std::transform (dalpha.data (), + dalpha.data () + nr_proj, + tmp.begin (), + integrand.begin (), + [] (double dalpha_i, double tmp_i) { return tmp_i * dalpha_i; }); + return ModuleBase::Integral::simpson (nr_proj, integrand.data (), &dr[1]); + }; // optimize sigma double sigma_left = 0.1; - smooth_sigma(sigma_left); - double overlap_alpha_chi_left = overlap_dalpha_dchi(); + smooth_sigma (sigma_left); + double overlap_alpha_chi_left = overlap_dalpha_dchi (); double sigma_right = 1.0; - smooth_sigma(sigma_right); - double overlap_alpha_chi_right = overlap_dalpha_dchi(); + smooth_sigma (sigma_right); + double overlap_alpha_chi_right = overlap_dalpha_dchi (); double overlap_alpha_chi = 0.0; double sigma = 0.0; - while (std::abs(overlap_alpha_chi_right - overlap_alpha_chi_left) > 1e-6) - { - sigma = (sigma_left + sigma_right) / 2; - smooth_sigma(sigma); - overlap_alpha_chi = overlap_dalpha_dchi(); - if (overlap_alpha_chi < overlap_alpha_chi_left && overlap_alpha_chi < overlap_alpha_chi_right) - { // the minimum is in the middle - if (overlap_alpha_chi_left > overlap_alpha_chi_right) - { - sigma_left = sigma; - overlap_alpha_chi_left = overlap_alpha_chi; - } - else - { - sigma_right = sigma; - overlap_alpha_chi_right = overlap_alpha_chi; - } - } - else - { // the minimum is on the left or right - if (overlap_alpha_chi_left < overlap_alpha_chi_right) - { - sigma_right = sigma; - overlap_alpha_chi_right = overlap_alpha_chi; - sigma_left = sigma_left - (sigma_right - sigma_left) * 0.5; - smooth_sigma(sigma_left); - overlap_alpha_chi_left = overlap_dalpha_dchi(); - } + while (std::abs (overlap_alpha_chi_right - overlap_alpha_chi_left) > 1e-6) + { + sigma = (sigma_left + sigma_right) / 2; + smooth_sigma (sigma); + overlap_alpha_chi = 
overlap_dalpha_dchi (); + if (overlap_alpha_chi < overlap_alpha_chi_left && overlap_alpha_chi < overlap_alpha_chi_right) + { // the minimum is in the middle + if (overlap_alpha_chi_left > overlap_alpha_chi_right) + { + sigma_left = sigma; + overlap_alpha_chi_left = overlap_alpha_chi; + } + else + { + sigma_right = sigma; + overlap_alpha_chi_right = overlap_alpha_chi; + } + } else - { - sigma_left = sigma; - overlap_alpha_chi_left = overlap_alpha_chi; - sigma_right = sigma_right + (sigma_right - sigma_left) * 0.5; - smooth_sigma(sigma_right); - overlap_alpha_chi_right = overlap_dalpha_dchi(); - } + { // the minimum is on the left or right + if (overlap_alpha_chi_left < overlap_alpha_chi_right) + { + sigma_right = sigma; + overlap_alpha_chi_right = overlap_alpha_chi; + sigma_left = sigma_left - (sigma_right - sigma_left) * 0.5; + smooth_sigma (sigma_left); + overlap_alpha_chi_left = overlap_dalpha_dchi (); + } + else + { + sigma_left = sigma; + overlap_alpha_chi_left = overlap_alpha_chi; + sigma_right = sigma_right + (sigma_right - sigma_left) * 0.5; + smooth_sigma (sigma_right); + overlap_alpha_chi_right = overlap_dalpha_dchi (); + } + } } - } } diff --git a/source/source_base/projgen.h b/source/source_base/projgen.h index 5d886ec06c4..7184b52af92 100644 --- a/source/source_base/projgen.h +++ b/source/source_base/projgen.h @@ -24,20 +24,14 @@ * @param[out] alpha new radial function of the projector * */ -void projgen( - const int l, - const int nr, - const double* r, - const double* chi, - const double rcut, - const int nbes, - std::vector& alpha); +void projgen (const int l, + const int nr, + const double* r, + const double* chi, + const double rcut, + const int nbes, + std::vector& alpha); -void smoothgen( - const int nr, - const double* r, - const double* chi, - const double rcut, - std::vector& alpha); +void smoothgen (const int nr, const double* r, const double* chi, const double rcut, std::vector& alpha); #endif \ No newline at end of file diff --git 
a/source/source_base/random.h b/source/source_base/random.h index 42c526866ed..b020f7799a2 100644 --- a/source/source_base/random.h +++ b/source/source_base/random.h @@ -8,45 +8,53 @@ namespace ModuleBase class Random { - public: - Random(); - ~Random(); - - static void between0and1( double *v, const int &num ) - { - assert( v!= NULL); - assert( num > 1); - for(int i=0; i( std::rand() ) / RAND_MAX; - } - } - - static double betweenMinus2and2(void) - { - return 2.0*betweenMinus1and1(); - } - - static double betweenMinus1and1(void) - { - const int a = std::rand() % 2; - if(a==0) return between0and1(); - else if(a==1) return betweenMinus1and0(); - else throw(std::string(__FILE__)+" line "+std::to_string(__LINE__)); // Peize Lin add to fix warning 2019-05-01 - } - - static double between0and1(void) - { - return static_cast( std::rand() )/RAND_MAX; - } - - static double betweenMinus1and0(void) - { - return -static_cast( std::rand() )/RAND_MAX; - } - + public: + Random (); + ~Random (); + + static void + between0and1 (double* v, const int& num) + { + assert (v != NULL); + assert (num > 1); + for (int i = 0; i < num; i++) + { + v[i] = static_cast (std::rand ()) / RAND_MAX; + } + } + + static double + betweenMinus2and2 (void) + { + return 2.0 * betweenMinus1and1 (); + } + + static double + betweenMinus1and1 (void) + { + const int a = std::rand () % 2; + if (a == 0) + return between0and1 (); + else if (a == 1) + return betweenMinus1and0 (); + else + throw (std::string (__FILE__) + " line " + + std::to_string (__LINE__)); // Peize Lin add to fix warning 2019-05-01 + } + + static double + between0and1 (void) + { + return static_cast (std::rand ()) / RAND_MAX; + } + + static double + betweenMinus1and0 (void) + { + return -static_cast (std::rand ()) / RAND_MAX; + } }; -} +} // namespace ModuleBase #endif diff --git a/source/source_base/realarray.cpp b/source/source_base/realarray.cpp index b9983ed804e..5829f15cf93 100644 --- a/source/source_base/realarray.cpp +++ 
b/source/source_base/realarray.cpp @@ -11,129 +11,139 @@ namespace ModuleBase int realArray::arrayCount = 0; -void realArrayAlloc() +void + realArrayAlloc () { - std::cout << "\n Allocation error for realArray " << std::endl; - exit(0); + std::cout << "\n Allocation error for realArray " << std::endl; + exit (0); } -realArray::realArray(const int d1,const int d2,const int d3) +realArray::realArray (const int d1, const int d2, const int d3) { - dim = 3; - bound1 = (d1 <= 0) ? 1 : d1; - bound2 = (d2 <= 0) ? 1 : d2; - bound3 = (d3 <= 0) ? 1 : d3; - bound4 = 0; + dim = 3; + bound1 = (d1 <= 0) ? 1 : d1; + bound2 = (d2 <= 0) ? 1 : d2; + bound3 = (d3 <= 0) ? 1 : d3; + bound4 = 0; - size = bound1 * bound2 * bound3 ; //* sizeof(float); + size = bound1 * bound2 * bound3; //* sizeof(float); - auto handler_old = std::set_new_handler(realArrayAlloc); - ptr = new double[size]; - std::set_new_handler(handler_old); - zero_out(); - assert(ptr != 0); + auto handler_old = std::set_new_handler (realArrayAlloc); + ptr = new double[size]; + std::set_new_handler (handler_old); + zero_out (); + assert (ptr != nullptr); - ++arrayCount; + ++arrayCount; } -realArray::realArray(const int d1,const int d2,const int d3,const int d4) +realArray::realArray (const int d1, const int d2, const int d3, const int d4) { - dim = 4; - bound1 = (d1 <= 0) ? 1 : d1; - bound2 = (d2 <= 0) ? 1 : d2; - bound3 = (d3 <= 0) ? 1 : d3; - bound4 = (d4 <= 0) ? 1 : d4; + dim = 4; + bound1 = (d1 <= 0) ? 1 : d1; + bound2 = (d2 <= 0) ? 1 : d2; + bound3 = (d3 <= 0) ? 1 : d3; + bound4 = (d4 <= 0) ? 
1 : d4; - size = bound1 * bound2 * bound3 * bound4 ; //* sizeof(float); + size = bound1 * bound2 * bound3 * bound4; //* sizeof(float); - auto handler_old = std::set_new_handler(realArrayAlloc); - ptr = new double[size]; - std::set_new_handler(handler_old); - zero_out(); + auto handler_old = std::set_new_handler (realArrayAlloc); + ptr = new double[size]; + std::set_new_handler (handler_old); + zero_out (); - ++arrayCount; + ++arrayCount; } -realArray::realArray(const realArray &cd) +realArray::realArray (const realArray& cd) { - this->size = cd.getSize(); - this->ptr = new double[size]; - for (int i = 0; i < size; i++) - this->ptr[i] = cd.ptr[i]; - this->dim = cd.dim; - this->bound1 = cd.bound1; - this->bound2 = cd.bound2; - this->bound3 = cd.bound3; - this->bound4 = cd.bound4; - - ++arrayCount; -} + this->size = cd.getSize (); + this->ptr = new double[size]; + for (int i = 0; i < size; i++) + { + this->ptr[i] = cd.ptr[i]; + } + this->dim = cd.dim; + this->bound1 = cd.bound1; + this->bound2 = cd.bound2; + this->bound3 = cd.bound3; + this->bound4 = cd.bound4; + ++arrayCount; +} //******************************** // // Destructor for class realArray // //******************************** -realArray ::~realArray() -{ - freemem(); -} +realArray ::~realArray () { freemem (); } -void realArray::freemem() +void + realArray::freemem () { - delete [] ptr; - ptr = NULL; + delete[] ptr; + ptr = nullptr; } -void realArray::create(const int d1,const int d2,const int d3,const int d4) +void + realArray::create (const int d1, const int d2, const int d3, const int d4) { - size = d1 * d2 * d3 * d4; - assert(size>0); + size = d1 * d2 * d3 * d4; + assert (size > 0); + + dim = 4; - dim = 4; + bound1 = d1; + bound2 = d2; + bound3 = d3; + bound4 = d4; - bound1 = d1; - bound2 = d2; - bound3 = d3; - bound4 = d4; + delete[] ptr; + ptr = new double[size]; - delete [] ptr; - ptr = new double[size]; + zero_out (); // mohan modify 2009-09-17 - zero_out(); // mohan modify 2009-09-17 - - 
assert(ptr != 0); + assert (ptr != nullptr); } -void realArray::create(const int d1,const int d2,const int d3) +void + realArray::create (const int d1, const int d2, const int d3) { - size = d1 * d2 * d3; - assert(size>0); + size = d1 * d2 * d3; + assert (size > 0); - dim = 3; + dim = 3; - bound1 = d1; - bound2 = d2; - bound3 = d3; - bound4 = 1; + bound1 = d1; + bound2 = d2; + bound3 = d3; + bound4 = 1; - delete [] ptr; + delete[] ptr; ptr = new double[size]; - zero_out(); - assert(ptr != 0); + zero_out (); + assert (ptr != nullptr); } -const realArray &realArray::operator=(const realArray &right) +const realArray& + realArray::operator= (const realArray& right) { - for (int i = 0;i < size;i++) ptr[i] = right.ptr[i]; - return *this;// enables x = y = z; + for (int i = 0; i < size; i++) + { + ptr[i] = right.ptr[i]; + } + return *this; // enables x = y = z; } -const realArray &realArray::operator=(const double &right) +const realArray& + realArray::operator= (const double& right) { - for (int i = 0;i < size;i++) ptr[i] = right; - return *this;// enables x = y = z; + for (int i = 0; i < size; i++) + { + ptr[i] = right; + } + return *this; // enables x = y = z; } //******************************************************** @@ -141,54 +151,77 @@ const realArray &realArray::operator=(const double &right) // const reference return create an cvakue //******************************************************** // Peize Lin add assert 2016-08-22 -const double &realArray::operator() -(const int ind1,const int ind2,const int ind3)const +const double& + realArray::operator() (const int ind1, const int ind2, const int ind3) const { - assert(ind1>=0); assert(ind1=0); assert(ind2=0); assert(ind3= 0); + assert (ind1 < bound1); + assert (ind2 >= 0); + assert (ind2 < bound2); + assert (ind3 >= 0); + assert (ind3 < bound3); + return ptr[(ind1 * bound2 + ind2) * bound3 + ind3]; } -const double &realArray::operator() -(const int ind1,const int ind2,const int ind3,const int ind4)const +const 
double& + realArray::operator() (const int ind1, const int ind2, const int ind3, const int ind4) const { - assert(ind1>=0); assert(ind1=0); assert(ind2=0); assert(ind3=0); assert(ind4= 0); + assert (ind1 < bound1); + assert (ind2 >= 0); + assert (ind2 < bound2); + assert (ind3 >= 0); + assert (ind3 < bound3); + assert (ind4 >= 0); + assert (ind4 < bound4); + return ptr[((ind1 * bound2 + ind2) * bound3 + ind3) * bound4 + ind4]; } //******************************************************** // overloaded subscript operator for non-const real Array // const reference return creates an lvakue //******************************************************** -double &realArray::operator()(const int ind1,const int ind2,const int ind3) +double& + realArray::operator() (const int ind1, const int ind2, const int ind3) { - assert(ind1>=0); assert(ind1=0); assert(ind2=0); assert(ind3= 0); + assert (ind1 < bound1); + assert (ind2 >= 0); + assert (ind2 < bound2); + assert (ind3 >= 0); + assert (ind3 < bound3); + return ptr[(ind1 * bound2 + ind2) * bound3 + ind3]; } -double &realArray::operator()(const int ind1,const int ind2,const int ind3,const int ind4) +double& + realArray::operator() (const int ind1, const int ind2, const int ind3, const int ind4) { - assert(ind1>=0); assert(ind1=0); assert(ind2=0); assert(ind3=0); assert(ind4= 0); + assert (ind1 < bound1); + assert (ind2 >= 0); + assert (ind2 < bound2); + assert (ind3 >= 0); + assert (ind3 < bound3); + assert (ind4 >= 0); + assert (ind4 < bound4); + return ptr[((ind1 * bound2 + ind2) * bound3 + ind3) * bound4 + ind4]; } //**************************** // zeroes out the whole array //**************************** -void realArray::zero_out(void) +void + realArray::zero_out () { - if (size <= 0) return; - for (int i = 0;i < size; i++) ptr[i] = 0; - return; + if (size <= 0) + { + return; + } + for (int i = 0; i < size; i++) + { + ptr[i] = 0; + } + return; } -} +} // namespace ModuleBase diff --git a/source/source_base/realarray.h 
b/source/source_base/realarray.h index 0fb76921487..11f3d0873bf 100644 --- a/source/source_base/realarray.h +++ b/source/source_base/realarray.h @@ -10,7 +10,6 @@ #include #include - namespace ModuleBase { /** @@ -20,11 +19,11 @@ namespace ModuleBase class realArray { public: - double * ptr = nullptr; + double* ptr = nullptr; - realArray(const int d1 = 1, const int d2 = 1, const int d3 = 1); - realArray(const int d1, const int d2, const int d3, const int d4); - ~realArray(); + realArray (const int d1 = 1, const int d2 = 1, const int d3 = 1); + realArray (const int d1, const int d2, const int d3, const int d4); + ~realArray (); /** * @brief create 3 dimensional real array @@ -33,10 +32,10 @@ class realArray * @param[in] d2 The second dimension size * @param[in] d3 The third dimension size */ - void create(const int d1, const int d2, const int d3); - void create(const int d1, const int d2, const int d3, const int d4); + void create (const int d1, const int d2, const int d3); + void create (const int d1, const int d2, const int d3, const int d4); - realArray(const realArray &cd); + realArray (const realArray& cd); /** * @brief Equal a realArray to another one @@ -44,14 +43,14 @@ class realArray * @param right * @return const realArray& */ - const realArray &operator=(const realArray &right); + const realArray& operator= (const realArray& right); /** * @brief Set all value of an array to a double float number * * @param right * @return const realArray& */ - const realArray &operator=(const double &right); + const realArray& operator= (const double& right); /** * @brief Access elements by using operator "()" @@ -61,8 +60,8 @@ class realArray * @param d3 * @return double& */ - double &operator()(const int d1, const int d2, const int d3); - double &operator()(const int d1, const int d2, const int d3, const int d4); + double& operator() (const int d1, const int d2, const int d3); + double& operator() (const int d1, const int d2, const int d3, const int d4); /** * @brief 
Access elements by using "()" through pointer @@ -73,21 +72,22 @@ class realArray * @param d3 * @return const double& */ - const double &operator()(const int d1, const int d2, const int d3) const; - const double &operator()(const int d1, const int d2, const int d3, const int d4) const; + const double& operator() (const int d1, const int d2, const int d3) const; + const double& operator() (const int d1, const int d2, const int d3, const int d4) const; /** * @brief Set all elements of an IntArray to zero * */ - void zero_out(void); + void zero_out (); /** * @brief Get the Size object * * @return int */ - int getSize() const + int + getSize () const { return size; } @@ -98,7 +98,8 @@ class realArray * * @return int */ - int getDim() const + int + getDim () const { return dim; } @@ -109,22 +110,26 @@ class realArray * * @return int */ - int getBound1() const + int + getBound1 () const { return bound1; } - int getBound2() const + int + getBound2 () const { return bound2; } - int getBound3() const + int + getBound3 () const { return bound3; } - int getBound4() const + int + getBound4 () const { return bound4; } @@ -134,7 +139,8 @@ class realArray * * @return int */ - static int getArrayCount(void) + static int + getArrayCount () { return arrayCount; } @@ -145,17 +151,21 @@ class realArray int bound1, bound2, bound3, bound4; static int arrayCount; - void freemem(); + void freemem (); }; //************************************************** // set elements of a as zeros which a is 1_d array. 
//************************************************** -template void zeros(T *u, const int n) +template +void + zeros (T* u, const int n) { - assert(n > 0); + assert (n > 0); for (int i = 0; i < n; i++) - u[i] = 0; + { + u[i] = 0; + } } } // namespace ModuleBase diff --git a/source/source_base/sph_bessel_recursive-d1.cpp b/source/source_base/sph_bessel_recursive-d1.cpp index c34e2ea97cb..a2416be3ad7 100644 --- a/source/source_base/sph_bessel_recursive-d1.cpp +++ b/source/source_base/sph_bessel_recursive-d1.cpp @@ -14,369 +14,395 @@ namespace ModuleBase std::vector Sph_Bessel_Recursive_Pool::D1::sb_pool; -void Sph_Bessel_Recursive::D1::set_dx( const double dx_in ) +void + Sph_Bessel_Recursive::D1::set_dx (const double dx_in) { - if(finish_set_dx && dx_in!=dx) - throw std::runtime_error("Sph_Bessel_Recursive::set_dx, dx can only set once"); - else - { - dx = dx_in; - finish_set_dx = true; - } + if (finish_set_dx && dx_in != dx) + { + throw std::runtime_error ("Sph_Bessel_Recursive::set_dx, dx can only set once"); + } + else + { + dx = dx_in; + finish_set_dx = true; + } } -const std::vector> & Sph_Bessel_Recursive::D1::cal_jlx( const int lmax, const size_t ix_size ) +const std::vector>& + Sph_Bessel_Recursive::D1::cal_jlx (const int lmax, const size_t ix_size) { - if(lmax<0) - throw std::invalid_argument("Sph_Bessel_Recursive::jlx l<0"); - cal_jlx_0( lmax+1 ); - cal_jlx_smallx( lmax+1, ix_size ); - cal_jlx_recursive( lmax+1, ix_size ); - return jlx; + if (lmax < 0) + { + throw std::invalid_argument ("Sph_Bessel_Recursive::jlx l<0"); + } + cal_jlx_0 (lmax + 1); + cal_jlx_smallx (lmax + 1, ix_size); + cal_jlx_recursive (lmax + 1, ix_size); + return jlx; } -void Sph_Bessel_Recursive::D1::cal_jlx_0( const int l_size ) +void + Sph_Bessel_Recursive::D1::cal_jlx_0 (const int l_size) { - if(jlx.size() < static_cast(l_size)) - jlx.resize(l_size); - - for( int l=0; l!=l_size; ++l ) - { - if(jlx[l].size()<1) - { - jlx[l].resize(1); - if(0==l) - jlx[l][0] = 1.0; - else - jlx[l][0] 
= 0.0; - } - } + if (jlx.size () < static_cast (l_size)) + { + jlx.resize (l_size); + } + + for (int l = 0; l != l_size; ++l) + { + if (jlx[l].size () < 1) + { + jlx[l].resize (1); + if (0 == l) + { + jlx[l][0] = 1.0; + } + else + { + jlx[l][0] = 0.0; + } + } + } } -void Sph_Bessel_Recursive::D1::cal_jlx_smallx( const int l_size, const size_t ix_size ) +void + Sph_Bessel_Recursive::D1::cal_jlx_smallx (const int l_size, const size_t ix_size) { - if(jlx.size() < static_cast(l_size)) - jlx.resize(l_size); - - for( int l=0; l!=l_size; ++l ) - { - if(jlx[l].size()threshold/coeff*(l+1.5)*4, 1.0/(l+2) ); - - const size_t ix_size_begin = static_cast(jlx[l].size()); - const size_t ix_size_end = std::min( ix_size, static_cast(smallx_range/dx) ); - if(jlx[l].size() (l_size)) + { + jlx.resize (l_size); + } + + for (int l = 0; l != l_size; ++l) + { + if (jlx[l].size () < ix_size) + { + const double coeff = sqrt (ModuleBase::PI) / tgamma (l + 1.5) / pow (2, l + 1); + const double smallx_range = pow (this->threshold / coeff * (l + 1.5) * 4, 1.0 / (l + 2)); + + const size_t ix_size_begin = static_cast (jlx[l].size ()); + const size_t ix_size_end = std::min (ix_size, static_cast (smallx_range / dx)); + if (jlx[l].size () < ix_size_end) + { + jlx[l].resize (ix_size_end); + for (size_t ix = ix_size_begin; ix < ix_size_end; ++ix) + { + const double x1 = ix * dx; + jlx[l][ix] = coeff * pow (x1, l); + } + } + } + } } -void Sph_Bessel_Recursive::D1::cal_jlx_recursive( const int l_size, const size_t ix_size ) +void + Sph_Bessel_Recursive::D1::cal_jlx_recursive (const int l_size, const size_t ix_size) { - if(jlx.size() < static_cast(l_size)) - jlx.resize(l_size); - - for( int l=0; l!=l_size; ++l ) - { - if(jlx[l].size()(jlx[l].size()); - jlx[l].resize(ix_size); - switch(l) - { - case 0: - for( size_t ix=ix_size_begin; ix (l_size)) + { + jlx.resize (l_size); + } + for (int l = 0; l != l_size; ++l) + { + if (jlx[l].size () < ix_size) + { + const size_t ix_size_begin = static_cast 
(jlx[l].size ()); + jlx[l].resize (ix_size); + switch (l) + { + case 0: + for (size_t ix = ix_size_begin; ix < ix_size; ++ix) + { + const double x1 = ix * dx; + jlx[l][ix] = sin (x1) / x1; + } + break; + case 1: + for (size_t ix = ix_size_begin; ix < ix_size; ++ix) + { + const double x1 = ix * dx; + const double x2 = x1 * x1; + jlx[l][ix] = sin (x1) / x2 - cos (x1) / x1; + } + break; + default: + for (size_t ix = ix_size_begin; ix < ix_size; ++ix) + { + const double x1 = ix * dx; + jlx[l][ix] = (2 * l - 1) / x1 * jlx[l - 1][ix] - jlx[l - 2][ix]; + } + break; + } + } + } } +} // namespace ModuleBase + /* void Sph_Bessel_Recursive::cal_jlx_preset(const int l_size, const size_t ix_size) -{ +{ std::cout<(jlx[l].size()); - jlx[l].resize(ix_size); - switch(l) - { - case 0: - { - for( size_t ix=ix_size_begin; ix(0.14/dx)+1 ); - for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); - for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); - for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); - for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); - for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); - for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); - for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); - for( size_t ix=ix_size_begin; ix6"); - } - } + if(jlx[l].size()(jlx[l].size()); + jlx[l].resize(ix_size); + switch(l) + { + case 0: + { + for( size_t ix=ix_size_begin; ix(0.14/dx)+1 ); + for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); + for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); + for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); + for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); + for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); + for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); + for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); + for( size_t ix=ix_size_begin; ix6"); + } + } //timeval t_end; gettimeofday( &t_end, NULL); -//std::cout< -#include +#include +#include namespace ModuleBase { std::vector Sph_Bessel_Recursive_Pool::D2::sb_pool; -void Sph_Bessel_Recursive::D2::set_dx( const double dx_in ) +void + 
Sph_Bessel_Recursive::D2::set_dx (const double dx_in) { - if(finish_set_dx && dx_in!=dx) - throw std::runtime_error("Sph_Bessel_Recursive::set_dx, dx can only set once"); - else - { - dx = dx_in; - finish_set_dx = true; - } + if (finish_set_dx && dx_in != dx) + { + throw std::runtime_error ("Sph_Bessel_Recursive::set_dx, dx can only set once"); + } + else + { + dx = dx_in; + finish_set_dx = true; + } } -const std::vector>> & Sph_Bessel_Recursive::D2::cal_jlx( const int lmax, const size_t ix1_size, const size_t ix2_size ) +const std::vector>>& + Sph_Bessel_Recursive::D2::cal_jlx (const int lmax, const size_t ix1_size, const size_t ix2_size) { - if(lmax<0) - throw std::invalid_argument("Sph_Bessel_Recursive::jlx l<0"); - cal_jlx_0( lmax+1, ix1_size, ix2_size ); - cal_jlx_smallx( lmax+1, ix1_size, ix2_size ); - cal_jlx_recursive( lmax+1, ix1_size, ix2_size ); - ModuleBase::Memory::record("ORB::Jl(x)", sizeof(double) * (lmax+1) * ix1_size * ix2_size); - return jlx; + if (lmax < 0) + { + throw std::invalid_argument ("Sph_Bessel_Recursive::jlx l<0"); + } + cal_jlx_0 (lmax + 1, ix1_size, ix2_size); + cal_jlx_smallx (lmax + 1, ix1_size, ix2_size); + cal_jlx_recursive (lmax + 1, ix1_size, ix2_size); + ModuleBase::Memory::record ("ORB::Jl(x)", sizeof (double) * (lmax + 1) * ix1_size * ix2_size); + return jlx; } -void Sph_Bessel_Recursive::D2::cal_jlx_0( const int l_size, const size_t ix1_size, const size_t ix2_size ) +void + Sph_Bessel_Recursive::D2::cal_jlx_0 (const int l_size, const size_t ix1_size, const size_t ix2_size) { - if(jlx.size() < static_cast(l_size)) - jlx.resize(l_size); - - for( int l=0; l (l_size)) + { + jlx.resize (l_size); + } - if( jlx[l].size()(1,jlx0)); - - if( jlx[l][0].size() (1, jlx0)); + } + + if (jlx[l][0].size () < ix2_size) + { + jlx[l][0].resize (ix2_size, jlx0); + } + } } -void Sph_Bessel_Recursive::D2::cal_jlx_smallx( const int l_size, const size_t ix1_size, const size_t ix2_size ) +void + Sph_Bessel_Recursive::D2::cal_jlx_smallx (const int 
l_size, const size_t ix1_size, const size_t ix2_size) { - if(jlx.size() < static_cast(l_size)) - jlx.resize(l_size); - - for( int l=0; l!=l_size; ++l ) - { - const double coeff = sqrt(ModuleBase::PI)/tgamma(l+1.5)/pow(2,l+1); - const double smallx_range = pow( this->threshold/coeff*(l+1.5)*4, 1.0/(l+2) ); - - if(jlx[l].size()(1), jlx[l][ix1].size() ); - const size_t ix2_size_end = std::min( ix2_size, static_cast(smallx_range/dx)/ix1 ); - if(jlx[l][ix1].size() (l_size)) + { + jlx.resize (l_size); + } + + for (int l = 0; l != l_size; ++l) + { + const double coeff = sqrt (ModuleBase::PI) / tgamma (l + 1.5) / pow (2, l + 1); + const double smallx_range = pow (this->threshold / coeff * (l + 1.5) * 4, 1.0 / (l + 2)); + + if (jlx[l].size () < ix1_size) + { + jlx[l].resize (ix1_size); + } + for (size_t ix1 = 1; ix1 < ix1_size; ++ix1) + { + const size_t ix2_size_begin = std::max (static_cast (1), jlx[l][ix1].size ()); + const size_t ix2_size_end = std::min (ix2_size, static_cast (smallx_range / dx) / ix1); + if (jlx[l][ix1].size () < ix2_size_end) + { + jlx[l][ix1].resize (ix2_size_end); + } + for (size_t ix2 = ix2_size_begin; ix2 < ix2_size_end; ++ix2) + { + const double x1 = ix1 * ix2 * dx; + jlx[l][ix1][ix2] = coeff * pow (x1, l); + } + } + } } -void Sph_Bessel_Recursive::D2::cal_jlx_recursive( const int l_size, const size_t ix1_size, const size_t ix2_size ) +void + Sph_Bessel_Recursive::D2::cal_jlx_recursive (const int l_size, const size_t ix1_size, const size_t ix2_size) { - if(jlx.size() < static_cast(l_size)) - jlx.resize(l_size); - - for( int l=0; l!=l_size; ++l ) - { - switch(l) - { - case 0: - if(jlx[l].size()(1), jlx[l][ix1].size() ); - if(jlx[l][ix1].size()(1), jlx[l][ix1].size() ); - if(jlx[l][ix1].size()(1), jlx[l][ix1].size() ); - if(jlx[l][ix1].size() (l_size)) + { + jlx.resize (l_size); + } + for (int l = 0; l != l_size; ++l) + { + switch (l) + { + case 0: + if (jlx[l].size () < ix1_size) + { + jlx[l].resize (ix1_size); + } + for (size_t ix1 = 1; ix1 < 
ix1_size; ++ix1) + { + const size_t ix2_size_begin = std::max (static_cast (1), jlx[l][ix1].size ()); + if (jlx[l][ix1].size () < ix2_size) + { + jlx[l][ix1].resize (ix2_size); + } + for (size_t ix2 = ix2_size_begin; ix2 < ix2_size; ++ix2) + { + const double x1 = ix1 * ix2 * dx; + jlx[l][ix1][ix2] = sin (x1) / x1; + } + } + break; + case 1: + if (jlx[l].size () < ix1_size) + { + jlx[l].resize (ix1_size); + } + for (size_t ix1 = 1; ix1 < ix1_size; ++ix1) + { + const size_t ix2_size_begin = std::max (static_cast (1), jlx[l][ix1].size ()); + if (jlx[l][ix1].size () < ix2_size) + { + jlx[l][ix1].resize (ix2_size); + } + for (size_t ix2 = ix2_size_begin; ix2 < ix2_size; ++ix2) + { + const double x1 = ix1 * ix2 * dx; + const double x2 = x1 * x1; + jlx[l][ix1][ix2] = sin (x1) / x2 - cos (x1) / x1; + } + } + break; + default: + if (jlx[l].size () < ix1_size) + { + jlx[l].resize (ix1_size); + } + for (size_t ix1 = 1; ix1 < ix1_size; ++ix1) + { + const size_t ix2_size_begin = std::max (static_cast (1), jlx[l][ix1].size ()); + if (jlx[l][ix1].size () < ix2_size) + { + jlx[l][ix1].resize (ix2_size); + } + for (size_t ix2 = ix2_size_begin; ix2 < ix2_size; ++ix2) + { + const double x1 = ix1 * ix2 * dx; + jlx[l][ix1][ix2] = (2 * l - 1) / x1 * jlx[l - 1][ix1][ix2] - jlx[l - 2][ix1][ix2]; + } + } + break; + } + } } +} // namespace ModuleBase + /* void Sph_Bessel_Recursive::cal_jlx_preset(const int l_size, const size_t ix_size) -{ +{ std::cout<(jlx[l].size()); - jlx[l].resize(ix_size); - switch(l) - { - case 0: - { - for( size_t ix=ix_size_begin; ix(0.14/dx)+1 ); - for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); - for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); - for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); - for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); - for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); - for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); - for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); - for( size_t ix=ix_size_begin; ix6"); - } - } + if(jlx[l].size()(jlx[l].size()); + 
jlx[l].resize(ix_size); + switch(l) + { + case 0: + { + for( size_t ix=ix_size_begin; ix(0.14/dx)+1 ); + for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); + for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); + for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); + for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); + for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); + for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); + for( size_t ix=ix_size_begin; ix(0.29/dx)+1 ); + for( size_t ix=ix_size_begin; ix6"); + } + } //timeval t_end; gettimeofday( &t_end, NULL); -//std::cout< -#include +#include namespace ModuleBase { class Sph_Bessel_Recursive { -public: - class D1; - class D2; + public: + class D1; + class D2; }; - - class Sph_Bessel_Recursive::D1 { -public: - const std::vector> & cal_jlx( const int lmax, const size_t ix_size ); - const std::vector> & get_jlx()const{ return jlx; } - - void set_dx(const double dx_in); - double get_dx()const{ return dx; } - -private: - std::vector> jlx; // jlx[l][x] - double dx = 0.0; - bool finish_set_dx = false; - - void cal_jlx_0( const int l_size ); - void cal_jlx_smallx( const int l_size, const size_t ix_size ); - void cal_jlx_recursive( const int l_size, const size_t ix_size ); - - - double threshold = 1e-8; // Peize Lin test + public: + const std::vector>& cal_jlx (const int lmax, const size_t ix_size); + const std::vector>& + get_jlx () const + { + return jlx; + } + + void set_dx (const double dx_in); + double + get_dx () const + { + return dx; + } + + private: + std::vector> jlx; // jlx[l][x] + double dx = 0.0; + bool finish_set_dx = false; + + void cal_jlx_0 (const int l_size); + void cal_jlx_smallx (const int l_size, const size_t ix_size); + void cal_jlx_recursive (const int l_size, const size_t ix_size); + + double threshold = 1e-8; // Peize Lin test }; - - class Sph_Bessel_Recursive::D2 { -public: - const std::vector>> & cal_jlx( const int lmax, const size_t ix1_size, const size_t ix2_size ); - const std::vector>> & get_jlx()const{ return jlx; } - 
- void set_dx(const double dx_in); - double get_dx()const{ return dx; } - -private: - std::vector>> jlx; // jlx[l][x1][x2] - double dx = 0.0; - bool finish_set_dx = false; - - void cal_jlx_0( const int l_size, const size_t ix1_size, const size_t ix2_size ); - void cal_jlx_smallx( const int l_size, const size_t ix1_size, const size_t ix2_size ); - void cal_jlx_recursive( const int l_size, const size_t ix1_size, const size_t ix2_size ); - - - double threshold = 1e-8; // Peize Lin test + public: + const std::vector>>& + cal_jlx (const int lmax, const size_t ix1_size, const size_t ix2_size); + const std::vector>>& + get_jlx () const + { + return jlx; + } + + void set_dx (const double dx_in); + double + get_dx () const + { + return dx; + } + + private: + std::vector>> jlx; // jlx[l][x1][x2] + double dx = 0.0; + bool finish_set_dx = false; + + void cal_jlx_0 (const int l_size, const size_t ix1_size, const size_t ix2_size); + void cal_jlx_smallx (const int l_size, const size_t ix1_size, const size_t ix2_size); + void cal_jlx_recursive (const int l_size, const size_t ix1_size, const size_t ix2_size); + + double threshold = 1e-8; // Peize Lin test }; - - class Sph_Bessel_Recursive_Pool { -public: - class D1 - { - public: - static std::vector sb_pool; - }; - class D2 - { - public: - static std::vector sb_pool; - }; + public: + class D1 + { + public: + static std::vector sb_pool; + }; + class D2 + { + public: + static std::vector sb_pool; + }; }; -} +} // namespace ModuleBase -#endif // SPH_BESSEL_RECURSIVE_H \ No newline at end of file +#endif // SPH_BESSEL_RECURSIVE_H \ No newline at end of file diff --git a/source/source_base/spherical_bessel_transformer.cpp b/source/source_base/spherical_bessel_transformer.cpp index ce4ee7e8477..618f4b11614 100644 --- a/source/source_base/spherical_bessel_transformer.cpp +++ b/source/source_base/spherical_bessel_transformer.cpp @@ -19,48 +19,41 @@ namespace ModuleBase class SphericalBesselTransformer::Impl { -public: + public: + Impl 
(const bool cache_enabled = false); + ~Impl () { _rfft_clear (); }; - Impl(const bool cache_enabled = false); - ~Impl() { _rfft_clear(); }; + Impl (Impl const&) = delete; + Impl (Impl&&) = delete; - Impl(Impl const&) = delete; - Impl(Impl&&) = delete; - - Impl& operator=(Impl const&) = delete; - Impl& operator=(Impl&&) = delete; + Impl& operator= (Impl const&) = delete; + Impl& operator= (Impl&&) = delete; // see the interface class for details - void radrfft( - const int l, - const int ngrid, - const double cutoff, - const double* const in, - double* const out, - const int p = 0 - ); + void radrfft (const int l, + const int ngrid, + const double cutoff, + const double* const in, + double* const out, + const int p = 0); // see the interface class for details - void direct( - const int l, - const int ngrid_in, - const double* const grid_in, - const double* const in, - const int ngrid_out, - const double* const grid_out, - double* const out, - const int p = 0 - ); + void direct (const int l, + const int ngrid_in, + const double* const grid_in, + const double* const in, + const int ngrid_out, + const double* const grid_out, + double* const out, + const int p = 0); // total heap usage (in bytes) from the FFTW buffer and tabulated jl - size_t heap_usage() const; + size_t heap_usage () const; // clear the FFTW plan & buffer as well as the tabulated jl - void clear(); - - -private: + void clear (); + private: //***************************************************************** // FFT-based algorithm //***************************************************************** @@ -82,11 +75,10 @@ class SphericalBesselTransformer::Impl const unsigned fftw_plan_flag_; /// buffer allocation and plan creation for in-place real-input FFT - void _rfft_prepare(const int n); + void _rfft_prepare (const int n); /// clear the FFTW plan and buffer - void _rfft_clear(); - + void _rfft_clear (); //***************************************************************** // numerical integration @@ 
-107,39 +99,34 @@ class SphericalBesselTransformer::Impl std::vector jl_; /// tabulate spherical Bessel function on the mesh grid - void _tabulate( - const int l, - const int ngrid_in, - const double* const grid_in, - const int ngrid_out, - const double* const grid_out - ); + void _tabulate (const int l, + const int ngrid_in, + const double* const grid_in, + const int ngrid_out, + const double* const grid_out); /// clear the tabulated jl - void _table_clear(); + void _table_clear (); }; // class SphericalBesselTransformer::Impl +SphericalBesselTransformer::Impl::Impl (const bool cache_enabled) + : // NOTE: For the current usage of this class, the performance gain + // by using FFTW_MEASURE instead of FFTW_ESTIMATE usually does not + // worth the extra overhead, and may cause a timeout of the integrated + // test. This might need more investigation and change in the future. + // fftw_plan_flag_(cache_enabled ? FFTW_MEASURE : FFTW_ESTIMATE), + fftw_plan_flag_ (FFTW_ESTIMATE), cache_enabled_ (cache_enabled) +{ +} -SphericalBesselTransformer::Impl::Impl(const bool cache_enabled): - // NOTE: For the current usage of this class, the performance gain - // by using FFTW_MEASURE instead of FFTW_ESTIMATE usually does not - // worth the extra overhead, and may cause a timeout of the integrated - // test. This might need more investigation and change in the future. - //fftw_plan_flag_(cache_enabled ? 
FFTW_MEASURE : FFTW_ESTIMATE), - fftw_plan_flag_(FFTW_ESTIMATE), - cache_enabled_(cache_enabled) -{} - - -void SphericalBesselTransformer::Impl::radrfft( - const int l, - const int ngrid, - const double cutoff, - const double* const in, - double* const out, - const int p -) +void + SphericalBesselTransformer::Impl::radrfft (const int l, + const int ngrid, + const double cutoff, + const double* const in, + double* const out, + const int p) { /* * An l-th order spherical Bessel transform F(x) -> G(y) can be expressed @@ -168,18 +155,18 @@ void SphericalBesselTransformer::Impl::radrfft( * imaginary, which suggests the use of real-input FFT. * */ - assert(l >= 0); - assert(ngrid > 1); - assert(p <= 2); + assert (l >= 0); + assert (ngrid > 1); + assert (p <= 2); - const double pi = std::acos(-1.0); + const double pi = std::acos (-1.0); const int n = ngrid - 1; const double dx = cutoff / n; const double dy = pi / cutoff; - const double pref = dx / std::sqrt(2. * pi); + const double pref = dx / std::sqrt (2. * pi); // temporary storage for the output (in order to support in-place transform) - std::vector tmp(ngrid); + std::vector tmp (ngrid); // The l-th order spherical Bessel function of the first kind can be expressed as // @@ -201,280 +188,262 @@ void SphericalBesselTransformer::Impl::radrfft( // c(1,0) = 0 c(1,0) = 1 // c(1,1) = -1 c(1,1) = 0 // - std::vector> c((l+1) * (l+2) / 2); // cos->real; sin->imag - auto idx = [](int ll, int m) { return (ll+1)*ll/2 + m; }; + std::vector> c ((l + 1) * (l + 2) / 2); // cos->real; sin->imag + auto idx = [] (int ll, int m) { return (ll + 1) * ll / 2 + m; }; - c[idx(0, 0)] = {0, 1}; + c[idx (0, 0)] = {0, 1}; if (l > 0) - { - c[idx(1, 0)] = {0, 1}; - c[idx(1, 1)] = {-1, 0}; - } + { + c[idx (1, 0)] = {0, 1}; + c[idx (1, 1)] = {-1, 0}; + } for (int ll = 2; ll <= l; ++ll) - { - for (int m = 0; m < ll; ++m) { - c[idx(ll,m)] = (2*ll-1.0) * c[idx(ll-1, m)] - (m >= 2 ? 
c[idx(ll-2, m-2)] : 0); + for (int m = 0; m < ll; ++m) + { + c[idx (ll, m)] = (2 * ll - 1.0) * c[idx (ll - 1, m)] - (m >= 2 ? c[idx (ll - 2, m - 2)] : 0); + } + c[idx (ll, ll)] = -c[idx (ll - 2, ll - 2)]; } - c[idx(ll,ll)] = - c[idx(ll-2, ll-2)]; - } - _rfft_prepare(2 * n); + _rfft_prepare (2 * n); bool is_imag = true; int sign = -1; for (int m = 0; m <= l; ++m) - { - // m even --> sin; f[2*n-i] = -f[i]; out += -imag(rfft(f)) / y^(l+1-m) - // m odd --> cos; f[2*n-i] = +f[i]; out += +real(rfft(f)) / y^(l+1-m) - const double coef = reinterpret_cast(c[idx(l,m)])[is_imag]; - - f_[0] = f_[n] = 0.0; - for (int i = 1; i != n; ++i) { - f_[i] = pref * coef * in[i] * std::pow(i * dx, m + 1 - l - p); - f_[2 * n - i] = sign * f_[i]; + // m even --> sin; f[2*n-i] = -f[i]; out += -imag(rfft(f)) / y^(l+1-m) + // m odd --> cos; f[2*n-i] = +f[i]; out += +real(rfft(f)) / y^(l+1-m) + const double coef = reinterpret_cast (c[idx (l, m)])[is_imag]; + + f_[0] = f_[n] = 0.0; + for (int i = 1; i != n; ++i) + { + f_[i] = pref * coef * in[i] * std::pow (i * dx, m + 1 - l - p); + f_[2 * n - i] = sign * f_[i]; + } + + fftw_execute (rfft_plan_); // perform in-place rfft on f_ + + // sum up the series by ( ... ( ( g0/y + g1 )/y + g2 )/y + ... + gl )/y + // out[0] is handled later by direct integration + for (int j = 1; j <= n; ++j) + { + tmp[j] = (tmp[j] + sign * f_[2 * j + is_imag]) / (j * dy); + } + + is_imag = !is_imag; + sign = -sign; } - fftw_execute(rfft_plan_); // perform in-place rfft on f_ - - // sum up the series by ( ... ( ( g0/y + g1 )/y + g2 )/y + ... 
+ gl )/y - // out[0] is handled later by direct integration - for (int j = 1; j <= n; ++j) - { - tmp[j] = (tmp[j] + sign * f_[2*j + is_imag]) / (j * dy); - } - - is_imag = !is_imag; - sign = -sign; - } - // out[0] is done by direct integration // note that only the zeroth order spherical Bessel function is nonzero at 0 if (l == 0) - { - for (int i = 0; i <= n; ++i) { - tmp[0] += 2.0 * pref * in[i] * std::pow(i*dx, 2-p); // p <= 2 is required here + for (int i = 0; i <= n; ++i) + { + tmp[0] += 2.0 * pref * in[i] * std::pow (i * dx, 2 - p); // p <= 2 is required here + } } - } // FFT-based method does not yield accurate results for small y at high l // use numerical integration in this case - const int n_direct = (l == 0) ? 0 : static_cast(ngrid * std::pow(1e-8, 1.0/l)); + const int n_direct = (l == 0) ? 0 : static_cast (ngrid * std::pow (1e-8, 1.0 / l)); if (n_direct > 0) - { - std::vector buffer(ngrid + n_direct); - double* grid_in = buffer.data(); - double* grid_out = grid_in + ngrid; + { + std::vector buffer (ngrid + n_direct); + double* grid_in = buffer.data (); + double* grid_out = grid_in + ngrid; - std::for_each(grid_in, grid_in + ngrid, - [&](double& x) { x = (&x - grid_in) * dx; }); - std::for_each(grid_out, grid_out + n_direct, - [&](double& y) { y = ((&y - grid_out) + 1) * dy; }); + std::for_each (grid_in, grid_in + ngrid, [&] (double& x) { x = (&x - grid_in) * dx; }); + std::for_each (grid_out, grid_out + n_direct, [&] (double& y) { y = ((&y - grid_out) + 1) * dy; }); - direct(l, ngrid, grid_in, in, n_direct, grid_out, &tmp[1], p); - } + direct (l, ngrid, grid_in, in, n_direct, grid_out, &tmp[1], p); + } - std::copy(tmp.begin(), tmp.end(), out); + std::copy (tmp.begin (), tmp.end (), out); if (!cache_enabled_) - { - _rfft_clear(); - } + { + _rfft_clear (); + } } - -void SphericalBesselTransformer::Impl::direct( - const int l, - const int ngrid_in, - const double* const grid_in, - const double* const in, - const int ngrid_out, - const double* const 
grid_out, - double* const out, - const int p -) +void + SphericalBesselTransformer::Impl::direct (const int l, + const int ngrid_in, + const double* const grid_in, + const double* const in, + const int ngrid_out, + const double* const grid_out, + double* const out, + const int p) { - assert(p <= 2); - assert(ngrid_in > 1 && ngrid_out > 0); - assert(grid_in[0] >= 0.0 && grid_out[0] >= 0.0); - assert(std::is_sorted(grid_in, grid_in + ngrid_in, std::less_equal())); - assert(std::is_sorted(grid_out, grid_out + ngrid_out, std::less_equal())); - - std::vector buffer(3 * ngrid_in); - double* rab = buffer.data(); - double* tmp = rab + ngrid_in; // integrand without the jl part + assert (p <= 2); + assert (ngrid_in > 1 && ngrid_out > 0); + assert (grid_in[0] >= 0.0 && grid_out[0] >= 0.0); + assert (std::is_sorted (grid_in, grid_in + ngrid_in, std::less_equal ())); + assert (std::is_sorted (grid_out, grid_out + ngrid_out, std::less_equal ())); + + std::vector buffer (3 * ngrid_in); + double* rab = buffer.data (); + double* tmp = rab + ngrid_in; // integrand without the jl part double* integrand = tmp + ngrid_in; // integrand - std::adjacent_difference(grid_in, grid_in + ngrid_in, rab); + std::adjacent_difference (grid_in, grid_in + ngrid_in, rab); - std::copy(in, in + ngrid_in, tmp); - std::for_each(tmp, tmp + ngrid_in, - [&](double& x) { x *= std::pow(grid_in[&x - tmp], 2 - p); }); + std::copy (in, in + ngrid_in, tmp); + std::for_each (tmp, tmp + ngrid_in, [&] (double& x) { x *= std::pow (grid_in[&x - tmp], 2 - p); }); // compute spherical Bessel function on the grid and store the results in jl_ // (will be cleared at the end of this function if cache is disabled) - _tabulate(l, ngrid_in, grid_in, ngrid_out, grid_out); + _tabulate (l, ngrid_in, grid_in, ngrid_out, grid_out); for (int j = 0; j < ngrid_out; ++j) - { - double* jl = &jl_[j * grid_in_.size()]; - std::transform(tmp, tmp + ngrid_in, jl, integrand, std::multiplies()); - out[j] = 
ModuleBase::Integral::simpson(ngrid_in, integrand, &rab[1]); - } + { + double* jl = &jl_[j * grid_in_.size ()]; + std::transform (tmp, tmp + ngrid_in, jl, integrand, std::multiplies ()); + out[j] = ModuleBase::Integral::simpson (ngrid_in, integrand, &rab[1]); + } - const double pref = std::sqrt(2.0 / std::acos(-1.0)); - std::for_each(out, out + ngrid_out, [pref](double& x) { x *= pref; }); + const double pref = std::sqrt (2.0 / std::acos (-1.0)); + std::for_each (out, out + ngrid_out, [pref] (double& x) { x *= pref; }); if (!cache_enabled_) - { - _table_clear(); - } + { + _table_clear (); + } } - -void SphericalBesselTransformer::Impl::_rfft_prepare(const int n) +void + SphericalBesselTransformer::Impl::_rfft_prepare (const int n) { if (n != sz_planned_) - { - if (f_) - { - fftw_free(f_); - } - f_ = fftw_alloc_real(sizeof(double) * 2 * (n/2 + 1)); - // see FFTW documentation "one-dimensional DFTs of real data" - - if (rfft_plan_) { - fftw_destroy_plan(rfft_plan_); + if (f_) + { + fftw_free (f_); + } + f_ = fftw_alloc_real (sizeof (double) * 2 * (n / 2 + 1)); + // see FFTW documentation "one-dimensional DFTs of real data" + + if (rfft_plan_) + { + fftw_destroy_plan (rfft_plan_); + } + auto* out = reinterpret_cast (f_); // in-place transform + rfft_plan_ = fftw_plan_dft_r2c_1d (n, f_, out, fftw_plan_flag_); + + sz_planned_ = n; } - auto* out = reinterpret_cast(f_); // in-place transform - rfft_plan_ = fftw_plan_dft_r2c_1d(n, f_, out, fftw_plan_flag_); - - sz_planned_ = n; - } } - -void SphericalBesselTransformer::Impl::_tabulate( - const int l, - const int ngrid_in, - const double* const grid_in, - const int ngrid_out, - const double* const grid_out -) +void + SphericalBesselTransformer::Impl::_tabulate (const int l, + const int ngrid_in, + const double* const grid_in, + const int ngrid_out, + const double* const grid_out) { - const bool is_cached = - cache_enabled_ && l == l_ - && ngrid_in <= grid_in_.size() && ngrid_out <= grid_out_.size() - && std::equal(grid_in, 
grid_in + ngrid_in, grid_in_.begin()) - && std::equal(grid_out, grid_out + ngrid_out, grid_out_.begin()); + const bool is_cached = cache_enabled_ && l == l_ && ngrid_in <= grid_in_.size () && ngrid_out <= grid_out_.size () + && std::equal (grid_in, grid_in + ngrid_in, grid_in_.begin ()) + && std::equal (grid_out, grid_out + ngrid_out, grid_out_.begin ()); if (is_cached) - { - return; - } + { + return; + } l_ = l; - grid_in_ = std::vector(grid_in, grid_in + ngrid_in); - grid_out_ = std::vector(grid_out, grid_out + ngrid_out); - jl_.resize(grid_in_.size() * grid_out_.size()); + grid_in_ = std::vector (grid_in, grid_in + ngrid_in); + grid_out_ = std::vector (grid_out, grid_out + ngrid_out); + jl_.resize (grid_in_.size () * grid_out_.size ()); for (int j = 0; j < ngrid_out; ++j) - { - ModuleBase::Sphbes::sphbesj(ngrid_in, grid_in, grid_out[j], l, &jl_[j * ngrid_in]); - } + { + ModuleBase::Sphbes::sphbesj (ngrid_in, grid_in, grid_out[j], l, &jl_[j * ngrid_in]); + } } - -void SphericalBesselTransformer::Impl::_rfft_clear() +void + SphericalBesselTransformer::Impl::_rfft_clear () { if (rfft_plan_) - { - fftw_destroy_plan(rfft_plan_); - rfft_plan_ = nullptr; - } + { + fftw_destroy_plan (rfft_plan_); + rfft_plan_ = nullptr; + } if (f_) - { - fftw_free(f_); - f_ = nullptr; - } + { + fftw_free (f_); + f_ = nullptr; + } sz_planned_ = 0; } - -void SphericalBesselTransformer::Impl::_table_clear() +void + SphericalBesselTransformer::Impl::_table_clear () { - std::vector().swap(grid_in_); - std::vector().swap(grid_out_); - std::vector().swap(jl_); + std::vector ().swap (grid_in_); + std::vector ().swap (grid_out_); + std::vector ().swap (jl_); } - -size_t SphericalBesselTransformer::Impl::heap_usage() const +size_t + SphericalBesselTransformer::Impl::heap_usage () const { - return (grid_in_.capacity() + grid_out_.capacity() + jl_.capacity() - + sz_planned_) * sizeof(double); + return (grid_in_.capacity () + grid_out_.capacity () + jl_.capacity () + sz_planned_) * sizeof (double); 
} - -void SphericalBesselTransformer::Impl::clear() +void + SphericalBesselTransformer::Impl::clear () { - _rfft_clear(); - _table_clear(); + _rfft_clear (); + _table_clear (); } - //********************************************************************** // Interface //********************************************************************** -SphericalBesselTransformer::SphericalBesselTransformer(const bool cache_enabled) - : impl_(new Impl(cache_enabled)) -{} - - -void SphericalBesselTransformer::radrfft( - const int l, - const int ngrid, - const double cutoff, - const double* const in, - double* const out, - const int p -) const +SphericalBesselTransformer::SphericalBesselTransformer (const bool cache_enabled) : impl_ (new Impl (cache_enabled)) {} + +void + SphericalBesselTransformer::radrfft (const int l, + const int ngrid, + const double cutoff, + const double* const in, + double* const out, + const int p) const { - impl_->radrfft(l, ngrid, cutoff, in, out, p); + impl_->radrfft (l, ngrid, cutoff, in, out, p); } - -void SphericalBesselTransformer::direct( - const int l, - const int ngrid_in, - const double* const grid_in, - const double* const in, - const int ngrid_out, - const double* const grid_out, - double* const out, - const int p -) const +void + SphericalBesselTransformer::direct (const int l, + const int ngrid_in, + const double* const grid_in, + const double* const in, + const int ngrid_out, + const double* const grid_out, + double* const out, + const int p) const { - impl_->direct(l, ngrid_in, grid_in, in, ngrid_out, grid_out, out, p); + impl_->direct (l, ngrid_in, grid_in, in, ngrid_out, grid_out, out, p); } - -size_t SphericalBesselTransformer::heap_usage() const +size_t + SphericalBesselTransformer::heap_usage () const { - return impl_->heap_usage(); + return impl_->heap_usage (); } - -void SphericalBesselTransformer::clear() +void + SphericalBesselTransformer::clear () { - impl_->clear(); + impl_->clear (); } - } // namespace ModuleBase diff --git 
a/source/source_base/spherical_bessel_transformer.h b/source/source_base/spherical_bessel_transformer.h index 5a0a4b66633..8021daf8f0b 100644 --- a/source/source_base/spherical_bessel_transformer.h +++ b/source/source_base/spherical_bessel_transformer.h @@ -67,15 +67,15 @@ namespace ModuleBase */ class SphericalBesselTransformer { -public: - SphericalBesselTransformer(const bool cache_enabled = false); - ~SphericalBesselTransformer() = default; + public: + SphericalBesselTransformer (const bool cache_enabled = false); + ~SphericalBesselTransformer () = default; - SphericalBesselTransformer(SphericalBesselTransformer const&) = default; - SphericalBesselTransformer(SphericalBesselTransformer &&) = default; + SphericalBesselTransformer (SphericalBesselTransformer const&) = default; + SphericalBesselTransformer (SphericalBesselTransformer&&) = default; - SphericalBesselTransformer& operator=(const SphericalBesselTransformer&) = default; - SphericalBesselTransformer& operator=(SphericalBesselTransformer&&) = default; + SphericalBesselTransformer& operator= (const SphericalBesselTransformer&) = default; + SphericalBesselTransformer& operator= (SphericalBesselTransformer&&) = default; /** * @brief Spherical Bessel transform via fast Fourier transforms. @@ -113,15 +113,12 @@ class SphericalBesselTransformer * @note p is restricted to p <= 2 in order to avoid the situation that one has to * determine x^2*F(x) at x = 0 from x[i]^p*F(x[i]). */ - void radrfft( - const int l, - const int ngrid, - const double cutoff, - const double* const in, - double* const out, - const int p = 0 - ) const; - + void radrfft (const int l, + const int ngrid, + const double cutoff, + const double* const in, + double* const out, + const int p = 0) const; /** * @brief Spherical Bessel transform via numerical integration with Simpson's rule. @@ -151,30 +148,29 @@ class SphericalBesselTransformer * determine x^2*F(x) at x = 0 from x[i]^p*F(x[i]). 
* */ - void direct( - const int l, - const int ngrid_in, - const double* const grid_in, - const double* const in, - const int ngrid_out, - const double* const grid_out, - double* const out, - const int p = 0 - ) const; - + void direct (const int l, + const int ngrid_in, + const double* const grid_in, + const double* const in, + const int ngrid_out, + const double* const grid_out, + double* const out, + const int p = 0) const; /// total heap usage (in bytes) from the FFTW buffer and tabulated jl - size_t heap_usage() const; + size_t heap_usage () const; /// clear the FFTW plan & buffer as well as the tabulated jl - void clear(); + void clear (); /// check if two objects share the same underlying implementation object - bool operator==(const SphericalBesselTransformer& rhs) const { return impl_ == rhs.impl_; } - - -private: + bool + operator== (const SphericalBesselTransformer& rhs) const + { + return impl_ == rhs.impl_; + } + private: class Impl; // forward declaration std::shared_ptr impl_; }; diff --git a/source/source_base/test/assoc_laguerre_test.cpp b/source/source_base/test/assoc_laguerre_test.cpp index c992f06f0c5..d26aa5b6f44 100644 --- a/source/source_base/test/assoc_laguerre_test.cpp +++ b/source/source_base/test/assoc_laguerre_test.cpp @@ -8,25 +8,23 @@ class AssocLaguerreTest : public ::testing::Test { - protected: - AssocLaguerreTest() - { - } + protected: + AssocLaguerreTest () {} - ~AssocLaguerreTest() - { - } + ~AssocLaguerreTest () {} - void SetUp() - { - } + void + SetUp () + { + } - void TearDown() - { - } + void + TearDown () + { + } }; -TEST_F(AssocLaguerreTest, LaguerreTest) +TEST_F (AssocLaguerreTest, LaguerreTest) { // reference value from scipy.special.assoc_laguerre with k = 0 Assoc_Laguerre al; @@ -34,74 +32,74 @@ TEST_F(AssocLaguerreTest, LaguerreTest) int n = 0; std::vector xs = {0.0, 1.0, 2.0, 3.0}; std::vector ref_ys; - for(int i = 0; i < xs.size(); i++) - { - ref_ys.push_back(1.0); - } - for(int i = 0; i < xs.size(); i++) - { - 
EXPECT_NEAR(al.laguerre(n, xs[i]), ref_ys[i], 1e-6); - } + for (int i = 0; i < xs.size (); i++) + { + ref_ys.push_back (1.0); + } + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (al.laguerre (n, xs[i]), ref_ys[i], 1e-6); + } // 1-st order Laguerre polynomial n = 1; ref_ys = {1.0, 0.0, -1.0, -2.0}; - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(al.laguerre(n, xs[i]), ref_ys[i], 1e-6); - } + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (al.laguerre (n, xs[i]), ref_ys[i], 1e-6); + } // 2-nd order Laguerre polynomial n = 2; ref_ys = {1.0, -0.5, -1.0, -0.5}; - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(al.laguerre(n, xs[i]), ref_ys[i], 1e-6); - } + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (al.laguerre (n, xs[i]), ref_ys[i], 1e-6); + } // 3-rd order Laguerre polynomial n = 3; ref_ys = {1.0, -0.6666666666666666, -0.33333333333333337, 1.0}; - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(al.laguerre(n, xs[i]), ref_ys[i], 1e-6); - } + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (al.laguerre (n, xs[i]), ref_ys[i], 1e-6); + } // 4-th order Laguerre polynomial n = 4; ref_ys = {1.0, -0.625, 0.33333333333333337, 1.375}; - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(al.laguerre(n, xs[i]), ref_ys[i], 1e-6); - } + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (al.laguerre (n, xs[i]), ref_ys[i], 1e-6); + } // 5-th order Laguerre polynomial n = 5; ref_ys = {1.0, -0.4666666666666667, 0.7333333333333334, 0.8500000000000001}; - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(al.laguerre(n, xs[i]), ref_ys[i], 1e-6); - } + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (al.laguerre (n, xs[i]), ref_ys[i], 1e-6); + } // 6-th order Laguerre polynomial n = 6; ref_ys = {1.0, -0.2569444444444444, 0.8222222222222224, -0.012499999999999956}; - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(al.laguerre(n, xs[i]), ref_ys[i], 1e-6); - } + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR 
(al.laguerre (n, xs[i]), ref_ys[i], 1e-6); + } // 7-th order Laguerre polynomial n = 7; ref_ys = {1.0, -0.04047619047619044, 0.6634920634920637, -0.7464285714285714}; - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(al.laguerre(n, xs[i]), ref_ys[i], 1e-6); - } + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (al.laguerre (n, xs[i]), ref_ys[i], 1e-6); + } // 8-th order Laguerre polynomial n = 8; ref_ys = {1.0, 0.1539930555555556, 0.3587301587301589, -1.1087053571428571}; - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(al.laguerre(n, xs[i]), ref_ys[i], 1e-6); - } + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (al.laguerre (n, xs[i]), ref_ys[i], 1e-6); + } // all physical and possible n values are tested } -TEST_F(AssocLaguerreTest, AssociateLaguerreTest) +TEST_F (AssocLaguerreTest, AssociateLaguerreTest) { // reference value from scipy.special.assoc_laguerre Assoc_Laguerre al; @@ -110,199 +108,199 @@ TEST_F(AssocLaguerreTest, AssociateLaguerreTest) int k = 0; std::vector xs = {0.0, 1.0, 2.0, 3.0}; std::vector ref_ys = {1.0, 0.1539930555555556, 0.3587301587301589, -1.1087053571428571}; - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(al.associate_laguerre(n, xs[i], k), ref_ys[i], 1e-6); - } + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (al.associate_laguerre (n, xs[i], k), ref_ys[i], 1e-6); + } // test n = 8, k = 1 n = 8; k = 1; xs = {0.0, 1.0, 2.0, 3.0}; ref_ys = {9.0, -1.4017609126984123, 1.5777777777777775, -0.14263392857142831}; - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(al.associate_laguerre(n, xs[i], k), ref_ys[i], 1e-6); - } + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (al.associate_laguerre (n, xs[i], k), ref_ys[i], 1e-6); + } // test n = 8, k = 2 n = 8; k = 2; xs = {0.0, 1.0, 2.0, 3.0}; ref_ys = {45.0, -4.189459325396824, 0.7523809523809523, 3.1359374999999994}; - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(al.associate_laguerre(n, xs[i], k), ref_ys[i], 1e-6); - } + for (int i = 
0; i < xs.size (); i++) + { + EXPECT_NEAR (al.associate_laguerre (n, xs[i], k), ref_ys[i], 1e-6); + } // test n = 8, k = 3 n = 8; k = 3; xs = {0.0, 1.0, 2.0, 3.0}; ref_ys = {165.0, 1.449231150793654, -6.384126984126984, 5.70200892857143}; - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(al.associate_laguerre(n, xs[i], k), ref_ys[i], 1e-6); - } + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (al.associate_laguerre (n, xs[i], k), ref_ys[i], 1e-6); + } // test n = 8, k = 4 n = 8; k = 4; xs = {0.0, 1.0, 2.0, 3.0}; ref_ys = {495.0, 51.8809771825397, -20.098412698412695, -0.09441964285714727}; - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(al.associate_laguerre(n, xs[i], k), ref_ys[i], 1e-6); - } + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (al.associate_laguerre (n, xs[i], k), ref_ys[i], 1e-6); + } // test n = 8, k = 5 n = 8; k = 5; xs = {0.0, 1.0, 2.0, 3.0}; ref_ys = {1287.0, 242.01411210317463, -21.990476190476176, -23.028348214285717}; - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(al.associate_laguerre(n, xs[i], k), ref_ys[i], 1e-6); - } + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (al.associate_laguerre (n, xs[i], k), ref_ys[i], 1e-6); + } // test n = 8, k = 6 n = 8; k = 6; xs = {0.0, 1.0, 2.0, 3.0}; ref_ys = {3003.0, 777.6319692460318, 53.67301587301589, -60.999776785714275}; - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(al.associate_laguerre(n, xs[i], k), ref_ys[i], 1e-6); - } + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (al.associate_laguerre (n, xs[i], k), ref_ys[i], 1e-6); + } // test n = 8, k = 7 n = 8; k = 7; xs = {0.0, 1.0, 2.0, 3.0}; ref_ys = {6435.0, 2055.2262152777776, 368.62539682539676, -75.53370535714284}; - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(al.associate_laguerre(n, xs[i], k), ref_ys[i], 1e-6); - } + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (al.associate_laguerre (n, xs[i], k), ref_ys[i], 1e-6); + } // test n = 8, k = 8 n = 8; k = 8; xs = {0.0, 
1.0, 2.0, 3.0}; ref_ys = {12870.0, 4777.330183531744, 1256.2666666666667, 53.21986607142856}; - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(al.associate_laguerre(n, xs[i], k), ref_ys[i], 1e-6); - } + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (al.associate_laguerre (n, xs[i], k), ref_ys[i], 1e-6); + } // all physical and possible n, k values are tested } -TEST_F(AssocLaguerreTest, FactorialTest) +TEST_F (AssocLaguerreTest, FactorialTest) { // this test is simple, no need to compare with reference value Assoc_Laguerre al; - EXPECT_DOUBLE_EQ(al.factorial(0), 1); - EXPECT_DOUBLE_EQ(al.factorial(1), 1); - EXPECT_DOUBLE_EQ(al.factorial(2), 2); - EXPECT_DOUBLE_EQ(al.factorial(3), 6); - EXPECT_DOUBLE_EQ(al.factorial(4), 24); - EXPECT_DOUBLE_EQ(al.factorial(5), 120); - EXPECT_DOUBLE_EQ(al.factorial(6), 720); - EXPECT_DOUBLE_EQ(al.factorial(7), 5040); - EXPECT_DOUBLE_EQ(al.factorial(8), 40320); - EXPECT_DOUBLE_EQ(al.factorial(9), 362880); - EXPECT_DOUBLE_EQ(al.factorial(10), 3628800); + EXPECT_DOUBLE_EQ (al.factorial (0), 1); + EXPECT_DOUBLE_EQ (al.factorial (1), 1); + EXPECT_DOUBLE_EQ (al.factorial (2), 2); + EXPECT_DOUBLE_EQ (al.factorial (3), 6); + EXPECT_DOUBLE_EQ (al.factorial (4), 24); + EXPECT_DOUBLE_EQ (al.factorial (5), 120); + EXPECT_DOUBLE_EQ (al.factorial (6), 720); + EXPECT_DOUBLE_EQ (al.factorial (7), 5040); + EXPECT_DOUBLE_EQ (al.factorial (8), 40320); + EXPECT_DOUBLE_EQ (al.factorial (9), 362880); + EXPECT_DOUBLE_EQ (al.factorial (10), 3628800); } -TEST_F(AssocLaguerreTest, ValueTest) +TEST_F (AssocLaguerreTest, ValueTest) { Assoc_Laguerre al; - //test n = 1, 2, ..., 4, from 1s to 4f + // test n = 1, 2, ..., 4, from 1s to 4f std::vector xs; // segment1: 0-0.25, 0.01 - for(double x = 0.0; x < 0.25; x += 0.01) - { - xs.push_back(x); - } + for (double x = 0.0; x < 0.25; x += 0.01) + { + xs.push_back (x); + } // segment2: 0.25-1.0, 0.05 - for(double x = 0.25; x < 1.0; x += 0.05) - { - xs.push_back(x); - } + for (double x = 0.25; x < 1.0; x 
+= 0.05) + { + xs.push_back (x); + } // segment3: 1.0-2.0, 0.1 - for(double x = 1.0; x < 2.0; x += 0.1) - { - xs.push_back(x); - } + for (double x = 1.0; x < 2.0; x += 0.1) + { + xs.push_back (x); + } // segment4: 2.0-5.0, 0.2 - for(double x = 2.0; x < 5.0; x += 0.2) - { - xs.push_back(x); - } + for (double x = 2.0; x < 5.0; x += 0.2) + { + xs.push_back (x); + } // segment5: 5.0-10.0, 0.5 - for(double x = 5.0; x < 10.0; x += 0.5) - { - xs.push_back(x); - } + for (double x = 5.0; x < 10.0; x += 0.5) + { + xs.push_back (x); + } int nmax = 4; - for(int n = 1; n < nmax; n++) - { - for(int l = 0; l < n; l++) + for (int n = 1; n < nmax; n++) { - std::vector ref_ys; - for(int i = 0; i < xs.size(); i++) - { - ref_ys.push_back(std::tr1::assoc_laguerre(n-l-1, 2*l+1, xs[i])); - //ref_ys.push_back(al.associate_laguerre(n-l-1, xs[i], 2*l+1)); - } - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(al.value(n, l, xs[i]), ref_ys[i], 1e-10); - } + for (int l = 0; l < n; l++) + { + std::vector ref_ys; + for (int i = 0; i < xs.size (); i++) + { + ref_ys.push_back (std::tr1::assoc_laguerre (n - l - 1, 2 * l + 1, xs[i])); + // ref_ys.push_back(al.associate_laguerre(n-l-1, xs[i], 2*l+1)); + } + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (al.value (n, l, xs[i]), ref_ys[i], 1e-10); + } + } } - } } -TEST_F(AssocLaguerreTest, GenerateVectorTest) +TEST_F (AssocLaguerreTest, GenerateVectorTest) { Assoc_Laguerre al; - //test n = 1, 2, ..., 4, from 1s to 4f + // test n = 1, 2, ..., 4, from 1s to 4f std::vector xs = {0.0, 1.0, 2.0, 3.0}; int nmax = 4; - for(int n = 1; n <= nmax; n++) - { - for(int l = 0; l < n; l++) + for (int n = 1; n <= nmax; n++) { - std::vector ref_ys; - for(int i = 0; i < xs.size(); i++) - { - ref_ys.push_back(al.associate_laguerre(n-l-1, xs[i], 2*l+1)); - } - std::vector ys; - ys.resize(xs.size()); - al.generate(n, l, xs, ys); - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(ys[i], ref_ys[i], 1e-6); - } + for (int l = 0; l < n; l++) + { + std::vector 
ref_ys; + for (int i = 0; i < xs.size (); i++) + { + ref_ys.push_back (al.associate_laguerre (n - l - 1, xs[i], 2 * l + 1)); + } + std::vector ys; + ys.resize (xs.size ()); + al.generate (n, l, xs, ys); + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (ys[i], ref_ys[i], 1e-6); + } + } } - } } -TEST_F(AssocLaguerreTest, GeneratePointerTest) +TEST_F (AssocLaguerreTest, GeneratePointerTest) { Assoc_Laguerre al; - //test n = 1, 2, ..., 4, from 1s to 4f + // test n = 1, 2, ..., 4, from 1s to 4f std::vector xs = {0.0, 1.0, 2.0, 3.0}; int nmax = 4; - for(int n = 1; n <= nmax; n++) - { - for(int l = 0; l < n; l++) + for (int n = 1; n <= nmax; n++) { - std::vector ref_ys; - for(int i = 0; i < xs.size(); i++) - { - ref_ys.push_back(al.associate_laguerre(n-l-1, xs[i], 2*l+1)); - } - double* ys = new double[xs.size()]; - al.generate(n, l, xs.size(), xs.data(), ys); - for(int i = 0; i < xs.size(); i++) - { - EXPECT_NEAR(ys[i], ref_ys[i], 1e-6); - } - delete[] ys; + for (int l = 0; l < n; l++) + { + std::vector ref_ys; + for (int i = 0; i < xs.size (); i++) + { + ref_ys.push_back (al.associate_laguerre (n - l - 1, xs[i], 2 * l + 1)); + } + double* ys = new double[xs.size ()]; + al.generate (n, l, xs.size (), xs.data (), ys); + for (int i = 0; i < xs.size (); i++) + { + EXPECT_NEAR (ys[i], ref_ys[i], 1e-6); + } + delete[] ys; + } } - } } \ No newline at end of file diff --git a/source/source_base/test/atom_in_test.cpp b/source/source_base/test/atom_in_test.cpp index 09816ea5b79..623b55e2412 100644 --- a/source/source_base/test/atom_in_test.cpp +++ b/source/source_base/test/atom_in_test.cpp @@ -10,25 +10,23 @@ * - Tested functions of class atom_in: * - map atom_Z * - get the atomic number - * + * * - map atom_RCS * - get the atomic radius - * + * */ class atom_in_test : public testing::Test { -protected: - + protected: atom_in Atomin; int a = Atomin.atom_Z["H"]; - }; -TEST_F(atom_in_test, atomin) -{ - EXPECT_EQ(atom_in_test::a, 1); - 
EXPECT_EQ(Atomin.atom_RCS["H"],0.603774); - EXPECT_EQ(Atomin.atom_symbol["H"],"Hydrogen"); - EXPECT_EQ(Atomin.symbol_Z["Hydrogen"],1); +TEST_F (atom_in_test, atomin) +{ + EXPECT_EQ (atom_in_test::a, 1); + EXPECT_EQ (Atomin.atom_RCS["H"], 0.603774); + EXPECT_EQ (Atomin.atom_symbol["H"], "Hydrogen"); + EXPECT_EQ (Atomin.symbol_Z["Hydrogen"], 1); } \ No newline at end of file diff --git a/source/source_base/test/blas_connector_test.cpp b/source/source_base/test/blas_connector_test.cpp index 21de7ef2e24..3661c772eb8 100644 --- a/source/source_base/test/blas_connector_test.cpp +++ b/source/source_base/test/blas_connector_test.cpp @@ -6,190 +6,244 @@ #include #include #include -TEST(blas_connector, sscal_) { +TEST (blas_connector, sscal_) +{ typedef float T; const int size = 8; const T scale = 2; const int incx = 1; std::array result, answer; - std::generate(result.begin(), result.end(), - []() { return std::rand() / T(RAND_MAX); }); + std::generate (result.begin (), result.end (), [] () { return std::rand () / T (RAND_MAX); }); for (int i = 0; i < size; i++) - answer[i] = result[i] * scale; - sscal_(&size, &scale, result.data(), &incx); + { + answer[i] = result[i] * scale; + } + sscal_ (&size, &scale, result.data (), &incx); for (int i = 0; i < size; i++) - EXPECT_FLOAT_EQ(answer[i], result[i]); + { + EXPECT_FLOAT_EQ (answer[i], result[i]); + } } -TEST(blas_connector, dscal_) { +TEST (blas_connector, dscal_) +{ typedef double T; const int size = 8; const T scale = 2; const int incx = 1; std::array result, answer; - std::generate(result.begin(), result.end(), - []() { return std::rand() / T(RAND_MAX); }); + std::generate (result.begin (), result.end (), [] () { return std::rand () / T (RAND_MAX); }); for (int i = 0; i < size; i++) - answer[i] = result[i] * scale; - dscal_(&size, &scale, result.data(), &incx); + { + answer[i] = result[i] * scale; + } + dscal_ (&size, &scale, result.data (), &incx); for (int i = 0; i < size; i++) - EXPECT_DOUBLE_EQ(answer[i], result[i]); + 
{ + EXPECT_DOUBLE_EQ (answer[i], result[i]); + } } -TEST(blas_connector, cscal_) { +TEST (blas_connector, cscal_) +{ typedef std::complex T; const int size = 8; const T scale = {2, 3}; const int incx = 1; std::array result, answer; - std::generate(result.begin(), result.end(), []() { - return T{static_cast(std::rand() / float(RAND_MAX)), - static_cast(std::rand() / float(RAND_MAX))}; - }); + std::generate (result.begin (), + result.end (), + [] () + { + return T{static_cast (std::rand () / float (RAND_MAX)), + static_cast (std::rand () / float (RAND_MAX))}; + }); for (int i = 0; i < size; i++) - answer[i] = result[i] * scale; - cscal_(&size, &scale, result.data(), &incx); - for (int i = 0; i < size; i++) { - EXPECT_FLOAT_EQ(answer[i].real(), result[i].real()); - EXPECT_FLOAT_EQ(answer[i].imag(), result[i].imag()); - } + { + answer[i] = result[i] * scale; + } + cscal_ (&size, &scale, result.data (), &incx); + for (int i = 0; i < size; i++) + { + EXPECT_FLOAT_EQ (answer[i].real (), result[i].real ()); + EXPECT_FLOAT_EQ (answer[i].imag (), result[i].imag ()); + } } -TEST(blas_connector, zscal_) { +TEST (blas_connector, zscal_) +{ typedef std::complex T; const int size = 8; const T scale = {2, 3}; const int incx = 1; std::array result, answer; - std::generate(result.begin(), result.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); + std::generate (result.begin (), + result.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); for (int i = 0; i < size; i++) - answer[i] = result[i] * scale; - zscal_(&size, &scale, result.data(), &incx); - for (int i = 0; i < size; i++) { - EXPECT_DOUBLE_EQ(answer[i].real(), result[i].real()); - EXPECT_DOUBLE_EQ(answer[i].imag(), result[i].imag()); - } + { + answer[i] = result[i] * scale; + } + zscal_ (&size, &scale, result.data (), &incx); + for (int i = 0; i < size; i++) + { + 
EXPECT_DOUBLE_EQ (answer[i].real (), result[i].real ()); + EXPECT_DOUBLE_EQ (answer[i].imag (), result[i].imag ()); + } } -TEST(blas_connector, Scal) { +TEST (blas_connector, Scal) +{ const int size = 8; const std::complex scale = {2, 3}; const int incx = 1; std::complex result[8], answer[8]; - for (int i=0; i< size; i++) { - result[i] = std::complex{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }; for (int i = 0; i < size; i++) - answer[i] = result[i] * scale; - BlasConnector::scal(size,scale,result,incx); + { + result[i] = std::complex{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }; + for (int i = 0; i < size; i++) + { + answer[i] = result[i] * scale; + } + BlasConnector::scal (size, scale, result, incx); // incx is the spacing between elements if result - for (int i = 0; i < size; i++) { - EXPECT_DOUBLE_EQ(answer[i].real(), result[i].real()); - EXPECT_DOUBLE_EQ(answer[i].imag(), result[i].imag()); - } + for (int i = 0; i < size; i++) + { + EXPECT_DOUBLE_EQ (answer[i].real (), result[i].real ()); + EXPECT_DOUBLE_EQ (answer[i].imag (), result[i].imag ()); + } } #ifdef __CUDA -TEST(blas_connector, ScalGpu) { +TEST (blas_connector, ScalGpu) +{ const int size = 8; const std::complex scale = {2, 3}; const int incx = 1; std::complex result[8], answer[8]; std::complex* result_gpu = nullptr; - resmem_zd_op()(result_gpu, 8 * sizeof(std::complex)); - for (int i=0; i< size; i++) { - result[i] = std::complex{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }; + resmem_zd_op () (result_gpu, 8 * sizeof (std::complex)); + for (int i = 0; i < size; i++) + { + result[i] = std::complex{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }; for (int i = 0; i < size; i++) answer[i] = result[i] * scale; - syncmem_z2z_h2d_op()(result_gpu, result, sizeof(std::complex) * 8); - 
BlasConnector::scal(size,scale,result_gpu,incx,base_device::AbacusDevice_t::GpuDevice); - syncmem_z2z_d2h_op()(result, result_gpu, sizeof(std::complex) * 8); - delmem_zd_op()(result_gpu); + syncmem_z2z_h2d_op () (result_gpu, result, sizeof (std::complex) * 8); + BlasConnector::scal (size, scale, result_gpu, incx, base_device::AbacusDevice_t::GpuDevice); + syncmem_z2z_d2h_op () (result, result_gpu, sizeof (std::complex) * 8); + delmem_zd_op () (result_gpu); // incx is the spacing between elements if result - for (int i = 0; i < size; i++) { - EXPECT_DOUBLE_EQ(answer[i].real(), result[i].real()); - EXPECT_DOUBLE_EQ(answer[i].imag(), result[i].imag()); - } + for (int i = 0; i < size; i++) + { + EXPECT_DOUBLE_EQ (answer[i].real (), result[i].real ()); + EXPECT_DOUBLE_EQ (answer[i].imag (), result[i].imag ()); + } } #endif -TEST(blas_connector, daxpy_) { +TEST (blas_connector, daxpy_) +{ typedef double T; const int size = 8; const T scale = 2; const int incx = 1; const int incy = 1; std::array x_const, result, answer; - std::generate(x_const.begin(), x_const.end(), - []() { return std::rand() / double(RAND_MAX); }); - std::generate(result.begin(), result.end(), - []() { return std::rand() / double(RAND_MAX); }); + std::generate (x_const.begin (), x_const.end (), [] () { return std::rand () / double (RAND_MAX); }); + std::generate (result.begin (), result.end (), [] () { return std::rand () / double (RAND_MAX); }); for (int i = 0; i < size; i++) - answer[i] = x_const[i] * scale + result[i]; - daxpy_(&size, &scale, x_const.data(), &incx, result.data(), &incy); + { + answer[i] = x_const[i] * scale + result[i]; + } + daxpy_ (&size, &scale, x_const.data (), &incx, result.data (), &incy); for (int i = 0; i < size; i++) - EXPECT_DOUBLE_EQ(answer[i], result[i]); + { + EXPECT_DOUBLE_EQ (answer[i], result[i]); + } } -TEST(blas_connector, zaxpy_) { +TEST (blas_connector, zaxpy_) +{ typedef std::complex T; const int size = 8; const T scale = {2, 3}; const int incx = 1; const int 
incy = 1; std::array x_const, result, answer; - std::generate(x_const.begin(), x_const.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - std::generate(result.begin(), result.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); + std::generate (x_const.begin (), + x_const.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); + std::generate (result.begin (), + result.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); for (int i = 0; i < size; i++) - answer[i] = x_const[i] * scale + result[i]; - zaxpy_(&size, &scale, x_const.data(), &incx, result.data(), &incy); - for (int i = 0; i < size; i++) { - EXPECT_DOUBLE_EQ(answer[i].real(), result[i].real()); - EXPECT_DOUBLE_EQ(answer[i].imag(), result[i].imag()); - } + { + answer[i] = x_const[i] * scale + result[i]; + } + zaxpy_ (&size, &scale, x_const.data (), &incx, result.data (), &incy); + for (int i = 0; i < size; i++) + { + EXPECT_DOUBLE_EQ (answer[i].real (), result[i].real ()); + EXPECT_DOUBLE_EQ (answer[i].imag (), result[i].imag ()); + } } -TEST(blas_connector, Axpy) { +TEST (blas_connector, Axpy) +{ typedef std::complex T; const int size = 8; const T scale = {2, 3}; const int incx = 1; const int incy = 1; std::array x_const, result, answer; - std::generate(x_const.begin(), x_const.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - std::generate(result.begin(), result.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); + std::generate (x_const.begin (), + x_const.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast 
(std::rand () / double (RAND_MAX))}; + }); + std::generate (result.begin (), + result.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); for (int i = 0; i < size; i++) - answer[i] = x_const[i] * scale + result[i]; - BlasConnector::axpy(size, scale, x_const.data(), incx, result.data(), incy); - for (int i = 0; i < size; i++) { - EXPECT_DOUBLE_EQ(answer[i].real(), result[i].real()); - EXPECT_DOUBLE_EQ(answer[i].imag(), result[i].imag()); - } + { + answer[i] = x_const[i] * scale + result[i]; + } + BlasConnector::axpy (size, scale, x_const.data (), incx, result.data (), incy); + for (int i = 0; i < size; i++) + { + EXPECT_DOUBLE_EQ (answer[i].real (), result[i].real ()); + EXPECT_DOUBLE_EQ (answer[i].imag (), result[i].imag ()); + } } #ifdef __CUDA -TEST(blas_connector, AxpyGpu) { +TEST (blas_connector, AxpyGpu) +{ typedef std::complex T; const int size = 8; const T scale = {2, 3}; @@ -198,85 +252,106 @@ TEST(blas_connector, AxpyGpu) { std::array x_const, result, answer; T* x_gpu = nullptr; T* result_gpu = nullptr; - resmem_zd_op()(x_gpu, size * sizeof(std::complex)); - resmem_zd_op()(result_gpu, size * sizeof(std::complex)); - std::generate(x_const.begin(), x_const.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - std::generate(result.begin(), result.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); + resmem_zd_op () (x_gpu, size * sizeof (std::complex)); + resmem_zd_op () (result_gpu, size * sizeof (std::complex)); + std::generate (x_const.begin (), + x_const.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); + std::generate (result.begin (), + result.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand 
() / double (RAND_MAX))}; + }); for (int i = 0; i < size; i++) answer[i] = x_const[i] * scale + result[i]; - syncmem_z2z_h2d_op()(result_gpu, result.data(), sizeof(std::complex) * size); - syncmem_z2z_h2d_op()(x_gpu, x_const.data(), sizeof(std::complex) * size); - BlasConnector::axpy(size, scale, x_gpu, incx, result_gpu, incy, base_device::AbacusDevice_t::GpuDevice); - syncmem_z2z_d2h_op()(result.data(), result_gpu, sizeof(std::complex) * size); - delmem_zd_op()(result_gpu); - delmem_zd_op()(x_gpu); - for (int i = 0; i < size; i++) { - EXPECT_DOUBLE_EQ(answer[i].real(), result[i].real()); - EXPECT_DOUBLE_EQ(answer[i].imag(), result[i].imag()); - } + syncmem_z2z_h2d_op () (result_gpu, result.data (), sizeof (std::complex) * size); + syncmem_z2z_h2d_op () (x_gpu, x_const.data (), sizeof (std::complex) * size); + BlasConnector::axpy (size, scale, x_gpu, incx, result_gpu, incy, base_device::AbacusDevice_t::GpuDevice); + syncmem_z2z_d2h_op () (result.data (), result_gpu, sizeof (std::complex) * size); + delmem_zd_op () (result_gpu); + delmem_zd_op () (x_gpu); + for (int i = 0; i < size; i++) + { + EXPECT_DOUBLE_EQ (answer[i].real (), result[i].real ()); + EXPECT_DOUBLE_EQ (answer[i].imag (), result[i].imag ()); + } } #endif -TEST(blas_connector, dcopy_) { +TEST (blas_connector, dcopy_) +{ typedef double T; int const size = 8; int const incx = 1; int const incy = 1; std::array x_const, result, answer; - std::generate(x_const.begin(), x_const.end(), - []() { return std::rand() / double(RAND_MAX); }); + std::generate (x_const.begin (), x_const.end (), [] () { return std::rand () / double (RAND_MAX); }); for (int i = 0; i < size; i++) - answer[i] = x_const[i]; - dcopy_(&size, x_const.data(), &incx, result.data(), &incy); + { + answer[i] = x_const[i]; + } + dcopy_ (&size, x_const.data (), &incx, result.data (), &incy); for (int i = 0; i < size; i++) - EXPECT_DOUBLE_EQ(answer[i], result[i]); + { + EXPECT_DOUBLE_EQ (answer[i], result[i]); + } } -TEST(blas_connector, zcopy_) { 
+TEST (blas_connector, zcopy_) +{ typedef std::complex T; int const size = 8; int const incx = 1; int const incy = 1; std::array x_const, result, answer; - std::generate(x_const.begin(), x_const.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); + std::generate (x_const.begin (), + x_const.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); for (int i = 0; i < size; i++) - answer[i] = x_const[i]; - zcopy_(&size, x_const.data(), &incx, result.data(), &incy); - for (int i = 0; i < size; i++) { - EXPECT_DOUBLE_EQ(answer[i].real(), result[i].real()); - EXPECT_DOUBLE_EQ(answer[i].imag(), result[i].imag()); - } + { + answer[i] = x_const[i]; + } + zcopy_ (&size, x_const.data (), &incx, result.data (), &incy); + for (int i = 0; i < size; i++) + { + EXPECT_DOUBLE_EQ (answer[i].real (), result[i].real ()); + EXPECT_DOUBLE_EQ (answer[i].imag (), result[i].imag ()); + } } -TEST(blas_connector, copy) { +TEST (blas_connector, copy) +{ int const size = 8; int const incx = 1; int const incy = 1; std::complex result[8], answer[8]; for (int i = 0; i < size; i++) - { - answer[i] = std::complex{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - } + { + answer[i] = std::complex{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + } BlasConnector bs; - bs.copy(size, answer, incx, result, incy); - for (int i = 0; i < size; i++) { - EXPECT_DOUBLE_EQ(answer[i].real(), result[i].real()); - EXPECT_DOUBLE_EQ(answer[i].imag(), result[i].imag()); - } + bs.copy (size, answer, incx, result, incy); + for (int i = 0; i < size; i++) + { + EXPECT_DOUBLE_EQ (answer[i].real (), result[i].real ()); + EXPECT_DOUBLE_EQ (answer[i].imag (), result[i].imag ()); + } } -TEST(blas_connector, dgemv_) { +TEST (blas_connector, dgemv_) +{ typedef double T; const char 
transa_m = 'N'; const char transa_n = 'T'; @@ -289,42 +364,64 @@ TEST(blas_connector, dgemv_) { const int incy = 1; std::array x_const_m, result_m, answer_m, c_dot_m{}; std::array x_const_n, result_n, answer_n, c_dot_n{}; - std::generate(x_const_n.begin(), x_const_n.end(), - []() { return std::rand() / double(RAND_MAX); }); - std::generate(result_n.begin(), result_n.end(), - []() { return std::rand() / double(RAND_MAX); }); - std::generate(x_const_m.begin(), x_const_m.end(), - []() { return std::rand() / double(RAND_MAX); }); - std::generate(result_m.begin(), result_m.end(), - []() { return std::rand() / double(RAND_MAX); }); + std::generate (x_const_n.begin (), x_const_n.end (), [] () { return std::rand () / double (RAND_MAX); }); + std::generate (result_n.begin (), result_n.end (), [] () { return std::rand () / double (RAND_MAX); }); + std::generate (x_const_m.begin (), x_const_m.end (), [] () { return std::rand () / double (RAND_MAX); }); + std::generate (result_m.begin (), result_m.end (), [] () { return std::rand () / double (RAND_MAX); }); std::array a_const; - std::generate(a_const.begin(), a_const.end(), - []() { return std::rand() / double(RAND_MAX); }); - for (int i = 0; i < size_m; i++) { - for (int j = 0; j < size_n; j++) { - c_dot_m[i] += a_const[i + j * lda] * x_const_n[j]; - } - answer_m[i] = alpha_const * c_dot_m[i] + beta_const * result_m[i]; - } - dgemv_(&transa_m, &size_m, &size_n, &alpha_const, a_const.data(), &lda, - x_const_n.data(), &incx, &beta_const, result_m.data(), &incy); - - for (int j = 0; j < size_n; j++) { - for (int i = 0; i < size_m; i++) { - c_dot_n[j] += a_const[i + j * lda] * x_const_m[i]; - } - answer_n[j] = alpha_const * c_dot_n[j] + beta_const * result_n[j]; - } - dgemv_(&transa_n, &size_m, &size_n, &alpha_const, a_const.data(), &lda, - x_const_m.data(), &incx, &beta_const, result_n.data(), &incy); + std::generate (a_const.begin (), a_const.end (), [] () { return std::rand () / double (RAND_MAX); }); + for (int i = 0; i < 
size_m; i++) + { + for (int j = 0; j < size_n; j++) + { + c_dot_m[i] += a_const[i + j * lda] * x_const_n[j]; + } + answer_m[i] = alpha_const * c_dot_m[i] + beta_const * result_m[i]; + } + dgemv_ (&transa_m, + &size_m, + &size_n, + &alpha_const, + a_const.data (), + &lda, + x_const_n.data (), + &incx, + &beta_const, + result_m.data (), + &incy); + + for (int j = 0; j < size_n; j++) + { + for (int i = 0; i < size_m; i++) + { + c_dot_n[j] += a_const[i + j * lda] * x_const_m[i]; + } + answer_n[j] = alpha_const * c_dot_n[j] + beta_const * result_n[j]; + } + dgemv_ (&transa_n, + &size_m, + &size_n, + &alpha_const, + a_const.data (), + &lda, + x_const_m.data (), + &incx, + &beta_const, + result_n.data (), + &incy); for (int i = 0; i < size_m; i++) - EXPECT_DOUBLE_EQ(answer_m[i], result_m[i]); + { + EXPECT_DOUBLE_EQ (answer_m[i], result_m[i]); + } for (int j = 0; j < size_n; j++) - EXPECT_DOUBLE_EQ(answer_n[j], result_n[j]); + { + EXPECT_DOUBLE_EQ (answer_n[j], result_n[j]); + } } -TEST(blas_connector, zgemv_) { +TEST (blas_connector, zgemv_) +{ typedef std::complex T; const char transa_m = 'N'; const char transa_n = 'T'; @@ -337,71 +434,122 @@ TEST(blas_connector, zgemv_) { const int incx = 1; const int incy = 1; std::array x_const_m, x_const_c, result_m, answer_m, c_dot_m{}; - std::array x_const_n, result_n, result_c, answer_n, answer_c, - c_dot_n{}, c_dot_c{}; - std::generate(x_const_n.begin(), x_const_n.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - std::generate(result_n.begin(), result_n.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - std::generate(x_const_m.begin(), x_const_m.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - std::generate(result_m.begin(), result_m.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - 
static_cast(std::rand() / double(RAND_MAX))}; - }); + std::array x_const_n, result_n, result_c, answer_n, answer_c, c_dot_n{}, c_dot_c{}; + std::generate (x_const_n.begin (), + x_const_n.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); + std::generate (result_n.begin (), + result_n.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); + std::generate (x_const_m.begin (), + x_const_m.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); + std::generate (result_m.begin (), + result_m.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); std::array a_const; - std::generate(a_const.begin(), a_const.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - for (int i = 0; i < size_m; i++) { - for (int j = 0; j < size_n; j++) { - c_dot_m[i] += a_const[i + j * lda] * x_const_n[j]; - } - answer_m[i] = alpha_const * c_dot_m[i] + beta_const * result_m[i]; - } - zgemv_(&transa_m, &size_m, &size_n, &alpha_const, a_const.data(), &lda, - x_const_n.data(), &incx, &beta_const, result_m.data(), &incy); - - for (int j = 0; j < size_n; j++) { - for (int i = 0; i < size_m; i++) { - c_dot_n[j] += a_const[i + j * lda] * x_const_m[i]; - } - answer_n[j] = alpha_const * c_dot_n[j] + beta_const * result_n[j]; - } - zgemv_(&transa_n, &size_m, &size_n, &alpha_const, a_const.data(), &lda, - x_const_m.data(), &incx, &beta_const, result_n.data(), &incy); - - for (int j = 0; j < size_n; j++) { - for (int i = 0; i < size_m; i++) { - c_dot_c[j] += conj(a_const[i + j * lda]) * x_const_c[i]; - } - answer_c[j] = alpha_const * c_dot_c[j] + beta_const * result_c[j]; - } - zgemv_(&transa_h, &size_m, &size_n, 
&alpha_const, a_const.data(), &lda, - x_const_c.data(), &incx, &beta_const, result_c.data(), &incy); - - for (int i = 0; i < size_m; i++) { - EXPECT_DOUBLE_EQ(answer_m[i].real(), result_m[i].real()); - EXPECT_DOUBLE_EQ(answer_m[i].imag(), result_m[i].imag()); - } - for (int j = 0; j < size_n; j++) { - EXPECT_DOUBLE_EQ(answer_n[j].real(), result_n[j].real()); - EXPECT_DOUBLE_EQ(answer_n[j].imag(), result_n[j].imag()); - } - for (int j = 0; j < size_n; j++) { - EXPECT_DOUBLE_EQ(answer_c[j].real(), result_c[j].real()); - EXPECT_DOUBLE_EQ(answer_c[j].imag(), result_c[j].imag()); - } + std::generate (a_const.begin (), + a_const.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); + for (int i = 0; i < size_m; i++) + { + for (int j = 0; j < size_n; j++) + { + c_dot_m[i] += a_const[i + j * lda] * x_const_n[j]; + } + answer_m[i] = alpha_const * c_dot_m[i] + beta_const * result_m[i]; + } + zgemv_ (&transa_m, + &size_m, + &size_n, + &alpha_const, + a_const.data (), + &lda, + x_const_n.data (), + &incx, + &beta_const, + result_m.data (), + &incy); + + for (int j = 0; j < size_n; j++) + { + for (int i = 0; i < size_m; i++) + { + c_dot_n[j] += a_const[i + j * lda] * x_const_m[i]; + } + answer_n[j] = alpha_const * c_dot_n[j] + beta_const * result_n[j]; + } + zgemv_ (&transa_n, + &size_m, + &size_n, + &alpha_const, + a_const.data (), + &lda, + x_const_m.data (), + &incx, + &beta_const, + result_n.data (), + &incy); + + for (int j = 0; j < size_n; j++) + { + for (int i = 0; i < size_m; i++) + { + c_dot_c[j] += conj (a_const[i + j * lda]) * x_const_c[i]; + } + answer_c[j] = alpha_const * c_dot_c[j] + beta_const * result_c[j]; + } + zgemv_ (&transa_h, + &size_m, + &size_n, + &alpha_const, + a_const.data (), + &lda, + x_const_c.data (), + &incx, + &beta_const, + result_c.data (), + &incy); + + for (int i = 0; i < size_m; i++) + { + EXPECT_DOUBLE_EQ (answer_m[i].real (), result_m[i].real ()); + 
EXPECT_DOUBLE_EQ (answer_m[i].imag (), result_m[i].imag ()); + } + for (int j = 0; j < size_n; j++) + { + EXPECT_DOUBLE_EQ (answer_n[j].real (), result_n[j].real ()); + EXPECT_DOUBLE_EQ (answer_n[j].imag (), result_n[j].imag ()); + } + for (int j = 0; j < size_n; j++) + { + EXPECT_DOUBLE_EQ (answer_c[j].real (), result_c[j].real ()); + EXPECT_DOUBLE_EQ (answer_c[j].imag (), result_c[j].imag ()); + } } -TEST(blas_connector, Gemv) { +TEST (blas_connector, Gemv) +{ typedef std::complex T; const char transa_m = 'N'; const char transa_n = 'T'; @@ -414,72 +562,122 @@ TEST(blas_connector, Gemv) { const int incx = 1; const int incy = 1; std::array x_const_m, x_const_c, result_m, answer_m, c_dot_m{}; - std::array x_const_n, result_n, result_c, answer_n, answer_c, - c_dot_n{}, c_dot_c{}; - std::generate(x_const_n.begin(), x_const_n.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - std::generate(result_n.begin(), result_n.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - std::generate(x_const_m.begin(), x_const_m.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - std::generate(result_m.begin(), result_m.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); + std::array x_const_n, result_n, result_c, answer_n, answer_c, c_dot_n{}, c_dot_c{}; + std::generate (x_const_n.begin (), + x_const_n.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); + std::generate (result_n.begin (), + result_n.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); + std::generate (x_const_m.begin (), + x_const_m.end (), + [] () + { + return 
T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); + std::generate (result_m.begin (), + result_m.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); std::array a_const; - std::generate(a_const.begin(), a_const.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - for (int i = 0; i < size_m; i++) { - for (int j = 0; j < size_n; j++) { - c_dot_m[i] += a_const[i + j * lda] * x_const_n[j]; - } - answer_m[i] = alpha_const * c_dot_m[i] + beta_const * result_m[i]; - } - BlasConnector::gemv(transa_m, size_m, size_n, alpha_const, a_const.data(), lda, - x_const_n.data(), incx, beta_const, result_m.data(), incy); - - for (int j = 0; j < size_n; j++) { - for (int i = 0; i < size_m; i++) { - c_dot_n[j] += a_const[i + j * lda] * x_const_m[i]; - } - answer_n[j] = alpha_const * c_dot_n[j] + beta_const * result_n[j]; - } - BlasConnector::gemv(transa_n, size_m, size_n, alpha_const, a_const.data(), lda, - x_const_m.data(), incx, beta_const, result_n.data(), incy); - - for (int j = 0; j < size_n; j++) { - for (int i = 0; i < size_m; i++) { - c_dot_c[j] += conj(a_const[i + j * lda]) * x_const_c[i]; - } - answer_c[j] = alpha_const * c_dot_c[j] + beta_const * result_c[j]; - } - BlasConnector::gemv(transa_h, size_m, size_n, alpha_const, a_const.data(), lda, - x_const_c.data(), incx, beta_const, result_c.data(), incy); - - for (int i = 0; i < size_m; i++) { - EXPECT_DOUBLE_EQ(answer_m[i].real(), result_m[i].real()); - EXPECT_DOUBLE_EQ(answer_m[i].imag(), result_m[i].imag()); - } - for (int j = 0; j < size_n; j++) { - EXPECT_DOUBLE_EQ(answer_n[j].real(), result_n[j].real()); - EXPECT_DOUBLE_EQ(answer_n[j].imag(), result_n[j].imag()); - } - for (int j = 0; j < size_n; j++) { - EXPECT_DOUBLE_EQ(answer_c[j].real(), result_c[j].real()); - EXPECT_DOUBLE_EQ(answer_c[j].imag(), 
result_c[j].imag()); - } -} + std::generate (a_const.begin (), + a_const.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); + for (int i = 0; i < size_m; i++) + { + for (int j = 0; j < size_n; j++) + { + c_dot_m[i] += a_const[i + j * lda] * x_const_n[j]; + } + answer_m[i] = alpha_const * c_dot_m[i] + beta_const * result_m[i]; + } + BlasConnector::gemv (transa_m, + size_m, + size_n, + alpha_const, + a_const.data (), + lda, + x_const_n.data (), + incx, + beta_const, + result_m.data (), + incy); + for (int j = 0; j < size_n; j++) + { + for (int i = 0; i < size_m; i++) + { + c_dot_n[j] += a_const[i + j * lda] * x_const_m[i]; + } + answer_n[j] = alpha_const * c_dot_n[j] + beta_const * result_n[j]; + } + BlasConnector::gemv (transa_n, + size_m, + size_n, + alpha_const, + a_const.data (), + lda, + x_const_m.data (), + incx, + beta_const, + result_n.data (), + incy); + + for (int j = 0; j < size_n; j++) + { + for (int i = 0; i < size_m; i++) + { + c_dot_c[j] += conj (a_const[i + j * lda]) * x_const_c[i]; + } + answer_c[j] = alpha_const * c_dot_c[j] + beta_const * result_c[j]; + } + BlasConnector::gemv (transa_h, + size_m, + size_n, + alpha_const, + a_const.data (), + lda, + x_const_c.data (), + incx, + beta_const, + result_c.data (), + incy); + + for (int i = 0; i < size_m; i++) + { + EXPECT_DOUBLE_EQ (answer_m[i].real (), result_m[i].real ()); + EXPECT_DOUBLE_EQ (answer_m[i].imag (), result_m[i].imag ()); + } + for (int j = 0; j < size_n; j++) + { + EXPECT_DOUBLE_EQ (answer_n[j].real (), result_n[j].real ()); + EXPECT_DOUBLE_EQ (answer_n[j].imag (), result_n[j].imag ()); + } + for (int j = 0; j < size_n; j++) + { + EXPECT_DOUBLE_EQ (answer_c[j].real (), result_c[j].real ()); + EXPECT_DOUBLE_EQ (answer_c[j].imag (), result_c[j].imag ()); + } +} -TEST(blas_connector, dgemm_) { +TEST (blas_connector, dgemm_) +{ typedef double T; const char transa_m = 'N'; const char transb_m = 'N'; @@ 
-494,33 +692,45 @@ TEST(blas_connector, dgemm_) { std::array a_const; std::array b_const; std::array c_dot{}, answer, result; - std::generate(a_const.begin(), a_const.end(), - []() { return std::rand() / double(RAND_MAX); }); - std::generate(b_const.begin(), b_const.end(), - []() { return std::rand() / double(RAND_MAX); }); - std::generate(result.begin(), result.end(), - []() { return std::rand() / double(RAND_MAX); }); - for (int i = 0; i < size_m; i++) { - for (int j = 0; j < size_n; j++) { - for (int k = 0; k < size_k; k++) { - c_dot[i + j * ldc] += - a_const[i + k * lda] * b_const[k + j * ldb]; - } - answer[i + j * ldc] = alpha_const * c_dot[i + j * ldc] + - beta_const * result[i + j * ldc]; + std::generate (a_const.begin (), a_const.end (), [] () { return std::rand () / double (RAND_MAX); }); + std::generate (b_const.begin (), b_const.end (), [] () { return std::rand () / double (RAND_MAX); }); + std::generate (result.begin (), result.end (), [] () { return std::rand () / double (RAND_MAX); }); + for (int i = 0; i < size_m; i++) + { + for (int j = 0; j < size_n; j++) + { + for (int k = 0; k < size_k; k++) + { + c_dot[i + j * ldc] += a_const[i + k * lda] * b_const[k + j * ldb]; + } + answer[i + j * ldc] = alpha_const * c_dot[i + j * ldc] + beta_const * result[i + j * ldc]; + } } - } - dgemm_(&transa_m, &transb_m, &size_m, &size_n, &size_k, &alpha_const, - a_const.data(), &lda, b_const.data(), &ldb, &beta_const, - result.data(), &ldc); + dgemm_ (&transa_m, + &transb_m, + &size_m, + &size_n, + &size_k, + &alpha_const, + a_const.data (), + &lda, + b_const.data (), + &ldb, + &beta_const, + result.data (), + &ldc); for (int i = 0; i < size_m; i++) - for (int j = 0; j < size_n; j++) { - EXPECT_DOUBLE_EQ(answer[i + j * ldc], result[i + j * ldc]); + { + for (int j = 0; j < size_n; j++) + { + EXPECT_DOUBLE_EQ (answer[i + j * ldc], result[i + j * ldc]); + } } } -TEST(blas_connector, zgemm_) { +TEST (blas_connector, zgemm_) +{ typedef std::complex T; const char transa_m = 
'N'; const char transb_m = 'N'; @@ -535,42 +745,64 @@ TEST(blas_connector, zgemm_) { std::array a_const; std::array b_const; std::array c_dot{}, answer, result; - std::generate(a_const.begin(), a_const.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - std::generate(b_const.begin(), b_const.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - std::generate(result.begin(), result.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - for (int i = 0; i < size_m; i++) { - for (int j = 0; j < size_n; j++) { - for (int k = 0; k < size_k; k++) { - c_dot[i + j * ldc] += - a_const[i + k * lda] * b_const[k + j * ldb]; - } - answer[i + j * ldc] = alpha_const * c_dot[i + j * ldc] + - beta_const * result[i + j * ldc]; + std::generate (a_const.begin (), + a_const.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); + std::generate (b_const.begin (), + b_const.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); + std::generate (result.begin (), + result.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); + for (int i = 0; i < size_m; i++) + { + for (int j = 0; j < size_n; j++) + { + for (int k = 0; k < size_k; k++) + { + c_dot[i + j * ldc] += a_const[i + k * lda] * b_const[k + j * ldb]; + } + answer[i + j * ldc] = alpha_const * c_dot[i + j * ldc] + beta_const * result[i + j * ldc]; + } } - } - zgemm_(&transa_m, &transb_m, &size_m, &size_n, &size_k, &alpha_const, - a_const.data(), &lda, b_const.data(), &ldb, &beta_const, - result.data(), &ldc); + zgemm_ (&transa_m, + &transb_m, + &size_m, + &size_n, + &size_k, + 
&alpha_const, + a_const.data (), + &lda, + b_const.data (), + &ldb, + &beta_const, + result.data (), + &ldc); for (int i = 0; i < size_m; i++) - for (int j = 0; j < size_n; j++) { - EXPECT_DOUBLE_EQ(answer[i + j * ldc].real(), - result[i + j * ldc].real()); - EXPECT_DOUBLE_EQ(answer[i + j * ldc].imag(), - result[i + j * ldc].imag()); + { + for (int j = 0; j < size_n; j++) + { + EXPECT_DOUBLE_EQ (answer[i + j * ldc].real (), result[i + j * ldc].real ()); + EXPECT_DOUBLE_EQ (answer[i + j * ldc].imag (), result[i + j * ldc].imag ()); + } } } -TEST(blas_connector, Gemm) { +TEST (blas_connector, Gemm) +{ typedef std::complex T; const char transa_m = 'N'; const char transb_m = 'N'; @@ -585,44 +817,66 @@ TEST(blas_connector, Gemm) { std::array a_const; std::array b_const; std::array c_dot{}, answer, result; - std::generate(a_const.begin(), a_const.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - std::generate(b_const.begin(), b_const.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - std::generate(result.begin(), result.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - for (int i = 0; i < size_m; i++) { - for (int j = 0; j < size_n; j++) { - for (int k = 0; k < size_k; k++) { - c_dot[i + j * ldc] += - a_const[i + k * lda] * b_const[k + j * ldb]; - } - answer[i + j * ldc] = alpha_const * c_dot[i + j * ldc] + - beta_const * result[i + j * ldc]; + std::generate (a_const.begin (), + a_const.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); + std::generate (b_const.begin (), + b_const.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); + std::generate (result.begin (), + result.end (), 
+ [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); + for (int i = 0; i < size_m; i++) + { + for (int j = 0; j < size_n; j++) + { + for (int k = 0; k < size_k; k++) + { + c_dot[i + j * ldc] += a_const[i + k * lda] * b_const[k + j * ldb]; + } + answer[i + j * ldc] = alpha_const * c_dot[i + j * ldc] + beta_const * result[i + j * ldc]; + } } - } - BlasConnector::gemm_cm(transa_m, transb_m, size_m, size_n, size_k, alpha_const, - a_const.data(), lda, b_const.data(), ldb, beta_const, - result.data(), ldc); + BlasConnector::gemm_cm (transa_m, + transb_m, + size_m, + size_n, + size_k, + alpha_const, + a_const.data (), + lda, + b_const.data (), + ldb, + beta_const, + result.data (), + ldc); for (int i = 0; i < size_m; i++) - for (int j = 0; j < size_n; j++) { - EXPECT_DOUBLE_EQ(answer[i + j * ldc].real(), - result[i + j * ldc].real()); - EXPECT_DOUBLE_EQ(answer[i + j * ldc].imag(), - result[i + j * ldc].imag()); + { + for (int j = 0; j < size_n; j++) + { + EXPECT_DOUBLE_EQ (answer[i + j * ldc].real (), result[i + j * ldc].real ()); + EXPECT_DOUBLE_EQ (answer[i + j * ldc].imag (), result[i + j * ldc].imag ()); + } } } #ifdef __CUDA -TEST(blas_connector, GemmGpu) { +TEST (blas_connector, GemmGpu) +{ typedef std::complex T; const char transa_m = 'N'; const char transb_m = 'N'; @@ -640,58 +894,80 @@ TEST(blas_connector, GemmGpu) { std::complex* a_gpu = nullptr; std::complex* b_gpu = nullptr; std::complex* result_gpu = nullptr; - resmem_zd_op()(a_gpu, size_k * lda * sizeof(std::complex)); - resmem_zd_op()(b_gpu, size_n * ldb * sizeof(std::complex)); - resmem_zd_op()(result_gpu, size_n * ldc * sizeof(std::complex)); - std::generate(a_const.begin(), a_const.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - std::generate(b_const.begin(), b_const.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - 
static_cast(std::rand() / double(RAND_MAX))}; - }); - std::generate(result.begin(), result.end(), []() { - return T{static_cast(std::rand() / double(RAND_MAX)), - static_cast(std::rand() / double(RAND_MAX))}; - }); - for (int i = 0; i < size_m; i++) { - for (int j = 0; j < size_n; j++) { - for (int k = 0; k < size_k; k++) { - c_dot[i + j * ldc] += - a_const[i + k * lda] * b_const[k + j * ldb]; - } - answer[i + j * ldc] = alpha_const * c_dot[i + j * ldc] + - beta_const * result[i + j * ldc]; - } - } - syncmem_z2z_h2d_op()(a_gpu, a_const.data(), sizeof(std::complex) * size_k * lda); - syncmem_z2z_h2d_op()(b_gpu, b_const.data(), sizeof(std::complex) * size_n * ldb); - syncmem_z2z_h2d_op()(result_gpu, result.data(), sizeof(std::complex) * size_n * ldc); - BlasConnector::gemm_cm(transa_m, transb_m, size_m, size_n, size_k, alpha_const, - a_gpu, lda, b_gpu, ldb, beta_const, - result_gpu, ldc, base_device::AbacusDevice_t::GpuDevice); - syncmem_z2z_d2h_op()(result.data(), result_gpu, sizeof(std::complex) * size_n * ldc); - delmem_zd_op()(result_gpu); - delmem_zd_op()(a_gpu); - delmem_zd_op()(b_gpu); + resmem_zd_op () (a_gpu, size_k * lda * sizeof (std::complex)); + resmem_zd_op () (b_gpu, size_n * ldb * sizeof (std::complex)); + resmem_zd_op () (result_gpu, size_n * ldc * sizeof (std::complex)); + std::generate (a_const.begin (), + a_const.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); + std::generate (b_const.begin (), + b_const.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); + std::generate (result.begin (), + result.end (), + [] () + { + return T{static_cast (std::rand () / double (RAND_MAX)), + static_cast (std::rand () / double (RAND_MAX))}; + }); for (int i = 0; i < size_m; i++) - for (int j = 0; j < size_n; j++) { - EXPECT_DOUBLE_EQ(answer[i + j * ldc].real(), - result[i + j * ldc].real()); - 
EXPECT_DOUBLE_EQ(answer[i + j * ldc].imag(), - result[i + j * ldc].imag()); + { + for (int j = 0; j < size_n; j++) + { + for (int k = 0; k < size_k; k++) + { + c_dot[i + j * ldc] += a_const[i + k * lda] * b_const[k + j * ldb]; + } + answer[i + j * ldc] = alpha_const * c_dot[i + j * ldc] + beta_const * result[i + j * ldc]; + } } + syncmem_z2z_h2d_op () (a_gpu, a_const.data (), sizeof (std::complex) * size_k * lda); + syncmem_z2z_h2d_op () (b_gpu, b_const.data (), sizeof (std::complex) * size_n * ldb); + syncmem_z2z_h2d_op () (result_gpu, result.data (), sizeof (std::complex) * size_n * ldc); + BlasConnector::gemm_cm (transa_m, + transb_m, + size_m, + size_n, + size_k, + alpha_const, + a_gpu, + lda, + b_gpu, + ldb, + beta_const, + result_gpu, + ldc, + base_device::AbacusDevice_t::GpuDevice); + syncmem_z2z_d2h_op () (result.data (), result_gpu, sizeof (std::complex) * size_n * ldc); + delmem_zd_op () (result_gpu); + delmem_zd_op () (a_gpu); + delmem_zd_op () (b_gpu); + for (int i = 0; i < size_m; i++) + for (int j = 0; j < size_n; j++) + { + EXPECT_DOUBLE_EQ (answer[i + j * ldc].real (), result[i + j * ldc].real ()); + EXPECT_DOUBLE_EQ (answer[i + j * ldc].imag (), result[i + j * ldc].imag ()); + } } #endif -int main(int argc, char **argv) { +int + main (int argc, char** argv) +{ #ifdef __CUDA std::cout << "Initializing CublasHandle..." << std::endl; - BlasUtils::createGpuBlasHandle(); + BlasUtils::createGpuBlasHandle (); std::cout << "Initializing CublasHandle Done." 
<< std::endl; #endif - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + return RUN_ALL_TESTS (); } diff --git a/source/source_base/test/clebsch_gordan_coeff_test.cpp b/source/source_base/test/clebsch_gordan_coeff_test.cpp index 888249765fa..439928beeca 100644 --- a/source/source_base/test/clebsch_gordan_coeff_test.cpp +++ b/source/source_base/test/clebsch_gordan_coeff_test.cpp @@ -16,7 +16,7 @@ * - functions: gen_rndm_r and compute_ap */ -TEST(ClebschGordanTest, ClebschGordanExit) +TEST (ClebschGordanTest, ClebschGordanExit) { int lmaxkb = -2; ModuleBase::realArray ap; @@ -24,23 +24,25 @@ TEST(ClebschGordanTest, ClebschGordanExit) ModuleBase::IntArray lpl; std::string output; - testing::internal::CaptureStdout(); - EXPECT_EXIT(ModuleBase::Clebsch_Gordan::clebsch_gordan(lmaxkb + 1, ap, lpx, lpl), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("Clebsch_Gordan: lmaxkb + 1 < 0")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (ModuleBase::Clebsch_Gordan::clebsch_gordan (lmaxkb + 1, ap, lpx, lpl), + ::testing::ExitedWithCode (1), + ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("Clebsch_Gordan: lmaxkb + 1 < 0")); } -TEST(ClebschGordanTest, ClebschGordan) +TEST (ClebschGordanTest, ClebschGordan) { int lmaxkb = 1; ModuleBase::realArray ap; ModuleBase::IntArray lpx; ModuleBase::IntArray lpl; - ModuleBase::Clebsch_Gordan::clebsch_gordan(lmaxkb + 1, ap, lpx, lpl); - EXPECT_DOUBLE_EQ(ap(0, 0, 0), 0.28209479177387564); - EXPECT_EQ(lpx(0, 0), 1); - EXPECT_EQ(lpx(3, 3), 3); - EXPECT_EQ(lpl(0, 0, 5), 0); - EXPECT_EQ(lpl(3, 3, 8), 0); + ModuleBase::Clebsch_Gordan::clebsch_gordan (lmaxkb + 1, ap, lpx, lpl); + EXPECT_DOUBLE_EQ (ap (0, 0, 0), 0.28209479177387564); + EXPECT_EQ (lpx (0, 0), 1); + EXPECT_EQ (lpx (3, 3), 3); + EXPECT_EQ (lpl (0, 0, 5), 0); + EXPECT_EQ (lpl (3, 3, 8), 0); } \ 
No newline at end of file diff --git a/source/source_base/test/complexarray_test.cpp b/source/source_base/test/complexarray_test.cpp index c424fe8417b..894152a4482 100644 --- a/source/source_base/test/complexarray_test.cpp +++ b/source/source_base/test/complexarray_test.cpp @@ -1,122 +1,122 @@ -#include"../complexarray.h" -#include"gtest/gtest.h" -#include"gmock/gmock.h" +#include "../complexarray.h" +#include "gtest/gtest.h" +#include "gmock/gmock.h" /************************************************ -* unit test of class ComplexArray and related functions -***********************************************/ + * unit test of class ComplexArray and related functions + ***********************************************/ /** -* - Tested functions of class ComplexArray: -* - constructor: -* - ComplexArray(const int bnd1=0, const int bnd2=1, const int bnd3=1, const int bnd4=1) -* - ComplexArray(const ComplexArray &cd) -* - ComplexArray(ComplexArray &&cd) -* - operator "=": -* - assign a complex to all elements of a ComplexArray -* - assign a ComplexArray to another ComplexArray -* - rvalue reference to a ComplexArray -* -* - operator "+": -* - one ComplexArray plus another ComplexArray that has -* the same dimension. -* - throw error when one ComplexArray plus another ComplexArray -* that has different dimension. -* -* - operator "+=": -* - one ComplexArray plus another ComplexArray, and assign the -* value to first ComplexArray -* - throw error when one ComplexArray plus another ComplexArray -* that has different dimension. -* -* - operator "-": -* - one ComplexArray minus another ComplexArray that has -* the same dimension. -* - throw error when one ComplexArray minus another ComplexArray -* that has different dimension. -* -* - operator "-=": -* - one ComplexArray minus another ComplexArray, and assign the -* value to first ComplexArray -* - throw error when one ComplexArray minus another ComplexArray -* that has different dimension. 
-* -* - operator "*": -* - one ComplexArray is multiplied by a double -* - one ComplexArray is multiplied by a complex -* - one ComplexArray is multiplied by another ComplexArray that has same dimension -* - throw error when one ComplexArray is miltiplied by another ComplexArray -* that has different dimension. -* -* - operator "*=": -* similar as "*" -* -* - operator "==": -* judge if two ComplexArray is equal, all element is equal. -* -* - operator "!=": -* judge if two ComplexArray is not equal -* -* - oprator "()": -* - access the element -* -* - function zero_out, negate, randomize, getBound/Size, create. -* -* Test relative functions: -* - overloading of operator "*". a double/complex multiply a Class ComplexArray. -* - functon abs2(), return the sum of squares of all elements -* - function dot() -* - function scale_accumulate() -* - function scaled_sum() -* - function point_mult() -* - function complexArrayxAlloc -* - overloading of the function scaled_sum(). Does cd3 = c1*cd1 + c2*cd2. c1 and c2 are complex numbers. -* - overloading of operator "()". The operator is effectively overload by "const". -*/ - -//compare two complex by using EXPECT_DOUBLE_EQ() -void EXPECT_COMPLEX_EQUAL(const std::complex& a,const std::complex& b) -{ - EXPECT_DOUBLE_EQ(a.real(),b.real()); - EXPECT_DOUBLE_EQ(a.imag(),b.imag()); + * - Tested functions of class ComplexArray: + * - constructor: + * - ComplexArray(const int bnd1=0, const int bnd2=1, const int bnd3=1, const int bnd4=1) + * - ComplexArray(const ComplexArray &cd) + * - ComplexArray(ComplexArray &&cd) + * - operator "=": + * - assign a complex to all elements of a ComplexArray + * - assign a ComplexArray to another ComplexArray + * - rvalue reference to a ComplexArray + * + * - operator "+": + * - one ComplexArray plus another ComplexArray that has + * the same dimension. + * - throw error when one ComplexArray plus another ComplexArray + * that has different dimension. 
+ * + * - operator "+=": + * - one ComplexArray plus another ComplexArray, and assign the + * value to first ComplexArray + * - throw error when one ComplexArray plus another ComplexArray + * that has different dimension. + * + * - operator "-": + * - one ComplexArray minus another ComplexArray that has + * the same dimension. + * - throw error when one ComplexArray minus another ComplexArray + * that has different dimension. + * + * - operator "-=": + * - one ComplexArray minus another ComplexArray, and assign the + * value to first ComplexArray + * - throw error when one ComplexArray minus another ComplexArray + * that has different dimension. + * + * - operator "*": + * - one ComplexArray is multiplied by a double + * - one ComplexArray is multiplied by a complex + * - one ComplexArray is multiplied by another ComplexArray that has same dimension + * - throw error when one ComplexArray is miltiplied by another ComplexArray + * that has different dimension. + * + * - operator "*=": + * similar as "*" + * + * - operator "==": + * judge if two ComplexArray is equal, all element is equal. + * + * - operator "!=": + * judge if two ComplexArray is not equal + * + * - oprator "()": + * - access the element + * + * - function zero_out, negate, randomize, getBound/Size, create. + * + * Test relative functions: + * - overloading of operator "*". a double/complex multiply a Class ComplexArray. + * - functon abs2(), return the sum of squares of all elements + * - function dot() + * - function scale_accumulate() + * - function scaled_sum() + * - function point_mult() + * - function complexArrayxAlloc + * - overloading of the function scaled_sum(). Does cd3 = c1*cd1 + c2*cd2. c1 and c2 are complex numbers. + * - overloading of operator "()". The operator is effectively overload by "const". 
+ */ + +// compare two complex by using EXPECT_DOUBLE_EQ() +void + EXPECT_COMPLEX_EQUAL (const std::complex& a, const std::complex& b) +{ + EXPECT_DOUBLE_EQ (a.real (), b.real ()); + EXPECT_DOUBLE_EQ (a.imag (), b.imag ()); } namespace ModuleBase { - void complexArrayxAlloc(); +void complexArrayxAlloc (); } class ComplexArray_test : public testing::Test { - protected: - ModuleBase::ComplexArray a2,a4,b2,b4,c2,c4,d2; - std::complex com1 {1.0,2.0}; - std::complex com2 {3.0,4.0}; - std::complex com3 {-2.0,-3.0}; - std::complex comzero {0.0,0.0}; - - void SetUp() - { - a2 = ModuleBase::ComplexArray(2,1,1,1); // if define this class as a matrix - b2 = ModuleBase::ComplexArray(2,1,1,1); // of 4 dimesions, - c2 = ModuleBase::ComplexArray(2,1,1,1); // is a2 +/-/* d2 allowed or not??? - d2 = ModuleBase::ComplexArray(1,2,1,1); // it does not matter, this situation will not appear in ABACUS - a4 = ModuleBase::ComplexArray(2,2,1,1); - b4 = ModuleBase::ComplexArray(2,2,1,1); - c4 = ModuleBase::ComplexArray(2,2,1,1); - } + protected: + ModuleBase::ComplexArray a2, a4, b2, b4, c2, c4, d2; + std::complex com1{1.0, 2.0}; + std::complex com2{3.0, 4.0}; + std::complex com3{-2.0, -3.0}; + std::complex comzero{0.0, 0.0}; + void + SetUp () + { + a2 = ModuleBase::ComplexArray (2, 1, 1, 1); // if define this class as a matrix + b2 = ModuleBase::ComplexArray (2, 1, 1, 1); // of 4 dimesions, + c2 = ModuleBase::ComplexArray (2, 1, 1, 1); // is a2 +/-/* d2 allowed or not??? 
+ d2 = ModuleBase::ComplexArray (1, 2, 1, 1); // it does not matter, this situation will not appear in ABACUS + a4 = ModuleBase::ComplexArray (2, 2, 1, 1); + b4 = ModuleBase::ComplexArray (2, 2, 1, 1); + c4 = ModuleBase::ComplexArray (2, 2, 1, 1); + } }; - -TEST(ComplexArray, constructor_bnd1234_test) +TEST (ComplexArray, constructor_bnd1234_test) { - ModuleBase::ComplexArray a(1,2,3,4); - ASSERT_EQ(a.getSize(),24); - ASSERT_EQ(a.getBound1(),1); - ASSERT_EQ(a.getBound2(),2); - ASSERT_EQ(a.getBound3(),3); - ASSERT_EQ(a.getBound4(),4); + ModuleBase::ComplexArray a (1, 2, 3, 4); + ASSERT_EQ (a.getSize (), 24); + ASSERT_EQ (a.getBound1 (), 1); + ASSERT_EQ (a.getBound2 (), 2); + ASSERT_EQ (a.getBound3 (), 3); + ASSERT_EQ (a.getBound4 (), 4); /* ASSERT_DEATH(ModuleBase::ComplexArray a(0,2,3,4),""); ASSERT_DEATH(ModuleBase::ComplexArray a(1,0,3,4),""); @@ -125,379 +125,371 @@ TEST(ComplexArray, constructor_bnd1234_test) */ } -TEST(ComplexArray, constructor_copy_test) +TEST (ComplexArray, constructor_copy_test) { - ModuleBase::ComplexArray a(1,2,3,4); - ModuleBase::ComplexArray b(a); - ASSERT_EQ(b.getSize(),24); - ASSERT_EQ(b.getBound1(),1); - ASSERT_EQ(b.getBound2(),2); - ASSERT_EQ(b.getBound3(),3); - ASSERT_EQ(b.getBound4(),4); - - ASSERT_EQ(a.getSize(),24); - ASSERT_EQ(a.getBound1(),1); - ASSERT_EQ(a.getBound2(),2); - ASSERT_EQ(a.getBound3(),3); - ASSERT_EQ(a.getBound4(),4); + ModuleBase::ComplexArray a (1, 2, 3, 4); + ModuleBase::ComplexArray b (a); + ASSERT_EQ (b.getSize (), 24); + ASSERT_EQ (b.getBound1 (), 1); + ASSERT_EQ (b.getBound2 (), 2); + ASSERT_EQ (b.getBound3 (), 3); + ASSERT_EQ (b.getBound4 (), 4); + ASSERT_EQ (a.getSize (), 24); + ASSERT_EQ (a.getBound1 (), 1); + ASSERT_EQ (a.getBound2 (), 2); + ASSERT_EQ (a.getBound3 (), 3); + ASSERT_EQ (a.getBound4 (), 4); } -TEST(ComplexArray, constructor_rvalue_test) +TEST (ComplexArray, constructor_rvalue_test) { - ModuleBase::ComplexArray b(std::move(ModuleBase::ComplexArray(1,2,3,4))); - ASSERT_EQ(b.getSize(),24); - 
ASSERT_EQ(b.getBound1(),1); - ASSERT_EQ(b.getBound2(),2); - ASSERT_EQ(b.getBound3(),3); - ASSERT_EQ(b.getBound4(),4); + ModuleBase::ComplexArray b (std::move (ModuleBase::ComplexArray (1, 2, 3, 4))); + ASSERT_EQ (b.getSize (), 24); + ASSERT_EQ (b.getBound1 (), 1); + ASSERT_EQ (b.getBound2 (), 2); + ASSERT_EQ (b.getBound3 (), 3); + ASSERT_EQ (b.getBound4 (), 4); } - -TEST_F(ComplexArray_test,operator_equal_complex) -{ +TEST_F (ComplexArray_test, operator_equal_complex) +{ a2 = com1; - for(int i = 0;i= -0.5) && (a.ptr[i].imag() >= -0.5)); - EXPECT_TRUE((a.ptr[i].real() < 0.5) && (a.ptr[i].imag() < 0.5)); - } + for (int i = 1; i < 5; ++i) + { + allequal = allequal && (a.ptr[i] == a.ptr[0]); + } + EXPECT_FALSE (allequal); + for (int i = 0; i < a.getSize (); ++i) + { + EXPECT_TRUE ((a.ptr[i].real () >= -0.5) && (a.ptr[i].imag () >= -0.5)); + EXPECT_TRUE ((a.ptr[i].real () < 0.5) && (a.ptr[i].imag () < 0.5)); + } } -TEST(ComplexArray,getBoundSize) +TEST (ComplexArray, getBoundSize) { - ModuleBase::ComplexArray a(1,2,3,4); - EXPECT_EQ(a.getSize(),24); - EXPECT_EQ(a.getBound1(),1); - EXPECT_EQ(a.getBound2(),2); - EXPECT_EQ(a.getBound3(),3); - EXPECT_EQ(a.getBound4(),4); + ModuleBase::ComplexArray a (1, 2, 3, 4); + EXPECT_EQ (a.getSize (), 24); + EXPECT_EQ (a.getBound1 (), 1); + EXPECT_EQ (a.getBound2 (), 2); + EXPECT_EQ (a.getBound3 (), 3); + EXPECT_EQ (a.getBound4 (), 4); } - -TEST_F(ComplexArray_test,create) +TEST_F (ComplexArray_test, create) { - a2.create(2,3,4,5); - EXPECT_EQ(a2.getSize(),120); + a2.create (2, 3, 4, 5); + EXPECT_EQ (a2.getSize (), 120); } -TEST_F(ComplexArray_test,operator_double_multiply) +TEST_F (ComplexArray_test, operator_double_multiply) { a2 = com1; - c2 = 2.0 * a2 ; - for (int i = 0;i{0.0,0.0}; - a(1,0,0,0) = std::complex{1.0,1.0}; - a(0,1,0,0) = std::complex{2.0,2.0}; - a(1,1,0,0) = std::complex{3.0,3.0}; - EXPECT_DOUBLE_EQ(abs2(a),28.0); + ModuleBase::ComplexArray a (2, 2, 1, 1); + a (0, 0, 0, 0) = std::complex{0.0, 0.0}; + a (1, 0, 0, 0) = 
std::complex{1.0, 1.0}; + a (0, 1, 0, 0) = std::complex{2.0, 2.0}; + a (1, 1, 0, 0) = std::complex{3.0, 3.0}; + EXPECT_DOUBLE_EQ (abs2 (a), 28.0); } -TEST(ComplexArray,dot) +TEST (ComplexArray, dot) { - ModuleBase::ComplexArray a(2,2,1,1); - a(0,0,0,0) = std::complex{0.0,1.0}; - a(1,0,0,0) = std::complex{1.0,0.0}; - a(0,1,0,0) = std::complex{2.0,3.0}; - a(1,1,0,0) = std::complex{3.0,2.0}; + ModuleBase::ComplexArray a (2, 2, 1, 1); + a (0, 0, 0, 0) = std::complex{0.0, 1.0}; + a (1, 0, 0, 0) = std::complex{1.0, 0.0}; + a (0, 1, 0, 0) = std::complex{2.0, 3.0}; + a (1, 1, 0, 0) = std::complex{3.0, 2.0}; - ModuleBase::ComplexArray b(2,2,1,1); - b(0,0,0,0) = std::complex{1.0,0.0}; - b(1,0,0,0) = std::complex{2.0,-1.0}; - b(0,1,0,0) = std::complex{3.0,-2.0}; - b(1,1,0,0) = std::complex{4.0,-3.0}; - std::complex expectab {8.0,-32.0}; - std::complex expectba {8.0,32.0}; - EXPECT_COMPLEX_EQUAL(dot(a,b),expectab); - EXPECT_COMPLEX_EQUAL(dot(b,a),expectba); + ModuleBase::ComplexArray b (2, 2, 1, 1); + b (0, 0, 0, 0) = std::complex{1.0, 0.0}; + b (1, 0, 0, 0) = std::complex{2.0, -1.0}; + b (0, 1, 0, 0) = std::complex{3.0, -2.0}; + b (1, 1, 0, 0) = std::complex{4.0, -3.0}; + std::complex expectab{8.0, -32.0}; + std::complex expectba{8.0, 32.0}; + EXPECT_COMPLEX_EQUAL (dot (a, b), expectab); + EXPECT_COMPLEX_EQUAL (dot (b, a), expectba); } -TEST_F(ComplexArray_test,scale_accumulate_double) +TEST_F (ComplexArray_test, scale_accumulate_double) { a2 = com1; b2 = com2; - scale_accumulate(0.3,a2,b2); - for (int i = 0;i& a,const std::complex& b) +inline void + EXPECT_COMPLEX_EQ (const std::complex& a, const std::complex& b) { - EXPECT_DOUBLE_EQ(a.real(),b.real()); - EXPECT_DOUBLE_EQ(a.imag(),b.imag()); + EXPECT_DOUBLE_EQ (a.real (), b.real ()); + EXPECT_DOUBLE_EQ (a.imag (), b.imag ()); } class ComplexMatrixTest : public testing::Test { - protected: + protected: ModuleBase::ComplexMatrix cm22, cm23, cm32, cm33; - std::complex comzero {0.0,0.0}; + std::complex comzero{0.0, 0.0}; - void 
SetUp() + void + SetUp () { - cm22.create(2,2); - cm22(0,0) = std::complex{1.0,2.0}; - cm22(0,1) = std::complex{2.0,3.0}; - cm22(1,0) = std::complex{3.0,4.0}; - cm22(1,1) = std::complex{4.0,5.0}; - - cm23.create(2,3); - cm23(0,0) = std::complex{1.0,2.0}; - cm23(0,1) = std::complex{2.0,3.0}; - cm23(0,2) = std::complex{3.0,4.0}; - cm23(1,0) = std::complex{4.0,5.0}; - cm23(1,1) = std::complex{5.0,6.0}; - cm23(1,2) = std::complex{6.0,7.0}; + cm22.create (2, 2); + cm22 (0, 0) = std::complex{1.0, 2.0}; + cm22 (0, 1) = std::complex{2.0, 3.0}; + cm22 (1, 0) = std::complex{3.0, 4.0}; + cm22 (1, 1) = std::complex{4.0, 5.0}; + + cm23.create (2, 3); + cm23 (0, 0) = std::complex{1.0, 2.0}; + cm23 (0, 1) = std::complex{2.0, 3.0}; + cm23 (0, 2) = std::complex{3.0, 4.0}; + cm23 (1, 0) = std::complex{4.0, 5.0}; + cm23 (1, 1) = std::complex{5.0, 6.0}; + cm23 (1, 2) = std::complex{6.0, 7.0}; } }; -TEST(ComplexMatrix,Constructor) +TEST (ComplexMatrix, Constructor) { ModuleBase::ComplexMatrix cm; - EXPECT_EQ(cm.nr,0); - EXPECT_EQ(cm.nc,0); - EXPECT_EQ(cm.size,0); + EXPECT_EQ (cm.nr, 0); + EXPECT_EQ (cm.nc, 0); + EXPECT_EQ (cm.size, 0); } -TEST(ComplexMatrix,ConstructorNrNc) +TEST (ComplexMatrix, ConstructorNrNc) { - ModuleBase::ComplexMatrix cm(3,4); - EXPECT_EQ(cm.nr,3); - EXPECT_EQ(cm.nc,4); - EXPECT_EQ(cm.size,12); + ModuleBase::ComplexMatrix cm (3, 4); + EXPECT_EQ (cm.nr, 3); + EXPECT_EQ (cm.nc, 4); + EXPECT_EQ (cm.size, 12); } -TEST_F(ComplexMatrixTest,ConstructorCM) +TEST_F (ComplexMatrixTest, ConstructorCM) { - ModuleBase::ComplexMatrix cm1(cm22); - EXPECT_EQ(cm1.nr,cm22.nr); - EXPECT_EQ(cm1.nc,cm22.nc); - EXPECT_EQ(cm1.size,cm22.size); - for(int i=0; i{1.0,0.0}); - EXPECT_COMPLEX_EQ(cm(0,1),std::complex{2.0,0.0}); - EXPECT_COMPLEX_EQ(cm(1,0),std::complex{3.0,0.0}); - EXPECT_COMPLEX_EQ(cm(1,1),std::complex{4.0,0.0}); + EXPECT_COMPLEX_EQ (cm (0, 0), std::complex{1.0, 0.0}); + EXPECT_COMPLEX_EQ (cm (0, 1), std::complex{2.0, 0.0}); + EXPECT_COMPLEX_EQ (cm (1, 0), std::complex{3.0, 
0.0}); + EXPECT_COMPLEX_EQ (cm (1, 1), std::complex{4.0, 0.0}); } -TEST(ComplexMatrix,Create) +TEST (ComplexMatrix, Create) { ModuleBase::ComplexMatrix cm; - cm.create(111,222); - EXPECT_EQ(cm.nr,111); - EXPECT_EQ(cm.nc,222); - EXPECT_EQ(cm.size,111*222); + cm.create (111, 222); + EXPECT_EQ (cm.nr, 111); + EXPECT_EQ (cm.nc, 222); + EXPECT_EQ (cm.size, 111 * 222); } -TEST_F(ComplexMatrixTest,OperatorEqual) +TEST_F (ComplexMatrixTest, OperatorEqual) { ModuleBase::ComplexMatrix cm; cm = cm22; - EXPECT_EQ(cm.nr,cm22.nr); - EXPECT_EQ(cm.nc,cm22.nc); - EXPECT_EQ(cm.size,cm22.size); + EXPECT_EQ (cm.nr, cm22.nr); + EXPECT_EQ (cm.nc, cm22.nc); + EXPECT_EQ (cm.size, cm22.size); - for(int i=0; i com{1.0,2.0}; + ModuleBase::ComplexMatrix cm (cm22); + std::complex com{1.0, 2.0}; cm *= com; - EXPECT_EQ(cm.nr,cm22.nr); - EXPECT_EQ(cm.nc,cm22.nc); - EXPECT_EQ(cm.size,cm22.size); + EXPECT_EQ (cm.nr, cm22.nr); + EXPECT_EQ (cm.nc, cm22.nc); + EXPECT_EQ (cm.size, cm22.size); - for(int i=0; i{0.0,0.0}); - } + cm22.zero_out (); + for (int i = 0; i < cm22.size; ++i) + { + EXPECT_COMPLEX_EQ (cm22.c[i], std::complex{0.0, 0.0}); + } } - -TEST_F(ComplexMatrixTest,SetAsIdentityMatrix) +TEST_F (ComplexMatrixTest, SetAsIdentityMatrix) { - cm22.set_as_identity_matrix(); - EXPECT_COMPLEX_EQ(cm22(0,0),std::complex {1.0,0.0}); - EXPECT_COMPLEX_EQ(cm22(0,1),std::complex {0.0,0.0}); - EXPECT_COMPLEX_EQ(cm22(1,0),std::complex {0.0,0.0}); - EXPECT_COMPLEX_EQ(cm22(1,1),std::complex {1.0,0.0}); + cm22.set_as_identity_matrix (); + EXPECT_COMPLEX_EQ (cm22 (0, 0), std::complex{1.0, 0.0}); + EXPECT_COMPLEX_EQ (cm22 (0, 1), std::complex{0.0, 0.0}); + EXPECT_COMPLEX_EQ (cm22 (1, 0), std::complex{0.0, 0.0}); + EXPECT_COMPLEX_EQ (cm22 (1, 1), std::complex{1.0, 0.0}); } -TEST(ComplexMatrix,CheckReal) +TEST (ComplexMatrix, CheckReal) { - ModuleBase::ComplexMatrix cm22(2,2); - cm22(0,0) = std::complex {0.0,0.0}; - cm22(0,1) = std::complex {1.0,0.0}; - cm22(1,0) = std::complex {2.0,0.0}; - cm22(1,1) = std::complex 
{3.0,0.0}; - EXPECT_TRUE(cm22.checkreal()); + ModuleBase::ComplexMatrix cm22 (2, 2); + cm22 (0, 0) = std::complex{0.0, 0.0}; + cm22 (0, 1) = std::complex{1.0, 0.0}; + cm22 (1, 0) = std::complex{2.0, 0.0}; + cm22 (1, 1) = std::complex{3.0, 0.0}; + EXPECT_TRUE (cm22.checkreal ()); - cm22(0,0) = std::complex {0.0,0.01}; - EXPECT_FALSE(cm22.checkreal()); + cm22 (0, 0) = std::complex{0.0, 0.01}; + EXPECT_FALSE (cm22.checkreal ()); } -TEST_F(ComplexMatrixTest,OperatorPlus) +TEST_F (ComplexMatrixTest, OperatorPlus) { - ModuleBase::ComplexMatrix cm1(cm22),cm2; + ModuleBase::ComplexMatrix cm1 (cm22), cm2; cm2 = cm1 + cm22; - EXPECT_EQ(cm2.nr,cm22.nr); - EXPECT_EQ(cm2.nc,cm22.nc); - EXPECT_EQ(cm2.size,cm22.size); + EXPECT_EQ (cm2.nr, cm22.nr); + EXPECT_EQ (cm2.nc, cm22.nc); + EXPECT_EQ (cm2.size, cm22.size); - for(int i=0; i{1.0,1.0}; - cm23(0,1)=std::complex{2.0,0.0}; - cm23(0,2)=std::complex{3.0,-1.0}; - cm23(1,0)=std::complex{4.0,-2.0}; - cm23(1,1)=std::complex{5.0,-3.0}; - cm23(1,2)=std::complex{6.0,-4.0}; + ModuleBase::ComplexMatrix cm23 (2, 3), cm32 (3, 2), cm22, cm33; + cm23 (0, 0) = std::complex{1.0, 1.0}; + cm23 (0, 1) = std::complex{2.0, 0.0}; + cm23 (0, 2) = std::complex{3.0, -1.0}; + cm23 (1, 0) = std::complex{4.0, -2.0}; + cm23 (1, 1) = std::complex{5.0, -3.0}; + cm23 (1, 2) = std::complex{6.0, -4.0}; - cm32(0,0)=std::complex{-11.0,11.0}; - cm32(0,1)=std::complex{-12.0,12.0}; - cm32(1,0)=std::complex{-13.0,13.0}; - cm32(1,1)=std::complex{-14.0,14.0}; - cm32(2,0)=std::complex{-15.0,15.0}; - cm32(2,1)=std::complex{-16.0,16.0}; + cm32 (0, 0) = std::complex{-11.0, 11.0}; + cm32 (0, 1) = std::complex{-12.0, 12.0}; + cm32 (1, 0) = std::complex{-13.0, 13.0}; + cm32 (1, 1) = std::complex{-14.0, 14.0}; + cm32 (2, 0) = std::complex{-15.0, 15.0}; + cm32 (2, 1) = std::complex{-16.0, 16.0}; cm22 = cm23 * cm32; - EXPECT_EQ(cm22.nr,2); - EXPECT_EQ(cm22.nc,2); - EXPECT_EQ(cm22.size,4); - EXPECT_COMPLEX_EQ(cm22(0,0),std::complex{-78.0,86.0}); - 
EXPECT_COMPLEX_EQ(cm22(0,1),std::complex{-84.0,92.0}); - EXPECT_COMPLEX_EQ(cm22(1,0),std::complex{-78.0,320.0}); - EXPECT_COMPLEX_EQ(cm22(1,1),std::complex{-84.0,344.0}); + EXPECT_EQ (cm22.nr, 2); + EXPECT_EQ (cm22.nc, 2); + EXPECT_EQ (cm22.size, 4); + EXPECT_COMPLEX_EQ (cm22 (0, 0), std::complex{-78.0, 86.0}); + EXPECT_COMPLEX_EQ (cm22 (0, 1), std::complex{-84.0, 92.0}); + EXPECT_COMPLEX_EQ (cm22 (1, 0), std::complex{-78.0, 320.0}); + EXPECT_COMPLEX_EQ (cm22 (1, 1), std::complex{-84.0, 344.0}); cm33 = cm32 * cm23; - EXPECT_EQ(cm33.nr,3); - EXPECT_EQ(cm33.nc,3); - EXPECT_EQ(cm33.size,9); - EXPECT_COMPLEX_EQ(cm33(0,0),std::complex{-46.0,72.0 }); - EXPECT_COMPLEX_EQ(cm33(0,1),std::complex{-46.0,118.0 }); - EXPECT_COMPLEX_EQ(cm33(0,2),std::complex{-46.0,164.0 }); - EXPECT_COMPLEX_EQ(cm33(1,0),std::complex{-54.0,84.0 }); - EXPECT_COMPLEX_EQ(cm33(1,1),std::complex{-54.0,138.0 }); - EXPECT_COMPLEX_EQ(cm33(1,2),std::complex{-54.0,192.0 }); - EXPECT_COMPLEX_EQ(cm33(2,0),std::complex{-62.0,96.0 }); - EXPECT_COMPLEX_EQ(cm33(2,1),std::complex{-62.0,158.0 }); - EXPECT_COMPLEX_EQ(cm33(2,2),std::complex{-62.0,220.0 }); + EXPECT_EQ (cm33.nr, 3); + EXPECT_EQ (cm33.nc, 3); + EXPECT_EQ (cm33.size, 9); + EXPECT_COMPLEX_EQ (cm33 (0, 0), std::complex{-46.0, 72.0}); + EXPECT_COMPLEX_EQ (cm33 (0, 1), std::complex{-46.0, 118.0}); + EXPECT_COMPLEX_EQ (cm33 (0, 2), std::complex{-46.0, 164.0}); + EXPECT_COMPLEX_EQ (cm33 (1, 0), std::complex{-54.0, 84.0}); + EXPECT_COMPLEX_EQ (cm33 (1, 1), std::complex{-54.0, 138.0}); + EXPECT_COMPLEX_EQ (cm33 (1, 2), std::complex{-54.0, 192.0}); + EXPECT_COMPLEX_EQ (cm33 (2, 0), std::complex{-62.0, 96.0}); + EXPECT_COMPLEX_EQ (cm33 (2, 1), std::complex{-62.0, 158.0}); + EXPECT_COMPLEX_EQ (cm33 (2, 2), std::complex{-62.0, 220.0}); - EXPECT_DEATH(cm22 * cm32,""); + EXPECT_DEATH (cm22 * cm32, ""); } -TEST_F(ComplexMatrixTest,OperatorMultDouble) +TEST_F (ComplexMatrixTest, OperatorMultDouble) { - ModuleBase::ComplexMatrix cm2,cm3; + ModuleBase::ComplexMatrix 
cm2, cm3; cm2 = cm22 * 2.0; - EXPECT_EQ(cm2.nr,cm22.nr); - EXPECT_EQ(cm2.nc,cm22.nc); - EXPECT_EQ(cm2.size,cm22.size); - for(int i=0; i com {2.0,3.0}; + ModuleBase::ComplexMatrix cm2, cm3; + std::complex com{2.0, 3.0}; cm2 = cm22 * com; - EXPECT_EQ(cm2.nr,cm22.nr); - EXPECT_EQ(cm2.nc,cm22.nc); - EXPECT_EQ(cm2.size,cm22.size); - for(int i=0; i{5.0,7.0}); -} +TEST_F (ComplexMatrixTest, Trace) { EXPECT_COMPLEX_EQ (trace (cm22), std::complex{5.0, 7.0}); } -TEST_F(ComplexMatrixTest,abs2Row) +TEST_F (ComplexMatrixTest, abs2Row) { - EXPECT_EQ(abs2_row(cm22,0),18.0); - EXPECT_EQ(abs2_row(cm22,1),66.0); + EXPECT_EQ (abs2_row (cm22, 0), 18.0); + EXPECT_EQ (abs2_row (cm22, 1), 66.0); } -TEST_F(ComplexMatrixTest,abs2Column) +TEST_F (ComplexMatrixTest, abs2Column) { - EXPECT_EQ(abs2_column(cm22,0),30.0); - EXPECT_EQ(abs2_column(cm22,1),54.0); + EXPECT_EQ (abs2_column (cm22, 0), 30.0); + EXPECT_EQ (abs2_column (cm22, 1), 54.0); } -TEST_F(ComplexMatrixTest,abs2) -{ - EXPECT_EQ(abs2(cm22),84.0); -} +TEST_F (ComplexMatrixTest, abs2) { EXPECT_EQ (abs2 (cm22), 84.0); } -TEST_F(ComplexMatrixTest,abs2arraymatrix) +TEST_F (ComplexMatrixTest, abs2arraymatrix) { - ModuleBase::ComplexMatrix **m; + ModuleBase::ComplexMatrix** m; m = new ModuleBase::ComplexMatrix*[2]; m[0] = &cm22; m[1] = &cm23; - EXPECT_EQ(abs2(1,m),84.0); - EXPECT_EQ(abs2(2,m),314.0); - delete [] m; + EXPECT_EQ (abs2 (1, m), 84.0); + EXPECT_EQ (abs2 (2, m), 314.0); + delete[] m; } -TEST_F(ComplexMatrixTest,transpose) +TEST_F (ComplexMatrixTest, transpose) { - ModuleBase::ComplexMatrix m(transpose(cm22,false)); - EXPECT_COMPLEX_EQ(m(0,0),cm22(0,0)); - EXPECT_COMPLEX_EQ(m(0,1),cm22(1,0)); - EXPECT_COMPLEX_EQ(m(1,0),cm22(0,1)); - EXPECT_COMPLEX_EQ(m(1,1),cm22(1,1)); + ModuleBase::ComplexMatrix m (transpose (cm22, false)); + EXPECT_COMPLEX_EQ (m (0, 0), cm22 (0, 0)); + EXPECT_COMPLEX_EQ (m (0, 1), cm22 (1, 0)); + EXPECT_COMPLEX_EQ (m (1, 0), cm22 (0, 1)); + EXPECT_COMPLEX_EQ (m (1, 1), cm22 (1, 1)); - ModuleBase::ComplexMatrix 
m1(transpose(cm22,true)); - EXPECT_COMPLEX_EQ(m1(0,0),conj(cm22(0,0))); - EXPECT_COMPLEX_EQ(m1(0,1),conj(cm22(1,0))); - EXPECT_COMPLEX_EQ(m1(1,0),conj(cm22(0,1))); - EXPECT_COMPLEX_EQ(m1(1,1),conj(cm22(1,1))); + ModuleBase::ComplexMatrix m1 (transpose (cm22, true)); + EXPECT_COMPLEX_EQ (m1 (0, 0), conj (cm22 (0, 0))); + EXPECT_COMPLEX_EQ (m1 (0, 1), conj (cm22 (1, 0))); + EXPECT_COMPLEX_EQ (m1 (1, 0), conj (cm22 (0, 1))); + EXPECT_COMPLEX_EQ (m1 (1, 1), conj (cm22 (1, 1))); } -TEST_F(ComplexMatrixTest,conj) +TEST_F (ComplexMatrixTest, conj) { - ModuleBase::ComplexMatrix m = conj(cm22); - EXPECT_COMPLEX_EQ(m(0,0),std::conj(cm22(0,0))); - EXPECT_COMPLEX_EQ(m(0,1),std::conj(cm22(0,1))); - EXPECT_COMPLEX_EQ(m(1,0),std::conj(cm22(1,0))); - EXPECT_COMPLEX_EQ(m(1,1),std::conj(cm22(1,1))); + ModuleBase::ComplexMatrix m = conj (cm22); + EXPECT_COMPLEX_EQ (m (0, 0), std::conj (cm22 (0, 0))); + EXPECT_COMPLEX_EQ (m (0, 1), std::conj (cm22 (0, 1))); + EXPECT_COMPLEX_EQ (m (1, 0), std::conj (cm22 (1, 0))); + EXPECT_COMPLEX_EQ (m (1, 1), std::conj (cm22 (1, 1))); } -TEST_F(ComplexMatrixTest,ScaleAccumulate) +TEST_F (ComplexMatrixTest, ScaleAccumulate) { - std::complex com1{2.0,2.0}; - ModuleBase::ComplexMatrix m(2.0*cm22),m1; + std::complex com1{2.0, 2.0}; + ModuleBase::ComplexMatrix m (2.0 * cm22), m1; m1 = m; - ModuleBase::scale_accumulate(com1,cm22,m); - for(int i=0; i com1{2.0,2.0}; - ModuleBase::ComplexMatrix cm1(cm22),cm2(2.0*cm22),cm3(3.0*cm22),cm4(4.0*cm22); - ModuleBase::ComplexMatrix **cmout; - ModuleBase::ComplexMatrix **cmin; + std::complex com1{2.0, 2.0}; + ModuleBase::ComplexMatrix cm1 (cm22), cm2 (2.0 * cm22), cm3 (3.0 * cm22), cm4 (4.0 * cm22); + ModuleBase::ComplexMatrix** cmout; + ModuleBase::ComplexMatrix** cmin; cmout = new ModuleBase::ComplexMatrix*[2]; - cmin = new ModuleBase::ComplexMatrix*[2]; + cmin = new ModuleBase::ComplexMatrix*[2]; cmout[0] = &cm1; cmout[1] = &cm2; @@ -478,44 +471,40 @@ TEST_F(ComplexMatrixTest,ScaleAccumulateArray) cmin[1] = &cm4; 
int size = cm22.size; - ModuleBase::scale_accumulate(2,com1,cmin,cmout); + ModuleBase::scale_accumulate (2, com1, cmin, cmout); - for(int i=0; i com1{2.0,2.0},com2{-1.0,-2.0}; - ModuleBase::ComplexMatrix cm1(1.1*cm22),cm2(2.5*cm22),cmout(2,2); + std::complex com1{2.0, 2.0}, com2{-1.0, -2.0}; + ModuleBase::ComplexMatrix cm1 (1.1 * cm22), cm2 (2.5 * cm22), cmout (2, 2); - ModuleBase::scaled_sum(com1,cm1,com2,cm2,cmout); - for(int i=0; i com1{2.0,2.0},com2{-1.0,-2.0}; - ModuleBase::ComplexMatrix cm1(cm22),cm2(2.0*cm22),cm3(3.0*cm22),cm4(4.0*cm22); - ModuleBase::ComplexMatrix cm5(2,2),cm6(2,2); - ModuleBase::ComplexMatrix **cmout; - ModuleBase::ComplexMatrix **cmin1; - ModuleBase::ComplexMatrix **cmin2; + std::complex com1{2.0, 2.0}, com2{-1.0, -2.0}; + ModuleBase::ComplexMatrix cm1 (cm22), cm2 (2.0 * cm22), cm3 (3.0 * cm22), cm4 (4.0 * cm22); + ModuleBase::ComplexMatrix cm5 (2, 2), cm6 (2, 2); + ModuleBase::ComplexMatrix** cmout; + ModuleBase::ComplexMatrix** cmin1; + ModuleBase::ComplexMatrix** cmin2; cmout = new ModuleBase::ComplexMatrix*[2]; cmin1 = new ModuleBase::ComplexMatrix*[2]; cmin2 = new ModuleBase::ComplexMatrix*[2]; @@ -529,54 +518,54 @@ TEST_F(ComplexMatrixTest,ScaleSumArray) int size = cm22.size; - ModuleBase::scaled_sum(2,com1,cmin1,com2,cmin2,cmout); - - for(int i=0; ithreshold_abs && std::imag(data)) <= threshold_imag - ofs.open("printtest2.log"); - cm22.print(ofs,1e-10,2); - ofs.close(); - ifs.open("printtest2.log"); - getline(ifs,output); - EXPECT_THAT(output,testing::HasSubstr("1\t(2,3)\t")); - getline(ifs,output); - EXPECT_THAT(output,testing::HasSubstr("(3,4)\t(4,5)\t")); - ifs.close(); - remove("printtest2.log"); -// The condition of std::abs(data) threshold_imag - ofs.open("printtest3.log"); - cm22.print(ofs,3,1e-10); - ofs.close(); - ifs.open("printtest3.log"); - getline(ifs,output); - EXPECT_THAT(output,testing::HasSubstr("0\t(2,3)\t")); - getline(ifs,output); - EXPECT_THAT(output,testing::HasSubstr("(3,4)\t(4,5)\t")); - ifs.close(); - 
remove("printtest3.log"); + ModuleBase::scaled_sum (2, com1, cmin1, com2, cmin2, cmout); + + for (int i = 0; i < size; ++i) + { + EXPECT_COMPLEX_EQ ((*cmout[0]).c[i], com1 * (*cmin1[0]).c[i] + com2 * (*cmin2[0]).c[i]); + EXPECT_COMPLEX_EQ ((*cmout[1]).c[i], com1 * (*cmin1[1]).c[i] + com2 * (*cmin2[1]).c[i]); + } + + delete[] cmout; + delete[] cmin1; + delete[] cmin2; +} + +TEST_F (ComplexMatrixTest, print) +{ + std::ifstream ifs; + std::ofstream ofs; + ofs.open ("printtest1.log"); + cm22.print (ofs, 1e-10, 1e-10); + ofs.close (); + ifs.open ("printtest1.log"); + std::string output; + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("(1,2)\t(2,3)\t")); + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("(3,4)\t(4,5)\t")); + ifs.close (); + remove ("printtest1.log"); + // The condition of std::abs(data)>threshold_abs && std::imag(data)) <= threshold_imag + ofs.open ("printtest2.log"); + cm22.print (ofs, 1e-10, 2); + ofs.close (); + ifs.open ("printtest2.log"); + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("1\t(2,3)\t")); + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("(3,4)\t(4,5)\t")); + ifs.close (); + remove ("printtest2.log"); + // The condition of std::abs(data) threshold_imag + ofs.open ("printtest3.log"); + cm22.print (ofs, 3, 1e-10); + ofs.close (); + ifs.open ("printtest3.log"); + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("0\t(2,3)\t")); + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("(3,4)\t(4,5)\t")); + ifs.close (); + remove ("printtest3.log"); } diff --git a/source/source_base/test/container_operator_test.cpp b/source/source_base/test/container_operator_test.cpp index cae4fefad1e..18693558430 100644 --- a/source/source_base/test/container_operator_test.cpp +++ b/source/source_base/test/container_operator_test.cpp @@ -22,108 +22,117 @@ * - "*" operator for scalar*map */ -TEST(ContainerOperator,VectorPlus) +TEST (ContainerOperator, 
VectorPlus) { - std::vector a(20,10.0); - std::vector b(20,1.0); - std::vector c(20); - c = a+b; - for (int i=0;i a (20, 10.0); + std::vector b (20, 1.0); + std::vector c (20); + c = a + b; + for (int i = 0; i < c.size (); i++) + { + EXPECT_EQ (c[i], 11.0); + } } -TEST(ContainerOperator,VectorMinus) +TEST (ContainerOperator, VectorMinus) { - std::vector a(20,10.0); - std::vector b(20,1.0); - std::vector c(20); - c = a-b; - for (int i=0;i a (20, 10.0); + std::vector b (20, 1.0); + std::vector c (20); + c = a - b; + for (int i = 0; i < c.size (); i++) + { + EXPECT_EQ (c[i], 9.0); + } } -TEST(ContainerOperator,VectorMultiply) +TEST (ContainerOperator, VectorMultiply) { - std::vector a(20,10.0); - double b = 2.0; - std::vector c(20); - c = b*a; - for (int i=0;i a (20, 10.0); + double b = 2.0; + std::vector c (20); + c = b * a; + for (int i = 0; i < c.size (); i++) + { + EXPECT_EQ (c[i], 20.0); + } } -TEST(ContainerOperator,VectorLengthCheck) +TEST (ContainerOperator, VectorLengthCheck) { - std::vector a(20,10.0); - std::vector b(19,1.0); - std::vector c(20); - EXPECT_DEATH(c=a+b,""); - EXPECT_DEATH(c=a-b,""); + std::vector a (20, 10.0); + std::vector b (19, 1.0); + std::vector c (20); + EXPECT_DEATH (c = a + b, ""); + EXPECT_DEATH (c = a - b, ""); } -TEST(ContainerOperator,MapLengthCheck) +TEST (ContainerOperator, MapLengthCheck) { - std::map a; - std::map b; - std::map c; - for (int i=0;i<10;i++) - { - a.insert(std::pair (i, i*2.0)); - if (i<9) b.insert(std::pair (i, i*3.0)); - } - EXPECT_DEATH(c=a+b,""); - EXPECT_DEATH(c=a-b,""); + std::map a; + std::map b; + std::map c; + for (int i = 0; i < 10; i++) + { + a.insert (std::pair (i, i * 2.0)); + if (i < 9) + { + b.insert (std::pair (i, i * 3.0)); + } + } + EXPECT_DEATH (c = a + b, ""); + EXPECT_DEATH (c = a - b, ""); } -TEST(ContainerOperator,MapPlus) +TEST (ContainerOperator, MapPlus) { - std::map a; - std::map b; - std::map c; - for (int i=0;i<10;i++) - { - a.insert(std::pair (i, i*2.0)); - b.insert(std::pair (i, 
i*3.0)); - } - c = a+b; - for (int i=0;i<10;i++) - { - //std::cout << c[i] << std::endl; - EXPECT_EQ(c[i],i*5.0); - } + std::map a; + std::map b; + std::map c; + for (int i = 0; i < 10; i++) + { + a.insert (std::pair (i, i * 2.0)); + b.insert (std::pair (i, i * 3.0)); + } + c = a + b; + for (int i = 0; i < 10; i++) + { + // std::cout << c[i] << std::endl; + EXPECT_EQ (c[i], i * 5.0); + } } -TEST(ContainerOperator,MapMinus) +TEST (ContainerOperator, MapMinus) { - std::map a; - std::map b; - std::map c; - for (int i=0;i<10;i++) - { - a.insert(std::pair (i, i*4.0)); - b.insert(std::pair (i, i*2.0)); - } - c = a-b; - for (int i=0;i<10;i++) - { - //std::cout << c[i] << std::endl; - EXPECT_EQ(c[i],i*2.0); - } + std::map a; + std::map b; + std::map c; + for (int i = 0; i < 10; i++) + { + a.insert (std::pair (i, i * 4.0)); + b.insert (std::pair (i, i * 2.0)); + } + c = a - b; + for (int i = 0; i < 10; i++) + { + // std::cout << c[i] << std::endl; + EXPECT_EQ (c[i], i * 2.0); + } } -TEST(ContainerOperator,MapMultiply) +TEST (ContainerOperator, MapMultiply) { - std::map a; - double b = 3.0; - std::map c; - for (int i=0;i<10;i++) - { - a.insert(std::pair (i, i*4.0)); - } - c = b*a; - for (int i=0;i<10;i++) - { - //std::cout << c[i] << std::endl; - EXPECT_EQ(c[i],i*4.0*3.0); - } + std::map a; + double b = 3.0; + std::map c; + for (int i = 0; i < 10; i++) + { + a.insert (std::pair (i, i * 4.0)); + } + c = b * a; + for (int i = 0; i < 10; i++) + { + // std::cout << c[i] << std::endl; + EXPECT_EQ (c[i], i * 4.0 * 3.0); + } } diff --git a/source/source_base/test/cubic_spline_test.cpp b/source/source_base/test/cubic_spline_test.cpp index ffe5461dc06..2740e39560b 100644 --- a/source/source_base/test/cubic_spline_test.cpp +++ b/source/source_base/test/cubic_spline_test.cpp @@ -42,10 +42,9 @@ using BoundaryType = CubicSpline::BoundaryType; */ class CubicSplineTest : public ::testing::Test { -protected: - - CubicSplineTest(); - ~CubicSplineTest() = default; + protected: + 
CubicSplineTest (); + ~CubicSplineTest () = default; /// maximum number of knots int n_max_; @@ -84,7 +83,6 @@ class CubicSplineTest : public ::testing::Test /// y/dy/d2y tolerance for cross-check const double tol_[3] = {1e-14, 1e-13, 1e-12}; - /** * @brief Sample functions & derivatives in error bound check. * @@ -96,226 +94,196 @@ class CubicSplineTest : public ::testing::Test * Functions here should not include those kind of functions. * */ - std::vector>> f_; + std::vector>> f_; /// theoretical error bound for complete cubic spline - double error_bound( - int n, - const double* x, - const std::function& f, - int d = 0 - ) const; + double error_bound (int n, const double* x, const std::function& f, int d = 0) const; /// - void read( - const std::string& fname, - int& n, - double* x, - double* y, - BoundaryCondition& bc_start, - BoundaryCondition& bc_end, - int& n_interp, - double* x_interp, - double* y_interp, - double* dy_interp, - double* d2y_interp - ) const; + void read (const std::string& fname, + int& n, + double* x, + double* y, + BoundaryCondition& bc_start, + BoundaryCondition& bc_end, + int& n_interp, + double* x_interp, + double* y_interp, + double* dy_interp, + double* d2y_interp) const; }; +CubicSplineTest::CubicSplineTest () + : n_max_ (1000), spline_ (3 * n_max_), x_ (spline_.data ()), y_ (x_ + n_max_), dy_ (y_ + n_max_), + n_interp_max_ (1000), interp_ (7 * n_interp_max_), x_interp_ (interp_.data ()), + y_interp_ (x_interp_ + n_interp_max_), dy_interp_ (y_interp_ + n_interp_max_), + d2y_interp_ (dy_interp_ + n_interp_max_), y_ref_ (d2y_interp_ + n_interp_max_), dy_ref_ (y_ref_ + n_interp_max_), + d2y_ref_ (dy_ref_ + n_interp_max_), f_{ + { + [] (double x) { return std::sin (x); }, + [] (double x) { return std::cos (x); }, + [] (double x) { return -std::sin (x); }, + [] (double x) { return -std::cos (x); }, + [] (double x) { return std::sin (x); }, + }, + { + [] (double x) { return std::exp (-x); }, + [] (double x) { return -std::exp (-x); }, + [] 
(double x) { return std::exp (-x); }, + [] (double x) { return -std::exp (-x); }, + [] (double x) { return std::exp (-x); }, + }, + { + [] (double x) { return std::log (x); }, + [] (double x) { return 1.0 / x; }, + [] (double x) { return -1.0 / (x * x); }, + [] (double x) { return 2.0 / (x * x * x); }, + [] (double x) { return -6.0 / (x * x * x * x); }, + }, + } +{ +} -CubicSplineTest::CubicSplineTest(): - n_max_(1000), - spline_(3 * n_max_), - x_(spline_.data()), - y_(x_ + n_max_), - dy_(y_ + n_max_), - n_interp_max_(1000), - interp_(7 * n_interp_max_), - x_interp_(interp_.data()), - y_interp_(x_interp_ + n_interp_max_), - dy_interp_(y_interp_ + n_interp_max_), - d2y_interp_(dy_interp_ + n_interp_max_), - y_ref_(d2y_interp_ + n_interp_max_), - dy_ref_(y_ref_ + n_interp_max_), - d2y_ref_(dy_ref_ + n_interp_max_), - f_{ - { - [](double x) { return std::sin(x); }, - [](double x) { return std::cos(x); }, - [](double x) { return -std::sin(x); }, - [](double x) { return -std::cos(x); }, - [](double x) { return std::sin(x); }, - }, - { - [](double x) { return std::exp(-x); }, - [](double x) { return -std::exp(-x); }, - [](double x) { return std::exp(-x); }, - [](double x) { return -std::exp(-x); }, - [](double x) { return std::exp(-x); }, - }, - { - [](double x) { return std::log(x); }, - [](double x) { return 1.0 / x; }, - [](double x) { return -1.0 / (x * x); }, - [](double x) { return 2.0 / (x * x * x); }, - [](double x) { return -6.0 / (x * x * x * x); }, - }, - } -{} - - -double CubicSplineTest::error_bound( - int n, - const double* x, - const std::function& d4f, - int d -) const +double + CubicSplineTest::error_bound (int n, const double* x, const std::function& d4f, int d) const { - std::vector buffer(n); + std::vector buffer (n); - std::adjacent_difference(x, x + n, buffer.begin()); - double max_dx = *std::max_element(buffer.begin() + 1, buffer.end()); + std::adjacent_difference (x, x + n, buffer.begin ()); + double max_dx = *std::max_element (buffer.begin () + 
1, buffer.end ()); - auto d4f_abs = [&d4f](double x) { return std::abs(d4f(x)); }; - std::transform(x, x + n, buffer.begin(), d4f_abs); - double max_d4f = *std::max_element(buffer.begin(), buffer.end()); + auto d4f_abs = [&d4f] (double x) { return std::abs (d4f (x)); }; + std::transform (x, x + n, buffer.begin (), d4f_abs); + double max_d4f = *std::max_element (buffer.begin (), buffer.end ()); // See Carl de Boor, "A Practical Guide to Splines", Chapter V. switch (d) - { + { case 0: - return 5.0 / 384.0 * std::pow(max_dx, 4) * max_d4f; + return 5.0 / 384.0 * std::pow (max_dx, 4) * max_d4f; case 1: - return 1.0 / 24.0 * std::pow(max_dx, 3) * max_d4f; + return 1.0 / 24.0 * std::pow (max_dx, 3) * max_d4f; case 2: - return 3.0 / 8.0 * std::pow(max_dx, 2) * max_d4f; + return 3.0 / 8.0 * std::pow (max_dx, 2) * max_d4f; default: - assert(false); // should not reach here - } + assert (false); // should not reach here + } } - -void CubicSplineTest::read( - const std::string& fname, - int& n, - double* x, - double* y, - BoundaryCondition& bc_start, - BoundaryCondition& bc_end, - int& n_interp, - double* x_interp, - double* y_interp, - double* dy_interp, - double* d2y_interp -) const +void + CubicSplineTest::read (const std::string& fname, + int& n, + double* x, + double* y, + BoundaryCondition& bc_start, + BoundaryCondition& bc_end, + int& n_interp, + double* x_interp, + double* y_interp, + double* dy_interp, + double* d2y_interp) const { - std::ifstream ifs(fname); - assert(ifs.is_open()); + std::ifstream ifs (fname); + assert (ifs.is_open ()); std::string line, bc1, bc2; // read boundary conditions - std::getline(ifs, line); - std::stringstream ss(line); + std::getline (ifs, line); + std::stringstream ss (line); ss >> bc1 >> bc2; - auto bc_parse = [](const std::string& bc) - { - if (bc == "periodic") - { - return BoundaryCondition(BoundaryType::periodic); - } - if (bc == "not-a-knot") + auto bc_parse = [] (const std::string& bc) { - return 
BoundaryCondition(BoundaryType::not_a_knot); - } - if (bc.find("first_deriv") != std::string::npos) - { - return BoundaryCondition(BoundaryType::first_deriv, - std::stod(bc.substr(12, std::string::npos))); - } - if (bc.find("second_deriv") != std::string::npos) - { - return BoundaryCondition(BoundaryType::second_deriv, - std::stod(bc.substr(13, std::string::npos))); - } - else - { - assert(false); - } - }; - - bc_start = bc_parse(bc1); - bc_end = bc_parse(bc2); + if (bc == "periodic") + { + return BoundaryCondition (BoundaryType::periodic); + } + if (bc == "not-a-knot") + { + return BoundaryCondition (BoundaryType::not_a_knot); + } + if (bc.find ("first_deriv") != std::string::npos) + { + return BoundaryCondition (BoundaryType::first_deriv, std::stod (bc.substr (12, std::string::npos))); + } + if (bc.find ("second_deriv") != std::string::npos) + { + return BoundaryCondition (BoundaryType::second_deriv, + std::stod (bc.substr (13, std::string::npos))); + } + else + { + assert (false); + } + }; + + bc_start = bc_parse (bc1); + bc_end = bc_parse (bc2); double* data[6] = {x, y, x_interp, y_interp, dy_interp, d2y_interp}; for (int i = 0; i < 6; ++i) - { - std::getline(ifs, line); - std::stringstream ss(line); - data[i] = std::copy(std::istream_iterator(ss), - std::istream_iterator(), data[i]); - } + { + std::getline (ifs, line); + std::stringstream ss (line); + data[i] = std::copy (std::istream_iterator (ss), std::istream_iterator (), data[i]); + } n = data[0] - x; n_interp = data[2] - x_interp; } - -TEST_F(CubicSplineTest, MultiEval) +TEST_F (CubicSplineTest, MultiEval) { int n = 100; double xmin = 0.1; double xmax = 10; double dx = (xmax - xmin) / (n - 1); - std::for_each(x_, x_ + n, [&](double& x) { x = xmin + (&x - x_) * dx; }); + std::for_each (x_, x_ + n, [&] (double& x) { x = xmin + (&x - x_) * dx; }); // empty interpolant with specified knots - CubicSpline cubspl(n, xmin, dx); - cubspl.reserve(f_.size()); - - for (size_t i = 0; i < f_.size(); ++i) - { - 
std::transform(x_, x_ + n, y_, [this, i](double x) { return f_[i][0](x); }); - cubspl.add(y_, {BoundaryType::first_deriv, f_[i][1](xmin)}, - {BoundaryType::first_deriv, f_[i][1](xmax)}); - } - - EXPECT_EQ(cubspl.n_spline(), f_.size()); - - std::vector> err_bound(f_.size(), std::vector(3)); - for (size_t i = 0; i < f_.size(); ++i) - { - err_bound[i][0] = error_bound(n, x_, f_[i][4], 0); - err_bound[i][1] = error_bound(n, x_, f_[i][4], 1); - err_bound[i][2] = error_bound(n, x_, f_[i][4], 2); - } + CubicSpline cubspl (n, xmin, dx); + cubspl.reserve (f_.size ()); + + for (size_t i = 0; i < f_.size (); ++i) + { + std::transform (x_, x_ + n, y_, [this, i] (double x) { return f_[i][0](x); }); + cubspl.add (y_, {BoundaryType::first_deriv, f_[i][1](xmin)}, {BoundaryType::first_deriv, f_[i][1](xmax)}); + } + + EXPECT_EQ (cubspl.n_spline (), f_.size ()); + + std::vector> err_bound (f_.size (), std::vector (3)); + for (size_t i = 0; i < f_.size (); ++i) + { + err_bound[i][0] = error_bound (n, x_, f_[i][4], 0); + err_bound[i][1] = error_bound (n, x_, f_[i][4], 1); + err_bound[i][2] = error_bound (n, x_, f_[i][4], 2); + } int n_interp = 1000; double dx_interp = (xmax - xmin) / (n_interp - 1); - std::for_each(x_interp_, x_interp_ + n_interp, - [&](double& x) { x = (&x - x_interp_) * dx_interp + xmin; }); + std::for_each (x_interp_, x_interp_ + n_interp, [&] (double& x) { x = (&x - x_interp_) * dx_interp + xmin; }); for (int p = 0; p < n_interp; ++p) - { - cubspl.multi_eval(x_interp_[p], y_interp_, dy_interp_, d2y_interp_); - double ytmp, dytmp, d2ytmp; - for (size_t i = 0; i < f_.size(); ++i) { - EXPECT_LT(std::abs(y_interp_[i] - f_[i][0](x_interp_[p])), err_bound[i][0]); - EXPECT_LT(std::abs(dy_interp_[i] - f_[i][1](x_interp_[p])), err_bound[i][1]); - EXPECT_LT(std::abs(d2y_interp_[i] - f_[i][2](x_interp_[p])), err_bound[i][2]); - - cubspl.eval(1, &x_interp_[p], &ytmp, &dytmp, &d2ytmp, i); - EXPECT_NEAR(ytmp, y_interp_[i], tol_[0]); - EXPECT_NEAR(dytmp, dy_interp_[i], tol_[1]); 
- EXPECT_NEAR(d2ytmp, d2y_interp_[i], tol_[2]); + cubspl.multi_eval (x_interp_[p], y_interp_, dy_interp_, d2y_interp_); + double ytmp, dytmp, d2ytmp; + for (size_t i = 0; i < f_.size (); ++i) + { + EXPECT_LT (std::abs (y_interp_[i] - f_[i][0](x_interp_[p])), err_bound[i][0]); + EXPECT_LT (std::abs (dy_interp_[i] - f_[i][1](x_interp_[p])), err_bound[i][1]); + EXPECT_LT (std::abs (d2y_interp_[i] - f_[i][2](x_interp_[p])), err_bound[i][2]); + + cubspl.eval (1, &x_interp_[p], &ytmp, &dytmp, &d2ytmp, i); + EXPECT_NEAR (ytmp, y_interp_[i], tol_[0]); + EXPECT_NEAR (dytmp, dy_interp_[i], tol_[1]); + EXPECT_NEAR (d2ytmp, d2y_interp_[i], tol_[2]); + } } - } } - -TEST_F(CubicSplineTest, ErrorBound) +TEST_F (CubicSplineTest, ErrorBound) { // Error bound formula used in this test correspond to the complete cubic // spline interpolant (exact first_deriv boundary conditions at both ends). @@ -326,95 +294,91 @@ TEST_F(CubicSplineTest, ErrorBound) double xmin = 0.1; double xmax = 10; - double rho0 = std::log(xmin); - double drho = (std::log(xmax) - rho0) / (n - 1); - std::for_each(x_, x_ + n, [&](double& x) { x = std::exp(rho0 + (&x - x_) * drho); }); + double rho0 = std::log (xmin); + double drho = (std::log (xmax) - rho0) / (n - 1); + std::for_each (x_, x_ + n, [&] (double& x) { x = std::exp (rho0 + (&x - x_) * drho); }); // places to evaluate the interpolant int n_interp = 777; double dx_interp = (xmax - xmin) / (n_interp - 1); - std::for_each(x_interp_, x_interp_ + n_interp, - [&](double& x) { x = (&x - x_interp_) * dx_interp + xmin; }); + std::for_each (x_interp_, x_interp_ + n_interp, [&] (double& x) { x = (&x - x_interp_) * dx_interp + xmin; }); // make sure x_interp is inside the range of x x_interp_[0] += tol_[0]; x_interp_[n_interp - 1] -= tol_[0]; - for (size_t i = 0; i < f_.size(); ++i) - { - std::transform(x_, x_ + n, y_, f_[i][0]); - - // complete cubic spline (exact first_deriv boundary conditions at both ends) - CubicSpline::build( - n, x_, y_, - 
{BoundaryType::first_deriv, f_[i][1](x_[0])}, - {BoundaryType::first_deriv, f_[i][1](x_[n - 1])}, - dy_ - ); - - CubicSpline::eval( - n, x_, y_, dy_, - n_interp, x_interp_, y_interp_, dy_interp_, d2y_interp_ - ); - - double* diff[3] = {y_interp_, dy_interp_, d2y_interp_}; - for (int d = 0; d < 3; ++d) + for (size_t i = 0; i < f_.size (); ++i) { - std::transform(x_interp_, x_interp_ + n_interp, diff[d], diff[d], - [&](double x, double y) { return std::abs(y - f_[i][d](x)); }); - - double err_bound = error_bound(n, x_, f_[i][4], d); - EXPECT_TRUE(std::all_of(diff[d], diff[d] + n_interp, - [err_bound](double diff) { return diff < err_bound; })); + std::transform (x_, x_ + n, y_, f_[i][0]); + + // complete cubic spline (exact first_deriv boundary conditions at both ends) + CubicSpline::build (n, + x_, + y_, + {BoundaryType::first_deriv, f_[i][1](x_[0])}, + {BoundaryType::first_deriv, f_[i][1](x_[n - 1])}, + dy_); + + CubicSpline::eval (n, x_, y_, dy_, n_interp, x_interp_, y_interp_, dy_interp_, d2y_interp_); + + double* diff[3] = {y_interp_, dy_interp_, d2y_interp_}; + for (int d = 0; d < 3; ++d) + { + std::transform (x_interp_, + x_interp_ + n_interp, + diff[d], + diff[d], + [&] (double x, double y) { return std::abs (y - f_[i][d](x)); }); + + double err_bound = error_bound (n, x_, f_[i][4], d); + EXPECT_TRUE (std::all_of (diff[d], + diff[d] + n_interp, + [err_bound] (double diff) { return diff < err_bound; })); + } } - } } - -TEST_F(CubicSplineTest, Reserve) +TEST_F (CubicSplineTest, Reserve) { int n_spline = 20; int n = 1000; double x0 = 0.0, dx = 0.01; for (int i = 0; i < n; ++i) - { - x_[i] = x0 + i * dx; - y_[i] = std::sin(x_[i]); - } - - CubicSpline cubspl(n, x0, dx, y_); - cubspl.reserve(n_spline); - EXPECT_EQ(cubspl.heap_usage(), n_spline * 2 * n * sizeof(double)); - - cubspl = CubicSpline(n, x_, y_); - cubspl.reserve(n_spline); - EXPECT_EQ(cubspl.heap_usage(), (1 + n_spline * 2) * n * sizeof(double)); -} + { + x_[i] = x0 + i * dx; + y_[i] = std::sin (x_[i]); 
+ } + + CubicSpline cubspl (n, x0, dx, y_); + cubspl.reserve (n_spline); + EXPECT_EQ (cubspl.heap_usage (), n_spline * 2 * n * sizeof (double)); + cubspl = CubicSpline (n, x_, y_); + cubspl.reserve (n_spline); + EXPECT_EQ (cubspl.heap_usage (), (1 + n_spline * 2) * n * sizeof (double)); +} -TEST_F(CubicSplineTest, MinMax) +TEST_F (CubicSplineTest, MinMax) { int n = 1000; double x0 = 0.0, dx = 0.01; for (int i = 0; i < n; ++i) - { - x_[i] = x0 + i * dx; - y_[i] = std::sin(x_[i]); - } + { + x_[i] = x0 + i * dx; + y_[i] = std::sin (x_[i]); + } - CubicSpline cubspl(n, x_, y_); - EXPECT_EQ(cubspl.xmin(), x_[0]); - EXPECT_EQ(cubspl.xmax(), x_[n - 1]); + CubicSpline cubspl (n, x_, y_); + EXPECT_EQ (cubspl.xmin (), x_[0]); + EXPECT_EQ (cubspl.xmax (), x_[n - 1]); int m = 300; - cubspl = CubicSpline(m, x0, dx, y_); - EXPECT_EQ(cubspl.xmin(), x0); - EXPECT_EQ(cubspl.xmax(), x0 + (m - 1) * dx); - + cubspl = CubicSpline (m, x0, dx, y_); + EXPECT_EQ (cubspl.xmin (), x0); + EXPECT_EQ (cubspl.xmax (), x0 + (m - 1) * dx); } - -TEST_F(CubicSplineTest, CrossCheck) +TEST_F (CubicSplineTest, CrossCheck) { std::vector fnames = { "./data/sin_not_a_knot.dat", @@ -431,29 +395,27 @@ TEST_F(CubicSplineTest, CrossCheck) int n = 0, n_interp = 0; BoundaryCondition bc_start, bc_end; - for (const auto& fname : fnames) - { - read(fname, n, x_, y_, bc_start, bc_end, - n_interp, x_interp_, y_ref_, dy_ref_, d2y_ref_); - CubicSpline cubspl(n, x_, y_, bc_start, bc_end); - cubspl.eval(n_interp, x_interp_, y_interp_, dy_interp_, d2y_interp_); - - double* diff[] = {y_interp_, dy_interp_, d2y_interp_}; - double* ref[] = {y_ref_, dy_ref_, d2y_ref_}; - for (int d = 0; d < 3; ++d) + for (const auto& fname: fnames) { - std::transform(diff[d], diff[d] + n_interp, ref[d], diff[d], std::minus()); - EXPECT_TRUE(std::all_of(diff[d], diff[d] + n_interp, - [this, d](double diff) { return std::abs(diff) < tol_[d]; })); + read (fname, n, x_, y_, bc_start, bc_end, n_interp, x_interp_, y_ref_, dy_ref_, d2y_ref_); + 
CubicSpline cubspl (n, x_, y_, bc_start, bc_end); + cubspl.eval (n_interp, x_interp_, y_interp_, dy_interp_, d2y_interp_); + + double* diff[] = {y_interp_, dy_interp_, d2y_interp_}; + double* ref[] = {y_ref_, dy_ref_, d2y_ref_}; + for (int d = 0; d < 3; ++d) + { + std::transform (diff[d], diff[d] + n_interp, ref[d], diff[d], std::minus ()); + EXPECT_TRUE (std::all_of (diff[d], + diff[d] + n_interp, + [this, d] (double diff) { return std::abs (diff) < tol_[d]; })); + } } - } } - -int main() +int + main () { - ::testing::InitGoogleTest(); - return RUN_ALL_TESTS(); + ::testing::InitGoogleTest (); + return RUN_ALL_TESTS (); } - - diff --git a/source/source_base/test/element_basis_index_test.cpp b/source/source_base/test/element_basis_index_test.cpp index f844e32bed1..3e419208ddb 100644 --- a/source/source_base/test/element_basis_index_test.cpp +++ b/source/source_base/test/element_basis_index_test.cpp @@ -14,41 +14,39 @@ class IndexLNMTest : public testing::Test { - }; -TEST_F(IndexLNMTest,makeindex) +TEST_F (IndexLNMTest, makeindex) { ModuleBase::Element_Basis_Index::Range rangtest; - rangtest.resize(2); - rangtest[0].resize(3); - rangtest[1].resize(3); - rangtest[0][0].N=1; - rangtest[0][0].M=2; - rangtest[0][1].N=1; - rangtest[0][1].M=3; - rangtest[0][2].N=2; - rangtest[0][2].M=1; - rangtest[1][0].N=2; - rangtest[1][0].M=2; - rangtest[1][1].N=2; - rangtest[1][1].M=3; - rangtest[1][2].N=3; - rangtest[1][2].M=3; + rangtest.resize (2); + rangtest[0].resize (3); + rangtest[1].resize (3); + rangtest[0][0].N = 1; + rangtest[0][0].M = 2; + rangtest[0][1].N = 1; + rangtest[0][1].M = 3; + rangtest[0][2].N = 2; + rangtest[0][2].M = 1; + rangtest[1][0].N = 2; + rangtest[1][0].M = 2; + rangtest[1][1].N = 2; + rangtest[1][1].M = 3; + rangtest[1][2].N = 3; + rangtest[1][2].M = 3; ModuleBase::Element_Basis_Index::IndexLNM testindex; - testindex=ModuleBase::Element_Basis_Index::construct_index(rangtest); - EXPECT_EQ(rangtest[0][0].N,testindex[0][0].N); - 
EXPECT_EQ(rangtest[0][0].M,testindex[0][0].M); - EXPECT_EQ(rangtest[1][1].N,testindex[1][1].N); - EXPECT_EQ(rangtest[1][1].M,testindex[1][1].M); - EXPECT_EQ(rangtest[0][2].N,testindex[0][2].N); - EXPECT_EQ(rangtest[0][2].M,testindex[0][2].M); - EXPECT_EQ(testindex[0][0][0][0],0); - EXPECT_EQ(testindex[0][0][0][1],1); - EXPECT_EQ(testindex[0][1][0][0],2); - EXPECT_EQ(testindex[1][1][0][0],4); - EXPECT_EQ(testindex[1][2][0][1],11); - EXPECT_EQ(testindex[0].count_size,7); - EXPECT_EQ(testindex[1].count_size,19); + testindex = ModuleBase::Element_Basis_Index::construct_index (rangtest); + EXPECT_EQ (rangtest[0][0].N, testindex[0][0].N); + EXPECT_EQ (rangtest[0][0].M, testindex[0][0].M); + EXPECT_EQ (rangtest[1][1].N, testindex[1][1].N); + EXPECT_EQ (rangtest[1][1].M, testindex[1][1].M); + EXPECT_EQ (rangtest[0][2].N, testindex[0][2].N); + EXPECT_EQ (rangtest[0][2].M, testindex[0][2].M); + EXPECT_EQ (testindex[0][0][0][0], 0); + EXPECT_EQ (testindex[0][0][0][1], 1); + EXPECT_EQ (testindex[0][1][0][0], 2); + EXPECT_EQ (testindex[1][1][0][0], 4); + EXPECT_EQ (testindex[1][2][0][1], 11); + EXPECT_EQ (testindex[0].count_size, 7); + EXPECT_EQ (testindex[1].count_size, 19); } - diff --git a/source/source_base/test/formatter_test.cpp b/source/source_base/test/formatter_test.cpp index 8d1ad9e8cba..c0593dc1f38 100644 --- a/source/source_base/test/formatter_test.cpp +++ b/source/source_base/test/formatter_test.cpp @@ -3,206 +3,207 @@ #include #include -TEST(FormatterTest, FmtCoreStaticFormat) { +TEST (FormatterTest, FmtCoreStaticFormat) +{ // const char* - std::string result = FmtCore::format("Hello, %s!", "world"); + std::string result = FmtCore::format ("Hello, %s!", "world"); // remove the last '\0' character - EXPECT_EQ(result, "Hello, world!"); + EXPECT_EQ (result, "Hello, world!"); // std::string - result = FmtCore::format("Hello, %s!", std::string("world")); - EXPECT_EQ(result, "Hello, world!"); + result = FmtCore::format ("Hello, %s!", std::string ("world")); + EXPECT_EQ 
(result, "Hello, world!"); // int - result = FmtCore::format("Hello, %d!", 123); - EXPECT_EQ(result, "Hello, 123!"); + result = FmtCore::format ("Hello, %d!", 123); + EXPECT_EQ (result, "Hello, 123!"); // float - result = FmtCore::format("Hello, %f!", 123.456); - EXPECT_EQ(result, "Hello, 123.456000!"); + result = FmtCore::format ("Hello, %f!", 123.456); + EXPECT_EQ (result, "Hello, 123.456000!"); // char - result = FmtCore::format("Hello, %c!", 'a'); - EXPECT_EQ(result, "Hello, a!"); + result = FmtCore::format ("Hello, %c!", 'a'); + EXPECT_EQ (result, "Hello, a!"); // invalid format - result = FmtCore::format("Hello, %z!", "world"); - EXPECT_EQ(result, "Hello, %!"); + result = FmtCore::format ("Hello, %z!", "world"); + EXPECT_EQ (result, "Hello, %!"); // varadic template case - result = FmtCore::format("Hello, %s, %d, %f, %c!", "world", 123, 123.456, 'a'); - EXPECT_EQ(result, "Hello, world, 123, 123.456000, a!"); + result = FmtCore::format ("Hello, %s, %d, %f, %c!", "world", 123, 123.456, 'a'); + EXPECT_EQ (result, "Hello, world, 123, 123.456000, a!"); } -TEST(FormatterTest, FmtCoreDynamic) +TEST (FormatterTest, FmtCoreDynamic) { - FmtCore fmt("Hello, %s!"); - EXPECT_EQ(fmt.fmt(), "Hello, %s!"); - std::string result = fmt.format(std::string("world")); - EXPECT_EQ(result, "Hello, world!"); + FmtCore fmt ("Hello, %s!"); + EXPECT_EQ (fmt.fmt (), "Hello, %s!"); + std::string result = fmt.format (std::string ("world")); + EXPECT_EQ (result, "Hello, world!"); - fmt.reset("Hello, %d!"); - EXPECT_EQ(fmt.fmt(), "Hello, %d!"); - result = fmt.format(123); - EXPECT_EQ(result, "Hello, 123!"); + fmt.reset ("Hello, %d!"); + EXPECT_EQ (fmt.fmt (), "Hello, %d!"); + result = fmt.format (123); + EXPECT_EQ (result, "Hello, 123!"); - fmt.reset("Hello, %f!"); - EXPECT_EQ(fmt.fmt(), "Hello, %f!"); - result = fmt.format(123.456); - EXPECT_EQ(result, "Hello, 123.456000!"); + fmt.reset ("Hello, %f!"); + EXPECT_EQ (fmt.fmt (), "Hello, %f!"); + result = fmt.format (123.456); + EXPECT_EQ 
(result, "Hello, 123.456000!"); - fmt.reset("Hello, %c!"); - EXPECT_EQ(fmt.fmt(), "Hello, %c!"); - result = fmt.format('a'); - EXPECT_EQ(result, "Hello, a!"); + fmt.reset ("Hello, %c!"); + EXPECT_EQ (fmt.fmt (), "Hello, %c!"); + result = fmt.format ('a'); + EXPECT_EQ (result, "Hello, a!"); // varadic template case - fmt.reset("Hello, %s, %d, %f, %c!"); - EXPECT_EQ(fmt.fmt(), "Hello, %s, %d, %f, %c!"); - result = fmt.format(std::string("world"), 123, 123.456, 'a'); - EXPECT_EQ(result, "Hello, world, 123, 123.456000, a!"); + fmt.reset ("Hello, %s, %d, %f, %c!"); + EXPECT_EQ (fmt.fmt (), "Hello, %s, %d, %f, %c!"); + result = fmt.format (std::string ("world"), 123, 123.456, 'a'); + EXPECT_EQ (result, "Hello, world, 123, 123.456000, a!"); } -TEST(FormatterTest, FmtPyStrFuncSplit) +TEST (FormatterTest, FmtPyStrFuncSplit) { std::string fmt = "Hello, %s, %d, %f, %c!"; // default delimiter, whitespace - std::vector result = FmtCore::split(fmt); + std::vector result = FmtCore::split (fmt); std::vector ref = {"Hello,", "%s,", "%d,", "%f,", "%c!"}; - for(int i = 0; i < result.size(); i++) - { - EXPECT_EQ(result[i], ref[i]); - } + for (int i = 0; i < result.size (); i++) + { + EXPECT_EQ (result[i], ref[i]); + } fmt = "Hello, %s, %d, %f, %c"; // other delimiter - result = FmtCore::split(fmt, "%"); + result = FmtCore::split (fmt, "%"); ref = {"Hello, ", "s, ", "d, ", "f, ", "c"}; - for(int i = 0; i < result.size(); i++) - { - EXPECT_EQ(result[i], ref[i]); - } + for (int i = 0; i < result.size (); i++) + { + EXPECT_EQ (result[i], ref[i]); + } // really string case, multiple chars - result = FmtCore::split(fmt, ", %"); + result = FmtCore::split (fmt, ", %"); ref = {"Hello", "s", "d", "f", "c"}; - for(int i = 0; i < result.size(); i++) - { - EXPECT_EQ(result[i], ref[i]); - } + for (int i = 0; i < result.size (); i++) + { + EXPECT_EQ (result[i], ref[i]); + } // no such delimiter - result = FmtCore::split(fmt, "z"); + result = FmtCore::split (fmt, "z"); ref = {"Hello, %s, %d, %f, 
%c"}; - for(int i = 0; i < result.size(); i++) - { - EXPECT_EQ(result[i], ref[i]); - } + for (int i = 0; i < result.size (); i++) + { + EXPECT_EQ (result[i], ref[i]); + } // multiple delimiters exist fmt = "Hello, %s, %d, %f, %c!"; - result = FmtCore::split(fmt); + result = FmtCore::split (fmt); ref = {"Hello,", "%s,", "%d,", "%f,", "%c!"}; - for(int i = 0; i < result.size(); i++) - { - EXPECT_EQ(result[i], ref[i]); - } - result = FmtCore::split(fmt, " "); + for (int i = 0; i < result.size (); i++) + { + EXPECT_EQ (result[i], ref[i]); + } + result = FmtCore::split (fmt, " "); ref = {"Hello,", "", "", "", "", "", "", "%s,", "", "%d,", "", "", "", "%f,", "", "%c!"}; - for(int i = 0; i < result.size(); i++) - { - EXPECT_EQ(result[i], ref[i]); - } + for (int i = 0; i < result.size (); i++) + { + EXPECT_EQ (result[i], ref[i]); + } } -TEST(FormatterTest, FmtPyStrFuncStartswith) +TEST (FormatterTest, FmtPyStrFuncStartswith) { const std::string fmt = "Hello, %s, %d, %f, %c!"; - EXPECT_TRUE(FmtCore::startswith(fmt, "Hello")); - EXPECT_FALSE(FmtCore::startswith(fmt, "world")); + EXPECT_TRUE (FmtCore::startswith (fmt, "Hello")); + EXPECT_FALSE (FmtCore::startswith (fmt, "world")); } -TEST(FormatterTest, FmtPyStrFuncEndswith) +TEST (FormatterTest, FmtPyStrFuncEndswith) { const std::string fmt = "Hello, %s, %d, %f, %c!"; - EXPECT_TRUE(FmtCore::endswith(fmt, "!")); - EXPECT_FALSE(FmtCore::endswith(fmt, "world")); + EXPECT_TRUE (FmtCore::endswith (fmt, "!")); + EXPECT_FALSE (FmtCore::endswith (fmt, "world")); } -TEST(FormatterTest, FmtPyStrFuncStrip) +TEST (FormatterTest, FmtPyStrFuncStrip) { std::string fmt = " Hello, %s, %d, %f, %c! "; - std::string result = FmtCore::strip(fmt); + std::string result = FmtCore::strip (fmt); std::string ref = "Hello, %s, %d, %f, %c!"; - EXPECT_EQ(result, ref); + EXPECT_EQ (result, ref); fmt = " Hello, %s, %d, %f, %c! 
"; - result = FmtCore::strip(fmt, " "); + result = FmtCore::strip (fmt, " "); ref = "Hello, %s, %d, %f, %c!"; - EXPECT_EQ(result, ref); + EXPECT_EQ (result, ref); fmt = ""; - result = FmtCore::strip(fmt); + result = FmtCore::strip (fmt); ref = ""; - EXPECT_EQ(result, ref); + EXPECT_EQ (result, ref); fmt = " "; - result = FmtCore::strip(fmt); + result = FmtCore::strip (fmt); ref = ""; - EXPECT_EQ(result, ref); + EXPECT_EQ (result, ref); } -TEST(FormatterTest, FmtPyStrFuncCenter) +TEST (FormatterTest, FmtPyStrFuncCenter) { std::string fmt = "Hello, %s, %d, %f, %c!"; - std::string result = FmtCore::center(fmt, 30); + std::string result = FmtCore::center (fmt, 30); std::string ref = " Hello, %s, %d, %f, %c! "; - EXPECT_EQ(result, ref); - result = FmtCore::center(fmt, 30, '*'); + EXPECT_EQ (result, ref); + result = FmtCore::center (fmt, 30, '*'); ref = "****Hello, %s, %d, %f, %c!****"; fmt = "Hello, %s, %d, %f, %c"; // length 21 - result = FmtCore::center(fmt, 30, '*'); + result = FmtCore::center (fmt, 30, '*'); ref = "****Hello, %s, %d, %f, %c*****"; - EXPECT_EQ(result, ref); + EXPECT_EQ (result, ref); fmt = ""; - result = FmtCore::center(fmt, 30, '*'); + result = FmtCore::center (fmt, 30, '*'); ref = "******************************"; - EXPECT_EQ(result, ref); + EXPECT_EQ (result, ref); } -TEST(FormatterTest, FmtPyStrFuncReplace) +TEST (FormatterTest, FmtPyStrFuncReplace) { const std::string fmt = "Hello, %s, %d, %f, %c!"; - std::string result = FmtCore::replace(fmt, "%s", "world"); + std::string result = FmtCore::replace (fmt, "%s", "world"); std::string ref = "Hello, world, %d, %f, %c!"; - EXPECT_EQ(result, ref); - result = FmtCore::replace(fmt, "%d", "world"); + EXPECT_EQ (result, ref); + result = FmtCore::replace (fmt, "%d", "world"); ref = "Hello, %s, world, %f, %c!"; - EXPECT_EQ(result, ref); - result = FmtCore::replace(fmt, "%f", "world"); + EXPECT_EQ (result, ref); + result = FmtCore::replace (fmt, "%f", "world"); ref = "Hello, %s, %d, world, %c!"; - 
EXPECT_EQ(result, ref); - result = FmtCore::replace(fmt, "%c", "world"); + EXPECT_EQ (result, ref); + result = FmtCore::replace (fmt, "%c", "world"); ref = "Hello, %s, %d, %f, world!"; - EXPECT_EQ(result, ref); - result = FmtCore::replace(fmt, "%z", "world"); + EXPECT_EQ (result, ref); + result = FmtCore::replace (fmt, "%z", "world"); ref = "Hello, %s, %d, %f, %c!"; - EXPECT_EQ(result, ref); - result = FmtCore::replace(fmt, "%", "world"); + EXPECT_EQ (result, ref); + result = FmtCore::replace (fmt, "%", "world"); ref = "Hello, worlds, worldd, worldf, worldc!"; - EXPECT_EQ(result, ref); + EXPECT_EQ (result, ref); } -TEST(FormatterTest, FmtPyStrFuncJoin) +TEST (FormatterTest, FmtPyStrFuncJoin) { const std::vector strs = {"Hello", "world", "!"}; - std::string result = FmtCore::join("", strs); + std::string result = FmtCore::join ("", strs); std::string ref = "Helloworld!"; - EXPECT_EQ(result, ref); - result = FmtCore::join(" ", strs); + EXPECT_EQ (result, ref); + result = FmtCore::join (" ", strs); ref = "Hello world !"; - EXPECT_EQ(result, ref); - result = FmtCore::join("__", strs); + EXPECT_EQ (result, ref); + result = FmtCore::join ("__", strs); ref = "Hello__world__!"; } -TEST(FormatterTest, FmtTableDefaultArgs) +TEST (FormatterTest, FmtTableDefaultArgs) { const std::vector titles = {"title1", "t i t l e 2", "t-i-t-l-e-3"}; const std::vector fmts = {"%s", "%d", "%f"}; - FmtTable table(titles, 5, fmts); + FmtTable table (titles, 5, fmts); const std::vector col1 = {"row1", "row2", "row3", "row4", "row5"}; const std::vector col2 = {1, 2, 3, 4, 5}; const std::vector col3 = {1.1, 2.2, 3.3, 4.4, 5.5}; table << col1 << col2 << col3; - const std::string result = table.str(); + const std::string result = table.str (); std::cout << result << std::endl; std::string ref = ""; ref += "--------------------------------\n"; @@ -214,19 +215,19 @@ TEST(FormatterTest, FmtTableDefaultArgs) ref += " row4 4 4.400000 \n"; ref += " row5 5 5.500000 \n"; ref += 
"--------------------------------\n"; - EXPECT_EQ(result, ref); + EXPECT_EQ (result, ref); } -TEST(FormatterTest, FmtTableHeadless) +TEST (FormatterTest, FmtTableHeadless) { const std::vector titles = {"", "", ""}; const std::vector fmts = {"%s", "%d", "%f"}; - FmtTable table(titles, 5, fmts); + FmtTable table (titles, 5, fmts); const std::vector col1 = {"row1", "row2", "row3", "row4", "row5"}; const std::vector col2 = {1, 2, 3, 4, 5}; const std::vector col3 = {1.1, 2.2, 3.3, 4.4, 5.5}; table << col1 << col2 << col3; - const std::string result = table.str(); + const std::string result = table.str (); std::cout << result << std::endl; std::string ref = ""; ref += "-----------------\n"; @@ -236,10 +237,10 @@ TEST(FormatterTest, FmtTableHeadless) ref += " row4 4 4.400000 \n"; ref += " row5 5 5.500000 \n"; ref += "-----------------\n"; - EXPECT_EQ(result, ref); + EXPECT_EQ (result, ref); } -TEST(FormatterTest, FmtTableCustomArgsAlign) +TEST (FormatterTest, FmtTableCustomArgsAlign) { // shared data std::vector titles = {"title1", "t i t l e 2", "t-i-t-l-e-3"}; @@ -248,9 +249,9 @@ TEST(FormatterTest, FmtTableCustomArgsAlign) std::vector col2 = {1, 2, 3, 4, 5}; std::vector col3 = {1.1, 2.2, 3.3, 4.4, 5.5}; // align: l and l - FmtTable table(titles, 5, fmts, 0, {FmtTable::Align::LEFT, FmtTable::Align::LEFT}); + FmtTable table (titles, 5, fmts, 0, {FmtTable::Align::LEFT, FmtTable::Align::LEFT}); table << col1 << col2 << col3; - std::string result = table.str(); + std::string result = table.str (); std::cout << result << std::endl; std::string ref = ""; ref += "--------------------------------\n"; @@ -262,12 +263,12 @@ TEST(FormatterTest, FmtTableCustomArgsAlign) ref += " row4 4 4.400000 \n"; ref += " row5 5 5.500000 \n"; ref += "--------------------------------\n"; - EXPECT_EQ(result, ref); + EXPECT_EQ (result, ref); // align: r and r - FmtTable table2(titles, 5, fmts, 0, {FmtTable::Align::RIGHT, FmtTable::Align::RIGHT}); + FmtTable table2 (titles, 5, fmts, 0, 
{FmtTable::Align::RIGHT, FmtTable::Align::RIGHT}); table2 << col1 << col2 << col3; - result = table2.str(); + result = table2.str (); std::cout << result << std::endl; ref = ""; ref += "--------------------------------\n"; @@ -279,12 +280,12 @@ TEST(FormatterTest, FmtTableCustomArgsAlign) ref += " row4 4 4.400000 \n"; ref += " row5 5 5.500000 \n"; ref += "--------------------------------\n"; - EXPECT_EQ(result, ref); + EXPECT_EQ (result, ref); // align: l and r - FmtTable table3(titles, 5, fmts, 0, {FmtTable::Align::RIGHT, FmtTable::Align::LEFT}); + FmtTable table3 (titles, 5, fmts, 0, {FmtTable::Align::RIGHT, FmtTable::Align::LEFT}); table3 << col1 << col2 << col3; - result = table3.str(); + result = table3.str (); std::cout << result << std::endl; ref = ""; ref += "--------------------------------\n"; @@ -296,12 +297,12 @@ TEST(FormatterTest, FmtTableCustomArgsAlign) ref += " row4 4 4.400000 \n"; ref += " row5 5 5.500000 \n"; ref += "--------------------------------\n"; - EXPECT_EQ(result, ref); + EXPECT_EQ (result, ref); // align: r and l - FmtTable table4(titles, 5, fmts, 0, {FmtTable::Align::LEFT, FmtTable::Align::RIGHT}); + FmtTable table4 (titles, 5, fmts, 0, {FmtTable::Align::LEFT, FmtTable::Align::RIGHT}); table4 << col1 << col2 << col3; - result = table4.str(); + result = table4.str (); std::cout << result << std::endl; ref = ""; ref += "--------------------------------\n"; @@ -313,10 +314,10 @@ TEST(FormatterTest, FmtTableCustomArgsAlign) ref += " row4 4 4.400000 \n"; ref += " row5 5 5.500000 \n"; ref += "--------------------------------\n"; - EXPECT_EQ(result, ref); + EXPECT_EQ (result, ref); } -TEST(FormatterTest, FmtTableCustomArgsAlignFrame) +TEST (FormatterTest, FmtTableCustomArgsAlignFrame) { // shared data std::vector titles = {"title1", "t i t l e 2", "t-i-t-l-e-3"}; @@ -325,14 +326,9 @@ TEST(FormatterTest, FmtTableCustomArgsAlignFrame) std::vector col2 = {1, 2, 3, 4, 5}; std::vector col3 = {1.1, 2.2, 3.3, 4.4, 5.5}; - FmtTable table1(titles, - 
5, - fmts, - 0, - {FmtTable::Align::LEFT, FmtTable::Align::LEFT}, - {'+', '?', '*', '.', '^'}); + FmtTable table1 (titles, 5, fmts, 0, {FmtTable::Align::LEFT, FmtTable::Align::LEFT}, {'+', '?', '*', '.', '^'}); table1 << col1 << col2 << col3; - std::string result = table1.str(); + std::string result = table1.str (); std::cout << result << std::endl; std::string ref = ""; ref += "++++++++++++++++++++++++++++++++\n"; @@ -344,10 +340,10 @@ TEST(FormatterTest, FmtTableCustomArgsAlignFrame) ref += ".row4 4 4.400000 ^\n"; ref += ".row5 5 5.500000 ^\n"; ref += "********************************\n"; - EXPECT_EQ(result, ref); + EXPECT_EQ (result, ref); } -TEST(FormatterTest, FmtTableCustomArgsAlignFrameDelim) +TEST (FormatterTest, FmtTableCustomArgsAlignFrameDelim) { // shared data std::vector titles = {"title1", "t i t l e 2", "t-i-t-l-e-3"}; @@ -355,15 +351,15 @@ TEST(FormatterTest, FmtTableCustomArgsAlignFrameDelim) std::vector col1 = {"row1", "row2", "row3", "row4", "row5"}; std::vector col2 = {1, 2, 3, 4, 5}; std::vector col3 = {1.1, 2.2, 3.3, 4.4, 5.5}; - FmtTable table1(titles, - 5, - fmts, - 0, - {FmtTable::Align::LEFT, FmtTable::Align::LEFT}, - {'=', '/', '&', '#', '%'}, - {'"', ']'}); + FmtTable table1 (titles, + 5, + fmts, + 0, + {FmtTable::Align::LEFT, FmtTable::Align::LEFT}, + {'=', '/', '&', '#', '%'}, + {'"', ']'}); table1 << col1 << col2 << col3; - std::string result = table1.str(); + std::string result = table1.str (); std::cout << result << std::endl; std::string ref = ""; ref += "================================\n"; @@ -375,19 +371,19 @@ TEST(FormatterTest, FmtTableCustomArgsAlignFrameDelim) ref += "#row4 ]4 ]4.400000 %\n"; ref += "#row5 ]5 ]5.500000 %\n"; ref += "&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n"; - EXPECT_EQ(result, ref); + EXPECT_EQ (result, ref); } -TEST(FormatterTest, FmtTableCustomIndent) +TEST (FormatterTest, FmtTableCustomIndent) { const std::vector titles = {"title1", "t i t l e 2", "t-i-t-l-e-3"}; const std::vector fmts = {"%s", "%d", "%f"}; - 
FmtTable table(titles, 5, fmts, 4); + FmtTable table (titles, 5, fmts, 4); const std::vector col1 = {"row1", "row2", "row3", "row4", "row5"}; const std::vector col2 = {1, 2, 3, 4, 5}; const std::vector col3 = {1.1, 2.2, 3.3, 4.4, 5.5}; table << col1 << col2 << col3; - const std::string result = table.str(); + const std::string result = table.str (); std::cout << result << std::endl; std::string ref = ""; ref += " --------------------------------\n"; @@ -399,10 +395,12 @@ TEST(FormatterTest, FmtTableCustomIndent) ref += " row4 4 4.400000 \n"; ref += " row5 5 5.500000 \n"; ref += " --------------------------------\n"; - EXPECT_EQ(result, ref); + EXPECT_EQ (result, ref); } -int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); +int + main (int argc, char** argv) +{ + ::testing::InitGoogleTest (&argc, argv); + return RUN_ALL_TESTS (); } \ No newline at end of file diff --git a/source/source_base/test/global_file_test.cpp b/source/source_base/test/global_file_test.cpp index 338070ecfa7..767c6f247e0 100644 --- a/source/source_base/test/global_file_test.cpp +++ b/source/source_base/test/global_file_test.cpp @@ -26,149 +26,150 @@ class GlobalFile : public testing::Test { - protected: - void SetUp() override - { - const std::string suffix = "Si"; - PARAM.sys.global_out_dir = "OUT." + suffix + "/"; - PARAM.sys.global_stru_dir = PARAM.sys.global_out_dir + "STRU/"; - PARAM.sys.global_matrix_dir = PARAM.sys.global_out_dir + "matrix/"; - PARAM.sys.global_wfc_dir = PARAM.sys.global_out_dir + "WFC/"; - PARAM.sys.global_mlkedf_descriptor_dir = PARAM.sys.global_out_dir + "MLKEDF_Descriptors/"; - PARAM.sys.global_deepks_label_elec_dir = PARAM.sys.global_out_dir + "DeePKS_Labels_Elec/"; - } + protected: + void + SetUp () override + { + const std::string suffix = "Si"; + PARAM.sys.global_out_dir = "OUT." 
+ suffix + "/"; + PARAM.sys.global_stru_dir = PARAM.sys.global_out_dir + "STRU/"; + PARAM.sys.global_matrix_dir = PARAM.sys.global_out_dir + "matrix/"; + PARAM.sys.global_wfc_dir = PARAM.sys.global_out_dir + "WFC/"; + PARAM.sys.global_mlkedf_descriptor_dir = PARAM.sys.global_out_dir + "MLKEDF_Descriptors/"; + PARAM.sys.global_deepks_label_elec_dir = PARAM.sys.global_out_dir + "DeePKS_Labels_Elec/"; + } }; -TEST_F(GlobalFile,mkdirout) +TEST_F (GlobalFile, mkdirout) { - std::string output; - testing::internal::CaptureStdout(); - PARAM.sys.log_file = "running_m_1.log"; - ModuleBase::Global_File::make_dir_out("Si","m",false,false,0,true,true); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("MAKE THE DIR")); - GlobalV::ofs_warning.close(); - EXPECT_TRUE(GlobalV::ofs_running.is_open()); - GlobalV::ofs_running.close(); - std::string dd = "OUT.Si/running_m_1.log"; - remove(dd.c_str()); + std::string output; + testing::internal::CaptureStdout (); + PARAM.sys.log_file = "running_m_1.log"; + ModuleBase::Global_File::make_dir_out ("Si", "m", false, false, 0, true, true); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("MAKE THE DIR")); + GlobalV::ofs_warning.close (); + EXPECT_TRUE (GlobalV::ofs_running.is_open ()); + GlobalV::ofs_running.close (); + std::string dd = "OUT.Si/running_m_1.log"; + remove (dd.c_str ()); - testing::internal::CaptureStdout(); - PARAM.sys.log_file = "running_md.log"; - ModuleBase::Global_File::make_dir_out("Si","md",false,false,0,true,false); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("MAKE THE STRU DIR")); - EXPECT_TRUE(GlobalV::ofs_running.is_open()); - GlobalV::ofs_running.close(); - GlobalV::ofs_warning.close(); - std::string bb = "OUT.Si/running_md.log"; - remove(bb.c_str()); + testing::internal::CaptureStdout (); + PARAM.sys.log_file = "running_md.log"; + ModuleBase::Global_File::make_dir_out ("Si", "md", 
false, false, 0, true, false); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("MAKE THE STRU DIR")); + EXPECT_TRUE (GlobalV::ofs_running.is_open ()); + GlobalV::ofs_running.close (); + GlobalV::ofs_warning.close (); + std::string bb = "OUT.Si/running_md.log"; + remove (bb.c_str ()); - testing::internal::CaptureStdout(); - PARAM.sys.log_file = "running_md_1.log"; - ModuleBase::Global_File::make_dir_out("Si","md",true,false,0,true,true); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("MAKE THE MATRIX DIR")); - EXPECT_TRUE(GlobalV::ofs_running.is_open()); - GlobalV::ofs_running.close(); - std::string cc = "OUT.Si/running_md_1.log"; - remove(cc.c_str()); - std::string aa = "OUT.Si/warning.log"; - remove(aa.c_str()); - rmdir(PARAM.sys.global_stru_dir.c_str()); - rmdir(PARAM.sys.global_matrix_dir.c_str()); - rmdir(PARAM.sys.global_out_dir.c_str()); + testing::internal::CaptureStdout (); + PARAM.sys.log_file = "running_md_1.log"; + ModuleBase::Global_File::make_dir_out ("Si", "md", true, false, 0, true, true); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("MAKE THE MATRIX DIR")); + EXPECT_TRUE (GlobalV::ofs_running.is_open ()); + GlobalV::ofs_running.close (); + std::string cc = "OUT.Si/running_md_1.log"; + remove (cc.c_str ()); + std::string aa = "OUT.Si/warning.log"; + remove (aa.c_str ()); + rmdir (PARAM.sys.global_stru_dir.c_str ()); + rmdir (PARAM.sys.global_matrix_dir.c_str ()); + rmdir (PARAM.sys.global_out_dir.c_str ()); } -TEST_F(GlobalFile,mkdiratom) +TEST_F (GlobalFile, mkdiratom) { - PARAM.sys.global_out_dir = "./"; - ModuleBase::Global_File::make_dir_atom("Si"); - int a = access("./Si/",0); - EXPECT_EQ(a , 0); - std::string ss = "./Si/"; - rmdir(ss.c_str()); + PARAM.sys.global_out_dir = "./"; + ModuleBase::Global_File::make_dir_atom ("Si"); + int a = access ("./Si/", 0); + EXPECT_EQ (a, 0); + std::string ss = "./Si/"; + 
rmdir (ss.c_str ()); } -TEST_F(GlobalFile,openlog) +TEST_F (GlobalFile, openlog) { - PARAM.sys.global_out_dir = "./"; - std::ofstream ofs; - ModuleBase::Global_File::open_log(ofs,"Si.log","md",true); - EXPECT_TRUE(ofs.is_open()); - ofs.close(); - ModuleBase::Global_File::open_log(ofs,"Si.log","md",false); - EXPECT_TRUE(ofs.is_open()); - ofs.close(); - std::string sss = "Si.log"; - remove(sss.c_str()); + PARAM.sys.global_out_dir = "./"; + std::ofstream ofs; + ModuleBase::Global_File::open_log (ofs, "Si.log", "md", true); + EXPECT_TRUE (ofs.is_open ()); + ofs.close (); + ModuleBase::Global_File::open_log (ofs, "Si.log", "md", false); + EXPECT_TRUE (ofs.is_open ()); + ofs.close (); + std::string sss = "Si.log"; + remove (sss.c_str ()); } -TEST_F(GlobalFile,closelog) +TEST_F (GlobalFile, closelog) { - std::ofstream ofs; - std::string sss = "Si.log"; - ofs.open(sss.c_str()); - ModuleBase::Global_File::close_log(ofs, sss); - EXPECT_FALSE(ofs.is_open()); - if (ofs.is_open()) - { - ofs.close(); - } - remove(sss.c_str()); + std::ofstream ofs; + std::string sss = "Si.log"; + ofs.open (sss.c_str ()); + ModuleBase::Global_File::close_log (ofs, sss); + EXPECT_FALSE (ofs.is_open ()); + if (ofs.is_open ()) + { + ofs.close (); + } + remove (sss.c_str ()); } -TEST_F(GlobalFile,closealllog) +TEST_F (GlobalFile, closealllog) { - /* - For source_io/input.cpp:line3578 close_log() is a void function, - All its contents is calling close_all_log() in source_base/global_file.cpp - For Input::close_log() what is left to test are the validities of parameters - GlobalV::MY_RANK and this->out_alllog. 
- */ - /* Test out_alllog == true case */ - std::string header = "running_"; - std::string tailCpuRank0 = "_cpu0.log"; - std::string tail = ".log"; - std::string f1 = header + PARAM.input.calculation + tailCpuRank0; - - if (GlobalV::ofs_running.is_open()) - { - GlobalV::ofs_running.close(); - } - if (GlobalV::ofs_warning.is_open()) - { - GlobalV::ofs_warning.close(); - } - GlobalV::ofs_running.open(f1.c_str()); - GlobalV::ofs_warning.open("warning.log"); - ModuleBase::Global_File::close_all_log(0,true,PARAM.input.calculation); - EXPECT_FALSE(GlobalV::ofs_running.is_open()); - if (GlobalV::ofs_running.is_open()) - { - GlobalV::ofs_running.close(); - } - EXPECT_FALSE(GlobalV::ofs_warning.is_open()); - if (GlobalV::ofs_warning.is_open()) - { - GlobalV::ofs_warning.close(); - } - remove(f1.c_str()); - //remove("warning.log"); - /* Test out_alllog == false case */ - GlobalV::ofs_running.open("running.log"); - GlobalV::ofs_warning.open("warning.log"); - ModuleBase::Global_File::close_all_log(0,false,PARAM.input.calculation); - EXPECT_FALSE(GlobalV::ofs_running.is_open()); - if (GlobalV::ofs_running.is_open()) - { - GlobalV::ofs_running.close(); - } - EXPECT_FALSE(GlobalV::ofs_warning.is_open()); - if (GlobalV::ofs_warning.is_open()) - { - GlobalV::ofs_warning.close(); - } - remove("running.log"); - remove("warning.log"); + /* + For source_io/input.cpp:line3578 close_log() is a void function, + All its contents is calling close_all_log() in source_base/global_file.cpp + For Input::close_log() what is left to test are the validities of parameters + GlobalV::MY_RANK and this->out_alllog. 
+ */ + /* Test out_alllog == true case */ + std::string header = "running_"; + std::string tailCpuRank0 = "_cpu0.log"; + std::string tail = ".log"; + std::string f1 = header + PARAM.input.calculation + tailCpuRank0; + + if (GlobalV::ofs_running.is_open ()) + { + GlobalV::ofs_running.close (); + } + if (GlobalV::ofs_warning.is_open ()) + { + GlobalV::ofs_warning.close (); + } + GlobalV::ofs_running.open (f1.c_str ()); + GlobalV::ofs_warning.open ("warning.log"); + ModuleBase::Global_File::close_all_log (0, true, PARAM.input.calculation); + EXPECT_FALSE (GlobalV::ofs_running.is_open ()); + if (GlobalV::ofs_running.is_open ()) + { + GlobalV::ofs_running.close (); + } + EXPECT_FALSE (GlobalV::ofs_warning.is_open ()); + if (GlobalV::ofs_warning.is_open ()) + { + GlobalV::ofs_warning.close (); + } + remove (f1.c_str ()); + // remove("warning.log"); + /* Test out_alllog == false case */ + GlobalV::ofs_running.open ("running.log"); + GlobalV::ofs_warning.open ("warning.log"); + ModuleBase::Global_File::close_all_log (0, false, PARAM.input.calculation); + EXPECT_FALSE (GlobalV::ofs_running.is_open ()); + if (GlobalV::ofs_running.is_open ()) + { + GlobalV::ofs_running.close (); + } + EXPECT_FALSE (GlobalV::ofs_warning.is_open ()); + if (GlobalV::ofs_warning.is_open ()) + { + GlobalV::ofs_warning.close (); + } + remove ("running.log"); + remove ("warning.log"); } \ No newline at end of file diff --git a/source/source_base/test/global_function_test.cpp b/source/source_base/test/global_function_test.cpp index 5e315b6e71f..399e66845dc 100644 --- a/source/source_base/test/global_function_test.cpp +++ b/source/source_base/test/global_function_test.cpp @@ -71,21 +71,23 @@ * - add the block */ -inline void EXPECT_COMPLEX_FLOAT_EQ(const std::complex& a, const std::complex& b) +inline void + EXPECT_COMPLEX_FLOAT_EQ (const std::complex& a, const std::complex& b) { - EXPECT_FLOAT_EQ(a.real(), b.real()); - EXPECT_FLOAT_EQ(a.imag(), b.imag()); + EXPECT_FLOAT_EQ (a.real (), b.real ()); + 
EXPECT_FLOAT_EQ (a.imag (), b.imag ()); } -inline void EXPECT_COMPLEX_DOUBLE_EQ(const std::complex& a, const std::complex& b) +inline void + EXPECT_COMPLEX_DOUBLE_EQ (const std::complex& a, const std::complex& b) { - EXPECT_DOUBLE_EQ(a.real(), b.real()); - EXPECT_DOUBLE_EQ(a.imag(), b.imag()); + EXPECT_DOUBLE_EQ (a.real (), b.real ()); + EXPECT_DOUBLE_EQ (a.imag (), b.imag ()); } - -template -inline void CHECK_ZEROS(T &size) +template +inline void + CHECK_ZEROS (T& size) { bool* pt_b = nullptr; int* pt_i = nullptr; @@ -109,40 +111,40 @@ inline void CHECK_ZEROS(T &size) double value_d = 4.8; std::complex value_cf{1.3, 2.2}; std::complex value_cd{1.1, 2.2}; - std::fill(&pt_b[0], &pt_b[size], value_b); - std::fill(&pt_i[0], &pt_i[size], value_i); - std::fill(&pt_f[0], &pt_f[size], value_f); - std::fill(&pt_d[0], &pt_d[size], value_d); - std::fill(&pt_cf[0], &pt_cf[size], value_cf); - std::fill(&pt_cd[0], &pt_cd[size], value_cd); + std::fill (&pt_b[0], &pt_b[size], value_b); + std::fill (&pt_i[0], &pt_i[size], value_i); + std::fill (&pt_f[0], &pt_f[size], value_f); + std::fill (&pt_d[0], &pt_d[size], value_d); + std::fill (&pt_cf[0], &pt_cf[size], value_cf); + std::fill (&pt_cd[0], &pt_cd[size], value_cd); for (int i = 0; i < size; ++i) - { - pt_v3[i].set(1.1,2.2,3.3); - } - ModuleBase::GlobalFunc::ZEROS(pt_b, size); - ModuleBase::GlobalFunc::ZEROS(pt_i, size); - ModuleBase::GlobalFunc::ZEROS(pt_f, size); - ModuleBase::GlobalFunc::ZEROS(pt_d, size); - ModuleBase::GlobalFunc::ZEROS(pt_cf, size); - ModuleBase::GlobalFunc::ZEROS(pt_cd, size); - ModuleBase::GlobalFunc::ZEROS(pt_v3, size); + { + pt_v3[i].set (1.1, 2.2, 3.3); + } + ModuleBase::GlobalFunc::ZEROS (pt_b, size); + ModuleBase::GlobalFunc::ZEROS (pt_i, size); + ModuleBase::GlobalFunc::ZEROS (pt_f, size); + ModuleBase::GlobalFunc::ZEROS (pt_d, size); + ModuleBase::GlobalFunc::ZEROS (pt_cf, size); + ModuleBase::GlobalFunc::ZEROS (pt_cd, size); + ModuleBase::GlobalFunc::ZEROS (pt_v3, size); int zero_i = 0; float 
zero_f = 0.0; double zero_d = 0.0; std::complex zero_cf{0.0, 0.0}; std::complex zero_cd{0.0, 0.0}; for (int i = 0; i < size; ++i) - { - EXPECT_FALSE(pt_b[i]); - EXPECT_EQ(pt_i[i],zero_i); - EXPECT_FLOAT_EQ(pt_f[i],zero_f); - EXPECT_DOUBLE_EQ(pt_d[i],zero_d); - EXPECT_COMPLEX_FLOAT_EQ(pt_cf[i], zero_cf); - EXPECT_COMPLEX_DOUBLE_EQ(pt_cd[i], zero_cd); - EXPECT_DOUBLE_EQ(pt_v3[i].x,zero_d); - EXPECT_DOUBLE_EQ(pt_v3[i].y,zero_d); - EXPECT_DOUBLE_EQ(pt_v3[i].z,zero_d); - } + { + EXPECT_FALSE (pt_b[i]); + EXPECT_EQ (pt_i[i], zero_i); + EXPECT_FLOAT_EQ (pt_f[i], zero_f); + EXPECT_DOUBLE_EQ (pt_d[i], zero_d); + EXPECT_COMPLEX_FLOAT_EQ (pt_cf[i], zero_cf); + EXPECT_COMPLEX_DOUBLE_EQ (pt_cd[i], zero_cd); + EXPECT_DOUBLE_EQ (pt_v3[i].x, zero_d); + EXPECT_DOUBLE_EQ (pt_v3[i].y, zero_d); + EXPECT_DOUBLE_EQ (pt_v3[i].z, zero_d); + } delete[] pt_b; delete[] pt_i; delete[] pt_f; @@ -160,62 +162,64 @@ class GlobalFunctionTest : public testing::Test time_t start, end; // for capturing output in files and on screen std::string output; - void SetUp() + void + SetUp () { - GlobalV::ofs_warning.open("warning.log"); - GlobalV::ofs_running.open("running.log"); + GlobalV::ofs_warning.open ("warning.log"); + GlobalV::ofs_running.open ("running.log"); } - void TearDown() + void + TearDown () { - GlobalV::ofs_warning.close(); - GlobalV::ofs_running.close(); - remove("warning.log"); - remove("running.log"); - remove("tmp"); + GlobalV::ofs_warning.close (); + GlobalV::ofs_running.close (); + remove ("warning.log"); + remove ("running.log"); + remove ("tmp"); } }; -TEST_F(GlobalFunctionTest, NewPart) +TEST_F (GlobalFunctionTest, NewPart) { - ModuleBase::GlobalFunc::NEW_PART("New Part Starts ..."); - GlobalV::ofs_running.close(); - ifs.open("running.log"); - getline(ifs, output); - getline(ifs, output); - getline(ifs, output); - getline(ifs, output); + ModuleBase::GlobalFunc::NEW_PART ("New Part Starts ..."); + GlobalV::ofs_running.close (); + ifs.open ("running.log"); + getline (ifs, output); + 
getline (ifs, output); + getline (ifs, output); + getline (ifs, output); // output in running.log file - EXPECT_THAT(output, testing::HasSubstr("New Part Starts ...")); - ifs.close(); + EXPECT_THAT (output, testing::HasSubstr ("New Part Starts ...")); + ifs.close (); } -TEST_F(GlobalFunctionTest, OutScreen) +TEST_F (GlobalFunctionTest, OutScreen) { - testing::internal::CaptureStdout(); - int nbx = 100; - double rcut = 10.5; - ModuleBase::GlobalFunc::OUT("nbx", nbx); - ModuleBase::GlobalFunc::OUT("rcut", rcut); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("nbx = 100")); - EXPECT_THAT(output,testing::HasSubstr("rcut = 10.5")); + testing::internal::CaptureStdout (); + int nbx = 100; + double rcut = 10.5; + ModuleBase::GlobalFunc::OUT ("nbx", nbx); + ModuleBase::GlobalFunc::OUT ("rcut", rcut); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("nbx = 100")); + EXPECT_THAT (output, testing::HasSubstr ("rcut = 10.5")); } -TEST_F(GlobalFunctionTest, OutV1) +TEST_F (GlobalFunctionTest, OutV1) { - ofs.open("tmp"); - ModuleBase::GlobalFunc::OUT(ofs, "abacus"); - ofs.close(); - ifs.open("tmp"); - getline(ifs, output); - getline(ifs, output); - EXPECT_THAT(output, testing::HasSubstr("abacus")); - ifs.close(); + ofs.open ("tmp"); + ModuleBase::GlobalFunc::OUT (ofs, "abacus"); + ofs.close (); + ifs.open ("tmp"); + getline (ifs, output); + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("abacus")); + ifs.close (); } -TEST_F(GlobalFunctionTest, OutV2) +TEST_F (GlobalFunctionTest, OutV2) { - ofs.open("tmp"); + ofs.open ("tmp"); bool tmp_bool = true; int tmp_int = 1; long tmp_long = 2; @@ -223,112 +227,138 @@ TEST_F(GlobalFunctionTest, OutV2) float tmp_float = 4.0; double tmp_double = 5.0; std::string tmp_string = "string"; - ModuleBase::GlobalFunc::OUT(ofs, "tmp_bool", tmp_bool); - ModuleBase::GlobalFunc::OUT(ofs, "tmp_int", tmp_int); - ModuleBase::GlobalFunc::OUT(ofs, 
"tmp_long", tmp_long); - ModuleBase::GlobalFunc::OUT(ofs, "tmp_unsigned_long", tmp_unsigned_long); - ModuleBase::GlobalFunc::OUT(ofs, "tmp_float", tmp_float); - ModuleBase::GlobalFunc::OUT(ofs, "tmp_double", tmp_double); - ModuleBase::GlobalFunc::OUT(ofs, "tmp_string", tmp_string); + ModuleBase::GlobalFunc::OUT (ofs, "tmp_bool", tmp_bool); + ModuleBase::GlobalFunc::OUT (ofs, "tmp_int", tmp_int); + ModuleBase::GlobalFunc::OUT (ofs, "tmp_long", tmp_long); + ModuleBase::GlobalFunc::OUT (ofs, "tmp_unsigned_long", tmp_unsigned_long); + ModuleBase::GlobalFunc::OUT (ofs, "tmp_float", tmp_float); + ModuleBase::GlobalFunc::OUT (ofs, "tmp_double", tmp_double); + ModuleBase::GlobalFunc::OUT (ofs, "tmp_string", tmp_string); std::string para = ""; int length = 0; - for (int i=0;i<50;i++) - { - para += "a"; - length = para.size()+1; - if(length == 5){ - char tmp_char[5]; - strcpy(tmp_char,para.c_str()); - ModuleBase::GlobalFunc::OUT(ofs, "para", tmp_char);} - else if (length == 6){ - char tmp_char[6]; - strcpy(tmp_char,para.c_str()); - ModuleBase::GlobalFunc::OUT(ofs, "para", tmp_char);} - else if (length == 13){ - char tmp_char[13]; - strcpy(tmp_char,para.c_str()); - ModuleBase::GlobalFunc::OUT(ofs, "para", tmp_char);} - else if (length == 15){ - char tmp_char[15]; - strcpy(tmp_char,para.c_str()); - ModuleBase::GlobalFunc::OUT(ofs, "para", tmp_char);} - else if (length == 20){ - char tmp_char[20]; - strcpy(tmp_char,para.c_str()); - ModuleBase::GlobalFunc::OUT(ofs, "para", tmp_char);} - else if (length == 22){ - char tmp_char[22]; - strcpy(tmp_char,para.c_str()); - ModuleBase::GlobalFunc::OUT(ofs, "para", tmp_char);} - else if (length == 23){ - char tmp_char[23]; - strcpy(tmp_char,para.c_str()); - ModuleBase::GlobalFunc::OUT(ofs, "para", tmp_char);} - else if (length == 25){ - char tmp_char[25]; - strcpy(tmp_char,para.c_str()); - ModuleBase::GlobalFunc::OUT(ofs, "para", tmp_char);} - else if (length == 28){ - char tmp_char[28]; - strcpy(tmp_char,para.c_str()); - 
ModuleBase::GlobalFunc::OUT(ofs, "para", tmp_char);} - else if (length == 29){ - char tmp_char[29]; - strcpy(tmp_char,para.c_str()); - ModuleBase::GlobalFunc::OUT(ofs, "para", tmp_char);} - else if (length == 30){ - char tmp_char[30]; - strcpy(tmp_char,para.c_str()); - ModuleBase::GlobalFunc::OUT(ofs, "para", tmp_char);} - else if (length == 32){ - char tmp_char[32]; - strcpy(tmp_char,para.c_str()); - ModuleBase::GlobalFunc::OUT(ofs, "para", tmp_char);} - } - ofs.close(); - ifs.open("tmp"); - std::string str((std::istreambuf_iterator(ifs)),std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("tmp_bool = 1")); - EXPECT_THAT(str, testing::HasSubstr("tmp_int = 1")); - EXPECT_THAT(str, testing::HasSubstr("tmp_long = 2")); - EXPECT_THAT(str, testing::HasSubstr("tmp_unsigned_long = 3")); - EXPECT_THAT(str, testing::HasSubstr("tmp_float = 4")); - EXPECT_THAT(str, testing::HasSubstr("tmp_double = 5")); - EXPECT_THAT(str, testing::HasSubstr("tmp_string = string")); + for (int i = 0; i < 50; i++) + { + para += "a"; + length = para.size () + 1; + if (length == 5) + { + char tmp_char[5]; + strcpy (tmp_char, para.c_str ()); + ModuleBase::GlobalFunc::OUT (ofs, "para", tmp_char); + } + else if (length == 6) + { + char tmp_char[6]; + strcpy (tmp_char, para.c_str ()); + ModuleBase::GlobalFunc::OUT (ofs, "para", tmp_char); + } + else if (length == 13) + { + char tmp_char[13]; + strcpy (tmp_char, para.c_str ()); + ModuleBase::GlobalFunc::OUT (ofs, "para", tmp_char); + } + else if (length == 15) + { + char tmp_char[15]; + strcpy (tmp_char, para.c_str ()); + ModuleBase::GlobalFunc::OUT (ofs, "para", tmp_char); + } + else if (length == 20) + { + char tmp_char[20]; + strcpy (tmp_char, para.c_str ()); + ModuleBase::GlobalFunc::OUT (ofs, "para", tmp_char); + } + else if (length == 22) + { + char tmp_char[22]; + strcpy (tmp_char, para.c_str ()); + ModuleBase::GlobalFunc::OUT (ofs, "para", tmp_char); + } + else if (length == 23) + { + char tmp_char[23]; + strcpy (tmp_char, 
para.c_str ()); + ModuleBase::GlobalFunc::OUT (ofs, "para", tmp_char); + } + else if (length == 25) + { + char tmp_char[25]; + strcpy (tmp_char, para.c_str ()); + ModuleBase::GlobalFunc::OUT (ofs, "para", tmp_char); + } + else if (length == 28) + { + char tmp_char[28]; + strcpy (tmp_char, para.c_str ()); + ModuleBase::GlobalFunc::OUT (ofs, "para", tmp_char); + } + else if (length == 29) + { + char tmp_char[29]; + strcpy (tmp_char, para.c_str ()); + ModuleBase::GlobalFunc::OUT (ofs, "para", tmp_char); + } + else if (length == 30) + { + char tmp_char[30]; + strcpy (tmp_char, para.c_str ()); + ModuleBase::GlobalFunc::OUT (ofs, "para", tmp_char); + } + else if (length == 32) + { + char tmp_char[32]; + strcpy (tmp_char, para.c_str ()); + ModuleBase::GlobalFunc::OUT (ofs, "para", tmp_char); + } + } + ofs.close (); + ifs.open ("tmp"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("tmp_bool = 1")); + EXPECT_THAT (str, testing::HasSubstr ("tmp_int = 1")); + EXPECT_THAT (str, testing::HasSubstr ("tmp_long = 2")); + EXPECT_THAT (str, testing::HasSubstr ("tmp_unsigned_long = 3")); + EXPECT_THAT (str, testing::HasSubstr ("tmp_float = 4")); + EXPECT_THAT (str, testing::HasSubstr ("tmp_double = 5")); + EXPECT_THAT (str, testing::HasSubstr ("tmp_string = string")); std::string tmp_para = "a"; - for (int i=0;i<50;i++) - { - tmp_para += "a"; - length = tmp_para.size()+1; - if (length == 32) { EXPECT_THAT(str, testing::HasSubstr(tmp_para)); -} - } - ifs.close(); + for (int i = 0; i < 50; i++) + { + tmp_para += "a"; + length = tmp_para.size () + 1; + if (length == 32) + { + EXPECT_THAT (str, testing::HasSubstr (tmp_para)); + } + } + ifs.close (); } -TEST_F(GlobalFunctionTest, OutV3) +TEST_F (GlobalFunctionTest, OutV3) { - ofs.open("tmp"); + ofs.open ("tmp"); int nx = 100; int ny = 125; int nz = 375; double ax = 1.1; double ay = 2.2; double az = 3.3; - ModuleBase::GlobalFunc::OUT(ofs, "grid", nx, ny, nz); - 
ModuleBase::GlobalFunc::OUT(ofs, "direct", ax, ay, az); - ofs.close(); - ifs.open("tmp"); - getline(ifs, output); - EXPECT_THAT(output, testing::HasSubstr("grid = [ 100, 125, 375 ]")); - getline(ifs, output); - EXPECT_THAT(output, testing::HasSubstr("direct = [ 1.1, 2.2, 3.3 ]")); - ifs.close(); + ModuleBase::GlobalFunc::OUT (ofs, "grid", nx, ny, nz); + ModuleBase::GlobalFunc::OUT (ofs, "direct", ax, ay, az); + ofs.close (); + ifs.open ("tmp"); + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("grid = [ 100, 125, 375 ]")); + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("direct = [ 1.1, 2.2, 3.3 ]")); + ifs.close (); } // P for parameters -TEST_F(GlobalFunctionTest, OutP) +TEST_F (GlobalFunctionTest, OutP) { - ofs.open("tmp"); + ofs.open ("tmp"); bool tmp_bool = true; int tmp_int = 1; double tmp_double = 2.0; @@ -337,22 +367,22 @@ TEST_F(GlobalFunctionTest, OutP) std::string tmp_int_ex = "tmp_int_ex"; std::string tmp_double_ex = "tmp_double_ex"; std::string tmp_string_ex = "tmp_string_ex"; - ofs << std::setiosflags(std::ios::left); - ModuleBase::GlobalFunc::OUTP(ofs, "tmp_bool", tmp_bool, tmp_bool_ex); - ModuleBase::GlobalFunc::OUTP(ofs, "tmp_int", tmp_int, tmp_int_ex); - ModuleBase::GlobalFunc::OUTP(ofs, "tmp_double", tmp_double, tmp_double_ex); - ModuleBase::GlobalFunc::OUTP(ofs, "tmp_string", tmp_string, tmp_string_ex); - ofs.close(); - ifs.open("tmp"); - std::string str((std::istreambuf_iterator(ifs)),std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("tmp_bool 1 #tmp_bool_ex")); - EXPECT_THAT(str, testing::HasSubstr("tmp_int 1 #tmp_int_ex")); - EXPECT_THAT(str, testing::HasSubstr("tmp_double 2 #tmp_double_ex")); - EXPECT_THAT(str, testing::HasSubstr("tmp_string string #tmp_string_ex")); - ifs.close(); + ofs << std::setiosflags (std::ios::left); + ModuleBase::GlobalFunc::OUTP (ofs, "tmp_bool", tmp_bool, tmp_bool_ex); + ModuleBase::GlobalFunc::OUTP (ofs, "tmp_int", tmp_int, tmp_int_ex); + 
ModuleBase::GlobalFunc::OUTP (ofs, "tmp_double", tmp_double, tmp_double_ex); + ModuleBase::GlobalFunc::OUTP (ofs, "tmp_string", tmp_string, tmp_string_ex); + ofs.close (); + ifs.open ("tmp"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("tmp_bool 1 #tmp_bool_ex")); + EXPECT_THAT (str, testing::HasSubstr ("tmp_int 1 #tmp_int_ex")); + EXPECT_THAT (str, testing::HasSubstr ("tmp_double 2 #tmp_double_ex")); + EXPECT_THAT (str, testing::HasSubstr ("tmp_string string #tmp_string_ex")); + ifs.close (); } -TEST_F(GlobalFunctionTest, ToString) +TEST_F (GlobalFunctionTest, ToString) { bool tmp_bool = true; int tmp_int = 1; @@ -361,273 +391,301 @@ TEST_F(GlobalFunctionTest, ToString) float tmp_float = 4.0; double tmp_double = 5.0; std::string tmp_string = "string"; - EXPECT_EQ(ModuleBase::GlobalFunc::TO_STRING(tmp_bool),"1"); - EXPECT_EQ(ModuleBase::GlobalFunc::TO_STRING(tmp_int),"1"); - EXPECT_EQ(ModuleBase::GlobalFunc::TO_STRING(tmp_long),"2"); - EXPECT_EQ(ModuleBase::GlobalFunc::TO_STRING(tmp_unsigned_long),"3"); - EXPECT_EQ(ModuleBase::GlobalFunc::TO_STRING(tmp_float),"4"); - EXPECT_EQ(ModuleBase::GlobalFunc::TO_STRING(tmp_double),"5"); - EXPECT_EQ(ModuleBase::GlobalFunc::TO_STRING(tmp_string),"string"); + EXPECT_EQ (ModuleBase::GlobalFunc::TO_STRING (tmp_bool), "1"); + EXPECT_EQ (ModuleBase::GlobalFunc::TO_STRING (tmp_int), "1"); + EXPECT_EQ (ModuleBase::GlobalFunc::TO_STRING (tmp_long), "2"); + EXPECT_EQ (ModuleBase::GlobalFunc::TO_STRING (tmp_unsigned_long), "3"); + EXPECT_EQ (ModuleBase::GlobalFunc::TO_STRING (tmp_float), "4"); + EXPECT_EQ (ModuleBase::GlobalFunc::TO_STRING (tmp_double), "5"); + EXPECT_EQ (ModuleBase::GlobalFunc::TO_STRING (tmp_string), "string"); std::string para = ""; int length = 0; - for (int i=0;i<100;i++) - { - para += "a"; - length = para.size()+1; - if(length == 42){ - char tmp_char[42]; - strcpy(tmp_char,para.c_str()); - 
EXPECT_EQ(ModuleBase::GlobalFunc::TO_STRING(tmp_char),para);} - else if (length == 47){ - char tmp_char[47]; - strcpy(tmp_char,para.c_str()); - EXPECT_EQ(ModuleBase::GlobalFunc::TO_STRING(tmp_char),para);} - else if (length == 50){ - char tmp_char[50]; - strcpy(tmp_char,para.c_str()); - EXPECT_EQ(ModuleBase::GlobalFunc::TO_STRING(tmp_char),para);} - else if (length == 52){ - char tmp_char[52]; - strcpy(tmp_char,para.c_str()); - EXPECT_EQ(ModuleBase::GlobalFunc::TO_STRING(tmp_char),para);} - else if (length == 53){ - char tmp_char[53]; - strcpy(tmp_char,para.c_str()); - EXPECT_EQ(ModuleBase::GlobalFunc::TO_STRING(tmp_char),para);} - else if (length == 63){ - char tmp_char[63]; - strcpy(tmp_char,para.c_str()); - EXPECT_EQ(ModuleBase::GlobalFunc::TO_STRING(tmp_char),para);} - else if (length == 64){ - char tmp_char[64]; - strcpy(tmp_char,para.c_str()); - EXPECT_EQ(ModuleBase::GlobalFunc::TO_STRING(tmp_char),para);} - else if (length == 74){ - char tmp_char[74]; - strcpy(tmp_char,para.c_str()); - EXPECT_EQ(ModuleBase::GlobalFunc::TO_STRING(tmp_char),para);} - else if (length == 81){ - char tmp_char[81]; - strcpy(tmp_char,para.c_str()); - EXPECT_EQ(ModuleBase::GlobalFunc::TO_STRING(tmp_char),para);} - else if (length == 83){ - char tmp_char[83]; - strcpy(tmp_char,para.c_str()); - EXPECT_EQ(ModuleBase::GlobalFunc::TO_STRING(tmp_char),para);} - } + for (int i = 0; i < 100; i++) + { + para += "a"; + length = para.size () + 1; + if (length == 42) + { + char tmp_char[42]; + strcpy (tmp_char, para.c_str ()); + EXPECT_EQ (ModuleBase::GlobalFunc::TO_STRING (tmp_char), para); + } + else if (length == 47) + { + char tmp_char[47]; + strcpy (tmp_char, para.c_str ()); + EXPECT_EQ (ModuleBase::GlobalFunc::TO_STRING (tmp_char), para); + } + else if (length == 50) + { + char tmp_char[50]; + strcpy (tmp_char, para.c_str ()); + EXPECT_EQ (ModuleBase::GlobalFunc::TO_STRING (tmp_char), para); + } + else if (length == 52) + { + char tmp_char[52]; + strcpy (tmp_char, para.c_str ()); + 
EXPECT_EQ (ModuleBase::GlobalFunc::TO_STRING (tmp_char), para); + } + else if (length == 53) + { + char tmp_char[53]; + strcpy (tmp_char, para.c_str ()); + EXPECT_EQ (ModuleBase::GlobalFunc::TO_STRING (tmp_char), para); + } + else if (length == 63) + { + char tmp_char[63]; + strcpy (tmp_char, para.c_str ()); + EXPECT_EQ (ModuleBase::GlobalFunc::TO_STRING (tmp_char), para); + } + else if (length == 64) + { + char tmp_char[64]; + strcpy (tmp_char, para.c_str ()); + EXPECT_EQ (ModuleBase::GlobalFunc::TO_STRING (tmp_char), para); + } + else if (length == 74) + { + char tmp_char[74]; + strcpy (tmp_char, para.c_str ()); + EXPECT_EQ (ModuleBase::GlobalFunc::TO_STRING (tmp_char), para); + } + else if (length == 81) + { + char tmp_char[81]; + strcpy (tmp_char, para.c_str ()); + EXPECT_EQ (ModuleBase::GlobalFunc::TO_STRING (tmp_char), para); + } + else if (length == 83) + { + char tmp_char[83]; + strcpy (tmp_char, para.c_str ()); + EXPECT_EQ (ModuleBase::GlobalFunc::TO_STRING (tmp_char), para); + } + } } -TEST_F(GlobalFunctionTest, MakeDir) +TEST_F (GlobalFunctionTest, MakeDir) { GlobalV::MY_RANK = 0; - ModuleBase::GlobalFunc::MAKE_DIR("scf"); - + ModuleBase::GlobalFunc::MAKE_DIR ("scf"); + struct stat st; - int error1 = stat("scf", &st); - EXPECT_EQ(error1, 0); - EXPECT_TRUE(S_ISDIR(st.st_mode)); + int error1 = stat ("scf", &st); + EXPECT_EQ (error1, 0); + EXPECT_TRUE (S_ISDIR (st.st_mode)); - int error2 = rmdir("scf"); - EXPECT_EQ(error2, 0); - SUCCEED(); + int error2 = rmdir ("scf"); + EXPECT_EQ (error2, 0); + SUCCEED (); } -TEST_F(GlobalFunctionTest, OutTime) +TEST_F (GlobalFunctionTest, OutTime) { std::string name = "scf"; - start = time(nullptr); - end = time(nullptr) + 200; - ModuleBase::GlobalFunc::OUT_TIME(name, start, end); - GlobalV::ofs_warning.close(); - ifs.open("warning.log"); - getline(ifs, output); - getline(ifs, output); - EXPECT_THAT(output, testing::HasSubstr("NAME < scf >")); - ifs.close(); + start = time (nullptr); + end = time (nullptr) + 200; + 
ModuleBase::GlobalFunc::OUT_TIME (name, start, end); + GlobalV::ofs_warning.close (); + ifs.open ("warning.log"); + getline (ifs, output); + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("NAME < scf >")); + ifs.close (); } -TEST_F(GlobalFunctionTest, AutoSet) +TEST_F (GlobalFunctionTest, AutoSet) { bool tmp_b = false; int tmp_i = 1; float tmp_f = 2.0; double tmp_d = 3.0; std::string tmp_string = "string"; - ModuleBase::GlobalFunc::AUTO_SET("tmp_b", tmp_b); - ModuleBase::GlobalFunc::AUTO_SET("tmp_i", tmp_i); - ModuleBase::GlobalFunc::AUTO_SET("tmp_f", tmp_f); - ModuleBase::GlobalFunc::AUTO_SET("tmp_d", tmp_d); - ModuleBase::GlobalFunc::AUTO_SET("tmp_string", tmp_string); + ModuleBase::GlobalFunc::AUTO_SET ("tmp_b", tmp_b); + ModuleBase::GlobalFunc::AUTO_SET ("tmp_i", tmp_i); + ModuleBase::GlobalFunc::AUTO_SET ("tmp_f", tmp_f); + ModuleBase::GlobalFunc::AUTO_SET ("tmp_d", tmp_d); + ModuleBase::GlobalFunc::AUTO_SET ("tmp_string", tmp_string); std::string para = ""; int length = 0; - for (int i=0;i<10;i++) - { - para += "a"; - length = para.size()+1; - if(length == 2){ - char tmp_char[2]; - strcpy(tmp_char,para.c_str()); - ModuleBase::GlobalFunc::AUTO_SET("tmp_char",tmp_char);} - else if (length == 3){ - char tmp_char[3]; - strcpy(tmp_char,para.c_str()); - ModuleBase::GlobalFunc::AUTO_SET("tmp_char",tmp_char);} - else if (length == 6){ - char tmp_char[6]; - strcpy(tmp_char,para.c_str()); - ModuleBase::GlobalFunc::AUTO_SET("tmp_char",tmp_char);} - else if (length == 8){ - char tmp_char[8]; - strcpy(tmp_char,para.c_str()); - ModuleBase::GlobalFunc::AUTO_SET("tmp_char",tmp_char);} - } - GlobalV::ofs_warning.close(); - ifs.open("warning.log"); - std::string str((std::istreambuf_iterator(ifs)),std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("AUTO_SET tmp_b to 0")); - EXPECT_THAT(str, testing::HasSubstr("AUTO_SET tmp_i to 1")); - EXPECT_THAT(str, testing::HasSubstr("AUTO_SET tmp_f to 2")); - EXPECT_THAT(str, testing::HasSubstr("AUTO_SET 
tmp_d to 3")); - EXPECT_THAT(str, testing::HasSubstr("AUTO_SET tmp_string to string")); - EXPECT_THAT(str, testing::HasSubstr("AUTO_SET tmp_char to aaaaaaa")); - ifs.close(); + for (int i = 0; i < 10; i++) + { + para += "a"; + length = para.size () + 1; + if (length == 2) + { + char tmp_char[2]; + strcpy (tmp_char, para.c_str ()); + ModuleBase::GlobalFunc::AUTO_SET ("tmp_char", tmp_char); + } + else if (length == 3) + { + char tmp_char[3]; + strcpy (tmp_char, para.c_str ()); + ModuleBase::GlobalFunc::AUTO_SET ("tmp_char", tmp_char); + } + else if (length == 6) + { + char tmp_char[6]; + strcpy (tmp_char, para.c_str ()); + ModuleBase::GlobalFunc::AUTO_SET ("tmp_char", tmp_char); + } + else if (length == 8) + { + char tmp_char[8]; + strcpy (tmp_char, para.c_str ()); + ModuleBase::GlobalFunc::AUTO_SET ("tmp_char", tmp_char); + } + } + GlobalV::ofs_warning.close (); + ifs.open ("warning.log"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("AUTO_SET tmp_b to 0")); + EXPECT_THAT (str, testing::HasSubstr ("AUTO_SET tmp_i to 1")); + EXPECT_THAT (str, testing::HasSubstr ("AUTO_SET tmp_f to 2")); + EXPECT_THAT (str, testing::HasSubstr ("AUTO_SET tmp_d to 3")); + EXPECT_THAT (str, testing::HasSubstr ("AUTO_SET tmp_string to string")); + EXPECT_THAT (str, testing::HasSubstr ("AUTO_SET tmp_char to aaaaaaa")); + ifs.close (); } -TEST_F(GlobalFunctionTest, Done) +TEST_F (GlobalFunctionTest, Done) { - ofs.open("tmp"); - testing::internal::CaptureStdout(); - ModuleBase::GlobalFunc::DONE(ofs, "SETUP UNITCELL"); + ofs.open ("tmp"); + testing::internal::CaptureStdout (); + ModuleBase::GlobalFunc::DONE (ofs, "SETUP UNITCELL"); // output on screen - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("DONE")); - EXPECT_THAT(output, testing::HasSubstr("SETUP UNITCELL")); - ofs.close(); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, 
testing::HasSubstr ("DONE")); + EXPECT_THAT (output, testing::HasSubstr ("SETUP UNITCELL")); + ofs.close (); // output in file - ifs.open("tmp"); + ifs.open ("tmp"); std::string outputf; - getline(ifs, outputf); - EXPECT_THAT(outputf, testing::HasSubstr("DONE")); - EXPECT_THAT(outputf, testing::HasSubstr("SETUP UNITCELL")); - ifs.close(); + getline (ifs, outputf); + EXPECT_THAT (outputf, testing::HasSubstr ("DONE")); + EXPECT_THAT (outputf, testing::HasSubstr ("SETUP UNITCELL")); + ifs.close (); } -TEST_F(GlobalFunctionTest, Zeros) +TEST_F (GlobalFunctionTest, Zeros) { int size_i = 1000; - CHECK_ZEROS(size_i); + CHECK_ZEROS (size_i); long size_l = 1000; - CHECK_ZEROS(size_l); + CHECK_ZEROS (size_l); unsigned long size_ul = 1000; - CHECK_ZEROS(size_ul); + CHECK_ZEROS (size_ul); long long size_ll = 1000; - CHECK_ZEROS(size_ll); + CHECK_ZEROS (size_ll); } -TEST_F(GlobalFunctionTest, Scan) +TEST_F (GlobalFunctionTest, Scan) { - ofs.open("tmp"); + ofs.open ("tmp"); ofs << "" << std::endl; ofs << "100 100 100" << std::endl; ofs << "" << std::endl; - ofs.close(); - ifs.open("tmp"); - EXPECT_FALSE(ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "")); - getline(ifs, output); - getline(ifs, output); + ofs.close (); + ifs.open ("tmp"); + EXPECT_FALSE (ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "")); + getline (ifs, output); + getline (ifs, output); // std::cout << output << std::endl; - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - EXPECT_TRUE(ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "")); - getline(ifs, output); - getline(ifs, output); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); + EXPECT_TRUE (ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "")); + getline (ifs, output); + getline (ifs, output); // std::cout << output << std::endl; - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - ifs.close(); - ifs.open("warning.log"); - getline(ifs, output); - EXPECT_THAT(output, testing::HasSubstr("In SCAN_BEGIN, can't find: block.")); - getline(ifs, output); - EXPECT_THAT(output, testing::HasSubstr("In 
SCAN_END, can't find: block.")); - ifs.close(); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); + ifs.close (); + ifs.open ("warning.log"); + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("In SCAN_BEGIN, can't find: block.")); + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("In SCAN_END, can't find: block.")); + ifs.close (); } -TEST_F(GlobalFunctionTest, MapExist) +TEST_F (GlobalFunctionTest, MapExist) { std::map SPIN = {{1, 2}, {3, 4}, {5, 6}}; - EXPECT_EQ(ModuleBase::GlobalFunc::MAP_EXIST(SPIN, 1), &SPIN[1]); - EXPECT_EQ(ModuleBase::GlobalFunc::MAP_EXIST(SPIN, 3), &SPIN[3]); - EXPECT_EQ(ModuleBase::GlobalFunc::MAP_EXIST(SPIN, 5), &SPIN[5]); + EXPECT_EQ (ModuleBase::GlobalFunc::MAP_EXIST (SPIN, 1), &SPIN[1]); + EXPECT_EQ (ModuleBase::GlobalFunc::MAP_EXIST (SPIN, 3), &SPIN[3]); + EXPECT_EQ (ModuleBase::GlobalFunc::MAP_EXIST (SPIN, 5), &SPIN[5]); } -TEST_F(GlobalFunctionTest, ReadValue) +TEST_F (GlobalFunctionTest, ReadValue) { - ofs.open("tmp"); + ofs.open ("tmp"); ofs << "100" << std::endl; ofs << "3.0" << std::endl; ofs << "string" << std::endl; - ofs.close(); - ifs.open("tmp"); + ofs.close (); + ifs.open ("tmp"); int tmp_int = 0; double tmp_double = 0.0; std::string tmp_string; // source/source_cell/read_atoms.cpp line 153:154 - ModuleBase::GlobalFunc::READ_VALUE(ifs, tmp_int); - ModuleBase::GlobalFunc::READ_VALUE(ifs, tmp_double); - ModuleBase::GlobalFunc::READ_VALUE(ifs, tmp_string); - ifs.close(); - EXPECT_EQ(tmp_int, 100); - EXPECT_DOUBLE_EQ(tmp_double, 3.0); - EXPECT_EQ(tmp_string, "string"); + ModuleBase::GlobalFunc::READ_VALUE (ifs, tmp_int); + ModuleBase::GlobalFunc::READ_VALUE (ifs, tmp_double); + ModuleBase::GlobalFunc::READ_VALUE (ifs, tmp_string); + ifs.close (); + EXPECT_EQ (tmp_int, 100); + EXPECT_DOUBLE_EQ (tmp_double, 3.0); + EXPECT_EQ (tmp_string, "string"); } -TEST_F(GlobalFunctionTest, Dcopy) +TEST_F (GlobalFunctionTest, Dcopy) { int size = 100; - std::vector> aa(size, std::complex(1.0, 2.0)); - 
std::vector> bb(size); - std::vector daa(size,1.1); - std::vector dbb(size); + std::vector> aa (size, std::complex (1.0, 2.0)); + std::vector> bb (size); + std::vector daa (size, 1.1); + std::vector dbb (size); std::complex* aalist = new std::complex[size]; std::complex* bblist = new std::complex[size]; - for (int i=0;i(1.0,2.0); - bblist[i] = std::complex(0.0,0.0); - } + for (int i = 0; i < size; i++) + { + aalist[i] = std::complex (1.0, 2.0); + bblist[i] = std::complex (0.0, 0.0); + } double* daalist = new double[size]; double* dbblist = new double[size]; - for (int i=0;i aa(size, 1.0); - EXPECT_EQ(ModuleBase::GlobalFunc::VECTOR_TO_PTR(aa), aa.data()); - std::valarray bb(1.0, size); - EXPECT_EQ(ModuleBase::GlobalFunc::VECTOR_TO_PTR(bb), &bb[0]); - const std::vector cc(size, 1.0); - EXPECT_EQ(ModuleBase::GlobalFunc::VECTOR_TO_PTR(cc), cc.data()); - const std::valarray dd(1.0, size); - EXPECT_EQ(ModuleBase::GlobalFunc::VECTOR_TO_PTR(dd), &dd[0]); + std::vector aa (size, 1.0); + EXPECT_EQ (ModuleBase::GlobalFunc::VECTOR_TO_PTR (aa), aa.data ()); + std::valarray bb (1.0, size); + EXPECT_EQ (ModuleBase::GlobalFunc::VECTOR_TO_PTR (bb), &bb[0]); + const std::vector cc (size, 1.0); + EXPECT_EQ (ModuleBase::GlobalFunc::VECTOR_TO_PTR (cc), cc.data ()); + const std::valarray dd (1.0, size); + EXPECT_EQ (ModuleBase::GlobalFunc::VECTOR_TO_PTR (dd), &dd[0]); } -TEST_F(GlobalFunctionTest, COPYARRAY) +TEST_F (GlobalFunctionTest, COPYARRAY) { long size = 100; std::complex* aa = nullptr; @@ -635,120 +693,122 @@ TEST_F(GlobalFunctionTest, COPYARRAY) aa = new std::complex[size]; bb = new std::complex[size]; std::complex value{1.1, 2.2}; - std::fill(&aa[0], &aa[size], value); - ModuleBase::GlobalFunc::COPYARRAY(aa,bb,size); + std::fill (&aa[0], &aa[size], value); + ModuleBase::GlobalFunc::COPYARRAY (aa, bb, size); for (int i = 0; i < size; ++i) - { - EXPECT_COMPLEX_DOUBLE_EQ(bb[i], value); - } + { + EXPECT_COMPLEX_DOUBLE_EQ (bb[i], value); + } double* daa = nullptr; double* dbb = 
nullptr; daa = new double[size]; dbb = new double[size]; - std::fill(&daa[0],&daa[size],3.3); - ModuleBase::GlobalFunc::COPYARRAY(daa,dbb,size); + std::fill (&daa[0], &daa[size], 3.3); + ModuleBase::GlobalFunc::COPYARRAY (daa, dbb, size); for (int i = 0; i < size; ++i) - { - EXPECT_DOUBLE_EQ(dbb[i], 3.3); - } + { + EXPECT_DOUBLE_EQ (dbb[i], 3.3); + } delete[] aa; delete[] bb; delete[] daa; delete[] dbb; } -TEST_F(GlobalFunctionTest,IsColumnMajor) +TEST_F (GlobalFunctionTest, IsColumnMajor) { - EXPECT_TRUE(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER("genelpa")); + EXPECT_TRUE (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER ("genelpa")); } -TEST_F(GlobalFunctionTest,Vector2Ptr) +TEST_F (GlobalFunctionTest, Vector2Ptr) { int size = 100; - std::vector> aa(size, std::complex(1.0, 2.0)); + std::vector> aa (size, std::complex (1.0, 2.0)); std::complex* ptr_d = nullptr; - ptr_d=ModuleBase::GlobalFunc::VECTOR_TO_PTR(aa); + ptr_d = ModuleBase::GlobalFunc::VECTOR_TO_PTR (aa); for (int i = 0; i < size; ++i) - { - EXPECT_COMPLEX_DOUBLE_EQ(ptr_d[i],std::complex(1.0,2.0)); - } + { + EXPECT_COMPLEX_DOUBLE_EQ (ptr_d[i], std::complex (1.0, 2.0)); + } } -TEST_F(GlobalFunctionTest,MemAvailable) +TEST_F (GlobalFunctionTest, MemAvailable) { - for(int i=0;i<5;i++) - { - std::ifstream ifs("/proc/meminfo"); - while (ifs.good()) + for (int i = 0; i < 5; i++) { - std::string label, size, kB; - ifs >> label >> size >> kB; - if (label == "MemAvailable:") - { - EXPECT_LE(std::stol(size)-1000,ModuleBase::GlobalFunc::MemAvailable()); - EXPECT_GE(std::stol(size)+1000,ModuleBase::GlobalFunc::MemAvailable()); - } + std::ifstream ifs ("/proc/meminfo"); + while (ifs.good ()) + { + std::string label, size, kB; + ifs >> label >> size >> kB; + if (label == "MemAvailable:") + { + EXPECT_LE (std::stol (size) - 1000, ModuleBase::GlobalFunc::MemAvailable ()); + EXPECT_GE (std::stol (size) + 1000, ModuleBase::GlobalFunc::MemAvailable ()); + } + } } - } } - -TEST_F(GlobalFunctionTest,BlockHere) 
+TEST_F (GlobalFunctionTest, BlockHere) { #ifdef __MPI #undef __MPI #endif - std::string output2; - std::string block_in="111"; - GlobalV::MY_RANK=1; - testing::internal::CaptureStdout(); - EXPECT_EXIT(ModuleBase::GlobalFunc::BLOCK_HERE(block_in), ::testing::ExitedWithCode(0),""); - output2 = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output2,testing::HasSubstr("\n********************************************" - "\n Here is a Block, 1: go on 0: quit" - "\n 111" - "\n********************************************")); + std::string output2; + std::string block_in = "111"; + GlobalV::MY_RANK = 1; + testing::internal::CaptureStdout (); + EXPECT_EXIT (ModuleBase::GlobalFunc::BLOCK_HERE (block_in), ::testing::ExitedWithCode (0), ""); + output2 = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output2, + testing::HasSubstr ("\n********************************************" + "\n Here is a Block, 1: go on 0: quit" + "\n 111" + "\n********************************************")); } -TEST_F(GlobalFunctionTest,BlockHere2) +TEST_F (GlobalFunctionTest, BlockHere2) { #ifdef __MPI #undef __MPI #endif - std::string output2; - std::string block_in="111"; - GlobalV::MY_RANK=0; - std::string fake_input = "1"; - std::istringstream iss{fake_input}; - std::cin.rdbuf(iss.rdbuf()); - testing::internal::CaptureStdout(); -// EXPECT_EXIT(ModuleBase::GlobalFunc::BLOCK_HERE(block_in), ::testing::ExitedWithCode(1),""); - ModuleBase::GlobalFunc::BLOCK_HERE(block_in); - output2 = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output2,testing::HasSubstr("\n********************************************" - "\n Here is a Block, 1: go on 0: quit" - "\n 111" - "\n********************************************")); + std::string output2; + std::string block_in = "111"; + GlobalV::MY_RANK = 0; + std::string fake_input = "1"; + std::istringstream iss{fake_input}; + std::cin.rdbuf (iss.rdbuf ()); + testing::internal::CaptureStdout (); + // 
EXPECT_EXIT(ModuleBase::GlobalFunc::BLOCK_HERE(block_in), ::testing::ExitedWithCode(1),""); + ModuleBase::GlobalFunc::BLOCK_HERE (block_in); + output2 = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output2, + testing::HasSubstr ("\n********************************************" + "\n Here is a Block, 1: go on 0: quit" + "\n 111" + "\n********************************************")); } -TEST_F(GlobalFunctionTest,BlockHere3) +TEST_F (GlobalFunctionTest, BlockHere3) { #ifdef __MPI #undef __MPI #endif - std::string output2; - std::string block_in="111"; - GlobalV::MY_RANK=0; - testing::internal::CaptureStdout(); - std::string fake_input = "0"; - std::istringstream iss{fake_input}; - std::cin.rdbuf(iss.rdbuf()); - EXPECT_EXIT(ModuleBase::GlobalFunc::BLOCK_HERE(block_in), ::testing::ExitedWithCode(0),""); - output2 = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output2,testing::HasSubstr("\n********************************************" - "\n Here is a Block, 1: go on 0: quit" - "\n 111" - "\n********************************************")); + std::string output2; + std::string block_in = "111"; + GlobalV::MY_RANK = 0; + testing::internal::CaptureStdout (); + std::string fake_input = "0"; + std::istringstream iss{fake_input}; + std::cin.rdbuf (iss.rdbuf ()); + EXPECT_EXIT (ModuleBase::GlobalFunc::BLOCK_HERE (block_in), ::testing::ExitedWithCode (0), ""); + output2 = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output2, + testing::HasSubstr ("\n********************************************" + "\n Here is a Block, 1: go on 0: quit" + "\n 111" + "\n********************************************")); } /* @@ -765,16 +825,14 @@ TEST_F(GlobalFunctionTest, Note) } */ -TEST_F(GlobalFunctionTest,Vector2Ptr_v3double) +TEST_F (GlobalFunctionTest, Vector2Ptr_v3double) { int size = 100; - std::vector> abcd(size, ModuleBase::Vector3(1.1,2.2,3.3)); + std::vector> abcd (size, ModuleBase::Vector3 (1.1, 2.2, 3.3)); ModuleBase::Vector3* ptr_v3d = nullptr; - 
ptr_v3d=ModuleBase::GlobalFunc::VECTOR_TO_PTR(abcd); + ptr_v3d = ModuleBase::GlobalFunc::VECTOR_TO_PTR (abcd); for (int i = 0; i < size; ++i) - { - EXPECT_EQ(ptr_v3d[i],ModuleBase::Vector3(1.1,2.2,3.3)); - } + { + EXPECT_EQ (ptr_v3d[i], ModuleBase::Vector3 (1.1, 2.2, 3.3)); + } } - - diff --git a/source/source_base/test/gram_schmidt_orth_test.cpp b/source/source_base/test/gram_schmidt_orth_test.cpp index b1b8f409b85..00586e87c44 100644 --- a/source/source_base/test/gram_schmidt_orth_test.cpp +++ b/source/source_base/test/gram_schmidt_orth_test.cpp @@ -1,99 +1,106 @@ -#include"../gram_schmidt_orth.h" -#include"../gram_schmidt_orth-inl.h" -#include"gtest/gtest.h" - +#include "../gram_schmidt_orth.h" +#include "../gram_schmidt_orth-inl.h" +#include "gtest/gtest.h" #define DOUBLETHRESHOLD 1e-8 - /************************************************ -* unit test of class Gram_Schmidt_Orth -***********************************************/ + * unit test of class Gram_Schmidt_Orth + ***********************************************/ /** - * Based on an linearly independent, but not orthonormal, - * set of functions x:{x1,x2,x3,...}, we can construct an - * orthonormal set X:{X1, X2, X3, ...} by using Gram-Schmidt + * Based on an linearly independent, but not orthonormal, + * set of functions x:{x1,x2,x3,...}, we can construct an + * orthonormal set X:{X1, X2, X3, ...} by using Gram-Schmidt * orthogonalization. * The new set X should has below properties: * 1. X1 = x1/||x1|| * 2. 
= 1 if (i == j) else 0 - * - * Note:in this class, for coordinate of sphere, the inner product + * + * Note:in this class, for coordinate of sphere, the inner product * of two radial function f(r) and g(r) equals the integral of r^2*f(r)*g(r) * $$ (f(r),g(r)) = {\int}r^2f(r)g(r)dr $$ - * + * */ -class GramSchmidtOrth +class GramSchmidtOrth { - public: + public: int nbasis; int ndim; double dr; std::vector r2; double norm0; - ModuleBase::Gram_Schmidt_Orth::Coordinate coordinate; + ModuleBase::Gram_Schmidt_Orth::Coordinate coordinate; std::vector rab; std::vector> basis; - GramSchmidtOrth(int nbasis, int ndim, double dr, - ModuleBase::Gram_Schmidt_Orth::Coordinate coordinate): - nbasis(nbasis),ndim(ndim),dr(dr),coordinate(coordinate) + GramSchmidtOrth (int nbasis, + int ndim, + double dr, + ModuleBase::Gram_Schmidt_Orth::Coordinate coordinate) + : nbasis (nbasis), ndim (ndim), dr (dr), coordinate (coordinate) { - basis.resize(nbasis,std::vector(ndim)); - rab.resize(ndim,dr); - r2.resize(ndim,1.0); + basis.resize (nbasis, std::vector (ndim)); + rab.resize (ndim, dr); + r2.resize (ndim, 1.0); - norm0 = sqrt(1.0/3.0 * pow(dr*(static_cast(ndim-1)),3.0)); - if (ModuleBase::Gram_Schmidt_Orth::Coordinate::Sphere == this->coordinate) - { - for(int i=0;i(ndim-1)),5.0)); - } + norm0 = sqrt (1.0 / 3.0 * pow (dr * (static_cast (ndim - 1)), 3.0)); + if (ModuleBase::Gram_Schmidt_Orth::Coordinate::Sphere == this->coordinate) + { + for (int i = 0; i < ndim; ++i) + { + r2[i] = dr * i * dr * i; + } + norm0 = sqrt (1.0 / 5.0 * pow (dr * (static_cast (ndim - 1)), 5.0)); + } - //build the function basis - for(int i=0;i(j) * dr, static_cast(i+1)); + for (int j = 0; j < ndim; ++j) + { + // function: f_i(x) = x^(i+1) + basis[i][j] = pow (static_cast (j) * dr, static_cast (i + 1)); + } } - } } - //calculate the inner product of two vector - double inner_product(std::vector a, std::vector b) + // calculate the inner product of two vector + double + inner_product (std::vector a, std::vector b) 
{ double ip; - std::vector mul_func = ModuleBase::Mathzone::Pointwise_Product(a,b); - std::vector mul_func1 = ModuleBase::Mathzone::Pointwise_Product(mul_func,r2); - ModuleBase::Integral::Simpson_Integral(mul_func1.size(),ModuleBase::GlobalFunc::VECTOR_TO_PTR(mul_func1),ModuleBase::GlobalFunc::VECTOR_TO_PTR(rab),ip); + std::vector mul_func = ModuleBase::Mathzone::Pointwise_Product (a, b); + std::vector mul_func1 = ModuleBase::Mathzone::Pointwise_Product (mul_func, r2); + ModuleBase::Integral::Simpson_Integral (mul_func1.size (), + ModuleBase::GlobalFunc::VECTOR_TO_PTR (mul_func1), + ModuleBase::GlobalFunc::VECTOR_TO_PTR (rab), + ip); return ip; } - }; -class GramSchmidtOrthTest : public ::testing::TestWithParam {}; - +class GramSchmidtOrthTest : public ::testing::TestWithParam +{ +}; -TEST_P(GramSchmidtOrthTest,CalOrth) +TEST_P (GramSchmidtOrthTest, CalOrth) { - GramSchmidtOrth gsot = GetParam(); - ModuleBase::Gram_Schmidt_Orth gso_sphere(gsot.rab,gsot.coordinate); - std::vector> old_basis = gsot.basis; - std::vector> new_basis = gso_sphere.cal_orth(old_basis); - + GramSchmidtOrth gsot = GetParam (); + ModuleBase::Gram_Schmidt_Orth gso_sphere (gsot.rab, gsot.coordinate); + std::vector> old_basis = gsot.basis; + std::vector> new_basis = gso_sphere.cal_orth (old_basis); + //========================================================== // VERIFY X0=x0/|x0| // the integral of old_basis[0] = {\int}_{0}^{dr*(ndim-1)} r^2*r*r dr // =1/5*r^5|_{0}^{dr*(ndim-1)} //========================================================== - for(int i=0;i = 0 for i!=j @@ -101,39 +108,53 @@ TEST_P(GramSchmidtOrthTest,CalOrth) int niter = 1; int maxiter = 1; bool pass = false; - double maxip; + double maxip; - //do iteration. + // do iteration. 
while (true) - { - int nbasis = new_basis.size(); - maxip = std::abs(gsot.inner_product(new_basis[nbasis-1],new_basis[nbasis-2])); - for(int i=0;i maxip) {maxip = std::abs(ip);} - } + int nbasis = new_basis.size (); + maxip = std::abs (gsot.inner_product (new_basis[nbasis - 1], new_basis[nbasis - 2])); + for (int i = 0; i < nbasis - 1; ++i) + { + for (int j = i + 1; j < nbasis; ++j) + { + double ip = gsot.inner_product (new_basis[i], new_basis[j]); + // std::cout << "i=" << i << ", j=" << j << ": " << ip << std::endl; + if (std::abs (ip) > maxip) + { + maxip = std::abs (ip); + } + } + } + if (maxip < DOUBLETHRESHOLD) + { + pass = true; + break; + }; + if (niter >= maxiter) + { + break; + } + + niter += 1; + old_basis = gso_sphere.cal_orth (new_basis); + new_basis = old_basis; } - if (maxip < DOUBLETHRESHOLD) {pass = true; break;}; - if (niter >= maxiter) {break;} - niter += 1; - old_basis = gso_sphere.cal_orth(new_basis); new_basis = old_basis; - } - - //std::cout << "nbasis=" << gsot.nbasis << "niter=" << niter << " max_inner_product=" << std::setprecision(15) << maxip << std::endl; - EXPECT_TRUE(pass) << "nbasis=" << gsot.nbasis << "niter=" << niter << " max_inner_product=" << std::setprecision(15) << maxip; + // std::cout << "nbasis=" << gsot.nbasis << "niter=" << niter << " max_inner_product=" << std::setprecision(15) << + // maxip << std::endl; + EXPECT_TRUE (pass) << "nbasis=" << gsot.nbasis << "niter=" << niter + << " max_inner_product=" << std::setprecision (15) << maxip; } -INSTANTIATE_TEST_SUITE_P(VerifyOrth,GramSchmidtOrthTest,::testing::Values( - GramSchmidtOrth(10,101,0.1,ModuleBase::Gram_Schmidt_Orth::Coordinate::Sphere), - GramSchmidtOrth(20,1001,0.01,ModuleBase::Gram_Schmidt_Orth::Coordinate::Sphere), - GramSchmidtOrth(50,10001,0.001,ModuleBase::Gram_Schmidt_Orth::Coordinate::Sphere), - GramSchmidtOrth(10,10001,0.001,ModuleBase::Gram_Schmidt_Orth::Coordinate::Cartesian), - 
GramSchmidtOrth(20,1001,0.01,ModuleBase::Gram_Schmidt_Orth::Coordinate::Cartesian), - GramSchmidtOrth(50,101,0.1,ModuleBase::Gram_Schmidt_Orth::Coordinate::Cartesian) -)); - +INSTANTIATE_TEST_SUITE_P ( + VerifyOrth, + GramSchmidtOrthTest, + ::testing::Values ( + GramSchmidtOrth (10, 101, 0.1, ModuleBase::Gram_Schmidt_Orth::Coordinate::Sphere), + GramSchmidtOrth (20, 1001, 0.01, ModuleBase::Gram_Schmidt_Orth::Coordinate::Sphere), + GramSchmidtOrth (50, 10001, 0.001, ModuleBase::Gram_Schmidt_Orth::Coordinate::Sphere), + GramSchmidtOrth (10, 10001, 0.001, ModuleBase::Gram_Schmidt_Orth::Coordinate::Cartesian), + GramSchmidtOrth (20, 1001, 0.01, ModuleBase::Gram_Schmidt_Orth::Coordinate::Cartesian), + GramSchmidtOrth (50, 101, 0.1, ModuleBase::Gram_Schmidt_Orth::Coordinate::Cartesian))); diff --git a/source/source_base/test/intarray_test.cpp b/source/source_base/test/intarray_test.cpp index bc9bb7b6fa6..b758deea8d9 100644 --- a/source/source_base/test/intarray_test.cpp +++ b/source/source_base/test/intarray_test.cpp @@ -10,7 +10,7 @@ * - Tested Functions: * - Construct * - construct an int array (2 to 6 dimensions) - * - Creat + * - Creat * - create an int array (2 to 6 dimensions) * - GetSize * - get the total size of an int array @@ -41,12 +41,12 @@ namespace ModuleBase { -void IntArrayAlloc(); +void IntArrayAlloc (); } class IntArrayTest : public testing::Test { -protected: + protected: ModuleBase::IntArray a2, a3, a4, a5, a6; int aa; int bb; @@ -54,331 +54,333 @@ class IntArrayTest : public testing::Test int count1; const int zero; - IntArrayTest() : aa(11), bb(1), zero(0) - { - } + IntArrayTest () : aa (11), bb (1), zero (0) {} }; -TEST_F(IntArrayTest,Construct) +TEST_F (IntArrayTest, Construct) { - ModuleBase::IntArray x2(1,5); - ModuleBase::IntArray x3(1,5,3); - ModuleBase::IntArray x4(1,7,3,4); - ModuleBase::IntArray x5(1,5,3,8,2); - ModuleBase::IntArray x6(1,7,3,4,3,2); - EXPECT_EQ(x2.getSize(),5); - EXPECT_EQ(x3.getSize(),15); - EXPECT_EQ(x4.getSize(),84); - 
EXPECT_EQ(x5.getSize(),240); - EXPECT_EQ(x6.getSize(),504); + ModuleBase::IntArray x2 (1, 5); + ModuleBase::IntArray x3 (1, 5, 3); + ModuleBase::IntArray x4 (1, 7, 3, 4); + ModuleBase::IntArray x5 (1, 5, 3, 8, 2); + ModuleBase::IntArray x6 (1, 7, 3, 4, 3, 2); + EXPECT_EQ (x2.getSize (), 5); + EXPECT_EQ (x3.getSize (), 15); + EXPECT_EQ (x4.getSize (), 84); + EXPECT_EQ (x5.getSize (), 240); + EXPECT_EQ (x6.getSize (), 504); } -TEST_F(IntArrayTest,Create) +TEST_F (IntArrayTest, Create) { - a2.create(2,1); - a3.create(3,2,1); - a4.create(4,3,2,1); - a5.create(5,4,3,2,1); - a6.create(6,5,4,3,2,1); - EXPECT_EQ(a2.getSize(),2); - EXPECT_EQ(a3.getSize(),6); - EXPECT_EQ(a4.getSize(),24); - EXPECT_EQ(a5.getSize(),120); - EXPECT_EQ(a6.getSize(),720); + a2.create (2, 1); + a3.create (3, 2, 1); + a4.create (4, 3, 2, 1); + a5.create (5, 4, 3, 2, 1); + a6.create (6, 5, 4, 3, 2, 1); + EXPECT_EQ (a2.getSize (), 2); + EXPECT_EQ (a3.getSize (), 6); + EXPECT_EQ (a4.getSize (), 24); + EXPECT_EQ (a5.getSize (), 120); + EXPECT_EQ (a6.getSize (), 720); } - -TEST_F(IntArrayTest,GetSize) +TEST_F (IntArrayTest, GetSize) { - ModuleBase::IntArray x3(1,5,3); - ModuleBase::IntArray x4(1,7,3,4); - EXPECT_EQ(x3.getSize(),15); - EXPECT_EQ(x4.getSize(),84); + ModuleBase::IntArray x3 (1, 5, 3); + ModuleBase::IntArray x4 (1, 7, 3, 4); + EXPECT_EQ (x3.getSize (), 15); + EXPECT_EQ (x4.getSize (), 84); } -TEST_F(IntArrayTest,GetDim) +TEST_F (IntArrayTest, GetDim) { - a2.create(2,3); - a3.create(3,5,1); - a4.create(4,3,7,1); - a5.create(5,4,1,2,1); - a6.create(6,5,9,3,2,1); - EXPECT_EQ(a2.getDim(),2); - EXPECT_EQ(a3.getDim(),3); - EXPECT_EQ(a4.getDim(),4); - EXPECT_EQ(a5.getDim(),5); - EXPECT_EQ(a6.getDim(),6); + a2.create (2, 3); + a3.create (3, 5, 1); + a4.create (4, 3, 7, 1); + a5.create (5, 4, 1, 2, 1); + a6.create (6, 5, 9, 3, 2, 1); + EXPECT_EQ (a2.getDim (), 2); + EXPECT_EQ (a3.getDim (), 3); + EXPECT_EQ (a4.getDim (), 4); + EXPECT_EQ (a5.getDim (), 5); + EXPECT_EQ (a6.getDim (), 6); } 
-TEST_F(IntArrayTest,ZeroOut) +TEST_F (IntArrayTest, ZeroOut) { - a2.create(2,3); - a3.create(3,5,1); - a4.create(4,3,7,1); - a5.create(5,4,1,2,1); - a6.create(6,5,9,3,2,1); - a2.zero_out(); - a3.zero_out(); - a4.zero_out(); - a5.zero_out(); - a6.zero_out(); - for (int i=0;i(c, 0.0); - } - else - { - a = std::rand(); - b = std::rand(); - B(i, j) = std::complex(a, b); - B(j, i) = conj(B(i, j)); - } + for (int i = 0; i <= j; i++) + { + if (i == j) + { + c = std::rand (); + B (i, j) = std::complex (c, 0.0); + } + else + { + a = std::rand (); + b = std::rand (); + B (i, j) = std::complex (a, b); + B (j, i) = conj (B (i, j)); + } + } } - } ModuleBase::Inverse_Matrix_Complex IMC; - IMC.init(dim); - IMC.using_zheev(B, C); + IMC.init (dim); + IMC.using_zheev (B, C); D = B * C; for (int i = 0; i < dim; i++) - { - EXPECT_NEAR(D(i, i).real(), 1.0, 1e-14); - EXPECT_NEAR(D(i, i).imag(), 0.0, 1e-14); - // std::cout << D(i,i).real() << " " << D(i,i).imag() << std::endl; - } + { + EXPECT_NEAR (D (i, i).real (), 1.0, 1e-14); + EXPECT_NEAR (D (i, i).imag (), 0.0, 1e-14); + // std::cout << D(i,i).real() << " " << D(i,i).imag() << std::endl; + } } -TEST(InverseMatrixRealTest, InverseMatrixReal) +TEST (InverseMatrixRealTest, InverseMatrixReal) { int dim = 3; double in[9]; double out[9]; for (int i = 0; i < dim; i++) - { - for (int j = 0; j < dim; j++) { - if (i == j) - { - in[i * dim + j] = 2.0; - } - else - { - in[i * dim + j] = 0.0; - } + for (int j = 0; j < dim; j++) + { + if (i == j) + { + in[i * dim + j] = 2.0; + } + else + { + in[i * dim + j] = 0.0; + } + } } - } - ModuleBase::Inverse_Matrix_Real(dim, in, out); + ModuleBase::Inverse_Matrix_Real (dim, in, out); for (int i = 0; i < dim; i++) - { - for (int j = 0; j < dim; j++) { - if (i == j) - { - EXPECT_DOUBLE_EQ(in[i * dim + j], 2.0); - } - else - { - EXPECT_DOUBLE_EQ(in[i * dim + j], 0.0); - } + for (int j = 0; j < dim; j++) + { + if (i == j) + { + EXPECT_DOUBLE_EQ (in[i * dim + j], 2.0); + } + else + { + EXPECT_DOUBLE_EQ (in[i 
* dim + j], 0.0); + } + } } - } - EXPECT_DOUBLE_EQ(out[0], 0.5); - EXPECT_DOUBLE_EQ(out[1], 0.0); - EXPECT_DOUBLE_EQ(out[2], 0.0); - EXPECT_DOUBLE_EQ(out[3], 0.0); - EXPECT_DOUBLE_EQ(out[4], 0.5); - EXPECT_DOUBLE_EQ(out[5], 0.0); - EXPECT_DOUBLE_EQ(out[6], 0.0); - EXPECT_DOUBLE_EQ(out[7], 0.0); - EXPECT_DOUBLE_EQ(out[8], 0.5); + EXPECT_DOUBLE_EQ (out[0], 0.5); + EXPECT_DOUBLE_EQ (out[1], 0.0); + EXPECT_DOUBLE_EQ (out[2], 0.0); + EXPECT_DOUBLE_EQ (out[3], 0.0); + EXPECT_DOUBLE_EQ (out[4], 0.5); + EXPECT_DOUBLE_EQ (out[5], 0.0); + EXPECT_DOUBLE_EQ (out[6], 0.0); + EXPECT_DOUBLE_EQ (out[7], 0.0); + EXPECT_DOUBLE_EQ (out[8], 0.5); } diff --git a/source/source_base/test/lapack_connector_test.cpp b/source/source_base/test/lapack_connector_test.cpp index 70e00f82e44..7341201a2ab 100644 --- a/source/source_base/test/lapack_connector_test.cpp +++ b/source/source_base/test/lapack_connector_test.cpp @@ -20,32 +20,26 @@ class LapackConnectorTest : public testing::Test { protected: - void SetUp() override + void + SetUp () override { // Initialize matrices A and B and the eigenvalue vector // (Use appropriate values for your test case) A = { - std::complex(2.0, 0.0), - std::complex(1.0, -1.0), - std::complex(1.0, 1.0), - std::complex(3.0, 0.0), + std::complex (2.0, 0.0), + std::complex (1.0, -1.0), + std::complex (1.0, 1.0), + std::complex (3.0, 0.0), }; // Create a random square matrix C with complex elements - std::vector> C = { - {1.0, 2.0}, - {3.0, 4.0}, - {5.0, 6.0}, - {7.0, 8.0} - }; + std::vector> C = {{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}, {7.0, 8.0}}; // Compute the conjugate transpose of C - std::vector> C_conj_transpose = { - {C[0].real(), -C[0].imag()}, - {C[1].real(), -C[1].imag()}, - {C[2].real(), -C[2].imag()}, - {C[3].real(), -C[3].imag()} - }; + std::vector> C_conj_transpose = {{C[0].real (), -C[0].imag ()}, + {C[1].real (), -C[1].imag ()}, + {C[2].real (), -C[2].imag ()}, + {C[3].real (), -C[3].imag ()}}; // Compute the product of C_conj_transpose and C to obtain 
B B = {{C_conj_transpose[0] * C[0] + C_conj_transpose[1] * C[1]}, @@ -53,10 +47,10 @@ class LapackConnectorTest : public testing::Test {C_conj_transpose[2] * C[0] + C_conj_transpose[3] * C[1]}, {C_conj_transpose[2] * C[2] + C_conj_transpose[3] * C[3]}}; - n = sqrt(A.size()); + n = sqrt (A.size ()); lda = n; ldb = n; - w.resize(n); + w.resize (n); // Set up the parameters for zhegv_ itype = 1; @@ -83,72 +77,71 @@ class LapackConnectorTest : public testing::Test }; // Test the zhegv_ function -TEST_F(LapackConnectorTest, ZHEGV) +TEST_F (LapackConnectorTest, ZHEGV) { // First, query the optimal size of the work array std::complex work_query; double rwork_query; - zhegv_(&itype, - &jobz, - &uplo, - &n, - A.data(), - &lda, - B.data(), - &ldb, - w.data(), - &work_query, - &lwork, - &rwork_query, - &info); - lwork = static_cast(work_query.real()); - std::vector> work(lwork); + zhegv_ (&itype, + &jobz, + &uplo, + &n, + A.data (), + &lda, + B.data (), + &ldb, + w.data (), + &work_query, + &lwork, + &rwork_query, + &info); + lwork = static_cast (work_query.real ()); + std::vector> work (lwork); // std::vector rwork(static_cast(rwork_query)); // the above line is not working as rwork_query will return -nan // std::vector rwork(7 * lwork); - std::vector rwork(7 * n); + std::vector rwork (7 * n); // Now, call zhegv_ with the optimal work array size - zhegv_(&itype, - &jobz, - &uplo, - &n, - A.data(), - &lda, - B.data(), - &ldb, - w.data(), - work.data(), - &lwork, - rwork.data(), - &info); + zhegv_ (&itype, + &jobz, + &uplo, + &n, + A.data (), + &lda, + B.data (), + &ldb, + w.data (), + work.data (), + &lwork, + rwork.data (), + &info); // Check that the function completed successfully - ASSERT_EQ(info, 0); + ASSERT_EQ (info, 0); // Check the computed eigenvalues and eigenvectors // (Use appropriate values for your test case) std::vector expected_eigenvalues = {0.014371905048252809, 1.0871905949517402}; - std::vector> expected_eigenvectors = { - {0.00029066041795582461, 
-0.042636598658647745}, - {0.07557994526773984, 0.0 }, - {-0.81903769393029213, -0.083945171943878405}, - {0.33387897788468901, 0.0 } - }; + std::vector> expected_eigenvectors = {{0.00029066041795582461, -0.042636598658647745}, + {0.07557994526773984, 0.0}, + {-0.81903769393029213, -0.083945171943878405}, + {0.33387897788468901, 0.0}}; for (size_t i = 0; i < n; ++i) - { - EXPECT_NEAR(w[i], expected_eigenvalues[i], 1e-8); - for (size_t j = 0; j < n; ++j) { - EXPECT_NEAR(A[i * n + j].real(), expected_eigenvectors[i * n + j].real(), 1e-8); - EXPECT_NEAR(A[i * n + j].imag(), expected_eigenvectors[i * n + j].imag(), 1e-8); + EXPECT_NEAR (w[i], expected_eigenvalues[i], 1e-8); + for (size_t j = 0; j < n; ++j) + { + EXPECT_NEAR (A[i * n + j].real (), expected_eigenvectors[i * n + j].real (), 1e-8); + EXPECT_NEAR (A[i * n + j].imag (), expected_eigenvectors[i * n + j].imag (), 1e-8); + } } - } } -int main(int argc, char** argv) +int + main (int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + return RUN_ALL_TESTS (); } \ No newline at end of file diff --git a/source/source_base/test/math_bspline_test.cpp b/source/source_base/test/math_bspline_test.cpp index 607321f4724..094ca913b4f 100644 --- a/source/source_base/test/math_bspline_test.cpp +++ b/source/source_base/test/math_bspline_test.cpp @@ -17,43 +17,43 @@ class MathBsplineTest : public testing::Test { -protected: - ModuleBase::Bspline bp; - int norder; + protected: + ModuleBase::Bspline bp; + int norder; }; -TEST_F(MathBsplineTest,Init) +TEST_F (MathBsplineTest, Init) { - EXPECT_DEATH( - { - norder = 3; // norder must be even - bp.init(norder,0.05,0); - },"" - ); - EXPECT_DEATH( - { - norder = 0; // norder must be positive - bp.init(norder,0.05,0); - },"" - ); + EXPECT_DEATH ( + { + norder = 3; // norder must be even + bp.init (norder, 0.05, 0); + }, + ""); + EXPECT_DEATH ( + { + norder = 0; // norder must be positive + bp.init (norder, 
0.05, 0); + }, + ""); } // summation over n is unity -TEST_F(MathBsplineTest,Properties) +TEST_F (MathBsplineTest, Properties) { - int by = 2; - for (norder=2;norder<=20;norder=norder+by) - { - bp.init(norder,1.0,0); - bp.getbspline(0.2); - double sum=0.0; - //std::cout << "\n" << "norder : "<< norder< expi(std::complex x) + std::complex + expi (std::complex x) { - const std::complex j(0.0, 1.0); - return exp(j * x); + const std::complex j (0.0, 1.0); + return exp (j * x); } - std::complex expi2(std::complex x) + std::complex + expi2 (std::complex x) { - const std::complex j(0.0, 1.0); + const std::complex j (0.0, 1.0); const double PI = 3.14159265358979323846; - return exp(j * PI / 2.0 * x); + return exp (j * PI / 2.0 * x); } // Pauli matrix: [0,-i;i,0] int LDA = 2; double factor = 1; - void sigma_y(std::complex* spin_in, std::complex* spin_out, const int m = 1) + void + sigma_y (std::complex* spin_in, std::complex* spin_out, const int m = 1) { - const std::complex j(0.0, 1.0); - if (this->LDA < 2) { - this->LDA = 2; -} + const std::complex j (0.0, 1.0); + if (this->LDA < 2) + { + this->LDA = 2; + } for (int i = 0; i < m; ++i) - { - spin_out[LDA * i] = -factor * j * spin_in[LDA * i + 1]; - spin_out[LDA * i + 1] = factor * j * spin_in[LDA * i]; - } + { + spin_out[LDA * i] = -factor * j * spin_in[LDA * i + 1]; + spin_out[LDA * i + 1] = factor * j * spin_in[LDA * i]; + } } #ifdef __ENABLE_FLOAT_FFTW - float x7(float x) + float + x7 (float x) { - return pow(x, 7); + return pow (x, 7); } - float x6(float x) + float + x6 (float x) { - return pow(x, 6); + return pow (x, 6); } - float expr(float x) + float + expr (float x) { - return exp(x); + return exp (x); } - std::complex expi(std::complex x) + std::complex + expi (std::complex x) { - const std::complex j(0.0, 1.0); - return exp(j * x); + const std::complex j (0.0, 1.0); + return exp (j * x); } - std::complex expi2(std::complex x) + std::complex + expi2 (std::complex x) { - const std::complex j(0.0, 1.0); + const 
std::complex j (0.0, 1.0); const float PI = 3.14159265358979323846; - return exp(j * PI / 2.0f * x); + return exp (j * PI / 2.0f * x); } // Pauli matrix: [0,-i;i,0] - void sigma_y(std::complex* spin_in, std::complex* spin_out, const int m = 1) + void + sigma_y (std::complex* spin_in, std::complex* spin_out, const int m = 1) { - const std::complex j(0.0, 1.0); + const std::complex j (0.0, 1.0); if (this->LDA < 2) this->LDA = 2; for (int i = 0; i < m; ++i) - { - spin_out[LDA * i] = -j * spin_in[LDA * i + 1]; - spin_out[LDA * i + 1] = j * spin_in[LDA * i]; - } + { + spin_out[LDA * i] = -j * spin_in[LDA * i + 1]; + spin_out[LDA * i + 1] = j * spin_in[LDA * i]; + } } #endif }; @@ -102,73 +115,73 @@ class MathChebyshevTest : public testing::Test toolfunc fun; }; -TEST_F(MathChebyshevTest, calcoef_real) +TEST_F (MathChebyshevTest, calcoef_real) { - auto fun_x6 = [&](double x) { return fun.x6(x); }; - auto fun_x7 = [&](double x) { return fun.x7(x); }; - p_chetest = new ModuleBase::Chebyshev(10); + auto fun_x6 = [&] (double x) { return fun.x6 (x); }; + auto fun_x7 = [&] (double x) { return fun.x7 (x); }; + p_chetest = new ModuleBase::Chebyshev (10); // x^6 = 1/32*( 10T_0 + 15T_2 + 6T_4 + T_6 ) // x^7 = 1/64*( 35T_1 + 21T_3 + 7T_5 + T_7 ) const double x6ref[10] = {10, 0, 15, 0, 6, 0, 1, 0, 0, 0}; const double x7ref[10] = {0, 35, 0, 21, 0, 7, 0, 1, 0, 0}; - p_chetest->calcoef_real(fun_x6); + p_chetest->calcoef_real (fun_x6); for (int i = 0; i < 10; ++i) - { - EXPECT_NEAR(p_chetest->coef_real[i] * 32.0, x6ref[i], 1.e-8); - } - p_chetest->calcoef_real(fun_x7); + { + EXPECT_NEAR (p_chetest->coef_real[i] * 32.0, x6ref[i], 1.e-8); + } + p_chetest->calcoef_real (fun_x7); for (int i = 0; i < 10; ++i) - { - EXPECT_NEAR(p_chetest->coef_real[i] * 64.0, x7ref[i], 1.e-8); - } + { + EXPECT_NEAR (p_chetest->coef_real[i] * 64.0, x7ref[i], 1.e-8); + } delete p_chetest; } -TEST_F(MathChebyshevTest, calcoef_pair) +TEST_F (MathChebyshevTest, calcoef_pair) { - auto fun_x6 = [&](double x) { 
return fun.x6(x); }; - auto fun_x7 = [&](double x) { return fun.x7(x); }; - p_chetest = new ModuleBase::Chebyshev(10); + auto fun_x6 = [&] (double x) { return fun.x6 (x); }; + auto fun_x7 = [&] (double x) { return fun.x7 (x); }; + p_chetest = new ModuleBase::Chebyshev (10); // x^6 = 1/32*( 10T_0 + 15T_2 + 6T_4 + T_6 ) // x^7 = 1/64*( 35T_1 + 21T_3 + 7T_5 + T_7 ) const double x6ref[10] = {10, 0, 15, 0, 6, 0, 1, 0, 0, 0}; const double x7ref[10] = {0, 35, 0, 21, 0, 7, 0, 1, 0, 0}; - p_chetest->calcoef_pair(fun_x6, fun_x7); + p_chetest->calcoef_pair (fun_x6, fun_x7); for (int i = 0; i < 10; ++i) - { - EXPECT_NEAR(p_chetest->coef_complex[i].real() * 32.0, x6ref[i], 1.e-8); - EXPECT_NEAR(p_chetest->coef_complex[i].imag() * 64.0, x7ref[i], 1.e-8); - } + { + EXPECT_NEAR (p_chetest->coef_complex[i].real () * 32.0, x6ref[i], 1.e-8); + EXPECT_NEAR (p_chetest->coef_complex[i].imag () * 64.0, x7ref[i], 1.e-8); + } delete p_chetest; } -TEST_F(MathChebyshevTest, calcoef_complex) +TEST_F (MathChebyshevTest, calcoef_complex) { - auto fun_expi = [&](std::complex x) { return fun.expi(x); }; + auto fun_expi = [&] (std::complex x) { return fun.expi (x); }; const int norder = 100; const double PI = 3.14159265358979323846; - p_chetest = new ModuleBase::Chebyshev(norder); + p_chetest = new ModuleBase::Chebyshev (norder); double* T = new double[norder]; // check exp(i\pi/4) = \sum_n C_n[exp(ix)]T_n(\pi/4) = sqrt(2)/2*(1, i) - p_chetest->calcoef_complex(fun_expi); - p_chetest->getpolyval(PI / 4, T, norder); - std::complex sum(0, 0); + p_chetest->calcoef_complex (fun_expi); + p_chetest->getpolyval (PI / 4, T, norder); + std::complex sum (0, 0); for (int i = 0; i < norder; ++i) - { - sum += p_chetest->coef_complex[i] * T[i]; - } - EXPECT_NEAR(sum.real(), sqrt(2) / 2, 1.e-8); - EXPECT_NEAR(sum.imag(), sqrt(2) / 2, 1.e-8); + { + sum += p_chetest->coef_complex[i] * T[i]; + } + EXPECT_NEAR (sum.real (), sqrt (2) / 2, 1.e-8); + EXPECT_NEAR (sum.imag (), sqrt (2) / 2, 1.e-8); delete[] T; delete 
p_chetest; } -TEST_F(MathChebyshevTest, calfinalvec_real) +TEST_F (MathChebyshevTest, calfinalvec_real) { const int norder = 100; const double E = 2.718281828459046; - p_chetest = new ModuleBase::Chebyshev(norder); + p_chetest = new ModuleBase::Chebyshev (norder); // 1 [ 1/e+e -i(e-1/e) ] // exp(\sigma_y)= - [ ], where \sigma_y = [0, -i; i, 0] // 2 [ i(e-1/e) 1/e+e ] @@ -179,30 +192,30 @@ TEST_F(MathChebyshevTest, calfinalvec_real) v[2] = 0.0; v[3] = 1.0; //[1 0; 0 1] - auto fun_expr = [&](double x) { return fun.expr(x); }; + auto fun_expr = [&] (double x) { return fun.expr (x); }; auto fun_sigma_y - = [&](std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y(in, out, m); }; - p_chetest->calcoef_real(fun_expr); - p_chetest->calfinalvec_real(fun_sigma_y, v, vout, 2, 2, 2); - EXPECT_NEAR(vout[0].real(), 0.5 * (E + 1 / E), 1.e-8); - EXPECT_NEAR(vout[0].imag(), 0, 1.e-8); - EXPECT_NEAR(vout[1].real(), 0, 1.e-8); - EXPECT_NEAR(vout[1].imag(), 0.5 * (E - 1 / E), 1.e-8); - EXPECT_NEAR(vout[2].real(), 0, 1.e-8); - EXPECT_NEAR(vout[2].imag(), -0.5 * (E - 1 / E), 1.e-8); - EXPECT_NEAR(vout[3].real(), 0.5 * (E + 1 / E), 1.e-8); - EXPECT_NEAR(vout[3].imag(), 0, 1.e-8); + = [&] (std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y (in, out, m); }; + p_chetest->calcoef_real (fun_expr); + p_chetest->calfinalvec_real (fun_sigma_y, v, vout, 2, 2, 2); + EXPECT_NEAR (vout[0].real (), 0.5 * (E + 1 / E), 1.e-8); + EXPECT_NEAR (vout[0].imag (), 0, 1.e-8); + EXPECT_NEAR (vout[1].real (), 0, 1.e-8); + EXPECT_NEAR (vout[1].imag (), 0.5 * (E - 1 / E), 1.e-8); + EXPECT_NEAR (vout[2].real (), 0, 1.e-8); + EXPECT_NEAR (vout[2].imag (), -0.5 * (E - 1 / E), 1.e-8); + EXPECT_NEAR (vout[3].real (), 0.5 * (E + 1 / E), 1.e-8); + EXPECT_NEAR (vout[3].imag (), 0, 1.e-8); delete[] v; delete[] vout; delete p_chetest; } -TEST_F(MathChebyshevTest, calfinalvec_complex) +TEST_F (MathChebyshevTest, calfinalvec_complex) { const int norder = 100; const double E = 2.718281828459046; 
- p_chetest = new ModuleBase::Chebyshev(norder); + p_chetest = new ModuleBase::Chebyshev (norder); // [ 0 1 ] // exp(i pi/2*\sigma_y)= [ ], where \sigma_y = [0, -i; i, 0] // [ -1 0 ] @@ -214,29 +227,29 @@ TEST_F(MathChebyshevTest, calfinalvec_complex) v[3] = 1.0; //[1 0; 0 1] auto fun_sigma_y - = [&](std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y(in, out, m); }; - auto fun_expi2 = [&](std::complex x) { return fun.expi2(x); }; - p_chetest->calcoef_complex(fun_expi2); - p_chetest->calfinalvec_complex(fun_sigma_y, v, vout, 2, 2, 2); - EXPECT_NEAR(vout[0].real(), 0, 1.e-8); - EXPECT_NEAR(vout[0].imag(), 0, 1.e-8); - EXPECT_NEAR(vout[1].real(), -1, 1.e-8); - EXPECT_NEAR(vout[1].imag(), 0, 1.e-8); - EXPECT_NEAR(vout[2].real(), 1, 1.e-8); - EXPECT_NEAR(vout[2].imag(), 0, 1.e-8); - EXPECT_NEAR(vout[3].real(), 0, 1.e-8); - EXPECT_NEAR(vout[3].imag(), 0, 1.e-8); + = [&] (std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y (in, out, m); }; + auto fun_expi2 = [&] (std::complex x) { return fun.expi2 (x); }; + p_chetest->calcoef_complex (fun_expi2); + p_chetest->calfinalvec_complex (fun_sigma_y, v, vout, 2, 2, 2); + EXPECT_NEAR (vout[0].real (), 0, 1.e-8); + EXPECT_NEAR (vout[0].imag (), 0, 1.e-8); + EXPECT_NEAR (vout[1].real (), -1, 1.e-8); + EXPECT_NEAR (vout[1].imag (), 0, 1.e-8); + EXPECT_NEAR (vout[2].real (), 1, 1.e-8); + EXPECT_NEAR (vout[2].imag (), 0, 1.e-8); + EXPECT_NEAR (vout[3].real (), 0, 1.e-8); + EXPECT_NEAR (vout[3].imag (), 0, 1.e-8); delete[] v; delete[] vout; delete p_chetest; } -TEST_F(MathChebyshevTest, calpolyvec_complex) +TEST_F (MathChebyshevTest, calpolyvec_complex) { const int norder = 100; const double E = 2.718281828459046; - p_chetest = new ModuleBase::Chebyshev(norder); + p_chetest = new ModuleBase::Chebyshev (norder); // [ 0 1 ] // exp(i pi/2*\sigma_y)= [ ], where \sigma_y = [0, -i; i, 0] // [ -1 0 ] @@ -253,25 +266,25 @@ TEST_F(MathChebyshevTest, calpolyvec_complex) vout[3] = 0; auto fun_sigma_y - = 
[&](std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y(in, out, m); }; - auto fun_expi2 = [&](std::complex x) { return fun.expi2(x); }; - p_chetest->calcoef_complex(fun_expi2); - p_chetest->calpolyvec_complex(fun_sigma_y, v, polyv, 2, 2, 2); + = [&] (std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y (in, out, m); }; + auto fun_expi2 = [&] (std::complex x) { return fun.expi2 (x); }; + p_chetest->calcoef_complex (fun_expi2); + p_chetest->calpolyvec_complex (fun_sigma_y, v, polyv, 2, 2, 2); for (int i = 0; i < norder; ++i) - { - for (int j = 0; j < 4; ++j) { - vout[j] += polyv[i * 4 + j] * p_chetest->coef_complex[i]; + for (int j = 0; j < 4; ++j) + { + vout[j] += polyv[i * 4 + j] * p_chetest->coef_complex[i]; + } } - } - EXPECT_NEAR(vout[0].real(), 0, 1.e-8); - EXPECT_NEAR(vout[0].imag(), 0, 1.e-8); - EXPECT_NEAR(vout[1].real(), -1, 1.e-8); - EXPECT_NEAR(vout[1].imag(), 0, 1.e-8); - EXPECT_NEAR(vout[2].real(), 1, 1.e-8); - EXPECT_NEAR(vout[2].imag(), 0, 1.e-8); - EXPECT_NEAR(vout[3].real(), 0, 1.e-8); - EXPECT_NEAR(vout[3].imag(), 0, 1.e-8); + EXPECT_NEAR (vout[0].real (), 0, 1.e-8); + EXPECT_NEAR (vout[0].imag (), 0, 1.e-8); + EXPECT_NEAR (vout[1].real (), -1, 1.e-8); + EXPECT_NEAR (vout[1].imag (), 0, 1.e-8); + EXPECT_NEAR (vout[2].real (), 1, 1.e-8); + EXPECT_NEAR (vout[2].imag (), 0, 1.e-8); + EXPECT_NEAR (vout[3].real (), 0, 1.e-8); + EXPECT_NEAR (vout[3].imag (), 0, 1.e-8); delete[] v; delete[] vout; @@ -279,10 +292,10 @@ TEST_F(MathChebyshevTest, calpolyvec_complex) delete p_chetest; } -TEST_F(MathChebyshevTest, tracepolyA) +TEST_F (MathChebyshevTest, tracepolyA) { const int norder = 100; - p_chetest = new ModuleBase::Chebyshev(norder); + p_chetest = new ModuleBase::Chebyshev (norder); // N == LDA std::complex* v = new std::complex[4]; @@ -292,17 +305,20 @@ TEST_F(MathChebyshevTest, tracepolyA) v[3] = 1.0; //[1 0; 0 1] auto fun_sigma_y - = [&](std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y(in, out, m); }; 
- p_chetest->tracepolyA(fun_sigma_y, v, 2, 2, 2); + = [&] (std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y (in, out, m); }; + p_chetest->tracepolyA (fun_sigma_y, v, 2, 2, 2); // Trace: even function: 2 ; odd function 0. for (int i = 0; i < norder; ++i) - { - if (i % 2 == 0) { - EXPECT_NEAR(p_chetest->polytrace[i], 2, 1.e-8); - } else { - EXPECT_NEAR(p_chetest->polytrace[i], 0, 1.e-8); -} - } + { + if (i % 2 == 0) + { + EXPECT_NEAR (p_chetest->polytrace[i], 2, 1.e-8); + } + else + { + EXPECT_NEAR (p_chetest->polytrace[i], 0, 1.e-8); + } + } delete[] v; // N < LDA @@ -316,29 +332,32 @@ TEST_F(MathChebyshevTest, tracepolyA) v[4] = 1.0; v[5] = 1.0; //[1 0; 0 1; 100 2] - p_chetest->tracepolyA(fun_sigma_y, v, 2, LDA, 2); + p_chetest->tracepolyA (fun_sigma_y, v, 2, LDA, 2); // Trace: even function: 2 ; odd function 0. for (int i = 0; i < norder; ++i) - { - if (i % 2 == 0) { - EXPECT_NEAR(p_chetest->polytrace[i], 2, 1.e-8); - } else { - EXPECT_NEAR(p_chetest->polytrace[i], 0, 1.e-8); -} - } + { + if (i % 2 == 0) + { + EXPECT_NEAR (p_chetest->polytrace[i], 2, 1.e-8); + } + else + { + EXPECT_NEAR (p_chetest->polytrace[i], 0, 1.e-8); + } + } fun.LDA = 2; delete[] v; delete p_chetest; } -TEST_F(MathChebyshevTest, checkconverge) +TEST_F (MathChebyshevTest, checkconverge) { #ifdef __MPI #undef __MPI const int norder = 100; - p_chetest = new ModuleBase::Chebyshev(norder); + p_chetest = new ModuleBase::Chebyshev (norder); auto fun_sigma_y - = [&](std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y(in, out, m); }; + = [&] (std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y (in, out, m); }; std::complex* v = new std::complex[4]; v[0] = 1.0; @@ -348,30 +367,30 @@ TEST_F(MathChebyshevTest, checkconverge) double tmin = -1.1; double tmax = 1.1; bool converge; - converge = p_chetest->checkconverge(fun_sigma_y, v, 2, 2, tmax, tmin, 0.2); - EXPECT_TRUE(converge); - converge = p_chetest->checkconverge(fun_sigma_y, v + 2, 2, 2, tmax, tmin, 
0.2); - EXPECT_TRUE(converge); - EXPECT_NEAR(tmin, -1.1, 1e-8); - EXPECT_NEAR(tmax, 1.1, 1e-8); + converge = p_chetest->checkconverge (fun_sigma_y, v, 2, 2, tmax, tmin, 0.2); + EXPECT_TRUE (converge); + converge = p_chetest->checkconverge (fun_sigma_y, v + 2, 2, 2, tmax, tmin, 0.2); + EXPECT_TRUE (converge); + EXPECT_NEAR (tmin, -1.1, 1e-8); + EXPECT_NEAR (tmax, 1.1, 1e-8); tmax = -1.1; - converge = p_chetest->checkconverge(fun_sigma_y, v, 2, 2, tmax, tmin, 2.2); - EXPECT_TRUE(converge); - EXPECT_NEAR(tmin, -1.1, 1e-8); - EXPECT_NEAR(tmax, 1.1, 1e-8); + converge = p_chetest->checkconverge (fun_sigma_y, v, 2, 2, tmax, tmin, 2.2); + EXPECT_TRUE (converge); + EXPECT_NEAR (tmin, -1.1, 1e-8); + EXPECT_NEAR (tmax, 1.1, 1e-8); // not converge - v[0] = std::complex(0, 1), v[1] = 1; + v[0] = std::complex (0, 1), v[1] = 1; fun.factor = 1.5; tmin = -1.1, tmax = 1.1; - converge = p_chetest->checkconverge(fun_sigma_y, v, 2, 2, tmax, tmin, 0.2); - EXPECT_FALSE(converge); + converge = p_chetest->checkconverge (fun_sigma_y, v, 2, 2, tmax, tmin, 0.2); + EXPECT_FALSE (converge); fun.factor = -1.5; tmin = -1.1, tmax = 1.1; - converge = p_chetest->checkconverge(fun_sigma_y, v, 2, 2, tmax, tmin, 0.2); - EXPECT_FALSE(converge); + converge = p_chetest->checkconverge (fun_sigma_y, v, 2, 2, tmax, tmin, 0.2); + EXPECT_FALSE (converge); fun.factor = 1; delete[] v; @@ -380,87 +399,87 @@ TEST_F(MathChebyshevTest, checkconverge) #endif } -TEST_F(MathChebyshevTest, recurs) +TEST_F (MathChebyshevTest, recurs) { - testing::internal::CaptureStdout(); - EXPECT_EXIT(ModuleBase::Chebyshev noneche(0), ::testing::ExitedWithCode(1), ""); - std::string output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("NOTICE")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (ModuleBase::Chebyshev noneche (0), ::testing::ExitedWithCode (1), ""); + std::string output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("NOTICE")); int norder = 
100; - p_chetest = new ModuleBase::Chebyshev(norder); - int Tnp = p_chetest->recurs(1, 1, 0); - EXPECT_EQ(Tnp, 2); + p_chetest = new ModuleBase::Chebyshev (norder); + int Tnp = p_chetest->recurs (1, 1, 0); + EXPECT_EQ (Tnp, 2); delete p_chetest; } #ifdef __ENABLE_FLOAT_FFTW -TEST_F(MathChebyshevTest, calcoef_real_float) +TEST_F (MathChebyshevTest, calcoef_real_float) { - auto fun_x6f = [&](float x) { return fun.x6(x); }; - auto fun_x7f = [&](float x) { return fun.x7(x); }; - p_fchetest = new ModuleBase::Chebyshev(10); + auto fun_x6f = [&] (float x) { return fun.x6 (x); }; + auto fun_x7f = [&] (float x) { return fun.x7 (x); }; + p_fchetest = new ModuleBase::Chebyshev (10); // x^6 = 1/32*( 10T_0 + 15T_2 + 6T_4 + T_6 ) // x^7 = 1/64*( 35T_1 + 21T_3 + 7T_5 + T_7 ) const float x6ref[10] = {10, 0, 15, 0, 6, 0, 1, 0, 0, 0}; const float x7ref[10] = {0, 35, 0, 21, 0, 7, 0, 1, 0, 0}; - p_fchetest->calcoef_real(fun_x6f); + p_fchetest->calcoef_real (fun_x6f); for (int i = 0; i < 10; ++i) - { - EXPECT_NEAR(p_fchetest->coef_real[i] * 32.0, x6ref[i], 1.e-5); - } - p_fchetest->calcoef_real(fun_x7f); + { + EXPECT_NEAR (p_fchetest->coef_real[i] * 32.0, x6ref[i], 1.e-5); + } + p_fchetest->calcoef_real (fun_x7f); for (int i = 0; i < 10; ++i) - { - EXPECT_NEAR(p_fchetest->coef_real[i] * 64.0, x7ref[i], 1.e-5); - } + { + EXPECT_NEAR (p_fchetest->coef_real[i] * 64.0, x7ref[i], 1.e-5); + } delete p_fchetest; } -TEST_F(MathChebyshevTest, calcoef_pair_float) +TEST_F (MathChebyshevTest, calcoef_pair_float) { - auto fun_x6f = [&](float x) { return fun.x6(x); }; - auto fun_x7f = [&](float x) { return fun.x7(x); }; - p_fchetest = new ModuleBase::Chebyshev(10); + auto fun_x6f = [&] (float x) { return fun.x6 (x); }; + auto fun_x7f = [&] (float x) { return fun.x7 (x); }; + p_fchetest = new ModuleBase::Chebyshev (10); // x^6 = 1/32*( 10T_0 + 15T_2 + 6T_4 + T_6 ) // x^7 = 1/64*( 35T_1 + 21T_3 + 7T_5 + T_7 ) const float x6ref[10] = {10, 0, 15, 0, 6, 0, 1, 0, 0, 0}; const float x7ref[10] = {0, 35, 0, 
21, 0, 7, 0, 1, 0, 0}; - p_fchetest->calcoef_pair(fun_x6f, fun_x7f); + p_fchetest->calcoef_pair (fun_x6f, fun_x7f); for (int i = 0; i < 10; ++i) - { - EXPECT_NEAR(p_fchetest->coef_complex[i].real() * 32.0, x6ref[i], 1.e-5); - EXPECT_NEAR(p_fchetest->coef_complex[i].imag() * 64.0, x7ref[i], 1.e-5); - } + { + EXPECT_NEAR (p_fchetest->coef_complex[i].real () * 32.0, x6ref[i], 1.e-5); + EXPECT_NEAR (p_fchetest->coef_complex[i].imag () * 64.0, x7ref[i], 1.e-5); + } delete p_fchetest; } -TEST_F(MathChebyshevTest, calcoef_complex_float) +TEST_F (MathChebyshevTest, calcoef_complex_float) { - auto fun_expif = [&](std::complex x) { return fun.expi(x); }; + auto fun_expif = [&] (std::complex x) { return fun.expi (x); }; const int norder = 100; const float PI = 3.14159265358979323846; - p_fchetest = new ModuleBase::Chebyshev(norder); + p_fchetest = new ModuleBase::Chebyshev (norder); float* T = new float[norder]; // check exp(i\pi/4) = \sum_n C_n[exp(ix)]T_n(\pi/4) = sqrt(2)/2*(1, i) - p_fchetest->calcoef_complex(fun_expif); - p_fchetest->getpolyval(PI / 4, T, norder); - std::complex sum(0, 0); + p_fchetest->calcoef_complex (fun_expif); + p_fchetest->getpolyval (PI / 4, T, norder); + std::complex sum (0, 0); for (int i = 0; i < norder; ++i) - { - sum += p_fchetest->coef_complex[i] * T[i]; - } - EXPECT_NEAR(sum.real(), sqrt(2) / 2, 1.e-6); - EXPECT_NEAR(sum.imag(), sqrt(2) / 2, 1.e-6); + { + sum += p_fchetest->coef_complex[i] * T[i]; + } + EXPECT_NEAR (sum.real (), sqrt (2) / 2, 1.e-6); + EXPECT_NEAR (sum.imag (), sqrt (2) / 2, 1.e-6); delete[] T; delete p_fchetest; } -TEST_F(MathChebyshevTest, calfinalvec_real_float) +TEST_F (MathChebyshevTest, calfinalvec_real_float) { const int norder = 100; const float E = 2.718281828459046; - p_fchetest = new ModuleBase::Chebyshev(norder); + p_fchetest = new ModuleBase::Chebyshev (norder); // 1 [ 1/e+e -i(e-1/e) ] // exp(\sigma_y)= - [ ], where \sigma_y = [0, -i; i, 0] // 2 [ i(e-1/e) 1/e+e ] @@ -471,30 +490,30 @@ TEST_F(MathChebyshevTest, 
calfinalvec_real_float) v[2] = 0.0; v[3] = 1.0; //[1 0; 0 1] - auto fun_exprf = [&](float x) { return fun.expr(x); }; + auto fun_exprf = [&] (float x) { return fun.expr (x); }; auto fun_sigma_yf - = [&](std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y(in, out, m); }; - p_fchetest->calcoef_real(fun_exprf); - p_fchetest->calfinalvec_real(fun_sigma_yf, v, vout, 2, 2, 2); - EXPECT_NEAR(vout[0].real(), 0.5 * (E + 1 / E), 1.e-6); - EXPECT_NEAR(vout[0].imag(), 0, 1.e-6); - EXPECT_NEAR(vout[1].real(), 0, 1.e-6); - EXPECT_NEAR(vout[1].imag(), 0.5 * (E - 1 / E), 1.e-6); - EXPECT_NEAR(vout[2].real(), 0, 1.e-6); - EXPECT_NEAR(vout[2].imag(), -0.5 * (E - 1 / E), 1.e-6); - EXPECT_NEAR(vout[3].real(), 0.5 * (E + 1 / E), 1.e-6); - EXPECT_NEAR(vout[3].imag(), 0, 1.e-6); + = [&] (std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y (in, out, m); }; + p_fchetest->calcoef_real (fun_exprf); + p_fchetest->calfinalvec_real (fun_sigma_yf, v, vout, 2, 2, 2); + EXPECT_NEAR (vout[0].real (), 0.5 * (E + 1 / E), 1.e-6); + EXPECT_NEAR (vout[0].imag (), 0, 1.e-6); + EXPECT_NEAR (vout[1].real (), 0, 1.e-6); + EXPECT_NEAR (vout[1].imag (), 0.5 * (E - 1 / E), 1.e-6); + EXPECT_NEAR (vout[2].real (), 0, 1.e-6); + EXPECT_NEAR (vout[2].imag (), -0.5 * (E - 1 / E), 1.e-6); + EXPECT_NEAR (vout[3].real (), 0.5 * (E + 1 / E), 1.e-6); + EXPECT_NEAR (vout[3].imag (), 0, 1.e-6); delete[] v; delete[] vout; delete p_fchetest; } -TEST_F(MathChebyshevTest, calfinalvec_complex_float) +TEST_F (MathChebyshevTest, calfinalvec_complex_float) { const int norder = 100; const float E = 2.718281828459046; - p_fchetest = new ModuleBase::Chebyshev(norder); + p_fchetest = new ModuleBase::Chebyshev (norder); // [ 0 1 ] // exp(i pi/2*\sigma_y)= [ ], where \sigma_y = [0, -i; i, 0] // [ -1 0 ] @@ -506,29 +525,29 @@ TEST_F(MathChebyshevTest, calfinalvec_complex_float) v[3] = 1.0; //[1 0; 0 1] auto fun_sigma_yf - = [&](std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y(in, out, 
m); }; - auto fun_expi2f = [&](std::complex x) { return fun.expi2(x); }; - p_fchetest->calcoef_complex(fun_expi2f); - p_fchetest->calfinalvec_complex(fun_sigma_yf, v, vout, 2, 2, 2); - EXPECT_NEAR(vout[0].real(), 0, 1.e-6); - EXPECT_NEAR(vout[0].imag(), 0, 1.e-6); - EXPECT_NEAR(vout[1].real(), -1, 1.e-6); - EXPECT_NEAR(vout[1].imag(), 0, 1.e-6); - EXPECT_NEAR(vout[2].real(), 1, 1.e-6); - EXPECT_NEAR(vout[2].imag(), 0, 1.e-6); - EXPECT_NEAR(vout[3].real(), 0, 1.e-6); - EXPECT_NEAR(vout[3].imag(), 0, 1.e-6); + = [&] (std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y (in, out, m); }; + auto fun_expi2f = [&] (std::complex x) { return fun.expi2 (x); }; + p_fchetest->calcoef_complex (fun_expi2f); + p_fchetest->calfinalvec_complex (fun_sigma_yf, v, vout, 2, 2, 2); + EXPECT_NEAR (vout[0].real (), 0, 1.e-6); + EXPECT_NEAR (vout[0].imag (), 0, 1.e-6); + EXPECT_NEAR (vout[1].real (), -1, 1.e-6); + EXPECT_NEAR (vout[1].imag (), 0, 1.e-6); + EXPECT_NEAR (vout[2].real (), 1, 1.e-6); + EXPECT_NEAR (vout[2].imag (), 0, 1.e-6); + EXPECT_NEAR (vout[3].real (), 0, 1.e-6); + EXPECT_NEAR (vout[3].imag (), 0, 1.e-6); delete[] v; delete[] vout; delete p_fchetest; } -TEST_F(MathChebyshevTest, calpolyvec_float) +TEST_F (MathChebyshevTest, calpolyvec_float) { const int norder = 100; const float E = 2.718281828459046; - p_fchetest = new ModuleBase::Chebyshev(norder); + p_fchetest = new ModuleBase::Chebyshev (norder); // [ 0 1 ] // exp(i pi/2*\sigma_y)= [ ], where \sigma_y = [0, -i; i, 0] // [ -1 0 ] @@ -545,25 +564,25 @@ TEST_F(MathChebyshevTest, calpolyvec_float) vout[3] = 0; auto fun_sigma_yf - = [&](std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y(in, out, m); }; - auto fun_expi2f = [&](std::complex x) { return fun.expi2(x); }; - p_fchetest->calcoef_complex(fun_expi2f); - p_fchetest->calpolyvec_complex(fun_sigma_yf, v, polyv, 2, 2, 2); + = [&] (std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y (in, out, m); }; + auto fun_expi2f = [&] 
(std::complex x) { return fun.expi2 (x); }; + p_fchetest->calcoef_complex (fun_expi2f); + p_fchetest->calpolyvec_complex (fun_sigma_yf, v, polyv, 2, 2, 2); for (int i = 0; i < norder; ++i) - { - for (int j = 0; j < 4; ++j) { - vout[j] += polyv[i * 4 + j] * p_fchetest->coef_complex[i]; + for (int j = 0; j < 4; ++j) + { + vout[j] += polyv[i * 4 + j] * p_fchetest->coef_complex[i]; + } } - } - EXPECT_NEAR(vout[0].real(), 0, 1.e-6); - EXPECT_NEAR(vout[0].imag(), 0, 1.e-6); - EXPECT_NEAR(vout[1].real(), -1, 1.e-6); - EXPECT_NEAR(vout[1].imag(), 0, 1.e-6); - EXPECT_NEAR(vout[2].real(), 1, 1.e-6); - EXPECT_NEAR(vout[2].imag(), 0, 1.e-6); - EXPECT_NEAR(vout[3].real(), 0, 1.e-6); - EXPECT_NEAR(vout[3].imag(), 0, 1.e-6); + EXPECT_NEAR (vout[0].real (), 0, 1.e-6); + EXPECT_NEAR (vout[0].imag (), 0, 1.e-6); + EXPECT_NEAR (vout[1].real (), -1, 1.e-6); + EXPECT_NEAR (vout[1].imag (), 0, 1.e-6); + EXPECT_NEAR (vout[2].real (), 1, 1.e-6); + EXPECT_NEAR (vout[2].imag (), 0, 1.e-6); + EXPECT_NEAR (vout[3].real (), 0, 1.e-6); + EXPECT_NEAR (vout[3].imag (), 0, 1.e-6); delete[] v; delete[] vout; @@ -571,10 +590,10 @@ TEST_F(MathChebyshevTest, calpolyvec_float) delete p_fchetest; } -TEST_F(MathChebyshevTest, tracepolyA_float) +TEST_F (MathChebyshevTest, tracepolyA_float) { const int norder = 100; - p_fchetest = new ModuleBase::Chebyshev(norder); + p_fchetest = new ModuleBase::Chebyshev (norder); std::complex* v = new std::complex[4]; v[0] = 1.0; @@ -583,16 +602,16 @@ TEST_F(MathChebyshevTest, tracepolyA_float) v[3] = 1.0; //[1 0; 0 1] auto fun_sigma_yf - = [&](std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y(in, out, m); }; - p_fchetest->tracepolyA(fun_sigma_yf, v, 2, 2, 2); + = [&] (std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y (in, out, m); }; + p_fchetest->tracepolyA (fun_sigma_yf, v, 2, 2, 2); // Trace: even function: 2 ; odd function 0. 
for (int i = 0; i < norder; ++i) - { - if (i % 2 == 0) - EXPECT_NEAR(p_fchetest->polytrace[i], 2, 1.e-6); - else - EXPECT_NEAR(p_fchetest->polytrace[i], 0, 1.e-6); - } + { + if (i % 2 == 0) + EXPECT_NEAR (p_fchetest->polytrace[i], 2, 1.e-6); + else + EXPECT_NEAR (p_fchetest->polytrace[i], 0, 1.e-6); + } delete[] v; // N < LDA @@ -606,26 +625,26 @@ TEST_F(MathChebyshevTest, tracepolyA_float) v[4] = 1.0; v[5] = 1.0; //[1 0; 0 1; 100 2] - p_fchetest->tracepolyA(fun_sigma_yf, v, 2, LDA, 2); + p_fchetest->tracepolyA (fun_sigma_yf, v, 2, LDA, 2); // Trace: even function: 2 ; odd function 0. for (int i = 0; i < norder; ++i) - { - if (i % 2 == 0) - EXPECT_NEAR(p_fchetest->polytrace[i], 2, 1.e-6); - else - EXPECT_NEAR(p_fchetest->polytrace[i], 0, 1.e-6); - } + { + if (i % 2 == 0) + EXPECT_NEAR (p_fchetest->polytrace[i], 2, 1.e-6); + else + EXPECT_NEAR (p_fchetest->polytrace[i], 0, 1.e-6); + } fun.LDA = 2; delete[] v; delete p_fchetest; } -TEST_F(MathChebyshevTest, checkconverge_float) +TEST_F (MathChebyshevTest, checkconverge_float) { - #ifdef __MPI - #undef __MPI +#ifdef __MPI +#undef __MPI const int norder = 100; - p_fchetest = new ModuleBase::Chebyshev(norder); + p_fchetest = new ModuleBase::Chebyshev (norder); std::complex* v = new std::complex[4]; v[0] = 1.0; @@ -637,16 +656,16 @@ TEST_F(MathChebyshevTest, checkconverge_float) bool converge; auto fun_sigma_yf - = [&](std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y(in, out, m); }; - converge = p_fchetest->checkconverge(fun_sigma_yf, v, 2, 2, tmax, tmin, 0.2); - EXPECT_TRUE(converge); - converge = p_fchetest->checkconverge(fun_sigma_yf, v + 2, 2, 2, tmax, tmin, 0.2); - EXPECT_TRUE(converge); - EXPECT_NEAR(tmin, -1.1, 1e-6); - EXPECT_NEAR(tmax, 1.1, 1e-6); + = [&] (std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y (in, out, m); }; + converge = p_fchetest->checkconverge (fun_sigma_yf, v, 2, 2, tmax, tmin, 0.2); + EXPECT_TRUE (converge); + converge = p_fchetest->checkconverge 
(fun_sigma_yf, v + 2, 2, 2, tmax, tmin, 0.2); + EXPECT_TRUE (converge); + EXPECT_NEAR (tmin, -1.1, 1e-6); + EXPECT_NEAR (tmax, 1.1, 1e-6); delete[] v; delete p_fchetest; - #endif +#endif } #endif \ No newline at end of file diff --git a/source/source_base/test/math_integral_test.cpp b/source/source_base/test/math_integral_test.cpp index d9034b297b8..5ad0ccf6c83 100644 --- a/source/source_base/test/math_integral_test.cpp +++ b/source/source_base/test/math_integral_test.cpp @@ -1,7 +1,7 @@ -#include"../math_integral.h" -#include"gtest/gtest.h" +#include "../math_integral.h" +#include "gtest/gtest.h" -#include +#include #include #include @@ -9,11 +9,9 @@ #define doublethreshold 1e-12 - - /************************************************ -* unit test of class Integral -***********************************************/ + * unit test of class Integral + ***********************************************/ /** * Tested functions: @@ -27,20 +25,24 @@ */ // generate irregular grid with sinx -void sinspace(double start, double end, const int nums, double* xv, double* h){ - double astart = asin(start); - double aend = asin(end); +void + sinspace (double start, double end, const int nums, double* xv, double* h) +{ + double astart = asin (start); + double aend = asin (end); double step = (aend - astart) / (nums - 1); - for(int i = 0; i < nums; ++i){ - h[i] = sin(astart + i * step); - } + for (int i = 0; i < nums; ++i) + { + h[i] = sin (astart + i * step); + } // calculate the difference xv[0] = start; - for(int i = 0; i< nums - 1; ++i){ - h[i] = h[i+1] - h[i]; - xv[i+1] = xv[i] + h[i]; - } + for (int i = 0; i < nums - 1; ++i) + { + h[i] = h[i + 1] - h[i]; + xv[i + 1] = xv[i] + h[i]; + } } class SimpsonIntegralSinx : public testing::Test @@ -49,31 +51,32 @@ class SimpsonIntegralSinx : public testing::Test * test the integral of sinx between [0,PI], * devide to mesh-1 parts */ - - protected: - - double* func; - double* rab; - int mesh = 10001; - double dr = M_PI/(mesh-1); - double 
asum; - double* asumlist; - double expectvalue = 2.0; - - void SetUp() + + protected: + double* func; + double* rab; + int mesh = 10001; + double dr = M_PI / (mesh - 1); + double asum; + double* asumlist; + double expectvalue = 2.0; + + void + SetUp () { - func = new double[mesh]; - rab = new double[mesh]; - asumlist = new double[mesh]; + func = new double[mesh]; + rab = new double[mesh]; + asumlist = new double[mesh]; - for (int i=0;i (&err), sizeof (double)); } - double tol = (end-start) * std::pow(dx, 4) * 24 / 180; - EXPECT_NEAR(std::atan(end) - std::atan(start), ModuleBase::Integral::simpson(ngrid, f, dx), std::max(tol, doublethreshold)); - double err = std::abs(ModuleBase::Integral::simpson(ngrid, f, dx) - ref_val) / std::abs(ref_val); - file_o.write(reinterpret_cast(&err), sizeof(double)); - } delete[] f; - file_o.close(); + file_o.close (); } -TEST_F(SimpsonIntegralITF, SinGridOdd) +TEST_F (SimpsonIntegralITF, SinGridOdd) { double start = -1.0, end = 1.0; const int ngrid_max = 10000; - double *xv = new double[ngrid_max]; - double *h = new double[ngrid_max]; - double *f = new double[ngrid_max]; - std::ofstream file_o("data/itf_sin_out.bin", std::ios::binary); - double ref_val = std::atan(end) - std::atan(start); - for (int ngrid = 5; ngrid <= ngrid_max; ngrid += 2) { - sinspace(start, end, ngrid, xv, h); - for (int i = 0; i < ngrid; ++i) { - f[i] = 1.0 / (1.0 + xv[i] * xv[i]); + double* xv = new double[ngrid_max]; + double* h = new double[ngrid_max]; + double* f = new double[ngrid_max]; + std::ofstream file_o ("data/itf_sin_out.bin", std::ios::binary); + double ref_val = std::atan (end) - std::atan (start); + for (int ngrid = 5; ngrid <= ngrid_max; ngrid += 2) + { + sinspace (start, end, ngrid, xv, h); + for (int i = 0; i < ngrid; ++i) + { + f[i] = 1.0 / (1.0 + xv[i] * xv[i]); + } + + // crude estimate for irregular-grid error bound + double dx = h[ngrid / 2]; + double tol = (end - start) * std::pow (dx, 4); + EXPECT_NEAR (std::atan (end) - std::atan 
(start), + ModuleBase::Integral::simpson (ngrid, f, h), + std::max (tol, doublethreshold)); + double err = std::abs (ModuleBase::Integral::simpson (ngrid, f, h) - ref_val) / std::abs (ref_val); + file_o.write (reinterpret_cast (&err), sizeof (double)); } - - // crude estimate for irregular-grid error bound - double dx = h[ngrid / 2]; - double tol = (end-start) * std::pow(dx, 4); - EXPECT_NEAR(std::atan(end) - std::atan(start), ModuleBase::Integral::simpson(ngrid, f, h), std::max(tol, doublethreshold)); - double err = std::abs(ModuleBase::Integral::simpson(ngrid, f, h) - ref_val) / std::abs(ref_val); - file_o.write(reinterpret_cast(&err), sizeof(double)); - } - + delete[] xv; delete[] h; delete[] f; - file_o.close(); + file_o.close (); } -class SimpsonIntegralExp : public testing::Test{ - +class SimpsonIntegralExp : public testing::Test +{ }; -TEST_F(SimpsonIntegralExp, UniformGridOdd) +TEST_F (SimpsonIntegralExp, UniformGridOdd) { double start = 0.0, end = 1.0; const int ngrid_max = 10000; - double *f = new double[ngrid_max]; - std::ofstream file_o("data/exp_uni_out.bin", std::ios::binary); - double ref_val = std::exp(end) - std::exp(start); - for (int ngrid = 5; ngrid <= ngrid_max; ngrid += 2) { - const double dx = (end - start) / (ngrid - 1); - for (int i = 0; i < ngrid; ++i) { - double x = start + i * dx; - f[i] = std::exp(x); + double* f = new double[ngrid_max]; + std::ofstream file_o ("data/exp_uni_out.bin", std::ios::binary); + double ref_val = std::exp (end) - std::exp (start); + for (int ngrid = 5; ngrid <= ngrid_max; ngrid += 2) + { + const double dx = (end - start) / (ngrid - 1); + for (int i = 0; i < ngrid; ++i) + { + double x = start + i * dx; + f[i] = std::exp (x); + } + double tol = (end - start) * std::exp (1) * std::pow (dx, 4) / 180; + EXPECT_NEAR (std::exp (end) - std::exp (start), + ModuleBase::Integral::simpson (ngrid, f, dx), + std::max (tol, doublethreshold)); + double err = std::abs (ModuleBase::Integral::simpson (ngrid, f, dx) - ref_val) / 
std::abs (ref_val); + file_o.write (reinterpret_cast (&err), sizeof (double)); } - double tol = (end-start) * std::exp(1) * std::pow(dx, 4) / 180; - EXPECT_NEAR(std::exp(end) - std::exp(start), ModuleBase::Integral::simpson(ngrid, f, dx), std::max(tol, doublethreshold)); - double err = std::abs(ModuleBase::Integral::simpson(ngrid, f, dx) - ref_val) / std::abs(ref_val); - file_o.write(reinterpret_cast(&err), sizeof(double)); - } delete[] f; - file_o.close(); + file_o.close (); } -TEST_F(SimpsonIntegralExp, SinGridOdd) +TEST_F (SimpsonIntegralExp, SinGridOdd) { double start = 0.0, end = 1.0; const int ngrid_max = 10000; - double *xv = new double[ngrid_max]; - double *h = new double[ngrid_max]; - double *f = new double[ngrid_max]; - std::ofstream file_o("data/exp_sin_out.bin", std::ios::binary); - double ref_val = std::exp(end) - std::exp(start); - // skip ngrid = 3 since the errors exceeds the threshold - for (int ngrid = 5; ngrid <= ngrid_max; ngrid += 2) { - sinspace(start, end, ngrid, xv, h); - for (int i = 0; i < ngrid; ++i) { - f[i] = std::exp(xv[i]); + double* xv = new double[ngrid_max]; + double* h = new double[ngrid_max]; + double* f = new double[ngrid_max]; + std::ofstream file_o ("data/exp_sin_out.bin", std::ios::binary); + double ref_val = std::exp (end) - std::exp (start); + // skip ngrid = 3 since the errors exceeds the threshold + for (int ngrid = 5; ngrid <= ngrid_max; ngrid += 2) + { + sinspace (start, end, ngrid, xv, h); + for (int i = 0; i < ngrid; ++i) + { + f[i] = std::exp (xv[i]); + } + + double dx = h[ngrid / 2]; + double tol = (end - start) * std::pow (dx, 4); + EXPECT_NEAR (std::exp (end) - std::exp (start), + ModuleBase::Integral::simpson (ngrid, f, h), + std::max (tol, doublethreshold)); + double err = std::abs (ModuleBase::Integral::simpson (ngrid, f, h) - ref_val) / std::abs (ref_val); + file_o.write (reinterpret_cast (&err), sizeof (double)); } - double dx = h[ngrid / 2]; - double tol = (end-start) * std::pow(dx, 4); - 
EXPECT_NEAR(std::exp(end) - std::exp(start), ModuleBase::Integral::simpson(ngrid, f, h), std::max(tol, doublethreshold)); - double err = std::abs(ModuleBase::Integral::simpson(ngrid, f, h) - ref_val) / std::abs(ref_val); - file_o.write(reinterpret_cast(&err), sizeof(double)); - } - delete[] xv; delete[] h; delete[] f; - file_o.close(); + file_o.close (); } \ No newline at end of file diff --git a/source/source_base/test/math_polyint_test.cpp b/source/source_base/test/math_polyint_test.cpp index c2578f85314..13387f7ee50 100644 --- a/source/source_base/test/math_polyint_test.cpp +++ b/source/source_base/test/math_polyint_test.cpp @@ -1,6 +1,6 @@ #include "../math_polyint.h" -#include +#include #include "../realarray.h" #include "gmock/gmock.h" @@ -9,135 +9,140 @@ #define doublethreshold 1e-9 /************************************************ -* unit test of class PolyInt -***********************************************/ + * unit test of class PolyInt + ***********************************************/ /** - * This unit test is to verify the accuracy of + * This unit test is to verify the accuracy of * interpolation method on the function sin(x)/x * with a interval of 0.01. * sin(x)/x is one of the solution of spherical bessel * function when l=0. 
- * + * * - Tested function: - * - 4 types of Polynomial_Interpolation + * - 4 types of Polynomial_Interpolation * - Polynomial_Interpolation_xy */ - class bessell0 : public testing::Test { - protected: - + protected: int TableLength = 400; double interval = 0.01; - ModuleBase::realArray table3,table4; + ModuleBase::realArray table3, table4; ModuleBase::realArray y3; - double *tablex; - double *tabley; + double* tablex; + double* tabley; - double sinc(double x) {return sin(x)/x;} + double + sinc (double x) + { + return sin (x) / x; + } - void SetUp() + void + SetUp () { tablex = new double[TableLength]; tabley = new double[TableLength]; - table3.create(1,1,TableLength); - table4.create(1,1,1,TableLength); - y3.create(1,1,TableLength); - - for(int i=1;i(&y), sizeof(double))) - { - Y[i] = y; - ++i; - } - fin.close(); + while (fin.read (reinterpret_cast (&y), sizeof (double))) + { + Y[i] = y; + ++i; + } + fin.close (); for (int i = 0; i < nr; ++i) - { - r[i] = (i + 1) * dr; - } + { + r[i] = (i + 1) * dr; + } // test for new sphbesj for (int l = l_lo; l <= l_hi; ++l) - { - for (int i = 0; i < nr; ++i) { - EXPECT_NEAR(ModuleBase::Sphbes::sphbesj(l, r[i] * q), Y[l * nr + i], 1e-12); - double tmp = std::abs(Y[l * nr + i] - ModuleBase::Sphbes::sphbesj(l, r[i] * q)); - file_n.write(reinterpret_cast(&tmp), sizeof(double)); + for (int i = 0; i < nr; ++i) + { + EXPECT_NEAR (ModuleBase::Sphbes::sphbesj (l, r[i] * q), Y[l * nr + i], 1e-12); + double tmp = std::abs (Y[l * nr + i] - ModuleBase::Sphbes::sphbesj (l, r[i] * q)); + file_n.write (reinterpret_cast (&tmp), sizeof (double)); + } } - } // test for old Bessel // most of l cases precision failed to achieve 1e-12 double* jl_old = new double[nr + 10]; for (int l = l_lo; l <= l_hi; ++l) - { - ModuleBase::Sphbes::Spherical_Bessel(nr, r, q, l, jl_old); - for (int i = 0; i < nr; ++i) { - double tmp = std::abs(jl_old[i] - Y[l * nr + i]); - file_o.write(reinterpret_cast(&tmp), sizeof(double)); + ModuleBase::Sphbes::Spherical_Bessel 
(nr, r, q, l, jl_old); + for (int i = 0; i < nr; ++i) + { + double tmp = std::abs (jl_old[i] - Y[l * nr + i]); + file_o.write (reinterpret_cast (&tmp), sizeof (double)); + } } - } delete[] r; delete[] Y; delete[] jl_old; - file_o.close(); - file_n.close(); + file_o.close (); + file_n.close (); } -TEST_F(Sphbes, SphericalBesselPrecisionNearZero) +TEST_F (Sphbes, SphericalBesselPrecisionNearZero) { // This test checks whether sphbesj agrees with the Octave implementation // when x is near zero point for a range of l. @@ -328,63 +336,62 @@ TEST_F(Sphbes, SphericalBesselPrecisionNearZero) // read reference data double y; - std::ifstream fin("data/bjxo.bin", std::ios::binary); + std::ifstream fin ("data/bjxo.bin", std::ios::binary); int i = 0; - while (fin.read(reinterpret_cast(&y), sizeof(double))) - { - Y[i] = y; - ++i; - } - fin.close(); + while (fin.read (reinterpret_cast (&y), sizeof (double))) + { + Y[i] = y; + ++i; + } + fin.close (); // generate x x[0] = 1.0 / (1 << 5); for (int i = 1; i < n; i++) - { - x[i] = x[i - 1] / 2; - } + { + x[i] = x[i - 1] / 2; + } // test for sphbesj near zero for (int l = l_lo; l <= l_hi; ++l) - { - for (int i = 0; i < n; ++i) { - EXPECT_NEAR(ModuleBase::Sphbes::sphbesj(l, x[i]), Y[l * n + i], 1e-12); + for (int i = 0; i < n; ++i) + { + EXPECT_NEAR (ModuleBase::Sphbes::sphbesj (l, x[i]), Y[l * n + i], 1e-12); + } } - } delete[] x; delete[] Y; } -TEST_F(Sphbes, Zeros) +TEST_F (Sphbes, Zeros) { // This test checks whether sphbes_zeros properly computes the zeros of sphbesj. 
int lmax = 20; int nzeros = 500; - double* zeros = new double[nzeros*(lmax+1)]; + double* zeros = new double[nzeros * (lmax + 1)]; for (int l = 0; l <= lmax; ++l) - { - ModuleBase::Sphbes::sphbes_zeros(l, nzeros, zeros, false); - for (int i = 0; i < nzeros; ++i) { - EXPECT_LT(std::abs(ModuleBase::Sphbes::sphbesj(l, zeros[i])), 1e-14); + ModuleBase::Sphbes::sphbes_zeros (l, nzeros, zeros, false); + for (int i = 0; i < nzeros; ++i) + { + EXPECT_LT (std::abs (ModuleBase::Sphbes::sphbesj (l, zeros[i])), 1e-14); + } } - } - - ModuleBase::Sphbes::sphbes_zeros(lmax, nzeros, zeros, true); + ModuleBase::Sphbes::sphbes_zeros (lmax, nzeros, zeros, true); for (int l = 0; l <= lmax; ++l) - { - for (int i = 0; i < nzeros; ++i) { - EXPECT_LT(std::abs(ModuleBase::Sphbes::sphbesj(l, zeros[l*nzeros+i])), 1e-14); + for (int i = 0; i < nzeros; ++i) + { + EXPECT_LT (std::abs (ModuleBase::Sphbes::sphbesj (l, zeros[l * nzeros + i])), 1e-14); + } } - } delete[] zeros; } -TEST_F(Sphbes, ZerosOld) +TEST_F (Sphbes, ZerosOld) { // This test checks whether Spherical_Bessel_Roots properly computes the zeros of sphbesj. 
@@ -392,16 +399,16 @@ TEST_F(Sphbes, ZerosOld) int nzeros = 50; double* zeros = new double[nzeros]; for (int l = 0; l <= lmax; ++l) - { - ModuleBase::Sphbes::Spherical_Bessel_Roots(nzeros, l, 1e-7, zeros, 1.0); - for (int i = 0; i < nzeros; ++i) { - EXPECT_LT(std::abs(ModuleBase::Sphbes::sphbesj(l, zeros[i])), 1e-7); + ModuleBase::Sphbes::Spherical_Bessel_Roots (nzeros, l, 1e-7, zeros, 1.0); + for (int i = 0; i < nzeros; ++i) + { + EXPECT_LT (std::abs (ModuleBase::Sphbes::sphbesj (l, zeros[i])), 1e-7); + } } - } } -TEST_F(Sphbes, Derivatives) +TEST_F (Sphbes, Derivatives) { int lmax = 20; int numr = 20; @@ -410,25 +417,25 @@ TEST_F(Sphbes, Derivatives) double q = 0.001; r[0] = 1.0; for (int i = 0; i < numr; ++i) - { - r[i + 1] = r[i] * 2.0; - } + { + r[i + 1] = r[i] * 2.0; + } for (int l = 0; l <= lmax; ++l) - { - ModuleBase::Sphbes::dsphbesj(numr, r, q, l, djl); - for (int i = 0; i < numr; ++i) { - double h = 1e-8; - EXPECT_LT( - abs(djl[i] * 2 * h - - (ModuleBase::Sphbes::sphbesj(l, q * r[i] + h) - ModuleBase::Sphbes::sphbesj(l, q * r[i] - h))), - 1e-14); + ModuleBase::Sphbes::dsphbesj (numr, r, q, l, djl); + for (int i = 0; i < numr; ++i) + { + double h = 1e-8; + EXPECT_LT (abs (djl[i] * 2 * h + - (ModuleBase::Sphbes::sphbesj (l, q * r[i] + h) + - ModuleBase::Sphbes::sphbesj (l, q * r[i] - h))), + 1e-14); + } } - } } -TEST_F(Sphbes, DerivativesOld) +TEST_F (Sphbes, DerivativesOld) { int lmax = 20; int numr = 20; @@ -437,51 +444,52 @@ TEST_F(Sphbes, DerivativesOld) double q = 0.001; r[0] = 1.0; for (int i = 0; i < numr; ++i) - { - r[i + 1] = r[i] * 2.0; - } + { + r[i + 1] = r[i] * 2.0; + } for (int l = 0; l < lmax; l++) - { - ModuleBase::Sphbes::dSpherical_Bessel_dx(numr, r, q, l, djl); - for (int i = 0; i < numr; i++) { - double h = 1e-8; - double errs - = abs(djl[i] * 2 * h - - (ModuleBase::Sphbes::sphbesj(l, q * r[i] + h) - ModuleBase::Sphbes::sphbesj(l, q * r[i] - h))); - if (errs > 1e-14) - { - std::cout << "l = " << l << ", r = " << r[i] << ", errs = " << 
errs << std::endl; - } + ModuleBase::Sphbes::dSpherical_Bessel_dx (numr, r, q, l, djl); + for (int i = 0; i < numr; i++) + { + double h = 1e-8; + double errs = abs (djl[i] * 2 * h + - (ModuleBase::Sphbes::sphbesj (l, q * r[i] + h) + - ModuleBase::Sphbes::sphbesj (l, q * r[i] - h))); + if (errs > 1e-14) + { + std::cout << "l = " << l << ", r = " << r[i] << ", errs = " << errs << std::endl; + } + } } - } } -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; } -TEST_F(Sphbes, SphericalBesselsjp) +TEST_F (Sphbes, SphericalBesselsjp) { int iii = 0; double* sjp = new double[msh]; - std::memset(sjp, 0, msh * sizeof(double)); - ModuleBase::Sphbes::Spherical_Bessel(msh, r, q, l0, jl, sjp); - EXPECT_NEAR(mean(jl, msh) / 0.2084468748396, 1.0, doublethreshold); + std::memset (sjp, 0, msh * sizeof (double)); + ModuleBase::Sphbes::Spherical_Bessel (msh, r, q, l0, jl, sjp); + EXPECT_NEAR (mean (jl, msh) / 0.2084468748396, 1.0, doublethreshold); for (int iii = 0; iii < msh; ++iii) - { - EXPECT_EQ(sjp[iii], 1.0); - } + { + EXPECT_EQ (sjp[iii], 1.0); + } delete[] sjp; } diff --git a/source/source_base/test/math_ylmreal_test.cpp b/source/source_base/test/math_ylmreal_test.cpp index 641bb2af4d1..0b4732c51f5 100644 --- a/source/source_base/test/math_ylmreal_test.cpp +++ b/source/source_base/test/math_ylmreal_test.cpp @@ -1,16 +1,16 @@ -#include"../math_ylmreal.h" -#include"../ylm.h" -#include"../vector3.h" -#include"../matrix.h" -#include"gtest/gtest.h" -#include +#include "../math_ylmreal.h" +#include "../ylm.h" +#include "../vector3.h" +#include "../matrix.h" +#include "gtest/gtest.h" +#include #include "source_psi/psi.h" #define doublethreshold 1e-12 
/************************************************ -* unit test of class YlmReal and Ylm -***********************************************/ + * unit test of class YlmReal and Ylm + ***********************************************/ /** * For lmax <5 cases, the reference values are calculated by the formula from @@ -30,474 +30,843 @@ * - sph_harm * - rl_sph_harm * - grad_rl_sph_harm - * - equality_value_test: test the eqaulity of Ylm function between rl_sph_harm (spherical input) and get_ylm_real (Cartesian input) - * - equality_gradient_test:test the eqaulity of Ylm gradient function between grad_rl_sph_harm(spherical input) and rlylm (Cartesian input) + * - equality_value_test: test the eqaulity of Ylm function between rl_sph_harm (spherical input) and get_ylm_real + * (Cartesian input) + * - equality_gradient_test:test the eqaulity of Ylm gradient function between grad_rl_sph_harm(spherical input) + * and rlylm (Cartesian input) * */ class YlmRealTest : public testing::Test { - protected: - - int lmax = 7; //maximum angular quantum number - int ng = 4; //test the 4 selected points on the sphere - int nylm = 64; //total Ylm number; - - ModuleBase::matrix ylm; //Ylm - ModuleBase::matrix *dylm; //dYlm/dx, dYlm/dy, dYlm/dz - ModuleBase::Vector3 *g; //vectors of the 4 points - double *ref; //reference of Ylm - double *rly; //Ylm - double (*rlgy)[3]; //the gradient of Ylm - std::vector rlyvector; //Ylm - std::vector rlgyvector; //the gradient of Ylm (flat, size nylm*3) - - //Ylm function - inline double norm(const double &x, const double &y, const double &z) {return sqrt(x*x + y*y + z*z);} - double y00(const double &x, const double &y, const double &z) {return 1.0/2.0/sqrt(M_PI);} - double y10(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return sqrt(3.0/(4.0*M_PI)) * z / r;} - double y11(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return -1.0*sqrt(3.0/(4.*M_PI)) * x / r;} - double y1m1(const double &x, const double 
&y, const double &z) {double r=norm(x,y,z); return -1.0*sqrt(3./(4.*M_PI)) * y / r;} // y1m1 means Y1,-1 - double y20(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return 1./4. * sqrt(5./M_PI) * (-1.*x*x - y*y + 2.*z*z) / (r*r);} - double y21(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return -1.0*1./2. * sqrt(15./M_PI) * (z*x) / (r*r);} - double y2m1(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return -1.0*1./2. * sqrt(15./M_PI) * (z*y) / (r*r);} - double y22(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return 1./4. * sqrt(15./M_PI) * (x*x - y*y) / (r*r);} - double y2m2(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return 1./2. * sqrt(15./M_PI) * (x*y) / (r*r);} - double y30(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return 1./4. * sqrt(7./M_PI) * z*(2.*z*z-3.*x*x-3.*y*y) / (r*r*r);} - double y31(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return -1.0*1./4. * sqrt(21./2./M_PI) * x*(4.*z*z-x*x-y*y) / (r*r*r);} - double y3m1(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return -1.0*1./4. * sqrt(21./2./M_PI) * y*(4.*z*z-x*x-y*y) / (r*r*r);} - double y32(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return 1./4. * sqrt(105./M_PI) * (x*x - y*y)*z / (r*r*r);} - double y3m2(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return 1./2. * sqrt(105./M_PI) * x*y*z / (r*r*r);} - double y33(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return -1.0*1./4. * sqrt(35./2./M_PI) * x*(x*x - 3.*y*y) / (r*r*r);} - double y3m3(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return -1.0*1./4. 
* sqrt(35./2./M_PI) * y*(3.*x*x - y*y) / (r*r*r);} - double y40(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return 3./16.*sqrt(1./M_PI) * (35.*z*z*z*z - 30.*z*z*r*r + 3*r*r*r*r) / (r*r*r*r);} - double y41(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return -1.0*3./4.*sqrt(5./2./M_PI) * x*z*(7.*z*z - 3*r*r) / (r*r*r*r);} - double y4m1(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return -1.0*3./4.*sqrt(5./2./M_PI) * y*z*(7.*z*z - 3.*r*r) / (r*r*r*r);} - double y42(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return 3./8.*sqrt(5./M_PI) * (x*x-y*y)*(7.*z*z-r*r) / (r*r*r*r);} - double y4m2(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return 3./4.*sqrt(5./M_PI) * x*y*(7.*z*z - r*r) / (r*r*r*r);} - double y43(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return -1.0*3./4.*sqrt(35./2./M_PI) * x*z*(x*x - 3.*y*y) / (r*r*r*r);} - double y4m3(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return -1.0*3./4.*sqrt(35./2./M_PI) * y*z*(3.*x*x - y*y) / (r*r*r*r);} - double y44(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return 3./16.*sqrt(35./M_PI) * (x*x*(x*x - 3.*y*y) - y*y*(3.*x*x-y*y)) / (r*r*r*r);} - double y4m4(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return 3./4.*sqrt(35./M_PI) * x*y*(x*x - y*y) / (r*r*r*r);} - - //the reference values are calculated by ModuleBase::Ylm::grad_rl_sph_harm - //1st dimension: example, 2nd dimension: Ylm, 3rd dimension: dx/dy/dz - double rlgyref[4][64][3] = { - { { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -4.88603e-01, 0.00000e+00}, {-6.30783e-01, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -1.09255e+00}, - { 0.00000e+00, -0.00000e+00, 0.00000e+00}, { 1.09255e+00, 0.00000e+00, 
0.00000e+00}, {-0.00000e+00, 1.09255e+00, -0.00000e+00}, - {-0.00000e+00, 0.00000e+00, -1.11953e+00}, { 1.37114e+00, 0.00000e+00, -0.00000e+00}, { 0.00000e+00, 4.57046e-01, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 1.44531e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-1.77013e+00, 0.00000e+00, -0.00000e+00}, - { 0.00000e+00, -1.77013e+00, 0.00000e+00}, { 1.26943e+00, 0.00000e+00, -0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.00714e+00}, - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-1.89235e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, -9.46175e-01, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, -1.77013e+00}, { 0.00000e+00, -0.00000e+00, 0.00000e+00}, { 2.50334e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 2.50334e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.75425e+00}, {-2.26473e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -4.52947e-01, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -2.39677e+00}, {-0.00000e+00, -0.00000e+00, 0.00000e+00}, - { 2.44619e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.46771e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.07566e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-3.28191e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -3.28191e+00, 0.00000e+00}, - {-1.90708e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -2.91311e+00}, { 0.00000e+00, -0.00000e+00, 0.00000e+00}, - { 2.76362e+00, 0.00000e+00, -0.00000e+00}, {-0.00000e+00, 9.21205e-01, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.76362e+00}, - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-3.02739e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, -2.01826e+00, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, -2.36662e+00}, { 0.00000e+00, -0.00000e+00, 0.00000e+00}, { 4.09910e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 4.09910e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -2.38995e+00}, { 3.16161e+00, 0.00000e+00, -0.00000e+00}, - { 0.00000e+00, 4.51658e-01, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 3.31900e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - 
{-3.28564e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -1.40813e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -3.11349e+00}, - {-0.00000e+00, -0.00000e+00, 0.00000e+00}, { 3.63241e+00, 0.00000e+00, -0.00000e+00}, { 0.00000e+00, 2.59458e+00, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 2.64596e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-4.95014e+00, 0.00000e+00, -0.00000e+00}, - { 0.00000e+00, -4.95014e+00, 0.00000e+00} - }, - { - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -4.88603e-01, 0.00000e+00}, { 0.00000e+00, -6.30783e-01, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -0.00000e+00, -1.09255e+00}, { 0.00000e+00, -1.09255e+00, 0.00000e+00}, { 1.09255e+00, 0.00000e+00, -0.00000e+00}, - { 0.00000e+00, -0.00000e+00, -1.11953e+00}, { 4.57046e-01, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.37114e+00, -0.00000e+00}, - { 0.00000e+00, -0.00000e+00, -1.44531e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 1.77013e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 1.77013e+00, 0.00000e+00}, { 0.00000e+00, 1.26943e+00, -0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 2.00714e+00}, { 0.00000e+00, 1.89235e+00, -0.00000e+00}, {-9.46175e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.77013e+00}, { 0.00000e+00, 2.50334e+00, -0.00000e+00}, - {-2.50334e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.75425e+00}, {-4.52947e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -2.26473e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.39677e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-1.46771e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -2.44619e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.07566e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-3.28191e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -3.28191e+00, 0.00000e+00}, - { 
0.00000e+00, -1.90708e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.91311e+00}, - { 0.00000e+00, -2.76362e+00, 0.00000e+00}, { 9.21205e-01, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -0.00000e+00, -2.76362e+00}, { 0.00000e+00, -3.02739e+00, 0.00000e+00}, { 2.01826e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.36662e+00}, { 0.00000e+00, -4.09910e+00, 0.00000e+00}, - { 4.09910e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.38995e+00}, { 4.51658e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 3.16161e+00, -0.00000e+00}, { 0.00000e+00, -0.00000e+00, -3.31900e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, - { 1.40813e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 3.28564e+00, -0.00000e+00}, { 0.00000e+00, -0.00000e+00, -3.11349e+00}, - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 2.59458e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 3.63241e+00, -0.00000e+00}, - { 0.00000e+00, 0.00000e+00, -2.64596e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 4.95014e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 4.95014e+00, -0.00000e+00} - }, - { - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -4.88603e-01, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.26157e+00}, {-1.09255e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -1.09255e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.22045e-16}, {-0.00000e+00, 0.00000e+00, -0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 2.23906e+00}, {-1.82818e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -1.82818e+00, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 8.81212e-16}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-1.84324e-16, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 5.55112e-17, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 3.38514e+00}, {-2.67619e+00, 0.00000e+00, 0.00000e+00}, - { 
0.00000e+00, -2.67619e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.30756e-15}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-5.52973e-16, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.66533e-16, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.67801e+00}, {-3.62357e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -3.62357e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.87108e-15}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-1.22267e-15, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 3.68219e-16, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 4.93038e-32, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -6.16298e-33, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 6.10264e+00}, {-4.66097e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -4.66097e+00, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 8.98664e-15}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-2.30221e-15, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 6.93334e-16, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - { 1.77767e-31, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -2.22209e-32, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 7.64784e+00}, {-5.78122e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -5.78122e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.51096e-14}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-3.91011e-15, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.17757e-15, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 4.67737e-31, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -5.84671e-32, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 1.13319e-47, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -1.41649e-48, 0.00000e+00} - }, - { - { 0.00000e+00, 
0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -4.88603e-01, 0.00000e+00}, { 3.64183e-01, 3.64183e-01, -7.28366e-01}, { 6.30783e-01, -0.00000e+00, 6.30783e-01}, - {-0.00000e+00, 6.30783e-01, 6.30783e-01}, {-6.30783e-01, 6.30783e-01, -1.66533e-16}, {-6.30783e-01, -6.30783e-01, 0.00000e+00}, - {-7.46353e-01, -7.46353e-01, 0.00000e+00}, { 0.00000e+00, 3.04697e-01, -1.21879e+00}, { 3.04697e-01, 0.00000e+00, -1.21879e+00}, - { 9.63537e-01, -9.63537e-01, 4.01253e-16}, { 9.63537e-01, 9.63537e-01, 9.63537e-01}, {-4.44089e-16, 1.18009e+00, -2.22045e-16}, - {-1.18009e+00, -1.11022e-16, 0.00000e+00}, { 4.88603e-01, 4.88603e-01, 1.30294e+00}, {-1.03006e+00, -7.72548e-01, 7.72548e-01}, - {-7.72548e-01, -1.03006e+00, 7.72548e-01}, {-7.28366e-01, 7.28366e-01, -5.25363e-16}, {-3.64183e-01, -3.64183e-01, -2.18510e+00}, - { 7.69185e-16, -2.04397e+00, -6.81324e-01}, { 2.04397e+00, 1.92296e-16, 6.81324e-01}, { 9.63537e-01, 9.63537e-01, -1.44756e-16}, - {-9.63537e-01, 9.63537e-01, -5.55112e-17}, { 5.19779e-01, 5.19779e-01, -1.81923e+00}, { 1.40917e+00, 8.05238e-01, 8.05238e-01}, - { 8.05238e-01, 1.40917e+00, 8.05238e-01}, { 0.00000e+00, -4.44089e-16, 3.24739e-16}, {-1.06523e+00, -1.06523e+00, 2.13046e+00}, - {-2.17439e-01, 1.73951e+00, 1.73951e+00}, {-1.73951e+00, 2.17439e-01, -1.73951e+00}, {-1.84503e+00, -1.84503e+00, -9.22517e-01}, - { 1.84503e+00, -1.84503e+00, 6.58625e-16}, { 1.45863e+00, 1.11022e-15, 0.00000e+00}, {-8.88178e-16, 1.45863e+00, 0.00000e+00}, - {-1.46807e+00, -1.46807e+00, 5.87227e-01}, {-4.48502e-01, -3.36617e-16, -2.24251e+00}, {-3.36617e-16, -4.48502e-01, -2.24251e+00}, - { 7.09144e-01, -7.09144e-01, 1.87222e-16}, { 2.12743e+00, 2.12743e+00, -9.38779e-16}, { 7.09144e-01, -5.11006e-16, -2.12743e+00}, - { 1.02201e-15, -7.09144e-01, 2.12743e+00}, { 1.81260e+00, 1.81260e+00, 2.58943e+00}, {-2.07154e+00, 2.07154e+00, -1.66969e-15}, - {-3.03637e+00, -2.31111e-15, -6.07275e-01}, { 
1.84889e-15, -3.03637e+00, -6.07275e-01}, { 1.05183e+00, -1.05183e+00, 5.77778e-17}, - { 1.05183e+00, 1.05183e+00, 4.03986e-17}, { 1.27464e+00, 1.27464e+00, 1.69952e+00}, {-1.28472e+00, -1.20442e+00, 1.92707e+00}, - {-1.20442e+00, -1.28472e+00, 1.92707e+00}, {-8.52285e-01, 8.52285e-01, -6.74704e-16}, {-1.50789e+00, -1.50789e+00, -2.95022e+00}, - {-1.11260e+00, -2.08612e+00, 9.27164e-01}, { 2.08612e+00, 1.11260e+00, -9.27164e-01}, {-3.07506e-01, -3.07506e-01, -3.69007e+00}, - { 1.23002e+00, -1.23002e+00, 2.28018e-15}, { 3.69007e+00, -1.53753e-01, 1.84503e+00}, {-1.53753e-01, 3.69007e+00, 1.84503e+00}, - {-2.35197e+00, 2.35197e+00, -8.00513e-16}, {-2.35197e+00, -2.35197e+00, -7.83988e-01}, { 1.37903e-15, -1.46671e+00, 9.77875e-17}, - { 1.46671e+00, 1.14919e-15, 1.34475e-16} - } - }; + protected: + int lmax = 7; // maximum angular quantum number + int ng = 4; // test the 4 selected points on the sphere + int nylm = 64; // total Ylm number; + + ModuleBase::matrix ylm; // Ylm + ModuleBase::matrix* dylm; // dYlm/dx, dYlm/dy, dYlm/dz + ModuleBase::Vector3* g; // vectors of the 4 points + double* ref; // reference of Ylm + double* rly; // Ylm + double (*rlgy)[3]; // the gradient of Ylm + std::vector rlyvector; // Ylm + std::vector rlgyvector; // the gradient of Ylm (flat, size nylm*3) + + // Ylm function + inline double + norm (const double& x, const double& y, const double& z) + { + return sqrt (x * x + y * y + z * z); + } + double + y00 (const double& x, const double& y, const double& z) + { + return 1.0 / 2.0 / sqrt (M_PI); + } + double + y10 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return sqrt (3.0 / (4.0 * M_PI)) * z / r; + } + double + y11 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return -1.0 * sqrt (3.0 / (4. * M_PI)) * x / r; + } + double + y1m1 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return -1.0 * sqrt (3. / (4. 
* M_PI)) * y / r; + } // y1m1 means Y1,-1 + double + y20 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return 1. / 4. * sqrt (5. / M_PI) * (-1. * x * x - y * y + 2. * z * z) / (r * r); + } + double + y21 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return -1.0 * 1. / 2. * sqrt (15. / M_PI) * (z * x) / (r * r); + } + double + y2m1 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return -1.0 * 1. / 2. * sqrt (15. / M_PI) * (z * y) / (r * r); + } + double + y22 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return 1. / 4. * sqrt (15. / M_PI) * (x * x - y * y) / (r * r); + } + double + y2m2 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return 1. / 2. * sqrt (15. / M_PI) * (x * y) / (r * r); + } + double + y30 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return 1. / 4. * sqrt (7. / M_PI) * z * (2. * z * z - 3. * x * x - 3. * y * y) / (r * r * r); + } + double + y31 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return -1.0 * 1. / 4. * sqrt (21. / 2. / M_PI) * x * (4. * z * z - x * x - y * y) / (r * r * r); + } + double + y3m1 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return -1.0 * 1. / 4. * sqrt (21. / 2. / M_PI) * y * (4. * z * z - x * x - y * y) / (r * r * r); + } + double + y32 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return 1. / 4. * sqrt (105. / M_PI) * (x * x - y * y) * z / (r * r * r); + } + double + y3m2 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return 1. / 2. * sqrt (105. / M_PI) * x * y * z / (r * r * r); + } + double + y33 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return -1.0 * 1. / 4. * sqrt (35. / 2. 
/ M_PI) * x * (x * x - 3. * y * y) / (r * r * r); + } + double + y3m3 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return -1.0 * 1. / 4. * sqrt (35. / 2. / M_PI) * y * (3. * x * x - y * y) / (r * r * r); + } + double + y40 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return 3. / 16. * sqrt (1. / M_PI) * (35. * z * z * z * z - 30. * z * z * r * r + 3 * r * r * r * r) + / (r * r * r * r); + } + double + y41 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return -1.0 * 3. / 4. * sqrt (5. / 2. / M_PI) * x * z * (7. * z * z - 3 * r * r) / (r * r * r * r); + } + double + y4m1 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return -1.0 * 3. / 4. * sqrt (5. / 2. / M_PI) * y * z * (7. * z * z - 3. * r * r) / (r * r * r * r); + } + double + y42 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return 3. / 8. * sqrt (5. / M_PI) * (x * x - y * y) * (7. * z * z - r * r) / (r * r * r * r); + } + double + y4m2 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return 3. / 4. * sqrt (5. / M_PI) * x * y * (7. * z * z - r * r) / (r * r * r * r); + } + double + y43 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return -1.0 * 3. / 4. * sqrt (35. / 2. / M_PI) * x * z * (x * x - 3. * y * y) / (r * r * r * r); + } + double + y4m3 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return -1.0 * 3. / 4. * sqrt (35. / 2. / M_PI) * y * z * (3. * x * x - y * y) / (r * r * r * r); + } + double + y44 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return 3. / 16. * sqrt (35. / M_PI) * (x * x * (x * x - 3. * y * y) - y * y * (3. 
* x * x - y * y)) + / (r * r * r * r); + } + double + y4m4 (const double& x, const double& y, const double& z) + { + double r = norm (x, y, z); + return 3. / 4. * sqrt (35. / M_PI) * x * y * (x * x - y * y) / (r * r * r * r); + } - void SetUp() + // the reference values are calculated by ModuleBase::Ylm::grad_rl_sph_harm + // 1st dimension: example, 2nd dimension: Ylm, 3rd dimension: dx/dy/dz + double rlgyref[4][64][3] = {{{0.00000e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, 0.00000e+00, 4.88603e-01}, + {-4.88603e-01, 0.00000e+00, 0.00000e+00}, {0.00000e+00, -4.88603e-01, 0.00000e+00}, + {-6.30783e-01, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -1.09255e+00}, + {0.00000e+00, -0.00000e+00, 0.00000e+00}, {1.09255e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 1.09255e+00, -0.00000e+00}, {-0.00000e+00, 0.00000e+00, -1.11953e+00}, + {1.37114e+00, 0.00000e+00, -0.00000e+00}, {0.00000e+00, 4.57046e-01, 0.00000e+00}, + {0.00000e+00, 0.00000e+00, 1.44531e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-1.77013e+00, 0.00000e+00, -0.00000e+00}, {0.00000e+00, -1.77013e+00, 0.00000e+00}, + {1.26943e+00, 0.00000e+00, -0.00000e+00}, {0.00000e+00, 0.00000e+00, 2.00714e+00}, + {0.00000e+00, 0.00000e+00, 0.00000e+00}, {-1.89235e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, -9.46175e-01, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -1.77013e+00}, + {0.00000e+00, -0.00000e+00, 0.00000e+00}, {2.50334e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 2.50334e+00, 0.00000e+00}, {0.00000e+00, 0.00000e+00, 1.75425e+00}, + {-2.26473e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, -4.52947e-01, 0.00000e+00}, + {-0.00000e+00, 0.00000e+00, -2.39677e+00}, {-0.00000e+00, -0.00000e+00, 0.00000e+00}, + {2.44619e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, 1.46771e+00, 0.00000e+00}, + {0.00000e+00, 0.00000e+00, 2.07566e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-3.28191e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, -3.28191e+00, 0.00000e+00}, + {-1.90708e+00, 0.00000e+00, 
0.00000e+00}, {-0.00000e+00, 0.00000e+00, -2.91311e+00}, + {0.00000e+00, -0.00000e+00, 0.00000e+00}, {2.76362e+00, 0.00000e+00, -0.00000e+00}, + {-0.00000e+00, 9.21205e-01, 0.00000e+00}, {0.00000e+00, 0.00000e+00, 2.76362e+00}, + {0.00000e+00, 0.00000e+00, 0.00000e+00}, {-3.02739e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, -2.01826e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -2.36662e+00}, + {0.00000e+00, -0.00000e+00, 0.00000e+00}, {4.09910e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 4.09910e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -2.38995e+00}, + {3.16161e+00, 0.00000e+00, -0.00000e+00}, {0.00000e+00, 4.51658e-01, 0.00000e+00}, + {0.00000e+00, 0.00000e+00, 3.31900e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-3.28564e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, -1.40813e+00, 0.00000e+00}, + {-0.00000e+00, 0.00000e+00, -3.11349e+00}, {-0.00000e+00, -0.00000e+00, 0.00000e+00}, + {3.63241e+00, 0.00000e+00, -0.00000e+00}, {0.00000e+00, 2.59458e+00, 0.00000e+00}, + {0.00000e+00, 0.00000e+00, 2.64596e+00}, {0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-4.95014e+00, 0.00000e+00, -0.00000e+00}, {0.00000e+00, -4.95014e+00, 0.00000e+00}}, + {{0.00000e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, 0.00000e+00, 4.88603e-01}, + {-4.88603e-01, 0.00000e+00, 0.00000e+00}, {0.00000e+00, -4.88603e-01, 0.00000e+00}, + {0.00000e+00, -6.30783e-01, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {0.00000e+00, -0.00000e+00, -1.09255e+00}, {0.00000e+00, -1.09255e+00, 0.00000e+00}, + {1.09255e+00, 0.00000e+00, -0.00000e+00}, {0.00000e+00, -0.00000e+00, -1.11953e+00}, + {4.57046e-01, 0.00000e+00, 0.00000e+00}, {0.00000e+00, 1.37114e+00, -0.00000e+00}, + {0.00000e+00, -0.00000e+00, -1.44531e+00}, {0.00000e+00, 0.00000e+00, 0.00000e+00}, + {1.77013e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, 1.77013e+00, 0.00000e+00}, + {0.00000e+00, 1.26943e+00, -0.00000e+00}, {0.00000e+00, 0.00000e+00, 0.00000e+00}, + {0.00000e+00, 0.00000e+00, 2.00714e+00}, 
{0.00000e+00, 1.89235e+00, -0.00000e+00}, + {-9.46175e-01, 0.00000e+00, 0.00000e+00}, {0.00000e+00, 0.00000e+00, 0.00000e+00}, + {0.00000e+00, 0.00000e+00, 1.77013e+00}, {0.00000e+00, 2.50334e+00, -0.00000e+00}, + {-2.50334e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, 0.00000e+00, 1.75425e+00}, + {-4.52947e-01, 0.00000e+00, 0.00000e+00}, {0.00000e+00, -2.26473e+00, 0.00000e+00}, + {0.00000e+00, 0.00000e+00, 2.39677e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-1.46771e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, -2.44619e+00, 0.00000e+00}, + {0.00000e+00, 0.00000e+00, 2.07566e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-3.28191e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, -3.28191e+00, 0.00000e+00}, + {0.00000e+00, -1.90708e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {0.00000e+00, -0.00000e+00, -2.91311e+00}, {0.00000e+00, -2.76362e+00, 0.00000e+00}, + {9.21205e-01, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {0.00000e+00, -0.00000e+00, -2.76362e+00}, {0.00000e+00, -3.02739e+00, 0.00000e+00}, + {2.01826e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {0.00000e+00, -0.00000e+00, -2.36662e+00}, {0.00000e+00, -4.09910e+00, 0.00000e+00}, + {4.09910e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, -0.00000e+00, -2.38995e+00}, + {4.51658e-01, 0.00000e+00, 0.00000e+00}, {0.00000e+00, 3.16161e+00, -0.00000e+00}, + {0.00000e+00, -0.00000e+00, -3.31900e+00}, {0.00000e+00, 0.00000e+00, 0.00000e+00}, + {1.40813e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, 3.28564e+00, -0.00000e+00}, + {0.00000e+00, -0.00000e+00, -3.11349e+00}, {0.00000e+00, 0.00000e+00, 0.00000e+00}, + {2.59458e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, 3.63241e+00, -0.00000e+00}, + {0.00000e+00, 0.00000e+00, -2.64596e+00}, {0.00000e+00, 0.00000e+00, 0.00000e+00}, + {4.95014e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, 4.95014e+00, -0.00000e+00}}, + {{0.00000e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, 0.00000e+00, 
4.88603e-01}, + {-4.88603e-01, 0.00000e+00, 0.00000e+00}, {0.00000e+00, -4.88603e-01, 0.00000e+00}, + {0.00000e+00, 0.00000e+00, 1.26157e+00}, {-1.09255e+00, 0.00000e+00, 0.00000e+00}, + {0.00000e+00, -1.09255e+00, 0.00000e+00}, {0.00000e+00, 0.00000e+00, 2.22045e-16}, + {-0.00000e+00, 0.00000e+00, -0.00000e+00}, {0.00000e+00, 0.00000e+00, 2.23906e+00}, + {-1.82818e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, -1.82818e+00, 0.00000e+00}, + {0.00000e+00, 0.00000e+00, 8.81212e-16}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-1.84324e-16, 0.00000e+00, 0.00000e+00}, {0.00000e+00, 5.55112e-17, 0.00000e+00}, + {0.00000e+00, 0.00000e+00, 3.38514e+00}, {-2.67619e+00, 0.00000e+00, 0.00000e+00}, + {0.00000e+00, -2.67619e+00, 0.00000e+00}, {0.00000e+00, 0.00000e+00, 2.30756e-15}, + {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-5.52973e-16, 0.00000e+00, 0.00000e+00}, + {0.00000e+00, 1.66533e-16, 0.00000e+00}, {0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, 0.00000e+00, 4.67801e+00}, + {-3.62357e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, -3.62357e+00, 0.00000e+00}, + {0.00000e+00, 0.00000e+00, 4.87108e-15}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-1.22267e-15, 0.00000e+00, 0.00000e+00}, {0.00000e+00, 3.68219e-16, 0.00000e+00}, + {0.00000e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {4.93038e-32, 0.00000e+00, 0.00000e+00}, {0.00000e+00, -6.16298e-33, 0.00000e+00}, + {0.00000e+00, 0.00000e+00, 6.10264e+00}, {-4.66097e+00, 0.00000e+00, 0.00000e+00}, + {0.00000e+00, -4.66097e+00, 0.00000e+00}, {0.00000e+00, 0.00000e+00, 8.98664e-15}, + {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-2.30221e-15, 0.00000e+00, 0.00000e+00}, + {0.00000e+00, 6.93334e-16, 0.00000e+00}, {0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {1.77767e-31, 0.00000e+00, 0.00000e+00}, + {0.00000e+00, -2.22209e-32, 0.00000e+00}, {0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 
0.00000e+00, 0.00000e+00}, {0.00000e+00, 0.00000e+00, 7.64784e+00}, + {-5.78122e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, -5.78122e+00, 0.00000e+00}, + {0.00000e+00, 0.00000e+00, 1.51096e-14}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-3.91011e-15, 0.00000e+00, 0.00000e+00}, {0.00000e+00, 1.17757e-15, 0.00000e+00}, + {0.00000e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {4.67737e-31, 0.00000e+00, 0.00000e+00}, {0.00000e+00, -5.84671e-32, 0.00000e+00}, + {0.00000e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, 0.00000e+00, 0.00000e+00}, + {1.13319e-47, 0.00000e+00, 0.00000e+00}, {0.00000e+00, -1.41649e-48, 0.00000e+00}}, + {{0.00000e+00, 0.00000e+00, 0.00000e+00}, {0.00000e+00, 0.00000e+00, 4.88603e-01}, + {-4.88603e-01, 0.00000e+00, 0.00000e+00}, {0.00000e+00, -4.88603e-01, 0.00000e+00}, + {3.64183e-01, 3.64183e-01, -7.28366e-01}, {6.30783e-01, -0.00000e+00, 6.30783e-01}, + {-0.00000e+00, 6.30783e-01, 6.30783e-01}, {-6.30783e-01, 6.30783e-01, -1.66533e-16}, + {-6.30783e-01, -6.30783e-01, 0.00000e+00}, {-7.46353e-01, -7.46353e-01, 0.00000e+00}, + {0.00000e+00, 3.04697e-01, -1.21879e+00}, {3.04697e-01, 0.00000e+00, -1.21879e+00}, + {9.63537e-01, -9.63537e-01, 4.01253e-16}, {9.63537e-01, 9.63537e-01, 9.63537e-01}, + {-4.44089e-16, 1.18009e+00, -2.22045e-16}, {-1.18009e+00, -1.11022e-16, 0.00000e+00}, + {4.88603e-01, 4.88603e-01, 1.30294e+00}, {-1.03006e+00, -7.72548e-01, 7.72548e-01}, + {-7.72548e-01, -1.03006e+00, 7.72548e-01}, {-7.28366e-01, 7.28366e-01, -5.25363e-16}, + {-3.64183e-01, -3.64183e-01, -2.18510e+00}, {7.69185e-16, -2.04397e+00, -6.81324e-01}, + {2.04397e+00, 1.92296e-16, 6.81324e-01}, {9.63537e-01, 9.63537e-01, -1.44756e-16}, + {-9.63537e-01, 9.63537e-01, -5.55112e-17}, {5.19779e-01, 5.19779e-01, -1.81923e+00}, + {1.40917e+00, 8.05238e-01, 8.05238e-01}, {8.05238e-01, 1.40917e+00, 8.05238e-01}, + {0.00000e+00, -4.44089e-16, 3.24739e-16}, {-1.06523e+00, -1.06523e+00, 2.13046e+00}, + {-2.17439e-01, 1.73951e+00, 
1.73951e+00}, {-1.73951e+00, 2.17439e-01, -1.73951e+00}, + {-1.84503e+00, -1.84503e+00, -9.22517e-01}, {1.84503e+00, -1.84503e+00, 6.58625e-16}, + {1.45863e+00, 1.11022e-15, 0.00000e+00}, {-8.88178e-16, 1.45863e+00, 0.00000e+00}, + {-1.46807e+00, -1.46807e+00, 5.87227e-01}, {-4.48502e-01, -3.36617e-16, -2.24251e+00}, + {-3.36617e-16, -4.48502e-01, -2.24251e+00}, {7.09144e-01, -7.09144e-01, 1.87222e-16}, + {2.12743e+00, 2.12743e+00, -9.38779e-16}, {7.09144e-01, -5.11006e-16, -2.12743e+00}, + {1.02201e-15, -7.09144e-01, 2.12743e+00}, {1.81260e+00, 1.81260e+00, 2.58943e+00}, + {-2.07154e+00, 2.07154e+00, -1.66969e-15}, {-3.03637e+00, -2.31111e-15, -6.07275e-01}, + {1.84889e-15, -3.03637e+00, -6.07275e-01}, {1.05183e+00, -1.05183e+00, 5.77778e-17}, + {1.05183e+00, 1.05183e+00, 4.03986e-17}, {1.27464e+00, 1.27464e+00, 1.69952e+00}, + {-1.28472e+00, -1.20442e+00, 1.92707e+00}, {-1.20442e+00, -1.28472e+00, 1.92707e+00}, + {-8.52285e-01, 8.52285e-01, -6.74704e-16}, {-1.50789e+00, -1.50789e+00, -2.95022e+00}, + {-1.11260e+00, -2.08612e+00, 9.27164e-01}, {2.08612e+00, 1.11260e+00, -9.27164e-01}, + {-3.07506e-01, -3.07506e-01, -3.69007e+00}, {1.23002e+00, -1.23002e+00, 2.28018e-15}, + {3.69007e+00, -1.53753e-01, 1.84503e+00}, {-1.53753e-01, 3.69007e+00, 1.84503e+00}, + {-2.35197e+00, 2.35197e+00, -8.00513e-16}, {-2.35197e+00, -2.35197e+00, -7.83988e-01}, + {1.37903e-15, -1.46671e+00, 9.77875e-17}, {1.46671e+00, 1.14919e-15, 1.34475e-16}}}; + + void + SetUp () { - ylm.create(nylm,ng); + ylm.create (nylm, ng); dylm = new ModuleBase::matrix[3]; - for(int i = 0 ; i < 3 ; ++i) dylm[i].create(nylm,ng); + for (int i = 0; i < 3; ++i) + { + dylm[i].create (nylm, ng); + } g = new ModuleBase::Vector3[ng]; - g[0].set(1.0,0.0,0.0); - g[1].set(0.0,1.0,0.0); - g[2].set(0.0,0.0,1.0); - g[3].set(-1.0,-1.0,-1.0); + g[0].set (1.0, 0.0, 0.0); + g[1].set (0.0, 1.0, 0.0); + g[2].set (0.0, 0.0, 1.0); + g[3].set (-1.0, -1.0, -1.0); rly = new double[nylm]; - rlyvector.resize(nylm); + rlyvector.resize 
(nylm); rlgy = new double[nylm][3]; - rlgyvector.assign(nylm * 3, 0.0); - ref = new double[64*4]{ - y00(g[0].x, g[0].y, g[0].z), y00(g[1].x, g[1].y, g[1].z), y00(g[2].x, g[2].y, g[2].z), y00(g[3].x, g[3].y, g[3].z), - y10(g[0].x, g[0].y, g[0].z), y10(g[1].x, g[1].y, g[1].z), y10(g[2].x, g[2].y, g[2].z), y10(g[3].x, g[3].y, g[3].z), - y11(g[0].x, g[0].y, g[0].z), y11(g[1].x, g[1].y, g[1].z), y11(g[2].x, g[2].y, g[2].z), y11(g[3].x, g[3].y, g[3].z), - y1m1(g[0].x, g[0].y, g[0].z), y1m1(g[1].x, g[1].y, g[1].z), y1m1(g[2].x, g[2].y, g[2].z), y1m1(g[3].x, g[3].y, g[3].z), - y20(g[0].x, g[0].y, g[0].z), y20(g[1].x, g[1].y, g[1].z), y20(g[2].x, g[2].y, g[2].z), y20(g[3].x, g[3].y, g[3].z), - y21(g[0].x, g[0].y, g[0].z), y21(g[1].x, g[1].y, g[1].z), y21(g[2].x, g[2].y, g[2].z), y21(g[3].x, g[3].y, g[3].z), - y2m1(g[0].x, g[0].y, g[0].z), y2m1(g[1].x, g[1].y, g[1].z), y2m1(g[2].x, g[2].y, g[2].z), y2m1(g[3].x, g[3].y, g[3].z), - y22(g[0].x, g[0].y, g[0].z), y22(g[1].x, g[1].y, g[1].z), y22(g[2].x, g[2].y, g[2].z), y22(g[3].x, g[3].y, g[3].z), - y2m2(g[0].x, g[0].y, g[0].z), y2m2(g[1].x, g[1].y, g[1].z), y2m2(g[2].x, g[2].y, g[2].z), y2m2(g[3].x, g[3].y, g[3].z), - y30(g[0].x, g[0].y, g[0].z), y30(g[1].x, g[1].y, g[1].z), y30(g[2].x, g[2].y, g[2].z), y30(g[3].x, g[3].y, g[3].z), - y31(g[0].x, g[0].y, g[0].z), y31(g[1].x, g[1].y, g[1].z), y31(g[2].x, g[2].y, g[2].z), y31(g[3].x, g[3].y, g[3].z), - y3m1(g[0].x, g[0].y, g[0].z), y3m1(g[1].x, g[1].y, g[1].z), y3m1(g[2].x, g[2].y, g[2].z), y3m1(g[3].x, g[3].y, g[3].z), - y32(g[0].x, g[0].y, g[0].z), y32(g[1].x, g[1].y, g[1].z), y32(g[2].x, g[2].y, g[2].z), y32(g[3].x, g[3].y, g[3].z), - y3m2(g[0].x, g[0].y, g[0].z), y3m2(g[1].x, g[1].y, g[1].z), y3m2(g[2].x, g[2].y, g[2].z), y3m2(g[3].x, g[3].y, g[3].z), - y33(g[0].x, g[0].y, g[0].z), y33(g[1].x, g[1].y, g[1].z), y33(g[2].x, g[2].y, g[2].z), y33(g[3].x, g[3].y, g[3].z), - y3m3(g[0].x, g[0].y, g[0].z), y3m3(g[1].x, g[1].y, g[1].z), y3m3(g[2].x, g[2].y, g[2].z), y3m3(g[3].x, 
g[3].y, g[3].z), - y40(g[0].x, g[0].y, g[0].z), y40(g[1].x, g[1].y, g[1].z), y40(g[2].x, g[2].y, g[2].z), y40(g[3].x, g[3].y, g[3].z), - y41(g[0].x, g[0].y, g[0].z), y41(g[1].x, g[1].y, g[1].z), y41(g[2].x, g[2].y, g[2].z), y41(g[3].x, g[3].y, g[3].z), - y4m1(g[0].x, g[0].y, g[0].z), y4m1(g[1].x, g[1].y, g[1].z), y4m1(g[2].x, g[2].y, g[2].z), y4m1(g[3].x, g[3].y, g[3].z), - y42(g[0].x, g[0].y, g[0].z), y42(g[1].x, g[1].y, g[1].z), y42(g[2].x, g[2].y, g[2].z), y42(g[3].x, g[3].y, g[3].z), - y4m2(g[0].x, g[0].y, g[0].z), y4m2(g[1].x, g[1].y, g[1].z), y4m2(g[2].x, g[2].y, g[2].z), y4m2(g[3].x, g[3].y, g[3].z), - y43(g[0].x, g[0].y, g[0].z), y43(g[1].x, g[1].y, g[1].z), y43(g[2].x, g[2].y, g[2].z), y43(g[3].x, g[3].y, g[3].z), - y4m3(g[0].x, g[0].y, g[0].z), y4m3(g[1].x, g[1].y, g[1].z), y4m3(g[2].x, g[2].y, g[2].z), y4m3(g[3].x, g[3].y, g[3].z), - y44(g[0].x, g[0].y, g[0].z), y44(g[1].x, g[1].y, g[1].z), y44(g[2].x, g[2].y, g[2].z), y44(g[3].x, g[3].y, g[3].z), - y4m4(g[0].x, g[0].y, g[0].z), y4m4(g[1].x, g[1].y, g[1].z), y4m4(g[2].x, g[2].y, g[2].z), y4m4(g[3].x, g[3].y, g[3].z), - 0.000000000000000, 0.000000000000000, 0.935602579627389, 0.090028400200397, - -0.452946651195697, -0.000000000000000, -0.000000000000000, -0.348678494661834, - -0.000000000000000, -0.452946651195697, -0.000000000000000, -0.348678494661834, - -0.000000000000000, 0.000000000000000, 0.000000000000000, -0.000000000000000, - -0.000000000000000, -0.000000000000000, 0.000000000000000, -0.000000000000000, - 0.489238299435250, 0.000000000000000, -0.000000000000000, -0.376615818502422, - 0.000000000000000, -0.489238299435250, -0.000000000000000, 0.376615818502422, - 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.532615198330370, - 0.000000000000000, 0.000000000000000, 0.000000000000000, -0.000000000000000, - -0.656382056840170, -0.000000000000000, -0.000000000000000, -0.168427714314628, - -0.000000000000000, -0.656382056840170, -0.000000000000000, -0.168427714314628, - 
-0.317846011338142, -0.317846011338142, 1.017107236282055, 0.226023830284901, - -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.258942827786103, - -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.258942827786103, - 0.460602629757462, -0.460602629757462, 0.000000000000000, -0.000000000000000, - 0.000000000000000, 0.000000000000000, 0.000000000000000, -0.409424559784410, - -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.136474853261470, - -0.000000000000000, 0.000000000000000, -0.000000000000000, -0.136474853261470, - -0.504564900728724, -0.504564900728724, 0.000000000000000, -0.598002845308118, - -0.000000000000000, -0.000000000000000, 0.000000000000000, 0.000000000000000, - -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.350610246256556, - -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.350610246256556, - 0.683184105191914, -0.683184105191914, 0.000000000000000, -0.000000000000000, - 0.000000000000000, 0.000000000000000, 0.000000000000000, -0.202424920056864, - 0.000000000000000, 0.000000000000000, 1.092548430592079, -0.350435072502801, - 0.451658037912587, 0.000000000000000, -0.000000000000000, 0.046358202625865, - 0.000000000000000, 0.451658037912587, -0.000000000000000, 0.046358202625865, - 0.000000000000000, -0.000000000000000, 0.000000000000000, 0.000000000000000, - 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.492067081245654, - -0.469376801586882, -0.000000000000000, -0.000000000000000, 0.187354445356332, - -0.000000000000000, 0.469376801586882, -0.000000000000000, -0.187354445356332, - 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.355076798886913, - 0.000000000000000, 0.000000000000000, 0.000000000000000, -0.000000000000000, - 0.518915578720260, 0.000000000000000, -0.000000000000000, -0.443845998608641, - 0.000000000000000, 0.518915578720260, -0.000000000000000, -0.443845998608641, - 0.000000000000000, -0.000000000000000, 0.000000000000000, 0.000000000000000, - 
0.000000000000000, 0.000000000000000, 0.000000000000000, 0.452635881587108, - -0.707162732524596, 0.000000000000000, -0.000000000000000, 0.120972027847095, - -0.000000000000000, 0.707162732524596, -0.000000000000000, -0.120972027847095 - } ; + rlgyvector.assign (nylm * 3, 0.0); + ref = new double[64 * 4]{y00 (g[0].x, g[0].y, g[0].z), + y00 (g[1].x, g[1].y, g[1].z), + y00 (g[2].x, g[2].y, g[2].z), + y00 (g[3].x, g[3].y, g[3].z), + y10 (g[0].x, g[0].y, g[0].z), + y10 (g[1].x, g[1].y, g[1].z), + y10 (g[2].x, g[2].y, g[2].z), + y10 (g[3].x, g[3].y, g[3].z), + y11 (g[0].x, g[0].y, g[0].z), + y11 (g[1].x, g[1].y, g[1].z), + y11 (g[2].x, g[2].y, g[2].z), + y11 (g[3].x, g[3].y, g[3].z), + y1m1 (g[0].x, g[0].y, g[0].z), + y1m1 (g[1].x, g[1].y, g[1].z), + y1m1 (g[2].x, g[2].y, g[2].z), + y1m1 (g[3].x, g[3].y, g[3].z), + y20 (g[0].x, g[0].y, g[0].z), + y20 (g[1].x, g[1].y, g[1].z), + y20 (g[2].x, g[2].y, g[2].z), + y20 (g[3].x, g[3].y, g[3].z), + y21 (g[0].x, g[0].y, g[0].z), + y21 (g[1].x, g[1].y, g[1].z), + y21 (g[2].x, g[2].y, g[2].z), + y21 (g[3].x, g[3].y, g[3].z), + y2m1 (g[0].x, g[0].y, g[0].z), + y2m1 (g[1].x, g[1].y, g[1].z), + y2m1 (g[2].x, g[2].y, g[2].z), + y2m1 (g[3].x, g[3].y, g[3].z), + y22 (g[0].x, g[0].y, g[0].z), + y22 (g[1].x, g[1].y, g[1].z), + y22 (g[2].x, g[2].y, g[2].z), + y22 (g[3].x, g[3].y, g[3].z), + y2m2 (g[0].x, g[0].y, g[0].z), + y2m2 (g[1].x, g[1].y, g[1].z), + y2m2 (g[2].x, g[2].y, g[2].z), + y2m2 (g[3].x, g[3].y, g[3].z), + y30 (g[0].x, g[0].y, g[0].z), + y30 (g[1].x, g[1].y, g[1].z), + y30 (g[2].x, g[2].y, g[2].z), + y30 (g[3].x, g[3].y, g[3].z), + y31 (g[0].x, g[0].y, g[0].z), + y31 (g[1].x, g[1].y, g[1].z), + y31 (g[2].x, g[2].y, g[2].z), + y31 (g[3].x, g[3].y, g[3].z), + y3m1 (g[0].x, g[0].y, g[0].z), + y3m1 (g[1].x, g[1].y, g[1].z), + y3m1 (g[2].x, g[2].y, g[2].z), + y3m1 (g[3].x, g[3].y, g[3].z), + y32 (g[0].x, g[0].y, g[0].z), + y32 (g[1].x, g[1].y, g[1].z), + y32 (g[2].x, g[2].y, g[2].z), + y32 (g[3].x, g[3].y, g[3].z), + y3m2 (g[0].x, 
g[0].y, g[0].z), + y3m2 (g[1].x, g[1].y, g[1].z), + y3m2 (g[2].x, g[2].y, g[2].z), + y3m2 (g[3].x, g[3].y, g[3].z), + y33 (g[0].x, g[0].y, g[0].z), + y33 (g[1].x, g[1].y, g[1].z), + y33 (g[2].x, g[2].y, g[2].z), + y33 (g[3].x, g[3].y, g[3].z), + y3m3 (g[0].x, g[0].y, g[0].z), + y3m3 (g[1].x, g[1].y, g[1].z), + y3m3 (g[2].x, g[2].y, g[2].z), + y3m3 (g[3].x, g[3].y, g[3].z), + y40 (g[0].x, g[0].y, g[0].z), + y40 (g[1].x, g[1].y, g[1].z), + y40 (g[2].x, g[2].y, g[2].z), + y40 (g[3].x, g[3].y, g[3].z), + y41 (g[0].x, g[0].y, g[0].z), + y41 (g[1].x, g[1].y, g[1].z), + y41 (g[2].x, g[2].y, g[2].z), + y41 (g[3].x, g[3].y, g[3].z), + y4m1 (g[0].x, g[0].y, g[0].z), + y4m1 (g[1].x, g[1].y, g[1].z), + y4m1 (g[2].x, g[2].y, g[2].z), + y4m1 (g[3].x, g[3].y, g[3].z), + y42 (g[0].x, g[0].y, g[0].z), + y42 (g[1].x, g[1].y, g[1].z), + y42 (g[2].x, g[2].y, g[2].z), + y42 (g[3].x, g[3].y, g[3].z), + y4m2 (g[0].x, g[0].y, g[0].z), + y4m2 (g[1].x, g[1].y, g[1].z), + y4m2 (g[2].x, g[2].y, g[2].z), + y4m2 (g[3].x, g[3].y, g[3].z), + y43 (g[0].x, g[0].y, g[0].z), + y43 (g[1].x, g[1].y, g[1].z), + y43 (g[2].x, g[2].y, g[2].z), + y43 (g[3].x, g[3].y, g[3].z), + y4m3 (g[0].x, g[0].y, g[0].z), + y4m3 (g[1].x, g[1].y, g[1].z), + y4m3 (g[2].x, g[2].y, g[2].z), + y4m3 (g[3].x, g[3].y, g[3].z), + y44 (g[0].x, g[0].y, g[0].z), + y44 (g[1].x, g[1].y, g[1].z), + y44 (g[2].x, g[2].y, g[2].z), + y44 (g[3].x, g[3].y, g[3].z), + y4m4 (g[0].x, g[0].y, g[0].z), + y4m4 (g[1].x, g[1].y, g[1].z), + y4m4 (g[2].x, g[2].y, g[2].z), + y4m4 (g[3].x, g[3].y, g[3].z), + 0.000000000000000, + 0.000000000000000, + 0.935602579627389, + 0.090028400200397, + -0.452946651195697, + -0.000000000000000, + -0.000000000000000, + -0.348678494661834, + -0.000000000000000, + -0.452946651195697, + -0.000000000000000, + -0.348678494661834, + -0.000000000000000, + 0.000000000000000, + 0.000000000000000, + -0.000000000000000, + -0.000000000000000, + -0.000000000000000, + 0.000000000000000, + -0.000000000000000, + 0.489238299435250, + 
0.000000000000000, + -0.000000000000000, + -0.376615818502422, + 0.000000000000000, + -0.489238299435250, + -0.000000000000000, + 0.376615818502422, + 0.000000000000000, + 0.000000000000000, + 0.000000000000000, + 0.532615198330370, + 0.000000000000000, + 0.000000000000000, + 0.000000000000000, + -0.000000000000000, + -0.656382056840170, + -0.000000000000000, + -0.000000000000000, + -0.168427714314628, + -0.000000000000000, + -0.656382056840170, + -0.000000000000000, + -0.168427714314628, + -0.317846011338142, + -0.317846011338142, + 1.017107236282055, + 0.226023830284901, + -0.000000000000000, + -0.000000000000000, + -0.000000000000000, + 0.258942827786103, + -0.000000000000000, + -0.000000000000000, + -0.000000000000000, + 0.258942827786103, + 0.460602629757462, + -0.460602629757462, + 0.000000000000000, + -0.000000000000000, + 0.000000000000000, + 0.000000000000000, + 0.000000000000000, + -0.409424559784410, + -0.000000000000000, + -0.000000000000000, + -0.000000000000000, + 0.136474853261470, + -0.000000000000000, + 0.000000000000000, + -0.000000000000000, + -0.136474853261470, + -0.504564900728724, + -0.504564900728724, + 0.000000000000000, + -0.598002845308118, + -0.000000000000000, + -0.000000000000000, + 0.000000000000000, + 0.000000000000000, + -0.000000000000000, + -0.000000000000000, + -0.000000000000000, + 0.350610246256556, + -0.000000000000000, + -0.000000000000000, + -0.000000000000000, + 0.350610246256556, + 0.683184105191914, + -0.683184105191914, + 0.000000000000000, + -0.000000000000000, + 0.000000000000000, + 0.000000000000000, + 0.000000000000000, + -0.202424920056864, + 0.000000000000000, + 0.000000000000000, + 1.092548430592079, + -0.350435072502801, + 0.451658037912587, + 0.000000000000000, + -0.000000000000000, + 0.046358202625865, + 0.000000000000000, + 0.451658037912587, + -0.000000000000000, + 0.046358202625865, + 0.000000000000000, + -0.000000000000000, + 0.000000000000000, + 0.000000000000000, + 0.000000000000000, + 0.000000000000000, 
+ 0.000000000000000, + 0.492067081245654, + -0.469376801586882, + -0.000000000000000, + -0.000000000000000, + 0.187354445356332, + -0.000000000000000, + 0.469376801586882, + -0.000000000000000, + -0.187354445356332, + 0.000000000000000, + 0.000000000000000, + 0.000000000000000, + 0.355076798886913, + 0.000000000000000, + 0.000000000000000, + 0.000000000000000, + -0.000000000000000, + 0.518915578720260, + 0.000000000000000, + -0.000000000000000, + -0.443845998608641, + 0.000000000000000, + 0.518915578720260, + -0.000000000000000, + -0.443845998608641, + 0.000000000000000, + -0.000000000000000, + 0.000000000000000, + 0.000000000000000, + 0.000000000000000, + 0.000000000000000, + 0.000000000000000, + 0.452635881587108, + -0.707162732524596, + 0.000000000000000, + -0.000000000000000, + 0.120972027847095, + -0.000000000000000, + 0.707162732524596, + -0.000000000000000, + -0.120972027847095}; } - void TearDown() + void + TearDown () { - delete [] dylm; - delete [] g; - delete [] ref; - delete [] rly; - delete [] rlgy; + delete[] dylm; + delete[] g; + delete[] ref; + delete[] rly; + delete[] rlgy; } }; -TEST_F(YlmRealTest,Constructor) -{ - EXPECT_NO_THROW(ModuleBase::YlmReal YR); -} +TEST_F (YlmRealTest, Constructor) { EXPECT_NO_THROW (ModuleBase::YlmReal YR); } -TEST_F(YlmRealTest,YlmReal) +TEST_F (YlmRealTest, YlmReal) { - ModuleBase::YlmReal::Ylm_Real(nylm,ng,g,ylm); - for(int i=0;i(g), ylm.c); - for(int i=0;i (g), ylm.c); + for (int i = 0; i < nylm; ++i) { - EXPECT_NEAR(ylm(i,j),ref[i*ng+j],doublethreshold) << "Ylm[" << i << "], example " << j << " not pass"; + for (int j = 0; j < ng; ++j) + { + EXPECT_NEAR (ylm (i, j), ref[i * ng + j], doublethreshold) + << "Ylm[" << i << "], example " << j << " not pass"; + } } - } } -TEST_F(YlmRealTest,gradYlmReal) +TEST_F (YlmRealTest, gradYlmReal) { - ModuleBase::YlmReal::grad_Ylm_Real(nylm,ng,g,ylm,dylm[0],dylm[1],dylm[2]); - for(int i=0;i gplus = g[j]; - ModuleBase::Vector3 gminus = g[j]; - gplus[id] += step/2; - gminus[id] -= 
step/2; - ModuleBase::YlmReal::Ylm_Real(nylm,1,&gplus,ylmplus); - ModuleBase::YlmReal::Ylm_Real(nylm,1,&gminus,ylmminus); - for(int i=0;i gplus = g[j]; + ModuleBase::Vector3 gminus = g[j]; + gplus[id] += step / 2; + gminus[id] -= step / 2; + ModuleBase::YlmReal::Ylm_Real (nylm, 1, &gplus, ylmplus); + ModuleBase::YlmReal::Ylm_Real (nylm, 1, &gminus, ylmminus); + for (int i = 0; i < nylm; ++i) + { + if (std::abs (ylmplus (i, 0)) < 1e-6 && std::abs (ylmminus (i, 0)) < 1e-6) + { + continue; + } + double diff = (ylmplus (i, 0) - ylmminus (i, 0)) / step; + EXPECT_NEAR (diff, dylm[id](i, j), 1e-6) + << "dYlm[" << id << "][" << i << "], example " << j << " not pass"; + } + } } - } } - -TEST_F(YlmRealTest,YlmReal2) +TEST_F (YlmRealTest, YlmReal2) { - ModuleBase::YlmReal::Ylm_Real2(nylm,ng,g,ylm); - for(int i=0;i R (20.0, 0.0, 0.0); - const double xdr = R.x/R.norm(); - const double ydr = R.y/R.norm(); - const double zdr = R.z/R.norm(); - const int L = 9; - const double rl = std::pow( R.norm(), L); - //std::cout << " rl=" << rl << std::endl; - ModuleBase::Ylm::set_coefficients(); - - int nu = 100; - - // Peize Lin change rlya 2016-08-26 - std::vector rlya; - double rlyb[400]; - ModuleBase::Ylm::ZEROS( rlyb, 400); - - ModuleBase::Ylm::rl_sph_harm(L, xdr, ydr, zdr, rlya); - ModuleBase::Ylm::get_ylm_real(L+1, R, rlyb); - - for (int i=0; i < nu; i++) - { - double diff = fabs(rlya[i]-rlyb[i]); - EXPECT_LT(diff,1e-8); - } + const double xdr = R.x / R.norm (); + const double ydr = R.y / R.norm (); + const double zdr = R.z / R.norm (); + const int L = 9; + const double rl = std::pow (R.norm (), L); + // std::cout << " rl=" << rl << std::endl; + ModuleBase::Ylm::set_coefficients (); -} + int nu = 100; -//used to be test2 in ylm.h -TEST_F(YlmRealTest, equality_gradient_test) -{ + // Peize Lin change rlya 2016-08-26 + std::vector rlya; + double rlyb[400]; + ModuleBase::Ylm::ZEROS (rlyb, 400); + + ModuleBase::Ylm::rl_sph_harm (L, xdr, ydr, zdr, rlya); + ModuleBase::Ylm::get_ylm_real (L + 
1, R, rlyb); + for (int i = 0; i < nu; i++) + { + double diff = fabs (rlya[i] - rlyb[i]); + EXPECT_LT (diff, 1e-8); + } +} - ModuleBase::Vector3 R (0.1,-0.2,0.5); - ModuleBase::Ylm::set_coefficients(); +// used to be test2 in ylm.h +TEST_F (YlmRealTest, equality_gradient_test) +{ - //int nu = 100; + ModuleBase::Vector3 R (0.1, -0.2, 0.5); + ModuleBase::Ylm::set_coefficients (); - double rlya[100]; - double rlyb[400]; + // int nu = 100; - std::vector grlya(100 * 3); - double grlyb[400][3]; + double rlya[100]; + double rlyb[400]; - ModuleBase::Ylm::grad_rl_sph_harm (9, R.x, R.y, R.z, rlya, grlya.data()); - ModuleBase::Ylm::rlylm (10, R.x, R.y, R.z, rlyb, grlyb); + std::vector grlya (100 * 3); + double grlyb[400][3]; - for (int i = 0; i < 100; i++) - { - double diffx = fabs(grlya[i*3 + 2]-grlyb[i][2]); - EXPECT_LT(diffx,1e-8); - } + ModuleBase::Ylm::grad_rl_sph_harm (9, R.x, R.y, R.z, rlya, grlya.data ()); + ModuleBase::Ylm::rlylm (10, R.x, R.y, R.z, rlyb, grlyb); + for (int i = 0; i < 100; i++) + { + double diffx = fabs (grlya[i * 3 + 2] - grlyb[i][2]); + EXPECT_LT (diffx, 1e-8); + } } -TEST_F(YlmRealTest,YlmRealTemplatefloat) +TEST_F (YlmRealTest, YlmRealTemplatefloat) { - ModuleBase::Vector3 *gg; + ModuleBase::Vector3* gg; gg = new ModuleBase::Vector3[ng]; - gg[0].set(1.0,0.0,0.0); - gg[1].set(0.0,1.0,0.0); - gg[2].set(0.0,0.0,1.0); - gg[3].set(-1.0,-1.0,-1.0); - float*ccc; - ccc=new float[nylm*ng]; + gg[0].set (1.0, 0.0, 0.0); + gg[1].set (0.0, 1.0, 0.0); + gg[2].set (0.0, 0.0, 1.0); + gg[3].set (-1.0, -1.0, -1.0); + float* ccc; + ccc = new float[nylm * ng]; base_device::DEVICE_CPU* cpu_ctx = {}; - ModuleBase::YlmReal::Ylm_Real(cpu_ctx, nylm, ng, reinterpret_cast(gg), ccc); - for(int i=0;i (cpu_ctx, + nylm, + ng, + reinterpret_cast (gg), + ccc); + for (int i = 0; i < nylm; ++i) { - EXPECT_NEAR(ccc[i*ng+j],ref[i*ng+j],2.45e-06) << "Ylm[" << i << "], example " << j << " not pass"; + for (int j = 0; j < ng; ++j) + { + EXPECT_NEAR (ccc[i * ng + j], ref[i * ng + j], 
2.45e-06) + << "Ylm[" << i << "], example " << j << " not pass"; + } } - } - delete [] gg; - delete [] ccc; + delete[] gg; + delete[] ccc; } diff --git a/source/source_base/test/mathzone_add1_test.cpp b/source/source_base/test/mathzone_add1_test.cpp index 91b8135d4dc..cd59220d44a 100644 --- a/source/source_base/test/mathzone_add1_test.cpp +++ b/source/source_base/test/mathzone_add1_test.cpp @@ -40,7 +40,8 @@ class MathzoneAdd1Test : public testing::Test double* psi_in; double* psi_out; double* dpsi; - void SetUp() + void + SetUp () { r_in = new double[nr_in]; r_out = new double[nr_out]; @@ -49,7 +50,8 @@ class MathzoneAdd1Test : public testing::Test psi_out = new double[nr_out]; dpsi = new double[nr_out]; } - void TearDown() + void + TearDown () { delete[] r_in; delete[] r_out; @@ -60,14 +62,11 @@ class MathzoneAdd1Test : public testing::Test } }; -TEST_F(MathzoneAdd1Test, Constructor) -{ - EXPECT_NO_THROW(ModuleBase::Mathzone_Add1 MA1); -} +TEST_F (MathzoneAdd1Test, Constructor) { EXPECT_NO_THROW (ModuleBase::Mathzone_Add1 MA1); } #define PI 3.1415926535897932 /// first kind boundary condition: f'(0) = f'(n) = 0.0 -TEST_F(MathzoneAdd1Test, CubicSplineBoundary1) +TEST_F (MathzoneAdd1Test, CubicSplineBoundary1) { // data from abacus/tests/integrate/tools/PP_ORB/Si_gga_8au_60Ry_2s2p1d.orb // data for d orbital of Si : L = 2, N = 0 @@ -89,120 +88,121 @@ TEST_F(MathzoneAdd1Test, CubicSplineBoundary1) psi_in[15] = 3.802139894646e-03; psi_in[16] = 0; for (int i = 0; i < nr_in; i++) - { - r_in[i] = i * 0.5; - // std::cout<< r_in[i] << " " << psi_in[i] << std::endl; // for plotting - } + { + r_in[i] = i * 0.5; + // std::cout<< r_in[i] << " " << psi_in[i] << std::endl; // for plotting + } for (int i = 0; i < nr_out; i++) - { - r_out[i] = i * 0.05; - } - ModuleBase::Mathzone_Add1::SplineD2(r_in, psi_in, nr_in, 0.0, 0.0, y2); + { + r_out[i] = i * 0.05; + } + ModuleBase::Mathzone_Add1::SplineD2 (r_in, psi_in, nr_in, 0.0, 0.0, y2); // std::cout << "y2[0] "<< y2[0] << " 
y2[nr_in] "<< y2[nr_in-1] << std::endl; // for checking - ModuleBase::Mathzone_Add1::Cubic_Spline_Interpolation(r_in, psi_in, y2, nr_in, r_out, nr_out, psi_out, dpsi); + ModuleBase::Mathzone_Add1::Cubic_Spline_Interpolation (r_in, psi_in, y2, nr_in, r_out, nr_out, psi_out, dpsi); for (int i = 0; i < nr_out; i++) - { - int j = i / 10; - if (i % 10 == 0) { - EXPECT_EQ(psi_in[j], psi_out[i]); + int j = i / 10; + if (i % 10 == 0) + { + EXPECT_EQ (psi_in[j], psi_out[i]); + } + // std::cout<< r_out[i] << " " << psi_out[i] << std::endl; // for plotting } - // std::cout<< r_out[i] << " " << psi_out[i] << std::endl; // for plotting - } - EXPECT_NEAR(dpsi[0], 0.0, 1e-15); - EXPECT_NEAR(dpsi[nr_out - 1], 0.0, 1e-15); + EXPECT_NEAR (dpsi[0], 0.0, 1e-15); + EXPECT_NEAR (dpsi[nr_out - 1], 0.0, 1e-15); // std::cout< (y) ? (x) : (y)) -double count_err(double* x, std::function func, int n, int derivative) +double + count_err (double* x, std::function func, int n, int derivative) { double maxf4x = -10000000000; double maxh = 0; for (int i = 0; i != n; ++i) - { - maxf4x = MAX(abs(func(x[i])), maxf4x); - } + { + maxf4x = MAX (abs (func (x[i])), maxf4x); + } // printf("maxf4x = %.8lf\n", maxf4x); for (int i = 1; i != n; ++i) - { - maxh = MAX(x[i + 1] - x[i], maxh); - } + { + maxh = MAX (x[i + 1] - x[i], maxh); + } // if(func==2) printf("maxh = %.8lf,maxf4x = %.8lf\n", maxh,maxf4x); - double err = (5.0 / 384.0) * maxf4x * pow(maxh, 4 - derivative); - return MAX(err, 1e-15); + double err = (5.0 / 384.0) * maxf4x * pow (maxh, 4 - derivative); + return MAX (err, 1e-15); ; } /// first kind boundary condition: f'(0) = f'(n) = 0.0 -TEST_F(MathzoneAdd1Test, sinx_Boundary1) +TEST_F (MathzoneAdd1Test, sinx_Boundary1) { for (int i = 0; i <= 10; i++) - { - r_in[i] = i * 0.1 * PI + 0.5 * PI; - psi_in[i] = sin(r_in[i]); - } - auto f = [](double x) -> double { return sin(x); }; - double err = count_err(r_in, f, 11, 0); + { + r_in[i] = i * 0.1 * PI + 0.5 * PI; + psi_in[i] = sin (r_in[i]); + } + auto f 
= [] (double x) -> double { return sin (x); }; + double err = count_err (r_in, f, 11, 0); for (int i = 0; i < 100; i++) - { - r_out[i] = i * 0.01 * PI + 0.5 * PI; - } - ModuleBase::Mathzone_Add1::SplineD2(r_in, psi_in, 11, 0.0, 0.0, y2); - ModuleBase::Mathzone_Add1::Cubic_Spline_Interpolation(r_in, psi_in, y2, 11, r_out, 100, psi_out, dpsi); + { + r_out[i] = i * 0.01 * PI + 0.5 * PI; + } + ModuleBase::Mathzone_Add1::SplineD2 (r_in, psi_in, 11, 0.0, 0.0, y2); + ModuleBase::Mathzone_Add1::Cubic_Spline_Interpolation (r_in, psi_in, y2, 11, r_out, 100, psi_out, dpsi); for (int i = 0; i < 100; i++) - { - double ans = sin(r_out[i]); - EXPECT_NEAR(psi_out[i], ans, err); - } + { + double ans = sin (r_out[i]); + EXPECT_NEAR (psi_out[i], ans, err); + } } /// second kind boundary condition: f''(0) = f''(n) = 0.0 -TEST_F(MathzoneAdd1Test, sinx_Boundary2) +TEST_F (MathzoneAdd1Test, sinx_Boundary2) { for (int i = 0; i <= 10; i++) - { - r_in[i] = i * 0.1 * PI; - psi_in[i] = sin(r_in[i]); - } - auto f = [](double x) -> double { return sin(x); }; - double err = count_err(r_in, f, 11, 0); + { + r_in[i] = i * 0.1 * PI; + psi_in[i] = sin (r_in[i]); + } + auto f = [] (double x) -> double { return sin (x); }; + double err = count_err (r_in, f, 11, 0); for (int i = 0; i < 100; i++) - { - r_out[i] = i * 0.01 * PI; - } - ModuleBase::Mathzone_Add1::SplineD2(r_in, psi_in, 11, 0.0, 0.0, y2); - ModuleBase::Mathzone_Add1::Cubic_Spline_Interpolation(r_in, psi_in, y2, 11, r_out, 100, psi_out, dpsi); + { + r_out[i] = i * 0.01 * PI; + } + ModuleBase::Mathzone_Add1::SplineD2 (r_in, psi_in, 11, 0.0, 0.0, y2); + ModuleBase::Mathzone_Add1::Cubic_Spline_Interpolation (r_in, psi_in, y2, 11, r_out, 100, psi_out, dpsi); for (int i = 0; i < 100; i++) - { - double ans = sin(r_out[i]); - EXPECT_NEAR(psi_out[i], ans, err); - } + { + double ans = sin (r_out[i]); + EXPECT_NEAR (psi_out[i], ans, err); + } } -TEST_F(MathzoneAdd1Test, expx) +TEST_F (MathzoneAdd1Test, expx) { for (int i = 0; i <= 10; i++) - { - 
r_in[i] = i; - psi_in[i] = exp(r_in[i]); - } - auto f = [](double x) -> double { return exp(x); }; - double err = count_err(r_in, f, 11, 0); + { + r_in[i] = i; + psi_in[i] = exp (r_in[i]); + } + auto f = [] (double x) -> double { return exp (x); }; + double err = count_err (r_in, f, 11, 0); for (int i = 0; i < 100; i++) - { - r_out[i] = i * 0.1; - } - double d2 = exp(10); - ModuleBase::Mathzone_Add1::SplineD2(r_in, psi_in, 11, 1, d2, y2); - ModuleBase::Mathzone_Add1::Cubic_Spline_Interpolation(r_in, psi_in, y2, 11, r_out, 100, psi_out, dpsi); + { + r_out[i] = i * 0.1; + } + double d2 = exp (10); + ModuleBase::Mathzone_Add1::SplineD2 (r_in, psi_in, 11, 1, d2, y2); + ModuleBase::Mathzone_Add1::Cubic_Spline_Interpolation (r_in, psi_in, y2, 11, r_out, 100, psi_out, dpsi); for (int i = 0; i < 100; i++) - { - double ans = exp(r_out[i]); - EXPECT_NEAR(psi_out[i], ans, err); - } + { + double ans = exp (r_out[i]); + EXPECT_NEAR (psi_out[i], ans, err); + } } // /// second kind boundary condition: f''(0) = f''(n) = 0.0 @@ -230,7 +230,7 @@ TEST_F(MathzoneAdd1Test, expx) // } /// second kind boundary condition: f''(0) = f''(n) = 0.0 -TEST_F(MathzoneAdd1Test, CubicSplineBoundary2) +TEST_F (MathzoneAdd1Test, CubicSplineBoundary2) { // data from abacus/tests/integrate/tools/PP_ORB/Si_gga_8au_60Ry_2s2p1d.orb // data for 1st p orbital of Si: L = 1, N= 0 @@ -252,32 +252,32 @@ TEST_F(MathzoneAdd1Test, CubicSplineBoundary2) psi_in[15] = 7.795456942712e-03; psi_in[16] = 0; for (int i = 0; i < nr_in; i++) - { - r_in[i] = i * 0.5; - // std::cout<< r_in[i] << " " << psi_in[i] << std::endl; // for plotting - } + { + r_in[i] = i * 0.5; + // std::cout<< r_in[i] << " " << psi_in[i] << std::endl; // for plotting + } for (int i = 0; i < nr_out; i++) - { - r_out[i] = i * 0.05; - } - ModuleBase::Mathzone_Add1::SplineD2(r_in, psi_in, nr_in, 100000.0, 100000.0, y2); - EXPECT_EQ(y2[0], 0.0); - EXPECT_EQ(y2[nr_in - 1], 0.0); + { + r_out[i] = i * 0.05; + } + ModuleBase::Mathzone_Add1::SplineD2 (r_in, 
psi_in, nr_in, 100000.0, 100000.0, y2); + EXPECT_EQ (y2[0], 0.0); + EXPECT_EQ (y2[nr_in - 1], 0.0); // std::cout << "y2[0] "<< y2[0] << " y2[nr_in] "<< y2[nr_in-1] << std::endl; // for checking - ModuleBase::Mathzone_Add1::Cubic_Spline_Interpolation(r_in, psi_in, y2, nr_in, r_out, nr_out, psi_out, dpsi); + ModuleBase::Mathzone_Add1::Cubic_Spline_Interpolation (r_in, psi_in, y2, nr_in, r_out, nr_out, psi_out, dpsi); for (int i = 0; i < nr_out; i++) - { - int j = i / 10; - if (i % 10 == 0) { - EXPECT_EQ(psi_in[j], psi_out[i]); + int j = i / 10; + if (i % 10 == 0) + { + EXPECT_EQ (psi_in[j], psi_out[i]); + } + // std::cout<< r_out[i] << " " << psi_out[i] << std::endl; // for plotting } - // std::cout<< r_out[i] << " " << psi_out[i] << std::endl; // for plotting - } // std::cout< direct, cartesian; + protected: + double R11 = 3.68; + double R12 = 0.00; + double R13 = 0.00; + double R21 = 0.00; + double R22 = 10.1; + double R23 = 0.00; + double R31 = 0.00; + double R32 = 0.00; + double R33 = 26.7; + ModuleBase::Matrix3 lattice; + ModuleBase::Vector3 direct, cartesian; }; -TEST_F(MathzoneTest, PointwiseProduct) +TEST_F (MathzoneTest, PointwiseProduct) { - std::vector aa, bb, cc; - for(int i=0;i<10;i++) - { - aa.push_back(i*i); - bb.push_back(i*2); - } - cc = ModuleBase::Mathzone::Pointwise_Product(aa,bb); - for(int i=0;i<10;i++) - { - EXPECT_EQ(cc[i],i*i*i*2); - } + std::vector aa, bb, cc; + for (int i = 0; i < 10; i++) + { + aa.push_back (i * i); + bb.push_back (i * 2); + } + cc = ModuleBase::Mathzone::Pointwise_Product (aa, bb); + for (int i = 0; i < 10; i++) + { + EXPECT_EQ (cc[i], i * i * i * 2); + } } -TEST_F(MathzoneTest, Direct2Cartesian) +TEST_F (MathzoneTest, Direct2Cartesian) { - direct.set(0.1,0.2,0.4); - cartesian.set(0.368,2.02,10.68); - ModuleBase::Vector3 cartnew; - ModuleBase::Mathzone::Direct_to_Cartesian(direct.x, - direct.y, - direct.z, - R11, R12, R13, - R21, R22, R23, - R31, R32, R33, - cartnew.x, - cartnew.y, - cartnew.z); - 
EXPECT_NEAR(cartnew.x,cartesian.x, 1e-15); - EXPECT_NEAR(cartnew.y,cartesian.y, 1e-15); - EXPECT_NEAR(cartnew.z,cartesian.z, 1e-15); + direct.set (0.1, 0.2, 0.4); + cartesian.set (0.368, 2.02, 10.68); + ModuleBase::Vector3 cartnew; + ModuleBase::Mathzone::Direct_to_Cartesian (direct.x, + direct.y, + direct.z, + R11, + R12, + R13, + R21, + R22, + R23, + R31, + R32, + R33, + cartnew.x, + cartnew.y, + cartnew.z); + EXPECT_NEAR (cartnew.x, cartesian.x, 1e-15); + EXPECT_NEAR (cartnew.y, cartesian.y, 1e-15); + EXPECT_NEAR (cartnew.z, cartesian.z, 1e-15); } -TEST_F(MathzoneTest, Cartesian2Direct) +TEST_F (MathzoneTest, Cartesian2Direct) { - direct.set(0.1,0.2,0.4); - cartesian.set(0.368,2.02,10.68); - ModuleBase::Vector3 directnew; - ModuleBase::Mathzone::Cartesian_to_Direct(cartesian.x, - cartesian.y, - cartesian.z, - R11, R12, R13, - R21, R22, R23, - R31, R32, R33, - directnew.x, - directnew.y, - directnew.z); - EXPECT_NEAR(directnew.x,direct.x, 1e-15); - EXPECT_NEAR(directnew.y,direct.y, 1e-15); - EXPECT_NEAR(directnew.z,direct.z, 1e-15); + direct.set (0.1, 0.2, 0.4); + cartesian.set (0.368, 2.02, 10.68); + ModuleBase::Vector3 directnew; + ModuleBase::Mathzone::Cartesian_to_Direct (cartesian.x, + cartesian.y, + cartesian.z, + R11, + R12, + R13, + R21, + R22, + R23, + R31, + R32, + R33, + directnew.x, + directnew.y, + directnew.z); + EXPECT_NEAR (directnew.x, direct.x, 1e-15); + EXPECT_NEAR (directnew.y, direct.y, 1e-15); + EXPECT_NEAR (directnew.z, direct.z, 1e-15); } diff --git a/source/source_base/test/matrix3_test.cpp b/source/source_base/test/matrix3_test.cpp index 6e98d7c09cc..1802bb28a61 100644 --- a/source/source_base/test/matrix3_test.cpp +++ b/source/source_base/test/matrix3_test.cpp @@ -64,303 +64,384 @@ class Matrix3Test : public testing::Test { -protected: - ModuleBase::Matrix3 matrix_a, matrix_a1, matrix_b; - ModuleBase::Matrix3 get_random_matrix3() - { - std::vector v(9); - for (auto &i : v) - { - i = std::rand(); - } - auto matrix_a = 
ModuleBase::Matrix3(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8]); - return matrix_a; - } - // for capturing stdout - std::string output; + protected: + ModuleBase::Matrix3 matrix_a, matrix_a1, matrix_b; + ModuleBase::Matrix3 + get_random_matrix3 () + { + std::vector v (9); + for (auto& i: v) + { + i = std::rand (); + } + auto matrix_a = ModuleBase::Matrix3 (v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8]); + return matrix_a; + } + // for capturing stdout + std::string output; }; -TEST_F(Matrix3Test, Construct) +TEST_F (Matrix3Test, Construct) { - // method 1 - ModuleBase::Matrix3 ma; - EXPECT_EQ(ma.e11,1); EXPECT_EQ(ma.e12,0); EXPECT_EQ(ma.e13,0); - EXPECT_EQ(ma.e21,0); EXPECT_EQ(ma.e22,1); EXPECT_EQ(ma.e23,0); - EXPECT_EQ(ma.e31,0); EXPECT_EQ(ma.e32,0); EXPECT_EQ(ma.e33,1); - // method 2 - ModuleBase::Matrix3 mb(1,2,3,4,5,6,7,8,9); - EXPECT_EQ(mb.e11,1); EXPECT_EQ(mb.e12,2); EXPECT_EQ(mb.e13,3); - EXPECT_EQ(mb.e21,4); EXPECT_EQ(mb.e22,5); EXPECT_EQ(mb.e23,6); - EXPECT_EQ(mb.e31,7); EXPECT_EQ(mb.e32,8); EXPECT_EQ(mb.e33,9); + // method 1 + ModuleBase::Matrix3 ma; + EXPECT_EQ (ma.e11, 1); + EXPECT_EQ (ma.e12, 0); + EXPECT_EQ (ma.e13, 0); + EXPECT_EQ (ma.e21, 0); + EXPECT_EQ (ma.e22, 1); + EXPECT_EQ (ma.e23, 0); + EXPECT_EQ (ma.e31, 0); + EXPECT_EQ (ma.e32, 0); + EXPECT_EQ (ma.e33, 1); + // method 2 + ModuleBase::Matrix3 mb (1, 2, 3, 4, 5, 6, 7, 8, 9); + EXPECT_EQ (mb.e11, 1); + EXPECT_EQ (mb.e12, 2); + EXPECT_EQ (mb.e13, 3); + EXPECT_EQ (mb.e21, 4); + EXPECT_EQ (mb.e22, 5); + EXPECT_EQ (mb.e23, 6); + EXPECT_EQ (mb.e31, 7); + EXPECT_EQ (mb.e32, 8); + EXPECT_EQ (mb.e33, 9); } -TEST_F(Matrix3Test, Idenity) +TEST_F (Matrix3Test, Idenity) { - ModuleBase::Matrix3 mb(1,2,3,4,5,6,7,8,9); - mb.Identity(); - EXPECT_EQ(mb.e11,1); EXPECT_EQ(mb.e12,0); EXPECT_EQ(mb.e13,0); - EXPECT_EQ(mb.e21,0); EXPECT_EQ(mb.e22,1); EXPECT_EQ(mb.e23,0); - EXPECT_EQ(mb.e31,0); EXPECT_EQ(mb.e32,0); EXPECT_EQ(mb.e33,1); + ModuleBase::Matrix3 mb (1, 2, 3, 4, 5, 6, 7, 8, 9); + 
mb.Identity (); + EXPECT_EQ (mb.e11, 1); + EXPECT_EQ (mb.e12, 0); + EXPECT_EQ (mb.e13, 0); + EXPECT_EQ (mb.e21, 0); + EXPECT_EQ (mb.e22, 1); + EXPECT_EQ (mb.e23, 0); + EXPECT_EQ (mb.e31, 0); + EXPECT_EQ (mb.e32, 0); + EXPECT_EQ (mb.e33, 1); } -TEST_F(Matrix3Test, Zero) +TEST_F (Matrix3Test, Zero) { - ModuleBase::Matrix3 ma; - ma.Zero(); - EXPECT_EQ(ma.e11,0); EXPECT_EQ(ma.e12,0); EXPECT_EQ(ma.e13,0); - EXPECT_EQ(ma.e21,0); EXPECT_EQ(ma.e22,0); EXPECT_EQ(ma.e23,0); - EXPECT_EQ(ma.e31,0); EXPECT_EQ(ma.e32,0); EXPECT_EQ(ma.e33,0); + ModuleBase::Matrix3 ma; + ma.Zero (); + EXPECT_EQ (ma.e11, 0); + EXPECT_EQ (ma.e12, 0); + EXPECT_EQ (ma.e13, 0); + EXPECT_EQ (ma.e21, 0); + EXPECT_EQ (ma.e22, 0); + EXPECT_EQ (ma.e23, 0); + EXPECT_EQ (ma.e31, 0); + EXPECT_EQ (ma.e32, 0); + EXPECT_EQ (ma.e33, 0); } -TEST_F(Matrix3Test, Det) +TEST_F (Matrix3Test, Det) { - ModuleBase::Matrix3 ma; - ma = get_random_matrix3(); - double determinant = ma.e11 * ma.e22 * ma.e33 - - ma.e11 * ma.e32 * ma.e23 - - ma.e12 * ma.e21 * ma.e33 - + ma.e12 * ma.e31 * ma.e23 - + ma.e13 * ma.e21 * ma.e32 - - ma.e13 * ma.e22 * ma.e31; - EXPECT_DOUBLE_EQ(ma.Det(),determinant); + ModuleBase::Matrix3 ma; + ma = get_random_matrix3 (); + double determinant = ma.e11 * ma.e22 * ma.e33 - ma.e11 * ma.e32 * ma.e23 - ma.e12 * ma.e21 * ma.e33 + + ma.e12 * ma.e31 * ma.e23 + ma.e13 * ma.e21 * ma.e32 - ma.e13 * ma.e22 * ma.e31; + EXPECT_DOUBLE_EQ (ma.Det (), determinant); } -TEST_F(Matrix3Test, Transpose) +TEST_F (Matrix3Test, Transpose) { - ModuleBase::Matrix3 ma, mb; - ma = get_random_matrix3(); - mb = ma.Transpose(); - EXPECT_EQ(ma.e11,mb.e11); EXPECT_EQ(ma.e12,mb.e21); EXPECT_EQ(ma.e13,mb.e31); - EXPECT_EQ(ma.e21,mb.e12); EXPECT_EQ(ma.e22,mb.e22); EXPECT_EQ(ma.e23,mb.e32); - EXPECT_EQ(ma.e31,mb.e13); EXPECT_EQ(ma.e32,mb.e23); EXPECT_EQ(ma.e33,mb.e33); + ModuleBase::Matrix3 ma, mb; + ma = get_random_matrix3 (); + mb = ma.Transpose (); + EXPECT_EQ (ma.e11, mb.e11); + EXPECT_EQ (ma.e12, mb.e21); + EXPECT_EQ (ma.e13, mb.e31); 
+ EXPECT_EQ (ma.e21, mb.e12); + EXPECT_EQ (ma.e22, mb.e22); + EXPECT_EQ (ma.e23, mb.e32); + EXPECT_EQ (ma.e31, mb.e13); + EXPECT_EQ (ma.e32, mb.e23); + EXPECT_EQ (ma.e33, mb.e33); } -TEST_F(Matrix3Test, Inverse) +TEST_F (Matrix3Test, Inverse) { - ModuleBase::Matrix3 ma, mb; - ma = get_random_matrix3(); - mb = ma.Inverse(); - EXPECT_NEAR( (ma.e11*mb.e11 + ma.e12*mb.e21 + ma.e13*mb.e31), 1.0, 1e-15); - EXPECT_NEAR( (ma.e11*mb.e12 + ma.e12*mb.e22 + ma.e13*mb.e32), 0.0, 1e-15); - EXPECT_NEAR( (ma.e11*mb.e13 + ma.e12*mb.e23 + ma.e13*mb.e33), 0.0, 1e-15); - EXPECT_NEAR( (ma.e21*mb.e11 + ma.e22*mb.e21 + ma.e23*mb.e31), 0.0, 1e-15); - EXPECT_NEAR( (ma.e21*mb.e12 + ma.e22*mb.e22 + ma.e23*mb.e32), 1.0, 1e-15); - EXPECT_NEAR( (ma.e21*mb.e13 + ma.e22*mb.e23 + ma.e23*mb.e33), 0.0, 1e-15); - EXPECT_NEAR( (ma.e31*mb.e11 + ma.e32*mb.e21 + ma.e33*mb.e31), 0.0, 1e-15); - EXPECT_NEAR( (ma.e31*mb.e12 + ma.e32*mb.e22 + ma.e33*mb.e32), 0.0, 1e-15); - EXPECT_NEAR( (ma.e31*mb.e13 + ma.e32*mb.e23 + ma.e33*mb.e33), 1.0, 1e-15); + ModuleBase::Matrix3 ma, mb; + ma = get_random_matrix3 (); + mb = ma.Inverse (); + EXPECT_NEAR ((ma.e11 * mb.e11 + ma.e12 * mb.e21 + ma.e13 * mb.e31), 1.0, 1e-15); + EXPECT_NEAR ((ma.e11 * mb.e12 + ma.e12 * mb.e22 + ma.e13 * mb.e32), 0.0, 1e-15); + EXPECT_NEAR ((ma.e11 * mb.e13 + ma.e12 * mb.e23 + ma.e13 * mb.e33), 0.0, 1e-15); + EXPECT_NEAR ((ma.e21 * mb.e11 + ma.e22 * mb.e21 + ma.e23 * mb.e31), 0.0, 1e-15); + EXPECT_NEAR ((ma.e21 * mb.e12 + ma.e22 * mb.e22 + ma.e23 * mb.e32), 1.0, 1e-15); + EXPECT_NEAR ((ma.e21 * mb.e13 + ma.e22 * mb.e23 + ma.e23 * mb.e33), 0.0, 1e-15); + EXPECT_NEAR ((ma.e31 * mb.e11 + ma.e32 * mb.e21 + ma.e33 * mb.e31), 0.0, 1e-15); + EXPECT_NEAR ((ma.e31 * mb.e12 + ma.e32 * mb.e22 + ma.e33 * mb.e32), 0.0, 1e-15); + EXPECT_NEAR ((ma.e31 * mb.e13 + ma.e32 * mb.e23 + ma.e33 * mb.e33), 1.0, 1e-15); } -TEST_F(Matrix3Test, Assignment) +TEST_F (Matrix3Test, Assignment) { - ModuleBase::Matrix3 ma, mb; - ma = get_random_matrix3(); - mb = ma; - 
EXPECT_EQ(ma.e11,mb.e11); EXPECT_EQ(ma.e12,mb.e12); EXPECT_EQ(ma.e13,mb.e13); - EXPECT_EQ(ma.e21,mb.e21); EXPECT_EQ(ma.e22,mb.e22); EXPECT_EQ(ma.e23,mb.e23); - EXPECT_EQ(ma.e31,mb.e31); EXPECT_EQ(ma.e32,mb.e32); EXPECT_EQ(ma.e33,mb.e33); + ModuleBase::Matrix3 ma, mb; + ma = get_random_matrix3 (); + mb = ma; + EXPECT_EQ (ma.e11, mb.e11); + EXPECT_EQ (ma.e12, mb.e12); + EXPECT_EQ (ma.e13, mb.e13); + EXPECT_EQ (ma.e21, mb.e21); + EXPECT_EQ (ma.e22, mb.e22); + EXPECT_EQ (ma.e23, mb.e23); + EXPECT_EQ (ma.e31, mb.e31); + EXPECT_EQ (ma.e32, mb.e32); + EXPECT_EQ (ma.e33, mb.e33); } -TEST_F(Matrix3Test, AddEqual) +TEST_F (Matrix3Test, AddEqual) { - ModuleBase::Matrix3 ma, mb; - ma = get_random_matrix3(); - mb += ma; - EXPECT_EQ(ma.e11+1.0,mb.e11); EXPECT_EQ(ma.e12,mb.e12); EXPECT_EQ(ma.e13,mb.e13); - EXPECT_EQ(ma.e21,mb.e21); EXPECT_EQ(ma.e22+1.0,mb.e22); EXPECT_EQ(ma.e23,mb.e23); - EXPECT_EQ(ma.e31,mb.e31); EXPECT_EQ(ma.e32,mb.e32); EXPECT_EQ(ma.e33+1.0,mb.e33); + ModuleBase::Matrix3 ma, mb; + ma = get_random_matrix3 (); + mb += ma; + EXPECT_EQ (ma.e11 + 1.0, mb.e11); + EXPECT_EQ (ma.e12, mb.e12); + EXPECT_EQ (ma.e13, mb.e13); + EXPECT_EQ (ma.e21, mb.e21); + EXPECT_EQ (ma.e22 + 1.0, mb.e22); + EXPECT_EQ (ma.e23, mb.e23); + EXPECT_EQ (ma.e31, mb.e31); + EXPECT_EQ (ma.e32, mb.e32); + EXPECT_EQ (ma.e33 + 1.0, mb.e33); } -TEST_F(Matrix3Test, MinusEqual) +TEST_F (Matrix3Test, MinusEqual) { - ModuleBase::Matrix3 ma, mb; - ma = get_random_matrix3(); - mb -= ma; - EXPECT_EQ(1.0-ma.e11,mb.e11); EXPECT_EQ(-ma.e12,mb.e12); EXPECT_EQ(-ma.e13,mb.e13); - EXPECT_EQ(-ma.e21,mb.e21); EXPECT_EQ(1.0-ma.e22,mb.e22); EXPECT_EQ(-ma.e23,mb.e23); - EXPECT_EQ(-ma.e31,mb.e31); EXPECT_EQ(-ma.e32,mb.e32); EXPECT_EQ(1.0-ma.e33,mb.e33); + ModuleBase::Matrix3 ma, mb; + ma = get_random_matrix3 (); + mb -= ma; + EXPECT_EQ (1.0 - ma.e11, mb.e11); + EXPECT_EQ (-ma.e12, mb.e12); + EXPECT_EQ (-ma.e13, mb.e13); + EXPECT_EQ (-ma.e21, mb.e21); + EXPECT_EQ (1.0 - ma.e22, mb.e22); + EXPECT_EQ (-ma.e23, mb.e23); + 
EXPECT_EQ (-ma.e31, mb.e31); + EXPECT_EQ (-ma.e32, mb.e32); + EXPECT_EQ (1.0 - ma.e33, mb.e33); } -TEST_F(Matrix3Test, MultiplyEqual) +TEST_F (Matrix3Test, MultiplyEqual) { - ModuleBase::Matrix3 ma, mb; - ma = get_random_matrix3(); - mb = ma; - mb *= 3.0; - EXPECT_EQ(ma.e11*3.0,mb.e11); EXPECT_EQ(ma.e12*3.0,mb.e12); EXPECT_EQ(ma.e13*3.0,mb.e13); - EXPECT_EQ(ma.e21*3.0,mb.e21); EXPECT_EQ(ma.e22*3.0,mb.e22); EXPECT_EQ(ma.e23*3.0,mb.e23); - EXPECT_EQ(ma.e31*3.0,mb.e31); EXPECT_EQ(ma.e32*3.0,mb.e32); EXPECT_EQ(ma.e33*3.0,mb.e33); + ModuleBase::Matrix3 ma, mb; + ma = get_random_matrix3 (); + mb = ma; + mb *= 3.0; + EXPECT_EQ (ma.e11 * 3.0, mb.e11); + EXPECT_EQ (ma.e12 * 3.0, mb.e12); + EXPECT_EQ (ma.e13 * 3.0, mb.e13); + EXPECT_EQ (ma.e21 * 3.0, mb.e21); + EXPECT_EQ (ma.e22 * 3.0, mb.e22); + EXPECT_EQ (ma.e23 * 3.0, mb.e23); + EXPECT_EQ (ma.e31 * 3.0, mb.e31); + EXPECT_EQ (ma.e32 * 3.0, mb.e32); + EXPECT_EQ (ma.e33 * 3.0, mb.e33); } -TEST_F(Matrix3Test, OverEqual) +TEST_F (Matrix3Test, OverEqual) { - ModuleBase::Matrix3 ma, mb; - ma = get_random_matrix3(); - mb = ma; - mb /= 3.0; - EXPECT_EQ(ma.e11/3.0,mb.e11); EXPECT_EQ(ma.e12/3.0,mb.e12); EXPECT_EQ(ma.e13/3.0,mb.e13); - EXPECT_EQ(ma.e21/3.0,mb.e21); EXPECT_EQ(ma.e22/3.0,mb.e22); EXPECT_EQ(ma.e23/3.0,mb.e23); - EXPECT_EQ(ma.e31/3.0,mb.e31); EXPECT_EQ(ma.e32/3.0,mb.e32); EXPECT_EQ(ma.e33/3.0,mb.e33); + ModuleBase::Matrix3 ma, mb; + ma = get_random_matrix3 (); + mb = ma; + mb /= 3.0; + EXPECT_EQ (ma.e11 / 3.0, mb.e11); + EXPECT_EQ (ma.e12 / 3.0, mb.e12); + EXPECT_EQ (ma.e13 / 3.0, mb.e13); + EXPECT_EQ (ma.e21 / 3.0, mb.e21); + EXPECT_EQ (ma.e22 / 3.0, mb.e22); + EXPECT_EQ (ma.e23 / 3.0, mb.e23); + EXPECT_EQ (ma.e31 / 3.0, mb.e31); + EXPECT_EQ (ma.e32 / 3.0, mb.e32); + EXPECT_EQ (ma.e33 / 3.0, mb.e33); } -TEST_F(Matrix3Test, Print) +TEST_F (Matrix3Test, Print) { - ModuleBase::Matrix3 ma; - ma = get_random_matrix3(); - testing::internal::CaptureStdout(); - ma.print(); - output = testing::internal::GetCapturedStdout(); - 
EXPECT_THAT(output,testing::HasSubstr("e")); + ModuleBase::Matrix3 ma; + ma = get_random_matrix3 (); + testing::internal::CaptureStdout (); + ma.print (); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("e")); } -TEST_F(Matrix3Test, MaddM) +TEST_F (Matrix3Test, MaddM) { - ModuleBase::Matrix3 ma, mb, mc; - ma = get_random_matrix3(); - mb = get_random_matrix3(); - mc = ma + mb; - EXPECT_EQ(ma.e11+mb.e11, mc.e11); - EXPECT_EQ(ma.e12+mb.e12, mc.e12); - EXPECT_EQ(ma.e13+mb.e13, mc.e13); - EXPECT_EQ(ma.e21+mb.e21, mc.e21); - EXPECT_EQ(ma.e22+mb.e22, mc.e22); - EXPECT_EQ(ma.e23+mb.e23, mc.e23); - EXPECT_EQ(ma.e31+mb.e31, mc.e31); - EXPECT_EQ(ma.e32+mb.e32, mc.e32); - EXPECT_EQ(ma.e33+mb.e33, mc.e33); + ModuleBase::Matrix3 ma, mb, mc; + ma = get_random_matrix3 (); + mb = get_random_matrix3 (); + mc = ma + mb; + EXPECT_EQ (ma.e11 + mb.e11, mc.e11); + EXPECT_EQ (ma.e12 + mb.e12, mc.e12); + EXPECT_EQ (ma.e13 + mb.e13, mc.e13); + EXPECT_EQ (ma.e21 + mb.e21, mc.e21); + EXPECT_EQ (ma.e22 + mb.e22, mc.e22); + EXPECT_EQ (ma.e23 + mb.e23, mc.e23); + EXPECT_EQ (ma.e31 + mb.e31, mc.e31); + EXPECT_EQ (ma.e32 + mb.e32, mc.e32); + EXPECT_EQ (ma.e33 + mb.e33, mc.e33); } -TEST_F(Matrix3Test, MminusM) +TEST_F (Matrix3Test, MminusM) { - ModuleBase::Matrix3 ma, mb, mc; - ma = get_random_matrix3(); - mb = get_random_matrix3(); - mc = ma - mb; - EXPECT_EQ(ma.e11-mb.e11, mc.e11); - EXPECT_EQ(ma.e12-mb.e12, mc.e12); - EXPECT_EQ(ma.e13-mb.e13, mc.e13); - EXPECT_EQ(ma.e21-mb.e21, mc.e21); - EXPECT_EQ(ma.e22-mb.e22, mc.e22); - EXPECT_EQ(ma.e23-mb.e23, mc.e23); - EXPECT_EQ(ma.e31-mb.e31, mc.e31); - EXPECT_EQ(ma.e32-mb.e32, mc.e32); - EXPECT_EQ(ma.e33-mb.e33, mc.e33); + ModuleBase::Matrix3 ma, mb, mc; + ma = get_random_matrix3 (); + mb = get_random_matrix3 (); + mc = ma - mb; + EXPECT_EQ (ma.e11 - mb.e11, mc.e11); + EXPECT_EQ (ma.e12 - mb.e12, mc.e12); + EXPECT_EQ (ma.e13 - mb.e13, mc.e13); + EXPECT_EQ (ma.e21 - mb.e21, mc.e21); + EXPECT_EQ (ma.e22 - 
mb.e22, mc.e22); + EXPECT_EQ (ma.e23 - mb.e23, mc.e23); + EXPECT_EQ (ma.e31 - mb.e31, mc.e31); + EXPECT_EQ (ma.e32 - mb.e32, mc.e32); + EXPECT_EQ (ma.e33 - mb.e33, mc.e33); } -TEST_F(Matrix3Test, MoverNum) +TEST_F (Matrix3Test, MoverNum) { - ModuleBase::Matrix3 ma, mb; - ma = get_random_matrix3(); - mb = ma/3.0; - EXPECT_EQ(ma.e11/3.0,mb.e11); EXPECT_EQ(ma.e12/3.0,mb.e12); EXPECT_EQ(ma.e13/3.0,mb.e13); - EXPECT_EQ(ma.e21/3.0,mb.e21); EXPECT_EQ(ma.e22/3.0,mb.e22); EXPECT_EQ(ma.e23/3.0,mb.e23); - EXPECT_EQ(ma.e31/3.0,mb.e31); EXPECT_EQ(ma.e32/3.0,mb.e32); EXPECT_EQ(ma.e33/3.0,mb.e33); + ModuleBase::Matrix3 ma, mb; + ma = get_random_matrix3 (); + mb = ma / 3.0; + EXPECT_EQ (ma.e11 / 3.0, mb.e11); + EXPECT_EQ (ma.e12 / 3.0, mb.e12); + EXPECT_EQ (ma.e13 / 3.0, mb.e13); + EXPECT_EQ (ma.e21 / 3.0, mb.e21); + EXPECT_EQ (ma.e22 / 3.0, mb.e22); + EXPECT_EQ (ma.e23 / 3.0, mb.e23); + EXPECT_EQ (ma.e31 / 3.0, mb.e31); + EXPECT_EQ (ma.e32 / 3.0, mb.e32); + EXPECT_EQ (ma.e33 / 3.0, mb.e33); } -TEST_F(Matrix3Test, MmultiplyM) +TEST_F (Matrix3Test, MmultiplyM) { - ModuleBase::Matrix3 ma, mb, mc; - ma = get_random_matrix3(); - mb = get_random_matrix3(); - mc = ma * mb; - EXPECT_EQ( (ma.e11*mb.e11 + ma.e12*mb.e21 + ma.e13*mb.e31), mc.e11); - EXPECT_EQ( (ma.e11*mb.e12 + ma.e12*mb.e22 + ma.e13*mb.e32), mc.e12); - EXPECT_EQ( (ma.e11*mb.e13 + ma.e12*mb.e23 + ma.e13*mb.e33), mc.e13); - EXPECT_EQ( (ma.e21*mb.e11 + ma.e22*mb.e21 + ma.e23*mb.e31), mc.e21); - EXPECT_EQ( (ma.e21*mb.e12 + ma.e22*mb.e22 + ma.e23*mb.e32), mc.e22); - EXPECT_EQ( (ma.e21*mb.e13 + ma.e22*mb.e23 + ma.e23*mb.e33), mc.e23); - EXPECT_EQ( (ma.e31*mb.e11 + ma.e32*mb.e21 + ma.e33*mb.e31), mc.e31); - EXPECT_EQ( (ma.e31*mb.e12 + ma.e32*mb.e22 + ma.e33*mb.e32), mc.e32); - EXPECT_EQ( (ma.e31*mb.e13 + ma.e32*mb.e23 + ma.e33*mb.e33), mc.e33); + ModuleBase::Matrix3 ma, mb, mc; + ma = get_random_matrix3 (); + mb = get_random_matrix3 (); + mc = ma * mb; + EXPECT_EQ ((ma.e11 * mb.e11 + ma.e12 * mb.e21 + ma.e13 * mb.e31), mc.e11); + 
EXPECT_EQ ((ma.e11 * mb.e12 + ma.e12 * mb.e22 + ma.e13 * mb.e32), mc.e12); + EXPECT_EQ ((ma.e11 * mb.e13 + ma.e12 * mb.e23 + ma.e13 * mb.e33), mc.e13); + EXPECT_EQ ((ma.e21 * mb.e11 + ma.e22 * mb.e21 + ma.e23 * mb.e31), mc.e21); + EXPECT_EQ ((ma.e21 * mb.e12 + ma.e22 * mb.e22 + ma.e23 * mb.e32), mc.e22); + EXPECT_EQ ((ma.e21 * mb.e13 + ma.e22 * mb.e23 + ma.e23 * mb.e33), mc.e23); + EXPECT_EQ ((ma.e31 * mb.e11 + ma.e32 * mb.e21 + ma.e33 * mb.e31), mc.e31); + EXPECT_EQ ((ma.e31 * mb.e12 + ma.e32 * mb.e22 + ma.e33 * mb.e32), mc.e32); + EXPECT_EQ ((ma.e31 * mb.e13 + ma.e32 * mb.e23 + ma.e33 * mb.e33), mc.e33); } -TEST_F(Matrix3Test, MmultiplyNum) +TEST_F (Matrix3Test, MmultiplyNum) { - ModuleBase::Matrix3 ma, mb, mc; - ma = get_random_matrix3(); - mb = ma; - mc = ma; - mb = ma*3.0; - mc = 3.0*ma; - EXPECT_EQ(ma.e11*3.0,mb.e11); EXPECT_EQ(ma.e12*3.0,mb.e12); EXPECT_EQ(ma.e13*3.0,mb.e13); - EXPECT_EQ(ma.e21*3.0,mb.e21); EXPECT_EQ(ma.e22*3.0,mb.e22); EXPECT_EQ(ma.e23*3.0,mb.e23); - EXPECT_EQ(ma.e31*3.0,mb.e31); EXPECT_EQ(ma.e32*3.0,mb.e32); EXPECT_EQ(ma.e33*3.0,mb.e33); - EXPECT_EQ(ma.e11*3.0,mc.e11); EXPECT_EQ(ma.e12*3.0,mc.e12); EXPECT_EQ(ma.e13*3.0,mc.e13); - EXPECT_EQ(ma.e21*3.0,mc.e21); EXPECT_EQ(ma.e22*3.0,mc.e22); EXPECT_EQ(ma.e23*3.0,mc.e23); - EXPECT_EQ(ma.e31*3.0,mc.e31); EXPECT_EQ(ma.e32*3.0,mc.e32); EXPECT_EQ(ma.e33*3.0,mc.e33); + ModuleBase::Matrix3 ma, mb, mc; + ma = get_random_matrix3 (); + mb = ma; + mc = ma; + mb = ma * 3.0; + mc = 3.0 * ma; + EXPECT_EQ (ma.e11 * 3.0, mb.e11); + EXPECT_EQ (ma.e12 * 3.0, mb.e12); + EXPECT_EQ (ma.e13 * 3.0, mb.e13); + EXPECT_EQ (ma.e21 * 3.0, mb.e21); + EXPECT_EQ (ma.e22 * 3.0, mb.e22); + EXPECT_EQ (ma.e23 * 3.0, mb.e23); + EXPECT_EQ (ma.e31 * 3.0, mb.e31); + EXPECT_EQ (ma.e32 * 3.0, mb.e32); + EXPECT_EQ (ma.e33 * 3.0, mb.e33); + EXPECT_EQ (ma.e11 * 3.0, mc.e11); + EXPECT_EQ (ma.e12 * 3.0, mc.e12); + EXPECT_EQ (ma.e13 * 3.0, mc.e13); + EXPECT_EQ (ma.e21 * 3.0, mc.e21); + EXPECT_EQ (ma.e22 * 3.0, mc.e22); + EXPECT_EQ (ma.e23 
* 3.0, mc.e23); + EXPECT_EQ (ma.e31 * 3.0, mc.e31); + EXPECT_EQ (ma.e32 * 3.0, mc.e32); + EXPECT_EQ (ma.e33 * 3.0, mc.e33); } -TEST_F(Matrix3Test, MmultiplyV) +TEST_F (Matrix3Test, MmultiplyV) { - ModuleBase::Matrix3 ma; - ModuleBase::Vector3 u(3.0,4.0,5.0); - ModuleBase::Vector3 v; - ma = get_random_matrix3(); - v = ma * u; - EXPECT_EQ(v.x, u.x*ma.e11+u.y*ma.e12+u.z*ma.e13); - EXPECT_EQ(v.y, u.x*ma.e21+u.y*ma.e22+u.z*ma.e23); - EXPECT_EQ(v.z, u.x*ma.e31+u.y*ma.e32+u.z*ma.e33); + ModuleBase::Matrix3 ma; + ModuleBase::Vector3 u (3.0, 4.0, 5.0); + ModuleBase::Vector3 v; + ma = get_random_matrix3 (); + v = ma * u; + EXPECT_EQ (v.x, u.x * ma.e11 + u.y * ma.e12 + u.z * ma.e13); + EXPECT_EQ (v.y, u.x * ma.e21 + u.y * ma.e22 + u.z * ma.e23); + EXPECT_EQ (v.z, u.x * ma.e31 + u.y * ma.e32 + u.z * ma.e33); } -TEST_F(Matrix3Test, VmultiplyM) +TEST_F (Matrix3Test, VmultiplyM) { - ModuleBase::Matrix3 ma; - ModuleBase::Vector3 u(3.0,4.0,5.0); - ModuleBase::Vector3 v; - ma = get_random_matrix3(); - v = u*ma; - EXPECT_EQ(v.x, u.x*ma.e11+u.y*ma.e21+u.z*ma.e31); - EXPECT_EQ(v.y, u.x*ma.e12+u.y*ma.e22+u.z*ma.e32); - EXPECT_EQ(v.z, u.x*ma.e13+u.y*ma.e23+u.z*ma.e33); + ModuleBase::Matrix3 ma; + ModuleBase::Vector3 u (3.0, 4.0, 5.0); + ModuleBase::Vector3 v; + ma = get_random_matrix3 (); + v = u * ma; + EXPECT_EQ (v.x, u.x * ma.e11 + u.y * ma.e21 + u.z * ma.e31); + EXPECT_EQ (v.y, u.x * ma.e12 + u.y * ma.e22 + u.z * ma.e32); + EXPECT_EQ (v.z, u.x * ma.e13 + u.y * ma.e23 + u.z * ma.e33); } -TEST_F(Matrix3Test, MeqM) +TEST_F (Matrix3Test, MeqM) { - matrix_a = get_random_matrix3(); - matrix_a1 = matrix_a; - matrix_b = get_random_matrix3(); - EXPECT_TRUE(matrix_a == matrix_a1); - EXPECT_FALSE(matrix_a == matrix_b); + matrix_a = get_random_matrix3 (); + matrix_a1 = matrix_a; + matrix_b = get_random_matrix3 (); + EXPECT_TRUE (matrix_a == matrix_a1); + EXPECT_FALSE (matrix_a == matrix_b); } -TEST_F(Matrix3Test, MneM) +TEST_F (Matrix3Test, MneM) { - matrix_a = get_random_matrix3(); - matrix_a1 
= matrix_a; - matrix_b = get_random_matrix3(); - EXPECT_FALSE(matrix_a != matrix_a1); - EXPECT_TRUE(matrix_a != matrix_b); + matrix_a = get_random_matrix3 (); + matrix_a1 = matrix_a; + matrix_b = get_random_matrix3 (); + EXPECT_FALSE (matrix_a != matrix_a1); + EXPECT_TRUE (matrix_a != matrix_b); } -TEST_F(Matrix3Test, ToMatrix) +TEST_F (Matrix3Test, ToMatrix) { - ModuleBase::Matrix3 ma; - ModuleBase::matrix mb; - ma = get_random_matrix3(); - mb = ma.to_matrix(); - EXPECT_EQ(ma.e11,mb(0,0)); EXPECT_EQ(ma.e12,mb(0,1)); EXPECT_EQ(ma.e13,mb(0,2)); - EXPECT_EQ(ma.e21,mb(1,0)); EXPECT_EQ(ma.e22,mb(1,1)); EXPECT_EQ(ma.e23,mb(1,2)); - EXPECT_EQ(ma.e31,mb(2,0)); EXPECT_EQ(ma.e32,mb(2,1)); EXPECT_EQ(ma.e33,mb(2,2)); + ModuleBase::Matrix3 ma; + ModuleBase::matrix mb; + ma = get_random_matrix3 (); + mb = ma.to_matrix (); + EXPECT_EQ (ma.e11, mb (0, 0)); + EXPECT_EQ (ma.e12, mb (0, 1)); + EXPECT_EQ (ma.e13, mb (0, 2)); + EXPECT_EQ (ma.e21, mb (1, 0)); + EXPECT_EQ (ma.e22, mb (1, 1)); + EXPECT_EQ (ma.e23, mb (1, 2)); + EXPECT_EQ (ma.e31, mb (2, 0)); + EXPECT_EQ (ma.e32, mb (2, 1)); + EXPECT_EQ (ma.e33, mb (2, 2)); } -TEST_F(Matrix3Test,TemplateVectorMultiplyMatrix) +TEST_F (Matrix3Test, TemplateVectorMultiplyMatrix) { - ModuleBase::Vector3 ui(1,2,3); - ModuleBase::Vector3 ud(1.0,2.0,3.0); - ModuleBase::Vector3 uf(1.0,2.0,3.0); - ModuleBase::Vector3 vi,vd,vf; - ModuleBase::Matrix3 ma; - ma = get_random_matrix3(); - vi=ui*ma; - vd=ud*ma; - vf=uf*ma; - EXPECT_DOUBLE_EQ(vi.x,ui.x * ma.e11 + ui.y * ma.e21 + ui.z * ma.e31); - EXPECT_DOUBLE_EQ(vi.y,ui.x * ma.e12 + ui.y * ma.e22 + ui.z * ma.e32); - EXPECT_DOUBLE_EQ(vi.z,ui.x * ma.e13 + ui.y * ma.e23 + ui.z * ma.e33); - EXPECT_DOUBLE_EQ(vd.x,ud.x * ma.e11 + ud.y * ma.e21 + ud.z * ma.e31); - EXPECT_DOUBLE_EQ(vd.y,ud.x * ma.e12 + ud.y * ma.e22 + ud.z * ma.e32); - EXPECT_DOUBLE_EQ(vd.z,ud.x * ma.e13 + ud.y * ma.e23 + ud.z * ma.e33); - EXPECT_DOUBLE_EQ(vf.x,uf.x * ma.e11 + uf.y * ma.e21 + uf.z * ma.e31); - EXPECT_DOUBLE_EQ(vf.y,uf.x * ma.e12 
+ uf.y * ma.e22 + uf.z * ma.e32); - EXPECT_DOUBLE_EQ(vf.z,uf.x * ma.e13 + uf.y * ma.e23 + uf.z * ma.e33); + ModuleBase::Vector3 ui (1, 2, 3); + ModuleBase::Vector3 ud (1.0, 2.0, 3.0); + ModuleBase::Vector3 uf (1.0, 2.0, 3.0); + ModuleBase::Vector3 vi, vd, vf; + ModuleBase::Matrix3 ma; + ma = get_random_matrix3 (); + vi = ui * ma; + vd = ud * ma; + vf = uf * ma; + EXPECT_DOUBLE_EQ (vi.x, ui.x * ma.e11 + ui.y * ma.e21 + ui.z * ma.e31); + EXPECT_DOUBLE_EQ (vi.y, ui.x * ma.e12 + ui.y * ma.e22 + ui.z * ma.e32); + EXPECT_DOUBLE_EQ (vi.z, ui.x * ma.e13 + ui.y * ma.e23 + ui.z * ma.e33); + EXPECT_DOUBLE_EQ (vd.x, ud.x * ma.e11 + ud.y * ma.e21 + ud.z * ma.e31); + EXPECT_DOUBLE_EQ (vd.y, ud.x * ma.e12 + ud.y * ma.e22 + ud.z * ma.e32); + EXPECT_DOUBLE_EQ (vd.z, ud.x * ma.e13 + ud.y * ma.e23 + ud.z * ma.e33); + EXPECT_DOUBLE_EQ (vf.x, uf.x * ma.e11 + uf.y * ma.e21 + uf.z * ma.e31); + EXPECT_DOUBLE_EQ (vf.y, uf.x * ma.e12 + uf.y * ma.e22 + uf.z * ma.e32); + EXPECT_DOUBLE_EQ (vf.z, uf.x * ma.e13 + uf.y * ma.e23 + uf.z * ma.e33); } \ No newline at end of file diff --git a/source/source_base/test/matrix_test.cpp b/source/source_base/test/matrix_test.cpp index 9605bad891e..e18235d5d11 100644 --- a/source/source_base/test/matrix_test.cpp +++ b/source/source_base/test/matrix_test.cpp @@ -1,10 +1,10 @@ -#include"../matrix.h" -#include"gtest/gtest.h" +#include "../matrix.h" +#include "gtest/gtest.h" #include "gmock/gmock.h" /************************************************ -* unit test of class matrix and related functions -***********************************************/ + * unit test of class matrix and related functions + ***********************************************/ /** * - Tested functions of class matrix: @@ -23,7 +23,7 @@ * - function norm * - function print(print the element which is larger than threshold) * - function reshape(change the index of the array) - * + * * - Tested functions related to class matrix * - operator "+", "-", "*" between two matrixs * - operator "*" 
between a double and a matrix, and reverse. @@ -33,343 +33,373 @@ * - function matrixAlloc */ -//a mock function of WARNING_QUIT, to avoid the uncorrected call by matrix.cpp at line 37. +// a mock function of WARNING_QUIT, to avoid the uncorrected call by matrix.cpp at line 37. namespace ModuleBase { - void matrixAlloc(); +void matrixAlloc (); } class matrixTest : public testing::Test { - protected: - ModuleBase::matrix m23a,m33a,m33b,m33c,m34a,m34b; + protected: + ModuleBase::matrix m23a, m33a, m33b, m33c, m34a, m34b; - void SetUp() + void + SetUp () { - m23a.create(2,3); - for (int i=1;i<=6;++i) {m23a.c[i-1] = i*1.0;} - - m33a.create(3,3); - for (int i=1;i<=9;++i) {m33a.c[i-1] = i*1.0;} - - m33b.create(3,3); - for (int i=1;i<=9;++i) {m33b.c[i-1] = i*11.1;} - - m33c.create(3,3,true); - m34a.create(3,4,true); - m34b.create(3,4,true); + m23a.create (2, 3); + for (int i = 1; i <= 6; ++i) + { + m23a.c[i - 1] = i * 1.0; + } + + m33a.create (3, 3); + for (int i = 1; i <= 9; ++i) + { + m33a.c[i - 1] = i * 1.0; + } + + m33b.create (3, 3); + for (int i = 1; i <= 9; ++i) + { + m33b.c[i - 1] = i * 11.1; + } + + m33c.create (3, 3, true); + m34a.create (3, 4, true); + m34b.create (3, 4, true); } - }; -TEST(matrix,ConstructorNrNc) +TEST (matrix, ConstructorNrNc) { - ModuleBase::matrix m(3,4,true); - EXPECT_EQ(m.nr,3); - EXPECT_EQ(m.nc,4); - EXPECT_DOUBLE_EQ(m(0,0),0.0); + ModuleBase::matrix m (3, 4, true); + EXPECT_EQ (m.nr, 3); + EXPECT_EQ (m.nc, 4); + EXPECT_DOUBLE_EQ (m (0, 0), 0.0); } -TEST_F(matrixTest,ConstructorMatrix) +TEST_F (matrixTest, ConstructorMatrix) { - ModuleBase::matrix m(m33a); + ModuleBase::matrix m (m33a); int mnr = m.nr; - EXPECT_EQ(mnr,m33a.nr); - EXPECT_EQ(m.nc,m33a.nc); - for (int i=0;i<9;++i) - { - EXPECT_DOUBLE_EQ(m.c[i],m33a.c[i]); - } + EXPECT_EQ (mnr, m33a.nr); + EXPECT_EQ (m.nc, m33a.nc); + for (int i = 0; i < 9; ++i) + { + EXPECT_DOUBLE_EQ (m.c[i], m33a.c[i]); + } } -TEST_F(matrixTest,ConstructorMtrixRValue) +TEST_F (matrixTest, 
ConstructorMtrixRValue) { - ModuleBase::matrix m(3.0*m33a); - EXPECT_EQ(m.nr,m33a.nr); - EXPECT_EQ(m.nc,m33a.nc); - for (int i=0;i<9;++i) - { - EXPECT_DOUBLE_EQ(m.c[i],m33a.c[i] * 3.0); - } + ModuleBase::matrix m (3.0 * m33a); + EXPECT_EQ (m.nr, m33a.nr); + EXPECT_EQ (m.nc, m33a.nc); + for (int i = 0; i < 9; ++i) + { + EXPECT_DOUBLE_EQ (m.c[i], m33a.c[i] * 3.0); + } } -TEST_F(matrixTest,Create) +TEST_F (matrixTest, Create) { - m33a.create(13,14,true); - EXPECT_EQ(m33a.nr,13); - EXPECT_EQ(m33a.nc,14); - for(int i=0;i<13*14;++i) - { - EXPECT_DOUBLE_EQ(m33a.c[i],0.0); - } + m33a.create (13, 14, true); + EXPECT_EQ (m33a.nr, 13); + EXPECT_EQ (m33a.nc, 14); + for (int i = 0; i < 13 * 14; ++i) + { + EXPECT_DOUBLE_EQ (m33a.c[i], 0.0); + } } -TEST_F(matrixTest,OperatorEqualMatrix) +TEST_F (matrixTest, OperatorEqualMatrix) { ModuleBase::matrix m; m = m33a; - EXPECT_EQ(m.nr,m33a.nr); - EXPECT_EQ(m.nc,m33a.nc); - for (int i=0;i<9;++i) - { - EXPECT_DOUBLE_EQ(m.c[i],m33a.c[i]); - } + EXPECT_EQ (m.nr, m33a.nr); + EXPECT_EQ (m.nc, m33a.nc); + for (int i = 0; i < 9; ++i) + { + EXPECT_DOUBLE_EQ (m.c[i], m33a.c[i]); + } m23a = m33a; - EXPECT_EQ(m23a.nr,m33a.nr); - EXPECT_EQ(m23a.nc,m33a.nc); + EXPECT_EQ (m23a.nr, m33a.nr); + EXPECT_EQ (m23a.nc, m33a.nc); } -TEST_F(matrixTest,OperatorEqualMatrixRvalue) +TEST_F (matrixTest, OperatorEqualMatrixRvalue) { ModuleBase::matrix m; m = 3.0 * m33a; - EXPECT_EQ(m.nr,m33a.nr); - EXPECT_EQ(m.nc,m33a.nc); - for (int i=0;i<9;++i) - { - EXPECT_DOUBLE_EQ(m.c[i],m33a.c[i] * 3.0); - } + EXPECT_EQ (m.nr, m33a.nr); + EXPECT_EQ (m.nc, m33a.nc); + for (int i = 0; i < 9; ++i) + { + EXPECT_DOUBLE_EQ (m.c[i], m33a.c[i] * 3.0); + } } -TEST_F(matrixTest,OperatorParentheses) +TEST_F (matrixTest, OperatorParentheses) { - //EXPECT_DEATH(m33a(3,3),""); - //EXPECT_DEATH(m33a(-1,0),""); - m33a(0,0) = 1.1; - EXPECT_DOUBLE_EQ(m33a(0,0),1.1); + // EXPECT_DEATH(m33a(3,3),""); + // EXPECT_DEATH(m33a(-1,0),""); + m33a (0, 0) = 1.1; + EXPECT_DOUBLE_EQ (m33a (0, 0), 1.1); } 
-TEST_F(matrixTest,OperatorMultiplyEqual) +TEST_F (matrixTest, OperatorMultiplyEqual) { m33b = m33a; m33a *= 11.1; - for (int i=0;i"), - n * 3 * double_mem); + // vector with 3 double float point number + EXPECT_EQ (ModuleBase::Memory::calculate_mem (n, "ModuleBase::Vector3"), n * 3 * double_mem); - // test a struct AtomLink defined in module_neighbor/sltk_grid.h - // AtomLink defined as class FAtom (module_neighbor/sltk_atom.h) - // which includes 3 double and 2 int numbers - EXPECT_EQ(ModuleBase::Memory::calculate_mem(n,"AtomLink"), - n * (int_mem * 2 + 3 * double_mem)); + // test a struct AtomLink defined in module_neighbor/sltk_grid.h + // AtomLink defined as class FAtom (module_neighbor/sltk_atom.h) + // which includes 3 double and 2 int numbers + EXPECT_EQ (ModuleBase::Memory::calculate_mem (n, "AtomLink"), n * (int_mem * 2 + 3 * double_mem)); - // test types of data not defined - testing::internal::CaptureStdout(); - ModuleBase::Memory::calculate_mem(n,"Exception"); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("not this type in memory storage")); + // test types of data not defined + testing::internal::CaptureStdout (); + ModuleBase::Memory::calculate_mem (n, "Exception"); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("not this type in memory storage")); } -TEST_F(MemoryTest,Record) +TEST_F (MemoryTest, Record) { - // abacus record mem in MB - // for double - double mem = ModuleBase::Memory::record("wavefunc","evc",1024*1024,"double"); - EXPECT_EQ(mem,double_mem/factor); - // for cdouble - mem = ModuleBase::Memory::record("wavefunc","evc",1024*1024,"cdouble"); - EXPECT_EQ(mem,complex_matrix_mem/factor); - // for int - mem = ModuleBase::Memory::record("wavefunc","evc",1024*1024,"int"); - EXPECT_EQ(mem,int_mem/factor); - // for bool - mem = ModuleBase::Memory::record("wavefunc","evc",1024*1024,"bool"); - EXPECT_EQ(mem,bool_mem/factor); - // for float - mem = 
ModuleBase::Memory::record("wavefunc","evc",1024*1024,"float"); - EXPECT_EQ(mem,float_mem/factor); - // for short - mem = ModuleBase::Memory::record("wavefunc","evc",1024*1024,"short"); - EXPECT_EQ(mem,short_mem/factor); - // for Vector3 - mem = ModuleBase::Memory::record("wavefunc","evc",1024*1024,"ModuleBase::Vector3"); - EXPECT_EQ(mem,double_mem/factor*3); - // for AtomLink - mem = ModuleBase::Memory::record("wavefunc","evc",1024*1024,"AtomLink"); - EXPECT_EQ(mem,double_mem/factor*3+int_mem/factor*2); + // abacus record mem in MB + // for double + double mem = ModuleBase::Memory::record ("wavefunc", "evc", 1024 * 1024, "double"); + EXPECT_EQ (mem, double_mem / factor); + // for cdouble + mem = ModuleBase::Memory::record ("wavefunc", "evc", 1024 * 1024, "cdouble"); + EXPECT_EQ (mem, complex_matrix_mem / factor); + // for int + mem = ModuleBase::Memory::record ("wavefunc", "evc", 1024 * 1024, "int"); + EXPECT_EQ (mem, int_mem / factor); + // for bool + mem = ModuleBase::Memory::record ("wavefunc", "evc", 1024 * 1024, "bool"); + EXPECT_EQ (mem, bool_mem / factor); + // for float + mem = ModuleBase::Memory::record ("wavefunc", "evc", 1024 * 1024, "float"); + EXPECT_EQ (mem, float_mem / factor); + // for short + mem = ModuleBase::Memory::record ("wavefunc", "evc", 1024 * 1024, "short"); + EXPECT_EQ (mem, short_mem / factor); + // for Vector3 + mem = ModuleBase::Memory::record ("wavefunc", "evc", 1024 * 1024, "ModuleBase::Vector3"); + EXPECT_EQ (mem, double_mem / factor * 3); + // for AtomLink + mem = ModuleBase::Memory::record ("wavefunc", "evc", 1024 * 1024, "AtomLink"); + EXPECT_EQ (mem, double_mem / factor * 3 + int_mem / factor * 2); } -TEST_F(MemoryTest, printall) +TEST_F (MemoryTest, printall) { - ofs.open("tmp"); - // total memory is an internal parameter and added inside the class Memory - ModuleBase::Memory::record("Charge_Mixing","Rrho",1024*1024,"ModuleBase::Vector3"); - ModuleBase::Memory::record("Charge_Mixing","drho",1024*1024,"AtomLink"); - 
ModuleBase::Memory::record("wavefunc","evc",1024*1024,"float"); - ModuleBase::Memory::print_all(ofs); - ofs.close(); - ifs.open("tmp"); - getline(ifs,output); - getline(ifs,output); - EXPECT_THAT(output,testing::HasSubstr("MEMORY(MB)")); - ifs.close(); + ofs.open ("tmp"); + // total memory is an internal parameter and added inside the class Memory + ModuleBase::Memory::record ("Charge_Mixing", "Rrho", 1024 * 1024, "ModuleBase::Vector3"); + ModuleBase::Memory::record ("Charge_Mixing", "drho", 1024 * 1024, "AtomLink"); + ModuleBase::Memory::record ("wavefunc", "evc", 1024 * 1024, "float"); + ModuleBase::Memory::print_all (ofs); + ofs.close (); + ifs.open ("tmp"); + getline (ifs, output); + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("MEMORY(MB)")); + ifs.close (); } -TEST_F(MemoryTest, finish) +TEST_F (MemoryTest, finish) { - *ModuleBase::Memory::name = "tmp_name"; - *ModuleBase::Memory::class_name = "tmp_class_name"; - *ModuleBase::Memory::consume = 100.0; - ModuleBase::Memory::init_flag = true; - ofs.open("tmp"); - // total memory is an internal parameter and added inside the class Memory - ModuleBase::Memory::record("Charge_Mixing","Rrho",1024*1024,"ModuleBase::Vector3"); - EXPECT_NO_THROW(ModuleBase::Memory::finish(ofs)); - ofs.close(); - EXPECT_FALSE(ModuleBase::Memory::init_flag); + *ModuleBase::Memory::name = "tmp_name"; + *ModuleBase::Memory::class_name = "tmp_class_name"; + *ModuleBase::Memory::consume = 100.0; + ModuleBase::Memory::init_flag = true; + ofs.open ("tmp"); + // total memory is an internal parameter and added inside the class Memory + ModuleBase::Memory::record ("Charge_Mixing", "Rrho", 1024 * 1024, "ModuleBase::Vector3"); + EXPECT_NO_THROW (ModuleBase::Memory::finish (ofs)); + ofs.close (); + EXPECT_FALSE (ModuleBase::Memory::init_flag); } diff --git a/source/source_base/test/mymath_test.cpp b/source/source_base/test/mymath_test.cpp index e7b1198bdf1..c94ef24fcc9 100644 --- a/source/source_base/test/mymath_test.cpp +++ 
b/source/source_base/test/mymath_test.cpp @@ -18,49 +18,49 @@ class MymathTest : public testing::Test { -protected: - int number; + protected: + int number; }; -TEST_F(MymathTest,Heapsort) +TEST_F (MymathTest, Heapsort) { - number = 5; - double rr[number]; - int index[number]; - rr[0] = 10; - rr[1] = 9; - rr[2] = 8; - rr[3] = 7; - rr[4] = 6; - index[0] = 0; - //for (int i=0;i a({1, 2, 3}); /* 1 * 2 * 3, 3d array */ - EXPECT_EQ(a.size(), 6); - EXPECT_EQ(a.empty(), false); + const NDArray a ({1, 2, 3}); /* 1 * 2 * 3, 3d array */ + EXPECT_EQ (a.size (), 6); + EXPECT_EQ (a.empty (), false); } -TEST(NDArray, VariadicTemplateConstructor) +TEST (NDArray, VariadicTemplateConstructor) { - const NDArray a(1, 2, 3); /* 1 * 2 * 3, 3d array */ - EXPECT_EQ(a.size(), 6); - EXPECT_EQ(a.empty(), false); + const NDArray a (1, 2, 3); /* 1 * 2 * 3, 3d array */ + EXPECT_EQ (a.size (), 6); + EXPECT_EQ (a.empty (), false); } -TEST(NDArray, CopyConstructor) +TEST (NDArray, CopyConstructor) { - const NDArray a(1, 2, 3); /* 1 * 2 * 3, 3d array */ - const NDArray b(a); - EXPECT_EQ(b.size(), 6); - EXPECT_EQ(b.empty(), false); + const NDArray a (1, 2, 3); /* 1 * 2 * 3, 3d array */ + const NDArray b (a); + EXPECT_EQ (b.size (), 6); + EXPECT_EQ (b.empty (), false); // and a will be the same as b - EXPECT_EQ(a.size(), 6); - EXPECT_EQ(a.empty(), false); + EXPECT_EQ (a.size (), 6); + EXPECT_EQ (a.empty (), false); } -TEST(NDArray, MoveConstructor) +TEST (NDArray, MoveConstructor) { - NDArray a(1, 2, 3); /* 1 * 2 * 3, 3d array */ - const NDArray b(std::move(a)); - EXPECT_EQ(b.size(), 6); - EXPECT_EQ(b.empty(), false); + NDArray a (1, 2, 3); /* 1 * 2 * 3, 3d array */ + const NDArray b (std::move (a)); + EXPECT_EQ (b.size (), 6); + EXPECT_EQ (b.empty (), false); // and a will be empty, but still valid (principle of std::move) - EXPECT_EQ(a.size(), 0); - EXPECT_EQ(a.empty(), true); + EXPECT_EQ (a.size (), 0); + EXPECT_EQ (a.empty (), true); } -TEST(NDArray, CopyAssignment) +TEST (NDArray, 
CopyAssignment) { - const NDArray a(1, 2, 3); /* 1 * 2 * 3, 3d array */ - NDArray b(1); + const NDArray a (1, 2, 3); /* 1 * 2 * 3, 3d array */ + NDArray b (1); b = a; - EXPECT_EQ(b.size(), 6); - EXPECT_EQ(b.empty(), false); + EXPECT_EQ (b.size (), 6); + EXPECT_EQ (b.empty (), false); // and a will be the same as b - EXPECT_EQ(a.size(), 6); - EXPECT_EQ(a.empty(), false); + EXPECT_EQ (a.size (), 6); + EXPECT_EQ (a.empty (), false); } -TEST(NDArray, MoveAssignment) +TEST (NDArray, MoveAssignment) { - NDArray a(1, 2, 3); /* 1 * 2 * 3, 3d array */ - NDArray b(1); - b = std::move(a); - EXPECT_EQ(b.size(), 6); - EXPECT_EQ(b.empty(), false); + NDArray a (1, 2, 3); /* 1 * 2 * 3, 3d array */ + NDArray b (1); + b = std::move (a); + EXPECT_EQ (b.size (), 6); + EXPECT_EQ (b.empty (), false); // and a will be empty, but still valid (principle of std::move) - EXPECT_EQ(a.size(), 0); - EXPECT_EQ(a.empty(), true); + EXPECT_EQ (a.size (), 0); + EXPECT_EQ (a.empty (), true); } -TEST(NDArray, EqualityOperator) +TEST (NDArray, EqualityOperator) { - const NDArray a(1, 2, 3); /* 1 * 2 * 3, 3d array */ - const NDArray b(1, 2, 3); /* 1 * 2 * 3, 3d array */ - const NDArray c(1, 2, 4); /* 1 * 2 * 4, 3d array */ - EXPECT_EQ(a == b, true); - EXPECT_EQ(a == c, false); + const NDArray a (1, 2, 3); /* 1 * 2 * 3, 3d array */ + const NDArray b (1, 2, 3); /* 1 * 2 * 3, 3d array */ + const NDArray c (1, 2, 4); /* 1 * 2 * 4, 3d array */ + EXPECT_EQ (a == b, true); + EXPECT_EQ (a == c, false); } -TEST(NDArray, InequalityOperator) +TEST (NDArray, InequalityOperator) { - const NDArray a(1, 2, 3); /* 1 * 2 * 3, 3d array */ - const NDArray b(1, 2, 3); /* 1 * 2 * 3, 3d array */ - const NDArray c(1, 2, 4); /* 1 * 2 * 4, 3d array */ - EXPECT_EQ(a != b, false); - EXPECT_EQ(a != c, true); + const NDArray a (1, 2, 3); /* 1 * 2 * 3, 3d array */ + const NDArray b (1, 2, 3); /* 1 * 2 * 3, 3d array */ + const NDArray c (1, 2, 4); /* 1 * 2 * 4, 3d array */ + EXPECT_EQ (a != b, false); + EXPECT_EQ (a != c, true); } 
-TEST(NDArray, Index) +TEST (NDArray, Index) { - const NDArray a(1, 2, 3); /* 1 * 2 * 3, 3d array */ - EXPECT_EQ(a.index(0, 0, 0), 0); - EXPECT_EQ(a.index(0, 0, 1), 1); - EXPECT_EQ(a.index(0, 0, 2), 2); - EXPECT_EQ(a.index(0, 1, 0), 3); - EXPECT_EQ(a.index(0, 1, 1), 4); - EXPECT_EQ(a.index(0, 1, 2), 5); + const NDArray a (1, 2, 3); /* 1 * 2 * 3, 3d array */ + EXPECT_EQ (a.index (0, 0, 0), 0); + EXPECT_EQ (a.index (0, 0, 1), 1); + EXPECT_EQ (a.index (0, 0, 2), 2); + EXPECT_EQ (a.index (0, 1, 0), 3); + EXPECT_EQ (a.index (0, 1, 1), 4); + EXPECT_EQ (a.index (0, 1, 2), 5); } -TEST(NDArray, AtMethodMultiIndex) +TEST (NDArray, AtMethodMultiIndex) { - NDArray a(1, 2, 3); /* 1 * 2 * 3, 3d array */ - a.at(0, 0, 0) = 1; - a.at(0, 0, 1) = 2; - a.at(0, 0, 2) = 3; - a.at(0, 1, 0) = 4; - a.at(0, 1, 1) = 5; - a.at(0, 1, 2) = 6; - EXPECT_EQ(a.at(0, 0, 0), 1); - EXPECT_EQ(a.at(0, 0, 1), 2); - EXPECT_EQ(a.at(0, 0, 2), 3); - EXPECT_EQ(a.at(0, 1, 0), 4); - EXPECT_EQ(a.at(0, 1, 1), 5); - EXPECT_EQ(a.at(0, 1, 2), 6); + NDArray a (1, 2, 3); /* 1 * 2 * 3, 3d array */ + a.at (0, 0, 0) = 1; + a.at (0, 0, 1) = 2; + a.at (0, 0, 2) = 3; + a.at (0, 1, 0) = 4; + a.at (0, 1, 1) = 5; + a.at (0, 1, 2) = 6; + EXPECT_EQ (a.at (0, 0, 0), 1); + EXPECT_EQ (a.at (0, 0, 1), 2); + EXPECT_EQ (a.at (0, 0, 2), 3); + EXPECT_EQ (a.at (0, 1, 0), 4); + EXPECT_EQ (a.at (0, 1, 1), 5); + EXPECT_EQ (a.at (0, 1, 2), 6); } -TEST(NDArray, IndexOperatorMultiIndex) +TEST (NDArray, IndexOperatorMultiIndex) { - NDArray a(1, 2, 3); - a(0, 0, 0) = 1; - a(0, 0, 1) = 2; - a(0, 0, 2) = 3; - a(0, 1, 0) = 4; - a(0, 1, 1) = 5; - a(0, 1, 2) = 6; - EXPECT_EQ(a(0, 0, 0), 1); - EXPECT_EQ(a(0, 0, 1), 2); - EXPECT_EQ(a(0, 0, 2), 3); - EXPECT_EQ(a(0, 1, 0), 4); - EXPECT_EQ(a(0, 1, 1), 5); - EXPECT_EQ(a(0, 1, 2), 6); + NDArray a (1, 2, 3); + a (0, 0, 0) = 1; + a (0, 0, 1) = 2; + a (0, 0, 2) = 3; + a (0, 1, 0) = 4; + a (0, 1, 1) = 5; + a (0, 1, 2) = 6; + EXPECT_EQ (a (0, 0, 0), 1); + EXPECT_EQ (a (0, 0, 1), 2); + EXPECT_EQ (a (0, 0, 2), 3); 
+ EXPECT_EQ (a (0, 1, 0), 4); + EXPECT_EQ (a (0, 1, 1), 5); + EXPECT_EQ (a (0, 1, 2), 6); } -TEST(NDArray, Reshape) +TEST (NDArray, Reshape) { - NDArray a(1, 2, 3); /* 1 * 2 * 3, 3d array */ - a.reshape(2, 3, 1); /* 2 * 3 * 1, 3d array */ - EXPECT_EQ(a.size(), 6); - EXPECT_EQ(a.empty(), false); + NDArray a (1, 2, 3); /* 1 * 2 * 3, 3d array */ + a.reshape (2, 3, 1); /* 2 * 3 * 1, 3d array */ + EXPECT_EQ (a.size (), 6); + EXPECT_EQ (a.empty (), false); // expect assert error if the size is not the same - EXPECT_DEATH(a.reshape(2, 3, 2), ""); + EXPECT_DEATH (a.reshape (2, 3, 2), ""); } -TEST(NDArray, ReshapeValue) +TEST (NDArray, ReshapeValue) { - NDArray a(1, 2, 3); /* 1 * 2 * 3, 3d array */ + NDArray a (1, 2, 3); /* 1 * 2 * 3, 3d array */ /* [ [ @@ -153,13 +153,13 @@ TEST(NDArray, ReshapeValue) ] ] // in sequence of 1, 2, 3, 4, 5, 6 */ - a(0, 0, 0) = 1; - a(0, 0, 1) = 2; - a(0, 0, 2) = 3; - a(0, 1, 0) = 4; - a(0, 1, 1) = 5; - a(0, 1, 2) = 6; - a.reshape(2, 3, 1); /* 2 * 3 * 1, 3d array */ + a (0, 0, 0) = 1; + a (0, 0, 1) = 2; + a (0, 0, 2) = 3; + a (0, 1, 0) = 4; + a (0, 1, 1) = 5; + a (0, 1, 2) = 6; + a.reshape (2, 3, 1); /* 2 * 3 * 1, 3d array */ /* [ [ @@ -174,24 +174,25 @@ TEST(NDArray, ReshapeValue) ] ] */ - EXPECT_EQ(a(0, 0, 0), 1); - EXPECT_EQ(a(0, 1, 0), 2); - EXPECT_EQ(a(0, 2, 0), 3); - EXPECT_EQ(a(1, 0, 0), 4); - EXPECT_EQ(a(1, 1, 0), 5); - EXPECT_EQ(a(1, 2, 0), 6); + EXPECT_EQ (a (0, 0, 0), 1); + EXPECT_EQ (a (0, 1, 0), 2); + EXPECT_EQ (a (0, 2, 0), 3); + EXPECT_EQ (a (1, 0, 0), 4); + EXPECT_EQ (a (1, 1, 0), 5); + EXPECT_EQ (a (1, 2, 0), 6); } -TEST(NDArray, ReshapeInfer) +TEST (NDArray, ReshapeInfer) { - const NDArray a(1, 2, 3); /* 1 * 2 * 3, 3d array */ + const NDArray a (1, 2, 3); /* 1 * 2 * 3, 3d array */ // infer the first dimension // infer the second dimension // infer the last dimension } -int main(int argc, char **argv) +int + main (int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + 
testing::InitGoogleTest (&argc, argv); + return RUN_ALL_TESTS (); } diff --git a/source/source_base/test/opt_CG_test.cpp b/source/source_base/test/opt_CG_test.cpp index b8abeb57606..ef0755c5a95 100644 --- a/source/source_base/test/opt_CG_test.cpp +++ b/source/source_base/test/opt_CG_test.cpp @@ -10,7 +10,7 @@ class CG_test : public testing::Test { -protected: + protected: ModuleBase::Opt_CG cg; ModuleBase::Opt_DCsrch ds; // LinearEqu le; @@ -21,23 +21,25 @@ class CG_test : public testing::Test double residual = 10.; double tol = 1e-5; int final_iter = 0; - char *task = nullptr; - double *Ap = nullptr; - double *p = nullptr; - double *x = nullptr; + char* task = nullptr; + double* Ap = nullptr; + double* p = nullptr; + double* x = nullptr; - void SetUp() + void + SetUp () { - cg.set_para(1.); - cg.allocate(tools.nx); - cg.init_b(tools.le.b); + cg.set_para (1.); + cg.allocate (tools.nx); + cg.init_b (tools.le.b); task = new char[60]; Ap = new double[tools.nx]; p = new double[tools.nx]; x = new double[tools.nx]; } - void TearDown() + void + TearDown () { delete[] task; delete[] Ap; @@ -45,177 +47,194 @@ class CG_test : public testing::Test delete[] x; } - void CG_Solve_LinearEq() + void + CG_Solve_LinearEq () { final_iter = 0; - cg.refresh(0, tools.le.b); + cg.refresh (0, tools.le.b); step = 1; residual = 10.; for (int i = 0; i < tools.nx; ++i) - { - x[i] = 0; - p[i] = 0; - Ap[i] = 0; - } + { + x[i] = 0; + p[i] = 0; + Ap[i] = 0; + } for (int iter = 0; iter < maxiter; ++iter) - { - if (residual < tol) { - final_iter = iter; - break; + if (residual < tol) + { + final_iter = iter; + break; + } + cg.next_direct (Ap, 0, p); + tools.le.get_Ap (tools.le.A, p, Ap); + int ifPD = 0; + step = cg.step_length (Ap, p, ifPD); + for (int i = 0; i < 3; ++i) + { + x[i] += step * p[i]; + } + residual = cg.get_residual (); } - cg.next_direct(Ap, 0, p); - tools.le.get_Ap(tools.le.A, p, Ap); - int ifPD = 0; - step = cg.step_length(Ap, p, ifPD); - for (int i = 0; i < 3; ++i) { x[i] += step 
* p[i]; -} - residual = cg.get_residual(); - } } - void Solve(int cg_label, int func_label) + void + Solve (int cg_label, int func_label) { - if (func_label==0) - { - cg.refresh(0, tools.le.b); - } + if (func_label == 0) + { + cg.refresh (0, tools.le.b); + } else - { - cg.refresh(); - } - ds.set_paras(1e-4, 2e-1, 1e-12, 0.,12.); + { + cg.refresh (); + } + ds.set_paras (1e-4, 2e-1, 1e-12, 0., 12.); step = 1.; residual = 10.; final_iter = 0; for (int i = 0; i < tools.nx; ++i) - { - x[i] = 0; - p[i] = 0; - Ap[i] = 0; - } + { + x[i] = 0; + p[i] = 0; + Ap[i] = 0; + } double f = 0; double g = 0; - double *gradient = new double[3]; - double *temp_x = new double[3]; - ModuleBase::GlobalFunc::ZEROS(gradient, 3); - ModuleBase::GlobalFunc::ZEROS(temp_x, 3); + double* gradient = new double[3]; + double* temp_x = new double[3]; + ModuleBase::GlobalFunc::ZEROS (gradient, 3); + ModuleBase::GlobalFunc::ZEROS (temp_x, 3); for (int iter = 0; iter < maxiter; ++iter) - { - tools.dfuncdx(x, gradient, func_label); - residual = 0; - for (int i = 0; i<3 ;++i) { residual += gradient[i] * gradient[i]; -} - if (residual < tol) { - final_iter = iter; - break; + tools.dfuncdx (x, gradient, func_label); + residual = 0; + for (int i = 0; i < 3; ++i) + { + residual += gradient[i] * gradient[i]; + } + if (residual < tol) + { + final_iter = iter; + break; + } + cg.next_direct (gradient, cg_label, p); + for (int i = 0; i < 3; ++i) + { + temp_x[i] = x[i]; + } + task[0] = 'S'; + task[1] = 'T'; + task[2] = 'A'; + task[3] = 'R'; + task[4] = 'T'; + while (true) + { + f = tools.func (temp_x, func_label); + g = tools.dfuncdstp (temp_x, p, func_label); + ds.dcSrch (f, g, step, task); + if (task[0] == 'F' && task[1] == 'G') + { + for (int j = 0; j < 3; ++j) + { + temp_x[j] = x[j] + step * p[j]; + } + continue; + } + else if (task[0] == 'C' && task[1] == 'O') + { + break; + } + else if (task[0] == 'W' && task[1] == 'A') + { + break; + } + else if (task[0] == 'E' && task[1] == 'R') + { + break; + } + } + for 
(int i = 0; i < 3; ++i) + { + x[i] += step * p[i]; + } } - cg.next_direct(gradient, cg_label, p); - for (int i = 0; i < 3; ++i) { temp_x[i] = x[i]; -} - task[0] = 'S'; task[1] = 'T'; task[2] = 'A'; task[3] = 'R'; task[4] = 'T'; - while (true) - { - f = tools.func(temp_x, func_label); - g = tools.dfuncdstp(temp_x, p, func_label); - ds.dcSrch(f, g, step, task); - if (task[0] == 'F' && task[1] == 'G') - { - for (int j = 0; j < 3; ++j) { temp_x[j] = x[j] + step * p[j]; -} - continue; - } - else if (task[0] == 'C' && task[1] == 'O') - { - break; - } - else if (task[0] == 'W' && task[1] == 'A') - { - break; - } - else if (task[0] == 'E' && task[1] == 'R') - { - break; - } - } - for (int i = 0; i < 3; ++i) { x[i] += step * p[i]; -} - } delete[] temp_x; delete[] gradient; } }; -TEST_F(CG_test, Stand_Solve_LinearEq) +TEST_F (CG_test, Stand_Solve_LinearEq) { #ifdef __MPI #undef __MPI - CG_Solve_LinearEq(); - EXPECT_NEAR(x[0], 0.5, DOUBLETHRESHOLD); - EXPECT_NEAR(x[1], 1.6429086563584579739e-18, DOUBLETHRESHOLD); - EXPECT_NEAR(x[2], 1.5, DOUBLETHRESHOLD); - ASSERT_EQ(final_iter, 4); - ASSERT_EQ(cg.get_iter(), 4); + CG_Solve_LinearEq (); + EXPECT_NEAR (x[0], 0.5, DOUBLETHRESHOLD); + EXPECT_NEAR (x[1], 1.6429086563584579739e-18, DOUBLETHRESHOLD); + EXPECT_NEAR (x[2], 1.5, DOUBLETHRESHOLD); + ASSERT_EQ (final_iter, 4); + ASSERT_EQ (cg.get_iter (), 4); #define __MPI #endif } -TEST_F(CG_test, PR_Solve_LinearEq) +TEST_F (CG_test, PR_Solve_LinearEq) { #ifdef __MPI #undef __MPI - Solve(1, 0); - EXPECT_NEAR(x[0], 0.50000000000003430589, DOUBLETHRESHOLD); - EXPECT_NEAR(x[1], -3.4028335704761047964e-14, DOUBLETHRESHOLD); - EXPECT_NEAR(x[2], 1.5000000000000166533, DOUBLETHRESHOLD); - ASSERT_EQ(final_iter, 3); - ASSERT_EQ(cg.get_iter(), 3); + Solve (1, 0); + EXPECT_NEAR (x[0], 0.50000000000003430589, DOUBLETHRESHOLD); + EXPECT_NEAR (x[1], -3.4028335704761047964e-14, DOUBLETHRESHOLD); + EXPECT_NEAR (x[2], 1.5000000000000166533, DOUBLETHRESHOLD); + ASSERT_EQ (final_iter, 3); + ASSERT_EQ 
(cg.get_iter (), 3); #define __MPI #endif } -TEST_F(CG_test, HZ_Solve_LinearEq) +TEST_F (CG_test, HZ_Solve_LinearEq) { #ifdef __MPI #undef __MPI - Solve(2, 0); - EXPECT_NEAR(x[0], 0.49999999999999944489, DOUBLETHRESHOLD); - EXPECT_NEAR(x[1], -9.4368957093138305936e-16, DOUBLETHRESHOLD); - EXPECT_NEAR(x[2], 1.5000000000000011102, DOUBLETHRESHOLD); - ASSERT_EQ(final_iter, 3); - ASSERT_EQ(cg.get_iter(), 3); + Solve (2, 0); + EXPECT_NEAR (x[0], 0.49999999999999944489, DOUBLETHRESHOLD); + EXPECT_NEAR (x[1], -9.4368957093138305936e-16, DOUBLETHRESHOLD); + EXPECT_NEAR (x[2], 1.5000000000000011102, DOUBLETHRESHOLD); + ASSERT_EQ (final_iter, 3); + ASSERT_EQ (cg.get_iter (), 3); #define __MPI #endif } -TEST_F(CG_test, PR_Min_Func) +TEST_F (CG_test, PR_Min_Func) { #ifdef __MPI #undef __MPI - Solve(1, 1); - EXPECT_NEAR(x[0], 4.0006805979150792396, DOUBLETHRESHOLD); - EXPECT_NEAR(x[1], 2.0713759992720870429, DOUBLETHRESHOLD); - EXPECT_NEAR(x[2], 9.2871067233169171118, DOUBLETHRESHOLD); - ASSERT_EQ(final_iter, 18); - ASSERT_EQ(cg.get_iter(), 18); + Solve (1, 1); + EXPECT_NEAR (x[0], 4.0006805979150792396, DOUBLETHRESHOLD); + EXPECT_NEAR (x[1], 2.0713759992720870429, DOUBLETHRESHOLD); + EXPECT_NEAR (x[2], 9.2871067233169171118, DOUBLETHRESHOLD); + ASSERT_EQ (final_iter, 18); + ASSERT_EQ (cg.get_iter (), 18); #define __MPI #endif } -TEST_F(CG_test, HZ_Min_Func) +TEST_F (CG_test, HZ_Min_Func) { #ifdef __MPI #undef __MPI - Solve(2, 1); - EXPECT_NEAR(x[0], 4.0006825378033568086, DOUBLETHRESHOLD); - EXPECT_NEAR(x[1], 2.0691732100663737803, DOUBLETHRESHOLD); - EXPECT_NEAR(x[2], 9.2780872787668311474, DOUBLETHRESHOLD); - ASSERT_EQ(final_iter, 18); - ASSERT_EQ(cg.get_iter(), 18); + Solve (2, 1); + EXPECT_NEAR (x[0], 4.0006825378033568086, DOUBLETHRESHOLD); + EXPECT_NEAR (x[1], 2.0691732100663737803, DOUBLETHRESHOLD); + EXPECT_NEAR (x[2], 9.2780872787668311474, DOUBLETHRESHOLD); + ASSERT_EQ (final_iter, 18); + ASSERT_EQ (cg.get_iter (), 18); #define __MPI #endif } -// g++ -std=c++11 
../opt_CG.cpp ../opt_DCsrch.cpp ./CG_test.cpp ./test_tools.cpp -lgtest -lpthread -lgtest_main -o test.exe \ No newline at end of file +// g++ -std=c++11 ../opt_CG.cpp ../opt_DCsrch.cpp ./CG_test.cpp ./test_tools.cpp -lgtest -lpthread -lgtest_main -o +// test.exe \ No newline at end of file diff --git a/source/source_base/test/opt_TN_test.cpp b/source/source_base/test/opt_TN_test.cpp index 1fc5b7f2d6f..bbfc561f39e 100644 --- a/source/source_base/test/opt_TN_test.cpp +++ b/source/source_base/test/opt_TN_test.cpp @@ -7,7 +7,7 @@ class TN_test : public testing::Test { -protected: + protected: ModuleBase::Opt_TN tn; ModuleBase::Opt_DCsrch ds; TestTools tools; @@ -17,125 +17,139 @@ class TN_test : public testing::Test double tol = 1e-5; int final_iter = 0; int flag = 0; - char *task = nullptr; - double *p = nullptr; - double *x = nullptr; + char* task = nullptr; + double* p = nullptr; + double* x = nullptr; - void SetUp() + void + SetUp () { - tn.set_para(1.); - tn.allocate(tools.nx); + tn.set_para (1.); + tn.allocate (tools.nx); task = new char[60]; p = new double[tools.nx]; x = new double[tools.nx]; } - void TearDown() + void + TearDown () { delete[] task; delete[] p; delete[] x; } - void Solve(int func_label) + void + Solve (int func_label) { - tn.refresh(); - ds.set_paras(1e-4, 2e-1, 1e-12, 0.,12.); + tn.refresh (); + ds.set_paras (1e-4, 2e-1, 1e-12, 0., 12.); step = 1.; residual = 10.; final_iter = 0; for (int i = 0; i < tools.nx; ++i) - { - x[i] = 0; - p[i] = 0; - } + { + x[i] = 0; + p[i] = 0; + } double f = 0; double g = 0; - double *gradient = new double[3]; - double *temp_x = new double[3]; - ModuleBase::GlobalFunc::ZEROS(gradient, 3); - ModuleBase::GlobalFunc::ZEROS(temp_x, 3); + double* gradient = new double[3]; + double* temp_x = new double[3]; + ModuleBase::GlobalFunc::ZEROS (gradient, 3); + ModuleBase::GlobalFunc::ZEROS (temp_x, 3); for (int iter = 0; iter < maxiter; ++iter) - { - tools.dfuncdx(x, gradient, func_label); - residual = 0; - for (int i = 0; i<3 
;++i) { residual += gradient[i] * gradient[i]; -} - if (residual < tol) - { - final_iter = iter; - break; - } - if (func_label == 0) - { - tn.next_direct(x, gradient, flag, p, &(tools.le), &LinearEqu::dfuncdx); - } - else if (func_label == 1) { - tn.next_direct(x, gradient, flag, p, &(tools.mf), &ModuleESolver::ESolver_OF::dfuncdx); + tools.dfuncdx (x, gradient, func_label); + residual = 0; + for (int i = 0; i < 3; ++i) + { + residual += gradient[i] * gradient[i]; + } + if (residual < tol) + { + final_iter = iter; + break; + } + if (func_label == 0) + { + tn.next_direct (x, gradient, flag, p, &(tools.le), &LinearEqu::dfuncdx); + } + else if (func_label == 1) + { + tn.next_direct (x, gradient, flag, p, &(tools.mf), &ModuleESolver::ESolver_OF::dfuncdx); + } + for (int i = 0; i < 3; ++i) + { + temp_x[i] = x[i]; + } + task[0] = 'S'; + task[1] = 'T'; + task[2] = 'A'; + task[3] = 'R'; + task[4] = 'T'; + while (true) + { + f = tools.func (temp_x, func_label); + g = tools.dfuncdstp (temp_x, p, func_label); + ds.dcSrch (f, g, step, task); + if (task[0] == 'F' && task[1] == 'G') + { + for (int j = 0; j < 3; ++j) + { + temp_x[j] = x[j] + step * p[j]; + } + continue; + } + else if (task[0] == 'C' && task[1] == 'O') + { + break; + } + else if (task[0] == 'W' && task[1] == 'A') + { + break; + } + else if (task[0] == 'E' && task[1] == 'R') + { + break; + } + } + for (int i = 0; i < 3; ++i) + { + x[i] += step * p[i]; + } } - for (int i = 0; i < 3; ++i) { temp_x[i] = x[i]; -} - task[0] = 'S'; task[1] = 'T'; task[2] = 'A'; task[3] = 'R'; task[4] = 'T'; - while (true) - { - f = tools.func(temp_x, func_label); - g = tools.dfuncdstp(temp_x, p, func_label); - ds.dcSrch(f, g, step, task); - if (task[0] == 'F' && task[1] == 'G') - { - for (int j = 0; j < 3; ++j) { temp_x[j] = x[j] + step * p[j]; -} - continue; - } - else if (task[0] == 'C' && task[1] == 'O') - { - break; - } - else if (task[0] == 'W' && task[1] == 'A') - { - break; - } - else if (task[0] == 'E' && task[1] == 'R') - { - 
break; - } - } - for (int i = 0; i < 3; ++i) { x[i] += step * p[i]; -} - } delete[] temp_x; delete[] gradient; } }; - -TEST_F(TN_test, TN_Solve_LinearEq) +TEST_F (TN_test, TN_Solve_LinearEq) { #ifdef __MPI #undef __MPI - Solve(0); - EXPECT_NEAR(x[0], 0.50000000000003430589, DOUBLETHRESHOLD); - EXPECT_NEAR(x[1], -3.4028335704761047964e-14, DOUBLETHRESHOLD); - EXPECT_NEAR(x[2], 1.5000000000000166533, DOUBLETHRESHOLD); - ASSERT_EQ(final_iter, 1); - ASSERT_EQ(tn.get_iter(), 1); + Solve (0); + EXPECT_NEAR (x[0], 0.50000000000003430589, DOUBLETHRESHOLD); + EXPECT_NEAR (x[1], -3.4028335704761047964e-14, DOUBLETHRESHOLD); + EXPECT_NEAR (x[2], 1.5000000000000166533, DOUBLETHRESHOLD); + ASSERT_EQ (final_iter, 1); + ASSERT_EQ (tn.get_iter (), 1); #define __MPI #endif } -TEST_F(TN_test, TN_Min_Func) +TEST_F (TN_test, TN_Min_Func) { #ifdef __MPI #undef __MPI - Solve(1); - EXPECT_NEAR(x[0], 4.0049968540891525137, DOUBLETHRESHOLD); - EXPECT_NEAR(x[1], 2.1208751163987624722, DOUBLETHRESHOLD); - EXPECT_NEAR(x[2], 9.4951527720891863993, DOUBLETHRESHOLD); - ASSERT_EQ(final_iter, 6); - ASSERT_EQ(tn.get_iter(), 6); + Solve (1); + EXPECT_NEAR (x[0], 4.0049968540891525137, DOUBLETHRESHOLD); + EXPECT_NEAR (x[1], 2.1208751163987624722, DOUBLETHRESHOLD); + EXPECT_NEAR (x[2], 9.4951527720891863993, DOUBLETHRESHOLD); + ASSERT_EQ (final_iter, 6); + ASSERT_EQ (tn.get_iter (), 6); #define __MPI #endif } \ No newline at end of file diff --git a/source/source_base/test/opt_test_tools.cpp b/source/source_base/test/opt_test_tools.cpp index 71e136b3ef8..682b447095f 100644 --- a/source/source_base/test/opt_test_tools.cpp +++ b/source/source_base/test/opt_test_tools.cpp @@ -2,89 +2,101 @@ #undef __MPI #endif #include "./opt_test_tools.h" -#include +#include -LinearEqu::LinearEqu() +LinearEqu::LinearEqu () { // initial A, x, b, p, Ap // A = [[2,1,0], [1,2,1], [0,1,2]] A = new double*[this->nx]; - for (int i = 0; i < this->nx; ++i) - { - A[i] = new double[this->nx]; - } - A[0][0] = 2; A[0][1] = 1; 
A[0][2] = 0; - A[1][0] = 1; A[1][1] = 2; A[1][2] = 1; - A[2][0] = 0; A[2][1] = 1; A[2][2] = 2; + for (int i = 0; i < this->nx; ++i) + { + A[i] = new double[this->nx]; + } + A[0][0] = 2; + A[0][1] = 1; + A[0][2] = 0; + A[1][0] = 1; + A[1][1] = 2; + A[1][2] = 1; + A[2][0] = 0; + A[2][1] = 1; + A[2][2] = 2; // A[0][0] = 1; A[0][1] = 1; A[0][2] = 0; // A[1][0] = 1; A[1][1] = 1; A[1][2] = 1; // A[2][0] = 0; A[2][1] = 1; A[2][2] = 1; b = new double[this->nx]; - b[0] = 1; b[1] = 2; b[2] = this->nx; + b[0] = 1; + b[1] = 2; + b[2] = this->nx; } -LinearEqu::~LinearEqu() +LinearEqu::~LinearEqu () { delete[] b; for (int i = 0; i < this->nx; ++i) - { - delete[] A[i]; - } + { + delete[] A[i]; + } delete[] A; } -void LinearEqu::get_Ap(double **A, double *p, double *Ap, int nx, int ny) +void + LinearEqu::get_Ap (double** A, double* p, double* Ap, int nx, int ny) { for (int i = 0; i < this->nx; ++i) - { - Ap[i] = 0; - for (int j = 0; j < ny; ++j) { - Ap[i] += A[i][j] * p[j]; + Ap[i] = 0; + for (int j = 0; j < ny; ++j) + { + Ap[i] += A[i][j] * p[j]; + } } - } } // f(x) = xAx/2 - bx // A must be symmetrical positive definite matrix -double LinearEqu::func(double *x) +double + LinearEqu::func (double* x) { - double *Ax = new double[this->nx]; - this->get_Ap(A, x, Ax, this->nx, this->nx); + double* Ax = new double[this->nx]; + this->get_Ap (A, x, Ax, this->nx, this->nx); double result = 0; for (int i = 0; i < this->nx; ++i) - { - result += x[i] * Ax[i] / 2 - b[i] * x[i]; - } + { + result += x[i] * Ax[i] / 2 - b[i] * x[i]; + } delete[] Ax; return result; } // df(x)/dx = Ax - b -void LinearEqu::dfuncdx(double *x, double *gradient) +void + LinearEqu::dfuncdx (double* x, double* gradient) { - double *Ax = new double[this->nx]; - this->get_Ap(A, x, Ax, this->nx, this->nx); + double* Ax = new double[this->nx]; + this->get_Ap (A, x, Ax, this->nx, this->nx); for (int i = 0; i < this->nx; ++i) - { - gradient[i] = Ax[i] - b[i]; - } + { + gradient[i] = Ax[i] - b[i]; + } delete[] Ax; } // x = x + 
ap // df(x)/da = df(x)/dx * dx/da = (Ax - b)*p -double LinearEqu::dfuncdstp(double *x, double *p) +double + LinearEqu::dfuncdstp (double* x, double* p) { - double *Ax = new double[this->nx]; - get_Ap(A, x, Ax, this->nx, this->nx); + double* Ax = new double[this->nx]; + get_Ap (A, x, Ax, this->nx, this->nx); double result = 0; for (int i = 0; i < this->nx; ++i) - { - result += (Ax[i] - b[i]) * p[i]; - } + { + result += (Ax[i] - b[i]) * p[i]; + } delete[] Ax; return result; } @@ -93,12 +105,13 @@ namespace ModuleESolver { // f(x) = xAx/2 - bx // A must be symmetrical positive definite matrix -double ESolver_OF::func(double *x) +double + ESolver_OF::func (double* x) { double result = 0.; - result += pow(x[0] - x[1] - 2, 4.); - result += pow((x[0] * x[1] - x[2] + 1), 2.); - result += pow(x[0] - 4, 2.); + result += pow (x[0] - x[1] - 2, 4.); + result += pow ((x[0] * x[1] - x[2] + 1), 2.); + result += pow (x[0] - 4, 2.); // result += pow(x[0] - 2, 4.); // result += pow(x[1] - 2, 2.); // result += pow(x[2] - 4, 2.); @@ -106,10 +119,11 @@ double ESolver_OF::func(double *x) } // df(x)/dx = Ax - b -void ESolver_OF::dfuncdx(double *x, double *gradient) +void + ESolver_OF::dfuncdx (double* x, double* gradient) { - gradient[0] = 4 * pow(x[0] - x[1] - 2, 3) + 2 * (x[0] * x[1] - x[2] + 1) * x[1] + 2 * (x[0] - 4); - gradient[1] = -4 * pow(x[0] - x[1] - 2, 3) + 2 * (x[0] * x[1] - x[2] + 1) * x[0]; + gradient[0] = 4 * pow (x[0] - x[1] - 2, 3) + 2 * (x[0] * x[1] - x[2] + 1) * x[1] + 2 * (x[0] - 4); + gradient[1] = -4 * pow (x[0] - x[1] - 2, 3) + 2 * (x[0] * x[1] - x[2] + 1) * x[0]; gradient[2] = -2 * (x[0] * x[1] - x[2] + 1); // gradient[0] = 4 * pow(x[0] - 2, 3.); // gradient[1] = 2 * (x[1] - 2); @@ -118,13 +132,17 @@ void ESolver_OF::dfuncdx(double *x, double *gradient) // x = x + ap // df(x)/da = df(x)/dx * dx/da = gradient*p -double ESolver_OF::dfuncdstp(double *x, double *p) +double + ESolver_OF::dfuncdstp (double* x, double* p) { - double *grad = new double[3]; - dfuncdx(x, 
grad); + double* grad = new double[3]; + dfuncdx (x, grad); double result = 0; - for (int i = 0; i < 3; ++i) result += grad[i] * p[i]; + for (int i = 0; i < 3; ++i) + { + result += grad[i] * p[i]; + } delete[] grad; return result; } -} \ No newline at end of file +} // namespace ModuleESolver \ No newline at end of file diff --git a/source/source_base/test/opt_test_tools.h b/source/source_base/test/opt_test_tools.h index 178b36c7d0e..e26e6cbb218 100644 --- a/source/source_base/test/opt_test_tools.h +++ b/source/source_base/test/opt_test_tools.h @@ -1,14 +1,14 @@ class LinearEqu { -public: - LinearEqu(); - ~LinearEqu(); - void get_Ap(double **A, double *p, double *Ap, int nx = 3, int ny = 3); - double func(double *x); - void dfuncdx(double *x, double *gradient); - double dfuncdstp(double *x, double *p); - double *b; - double **A; + public: + LinearEqu (); + ~LinearEqu (); + void get_Ap (double** A, double* p, double* Ap, int nx = 3, int ny = 3); + double func (double* x); + void dfuncdx (double* x, double* gradient); + double dfuncdstp (double* x, double* p); + double* b; + double** A; int nx = 3; }; @@ -17,40 +17,58 @@ namespace ModuleESolver // A mock class of ModuleEsolver::ESolver_OF class ESolver_OF { -public: - ESolver_OF(){}; - ~ESolver_OF(){}; - double func(double *x); - void dfuncdx(double *x, double *gradient); - double dfuncdstp(double *x, double *p); - double *x; + public: + ESolver_OF () {}; + ~ESolver_OF () {}; + double func (double* x); + void dfuncdx (double* x, double* gradient); + double dfuncdstp (double* x, double* p); + double* x; }; -} +} // namespace ModuleESolver class TestTools { -public: - TestTools() - { - this->nx = le.nx; - } - double func(double *x, int func_label) + public: + TestTools () { this->nx = le.nx; } + double + func (double* x, int func_label) { double result = 0.; - if (func_label==0) result = le.func(x); - else if (func_label==1) result = mf.func(x); + if (func_label == 0) + { + result = le.func (x); + } + else if 
(func_label == 1) + { + result = mf.func (x); + } return result; } - void dfuncdx(double *x, double *gradient, int func_label) + void + dfuncdx (double* x, double* gradient, int func_label) { - if (func_label==0) le.dfuncdx(x, gradient); - else if (func_label==1) mf.dfuncdx(x, gradient); + if (func_label == 0) + { + le.dfuncdx (x, gradient); + } + else if (func_label == 1) + { + mf.dfuncdx (x, gradient); + } } - double dfuncdstp(double *x, double *p, int func_label) + double + dfuncdstp (double* x, double* p, int func_label) { double result = 0.; - if (func_label==0) result = le.dfuncdstp(x, p); - else if (func_label==1) result = mf.dfuncdstp(x, p); + if (func_label == 0) + { + result = le.dfuncdstp (x, p); + } + else if (func_label == 1) + { + result = mf.dfuncdstp (x, p); + } return result; } diff --git a/source/source_base/test/perf_sphbes_test.cpp b/source/source_base/test/perf_sphbes_test.cpp index 4c574baa8ec..a3a8828a1d6 100644 --- a/source/source_base/test/perf_sphbes_test.cpp +++ b/source/source_base/test/perf_sphbes_test.cpp @@ -1,72 +1,81 @@ -#include"../math_sphbes.h" -#include +#include "../math_sphbes.h" +#include #include #include #include #include /************************************************ -* performace test of class Sphbes -***********************************************/ + * performace test of class Sphbes + ***********************************************/ /** - * Tested function: + * Tested function: * - sphbesj * - Spherical_Bessel */ -class PerfSphbes : public benchmark::Fixture { -public: +class PerfSphbes : public benchmark::Fixture +{ + public: const double q = 1; const int n = 1000; double stop = 1000.0; double dr = 0.0; - double* rc, *rinf, *jc, *jinf; - void SetUp(const benchmark::State& state){ - const double rcut = state.range(0) + 0.5; - rc = new double[n + 10]; + double *rc, *rinf, *jc, *jinf; + void + SetUp (const benchmark::State& state) + { + const double rcut = state.range (0) + 0.5; + rc = new double[n + 10]; rinf = new 
double[n + 10]; jc = new double[n + 10]; jinf = new double[n + 10]; // generate data points in (0, rcut] in log scale double rmin = 0.0001; - double log_rmin = std::log(rmin); - double log_rcut = std::log(rcut); - dr = (log_rcut - log_rmin) / (n-1); - memset(rc, 0, (n+10) * sizeof(double)); + double log_rmin = std::log (rmin); + double log_rcut = std::log (rcut); + dr = (log_rcut - log_rmin) / (n - 1); + memset (rc, 0, (n + 10) * sizeof (double)); for (int i = 0; i < n; i++) - rc[i] = std::exp(log_rmin + i * dr); - + rc[i] = std::exp (log_rmin + i * dr); + // generate data points in [rcut, stop] in linear scale - memset(rinf, 0, (n+10) * sizeof(double)); + memset (rinf, 0, (n + 10) * sizeof (double)); rinf[0] = rcut; - dr = (stop - rcut) / (n-1); + dr = (stop - rcut) / (n - 1); for (int i = 1; i < n; i++) - rinf[i] += rinf[i-1] + dr; + rinf[i] += rinf[i - 1] + dr; } - void TearDown(const benchmark::State& state){ + void + TearDown (const benchmark::State& state) + { delete[] rc; delete[] rinf; delete[] jc; - delete[] jinf; + delete[] jinf; } -}; +}; -BENCHMARK_DEFINE_F(PerfSphbes, BM_Spherical_Bessel)(benchmark::State& state) { - for (auto _ : state) { - ModuleBase::Sphbes::Spherical_Bessel(n, rc, q, state.range(0), jc); - ModuleBase::Sphbes::Spherical_Bessel(n, rinf, q, state.range(0), jinf); - } +BENCHMARK_DEFINE_F (PerfSphbes, BM_Spherical_Bessel) (benchmark::State& state) +{ + for (auto _: state) + { + ModuleBase::Sphbes::Spherical_Bessel (n, rc, q, state.range (0), jc); + ModuleBase::Sphbes::Spherical_Bessel (n, rinf, q, state.range (0), jinf); + } } -BENCHMARK_DEFINE_F(PerfSphbes, BM_sphbesj)(benchmark::State& state) { - for (auto _ : state) { - ModuleBase::Sphbes::sphbesj(n, rc, q, state.range(0), jc); - ModuleBase::Sphbes::sphbesj(n, rinf, q, state.range(0), jinf); - } +BENCHMARK_DEFINE_F (PerfSphbes, BM_sphbesj) (benchmark::State& state) +{ + for (auto _: state) + { + ModuleBase::Sphbes::sphbesj (n, rc, q, state.range (0), jc); + 
ModuleBase::Sphbes::sphbesj (n, rinf, q, state.range (0), jinf); + } } -BENCHMARK_REGISTER_F(PerfSphbes, BM_sphbesj)->DenseRange(0, 11, 1)->Unit(benchmark::kMicrosecond); -BENCHMARK_REGISTER_F(PerfSphbes, BM_Spherical_Bessel)->DenseRange(0, 11, 1)->Unit(benchmark::kMicrosecond); -BENCHMARK_MAIN(); \ No newline at end of file +BENCHMARK_REGISTER_F (PerfSphbes, BM_sphbesj)->DenseRange (0, 11, 1)->Unit (benchmark::kMicrosecond); +BENCHMARK_REGISTER_F (PerfSphbes, BM_Spherical_Bessel)->DenseRange (0, 11, 1)->Unit (benchmark::kMicrosecond); +BENCHMARK_MAIN (); \ No newline at end of file diff --git a/source/source_base/test/realarray_test.cpp b/source/source_base/test/realarray_test.cpp index cf469c1d7d2..1a362627995 100644 --- a/source/source_base/test/realarray_test.cpp +++ b/source/source_base/test/realarray_test.cpp @@ -39,178 +39,177 @@ class realArrayTest : public testing::Test { -protected: - ModuleBase::realArray a3, a4, b3, b4; - double aa = 11.0; - double bb = 1.0; - int count0 = 0; - int count1 = 0; - const double zero = 0.0; + protected: + ModuleBase::realArray a3, a4, b3, b4; + double aa = 11.0; + double bb = 1.0; + int count0 = 0; + int count1 = 0; + const double zero = 0.0; }; namespace ModuleBase { -void realArrayAlloc(); +void realArrayAlloc (); } -TEST_F(realArrayTest,GetArrayCount) +TEST_F (realArrayTest, GetArrayCount) { - count0 = ModuleBase::realArray::getArrayCount(); - ModuleBase::realArray c3, c4; - count1 = ModuleBase::realArray::getArrayCount(); - EXPECT_EQ((count1-count0),2); + count0 = ModuleBase::realArray::getArrayCount (); + ModuleBase::realArray c3, c4; + count1 = ModuleBase::realArray::getArrayCount (); + EXPECT_EQ ((count1 - count0), 2); } -TEST_F(realArrayTest,Construct) +TEST_F (realArrayTest, Construct) { - ModuleBase::realArray x3(1,5,3); - ModuleBase::realArray xp3(x3); - ModuleBase::realArray x4(1,7,3,4); - ModuleBase::realArray xp4(x4); - EXPECT_EQ(x3.getSize(),15); - EXPECT_EQ(xp3.getSize(),15); - EXPECT_EQ(x4.getSize(),84); - 
EXPECT_EQ(xp4.getSize(),84); + ModuleBase::realArray x3 (1, 5, 3); + ModuleBase::realArray xp3 (x3); + ModuleBase::realArray x4 (1, 7, 3, 4); + ModuleBase::realArray xp4 (x4); + EXPECT_EQ (x3.getSize (), 15); + EXPECT_EQ (xp3.getSize (), 15); + EXPECT_EQ (x4.getSize (), 84); + EXPECT_EQ (xp4.getSize (), 84); } -TEST_F(realArrayTest,Create) +TEST_F (realArrayTest, Create) { - a3.create(1,2,3); - a4.create(1,2,3,4); - EXPECT_EQ(a3.getSize(),6); - EXPECT_EQ(a4.getSize(),24); + a3.create (1, 2, 3); + a4.create (1, 2, 3, 4); + EXPECT_EQ (a3.getSize (), 6); + EXPECT_EQ (a4.getSize (), 24); } -TEST_F(realArrayTest,GetSize) +TEST_F (realArrayTest, GetSize) { - ModuleBase::realArray a3(1,5,3); - //std::cout<< &a3 << &(this->a3) <a3) <(p,100); - for(int i = 0 ; i < 100; i ++) + double* p = new double[100]; + ModuleBase::zeros (p, 100); + for (int i = 0; i < 100; i++) { - EXPECT_DOUBLE_EQ(p[i],0.0); + EXPECT_DOUBLE_EQ (p[i], 0.0); } - delete [] p; - + delete[] p; } diff --git a/source/source_base/test/sph_bessel_recursive_test.cpp b/source/source_base/test/sph_bessel_recursive_test.cpp index 9355cb09031..b451434f8e3 100644 --- a/source/source_base/test/sph_bessel_recursive_test.cpp +++ b/source/source_base/test/sph_bessel_recursive_test.cpp @@ -1,54 +1,57 @@ -#include"../sph_bessel_recursive.h" -#include"gtest/gtest.h" +#include "../sph_bessel_recursive.h" +#include "gtest/gtest.h" #define threshold 1e-12 /************************************************ -* unit test of class Sph_Bessel_Recursive -***********************************************/ + * unit test of class Sph_Bessel_Recursive + ***********************************************/ /** * Note: this unit test try to ensure the invariance * of the spherical Bessel produced by class Sph_Bessel_Recursive, * and the reference results are produced by ModuleBase::Sph_Bessel_Recursive * at 2022-1-25. 
- * + * */ -double mean(std::vector &vect) +double + mean (std::vector& vect) { double meanv = 0.0; - int totN = vect.size(); - for (int i=0; i< totN; ++i) {meanv += vect[i]/totN;} + int totN = vect.size (); + for (int i = 0; i < totN; ++i) + { + meanv += vect[i] / totN; + } return meanv; } -TEST(SphBessel,D1) +TEST (SphBessel, D1) { int lmax = 7; int rmesh = 700; double dx = 0.01; ModuleBase::Sph_Bessel_Recursive::D1 sphbesseld1; - sphbesseld1.set_dx(dx); - sphbesseld1.cal_jlx(lmax,rmesh); - std::vector> jlx = sphbesseld1.get_jlx(); + sphbesseld1.set_dx (dx); + sphbesseld1.cal_jlx (lmax, rmesh); + std::vector> jlx = sphbesseld1.get_jlx (); - ASSERT_EQ(jlx.size(),static_cast(lmax + 1)); - EXPECT_NEAR( mean(jlx[0])/0.2084468748396, 1.0, threshold); - EXPECT_NEAR( mean(jlx[1])/0.12951635180384, 1.0, threshold); - EXPECT_NEAR( mean(jlx[2])/0.124201140093879, 1.0, threshold); - EXPECT_NEAR( mean(jlx[3])/0.118268654505568, 1.0, threshold); - EXPECT_NEAR( mean(jlx[4])/0.0933871035384385, 1.0, threshold); - EXPECT_NEAR( mean(jlx[5])/0.0603800487910689, 1.0, threshold); - EXPECT_NEAR( mean(jlx[6])/0.0327117051555907, 1.0, threshold); - EXPECT_NEAR( mean(jlx[7])/0.0152155566653926, 1.0, threshold); + ASSERT_EQ (jlx.size (), static_cast (lmax + 1)); + EXPECT_NEAR (mean (jlx[0]) / 0.2084468748396, 1.0, threshold); + EXPECT_NEAR (mean (jlx[1]) / 0.12951635180384, 1.0, threshold); + EXPECT_NEAR (mean (jlx[2]) / 0.124201140093879, 1.0, threshold); + EXPECT_NEAR (mean (jlx[3]) / 0.118268654505568, 1.0, threshold); + EXPECT_NEAR (mean (jlx[4]) / 0.0933871035384385, 1.0, threshold); + EXPECT_NEAR (mean (jlx[5]) / 0.0603800487910689, 1.0, threshold); + EXPECT_NEAR (mean (jlx[6]) / 0.0327117051555907, 1.0, threshold); + EXPECT_NEAR (mean (jlx[7]) / 0.0152155566653926, 1.0, threshold); } - -TEST(SphBessel,D2) +TEST (SphBessel, D2) { int lmax = 7; int rmesh = 700; @@ -56,29 +59,29 @@ TEST(SphBessel,D2) double dx = 0.0001; ModuleBase::Sph_Bessel_Recursive::D2 sphbesseld2; - 
sphbesseld2.set_dx(dx); - sphbesseld2.cal_jlx(lmax,rmesh,kmesh); - std::vector>> jlxd2 = sphbesseld2.get_jlx(); - std::vector> jlx(lmax+1); + sphbesseld2.set_dx (dx); + sphbesseld2.cal_jlx (lmax, rmesh, kmesh); + std::vector>> jlxd2 = sphbesseld2.get_jlx (); + std::vector> jlx (lmax + 1); - ASSERT_EQ(jlxd2.size(),static_cast(lmax + 1)); + ASSERT_EQ (jlxd2.size (), static_cast (lmax + 1)); - //calculate the mean of jlxd2[i][j] and assign to jlx[i][j] - for(int i=0; i diff) { - diff = tmp; + tmp = std::abs (arr1[i] - arr2[i]); + if (tmp > diff) + { + diff = tmp; + } } - } return diff; } -TEST_F(SphericalBesselTransformTest, RadrfftBasic) +TEST_F (SphericalBesselTransformTest, RadrfftBasic) { /* * Computes the zeroth, first and second order spherical Bessel @@ -100,50 +103,50 @@ TEST_F(SphericalBesselTransformTest, RadrfftBasic) * second: 2*sqrt(2/pi) * 4k^2 / (k^2+1)^3. * */ const int sz = 10000; - assert(sz <= sz_max); + assert (sz <= sz_max); const double dr = 0.01; const double rcut = dr * (sz - 1); const double dk = PI / rcut; - const double pref = std::sqrt(2. / PI) * 2.; + const double pref = std::sqrt (2. 
/ PI) * 2.; SphericalBesselTransformer sbt; for (int i = 0; i != sz; ++i) - { - double r = i * dr; - f[i] = r * std::exp(-r); - } + { + double r = i * dr; + f[i] = r * std::exp (-r); + } // zeroth-order transform for (int i = 0; i != sz; ++i) - { - double k = dk * i; - g_ref[i] = pref * (3.0 - k * k) / std::pow(k * k + 1, 3); - } - sbt.radrfft(0, sz, rcut, f, g, 0); - EXPECT_LT(max_diff(sz, g_ref, g), tol); + { + double k = dk * i; + g_ref[i] = pref * (3.0 - k * k) / std::pow (k * k + 1, 3); + } + sbt.radrfft (0, sz, rcut, f, g, 0); + EXPECT_LT (max_diff (sz, g_ref, g), tol); // first-order transform for (int i = 0; i != sz; ++i) - { - double k = dk * i; - g_ref[i] = pref * 4.0 * k / std::pow(k * k + 1, 3); - } - sbt.radrfft(1, sz, rcut, f, g, 0); - EXPECT_LT(max_diff(sz, g_ref, g), tol); + { + double k = dk * i; + g_ref[i] = pref * 4.0 * k / std::pow (k * k + 1, 3); + } + sbt.radrfft (1, sz, rcut, f, g, 0); + EXPECT_LT (max_diff (sz, g_ref, g), tol); // second-order transform for (int i = 0; i != sz; ++i) - { - double k = dk * i; - g_ref[i] = pref * 4.0 * k * k / std::pow(k * k + 1, 3); - } - sbt.radrfft(2, sz, rcut, f, g, 0); - EXPECT_LT(max_diff(sz, g_ref, g), tol); + { + double k = dk * i; + g_ref[i] = pref * 4.0 * k * k / std::pow (k * k + 1, 3); + } + sbt.radrfft (2, sz, rcut, f, g, 0); + EXPECT_LT (max_diff (sz, g_ref, g), tol); } -TEST_F(SphericalBesselTransformTest, RadrfftImplicitExponent) +TEST_F (SphericalBesselTransformTest, RadrfftImplicitExponent) { /* * Computes the second order spherical Bessel transform of @@ -154,34 +157,34 @@ TEST_F(SphericalBesselTransformTest, RadrfftImplicitExponent) * 48*sqrt(2/pi) * k^2 / (k^2+1)^4. * */ const int sz = 5000; - assert(sz <= sz_max); + assert (sz <= sz_max); const double dr = 0.02; const double rcut = dr * (sz - 1); const double dk = PI / rcut; - const double pref = std::sqrt(2. / PI) * 48.; + const double pref = std::sqrt (2. 
/ PI) * 48.; - SphericalBesselTransformer sbt(true); + SphericalBesselTransformer sbt (true); for (int i = 0; i != sz; ++i) - { - double k = dk * i; - g_ref[i] = pref * k * k / std::pow(k * k + 1, 4); - } + { + double k = dk * i; + g_ref[i] = pref * k * k / std::pow (k * k + 1, 4); + } for (int p = -2; p <= 2; ++p) - { - for (int i = 0; i != sz; ++i) { - double r = i * dr; - f[i] = std::pow(r, 2 + p) * std::exp(-r); + for (int i = 0; i != sz; ++i) + { + double r = i * dr; + f[i] = std::pow (r, 2 + p) * std::exp (-r); + } + sbt.radrfft (2, sz, rcut, f, g, p); + EXPECT_LT (max_diff (sz, g_ref, g), tol); } - sbt.radrfft(2, sz, rcut, f, g, p); - EXPECT_LT(max_diff(sz, g_ref, g), tol); - } } -TEST_F(SphericalBesselTransformTest, RadrfftVariableSize) +TEST_F (SphericalBesselTransformTest, RadrfftVariableSize) { /* * Computes the second order spherical Bessel transform of @@ -191,32 +194,32 @@ TEST_F(SphericalBesselTransformTest, RadrfftVariableSize) * 48*sqrt(2/pi) * k^2 / (k^2+1)^4. * */ const double dr = 0.02; - const double pref = std::sqrt(2. / PI) * 48.; + const double pref = std::sqrt (2. 
/ PI) * 48.; SphericalBesselTransformer sbt; for (int sz = 5000; sz <= sz_max; sz += 1000) - { - - for (int i = 0; i != sz; ++i) { - double r = i * dr; - f[i] = r * r * std::exp(-r); - } - const double rcut = dr * (sz - 1); - const double dk = PI / rcut; - for (int i = 0; i != sz; ++i) - { - double k = dk * i; - g_ref[i] = pref * k * k / std::pow(k * k + 1, 4); + for (int i = 0; i != sz; ++i) + { + double r = i * dr; + f[i] = r * r * std::exp (-r); + } + + const double rcut = dr * (sz - 1); + const double dk = PI / rcut; + for (int i = 0; i != sz; ++i) + { + double k = dk * i; + g_ref[i] = pref * k * k / std::pow (k * k + 1, 4); + } + sbt.radrfft (2, sz, rcut, f, g, 0); + EXPECT_LT (max_diff (sz, g_ref, g), tol); } - sbt.radrfft(2, sz, rcut, f, g, 0); - EXPECT_LT(max_diff(sz, g_ref, g), tol); - } } -TEST_F(SphericalBesselTransformTest, RadrfftInPlace) +TEST_F (SphericalBesselTransformTest, RadrfftInPlace) { /* * Performs an in-place second order spherical Bessel transform @@ -226,29 +229,29 @@ TEST_F(SphericalBesselTransformTest, RadrfftInPlace) * sqrt(2)/16 * k^2 * exp(-k^2/4) * */ const double dr = 0.02; - const double pref = std::sqrt(2.) / 16.; + const double pref = std::sqrt (2.) 
/ 16.; SphericalBesselTransformer sbt; const double sz = 10000; const double rcut = dr * (sz - 1); for (int i = 0; i != sz; ++i) - { - double r = i * dr; - f[i] = r * r * std::exp(-r * r); - } + { + double r = i * dr; + f[i] = r * r * std::exp (-r * r); + } double dk = PI / rcut; for (int i = 0; i != sz; ++i) - { - double k = dk * i; - g_ref[i] = pref * k * k * std::exp(-k * k / 4); - } - sbt.radrfft(2, sz, rcut, f, f, 0); - EXPECT_LT(max_diff(sz, g_ref, f), tol); + { + double k = dk * i; + g_ref[i] = pref * k * k * std::exp (-k * k / 4); + } + sbt.radrfft (2, sz, rcut, f, f, 0); + EXPECT_LT (max_diff (sz, g_ref, f), tol); } -TEST_F(SphericalBesselTransformTest, DirectBasic) +TEST_F (SphericalBesselTransformTest, DirectBasic) { /* * Computes the zeroth, first and second order spherical Bessel @@ -261,51 +264,63 @@ TEST_F(SphericalBesselTransformTest, DirectBasic) * */ const int sz_in = 7000; const int sz_out = 5000; - assert(sz_in <= sz_max && sz_out <= sz_max); + assert (sz_in <= sz_max && sz_out <= sz_max); const double dr = 0.007; const double dk = 0.013; - std::for_each(grid_in, grid_in + sz_in, [&](double& x) { x = (&x - grid_in) * dr; }); - std::for_each(grid_out, grid_out + sz_out, [&](double& x) { x = (&x - grid_out) * dk; }); + std::for_each (grid_in, grid_in + sz_in, [&] (double& x) { x = (&x - grid_in) * dr; }); + std::for_each (grid_out, grid_out + sz_out, [&] (double& x) { x = (&x - grid_out) * dk; }); - const double pref = std::sqrt(2. / PI) * 2.; + const double pref = std::sqrt (2. 
/ PI) * 2.; - std::for_each(f, f + sz_in, [&](double& x) { - double r = (&x - f) * dr; - x = r * std::exp(-r); - }); + std::for_each (f, + f + sz_in, + [&] (double& x) + { + double r = (&x - f) * dr; + x = r * std::exp (-r); + }); // zeroth-order transform - std::for_each(g_ref, g_ref + sz_out, [&](double& y) { - double k = (&y - g_ref) * dk; - y = pref * (3.0 - k * k) / std::pow(k * k + 1, 3); - }); + std::for_each (g_ref, + g_ref + sz_out, + [&] (double& y) + { + double k = (&y - g_ref) * dk; + y = pref * (3.0 - k * k) / std::pow (k * k + 1, 3); + }); SphericalBesselTransformer sbt; - sbt.direct(0, sz_in, grid_in, f, sz_out, grid_out, g); - EXPECT_LT(max_diff(sz_out, g_ref, g), tol); + sbt.direct (0, sz_in, grid_in, f, sz_out, grid_out, g); + EXPECT_LT (max_diff (sz_out, g_ref, g), tol); // first-order transform - std::for_each(g_ref, g_ref + sz_out, [&](double& y) { - double k = (&y - g_ref) * dk; - y = pref * 4.0 * k / std::pow(k * k + 1, 3); - }); + std::for_each (g_ref, + g_ref + sz_out, + [&] (double& y) + { + double k = (&y - g_ref) * dk; + y = pref * 4.0 * k / std::pow (k * k + 1, 3); + }); - sbt.direct(1, sz_in, grid_in, f, sz_out, grid_out, g); - EXPECT_LT(max_diff(sz_out, g_ref, g), tol); + sbt.direct (1, sz_in, grid_in, f, sz_out, grid_out, g); + EXPECT_LT (max_diff (sz_out, g_ref, g), tol); // second-order transform - std::for_each(g_ref, g_ref + sz_out, [&](double& y) { - double k = (&y - g_ref) * dk; - y = pref * 4.0 * k * k / std::pow(k * k + 1, 3); - }); - - sbt.direct(2, sz_in, grid_in, f, sz_out, grid_out, g); - EXPECT_LT(max_diff(sz_out, g_ref, g), tol); + std::for_each (g_ref, + g_ref + sz_out, + [&] (double& y) + { + double k = (&y - g_ref) * dk; + y = pref * 4.0 * k * k / std::pow (k * k + 1, 3); + }); + + sbt.direct (2, sz_in, grid_in, f, sz_out, grid_out, g); + EXPECT_LT (max_diff (sz_out, g_ref, g), tol); } -TEST_F(SphericalBesselTransformTest, DirectImplicitExponent) +TEST_F (SphericalBesselTransformTest, DirectImplicitExponent) { /* * 
Computes the second order spherical Bessel transform of @@ -317,34 +332,40 @@ TEST_F(SphericalBesselTransformTest, DirectImplicitExponent) * */ const int sz_in = 7000; const int sz_out = 6000; - assert(sz_in <= sz_max && sz_out <= sz_max); + assert (sz_in <= sz_max && sz_out <= sz_max); const double dr = 0.007; const double dk = 0.011; - std::for_each(grid_in, grid_in + sz_in, [&](double& x) { x = (&x - grid_in) * dr; }); - std::for_each(grid_out, grid_out + sz_out, [&](double& x) { x = (&x - grid_out) * dk; }); + std::for_each (grid_in, grid_in + sz_in, [&] (double& x) { x = (&x - grid_in) * dr; }); + std::for_each (grid_out, grid_out + sz_out, [&] (double& x) { x = (&x - grid_out) * dk; }); - const double pref = std::sqrt(2. / PI) * 48.; - std::for_each(g_ref, g_ref + sz_out, [&](double& y) { - double k = (&y - g_ref) * dk; - y = pref * k * k / std::pow(k * k + 1, 4); - }); + const double pref = std::sqrt (2. / PI) * 48.; + std::for_each (g_ref, + g_ref + sz_out, + [&] (double& y) + { + double k = (&y - g_ref) * dk; + y = pref * k * k / std::pow (k * k + 1, 4); + }); - SphericalBesselTransformer sbt(true); + SphericalBesselTransformer sbt (true); for (int p = -2; p <= 2; ++p) - { - std::for_each(f, f + sz_in, [&](double& x) { - double r = (&x - f) * dr; - x = std::pow(r, 2 + p) * std::exp(-r); - }); - - sbt.direct(2, sz_in, grid_in, f, sz_out, grid_out, g, p); - EXPECT_LT(max_diff(sz_out, g_ref, g), tol); - } + { + std::for_each (f, + f + sz_in, + [&] (double& x) + { + double r = (&x - f) * dr; + x = std::pow (r, 2 + p) * std::exp (-r); + }); + + sbt.direct (2, sz_in, grid_in, f, sz_out, grid_out, g, p); + EXPECT_LT (max_diff (sz_out, g_ref, g), tol); + } } -TEST_F(SphericalBesselTransformTest, DirectInPlace) +TEST_F (SphericalBesselTransformTest, DirectInPlace) { /* * Performs an in-place second order spherical Bessel transform @@ -355,31 +376,37 @@ TEST_F(SphericalBesselTransformTest, DirectInPlace) * */ const int sz_in = 7000; const int sz_out = 7000; - 
assert(sz_in <= sz_max && sz_out == sz_in); + assert (sz_in <= sz_max && sz_out == sz_in); const double dr = 0.011; const double dk = 0.007; - std::for_each(grid_in, grid_in + sz_in, [&](double& x) { x = (&x - grid_in) * dr; }); - std::for_each(grid_out, grid_out + sz_out, [&](double& x) { x = (&x - grid_out) * dk; }); - - std::for_each(f, f + sz_in, [&](double& x) { - double r = (&x - f) * dr; - x = r * r * std::exp(-r * r); - }); - - const double pref = std::sqrt(2.) / 16.; - std::for_each(g_ref, g_ref + sz_out, [&](double& y) { - double k = (&y - g_ref) * dk; - y = pref * k * k * std::exp(-k * k / 4); - }); + std::for_each (grid_in, grid_in + sz_in, [&] (double& x) { x = (&x - grid_in) * dr; }); + std::for_each (grid_out, grid_out + sz_out, [&] (double& x) { x = (&x - grid_out) * dk; }); + + std::for_each (f, + f + sz_in, + [&] (double& x) + { + double r = (&x - f) * dr; + x = r * r * std::exp (-r * r); + }); + + const double pref = std::sqrt (2.) / 16.; + std::for_each (g_ref, + g_ref + sz_out, + [&] (double& y) + { + double k = (&y - g_ref) * dk; + y = pref * k * k * std::exp (-k * k / 4); + }); SphericalBesselTransformer sbt; - sbt.direct(2, sz_in, grid_in, f, sz_out, grid_out, f); - EXPECT_LT(max_diff(sz_out, g_ref, f), tol); + sbt.direct (2, sz_in, grid_in, f, sz_out, grid_out, f); + EXPECT_LT (max_diff (sz_out, g_ref, f), tol); } -TEST_F(SphericalBesselTransformTest, HighOrder) +TEST_F (SphericalBesselTransformTest, HighOrder) { /* * Computes the l-order spherical Bessel transforms of @@ -388,96 +415,96 @@ TEST_F(SphericalBesselTransformTest, HighOrder) * */ const int l = 6; const int sz = 5000; - assert(sz <= sz_max); + assert (sz <= sz_max); const double dr = 0.01; const double rcut = dr * (sz - 1); const double dk = PI / rcut; - std::for_each(grid_in, grid_in + sz, [&](double& x) { x = (&x - grid_in) * dr; }); - std::for_each(grid_out, grid_out + sz, [&](double& x) { x = (&x - grid_out) * dk; }); + std::for_each (grid_in, grid_in + sz, [&] (double& x) { 
x = (&x - grid_in) * dr; }); + std::for_each (grid_out, grid_out + sz, [&] (double& x) { x = (&x - grid_out) * dk; }); for (int i = 0; i != sz; ++i) - { - double r = i * dr; - f[i] = std::pow(r, l) * std::exp(-r * r); - } + { + double r = i * dr; + f[i] = std::pow (r, l) * std::exp (-r * r); + } SphericalBesselTransformer sbt; - sbt.radrfft(l, sz, rcut, f, g); + sbt.radrfft (l, sz, rcut, f, g); - sbt.direct(l, sz, grid_in, f, sz, grid_out, g_ref); + sbt.direct (l, sz, grid_in, f, sz, grid_out, g_ref); // NOTE: Simpson's integration gets increasingly inaccurate as k gets large // since the factor of (k*dr)^4 in its error becomes significant when k*dr // is of order 1. So we only compare the results for relatively small k. - EXPECT_LT(max_diff(sz / 2, g_ref, g), tol); + EXPECT_LT (max_diff (sz / 2, g_ref, g), tol); } -TEST_F(SphericalBesselTransformTest, HeapUsage) +TEST_F (SphericalBesselTransformTest, HeapUsage) { /* * Tests the setter and getter of the planner flag for FFTW plan * creation. 
* */ const int sz = 2000; - assert(sz <= sz_max); + assert (sz <= sz_max); const double dr = 0.01; const double rcut = dr * (sz - 1); - SphericalBesselTransformer sbt_no_cache(false); - SphericalBesselTransformer sbt_cached(true); + SphericalBesselTransformer sbt_no_cache (false); + SphericalBesselTransformer sbt_cached (true); - EXPECT_EQ(sbt_no_cache.heap_usage(), 0); - EXPECT_EQ(sbt_cached.heap_usage(), 0); + EXPECT_EQ (sbt_no_cache.heap_usage (), 0); + EXPECT_EQ (sbt_cached.heap_usage (), 0); for (int i = 0; i != sz; ++i) - { - f[i] = std::exp(-i * dr); - } + { + f[i] = std::exp (-i * dr); + } - sbt_no_cache.radrfft(0, sz, rcut, f, g, 0); - sbt_cached.radrfft(0, sz, rcut, f, g, 0); + sbt_no_cache.radrfft (0, sz, rcut, f, g, 0); + sbt_cached.radrfft (0, sz, rcut, f, g, 0); // zeroth-order transform does not involve tabulating jl // but FFT needs to allocate memory - EXPECT_EQ(sbt_no_cache.heap_usage(), 0); - EXPECT_EQ(sbt_cached.heap_usage(), 2*(sz-1)*sizeof(double)); + EXPECT_EQ (sbt_no_cache.heap_usage (), 0); + EXPECT_EQ (sbt_cached.heap_usage (), 2 * (sz - 1) * sizeof (double)); // higher-order transforms involve tabulating jl - sbt_no_cache.radrfft(5, sz, rcut, f, g, 0); - sbt_cached.radrfft(5, sz, rcut, f, g, 0); + sbt_no_cache.radrfft (5, sz, rcut, f, g, 0); + sbt_cached.radrfft (5, sz, rcut, f, g, 0); - EXPECT_EQ(sbt_no_cache.heap_usage(), 0); - EXPECT_GT(sbt_cached.heap_usage(), 2*(sz-1)*sizeof(double)); + EXPECT_EQ (sbt_no_cache.heap_usage (), 0); + EXPECT_GT (sbt_cached.heap_usage (), 2 * (sz - 1) * sizeof (double)); - sbt_no_cache.clear(); - sbt_cached.clear(); + sbt_no_cache.clear (); + sbt_cached.clear (); - EXPECT_EQ(sbt_no_cache.heap_usage(), 0); - EXPECT_EQ(sbt_cached.heap_usage(), 0); + EXPECT_EQ (sbt_no_cache.heap_usage (), 0); + EXPECT_EQ (sbt_cached.heap_usage (), 0); } - -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI int nprocs, id; - MPI_Init(&argc, &argv); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs); - 
MPI_Comm_rank(MPI_COMM_WORLD, &id); + MPI_Init (&argc, &argv); + MPI_Comm_size (MPI_COMM_WORLD, &nprocs); + MPI_Comm_rank (MPI_COMM_WORLD, &id); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif - fftw_cleanup(); + fftw_cleanup (); return result; } diff --git a/source/source_base/test/test_lebedev_laikov.cpp b/source/source_base/test/test_lebedev_laikov.cpp index c9e138cb216..4741ddab777 100644 --- a/source/source_base/test/test_lebedev_laikov.cpp +++ b/source/source_base/test/test_lebedev_laikov.cpp @@ -10,43 +10,52 @@ using ModuleBase::Lebedev_laikov_grid; // mock the function to prevent unnecessary dependency -namespace ModuleBase { -void WARNING_QUIT(const std::string&, const std::string&) {} +namespace ModuleBase +{ +void + WARNING_QUIT (const std::string&, const std::string&) +{ } +} // namespace ModuleBase -class LebedevLaikovTest: public ::testing::Test { -protected: - void randgen(int lmax, std::vector& coef); +class LebedevLaikovTest : public ::testing::Test +{ + protected: + void randgen (int lmax, std::vector& coef); const double tol = 1e-12; }; - -void LebedevLaikovTest::randgen(int lmax, std::vector& coef) { - coef.resize((lmax + 1) * (lmax + 1)); +void + LebedevLaikovTest::randgen (int lmax, std::vector& coef) +{ + coef.resize ((lmax + 1) * (lmax + 1)); // fill coef with uniformly distributed random numbers std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution dis(0.0, 1.0); - for (size_t i = 0; i < coef.size(); ++i) { - coef[i] = dis(gen); - } + std::mt19937 gen (rd ()); + std::uniform_real_distribution dis (0.0, 1.0); + for (size_t i = 0; i < coef.size (); ++i) + { + coef[i] = dis (gen); + } // normalize the coefficients double fac = 0.0; - for (size_t i = 0; i < coef.size(); ++i) { - fac += coef[i] * coef[i]; - } - - fac = 1.0 / std::sqrt(fac); - for (size_t i = 
0; i < coef.size(); ++i) { - coef[i] *= fac; - } -} + for (size_t i = 0; i < coef.size (); ++i) + { + fac += coef[i] * coef[i]; + } + fac = 1.0 / std::sqrt (fac); + for (size_t i = 0; i < coef.size (); ++i) + { + coef[i] *= fac; + } +} -TEST_F(LebedevLaikovTest, Accuracy) { - /* +TEST_F (LebedevLaikovTest, Accuracy) +{ + /* * Given * * f = c[0]*Y00 + c[1]*Y10 + c[2]*Y11 + ..., @@ -63,89 +72,64 @@ TEST_F(LebedevLaikovTest, Accuracy) { // (ngrid, lmax) std::set> supported = { - {6, 3}, - {14, 5}, - {26, 7}, - {38, 9}, - {50, 11}, - {74, 13}, - {86, 15}, - {110, 17}, - {146, 19}, - {170, 21}, - {194, 23}, - {230, 25}, - {266, 27}, - {302, 29}, - {350, 31}, - {434, 35}, - {590, 41}, - {770, 47}, - {974, 53}, - {1202, 59}, - {1454, 65}, - {1730, 71}, - {2030, 77}, - {2354, 83}, - {2702, 89}, - {3074, 95}, - {3470, 101}, - {3890, 107}, - {4334, 113}, - {4802, 119}, - {5294, 125}, - {5810, 131}, + {6, 3}, {14, 5}, {26, 7}, {38, 9}, {50, 11}, {74, 13}, {86, 15}, {110, 17}, + {146, 19}, {170, 21}, {194, 23}, {230, 25}, {266, 27}, {302, 29}, {350, 31}, {434, 35}, + {590, 41}, {770, 47}, {974, 53}, {1202, 59}, {1454, 65}, {1730, 71}, {2030, 77}, {2354, 83}, + {2702, 89}, {3074, 95}, {3470, 101}, {3890, 107}, {4334, 113}, {4802, 119}, {5294, 125}, {5810, 131}, }; std::vector coef; - for (auto& grid_info: supported) { - int ngrid = grid_info.first; - int grid_lmax = grid_info.second; - - Lebedev_laikov_grid lebgrid(ngrid); - lebgrid.generate_grid_points(); - - const double* weight = lebgrid.get_weight(); - const ModuleBase::Vector3* grid = lebgrid.get_grid_coor(); - - int func_lmax = grid_lmax / 2; - randgen(func_lmax, coef); - - double val = 0.0; - std::vector ylm_real; - for (int i = 0; i < ngrid; i++) { - ModuleBase::Ylm::sph_harm(func_lmax, - grid[i].x, grid[i].y, grid[i].z, ylm_real); - double tmp = 0.0; - for (size_t j = 0; j < coef.size(); ++j) { - tmp += coef[j] * ylm_real[j]; - } - val += weight[i] * tmp * tmp; + for (auto& grid_info: supported) + { + int ngrid = 
grid_info.first; + int grid_lmax = grid_info.second; + + Lebedev_laikov_grid lebgrid (ngrid); + lebgrid.generate_grid_points (); + + const double* weight = lebgrid.get_weight (); + const ModuleBase::Vector3* grid = lebgrid.get_grid_coor (); + + int func_lmax = grid_lmax / 2; + randgen (func_lmax, coef); + + double val = 0.0; + std::vector ylm_real; + for (int i = 0; i < ngrid; i++) + { + ModuleBase::Ylm::sph_harm (func_lmax, grid[i].x, grid[i].y, grid[i].z, ylm_real); + double tmp = 0.0; + for (size_t j = 0; j < coef.size (); ++j) + { + tmp += coef[j] * ylm_real[j]; + } + val += weight[i] * tmp * tmp; + } + + double val_ref = 0.0; + for (size_t i = 0; i < coef.size (); ++i) + { + val_ref += coef[i] * coef[i]; + } + + double abs_diff = std::abs (val - val_ref); + EXPECT_LT (abs_diff, tol); } - - double val_ref = 0.0; - for (size_t i = 0; i < coef.size(); ++i) { - val_ref += coef[i] * coef[i]; - } - - double abs_diff = std::abs(val - val_ref); - EXPECT_LT(abs_diff, tol); - } } - -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; diff --git a/source/source_base/test/timer_test.cpp b/source/source_base/test/timer_test.cpp index f7a341096da..2eb6821e485 100644 --- a/source/source_base/test/timer_test.cpp +++ b/source/source_base/test/timer_test.cpp @@ -21,10 +21,10 @@ * - Start * - start total time calculation * - PrintAll - * - print computational processes with time > 0.1 s + * - print computational processes with time > 0.1 s * - Finish - * - finish total time calculation - * - print computational processes with time > 0.1 s + * - finish total time calculation + * - print computational processes with time > 0.1 s * - PrintUntilNow * - stop total time calculation 
* - print total time until now @@ -33,177 +33,175 @@ class TimerTest : public testing::Test { -protected: - // - // for capturing stdout - std::string output; - // for output in file - std::ofstream ofs; - std::ifstream ifs; - int T_Elapse = 100; // microseconds = 0.1 milliseconds - void TearDown() - { - remove("tmp"); - } + protected: + // + // for capturing stdout + std::string output; + // for output in file + std::ofstream ofs; + std::ifstream ifs; + int T_Elapse = 100; // microseconds = 0.1 milliseconds + void + TearDown () + { + remove ("tmp"); + } }; - -TEST_F(TimerTest, Tick) +TEST_F (TimerTest, Tick) { - ModuleBase::timer::start("wavefunc","evc"); - // after 1st call of tick, start_flag becomes false - EXPECT_FALSE(ModuleBase::timer::timer_pool["wavefunc"]["evc"].start_flag); - std::this_thread::sleep_for(std::chrono::microseconds(T_Elapse)); // 0.1 ms - // then we can have time elapsed in cpu_second - ModuleBase::timer::end("wavefunc","evc"); - EXPECT_GT(ModuleBase::timer::timer_pool["wavefunc"]["evc"].cpu_second,0.0001); + ModuleBase::timer::start ("wavefunc", "evc"); + // after 1st call of tick, start_flag becomes false + EXPECT_FALSE (ModuleBase::timer::timer_pool["wavefunc"]["evc"].start_flag); + std::this_thread::sleep_for (std::chrono::microseconds (T_Elapse)); // 0.1 ms + // then we can have time elapsed in cpu_second + ModuleBase::timer::end ("wavefunc", "evc"); + EXPECT_GT (ModuleBase::timer::timer_pool["wavefunc"]["evc"].cpu_second, 0.0001); } - -TEST_F(TimerTest, Start) +TEST_F (TimerTest, Start) { - ModuleBase::timer::start(); - // start() called tick() once - EXPECT_FALSE(ModuleBase::timer::timer_pool[""]["total"].start_flag); - ModuleBase::timer::end("","total"); + ModuleBase::timer::start (); + // start() called tick() once + EXPECT_FALSE (ModuleBase::timer::timer_pool[""]["total"].start_flag); + ModuleBase::timer::end ("", "total"); } - -TEST_F(TimerTest, write_to_json) +TEST_F (TimerTest, write_to_json) { - 
ModuleBase::timer::start("wavefunc","evc"); - std::this_thread::sleep_for(std::chrono::microseconds(T_Elapse)); // 0.1 ms - ModuleBase::timer::end("wavefunc","evc"); - ModuleBase::timer::write_to_json("tmp.json"); - - // check if tmp.json exists - ifs.open("tmp.json"); - EXPECT_TRUE(ifs.good()); - - // read all lines and remove all spaces and tabs and newlines - std::string line; - std::string tmp; - std::string content; - while(getline(ifs,line)) - { - tmp = line; - tmp.erase(std::remove(tmp.begin(),tmp.end(),' '),tmp.end()); - tmp.erase(std::remove(tmp.begin(),tmp.end(),'\t'),tmp.end()); - tmp.erase(std::remove(tmp.begin(),tmp.end(),'\n'),tmp.end()); - content += tmp; - } - - EXPECT_THAT(content,testing::HasSubstr("\"total\":")); - EXPECT_THAT(content,testing::HasSubstr("\"sub\":[{\"class_name\":\"wavefunc\",\"sub\":[{\"name\":\"evc\",\"cpu_second\":")); - EXPECT_THAT(content,testing::HasSubstr("\"calls\":2,\"cpu_second_per_call\":")); - EXPECT_THAT(content,testing::HasSubstr("\"cpu_second_per_total\":")); - EXPECT_THAT(content,testing::HasSubstr("}]}]}")); - ifs.close(); - remove("tmp.json"); + ModuleBase::timer::start ("wavefunc", "evc"); + std::this_thread::sleep_for (std::chrono::microseconds (T_Elapse)); // 0.1 ms + ModuleBase::timer::end ("wavefunc", "evc"); + ModuleBase::timer::write_to_json ("tmp.json"); + + // check if tmp.json exists + ifs.open ("tmp.json"); + EXPECT_TRUE (ifs.good ()); + + // read all lines and remove all spaces and tabs and newlines + std::string line; + std::string tmp; + std::string content; + while (getline (ifs, line)) + { + tmp = line; + tmp.erase (std::remove (tmp.begin (), tmp.end (), ' '), tmp.end ()); + tmp.erase (std::remove (tmp.begin (), tmp.end (), '\t'), tmp.end ()); + tmp.erase (std::remove (tmp.begin (), tmp.end (), '\n'), tmp.end ()); + content += tmp; + } + + EXPECT_THAT (content, testing::HasSubstr ("\"total\":")); + EXPECT_THAT ( + content, + testing::HasSubstr 
("\"sub\":[{\"class_name\":\"wavefunc\",\"sub\":[{\"name\":\"evc\",\"cpu_second\":")); + EXPECT_THAT (content, testing::HasSubstr ("\"calls\":2,\"cpu_second_per_call\":")); + EXPECT_THAT (content, testing::HasSubstr ("\"cpu_second_per_total\":")); + EXPECT_THAT (content, testing::HasSubstr ("}]}]}")); + ifs.close (); + remove ("tmp.json"); } -TEST_F(TimerTest, PrintAll) +TEST_F (TimerTest, PrintAll) { - ModuleBase::timer::start("wavefunc","evc"); - std::this_thread::sleep_for(std::chrono::microseconds(T_Elapse)); // 0.1 ms - ModuleBase::timer::end("wavefunc","evc"); - - // call print_all - ofs.open("tmp"); - testing::internal::CaptureStdout(); - ModuleBase::timer::print_all(ofs, true); - output = testing::internal::GetCapturedStdout(); - ofs.close(); - - // checout output on screen - // std::cout << "Get captured stdout: \n" << std::endl; - // std::cout << output << std::endl; - EXPECT_THAT(output,testing::HasSubstr("TIME STATISTICS")); - EXPECT_THAT(output,testing::HasSubstr("CLASS_NAME")); - EXPECT_THAT(output,testing::HasSubstr("NAME")); - EXPECT_THAT(output,testing::HasSubstr("TIME/s")); - EXPECT_THAT(output,testing::HasSubstr("CALLS")); - EXPECT_THAT(output,testing::HasSubstr("AVG/s")); - EXPECT_THAT(output,testing::HasSubstr("PER/%")); - - // check output in file - ifs.open("tmp"); - // std::cout << "Capture contents line by line from output file: \n" << std::endl; - getline(ifs,output); - getline(ifs,output); - EXPECT_THAT(output,testing::HasSubstr("TIME STATISTICS")); - getline(ifs,output); - getline(ifs,output); - EXPECT_THAT(output,testing::HasSubstr("CLASS_NAME")); - EXPECT_THAT(output,testing::HasSubstr("NAME")); - EXPECT_THAT(output,testing::HasSubstr("TIME/s")); - EXPECT_THAT(output,testing::HasSubstr("CALLS")); - EXPECT_THAT(output,testing::HasSubstr("AVG/s")); - EXPECT_THAT(output,testing::HasSubstr("PER/%")); - ifs.close(); - remove("time.json"); + ModuleBase::timer::start ("wavefunc", "evc"); + std::this_thread::sleep_for 
(std::chrono::microseconds (T_Elapse)); // 0.1 ms + ModuleBase::timer::end ("wavefunc", "evc"); + + // call print_all + ofs.open ("tmp"); + testing::internal::CaptureStdout (); + ModuleBase::timer::print_all (ofs, true); + output = testing::internal::GetCapturedStdout (); + ofs.close (); + + // checout output on screen + // std::cout << "Get captured stdout: \n" << std::endl; + // std::cout << output << std::endl; + EXPECT_THAT (output, testing::HasSubstr ("TIME STATISTICS")); + EXPECT_THAT (output, testing::HasSubstr ("CLASS_NAME")); + EXPECT_THAT (output, testing::HasSubstr ("NAME")); + EXPECT_THAT (output, testing::HasSubstr ("TIME/s")); + EXPECT_THAT (output, testing::HasSubstr ("CALLS")); + EXPECT_THAT (output, testing::HasSubstr ("AVG/s")); + EXPECT_THAT (output, testing::HasSubstr ("PER/%")); + + // check output in file + ifs.open ("tmp"); + // std::cout << "Capture contents line by line from output file: \n" << std::endl; + getline (ifs, output); + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("TIME STATISTICS")); + getline (ifs, output); + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("CLASS_NAME")); + EXPECT_THAT (output, testing::HasSubstr ("NAME")); + EXPECT_THAT (output, testing::HasSubstr ("TIME/s")); + EXPECT_THAT (output, testing::HasSubstr ("CALLS")); + EXPECT_THAT (output, testing::HasSubstr ("AVG/s")); + EXPECT_THAT (output, testing::HasSubstr ("PER/%")); + ifs.close (); + remove ("time.json"); } - -TEST_F(TimerTest, PrintUntilNow) +TEST_F (TimerTest, PrintUntilNow) { - long double time = ModuleBase::timer::print_until_now(); - EXPECT_GE(time, 0.0); + long double time = ModuleBase::timer::print_until_now (); + EXPECT_GE (time, 0.0); } - -TEST_F(TimerTest, Finish) +TEST_F (TimerTest, Finish) { - ModuleBase::timer::start("wavefunc","evc"); - std::this_thread::sleep_for(std::chrono::microseconds(T_Elapse)); // 0.1 ms - ModuleBase::timer::end("wavefunc","evc"); - - // call print_all - ofs.open("tmp"); - 
testing::internal::CaptureStdout(); - ModuleBase::timer::finish(ofs); - output = testing::internal::GetCapturedStdout(); - ofs.close(); - // checout output on screen - //std::cout << "Get captured stdout: \n" << std::endl; - //std::cout << output << std::endl; - EXPECT_THAT(output,testing::HasSubstr("TIME STATISTICS")); - EXPECT_THAT(output,testing::HasSubstr("CLASS_NAME")); - EXPECT_THAT(output,testing::HasSubstr("NAME")); - EXPECT_THAT(output,testing::HasSubstr("TIME/s")); - EXPECT_THAT(output,testing::HasSubstr("CALLS")); - EXPECT_THAT(output,testing::HasSubstr("AVG/s")); - EXPECT_THAT(output,testing::HasSubstr("PER/%")); - - // check output in file - ifs.open("tmp"); - //std::cout << "Capture contents line by line from output file: \n" << std::endl; - getline(ifs,output); - getline(ifs,output); - EXPECT_THAT(output,testing::HasSubstr("TIME STATISTICS")); - getline(ifs,output); - getline(ifs,output); - EXPECT_THAT(output,testing::HasSubstr("CLASS_NAME")); - EXPECT_THAT(output,testing::HasSubstr("NAME")); - EXPECT_THAT(output,testing::HasSubstr("TIME/s")); - EXPECT_THAT(output,testing::HasSubstr("CALLS")); - EXPECT_THAT(output,testing::HasSubstr("AVG/s")); - EXPECT_THAT(output,testing::HasSubstr("PER/%")); - ifs.close(); + ModuleBase::timer::start ("wavefunc", "evc"); + std::this_thread::sleep_for (std::chrono::microseconds (T_Elapse)); // 0.1 ms + ModuleBase::timer::end ("wavefunc", "evc"); + + // call print_all + ofs.open ("tmp"); + testing::internal::CaptureStdout (); + ModuleBase::timer::finish (ofs); + output = testing::internal::GetCapturedStdout (); + ofs.close (); + // checout output on screen + // std::cout << "Get captured stdout: \n" << std::endl; + // std::cout << output << std::endl; + EXPECT_THAT (output, testing::HasSubstr ("TIME STATISTICS")); + EXPECT_THAT (output, testing::HasSubstr ("CLASS_NAME")); + EXPECT_THAT (output, testing::HasSubstr ("NAME")); + EXPECT_THAT (output, testing::HasSubstr ("TIME/s")); + EXPECT_THAT (output, 
testing::HasSubstr ("CALLS")); + EXPECT_THAT (output, testing::HasSubstr ("AVG/s")); + EXPECT_THAT (output, testing::HasSubstr ("PER/%")); + + // check output in file + ifs.open ("tmp"); + // std::cout << "Capture contents line by line from output file: \n" << std::endl; + getline (ifs, output); + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("TIME STATISTICS")); + getline (ifs, output); + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("CLASS_NAME")); + EXPECT_THAT (output, testing::HasSubstr ("NAME")); + EXPECT_THAT (output, testing::HasSubstr ("TIME/s")); + EXPECT_THAT (output, testing::HasSubstr ("CALLS")); + EXPECT_THAT (output, testing::HasSubstr ("AVG/s")); + EXPECT_THAT (output, testing::HasSubstr ("PER/%")); + ifs.close (); } #ifdef __MPI -int main(int argc, char **argv) +int + main (int argc, char** argv) { - MPI_Init(&argc,&argv); + MPI_Init (&argc, &argv); - testing::InitGoogleTest(&argc,argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); - MPI_Finalize(); + MPI_Finalize (); - return result; + return result; } #endif - diff --git a/source/source_base/test/tool_check_test.cpp b/source/source_base/test/tool_check_test.cpp index 47b20e3a91d..b458b0dde81 100644 --- a/source/source_base/test/tool_check_test.cpp +++ b/source/source_base/test/tool_check_test.cpp @@ -26,104 +26,104 @@ class ToolCheckTest : public testing::Test { protected: - std::ofstream ofs; - std::ifstream ifs; - // define std::string, int, double variables - std::string name = "abaqus"; - int ecut = 100; - double occupation = 0.23; - std::string caltype = "nscf"; - // quit is the swith to control performance of function - bool quit = false; - // for capturing stdout - std::string output = ""; - void TearDown() - { - remove("tmp"); - } - + std::ofstream ofs; + std::ifstream ifs; + // define std::string, int, double variables + std::string name = "abaqus"; + int ecut = 100; + double occupation = 
0.23; + std::string caltype = "nscf"; + // quit is the swith to control performance of function + bool quit = false; + // for capturing stdout + std::string output = ""; + void + TearDown () + { + remove ("tmp"); + } }; -TEST_F(ToolCheckTest, Name) +TEST_F (ToolCheckTest, Name) { - ofs.open("tmp"); - // double input to check continus check function - ofs << name << std::endl; - ofs << name << std::endl; - ofs.close(); - ifs.open("tmp"); - // non-quit check - testing::internal::CaptureStdout(); - ModuleBase::CHECK_NAME(ifs, "abacus", quit); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("not match")); - // quit check: quit = false - testing::internal::CaptureStdout(); - EXPECT_EXIT(ModuleBase::CHECK_NAME(ifs, "abacus"), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("NOTICE")); - ifs.close(); + ofs.open ("tmp"); + // double input to check continus check function + ofs << name << std::endl; + ofs << name << std::endl; + ofs.close (); + ifs.open ("tmp"); + // non-quit check + testing::internal::CaptureStdout (); + ModuleBase::CHECK_NAME (ifs, "abacus", quit); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("not match")); + // quit check: quit = false + testing::internal::CaptureStdout (); + EXPECT_EXIT (ModuleBase::CHECK_NAME (ifs, "abacus"), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("NOTICE")); + ifs.close (); } -TEST_F(ToolCheckTest, Int) +TEST_F (ToolCheckTest, Int) { - ofs.open("tmp"); - // double input to check continus check function - ofs << ecut << std::endl; - ofs << ecut << std::endl; - ofs.close(); - ifs.open("tmp"); - // non-quit check - testing::internal::CaptureStdout(); - ModuleBase::CHECK_INT(ifs, 80, quit); - output = testing::internal::GetCapturedStdout(); - 
EXPECT_THAT(output,testing::HasSubstr("not match")); - // quit check: quit = false - testing::internal::CaptureStdout(); - EXPECT_EXIT(ModuleBase::CHECK_INT(ifs, 80), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("NOTICE")); - ifs.close(); + ofs.open ("tmp"); + // double input to check continus check function + ofs << ecut << std::endl; + ofs << ecut << std::endl; + ofs.close (); + ifs.open ("tmp"); + // non-quit check + testing::internal::CaptureStdout (); + ModuleBase::CHECK_INT (ifs, 80, quit); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("not match")); + // quit check: quit = false + testing::internal::CaptureStdout (); + EXPECT_EXIT (ModuleBase::CHECK_INT (ifs, 80), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("NOTICE")); + ifs.close (); } -TEST_F(ToolCheckTest, Double) +TEST_F (ToolCheckTest, Double) { - ofs.open("tmp"); - // double input to check continus check function - ofs << occupation << std::endl; - ofs << occupation << std::endl; - ofs.close(); - ifs.open("tmp"); - // non-quit check: quit = false - testing::internal::CaptureStdout(); - ModuleBase::CHECK_DOUBLE(ifs, 0.23002, quit); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("not match")); - // quit check - testing::internal::CaptureStdout(); - EXPECT_EXIT(ModuleBase::CHECK_DOUBLE(ifs, 0.22998), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("NOTICE")); - ifs.close(); + ofs.open ("tmp"); + // double input to check continus check function + ofs << occupation << std::endl; + ofs << occupation << std::endl; + ofs.close (); + ifs.open ("tmp"); + // non-quit check: quit = false + testing::internal::CaptureStdout (); + ModuleBase::CHECK_DOUBLE (ifs, 0.23002, quit); + 
output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("not match")); + // quit check + testing::internal::CaptureStdout (); + EXPECT_EXIT (ModuleBase::CHECK_DOUBLE (ifs, 0.22998), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("NOTICE")); + ifs.close (); } -TEST_F(ToolCheckTest, String) +TEST_F (ToolCheckTest, String) { - ofs.open("tmp"); - // double input to check continus check function - ofs << caltype << std::endl; - ofs << caltype << std::endl; - ofs.close(); - ifs.open("tmp"); - // non-quit check: quit=false - testing::internal::CaptureStdout(); - ModuleBase::CHECK_STRING(ifs, "scf", quit); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("not match")); - // quit check - testing::internal::CaptureStdout(); - EXPECT_EXIT(ModuleBase::CHECK_STRING(ifs, "scf"), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("NOTICE")); - ifs.close(); + ofs.open ("tmp"); + // double input to check continus check function + ofs << caltype << std::endl; + ofs << caltype << std::endl; + ofs.close (); + ifs.open ("tmp"); + // non-quit check: quit=false + testing::internal::CaptureStdout (); + ModuleBase::CHECK_STRING (ifs, "scf", quit); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("not match")); + // quit check + testing::internal::CaptureStdout (); + EXPECT_EXIT (ModuleBase::CHECK_STRING (ifs, "scf"), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("NOTICE")); + ifs.close (); } diff --git a/source/source_base/test/tool_quit_no_exit.cpp b/source/source_base/test/tool_quit_no_exit.cpp index 4c9bca7be0d..81d753dbddd 100644 --- a/source/source_base/test/tool_quit_no_exit.cpp +++ 
b/source/source_base/test/tool_quit_no_exit.cpp @@ -5,38 +5,41 @@ // mock for UT only namespace ModuleBase { -void WARNING(const std::string &file,const std::string &description) +void + WARNING (const std::string& file, const std::string& description) { return; } -void WARNING_QUIT(const std::string &file,const std::string &description, int ret) +void + WARNING_QUIT (const std::string& file, const std::string& description, int ret) { #ifdef __NORMAL - std::cout << " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << std::endl; - std::cout << " NOTICE " << std::endl; - std::cout << " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << std::endl; + std::cout << " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << std::endl; + std::cout << " NOTICE " << std::endl; + std::cout << " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << std::endl; #else - std::cout << " " << std::endl; - std::cout << " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << std::endl; - std::cout << " NOTICE " << std::endl; - std::cout << " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << std::endl; - std::cout << " " << std::endl; - std::cout << " " << description << std::endl; - std::cout << " " << std::endl; - std::cout << " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << std::endl; - std::cout << " NOTICE " << std::endl; - std::cout << " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << std::endl; + std::cout << " " << std::endl; + std::cout << " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << std::endl; + std::cout << " NOTICE " << std::endl; + std::cout << " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << std::endl; + std::cout << " " << std::endl; + std::cout << " " << description << std::endl; + std::cout << " " << std::endl; + std::cout << " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" 
<< std::endl; + std::cout << " NOTICE " << std::endl; + std::cout << " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << std::endl; #endif - throw std::runtime_error("WARNING_QUIT"); + throw std::runtime_error ("WARNING_QUIT"); } -void WARNING_QUIT(const std::string &file,const std::string &description) +void + WARNING_QUIT (const std::string& file, const std::string& description) { - WARNING_QUIT(file, description, 0); + WARNING_QUIT (file, description, 0); } -} +} // namespace ModuleBase diff --git a/source/source_base/test/tool_quit_test.cpp b/source/source_base/test/tool_quit_test.cpp index 218a9c98786..486f7c4aba1 100644 --- a/source/source_base/test/tool_quit_test.cpp +++ b/source/source_base/test/tool_quit_test.cpp @@ -25,110 +25,109 @@ class ToolQuitTest : public testing::Test { protected: - std::ifstream ifs; - // for capturing output on screen - std::string output; - void SetUp() - { - GlobalV::ofs_warning.open("warning.log"); - GlobalV::ofs_running.open("running.log"); - ModuleBase::set_quit_out_dir("OUT/"); - } - void TearDown() - { - remove("warning.log"); - remove("running.log"); - } + std::ifstream ifs; + // for capturing output on screen + std::string output; + void + SetUp () + { + GlobalV::ofs_warning.open ("warning.log"); + GlobalV::ofs_running.open ("running.log"); + ModuleBase::set_quit_out_dir ("OUT/"); + } + void + TearDown () + { + remove ("warning.log"); + remove ("running.log"); + } }; - -TEST_F(ToolQuitTest,warning) +TEST_F (ToolQuitTest, warning) { - ModuleBase::WARNING("INPUT","bad input parameter"); - GlobalV::ofs_warning.close(); - ifs.open("warning.log"); - getline(ifs,output); - // test output in warning.log file - EXPECT_THAT(output,testing::HasSubstr("warning")); - ifs.close(); + ModuleBase::WARNING ("INPUT", "bad input parameter"); + GlobalV::ofs_warning.close (); + ifs.open ("warning.log"); + getline (ifs, output); + // test output in warning.log file + EXPECT_THAT (output, testing::HasSubstr ("warning")); + 
ifs.close (); } -TEST_F(ToolQuitTest,quit) +TEST_F (ToolQuitTest, quit) { - testing::internal::CaptureStdout(); - EXPECT_EXIT(ModuleBase::QUIT(), ::testing::ExitedWithCode(0), ""); - output = testing::internal::GetCapturedStdout(); - // test output on screen - EXPECT_THAT(output,testing::HasSubstr("TIME STATISTICS")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (ModuleBase::QUIT (), ::testing::ExitedWithCode (0), ""); + output = testing::internal::GetCapturedStdout (); + // test output on screen + EXPECT_THAT (output, testing::HasSubstr ("TIME STATISTICS")); } -TEST_F(ToolQuitTest,quit_with_ret) +TEST_F (ToolQuitTest, quit_with_ret) { - testing::internal::CaptureStdout(); - EXPECT_EXIT(ModuleBase::QUIT(1), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - // test output on screen - EXPECT_THAT(output,testing::HasSubstr("TIME STATISTICS")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (ModuleBase::QUIT (1), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + // test output on screen + EXPECT_THAT (output, testing::HasSubstr ("TIME STATISTICS")); } // use EXPECT_EXIT to test exit codes -TEST_F(ToolQuitTest,warningquit) +TEST_F (ToolQuitTest, warningquit) { - testing::internal::CaptureStdout(); - EXPECT_EXIT(ModuleBase::WARNING_QUIT("INPUT","bad input parameter"), - ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - // test output on screening - EXPECT_THAT(output,testing::HasSubstr("TIME STATISTICS")); - GlobalV::ofs_warning.close(); - GlobalV::ofs_running.close(); - ifs.open("warning.log"); - getline(ifs,output); - // test output in warning.log file - EXPECT_THAT(output,testing::HasSubstr("warning")); - ifs.close(); - ifs.open("running.log"); - getline(ifs,output); - // test output in running.log file - EXPECT_THAT(output,testing::HasSubstr("-------")); - ifs.close(); + testing::internal::CaptureStdout (); + EXPECT_EXIT 
(ModuleBase::WARNING_QUIT ("INPUT", "bad input parameter"), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + // test output on screening + EXPECT_THAT (output, testing::HasSubstr ("TIME STATISTICS")); + GlobalV::ofs_warning.close (); + GlobalV::ofs_running.close (); + ifs.open ("warning.log"); + getline (ifs, output); + // test output in warning.log file + EXPECT_THAT (output, testing::HasSubstr ("warning")); + ifs.close (); + ifs.open ("running.log"); + getline (ifs, output); + // test output in running.log file + EXPECT_THAT (output, testing::HasSubstr ("-------")); + ifs.close (); } // use EXPECT_EXIT to test exit codes -TEST_F(ToolQuitTest,warningquit_with_ret) +TEST_F (ToolQuitTest, warningquit_with_ret) { - testing::internal::CaptureStdout(); - EXPECT_EXIT(ModuleBase::WARNING_QUIT("INPUT","bad input parameter",1), - ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - // test output on screening - EXPECT_THAT(output,testing::HasSubstr("TIME STATISTICS")); - GlobalV::ofs_warning.close(); - GlobalV::ofs_running.close(); - ifs.open("warning.log"); - getline(ifs,output); - // test output in warning.log file - EXPECT_THAT(output,testing::HasSubstr("warning")); - ifs.close(); - ifs.open("running.log"); - getline(ifs,output); - // test output in running.log file - EXPECT_THAT(output,testing::HasSubstr("-------")); - ifs.close(); + testing::internal::CaptureStdout (); + EXPECT_EXIT (ModuleBase::WARNING_QUIT ("INPUT", "bad input parameter", 1), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + // test output on screening + EXPECT_THAT (output, testing::HasSubstr ("TIME STATISTICS")); + GlobalV::ofs_warning.close (); + GlobalV::ofs_running.close (); + ifs.open ("warning.log"); + getline (ifs, output); + // test output in warning.log file + EXPECT_THAT (output, testing::HasSubstr ("warning")); + ifs.close (); + ifs.open ("running.log"); + getline (ifs, 
output); + // test output in running.log file + EXPECT_THAT (output, testing::HasSubstr ("-------")); + ifs.close (); } // use __MPI to activate parallel environment #ifdef __MPI -int main(int argc, char **argv) +int + main (int argc, char** argv) { - MPI_Init(&argc,&argv); + MPI_Init (&argc, &argv); - testing::InitGoogleTest(&argc,argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); - MPI_Finalize(); + MPI_Finalize (); - return result; + return result; } #endif - diff --git a/source/source_base/test/tool_threading_test.cpp b/source/source_base/test/tool_threading_test.cpp index d816aacded8..951a702cdd6 100644 --- a/source/source_base/test/tool_threading_test.cpp +++ b/source/source_base/test/tool_threading_test.cpp @@ -3,20 +3,20 @@ #include "gmock/gmock.h" #include /************************************************ -* unit test of threading tool -***********************************************/ + * unit test of threading tool + ***********************************************/ /** -* - Tested functions of class threading tool: -* - TASK_DIST_1D: -* - (template)Distributing 1d tasks by worker id (int and long long) -* - BLOCK_TASK_DIST_1D: -* - (template)Distributing 1d tasks by block_size and worker id (int and long long) -* - OMP_PARALLE: -* - Run functions in parallel mode -* - TRY_OMP_PARALLEL: -* - Run functions in parallel mode(Add the judgment statement to determine whether program is in parallel) -**/ + * - Tested functions of class threading tool: + * - TASK_DIST_1D: + * - (template)Distributing 1d tasks by worker id (int and long long) + * - BLOCK_TASK_DIST_1D: + * - (template)Distributing 1d tasks by block_size and worker id (int and long long) + * - OMP_PARALLE: + * - Run functions in parallel mode + * - TRY_OMP_PARALLEL: + * - Run functions in parallel mode(Add the judgment statement to determine whether program is in parallel) + **/ // The meaning of the parameters used in the following tests 
// nw: nworker @@ -26,137 +26,138 @@ // le: len // bs: block_size -//Test function used in the following tests -void test_fun(int a,int b) - { - std::cout< ComplexMatrix::scaled_sum()")); - ifs.close(); + ModuleBase::TITLE (claname, funname, false); + GlobalV::ofs_running.close (); + ifs.open ("TITLEtest2.log"); + getline (ifs, output2); + EXPECT_THAT (output2, testing::HasSubstr (" ==> ComplexMatrix::scaled_sum()")); + ifs.close (); } -TEST_F(ToolTitleTest, TITLE3) +TEST_F (ToolTitleTest, TITLE3) { - std::ofstream oofs; + std::ofstream oofs; std::string output3a; std::string output3b; - oofs.open("TITLEtest3.log"); - ModuleBase::TITLE(oofs,claname,funname,false); - oofs.close(); - ifs.open("TITLEtest3.log"); - getline(ifs,output3a); - EXPECT_THAT(output3a,testing::HasSubstr(" ==> ComplexMatrix::scaled_sum()")); - ifs.close(); + oofs.open ("TITLEtest3.log"); + ModuleBase::TITLE (oofs, claname, funname, false); + oofs.close (); + ifs.open ("TITLEtest3.log"); + getline (ifs, output3a); + EXPECT_THAT (output3a, testing::HasSubstr (" ==> ComplexMatrix::scaled_sum()")); + ifs.close (); } diff --git a/source/source_base/test/vector3_test.cpp b/source/source_base/test/vector3_test.cpp index 9a1cf492546..4ab64fb107e 100644 --- a/source/source_base/test/vector3_test.cpp +++ b/source/source_base/test/vector3_test.cpp @@ -77,692 +77,690 @@ class Vector3Test : public testing::Test { -protected: - double da = 3.0; - double db = 4.0; - double dc = 5.0; - int ia = 3; - int ib = 4; - int ic = 5; - float fa = 3.0; - float fb = 4.0; - float fc = 5.0; - // for capturing stdout - std::string output; + protected: + double da = 3.0; + double db = 4.0; + double dc = 5.0; + int ia = 3; + int ib = 4; + int ic = 5; + float fa = 3.0; + float fb = 4.0; + float fc = 5.0; + // for capturing stdout + std::string output; }; -TEST_F(Vector3Test,Construct) +TEST_F (Vector3Test, Construct) { - // double Vector3 - ModuleBase::Vector3 u (da,db,dc); - ModuleBase::Vector3 up (u); - EXPECT_EQ(u.x,3.0); - 
EXPECT_EQ(u.y,4.0); - EXPECT_EQ(u.z,5.0); - EXPECT_EQ(up.x,3.0); - EXPECT_EQ(up.y,4.0); - EXPECT_EQ(up.z,5.0); - // float Vector3 - ModuleBase::Vector3 v (fa,fb,fc); - ModuleBase::Vector3 vp (v); - EXPECT_EQ(v.x,3.0); - EXPECT_EQ(v.y,4.0); - EXPECT_EQ(v.z,5.0); - EXPECT_EQ(vp.x,3.0); - EXPECT_EQ(vp.y,4.0); - EXPECT_EQ(vp.z,5.0); - // int Vector3 - ModuleBase::Vector3 w (ia,ib,ic); - ModuleBase::Vector3 wp (w); - EXPECT_EQ(w.x,3); - EXPECT_EQ(w.y,4); - EXPECT_EQ(w.z,5); - EXPECT_EQ(wp.x,3); - EXPECT_EQ(wp.y,4); - EXPECT_EQ(wp.z,5); + // double Vector3 + ModuleBase::Vector3 u (da, db, dc); + ModuleBase::Vector3 up (u); + EXPECT_EQ (u.x, 3.0); + EXPECT_EQ (u.y, 4.0); + EXPECT_EQ (u.z, 5.0); + EXPECT_EQ (up.x, 3.0); + EXPECT_EQ (up.y, 4.0); + EXPECT_EQ (up.z, 5.0); + // float Vector3 + ModuleBase::Vector3 v (fa, fb, fc); + ModuleBase::Vector3 vp (v); + EXPECT_EQ (v.x, 3.0); + EXPECT_EQ (v.y, 4.0); + EXPECT_EQ (v.z, 5.0); + EXPECT_EQ (vp.x, 3.0); + EXPECT_EQ (vp.y, 4.0); + EXPECT_EQ (vp.z, 5.0); + // int Vector3 + ModuleBase::Vector3 w (ia, ib, ic); + ModuleBase::Vector3 wp (w); + EXPECT_EQ (w.x, 3); + EXPECT_EQ (w.y, 4); + EXPECT_EQ (w.z, 5); + EXPECT_EQ (wp.x, 3); + EXPECT_EQ (wp.y, 4); + EXPECT_EQ (wp.z, 5); } -TEST_F(Vector3Test,Set) +TEST_F (Vector3Test, Set) { - // double Vector3 - ModuleBase::Vector3 u; - u.set(da,db,dc); - EXPECT_EQ(u.x,3.0); - EXPECT_EQ(u.y,4.0); - EXPECT_EQ(u.z,5.0); - // float Vector3 - ModuleBase::Vector3 v; - v.set(fa,fb,fc); - EXPECT_EQ(v.x,3.0); - EXPECT_EQ(v.y,4.0); - EXPECT_EQ(v.z,5.0); - // int Vector3 - ModuleBase::Vector3 w; - w.set(ia,ib,ic); - EXPECT_EQ(w.x,3); - EXPECT_EQ(w.y,4); - EXPECT_EQ(w.z,5); + // double Vector3 + ModuleBase::Vector3 u; + u.set (da, db, dc); + EXPECT_EQ (u.x, 3.0); + EXPECT_EQ (u.y, 4.0); + EXPECT_EQ (u.z, 5.0); + // float Vector3 + ModuleBase::Vector3 v; + v.set (fa, fb, fc); + EXPECT_EQ (v.x, 3.0); + EXPECT_EQ (v.y, 4.0); + EXPECT_EQ (v.z, 5.0); + // int Vector3 + ModuleBase::Vector3 w; + w.set (ia, ib, 
ic); + EXPECT_EQ (w.x, 3); + EXPECT_EQ (w.y, 4); + EXPECT_EQ (w.z, 5); } -TEST_F(Vector3Test,Equal) +TEST_F (Vector3Test, Equal) { - // double Vector3 - ModuleBase::Vector3 u, up; - u.set(da,db,dc); - up = u; - EXPECT_EQ(up.x,3.0); - EXPECT_EQ(up.y,4.0); - EXPECT_EQ(up.z,5.0); - // float Vector3 - ModuleBase::Vector3 v, vp; - v.set(fa,fb,fc); - vp = v; - EXPECT_EQ(vp.x,3.0); - EXPECT_EQ(vp.y,4.0); - EXPECT_EQ(vp.z,5.0); - // int Vector3 - ModuleBase::Vector3 w, wp; - w.set(ia,ib,ic); - wp = w; - EXPECT_EQ(wp.x,3); - EXPECT_EQ(wp.y,4); - EXPECT_EQ(wp.z,5); + // double Vector3 + ModuleBase::Vector3 u, up; + u.set (da, db, dc); + up = u; + EXPECT_EQ (up.x, 3.0); + EXPECT_EQ (up.y, 4.0); + EXPECT_EQ (up.z, 5.0); + // float Vector3 + ModuleBase::Vector3 v, vp; + v.set (fa, fb, fc); + vp = v; + EXPECT_EQ (vp.x, 3.0); + EXPECT_EQ (vp.y, 4.0); + EXPECT_EQ (vp.z, 5.0); + // int Vector3 + ModuleBase::Vector3 w, wp; + w.set (ia, ib, ic); + wp = w; + EXPECT_EQ (wp.x, 3); + EXPECT_EQ (wp.y, 4); + EXPECT_EQ (wp.z, 5); } -TEST_F(Vector3Test,equal) +TEST_F (Vector3Test, equal) { - // double Vector3 - ModuleBase::Vector3 u; - u.set(da,db,dc); - u = 2; - EXPECT_EQ(u.x,2.0); - EXPECT_EQ(u.y,2.0); - EXPECT_EQ(u.z,2.0); - // float Vector3 - ModuleBase::Vector3 v; - v.set(fa,fb,fc); - v = 2; - EXPECT_EQ(v.x,2.0); - EXPECT_EQ(v.y,2.0); - EXPECT_EQ(v.z,2.0); - // int Vector3 - ModuleBase::Vector3 w; - w.set(ia,ib,ic); - w = 2; - EXPECT_EQ(w.x,2); - EXPECT_EQ(w.y,2); - EXPECT_EQ(w.z,2); + // double Vector3 + ModuleBase::Vector3 u; + u.set (da, db, dc); + u = 2; + EXPECT_EQ (u.x, 2.0); + EXPECT_EQ (u.y, 2.0); + EXPECT_EQ (u.z, 2.0); + // float Vector3 + ModuleBase::Vector3 v; + v.set (fa, fb, fc); + v = 2; + EXPECT_EQ (v.x, 2.0); + EXPECT_EQ (v.y, 2.0); + EXPECT_EQ (v.z, 2.0); + // int Vector3 + ModuleBase::Vector3 w; + w.set (ia, ib, ic); + w = 2; + EXPECT_EQ (w.x, 2); + EXPECT_EQ (w.y, 2); + EXPECT_EQ (w.z, 2); } -TEST_F(Vector3Test,PlusEqual) +TEST_F (Vector3Test, PlusEqual) { - // 
double Vector3 - ModuleBase::Vector3 u, up; - u.set(da,db,dc); - up.set(da,db,dc); - up += u; - EXPECT_EQ(up.x,6.0); - EXPECT_EQ(up.y,8.0); - EXPECT_EQ(up.z,10.0); - // float Vector3 - ModuleBase::Vector3 v, vp; - v.set(fa,fb,fc); - vp.set(fa,fb,fc); - vp += v; - EXPECT_EQ(vp.x,6.0); - EXPECT_EQ(vp.y,8.0); - EXPECT_EQ(vp.z,10.0); - // int Vector3 - ModuleBase::Vector3 w, wp; - w.set(ia,ib,ic); - wp.set(ia,ib,ic); - wp += w; - EXPECT_EQ(wp.x,6); - EXPECT_EQ(wp.y,8); - EXPECT_EQ(wp.z,10); + // double Vector3 + ModuleBase::Vector3 u, up; + u.set (da, db, dc); + up.set (da, db, dc); + up += u; + EXPECT_EQ (up.x, 6.0); + EXPECT_EQ (up.y, 8.0); + EXPECT_EQ (up.z, 10.0); + // float Vector3 + ModuleBase::Vector3 v, vp; + v.set (fa, fb, fc); + vp.set (fa, fb, fc); + vp += v; + EXPECT_EQ (vp.x, 6.0); + EXPECT_EQ (vp.y, 8.0); + EXPECT_EQ (vp.z, 10.0); + // int Vector3 + ModuleBase::Vector3 w, wp; + w.set (ia, ib, ic); + wp.set (ia, ib, ic); + wp += w; + EXPECT_EQ (wp.x, 6); + EXPECT_EQ (wp.y, 8); + EXPECT_EQ (wp.z, 10); } -TEST_F(Vector3Test,MinusEqual) +TEST_F (Vector3Test, MinusEqual) { - // double Vector3 - ModuleBase::Vector3 u, up; - u.set(da,db,dc); - up.set(3*da,3*db,3*dc); - up -= u; - EXPECT_EQ(up.x,6.0); - EXPECT_EQ(up.y,8.0); - EXPECT_EQ(up.z,10.0); - // float Vector3 - ModuleBase::Vector3 v, vp; - v.set(fa,fb,fc); - vp.set(3*fa,3*fb,3*fc); - vp -= v; - EXPECT_EQ(vp.x,6.0); - EXPECT_EQ(vp.y,8.0); - EXPECT_EQ(vp.z,10.0); - // int Vector3 - ModuleBase::Vector3 w, wp; - w.set(ia,ib,ic); - wp.set(3*ia,3*ib,3*ic); - wp -= w; - EXPECT_EQ(wp.x,6); - EXPECT_EQ(wp.y,8); - EXPECT_EQ(wp.z,10); + // double Vector3 + ModuleBase::Vector3 u, up; + u.set (da, db, dc); + up.set (3 * da, 3 * db, 3 * dc); + up -= u; + EXPECT_EQ (up.x, 6.0); + EXPECT_EQ (up.y, 8.0); + EXPECT_EQ (up.z, 10.0); + // float Vector3 + ModuleBase::Vector3 v, vp; + v.set (fa, fb, fc); + vp.set (3 * fa, 3 * fb, 3 * fc); + vp -= v; + EXPECT_EQ (vp.x, 6.0); + EXPECT_EQ (vp.y, 8.0); + EXPECT_EQ (vp.z, 10.0); + // 
int Vector3 + ModuleBase::Vector3 w, wp; + w.set (ia, ib, ic); + wp.set (3 * ia, 3 * ib, 3 * ic); + wp -= w; + EXPECT_EQ (wp.x, 6); + EXPECT_EQ (wp.y, 8); + EXPECT_EQ (wp.z, 10); } -TEST_F(Vector3Test,MultiplyEqual) +TEST_F (Vector3Test, MultiplyEqual) { - // double Vector3 - ModuleBase::Vector3 u; - u.set(da,db,dc); - u *= 2; - EXPECT_EQ(u.x,6.0); - EXPECT_EQ(u.y,8.0); - EXPECT_EQ(u.z,10.0); - // float Vector3 - ModuleBase::Vector3 v; - v.set(fa,fb,fc); - v *= 2; - EXPECT_EQ(v.x,6.0); - EXPECT_EQ(v.y,8.0); - EXPECT_EQ(v.z,10.0); - // int Vector3 - ModuleBase::Vector3 w; - w.set(ia,ib,ic); - w *= 2; - EXPECT_EQ(w.x,6); - EXPECT_EQ(w.y,8); - EXPECT_EQ(w.z,10); + // double Vector3 + ModuleBase::Vector3 u; + u.set (da, db, dc); + u *= 2; + EXPECT_EQ (u.x, 6.0); + EXPECT_EQ (u.y, 8.0); + EXPECT_EQ (u.z, 10.0); + // float Vector3 + ModuleBase::Vector3 v; + v.set (fa, fb, fc); + v *= 2; + EXPECT_EQ (v.x, 6.0); + EXPECT_EQ (v.y, 8.0); + EXPECT_EQ (v.z, 10.0); + // int Vector3 + ModuleBase::Vector3 w; + w.set (ia, ib, ic); + w *= 2; + EXPECT_EQ (w.x, 6); + EXPECT_EQ (w.y, 8); + EXPECT_EQ (w.z, 10); } -TEST_F(Vector3Test,OverEqual) +TEST_F (Vector3Test, OverEqual) { - // double Vector3 - ModuleBase::Vector3 u; - u.set(4*da,4*db,4*dc); - u /= 2; - EXPECT_EQ(u.x,6.0); - EXPECT_EQ(u.y,8.0); - EXPECT_EQ(u.z,10.0); - // float Vector3 - ModuleBase::Vector3 v; - v.set(4*fa,4*fb,4*fc); - v /= 2; - EXPECT_EQ(v.x,6.0); - EXPECT_EQ(v.y,8.0); - EXPECT_EQ(v.z,10.0); - // int Vector3 - ModuleBase::Vector3 w; - w.set(4*ia,4*ib,4*ic); - w /= 2; - EXPECT_EQ(w.x,6); - EXPECT_EQ(w.y,8); - EXPECT_EQ(w.z,10); + // double Vector3 + ModuleBase::Vector3 u; + u.set (4 * da, 4 * db, 4 * dc); + u /= 2; + EXPECT_EQ (u.x, 6.0); + EXPECT_EQ (u.y, 8.0); + EXPECT_EQ (u.z, 10.0); + // float Vector3 + ModuleBase::Vector3 v; + v.set (4 * fa, 4 * fb, 4 * fc); + v /= 2; + EXPECT_EQ (v.x, 6.0); + EXPECT_EQ (v.y, 8.0); + EXPECT_EQ (v.z, 10.0); + // int Vector3 + ModuleBase::Vector3 w; + w.set (4 * ia, 4 * ib, 4 
* ic); + w /= 2; + EXPECT_EQ (w.x, 6); + EXPECT_EQ (w.y, 8); + EXPECT_EQ (w.z, 10); } -TEST_F(Vector3Test,Negative) +TEST_F (Vector3Test, Negative) { - // double Vector3 - ModuleBase::Vector3 u, up; - u.set(da,db,dc); - up = -u; - EXPECT_EQ(up.x,-3.0); - EXPECT_EQ(up.y,-4.0); - EXPECT_EQ(up.z,-5.0); - // float Vector3 - ModuleBase::Vector3 v, vp; - v.set(fa,fb,fc); - vp = -v; - EXPECT_EQ(vp.x,-3.0); - EXPECT_EQ(vp.y,-4.0); - EXPECT_EQ(vp.z,-5.0); - // int Vector3 - ModuleBase::Vector3 w, wp; - w.set(ia,ib,ic); - wp = -w; - EXPECT_EQ(wp.x,-3); - EXPECT_EQ(wp.y,-4); - EXPECT_EQ(wp.z,-5); + // double Vector3 + ModuleBase::Vector3 u, up; + u.set (da, db, dc); + up = -u; + EXPECT_EQ (up.x, -3.0); + EXPECT_EQ (up.y, -4.0); + EXPECT_EQ (up.z, -5.0); + // float Vector3 + ModuleBase::Vector3 v, vp; + v.set (fa, fb, fc); + vp = -v; + EXPECT_EQ (vp.x, -3.0); + EXPECT_EQ (vp.y, -4.0); + EXPECT_EQ (vp.z, -5.0); + // int Vector3 + ModuleBase::Vector3 w, wp; + w.set (ia, ib, ic); + wp = -w; + EXPECT_EQ (wp.x, -3); + EXPECT_EQ (wp.y, -4); + EXPECT_EQ (wp.z, -5); } -TEST_F(Vector3Test,Access) +TEST_F (Vector3Test, Access) { - // double Vector3 - ModuleBase::Vector3 u; - u.set(da,db,dc); - EXPECT_EQ(u[0],3.0); - EXPECT_EQ(u[1],4.0); - EXPECT_EQ(u[2],5.0); - // float Vector3 - ModuleBase::Vector3 v; - v.set(fa,fb,fc); - EXPECT_EQ(v.x,3.0); - EXPECT_EQ(v.y,4.0); - EXPECT_EQ(v.z,5.0); - // int Vector3 - ModuleBase::Vector3 w; - w.set(ia,ib,ic); - EXPECT_EQ(w.x,3); - EXPECT_EQ(w.y,4); - EXPECT_EQ(w.z,5); + // double Vector3 + ModuleBase::Vector3 u; + u.set (da, db, dc); + EXPECT_EQ (u[0], 3.0); + EXPECT_EQ (u[1], 4.0); + EXPECT_EQ (u[2], 5.0); + // float Vector3 + ModuleBase::Vector3 v; + v.set (fa, fb, fc); + EXPECT_EQ (v.x, 3.0); + EXPECT_EQ (v.y, 4.0); + EXPECT_EQ (v.z, 5.0); + // int Vector3 + ModuleBase::Vector3 w; + w.set (ia, ib, ic); + EXPECT_EQ (w.x, 3); + EXPECT_EQ (w.y, 4); + EXPECT_EQ (w.z, 5); } - -TEST_F(Vector3Test,ConstAccess) +TEST_F (Vector3Test, ConstAccess) { - // 
double Vector3 - ModuleBase::Vector3 u; - u.set(da,db,dc); - const ModuleBase::Vector3 *up(&u); - EXPECT_EQ((*up)[0],3.0); - EXPECT_EQ((*up)[1],4.0); - EXPECT_EQ((*up)[2],5.0); - // float Vector3 - ModuleBase::Vector3 v; - const ModuleBase::Vector3 *vp(&v); - v.set(fa,fb,fc); - EXPECT_EQ((*vp).x,3.0); - EXPECT_EQ((*vp).y,4.0); - EXPECT_EQ((*vp).z,5.0); - // int Vector3 - //ModuleBase::Vector3 w; - //w.set(ia,ib,ic); - //EXPECT_EQ(w.x,3); - //EXPECT_EQ(w.y,4); - //EXPECT_EQ(w.z,5); + // double Vector3 + ModuleBase::Vector3 u; + u.set (da, db, dc); + const ModuleBase::Vector3* up (&u); + EXPECT_EQ ((*up)[0], 3.0); + EXPECT_EQ ((*up)[1], 4.0); + EXPECT_EQ ((*up)[2], 5.0); + // float Vector3 + ModuleBase::Vector3 v; + const ModuleBase::Vector3* vp (&v); + v.set (fa, fb, fc); + EXPECT_EQ ((*vp).x, 3.0); + EXPECT_EQ ((*vp).y, 4.0); + EXPECT_EQ ((*vp).z, 5.0); + // int Vector3 + // ModuleBase::Vector3 w; + // w.set(ia,ib,ic); + // EXPECT_EQ(w.x,3); + // EXPECT_EQ(w.y,4); + // EXPECT_EQ(w.z,5); } - -TEST_F(Vector3Test,Reverse) +TEST_F (Vector3Test, Reverse) { - // double Vector3 - ModuleBase::Vector3 u; - u.set(da,db,dc); - u.reverse(); - EXPECT_EQ(u.x,-3.0); - EXPECT_EQ(u.y,-4.0); - EXPECT_EQ(u.z,-5.0); - // float Vector3 - ModuleBase::Vector3 v; - v.set(fa,fb,fc); - v.reverse(); - EXPECT_EQ(v.x,-3.0); - EXPECT_EQ(v.y,-4.0); - EXPECT_EQ(v.z,-5.0); - // int Vector3 - ModuleBase::Vector3 w; - w.set(ia,ib,ic); - w.reverse(); - EXPECT_EQ(w.x,-3); - EXPECT_EQ(w.y,-4); - EXPECT_EQ(w.z,-5); + // double Vector3 + ModuleBase::Vector3 u; + u.set (da, db, dc); + u.reverse (); + EXPECT_EQ (u.x, -3.0); + EXPECT_EQ (u.y, -4.0); + EXPECT_EQ (u.z, -5.0); + // float Vector3 + ModuleBase::Vector3 v; + v.set (fa, fb, fc); + v.reverse (); + EXPECT_EQ (v.x, -3.0); + EXPECT_EQ (v.y, -4.0); + EXPECT_EQ (v.z, -5.0); + // int Vector3 + ModuleBase::Vector3 w; + w.set (ia, ib, ic); + w.reverse (); + EXPECT_EQ (w.x, -3); + EXPECT_EQ (w.y, -4); + EXPECT_EQ (w.z, -5); } -TEST_F(Vector3Test,VectorPlus) 
+TEST_F (Vector3Test, VectorPlus) { - // double Vector3 - ModuleBase::Vector3 u,up,upp; - u.set(da,db,dc); - up.set(da,db,dc); - upp = u + up; - EXPECT_EQ(upp[0],6.0); - EXPECT_EQ(upp[1],8.0); - EXPECT_EQ(upp[2],10.0); - // float Vector3 - ModuleBase::Vector3 v,vp,vpp; - v.set(fa,fb,fc); - vp.set(fa,fb,fc); - vpp = v + vp; - EXPECT_EQ(vpp.x,6.0); - EXPECT_EQ(vpp.y,8.0); - EXPECT_EQ(vpp.z,10.0); - // int Vector3 - ModuleBase::Vector3 w,wp,wpp; - w.set(ia,ib,ic); - wp.set(ia,ib,ic); - wpp = w + wp; - EXPECT_EQ(wpp.x,6); - EXPECT_EQ(wpp.y,8); - EXPECT_EQ(wpp.z,10); + // double Vector3 + ModuleBase::Vector3 u, up, upp; + u.set (da, db, dc); + up.set (da, db, dc); + upp = u + up; + EXPECT_EQ (upp[0], 6.0); + EXPECT_EQ (upp[1], 8.0); + EXPECT_EQ (upp[2], 10.0); + // float Vector3 + ModuleBase::Vector3 v, vp, vpp; + v.set (fa, fb, fc); + vp.set (fa, fb, fc); + vpp = v + vp; + EXPECT_EQ (vpp.x, 6.0); + EXPECT_EQ (vpp.y, 8.0); + EXPECT_EQ (vpp.z, 10.0); + // int Vector3 + ModuleBase::Vector3 w, wp, wpp; + w.set (ia, ib, ic); + wp.set (ia, ib, ic); + wpp = w + wp; + EXPECT_EQ (wpp.x, 6); + EXPECT_EQ (wpp.y, 8); + EXPECT_EQ (wpp.z, 10); } -TEST_F(Vector3Test,VectorMinus) +TEST_F (Vector3Test, VectorMinus) { - // double Vector3 - ModuleBase::Vector3 u,up,upp; - u.set(da,db,dc); - up.set(2*da,2*db,2*dc); - upp = u - up; - EXPECT_EQ(upp[0],-3.0); - EXPECT_EQ(upp[1],-4.0); - EXPECT_EQ(upp[2],-5.0); - // float Vector3 - ModuleBase::Vector3 v,vp,vpp; - v.set(fa,fb,fc); - vp.set(3*fa,3*fb,3*fc); - vpp = v - vp; - EXPECT_EQ(vpp.x,-6.0); - EXPECT_EQ(vpp.y,-8.0); - EXPECT_EQ(vpp.z,-10.0); - // int Vector3 - ModuleBase::Vector3 w,wp,wpp; - w.set(3*ia,3*ib,3*ic); - wp.set(ia,ib,ic); - wpp = w - wp; - EXPECT_EQ(wpp.x,6); - EXPECT_EQ(wpp.y,8); - EXPECT_EQ(wpp.z,10); + // double Vector3 + ModuleBase::Vector3 u, up, upp; + u.set (da, db, dc); + up.set (2 * da, 2 * db, 2 * dc); + upp = u - up; + EXPECT_EQ (upp[0], -3.0); + EXPECT_EQ (upp[1], -4.0); + EXPECT_EQ (upp[2], -5.0); + // float 
Vector3 + ModuleBase::Vector3 v, vp, vpp; + v.set (fa, fb, fc); + vp.set (3 * fa, 3 * fb, 3 * fc); + vpp = v - vp; + EXPECT_EQ (vpp.x, -6.0); + EXPECT_EQ (vpp.y, -8.0); + EXPECT_EQ (vpp.z, -10.0); + // int Vector3 + ModuleBase::Vector3 w, wp, wpp; + w.set (3 * ia, 3 * ib, 3 * ic); + wp.set (ia, ib, ic); + wpp = w - wp; + EXPECT_EQ (wpp.x, 6); + EXPECT_EQ (wpp.y, 8); + EXPECT_EQ (wpp.z, 10); } -TEST_F(Vector3Test,Norm2) +TEST_F (Vector3Test, Norm2) { - // double Vector3 - ModuleBase::Vector3 u; - u.set(da,db,dc); - EXPECT_EQ(u.norm2(),50.0); - // float Vector3 - ModuleBase::Vector3 v; - v.set(fa,fb,fc); - EXPECT_EQ(v.norm2(),50.0); - // int Vector3 - ModuleBase::Vector3 w; - w.set(ia,ib,ic); - EXPECT_EQ(w.norm2(),50); + // double Vector3 + ModuleBase::Vector3 u; + u.set (da, db, dc); + EXPECT_EQ (u.norm2 (), 50.0); + // float Vector3 + ModuleBase::Vector3 v; + v.set (fa, fb, fc); + EXPECT_EQ (v.norm2 (), 50.0); + // int Vector3 + ModuleBase::Vector3 w; + w.set (ia, ib, ic); + EXPECT_EQ (w.norm2 (), 50); } - -TEST_F(Vector3Test,Norm) +TEST_F (Vector3Test, Norm) { - // double Vector3 - ModuleBase::Vector3 u; - u.set(da,db,dc); - double nm = u.norm(); - double nm2= sqrt(50.0); - EXPECT_DOUBLE_EQ(nm,nm2); - EXPECT_FLOAT_EQ(nm,sqrt(50.0)); - // float Vector3 - ModuleBase::Vector3 v; - v.set(fa,fb,fc); - float nmp = v.norm(); - float nmp2= sqrt(50.0); - EXPECT_FLOAT_EQ(nmp,sqrt(50.0)); + // double Vector3 + ModuleBase::Vector3 u; + u.set (da, db, dc); + double nm = u.norm (); + double nm2 = sqrt (50.0); + EXPECT_DOUBLE_EQ (nm, nm2); + EXPECT_FLOAT_EQ (nm, sqrt (50.0)); + // float Vector3 + ModuleBase::Vector3 v; + v.set (fa, fb, fc); + float nmp = v.norm (); + float nmp2 = sqrt (50.0); + EXPECT_FLOAT_EQ (nmp, sqrt (50.0)); } - -TEST_F(Vector3Test,Normalize) +TEST_F (Vector3Test, Normalize) { - // double Vector3 - ModuleBase::Vector3 u; - u.set(da,db,dc); - u.normalize(); - EXPECT_DOUBLE_EQ(u.norm(),1.0); - // float Vector3 - ModuleBase::Vector3 v; - v.set(fa,fb,fc); - 
v.normalize(); - EXPECT_FLOAT_EQ(v.norm(),1.0); + // double Vector3 + ModuleBase::Vector3 u; + u.set (da, db, dc); + u.normalize (); + EXPECT_DOUBLE_EQ (u.norm (), 1.0); + // float Vector3 + ModuleBase::Vector3 v; + v.set (fa, fb, fc); + v.normalize (); + EXPECT_FLOAT_EQ (v.norm (), 1.0); } -TEST_F(Vector3Test,VmultiplyV) +TEST_F (Vector3Test, VmultiplyV) { - // double Vector3 - ModuleBase::Vector3 u,up; - u.set(da,db,dc); - up.set(da,db,dc); - double mpd = u * up; - EXPECT_EQ(mpd,50.0); - // float Vector3 - ModuleBase::Vector3 v,vp; - v.set(fa,fb,fc); - vp.set(fa,fb,fc); - float mpf = v*vp; - EXPECT_EQ(mpf,50.0); - // int Vector3 - ModuleBase::Vector3 w,wp; - w.set(ia,ib,ic); - wp.set(ia,ib,ic); - int mpi = w*wp; - EXPECT_EQ(mpf,50); + // double Vector3 + ModuleBase::Vector3 u, up; + u.set (da, db, dc); + up.set (da, db, dc); + double mpd = u * up; + EXPECT_EQ (mpd, 50.0); + // float Vector3 + ModuleBase::Vector3 v, vp; + v.set (fa, fb, fc); + vp.set (fa, fb, fc); + float mpf = v * vp; + EXPECT_EQ (mpf, 50.0); + // int Vector3 + ModuleBase::Vector3 w, wp; + w.set (ia, ib, ic); + wp.set (ia, ib, ic); + int mpi = w * wp; + EXPECT_EQ (mpf, 50); } -TEST_F(Vector3Test,VdotV) +TEST_F (Vector3Test, VdotV) { - // double Vector3 - ModuleBase::Vector3 u,up; - u.set(da,db,dc); - up.set(da,db,dc); - double mpd = dot(u,up); - EXPECT_EQ(mpd,50.0); - // float Vector3 - ModuleBase::Vector3 v,vp; - v.set(fa,fb,fc); - vp.set(fa,fb,fc); - float mpf = dot(v,vp); - EXPECT_EQ(mpf,50.0); - // int Vector3 - ModuleBase::Vector3 w,wp; - w.set(ia,ib,ic); - wp.set(ia,ib,ic); - int mpi = dot(w,wp); - EXPECT_EQ(mpf,50); + // double Vector3 + ModuleBase::Vector3 u, up; + u.set (da, db, dc); + up.set (da, db, dc); + double mpd = dot (u, up); + EXPECT_EQ (mpd, 50.0); + // float Vector3 + ModuleBase::Vector3 v, vp; + v.set (fa, fb, fc); + vp.set (fa, fb, fc); + float mpf = dot (v, vp); + EXPECT_EQ (mpf, 50.0); + // int Vector3 + ModuleBase::Vector3 w, wp; + w.set (ia, ib, ic); + wp.set (ia, ib, 
ic); + int mpi = dot (w, wp); + EXPECT_EQ (mpf, 50); } -TEST_F(Vector3Test,VmultiplyNum) +TEST_F (Vector3Test, VmultiplyNum) { - // double Vector3 - ModuleBase::Vector3 u,up,upp; - u.set(da,db,dc); - double s = 3.0; - up = s*u; upp = u*s; - EXPECT_EQ(upp[0],up[0]); - EXPECT_EQ(upp[1],up[1]); - EXPECT_EQ(upp[2],up[2]); - EXPECT_EQ(upp[0],9.0); - EXPECT_EQ(upp[1],12.0); - EXPECT_EQ(upp[2],15.0); - // float Vector3 - ModuleBase::Vector3 v,vp,vpp; - v.set(fa,fb,fc); - float t = 3.0; - vp = t*v; vpp = v*t; - EXPECT_EQ(vpp[0],vp[0]); - EXPECT_EQ(vpp[1],vp[1]); - EXPECT_EQ(vpp[2],vp[2]); - EXPECT_EQ(vpp[0],9.0); - EXPECT_EQ(vpp[1],12.0); - EXPECT_EQ(vpp[2],15.0); - // int Vector3 - ModuleBase::Vector3 w,wp,wpp; - w.set(ia,ib,ic); - int q = 3; - wp = q*w; wpp = w*q; - EXPECT_EQ(wpp[0],wp[0]); - EXPECT_EQ(wpp[1],wp[1]); - EXPECT_EQ(wpp[2],wp[2]); - EXPECT_EQ(wpp[0],9.0); - EXPECT_EQ(wpp[1],12.0); - EXPECT_EQ(wpp[2],15.0); + // double Vector3 + ModuleBase::Vector3 u, up, upp; + u.set (da, db, dc); + double s = 3.0; + up = s * u; + upp = u * s; + EXPECT_EQ (upp[0], up[0]); + EXPECT_EQ (upp[1], up[1]); + EXPECT_EQ (upp[2], up[2]); + EXPECT_EQ (upp[0], 9.0); + EXPECT_EQ (upp[1], 12.0); + EXPECT_EQ (upp[2], 15.0); + // float Vector3 + ModuleBase::Vector3 v, vp, vpp; + v.set (fa, fb, fc); + float t = 3.0; + vp = t * v; + vpp = v * t; + EXPECT_EQ (vpp[0], vp[0]); + EXPECT_EQ (vpp[1], vp[1]); + EXPECT_EQ (vpp[2], vp[2]); + EXPECT_EQ (vpp[0], 9.0); + EXPECT_EQ (vpp[1], 12.0); + EXPECT_EQ (vpp[2], 15.0); + // int Vector3 + ModuleBase::Vector3 w, wp, wpp; + w.set (ia, ib, ic); + int q = 3; + wp = q * w; + wpp = w * q; + EXPECT_EQ (wpp[0], wp[0]); + EXPECT_EQ (wpp[1], wp[1]); + EXPECT_EQ (wpp[2], wp[2]); + EXPECT_EQ (wpp[0], 9.0); + EXPECT_EQ (wpp[1], 12.0); + EXPECT_EQ (wpp[2], 15.0); } -TEST_F(Vector3Test,VoverNum) +TEST_F (Vector3Test, VoverNum) { - // double Vector3 - ModuleBase::Vector3 u,up; - u.set(2*da,2*db,2*dc); - double s = 2.0; - up = u/s; - EXPECT_EQ(up.x,3.0); - 
EXPECT_EQ(up.y,4.0); - EXPECT_EQ(up.z,5.0); - // float Vector3 - ModuleBase::Vector3 v,vp; - v.set(2*fa,2*fb,2*fc); - float t = 2.0; - vp = v/t; - EXPECT_EQ(vp.x,3.0); - EXPECT_EQ(vp.y,4.0); - EXPECT_EQ(vp.z,5.0); - // int Vector3 - ModuleBase::Vector3 w,wp; - w.set(2*ia,2*ib,2*ic); - int q = 2; - wp = w/q; - EXPECT_EQ(wp.x,3); - EXPECT_EQ(wp.y,4); - EXPECT_EQ(wp.z,5); + // double Vector3 + ModuleBase::Vector3 u, up; + u.set (2 * da, 2 * db, 2 * dc); + double s = 2.0; + up = u / s; + EXPECT_EQ (up.x, 3.0); + EXPECT_EQ (up.y, 4.0); + EXPECT_EQ (up.z, 5.0); + // float Vector3 + ModuleBase::Vector3 v, vp; + v.set (2 * fa, 2 * fb, 2 * fc); + float t = 2.0; + vp = v / t; + EXPECT_EQ (vp.x, 3.0); + EXPECT_EQ (vp.y, 4.0); + EXPECT_EQ (vp.z, 5.0); + // int Vector3 + ModuleBase::Vector3 w, wp; + w.set (2 * ia, 2 * ib, 2 * ic); + int q = 2; + wp = w / q; + EXPECT_EQ (wp.x, 3); + EXPECT_EQ (wp.y, 4); + EXPECT_EQ (wp.z, 5); } -TEST_F(Vector3Test,OperatorCaret) +TEST_F (Vector3Test, OperatorCaret) { - // double Vector3 - ModuleBase::Vector3 u,up,upp; - u.set(da,db,dc); - up.set(da,db,dc); - upp = u^up; - EXPECT_EQ(upp.x,u.y*up.z - u.z*up.y); - EXPECT_EQ(upp.y,u.z*up.x - u.x*up.z); - EXPECT_EQ(upp.z,u.x*up.y - u.y*up.x); - // float Vector3 - ModuleBase::Vector3 v,vp,vpp; - v.set(2*fa,2*fb,2*fc); - vp.set(fa,fb,fc); - vpp = v^vp; - EXPECT_EQ(vpp.x,v.y*vp.z - v.z*vp.y); - EXPECT_EQ(vpp.y,v.z*vp.x - v.x*vp.z); - EXPECT_EQ(vpp.z,v.x*vp.y - v.y*vp.x); - // int Vector3 - ModuleBase::Vector3 w,wp,wpp; - w.set(2*ia,2*ib,2*ic); - wp.set(ia,ib,ic); - wpp = w^wp; - EXPECT_EQ(wpp.x,w.y*wp.z - w.z*wp.y); - EXPECT_EQ(wpp.y,w.z*wp.x - w.x*wp.z); - EXPECT_EQ(wpp.z,w.x*wp.y - w.y*wp.x); + // double Vector3 + ModuleBase::Vector3 u, up, upp; + u.set (da, db, dc); + up.set (da, db, dc); + upp = u ^ up; + EXPECT_EQ (upp.x, u.y * up.z - u.z * up.y); + EXPECT_EQ (upp.y, u.z * up.x - u.x * up.z); + EXPECT_EQ (upp.z, u.x * up.y - u.y * up.x); + // float Vector3 + ModuleBase::Vector3 v, vp, vpp; + v.set 
(2 * fa, 2 * fb, 2 * fc); + vp.set (fa, fb, fc); + vpp = v ^ vp; + EXPECT_EQ (vpp.x, v.y * vp.z - v.z * vp.y); + EXPECT_EQ (vpp.y, v.z * vp.x - v.x * vp.z); + EXPECT_EQ (vpp.z, v.x * vp.y - v.y * vp.x); + // int Vector3 + ModuleBase::Vector3 w, wp, wpp; + w.set (2 * ia, 2 * ib, 2 * ic); + wp.set (ia, ib, ic); + wpp = w ^ wp; + EXPECT_EQ (wpp.x, w.y * wp.z - w.z * wp.y); + EXPECT_EQ (wpp.y, w.z * wp.x - w.x * wp.z); + EXPECT_EQ (wpp.z, w.x * wp.y - w.y * wp.x); } -TEST_F(Vector3Test,Cross) +TEST_F (Vector3Test, Cross) { - // double Vector3 - ModuleBase::Vector3 u,up,upp; - u.set(da,db,dc); - up.set(da,db,dc); - upp = cross(u,up); - EXPECT_EQ(upp.x,u.y*up.z - u.z*up.y); - EXPECT_EQ(upp.y,u.z*up.x - u.x*up.z); - EXPECT_EQ(upp.z,u.x*up.y - u.y*up.x); - // float Vector3 - ModuleBase::Vector3 v,vp,vpp; - v.set(2*fa,2*fb,2*fc); - vp.set(fa,fb,fc); - vpp = cross(v,vp); - EXPECT_EQ(vpp.x,v.y*vp.z - v.z*vp.y); - EXPECT_EQ(vpp.y,v.z*vp.x - v.x*vp.z); - EXPECT_EQ(vpp.z,v.x*vp.y - v.y*vp.x); - // int Vector3 - ModuleBase::Vector3 w,wp,wpp; - w.set(2*ia,2*ib,2*ic); - wp.set(ia,ib,ic); - wpp = cross(w,wp); - EXPECT_EQ(wpp.x,w.y*wp.z - w.z*wp.y); - EXPECT_EQ(wpp.y,w.z*wp.x - w.x*wp.z); - EXPECT_EQ(wpp.z,w.x*wp.y - w.y*wp.x); + // double Vector3 + ModuleBase::Vector3 u, up, upp; + u.set (da, db, dc); + up.set (da, db, dc); + upp = cross (u, up); + EXPECT_EQ (upp.x, u.y * up.z - u.z * up.y); + EXPECT_EQ (upp.y, u.z * up.x - u.x * up.z); + EXPECT_EQ (upp.z, u.x * up.y - u.y * up.x); + // float Vector3 + ModuleBase::Vector3 v, vp, vpp; + v.set (2 * fa, 2 * fb, 2 * fc); + vp.set (fa, fb, fc); + vpp = cross (v, vp); + EXPECT_EQ (vpp.x, v.y * vp.z - v.z * vp.y); + EXPECT_EQ (vpp.y, v.z * vp.x - v.x * vp.z); + EXPECT_EQ (vpp.z, v.x * vp.y - v.y * vp.x); + // int Vector3 + ModuleBase::Vector3 w, wp, wpp; + w.set (2 * ia, 2 * ib, 2 * ic); + wp.set (ia, ib, ic); + wpp = cross (w, wp); + EXPECT_EQ (wpp.x, w.y * wp.z - w.z * wp.y); + EXPECT_EQ (wpp.y, w.z * wp.x - w.x * wp.z); + EXPECT_EQ 
(wpp.z, w.x * wp.y - w.y * wp.x); } -TEST_F(Vector3Test,VeqV) +TEST_F (Vector3Test, VeqV) { - // double Vector3 - ModuleBase::Vector3 u,up; - u.set(da,db,dc); - up.set(da,db,dc); - EXPECT_TRUE(up == u); - // float Vector3 - ModuleBase::Vector3 v,vp; - v.set(fa,fb,fc); - vp.set(fa,fb,fc); - EXPECT_TRUE(vp == v); - // int Vector3 - ModuleBase::Vector3 w,wp; - w.set(ia,ib,ic); - wp.set(ia,ib,ic); - EXPECT_TRUE(wp == w); + // double Vector3 + ModuleBase::Vector3 u, up; + u.set (da, db, dc); + up.set (da, db, dc); + EXPECT_TRUE (up == u); + // float Vector3 + ModuleBase::Vector3 v, vp; + v.set (fa, fb, fc); + vp.set (fa, fb, fc); + EXPECT_TRUE (vp == v); + // int Vector3 + ModuleBase::Vector3 w, wp; + w.set (ia, ib, ic); + wp.set (ia, ib, ic); + EXPECT_TRUE (wp == w); } -TEST_F(Vector3Test,VneV) +TEST_F (Vector3Test, VneV) { - // double Vector3 - ModuleBase::Vector3 u,up; - u.set(da,db,dc); - up.set(da,db,2*dc); - EXPECT_TRUE(up != u); - // float Vector3 - ModuleBase::Vector3 v,vp; - v.set(fa,fb,2*fc); - vp.set(fa,fb,fc); - EXPECT_TRUE(vp != v); - // int Vector3 - ModuleBase::Vector3 w,wp; - w.set(ia,ib,2*ic); - wp.set(ia,ib,ic); - EXPECT_TRUE(wp != w); + // double Vector3 + ModuleBase::Vector3 u, up; + u.set (da, db, dc); + up.set (da, db, 2 * dc); + EXPECT_TRUE (up != u); + // float Vector3 + ModuleBase::Vector3 v, vp; + v.set (fa, fb, 2 * fc); + vp.set (fa, fb, fc); + EXPECT_TRUE (vp != v); + // int Vector3 + ModuleBase::Vector3 w, wp; + w.set (ia, ib, 2 * ic); + wp.set (ia, ib, ic); + EXPECT_TRUE (wp != w); } -TEST_F(Vector3Test, VltV) +TEST_F (Vector3Test, VltV) { - ModuleBase::Vector3 u, up; - u.set(da, db, dc); - up.set(dc, db, da); - EXPECT_TRUE(u < up); - ModuleBase::Vector3 v, vp; - v.set(fa, fb, fc); - vp.set(fa, fb, fc); - EXPECT_FALSE(v < vp); - ModuleBase::Vector3 w, wp; - w.set(ia, ib, ic); - wp.set(ib, ib, ic); - EXPECT_TRUE(w < wp); + ModuleBase::Vector3 u, up; + u.set (da, db, dc); + up.set (dc, db, da); + EXPECT_TRUE (u < up); + ModuleBase::Vector3 v, 
vp; + v.set (fa, fb, fc); + vp.set (fa, fb, fc); + EXPECT_FALSE (v < vp); + ModuleBase::Vector3 w, wp; + w.set (ia, ib, ic); + wp.set (ib, ib, ic); + EXPECT_TRUE (w < wp); } -TEST_F(Vector3Test,StdOutV) +TEST_F (Vector3Test, StdOutV) { - // double Vector3 - ModuleBase::Vector3 u(da,db,dc); - testing::internal::CaptureStdout(); - std::cout << u << std::endl; - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("(")); - // float Vector3 - ModuleBase::Vector3 v(fa,fb,fc); - testing::internal::CaptureStdout(); - std::cout << v << std::endl; - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr(",")); - // int Vector3 - ModuleBase::Vector3 w(ia,ib,ic); - testing::internal::CaptureStdout(); - std::cout << w << std::endl; - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr(")")); + // double Vector3 + ModuleBase::Vector3 u (da, db, dc); + testing::internal::CaptureStdout (); + std::cout << u << std::endl; + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("(")); + // float Vector3 + ModuleBase::Vector3 v (fa, fb, fc); + testing::internal::CaptureStdout (); + std::cout << v << std::endl; + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr (",")); + // int Vector3 + ModuleBase::Vector3 w (ia, ib, ic); + testing::internal::CaptureStdout (); + std::cout << w << std::endl; + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr (")")); } -TEST_F(Vector3Test,PrintV) +TEST_F (Vector3Test, PrintV) { - // double Vector3 - ModuleBase::Vector3 u(3.1415926,db,dc); - testing::internal::CaptureStdout(); - u.print(); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("3.1416")); - // float Vector3 - ModuleBase::Vector3 v(fa,fb,3.14); - testing::internal::CaptureStdout(); - v.print(); - output = 
testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("3.14")); - // int Vector3 - ModuleBase::Vector3 w(ia,101,ic); - testing::internal::CaptureStdout(); - w.print(); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("101")); + // double Vector3 + ModuleBase::Vector3 u (3.1415926, db, dc); + testing::internal::CaptureStdout (); + u.print (); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("3.1416")); + // float Vector3 + ModuleBase::Vector3 v (fa, fb, 3.14); + testing::internal::CaptureStdout (); + v.print (); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("3.14")); + // int Vector3 + ModuleBase::Vector3 w (ia, 101, ic); + testing::internal::CaptureStdout (); + w.print (); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("101")); } - diff --git a/source/source_base/test/ylm_test.cpp b/source/source_base/test/ylm_test.cpp index 4e97c907867..a7dd600bac4 100644 --- a/source/source_base/test/ylm_test.cpp +++ b/source/source_base/test/ylm_test.cpp @@ -22,107 +22,109 @@ class ylmTest : public testing::Test { }; -TEST_F(ylmTest,Zeros) +TEST_F (ylmTest, Zeros) { double aaaa[100]; - ModuleBase::Ylm::ZEROS(aaaa,100); - for(int i = 0; i < 100; i++) - { - EXPECT_EQ(aaaa[i],0.0); - } + ModuleBase::Ylm::ZEROS (aaaa, 100); + for (int i = 0; i < 100; i++) + { + EXPECT_EQ (aaaa[i], 0.0); + } } // Test Hessian symmetry for l=5 -TEST_F(ylmTest, HessianSymmetryL5) +TEST_F (ylmTest, HessianSymmetryL5) { const int l = 5; const double x = 1.5, y = 2.0, z = 1.0; std::vector> hrly; - ModuleBase::Ylm::hes_rl_sph_harm(l, x, y, z, hrly); + ModuleBase::Ylm::hes_rl_sph_harm (l, x, y, z, hrly); // Check that Hessian is symmetric for all m values - for (int idx = l*l; idx < (l+1)*(l+1); idx++) { - // hrly format: [H_xx, H_xy, H_xz, H_yy, H_yz, H_zz] - // Symmetry is built into the storage 
format - // Just verify the array is properly sized - EXPECT_EQ(hrly[idx].size(), 6); - } + for (int idx = l * l; idx < (l + 1) * (l + 1); idx++) + { + // hrly format: [H_xx, H_xy, H_xz, H_yy, H_yz, H_zz] + // Symmetry is built into the storage format + // Just verify the array is properly sized + EXPECT_EQ (hrly[idx].size (), 6); + } } // Test Hessian symmetry for l=6 -TEST_F(ylmTest, HessianSymmetryL6) +TEST_F (ylmTest, HessianSymmetryL6) { const int l = 6; const double x = 1.5, y = 2.0, z = 1.0; std::vector> hrly; - ModuleBase::Ylm::hes_rl_sph_harm(l, x, y, z, hrly); + ModuleBase::Ylm::hes_rl_sph_harm (l, x, y, z, hrly); // Check that Hessian is symmetric for all m values - for (int idx = l*l; idx < (l+1)*(l+1); idx++) { - EXPECT_EQ(hrly[idx].size(), 6); - } + for (int idx = l * l; idx < (l + 1) * (l + 1); idx++) + { + EXPECT_EQ (hrly[idx].size (), 6); + } } // Test Hessian finite difference for l=5 using central difference -TEST_F(ylmTest, HessianFiniteDifferenceL5) +TEST_F (ylmTest, HessianFiniteDifferenceL5) { const int l = 5; const double x = 1.5, y = 2.0, z = 1.0; const double h = 1e-5; - const double tol = 1e-3; // Relaxed tolerance for numerical differentiation + const double tol = 1e-3; // Relaxed tolerance for numerical differentiation std::vector> hrly; - ModuleBase::Ylm::hes_rl_sph_harm(l, x, y, z, hrly); + ModuleBase::Ylm::hes_rl_sph_harm (l, x, y, z, hrly); // Allocate gradient arrays for central difference - const int nylm = (l+1)*(l+1); - std::vector rly_xp(nylm), rly_xm(nylm); - std::vector grly_xp(nylm * 3), grly_xm(nylm * 3); + const int nylm = (l + 1) * (l + 1); + std::vector rly_xp (nylm), rly_xm (nylm); + std::vector grly_xp (nylm * 3), grly_xm (nylm * 3); // Compute gradient at (x+h, y, z) and (x-h, y, z) - ModuleBase::Ylm::grad_rl_sph_harm(l, x+h, y, z, rly_xp.data(), grly_xp.data()); - ModuleBase::Ylm::grad_rl_sph_harm(l, x-h, y, z, rly_xm.data(), grly_xm.data()); + ModuleBase::Ylm::grad_rl_sph_harm (l, x + h, y, z, rly_xp.data (), 
grly_xp.data ()); + ModuleBase::Ylm::grad_rl_sph_harm (l, x - h, y, z, rly_xm.data (), grly_xm.data ()); // Test H_xx for m=0 (index 25) using central difference int idx = 25; - double H_xx_fd = (grly_xp[idx*3] - grly_xm[idx*3]) / (2.0 * h); + double H_xx_fd = (grly_xp[idx * 3] - grly_xm[idx * 3]) / (2.0 * h); double H_xx_analytic = hrly[idx][0]; - EXPECT_NEAR(H_xx_fd, H_xx_analytic, tol); + EXPECT_NEAR (H_xx_fd, H_xx_analytic, tol); } // Test Hessian finite difference for l=6 using central difference -TEST_F(ylmTest, HessianFiniteDifferenceL6) +TEST_F (ylmTest, HessianFiniteDifferenceL6) { const int l = 6; const double x = 1.5, y = 2.0, z = 1.0; const double h = 1e-5; - const double tol = 1e-3; // Relaxed tolerance for numerical differentiation + const double tol = 1e-3; // Relaxed tolerance for numerical differentiation std::vector> hrly; - ModuleBase::Ylm::hes_rl_sph_harm(l, x, y, z, hrly); + ModuleBase::Ylm::hes_rl_sph_harm (l, x, y, z, hrly); // Allocate gradient arrays for central difference - const int nylm = (l+1)*(l+1); - std::vector rly_xp(nylm), rly_xm(nylm); - std::vector grly_xp(nylm * 3), grly_xm(nylm * 3); + const int nylm = (l + 1) * (l + 1); + std::vector rly_xp (nylm), rly_xm (nylm); + std::vector grly_xp (nylm * 3), grly_xm (nylm * 3); // Compute gradient at (x+h, y, z) and (x-h, y, z) - ModuleBase::Ylm::grad_rl_sph_harm(l, x+h, y, z, rly_xp.data(), grly_xp.data()); - ModuleBase::Ylm::grad_rl_sph_harm(l, x-h, y, z, rly_xm.data(), grly_xm.data()); + ModuleBase::Ylm::grad_rl_sph_harm (l, x + h, y, z, rly_xp.data (), grly_xp.data ()); + ModuleBase::Ylm::grad_rl_sph_harm (l, x - h, y, z, rly_xm.data (), grly_xm.data ()); // Test H_xx for m=0 (index 36) using central difference int idx = 36; - double H_xx_fd = (grly_xp[idx*3] - grly_xm[idx*3]) / (2.0 * h); + double H_xx_fd = (grly_xp[idx * 3] - grly_xm[idx * 3]) / (2.0 * h); double H_xx_analytic = hrly[idx][0]; - EXPECT_NEAR(H_xx_fd, H_xx_analytic, tol); + EXPECT_NEAR (H_xx_fd, H_xx_analytic, tol); } 
// Test that l>6 triggers error -TEST_F(ylmTest, HessianL7NotImplemented) +TEST_F (ylmTest, HessianL7NotImplemented) { const int l = 7; const double x = 1.0, y = 0.0, z = 0.0; @@ -134,7 +136,7 @@ TEST_F(ylmTest, HessianL7NotImplemented) } // Test all Hessian components for l=2 -TEST_F(ylmTest, HessianAllComponentsL2) +TEST_F (ylmTest, HessianAllComponentsL2) { const int l = 2; const double x = 0.5, y = 1.0, z = 1.5; @@ -142,56 +144,56 @@ TEST_F(ylmTest, HessianAllComponentsL2) const double tol = 1e-3; std::vector> hrly; - ModuleBase::Ylm::hes_rl_sph_harm(l, x, y, z, hrly); + ModuleBase::Ylm::hes_rl_sph_harm (l, x, y, z, hrly); // Test all 6 Hessian components for m=0 (index 4) int idx = 4; // Allocate gradient arrays - const int nylm = (l+1)*(l+1); - std::vector rly_xp(nylm), rly_xm(nylm); - std::vector rly_yp(nylm), rly_ym(nylm); - std::vector rly_zp(nylm), rly_zm(nylm); + const int nylm = (l + 1) * (l + 1); + std::vector rly_xp (nylm), rly_xm (nylm); + std::vector rly_yp (nylm), rly_ym (nylm); + std::vector rly_zp (nylm), rly_zm (nylm); - std::vector grly_xp(nylm * 3), grly_xm(nylm * 3); - std::vector grly_yp(nylm * 3), grly_ym(nylm * 3); - std::vector grly_zp(nylm * 3), grly_zm(nylm * 3); + std::vector grly_xp (nylm * 3), grly_xm (nylm * 3); + std::vector grly_yp (nylm * 3), grly_ym (nylm * 3); + std::vector grly_zp (nylm * 3), grly_zm (nylm * 3); // Compute gradients at perturbed points - ModuleBase::Ylm::grad_rl_sph_harm(l, x+h, y, z, rly_xp.data(), grly_xp.data()); - ModuleBase::Ylm::grad_rl_sph_harm(l, x-h, y, z, rly_xm.data(), grly_xm.data()); - ModuleBase::Ylm::grad_rl_sph_harm(l, x, y+h, z, rly_yp.data(), grly_yp.data()); - ModuleBase::Ylm::grad_rl_sph_harm(l, x, y-h, z, rly_ym.data(), grly_ym.data()); - ModuleBase::Ylm::grad_rl_sph_harm(l, x, y, z+h, rly_zp.data(), grly_zp.data()); - ModuleBase::Ylm::grad_rl_sph_harm(l, x, y, z-h, rly_zm.data(), grly_zm.data()); + ModuleBase::Ylm::grad_rl_sph_harm (l, x + h, y, z, rly_xp.data (), grly_xp.data ()); + 
ModuleBase::Ylm::grad_rl_sph_harm (l, x - h, y, z, rly_xm.data (), grly_xm.data ()); + ModuleBase::Ylm::grad_rl_sph_harm (l, x, y + h, z, rly_yp.data (), grly_yp.data ()); + ModuleBase::Ylm::grad_rl_sph_harm (l, x, y - h, z, rly_ym.data (), grly_ym.data ()); + ModuleBase::Ylm::grad_rl_sph_harm (l, x, y, z + h, rly_zp.data (), grly_zp.data ()); + ModuleBase::Ylm::grad_rl_sph_harm (l, x, y, z - h, rly_zm.data (), grly_zm.data ()); // Test H_xx (index 0) - double H_xx_fd = (grly_xp[idx*3] - grly_xm[idx*3]) / (2.0 * h); - EXPECT_NEAR(H_xx_fd, hrly[idx][0], tol); + double H_xx_fd = (grly_xp[idx * 3] - grly_xm[idx * 3]) / (2.0 * h); + EXPECT_NEAR (H_xx_fd, hrly[idx][0], tol); // Test H_xy (index 1) - double H_xy_fd = (grly_xp[idx*3 + 1] - grly_xm[idx*3 + 1]) / (2.0 * h); - EXPECT_NEAR(H_xy_fd, hrly[idx][1], tol); + double H_xy_fd = (grly_xp[idx * 3 + 1] - grly_xm[idx * 3 + 1]) / (2.0 * h); + EXPECT_NEAR (H_xy_fd, hrly[idx][1], tol); // Test H_xz (index 2) - double H_xz_fd = (grly_xp[idx*3 + 2] - grly_xm[idx*3 + 2]) / (2.0 * h); - EXPECT_NEAR(H_xz_fd, hrly[idx][2], tol); + double H_xz_fd = (grly_xp[idx * 3 + 2] - grly_xm[idx * 3 + 2]) / (2.0 * h); + EXPECT_NEAR (H_xz_fd, hrly[idx][2], tol); // Test H_yy (index 3) - double H_yy_fd = (grly_yp[idx*3 + 1] - grly_ym[idx*3 + 1]) / (2.0 * h); - EXPECT_NEAR(H_yy_fd, hrly[idx][3], tol); + double H_yy_fd = (grly_yp[idx * 3 + 1] - grly_ym[idx * 3 + 1]) / (2.0 * h); + EXPECT_NEAR (H_yy_fd, hrly[idx][3], tol); // Test H_yz (index 4) - double H_yz_fd = (grly_yp[idx*3 + 2] - grly_ym[idx*3 + 2]) / (2.0 * h); - EXPECT_NEAR(H_yz_fd, hrly[idx][4], tol); + double H_yz_fd = (grly_yp[idx * 3 + 2] - grly_ym[idx * 3 + 2]) / (2.0 * h); + EXPECT_NEAR (H_yz_fd, hrly[idx][4], tol); // Test H_zz (index 5) - double H_zz_fd = (grly_zp[idx*3 + 2] - grly_zm[idx*3 + 2]) / (2.0 * h); - EXPECT_NEAR(H_zz_fd, hrly[idx][5], tol); + double H_zz_fd = (grly_zp[idx * 3 + 2] - grly_zm[idx * 3 + 2]) / (2.0 * h); + EXPECT_NEAR (H_zz_fd, hrly[idx][5], tol); } // Test 
Hessian for m=0 values across different l -TEST_F(ylmTest, HessianM0DifferentL) +TEST_F (ylmTest, HessianM0DifferentL) { const double x = 1.0, y = 0.5, z = 2.0; const double h = 1e-5; @@ -200,27 +202,28 @@ TEST_F(ylmTest, HessianM0DifferentL) // Test m=0 for l=0,1,2,3,4 std::vector l_values = {0, 1, 2, 3, 4}; - for (int l : l_values) { - std::vector> hrly; - ModuleBase::Ylm::hes_rl_sph_harm(l, x, y, z, hrly); + for (int l: l_values) + { + std::vector> hrly; + ModuleBase::Ylm::hes_rl_sph_harm (l, x, y, z, hrly); - // Allocate gradient arrays - const int nylm = (l+1)*(l+1); - std::vector rly_xp(nylm), rly_xm(nylm); - std::vector grly_xp(nylm * 3), grly_xm(nylm * 3); + // Allocate gradient arrays + const int nylm = (l + 1) * (l + 1); + std::vector rly_xp (nylm), rly_xm (nylm); + std::vector grly_xp (nylm * 3), grly_xm (nylm * 3); - ModuleBase::Ylm::grad_rl_sph_harm(l, x+h, y, z, rly_xp.data(), grly_xp.data()); - ModuleBase::Ylm::grad_rl_sph_harm(l, x-h, y, z, rly_xm.data(), grly_xm.data()); + ModuleBase::Ylm::grad_rl_sph_harm (l, x + h, y, z, rly_xp.data (), grly_xp.data ()); + ModuleBase::Ylm::grad_rl_sph_harm (l, x - h, y, z, rly_xm.data (), grly_xm.data ()); - // Test H_xx for m=0 (index l*l) - int idx = l * l; - double H_xx_fd = (grly_xp[idx*3] - grly_xm[idx*3]) / (2.0 * h); - EXPECT_NEAR(H_xx_fd, hrly[idx][0], tol) << "Failed for l=" << l << " m=0"; - } + // Test H_xx for m=0 (index l*l) + int idx = l * l; + double H_xx_fd = (grly_xp[idx * 3] - grly_xm[idx * 3]) / (2.0 * h); + EXPECT_NEAR (H_xx_fd, hrly[idx][0], tol) << "Failed for l=" << l << " m=0"; + } } // Test Hessian at special points (on axes) -TEST_F(ylmTest, HessianSpecialPointsL4) +TEST_F (ylmTest, HessianSpecialPointsL4) { const int l = 4; const double h = 1e-5; @@ -230,63 +233,67 @@ TEST_F(ylmTest, HessianSpecialPointsL4) { const double x = 0.0, y = 0.0, z = 1.0; std::vector> hrly; - ModuleBase::Ylm::hes_rl_sph_harm(l, x, y, z, hrly); + ModuleBase::Ylm::hes_rl_sph_harm (l, x, y, z, hrly); // Verify 
array is properly sized - for (int idx = l*l; idx < (l+1)*(l+1); idx++) { - EXPECT_EQ(hrly[idx].size(), 6); - } + for (int idx = l * l; idx < (l + 1) * (l + 1); idx++) + { + EXPECT_EQ (hrly[idx].size (), 6); + } } // Test on x-axis { const double x = 1.0, y = 0.0, z = 0.0; std::vector> hrly; - ModuleBase::Ylm::hes_rl_sph_harm(l, x, y, z, hrly); + ModuleBase::Ylm::hes_rl_sph_harm (l, x, y, z, hrly); - for (int idx = l*l; idx < (l+1)*(l+1); idx++) { - EXPECT_EQ(hrly[idx].size(), 6); - } + for (int idx = l * l; idx < (l + 1) * (l + 1); idx++) + { + EXPECT_EQ (hrly[idx].size (), 6); + } } // Test on y-axis { const double x = 0.0, y = 1.0, z = 0.0; std::vector> hrly; - ModuleBase::Ylm::hes_rl_sph_harm(l, x, y, z, hrly); + ModuleBase::Ylm::hes_rl_sph_harm (l, x, y, z, hrly); - for (int idx = l*l; idx < (l+1)*(l+1); idx++) { - EXPECT_EQ(hrly[idx].size(), 6); - } + for (int idx = l * l; idx < (l + 1) * (l + 1); idx++) + { + EXPECT_EQ (hrly[idx].size (), 6); + } } } // Test Hessian trace property (Laplacian = 0 for harmonic functions) -TEST_F(ylmTest, HessianTraceL3) +TEST_F (ylmTest, HessianTraceL3) { const int l = 3; const double x = 1.2, y = 0.8, z = 1.5; const double tol = 1e-10; std::vector> hrly; - ModuleBase::Ylm::hes_rl_sph_harm(l, x, y, z, hrly); + ModuleBase::Ylm::hes_rl_sph_harm (l, x, y, z, hrly); // For spherical harmonics Y_lm(r), the Laplacian should satisfy: // ∇²(r^l * Y_lm) = l(l+1) * r^(l-2) * Y_lm // For real spherical harmonics, we need to check the trace // Note: This is a property check, not a strict zero test - for (int idx = l*l; idx < (l+1)*(l+1); idx++) { - // Trace = H_xx + H_yy + H_zz - double trace = hrly[idx][0] + hrly[idx][3] + hrly[idx][5]; - // The trace should be finite and well-defined - EXPECT_FALSE(std::isnan(trace)); - EXPECT_FALSE(std::isinf(trace)); - } + for (int idx = l * l; idx < (l + 1) * (l + 1); idx++) + { + // Trace = H_xx + H_yy + H_zz + double trace = hrly[idx][0] + hrly[idx][3] + hrly[idx][5]; + // The trace should be 
finite and well-defined + EXPECT_FALSE (std::isnan (trace)); + EXPECT_FALSE (std::isinf (trace)); + } } // Test Hessian consistency across different coordinate systems -TEST_F(ylmTest, HessianRotationalInvariance) +TEST_F (ylmTest, HessianRotationalInvariance) { const int l = 2; const double r = 2.0; @@ -297,102 +304,106 @@ TEST_F(ylmTest, HessianRotationalInvariance) const double x2 = 0.0, y2 = r, z2 = 0.0; std::vector> hrly1, hrly2; - ModuleBase::Ylm::hes_rl_sph_harm(l, x1, y1, z1, hrly1); - ModuleBase::Ylm::hes_rl_sph_harm(l, x2, y2, z2, hrly2); + ModuleBase::Ylm::hes_rl_sph_harm (l, x1, y1, z1, hrly1); + ModuleBase::Ylm::hes_rl_sph_harm (l, x2, y2, z2, hrly2); // For m=0 (index 4), the Hessian should have certain symmetries int idx = 4; // Both should be properly sized - EXPECT_EQ(hrly1[idx].size(), 6); - EXPECT_EQ(hrly2[idx].size(), 6); + EXPECT_EQ (hrly1[idx].size (), 6); + EXPECT_EQ (hrly2[idx].size (), 6); // Values should be finite - for (int i = 0; i < 6; i++) { - EXPECT_FALSE(std::isnan(hrly1[idx][i])); - EXPECT_FALSE(std::isnan(hrly2[idx][i])); - EXPECT_FALSE(std::isinf(hrly1[idx][i])); - EXPECT_FALSE(std::isinf(hrly2[idx][i])); - } + for (int i = 0; i < 6; i++) + { + EXPECT_FALSE (std::isnan (hrly1[idx][i])); + EXPECT_FALSE (std::isnan (hrly2[idx][i])); + EXPECT_FALSE (std::isinf (hrly1[idx][i])); + EXPECT_FALSE (std::isinf (hrly2[idx][i])); + } } // Test Hessian for l=0 (constant function) -TEST_F(ylmTest, HessianL0Constant) +TEST_F (ylmTest, HessianL0Constant) { const int l = 0; const double x = 1.0, y = 2.0, z = 3.0; std::vector> hrly; - ModuleBase::Ylm::hes_rl_sph_harm(l, x, y, z, hrly); + ModuleBase::Ylm::hes_rl_sph_harm (l, x, y, z, hrly); // For l=0, Y_00 is constant, so all second derivatives should be zero int idx = 0; const double tol = 1e-10; - EXPECT_NEAR(hrly[idx][0], 0.0, tol); // H_xx - EXPECT_NEAR(hrly[idx][1], 0.0, tol); // H_xy - EXPECT_NEAR(hrly[idx][2], 0.0, tol); // H_xz - EXPECT_NEAR(hrly[idx][3], 0.0, tol); // H_yy - 
EXPECT_NEAR(hrly[idx][4], 0.0, tol); // H_yz - EXPECT_NEAR(hrly[idx][5], 0.0, tol); // H_zz + EXPECT_NEAR (hrly[idx][0], 0.0, tol); // H_xx + EXPECT_NEAR (hrly[idx][1], 0.0, tol); // H_xy + EXPECT_NEAR (hrly[idx][2], 0.0, tol); // H_xz + EXPECT_NEAR (hrly[idx][3], 0.0, tol); // H_yy + EXPECT_NEAR (hrly[idx][4], 0.0, tol); // H_yz + EXPECT_NEAR (hrly[idx][5], 0.0, tol); // H_zz } // Test Hessian for l=1 (linear functions) -TEST_F(ylmTest, HessianL1Linear) +TEST_F (ylmTest, HessianL1Linear) { const int l = 1; const double x = 1.0, y = 2.0, z = 3.0; std::vector> hrly; - ModuleBase::Ylm::hes_rl_sph_harm(l, x, y, z, hrly); + ModuleBase::Ylm::hes_rl_sph_harm (l, x, y, z, hrly); // For l=1, Y_1m are linear functions, so all second derivatives should be zero const double tol = 1e-10; - for (int idx = 1; idx <= 3; idx++) { - EXPECT_NEAR(hrly[idx][0], 0.0, tol); // H_xx - EXPECT_NEAR(hrly[idx][1], 0.0, tol); // H_xy - EXPECT_NEAR(hrly[idx][2], 0.0, tol); // H_xz - EXPECT_NEAR(hrly[idx][3], 0.0, tol); // H_yy - EXPECT_NEAR(hrly[idx][4], 0.0, tol); // H_yz - EXPECT_NEAR(hrly[idx][5], 0.0, tol); // H_zz - } + for (int idx = 1; idx <= 3; idx++) + { + EXPECT_NEAR (hrly[idx][0], 0.0, tol); // H_xx + EXPECT_NEAR (hrly[idx][1], 0.0, tol); // H_xy + EXPECT_NEAR (hrly[idx][2], 0.0, tol); // H_xz + EXPECT_NEAR (hrly[idx][3], 0.0, tol); // H_yy + EXPECT_NEAR (hrly[idx][4], 0.0, tol); // H_yz + EXPECT_NEAR (hrly[idx][5], 0.0, tol); // H_zz + } } // Test Hessian numerical stability for small coordinates -TEST_F(ylmTest, HessianNumericalStability) +TEST_F (ylmTest, HessianNumericalStability) { const int l = 3; const double x = 1e-3, y = 2e-3, z = 3e-3; std::vector> hrly; - ModuleBase::Ylm::hes_rl_sph_harm(l, x, y, z, hrly); + ModuleBase::Ylm::hes_rl_sph_harm (l, x, y, z, hrly); // Check that all values are finite (no NaN or Inf) - for (int idx = l*l; idx < (l+1)*(l+1); idx++) { - for (int i = 0; i < 6; i++) { - EXPECT_FALSE(std::isnan(hrly[idx][i])) - << "NaN detected at idx=" << idx << " 
component=" << i; - EXPECT_FALSE(std::isinf(hrly[idx][i])) - << "Inf detected at idx=" << idx << " component=" << i; + for (int idx = l * l; idx < (l + 1) * (l + 1); idx++) + { + for (int i = 0; i < 6; i++) + { + EXPECT_FALSE (std::isnan (hrly[idx][i])) << "NaN detected at idx=" << idx << " component=" << i; + EXPECT_FALSE (std::isinf (hrly[idx][i])) << "Inf detected at idx=" << idx << " component=" << i; + } } - } } // Test Hessian for large coordinates -TEST_F(ylmTest, HessianLargeCoordinates) +TEST_F (ylmTest, HessianLargeCoordinates) { const int l = 4; const double x = 100.0, y = 200.0, z = 300.0; std::vector> hrly; - ModuleBase::Ylm::hes_rl_sph_harm(l, x, y, z, hrly); + ModuleBase::Ylm::hes_rl_sph_harm (l, x, y, z, hrly); // Check that all values are finite - for (int idx = l*l; idx < (l+1)*(l+1); idx++) { - for (int i = 0; i < 6; i++) { - EXPECT_FALSE(std::isnan(hrly[idx][i])); - EXPECT_FALSE(std::isinf(hrly[idx][i])); + for (int idx = l * l; idx < (l + 1) * (l + 1); idx++) + { + for (int i = 0; i < 6; i++) + { + EXPECT_FALSE (std::isnan (hrly[idx][i])); + EXPECT_FALSE (std::isinf (hrly[idx][i])); + } } - } } diff --git a/source/source_base/test_parallel/blacs_connector_test.cpp b/source/source_base/test_parallel/blacs_connector_test.cpp index 4e92bfd44d9..69208dec70f 100644 --- a/source/source_base/test_parallel/blacs_connector_test.cpp +++ b/source/source_base/test_parallel/blacs_connector_test.cpp @@ -17,10 +17,10 @@ * coordinates (row, col) in the grid. 
*/ -class BLACSTest: public testing::Test +class BLACSTest : public testing::Test { -protected: - void SetUp(); + protected: + void SetUp (); int rank = 0; int nprocs = 0; @@ -35,42 +35,41 @@ class BLACSTest: public testing::Test int ipcol = -1; }; -void BLACSTest::SetUp() +void + BLACSTest::SetUp () { - Cblacs_pinfo(&rank, &nprocs); + Cblacs_pinfo (&rank, &nprocs); } - -TEST_F(BLACSTest, WorldGrid) +TEST_F (BLACSTest, WorldGrid) { // generate a grid of size 1 x nproc nprow = 1; npcol = nprocs; - int ictxt_row = Csys2blacs_handle(MPI_COMM_WORLD); - Cblacs_gridinit(&ictxt_row, &layout, nprow, npcol); - Cblacs_gridinfo(ictxt_row, &nprow, &npcol, &iprow, &ipcol); + int ictxt_row = Csys2blacs_handle (MPI_COMM_WORLD); + Cblacs_gridinit (&ictxt_row, &layout, nprow, npcol); + Cblacs_gridinfo (ictxt_row, &nprow, &npcol, &iprow, &ipcol); - EXPECT_EQ(iprow, 0); - EXPECT_EQ(ipcol, rank); + EXPECT_EQ (iprow, 0); + EXPECT_EQ (ipcol, rank); // generate a grid of size nproc x 1 nprow = nprocs; npcol = 1; - int ictxt_col = Csys2blacs_handle(MPI_COMM_WORLD); - Cblacs_gridinit(&ictxt_col, &layout, nprow, npcol); - Cblacs_gridinfo(ictxt_col, &nprow, &npcol, &iprow, &ipcol); - - EXPECT_EQ(iprow, rank); - EXPECT_EQ(ipcol, 0); + int ictxt_col = Csys2blacs_handle (MPI_COMM_WORLD); + Cblacs_gridinit (&ictxt_col, &layout, nprow, npcol); + Cblacs_gridinfo (ictxt_col, &nprow, &npcol, &iprow, &ipcol); + EXPECT_EQ (iprow, rank); + EXPECT_EQ (ipcol, 0); // two BLACS grids should have difference context index - EXPECT_NE(ictxt_row, ictxt_col); + EXPECT_NE (ictxt_row, ictxt_col); } -TEST_F(BLACSTest, SplitGrid) +TEST_F (BLACSTest, SplitGrid) { // this test create BLACS grids based on a disjoint communicator @@ -80,29 +79,30 @@ TEST_F(BLACSTest, SplitGrid) // sub communicators are divided based on odd / even ranks MPI_Comm comm_sub; - MPI_Comm_split(MPI_COMM_WORLD, rank % n_blacs, rank, &comm_sub); - MPI_Comm_rank(comm_sub, &rank_sub); - MPI_Comm_size(comm_sub, &nprocs_sub); + MPI_Comm_split 
(MPI_COMM_WORLD, rank % n_blacs, rank, &comm_sub); + MPI_Comm_rank (comm_sub, &rank_sub); + MPI_Comm_size (comm_sub, &nprocs_sub); - int ctxt_sub = Csys2blacs_handle(comm_sub); + int ctxt_sub = Csys2blacs_handle (comm_sub); nprow = 1, npcol = nprocs_sub; // row-like grids - Cblacs_gridinit(&ctxt_sub, &layout, nprow, npcol); - Cblacs_gridinfo(ctxt_sub, &nprow, &npcol, &iprow, &ipcol); + Cblacs_gridinit (&ctxt_sub, &layout, nprow, npcol); + Cblacs_gridinfo (ctxt_sub, &nprow, &npcol, &iprow, &ipcol); // verifies that the BLACS grid is created based on comm_sub instead of MPI_COMM_WORLD - EXPECT_EQ(iprow, 0); - EXPECT_EQ(ipcol, rank_sub); + EXPECT_EQ (iprow, 0); + EXPECT_EQ (ipcol, rank_sub); } -int main(int argc, char** argv) +int + main (int argc, char** argv) { - MPI_Init(&argc, &argv); - testing::InitGoogleTest(&argc, argv); + MPI_Init (&argc, &argv); + testing::InitGoogleTest (&argc, argv); - int result = RUN_ALL_TESTS(); + int result = RUN_ALL_TESTS (); - MPI_Finalize(); + MPI_Finalize (); return result; } diff --git a/source/source_base/test_parallel/math_chebyshev_mpi_test.cpp b/source/source_base/test_parallel/math_chebyshev_mpi_test.cpp index 09bcfb1babd..a552e3182d6 100644 --- a/source/source_base/test_parallel/math_chebyshev_mpi_test.cpp +++ b/source/source_base/test_parallel/math_chebyshev_mpi_test.cpp @@ -7,86 +7,99 @@ * unit test of class Chebyshev MPI part ***********************************************/ - /** - * - Tested Functions: - * - checkconverge - */ +/** + * - Tested Functions: + * - checkconverge + */ class toolfunc { public: - double x7(double x) + double + x7 (double x) { - return pow(x, 7); + return pow (x, 7); } - double x6(double x) + double + x6 (double x) { - return pow(x, 6); + return pow (x, 6); } - double expr(double x) + double + expr (double x) { - return exp(x); + return exp (x); } - std::complex expi(std::complex x) + std::complex + expi (std::complex x) { - const std::complex j(0.0, 1.0); - return exp(j * x); + const 
std::complex j (0.0, 1.0); + return exp (j * x); } - std::complex expi2(std::complex x) + std::complex + expi2 (std::complex x) { - const std::complex j(0.0, 1.0); + const std::complex j (0.0, 1.0); const double PI = 3.14159265358979323846; - return exp(j * PI / 2.0 * x); + return exp (j * PI / 2.0 * x); } // Pauli matrix: [0,-i;i,0] int LDA = 2; double factor = 1; - void sigma_y(std::complex* spin_in, std::complex* spin_out, const int m = 1) + void + sigma_y (std::complex* spin_in, std::complex* spin_out, const int m = 1) { - const std::complex j(0.0, 1.0); - if (this->LDA < 2) { - this->LDA = 2; -} + const std::complex j (0.0, 1.0); + if (this->LDA < 2) + { + this->LDA = 2; + } for (int i = 0; i < m; ++i) - { - spin_out[LDA * i] = -factor * j * spin_in[LDA * i + 1]; - spin_out[LDA * i + 1] = factor * j * spin_in[LDA * i]; - } + { + spin_out[LDA * i] = -factor * j * spin_in[LDA * i + 1]; + spin_out[LDA * i + 1] = factor * j * spin_in[LDA * i]; + } } #ifdef __ENABLE_FLOAT_FFTW - float x7(float x) + float + x7 (float x) { - return pow(x, 7); + return pow (x, 7); } - float x6(float x) + float + x6 (float x) { - return pow(x, 6); + return pow (x, 6); } - float expr(float x) + float + expr (float x) { - return exp(x); + return exp (x); } - std::complex expi(std::complex x) + std::complex + expi (std::complex x) { - const std::complex j(0.0, 1.0); - return exp(j * x); + const std::complex j (0.0, 1.0); + return exp (j * x); } - std::complex expi2(std::complex x) + std::complex + expi2 (std::complex x) { - const std::complex j(0.0, 1.0); + const std::complex j (0.0, 1.0); const float PI = 3.14159265358979323846; - return exp(j * PI / 2.0f * x); + return exp (j * PI / 2.0f * x); } // Pauli matrix: [0,-i;i,0] - void sigma_y(std::complex* spin_in, std::complex* spin_out, const int m = 1) + void + sigma_y (std::complex* spin_in, std::complex* spin_out, const int m = 1) { - const std::complex j(0.0, 1.0); + const std::complex j (0.0, 1.0); if (this->LDA < 2) this->LDA = 2; 
for (int i = 0; i < m; ++i) - { - spin_out[LDA * i] = -j * spin_in[LDA * i + 1]; - spin_out[LDA * i + 1] = j * spin_in[LDA * i]; - } + { + spin_out[LDA * i] = -j * spin_in[LDA * i + 1]; + spin_out[LDA * i + 1] = j * spin_in[LDA * i]; + } } #endif }; @@ -98,33 +111,35 @@ class MathChebyshevTest : public testing::Test toolfunc fun; int dsize = 0; int my_rank = 0; - void SetUp() override + void + SetUp () override { int world_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + MPI_Comm_rank (MPI_COMM_WORLD, &world_rank); int world_size; - MPI_Comm_size(MPI_COMM_WORLD, &world_size); - + MPI_Comm_size (MPI_COMM_WORLD, &world_size); + int color = (world_rank < world_size / 2) ? 0 : 1; int key = world_rank; - - MPI_Comm_split(MPI_COMM_WORLD, color, key, &POOL_WORLD); - + + MPI_Comm_split (MPI_COMM_WORLD, color, key, &POOL_WORLD); + int pool_rank, pool_size; - MPI_Comm_rank(POOL_WORLD, &pool_rank); - MPI_Comm_size(POOL_WORLD, &pool_size); + MPI_Comm_rank (POOL_WORLD, &pool_rank); + MPI_Comm_size (POOL_WORLD, &pool_size); } - void TearDown() override + void + TearDown () override { } }; -TEST_F(MathChebyshevTest, checkconverge) +TEST_F (MathChebyshevTest, checkconverge) { const int norder = 100; - p_chetest = new ModuleBase::Chebyshev(norder); + p_chetest = new ModuleBase::Chebyshev (norder); auto fun_sigma_y - = [&](std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y(in, out, m); }; + = [&] (std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y (in, out, m); }; std::complex* v = new std::complex[4]; v[0] = 1.0; @@ -134,30 +149,30 @@ TEST_F(MathChebyshevTest, checkconverge) double tmin = -1.1; double tmax = 1.1; bool converge; - converge = p_chetest->checkconverge(fun_sigma_y, v, 2, 2, tmax, tmin, 0.2); - EXPECT_TRUE(converge); - converge = p_chetest->checkconverge(fun_sigma_y, v + 2, 2, 2, tmax, tmin, 0.2); - EXPECT_TRUE(converge); - EXPECT_NEAR(tmin, -1.1, 1e-8); - EXPECT_NEAR(tmax, 1.1, 1e-8); + converge = p_chetest->checkconverge 
(fun_sigma_y, v, 2, 2, tmax, tmin, 0.2); + EXPECT_TRUE (converge); + converge = p_chetest->checkconverge (fun_sigma_y, v + 2, 2, 2, tmax, tmin, 0.2); + EXPECT_TRUE (converge); + EXPECT_NEAR (tmin, -1.1, 1e-8); + EXPECT_NEAR (tmax, 1.1, 1e-8); tmax = -1.1; - converge = p_chetest->checkconverge(fun_sigma_y, v, 2, 2, tmax, tmin, 2.2); - EXPECT_TRUE(converge); - EXPECT_NEAR(tmin, -1.1, 1e-8); - EXPECT_NEAR(tmax, 1.1, 1e-8); + converge = p_chetest->checkconverge (fun_sigma_y, v, 2, 2, tmax, tmin, 2.2); + EXPECT_TRUE (converge); + EXPECT_NEAR (tmin, -1.1, 1e-8); + EXPECT_NEAR (tmax, 1.1, 1e-8); // not converge - v[0] = std::complex(0, 1), v[1] = 1; + v[0] = std::complex (0, 1), v[1] = 1; fun.factor = 1.5; tmin = -1.1, tmax = 1.1; - converge = p_chetest->checkconverge(fun_sigma_y, v, 2, 2, tmax, tmin, 0.2); - EXPECT_FALSE(converge); + converge = p_chetest->checkconverge (fun_sigma_y, v, 2, 2, tmax, tmin, 0.2); + EXPECT_FALSE (converge); fun.factor = -1.5; tmin = -1.1, tmax = 1.1; - converge = p_chetest->checkconverge(fun_sigma_y, v, 2, 2, tmax, tmin, 0.2); - EXPECT_FALSE(converge); + converge = p_chetest->checkconverge (fun_sigma_y, v, 2, 2, tmax, tmin, 0.2); + EXPECT_FALSE (converge); fun.factor = 1; delete[] v; @@ -165,10 +180,10 @@ TEST_F(MathChebyshevTest, checkconverge) } #ifdef __ENABLE_FLOAT_FFTW -TEST_F(MathChebyshevTest, checkconverge_float) +TEST_F (MathChebyshevTest, checkconverge_float) { const int norder = 100; - p_fchetest = new ModuleBase::Chebyshev(norder); + p_fchetest = new ModuleBase::Chebyshev (norder); std::complex* v = new std::complex[4]; v[0] = 1.0; @@ -180,28 +195,29 @@ TEST_F(MathChebyshevTest, checkconverge_float) bool converge; auto fun_sigma_yf - = [&](std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y(in, out, m); }; - converge = p_fchetest->checkconverge(fun_sigma_yf, v, 2, 2, tmax, tmin, 0.2); - EXPECT_TRUE(converge); - converge = p_fchetest->checkconverge(fun_sigma_yf, v + 2, 2, 2, tmax, tmin, 0.2); - 
EXPECT_TRUE(converge); - EXPECT_NEAR(tmin, -1.1, 1e-6); - EXPECT_NEAR(tmax, 1.1, 1e-6); + = [&] (std::complex* in, std::complex* out, const int m = 1) { fun.sigma_y (in, out, m); }; + converge = p_fchetest->checkconverge (fun_sigma_yf, v, 2, 2, tmax, tmin, 0.2); + EXPECT_TRUE (converge); + converge = p_fchetest->checkconverge (fun_sigma_yf, v + 2, 2, 2, tmax, tmin, 0.2); + EXPECT_TRUE (converge); + EXPECT_NEAR (tmin, -1.1, 1e-6); + EXPECT_NEAR (tmax, 1.1, 1e-6); delete[] v; delete p_fchetest; } #endif -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; } diff --git a/source/source_base/test_parallel/parallel_2d_test.cpp b/source/source_base/test_parallel/parallel_2d_test.cpp index 5073571bcc3..bb0798e0f44 100644 --- a/source/source_base/test_parallel/parallel_2d_test.cpp +++ b/source/source_base/test_parallel/parallel_2d_test.cpp @@ -31,159 +31,183 @@ class test_para2d : public testing::Test std::vector> sizes{{30, 35}, {49, 94}, {57, 57}}; std::vector nbs{1, 2, 3}; #ifdef __MPI - void SetUp() override + void + SetUp () override { - MPI_Comm_size(MPI_COMM_WORLD, &dsize); - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size (MPI_COMM_WORLD, &dsize); + MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); } - void TearDown() override + void + TearDown () override { } #endif }; #ifdef __MPI -TEST_F(test_para2d, Divide2D) +TEST_F (test_para2d, Divide2D) { for (auto& size: sizes) - { - int gr = size.first; - int gc = size.second; - for (auto nb: nbs) { - Parallel_2D p2d; - - for (auto mode: {0, 1}) - { - p2d.init(gr, gc, nb, MPI_COMM_WORLD, mode); - - EXPECT_EQ(p2d.get_block_size(), nb); - - // 1. 
dim0 and dim1 - EXPECT_EQ(p2d.dim0 * p2d.dim1, dsize); - if (mode) { - EXPECT_LE(p2d.dim1, p2d.dim0); - } else { - EXPECT_LE(p2d.dim0, p2d.dim1); -} - - // 2. MPI 2d communicator - //EXPECT_NE(p2d.comm_2D, MPI_COMM_NULL); - - // 3. local2global and local sizes - int lr = p2d.get_row_size(); - int lc = p2d.get_col_size(); - EXPECT_EQ(lr * lc, p2d.get_local_size()); - auto cal_lsize = [](const int& gsize, const int& nb, const int& np, const int& pcoord) -> int { - int nblock = gsize / nb; - return nblock / np * nb + static_cast(nblock % np > pcoord) * nb // full blocks' contribution - + static_cast(nblock % np == pcoord) * (gsize % nb); // the last block's contribution - }; - EXPECT_EQ(lr, cal_lsize(gr, nb, p2d.dim0, p2d.coord[0])); - EXPECT_EQ(lc, cal_lsize(gc, nb, p2d.dim1, p2d.coord[1])); - - // 4. ScaLAPACK descriptor - EXPECT_EQ(p2d.desc[0], 1); - EXPECT_EQ(p2d.desc[1], p2d.blacs_ctxt); - EXPECT_EQ(p2d.desc[2], gr); - EXPECT_EQ(p2d.desc[3], gc); - EXPECT_EQ(p2d.desc[4], p2d.get_block_size()); - EXPECT_EQ(p2d.desc[5], p2d.get_block_size()); - EXPECT_EQ(p2d.desc[6], 0); - EXPECT_EQ(p2d.desc[7], 0); - EXPECT_EQ(p2d.desc[8], lr); - - // 5. 
global2local - auto sum_array = [&p2d](const int& gr, const int& gc) -> std::pair { - int sum_row = 0; - int sum_col = 0; - for (int i = 0; i < gr; ++i) { - sum_row += p2d.global2local_row(i); -} - for (int i = 0; i < gc; ++i) { - sum_col += p2d.global2local_col(i); -} - return {sum_row, sum_col}; - }; - std::pair sumrc = sum_array(gr, gc); - EXPECT_EQ(std::get<0>(sumrc), lr * (lr - 1) / 2 - (gr - lr)); - EXPECT_EQ(std::get<1>(sumrc), lc * (lc - 1) / 2 - (gc - lc)); - for (int i = 0; i < lr; ++i) { - for (int j = 0; j < lc; ++j) { - EXPECT_TRUE(p2d.in_this_processor(p2d.local2global_row(i), p2d.local2global_col(j))); -} -} - - EXPECT_EQ(p2d.get_global_row_size(), gr); - EXPECT_EQ(p2d.get_global_col_size(), gc); - } + int gr = size.first; + int gc = size.second; + for (auto nb: nbs) + { + Parallel_2D p2d; + + for (auto mode: {0, 1}) + { + p2d.init (gr, gc, nb, MPI_COMM_WORLD, mode); + + EXPECT_EQ (p2d.get_block_size (), nb); + + // 1. dim0 and dim1 + EXPECT_EQ (p2d.dim0 * p2d.dim1, dsize); + if (mode) + { + EXPECT_LE (p2d.dim1, p2d.dim0); + } + else + { + EXPECT_LE (p2d.dim0, p2d.dim1); + } + + // 2. MPI 2d communicator + // EXPECT_NE(p2d.comm_2D, MPI_COMM_NULL); + + // 3. local2global and local sizes + int lr = p2d.get_row_size (); + int lc = p2d.get_col_size (); + EXPECT_EQ (lr * lc, p2d.get_local_size ()); + auto cal_lsize + = [] (const int& gsize, const int& nb, const int& np, const int& pcoord) -> int + { + int nblock = gsize / nb; + return nblock / np * nb + + static_cast (nblock % np > pcoord) * nb // full blocks' contribution + + static_cast (nblock % np == pcoord) + * (gsize % nb); // the last block's contribution + }; + EXPECT_EQ (lr, cal_lsize (gr, nb, p2d.dim0, p2d.coord[0])); + EXPECT_EQ (lc, cal_lsize (gc, nb, p2d.dim1, p2d.coord[1])); + + // 4. 
ScaLAPACK descriptor + EXPECT_EQ (p2d.desc[0], 1); + EXPECT_EQ (p2d.desc[1], p2d.blacs_ctxt); + EXPECT_EQ (p2d.desc[2], gr); + EXPECT_EQ (p2d.desc[3], gc); + EXPECT_EQ (p2d.desc[4], p2d.get_block_size ()); + EXPECT_EQ (p2d.desc[5], p2d.get_block_size ()); + EXPECT_EQ (p2d.desc[6], 0); + EXPECT_EQ (p2d.desc[7], 0); + EXPECT_EQ (p2d.desc[8], lr); + + // 5. global2local + auto sum_array = [&p2d] (const int& gr, const int& gc) -> std::pair + { + int sum_row = 0; + int sum_col = 0; + for (int i = 0; i < gr; ++i) + { + sum_row += p2d.global2local_row (i); + } + for (int i = 0; i < gc; ++i) + { + sum_col += p2d.global2local_col (i); + } + return {sum_row, sum_col}; + }; + std::pair sumrc = sum_array (gr, gc); + EXPECT_EQ (std::get<0> (sumrc), lr * (lr - 1) / 2 - (gr - lr)); + EXPECT_EQ (std::get<1> (sumrc), lc * (lc - 1) / 2 - (gc - lc)); + for (int i = 0; i < lr; ++i) + { + for (int j = 0; j < lc; ++j) + { + EXPECT_TRUE (p2d.in_this_processor (p2d.local2global_row (i), + p2d.local2global_col (j))); + } + } + + EXPECT_EQ (p2d.get_global_row_size (), gr); + EXPECT_EQ (p2d.get_global_col_size (), gc); + } + } } - } } -TEST_F(test_para2d, DescReuseCtxt) +TEST_F (test_para2d, DescReuseCtxt) { for (auto nb: nbs) - { - Parallel_2D p1; - p1.init(sizes[0].first, sizes[0].second, nb, MPI_COMM_WORLD); + { + Parallel_2D p1; + p1.init (sizes[0].first, sizes[0].second, nb, MPI_COMM_WORLD); - Parallel_2D p2; // use 2 different sizes, but they can share the same ctxt - p2.set(sizes[1].first, sizes[1].second, nb, p1.blacs_ctxt); + Parallel_2D p2; // use 2 different sizes, but they can share the same ctxt + p2.set (sizes[1].first, sizes[1].second, nb, p1.blacs_ctxt); - EXPECT_EQ(p1.desc[1], p2.desc[1]); + EXPECT_EQ (p1.desc[1], p2.desc[1]); - Parallel_2D p3; - p3.init(sizes[2].first, sizes[2].second, nb, MPI_COMM_WORLD); - EXPECT_NE(p1.desc[1], p3.desc[1]); - } + Parallel_2D p3; + p3.init (sizes[2].first, sizes[2].second, nb, MPI_COMM_WORLD); + EXPECT_NE (p1.desc[1], p3.desc[1]); + } } 
#else -TEST_F(test_para2d, Serial) +TEST_F (test_para2d, Serial) { for (auto& size: sizes) - { - int gr = size.first; - int gc = size.second; - - Parallel_2D p2d; - - // set_serial - p2d.set_serial(gr, gc); - EXPECT_EQ(p2d.dim0 * p2d.dim1, 1); - EXPECT_EQ(p2d.dim0, 1); - EXPECT_EQ(p2d.dim1, 1); - - EXPECT_EQ(p2d.get_row_size(), gr); - EXPECT_EQ(p2d.get_col_size(), gc); - EXPECT_EQ(p2d.get_local_size(), gr * gc); - for (int i = 0; i < gr; ++i) - EXPECT_EQ(p2d.local2global_row(i), i); - for (int i = 0; i < gc; ++i) - EXPECT_EQ(p2d.local2global_col(i), i); - - // 3. global2local - for (int i = 0; i < gr; ++i) - EXPECT_EQ(p2d.global2local_row(i), i); - for (int i = 0; i < gc; ++i) - EXPECT_EQ(p2d.global2local_col(i), i); - - // 4. get_global_row_size, get_global_col_size - EXPECT_EQ(p2d.get_global_row_size(), gr); - EXPECT_EQ(p2d.get_global_col_size(), gc); - } + { + int gr = size.first; + int gc = size.second; + + Parallel_2D p2d; + + // set_serial + p2d.set_serial (gr, gc); + EXPECT_EQ (p2d.dim0 * p2d.dim1, 1); + EXPECT_EQ (p2d.dim0, 1); + EXPECT_EQ (p2d.dim1, 1); + + EXPECT_EQ (p2d.get_row_size (), gr); + EXPECT_EQ (p2d.get_col_size (), gc); + EXPECT_EQ (p2d.get_local_size (), gr * gc); + for (int i = 0; i < gr; ++i) + { + EXPECT_EQ (p2d.local2global_row (i), i); + } + for (int i = 0; i < gc; ++i) + { + EXPECT_EQ (p2d.local2global_col (i), i); + } + + // 3. global2local + for (int i = 0; i < gr; ++i) + { + EXPECT_EQ (p2d.global2local_row (i), i); + } + for (int i = 0; i < gc; ++i) + { + EXPECT_EQ (p2d.global2local_col (i), i); + } + + // 4. 
get_global_row_size, get_global_col_size + EXPECT_EQ (p2d.get_global_row_size (), gr); + EXPECT_EQ (p2d.get_global_col_size (), gc); + } } #endif -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; } diff --git a/source/source_base/test_parallel/parallel_common_test.cpp b/source/source_base/test_parallel/parallel_common_test.cpp index a3d526ab5b6..6415741e752 100644 --- a/source/source_base/test_parallel/parallel_common_test.cpp +++ b/source/source_base/test_parallel/parallel_common_test.cpp @@ -22,17 +22,19 @@ class MPIContext { public: - MPIContext() + MPIContext () { - MPI_Comm_rank(MPI_COMM_WORLD, &_rank); - MPI_Comm_size(MPI_COMM_WORLD, &_size); + MPI_Comm_rank (MPI_COMM_WORLD, &_rank); + MPI_Comm_size (MPI_COMM_WORLD, &_size); } - int GetRank() const + int + GetRank () const { return _rank; } - int GetSize() const + int + GetSize () const { return _size; } @@ -58,70 +60,71 @@ class ParaCommon : public testing::Test MPIContext mpiContext; }; -TEST_F(ParaCommon, Bcast) +TEST_F (ParaCommon, Bcast) { // reset data in the first process - int MY_RANK = mpiContext.GetRank(); + int MY_RANK = mpiContext.GetRank (); if (MY_RANK == 0) - { - boo = false; - is = 1; - fs = 1.0; - imgs = std::complex(1.0, -1.0); - chs = "ABACUS"; - strcpy(cha, chs.c_str()); - for (int i = 0; i < 10; i++) { - double ii = static_cast(i); - iv[i] = i; - fv[i] = ii; - imgv[i] = std::complex(ii, -ii); - std::stringstream ss; - ss << chs << i; - chv[i] = ss.str(); + boo = false; + is = 1; + fs = 1.0; + imgs = std::complex (1.0, -1.0); + chs = "ABACUS"; + strcpy (cha, chs.c_str ()); + for (int i = 0; i < 10; i++) + { + double ii = static_cast (i); + iv[i] = i; + fv[i] = ii; + imgv[i] = 
std::complex (ii, -ii); + std::stringstream ss; + ss << chs << i; + chv[i] = ss.str (); + } } - } // call bcast wrappers - Parallel_Common::bcast_bool(boo); - Parallel_Common::bcast_int(is); - Parallel_Common::bcast_double(fs); - Parallel_Common::bcast_complex_double(imgs); - Parallel_Common::bcast_string(chs); - Parallel_Common::bcast_char(cha, 7); - Parallel_Common::bcast_int(iv, 10); - Parallel_Common::bcast_double(fv, 10); - Parallel_Common::bcast_complex_double(imgv, 10); - Parallel_Common::bcast_string(chv, 10); + Parallel_Common::bcast_bool (boo); + Parallel_Common::bcast_int (is); + Parallel_Common::bcast_double (fs); + Parallel_Common::bcast_complex_double (imgs); + Parallel_Common::bcast_string (chs); + Parallel_Common::bcast_char (cha, 7); + Parallel_Common::bcast_int (iv, 10); + Parallel_Common::bcast_double (fv, 10); + Parallel_Common::bcast_complex_double (imgv, 10); + Parallel_Common::bcast_string (chv, 10); // make comparisons - EXPECT_FALSE(boo); - EXPECT_EQ(is, 1); - EXPECT_EQ(fs, 1.0); - EXPECT_NEAR(imgs.real(), 1.0, 1E-15); - EXPECT_NEAR(imgs.imag(), -1.0, 1E-15); - EXPECT_EQ(chs, "ABACUS"); - EXPECT_STREQ(cha, "ABACUS"); + EXPECT_FALSE (boo); + EXPECT_EQ (is, 1); + EXPECT_EQ (fs, 1.0); + EXPECT_NEAR (imgs.real (), 1.0, 1E-15); + EXPECT_NEAR (imgs.imag (), -1.0, 1E-15); + EXPECT_EQ (chs, "ABACUS"); + EXPECT_STREQ (cha, "ABACUS"); for (int i = 0; i < 10; i++) - { - double ii = static_cast(i); - EXPECT_EQ(iv[i], i); - EXPECT_NEAR(fv[i], ii, 1E-15); - EXPECT_NEAR(imgv[i].real(), ii, 1E-15); - EXPECT_NEAR(imgv[i].imag(), -ii, 1E-15); - std::stringstream ss; - ss << chs << i; - EXPECT_EQ(chv[i], ss.str()); - } + { + double ii = static_cast (i); + EXPECT_EQ (iv[i], i); + EXPECT_NEAR (fv[i], ii, 1E-15); + EXPECT_NEAR (imgv[i].real (), ii, 1E-15); + EXPECT_NEAR (imgv[i].imag (), -ii, 1E-15); + std::stringstream ss; + ss << chs << i; + EXPECT_EQ (chv[i], ss.str ()); + } } -int main(int argc, char** argv) +int + main (int argc, char** argv) { - 
MPI_Init(&argc, &argv); - testing::InitGoogleTest(&argc, argv); + MPI_Init (&argc, &argv); + testing::InitGoogleTest (&argc, argv); - int result = RUN_ALL_TESTS(); + int result = RUN_ALL_TESTS (); - MPI_Finalize(); + MPI_Finalize (); return result; } diff --git a/source/source_base/test_parallel/parallel_global_test.cpp b/source/source_base/test_parallel/parallel_global_test.cpp index 3b6bf8491fc..0287909e6a3 100644 --- a/source/source_base/test_parallel/parallel_global_test.cpp +++ b/source/source_base/test_parallel/parallel_global_test.cpp @@ -30,17 +30,19 @@ class MPIContext { public: - MPIContext() + MPIContext () { - MPI_Comm_rank(MPI_COMM_WORLD, &_rank); - MPI_Comm_size(MPI_COMM_WORLD, &_size); + MPI_Comm_rank (MPI_COMM_WORLD, &_rank); + MPI_Comm_size (MPI_COMM_WORLD, &_size); } - int GetRank() const + int + GetRank () const { return _rank; } - int GetSize() const + int + GetSize () const { return _size; } @@ -74,130 +76,129 @@ class ParaGlobal : public ::testing::Test MPIContext mpi; int nproc; int my_rank; - void SetUp() override + void + SetUp () override { - nproc = mpi.GetSize(); - my_rank = mpi.GetRank(); + nproc = mpi.GetSize (); + my_rank = mpi.GetRank (); } }; - -TEST_F(ParaGlobal, SplitGrid) +TEST_F (ParaGlobal, SplitGrid) { // NPROC is set to 4 in parallel_global_test.sh if (nproc == 4) - { - Parallel_Global::split_grid_world(2, nproc, my_rank, mpi.grank, mpi.gsize); - EXPECT_EQ(mpi.gsize, 2); - if (my_rank == 0) - EXPECT_EQ(mpi.grank, 0); - if (my_rank == 1) - EXPECT_EQ(mpi.grank, 1); - if (my_rank == 2) - EXPECT_EQ(mpi.grank, 0); - if (my_rank == 3) - EXPECT_EQ(mpi.grank, 1); - Parallel_Global::split_grid_world(4, nproc, my_rank, mpi.grank, mpi.gsize); - EXPECT_EQ(mpi.gsize, 1); - if (my_rank == 0) - EXPECT_EQ(mpi.grank, 0); - if (my_rank == 1) - EXPECT_EQ(mpi.grank, 0); - if (my_rank == 2) - EXPECT_EQ(mpi.grank, 0); - if (my_rank == 3) - EXPECT_EQ(mpi.grank, 0); - } + { + Parallel_Global::split_grid_world (2, nproc, my_rank, mpi.grank, 
mpi.gsize); + EXPECT_EQ (mpi.gsize, 2); + if (my_rank == 0) + EXPECT_EQ (mpi.grank, 0); + if (my_rank == 1) + EXPECT_EQ (mpi.grank, 1); + if (my_rank == 2) + EXPECT_EQ (mpi.grank, 0); + if (my_rank == 3) + EXPECT_EQ (mpi.grank, 1); + Parallel_Global::split_grid_world (4, nproc, my_rank, mpi.grank, mpi.gsize); + EXPECT_EQ (mpi.gsize, 1); + if (my_rank == 0) + EXPECT_EQ (mpi.grank, 0); + if (my_rank == 1) + EXPECT_EQ (mpi.grank, 0); + if (my_rank == 2) + EXPECT_EQ (mpi.grank, 0); + if (my_rank == 3) + EXPECT_EQ (mpi.grank, 0); + } else - { - Parallel_Global::split_grid_world(nproc, nproc, my_rank, mpi.grank, mpi.gsize); - EXPECT_EQ(mpi.gsize, 1); - EXPECT_EQ(mpi.grank, 0); - } + { + Parallel_Global::split_grid_world (nproc, nproc, my_rank, mpi.grank, mpi.gsize); + EXPECT_EQ (mpi.gsize, 1); + EXPECT_EQ (mpi.grank, 0); + } // std::cout< in[2] = {std::complex(1.0, 2.0), std::complex(-1, -2)}; - std::complex inout[2] = {std::complex(2.0, 1.0), std::complex(-2, -1)}; + std::complex in[2] = {std::complex (1.0, 2.0), std::complex (-1, -2)}; + std::complex inout[2] = {std::complex (2.0, 1.0), std::complex (-2, -1)}; int len = 2; MPI_Datatype dptr = MPI_DOUBLE_COMPLEX; - Parallel_Global::myProd(in, inout, &len, &dptr); - EXPECT_EQ(inout[0], std::complex(3.0, 3.0)); - EXPECT_EQ(inout[1], std::complex(-3.0, -3.0)); + Parallel_Global::myProd (in, inout, &len, &dptr); + EXPECT_EQ (inout[0], std::complex (3.0, 3.0)); + EXPECT_EQ (inout[1], std::complex (-3.0, -3.0)); } - - -TEST_F(ParaGlobal, DivideMPIPools) +TEST_F (ParaGlobal, DivideMPIPools) { this->nproc = 12; mpi.kpar = 3; this->my_rank = 5; - Parallel_Global::divide_mpi_groups(this->nproc, - mpi.kpar, - this->my_rank, - mpi.nproc_in_pool, - mpi.my_pool, - mpi.rank_in_pool); - EXPECT_EQ(mpi.nproc_in_pool, 4); - EXPECT_EQ(mpi.my_pool, 1); - EXPECT_EQ(mpi.rank_in_pool, 1); + Parallel_Global::divide_mpi_groups (this->nproc, + mpi.kpar, + this->my_rank, + mpi.nproc_in_pool, + mpi.my_pool, + mpi.rank_in_pool); + EXPECT_EQ 
(mpi.nproc_in_pool, 4); + EXPECT_EQ (mpi.my_pool, 1); + EXPECT_EQ (mpi.rank_in_pool, 1); } - class FakeMPIContext { public: - FakeMPIContext() + FakeMPIContext () { _rank = 0; _size = 1; } - int GetRank() const + int + GetRank () const { return _rank; } - int GetSize() const + int + GetSize () const { return _size; } @@ -239,137 +240,147 @@ class ParaGlobalDeathTest : public ::testing::Test // DeathTest SetUp: // Init variable, single thread - void SetUp() override + void + SetUp () override { int is_init = 0; - MPI_Initialized(&is_init); - if (is_init) { - MPI_Comm_rank(MPI_COMM_WORLD, &real_rank); - } else { - real_rank = 0; - } - - if (real_rank != 0) return; - - nproc = mpi.GetSize(); - my_rank = mpi.GetRank(); + MPI_Initialized (&is_init); + if (is_init) + { + MPI_Comm_rank (MPI_COMM_WORLD, &real_rank); + } + else + { + real_rank = 0; + } + + if (real_rank != 0) + return; + + nproc = mpi.GetSize (); + my_rank = mpi.GetRank (); // init log file needed by WARNING_QUIT - GlobalV::ofs_warning.open("warning.log"); - - + GlobalV::ofs_warning.open ("warning.log"); } // clean log file - void TearDown() override + void + TearDown () override { - if (real_rank != 0) return; + if (real_rank != 0) + return; - GlobalV::ofs_warning.close(); - remove("warning.log"); + GlobalV::ofs_warning.close (); + remove ("warning.log"); } }; -TEST_F(ParaGlobalDeathTest, InitPools) +TEST_F (ParaGlobalDeathTest, InitPools) { - if (real_rank != 0) return; + if (real_rank != 0) + return; nproc = 12; mpi.kpar = 3; mpi.nstogroup = 3; my_rank = 5; - EXPECT_EXIT( - // This gtest Macro expect that a given `statement` causes the program to exit, with an - // integer exit status that satisfies `predicate`(Here ::testing::ExitedWithCode(1)), - // and emitting error output that matches `matcher`(Here "Error"). 
+ EXPECT_EXIT ( + // This gtest Macro expect that a given `statement` causes the program to exit, with an + // integer exit status that satisfies `predicate`(Here ::testing::ExitedWithCode(1)), + // and emitting error output that matches `matcher`(Here "Error"). { // redirect stdout to stderr to capture WARNING_QUIT output - dup2(STDERR_FILENO, STDOUT_FILENO); - Parallel_Global::init_pools(nproc, - my_rank, - mpi.nstogroup, - mpi.kpar, - mpi.nproc_in_stogroup, - mpi.rank_in_stogroup, - mpi.MY_BNDGROUP, - mpi.nproc_in_pool, - mpi.rank_in_pool, - mpi.my_pool); + dup2 (STDERR_FILENO, STDOUT_FILENO); + Parallel_Global::init_pools (nproc, + my_rank, + mpi.nstogroup, + mpi.kpar, + mpi.nproc_in_stogroup, + mpi.rank_in_stogroup, + mpi.MY_BNDGROUP, + mpi.nproc_in_pool, + mpi.rank_in_pool, + mpi.my_pool); }, - ::testing::ExitedWithCode(1), + ::testing::ExitedWithCode (1), "Error"); } -TEST_F(ParaGlobalDeathTest, DivideMPIPoolsNgEqZero) +TEST_F (ParaGlobalDeathTest, DivideMPIPoolsNgEqZero) { - if (real_rank != 0) return; + if (real_rank != 0) + return; // test for num_groups == 0, // Num_group Equals 0 // WARNING_QUIT this->nproc = 12; mpi.kpar = 0; - EXPECT_EXIT( + EXPECT_EXIT ( { // redirect stdout to stderr to capture WARNING_QUIT output - dup2(STDERR_FILENO, STDOUT_FILENO); - Parallel_Global::divide_mpi_groups(this->nproc, - mpi.kpar, - this->my_rank, - mpi.nproc_in_pool, - mpi.my_pool, - mpi.rank_in_pool); + dup2 (STDERR_FILENO, STDOUT_FILENO); + Parallel_Global::divide_mpi_groups (this->nproc, + mpi.kpar, + this->my_rank, + mpi.nproc_in_pool, + mpi.my_pool, + mpi.rank_in_pool); }, - ::testing::ExitedWithCode(1), - "Number of groups must be greater than 0." 
- ); + ::testing::ExitedWithCode (1), + "Number of groups must be greater than 0."); } -TEST_F(ParaGlobalDeathTest, DivideMPIPoolsNgGtProc) +TEST_F (ParaGlobalDeathTest, DivideMPIPoolsNgGtProc) { - if (real_rank != 0) return; + if (real_rank != 0) + return; // test for procs < num_groups // Num_group GreaterThan Processors // WARNING_QUIT this->nproc = 12; mpi.kpar = 24; this->my_rank = 5; - EXPECT_EXIT( + EXPECT_EXIT ( { // redirect stdout to stderr to capture WARNING_QUIT output - dup2(STDERR_FILENO, STDOUT_FILENO); - Parallel_Global::divide_mpi_groups(this->nproc, - mpi.kpar, - this->my_rank, - mpi.nproc_in_pool, - mpi.my_pool, - mpi.rank_in_pool); + dup2 (STDERR_FILENO, STDOUT_FILENO); + Parallel_Global::divide_mpi_groups (this->nproc, + mpi.kpar, + this->my_rank, + mpi.nproc_in_pool, + mpi.my_pool, + mpi.rank_in_pool); }, - testing::ExitedWithCode(1), - "Error: Number of processes.*must be greater than the number of groups" - ); + testing::ExitedWithCode (1), + "Error: Number of processes.*must be greater than the number of groups"); } -int main(int argc, char** argv) +int + main (int argc, char** argv) { bool is_death_test_child = false; - for (int i = 0; i < argc; ++i) { - if (std::string(argv[i]).find("gtest_internal_run_death_test") != std::string::npos) { - is_death_test_child = true; - break; + for (int i = 0; i < argc; ++i) + { + if (std::string (argv[i]).find ("gtest_internal_run_death_test") != std::string::npos) + { + is_death_test_child = true; + break; + } } - } if (!is_death_test_child) - { - MPI_Init(&argc, &argv); - } + { + MPI_Init (&argc, &argv); + } - testing::InitGoogleTest(&argc, argv); + testing::InitGoogleTest (&argc, argv); testing::FLAGS_gtest_death_test_style = "threadsafe"; - int result = RUN_ALL_TESTS(); + int result = RUN_ALL_TESTS (); - if (!is_death_test_child) { - MPI_Finalize(); - } + if (!is_death_test_child) + { + MPI_Finalize (); + } return result; } #endif // __MPI diff --git 
a/source/source_base/test_parallel/parallel_reduce_test.cpp b/source/source_base/test_parallel/parallel_reduce_test.cpp index ac980ba24d3..0f325003641 100644 --- a/source/source_base/test_parallel/parallel_reduce_test.cpp +++ b/source/source_base/test_parallel/parallel_reduce_test.cpp @@ -55,17 +55,19 @@ class MPIContext { public: - MPIContext() + MPIContext () { - MPI_Comm_rank(MPI_COMM_WORLD, &_rank); - MPI_Comm_size(MPI_COMM_WORLD, &_size); + MPI_Comm_rank (MPI_COMM_WORLD, &_rank); + MPI_Comm_size (MPI_COMM_WORLD, &_size); } - int GetRank() const + int + GetRank () const { return _rank; } - int GetSize() const + int + GetSize () const { return _size; } @@ -92,16 +94,17 @@ const int MAX_FOR_RAND = 99999; // generate an array of random numbers template -T* get_rand_array(int num, int my_rank) +T* + get_rand_array (int num, int my_rank) { T* rand_array = new T[num](); - assert(num > 0); - std::default_random_engine e(time(NULL) * (my_rank + 1)); - std::uniform_int_distribution u(MIN_FOR_RAND, MAX_FOR_RAND); + assert (num > 0); + std::default_random_engine e (time (NULL) * (my_rank + 1)); + std::uniform_int_distribution u (MIN_FOR_RAND, MAX_FOR_RAND); for (int i = 0; i < num; i++) - { - rand_array[i] = static_cast(u(e)) % 100; - } + { + rand_array[i] = static_cast (u (e)) % 100; + } return rand_array; } @@ -112,503 +115,518 @@ class ParaReduce : public testing::Test MPIContext mpiContext; int my_rank = 0; int nproc = 0; - void SetUp() override + void + SetUp () override { - my_rank = mpiContext.GetRank(); - nproc = mpiContext.GetSize(); + my_rank = mpiContext.GetRank (); + nproc = mpiContext.GetSize (); } }; -TEST_F(ParaReduce, ReduceIntAll) +TEST_F (ParaReduce, ReduceIntAll) { // generate a random array int* rand_array = NULL; - rand_array = get_rand_array(num_per_process, my_rank); + rand_array = get_rand_array (num_per_process, my_rank); // calculate local sum int local_sum = 0; for (int i = 0; i < num_per_process; i++) - { - local_sum += rand_array[i]; - } + { + 
local_sum += rand_array[i]; + } // first way of calculating global sum int global_sum_first = local_sum; - Parallel_Reduce::reduce_all(global_sum_first); + Parallel_Reduce::reduce_all (global_sum_first); // second way of calculating global sum - Parallel_Reduce::reduce_all(rand_array, num_per_process); + Parallel_Reduce::reduce_all (rand_array, num_per_process); int global_sum_second = 0; for (int i = 0; i < num_per_process; i++) - { - global_sum_second += rand_array[i]; - } + { + global_sum_second += rand_array[i]; + } // compare two sums /// printf("rank %d sum1 = %d, sum2 = %d\n",my_rank, /// global_sum_first, global_sum_second); - EXPECT_EQ(global_sum_first, global_sum_second); + EXPECT_EQ (global_sum_first, global_sum_second); delete[] rand_array; } -TEST_F(ParaReduce, ReduceDoubleAll) +TEST_F (ParaReduce, ReduceDoubleAll) { // generate a random array double* rand_array = NULL; - rand_array = get_rand_array(num_per_process, my_rank); + rand_array = get_rand_array (num_per_process, my_rank); // calculate local sum double local_sum = 0.0; for (int i = 0; i < num_per_process; i++) - { - local_sum += rand_array[i]; - } + { + local_sum += rand_array[i]; + } // first way of calculating global sum double global_sum_first = local_sum; - Parallel_Reduce::reduce_all(global_sum_first); + Parallel_Reduce::reduce_all (global_sum_first); // second way of calculating global sum - Parallel_Reduce::reduce_all(rand_array, num_per_process); + Parallel_Reduce::reduce_all (rand_array, num_per_process); double global_sum_second = 0; for (int i = 0; i < num_per_process; i++) - { - global_sum_second += rand_array[i]; - } + { + global_sum_second += rand_array[i]; + } // compare two sums /// printf("rank %d sum1 = %f, sum2 = %f\n",my_rank, /// global_sum_first, global_sum_second); - EXPECT_NEAR(global_sum_first, global_sum_second, 1e-14); + EXPECT_NEAR (global_sum_first, global_sum_second, 1e-14); delete[] rand_array; } -TEST_F(ParaReduce, ReduceComplexAll) +TEST_F (ParaReduce, 
ReduceComplexAll) { // allocate local complex vector std::complex* rand_array = nullptr; rand_array = new std::complex[num_per_process]; // set its elements to random complex numbers - std::default_random_engine e(time(NULL) * (my_rank + 1)); - std::uniform_int_distribution u(MIN_FOR_RAND, MAX_FOR_RAND); + std::default_random_engine e (time (NULL) * (my_rank + 1)); + std::uniform_int_distribution u (MIN_FOR_RAND, MAX_FOR_RAND); // and calculate local sum std::complex local_sum = std::complex{0.0, 0.0}; for (int i = 0; i < num_per_process; i++) - { - double realpart = pow(-1.0, u(e) % 2) * static_cast(u(e)) / MAX_FOR_RAND; - double imagpart = pow(-1.0, u(e) % 2) * static_cast(u(e)) / MAX_FOR_RAND; - rand_array[i] = std::complex{realpart, imagpart}; - local_sum += rand_array[i]; - /// printf("pre rank %d rand_array[%d] = (%f,%f) \n",my_rank,i, - /// rand_array[i].real(), rand_array[i].imag()); - } + { + double realpart = pow (-1.0, u (e) % 2) * static_cast (u (e)) / MAX_FOR_RAND; + double imagpart = pow (-1.0, u (e) % 2) * static_cast (u (e)) / MAX_FOR_RAND; + rand_array[i] = std::complex{realpart, imagpart}; + local_sum += rand_array[i]; + /// printf("pre rank %d rand_array[%d] = (%f,%f) \n",my_rank,i, + /// rand_array[i].real(), rand_array[i].imag()); + } // first way of calculating global sum std::complex global_sum_first = local_sum; - Parallel_Reduce::reduce_all(global_sum_first); + Parallel_Reduce::reduce_all (global_sum_first); // second way of calculating global sum - Parallel_Reduce::reduce_all(rand_array, num_per_process); + Parallel_Reduce::reduce_all (rand_array, num_per_process); std::complex global_sum_second = std::complex{0.0, 0.0}; for (int i = 0; i < num_per_process; i++) - { - global_sum_second += rand_array[i]; - /// printf("pos rank %d rand_array[%d] = (%f,%f) \n",my_rank,i, - /// rand_array[i].real(), rand_array[i].imag()); - } + { + global_sum_second += rand_array[i]; + /// printf("pos rank %d rand_array[%d] = (%f,%f) \n",my_rank,i, + /// 
rand_array[i].real(), rand_array[i].imag()); + } // compare two sums /// printf("rank %d sum1 = (%f,%f) sum2 = (%f,%f)\n",my_rank, /// global_sum_first.real(), global_sum_first.imag(), /// global_sum_second.real(), global_sum_second.imag()); - EXPECT_NEAR(global_sum_first.real(), global_sum_second.real(), 1e-13); - EXPECT_NEAR(global_sum_first.imag(), global_sum_second.imag(), 1e-13); + EXPECT_NEAR (global_sum_first.real (), global_sum_second.real (), 1e-13); + EXPECT_NEAR (global_sum_first.imag (), global_sum_second.imag (), 1e-13); delete[] rand_array; } -TEST_F(ParaReduce, GatherIntAll) +TEST_F (ParaReduce, GatherIntAll) { - std::default_random_engine e(time(NULL) * (my_rank + 1)); - std::uniform_int_distribution u(MIN_FOR_RAND, MAX_FOR_RAND); - int local_number = static_cast(u(e)) % 100; + std::default_random_engine e (time (NULL) * (my_rank + 1)); + std::uniform_int_distribution u (MIN_FOR_RAND, MAX_FOR_RAND); + int local_number = static_cast (u (e)) % 100; // printf("pre rank %d local_number = %d \n ",my_rank,local_number); int* array = new int[nproc](); // use MPI_Allgather to gather together numbers - Parallel_Reduce::gather_int_all(local_number, array); - EXPECT_EQ(local_number, array[my_rank]); + Parallel_Reduce::gather_int_all (local_number, array); + EXPECT_EQ (local_number, array[my_rank]); // get minimum integer among all processes int min_number = local_number; - Parallel_Reduce::reduce_min(min_number); + Parallel_Reduce::reduce_min (min_number); for (int i = 0; i < nproc; i++) - { - EXPECT_LE(min_number, array[i]); - /// printf("post rank %d array[%d] = %d, min = %d \n", - /// my_rank,i,array[i],min_number); - } + { + EXPECT_LE (min_number, array[i]); + /// printf("post rank %d array[%d] = %d, min = %d \n", + /// my_rank,i,array[i],min_number); + } delete[] array; } -TEST_F(ParaReduce, GatherDoubleAll) +TEST_F (ParaReduce, GatherDoubleAll) { - std::default_random_engine e(time(NULL) * (my_rank + 1)); - std::uniform_int_distribution u(MIN_FOR_RAND, 
MAX_FOR_RAND); - double local_number = static_cast(u(e)) % 100; + std::default_random_engine e (time (NULL) * (my_rank + 1)); + std::uniform_int_distribution u (MIN_FOR_RAND, MAX_FOR_RAND); + double local_number = static_cast (u (e)) % 100; // printf("pre rank %d local_number = %d \n ",my_rank,local_number); double* array = new double[nproc](); // use MPI_Allgather to gather together numbers - MPI_Allgather(&local_number, 1, MPI_DOUBLE, array, 1, MPI_DOUBLE, MPI_COMM_WORLD); + MPI_Allgather (&local_number, 1, MPI_DOUBLE, array, 1, MPI_DOUBLE, MPI_COMM_WORLD); - EXPECT_EQ(local_number, array[my_rank]); + EXPECT_EQ (local_number, array[my_rank]); // get minimum integer among all processes double min_number = local_number; - Parallel_Reduce::reduce_min(min_number); + Parallel_Reduce::reduce_min (min_number); // get maximum integer among all processes double max_number = local_number; - Parallel_Reduce::reduce_max(max_number); + Parallel_Reduce::reduce_max (max_number); for (int i = 0; i < nproc; i++) - { - EXPECT_LE(min_number, array[i]); - EXPECT_GE(max_number, array[i]); - /// printf("post rank %d array[%d] = %f, min = %f, max = %f \n", - /// my_rank,i,array[i],min_number,max_number); - } + { + EXPECT_LE (min_number, array[i]); + EXPECT_GE (max_number, array[i]); + /// printf("post rank %d array[%d] = %f, min = %f, max = %f \n", + /// my_rank,i,array[i],min_number,max_number); + } delete[] array; } -TEST_F(ParaReduce, ReduceIntDiag) +TEST_F (ParaReduce, ReduceIntDiag) { /// num_per_process = 2; // NPROC is set to 4 in parallel_global_test.sh if (nproc == 4) - { - Parallel_Global::split_diag_world(2, nproc, my_rank, mpiContext.drank, mpiContext.dsize, mpiContext.dcolor); - // generate a random array - int* rand_array = NULL; - rand_array = get_rand_array(num_per_process, my_rank); - - // calculate local sum - int local_sum = 0; - for (int i = 0; i < num_per_process; i++) { - local_sum += rand_array[i]; - /// printf(" pre world_rank %d, drank %d rand_array[%d] = 
%d\n", - /// my_rank,mpiContext.dsize,i, rand_array[i]); - } - - // first way of calculating diag sum - int diag_sum_first = local_sum; - Parallel_Reduce::reduce_int_diag(diag_sum_first); - // second way of calculating global sum - int* swap = new int[num_per_process](); - MPI_Allreduce(rand_array, swap, num_per_process, MPI_INT, MPI_SUM, DIAG_WORLD); - int diag_sum_second = 0; - for (int i = 0; i < num_per_process; i++) - { - diag_sum_second += swap[i]; - /// printf(" post world_rank %d, drank %d swap[%d] = %d\n", - /// my_rank,mpiContext.dsize,i, swap[i]); + Parallel_Global::split_diag_world (2, + nproc, + my_rank, + mpiContext.drank, + mpiContext.dsize, + mpiContext.dcolor); + // generate a random array + int* rand_array = NULL; + rand_array = get_rand_array (num_per_process, my_rank); + + // calculate local sum + int local_sum = 0; + for (int i = 0; i < num_per_process; i++) + { + local_sum += rand_array[i]; + /// printf(" pre world_rank %d, drank %d rand_array[%d] = %d\n", + /// my_rank,mpiContext.dsize,i, rand_array[i]); + } + + // first way of calculating diag sum + int diag_sum_first = local_sum; + Parallel_Reduce::reduce_int_diag (diag_sum_first); + // second way of calculating global sum + int* swap = new int[num_per_process](); + MPI_Allreduce (rand_array, swap, num_per_process, MPI_INT, MPI_SUM, DIAG_WORLD); + int diag_sum_second = 0; + for (int i = 0; i < num_per_process; i++) + { + diag_sum_second += swap[i]; + /// printf(" post world_rank %d, drank %d swap[%d] = %d\n", + /// my_rank,mpiContext.dsize,i, swap[i]); + } + // compare two sums + /// printf("world_rank %d, drank %d sum1 = %d, sum2 = %d\n", + /// my_rank,mpiContext.dsize,diag_sum_first, diag_sum_second); + EXPECT_EQ (diag_sum_first, diag_sum_second); + delete[] rand_array; + delete[] swap; + MPI_Comm_free (&DIAG_WORLD); } - // compare two sums - /// printf("world_rank %d, drank %d sum1 = %d, sum2 = %d\n", - /// my_rank,mpiContext.dsize,diag_sum_first, diag_sum_second); - 
EXPECT_EQ(diag_sum_first, diag_sum_second); - delete[] rand_array; - delete[] swap; - MPI_Comm_free(&DIAG_WORLD); - } } -TEST_F(ParaReduce, ReduceDoubleDiag) +TEST_F (ParaReduce, ReduceDoubleDiag) { /// num_per_process = 1; // NPROC is set to 4 in parallel_global_test.sh if (nproc == 4) - { - Parallel_Global::split_diag_world(2, nproc, my_rank, mpiContext.drank, mpiContext.dsize, mpiContext.dcolor); - // generate a random array - double* rand_array = NULL; - rand_array = get_rand_array(num_per_process, my_rank); - - // calculate local sum - double local_sum = 0.0; - for (int i = 0; i < num_per_process; i++) { - local_sum += rand_array[i]; - /// printf(" pre world_rank %d, drank %d rand_array[%d] = %f\n", - /// my_rank,mpiContext.dsize,i, rand_array[i]); - } - - // first way of calculating diag sum - double diag_sum_first = 0.0; - MPI_Allreduce(&local_sum, &diag_sum_first, 1, MPI_DOUBLE, MPI_SUM, DIAG_WORLD); - - // second way of calculating global sum - Parallel_Reduce::reduce_double_diag(rand_array, num_per_process); - double diag_sum_second = 0.0; - for (int i = 0; i < num_per_process; i++) - { - diag_sum_second += rand_array[i]; - /// printf(" post world_rank %d, drank %d rand_array[%d] = %f\n", - /// my_rank,mpiContext.dsize,i, rand_array[i]); + Parallel_Global::split_diag_world (2, + nproc, + my_rank, + mpiContext.drank, + mpiContext.dsize, + mpiContext.dcolor); + // generate a random array + double* rand_array = NULL; + rand_array = get_rand_array (num_per_process, my_rank); + + // calculate local sum + double local_sum = 0.0; + for (int i = 0; i < num_per_process; i++) + { + local_sum += rand_array[i]; + /// printf(" pre world_rank %d, drank %d rand_array[%d] = %f\n", + /// my_rank,mpiContext.dsize,i, rand_array[i]); + } + + // first way of calculating diag sum + double diag_sum_first = 0.0; + MPI_Allreduce (&local_sum, &diag_sum_first, 1, MPI_DOUBLE, MPI_SUM, DIAG_WORLD); + + // second way of calculating global sum + Parallel_Reduce::reduce_double_diag 
(rand_array, num_per_process); + double diag_sum_second = 0.0; + for (int i = 0; i < num_per_process; i++) + { + diag_sum_second += rand_array[i]; + /// printf(" post world_rank %d, drank %d rand_array[%d] = %f\n", + /// my_rank,mpiContext.dsize,i, rand_array[i]); + } + // compare two sums + /// printf("world_rank %d, drank %d sum1 = %f, sum2 = %f\n", + /// my_rank,mpiContext.dsize,diag_sum_first, diag_sum_second); + EXPECT_NEAR (diag_sum_first, diag_sum_second, 1e-13); + delete[] rand_array; + MPI_Comm_free (&DIAG_WORLD); } - // compare two sums - /// printf("world_rank %d, drank %d sum1 = %f, sum2 = %f\n", - /// my_rank,mpiContext.dsize,diag_sum_first, diag_sum_second); - EXPECT_NEAR(diag_sum_first, diag_sum_second, 1e-13); - delete[] rand_array; - MPI_Comm_free(&DIAG_WORLD); - } } -TEST_F(ParaReduce, ReduceIntGrid) +TEST_F (ParaReduce, ReduceIntGrid) { /// num_per_process = 2; // NPROC is set to 4 in parallel_global_test.sh if (nproc == 4) - { - Parallel_Global::split_grid_world(2, nproc, my_rank, mpiContext.grank, mpiContext.gsize); - // generate a random array - int* rand_array = NULL; - rand_array = get_rand_array(num_per_process, my_rank); - - // calculate local sum - int local_sum = 0; - for (int i = 0; i < num_per_process; i++) - { - local_sum += rand_array[i]; - /// printf(" pre world_rank %d, drank %d rand_array[%d] = %d\n", - /// my_rank,mpiContext.dsize,i, rand_array[i]); - } - - // first way of calculating diag sum - int grid_sum_first = 0; - MPI_Allreduce(&local_sum, &grid_sum_first, 1, MPI_INT, MPI_SUM, GRID_WORLD); - - // second way of calculating global sum - Parallel_Reduce::reduce_int_grid(rand_array, num_per_process); - int grid_sum_second = 0; - for (int i = 0; i < num_per_process; i++) { - grid_sum_second += rand_array[i]; - /// printf(" post world_rank %d, drank %d rand_array[%d] = %d\n", - /// my_rank,mpiContext.dsize,i, rand_array[i]); + Parallel_Global::split_grid_world (2, nproc, my_rank, mpiContext.grank, mpiContext.gsize); + // 
generate a random array + int* rand_array = NULL; + rand_array = get_rand_array (num_per_process, my_rank); + + // calculate local sum + int local_sum = 0; + for (int i = 0; i < num_per_process; i++) + { + local_sum += rand_array[i]; + /// printf(" pre world_rank %d, drank %d rand_array[%d] = %d\n", + /// my_rank,mpiContext.dsize,i, rand_array[i]); + } + + // first way of calculating diag sum + int grid_sum_first = 0; + MPI_Allreduce (&local_sum, &grid_sum_first, 1, MPI_INT, MPI_SUM, GRID_WORLD); + + // second way of calculating global sum + Parallel_Reduce::reduce_int_grid (rand_array, num_per_process); + int grid_sum_second = 0; + for (int i = 0; i < num_per_process; i++) + { + grid_sum_second += rand_array[i]; + /// printf(" post world_rank %d, drank %d rand_array[%d] = %d\n", + /// my_rank,mpiContext.dsize,i, rand_array[i]); + } + // compare two sums + /// printf("world_rank %d, drank %d sum1 = %d, sum2 = %d\n", + /// my_rank,mpiContext.dsize,grid_sum_first, grid_sum_second); + EXPECT_EQ (grid_sum_first, grid_sum_second); + delete[] rand_array; + MPI_Comm_free (&GRID_WORLD); } - // compare two sums - /// printf("world_rank %d, drank %d sum1 = %d, sum2 = %d\n", - /// my_rank,mpiContext.dsize,grid_sum_first, grid_sum_second); - EXPECT_EQ(grid_sum_first, grid_sum_second); - delete[] rand_array; - MPI_Comm_free(&GRID_WORLD); - } } -TEST_F(ParaReduce, ReduceDoubleGrid) +TEST_F (ParaReduce, ReduceDoubleGrid) { /// num_per_process = 1; // NPROC is set to 4 in parallel_global_test.sh if (nproc == 4) - { - Parallel_Global::split_grid_world(2, nproc, my_rank, mpiContext.grank, mpiContext.gsize); - // generate a random array - double* rand_array = NULL; - rand_array = get_rand_array(num_per_process, my_rank); - - // calculate local sum - double local_sum = 0.0; - for (int i = 0; i < num_per_process; i++) - { - local_sum += rand_array[i]; - /// printf(" pre world_rank %d, drank %d rand_array[%d] = %f\n", - /// my_rank,mpiContext.dsize,i, rand_array[i]); - } - - // first 
way of calculating diag sum - double grid_sum_first = 0.0; - MPI_Allreduce(&local_sum, &grid_sum_first, 1, MPI_DOUBLE, MPI_SUM, GRID_WORLD); - - // second way of calculating global sum - Parallel_Reduce::reduce_double_grid(rand_array, num_per_process); - double grid_sum_second = 0.0; - for (int i = 0; i < num_per_process; i++) { - grid_sum_second += rand_array[i]; - /// printf(" post world_rank %d, drank %d rand_array[%d] = %f\n", - /// my_rank,mpiContext.dsize,i, rand_array[i]); + Parallel_Global::split_grid_world (2, nproc, my_rank, mpiContext.grank, mpiContext.gsize); + // generate a random array + double* rand_array = NULL; + rand_array = get_rand_array (num_per_process, my_rank); + + // calculate local sum + double local_sum = 0.0; + for (int i = 0; i < num_per_process; i++) + { + local_sum += rand_array[i]; + /// printf(" pre world_rank %d, drank %d rand_array[%d] = %f\n", + /// my_rank,mpiContext.dsize,i, rand_array[i]); + } + + // first way of calculating diag sum + double grid_sum_first = 0.0; + MPI_Allreduce (&local_sum, &grid_sum_first, 1, MPI_DOUBLE, MPI_SUM, GRID_WORLD); + + // second way of calculating global sum + Parallel_Reduce::reduce_double_grid (rand_array, num_per_process); + double grid_sum_second = 0.0; + for (int i = 0; i < num_per_process; i++) + { + grid_sum_second += rand_array[i]; + /// printf(" post world_rank %d, drank %d rand_array[%d] = %f\n", + /// my_rank,mpiContext.dsize,i, rand_array[i]); + } + // compare two sums + /// printf("world_rank %d, drank %d sum1 = %f, sum2 = %f\n", + /// my_rank,mpiContext.dsize,grid_sum_first, grid_sum_second); + EXPECT_NEAR (grid_sum_first, grid_sum_second, 1e-13); + delete[] rand_array; + MPI_Comm_free (&GRID_WORLD); } - // compare two sums - /// printf("world_rank %d, drank %d sum1 = %f, sum2 = %f\n", - /// my_rank,mpiContext.dsize,grid_sum_first, grid_sum_second); - EXPECT_NEAR(grid_sum_first, grid_sum_second, 1e-13); - delete[] rand_array; - MPI_Comm_free(&GRID_WORLD); - } } -TEST_F(ParaReduce, 
ReduceDoublePool) +TEST_F (ParaReduce, ReduceDoublePool) { /// num_per_process = 1; // NPROC is set to 4 in parallel_global_test.sh if (nproc == 4) - { - mpiContext.kpar = 2; - Parallel_Global::divide_mpi_groups(nproc, - mpiContext.kpar, - my_rank, - mpiContext.nproc_in_pool, - mpiContext.my_pool, - mpiContext.rank_in_pool); - MPI_Comm_split(MPI_COMM_WORLD, mpiContext.my_pool, mpiContext.rank_in_pool, &POOL_WORLD); - /// printf("word_rank/world_size = %d/%d, pool_rank/pool_size = %d/%d \n", - /// my_rank,nproc, - /// mpiContext.rank_in_pool,mpiContext.nproc_in_pool); - - // generate a random array - double* rand_array = NULL; - rand_array = get_rand_array(num_per_process, my_rank); - - // calculate local sum - double local_sum = 0.0; - for (int i = 0; i < num_per_process; i++) - { - local_sum += rand_array[i]; - } - - // first way of calculating pool sum - double pool_sum_first = local_sum; - Parallel_Reduce::reduce_pool(pool_sum_first); - // second way of calculating pool sum - Parallel_Reduce::reduce_pool(rand_array, num_per_process); - double pool_sum_second = 0.0; - for (int i = 0; i < num_per_process; i++) - { - pool_sum_second += rand_array[i]; - } - // compare two pool sums - /// printf("pool rank %d sum1 = %f, sum2 = %f\n",my_rank, - /// pool_sum_first, pool_sum_second); - EXPECT_NEAR(pool_sum_first, pool_sum_second, 1e-14); - - // first way of calculating global sum - double global_sum_first = pool_sum_first; - Parallel_Reduce::reduce_double_allpool(mpiContext.kpar, mpiContext.nproc_in_pool, global_sum_first); - // second way of calculating pool sum - Parallel_Reduce::reduce_double_allpool(mpiContext.kpar, mpiContext.nproc_in_pool, rand_array, num_per_process); - double global_sum_second = 0.0; - for (int i = 0; i < num_per_process; i++) { - global_sum_second += rand_array[i]; + mpiContext.kpar = 2; + Parallel_Global::divide_mpi_groups (nproc, + mpiContext.kpar, + my_rank, + mpiContext.nproc_in_pool, + mpiContext.my_pool, + mpiContext.rank_in_pool); + 
MPI_Comm_split (MPI_COMM_WORLD, mpiContext.my_pool, mpiContext.rank_in_pool, &POOL_WORLD); + /// printf("word_rank/world_size = %d/%d, pool_rank/pool_size = %d/%d \n", + /// my_rank,nproc, + /// mpiContext.rank_in_pool,mpiContext.nproc_in_pool); + + // generate a random array + double* rand_array = NULL; + rand_array = get_rand_array (num_per_process, my_rank); + + // calculate local sum + double local_sum = 0.0; + for (int i = 0; i < num_per_process; i++) + { + local_sum += rand_array[i]; + } + + // first way of calculating pool sum + double pool_sum_first = local_sum; + Parallel_Reduce::reduce_pool (pool_sum_first); + // second way of calculating pool sum + Parallel_Reduce::reduce_pool (rand_array, num_per_process); + double pool_sum_second = 0.0; + for (int i = 0; i < num_per_process; i++) + { + pool_sum_second += rand_array[i]; + } + // compare two pool sums + /// printf("pool rank %d sum1 = %f, sum2 = %f\n",my_rank, + /// pool_sum_first, pool_sum_second); + EXPECT_NEAR (pool_sum_first, pool_sum_second, 1e-14); + + // first way of calculating global sum + double global_sum_first = pool_sum_first; + Parallel_Reduce::reduce_double_allpool (mpiContext.kpar, mpiContext.nproc_in_pool, global_sum_first); + // second way of calculating pool sum + Parallel_Reduce::reduce_double_allpool (mpiContext.kpar, + mpiContext.nproc_in_pool, + rand_array, + num_per_process); + double global_sum_second = 0.0; + for (int i = 0; i < num_per_process; i++) + { + global_sum_second += rand_array[i]; + } + // compare two global sums + /// printf("global rank %d sum1 = %f, sum2 = %f\n",my_rank, + /// global_sum_first, global_sum_second); + EXPECT_NEAR (global_sum_first, global_sum_second, 1e-14); + + delete[] rand_array; + MPI_Comm_free (&POOL_WORLD); } - // compare two global sums - /// printf("global rank %d sum1 = %f, sum2 = %f\n",my_rank, - /// global_sum_first, global_sum_second); - EXPECT_NEAR(global_sum_first, global_sum_second, 1e-14); - - delete[] rand_array; - 
MPI_Comm_free(&POOL_WORLD); - } } -TEST_F(ParaReduce, ReduceComplexPool) +TEST_F (ParaReduce, ReduceComplexPool) { /// num_per_process = 1; // NPROC is set to 4 in parallel_global_test.sh if (nproc == 4) - { - mpiContext.kpar = 2; - Parallel_Global::divide_mpi_groups(nproc, - mpiContext.kpar, - my_rank, - mpiContext.nproc_in_pool, - mpiContext.my_pool, - mpiContext.rank_in_pool); - MPI_Comm_split(MPI_COMM_WORLD, mpiContext.my_pool, mpiContext.rank_in_pool, &POOL_WORLD); - /// printf("word_rank/world_size = %d/%d, pool_rank/pool_size = %d/%d \n", - /// my_rank,nproc, - /// mpiContext.rank_in_pool,mpiContext.nproc_in_pool); - // allocate local complex vector - std::complex* rand_array = nullptr; - rand_array = new std::complex[num_per_process]; - // set its elements to random complex numbers - std::default_random_engine e(time(NULL) * (my_rank + 1)); - std::uniform_int_distribution u(MIN_FOR_RAND, MAX_FOR_RAND); - // and calculate local sum - std::complex local_sum = std::complex{0.0, 0.0}; - for (int i = 0; i < num_per_process; i++) - { - double realpart = pow(-1.0, u(e) % 2) * static_cast(u(e)) / MAX_FOR_RAND; - double imagpart = pow(-1.0, u(e) % 2) * static_cast(u(e)) / MAX_FOR_RAND; - rand_array[i] = std::complex{realpart, imagpart}; - local_sum += rand_array[i]; - /// printf("pre rank %d rand_array[%d] = (%f,%f) \n",my_rank,i, - /// rand_array[i].real(), rand_array[i].imag()); - } - // first way of calculating pool sum - std::complex pool_sum_first = local_sum; - Parallel_Reduce::reduce_pool(pool_sum_first); - - // second way of calculating pool sum - Parallel_Reduce::reduce_pool(rand_array, num_per_process); - std::complex pool_sum_second = std::complex{0.0, 0.0}; - for (int i = 0; i < num_per_process; i++) { - pool_sum_second += rand_array[i]; - /// printf("pos rank %d rand_array[%d] = (%f,%f) \n",my_rank,i, - /// rand_array[i].real(), rand_array[i].imag()); + mpiContext.kpar = 2; + Parallel_Global::divide_mpi_groups (nproc, + mpiContext.kpar, + my_rank, + 
mpiContext.nproc_in_pool, + mpiContext.my_pool, + mpiContext.rank_in_pool); + MPI_Comm_split (MPI_COMM_WORLD, mpiContext.my_pool, mpiContext.rank_in_pool, &POOL_WORLD); + /// printf("word_rank/world_size = %d/%d, pool_rank/pool_size = %d/%d \n", + /// my_rank,nproc, + /// mpiContext.rank_in_pool,mpiContext.nproc_in_pool); + // allocate local complex vector + std::complex* rand_array = nullptr; + rand_array = new std::complex[num_per_process]; + // set its elements to random complex numbers + std::default_random_engine e (time (NULL) * (my_rank + 1)); + std::uniform_int_distribution u (MIN_FOR_RAND, MAX_FOR_RAND); + // and calculate local sum + std::complex local_sum = std::complex{0.0, 0.0}; + for (int i = 0; i < num_per_process; i++) + { + double realpart = pow (-1.0, u (e) % 2) * static_cast (u (e)) / MAX_FOR_RAND; + double imagpart = pow (-1.0, u (e) % 2) * static_cast (u (e)) / MAX_FOR_RAND; + rand_array[i] = std::complex{realpart, imagpart}; + local_sum += rand_array[i]; + /// printf("pre rank %d rand_array[%d] = (%f,%f) \n",my_rank,i, + /// rand_array[i].real(), rand_array[i].imag()); + } + // first way of calculating pool sum + std::complex pool_sum_first = local_sum; + Parallel_Reduce::reduce_pool (pool_sum_first); + + // second way of calculating pool sum + Parallel_Reduce::reduce_pool (rand_array, num_per_process); + std::complex pool_sum_second = std::complex{0.0, 0.0}; + for (int i = 0; i < num_per_process; i++) + { + pool_sum_second += rand_array[i]; + /// printf("pos rank %d rand_array[%d] = (%f,%f) \n",my_rank,i, + /// rand_array[i].real(), rand_array[i].imag()); + } + // compare two sums + /// printf("rank %d sum1 = (%f,%f) sum2 = (%f,%f)\n",my_rank, + /// pool_sum_first.real(), pool_sum_first.imag(), + /// pool_sum_second.real(), pool_sum_second.imag()); + EXPECT_NEAR (pool_sum_first.real (), pool_sum_second.real (), 1e-13); + EXPECT_NEAR (pool_sum_first.imag (), pool_sum_second.imag (), 1e-13); + + delete[] rand_array; + MPI_Comm_free 
(&POOL_WORLD); } - // compare two sums - /// printf("rank %d sum1 = (%f,%f) sum2 = (%f,%f)\n",my_rank, - /// pool_sum_first.real(), pool_sum_first.imag(), - /// pool_sum_second.real(), pool_sum_second.imag()); - EXPECT_NEAR(pool_sum_first.real(), pool_sum_second.real(), 1e-13); - EXPECT_NEAR(pool_sum_first.imag(), pool_sum_second.imag(), 1e-13); - - delete[] rand_array; - MPI_Comm_free(&POOL_WORLD); - } } -TEST_F(ParaReduce, GatherDoublePool) +TEST_F (ParaReduce, GatherDoublePool) { /// num_per_process = 1; // NPROC is set to 4 in parallel_global_test.sh if (nproc == 4) - { - mpiContext.kpar = 2; - Parallel_Global::divide_mpi_groups(nproc, - mpiContext.kpar, - my_rank, - mpiContext.nproc_in_pool, - mpiContext.my_pool, - mpiContext.rank_in_pool); - MPI_Comm_split(MPI_COMM_WORLD, mpiContext.my_pool, mpiContext.rank_in_pool, &POOL_WORLD); - std::default_random_engine e(time(NULL) * (my_rank + 1)); - std::uniform_int_distribution u(MIN_FOR_RAND, MAX_FOR_RAND); - double local_number = static_cast(u(e)) % 100; - // printf("pre rank %d local_number = %d \n ",my_rank,local_number); - double* array = new double[mpiContext.nproc_in_pool](); - // use MPI_Allgather to gather together numbers - MPI_Allgather(&local_number, 1, MPI_DOUBLE, array, 1, MPI_DOUBLE, POOL_WORLD); - - EXPECT_EQ(local_number, array[mpiContext.rank_in_pool]); - // get minimum integer among all processes - double min_number = local_number; - Parallel_Reduce::reduce_min_pool(mpiContext.nproc_in_pool, min_number); - // get maximum integer among all processes - double max_number = local_number; - Parallel_Reduce::reduce_max_pool(mpiContext.nproc_in_pool, max_number); - for (int i = 0; i < mpiContext.nproc_in_pool; i++) { - EXPECT_LE(min_number, array[i]); - EXPECT_GE(max_number, array[i]); - /// printf("post rank %d, pool rank %d, array[%d] = %f, min = %f, max = %f \n", - /// my_rank,mpiContext.rank_in_pool,i,array[i],min_number,max_number); + mpiContext.kpar = 2; + Parallel_Global::divide_mpi_groups (nproc, 
+ mpiContext.kpar, + my_rank, + mpiContext.nproc_in_pool, + mpiContext.my_pool, + mpiContext.rank_in_pool); + MPI_Comm_split (MPI_COMM_WORLD, mpiContext.my_pool, mpiContext.rank_in_pool, &POOL_WORLD); + std::default_random_engine e (time (NULL) * (my_rank + 1)); + std::uniform_int_distribution u (MIN_FOR_RAND, MAX_FOR_RAND); + double local_number = static_cast (u (e)) % 100; + // printf("pre rank %d local_number = %d \n ",my_rank,local_number); + double* array = new double[mpiContext.nproc_in_pool](); + // use MPI_Allgather to gather together numbers + MPI_Allgather (&local_number, 1, MPI_DOUBLE, array, 1, MPI_DOUBLE, POOL_WORLD); + + EXPECT_EQ (local_number, array[mpiContext.rank_in_pool]); + // get minimum integer among all processes + double min_number = local_number; + Parallel_Reduce::reduce_min_pool (mpiContext.nproc_in_pool, min_number); + // get maximum integer among all processes + double max_number = local_number; + Parallel_Reduce::reduce_max_pool (mpiContext.nproc_in_pool, max_number); + for (int i = 0; i < mpiContext.nproc_in_pool; i++) + { + EXPECT_LE (min_number, array[i]); + EXPECT_GE (max_number, array[i]); + /// printf("post rank %d, pool rank %d, array[%d] = %f, min = %f, max = %f \n", + /// my_rank,mpiContext.rank_in_pool,i,array[i],min_number,max_number); + } + delete[] array; + MPI_Comm_free (&POOL_WORLD); } - delete[] array; - MPI_Comm_free(&POOL_WORLD); - } } -int main(int argc, char** argv) +int + main (int argc, char** argv) { - MPI_Init(&argc, &argv); - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); - MPI_Finalize(); + MPI_Init (&argc, &argv); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); + MPI_Finalize (); return result; } #endif diff --git a/source/source_base/test_parallel/test_para_gemm.cpp b/source/source_base/test_parallel/test_para_gemm.cpp index 61fcfc9ea72..e38e521d075 100644 --- a/source/source_base/test_parallel/test_para_gemm.cpp +++ 
b/source/source_base/test_parallel/test_para_gemm.cpp @@ -6,7 +6,8 @@ #include #include -void random_data(std::vector& A_global, +void + random_data (std::vector& A_global, std::vector& B_global, std::vector& Cref_global, std::vector& C_global, @@ -14,23 +15,24 @@ void random_data(std::vector& A_global, double& beta) { for (auto& val: A_global) - { - val = std::rand() / (RAND_MAX + 1.0); - } + { + val = std::rand () / (RAND_MAX + 1.0); + } for (auto& val: B_global) - { - val = std::rand() / (RAND_MAX + 1.0); - } + { + val = std::rand () / (RAND_MAX + 1.0); + } for (auto& val: Cref_global) - { - val = std::rand() / (RAND_MAX + 1.0); - } + { + val = std::rand () / (RAND_MAX + 1.0); + } C_global = Cref_global; - alpha = std::rand() / (RAND_MAX + 1.0); - beta = std::rand() / (RAND_MAX + 1.0); + alpha = std::rand () / (RAND_MAX + 1.0); + beta = std::rand () / (RAND_MAX + 1.0); } -void random_data(std::vector>& A_global, +void + random_data (std::vector>& A_global, std::vector>& B_global, std::vector>& Cref_global, std::vector>& C_global, @@ -38,272 +40,284 @@ void random_data(std::vector>& A_global, std::complex& beta) { for (auto& val: A_global) - { - val = std::complex(std::rand() / (RAND_MAX + 1.0), std::rand() / (RAND_MAX + 1.0)); - } + { + val = std::complex (std::rand () / (RAND_MAX + 1.0), std::rand () / (RAND_MAX + 1.0)); + } for (auto& val: B_global) - { - val = std::complex(std::rand() / (RAND_MAX + 1.0), std::rand() / (RAND_MAX + 1.0)); - } + { + val = std::complex (std::rand () / (RAND_MAX + 1.0), std::rand () / (RAND_MAX + 1.0)); + } for (auto& val: Cref_global) - { - val = std::complex(std::rand() / (RAND_MAX + 1.0), std::rand() / (RAND_MAX + 1.0)); - } + { + val = std::complex (std::rand () / (RAND_MAX + 1.0), std::rand () / (RAND_MAX + 1.0)); + } C_global = Cref_global; - alpha = std::complex(std::rand() / (RAND_MAX + 1.0), std::rand() / (RAND_MAX + 1.0)); - beta = std::complex(std::rand() / (RAND_MAX + 1.0), std::rand() / (RAND_MAX + 1.0)); + alpha = 
std::complex (std::rand () / (RAND_MAX + 1.0), std::rand () / (RAND_MAX + 1.0)); + beta = std::complex (std::rand () / (RAND_MAX + 1.0), std::rand () / (RAND_MAX + 1.0)); } -double get_double(std::complex& val) +double + get_double (std::complex& val) { - return val.real() + val.imag(); + return val.real () + val.imag (); } -double get_double(double& val) +double + get_double (double& val) { return val; } -void scatterv_data(const double* sendbuf, +void + scatterv_data (const double* sendbuf, const int* sendcounts, const int* displs, double* recvbuf, const int recvcount, MPI_Comm comm) { - MPI_Scatterv(sendbuf, sendcounts, displs, MPI_DOUBLE, recvbuf, recvcount, MPI_DOUBLE, 0, comm); + MPI_Scatterv (sendbuf, sendcounts, displs, MPI_DOUBLE, recvbuf, recvcount, MPI_DOUBLE, 0, comm); } -void scatterv_data(const std::complex* sendbuf, +void + scatterv_data (const std::complex* sendbuf, const int* sendcounts, const int* displs, std::complex* recvbuf, const int recvcount, MPI_Comm comm) { - MPI_Scatterv(sendbuf, sendcounts, displs, MPI_DOUBLE_COMPLEX, recvbuf, recvcount, MPI_DOUBLE_COMPLEX, 0, comm); + MPI_Scatterv (sendbuf, sendcounts, displs, MPI_DOUBLE_COMPLEX, recvbuf, recvcount, MPI_DOUBLE_COMPLEX, 0, comm); } template class PgemmTest : public ::testing::Test { protected: - void SetUp() override + void + SetUp () override { - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &nproc); + MPI_Comm_rank (MPI_COMM_WORLD, &rank); + MPI_Comm_size (MPI_COMM_WORLD, &nproc); } - void TearDown() override + void + TearDown () override { - MPI_Comm_free(&col_world); - MPI_Comm_free(&row_world); + MPI_Comm_free (&col_world); + MPI_Comm_free (&row_world); } public: - void decide_ngroup(const int& willing_ncolgroup, const int& willing_nrowgroup) + void + decide_ngroup (const int& willing_ncolgroup, const int& willing_nrowgroup) { ncolgroup = willing_ncolgroup; nrowgroup = willing_nrowgroup; if (nproc % (ncolgroup * nrowgroup) != 0) - { - ncolgroup = nproc; - 
nrowgroup = 1; - } + { + ncolgroup = nproc; + nrowgroup = 1; + } else - { - nrowgroup = nproc / ncolgroup; - } + { + nrowgroup = nproc / ncolgroup; + } - MPI_Comm_split(MPI_COMM_WORLD, rank % nrowgroup, rank / nrowgroup, &col_world); - MPI_Comm_split(MPI_COMM_WORLD, rank / nrowgroup, rank % nrowgroup, &row_world); - MPI_Comm_rank(col_world, &rank_col); - MPI_Comm_rank(row_world, &rank_row); - MPI_Comm_size(col_world, &nproc_col); - MPI_Comm_size(row_world, &nproc_row); + MPI_Comm_split (MPI_COMM_WORLD, rank % nrowgroup, rank / nrowgroup, &col_world); + MPI_Comm_split (MPI_COMM_WORLD, rank / nrowgroup, rank % nrowgroup, &row_world); + MPI_Comm_rank (col_world, &rank_col); + MPI_Comm_rank (row_world, &rank_row); + MPI_Comm_size (col_world, &nproc_col); + MPI_Comm_size (row_world, &nproc_row); } - void randomize_initialization() + void + randomize_initialization () { - random_data(A_global, B_global, Cref_global, C_global, alpha, beta); + random_data (A_global, B_global, Cref_global, C_global, alpha, beta); } - void prepare(const int& ncolA_global, + void + prepare (const int& ncolA_global, const int& ncolB_global, const int& nrow_global, const int& LDA_global, const int& LDB_global, const int& LDC_global) { - A_global = std::vector(LDA_global * ncolA_global, 0.0); - B_global = std::vector(LDB_global * ncolB_global, 0.0); - C_global = std::vector(LDC_global * ncolB_global, 0.0); - Cref_global = std::vector(LDC_global * ncolB_global, 0.0); + A_global = std::vector (LDA_global * ncolA_global, 0.0); + B_global = std::vector (LDB_global * ncolB_global, 0.0); + C_global = std::vector (LDC_global * ncolB_global, 0.0); + Cref_global = std::vector (LDC_global * ncolB_global, 0.0); if (rank == 0) - { + { - this->randomize_initialization(); - const base_device::DEVICE_CPU* ctx = {}; - char transC = 'C'; - char transN = 'N'; - ModuleBase::gemm_op()(transC, - transN, - ncolA_global, - ncolB_global, - nrow_global, - &alpha, - A_global.data(), - LDA_global, - B_global.data(), - 
LDB_global, - &beta, - Cref_global.data(), - LDC_global); - } + this->randomize_initialization (); + const base_device::DEVICE_CPU* ctx = {}; + char transC = 'C'; + char transN = 'N'; + ModuleBase::gemm_op () (transC, + transN, + ncolA_global, + ncolB_global, + nrow_global, + &alpha, + A_global.data (), + LDA_global, + B_global.data (), + LDB_global, + &beta, + Cref_global.data (), + LDC_global); + } if (std::is_same::value) - { - MPI_Bcast(A_global.data(), A_global.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD); - MPI_Bcast(B_global.data(), B_global.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD); - MPI_Bcast(C_global.data(), C_global.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD); - MPI_Bcast(Cref_global.data(), Cref_global.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD); - MPI_Bcast(&alpha, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); - MPI_Bcast(&beta, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); - } + { + MPI_Bcast (A_global.data (), A_global.size (), MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast (B_global.data (), B_global.size (), MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast (C_global.data (), C_global.size (), MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast (Cref_global.data (), Cref_global.size (), MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast (&alpha, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast (&beta, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + } else if (std::is_same>::value) - { - MPI_Bcast(A_global.data(), A_global.size(), MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); - MPI_Bcast(B_global.data(), B_global.size(), MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); - MPI_Bcast(C_global.data(), C_global.size(), MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); - MPI_Bcast(Cref_global.data(), Cref_global.size(), MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); - MPI_Bcast(&alpha, 1, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); - MPI_Bcast(&beta, 1, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); - } + { + MPI_Bcast (A_global.data (), A_global.size (), MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast (B_global.data (), B_global.size (), MPI_DOUBLE_COMPLEX, 0, 
MPI_COMM_WORLD); + MPI_Bcast (C_global.data (), C_global.size (), MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast (Cref_global.data (), Cref_global.size (), MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast (&alpha, 1, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast (&beta, 1, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + } // Broadcast A_global and B_global to all ranks - getncol_and_row(ncolA_global, ncolB_global, nrow_global); + getncol_and_row (ncolA_global, ncolB_global, nrow_global); LDA = nrow + 1; LDB = nrow + 2; - A_local = std::vector(LDA * ncolA, 0.0); - B_local = std::vector(LDB * ncolB, 0.0); + A_local = std::vector (LDA * ncolA, 0.0); + B_local = std::vector (LDB * ncolB, 0.0); - scatter_matrix(ncolA_global, ncolB_global, nrow_global, LDA_global, LDB_global); + scatter_matrix (ncolA_global, ncolB_global, nrow_global, LDA_global, LDB_global); } - void getncol_and_row(const int& ncolA_global, const int& ncolB_global, const int& nrow_global) + void + getncol_and_row (const int& ncolA_global, const int& ncolB_global, const int& nrow_global) { ncolA = ncolA_global / ncolgroup; if (ncolA_global % ncolgroup > rank_col) - { - ncolA += 1; - } + { + ncolA += 1; + } ncolB = ncolB_global / ncolgroup; if (ncolB_global % ncolgroup > rank_col) - { - ncolB += 1; - } + { + ncolB += 1; + } nrow = nrow_global / nrowgroup; if (nrow_global % nrowgroup > rank_row) - { - nrow += 1; - } + { + nrow += 1; + } - ncolA_ip.resize(nproc_col); - ncolB_ip.resize(nproc_col); - nrow_ip.resize(nproc_row); - MPI_Allgather(&ncolA, 1, MPI_INT, ncolA_ip.data(), 1, MPI_INT, col_world); - MPI_Allgather(&ncolB, 1, MPI_INT, ncolB_ip.data(), 1, MPI_INT, col_world); + ncolA_ip.resize (nproc_col); + ncolB_ip.resize (nproc_col); + nrow_ip.resize (nproc_row); + MPI_Allgather (&ncolA, 1, MPI_INT, ncolA_ip.data (), 1, MPI_INT, col_world); + MPI_Allgather (&ncolB, 1, MPI_INT, ncolB_ip.data (), 1, MPI_INT, col_world); if (row_world != MPI_COMM_NULL) - { - MPI_Allgather(&nrow, 1, MPI_INT, 
nrow_ip.data(), 1, MPI_INT, row_world); - } + { + MPI_Allgather (&nrow, 1, MPI_INT, nrow_ip.data (), 1, MPI_INT, row_world); + } } - void scatter_matrix(const int& ncolA_global, + void + scatter_matrix (const int& ncolA_global, const int& ncolB_global, const int& nrow_global, const int& LDA_global, const int& LDB_global) { - std::vector A_semiglobal(ncolA * LDA_global, 0.0); - std::vector B_semiglobal(ncolB * LDB_global, 0.0); + std::vector A_semiglobal (ncolA * LDA_global, 0.0); + std::vector B_semiglobal (ncolB * LDB_global, 0.0); // Scatter A_global and B_global to A_semiglobal and B_semiglobal - std::vector sendcounts(nproc_col, 0); - std::vector displs(nproc_col, 0); + std::vector sendcounts (nproc_col, 0); + std::vector displs (nproc_col, 0); for (int i = 0; i < nproc_col; i++) - { - sendcounts[i] = ncolA_ip[i] * LDA_global; - } + { + sendcounts[i] = ncolA_ip[i] * LDA_global; + } displs[0] = 0; for (int i = 1; i < nproc_col; i++) - { - displs[i] = displs[i - 1] + sendcounts[i - 1]; - } - scatterv_data(A_global.data(), - sendcounts.data(), - displs.data(), - A_semiglobal.data(), - ncolA * LDA_global, - col_world); + { + displs[i] = displs[i - 1] + sendcounts[i - 1]; + } + scatterv_data (A_global.data (), + sendcounts.data (), + displs.data (), + A_semiglobal.data (), + ncolA * LDA_global, + col_world); for (int i = 0; i < nproc_col; i++) - { - sendcounts[i] = ncolB_ip[i] * LDB_global; - } + { + sendcounts[i] = ncolB_ip[i] * LDB_global; + } displs[0] = 0; for (int i = 1; i < nproc_col; i++) - { - displs[i] = displs[i - 1] + sendcounts[i - 1]; - } - scatterv_data(B_global.data(), - sendcounts.data(), - displs.data(), - B_semiglobal.data(), - ncolB * LDB_global, - col_world); + { + displs[i] = displs[i - 1] + sendcounts[i - 1]; + } + scatterv_data (B_global.data (), + sendcounts.data (), + displs.data (), + B_semiglobal.data (), + ncolB * LDB_global, + col_world); // Scatter A_semiglobal and B_semiglobal to A_local and B_local - sendcounts.resize(nproc_row, 0); - 
displs.resize(nproc_row, 0); + sendcounts.resize (nproc_row, 0); + displs.resize (nproc_row, 0); for (int i = 0; i < nproc_row; i++) - { - sendcounts[i] = nrow_ip[i]; - } + { + sendcounts[i] = nrow_ip[i]; + } displs[0] = 0; for (int i = 1; i < nproc_row; i++) - { - displs[i] = displs[i - 1] + sendcounts[i - 1]; - } + { + displs[i] = displs[i - 1] + sendcounts[i - 1]; + } for (int i = 0; i < ncolA; i++) - { - scatterv_data(A_semiglobal.data() + i * LDA_global, - sendcounts.data(), - displs.data(), - A_local.data() + i * LDA, - nrow, - row_world); - } + { + scatterv_data (A_semiglobal.data () + i * LDA_global, + sendcounts.data (), + displs.data (), + A_local.data () + i * LDA, + nrow, + row_world); + } for (int i = 0; i < ncolB; i++) - { - scatterv_data(B_semiglobal.data() + i * LDB_global, - sendcounts.data(), - displs.data(), - B_local.data() + i * LDB, - nrow, - row_world); - } + { + scatterv_data (B_semiglobal.data () + i * LDB_global, + sendcounts.data (), + displs.data (), + B_local.data () + i * LDB, + nrow, + row_world); + } } - void compare_result(const int& nrowC_global, const int& ncolC_global, const int& LDC_global) + void + compare_result (const int& nrowC_global, const int& ncolC_global, const int& LDC_global) { for (int i = 0; i < ncolC_global; i++) - { - for (int j = 0; j < nrowC_global; j++) { - EXPECT_NEAR(get_double(Cref_global[i * LDC_global + j]), - get_double(C_global[i * LDC_global + j]), - 1e-10); + for (int j = 0; j < nrowC_global; j++) + { + EXPECT_NEAR (get_double (Cref_global[i * LDC_global + j]), + get_double (C_global[i * LDC_global + j]), + 1e-10); + } } - } } int rank = 0, nproc = 0; @@ -322,195 +336,212 @@ class PgemmTest : public ::testing::Test typedef ::testing::Types> MyTypes; -TYPED_TEST_SUITE(PgemmTest, MyTypes); +TYPED_TEST_SUITE (PgemmTest, MyTypes); -TYPED_TEST(PgemmTest, even_case) +TYPED_TEST (PgemmTest, even_case) { const int ncolA_global = 16, ncolB_global = 8, nrow_global = 12; const int LDA_global = 17, LDB_global = 
18, LDC_global = 19; - this->decide_ngroup(2, 2); - this->prepare(ncolA_global, ncolB_global, nrow_global, LDA_global, LDB_global, LDC_global); - - this->pgemm.set_dimension(this->col_world, - this->row_world, - this->ncolA, - this->LDA, - this->ncolB, - this->LDB, - this->nrow, - LDC_global); - this->pgemm.multiply(this->alpha, this->A_local.data(), this->B_local.data(), this->beta, this->C_global.data()); - - this->compare_result(ncolA_global, ncolB_global, LDC_global); + this->decide_ngroup (2, 2); + this->prepare (ncolA_global, ncolB_global, nrow_global, LDA_global, LDB_global, LDC_global); + + this->pgemm.set_dimension (this->col_world, + this->row_world, + this->ncolA, + this->LDA, + this->ncolB, + this->LDB, + this->nrow, + LDC_global); + this->pgemm.multiply (this->alpha, + this->A_local.data (), + this->B_local.data (), + this->beta, + this->C_global.data ()); + + this->compare_result (ncolA_global, ncolB_global, LDC_global); } -TYPED_TEST(PgemmTest, odd_case) +TYPED_TEST (PgemmTest, odd_case) { const int ncolA_global = 17, ncolB_global = 7, nrow_global = 13; const int LDA_global = 17, LDB_global = 18, LDC_global = 19; - this->decide_ngroup(2, 2); - this->prepare(ncolA_global, ncolB_global, nrow_global, LDA_global, LDB_global, LDC_global); - - this->pgemm.set_dimension(this->col_world, - this->row_world, - this->ncolA, - this->LDA, - this->ncolB, - this->LDB, - this->nrow, - LDC_global); - this->pgemm.multiply(this->alpha, this->A_local.data(), this->B_local.data(), this->beta, this->C_global.data()); - - this->compare_result(ncolA_global, ncolB_global, LDC_global); + this->decide_ngroup (2, 2); + this->prepare (ncolA_global, ncolB_global, nrow_global, LDA_global, LDB_global, LDC_global); + + this->pgemm.set_dimension (this->col_world, + this->row_world, + this->ncolA, + this->LDA, + this->ncolB, + this->LDB, + this->nrow, + LDC_global); + this->pgemm.multiply (this->alpha, + this->A_local.data (), + this->B_local.data (), + this->beta, + 
this->C_global.data ()); + + this->compare_result (ncolA_global, ncolB_global, LDC_global); } -TYPED_TEST(PgemmTest, row_parallel) +TYPED_TEST (PgemmTest, row_parallel) { const int ncolA_global = 17, ncolB_global = 7, nrow_global = 13; const int LDA_global = 17, LDB_global = 18, LDC_global = 19; - this->decide_ngroup(1, 4); - this->prepare(ncolA_global, ncolB_global, nrow_global, LDA_global, LDB_global, LDC_global); - - this->pgemm.set_dimension(this->col_world, - this->row_world, - this->ncolA, - this->LDA, - this->ncolB, - this->LDB, - this->nrow, - LDC_global); - this->pgemm.multiply(this->alpha, this->A_local.data(), this->B_local.data(), this->beta, this->C_global.data()); - - this->compare_result(ncolA_global, ncolB_global, LDC_global); + this->decide_ngroup (1, 4); + this->prepare (ncolA_global, ncolB_global, nrow_global, LDA_global, LDB_global, LDC_global); + + this->pgemm.set_dimension (this->col_world, + this->row_world, + this->ncolA, + this->LDA, + this->ncolB, + this->LDB, + this->nrow, + LDC_global); + this->pgemm.multiply (this->alpha, + this->A_local.data (), + this->B_local.data (), + this->beta, + this->C_global.data ()); + + this->compare_result (ncolA_global, ncolB_global, LDC_global); } -TYPED_TEST(PgemmTest, col_parallel) +TYPED_TEST (PgemmTest, col_parallel) { const int ncolA_global = 17, ncolB_global = 7, nrow_global = 13; const int LDA_global = 17, LDB_global = 18, LDC_global = 19; - this->decide_ngroup(4, 1); - this->prepare(ncolA_global, ncolB_global, nrow_global, LDA_global, LDB_global, LDC_global); - - this->pgemm.set_dimension(this->col_world, - this->row_world, - this->ncolA, - this->LDA, - this->ncolB, - this->LDB, - this->nrow, - LDC_global); - this->pgemm.multiply(this->alpha, this->A_local.data(), this->B_local.data(), this->beta, this->C_global.data()); - - this->compare_result(ncolA_global, ncolB_global, LDC_global); + this->decide_ngroup (4, 1); + this->prepare (ncolA_global, ncolB_global, nrow_global, LDA_global, LDB_global, 
LDC_global); + + this->pgemm.set_dimension (this->col_world, + this->row_world, + this->ncolA, + this->LDA, + this->ncolB, + this->LDB, + this->nrow, + LDC_global); + this->pgemm.multiply (this->alpha, + this->A_local.data (), + this->B_local.data (), + this->beta, + this->C_global.data ()); + + this->compare_result (ncolA_global, ncolB_global, LDC_global); } -TYPED_TEST(PgemmTest, divide_col) +TYPED_TEST (PgemmTest, divide_col) { const int ncolA_global = 17, ncolB_global = 7, nrow_global = 13; const int LDA_global = 17, LDB_global = 18, LDC_global = 19; - this->decide_ngroup(2, 2); - this->prepare(ncolA_global, ncolB_global, nrow_global, LDA_global, LDB_global, LDC_global); - std::vector colB_loc(this->nproc_col); - MPI_Allgather(&this->ncolB, 1, MPI_INT, colB_loc.data(), 1, MPI_INT, this->col_world); - std::vector displs(this->nproc_col); + this->decide_ngroup (2, 2); + this->prepare (ncolA_global, ncolB_global, nrow_global, LDA_global, LDB_global, LDC_global); + std::vector colB_loc (this->nproc_col); + MPI_Allgather (&this->ncolB, 1, MPI_INT, colB_loc.data (), 1, MPI_INT, this->col_world); + std::vector displs (this->nproc_col); displs[0] = 0; for (int i = 1; i < this->nproc_col; i++) - { - displs[i] = (displs[i - 1] + colB_loc[i - 1]) * LDC_global; - } + { + displs[i] = (displs[i - 1] + colB_loc[i - 1]) * LDC_global; + } int start = displs[this->rank_col]; - this->pgemm.set_dimension(this->col_world, - this->row_world, - this->ncolA, - this->LDA, - this->ncolB, - this->LDB, - this->nrow, - LDC_global, - 2); - this->pgemm.multiply(this->alpha, this->A_local.data(), this->B_local.data(), this->beta, this->C_global.data()+ start); - - + this->pgemm.set_dimension (this->col_world, + this->row_world, + this->ncolA, + this->LDA, + this->ncolB, + this->LDB, + this->nrow, + LDC_global, + 2); + this->pgemm.multiply (this->alpha, + this->A_local.data (), + this->B_local.data (), + this->beta, + this->C_global.data () + start); for (int i = 0; i < this->ncolB; i++) - { - 
for (int j = 0; j < ncolA_global; j++) { - EXPECT_NEAR(get_double(this->Cref_global[i * LDC_global + start + j]), - get_double(this->C_global[i * LDC_global + start + j]), - 1e-10); + for (int j = 0; j < ncolA_global; j++) + { + EXPECT_NEAR (get_double (this->Cref_global[i * LDC_global + start + j]), + get_double (this->C_global[i * LDC_global + start + j]), + 1e-10); + } } - } } -TYPED_TEST(PgemmTest, divide_row) +TYPED_TEST (PgemmTest, divide_row) { const int ncolA_global = 17, ncolB_global = 7, nrow_global = 13; const int LDA_global = 17, LDB_global = 18, LDC_global = 19; - this->decide_ngroup(2, 2); - this->prepare(ncolA_global, ncolB_global, nrow_global, LDA_global, LDB_global, LDC_global); - std::vector colA_loc(this->nproc_col); - MPI_Allgather(&this->ncolA, 1, MPI_INT, colA_loc.data(), 1, MPI_INT, this->col_world); - std::vector displs(this->nproc_col); + this->decide_ngroup (2, 2); + this->prepare (ncolA_global, ncolB_global, nrow_global, LDA_global, LDB_global, LDC_global); + std::vector colA_loc (this->nproc_col); + MPI_Allgather (&this->ncolA, 1, MPI_INT, colA_loc.data (), 1, MPI_INT, this->col_world); + std::vector displs (this->nproc_col); displs[0] = 0; for (int i = 1; i < this->nproc_col; i++) - { - displs[i] = (displs[i - 1] + colA_loc[i - 1]); - } + { + displs[i] = (displs[i - 1] + colA_loc[i - 1]); + } int start = displs[this->rank_col]; int LDC_local = this->ncolA + 2; - std::vector C_loc(LDC_local * ncolB_global, 0.0); - for(int i = 0; i < ncolB_global; i++) - { - for(int j = 0; j < this->ncolA; j++) + std::vector C_loc (LDC_local * ncolB_global, 0.0); + for (int i = 0; i < ncolB_global; i++) { - C_loc[i * LDC_local + j] = this->C_global[i * LDC_global + start + j]; + for (int j = 0; j < this->ncolA; j++) + { + C_loc[i * LDC_local + j] = this->C_global[i * LDC_global + start + j]; + } } - } - this->pgemm.set_dimension(this->col_world, - this->row_world, - this->ncolA, - this->LDA, - this->ncolB, - this->LDB, - this->nrow, - LDC_local, - 3); - 
this->pgemm.multiply(this->alpha, this->A_local.data(), this->B_local.data(), this->beta, C_loc.data()); - - + this->pgemm.set_dimension (this->col_world, + this->row_world, + this->ncolA, + this->LDA, + this->ncolB, + this->LDB, + this->nrow, + LDC_local, + 3); + this->pgemm.multiply (this->alpha, this->A_local.data (), this->B_local.data (), this->beta, C_loc.data ()); for (int i = 0; i < ncolB_global; i++) - { - for (int j = 0; j < this->ncolA; j++) { - EXPECT_NEAR(get_double(this->Cref_global[i * LDC_global + start + j]), - get_double(C_loc[i * LDC_local + j]), - 1e-10); + for (int j = 0; j < this->ncolA; j++) + { + EXPECT_NEAR (get_double (this->Cref_global[i * LDC_global + start + j]), + get_double (C_loc[i * LDC_local + j]), + 1e-10); + } } - } } -int main(int argc, char** argv) +int + main (int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - MPI_Init(&argc, &argv); + ::testing::InitGoogleTest (&argc, argv); + MPI_Init (&argc, &argv); int RANK, NPROC; - MPI_Comm_rank(MPI_COMM_WORLD, &RANK); - MPI_Comm_size(MPI_COMM_WORLD, &NPROC); + MPI_Comm_rank (MPI_COMM_WORLD, &RANK); + MPI_Comm_size (MPI_COMM_WORLD, &NPROC); - int result = RUN_ALL_TESTS(); + int result = RUN_ALL_TESTS (); - MPI_Finalize(); + MPI_Finalize (); return result; } \ No newline at end of file diff --git a/source/source_base/timer.cpp b/source/source_base/timer.cpp index b0bee11e60f..c657852c8ff 100644 --- a/source/source_base/timer.cpp +++ b/source/source_base/timer.cpp @@ -28,164 +28,200 @@ namespace ModuleBase bool timer::disabled = false; bool timer::enable_nvtx_ = false; size_t timer::n_now = 0; -std::map> timer::timer_pool; +std::map> timer::timer_pool; -void timer::finish(std::ofstream &ofs, const bool print_flag, const bool check_end) +void + timer::finish (std::ofstream& ofs, const bool print_flag, const bool check_end) { - if(!timer_pool[""]["total"].start_flag) - { timer::end("","total"); } - if(print_flag) - { print_all( ofs, check_end ); } + if 
(!timer_pool[""]["total"].start_flag) + { + timer::end ("", "total"); + } + if (print_flag) + { + print_all (ofs, check_end); + } } //---------------------------------------------------------- // //---------------------------------------------------------- -void timer::start() +void + timer::start () { // first init ,then we can use tick - if(timer_pool[""]["total"].start_flag) - { timer::start("","total"); } + if (timer_pool[""]["total"].start_flag) + { + timer::start ("", "total"); + } } -double timer::cpu_time() +double + timer::cpu_time () { -//---------------------------------------------------------- -// EXPLAIN : here static is important !! -// only first call can let t0 = 0,clock begin -// when enter this function second time , t0 > 0 -//---------------------------------------------------------- - static auto t1 = std::chrono::system_clock::now(); - const auto t2 = std::chrono::system_clock::now(); - auto duration = std::chrono::duration_cast(t2 - t1); - return double(duration.count()) * std::chrono::microseconds::period::num / std::chrono::microseconds::period::den; + //---------------------------------------------------------- + // EXPLAIN : here static is important !! 
+ // only first call can let t0 = 0,clock begin + // when enter this function second time , t0 > 0 + //---------------------------------------------------------- + static auto t1 = std::chrono::system_clock::now (); + const auto t2 = std::chrono::system_clock::now (); + auto duration = std::chrono::duration_cast (t2 - t1); + return double (duration.count ()) * std::chrono::microseconds::period::num / std::chrono::microseconds::period::den; } -void timer::start(const std::string &class_name,const std::string &name) +void + timer::start (const std::string& class_name, const std::string& name) { -//---------------------------------------------------------- -// EXPLAIN : if timer is disabled , return -//---------------------------------------------------------- + //---------------------------------------------------------- + // EXPLAIN : if timer is disabled , return + //---------------------------------------------------------- if (disabled) - { return; } - - #ifdef _OPENMP - if(omp_get_thread_num()) - { return; } - #endif + { + return; + } - Timer_One &timer_one = timer_pool[class_name][name]; +#ifdef _OPENMP + if (omp_get_thread_num ()) + { + return; + } +#endif -//---------------------------------------------------------- -// CALL MEMBER FUNCTION : -// NAME : cpu_time -// -// EXPLAIN : -// if start_flag == true,means a new clock counting begin, -// hence we record the start time of this clock counting. -// if start_flag == false, means it's the end of this counting, -// so we add the time during this two 'time point' to the clock time storage. 
-//---------------------------------------------------------- - if(!timer_one.start_flag) - { throw std::runtime_error("timer::start " + class_name + "::" + name); } - #ifdef __MPI + Timer_One& timer_one = timer_pool[class_name][name]; + + //---------------------------------------------------------- + // CALL MEMBER FUNCTION : + // NAME : cpu_time + // + // EXPLAIN : + // if start_flag == true,means a new clock counting begin, + // hence we record the start time of this clock counting. + // if start_flag == false, means it's the end of this counting, + // so we add the time during this two 'time point' to the clock time storage. + //---------------------------------------------------------- + if (!timer_one.start_flag) + { + throw std::runtime_error ("timer::start " + class_name + "::" + name); + } +#ifdef __MPI int is_initialized = 0; - MPI_Initialized(&is_initialized); - if(is_initialized) - { timer_one.cpu_start = MPI_Wtime(); } - #else - timer_one.cpu_start = cpu_time(); - #endif + MPI_Initialized (&is_initialized); + if (is_initialized) + { + timer_one.cpu_start = MPI_Wtime (); + } +#else + timer_one.cpu_start = cpu_time (); +#endif ++timer_one.calls; timer_one.start_flag = false; - #if defined(__CUDA) && defined(__USE_NVTX) - if (enable_nvtx_){ - std::string label = class_name + ":" + name; - nvtxRangePushA(label.data()); - } - #endif +#if defined(__CUDA) && defined(__USE_NVTX) + if (enable_nvtx_) + { + std::string label = class_name + ":" + name; + nvtxRangePushA (label.data ()); + } +#endif } -void timer::end(const std::string &class_name,const std::string &name) +void + timer::end (const std::string& class_name, const std::string& name) { -//---------------------------------------------------------- -// EXPLAIN : if timer is disabled , return -//---------------------------------------------------------- + //---------------------------------------------------------- + // EXPLAIN : if timer is disabled , return + 
//---------------------------------------------------------- if (disabled) - { return; } - - #ifdef _OPENMP - if(omp_get_thread_num()) - { return; } - #endif + { + return; + } - Timer_One &timer_one = timer_pool[class_name][name]; +#ifdef _OPENMP + if (omp_get_thread_num ()) + { + return; + } +#endif -//---------------------------------------------------------- -// CALL MEMBER FUNCTION : -// NAME : cpu_time -// -// EXPLAIN : -// if start_flag == true,means a new clock counting begin, -// hence we record the start time of this clock counting. -// if start_flag == false, means it's the end of this counting, -// so we add the time during this two 'time point' to the clock time storage. -//---------------------------------------------------------- - if(timer_one.start_flag) - { throw std::runtime_error("timer::end " + class_name + "::" + name); } - #ifdef __MPI + Timer_One& timer_one = timer_pool[class_name][name]; + + //---------------------------------------------------------- + // CALL MEMBER FUNCTION : + // NAME : cpu_time + // + // EXPLAIN : + // if start_flag == true,means a new clock counting begin, + // hence we record the start time of this clock counting. + // if start_flag == false, means it's the end of this counting, + // so we add the time during this two 'time point' to the clock time storage. 
+ //---------------------------------------------------------- + if (timer_one.start_flag) + { + throw std::runtime_error ("timer::end " + class_name + "::" + name); + } +#ifdef __MPI int is_initialized = 0; - MPI_Initialized(&is_initialized); - if(is_initialized) - { timer_one.cpu_second += MPI_Wtime() - timer_one.cpu_start; } - #else - timer_one.cpu_second += (cpu_time() - timer_one.cpu_start); - #endif + MPI_Initialized (&is_initialized); + if (is_initialized) + { + timer_one.cpu_second += MPI_Wtime () - timer_one.cpu_start; + } +#else + timer_one.cpu_second += (cpu_time () - timer_one.cpu_start); +#endif timer_one.start_flag = true; - #if defined(__CUDA) && defined(__USE_NVTX) +#if defined(__CUDA) && defined(__USE_NVTX) if (enable_nvtx_) - { nvtxRangePop(); } - #endif + { + nvtxRangePop (); + } +#endif } -long double timer::print_until_now() +long double + timer::print_until_now () { - if(!timer_pool[""]["total"].start_flag) - timer::end("","total"); + if (!timer_pool[""]["total"].start_flag) + { + timer::end ("", "total"); + } // start again - timer::start("","total"); + timer::start ("", "total"); return timer_pool[""]["total"].cpu_second; } -void timer::write_to_json(std::string file_name) +void + timer::write_to_json (std::string file_name) { #ifdef __MPI // in some unit test, the mpi is not initialized, so we need to check it // if mpi is not initialized, we do not run this function int is_initialized = 0; - MPI_Initialized(&is_initialized); - if (!is_initialized) { - return; - } + MPI_Initialized (&is_initialized); + if (!is_initialized) + { + return; + } int my_rank = 0; - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); - if (my_rank != 0) { - return; - } + MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); + if (my_rank != 0) + { + return; + } #endif // check if a double is inf, if so, return "null", else return a string of the input double - auto double_to_string = [](double d) -> std::string - { - if(std::isinf(d)) - { - return "Infinity"; - } - else + auto 
double_to_string = [] (double d) -> std::string { - return FmtCore::format("%.15f", d); - } - }; + if (std::isinf (d)) + { + return "Infinity"; + } + else + { + return FmtCore::format ("%.15f", d); + } + }; // The output json file format is like this: // { @@ -206,149 +242,153 @@ void timer::write_to_json(std::string file_name) // ] // } - std::ofstream ofs(file_name); + std::ofstream ofs (file_name); std::string indent = " "; int order_a = 0; ofs << "{\n"; ofs << indent << "\"total\": " << timer_pool[""]["total"].cpu_second << ",\n"; ofs << indent << "\"sub\": [\n"; - for(auto &timer_pool_A : timer_pool) - { - order_a ++; - // if calss_name == "", it means total time, so we skip it - if(timer_pool_A.first == "") + for (auto& timer_pool_A: timer_pool) { - continue; - } - int order_b = 0; - const std::string class_name = timer_pool_A.first; - ofs << indent << indent << "{\n"; - ofs << indent << indent << indent << "\"class_name\": \"" << class_name << "\",\n"; - ofs << indent << indent << indent << "\"sub\": [\n"; - for(auto &timer_pool_B : timer_pool_A.second) - { - order_b ++; - const std::string name = timer_pool_B.first; - const Timer_One timer_one = timer_pool_B.second; - ofs << indent << indent << indent << indent << "{\n"; - ofs << indent << indent << indent << indent << "\"name\": \"" << name << "\",\n"; - ofs << indent << indent << indent << indent << "\"cpu_second\": " - << std::setprecision(15) << timer_one.cpu_second << ",\n"; - ofs << indent << indent << indent << indent << "\"calls\": " << timer_one.calls << ",\n"; - ofs << indent << indent << indent << indent << "\"cpu_second_per_call\": " - << double_to_string(timer_one.cpu_second/timer_one.calls) << ",\n"; - ofs << indent << indent << indent << indent << "\"cpu_second_per_total\": " - << double_to_string(timer_one.cpu_second/timer_pool[""]["total"].cpu_second) << "\n"; - - if (order_b == timer_pool_A.second.size()) - { - ofs << indent << indent << indent << indent << "}\n"; - } + order_a++; + // if 
calss_name == "", it means total time, so we skip it + if (timer_pool_A.first == "") + { + continue; + } + int order_b = 0; + const std::string class_name = timer_pool_A.first; + ofs << indent << indent << "{\n"; + ofs << indent << indent << indent << "\"class_name\": \"" << class_name << "\",\n"; + ofs << indent << indent << indent << "\"sub\": [\n"; + for (auto& timer_pool_B: timer_pool_A.second) + { + order_b++; + const std::string name = timer_pool_B.first; + const Timer_One timer_one = timer_pool_B.second; + ofs << indent << indent << indent << indent << "{\n"; + ofs << indent << indent << indent << indent << "\"name\": \"" << name << "\",\n"; + ofs << indent << indent << indent << indent << "\"cpu_second\": " << std::setprecision (15) + << timer_one.cpu_second << ",\n"; + ofs << indent << indent << indent << indent << "\"calls\": " << timer_one.calls << ",\n"; + ofs << indent << indent << indent << indent + << "\"cpu_second_per_call\": " << double_to_string (timer_one.cpu_second / timer_one.calls) + << ",\n"; + ofs << indent << indent << indent << indent << "\"cpu_second_per_total\": " + << double_to_string (timer_one.cpu_second / timer_pool[""]["total"].cpu_second) << "\n"; + + if (order_b == timer_pool_A.second.size ()) + { + ofs << indent << indent << indent << indent << "}\n"; + } + else + { + ofs << indent << indent << indent << indent << "},\n"; + } + } + ofs << indent << indent << indent << "]\n"; + if (order_a == timer_pool.size ()) + { + ofs << indent << indent << "}\n"; + } else - { - ofs << indent << indent << indent << indent << "},\n"; - } + { + ofs << indent << indent << "},\n"; + } } - ofs << indent << indent << indent << "]\n"; - if (order_a == timer_pool.size()) - { - ofs << indent << indent << "}\n"; - } - else - { - ofs << indent << indent << "},\n"; - } - } ofs << indent << "]\n"; ofs << "}\n"; - ofs.close(); + ofs.close (); } -void timer::print_all(std::ofstream &ofs, const bool check_end) +void + timer::print_all (std::ofstream& ofs, 
const bool check_end) { constexpr double small = 0.1; // cpu = 10^6 // if want to print > 1s , set small = 10^6 - std::vector,Timer_One>> timer_pool_order; - for(auto &timer_pool_A : timer_pool) - { - const std::string class_name = timer_pool_A.first; - for(auto &timer_pool_B : timer_pool_A.second) + std::vector, Timer_One>> timer_pool_order; + for (auto& timer_pool_A: timer_pool) { - const std::string name = timer_pool_B.first; - const Timer_One &timer_one = timer_pool_B.second; - if(check_end && !timer_one.start_flag) - { throw std::runtime_error("timer::print_all " + class_name + "::" + name); } - if(timer_pool_order.size() < timer_one.order+1) - { - timer_pool_order.resize(timer_one.order+1); - } - //timer_pool_order[timer_one.order] = {{class_name, name}, timer_one}; //qianrui change it to make it compatible with old compiler version - timer_pool_order[timer_one.order] = std::pair, Timer_One> { - std::pair{class_name,name}, timer_one}; + const std::string class_name = timer_pool_A.first; + for (auto& timer_pool_B: timer_pool_A.second) + { + const std::string name = timer_pool_B.first; + const Timer_One& timer_one = timer_pool_B.second; + if (check_end && !timer_one.start_flag) + { + throw std::runtime_error ("timer::print_all " + class_name + "::" + name); + } + if (timer_pool_order.size () < timer_one.order + 1) + { + timer_pool_order.resize (timer_one.order + 1); + } + // timer_pool_order[timer_one.order] = {{class_name, name}, timer_one}; //qianrui change it to make + // it compatible with old compiler version + timer_pool_order[timer_one.order] = std::pair, Timer_One>{ + std::pair{class_name, name}, + timer_one}; + } } - } std::vector class_names; std::vector names; std::vector times; std::vector calls; std::vector avgs; std::vector pers; - for(auto &timer_pool_order_A : timer_pool_order) - { - const std::string &class_name = timer_pool_order_A.first.first; - const std::string &name = timer_pool_order_A.first.second; - const Timer_One &timer_one = 
timer_pool_order_A.second; - - if(timer_one.cpu_second < 0) - { - continue; - } - - // only print out timers that are larger than 1% - // mohan add 2025-03-09 - const double percentage_thr = 1.0; - const double percentage = timer_one.cpu_second / timer_pool_order[0].second.cpu_second * 100; - if(percentage titles = {"CLASS_NAME", "NAME", "TIME/s", "CALLS", "AVG/s", "PER/%"}; std::vector formats = {"%-10s", "%-10s", "%6.2f", "%8d", "%6.2f", "%6.2f"}; - FmtTable time_statistics(/*titles=*/titles, - /*nrows=*/pers.size(), - /*formats=*/formats, - /*indent=*/0, - /*align=*/{/*value*/FmtTable::Align::LEFT, /*title*/FmtTable::Align::CENTER}); + FmtTable time_statistics (/*titles=*/titles, + /*nrows=*/pers.size (), + /*formats=*/formats, + /*indent=*/0, + /*align=*/{/*value*/ FmtTable::Align::LEFT, /*title*/ FmtTable::Align::CENTER}); time_statistics << class_names << names << times << calls << avgs << pers; - const std::string table = "\n TIME STATISTICS\n" + time_statistics.str(); - std::cout< 0.1 s * * @param ofs The output file for print out timings */ - static void print_all(std::ofstream &ofs, const bool check_end); + static void print_all (std::ofstream& ofs, const bool check_end); /** * @brief Stop total time calculation, print total time until now, @@ -101,7 +104,7 @@ class timer * * @return long double */ - static long double print_until_now(void); + static long double print_until_now (); private: /** @@ -126,7 +129,7 @@ class timer * * @return double */ - static double cpu_time(void); + static double cpu_time (); }; } // namespace ModuleBase diff --git a/source/source_base/timer_wrapper.h b/source/source_base/timer_wrapper.h index 6da3f391e37..1de07e01a86 100644 --- a/source/source_base/timer_wrapper.h +++ b/source/source_base/timer_wrapper.h @@ -7,7 +7,8 @@ #include #endif -namespace ModuleBase { +namespace ModuleBase +{ /** * @brief Time point type that works in both MPI and non-MPI environments @@ -16,41 +17,46 @@ typedef double TimePoint; /** * @brief Get 
current time as a TimePoint - * + * * @return TimePoint Current time */ -inline TimePoint get_time() +inline TimePoint + get_time () { #ifdef __MPI int is_initialized = 0; - MPI_Initialized(&is_initialized); + MPI_Initialized (&is_initialized); if (is_initialized) - { - return MPI_Wtime(); - } + { + return MPI_Wtime (); + } else - { - return std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()).count() / 1e6; - } + { + return std::chrono::duration_cast ( + std::chrono::system_clock::now ().time_since_epoch ()) + .count () + / 1e6; + } #else - return std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()).count() / 1e6; + return std::chrono::duration_cast (std::chrono::system_clock::now ().time_since_epoch ()) + .count () + / 1e6; #endif } /** * @brief Calculate duration between two TimePoints in seconds - * + * * @param start Start time point * @param end End time point * @return double Duration in seconds */ -inline double get_duration(const TimePoint& start, const TimePoint& end) +inline double + get_duration (const TimePoint& start, const TimePoint& end) { return end - start; } -} +} // namespace ModuleBase #endif // TIMER_WRAPPER_H \ No newline at end of file diff --git a/source/source_base/tool_check.cpp b/source/source_base/tool_check.cpp index dab7dd5f213..a1bf7f62a3a 100644 --- a/source/source_base/tool_check.cpp +++ b/source/source_base/tool_check.cpp @@ -4,87 +4,91 @@ namespace ModuleBase { -void CHECK_NAME(std::ifstream &ifs,const std::string &name_in,bool quit) +void + CHECK_NAME (std::ifstream& ifs, const std::string& name_in, bool quit) { std::string name; ifs >> name; - if ( name != name_in) - { - if(quit) - { - //GlobalV::ofs_warning << "\n name = " <> v_in; - if( v!= v_in) - { - if(quit) - { - std::cout << "\n value = " << v_in; - std::cout << "\n should be = " << v; - WARNING_QUIT("CHECK_INT","Some parameter name is wrong!"); - } - else - { - std::cout <<"\n Can not match well: "<> v_in; + if 
(v != v_in) + { + if (quit) + { + std::cout << "\n value = " << v_in; + std::cout << "\n should be = " << v; + WARNING_QUIT ("CHECK_INT", "Some parameter name is wrong!"); + } + else + { + std::cout << "\n Can not match well: " << v_in << "(readin) " << v << std::endl; + } + } + return; } -void CHECK_DOUBLE(std::ifstream &ifs,const double &v,bool quit) +void + CHECK_DOUBLE (std::ifstream& ifs, const double& v, bool quit) { - const double tiny = 1.0e-5; - double v_in = 0.0; - ifs >> v_in; - if( fabs(v - v_in) > tiny ) - { - if(quit) - { - std::cout << " read in value = " << v_in << std::endl; - std::cout << " the value should be = " << v << std::endl; - WARNING_QUIT("CHECK_DOUBLE","the name of parameter wrong!"); - } - else - { - std::cout <<" can not match well (1.0e-5): "<< v_in <<"(readin) "<> v_in; + if (fabs (v - v_in) > tiny) + { + if (quit) + { + std::cout << " read in value = " << v_in << std::endl; + std::cout << " the value should be = " << v << std::endl; + WARNING_QUIT ("CHECK_DOUBLE", "the name of parameter wrong!"); + } + else + { + std::cout << " can not match well (1.0e-5): " << v_in << "(readin) " << v << std::endl; + } + } + return; } -void CHECK_STRING(std::ifstream &ifs,const std::string &v,bool quit) +void + CHECK_STRING (std::ifstream& ifs, const std::string& v, bool quit) { - std::string v_in; - ifs >> v_in; - if( v_in != v ) - { - if(quit) - { - std::cout << " read in value = " << v_in << std::endl; - std::cout << " the value should be = " << v << std::endl; - WARNING_QUIT("CHECK_STRING","the name of parameter wrong!"); - } - else - { - std::cout <<" can not match well : "<> v_in; + if (v_in != v) + { + if (quit) + { + std::cout << " read in value = " << v_in << std::endl; + std::cout << " the value should be = " << v << std::endl; + WARNING_QUIT ("CHECK_STRING", "the name of parameter wrong!"); + } + else + { + std::cout << " can not match well : " << v_in << "(readin) " << v << std::endl; + } + } + return; } -} +} // namespace ModuleBase 
diff --git a/source/source_base/tool_check.h b/source/source_base/tool_check.h index 580119e7763..b9eb3abeb7a 100644 --- a/source/source_base/tool_check.h +++ b/source/source_base/tool_check.h @@ -20,7 +20,7 @@ namespace ModuleBase * @param[in] name_in The name for checking * @param[in] quit Whether call WARNING_QUIT to quit or not */ -void CHECK_NAME(std::ifstream &ifs, const std::string &name_in, bool quit = true); +void CHECK_NAME (std::ifstream& ifs, const std::string& name_in, bool quit = true); /** * @brief Check the next input from ifs is integer. @@ -30,7 +30,7 @@ void CHECK_NAME(std::ifstream &ifs, const std::string &name_in, bool quit = true * @param[in] v The int variable for checking * @param[in] quit Whether call WARNING_QUIT to quit or not */ -void CHECK_INT(std::ifstream &ifs, const int &v, bool quit = true); +void CHECK_INT (std::ifstream& ifs, const int& v, bool quit = true); /** * @brief Check the next input from ifs is double. @@ -40,7 +40,7 @@ void CHECK_INT(std::ifstream &ifs, const int &v, bool quit = true); * @param[in] v The double variable for checking * @param[in] quit Whether call WARNING_QUIT to quit or not */ -void CHECK_DOUBLE(std::ifstream &ifs, const double &v, bool quit = true); +void CHECK_DOUBLE (std::ifstream& ifs, const double& v, bool quit = true); /** * @brief Check the next input from ifs is std::string. 
@@ -50,7 +50,7 @@ void CHECK_DOUBLE(std::ifstream &ifs, const double &v, bool quit = true); * @param[in] v The std::string variable for checking * @param[in] quit Whether call WARNING_QUIT to quit or not */ -void CHECK_STRING(std::ifstream &ifs, const std::string &v, bool quit = true); +void CHECK_STRING (std::ifstream& ifs, const std::string& v, bool quit = true); } // namespace ModuleBase #endif diff --git a/source/source_base/tool_quit.cpp b/source/source_base/tool_quit.cpp index 40943301b69..ec0728f90a4 100644 --- a/source/source_base/tool_quit.cpp +++ b/source/source_base/tool_quit.cpp @@ -19,7 +19,8 @@ namespace std::string g_quit_out_dir; } -void set_quit_out_dir(const std::string& dir) +void + set_quit_out_dir (const std::string& dir) { g_quit_out_dir = dir; } @@ -31,147 +32,158 @@ void set_quit_out_dir(const std::string& dir) // NAME : WARNING_QUIT( write information into // GlobalV::ofs_warning , and then quit) //========================================================== -void WARNING(const std::string &file,const std::string &description) +void + WARNING (const std::string& file, const std::string& description) { #ifdef __NORMAL #else - if (GlobalV::MY_RANK==0) - { -// std::cout << "\n "< @@ -15,94 +15,98 @@ namespace ModuleBase // Distributing 1d tasks by worker id //========================================================== template -inline void TASK_DIST_1D(int nworker, int iworker, T_task ntask, T_out& start, T_out& len) +inline void + TASK_DIST_1D (int nworker, int iworker, T_task ntask, T_out& start, T_out& len) { if (nworker == 1) - { - start = 0; - len = ntask; - } - else - { - const T_task tlen = ntask / nworker; - const T_task trem = ntask - tlen * nworker; - if (iworker < trem) { - start = tlen * iworker + iworker; - len = tlen + 1; + start = 0; + len = ntask; } - else + else { - start = tlen * iworker + trem; - len = tlen; + const T_task tlen = ntask / nworker; + const T_task trem = ntask - tlen * nworker; + if (iworker < trem) + { + start = 
tlen * iworker + iworker; + len = tlen + 1; + } + else + { + start = tlen * iworker + trem; + len = tlen; + } } - } } template -inline void BLOCK_TASK_DIST_1D(int nworker, int iworker, T_task ntask, T_task block_size, T_out& start, T_out& len) +inline void + BLOCK_TASK_DIST_1D (int nworker, int iworker, T_task ntask, T_task block_size, T_out& start, T_out& len) { if (nworker == 1) - { - start = 0; - len = ntask; - } - else - { - const T_task nblock = (ntask + block_size - 1) / block_size; - const T_task blen = nblock / nworker; - const T_task brem = nblock - blen * nworker; - if (iworker < brem) { - start = blen * iworker + iworker; - len = blen + 1; + start = 0; + len = ntask; } - else - { - start = blen * iworker + brem; - len = blen; - } - start *= block_size; - len *= block_size; - if (start > ntask) - { - start = ntask; - len = 0; - return; - } - if (start + len > ntask) + else { - len = ntask - start; + const T_task nblock = (ntask + block_size - 1) / block_size; + const T_task blen = nblock / nworker; + const T_task brem = nblock - blen * nworker; + if (iworker < brem) + { + start = blen * iworker + iworker; + len = blen + 1; + } + else + { + start = blen * iworker + brem; + len = blen; + } + start *= block_size; + len *= block_size; + if (start > ntask) + { + start = ntask; + len = 0; + return; + } + if (start + len > ntask) + { + len = ntask - start; + } } - } } -inline void OMP_PARALLEL(const std::function &f) +inline void + OMP_PARALLEL (const std::function& f) { #ifdef _OPENMP - #pragma omp parallel +#pragma omp parallel { - f(omp_get_num_threads(), omp_get_thread_num()); + f (omp_get_num_threads (), omp_get_thread_num ()); } #else - f(1, 0); + f (1, 0); #endif } -inline void TRY_OMP_PARALLEL(const std::function &f) +inline void + TRY_OMP_PARALLEL (const std::function& f) { #ifdef _OPENMP - if (!omp_in_parallel()) - { - OMP_PARALLEL(f); - } + if (!omp_in_parallel ()) + { + OMP_PARALLEL (f); + } else #endif - { - f(1, 0); - } + { + f (1, 0); + } } -} +} 
// namespace ModuleBase #endif diff --git a/source/source_base/tool_title.cpp b/source/source_base/tool_title.cpp index 6d2716670e1..7b7150927a0 100644 --- a/source/source_base/tool_title.cpp +++ b/source/source_base/tool_title.cpp @@ -15,44 +15,46 @@ namespace ModuleBase // NAME : TITLE( title for each function ) //========================================================== -void TITLE(const std::string &class_name,const std::string &function_name,const bool disable) +void + TITLE (const std::string& class_name, const std::string& function_name, const bool disable) { if (disable) - { - return; // no output - } + { + return; // no output + } #ifdef __NORMAL - std::cout<<" ==> "< " << class_name << "::" << function_name << "\t" + << ModuleBase::GlobalFunc::MemAvailable () / 1024.0 / 1024 << " GB\t" + << ModuleBase::timer::print_until_now () << " s" << std::endl; #else - if(GlobalV::ofs_running) // mohan add 2009-08-25 in case the function called before allocate GlobalV::ofs_running - { - GlobalV::ofs_running<<" ==> "< " << class_name << "::" << function_name << "\t" + << ModuleBase::GlobalFunc::MemAvailable () / 1024.0 / 1024 << " GB\t" + << ModuleBase::timer::print_until_now () << " s" << std::endl; + } #endif } -void TITLE(std::ofstream &ofs,const std::string &class_name,const std::string &function_name,const bool disable) +void + TITLE (std::ofstream& ofs, const std::string& class_name, const std::string& function_name, const bool disable) { if (disable) - { - return; // no output - } + { + return; // no output + } #ifdef __NORMAL - std::cout<<"\n\n ==> "< " << class_name << "::" << function_name << "\t" + << ModuleBase::GlobalFunc::MemAvailable () / 1024.0 / 1024 << " GB\t" + << ModuleBase::timer::print_until_now () << " s" << std::endl; #else - if(GlobalV::ofs_running) - { - ofs<<" ==> "< " << class_name << "::" << function_name << "\t" + << ModuleBase::GlobalFunc::MemAvailable () / 1024.0 / 1024 << " GB\t" + << ModuleBase::timer::print_until_now () << " s" << 
std::endl; + } #endif } -} +} // namespace ModuleBase diff --git a/source/source_base/tool_title.h b/source/source_base/tool_title.h index 1960ea400b3..ba5e5394bb6 100644 --- a/source/source_base/tool_title.h +++ b/source/source_base/tool_title.h @@ -1,5 +1,5 @@ -#ifndef TITLE_H -#define TITLE_H +#ifndef TITLE_H +#define TITLE_H #include #include @@ -13,25 +13,23 @@ namespace ModuleBase { -void TITLE( - const std::string &class_name, - const std::string &function_name = "", +void TITLE (const std::string& class_name, + const std::string& function_name = "", #ifndef __DEBUG - const bool disable=true + const bool disable = true #else - const bool disable=false + const bool disable = false #endif ); -void TITLE( - std::ofstream &ofs, - const std::string &class_name, - const std::string &function_name, +void TITLE (std::ofstream& ofs, + const std::string& class_name, + const std::string& function_name, #ifndef __DEBUG - const bool disable=true + const bool disable = true #else - const bool disable=false + const bool disable = false #endif ); -} +} // namespace ModuleBase #endif diff --git a/source/source_base/truncated_func.h b/source/source_base/truncated_func.h index 55ce64953ad..38a75477eee 100644 --- a/source/source_base/truncated_func.h +++ b/source/source_base/truncated_func.h @@ -20,13 +20,14 @@ namespace ModuleBase * @return FPTYPE The result of the exponential function. */ template -inline FPTYPE truncated_exp(FPTYPE x) +inline FPTYPE + truncated_exp (FPTYPE x) { - if (std::real(x) < -230.0) - { - return static_cast(0.0); - } - return ModuleBase::libm::exp(x); + if (std::real (x) < -230.0) + { + return static_cast (0.0); + } + return ModuleBase::libm::exp (x); } /** @@ -40,13 +41,14 @@ inline FPTYPE truncated_exp(FPTYPE x) * @return FPTYPE The result of the erfc function. 
*/ template -inline FPTYPE truncated_erfc(FPTYPE x) +inline FPTYPE + truncated_erfc (FPTYPE x) { - if (std::real(x) > 20.0) - { - return static_cast(0.0); - } - return std::erfc(x); + if (std::real (x) > 20.0) + { + return static_cast (0.0); + } + return std::erfc (x); } /** @@ -68,49 +70,52 @@ inline FPTYPE truncated_erfc(FPTYPE x) * @param x The input value to be checked and possibly truncated. */ template -inline void truncated_underflow(FPTYPE& x) +inline void + truncated_underflow (FPTYPE& x) { - if (std::abs(x) < 1.0e-30) - { - x = static_cast(0.0); - } + if (std::abs (x) < 1.0e-30) + { + x = static_cast (0.0); + } } template <> -inline void truncated_underflow(double& x) +inline void + truncated_underflow (double& x) { - const uint64_t u = *reinterpret_cast(&x); + const uint64_t u = *reinterpret_cast (&x); // The exponent bits are 52-62 (11 bits). The bias is 1023. // 1e-30 corresponds to -100 in base-2 exponent roughly. // 923 = 1023 - 100. if (((u >> 52) & 0x7FF) <= 923) - { - x = 0.0; - } + { + x = 0.0; + } } template <> -inline void truncated_underflow(float& x) +inline void + truncated_underflow (float& x) { - const uint32_t u = *reinterpret_cast(&x); + const uint32_t u = *reinterpret_cast (&x); // The exponent bits are 23-30 (8 bits). The bias is 127. // 1e-30 corresponds to -100 in base-2 exponent roughly. // 27 = 127 - 100. 
if (((u >> 23) & 0xFF) <= 27) - { - x = 0.0f; - } + { + x = 0.0f; + } } template -inline void truncated_underflow(std::complex& x) +inline void + truncated_underflow (std::complex& x) { - T* ptr = reinterpret_cast(&x); - truncated_underflow(ptr[0]); - truncated_underflow(ptr[1]); + T* ptr = reinterpret_cast (&x); + truncated_underflow (ptr[0]); + truncated_underflow (ptr[1]); } - } // namespace ModuleBase #endif // MODULE_BASE_TRUNCATED_FUNC_H \ No newline at end of file diff --git a/source/source_base/vector3.h b/source/source_base/vector3.h index 5fb2bf3f7e1..de7e54426b9 100644 --- a/source/source_base/vector3.h +++ b/source/source_base/vector3.h @@ -12,15 +12,16 @@ namespace ModuleBase { - // Small epsilon value for numerical comparisons - constexpr double epsilon = 1e-10; +// Small epsilon value for numerical comparisons +constexpr double epsilon = 1e-10; /** * @brief 3 elements vector * * @tparam T */ -template class Vector3 +template +class Vector3 { public: T x; @@ -34,24 +35,17 @@ template class Vector3 * @param y1 * @param z1 */ - Vector3(const T &x1 = 0, const T &y1 = 0, const T &z1 = 0) - : x(x1), y(y1), z(z1) - {} - Vector3(const Vector3 &v) - : x(v.x), y(v.y), z(v.z) - {} - explicit Vector3(const std::array &v) - : x(v[0]), y(v[1]), z(v[2]) - {} + Vector3 (const T& x1 = 0, const T& y1 = 0, const T& z1 = 0) : x (x1), y (y1), z (z1) {} + Vector3 (const Vector3& v) : x (v.x), y (v.y), z (v.z) {} + explicit Vector3 (const std::array& v) : x (v[0]), y (v[1]), z (v[2]) {} template - explicit Vector3(const Vector3& other) - : x(static_cast(other.x)), y(static_cast(other.y)), z(static_cast(other.z)) - {} + explicit Vector3 (const Vector3& other) + : x (static_cast (other.x)), y (static_cast (other.y)), z (static_cast (other.z)) + { + } - Vector3(Vector3 &&v) noexcept - : x(v.x), y(v.y), z(v.z) - {} + Vector3 (Vector3&& v) noexcept : x (v.x), y (v.y), z (v.z) {} /** * @brief set a 3d vector @@ -60,7 +54,8 @@ template class Vector3 * @param y1 * @param z1 */ - 
void set(const T &x1, const T &y1, const T &z1) + void + set (const T& x1, const T& y1, const T& z1) { x = x1; y = y1; @@ -73,7 +68,8 @@ template class Vector3 * @param u * @return Vector3& */ - Vector3 &operator=(const Vector3 &u) + Vector3& + operator= (const Vector3& u) { x = u.x; y = u.y; @@ -81,7 +77,8 @@ template class Vector3 return *this; } - Vector3 &operator=(const T &u) + Vector3& + operator= (const T& u) { x = u; y = u; @@ -95,7 +92,8 @@ template class Vector3 * @param u * @return Vector3& */ - Vector3 &operator=(Vector3 &&u) noexcept + Vector3& + operator= (Vector3&& u) noexcept { x = u.x; y = u.y; @@ -109,7 +107,8 @@ template class Vector3 * @param u * @return Vector3& */ - Vector3 &operator+=(const Vector3 &u) + Vector3& + operator+= (const Vector3& u) { x += u.x; y += u.y; @@ -123,7 +122,8 @@ template class Vector3 * @param u * @return Vector3& */ - Vector3 &operator-=(const Vector3 &u) + Vector3& + operator-= (const Vector3& u) { x -= u.x; y -= u.y; @@ -137,7 +137,8 @@ template class Vector3 * @param s * @return Vector3& */ - Vector3 &operator*=(const T &s) + Vector3& + operator*= (const T& s) { x *= s; y *= s; @@ -151,9 +152,10 @@ template class Vector3 * @param s * @return Vector3& */ - Vector3 &operator/=(const T &s) + Vector3& + operator/= (const T& s) { - assert(s != 0); // Avoid division by zero + assert (s != 0); // Avoid division by zero x /= s; y /= s; z /= s; @@ -165,9 +167,10 @@ template class Vector3 * * @return Vector3 */ - Vector3 operator-() const + Vector3 + operator- () const { - return Vector3(-x, -y, -z); + return Vector3 (-x, -y, -z); } /** @@ -176,10 +179,11 @@ template class Vector3 * @param index * @return T */ - T operator[](int index) const + T + operator[] (int index) const { - assert(index >= 0 && index < 3); - //return (&x)[index]; // this is undefind behavior and breaks with icpx + assert (index >= 0 && index < 3); + // return (&x)[index]; // this is undefind behavior and breaks with icpx T const* ptr[3] = {&x, &y, &z}; 
return *ptr[index]; } @@ -190,10 +194,11 @@ template class Vector3 * @param index * @return T& */ - T &operator[](int index) + T& + operator[] (int index) { - assert(index >= 0 && index < 3); - //return (&x)[index]; // this is undefind behavior and breaks with icpx + assert (index >= 0 && index < 3); + // return (&x)[index]; // this is undefind behavior and breaks with icpx T* ptr[3] = {&x, &y, &z}; return *ptr[index]; } @@ -203,7 +208,8 @@ template class Vector3 * * @return T */ - inline T norm2(void) const + inline T + norm2 () const { return x * x + y * y + z * z; } @@ -213,9 +219,10 @@ template class Vector3 * * @return T */ - inline T norm(void) const + inline T + norm () const { - return sqrt(norm2()); + return sqrt (norm2 ()); } /** @@ -223,15 +230,16 @@ template class Vector3 * * @return Vector3& */ - Vector3 &normalize(void) + Vector3& + normalize () { - const T m = norm(); + const T m = norm (); if (m > epsilon) // Avoid division by zero - { - x /= m; - y /= m; - z /= m; - } + { + x /= m; + y /= m; + z /= m; + } return *this; } // Peize Lin update return 2019-09-08 @@ -240,7 +248,8 @@ template class Vector3 * * @return Vector3& */ - Vector3 &reverse(void) + Vector3& + reverse () { x = -x; y = -y; @@ -258,7 +267,7 @@ template class Vector3 * * @param precision The number of decimal places to display (must be positive, default: 5) */ - void print(const int precision = 5) const; + void print (const int precision = 5) const; }; /** @@ -268,9 +277,11 @@ template class Vector3 * @param[in] v * @return Vector3 */ -template inline Vector3 operator+(const Vector3 &u, const Vector3 &v) +template +inline Vector3 + operator+ (const Vector3& u, const Vector3& v) { - return Vector3(u.x + v.x, u.y + v.y, u.z + v.z); + return Vector3 (u.x + v.x, u.y + v.y, u.z + v.z); } /** @@ -280,9 +291,11 @@ template inline Vector3 operator+(const Vector3 &u, const Vector * @param[in] v * @return Vector3 */ -template inline Vector3 operator-(const Vector3 &u, const Vector3 &v) 
+template +inline Vector3 + operator- (const Vector3& u, const Vector3& v) { - return Vector3(u.x - v.x, u.y - v.y, u.z - v.z); + return Vector3 (u.x - v.x, u.y - v.y, u.z - v.z); } /** @@ -293,7 +306,9 @@ template inline Vector3 operator-(const Vector3 &u, const Vector * @param v * @return template */ -template inline T operator*(const Vector3 &u, const Vector3 &v) +template +inline T + operator* (const Vector3& u, const Vector3& v) { return (u.x * v.x + u.y * v.y + u.z * v.z); } @@ -305,9 +320,11 @@ template inline T operator*(const Vector3 &u, const Vector3 &v) * @param[in] u * @return Vector3 */ -template inline Vector3 operator*(const T &s, const Vector3 &u) +template +inline Vector3 + operator* (const T& s, const Vector3& u) { - return Vector3(u.x * s, u.y * s, u.z * s); + return Vector3 (u.x * s, u.y * s, u.z * s); } /** @@ -317,9 +334,11 @@ template inline Vector3 operator*(const T &s, const Vector3 &u) * @param s * @return Vector3 */ -template inline Vector3 operator*(const Vector3 &u, const T &s) +template +inline Vector3 + operator* (const Vector3& u, const T& s) { - return Vector3(u.x * s, u.y * s, u.z * s); + return Vector3 (u.x * s, u.y * s, u.z * s); } // mohan add 2009-5-10 /** @@ -330,9 +349,11 @@ template inline Vector3 operator*(const Vector3 &u, const T &s) * @param s * @return Vector3 */ -template inline Vector3 operator/(const Vector3 &u, const T &s) +template +inline Vector3 + operator/ (const Vector3& u, const T& s) { - return Vector3(u.x / s, u.y / s, u.z / s); + return Vector3 (u.x / s, u.y / s, u.z / s); } /** @@ -343,9 +364,11 @@ template inline Vector3 operator/(const Vector3 &u, const T &s) * @param u * @return Vector3 */ -template inline Vector3 operator/(const T &s, const Vector3 &u) +template +inline Vector3 + operator/ (const T& s, const Vector3& u) { - return Vector3(s/u.x, s/u.y, s/u.z); + return Vector3 (s / u.x, s / u.y, s / u.z); } /** @@ -356,7 +379,9 @@ template inline Vector3 operator/(const T &s, const Vector3 &u) * 
@return T * @note u.v=(ux*vx)+(uy*vy)+(uz*vz) */ -template inline T dot(const Vector3 &u, const Vector3 &v) +template +inline T + dot (const Vector3& u, const Vector3& v) { return (u.x * v.x + u.y * v.y + u.z * v.z); } @@ -373,9 +398,11 @@ template inline T dot(const Vector3 &u, const Vector3 &v) * | vx vy vz | * u.v=(uy*vz-uz*vy)i+(-ux*vz+uz*vx)j+(ux*vy-uy*vx)k */ -template inline Vector3 operator^(const Vector3 &u, const Vector3 &v) +template +inline Vector3 + operator^ (const Vector3& u, const Vector3& v) { - return Vector3(u.y * v.z - u.z * v.y, -u.x * v.z + u.z * v.x, u.x * v.y - u.y * v.x); + return Vector3 (u.y * v.z - u.z * v.y, -u.x * v.z + u.z * v.x, u.x * v.y - u.y * v.x); } /** @@ -390,9 +417,11 @@ template inline Vector3 operator^(const Vector3 &u, const Vector * | vx vy vz | * u.v=(uy*vz-uz*vy)i+(-ux*vz+uz*vx)j+(ux*vy-uy*vx)k */ -template inline Vector3 cross(const Vector3 &u, const Vector3 &v) +template +inline Vector3 + cross (const Vector3& u, const Vector3& v) { - return Vector3(u.y * v.z - u.z * v.y, -u.x * v.z + u.z * v.x, u.x * v.y - u.y * v.x); + return Vector3 (u.y * v.z - u.z * v.y, -u.x * v.z + u.z * v.x, u.x * v.y - u.y * v.x); } // s = u.(v x w) // template T TripleScalarProduct(Vector3 u, Vector3 v, Vector3 w) @@ -403,43 +432,49 @@ template inline Vector3 cross(const Vector3 &u, const Vector3 // } // Overload the < operator for sorting -template bool operator<(const Vector3 &u, const Vector3 &v) +template +bool + operator< (const Vector3& u, const Vector3& v) { if (u.x < v.x) - { - return true; - } + { + return true; + } if (u.x > v.x) - { - return false; - } + { + return false; + } if (u.y < v.y) - { - return true; - } + { + return true; + } if (u.y > v.y) - { - return false; - } + { + return false; + } if (u.z < v.z) - { - return true; - } + { + return true; + } return false; } // whether m1 != m2 -template inline bool operator!=(const Vector3 &u, const Vector3 &v) +template +inline bool + operator!= (const Vector3& u, const Vector3& 
v) { return !(u == v); } // whether u == v -template inline bool operator==(const Vector3 &u, const Vector3 &v) +template +inline bool + operator== (const Vector3& u, const Vector3& v) { if (u.x == v.x && u.y == v.y && u.z == v.z) - { - return true; - } + { + return true; + } return false; } @@ -448,12 +483,14 @@ template inline bool operator==(const Vector3 &u, const Vector3 * * @param precision The number of decimal places to display (must be positive, default: 5) */ -template void Vector3::print(const int precision) const +template +void + Vector3::print (const int precision) const { // Ensure precision is non-negative int valid_precision = precision > 0 ? precision : 5; - std::cout.precision(valid_precision); - std::cout << "(" << std::setw(10) << x << "," << std::setw(10) << y << "," << std::setw(10) << z << ")" + std::cout.precision (valid_precision); + std::cout << "(" << std::setw (10) << x << "," << std::setw (10) << y << "," << std::setw (10) << z << ")" << std::endl; return; } @@ -467,9 +504,11 @@ template void Vector3::print(const int precision) const * @param[in] u * @return std::ostream& */ -template static std::ostream &operator<<(std::ostream &os, const Vector3 &u) +template +static std::ostream& + operator<< (std::ostream& os, const Vector3& u) { - os << "(" << std::setw(10) << u.x << "," << std::setw(10) << u.y << "," << std::setw(10) << u.z << ")"; + os << "(" << std::setw (10) << u.x << "," << std::setw (10) << u.y << "," << std::setw (10) << u.z << ")"; return os; } diff --git a/source/source_base/ylm.cpp b/source/source_base/ylm.cpp index 62d4d66707a..43b2aaff273 100644 --- a/source/source_base/ylm.cpp +++ b/source/source_base/ylm.cpp @@ -14,2046 +14,2172 @@ namespace ModuleBase int Ylm::nlm = 0; // here Lmax == max angular momentum + 1 -void Ylm::get_ylm_real( const int &Lmax, const ModuleBase::Vector3 &vec, double ylmr[] ) +void + Ylm::get_ylm_real (const int& Lmax, const ModuleBase::Vector3& vec, double ylmr[]) { - 
//ModuleBase::timer::start("Ylm","get_ylm_real"); - //1e-9 is too large - const double cut0 = 1e-12; - // allocate space. - Ylm::nlm = Lmax * Lmax; - if(Lmax==1) - { - for(int i=0; i 1.0 - cut0) - { - cost = sgn(cost) * (1.0 - cut0); - // std::cout << "\n" << "cost = " << cost << std::endl; - } - sint = sqrt(1.0 - cost*cost); - - if(vec.x > cut0) - { - phi = std::atan( vec.y / vec.x ); - } - else if( vec.x < -cut0 ) - { - phi = std::atan( vec.y / vec.x ) + ModuleBase::PI; - } - else - { - phi = ModuleBase::PI_HALF * ((vec.y >= 0.0) ? 1.0 : -1.0); - } - - //=============================== - // NAME : p(Legendre Polynomials) - //=============================== - double p[20][20]; - assert(Lmax <= 20); - - int m=0; - double x1=0.0; // x2; - int lm = -1; // must initialized! - - for (int l=0; l(l)) ;//liaochen modify 2009-09-06 - if (l%2 == 1) - { - x1 = -x1; - } - p[l][l] = x1; - } // end if - - // Y_lm, m = 0 - ++lm; - ylmr[lm] = c*p[0][l]; - - for(m=1;m<=l;m++) - { - // Y_lm, m > 0 - const double same = c * sqrt - ( - static_cast( Ylm::Fact(l - m)) / - static_cast( Ylm::Fact(l + m)) - ) - *ModuleBase::SQRT2; - - ++lm; - ylmr[lm] = same * p[m][l] * cos(m * phi); - - // Y_lm, m < 0 - ++lm; - ylmr[lm] = same * p[m][l] * sin(m * phi); - } - }// end do - - //ModuleBase::timer::end("Ylm", "get_ylm_real"); - return; + // ModuleBase::timer::start("Ylm","get_ylm_real"); + // 1e-9 is too large + const double cut0 = 1e-12; + // allocate space. + Ylm::nlm = Lmax * Lmax; + if (Lmax == 1) + { + for (int i = 0; i < Ylm::nlm; i++) + { + ylmr[i] = ModuleBase::SQRT_INVERSE_FOUR_PI; + } + } + + double cost = 0.0; // must initialized. 
+ double sint = cut0; + double phi = 0.0; + + double vnorm = vec.norm (); + + if (vnorm < cut0) + { + vnorm += cut0; + } + + cost = vec.z / vnorm; + + if (fabs (cost) > 1.0 - cut0) + { + cost = sgn (cost) * (1.0 - cut0); + // std::cout << "\n" << "cost = " << cost << std::endl; + } + sint = sqrt (1.0 - cost * cost); + + if (vec.x > cut0) + { + phi = std::atan (vec.y / vec.x); + } + else if (vec.x < -cut0) + { + phi = std::atan (vec.y / vec.x) + ModuleBase::PI; + } + else + { + phi = ModuleBase::PI_HALF * ((vec.y >= 0.0) ? 1.0 : -1.0); + } + + //=============================== + // NAME : p(Legendre Polynomials) + //=============================== + double p[20][20]; + assert (Lmax <= 20); + + int m = 0; + double x1 = 0.0; // x2; + int lm = -1; // must initialized! + + for (int l = 0; l < Lmax; l++) + { + const double c = sqrt ((2 * l + 1) / ModuleBase::FOUR_PI); + if (l == 0) + { + p[0][0] = 1.0; + } + else if (l == 1) + { + p[0][1] = cost; + p[1][1] = -sint; + } + else + { + const int l1 = l - 1; + const int l2 = l - 2; + const int l3 = 2 * l - 1; + // recursion on l for P(:,l,m) + for (m = 0; m <= l2; m++) // do m = 0, l - 2//mohan modify 2007-10-13 + { + p[m][l] = (cost * l3 * p[m][l1] - (l1 + m) * p[m][l2]) / (l - m); + } // end do + p[l1][l] = cost * l3 * p[l1][l1]; + x1 = Ylm::Semi_Fact (l3) * pow (sint, static_cast (l)); // liaochen modify 2009-09-06 + if (l % 2 == 1) + { + x1 = -x1; + } + p[l][l] = x1; + } // end if + + // Y_lm, m = 0 + ++lm; + ylmr[lm] = c * p[0][l]; + + for (m = 1; m <= l; m++) + { + // Y_lm, m > 0 + const double same + = c * sqrt (static_cast (Ylm::Fact (l - m)) / static_cast (Ylm::Fact (l + m))) + * ModuleBase::SQRT2; + + ++lm; + ylmr[lm] = same * p[m][l] * cos (m * phi); + + // Y_lm, m < 0 + ++lm; + ylmr[lm] = same * p[m][l] * sin (m * phi); + } + } // end do + + // ModuleBase::timer::end("Ylm", "get_ylm_real"); + return; } -void Ylm::get_ylm_real( const int &Lmax, const ModuleBase::Vector3 &vec, double ylmr[], double dylmdr[][3] ) 
+void + Ylm::get_ylm_real (const int& Lmax, const ModuleBase::Vector3& vec, double ylmr[], double dylmdr[][3]) { - //1e-9 is too large - const double cut0 = 1e-12; - // allocate space. - Ylm::nlm = Lmax * Lmax; - if(Lmax==1) - { - for(int i=0; i 1.0-cut0) - { - cost = sgn(cost) * (1.0 - cut0); - } - sint = sqrt(1.0 - cost*cost); - - if(vec.x > cut0) - { - phi = std::atan( vec.y / vec.x ); - } - else if( vec.x < -cut0 ) - { - phi = std::atan( vec.y / vec.x ) + ModuleBase::PI; - } - else - { - phi = ModuleBase::PI_HALF * ((vec.y >= 0.0) ? 1.0 : -1.0); - } - - //=============================== - // NAME : p(Legendre Polynomials) - //=============================== - double p[20][20]; - double dp[20][20]; - assert(Lmax <= 20); - - int m = 0; - int lm = -1; // must initialized! - for (int l=0; l(l)) ; - if (l%2 == 1) - { - p[l][l] = -p[l][l]; - } - } - - for(m=0; m <= l; m++) - { - if( m == l ) - { - dp[l][l] = l * cost * p[l][l] / sint; - } - else - { - dp[m][l] = (l * cost * p[m][l] - (l+m) * p[m][l-1]) / sint; - } - } - - // Y_lm, m = 0 - ++lm; - ylmr[lm] = c*p[0][l]; - - dylmdr[lm][0] = c * dp[0][l] * cost * cos(phi) / vnorm; - dylmdr[lm][1] = c * dp[0][l] * cost * sin(phi) / vnorm; - dylmdr[lm][2] = -c * dp[0][l] * sint / vnorm; - - for(m=1; m <= l; m++) - { - // Y_lm, m > 0 - const double same = c * sqrt - ( - static_cast( Ylm::Fact(l - m)) / - static_cast( Ylm::Fact(l + m)) - ) - *ModuleBase::SQRT2; - - ++lm; - ylmr[lm] = same * p[m][l] * cos(m * phi); - - dylmdr[lm][0] = same * dp[m][l] * cos(m * phi) * cost * cos(phi) / vnorm - + same * p[m][l] * m * sin(m * phi) * sin(phi) / sint / vnorm; - dylmdr[lm][1] = same * dp[m][l] * cos(m * phi) * cost * sin(phi) / vnorm - - same * p[m][l] * m * sin(m * phi) * cos(phi) / sint / vnorm; - dylmdr[lm][2] = -same * dp[m][l] * cos(m * phi) * sint / vnorm; - - // Y_lm, m < 0 - ++lm; - ylmr[lm] = same * p[m][l] * sin(m * phi); - - dylmdr[lm][0] = same * dp[m][l] * sin(m * phi) * cost * cos(phi) / vnorm - - same * p[m][l] * m * 
cos(m * phi) * sin(phi) / sint / vnorm; - dylmdr[lm][1] = same * dp[m][l] * sin(m * phi) * cost * sin(phi) / vnorm - + same * p[m][l] * m * cos(m * phi) * cos(phi) / sint / vnorm; - dylmdr[lm][2] = -same * dp[m][l] * sin(m * phi) * sint / vnorm; - } - }// end do - - return; + // 1e-9 is too large + const double cut0 = 1e-12; + // allocate space. + Ylm::nlm = Lmax * Lmax; + if (Lmax == 1) + { + for (int i = 0; i < Ylm::nlm; i++) + { + ylmr[i] = ModuleBase::SQRT_INVERSE_FOUR_PI; + for (int j = 0; j < 3; j++) + { + dylmdr[i][j] = 0.0; + } + } + } + + double cost = 0.0; // must initialized. + double sint = 0.0; + double phi = 0.0; + + double vnorm = vec.norm (); + + if (vnorm < cut0) + { + vnorm += cut0; + } + + cost = vec.z / vnorm; + + if (fabs (cost) > 1.0 - cut0) + { + cost = sgn (cost) * (1.0 - cut0); + } + sint = sqrt (1.0 - cost * cost); + + if (vec.x > cut0) + { + phi = std::atan (vec.y / vec.x); + } + else if (vec.x < -cut0) + { + phi = std::atan (vec.y / vec.x) + ModuleBase::PI; + } + else + { + phi = ModuleBase::PI_HALF * ((vec.y >= 0.0) ? 1.0 : -1.0); + } + + //=============================== + // NAME : p(Legendre Polynomials) + //=============================== + double p[20][20]; + double dp[20][20]; + assert (Lmax <= 20); + + int m = 0; + int lm = -1; // must initialized! 
+ for (int l = 0; l < Lmax; l++) + { + const double c = sqrt ((2 * l + 1) / ModuleBase::FOUR_PI); + if (l == 0) + { + p[0][0] = 1.0; + dp[0][0] = 0.0; + } + else if (l == 1) + { + p[0][1] = cost; + dp[0][1] = -sint; + + p[1][1] = -sint; + dp[1][1] = -cost; + } + else + { + const int l1 = l - 1; + const int l2 = l - 2; + const int l3 = 2 * l - 1; + // recursion on l for P(:,l,m) + for (m = 0; m <= l2; m++) // do m = 0, l - 2//mohan modify 2007-10-13 + { + p[m][l] = (cost * l3 * p[m][l1] - (l1 + m) * p[m][l2]) / (l - m); + } + + p[l1][l] = cost * l3 * p[l1][l1]; + + p[l][l] = Ylm::Semi_Fact (l3) * pow (sint, static_cast (l)); + if (l % 2 == 1) + { + p[l][l] = -p[l][l]; + } + } + + for (m = 0; m <= l; m++) + { + if (m == l) + { + dp[l][l] = l * cost * p[l][l] / sint; + } + else + { + dp[m][l] = (l * cost * p[m][l] - (l + m) * p[m][l - 1]) / sint; + } + } + + // Y_lm, m = 0 + ++lm; + ylmr[lm] = c * p[0][l]; + + dylmdr[lm][0] = c * dp[0][l] * cost * cos (phi) / vnorm; + dylmdr[lm][1] = c * dp[0][l] * cost * sin (phi) / vnorm; + dylmdr[lm][2] = -c * dp[0][l] * sint / vnorm; + + for (m = 1; m <= l; m++) + { + // Y_lm, m > 0 + const double same + = c * sqrt (static_cast (Ylm::Fact (l - m)) / static_cast (Ylm::Fact (l + m))) + * ModuleBase::SQRT2; + + ++lm; + ylmr[lm] = same * p[m][l] * cos (m * phi); + + dylmdr[lm][0] = same * dp[m][l] * cos (m * phi) * cost * cos (phi) / vnorm + + same * p[m][l] * m * sin (m * phi) * sin (phi) / sint / vnorm; + dylmdr[lm][1] = same * dp[m][l] * cos (m * phi) * cost * sin (phi) / vnorm + - same * p[m][l] * m * sin (m * phi) * cos (phi) / sint / vnorm; + dylmdr[lm][2] = -same * dp[m][l] * cos (m * phi) * sint / vnorm; + + // Y_lm, m < 0 + ++lm; + ylmr[lm] = same * p[m][l] * sin (m * phi); + + dylmdr[lm][0] = same * dp[m][l] * sin (m * phi) * cost * cos (phi) / vnorm + - same * p[m][l] * m * cos (m * phi) * sin (phi) / sint / vnorm; + dylmdr[lm][1] = same * dp[m][l] * sin (m * phi) * cost * sin (phi) / vnorm + + same * p[m][l] * m * cos (m * 
phi) * cos (phi) / sint / vnorm; + dylmdr[lm][2] = -same * dp[m][l] * sin (m * phi) * sint / vnorm; + } + } // end do + + return; } /*************************** * Solid Spherical Harmonic * *************************/ -void Ylm::rlylm -( - const int Lmax, //max momentum of l + 1 - const double x, - const double y, - const double z, - double rly[] -) +void + Ylm::rlylm (const int Lmax, // max momentum of l + 1 + const double x, + const double y, + const double z, + double rly[]) { -// ModuleBase::TITLE("Ylm","rlylm"); -// ModuleBase::timer::start("Ylm","rlylm"); - - int MaxL = Lmax - 1; - - assert(MaxL >= 0); - - //get xy_dependence - assert(MaxL <= 19); - - double Am[20]; - double Bm[20]; - -// ZEROS(Am, 20); -// ZEROS(Bm, 20); - - double x2, x3, x4, x5; - double y2, y3, y4, y5; - - x2 = x * x; - x3 = x2 * x; - x4 = x3 * x; - x5 = x4 * x; - - y2 = y * y; - y3 = y2 * y; - y4 = y3 * y; - y5 = y4 * y; - - //x-y dependence - //Am - //Bm - for(int im = 0; im < MaxL+1; im++) - { - if(im == 0) - { - Am[0] = 1.0; - Bm[0] = 0.0; - } - else if(im == 1) - { - Am[1] = x; - Bm[1] = y; - } - else if(im == 2) - { - Am[2] = x2- y2; - Bm[2] = 2.0 * x * y; - } - else if(im == 3) - { - Am[3] = x3 - 3.0 * x * y2; - Bm[3] = 3.0 * x2 * y - y3; - } - else if(im == 4) - { - Am[4] = x4 - 6.0 * x2 * y2 + y4; - Bm[4] = 4.0 * (x3 * y - x * y3); - } - else if(im == 5) - { - Am[5] = x5 - 10.0 * x3 * y2 + 5.0 * x * y4; - Bm[5] = 5.0 * x4 * y - 10.0 * x2 * y3 + y5; - } - else - { - for(int ip = 0; ip <= im; ip++) - { - double aux = Fact(im) / Fact(ip) / Fact(im - ip); - Am[im] += aux * pow(x, ip) * pow(y, im-ip) * cos( (im-ip) * ModuleBase::PI / 2.0 ); - Bm[im] += aux * pow(x, ip) * pow(y, im-ip) * sin( (im-ip) * ModuleBase::PI / 2.0 ); - } - } - } - - //z dependence - double zdep[20][20]; - -// for(int il = 0; il < 20; il++) -// { -// ZEROS(zdep[il], 20); -// } - - double z2 = z * z; - double z3 = z2 * z; - double z4 = z3 * z; - //double z5 = z4 * z; - - double r = sqrt(x*x + y*y + z*z); - double 
r2 = r * r; - double r3 = r2 * r; - double r4 = r3 * r; - - for(int il = 0; il < MaxL+1; il++) - { - if(il == 0) - { - zdep[0][0] = 1.0; - } - else if(il == 1) - { - zdep[1][0] = z; - zdep[1][1] = 1.0; - } - else if(il == 2) - { - zdep[2][0] = 0.5 * (3.0 * z2 - r2); - zdep[2][1] = sqrt(3.0) * z; - zdep[2][2] = sqrt(3.0) * 0.5; - } - else if(il == 3) - { - zdep[3][0] = 2.5 * z3 - 1.5 * z * r2; - zdep[3][1] = 0.25 * sqrt(6.0) * (5.0 * z2 - r2); - zdep[3][2] = 0.5 * sqrt(15.0) * z; - zdep[3][3] = 0.25 * sqrt(10.0); - } - else if(il == 4) - { - zdep[4][0] = 0.125 * (35.0 * z4 - 30.0 * r2 * z2 + 3.0 * r4); - zdep[4][1] = sqrt(10.0) * 0.25 * z * (7.0 * z2 - 3.0 * r2); - zdep[4][2] = sqrt(5.0) * 0.25 * (7.0 * z2 - r2); - zdep[4][3] = sqrt(70.0) * 0.25 * z; - zdep[4][4] = sqrt(35.0) * 0.125; - } - else if(il == 5) - { - zdep[5][0] = 0.125 * z *( 63.0 * z4 - 70.0 * z2 * r2 + 15.0 * r4); - zdep[5][1] = 0.125 * sqrt(15.0) * (21.0 * z4 - 14.0 * z2 * r2 + r4); - zdep[5][2] = 0.25 * sqrt(105.0) * z * (3.0 * z2 - r2); - zdep[5][3] = 0.0625 * sqrt(70.0) * (9.0 * z2 - r2); - zdep[5][4] = 0.375 * sqrt(35.0) * z; - zdep[5][5] = 0.1875 * sqrt(14.0); - } - else - { - for(int im = 0; im <= il; im++) - { - int kmax = static_cast( (il - im) / 2 ); - for(int ik = 0; ik <= kmax; ik++) - { - int twok = 2 * ik; - - double gamma = 0.0; - double aux0, aux1, aux2, aux3; - - aux0 = pow(-1.0, ik) * pow(2.0, -il); - aux1 = Fact(il) / Fact(ik) / Fact(il-ik); - aux2 = Fact(2*il - twok) / Fact(il) / Fact(il - twok); - aux3 = Fact(il - twok) / Fact(il - twok - im); - - gamma = aux0 * aux1 * aux2 * aux3; - - assert(il - twok - im >= 0); - zdep[il][im] += pow(r, twok) * pow(z, il-twok-im) * gamma; - } - - if(im >= 1) - { - zdep[il][im] *= sqrt(2 * Fact(il - im) / Fact(il + im)); - } - } - } - } - - //calc - int ic = 0; - for(int il = 0; il <= MaxL; il++) - { - double fac = sqrt( (2.0 * il + 1.0) / ModuleBase::FOUR_PI ); - - //m=0 - rly[ic] = Am[0] * zdep[il][0] * fac; - - ic++; - - //m ! 
= 0 - for(int im = 1; im <= il; im++) - { - //m>0 - rly[ic] = Am[im] * zdep[il][im] * pow(-1.0, im) * fac; - - ic++; - - //m<0 - rly[ic] = Bm[im] * zdep[il][im] * pow(-1.0, im) * fac; - - ic++; - } - } - -// ModuleBase::timer::end("Ylm", "rlylm"); - return; + // ModuleBase::TITLE("Ylm","rlylm"); + // ModuleBase::timer::start("Ylm","rlylm"); + + int MaxL = Lmax - 1; + + assert (MaxL >= 0); + + // get xy_dependence + assert (MaxL <= 19); + + double Am[20]; + double Bm[20]; + + // ZEROS(Am, 20); + // ZEROS(Bm, 20); + + double x2, x3, x4, x5; + double y2, y3, y4, y5; + + x2 = x * x; + x3 = x2 * x; + x4 = x3 * x; + x5 = x4 * x; + + y2 = y * y; + y3 = y2 * y; + y4 = y3 * y; + y5 = y4 * y; + + // x-y dependence + // Am + // Bm + for (int im = 0; im < MaxL + 1; im++) + { + if (im == 0) + { + Am[0] = 1.0; + Bm[0] = 0.0; + } + else if (im == 1) + { + Am[1] = x; + Bm[1] = y; + } + else if (im == 2) + { + Am[2] = x2 - y2; + Bm[2] = 2.0 * x * y; + } + else if (im == 3) + { + Am[3] = x3 - 3.0 * x * y2; + Bm[3] = 3.0 * x2 * y - y3; + } + else if (im == 4) + { + Am[4] = x4 - 6.0 * x2 * y2 + y4; + Bm[4] = 4.0 * (x3 * y - x * y3); + } + else if (im == 5) + { + Am[5] = x5 - 10.0 * x3 * y2 + 5.0 * x * y4; + Bm[5] = 5.0 * x4 * y - 10.0 * x2 * y3 + y5; + } + else + { + for (int ip = 0; ip <= im; ip++) + { + double aux = Fact (im) / Fact (ip) / Fact (im - ip); + Am[im] += aux * pow (x, ip) * pow (y, im - ip) * cos ((im - ip) * ModuleBase::PI / 2.0); + Bm[im] += aux * pow (x, ip) * pow (y, im - ip) * sin ((im - ip) * ModuleBase::PI / 2.0); + } + } + } + + // z dependence + double zdep[20][20]; + + // for(int il = 0; il < 20; il++) + // { + // ZEROS(zdep[il], 20); + // } + + double z2 = z * z; + double z3 = z2 * z; + double z4 = z3 * z; + // double z5 = z4 * z; + + double r = sqrt (x * x + y * y + z * z); + double r2 = r * r; + double r3 = r2 * r; + double r4 = r3 * r; + + for (int il = 0; il < MaxL + 1; il++) + { + if (il == 0) + { + zdep[0][0] = 1.0; + } + else if (il == 1) + { + 
zdep[1][0] = z; + zdep[1][1] = 1.0; + } + else if (il == 2) + { + zdep[2][0] = 0.5 * (3.0 * z2 - r2); + zdep[2][1] = sqrt (3.0) * z; + zdep[2][2] = sqrt (3.0) * 0.5; + } + else if (il == 3) + { + zdep[3][0] = 2.5 * z3 - 1.5 * z * r2; + zdep[3][1] = 0.25 * sqrt (6.0) * (5.0 * z2 - r2); + zdep[3][2] = 0.5 * sqrt (15.0) * z; + zdep[3][3] = 0.25 * sqrt (10.0); + } + else if (il == 4) + { + zdep[4][0] = 0.125 * (35.0 * z4 - 30.0 * r2 * z2 + 3.0 * r4); + zdep[4][1] = sqrt (10.0) * 0.25 * z * (7.0 * z2 - 3.0 * r2); + zdep[4][2] = sqrt (5.0) * 0.25 * (7.0 * z2 - r2); + zdep[4][3] = sqrt (70.0) * 0.25 * z; + zdep[4][4] = sqrt (35.0) * 0.125; + } + else if (il == 5) + { + zdep[5][0] = 0.125 * z * (63.0 * z4 - 70.0 * z2 * r2 + 15.0 * r4); + zdep[5][1] = 0.125 * sqrt (15.0) * (21.0 * z4 - 14.0 * z2 * r2 + r4); + zdep[5][2] = 0.25 * sqrt (105.0) * z * (3.0 * z2 - r2); + zdep[5][3] = 0.0625 * sqrt (70.0) * (9.0 * z2 - r2); + zdep[5][4] = 0.375 * sqrt (35.0) * z; + zdep[5][5] = 0.1875 * sqrt (14.0); + } + else + { + for (int im = 0; im <= il; im++) + { + int kmax = static_cast ((il - im) / 2); + for (int ik = 0; ik <= kmax; ik++) + { + int twok = 2 * ik; + + double gamma = 0.0; + double aux0, aux1, aux2, aux3; + + aux0 = pow (-1.0, ik) * pow (2.0, -il); + aux1 = Fact (il) / Fact (ik) / Fact (il - ik); + aux2 = Fact (2 * il - twok) / Fact (il) / Fact (il - twok); + aux3 = Fact (il - twok) / Fact (il - twok - im); + + gamma = aux0 * aux1 * aux2 * aux3; + + assert (il - twok - im >= 0); + zdep[il][im] += pow (r, twok) * pow (z, il - twok - im) * gamma; + } + + if (im >= 1) + { + zdep[il][im] *= sqrt (2 * Fact (il - im) / Fact (il + im)); + } + } + } + } + + // calc + int ic = 0; + for (int il = 0; il <= MaxL; il++) + { + double fac = sqrt ((2.0 * il + 1.0) / ModuleBase::FOUR_PI); + + // m=0 + rly[ic] = Am[0] * zdep[il][0] * fac; + + ic++; + + // m ! 
= 0 + for (int im = 1; im <= il; im++) + { + // m>0 + rly[ic] = Am[im] * zdep[il][im] * pow (-1.0, im) * fac; + + ic++; + + // m<0 + rly[ic] = Bm[im] * zdep[il][im] * pow (-1.0, im) * fac; + + ic++; + } + } + + // ModuleBase::timer::end("Ylm", "rlylm"); + return; } -//return ylm, not rlylm -void Ylm::sph_harm -( - const int Lmax, //max momentum of l - const double xdr, - const double ydr, - const double zdr, - std::vector &rly -) +// return ylm, not rlylm +void + Ylm::sph_harm (const int Lmax, // max momentum of l + const double xdr, + const double ydr, + const double zdr, + std::vector& rly) { - rly.resize( (Lmax+1)*(Lmax+1) ); - - //begin calculation - /*************************** - L = 0 - ***************************/ - rly[0] = ylmcoef[0]; //l=0, m=0 - if (Lmax == 0) return; - - /*************************** - L = 1 - ***************************/ - rly[1] = ylmcoef[1]*zdr; //l=1, m=0 - rly[2] = -ylmcoef[1]*xdr; //l=1, m=1 - rly[3] = -ylmcoef[1]*ydr; //l=1, m=-1 - if (Lmax == 1) return; - - /*************************** - L = 2 - ***************************/ - rly[4] = ylmcoef[2]*zdr*rly[1]-ylmcoef[3]*rly[0];//l=2, m=0 - - double tmp0 = ylmcoef[4]*zdr; - rly[5] = tmp0*rly[2];//l=2,m=1 - rly[6] = tmp0*rly[3];//l=2,m=-1 - - double tmp2 = ylmcoef[4]*xdr; - rly[7]= ylmcoef[5]*rly[4]-ylmcoef[6]*rly[0] - tmp2*rly[2];//l=2,m=2 - rly[8] = -tmp2*rly[3]; -// rly[8] = tmp1+tmp2*rly[3];//l=2,m=-2 - if (Lmax == 2) return; - - /*************************** - L = 3 - ***************************/ - rly[9] = ylmcoef[7]*zdr*rly[4]-ylmcoef[8]*rly[1]; //l=3, m=0 - - double tmp3 = ylmcoef[9]*zdr; - rly[10] = tmp3*rly[5]-ylmcoef[10]*rly[2];//l=3,m=1 - rly[11] = tmp3*rly[6]-ylmcoef[10]*rly[3];//l=3,m=-1 - - double tmp4 = ylmcoef[11]*zdr; - rly[12] = tmp4*rly[7];//l=3,m=2 - rly[13] = tmp4*rly[8];//l=3,m=-2 - - double tmp5 = ylmcoef[14]*xdr; - rly[14] = ylmcoef[12]*rly[10]-ylmcoef[13]*rly[2]-tmp5*rly[7];//l=3,m=3 - rly[15] = ylmcoef[12]*rly[11]-ylmcoef[13]*rly[3]-tmp5*rly[8];//l=3,m=-3 - 
if (Lmax == 3) return; - - /*************************** - L = 4 - ***************************/ - rly[16] = ylmcoef[15]*zdr*rly[9]-ylmcoef[16]*rly[4];//l=4,m=0 - - double tmp6 = ylmcoef[17]*zdr; - rly[17] = tmp6*rly[10]-ylmcoef[18]*rly[5];//l=4,m=1 - rly[18] = tmp6*rly[11]-ylmcoef[18]*rly[6];//l=4,m=-1 - - double tmp7 = ylmcoef[19]*zdr; - rly[19] = tmp7*rly[12]-ylmcoef[20]*rly[7];//l=4,m=2 - rly[20] = tmp7*rly[13]-ylmcoef[20]*rly[8];//l=4,m=-2 - - double tmp8 = 3.0*zdr; - rly[21] = tmp8*rly[14];//l=4,m=3 - rly[22] = tmp8*rly[15];//l=4,m=-3 - - double tmp9 = ylmcoef[23]*xdr; - rly[23] = ylmcoef[21]*rly[19]-ylmcoef[22]*rly[7]-tmp9*rly[14];//l=4,m=4 - rly[24] = ylmcoef[21]*rly[20]-ylmcoef[22]*rly[8]-tmp9*rly[15];//l=4,m=-4 - if (Lmax == 4) return; - - /*************************** - L = 5 - ***************************/ - rly[25] = ylmcoef[24]*zdr*rly[16]-ylmcoef[25]*rly[9];//l=5,m=0 - - double tmp10 = ylmcoef[26]*zdr; - rly[26] = tmp10*rly[17]-ylmcoef[27]*rly[10];//l=5,m=1 - rly[27] = tmp10*rly[18]-ylmcoef[27]*rly[11];//l=5,m=-1 - - double tmp11 = ylmcoef[28]*zdr; - rly[28] = tmp11*rly[19]-ylmcoef[29]*rly[12];//l=5,m=2 - rly[29] = tmp11*rly[20]-ylmcoef[29]*rly[13];//l=5,m=-2 - - double tmp12 = ylmcoef[30]*zdr; - rly[30] = tmp12*rly[21]-ylmcoef[31]*rly[14];//l=5,m=3 - rly[31] = tmp12*rly[22]-ylmcoef[31]*rly[15];//l=5,m=-3 - - double tmp13 = ylmcoef[32]*zdr; - rly[32] = tmp13*rly[23];//l=5,m=4 - rly[33] = tmp13*rly[24];//l=5,m=-4 - - double tmp14 = ylmcoef[35]*xdr; - rly[34] = ylmcoef[33]*rly[30]-ylmcoef[34]*rly[14]-tmp14*rly[23];//l=5,m=5 - rly[35] = ylmcoef[33]*rly[31]-ylmcoef[34]*rly[15]-tmp14*rly[24];//l=5,m=-5 - if (Lmax == 5) return; - - //if Lmax > 5 - for (int il = 6; il <= Lmax; il++) - { - int istart = il*il; - int istart1 = (il-1)*(il-1); - int istart2 = (il-2)*(il-2); - - double fac2 = sqrt(4.0*istart-1.0); - double fac4 = sqrt(4.0*istart1-1.0); - - for (int im = 0; im < 2*il-1; im++) - { - int imm = (im+1)/2; -// if (im % 2 == 0) imm *= -1; - - rly[istart+im] 
= fac2/sqrt((double)istart-imm*imm)* - (zdr*rly[istart1+im] - sqrt((double)istart1-imm*imm)/fac4*rly[istart2+im]); - } - - double bl1 = sqrt(2.0*il/(2.0*il+1.0)); - double bl2 = sqrt((2.0*il-2.0)/(2.0*il-1.0)); - double bl3 = sqrt(2.0)/fac2; - - rly[istart+2*il-1] = (bl3*rly[istart+2*il-5]-bl2*rly[istart2+2*il-5]-2.0*xdr*rly[istart1+2*il-3]) / bl1; - rly[istart+2*il] = (bl3*rly[istart+2*il-4]-bl2*rly[istart2+2*il-4]-2.0*xdr*rly[istart1+2*il-2]) / bl1; - } - - - return; + rly.resize ((Lmax + 1) * (Lmax + 1)); + + // begin calculation + /*************************** + L = 0 + ***************************/ + rly[0] = ylmcoef[0]; // l=0, m=0 + if (Lmax == 0) + { + return; + } + + /*************************** + L = 1 + ***************************/ + rly[1] = ylmcoef[1] * zdr; // l=1, m=0 + rly[2] = -ylmcoef[1] * xdr; // l=1, m=1 + rly[3] = -ylmcoef[1] * ydr; // l=1, m=-1 + if (Lmax == 1) + { + return; + } + + /*************************** + L = 2 + ***************************/ + rly[4] = ylmcoef[2] * zdr * rly[1] - ylmcoef[3] * rly[0]; // l=2, m=0 + + double tmp0 = ylmcoef[4] * zdr; + rly[5] = tmp0 * rly[2]; // l=2,m=1 + rly[6] = tmp0 * rly[3]; // l=2,m=-1 + + double tmp2 = ylmcoef[4] * xdr; + rly[7] = ylmcoef[5] * rly[4] - ylmcoef[6] * rly[0] - tmp2 * rly[2]; // l=2,m=2 + rly[8] = -tmp2 * rly[3]; + // rly[8] = tmp1+tmp2*rly[3];//l=2,m=-2 + if (Lmax == 2) + { + return; + } + + /*************************** + L = 3 + ***************************/ + rly[9] = ylmcoef[7] * zdr * rly[4] - ylmcoef[8] * rly[1]; // l=3, m=0 + + double tmp3 = ylmcoef[9] * zdr; + rly[10] = tmp3 * rly[5] - ylmcoef[10] * rly[2]; // l=3,m=1 + rly[11] = tmp3 * rly[6] - ylmcoef[10] * rly[3]; // l=3,m=-1 + + double tmp4 = ylmcoef[11] * zdr; + rly[12] = tmp4 * rly[7]; // l=3,m=2 + rly[13] = tmp4 * rly[8]; // l=3,m=-2 + + double tmp5 = ylmcoef[14] * xdr; + rly[14] = ylmcoef[12] * rly[10] - ylmcoef[13] * rly[2] - tmp5 * rly[7]; // l=3,m=3 + rly[15] = ylmcoef[12] * rly[11] - ylmcoef[13] * rly[3] - tmp5 * 
rly[8]; // l=3,m=-3 + if (Lmax == 3) + { + return; + } + + /*************************** + L = 4 + ***************************/ + rly[16] = ylmcoef[15] * zdr * rly[9] - ylmcoef[16] * rly[4]; // l=4,m=0 + + double tmp6 = ylmcoef[17] * zdr; + rly[17] = tmp6 * rly[10] - ylmcoef[18] * rly[5]; // l=4,m=1 + rly[18] = tmp6 * rly[11] - ylmcoef[18] * rly[6]; // l=4,m=-1 + + double tmp7 = ylmcoef[19] * zdr; + rly[19] = tmp7 * rly[12] - ylmcoef[20] * rly[7]; // l=4,m=2 + rly[20] = tmp7 * rly[13] - ylmcoef[20] * rly[8]; // l=4,m=-2 + + double tmp8 = 3.0 * zdr; + rly[21] = tmp8 * rly[14]; // l=4,m=3 + rly[22] = tmp8 * rly[15]; // l=4,m=-3 + + double tmp9 = ylmcoef[23] * xdr; + rly[23] = ylmcoef[21] * rly[19] - ylmcoef[22] * rly[7] - tmp9 * rly[14]; // l=4,m=4 + rly[24] = ylmcoef[21] * rly[20] - ylmcoef[22] * rly[8] - tmp9 * rly[15]; // l=4,m=-4 + if (Lmax == 4) + { + return; + } + + /*************************** + L = 5 + ***************************/ + rly[25] = ylmcoef[24] * zdr * rly[16] - ylmcoef[25] * rly[9]; // l=5,m=0 + + double tmp10 = ylmcoef[26] * zdr; + rly[26] = tmp10 * rly[17] - ylmcoef[27] * rly[10]; // l=5,m=1 + rly[27] = tmp10 * rly[18] - ylmcoef[27] * rly[11]; // l=5,m=-1 + + double tmp11 = ylmcoef[28] * zdr; + rly[28] = tmp11 * rly[19] - ylmcoef[29] * rly[12]; // l=5,m=2 + rly[29] = tmp11 * rly[20] - ylmcoef[29] * rly[13]; // l=5,m=-2 + + double tmp12 = ylmcoef[30] * zdr; + rly[30] = tmp12 * rly[21] - ylmcoef[31] * rly[14]; // l=5,m=3 + rly[31] = tmp12 * rly[22] - ylmcoef[31] * rly[15]; // l=5,m=-3 + + double tmp13 = ylmcoef[32] * zdr; + rly[32] = tmp13 * rly[23]; // l=5,m=4 + rly[33] = tmp13 * rly[24]; // l=5,m=-4 + + double tmp14 = ylmcoef[35] * xdr; + rly[34] = ylmcoef[33] * rly[30] - ylmcoef[34] * rly[14] - tmp14 * rly[23]; // l=5,m=5 + rly[35] = ylmcoef[33] * rly[31] - ylmcoef[34] * rly[15] - tmp14 * rly[24]; // l=5,m=-5 + if (Lmax == 5) + { + return; + } + + // if Lmax > 5 + for (int il = 6; il <= Lmax; il++) + { + int istart = il * il; + int istart1 = (il 
- 1) * (il - 1); + int istart2 = (il - 2) * (il - 2); + + double fac2 = sqrt (4.0 * istart - 1.0); + double fac4 = sqrt (4.0 * istart1 - 1.0); + + for (int im = 0; im < 2 * il - 1; im++) + { + int imm = (im + 1) / 2; + // if (im % 2 == 0) imm *= -1; + + rly[istart + im] + = fac2 / sqrt ((double)istart - imm * imm) + * (zdr * rly[istart1 + im] - sqrt ((double)istart1 - imm * imm) / fac4 * rly[istart2 + im]); + } + + double bl1 = sqrt (2.0 * il / (2.0 * il + 1.0)); + double bl2 = sqrt ((2.0 * il - 2.0) / (2.0 * il - 1.0)); + double bl3 = sqrt (2.0) / fac2; + + rly[istart + 2 * il - 1] = (bl3 * rly[istart + 2 * il - 5] - bl2 * rly[istart2 + 2 * il - 5] + - 2.0 * xdr * rly[istart1 + 2 * il - 3]) + / bl1; + rly[istart + 2 * il] = (bl3 * rly[istart + 2 * il - 4] - bl2 * rly[istart2 + 2 * il - 4] + - 2.0 * xdr * rly[istart1 + 2 * il - 2]) + / bl1; + } + + return; } // Peize Lin change rly 2016-08-26 -void Ylm::rl_sph_harm -( - const int Lmax, //max momentum of L - const double x, - const double y, - const double z, - std::vector& rly -) +void + Ylm::rl_sph_harm (const int Lmax, // max momentum of L + const double x, + const double y, + const double z, + std::vector& rly) { - rly.resize( (Lmax+1)*(Lmax+1) ); - - double radius2 = x*x+y*y+z*z; - - //begin calculation - /*************************** - L = 0 - ***************************/ - rly[0] = ylmcoef[0]; //l=0, m=0 - if (Lmax == 0) return; - - /*************************** - L = 1 - ***************************/ - rly[1] = ylmcoef[1]*z; //l=1, m=0 - rly[2] = -ylmcoef[1]*x; //l=1, m=1 - rly[3] = -ylmcoef[1]*y; //l=1, m=-1 - if (Lmax == 1) return; - - /*************************** - L = 2 - ***************************/ - rly[4] = ylmcoef[2]*z*rly[1]-ylmcoef[3]*rly[0]*radius2;//l=2, m=0 - - double tmp0 = ylmcoef[4]*z; - rly[5] = tmp0*rly[2];//l=2,m=1 - rly[6] = tmp0*rly[3];//l=2,m=-1 - - double tmp2 = ylmcoef[4]*x; - rly[7]= ylmcoef[5]*rly[4]-ylmcoef[6]*rly[0]*radius2 - tmp2*rly[2];//l=2,m=2 - rly[8] = -tmp2*rly[3]; -// rly[8] 
= tmp1+tmp2*rly[3];//l=2,m=-2 - if (Lmax == 2) return; - - /*************************** - L = 3 - ***************************/ - rly[9] = ylmcoef[7]*z*rly[4]-ylmcoef[8]*rly[1]*radius2; //l=3, m=0 - - double tmp3 = ylmcoef[9]*z; - rly[10] = tmp3*rly[5]-ylmcoef[10]*rly[2]*radius2;//l=3,m=1 - rly[11] = tmp3*rly[6]-ylmcoef[10]*rly[3]*radius2;//l=3,m=-1 - - double tmp4 = ylmcoef[11]*z; - rly[12] = tmp4*rly[7];//l=3,m=2 - rly[13] = tmp4*rly[8];//l=3,m=-2 - - double tmp5 = ylmcoef[14]*x; - rly[14] = ylmcoef[12]*rly[10]-ylmcoef[13]*rly[2]*radius2-tmp5*rly[7];//l=3,m=3 - rly[15] = ylmcoef[12]*rly[11]-ylmcoef[13]*rly[3]*radius2-tmp5*rly[8];//l=3,m=-3 - if (Lmax == 3) return; - - /*************************** - L = 4 - ***************************/ - rly[16] = ylmcoef[15]*z*rly[9]-ylmcoef[16]*rly[4]*radius2;//l=4,m=0 - - double tmp6 = ylmcoef[17]*z; - rly[17] = tmp6*rly[10]-ylmcoef[18]*rly[5]*radius2;//l=4,m=1 - rly[18] = tmp6*rly[11]-ylmcoef[18]*rly[6]*radius2;//l=4,m=-1 - - double tmp7 = ylmcoef[19]*z; - rly[19] = tmp7*rly[12]-ylmcoef[20]*rly[7]*radius2;//l=4,m=2 - rly[20] = tmp7*rly[13]-ylmcoef[20]*rly[8]*radius2;//l=4,m=-2 - - double tmp8 = 3.0*z; - rly[21] = tmp8*rly[14];//l=4,m=3 - rly[22] = tmp8*rly[15];//l=4,m=-3 - - double tmp9 = ylmcoef[23]*x; - rly[23] = ylmcoef[21]*rly[19]-ylmcoef[22]*rly[7]*radius2-tmp9*rly[14];//l=4,m=4 - rly[24] = ylmcoef[21]*rly[20]-ylmcoef[22]*rly[8]*radius2-tmp9*rly[15];//l=4,m=-4 - if (Lmax == 4) return; - - /*************************** - L = 5 - ***************************/ - rly[25] = ylmcoef[24]*z*rly[16]-ylmcoef[25]*rly[9]*radius2;//l=5,m=0 - - double tmp10 = ylmcoef[26]*z; - rly[26] = tmp10*rly[17]-ylmcoef[27]*rly[10]*radius2;//l=5,m=1 - rly[27] = tmp10*rly[18]-ylmcoef[27]*rly[11]*radius2;//l=5,m=-1 - - double tmp11 = ylmcoef[28]*z; - rly[28] = tmp11*rly[19]-ylmcoef[29]*rly[12]*radius2;//l=5,m=2 - rly[29] = tmp11*rly[20]-ylmcoef[29]*rly[13]*radius2;//l=5,m=-2 - - double tmp12 = ylmcoef[30]*z; - rly[30] = 
tmp12*rly[21]-ylmcoef[31]*rly[14]*radius2;//l=5,m=3 - rly[31] = tmp12*rly[22]-ylmcoef[31]*rly[15]*radius2;//l=5,m=-3 - - double tmp13 = ylmcoef[32]*z; - rly[32] = tmp13*rly[23];//l=5,m=4 - rly[33] = tmp13*rly[24];//l=5,m=-4 - - double tmp14 = ylmcoef[35]*x; - rly[34] = ylmcoef[33]*rly[30]-ylmcoef[34]*rly[14]*radius2-tmp14*rly[23];//l=5,m=5 - rly[35] = ylmcoef[33]*rly[31]-ylmcoef[34]*rly[15]*radius2-tmp14*rly[24];//l=5,m=-5 - if (Lmax == 5) return; - - //if Lmax > 5 - for (int il = 6; il <= Lmax; il++) - { - int istart = il*il; - int istart1 = (il-1)*(il-1); - int istart2 = (il-2)*(il-2); - - double fac2 = sqrt(4.0*istart-1); - double fac4 = sqrt(4.0*istart1-1); - - for (int im = 0; im < 2*il-1; im++) - { - int imm = (im+1)/2; -// if (im % 2 == 0) imm *= -1; - - rly[istart+im] = fac2/sqrt((double)istart-imm*imm)* - (z*rly[istart1+im] - sqrt((double)istart1-imm*imm)/fac4*rly[istart2+im]*radius2); - } - - double bl1 = sqrt(2.0*il/(2.0*il+1.0)); - double bl2 = sqrt((2.0*il-2.0)/(2.0*il-1.0)); - double bl3 = sqrt(2.0)/fac2; - - rly[istart+2*il-1] = (bl3*rly[istart+2*il-5]-bl2*rly[istart2+2*il-5]*radius2-2.0*x*rly[istart1+2*il-3]) / bl1; - rly[istart+2*il] = (bl3*rly[istart+2*il-4]-bl2*rly[istart2+2*il-4]*radius2-2.0*x*rly[istart1+2*il-2]) / bl1; - } - - return; + rly.resize ((Lmax + 1) * (Lmax + 1)); + + double radius2 = x * x + y * y + z * z; + + // begin calculation + /*************************** + L = 0 + ***************************/ + rly[0] = ylmcoef[0]; // l=0, m=0 + if (Lmax == 0) + { + return; + } + + /*************************** + L = 1 + ***************************/ + rly[1] = ylmcoef[1] * z; // l=1, m=0 + rly[2] = -ylmcoef[1] * x; // l=1, m=1 + rly[3] = -ylmcoef[1] * y; // l=1, m=-1 + if (Lmax == 1) + { + return; + } + + /*************************** + L = 2 + ***************************/ + rly[4] = ylmcoef[2] * z * rly[1] - ylmcoef[3] * rly[0] * radius2; // l=2, m=0 + + double tmp0 = ylmcoef[4] * z; + rly[5] = tmp0 * rly[2]; // l=2,m=1 + rly[6] = tmp0 * 
rly[3]; // l=2,m=-1 + + double tmp2 = ylmcoef[4] * x; + rly[7] = ylmcoef[5] * rly[4] - ylmcoef[6] * rly[0] * radius2 - tmp2 * rly[2]; // l=2,m=2 + rly[8] = -tmp2 * rly[3]; + // rly[8] = tmp1+tmp2*rly[3];//l=2,m=-2 + if (Lmax == 2) + { + return; + } + + /*************************** + L = 3 + ***************************/ + rly[9] = ylmcoef[7] * z * rly[4] - ylmcoef[8] * rly[1] * radius2; // l=3, m=0 + + double tmp3 = ylmcoef[9] * z; + rly[10] = tmp3 * rly[5] - ylmcoef[10] * rly[2] * radius2; // l=3,m=1 + rly[11] = tmp3 * rly[6] - ylmcoef[10] * rly[3] * radius2; // l=3,m=-1 + + double tmp4 = ylmcoef[11] * z; + rly[12] = tmp4 * rly[7]; // l=3,m=2 + rly[13] = tmp4 * rly[8]; // l=3,m=-2 + + double tmp5 = ylmcoef[14] * x; + rly[14] = ylmcoef[12] * rly[10] - ylmcoef[13] * rly[2] * radius2 - tmp5 * rly[7]; // l=3,m=3 + rly[15] = ylmcoef[12] * rly[11] - ylmcoef[13] * rly[3] * radius2 - tmp5 * rly[8]; // l=3,m=-3 + if (Lmax == 3) + { + return; + } + + /*************************** + L = 4 + ***************************/ + rly[16] = ylmcoef[15] * z * rly[9] - ylmcoef[16] * rly[4] * radius2; // l=4,m=0 + + double tmp6 = ylmcoef[17] * z; + rly[17] = tmp6 * rly[10] - ylmcoef[18] * rly[5] * radius2; // l=4,m=1 + rly[18] = tmp6 * rly[11] - ylmcoef[18] * rly[6] * radius2; // l=4,m=-1 + + double tmp7 = ylmcoef[19] * z; + rly[19] = tmp7 * rly[12] - ylmcoef[20] * rly[7] * radius2; // l=4,m=2 + rly[20] = tmp7 * rly[13] - ylmcoef[20] * rly[8] * radius2; // l=4,m=-2 + + double tmp8 = 3.0 * z; + rly[21] = tmp8 * rly[14]; // l=4,m=3 + rly[22] = tmp8 * rly[15]; // l=4,m=-3 + + double tmp9 = ylmcoef[23] * x; + rly[23] = ylmcoef[21] * rly[19] - ylmcoef[22] * rly[7] * radius2 - tmp9 * rly[14]; // l=4,m=4 + rly[24] = ylmcoef[21] * rly[20] - ylmcoef[22] * rly[8] * radius2 - tmp9 * rly[15]; // l=4,m=-4 + if (Lmax == 4) + { + return; + } + + /*************************** + L = 5 + ***************************/ + rly[25] = ylmcoef[24] * z * rly[16] - ylmcoef[25] * rly[9] * radius2; // l=5,m=0 + + double 
tmp10 = ylmcoef[26] * z; + rly[26] = tmp10 * rly[17] - ylmcoef[27] * rly[10] * radius2; // l=5,m=1 + rly[27] = tmp10 * rly[18] - ylmcoef[27] * rly[11] * radius2; // l=5,m=-1 + + double tmp11 = ylmcoef[28] * z; + rly[28] = tmp11 * rly[19] - ylmcoef[29] * rly[12] * radius2; // l=5,m=2 + rly[29] = tmp11 * rly[20] - ylmcoef[29] * rly[13] * radius2; // l=5,m=-2 + + double tmp12 = ylmcoef[30] * z; + rly[30] = tmp12 * rly[21] - ylmcoef[31] * rly[14] * radius2; // l=5,m=3 + rly[31] = tmp12 * rly[22] - ylmcoef[31] * rly[15] * radius2; // l=5,m=-3 + + double tmp13 = ylmcoef[32] * z; + rly[32] = tmp13 * rly[23]; // l=5,m=4 + rly[33] = tmp13 * rly[24]; // l=5,m=-4 + + double tmp14 = ylmcoef[35] * x; + rly[34] = ylmcoef[33] * rly[30] - ylmcoef[34] * rly[14] * radius2 - tmp14 * rly[23]; // l=5,m=5 + rly[35] = ylmcoef[33] * rly[31] - ylmcoef[34] * rly[15] * radius2 - tmp14 * rly[24]; // l=5,m=-5 + if (Lmax == 5) + { + return; + } + + // if Lmax > 5 + for (int il = 6; il <= Lmax; il++) + { + int istart = il * il; + int istart1 = (il - 1) * (il - 1); + int istart2 = (il - 2) * (il - 2); + + double fac2 = sqrt (4.0 * istart - 1); + double fac4 = sqrt (4.0 * istart1 - 1); + + for (int im = 0; im < 2 * il - 1; im++) + { + int imm = (im + 1) / 2; + // if (im % 2 == 0) imm *= -1; + + rly[istart + im] = fac2 / sqrt ((double)istart - imm * imm) + * (z * rly[istart1 + im] + - sqrt ((double)istart1 - imm * imm) / fac4 * rly[istart2 + im] * radius2); + } + + double bl1 = sqrt (2.0 * il / (2.0 * il + 1.0)); + double bl2 = sqrt ((2.0 * il - 2.0) / (2.0 * il - 1.0)); + double bl3 = sqrt (2.0) / fac2; + + rly[istart + 2 * il - 1] = (bl3 * rly[istart + 2 * il - 5] - bl2 * rly[istart2 + 2 * il - 5] * radius2 + - 2.0 * x * rly[istart1 + 2 * il - 3]) + / bl1; + rly[istart + 2 * il] = (bl3 * rly[istart + 2 * il - 4] - bl2 * rly[istart2 + 2 * il - 4] * radius2 + - 2.0 * x * rly[istart1 + 2 * il - 2]) + / bl1; + } + + return; } -void Ylm::grad_rl_sph_harm -( - const int Lmax, //max momentum of L - 
const double x, - const double y, - const double z, - double* rly, - double* grly_flat -) +void + Ylm::grad_rl_sph_harm (const int Lmax, // max momentum of L + const double x, + const double y, + const double z, + double* rly, + double* grly_flat) { - // Alias the flat buffer as a pointer-to-array-of-3-doubles so the body - // below can continue to use the natural grly[lm][xyz] indexing without - // any performance penalty — the memory layout is unchanged. - double (*grly)[3] = reinterpret_cast(grly_flat); - double radius2 = x*x+y*y+z*z; - double tx = 2.0*x; - double ty = 2.0*y; - double tz = 2.0*z; - - //begin calculation - /*************************** - L = 0 - ***************************/ - rly[0] = ylmcoef[0]; //l=0, m=0 - grly[0][0] = grly[0][1] = grly[0][2] = 0.0; - if (Lmax == 0) return; - - /*************************** - L = 1 - ***************************/ - rly[1] = ylmcoef[1]*z; //l=1, m=0 - grly[1][0] = grly[1][1] = 0.0; - grly[1][2] = ylmcoef[1]; - - rly[2] = -ylmcoef[1]*x; //l=1, m=1 - grly[2][1] = grly[2][2] = 0.0; - grly[2][0] = -ylmcoef[1]; - - rly[3] = -ylmcoef[1]*y; //l=1, m=-1 - grly[3][0] = grly[3][2] = 0.0; - grly[3][1] = -ylmcoef[1]; - - if (Lmax == 1) return; - - /*************************** - L = 2 - ***************************/ - rly[4] = ylmcoef[2]*z*rly[1]-ylmcoef[3]*rly[0]*radius2;//l=2, m=0 - grly[4][0] = ylmcoef[2]*z*grly[1][0]-ylmcoef[3]*(grly[0][0]*radius2+rly[0]*tx);//l=2, m=0 - grly[4][1] = ylmcoef[2]*z*grly[1][1]-ylmcoef[3]*(grly[0][1]*radius2+rly[0]*ty);//l=2, m=0 - grly[4][2] = ylmcoef[2]*(z*grly[1][2]+rly[1])-ylmcoef[3]*(grly[0][2]*radius2+rly[0]*tz);//l=2, m=0 - - - double tmp0 = ylmcoef[4]*z; - rly[5] = tmp0*rly[2];//l=2,m=1 - grly[5][0] = tmp0*grly[2][0]; - grly[5][1] = tmp0*grly[2][1]; - grly[5][2] = ylmcoef[4]*(rly[2]+z*grly[2][2]); - - rly[6] = tmp0*rly[3];//l=2,m=-1 - grly[6][0] = tmp0*grly[3][0]; - grly[6][1] = tmp0*grly[3][1]; - grly[6][2] = ylmcoef[4]*(rly[3]+z*grly[3][2]); - - double tmp2 = ylmcoef[4]*x; - rly[7]= 
ylmcoef[5]*rly[4]-ylmcoef[6]*rly[0]*radius2 - tmp2*rly[2];//l=2,m=2 - grly[7][0] = ylmcoef[5]*grly[4][0]-ylmcoef[6]*(rly[0]*tx+grly[0][0]*radius2)-ylmcoef[4]*(x*grly[2][0]+rly[2]); - -// std::cout << "\np1 = "<< ylmcoef[5]*grly[4][0] << " p2 = " << -ylmcoef[6]*rly[0]*tx -// << " p3 = " << -ylmcoef[4]*x*grly[2][0] << " p4 = " << -ylmcoef[4]*rly[2] << std::endl; - - grly[7][1] = ylmcoef[5]*grly[4][1]-ylmcoef[6]*(rly[0]*ty+grly[0][1]*radius2)-tmp2*grly[2][1]; - grly[7][2] = ylmcoef[5]*grly[4][2]-ylmcoef[6]*(rly[0]*tz+grly[0][2]*radius2)-tmp2*grly[2][2]; - - rly[8] = -tmp2*rly[3]; - grly[8][0] = -ylmcoef[4]*(rly[3]+x*grly[3][0]); - grly[8][1] = -tmp2*grly[3][1]; - grly[8][2] = -tmp2*grly[3][2]; -// rly[8] = tmp1+tmp2*rly[3];//l=2,m=-2 - if (Lmax == 2) return; - - /*************************** - L = 3 - ***************************/ - rly[9] = ylmcoef[7]*z*rly[4]-ylmcoef[8]*rly[1]*radius2; //l=3, m=0 - grly[9][0] = ylmcoef[7]*z*grly[4][0]-ylmcoef[8]*(rly[1]*tx+grly[1][0]*radius2); - grly[9][1] = ylmcoef[7]*z*grly[4][1]-ylmcoef[8]*(rly[1]*ty+grly[1][1]*radius2); - grly[9][2] = ylmcoef[7]*(rly[4]+z*grly[4][2])-ylmcoef[8]*(rly[1]*tz+grly[1][2]*radius2); - - double tmp3 = ylmcoef[9]*z; - rly[10] = tmp3*rly[5]-ylmcoef[10]*rly[2]*radius2;//l=3,m=1 - grly[10][0] = tmp3*grly[5][0]-ylmcoef[10]*(grly[2][0]*radius2+rly[2]*tx); - grly[10][1] = tmp3*grly[5][1]-ylmcoef[10]*(grly[2][1]*radius2+rly[2]*ty); - grly[10][2] = ylmcoef[9]*(z*grly[5][2]+rly[5])-ylmcoef[10]*(grly[2][2]*radius2+rly[2]*tz); - - rly[11] = tmp3*rly[6]-ylmcoef[10]*rly[3]*radius2;//l=3,m=-1 - grly[11][0] = tmp3*grly[6][0]-ylmcoef[10]*(grly[3][0]*radius2+rly[3]*tx); - grly[11][1] = tmp3*grly[6][1]-ylmcoef[10]*(grly[3][1]*radius2+rly[3]*ty); - grly[11][2] = ylmcoef[9]*(z*grly[6][2]+rly[6])-ylmcoef[10]*(grly[3][2]*radius2+rly[3]*tz); - - double tmp4 = ylmcoef[11]*z; - rly[12] = tmp4*rly[7];//l=3,m=2 - grly[12][0] = tmp4*grly[7][0]; - grly[12][1] = tmp4*grly[7][1]; - grly[12][2] = ylmcoef[11]*(z*grly[7][2]+rly[7]); - - 
rly[13] = tmp4*rly[8];//l=3,m=-2 - grly[13][0] = tmp4*grly[8][0]; - grly[13][1] = tmp4*grly[8][1]; - grly[13][2] = ylmcoef[11]*(z*grly[8][2]+rly[8]); - - double tmp5 = ylmcoef[14]*x; - rly[14] = ylmcoef[12]*rly[10]-ylmcoef[13]*rly[2]*radius2-tmp5*rly[7];//l=3,m=3 - grly[14][0] = ylmcoef[12]*grly[10][0]-ylmcoef[13]*(rly[2]*tx+grly[2][0]*radius2)-ylmcoef[14]*(rly[7]+x*grly[7][0]); - grly[14][1] = ylmcoef[12]*grly[10][1]-ylmcoef[13]*(rly[2]*ty+grly[2][1]*radius2)-tmp5*grly[7][1]; - grly[14][2] = ylmcoef[12]*grly[10][2]-ylmcoef[13]*(rly[2]*tz+grly[2][2]*radius2)-tmp5*grly[7][2]; - - rly[15] = ylmcoef[12]*rly[11]-ylmcoef[13]*rly[3]*radius2-tmp5*rly[8];//l=3,m=-3 - grly[15][0] = ylmcoef[12]*grly[11][0]-ylmcoef[13]*(rly[3]*tx+grly[3][0]*radius2)-ylmcoef[14]*(rly[8]+x*grly[8][0]); - grly[15][1] = ylmcoef[12]*grly[11][1]-ylmcoef[13]*(rly[3]*ty+grly[3][1]*radius2)-tmp5*grly[8][1]; - grly[15][2] = ylmcoef[12]*grly[11][2]-ylmcoef[13]*(rly[3]*tz+grly[3][2]*radius2)-tmp5*grly[8][2]; - if (Lmax == 3) return; - - /*************************** - L = 4 - ***************************/ - rly[16] = ylmcoef[15]*z*rly[9]-ylmcoef[16]*rly[4]*radius2;//l=4,m=0 - grly[16][0] = ylmcoef[15]*z*grly[9][0]-ylmcoef[16]*(rly[4]*tx+grly[4][0]*radius2); - grly[16][1] = ylmcoef[15]*z*grly[9][1]-ylmcoef[16]*(rly[4]*ty+grly[4][1]*radius2); - grly[16][2] = ylmcoef[15]*(z*grly[9][2]+rly[9])-ylmcoef[16]*(rly[4]*tz+grly[4][2]*radius2); - - double tmp6 = ylmcoef[17]*z; - rly[17] = tmp6*rly[10]-ylmcoef[18]*rly[5]*radius2;//l=4,m=1 - grly[17][0] = tmp6*grly[10][0]-ylmcoef[18]*(rly[5]*tx+grly[5][0]*radius2); - grly[17][1] = tmp6*grly[10][1]-ylmcoef[18]*(rly[5]*ty+grly[5][1]*radius2); - grly[17][2] = ylmcoef[17]*(z*grly[10][2]+rly[10])-ylmcoef[18]*(rly[5]*tz+grly[5][2]*radius2); - - rly[18] = tmp6*rly[11]-ylmcoef[18]*rly[6]*radius2;//l=4,m=-1 - grly[18][0] = tmp6*grly[11][0]-ylmcoef[18]*(rly[6]*tx+grly[6][0]*radius2); - grly[18][1] = tmp6*grly[11][1]-ylmcoef[18]*(rly[6]*ty+grly[6][1]*radius2); - grly[18][2] = 
ylmcoef[17]*(z*grly[11][2]+rly[11])-ylmcoef[18]*(rly[6]*tz+grly[6][2]*radius2); - - double tmp7 = ylmcoef[19]*z; - rly[19] = tmp7*rly[12]-ylmcoef[20]*rly[7]*radius2;//l=4,m=2 - grly[19][0] = tmp7*grly[12][0]-ylmcoef[20]*(rly[7]*tx+grly[7][0]*radius2); - grly[19][1] = tmp7*grly[12][1]-ylmcoef[20]*(rly[7]*ty+grly[7][1]*radius2); - grly[19][2] = ylmcoef[19]*(z*grly[12][2]+rly[12])-ylmcoef[20]*(rly[7]*tz+grly[7][2]*radius2); - - rly[20] = tmp7*rly[13]-ylmcoef[20]*rly[8]*radius2;//l=4,m=-2 - grly[20][0] = tmp7*grly[13][0]-ylmcoef[20]*(rly[8]*tx+grly[8][0]*radius2); - grly[20][1] = tmp7*grly[13][1]-ylmcoef[20]*(rly[8]*ty+grly[8][1]*radius2); - grly[20][2] = ylmcoef[19]*(z*grly[13][2]+rly[13])-ylmcoef[20]*(rly[8]*tz+grly[8][2]*radius2); - - double tmp8 = 3.0*z; - rly[21] = tmp8*rly[14];//l=4,m=3 - grly[21][0] = tmp8*grly[14][0]; - grly[21][1] = tmp8*grly[14][1]; - grly[21][2] = 3.0*(z*grly[14][2]+rly[14]); - - - rly[22] = tmp8*rly[15];//l=4,m=-3 - grly[22][0] = tmp8*grly[15][0]; - grly[22][1] = tmp8*grly[15][1]; - grly[22][2] = 3.0*(z*grly[15][2]+rly[15]); - - double tmp9 = ylmcoef[23]*x; - rly[23] = ylmcoef[21]*rly[19]-ylmcoef[22]*rly[7]*radius2-tmp9*rly[14];//l=4,m=4 - grly[23][0] = ylmcoef[21]*grly[19][0]-ylmcoef[22]*(rly[7]*tx+grly[7][0]*radius2)-ylmcoef[23]*(x*grly[14][0]+rly[14]); - grly[23][1] = ylmcoef[21]*grly[19][1]-ylmcoef[22]*(rly[7]*ty+grly[7][1]*radius2)-tmp9*grly[14][1]; - grly[23][2] = ylmcoef[21]*grly[19][2]-ylmcoef[22]*(rly[7]*tz+grly[7][2]*radius2)-tmp9*grly[14][2]; - - rly[24] = ylmcoef[21]*rly[20]-ylmcoef[22]*rly[8]*radius2-tmp9*rly[15];//l=4,m=-4 - grly[24][0] = ylmcoef[21]*grly[20][0]-ylmcoef[22]*(rly[8]*tx+grly[8][0]*radius2)-ylmcoef[23]*(x*grly[15][0]+rly[15]); - grly[24][1] = ylmcoef[21]*grly[20][1]-ylmcoef[22]*(rly[8]*ty+grly[8][1]*radius2)-tmp9*grly[15][1]; - grly[24][2] = ylmcoef[21]*grly[20][2]-ylmcoef[22]*(rly[8]*tz+grly[8][2]*radius2)-tmp9*grly[15][2]; - - if (Lmax == 4) return; - - /*************************** - L = 5 - 
***************************/ - rly[25] = ylmcoef[24]*z*rly[16]-ylmcoef[25]*rly[9]*radius2;//l=5,m=0 - grly[25][0] = ylmcoef[24]*z*grly[16][0]-ylmcoef[25]*(rly[9]*tx+grly[9][0]*radius2); - grly[25][1] = ylmcoef[24]*z*grly[16][1]-ylmcoef[25]*(rly[9]*ty+grly[9][1]*radius2); - grly[25][2] = ylmcoef[24]*(z*grly[16][2]+rly[16])-ylmcoef[25]*(rly[9]*tz+grly[9][2]*radius2); - - double tmp10 = ylmcoef[26]*z; - rly[26] = tmp10*rly[17]-ylmcoef[27]*rly[10]*radius2;//l=5,m=1 - grly[26][0] = tmp10*grly[17][0]-ylmcoef[27]*(rly[10]*tx+grly[10][0]*radius2); - grly[26][1] = tmp10*grly[17][1]-ylmcoef[27]*(rly[10]*ty+grly[10][1]*radius2); - grly[26][2] = ylmcoef[26]*(z*grly[17][2]+rly[17])-ylmcoef[27]*(rly[10]*tz+grly[10][2]*radius2); - - rly[27] = tmp10*rly[18]-ylmcoef[27]*rly[11]*radius2;//l=5,m=-1 - grly[27][0] = tmp10*grly[18][0]-ylmcoef[27]*(rly[11]*tx+grly[11][0]*radius2); - grly[27][1] = tmp10*grly[18][1]-ylmcoef[27]*(rly[11]*ty+grly[11][1]*radius2); - grly[27][2] = ylmcoef[26]*(z*grly[18][2]+rly[18])-ylmcoef[27]*(rly[11]*tz+grly[11][2]*radius2); - - double tmp11 = ylmcoef[28]*z; - rly[28] = tmp11*rly[19]-ylmcoef[29]*rly[12]*radius2;//l=5,m=2 - grly[28][0] = tmp11*grly[19][0]-ylmcoef[29]*(rly[12]*tx+grly[12][0]*radius2); - grly[28][1] = tmp11*grly[19][1]-ylmcoef[29]*(rly[12]*ty+grly[12][1]*radius2); - grly[28][2] = ylmcoef[28]*(z*grly[19][2]+rly[19])-ylmcoef[29]*(rly[12]*tz+grly[12][2]*radius2); - - rly[29] = tmp11*rly[20]-ylmcoef[29]*rly[13]*radius2;//l=5,m=-2 - grly[29][0] = tmp11*grly[20][0]-ylmcoef[29]*(rly[13]*tx+grly[13][0]*radius2); - grly[29][1] = tmp11*grly[20][1]-ylmcoef[29]*(rly[13]*ty+grly[13][1]*radius2); - grly[29][2] = ylmcoef[28]*(z*grly[20][2]+rly[20])-ylmcoef[29]*(rly[13]*tz+grly[13][2]*radius2); - - double tmp12 = ylmcoef[30]*z; - rly[30] = tmp12*rly[21]-ylmcoef[31]*rly[14]*radius2;//l=5,m=3 - grly[30][0] = tmp12*grly[21][0]-ylmcoef[31]*(grly[14][0]*radius2+rly[14]*tx); - grly[30][1] = tmp12*grly[21][1]-ylmcoef[31]*(grly[14][1]*radius2+rly[14]*ty); - 
grly[30][2] = ylmcoef[30]*(z*grly[21][2]+rly[21])-ylmcoef[31]*(grly[14][2]*radius2+rly[14]*tz); - - rly[31] = tmp12*rly[22]-ylmcoef[31]*rly[15]*radius2;//l=5,m=-3 - grly[31][0] = tmp12*grly[22][0]-ylmcoef[31]*(grly[15][0]*radius2+rly[15]*tx); - grly[31][1] = tmp12*grly[22][1]-ylmcoef[31]*(grly[15][1]*radius2+rly[15]*ty); - grly[31][2] = ylmcoef[30]*(z*grly[22][2]+rly[22])-ylmcoef[31]*(grly[15][2]*radius2+rly[15]*tz); - - double tmp13 = ylmcoef[32]*z; - rly[32] = tmp13*rly[23];//l=5,m=4 - grly[32][0] = tmp13*grly[23][0]; - grly[32][1] = tmp13*grly[23][1]; - grly[32][2] = ylmcoef[32]*(rly[23]+z*grly[23][2]); - - rly[33] = tmp13*rly[24];//l=5,m=-4 - grly[33][0] = tmp13*grly[24][0]; - grly[33][1] = tmp13*grly[24][1]; - grly[33][2] = ylmcoef[32]*(rly[24]+z*grly[24][2]); - - double tmp14 = ylmcoef[35]*x; - rly[34] = ylmcoef[33]*rly[30]-ylmcoef[34]*rly[14]*radius2-tmp14*rly[23];//l=5,m=5 - grly[34][0] = ylmcoef[33]*grly[30][0]-ylmcoef[34]*(rly[14]*tx+grly[14][0]*radius2)-ylmcoef[35]*(x*grly[23][0]+rly[23]); - grly[34][1] = ylmcoef[33]*grly[30][1]-ylmcoef[34]*(rly[14]*ty+grly[14][1]*radius2)-tmp14*grly[23][1]; - grly[34][2] = ylmcoef[33]*grly[30][2]-ylmcoef[34]*(rly[14]*tz+grly[14][2]*radius2)-tmp14*grly[23][2]; - - rly[35] = ylmcoef[33]*rly[31]-ylmcoef[34]*rly[15]*radius2-tmp14*rly[24];//l=5,m=-5 - grly[35][0] = ylmcoef[33]*grly[31][0]-ylmcoef[34]*(rly[15]*tx+grly[15][0]*radius2)-ylmcoef[35]*(x*grly[24][0]+rly[24]); - grly[35][1] = ylmcoef[33]*grly[31][1]-ylmcoef[34]*(rly[15]*ty+grly[15][1]*radius2)-tmp14*grly[24][1]; - grly[35][2] = ylmcoef[33]*grly[31][2]-ylmcoef[34]*(rly[15]*tz+grly[15][2]*radius2)-tmp14*grly[24][2]; - - if (Lmax == 5) return; - - //if Lmax > 5 - for (int il = 6; il <= Lmax; il++) - { - int istart = il*il; - int istart1 = (il-1)*(il-1); - int istart2 = (il-2)*(il-2); - - double fac2 = sqrt(4.0*istart-1.0); - double fac4 = sqrt(4.0*istart1-1.0); - - for (int im = 0; im < 2*il-1; im++) - { - int imm = (im+1)/2; -// if (im % 2 == 0) imm *= -1; - - double 
var1 = fac2/sqrt((double)istart-imm*imm); - double var2 = sqrt((double)istart1-imm*imm)/fac4; - - rly[istart+im] = var1*(z*rly[istart1+im] - var2*rly[istart2+im]*radius2); - - grly[istart+im][0]=var1*(z*grly[istart1+im][0]-var2*(rly[istart2+im]*tx+grly[istart2+im][0]*radius2)); - grly[istart+im][1]=var1*(z*grly[istart1+im][1]-var2*(rly[istart2+im]*ty+grly[istart2+im][1]*radius2)); - grly[istart+im][2]=var1*(z*grly[istart1+im][2]+rly[istart1+im]-var2*(rly[istart2+im]*tz+grly[istart2+im][2]*radius2)); - - } - - double bl1 = sqrt(2.0*il/(2.0*il+1.0)); - double bl2 = sqrt((2.0*il-2.0)/(2.0*il-1.0)); - double bl3 = sqrt(2.0)/fac2; - - int id1 = istart+2*il-1; - int id2 = istart+2*il-5; - int id3 = istart2+2*il-5; - int id4 = istart1+2*il-3; - - rly[id1] = (bl3*rly[id2]-bl2*rly[id3]*radius2-2.0*x*rly[id4]) / bl1; - grly[id1][0] = (bl3*grly[id2][0]-bl2*(grly[id3][0]*radius2+rly[id3]*tx)-2.0*(rly[id4]+x*grly[id4][0]))/bl1; - grly[id1][1] = (bl3*grly[id2][1]-bl2*(grly[id3][1]*radius2+rly[id3]*ty)-2.0*x*grly[id4][1])/bl1; - grly[id1][2] = (bl3*grly[id2][2]-bl2*(grly[id3][2]*radius2+rly[id3]*tz)-2.0*x*grly[id4][2])/bl1; - - - rly[id1+1] = (bl3*rly[id2+1]-bl2*rly[id3+1]*radius2-2.0*x*rly[id4+1]) / bl1; - grly[id1+1][0] = (bl3*grly[id2+1][0]-bl2*(grly[id3+1][0]*radius2+rly[id3+1]*tx)-2.0*(rly[id4+1]+x*grly[id4+1][0]))/bl1; - grly[id1+1][1] = (bl3*grly[id2+1][1]-bl2*(grly[id3+1][1]*radius2+rly[id3+1]*ty)-2.0*x*grly[id4+1][1])/bl1; - grly[id1+1][2] = (bl3*grly[id2+1][2]-bl2*(grly[id3+1][2]*radius2+rly[id3+1]*tz)-2.0*x*grly[id4+1][2])/bl1; - } - - - return; + // Alias the flat buffer as a pointer-to-array-of-3-doubles so the body + // below can continue to use the natural grly[lm][xyz] indexing without + // any performance penalty — the memory layout is unchanged. 
+ double (*grly)[3] = reinterpret_cast (grly_flat); + double radius2 = x * x + y * y + z * z; + double tx = 2.0 * x; + double ty = 2.0 * y; + double tz = 2.0 * z; + + // begin calculation + /*************************** + L = 0 + ***************************/ + rly[0] = ylmcoef[0]; // l=0, m=0 + grly[0][0] = grly[0][1] = grly[0][2] = 0.0; + if (Lmax == 0) + { + return; + } + + /*************************** + L = 1 + ***************************/ + rly[1] = ylmcoef[1] * z; // l=1, m=0 + grly[1][0] = grly[1][1] = 0.0; + grly[1][2] = ylmcoef[1]; + + rly[2] = -ylmcoef[1] * x; // l=1, m=1 + grly[2][1] = grly[2][2] = 0.0; + grly[2][0] = -ylmcoef[1]; + + rly[3] = -ylmcoef[1] * y; // l=1, m=-1 + grly[3][0] = grly[3][2] = 0.0; + grly[3][1] = -ylmcoef[1]; + + if (Lmax == 1) + { + return; + } + + /*************************** + L = 2 + ***************************/ + rly[4] = ylmcoef[2] * z * rly[1] - ylmcoef[3] * rly[0] * radius2; // l=2, m=0 + grly[4][0] = ylmcoef[2] * z * grly[1][0] - ylmcoef[3] * (grly[0][0] * radius2 + rly[0] * tx); // l=2, m=0 + grly[4][1] = ylmcoef[2] * z * grly[1][1] - ylmcoef[3] * (grly[0][1] * radius2 + rly[0] * ty); // l=2, m=0 + grly[4][2] = ylmcoef[2] * (z * grly[1][2] + rly[1]) - ylmcoef[3] * (grly[0][2] * radius2 + rly[0] * tz); // l=2, m=0 + + double tmp0 = ylmcoef[4] * z; + rly[5] = tmp0 * rly[2]; // l=2,m=1 + grly[5][0] = tmp0 * grly[2][0]; + grly[5][1] = tmp0 * grly[2][1]; + grly[5][2] = ylmcoef[4] * (rly[2] + z * grly[2][2]); + + rly[6] = tmp0 * rly[3]; // l=2,m=-1 + grly[6][0] = tmp0 * grly[3][0]; + grly[6][1] = tmp0 * grly[3][1]; + grly[6][2] = ylmcoef[4] * (rly[3] + z * grly[3][2]); + + double tmp2 = ylmcoef[4] * x; + rly[7] = ylmcoef[5] * rly[4] - ylmcoef[6] * rly[0] * radius2 - tmp2 * rly[2]; // l=2,m=2 + grly[7][0] = ylmcoef[5] * grly[4][0] - ylmcoef[6] * (rly[0] * tx + grly[0][0] * radius2) + - ylmcoef[4] * (x * grly[2][0] + rly[2]); + + // std::cout << "\np1 = "<< ylmcoef[5]*grly[4][0] << " p2 = " << -ylmcoef[6]*rly[0]*tx + // << " p3 = 
" << -ylmcoef[4]*x*grly[2][0] << " p4 = " << -ylmcoef[4]*rly[2] << std::endl; + + grly[7][1] = ylmcoef[5] * grly[4][1] - ylmcoef[6] * (rly[0] * ty + grly[0][1] * radius2) - tmp2 * grly[2][1]; + grly[7][2] = ylmcoef[5] * grly[4][2] - ylmcoef[6] * (rly[0] * tz + grly[0][2] * radius2) - tmp2 * grly[2][2]; + + rly[8] = -tmp2 * rly[3]; + grly[8][0] = -ylmcoef[4] * (rly[3] + x * grly[3][0]); + grly[8][1] = -tmp2 * grly[3][1]; + grly[8][2] = -tmp2 * grly[3][2]; + // rly[8] = tmp1+tmp2*rly[3];//l=2,m=-2 + if (Lmax == 2) + { + return; + } + + /*************************** + L = 3 + ***************************/ + rly[9] = ylmcoef[7] * z * rly[4] - ylmcoef[8] * rly[1] * radius2; // l=3, m=0 + grly[9][0] = ylmcoef[7] * z * grly[4][0] - ylmcoef[8] * (rly[1] * tx + grly[1][0] * radius2); + grly[9][1] = ylmcoef[7] * z * grly[4][1] - ylmcoef[8] * (rly[1] * ty + grly[1][1] * radius2); + grly[9][2] = ylmcoef[7] * (rly[4] + z * grly[4][2]) - ylmcoef[8] * (rly[1] * tz + grly[1][2] * radius2); + + double tmp3 = ylmcoef[9] * z; + rly[10] = tmp3 * rly[5] - ylmcoef[10] * rly[2] * radius2; // l=3,m=1 + grly[10][0] = tmp3 * grly[5][0] - ylmcoef[10] * (grly[2][0] * radius2 + rly[2] * tx); + grly[10][1] = tmp3 * grly[5][1] - ylmcoef[10] * (grly[2][1] * radius2 + rly[2] * ty); + grly[10][2] = ylmcoef[9] * (z * grly[5][2] + rly[5]) - ylmcoef[10] * (grly[2][2] * radius2 + rly[2] * tz); + + rly[11] = tmp3 * rly[6] - ylmcoef[10] * rly[3] * radius2; // l=3,m=-1 + grly[11][0] = tmp3 * grly[6][0] - ylmcoef[10] * (grly[3][0] * radius2 + rly[3] * tx); + grly[11][1] = tmp3 * grly[6][1] - ylmcoef[10] * (grly[3][1] * radius2 + rly[3] * ty); + grly[11][2] = ylmcoef[9] * (z * grly[6][2] + rly[6]) - ylmcoef[10] * (grly[3][2] * radius2 + rly[3] * tz); + + double tmp4 = ylmcoef[11] * z; + rly[12] = tmp4 * rly[7]; // l=3,m=2 + grly[12][0] = tmp4 * grly[7][0]; + grly[12][1] = tmp4 * grly[7][1]; + grly[12][2] = ylmcoef[11] * (z * grly[7][2] + rly[7]); + + rly[13] = tmp4 * rly[8]; // l=3,m=-2 + grly[13][0] = tmp4 * 
grly[8][0]; + grly[13][1] = tmp4 * grly[8][1]; + grly[13][2] = ylmcoef[11] * (z * grly[8][2] + rly[8]); + + double tmp5 = ylmcoef[14] * x; + rly[14] = ylmcoef[12] * rly[10] - ylmcoef[13] * rly[2] * radius2 - tmp5 * rly[7]; // l=3,m=3 + grly[14][0] = ylmcoef[12] * grly[10][0] - ylmcoef[13] * (rly[2] * tx + grly[2][0] * radius2) + - ylmcoef[14] * (rly[7] + x * grly[7][0]); + grly[14][1] = ylmcoef[12] * grly[10][1] - ylmcoef[13] * (rly[2] * ty + grly[2][1] * radius2) - tmp5 * grly[7][1]; + grly[14][2] = ylmcoef[12] * grly[10][2] - ylmcoef[13] * (rly[2] * tz + grly[2][2] * radius2) - tmp5 * grly[7][2]; + + rly[15] = ylmcoef[12] * rly[11] - ylmcoef[13] * rly[3] * radius2 - tmp5 * rly[8]; // l=3,m=-3 + grly[15][0] = ylmcoef[12] * grly[11][0] - ylmcoef[13] * (rly[3] * tx + grly[3][0] * radius2) + - ylmcoef[14] * (rly[8] + x * grly[8][0]); + grly[15][1] = ylmcoef[12] * grly[11][1] - ylmcoef[13] * (rly[3] * ty + grly[3][1] * radius2) - tmp5 * grly[8][1]; + grly[15][2] = ylmcoef[12] * grly[11][2] - ylmcoef[13] * (rly[3] * tz + grly[3][2] * radius2) - tmp5 * grly[8][2]; + if (Lmax == 3) + { + return; + } + + /*************************** + L = 4 + ***************************/ + rly[16] = ylmcoef[15] * z * rly[9] - ylmcoef[16] * rly[4] * radius2; // l=4,m=0 + grly[16][0] = ylmcoef[15] * z * grly[9][0] - ylmcoef[16] * (rly[4] * tx + grly[4][0] * radius2); + grly[16][1] = ylmcoef[15] * z * grly[9][1] - ylmcoef[16] * (rly[4] * ty + grly[4][1] * radius2); + grly[16][2] = ylmcoef[15] * (z * grly[9][2] + rly[9]) - ylmcoef[16] * (rly[4] * tz + grly[4][2] * radius2); + + double tmp6 = ylmcoef[17] * z; + rly[17] = tmp6 * rly[10] - ylmcoef[18] * rly[5] * radius2; // l=4,m=1 + grly[17][0] = tmp6 * grly[10][0] - ylmcoef[18] * (rly[5] * tx + grly[5][0] * radius2); + grly[17][1] = tmp6 * grly[10][1] - ylmcoef[18] * (rly[5] * ty + grly[5][1] * radius2); + grly[17][2] = ylmcoef[17] * (z * grly[10][2] + rly[10]) - ylmcoef[18] * (rly[5] * tz + grly[5][2] * radius2); + + rly[18] = tmp6 * rly[11] 
- ylmcoef[18] * rly[6] * radius2; // l=4,m=-1 + grly[18][0] = tmp6 * grly[11][0] - ylmcoef[18] * (rly[6] * tx + grly[6][0] * radius2); + grly[18][1] = tmp6 * grly[11][1] - ylmcoef[18] * (rly[6] * ty + grly[6][1] * radius2); + grly[18][2] = ylmcoef[17] * (z * grly[11][2] + rly[11]) - ylmcoef[18] * (rly[6] * tz + grly[6][2] * radius2); + + double tmp7 = ylmcoef[19] * z; + rly[19] = tmp7 * rly[12] - ylmcoef[20] * rly[7] * radius2; // l=4,m=2 + grly[19][0] = tmp7 * grly[12][0] - ylmcoef[20] * (rly[7] * tx + grly[7][0] * radius2); + grly[19][1] = tmp7 * grly[12][1] - ylmcoef[20] * (rly[7] * ty + grly[7][1] * radius2); + grly[19][2] = ylmcoef[19] * (z * grly[12][2] + rly[12]) - ylmcoef[20] * (rly[7] * tz + grly[7][2] * radius2); + + rly[20] = tmp7 * rly[13] - ylmcoef[20] * rly[8] * radius2; // l=4,m=-2 + grly[20][0] = tmp7 * grly[13][0] - ylmcoef[20] * (rly[8] * tx + grly[8][0] * radius2); + grly[20][1] = tmp7 * grly[13][1] - ylmcoef[20] * (rly[8] * ty + grly[8][1] * radius2); + grly[20][2] = ylmcoef[19] * (z * grly[13][2] + rly[13]) - ylmcoef[20] * (rly[8] * tz + grly[8][2] * radius2); + + double tmp8 = 3.0 * z; + rly[21] = tmp8 * rly[14]; // l=4,m=3 + grly[21][0] = tmp8 * grly[14][0]; + grly[21][1] = tmp8 * grly[14][1]; + grly[21][2] = 3.0 * (z * grly[14][2] + rly[14]); + + rly[22] = tmp8 * rly[15]; // l=4,m=-3 + grly[22][0] = tmp8 * grly[15][0]; + grly[22][1] = tmp8 * grly[15][1]; + grly[22][2] = 3.0 * (z * grly[15][2] + rly[15]); + + double tmp9 = ylmcoef[23] * x; + rly[23] = ylmcoef[21] * rly[19] - ylmcoef[22] * rly[7] * radius2 - tmp9 * rly[14]; // l=4,m=4 + grly[23][0] = ylmcoef[21] * grly[19][0] - ylmcoef[22] * (rly[7] * tx + grly[7][0] * radius2) + - ylmcoef[23] * (x * grly[14][0] + rly[14]); + grly[23][1] = ylmcoef[21] * grly[19][1] - ylmcoef[22] * (rly[7] * ty + grly[7][1] * radius2) - tmp9 * grly[14][1]; + grly[23][2] = ylmcoef[21] * grly[19][2] - ylmcoef[22] * (rly[7] * tz + grly[7][2] * radius2) - tmp9 * grly[14][2]; + + rly[24] = ylmcoef[21] * rly[20] - 
ylmcoef[22] * rly[8] * radius2 - tmp9 * rly[15]; // l=4,m=-4 + grly[24][0] = ylmcoef[21] * grly[20][0] - ylmcoef[22] * (rly[8] * tx + grly[8][0] * radius2) + - ylmcoef[23] * (x * grly[15][0] + rly[15]); + grly[24][1] = ylmcoef[21] * grly[20][1] - ylmcoef[22] * (rly[8] * ty + grly[8][1] * radius2) - tmp9 * grly[15][1]; + grly[24][2] = ylmcoef[21] * grly[20][2] - ylmcoef[22] * (rly[8] * tz + grly[8][2] * radius2) - tmp9 * grly[15][2]; + + if (Lmax == 4) + { + return; + } + + /*************************** + L = 5 + ***************************/ + rly[25] = ylmcoef[24] * z * rly[16] - ylmcoef[25] * rly[9] * radius2; // l=5,m=0 + grly[25][0] = ylmcoef[24] * z * grly[16][0] - ylmcoef[25] * (rly[9] * tx + grly[9][0] * radius2); + grly[25][1] = ylmcoef[24] * z * grly[16][1] - ylmcoef[25] * (rly[9] * ty + grly[9][1] * radius2); + grly[25][2] = ylmcoef[24] * (z * grly[16][2] + rly[16]) - ylmcoef[25] * (rly[9] * tz + grly[9][2] * radius2); + + double tmp10 = ylmcoef[26] * z; + rly[26] = tmp10 * rly[17] - ylmcoef[27] * rly[10] * radius2; // l=5,m=1 + grly[26][0] = tmp10 * grly[17][0] - ylmcoef[27] * (rly[10] * tx + grly[10][0] * radius2); + grly[26][1] = tmp10 * grly[17][1] - ylmcoef[27] * (rly[10] * ty + grly[10][1] * radius2); + grly[26][2] = ylmcoef[26] * (z * grly[17][2] + rly[17]) - ylmcoef[27] * (rly[10] * tz + grly[10][2] * radius2); + + rly[27] = tmp10 * rly[18] - ylmcoef[27] * rly[11] * radius2; // l=5,m=-1 + grly[27][0] = tmp10 * grly[18][0] - ylmcoef[27] * (rly[11] * tx + grly[11][0] * radius2); + grly[27][1] = tmp10 * grly[18][1] - ylmcoef[27] * (rly[11] * ty + grly[11][1] * radius2); + grly[27][2] = ylmcoef[26] * (z * grly[18][2] + rly[18]) - ylmcoef[27] * (rly[11] * tz + grly[11][2] * radius2); + + double tmp11 = ylmcoef[28] * z; + rly[28] = tmp11 * rly[19] - ylmcoef[29] * rly[12] * radius2; // l=5,m=2 + grly[28][0] = tmp11 * grly[19][0] - ylmcoef[29] * (rly[12] * tx + grly[12][0] * radius2); + grly[28][1] = tmp11 * grly[19][1] - ylmcoef[29] * (rly[12] * ty + 
grly[12][1] * radius2); + grly[28][2] = ylmcoef[28] * (z * grly[19][2] + rly[19]) - ylmcoef[29] * (rly[12] * tz + grly[12][2] * radius2); + + rly[29] = tmp11 * rly[20] - ylmcoef[29] * rly[13] * radius2; // l=5,m=-2 + grly[29][0] = tmp11 * grly[20][0] - ylmcoef[29] * (rly[13] * tx + grly[13][0] * radius2); + grly[29][1] = tmp11 * grly[20][1] - ylmcoef[29] * (rly[13] * ty + grly[13][1] * radius2); + grly[29][2] = ylmcoef[28] * (z * grly[20][2] + rly[20]) - ylmcoef[29] * (rly[13] * tz + grly[13][2] * radius2); + + double tmp12 = ylmcoef[30] * z; + rly[30] = tmp12 * rly[21] - ylmcoef[31] * rly[14] * radius2; // l=5,m=3 + grly[30][0] = tmp12 * grly[21][0] - ylmcoef[31] * (grly[14][0] * radius2 + rly[14] * tx); + grly[30][1] = tmp12 * grly[21][1] - ylmcoef[31] * (grly[14][1] * radius2 + rly[14] * ty); + grly[30][2] = ylmcoef[30] * (z * grly[21][2] + rly[21]) - ylmcoef[31] * (grly[14][2] * radius2 + rly[14] * tz); + + rly[31] = tmp12 * rly[22] - ylmcoef[31] * rly[15] * radius2; // l=5,m=-3 + grly[31][0] = tmp12 * grly[22][0] - ylmcoef[31] * (grly[15][0] * radius2 + rly[15] * tx); + grly[31][1] = tmp12 * grly[22][1] - ylmcoef[31] * (grly[15][1] * radius2 + rly[15] * ty); + grly[31][2] = ylmcoef[30] * (z * grly[22][2] + rly[22]) - ylmcoef[31] * (grly[15][2] * radius2 + rly[15] * tz); + + double tmp13 = ylmcoef[32] * z; + rly[32] = tmp13 * rly[23]; // l=5,m=4 + grly[32][0] = tmp13 * grly[23][0]; + grly[32][1] = tmp13 * grly[23][1]; + grly[32][2] = ylmcoef[32] * (rly[23] + z * grly[23][2]); + + rly[33] = tmp13 * rly[24]; // l=5,m=-4 + grly[33][0] = tmp13 * grly[24][0]; + grly[33][1] = tmp13 * grly[24][1]; + grly[33][2] = ylmcoef[32] * (rly[24] + z * grly[24][2]); + + double tmp14 = ylmcoef[35] * x; + rly[34] = ylmcoef[33] * rly[30] - ylmcoef[34] * rly[14] * radius2 - tmp14 * rly[23]; // l=5,m=5 + grly[34][0] = ylmcoef[33] * grly[30][0] - ylmcoef[34] * (rly[14] * tx + grly[14][0] * radius2) + - ylmcoef[35] * (x * grly[23][0] + rly[23]); + grly[34][1] + = ylmcoef[33] * 
grly[30][1] - ylmcoef[34] * (rly[14] * ty + grly[14][1] * radius2) - tmp14 * grly[23][1]; + grly[34][2] + = ylmcoef[33] * grly[30][2] - ylmcoef[34] * (rly[14] * tz + grly[14][2] * radius2) - tmp14 * grly[23][2]; + + rly[35] = ylmcoef[33] * rly[31] - ylmcoef[34] * rly[15] * radius2 - tmp14 * rly[24]; // l=5,m=-5 + grly[35][0] = ylmcoef[33] * grly[31][0] - ylmcoef[34] * (rly[15] * tx + grly[15][0] * radius2) + - ylmcoef[35] * (x * grly[24][0] + rly[24]); + grly[35][1] + = ylmcoef[33] * grly[31][1] - ylmcoef[34] * (rly[15] * ty + grly[15][1] * radius2) - tmp14 * grly[24][1]; + grly[35][2] + = ylmcoef[33] * grly[31][2] - ylmcoef[34] * (rly[15] * tz + grly[15][2] * radius2) - tmp14 * grly[24][2]; + + if (Lmax == 5) + { + return; + } + + // if Lmax > 5 + for (int il = 6; il <= Lmax; il++) + { + int istart = il * il; + int istart1 = (il - 1) * (il - 1); + int istart2 = (il - 2) * (il - 2); + + double fac2 = sqrt (4.0 * istart - 1.0); + double fac4 = sqrt (4.0 * istart1 - 1.0); + + for (int im = 0; im < 2 * il - 1; im++) + { + int imm = (im + 1) / 2; + // if (im % 2 == 0) imm *= -1; + + double var1 = fac2 / sqrt ((double)istart - imm * imm); + double var2 = sqrt ((double)istart1 - imm * imm) / fac4; + + rly[istart + im] = var1 * (z * rly[istart1 + im] - var2 * rly[istart2 + im] * radius2); + + grly[istart + im][0] = var1 + * (z * grly[istart1 + im][0] + - var2 * (rly[istart2 + im] * tx + grly[istart2 + im][0] * radius2)); + grly[istart + im][1] = var1 + * (z * grly[istart1 + im][1] + - var2 * (rly[istart2 + im] * ty + grly[istart2 + im][1] * radius2)); + grly[istart + im][2] = var1 + * (z * grly[istart1 + im][2] + rly[istart1 + im] + - var2 * (rly[istart2 + im] * tz + grly[istart2 + im][2] * radius2)); + } + + double bl1 = sqrt (2.0 * il / (2.0 * il + 1.0)); + double bl2 = sqrt ((2.0 * il - 2.0) / (2.0 * il - 1.0)); + double bl3 = sqrt (2.0) / fac2; + + int id1 = istart + 2 * il - 1; + int id2 = istart + 2 * il - 5; + int id3 = istart2 + 2 * il - 5; + int id4 = istart1 + 2 
* il - 3; + + rly[id1] = (bl3 * rly[id2] - bl2 * rly[id3] * radius2 - 2.0 * x * rly[id4]) / bl1; + grly[id1][0] = (bl3 * grly[id2][0] - bl2 * (grly[id3][0] * radius2 + rly[id3] * tx) + - 2.0 * (rly[id4] + x * grly[id4][0])) + / bl1; + grly[id1][1] + = (bl3 * grly[id2][1] - bl2 * (grly[id3][1] * radius2 + rly[id3] * ty) - 2.0 * x * grly[id4][1]) / bl1; + grly[id1][2] + = (bl3 * grly[id2][2] - bl2 * (grly[id3][2] * radius2 + rly[id3] * tz) - 2.0 * x * grly[id4][2]) / bl1; + + rly[id1 + 1] = (bl3 * rly[id2 + 1] - bl2 * rly[id3 + 1] * radius2 - 2.0 * x * rly[id4 + 1]) / bl1; + grly[id1 + 1][0] = (bl3 * grly[id2 + 1][0] - bl2 * (grly[id3 + 1][0] * radius2 + rly[id3 + 1] * tx) + - 2.0 * (rly[id4 + 1] + x * grly[id4 + 1][0])) + / bl1; + grly[id1 + 1][1] = (bl3 * grly[id2 + 1][1] - bl2 * (grly[id3 + 1][1] * radius2 + rly[id3 + 1] * ty) + - 2.0 * x * grly[id4 + 1][1]) + / bl1; + grly[id1 + 1][2] = (bl3 * grly[id2 + 1][2] - bl2 * (grly[id3 + 1][2] * radius2 + rly[id3 + 1] * tz) + - 2.0 * x * grly[id4 + 1][2]) + / bl1; + } + + return; } -void Ylm::hes_rl_sph_harm -( - const int Lmax, //max momentum of L - const double x, - const double y, - const double z, - std::vector>& hrly -) +void + Ylm::hes_rl_sph_harm (const int Lmax, // max momentum of L + const double x, + const double y, + const double z, + std::vector>& hrly) { - hrly.resize( (Lmax+1)*(Lmax+1), std::vector(6) ); - - double radius2 = x*x+y*y+z*z; - double coeff = 0.0; - - //begin calculation - /*************************** - L = 0 - ***************************/ - hrly[0][0] = hrly[0][1] = hrly[0][2] = 0.0; - hrly[0][3] = hrly[0][4] = hrly[0][5] = 0.0; - if (Lmax == 0) return; - - /*************************** - L = 1 - ***************************/ - hrly[1][0] = hrly[1][1] = hrly[1][2] = 0.0; - hrly[1][3] = hrly[1][4] = hrly[1][5] = 0.0; - - hrly[2][0] = hrly[2][1] = hrly[2][2] = 0.0; - hrly[2][3] = hrly[2][4] = hrly[2][5] = 0.0; - - hrly[3][0] = hrly[3][1] = hrly[3][2] = 0.0; - hrly[3][3] = hrly[3][4] = hrly[3][5] = 
0.0; - - if (Lmax == 1) return; - - /*************************** - L = 2 - ***************************/ - //m=0 : 3z^2-r^2 - coeff = sqrt(5.0 / ModuleBase::PI) / 4.0; - hrly[4][0] = hrly[4][3] = -2.0 * coeff; - hrly[4][5] = 4.0 * coeff; - hrly[4][1] = hrly[4][2] = hrly[4][4] = 0.0; - - //m=1 : xz - coeff = sqrt(15.0 / ModuleBase::PI) / 2.0; - hrly[5][2] = coeff; - hrly[5][0] = hrly[5][1] = 0.0; - hrly[5][3] = hrly[5][4] = hrly[5][5] = 0.0; - - //m=-1 : yz - hrly[6][4] = coeff; - hrly[6][0] = hrly[6][1] = 0.0; - hrly[6][2] = hrly[6][3] = hrly[6][5] = 0.0; - - //m=-2 : xy - hrly[8][1] = coeff; - hrly[8][0] = hrly[8][2] = 0.0; - hrly[8][3] = hrly[8][4] = hrly[8][5] = 0.0; - - //m=2 : (x^2-y^2) - coeff = sqrt(15.0 / ModuleBase::PI) / 4.0; - hrly[7][0] = 2.0 * coeff; - hrly[7][3] = -2.0 * coeff; - hrly[7][1] = hrly[7][2] = 0.0; - hrly[7][4] = hrly[7][5] = 0.0; - - if (Lmax == 2) return; - - /*************************** - L = 3 - ***************************/ - //m=0 : (5z^3-3zr^2) - coeff = sqrt(7.0 / ModuleBase::PI) / 4.0; - hrly[9][0] = hrly[9][3] = -6.0 * z * coeff; - hrly[9][1] = 0.0; - hrly[9][2] = -6.0 * x * coeff; - hrly[9][4] = -6.0 * y * coeff; - hrly[9][5] = 12.0 * z * coeff; - - //m=1 : x(5z^2-r^2) - coeff = sqrt(21.0 / 2.0 / ModuleBase::PI) / 4.0; - hrly[10][0] = -6.0 * x * coeff; - hrly[10][1] = -2.0 * y * coeff; - hrly[10][2] = 8.0 * z * coeff; - hrly[10][3] = -2.0 * x * coeff; - hrly[10][4] = 0.0; - hrly[10][5] = 8.0 * x * coeff; - - //m=-1 : y(5z^2-r^2) - hrly[11][0] = -2.0 * y * coeff; - hrly[11][1] = -2.0 * x * coeff; - hrly[11][2] = 0.0; - hrly[11][3] = -6.0 * y * coeff; - hrly[11][4] = 8.0 * z * coeff; - hrly[11][5] = 8.0 * y * coeff; - - //m=2 : (x^2-y^2)z - coeff = sqrt(105.0 / ModuleBase::PI) / 4.0; - hrly[12][0] = 2.0 * z * coeff; - hrly[12][1] = 0.0; - hrly[12][2] = 2.0 * x * coeff; - hrly[12][3] = -hrly[12][0]; - hrly[12][4] = -2.0 * y * coeff; - hrly[12][5] = 0.0; - - //m=-2 : xyz - coeff = sqrt(105.0 / ModuleBase::PI) / 2.0; - hrly[13][0] = 
0.0; - hrly[13][1] = z * coeff; - hrly[13][2] = y * coeff; - hrly[13][3] = 0.0; - hrly[13][4] = x * coeff; - hrly[13][5] = 0.0; - - //m=3 : x(x^2-3y^2) - coeff = sqrt(35.0 / 2.0 / ModuleBase::PI) / 4.0; - hrly[14][0] = 6.0 * x * coeff; - hrly[14][1] = -6.0 * y * coeff; - hrly[14][2] = 0.0; - hrly[14][3] = -hrly[14][0]; - hrly[14][4] = 0.0; - hrly[14][5] = 0.0; - - //m=-3 : y(3x^2-y^2) - hrly[15][0] = 6.0 * y * coeff; - hrly[15][1] = 6.0 * x * coeff; - hrly[15][2] = 0.0; - hrly[15][3] = -hrly[15][0]; - hrly[15][4] = 0.0; - hrly[15][5] = 0.0; - - if (Lmax == 3) return; - - /*************************** - L = 4 - ***************************/ - //m=0 : (35z^4 - 30z^2r^2 + 3r^4) - coeff = sqrt(1.0 / ModuleBase::PI) * 3.0 / 16.0; - hrly[16][0] = 12.0 * (3.0 * x*x + y*y - 4.0 * z*z) * coeff; - hrly[16][1] = 24.0 * x * y * coeff; - hrly[16][2] = -96.0 * x * z * coeff; - hrly[16][3] = 12.0 * (x*x + 3.0 * y*y - 4.0 * z*z) * coeff; - hrly[16][4] = -96.0 * y * z * coeff; - hrly[16][5] = -48.0 * (x*x + y*y -2.0 * z*z) * coeff; - - //m=1 : x(7z^3 - 3zr^2) - coeff = 3.0 / 4.0 * sqrt(5.0 / 2.0 / ModuleBase::PI); - hrly[17][0] = -18.0 * x * z * coeff; - hrly[17][1] = -6.0 * y * z * coeff; - hrly[17][2] = -3.0 * (3.0 * x*x + y*y - 4.0 * z*z) * coeff; - hrly[17][3] = -6.0 * x * z * coeff; - hrly[17][4] = -6.0 * x * y * coeff; - hrly[17][5] = 24.0 * x * z * coeff; - - //m=-1 : y(7z^3 - 3zr^2) - hrly[18][0] = -6.0 * y * z * coeff; - hrly[18][1] = -6.0 * x * z * coeff; - hrly[18][2] = -6.0 * x * y * coeff; - hrly[18][3] = -18.0 * y * z * coeff; - hrly[18][4] = -3.0 * (x*x + 3.0 * y*y - 4.0 * z*z) * coeff; - hrly[18][5] = 24.0 * y * z * coeff; - - //m=2 : (x^2 - y^2)(7z^2 - r^2) - coeff = 3.0 / 8.0 * sqrt(5.0 / ModuleBase::PI); - hrly[19][0] = -12.0 * (x*x - z*z) * coeff; - hrly[19][1] = 0.0; - hrly[19][2] = 24.0 * x * z * coeff; - hrly[19][3] = 12.0 * (y*y - z*z) * coeff; - hrly[19][4] = -24.0 * y * z * coeff; - hrly[19][5] = 12.0 * (x*x - y*y) * coeff; - - //m=-2 : xy(7z^2 - r^2) - 
coeff = 3.0 / 4.0 * sqrt(5.0 / ModuleBase::PI); - hrly[20][0] = -6.0 * x * y * coeff; - hrly[20][1] = -3.0 * (x*x + y*y - 2.0 * z*z) * coeff; - hrly[20][2] = 2.0 * y * z * coeff; - hrly[20][3] = hrly[20][0]; - hrly[20][4] = 12.0 * x * z * coeff; - hrly[20][5] = 12.0 * x * y * coeff; - - //m=3 : x(x^2-3y^2)z - coeff = 3.0 / 4.0 * sqrt(35.0 / 2.0 / ModuleBase::PI); - hrly[21][0] = 6.0 * x * z * coeff; - hrly[21][1] = -6.0 * y * z * coeff; - hrly[21][2] = 3.0 * (x*x - y*y) * coeff; - hrly[21][3] = -6.0 * x * z * coeff; - hrly[21][4] = -6.0 * x * y * coeff; - hrly[21][5] = 0.0; - - //m=-3 : y(3x^2-y^2)z - hrly[22][0] = 6.0 * y * z * coeff; - hrly[22][1] = 6.0 * x * z * coeff; - hrly[22][2] = 6.0 * x * y * coeff; - hrly[22][3] = -6.0 * y * z * coeff; - hrly[22][4] = 3.0 * (x*x - y*y) * coeff; - hrly[22][5] = 0.0; - - //m=4 : x^4 + y^4 - 6 x^2y^2 - coeff = 3.0 / 16.0 * sqrt(35.0 / ModuleBase::PI); - hrly[23][0] = 12.0 * (x*x - y*y) * coeff; - hrly[23][1] = -24.0 * x * y * coeff; - hrly[23][2] = 0.0; - hrly[23][3] = -hrly[23][0]; - hrly[23][4] = 0.0; - hrly[23][5] = 0.0; - - //m=-4 : xy(x^2 - y^2) - coeff = 3.0 / 4.0 * sqrt(35.0 / ModuleBase::PI); - hrly[24][0] = 6.0 * x * y * coeff; - hrly[24][1] = 3.0 * (x*x - y*y) * coeff; - hrly[24][2] = 0.0; - hrly[24][3] = -hrly[24][0]; - hrly[24][4] = 0.0; - hrly[24][5] = 0.0; - - if (Lmax == 4) return; - - /*************************** - L = 5 - ***************************/ - //m=0 : (63z^5 - 70z^3*r^2 + 15z*r^4) - coeff = sqrt(11.0 / ModuleBase::PI) / 16.0; - hrly[25][0] = (180*x*x*z + 60*y*y*z - 80*z*z*z) * coeff; - hrly[25][1] = (120*x*y*z) * coeff; - hrly[25][2] = (60*x*x*x + 60*x*y*y - 240*x*z*z) * coeff; - hrly[25][3] = (60*x*x*z + 180*y*y*z - 80*z*z*z) * coeff; - hrly[25][4] = (60*x*x*y + 60*y*y*y - 240*y*z*z) * coeff; - hrly[25][5] = (-240*x*x*z - 240*y*y*z + 160*z*z*z) * coeff; - - //m=1 : x(21z^4 - 14z^2*r^2 + r^4) - coeff = sqrt(165.0 / 2.0 / ModuleBase::PI) / 16.0; - hrly[26][0] = (20*x*x*x + 12*x*y*y - 72*x*z*z) * 
coeff; - hrly[26][1] = (12*x*x*y + 4*y*y*y - 24*y*z*z) * coeff; - hrly[26][2] = (-72*x*x*z - 24*y*y*z + 32*z*z*z) * coeff; - hrly[26][3] = (4*x*x*x + 12*x*y*y - 24*x*z*z) * coeff; - hrly[26][4] = (-48*x*y*z) * coeff; - hrly[26][5] = (-24*x*x*x - 24*x*y*y + 96*x*z*z) * coeff; - - //m=-1 : y(21z^4 - 14z^2*r^2 + r^4) - hrly[27][0] = (12*x*x*y + 4*y*y*y - 24*y*z*z) * coeff; - hrly[27][1] = (4*x*x*x + 12*x*y*y - 24*x*z*z) * coeff; - hrly[27][2] = (-48*x*y*z) * coeff; - hrly[27][3] = (12*x*x*y + 20*y*y*y - 72*y*z*z) * coeff; - hrly[27][4] = (-24*x*x*z - 72*y*y*z + 32*z*z*z) * coeff; - hrly[27][5] = (-24*x*x*y - 24*y*y*y + 96*y*z*z) * coeff; - - //m=2 : (x^2 - y^2)(3z^3 - z*r^2) - coeff = sqrt(1155.0 / ModuleBase::PI) / 8.0; - hrly[28][0] = (-12*x*x*z + 4*z*z*z) * coeff; - hrly[28][1] = 0.0; - hrly[28][2] = (-4*x*x*x + 12*x*z*z) * coeff; - hrly[28][3] = (12*y*y*z - 4*z*z*z) * coeff; - hrly[28][4] = (4*y*y*y - 12*y*z*z) * coeff; - hrly[28][5] = (12*x*x*z - 12*y*y*z) * coeff; - - //m=-2 : xy(3z^3 - z*r^2) - hrly[29][0] = (-6*x*y*z) * coeff; - hrly[29][1] = (-3*x*x*z - 3*y*y*z + 2*z*z*z) * coeff; - hrly[29][2] = (-3*x*x*y - y*y*y + 6*y*z*z) * coeff; - hrly[29][3] = (-6*x*y*z) * coeff; - hrly[29][4] = (-x*x*x - 3*x*y*y + 6*x*z*z) * coeff; - hrly[29][5] = (12*x*y*z) * coeff; - - //m=3 : x(x^2 - 3y^2)(9z^2 - r^2) - coeff = sqrt(385.0 / 2.0 / ModuleBase::PI) / 16.0; - hrly[30][0] = (-20*x*x*x + 12*x*y*y + 48*x*z*z) * coeff; - hrly[30][1] = (12*x*x*y + 12*y*y*y - 48*y*z*z) * coeff; - hrly[30][2] = (48*x*x*z - 48*y*y*z) * coeff; - hrly[30][3] = (4*x*x*x + 36*x*y*y - 48*x*z*z) * coeff; - hrly[30][4] = (-96*x*y*z) * coeff; - hrly[30][5] = (16*x*x*x - 48*x*y*y) * coeff; - - //m=-3 : y(3x^2 - y^2)(9z^2 - r^2) - hrly[31][0] = (-36*x*x*y - 4*y*y*y + 48*y*z*z) * coeff; - hrly[31][1] = (-12*x*x*x - 12*x*y*y + 48*x*z*z) * coeff; - hrly[31][2] = (96*x*y*z) * coeff; - hrly[31][3] = (-12*x*x*y + 20*y*y*y - 48*y*z*z) * coeff; - hrly[31][4] = (48*x*x*z - 48*y*y*z) * coeff; - hrly[31][5] = 
(48*x*x*y - 16*y*y*y) * coeff; - - //m=4 : (x^4 - 6x^2*y^2 + y^4) * z - coeff = sqrt(385.0 / ModuleBase::PI) / 16.0; - hrly[32][0] = (12*x*x*z - 12*y*y*z) * coeff; - hrly[32][1] = (-24*x*y*z) * coeff; - hrly[32][2] = (4*x*x*x - 12*x*y*y) * coeff; - hrly[32][3] = (-12*x*x*z + 12*y*y*z) * coeff; - hrly[32][4] = (-12*x*x*y + 4*y*y*y) * coeff; - hrly[32][5] = 0.0; - - //m=-4 : xy(x^2 - y^2) * z - hrly[33][0] = (6*x*y*z) * coeff; - hrly[33][1] = (3*x*x*z - 3*y*y*z) * coeff; - hrly[33][2] = (3*x*x*y - y*y*y) * coeff; - hrly[33][3] = (-6*x*y*z) * coeff; - hrly[33][4] = (x*x*x - 3*x*y*y) * coeff; - hrly[33][5] = 0.0; - - //m=5 : x(x^4 - 10x^2*y^2 + 5y^4) - coeff = sqrt(77.0 / 2.0 / ModuleBase::PI) / 16.0; - hrly[34][0] = (20.0 * x*x*x - 60.0 * x * y*y) * coeff; - hrly[34][1] = (-60.0 * x*x * y + 20.0 * y*y*y) * coeff; - hrly[34][2] = 0.0; - hrly[34][3] = (-20.0 * x*x*x + 60.0 * x * y*y) * coeff; - hrly[34][4] = 0.0; - hrly[34][5] = 0.0; - - //m=-5 : y(5x^4 - 10x^2*y^2 + y^4) - hrly[35][0] = (60.0 * x*x * y - 20.0 * y*y*y) * coeff; - hrly[35][1] = (20.0 * x*x*x - 60.0 * x * y*y) * coeff; - hrly[35][2] = 0.0; - hrly[35][3] = (-60.0 * x*x * y + 20.0 * y*y*y) * coeff; - hrly[35][4] = 0.0; - hrly[35][5] = 0.0; - - if (Lmax == 5) return; - - /*************************** - L = 6 - ***************************/ - //m=0 : (231z^6 - 315z^4*r^2 + 105z^2*r^4 - 5r^6) - coeff = sqrt(13.0 / ModuleBase::PI) / 32.0; - hrly[36][0] = (-150*x*x*x*x - 180*x*x*y*y + 1080*x*x*z*z - 30*y*y*y*y + 360*y*y*z*z - 240*z*z*z*z) * coeff; - hrly[36][1] = (-120*x*x*x*y - 120*x*y*y*y + 720*x*y*z*z) * coeff; - hrly[36][2] = (720*x*x*x*z + 720*x*y*y*z - 960*x*z*z*z) * coeff; - hrly[36][3] = (-30*x*x*x*x - 180*x*x*y*y + 360*x*x*z*z - 150*y*y*y*y + 1080*y*y*z*z - 240*z*z*z*z) * coeff; - hrly[36][4] = (720*x*x*y*z + 720*y*y*y*z - 960*y*z*z*z) * coeff; - hrly[36][5] = (180*x*x*x*x + 360*x*x*y*y - 1440*x*x*z*z + 180*y*y*y*y - 1440*y*y*z*z + 480*z*z*z*z) * coeff; - - //m=1 : x(33z^5 - 30z^3*r^2 + 5z*r^4) - coeff = 
sqrt(273.0 / 2.0 / ModuleBase::PI) / 16.0; - hrly[37][0] = (100*x*x*x*z + 60*x*y*y*z - 120*x*z*z*z) * coeff; - hrly[37][1] = (60*x*x*y*z + 20*y*y*y*z - 40*y*z*z*z) * coeff; - hrly[37][2] = (25*x*x*x*x + 30*x*x*y*y - 180*x*x*z*z + 5*y*y*y*y - 60*y*y*z*z + 40*z*z*z*z) * coeff; - hrly[37][3] = (20*x*x*x*z + 60*x*y*y*z - 40*x*z*z*z) * coeff; - hrly[37][4] = (20*x*x*x*y + 20*x*y*y*y - 120*x*y*z*z) * coeff; - hrly[37][5] = (-120*x*x*x*z - 120*x*y*y*z + 160*x*z*z*z) * coeff; - - //m=-1 : y(33z^5 - 30z^3*r^2 + 5z*r^4) - hrly[38][0] = (60*x*x*y*z + 20*y*y*y*z - 40*y*z*z*z) * coeff; - hrly[38][1] = (20*x*x*x*z + 60*x*y*y*z - 40*x*z*z*z) * coeff; - hrly[38][2] = (20*x*x*x*y + 20*x*y*y*y - 120*x*y*z*z) * coeff; - hrly[38][3] = (60*x*x*y*z + 100*y*y*y*z - 120*y*z*z*z) * coeff; - hrly[38][4] = (5*x*x*x*x + 30*x*x*y*y - 60*x*x*z*z + 25*y*y*y*y - 180*y*y*z*z + 40*z*z*z*z) * coeff; - hrly[38][5] = (-120*x*x*y*z - 120*y*y*y*z + 160*y*z*z*z) * coeff; - - //m=2 : (x^2 - y^2)(33z^4 - 18z^2*r^2 + r^4) - coeff = sqrt(1365.0 / ModuleBase::PI) / 32.0; - hrly[39][0] = (30*x*x*x*x + 12*x*x*y*y - 192*x*x*z*z - 2*y*y*y*y + 32*z*z*z*z) * coeff; - hrly[39][1] = (8*x*x*x*y - 8*x*y*y*y) * coeff; - hrly[39][2] = (-128*x*x*x*z + 128*x*z*z*z) * coeff; - hrly[39][3] = (2*x*x*x*x - 12*x*x*y*y - 30*y*y*y*y + 192*y*y*z*z - 32*z*z*z*z) * coeff; - hrly[39][4] = (128*y*y*y*z - 128*y*z*z*z) * coeff; - hrly[39][5] = (-32*x*x*x*x + 192*x*x*z*z + 32*y*y*y*y - 192*y*y*z*z) * coeff; - - //m=-2 : xy(33z^4 - 18z^2*r^2 + r^4) - hrly[40][0] = (20*x*x*x*y + 12*x*y*y*y - 96*x*y*z*z) * coeff; - hrly[40][1] = (20*x*x*x*x + 36*x*x*y*y - 96*x*x*z*z + 20*y*y*y*y - 96*y*y*z*z + 32*z*z*z*z) * coeff; - hrly[40][2] = (-96*x*x*y*z - 32*y*y*y*z + 64*y*z*z*z) * coeff; - hrly[40][3] = (12*x*x*x*y + 20*x*y*y*y - 96*x*y*z*z) * coeff; - hrly[40][4] = (-32*x*x*x*z - 96*x*y*y*z + 64*x*z*z*z) * coeff; - hrly[40][5] = (-32*x*x*x*y - 32*x*y*y*y + 192*x*y*z*z) * coeff; - - //m=3 : x(x^2 - 3y^2)(11z^3 - 3z*r^2) - coeff = sqrt(1365.0 / 
ModuleBase::PI) / 16.0; - hrly[41][0] = (-60*x*x*x*z + 36*x*y*y*z + 48*x*z*z*z) * coeff; - hrly[41][1] = (36*x*x*y*z + 36*y*y*y*z - 48*y*z*z*z) * coeff; - hrly[41][2] = (-30*x*x*x*x + 36*x*x*y*y + 72*x*x*z*z + 18*y*y*y*y - 72*y*y*z*z) * coeff; - hrly[41][3] = (12*x*x*x*z + 108*x*y*y*z - 48*x*z*z*z) * coeff; - hrly[41][4] = (12*x*x*x*y + 36*x*y*y*y - 144*x*y*z*z) * coeff; - hrly[41][5] = (48*x*x*x*z - 144*x*y*y*z) * coeff; - - //m=-3 : y(3x^2 - y^2)(11z^3 - 3z*r^2) - hrly[42][0] = (-108*x*x*y*z - 12*y*y*y*z + 48*y*z*z*z) * coeff; - hrly[42][1] = (-36*x*x*x*z - 36*x*y*y*z + 48*x*z*z*z) * coeff; - hrly[42][2] = (-36*x*x*x*y - 12*x*y*y*y + 144*x*y*z*z) * coeff; - hrly[42][3] = (-36*x*x*y*z + 60*y*y*y*z - 48*y*z*z*z) * coeff; - hrly[42][4] = (-18*x*x*x*x - 36*x*x*y*y + 72*x*x*z*z + 30*y*y*y*y - 72*y*y*z*z) * coeff; - hrly[42][5] = (144*x*x*y*z - 48*y*y*y*z) * coeff; - - //m=4 : (x^4 - 6x^2*y^2 + y^4)(11z^2 - r^2) - coeff = sqrt(91.0 / ModuleBase::PI) / 32.0; - hrly[43][0] = (-30*x*x*x*x + 60*x*x*y*y + 120*x*x*z*z + 10*y*y*y*y - 120*y*y*z*z) * coeff; - hrly[43][1] = (40*x*x*x*y + 40*x*y*y*y - 240*x*y*z*z) * coeff; - hrly[43][2] = (80*x*x*x*z - 240*x*y*y*z) * coeff; - hrly[43][3] = (10*x*x*x*x + 60*x*x*y*y - 120*x*x*z*z - 30*y*y*y*y + 120*y*y*z*z) * coeff; - hrly[43][4] = (-240*x*x*y*z + 80*y*y*y*z) * coeff; - hrly[43][5] = (20*x*x*x*x - 120*x*x*y*y + 20*y*y*y*y) * coeff; - - //m=-4 : xy(x^2 - y^2)(11z^2 - r^2) - hrly[44][0] = (-20*x*x*x*y + 60*x*y*z*z) * coeff; - hrly[44][1] = (-5*x*x*x*x + 30*x*x*z*z + 5*y*y*y*y - 30*y*y*z*z) * coeff; - hrly[44][2] = (60*x*x*y*z - 20*y*y*y*z) * coeff; - hrly[44][3] = (20*x*y*y*y - 60*x*y*z*z) * coeff; - hrly[44][4] = (20*x*x*x*z - 60*x*y*y*z) * coeff; - hrly[44][5] = (20*x*x*x*y - 20*x*y*y*y) * coeff; - - //m=5 : x(x^4 - 10x^2*y^2 + 5y^4) * z - coeff = sqrt(1001.0 / 2.0 / ModuleBase::PI) / 16.0; - hrly[45][0] = (20*x*x*x*z - 60*x*y*y*z) * coeff; - hrly[45][1] = (-60*x*x*y*z + 20*y*y*y*z) * coeff; - hrly[45][2] = (5*x*x*x*x - 30*x*x*y*y 
+ 5*y*y*y*y) * coeff; - hrly[45][3] = (-20*x*x*x*z + 60*x*y*y*z) * coeff; - hrly[45][4] = (-20*x*x*x*y + 20*x*y*y*y) * coeff; - hrly[45][5] = 0.0; - - //m=-5 : y(5x^4 - 10x^2*y^2 + y^4) * z - hrly[46][0] = (60*x*x*y*z - 20*y*y*y*z) * coeff; - hrly[46][1] = (20*x*x*x*z - 60*x*y*y*z) * coeff; - hrly[46][2] = (20*x*x*x*y - 20*x*y*y*y) * coeff; - hrly[46][3] = (-60*x*x*y*z + 20*y*y*y*z) * coeff; - hrly[46][4] = (5*x*x*x*x - 30*x*x*y*y + 5*y*y*y*y) * coeff; - hrly[46][5] = 0.0; - - //m=6 : (x^6 - 15x^4*y^2 + 15x^2*y^4 - y^6) - coeff = sqrt(3003.0 / ModuleBase::PI) / 32.0; - hrly[47][0] = (30*x*x*x*x - 180*x*x*y*y + 30*y*y*y*y) * coeff; - hrly[47][1] = (-120*x*x*x*y + 120*x*y*y*y) * coeff; - hrly[47][2] = 0.0; - hrly[47][3] = (-30*x*x*x*x + 180*x*x*y*y - 30*y*y*y*y) * coeff; - hrly[47][4] = 0.0; - hrly[47][5] = 0.0; - - //m=-6 : xy(3x^4 - 10x^2*y^2 + 3y^4) - hrly[48][0] = (60*x*x*x*y - 60*x*y*y*y) * coeff; - hrly[48][1] = (15*x*x*x*x - 90*x*x*y*y + 15*y*y*y*y) * coeff; - hrly[48][2] = 0.0; - hrly[48][3] = (-60*x*x*x*y + 60*x*y*y*y) * coeff; - hrly[48][4] = 0.0; - hrly[48][5] = 0.0; - - if (Lmax == 6) return; - - /*************************** - L > 6 - ***************************/ - ModuleBase::WARNING_QUIT("hes_rl_sph_harm","l>6 not implemented!"); - - - return; + hrly.resize ((Lmax + 1) * (Lmax + 1), std::vector (6)); + + double radius2 = x * x + y * y + z * z; + double coeff = 0.0; + + // begin calculation + /*************************** + L = 0 + ***************************/ + hrly[0][0] = hrly[0][1] = hrly[0][2] = 0.0; + hrly[0][3] = hrly[0][4] = hrly[0][5] = 0.0; + if (Lmax == 0) + { + return; + } + + /*************************** + L = 1 + ***************************/ + hrly[1][0] = hrly[1][1] = hrly[1][2] = 0.0; + hrly[1][3] = hrly[1][4] = hrly[1][5] = 0.0; + + hrly[2][0] = hrly[2][1] = hrly[2][2] = 0.0; + hrly[2][3] = hrly[2][4] = hrly[2][5] = 0.0; + + hrly[3][0] = hrly[3][1] = hrly[3][2] = 0.0; + hrly[3][3] = hrly[3][4] = hrly[3][5] = 0.0; + + if (Lmax == 1) + { + 
return; + } + + /*************************** + L = 2 + ***************************/ + // m=0 : 3z^2-r^2 + coeff = sqrt (5.0 / ModuleBase::PI) / 4.0; + hrly[4][0] = hrly[4][3] = -2.0 * coeff; + hrly[4][5] = 4.0 * coeff; + hrly[4][1] = hrly[4][2] = hrly[4][4] = 0.0; + + // m=1 : xz + coeff = sqrt (15.0 / ModuleBase::PI) / 2.0; + hrly[5][2] = coeff; + hrly[5][0] = hrly[5][1] = 0.0; + hrly[5][3] = hrly[5][4] = hrly[5][5] = 0.0; + + // m=-1 : yz + hrly[6][4] = coeff; + hrly[6][0] = hrly[6][1] = 0.0; + hrly[6][2] = hrly[6][3] = hrly[6][5] = 0.0; + + // m=-2 : xy + hrly[8][1] = coeff; + hrly[8][0] = hrly[8][2] = 0.0; + hrly[8][3] = hrly[8][4] = hrly[8][5] = 0.0; + + // m=2 : (x^2-y^2) + coeff = sqrt (15.0 / ModuleBase::PI) / 4.0; + hrly[7][0] = 2.0 * coeff; + hrly[7][3] = -2.0 * coeff; + hrly[7][1] = hrly[7][2] = 0.0; + hrly[7][4] = hrly[7][5] = 0.0; + + if (Lmax == 2) + { + return; + } + + /*************************** + L = 3 + ***************************/ + // m=0 : (5z^3-3zr^2) + coeff = sqrt (7.0 / ModuleBase::PI) / 4.0; + hrly[9][0] = hrly[9][3] = -6.0 * z * coeff; + hrly[9][1] = 0.0; + hrly[9][2] = -6.0 * x * coeff; + hrly[9][4] = -6.0 * y * coeff; + hrly[9][5] = 12.0 * z * coeff; + + // m=1 : x(5z^2-r^2) + coeff = sqrt (21.0 / 2.0 / ModuleBase::PI) / 4.0; + hrly[10][0] = -6.0 * x * coeff; + hrly[10][1] = -2.0 * y * coeff; + hrly[10][2] = 8.0 * z * coeff; + hrly[10][3] = -2.0 * x * coeff; + hrly[10][4] = 0.0; + hrly[10][5] = 8.0 * x * coeff; + + // m=-1 : y(5z^2-r^2) + hrly[11][0] = -2.0 * y * coeff; + hrly[11][1] = -2.0 * x * coeff; + hrly[11][2] = 0.0; + hrly[11][3] = -6.0 * y * coeff; + hrly[11][4] = 8.0 * z * coeff; + hrly[11][5] = 8.0 * y * coeff; + + // m=2 : (x^2-y^2)z + coeff = sqrt (105.0 / ModuleBase::PI) / 4.0; + hrly[12][0] = 2.0 * z * coeff; + hrly[12][1] = 0.0; + hrly[12][2] = 2.0 * x * coeff; + hrly[12][3] = -hrly[12][0]; + hrly[12][4] = -2.0 * y * coeff; + hrly[12][5] = 0.0; + + // m=-2 : xyz + coeff = sqrt (105.0 / ModuleBase::PI) / 2.0; + 
hrly[13][0] = 0.0; + hrly[13][1] = z * coeff; + hrly[13][2] = y * coeff; + hrly[13][3] = 0.0; + hrly[13][4] = x * coeff; + hrly[13][5] = 0.0; + + // m=3 : x(x^2-3y^2) + coeff = sqrt (35.0 / 2.0 / ModuleBase::PI) / 4.0; + hrly[14][0] = 6.0 * x * coeff; + hrly[14][1] = -6.0 * y * coeff; + hrly[14][2] = 0.0; + hrly[14][3] = -hrly[14][0]; + hrly[14][4] = 0.0; + hrly[14][5] = 0.0; + + // m=-3 : y(3x^2-y^2) + hrly[15][0] = 6.0 * y * coeff; + hrly[15][1] = 6.0 * x * coeff; + hrly[15][2] = 0.0; + hrly[15][3] = -hrly[15][0]; + hrly[15][4] = 0.0; + hrly[15][5] = 0.0; + + if (Lmax == 3) + { + return; + } + + /*************************** + L = 4 + ***************************/ + // m=0 : (35z^4 - 30z^2r^2 + 3r^4) + coeff = sqrt (1.0 / ModuleBase::PI) * 3.0 / 16.0; + hrly[16][0] = 12.0 * (3.0 * x * x + y * y - 4.0 * z * z) * coeff; + hrly[16][1] = 24.0 * x * y * coeff; + hrly[16][2] = -96.0 * x * z * coeff; + hrly[16][3] = 12.0 * (x * x + 3.0 * y * y - 4.0 * z * z) * coeff; + hrly[16][4] = -96.0 * y * z * coeff; + hrly[16][5] = -48.0 * (x * x + y * y - 2.0 * z * z) * coeff; + + // m=1 : x(7z^3 - 3zr^2) + coeff = 3.0 / 4.0 * sqrt (5.0 / 2.0 / ModuleBase::PI); + hrly[17][0] = -18.0 * x * z * coeff; + hrly[17][1] = -6.0 * y * z * coeff; + hrly[17][2] = -3.0 * (3.0 * x * x + y * y - 4.0 * z * z) * coeff; + hrly[17][3] = -6.0 * x * z * coeff; + hrly[17][4] = -6.0 * x * y * coeff; + hrly[17][5] = 24.0 * x * z * coeff; + + // m=-1 : y(7z^3 - 3zr^2) + hrly[18][0] = -6.0 * y * z * coeff; + hrly[18][1] = -6.0 * x * z * coeff; + hrly[18][2] = -6.0 * x * y * coeff; + hrly[18][3] = -18.0 * y * z * coeff; + hrly[18][4] = -3.0 * (x * x + 3.0 * y * y - 4.0 * z * z) * coeff; + hrly[18][5] = 24.0 * y * z * coeff; + + // m=2 : (x^2 - y^2)(7z^2 - r^2) + coeff = 3.0 / 8.0 * sqrt (5.0 / ModuleBase::PI); + hrly[19][0] = -12.0 * (x * x - z * z) * coeff; + hrly[19][1] = 0.0; + hrly[19][2] = 24.0 * x * z * coeff; + hrly[19][3] = 12.0 * (y * y - z * z) * coeff; + hrly[19][4] = -24.0 * y * z * coeff; + 
hrly[19][5] = 12.0 * (x * x - y * y) * coeff; + + // m=-2 : xy(7z^2 - r^2) + coeff = 3.0 / 4.0 * sqrt (5.0 / ModuleBase::PI); + hrly[20][0] = -6.0 * x * y * coeff; + hrly[20][1] = -3.0 * (x * x + y * y - 2.0 * z * z) * coeff; + hrly[20][2] = 2.0 * y * z * coeff; + hrly[20][3] = hrly[20][0]; + hrly[20][4] = 12.0 * x * z * coeff; + hrly[20][5] = 12.0 * x * y * coeff; + + // m=3 : x(x^2-3y^2)z + coeff = 3.0 / 4.0 * sqrt (35.0 / 2.0 / ModuleBase::PI); + hrly[21][0] = 6.0 * x * z * coeff; + hrly[21][1] = -6.0 * y * z * coeff; + hrly[21][2] = 3.0 * (x * x - y * y) * coeff; + hrly[21][3] = -6.0 * x * z * coeff; + hrly[21][4] = -6.0 * x * y * coeff; + hrly[21][5] = 0.0; + + // m=-3 : y(3x^2-y^2)z + hrly[22][0] = 6.0 * y * z * coeff; + hrly[22][1] = 6.0 * x * z * coeff; + hrly[22][2] = 6.0 * x * y * coeff; + hrly[22][3] = -6.0 * y * z * coeff; + hrly[22][4] = 3.0 * (x * x - y * y) * coeff; + hrly[22][5] = 0.0; + + // m=4 : x^4 + y^4 - 6 x^2y^2 + coeff = 3.0 / 16.0 * sqrt (35.0 / ModuleBase::PI); + hrly[23][0] = 12.0 * (x * x - y * y) * coeff; + hrly[23][1] = -24.0 * x * y * coeff; + hrly[23][2] = 0.0; + hrly[23][3] = -hrly[23][0]; + hrly[23][4] = 0.0; + hrly[23][5] = 0.0; + + // m=-4 : xy(x^2 - y^2) + coeff = 3.0 / 4.0 * sqrt (35.0 / ModuleBase::PI); + hrly[24][0] = 6.0 * x * y * coeff; + hrly[24][1] = 3.0 * (x * x - y * y) * coeff; + hrly[24][2] = 0.0; + hrly[24][3] = -hrly[24][0]; + hrly[24][4] = 0.0; + hrly[24][5] = 0.0; + + if (Lmax == 4) + { + return; + } + + /*************************** + L = 5 + ***************************/ + // m=0 : (63z^5 - 70z^3*r^2 + 15z*r^4) + coeff = sqrt (11.0 / ModuleBase::PI) / 16.0; + hrly[25][0] = (180 * x * x * z + 60 * y * y * z - 80 * z * z * z) * coeff; + hrly[25][1] = (120 * x * y * z) * coeff; + hrly[25][2] = (60 * x * x * x + 60 * x * y * y - 240 * x * z * z) * coeff; + hrly[25][3] = (60 * x * x * z + 180 * y * y * z - 80 * z * z * z) * coeff; + hrly[25][4] = (60 * x * x * y + 60 * y * y * y - 240 * y * z * z) * coeff; + 
hrly[25][5] = (-240 * x * x * z - 240 * y * y * z + 160 * z * z * z) * coeff; + + // m=1 : x(21z^4 - 14z^2*r^2 + r^4) + coeff = sqrt (165.0 / 2.0 / ModuleBase::PI) / 16.0; + hrly[26][0] = (20 * x * x * x + 12 * x * y * y - 72 * x * z * z) * coeff; + hrly[26][1] = (12 * x * x * y + 4 * y * y * y - 24 * y * z * z) * coeff; + hrly[26][2] = (-72 * x * x * z - 24 * y * y * z + 32 * z * z * z) * coeff; + hrly[26][3] = (4 * x * x * x + 12 * x * y * y - 24 * x * z * z) * coeff; + hrly[26][4] = (-48 * x * y * z) * coeff; + hrly[26][5] = (-24 * x * x * x - 24 * x * y * y + 96 * x * z * z) * coeff; + + // m=-1 : y(21z^4 - 14z^2*r^2 + r^4) + hrly[27][0] = (12 * x * x * y + 4 * y * y * y - 24 * y * z * z) * coeff; + hrly[27][1] = (4 * x * x * x + 12 * x * y * y - 24 * x * z * z) * coeff; + hrly[27][2] = (-48 * x * y * z) * coeff; + hrly[27][3] = (12 * x * x * y + 20 * y * y * y - 72 * y * z * z) * coeff; + hrly[27][4] = (-24 * x * x * z - 72 * y * y * z + 32 * z * z * z) * coeff; + hrly[27][5] = (-24 * x * x * y - 24 * y * y * y + 96 * y * z * z) * coeff; + + // m=2 : (x^2 - y^2)(3z^3 - z*r^2) + coeff = sqrt (1155.0 / ModuleBase::PI) / 8.0; + hrly[28][0] = (-12 * x * x * z + 4 * z * z * z) * coeff; + hrly[28][1] = 0.0; + hrly[28][2] = (-4 * x * x * x + 12 * x * z * z) * coeff; + hrly[28][3] = (12 * y * y * z - 4 * z * z * z) * coeff; + hrly[28][4] = (4 * y * y * y - 12 * y * z * z) * coeff; + hrly[28][5] = (12 * x * x * z - 12 * y * y * z) * coeff; + + // m=-2 : xy(3z^3 - z*r^2) + hrly[29][0] = (-6 * x * y * z) * coeff; + hrly[29][1] = (-3 * x * x * z - 3 * y * y * z + 2 * z * z * z) * coeff; + hrly[29][2] = (-3 * x * x * y - y * y * y + 6 * y * z * z) * coeff; + hrly[29][3] = (-6 * x * y * z) * coeff; + hrly[29][4] = (-x * x * x - 3 * x * y * y + 6 * x * z * z) * coeff; + hrly[29][5] = (12 * x * y * z) * coeff; + + // m=3 : x(x^2 - 3y^2)(9z^2 - r^2) + coeff = sqrt (385.0 / 2.0 / ModuleBase::PI) / 16.0; + hrly[30][0] = (-20 * x * x * x + 12 * x * y * y + 48 * x * z * z) * 
coeff; + hrly[30][1] = (12 * x * x * y + 12 * y * y * y - 48 * y * z * z) * coeff; + hrly[30][2] = (48 * x * x * z - 48 * y * y * z) * coeff; + hrly[30][3] = (4 * x * x * x + 36 * x * y * y - 48 * x * z * z) * coeff; + hrly[30][4] = (-96 * x * y * z) * coeff; + hrly[30][5] = (16 * x * x * x - 48 * x * y * y) * coeff; + + // m=-3 : y(3x^2 - y^2)(9z^2 - r^2) + hrly[31][0] = (-36 * x * x * y - 4 * y * y * y + 48 * y * z * z) * coeff; + hrly[31][1] = (-12 * x * x * x - 12 * x * y * y + 48 * x * z * z) * coeff; + hrly[31][2] = (96 * x * y * z) * coeff; + hrly[31][3] = (-12 * x * x * y + 20 * y * y * y - 48 * y * z * z) * coeff; + hrly[31][4] = (48 * x * x * z - 48 * y * y * z) * coeff; + hrly[31][5] = (48 * x * x * y - 16 * y * y * y) * coeff; + + // m=4 : (x^4 - 6x^2*y^2 + y^4) * z + coeff = sqrt (385.0 / ModuleBase::PI) / 16.0; + hrly[32][0] = (12 * x * x * z - 12 * y * y * z) * coeff; + hrly[32][1] = (-24 * x * y * z) * coeff; + hrly[32][2] = (4 * x * x * x - 12 * x * y * y) * coeff; + hrly[32][3] = (-12 * x * x * z + 12 * y * y * z) * coeff; + hrly[32][4] = (-12 * x * x * y + 4 * y * y * y) * coeff; + hrly[32][5] = 0.0; + + // m=-4 : xy(x^2 - y^2) * z + hrly[33][0] = (6 * x * y * z) * coeff; + hrly[33][1] = (3 * x * x * z - 3 * y * y * z) * coeff; + hrly[33][2] = (3 * x * x * y - y * y * y) * coeff; + hrly[33][3] = (-6 * x * y * z) * coeff; + hrly[33][4] = (x * x * x - 3 * x * y * y) * coeff; + hrly[33][5] = 0.0; + + // m=5 : x(x^4 - 10x^2*y^2 + 5y^4) + coeff = sqrt (77.0 / 2.0 / ModuleBase::PI) / 16.0; + hrly[34][0] = (20.0 * x * x * x - 60.0 * x * y * y) * coeff; + hrly[34][1] = (-60.0 * x * x * y + 20.0 * y * y * y) * coeff; + hrly[34][2] = 0.0; + hrly[34][3] = (-20.0 * x * x * x + 60.0 * x * y * y) * coeff; + hrly[34][4] = 0.0; + hrly[34][5] = 0.0; + + // m=-5 : y(5x^4 - 10x^2*y^2 + y^4) + hrly[35][0] = (60.0 * x * x * y - 20.0 * y * y * y) * coeff; + hrly[35][1] = (20.0 * x * x * x - 60.0 * x * y * y) * coeff; + hrly[35][2] = 0.0; + hrly[35][3] = (-60.0 * x * x 
* y + 20.0 * y * y * y) * coeff; + hrly[35][4] = 0.0; + hrly[35][5] = 0.0; + + if (Lmax == 5) + { + return; + } + + /*************************** + L = 6 + ***************************/ + // m=0 : (231z^6 - 315z^4*r^2 + 105z^2*r^4 - 5r^6) + coeff = sqrt (13.0 / ModuleBase::PI) / 32.0; + hrly[36][0] = (-150 * x * x * x * x - 180 * x * x * y * y + 1080 * x * x * z * z - 30 * y * y * y * y + + 360 * y * y * z * z - 240 * z * z * z * z) + * coeff; + hrly[36][1] = (-120 * x * x * x * y - 120 * x * y * y * y + 720 * x * y * z * z) * coeff; + hrly[36][2] = (720 * x * x * x * z + 720 * x * y * y * z - 960 * x * z * z * z) * coeff; + hrly[36][3] = (-30 * x * x * x * x - 180 * x * x * y * y + 360 * x * x * z * z - 150 * y * y * y * y + + 1080 * y * y * z * z - 240 * z * z * z * z) + * coeff; + hrly[36][4] = (720 * x * x * y * z + 720 * y * y * y * z - 960 * y * z * z * z) * coeff; + hrly[36][5] = (180 * x * x * x * x + 360 * x * x * y * y - 1440 * x * x * z * z + 180 * y * y * y * y + - 1440 * y * y * z * z + 480 * z * z * z * z) + * coeff; + + // m=1 : x(33z^5 - 30z^3*r^2 + 5z*r^4) + coeff = sqrt (273.0 / 2.0 / ModuleBase::PI) / 16.0; + hrly[37][0] = (100 * x * x * x * z + 60 * x * y * y * z - 120 * x * z * z * z) * coeff; + hrly[37][1] = (60 * x * x * y * z + 20 * y * y * y * z - 40 * y * z * z * z) * coeff; + hrly[37][2] = (25 * x * x * x * x + 30 * x * x * y * y - 180 * x * x * z * z + 5 * y * y * y * y + - 60 * y * y * z * z + 40 * z * z * z * z) + * coeff; + hrly[37][3] = (20 * x * x * x * z + 60 * x * y * y * z - 40 * x * z * z * z) * coeff; + hrly[37][4] = (20 * x * x * x * y + 20 * x * y * y * y - 120 * x * y * z * z) * coeff; + hrly[37][5] = (-120 * x * x * x * z - 120 * x * y * y * z + 160 * x * z * z * z) * coeff; + + // m=-1 : y(33z^5 - 30z^3*r^2 + 5z*r^4) + hrly[38][0] = (60 * x * x * y * z + 20 * y * y * y * z - 40 * y * z * z * z) * coeff; + hrly[38][1] = (20 * x * x * x * z + 60 * x * y * y * z - 40 * x * z * z * z) * coeff; + hrly[38][2] = (20 * x * x * x * y 
+ 20 * x * y * y * y - 120 * x * y * z * z) * coeff; + hrly[38][3] = (60 * x * x * y * z + 100 * y * y * y * z - 120 * y * z * z * z) * coeff; + hrly[38][4] = (5 * x * x * x * x + 30 * x * x * y * y - 60 * x * x * z * z + 25 * y * y * y * y + - 180 * y * y * z * z + 40 * z * z * z * z) + * coeff; + hrly[38][5] = (-120 * x * x * y * z - 120 * y * y * y * z + 160 * y * z * z * z) * coeff; + + // m=2 : (x^2 - y^2)(33z^4 - 18z^2*r^2 + r^4) + coeff = sqrt (1365.0 / ModuleBase::PI) / 32.0; + hrly[39][0] + = (30 * x * x * x * x + 12 * x * x * y * y - 192 * x * x * z * z - 2 * y * y * y * y + 32 * z * z * z * z) + * coeff; + hrly[39][1] = (8 * x * x * x * y - 8 * x * y * y * y) * coeff; + hrly[39][2] = (-128 * x * x * x * z + 128 * x * z * z * z) * coeff; + hrly[39][3] + = (2 * x * x * x * x - 12 * x * x * y * y - 30 * y * y * y * y + 192 * y * y * z * z - 32 * z * z * z * z) + * coeff; + hrly[39][4] = (128 * y * y * y * z - 128 * y * z * z * z) * coeff; + hrly[39][5] = (-32 * x * x * x * x + 192 * x * x * z * z + 32 * y * y * y * y - 192 * y * y * z * z) * coeff; + + // m=-2 : xy(33z^4 - 18z^2*r^2 + r^4) + hrly[40][0] = (20 * x * x * x * y + 12 * x * y * y * y - 96 * x * y * z * z) * coeff; + hrly[40][1] = (20 * x * x * x * x + 36 * x * x * y * y - 96 * x * x * z * z + 20 * y * y * y * y + - 96 * y * y * z * z + 32 * z * z * z * z) + * coeff; + hrly[40][2] = (-96 * x * x * y * z - 32 * y * y * y * z + 64 * y * z * z * z) * coeff; + hrly[40][3] = (12 * x * x * x * y + 20 * x * y * y * y - 96 * x * y * z * z) * coeff; + hrly[40][4] = (-32 * x * x * x * z - 96 * x * y * y * z + 64 * x * z * z * z) * coeff; + hrly[40][5] = (-32 * x * x * x * y - 32 * x * y * y * y + 192 * x * y * z * z) * coeff; + + // m=3 : x(x^2 - 3y^2)(11z^3 - 3z*r^2) + coeff = sqrt (1365.0 / ModuleBase::PI) / 16.0; + hrly[41][0] = (-60 * x * x * x * z + 36 * x * y * y * z + 48 * x * z * z * z) * coeff; + hrly[41][1] = (36 * x * x * y * z + 36 * y * y * y * z - 48 * y * z * z * z) * coeff; + hrly[41][2] + 
= (-30 * x * x * x * x + 36 * x * x * y * y + 72 * x * x * z * z + 18 * y * y * y * y - 72 * y * y * z * z) + * coeff; + hrly[41][3] = (12 * x * x * x * z + 108 * x * y * y * z - 48 * x * z * z * z) * coeff; + hrly[41][4] = (12 * x * x * x * y + 36 * x * y * y * y - 144 * x * y * z * z) * coeff; + hrly[41][5] = (48 * x * x * x * z - 144 * x * y * y * z) * coeff; + + // m=-3 : y(3x^2 - y^2)(11z^3 - 3z*r^2) + hrly[42][0] = (-108 * x * x * y * z - 12 * y * y * y * z + 48 * y * z * z * z) * coeff; + hrly[42][1] = (-36 * x * x * x * z - 36 * x * y * y * z + 48 * x * z * z * z) * coeff; + hrly[42][2] = (-36 * x * x * x * y - 12 * x * y * y * y + 144 * x * y * z * z) * coeff; + hrly[42][3] = (-36 * x * x * y * z + 60 * y * y * y * z - 48 * y * z * z * z) * coeff; + hrly[42][4] + = (-18 * x * x * x * x - 36 * x * x * y * y + 72 * x * x * z * z + 30 * y * y * y * y - 72 * y * y * z * z) + * coeff; + hrly[42][5] = (144 * x * x * y * z - 48 * y * y * y * z) * coeff; + + // m=4 : (x^4 - 6x^2*y^2 + y^4)(11z^2 - r^2) + coeff = sqrt (91.0 / ModuleBase::PI) / 32.0; + hrly[43][0] + = (-30 * x * x * x * x + 60 * x * x * y * y + 120 * x * x * z * z + 10 * y * y * y * y - 120 * y * y * z * z) + * coeff; + hrly[43][1] = (40 * x * x * x * y + 40 * x * y * y * y - 240 * x * y * z * z) * coeff; + hrly[43][2] = (80 * x * x * x * z - 240 * x * y * y * z) * coeff; + hrly[43][3] + = (10 * x * x * x * x + 60 * x * x * y * y - 120 * x * x * z * z - 30 * y * y * y * y + 120 * y * y * z * z) + * coeff; + hrly[43][4] = (-240 * x * x * y * z + 80 * y * y * y * z) * coeff; + hrly[43][5] = (20 * x * x * x * x - 120 * x * x * y * y + 20 * y * y * y * y) * coeff; + + // m=-4 : xy(x^2 - y^2)(11z^2 - r^2) + hrly[44][0] = (-20 * x * x * x * y + 60 * x * y * z * z) * coeff; + hrly[44][1] = (-5 * x * x * x * x + 30 * x * x * z * z + 5 * y * y * y * y - 30 * y * y * z * z) * coeff; + hrly[44][2] = (60 * x * x * y * z - 20 * y * y * y * z) * coeff; + hrly[44][3] = (20 * x * y * y * y - 60 * x * y * z * z) * 
coeff; + hrly[44][4] = (20 * x * x * x * z - 60 * x * y * y * z) * coeff; + hrly[44][5] = (20 * x * x * x * y - 20 * x * y * y * y) * coeff; + + // m=5 : x(x^4 - 10x^2*y^2 + 5y^4) * z + coeff = sqrt (1001.0 / 2.0 / ModuleBase::PI) / 16.0; + hrly[45][0] = (20 * x * x * x * z - 60 * x * y * y * z) * coeff; + hrly[45][1] = (-60 * x * x * y * z + 20 * y * y * y * z) * coeff; + hrly[45][2] = (5 * x * x * x * x - 30 * x * x * y * y + 5 * y * y * y * y) * coeff; + hrly[45][3] = (-20 * x * x * x * z + 60 * x * y * y * z) * coeff; + hrly[45][4] = (-20 * x * x * x * y + 20 * x * y * y * y) * coeff; + hrly[45][5] = 0.0; + + // m=-5 : y(5x^4 - 10x^2*y^2 + y^4) * z + hrly[46][0] = (60 * x * x * y * z - 20 * y * y * y * z) * coeff; + hrly[46][1] = (20 * x * x * x * z - 60 * x * y * y * z) * coeff; + hrly[46][2] = (20 * x * x * x * y - 20 * x * y * y * y) * coeff; + hrly[46][3] = (-60 * x * x * y * z + 20 * y * y * y * z) * coeff; + hrly[46][4] = (5 * x * x * x * x - 30 * x * x * y * y + 5 * y * y * y * y) * coeff; + hrly[46][5] = 0.0; + + // m=6 : (x^6 - 15x^4*y^2 + 15x^2*y^4 - y^6) + coeff = sqrt (3003.0 / ModuleBase::PI) / 32.0; + hrly[47][0] = (30 * x * x * x * x - 180 * x * x * y * y + 30 * y * y * y * y) * coeff; + hrly[47][1] = (-120 * x * x * x * y + 120 * x * y * y * y) * coeff; + hrly[47][2] = 0.0; + hrly[47][3] = (-30 * x * x * x * x + 180 * x * x * y * y - 30 * y * y * y * y) * coeff; + hrly[47][4] = 0.0; + hrly[47][5] = 0.0; + + // m=-6 : xy(3x^4 - 10x^2*y^2 + 3y^4) + hrly[48][0] = (60 * x * x * x * y - 60 * x * y * y * y) * coeff; + hrly[48][1] = (15 * x * x * x * x - 90 * x * x * y * y + 15 * y * y * y * y) * coeff; + hrly[48][2] = 0.0; + hrly[48][3] = (-60 * x * x * x * y + 60 * x * y * y * y) * coeff; + hrly[48][4] = 0.0; + hrly[48][5] = 0.0; + + if (Lmax == 6) + { + return; + } + + /*************************** + L > 6 + ***************************/ + ModuleBase::WARNING_QUIT ("hes_rl_sph_harm", "l>6 not implemented!"); + + return; } -void Ylm::set_coefficients 
(){}; +void Ylm::set_coefficients () {}; /* void Ylm::test1 (void) { - ModuleBase::Vector3 R (20.0, 0.0, 0.0); - double xdr = R.x/R.norm(); - double ydr = R.y/R.norm(); - double zdr = R.z/R.norm(); - const int L = 9; - const double rl = std::pow( R.norm(), L); - std::cout << " rl=" << rl << std::endl; - Ylm::set_coefficients(); - - int nu = 100; - - // Peize Lin change rlya 2016-08-26 - std::vector rlya; - double rlyb[400]; - ZEROS( rlyb, 400); + ModuleBase::Vector3 R (20.0, 0.0, 0.0); + double xdr = R.x/R.norm(); + double ydr = R.y/R.norm(); + double zdr = R.z/R.norm(); + const int L = 9; + const double rl = std::pow( R.norm(), L); + std::cout << " rl=" << rl << std::endl; + Ylm::set_coefficients(); + + int nu = 100; + + // Peize Lin change rlya 2016-08-26 + std::vector rlya; + double rlyb[400]; + ZEROS( rlyb, 400); // Ylm::sph_harm (9, xdr, ydr, zdr, rlya); - Ylm::rl_sph_harm (L, xdr, ydr, zdr, rlya); + Ylm::rl_sph_harm (L, xdr, ydr, zdr, rlya); // Ylm::rlylm (10, R.x, R.y, R.z, rlyb); - Ylm::get_ylm_real (L+1, R, rlyb); - - for (int i=0; i < nu; i++) - { - // std::cout << "\ni= " << i << " rlya = " << rlya[i] << " rlyb = " << rlyb[i] << std::endl; - double diff = fabs(rlya[i]-rlyb[i]); - if (diff > 1e-8) - { - std::cout << "Ylm::test1, error is too large!" << std::endl; - //WARNING_QUIT ("Ylm::test1","error is too large!"); - exit(0); - } - } - return; + Ylm::get_ylm_real (L+1, R, rlyb); + + for (int i=0; i < nu; i++) + { + // std::cout << "\ni= " << i << " rlya = " << rlya[i] << " rlyb = " << rlyb[i] << std::endl; + double diff = fabs(rlya[i]-rlyb[i]); + if (diff > 1e-8) + { + std::cout << "Ylm::test1, error is too large!" 
<< std::endl; + //WARNING_QUIT ("Ylm::test1","error is too large!"); + exit(0); + } + } + return; } */ /* void Ylm::test2 (void) { - ModuleBase::Vector3 R (0.1,-0.2,0.5); - Ylm::set_coefficients(); - - //int nu = 100; - - std::vector rlya; - double rlyb[400]; - - std::vector> grlya; - double grlyb[400][3]; - - Ylm::grad_rl_sph_harm (9, R.x, R.y, R.z, rlya, grlya); - Ylm::rlylm (10, R.x, R.y, R.z, rlyb, grlyb); - - for (int i = 0; i < 100; i++) - { - double diffx = fabs(grlya[i][2]-grlyb[i][2]); - if (diffx > 1e-8) - { - std::cout << "Ylm::test2, Large error in Direv X!" << std::endl; - //WARNING_QUIT ("Ylm::test2","Large error in Direv X!"); - exit(0); - } - } - return; + ModuleBase::Vector3 R (0.1,-0.2,0.5); + Ylm::set_coefficients(); + + //int nu = 100; + + std::vector rlya; + double rlyb[400]; + + std::vector> grlya; + double grlyb[400][3]; + + Ylm::grad_rl_sph_harm (9, R.x, R.y, R.z, rlya, grlya); + Ylm::rlylm (10, R.x, R.y, R.z, rlyb, grlyb); + + for (int i = 0; i < 100; i++) + { + double diffx = fabs(grlya[i][2]-grlyb[i][2]); + if (diffx > 1e-8) + { + std::cout << "Ylm::test2, Large error in Direv X!" 
<< std::endl; + //WARNING_QUIT ("Ylm::test2","Large error in Direv X!"); + exit(0); + } + } + return; } */ -void Ylm::rlylm -( - const int Lmax, //max momentum of l + 1 - const double x, - const double y, - const double z, - double rly[], - double grly[][3] -) +void + Ylm::rlylm (const int Lmax, // max momentum of l + 1 + const double x, + const double y, + const double z, + double rly[], + double grly[][3]) { - int MaxL = Lmax - 1; - - assert(MaxL >= 0); - - //get xy_dependence - assert(MaxL <= 19); - - double Am[20]; - double Bm[20]; - double Gx_Am[20]; - double Gx_Bm[20]; - double Gy_Am[20]; - double Gy_Bm[20]; - - ZEROS(Am, 20); - ZEROS(Bm, 20); - ZEROS(Gx_Am, 20); - ZEROS(Gy_Am, 20); - - double x2, x3, x4, x5; - double y2, y3, y4, y5; - - x2 = x * x; - x3 = x2 * x; - x4 = x3 * x; - x5 = x4 * x; - - y2 = y * y; - y3 = y2 * y; - y4 = y3 * y; - y5 = y4 * y; - - //x-y dependence - //Am - //Bm - for(int im = 0; im < MaxL+1; im++) - { - if(im == 0) - { - Am[0] = 1.0; - Bm[0] = 0.0; - - Gx_Am[0] = 0.0; - Gy_Am[0] = 0.0; - - Gx_Bm[0] = 0.0; - Gy_Bm[0] = 0.0; - } - else if(im == 1) - { - Am[1] = x; - Bm[1] = y; - - Gx_Am[1] = 1.0; - Gy_Am[1] = 0.0; - - Gx_Bm[1] = 0.0; - Gy_Bm[1] = 1.0; - } - else if(im == 2) - { - Am[2] = x2- y2; - Bm[2] = 2.0 * x * y; - - Gx_Am[2] = 2.0 * x; - Gy_Am[2] = -2.0 * y; - - Gx_Bm[2] = 2.0 * y; - Gy_Bm[2] = 2.0 * x; - } - else if(im == 3) - { - Am[3] = x3 - 3.0 * x * y2; - Bm[3] = 3.0 * x2 * y - y3; - - Gx_Am[3] = 3.0 * (x2 - y2); - Gy_Am[3] = -6.0 * x * y; - - Gx_Bm[3] = 6.0 * x * y; - Gy_Bm[3] = 3.0 * (x2 - y2); - } - else if(im == 4) - { - Am[4] = x4 - 6.0 * x2 * y2 + y4; - Bm[4] = 4.0 * (x3 * y - x * y3); - - Gx_Am[4] = 4.0 * x3 - 12.0 * x * y2; - Gy_Am[4] = -12.0 * x2 * y + 4.0 * y3; - - Gx_Bm[4] = 12.0 * x2 * y - 4.0 * y3; - Gy_Bm[4] = 4.0 * x3 - 12.0 * x * y2; - } - else if(im == 5) - { - Am[5] = x5 - 10.0 * x3 * y2 + 5.0 * x * y4; - Bm[5] = 5.0 * x4 * y - 10.0 * x2 * y3 + y5; - - Gx_Am[5] = 5.0 * x4 - 30.0 * x2 * y2 + 5.0 * y4; - 
Gy_Am[5] = 20.0 * (x * y3 - x3 * y); - - Gx_Bm[5] = 20.0 * (x3 * y - x * y3); - Gy_Bm[5] = 5.0 * x4 - 30.0 * x2 * y2 + 5.0 * y4; - } - else - { - for(int ip = 0; ip <= im; ip++) - { - double aux = Fact(im) / Fact(ip) / Fact(im - ip); - Am[im] += aux * pow(x, ip) * pow(y, im-ip) * cos( (im-ip) * ModuleBase::PI / 2.0 ); - Bm[im] += aux * pow(x, ip) * pow(y, im-ip) * sin( (im-ip) * ModuleBase::PI / 2.0 ); - - if(ip > 0) - { - Gx_Am[im] += aux * ip * pow(x, ip-1) * pow(y, im-ip) * cos( (im-ip) * ModuleBase::PI / 2.0 ); - Gx_Bm[im] += aux * ip * pow(x, ip-1) * pow(y, im-ip) * sin( (im-ip) * ModuleBase::PI / 2.0 ); - } - - if(ip < im) - { - Gy_Am[im] += aux * pow(x, ip) * (im - ip) * pow(y, im-ip-1) * cos( (im-ip) * ModuleBase::PI / 2.0 ); - Gy_Bm[im] += aux * pow(x, ip) * (im - ip) * pow(y, im-ip-1) * sin( (im-ip) * ModuleBase::PI / 2.0 ); - } - } - } - } - - //z dependence - double zdep[20][20]; - double Gx_dep[20][20]; - double Gy_dep[20][20]; - double Gz_dep[20][20]; - - for(int il = 0; il < 20; il++) - { - ZEROS(zdep[il], 20); - ZEROS(Gx_dep[il], 20); - ZEROS(Gy_dep[il], 20); - ZEROS(Gz_dep[il], 20); - } - - double z2 = z * z; - double z3 = z2 * z; - double z4 = z3 * z; - //double z5 = z4 * z; - - double r = sqrt(x*x + y*y + z*z); - double r2 = r * r; - double r3 = r2 * r; - double r4 = r3 * r; - - for(int il = 0; il < MaxL+1; il++) - { - if(il == 0) - { - zdep[0][0] = 1.0; - } - else if(il == 1) - { - zdep[1][0] = z; - zdep[1][1] = 1.0; - - Gz_dep[1][0] = 1.0; - } - else if(il == 2) - { - zdep[2][0] = 0.5 * (3.0 * z2 - r2); - Gx_dep[2][0] = -x; - Gy_dep[2][0] = -y; - Gz_dep[2][0] = 2.0 * z; - - zdep[2][1] = sqrt(3.0) * z; - Gz_dep[2][1] = sqrt(3.0); - - zdep[2][2] = sqrt(3.0) * 0.5; - } - else if(il == 3) - { - zdep[3][0] = 2.5 * z3 - 1.5 * z * r2; - Gx_dep[3][0] = -3.0 * x * z; - Gy_dep[3][0] = -3.0 * y * z; - Gz_dep[3][0] = 1.5 * (3.0 * z2 - r2); - - zdep[3][1] = 0.25 * sqrt(6.0) * (5.0 * z2 - r2); - Gx_dep[3][1] = -0.5 * sqrt(6.0) * x; - Gy_dep[3][1] = -0.5 * 
sqrt(6.0) * y; - Gz_dep[3][1] = sqrt(6.0) * 2.0 * z; - - zdep[3][2] = 0.5 * sqrt(15.0) * z; - Gz_dep[3][2] = 0.5 * sqrt(15.0); - - zdep[3][3] = 0.25 * sqrt(10.0); - } - else if(il == 4) - { - zdep[4][0] = 0.125 * (35.0 * z4 - 30.0 * r2 * z2 + 3.0 * r4); - Gx_dep[4][0] = -7.5 * x * z2 + 1.5 * x * r2; - Gy_dep[4][0] = -7.5 * y * z2 + 1.5 * y * r2; - Gz_dep[4][0] = 10.0 * z3 - 6.0 * r2 * z; - - zdep[4][1] = sqrt(10.0) * 0.25 * z * (7.0 * z2 - 3.0 * r2); - Gx_dep[4][1] = -1.5 * sqrt(10.0) * x * z; - Gy_dep[4][1] = -1.5 * sqrt(10.0) * y * z; - Gz_dep[4][1] = 0.75 * sqrt(10.0) * (5.0 * z2 - r2); - - zdep[4][2] = sqrt(5.0) * 0.25 * (7.0 * z2 - r2); - Gx_dep[4][2] = -0.5 * sqrt(5.0) * x; - Gy_dep[4][2] = -0.5 * sqrt(5.0) * y; - Gz_dep[4][2] = 3.0 * sqrt(5.0) * z; - - zdep[4][3] = sqrt(70.0) * 0.25 * z; - Gz_dep[4][3] = 0.25 * sqrt(70.0); - - zdep[4][4] = sqrt(35.0) * 0.125; - } - else if(il == 5) - { - zdep[5][0] = 0.125 * z *( 63.0 * z4 - 70.0 * z2 * r2 + 15.0 * r4); - Gx_dep[5][0] = -17.5 * x * z3 + 7.5 * x * z * r2; - Gy_dep[5][0] = -17.5 * y * z3 + 7.5 * y * z * r2; - Gz_dep[5][0] = 175.0 * 0.125 * z4 + 15.0 * 0.125 * r4 - 150.0 * 0.125 * r2 * z2; - - zdep[5][1] = 0.125 * sqrt(15.0) * (21.0 * z4 - 14.0 * z2 * r2 + r4); - Gx_dep[5][1] = -3.5 * sqrt(15.0) * x * z2 + 0.5 * sqrt(15.0) * x * r2; - Gy_dep[5][1] = -3.5 * sqrt(15.0) * y * z2 + 0.5 * sqrt(15.0) * y * r2; - Gz_dep[5][1] = 7.0 * sqrt(15.0) * z3 - 3.0 * sqrt(15.0) * r2 * z; - - zdep[5][2] = 0.25 * sqrt(105.0) * z * (3.0 * z2 - r2); - Gx_dep[5][2] = -0.5 * sqrt(105.0) * x * z; - Gy_dep[5][2] = -0.5 * sqrt(105.0) * y * z; - Gz_dep[5][2] = 0.25 * sqrt(105.0) * (7.0 * z2 - r2); - - zdep[5][3] = 0.0625 * sqrt(70.0) * (9.0 * z2 - r2); - Gx_dep[5][3] = -0.125 * sqrt(70.0) * x; - Gy_dep[5][3] = -0.125 * sqrt(70.0) * y; - Gz_dep[5][3] = sqrt(70.0) * z; - - zdep[5][4] = 0.375 * sqrt(35.0) * z; - Gz_dep[5][4] = 0.375 * sqrt(35.0); - - zdep[5][5] = 0.1875 * sqrt(14.0); - } - else - { - for(int im = 0; im <= il; im++) - { - 
int kmax = static_cast( (il - im) / 2 ); - for(int ik = 0; ik <= kmax; ik++) - { - int twok = 2 * ik; - - double gamma = 0.0; - double aux0, aux1, aux2, aux3; - - aux0 = pow(-1.0, ik) * pow(2.0, -il); - aux1 = Fact(il) / Fact(ik) / Fact(il-ik); - aux2 = Fact(2*il - twok) / Fact(il) / Fact(il - twok); - aux3 = Fact(il - twok) / Fact(il - twok - im); - - gamma = aux0 * aux1 * aux2 * aux3; - - assert(il - twok - im >= 0); - zdep[il][im] += pow(r, twok) * pow(z, il-twok-im) * gamma; - - if(ik > 0) - { - Gx_dep[il][im] += (ik * pow(r2, ik-1) * 2.0 * x) * pow(z, il-twok-im) * gamma; - Gy_dep[il][im] += (ik * pow(r2, ik-1) * 2.0 * y) * pow(z, il-twok-im) * gamma; - } - - if(ik == 0) - { - if(il > im) - { - Gz_dep[il][im] += (il-im) * pow(z, il-im-1) * gamma; - } - } - else - { - if(il - twok - im == 0) - { - Gz_dep[il][im] += gamma * ik * pow(r2, ik-1) * 2.0 * z; - } - else - { - Gz_dep[il][im] += gamma * (ik * pow(r2, ik-1) * 2.0 * z * pow(z, il-twok-im) - + pow(r, twok) * (il-twok-im) * pow(z, il-twok-im-1)); - } - } - } - - if(im >= 1) - { - zdep[il][im] *= sqrt(2 * Fact(il - im) / Fact(il + im)); - Gx_dep[il][im] *= sqrt(2 * Fact(il - im) / Fact(il + im)); - Gy_dep[il][im] *= sqrt(2 * Fact(il - im) / Fact(il + im)); - Gz_dep[il][im] *= sqrt(2 * Fact(il - im) / Fact(il + im)); - - } - } - } - } - - //calc - int ic = 0; - for(int il = 0; il <= MaxL; il++) - { - double fac = sqrt( (2.0 * il + 1.0) / ModuleBase::FOUR_PI ); - - //m=0 - rly[ic] = Am[0] * zdep[il][0] * fac; - grly[ic][0] = (Gx_dep[il][0] * Am[0] + zdep[il][0] * Gx_Am[0]) * fac; - grly[ic][1] = (Gy_dep[il][0] * Am[0] + zdep[il][0] * Gy_Am[0]) * fac; - grly[ic][2] = Gz_dep[il][0] * Am[0] * fac; - - ic++; - - //m ! 
= 0 - for(int im = 1; im <= il; im++) - { - //m>0 - rly[ic] = Am[im] * zdep[il][im] * pow(-1.0, im) * fac; - grly[ic][0] = (Gx_dep[il][im] * Am[im] + zdep[il][im] * Gx_Am[im]) * pow(-1.0, im) * fac; - grly[ic][1] = (Gy_dep[il][im] * Am[im] + zdep[il][im] * Gy_Am[im]) * pow(-1.0, im) * fac; - grly[ic][2] = Gz_dep[il][im] * Am[im] * pow(-1.0, im) * fac; - - ic++; - - //m<0 - rly[ic] = Bm[im] * zdep[il][im] * pow(-1.0, im) * fac; - grly[ic][0] = (Gx_dep[il][im] * Bm[im] + zdep[il][im] * Gx_Bm[im]) * pow(-1.0, im) * fac; - grly[ic][1] = (Gy_dep[il][im] * Bm[im] + zdep[il][im] * Gy_Bm[im]) * pow(-1.0, im) * fac; - grly[ic][2] = Gz_dep[il][im] * Bm[im] * pow(-1.0, im) * fac; - - ic++; - } - } - - return; + int MaxL = Lmax - 1; + + assert (MaxL >= 0); + + // get xy_dependence + assert (MaxL <= 19); + + double Am[20]; + double Bm[20]; + double Gx_Am[20]; + double Gx_Bm[20]; + double Gy_Am[20]; + double Gy_Bm[20]; + + ZEROS (Am, 20); + ZEROS (Bm, 20); + ZEROS (Gx_Am, 20); + ZEROS (Gy_Am, 20); + + double x2, x3, x4, x5; + double y2, y3, y4, y5; + + x2 = x * x; + x3 = x2 * x; + x4 = x3 * x; + x5 = x4 * x; + + y2 = y * y; + y3 = y2 * y; + y4 = y3 * y; + y5 = y4 * y; + + // x-y dependence + // Am + // Bm + for (int im = 0; im < MaxL + 1; im++) + { + if (im == 0) + { + Am[0] = 1.0; + Bm[0] = 0.0; + + Gx_Am[0] = 0.0; + Gy_Am[0] = 0.0; + + Gx_Bm[0] = 0.0; + Gy_Bm[0] = 0.0; + } + else if (im == 1) + { + Am[1] = x; + Bm[1] = y; + + Gx_Am[1] = 1.0; + Gy_Am[1] = 0.0; + + Gx_Bm[1] = 0.0; + Gy_Bm[1] = 1.0; + } + else if (im == 2) + { + Am[2] = x2 - y2; + Bm[2] = 2.0 * x * y; + + Gx_Am[2] = 2.0 * x; + Gy_Am[2] = -2.0 * y; + + Gx_Bm[2] = 2.0 * y; + Gy_Bm[2] = 2.0 * x; + } + else if (im == 3) + { + Am[3] = x3 - 3.0 * x * y2; + Bm[3] = 3.0 * x2 * y - y3; + + Gx_Am[3] = 3.0 * (x2 - y2); + Gy_Am[3] = -6.0 * x * y; + + Gx_Bm[3] = 6.0 * x * y; + Gy_Bm[3] = 3.0 * (x2 - y2); + } + else if (im == 4) + { + Am[4] = x4 - 6.0 * x2 * y2 + y4; + Bm[4] = 4.0 * (x3 * y - x * y3); + + Gx_Am[4] = 4.0 * x3 - 
12.0 * x * y2; + Gy_Am[4] = -12.0 * x2 * y + 4.0 * y3; + + Gx_Bm[4] = 12.0 * x2 * y - 4.0 * y3; + Gy_Bm[4] = 4.0 * x3 - 12.0 * x * y2; + } + else if (im == 5) + { + Am[5] = x5 - 10.0 * x3 * y2 + 5.0 * x * y4; + Bm[5] = 5.0 * x4 * y - 10.0 * x2 * y3 + y5; + + Gx_Am[5] = 5.0 * x4 - 30.0 * x2 * y2 + 5.0 * y4; + Gy_Am[5] = 20.0 * (x * y3 - x3 * y); + + Gx_Bm[5] = 20.0 * (x3 * y - x * y3); + Gy_Bm[5] = 5.0 * x4 - 30.0 * x2 * y2 + 5.0 * y4; + } + else + { + for (int ip = 0; ip <= im; ip++) + { + double aux = Fact (im) / Fact (ip) / Fact (im - ip); + Am[im] += aux * pow (x, ip) * pow (y, im - ip) * cos ((im - ip) * ModuleBase::PI / 2.0); + Bm[im] += aux * pow (x, ip) * pow (y, im - ip) * sin ((im - ip) * ModuleBase::PI / 2.0); + + if (ip > 0) + { + Gx_Am[im] += aux * ip * pow (x, ip - 1) * pow (y, im - ip) + * cos ((im - ip) * ModuleBase::PI / 2.0); + Gx_Bm[im] += aux * ip * pow (x, ip - 1) * pow (y, im - ip) + * sin ((im - ip) * ModuleBase::PI / 2.0); + } + + if (ip < im) + { + Gy_Am[im] += aux * pow (x, ip) * (im - ip) * pow (y, im - ip - 1) + * cos ((im - ip) * ModuleBase::PI / 2.0); + Gy_Bm[im] += aux * pow (x, ip) * (im - ip) * pow (y, im - ip - 1) + * sin ((im - ip) * ModuleBase::PI / 2.0); + } + } + } + } + + // z dependence + double zdep[20][20]; + double Gx_dep[20][20]; + double Gy_dep[20][20]; + double Gz_dep[20][20]; + + for (int il = 0; il < 20; il++) + { + ZEROS (zdep[il], 20); + ZEROS (Gx_dep[il], 20); + ZEROS (Gy_dep[il], 20); + ZEROS (Gz_dep[il], 20); + } + + double z2 = z * z; + double z3 = z2 * z; + double z4 = z3 * z; + // double z5 = z4 * z; + + double r = sqrt (x * x + y * y + z * z); + double r2 = r * r; + double r3 = r2 * r; + double r4 = r3 * r; + + for (int il = 0; il < MaxL + 1; il++) + { + if (il == 0) + { + zdep[0][0] = 1.0; + } + else if (il == 1) + { + zdep[1][0] = z; + zdep[1][1] = 1.0; + + Gz_dep[1][0] = 1.0; + } + else if (il == 2) + { + zdep[2][0] = 0.5 * (3.0 * z2 - r2); + Gx_dep[2][0] = -x; + Gy_dep[2][0] = -y; + Gz_dep[2][0] = 2.0 * z; 
+ + zdep[2][1] = sqrt (3.0) * z; + Gz_dep[2][1] = sqrt (3.0); + + zdep[2][2] = sqrt (3.0) * 0.5; + } + else if (il == 3) + { + zdep[3][0] = 2.5 * z3 - 1.5 * z * r2; + Gx_dep[3][0] = -3.0 * x * z; + Gy_dep[3][0] = -3.0 * y * z; + Gz_dep[3][0] = 1.5 * (3.0 * z2 - r2); + + zdep[3][1] = 0.25 * sqrt (6.0) * (5.0 * z2 - r2); + Gx_dep[3][1] = -0.5 * sqrt (6.0) * x; + Gy_dep[3][1] = -0.5 * sqrt (6.0) * y; + Gz_dep[3][1] = sqrt (6.0) * 2.0 * z; + + zdep[3][2] = 0.5 * sqrt (15.0) * z; + Gz_dep[3][2] = 0.5 * sqrt (15.0); + + zdep[3][3] = 0.25 * sqrt (10.0); + } + else if (il == 4) + { + zdep[4][0] = 0.125 * (35.0 * z4 - 30.0 * r2 * z2 + 3.0 * r4); + Gx_dep[4][0] = -7.5 * x * z2 + 1.5 * x * r2; + Gy_dep[4][0] = -7.5 * y * z2 + 1.5 * y * r2; + Gz_dep[4][0] = 10.0 * z3 - 6.0 * r2 * z; + + zdep[4][1] = sqrt (10.0) * 0.25 * z * (7.0 * z2 - 3.0 * r2); + Gx_dep[4][1] = -1.5 * sqrt (10.0) * x * z; + Gy_dep[4][1] = -1.5 * sqrt (10.0) * y * z; + Gz_dep[4][1] = 0.75 * sqrt (10.0) * (5.0 * z2 - r2); + + zdep[4][2] = sqrt (5.0) * 0.25 * (7.0 * z2 - r2); + Gx_dep[4][2] = -0.5 * sqrt (5.0) * x; + Gy_dep[4][2] = -0.5 * sqrt (5.0) * y; + Gz_dep[4][2] = 3.0 * sqrt (5.0) * z; + + zdep[4][3] = sqrt (70.0) * 0.25 * z; + Gz_dep[4][3] = 0.25 * sqrt (70.0); + + zdep[4][4] = sqrt (35.0) * 0.125; + } + else if (il == 5) + { + zdep[5][0] = 0.125 * z * (63.0 * z4 - 70.0 * z2 * r2 + 15.0 * r4); + Gx_dep[5][0] = -17.5 * x * z3 + 7.5 * x * z * r2; + Gy_dep[5][0] = -17.5 * y * z3 + 7.5 * y * z * r2; + Gz_dep[5][0] = 175.0 * 0.125 * z4 + 15.0 * 0.125 * r4 - 150.0 * 0.125 * r2 * z2; + + zdep[5][1] = 0.125 * sqrt (15.0) * (21.0 * z4 - 14.0 * z2 * r2 + r4); + Gx_dep[5][1] = -3.5 * sqrt (15.0) * x * z2 + 0.5 * sqrt (15.0) * x * r2; + Gy_dep[5][1] = -3.5 * sqrt (15.0) * y * z2 + 0.5 * sqrt (15.0) * y * r2; + Gz_dep[5][1] = 7.0 * sqrt (15.0) * z3 - 3.0 * sqrt (15.0) * r2 * z; + + zdep[5][2] = 0.25 * sqrt (105.0) * z * (3.0 * z2 - r2); + Gx_dep[5][2] = -0.5 * sqrt (105.0) * x * z; + Gy_dep[5][2] = -0.5 * sqrt 
(105.0) * y * z; + Gz_dep[5][2] = 0.25 * sqrt (105.0) * (7.0 * z2 - r2); + + zdep[5][3] = 0.0625 * sqrt (70.0) * (9.0 * z2 - r2); + Gx_dep[5][3] = -0.125 * sqrt (70.0) * x; + Gy_dep[5][3] = -0.125 * sqrt (70.0) * y; + Gz_dep[5][3] = sqrt (70.0) * z; + + zdep[5][4] = 0.375 * sqrt (35.0) * z; + Gz_dep[5][4] = 0.375 * sqrt (35.0); + + zdep[5][5] = 0.1875 * sqrt (14.0); + } + else + { + for (int im = 0; im <= il; im++) + { + int kmax = static_cast ((il - im) / 2); + for (int ik = 0; ik <= kmax; ik++) + { + int twok = 2 * ik; + + double gamma = 0.0; + double aux0, aux1, aux2, aux3; + + aux0 = pow (-1.0, ik) * pow (2.0, -il); + aux1 = Fact (il) / Fact (ik) / Fact (il - ik); + aux2 = Fact (2 * il - twok) / Fact (il) / Fact (il - twok); + aux3 = Fact (il - twok) / Fact (il - twok - im); + + gamma = aux0 * aux1 * aux2 * aux3; + + assert (il - twok - im >= 0); + zdep[il][im] += pow (r, twok) * pow (z, il - twok - im) * gamma; + + if (ik > 0) + { + Gx_dep[il][im] + += (ik * pow (r2, ik - 1) * 2.0 * x) * pow (z, il - twok - im) * gamma; + Gy_dep[il][im] + += (ik * pow (r2, ik - 1) * 2.0 * y) * pow (z, il - twok - im) * gamma; + } + + if (ik == 0) + { + if (il > im) + { + Gz_dep[il][im] += (il - im) * pow (z, il - im - 1) * gamma; + } + } + else + { + if (il - twok - im == 0) + { + Gz_dep[il][im] += gamma * ik * pow (r2, ik - 1) * 2.0 * z; + } + else + { + Gz_dep[il][im] + += gamma + * (ik * pow (r2, ik - 1) * 2.0 * z * pow (z, il - twok - im) + + pow (r, twok) * (il - twok - im) + * pow (z, il - twok - im - 1)); + } + } + } + + if (im >= 1) + { + zdep[il][im] *= sqrt (2 * Fact (il - im) / Fact (il + im)); + Gx_dep[il][im] *= sqrt (2 * Fact (il - im) / Fact (il + im)); + Gy_dep[il][im] *= sqrt (2 * Fact (il - im) / Fact (il + im)); + Gz_dep[il][im] *= sqrt (2 * Fact (il - im) / Fact (il + im)); + } + } + } + } + + // calc + int ic = 0; + for (int il = 0; il <= MaxL; il++) + { + double fac = sqrt ((2.0 * il + 1.0) / ModuleBase::FOUR_PI); + + // m=0 + rly[ic] = Am[0] * 
zdep[il][0] * fac; + grly[ic][0] = (Gx_dep[il][0] * Am[0] + zdep[il][0] * Gx_Am[0]) * fac; + grly[ic][1] = (Gy_dep[il][0] * Am[0] + zdep[il][0] * Gy_Am[0]) * fac; + grly[ic][2] = Gz_dep[il][0] * Am[0] * fac; + + ic++; + + // m ! = 0 + for (int im = 1; im <= il; im++) + { + // m>0 + rly[ic] = Am[im] * zdep[il][im] * pow (-1.0, im) * fac; + grly[ic][0] = (Gx_dep[il][im] * Am[im] + zdep[il][im] * Gx_Am[im]) * pow (-1.0, im) * fac; + grly[ic][1] = (Gy_dep[il][im] * Am[im] + zdep[il][im] * Gy_Am[im]) * pow (-1.0, im) * fac; + grly[ic][2] = Gz_dep[il][im] * Am[im] * pow (-1.0, im) * fac; + + ic++; + + // m<0 + rly[ic] = Bm[im] * zdep[il][im] * pow (-1.0, im) * fac; + grly[ic][0] = (Gx_dep[il][im] * Bm[im] + zdep[il][im] * Gx_Bm[im]) * pow (-1.0, im) * fac; + grly[ic][1] = (Gy_dep[il][im] * Bm[im] + zdep[il][im] * Gy_Bm[im]) * pow (-1.0, im) * fac; + grly[ic][2] = Gz_dep[il][im] * Bm[im] * pow (-1.0, im) * fac; + + ic++; + } + } + + return; } /* void Ylm::test(void) { - ModuleBase::Vector3 R(0.0, 0.0, 1.0); - - double r,r2,r3,r4,r5,r6,r7; - r = R.norm(); - r2 = r * r; - r3 = r2 * r; - r4 = r3 * r; - r5 = r4 * r; - r6 = r5 * r; - r7 = r6 * r; - - //Max L = 7; - double ylm[64]; - double dylmdr[64][3]; - - double rly[64]; - double grly[64][3]; + ModuleBase::Vector3 R(0.0, 0.0, 1.0); + + double r,r2,r3,r4,r5,r6,r7; + r = R.norm(); + r2 = r * r; + r3 = r2 * r; + r4 = r3 * r; + r5 = r4 * r; + r6 = r5 * r; + r7 = r6 * r; + + //Max L = 7; + double ylm[64]; + double dylmdr[64][3]; + + double rly[64]; + double grly[64][3]; // std::cout << R.x << " " << R.y << " " << R.z << std::endl; - get_ylm_real(8, R, ylm, dylmdr); - rlylm(8, R.x, R.y, R.z, rly, grly); + get_ylm_real(8, R, ylm, dylmdr); + rlylm(8, R.x, R.y, R.z, rly, grly); // std::cout << R.x << " " << R.y << " " << R.z << std::endl; - for(int i = 0; i < 64; i++) - { - if(i >= 1 && i <= 3) - { - dylmdr[i][0] = dylmdr[i][0] * r + ylm[i] * R.x / r; - dylmdr[i][1] = dylmdr[i][1] * r + ylm[i] * R.y / r; - dylmdr[i][2] = 
dylmdr[i][2] * r + ylm[i] * R.z / r; - - ylm[i] *= r; - } - if(i >= 4 && i <= 8) - { - dylmdr[i][0] = dylmdr[i][0] * r2 + ylm[i] * R.x * 2.0; - dylmdr[i][1] = dylmdr[i][1] * r2 + ylm[i] * R.y * 2.0; - dylmdr[i][2] = dylmdr[i][2] * r2 + ylm[i] * R.z * 2.0; - - ylm[i] *= r2; - } - if(i >= 9 && i <= 15) - { - dylmdr[i][0] = dylmdr[i][0] * r3 + ylm[i] * R.x * 3.0 * r; - dylmdr[i][1] = dylmdr[i][1] * r3 + ylm[i] * R.y * 3.0 * r; - dylmdr[i][2] = dylmdr[i][2] * r3 + ylm[i] * R.z * 3.0 * r; - - ylm[i] *= pow(R.norm(),3); - } - if(i >= 16 && i <=24) - { - dylmdr[i][0] = dylmdr[i][0] * r4 + ylm[i] * R.x * 4.0 * r2; - dylmdr[i][1] = dylmdr[i][1] * r4 + ylm[i] * R.y * 4.0 * r2; - dylmdr[i][2] = dylmdr[i][2] * r4 + ylm[i] * R.z * 4.0 * r2; - - ylm[i] *= pow(R.norm(), 4); - } - if(i >= 25 && i <= 35) - { - dylmdr[i][0] = dylmdr[i][0] * r5 + ylm[i] * R.x * 5.0 * r3; - dylmdr[i][1] = dylmdr[i][1] * r5 + ylm[i] * R.y * 5.0 * r3; - dylmdr[i][2] = dylmdr[i][2] * r5 + ylm[i] * R.z * 5.0 * r3; - - ylm[i] *= pow(R.norm(), 5); - } - if(i >= 36 && i <= 48) - { - dylmdr[i][0] = dylmdr[i][0] * r6 + ylm[i] * R.x * 6.0 * r4; - dylmdr[i][1] = dylmdr[i][1] * r6 + ylm[i] * R.y * 6.0 * r4; - dylmdr[i][2] = dylmdr[i][2] * r6 + ylm[i] * R.z * 6.0 * r4; - ylm[i] *= pow(R.norm(), 6); - } - if(i >= 49 && i <= 63) - { - dylmdr[i][0] = dylmdr[i][0] * r7 + ylm[i] * R.x * 7.0 * r5; - dylmdr[i][1] = dylmdr[i][1] * r7 + ylm[i] * R.y * 7.0 * r5; - dylmdr[i][2] = dylmdr[i][2] * r7 + ylm[i] * R.z * 7.0 * r5; - ylm[i] *= pow(R.norm(), 7); - } - - std::cout << grly[i][0] << std::setw(20) << grly[i][1] << std::setw(20) << grly[i][2] << std::endl; - } - - return; + for(int i = 0; i < 64; i++) + { + if(i >= 1 && i <= 3) + { + dylmdr[i][0] = dylmdr[i][0] * r + ylm[i] * R.x / r; + dylmdr[i][1] = dylmdr[i][1] * r + ylm[i] * R.y / r; + dylmdr[i][2] = dylmdr[i][2] * r + ylm[i] * R.z / r; + + ylm[i] *= r; + } + if(i >= 4 && i <= 8) + { + dylmdr[i][0] = dylmdr[i][0] * r2 + ylm[i] * R.x * 2.0; + dylmdr[i][1] = 
dylmdr[i][1] * r2 + ylm[i] * R.y * 2.0; + dylmdr[i][2] = dylmdr[i][2] * r2 + ylm[i] * R.z * 2.0; + + ylm[i] *= r2; + } + if(i >= 9 && i <= 15) + { + dylmdr[i][0] = dylmdr[i][0] * r3 + ylm[i] * R.x * 3.0 * r; + dylmdr[i][1] = dylmdr[i][1] * r3 + ylm[i] * R.y * 3.0 * r; + dylmdr[i][2] = dylmdr[i][2] * r3 + ylm[i] * R.z * 3.0 * r; + + ylm[i] *= pow(R.norm(),3); + } + if(i >= 16 && i <=24) + { + dylmdr[i][0] = dylmdr[i][0] * r4 + ylm[i] * R.x * 4.0 * r2; + dylmdr[i][1] = dylmdr[i][1] * r4 + ylm[i] * R.y * 4.0 * r2; + dylmdr[i][2] = dylmdr[i][2] * r4 + ylm[i] * R.z * 4.0 * r2; + + ylm[i] *= pow(R.norm(), 4); + } + if(i >= 25 && i <= 35) + { + dylmdr[i][0] = dylmdr[i][0] * r5 + ylm[i] * R.x * 5.0 * r3; + dylmdr[i][1] = dylmdr[i][1] * r5 + ylm[i] * R.y * 5.0 * r3; + dylmdr[i][2] = dylmdr[i][2] * r5 + ylm[i] * R.z * 5.0 * r3; + + ylm[i] *= pow(R.norm(), 5); + } + if(i >= 36 && i <= 48) + { + dylmdr[i][0] = dylmdr[i][0] * r6 + ylm[i] * R.x * 6.0 * r4; + dylmdr[i][1] = dylmdr[i][1] * r6 + ylm[i] * R.y * 6.0 * r4; + dylmdr[i][2] = dylmdr[i][2] * r6 + ylm[i] * R.z * 6.0 * r4; + ylm[i] *= pow(R.norm(), 6); + } + if(i >= 49 && i <= 63) + { + dylmdr[i][0] = dylmdr[i][0] * r7 + ylm[i] * R.x * 7.0 * r5; + dylmdr[i][1] = dylmdr[i][1] * r7 + ylm[i] * R.y * 7.0 * r5; + dylmdr[i][2] = dylmdr[i][2] * r7 + ylm[i] * R.z * 7.0 * r5; + ylm[i] *= pow(R.norm(), 7); + } + + std::cout << grly[i][0] << std::setw(20) << grly[i][1] << std::setw(20) << grly[i][2] << std::endl; + } + + return; } */ -void Ylm::ZEROS(double u[], const int& n) +void + Ylm::ZEROS (double u[], const int& n) { - for(int i = 0; i < n; i++) - { - u[i] = 0.0; - } - return; + for (int i = 0; i < n; i++) + { + u[i] = 0.0; + } + return; } - //========================================================== // MEMBER FUNCTION : // NAME : Fact ( n! ) // NAME : Semi_Fact ( n!! 
) //========================================================== -long double Ylm::Fact(const int n) +long double + Ylm::Fact (const int n) { - long double f = 1; - for(int i=n; i>1; i--) - { - f *= i; - } - return f; + long double f = 1; + for (int i = n; i > 1; i--) + { + f *= i; + } + return f; } - -int Ylm::Semi_Fact(const int n) +int + Ylm::Semi_Fact (const int n) { - int semif = 1; - for(int i=n; i>2; i -= 2) - { - semif *= i; - } - return semif; + int semif = 1; + for (int i = n; i > 2; i -= 2) + { + semif *= i; + } + return semif; } - -double Ylm::sgn(const double x) +double + Ylm::sgn (const double x) { - if(x < 0.0) return -1.0; - if(x > 0.0) return 1.0; - return 0.0; + if (x < 0.0) + { + return -1.0; + } + if (x > 0.0) + { + return 1.0; + } + return 0.0; } -} +} // namespace ModuleBase diff --git a/source/source_base/ylm.h b/source/source_base/ylm.h index ff25a56a912..a40ac111855 100644 --- a/source/source_base/ylm.h +++ b/source/source_base/ylm.h @@ -10,159 +10,128 @@ namespace ModuleBase class Ylm { - public: - Ylm(){}; - ~Ylm(){}; - - static int nlm; - - - /** - * @brief Get the ylm real object - * - * @param Lmax [in] maximum angular quantum number + 1 - * @param vec [in] the vector to be calculated - * @param ylmr [out] calculated Ylm, Y00, Y10, Y11, Y1-1, Y20, Y21, Y2-1, Y22, Y2-2... - */ - static void get_ylm_real( - const int &Lmax , - const ModuleBase::Vector3 &vec, - double ylmr[]); - - /** - * @brief Get the ylm real object and the gradient - * - * @param Lmax [in] maximum angular quantum number + l - * @param vec [in] the vector to be calculated - * @param ylmr [out] calculated Ylm, Y00, Y10, Y11, Y1-1, Y20, Y21, Y2-1, Y22, Y2-2... - * @param dylmdr [out] gradient of Ylm, [dY00/dx, dY00/dy, dY00/dz], [dY10/dx, dY10/dy, dY10/dz], [dY11/dx, dY11/dy, dY11/dz],... 
- */ - static void get_ylm_real( - const int &Lmax , - const ModuleBase::Vector3 &vec, - double ylmr[], - double dylmdr[][3]); - - /** - * @brief Get the ylm real (solid) object (not used anymore) - * - * @param Lmax [in] maximum angular quantum number + l - * @param x [in] x - * @param y [in] y - * @param z [in] z - * @param rly [in] calculated Ylm, Y00, Y10, Y11, Y1-1, Y20, Y21, Y2-1, Y22, Y2-2... - */ - static void rlylm( - const int Lmax, - const double x, - const double y, - const double z, - double rly[]); - - /** - * @brief Get the ylm real (solid) object and the gradient (not used anymore) - * - * @param Lmax [in] maximum angular quantum number + 1 - * @param x [in] x - * @param y [in] y - * @param z [in] z - * @param rly [in] calculated Ylm, Y00, Y10, Y11, Y1-1, Y20, Y21, Y2-1, Y22, Y2-2... - * @param grly [out] gradient of Ylm, [dY00/dx, dY00/dy, dY00/dz], [dY10/dx, dY10/dy, dY10/dz], [dY11/dx, dY11/dy, dY11/dz],... - */ - static void rlylm( - const int Lmax, - const double x, - const double y, - const double z, - double rly[], - double grly[][3]); - - /** - * @brief Get the ylm real object (used in grid integration) - * - * @param Lmax [in] maximum angular quantum number - * @param xdr [in] x/r - * @param ydr [in] y/r - * @param zdr [in] z/r - * @param rly [in] calculated Ylm, Y00, Y10, Y11, Y1-1, Y20, Y21, Y2-1, Y22, Y2-2... - */ - static void sph_harm( - const int Lmax, - const double xdr, - const double ydr, - const double zdr, - std::vector &rly); - - /** - * @brief Get the ylm real object (used in getting overlap) - * - * @param Lmax [in] maximum angular quantum number - * @param x [in] x/r - * @param y [in] y/r - * @param z [in] z/r - * @param rly [in] calculated Ylm, Y00, Y10, Y11, Y1-1, Y20, Y21, Y2-1, Y22, Y2-2... 
- * @author Peize Lin - * @date 2016-08-26 - */ - static void rl_sph_harm( - const int Lmax, - const double x, - const double y, - const double z, - std::vector& rly); - - /** - * @brief Get the ylm real object and the gradient (used in getting derivative of overlap) - * - * @param Lmax [in] maximum angular quantum number - * @param x [in] x/r - * @param y [in] y/r - * @param z [in] z/r - * @param rly [in] calculated Ylm, Y00, Y10, Y11, Y1-1, Y20, Y21, Y2-1, Y22, Y2-2... - * @param grly [out] gradient of Ylm, stored as a contiguous flat array of - * size (Lmax+1)^2 * 3 in row-major order: - * [dY00/dx, dY00/dy, dY00/dz, dY10/dx, dY10/dy, dY10/dz, ...] - */ - static void grad_rl_sph_harm( - const int Lmax, - const double x, - const double y, - const double z, - double* rly, - double* grly); - - /** - * @brief Get the hessian of r^l Ylm (used in getting derivative of overlap) - * - * @param Lmax [in] maximum angular quantum number - * @param x [in] x - * @param y [in] y - * @param z [in] z - * @param hrly [out] hessian of Ylm, [dY00/dx2, dY00/dxy, dY00/dxz, dY00/dyy, dY00/dyz, dY00/dzz] , ... - */ - static void hes_rl_sph_harm( - const int Lmax, - const double x, - const double y, - const double z, - std::vector>& hrly); - - //calculate the coefficient of Ylm, ylmcoef. - static void set_coefficients (); - - //static void test(); - //static void test1(); - //static void test2(); - - //set the first n elements of u to be 0.0 - static void ZEROS(double u[], const int& n); - - private: - static long double Fact(const int n); - static int Semi_Fact(const int n); - static double sgn(const double x); + public: + Ylm () {}; + ~Ylm () {}; + + static int nlm; + + /** + * @brief Get the ylm real object + * + * @param Lmax [in] maximum angular quantum number + 1 + * @param vec [in] the vector to be calculated + * @param ylmr [out] calculated Ylm, Y00, Y10, Y11, Y1-1, Y20, Y21, Y2-1, Y22, Y2-2... 
+ */ + static void get_ylm_real (const int& Lmax, const ModuleBase::Vector3& vec, double ylmr[]); + + /** + * @brief Get the ylm real object and the gradient + * + * @param Lmax [in] maximum angular quantum number + l + * @param vec [in] the vector to be calculated + * @param ylmr [out] calculated Ylm, Y00, Y10, Y11, Y1-1, Y20, Y21, Y2-1, Y22, Y2-2... + * @param dylmdr [out] gradient of Ylm, [dY00/dx, dY00/dy, dY00/dz], [dY10/dx, dY10/dy, dY10/dz], [dY11/dx, dY11/dy, + * dY11/dz],... + */ + static void + get_ylm_real (const int& Lmax, const ModuleBase::Vector3& vec, double ylmr[], double dylmdr[][3]); + + /** + * @brief Get the ylm real (solid) object (not used anymore) + * + * @param Lmax [in] maximum angular quantum number + l + * @param x [in] x + * @param y [in] y + * @param z [in] z + * @param rly [in] calculated Ylm, Y00, Y10, Y11, Y1-1, Y20, Y21, Y2-1, Y22, Y2-2... + */ + static void rlylm (const int Lmax, const double x, const double y, const double z, double rly[]); + + /** + * @brief Get the ylm real (solid) object and the gradient (not used anymore) + * + * @param Lmax [in] maximum angular quantum number + 1 + * @param x [in] x + * @param y [in] y + * @param z [in] z + * @param rly [in] calculated Ylm, Y00, Y10, Y11, Y1-1, Y20, Y21, Y2-1, Y22, Y2-2... + * @param grly [out] gradient of Ylm, [dY00/dx, dY00/dy, dY00/dz], [dY10/dx, dY10/dy, dY10/dz], [dY11/dx, dY11/dy, + * dY11/dz],... + */ + static void rlylm (const int Lmax, const double x, const double y, const double z, double rly[], double grly[][3]); + + /** + * @brief Get the ylm real object (used in grid integration) + * + * @param Lmax [in] maximum angular quantum number + * @param xdr [in] x/r + * @param ydr [in] y/r + * @param zdr [in] z/r + * @param rly [in] calculated Ylm, Y00, Y10, Y11, Y1-1, Y20, Y21, Y2-1, Y22, Y2-2... 
+ */ + static void + sph_harm (const int Lmax, const double xdr, const double ydr, const double zdr, std::vector& rly); + + /** + * @brief Get the ylm real object (used in getting overlap) + * + * @param Lmax [in] maximum angular quantum number + * @param x [in] x/r + * @param y [in] y/r + * @param z [in] z/r + * @param rly [in] calculated Ylm, Y00, Y10, Y11, Y1-1, Y20, Y21, Y2-1, Y22, Y2-2... + * @author Peize Lin + * @date 2016-08-26 + */ + static void rl_sph_harm (const int Lmax, const double x, const double y, const double z, std::vector& rly); + + /** + * @brief Get the ylm real object and the gradient (used in getting derivative of overlap) + * + * @param Lmax [in] maximum angular quantum number + * @param x [in] x/r + * @param y [in] y/r + * @param z [in] z/r + * @param rly [in] calculated Ylm, Y00, Y10, Y11, Y1-1, Y20, Y21, Y2-1, Y22, Y2-2... + * @param grly [out] gradient of Ylm, stored as a contiguous flat array of + * size (Lmax+1)^2 * 3 in row-major order: + * [dY00/dx, dY00/dy, dY00/dz, dY10/dx, dY10/dy, dY10/dz, ...] + */ + static void + grad_rl_sph_harm (const int Lmax, const double x, const double y, const double z, double* rly, double* grly); + + /** + * @brief Get the hessian of r^l Ylm (used in getting derivative of overlap) + * + * @param Lmax [in] maximum angular quantum number + * @param x [in] x + * @param y [in] y + * @param z [in] z + * @param hrly [out] hessian of Ylm, [dY00/dx2, dY00/dxy, dY00/dxz, dY00/dyy, dY00/dyz, dY00/dzz] , ... + */ + static void hes_rl_sph_harm (const int Lmax, + const double x, + const double y, + const double z, + std::vector>& hrly); + + // calculate the coefficient of Ylm, ylmcoef. 
+ static void set_coefficients (); + + // static void test(); + // static void test1(); + // static void test2(); + + // set the first n elements of u to be 0.0 + static void ZEROS (double u[], const int& n); + + private: + static long double Fact (const int n); + static int Semi_Fact (const int n); + static double sgn (const double x); }; -} +} // namespace ModuleBase #endif diff --git a/source/source_base/ylmcoef.h b/source/source_base/ylmcoef.h index 72a756ee74f..dd49be8b7fe 100644 --- a/source/source_base/ylmcoef.h +++ b/source/source_base/ylmcoef.h @@ -3,7 +3,8 @@ #include #include "constants.h" -namespace ModuleBase { +namespace ModuleBase +{ /// ylmcoef coefficient count (supports up to L=5) constexpr int YLMCOEF_SIZE = 36; @@ -15,42 +16,42 @@ __device__ static const double ylmcoef[YLMCOEF_SIZE] = { // Use static constexpr for C++11 compatibility (inline variables require C++17) static constexpr double ylmcoef[YLMCOEF_SIZE] = { #endif -0.28209479177387813964, // 1.0 / sqrt(ModuleBase::FOUR_PI) -0.48860251190291992263, // sqrt (3.0 / ModuleBase::FOUR_PI) -1.93649167310370851069, // sqrt (15.0) / 2.0 -1.11803398874989490253, // sqrt (5.0) / 2.0 -2.23606797749978980505, // sqrt (5.0) -0.57735026918962584208, // 1.0 / sqrt(3.0) -1.29099444873580559978, // sqrt (5.0 / 3.0) -1.97202659436653870983, // sqrt (35.0 / 9.0) -1.01835015443463117180, // sqrt (7.0/3.0)/1.5 -2.09165006633518890666, // sqrt (35.0 / 8.0) -0.93541434669348533237, // sqrt (7.0 / 8.0) -2.64575131106459071617, // sqrt (7.0) -0.25819888974716109775, // 1.0 / sqrt (15.0) -0.96609178307929588492, // sqrt (14.0 / 15.0) -2.16024689946928694084, // sqrt (14.0 / 3.0) -1.98431348329844303713, // sqrt(7.0)*3.0/4.0 -1.00623058987490532346, // 9.0/4.0/sqrt(5.0) -2.04939015319191986109, // sqrt(21.0/5.0) -0.97979589711327119694, // sqrt(24.0/25.0) -2.29128784747791991450, // sqrt(21.0)/2.0 -0.86602540378443859659, // sqrt(3.0)/2.0 -0.18898223650461359879, // 0.5/sqrt(7.0) -0.98198050606196563006, // 
1.5*sqrt(3.0/7.0) -2.12132034355964238515, // 3.0/sqrt(2.0) -1.98997487421323970835, // 0.6*sqrt(11.0) -1.00285307284481395307, // 0.8*sqrt(11.0/7.0) -2.03100960115899020764, // sqrt (33.0/8.0) -0.99103120896511487370, // sqrt (55.0/56.0) -2.17124059336723762570, // sqrt (33.0/7.0) -0.94760708295868567586, // sqrt (11.0)*2.0/7.0 -2.48746859276654985749, // sqrt (11.0)*0.75 -0.82915619758884995250, // sqrt (11.0)*0.25 -3.31662479035539980998, // sqrt (11.0) -0.14907119849998595740, // 1.0/3.0/sqrt(5.0) -0.98882646494608839038, // 2.0/3.0*sqrt(11.0/5.0) -2.09761769634030326159 // sqrt(22.0/5.0) + 0.28209479177387813964, // 1.0 / sqrt(ModuleBase::FOUR_PI) + 0.48860251190291992263, // sqrt (3.0 / ModuleBase::FOUR_PI) + 1.93649167310370851069, // sqrt (15.0) / 2.0 + 1.11803398874989490253, // sqrt (5.0) / 2.0 + 2.23606797749978980505, // sqrt (5.0) + 0.57735026918962584208, // 1.0 / sqrt(3.0) + 1.29099444873580559978, // sqrt (5.0 / 3.0) + 1.97202659436653870983, // sqrt (35.0 / 9.0) + 1.01835015443463117180, // sqrt (7.0/3.0)/1.5 + 2.09165006633518890666, // sqrt (35.0 / 8.0) + 0.93541434669348533237, // sqrt (7.0 / 8.0) + 2.64575131106459071617, // sqrt (7.0) + 0.25819888974716109775, // 1.0 / sqrt (15.0) + 0.96609178307929588492, // sqrt (14.0 / 15.0) + 2.16024689946928694084, // sqrt (14.0 / 3.0) + 1.98431348329844303713, // sqrt(7.0)*3.0/4.0 + 1.00623058987490532346, // 9.0/4.0/sqrt(5.0) + 2.04939015319191986109, // sqrt(21.0/5.0) + 0.97979589711327119694, // sqrt(24.0/25.0) + 2.29128784747791991450, // sqrt(21.0)/2.0 + 0.86602540378443859659, // sqrt(3.0)/2.0 + 0.18898223650461359879, // 0.5/sqrt(7.0) + 0.98198050606196563006, // 1.5*sqrt(3.0/7.0) + 2.12132034355964238515, // 3.0/sqrt(2.0) + 1.98997487421323970835, // 0.6*sqrt(11.0) + 1.00285307284481395307, // 0.8*sqrt(11.0/7.0) + 2.03100960115899020764, // sqrt (33.0/8.0) + 0.99103120896511487370, // sqrt (55.0/56.0) + 2.17124059336723762570, // sqrt (33.0/7.0) + 0.94760708295868567586, // sqrt (11.0)*2.0/7.0 + 
2.48746859276654985749, // sqrt (11.0)*0.75 + 0.82915619758884995250, // sqrt (11.0)*0.25 + 3.31662479035539980998, // sqrt (11.0) + 0.14907119849998595740, // 1.0/3.0/sqrt(5.0) + 0.98882646494608839038, // 2.0/3.0*sqrt(11.0/5.0) + 2.09761769634030326159 // sqrt(22.0/5.0) }; } // namespace ModuleBase diff --git a/source/source_basis/module_ao/ORB_atomic.cpp b/source/source_basis/module_ao/ORB_atomic.cpp index efdc611859b..dc27886cd0f 100644 --- a/source/source_basis/module_ao/ORB_atomic.cpp +++ b/source/source_basis/module_ao/ORB_atomic.cpp @@ -3,7 +3,7 @@ #include "source_io/module_parameter/parameter.h" Numerical_Orbital_AtomRelation Numerical_Orbital::NOAR; -Numerical_Orbital::Numerical_Orbital() +Numerical_Orbital::Numerical_Orbital () { // make std::pair of new and delete // question remains @@ -12,17 +12,16 @@ Numerical_Orbital::Numerical_Orbital() this->type = 0; } -Numerical_Orbital::~Numerical_Orbital() -{ -} +Numerical_Orbital::~Numerical_Orbital () {} -void Numerical_Orbital::set_orbital_info(const int& type_in, +void + Numerical_Orbital::set_orbital_info (const int& type_in, const std::string& label_in, const int& lmax_in, const int* nchi_in, const int& total_nchi_in) { - ModuleBase::TITLE("Numerical_Orbital", "set_type_info"); + ModuleBase::TITLE ("Numerical_Orbital", "set_type_info"); // (1) set type,label,lmax this->type = type_in; @@ -30,54 +29,54 @@ void Numerical_Orbital::set_orbital_info(const int& type_in, this->lmax = lmax_in; // (2) set nchi and total nchi. - this->nchi.resize(this->lmax + 1); + this->nchi.resize (this->lmax + 1); for (int i = 0; i < this->lmax + 1; i++) - { - this->nchi[i] = nchi_in[i]; - } + { + this->nchi[i] = nchi_in[i]; + } // we need this to generate numerical_orbital_lm. 
if (total_nchi_in < 0 || total_nchi_in > 500) - { - ModuleBase::WARNING_QUIT("Numerical_Orbital::init", "total_nchi < 0 or > 500"); - } + { + ModuleBase::WARNING_QUIT ("Numerical_Orbital::init", "total_nchi < 0 or > 500"); + } else - { - this->total_nchi = total_nchi_in; - } + { + this->total_nchi = total_nchi_in; + } // (3) set the rcut and check the rcut this->rcut = 0.0; for (int i = 0; i < total_nchi_in; i++) - { - this->rcut = this->phiLN[i].rcut; - for (int j = 0; j < total_nchi_in; j++) { - assert(rcut == this->phiLN[j].rcut); + this->rcut = this->phiLN[i].rcut; + for (int j = 0; j < total_nchi_in; j++) + { + assert (rcut == this->phiLN[j].rcut); + } } - } - assert(rcut > 0.0); + assert (rcut > 0.0); // (4) set max_nchi this->max_nchi = 0; for (int L = 0; L < lmax + 1; L++) - { - max_nchi = std::max(max_nchi, nchi[L]); - } + { + max_nchi = std::max (max_nchi, nchi[L]); + } // (8) set find_chi - assert(lmax + 1 > 0); - this->find_chi.create(lmax + 1, max_nchi); + assert (lmax + 1 > 0); + this->find_chi.create (lmax + 1, max_nchi); int ichi = 0; for (int L = 0; L <= lmax; ++L) - { - for (int N = 0; N < nchi[L]; ++N) { - find_chi(L, N) = ichi; - ++ichi; + for (int N = 0; N < nchi[L]; ++N) + { + find_chi (L, N) = ichi; + ++ichi; + } } - } - assert(ichi == total_nchi); + assert (ichi == total_nchi); return; } diff --git a/source/source_basis/module_ao/ORB_atomic.h b/source/source_basis/module_ao/ORB_atomic.h index e71c0958d32..b51d89821e1 100644 --- a/source/source_basis/module_ao/ORB_atomic.h +++ b/source/source_basis/module_ao/ORB_atomic.h @@ -9,123 +9,204 @@ class Numerical_Orbital_AtomRelation { -public: - //========================================================== - // It's about two atoms relations, thread-safe interface - //========================================================== - double distance; - ModuleBase::Vector3 R1; - ModuleBase::Vector3 R2; //three-dimesion-coordinate of R - ModuleBase::Vector3 dR; // R1-R2 - - double& get_distance() - { - 
if(distance < 0.0) ModuleBase::WARNING_QUIT("NUMERICAL_ORBITAL","distance should be above zero!"); - return distance; - } - - double getX() { return R2.x - R1.x ; } - double getY() { return R2.y - R1.y ; } - double getZ() { return R2.z - R1.z ; } - ModuleBase::Vector3& getR1() { return R1; } - ModuleBase::Vector3& getR2() { return R2; } - ModuleBase::Vector3& getdR() { return dR; } - - void set_position(const ModuleBase::Vector3 &R1_in, const ModuleBase::Vector3 &R2_in) - { - R1 = R1_in; - R2 = R2_in; - dR = R1-R2; - distance = dR.norm(); - } + public: + //========================================================== + // It's about two atoms relations, thread-safe interface + //========================================================== + double distance; + ModuleBase::Vector3 R1; + ModuleBase::Vector3 R2; // three-dimesion-coordinate of R + ModuleBase::Vector3 dR; // R1-R2 + + double& + get_distance () + { + if (distance < 0.0) + { + ModuleBase::WARNING_QUIT ("NUMERICAL_ORBITAL", "distance should be above zero!"); + } + return distance; + } + + double + getX () + { + return R2.x - R1.x; + } + double + getY () + { + return R2.y - R1.y; + } + double + getZ () + { + return R2.z - R1.z; + } + ModuleBase::Vector3& + getR1 () + { + return R1; + } + ModuleBase::Vector3& + getR2 () + { + return R2; + } + ModuleBase::Vector3& + getdR () + { + return dR; + } + + void + set_position (const ModuleBase::Vector3& R1_in, const ModuleBase::Vector3& R2_in) + { + R1 = R1_in; + R2 = R2_in; + dR = R1 - R2; + distance = dR.norm (); + } }; /// -///CLASS Num_Orbital +/// CLASS Num_Orbital ///------------------------------------------ /// -///Note : contain information about atoms +/// Note : contain information about atoms /// -///Feature : set and store information about atoms +/// Feature : set and store information about atoms /// class Numerical_Orbital { - friend class LCAO_Orbitals; - -public: - - Numerical_Orbital(); - ~Numerical_Orbital(); - - const int& getLmax() const { return 
this->lmax; } - const double& getRcut () const {return this->rcut; } - const int& getType() const { return this->type; } - const int& getTotal_nchi() const { return this->total_nchi; } - const int& getNchi(const int l) const { return this->nchi[l]; } - const std::string& getLabel() const { return this->label; } - - const inline Numerical_Orbital_Lm& PhiLN( const int &L, const int &N)const - { - return this->phiLN[ this->find_chi(L, N) ]; - } - - /// about the distance between two atoms. - static double& get_distance() - { - return NOAR.get_distance(); - } - - static double getX() { return NOAR.getX() ; } - static double getY() { return NOAR.getY() ; } - static double getZ() { return NOAR.getZ() ; } - static ModuleBase::Vector3& getR1() { return NOAR.getR1(); } - static ModuleBase::Vector3& getR2() { return NOAR.getR2(); } - static ModuleBase::Vector3& getdR() { return NOAR.getdR(); } - - /// - /// set information about Numerical Orbital - /// - void set_orbital_info( - const int& type_in, - const std::string& label_in, - const int& lmax_in, - const int* nchi_in, - const int& total_nchi); - - static void set_position(const ModuleBase::Vector3 &R1_in, const ModuleBase::Vector3 &R2_in) - { - NOAR.set_position(R1_in, R2_in); - } - - std::vector& chi() { return this->phiLN; } - -private: - - //========================================================== - // MEMBER FUNCTION : - // NAME : label (atom type) - // NAME : lmax( max value of L angular momentum) - // NAME : nchi( number of chi for each L) - // NAME : total_nchi(total chi for this type of atom, total number of NAOs) - // NAME : max_nchi( max chi for certain L) - // NAME : find_chi(lmax+1, max_nchi). 
- //=========================================================== - std::string label; - - int type; - int lmax; - std::vector nchi; - int total_nchi; - int max_nchi; - ModuleBase::IntArray find_chi; - double rcut; - - std::vector phiLN;// length: total_nchi (only store radial function ) - - //========================================================== - // Keep the old interface - //========================================================== - static Numerical_Orbital_AtomRelation NOAR; + friend class LCAO_Orbitals; + + public: + Numerical_Orbital (); + ~Numerical_Orbital (); + + const int& + getLmax () const + { + return this->lmax; + } + const double& + getRcut () const + { + return this->rcut; + } + const int& + getType () const + { + return this->type; + } + const int& + getTotal_nchi () const + { + return this->total_nchi; + } + const int& + getNchi (const int l) const + { + return this->nchi[l]; + } + const std::string& + getLabel () const + { + return this->label; + } + + const inline Numerical_Orbital_Lm& + PhiLN (const int& L, const int& N) const + { + return this->phiLN[this->find_chi (L, N)]; + } + + /// about the distance between two atoms. 
+ static double& + get_distance () + { + return NOAR.get_distance (); + } + + static double + getX () + { + return NOAR.getX (); + } + static double + getY () + { + return NOAR.getY (); + } + static double + getZ () + { + return NOAR.getZ (); + } + static ModuleBase::Vector3& + getR1 () + { + return NOAR.getR1 (); + } + static ModuleBase::Vector3& + getR2 () + { + return NOAR.getR2 (); + } + static ModuleBase::Vector3& + getdR () + { + return NOAR.getdR (); + } + + /// + /// set information about Numerical Orbital + /// + void set_orbital_info (const int& type_in, + const std::string& label_in, + const int& lmax_in, + const int* nchi_in, + const int& total_nchi); + + static void + set_position (const ModuleBase::Vector3& R1_in, const ModuleBase::Vector3& R2_in) + { + NOAR.set_position (R1_in, R2_in); + } + + std::vector& + chi () + { + return this->phiLN; + } + + private: + //========================================================== + // MEMBER FUNCTION : + // NAME : label (atom type) + // NAME : lmax( max value of L angular momentum) + // NAME : nchi( number of chi for each L) + // NAME : total_nchi(total chi for this type of atom, total number of NAOs) + // NAME : max_nchi( max chi for certain L) + // NAME : find_chi(lmax+1, max_nchi). 
+ //=========================================================== + std::string label; + + int type; + int lmax; + std::vector nchi; + int total_nchi; + int max_nchi; + ModuleBase::IntArray find_chi; + double rcut; + + std::vector phiLN; // length: total_nchi (only store radial function ) + + //========================================================== + // Keep the old interface + //========================================================== + static Numerical_Orbital_AtomRelation NOAR; }; #endif diff --git a/source/source_basis/module_ao/ORB_atomic_lm.cpp b/source/source_basis/module_ao/ORB_atomic_lm.cpp index d3542e021e3..c68345500c8 100644 --- a/source/source_basis/module_ao/ORB_atomic_lm.cpp +++ b/source/source_basis/module_ao/ORB_atomic_lm.cpp @@ -11,763 +11,829 @@ #include #endif -Numerical_Orbital_Lm::Numerical_Orbital_Lm() +Numerical_Orbital_Lm::Numerical_Orbital_Lm () { - label = ""; - index_atom_type = 0; - angular_momentum_l = 0; - index_chi = 0; + label = ""; + index_atom_type = 0; + angular_momentum_l = 0; + index_chi = 0; - nr=1; - nk=1; + nr = 1; + nk = 1; - rcut=0.0; - kcut=0.0; - dk=0.0; + rcut = 0.0; + kcut = 0.0; + dk = 0.0; - nr_uniform = 1; - dr_uniform = -1.0; - zty = 0.0; + nr_uniform = 1; + dr_uniform = -1.0; + zty = 0.0; } -Numerical_Orbital_Lm::~Numerical_Orbital_Lm() -{} - -void Numerical_Orbital_Lm::set_orbital_info -( - const std::string &label_in, - const int &index_atom_type_in, - const int &angular_momentum_l_in, - const int &index_chi_in, - const int &nr_in, - const double *rab_in, - const double *r_radial_in, - const Psi_Type &psi_type, // Peize Lin add 2017-12-12 - const double *psi_in, - const int &nk_in, - const double &dk_in, - // Peize Lin delete lat0 2016-02-03 - const double &dr_uniform_in, - bool flag_plot, // Peize Lin add flag_plot 2016-08-31 - bool flag_sbpool, // Peize Lin add flag_sbpool 2017-10-02 - const bool &force_flag // mohan add 2021-05-07 -) +Numerical_Orbital_Lm::~Numerical_Orbital_Lm () {} + +void + 
Numerical_Orbital_Lm::set_orbital_info (const std::string& label_in, + const int& index_atom_type_in, + const int& angular_momentum_l_in, + const int& index_chi_in, + const int& nr_in, + const double* rab_in, + const double* r_radial_in, + const Psi_Type& psi_type, // Peize Lin add 2017-12-12 + const double* psi_in, + const int& nk_in, + const double& dk_in, + // Peize Lin delete lat0 2016-02-03 + const double& dr_uniform_in, + bool flag_plot, // Peize Lin add flag_plot 2016-08-31 + bool flag_sbpool, // Peize Lin add flag_sbpool 2017-10-02 + const bool& force_flag // mohan add 2021-05-07 + ) { - copy_parameter( - label_in, - index_atom_type_in, - angular_momentum_l_in, - index_chi_in, - nr_in, - rab_in, - r_radial_in, - nk_in, - dk_in, - dr_uniform_in); - - switch(psi_type) - { - case Psi_Type::Psi: - for (int ir = 0; ir < nr; ir++) - { - this->psi[ir] = psi_in[ir]; - this->psir[ir] = psi[ir] * r_radial[ir]; //mohan 2010-04-19 - } - break; - case Psi_Type::Psif: - for( int ik=0; ik!=nk; ++ik ) - { - this->psif[ik] = psi_in[ik]; - this->psik[ik] = psif[ik] * k_radial[ik]; - this->psik2[ik] = psik[ik] * k_radial[ik]; - } - break; - case Psi_Type::Psik: - psif.resize(0); - for( int ik=0; ik!=nk; ++ik ) - { - this->psik[ik] = psi_in[ik]; - this->psik2[ik] = psik[ik] * k_radial[ik]; - } - break; - case Psi_Type::Psik2: - psif.resize(0); - psik.resize(0); - for( int ik=0; ik!=nk; ++ik ) - this->psik2[ik] = psi_in[ik]; - break; - default: - throw std::domain_error(std::string(__FILE__)+" line "+std::to_string(__LINE__)); - } - - switch(psi_type) - { - case Psi_Type::Psif: - case Psi_Type::Psik: - case Psi_Type::Psik2: - if( flag_sbpool ) - { - this->cal_rradial_sbpool(); - } - else - { - throw std::domain_error("flag_sbpool false not finished in Numerical_Orbital_Lm::set_orbital_info_k. 
"+std::string(__FILE__)+" line "+std::to_string(__LINE__)); - } - break; - default: break; - } - - //liaochen modify on 2010/4/7 - //we do SBT on regular mesh - //so we first generate psi_uniform first - //we put uniform in ahead of cal_kradial - - /* - bool uni = true; - if (uni) - { - this->extra_uniform(dr_uniform, force_flag); - } - else - { - this->use_uniform(dr_uniform); - } - */ - this->extra_uniform(dr_uniform, force_flag); - - switch(psi_type) - { - case Psi_Type::Psi: - if( flag_sbpool ) - { - this->cal_kradial_sbpool(); - } - else - { - this->cal_kradial(); - } - break; - default: break; - } - -// this->norm_test(); // Peize Lin delete 2016-08-31 - if( flag_plot ) - { - this->plot(); // Peize Lin add flag_plot 2016-08-31 - } - return; + copy_parameter (label_in, + index_atom_type_in, + angular_momentum_l_in, + index_chi_in, + nr_in, + rab_in, + r_radial_in, + nk_in, + dk_in, + dr_uniform_in); + + switch (psi_type) + { + case Psi_Type::Psi: + for (int ir = 0; ir < nr; ir++) + { + this->psi[ir] = psi_in[ir]; + this->psir[ir] = psi[ir] * r_radial[ir]; // mohan 2010-04-19 + } + break; + case Psi_Type::Psif: + for (int ik = 0; ik != nk; ++ik) + { + this->psif[ik] = psi_in[ik]; + this->psik[ik] = psif[ik] * k_radial[ik]; + this->psik2[ik] = psik[ik] * k_radial[ik]; + } + break; + case Psi_Type::Psik: + psif.resize (0); + for (int ik = 0; ik != nk; ++ik) + { + this->psik[ik] = psi_in[ik]; + this->psik2[ik] = psik[ik] * k_radial[ik]; + } + break; + case Psi_Type::Psik2: + psif.resize (0); + psik.resize (0); + for (int ik = 0; ik != nk; ++ik) + { + this->psik2[ik] = psi_in[ik]; + } + break; + default: + throw std::domain_error (std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } + + switch (psi_type) + { + case Psi_Type::Psif: + case Psi_Type::Psik: + case Psi_Type::Psik2: + if (flag_sbpool) + { + this->cal_rradial_sbpool (); + } + else + { + throw std::domain_error ( + "flag_sbpool false not finished in Numerical_Orbital_Lm::set_orbital_info_k. 
" + + std::string (__FILE__) + " line " + std::to_string (__LINE__)); + } + break; + default: + break; + } + + // liaochen modify on 2010/4/7 + // we do SBT on regular mesh + // so we first generate psi_uniform first + // we put uniform in ahead of cal_kradial + + /* + bool uni = true; + if (uni) + { + this->extra_uniform(dr_uniform, force_flag); + } + else + { + this->use_uniform(dr_uniform); + } + */ + this->extra_uniform (dr_uniform, force_flag); + + switch (psi_type) + { + case Psi_Type::Psi: + if (flag_sbpool) + { + this->cal_kradial_sbpool (); + } + else + { + this->cal_kradial (); + } + break; + default: + break; + } + + // this->norm_test(); // Peize Lin delete 2016-08-31 + if (flag_plot) + { + this->plot (); // Peize Lin add flag_plot 2016-08-31 + } + return; } -void Numerical_Orbital_Lm::copy_parameter( - const std::string &label_in, - const int &index_atom_type_in, - const int &angular_momentum_l_in, - const int &index_chi_in, - const int &nr_in, - const double *rab_in, - const double *r_radial_in, - const int &nk_in, - const double &dk_in, - const double &dr_uniform_in) +void + Numerical_Orbital_Lm::copy_parameter (const std::string& label_in, + const int& index_atom_type_in, + const int& angular_momentum_l_in, + const int& index_chi_in, + const int& nr_in, + const double* rab_in, + const double* r_radial_in, + const int& nk_in, + const double& dk_in, + const double& dr_uniform_in) { this->label = label_in; this->index_atom_type = index_atom_type_in; this->angular_momentum_l = angular_momentum_l_in; this->index_chi = index_chi_in; - assert(nr_in>=2); -// assert(nr_in<10000); // Peize Lin delete 2017-12-03 - assert(nr%2!=0); + assert (nr_in >= 2); + // assert(nr_in<10000); // Peize Lin delete 2017-12-03 + assert (nr % 2 != 0); this->nr = nr_in; - assert(r_radial_in[nr-1]>0.0); + assert (r_radial_in[nr - 1] > 0.0); // assert(r_radial_in[nr-1]<50); // Peize Lin delete 2017-08-18 - this->rcut = r_radial_in[nr-1]; - assert(nk_in>1); - //assert(nk_in<10000); 
// Jiyy delete 2022-07-18 + this->rcut = r_radial_in[nr - 1]; + assert (nk_in > 1); + // assert(nk_in<10000); // Jiyy delete 2022-07-18 this->nk = nk_in; - assert(nk%2!=0); - assert(dk_in>0); + assert (nk % 2 != 0); + assert (dk_in > 0); this->dk = dk_in; - this->dr_uniform=dr_uniform_in; - - /*********************************************************** - be careful! LiaoChen modify on 2010/4/21 - ************************************************************/ -// this->dk = ModuleBase::PI / rcut / 2.0; -// this->nk = this->nr; - - r_radial.resize(nr); - rab.resize(nr); - psi.resize(nr); - psir.resize(nr); - for (int ir = 0; ir < nr; ir++) - { - this->r_radial[ir] = r_radial_in[ir]; - this->rab[ir] = rab_in[ir]; - } - - k_radial.resize(nk); - psif.resize(nk); - psik.resize(nk); - psik2.resize(nk); - for (int ik = 0; ik < nk; ik++) - { - this->k_radial[ik] = ik * this->dk; - } - this->kcut = (nk-1) * this->dk; + this->dr_uniform = dr_uniform_in; + + /*********************************************************** + be careful! LiaoChen modify on 2010/4/21 + ************************************************************/ + // this->dk = ModuleBase::PI / rcut / 2.0; + // this->nk = this->nr; + + r_radial.resize (nr); + rab.resize (nr); + psi.resize (nr); + psir.resize (nr); + for (int ir = 0; ir < nr; ir++) + { + this->r_radial[ir] = r_radial_in[ir]; + this->rab[ir] = rab_in[ir]; + } + + k_radial.resize (nk); + psif.resize (nk); + psik.resize (nk); + psik2.resize (nk); + for (int ik = 0; ik < nk; ik++) + { + this->k_radial[ik] = ik * this->dk; + } + this->kcut = (nk - 1) * this->dk; } #include "source_base/mathzone_add1.h" -void Numerical_Orbital_Lm::extra_uniform(const double &dr_uniform_in, const bool &force_flag) +void + Numerical_Orbital_Lm::extra_uniform (const double& dr_uniform_in, const bool& force_flag) { - ModuleBase::timer::start("NOrbital_Lm", "extra_uniform"); - - //--------------------------------------------- - // set the dr, fixed by liaochen. 
- // calculate the number of radial mesh points. - //--------------------------------------------- - assert(dr_uniform>0.0); - this->dr_uniform = dr_uniform_in; - this->nr_uniform = static_cast(rcut/dr_uniform) + 10; - - this->psi_uniform.resize(nr_uniform,0); - - // do interpolation here to make grid more dense + ModuleBase::timer::start ("NOrbital_Lm", "extra_uniform"); + + //--------------------------------------------- + // set the dr, fixed by liaochen. + // calculate the number of radial mesh points. + //--------------------------------------------- + assert (dr_uniform > 0.0); + this->dr_uniform = dr_uniform_in; + this->nr_uniform = static_cast (rcut / dr_uniform) + 10; + + this->psi_uniform.resize (nr_uniform, 0); + + // do interpolation here to make grid more dense #ifdef _OPENMP - #pragma omp parallel for schedule(static) +#pragma omp parallel for schedule(static) #endif - for (int ir = 0; ir < this->nr_uniform; ir++) - { - const double psi_uniform_tmp = - ModuleBase::Mathzone_Add1::Uni_RadialF(ModuleBase::GlobalFunc::VECTOR_TO_PTR(this->psi), this->nr, this->rab[0], ir * dr_uniform); - this->psi_uniform[ir] = psi_uniform_tmp; -// this->psi_uniform[ir] = ModuleBase::Mathzone::Polynomial_Interpolation(this->psi, this->nr, this->rab[0], ir * dr_uniform); - } - - //---------------------------------------------- - // calculate the dpsi_uniform - //---------------------------------------------- - this->dpsi_uniform.resize(this->nr_uniform); - this->ddpsi_uniform.resize(this->nr_uniform); - - double* y2 = new double[nr]; - - //-------------------------------------------------------------------------- - // old code to calculate the derivate dpsi/dr, - // has problem that the derivatives of orbitals oscillate a lot - // around r=0 - //-------------------------------------------------------------------------- - //ModuleBase::Mathzone_Add1::SplineD2 (r_radial, psi, nr, 100000.0, 100000.0, y2); - //double yp1=(this->psi[1]-this->psi[0])/this->r_radial[1]; - 
//std::cout<<"psi0="<<" "<psi[0]<<" "<<"psi1="<<" "<psi[1]<<" "<<"r1="<<" "<r_radial[1]<angular_momentum_l ) // added by pengfei 13-8-8 different l has different boundary conditions - { - case 0: ModuleBase::Mathzone_Add1::SplineD2 (ModuleBase::GlobalFunc::VECTOR_TO_PTR(r_radial), ModuleBase::GlobalFunc::VECTOR_TO_PTR(psi), nr, 0.0, 0.0, y2); break; - case 1: ModuleBase::Mathzone_Add1::SplineD2 (ModuleBase::GlobalFunc::VECTOR_TO_PTR(r_radial), ModuleBase::GlobalFunc::VECTOR_TO_PTR(psi), nr, 100000.0, 100000.0, y2); break; - case 2: ModuleBase::Mathzone_Add1::SplineD2 (ModuleBase::GlobalFunc::VECTOR_TO_PTR(r_radial), ModuleBase::GlobalFunc::VECTOR_TO_PTR(psi), nr, 0.0, 0.0, y2); break; - case 3: ModuleBase::Mathzone_Add1::SplineD2 (ModuleBase::GlobalFunc::VECTOR_TO_PTR(r_radial), ModuleBase::GlobalFunc::VECTOR_TO_PTR(psi), nr, 100000.0, 100000.0, y2); break; - case 4: ModuleBase::Mathzone_Add1::SplineD2 (ModuleBase::GlobalFunc::VECTOR_TO_PTR(r_radial), ModuleBase::GlobalFunc::VECTOR_TO_PTR(psi), nr, 0.0, 0.0, y2); break; - default: - //GlobalV::ofs_warning << " The angular momentum larger than 4 (g orbitals) may be error about eggbox. 
" << std::endl; - //GlobalV::ofs_warning << " Check file " << __FILE__ << " line " << __LINE__ <angular_momentum_l<psi_uniform), - this->nr_uniform, - dr_uniform, - angular_momentum_l, - tmp); - - this->zty = tmp[0]/ModuleBase::Mathzone_Add1::factorial (angular_momentum_l); - } - - delete [] y2; - delete [] rad; - delete [] tmp; - ModuleBase::timer::end("NOrbital_Lm", "extra_uniform"); + for (int ir = 0; ir < this->nr_uniform; ir++) + { + const double psi_uniform_tmp + = ModuleBase::Mathzone_Add1::Uni_RadialF (ModuleBase::GlobalFunc::VECTOR_TO_PTR (this->psi), + this->nr, + this->rab[0], + ir * dr_uniform); + this->psi_uniform[ir] = psi_uniform_tmp; + // this->psi_uniform[ir] = ModuleBase::Mathzone::Polynomial_Interpolation(this->psi, this->nr, + // this->rab[0], ir * dr_uniform); + } + + //---------------------------------------------- + // calculate the dpsi_uniform + //---------------------------------------------- + this->dpsi_uniform.resize (this->nr_uniform); + this->ddpsi_uniform.resize (this->nr_uniform); + + double* y2 = new double[nr]; + + //-------------------------------------------------------------------------- + // old code to calculate the derivate dpsi/dr, + // has problem that the derivatives of orbitals oscillate a lot + // around r=0 + //-------------------------------------------------------------------------- + // ModuleBase::Mathzone_Add1::SplineD2 (r_radial, psi, nr, 100000.0, 100000.0, y2); + // double yp1=(this->psi[1]-this->psi[0])/this->r_radial[1]; + // std::cout<<"psi0="<<" "<psi[0]<<" "<<"psi1="<<" "<psi[1]<<" "<<"r1="<<" + // "<r_radial[1]<angular_momentum_l) // added by pengfei 13-8-8 different l has different boundary conditions + { + case 0: + ModuleBase::Mathzone_Add1::SplineD2 (ModuleBase::GlobalFunc::VECTOR_TO_PTR (r_radial), + ModuleBase::GlobalFunc::VECTOR_TO_PTR (psi), + nr, + 0.0, + 0.0, + y2); + break; + case 1: + ModuleBase::Mathzone_Add1::SplineD2 (ModuleBase::GlobalFunc::VECTOR_TO_PTR (r_radial), + 
ModuleBase::GlobalFunc::VECTOR_TO_PTR (psi), + nr, + 100000.0, + 100000.0, + y2); + break; + case 2: + ModuleBase::Mathzone_Add1::SplineD2 (ModuleBase::GlobalFunc::VECTOR_TO_PTR (r_radial), + ModuleBase::GlobalFunc::VECTOR_TO_PTR (psi), + nr, + 0.0, + 0.0, + y2); + break; + case 3: + ModuleBase::Mathzone_Add1::SplineD2 (ModuleBase::GlobalFunc::VECTOR_TO_PTR (r_radial), + ModuleBase::GlobalFunc::VECTOR_TO_PTR (psi), + nr, + 100000.0, + 100000.0, + y2); + break; + case 4: + ModuleBase::Mathzone_Add1::SplineD2 (ModuleBase::GlobalFunc::VECTOR_TO_PTR (r_radial), + ModuleBase::GlobalFunc::VECTOR_TO_PTR (psi), + nr, + 0.0, + 0.0, + y2); + break; + default: + // GlobalV::ofs_warning << " The angular momentum larger than 4 (g orbitals) may be error about eggbox. " << + // std::endl; GlobalV::ofs_warning << " Check file " << __FILE__ << " line " << __LINE__ <angular_momentum_l<psi_uniform), + this->nr_uniform, + dr_uniform, + angular_momentum_l, + tmp); + + this->zty = tmp[0] / ModuleBase::Mathzone_Add1::factorial (angular_momentum_l); + } + + delete[] y2; + delete[] rad; + delete[] tmp; + ModuleBase::timer::end ("NOrbital_Lm", "extra_uniform"); } /* void Numerical_Orbital_Lm::use_uniform(const double &dr_uniform_in) { - assert(dr_uniform_in>0.0); - this->dr_uniform = dr_uniform_in; - // for save: +10, because in real space interpolation, - // there may be "one grid point" more than the cutoff. 
- this->nr_uniform = static_cast(rcut/dr_uniform)+10; - - this->psi_uniform.resize(nr_uniform,0); - - std::string orbital_type; - // Peize Lin update 2016-08-31 - if( 0==this->angular_momentum_l ) - { - orbital_type = 's'; - } - else if( 1==this->angular_momentum_l ) - { - orbital_type = 'p'; - } - else if( 2==this->angular_momentum_l ) - { - orbital_type = 'd'; - } - else if( 3<=this->angular_momentum_l && this->angular_momentum_l<=6 ) - { - orbital_type = 'f'+this->angular_momentum_l-3; - } - else if( 7<=this->angular_momentum_l && this->angular_momentum_l<=11 ) - { - orbital_type = 'k'+this->angular_momentum_l-7; - } - else - { - orbital_type = "L" + ModuleBase::GlobalFunc::TO_STRING(this->angular_momentum_l); - } - - std::cout << "===========================================================" << std::endl; - for(int i=0; ipsi_uniform[i] = - ModuleBase::Mathzone_Add1::Uni_RadialF(ModuleBase::GlobalFunc::VECTOR_TO_PTR(psi), this->nr, this->rab[0], i*dr_uniform); - } - - this->dpsi_uniform.resize(nr_uniform); - - ModuleBase::Mathzone_Add1::Uni_Deriv_Phi ( - ModuleBase::GlobalFunc::VECTOR_TO_PTR(psi_uniform), - nr_uniform, dr_uniform, - 1, - ModuleBase::GlobalFunc::VECTOR_TO_PTR(dpsi_uniform)); + assert(dr_uniform_in>0.0); + this->dr_uniform = dr_uniform_in; + // for save: +10, because in real space interpolation, + // there may be "one grid point" more than the cutoff. 
+ this->nr_uniform = static_cast(rcut/dr_uniform)+10; + + this->psi_uniform.resize(nr_uniform,0); + + std::string orbital_type; + // Peize Lin update 2016-08-31 + if( 0==this->angular_momentum_l ) + { + orbital_type = 's'; + } + else if( 1==this->angular_momentum_l ) + { + orbital_type = 'p'; + } + else if( 2==this->angular_momentum_l ) + { + orbital_type = 'd'; + } + else if( 3<=this->angular_momentum_l && this->angular_momentum_l<=6 ) + { + orbital_type = 'f'+this->angular_momentum_l-3; + } + else if( 7<=this->angular_momentum_l && this->angular_momentum_l<=11 ) + { + orbital_type = 'k'+this->angular_momentum_l-7; + } + else + { + orbital_type = "L" + ModuleBase::GlobalFunc::TO_STRING(this->angular_momentum_l); + } + + std::cout << "===========================================================" << std::endl; + for(int i=0; ipsi_uniform[i] = + ModuleBase::Mathzone_Add1::Uni_RadialF(ModuleBase::GlobalFunc::VECTOR_TO_PTR(psi), this->nr, this->rab[0], +i*dr_uniform); + } + + this->dpsi_uniform.resize(nr_uniform); + + ModuleBase::Mathzone_Add1::Uni_Deriv_Phi ( + ModuleBase::GlobalFunc::VECTOR_TO_PTR(psi_uniform), + nr_uniform, dr_uniform, + 1, + ModuleBase::GlobalFunc::VECTOR_TO_PTR(dpsi_uniform)); #ifdef __NORMAL -#else - if(GlobalV::MY_RANK==0) - { - std::stringstream ss; - ss << PARAM.globalv.global_out_dir << this->label << "/" - << this->label << "-" << orbital_type << ".ORBITAL_NOR_uniform.txt"; - - std::ofstream ofs(ss.str().c_str()); - - for(int i=0; ilabel << "/" + << this->label << "-" << orbital_type << ".ORBITAL_NOR_uniform.txt"; + + std::ofstream ofs(ss.str().c_str()); + + for(int i=0; inr > 0); - assert( this->nr_uniform > 0); - double *jl = new double[nr]; - double *integrated_func = new double[nr]; - - const double pref = sqrt( 2.0 / ModuleBase::PI ); - //Sbt method - - /* - double* rad = new double[nr_uniform]; - for (int ir = 0; ir < nr_uniform; ir++) - { - rad[ir] = dr_uniform * ir; - } - - //liaochen add - ModuleBase::Mathzone_Add1::Sbt_new (3, 
angular_momentum_l, - k_radial, dk, nk, - rad, dr_uniform, nr_uniform, - psi_uniform, 0, this->psik); - - for (int ik = 0; ik < nk; ik++) this->psik[ik] *= (pref*k_radial[ik]); - delete [] rad; - */ - - //integration directly - for (int ik = 0; ik < nk; ik++) - { - ModuleBase::Sphbes::Spherical_Bessel( - this->nr, - ModuleBase::GlobalFunc::VECTOR_TO_PTR(this->r_radial), - this->k_radial[ik], - this->angular_momentum_l, - jl); - - for (int ir = 0; ir < nr; ir++) - { - integrated_func[ir] = this->psir[ir] * this->r_radial[ir] * jl[ir]; - } - - ModuleBase::Integral::Simpson_Integral( - this->nr, - integrated_func, - ModuleBase::GlobalFunc::VECTOR_TO_PTR(this->rab), - this->psif[ik]); - this->psif[ik] *= pref; - this->psik[ik] = this->psif[ik] * k_radial[ik]; - this->psik2[ik] = this->psik[ik] * k_radial[ik]; - } - - delete[] integrated_func; - delete[] jl; + assert (this->nr > 0); + assert (this->nr_uniform > 0); + double* jl = new double[nr]; + double* integrated_func = new double[nr]; + + const double pref = sqrt (2.0 / ModuleBase::PI); + // Sbt method + + /* + double* rad = new double[nr_uniform]; + for (int ir = 0; ir < nr_uniform; ir++) + { + rad[ir] = dr_uniform * ir; + } + + //liaochen add + ModuleBase::Mathzone_Add1::Sbt_new (3, angular_momentum_l, + k_radial, dk, nk, + rad, dr_uniform, nr_uniform, + psi_uniform, 0, this->psik); + + for (int ik = 0; ik < nk; ik++) this->psik[ik] *= (pref*k_radial[ik]); + delete [] rad; + */ + + // integration directly + for (int ik = 0; ik < nk; ik++) + { + ModuleBase::Sphbes::Spherical_Bessel (this->nr, + ModuleBase::GlobalFunc::VECTOR_TO_PTR (this->r_radial), + this->k_radial[ik], + this->angular_momentum_l, + jl); + + for (int ir = 0; ir < nr; ir++) + { + integrated_func[ir] = this->psir[ir] * this->r_radial[ir] * jl[ir]; + } + + ModuleBase::Integral::Simpson_Integral (this->nr, + integrated_func, + ModuleBase::GlobalFunc::VECTOR_TO_PTR (this->rab), + this->psif[ik]); + this->psif[ik] *= pref; + this->psik[ik] = 
this->psif[ik] * k_radial[ik]; + this->psik2[ik] = this->psik[ik] * k_radial[ik]; + } + + delete[] integrated_func; + delete[] jl; } /* // Peize Lin add 2017-10-02 void Numerical_Orbital_Lm::cal_kradial_sbpool(void) { - assert( this->nr > 0); - assert( this->nr_uniform > 0); - - // dr must be all the same for Sph_Bessel_Recursive_Pool and Simpson_Integral - const double dr = this->rab[0]; - for( size_t ir=1; irnr; ++ir ) - assert( dr == this->rab[ir] ); - - ModuleBase::Sph_Bessel_Recursive::D2* pSB = nullptr; - for( auto & sb : Sph_Bessel_Recursive_Pool::D2::sb_pool ) - if( this->dk * dr == sb.get_dx() ) - { - pSB = &sb; - break; - } - if(!pSB) - { - Sph_Bessel_Recursive_Pool::D2::sb_pool.push_back({}); - pSB = &Sph_Bessel_Recursive_Pool::D2::sb_pool.back(); - } - pSB->set_dx( this->dk * dr ); - pSB->cal_jlx( this->angular_momentum_l, this->nk, this->nr ); - const std::vector> &jl = pSB->get_jlx()[this->angular_momentum_l]; - - std::vector integrated_func( this->nr ); - const double pref = sqrt( 2.0 / ModuleBase::PI ); - - std::vector psir2(nr); - for( size_t ir=0; ir!=nr; ++ir ) - psir2[ir] = this->psir[ir] * this->r_radial[ir]; - - for (int ik = 0; ik < nk; ik++) - { - const std::vector &jlk = jl[ik]; - for (int ir = 0; ir < nr; ir++) - integrated_func[ir] = psir2[ir] * jlk[ir]; - ModuleBase::Integral::Simpson_Integral( - this->nr, - ModuleBase::GlobalFunc::VECTOR_TO_PTR(integrated_func), - dr, - this->psik[ik]); - this->psik[ik] *= ( pref * k_radial[ik]); - } + assert( this->nr > 0); + assert( this->nr_uniform > 0); + + // dr must be all the same for Sph_Bessel_Recursive_Pool and Simpson_Integral + const double dr = this->rab[0]; + for( size_t ir=1; irnr; ++ir ) + assert( dr == this->rab[ir] ); + + ModuleBase::Sph_Bessel_Recursive::D2* pSB = nullptr; + for( auto & sb : Sph_Bessel_Recursive_Pool::D2::sb_pool ) + if( this->dk * dr == sb.get_dx() ) + { + pSB = &sb; + break; + } + if(!pSB) + { + Sph_Bessel_Recursive_Pool::D2::sb_pool.push_back({}); + pSB = 
&Sph_Bessel_Recursive_Pool::D2::sb_pool.back(); + } + pSB->set_dx( this->dk * dr ); + pSB->cal_jlx( this->angular_momentum_l, this->nk, this->nr ); + const std::vector> &jl = pSB->get_jlx()[this->angular_momentum_l]; + + std::vector integrated_func( this->nr ); + const double pref = sqrt( 2.0 / ModuleBase::PI ); + + std::vector psir2(nr); + for( size_t ir=0; ir!=nr; ++ir ) + psir2[ir] = this->psir[ir] * this->r_radial[ir]; + + for (int ik = 0; ik < nk; ik++) + { + const std::vector &jlk = jl[ik]; + for (int ir = 0; ir < nr; ir++) + integrated_func[ir] = psir2[ir] * jlk[ir]; + ModuleBase::Integral::Simpson_Integral( + this->nr, + ModuleBase::GlobalFunc::VECTOR_TO_PTR(integrated_func), + dr, + this->psik[ik]); + this->psik[ik] *= ( pref * k_radial[ik]); + } } */ // Peize Lin add 2017-10-27 -void Numerical_Orbital_Lm::cal_kradial_sbpool(void) +void + Numerical_Orbital_Lm::cal_kradial_sbpool () { - assert( this->nr > 0); - assert( this->nr_uniform > 0); - - // dr must be all the same for Sph_Bessel_Recursive_Pool - const double dr = this->rab[0]; - - for( int ir=1; irnr; ++ir ) - { - assert( dr == this->rab[ir] ); - } - - ModuleBase::Sph_Bessel_Recursive::D2* pSB = nullptr; - for( auto & sb : ModuleBase::Sph_Bessel_Recursive_Pool::D2::sb_pool ) - { - if( this->dk * dr == sb.get_dx() ) - { - pSB = &sb; - break; - } - } - - if(!pSB) - { - ModuleBase::Sph_Bessel_Recursive_Pool::D2::sb_pool.push_back({}); - pSB = &ModuleBase::Sph_Bessel_Recursive_Pool::D2::sb_pool.back(); - } - pSB->set_dx( this->dk * dr ); - pSB->cal_jlx( this->angular_momentum_l, this->nk, this->nr ); - const std::vector> &jl = pSB->get_jlx()[this->angular_momentum_l]; - - const double pref = sqrt( 2.0 / ModuleBase::PI ); - - std::vector r_tmp(nr); - for( int ir=0; ir!=nr; ++ir ) - { - r_tmp[ir] = this->psir[ir] * this->r_radial[ir] * this->rab[ir]; - } - - constexpr double one_three=1.0/3.0, two_three=2.0/3.0, four_three=4.0/3.0; - r_tmp[0]*=one_three; - r_tmp[nr-1]*=one_three; - - for( int ir=1; 
ir!=nr-1; ++ir ) - { - r_tmp[ir] *= (ir&1) ? four_three : two_three; - } + assert (this->nr > 0); + assert (this->nr_uniform > 0); + + // dr must be all the same for Sph_Bessel_Recursive_Pool + const double dr = this->rab[0]; + + for (int ir = 1; ir < this->nr; ++ir) + { + assert (dr == this->rab[ir]); + } + + ModuleBase::Sph_Bessel_Recursive::D2* pSB = nullptr; + for (auto& sb: ModuleBase::Sph_Bessel_Recursive_Pool::D2::sb_pool) + { + if (this->dk * dr == sb.get_dx ()) + { + pSB = &sb; + break; + } + } + + if (!pSB) + { + ModuleBase::Sph_Bessel_Recursive_Pool::D2::sb_pool.push_back ({}); + pSB = &ModuleBase::Sph_Bessel_Recursive_Pool::D2::sb_pool.back (); + } + pSB->set_dx (this->dk * dr); + pSB->cal_jlx (this->angular_momentum_l, this->nk, this->nr); + const std::vector>& jl = pSB->get_jlx ()[this->angular_momentum_l]; + + const double pref = sqrt (2.0 / ModuleBase::PI); + + std::vector r_tmp (nr); + for (int ir = 0; ir != nr; ++ir) + { + r_tmp[ir] = this->psir[ir] * this->r_radial[ir] * this->rab[ir]; + } + + constexpr double one_three = 1.0 / 3.0, two_three = 2.0 / 3.0, four_three = 4.0 / 3.0; + r_tmp[0] *= one_three; + r_tmp[nr - 1] *= one_three; + + for (int ir = 1; ir != nr - 1; ++ir) + { + r_tmp[ir] *= (ir & 1) ? 
four_three : two_three; + } #ifdef _OPENMP - #pragma omp parallel for schedule(static) +#pragma omp parallel for schedule(static) #endif - for (int ik = 0; ik < nk; ik++) - { + for (int ik = 0; ik < nk; ik++) + { #ifdef __NORMAL - double psi_f_tmp = 0.0; - for(int ir=0; irnr, ModuleBase::GlobalFunc::VECTOR_TO_PTR(r_tmp), 1, ModuleBase::GlobalFunc::VECTOR_TO_PTR(jl[ik]), 1 ) ; + const double psi_f_tmp = pref + * BlasConnector::dot (this->nr, + ModuleBase::GlobalFunc::VECTOR_TO_PTR (r_tmp), + 1, + ModuleBase::GlobalFunc::VECTOR_TO_PTR (jl[ik]), + 1); #endif - this->psif[ik] = psi_f_tmp; - this->psik[ik] = psi_f_tmp * k_radial[ik]; - this->psik2[ik] = this->psik[ik] * k_radial[ik]; - } - return; + this->psif[ik] = psi_f_tmp; + this->psik[ik] = psi_f_tmp * k_radial[ik]; + this->psik2[ik] = this->psik[ik] * k_radial[ik]; + } + return; } // Peize Lin add 2017-12-11 -void Numerical_Orbital_Lm::cal_rradial_sbpool(void) +void + Numerical_Orbital_Lm::cal_rradial_sbpool () { - // dr must be all the same for Sph_Bessel_Recursive_Pool - const double dr = this->rab[0]; + // dr must be all the same for Sph_Bessel_Recursive_Pool + const double dr = this->rab[0]; - for( int ir=1; irnr; ++ir ) - { - assert( dr == this->rab[ir] ); - } + for (int ir = 1; ir < this->nr; ++ir) + { + assert (dr == this->rab[ir]); + } - ModuleBase::Sph_Bessel_Recursive::D2* pSB = nullptr; - for( auto & sb : ModuleBase::Sph_Bessel_Recursive_Pool::D2::sb_pool ) - { - if( dr * dk == sb.get_dx() ) - { - pSB = &sb; - break; - } - } + ModuleBase::Sph_Bessel_Recursive::D2* pSB = nullptr; + for (auto& sb: ModuleBase::Sph_Bessel_Recursive_Pool::D2::sb_pool) + { + if (dr * dk == sb.get_dx ()) + { + pSB = &sb; + break; + } + } - if(!pSB) - { - ModuleBase::Sph_Bessel_Recursive_Pool::D2::sb_pool.push_back({}); - pSB = &ModuleBase::Sph_Bessel_Recursive_Pool::D2::sb_pool.back(); - } + if (!pSB) + { + ModuleBase::Sph_Bessel_Recursive_Pool::D2::sb_pool.push_back ({}); + pSB = 
&ModuleBase::Sph_Bessel_Recursive_Pool::D2::sb_pool.back (); + } - pSB->set_dx( dr * dk ); - pSB->cal_jlx( this->angular_momentum_l, this->nr, this->nk ); + pSB->set_dx (dr * dk); + pSB->cal_jlx (this->angular_momentum_l, this->nr, this->nk); - const std::vector> &jl = pSB->get_jlx()[this->angular_momentum_l]; + const std::vector>& jl = pSB->get_jlx ()[this->angular_momentum_l]; - const double pref = sqrt(2.0/ModuleBase::PI); + const double pref = sqrt (2.0 / ModuleBase::PI); - std::vector k_tmp(nk); + std::vector k_tmp (nk); - for( int ik=0; ik!=nk; ++ik ) - { - k_tmp[ik] = this->psik2[ik] * dk; - } + for (int ik = 0; ik != nk; ++ik) + { + k_tmp[ik] = this->psik2[ik] * dk; + } - constexpr double one_three=1.0/3.0, two_three=2.0/3.0, four_three=4.0/3.0; + constexpr double one_three = 1.0 / 3.0, two_three = 2.0 / 3.0, four_three = 4.0 / 3.0; - k_tmp[0]*=one_three; - k_tmp[nk-1]*=one_three; + k_tmp[0] *= one_three; + k_tmp[nk - 1] *= one_three; - for( int ik=1; ik!=nk-1; ++ik ) - { - k_tmp[ik] *= (ik&1) ? four_three : two_three; - } + for (int ik = 1; ik != nk - 1; ++ik) + { + k_tmp[ik] *= (ik & 1) ? 
four_three : two_three; + } - for( int ir = 0; ir!=nr; ++ir ) - { + for (int ir = 0; ir != nr; ++ir) + { #ifdef __NORMAL - // mohan add 2021-05-08, test needed - double kj_dot = 0.0; - for( int ik=0; ikpsi[ir] = pref * kj_dot; + // mohan add 2021-05-08, test needed + double kj_dot = 0.0; + for (int ik = 0; ik < nk; ++ik) + { + kj_dot += k_tmp[ik] * jl[ir][ik]; + } + this->psi[ir] = pref * kj_dot; #else - this->psi[ir] = pref * BlasConnector::dot( this->nk, ModuleBase::GlobalFunc::VECTOR_TO_PTR(k_tmp), 1, ModuleBase::GlobalFunc::VECTOR_TO_PTR(jl[ir]), 1 ); + this->psi[ir] = pref + * BlasConnector::dot (this->nk, + ModuleBase::GlobalFunc::VECTOR_TO_PTR (k_tmp), + 1, + ModuleBase::GlobalFunc::VECTOR_TO_PTR (jl[ir]), + 1); #endif - this->psir[ir] = this->psi[ir] * r_radial[ir]; - } + this->psir[ir] = this->psi[ir] * r_radial[ir]; + } } //=============================================== -//FOUND LOCAL VARIABLE -//asum : integral of psi*psi in whole space +// FOUND LOCAL VARIABLE +// asum : integral of psi*psi in whole space //=============================================== /* void Numerical_Orbital_Lm::norm_test(void)const { // ModuleBase::TITLE(ofs_onscaling, "Numerical_Orbital_Lm", "norm_test"); - //double asum_r = 0.0; - //double asum_k = 0.0; - - // note here psir = psi * r - double *f = new double[nr]; - for(int ir=0; irpsir[ir] * this->psir[ir]; - } + //double asum_r = 0.0; + //double asum_k = 0.0; + + // note here psir = psi * r + double *f = new double[nr]; + for(int ir=0; irpsir[ir] * this->psir[ir]; + } - double sumr = 0.0; - //double sumk = 0.0; + double sumr = 0.0; + //double sumk = 0.0; - ModuleBase::Integral::Simpson_Integral(this->nr, f, ModuleBase::GlobalFunc::VECTOR_TO_PTR(this->rab), sumr); + ModuleBase::Integral::Simpson_Integral(this->nr, f, ModuleBase::GlobalFunc::VECTOR_TO_PTR(this->rab), sumr); - delete[] f; - f = new double[nk]; - for(int ik=0; ikpsik[ik] * this->psik[ik]; - } + delete[] f; + f = new double[nk]; + for(int ik=0; ikpsik[ik] * 
this->psik[ik]; + } // ModuleBase::Integral::Simpson_Integral(this->nk, f, this->k_radial, sumk); - - //means nothing. - //GlobalV::ofs_running << std::setw(12) << sumk << std::endl; - delete[] f; - return; + //means nothing. + //GlobalV::ofs_running << std::setw(12) << sumk << std::endl; + + delete[] f; + return; } */ -void Numerical_Orbital_Lm::plot(void)const +void + Numerical_Orbital_Lm::plot () const { - ModuleBase::TITLE("Numerical_Orbital_Lm","plot"); - - std::string orbital_type; - // Peize Lin update 2016-08-31 - if( 0==this->angular_momentum_l ) - { - orbital_type = 's'; - } - else if( 1==this->angular_momentum_l ) - { - orbital_type = 'p'; - } - else if( 2==this->angular_momentum_l ) - { - orbital_type = 'd'; - } - else if( 3<=this->angular_momentum_l && this->angular_momentum_l<=6 ) - { - orbital_type = 'f' + this->angular_momentum_l - 3; - } - else if( 7<=this->angular_momentum_l && this->angular_momentum_l<=11 ) - { - orbital_type = 'k' + this->angular_momentum_l - 7; - } - else - { - orbital_type = "L" + ModuleBase::GlobalFunc::TO_STRING(this->angular_momentum_l); - } - - if(GlobalV::MY_RANK==0) - { - std::stringstream ssr, ssk, ssru ,ssdru; // 2013-08-10 pengfei - ssr << PARAM.globalv.global_out_dir << this->label << "/" - << this->label << "-"<< orbital_type << index_chi+1 << "-orbital-r.dat"; - - ssk << PARAM.globalv.global_out_dir << this->label << "/" - << this->label << "-" << orbital_type << index_chi+1 << "-orbital-k.dat"; - - ssru << PARAM.globalv.global_out_dir << this->label << "/" - << this->label << "-" << orbital_type << index_chi+1 << "-orbital-ru.dat"; - - ssdru << PARAM.globalv.global_out_dir << this->label << "/" // 2013-08-10 pengfei - << this->label << "-" << orbital_type << index_chi+1 << "-orbital-dru.dat"; - - std::ofstream ofsr(ssr.str().c_str()); - std::ofstream ofsk(ssk.str().c_str()); - std::ofstream ofsru(ssru.str().c_str()); - std::ofstream ofsdru(ssdru.str().c_str()); // 2013-08-10 pengfei - - if (!ofsk || !ofsr || 
!ofsru || !ofsdru) // 2013-08-10 pengfei - { - ModuleBase::WARNING("Numerical_Orbital_Lm : plot", "Can't open files !"); - } - - for (int i = 0; i < this->nr; i++) - { - ofsr << this->r_radial[i] << " " << psi[i] << std::endl; - } - - for (int i = 0; i < this->nk; i++) - { - ofsk << this->k_radial[i] << " " << psik[i] << std::endl; - } - - for (int i = 0; i < this->nr_uniform; i++) - { - ofsru << this->dr_uniform * i << " " << psi_uniform[i] << std::endl; - } - - for (int i = 0; i < this->nr_uniform; i++) - { - ofsdru << this->dr_uniform * i << " " << dpsi_uniform[i] << std::endl;// output dphi/dr 2013-08-10 pengfei - } - ofsr.close(); - ofsk.close(); - ofsru.close(); - ofsdru.close(); // 13-08-10 pengfei - } - - return; + ModuleBase::TITLE ("Numerical_Orbital_Lm", "plot"); + + std::string orbital_type; + // Peize Lin update 2016-08-31 + if (0 == this->angular_momentum_l) + { + orbital_type = 's'; + } + else if (1 == this->angular_momentum_l) + { + orbital_type = 'p'; + } + else if (2 == this->angular_momentum_l) + { + orbital_type = 'd'; + } + else if (3 <= this->angular_momentum_l && this->angular_momentum_l <= 6) + { + orbital_type = 'f' + this->angular_momentum_l - 3; + } + else if (7 <= this->angular_momentum_l && this->angular_momentum_l <= 11) + { + orbital_type = 'k' + this->angular_momentum_l - 7; + } + else + { + orbital_type = "L" + ModuleBase::GlobalFunc::TO_STRING (this->angular_momentum_l); + } + + if (GlobalV::MY_RANK == 0) + { + std::stringstream ssr, ssk, ssru, ssdru; // 2013-08-10 pengfei + ssr << PARAM.globalv.global_out_dir << this->label << "/" << this->label << "-" << orbital_type + << index_chi + 1 << "-orbital-r.dat"; + + ssk << PARAM.globalv.global_out_dir << this->label << "/" << this->label << "-" << orbital_type + << index_chi + 1 << "-orbital-k.dat"; + + ssru << PARAM.globalv.global_out_dir << this->label << "/" << this->label << "-" << orbital_type + << index_chi + 1 << "-orbital-ru.dat"; + + ssdru << PARAM.globalv.global_out_dir << 
this->label << "/" // 2013-08-10 pengfei + << this->label << "-" << orbital_type << index_chi + 1 << "-orbital-dru.dat"; + + std::ofstream ofsr (ssr.str ().c_str ()); + std::ofstream ofsk (ssk.str ().c_str ()); + std::ofstream ofsru (ssru.str ().c_str ()); + std::ofstream ofsdru (ssdru.str ().c_str ()); // 2013-08-10 pengfei + + if (!ofsk || !ofsr || !ofsru || !ofsdru) // 2013-08-10 pengfei + { + ModuleBase::WARNING ("Numerical_Orbital_Lm : plot", "Can't open files !"); + } + + for (int i = 0; i < this->nr; i++) + { + ofsr << this->r_radial[i] << " " << psi[i] << std::endl; + } + + for (int i = 0; i < this->nk; i++) + { + ofsk << this->k_radial[i] << " " << psik[i] << std::endl; + } + + for (int i = 0; i < this->nr_uniform; i++) + { + ofsru << this->dr_uniform * i << " " << psi_uniform[i] << std::endl; + } + + for (int i = 0; i < this->nr_uniform; i++) + { + ofsdru << this->dr_uniform * i << " " << dpsi_uniform[i] + << std::endl; // output dphi/dr 2013-08-10 pengfei + } + ofsr.close (); + ofsk.close (); + ofsru.close (); + ofsdru.close (); // 13-08-10 pengfei + } + + return; } diff --git a/source/source_basis/module_ao/ORB_atomic_lm.h b/source/source_basis/module_ao/ORB_atomic_lm.h index 4d013d04453..2a501b37ad8 100644 --- a/source/source_basis/module_ao/ORB_atomic_lm.h +++ b/source/source_basis/module_ao/ORB_atomic_lm.h @@ -1,7 +1,7 @@ //========================================================= -//AUTHOR : liaochen -//DATE : 2008-11-12 -//UPDATE : Peize Lin change all pointer to std::vector 2016-05-14 +// AUTHOR : liaochen +// DATE : 2008-11-12 +// UPDATE : Peize Lin change all pointer to std::vector 2016-05-14 //========================================================= #ifndef NUMERICAL_ORBITAL_LM_H #define NUMERICAL_ORBITAL_LM_H @@ -19,137 +19,280 @@ using std::vector; class Numerical_Orbital_Lm { - friend class Numerical_Orbital; - - public: - - std::vector psi_uniform;// mohan add 2009-5-10 - std::vector dpsi_uniform; //liaochen add 2010/5/11 - std::vector 
ddpsi_uniform; //wenfei add 2022/7/13 - - int nr_uniform;// mohan add 2009-5-10 - double dr_uniform;// mohan add 2009-5-10 - double zty; ///< the valus of psi at 0. - - Numerical_Orbital_Lm(); - ~Numerical_Orbital_Lm(); - - // Peize Lin add 2017-12-12 - enum class Psi_Type{ Psi, Psif, Psik, Psik2 }; - - /// EXPLAIN : set information about Numerical_Orbital_Lm - void set_orbital_info - ( - const std::string &label_in, - const int &index_atom_type_in, - const int &angular_momentum_l_in, - const int &index_chi_in, - const int &nr_in, - const double *rab_in, - const double *r_radial_in, - const Psi_Type &psi_type, // Peize Lin add 2017-12-12 - const double *psi_in, - const int &nk_in, - const double &dk_in, - // Peize Lin delete lat0 2016-02-03 - const double &dr_uniform, - bool flag_plot, // Peize Lin add flag_plot 2016-08-31 - bool flag_sbpool, // Peize Lin add flag_sbpool 2017-10-02 - const bool &force_flag // mohan add 2021-05-07 - ); - -private: - - void copy_parameter( - const std::string &label_in, - const int &index_atom_type_in, - const int &angular_momentum_l_in, - const int &index_chi_in, - const int &nr_in, - const double *rab_in, - const double *r_radial_in, - const int &nk_in, - const double &dk_in, - const double &dr_uniform_in); - - void cal_kradial(void); - void cal_kradial_sbpool(void); - void cal_rradial_sbpool(void); - //void norm_test()const; - void plot()const; - //void use_uniform(const double &dr_uniform_in); - void extra_uniform(const double &dr_uniform_in, const bool &force_flag); - - std::string label; - int index_atom_type; - int angular_momentum_l; - int index_chi; - - int nr; - int nk; - - double rcut; - double kcut; - double dk; - - std::vector r_radial; /// k_radial; - - std::vector rab; - - std::vector psi; ///< psi(r) - std::vector psir; ///< psi(r) * r - std::vector psif; ///< psi(k) - std::vector psik; ///< psi(k) * k - std::vector psik2; ///< psi(k) * k^2 - -public: - - const std::string& getLabel() const { return label; } - const 
int& getType() const { return index_atom_type; } - const int& getL() const { return angular_momentum_l; } - const int& getChi() const { return index_chi; } - - const double* getPsiuniform() const { return ModuleBase::GlobalFunc::VECTOR_TO_PTR(psi_uniform); } - const double* getDpsiuniform() const { return ModuleBase::GlobalFunc::VECTOR_TO_PTR(dpsi_uniform); } - const int& getNruniform() const { return nr_uniform; } - const double& getDruniform() const { return dr_uniform; } - - const int& getNr() const { return nr; } - const int& getNk() const { return nk; } - - const double& getRcut() const { return rcut; } - const double& getKcut() const { return kcut; } - - const double* getRadial() const { return ModuleBase::GlobalFunc::VECTOR_TO_PTR(r_radial); } - const std::vector& get_r_radial() const { return r_radial; } - const double& getRadial(const int ir) const { return r_radial[ir]; } - - const double* getRab() const { return ModuleBase::GlobalFunc::VECTOR_TO_PTR(rab); } - const std::vector& get_rab() const { return rab; } - const double& getRab(const int ir) const { return rab[ir]; } - - const double& getDk()const { return dk; } - const double* getKpoint() const { return ModuleBase::GlobalFunc::VECTOR_TO_PTR(k_radial); } - const double& getKpoint(const int ik) const { return k_radial[ik]; } - const std::vector& get_k_radial() const { return k_radial; } - - const double* getPsi() const { return ModuleBase::GlobalFunc::VECTOR_TO_PTR(psi);} - const double& getPsi(const int ir) const { return psi[ir];} - const std::vector& get_psi() const { return psi; } - const double* getPsi_r() const { return ModuleBase::GlobalFunc::VECTOR_TO_PTR(psir); } - const double& getPsi_r(const int ir) const { return psir[ir]; } - - const double* getPsif() const { return ModuleBase::GlobalFunc::VECTOR_TO_PTR(psif); } - const double& getPsif(const int ik) const { return psif[ik]; } - const std::vector& get_psif() const { return psif; } - const double* getPsi_k() const { return 
ModuleBase::GlobalFunc::VECTOR_TO_PTR(psik); } - const double& getPsi_k(const int ik) const { return psik[ik]; } - const std::vector& get_psi_k() const { return psik; } - const double* getPsi_k2() const { return ModuleBase::GlobalFunc::VECTOR_TO_PTR(psik2); } - const double& getPsi_k2(const int ik) const { return psik2[ik]; } - const std::vector& get_psi_k2() const { return psik2; } + friend class Numerical_Orbital; + + public: + std::vector psi_uniform; // mohan add 2009-5-10 + std::vector dpsi_uniform; // liaochen add 2010/5/11 + std::vector ddpsi_uniform; // wenfei add 2022/7/13 + + int nr_uniform; // mohan add 2009-5-10 + double dr_uniform; // mohan add 2009-5-10 + double zty; ///< the valus of psi at 0. + + Numerical_Orbital_Lm (); + ~Numerical_Orbital_Lm (); + + // Peize Lin add 2017-12-12 + enum class Psi_Type + { + Psi, + Psif, + Psik, + Psik2 + }; + + /// EXPLAIN : set information about Numerical_Orbital_Lm + void set_orbital_info (const std::string& label_in, + const int& index_atom_type_in, + const int& angular_momentum_l_in, + const int& index_chi_in, + const int& nr_in, + const double* rab_in, + const double* r_radial_in, + const Psi_Type& psi_type, // Peize Lin add 2017-12-12 + const double* psi_in, + const int& nk_in, + const double& dk_in, + // Peize Lin delete lat0 2016-02-03 + const double& dr_uniform, + bool flag_plot, // Peize Lin add flag_plot 2016-08-31 + bool flag_sbpool, // Peize Lin add flag_sbpool 2017-10-02 + const bool& force_flag // mohan add 2021-05-07 + ); + + private: + void copy_parameter (const std::string& label_in, + const int& index_atom_type_in, + const int& angular_momentum_l_in, + const int& index_chi_in, + const int& nr_in, + const double* rab_in, + const double* r_radial_in, + const int& nk_in, + const double& dk_in, + const double& dr_uniform_in); + + void cal_kradial (); + void cal_kradial_sbpool (); + void cal_rradial_sbpool (); + // void norm_test()const; + void plot () const; + // void use_uniform(const double 
&dr_uniform_in); + void extra_uniform (const double& dr_uniform_in, const bool& force_flag); + + std::string label; + int index_atom_type; + int angular_momentum_l; + int index_chi; + + int nr; + int nk; + + double rcut; + double kcut; + double dk; + + std::vector r_radial; ///< points of r + std::vector k_radial; + + std::vector rab; + + std::vector psi; ///< psi(r) + std::vector psir; ///< psi(r) * r + std::vector psif; ///< psi(k) + std::vector psik; ///< psi(k) * k + std::vector psik2; ///< psi(k) * k^2 + + public: + const std::string& + getLabel () const + { + return label; + } + const int& + getType () const + { + return index_atom_type; + } + const int& + getL () const + { + return angular_momentum_l; + } + const int& + getChi () const + { + return index_chi; + } + + const double* + getPsiuniform () const + { + return ModuleBase::GlobalFunc::VECTOR_TO_PTR (psi_uniform); + } + const double* + getDpsiuniform () const + { + return ModuleBase::GlobalFunc::VECTOR_TO_PTR (dpsi_uniform); + } + const int& + getNruniform () const + { + return nr_uniform; + } + const double& + getDruniform () const + { + return dr_uniform; + } + + const int& + getNr () const + { + return nr; + } + const int& + getNk () const + { + return nk; + } + + const double& + getRcut () const + { + return rcut; + } + const double& + getKcut () const + { + return kcut; + } + + const double* + getRadial () const + { + return ModuleBase::GlobalFunc::VECTOR_TO_PTR (r_radial); + } + const std::vector& + get_r_radial () const + { + return r_radial; + } + const double& + getRadial (const int ir) const + { + return r_radial[ir]; + } + + const double* + getRab () const + { + return ModuleBase::GlobalFunc::VECTOR_TO_PTR (rab); + } + const std::vector& + get_rab () const + { + return rab; + } + const double& + getRab (const int ir) const + { + return rab[ir]; + } + + const double& + getDk () const + { + return dk; + } + const double* + getKpoint () const + { + return ModuleBase::GlobalFunc::VECTOR_TO_PTR 
(k_radial); + } + const double& + getKpoint (const int ik) const + { + return k_radial[ik]; + } + const std::vector& + get_k_radial () const + { + return k_radial; + } + + const double* + getPsi () const + { + return ModuleBase::GlobalFunc::VECTOR_TO_PTR (psi); + } + const double& + getPsi (const int ir) const + { + return psi[ir]; + } + const std::vector& + get_psi () const + { + return psi; + } + const double* + getPsi_r () const + { + return ModuleBase::GlobalFunc::VECTOR_TO_PTR (psir); + } + const double& + getPsi_r (const int ir) const + { + return psir[ir]; + } + + const double* + getPsif () const + { + return ModuleBase::GlobalFunc::VECTOR_TO_PTR (psif); + } + const double& + getPsif (const int ik) const + { + return psif[ik]; + } + const std::vector& + get_psif () const + { + return psif; + } + const double* + getPsi_k () const + { + return ModuleBase::GlobalFunc::VECTOR_TO_PTR (psik); + } + const double& + getPsi_k (const int ik) const + { + return psik[ik]; + } + const std::vector& + get_psi_k () const + { + return psik; + } + const double* + getPsi_k2 () const + { + return ModuleBase::GlobalFunc::VECTOR_TO_PTR (psik2); + } + const double& + getPsi_k2 (const int ik) const + { + return psik2[ik]; + } + const std::vector& + get_psi_k2 () const + { + return psik2; + } }; #endif - diff --git a/source/source_basis/module_ao/ORB_gaunt_table.cpp b/source/source_basis/module_ao/ORB_gaunt_table.cpp index 416bda7505c..41f6054316a 100644 --- a/source/source_basis/module_ao/ORB_gaunt_table.cpp +++ b/source/source_basis/module_ao/ORB_gaunt_table.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include "ORB_gaunt_table.h" #include "source_base/timer.h" @@ -9,444 +9,512 @@ #include "source_base/constants.h" #include "source_base/math_ylmreal.h" -ORB_gaunt_table::ORB_gaunt_table(){} -ORB_gaunt_table::~ORB_gaunt_table(){} +ORB_gaunt_table::ORB_gaunt_table () {} +ORB_gaunt_table::~ORB_gaunt_table () {} -void ORB_gaunt_table::init_Gaunt(const int &lmax) +void + 
ORB_gaunt_table::init_Gaunt (const int& lmax) { -//////////////////////////////////////// -/// EXPLAIN : make table of Gaunt Coefficients -//////////////////////////////////////// - ModuleBase::TITLE("ORB_gaunt_table", "init_Gaunt"); - ModuleBase::timer::start("ORB_gaunt_table", "init_Gaunt"); - - this->Lmax_Gaunt_Coefficients = lmax; - const int nlm = (lmax * 2 + 1) * (lmax * 2 + 1); - this->Gaunt_Coefficients.create(nlm, nlm, nlm); - - // Becaful! , L ends at 2*lmax+1 - for (int L = 0; L < 2*lmax + 1; L++) - { - // m order is ( 0,0,1,-1,0,1,-1,2,-2...) - for (int m = 0; m < 2*L + 1 ; m++) + //////////////////////////////////////// + /// EXPLAIN : make table of Gaunt Coefficients + //////////////////////////////////////// + ModuleBase::TITLE ("ORB_gaunt_table", "init_Gaunt"); + ModuleBase::timer::start ("ORB_gaunt_table", "init_Gaunt"); + + this->Lmax_Gaunt_Coefficients = lmax; + const int nlm = (lmax * 2 + 1) * (lmax * 2 + 1); + this->Gaunt_Coefficients.create (nlm, nlm, nlm); + + // Becaful! , L ends at 2*lmax+1 + for (int L = 0; L < 2 * lmax + 1; L++) { - const int dim = this->get_lm_index(L,m); - for (int L1 = 0; L1 < lmax + 1; L1++) - { - for (int m1 = 0; m1 < 2*L1+1 ; m1++) + // m order is ( 0,0,1,-1,0,1,-1,2,-2...) 
+ for (int m = 0; m < 2 * L + 1; m++) { - const int dim1 = this->get_lm_index(L1,m1); - for (int L2 = 0; L2 < lmax + 1; L2++) - { - for (int m2 = 0; m2 < 2*L2+1 ; m2++) + const int dim = this->get_lm_index (L, m); + for (int L1 = 0; L1 < lmax + 1; L1++) { - const int dim2 = this->get_lm_index(L2,m2); - ///////////////////// - ///call Calculate::Cal_G - //////////////////// - - Gaunt_Coefficients(dim1, dim2, dim) = - this->Get_Gaunt_SH (L1, m1, L2, m2, L, m); - }// m2 - }// L2 - }// m1 - }// L1 - }// m - }// L - - ModuleBase::timer::end("ORB_gaunt_table", "init_Gaunt"); + for (int m1 = 0; m1 < 2 * L1 + 1; m1++) + { + const int dim1 = this->get_lm_index (L1, m1); + for (int L2 = 0; L2 < lmax + 1; L2++) + { + for (int m2 = 0; m2 < 2 * L2 + 1; m2++) + { + const int dim2 = this->get_lm_index (L2, m2); + ///////////////////// + /// call Calculate::Cal_G + //////////////////// + + Gaunt_Coefficients (dim1, dim2, dim) + = this->Get_Gaunt_SH (L1, m1, L2, m2, L, m); + } // m2 + } // L2 + } // m1 + } // L1 + } // m + } // L + + ModuleBase::timer::end ("ORB_gaunt_table", "init_Gaunt"); return; } /* double ORB_gaunt_table::Cal_Gaunt_single ( - const int &L1, - const int &m1, - const int &L2, - const int &m2, - const int &L, - const int &m, - const double &s1, - const double &e1, - const double &s2, - const double &e2 + const int &L1, + const int &m1, + const int &L2, + const int &m2, + const int &L, + const int &m, + const double &s1, + const double &e1, + const double &s2, + const double &e2 ) { - ModuleBase::timer::start("ORB_gaunt_table", "Cal_Gaunt_single"); - if ((L1 - L2 - L) % 2 != 0) - { - return 0.0; - } - - double result = 0.0; - static double absc[16] = { - -0.9894009349916499, -0.9445750230732326, -0.8656312023878318, -0.755404408355003, - -0.6178762444026438, -0.4580167776572274, -0.2816035507792589, -0.09501250983763744, - 0.09501250983763744, 0.2816035507792589, 0.4580167776572274, 0.6178762444026438, - 0.755404408355003, 0.8656312023878318, 0.9445750230732326, 
0.9894009349916499 }; - - static double weight[16] = { - 0.02715245941175406, 0.06225352393864778, 0.0951585116824929, 0.1246289712555339, - 0.1495959888165768, 0.1691565193950026, 0.1826034150449236, 0.1894506104550685, - 0.1894506104550685, 0.1826034150449236, 0.1691565193950026, 0.1495959888165768, - 0.1246289712555339, 0.0951585116824929, 0.06225352393864778, 0.02715245941175406 }; - - for (int i = 0;i < 16;i++) - { - for (int j = 0;j < 16;j++) - { - double theta = ((s1 + e1) + (e1 - s1) * absc[i]) / 2; - - result += weight[i] * weight[j] * sin(theta) * - this->Ylm_Gaunt( this->get_lm_index(L1, m1), 16 * i + j) * - this->Ylm_Gaunt( this->get_lm_index(L2, m2), 16 * i + j) * - this->Ylm_Gaunt( this->get_lm_index(L, m), 16 * i + j); - } - } - - result *= ((e1 - s1) / 2) * ((e2 - s2) / 2); - ModuleBase::timer::end("ORB_gaunt_table", "Cal_Gaunt_single"); - return result; + ModuleBase::timer::start("ORB_gaunt_table", "Cal_Gaunt_single"); + if ((L1 - L2 - L) % 2 != 0) + { + return 0.0; + } + + double result = 0.0; + static double absc[16] = { + -0.9894009349916499, -0.9445750230732326, -0.8656312023878318, -0.755404408355003, + -0.6178762444026438, -0.4580167776572274, -0.2816035507792589, -0.09501250983763744, + 0.09501250983763744, 0.2816035507792589, 0.4580167776572274, 0.6178762444026438, + 0.755404408355003, 0.8656312023878318, 0.9445750230732326, 0.9894009349916499 }; + + static double weight[16] = { + 0.02715245941175406, 0.06225352393864778, 0.0951585116824929, 0.1246289712555339, + 0.1495959888165768, 0.1691565193950026, 0.1826034150449236, 0.1894506104550685, + 0.1894506104550685, 0.1826034150449236, 0.1691565193950026, 0.1495959888165768, + 0.1246289712555339, 0.0951585116824929, 0.06225352393864778, 0.02715245941175406 }; + + for (int i = 0;i < 16;i++) + { + for (int j = 0;j < 16;j++) + { + double theta = ((s1 + e1) + (e1 - s1) * absc[i]) / 2; + + result += weight[i] * weight[j] * sin(theta) * + this->Ylm_Gaunt( this->get_lm_index(L1, m1), 16 * i + j) * + 
this->Ylm_Gaunt( this->get_lm_index(L2, m2), 16 * i + j) * + this->Ylm_Gaunt( this->get_lm_index(L, m), 16 * i + j); + } + } + + result *= ((e1 - s1) / 2) * ((e2 - s2) / 2); + ModuleBase::timer::end("ORB_gaunt_table", "Cal_Gaunt_single"); + return result; } */ /* void ORB_gaunt_table::init_Ylm_Gaunt ( - const int &lmax, - const double &s1, - const double &e1, - const double &s2, - const double &e2 + const int &lmax, + const double &s1, + const double &e1, + const double &s2, + const double &e2 ) { - ModuleBase::TITLE("ORB_gaunt_table", "init_Ylm_Gaunt"); - ModuleBase::timer::start("ORB_gaunt_table", "inite_Ylm_Gaunt"); + ModuleBase::TITLE("ORB_gaunt_table", "init_Ylm_Gaunt"); + ModuleBase::timer::start("ORB_gaunt_table", "inite_Ylm_Gaunt"); - const int nlm = (2*lmax+1) * (2*lmax+1); + const int nlm = (2*lmax+1) * (2*lmax+1); - static double absc[16] = { - -0.9894009349916499, -0.9445750230732326, -0.8656312023878318, -0.755404408355003, - -0.6178762444026438, -0.4580167776572274, -0.2816035507792589, -0.09501250983763744, - 0.09501250983763744, 0.2816035507792589, 0.4580167776572274, 0.6178762444026438, - 0.755404408355003, 0.8656312023878318, 0.9445750230732326, 0.9894009349916499 }; + static double absc[16] = { + -0.9894009349916499, -0.9445750230732326, -0.8656312023878318, -0.755404408355003, + -0.6178762444026438, -0.4580167776572274, -0.2816035507792589, -0.09501250983763744, + 0.09501250983763744, 0.2816035507792589, 0.4580167776572274, 0.6178762444026438, + 0.755404408355003, 0.8656312023878318, 0.9445750230732326, 0.9894009349916499 }; - //initialization of ylm_map + //initialization of ylm_map - ModuleBase::Vector3 g_gaunt[256]; + ModuleBase::Vector3 g_gaunt[256]; - this->Ylm_Gaunt.create(nlm , 256); + this->Ylm_Gaunt.create(nlm , 256); - for (int i = 0; i < 16; i++) - { - for (int j = 0; j < 16; j++) - { - const double theta = ((s1 + e1) + (e1 - s1) * absc[i]) / 2; - const double phi = ((s2 + e2) + (e2 - s2) * absc[j]) / 2; - ModuleBase::Vector3 
u(sin(theta) * cos(phi), sin(theta) * sin(phi), cos(theta)); - g_gaunt[16*i+j] = u; - } - } + for (int i = 0; i < 16; i++) + { + for (int j = 0; j < 16; j++) + { + const double theta = ((s1 + e1) + (e1 - s1) * absc[i]) / 2; + const double phi = ((s2 + e2) + (e2 - s2) * absc[j]) / 2; + ModuleBase::Vector3 u(sin(theta) * cos(phi), sin(theta) * sin(phi), cos(theta)); + g_gaunt[16*i+j] = u; + } + } - ModuleBase::YlmReal::Ylm_Real(nlm, 256, &g_gaunt[0], this->Ylm_Gaunt); + ModuleBase::YlmReal::Ylm_Real(nlm, 256, &g_gaunt[0], this->Ylm_Gaunt); - ModuleBase::timer::start("ORB_gaunt_table", "init_Ylm_Gaunt"); - return; + ModuleBase::timer::start("ORB_gaunt_table", "init_Ylm_Gaunt"); + return; } */ - -///effective pointers -int ORB_gaunt_table::EP_EL(const int& L) +/// effective pointers +int + ORB_gaunt_table::EP_EL (const int& L) { - if(L % 2 == 0) return (L+2) * (L+4) * (3*L*L+14*L+24) / 192; - else return (L+1) * (L+3) * (L+5) * (3*L+5) / 192; + if (L % 2 == 0) + { + return (L + 2) * (L + 4) * (3 * L * L + 14 * L + 24) / 192; + } + else + { + return (L + 1) * (L + 3) * (L + 5) * (3 * L + 5) / 192; + } } - -int ORB_gaunt_table::index_func -( - const int& l1, - const int& l2, - const int& l3, - const int& m3 -) +int + ORB_gaunt_table::index_func (const int& l1, const int& l2, const int& l3, const int& m3) { - const int aux1 = l1*(l1*l1*l1+6*l1*l1+11*l1+6)/24; - const int aux2 = l2*(l2*l2+3*l2+2)/6; - const int aux3 = l3*(l3+1)/2; - - return aux1 + aux2 + aux3 + m3; -} + const int aux1 = l1 * (l1 * l1 * l1 + 6 * l1 * l1 + 11 * l1 + 6) / 24; + const int aux2 = l2 * (l2 * l2 + 3 * l2 + 2) / 6; + const int aux3 = l3 * (l3 + 1) / 2; + return aux1 + aux2 + aux3 + m3; +} -void ORB_gaunt_table::init_Gaunt_CH(const int& Lmax) +void + ORB_gaunt_table::init_Gaunt_CH (const int& Lmax) { - ModuleBase::TITLE("ORB_gaunt_table","init_Gaunt_CH"); - ModuleBase::timer::start("ORB_gaunt_table","init_Gaunt_CH"); - - this->Lmax_Gaunt_CH = Lmax; - int L = 2*Lmax + 1; - int Eff_Np = 
this->EP_EL(L); - - ModuleBase::Memory::record("ORB::Gaunt_CH", sizeof(double) * Eff_Np * 30); - - int ic1 = 0; - for(int l1 = 0; l1 <= L; l1++) - { - for(int l2 = 0; l2 <= l1; l2++) - { - for(int l3 = 0; l3 <= l2; l3++) - { - for(int m3 = 0; m3 <= l3; m3++) - { - int idx = index_func(l1, l2, l3, m3); - assert(ic1 == idx); - - int l_sum = l1 + l2 + l3; - if((l_sum % 2 == 0) && (l2 + l3 >= l1)) - { - int uplmt_m2 = l1 - m3 > l2 ? l2 : l1 - m3; - - int ic2 = 0; - for(int m2 = -l2; m2 <= uplmt_m2; m2++) - { - //m1 + m2 + m3 == 0 - int m1 = -m2 - m3; - assert(std::abs(m1) <= l1); - - Gaunt_CH[ic1][ic2] = Calc_Gaunt_CH(l1, m1, l2, m2, l3, m3); - ic2++; - } - } - - ic1++; - }// m3 - }// l3 - }// l2 - } // l1 - - ModuleBase::timer::end("ORB_gaunt_table","init_Gaunt_CH"); - return; -} + ModuleBase::TITLE ("ORB_gaunt_table", "init_Gaunt_CH"); + ModuleBase::timer::start ("ORB_gaunt_table", "init_Gaunt_CH"); + this->Lmax_Gaunt_CH = Lmax; + int L = 2 * Lmax + 1; + int Eff_Np = this->EP_EL (L); -//using wigner 3j expression -double ORB_gaunt_table::Calc_Gaunt_CH -( - const int& l1, - const int& m1, - const int& l2, - const int& m2, - const int& l3, - const int& m3 -) + ModuleBase::Memory::record ("ORB::Gaunt_CH", sizeof (double) * Eff_Np * 30); + + int ic1 = 0; + for (int l1 = 0; l1 <= L; l1++) + { + for (int l2 = 0; l2 <= l1; l2++) + { + for (int l3 = 0; l3 <= l2; l3++) + { + for (int m3 = 0; m3 <= l3; m3++) + { + int idx = index_func (l1, l2, l3, m3); + assert (ic1 == idx); + + int l_sum = l1 + l2 + l3; + if ((l_sum % 2 == 0) && (l2 + l3 >= l1)) + { + int uplmt_m2 = l1 - m3 > l2 ? 
l2 : l1 - m3; + + int ic2 = 0; + for (int m2 = -l2; m2 <= uplmt_m2; m2++) + { + // m1 + m2 + m3 == 0 + int m1 = -m2 - m3; + assert (std::abs (m1) <= l1); + + Gaunt_CH[ic1][ic2] = Calc_Gaunt_CH (l1, m1, l2, m2, l3, m3); + ic2++; + } + } + + ic1++; + } // m3 + } // l3 + } // l2 + } // l1 + + ModuleBase::timer::end ("ORB_gaunt_table", "init_Gaunt_CH"); + return; +} + +// using wigner 3j expression +double + ORB_gaunt_table::Calc_Gaunt_CH (const int& l1, + const int& m1, + const int& l2, + const int& m2, + const int& l3, + const int& m3) { - ModuleBase::timer::start("ORB_gaunt_table","Calc_Gaunt_CH"); - - double fac = sqrt((2*l1+1) * (2*l2+1) * (2*l3+1) / ModuleBase::FOUR_PI); + ModuleBase::timer::start ("ORB_gaunt_table", "Calc_Gaunt_CH"); + + double fac = sqrt ((2 * l1 + 1) * (2 * l2 + 1) * (2 * l3 + 1) / ModuleBase::FOUR_PI); - int g = (l1+l2+l3)/2; - double triangle_f = sqrt( Fact(l1+l2-l3) * Fact(l1-l2+l3) * Fact(-l1+l2+l3) / Fact(2*g+1) ); + int g = (l1 + l2 + l3) / 2; + double triangle_f = sqrt (Fact (l1 + l2 - l3) * Fact (l1 - l2 + l3) * Fact (-l1 + l2 + l3) / Fact (2 * g + 1)); - fac *= pow(-1.0, g) * triangle_f * Fact(g) / Fact(g-l1) / Fact(g-l2) / Fact(g-l3); + fac *= pow (-1.0, g) * triangle_f * Fact (g) / Fact (g - l1) / Fact (g - l2) / Fact (g - l3); - double aux1 = sqrt(Fact(l1+m1) * Fact(l1-m1) * Fact(l2+m2) * Fact(l2-m2) * Fact(l3+m3) * Fact(l3-m3)); + double aux1 + = sqrt (Fact (l1 + m1) * Fact (l1 - m1) * Fact (l2 + m2) * Fact (l2 - m2) * Fact (l3 + m3) * Fact (l3 - m3)); - int kmin = 0; + int kmin = 0; int kmax = 0; - - kmin = (l2-l3-m1) > (l1-l3+m2) ? (l2-l3-m1) : (l1-l3+m2); - kmin = kmin > 0 ? kmin : 0; - - kmax = (l1+l2-l3) > (l1-m1) ? (l1-m1) : (l1+l2-l3); - kmax = kmax > (l2+m2) ? 
(l2+m2) : kmax; - - double aux2 = 0.0; - for(int k = kmin; k <= kmax; k++) - { - aux2 += pow(-1.0, k) / Fact(k) / Fact(l1+l2-l3-k) / Fact(l1-m1-k) / Fact(l2+m2-k) - / Fact(l3-l2+m1+k) / Fact(l3-l1+k-m2); - } - - ModuleBase::timer::end("ORB_gaunt_table","Calc_Gaunt_CH"); - return fac * pow(-1.0, l1-l2-m3) * triangle_f * aux1 * aux2; + + kmin = (l2 - l3 - m1) > (l1 - l3 + m2) ? (l2 - l3 - m1) : (l1 - l3 + m2); + kmin = kmin > 0 ? kmin : 0; + + kmax = (l1 + l2 - l3) > (l1 - m1) ? (l1 - m1) : (l1 + l2 - l3); + kmax = kmax > (l2 + m2) ? (l2 + m2) : kmax; + + double aux2 = 0.0; + for (int k = kmin; k <= kmax; k++) + { + aux2 += pow (-1.0, k) / Fact (k) / Fact (l1 + l2 - l3 - k) / Fact (l1 - m1 - k) / Fact (l2 + m2 - k) + / Fact (l3 - l2 + m1 + k) / Fact (l3 - l1 + k - m2); + } + + ModuleBase::timer::end ("ORB_gaunt_table", "Calc_Gaunt_CH"); + return fac * pow (-1.0, l1 - l2 - m3) * triangle_f * aux1 * aux2; } - -double ORB_gaunt_table::Get_Gaunt_CH -( - const int& l1, - const int& m1, - const int& l2, - const int& m2, - const int& l3, - const int& m3 -) +double + ORB_gaunt_table::Get_Gaunt_CH (const int& l1, + const int& m1, + const int& l2, + const int& m2, + const int& l3, + const int& m3) { - assert(l1 >= 0); - assert(l2 >= 0); - assert(l3 >= 0); - - int l_sum = l1 + l2 + l3; - - if(l_sum % 2 == 1) return 0.0; - - if(std::abs(m1) > l1 || std::abs(m2) > l2 || std::abs(m3) > l3) return 0.0; - - if( (m1 + m2 + m3) != 0) return 0.0; + assert (l1 >= 0); + assert (l2 >= 0); + assert (l3 >= 0); + + int l_sum = l1 + l2 + l3; + + if (l_sum % 2 == 1) + { + return 0.0; + } + + if (std::abs (m1) > l1 || std::abs (m2) > l2 || std::abs (m3) > l3) + { + return 0.0; + } + + if ((m1 + m2 + m3) != 0) + { + return 0.0; + } int L1 = l1; - int M1 = m1; - int L2 = l2; + int M1 = m1; + int L2 = l2; int M2 = m2; - Swap(L1, M1, L2, M2); - - int L3 = l3; - int M3 = m3; - Swap(L1, M1, L3, M3); - - Swap(L2, M2, L3, M3); - - if(M3 < 0) - { - M1 = -M1; - M2 = -M2; - M3 = -M3; - } - - int ic1 = 
index_func(L1, L2, L3, M3); - int ic2 = M2 + L2; - - try - { - return Gaunt_CH.at(ic1).at(ic2); - } // Peize Lin add 2016-08-26 - catch( std::out_of_range ) - { - return 0; - } + Swap (L1, M1, L2, M2); + + int L3 = l3; + int M3 = m3; + Swap (L1, M1, L3, M3); + + Swap (L2, M2, L3, M3); + + if (M3 < 0) + { + M1 = -M1; + M2 = -M2; + M3 = -M3; + } + + int ic1 = index_func (L1, L2, L3, M3); + int ic2 = M2 + L2; + + try + { + return Gaunt_CH.at (ic1).at (ic2); + } // Peize Lin add 2016-08-26 + catch (std::out_of_range) + { + return 0; + } } - -///Input value, -///m1, m2, m3 are restricted within 0 to 2l+1, -///and should be transformed first. -double ORB_gaunt_table::Get_Gaunt_SH -( - const int& l1, - const int& mm1, - const int& l2, - const int& mm2, - const int& l3, - const int& mm3 -) +/// Input value, +/// m1, m2, m3 are restricted within 0 to 2l+1, +/// and should be transformed first. +double + ORB_gaunt_table::Get_Gaunt_SH (const int& l1, + const int& mm1, + const int& l2, + const int& mm2, + const int& l3, + const int& mm3) { - - //Tranform M index - int m1 = Index_M(mm1); - int m2 = Index_M(mm2); - int m3 = Index_M(mm3); - - if(m1 >= 0 && m2 >= 0 && m3 >= 0) - { - if(m1 * m2 * m3 > 0) - { - if(m1 == m2 + m3) return pow(-1.0, m1) * sqrt(2.0) / 2.0 - * Get_Gaunt_CH(l1, -m1, l2, m2, l3, m3); - else if(m2 == m1 + m3) return pow(-1.0, m2) * sqrt(2.0) / 2.0 - * Get_Gaunt_CH(l1, m1, l2, -m2, l3, m3); - else if(m3 == m1 + m2) return pow(-1.0, m3) * sqrt(2.0) / 2.0 - * Get_Gaunt_CH(l1, m1, l2, m2, l3, -m3); - else return 0.0; - } - else - { - if(m1 == 0 && m2 == 0 && m3 == 0) return Get_Gaunt_CH(l1, 0, l2, 0, l3, 0); - else if( (m1 == 0) && (m2 == m3)) return pow(-1.0, m2) * Get_Gaunt_CH(l1, 0, l2, m2, l3, -m2); - else if( (m2 == 0) && (m3 == m1)) return pow(-1.0, m1) * Get_Gaunt_CH(l2, 0, l1, m1, l3, -m1); - else if( (m3 == 0) && (m1 == m2)) return pow(-1.0, m2) * Get_Gaunt_CH(l3, 0, l2, m2, l1, -m2); - else return 0.0; - } - } - else - { - if(m1 >= 0 && m2 < 0 && m3 < 
0) - { - if((m1 == 0) && (m2 == m3)) return pow(-1.0, m2) * Get_Gaunt_CH(l1, 0, l2, m2, l3, -m2); - else if(m1 > 0 && (m2 == m1+m3)) - return pow(-1.0, m3) * sqrt(2.0) / 2.0 * Get_Gaunt_CH(l1, m1, l2, -m2, l3, m3); - else if(m1 > 0 && (m3 == m1+m2)) - return pow(-1.0, m2) * sqrt(2.0) / 2.0 * Get_Gaunt_CH(l1, m1, l2, m2, l3, -m3); - else if(m1 > 0 && ( (m1+m2+m3) == 0)) - return pow(-1.0, m1+1) * sqrt(2.0) / 2.0 * Get_Gaunt_CH(l1, m1, l2, m2, l3, m3); - else return 0.0; - } - else if(m2 >= 0 && m1 < 0 && m3 < 0) - { - if((m2 == 0) && (m1 == m3)) return pow(-1.0, m1) * Get_Gaunt_CH(l2, 0, l1, m1, l3, -m1); - else if(m2 > 0 && (m1 == (m2 + m3))) - return pow(-1.0, m3) * sqrt(2.0) / 2.0 * Get_Gaunt_CH(l2, m2, l1, -m1, l3, m3); - else if(m2 > 0 && (m3 == (m2 + m1))) - return pow(-1.0, m1) * sqrt(2.0) / 2.0 * Get_Gaunt_CH(l2, m2, l1, m1, l3, -m3); - else if(m2 > 0 && ((m1+m2+m3) == 0)) - return pow(-1.0, m2+1) * sqrt(2.0) / 2.0 * Get_Gaunt_CH(l2, m2, l1, m1, l3, m3); - else return 0.0; - - } - else if(m3 >= 0 && m1 < 0 && m2 < 0) - { - if((m3 == 0) && (m1 == m2)) return pow(-1.0, m1) * Get_Gaunt_CH(l3, 0, l1, m1, l2, -m1); - else if(m3 > 0 && (m1 == m3+m2)) - return pow(-1.0, m2) * sqrt(2.0) / 2.0 * Get_Gaunt_CH(l3, m3, l1, -m1, l2, m2); - else if(m3 > 0 && (m2 == m3+m1)) - return pow(-1.0, m1) * sqrt(2.0) / 2.0 * Get_Gaunt_CH(l3, m3, l1, m1, l2, -m2); - else if(m3 > 0 && ( (m1+m2+m3) == 0)) - return pow(-1.0, m3+1) * sqrt(2.0) / 2.0 * Get_Gaunt_CH(l3, m3, l1, m1, l2, m2); - else return 0.0; - } - else return 0.0; - } -} + // Tranform M index + int m1 = Index_M (mm1); + int m2 = Index_M (mm2); + int m3 = Index_M (mm3); + if (m1 >= 0 && m2 >= 0 && m3 >= 0) + { + if (m1 * m2 * m3 > 0) + { + if (m1 == m2 + m3) + { + return pow (-1.0, m1) * sqrt (2.0) / 2.0 * Get_Gaunt_CH (l1, -m1, l2, m2, l3, m3); + } + else if (m2 == m1 + m3) + { + return pow (-1.0, m2) * sqrt (2.0) / 2.0 * Get_Gaunt_CH (l1, m1, l2, -m2, l3, m3); + } + else if (m3 == m1 + m2) + { + return pow (-1.0, m3) * 
sqrt (2.0) / 2.0 * Get_Gaunt_CH (l1, m1, l2, m2, l3, -m3); + } + else + { + return 0.0; + } + } + else + { + if (m1 == 0 && m2 == 0 && m3 == 0) + { + return Get_Gaunt_CH (l1, 0, l2, 0, l3, 0); + } + else if ((m1 == 0) && (m2 == m3)) + { + return pow (-1.0, m2) * Get_Gaunt_CH (l1, 0, l2, m2, l3, -m2); + } + else if ((m2 == 0) && (m3 == m1)) + { + return pow (-1.0, m1) * Get_Gaunt_CH (l2, 0, l1, m1, l3, -m1); + } + else if ((m3 == 0) && (m1 == m2)) + { + return pow (-1.0, m2) * Get_Gaunt_CH (l3, 0, l2, m2, l1, -m2); + } + else + { + return 0.0; + } + } + } + else + { + if (m1 >= 0 && m2 < 0 && m3 < 0) + { + if ((m1 == 0) && (m2 == m3)) + { + return pow (-1.0, m2) * Get_Gaunt_CH (l1, 0, l2, m2, l3, -m2); + } + else if (m1 > 0 && (m2 == m1 + m3)) + { + return pow (-1.0, m3) * sqrt (2.0) / 2.0 * Get_Gaunt_CH (l1, m1, l2, -m2, l3, m3); + } + else if (m1 > 0 && (m3 == m1 + m2)) + { + return pow (-1.0, m2) * sqrt (2.0) / 2.0 * Get_Gaunt_CH (l1, m1, l2, m2, l3, -m3); + } + else if (m1 > 0 && ((m1 + m2 + m3) == 0)) + { + return pow (-1.0, m1 + 1) * sqrt (2.0) / 2.0 * Get_Gaunt_CH (l1, m1, l2, m2, l3, m3); + } + else + { + return 0.0; + } + } + else if (m2 >= 0 && m1 < 0 && m3 < 0) + { + if ((m2 == 0) && (m1 == m3)) + { + return pow (-1.0, m1) * Get_Gaunt_CH (l2, 0, l1, m1, l3, -m1); + } + else if (m2 > 0 && (m1 == (m2 + m3))) + { + return pow (-1.0, m3) * sqrt (2.0) / 2.0 * Get_Gaunt_CH (l2, m2, l1, -m1, l3, m3); + } + else if (m2 > 0 && (m3 == (m2 + m1))) + { + return pow (-1.0, m1) * sqrt (2.0) / 2.0 * Get_Gaunt_CH (l2, m2, l1, m1, l3, -m3); + } + else if (m2 > 0 && ((m1 + m2 + m3) == 0)) + { + return pow (-1.0, m2 + 1) * sqrt (2.0) / 2.0 * Get_Gaunt_CH (l2, m2, l1, m1, l3, m3); + } + else + { + return 0.0; + } + } + else if (m3 >= 0 && m1 < 0 && m2 < 0) + { + if ((m3 == 0) && (m1 == m2)) + { + return pow (-1.0, m1) * Get_Gaunt_CH (l3, 0, l1, m1, l2, -m1); + } + else if (m3 > 0 && (m1 == m3 + m2)) + { + return pow (-1.0, m2) * sqrt (2.0) / 2.0 * Get_Gaunt_CH (l3, m3, l1, 
-m1, l2, m2); + } + else if (m3 > 0 && (m2 == m3 + m1)) + { + return pow (-1.0, m1) * sqrt (2.0) / 2.0 * Get_Gaunt_CH (l3, m3, l1, m1, l2, -m2); + } + else if (m3 > 0 && ((m1 + m2 + m3) == 0)) + { + return pow (-1.0, m3 + 1) * sqrt (2.0) / 2.0 * Get_Gaunt_CH (l3, m3, l1, m1, l2, m2); + } + else + { + return 0.0; + } + } + else + { + return 0.0; + } + } +} -double ORB_gaunt_table::Fact(const int& n) +double + ORB_gaunt_table::Fact (const int& n) { - double val = 1.0; - for(int i = 1; i <= n; i++) - { - val *= static_cast(i); - } - return val; + double val = 1.0; + for (int i = 1; i <= n; i++) + { + val *= static_cast (i); + } + return val; } - -void ORB_gaunt_table::Swap( - int& l1, - int& m1, - int& l2, - int & m2) +void + ORB_gaunt_table::Swap (int& l1, int& m1, int& l2, int& m2) { - int tmp1=0, tmp2=0; - if(l1 >= l2) return; - else - { - tmp1 = l2; - tmp2 = m2; - - l2 = l1; - m2 = m1; - - l1 = tmp1; - m1 = tmp2; - } - return; -} + int tmp1 = 0, tmp2 = 0; + if (l1 >= l2) + { + return; + } + else + { + tmp1 = l2; + tmp2 = m2; + l2 = l1; + m2 = m1; -int ORB_gaunt_table::Index_M(const int& m) + l1 = tmp1; + m1 = tmp2; + } + return; +} + +int + ORB_gaunt_table::Index_M (const int& m) { - if(m % 2 == 0) return (- m / 2); - else return ((m+1) / 2); + if (m % 2 == 0) + { + return (-m / 2); + } + else + { + return ((m + 1) / 2); + } } diff --git a/source/source_basis/module_ao/ORB_gaunt_table.h b/source/source_basis/module_ao/ORB_gaunt_table.h index 0e9aefdb701..bae65e81f37 100644 --- a/source/source_basis/module_ao/ORB_gaunt_table.h +++ b/source/source_basis/module_ao/ORB_gaunt_table.h @@ -7,126 +7,106 @@ class ORB_gaunt_table { - public: - - ORB_gaunt_table(); - ~ORB_gaunt_table(); - - /** - * Method 2: - * using WIgner 3j symbols - * \f$ Y(l1,m1), Y(l2,m2), Y(L,M) \f$ - */ - - void init_Gaunt_CH(const int& Lmax); - double Get_Gaunt_CH( - const int& l1, - const int& m1, - const int& l2, - const int& m2, - const int& l3, - const int& m3 ); - - ///M defined here are 
restricted within 0 to 2l+1 - /// - ///should be transformed first - double Get_Gaunt_SH( - const int& l1, - const int& mm1, - const int& l2, - const int& mm2, - const int& l3, - const int& mm3 ); - - double Calc_Gaunt_CH( - const int& l1, - const int& m1, - const int& l2, - const int& m2, - const int& l3, - const int& m3 ); - - - /** - * MEthod 2 - * - * Directly Calculate integral of - * \f$ S(l_1,m_1), S(l_2,m_2), S(L,M) \f$ - */ - ModuleBase::realArray Gaunt_Coefficients; - - /// (1) Make Ylm_Gaunt Table. - ///---------------- + public: + ORB_gaunt_table (); + ~ORB_gaunt_table (); + + /** + * Method 2: + * using WIgner 3j symbols + * \f$ Y(l1,m1), Y(l2,m2), Y(L,M) \f$ + */ + + void init_Gaunt_CH (const int& Lmax); + double Get_Gaunt_CH (const int& l1, const int& m1, const int& l2, const int& m2, const int& l3, const int& m3); + + /// M defined here are restricted within 0 to 2l+1 + /// + /// should be transformed first + double Get_Gaunt_SH (const int& l1, const int& mm1, const int& l2, const int& mm2, const int& l3, const int& mm3); + + double Calc_Gaunt_CH (const int& l1, const int& m1, const int& l2, const int& m2, const int& l3, const int& m3); + + /** + * MEthod 2 + * + * Directly Calculate integral of + * \f$ S(l_1,m_1), S(l_2,m_2), S(L,M) \f$ + */ + ModuleBase::realArray Gaunt_Coefficients; + + /// (1) Make Ylm_Gaunt Table. 
+ ///---------------- /* - void init_Ylm_Gaunt( - const int &lmax, - const double &s1, - const double &e1, - const double &s2, - const double &e2); + void init_Ylm_Gaunt( + const int &lmax, + const double &s1, + const double &e1, + const double &s2, + const double &e2); */ - /// (2) Use Ylm_Gaunt to calculate Gaunt Coefficinets element - ///------ + /// (2) Use Ylm_Gaunt to calculate Gaunt Coefficinets element + ///------ /* - double Cal_Gaunt_single( - const int &l1, - const int &m1, - const int &l2, - const int &m2, - const int &l, - const int &m, - const double &s1, - const double &e1, - const double &s2, - const double &e2); + double Cal_Gaunt_single( + const int &l1, + const int &m1, + const int &l2, + const int &m2, + const int &l, + const int &m, + const double &s1, + const double &e1, + const double &s2, + const double &e2); */ - /// (3) Make the whole Gaunt Coefficients table - /// ------------------------------------ - void init_Gaunt(const int &lmax); - - static inline int get_lm_index(const int l, const int m) - { - return l*l+m; - } - - static int Index_M(const int& m); - - int get_Lmax_Gaunt_Coefficients() const { return Lmax_Gaunt_Coefficients; } - int get_Lmax_Gaunt_CH() const { return Lmax_Gaunt_CH; } - - private: - - // Index Function - // Yu's mehtod - // Peize Lin delete void ModuleBase::GlobalFunc::ZEROS(); 2016-08-26 - - //int P_EL(const int& L); - - int EP_EL(const int& L); - - int index_func( - const int& l1, - const int& l2, - const int& l3, - const int& m3 ); - - double Fact(const int& n); - - void Swap( - int& l1, - int& m1, - int& l2, - int& m2 ); - - //2*Lmax+1 - std::map> Gaunt_CH; // Peize Lin update 2016-08-26 - - //direct integral - ModuleBase::matrix Ylm_Gaunt; - - int Lmax_Gaunt_Coefficients = -1; - int Lmax_Gaunt_CH = -1; + /// (3) Make the whole Gaunt Coefficients table + /// ------------------------------------ + void init_Gaunt (const int& lmax); + + static inline int + get_lm_index (const int l, const int m) + { + return l * 
l + m; + } + + static int Index_M (const int& m); + + int + get_Lmax_Gaunt_Coefficients () const + { + return Lmax_Gaunt_Coefficients; + } + int + get_Lmax_Gaunt_CH () const + { + return Lmax_Gaunt_CH; + } + + private: + // Index Function + // Yu's mehtod + // Peize Lin delete void ModuleBase::GlobalFunc::ZEROS(); 2016-08-26 + + // int P_EL(const int& L); + + int EP_EL (const int& L); + + int index_func (const int& l1, const int& l2, const int& l3, const int& m3); + + double Fact (const int& n); + + void Swap (int& l1, int& m1, int& l2, int& m2); + + // 2*Lmax+1 + std::map> Gaunt_CH; // Peize Lin update 2016-08-26 + + // direct integral + ModuleBase::matrix Ylm_Gaunt; + + int Lmax_Gaunt_Coefficients = -1; + int Lmax_Gaunt_CH = -1; }; #endif diff --git a/source/source_basis/module_ao/ORB_nonlocal.cpp b/source/source_basis/module_ao/ORB_nonlocal.cpp index d8775e0b367..45348b5df7b 100644 --- a/source/source_basis/module_ao/ORB_nonlocal.cpp +++ b/source/source_basis/module_ao/ORB_nonlocal.cpp @@ -1,68 +1,62 @@ #include "ORB_nonlocal.h" #include "source_base/global_function.h" -Numerical_Nonlocal::Numerical_Nonlocal() +Numerical_Nonlocal::Numerical_Nonlocal () { - //make std::pair of new and delete - //question remains - this->type = 0; - this->lmax = 0; - this->rcut_max = 0.0; - this->Proj = new Numerical_Nonlocal_Lm[1]; - this->nproj = -1; - //zhengdy-soc, for optimize nonlocal part + // make std::pair of new and delete + // question remains + this->type = 0; + this->lmax = 0; + this->rcut_max = 0.0; + this->Proj = new Numerical_Nonlocal_Lm[1]; + this->nproj = -1; + // zhengdy-soc, for optimize nonlocal part } -Numerical_Nonlocal::~Numerical_Nonlocal() -{ - delete[] Proj; -} +Numerical_Nonlocal::~Numerical_Nonlocal () { delete[] Proj; } -void Numerical_Nonlocal::set_type_info -( - const int& type_in, - const std::string& label_in, - const std::string& type_ps_in, - const int& lmax_in, - const int& nproj_in, - const Numerical_Nonlocal_Lm* Proj_in -) +void + 
Numerical_Nonlocal::set_type_info (const int& type_in, + const std::string& label_in, + const std::string& type_ps_in, + const int& lmax_in, + const int& nproj_in, + const Numerical_Nonlocal_Lm* Proj_in) { - //ModuleBase::TITLE("Numerical_Nonlocal","set_type_info"); - - this->type = type_in; - this->label = label_in; - this->type_ps = type_ps_in; - - if (lmax_in < -1 || lmax_in > 20) - { - ModuleBase::WARNING_QUIT("Numerical_Nonlocal", "bad input of lmax : should be between -1 and 20"); - } - - this->lmax = lmax_in; - - this->nproj = nproj_in; - - assert(nproj >= 0); - -//---------------------------------------------------------- -// EXPLAIN : non_local pseudopotential projector for each l -//---------------------------------------------------------- - //only store radial function - delete[] Proj; - this->Proj = nullptr; - this->Proj = new Numerical_Nonlocal_Lm[this->nproj]; - - for (int p1=0; p1Proj[p1] = Proj_in[p1]; - } - - this->rcut_max = 0.0; - for(int p1=0; p1rcut_max = std::max( this->Proj[p1].getRcut(), rcut_max ); - } - return; + // ModuleBase::TITLE("Numerical_Nonlocal","set_type_info"); + + this->type = type_in; + this->label = label_in; + this->type_ps = type_ps_in; + + if (lmax_in < -1 || lmax_in > 20) + { + ModuleBase::WARNING_QUIT ("Numerical_Nonlocal", "bad input of lmax : should be between -1 and 20"); + } + + this->lmax = lmax_in; + + this->nproj = nproj_in; + + assert (nproj >= 0); + + //---------------------------------------------------------- + // EXPLAIN : non_local pseudopotential projector for each l + //---------------------------------------------------------- + // only store radial function + delete[] Proj; + this->Proj = nullptr; + this->Proj = new Numerical_Nonlocal_Lm[this->nproj]; + + for (int p1 = 0; p1 < nproj; p1++) + { + this->Proj[p1] = Proj_in[p1]; + } + + this->rcut_max = 0.0; + for (int p1 = 0; p1 < nproj; p1++) + { + this->rcut_max = std::max (this->Proj[p1].getRcut (), rcut_max); + } + return; } - diff --git 
a/source/source_basis/module_ao/ORB_nonlocal.h b/source/source_basis/module_ao/ORB_nonlocal.h index 1204dd86912..67271e14f5b 100644 --- a/source/source_basis/module_ao/ORB_nonlocal.h +++ b/source/source_basis/module_ao/ORB_nonlocal.h @@ -7,7 +7,7 @@ * \class Numerical_Nonlocal *CLASS Numerical_Nonlocal *---------------------------- - * Note : contain nonlocal ps(:pseudopotential) information + * Note : contain nonlocal ps(:pseudopotential) information * about atoms * * Feature : set and store information about ps infomation @@ -19,47 +19,66 @@ */ class Numerical_Nonlocal { -public: - - Numerical_Nonlocal(); - ~Numerical_Nonlocal(); - - const int& getLmax() const { return this->lmax; } - - const int& getType() const { return this->type; } - - const std::string& getLabel() const { return this->label; } - - const std::string& getType_ps() const { return this->type_ps; } - - - void set_type_info( - const int& type_in, - const std::string& label_in, - const std::string& type_ps_in, - const int& lmax_in, - const int& nproj_in, - const Numerical_Nonlocal_Lm* ps_orbital_in); - - Numerical_Nonlocal_Lm* Proj = nullptr; ///< length: nproj(only store radial function ) - - const double& get_rcut_max() const { return rcut_max; } - const int& get_nproj() const { return nproj; } - - private: - - std::string label=""; /// lmax; + } + + const int& + getType () const + { + return this->type; + } + + const std::string& + getLabel () const + { + return this->label; + } + + const std::string& + getType_ps () const + { + return this->type_ps; + } + + void set_type_info (const int& type_in, + const std::string& label_in, + const std::string& type_ps_in, + const int& lmax_in, + const int& nproj_in, + const Numerical_Nonlocal_Lm* ps_orbital_in); + + Numerical_Nonlocal_Lm* Proj = nullptr; ///< length: nproj(only store radial function ) + + const double& + get_rcut_max () const + { + return rcut_max; + } + const int& + get_nproj () const + { + return nproj; + } + + private: + std::string 
label = ""; /// #include -Numerical_Nonlocal_Lm::Numerical_Nonlocal_Lm() +Numerical_Nonlocal_Lm::Numerical_Nonlocal_Lm () { - label = ""; - index_atom_type = 0; - angular_momentum_l = 0; - index_proj = 0; - - nr = 1; - nk = 1; - - rcut = 0.0; - kcut = 0.0; - dk = 0.0; - - nr_uniform = 1; - dr_uniform = -1.0; - this->renew(); -} + label = ""; + index_atom_type = 0; + angular_momentum_l = 0; + index_proj = 0; -Numerical_Nonlocal_Lm::~Numerical_Nonlocal_Lm() -{ - this->freemem(); + nr = 1; + nk = 1; + + rcut = 0.0; + kcut = 0.0; + dk = 0.0; + + nr_uniform = 1; + dr_uniform = -1.0; + this->renew (); } -void Numerical_Nonlocal_Lm::renew() +Numerical_Nonlocal_Lm::~Numerical_Nonlocal_Lm () { this->freemem (); } + +void + Numerical_Nonlocal_Lm::renew () { - assert(nr_uniform>0); - assert(nr>0); - assert(nk>0); - this->r_radial = new double[nr]; - this->rab = new double[nr]; - this->beta_r = new double[nr]; - this->beta_uniform = new double[nr_uniform]; - this->dbeta_uniform = new double[nr_uniform]; - this->k_radial = new double[nk]; - this->beta_k = new double[nk]; - ModuleBase::GlobalFunc::ZEROS(r_radial, nr); - ModuleBase::GlobalFunc::ZEROS(rab, nr); - ModuleBase::GlobalFunc::ZEROS(beta_r, nr); - ModuleBase::GlobalFunc::ZEROS(beta_uniform, nr_uniform); - ModuleBase::GlobalFunc::ZEROS(dbeta_uniform, nr_uniform); - ModuleBase::GlobalFunc::ZEROS(k_radial, nk); - ModuleBase::GlobalFunc::ZEROS(beta_k, nk); + assert (nr_uniform > 0); + assert (nr > 0); + assert (nk > 0); + this->r_radial = new double[nr]; + this->rab = new double[nr]; + this->beta_r = new double[nr]; + this->beta_uniform = new double[nr_uniform]; + this->dbeta_uniform = new double[nr_uniform]; + this->k_radial = new double[nk]; + this->beta_k = new double[nk]; + ModuleBase::GlobalFunc::ZEROS (r_radial, nr); + ModuleBase::GlobalFunc::ZEROS (rab, nr); + ModuleBase::GlobalFunc::ZEROS (beta_r, nr); + ModuleBase::GlobalFunc::ZEROS (beta_uniform, nr_uniform); + ModuleBase::GlobalFunc::ZEROS (dbeta_uniform, 
nr_uniform); + ModuleBase::GlobalFunc::ZEROS (k_radial, nk); + ModuleBase::GlobalFunc::ZEROS (beta_k, nk); } -void Numerical_Nonlocal_Lm::freemem() +void + Numerical_Nonlocal_Lm::freemem () { - delete[] this->r_radial; - delete[] this->rab; - delete[] this->beta_r; - delete[] this->beta_uniform; - delete[] this->dbeta_uniform; - delete[] this->k_radial; - delete[] this->beta_k; + delete[] this->r_radial; + delete[] this->rab; + delete[] this->beta_r; + delete[] this->beta_uniform; + delete[] this->dbeta_uniform; + delete[] this->k_radial; + delete[] this->beta_k; r_radial = nullptr; rab = nullptr; @@ -78,202 +77,207 @@ void Numerical_Nonlocal_Lm::freemem() beta_k = nullptr; } -Numerical_Nonlocal_Lm& Numerical_Nonlocal_Lm::operator= -( - const Numerical_Nonlocal_Lm & nol -) +Numerical_Nonlocal_Lm& + Numerical_Nonlocal_Lm::operator= (const Numerical_Nonlocal_Lm& nol) { - this->label = nol.label; - this->index_atom_type = nol.index_atom_type; - this->angular_momentum_l = nol.angular_momentum_l; + this->label = nol.label; + this->index_atom_type = nol.index_atom_type; + this->angular_momentum_l = nol.angular_momentum_l; this->index_proj = nol.index_proj; - this->nr = nol.nr; - this->nk = nol.nk; + this->nr = nol.nr; + this->nk = nol.nk; - this->nr_uniform = nol.nr_uniform; - this->dr_uniform = nol.dr_uniform; + this->nr_uniform = nol.nr_uniform; + this->dr_uniform = nol.dr_uniform; - this->rcut = nol.rcut; - this->kcut = nol.kcut; + this->rcut = nol.rcut; + this->kcut = nol.kcut; - this->dk = nol.dk; + this->dk = nol.dk; - this->freemem(); - this->renew(); + this->freemem (); + this->renew (); - for (int ir = 0; ir < nol.nr; ir++) - { - this->r_radial[ir] = nol.r_radial[ir]; - this->rab[ir] = nol.rab[ir]; - this->beta_r[ir] = nol.beta_r[ir]; - } + for (int ir = 0; ir < nol.nr; ir++) + { + this->r_radial[ir] = nol.r_radial[ir]; + this->rab[ir] = nol.rab[ir]; + this->beta_r[ir] = nol.beta_r[ir]; + } - for (int ir = 0; ir < nr_uniform; ir++) - { - this->beta_uniform[ir] = 
nol.beta_uniform[ir]; - this->dbeta_uniform[ir] = nol.dbeta_uniform[ir]; - } + for (int ir = 0; ir < nr_uniform; ir++) + { + this->beta_uniform[ir] = nol.beta_uniform[ir]; + this->dbeta_uniform[ir] = nol.dbeta_uniform[ir]; + } - for (int ik = 0; ik < nol.nk; ik++) - { - this->k_radial[ik] = nol.k_radial[ik]; - this->beta_k[ik] = nol.beta_k[ik]; - } + for (int ik = 0; ik < nol.nk; ik++) + { + this->k_radial[ik] = nol.k_radial[ik]; + this->beta_k[ik] = nol.beta_k[ik]; + } - return *this; + return *this; } -void Numerical_Nonlocal_Lm::set_NL_proj( - const std::string &label_in, - const int &index_atom_type_in, - const int &angular_momentum_l_in, - const int &nr_in, - const double *rab_in, - const double *r_radial_in, - const double *beta_r_in, - const int &nk_in, - const double &dk_in, - const double &dr_uniform_in) +void + Numerical_Nonlocal_Lm::set_NL_proj (const std::string& label_in, + const int& index_atom_type_in, + const int& angular_momentum_l_in, + const int& nr_in, + const double* rab_in, + const double* r_radial_in, + const double* beta_r_in, + const int& nk_in, + const double& dk_in, + const double& dr_uniform_in) { - this->label = label_in; - this->index_atom_type = index_atom_type_in; - - this->angular_momentum_l = angular_momentum_l_in; - assert(angular_momentum_l_in>=-1); // -1 means no angular momentum. 
- - this->dr_uniform = dr_uniform_in; - - this->nr = nr_in; - assert(nr_in>1 && nr_in <10000); - assert(nr%2!=0); - - this->rcut = r_radial_in[nr-1]; - assert(rcut>=0.0); - - this->nk = nk_in; - assert(nk%2!=0); - - this->dk = dk_in; - assert(dk>0.0); - - this->freemem(); - this->renew(); - - for (int ir = 0; ir < nr; ir++) - { - this->r_radial[ir] = r_radial_in[ir]; - this->rab[ir] = rab_in[ir]; - this->beta_r[ir] = beta_r_in[ir]; - } - - for (int ik = 0; ik < nk; ik++) - { - this->k_radial[ik] = ik * this->dk; - } - this->kcut = (nk-1) * this->dk; - - // (1) extra the uniform mesh - //this->extra_uniform(dr_uniform); - // (2) get the beta_k - this->get_kradial(); - - return; + this->label = label_in; + this->index_atom_type = index_atom_type_in; + + this->angular_momentum_l = angular_momentum_l_in; + assert (angular_momentum_l_in >= -1); // -1 means no angular momentum. + + this->dr_uniform = dr_uniform_in; + + this->nr = nr_in; + assert (nr_in > 1 && nr_in < 10000); + assert (nr % 2 != 0); + + this->rcut = r_radial_in[nr - 1]; + assert (rcut >= 0.0); + + this->nk = nk_in; + assert (nk % 2 != 0); + + this->dk = dk_in; + assert (dk > 0.0); + + this->freemem (); + this->renew (); + + for (int ir = 0; ir < nr; ir++) + { + this->r_radial[ir] = r_radial_in[ir]; + this->rab[ir] = rab_in[ir]; + this->beta_r[ir] = beta_r_in[ir]; + } + + for (int ik = 0; ik < nk; ik++) + { + this->k_radial[ik] = ik * this->dk; + } + this->kcut = (nk - 1) * this->dk; + + // (1) extra the uniform mesh + // this->extra_uniform(dr_uniform); + // (2) get the beta_k + this->get_kradial (); + + return; } -void Numerical_Nonlocal_Lm::get_kradial() +void + Numerical_Nonlocal_Lm::get_kradial () { - //ModuleBase::TITLE("Numerical_Nonlocal_Lm","get_kradial"); - double *jl = new double[nr]; - double *integrated_func = new double[nr]; + // ModuleBase::TITLE("Numerical_Nonlocal_Lm","get_kradial"); + double* jl = new double[nr]; + double* integrated_func = new double[nr]; - const double pref = sqrt(2.0 / 
ModuleBase::PI); + const double pref = sqrt (2.0 / ModuleBase::PI); for (int ik = 0; ik < nk; ik++) - { - ModuleBase::Sphbes::Spherical_Bessel( - this->nr, - this->r_radial, - this->k_radial[ik], - this->angular_momentum_l, - jl); - - for (int ir = 0; ir < nr; ir++) { - // beta_r is beta*r; - integrated_func[ir] = this->beta_r[ir] * this->r_radial[ir] * jl[ir]; - } + ModuleBase::Sphbes::Spherical_Bessel (this->nr, + this->r_radial, + this->k_radial[ik], + this->angular_momentum_l, + jl); + + for (int ir = 0; ir < nr; ir++) + { + // beta_r is beta*r; + integrated_func[ir] = this->beta_r[ir] * this->r_radial[ir] * jl[ir]; + } - ModuleBase::Integral::Simpson_Integral( - this->nr, - integrated_func, - this->rab, - this->beta_k[ik]); + ModuleBase::Integral::Simpson_Integral (this->nr, integrated_func, this->rab, this->beta_k[ik]); - this->beta_k[ik] *= ( pref*k_radial[ik]); - } + this->beta_k[ik] *= (pref * k_radial[ik]); + } delete[] integrated_func; delete[] jl; } - -void Numerical_Nonlocal_Lm::plot(const int &my_rank)const +void + Numerical_Nonlocal_Lm::plot (const int& my_rank) const { - std::string orbital_type; - switch( this->angular_momentum_l ) - { - case 0: orbital_type = "s"; break; - case 1: orbital_type = "p"; break; - case 2: orbital_type = "d"; break; - case 3: orbital_type = "f"; break; - case 4: orbital_type = "g"; break; - default: ModuleBase::WARNING_QUIT("Numerical_Orbital_Lm::plot","Please check in functoin."); - } + std::string orbital_type; + switch (this->angular_momentum_l) + { + case 0: + orbital_type = "s"; + break; + case 1: + orbital_type = "p"; + break; + case 2: + orbital_type = "d"; + break; + case 3: + orbital_type = "f"; + break; + case 4: + orbital_type = "g"; + break; + default: + ModuleBase::WARNING_QUIT ("Numerical_Orbital_Lm::plot", "Please check in functoin."); + } #ifdef __NORMAL #else - if(my_rank==0) - { - std::stringstream ssr, ssk, ssru; - ssr << PARAM.globalv.global_out_dir << this->label << "/" - << this->label << "-" << 
orbital_type << "-proj-r.dat"; - - ssk << PARAM.globalv.global_out_dir << this->label << "/" - << this->label << "-" << orbital_type << "-proj-k.dat"; - - ssru << PARAM.globalv.global_out_dir << this->label << "/" - << this->label << "-" << orbital_type << "-proj-ru.dat"; - - std::ofstream ofsr(ssr.str().c_str()); - std::ofstream ofsk(ssk.str().c_str()); - std::ofstream ofsru(ssru.str().c_str()); - - if (!ofsk || !ofsr || !ofsru) - { - ModuleBase::WARNING_QUIT("Numerical_Orbital_Lm::plot", "Can't open files!"); - } - - for (int i = 0; i < this->nr; i++) - { - ofsr << this->r_radial[i] << " " << this->beta_r[i] << std::endl; - } - - for (int i = 0; i < this->nk; i++) - { - ofsk << this->k_radial[i] << " " << this->beta_k[i] << std::endl; - } - - for (int i = 0; i < this->nr_uniform; i++) - { - ofsru << i * this->dr_uniform << " " << this->beta_uniform[i] << std::endl; - } - - ofsr.close(); - ofsk.close(); - ofsru.close(); - } + if (my_rank == 0) + { + std::stringstream ssr, ssk, ssru; + ssr << PARAM.globalv.global_out_dir << this->label << "/" << this->label << "-" << orbital_type + << "-proj-r.dat"; + + ssk << PARAM.globalv.global_out_dir << this->label << "/" << this->label << "-" << orbital_type + << "-proj-k.dat"; + + ssru << PARAM.globalv.global_out_dir << this->label << "/" << this->label << "-" << orbital_type + << "-proj-ru.dat"; + + std::ofstream ofsr (ssr.str ().c_str ()); + std::ofstream ofsk (ssk.str ().c_str ()); + std::ofstream ofsru (ssru.str ().c_str ()); + + if (!ofsk || !ofsr || !ofsru) + { + ModuleBase::WARNING_QUIT ("Numerical_Orbital_Lm::plot", "Can't open files!"); + } + + for (int i = 0; i < this->nr; i++) + { + ofsr << this->r_radial[i] << " " << this->beta_r[i] << std::endl; + } + + for (int i = 0; i < this->nk; i++) + { + ofsk << this->k_radial[i] << " " << this->beta_k[i] << std::endl; + } + + for (int i = 0; i < this->nr_uniform; i++) + { + ofsru << i * this->dr_uniform << " " << this->beta_uniform[i] << std::endl; + } + + ofsr.close (); 
+ ofsk.close (); + ofsru.close (); + } #endif - return; + return; } diff --git a/source/source_basis/module_ao/ORB_nonlocal_lm.h b/source/source_basis/module_ao/ORB_nonlocal_lm.h index 5ff1378b080..abe72c44222 100644 --- a/source/source_basis/module_ao/ORB_nonlocal_lm.h +++ b/source/source_basis/module_ao/ORB_nonlocal_lm.h @@ -18,79 +18,135 @@ class Numerical_Nonlocal_Lm { - public: + public: + double* beta_uniform = nullptr; + double* dbeta_uniform = nullptr; + int nr_uniform; + double dr_uniform; + + public: + Numerical_Nonlocal_Lm (); + ~Numerical_Nonlocal_Lm (); + + const int& + getL () const + { + return this->angular_momentum_l; + } + const int& + getType () const + { + return this->index_atom_type; + } + const double& + getRcut () const + { + return this->rcut; + } + + const int& + getNr () const + { + return this->nr; + } + const double* + getRadial () const + { + return this->r_radial; + } + const double& + getRadial (const int& ir) const + { + return this->r_radial[ir]; + } + const double* + getBeta_r () const + { + return this->beta_r; + } + const double& + getBeta_r (const int& ir) const + { + return this->beta_r[ir]; + } + + const double& + getDk () const + { + return this->dk; + } + const double* + getKpoint () const + { + return this->k_radial; + } + const double& + getKpoint (const int& ik) const + { + return this->k_radial[ik]; + } + const double* + getBeta_k () const + { + return this->beta_k; + } + const double& + getBeta_k (const int& ik) const + { + return this->beta_k[ik]; + } + + const int& + getNk () const + { + return nk; + } + const double& + getDruniform () const + { + return dr_uniform; + } - double* beta_uniform = nullptr; - double* dbeta_uniform = nullptr; - int nr_uniform; - double dr_uniform; - - public: - - Numerical_Nonlocal_Lm(); - ~Numerical_Nonlocal_Lm(); - - const int& getL() const { return this->angular_momentum_l; } - const int& getType() const { return this->index_atom_type; } - const double& getRcut() const { return 
this->rcut; } - - const int& getNr() const { return this->nr; } - const double* getRadial() const { return this->r_radial; } - const double& getRadial(const int &ir) const { return this->r_radial[ir]; } - const double* getBeta_r() const { return this->beta_r; } - const double& getBeta_r(const int &ir) const { return this->beta_r[ir]; } - - const double& getDk()const { return this->dk; } - const double* getKpoint()const { return this->k_radial; } - const double& getKpoint(const int &ik) const { return this->k_radial[ik]; } - const double* getBeta_k() const { return this->beta_k; } - const double& getBeta_k(const int &ik) const { return this->beta_k[ik]; } - - const int& getNk() const { return nk; } - const double& getDruniform() const { return dr_uniform; } - // enables deep copy - Numerical_Nonlocal_Lm& operator= (const Numerical_Nonlocal_Lm& nol ); - - void set_NL_proj( - const std::string &label, - const int &index_atom_type_in, - const int &angular_momentum_l_in, - const int &nr_in, - const double *rab_in, - const double *r_radial_in, - const double *beta_r_in, - const int &nk_in, - const double &dk_in, - const double &dr_uniform_in); - - void plot(const int &my_rank)const; - - private: - - void freemem(void); - void renew(void); - //void extra_uniform(const double &dr_uniform); - void get_kradial(void); - - std::string label; - int index_atom_type; - int angular_momentum_l; - int index_proj; - - int nr; - int nk; - - double rcut; - double kcut; - double dk; - - double* r_radial = nullptr; //points of r - double* k_radial = nullptr; - - double* rab = nullptr; - double* beta_r = nullptr; // |beta(r) * r> - double* beta_k = nullptr; + Numerical_Nonlocal_Lm& operator= (const Numerical_Nonlocal_Lm& nol); + + void set_NL_proj (const std::string& label, + const int& index_atom_type_in, + const int& angular_momentum_l_in, + const int& nr_in, + const double* rab_in, + const double* r_radial_in, + const double* beta_r_in, + const int& nk_in, + const double& dk_in, + 
const double& dr_uniform_in); + + void plot (const int& my_rank) const; + + private: + void freemem (); + void renew (); + // void extra_uniform(const double &dr_uniform); + void get_kradial (); + + std::string label; + int index_atom_type; + int angular_momentum_l; + int index_proj; + + int nr; + int nk; + + double rcut; + double kcut; + double dk; + + double* r_radial = nullptr; // points of r + double* k_radial = nullptr; + + double* rab = nullptr; + double* beta_r = nullptr; // |beta(r) * r> + double* beta_k = nullptr; }; #endif diff --git a/source/source_basis/module_ao/ORB_read.cpp b/source/source_basis/module_ao/ORB_read.cpp index 61c3a8139be..ce481a1afdc 100644 --- a/source/source_basis/module_ao/ORB_read.cpp +++ b/source/source_basis/module_ao/ORB_read.cpp @@ -16,7 +16,7 @@ /// PLEASE avoid using 'ORB' as global variable // mohan note 2021-03-23 -LCAO_Orbitals::LCAO_Orbitals() +LCAO_Orbitals::LCAO_Orbitals () { this->nchimax = 0; // this initialzied must specified this->Phi = new Numerical_Orbital[1]; @@ -31,43 +31,45 @@ LCAO_Orbitals::LCAO_Orbitals() this->rcutmax_Phi = 0.0; } -LCAO_Orbitals::~LCAO_Orbitals() +LCAO_Orbitals::~LCAO_Orbitals () { delete[] Phi; delete[] Alpha; } -std::vector LCAO_Orbitals::cutoffs() const { - std::vector cutoffs(ntype); - for (int it = 0; it < ntype; ++it) { - cutoffs[it] = Phi[it].getRcut(); - } +std::vector + LCAO_Orbitals::cutoffs () const +{ + std::vector cutoffs (ntype); + for (int it = 0; it < ntype; ++it) + { + cutoffs[it] = Phi[it].getRcut (); + } return cutoffs; } -void LCAO_Orbitals::init( - std::ofstream& ofs_in, - const int& ntype, - const std::string& orbital_dir, - const std::string* orbital_file, - const std::string& descriptor_file, - const int& lmax, - const double& lcao_ecut_in, - const double& lcao_dk_in, - const double& lcao_dr_in, - const double& lcao_rmax_in, - const bool& deepks_setorb, - const int& out_mat_r, - const bool& force_flag, - const int& my_rank -) +void + LCAO_Orbitals::init (std::ofstream& 
ofs_in, + const int& ntype, + const std::string& orbital_dir, + const std::string* orbital_file, + const std::string& descriptor_file, + const int& lmax, + const double& lcao_ecut_in, + const double& lcao_dk_in, + const double& lcao_dr_in, + const double& lcao_rmax_in, + const bool& deepks_setorb, + const int& out_mat_r, + const bool& force_flag, + const int& my_rank) { - assert(ntype > 0); - assert(lmax >= 0); - assert(lcao_ecut_in > 0.0); - assert(lcao_dk_in > 0.0); - assert(lcao_dr_in > 0.0); - assert(lcao_rmax_in > 0.0); + assert (ntype > 0); + assert (lmax >= 0); + assert (lcao_ecut_in > 0.0); + assert (lcao_dk_in > 0.0); + assert (lcao_dr_in > 0.0); + assert (lcao_rmax_in > 0.0); this->ecutwfc = lcao_ecut_in; this->dk = lcao_dk_in; @@ -75,83 +77,84 @@ void LCAO_Orbitals::init( this->Rmax = lcao_rmax_in; if (my_rank == 0 && !read_in_flag) - { - read_in_flag = true; - for (int it = 0; it < ntype; ++it) { - this->orbital_file.push_back(orbital_dir + orbital_file[it]); + read_in_flag = true; + for (int it = 0; it < ntype; ++it) + { + this->orbital_file.push_back (orbital_dir + orbital_file[it]); + } } - } this->descriptor_file = descriptor_file; #ifdef __MPI - bcast_files(ntype, my_rank); + bcast_files (ntype, my_rank); #endif - Read_Orbitals(ofs_in, ntype, lmax, deepks_setorb, out_mat_r, force_flag, my_rank); + Read_Orbitals (ofs_in, ntype, lmax, deepks_setorb, out_mat_r, force_flag, my_rank); return; } - #ifdef __MPI // be called in UnitCell. -void LCAO_Orbitals::bcast_files(const int& ntype_in, const int& my_rank) +void + LCAO_Orbitals::bcast_files (const int& ntype_in, const int& my_rank) { - ModuleBase::TITLE("LCAO_Orbitals", "bcast_files"); + ModuleBase::TITLE ("LCAO_Orbitals", "bcast_files"); // 'read_in_flag' is true when there is a // block "NUMERICAL_ORBITAL" in structure // file. 
- Parallel_Common::bcast_bool(read_in_flag); - Parallel_Common::bcast_string(descriptor_file); + Parallel_Common::bcast_bool (read_in_flag); + Parallel_Common::bcast_string (descriptor_file); if (!read_in_flag) - { - return; - } + { + return; + } - assert(ntype_in > 0); + assert (ntype_in > 0); GlobalV::ofs_running << "\n READING ORBITAL FILE NAMES FOR LCAO" << std::endl; for (int it = 0; it < ntype_in; it++) - { - std::string ofile; - std::string nfile; - - if (my_rank == 0) { - ofile = orbital_file[it]; - //----------------------------------- - // Turn off the read in NONLOCAL file - // function since 2013-08-02 by mohan - //----------------------------------- - // nfile = nonlocal_file[it]; - } + std::string ofile; + std::string nfile; - // PLEASE avoid using 'bcast_string' as global variable - // mohan note 2021-03-23 - Parallel_Common::bcast_string(ofile); - //----------------------------------- - // Turn off the read in NONLOCAL file - // function since 2013-08-02 by mohan - //----------------------------------- - // Parallel_Common::bcast_string(nfile); - - if (my_rank != 0) - { - orbital_file.push_back(ofile); + if (my_rank == 0) + { + ofile = orbital_file[it]; + //----------------------------------- + // Turn off the read in NONLOCAL file + // function since 2013-08-02 by mohan + //----------------------------------- + // nfile = nonlocal_file[it]; + } + + // PLEASE avoid using 'bcast_string' as global variable + // mohan note 2021-03-23 + Parallel_Common::bcast_string (ofile); //----------------------------------- // Turn off the read in NONLOCAL file // function since 2013-08-02 by mohan //----------------------------------- - // nonlocal_file.push_back ( nfile ); - } + // Parallel_Common::bcast_string(nfile); - GlobalV::ofs_running << " orbital file: " << orbital_file[it] << std::endl; - // GlobalV::ofs_running << " nonlocal file: " << nonlocal_file[it] << std::endl; - } + if (my_rank != 0) + { + orbital_file.push_back (ofile); + 
//----------------------------------- + // Turn off the read in NONLOCAL file + // function since 2013-08-02 by mohan + //----------------------------------- + // nonlocal_file.push_back ( nfile ); + } + + GlobalV::ofs_running << " orbital file: " << orbital_file[it] << std::endl; + // GlobalV::ofs_running << " nonlocal file: " << nonlocal_file[it] << std::endl; + } return; } #endif -void LCAO_Orbitals::Read_Orbitals(std::ofstream& ofs_in, +void + LCAO_Orbitals::Read_Orbitals (std::ofstream& ofs_in, const int& ntype_in, const int& lmax_in, const bool& deepks_setorb, @@ -159,8 +162,8 @@ void LCAO_Orbitals::Read_Orbitals(std::ofstream& ofs_in, const bool& force_flag, // mohan add 2021-05-07 const int& my_rank) // mohan add 2021-04-26 { - ModuleBase::TITLE("LCAO_Orbitals", "Read_Orbitals"); - ModuleBase::timer::start("LCAO_Orbitals", "Read_Orbitals"); + ModuleBase::TITLE ("LCAO_Orbitals", "Read_Orbitals"); + ModuleBase::timer::start ("LCAO_Orbitals", "Read_Orbitals"); ofs_in << "\n\n\n\n"; ofs_in << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; @@ -183,28 +186,29 @@ void LCAO_Orbitals::Read_Orbitals(std::ofstream& ofs_in, ofs_in << "\n SETUP ONE DIMENSIONAL ORBITALS/POTENTIAL" << std::endl; if (!read_in_flag) - { - ModuleBase::WARNING_QUIT("LCAO_Orbitals::Read_Orbitals", "Set the NUMERICAL_ORBITAL block in structure file."); - } + { + ModuleBase::WARNING_QUIT ("LCAO_Orbitals::Read_Orbitals", + "Set the NUMERICAL_ORBITAL block in structure file."); + } // OUT(ofs_in,"ecutwfc for kmesh",ecutwfc); - ModuleBase::GlobalFunc::OUT(ofs_in, "delta k (1/Bohr)", dk); - ModuleBase::GlobalFunc::OUT(ofs_in, "delta r (Bohr)", dR); - ModuleBase::GlobalFunc::OUT(ofs_in, "dr_uniform (Bohr)", dr_uniform); - ModuleBase::GlobalFunc::OUT(ofs_in, "rmax (Bohr)", Rmax); + ModuleBase::GlobalFunc::OUT (ofs_in, "delta k (1/Bohr)", dk); + ModuleBase::GlobalFunc::OUT (ofs_in, "delta r (Bohr)", dR); + ModuleBase::GlobalFunc::OUT (ofs_in, "dr_uniform 
(Bohr)", dr_uniform); + ModuleBase::GlobalFunc::OUT (ofs_in, "rmax (Bohr)", Rmax); // check the read in data. - assert(dk > 0.0); - assert(ecutwfc > 0.0); - assert(dR > 0.0); - assert(Rmax > 0.0); + assert (dk > 0.0); + assert (ecutwfc > 0.0); + assert (dR > 0.0); + assert (Rmax > 0.0); /// ntype: number of atom species this->ntype = ntype_in; - assert(ntype > 0); + assert (ntype > 0); /// lmax: lmax used in local orbitals as basis sets - assert(lmax_in >= 0); // mohan add 2021-04-16 + assert (lmax_in >= 0); // mohan add 2021-04-16 this->lmax = lmax_in; ////////////////////////////////////////////////////////// @@ -217,13 +221,13 @@ void LCAO_Orbitals::Read_Orbitals(std::ofstream& ofs_in, // std::cout << " ecutwfc=" << ecutwfc << std::endl; // LiuXh modified 2016-01-25, 2016-07-20 if (ecutwfc < 20) - { - this->kmesh = static_cast(2 * sqrt(ecutwfc) / dk) + 4; - } + { + this->kmesh = static_cast (2 * sqrt (ecutwfc) / dk) + 4; + } else - { - this->kmesh = static_cast(sqrt(ecutwfc) / dk) + 4; - } + { + this->kmesh = static_cast (sqrt (ecutwfc) / dk) + 4; + } // jingan add for calculate r(R) matrix // if(out_mat_r) @@ -232,10 +236,11 @@ void LCAO_Orbitals::Read_Orbitals(std::ofstream& ofs_in, // } // this->kmesh = static_cast (PI / 0.01 / 4 / this->dk); - if (kmesh % 2 == 0) { - kmesh++; -} - ModuleBase::GlobalFunc::OUT(ofs_in, "kmesh", kmesh); + if (kmesh % 2 == 0) + { + kmesh++; + } + ModuleBase::GlobalFunc::OUT (ofs_in, "kmesh", kmesh); //----------------------------------------------------------------- //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> @@ -246,11 +251,11 @@ void LCAO_Orbitals::Read_Orbitals(std::ofstream& ofs_in, this->Phi = new Numerical_Orbital[ntype]; for (int it = 0; it < ntype; it++) - { - this->Read_PAO(ofs_in, it, force_flag, my_rank); - // caoyu add 2021-05-24 to reconstruct atom_arrange::set_sr_NL - this->rcutmax_Phi = std::max(this->rcutmax_Phi, this->Phi[it].getRcut()); - } + { + this->Read_PAO (ofs_in, it, force_flag, my_rank); + 
// caoyu add 2021-05-24 to reconstruct atom_arrange::set_sr_NL + this->rcutmax_Phi = std::max (this->rcutmax_Phi, this->Phi[it].getRcut ()); + } // caoyu add 2021-3-16 ///>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> @@ -259,14 +264,14 @@ void LCAO_Orbitals::Read_Orbitals(std::ofstream& ofs_in, ///>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> if (deepks_setorb) // condition: descriptor in lcao line - { + { - delete[] this->Alpha; - this->Alpha = new Numerical_Orbital[1]; // not related to atom type -- remain to be discussed - this->Read_Descriptor(ofs_in, force_flag, my_rank); - } + delete[] this->Alpha; + this->Alpha = new Numerical_Orbital[1]; // not related to atom type -- remain to be discussed + this->Read_Descriptor (ofs_in, force_flag, my_rank); + } - ModuleBase::timer::end("LCAO_Orbitals", "Read_Orbitals"); + ModuleBase::timer::end ("LCAO_Orbitals", "Read_Orbitals"); return; } @@ -277,92 +282,95 @@ void LCAO_Orbitals::Read_Orbitals(std::ofstream& ofs_in, // 2. pass nchi to phi via this->Phi[it].set_orbital_info // be careful! 
nchi[l] may be different for differnt phi //------------------------------------------------------- -void LCAO_Orbitals::Read_PAO(std::ofstream& ofs_in, +void + LCAO_Orbitals::Read_PAO (std::ofstream& ofs_in, const int& it, const bool& force_flag, // mohan add 2021-05-07 const int& my_rank) // mohan add 2021-04-26 { - ModuleBase::TITLE("LCAO_Orbitals", "Read_PAO"); + ModuleBase::TITLE ("LCAO_Orbitals", "Read_PAO"); std::ifstream in_ao; bool open = false; if (my_rank == 0) - { - in_ao.open(this->orbital_file[it].c_str()); - if (in_ao) { - open = true; + in_ao.open (this->orbital_file[it].c_str ()); + if (in_ao) + { + open = true; + } } - } #ifdef __MPI - Parallel_Common::bcast_bool(open); + Parallel_Common::bcast_bool (open); #endif if (!open) - { - std::cout << " Orbital file : " << this->orbital_file[it] << std::endl; - ModuleBase::WARNING_QUIT("LCAO_Orbitals::Read_PAO", "Couldn't find orbital files"); - } + { + std::cout << " Orbital file : " << this->orbital_file[it] << std::endl; + ModuleBase::WARNING_QUIT ("LCAO_Orbitals::Read_PAO", "Couldn't find orbital files"); + } - ofs_in << " " << std::setw(12) << "ORBITAL" << std::setw(3) << "L" << std::setw(3) << "N" << std::setw(8) << "nr" - << std::setw(8) << "dr" << std::setw(8) << "RCUT" << std::setw(12) << "CHECK_UNIT" << std::setw(12) - << "NEW_UNIT" << std::endl; + ofs_in << " " << std::setw (12) << "ORBITAL" << std::setw (3) << "L" << std::setw (3) << "N" << std::setw (8) + << "nr" << std::setw (8) << "dr" << std::setw (8) << "RCUT" << std::setw (12) << "CHECK_UNIT" + << std::setw (12) << "NEW_UNIT" << std::endl; // lmax and nchimax for type it int lmaxt = 0; int nchimaxt = 0; - this->read_orb_file(ofs_in, in_ao, it, lmaxt, nchimaxt, this->Phi, force_flag, my_rank); + this->read_orb_file (ofs_in, in_ao, it, lmaxt, nchimaxt, this->Phi, force_flag, my_rank); // lmax and nchimax for all types - this->lmax = std::max(this->lmax, lmaxt); - this->nchimax = std::max(this->nchimax, nchimaxt); + this->lmax = std::max 
(this->lmax, lmaxt); + this->nchimax = std::max (this->nchimax, nchimaxt); - in_ao.close(); + in_ao.close (); return; } // caoyu add 2021-3-16 -void LCAO_Orbitals::Read_Descriptor(std::ofstream& ofs_in, +void + LCAO_Orbitals::Read_Descriptor (std::ofstream& ofs_in, const bool& force_flag, // mohan add 2021-05-07 const int& my_rank) // read descriptor basis { - ModuleBase::TITLE("LCAO_Orbitals", "Read_Descriptor"); + ModuleBase::TITLE ("LCAO_Orbitals", "Read_Descriptor"); std::ifstream in_de; - ofs_in << " " << std::setw(12) << "DESCRIPTOR" << std::setw(3) << "L" << std::setw(3) << "N" << std::setw(8) << "nr" - << std::setw(8) << "dr" << std::setw(8) << "RCUT" << std::setw(12) << "CHECK_UNIT" << std::setw(12) - << "NEW_UNIT" << std::endl; + ofs_in << " " << std::setw (12) << "DESCRIPTOR" << std::setw (3) << "L" << std::setw (3) << "N" << std::setw (8) + << "nr" << std::setw (8) << "dr" << std::setw (8) << "RCUT" << std::setw (12) << "CHECK_UNIT" + << std::setw (12) << "NEW_UNIT" << std::endl; // check if the descriptor file exists. 
bool open = false; if (my_rank == 0) - { - in_de.open(this->descriptor_file.c_str()); - if (in_de) { - open = true; + in_de.open (this->descriptor_file.c_str ()); + if (in_de) + { + open = true; + } } - } #ifdef __MPI - Parallel_Common::bcast_bool(open); + Parallel_Common::bcast_bool (open); #endif if (!open) - { - std::cout << " Orbital file : " << this->descriptor_file << std::endl; - ModuleBase::WARNING_QUIT("LCAO_Orbitals::Read_Descriptor", "Couldn't find orbital files for descriptor"); - } + { + std::cout << " Orbital file : " << this->descriptor_file << std::endl; + ModuleBase::WARNING_QUIT ("LCAO_Orbitals::Read_Descriptor", "Couldn't find orbital files for descriptor"); + } this->lmax_d = 0; this->nchimax_d = 0; - this->read_orb_file(ofs_in, in_de, 0, this->lmax_d, this->nchimax_d, this->Alpha, force_flag, my_rank); + this->read_orb_file (ofs_in, in_de, 0, this->lmax_d, this->nchimax_d, this->Alpha, force_flag, my_rank); - in_de.close(); + in_de.close (); return; } -void LCAO_Orbitals::read_orb_file(std::ofstream& ofs_in, // GlobalV::ofs_running +void + LCAO_Orbitals::read_orb_file (std::ofstream& ofs_in, // GlobalV::ofs_running std::ifstream& ifs, const int& it, int& lmax, @@ -371,85 +379,85 @@ void LCAO_Orbitals::read_orb_file(std::ofstream& ofs_in, // GlobalV::ofs_running const bool& force_flag, const int& my_rank) { - ModuleBase::TITLE("LCAO_Orbitals", "read_orb_file"); + ModuleBase::TITLE ("LCAO_Orbitals", "read_orb_file"); char word[80]; std::string orb_label; if (my_rank == 0) - { - while (ifs.good()) { - ifs >> word; - if (std::strcmp(word, "Element") == 0) - { - ifs >> orb_label; - continue; - } - if (std::strcmp(word, "Lmax") == 0) - { - ifs >> lmax; - break; - } + while (ifs.good ()) + { + ifs >> word; + if (std::strcmp (word, "Element") == 0) + { + ifs >> orb_label; + continue; + } + if (std::strcmp (word, "Lmax") == 0) + { + ifs >> lmax; + break; + } + } } - } #ifdef __MPI - Parallel_Common::bcast_int(lmax); + Parallel_Common::bcast_int (lmax); 
#endif int* nchi = new int[lmax + 1]; // allocate space: number of chi for each L. if (my_rank == 0) - { - for (int l = 0; l <= lmax; l++) { - ifs >> word >> word >> word >> nchi[l]; - nchimax = std::max(nchimax, nchi[l]); + for (int l = 0; l <= lmax; l++) + { + ifs >> word >> word >> word >> nchi[l]; + nchimax = std::max (nchimax, nchi[l]); + } } - } #ifdef __MPI - Parallel_Common::bcast_int(nchimax); - Parallel_Common::bcast_int(nchi, lmax + 1); + Parallel_Common::bcast_int (nchimax); + Parallel_Common::bcast_int (nchi, lmax + 1); #endif // calculate total number of chi int total_nchi = 0; for (int l = 0; l <= lmax; l++) - { - total_nchi += nchi[l]; - } + { + total_nchi += nchi[l]; + } // OUT(GlobalV::ofs_running,"Total number of chi(l,n)",total_nchi); - ao[it].phiLN.resize(total_nchi); + ao[it].phiLN.resize (total_nchi); int meshr = 0; // number of mesh points int meshr_read = 0; double dr = 0.0; if (my_rank == 0) - { - while (ifs.good()) { - ifs >> word; - if (std::strcmp(word, "END") == 0) // Peize Lin fix bug about strcmp 2016-08-02 - { - break; - } - } - ModuleBase::CHECK_NAME(ifs, "Mesh"); - ifs >> meshr; - meshr_read = meshr; - if (meshr % 2 == 0) - { - ++meshr; + while (ifs.good ()) + { + ifs >> word; + if (std::strcmp (word, "END") == 0) // Peize Lin fix bug about strcmp 2016-08-02 + { + break; + } + } + ModuleBase::CHECK_NAME (ifs, "Mesh"); + ifs >> meshr; + meshr_read = meshr; + if (meshr % 2 == 0) + { + ++meshr; + } + ModuleBase::CHECK_NAME (ifs, "dr"); + ifs >> dr; } - ModuleBase::CHECK_NAME(ifs, "dr"); - ifs >> dr; - } #ifdef __MPI - Parallel_Common::bcast_int(meshr); - Parallel_Common::bcast_int(meshr_read); - Parallel_Common::bcast_double(dr); + Parallel_Common::bcast_int (meshr); + Parallel_Common::bcast_int (meshr_read); + Parallel_Common::bcast_double (dr); #endif int count = 0; @@ -461,150 +469,151 @@ void LCAO_Orbitals::read_orb_file(std::ofstream& ofs_in, // GlobalV::ofs_running int tmp_n = 0; for (int L = 0; L <= lmax; L++) - { - for (int N 
= 0; N < nchi[L]; N++) { - ofs_in << " " << std::setw(12) << count + 1 << std::setw(3) << L << std::setw(3) << N; - - double* radial = nullptr; // radial mesh - double* psi = nullptr; // radial local orbital - double* psir; // psi * r - double* rab = nullptr; // dr - - // set the number of mesh and the interval distance. - ofs_in << std::setw(8) << meshr << std::setw(8) << dr; - - radial = new double[meshr]; - psi = new double[meshr]; - psir = new double[meshr]; - rab = new double[meshr]; - - for (int im = 0; im < meshr; ++im) - { - radial[im] = 0.0; - psi[im] = 0.0; - psir[im] = 0.0; - rab[im] = 0.0; - } - - for (int ir = 0; ir < meshr; ir++) - { - rab[ir] = dr; - // plus one because we can't read in r = 0 term now. - // change ir+1 to ir, because we need psi(r==0) information. - radial[ir] = ir * dr; // mohan 2010-04-19 - } - - // set the length of orbital - ofs_in << std::setw(8) << radial[meshr - 1]; - - // mohan update 2010-09-07 - bool find = false; - if (my_rank == 0) - { - while (!find) + for (int N = 0; N < nchi[L]; N++) { - if (ifs.eof()) - { - std::cout << " Can't find l=" << L << " n=" << N << " orbital." << std::endl; - break; - } - - ifs >> name1 >> name2 >> name3; - ifs >> tmp_it >> tmp_l >> tmp_n; - assert(name1 == "Type"); - if (L == tmp_l && N == tmp_n) - { - // meshr_read is different from meshr if meshr is even number. - for (int ir = 0; ir < meshr_read; ir++) + ofs_in << " " << std::setw (12) << count + 1 << std::setw (3) << L << std::setw (3) << N; + + double* radial = nullptr; // radial mesh + double* psi = nullptr; // radial local orbital + double* psir; // psi * r + double* rab = nullptr; // dr + + // set the number of mesh and the interval distance. 
+ ofs_in << std::setw (8) << meshr << std::setw (8) << dr; + + radial = new double[meshr]; + psi = new double[meshr]; + psir = new double[meshr]; + rab = new double[meshr]; + + for (int im = 0; im < meshr; ++im) { - ifs >> psi[ir]; - psir[ir] = psi[ir] * radial[ir]; + radial[im] = 0.0; + psi[im] = 0.0; + psir[im] = 0.0; + rab[im] = 0.0; } - find = true; - } - else - { - double no_use; - for (int ir = 0; ir < meshr_read; ir++) + + for (int ir = 0; ir < meshr; ir++) + { + rab[ir] = dr; + // plus one because we can't read in r = 0 term now. + // change ir+1 to ir, because we need psi(r==0) information. + radial[ir] = ir * dr; // mohan 2010-04-19 + } + + // set the length of orbital + ofs_in << std::setw (8) << radial[meshr - 1]; + + // mohan update 2010-09-07 + bool find = false; + if (my_rank == 0) { - ifs >> no_use; + while (!find) + { + if (ifs.eof ()) + { + std::cout << " Can't find l=" << L << " n=" << N << " orbital." + << std::endl; + break; + } + + ifs >> name1 >> name2 >> name3; + ifs >> tmp_it >> tmp_l >> tmp_n; + assert (name1 == "Type"); + if (L == tmp_l && N == tmp_n) + { + // meshr_read is different from meshr if meshr is even number. 
+ for (int ir = 0; ir < meshr_read; ir++) + { + ifs >> psi[ir]; + psir[ir] = psi[ir] * radial[ir]; + } + find = true; + } + else + { + double no_use; + for (int ir = 0; ir < meshr_read; ir++) + { + ifs >> no_use; + } + } + } // end find } - } - } // end find - } #ifdef __MPI - Parallel_Common::bcast_bool(find); + Parallel_Common::bcast_bool (find); #endif - if (!find) - { - ModuleBase::WARNING_QUIT("LCAO_Orbitals::read_orb_file", "Can't find orbitals."); - } + if (!find) + { + ModuleBase::WARNING_QUIT ("LCAO_Orbitals::read_orb_file", "Can't find orbitals."); + } #ifdef __MPI - Parallel_Common::bcast_double(psi, meshr_read); - Parallel_Common::bcast_double(psir, meshr_read); + Parallel_Common::bcast_double (psi, meshr_read); + Parallel_Common::bcast_double (psir, meshr_read); #endif - // renormalize radial wave functions - double* inner = new double[meshr](); - for (int ir = 0; ir < meshr; ir++) - { - inner[ir] = psir[ir] * psir[ir]; - } - double unit = 0.0; - - ModuleBase::Integral::Simpson_Integral(meshr, inner, rab, unit); - - assert(unit > 0.0); - - // check unit: \sum ( psi[r] * r )^2 = 1 - ofs_in << std::setprecision(3) << std::setw(12) << unit; - - for (int ir = 0; ir < meshr; ir++) - { - psi[ir] /= sqrt(unit); - psir[ir] /= sqrt(unit); - } - - for (int ir = 0; ir < meshr; ir++) - { - inner[ir] = psir[ir] * psir[ir]; - } - ModuleBase::Integral::Simpson_Integral(meshr, inner, rab, unit); - delete[] inner; - ofs_in << std::setw(12) << unit << std::endl; - - ao[it].phiLN[count].set_orbital_info(orb_label, - it, // type - L, // angular momentum L - N, // number of orbitals of this L - meshr, // number of radial mesh - rab, - radial, // radial mesh value(a.u.) 
- Numerical_Orbital_Lm::Psi_Type::Psi, // psi type next - psi, // radial wave function - this->kmesh, - this->dk, - this->dr_uniform, - PARAM.inp.out_element_info, - true, - force_flag); // delta k mesh in reciprocal space - - delete[] radial; - delete[] rab; - delete[] psir; - delete[] psi; - - ++count; + // renormalize radial wave functions + double* inner = new double[meshr](); + for (int ir = 0; ir < meshr; ir++) + { + inner[ir] = psir[ir] * psir[ir]; + } + double unit = 0.0; + + ModuleBase::Integral::Simpson_Integral (meshr, inner, rab, unit); + + assert (unit > 0.0); + + // check unit: \sum ( psi[r] * r )^2 = 1 + ofs_in << std::setprecision (3) << std::setw (12) << unit; + + for (int ir = 0; ir < meshr; ir++) + { + psi[ir] /= sqrt (unit); + psir[ir] /= sqrt (unit); + } + + for (int ir = 0; ir < meshr; ir++) + { + inner[ir] = psir[ir] * psir[ir]; + } + ModuleBase::Integral::Simpson_Integral (meshr, inner, rab, unit); + delete[] inner; + ofs_in << std::setw (12) << unit << std::endl; + + ao[it].phiLN[count].set_orbital_info (orb_label, + it, // type + L, // angular momentum L + N, // number of orbitals of this L + meshr, // number of radial mesh + rab, + radial, // radial mesh value(a.u.) + Numerical_Orbital_Lm::Psi_Type::Psi, // psi type next + psi, // radial wave function + this->kmesh, + this->dk, + this->dr_uniform, + PARAM.inp.out_element_info, + true, + force_flag); // delta k mesh in reciprocal space + + delete[] radial; + delete[] rab; + delete[] psir; + delete[] psi; + + ++count; + } } - } - ao[it].set_orbital_info(it, // type - orb_label, // label - lmax, - nchi, - total_nchi); // copy twice ! + ao[it].set_orbital_info (it, // type + orb_label, // label + lmax, + nchi, + total_nchi); // copy twice ! 
delete[] nchi; return; diff --git a/source/source_basis/module_ao/ORB_read.h b/source/source_basis/module_ao/ORB_read.h index 6ee794c080a..67656d659c7 100644 --- a/source/source_basis/module_ao/ORB_read.h +++ b/source/source_basis/module_ao/ORB_read.h @@ -16,116 +16,153 @@ class LCAO_Orbitals { - public: - - LCAO_Orbitals(); - ~LCAO_Orbitals(); - - void init( - std::ofstream& ofs_in, - const int& ntype, - const std::string& orbital_dir, - const std::string* orbital_file, - const std::string& descriptor_file, - const int& lmax, - const double& lcao_ecut_in, - const double& lcao_dk_in, - const double& lcao_dr_in, - const double& lcao_rmax_in, - const bool& deepks_setorb, - const int& out_mat_r, - const bool& force_flag, - const int& my_rank - ); - - void Read_Orbitals( - std::ofstream &ofs_in, // mohan add 2021-05-07 - const int &ntype_in, - const int &lmax_in, - const bool &deepks_setorb, // mohan add 2021-04-25 - const int &out_mat_r, // mohan add 2021-04-26 - const bool &force_flag, // mohan add 2021-05-07 - const int &my_rank); // mohan add 2021-04-26 - - void Read_PAO( - std::ofstream &ofs_in, - const int& it, - const bool &force_flag, // mohan add 2021-05-07 - const int& my_rank); // mohan add 2021-04-26 - - - - void Read_Descriptor( - std::ofstream &ofs_in, - const bool &force_flag, // mohan add 2021-05-07 - const int &my_rank); //caoyu add 2020-3-16 + public: + LCAO_Orbitals (); + ~LCAO_Orbitals (); + + void init (std::ofstream& ofs_in, + const int& ntype, + const std::string& orbital_dir, + const std::string* orbital_file, + const std::string& descriptor_file, + const int& lmax, + const double& lcao_ecut_in, + const double& lcao_dk_in, + const double& lcao_dr_in, + const double& lcao_rmax_in, + const bool& deepks_setorb, + const int& out_mat_r, + const bool& force_flag, + const int& my_rank); + + void Read_Orbitals (std::ofstream& ofs_in, // mohan add 2021-05-07 + const int& ntype_in, + const int& lmax_in, + const bool& deepks_setorb, // mohan add 2021-04-25 
+ const int& out_mat_r, // mohan add 2021-04-26 + const bool& force_flag, // mohan add 2021-05-07 + const int& my_rank); // mohan add 2021-04-26 + + void Read_PAO (std::ofstream& ofs_in, + const int& it, + const bool& force_flag, // mohan add 2021-05-07 + const int& my_rank); // mohan add 2021-04-26 + + void Read_Descriptor (std::ofstream& ofs_in, + const bool& force_flag, // mohan add 2021-05-07 + const int& my_rank); // caoyu add 2020-3-16 #ifdef __MPI - void bcast_files(const int &ntype_in, const int &my_rank); + void bcast_files (const int& ntype_in, const int& my_rank); #endif - const double& get_ecutwfc() const {return ecutwfc;} - const int& get_kmesh() const{return kmesh;} - const double& get_dk() const {return dk;} - const double& get_dR() const {return dR;} - const double& get_Rmax() const {return Rmax;} - const int& get_lmax() const {return lmax;} - const int& get_lmax_d() const { return lmax_d; } /// cutoffs() const; - - /// numerical atomic orbitals - Numerical_Orbital* Phi = nullptr; - - - //caoyu add 2021-3-10 - /// descriptor bases, saved as one-type atom orbital - Numerical_Orbital* Alpha = nullptr; - - // initialized in input.cpp - double ecutwfc; - double dk; - double dR; - double Rmax; - - double dr_uniform; - - // initalized in UnitCell - // assume ntype < 20. 
- bool read_in_flag; - std::vector orbital_file; - std::vector nonlocal_file; - std::string descriptor_file; //caoyu add 2020-3-16 - -private: - - int ntype; // number of elements - int kmesh; // number of points on kmesh - - int lmax; - int nchimax; - - int lmax_d; //max l of descriptor orbitals - int nchimax_d; //max number of descriptor orbitals per l - - double rcutmax_Phi; //caoyu add 2021-05-24 - - void read_orb_file( - std::ofstream &ofs_in, - std::ifstream &ifs, - const int &it, - int &lmax, - int &nchimax, - Numerical_Orbital* ao, - const bool &force_flag, // mohan add 2021-05-07 - const int &my_rank); //caoyu add 2021-04-26 + const double& + get_ecutwfc () const + { + return ecutwfc; + } + const int& + get_kmesh () const + { + return kmesh; + } + const double& + get_dk () const + { + return dk; + } + const double& + get_dR () const + { + return dR; + } + const double& + get_Rmax () const + { + return Rmax; + } + const int& + get_lmax () const + { + return lmax; + } + const int& + get_lmax_d () const + { + return lmax_d; + } ///< lmax of descriptor basis + const int& + get_nchimax () const + { + return nchimax; + } + const int& + get_nchimax_d () const + { + return nchimax_d; + } ///< nchimax of descriptor basis + const int& + get_ntype () const + { + return ntype; + } + const double& + get_dr_uniform () const + { + return dr_uniform; + } + + // caoyu add 2021-05-24 + const double& + get_rcutmax_Phi () const + { + return rcutmax_Phi; + } + + std::vector cutoffs () const; + + /// numerical atomic orbitals + Numerical_Orbital* Phi = nullptr; + + // caoyu add 2021-3-10 + /// descriptor bases, saved as one-type atom orbital + Numerical_Orbital* Alpha = nullptr; + + // initialized in input.cpp + double ecutwfc; + double dk; + double dR; + double Rmax; + + double dr_uniform; + + // initalized in UnitCell + // assume ntype < 20. 
+ bool read_in_flag; + std::vector orbital_file; + std::vector nonlocal_file; + std::string descriptor_file; // caoyu add 2020-3-16 + + private: + int ntype; // number of elements + int kmesh; // number of points on kmesh + + int lmax; + int nchimax; + + int lmax_d; // max l of descriptor orbitals + int nchimax_d; // max number of descriptor orbitals per l + + double rcutmax_Phi; // caoyu add 2021-05-24 + + void read_orb_file (std::ofstream& ofs_in, + std::ifstream& ifs, + const int& it, + int& lmax, + int& nchimax, + Numerical_Orbital* ao, + const bool& force_flag, // mohan add 2021-05-07 + const int& my_rank); // caoyu add 2021-04-26 friend class TwoCenterBundle; // for the sake of TwoCenterBundle::to_LCAO_Orbitals }; diff --git a/source/source_basis/module_ao/element_basis_index-ORB.cpp b/source/source_basis/module_ao/element_basis_index-ORB.cpp index c9441b44978..ed12e229a49 100644 --- a/source/source_basis/module_ao/element_basis_index-ORB.cpp +++ b/source/source_basis/module_ao/element_basis_index-ORB.cpp @@ -7,38 +7,37 @@ namespace ModuleBase { ModuleBase::Element_Basis_Index::Range -Element_Basis_Index::construct_range( const LCAO_Orbitals &orb ) + Element_Basis_Index::construct_range (const LCAO_Orbitals& orb) { - ModuleBase::Element_Basis_Index::Range range; - range.resize( orb.get_ntype() ); - for( std::size_t T=0; T!=range.size(); ++T ) - { - range[T].resize( orb.Phi[T].getLmax()+1 ); - for( std::size_t L=0; L!=range[T].size(); ++L ) - { - range[T][L].N = orb.Phi[T].getNchi(L); - range[T][L].M = 2*L+1; - } - } - return range; + ModuleBase::Element_Basis_Index::Range range; + range.resize (orb.get_ntype ()); + for (std::size_t T = 0; T != range.size (); ++T) + { + range[T].resize (orb.Phi[T].getLmax () + 1); + for (std::size_t L = 0; L != range[T].size (); ++L) + { + range[T][L].N = orb.Phi[T].getNchi (L); + range[T][L].M = 2 * L + 1; + } + } + return range; } - ModuleBase::Element_Basis_Index::Range -Element_Basis_Index::construct_range( const 
std::vector>> &orb ) + Element_Basis_Index::construct_range (const std::vector>>& orb) { - ModuleBase::Element_Basis_Index::Range range; - range.resize( orb.size() ); - for( std::size_t T=0; T!=range.size(); ++T ) - { - range[T].resize( orb[T].size() ); - for( std::size_t L=0; L!=range[T].size(); ++L ) - { - range[T][L].N = orb[T][L].size(); - range[T][L].M = 2*L+1; - } - } - return range; + ModuleBase::Element_Basis_Index::Range range; + range.resize (orb.size ()); + for (std::size_t T = 0; T != range.size (); ++T) + { + range[T].resize (orb[T].size ()); + for (std::size_t L = 0; L != range[T].size (); ++L) + { + range[T][L].N = orb[T][L].size (); + range[T][L].M = 2 * L + 1; + } + } + return range; } -} \ No newline at end of file +} // namespace ModuleBase \ No newline at end of file diff --git a/source/source_basis/module_ao/element_basis_index-ORB.h b/source/source_basis/module_ao/element_basis_index-ORB.h index ec2415e6a0d..5e87f93b0a9 100644 --- a/source/source_basis/module_ao/element_basis_index-ORB.h +++ b/source/source_basis/module_ao/element_basis_index-ORB.h @@ -4,19 +4,19 @@ #include "../../source_base/element_basis_index.h" #include - class Numerical_Orbital_Lm; - class LCAO_Orbitals; +class Numerical_Orbital_Lm; +class LCAO_Orbitals; namespace ModuleBase { namespace Element_Basis_Index { - extern Range construct_range( const LCAO_Orbitals &orb ); +extern Range construct_range (const LCAO_Orbitals& orb); - extern Range construct_range( const std::vector>> &orb ); // orb[T][L][N] -} +extern Range construct_range (const std::vector>>& orb); // orb[T][L][N] +} // namespace Element_Basis_Index -} +} // namespace ModuleBase #endif \ No newline at end of file diff --git a/source/source_basis/module_ao/parallel_orbitals.cpp b/source/source_basis/module_ao/parallel_orbitals.cpp index 5ace1653272..7c77a64f766 100644 --- a/source/source_basis/module_ao/parallel_orbitals.cpp +++ b/source/source_basis/module_ao/parallel_orbitals.cpp @@ -4,7 +4,7 @@ #include 
"source_base/module_external/scalapack_connector.h" #include "source_base/global_function.h" -Parallel_Orbitals::Parallel_Orbitals() +Parallel_Orbitals::Parallel_Orbitals () { this->loc_sizes = nullptr; // in multi-k, 2D-block-division variables for FT (R<->k) @@ -12,33 +12,34 @@ Parallel_Orbitals::Parallel_Orbitals() this->nlocstart = nullptr; this->nnr = 1; this->ncol_bands = 0; - this->nrow_bands=0; - this->nloc_wfc=0; - this->nloc_Eij=0; - this->lastband_in_proc=0; - this->lastband_number=0; - this->loc_size=0; + this->nrow_bands = 0; + this->nloc_wfc = 0; + this->nloc_Eij = 0; + this->lastband_in_proc = 0; + this->lastband_number = 0; + this->loc_size = 0; this->nbands = 0; - } -Parallel_Orbitals::~Parallel_Orbitals() +Parallel_Orbitals::~Parallel_Orbitals () { - delete[] loc_sizes; + delete[] loc_sizes; delete[] nlocdim; delete[] nlocstart; } -int Parallel_Orbitals::get_wfc_global_nbands() const +int + Parallel_Orbitals::get_wfc_global_nbands () const { #ifdef __MPI return this->desc_wfc[3]; -#else +#else return this->ncol_bands; #endif } -int Parallel_Orbitals::get_wfc_global_nbasis() const +int + Parallel_Orbitals::get_wfc_global_nbasis () const { #ifdef __MPI return this->desc_wfc[2]; @@ -47,231 +48,238 @@ int Parallel_Orbitals::get_wfc_global_nbasis() const #endif } -int Parallel_Orbitals::get_nbands() const +int + Parallel_Orbitals::get_nbands () const { return this->nbands; } -void Parallel_Orbitals::set_atomic_trace(const int* iat2iwt, const int &nat, const int &nlocal) +void + Parallel_Orbitals::set_atomic_trace (const int* iat2iwt, const int& nat, const int& nlocal) { - ModuleBase::TITLE("Parallel_Orbitals", "set_atomic_trace"); + ModuleBase::TITLE ("Parallel_Orbitals", "set_atomic_trace"); this->iat2iwt_ = iat2iwt; int nat_plus_1 = nat + 1; - this->atom_begin_col.resize(nat_plus_1); - this->atom_begin_row.resize(nat_plus_1); - for(int iat=0;iatatom_begin_col[iat] = -1; - this->atom_begin_row[iat] = -1; - int irow = iat2iwt[iat]; - int icol = 
iat2iwt[iat]; - const int nw_global = (iat == nat-1) ? (nlocal - irow): (iat2iwt[iat+1] - irow); - //find the first local row index of atom iat - for(int i=0;iglobal2local_row_[irow] != -1) - { - this->atom_begin_row[iat] = this->global2local_row_[irow]; - break; - } - irow++; - } - //find the first local col index of atom iat - for(int i=0;iatom_begin_col.resize (nat_plus_1); + this->atom_begin_row.resize (nat_plus_1); + for (int iat = 0; iat < nat; iat++) { - if (this->global2local_col_[icol] != -1) - { - this->atom_begin_col[iat] = this->global2local_col_[icol]; - break; - } - icol++; + this->atom_begin_col[iat] = -1; + this->atom_begin_row[iat] = -1; + int irow = iat2iwt[iat]; + int icol = iat2iwt[iat]; + const int nw_global = (iat == nat - 1) ? (nlocal - irow) : (iat2iwt[iat + 1] - irow); + // find the first local row index of atom iat + for (int i = 0; i < nw_global; i++) + { + if (this->global2local_row_[irow] != -1) + { + this->atom_begin_row[iat] = this->global2local_row_[irow]; + break; + } + irow++; + } + // find the first local col index of atom iat + for (int i = 0; i < nw_global; i++) + { + if (this->global2local_col_[icol] != -1) + { + this->atom_begin_col[iat] = this->global2local_col_[icol]; + break; + } + icol++; + } } - } this->atom_begin_row[nat] = this->nrow; this->atom_begin_col[nat] = this->ncol; } // Get the number of columns of the orbital matrix of the iat-th atom -int Parallel_Orbitals::get_col_size(int iat) const +int + Parallel_Orbitals::get_col_size (int iat) const { int size = this->atom_begin_col[iat]; // If the iat-th atom does not have an orbital matrix, return 0 - if(size == -1) - { - return 0; - } + if (size == -1) + { + return 0; + } iat += 1; // Traverse the orbital matrices of the atom and calculate the number of columns - while(this->atom_begin_col[iat] <= this->ncol) - { - if(this->atom_begin_col[iat] != -1) + while (this->atom_begin_col[iat] <= this->ncol) { - size = this->atom_begin_col[iat] - size; - return size; + if 
(this->atom_begin_col[iat] != -1) + { + size = this->atom_begin_col[iat] - size; + return size; + } + iat++; } - iat++; - } // If the orbital matrix is not found after all atoms are traversed, throw an exception - throw std::string("error in get_col_size(iat)"); + throw std::string ("error in get_col_size(iat)"); } // Get the number of rows of the orbital matrix of the iat-th atom -int Parallel_Orbitals::get_row_size(int iat) const +int + Parallel_Orbitals::get_row_size (int iat) const { int size = this->atom_begin_row[iat]; - if(size == -1) - { - return 0; - } + if (size == -1) + { + return 0; + } iat += 1; - while(this->atom_begin_row[iat] <= this->nrow) - { - if(this->atom_begin_row[iat] != -1) + while (this->atom_begin_row[iat] <= this->nrow) { - size = this->atom_begin_row[iat] - size; - return size; + if (this->atom_begin_row[iat] != -1) + { + size = this->atom_begin_row[iat] - size; + return size; + } + iat++; } - iat++; - } // If the orbital matrix is not found after all atoms are traversed, throw an exception - throw std::string("error in get_col_size(iat)"); + throw std::string ("error in get_col_size(iat)"); } // Get the global indexes of the rows of the parallel orbital matrix -std::vector Parallel_Orbitals::get_indexes_row() const +std::vector + Parallel_Orbitals::get_indexes_row () const { - std::vector indexes(this->nrow); - for(int i = 0; i < this->nrow; i++) - { + std::vector indexes (this->nrow); + for (int i = 0; i < this->nrow; i++) + { #ifdef __MPI - indexes[i] = this->local2global_row(i); + indexes[i] = this->local2global_row (i); #else - indexes[i] = i; + indexes[i] = i; #endif - } + } return indexes; } // Get the global indexes of the columns of the parallel orbital matrix -std::vector Parallel_Orbitals::get_indexes_col() const +std::vector + Parallel_Orbitals::get_indexes_col () const { - std::vector indexes(this->ncol); - for(int i = 0; i < this->ncol; i++) - { + std::vector indexes (this->ncol); + for (int i = 0; i < this->ncol; i++) + { 
#ifdef __MPI - indexes[i] = this->local2global_col(i); + indexes[i] = this->local2global_col (i); #else - indexes[i] = i; + indexes[i] = i; #endif - } + } return indexes; } // Get the global indexes of the rows of the orbital matrix of the iat-th atom -std::vector Parallel_Orbitals::get_indexes_row(int iat) const +std::vector + Parallel_Orbitals::get_indexes_row (int iat) const { - int size = this->get_row_size(iat); - if(size == 0) - { - return std::vector(); - } - std::vector indexes(size); + int size = this->get_row_size (iat); + if (size == 0) + { + return std::vector (); + } + std::vector indexes (size); int irow = this->atom_begin_row[iat]; int begin = this->iat2iwt_[iat]; - for(int i = 0; i < size; ++i) - { + for (int i = 0; i < size; ++i) + { #ifdef __MPI - indexes[i] = this->local2global_row(irow + i) - begin; + indexes[i] = this->local2global_row (irow + i) - begin; #else - indexes[i] = i; + indexes[i] = i; #endif - } + } return indexes; } // Get the global indexes of the columns of the orbital matrix of the iat-th atom -std::vector Parallel_Orbitals::get_indexes_col(int iat) const +std::vector + Parallel_Orbitals::get_indexes_col (int iat) const { - int size = this->get_col_size(iat); - if(size == 0) - { - return std::vector(); - } - std::vector indexes(size); + int size = this->get_col_size (iat); + if (size == 0) + { + return std::vector (); + } + std::vector indexes (size); int icol = this->atom_begin_col[iat]; int begin = this->iat2iwt_[iat]; - for(int i = 0; i < size; ++i) - { + for (int i = 0; i < size; ++i) + { #ifdef __MPI - indexes[i] = this->local2global_col(icol + i) - begin; + indexes[i] = this->local2global_col (icol + i) - begin; #else - indexes[i] = i; + indexes[i] = i; #endif - } + } return indexes; } #ifdef __MPI -void Parallel_Orbitals::set_desc_wfc_Eij(const int& nbasis, const int& nbands, const int& lld) +void + Parallel_Orbitals::set_desc_wfc_Eij (const int& nbasis, const int& nbands, const int& lld) { - 
ModuleBase::TITLE("Parallel_2D", "set_desc_wfc_Eij"); + ModuleBase::TITLE ("Parallel_2D", "set_desc_wfc_Eij"); #ifdef __DEBUG - assert(nbasis > 0 && nbands > 0 && lld > 0); - assert(this->nb > 0 && this->dim0 > 0 && this->dim1 > 0); + assert (nbasis > 0 && nbands > 0 && lld > 0); + assert (this->nb > 0 && this->dim0 > 0 && this->dim1 > 0); #endif int ISRC = 0; int info = 0; - descinit_(desc_wfc, &nbasis, &nbands, &this->nb, &this->nb, &ISRC, &ISRC, &this->blacs_ctxt, &lld, &info); - descinit_(desc_wfc1, &nbands, &nbasis, &this->nb, &this->nb, &ISRC, &ISRC, &this->blacs_ctxt, &lld, &info); - descinit_(desc_Eij, &nbands, &nbands, &this->nb, &this->nb, &ISRC, &ISRC, &this->blacs_ctxt, &lld, &info); + descinit_ (desc_wfc, &nbasis, &nbands, &this->nb, &this->nb, &ISRC, &ISRC, &this->blacs_ctxt, &lld, &info); + descinit_ (desc_wfc1, &nbands, &nbasis, &this->nb, &this->nb, &ISRC, &ISRC, &this->blacs_ctxt, &lld, &info); + descinit_ (desc_Eij, &nbands, &nbands, &this->nb, &this->nb, &ISRC, &ISRC, &this->blacs_ctxt, &lld, &info); } -int Parallel_Orbitals::set_nloc_wfc_Eij( - const int& N_A, - std::ofstream& ofs_running, - std::ofstream& ofs_warning) +int + Parallel_Orbitals::set_nloc_wfc_Eij (const int& N_A, std::ofstream& ofs_running, std::ofstream& ofs_warning) { - ModuleBase::TITLE("Parallel_Orbitals", "set_nloc_wfc_Eij"); + ModuleBase::TITLE ("Parallel_Orbitals", "set_nloc_wfc_Eij"); // for wavefuncton , calculate nbands_loc this->nbands = N_A; int end_id = 0; int block = N_A / nb; if (block * nb < N_A) - { - block++; - } - if (dim1 > block) - { - ofs_warning << " cpu 2D distribution : " << dim0 << "*" << dim1 << std::endl; - ofs_warning << " but, the number of bands-row-block is " << block << std::endl; - if (nb > 1) { - return 1; + block++; } - else + if (dim1 > block) { - ModuleBase::WARNING_QUIT("Parallel_Orbitals::set_nloc_wfc_Eij", - "The number of columns of the 2D process grid exceeds the number of bands. 
" - "Try launching the calculation with fewer MPI processes." - ); + ofs_warning << " cpu 2D distribution : " << dim0 << "*" << dim1 << std::endl; + ofs_warning << " but, the number of bands-row-block is " << block << std::endl; + if (nb > 1) + { + return 1; + } + else + { + ModuleBase::WARNING_QUIT ( + "Parallel_Orbitals::set_nloc_wfc_Eij", + "The number of columns of the 2D process grid exceeds the number of bands. " + "Try launching the calculation with fewer MPI processes."); + } } - } int col_b_bands = block / dim1; if (coord[1] < block % dim1) - { - col_b_bands++; - } + { + col_b_bands++; + } if (block % dim1 == 0) - { - end_id = dim1 - 1; - } + { + end_id = dim1 - 1; + } else - { - end_id = block % dim1 - 1; - } + { + end_id = block % dim1 - 1; + } if (coord[1] == end_id) - { - this->ncol_bands = (col_b_bands - 1) * nb + (N_A - (block - 1) * nb); - } + { + this->ncol_bands = (col_b_bands - 1) * nb + (N_A - (block - 1) * nb); + } else - { - this->ncol_bands = col_b_bands * nb; - } + { + this->ncol_bands = col_b_bands * nb; + } this->nrow_bands = this->nrow; this->nloc_wfc = this->ncol_bands * this->nrow; diff --git a/source/source_basis/module_ao/parallel_orbitals.h b/source/source_basis/module_ao/parallel_orbitals.h index 2d6a6e7ad90..05985730ba7 100644 --- a/source/source_basis/module_ao/parallel_orbitals.h +++ b/source/source_basis/module_ao/parallel_orbitals.h @@ -7,46 +7,44 @@ /// parallel distribution of basis, wavefunction and matrix. 
class Parallel_Orbitals : public Parallel_2D { -public: - Parallel_Orbitals(); - ~Parallel_Orbitals(); + public: + Parallel_Orbitals (); + ~Parallel_Orbitals (); /// local size of bands, used for 2d wavefunction /// must divided on dim1 because of elpa interface int ncol_bands; int nrow_bands; - + /// ncol_bands*nrow long nloc_wfc; - //ncol_bands*ncol_bands + // ncol_bands*ncol_bands long nloc_Eij; int lastband_in_proc; - int lastband_number; + int lastband_number; ///--------------------------------------- /// number of elements(basis-pairs) in this processon /// on all adjacent atoms-pairs(2D division) ///--------------------------------------- - int nnr=1; - int * nlocdim = nullptr; - int * nlocstart = nullptr; - + int nnr = 1; + int* nlocdim = nullptr; + int* nlocstart = nullptr; + #ifdef __MPI - int desc_wfc[9]; //for wfc, nlocal*nbands - int desc_Eij[9]; // for Eij in TDDFT, nbands*nbands + int desc_wfc[9]; // for wfc, nlocal*nbands + int desc_Eij[9]; // for Eij in TDDFT, nbands*nbands int desc_wfc1[9]; // for wfc^T in TDDFT, nbands*nlocal /// set the local size of wavefunction and Eij - int set_nloc_wfc_Eij(const int& N_A/**< global row size*/, - std::ofstream& ofs_running, - std::ofstream& ofs_warning); + int set_nloc_wfc_Eij (const int& N_A /**< global row size*/, + std::ofstream& ofs_running, + std::ofstream& ofs_warning); ///@brief set the desc[9] of the 2D-block-cyclic distribution of wavefunction and Eij - void set_desc_wfc_Eij(const int& nbasis, - const int& nbands, - const int& lld); + void set_desc_wfc_Eij (const int& nbasis, const int& nbands, const int& lld); #endif int* loc_sizes = nullptr; @@ -64,7 +62,7 @@ class Parallel_Orbitals : public Parallel_2D * @param nat : number of atoms * @param nlocal : number of global orbitals */ - void set_atomic_trace(const int* iat2iwt, const int &nat, const int &nlocal); + void set_atomic_trace (const int* iat2iwt, const int& nat, const int& nlocal); /** * @brief dimension getters for 2D-block-cyclic division 
of Hamiltonian matrix @@ -72,13 +70,21 @@ class Parallel_Orbitals : public Parallel_2D * get_row_size() : total number of rows of Hamiltonian matrix in this processor * get_col_size(iat) : number of columns of Hamiltonian matrix in atom iat * get_row_size(iat) : number of rows of Hamiltonian matrix in atom iat - */ - int get_col_size()const { return this->ncol; }; - int get_row_size()const { return this->nrow; }; - int get_col_size(int iat) const; - int get_row_size(int iat) const; - - int get_nbands() const; + */ + int + get_col_size () const + { + return this->ncol; + }; + int + get_row_size () const + { + return this->nrow; + }; + int get_col_size (int iat) const; + int get_row_size (int iat) const; + + int get_nbands () const; int nbands; @@ -88,18 +94,17 @@ class Parallel_Orbitals : public Parallel_2D * get_indexes_col() : global indexes (~NLOCAL) of columns of Hamiltonian matrix in this processor * get_indexes_row(iat) : global indexes (~nw) of rows of Hamiltonian matrix in atom iat * get_indexes_col(iat) : global indexes (~nw) of columns of Hamiltonian matrix in atom iat - */ - std::vector get_indexes_row() const; - std::vector get_indexes_col() const; - std::vector get_indexes_row(int iat) const; - std::vector get_indexes_col(int iat) const; + */ + std::vector get_indexes_row () const; + std::vector get_indexes_col () const; + std::vector get_indexes_row (int iat) const; + std::vector get_indexes_col (int iat) const; // private: - // orbital index for each atom + // orbital index for each atom std::vector atom_begin_row; std::vector atom_begin_col; const int* iat2iwt_ = nullptr; - }; #endif diff --git a/source/source_basis/module_ao/test/1_snap_equal_test.cpp b/source/source_basis/module_ao/test/1_snap_equal_test.cpp index d29418ddad7..62ca0a51c1d 100644 --- a/source/source_basis/module_ao/test/1_snap_equal_test.cpp +++ b/source/source_basis/module_ao/test/1_snap_equal_test.cpp @@ -1,92 +1,111 @@ -#include -#include"ORB_unittest.h" +#include +#include 
"ORB_unittest.h" #include "source_base/global_variable.h" -//Test whether the 2-center-int results -// and its derivative from two clases are equal. +// Test whether the 2-center-int results +// and its derivative from two clases are equal. // - ORB_gen_table::snap_psipsi(job=0) and Center2_Orb::Orb11::cal_overlap -// - ORB_gen_table::snap_psipsi(job=1) and Center2_Orb::Orb11::cal_grad_overlap -TEST_F(test_orb, equal_test) +// - ORB_gen_table::snap_psipsi(job=1) and Center2_Orb::Orb11::cal_grad_overlap +TEST_F (test_orb, equal_test) { - - this->set_center2orbs(); - //equal test - //orb - double olm_0[1] = { 0 }; - double olm_1[3] = { 0,0,0 }; - //center2orb + + this->set_center2orbs (); + // equal test + // orb + double olm_0[1] = {0}; + double olm_1[3] = {0, 0, 0}; + // center2orb double clm_0 = 0; - ModuleBase::Vector3 clm_1; + ModuleBase::Vector3 clm_1; - //test parameters - const double rmax = 5; //Ry - srand((unsigned)time(NULL)); - ModuleBase::Vector3 R1(0, 0, 0); - ModuleBase::Vector3 R2(randr(rmax), randr(rmax), randr(rmax)); - std::cout << "random R2=(" << R2.x << "," << R2.y << "," << R2.z << ")" << std::endl; - ModuleBase::Vector3 dR = ModuleBase::Vector3(0.001, 0.001, 0.001); - //4. 
calculate overlap and grad_overlap by both methods - int T1 = 0; - - for (int T2 = 0;T2 < ORB.get_ntype();++T2) - { - for (int L1 = 0;L1 < ORB.Phi[T1].getLmax();++L1) - { - for (int N1 = 0;N1 < ORB.Phi[T1].getNchi(L1);++N1) - { - for (int L2 = 0;L2 < ORB.Phi[T2].getLmax();++L2) - { - for (int N2 = 0;N2 < ORB.Phi[T2].getNchi(L2);++N2) - { - for (int m1 = 0;m1 < 2 * L1 + 1;++m1) - { - for (int m2 = 0;m2 < 2 * L2 + 1;++m2) - { - OGT.snap_psipsi( - ORB, olm_0, 0, 'S', - R1, T1, L1, m1, N1, - R2, T2, L2, m2, N2 - ); - OGT.snap_psipsi( - ORB, olm_1, 1, 'S', - R1, T1, L1, m1, N1, - R2, T2, L2, m2, N2 - ); - //std::cout << this->mock_center2_orb11[T1][T2][L1][N1][L2][N2]->cal_overlap(R1, R2, m1, m2); - clm_0 = - test_center2_orb11[T1][T2][L1][N1][L2][N2]->cal_overlap(R1, R2, m1, m2); - clm_1 = - test_center2_orb11[T1][T2][L1][N1][L2][N2]->cal_grad_overlap(R1, R2, m1, m2); - EXPECT_NEAR(olm_0[0], clm_0, 1e-10); - EXPECT_NEAR(olm_1[0], clm_1.x, 1e-10); - EXPECT_NEAR(olm_1[1], clm_1.y, 1e-10); - EXPECT_NEAR(olm_1[2], clm_1.z, 1e-10); - ModuleBase::GlobalFunc::ZEROS(olm_1, 3); - } - } - } + // test parameters + const double rmax = 5; // Ry + srand ((unsigned)time (NULL)); + ModuleBase::Vector3 R1 (0, 0, 0); + ModuleBase::Vector3 R2 (randr (rmax), randr (rmax), randr (rmax)); + std::cout << "random R2=(" << R2.x << "," << R2.y << "," << R2.z << ")" << std::endl; + ModuleBase::Vector3 dR = ModuleBase::Vector3 (0.001, 0.001, 0.001); + // 4. 
calculate overlap and grad_overlap by both methods + int T1 = 0; - } - } - } - } + for (int T2 = 0; T2 < ORB.get_ntype (); ++T2) + { + for (int L1 = 0; L1 < ORB.Phi[T1].getLmax (); ++L1) + { + for (int N1 = 0; N1 < ORB.Phi[T1].getNchi (L1); ++N1) + { + for (int L2 = 0; L2 < ORB.Phi[T2].getLmax (); ++L2) + { + for (int N2 = 0; N2 < ORB.Phi[T2].getNchi (L2); ++N2) + { + for (int m1 = 0; m1 < 2 * L1 + 1; ++m1) + { + for (int m2 = 0; m2 < 2 * L2 + 1; ++m2) + { + OGT.snap_psipsi (ORB, + olm_0, + 0, + 'S', + R1, + T1, + L1, + m1, + N1, + R2, + T2, + L2, + m2, + N2); + OGT.snap_psipsi (ORB, + olm_1, + 1, + 'S', + R1, + T1, + L1, + m1, + N1, + R2, + T2, + L2, + m2, + N2); + // std::cout << + // this->mock_center2_orb11[T1][T2][L1][N1][L2][N2]->cal_overlap(R1, + // R2, m1, m2); + clm_0 = test_center2_orb11[T1][T2][L1][N1][L2][N2] + ->cal_overlap (R1, R2, m1, m2); + clm_1 = test_center2_orb11[T1][T2][L1][N1][L2][N2] + ->cal_grad_overlap (R1, R2, m1, m2); + EXPECT_NEAR (olm_0[0], clm_0, 1e-10); + EXPECT_NEAR (olm_1[0], clm_1.x, 1e-10); + EXPECT_NEAR (olm_1[1], clm_1.y, 1e-10); + EXPECT_NEAR (olm_1[2], clm_1.z, 1e-10); + ModuleBase::GlobalFunc::ZEROS (olm_1, 3); + } + } + } + } + } + } + } } -int main(int argc, char **argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); - MPI_Comm_size(MPI_COMM_WORLD,&GlobalV::NPROC); - MPI_Comm_rank(MPI_COMM_WORLD,&GlobalV::MY_RANK); + MPI_Init (&argc, &argv); + MPI_Comm_size (MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; } - diff --git a/source/source_basis/module_ao/test/ORB_atomic_lm_test.cpp b/source/source_basis/module_ao/test/ORB_atomic_lm_test.cpp index b4642d905c1..0844fd16832 100644 --- 
a/source/source_basis/module_ao/test/ORB_atomic_lm_test.cpp +++ b/source/source_basis/module_ao/test/ORB_atomic_lm_test.cpp @@ -53,20 +53,24 @@ class NumericalOrbitalLmTest : public ::testing::Test { -protected: - void SetUp(); - void TearDown(); + protected: + void SetUp (); + void TearDown (); // objects under unit test std::vector nolm_; // helper functions - void init(); - void pour_data(); - void init_with_different_k(double const& ecut, double const& dk); - size_t calc_nk(double const& ecutwfc, double const& dk); - size_t calc_nr_uniform(double const& rcut, double const& dr_uniform); - bool check_file_match(size_t const& nline, double const* col1, double const* col2, double const& tol, std::string const& fname); + void init (); + void pour_data (); + void init_with_different_k (double const& ecut, double const& dk); + size_t calc_nk (double const& ecutwfc, double const& dk); + size_t calc_nr_uniform (double const& rcut, double const& dr_uniform); + bool check_file_match (size_t const& nline, + double const* col1, + double const* col2, + double const& tol, + std::string const& fname); // radial real-space mesh spacing double dr_; @@ -89,34 +93,41 @@ class NumericalOrbitalLmTest : public ::testing::Test bool force_flag_; }; - -size_t NumericalOrbitalLmTest::calc_nk(double const& ecutwfc, double const& dk) { +size_t + NumericalOrbitalLmTest::calc_nk (double const& ecutwfc, double const& dk) +{ // current formula for calculating nk from ecutwfc & dk // see source_basis/module_ao/ORB_read.cpp, function "Read_Orbitals" size_t nk = 0; - if(ecutwfc < 20) { - nk = static_cast( 2 * sqrt(ecutwfc) / dk ) + 4; - } else { - nk = static_cast( sqrt(ecutwfc) / dk ) + 4; - } + if (ecutwfc < 20) + { + nk = static_cast (2 * sqrt (ecutwfc) / dk) + 4; + } + else + { + nk = static_cast (sqrt (ecutwfc) / dk) + 4; + } - if (nk%2 == 0) { - ++nk; - } + if (nk % 2 == 0) + { + ++nk; + } return nk; } - -size_t NumericalOrbitalLmTest::calc_nr_uniform(double const& rcut, double const& 
dr_uniform) { - return static_cast(rcut/dr_uniform) + 10; +size_t + NumericalOrbitalLmTest::calc_nr_uniform (double const& rcut, double const& dr_uniform) +{ + return static_cast (rcut / dr_uniform) + 10; } - -void NumericalOrbitalLmTest::SetUp() { +void + NumericalOrbitalLmTest::SetUp () +{ /////////////////////////////////////////////////// // Parameters @@ -137,7 +148,7 @@ void NumericalOrbitalLmTest::SetUp() { dk_ = 0.01; dr_uniform_ = 0.001; - nk_ = calc_nk(ecutwfc, dk_); + nk_ = calc_nk (ecutwfc, dk_); // not really meaningful in this unit test index_atom_type_ = 42; @@ -152,160 +163,185 @@ void NumericalOrbitalLmTest::SetUp() { // if true, extra_uniform will compute zty force_flag_ = true; - /////////////////////////////////////////////////// // Read orbital file /////////////////////////////////////////////////// - std::ifstream ifs(orb_file); + std::ifstream ifs (orb_file); // variables read from orb_file double ecut = 0.0; // energy cutoff in orbital file, not the one in INPUT double rcut = 0.0; unsigned lmax = 0; std::vector nchi_l; // number of chi of each angular momentum - int nr_read = 0; // number of mesh points + int nr_read = 0; // number of mesh points // nr_ has to be odd, probably due to the way Simpson_Integral works // nr_ equals nr_read if nr_read is odd, // and equals nr_read+1 if nr_read is even - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "Element"); + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "Element"); ifs >> elem_label_; - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "Cutoff(Ry)"); + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "Cutoff(Ry)"); ifs >> ecut; - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "Cutoff(a.u.)"); + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "Cutoff(a.u.)"); ifs >> rcut; - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "Lmax"); + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "Lmax"); ifs >> lmax; // number of chi for each angular momentum - nchi_l.resize(lmax+1); + nchi_l.resize (lmax + 1); std::vector symbol = {"S", "P", "D", "F", "G", "H", 
"I", "K"}; - for (size_t l = 0; l <= lmax; ++l) { - std::string key = symbol[l] + "orbital-->"; - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, key); - ifs >> nchi_l[l]; - } + for (size_t l = 0; l <= lmax; ++l) + { + std::string key = symbol[l] + "orbital-->"; + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, key); + ifs >> nchi_l[l]; + } - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "Mesh"); + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "Mesh"); ifs >> nr_read; - nr_ = (nr_read%2) ? nr_read : nr_read+1; + nr_ = (nr_read % 2) ? nr_read : nr_read + 1; - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "dr"); + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "dr"); ifs >> dr_; - size_t nchi_tot = std::accumulate(nchi_l.begin(), nchi_l.end(), 0); + size_t nchi_tot = std::accumulate (nchi_l.begin (), nchi_l.end (), 0); - l_.resize(nchi_tot); - index_chi_l_.resize(nchi_tot); - chi_.resize(nchi_tot); - nolm_.resize(nchi_tot); + l_.resize (nchi_tot); + index_chi_l_.resize (nchi_tot); + chi_.resize (nchi_tot); + nolm_.resize (nchi_tot); r_radial_ = new double[nr_]; rab_ = new double[nr_]; - for (int i = 0; i != nr_; ++i) { - r_radial_[i] = i*dr_; - rab_[i] = dr_; - } + for (int i = 0; i != nr_; ++i) + { + r_radial_[i] = i * dr_; + rab_[i] = dr_; + } // used in the normalization of input chi - double* integrand= new double[nr_]; - for (int ir = 0; ir != nr_; ++ir) { - integrand[ir] = 0.0; - } + double* integrand = new double[nr_]; + for (int ir = 0; ir != nr_; ++ir) + { + integrand[ir] = 0.0; + } // the orbital file contains psi(r) psi_type_ = Numerical_Orbital_Lm::Psi_Type::Psi; size_t ichi_tot = 0; - for (size_t l = 0; l <= lmax; ++l) { - for (size_t ichi_l = 0; ichi_l < nchi_l[l]; ++ichi_l) { - - // the next block of the orbital file is like - // Type L N - // 0 0 0 - std::string dummy; - unsigned type = 0, L = 0, N = 0; - ifs >> dummy >> dummy >> dummy; - ifs >> type >> L >> N; - assert(L == l); - assert(N == ichi_l); - //std::cout << "l = " << l << " index_chi_l = " << N << std::endl; - - l_[ichi_tot] = 
l; - index_chi_l_[ichi_tot] = ichi_l; - - // read & normalize chi - chi_[ichi_tot] = new double[nr_]; - - for (int ir = 0; ir != nr_; ++ir) { - chi_[ichi_tot][ir] = 0.0; - } - - for (int ir = 0; ir != nr_read; ++ir) { - ifs >> chi_[ichi_tot][ir]; - integrand[ir] = std::pow(chi_[ichi_tot][ir]*r_radial_[ir], 2); - } - - // radint = \int_0^{\infty} dr r^2 [chi(r)]^2 - double radint = 0.0; - ModuleBase::Integral::Simpson_Integral(nr_, integrand, rab_, radint); - - for (int ir = 0; ir != nr_; ++ir) { - chi_[ichi_tot][ir] /= std::sqrt(radint); - - } - - ++ichi_tot; - + for (size_t l = 0; l <= lmax; ++l) + { + for (size_t ichi_l = 0; ichi_l < nchi_l[l]; ++ichi_l) + { + + // the next block of the orbital file is like + // Type L N + // 0 0 0 + std::string dummy; + unsigned type = 0, L = 0, N = 0; + ifs >> dummy >> dummy >> dummy; + ifs >> type >> L >> N; + assert (L == l); + assert (N == ichi_l); + // std::cout << "l = " << l << " index_chi_l = " << N << std::endl; + + l_[ichi_tot] = l; + index_chi_l_[ichi_tot] = ichi_l; + + // read & normalize chi + chi_[ichi_tot] = new double[nr_]; + + for (int ir = 0; ir != nr_; ++ir) + { + chi_[ichi_tot][ir] = 0.0; + } + + for (int ir = 0; ir != nr_read; ++ir) + { + ifs >> chi_[ichi_tot][ir]; + integrand[ir] = std::pow (chi_[ichi_tot][ir] * r_radial_[ir], 2); + } + + // radint = \int_0^{\infty} dr r^2 [chi(r)]^2 + double radint = 0.0; + ModuleBase::Integral::Simpson_Integral (nr_, integrand, rab_, radint); + + for (int ir = 0; ir != nr_; ++ir) + { + chi_[ichi_tot][ir] /= std::sqrt (radint); + } + + ++ichi_tot; + } } - } delete[] integrand; } - -void NumericalOrbitalLmTest::TearDown() { +void + NumericalOrbitalLmTest::TearDown () +{ delete[] r_radial_; delete[] rab_; - for (size_t ichi = 0; ichi != chi_.size(); ++ichi) { - delete[] chi_[ichi]; - } + for (size_t ichi = 0; ichi != chi_.size (); ++ichi) + { + delete[] chi_[ichi]; + } } - -void NumericalOrbitalLmTest::init() { +void + NumericalOrbitalLmTest::init () +{ // initialized the 
tested objects by pouring // the data collected in SetUp() into nolm_ - for (size_t ichi_tot = 0; ichi_tot != chi_.size(); ++ichi_tot) { - nolm_[ichi_tot].set_orbital_info(elem_label_, index_atom_type_, - l_[ichi_tot], index_chi_l_[ichi_tot], nr_, rab_, - r_radial_, psi_type_, chi_[ichi_tot], nk_, dk_, - dr_uniform_, flag_plot_, flag_sbpool_, force_flag_); - } + for (size_t ichi_tot = 0; ichi_tot != chi_.size (); ++ichi_tot) + { + nolm_[ichi_tot].set_orbital_info (elem_label_, + index_atom_type_, + l_[ichi_tot], + index_chi_l_[ichi_tot], + nr_, + rab_, + r_radial_, + psi_type_, + chi_[ichi_tot], + nk_, + dk_, + dr_uniform_, + flag_plot_, + flag_sbpool_, + force_flag_); + } } - -void NumericalOrbitalLmTest::init_with_different_k(double const& ecut, double const& dk) { +void + NumericalOrbitalLmTest::init_with_different_k (double const& ecut, double const& dk) +{ // initialize nolm_ with a k mesh specified by given ecut & dk this->dk_ = dk; - this->nk_ = calc_nk(ecut, dk); - this->init(); - + this->nk_ = calc_nk (ecut, dk); + this->init (); } - -bool NumericalOrbitalLmTest::check_file_match(size_t const& nline, double const* col1, double const* col2, double const& tol, std::string const& fname) { +bool + NumericalOrbitalLmTest::check_file_match (size_t const& nline, + double const* col1, + double const* col2, + double const& tol, + std::string const& fname) +{ /* This function checks whether the content of file named "fname" * contains certain data with certain format. 
@@ -321,226 +357,234 @@ bool NumericalOrbitalLmTest::check_file_match(size_t const& nline, double const* std::ifstream ifs; std::string tmp1, tmp2, tmp_line; - ifs.open(fname); + ifs.open (fname); - for (size_t i = 0; i != nline; ++i) { - std::stringstream ss; - std::getline(ifs, tmp_line); - ss << tmp_line; - ss >> tmp1 >> tmp2; + for (size_t i = 0; i != nline; ++i) + { + std::stringstream ss; + std::getline (ifs, tmp_line); + ss << tmp_line; + ss >> tmp1 >> tmp2; - if( std::abs( col1[i] - std::stod(tmp1.c_str()) ) > tol || - std::abs( col2[i] - std::stod(tmp2.c_str()) ) > tol || - ss.tellg() != -1 ) { - return false; + if (std::abs (col1[i] - std::stod (tmp1.c_str ())) > tol + || std::abs (col2[i] - std::stod (tmp2.c_str ())) > tol || ss.tellg () != -1) + { + return false; + } } - } - std::getline(ifs, tmp_line); - if ( ifs.tellg() != -1 ) { - return false; - } + std::getline (ifs, tmp_line); + if (ifs.tellg () != -1) + { + return false; + } - ifs.close(); + ifs.close (); return true; } - -TEST_F(NumericalOrbitalLmTest, Init) { +TEST_F (NumericalOrbitalLmTest, Init) +{ // this test checks whether set_orbital_info works as expected // before // a brief check of the default constructor - for (size_t ichi_tot = 0; ichi_tot != chi_.size(); ++ichi_tot) { - EXPECT_EQ(nolm_[ichi_tot].label, ""); - EXPECT_EQ(nolm_[ichi_tot].index_atom_type, 0); - EXPECT_EQ(nolm_[ichi_tot].angular_momentum_l, 0); - EXPECT_EQ(nolm_[ichi_tot].index_chi, 0); - EXPECT_EQ(nolm_[ichi_tot].nr, 1); - EXPECT_EQ(nolm_[ichi_tot].nk, 1); - EXPECT_EQ(nolm_[ichi_tot].nr_uniform, 1); - - EXPECT_DOUBLE_EQ(nolm_[ichi_tot].rcut, 0.0); - EXPECT_DOUBLE_EQ(nolm_[ichi_tot].kcut, 0.0); - EXPECT_DOUBLE_EQ(nolm_[ichi_tot].dk, 0.0); - EXPECT_DOUBLE_EQ(nolm_[ichi_tot].dr_uniform, -1.0); - EXPECT_DOUBLE_EQ(nolm_[ichi_tot].zty, 0.0); - - EXPECT_TRUE(nolm_[ichi_tot].r_radial.empty()); - EXPECT_TRUE(nolm_[ichi_tot].k_radial.empty()); - EXPECT_TRUE(nolm_[ichi_tot].psi.empty()); - 
EXPECT_TRUE(nolm_[ichi_tot].psir.empty()); - EXPECT_TRUE(nolm_[ichi_tot].psif.empty()); - EXPECT_TRUE(nolm_[ichi_tot].psik.empty()); - EXPECT_TRUE(nolm_[ichi_tot].psik2.empty()); - } - - this->init(); - - // after - for (size_t ichi_tot = 0; ichi_tot != chi_.size(); ++ichi_tot) { - EXPECT_EQ(nolm_[ichi_tot].index_atom_type, index_atom_type_); - EXPECT_EQ(nolm_[ichi_tot].nk, nk_); - EXPECT_EQ(nolm_[ichi_tot].nr_uniform, - this->calc_nr_uniform(nolm_[ichi_tot].rcut, dr_uniform_)); - - EXPECT_DOUBLE_EQ(nolm_[ichi_tot].kcut, (nk_-1)*dk_); - EXPECT_DOUBLE_EQ(nolm_[ichi_tot].dk, dk_); - EXPECT_DOUBLE_EQ(nolm_[ichi_tot].dr_uniform, dr_uniform_); - // TODO zty yet to be understood - //EXPECT_DOUBLE_EQ(nolm_[ichi_tot].zty, 0.0); - - EXPECT_EQ(nolm_[ichi_tot].r_radial.size(), nr_); - EXPECT_EQ(nolm_[ichi_tot].k_radial.size(), nk_); - EXPECT_EQ(nolm_[ichi_tot].psi.size(), nr_); - EXPECT_EQ(nolm_[ichi_tot].psir.size(), nr_); - EXPECT_EQ(nolm_[ichi_tot].psif.size(), nk_); - EXPECT_EQ(nolm_[ichi_tot].psik.size(), nk_); - EXPECT_EQ(nolm_[ichi_tot].psik2.size(), nk_); - - for (int ir = 0; ir != nr_; ++ir) { - EXPECT_DOUBLE_EQ(nolm_[ichi_tot].r_radial[ir], ir*0.01); - EXPECT_DOUBLE_EQ(nolm_[ichi_tot].psi[ir], chi_[ichi_tot][ir]); - EXPECT_DOUBLE_EQ(nolm_[ichi_tot].psir[ir], ir*0.01*chi_[ichi_tot][ir]); + for (size_t ichi_tot = 0; ichi_tot != chi_.size (); ++ichi_tot) + { + EXPECT_EQ (nolm_[ichi_tot].label, ""); + EXPECT_EQ (nolm_[ichi_tot].index_atom_type, 0); + EXPECT_EQ (nolm_[ichi_tot].angular_momentum_l, 0); + EXPECT_EQ (nolm_[ichi_tot].index_chi, 0); + EXPECT_EQ (nolm_[ichi_tot].nr, 1); + EXPECT_EQ (nolm_[ichi_tot].nk, 1); + EXPECT_EQ (nolm_[ichi_tot].nr_uniform, 1); + + EXPECT_DOUBLE_EQ (nolm_[ichi_tot].rcut, 0.0); + EXPECT_DOUBLE_EQ (nolm_[ichi_tot].kcut, 0.0); + EXPECT_DOUBLE_EQ (nolm_[ichi_tot].dk, 0.0); + EXPECT_DOUBLE_EQ (nolm_[ichi_tot].dr_uniform, -1.0); + EXPECT_DOUBLE_EQ (nolm_[ichi_tot].zty, 0.0); + + EXPECT_TRUE (nolm_[ichi_tot].r_radial.empty ()); + EXPECT_TRUE 
(nolm_[ichi_tot].k_radial.empty ()); + EXPECT_TRUE (nolm_[ichi_tot].psi.empty ()); + EXPECT_TRUE (nolm_[ichi_tot].psir.empty ()); + EXPECT_TRUE (nolm_[ichi_tot].psif.empty ()); + EXPECT_TRUE (nolm_[ichi_tot].psik.empty ()); + EXPECT_TRUE (nolm_[ichi_tot].psik2.empty ()); } - // whether psif makes sense or not is checked in r2k2r_consistency + this->init (); - for (size_t ik = 0; ik != nk_; ++ik) { - EXPECT_DOUBLE_EQ(nolm_[ichi_tot].k_radial[ik], ik*dk_); - EXPECT_DOUBLE_EQ(nolm_[ichi_tot].psik[ik], ik*dk_*nolm_[ichi_tot].psif[ik]); - EXPECT_DOUBLE_EQ(nolm_[ichi_tot].psik2[ik], ik*dk_*nolm_[ichi_tot].psik[ik]); + // after + for (size_t ichi_tot = 0; ichi_tot != chi_.size (); ++ichi_tot) + { + EXPECT_EQ (nolm_[ichi_tot].index_atom_type, index_atom_type_); + EXPECT_EQ (nolm_[ichi_tot].nk, nk_); + EXPECT_EQ (nolm_[ichi_tot].nr_uniform, this->calc_nr_uniform (nolm_[ichi_tot].rcut, dr_uniform_)); + + EXPECT_DOUBLE_EQ (nolm_[ichi_tot].kcut, (nk_ - 1) * dk_); + EXPECT_DOUBLE_EQ (nolm_[ichi_tot].dk, dk_); + EXPECT_DOUBLE_EQ (nolm_[ichi_tot].dr_uniform, dr_uniform_); + // TODO zty yet to be understood + // EXPECT_DOUBLE_EQ(nolm_[ichi_tot].zty, 0.0); + + EXPECT_EQ (nolm_[ichi_tot].r_radial.size (), nr_); + EXPECT_EQ (nolm_[ichi_tot].k_radial.size (), nk_); + EXPECT_EQ (nolm_[ichi_tot].psi.size (), nr_); + EXPECT_EQ (nolm_[ichi_tot].psir.size (), nr_); + EXPECT_EQ (nolm_[ichi_tot].psif.size (), nk_); + EXPECT_EQ (nolm_[ichi_tot].psik.size (), nk_); + EXPECT_EQ (nolm_[ichi_tot].psik2.size (), nk_); + + for (int ir = 0; ir != nr_; ++ir) + { + EXPECT_DOUBLE_EQ (nolm_[ichi_tot].r_radial[ir], ir * 0.01); + EXPECT_DOUBLE_EQ (nolm_[ichi_tot].psi[ir], chi_[ichi_tot][ir]); + EXPECT_DOUBLE_EQ (nolm_[ichi_tot].psir[ir], ir * 0.01 * chi_[ichi_tot][ir]); + } + + // whether psif makes sense or not is checked in r2k2r_consistency + + for (size_t ik = 0; ik != nk_; ++ik) + { + EXPECT_DOUBLE_EQ (nolm_[ichi_tot].k_radial[ik], ik * dk_); + EXPECT_DOUBLE_EQ (nolm_[ichi_tot].psik[ik], ik * dk_ * 
nolm_[ichi_tot].psif[ik]); + EXPECT_DOUBLE_EQ (nolm_[ichi_tot].psik2[ik], ik * dk_ * nolm_[ichi_tot].psik[ik]); + } } - } // see orb file for details double max_tol = 1e-12; - EXPECT_EQ(nolm_[0].label, "O"); - EXPECT_EQ(nolm_[0].angular_momentum_l, 0); - EXPECT_EQ(nolm_[0].index_chi, 0); - EXPECT_EQ(nolm_[0].nr, 701); - EXPECT_DOUBLE_EQ(nolm_[0].rcut, 7.0); - EXPECT_NEAR(nolm_[0].psi[0], 1.208504975904e+00, max_tol); - EXPECT_NEAR(nolm_[0].psi[1], 1.208605373194e+00, max_tol); - EXPECT_NEAR(nolm_[0].psi[4], 1.210103935461e+00, max_tol); - EXPECT_NEAR(nolm_[0].psi[699], 4.465396560257e-08, max_tol); - EXPECT_NEAR(nolm_[0].psi[700], 0.0, max_tol); - - EXPECT_EQ(nolm_[1].label, "O"); - EXPECT_EQ(nolm_[1].angular_momentum_l, 0); - EXPECT_EQ(nolm_[1].index_chi, 1); - EXPECT_EQ(nolm_[1].nr, 701); - EXPECT_DOUBLE_EQ(nolm_[1].rcut, 7.0); - EXPECT_NEAR(nolm_[1].psi[0], 7.254873428942e-01, max_tol); - EXPECT_NEAR(nolm_[1].psi[1], 7.256666701836e-01, max_tol); - EXPECT_NEAR(nolm_[1].psi[4], 7.283448557011e-01, max_tol); - EXPECT_NEAR(nolm_[1].psi[699], -1.916246212603e-06, max_tol); - EXPECT_NEAR(nolm_[1].psi[700], 0.0, max_tol); - - EXPECT_EQ(nolm_[2].label, "O"); - EXPECT_EQ(nolm_[2].angular_momentum_l, 1); - EXPECT_EQ(nolm_[2].index_chi, 0); - EXPECT_EQ(nolm_[2].nr, 701); - EXPECT_DOUBLE_EQ(nolm_[2].rcut, 7.0); - EXPECT_NEAR(nolm_[2].psi[0], 0.0, max_tol); - EXPECT_NEAR(nolm_[2].psi[1], 4.626669306440e-02, max_tol); - EXPECT_NEAR(nolm_[2].psi[4], 1.845014292772e-01, max_tol); - EXPECT_NEAR(nolm_[2].psi[699], 2.870401658966e-07, max_tol); - EXPECT_NEAR(nolm_[2].psi[700], 0.0, max_tol); - - EXPECT_EQ(nolm_[3].label, "O"); - EXPECT_EQ(nolm_[3].angular_momentum_l, 1); - EXPECT_EQ(nolm_[3].index_chi, 1); - EXPECT_EQ(nolm_[3].nr, 701); - EXPECT_DOUBLE_EQ(nolm_[3].rcut, 7.0); - EXPECT_NEAR(nolm_[3].psi[0], 0.0, max_tol); - EXPECT_NEAR(nolm_[3].psi[1], 3.375340101333e-02, max_tol); - EXPECT_NEAR(nolm_[3].psi[4], 1.346256082234e-01, max_tol); - EXPECT_NEAR(nolm_[3].psi[699], 
-2.771091616120e-06, max_tol); - EXPECT_NEAR(nolm_[3].psi[700], 0.0, max_tol); - - EXPECT_EQ(nolm_[4].label, "O"); - EXPECT_EQ(nolm_[4].angular_momentum_l, 2); - EXPECT_EQ(nolm_[4].index_chi, 0); - EXPECT_EQ(nolm_[4].nr, 701); - EXPECT_DOUBLE_EQ(nolm_[4].rcut, 7.0); - EXPECT_NEAR(nolm_[4].psi[0], 0.0, max_tol); - EXPECT_NEAR(nolm_[4].psi[1], -3.343626342662e-04, max_tol); - EXPECT_NEAR(nolm_[4].psi[4], -5.337546547975e-03, max_tol); - EXPECT_NEAR(nolm_[4].psi[699], 1.396308876444e-06, max_tol); - EXPECT_NEAR(nolm_[4].psi[700], 0.0, max_tol); + EXPECT_EQ (nolm_[0].label, "O"); + EXPECT_EQ (nolm_[0].angular_momentum_l, 0); + EXPECT_EQ (nolm_[0].index_chi, 0); + EXPECT_EQ (nolm_[0].nr, 701); + EXPECT_DOUBLE_EQ (nolm_[0].rcut, 7.0); + EXPECT_NEAR (nolm_[0].psi[0], 1.208504975904e+00, max_tol); + EXPECT_NEAR (nolm_[0].psi[1], 1.208605373194e+00, max_tol); + EXPECT_NEAR (nolm_[0].psi[4], 1.210103935461e+00, max_tol); + EXPECT_NEAR (nolm_[0].psi[699], 4.465396560257e-08, max_tol); + EXPECT_NEAR (nolm_[0].psi[700], 0.0, max_tol); + + EXPECT_EQ (nolm_[1].label, "O"); + EXPECT_EQ (nolm_[1].angular_momentum_l, 0); + EXPECT_EQ (nolm_[1].index_chi, 1); + EXPECT_EQ (nolm_[1].nr, 701); + EXPECT_DOUBLE_EQ (nolm_[1].rcut, 7.0); + EXPECT_NEAR (nolm_[1].psi[0], 7.254873428942e-01, max_tol); + EXPECT_NEAR (nolm_[1].psi[1], 7.256666701836e-01, max_tol); + EXPECT_NEAR (nolm_[1].psi[4], 7.283448557011e-01, max_tol); + EXPECT_NEAR (nolm_[1].psi[699], -1.916246212603e-06, max_tol); + EXPECT_NEAR (nolm_[1].psi[700], 0.0, max_tol); + + EXPECT_EQ (nolm_[2].label, "O"); + EXPECT_EQ (nolm_[2].angular_momentum_l, 1); + EXPECT_EQ (nolm_[2].index_chi, 0); + EXPECT_EQ (nolm_[2].nr, 701); + EXPECT_DOUBLE_EQ (nolm_[2].rcut, 7.0); + EXPECT_NEAR (nolm_[2].psi[0], 0.0, max_tol); + EXPECT_NEAR (nolm_[2].psi[1], 4.626669306440e-02, max_tol); + EXPECT_NEAR (nolm_[2].psi[4], 1.845014292772e-01, max_tol); + EXPECT_NEAR (nolm_[2].psi[699], 2.870401658966e-07, max_tol); + EXPECT_NEAR (nolm_[2].psi[700], 0.0, 
max_tol); + + EXPECT_EQ (nolm_[3].label, "O"); + EXPECT_EQ (nolm_[3].angular_momentum_l, 1); + EXPECT_EQ (nolm_[3].index_chi, 1); + EXPECT_EQ (nolm_[3].nr, 701); + EXPECT_DOUBLE_EQ (nolm_[3].rcut, 7.0); + EXPECT_NEAR (nolm_[3].psi[0], 0.0, max_tol); + EXPECT_NEAR (nolm_[3].psi[1], 3.375340101333e-02, max_tol); + EXPECT_NEAR (nolm_[3].psi[4], 1.346256082234e-01, max_tol); + EXPECT_NEAR (nolm_[3].psi[699], -2.771091616120e-06, max_tol); + EXPECT_NEAR (nolm_[3].psi[700], 0.0, max_tol); + + EXPECT_EQ (nolm_[4].label, "O"); + EXPECT_EQ (nolm_[4].angular_momentum_l, 2); + EXPECT_EQ (nolm_[4].index_chi, 0); + EXPECT_EQ (nolm_[4].nr, 701); + EXPECT_DOUBLE_EQ (nolm_[4].rcut, 7.0); + EXPECT_NEAR (nolm_[4].psi[0], 0.0, max_tol); + EXPECT_NEAR (nolm_[4].psi[1], -3.343626342662e-04, max_tol); + EXPECT_NEAR (nolm_[4].psi[4], -5.337546547975e-03, max_tol); + EXPECT_NEAR (nolm_[4].psi[699], 1.396308876444e-06, max_tol); + EXPECT_NEAR (nolm_[4].psi[700], 0.0, max_tol); } - -TEST_F(NumericalOrbitalLmTest, Getters) { +TEST_F (NumericalOrbitalLmTest, Getters) +{ // this test checks whether all the getters work as expected // whether the values make sense or not is tested in "initialize" - this->init(); - - for (size_t i = 0; i != chi_.size(); ++i) { - EXPECT_EQ(nolm_[i].getLabel(), nolm_[i].label); - EXPECT_EQ(nolm_[i].getType(), nolm_[i].index_atom_type); - EXPECT_EQ(nolm_[i].getL(), nolm_[i].angular_momentum_l); - EXPECT_EQ(nolm_[i].getChi(), nolm_[i].index_chi); - - EXPECT_DOUBLE_EQ(nolm_[i].getDk(), nolm_[i].dk); - EXPECT_DOUBLE_EQ(nolm_[i].getDruniform(), dr_uniform_); - EXPECT_EQ(nolm_[i].getPsiuniform(), &nolm_[i].psi_uniform[0]); - EXPECT_EQ(nolm_[i].getDpsiuniform(), &nolm_[i].dpsi_uniform[0]); - EXPECT_EQ(nolm_[i].getNruniform(), nolm_[i].nr_uniform); - EXPECT_EQ(nolm_[i].getDruniform(), nolm_[i].dr_uniform); - - EXPECT_EQ(nolm_[i].getNr(), nolm_[i].nr); - EXPECT_EQ(nolm_[i].getNk(), nolm_[i].nk); - - EXPECT_EQ(nolm_[i].getRcut(), nolm_[i].rcut); - 
EXPECT_EQ(nolm_[i].getKcut(), nolm_[i].kcut); - - EXPECT_EQ(nolm_[i].getRadial(), &nolm_[i].r_radial[0]); - EXPECT_EQ(nolm_[i].get_r_radial(), nolm_[i].r_radial); - - EXPECT_EQ(nolm_[i].getRab(), &nolm_[i].rab[0]); - EXPECT_EQ(nolm_[i].get_rab(), nolm_[i].rab); - - EXPECT_EQ(nolm_[i].getDk(), nolm_[i].dk); - EXPECT_EQ(nolm_[i].getKpoint(), &nolm_[i].k_radial[0]); - EXPECT_EQ(nolm_[i].get_k_radial(), nolm_[i].k_radial); - - EXPECT_EQ(nolm_[i].getPsi(), &nolm_[i].psi[0]); - EXPECT_EQ(nolm_[i].getPsi_r(), &nolm_[i].psir[0]); - EXPECT_EQ(nolm_[i].getPsif(), &nolm_[i].psif[0]); - EXPECT_EQ(nolm_[i].getPsi_k(), &nolm_[i].psik[0]); - EXPECT_EQ(nolm_[i].getPsi_k2(), &nolm_[i].psik2[0]); - - EXPECT_EQ(nolm_[i].get_psi(), nolm_[i].psi); - EXPECT_EQ(nolm_[i].get_psif(), nolm_[i].psif); - EXPECT_EQ(nolm_[i].get_psi_k(), nolm_[i].psik); - EXPECT_EQ(nolm_[i].get_psi_k2(), nolm_[i].psik2); - - for (size_t ir = 0; ir != nolm_[i].r_radial.size(); ++ir) { - EXPECT_EQ(nolm_[i].getRadial(ir), nolm_[i].r_radial[ir]); - EXPECT_EQ(nolm_[i].getRab(ir), nolm_[i].rab[ir]); - EXPECT_EQ(nolm_[i].getPsi(ir), nolm_[i].psi[ir]); - EXPECT_EQ(nolm_[i].getPsi_r(ir), nolm_[i].psir[ir]); - } - - for (size_t ik = 0; ik != nolm_[i].k_radial.size(); ++ik) { - EXPECT_EQ(nolm_[i].getKpoint(ik), nolm_[i].k_radial[ik]); - EXPECT_EQ(nolm_[i].getPsif(ik), nolm_[i].psif[ik]); - EXPECT_EQ(nolm_[i].getPsi_k(ik), nolm_[i].psik[ik]); - EXPECT_EQ(nolm_[i].getPsi_k2(ik), nolm_[i].psik2[ik]); + this->init (); + + for (size_t i = 0; i != chi_.size (); ++i) + { + EXPECT_EQ (nolm_[i].getLabel (), nolm_[i].label); + EXPECT_EQ (nolm_[i].getType (), nolm_[i].index_atom_type); + EXPECT_EQ (nolm_[i].getL (), nolm_[i].angular_momentum_l); + EXPECT_EQ (nolm_[i].getChi (), nolm_[i].index_chi); + + EXPECT_DOUBLE_EQ (nolm_[i].getDk (), nolm_[i].dk); + EXPECT_DOUBLE_EQ (nolm_[i].getDruniform (), dr_uniform_); + EXPECT_EQ (nolm_[i].getPsiuniform (), &nolm_[i].psi_uniform[0]); + EXPECT_EQ (nolm_[i].getDpsiuniform (), 
&nolm_[i].dpsi_uniform[0]); + EXPECT_EQ (nolm_[i].getNruniform (), nolm_[i].nr_uniform); + EXPECT_EQ (nolm_[i].getDruniform (), nolm_[i].dr_uniform); + + EXPECT_EQ (nolm_[i].getNr (), nolm_[i].nr); + EXPECT_EQ (nolm_[i].getNk (), nolm_[i].nk); + + EXPECT_EQ (nolm_[i].getRcut (), nolm_[i].rcut); + EXPECT_EQ (nolm_[i].getKcut (), nolm_[i].kcut); + + EXPECT_EQ (nolm_[i].getRadial (), &nolm_[i].r_radial[0]); + EXPECT_EQ (nolm_[i].get_r_radial (), nolm_[i].r_radial); + + EXPECT_EQ (nolm_[i].getRab (), &nolm_[i].rab[0]); + EXPECT_EQ (nolm_[i].get_rab (), nolm_[i].rab); + + EXPECT_EQ (nolm_[i].getDk (), nolm_[i].dk); + EXPECT_EQ (nolm_[i].getKpoint (), &nolm_[i].k_radial[0]); + EXPECT_EQ (nolm_[i].get_k_radial (), nolm_[i].k_radial); + + EXPECT_EQ (nolm_[i].getPsi (), &nolm_[i].psi[0]); + EXPECT_EQ (nolm_[i].getPsi_r (), &nolm_[i].psir[0]); + EXPECT_EQ (nolm_[i].getPsif (), &nolm_[i].psif[0]); + EXPECT_EQ (nolm_[i].getPsi_k (), &nolm_[i].psik[0]); + EXPECT_EQ (nolm_[i].getPsi_k2 (), &nolm_[i].psik2[0]); + + EXPECT_EQ (nolm_[i].get_psi (), nolm_[i].psi); + EXPECT_EQ (nolm_[i].get_psif (), nolm_[i].psif); + EXPECT_EQ (nolm_[i].get_psi_k (), nolm_[i].psik); + EXPECT_EQ (nolm_[i].get_psi_k2 (), nolm_[i].psik2); + + for (size_t ir = 0; ir != nolm_[i].r_radial.size (); ++ir) + { + EXPECT_EQ (nolm_[i].getRadial (ir), nolm_[i].r_radial[ir]); + EXPECT_EQ (nolm_[i].getRab (ir), nolm_[i].rab[ir]); + EXPECT_EQ (nolm_[i].getPsi (ir), nolm_[i].psi[ir]); + EXPECT_EQ (nolm_[i].getPsi_r (ir), nolm_[i].psir[ir]); + } + + for (size_t ik = 0; ik != nolm_[i].k_radial.size (); ++ik) + { + EXPECT_EQ (nolm_[i].getKpoint (ik), nolm_[i].k_radial[ik]); + EXPECT_EQ (nolm_[i].getPsif (ik), nolm_[i].psif[ik]); + EXPECT_EQ (nolm_[i].getPsi_k (ik), nolm_[i].psik[ik]); + EXPECT_EQ (nolm_[i].getPsi_k2 (ik), nolm_[i].psik2[ik]); + } } - } } - -TEST_F(NumericalOrbitalLmTest, PsiNormalization) { +TEST_F (NumericalOrbitalLmTest, PsiNormalization) +{ // This test checks the normalization of // 1. 
the radial function in the real space @@ -548,107 +592,118 @@ TEST_F(NumericalOrbitalLmTest, PsiNormalization) { // 3. the interpolated radial function (psi_uniform) in the real space // default ecutwfc might be inadequate, use a larger one instead - this->init_with_different_k(1600.0, dk_); + this->init_with_different_k (1600.0, dk_); double radint = 0.0; double* rintegrand = new double[nr_]; double* kintegrand = new double[nk_]; - for (size_t i = 0; i != chi_.size(); ++i) { + for (size_t i = 0; i != chi_.size (); ++i) + { - // normalization check of chi(r) - for (int ir = 0; ir != nr_; ++ir) { - rintegrand[ir] = std::pow(nolm_[i].getPsi_r(ir), 2); - } - - ModuleBase::Integral::Simpson_Integral(nr_, rintegrand, rab_, radint); - EXPECT_NEAR(radint, 1.0, 1e-10); + // normalization check of chi(r) + for (int ir = 0; ir != nr_; ++ir) + { + rintegrand[ir] = std::pow (nolm_[i].getPsi_r (ir), 2); + } - // normalization check of chi(k) - for (size_t ik = 0; ik != nk_; ++ik) { - kintegrand[ik] = std::pow(nolm_[i].getPsi_k(ik), 2); - } + ModuleBase::Integral::Simpson_Integral (nr_, rintegrand, rab_, radint); + EXPECT_NEAR (radint, 1.0, 1e-10); - ModuleBase::Integral::Simpson_Integral(nk_, kintegrand, dk_, radint); - EXPECT_NEAR(radint, 1.0, 1e-6); // what is a reasonable error? + // normalization check of chi(k) + for (size_t ik = 0; ik != nk_; ++ik) + { + kintegrand[ik] = std::pow (nolm_[i].getPsi_k (ik), 2); + } - } + ModuleBase::Integral::Simpson_Integral (nk_, kintegrand, dk_, radint); + EXPECT_NEAR (radint, 1.0, 1e-6); // what is a reasonable error? 
+ } delete[] rintegrand; delete[] kintegrand; - // check the normalization of psi_uniform - for (size_t i = 0; i != chi_.size(); ++i) { + for (size_t i = 0; i != chi_.size (); ++i) + { - int nr = nolm_[i].nr_uniform; + int nr = nolm_[i].nr_uniform; - // note that Simpson_Integral only accepts an odd number of mesh points - if (nr%2 == 0) { - ++nr; - } + // note that Simpson_Integral only accepts an odd number of mesh points + if (nr % 2 == 0) + { + ++nr; + } - rintegrand = new double[nr]; - for (int ir = 0; ir != nr; ++ir) { - rintegrand[ir] = 0.0; - } + rintegrand = new double[nr]; + for (int ir = 0; ir != nr; ++ir) + { + rintegrand[ir] = 0.0; + } - // normalization check of psi_uniform - for (int ir = 0; ir != nolm_[i].nr_uniform; ++ir) { - rintegrand[ir] = std::pow(ir*dr_uniform_*nolm_[i].psi_uniform[ir], 2); - } + // normalization check of psi_uniform + for (int ir = 0; ir != nolm_[i].nr_uniform; ++ir) + { + rintegrand[ir] = std::pow (ir * dr_uniform_ * nolm_[i].psi_uniform[ir], 2); + } - ModuleBase::Integral::Simpson_Integral(nr, rintegrand, dr_uniform_, radint); - EXPECT_NEAR(radint, 1.0, 1e-6); + ModuleBase::Integral::Simpson_Integral (nr, rintegrand, dr_uniform_, radint); + EXPECT_NEAR (radint, 1.0, 1e-6); - delete[] rintegrand; - } + delete[] rintegrand; + } } - -TEST_F(NumericalOrbitalLmTest, K2RConsistency) { +TEST_F (NumericalOrbitalLmTest, K2RConsistency) +{ // This test checks whether the results of // cal_kradial & cal_kradial_sbpool agree. 
- this->init(); + this->init (); double* chik_ = new double[nk_]; double* kchik_ = new double[nk_]; double* k2chik_ = new double[nk_]; - for (size_t i = 0; i != chi_.size(); ++i) { - // save previous chi(k) results obtained by init() - for (size_t ik = 0; ik != nk_; ++ik) { - chik_[ik] = nolm_[i].getPsif(ik); - kchik_[ik] = nolm_[i].getPsi_k(ik); - k2chik_[ik] = nolm_[i].getPsi_k2(ik); - } - - // use a different method than which used in init() - if (flag_sbpool_) { - nolm_[i].cal_kradial(); - } else { - nolm_[i].cal_kradial_sbpool(); + for (size_t i = 0; i != chi_.size (); ++i) + { + // save previous chi(k) results obtained by init() + for (size_t ik = 0; ik != nk_; ++ik) + { + chik_[ik] = nolm_[i].getPsif (ik); + kchik_[ik] = nolm_[i].getPsi_k (ik); + k2chik_[ik] = nolm_[i].getPsi_k2 (ik); + } + + // use a different method than which used in init() + if (flag_sbpool_) + { + nolm_[i].cal_kradial (); + } + else + { + nolm_[i].cal_kradial_sbpool (); + } + + double max_tol = 1e-6; + for (size_t ik = 0; ik != nk_; ++ik) + { + EXPECT_NEAR (chik_[ik], nolm_[i].getPsif (ik), max_tol); + EXPECT_NEAR (kchik_[ik], nolm_[i].getPsi_k (ik), max_tol); + EXPECT_NEAR (k2chik_[ik], nolm_[i].getPsi_k2 (ik), max_tol); + } } - - double max_tol = 1e-6; - for (size_t ik = 0; ik != nk_; ++ik) { - EXPECT_NEAR(chik_[ik], nolm_[i].getPsif(ik), max_tol); - EXPECT_NEAR(kchik_[ik], nolm_[i].getPsi_k(ik), max_tol); - EXPECT_NEAR(k2chik_[ik], nolm_[i].getPsi_k2(ik), max_tol); - } - } } - -TEST_F(NumericalOrbitalLmTest, R2K2RConsistency) { +TEST_F (NumericalOrbitalLmTest, R2K2RConsistency) +{ // This test checks whether cal_rradial_sbpool brings chi(k) // back to the original chi(r) by looking at the error // \int dr r^2 (chi_in(r)-chi_out(r))^2 - this->init_with_different_k(1600.0, dk_); + this->init_with_different_k (1600.0, dk_); // original r*psi(r) double* rchi_ = new double[nr_]; @@ -658,47 +713,51 @@ TEST_F(NumericalOrbitalLmTest, R2K2RConsistency) { // maximum tolerance of err double 
max_tol = 1e-6; - for (size_t i = 0; i != chi_.size(); ++i) { - for (int ir = 0; ir != nr_; ++ir) { - rchi_[ir] = nolm_[i].getPsi_r(ir); - } + for (size_t i = 0; i != chi_.size (); ++i) + { + for (int ir = 0; ir != nr_; ++ir) + { + rchi_[ir] = nolm_[i].getPsi_r (ir); + } - nolm_[i].cal_rradial_sbpool(); + nolm_[i].cal_rradial_sbpool (); - for (int ir = 0; ir != nr_; ++ir) { - err_integrand[ir] = std::pow(rchi_[ir]-nolm_[i].getPsi_r(ir), 2); - } + for (int ir = 0; ir != nr_; ++ir) + { + err_integrand[ir] = std::pow (rchi_[ir] - nolm_[i].getPsi_r (ir), 2); + } - ModuleBase::Integral::Simpson_Integral(nr_, err_integrand, rab_, err); - EXPECT_LT(err, max_tol); - } + ModuleBase::Integral::Simpson_Integral (nr_, err_integrand, rab_, err); + EXPECT_LT (err, max_tol); + } } - -TEST_F(NumericalOrbitalLmTest, FiniteDiffPsiUniform) { +TEST_F (NumericalOrbitalLmTest, FiniteDiffPsiUniform) +{ // this test checks whether dpsi_uniform agrees with the // finite difference of psi_uniform - this->init(); + this->init (); double max_tol = 1e-3; - for (size_t i = 0; i != nolm_.size(); ++i) { - std::vector& f = nolm_[i].psi_uniform; - for (int ir = 4; ir != nolm_[i].nr_uniform-4; ++ir) { - double fd = - ( +1.0/280*f[ir-4] - 4.0/105*f[ir-3] + 1.0/5*f[ir-2] - 4.0/5*f[ir-1] - -1.0/280*f[ir+4] + 4.0/105*f[ir+3] - 1.0/5*f[ir+2] + 4.0/5*f[ir+1] - ) / nolm_[i].dr_uniform; - EXPECT_NEAR(fd, nolm_[i].dpsi_uniform[ir], max_tol); + for (size_t i = 0; i != nolm_.size (); ++i) + { + std::vector& f = nolm_[i].psi_uniform; + for (int ir = 4; ir != nolm_[i].nr_uniform - 4; ++ir) + { + double fd + = (+1.0 / 280 * f[ir - 4] - 4.0 / 105 * f[ir - 3] + 1.0 / 5 * f[ir - 2] - 4.0 / 5 * f[ir - 1] + - 1.0 / 280 * f[ir + 4] + 4.0 / 105 * f[ir + 3] - 1.0 / 5 * f[ir + 2] + 4.0 / 5 * f[ir + 1]) + / nolm_[i].dr_uniform; + EXPECT_NEAR (fd, nolm_[i].dpsi_uniform[ir], max_tol); + } } - } - } - -TEST_F(NumericalOrbitalLmTest, PsiSave) { +TEST_F (NumericalOrbitalLmTest, PsiSave) +{ // This test checks whether plot() 
works as expected. // @@ -709,7 +768,7 @@ TEST_F(NumericalOrbitalLmTest, PsiSave) { // so files cannot be opened and plot() should fail at this stage. // but even if file cannot be opened, plot should not throw! flag_plot_ = true; - ASSERT_NO_THROW(this->init()); + ASSERT_NO_THROW (this->init ()); std::vector orb{"s", "s", "p", "p", "d"}; std::ifstream ifs; @@ -722,109 +781,143 @@ TEST_F(NumericalOrbitalLmTest, PsiSave) { std::string dir = "./O/"; // we now creat the directory to hold data files - mkdir(dir.c_str(), 0777); - - for (size_t i = 0; i != nolm_.size(); ++i) { - - // this call should successfully write data to files - ASSERT_NO_THROW(nolm_[i].plot()); - - auto get_fname = [&] (std::string const& suffix) -> std::string { - return dir+"/O-" + orb[i] + std::to_string(nolm_[i].index_chi+1) - + "-orbital-" + suffix + ".dat"; - }; - - std::string psi_fname = get_fname("r"); - std::string psik_fname = get_fname("k"); - std::string psiru_fname = get_fname("ru"); - std::string psidru_fname = get_fname("dru"); - - EXPECT_TRUE(this->check_file_match(nolm_[i].nr, - nolm_[i].getRadial(), nolm_[i].getPsi(), tol, psi_fname)); - EXPECT_TRUE(this->check_file_match(nolm_[i].nk, - nolm_[i].getKpoint(), nolm_[i].getPsi_k(), tol, psik_fname)); - - double* ru_mesh = new double[nolm_[i].nr_uniform]; - for (int ir = 0; ir != nolm_[i].nr_uniform; ++ir) { - ru_mesh[ir] = ir*nolm_[i].dr_uniform; + mkdir (dir.c_str (), 0777); + + for (size_t i = 0; i != nolm_.size (); ++i) + { + + // this call should successfully write data to files + ASSERT_NO_THROW (nolm_[i].plot ()); + + auto get_fname = [&] (std::string const& suffix) -> std::string + { + return dir + "/O-" + orb[i] + std::to_string (nolm_[i].index_chi + 1) + "-orbital-" + suffix + + ".dat"; + }; + + std::string psi_fname = get_fname ("r"); + std::string psik_fname = get_fname ("k"); + std::string psiru_fname = get_fname ("ru"); + std::string psidru_fname = get_fname ("dru"); + + EXPECT_TRUE ( + this->check_file_match 
(nolm_[i].nr, nolm_[i].getRadial (), nolm_[i].getPsi (), tol, psi_fname)); + EXPECT_TRUE ( + this->check_file_match (nolm_[i].nk, nolm_[i].getKpoint (), nolm_[i].getPsi_k (), tol, psik_fname)); + + double* ru_mesh = new double[nolm_[i].nr_uniform]; + for (int ir = 0; ir != nolm_[i].nr_uniform; ++ir) + { + ru_mesh[ir] = ir * nolm_[i].dr_uniform; + } + + EXPECT_TRUE ( + this->check_file_match (nolm_[i].nr_uniform, ru_mesh, nolm_[i].getPsiuniform (), tol, psiru_fname)); + EXPECT_TRUE ( + this->check_file_match (nolm_[i].nr_uniform, ru_mesh, nolm_[i].getDpsiuniform (), tol, psidru_fname)); + + remove (psi_fname.c_str ()); + remove (psik_fname.c_str ()); + remove (psiru_fname.c_str ()); + remove (psidru_fname.c_str ()); } - EXPECT_TRUE(this->check_file_match(nolm_[i].nr_uniform, - ru_mesh, nolm_[i].getPsiuniform(), tol, psiru_fname)); - EXPECT_TRUE(this->check_file_match(nolm_[i].nr_uniform, - ru_mesh, nolm_[i].getDpsiuniform(), tol, psidru_fname)); - - remove(psi_fname.c_str()); - remove(psik_fname.c_str()); - remove(psiru_fname.c_str()); - remove(psidru_fname.c_str()); - } - - remove(dir.c_str()); - + remove (dir.c_str ()); } - -TEST_F(NumericalOrbitalLmTest, VariousPsiType) { +TEST_F (NumericalOrbitalLmTest, VariousPsiType) +{ // this test checks the behavior of set_orbital_info // under various input Psi_Type - this->init_with_different_k(1600.0, dk_); + this->init_with_different_k (1600.0, dk_); // this tolerance is used for element-wise comparison // rather than an integration (as in r2k2r_consistency) double max_tol = 1e-3; - for (size_t i = 0; i != nolm_.size(); ++i) { - std::vector psi_ref, psif_ref, psik_ref, psik2_ref; - - psi_ref = nolm_[i].psi; - psif_ref = nolm_[i].psif; - psik_ref = nolm_[i].psik; - psik2_ref = nolm_[i].psik2; - - - // alternative Psi_Type input - - // Psi_Type == Psif - nolm_[i].set_orbital_info(elem_label_, index_atom_type_, - l_[i], index_chi_l_[i], nr_, rab_, - r_radial_, Numerical_Orbital_Lm::Psi_Type::Psif, - &psif_ref[0], nk_, dk_, 
- dr_uniform_, flag_plot_, flag_sbpool_, force_flag_); - - for (int ir = 0; ir != nolm_[i].nr; ++ir) { - EXPECT_NEAR(nolm_[i].psi[ir], psi_ref[ir], max_tol); + for (size_t i = 0; i != nolm_.size (); ++i) + { + std::vector psi_ref, psif_ref, psik_ref, psik2_ref; + + psi_ref = nolm_[i].psi; + psif_ref = nolm_[i].psif; + psik_ref = nolm_[i].psik; + psik2_ref = nolm_[i].psik2; + + // alternative Psi_Type input + + // Psi_Type == Psif + nolm_[i].set_orbital_info (elem_label_, + index_atom_type_, + l_[i], + index_chi_l_[i], + nr_, + rab_, + r_radial_, + Numerical_Orbital_Lm::Psi_Type::Psif, + &psif_ref[0], + nk_, + dk_, + dr_uniform_, + flag_plot_, + flag_sbpool_, + force_flag_); + + for (int ir = 0; ir != nolm_[i].nr; ++ir) + { + EXPECT_NEAR (nolm_[i].psi[ir], psi_ref[ir], max_tol); + } + + // Psi_Type == Psik + nolm_[i].set_orbital_info (elem_label_, + index_atom_type_, + l_[i], + index_chi_l_[i], + nr_, + rab_, + r_radial_, + Numerical_Orbital_Lm::Psi_Type::Psik, + &psik_ref[0], + nk_, + dk_, + dr_uniform_, + flag_plot_, + flag_sbpool_, + force_flag_); + + for (int ir = 0; ir != nolm_[i].nr; ++ir) + { + EXPECT_NEAR (nolm_[i].psi[ir], psi_ref[ir], max_tol); + } + + // Psi_Type == Psik2 + nolm_[i].set_orbital_info (elem_label_, + index_atom_type_, + l_[i], + index_chi_l_[i], + nr_, + rab_, + r_radial_, + Numerical_Orbital_Lm::Psi_Type::Psik2, + &psik2_ref[0], + nk_, + dk_, + dr_uniform_, + flag_plot_, + flag_sbpool_, + force_flag_); + + for (int ir = 0; ir != nolm_[i].nr; ++ir) + { + EXPECT_NEAR (nolm_[i].psi[ir], psi_ref[ir], max_tol); + } } - - // Psi_Type == Psik - nolm_[i].set_orbital_info(elem_label_, index_atom_type_, - l_[i], index_chi_l_[i], nr_, rab_, - r_radial_, Numerical_Orbital_Lm::Psi_Type::Psik, - &psik_ref[0], nk_, dk_, - dr_uniform_, flag_plot_, flag_sbpool_, force_flag_); - - for (int ir = 0; ir != nolm_[i].nr; ++ir) { - EXPECT_NEAR(nolm_[i].psi[ir], psi_ref[ir], max_tol); - } - - // Psi_Type == Psik2 - nolm_[i].set_orbital_info(elem_label_, 
index_atom_type_, - l_[i], index_chi_l_[i], nr_, rab_, - r_radial_, Numerical_Orbital_Lm::Psi_Type::Psik2, - &psik2_ref[0], nk_, dk_, - dr_uniform_, flag_plot_, flag_sbpool_, force_flag_); - - for (int ir = 0; ir != nolm_[i].nr; ++ir) { - EXPECT_NEAR(nolm_[i].psi[ir], psi_ref[ir], max_tol); - } - - } } - -TEST_F(NumericalOrbitalLmTest, TurnOffSphBesPool) { +TEST_F (NumericalOrbitalLmTest, TurnOffSphBesPool) +{ // checks the behavior of set_orbital_info when sbpool is turned off // @@ -833,57 +926,99 @@ TEST_F(NumericalOrbitalLmTest, TurnOffSphBesPool) { // if Psi_Type is any Fourier space type, set_orbital_info should throw flag_sbpool_ = false; - EXPECT_NO_THROW(this->init()); - - for (size_t i = 0; i != nolm_.size(); ++i) { - std::vector psi_ref, psif_ref, psik_ref, psik2_ref; - - psi_ref = nolm_[i].psi; - psif_ref = nolm_[i].psif; - psik_ref = nolm_[i].psik; - psik2_ref = nolm_[i].psik2; - - EXPECT_NO_THROW(nolm_[i].set_orbital_info(elem_label_, index_atom_type_, - l_[i], index_chi_l_[i], nr_, rab_, - r_radial_, Numerical_Orbital_Lm::Psi_Type::Psi, - &psi_ref[0], nk_, dk_, - dr_uniform_, flag_plot_, false, force_flag_)); - EXPECT_THROW(nolm_[i].set_orbital_info(elem_label_, index_atom_type_, - l_[i], index_chi_l_[i], nr_, rab_, - r_radial_, Numerical_Orbital_Lm::Psi_Type::Psif, - &psif_ref[0], nk_, dk_, - dr_uniform_, flag_plot_, false, force_flag_), std::domain_error); - EXPECT_THROW(nolm_[i].set_orbital_info(elem_label_, index_atom_type_, - l_[i], index_chi_l_[i], nr_, rab_, - r_radial_, Numerical_Orbital_Lm::Psi_Type::Psik, - &psik_ref[0], nk_, dk_, - dr_uniform_, flag_plot_, false, force_flag_), std::domain_error); - EXPECT_THROW(nolm_[i].set_orbital_info(elem_label_, index_atom_type_, - l_[i], index_chi_l_[i], nr_, rab_, - r_radial_, Numerical_Orbital_Lm::Psi_Type::Psik2, - &psik2_ref[0], nk_, dk_, - dr_uniform_, flag_plot_, false, force_flag_), std::domain_error); - } + EXPECT_NO_THROW (this->init ()); + + for (size_t i = 0; i != nolm_.size (); ++i) + { + 
std::vector psi_ref, psif_ref, psik_ref, psik2_ref; + + psi_ref = nolm_[i].psi; + psif_ref = nolm_[i].psif; + psik_ref = nolm_[i].psik; + psik2_ref = nolm_[i].psik2; + + EXPECT_NO_THROW (nolm_[i].set_orbital_info (elem_label_, + index_atom_type_, + l_[i], + index_chi_l_[i], + nr_, + rab_, + r_radial_, + Numerical_Orbital_Lm::Psi_Type::Psi, + &psi_ref[0], + nk_, + dk_, + dr_uniform_, + flag_plot_, + false, + force_flag_)); + EXPECT_THROW (nolm_[i].set_orbital_info (elem_label_, + index_atom_type_, + l_[i], + index_chi_l_[i], + nr_, + rab_, + r_radial_, + Numerical_Orbital_Lm::Psi_Type::Psif, + &psif_ref[0], + nk_, + dk_, + dr_uniform_, + flag_plot_, + false, + force_flag_), + std::domain_error); + EXPECT_THROW (nolm_[i].set_orbital_info (elem_label_, + index_atom_type_, + l_[i], + index_chi_l_[i], + nr_, + rab_, + r_radial_, + Numerical_Orbital_Lm::Psi_Type::Psik, + &psik_ref[0], + nk_, + dk_, + dr_uniform_, + flag_plot_, + false, + force_flag_), + std::domain_error); + EXPECT_THROW (nolm_[i].set_orbital_info (elem_label_, + index_atom_type_, + l_[i], + index_chi_l_[i], + nr_, + rab_, + r_radial_, + Numerical_Orbital_Lm::Psi_Type::Psik2, + &psik2_ref[0], + nk_, + dk_, + dr_uniform_, + flag_plot_, + false, + force_flag_), + std::domain_error); + } } - -int main(int argc, char **argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); - MPI_Comm_size(MPI_COMM_WORLD,&GlobalV::NPROC); - MPI_Comm_rank(MPI_COMM_WORLD,&GlobalV::MY_RANK); + MPI_Init (&argc, &argv); + MPI_Comm_size (MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; } - - diff --git a/source/source_basis/module_ao/test/ORB_nonlocal_lm_test.cpp b/source/source_basis/module_ao/test/ORB_nonlocal_lm_test.cpp index 
7b91df935e8..64a78ce184d 100644 --- a/source/source_basis/module_ao/test/ORB_nonlocal_lm_test.cpp +++ b/source/source_basis/module_ao/test/ORB_nonlocal_lm_test.cpp @@ -12,12 +12,10 @@ #include "source_basis/module_ao/ORB_nonlocal_lm.h" #undef private - #ifdef __MPI #include #endif - /*********************************************************** * unit test of class "Numerical_Nonlocal_Lm" ***********************************************************/ @@ -32,8 +30,8 @@ * applies a radial Fourier transform to beta_r to obtain beta_k * * - freemem - * deallocates the allocated memory of r_radial, rab, beta_r, - * beta_uniform, dbeta_uniform, k_radial & beta_k, and set them + * deallocates the allocated memory of r_radial, rab, beta_r, + * beta_uniform, dbeta_uniform, k_radial & beta_k, and set them * to nullptr * * - renew @@ -52,22 +50,25 @@ class NumericalNonlocalLmTest : public ::testing::Test { -protected: - - void SetUp(); - void TearDown(); + protected: + void SetUp (); + void TearDown (); // a vector of objects under unit test // corresponds to all the nonlocal projectors in the above upf file std::vector nnl; // helper functions - void init(); - std::string trim(std::string const&); - double err_r2k2r(Numerical_Nonlocal_Lm&); - size_t calc_nk(double const& ecutwfc, double const& dk); - void change_k(Numerical_Nonlocal_Lm&, double const& ecut, double const& dk); - bool check_file_match(size_t const& nline, double const* col1, double const* col2, double const& tol, std::string const& fname); + void init (); + std::string trim (std::string const&); + double err_r2k2r (Numerical_Nonlocal_Lm&); + size_t calc_nk (double const& ecutwfc, double const& dk); + void change_k (Numerical_Nonlocal_Lm&, double const& ecut, double const& dk); + bool check_file_match (size_t const& nline, + double const* col1, + double const* col2, + double const& tol, + std::string const& fname); // number of beta projectors size_t nproj_; @@ -85,8 +86,9 @@ class NumericalNonlocalLmTest : public 
::testing::Test double dr_uniform_; }; - -void NumericalNonlocalLmTest::SetUp() { +void + NumericalNonlocalLmTest::SetUp () +{ /////////////////////////////////////////////////// // Parameters @@ -100,14 +102,14 @@ void NumericalNonlocalLmTest::SetUp() { // where a much larger value is used. double ecutwfc = 100.0; - // In normal ABACUS calculation, dk, nk & dr_uniform - // are retrieved from the LCAO_Orbitals object, + // In normal ABACUS calculation, dk, nk & dr_uniform + // are retrieved from the LCAO_Orbitals object, // see setupNonlocal() in source_cell/setup_nonlocal.cpp. // Here we just provide some reasonable values. dk_ = 0.01; dr_uniform_ = 0.001; - nk_ = calc_nk(ecutwfc, dk_); + nk_ = calc_nk (ecutwfc, dk_); // not really meaningful in this unit test index_atom_type_ = 42; @@ -119,15 +121,15 @@ void NumericalNonlocalLmTest::SetUp() { // variables below will be read from upf_file size_t nmesh_upf = 0; - std::ifstream ifs(upf_file); + std::ifstream ifs (upf_file); // see read_pseudo_upf201 in source_cell/read_pp_upf201.cpp // the following code only works for UPF files of version 2.0.1 //----------- read header ------------ - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "" marking the end of PP_HEADER @@ -135,227 +137,273 @@ void NumericalNonlocalLmTest::SetUp() { std::string label; std::string val; - while(std::getline(ifs, linebuf)) { - - std::string::size_type pos = linebuf.find('='); - if (pos != std::string::npos) { - // extract the label - // skip leading whitespaces - auto label_start = linebuf.find_first_not_of(" \t"); - label = linebuf.substr(label_start, pos-label_start); - } else if (linebuf.find("/>") != std::string::npos) { - // reach the end of PP_HEADER - break; - } else { - // skip lines without '=' or "/>" - continue; - } - - val = trim(linebuf); - - // only the entries below are relevant to the current unit test - if (label == "element") { - elem_label_ = val; - } else if (label == "number_of_proj") { - nproj_ = std::atoi(val.c_str()); - } 
else if (label == "mesh_size") { - nmesh_upf = std::atoi(val.c_str()); + while (std::getline (ifs, linebuf)) + { + + std::string::size_type pos = linebuf.find ('='); + if (pos != std::string::npos) + { + // extract the label + // skip leading whitespaces + auto label_start = linebuf.find_first_not_of (" \t"); + label = linebuf.substr (label_start, pos - label_start); + } + else if (linebuf.find ("/>") != std::string::npos) + { + // reach the end of PP_HEADER + break; + } + else + { + // skip lines without '=' or "/>" + continue; + } + + val = trim (linebuf); + + // only the entries below are relevant to the current unit test + if (label == "element") + { + elem_label_ = val; + } + else if (label == "number_of_proj") + { + nproj_ = std::atoi (val.c_str ()); + } + else if (label == "mesh_size") + { + nmesh_upf = std::atoi (val.c_str ()); + } + + if (linebuf.find ("/>") != std::string::npos) + { + // reach the end of PP_HEADER + break; + } } - if (linebuf.find("/>") != std::string::npos) { - // reach the end of PP_HEADER - break; - } - } - //----------- read mesh ------------ - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "> r_radial_[ir]; - } + for (size_t ir = 0; ir != nmesh_upf; ++ir) + { + ifs >> r_radial_[ir]; + } - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "> rab_[ir]; - } - - //----------- read nonlocal projectors & initialize objects ------------ - - l_.resize(nproj_); - nr_.resize(nproj_); - beta_r_.resize(nproj_); - nnl.resize(nproj_); - - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, ""); - for (size_t iproj = 0; iproj != nproj_; ++iproj) { - - beta_r_[iproj] = new double[nmesh_upf]; - - // process beta headers - while(std::getline(ifs, linebuf)) { - if (linebuf.find('>') != std::string::npos) { - // end of PP_BETA header, beta mesh starts - break; - } - - if (linebuf.find("angular_momentum") != std::string::npos) { - l_[iproj] = std::atoi(trim(linebuf).c_str()); - } - } - - // read beta mesh (UPF mesh is r*beta(r)!) 
- for (size_t ir = 0; ir != nmesh_upf; ++ir) { - ifs >> beta_r_[iproj][ir]; + for (size_t ir = 0; ir != nmesh_upf; ++ir) + { + ifs >> rab_[ir]; } - // determine the actual mesh size by ignoring the trailing - // zeros (or small numbers) of beta - - // the code could fail if the original upf mesh size is even - // and there's no trailing zero. Currently we assume that - // there's always at least one trailing zero. - for (nr_[iproj] = nmesh_upf; nr_[iproj] > 1; --nr_[iproj]) { - if (std::abs(beta_r_[iproj][nr_[iproj]-1])>1e-14) { - break; - } - } - - if (nr_[iproj]%2 == 0) { - ++nr_[iproj]; - } + //----------- read nonlocal projectors & initialize objects ------------ - while(std::getline(ifs, linebuf)) { - if (linebuf.find("/PP_BETA") != std::string::npos) { - // reach beta mesh ending symbol - break; - } + l_.resize (nproj_); + nr_.resize (nproj_); + beta_r_.resize (nproj_); + nnl.resize (nproj_); + + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, ""); + for (size_t iproj = 0; iproj != nproj_; ++iproj) + { + + beta_r_[iproj] = new double[nmesh_upf]; + + // process beta headers + while (std::getline (ifs, linebuf)) + { + if (linebuf.find ('>') != std::string::npos) + { + // end of PP_BETA header, beta mesh starts + break; + } + + if (linebuf.find ("angular_momentum") != std::string::npos) + { + l_[iproj] = std::atoi (trim (linebuf).c_str ()); + } + } + + // read beta mesh (UPF mesh is r*beta(r)!) + for (size_t ir = 0; ir != nmesh_upf; ++ir) + { + ifs >> beta_r_[iproj][ir]; + } + + // determine the actual mesh size by ignoring the trailing + // zeros (or small numbers) of beta + + // the code could fail if the original upf mesh size is even + // and there's no trailing zero. Currently we assume that + // there's always at least one trailing zero. 
+ for (nr_[iproj] = nmesh_upf; nr_[iproj] > 1; --nr_[iproj]) + { + if (std::abs (beta_r_[iproj][nr_[iproj] - 1]) > 1e-14) + { + break; + } + } + + if (nr_[iproj] % 2 == 0) + { + ++nr_[iproj]; + } + + while (std::getline (ifs, linebuf)) + { + if (linebuf.find ("/PP_BETA") != std::string::npos) + { + // reach beta mesh ending symbol + break; + } + } } - - } } - -void NumericalNonlocalLmTest::TearDown() { +void + NumericalNonlocalLmTest::TearDown () +{ delete[] r_radial_; delete[] rab_; - for (size_t ip = 0; ip != beta_r_.size(); ++ip) { - delete[] beta_r_[ip]; - } + for (size_t ip = 0; ip != beta_r_.size (); ++ip) + { + delete[] beta_r_[ip]; + } } - -void NumericalNonlocalLmTest::init() { +void + NumericalNonlocalLmTest::init () +{ // initialized the tested objects by pouring the data // collected in SetUp() to nnl; - for (size_t iproj = 0; iproj != nproj_; ++iproj) { - nnl[iproj].set_NL_proj(elem_label_, index_atom_type_, l_[iproj], - nr_[iproj], rab_, r_radial_, beta_r_[iproj], - nk_, dk_, dr_uniform_); - - /* - // normalization check - double* tmp = new double[nr_[iproj]]; - for (int ir = 0; ir != nr_[iproj]; ++ir) { - tmp[ir] = beta_r_[iproj][ir]*beta_r_[iproj][ir]; + for (size_t iproj = 0; iproj != nproj_; ++iproj) + { + nnl[iproj].set_NL_proj (elem_label_, + index_atom_type_, + l_[iproj], + nr_[iproj], + rab_, + r_radial_, + beta_r_[iproj], + nk_, + dk_, + dr_uniform_); + + /* + // normalization check + double* tmp = new double[nr_[iproj]]; + for (int ir = 0; ir != nr_[iproj]; ++ir) { + tmp[ir] = beta_r_[iproj][ir]*beta_r_[iproj][ir]; + } + double radint = 0.0; + ModuleBase::Integral::Simpson_Integral(nr_[iproj], tmp, rab_, radint); + std::cout << "proj " << iproj + << std::fixed << std::setprecision(12) + << " radial integral = " << radint << std::endl; + */ } - double radint = 0.0; - ModuleBase::Integral::Simpson_Integral(nr_[iproj], tmp, rab_, radint); - std::cout << "proj " << iproj - << std::fixed << std::setprecision(12) - << " radial integral = " << 
radint << std::endl; - */ - } } - -std::string NumericalNonlocalLmTest::trim(std::string const& str) { +std::string + NumericalNonlocalLmTest::trim (std::string const& str) +{ // extract the substring between quotation marks (with whitespace trimmed) // str MUST contain a pair of quotation marks - auto start = str.find('"'); - auto end = str.find_last_of('"'); - std::string tmp = str.substr(start+1, end-start-1); + auto start = str.find ('"'); + auto end = str.find_last_of ('"'); + std::string tmp = str.substr (start + 1, end - start - 1); - if (tmp.length() == 0) { - return tmp; - } + if (tmp.length () == 0) + { + return tmp; + } - start = tmp.find_first_not_of(" \t"); - end = tmp.find_last_not_of(" \t"); - return tmp.substr(start, end-start+1); + start = tmp.find_first_not_of (" \t"); + end = tmp.find_last_not_of (" \t"); + return tmp.substr (start, end - start + 1); } - -double NumericalNonlocalLmTest::err_r2k2r(Numerical_Nonlocal_Lm& nnl_tmp) { +double + NumericalNonlocalLmTest::err_r2k2r (Numerical_Nonlocal_Lm& nnl_tmp) +{ // err_r2k2r makes use of Numerical_Nonlocal_Lm::get_kradial() - // to transform beta_k back to beta_r. - // The error is computed as the difference between the origional + // to transform beta_k back to beta_r. + // The error is computed as the difference between the origional // and the transformed beta_r. 
double* beta_r_old = new double[nnl_tmp.nr]; double* rab_old = new double[nnl_tmp.nr]; double* err = new double[nnl_tmp.nr]; - for (int ir = 0; ir != nnl_tmp.nr; ++ir) { - beta_r_old[ir] = nnl_tmp.beta_r[ir]; - rab_old[ir] = nnl_tmp.rab[ir]; - } + for (int ir = 0; ir != nnl_tmp.nr; ++ir) + { + beta_r_old[ir] = nnl_tmp.beta_r[ir]; + rab_old[ir] = nnl_tmp.rab[ir]; + } - std::swap(nnl_tmp.nr, nnl_tmp.nk); - std::swap(nnl_tmp.r_radial, nnl_tmp.k_radial); - std::swap(nnl_tmp.beta_r, nnl_tmp.beta_k); + std::swap (nnl_tmp.nr, nnl_tmp.nk); + std::swap (nnl_tmp.r_radial, nnl_tmp.k_radial); + std::swap (nnl_tmp.beta_r, nnl_tmp.beta_k); delete[] nnl_tmp.rab; nnl_tmp.rab = new double[nnl_tmp.nr]; - for (int ik = 0; ik != nnl_tmp.nr; ++ik) { - nnl_tmp.rab[ik] = nnl_tmp.dk; - } + for (int ik = 0; ik != nnl_tmp.nr; ++ik) + { + nnl_tmp.rab[ik] = nnl_tmp.dk; + } - nnl_tmp.get_kradial(); + nnl_tmp.get_kradial (); - for (int ir = 0; ir != nnl_tmp.nk; ++ir) { - err[ir] = std::pow(nnl_tmp.getBeta_k(ir) - beta_r_old[ir], 2); - } + for (int ir = 0; ir != nnl_tmp.nk; ++ir) + { + err[ir] = std::pow (nnl_tmp.getBeta_k (ir) - beta_r_old[ir], 2); + } double errint = 0.0; - ModuleBase::Integral::Simpson_Integral(nnl_tmp.nk, err, rab_old, errint); + ModuleBase::Integral::Simpson_Integral (nnl_tmp.nk, err, rab_old, errint); return errint; } - -size_t NumericalNonlocalLmTest::calc_nk(double const& ecutwfc, double const& dk) { +size_t + NumericalNonlocalLmTest::calc_nk (double const& ecutwfc, double const& dk) +{ // current formula for nk // see source_basis/module_ao/ORB_read.cpp, function "Read_Orbitals" size_t nk = 0; - if(ecutwfc < 20) { - nk = static_cast( 2 * sqrt(ecutwfc) / dk ) + 4; - } else { - nk = static_cast( sqrt(ecutwfc) / dk ) + 4; - } + if (ecutwfc < 20) + { + nk = static_cast (2 * sqrt (ecutwfc) / dk) + 4; + } + else + { + nk = static_cast (sqrt (ecutwfc) / dk) + 4; + } - if (nk%2 == 0) { - ++nk; - } + if (nk % 2 == 0) + { + ++nk; + } return nk; } - -void 
NumericalNonlocalLmTest::change_k(Numerical_Nonlocal_Lm& nnl_, double const& ecut, double const& dk) { +void + NumericalNonlocalLmTest::change_k (Numerical_Nonlocal_Lm& nnl_, double const& ecut, double const& dk) +{ // recalculates k mesh & beta_k with given ecut & dk // used in r2k2r_consistency @@ -363,165 +411,170 @@ void NumericalNonlocalLmTest::change_k(Numerical_Nonlocal_Lm& nnl_, double const Numerical_Nonlocal_Lm tmp; tmp = nnl_; - nnl_.set_NL_proj( - tmp.label, - tmp.index_atom_type, - tmp.angular_momentum_l, - tmp.nr, - tmp.rab, - tmp.r_radial, - tmp.beta_r, - this->calc_nk(ecut, dk), - dk, - tmp.dr_uniform - ); + nnl_.set_NL_proj (tmp.label, + tmp.index_atom_type, + tmp.angular_momentum_l, + tmp.nr, + tmp.rab, + tmp.r_radial, + tmp.beta_r, + this->calc_nk (ecut, dk), + dk, + tmp.dr_uniform); } +TEST_F (NumericalNonlocalLmTest, Init) +{ -TEST_F(NumericalNonlocalLmTest, Init) { - - this->init(); - - for (size_t ip = 0; ip != nproj_; ++ip) { - EXPECT_EQ(elem_label_, nnl[ip].label); - EXPECT_EQ(index_atom_type_, nnl[ip].index_atom_type); - EXPECT_EQ(l_[ip], nnl[ip].angular_momentum_l); - EXPECT_EQ(dr_uniform_, nnl[ip].dr_uniform); - EXPECT_EQ(nr_[ip], nnl[ip].nr); - EXPECT_EQ(r_radial_[nr_[ip]-1], nnl[ip].rcut); - EXPECT_EQ(nk_, nnl[ip].nk); - EXPECT_EQ(dk_, nnl[ip].dk); - - // freemem() & renew() will be tested elsewhere - - for (int ir = 0; ir != nr_[ip]; ++ir) { - EXPECT_EQ(r_radial_[ir], nnl[ip].r_radial[ir]); - EXPECT_EQ(rab_[ir], nnl[ip].rab[ir]); - EXPECT_EQ(beta_r_[ip][ir], nnl[ip].beta_r[ir]); - } - - for (size_t ik = 0; ik != nk_; ++ik) { - EXPECT_EQ(ik*dk_, nnl[ip].k_radial[ik]); + this->init (); + + for (size_t ip = 0; ip != nproj_; ++ip) + { + EXPECT_EQ (elem_label_, nnl[ip].label); + EXPECT_EQ (index_atom_type_, nnl[ip].index_atom_type); + EXPECT_EQ (l_[ip], nnl[ip].angular_momentum_l); + EXPECT_EQ (dr_uniform_, nnl[ip].dr_uniform); + EXPECT_EQ (nr_[ip], nnl[ip].nr); + EXPECT_EQ (r_radial_[nr_[ip] - 1], nnl[ip].rcut); + EXPECT_EQ (nk_, 
nnl[ip].nk); + EXPECT_EQ (dk_, nnl[ip].dk); + + // freemem() & renew() will be tested elsewhere + + for (int ir = 0; ir != nr_[ip]; ++ir) + { + EXPECT_EQ (r_radial_[ir], nnl[ip].r_radial[ir]); + EXPECT_EQ (rab_[ir], nnl[ip].rab[ir]); + EXPECT_EQ (beta_r_[ip][ir], nnl[ip].beta_r[ir]); + } + + for (size_t ik = 0; ik != nk_; ++ik) + { + EXPECT_EQ (ik * dk_, nnl[ip].k_radial[ik]); + } + EXPECT_EQ ((nk_ - 1) * dk_, nnl[ip].kcut); + + // get_kradial() will be tested elsewhere } - EXPECT_EQ((nk_-1)*dk_, nnl[ip].kcut); - - // get_kradial() will be tested elsewhere - } } - -TEST_F(NumericalNonlocalLmTest, Getters) { +TEST_F (NumericalNonlocalLmTest, Getters) +{ // this test checks all the getter functions - this->init(); - - for (size_t iproj = 0; iproj != nnl.size(); ++iproj) { - EXPECT_EQ(nnl[iproj].getType(), 42); // index_atom_type - EXPECT_DOUBLE_EQ(nnl[iproj].getDk(), 0.01); - - ASSERT_NE(nnl[iproj].getRadial(), nullptr); - ASSERT_NE(nnl[iproj].getKpoint(), nullptr); - ASSERT_NE(nnl[iproj].getBeta_r(), nullptr); - ASSERT_NE(nnl[iproj].getBeta_k(), nullptr); - - for (int ir = 0; ir != nnl[iproj].nr; ++ir) { - EXPECT_DOUBLE_EQ(nnl[iproj].getRadial(ir), 0.01*ir); - EXPECT_DOUBLE_EQ(nnl[iproj].getRadial()[ir], 0.01*ir); - EXPECT_DOUBLE_EQ(nnl[iproj].getBeta_r()[ir], nnl[iproj].getBeta_r(ir)); - } - - for (int ik = 0; ik != nnl[iproj].nk; ++ik) { - EXPECT_DOUBLE_EQ(nnl[iproj].getKpoint(ik), ik*0.01); - EXPECT_DOUBLE_EQ(nnl[iproj].getKpoint()[ik], ik*0.01); - EXPECT_DOUBLE_EQ(nnl[iproj].getBeta_k()[ik], nnl[iproj].getBeta_k(ik)); + this->init (); + + for (size_t iproj = 0; iproj != nnl.size (); ++iproj) + { + EXPECT_EQ (nnl[iproj].getType (), 42); // index_atom_type + EXPECT_DOUBLE_EQ (nnl[iproj].getDk (), 0.01); + + ASSERT_NE (nnl[iproj].getRadial (), nullptr); + ASSERT_NE (nnl[iproj].getKpoint (), nullptr); + ASSERT_NE (nnl[iproj].getBeta_r (), nullptr); + ASSERT_NE (nnl[iproj].getBeta_k (), nullptr); + + for (int ir = 0; ir != nnl[iproj].nr; ++ir) + { + EXPECT_DOUBLE_EQ 
(nnl[iproj].getRadial (ir), 0.01 * ir); + EXPECT_DOUBLE_EQ (nnl[iproj].getRadial ()[ir], 0.01 * ir); + EXPECT_DOUBLE_EQ (nnl[iproj].getBeta_r ()[ir], nnl[iproj].getBeta_r (ir)); + } + + for (int ik = 0; ik != nnl[iproj].nk; ++ik) + { + EXPECT_DOUBLE_EQ (nnl[iproj].getKpoint (ik), ik * 0.01); + EXPECT_DOUBLE_EQ (nnl[iproj].getKpoint ()[ik], ik * 0.01); + EXPECT_DOUBLE_EQ (nnl[iproj].getBeta_k ()[ik], nnl[iproj].getBeta_k (ik)); + } } - } // see upf file for details - EXPECT_EQ(nnl[0].getL(), 0); - EXPECT_DOUBLE_EQ(nnl[0].getRcut(), 1.3200); - EXPECT_DOUBLE_EQ(nnl[0].getBeta_r(0), 0.0); - EXPECT_DOUBLE_EQ(nnl[0].getBeta_r(1), -8.2277987587e-02); - EXPECT_DOUBLE_EQ(nnl[0].getBeta_r(4), -3.2850076507e-01); - EXPECT_DOUBLE_EQ(nnl[0].getBeta_r(132), 1.6220072646e-06); - - - EXPECT_EQ(nnl[1].getL(), 0); - EXPECT_DOUBLE_EQ(nnl[1].getRcut(), 1.3200); - EXPECT_DOUBLE_EQ(nnl[1].getBeta_r(0), 0.0); - EXPECT_DOUBLE_EQ(nnl[1].getBeta_r(1), -1.1723087215e-02); - EXPECT_DOUBLE_EQ(nnl[1].getBeta_r(4), -4.2201369170e-02); - EXPECT_DOUBLE_EQ(nnl[1].getBeta_r(132), -2.7996494097e-06); - - EXPECT_EQ(nnl[2].getL(), 1); - EXPECT_DOUBLE_EQ(nnl[2].getRcut(), 1.5000); - EXPECT_DOUBLE_EQ(nnl[2].getBeta_r(0), 0.0); - EXPECT_DOUBLE_EQ(nnl[2].getBeta_r(1), 3.5860269827e-03); - EXPECT_DOUBLE_EQ(nnl[2].getBeta_r(4), 5.6837367274e-02); - EXPECT_DOUBLE_EQ(nnl[2].getBeta_r(150), 9.6147639048e-06); - - EXPECT_EQ(nnl[3].getL(), 1); - EXPECT_DOUBLE_EQ(nnl[3].getRcut(), 1.5000); - EXPECT_DOUBLE_EQ(nnl[3].getBeta_r(0), 0.0); - EXPECT_DOUBLE_EQ(nnl[3].getBeta_r(1), 9.2893242255e-04); - EXPECT_DOUBLE_EQ(nnl[3].getBeta_r(4), 1.4588689808e-02); - EXPECT_DOUBLE_EQ(nnl[3].getBeta_r(150), 5.9608986436e-06); + EXPECT_EQ (nnl[0].getL (), 0); + EXPECT_DOUBLE_EQ (nnl[0].getRcut (), 1.3200); + EXPECT_DOUBLE_EQ (nnl[0].getBeta_r (0), 0.0); + EXPECT_DOUBLE_EQ (nnl[0].getBeta_r (1), -8.2277987587e-02); + EXPECT_DOUBLE_EQ (nnl[0].getBeta_r (4), -3.2850076507e-01); + EXPECT_DOUBLE_EQ (nnl[0].getBeta_r (132), 
1.6220072646e-06); + + EXPECT_EQ (nnl[1].getL (), 0); + EXPECT_DOUBLE_EQ (nnl[1].getRcut (), 1.3200); + EXPECT_DOUBLE_EQ (nnl[1].getBeta_r (0), 0.0); + EXPECT_DOUBLE_EQ (nnl[1].getBeta_r (1), -1.1723087215e-02); + EXPECT_DOUBLE_EQ (nnl[1].getBeta_r (4), -4.2201369170e-02); + EXPECT_DOUBLE_EQ (nnl[1].getBeta_r (132), -2.7996494097e-06); + + EXPECT_EQ (nnl[2].getL (), 1); + EXPECT_DOUBLE_EQ (nnl[2].getRcut (), 1.5000); + EXPECT_DOUBLE_EQ (nnl[2].getBeta_r (0), 0.0); + EXPECT_DOUBLE_EQ (nnl[2].getBeta_r (1), 3.5860269827e-03); + EXPECT_DOUBLE_EQ (nnl[2].getBeta_r (4), 5.6837367274e-02); + EXPECT_DOUBLE_EQ (nnl[2].getBeta_r (150), 9.6147639048e-06); + + EXPECT_EQ (nnl[3].getL (), 1); + EXPECT_DOUBLE_EQ (nnl[3].getRcut (), 1.5000); + EXPECT_DOUBLE_EQ (nnl[3].getBeta_r (0), 0.0); + EXPECT_DOUBLE_EQ (nnl[3].getBeta_r (1), 9.2893242255e-04); + EXPECT_DOUBLE_EQ (nnl[3].getBeta_r (4), 1.4588689808e-02); + EXPECT_DOUBLE_EQ (nnl[3].getBeta_r (150), 5.9608986436e-06); } - -TEST_F(NumericalNonlocalLmTest, DeepCopy) { +TEST_F (NumericalNonlocalLmTest, DeepCopy) +{ // this test checks whether the operator= overload properly // performs a deep copy - this->init(); + this->init (); Numerical_Nonlocal_Lm tmp; size_t iproj = 3; tmp = nnl[iproj]; - EXPECT_EQ(tmp.label, nnl[iproj].label); - EXPECT_EQ(tmp.index_atom_type, nnl[iproj].index_atom_type); - EXPECT_EQ(tmp.angular_momentum_l, nnl[iproj].angular_momentum_l); - EXPECT_EQ(tmp.nr, nnl[iproj].nr); - EXPECT_EQ(tmp.nk, nnl[iproj].nk); - EXPECT_EQ(tmp.index_proj, nnl[iproj].index_proj); - - EXPECT_DOUBLE_EQ(tmp.rcut, nnl[iproj].rcut); - EXPECT_DOUBLE_EQ(tmp.kcut, nnl[iproj].kcut); - EXPECT_DOUBLE_EQ(tmp.dk, nnl[iproj].dk); - - ASSERT_NE(tmp.getRadial(), nullptr); - ASSERT_NE(tmp.getKpoint(), nullptr); - ASSERT_NE(tmp.getBeta_k(), nullptr); - ASSERT_NE(tmp.getBeta_r(), nullptr); - - for (int ir = 0; ir != nnl[iproj].nr; ++ir) { - EXPECT_DOUBLE_EQ(tmp.r_radial[ir], nnl[iproj].r_radial[ir]); - EXPECT_DOUBLE_EQ(tmp.rab[ir], 
nnl[iproj].rab[ir]); - EXPECT_DOUBLE_EQ(tmp.beta_r[ir], nnl[iproj].beta_r[ir]); - } + EXPECT_EQ (tmp.label, nnl[iproj].label); + EXPECT_EQ (tmp.index_atom_type, nnl[iproj].index_atom_type); + EXPECT_EQ (tmp.angular_momentum_l, nnl[iproj].angular_momentum_l); + EXPECT_EQ (tmp.nr, nnl[iproj].nr); + EXPECT_EQ (tmp.nk, nnl[iproj].nk); + EXPECT_EQ (tmp.index_proj, nnl[iproj].index_proj); + + EXPECT_DOUBLE_EQ (tmp.rcut, nnl[iproj].rcut); + EXPECT_DOUBLE_EQ (tmp.kcut, nnl[iproj].kcut); + EXPECT_DOUBLE_EQ (tmp.dk, nnl[iproj].dk); + + ASSERT_NE (tmp.getRadial (), nullptr); + ASSERT_NE (tmp.getKpoint (), nullptr); + ASSERT_NE (tmp.getBeta_k (), nullptr); + ASSERT_NE (tmp.getBeta_r (), nullptr); + + for (int ir = 0; ir != nnl[iproj].nr; ++ir) + { + EXPECT_DOUBLE_EQ (tmp.r_radial[ir], nnl[iproj].r_radial[ir]); + EXPECT_DOUBLE_EQ (tmp.rab[ir], nnl[iproj].rab[ir]); + EXPECT_DOUBLE_EQ (tmp.beta_r[ir], nnl[iproj].beta_r[ir]); + } - for (int ik = 0; ik != nnl[iproj].nk; ++ik) { - EXPECT_DOUBLE_EQ(tmp.k_radial[ik], nnl[iproj].k_radial[ik]); - EXPECT_DOUBLE_EQ(tmp.beta_k[ik], nnl[iproj].beta_k[ik]); - } + for (int ik = 0; ik != nnl[iproj].nk; ++ik) + { + EXPECT_DOUBLE_EQ (tmp.k_radial[ik], nnl[iproj].k_radial[ik]); + EXPECT_DOUBLE_EQ (tmp.beta_k[ik], nnl[iproj].beta_k[ik]); + } } - -TEST_F(NumericalNonlocalLmTest, R2K2RConsistency) { +TEST_F (NumericalNonlocalLmTest, R2K2RConsistency) +{ /* - * This test checks whether get_kradial() works as expected - * by applying it to k*beta(k) and see if it gives back the + * This test checks whether get_kradial() works as expected + * by applying it to k*beta(k) and see if it gives back the * original r*beta(r). * * This is NOT a convergence test, so a large ecutwfc is used. 
* * get_kradial() transforms r*beta(r) to k*beta(k) according to - * + * * beta(k) = sqrt(2/pi) * \int_0^{\infty} beta(r) * jl(kr) * r^2 dr * * whose inverse transform @@ -532,18 +585,18 @@ TEST_F(NumericalNonlocalLmTest, R2K2RConsistency) { * */ - this->init(); + this->init (); double ecut = 1000.0; - for (size_t iproj = 0; iproj != nnl.size(); ++iproj) { - Numerical_Nonlocal_Lm tmp; - tmp = nnl[iproj]; - this->change_k(tmp, ecut, tmp.dk); - EXPECT_LT(err_r2k2r(tmp), 1e-6); - } + for (size_t iproj = 0; iproj != nnl.size (); ++iproj) + { + Numerical_Nonlocal_Lm tmp; + tmp = nnl[iproj]; + this->change_k (tmp, ecut, tmp.dk); + EXPECT_LT (err_r2k2r (tmp), 1e-6); + } } - /* TEST_F(NumericalNonlocalLmTest, R2K2RConsistencyMany) { @@ -562,7 +615,7 @@ TEST_F(NumericalNonlocalLmTest, R2K2RConsistencyMany) { std::cout << "proj = " << iproj << " ecut = " << std::setw(8) << ecut_list[ie] << " dk = " << std::setw(6) << tmp.dk - << " error = " << std::setw(10) << err + << " error = " << std::setw(10) << err << std::endl; } std::cout << std::endl; @@ -571,7 +624,7 @@ TEST_F(NumericalNonlocalLmTest, R2K2RConsistencyMany) { std::cout << std::endl; - std::vector dk_list{0.001, 0.01, 0.1, + std::vector dk_list{0.001, 0.01, 0.1, 0.3, 1.0, 1.1, 1.2, 1.3, 1.5, 2.0}; std::cout << "dk convergence test" << std::endl; @@ -595,62 +648,70 @@ TEST_F(NumericalNonlocalLmTest, R2K2RConsistencyMany) { } */ +TEST_F (NumericalNonlocalLmTest, FreeAndRenew) +{ -TEST_F(NumericalNonlocalLmTest, FreeAndRenew) { - - this->init(); + this->init (); + + EXPECT_NE (nnl[0].r_radial, nullptr); + EXPECT_NE (nnl[0].rab, nullptr); + EXPECT_NE (nnl[0].beta_r, nullptr); + EXPECT_NE (nnl[0].beta_uniform, nullptr); + EXPECT_NE (nnl[0].dbeta_uniform, nullptr); + EXPECT_NE (nnl[0].k_radial, nullptr); + EXPECT_NE (nnl[0].beta_k, nullptr); + + nnl[0].freemem (); + + EXPECT_EQ (nnl[0].r_radial, nullptr); + EXPECT_EQ (nnl[0].rab, nullptr); + EXPECT_EQ (nnl[0].beta_r, nullptr); + EXPECT_EQ (nnl[0].beta_uniform, nullptr); + 
EXPECT_EQ (nnl[0].dbeta_uniform, nullptr); + EXPECT_EQ (nnl[0].k_radial, nullptr); + EXPECT_EQ (nnl[0].beta_k, nullptr); + + nnl[0].renew (); + + ASSERT_NE (nnl[0].r_radial, nullptr); + ASSERT_NE (nnl[0].rab, nullptr); + ASSERT_NE (nnl[0].beta_r, nullptr); + ASSERT_NE (nnl[0].beta_uniform, nullptr); + ASSERT_NE (nnl[0].dbeta_uniform, nullptr); + ASSERT_NE (nnl[0].k_radial, nullptr); + ASSERT_NE (nnl[0].beta_k, nullptr); + + for (int ir = 0; ir != nnl[0].nr; ++ir) + { + EXPECT_DOUBLE_EQ (nnl[0].r_radial[ir], 0.0); + EXPECT_DOUBLE_EQ (nnl[0].rab[ir], 0.0); + EXPECT_DOUBLE_EQ (nnl[0].beta_r[ir], 0.0); + } - EXPECT_NE(nnl[0].r_radial, nullptr); - EXPECT_NE(nnl[0].rab, nullptr); - EXPECT_NE(nnl[0].beta_r, nullptr); - EXPECT_NE(nnl[0].beta_uniform, nullptr); - EXPECT_NE(nnl[0].dbeta_uniform, nullptr); - EXPECT_NE(nnl[0].k_radial, nullptr); - EXPECT_NE(nnl[0].beta_k, nullptr); - - nnl[0].freemem(); - - EXPECT_EQ(nnl[0].r_radial, nullptr); - EXPECT_EQ(nnl[0].rab, nullptr); - EXPECT_EQ(nnl[0].beta_r, nullptr); - EXPECT_EQ(nnl[0].beta_uniform, nullptr); - EXPECT_EQ(nnl[0].dbeta_uniform, nullptr); - EXPECT_EQ(nnl[0].k_radial, nullptr); - EXPECT_EQ(nnl[0].beta_k, nullptr); - - nnl[0].renew(); - - ASSERT_NE(nnl[0].r_radial, nullptr); - ASSERT_NE(nnl[0].rab, nullptr); - ASSERT_NE(nnl[0].beta_r, nullptr); - ASSERT_NE(nnl[0].beta_uniform, nullptr); - ASSERT_NE(nnl[0].dbeta_uniform, nullptr); - ASSERT_NE(nnl[0].k_radial, nullptr); - ASSERT_NE(nnl[0].beta_k, nullptr); - - for (int ir = 0; ir != nnl[0].nr; ++ir) { - EXPECT_DOUBLE_EQ(nnl[0].r_radial[ir], 0.0); - EXPECT_DOUBLE_EQ(nnl[0].rab[ir], 0.0); - EXPECT_DOUBLE_EQ(nnl[0].beta_r[ir], 0.0); - } + for (int ir = 0; ir != nnl[0].nr_uniform; ++ir) + { + EXPECT_DOUBLE_EQ (nnl[0].beta_uniform[ir], 0.0); + EXPECT_DOUBLE_EQ (nnl[0].dbeta_uniform[ir], 0.0); + } - for (int ir = 0; ir != nnl[0].nr_uniform; ++ir) { - EXPECT_DOUBLE_EQ(nnl[0].beta_uniform[ir], 0.0); - EXPECT_DOUBLE_EQ(nnl[0].dbeta_uniform[ir], 0.0); - } - - for (int ik = 0; ik 
!= nnl[0].nk; ++ik) { - EXPECT_DOUBLE_EQ(nnl[0].k_radial[ik], 0.0); - EXPECT_DOUBLE_EQ(nnl[0].beta_k[ik], 0.0); - } + for (int ik = 0; ik != nnl[0].nk; ++ik) + { + EXPECT_DOUBLE_EQ (nnl[0].k_radial[ik], 0.0); + EXPECT_DOUBLE_EQ (nnl[0].beta_k[ik], 0.0); + } } - -bool NumericalNonlocalLmTest::check_file_match(size_t const& nline, double const* col1, double const* col2, double const& tol, std::string const& fname) { +bool + NumericalNonlocalLmTest::check_file_match (size_t const& nline, + double const* col1, + double const* col2, + double const& tol, + std::string const& fname) +{ /* This function checks whether the content of file named "fname" * contains certain data with certain format. - * + * * The file should contain only two colums of floating-point numbers, * each column should match col1 or col2 within a tolerance of tol, * and the number of lines should be nline. No empty line is allowed. @@ -662,35 +723,37 @@ bool NumericalNonlocalLmTest::check_file_match(size_t const& nline, double const std::ifstream ifs; std::string tmp1, tmp2, tmp_line; - ifs.open(fname); + ifs.open (fname); - for (size_t i = 0; i != nline; ++i) { - std::stringstream ss; - std::getline(ifs, tmp_line); - ss << tmp_line; - ss >> tmp1 >> tmp2; + for (size_t i = 0; i != nline; ++i) + { + std::stringstream ss; + std::getline (ifs, tmp_line); + ss << tmp_line; + ss >> tmp1 >> tmp2; - if( std::abs( col1[i] - std::stod(tmp1.c_str()) ) > tol || - std::abs( col2[i] - std::stod(tmp2.c_str()) ) > tol || - ss.tellg() != -1 ) { - return false; + if (std::abs (col1[i] - std::stod (tmp1.c_str ())) > tol + || std::abs (col2[i] - std::stod (tmp2.c_str ())) > tol || ss.tellg () != -1) + { + return false; + } } - } - std::getline(ifs, tmp_line); - if ( ifs.tellg() != -1 ) { - return false; - } + std::getline (ifs, tmp_line); + if (ifs.tellg () != -1) + { + return false; + } - ifs.close(); + ifs.close (); return true; } +TEST_F (NumericalNonlocalLmTest, BetaSave) +{ -TEST_F(NumericalNonlocalLmTest, 
BetaSave) { - - this->init(); + this->init (); std::vector orb{"s", "s", "p", "p"}; std::ifstream ifs; @@ -702,57 +765,52 @@ TEST_F(NumericalNonlocalLmTest, BetaSave) { // see Numerical_Nonlocal_Lm::plot() for details std::string dir = "./O/"; - mkdir(dir.c_str(), 0777); - - for (size_t i : {0, 2}) { + mkdir (dir.c_str (), 0777); - ASSERT_NO_THROW(nnl[i].plot(0)); + for (size_t i: {0, 2}) + { - std::string betar_fname = dir+"/O-" + orb[i] + "-proj-r.dat"; - std::string betak_fname = dir+"/O-" + orb[i] + "-proj-k.dat"; - std::string betaru_fname = dir+"/O-" + orb[i] + "-proj-ru.dat"; + ASSERT_NO_THROW (nnl[i].plot (0)); - EXPECT_EQ(true, this->check_file_match(nnl[i].nr, - nnl[i].r_radial, nnl[i].beta_r, tol, betar_fname)); - EXPECT_EQ(true, this->check_file_match(nnl[i].nk, - nnl[i].k_radial, nnl[i].beta_k, tol, betak_fname)); + std::string betar_fname = dir + "/O-" + orb[i] + "-proj-r.dat"; + std::string betak_fname = dir + "/O-" + orb[i] + "-proj-k.dat"; + std::string betaru_fname = dir + "/O-" + orb[i] + "-proj-ru.dat"; - double* r_uniform_mesh = new double[nnl[i].nr_uniform]; - for (int ir = 0; ir != nnl[i].nr_uniform; ++ir) { - r_uniform_mesh[ir] = ir*nnl[i].dr_uniform; - } - EXPECT_EQ(true, this->check_file_match(nnl[i].nr_uniform, - r_uniform_mesh, nnl[i].beta_uniform, tol, betaru_fname)); + EXPECT_EQ (true, this->check_file_match (nnl[i].nr, nnl[i].r_radial, nnl[i].beta_r, tol, betar_fname)); + EXPECT_EQ (true, this->check_file_match (nnl[i].nk, nnl[i].k_radial, nnl[i].beta_k, tol, betak_fname)); - remove(betar_fname.c_str()); - remove(betak_fname.c_str()); - remove(betaru_fname.c_str()); - } + double* r_uniform_mesh = new double[nnl[i].nr_uniform]; + for (int ir = 0; ir != nnl[i].nr_uniform; ++ir) + { + r_uniform_mesh[ir] = ir * nnl[i].dr_uniform; + } + EXPECT_EQ ( + true, + this->check_file_match (nnl[i].nr_uniform, r_uniform_mesh, nnl[i].beta_uniform, tol, betaru_fname)); - remove(dir.c_str()); + remove (betar_fname.c_str ()); + remove (betak_fname.c_str 
()); + remove (betaru_fname.c_str ()); + } + remove (dir.c_str ()); } - -int main(int argc, char **argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); - MPI_Comm_size(MPI_COMM_WORLD,&GlobalV::NPROC); - MPI_Comm_rank(MPI_COMM_WORLD,&GlobalV::MY_RANK); + MPI_Init (&argc, &argv); + MPI_Comm_size (MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; } - - - - - diff --git a/source/source_basis/module_ao/test/ORB_nonlocal_test.cpp b/source/source_basis/module_ao/test/ORB_nonlocal_test.cpp index eda76899307..81138c22645 100644 --- a/source/source_basis/module_ao/test/ORB_nonlocal_test.cpp +++ b/source/source_basis/module_ao/test/ORB_nonlocal_test.cpp @@ -25,89 +25,86 @@ * */ - class NumericalNonlocalTest : public ::testing::Test { -protected: - - void SetUp(); - void TearDown(); - - // object under unit test - Numerical_Nonlocal nn; - - // parameters used to initialize the Numerical_Nonlocal object - std::string elem_label_; - int ielem_; - int lmax_; - double rcut_max_; - std::string type_ps_; - int nproj_; - std::vector nnl; + protected: + void SetUp (); + void TearDown (); + + // object under unit test + Numerical_Nonlocal nn; + + // parameters used to initialize the Numerical_Nonlocal object + std::string elem_label_; + int ielem_; + int lmax_; + double rcut_max_; + std::string type_ps_; + int nproj_; + std::vector nnl; }; - -void NumericalNonlocalTest::SetUp() { - elem_label_ = "O"; - ielem_ = 1; - lmax_ = 2; - type_ps_ = "NC"; - nproj_ = 4; - - nnl.resize(nproj_); - nnl[0].rcut = 1.0; - nnl[1].rcut = 3.0; - nnl[2].rcut = 4.0; - nnl[3].rcut = 2.0; - rcut_max_ = 4.0; +void + NumericalNonlocalTest::SetUp () +{ + elem_label_ = "O"; + ielem_ = 1; + lmax_ = 2; + type_ps_ = "NC"; + 
nproj_ = 4; + + nnl.resize (nproj_); + nnl[0].rcut = 1.0; + nnl[1].rcut = 3.0; + nnl[2].rcut = 4.0; + nnl[3].rcut = 2.0; + rcut_max_ = 4.0; } - -void NumericalNonlocalTest::TearDown() { - +void + NumericalNonlocalTest::TearDown () +{ } +TEST_F (NumericalNonlocalTest, SetTypeInfo) +{ -TEST_F(NumericalNonlocalTest, SetTypeInfo) { - - nn.set_type_info(ielem_, elem_label_, type_ps_, lmax_, nproj_, &nnl[0]); + nn.set_type_info (ielem_, elem_label_, type_ps_, lmax_, nproj_, &nnl[0]); - EXPECT_EQ(nn.label, elem_label_); - EXPECT_EQ(nn.type, ielem_); - EXPECT_EQ(nn.lmax, lmax_); - EXPECT_DOUBLE_EQ(nn.rcut_max, rcut_max_); - EXPECT_EQ(nn.nproj, nproj_); + EXPECT_EQ (nn.label, elem_label_); + EXPECT_EQ (nn.type, ielem_); + EXPECT_EQ (nn.lmax, lmax_); + EXPECT_DOUBLE_EQ (nn.rcut_max, rcut_max_); + EXPECT_EQ (nn.nproj, nproj_); } +TEST_F (NumericalNonlocalTest, Getters) +{ -TEST_F(NumericalNonlocalTest, Getters) { - - nn.set_type_info(ielem_, elem_label_, type_ps_, lmax_, nproj_, &nnl[0]); + nn.set_type_info (ielem_, elem_label_, type_ps_, lmax_, nproj_, &nnl[0]); - EXPECT_EQ(nn.getLmax(), nn.lmax); - EXPECT_EQ(nn.getType(), nn.type); - EXPECT_EQ(nn.getLabel(), nn.label); - EXPECT_EQ(nn.getType_ps(), nn.type_ps); - EXPECT_EQ(nn.get_rcut_max(), rcut_max_); + EXPECT_EQ (nn.getLmax (), nn.lmax); + EXPECT_EQ (nn.getType (), nn.type); + EXPECT_EQ (nn.getLabel (), nn.label); + EXPECT_EQ (nn.getType_ps (), nn.type_ps); + EXPECT_EQ (nn.get_rcut_max (), rcut_max_); } - -int main(int argc, char **argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); - MPI_Comm_size(MPI_COMM_WORLD,&GlobalV::NPROC); - MPI_Comm_rank(MPI_COMM_WORLD,&GlobalV::MY_RANK); + MPI_Init (&argc, &argv); + MPI_Comm_size (MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + 
MPI_Finalize (); #endif return result; } - - diff --git a/source/source_basis/module_ao/test/ORB_read_test.cpp b/source/source_basis/module_ao/test/ORB_read_test.cpp index 78f137a62bc..f18d0e2a9ca 100644 --- a/source/source_basis/module_ao/test/ORB_read_test.cpp +++ b/source/source_basis/module_ao/test/ORB_read_test.cpp @@ -16,7 +16,7 @@ * unit test of class "LCAO_Orbitals" ***********************************************************/ -/** +/** * Tested functions: * * - Read_Orbitals @@ -33,16 +33,15 @@ class LcaoOrbitalsTest : public ::testing::Test { -protected: - - void SetUp(); - void TearDown(); + protected: + void SetUp (); + void TearDown (); // object under unit test LCAO_Orbitals lcao_; // initialize lcao_ with parameters below & call Read_Orbitals - void lcao_read(); + void lcao_read (); // parameters to pass to lcao_ std::ofstream ofs_log_; @@ -61,9 +60,10 @@ class LcaoOrbitalsTest : public ::testing::Test double Rmax_; }; - -void LcaoOrbitalsTest::SetUp() { - ofs_log_.open("ORB_read_test.log"); +void + LcaoOrbitalsTest::SetUp () +{ + ofs_log_.open ("ORB_read_test.log"); ntype_ = 2; lmax_ = 2; out_mat_r_ = 0; // unused variable @@ -73,8 +73,8 @@ void LcaoOrbitalsTest::SetUp() { read_in_flag_ = true; descriptor_file_ = "./lcao_H2O/jle.orb"; - orbital_file_.push_back("./lcao_H2O/H_gga_8au_60Ry_2s1p.orb"); - orbital_file_.push_back("./lcao_H2O/O_gga_7au_60Ry_2s2p1d.orb"); + orbital_file_.push_back ("./lcao_H2O/H_gga_8au_60Ry_2s1p.orb"); + orbital_file_.push_back ("./lcao_H2O/O_gga_7au_60Ry_2s2p1d.orb"); ecutwfc_ = 123.0; dk_ = 0.01; @@ -82,15 +82,16 @@ void LcaoOrbitalsTest::SetUp() { Rmax_ = 20; } - -void LcaoOrbitalsTest::lcao_read() { +void + LcaoOrbitalsTest::lcao_read () +{ // see UnitCell::read_atom_species in source_cell/read_atoms.cpp lcao_.read_in_flag = read_in_flag_; lcao_.descriptor_file = descriptor_file_; lcao_.orbital_file = orbital_file_; #ifdef __MPI - lcao_.bcast_files(ntype_, GlobalV::MY_RANK); + lcao_.bcast_files (ntype_, 
GlobalV::MY_RANK); #endif lcao_.ecutwfc = ecutwfc_; @@ -98,54 +99,56 @@ void LcaoOrbitalsTest::lcao_read() { lcao_.dR = dR_; lcao_.Rmax = Rmax_; - lcao_.Read_Orbitals(ofs_log_, ntype_, lmax_, deepks_setorb_, out_mat_r_, - force_flag_, my_rank_); + lcao_.Read_Orbitals (ofs_log_, ntype_, lmax_, deepks_setorb_, out_mat_r_, force_flag_, my_rank_); } - -void LcaoOrbitalsTest::TearDown() { +void + LcaoOrbitalsTest::TearDown () +{ } - -TEST_F(LcaoOrbitalsTest, ReadInFlag) { +TEST_F (LcaoOrbitalsTest, ReadInFlag) +{ read_in_flag_ = false; - EXPECT_EXIT(this->lcao_read(), testing::ExitedWithCode(1), ""); + EXPECT_EXIT (this->lcao_read (), testing::ExitedWithCode (1), ""); } - -TEST_F(LcaoOrbitalsTest, WrongOrbFile) { +TEST_F (LcaoOrbitalsTest, WrongOrbFile) +{ orbital_file_[0] = "./lcao_H2O/H_gga_8au_60Ry_2s1.orb"; - EXPECT_EXIT(this->lcao_read(), testing::ExitedWithCode(1), ""); + EXPECT_EXIT (this->lcao_read (), testing::ExitedWithCode (1), ""); } - -TEST_F(LcaoOrbitalsTest, WrongDescFile) { +TEST_F (LcaoOrbitalsTest, WrongDescFile) +{ descriptor_file_ = "./lcao_H2O/jl.orb"; - EXPECT_EXIT(this->lcao_read(), testing::ExitedWithCode(1), ""); + EXPECT_EXIT (this->lcao_read (), testing::ExitedWithCode (1), ""); } - -TEST_F(LcaoOrbitalsTest, BcastFiles) { +TEST_F (LcaoOrbitalsTest, BcastFiles) +{ #ifdef __MPI - if ( GlobalV::MY_RANK == 0 ) { - lcao_.orbital_file = orbital_file_; - } + if (GlobalV::MY_RANK == 0) + { + lcao_.orbital_file = orbital_file_; + } - if ( GlobalV::MY_RANK != 0) { - EXPECT_EQ(lcao_.orbital_file, std::vector{}); - } + if (GlobalV::MY_RANK != 0) + { + EXPECT_EQ (lcao_.orbital_file, std::vector{}); + } lcao_.read_in_flag = read_in_flag_; - lcao_.bcast_files(2, GlobalV::MY_RANK); + lcao_.bcast_files (2, GlobalV::MY_RANK); - EXPECT_EQ(lcao_.orbital_file, orbital_file_); + EXPECT_EQ (lcao_.orbital_file, orbital_file_); #endif } +TEST_F (LcaoOrbitalsTest, ReadOrbitals) +{ -TEST_F(LcaoOrbitalsTest, ReadOrbitals) { - - this->lcao_read(); + this->lcao_read (); // 
This test checks whether Read_Orbitals behaves as expected. @@ -161,184 +164,186 @@ TEST_F(LcaoOrbitalsTest, ReadOrbitals) { double max_tol = 1e-12; // H - EXPECT_EQ(ao0.getType(), 0); - EXPECT_EQ(ao0.getLabel(), "H"); - EXPECT_EQ(ao0.getLmax(), 1); - EXPECT_EQ(ao0.getNchi(0), 2); - EXPECT_EQ(ao0.getNchi(1), 1); - ASSERT_EQ(ao0.getTotal_nchi(), 3); - - std::vector L0_list{0,0,1}; - std::vector N0_list{0,1,0}; - - for (size_t i = 0; i != 3; ++i) { - int L = L0_list[i], N = N0_list[i]; - EXPECT_EQ(ao0.PhiLN(L,N).getLabel(), "H"); - EXPECT_EQ(ao0.PhiLN(L,N).getType(), 0); - EXPECT_EQ(ao0.PhiLN(L,N).getL(), L); - EXPECT_EQ(ao0.PhiLN(L,N).getChi(), N); - EXPECT_EQ(ao0.PhiLN(L,N).getNr(), 801); - EXPECT_EQ(ao0.PhiLN(L,N).getNk(), lcao_.kmesh); - EXPECT_EQ(ao0.PhiLN(L,N).getDk(), lcao_.dk); - EXPECT_EQ(ao0.PhiLN(L,N).getDruniform(), lcao_.dr_uniform); - - for (int ir = 0; ir != 801; ++ir) { - EXPECT_DOUBLE_EQ(ao0.PhiLN(L,N).getRab(ir), 0.01); - EXPECT_DOUBLE_EQ(ao0.PhiLN(L,N).getRadial(ir), 0.01*ir); + EXPECT_EQ (ao0.getType (), 0); + EXPECT_EQ (ao0.getLabel (), "H"); + EXPECT_EQ (ao0.getLmax (), 1); + EXPECT_EQ (ao0.getNchi (0), 2); + EXPECT_EQ (ao0.getNchi (1), 1); + ASSERT_EQ (ao0.getTotal_nchi (), 3); + + std::vector L0_list{0, 0, 1}; + std::vector N0_list{0, 1, 0}; + + for (size_t i = 0; i != 3; ++i) + { + int L = L0_list[i], N = N0_list[i]; + EXPECT_EQ (ao0.PhiLN (L, N).getLabel (), "H"); + EXPECT_EQ (ao0.PhiLN (L, N).getType (), 0); + EXPECT_EQ (ao0.PhiLN (L, N).getL (), L); + EXPECT_EQ (ao0.PhiLN (L, N).getChi (), N); + EXPECT_EQ (ao0.PhiLN (L, N).getNr (), 801); + EXPECT_EQ (ao0.PhiLN (L, N).getNk (), lcao_.kmesh); + EXPECT_EQ (ao0.PhiLN (L, N).getDk (), lcao_.dk); + EXPECT_EQ (ao0.PhiLN (L, N).getDruniform (), lcao_.dr_uniform); + + for (int ir = 0; ir != 801; ++ir) + { + EXPECT_DOUBLE_EQ (ao0.PhiLN (L, N).getRab (ir), 0.01); + EXPECT_DOUBLE_EQ (ao0.PhiLN (L, N).getRadial (ir), 0.01 * ir); + } } - } - - EXPECT_NEAR(ao0.PhiLN(0,0).getPsi(0 ), 1.837183001954e+00, 
max_tol); - EXPECT_NEAR(ao0.PhiLN(0,0).getPsi(1 ), 1.836944589913e+00, max_tol); - EXPECT_NEAR(ao0.PhiLN(0,0).getPsi(4 ), 1.833374417163e+00, max_tol); - EXPECT_NEAR(ao0.PhiLN(0,0).getPsi(799), 3.037233152557e-07, max_tol); - EXPECT_NEAR(ao0.PhiLN(0,0).getPsi(800), 0.000000000000e+00, max_tol); - EXPECT_NEAR(ao0.PhiLN(0,1).getPsi(0 ), -2.482045090982e+00, max_tol); - EXPECT_NEAR(ao0.PhiLN(0,1).getPsi(1 ), -2.481575045574e+00, max_tol); - EXPECT_NEAR(ao0.PhiLN(0,1).getPsi(4 ), -2.474535579529e+00, max_tol); - EXPECT_NEAR(ao0.PhiLN(0,1).getPsi(799), 1.115867959482e-06, max_tol); - EXPECT_NEAR(ao0.PhiLN(0,1).getPsi(800), 0.000000000000e+00, max_tol); + EXPECT_NEAR (ao0.PhiLN (0, 0).getPsi (0), 1.837183001954e+00, max_tol); + EXPECT_NEAR (ao0.PhiLN (0, 0).getPsi (1), 1.836944589913e+00, max_tol); + EXPECT_NEAR (ao0.PhiLN (0, 0).getPsi (4), 1.833374417163e+00, max_tol); + EXPECT_NEAR (ao0.PhiLN (0, 0).getPsi (799), 3.037233152557e-07, max_tol); + EXPECT_NEAR (ao0.PhiLN (0, 0).getPsi (800), 0.000000000000e+00, max_tol); - EXPECT_NEAR(ao0.PhiLN(1,0).getPsi(0 ), 0.000000000000e+00, max_tol); - EXPECT_NEAR(ao0.PhiLN(1,0).getPsi(1 ), -2.619148756396e-02, max_tol); - EXPECT_NEAR(ao0.PhiLN(1,0).getPsi(4 ), -1.045849793771e-01, max_tol); - EXPECT_NEAR(ao0.PhiLN(1,0).getPsi(799), 3.217573100688e-06, max_tol); - EXPECT_NEAR(ao0.PhiLN(1,0).getPsi(800), 0.000000000000e+00, max_tol); + EXPECT_NEAR (ao0.PhiLN (0, 1).getPsi (0), -2.482045090982e+00, max_tol); + EXPECT_NEAR (ao0.PhiLN (0, 1).getPsi (1), -2.481575045574e+00, max_tol); + EXPECT_NEAR (ao0.PhiLN (0, 1).getPsi (4), -2.474535579529e+00, max_tol); + EXPECT_NEAR (ao0.PhiLN (0, 1).getPsi (799), 1.115867959482e-06, max_tol); + EXPECT_NEAR (ao0.PhiLN (0, 1).getPsi (800), 0.000000000000e+00, max_tol); + EXPECT_NEAR (ao0.PhiLN (1, 0).getPsi (0), 0.000000000000e+00, max_tol); + EXPECT_NEAR (ao0.PhiLN (1, 0).getPsi (1), -2.619148756396e-02, max_tol); + EXPECT_NEAR (ao0.PhiLN (1, 0).getPsi (4), -1.045849793771e-01, max_tol); + 
EXPECT_NEAR (ao0.PhiLN (1, 0).getPsi (799), 3.217573100688e-06, max_tol); + EXPECT_NEAR (ao0.PhiLN (1, 0).getPsi (800), 0.000000000000e+00, max_tol); // O - EXPECT_EQ(ao1.getType(), 1); - EXPECT_EQ(ao1.getLabel(), "O"); - EXPECT_EQ(ao1.getLmax(), 2); - EXPECT_EQ(ao1.getNchi(0), 2); - EXPECT_EQ(ao1.getNchi(1), 2); - EXPECT_EQ(ao1.getNchi(2), 1); - ASSERT_EQ(ao1.getTotal_nchi(), 5); - - std::vector L1_list{0,0,1,1,2}; - std::vector N1_list{0,1,0,1,0}; - - for (size_t i = 0; i != 5; ++i) { - int L = L1_list[i], N = N1_list[i]; - EXPECT_EQ(ao1.PhiLN(L,N).getLabel(), "O"); - EXPECT_EQ(ao1.PhiLN(L,N).getType(), 1); - EXPECT_EQ(ao1.PhiLN(L,N).getL(), L); - EXPECT_EQ(ao1.PhiLN(L,N).getChi(), N); - EXPECT_EQ(ao1.PhiLN(L,N).getNr(), 701); - EXPECT_EQ(ao1.PhiLN(L,N).getNk(), lcao_.kmesh); - EXPECT_EQ(ao1.PhiLN(L,N).getDk(), lcao_.dk); - EXPECT_EQ(ao1.PhiLN(L,N).getDruniform(), lcao_.dr_uniform); - - for (int ir = 0; ir != 701; ++ir) { - EXPECT_DOUBLE_EQ(ao1.PhiLN(L,N).getRab(ir), 0.01); - EXPECT_DOUBLE_EQ(ao1.PhiLN(L,N).getRadial(ir), 0.01*ir); + EXPECT_EQ (ao1.getType (), 1); + EXPECT_EQ (ao1.getLabel (), "O"); + EXPECT_EQ (ao1.getLmax (), 2); + EXPECT_EQ (ao1.getNchi (0), 2); + EXPECT_EQ (ao1.getNchi (1), 2); + EXPECT_EQ (ao1.getNchi (2), 1); + ASSERT_EQ (ao1.getTotal_nchi (), 5); + + std::vector L1_list{0, 0, 1, 1, 2}; + std::vector N1_list{0, 1, 0, 1, 0}; + + for (size_t i = 0; i != 5; ++i) + { + int L = L1_list[i], N = N1_list[i]; + EXPECT_EQ (ao1.PhiLN (L, N).getLabel (), "O"); + EXPECT_EQ (ao1.PhiLN (L, N).getType (), 1); + EXPECT_EQ (ao1.PhiLN (L, N).getL (), L); + EXPECT_EQ (ao1.PhiLN (L, N).getChi (), N); + EXPECT_EQ (ao1.PhiLN (L, N).getNr (), 701); + EXPECT_EQ (ao1.PhiLN (L, N).getNk (), lcao_.kmesh); + EXPECT_EQ (ao1.PhiLN (L, N).getDk (), lcao_.dk); + EXPECT_EQ (ao1.PhiLN (L, N).getDruniform (), lcao_.dr_uniform); + + for (int ir = 0; ir != 701; ++ir) + { + EXPECT_DOUBLE_EQ (ao1.PhiLN (L, N).getRab (ir), 0.01); + EXPECT_DOUBLE_EQ (ao1.PhiLN (L, N).getRadial 
(ir), 0.01 * ir); + } } - } - - EXPECT_NEAR(ao1.PhiLN(0,0).getPsi(0), 1.208504975904e+00, max_tol); - EXPECT_NEAR(ao1.PhiLN(0,0).getPsi(1), 1.208605373194e+00, max_tol); - EXPECT_NEAR(ao1.PhiLN(0,0).getPsi(4), 1.210103935461e+00, max_tol); - EXPECT_NEAR(ao1.PhiLN(0,0).getPsi(699), 4.465396560257e-08, max_tol); - EXPECT_NEAR(ao1.PhiLN(0,0).getPsi(700), 0.0, max_tol); - - EXPECT_NEAR(ao1.PhiLN(0,1).getPsi(0), 7.254873428942e-01, max_tol); - EXPECT_NEAR(ao1.PhiLN(0,1).getPsi(1), 7.256666701836e-01, max_tol); - EXPECT_NEAR(ao1.PhiLN(0,1).getPsi(4), 7.283448557011e-01, max_tol); - EXPECT_NEAR(ao1.PhiLN(0,1).getPsi(699), -1.916246212603e-06, max_tol); - EXPECT_NEAR(ao1.PhiLN(0,1).getPsi(700), 0.0, max_tol); - - EXPECT_NEAR(ao1.PhiLN(1,0).getPsi(0), 0.0, max_tol); - EXPECT_NEAR(ao1.PhiLN(1,0).getPsi(1), 4.626669306440e-02, max_tol); - EXPECT_NEAR(ao1.PhiLN(1,0).getPsi(4), 1.845014292772e-01, max_tol); - EXPECT_NEAR(ao1.PhiLN(1,0).getPsi(699), 2.870401658966e-07, max_tol); - EXPECT_NEAR(ao1.PhiLN(1,0).getPsi(700), 0.0, max_tol); - - EXPECT_NEAR(ao1.PhiLN(1,1).getPsi(0), 0.0, max_tol); - EXPECT_NEAR(ao1.PhiLN(1,1).getPsi(1), 3.375340101333e-02, max_tol); - EXPECT_NEAR(ao1.PhiLN(1,1).getPsi(4), 1.346256082234e-01, max_tol); - EXPECT_NEAR(ao1.PhiLN(1,1).getPsi(699), -2.771091616120e-06, max_tol); - EXPECT_NEAR(ao1.PhiLN(1,1).getPsi(700), 0.0, max_tol); - - EXPECT_NEAR(ao1.PhiLN(2,0).getPsi(0), 0.0, max_tol); - EXPECT_NEAR(ao1.PhiLN(2,0).getPsi(1), -3.343626342662e-04, max_tol); - EXPECT_NEAR(ao1.PhiLN(2,0).getPsi(4), -5.337546547975e-03, max_tol); - EXPECT_NEAR(ao1.PhiLN(2,0).getPsi(699), 1.396308876444e-06, max_tol); - EXPECT_NEAR(ao1.PhiLN(2,0).getPsi(700), 0.0, max_tol); + EXPECT_NEAR (ao1.PhiLN (0, 0).getPsi (0), 1.208504975904e+00, max_tol); + EXPECT_NEAR (ao1.PhiLN (0, 0).getPsi (1), 1.208605373194e+00, max_tol); + EXPECT_NEAR (ao1.PhiLN (0, 0).getPsi (4), 1.210103935461e+00, max_tol); + EXPECT_NEAR (ao1.PhiLN (0, 0).getPsi (699), 4.465396560257e-08, max_tol); + 
EXPECT_NEAR (ao1.PhiLN (0, 0).getPsi (700), 0.0, max_tol); + + EXPECT_NEAR (ao1.PhiLN (0, 1).getPsi (0), 7.254873428942e-01, max_tol); + EXPECT_NEAR (ao1.PhiLN (0, 1).getPsi (1), 7.256666701836e-01, max_tol); + EXPECT_NEAR (ao1.PhiLN (0, 1).getPsi (4), 7.283448557011e-01, max_tol); + EXPECT_NEAR (ao1.PhiLN (0, 1).getPsi (699), -1.916246212603e-06, max_tol); + EXPECT_NEAR (ao1.PhiLN (0, 1).getPsi (700), 0.0, max_tol); + + EXPECT_NEAR (ao1.PhiLN (1, 0).getPsi (0), 0.0, max_tol); + EXPECT_NEAR (ao1.PhiLN (1, 0).getPsi (1), 4.626669306440e-02, max_tol); + EXPECT_NEAR (ao1.PhiLN (1, 0).getPsi (4), 1.845014292772e-01, max_tol); + EXPECT_NEAR (ao1.PhiLN (1, 0).getPsi (699), 2.870401658966e-07, max_tol); + EXPECT_NEAR (ao1.PhiLN (1, 0).getPsi (700), 0.0, max_tol); + + EXPECT_NEAR (ao1.PhiLN (1, 1).getPsi (0), 0.0, max_tol); + EXPECT_NEAR (ao1.PhiLN (1, 1).getPsi (1), 3.375340101333e-02, max_tol); + EXPECT_NEAR (ao1.PhiLN (1, 1).getPsi (4), 1.346256082234e-01, max_tol); + EXPECT_NEAR (ao1.PhiLN (1, 1).getPsi (699), -2.771091616120e-06, max_tol); + EXPECT_NEAR (ao1.PhiLN (1, 1).getPsi (700), 0.0, max_tol); + + EXPECT_NEAR (ao1.PhiLN (2, 0).getPsi (0), 0.0, max_tol); + EXPECT_NEAR (ao1.PhiLN (2, 0).getPsi (1), -3.343626342662e-04, max_tol); + EXPECT_NEAR (ao1.PhiLN (2, 0).getPsi (4), -5.337546547975e-03, max_tol); + EXPECT_NEAR (ao1.PhiLN (2, 0).getPsi (699), 1.396308876444e-06, max_tol); + EXPECT_NEAR (ao1.PhiLN (2, 0).getPsi (700), 0.0, max_tol); // Descriptor - EXPECT_EQ(aod.getType(), 0); - EXPECT_EQ(aod.getLabel(), ""); - EXPECT_EQ(aod.getLmax(), 2); - EXPECT_EQ(aod.getNchi(0), 2); - EXPECT_EQ(aod.getNchi(1), 2); - EXPECT_EQ(aod.getNchi(2), 2); - ASSERT_EQ(aod.getTotal_nchi(), 6); - - std::vector Ld_list{0,0,1,1,2,2}; - std::vector Nd_list{0,1,0,1,0,1}; - - for (size_t i = 0; i != 6; ++i) { - int L = Ld_list[i], N = Nd_list[i]; - EXPECT_EQ(aod.PhiLN(L,N).getLabel(), ""); - EXPECT_EQ(aod.PhiLN(L,N).getType(), 0); - EXPECT_EQ(aod.PhiLN(L,N).getL(), L); - 
EXPECT_EQ(aod.PhiLN(L,N).getChi(), N); - EXPECT_EQ(aod.PhiLN(L,N).getNr(), 205); - EXPECT_EQ(aod.PhiLN(L,N).getNk(), lcao_.kmesh); - EXPECT_EQ(aod.PhiLN(L,N).getDk(), lcao_.dk); - EXPECT_EQ(aod.PhiLN(L,N).getDruniform(), lcao_.dr_uniform); - - for (int ir = 0; ir != 205; ++ir) { - EXPECT_DOUBLE_EQ(aod.PhiLN(L,N).getRab(ir), 0.01); - EXPECT_DOUBLE_EQ(aod.PhiLN(L,N).getRadial(ir), 0.01*ir); + EXPECT_EQ (aod.getType (), 0); + EXPECT_EQ (aod.getLabel (), ""); + EXPECT_EQ (aod.getLmax (), 2); + EXPECT_EQ (aod.getNchi (0), 2); + EXPECT_EQ (aod.getNchi (1), 2); + EXPECT_EQ (aod.getNchi (2), 2); + ASSERT_EQ (aod.getTotal_nchi (), 6); + + std::vector Ld_list{0, 0, 1, 1, 2, 2}; + std::vector Nd_list{0, 1, 0, 1, 0, 1}; + + for (size_t i = 0; i != 6; ++i) + { + int L = Ld_list[i], N = Nd_list[i]; + EXPECT_EQ (aod.PhiLN (L, N).getLabel (), ""); + EXPECT_EQ (aod.PhiLN (L, N).getType (), 0); + EXPECT_EQ (aod.PhiLN (L, N).getL (), L); + EXPECT_EQ (aod.PhiLN (L, N).getChi (), N); + EXPECT_EQ (aod.PhiLN (L, N).getNr (), 205); + EXPECT_EQ (aod.PhiLN (L, N).getNk (), lcao_.kmesh); + EXPECT_EQ (aod.PhiLN (L, N).getDk (), lcao_.dk); + EXPECT_EQ (aod.PhiLN (L, N).getDruniform (), lcao_.dr_uniform); + + for (int ir = 0; ir != 205; ++ir) + { + EXPECT_DOUBLE_EQ (aod.PhiLN (L, N).getRab (ir), 0.01); + EXPECT_DOUBLE_EQ (aod.PhiLN (L, N).getRadial (ir), 0.01 * ir); + } } - } // TODO chi value check is skipped for now // orbitals in jle.orb are not normalized // getPsi() does not gives the numbers in jle.orb } +TEST_F (LcaoOrbitalsTest, Getters) +{ -TEST_F(LcaoOrbitalsTest, Getters) { - - this->lcao_read(); - - EXPECT_EQ(lcao_.get_ecutwfc(), lcao_.ecutwfc); - EXPECT_EQ(lcao_.get_kmesh(), lcao_.kmesh); - EXPECT_EQ(lcao_.get_dk(), lcao_.dk); - EXPECT_EQ(lcao_.get_dR(), lcao_.dR); - EXPECT_EQ(lcao_.get_Rmax(), lcao_.Rmax); - EXPECT_EQ(lcao_.get_lmax(), lcao_.lmax); - EXPECT_EQ(lcao_.get_lmax_d(), lcao_.lmax_d); - EXPECT_EQ(lcao_.get_nchimax(), lcao_.nchimax); - EXPECT_EQ(lcao_.get_nchimax_d(), 
lcao_.nchimax_d); - EXPECT_EQ(lcao_.get_ntype(), lcao_.ntype); - EXPECT_EQ(lcao_.get_dr_uniform(), lcao_.dr_uniform); - EXPECT_EQ(lcao_.get_rcutmax_Phi(), lcao_.rcutmax_Phi); + this->lcao_read (); + + EXPECT_EQ (lcao_.get_ecutwfc (), lcao_.ecutwfc); + EXPECT_EQ (lcao_.get_kmesh (), lcao_.kmesh); + EXPECT_EQ (lcao_.get_dk (), lcao_.dk); + EXPECT_EQ (lcao_.get_dR (), lcao_.dR); + EXPECT_EQ (lcao_.get_Rmax (), lcao_.Rmax); + EXPECT_EQ (lcao_.get_lmax (), lcao_.lmax); + EXPECT_EQ (lcao_.get_lmax_d (), lcao_.lmax_d); + EXPECT_EQ (lcao_.get_nchimax (), lcao_.nchimax); + EXPECT_EQ (lcao_.get_nchimax_d (), lcao_.nchimax_d); + EXPECT_EQ (lcao_.get_ntype (), lcao_.ntype); + EXPECT_EQ (lcao_.get_dr_uniform (), lcao_.dr_uniform); + EXPECT_EQ (lcao_.get_rcutmax_Phi (), lcao_.rcutmax_Phi); } - -int main(int argc, char **argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); - MPI_Comm_size(MPI_COMM_WORLD,&GlobalV::NPROC); - MPI_Comm_rank(MPI_COMM_WORLD,&GlobalV::MY_RANK); + MPI_Init (&argc, &argv); + MPI_Comm_size (MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; -} - - +} diff --git a/source/source_basis/module_ao/test/ORB_unittest.cpp b/source/source_basis/module_ao/test/ORB_unittest.cpp index adec0796c31..b853bc5f7cf 100644 --- a/source/source_basis/module_ao/test/ORB_unittest.cpp +++ b/source/source_basis/module_ao/test/ORB_unittest.cpp @@ -1,6 +1,7 @@ #include "ORB_unittest.h" -void test_orb::SetUp() +void + test_orb::SetUp () { // test constructor /*Center2_Orb::Orb11 testcto = Center2_Orb::Orb11( @@ -8,30 +9,33 @@ void test_orb::SetUp() ORB.Phi[0].PhiLN(0, 0), OGT.MOT, Center2_MGT);*/ // 1. 
setup orbitals - this->ofs_running.open("log.txt"); - this->count_ntype(); - this->set_files(); - this->set_ekcut(); + this->ofs_running.open ("log.txt"); + this->count_ntype (); + this->set_files (); + this->set_ekcut (); // 2. setup 2-center-integral tables by basic methods // not including center2orb, it will be set up when needed // in some test cases. - this->set_orbs(); + this->set_orbs (); // this->set_center2orbs(); } -void test_orb::TearDown() +void + test_orb::TearDown () { - int* nproj = new int[ORB.get_ntype()]; - for (int i = 0; i < ORB.get_ntype(); ++i) { - nproj[i] = 0; -} - ooo.clear_after_ions(OGT, ORB, 0, nproj); + int* nproj = new int[ORB.get_ntype ()]; + for (int i = 0; i < ORB.get_ntype (); ++i) + { + nproj[i] = 0; + } + ooo.clear_after_ions (OGT, ORB, 0, nproj); delete[] nproj; return; } -void test_orb::set_ekcut() +void + test_orb::set_ekcut () { std::cout << "set lcao_ecut from LCAO files" << std::endl; // set as max of ekcut from every element @@ -40,164 +44,173 @@ void test_orb::set_ekcut() std::ifstream in_ao; for (int it = 0; it < ntype_read; it++) - { - double ek_current; - - in_ao.open((this->case_dir + ORB.orbital_file[it].c_str())); - if (!in_ao) { - std::cout << "error : cannot find LCAO file : " << ORB.orbital_file[it] << std::endl; - } - ORB.orbital_file[it] = this->case_dir + ORB.orbital_file[it].c_str(); - std::string word; - while (in_ao.good()) - { - in_ao >> word; - if (word == "Cutoff(Ry)") { - break; -} - } - in_ao >> ek_current; - lcao_ecut = std::max(lcao_ecut, ek_current); + double ek_current; - in_ao.close(); - } + in_ao.open ((this->case_dir + ORB.orbital_file[it].c_str ())); + if (!in_ao) + { + std::cout << "error : cannot find LCAO file : " << ORB.orbital_file[it] << std::endl; + } + ORB.orbital_file[it] = this->case_dir + ORB.orbital_file[it].c_str (); + std::string word; + while (in_ao.good ()) + { + in_ao >> word; + if (word == "Cutoff(Ry)") + { + break; + } + } + in_ao >> ek_current; + lcao_ecut = std::max 
(lcao_ecut, ek_current); + + in_ao.close (); + } ORB.ecutwfc = lcao_ecut; std::cout << "lcao_ecut : " << lcao_ecut << std::endl; return; } -void test_orb::set_orbs() +void + test_orb::set_orbs () { - ORB.init(ofs_running, - ntype_read, - "./", - orbital_fn.data(), - descriptor_file, - lmax, - lcao_ecut, - lcao_dk, - lcao_dr, - lcao_rmax, - 0, - 0, - 1, // force - 0); // myrank - - int* nproj = new int[ORB.get_ntype()]; - for (int i = 0; i < ORB.get_ntype(); ++i) { - nproj[i] = 0; -} - const Numerical_Nonlocal beta_[ORB.get_ntype()]; - - ooo.set_orb_tables(ofs_running, - OGT, - ORB, - lat0, - 0, // no out_descriptor - lmax, - 0, // no nproj - nproj, - beta_); + ORB.init (ofs_running, + ntype_read, + "./", + orbital_fn.data (), + descriptor_file, + lmax, + lcao_ecut, + lcao_dk, + lcao_dr, + lcao_rmax, + 0, + 0, + 1, // force + 0); // myrank + + int* nproj = new int[ORB.get_ntype ()]; + for (int i = 0; i < ORB.get_ntype (); ++i) + { + nproj[i] = 0; + } + const Numerical_Nonlocal beta_[ORB.get_ntype ()]; + + ooo.set_orb_tables (ofs_running, + OGT, + ORB, + lat0, + 0, // no out_descriptor + lmax, + 0, // no nproj + nproj, + beta_); delete[] nproj; return; } -void test_orb::set_files() +void + test_orb::set_files () { std::cout << "read names of atomic basis set files" << std::endl; - std::ifstream ifs((this->case_dir + "STRU"), std::ios::in); + std::ifstream ifs ((this->case_dir + "STRU"), std::ios::in); - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "NUMERICAL_ORBITAL"); + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "NUMERICAL_ORBITAL"); - orbital_fn.resize(ntype_read); + orbital_fn.resize (ntype_read); for (int it = 0; it < ntype_read; it++) - { - ifs >> orbital_fn[it]; - ORB.orbital_file.push_back(orbital_fn[it]); + { + ifs >> orbital_fn[it]; + ORB.orbital_file.push_back (orbital_fn[it]); - std::cout << "Numerical orbital file : " << orbital_fn[it] << std::endl; - } + std::cout << "Numerical orbital file : " << orbital_fn[it] << std::endl; + } return; } -void 
test_orb::count_ntype() +void + test_orb::count_ntype () { std::cout << "count number of atom types" << std::endl; std::cout << this->case_dir + "STRU" << std::endl; - std::ifstream ifs((this->case_dir + "STRU"), std::ios::in); + std::ifstream ifs ((this->case_dir + "STRU"), std::ios::in); if (!ifs) - { - std::cout << "ERROR : file STRU does not exist" << std::endl; - exit(1); - } + { + std::cout << "ERROR : file STRU does not exist" << std::endl; + exit (1); + } - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "ATOMIC_SPECIES"); + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "ATOMIC_SPECIES"); ntype_read = 0; std::string x; - ifs.rdstate(); - while (ifs.good()) - { - // read a line - std::getline(ifs, x); - - // trim white space - const char* typeOfWhitespaces = " \t\n\r\f\v"; - x.erase(x.find_last_not_of(typeOfWhitespaces) + 1); - x.erase(0, x.find_first_not_of(typeOfWhitespaces)); - - if (x == "LATTICE_CONSTANT" || x == "NUMERICAL_ORBITAL" || x == "LATTICE_VECTORS" || x == "ATOMIC_POSITIONS") { - break; -} + ifs.rdstate (); + while (ifs.good ()) + { + // read a line + std::getline (ifs, x); - std::string tmpid = x.substr(0, 1); - if (!x.empty() && tmpid != "#") { - ntype_read++; -} - } + // trim white space + const char* typeOfWhitespaces = " \t\n\r\f\v"; + x.erase (x.find_last_not_of (typeOfWhitespaces) + 1); + x.erase (0, x.find_first_not_of (typeOfWhitespaces)); + + if (x == "LATTICE_CONSTANT" || x == "NUMERICAL_ORBITAL" || x == "LATTICE_VECTORS" + || x == "ATOMIC_POSITIONS") + { + break; + } + + std::string tmpid = x.substr (0, 1); + if (!x.empty () && tmpid != "#") + { + ntype_read++; + } + } std::cout << "ntype=" << ntype_read << std::endl; - ifs.close(); + ifs.close (); return; } -void test_orb::set_center2orbs() +void + test_orb::set_center2orbs () { // 1. setup Gaunt coeffs - Center2_MGT.init_Gaunt_CH(lmax); - Center2_MGT.init_Gaunt(lmax); + Center2_MGT.init_Gaunt_CH (lmax); + Center2_MGT.init_Gaunt (lmax); // 2. 
setup tables - for (int TA = 0; TA < ORB.get_ntype(); TA++) - { - for (int TB = 0; TB < ORB.get_ntype(); TB++) + for (int TA = 0; TA < ORB.get_ntype (); TA++) { - for (int LA = 0; LA <= ORB.Phi[TA].getLmax(); LA++) - { - for (int NA = 0; NA < ORB.Phi[TA].getNchi(LA); ++NA) + for (int TB = 0; TB < ORB.get_ntype (); TB++) { - for (int LB = 0; LB <= ORB.Phi[TB].getLmax(); ++LB) - { - for (int NB = 0; NB < ORB.Phi[TB].getNchi(LB); ++NB) + for (int LA = 0; LA <= ORB.Phi[TA].getLmax (); LA++) { - this->set_single_c2o(TA, TB, LA, NA, LB, NB); - // test_center2_orb11[TA][TB][LA][NA][LB].insert( - // make_pair(NB, MockCenter2Orb11(ORB.Phi[TA].PhiLN(LA, NA), - // ORB.Phi[TB].PhiLN(LB, NB), OGT.MOT, Center2_MGT))); + for (int NA = 0; NA < ORB.Phi[TA].getNchi (LA); ++NA) + { + for (int LB = 0; LB <= ORB.Phi[TB].getLmax (); ++LB) + { + for (int NB = 0; NB < ORB.Phi[TB].getNchi (LB); ++NB) + { + this->set_single_c2o (TA, TB, LA, NA, LB, NB); + // test_center2_orb11[TA][TB][LA][NA][LB].insert( + // make_pair(NB, MockCenter2Orb11(ORB.Phi[TA].PhiLN(LA, NA), + // ORB.Phi[TB].PhiLN(LB, NB), OGT.MOT, Center2_MGT))); + } + } + } } - } } - } } - } for (auto& co1: this->test_center2_orb11) for (auto& co2: co1.second) @@ -205,18 +218,20 @@ void test_orb::set_center2orbs() for (auto& co4: co3.second) for (auto& co5: co4.second) for (auto& co6: co5.second) - co6.second->init_radial_table(); + co6.second->init_radial_table (); } template -void test_orb::set_single_c2o(int TA, int TB, int LA, int NA, int LB, int NB) +void + test_orb::set_single_c2o (int TA, int TB, int LA, int NA, int LB, int NB) { - this->test_center2_orb11[TA][TB][LA][NA][LB].insert(std::make_pair( + this->test_center2_orb11[TA][TB][LA][NA][LB].insert (std::make_pair ( NB, - std::make_unique(ORB.Phi[TA].PhiLN(LA, NA), ORB.Phi[TB].PhiLN(LB, NB), OGT.MOT.pSB, Center2_MGT))); + std::make_unique (ORB.Phi[TA].PhiLN (LA, NA), ORB.Phi[TB].PhiLN (LB, NB), OGT.MOT.pSB, Center2_MGT))); } -double test_orb::randr(double Rmax) +double + 
test_orb::randr (double Rmax) { - return double(rand()) / double(RAND_MAX) * Rmax; + return double (rand ()) / double (RAND_MAX) * Rmax; } /* diff --git a/source/source_basis/module_ao/test/ORB_unittest.h b/source/source_basis/module_ao/test/ORB_unittest.h index b8c2d70f14e..30099dd389c 100644 --- a/source/source_basis/module_ao/test/ORB_unittest.h +++ b/source/source_basis/module_ao/test/ORB_unittest.h @@ -6,7 +6,7 @@ #include "source_lcao/center2_orb-orb11.h" #include "gtest/gtest.h" -//#include "mock_center2.h" +// #include "mock_center2.h" #include #include #include @@ -17,8 +17,8 @@ class test_orb : public testing::Test { protected: - void SetUp() override; - void TearDown() override; + void SetUp () override; + void TearDown () override; public: LCAO_Orbitals ORB; @@ -41,15 +41,15 @@ class test_orb : public testing::Test std::map>>>>>> mock_center2_orb11; */ - void count_ntype(); // from STRU, count types of elements - void set_files(); // from STRU, read names of LCAO files - void set_ekcut(); // from LCAO files, read and set ekcut - void set_orbs(); // interface to Read_PAO - void set_center2orbs(); // interface to Center2orb + void count_ntype (); // from STRU, count types of elements + void set_files (); // from STRU, read names of LCAO files + void set_ekcut (); // from LCAO files, read and set ekcut + void set_orbs (); // interface to Read_PAO + void set_center2orbs (); // interface to Center2orb template - void set_single_c2o(int TA, int TB, int LA, int NA, int LB, int NB); - double randr(double Rmax); - void gen_table_center2(); + void set_single_c2o (int TA, int TB, int LA, int NA, int LB, int NB); + double randr (double Rmax); + void gen_table_center2 (); bool force_flag = false; int my_rank = 0; diff --git a/source/source_basis/module_ao/test/parallel_orbitals_test.cpp b/source/source_basis/module_ao/test/parallel_orbitals_test.cpp index fe09d9fca63..0c6d3d0c256 100644 --- a/source/source_basis/module_ao/test/parallel_orbitals_test.cpp +++ 
b/source/source_basis/module_ao/test/parallel_orbitals_test.cpp @@ -15,197 +15,214 @@ * - get_indexes_col * - get_indexes_row(iat) * - get_indexes_col(iat) - * + * * the test framework is based on parallel_2d_test.cpp -*/ + */ class TestParaO : public testing::Test { -protected: + protected: int dsize; int my_rank = 0; - std::vector> sizes{ {50, 50} , {60, 60}}; - std::vector nat{ 10, 5}; - std::vector nbs{ 1,2,3 }; + std::vector> sizes{{50, 50}, {60, 60}}; + std::vector nat{10, 5}; + std::vector nbs{1, 2, 3}; #ifdef __MPI - void SetUp() override + void + SetUp () override { - MPI_Comm_size(MPI_COMM_WORLD, &dsize); - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size (MPI_COMM_WORLD, &dsize); + MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); } - void TearDown() override + void + TearDown () override { } #endif }; #ifdef __MPI -TEST_F(TestParaO, Divide2D) +TEST_F (TestParaO, Divide2D) { - for (auto& size : sizes) - { - int gr = size.first; - int gc = size.second; - for (auto nb : nbs) + for (auto& size: sizes) { - Parallel_Orbitals po; + int gr = size.first; + int gc = size.second; + for (auto nb: nbs) + { + Parallel_Orbitals po; - for (auto mode : { 0,1 }) - { - po.init(gr, gc, nb, MPI_COMM_WORLD, mode); - EXPECT_EQ(po.get_block_size(), nb); + for (auto mode: {0, 1}) + { + po.init (gr, gc, nb, MPI_COMM_WORLD, mode); + EXPECT_EQ (po.get_block_size (), nb); - //1. dim0 and dim1 - EXPECT_EQ(po.dim0 * po.dim1, dsize); - if (mode)EXPECT_LE(po.dim1, po.dim0); - else EXPECT_LE(po.dim0, po.dim1); + // 1. dim0 and dim1 + EXPECT_EQ (po.dim0 * po.dim1, dsize); + if (mode) + EXPECT_LE (po.dim1, po.dim0); + else + EXPECT_LE (po.dim0, po.dim1); - //2. comm_2D - //EXPECT_NE(po.comm_2D, MPI_COMM_NULL); + // 2. comm_2D + // EXPECT_NE(po.comm_2D, MPI_COMM_NULL); - //3. 
local2global and local sizes - int lr = po.get_row_size(); - int lc = po.get_col_size(); - EXPECT_EQ(lr * lc, po.get_local_size()); - auto cal_lsize = [](const int& gsize, const int& nb, const int& np, const int& pcoord) -> int - { - int nblock = gsize / nb; - return nblock / np * nb + static_cast(nblock % np > pcoord) * nb //full blocks' contribution - + static_cast(nblock % np == pcoord) * (gsize % nb); // the last block's contribution - }; - EXPECT_EQ(lr, cal_lsize(gr, nb, po.dim0, po.coord[0])); - EXPECT_EQ(lc, cal_lsize(gc, nb, po.dim1, po.coord[1])); + // 3. local2global and local sizes + int lr = po.get_row_size (); + int lc = po.get_col_size (); + EXPECT_EQ (lr * lc, po.get_local_size ()); + auto cal_lsize + = [] (const int& gsize, const int& nb, const int& np, const int& pcoord) -> int + { + int nblock = gsize / nb; + return nblock / np * nb + + static_cast (nblock % np > pcoord) * nb // full blocks' contribution + + static_cast (nblock % np == pcoord) + * (gsize % nb); // the last block's contribution + }; + EXPECT_EQ (lr, cal_lsize (gr, nb, po.dim0, po.coord[0])); + EXPECT_EQ (lc, cal_lsize (gc, nb, po.dim1, po.coord[1])); - //4. ScaLAPACK descriptor - EXPECT_EQ(po.desc[0], 1); - EXPECT_EQ(po.desc[1], po.blacs_ctxt); - EXPECT_EQ(po.desc[2], gr); - EXPECT_EQ(po.desc[3], gc); - EXPECT_EQ(po.desc[4], po.get_block_size()); - EXPECT_EQ(po.desc[5], po.get_block_size()); - EXPECT_EQ(po.desc[6], 0); - EXPECT_EQ(po.desc[7], 0); - EXPECT_EQ(po.desc[8], lr); + // 4. ScaLAPACK descriptor + EXPECT_EQ (po.desc[0], 1); + EXPECT_EQ (po.desc[1], po.blacs_ctxt); + EXPECT_EQ (po.desc[2], gr); + EXPECT_EQ (po.desc[3], gc); + EXPECT_EQ (po.desc[4], po.get_block_size ()); + EXPECT_EQ (po.desc[5], po.get_block_size ()); + EXPECT_EQ (po.desc[6], 0); + EXPECT_EQ (po.desc[7], 0); + EXPECT_EQ (po.desc[8], lr); - //5. 
global2local - auto sum_array = [&po](const int& gr, const int& gc) -> std::pair - { - int sum_row = 0; int sum_col = 0; - for (int i = 0; i < gr; ++i) - sum_row += po.global2local_row(i); - for (int i = 0; i < gc; ++i) - sum_col += po.global2local_col(i); - return { sum_row, sum_col }; - }; - std::pair sumrc = sum_array(gr, gc); - EXPECT_EQ(std::get<0>(sumrc), lr * (lr - 1) / 2 - (gr - lr)); - EXPECT_EQ(std::get<1>(sumrc), lc * (lc - 1) / 2 - (gc - lc)); - for (int i = 0;i < lr;++i) - for (int j = 0;j < lc;++j) - EXPECT_TRUE(po.in_this_processor(po.local2global_row(i), po.local2global_col(j))); - - //6. set_atomic_trace - for(auto nat0 : nat) - { - EXPECT_EQ(gr, gc); - std::vector iat2iwt(nat0); - int nw = gr / nat0; - for (int i = 0; i < nat0; ++i) - { - iat2iwt[i] = i * nw; - } - po.set_atomic_trace(iat2iwt.data(), nat0, gr); - auto global_row_array = po.get_indexes_row(); - auto global_col_array = po.get_indexes_col(); - int local_index_trace_row = 0; - int local_index_trace_col = 0; - // check get_col_size(iat) and get_row_size(iat) - for (int i = 0; i < nat0; ++i) - { - auto atomic_row_array = po.get_indexes_row(i); - auto atomic_col_array = po.get_indexes_col(i); - EXPECT_EQ(po.get_col_size(i), atomic_col_array.size()); - EXPECT_EQ(po.get_row_size(i), atomic_row_array.size()); - for (int j = 0; j < atomic_row_array.size(); ++j) - { - //check global_index == global_index - EXPECT_EQ(atomic_row_array[j]+iat2iwt[i], global_row_array[local_index_trace_row]); - //check local_index == local_index - EXPECT_EQ(local_index_trace_row, po.global2local_row(atomic_row_array[j]+iat2iwt[i])); - local_index_trace_row++; - } - for (int j = 0; j < atomic_col_array.size(); ++j) - { - //check global_index == global_index - EXPECT_EQ(atomic_col_array[j]+iat2iwt[i], global_col_array[local_index_trace_col]); - //check local_index == local_index - EXPECT_EQ(local_index_trace_col, po.global2local_col(atomic_col_array[j]+iat2iwt[i])); - local_index_trace_col++; + // 5. 
global2local + auto sum_array = [&po] (const int& gr, const int& gc) -> std::pair + { + int sum_row = 0; + int sum_col = 0; + for (int i = 0; i < gr; ++i) + sum_row += po.global2local_row (i); + for (int i = 0; i < gc; ++i) + sum_col += po.global2local_col (i); + return {sum_row, sum_col}; + }; + std::pair sumrc = sum_array (gr, gc); + EXPECT_EQ (std::get<0> (sumrc), lr * (lr - 1) / 2 - (gr - lr)); + EXPECT_EQ (std::get<1> (sumrc), lc * (lc - 1) / 2 - (gc - lc)); + for (int i = 0; i < lr; ++i) + for (int j = 0; j < lc; ++j) + EXPECT_TRUE ( + po.in_this_processor (po.local2global_row (i), po.local2global_col (j))); + + // 6. set_atomic_trace + for (auto nat0: nat) + { + EXPECT_EQ (gr, gc); + std::vector iat2iwt (nat0); + int nw = gr / nat0; + for (int i = 0; i < nat0; ++i) + { + iat2iwt[i] = i * nw; + } + po.set_atomic_trace (iat2iwt.data (), nat0, gr); + auto global_row_array = po.get_indexes_row (); + auto global_col_array = po.get_indexes_col (); + int local_index_trace_row = 0; + int local_index_trace_col = 0; + // check get_col_size(iat) and get_row_size(iat) + for (int i = 0; i < nat0; ++i) + { + auto atomic_row_array = po.get_indexes_row (i); + auto atomic_col_array = po.get_indexes_col (i); + EXPECT_EQ (po.get_col_size (i), atomic_col_array.size ()); + EXPECT_EQ (po.get_row_size (i), atomic_row_array.size ()); + for (int j = 0; j < atomic_row_array.size (); ++j) + { + // check global_index == global_index + EXPECT_EQ (atomic_row_array[j] + iat2iwt[i], + global_row_array[local_index_trace_row]); + // check local_index == local_index + EXPECT_EQ (local_index_trace_row, + po.global2local_row (atomic_row_array[j] + iat2iwt[i])); + local_index_trace_row++; + } + for (int j = 0; j < atomic_col_array.size (); ++j) + { + // check global_index == global_index + EXPECT_EQ (atomic_col_array[j] + iat2iwt[i], + global_col_array[local_index_trace_col]); + // check local_index == local_index + EXPECT_EQ (local_index_trace_col, + po.global2local_col (atomic_col_array[j] + 
iat2iwt[i])); + local_index_trace_col++; + } + } + } } - - } } - } } - } } #else -TEST_F(TestParaO, Serial) +TEST_F (TestParaO, Serial) { - for (auto& size : sizes) - { - int gr = size.first; - int gc = size.second; + for (auto& size: sizes) + { + int gr = size.first; + int gc = size.second; - Parallel_Orbitals po; + Parallel_Orbitals po; - //1. set dim0 and dim1 - //2. set_serial - po.set_serial(gr, gc); - EXPECT_EQ(po.dim0 * po.dim1, 1); - EXPECT_EQ(po.get_row_size(), gr); - EXPECT_EQ(po.get_col_size(), gc); - EXPECT_EQ(po.get_local_size(), gr * gc); + // 1. set dim0 and dim1 + // 2. set_serial + po.set_serial (gr, gc); + EXPECT_EQ (po.dim0 * po.dim1, 1); + EXPECT_EQ (po.get_row_size (), gr); + EXPECT_EQ (po.get_col_size (), gc); + EXPECT_EQ (po.get_local_size (), gr * gc); - //3. global2local - for (int i = 0;i < gr;++i) - EXPECT_EQ(po.global2local_row(i), i); - for (int i = 0;i < gc;++i) - EXPECT_EQ(po.global2local_col(i), i); - //6. set_atomic_trace - for(auto nat0 : nat) - { - EXPECT_EQ(gr, gc); - std::vector iat2iwt(nat0); - int nw = gr / nat0; - for (int i = 0; i < nat0; ++i) - { - iat2iwt[i] = i * nw; - } - po.set_atomic_trace(iat2iwt.data(), nat0, gr); - EXPECT_EQ(po.get_col_size(), gr); - EXPECT_EQ(po.get_row_size(), gr); - // check get_col_size(iat) and get_row_size(iat) - for (int i = 0; i < nat0; ++i) - { - std::cout<<__FILE__<<__LINE__<<" i = "< iat2iwt (nat0); + int nw = gr / nat0; + for (int i = 0; i < nat0; ++i) + { + iat2iwt[i] = i * nw; + } + po.set_atomic_trace (iat2iwt.data (), nat0, gr); + EXPECT_EQ (po.get_col_size (), gr); + EXPECT_EQ (po.get_row_size (), gr); + // check get_col_size(iat) and get_row_size(iat) + for (int i = 0; i < nat0; ++i) + { + std::cout << __FILE__ << __LINE__ << " i = " << i << " size = " << po.get_row_size (i) + << " " << po.get_col_size (i) << std::endl; + // EXPECT_EQ(po.get_col_size(i), nw); + // EXPECT_EQ(po.get_row_size(i), nw); + } + } } - } } #endif -int main(int argc, char** argv) +int + main (int argc, 
char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; } - diff --git a/source/source_basis/module_nao/atomic_radials.cpp b/source/source_basis/module_nao/atomic_radials.cpp index 008b8d25d51..cc6ebf40158 100644 --- a/source/source_basis/module_nao/atomic_radials.cpp +++ b/source/source_basis/module_nao/atomic_radials.cpp @@ -14,101 +14,105 @@ #include #include -AtomicRadials& AtomicRadials::operator=(const AtomicRadials& rhs) +AtomicRadials& + AtomicRadials::operator= (const AtomicRadials& rhs) { - RadialSet::operator=(rhs); + RadialSet::operator= (rhs); orb_ecut_ = rhs.orb_ecut_; return *this; } -void AtomicRadials::build(const std::string& file, const int itype, std::ofstream* ptr_log, const int rank) +void + AtomicRadials::build (const std::string& file, const int itype, std::ofstream* ptr_log, const int rank) { // deallocates all arrays and reset variables (excluding sbt_) - cleanup(); + cleanup (); std::ifstream ifs; bool is_open = false; if (rank == 0) - { - ifs.open(file); - is_open = ifs.is_open(); - } + { + ifs.open (file); + is_open = ifs.is_open (); + } #ifdef __MPI - Parallel_Common::bcast_bool(is_open); + Parallel_Common::bcast_bool (is_open); #endif if (!is_open) - { - ModuleBase::WARNING_QUIT("AtomicRadials::build", "Couldn't open orbital file: " + file); - } + { + ModuleBase::WARNING_QUIT ("AtomicRadials::build", "Couldn't open orbital file: " + file); + } if (ptr_log) - { - (*ptr_log) << "\n\n\n\n"; - (*ptr_log) << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; - (*ptr_log) << " | |" << std::endl; - (*ptr_log) << " | SETUP NUMERICAL ATOMIC ORBITALS |" << std::endl; - (*ptr_log) << " | |" << std::endl; - (*ptr_log) << " | Orbital information includes the cutoff 
radius, angular momentum, |" << std::endl; - (*ptr_log) << " | zeta number and numerical values on a radial grid. |" << std::endl; - (*ptr_log) << " | |" << std::endl; - (*ptr_log) << " <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" << std::endl; - (*ptr_log) << "\n\n\n\n"; - } + { + (*ptr_log) << "\n\n\n\n"; + (*ptr_log) << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; + (*ptr_log) << " | |" << std::endl; + (*ptr_log) << " | SETUP NUMERICAL ATOMIC ORBITALS |" << std::endl; + (*ptr_log) << " | |" << std::endl; + (*ptr_log) << " | Orbital information includes the cutoff radius, angular momentum, |" << std::endl; + (*ptr_log) << " | zeta number and numerical values on a radial grid. |" << std::endl; + (*ptr_log) << " | |" << std::endl; + (*ptr_log) << " <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" << std::endl; + (*ptr_log) << "\n\n\n\n"; + } itype_ = itype; - read_abacus_orb(ifs, ptr_log, rank); - set_rcut_max(); + read_abacus_orb (ifs, ptr_log, rank); + set_rcut_max (); if (rank == 0) - { - ifs.close(); - } + { + ifs.close (); + } } -void AtomicRadials::build(RadialSet* const other, const int itype, const double rcut) +void + AtomicRadials::build (RadialSet* const other, const int itype, const double rcut) { - this->symbol_ = other->symbol(); - this->lmax_ = other->lmax(); - this->nchi_ = other->nchi(); - this->nzeta_max_ = other->nzeta_max(); + this->symbol_ = other->symbol (); + this->lmax_ = other->lmax (); + this->nchi_ = other->nchi (); + this->nzeta_max_ = other->nzeta_max (); this->itype_ = itype; - this->symbol_ = other->symbol(); + this->symbol_ = other->symbol (); this->nzeta_ = new int[this->lmax_ + 1]; for (int l = 0; l <= this->lmax_; ++l) - { - this->nzeta_[l] = other->nzeta(l); - } - this->indexing(); - this->chi_ = new NumericalRadial[nchi_]; - for (int ichi = 0; ichi < this->nchi_; ichi++) - { - const int l = other->cbegin()[ichi].l(); - int ngrid = 
other->cbegin()[ichi].nr(); - const double* rgrid = other->cbegin()[ichi].rgrid(); - const double* rvalue = other->cbegin()[ichi].rvalue(); - const int izeta = other->cbegin()[ichi].izeta(); - // if the cutoff radius is larger than the original one, just copy the orbitals - if (rcut >= other->cbegin()[ichi].rcut()) { - this->chi_[ichi].build(l, true, ngrid, rgrid, rvalue, 0, izeta, symbol_, itype, false); + this->nzeta_[l] = other->nzeta (l); } - else + this->indexing (); + this->chi_ = new NumericalRadial[nchi_]; + for (int ichi = 0; ichi < this->nchi_; ichi++) { - // call smoothgen to modify the orbitals to the local projections - std::vector rvalue_new; - smoothgen(ngrid, rgrid, rvalue, rcut, rvalue_new); - ngrid = rvalue_new.size(); - // projgen(l, ngrid, rgrid, rvalue, rcut, 20, rvalue_new); - // build the new on-site orbitals - this->chi_[ichi].build(l, true, ngrid, rgrid, rvalue_new.data(), 0, izeta, symbol_, itype, false); + const int l = other->cbegin ()[ichi].l (); + int ngrid = other->cbegin ()[ichi].nr (); + const double* rgrid = other->cbegin ()[ichi].rgrid (); + const double* rvalue = other->cbegin ()[ichi].rvalue (); + const int izeta = other->cbegin ()[ichi].izeta (); + // if the cutoff radius is larger than the original one, just copy the orbitals + if (rcut >= other->cbegin ()[ichi].rcut ()) + { + this->chi_[ichi].build (l, true, ngrid, rgrid, rvalue, 0, izeta, symbol_, itype, false); + } + else + { + // call smoothgen to modify the orbitals to the local projections + std::vector rvalue_new; + smoothgen (ngrid, rgrid, rvalue, rcut, rvalue_new); + ngrid = rvalue_new.size (); + // projgen(l, ngrid, rgrid, rvalue, rcut, 20, rvalue_new); + // build the new on-site orbitals + this->chi_[ichi].build (l, true, ngrid, rgrid, rvalue_new.data (), 0, izeta, symbol_, itype, false); + } } - } } -void AtomicRadials::read_abacus_orb(std::ifstream& ifs, std::ofstream* ptr_log, const int rank) +void + AtomicRadials::read_abacus_orb (std::ifstream& ifs, 
std::ofstream* ptr_log, const int rank) { /* * Read the orbital file. @@ -122,155 +126,155 @@ void AtomicRadials::read_abacus_orb(std::ifstream& ifs, std::ofstream* ptr_log, std::string tmp; if (rank == 0) - { - /* - * read the header & grid information, including - * - * 1. element symbol --> symbol_ - * 2. energy cutoff --> orb_ecut_ - * 3. maximum angular momentum --> lmax_ - * 4. number of radial functions for each angular momentum --> nzeta_ - * 5. number of grid points --> ngrid - * 6. grid spacing --> dr - * */ - while (ifs >> tmp) { - if (tmp == "Element") - { - ifs >> symbol_; - } - else if (tmp == "Cutoff(Ry)") - { - ifs >> orb_ecut_; - } - else if (tmp == "Lmax") - { - ifs >> lmax_; + /* + * read the header & grid information, including + * + * 1. element symbol --> symbol_ + * 2. energy cutoff --> orb_ecut_ + * 3. maximum angular momentum --> lmax_ + * 4. number of radial functions for each angular momentum --> nzeta_ + * 5. number of grid points --> ngrid + * 6. grid spacing --> dr + * */ + while (ifs >> tmp) + { + if (tmp == "Element") + { + ifs >> symbol_; + } + else if (tmp == "Cutoff(Ry)") + { + ifs >> orb_ecut_; + } + else if (tmp == "Lmax") + { + ifs >> lmax_; #ifdef __DEBUG - assert(lmax_ >= 0); + assert (lmax_ >= 0); #endif - nzeta_ = new int[lmax_ + 1]; - for (int l = 0; l <= lmax_; ++l) - { - ifs >> tmp >> tmp >> tmp >> nzeta_[l]; // skip "Number" "of" "Xorbital-->" + nzeta_ = new int[lmax_ + 1]; + for (int l = 0; l <= lmax_; ++l) + { + ifs >> tmp >> tmp >> tmp >> nzeta_[l]; // skip "Number" "of" "Xorbital-->" + } + } + else if (tmp == "Mesh") + { + ifs >> ngrid; + continue; + } + else if (tmp == "dr") + { + ifs >> dr; + break; + } } - } - else if (tmp == "Mesh") - { - ifs >> ngrid; - continue; - } - else if (tmp == "dr") - { - ifs >> dr; - break; - } - } - /* - * calculate: - * - * 1. the total number of radial functions --> nchi_ - * 2. maximum number of radial functions for each angular momentum --> nzeta_max_ - * 3. 
a map from (l, izeta) to 1-d array index in chi_ - * */ - nchi_ = 0; - for (int l = 0; l <= lmax_; ++l) - { - nchi_ += nzeta_[l]; + /* + * calculate: + * + * 1. the total number of radial functions --> nchi_ + * 2. maximum number of radial functions for each angular momentum --> nzeta_max_ + * 3. a map from (l, izeta) to 1-d array index in chi_ + * */ + nchi_ = 0; + for (int l = 0; l <= lmax_; ++l) + { + nchi_ += nzeta_[l]; + } + nzeta_max_ = *std::max_element (nzeta_, nzeta_ + lmax_ + 1); + indexing (); // build index_map_ } - nzeta_max_ = *std::max_element(nzeta_, nzeta_ + lmax_ + 1); - indexing(); // build index_map_ - } #ifdef __MPI - Parallel_Common::bcast_string(symbol_); - Parallel_Common::bcast_double(orb_ecut_); - Parallel_Common::bcast_int(lmax_); + Parallel_Common::bcast_string (symbol_); + Parallel_Common::bcast_double (orb_ecut_); + Parallel_Common::bcast_int (lmax_); - Parallel_Common::bcast_int(nchi_); - Parallel_Common::bcast_int(nzeta_max_); + Parallel_Common::bcast_int (nchi_); + Parallel_Common::bcast_int (nzeta_max_); - Parallel_Common::bcast_int(ngrid); - Parallel_Common::bcast_double(dr); + Parallel_Common::bcast_int (ngrid); + Parallel_Common::bcast_double (dr); #endif if (rank != 0) - { - nzeta_ = new int[lmax_ + 1]; - index_map_ = new int[(lmax_ + 1) * nzeta_max_]; - } + { + nzeta_ = new int[lmax_ + 1]; + index_map_ = new int[(lmax_ + 1) * nzeta_max_]; + } #ifdef __MPI - Parallel_Common::bcast_int(nzeta_, lmax_ + 1); - Parallel_Common::bcast_int(index_map_, (lmax_ + 1) * nzeta_max_); + Parallel_Common::bcast_int (nzeta_, lmax_ + 1); + Parallel_Common::bcast_int (index_map_, (lmax_ + 1) * nzeta_max_); #endif double* rvalue = new double[ngrid]; double* rgrid = new double[ngrid]; for (int ir = 0; ir != ngrid; ++ir) - { - rgrid[ir] = ir * dr; - } + { + rgrid[ir] = ir * dr; + } chi_ = new NumericalRadial[nchi_]; // record whether an orbital has been read or not bool* is_read = new bool[nchi_]; for (int i = 0; i != nchi_; ++i) - { - is_read[i] = 
false; - } + { + is_read[i] = false; + } for (int l = 0; l <= lmax_; ++l) - { - for (int izeta = 0; izeta < nzeta_[l]; ++izeta) { - if (rank == 0) - { - /* - * read the orbital information, including - * - * 1. angular momentum - * 2. zeta number - * 3. values on the grid - * */ - while (ifs.good()) + for (int izeta = 0; izeta < nzeta_[l]; ++izeta) { - while (ifs >> tmp) - { - if (tmp == "N") + if (rank == 0) { - break; + /* + * read the orbital information, including + * + * 1. angular momentum + * 2. zeta number + * 3. values on the grid + * */ + while (ifs.good ()) + { + while (ifs >> tmp) + { + if (tmp == "N") + { + break; + } + } + int read_l, read_izeta; + ifs >> tmp >> read_l >> read_izeta; + if (l == read_l && izeta == read_izeta) + { + break; + } + } + + for (int ir = 0; ir != ngrid; ++ir) + { + ifs >> rvalue[ir]; + } } - } - int read_l, read_izeta; - ifs >> tmp >> read_l >> read_izeta; - if (l == read_l && izeta == read_izeta) - { - break; - } - } - - for (int ir = 0; ir != ngrid; ++ir) - { - ifs >> rvalue[ir]; - } - } #ifdef __MPI - Parallel_Common::bcast_double(rvalue, ngrid); + Parallel_Common::bcast_double (rvalue, ngrid); #endif #ifdef __DEBUG - assert(index(l, izeta) >= 0 && index(l, izeta) < nchi_); - assert(!is_read[index(l, izeta)]); + assert (index (l, izeta) >= 0 && index (l, izeta) < nchi_); + assert (!is_read[index (l, izeta)]); #endif - is_read[index(l, izeta)] = true; + is_read[index (l, izeta)] = true; - // skip the initialization of sbt_ in this stage - chi_[index(l, izeta)].build(l, true, ngrid, rgrid, rvalue, 0, izeta, symbol_, itype_, false); - chi_[index(l, izeta)].normalize(); + // skip the initialization of sbt_ in this stage + chi_[index (l, izeta)].build (l, true, ngrid, rgrid, rvalue, 0, izeta, symbol_, itype_, false); + chi_[index (l, izeta)].normalize (); + } } - } delete[] is_read; delete[] rvalue; @@ -305,7 +309,7 @@ void AtomicRadials::read_abacus_orb(std::ifstream& ifs, std::ofstream* ptr_log, // nzeta_max_ = 
*std::max_element(nzeta.begin(), nzeta.end()); // indexing(); - + // std::vector rgrid(nr); // std::iota(rgrid.begin(), rgrid.end(), 0); // std::for_each(rgrid.begin(), rgrid.end(), [dr](double& r) { r *= dr; }); @@ -315,8 +319,8 @@ void AtomicRadials::read_abacus_orb(std::ifstream& ifs, std::ofstream* ptr_log, // { // for (int izeta = 0; izeta < nzeta[l]; ++izeta) // { -// chi_[index(l, izeta)].build(l, true, nr, rgrid.data(), radials[ichi].data(), 0, izeta, symbol_, itype_, false); -// chi_[index(l, izeta)].normalize(); +// chi_[index(l, izeta)].build(l, true, nr, rgrid.data(), radials[ichi].data(), 0, izeta, symbol_, itype_, +// false); chi_[index(l, izeta)].normalize(); // ++ichi; // } // } diff --git a/source/source_basis/module_nao/atomic_radials.h b/source/source_basis/module_nao/atomic_radials.h index d4cdb1b39ee..06809e93602 100644 --- a/source/source_basis/module_nao/atomic_radials.h +++ b/source/source_basis/module_nao/atomic_radials.h @@ -23,35 +23,42 @@ class AtomicRadials : public RadialSet { public: - AtomicRadials() {} - AtomicRadials(const AtomicRadials& other) : RadialSet(other), orb_ecut_(other.orb_ecut_) {} + AtomicRadials () {} + AtomicRadials (const AtomicRadials& other) : RadialSet (other), orb_ecut_ (other.orb_ecut_) {} - AtomicRadials& operator=(const AtomicRadials& rhs); - AtomicRadials* clone() const { return new AtomicRadials(*this); } // covariant return type + AtomicRadials& operator= (const AtomicRadials& rhs); + AtomicRadials* + clone () const + { + return new AtomicRadials (*this); + } // covariant return type - ~AtomicRadials() {} // ~RadialSet() is called automatically + ~AtomicRadials () {} // ~RadialSet() is called automatically //! 
Build the class from an orbital file - void build(const std::string& file, //!< orbital file name - const int itype = 0, //!< element index in calculation - std::ofstream* ptr_log = nullptr, //!< output file stream for logging - const int rank = 0 //!< MPI rank + void build (const std::string& file, //!< orbital file name + const int itype = 0, //!< element index in calculation + std::ofstream* ptr_log = nullptr, //!< output file stream for logging + const int rank = 0 //!< MPI rank ); - void build(RadialSet* const other, const int itype, const double rcut); + void build (RadialSet* const other, const int itype, const double rcut); //! Get the energy cutoff as given by the orbital file - double orb_ecut() const { return orb_ecut_; } + double + orb_ecut () const + { + return orb_ecut_; + } private: double orb_ecut_; //!< energy cutoff as given by the orbital file //! Read the orbital file in the ABACUS format - void read_abacus_orb(std::ifstream& ifs, //!< input file stream from orbital file - std::ofstream* ptr_log = nullptr, //!< output file stream for logging - const int rank = 0 //!< MPI rank + void read_abacus_orb (std::ifstream& ifs, //!< input file stream from orbital file + std::ofstream* ptr_log = nullptr, //!< output file stream for logging + const int rank = 0 //!< MPI rank ); - }; #endif diff --git a/source/source_basis/module_nao/beta_radials.cpp b/source/source_basis/module_nao/beta_radials.cpp index 34137c993c9..6748d3560a0 100644 --- a/source/source_basis/module_nao/beta_radials.cpp +++ b/source/source_basis/module_nao/beta_radials.cpp @@ -7,36 +7,45 @@ #include -void BetaRadials::build(const Numerical_Nonlocal& nl, const int itype, std::ofstream* const ptr_log) +void + BetaRadials::build (const Numerical_Nonlocal& nl, const int itype, std::ofstream* const ptr_log) { - cleanup(); + cleanup (); itype_ = itype; #ifdef __DEBUG - assert(itype_ == nl.getType()); + assert (itype_ == nl.getType ()); #endif - symbol_ = nl.getLabel(); - lmax_ = nl.getLmax(); 
- nchi_ = nl.get_nproj(); + symbol_ = nl.getLabel (); + lmax_ = nl.getLmax (); + nchi_ = nl.get_nproj (); chi_ = new NumericalRadial[nchi_]; nzeta_ = new int[lmax_ + 1]; - std::fill(nzeta_, nzeta_ + lmax_ + 1, 0); + std::fill (nzeta_, nzeta_ + lmax_ + 1, 0); for (int ichi = 0; ichi != nchi_; ++ichi) - { - Numerical_Nonlocal_Lm& beta = nl.Proj[ichi]; - int l = beta.getL(); - // skip the initialization of sbt_ in this stage - chi_[ichi] - .build(l, true, beta.getNr(), beta.getRadial(), beta.getBeta_r(), 1, nzeta_[l], symbol_, itype_, false); - nzeta_[l] += 1; - } - nzeta_max_ = *std::max_element(nzeta_, nzeta_ + lmax_ + 1); + { + Numerical_Nonlocal_Lm& beta = nl.Proj[ichi]; + int l = beta.getL (); + // skip the initialization of sbt_ in this stage + chi_[ichi].build (l, + true, + beta.getNr (), + beta.getRadial (), + beta.getBeta_r (), + 1, + nzeta_[l], + symbol_, + itype_, + false); + nzeta_[l] += 1; + } + nzeta_max_ = *std::max_element (nzeta_, nzeta_ + lmax_ + 1); - indexing(); - set_rcut_max(); + indexing (); + set_rcut_max (); } // void BetaRadials::build(const std::string& file, const int itype, std::ofstream* ptr_log, const int rank) @@ -57,9 +66,9 @@ void BetaRadials::build(const Numerical_Nonlocal& nl, const int itype, std::ofst // is_open = ifs.is_open(); // } // -//#ifdef __MPI +// #ifdef __MPI // Parallel_Common::bcast_bool(is_open); -//#endif +// #endif // // if (!is_open) // { @@ -95,9 +104,9 @@ void BetaRadials::build(const Numerical_Nonlocal& nl, const int itype, std::ofst // } // } // -//#ifdef __MPI +// #ifdef __MPI // Parallel_Common::bcast_int(upf_version); -//#endif +// #endif // // switch (upf_version) // { @@ -179,13 +188,13 @@ void BetaRadials::build(const Numerical_Nonlocal& nl, const int itype, std::ofst // } // } // -//#ifdef __MPI +// #ifdef __MPI // Parallel_Common::bcast_bool(is_good); // Parallel_Common::bcast_string(symbol_); // Parallel_Common::bcast_int(lmax_); // Parallel_Common::bcast_int(nchi_); // 
Parallel_Common::bcast_int(ngrid_max); -//#endif +// #endif // // if (!is_good) // { @@ -222,9 +231,9 @@ void BetaRadials::build(const Numerical_Nonlocal& nl, const int itype, std::ofst // assert(tmp == ""); // } // -//#ifdef __MPI +// #ifdef __MPI // Parallel_Common::bcast_double(rgrid, ngrid_max); -//#endif +// #endif // // assert(lmax_ >= 0); // nzeta_ = new int[lmax_ + 1]; @@ -288,18 +297,18 @@ void BetaRadials::build(const Numerical_Nonlocal& nl, const int itype, std::ofst // assert(tmp == ""); // } // rank == 0 // -//#ifdef __MPI +// #ifdef __MPI // Parallel_Common::bcast_int(l); // Parallel_Common::bcast_int(ngrid); // Parallel_Common::bcast_int(izeta); // Parallel_Common::bcast_double(rbeta, ngrid); -//#endif +// #endif // chi_[i].build(l, true, ngrid, rgrid, rbeta, 1, izeta, symbol_, itype_); // } // -//#ifdef __MPI +// #ifdef __MPI // Parallel_Common::bcast_int(nzeta_, lmax_ + 1); -//#endif +// #endif // // indexing(); // @@ -386,21 +395,21 @@ void BetaRadials::build(const Numerical_Nonlocal& nl, const int itype, std::ofst // symbol_.c_str(), lmax_, ngrid_max, nbeta, has_so, is_good); // } // -//#ifdef __MPI +// #ifdef __MPI // Parallel_Common::bcast_bool(is_good); -//#endif +// #endif // if (!is_good) // { // ModuleBase::WARNING_QUIT("BetaRadials::read_beta_upf201", "PP_HEADER error"); // } // -//#ifdef __MPI +// #ifdef __MPI // Parallel_Common::bcast_string(symbol_); // Parallel_Common::bcast_int(lmax_); // Parallel_Common::bcast_int(ngrid_max); // Parallel_Common::bcast_int(nbeta); // Parallel_Common::bcast_bool(has_so); -//#endif +// #endif // // // It is an error if lspinorb is set to true but the pseudopotential file does not contain spin-orbit information // if (!has_so && PARAM.inp.lspinorb) @@ -444,9 +453,9 @@ void BetaRadials::build(const Numerical_Nonlocal& nl, const int itype, std::ofst // assert(tmpword == ""); // } // -//#ifdef __MPI +// #ifdef __MPI // Parallel_Common::bcast_double(rgrid, ngrid_max); -//#endif +// #endif // // 
/*=========================================================== // * @@ -531,9 +540,9 @@ void BetaRadials::build(const Numerical_Nonlocal& nl, const int itype, std::ofst // } // for loop over beta functions // }// rank == 0 // -//#ifdef __MPI +// #ifdef __MPI // Parallel_Common::bcast_bool(is_good); -//#endif +// #endif // if (!is_good) // { // ModuleBase::WARNING_QUIT("BetaRadials::read_beta_upf201", "PP_BETA error"); @@ -676,9 +685,9 @@ void BetaRadials::build(const Numerical_Nonlocal& nl, const int itype, std::ofst // rbeta_final = rbeta; // } // } -//#ifdef __MPI +// #ifdef __MPI // Parallel_Common::bcast_bool(is_good); -//#endif +// #endif // if (!is_good) // { // ModuleBase::WARNING_QUIT("BetaRadials::read_beta_upf201", "PP_BETA error"); @@ -689,9 +698,9 @@ void BetaRadials::build(const Numerical_Nonlocal& nl, const int itype, std::ofst // * Broadcast final beta functions // * // *===========================================================*/ -//#ifdef __MPI +// #ifdef __MPI // Parallel_Common::bcast_int(nbeta_final); -//#endif +// #endif // // if (rank != 0) // { @@ -700,11 +709,11 @@ void BetaRadials::build(const Numerical_Nonlocal& nl, const int itype, std::ofst // ngrid_final = new int[nbeta_final]; // } // -//#ifdef __MPI +// #ifdef __MPI // Parallel_Common::bcast_int(l_final, nbeta_final); // Parallel_Common::bcast_int(ngrid_final, nbeta_final); // Parallel_Common::bcast_double(rbeta_final, nbeta_final * ngrid_max); -//#endif +// #endif // // /*=========================================================== // * diff --git a/source/source_basis/module_nao/beta_radials.h b/source/source_basis/module_nao/beta_radials.h index 449cca755c2..1cafcab1fbf 100644 --- a/source/source_basis/module_nao/beta_radials.h +++ b/source/source_basis/module_nao/beta_radials.h @@ -23,18 +23,20 @@ class BetaRadials : public RadialSet { public: - BetaRadials() {} - BetaRadials(const BetaRadials& other) : RadialSet(other) {} //!< deep copy + BetaRadials () {} + BetaRadials (const 
BetaRadials& other) : RadialSet (other) {} //!< deep copy using RadialSet::operator=; - BetaRadials* clone() const { return new BetaRadials(*this); } // covariant return type + BetaRadials* + clone () const + { + return new BetaRadials (*this); + } // covariant return type - ~BetaRadials() {} + ~BetaRadials () {} /// Build the class from a Numerical_Nonlocal object - void build(const Numerical_Nonlocal& nl, - const int itype = 0, - std::ofstream* const ptr_log = nullptr); + void build (const Numerical_Nonlocal& nl, const int itype = 0, std::ofstream* const ptr_log = nullptr); /* void build(const std::string& file, //!< pseudopotential file name diff --git a/source/source_basis/module_nao/hydrogen_radials.cpp b/source/source_basis/module_nao/hydrogen_radials.cpp index e1faca26aab..3ff505dfdfe 100644 --- a/source/source_basis/module_nao/hydrogen_radials.cpp +++ b/source/source_basis/module_nao/hydrogen_radials.cpp @@ -6,13 +6,15 @@ #include #include -HydrogenRadials& HydrogenRadials::operator=(const HydrogenRadials& rhs) +HydrogenRadials& + HydrogenRadials::operator= (const HydrogenRadials& rhs) { - RadialSet::operator=(rhs); + RadialSet::operator= (rhs); return *this; } -void HydrogenRadials::build(const int itype, +void + HydrogenRadials::build (const int itype, const double charge, const bool with_slater_screening, const int nmax, @@ -24,362 +26,382 @@ void HydrogenRadials::build(const int itype, const std::string strategy, std::ofstream* ptr_log) { - if(with_slater_screening&&rank==0) {printf("Build hydrogen_radials with Slater screening coefficients.\n");} - cleanup(); + if (with_slater_screening && rank == 0) + { + printf ("Build hydrogen_radials with Slater screening coefficients.\n"); + } + cleanup (); itype_ = itype; symbol_ = symbol; // rcut should be determined as soon as possible... 
- //generate_hydrogen_radials(charge, nmax, 10.0, dr, rank, ptr_log); - hydrogen(charge, with_slater_screening, nmax, dr, conv_thr, rank, strategy, ptr_log); - set_rcut_max(); + // generate_hydrogen_radials(charge, nmax, 10.0, dr, rank, ptr_log); + hydrogen (charge, with_slater_screening, nmax, dr, conv_thr, rank, strategy, ptr_log); + set_rcut_max (); } -std::vector HydrogenRadials::generate_hydrogen_radial_segment(const double charge, - const bool with_slater_screening, - const int n, - const int l, - const double rmin, - const double rmax, - const double dr, - const int rank, - std::ofstream* ptr_log) +std::vector + HydrogenRadials::generate_hydrogen_radial_segment (const double charge, + const bool with_slater_screening, + const int n, + const int l, + const double rmin, + const double rmax, + const double dr, + const int rank, + std::ofstream* ptr_log) { double a0 = 1.0; // Bohr radius - int ngrid = static_cast((rmax - rmin) / dr) + 1; - std::vector rgrid(ngrid); - std::vector rvalue(ngrid); + int ngrid = static_cast ((rmax - rmin) / dr) + 1; + std::vector rgrid (ngrid); + std::vector rvalue (ngrid); // initialize value for rgrid - for(int ir = 0; ir != ngrid; ++ir) - { - rgrid[ir] = rmin + ir * dr; - } + for (int ir = 0; ir != ngrid; ++ir) + { + rgrid[ir] = rmin + ir * dr; + } double screened_charge = charge; - if(with_slater_screening) - { - double sigma = slater_screening(symbol_, n, l); - screened_charge = charge - sigma; - } + if (with_slater_screening) + { + double sigma = slater_screening (symbol_, n, l); + screened_charge = charge - sigma; + } - double norm_factor = sqrt( - 4.0*std::pow(screened_charge, 3)* - static_cast(this->assoc_laguerre_.factorial(n - l - 1)) / - std::pow(double(n), 4) / - static_cast(this->assoc_laguerre_.factorial(n + l)) / - std::pow(a0, 3) - ); + double norm_factor = sqrt ( + 4.0 * std::pow (screened_charge, 3) * static_cast (this->assoc_laguerre_.factorial (n - l - 1)) + / std::pow (double (n), 4) / static_cast 
(this->assoc_laguerre_.factorial (n + l)) / std::pow (a0, 3)); - for(int ir = 0; ir != ngrid; ++ir) - { - // Bohr radius is 1.0 - double rho = 2.0 * rgrid[ir] * screened_charge / n / a0; - rvalue[ir] = norm_factor * std::pow(rho, l) * exp(-rho/2.0) * this->assoc_laguerre_.value( - n, l, rho - ); - } + for (int ir = 0; ir != ngrid; ++ir) + { + // Bohr radius is 1.0 + double rho = 2.0 * rgrid[ir] * screened_charge / n / a0; + rvalue[ir] = norm_factor * std::pow (rho, l) * exp (-rho / 2.0) * this->assoc_laguerre_.value (n, l, rho); + } return rvalue; } -double HydrogenRadials::radial_norm(const std::vector rgrid, - const std::vector rvalue) +double + HydrogenRadials::radial_norm (const std::vector rgrid, const std::vector rvalue) { - std::vector integrand(rvalue.size()); - for(int ir = 0; ir != rvalue.size(); ++ir) - { - integrand[ir] = rvalue[ir] * rvalue[ir] * rgrid[ir] * rgrid[ir]; - } + std::vector integrand (rvalue.size ()); + for (int ir = 0; ir != rvalue.size (); ++ir) + { + integrand[ir] = rvalue[ir] * rvalue[ir] * rgrid[ir] * rgrid[ir]; + } double dr = rgrid[1] - rgrid[0]; - double norm = ModuleBase::Integral::simpson(rvalue.size(), integrand.data(), dr); - norm = sqrt(norm); + double norm = ModuleBase::Integral::simpson (rvalue.size (), integrand.data (), dr); + norm = sqrt (norm); return norm; } -double HydrogenRadials::generate_hydrogen_radial_toconv(const double charge, - const bool with_slater_screening, - const int n, - const int l, - const double conv_thr, - const int rank, - std::vector& rgrid, - std::vector& rvalue, - std::ofstream* ptr_log) +double + HydrogenRadials::generate_hydrogen_radial_toconv (const double charge, + const bool with_slater_screening, + const int n, + const int l, + const double conv_thr, + const int rank, + std::vector& rgrid, + std::vector& rvalue, + std::ofstream* ptr_log) { double norm = 0.0; - double rmax_ = 0.0; // in Bohr - double rmin_ = 0.0; // always to be 0, in Bohr + double rmax_ = 0.0; // in Bohr + double rmin_ = 
0.0; // always to be 0, in Bohr double delta_r = 0.5; // stepsize for radius cutoff searching, in Bohr // clear the input vectors - rgrid.clear(); rgrid.shrink_to_fit(); - rvalue.clear(); rvalue.shrink_to_fit(); + rgrid.clear (); + rgrid.shrink_to_fit (); + rvalue.clear (); + rvalue.shrink_to_fit (); double dr = 0.01; // radial function realspace grid stepsize, in Bohr - if(delta_r < dr) - { - dr = delta_r; - } - if(rank == 0) - { - printf("Searching for the cutoff radius for n = %d, l = %d, conv_thr = %6.4e\n", n, l, conv_thr); + if (delta_r < dr) + { + dr = delta_r; + } + if (rank == 0) + { + printf ("Searching for the cutoff radius for n = %d, l = %d, conv_thr = %6.4e\n", n, l, conv_thr); #ifdef __MPI - printf("Convergence information only shown from rank 0, other ranks will be silent.\n"); + printf ("Convergence information only shown from rank 0, other ranks will be silent.\n"); #endif - printf("%10s%12s%14s%18s", "Step Nr.", "Rmax (a.u.)", "Norm", "Delta Norm\n"); - } + printf ("%10s%12s%14s%18s", "Step Nr.", "Rmax (a.u.)", "Norm", "Delta Norm\n"); + } int istep = 1; double delta_norm = 1.0; - while((std::fabs(delta_norm) > conv_thr)) - { - rmin_ = rmax_; - rmax_ += delta_r; - int ngrid = static_cast((rmax_ - rmin_) / dr) + 1; // [rmin, rmax] - std::vector rgrid_segment(ngrid); - for(int ir = 0; ir != ngrid; ++ir) + while ((std::fabs (delta_norm) > conv_thr)) { - rgrid_segment[ir] = rmin_ + ir * dr; + rmin_ = rmax_; + rmax_ += delta_r; + int ngrid = static_cast ((rmax_ - rmin_) / dr) + 1; // [rmin, rmax] + std::vector rgrid_segment (ngrid); + for (int ir = 0; ir != ngrid; ++ir) + { + rgrid_segment[ir] = rmin_ + ir * dr; + } + std::vector rvalue_segment = generate_hydrogen_radial_segment (charge, + with_slater_screening, + n, + l, + rmin_, + rmax_, + dr, + rank, + ptr_log); + // before push back, pop back the last element + if (rgrid.size () != 0) + { + rgrid.pop_back (); + rvalue.pop_back (); + } + rgrid.insert (rgrid.end (), rgrid_segment.begin (), 
rgrid_segment.end ()); + rvalue.insert (rvalue.end (), rvalue_segment.begin (), rvalue_segment.end ()); + delta_norm = norm; + norm = radial_norm (rgrid, rvalue); + delta_norm = norm - delta_norm; + if ((istep == 0) && (rank == 0)) + { + printf ("%10d%12.2f%14.10f%18.10e\n", istep, rmax_, norm, delta_norm); + } + ++istep; } - std::vector rvalue_segment = generate_hydrogen_radial_segment( - charge, with_slater_screening, n, l, rmin_, rmax_, dr, rank, ptr_log); - // before push back, pop back the last element - if(rgrid.size() != 0) + if (rank == 0) { - rgrid.pop_back(); - rvalue.pop_back(); + printf ("...\n"); + printf ("%10d%12.2f%14.10f%18.10e\n", istep, rmax_, norm, delta_norm); } - rgrid.insert(rgrid.end(), rgrid_segment.begin(), rgrid_segment.end()); - rvalue.insert(rvalue.end(), rvalue_segment.begin(), rvalue_segment.end()); - delta_norm = norm; - norm = radial_norm(rgrid, rvalue); - delta_norm = norm - delta_norm; - if((istep == 0)&&(rank == 0)) printf("%10d%12.2f%14.10f%18.10e\n", istep, rmax_, norm, delta_norm); - ++istep; - } - if(rank == 0) - { - printf("...\n"); - printf("%10d%12.2f%14.10f%18.10e\n", istep, rmax_, norm, delta_norm); - } return rmax_; } -std::vector> HydrogenRadials::unzip_strategy(const int nmax, - const std::string strategy) +std::vector> + HydrogenRadials::unzip_strategy (const int nmax, const std::string strategy) { - if(strategy.substr(0, 6) != "energy") - { - // because for "energy", the nmax is used as the number of electrons - assert(nmax < 8); - } - std::vector> nl_pairs; - if(strategy.substr(0, 7) == "minimal") - { - if(strategy == "minimal-nodeless") - { - for(int n = 1; n <= nmax; n++) - { - std::pair nl_pair = std::make_pair(n, n - 1); - nl_pairs.push_back(nl_pair); - } - } - else// if(strategy == "minimal-valence") + if (strategy.substr (0, 6) != "energy") { - for(int l = 0; l < nmax; l++) - { - std::pair nl_pair = std::make_pair(nmax, l); - nl_pairs.push_back(nl_pair); - } + // because for "energy", the nmax is used as the 
number of electrons + assert (nmax < 8); } - } - else if(strategy.substr(0, 6) == "energy") - { - // 1s, -(n+1)-> 2s, -(l+1)-> 2p, 3s, -(n+1)-> 3p, 4s, -(l+1)-> 3d, 4p, 5s, -(n+1)-> 4d, 5p, 6s - int starting_n = 1; - int starting_l = 0; - int nelec = 0; - int nl_switch = 0; - while(nelec < nmax) // now nmax is the full number of electrons of certain atom + std::vector> nl_pairs; + if (strategy.substr (0, 7) == "minimal") { - int n = starting_n; - int l = starting_l; - do - { - nl_pairs.push_back(std::make_pair(n, l)); - nelec += (2*l + 1)*2; - n++; - l--; - } while ((l >= 0)&&(nelec < nmax)); - - if(nl_switch%2 == 1) - { - starting_l++; - } - else - { - starting_n++; - } - nl_switch++; + if (strategy == "minimal-nodeless") + { + for (int n = 1; n <= nmax; n++) + { + std::pair nl_pair = std::make_pair (n, n - 1); + nl_pairs.push_back (nl_pair); + } + } + else // if(strategy == "minimal-valence") + { + for (int l = 0; l < nmax; l++) + { + std::pair nl_pair = std::make_pair (nmax, l); + nl_pairs.push_back (nl_pair); + } + } } - if(strategy == "energy-valence") + else if (strategy.substr (0, 6) == "energy") { - std::vector nmax_ls; - std::vector nmax_minus1_ls; - int real_nmax = 0; - for(auto nl_pair : nl_pairs) - { - if(nl_pair.first > real_nmax) real_nmax = nl_pair.first; - } - for(auto it = nl_pairs.begin(); it != nl_pairs.end();) - { - if(it->first == real_nmax) nmax_ls.push_back(it->second); - else if(it->first == real_nmax - 1) nmax_minus1_ls.push_back(it->second); - else + // 1s, -(n+1)-> 2s, -(l+1)-> 2p, 3s, -(n+1)-> 3p, 4s, -(l+1)-> 3d, 4p, 5s, -(n+1)-> 4d, 5p, 6s + int starting_n = 1; + int starting_l = 0; + int nelec = 0; + int nl_switch = 0; + while (nelec < nmax) // now nmax is the full number of electrons of certain atom { - it = nl_pairs.erase(it); - continue; + int n = starting_n; + int l = starting_l; + do + { + nl_pairs.push_back (std::make_pair (n, l)); + nelec += (2 * l + 1) * 2; + n++; + l--; + } + while ((l >= 0) && (nelec < nmax)); + + if 
(nl_switch % 2 == 1) + { + starting_l++; + } + else + { + starting_n++; + } + nl_switch++; } - ++it; - } - for(auto it = nl_pairs.begin(); it != nl_pairs.end();) - { - if(it->first == real_nmax - 1) + if (strategy == "energy-valence") { - if(std::find(nmax_ls.begin(), nmax_ls.end(), it->second) != nmax_ls.end()) - { - it = nl_pairs.erase(it); - continue; - } + std::vector nmax_ls; + std::vector nmax_minus1_ls; + int real_nmax = 0; + for (auto nl_pair: nl_pairs) + { + if (nl_pair.first > real_nmax) + { + real_nmax = nl_pair.first; + } + } + for (auto it = nl_pairs.begin (); it != nl_pairs.end ();) + { + if (it->first == real_nmax) + { + nmax_ls.push_back (it->second); + } + else if (it->first == real_nmax - 1) + { + nmax_minus1_ls.push_back (it->second); + } + else + { + it = nl_pairs.erase (it); + continue; + } + ++it; + } + for (auto it = nl_pairs.begin (); it != nl_pairs.end ();) + { + if (it->first == real_nmax - 1) + { + if (std::find (nmax_ls.begin (), nmax_ls.end (), it->second) != nmax_ls.end ()) + { + it = nl_pairs.erase (it); + continue; + } + } + ++it; + } } - ++it; - } } - } else - { - for(int n = 1; n <= nmax; n++) { - for(int l = 0; l < n; l++) - { - std::pair nl_pair = std::make_pair(n, l); - nl_pairs.push_back(nl_pair); - } + for (int n = 1; n <= nmax; n++) + { + for (int l = 0; l < n; l++) + { + std::pair nl_pair = std::make_pair (n, l); + nl_pairs.push_back (nl_pair); + } + } } - } return nl_pairs; } -void HydrogenRadials::smooth(std::vector& rgrid, - std::vector& rvalue, - const double sigma) +void + HydrogenRadials::smooth (std::vector& rgrid, std::vector& rvalue, const double sigma) { - double prefactor = 1.0 / sqrt(2.0 * M_PI) / sigma; - double rmax = rgrid.back(); - for(int ir = 0; ir != rgrid.size(); ++ir) - { - double delta_r = rgrid[ir] - rmax; - double smooth = prefactor * exp(-delta_r * delta_r / 2.0 / sigma / sigma); - rvalue[ir] *= (1 - smooth); - } + double prefactor = 1.0 / sqrt (2.0 * M_PI) / sigma; + double rmax = rgrid.back (); + 
for (int ir = 0; ir != rgrid.size (); ++ir) + { + double delta_r = rgrid[ir] - rmax; + double smooth = prefactor * exp (-delta_r * delta_r / 2.0 / sigma / sigma); + rvalue[ir] *= (1 - smooth); + } } -std::map, std::pair, std::vector>> -HydrogenRadials::generate_orb(const double charge, - const bool with_slater_screening, - const int nmax, - const double dr, - const double conv_thr, - const int rank, - const std::string strategy, - std::ofstream* ptr_log) +std::map, std::pair, std::vector>> + HydrogenRadials::generate_orb (const double charge, + const bool with_slater_screening, + const int nmax, + const double dr, + const double conv_thr, + const int rank, + const std::string strategy, + std::ofstream* ptr_log) { // create space for storing all generated orbitals // (n, l) to (rgrid, rvalue) - std::map, std::pair, std::vector>> radials; + std::map, std::pair, std::vector>> radials; std::map, double> rmaxs; - std::vector> nl_pairs = unzip_strategy(nmax, strategy); + std::vector> nl_pairs = unzip_strategy (nmax, strategy); double rmax = 0.0; - for(auto nl_pair : nl_pairs) - { - int n = nl_pair.first; - int l = nl_pair.second; - std::vector rgrid; - std::vector rvalue; - double rmax_nl = generate_hydrogen_radial_toconv(charge, - with_slater_screening, - n, - l, - conv_thr, - rank, - rgrid, - rvalue, - ptr_log); - radials[nl_pair] = std::make_pair(rgrid, rvalue); - rmaxs[nl_pair] = rmax_nl; - if(rmax < rmax_nl) + for (auto nl_pair: nl_pairs) { - rmax = rmax_nl; + int n = nl_pair.first; + int l = nl_pair.second; + std::vector rgrid; + std::vector rvalue; + double rmax_nl = generate_hydrogen_radial_toconv (charge, + with_slater_screening, + n, + l, + conv_thr, + rank, + rgrid, + rvalue, + ptr_log); + radials[nl_pair] = std::make_pair (rgrid, rvalue); + rmaxs[nl_pair] = rmax_nl; + if (rmax < rmax_nl) + { + rmax = rmax_nl; + } } - } // zero padding towards rmax - for(auto& radial : radials) - { - int n = radial.first.first; - int l = radial.first.second; - std::pair 
nl_pair = std::make_pair(n, l); + for (auto& radial: radials) + { + int n = radial.first.first; + int l = radial.first.second; + std::pair nl_pair = std::make_pair (n, l); - std::vector& rgrid = radial.second.first; - std::vector& rvalue = radial.second.second; + std::vector& rgrid = radial.second.first; + std::vector& rvalue = radial.second.second; - if(rmaxs[nl_pair] < rmax) - { - int ngrid = static_cast((rmax - rmaxs[nl_pair]) / dr); // (r, rmax] - for(int ir = 1; ir <= ngrid; ++ir) - { - rgrid.push_back(rmaxs[nl_pair] + ir * dr); - rvalue.push_back(0.0); - } + if (rmaxs[nl_pair] < rmax) + { + int ngrid = static_cast ((rmax - rmaxs[nl_pair]) / dr); // (r, rmax] + for (int ir = 1; ir <= ngrid; ++ir) + { + rgrid.push_back (rmaxs[nl_pair] + ir * dr); + rvalue.push_back (0.0); + } + } + // smooth the tail + smooth (rgrid, rvalue, 0.1); } - // smooth the tail - smooth(rgrid, rvalue, 0.1); - } return radials; } std::map, std::pair> -HydrogenRadials::mapping_nl_lzeta(const int nmax, - const std::string strategy) + HydrogenRadials::mapping_nl_lzeta (const int nmax, const std::string strategy) { std::map, std::pair> nl_lzeta; - std::vector> nl_pairs = unzip_strategy(nmax, strategy); + std::vector> nl_pairs = unzip_strategy (nmax, strategy); // initialize nzetas by all zeros // get lmax int lmax = 0; - for(auto nl_pair: nl_pairs) - { - lmax = lmax>nl_pair.second?lmax:nl_pair.second; - } - std::vector nzetas(lmax + 1, 0); - for(auto nl_pair : nl_pairs) - { - int n = nl_pair.first; - int l = nl_pair.second; - nl_lzeta[nl_pair] = std::make_pair(l, nzetas[l]); - nzetas[l] += 1; - } + for (auto nl_pair: nl_pairs) + { + lmax = lmax > nl_pair.second ? 
lmax : nl_pair.second; + } + std::vector nzetas (lmax + 1, 0); + for (auto nl_pair: nl_pairs) + { + int n = nl_pair.first; + int l = nl_pair.second; + nl_lzeta[nl_pair] = std::make_pair (l, nzetas[l]); + nzetas[l] += 1; + } // import to standard RadialSet data structure lmax_ = lmax; nzeta_ = new int[lmax_ + 1]; - for(int l = 0; l <= lmax_; ++l) - { - nzeta_[l] = nzetas[l]; - nzeta_max_ = nzeta_max_>nzeta_[l]?nzeta_max_:nzeta_[l]; - } - indexing(); + for (int l = 0; l <= lmax_; ++l) + { + nzeta_[l] = nzetas[l]; + nzeta_max_ = nzeta_max_ > nzeta_[l] ? nzeta_max_ : nzeta_[l]; + } + indexing (); return nl_lzeta; } -void HydrogenRadials::hydrogen(const double charge, +void + HydrogenRadials::hydrogen (const double charge, const bool with_slater_screening, const int nmax, const double dr, @@ -388,75 +410,96 @@ void HydrogenRadials::hydrogen(const double charge, const std::string strategy, std::ofstream* ptr_log) { - std::map, std::pair, std::vector>> orbitals = - generate_orb(charge, with_slater_screening, nmax, dr, conv_thr, rank, strategy, ptr_log); - std::map, std::pair> nl_lzeta = mapping_nl_lzeta(nmax, strategy); + std::map, std::pair, std::vector>> orbitals + = generate_orb (charge, with_slater_screening, nmax, dr, conv_thr, rank, strategy, ptr_log); + std::map, std::pair> nl_lzeta = mapping_nl_lzeta (nmax, strategy); - nchi_ = orbitals.size(); + nchi_ = orbitals.size (); chi_ = new NumericalRadial[nchi_]; int ichi = 0; - for(auto orbital : orbitals) - { - int n = orbital.first.first; - int l = orbital.first.second; - std::pair nl_pair = std::make_pair(n, l); - std::vector& rgrid = orbital.second.first; - std::vector& rvalue = orbital.second.second; - int lzeta = nl_lzeta[nl_pair].second; - chi_[index(l, lzeta)].build(l, true, rgrid.size(), rgrid.data(), rvalue.data(), 0, lzeta, symbol_, itype_, false); - chi_[index(l, lzeta)].normalize(); - //++ichi; - } + for (auto orbital: orbitals) + { + int n = orbital.first.first; + int l = orbital.first.second; + std::pair 
nl_pair = std::make_pair (n, l); + std::vector& rgrid = orbital.second.first; + std::vector& rvalue = orbital.second.second; + int lzeta = nl_lzeta[nl_pair].second; + chi_[index (l, lzeta)] + .build (l, true, rgrid.size (), rgrid.data (), rvalue.data (), 0, lzeta, symbol_, itype_, false); + chi_[index (l, lzeta)].normalize (); + //++ichi; + } } -double HydrogenRadials::slater_screening(const std::string symbol, - const int n, - const int l) +double + HydrogenRadials::slater_screening (const std::string symbol, const int n, const int l) { atom_in atom_db; std::vector electron_config = atom_db.groundstate_electronconfiguration[symbol]; int isubshell = 0; double sigma = 0.0; int _len = 0; - for(int n_ = 1; n_ <= n; ++n_) - { - if(n_ == n) _len += l + 1; - else _len += n_; - } - if(_len > electron_config.size()) - { - printf("Error: electron configuration is not enough for %s\n", symbol.c_str()); - printf("n = %d, l = %d\n", n, l); - exit(1); - } - // special case for 1s: for H and He, use 0.30 for 1s screening constant - if(symbol == "H") return 0.0; // only one 1s electron, no screening by "other electrons" - else if(symbol == "He") return 0.30; // only two 1s electrons, one screening the other - else if(n == 1) return 0.30; - for(int n_ = 1; n_ <= n; ++n_) - { - int lmax = (n_ == n) ? l : n_ - 1; - for(int l_ = 0; l_ <= lmax; ++l_) + for (int n_ = 1; n_ <= n; ++n_) { - int nelec = electron_config[isubshell]; - if(n - n_ >= 2) sigma += nelec * 1.0; - else if(n - n_ == 1) - { - double screening = (l > 1)? 1.00 : 0.85; - sigma += nelec * screening; - } + if (n_ == n) + { + _len += l + 1; + } else - { - if(l_ == l) sigma += (nelec - 1) * 0.35; - else { - double screening = (l > 1)? 
1.00 : 0.35; - sigma += nelec * screening; + _len += n_; + } + } + if (_len > electron_config.size ()) + { + printf ("Error: electron configuration is not enough for %s\n", symbol.c_str ()); + printf ("n = %d, l = %d\n", n, l); + exit (1); + } + // special case for 1s: for H and He, use 0.30 for 1s screening constant + if (symbol == "H") + { + return 0.0; // only one 1s electron, no screening by "other electrons" + } + else if (symbol == "He") + { + return 0.30; // only two 1s electrons, one screening the other + } + else if (n == 1) + { + return 0.30; + } + for (int n_ = 1; n_ <= n; ++n_) + { + int lmax = (n_ == n) ? l : n_ - 1; + for (int l_ = 0; l_ <= lmax; ++l_) + { + int nelec = electron_config[isubshell]; + if (n - n_ >= 2) + { + sigma += nelec * 1.0; + } + else if (n - n_ == 1) + { + double screening = (l > 1) ? 1.00 : 0.85; + sigma += nelec * screening; + } + else + { + if (l_ == l) + { + sigma += (nelec - 1) * 0.35; + } + else + { + double screening = (l > 1) ? 1.00 : 0.35; + sigma += nelec * screening; + } + } + ++isubshell; } - } - ++isubshell; } - } return sigma; } \ No newline at end of file diff --git a/source/source_basis/module_nao/hydrogen_radials.h b/source/source_basis/module_nao/hydrogen_radials.h index b9f87c27e12..59bf6634b17 100644 --- a/source/source_basis/module_nao/hydrogen_radials.h +++ b/source/source_basis/module_nao/hydrogen_radials.h @@ -19,142 +19,124 @@ 2. hydrogen() calls generate_orb() to generate the radial functions for each n, l pair. 3. generate_orb() calls generate_hydrogen_radial_toconv() to generate the radial function for set of n, l pairs. 4. with more details, unzip_strategy() is called to parse the strategy string to get the n, l pairs. - 5. for each n, l pair, generate_hydrogen_radial_toconv() is called to generate the radial function for a given n, l pair. + 5. for each n, l pair, generate_hydrogen_radial_toconv() is called to generate the radial function for a given n, l + pair. 6. 
note that the "toconv" means each radial function should have a norm converged to 1 at a certain radius, this is - also controlled by a convergence threshold (conv_thr), now set to 1e-6. Therefore each radial function will have different - rmax. - 7. if the norm of radial function is not converged to 1 at the given radius, the radius will be increased by dr, and the - radial function will be recalculated until the norm is converged to 1. + also controlled by a convergence threshold (conv_thr), now set to 1e-6. Therefore each radial function will have + different rmax. + 7. if the norm of radial function is not converged to 1 at the given radius, the radius will be increased by dr, and + the radial function will be recalculated until the norm is converged to 1. 8. the radial function is calculated by generate_hydrogen_radial_segment(), which is a wrapper of assoc_laguerre_. - 9. assoc_laguerre_ is a class that can calculate the radial function of hydrogen-like atom, with the help of associated - Laguerre polynomials. - 10. the radial function is calculated from 0.0 to a radius where the norm of radial function is converged, and the radial - function is stored in a pair of vectors, first vector is the radial grid, second vector is the radial function. - 11. after the first generation step, all radial functions are zero-padded to the same length, and stored in a map, with - the key being the n, l pair, and the value being the pair of vectors. - 12. to store the radial functions, they must indiced by the l, zeta pairs, so mapping_nl_lzeta() is called to map the n, l - pairs to the l, zeta pairs. - 13. finally, the radial functions are stored in NumericalRadials, with the key being the l, zeta pairs, and the value being - the pair of vectors. - + 9. assoc_laguerre_ is a class that can calculate the radial function of hydrogen-like atom, with the help of + associated Laguerre polynomials. + 10. 
the radial function is calculated from 0.0 to a radius where the norm of radial function is converged, and the + radial function is stored in a pair of vectors, first vector is the radial grid, second vector is the radial + function. + 11. after the first generation step, all radial functions are zero-padded to the same length, and stored in a map, + with the key being the n, l pair, and the value being the pair of vectors. + 12. to store the radial functions, they must indiced by the l, zeta pairs, so mapping_nl_lzeta() is called to map + the n, l pairs to the l, zeta pairs. + 13. finally, the radial functions are stored in NumericalRadials, with the key being the l, zeta pairs, and the + value being the pair of vectors. + User capable settings: - 1. charge of the nucleus charge according to pseudopotential, it is also a rescale of radius. Higher charge result in smaller - radius. - 2. generation strategy. minimal will only generate 1 orbital per angular momentum, double will generate 2 orbitals per angular - momentum, full will generate all orbitals per angular momentum up to nmax. Note: nmax is read from atom_database. - 3. conv_thr, the convergence threshold of the norm of radial function, if not reached, will continue to increase the radius. - user use this to control the accuracy of radial function. Too large conv_thr will result in inaccurate spherical Bessel - transformation results (in FFT-two_center_integrator) + 1. charge of the nucleus charge according to pseudopotential, it is also a rescale of radius. Higher charge result + in smaller radius. + 2. generation strategy. minimal will only generate 1 orbital per angular momentum, double will generate 2 orbitals + per angular momentum, full will generate all orbitals per angular momentum up to nmax. Note: nmax is read from + atom_database. + 3. conv_thr, the convergence threshold of the norm of radial function, if not reached, will continue to increase the + radius. 
user use this to control the accuracy of radial function. Too large conv_thr will result in inaccurate + spherical Bessel transformation results (in FFT-two_center_integrator) */ class HydrogenRadials : public RadialSet { - public: - /// @brief default constructor - HydrogenRadials() {} - /// @brief overloaded assignment operator - /// @param rhs HydrogenRadials object to be assigned - /// @return a reference to the assigned HydrogenRadials object - HydrogenRadials& operator=(const HydrogenRadials& rhs); - /// @brief copy constructor - /// @return a copy of the HydrogenRadials object - HydrogenRadials* clone() const { return new HydrogenRadials(*this); } // covariant return type - /// @brief destructor - ~HydrogenRadials() {} + public: + /// @brief default constructor + HydrogenRadials () {} + /// @brief overloaded assignment operator + /// @param rhs HydrogenRadials object to be assigned + /// @return a reference to the assigned HydrogenRadials object + HydrogenRadials& operator= (const HydrogenRadials& rhs); + /// @brief copy constructor + /// @return a copy of the HydrogenRadials object + HydrogenRadials* + clone () const + { + return new HydrogenRadials (*this); + } // covariant return type + /// @brief destructor + ~HydrogenRadials () {} - /// @brief build the hydrogen-like radial functions and push into NumericalRadials - /// @param itype index of the atom type - /// @param charge charge of the nucleus - /// @param nmax maxmium principal quantum number - /// @param rcut cutoff radius of the radial function (not used anymore) - /// @param dr step size of the radial grid - /// @param rank MPI rank - /// @param symbol element symbol, seems only useful when storing orbital information to file - /// @param strategy strategy string - /// @param ptr_log pointer to the log ofstream - void build(const int itype = 0, - const double charge = 1.0, - const bool with_slater_screening = false, - const int nmax = 0, - const double rcut = 10.0, - const double dr = 0.01, - 
const double conv_thr = 1e-6, - const int rank = 0, - const std::string symbol = "", - const std::string strategy = "minimal-valence", - std::ofstream* ptr_log = nullptr - ); + /// @brief build the hydrogen-like radial functions and push into NumericalRadials + /// @param itype index of the atom type + /// @param charge charge of the nucleus + /// @param nmax maxmium principal quantum number + /// @param rcut cutoff radius of the radial function (not used anymore) + /// @param dr step size of the radial grid + /// @param rank MPI rank + /// @param symbol element symbol, seems only useful when storing orbital information to file + /// @param strategy strategy string + /// @param ptr_log pointer to the log ofstream + void build (const int itype = 0, + const double charge = 1.0, + const bool with_slater_screening = false, + const int nmax = 0, + const double rcut = 10.0, + const double dr = 0.01, + const double conv_thr = 1e-6, + const int rank = 0, + const std::string symbol = "", + const std::string strategy = "minimal-valence", + std::ofstream* ptr_log = nullptr); - /// @brief parse the strategy string to get the n, l pairs - /// @param nmax maxmium principal quantum number - /// @param strategy strategy string - /// @return a vector of n, l pairs - std::vector> unzip_strategy(const int nmax = 0, - const std::string strategy = "minimal-valence"); - /// @brief smooth the radial function to avoid high frequency noise in FFT-spherical bessel transform - /// @param rgrid radial grid - /// @param rvalue radial function - /// @param sigma sigma of the Gaussian kernel - void smooth(std::vector& rgrid, - std::vector& rvalue, - const double sigma = 0.1); - /// @brief generate hydrogen-like radial functions for a given n, l, from 0.0 to a radius where the norm of radial function is converged - /// @param charge charge of the nucleus - /// @param n principal quantum number - /// @param l angular momentum quantum number - /// @param converge_threshold the threshold of norm of 
radial function, if not reached, will continue to increase the radius - /// @param rank MPI rank - /// @param rgrid returned radial grid - /// @param rvalue returned radial function - /// @param ptr_log pointer to the log ofstream - /// @return the rmax of present radial function - double generate_hydrogen_radial_toconv(const double charge, - const bool with_slater_screening, - const int n, - const int l, - const double conv_thr, - const int rank, - std::vector& rgrid, - std::vector& rvalue, - std::ofstream* ptr_log = nullptr); - /// @brief returns the norm of the radial function - /// @param rgrid radial grid - /// @param rvalue radial function - /// @return norm of the radial function - double radial_norm(const std::vector rgrid, - const std::vector rvalue); + /// @brief parse the strategy string to get the n, l pairs + /// @param nmax maxmium principal quantum number + /// @param strategy strategy string + /// @return a vector of n, l pairs + std::vector> unzip_strategy (const int nmax = 0, + const std::string strategy = "minimal-valence"); + /// @brief smooth the radial function to avoid high frequency noise in FFT-spherical bessel transform + /// @param rgrid radial grid + /// @param rvalue radial function + /// @param sigma sigma of the Gaussian kernel + void smooth (std::vector& rgrid, std::vector& rvalue, const double sigma = 0.1); + /// @brief generate hydrogen-like radial functions for a given n, l, from 0.0 to a radius where the norm of radial + /// function is converged + /// @param charge charge of the nucleus + /// @param n principal quantum number + /// @param l angular momentum quantum number + /// @param converge_threshold the threshold of norm of radial function, if not reached, will continue to increase + /// the radius + /// @param rank MPI rank + /// @param rgrid returned radial grid + /// @param rvalue returned radial function + /// @param ptr_log pointer to the log ofstream + /// @return the rmax of present radial function + double 
generate_hydrogen_radial_toconv (const double charge, + const bool with_slater_screening, + const int n, + const int l, + const double conv_thr, + const int rank, + std::vector& rgrid, + std::vector& rvalue, + std::ofstream* ptr_log = nullptr); + /// @brief returns the norm of the radial function + /// @param rgrid radial grid + /// @param rvalue radial function + /// @return norm of the radial function + double radial_norm (const std::vector rgrid, const std::vector rvalue); - /// @brief generate set of hydrogen-like radial functions for a given charge, nmax, dr, rank, strategy - /// @param charge charge of the nucleus - /// @param nmax maxmium principal quantum number - /// @param dr step size of the radial grid - /// @param rank MPI rank - /// @param strategy strategy string - /// @param ptr_log pointer to the log ofstream - std::map, std::pair, std::vector>> - generate_orb(const double charge = 1.0, - const bool with_slater_screening = false, - const int nmax = 0, - const double dr = 0.01, - const double conv_thr = 1e-6, - const int rank = 0, - const std::string strategy = "minimal-valence", - std::ofstream* ptr_log = nullptr); - /// @brief mapping the n, l pairs to the l, zeta pairs - /// @param nmax maxmium principal quantum number - /// @param strategy strategy string - /// @return a map of n, l pairs to l, zeta pairs - std::map, std::pair> - mapping_nl_lzeta(const int nmax = 0, - const std::string strategy = "minimal-valence"); - /// @brief kernel function of hydrogen-like radial functions - /// @param charge charge of the nucleus - /// @param nmax maxmium principal quantum number - /// @param dr step size of the radial grid - /// @param conv_thr convergence threshold of the norm of radial function - /// @param rank MPI rank - /// @param strategy strategy string - /// @param ptr_log pointer to the log ofstream - void hydrogen(const double charge = 1.0, + /// @brief generate set of hydrogen-like radial functions for a given charge, nmax, dr, rank, strategy + 
/// @param charge charge of the nucleus + /// @param nmax maxmium principal quantum number + /// @param dr step size of the radial grid + /// @param rank MPI rank + /// @param strategy strategy string + /// @param ptr_log pointer to the log ofstream + std::map, std::pair, std::vector>> + generate_orb (const double charge = 1.0, const bool with_slater_screening = false, const int nmax = 0, const double dr = 0.01, @@ -162,35 +144,58 @@ class HydrogenRadials : public RadialSet const int rank = 0, const std::string strategy = "minimal-valence", std::ofstream* ptr_log = nullptr); - /// @brief return the Slater screening constant for calculating effective nuclear charge - /// @note hoping to get a more accurate estimation of hydrogen-like atom radial function, by including many-electron effect in this way - /// @details algorithm: https://laney.edu/pinar-alscher/wp-content/uploads/sites/219/2016/04/Slater-rules-revised.pdf - /// @param n principal quantum number - /// @param l angular momentum quantum number - double slater_screening(const std::string symbol, - const int n, - const int l); - private: - /// @brief generate hydrogen-like radial functions for a given n, l, in a given range [rmin, rmax] - /// @param charge charge of the nucleus - /// @param n principal quantum number - /// @param l angular momentum quantum number - /// @param rmin the minimal radius - /// @param rmax the maximal radius - /// @param dr step size of the radial grid - /// @param rank MPI rank - /// @param ptr_log pointer to the log ofstream - /// @return the radial function stored in std::vector - std::vector generate_hydrogen_radial_segment(const double charge = 1.0, - const bool with_slater_screening = false, - const int n = 0, - const int l = 0, - const double rmin = 0.0, - const double rmax = 10.0, - const double dr = 0.01, - const int rank = 0, - std::ofstream* ptr_log = nullptr); - /// @brief Associated Laguerre polynomials generator - Assoc_Laguerre assoc_laguerre_; + /// @brief mapping 
the n, l pairs to the l, zeta pairs + /// @param nmax maxmium principal quantum number + /// @param strategy strategy string + /// @return a map of n, l pairs to l, zeta pairs + std::map, std::pair> + mapping_nl_lzeta (const int nmax = 0, const std::string strategy = "minimal-valence"); + /// @brief kernel function of hydrogen-like radial functions + /// @param charge charge of the nucleus + /// @param nmax maxmium principal quantum number + /// @param dr step size of the radial grid + /// @param conv_thr convergence threshold of the norm of radial function + /// @param rank MPI rank + /// @param strategy strategy string + /// @param ptr_log pointer to the log ofstream + void hydrogen (const double charge = 1.0, + const bool with_slater_screening = false, + const int nmax = 0, + const double dr = 0.01, + const double conv_thr = 1e-6, + const int rank = 0, + const std::string strategy = "minimal-valence", + std::ofstream* ptr_log = nullptr); + /// @brief return the Slater screening constant for calculating effective nuclear charge + /// @note hoping to get a more accurate estimation of hydrogen-like atom radial function, by including many-electron + /// effect in this way + /// @details algorithm: + /// https://laney.edu/pinar-alscher/wp-content/uploads/sites/219/2016/04/Slater-rules-revised.pdf + /// @param n principal quantum number + /// @param l angular momentum quantum number + double slater_screening (const std::string symbol, const int n, const int l); + + private: + /// @brief generate hydrogen-like radial functions for a given n, l, in a given range [rmin, rmax] + /// @param charge charge of the nucleus + /// @param n principal quantum number + /// @param l angular momentum quantum number + /// @param rmin the minimal radius + /// @param rmax the maximal radius + /// @param dr step size of the radial grid + /// @param rank MPI rank + /// @param ptr_log pointer to the log ofstream + /// @return the radial function stored in std::vector + std::vector 
generate_hydrogen_radial_segment (const double charge = 1.0, + const bool with_slater_screening = false, + const int n = 0, + const int l = 0, + const double rmin = 0.0, + const double rmax = 10.0, + const double dr = 0.01, + const int rank = 0, + std::ofstream* ptr_log = nullptr); + /// @brief Associated Laguerre polynomials generator + Assoc_Laguerre assoc_laguerre_; }; #endif // HYDROGEN_RADIALS_H_ \ No newline at end of file diff --git a/source/source_basis/module_nao/numerical_radial.cpp b/source/source_basis/module_nao/numerical_radial.cpp index dec99ed56f8..020ed809274 100644 --- a/source/source_basis/module_nao/numerical_radial.cpp +++ b/source/source_basis/module_nao/numerical_radial.cpp @@ -15,47 +15,39 @@ using ModuleBase::PI; -NumericalRadial::NumericalRadial(const NumericalRadial& other) : - symbol_(other.symbol_), - itype_(other.itype_), - l_(other.l_), - izeta_(other.izeta_), - nr_(other.nr_), - nk_(other.nk_), - ircut_(other.ircut_), - ikcut_(other.ikcut_), - is_fft_compliant_(other.is_fft_compliant_), - pr_(other.pr_), - pk_(other.pk_), - sbt_(other.sbt_) +NumericalRadial::NumericalRadial (const NumericalRadial& other) + : symbol_ (other.symbol_), itype_ (other.itype_), l_ (other.l_), izeta_ (other.izeta_), nr_ (other.nr_), + nk_ (other.nk_), ircut_ (other.ircut_), ikcut_ (other.ikcut_), is_fft_compliant_ (other.is_fft_compliant_), + pr_ (other.pr_), pk_ (other.pk_), sbt_ (other.sbt_) { // deep copy - if (other.rgrid()) - { - rgrid_ = new double[nr_]; - rvalue_ = new double[nr_]; - std::memcpy(rgrid_, other.rgrid_, nr_ * sizeof(double)); - std::memcpy(rvalue_, other.rvalue_, nr_ * sizeof(double)); - } - - if (other.kgrid()) - { - kgrid_ = new double[nk_]; - kvalue_ = new double[nk_]; - std::memcpy(kgrid_, other.kgrid_, nk_ * sizeof(double)); - std::memcpy(kvalue_, other.kvalue_, nk_ * sizeof(double)); - } + if (other.rgrid ()) + { + rgrid_ = new double[nr_]; + rvalue_ = new double[nr_]; + std::memcpy (rgrid_, other.rgrid_, nr_ * sizeof (double)); + 
std::memcpy (rvalue_, other.rvalue_, nr_ * sizeof (double)); + } + + if (other.kgrid ()) + { + kgrid_ = new double[nk_]; + kvalue_ = new double[nk_]; + std::memcpy (kgrid_, other.kgrid_, nk_ * sizeof (double)); + std::memcpy (kvalue_, other.kvalue_, nk_ * sizeof (double)); + } } -NumericalRadial& NumericalRadial::operator=(const NumericalRadial& rhs) +NumericalRadial& + NumericalRadial::operator= (const NumericalRadial& rhs) { if (this == &rhs) - { - return *this; - } + { + return *this; + } // wipe off r & k space data - wipe(true, true); + wipe (true, true); symbol_ = rhs.symbol_; itype_ = rhs.itype_; @@ -76,26 +68,26 @@ NumericalRadial& NumericalRadial::operator=(const NumericalRadial& rhs) sbt_ = rhs.sbt_; // deep copy - if (rhs.rgrid()) - { - rgrid_ = new double[nr_]; - rvalue_ = new double[nr_]; - std::memcpy(rgrid_, rhs.rgrid_, nr_ * sizeof(double)); - std::memcpy(rvalue_, rhs.rvalue_, nr_ * sizeof(double)); - } - - if (rhs.kgrid()) - { - kgrid_ = new double[nk_]; - kvalue_ = new double[nk_]; - std::memcpy(kgrid_, rhs.kgrid_, nk_ * sizeof(double)); - std::memcpy(kvalue_, rhs.kvalue_, nk_ * sizeof(double)); - } + if (rhs.rgrid ()) + { + rgrid_ = new double[nr_]; + rvalue_ = new double[nr_]; + std::memcpy (rgrid_, rhs.rgrid_, nr_ * sizeof (double)); + std::memcpy (rvalue_, rhs.rvalue_, nr_ * sizeof (double)); + } + + if (rhs.kgrid ()) + { + kgrid_ = new double[nk_]; + kvalue_ = new double[nk_]; + std::memcpy (kgrid_, rhs.kgrid_, nk_ * sizeof (double)); + std::memcpy (kvalue_, rhs.kvalue_, nk_ * sizeof (double)); + } return *this; } -NumericalRadial::~NumericalRadial() +NumericalRadial::~NumericalRadial () { delete[] rgrid_; delete[] kgrid_; @@ -103,7 +95,8 @@ NumericalRadial::~NumericalRadial() delete[] kvalue_; } -void NumericalRadial::build(const int l, +void + NumericalRadial::build (const int l, const bool for_r_space, const int ngrid, const double* const grid, @@ -115,17 +108,17 @@ void NumericalRadial::build(const int l, const bool init_sbt) { #ifdef 
__DEBUG - assert(l >= 0); - assert(ngrid > 1); - assert(grid && value); + assert (l >= 0); + assert (ngrid > 1); + assert (grid && value); // grid must be strictly increasing and every element must be non-negative - assert(std::is_sorted(grid, grid + ngrid, std::less_equal())); // using less_equal forbids equal values - assert(grid[0] >= 0.0); + assert (std::is_sorted (grid, grid + ngrid, std::less_equal ())); // using less_equal forbids equal values + assert (grid[0] >= 0.0); #endif // wipe off any existing r & k space data - wipe(true, true); + wipe (true, true); symbol_ = symbol; itype_ = itype; @@ -133,76 +126,93 @@ void NumericalRadial::build(const int l, l_ = l; if (for_r_space) - { - nr_ = ngrid; - pr_ = p; - rgrid_ = new double[nr_]; - rvalue_ = new double[nr_]; - std::memcpy(rgrid_, grid, nr_ * sizeof(double)); - std::memcpy(rvalue_, value, nr_ * sizeof(double)); - } + { + nr_ = ngrid; + pr_ = p; + rgrid_ = new double[nr_]; + rvalue_ = new double[nr_]; + std::memcpy (rgrid_, grid, nr_ * sizeof (double)); + std::memcpy (rvalue_, value, nr_ * sizeof (double)); + } else - { - nk_ = ngrid; - pk_ = p; - kgrid_ = new double[nk_]; - kvalue_ = new double[nk_]; - std::memcpy(kgrid_, grid, nk_ * sizeof(double)); - std::memcpy(kvalue_, value, nk_ * sizeof(double)); - } - - set_icut(for_r_space, !for_r_space); + { + nk_ = ngrid; + pk_ = p; + kgrid_ = new double[nk_]; + kvalue_ = new double[nk_]; + std::memcpy (kgrid_, grid, nk_ * sizeof (double)); + std::memcpy (kvalue_, value, nk_ * sizeof (double)); + } + + set_icut (for_r_space, !for_r_space); } -void NumericalRadial::to_numerical_orbital_lm(Numerical_Orbital_Lm& orbital_lm, const int nk_legacy, const double lcao_dk) const +void + NumericalRadial::to_numerical_orbital_lm (Numerical_Orbital_Lm& orbital_lm, + const int nk_legacy, + const double lcao_dk) const { #ifdef __DEBUG - assert(rgrid_); - assert(rgrid_[0] == 0.0); - assert(is_uniform(nr_, rgrid_, 1e-14)); + assert (rgrid_); + assert (rgrid_[0] == 0.0); + 
assert (is_uniform (nr_, rgrid_, 1e-14)); // Numerical_Orbital_Lm does not support extra exponent in the real space value - assert(pr_ == 0); + assert (pr_ == 0); #endif double dr = rgrid_[1] - rgrid_[0]; double* rab = new double[nr_]; - std::fill(rab, rab + nr_, dr); - - orbital_lm.set_orbital_info(symbol_, itype_, l_, izeta_, std::min(nr_, ircut_+1), rab, rgrid_, - Numerical_Orbital_Lm::Psi_Type::Psi, rvalue_, nk_legacy, lcao_dk, - 0.001 /* dr_uniform */, PARAM.inp.out_element_info, true, PARAM.inp.cal_force); + std::fill (rab, rab + nr_, dr); + + orbital_lm.set_orbital_info (symbol_, + itype_, + l_, + izeta_, + std::min (nr_, ircut_ + 1), + rab, + rgrid_, + Numerical_Orbital_Lm::Psi_Type::Psi, + rvalue_, + nk_legacy, + lcao_dk, + 0.001 /* dr_uniform */, + PARAM.inp.out_element_info, + true, + PARAM.inp.cal_force); delete[] rab; } -void NumericalRadial::set_transformer(ModuleBase::SphericalBesselTransformer sbt, int update) +void + NumericalRadial::set_transformer (ModuleBase::SphericalBesselTransformer sbt, int update) { sbt_ = sbt; #ifdef __DEBUG - assert(update == 0 || update == 1 || update == -1); + assert (update == 0 || update == 1 || update == -1); #endif switch (update) - { - case 1: - transform(true); // forward transform r -> k - break; - case -1: - transform(false); // backward transform k -> r - break; - default:; // do nothing - } + { + case 1: + transform (true); // forward transform r -> k + break; + case -1: + transform (false); // backward transform k -> r + break; + default:; // do nothing + } } -void NumericalRadial::set_grid(const bool for_r_space, const int ngrid, const double* const grid, const char mode) +void + NumericalRadial::set_grid (const bool for_r_space, const int ngrid, const double* const grid, const char mode) { #ifdef __DEBUG - assert(mode == 'i' || mode == 't'); - assert(ngrid > 1); + assert (mode == 'i' || mode == 't'); + assert (ngrid > 1); // grid must be strictly increasing and every element must be non-negative - 
assert(std::is_sorted(grid, grid + ngrid, std::less_equal())); // using less_equal forbids equal values - assert(grid[0] >= 0.0); + assert (std::is_sorted (grid, grid + ngrid, std::less_equal ())); // using less_equal forbids equal values + assert (grid[0] >= 0.0); #endif // tbu stands for "to be updated" @@ -211,137 +221,143 @@ void NumericalRadial::set_grid(const bool for_r_space, const int ngrid, const do int& ngrid_tbu = (for_r_space ? nr_ : nk_); if (mode == 't') - { // obtain new values by a transform from the other space - // make sure a transform from the other space is available + { // obtain new values by a transform from the other space + // make sure a transform from the other space is available #ifdef __DEBUG - assert(for_r_space ? (kgrid_ && kvalue_) : (rgrid_ && rvalue_)); + assert (for_r_space ? (kgrid_ && kvalue_) : (rgrid_ && rvalue_)); #endif - delete[] grid_tbu; - delete[] value_tbu; - grid_tbu = new double[ngrid]; - value_tbu = new double[ngrid]; - ngrid_tbu = ngrid; - std::memcpy(grid_tbu, grid, ngrid * sizeof(double)); - - is_fft_compliant_ = is_fft_compliant(nr_, rgrid_, nk_, kgrid_); - transform(!for_r_space); // transform(true): r -> k; transform(false): k -> r - // ircut_ or ikcut_ is updated in transform() - } + delete[] grid_tbu; + delete[] value_tbu; + grid_tbu = new double[ngrid]; + value_tbu = new double[ngrid]; + ngrid_tbu = ngrid; + std::memcpy (grid_tbu, grid, ngrid * sizeof (double)); + + is_fft_compliant_ = is_fft_compliant (nr_, rgrid_, nk_, kgrid_); + transform (!for_r_space); // transform(true): r -> k; transform(false): k -> r + // ircut_ or ikcut_ is updated in transform() + } else - { // obtain new values by interpolation in the current space - // make sure an interpolation in the current space is available + { // obtain new values by interpolation in the current space + // make sure an interpolation in the current space is available #ifdef __DEBUG - assert(grid_tbu && value_tbu); + assert (grid_tbu && value_tbu); #endif - 
// cubic spline interpolation - ModuleBase::CubicSpline cubspl(ngrid_tbu, grid_tbu, value_tbu); // not-a-knot boundary condition + // cubic spline interpolation + ModuleBase::CubicSpline cubspl (ngrid_tbu, grid_tbu, value_tbu); // not-a-knot boundary condition - double* grid_new = new double[ngrid]; - double* value_new = new double[ngrid]; + double* grid_new = new double[ngrid]; + double* value_new = new double[ngrid]; - std::memcpy(grid_new, grid, ngrid * sizeof(double)); - std::fill_n(value_new, ngrid, 0.0); + std::memcpy (grid_new, grid, ngrid * sizeof (double)); + std::fill_n (value_new, ngrid, 0.0); - // do interpolation for grid points within the range of the origional grid - // for grid points outside the original range, simply set the values to zero + // do interpolation for grid points within the range of the origional grid + // for grid points outside the original range, simply set the values to zero - // grid_start is the first grid point that is greater than or equal to grid_tbu[0] - double* grid_start = std::lower_bound(grid_new, grid_new + ngrid, grid_tbu[0]); + // grid_start is the first grid point that is greater than or equal to grid_tbu[0] + double* grid_start = std::lower_bound (grid_new, grid_new + ngrid, grid_tbu[0]); - // grid_end is the first grid point that is strictly greater than grid_tbu[ngrid_tbu-1] - double* grid_end = std::upper_bound(grid_new, grid_new + ngrid, grid_tbu[ngrid_tbu - 1]); + // grid_end is the first grid point that is strictly greater than grid_tbu[ngrid_tbu-1] + double* grid_end = std::upper_bound (grid_new, grid_new + ngrid, grid_tbu[ngrid_tbu - 1]); - cubspl.eval(std::distance(grid_start, grid_end), grid_start, value_new + std::distance(grid_new, grid_start)); + cubspl.eval (std::distance (grid_start, grid_end), + grid_start, + value_new + std::distance (grid_new, grid_start)); - delete[] grid_tbu; - delete[] value_tbu; + delete[] grid_tbu; + delete[] value_tbu; - grid_tbu = grid_new; - value_tbu = value_new; - 
ngrid_tbu = ngrid; + grid_tbu = grid_new; + value_tbu = value_new; + ngrid_tbu = ngrid; - is_fft_compliant_ = is_fft_compliant(nr_, rgrid_, nk_, kgrid_); - set_icut(for_r_space, !for_r_space); - transform(for_r_space); // transform(true): r -> k; transform(false): k -> r - } + is_fft_compliant_ = is_fft_compliant (nr_, rgrid_, nk_, kgrid_); + set_icut (for_r_space, !for_r_space); + transform (for_r_space); // transform(true): r -> k; transform(false): k -> r + } } -void NumericalRadial::set_uniform_grid(const bool for_r_space, +void + NumericalRadial::set_uniform_grid (const bool for_r_space, const int ngrid, const double cutoff, const char mode, const bool enable_fft) { - std::vector grid(ngrid); + std::vector grid (ngrid); double dx = cutoff / (ngrid - 1); for (int i = 0; i != ngrid; ++i) - { - grid[i] = i * dx; - } + { + grid[i] = i * dx; + } - set_grid(for_r_space, ngrid, grid.data(), mode); + set_grid (for_r_space, ngrid, grid.data (), mode); if (enable_fft) - { - set_uniform_grid(!for_r_space, ngrid, PI / dx, 't', false); - } + { + set_uniform_grid (!for_r_space, ngrid, PI / dx, 't', false); + } } -void NumericalRadial::set_value(const bool for_r_space, const double* const value, const int p) +void + NumericalRadial::set_value (const bool for_r_space, const double* const value, const int p) { #ifdef __DEBUG - assert(for_r_space ? rvalue_ : kvalue_); + assert (for_r_space ? 
rvalue_ : kvalue_); #endif if (for_r_space) - { - std::memcpy(rvalue_, value, nr_ * sizeof(double)); - pr_ = p; - transform(true); - set_icut(true, false); - } + { + std::memcpy (rvalue_, value, nr_ * sizeof (double)); + pr_ = p; + transform (true); + set_icut (true, false); + } else - { - std::memcpy(kvalue_, value, nk_ * sizeof(double)); - pk_ = p; - transform(false); - set_icut(false, true); - } + { + std::memcpy (kvalue_, value, nk_ * sizeof (double)); + pk_ = p; + transform (false); + set_icut (false, true); + } } -void NumericalRadial::wipe(const bool r_space, const bool k_space) +void + NumericalRadial::wipe (const bool r_space, const bool k_space) { #ifdef __DEBUG - assert(r_space || k_space); + assert (r_space || k_space); #endif // wipe the grid and value in r/k space if (r_space) - { - delete[] rgrid_; - delete[] rvalue_; - rgrid_ = nullptr; - rvalue_ = nullptr; - nr_ = 0; - pr_ = 0; - ircut_ = 0; - } + { + delete[] rgrid_; + delete[] rvalue_; + rgrid_ = nullptr; + rvalue_ = nullptr; + nr_ = 0; + pr_ = 0; + ircut_ = 0; + } if (k_space) - { - delete[] kgrid_; - delete[] kvalue_; - kgrid_ = nullptr; - kvalue_ = nullptr; - nk_ = 0; - pk_ = 0; - ikcut_ = 0; - } + { + delete[] kgrid_; + delete[] kvalue_; + kgrid_ = nullptr; + kvalue_ = nullptr; + nk_ = 0; + pk_ = 0; + ikcut_ = 0; + } is_fft_compliant_ = false; } -void NumericalRadial::radtab(const char op, +void + NumericalRadial::radtab (const char op, const NumericalRadial& ket, const int l, double* const table, @@ -350,59 +366,60 @@ void NumericalRadial::radtab(const char op, const bool deriv) const { #ifdef __DEBUG - assert(op == 'S' || op == 'I' || op == 'T' || op == 'U'); - assert(l >= 0); - assert(rmax_tab > 0 && nr_tab > 0); + assert (op == 'S' || op == 'I' || op == 'T' || op == 'U'); + assert (l >= 0); + assert (rmax_tab > 0 && nr_tab > 0); // radtab requires that two NumericalRadial objects have exactly the same (non-null) kgrid_ - assert(nk_ > 0 && nk_ == ket.nk_); - assert(std::equal(kgrid_, 
kgrid_ + nk_, ket.kgrid_)); + assert (nk_ > 0 && nk_ == ket.nk_); + assert (std::equal (kgrid_, kgrid_ + nk_, ket.kgrid_)); #endif double* rgrid_tab = new double[nr_tab]; double dr = rmax_tab / (nr_tab - 1); - std::for_each(rgrid_tab, rgrid_tab + nr_tab, [dr,&rgrid_tab](double& r) { r = dr * (int)(&r - rgrid_tab); }); + std::for_each (rgrid_tab, rgrid_tab + nr_tab, [dr, &rgrid_tab] (double& r) { r = dr * (int)(&r - rgrid_tab); }); - bool use_radrfft = is_fft_compliant(nr_tab, rgrid_tab, nk_, kgrid_); + bool use_radrfft = is_fft_compliant (nr_tab, rgrid_tab, nk_, kgrid_); // function to undergo a spherical Bessel transform: // overlap: chi1(k) * chi2(k) // kinetic: k^2 * chi1(k) * chi2(k) // Coulomb: k^(-2) * chi1(k) * chi2(k) double* fk = new double[nk_]; - std::transform(kvalue_, kvalue_ + nk_, ket.kvalue_, fk, std::multiplies()); + std::transform (kvalue_, kvalue_ + nk_, ket.kvalue_, fk, std::multiplies ()); int op_pk = 0; switch (op) - { - case 'T': - op_pk = -2; - break; - case 'U': - op_pk = 2; - break; - default:; // for overlap integral op_pk = 0 - } + { + case 'T': + op_pk = -2; + break; + case 'U': + op_pk = 2; + break; + default:; // for overlap integral op_pk = 0 + } if (use_radrfft) - { - sbt_.radrfft(l, nk_, kmax(), fk, table, pk_ + ket.pk_ + op_pk); - } + { + sbt_.radrfft (l, nk_, kmax (), fk, table, pk_ + ket.pk_ + op_pk); + } else - { - sbt_.direct(l, nk_, kgrid_, fk, nr_tab, rgrid_tab, table, pk_ + ket.pk_ + op_pk); - } + { + sbt_.direct (l, nk_, kgrid_, fk, nr_tab, rgrid_tab, table, pk_ + ket.pk_ + op_pk); + } delete[] fk; delete[] rgrid_tab; // spherical Bessel transform has a prefactor of sqrt(2/pi) // and the prefactor for the two-center integral radial table is 4*pi - double pref = ModuleBase::FOUR_PI * std::sqrt(ModuleBase::PI / 2.0); - std::for_each(table, table + nr_tab, [pref](double& x) { x *= pref; }); + double pref = ModuleBase::FOUR_PI * std::sqrt (ModuleBase::PI / 2.0); + std::for_each (table, table + nr_tab, [pref] (double& x) { x *= 
pref; }); } -void NumericalRadial::normalize(bool for_r_space) +void + NumericalRadial::normalize (bool for_r_space) { int& ngrid = for_r_space ? nr_ : nk_; @@ -414,104 +431,111 @@ void NumericalRadial::normalize(bool for_r_space) double* integrand = new double[ngrid]; double* rab = new double[ngrid]; - std::adjacent_difference(grid_tbu, grid_tbu + ngrid, rab); - std::transform(value_tbu, value_tbu + ngrid, grid_tbu, integrand, std::multiplies()); - std::for_each(integrand, integrand + ngrid, [](double& x) { x *= x; }); + std::adjacent_difference (grid_tbu, grid_tbu + ngrid, rab); + std::transform (value_tbu, value_tbu + ngrid, grid_tbu, integrand, std::multiplies ()); + std::for_each (integrand, integrand + ngrid, [] (double& x) { x *= x; }); - factor = ModuleBase::Integral::simpson(ngrid, integrand, &rab[1]); - factor = 1. / std::sqrt(factor); + factor = ModuleBase::Integral::simpson (ngrid, integrand, &rab[1]); + factor = 1. / std::sqrt (factor); - std::for_each(value_tbu, value_tbu + ngrid, [factor](double& x) { x *= factor; }); - transform(for_r_space); + std::for_each (value_tbu, value_tbu + ngrid, [factor] (double& x) { x *= factor; }); + transform (for_r_space); delete[] rab; delete[] integrand; } -void NumericalRadial::transform(const bool forward) +void + NumericalRadial::transform (const bool forward) { #ifdef __DEBUG // grid & value must exist in the initial space - assert(forward ? (rgrid_ && rvalue_) : (kgrid_ && kvalue_)); + assert (forward ? 
(rgrid_ && rvalue_) : (kgrid_ && kvalue_)); #endif // do nothing if there is no grid in the destination space if ((forward && !kgrid_) || (!forward && !rgrid_)) - { - return; - } - - if (forward) - { // r -> k - if (is_fft_compliant_) { - sbt_.radrfft(l_, nr_, rgrid_[nr_ - 1], rvalue_, kvalue_, pr_); + return; } - else - { - sbt_.direct(l_, nr_, rgrid_, rvalue_, nk_, kgrid_, kvalue_, pr_); + + if (forward) + { // r -> k + if (is_fft_compliant_) + { + sbt_.radrfft (l_, nr_, rgrid_[nr_ - 1], rvalue_, kvalue_, pr_); + } + else + { + sbt_.direct (l_, nr_, rgrid_, rvalue_, nk_, kgrid_, kvalue_, pr_); + } + pk_ = 0; + set_icut (false, true); } - pk_ = 0; - set_icut(false, true); - } else - { // k -> r - if (is_fft_compliant_) - { - sbt_.radrfft(l_, nk_, kgrid_[nk_ - 1], kvalue_, rvalue_, pk_); - } - else - { - sbt_.direct(l_, nk_, kgrid_, kvalue_, nr_, rgrid_, rvalue_, pk_); + { // k -> r + if (is_fft_compliant_) + { + sbt_.radrfft (l_, nk_, kgrid_[nk_ - 1], kvalue_, rvalue_, pk_); + } + else + { + sbt_.direct (l_, nk_, kgrid_, kvalue_, nr_, rgrid_, rvalue_, pk_); + } + pr_ = 0; + set_icut (true, false); } - pr_ = 0; - set_icut(true, false); - } } -void NumericalRadial::set_icut(const bool for_r_space, const bool for_k_space, const double tol) +void + NumericalRadial::set_icut (const bool for_r_space, const bool for_k_space, const double tol) { if (for_r_space) - { + { #ifdef __DEBUG - assert(rgrid_ && rvalue_); + assert (rgrid_ && rvalue_); #endif - ircut_ = nr_; - while (ircut_ && std::abs(rvalue_[ircut_ - 1]) <= tol) { --ircut_; } - } + ircut_ = nr_; + while (ircut_ && std::abs (rvalue_[ircut_ - 1]) <= tol) + { + --ircut_; + } + } if (for_k_space) - { + { #ifdef __DEBUG - assert(kgrid_ && kvalue_); + assert (kgrid_ && kvalue_); #endif - ikcut_ = nk_; - while (ikcut_ && std::abs(kvalue_[ikcut_ - 1]) <= tol) { --ikcut_; } - } + ikcut_ = nk_; + while (ikcut_ && std::abs (kvalue_[ikcut_ - 1]) <= tol) + { + --ikcut_; + } + } } -bool NumericalRadial::is_uniform(const int n, 
const double* const x, const double tol) +bool + NumericalRadial::is_uniform (const int n, const double* const x, const double tol) { double dx = (x[n - 1] - x[0]) / (n - 1); - return std::all_of(x, x + n, - [&](const double& xi) { return std::abs(x[0] + (&xi - x) * dx - xi) < tol; }); + return std::all_of (x, x + n, [&] (const double& xi) { return std::abs (x[0] + (&xi - x) * dx - xi) < tol; }); } -bool NumericalRadial::is_fft_compliant(const int nr, +bool + NumericalRadial::is_fft_compliant (const int nr, const double* const rgrid, const int nk, const double* const kgrid, - const double tol - ) + const double tol) { if (!rgrid || !kgrid || nr != nk || nr < 2) - { - return false; - } + { + return false; + } double dr = rgrid[nr - 1] / (nr - 1); double dk = kgrid[nk - 1] / (nk - 1); - return nr * std::abs(dr * dk - PI / (nr - 1)) < tol - && rgrid[0] == 0.0 && is_uniform(nr, rgrid, tol) - && kgrid[0] == 0.0 && is_uniform(nk, kgrid, tol); + return nr * std::abs (dr * dk - PI / (nr - 1)) < tol && rgrid[0] == 0.0 && is_uniform (nr, rgrid, tol) + && kgrid[0] == 0.0 && is_uniform (nk, kgrid, tol); } diff --git a/source/source_basis/module_nao/numerical_radial.h b/source/source_basis/module_nao/numerical_radial.h index f89a604ab62..b4e52702cb3 100644 --- a/source/source_basis/module_nao/numerical_radial.h +++ b/source/source_basis/module_nao/numerical_radial.h @@ -36,7 +36,7 @@ * double dr = 0.01; * double* grid = new double[sz]; * for (int ir = 0; ir != sz; ++ir) { - * grid[ir] = ir * dr; + * grid[ir] = ir * dr; * f[ir] = std::exp(-grid[ir] * grid[ir]); * } * // grid does not necessarily have to be uniform; it just @@ -63,14 +63,14 @@ */ class NumericalRadial { -public: - NumericalRadial() = default; - NumericalRadial(NumericalRadial const&); ///< Deep-copy grid & values + public: + NumericalRadial () = default; + NumericalRadial (NumericalRadial const&); ///< Deep-copy grid & values /// Deep-copy grid & values - NumericalRadial& operator=(NumericalRadial const&); + 
NumericalRadial& operator= (NumericalRadial const&); - ~NumericalRadial(); + ~NumericalRadial (); /** * @brief Initializes the object by providing the grid & values in one space. @@ -89,17 +89,16 @@ class NumericalRadial * @note init_sbt is only useful when the internal SphericalBesselTransformer (sbt_) is * null-initialized; The function will NOT reset sbt_ if it is already usable. */ - void build(const int l, - const bool for_r_space, - const int ngrid, - const double* const grid, - const double* const value, - const int p = 0, - const int izeta = 0, - const std::string symbol = "", - const int itype = 0, - const bool init_sbt = true - ); + void build (const int l, + const bool for_r_space, + const int ngrid, + const double* const grid, + const double* const value, + const int p = 0, + const int izeta = 0, + const std::string symbol = "", + const int itype = 0, + const bool init_sbt = true); /** * @brief Overwrites the content of a Numerical_Orbital_Lm object with the current object. @@ -109,13 +108,13 @@ class NumericalRadial * use the k grid of NumericalRadial (which is FFT-compliant with r grid) to initialize * the k grid of Numerical_Orbital_Lm. */ - void to_numerical_orbital_lm(Numerical_Orbital_Lm& orbital_lm, - const int nk_legacy = 4005, // equivalent to lcao_ecut = 1600 - const double lcao_dk = 0.01) const; + void to_numerical_orbital_lm (Numerical_Orbital_Lm& orbital_lm, + const int nk_legacy = 4005, // equivalent to lcao_ecut = 1600 + const double lcao_dk = 0.01) const; - /** + /** * @brief Sets a SphericalBesselTransformer. - * + * * By default the class uses an internal SphericalBesselTransformer, but one can optionally * use a shared one. This could be beneficial when there are a lot of NumericalRadial objects * whose grids have the same size. @@ -127,7 +126,7 @@ class NumericalRadial * * 1: calls a forward transform; * * -1: calls a backward transform. 
*/ - void set_transformer(ModuleBase::SphericalBesselTransformer sbt, int update = 0); + void set_transformer (ModuleBase::SphericalBesselTransformer sbt, int update = 0); /** * @brief Sets up a grid. @@ -147,7 +146,7 @@ class NumericalRadial * With this option, it is an error if the other space does not * have a grid. */ - void set_grid(const bool for_r_space, const int ngrid, const double* const grid, const char mode = 'i'); + void set_grid (const bool for_r_space, const int ngrid, const double* const grid, const char mode = 'i'); /** * @brief Sets up a uniform grid. @@ -167,11 +166,11 @@ class NumericalRadial * the r & k grids are FFT-compliant (and updates values via a FFT-based spherical * Bessel transform). */ - void set_uniform_grid(const bool for_r_space, - const int ngrid, - const double cutoff, - const char mode = 'i', - const bool enable_fft = false); + void set_uniform_grid (const bool for_r_space, + const int ngrid, + const double cutoff, + const char mode = 'i', + const bool enable_fft = false); /** * @brief Updates values on an existing grid. @@ -182,13 +181,10 @@ class NumericalRadial * * @note This function does not check the index bound; use with care! */ - void set_value(const bool for_r_space, - const double* const value, - const int p - ); + void set_value (const bool for_r_space, const double* const value, const int p); /// Removes the grid & values in r or k space. - void wipe(const bool r_space = true, const bool k_space = true); + void wipe (const bool r_space = true, const bool k_space = true); //! Computes the radial table for two-center integrals. /*! @@ -231,17 +227,17 @@ class NumericalRadial * / 0 l * * */ - void radtab(const char op, //!< [in] operator, could be: - //!< - 'S' or 'I': overlap - //!< - 'T': kinetic - //!< - 'U': Coulomb - const NumericalRadial& ket, //!< [in] the other NumericalRadial object with which - //! 
the two-center integral is computed - const int l, //!< [in] angular momentum of the table - double* const table, //!< [out] on finish, contain the computed table - const int nr_tab, //!< [in] size of table grid - const double rmax_tab, //!< [in] cutoff radius of table grid - const bool deriv = false //!< [in] if true, calculates the derivative of the table + void radtab (const char op, //!< [in] operator, could be: + //!< - 'S' or 'I': overlap + //!< - 'T': kinetic + //!< - 'U': Coulomb + const NumericalRadial& ket, //!< [in] the other NumericalRadial object with which + //! the two-center integral is computed + const int l, //!< [in] angular momentum of the table + double* const table, //!< [out] on finish, contain the computed table + const int nr_tab, //!< [in] size of table grid + const double rmax_tab, //!< [in] cutoff radius of table grid + const bool deriv = false //!< [in] if true, calculates the derivative of the table ) const; /** @@ -256,48 +252,136 @@ class NumericalRadial * where x is r or k. The integral is evaluated with Simpson's rule. Values in the other space * are updated automatically via a spherical Bessel transform. */ - void normalize(bool for_r_space = true); + void normalize (bool for_r_space = true); /** * @name Getters */ ///@{ - std::string const& symbol() const { return symbol_; } - int itype() const { return itype_; } - int izeta() const { return izeta_; } - int l() const { return l_; } - int nr() const { return nr_; } // paired with rmax(), not rcut! 
- int nk() const { return nk_; } - double rcut() const { return rgrid_[std::min(ircut_, nr_-1)]; } ///< padded zeros ignored - double kcut() const { return kgrid_[std::min(ikcut_, nk_-1)]; } - double rmax() const { return rgrid_[nr_-1]; } ///< padded zeros considered - double kmax() const { return kgrid_[nk_-1]; } - const double* rgrid() const { return rgrid_; } - const double* kgrid() const { return kgrid_; } - const double* rvalue() const { return rvalue_; } - const double* kvalue() const { return kvalue_; } - double pr() const { return pr_; } - double pk() const { return pk_; } - bool is_fft_compliant() const { return is_fft_compliant_; } - ModuleBase::SphericalBesselTransformer sbt() const { return sbt_; } + std::string const& + symbol () const + { + return symbol_; + } + int + itype () const + { + return itype_; + } + int + izeta () const + { + return izeta_; + } + int + l () const + { + return l_; + } + int + nr () const + { + return nr_; + } // paired with rmax(), not rcut! + int + nk () const + { + return nk_; + } + double + rcut () const + { + return rgrid_[std::min (ircut_, nr_ - 1)]; + } ///< padded zeros ignored + double + kcut () const + { + return kgrid_[std::min (ikcut_, nk_ - 1)]; + } + double + rmax () const + { + return rgrid_[nr_ - 1]; + } ///< padded zeros considered + double + kmax () const + { + return kgrid_[nk_ - 1]; + } + const double* + rgrid () const + { + return rgrid_; + } + const double* + kgrid () const + { + return kgrid_; + } + const double* + rvalue () const + { + return rvalue_; + } + const double* + kvalue () const + { + return kvalue_; + } + double + pr () const + { + return pr_; + } + double + pk () const + { + return pk_; + } + bool + is_fft_compliant () const + { + return is_fft_compliant_; + } + ModuleBase::SphericalBesselTransformer + sbt () const + { + return sbt_; + } - double rgrid(int ir) const { return rgrid_[ir]; } - double kgrid(int ik) const { return kgrid_[ik]; } - double rvalue(int ir) const { return rvalue_[ir]; 
} - double kvalue(int ik) const { return kvalue_[ik]; } + double + rgrid (int ir) const + { + return rgrid_[ir]; + } + double + kgrid (int ik) const + { + return kgrid_[ik]; + } + double + rvalue (int ir) const + { + return rvalue_[ir]; + } + double + kvalue (int ik) const + { + return kvalue_[ik]; + } ///@} -private: - std::string symbol_ = ""; ///< chemical symbol - int itype_ = 0; ///< element index in calculation - int l_ = -1; ///< angular momentum - int izeta_ = 0; ///< further index for NumericalRadial objects with the same itype_and l_ + private: + std::string symbol_ = ""; ///< chemical symbol + int itype_ = 0; ///< element index in calculation + int l_ = -1; ///< angular momentum + int izeta_ = 0; ///< further index for NumericalRadial objects with the same itype_and l_ - int nr_ = 0; ///< number of r-space grid points - int nk_ = 0; ///< number of k-space grid points + int nr_ = 0; ///< number of r-space grid points + int nk_ = 0; ///< number of k-space grid points - double* rgrid_ = nullptr; ///< r-space grid - double* kgrid_ = nullptr; ///< k-space grid + double* rgrid_ = nullptr; ///< r-space grid + double* kgrid_ = nullptr; ///< k-space grid /** * @brief Index of the first trailing zero. @@ -307,7 +391,7 @@ class NumericalRadial * variables keep track of the actual cutoff radius. Specifically, * if there are no trailing zeros in rvalues_, then ircut_ = nr_; * if there are trailing zeros, then ircut_ is the index of the first - * trailing zero. For example, + * trailing zero. 
For example, * rvalues_ = {1, 2, 3, 0, 0, 0} -> ircut_ = 3 * rvalues_ = {1, 2, 3, 4, 5, 6} -> ircut_ = 6 * rvalues_ = {0, 0, 0, 0, 0, 0} -> ircut_ = 0 @@ -315,8 +399,8 @@ class NumericalRadial int ircut_ = 0; int ikcut_ = 0; - double* rvalue_ = nullptr; ///< r-space value - double* kvalue_ = nullptr; ///< k-space value + double* rvalue_ = nullptr; ///< r-space value + double* kvalue_ = nullptr; ///< k-space value /** * @brief A flag that tells whether the r & k grids are FFT-compliant. @@ -375,28 +459,27 @@ class NumericalRadial * forward : r to k * backward: k to r */ - void transform(const bool forward); + void transform (const bool forward); /// Updates ircut_ and/or ikcut_. - void set_icut(const bool for_r_space, const bool for_k_space, const double tol = 1e-15); + void set_icut (const bool for_r_space, const bool for_k_space, const double tol = 1e-15); // FIXME is_uniform and is_fft_compliant should be more robust for arrays whose elements // are all close to machine precision /// Checks whether a grid is uniform. - static bool is_uniform(const int n, const double* const grid, const double tol = 1e-15); + static bool is_uniform (const int n, const double* const grid, const double tol = 1e-15); /** * @brief Checks whether the given two grids are FFT-compliant. 
* * @see is_fft_compliant_ */ - static bool is_fft_compliant(const int nr, - const double* const rgrid, - const int nk, - const double* const kgrid, - const double tol = 1e-15 - ); + static bool is_fft_compliant (const int nr, + const double* const rgrid, + const int nk, + const double* const kgrid, + const double tol = 1e-15); }; #endif diff --git a/source/source_basis/module_nao/pswfc_radials.cpp b/source/source_basis/module_nao/pswfc_radials.cpp index 98d186c4072..fc5ee38fd4a 100644 --- a/source/source_basis/module_nao/pswfc_radials.cpp +++ b/source/source_basis/module_nao/pswfc_radials.cpp @@ -10,13 +10,15 @@ #include "source_base/parallel_common.h" #endif -PswfcRadials& PswfcRadials::operator=(const PswfcRadials& rhs) +PswfcRadials& + PswfcRadials::operator= (const PswfcRadials& rhs) { - RadialSet::operator=(rhs); + RadialSet::operator= (rhs); return *this; } -void PswfcRadials::build(const std::string& file, +void + PswfcRadials::build (const std::string& file, const int itype, const double screening_coeff, const double conv_thr, @@ -24,227 +26,256 @@ void PswfcRadials::build(const std::string& file, const int rank) { // deallocates all arrays and reset variables (excluding sbt_) - cleanup(); + cleanup (); std::ifstream ifs; bool is_open = false; if (rank == 0) - { - ifs.open(file); - is_open = ifs.is_open(); - } + { + ifs.open (file); + is_open = ifs.is_open (); + } #ifdef __MPI - Parallel_Common::bcast_bool(is_open); + Parallel_Common::bcast_bool (is_open); #endif if (!is_open) - { - ModuleBase::WARNING_QUIT("AtomicRadials::read", "Couldn't open pseudopotential file: " + file); - } + { + ModuleBase::WARNING_QUIT ("AtomicRadials::read", "Couldn't open pseudopotential file: " + file); + } itype_ = itype; - read_upf_pswfc(ifs, screening_coeff, conv_thr, ptr_log, rank); - set_rcut_max(); + read_upf_pswfc (ifs, screening_coeff, conv_thr, ptr_log, rank); + set_rcut_max (); if (rank == 0) - { - ifs.close(); - } + { + ifs.close (); + } } -bool 
PswfcRadials::startswith(std::string word, std::string pattern) +bool + PswfcRadials::startswith (std::string word, std::string pattern) { - if (word.size() < pattern.size()) - return false; - int score = 1; - for (int ic = 0; ic < pattern.size(); ic++) - { - if (word[ic] != pattern[ic]) + if (word.size () < pattern.size ()) { - score *= 0; + return false; } - else + int score = 1; + for (int ic = 0; ic < pattern.size (); ic++) { - score *= 1; + if (word[ic] != pattern[ic]) + { + score *= 0; + } + else + { + score *= 1; + } } - } - return bool(score); + return bool (score); } -std::string PswfcRadials::steal_from_quotes(std::string word) +std::string + PswfcRadials::steal_from_quotes (std::string word) { // first make sure there are even number of quotes in this word int num_quote = 0; for (auto letter: word) - { - if (letter == '\"') - num_quote += 1; - } - assert(num_quote % 2 == 0); + { + if (letter == '\"') + { + num_quote += 1; + } + } + assert (num_quote % 2 == 0); // then steal from quotes std::string result; - size_t _left = word.find_first_of("\""); - size_t _right = word.find_last_of("\""); - result = word.substr(_left + 1, _right - _left - 1); + size_t _left = word.find_first_of ("\""); + size_t _right = word.find_last_of ("\""); + result = word.substr (_left + 1, _right - _left - 1); // then remove all spaces ahead while (result[0] == ' ') - { - result.erase(0, 1); - } + { + result.erase (0, 1); + } return result; } -std::string PswfcRadials::steal_from_quotes(std::ifstream& ifs, std::string word) +std::string + PswfcRadials::steal_from_quotes (std::ifstream& ifs, std::string word) { // concatenate all words until the second quote, no matter how many lines and spaces between - std::string concatenated = word.substr(word.find_first_of("\"") + 1, word.size() - word.find_first_of("\"") - 1); + std::string concatenated + = word.substr (word.find_first_of ("\"") + 1, word.size () - word.find_first_of ("\"") - 1); int num_quote = 1; while (num_quote < 2) - { 
- std::string line; - ifs >> line; - for (auto letter: line) { - if (letter == '\"') - num_quote += 1; - if (num_quote == 2) - break; - concatenated += letter; + std::string line; + ifs >> line; + for (auto letter: line) + { + if (letter == '\"') + { + num_quote += 1; + } + if (num_quote == 2) + { + break; + } + concatenated += letter; + } } - } // then remove all spaces ahead while (concatenated[0] == ' ') - { - concatenated.erase(0, 1); - } + { + concatenated.erase (0, 1); + } return concatenated; } -std::string PswfcRadials::read_keyword_value(std::ifstream& ifs, std::string word) +std::string + PswfcRadials::read_keyword_value (std::ifstream& ifs, std::string word) { // count the number of quotes, only 1 or 2 cases are considered for pseudopotential reading int num_quote = 0; for (auto letter: word) - { - if (letter == '\"') - num_quote += 1; - } - assert(num_quote == 1 || num_quote == 2); + { + if (letter == '\"') + { + num_quote += 1; + } + } + assert (num_quote == 1 || num_quote == 2); if (num_quote == 1) - return steal_from_quotes(ifs, word); + { + return steal_from_quotes (ifs, word); + } else - return steal_from_quotes(word); + { + return steal_from_quotes (word); + } } -double PswfcRadials::radial_norm(const std::vector rgrid, const std::vector rvalue) +double + PswfcRadials::radial_norm (const std::vector rgrid, const std::vector rvalue) { - std::vector integrand(rvalue.size()); - for (int ir = 0; ir != rvalue.size(); ++ir) - { - integrand[ir] = rvalue[ir] * rvalue[ir] * rgrid[ir] * rgrid[ir]; - } + std::vector integrand (rvalue.size ()); + for (int ir = 0; ir != rvalue.size (); ++ir) + { + integrand[ir] = rvalue[ir] * rvalue[ir] * rgrid[ir] * rgrid[ir]; + } double dr = rgrid[1] - rgrid[0]; - double norm = ModuleBase::Integral::simpson(rvalue.size(), integrand.data(), dr); - norm = sqrt(norm); + double norm = ModuleBase::Integral::simpson (rvalue.size (), integrand.data (), dr); + norm = sqrt (norm); return norm; } -double 
PswfcRadials::cut_to_convergence(const std::vector& rgrid, - std::vector& rvalue, - const double& conv_thr) +double + PswfcRadials::cut_to_convergence (const std::vector& rgrid, + std::vector& rvalue, + const double& conv_thr) { double norm = 0.0; int ir_ = 0; int ir_min_ = 0; int delta_ir = 5; // stepsize for radius cutoff searching, in Bohr - int ir_max_ = rgrid.size() - 1; + int ir_max_ = rgrid.size () - 1; double dr = rgrid[1] - rgrid[0]; // radial function realspace grid stepsize, in Bohr - printf("Norm of pseudowavefunction before cutoff: %6.4e\n", radial_norm(rgrid, rvalue)); + printf ("Norm of pseudowavefunction before cutoff: %6.4e\n", radial_norm (rgrid, rvalue)); int istep = 1; double delta_norm = 1.0; - printf("Searching for the cutoff radius for pseudowavefunction, conv_thr = %6.4e\n", conv_thr); - printf("%10s%12s%14s%18s", "Step Nr.", "Rmax (a.u.)", "Norm", "Delta Norm\n"); - while ((std::fabs(delta_norm) > conv_thr) && (ir_ <= ir_max_)) - { - ir_ = std::min(ir_ + delta_ir, ir_max_); // update ir_, but be careful not to exceed ir_max_ - delta_norm = norm; - std::vector rgrid_slice = std::vector(rgrid.begin() + ir_min_, rgrid.begin() + ir_ + 1); - std::vector rvalue_slice = std::vector(rvalue.begin() + ir_min_, rvalue.begin() + ir_ + 1); - norm = radial_norm(rgrid_slice, rvalue_slice); - delta_norm = norm - delta_norm; - if (istep == 1) - printf("%10d%12.2f%14.10f%18.10e\n", istep, rgrid[ir_], norm, delta_norm); - ++istep; - } - printf("...\n"); - printf("%10d%12.2f%14.10f%18.10e\n", istep, rgrid[ir_], norm, delta_norm); - - rvalue = std::vector(rvalue.begin() + ir_min_, rvalue.begin() + ir_ + 1); + printf ("Searching for the cutoff radius for pseudowavefunction, conv_thr = %6.4e\n", conv_thr); + printf ("%10s%12s%14s%18s", "Step Nr.", "Rmax (a.u.)", "Norm", "Delta Norm\n"); + while ((std::fabs (delta_norm) > conv_thr) && (ir_ <= ir_max_)) + { + ir_ = std::min (ir_ + delta_ir, ir_max_); // update ir_, but be careful not to exceed ir_max_ + delta_norm 
= norm; + std::vector rgrid_slice = std::vector (rgrid.begin () + ir_min_, rgrid.begin () + ir_ + 1); + std::vector rvalue_slice + = std::vector (rvalue.begin () + ir_min_, rvalue.begin () + ir_ + 1); + norm = radial_norm (rgrid_slice, rvalue_slice); + delta_norm = norm - delta_norm; + if (istep == 1) + { + printf ("%10d%12.2f%14.10f%18.10e\n", istep, rgrid[ir_], norm, delta_norm); + } + ++istep; + } + printf ("...\n"); + printf ("%10d%12.2f%14.10f%18.10e\n", istep, rgrid[ir_], norm, delta_norm); + + rvalue = std::vector (rvalue.begin () + ir_min_, rvalue.begin () + ir_ + 1); return rgrid[ir_max_]; } -void PswfcRadials::smooth(std::vector& rgrid, std::vector& rvalue, const double sigma) +void + PswfcRadials::smooth (std::vector& rgrid, std::vector& rvalue, const double sigma) { - double prefactor = 1.0 / sqrt(2.0 * M_PI) / sigma; - double rmax = rgrid.back(); - for (int ir = 0; ir != rgrid.size(); ++ir) - { - double delta_r = rgrid[ir] - rmax; - double smooth = prefactor * exp(-delta_r * delta_r / 2.0 / sigma / sigma); - rvalue[ir] *= (1 - smooth); - } + double prefactor = 1.0 / sqrt (2.0 * M_PI) / sigma; + double rmax = rgrid.back (); + for (int ir = 0; ir != rgrid.size (); ++ir) + { + double delta_r = rgrid[ir] - rmax; + double smooth = prefactor * exp (-delta_r * delta_r / 2.0 / sigma / sigma); + rvalue[ir] *= (1 - smooth); + } } -std::vector PswfcRadials::pswfc_prepossess(std::map, std::vector>& lzeta_rvalues, - const double conv_thr) +std::vector + PswfcRadials::pswfc_prepossess (std::map, std::vector>& lzeta_rvalues, + const double conv_thr) { double nmax = 0.0; - for (auto it = lzeta_rvalues.begin(); it != lzeta_rvalues.end(); it++) - { - int l = it->first.first; - int iz = it->first.second; - std::vector rvalue = it->second; - std::vector rgrid = std::vector(rvalue.size(), 0.0); - for (int ir = 0; ir < rvalue.size(); ir++) + for (auto it = lzeta_rvalues.begin (); it != lzeta_rvalues.end (); it++) { - rgrid[ir] = ir * 0.01; + int l = it->first.first; + int 
iz = it->first.second; + std::vector rvalue = it->second; + std::vector rgrid = std::vector (rvalue.size (), 0.0); + for (int ir = 0; ir < rvalue.size (); ir++) + { + rgrid[ir] = ir * 0.01; + } + double rcut_i = cut_to_convergence (rgrid, rvalue, conv_thr); + if (rvalue.size () > nmax) + { + nmax = rvalue.size (); + } + lzeta_rvalues[it->first] = rvalue; // stores in map } - double rcut_i = cut_to_convergence(rgrid, rvalue, conv_thr); - if (rvalue.size() > nmax) - nmax = rvalue.size(); - lzeta_rvalues[it->first] = rvalue; // stores in map - } // generate rgrid - std::vector rgrid = std::vector(nmax, 0.0); + std::vector rgrid = std::vector (nmax, 0.0); for (int ir = 0; ir < nmax; ir++) - { - rgrid[ir] = ir * 0.01; - } + { + rgrid[ir] = ir * 0.01; + } // zero padding on rvalue - for (auto it = lzeta_rvalues.begin(); it != lzeta_rvalues.end(); it++) - { - int l = it->first.first; - int iz = it->first.second; - std::vector rvalue = it->second; - std::vector rvalue_padded = std::vector(nmax, 0.0); - for (int ir = 0; ir < rvalue.size(); ir++) + for (auto it = lzeta_rvalues.begin (); it != lzeta_rvalues.end (); it++) { - rvalue_padded[ir] = rvalue[ir]; + int l = it->first.first; + int iz = it->first.second; + std::vector rvalue = it->second; + std::vector rvalue_padded = std::vector (nmax, 0.0); + for (int ir = 0; ir < rvalue.size (); ir++) + { + rvalue_padded[ir] = rvalue[ir]; + } + smooth (rgrid, + rvalue_padded, + 0.1); // smooth the radial function to avoid high frequency noise in FFT-spherical bessel transform + lzeta_rvalues[it->first] = rvalue_padded; // stores in map } - smooth(rgrid, - rvalue_padded, - 0.1); // smooth the radial function to avoid high frequency noise in FFT-spherical bessel transform - lzeta_rvalues[it->first] = rvalue_padded; // stores in map - } return rgrid; } -void PswfcRadials::read_upf_pswfc(std::ifstream& ifs, +void + PswfcRadials::read_upf_pswfc (std::ifstream& ifs, const double screening_coeff, const double conv_thr, std::ofstream* 
ptr_log, @@ -263,142 +294,162 @@ void PswfcRadials::read_upf_pswfc(std::ifstream& ifs, std::vector rgrid; std::vector> rvalues; if (rank == 0) - { - // result is a map from (l, izeta) to rvalue, i.e., from (l,zeta) to exact value of radial function - // it is a temporary container to store the result of pseudowavefunction, next will be transfer to - // ls, izetas, rgrid and rvalues std::vectors and broadcast - std::map, std::vector> result; - - std::string line = ""; - // read element - while (!startswith(line, "element=") && !ifs.eof()) - ifs >> line; - symbol_ = read_keyword_value(ifs, line); - // read lmax - while (!startswith(line, "l_max=") && !ifs.eof()) - ifs >> line; - lmax_ = std::stoi(read_keyword_value(ifs, line)); - // read ngrid - while (!startswith(line, "mesh_size=") && !ifs.eof()) - ifs >> line; - ngrid = std::stoi(read_keyword_value(ifs, line)); - // read nzeta - while (!startswith(line, "number_of_wfc=") && !ifs.eof()) - ifs >> line; - nzeta = std::stoi(read_keyword_value(ifs, line)); - nchi_ = nzeta; - // read contents of pseudowavefunction - while (line != "") - ifs >> line; - nzeta_ = new int[lmax_ + 1]; - for (int il = 0; il < lmax_ + 1; il++) - nzeta_[il] = 0; - for (int iz = 0; iz < nzeta; iz++) // read chi-by-chi - { - // find the next tag - while (!startswith(line, "> line; - // read l - while (!startswith(line, "l=") && !ifs.eof()) - ifs >> line; - int l = std::stoi(read_keyword_value(ifs, line)); - nzeta_[l] += 1; - // to data - while (line != ">" && !ifs.eof()) - ifs >> line; - // before read data, first create container to store - std::vector rvalue = std::vector(ngrid, 0.0); - for (int ir = 0; ir < ngrid; ir++) - { - ifs >> line; - double screening = std::exp(-screening_coeff * ir * dr); - rvalue[ir] = std::stod(line) * screening; - } - result[std::make_pair(l, nzeta_[l] - 1)] = rvalue; - ifs >> line; - assert(startswith(line, "first.first; - int iz = it->first.second; - ls.push_back(l); - izetas.push_back(iz); - 
rvalues.push_back(it->second); - } - } // rank 0 does almost everything, then broadcast one-by-one + // result is a map from (l, izeta) to rvalue, i.e., from (l,zeta) to exact value of radial function + // it is a temporary container to store the result of pseudowavefunction, next will be transfer to + // ls, izetas, rgrid and rvalues std::vectors and broadcast + std::map, std::vector> result; + + std::string line = ""; + // read element + while (!startswith (line, "element=") && !ifs.eof ()) + { + ifs >> line; + } + symbol_ = read_keyword_value (ifs, line); + // read lmax + while (!startswith (line, "l_max=") && !ifs.eof ()) + { + ifs >> line; + } + lmax_ = std::stoi (read_keyword_value (ifs, line)); + // read ngrid + while (!startswith (line, "mesh_size=") && !ifs.eof ()) + { + ifs >> line; + } + ngrid = std::stoi (read_keyword_value (ifs, line)); + // read nzeta + while (!startswith (line, "number_of_wfc=") && !ifs.eof ()) + { + ifs >> line; + } + nzeta = std::stoi (read_keyword_value (ifs, line)); + nchi_ = nzeta; + // read contents of pseudowavefunction + while (line != "") + { + ifs >> line; + } + nzeta_ = new int[lmax_ + 1]; + for (int il = 0; il < lmax_ + 1; il++) + { + nzeta_[il] = 0; + } + for (int iz = 0; iz < nzeta; iz++) // read chi-by-chi + { + // find the next tag + while (!startswith (line, "> line; + } + // read l + while (!startswith (line, "l=") && !ifs.eof ()) + { + ifs >> line; + } + int l = std::stoi (read_keyword_value (ifs, line)); + nzeta_[l] += 1; + // to data + while (line != ">" && !ifs.eof ()) + { + ifs >> line; + } + // before read data, first create container to store + std::vector rvalue = std::vector (ngrid, 0.0); + for (int ir = 0; ir < ngrid; ir++) + { + ifs >> line; + double screening = std::exp (-screening_coeff * ir * dr); + rvalue[ir] = std::stod (line) * screening; + } + result[std::make_pair (l, nzeta_[l] - 1)] = rvalue; + ifs >> line; + assert (startswith (line, "first.first; + int iz = it->first.second; + ls.push_back (l); 
+ izetas.push_back (iz); + rvalues.push_back (it->second); + } + } // rank 0 does almost everything, then broadcast one-by-one #ifdef __MPI // first broadcast descriptive information to all ranks if (rank == 0) - printf("PswfcRadials: pseudowavefunction read on rank 0, broadcast start.\n"); + printf ("PswfcRadials: pseudowavefunction read on rank 0, broadcast start.\n"); - Parallel_Common::bcast_string(symbol_); - Parallel_Common::bcast_int(lmax_); + Parallel_Common::bcast_string (symbol_); + Parallel_Common::bcast_int (lmax_); - Parallel_Common::bcast_int(nchi_); - Parallel_Common::bcast_int(nzeta_max_); + Parallel_Common::bcast_int (nchi_); + Parallel_Common::bcast_int (nzeta_max_); - Parallel_Common::bcast_int(ngrid); + Parallel_Common::bcast_int (ngrid); // Parallel_Common::bcast_double(dr); // we dont need to broadcast dr again because it is fixed to 0.01 #endif // then adjust and allocate memory for ranks other than 0, according to information broadcasted // from rank0 if (rank != 0) - { - nzeta_ = new int[lmax_ + 1]; - index_map_ = new int[(lmax_ + 1) * nzeta_max_]; - - // decomposed correlated container std::map, std::vector> into three std::vectors, - // additionally with rgrid the r values of pseudowavefunction - ls.resize(nchi_); - izetas.resize(nchi_); - rgrid.resize(ngrid); - rvalues.resize(nchi_); - for (int i = 0; i < nchi_; i++) - rvalues[i].resize(ngrid); - } + { + nzeta_ = new int[lmax_ + 1]; + index_map_ = new int[(lmax_ + 1) * nzeta_max_]; + + // decomposed correlated container std::map, std::vector> into three + // std::vectors, additionally with rgrid the r values of pseudowavefunction + ls.resize (nchi_); + izetas.resize (nchi_); + rgrid.resize (ngrid); + rvalues.resize (nchi_); + for (int i = 0; i < nchi_; i++) + { + rvalues[i].resize (ngrid); + } + } #ifdef __MPI - Parallel_Common::bcast_int(nzeta_, lmax_ + 1); - Parallel_Common::bcast_int(index_map_, (lmax_ + 1) * nzeta_max_); + Parallel_Common::bcast_int (nzeta_, lmax_ + 1); + 
Parallel_Common::bcast_int (index_map_, (lmax_ + 1) * nzeta_max_); // correlated container bcast - Parallel_Common::bcast_int(ls.data(), nchi_); - Parallel_Common::bcast_int(izetas.data(), nchi_); - Parallel_Common::bcast_double(rgrid.data(), ngrid); + Parallel_Common::bcast_int (ls.data (), nchi_); + Parallel_Common::bcast_int (izetas.data (), nchi_); + Parallel_Common::bcast_double (rgrid.data (), ngrid); for (int i = 0; i < nchi_; i++) - Parallel_Common::bcast_double(rvalues[i].data(), ngrid); + Parallel_Common::bcast_double (rvalues[i].data (), ngrid); if (rank == 0) - printf("PswfcRadials: pseudowavefunction read and broadcast finished on rank 0.\n"); + printf ("PswfcRadials: pseudowavefunction read and broadcast finished on rank 0.\n"); #endif // do the following for all ranks, as if rank 0 chi_ = new NumericalRadial[nchi_]; for (int i = 0; i < nchi_; i++) - { - chi_[index(ls[i], izetas[i])] - .build(ls[i], true, ngrid, rgrid.data(), rvalues[i].data(), 0, izetas[i], symbol_, itype_, false); - if (std::fabs(screening_coeff - 0.0) > 1e-6) // PHYSICAL REVIEW B 78, 245112 2008 { - chi_[index(ls[i], izetas[i])].normalize(); + chi_[index (ls[i], izetas[i])] + .build (ls[i], true, ngrid, rgrid.data (), rvalues[i].data (), 0, izetas[i], symbol_, itype_, false); + if (std::fabs (screening_coeff - 0.0) > 1e-6) // PHYSICAL REVIEW B 78, 245112 2008 + { + chi_[index (ls[i], izetas[i])].normalize (); + } } - } // printf("PswfcRadials: pseudowavefunction read and broadcast finished on rank %d.\n", rank); // nzeta and index_map are not deleted here... 
} diff --git a/source/source_basis/module_nao/pswfc_radials.h b/source/source_basis/module_nao/pswfc_radials.h index 384c73bdff5..b99af4997de 100644 --- a/source/source_basis/module_nao/pswfc_radials.h +++ b/source/source_basis/module_nao/pswfc_radials.h @@ -5,86 +5,87 @@ #include #include -class PswfcRadials : public RadialSet { - public: - PswfcRadials() {}; - PswfcRadials& operator=(const PswfcRadials& rhs); - PswfcRadials* clone() const { return new PswfcRadials(*this); } - ~PswfcRadials() {}; - /// @brief central function to build RadialCollection from ONCVPSP program generated pseudopotential file - /// @param file file name of pseudopotential file - /// @param itype atomic type, indiced in UnitCell class - /// @param screening_coeff screening coefficient of pseudowavefunction - /// @param conv_thr convergence threshold of norm of pseudowavefunction, see function cut_to_convergence for details - /// @param ptr_log output file stream for logging - /// @param rank MPI rank - void build(const std::string& file = "", - const int itype = 0, - const double screening_coeff = 0.1, - const double conv_thr = 1e-10, - std::ofstream* ptr_log = nullptr, - const int rank = 0); +class PswfcRadials : public RadialSet +{ + public: + PswfcRadials () {}; + PswfcRadials& operator= (const PswfcRadials& rhs); + PswfcRadials* + clone () const + { + return new PswfcRadials (*this); + } + ~PswfcRadials () {}; + /// @brief central function to build RadialCollection from ONCVPSP program generated pseudopotential file + /// @param file file name of pseudopotential file + /// @param itype atomic type, indiced in UnitCell class + /// @param screening_coeff screening coefficient of pseudowavefunction + /// @param conv_thr convergence threshold of norm of pseudowavefunction, see function cut_to_convergence for details + /// @param ptr_log output file stream for logging + /// @param rank MPI rank + void build (const std::string& file = "", + const int itype = 0, + const double 
screening_coeff = 0.1, + const double conv_thr = 1e-10, + std::ofstream* ptr_log = nullptr, + const int rank = 0); - /// @brief read ONCVPSP program generated pseudopotential file, and store the radial functions in RadialCollection - /// @param ifs input file stream from orbital file - /// @param screening_coeff screening coefficient of pseudowavefunction - /// @param conv_thr convergence threshold of norm of pseudowavefunction, see function cut_to_convergence for details - /// @param ptr_log output file stream for logging - /// @param rank MPI rank - void read_upf_pswfc(std::ifstream& ifs, //!< input file stream from orbital file - const double screening_coeff, //!< screening coefficient - const double conv_thr, //!< convergence threshold - std::ofstream* ptr_log = nullptr, //!< output file stream for logging - const int rank = 0 //!< MPI rank - ); + /// @brief read ONCVPSP program generated pseudopotential file, and store the radial functions in RadialCollection + /// @param ifs input file stream from orbital file + /// @param screening_coeff screening coefficient of pseudowavefunction + /// @param conv_thr convergence threshold of norm of pseudowavefunction, see function cut_to_convergence for details + /// @param ptr_log output file stream for logging + /// @param rank MPI rank + void read_upf_pswfc (std::ifstream& ifs, //!< input file stream from orbital file + const double screening_coeff, //!< screening coefficient + const double conv_thr, //!< convergence threshold + std::ofstream* ptr_log = nullptr, //!< output file stream for logging + const int rank = 0 //!< MPI rank + ); - /// @brief returns the norm of the radial function - /// @param rgrid radial grid - /// @param rvalue radial function - /// @return norm of the radial function - double radial_norm(const std::vector rgrid, - const std::vector rvalue); - /// @brief python-like startswith function - /// @param word as it is - /// @param pattern pattern to be matched - /// @return true if word starts 
with pattern - bool startswith(std::string word, std::string pattern); - /// @brief read value from attributes in HTML-like format - /// @param ifs input file stream - /// @param word as it is - /// @return value of the attribute - std::string read_keyword_value(std::ifstream& ifs, std::string word); - /// @brief steal string from quotes - /// @param word as it is - /// @return string between quotes - std::string steal_from_quotes(std::string word); - /// @brief steal string from quotes - /// @param ifs input file stream - /// @param word as it is - /// @return string between quotes - std::string steal_from_quotes(std::ifstream& ifs, std::string word); + /// @brief returns the norm of the radial function + /// @param rgrid radial grid + /// @param rvalue radial function + /// @return norm of the radial function + double radial_norm (const std::vector rgrid, const std::vector rvalue); + /// @brief python-like startswith function + /// @param word as it is + /// @param pattern pattern to be matched + /// @return true if word starts with pattern + bool startswith (std::string word, std::string pattern); + /// @brief read value from attributes in HTML-like format + /// @param ifs input file stream + /// @param word as it is + /// @return value of the attribute + std::string read_keyword_value (std::ifstream& ifs, std::string word); + /// @brief steal string from quotes + /// @param word as it is + /// @return string between quotes + std::string steal_from_quotes (std::string word); + /// @brief steal string from quotes + /// @param ifs input file stream + /// @param word as it is + /// @return string between quotes + std::string steal_from_quotes (std::ifstream& ifs, std::string word); - /// @brief cut radial function to convergence - /// @param rgrid radial grid - /// @param rvalue radial function - /// @param conv_thr convergence of norm of radial function - /// @return cutoff radius - double cut_to_convergence(const std::vector& rgrid, - std::vector& rvalue, - const 
double& conv_thr); - /// @brief smooth the radial function to avoid high frequency noise in FFT-spherical bessel transform - /// @param rgrid radial grid - /// @param rvalue radial function - /// @param sigma sigma of the Gaussian kernel - void smooth(std::vector& rgrid, - std::vector& rvalue, - const double sigma = 0.1); - /// @brief call cut_to_convergence for each (l,zeta) corresponding orbital in std::map, then zero-padding to the maximal r, generate a grid - /// @param pswfc_map a map of (l,zeta) corresponding orbital - /// @return a vector of radial grid - std::vector pswfc_prepossess(std::map, std::vector>& pswfc_map, - const double conv_thr = 1e-6); - private: + /// @brief cut radial function to convergence + /// @param rgrid radial grid + /// @param rvalue radial function + /// @param conv_thr convergence of norm of radial function + /// @return cutoff radius + double cut_to_convergence (const std::vector& rgrid, std::vector& rvalue, const double& conv_thr); + /// @brief smooth the radial function to avoid high frequency noise in FFT-spherical bessel transform + /// @param rgrid radial grid + /// @param rvalue radial function + /// @param sigma sigma of the Gaussian kernel + void smooth (std::vector& rgrid, std::vector& rvalue, const double sigma = 0.1); + /// @brief call cut_to_convergence for each (l,zeta) corresponding orbital in std::map, then zero-padding to the + /// maximal r, generate a grid + /// @param pswfc_map a map of (l,zeta) corresponding orbital + /// @return a vector of radial grid + std::vector pswfc_prepossess (std::map, std::vector>& pswfc_map, + const double conv_thr = 1e-6); + private: }; #endif // PSWFC_RADIALS_H_ \ No newline at end of file diff --git a/source/source_basis/module_nao/radial_collection.cpp b/source/source_basis/module_nao/radial_collection.cpp index e0b9cfad056..b6ddac23cfc 100644 --- a/source/source_basis/module_nao/radial_collection.cpp +++ b/source/source_basis/module_nao/radial_collection.cpp @@ -12,38 +12,33 @@ 
#include "source_basis/module_nao/hydrogen_radials.h" #include "source_basis/module_nao/pswfc_radials.h" -RadialCollection::RadialCollection(const RadialCollection& other) : - ntype_(other.ntype_), - lmax_(other.lmax_), - nchi_(other.nchi_), - nzeta_max_(other.nzeta_max_), - rcut_max_(other.rcut_max_), - radset_(nullptr), - iter_(nullptr), - nl_(nullptr) +RadialCollection::RadialCollection (const RadialCollection& other) + : ntype_ (other.ntype_), lmax_ (other.lmax_), nchi_ (other.nchi_), nzeta_max_ (other.nzeta_max_), + rcut_max_ (other.rcut_max_), radset_ (nullptr), iter_ (nullptr), nl_ (nullptr) { if (ntype_ == 0) - { - return; - } + { + return; + } radset_ = new RadialSet*[ntype_]; for (int itype = 0; itype < ntype_; ++itype) - { - radset_[itype] = other.radset_[itype]->clone(); - } + { + radset_[itype] = other.radset_[itype]->clone (); + } - iter_build(); + iter_build (); } -RadialCollection& RadialCollection::operator=(const RadialCollection& rhs) +RadialCollection& + RadialCollection::operator= (const RadialCollection& rhs) { if (&rhs == this) - { - return *this; - } + { + return *this; + } - cleanup(); + cleanup (); ntype_ = rhs.ntype_; lmax_ = rhs.lmax_; @@ -53,41 +48,43 @@ RadialCollection& RadialCollection::operator=(const RadialCollection& rhs) radset_ = new RadialSet*[ntype_]; for (int itype = 0; itype < ntype_; ++itype) - { - radset_[itype] = rhs.radset_[itype]->clone(); - } + { + radset_[itype] = rhs.radset_[itype]->clone (); + } - iter_build(); + iter_build (); return *this; } -RadialCollection::~RadialCollection() +RadialCollection::~RadialCollection () { for (int itype = 0; itype < ntype_; ++itype) - { - delete radset_[itype]; - } + { + delete radset_[itype]; + } delete[] radset_; delete[] iter_; // iterator does not control memory; simply delete the pointer array delete[] nl_; } -void RadialCollection::set_rcut_max() +void + RadialCollection::set_rcut_max () { rcut_max_ = 0.0; for (int itype = 0; itype < ntype_; ++itype) - { - rcut_max_ = 
std::max(rcut_max_, radset_[itype]->rcut_max()); - } + { + rcut_max_ = std::max (rcut_max_, radset_[itype]->rcut_max ()); + } } -void RadialCollection::cleanup() +void + RadialCollection::cleanup () { for (int itype = 0; itype < ntype_; ++itype) - { - delete radset_[itype]; - } + { + delete radset_[itype]; + } delete[] radset_; radset_ = nullptr; @@ -103,7 +100,8 @@ void RadialCollection::cleanup() nzeta_max_ = 0; } -void RadialCollection::iter_build() +void + RadialCollection::iter_build () { /* * collect the addresses of NumericalRadial objects from different RadialSet objects @@ -118,223 +116,233 @@ void RadialCollection::iter_build() iter_ = new const NumericalRadial*[nchi_]; int i = 0; - std::fill(nl_, nl_ + lmax_ + 1, 0); + std::fill (nl_, nl_ + lmax_ + 1, 0); for (int l = 0; l <= lmax_; ++l) - { - for (int itype = 0; itype != ntype_; ++itype) { - for (int izeta = 0; izeta < radset_[itype]->nzeta(l); ++izeta) - { - iter_[i] = &radset_[itype]->chi(l, izeta); - ++i; - ++nl_[l]; - } + for (int itype = 0; itype != ntype_; ++itype) + { + for (int izeta = 0; izeta < radset_[itype]->nzeta (l); ++izeta) + { + iter_[i] = &radset_[itype]->chi (l, izeta); + ++i; + ++nl_[l]; + } + } } - } } -void RadialCollection::build(const int ntype, Numerical_Nonlocal* const nls) +void + RadialCollection::build (const int ntype, Numerical_Nonlocal* const nls) { - cleanup(); + cleanup (); ntype_ = ntype; radset_ = new RadialSet*[ntype_]; for (int itype = 0; itype < ntype_; ++itype) - { - radset_[itype] = new BetaRadials; - radset_[itype]->build(nls[itype], itype); + { + radset_[itype] = new BetaRadials; + radset_[itype]->build (nls[itype], itype); - lmax_ = std::max(lmax_, radset_[itype]->lmax()); - nchi_ += radset_[itype]->nchi(); - nzeta_max_ = std::max(nzeta_max_, radset_[itype]->nzeta_max()); - } + lmax_ = std::max (lmax_, radset_[itype]->lmax ()); + nchi_ += radset_[itype]->nchi (); + nzeta_max_ = std::max (nzeta_max_, radset_[itype]->nzeta_max ()); + } - iter_build(); - 
set_rcut_max(); + iter_build (); + set_rcut_max (); } -void RadialCollection::build(const RadialCollection* nls, const double radius) +void + RadialCollection::build (const RadialCollection* nls, const double radius) { - cleanup(); - this->ntype_ = nls->ntype(); - this->rcut_max_ = radius>0.0?radius:nls->rcut_max(); + cleanup (); + this->ntype_ = nls->ntype (); + this->rcut_max_ = radius > 0.0 ? radius : nls->rcut_max (); this->radset_ = new RadialSet*[ntype_]; - this->lmax_ = nls->lmax(); - this->nchi_ = nls->nchi(); - this->nzeta_max_ = nls->nzeta_max(); + this->lmax_ = nls->lmax (); + this->nchi_ = nls->nchi (); + this->nzeta_max_ = nls->nzeta_max (); for (int itype = 0; itype < ntype_; ++itype) - { - radset_[itype] = new AtomicRadials; - static_cast(radset_[itype])->build(nls->radset_[itype], itype, this->rcut_max_); - } + { + radset_[itype] = new AtomicRadials; + static_cast (radset_[itype])->build (nls->radset_[itype], itype, this->rcut_max_); + } - iter_build(); + iter_build (); } -void RadialCollection::build(const int nfile, const std::string* const file, const char ftype) +void + RadialCollection::build (const int nfile, const std::string* const file, const char ftype) { - cleanup(); + cleanup (); ntype_ = nfile; #ifdef __MPI - Parallel_Common::bcast_int(ntype_); + Parallel_Common::bcast_int (ntype_); #endif radset_ = new RadialSet*[ntype_]; char* file_type = new char[ntype_]; if (ftype) - { // simply use the given file type if given - std::fill(file_type, file_type + ntype_, ftype); - } + { // simply use the given file type if given + std::fill (file_type, file_type + ntype_, ftype); + } else - { // otherwise check the file type - for (int itype = 0; itype < ntype_; ++itype) - { - file_type[itype] = check_file_type(file[itype]); + { // otherwise check the file type + for (int itype = 0; itype < ntype_; ++itype) + { + file_type[itype] = check_file_type (file[itype]); + } } - } for (int itype = 0; itype < ntype_; ++itype) - { - switch(file_type[itype]) { - 
case 'o': // orbital file - radset_[itype] = new AtomicRadials; - break; - case 'c': // coefficient file - radset_[itype] = new SphbesRadials; - break; - default: // not supposed to happend - ModuleBase::WARNING_QUIT("RadialCollection::build", "Unrecognized file: " + file[itype]); + switch (file_type[itype]) + { + case 'o': // orbital file + radset_[itype] = new AtomicRadials; + break; + case 'c': // coefficient file + radset_[itype] = new SphbesRadials; + break; + default: // not supposed to happend + ModuleBase::WARNING_QUIT ("RadialCollection::build", "Unrecognized file: " + file[itype]); + } + radset_[itype]->build (file[itype], itype); } - radset_[itype]->build(file[itype], itype); - } delete[] file_type; for (int itype = 0; itype < ntype_; ++itype) - { - lmax_ = std::max(lmax_, radset_[itype]->lmax()); - nchi_ += radset_[itype]->nchi(); - nzeta_max_ = std::max(nzeta_max_, radset_[itype]->nzeta_max()); - } - - iter_build(); - set_rcut_max(); + { + lmax_ = std::max (lmax_, radset_[itype]->lmax ()); + nchi_ += radset_[itype]->nchi (); + nzeta_max_ = std::max (nzeta_max_, radset_[itype]->nzeta_max ()); + } + + iter_build (); + set_rcut_max (); } -void RadialCollection::build(const int ntype, - const double* const charges, +void + RadialCollection::build (const int ntype, + const double* const charges, const bool with_slater_screening, - const int* const nmax, + const int* const nmax, const std::string* symbols, const double conv_thr, const std::string* strategies, const int& rank) { - cleanup(); + cleanup (); ntype_ = ntype; radset_ = new RadialSet*[ntype_]; for (int itype = 0; itype < ntype_; ++itype) - { - radset_[itype] = new HydrogenRadials; - radset_[itype]->build(itype, - charges[itype], - with_slater_screening, - nmax[itype], - 10.0, // rcut should be determined automatically, in principle... 
- 0.01, - conv_thr, - rank, - symbols[itype], - strategies[itype]); - - lmax_ = std::max(lmax_, radset_[itype]->lmax()); - nchi_ += radset_[itype]->nchi(); - nzeta_max_ = std::max(nzeta_max_, radset_[itype]->nzeta_max()); - } + { + radset_[itype] = new HydrogenRadials; + radset_[itype]->build (itype, + charges[itype], + with_slater_screening, + nmax[itype], + 10.0, // rcut should be determined automatically, in principle... + 0.01, + conv_thr, + rank, + symbols[itype], + strategies[itype]); + + lmax_ = std::max (lmax_, radset_[itype]->lmax ()); + nchi_ += radset_[itype]->nchi (); + nzeta_max_ = std::max (nzeta_max_, radset_[itype]->nzeta_max ()); + } // what are these two functions for? Do I need them? - iter_build(); - set_rcut_max(); + iter_build (); + set_rcut_max (); } -void RadialCollection::build(const int ntype, +void + RadialCollection::build (const int ntype, const std::string* const file, const double* const screening_coeffs, const double conv_thr, const int& rank) { - cleanup(); + cleanup (); ntype_ = ntype; radset_ = new RadialSet*[ntype_]; for (int itype = 0; itype < ntype_; ++itype) - { - radset_[itype] = new PswfcRadials; - radset_[itype]->build(file[itype], itype, screening_coeffs[itype], conv_thr, nullptr, rank); + { + radset_[itype] = new PswfcRadials; + radset_[itype]->build (file[itype], itype, screening_coeffs[itype], conv_thr, nullptr, rank); - lmax_ = std::max(lmax_, radset_[itype]->lmax()); - nchi_ += radset_[itype]->nchi(); - nzeta_max_ = std::max(nzeta_max_, radset_[itype]->nzeta_max()); - } + lmax_ = std::max (lmax_, radset_[itype]->lmax ()); + nchi_ += radset_[itype]->nchi (); + nzeta_max_ = std::max (nzeta_max_, radset_[itype]->nzeta_max ()); + } - iter_build(); - set_rcut_max(); + iter_build (); + set_rcut_max (); } -void RadialCollection::build(const int lmax, const int nbes, const double rcut, const double sigma, const double dr) +void + RadialCollection::build (const int lmax, const int nbes, const double rcut, const double sigma, 
const double dr) { - cleanup(); + cleanup (); ntype_ = 1; radset_ = new RadialSet*[ntype_]; radset_[0] = new SphbesRadials; - radset_[0]->build(lmax, nbes, rcut, sigma, dr); + radset_[0]->build (lmax, nbes, rcut, sigma, dr); lmax_ = lmax; - nchi_ = radset_[0]->nchi(); - nzeta_max_ = radset_[0]->nzeta_max(); + nchi_ = radset_[0]->nchi (); + nzeta_max_ = radset_[0]->nzeta_max (); - iter_build(); - set_rcut_max(); + iter_build (); + set_rcut_max (); } -void RadialCollection::set_transformer(ModuleBase::SphericalBesselTransformer sbt, const int update) +void + RadialCollection::set_transformer (ModuleBase::SphericalBesselTransformer sbt, const int update) { for (int itype = 0; itype < ntype_; ++itype) - { - radset_[itype]->set_transformer(sbt, update); - } + { + radset_[itype]->set_transformer (sbt, update); + } } -void RadialCollection::set_grid(const bool for_r_space, const int ngrid, const double* grid, const char mode) +void + RadialCollection::set_grid (const bool for_r_space, const int ngrid, const double* grid, const char mode) { for (int itype = 0; itype < ntype_; ++itype) - { - radset_[itype]->set_grid(for_r_space, ngrid, grid, mode); - } - set_rcut_max(); + { + radset_[itype]->set_grid (for_r_space, ngrid, grid, mode); + } + set_rcut_max (); } -void RadialCollection::set_uniform_grid(const bool for_r_space, +void + RadialCollection::set_uniform_grid (const bool for_r_space, const int ngrid, const double cutoff, const char mode, const bool enable_fft) { for (int itype = 0; itype < ntype_; ++itype) - { - radset_[itype]->set_uniform_grid(for_r_space, ngrid, cutoff, mode, enable_fft); - } - set_rcut_max(); + { + radset_[itype]->set_uniform_grid (for_r_space, ngrid, cutoff, mode, enable_fft); + } + set_rcut_max (); } -char RadialCollection::check_file_type(const std::string& file) const +char + RadialCollection::check_file_type (const std::string& file) const { // currently we only support ABACUS numerical atomic orbital file and // SIAB/PTG-generated orbital 
coefficient file. The latter contains a @@ -348,30 +356,31 @@ char RadialCollection::check_file_type(const std::string& file) const char file_type = 'o'; if (GlobalV::MY_RANK == 0) - { - std::ifstream ifs(file.c_str()); - std::string line; - while (std::getline(ifs, line)) { - if (line.find("symbol() + "_" + appendix + ".orb"; - radset_[itype]->write_abacus_orb(fname); - } + { + std::string fname = radset_[itype]->symbol () + "_" + appendix + ".orb"; + radset_[itype]->write_abacus_orb (fname); + } } diff --git a/source/source_basis/module_nao/radial_collection.h b/source/source_basis/module_nao/radial_collection.h index 6e22bfdd9d9..23109bee72f 100644 --- a/source/source_basis/module_nao/radial_collection.h +++ b/source/source_basis/module_nao/radial_collection.h @@ -17,71 +17,112 @@ class RadialCollection { public: - RadialCollection() = default; - RadialCollection(const RadialCollection& other); ///< deep copy - RadialCollection& operator=(const RadialCollection& rhs); ///< deep copy + RadialCollection () = default; + RadialCollection (const RadialCollection& other); ///< deep copy + RadialCollection& operator= (const RadialCollection& rhs); ///< deep copy - ~RadialCollection(); + ~RadialCollection (); /// Builds the collection from (orbital) files. - void build(const int nfile, const std::string* const file, const char ftype = '\0'); + void build (const int nfile, const std::string* const file, const char ftype = '\0'); /// Builds the collection from Numerical_Nonlocal objects. - void build(const int ntype, Numerical_Nonlocal* const nls); + void build (const int ntype, Numerical_Nonlocal* const nls); /// Builds the collection from RadialCollection objects and another radius cutoff. 
- void build(const RadialCollection* nls, double radius = 0.0); + void build (const RadialCollection* nls, double radius = 0.0); /// builds the collection from quasi hydrogen radial functions - void build(const int ntype, - const double* const charges, - const bool with_slater_screening, - const int* const nmax, - const std::string* symbols = nullptr, - const double conv_thr = 1e-10, - const std::string* strategies = nullptr, - const int& rank = 0); - + void build (const int ntype, + const double* const charges, + const bool with_slater_screening, + const int* const nmax, + const std::string* symbols = nullptr, + const double conv_thr = 1e-10, + const std::string* strategies = nullptr, + const int& rank = 0); + /// builds the collection from pseudopotential pswfc - void build(const int ntype, - const std::string* const file, - const double* const screening_coeff, - const double conv_thr = 1e-10, - const int& rank = 0); + void build (const int ntype, + const std::string* const file, + const double* const screening_coeff, + const double conv_thr = 1e-10, + const int& rank = 0); /// builds a collection of truncated spherical Bessel functions - void build(const int lmax, - const int nbes, - const double rcut, - const double sigma, - const double dr - ); + void build (const int lmax, const int nbes, const double rcut, const double sigma, const double dr); /** * @name Getters */ ///@{ - const std::string& symbol(const int itype) const { return radset_[itype]->symbol(); } - int ntype() const { return ntype_; } - int lmax(const int itype) const { return radset_[itype]->lmax(); } - int lmax() const { return lmax_; } - double rcut_max(const int itype) const { return radset_[itype]->rcut_max(); } - double rcut_max() const { return rcut_max_; } - int nzeta(const int itype, const int l) const { return radset_[itype]->nzeta(l); } - int nzeta_max(const int itype) const { return radset_[itype]->nzeta_max(); } - int nzeta_max() const { return nzeta_max_; } - int nchi() const { 
return nchi_; } - int nchi(const int itype) const { return radset_[itype]->nchi(); } + const std::string& + symbol (const int itype) const + { + return radset_[itype]->symbol (); + } + int + ntype () const + { + return ntype_; + } + int + lmax (const int itype) const + { + return radset_[itype]->lmax (); + } + int + lmax () const + { + return lmax_; + } + double + rcut_max (const int itype) const + { + return radset_[itype]->rcut_max (); + } + double + rcut_max () const + { + return rcut_max_; + } + int + nzeta (const int itype, const int l) const + { + return radset_[itype]->nzeta (l); + } + int + nzeta_max (const int itype) const + { + return radset_[itype]->nzeta_max (); + } + int + nzeta_max () const + { + return nzeta_max_; + } + int + nchi () const + { + return nchi_; + } + int + nchi (const int itype) const + { + return radset_[itype]->nchi (); + } - const NumericalRadial& operator()(const int itype, const int l, const int izeta) const + const NumericalRadial& + operator() (const int itype, const int l, const int izeta) const { - assert(itype >= 0 && itype < ntype_); - return radset_[itype]->chi(l, izeta); + assert (itype >= 0 && itype < ntype_); + return radset_[itype]->chi (l, izeta); } - const RadialSet& operator()(const int itype) const + const RadialSet& + operator() (const int itype) const { - assert(itype >= 0 && itype < ntype_); + assert (itype >= 0 && itype < ntype_); return *radset_[itype]; } ///@} @@ -92,30 +133,34 @@ class RadialCollection * Objects are sorted by l first, by itype next, by izeta last. 
*/ ///@{ - const NumericalRadial** cbegin() const + const NumericalRadial** + cbegin () const { - assert(ntype_ > 0); + assert (ntype_ > 0); return iter_; } - const NumericalRadial** cend() const + const NumericalRadial** + cend () const { - assert(ntype_ > 0); + assert (ntype_ > 0); return iter_ + nchi_; } /// *(this->cbegin(l)) returns the address of the first NumericalRadial object with angular momentum l - const NumericalRadial** cbegin(const int l) const + const NumericalRadial** + cbegin (const int l) const { - assert(ntype_ > 0 && l >= 0 && l <= lmax_); - return iter_ + std::accumulate(nl_, nl_ + l, 0); + assert (ntype_ > 0 && l >= 0 && l <= lmax_); + return iter_ + std::accumulate (nl_, nl_ + l, 0); } /// *(this->cend(l)) returns the address of one-past-last NumericalRadial object with angular momentum l - const NumericalRadial** cend(const int l) const + const NumericalRadial** + cend (const int l) const { - assert(ntype_ > 0 && l >= 0 && l <= lmax_); - return iter_ + std::accumulate(nl_, nl_ + l + 1, 0); + assert (ntype_ > 0 && l >= 0 && l <= lmax_); + return iter_ + std::accumulate (nl_, nl_ + l + 1, 0); } ///@} @@ -124,28 +169,28 @@ class RadialCollection */ ///@{ /// Sets a spherical Bessel transformers for all RadialSet objects. - void set_transformer(ModuleBase::SphericalBesselTransformer sbt, const int update = 0); + void set_transformer (ModuleBase::SphericalBesselTransformer sbt, const int update = 0); /// Sets a common grid for all RadialSet objects. - void set_grid(const bool for_r_space, const int ngrid, const double* grid, const char mode = 'i'); + void set_grid (const bool for_r_space, const int ngrid, const double* grid, const char mode = 'i'); /// Sets a common uniform grid for all RadialSet objects. 
- void set_uniform_grid(const bool for_r_space, - const int ngrid, - const double cutoff, - const char mode = 'i', - const bool enable_fft = false); + void set_uniform_grid (const bool for_r_space, + const int ngrid, + const double cutoff, + const char mode = 'i', + const bool enable_fft = false); ///@} /** * @brief export all RadialSet objects to a file in a given format. - * - * Supported formats: + * + * Supported formats: * - "abacus_orb" (default): ABACUS Numerical atomic orbital format */ - void to_file(const std::string& appendix, ///< file name - const std::string& format = "abacus_orb" ///< file format - ) const; + void to_file (const std::string& appendix, ///< file name + const std::string& format = "abacus_orb" ///< file format + ) const; private: int ntype_ = 0; ///< number of RadialSet in the collection @@ -168,13 +213,13 @@ class RadialCollection int* nl_ = nullptr; /// Deallocates all RadialSet objects and resets all members to default. - void cleanup(); + void cleanup (); /// Builds iter_ from radset_. - void iter_build(); + void iter_build (); /// Finds the maximum cutoff radius among all RadialSet objects and sets rcut_max_ accordingly. - void set_rcut_max(); + void set_rcut_max (); /** * @brief Returns the file type of a given file. @@ -184,7 +229,7 @@ class RadialCollection * * Only rank-0 performs the check; the result is broadcasted to all ranks. 
*/ - char check_file_type(const std::string& file) const; + char check_file_type (const std::string& file) const; }; #endif diff --git a/source/source_basis/module_nao/radial_set.cpp b/source/source_basis/module_nao/radial_set.cpp index eade7423dd8..243a25574be 100644 --- a/source/source_basis/module_nao/radial_set.cpp +++ b/source/source_basis/module_nao/radial_set.cpp @@ -10,48 +10,42 @@ // FIXME: should update with pyabacus // #include "source_io/orb_io.h" -RadialSet::~RadialSet() +RadialSet::~RadialSet () { delete[] nzeta_; delete[] chi_; delete[] index_map_; } -RadialSet::RadialSet(const RadialSet& other) : - symbol_(other.symbol_), - itype_(other.itype_), - lmax_(other.lmax_), - rcut_max_(other.rcut_max_), - nzeta_(nullptr), - nzeta_max_(other.nzeta_max_), - nchi_(other.nchi_), - chi_(nullptr), - index_map_(nullptr) +RadialSet::RadialSet (const RadialSet& other) + : symbol_ (other.symbol_), itype_ (other.itype_), lmax_ (other.lmax_), rcut_max_ (other.rcut_max_), + nzeta_ (nullptr), nzeta_max_ (other.nzeta_max_), nchi_ (other.nchi_), chi_ (nullptr), index_map_ (nullptr) { if (nchi_ == 0) - { - return; - } + { + return; + } nzeta_ = new int[lmax_ + 1]; - std::memcpy(nzeta_, other.nzeta_, (lmax_ + 1) * sizeof(int)); + std::memcpy (nzeta_, other.nzeta_, (lmax_ + 1) * sizeof (int)); index_map_ = new int[(lmax_ + 1) * nzeta_max_]; - std::memcpy(index_map_, other.index_map_, (lmax_ + 1) * nzeta_max_ * sizeof(int)); + std::memcpy (index_map_, other.index_map_, (lmax_ + 1) * nzeta_max_ * sizeof (int)); chi_ = new NumericalRadial[nchi_]; for (int i = 0; i < nchi_; i++) - { - chi_[i] = other.chi_[i]; // deep copy - } + { + chi_[i] = other.chi_[i]; // deep copy + } } -RadialSet& RadialSet::operator=(const RadialSet& rhs) +RadialSet& + RadialSet::operator= (const RadialSet& rhs) { if (&rhs == this) - { - return *this; - } + { + return *this; + } symbol_ = rhs.symbol_; itype_ = rhs.itype_; @@ -68,116 +62,125 @@ RadialSet& RadialSet::operator=(const RadialSet& rhs) 
index_map_ = nullptr; if (nchi_ > 0) - { - nzeta_ = new int[lmax_ + 1]; - std::memcpy(nzeta_, rhs.nzeta_, (lmax_ + 1) * sizeof(int)); + { + nzeta_ = new int[lmax_ + 1]; + std::memcpy (nzeta_, rhs.nzeta_, (lmax_ + 1) * sizeof (int)); - index_map_ = new int[(lmax_ + 1) * nzeta_max_]; - std::memcpy(index_map_, rhs.index_map_, (lmax_ + 1) * nzeta_max_ * sizeof(int)); + index_map_ = new int[(lmax_ + 1) * nzeta_max_]; + std::memcpy (index_map_, rhs.index_map_, (lmax_ + 1) * nzeta_max_ * sizeof (int)); - chi_ = new NumericalRadial[nchi_]; - for (int i = 0; i < nchi_; i++) - { - chi_[i] = rhs.chi_[i]; // deep copy + chi_ = new NumericalRadial[nchi_]; + for (int i = 0; i < nchi_; i++) + { + chi_[i] = rhs.chi_[i]; // deep copy + } } - } return *this; } -void RadialSet::to_numerical_orbital(Numerical_Orbital& no, const int nk_legacy, const double lcao_dk) const +void + RadialSet::to_numerical_orbital (Numerical_Orbital& no, const int nk_legacy, const double lcao_dk) const { - no.chi().clear(); - no.chi().resize(nchi_); + no.chi ().clear (); + no.chi ().resize (nchi_); for (int i = 0; i < nchi_; i++) - { - chi_[i].to_numerical_orbital_lm(no.chi()[i], nk_legacy, lcao_dk); - } + { + chi_[i].to_numerical_orbital_lm (no.chi ()[i], nk_legacy, lcao_dk); + } - no.set_orbital_info(itype_, symbol_, lmax_, nzeta_, nchi_); + no.set_orbital_info (itype_, symbol_, lmax_, nzeta_, nchi_); } -void RadialSet::set_rcut_max() +void + RadialSet::set_rcut_max () { rcut_max_ = 0.0; for (int i = 0; i < nchi_; ++i) - { - rcut_max_ = std::max(rcut_max_, chi_[i].rcut()); - } + { + rcut_max_ = std::max (rcut_max_, chi_[i].rcut ()); + } } -int RadialSet::index(const int l, const int izeta) const +int + RadialSet::index (const int l, const int izeta) const { #ifdef __DEBUG - assert(l >= 0 && l <= lmax_); - assert(izeta >= 0 && izeta < nzeta_[l]); + assert (l >= 0 && l <= lmax_); + assert (izeta >= 0 && izeta < nzeta_[l]); #endif return index_map_[l * nzeta_max_ + izeta]; } -void RadialSet::indexing() 
+void + RadialSet::indexing () { if (!nzeta_) - { - return; - } + { + return; + } #ifdef __DEBUG - assert(lmax_ >= 0); + assert (lmax_ >= 0); #endif delete[] index_map_; index_map_ = new int[(lmax_ + 1) * nzeta_max_]; int index_chi = 0; for (int l = 0; l <= lmax_; ++l) - { - for (int izeta = 0; izeta != nzeta_max_; ++izeta) { - index_map_[l * nzeta_max_ + izeta] = izeta >= nzeta_[l] ? -1 : index_chi++; + for (int izeta = 0; izeta != nzeta_max_; ++izeta) + { + index_map_[l * nzeta_max_ + izeta] = izeta >= nzeta_[l] ? -1 : index_chi++; + } } - } } -const NumericalRadial& RadialSet::chi(const int l, const int izeta) const +const NumericalRadial& + RadialSet::chi (const int l, const int izeta) const { int i = index_map_[l * nzeta_max_ + izeta]; #ifdef __DEBUG - assert(i >= 0 && i < nchi_); + assert (i >= 0 && i < nchi_); #endif return chi_[i]; } -void RadialSet::set_transformer(ModuleBase::SphericalBesselTransformer sbt, const int update) +void + RadialSet::set_transformer (ModuleBase::SphericalBesselTransformer sbt, const int update) { for (int i = 0; i < nchi_; i++) - { - chi_[i].set_transformer(sbt, update); - } + { + chi_[i].set_transformer (sbt, update); + } } -void RadialSet::set_grid(const bool for_r_space, const int ngrid, const double* grid, const char mode) +void + RadialSet::set_grid (const bool for_r_space, const int ngrid, const double* grid, const char mode) { for (int i = 0; i < nchi_; i++) - { - chi_[i].set_grid(for_r_space, ngrid, grid, mode); - } - set_rcut_max(); + { + chi_[i].set_grid (for_r_space, ngrid, grid, mode); + } + set_rcut_max (); } -void RadialSet::set_uniform_grid(const bool for_r_space, +void + RadialSet::set_uniform_grid (const bool for_r_space, const int ngrid, const double cutoff, const char mode, const bool enable_fft) { for (int i = 0; i < nchi_; i++) - { - chi_[i].set_uniform_grid(for_r_space, ngrid, cutoff, mode, enable_fft); - } - set_rcut_max(); + { + chi_[i].set_uniform_grid (for_r_space, ngrid, cutoff, mode, enable_fft); + } 
+ set_rcut_max (); } -void RadialSet::cleanup() +void + RadialSet::cleanup () { symbol_ = ""; itype_ = 0; @@ -195,64 +198,65 @@ void RadialSet::cleanup() index_map_ = nullptr; } -void RadialSet::write_abacus_orb(const std::string& file_name, const int rank) const +void + RadialSet::write_abacus_orb (const std::string& file_name, const int rank) const { std::ofstream file_to; - file_to.open(file_name, std::ios::out); + file_to.open (file_name, std::ios::out); std::vector sublayers = {"S", "P", "D", "F", "G", "H", "I", "J", "K"}; - if(file_to.good()) - { - for(int i = 0; i < 75; ++i) - { - file_to << "-"; - } - file_to << std::endl; - // left aligned - file_to << std::left << std::setw(28) << "Element" << symbol_ << std::endl; - file_to << std::left << std::setw(28) << "Energy Cutoff(Ry)" << std::to_string(int(100.0)) << std::endl; - // rcut .1f, not scientific - file_to << std::left << std::setw(28) << "Radius Cutoff(a.u.)" - << std::fixed << std::setprecision(1) << rcut_max_ << std::endl; - file_to << std::left << std::setw(28) << "Lmax" << lmax_ << std::endl; - for(int l = 0; l <= lmax_; ++l) - { - std::string title = "Number of " + sublayers[l] + "orbital-->"; - file_to << std::left << std::setw(28) << title << nzeta_[l] << std::endl; - } - for(int i = 0; i < 75; ++i) + if (file_to.good ()) { - file_to << "-"; - } - file_to << std::endl; - file_to << "SUMMARY END\n\n"; - file_to << std::left << std::setw(28) << "Mesh" << std::setprecision(0) << int(rcut_max_/0.01) + 1 << std::endl; - file_to << std::left << std::setw(28) << "dr" << std::setprecision(2) << 0.01 << std::endl; + for (int i = 0; i < 75; ++i) + { + file_to << "-"; + } + file_to << std::endl; + // left aligned + file_to << std::left << std::setw (28) << "Element" << symbol_ << std::endl; + file_to << std::left << std::setw (28) << "Energy Cutoff(Ry)" << std::to_string (int (100.0)) << std::endl; + // rcut .1f, not scientific + file_to << std::left << std::setw (28) << "Radius Cutoff(a.u.)" << 
std::fixed << std::setprecision (1) + << rcut_max_ << std::endl; + file_to << std::left << std::setw (28) << "Lmax" << lmax_ << std::endl; + for (int l = 0; l <= lmax_; ++l) + { + std::string title = "Number of " + sublayers[l] + "orbital-->"; + file_to << std::left << std::setw (28) << title << nzeta_[l] << std::endl; + } + for (int i = 0; i < 75; ++i) + { + file_to << "-"; + } + file_to << std::endl; + file_to << "SUMMARY END\n\n"; + file_to << std::left << std::setw (28) << "Mesh" << std::setprecision (0) << int (rcut_max_ / 0.01) + 1 + << std::endl; + file_to << std::left << std::setw (28) << "dr" << std::setprecision (2) << 0.01 << std::endl; - for(int l = 0; l <= lmax_; l++) - { - for(int izeta = 0; izeta < nzeta_[l]; izeta++) - { - file_to << std::right << std::setw(20) << "Type" - << std::right << std::setw(20) << "L" - << std::right << std::setw(20) << "N" << std::endl; - file_to << std::right << std::setw(20) << std::to_string(0) - << std::right << std::setw(20) << std::to_string(l) - << std::right << std::setw(20) << std::to_string(izeta); - for(int i = 0; i < int(rcut_max_/0.01) + 1; i++) + for (int l = 0; l <= lmax_; l++) { - if(i % 4 == 0) - { - file_to << std::endl; - } - file_to << std::left << std::setw(22) << std::setprecision(14) << std::scientific - << chi_[index(l, izeta)].rvalue()[i]; + for (int izeta = 0; izeta < nzeta_[l]; izeta++) + { + file_to << std::right << std::setw (20) << "Type" << std::right << std::setw (20) << "L" + << std::right << std::setw (20) << "N" << std::endl; + file_to << std::right << std::setw (20) << std::to_string (0) << std::right + << std::setw (20) << std::to_string (l) << std::right << std::setw (20) + << std::to_string (izeta); + for (int i = 0; i < int (rcut_max_ / 0.01) + 1; i++) + { + if (i % 4 == 0) + { + file_to << std::endl; + } + file_to << std::left << std::setw (22) << std::setprecision (14) << std::scientific + << chi_[index (l, izeta)].rvalue ()[i]; + } + file_to << std::endl; + } } - file_to << 
std::endl; - } } - } - file_to.close(); + file_to.close (); } // FIXME: should update with pyabacus diff --git a/source/source_basis/module_nao/radial_set.h b/source/source_basis/module_nao/radial_set.h index f03f7f34b82..76bc9cdb82c 100644 --- a/source/source_basis/module_nao/radial_set.h +++ b/source/source_basis/module_nao/radial_set.h @@ -25,12 +25,12 @@ class RadialSet { public: - RadialSet() = default; - RadialSet(const RadialSet&); ///< deep copy - RadialSet& operator=(const RadialSet&); ///< deep copy - virtual RadialSet* clone() const = 0; ///< for polymorphic copy + RadialSet () = default; + RadialSet (const RadialSet&); ///< deep copy + RadialSet& operator= (const RadialSet&); ///< deep copy + virtual RadialSet* clone () const = 0; ///< for polymorphic copy - virtual ~RadialSet(); + virtual ~RadialSet (); /** * @name Builders. @@ -53,11 +53,14 @@ class RadialSet * Currently only AtomicRadials objects are supposed to used this * interface. */ - virtual void build(const std::string&, ///< file name - const int = 0, ///< the element index in calculation - std::ofstream* const = nullptr, ///< output file stream for logging - const int = 0 ///< MPI rank - ) {} + virtual void + build (const std::string&, ///< file name + const int = 0, ///< the element index in calculation + std::ofstream* const = nullptr, ///< output file stream for logging + const int = 0 ///< MPI rank + ) + { + } /** * @brief Builds from a Numerical_Nonlocal object. @@ -65,10 +68,13 @@ class RadialSet * Currently nonlocal projectors are read in source_cell and passed * to Numerical_Nonlocal objects. 
*/ - virtual void build(const Numerical_Nonlocal&, ///< Numerical_Nonlocal object - const int = 0, ///< the element index in calculation - std::ofstream* const = nullptr ///< output file stream for logging - ) {} + virtual void + build (const Numerical_Nonlocal&, ///< Numerical_Nonlocal object + const int = 0, ///< the element index in calculation + std::ofstream* const = nullptr ///< output file stream for logging + ) + { + } /** * @brief Builds from hydrogen-like radial functions. @@ -76,18 +82,21 @@ class RadialSet * Currently only HydrogenRadials objects are supposed to used this * interface. */ - virtual void build(const int = 0, ///< the element index in calculation - const double = 1.0, ///< nuclear charge - const bool = false, ///< whether to include Slater screening - const int = 0, ///< maximal principal quantum number or electrons - const double = 10.0, ///< maximal radius - const double = 0.01, ///< radial grid step - const double = 1e-6, ///< convergence threshold for norm of pseudowavefunction - const int = 0, ///< MPI rank - const std::string = "", ///< the name of the element - const std::string = "minimal-valence", ///< the strategy to generate whole set of radial functions - std::ofstream* const = nullptr ///< output file stream for logging - ) {} + virtual void + build (const int = 0, ///< the element index in calculation + const double = 1.0, ///< nuclear charge + const bool = false, ///< whether to include Slater screening + const int = 0, ///< maximal principal quantum number or electrons + const double = 10.0, ///< maximal radius + const double = 0.01, ///< radial grid step + const double = 1e-6, ///< convergence threshold for norm of pseudowavefunction + const int = 0, ///< MPI rank + const std::string = "", ///< the name of the element + const std::string = "minimal-valence", ///< the strategy to generate whole set of radial functions + std::ofstream* const = nullptr ///< output file stream for logging + ) + { + } /** * @brief Builds from 
pseudopotential file @@ -95,13 +104,16 @@ class RadialSet * Currently only PswfcRadials objects are supposed to used this * interface. */ - virtual void build(const std::string&, ///< file name - const int, ///< the element index in calculation - const double, ///< radial screening coefficient, sometimes to be 0.1 - const double, ///< convergence threshold of norm of pseudowavefunction - std::ofstream* const = nullptr, ///< output file stream for logging - const int = 0 ///< MPI rank - ) {} + virtual void + build (const std::string&, ///< file name + const int, ///< the element index in calculation + const double, ///< radial screening coefficient, sometimes to be 0.1 + const double, ///< convergence threshold of norm of pseudowavefunction + std::ofstream* const = nullptr, ///< output file stream for logging + const int = 0 ///< MPI rank + ) + { + } /** * @brief Builds with the truncated spherical Bessel functions. @@ -110,29 +122,29 @@ class RadialSet * spherical Bessel functions. * See the derived class "SphbesRadials" for the implementation. */ - virtual void build(const int, ///< maximum angular momentum - const int, ///< number of spherical Bessel functions - const double, ///< cutoff radius - const double = 0.1, ///< smoothing parameter - const double = 0.01, ///< radial grid spacing - const int = 0, ///< element index - std::ofstream* = nullptr, ///< output file stream for logging - const int = 0 ///< MPI rank + virtual void build (const int, ///< maximum angular momentum + const int, ///< number of spherical Bessel functions + const double, ///< cutoff radius + const double = 0.1, ///< smoothing parameter + const double = 0.01, ///< radial grid spacing + const int = 0, ///< element index + std::ofstream* = nullptr, ///< output file stream for logging + const int = 0 ///< MPI rank ) {}; /** * @brief write any RadialSet object to a file in ABACUS numerical atomic orbital format. 
- * + * * This function will write any RadialSet object to file in ABACUS numerical atomic orbital format. - * However its counterparts, - * `read_abacus_orb()` is in AtomicRadials, + * However its counterparts, + * `read_abacus_orb()` is in AtomicRadials, * `read_beta_upf100()` and `read_beta_upf201()` are in BetaRadials, - * `read_upf_pswfc()` is in PswfcRadials, - * `read_coeff()` is in SphbesRadials, + * `read_upf_pswfc()` is in PswfcRadials, + * `read_coeff()` is in SphbesRadials, * due to read-in procedures always vary from one to another. */ - void write_abacus_orb(const std::string&, ///< file name - const int = 0) const; ///< MPI rank + void write_abacus_orb (const std::string&, ///< file name + const int = 0) const; ///< MPI rank ///@} @@ -141,27 +153,62 @@ class RadialSet * * This function provides an interface to the corresponding object in the old module_ao. */ - virtual void to_numerical_orbital(Numerical_Orbital&, - const int nk_legacy = 4005, // equivalent to lcao_ecut = 1600 - const double lcao_dk = 0.01 - ) const; + virtual void to_numerical_orbital (Numerical_Orbital&, + const int nk_legacy = 4005, // equivalent to lcao_ecut = 1600 + const double lcao_dk = 0.01) const; /** * @name Getters */ ///@{ - const std::string& symbol() const { return symbol_; } - int itype() const { return itype_; } - int lmax() const { return lmax_; } - double rcut_max() const { return rcut_max_; } - - int nzeta(const int l) const { return (l >= 0 && l <= lmax_) ? 
nzeta_[l] : 0; } - int nzeta_max() const { return nzeta_max_; } - int nchi() const { return nchi_; } - - const NumericalRadial& chi(const int l, const int izeta) const; - const NumericalRadial* cbegin() const { return chi_; } - const NumericalRadial* cend() const { return chi_ + nchi_; } + const std::string& + symbol () const + { + return symbol_; + } + int + itype () const + { + return itype_; + } + int + lmax () const + { + return lmax_; + } + double + rcut_max () const + { + return rcut_max_; + } + + int + nzeta (const int l) const + { + return (l >= 0 && l <= lmax_) ? nzeta_[l] : 0; + } + int + nzeta_max () const + { + return nzeta_max_; + } + int + nchi () const + { + return nchi_; + } + + const NumericalRadial& chi (const int l, const int izeta) const; + const NumericalRadial* + cbegin () const + { + return chi_; + } + const NumericalRadial* + cend () const + { + return chi_ + nchi_; + } ///@} /** @@ -169,28 +216,28 @@ class RadialSet */ ///@{ /// Sets a spherical Bessel transformers for all NumericalRadial objects. - void set_transformer(ModuleBase::SphericalBesselTransformer sbt, const int update = 0); + void set_transformer (ModuleBase::SphericalBesselTransformer sbt, const int update = 0); /// Sets a common grid for all NumericalRadial objects. - void set_grid(const bool for_r_space, const int ngrid, const double* grid, const char mode = 'i'); + void set_grid (const bool for_r_space, const int ngrid, const double* grid, const char mode = 'i'); /// Sets a common uniform grid for all NumericalRadial objects. 
- void set_uniform_grid(const bool for_r_space, - const int ngrid, - const double cutoff, - const char mode = 'i', - const bool enable_fft = false); + void set_uniform_grid (const bool for_r_space, + const int ngrid, + const double cutoff, + const char mode = 'i', + const bool enable_fft = false); ///@} protected: - std::string symbol_ = ""; ///< usually the chemical symbol - int itype_ = 0; ///< usually the index for element in calculation - int lmax_ = -1; ///< maximum angular momentum among all NumericalRadial objects - double rcut_max_ = 0.0; ///< maximum rcut (NOT rmax!) among all NumericalRadial objects + std::string symbol_ = ""; ///< usually the chemical symbol + int itype_ = 0; ///< usually the index for element in calculation + int lmax_ = -1; ///< maximum angular momentum among all NumericalRadial objects + double rcut_max_ = 0.0; ///< maximum rcut (NOT rmax!) among all NumericalRadial objects - int* nzeta_ = nullptr; ///< number of NumericalRadial objects for each angular momentum - int nzeta_max_ = 0; ///< maximum number of NumericalRadial objects among each angular momentum - int nchi_ = 0; ///< total number of NumericalRadial objects + int* nzeta_ = nullptr; ///< number of NumericalRadial objects for each angular momentum + int nzeta_max_ = 0; ///< maximum number of NumericalRadial objects among each angular momentum + int nchi_ = 0; ///< total number of NumericalRadial objects NumericalRadial* chi_ = nullptr; ///< array of NumericalRadial objects @@ -203,16 +250,16 @@ class RadialSet int* index_map_ = nullptr; /// Deallocates memory and reset all class members to default values. - void cleanup(); + void cleanup (); /// Gets the index in chi_ array from (l,izeta). - int index(const int l, const int izeta) const; + int index (const int l, const int izeta) const; /// Builds index_map_ from nzeta_, nzeta_max_ and lmax_. - void indexing(); + void indexing (); /// Sets rcut_max_ to be the maximum rcut of all NumericalRadial objects. 
- void set_rcut_max(); + void set_rcut_max (); }; #endif diff --git a/source/source_basis/module_nao/real_gaunt_table.cpp b/source/source_basis/module_nao/real_gaunt_table.cpp index e6b94cb76e7..4cfc2e2e979 100644 --- a/source/source_basis/module_nao/real_gaunt_table.cpp +++ b/source/source_basis/module_nao/real_gaunt_table.cpp @@ -6,17 +6,18 @@ #include "source_base/constants.h" -void RealGauntTable::build(const int lmax) +void + RealGauntTable::build (const int lmax) { #ifdef __DEBUG - assert( lmax >= 0 ); + assert (lmax >= 0); #endif // do nothing if lmax < lmax_ -- or shall we shrink the map & tensor? if (lmax <= lmax_) - { - return; - } + { + return; + } // TODO // If the table already exists and lmax is larger than the current lmax_, @@ -24,217 +25,236 @@ void RealGauntTable::build(const int lmax) // build the standard Gaunt table (with symmetry & selection rule considered) for (int l1 = 0; l1 <= 2 * lmax; ++l1) - { - for (int l2 = 0; l2 <= l1; ++l2) { - for (int l3 = l1 - l2; l3 <= l2; l3 += 2) - { - for (int m3 = 0; m3 <= l3; ++m3) + for (int l2 = 0; l2 <= l1; ++l2) { - int m2_max = (l1 - m3) > l2 ? l2 : l1 - m3; // ensure min(m1) >= l1 - for (int m2 = -l2; m2 <= m2_max; ++m2) - { - int m1 = -m2 - m3; - gaunt_table_[ std::array({l1, l2, l3, m1, m2, m3}) ] = gaunt(l1, l2, l3, m1, m2, m3); - } + for (int l3 = l1 - l2; l3 <= l2; l3 += 2) + { + for (int m3 = 0; m3 <= l3; ++m3) + { + int m2_max = (l1 - m3) > l2 ? 
l2 : l1 - m3; // ensure min(m1) >= l1 + for (int m2 = -l2; m2 <= m2_max; ++m2) + { + int m1 = -m2 - m3; + gaunt_table_[std::array ({l1, l2, l3, m1, m2, m3})] + = gaunt (l1, l2, l3, m1, m2, m3); + } + } + } } - } } - } lmax_ = lmax; // build the real Gaunt table from tabulated standard Gaunt coefficients // this real Gaunt table is supposed to be used in two-center integrals, so the maximum // l of the third dimension is twice as large as the maximum l of the first two dimensions - real_gaunt_table_.resize({ (lmax + 1) * (lmax + 1), (lmax + 1) * (lmax + 1), (2 * lmax + 1) * (2 * lmax + 1) }); - real_gaunt_table_.zero(); + real_gaunt_table_.resize ({(lmax + 1) * (lmax + 1), (lmax + 1) * (lmax + 1), (2 * lmax + 1) * (2 * lmax + 1)}); + real_gaunt_table_.zero (); for (int l1 = 0; l1 <= lmax; ++l1) - { - for (int m1 = -l1; m1 <= l1; ++m1) { - int index1 = index_map(l1, m1); - for (int l2 = 0; l2 <= lmax; ++l2) - { - for (int m2 = -l2; m2 <= l2; ++m2) + for (int m1 = -l1; m1 <= l1; ++m1) { - int index2 = index_map(l2, m2); - for (int l3 = std::abs(l1 - l2); l3 <= l1 + l2; l3 += 2) - { - for (int m3 = -l3; m3 <= l3; ++m3) + int index1 = index_map (l1, m1); + for (int l2 = 0; l2 <= lmax; ++l2) { - int index3 = index_map(l3, m3); - real_gaunt_table_.get_value(index1, index2, index3) - = real_gaunt_lookup(l1, l2, l3, m1, m2, m3); + for (int m2 = -l2; m2 <= l2; ++m2) + { + int index2 = index_map (l2, m2); + for (int l3 = std::abs (l1 - l2); l3 <= l1 + l2; l3 += 2) + { + for (int m3 = -l3; m3 <= l3; ++m3) + { + int index3 = index_map (l3, m3); + real_gaunt_table_.get_value (index1, index2, index3) + = real_gaunt_lookup (l1, l2, l3, m1, m2, m3); + } + } + } } - } } - } } - } } -const double& RealGauntTable::operator()(const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const +const double& + RealGauntTable::operator() (const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) + const { #ifdef __DEBUG - assert( 
is_valid_lm(l1, l2, l3, m1, m2, m3) ); - assert( l1 <= lmax_ && l2 <= lmax_ && l3 <= 2 * lmax_ ); + assert (is_valid_lm (l1, l2, l3, m1, m2, m3)); + assert (l1 <= lmax_ && l2 <= lmax_ && l3 <= 2 * lmax_); #endif - return real_gaunt_table_.get_value(index_map(l1, m1), index_map(l2, m2), index_map(l3, m3)); + return real_gaunt_table_.get_value (index_map (l1, m1), index_map (l2, m2), index_map (l3, m3)); } -double RealGauntTable::real_gaunt_lookup(const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const +double + RealGauntTable::real_gaunt_lookup (const int l1, + const int l2, + const int l3, + const int m1, + const int m2, + const int m3) const { // This function calculates and returns the Gaunt coefficients of real spherical harmonics // from tabulated standard Gaunt coefficients. #ifdef __DEBUG - assert( is_valid_lm(l1, l2, l3, m1, m2, m3) ); - assert( l1 <= lmax_ && l2 <= lmax_ && l3 <= 2 * lmax_ ); + assert (is_valid_lm (l1, l2, l3, m1, m2, m3)); + assert (l1 <= lmax_ && l2 <= lmax_ && l3 <= 2 * lmax_); #endif - if ( !gaunt_select_l(l1, l2, l3) || !real_gaunt_select_m(m1, m2, m3) ) - { - return 0.0; - } - - std::array m = {std::abs(m1), std::abs(m2), std::abs(m3)}; - int& m_absmax = *std::max_element(m.begin(), m.end()); - - if ( m1 == 0 || m2 == 0 || m3 == 0 ) - { - m_absmax = -m_absmax; - return minus_1_pow(m_absmax) * gaunt_lookup(l1, l2, l3, m[0], m[1], m[2]); - } - else if ( m1 + m2 + m3 == 0 ) - { - return ModuleBase::SQRT2 / 2.0 * minus_1_pow(m_absmax + 1) * gaunt_lookup(l1, l2, l3, m1, m2, m3); - } + if (!gaunt_select_l (l1, l2, l3) || !real_gaunt_select_m (m1, m2, m3)) + { + return 0.0; + } + + std::array m = {std::abs (m1), std::abs (m2), std::abs (m3)}; + int& m_absmax = *std::max_element (m.begin (), m.end ()); + + if (m1 == 0 || m2 == 0 || m3 == 0) + { + m_absmax = -m_absmax; + return minus_1_pow (m_absmax) * gaunt_lookup (l1, l2, l3, m[0], m[1], m[2]); + } + else if (m1 + m2 + m3 == 0) + { + return ModuleBase::SQRT2 
/ 2.0 * minus_1_pow (m_absmax + 1) * gaunt_lookup (l1, l2, l3, m1, m2, m3); + } else - { - m_absmax = -m_absmax; - return ModuleBase::SQRT2 / 2.0 * minus_1_pow(m_absmax) * gaunt_lookup(l1, l2, l3, m[0], m[1], m[2]); - } + { + m_absmax = -m_absmax; + return ModuleBase::SQRT2 / 2.0 * minus_1_pow (m_absmax) * gaunt_lookup (l1, l2, l3, m[0], m[1], m[2]); + } } -double RealGauntTable::gaunt(const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const +double + RealGauntTable::gaunt (const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const { // This function computes the Gaunt coefficients from the Wigner-3j expression #ifdef __DEBUG - assert( is_valid_lm(l1, l2, l3, m1, m2, m3) ); + assert (is_valid_lm (l1, l2, l3, m1, m2, m3)); #endif - if ( !gaunt_select_l(l1, l2, l3) || !gaunt_select_m(m1, m2, m3) ) - { - return 0.0; - } + if (!gaunt_select_l (l1, l2, l3) || !gaunt_select_m (m1, m2, m3)) + { + return 0.0; + } int g = (l1 + l2 + l3) / 2; - double pref = std::sqrt( (2 * l1 + 1) * (2 * l2 + 1) * (2 * l3 + 1) / ModuleBase::FOUR_PI); - double tri = std::sqrt( factorial(l1 + l2 - l3) * factorial(l2 + l3 - l1) * factorial(l3 + l1 - l2) - / factorial(l1 + l2 + l3 + 1) ); + double pref = std::sqrt ((2 * l1 + 1) * (2 * l2 + 1) * (2 * l3 + 1) / ModuleBase::FOUR_PI); + double tri = std::sqrt (factorial (l1 + l2 - l3) * factorial (l2 + l3 - l1) * factorial (l3 + l1 - l2) + / factorial (l1 + l2 + l3 + 1)); // wigner3j(l1,l2,l3,0,0,0) - double wigner1 = minus_1_pow(g) * tri * factorial(g) / factorial(g - l1) / factorial(g - l2) / factorial(g - l3); + double wigner1 + = minus_1_pow (g) * tri * factorial (g) / factorial (g - l1) / factorial (g - l2) / factorial (g - l3); // wigner3j(l1,l2,l3,m1,m2,m3) - int kmin = std::max(l2 - l3 - m1, l1 - l3 + m2); - kmin = std::max(kmin, 0); + int kmin = std::max (l2 - l3 - m1, l1 - l3 + m2); + kmin = std::max (kmin, 0); - int kmax = std::min(l1 - m1, l2 + m2); - kmax = std::min(kmax, 
l1 + l2 - l3); + int kmax = std::min (l1 - m1, l2 + m2); + kmax = std::min (kmax, l1 + l2 - l3); double wigner2 = 0.0; for (int k = kmin; k <= kmax; ++k) - { - wigner2 += minus_1_pow(k) / factorial(k) / factorial(l1 - m1 - k) / factorial(l2 + m2 - k) - / factorial(l3 - l2 + m1 + k) / factorial(l3 - l1 - m2 + k) / factorial(l1 + l2 - l3 - k); - } + { + wigner2 += minus_1_pow (k) / factorial (k) / factorial (l1 - m1 - k) / factorial (l2 + m2 - k) + / factorial (l3 - l2 + m1 + k) / factorial (l3 - l1 - m2 + k) / factorial (l1 + l2 - l3 - k); + } - wigner2 *= tri * minus_1_pow(l1 - l2 - m3) * std::sqrt( - factorial(l1 + m1) * factorial(l1 - m1) * - factorial(l2 + m2) * factorial(l2 - m2) * - factorial(l3 + m3) * factorial(l3 - m3) ); + wigner2 *= tri * minus_1_pow (l1 - l2 - m3) + * std::sqrt (factorial (l1 + m1) * factorial (l1 - m1) * factorial (l2 + m2) * factorial (l2 - m2) + * factorial (l3 + m3) * factorial (l3 - m3)); return pref * wigner1 * wigner2; } -bool RealGauntTable::is_valid_lm(const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const +bool + RealGauntTable::is_valid_lm (const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) + const { - return std::abs(m1) <= l1 && std::abs(m2) <= l2 && std::abs(m3) <= l3; + return std::abs (m1) <= l1 && std::abs (m2) <= l2 && std::abs (m3) <= l3; } -bool RealGauntTable::gaunt_select_l(const int l1, const int l2, const int l3) const +bool + RealGauntTable::gaunt_select_l (const int l1, const int l2, const int l3) const { return l1 + l2 >= l3 && l1 + l3 >= l2 && l2 + l3 >= l1 && (l1 + l2 + l3) % 2 == 0; } -bool RealGauntTable::real_gaunt_select_m(const int m1, const int m2, const int m3) const +bool + RealGauntTable::real_gaunt_select_m (const int m1, const int m2, const int m3) const { - return ( ( (m1 < 0) + (m2 < 0) + (m3 < 0) ) % 2 == 0 ) && - ( std::abs(m1) + std::abs(m2) == std::abs(m3) || - std::abs(m2) + std::abs(m3) == std::abs(m1) || - std::abs(m3) + 
std::abs(m1) == std::abs(m2) ); + return (((m1 < 0) + (m2 < 0) + (m3 < 0)) % 2 == 0) + && (std::abs (m1) + std::abs (m2) == std::abs (m3) || std::abs (m2) + std::abs (m3) == std::abs (m1) + || std::abs (m3) + std::abs (m1) == std::abs (m2)); } -double RealGauntTable::gaunt_lookup(const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const +double + RealGauntTable::gaunt_lookup (const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) + const { #ifdef __DEBUG - assert( is_valid_lm(l1, l2, l3, m1, m2, m3) ); - assert( l1 <= 2 * lmax_ && l2 <= 2 * lmax_ && l3 <= 2 * lmax_ ); + assert (is_valid_lm (l1, l2, l3, m1, m2, m3)); + assert (l1 <= 2 * lmax_ && l2 <= 2 * lmax_ && l3 <= 2 * lmax_); #endif - return ( gaunt_select_l(l1, l2, l3) && gaunt_select_m(m1, m2, m3) ) - ? gaunt_table_.at( gaunt_key(l1, l2, l3, m1, m2, m3) ) - : 0.0; + return (gaunt_select_l (l1, l2, l3) && gaunt_select_m (m1, m2, m3)) + ? gaunt_table_.at (gaunt_key (l1, l2, l3, m1, m2, m3)) + : 0.0; } -std::array RealGauntTable::gaunt_key(const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const +std::array + RealGauntTable::gaunt_key (const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const { #ifdef __DEBUG - assert( is_valid_lm(l1, l2, l3, m1, m2, m3) ); + assert (is_valid_lm (l1, l2, l3, m1, m2, m3)); #endif std::array key{l1, l2, l3, m1, m2, m3}; - arrange(key[0], key[1], key[3], key[4]); - arrange(key[0], key[2], key[3], key[5]); - arrange(key[1], key[2], key[4], key[5]); - if ( key[5] < 0 ) - { - key[3] = -key[3]; - key[4] = -key[4]; - key[5] = -key[5]; - } + arrange (key[0], key[1], key[3], key[4]); + arrange (key[0], key[2], key[3], key[5]); + arrange (key[1], key[2], key[4], key[5]); + if (key[5] < 0) + { + key[3] = -key[3]; + key[4] = -key[4]; + key[5] = -key[5]; + } return key; } -void RealGauntTable::arrange(int& l1, int& l2, int& m1, int& m2) const +void + 
RealGauntTable::arrange (int& l1, int& l2, int& m1, int& m2) const { - if ( l1 < l2 ) - { - std::swap(l1, l2); - std::swap(m1, m2); - } + if (l1 < l2) + { + std::swap (l1, l2); + std::swap (m1, m2); + } } -double RealGauntTable::factorial(const int n) const +double + RealGauntTable::factorial (const int n) const { #ifdef __DEBUG - assert( n >= 0 ); + assert (n >= 0); #endif double val = 1.0; - for(int i = 2; i <= n; i++) - { - val *= static_cast(i); - } + for (int i = 2; i <= n; i++) + { + val *= static_cast (i); + } return val; } -int RealGauntTable::index_map(int l, int m) const +int + RealGauntTable::index_map (int l, int m) const { #ifdef __DEBUG - assert( std::abs(m) <= l ); + assert (std::abs (m) <= l); #endif return l * l + l + m; } diff --git a/source/source_basis/module_nao/real_gaunt_table.h b/source/source_basis/module_nao/real_gaunt_table.h index 2966b6e0b81..f8416686f6a 100644 --- a/source/source_basis/module_nao/real_gaunt_table.h +++ b/source/source_basis/module_nao/real_gaunt_table.h @@ -24,12 +24,13 @@ class RealGauntTable { public: - RealGauntTable(RealGauntTable const&) = delete; - RealGauntTable& operator=(RealGauntTable const&) = delete; + RealGauntTable (RealGauntTable const&) = delete; + RealGauntTable& operator= (RealGauntTable const&) = delete; - ~RealGauntTable() {} + ~RealGauntTable () {} - static RealGauntTable& instance() + static RealGauntTable& + instance () { static RealGauntTable instance_; return instance_; @@ -64,22 +65,26 @@ class RealGauntTable * * where c = sqrt(3/4/pi) and r = sqrt(x^2 + y^2 + z^2). 
* */ - void build(const int lmax); + void build (const int lmax); /// gets the tabulated real Gaunt coefficient - const double& operator()(const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const; + const double& operator() (const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const; /// returns the maximum l (for the first two dimensions; the third dimension is 2*lmax) - int lmax() const { return lmax_; } + int + lmax () const + { + return lmax_; + } /// Returns the amount of heap memory used by this class (in bytes). - size_t memory() const + size_t + memory () const { - return gaunt_table_.size() * (6 * sizeof(int) + sizeof(double)) - + real_gaunt_table_.NumElements() * sizeof(double); + return gaunt_table_.size () * (6 * sizeof (int) + sizeof (double)) + + real_gaunt_table_.NumElements () * sizeof (double); } - /*! * @brief Computes the standard Gaunt coefficients. * @@ -98,11 +103,10 @@ class RealGauntTable * symbols, which in turn is evaluated with the Racah formula. This might have * some numerical issue for large l and is yet to be studied later. 
* */ - double gaunt(const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const; - + double gaunt (const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const; private: - RealGauntTable() {} + RealGauntTable () {} /// maximum angular momentum of the table (for the first two dimensions) int lmax_ = -1; @@ -117,16 +121,20 @@ class RealGauntTable std::map, double> gaunt_table_; /// table of real Gaunt coefficients - container::Tensor real_gaunt_table_{container::DataType::DT_DOUBLE, container::TensorShape({0})}; + container::Tensor real_gaunt_table_{container::DataType::DT_DOUBLE, container::TensorShape ({0})}; /// selection rule of standard & real Gaunt coefficients regarding l1, l2, l3 - bool gaunt_select_l(const int l1, const int l2, const int l3) const; + bool gaunt_select_l (const int l1, const int l2, const int l3) const; /// selection rule of standard Gaunt coefficients regarding m1, m2, m3 - bool gaunt_select_m(const int m1, const int m2, const int m3) const { return m1 + m2 + m3 == 0; } + bool + gaunt_select_m (const int m1, const int m2, const int m3) const + { + return m1 + m2 + m3 == 0; + } /// selection rule of real Gaunt coefficients regarding m1, m2, m3 - bool real_gaunt_select_m(const int m1, const int m2, const int m3) const; + bool real_gaunt_select_m (const int m1, const int m2, const int m3) const; /*! * @brief Returns whether the given l & m are valid quantum numbers. @@ -134,13 +142,13 @@ class RealGauntTable * This function checks whether abs(mi) <= li (i=1,2,3) is satisfied. * This implies li >= 0. 
* */ - bool is_valid_lm(const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const; + bool is_valid_lm (const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const; /// Get a Gaunt coefficient by looking up the table - double gaunt_lookup(const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const; + double gaunt_lookup (const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const; /// Get a real Gaunt coefficient from the stored Gaunt coefficients - double real_gaunt_lookup(const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const; + double real_gaunt_lookup (const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const; /*! * @brief Symmetry-adapted key for gaunt_table_. @@ -157,13 +165,14 @@ class RealGauntTable * if necessary so that the returned key {l1,l2,l3,m1,m2,m3} satisfies * l1 >= l2 >= l3 and m3 >= 0. * */ - std::array gaunt_key(const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const; + std::array + gaunt_key (const int l1, const int l2, const int l3, const int m1, const int m2, const int m3) const; /// swap (l1,m1) <--> (l2,m2) if l1 < l2; do nothing otherwise - void arrange(int& l1, int& l2, int& m1, int& m2) const; + void arrange (int& l1, int& l2, int& m1, int& m2) const; /// returns n! as a double - double factorial(const int n) const; + double factorial (const int n) const; /*! * @brief Returns the linearized index of Y(l,m). @@ -172,10 +181,14 @@ class RealGauntTable * m 0 -1 0 1 -2 -1 0 1 2 -3 ... * index 0 1 2 3 4 5 6 7 8 9 ... * */ - int index_map(int l, int m) const; + int index_map (int l, int m) const; /// returns pow(-1, m) - int minus_1_pow(int m) const { return m % 2 ? -1 : 1; } + int + minus_1_pow (int m) const + { + return m % 2 ? 
-1 : 1; + } }; #endif \ No newline at end of file diff --git a/source/source_basis/module_nao/sphbes_radials.cpp b/source/source_basis/module_nao/sphbes_radials.cpp index d9378e4e895..627e50db15c 100644 --- a/source/source_basis/module_nao/sphbes_radials.cpp +++ b/source/source_basis/module_nao/sphbes_radials.cpp @@ -9,71 +9,74 @@ #include #include -SphbesRadials& SphbesRadials::operator=(const SphbesRadials& rhs) +SphbesRadials& + SphbesRadials::operator= (const SphbesRadials& rhs) { if (this != &rhs) - { - RadialSet::operator=(rhs); - dr_ = rhs.dr_; - sigma_ = rhs.sigma_; - coeff_ = rhs.coeff_; - } + { + RadialSet::operator= (rhs); + dr_ = rhs.dr_; + sigma_ = rhs.sigma_; + coeff_ = rhs.coeff_; + } return *this; } -void SphbesRadials::build(const std::string& file, +void + SphbesRadials::build (const std::string& file, const double dr, const int itype, std::ofstream* ptr_log, const int rank) { - cleanup(); - coeff_.clear(); + cleanup (); + coeff_.clear (); std::ifstream ifs; bool is_open = false; if (rank == 0) - { - ifs.open(file); - is_open = ifs.is_open(); - } + { + ifs.open (file); + is_open = ifs.is_open (); + } #ifdef __MPI - Parallel_Common::bcast_bool(is_open); + Parallel_Common::bcast_bool (is_open); #endif if (!is_open) - { - ModuleBase::WARNING_QUIT("SphbesRadials::build", "Couldn't open orbital file: " + file); - } + { + ModuleBase::WARNING_QUIT ("SphbesRadials::build", "Couldn't open orbital file: " + file); + } if (ptr_log) - { - (*ptr_log) << "\n\n\n\n"; - (*ptr_log) << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; - (*ptr_log) << " | |" << std::endl; - (*ptr_log) << " | SETUP NUMERICAL ATOMIC ORBITALS |" << std::endl; - (*ptr_log) << " | |" << std::endl; - (*ptr_log) << " | Orbital information includes the cutoff radius, angular momentum, |" << std::endl; - (*ptr_log) << " | zeta number, spherical Bessel coefficients and smoothing parameter. 
|" << std::endl; - (*ptr_log) << " | |" << std::endl; - (*ptr_log) << " <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" << std::endl; - (*ptr_log) << "\n\n\n\n"; - } + { + (*ptr_log) << "\n\n\n\n"; + (*ptr_log) << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; + (*ptr_log) << " | |" << std::endl; + (*ptr_log) << " | SETUP NUMERICAL ATOMIC ORBITALS |" << std::endl; + (*ptr_log) << " | |" << std::endl; + (*ptr_log) << " | Orbital information includes the cutoff radius, angular momentum, |" << std::endl; + (*ptr_log) << " | zeta number, spherical Bessel coefficients and smoothing parameter. |" << std::endl; + (*ptr_log) << " | |" << std::endl; + (*ptr_log) << " <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" << std::endl; + (*ptr_log) << "\n\n\n\n"; + } itype_ = itype; dr_ = dr; - read_coeff(ifs, ptr_log, rank); - build_radset(); + read_coeff (ifs, ptr_log, rank); + build_radset (); if (rank == 0) - { - ifs.close(); - } + { + ifs.close (); + } } -void SphbesRadials::build(const int lmax, +void + SphbesRadials::build (const int lmax, const int nbes, const double rcut, const double sigma, @@ -82,22 +85,22 @@ void SphbesRadials::build(const int lmax, std::ofstream* ptr_log, const int rank) { - cleanup(); - coeff_.clear(); + cleanup (); + coeff_.clear (); if (ptr_log) - { - (*ptr_log) << "\n\n\n\n"; - (*ptr_log) << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; - (*ptr_log) << " | |" << std::endl; - (*ptr_log) << " | SETUP TRUNCATED SPHERICAL BESSEL ORBITALS |" << std::endl; - (*ptr_log) << " | |" << std::endl; - (*ptr_log) << " | Orbital information includes the cutoff radius, angular momentum, |" << std::endl; - (*ptr_log) << " | zeta number, spherical Bessel coefficients and smoothing parameter. 
|" << std::endl; - (*ptr_log) << " | |" << std::endl; - (*ptr_log) << " <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" << std::endl; - (*ptr_log) << "\n\n\n\n"; - } + { + (*ptr_log) << "\n\n\n\n"; + (*ptr_log) << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; + (*ptr_log) << " | |" << std::endl; + (*ptr_log) << " | SETUP TRUNCATED SPHERICAL BESSEL ORBITALS |" << std::endl; + (*ptr_log) << " | |" << std::endl; + (*ptr_log) << " | Orbital information includes the cutoff radius, angular momentum, |" << std::endl; + (*ptr_log) << " | zeta number, spherical Bessel coefficients and smoothing parameter. |" << std::endl; + (*ptr_log) << " | |" << std::endl; + (*ptr_log) << " <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" << std::endl; + (*ptr_log) << "\n\n\n\n"; + } itype_ = itype; rcut_max_ = rcut; @@ -107,180 +110,198 @@ void SphbesRadials::build(const int lmax, ////////////////////////// // Instead of reading from a file, we will generate the coefficients here. 
for (int l = 0; l <= lmax; ++l) - { - for (int zeta = 0; zeta < nbes; ++zeta) { - std::vector coeff_q(nbes, 0.0); - coeff_q[zeta] = 1.0; - coeff_.emplace(std::make_pair(l, zeta), std::move(coeff_q)); + for (int zeta = 0; zeta < nbes; ++zeta) + { + std::vector coeff_q (nbes, 0.0); + coeff_q[zeta] = 1.0; + coeff_.emplace (std::make_pair (l, zeta), std::move (coeff_q)); + } } - } ////////////////////////// - build_radset(false); + build_radset (false); } -void SphbesRadials::read_coeff(std::ifstream& ifs, std::ofstream* ptr_log, const int rank) +void + SphbesRadials::read_coeff (std::ifstream& ifs, std::ofstream* ptr_log, const int rank) { std::string info, tmp; if (rank == 0) - { - // reach the Coefficient block (between and ) - while ((ifs >> tmp) && tmp != " and ) + while ((ifs >> tmp) && tmp != ") - std::getline(ifs, info, '<'); - } + // read the rest part of the Coefficient block at once (before ) + std::getline (ifs, info, '<'); + } #ifdef __MPI - Parallel_Common::bcast_string(info); + Parallel_Common::bcast_string (info); #endif // extract rcut & sigma from the pattern KEYWORD=" VALUE " - if ((tmp = extract(info, "rcut")).empty()) - { // rcut must be provided by the file; quit if not found. - ModuleBase::WARNING_QUIT("SphbesRadials::read_coeff", "Fails to read the cutoff radius (rcut)."); - } - rcut_max_ = std::stod(tmp); - sigma_ = (tmp = extract(info, "sigma")).empty() ? sigma_ : std::stod(tmp); // use default if not found - symbol_ = extract(info, "element"); + if ((tmp = extract (info, "rcut")).empty ()) + { // rcut must be provided by the file; quit if not found. + ModuleBase::WARNING_QUIT ("SphbesRadials::read_coeff", "Fails to read the cutoff radius (rcut)."); + } + rcut_max_ = std::stod (tmp); + sigma_ = (tmp = extract (info, "sigma")).empty () ? 
sigma_ : std::stod (tmp); // use default if not found + symbol_ = extract (info, "element"); // split the string by white spaces, tabs or new lines into a vector of substrings - std::vector v = split(info, " \n\t"); + std::vector v = split (info, " \n\t"); // find the indices of all occurences of "Type" (plus the one-past-last index) std::vector delim; // delimiters - std::for_each(v.begin(), v.end(), [&delim, &v](std::string const& s) { - if (s == "Type") - delim.push_back(&s - &v[0]); - }); // for_each is guaranteed to be sequential - delim.push_back(v.size()); + std::for_each (v.begin (), + v.end (), + [&delim, &v] (std::string const& s) + { + if (s == "Type") + { + delim.push_back (&s - &v[0]); + } + }); // for_each is guaranteed to be sequential + delim.push_back (v.size ()); // NOTE: Zeta-Orbital in some ORBITAL_RESULTS.txt is one-based numbering // which needs to be converted to a zero-based numbering. // Here we keep track of this index ourselves. int l_last = -1; int izeta = -1; - for (size_t i = 0; i < delim.size() - 1; ++i) - { - int l = std::stoi(v[delim[i] + 4]); - izeta = (l == l_last) ? izeta + 1 : 0; - l_last = l; - - std::vector coeff_q(delim[i + 1] - delim[i] - 6); - std::transform(v.begin() + delim[i] + 6, v.begin() + delim[i + 1], coeff_q.begin(), [](std::string const& s) { - return std::stod(s); - }); - coeff_.emplace(std::make_pair(l, izeta), std::move(coeff_q)); - } + for (size_t i = 0; i < delim.size () - 1; ++i) + { + int l = std::stoi (v[delim[i] + 4]); + izeta = (l == l_last) ? 
izeta + 1 : 0; + l_last = l; + + std::vector coeff_q (delim[i + 1] - delim[i] - 6); + std::transform (v.begin () + delim[i] + 6, + v.begin () + delim[i + 1], + coeff_q.begin (), + [] (std::string const& s) { return std::stod (s); }); + coeff_.emplace (std::make_pair (l, izeta), std::move (coeff_q)); + } } -std::string SphbesRadials::extract(std::string const& str, std::string const& keyword) +std::string + SphbesRadials::extract (std::string const& str, std::string const& keyword) { std::smatch match; std::string regex_string = keyword + "=\" *([^= ]+) *\""; - std::regex re(regex_string); - std::regex_search(str, match, re); - return match.empty() ? "" : match[1].str(); + std::regex re (regex_string); + std::regex_search (str, match, re); + return match.empty () ? "" : match[1].str (); } -std::vector SphbesRadials::split(std::string const& str, const char* delim) +std::vector + SphbesRadials::split (std::string const& str, const char* delim) { std::vector v; std::string::size_type start = 0, end = 0; - while ((start = str.find_first_not_of(delim, end)) != std::string::npos) - { - end = str.find_first_of(delim, start); - v.push_back(str.substr(start, end - start)); - } + while ((start = str.find_first_not_of (delim, end)) != std::string::npos) + { + end = str.find_first_of (delim, start); + v.push_back (str.substr (start, end - start)); + } return v; } -std::vector SphbesRadials::sphbes_comb(const int l, - std::vector const& coeff_q, - double rcut, - double dr, - std::vector const& q) +std::vector + SphbesRadials::sphbes_comb (const int l, + std::vector const& coeff_q, + double rcut, + double dr, + std::vector const& q) { #ifdef __DEBUG - assert(coeff_q.size() == q.size()); - assert(l >= 0 && rcut >= 0.0 && dr > 0.0); + assert (coeff_q.size () == q.size ()); + assert (l >= 0 && rcut >= 0.0 && dr > 0.0); #endif - int nr = static_cast(rcut / dr) + 1; - std::vector r(nr); - std::for_each(r.begin(), r.end(), [&r, dr](double& x) { x = (&x - r.data()) * dr; }); + int nr = 
static_cast (rcut / dr) + 1; + std::vector r (nr); + std::for_each (r.begin (), r.end (), [&r, dr] (double& x) { x = (&x - r.data ()) * dr; }); - std::vector tmp(nr, 0.0); - std::vector f(nr, 0.0); + std::vector tmp (nr, 0.0); + std::vector f (nr, 0.0); // f[ir] = \sum_{iq} coeff[iq] * j_{l}(q[i] * r[ir]) - for (size_t iq = 0; iq != q.size(); ++iq) - { - if (coeff_q[iq] == 0.0) - continue; - - ModuleBase::Sphbes::sphbesj(nr, r.data(), q[iq], l, tmp.data()); - for (size_t ir = 0; ir != tmp.size(); ++ir) + for (size_t iq = 0; iq != q.size (); ++iq) { - f[ir] += coeff_q[iq] * tmp[ir]; + if (coeff_q[iq] == 0.0) + { + continue; + } + + ModuleBase::Sphbes::sphbesj (nr, r.data (), q[iq], l, tmp.data ()); + for (size_t ir = 0; ir != tmp.size (); ++ir) + { + f[ir] += coeff_q[iq] * tmp[ir]; + } } - } return f; } -double SphbesRadials::smooth(double r, double rcut, double sigma) +double + SphbesRadials::smooth (double r, double rcut, double sigma) { - return (r < rcut) * (sigma == 0 ? 1.0 : 1.0 - std::exp(-0.5 * std::pow((r - rcut) / sigma, 2))); + return (r < rcut) * (sigma == 0 ? 
1.0 : 1.0 - std::exp (-0.5 * std::pow ((r - rcut) / sigma, 2))); } -void SphbesRadials::build_radset(const bool normalize) +void + SphbesRadials::build_radset (const bool normalize) { // symbol_ is set in read_coeff() // itype_ is set in build() // rcut_max_ is set in read_coeff() (there's only one rcut for all orbitals) - lmax_ = std::max_element(coeff_.begin(), coeff_.end())->first.first; // std::pair uses lexicographical order + lmax_ = std::max_element (coeff_.begin (), coeff_.end ())->first.first; // std::pair uses lexicographical order delete[] nzeta_; nzeta_ = new int[lmax_ + 1](); // zero initialized for (auto const& p: coeff_) - { - nzeta_[p.first.first]++; - } - nzeta_max_ = *std::max_element(nzeta_, nzeta_ + lmax_ + 1); - indexing(); + { + nzeta_[p.first.first]++; + } + nzeta_max_ = *std::max_element (nzeta_, nzeta_ + lmax_ + 1); + indexing (); - int nr = static_cast(rcut_max_ / dr_) + 1; - std::vector r(nr); - std::for_each(r.begin(), r.end(), [&r, this](double& x) { x = (&x - r.data()) * dr_; }); + int nr = static_cast (rcut_max_ / dr_) + 1; + std::vector r (nr); + std::for_each (r.begin (), r.end (), [&r, this] (double& x) { x = (&x - r.data ()) * dr_; }); - nchi_ = coeff_.size(); + nchi_ = coeff_.size (); chi_ = new NumericalRadial[nchi_]; for (auto const& p: coeff_) // p has the form of ( (l, izeta), coeff_q ) - { - int l = p.first.first; - int izeta = p.first.second; - auto& coeff_q = p.second; - - // find wave numbers such that j_l(q * rcut) = 0 - std::vector q(coeff_q.size()); - ModuleBase::Sphbes::sphbes_zeros(l, coeff_q.size(), q.data()); - std::for_each(q.begin(), q.end(), [this](double& qi) { qi /= rcut_max_; }); - - // linear combination of spherical Bessel functions - std::vector f = sphbes_comb(l, coeff_q, rcut_max_, dr_, q); - - // smooth the function at rcut - std::transform(r.begin(), r.end(), f.begin(), f.begin(), [this](double ri, double fi) { - return fi * smooth(ri, rcut_max_, sigma_); - }); - - chi_[index(l, izeta)].build(l, true, 
nr, r.data(), f.data(), 0, izeta, symbol_, itype_, false); - if (normalize) - chi_[index(l, izeta)].normalize(); - } + { + int l = p.first.first; + int izeta = p.first.second; + auto& coeff_q = p.second; + + // find wave numbers such that j_l(q * rcut) = 0 + std::vector q (coeff_q.size ()); + ModuleBase::Sphbes::sphbes_zeros (l, coeff_q.size (), q.data ()); + std::for_each (q.begin (), q.end (), [this] (double& qi) { qi /= rcut_max_; }); + + // linear combination of spherical Bessel functions + std::vector f = sphbes_comb (l, coeff_q, rcut_max_, dr_, q); + + // smooth the function at rcut + std::transform (r.begin (), + r.end (), + f.begin (), + f.begin (), + [this] (double ri, double fi) { return fi * smooth (ri, rcut_max_, sigma_); }); + + chi_[index (l, izeta)].build (l, true, nr, r.data (), f.data (), 0, izeta, symbol_, itype_, false); + if (normalize) + { + chi_[index (l, izeta)].normalize (); + } + } } diff --git a/source/source_basis/module_nao/sphbes_radials.h b/source/source_basis/module_nao/sphbes_radials.h index 9d89e478f42..e143f74bf5d 100644 --- a/source/source_basis/module_nao/sphbes_radials.h +++ b/source/source_basis/module_nao/sphbes_radials.h @@ -9,19 +9,25 @@ /** * @brief Numerical radials from spherical Bessel coefficients. 
- * + * */ class SphbesRadials : public RadialSet { public: - SphbesRadials() {} - SphbesRadials(const SphbesRadials& other): - RadialSet(other), sigma_(other.sigma_), dr_(other.dr_), coeff_(other.coeff_) {} + SphbesRadials () {} + SphbesRadials (const SphbesRadials& other) + : RadialSet (other), sigma_ (other.sigma_), dr_ (other.dr_), coeff_ (other.coeff_) + { + } - SphbesRadials& operator=(const SphbesRadials& rhs); - SphbesRadials* clone() const { return new SphbesRadials(*this); } // covariant return type + SphbesRadials& operator= (const SphbesRadials& rhs); + SphbesRadials* + clone () const + { + return new SphbesRadials (*this); + } // covariant return type - ~SphbesRadials() {} // ~RadialSet() is called automatically + ~SphbesRadials () {} // ~RadialSet() is called automatically /** * @brief Builds the class from a spherical Bessel coefficient file @@ -32,12 +38,11 @@ class SphbesRadials : public RadialSet * @param[in] ptr_log output file stream for logging * @param[in] rank MPI rank */ - void build(const std::string& file, - const double dr = 0.01, - const int itype = 0, - std::ofstream* ptr_log = nullptr, - const int rank = 0 - ); + void build (const std::string& file, + const double dr = 0.01, + const int itype = 0, + std::ofstream* ptr_log = nullptr, + const int rank = 0); /** * @brief Builds the class with truncated spherical Bessel functions. 
@@ -53,27 +58,37 @@ class SphbesRadials : public RadialSet * @param[in] ptr_log output file stream for logging * @param[in] rank MPI rank */ - void build(const int lmax, - const int nbes, - const double rcut, - const double sigma, - const double dr = 0.01, - const int itype = 0, - std::ofstream* ptr_log = nullptr, - const int rank = 0 - ); + void build (const int lmax, + const int nbes, + const double rcut, + const double sigma, + const double dr = 0.01, + const int itype = 0, + std::ofstream* ptr_log = nullptr, + const int rank = 0); /** * @name Getters */ ///@{ - double sigma() const { return sigma_; } - double dr() const { return dr_; } - std::vector const& coeff(const int l, const int izeta) const { return coeff_.at(std::make_pair(l, izeta)); } + double + sigma () const + { + return sigma_; + } + double + dr () const + { + return dr_; + } + std::vector const& + coeff (const int l, const int izeta) const + { + return coeff_.at (std::make_pair (l, izeta)); + } ///@} private: - /// Smoothing parameter. double sigma_ = 0.0; @@ -81,34 +96,29 @@ class SphbesRadials : public RadialSet double dr_ = 0.01; /// Spherical Bessel coefficients coeff_[{l,zeta}][q] - std::map, std::vector> coeff_; + std::map, std::vector> coeff_; /// Reads spherical Bessel coefficients, cutoff radius & smoothing parameter from a file stream. - void read_coeff(std::ifstream& ifs, - std::ofstream* ptr_log = nullptr, - const int rank = 0); + void read_coeff (std::ifstream& ifs, std::ofstream* ptr_log = nullptr, const int rank = 0); - /// - void build_radset(const bool normalize = true); + /// + void build_radset (const bool normalize = true); /// Extracts a substring (VALUE) from a string of the form KEYWORD=" VALUE ". - std::string extract(std::string const& str, std::string const& keyword); + std::string extract (std::string const& str, std::string const& keyword); /// Splits a string into a vector of substrings with given delimiters. 
- std::vector split(std::string const& str, const char* delim = " \n\t"); + std::vector split (std::string const& str, const char* delim = " \n\t"); /// Computes the combination of spherical Bessel functions on a uniform grid. - std::vector sphbes_comb(const int l, - std::vector const& coeff_q, - double rcut, - double dr, - std::vector const& q - ); + std::vector sphbes_comb (const int l, + std::vector const& coeff_q, + double rcut, + double dr, + std::vector const& q); /// Smoothing function. - double smooth(double r, double rcut, double sigma); - + double smooth (double r, double rcut, double sigma); }; #endif - diff --git a/source/source_basis/module_nao/test/atomic_radials_test.cpp b/source/source_basis/module_nao/test/atomic_radials_test.cpp index 6632ddcb46e..725fe9fcce8 100644 --- a/source/source_basis/module_nao/test/atomic_radials_test.cpp +++ b/source/source_basis/module_nao/test/atomic_radials_test.cpp @@ -40,88 +40,89 @@ using ModuleBase::SphericalBesselTransformer; class AtomicRadialsTest : public ::testing::Test { protected: - void SetUp(); - void TearDown(){}; + void SetUp (); + void TearDown () {}; - AtomicRadials Ti_radials; //!< object under test + AtomicRadials Ti_radials; //!< object under test std::string file = "../../../../../tests/PP_ORB/Ti_gga_10au_100Ry_4s2p2d1f.orb"; //!< orbital file to read from - std::string log_file = "./test_files/atomic_radials.log"; //!< file for logging + std::string log_file = "./test_files/atomic_radials.log"; //!< file for logging double tol = 1e-12; //!< numerical tolerance for grid & values }; -void AtomicRadialsTest::SetUp() +void + AtomicRadialsTest::SetUp () { #ifdef __MPI - MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); #endif } -TEST_F(AtomicRadialsTest, ReadAndGet) +TEST_F (AtomicRadialsTest, ReadAndGet) { - Ti_radials.build(file, 0, nullptr, GlobalV::MY_RANK); - - EXPECT_EQ(Ti_radials.lmax(), 3); - EXPECT_EQ(Ti_radials.nzeta(0), 4); - 
EXPECT_EQ(Ti_radials.nzeta(1), 2); - EXPECT_EQ(Ti_radials.nzeta(2), 2); - EXPECT_EQ(Ti_radials.nzeta(3), 1); - EXPECT_EQ(Ti_radials.nzeta_max(), 4); - EXPECT_EQ(Ti_radials.nchi(), 9); - EXPECT_DOUBLE_EQ(Ti_radials.rcut_max(), 10.0); - EXPECT_DOUBLE_EQ(Ti_radials.orb_ecut(), 100.0); - - EXPECT_NEAR(Ti_radials.chi(0, 0).rvalue(0), -1.581711853170e-01, tol); - EXPECT_NEAR(Ti_radials.chi(0, 0).rvalue(4), -1.583907030513e-01, tol); - EXPECT_NEAR(Ti_radials.chi(0, 0).rvalue(996), -4.183526380009e-05, tol); - EXPECT_NEAR(Ti_radials.chi(0, 0).rvalue(1000), 0, tol); - - EXPECT_NEAR(Ti_radials.chi(0, 3).rvalue(0), -1.166292682541e+00, tol); - EXPECT_NEAR(Ti_radials.chi(0, 3).rvalue(4), -1.164223359672e+00, tol); - EXPECT_NEAR(Ti_radials.chi(0, 3).rvalue(996), -3.183325576529e-04, tol); - EXPECT_NEAR(Ti_radials.chi(0, 3).rvalue(1000), 0, tol); - - EXPECT_NEAR(Ti_radials.chi(3, 0).rvalue(0), 0, tol); - EXPECT_NEAR(Ti_radials.chi(3, 0).rvalue(4), 3.744878535962e-05, tol); - EXPECT_NEAR(Ti_radials.chi(3, 0).rvalue(996), 7.495357740660e-05, tol); - EXPECT_NEAR(Ti_radials.chi(3, 0).rvalue(1000), 0, tol); + Ti_radials.build (file, 0, nullptr, GlobalV::MY_RANK); + + EXPECT_EQ (Ti_radials.lmax (), 3); + EXPECT_EQ (Ti_radials.nzeta (0), 4); + EXPECT_EQ (Ti_radials.nzeta (1), 2); + EXPECT_EQ (Ti_radials.nzeta (2), 2); + EXPECT_EQ (Ti_radials.nzeta (3), 1); + EXPECT_EQ (Ti_radials.nzeta_max (), 4); + EXPECT_EQ (Ti_radials.nchi (), 9); + EXPECT_DOUBLE_EQ (Ti_radials.rcut_max (), 10.0); + EXPECT_DOUBLE_EQ (Ti_radials.orb_ecut (), 100.0); + + EXPECT_NEAR (Ti_radials.chi (0, 0).rvalue (0), -1.581711853170e-01, tol); + EXPECT_NEAR (Ti_radials.chi (0, 0).rvalue (4), -1.583907030513e-01, tol); + EXPECT_NEAR (Ti_radials.chi (0, 0).rvalue (996), -4.183526380009e-05, tol); + EXPECT_NEAR (Ti_radials.chi (0, 0).rvalue (1000), 0, tol); + + EXPECT_NEAR (Ti_radials.chi (0, 3).rvalue (0), -1.166292682541e+00, tol); + EXPECT_NEAR (Ti_radials.chi (0, 3).rvalue (4), -1.164223359672e+00, tol); + 
EXPECT_NEAR (Ti_radials.chi (0, 3).rvalue (996), -3.183325576529e-04, tol); + EXPECT_NEAR (Ti_radials.chi (0, 3).rvalue (1000), 0, tol); + + EXPECT_NEAR (Ti_radials.chi (3, 0).rvalue (0), 0, tol); + EXPECT_NEAR (Ti_radials.chi (3, 0).rvalue (4), 3.744878535962e-05, tol); + EXPECT_NEAR (Ti_radials.chi (3, 0).rvalue (996), 7.495357740660e-05, tol); + EXPECT_NEAR (Ti_radials.chi (3, 0).rvalue (1000), 0, tol); } -TEST_F(AtomicRadialsTest, BatchSet) +TEST_F (AtomicRadialsTest, BatchSet) { int itype = 5; - Ti_radials.build(file, itype, nullptr, GlobalV::MY_RANK); + Ti_radials.build (file, itype, nullptr, GlobalV::MY_RANK); - EXPECT_EQ(Ti_radials.itype(), 5); - EXPECT_EQ(Ti_radials.chi(0, 0).itype(), 5); - EXPECT_EQ(Ti_radials.chi(0, 3).itype(), 5); - EXPECT_EQ(Ti_radials.chi(3, 0).itype(), 5); + EXPECT_EQ (Ti_radials.itype (), 5); + EXPECT_EQ (Ti_radials.chi (0, 0).itype (), 5); + EXPECT_EQ (Ti_radials.chi (0, 3).itype (), 5); + EXPECT_EQ (Ti_radials.chi (3, 0).itype (), 5); SphericalBesselTransformer sbt; - Ti_radials.set_transformer(sbt); - EXPECT_EQ(sbt, Ti_radials.chi(0, 0).sbt()); - EXPECT_EQ(sbt, Ti_radials.chi(0, 3).sbt()); - EXPECT_EQ(sbt, Ti_radials.chi(3, 0).sbt()); - - Ti_radials.set_uniform_grid(true, 2001, 20.0); - EXPECT_EQ(Ti_radials.chi(0, 0).nr(), 2001); - EXPECT_EQ(Ti_radials.chi(0, 3).nr(), 2001); - EXPECT_EQ(Ti_radials.chi(3, 0).nr(), 2001); - EXPECT_NEAR(Ti_radials.chi(0, 0).rgrid(2000), 20, tol); - EXPECT_NEAR(Ti_radials.chi(0, 3).rgrid(1500), 15, tol); - EXPECT_NEAR(Ti_radials.chi(3, 0).rgrid(1200), 12, tol); - EXPECT_NEAR(Ti_radials.chi(0, 0).rvalue(2000), 0, tol); - EXPECT_NEAR(Ti_radials.chi(0, 3).rvalue(1500), 0, tol); - EXPECT_NEAR(Ti_radials.chi(3, 0).rvalue(1200), 0, tol); + Ti_radials.set_transformer (sbt); + EXPECT_EQ (sbt, Ti_radials.chi (0, 0).sbt ()); + EXPECT_EQ (sbt, Ti_radials.chi (0, 3).sbt ()); + EXPECT_EQ (sbt, Ti_radials.chi (3, 0).sbt ()); + + Ti_radials.set_uniform_grid (true, 2001, 20.0); + EXPECT_EQ (Ti_radials.chi (0, 0).nr 
(), 2001); + EXPECT_EQ (Ti_radials.chi (0, 3).nr (), 2001); + EXPECT_EQ (Ti_radials.chi (3, 0).nr (), 2001); + EXPECT_NEAR (Ti_radials.chi (0, 0).rgrid (2000), 20, tol); + EXPECT_NEAR (Ti_radials.chi (0, 3).rgrid (1500), 15, tol); + EXPECT_NEAR (Ti_radials.chi (3, 0).rgrid (1200), 12, tol); + EXPECT_NEAR (Ti_radials.chi (0, 0).rvalue (2000), 0, tol); + EXPECT_NEAR (Ti_radials.chi (0, 3).rvalue (1500), 0, tol); + EXPECT_NEAR (Ti_radials.chi (3, 0).rvalue (1200), 0, tol); double grid[5] = {0.0, 1.1, 2.2, 3.3, 4.4}; - Ti_radials.set_grid(true, 5, grid); - EXPECT_EQ(Ti_radials.chi(0, 0).rgrid(1), 1.1); - EXPECT_EQ(Ti_radials.chi(0, 3).rgrid(2), 2.2); - EXPECT_EQ(Ti_radials.chi(3, 0).rgrid(3), 3.3); + Ti_radials.set_grid (true, 5, grid); + EXPECT_EQ (Ti_radials.chi (0, 0).rgrid (1), 1.1); + EXPECT_EQ (Ti_radials.chi (0, 3).rgrid (2), 2.2); + EXPECT_EQ (Ti_radials.chi (3, 0).rgrid (3), 3.3); } -TEST_F(AtomicRadialsTest, Copy) +TEST_F (AtomicRadialsTest, Copy) { /* * This test checks whether @@ -133,149 +134,150 @@ TEST_F(AtomicRadialsTest, Copy) * work as expected. 
* */ int itype = 5; - Ti_radials.build(file, itype, nullptr, GlobalV::MY_RANK); + Ti_radials.build (file, itype, nullptr, GlobalV::MY_RANK); // copy constructor - AtomicRadials Ti_copy(Ti_radials); - - EXPECT_EQ(Ti_copy.itype(), itype); - EXPECT_EQ(Ti_copy.lmax(), 3); - EXPECT_EQ(Ti_copy.nzeta(0), 4); - EXPECT_EQ(Ti_copy.nzeta(1), 2); - EXPECT_EQ(Ti_copy.nzeta(2), 2); - EXPECT_EQ(Ti_copy.nzeta(3), 1); - EXPECT_EQ(Ti_copy.nzeta_max(), 4); - EXPECT_EQ(Ti_copy.nchi(), 9); - EXPECT_DOUBLE_EQ(Ti_copy.rcut_max(), 10.0); - EXPECT_DOUBLE_EQ(Ti_copy.orb_ecut(), 100.0); - - EXPECT_NEAR(Ti_copy.chi(0, 0).rvalue(0), -1.581711853170e-01, tol); - EXPECT_NEAR(Ti_copy.chi(0, 0).rvalue(4), -1.583907030513e-01, tol); - EXPECT_NEAR(Ti_copy.chi(0, 0).rvalue(996), -4.183526380009e-05, tol); - EXPECT_NEAR(Ti_copy.chi(0, 0).rvalue(1000), 0, tol); - - EXPECT_NEAR(Ti_copy.chi(0, 3).rvalue(0), -1.166292682541e+00, tol); - EXPECT_NEAR(Ti_copy.chi(0, 3).rvalue(4), -1.164223359672e+00, tol); - EXPECT_NEAR(Ti_copy.chi(0, 3).rvalue(996), -3.183325576529e-04, tol); - EXPECT_NEAR(Ti_copy.chi(0, 3).rvalue(1000), 0, tol); - - EXPECT_NEAR(Ti_copy.chi(3, 0).rvalue(0), 0, tol); - EXPECT_NEAR(Ti_copy.chi(3, 0).rvalue(4), 3.744878535962e-05, tol); - EXPECT_NEAR(Ti_copy.chi(3, 0).rvalue(996), 7.495357740660e-05, tol); - EXPECT_NEAR(Ti_copy.chi(3, 0).rvalue(1000), 0, tol); + AtomicRadials Ti_copy (Ti_radials); + + EXPECT_EQ (Ti_copy.itype (), itype); + EXPECT_EQ (Ti_copy.lmax (), 3); + EXPECT_EQ (Ti_copy.nzeta (0), 4); + EXPECT_EQ (Ti_copy.nzeta (1), 2); + EXPECT_EQ (Ti_copy.nzeta (2), 2); + EXPECT_EQ (Ti_copy.nzeta (3), 1); + EXPECT_EQ (Ti_copy.nzeta_max (), 4); + EXPECT_EQ (Ti_copy.nchi (), 9); + EXPECT_DOUBLE_EQ (Ti_copy.rcut_max (), 10.0); + EXPECT_DOUBLE_EQ (Ti_copy.orb_ecut (), 100.0); + + EXPECT_NEAR (Ti_copy.chi (0, 0).rvalue (0), -1.581711853170e-01, tol); + EXPECT_NEAR (Ti_copy.chi (0, 0).rvalue (4), -1.583907030513e-01, tol); + EXPECT_NEAR (Ti_copy.chi (0, 0).rvalue (996), -4.183526380009e-05, 
tol); + EXPECT_NEAR (Ti_copy.chi (0, 0).rvalue (1000), 0, tol); + + EXPECT_NEAR (Ti_copy.chi (0, 3).rvalue (0), -1.166292682541e+00, tol); + EXPECT_NEAR (Ti_copy.chi (0, 3).rvalue (4), -1.164223359672e+00, tol); + EXPECT_NEAR (Ti_copy.chi (0, 3).rvalue (996), -3.183325576529e-04, tol); + EXPECT_NEAR (Ti_copy.chi (0, 3).rvalue (1000), 0, tol); + + EXPECT_NEAR (Ti_copy.chi (3, 0).rvalue (0), 0, tol); + EXPECT_NEAR (Ti_copy.chi (3, 0).rvalue (4), 3.744878535962e-05, tol); + EXPECT_NEAR (Ti_copy.chi (3, 0).rvalue (996), 7.495357740660e-05, tol); + EXPECT_NEAR (Ti_copy.chi (3, 0).rvalue (1000), 0, tol); // assignment operator AtomicRadials Ti_assign; Ti_assign = Ti_radials; - EXPECT_EQ(Ti_assign.itype(), itype); - EXPECT_EQ(Ti_assign.lmax(), 3); - EXPECT_EQ(Ti_assign.nzeta(0), 4); - EXPECT_EQ(Ti_assign.nzeta(1), 2); - EXPECT_EQ(Ti_assign.nzeta(2), 2); - EXPECT_EQ(Ti_assign.nzeta(3), 1); - EXPECT_EQ(Ti_assign.nzeta_max(), 4); - EXPECT_EQ(Ti_assign.nchi(), 9); - EXPECT_DOUBLE_EQ(Ti_assign.rcut_max(), 10.0); - EXPECT_DOUBLE_EQ(Ti_assign.orb_ecut(), 100.0); - - EXPECT_NEAR(Ti_assign.chi(0, 0).rvalue(0), -1.581711853170e-01, tol); - EXPECT_NEAR(Ti_assign.chi(0, 0).rvalue(4), -1.583907030513e-01, tol); - EXPECT_NEAR(Ti_assign.chi(0, 0).rvalue(996), -4.183526380009e-05, tol); - EXPECT_NEAR(Ti_assign.chi(0, 0).rvalue(1000), 0, tol); - - EXPECT_NEAR(Ti_assign.chi(0, 3).rvalue(0), -1.166292682541e+00, tol); - EXPECT_NEAR(Ti_assign.chi(0, 3).rvalue(4), -1.164223359672e+00, tol); - EXPECT_NEAR(Ti_assign.chi(0, 3).rvalue(996), -3.183325576529e-04, tol); - EXPECT_NEAR(Ti_assign.chi(0, 3).rvalue(1000), 0, tol); - - EXPECT_NEAR(Ti_assign.chi(3, 0).rvalue(0), 0, tol); - EXPECT_NEAR(Ti_assign.chi(3, 0).rvalue(4), 3.744878535962e-05, tol); - EXPECT_NEAR(Ti_assign.chi(3, 0).rvalue(996), 7.495357740660e-05, tol); - EXPECT_NEAR(Ti_assign.chi(3, 0).rvalue(1000), 0, tol); + EXPECT_EQ (Ti_assign.itype (), itype); + EXPECT_EQ (Ti_assign.lmax (), 3); + EXPECT_EQ (Ti_assign.nzeta (0), 4); + 
EXPECT_EQ (Ti_assign.nzeta (1), 2); + EXPECT_EQ (Ti_assign.nzeta (2), 2); + EXPECT_EQ (Ti_assign.nzeta (3), 1); + EXPECT_EQ (Ti_assign.nzeta_max (), 4); + EXPECT_EQ (Ti_assign.nchi (), 9); + EXPECT_DOUBLE_EQ (Ti_assign.rcut_max (), 10.0); + EXPECT_DOUBLE_EQ (Ti_assign.orb_ecut (), 100.0); + + EXPECT_NEAR (Ti_assign.chi (0, 0).rvalue (0), -1.581711853170e-01, tol); + EXPECT_NEAR (Ti_assign.chi (0, 0).rvalue (4), -1.583907030513e-01, tol); + EXPECT_NEAR (Ti_assign.chi (0, 0).rvalue (996), -4.183526380009e-05, tol); + EXPECT_NEAR (Ti_assign.chi (0, 0).rvalue (1000), 0, tol); + + EXPECT_NEAR (Ti_assign.chi (0, 3).rvalue (0), -1.166292682541e+00, tol); + EXPECT_NEAR (Ti_assign.chi (0, 3).rvalue (4), -1.164223359672e+00, tol); + EXPECT_NEAR (Ti_assign.chi (0, 3).rvalue (996), -3.183325576529e-04, tol); + EXPECT_NEAR (Ti_assign.chi (0, 3).rvalue (1000), 0, tol); + + EXPECT_NEAR (Ti_assign.chi (3, 0).rvalue (0), 0, tol); + EXPECT_NEAR (Ti_assign.chi (3, 0).rvalue (4), 3.744878535962e-05, tol); + EXPECT_NEAR (Ti_assign.chi (3, 0).rvalue (996), 7.495357740660e-05, tol); + EXPECT_NEAR (Ti_assign.chi (3, 0).rvalue (1000), 0, tol); // polymorphic clone - RadialSet* ptr_Ti_polyclone = Ti_radials.clone(); - - EXPECT_EQ(ptr_Ti_polyclone->itype(), itype); - EXPECT_EQ(ptr_Ti_polyclone->lmax(), 3); - EXPECT_EQ(ptr_Ti_polyclone->nzeta(0), 4); - EXPECT_EQ(ptr_Ti_polyclone->nzeta(1), 2); - EXPECT_EQ(ptr_Ti_polyclone->nzeta(2), 2); - EXPECT_EQ(ptr_Ti_polyclone->nzeta(3), 1); - EXPECT_EQ(ptr_Ti_polyclone->nzeta_max(), 4); - EXPECT_EQ(ptr_Ti_polyclone->nchi(), 9); - EXPECT_DOUBLE_EQ(ptr_Ti_polyclone->rcut_max(), 10.0); - - EXPECT_NEAR(ptr_Ti_polyclone->chi(0, 0).rvalue(0), -1.581711853170e-01, tol); - EXPECT_NEAR(ptr_Ti_polyclone->chi(0, 0).rvalue(4), -1.583907030513e-01, tol); - EXPECT_NEAR(ptr_Ti_polyclone->chi(0, 0).rvalue(996), -4.183526380009e-05, tol); - EXPECT_NEAR(ptr_Ti_polyclone->chi(0, 0).rvalue(1000), 0, tol); - - EXPECT_NEAR(ptr_Ti_polyclone->chi(0, 3).rvalue(0), 
-1.166292682541e+00, tol); - EXPECT_NEAR(ptr_Ti_polyclone->chi(0, 3).rvalue(4), -1.164223359672e+00, tol); - EXPECT_NEAR(ptr_Ti_polyclone->chi(0, 3).rvalue(996), -3.183325576529e-04, tol); - EXPECT_NEAR(ptr_Ti_polyclone->chi(0, 3).rvalue(1000), 0, tol); - - EXPECT_NEAR(ptr_Ti_polyclone->chi(3, 0).rvalue(0), 0, tol); - EXPECT_NEAR(ptr_Ti_polyclone->chi(3, 0).rvalue(4), 3.744878535962e-05, tol); - EXPECT_NEAR(ptr_Ti_polyclone->chi(3, 0).rvalue(996), 7.495357740660e-05, tol); - EXPECT_NEAR(ptr_Ti_polyclone->chi(3, 0).rvalue(1000), 0, tol); + RadialSet* ptr_Ti_polyclone = Ti_radials.clone (); + + EXPECT_EQ (ptr_Ti_polyclone->itype (), itype); + EXPECT_EQ (ptr_Ti_polyclone->lmax (), 3); + EXPECT_EQ (ptr_Ti_polyclone->nzeta (0), 4); + EXPECT_EQ (ptr_Ti_polyclone->nzeta (1), 2); + EXPECT_EQ (ptr_Ti_polyclone->nzeta (2), 2); + EXPECT_EQ (ptr_Ti_polyclone->nzeta (3), 1); + EXPECT_EQ (ptr_Ti_polyclone->nzeta_max (), 4); + EXPECT_EQ (ptr_Ti_polyclone->nchi (), 9); + EXPECT_DOUBLE_EQ (ptr_Ti_polyclone->rcut_max (), 10.0); + + EXPECT_NEAR (ptr_Ti_polyclone->chi (0, 0).rvalue (0), -1.581711853170e-01, tol); + EXPECT_NEAR (ptr_Ti_polyclone->chi (0, 0).rvalue (4), -1.583907030513e-01, tol); + EXPECT_NEAR (ptr_Ti_polyclone->chi (0, 0).rvalue (996), -4.183526380009e-05, tol); + EXPECT_NEAR (ptr_Ti_polyclone->chi (0, 0).rvalue (1000), 0, tol); + + EXPECT_NEAR (ptr_Ti_polyclone->chi (0, 3).rvalue (0), -1.166292682541e+00, tol); + EXPECT_NEAR (ptr_Ti_polyclone->chi (0, 3).rvalue (4), -1.164223359672e+00, tol); + EXPECT_NEAR (ptr_Ti_polyclone->chi (0, 3).rvalue (996), -3.183325576529e-04, tol); + EXPECT_NEAR (ptr_Ti_polyclone->chi (0, 3).rvalue (1000), 0, tol); + + EXPECT_NEAR (ptr_Ti_polyclone->chi (3, 0).rvalue (0), 0, tol); + EXPECT_NEAR (ptr_Ti_polyclone->chi (3, 0).rvalue (4), 3.744878535962e-05, tol); + EXPECT_NEAR (ptr_Ti_polyclone->chi (3, 0).rvalue (996), 7.495357740660e-05, tol); + EXPECT_NEAR (ptr_Ti_polyclone->chi (3, 0).rvalue (1000), 0, tol); delete ptr_Ti_polyclone; // 
normal clone - AtomicRadials* ptr_Ti_clone = Ti_radials.clone(); - EXPECT_DOUBLE_EQ(ptr_Ti_clone->orb_ecut(), 100.0); + AtomicRadials* ptr_Ti_clone = Ti_radials.clone (); + EXPECT_DOUBLE_EQ (ptr_Ti_clone->orb_ecut (), 100.0); delete ptr_Ti_clone; } -TEST_F(AtomicRadialsTest, BeginAndEnd) +TEST_F (AtomicRadialsTest, BeginAndEnd) { int itype = 5; - Ti_radials.build(file, itype, nullptr, GlobalV::MY_RANK); + Ti_radials.build (file, itype, nullptr, GlobalV::MY_RANK); - EXPECT_EQ(Ti_radials.cbegin(), &Ti_radials.chi(0, 0)); - EXPECT_EQ(Ti_radials.cend() - 1, &Ti_radials.chi(3, 0)); + EXPECT_EQ (Ti_radials.cbegin (), &Ti_radials.chi (0, 0)); + EXPECT_EQ (Ti_radials.cend () - 1, &Ti_radials.chi (3, 0)); } -TEST_F(AtomicRadialsTest, ToNumericalOrbital) +TEST_F (AtomicRadialsTest, ToNumericalOrbital) { int itype = 5; - Ti_radials.build(file, itype, nullptr, GlobalV::MY_RANK); + Ti_radials.build (file, itype, nullptr, GlobalV::MY_RANK); ModuleBase::SphericalBesselTransformer sbt; - Ti_radials.set_transformer(sbt); - Ti_radials.set_uniform_grid(false, 1001, 30.0, 't'); + Ti_radials.set_transformer (sbt); + Ti_radials.set_uniform_grid (false, 1001, 30.0, 't'); Numerical_Orbital no; - Ti_radials.to_numerical_orbital(no); - - EXPECT_EQ(Ti_radials.lmax(), no.getLmax()); - EXPECT_EQ(Ti_radials.rcut_max(), no.getRcut()); - EXPECT_EQ(Ti_radials.itype(), no.getType()); - EXPECT_EQ(Ti_radials.nchi(), no.getTotal_nchi()); - EXPECT_EQ(Ti_radials.symbol(), no.getLabel()); - - for (int l = 0; l <= Ti_radials.lmax(); l++) - { - EXPECT_EQ(Ti_radials.nzeta(l), no.getNchi(l)); - } + Ti_radials.to_numerical_orbital (no); + + EXPECT_EQ (Ti_radials.lmax (), no.getLmax ()); + EXPECT_EQ (Ti_radials.rcut_max (), no.getRcut ()); + EXPECT_EQ (Ti_radials.itype (), no.getType ()); + EXPECT_EQ (Ti_radials.nchi (), no.getTotal_nchi ()); + EXPECT_EQ (Ti_radials.symbol (), no.getLabel ()); + + for (int l = 0; l <= Ti_radials.lmax (); l++) + { + EXPECT_EQ (Ti_radials.nzeta (l), no.getNchi (l)); + } } -int 
main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; diff --git a/source/source_basis/module_nao/test/beta_radials_test.cpp b/source/source_basis/module_nao/test/beta_radials_test.cpp index 16039641986..53197e261a8 100644 --- a/source/source_basis/module_nao/test/beta_radials_test.cpp +++ b/source/source_basis/module_nao/test/beta_radials_test.cpp @@ -33,300 +33,306 @@ class BetaRadialsTest : public ::testing::Test { protected: - void SetUp(); - void TearDown() {} + void SetUp (); + void + TearDown () + { + } BetaRadials beta; //!< object under test - std::string dir = "../../../../../tests/PP_ORB/"; //!< directory with test files + std::string dir = "../../../../../tests/PP_ORB/"; //!< directory with test files std::string log_file = "./test_files/beta_radials.log"; //!< file for logging }; -void BetaRadialsTest::SetUp() { +void + BetaRadialsTest::SetUp () +{ #ifdef __MPI - MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); #endif } -//TEST_F(BetaRadialsTest, ReadAndGet100) +// TEST_F(BetaRadialsTest, ReadAndGet100) //{ -// /* -// * Read beta projectors from a UPF file of old format -// * */ -// std::string file100 = "Zn.LDA.UPF"; //!< a UPF 1.0.0 file to read from -// beta.build(dir+file100, 314, nullptr, GlobalV::MY_RANK); -// -// EXPECT_EQ(beta.itype(), 314); -// EXPECT_EQ(beta.symbol(), "Zn"); -// EXPECT_EQ(beta.lmax(), 2); -// EXPECT_EQ(beta.nzeta(0), 0); -// EXPECT_EQ(beta.nzeta(1), 1); -// EXPECT_EQ(beta.nzeta(2), 1); -// EXPECT_EQ(beta.nzeta_max(), 1); -// EXPECT_EQ(beta.nchi(), 2); -// -// EXPECT_DOUBLE_EQ(beta.rcut_max(), 3.36331676102); -// -// EXPECT_DOUBLE_EQ(beta.chi(1, 0).ptr_rgrid()[0] , 
3.21829939971e-05); -// EXPECT_DOUBLE_EQ(beta.chi(1, 0).ptr_rgrid()[4] , 3.39007851980e-05); -// EXPECT_DOUBLE_EQ(beta.chi(1, 0).ptr_rgrid()[888], 3.31987661585e+00); -// EXPECT_DOUBLE_EQ(beta.chi(1, 0).ptr_rgrid()[889], 3.36331676102e+00); -// -// EXPECT_DOUBLE_EQ(beta.chi(2, 0).ptr_rgrid()[0] , 3.21829939971e-05); -// EXPECT_DOUBLE_EQ(beta.chi(2, 0).ptr_rgrid()[4] , 3.39007851980e-05); -// EXPECT_DOUBLE_EQ(beta.chi(2, 0).ptr_rgrid()[887], 3.27699753773e+00); -// EXPECT_DOUBLE_EQ(beta.chi(2, 0).ptr_rgrid()[888], 3.31987661585e+00); -// -// EXPECT_DOUBLE_EQ(beta.chi(1, 0).ptr_rvalue()[0] , -9.73759791529e-09); -// EXPECT_DOUBLE_EQ(beta.chi(1, 0).ptr_rvalue()[4] , -1.08048430719e-08); -// EXPECT_DOUBLE_EQ(beta.chi(1, 0).ptr_rvalue()[600], -5.66090228695e-02); -// EXPECT_DOUBLE_EQ(beta.chi(1, 0).ptr_rvalue()[888], -1.89079314582e-11); -// EXPECT_DOUBLE_EQ(beta.chi(1, 0).ptr_rvalue()[889], 1.99483148995e-12); -// -// EXPECT_DOUBLE_EQ(beta.chi(2, 0).ptr_rvalue()[0] , -2.84316719619e-11); -// EXPECT_DOUBLE_EQ(beta.chi(2, 0).ptr_rvalue()[4] , -3.32316831459e-11); -// EXPECT_DOUBLE_EQ(beta.chi(2, 0).ptr_rvalue()[600], -3.87224488590e-01); -// EXPECT_DOUBLE_EQ(beta.chi(2, 0).ptr_rvalue()[887], 3.31894955664e-11); -// EXPECT_DOUBLE_EQ(beta.chi(2, 0).ptr_rvalue()[888], -3.22751710952e-12); -//} -// -//TEST_F(BetaRadialsTest, ReadAndGetVoid) { -// /* -// * This test reads a UPF file with no beta projectors. -// * The test is to check that the code does not crash. 
-// * */ -// std::string file0 = "H.pz-vbc.UPF"; //!< a UPF file with no beta projectors -// beta.build(dir+file0, 5, nullptr, GlobalV::MY_RANK); -// -// EXPECT_EQ(beta.nchi(), 0); -// EXPECT_EQ(beta.itype(), 5); -// EXPECT_EQ(beta.lmax(), -1); -// EXPECT_EQ(beta.symbol(), "H"); -//} -// -//TEST_F(BetaRadialsTest, ReadAndGet201) +// /* +// * Read beta projectors from a UPF file of old format +// * */ +// std::string file100 = "Zn.LDA.UPF"; //!< a UPF 1.0.0 file to read from +// beta.build(dir+file100, 314, nullptr, GlobalV::MY_RANK); +// +// EXPECT_EQ(beta.itype(), 314); +// EXPECT_EQ(beta.symbol(), "Zn"); +// EXPECT_EQ(beta.lmax(), 2); +// EXPECT_EQ(beta.nzeta(0), 0); +// EXPECT_EQ(beta.nzeta(1), 1); +// EXPECT_EQ(beta.nzeta(2), 1); +// EXPECT_EQ(beta.nzeta_max(), 1); +// EXPECT_EQ(beta.nchi(), 2); +// +// EXPECT_DOUBLE_EQ(beta.rcut_max(), 3.36331676102); +// +// EXPECT_DOUBLE_EQ(beta.chi(1, 0).ptr_rgrid()[0] , 3.21829939971e-05); +// EXPECT_DOUBLE_EQ(beta.chi(1, 0).ptr_rgrid()[4] , 3.39007851980e-05); +// EXPECT_DOUBLE_EQ(beta.chi(1, 0).ptr_rgrid()[888], 3.31987661585e+00); +// EXPECT_DOUBLE_EQ(beta.chi(1, 0).ptr_rgrid()[889], 3.36331676102e+00); +// +// EXPECT_DOUBLE_EQ(beta.chi(2, 0).ptr_rgrid()[0] , 3.21829939971e-05); +// EXPECT_DOUBLE_EQ(beta.chi(2, 0).ptr_rgrid()[4] , 3.39007851980e-05); +// EXPECT_DOUBLE_EQ(beta.chi(2, 0).ptr_rgrid()[887], 3.27699753773e+00); +// EXPECT_DOUBLE_EQ(beta.chi(2, 0).ptr_rgrid()[888], 3.31987661585e+00); +// +// EXPECT_DOUBLE_EQ(beta.chi(1, 0).ptr_rvalue()[0] , -9.73759791529e-09); +// EXPECT_DOUBLE_EQ(beta.chi(1, 0).ptr_rvalue()[4] , -1.08048430719e-08); +// EXPECT_DOUBLE_EQ(beta.chi(1, 0).ptr_rvalue()[600], -5.66090228695e-02); +// EXPECT_DOUBLE_EQ(beta.chi(1, 0).ptr_rvalue()[888], -1.89079314582e-11); +// EXPECT_DOUBLE_EQ(beta.chi(1, 0).ptr_rvalue()[889], 1.99483148995e-12); +// +// EXPECT_DOUBLE_EQ(beta.chi(2, 0).ptr_rvalue()[0] , -2.84316719619e-11); +// EXPECT_DOUBLE_EQ(beta.chi(2, 0).ptr_rvalue()[4] , -3.32316831459e-11); 
+// EXPECT_DOUBLE_EQ(beta.chi(2, 0).ptr_rvalue()[600], -3.87224488590e-01); +// EXPECT_DOUBLE_EQ(beta.chi(2, 0).ptr_rvalue()[887], 3.31894955664e-11); +// EXPECT_DOUBLE_EQ(beta.chi(2, 0).ptr_rvalue()[888], -3.22751710952e-12); +// } +// +// TEST_F(BetaRadialsTest, ReadAndGetVoid) { +// /* +// * This test reads a UPF file with no beta projectors. +// * The test is to check that the code does not crash. +// * */ +// std::string file0 = "H.pz-vbc.UPF"; //!< a UPF file with no beta projectors +// beta.build(dir+file0, 5, nullptr, GlobalV::MY_RANK); +// +// EXPECT_EQ(beta.nchi(), 0); +// EXPECT_EQ(beta.itype(), 5); +// EXPECT_EQ(beta.lmax(), -1); +// EXPECT_EQ(beta.symbol(), "H"); +// } +// +// TEST_F(BetaRadialsTest, ReadAndGet201) //{ -// /* -// * This test read beta projectors from a UPF file of 2.0.1 format -// * */ -// std::string file201 = "Pb_ONCV_PBE-1.0.upf"; //!< a UPF 2.0.1 file to read from -// beta.build(dir+file201, 999, nullptr, GlobalV::MY_RANK); -// -// EXPECT_EQ(beta.itype(), 999); -// EXPECT_EQ(beta.symbol(), "Pb"); -// EXPECT_EQ(beta.lmax(), 3); -// EXPECT_EQ(beta.nzeta(0), 2); -// EXPECT_EQ(beta.nzeta(1), 2); -// EXPECT_EQ(beta.nzeta(2), 2); -// EXPECT_EQ(beta.nzeta(3), 2); -// EXPECT_EQ(beta.nzeta_max(), 2); -// EXPECT_EQ(beta.nchi(), 8); -// -// // NOTE: neither "cutoff_radius_index" nor "cutoff_radius" is reliable! 
-// // the code reads all the values first and then reverse scan to determine the grid size -// EXPECT_DOUBLE_EQ(beta.rcut_max(), 3.68); -// -// EXPECT_EQ(beta.chi(0,0).rcut(), 3.64); -// EXPECT_EQ(beta.chi(0,0).nr(), 365); -// EXPECT_EQ(beta.chi(0,0).izeta(), 0); -// EXPECT_DOUBLE_EQ(beta.chi(0, 0).ptr_rgrid()[0] , 0.0000); -// EXPECT_DOUBLE_EQ(beta.chi(0, 0).ptr_rgrid()[8] , 0.0800); -// EXPECT_DOUBLE_EQ(beta.chi(0, 0).ptr_rgrid()[364], 3.6400); -// EXPECT_DOUBLE_EQ(beta.chi(0, 0).ptr_rvalue()[0] , 0.0000000000e+00); -// EXPECT_DOUBLE_EQ(beta.chi(0, 0).ptr_rvalue()[4] , 5.9689893417e-02); -// EXPECT_DOUBLE_EQ(beta.chi(0, 0).ptr_rvalue()[364], -4.5888625103e-07); -// -// EXPECT_EQ(beta.chi(3,1).rcut(), 3.68); -// EXPECT_EQ(beta.chi(3,1).nr(), 369); -// EXPECT_EQ(beta.chi(3,1).izeta(), 1); -// EXPECT_DOUBLE_EQ(beta.chi(3, 1).ptr_rgrid()[0] , 0.0000); -// EXPECT_DOUBLE_EQ(beta.chi(3, 1).ptr_rgrid()[8] , 0.0800); -// EXPECT_DOUBLE_EQ(beta.chi(3, 1).ptr_rgrid()[368], 3.6800); -// EXPECT_DOUBLE_EQ(beta.chi(3, 1).ptr_rvalue()[0] , 0.0000000000e+00); -// EXPECT_DOUBLE_EQ(beta.chi(3, 1).ptr_rvalue()[4] , 1.7908487484e-06); -// EXPECT_DOUBLE_EQ(beta.chi(3, 1).ptr_rvalue()[368], -7.0309158570e-06); -//} -// -//TEST_F(BetaRadialsTest, BatchSet) +// /* +// * This test read beta projectors from a UPF file of 2.0.1 format +// * */ +// std::string file201 = "Pb_ONCV_PBE-1.0.upf"; //!< a UPF 2.0.1 file to read from +// beta.build(dir+file201, 999, nullptr, GlobalV::MY_RANK); +// +// EXPECT_EQ(beta.itype(), 999); +// EXPECT_EQ(beta.symbol(), "Pb"); +// EXPECT_EQ(beta.lmax(), 3); +// EXPECT_EQ(beta.nzeta(0), 2); +// EXPECT_EQ(beta.nzeta(1), 2); +// EXPECT_EQ(beta.nzeta(2), 2); +// EXPECT_EQ(beta.nzeta(3), 2); +// EXPECT_EQ(beta.nzeta_max(), 2); +// EXPECT_EQ(beta.nchi(), 8); +// +// // NOTE: neither "cutoff_radius_index" nor "cutoff_radius" is reliable! 
+// // the code reads all the values first and then reverse scan to determine the grid size +// EXPECT_DOUBLE_EQ(beta.rcut_max(), 3.68); +// +// EXPECT_EQ(beta.chi(0,0).rcut(), 3.64); +// EXPECT_EQ(beta.chi(0,0).nr(), 365); +// EXPECT_EQ(beta.chi(0,0).izeta(), 0); +// EXPECT_DOUBLE_EQ(beta.chi(0, 0).ptr_rgrid()[0] , 0.0000); +// EXPECT_DOUBLE_EQ(beta.chi(0, 0).ptr_rgrid()[8] , 0.0800); +// EXPECT_DOUBLE_EQ(beta.chi(0, 0).ptr_rgrid()[364], 3.6400); +// EXPECT_DOUBLE_EQ(beta.chi(0, 0).ptr_rvalue()[0] , 0.0000000000e+00); +// EXPECT_DOUBLE_EQ(beta.chi(0, 0).ptr_rvalue()[4] , 5.9689893417e-02); +// EXPECT_DOUBLE_EQ(beta.chi(0, 0).ptr_rvalue()[364], -4.5888625103e-07); +// +// EXPECT_EQ(beta.chi(3,1).rcut(), 3.68); +// EXPECT_EQ(beta.chi(3,1).nr(), 369); +// EXPECT_EQ(beta.chi(3,1).izeta(), 1); +// EXPECT_DOUBLE_EQ(beta.chi(3, 1).ptr_rgrid()[0] , 0.0000); +// EXPECT_DOUBLE_EQ(beta.chi(3, 1).ptr_rgrid()[8] , 0.0800); +// EXPECT_DOUBLE_EQ(beta.chi(3, 1).ptr_rgrid()[368], 3.6800); +// EXPECT_DOUBLE_EQ(beta.chi(3, 1).ptr_rvalue()[0] , 0.0000000000e+00); +// EXPECT_DOUBLE_EQ(beta.chi(3, 1).ptr_rvalue()[4] , 1.7908487484e-06); +// EXPECT_DOUBLE_EQ(beta.chi(3, 1).ptr_rvalue()[368], -7.0309158570e-06); +// } +// +// TEST_F(BetaRadialsTest, BatchSet) //{ -// std::string file201 = "Pb_ONCV_PBE-1.0.upf"; //!< a UPF 2.0.1 file to read from -// beta.build(dir+file201, 999, nullptr, GlobalV::MY_RANK); -// -// ModuleBase::SphericalBesselTransformer sbt; -// beta.set_transformer(&sbt); -// for (int l = 0; l != beta.lmax(); ++l) { -// for (int izeta = 0; izeta != beta.nzeta(l); ++izeta) { -// EXPECT_EQ(beta.chi(l, izeta).ptr_sbt(), &sbt); -// } -// } -// -// beta.set_uniform_grid(true, 2001, 20.0); -// for (int l = 0; l != beta.lmax(); ++l) { -// for (int izeta = 0; izeta != beta.nzeta(l); ++izeta) { -// EXPECT_EQ(beta.chi(l, izeta).nr(), 2001); -// EXPECT_EQ(beta.chi(l, izeta).rcut(), 20.0); -// EXPECT_EQ(beta.chi(l, izeta).ptr_rgrid()[1500], 15.0); -// EXPECT_EQ(beta.chi(l, 
izeta).ptr_rvalue()[1500], 0.0); -// } -// } -// -// double grid[5] = {0.0, 1.1, 2.2, 3.3, 4.4}; -// beta.set_grid(true, 5, grid); -// for (int l = 0; l != beta.lmax(); ++l) { -// for (int izeta = 0; izeta != beta.nzeta(l); ++izeta) { -// EXPECT_EQ(beta.chi(l, izeta).ptr_rgrid()[0], 0.0); -// EXPECT_EQ(beta.chi(l, izeta).ptr_rgrid()[1], 1.1); -// EXPECT_EQ(beta.chi(l, izeta).ptr_rgrid()[2], 2.2); -// EXPECT_EQ(beta.chi(l, izeta).ptr_rgrid()[3], 3.3); -// EXPECT_EQ(beta.chi(l, izeta).ptr_rgrid()[4], 4.4); -// } -// } -//} -// -//TEST_F(BetaRadialsTest, Copy) +// std::string file201 = "Pb_ONCV_PBE-1.0.upf"; //!< a UPF 2.0.1 file to read from +// beta.build(dir+file201, 999, nullptr, GlobalV::MY_RANK); +// +// ModuleBase::SphericalBesselTransformer sbt; +// beta.set_transformer(&sbt); +// for (int l = 0; l != beta.lmax(); ++l) { +// for (int izeta = 0; izeta != beta.nzeta(l); ++izeta) { +// EXPECT_EQ(beta.chi(l, izeta).ptr_sbt(), &sbt); +// } +// } +// +// beta.set_uniform_grid(true, 2001, 20.0); +// for (int l = 0; l != beta.lmax(); ++l) { +// for (int izeta = 0; izeta != beta.nzeta(l); ++izeta) { +// EXPECT_EQ(beta.chi(l, izeta).nr(), 2001); +// EXPECT_EQ(beta.chi(l, izeta).rcut(), 20.0); +// EXPECT_EQ(beta.chi(l, izeta).ptr_rgrid()[1500], 15.0); +// EXPECT_EQ(beta.chi(l, izeta).ptr_rvalue()[1500], 0.0); +// } +// } +// +// double grid[5] = {0.0, 1.1, 2.2, 3.3, 4.4}; +// beta.set_grid(true, 5, grid); +// for (int l = 0; l != beta.lmax(); ++l) { +// for (int izeta = 0; izeta != beta.nzeta(l); ++izeta) { +// EXPECT_EQ(beta.chi(l, izeta).ptr_rgrid()[0], 0.0); +// EXPECT_EQ(beta.chi(l, izeta).ptr_rgrid()[1], 1.1); +// EXPECT_EQ(beta.chi(l, izeta).ptr_rgrid()[2], 2.2); +// EXPECT_EQ(beta.chi(l, izeta).ptr_rgrid()[3], 3.3); +// EXPECT_EQ(beta.chi(l, izeta).ptr_rgrid()[4], 4.4); +// } +// } +// } +// +// TEST_F(BetaRadialsTest, Copy) //{ -// /* -// * This test checks whether -// * -// * 1. copy constructor -// * 2. assignment operator -// * 3. 
polymorphic clone -// * -// * work as expected. -// * */ -// std::string file201 = "Pb_ONCV_PBE-1.0.upf"; //!< a UPF 2.0.1 file to read from -// beta.build(dir + file201, 999, nullptr, GlobalV::MY_RANK); -// -// // copy constructor -// BetaRadials Pb_copy(beta); -// -// EXPECT_EQ(Pb_copy.itype(), 999); -// EXPECT_EQ(Pb_copy.symbol(), "Pb"); -// EXPECT_EQ(Pb_copy.lmax(), 3); -// EXPECT_EQ(Pb_copy.nzeta(0), 2); -// EXPECT_EQ(Pb_copy.nzeta(1), 2); -// EXPECT_EQ(Pb_copy.nzeta(2), 2); -// EXPECT_EQ(Pb_copy.nzeta(3), 2); -// EXPECT_EQ(Pb_copy.nzeta_max(), 2); -// EXPECT_EQ(Pb_copy.nchi(), 8); -// -// EXPECT_DOUBLE_EQ(Pb_copy.rcut_max(), 3.68); -// -// EXPECT_EQ(Pb_copy.chi(0, 0).rcut(), 3.64); -// EXPECT_EQ(Pb_copy.chi(0, 0).nr(), 365); -// EXPECT_EQ(Pb_copy.chi(0, 0).izeta(), 0); -// EXPECT_DOUBLE_EQ(Pb_copy.chi(0, 0).ptr_rgrid()[0], 0.0000); -// EXPECT_DOUBLE_EQ(Pb_copy.chi(0, 0).ptr_rgrid()[8], 0.0800); -// EXPECT_DOUBLE_EQ(Pb_copy.chi(0, 0).ptr_rgrid()[364], 3.6400); -// EXPECT_DOUBLE_EQ(Pb_copy.chi(0, 0).ptr_rvalue()[0], 0.0000000000e+00); -// EXPECT_DOUBLE_EQ(Pb_copy.chi(0, 0).ptr_rvalue()[4], 5.9689893417e-02); -// EXPECT_DOUBLE_EQ(Pb_copy.chi(0, 0).ptr_rvalue()[364], -4.5888625103e-07); -// -// EXPECT_EQ(Pb_copy.chi(3, 1).rcut(), 3.68); -// EXPECT_EQ(Pb_copy.chi(3, 1).nr(), 369); -// EXPECT_EQ(Pb_copy.chi(3, 1).izeta(), 1); -// EXPECT_DOUBLE_EQ(Pb_copy.chi(3, 1).ptr_rgrid()[0], 0.0000); -// EXPECT_DOUBLE_EQ(Pb_copy.chi(3, 1).ptr_rgrid()[8], 0.0800); -// EXPECT_DOUBLE_EQ(Pb_copy.chi(3, 1).ptr_rgrid()[368], 3.6800); -// EXPECT_DOUBLE_EQ(Pb_copy.chi(3, 1).ptr_rvalue()[0], 0.0000000000e+00); -// EXPECT_DOUBLE_EQ(Pb_copy.chi(3, 1).ptr_rvalue()[4], 1.7908487484e-06); -// EXPECT_DOUBLE_EQ(Pb_copy.chi(3, 1).ptr_rvalue()[368], -7.0309158570e-06); -// -// // assignment operator -// BetaRadials Pb_assign; -// Pb_assign = beta; -// -// EXPECT_EQ(Pb_assign.itype(), 999); -// EXPECT_EQ(Pb_assign.symbol(), "Pb"); -// EXPECT_EQ(Pb_assign.lmax(), 3); -// 
EXPECT_EQ(Pb_assign.nzeta(0), 2); -// EXPECT_EQ(Pb_assign.nzeta(1), 2); -// EXPECT_EQ(Pb_assign.nzeta(2), 2); -// EXPECT_EQ(Pb_assign.nzeta(3), 2); -// EXPECT_EQ(Pb_assign.nzeta_max(), 2); -// EXPECT_EQ(Pb_assign.nchi(), 8); -// -// EXPECT_DOUBLE_EQ(Pb_assign.rcut_max(), 3.68); -// -// EXPECT_EQ(Pb_assign.chi(0, 0).rcut(), 3.64); -// EXPECT_EQ(Pb_assign.chi(0, 0).nr(), 365); -// EXPECT_EQ(Pb_assign.chi(0, 0).izeta(), 0); -// EXPECT_DOUBLE_EQ(Pb_assign.chi(0, 0).ptr_rgrid()[0], 0.0000); -// EXPECT_DOUBLE_EQ(Pb_assign.chi(0, 0).ptr_rgrid()[8], 0.0800); -// EXPECT_DOUBLE_EQ(Pb_assign.chi(0, 0).ptr_rgrid()[364], 3.6400); -// EXPECT_DOUBLE_EQ(Pb_assign.chi(0, 0).ptr_rvalue()[0], 0.0000000000e+00); -// EXPECT_DOUBLE_EQ(Pb_assign.chi(0, 0).ptr_rvalue()[4], 5.9689893417e-02); -// EXPECT_DOUBLE_EQ(Pb_assign.chi(0, 0).ptr_rvalue()[364], -4.5888625103e-07); -// -// EXPECT_EQ(Pb_assign.chi(3, 1).rcut(), 3.68); -// EXPECT_EQ(Pb_assign.chi(3, 1).nr(), 369); -// EXPECT_EQ(Pb_assign.chi(3, 1).izeta(), 1); -// EXPECT_DOUBLE_EQ(Pb_assign.chi(3, 1).ptr_rgrid()[0], 0.0000); -// EXPECT_DOUBLE_EQ(Pb_assign.chi(3, 1).ptr_rgrid()[8], 0.0800); -// EXPECT_DOUBLE_EQ(Pb_assign.chi(3, 1).ptr_rgrid()[368], 3.6800); -// EXPECT_DOUBLE_EQ(Pb_assign.chi(3, 1).ptr_rvalue()[0], 0.0000000000e+00); -// EXPECT_DOUBLE_EQ(Pb_assign.chi(3, 1).ptr_rvalue()[4], 1.7908487484e-06); -// EXPECT_DOUBLE_EQ(Pb_assign.chi(3, 1).ptr_rvalue()[368], -7.0309158570e-06); -// -// // polymorphic clone -// RadialSet* Pb_clone = beta.clone(); -// -// EXPECT_EQ(Pb_clone->itype(), 999); -// EXPECT_EQ(Pb_clone->symbol(), "Pb"); -// EXPECT_EQ(Pb_clone->lmax(), 3); -// EXPECT_EQ(Pb_clone->nzeta(0), 2); -// EXPECT_EQ(Pb_clone->nzeta(1), 2); -// EXPECT_EQ(Pb_clone->nzeta(2), 2); -// EXPECT_EQ(Pb_clone->nzeta(3), 2); -// EXPECT_EQ(Pb_clone->nzeta_max(), 2); -// EXPECT_EQ(Pb_clone->nchi(), 8); -// -// EXPECT_DOUBLE_EQ(Pb_clone->rcut_max(), 3.68); -// -// EXPECT_EQ(Pb_clone->chi(0, 0).rcut(), 3.64); -// EXPECT_EQ(Pb_clone->chi(0, 
0).nr(), 365); -// EXPECT_EQ(Pb_clone->chi(0, 0).izeta(), 0); -// EXPECT_DOUBLE_EQ(Pb_clone->chi(0, 0).ptr_rgrid()[0], 0.0000); -// EXPECT_DOUBLE_EQ(Pb_clone->chi(0, 0).ptr_rgrid()[8], 0.0800); -// EXPECT_DOUBLE_EQ(Pb_clone->chi(0, 0).ptr_rgrid()[364], 3.6400); -// EXPECT_DOUBLE_EQ(Pb_clone->chi(0, 0).ptr_rvalue()[0], 0.0000000000e+00); -// EXPECT_DOUBLE_EQ(Pb_clone->chi(0, 0).ptr_rvalue()[4], 5.9689893417e-02); -// EXPECT_DOUBLE_EQ(Pb_clone->chi(0, 0).ptr_rvalue()[364], -4.5888625103e-07); -// -// EXPECT_EQ(Pb_clone->chi(3, 1).rcut(), 3.68); -// EXPECT_EQ(Pb_clone->chi(3, 1).nr(), 369); -// EXPECT_EQ(Pb_clone->chi(3, 1).izeta(), 1); -// EXPECT_DOUBLE_EQ(Pb_clone->chi(3, 1).ptr_rgrid()[0], 0.0000); -// EXPECT_DOUBLE_EQ(Pb_clone->chi(3, 1).ptr_rgrid()[8], 0.0800); -// EXPECT_DOUBLE_EQ(Pb_clone->chi(3, 1).ptr_rgrid()[368], 3.6800); -// EXPECT_DOUBLE_EQ(Pb_clone->chi(3, 1).ptr_rvalue()[0], 0.0000000000e+00); -// EXPECT_DOUBLE_EQ(Pb_clone->chi(3, 1).ptr_rvalue()[4], 1.7908487484e-06); -// EXPECT_DOUBLE_EQ(Pb_clone->chi(3, 1).ptr_rvalue()[368], -7.0309158570e-06); -// -// delete Pb_clone; -//} -// -//TEST_F(BetaRadialsTest, BeginAndEnd) +// /* +// * This test checks whether +// * +// * 1. copy constructor +// * 2. assignment operator +// * 3. polymorphic clone +// * +// * work as expected. 
+// * */ +// std::string file201 = "Pb_ONCV_PBE-1.0.upf"; //!< a UPF 2.0.1 file to read from +// beta.build(dir + file201, 999, nullptr, GlobalV::MY_RANK); +// +// // copy constructor +// BetaRadials Pb_copy(beta); +// +// EXPECT_EQ(Pb_copy.itype(), 999); +// EXPECT_EQ(Pb_copy.symbol(), "Pb"); +// EXPECT_EQ(Pb_copy.lmax(), 3); +// EXPECT_EQ(Pb_copy.nzeta(0), 2); +// EXPECT_EQ(Pb_copy.nzeta(1), 2); +// EXPECT_EQ(Pb_copy.nzeta(2), 2); +// EXPECT_EQ(Pb_copy.nzeta(3), 2); +// EXPECT_EQ(Pb_copy.nzeta_max(), 2); +// EXPECT_EQ(Pb_copy.nchi(), 8); +// +// EXPECT_DOUBLE_EQ(Pb_copy.rcut_max(), 3.68); +// +// EXPECT_EQ(Pb_copy.chi(0, 0).rcut(), 3.64); +// EXPECT_EQ(Pb_copy.chi(0, 0).nr(), 365); +// EXPECT_EQ(Pb_copy.chi(0, 0).izeta(), 0); +// EXPECT_DOUBLE_EQ(Pb_copy.chi(0, 0).ptr_rgrid()[0], 0.0000); +// EXPECT_DOUBLE_EQ(Pb_copy.chi(0, 0).ptr_rgrid()[8], 0.0800); +// EXPECT_DOUBLE_EQ(Pb_copy.chi(0, 0).ptr_rgrid()[364], 3.6400); +// EXPECT_DOUBLE_EQ(Pb_copy.chi(0, 0).ptr_rvalue()[0], 0.0000000000e+00); +// EXPECT_DOUBLE_EQ(Pb_copy.chi(0, 0).ptr_rvalue()[4], 5.9689893417e-02); +// EXPECT_DOUBLE_EQ(Pb_copy.chi(0, 0).ptr_rvalue()[364], -4.5888625103e-07); +// +// EXPECT_EQ(Pb_copy.chi(3, 1).rcut(), 3.68); +// EXPECT_EQ(Pb_copy.chi(3, 1).nr(), 369); +// EXPECT_EQ(Pb_copy.chi(3, 1).izeta(), 1); +// EXPECT_DOUBLE_EQ(Pb_copy.chi(3, 1).ptr_rgrid()[0], 0.0000); +// EXPECT_DOUBLE_EQ(Pb_copy.chi(3, 1).ptr_rgrid()[8], 0.0800); +// EXPECT_DOUBLE_EQ(Pb_copy.chi(3, 1).ptr_rgrid()[368], 3.6800); +// EXPECT_DOUBLE_EQ(Pb_copy.chi(3, 1).ptr_rvalue()[0], 0.0000000000e+00); +// EXPECT_DOUBLE_EQ(Pb_copy.chi(3, 1).ptr_rvalue()[4], 1.7908487484e-06); +// EXPECT_DOUBLE_EQ(Pb_copy.chi(3, 1).ptr_rvalue()[368], -7.0309158570e-06); +// +// // assignment operator +// BetaRadials Pb_assign; +// Pb_assign = beta; +// +// EXPECT_EQ(Pb_assign.itype(), 999); +// EXPECT_EQ(Pb_assign.symbol(), "Pb"); +// EXPECT_EQ(Pb_assign.lmax(), 3); +// EXPECT_EQ(Pb_assign.nzeta(0), 2); +// EXPECT_EQ(Pb_assign.nzeta(1), 2); 
+// EXPECT_EQ(Pb_assign.nzeta(2), 2); +// EXPECT_EQ(Pb_assign.nzeta(3), 2); +// EXPECT_EQ(Pb_assign.nzeta_max(), 2); +// EXPECT_EQ(Pb_assign.nchi(), 8); +// +// EXPECT_DOUBLE_EQ(Pb_assign.rcut_max(), 3.68); +// +// EXPECT_EQ(Pb_assign.chi(0, 0).rcut(), 3.64); +// EXPECT_EQ(Pb_assign.chi(0, 0).nr(), 365); +// EXPECT_EQ(Pb_assign.chi(0, 0).izeta(), 0); +// EXPECT_DOUBLE_EQ(Pb_assign.chi(0, 0).ptr_rgrid()[0], 0.0000); +// EXPECT_DOUBLE_EQ(Pb_assign.chi(0, 0).ptr_rgrid()[8], 0.0800); +// EXPECT_DOUBLE_EQ(Pb_assign.chi(0, 0).ptr_rgrid()[364], 3.6400); +// EXPECT_DOUBLE_EQ(Pb_assign.chi(0, 0).ptr_rvalue()[0], 0.0000000000e+00); +// EXPECT_DOUBLE_EQ(Pb_assign.chi(0, 0).ptr_rvalue()[4], 5.9689893417e-02); +// EXPECT_DOUBLE_EQ(Pb_assign.chi(0, 0).ptr_rvalue()[364], -4.5888625103e-07); +// +// EXPECT_EQ(Pb_assign.chi(3, 1).rcut(), 3.68); +// EXPECT_EQ(Pb_assign.chi(3, 1).nr(), 369); +// EXPECT_EQ(Pb_assign.chi(3, 1).izeta(), 1); +// EXPECT_DOUBLE_EQ(Pb_assign.chi(3, 1).ptr_rgrid()[0], 0.0000); +// EXPECT_DOUBLE_EQ(Pb_assign.chi(3, 1).ptr_rgrid()[8], 0.0800); +// EXPECT_DOUBLE_EQ(Pb_assign.chi(3, 1).ptr_rgrid()[368], 3.6800); +// EXPECT_DOUBLE_EQ(Pb_assign.chi(3, 1).ptr_rvalue()[0], 0.0000000000e+00); +// EXPECT_DOUBLE_EQ(Pb_assign.chi(3, 1).ptr_rvalue()[4], 1.7908487484e-06); +// EXPECT_DOUBLE_EQ(Pb_assign.chi(3, 1).ptr_rvalue()[368], -7.0309158570e-06); +// +// // polymorphic clone +// RadialSet* Pb_clone = beta.clone(); +// +// EXPECT_EQ(Pb_clone->itype(), 999); +// EXPECT_EQ(Pb_clone->symbol(), "Pb"); +// EXPECT_EQ(Pb_clone->lmax(), 3); +// EXPECT_EQ(Pb_clone->nzeta(0), 2); +// EXPECT_EQ(Pb_clone->nzeta(1), 2); +// EXPECT_EQ(Pb_clone->nzeta(2), 2); +// EXPECT_EQ(Pb_clone->nzeta(3), 2); +// EXPECT_EQ(Pb_clone->nzeta_max(), 2); +// EXPECT_EQ(Pb_clone->nchi(), 8); +// +// EXPECT_DOUBLE_EQ(Pb_clone->rcut_max(), 3.68); +// +// EXPECT_EQ(Pb_clone->chi(0, 0).rcut(), 3.64); +// EXPECT_EQ(Pb_clone->chi(0, 0).nr(), 365); +// EXPECT_EQ(Pb_clone->chi(0, 0).izeta(), 0); +// 
EXPECT_DOUBLE_EQ(Pb_clone->chi(0, 0).ptr_rgrid()[0], 0.0000); +// EXPECT_DOUBLE_EQ(Pb_clone->chi(0, 0).ptr_rgrid()[8], 0.0800); +// EXPECT_DOUBLE_EQ(Pb_clone->chi(0, 0).ptr_rgrid()[364], 3.6400); +// EXPECT_DOUBLE_EQ(Pb_clone->chi(0, 0).ptr_rvalue()[0], 0.0000000000e+00); +// EXPECT_DOUBLE_EQ(Pb_clone->chi(0, 0).ptr_rvalue()[4], 5.9689893417e-02); +// EXPECT_DOUBLE_EQ(Pb_clone->chi(0, 0).ptr_rvalue()[364], -4.5888625103e-07); +// +// EXPECT_EQ(Pb_clone->chi(3, 1).rcut(), 3.68); +// EXPECT_EQ(Pb_clone->chi(3, 1).nr(), 369); +// EXPECT_EQ(Pb_clone->chi(3, 1).izeta(), 1); +// EXPECT_DOUBLE_EQ(Pb_clone->chi(3, 1).ptr_rgrid()[0], 0.0000); +// EXPECT_DOUBLE_EQ(Pb_clone->chi(3, 1).ptr_rgrid()[8], 0.0800); +// EXPECT_DOUBLE_EQ(Pb_clone->chi(3, 1).ptr_rgrid()[368], 3.6800); +// EXPECT_DOUBLE_EQ(Pb_clone->chi(3, 1).ptr_rvalue()[0], 0.0000000000e+00); +// EXPECT_DOUBLE_EQ(Pb_clone->chi(3, 1).ptr_rvalue()[4], 1.7908487484e-06); +// EXPECT_DOUBLE_EQ(Pb_clone->chi(3, 1).ptr_rvalue()[368], -7.0309158570e-06); +// +// delete Pb_clone; +// } +// +// TEST_F(BetaRadialsTest, BeginAndEnd) //{ -// std::string file201 = "Pb_ONCV_PBE-1.0.upf"; //!< a UPF 2.0.1 file to read from -// beta.build(dir + file201, 999, nullptr, GlobalV::MY_RANK); +// std::string file201 = "Pb_ONCV_PBE-1.0.upf"; //!< a UPF 2.0.1 file to read from +// beta.build(dir + file201, 999, nullptr, GlobalV::MY_RANK); // -// EXPECT_EQ(beta.cbegin(), &beta.chi(0, 0)); -// EXPECT_EQ(beta.cend() - 1, &beta.chi(3, 1)); -//} +// EXPECT_EQ(beta.cbegin(), &beta.chi(0, 0)); +// EXPECT_EQ(beta.cend() - 1, &beta.chi(3, 1)); +// } -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; diff --git 
a/source/source_basis/module_nao/test/hydrogen_radials_test.cpp b/source/source_basis/module_nao/test/hydrogen_radials_test.cpp index cff318f97ed..6ddd5c24142 100644 --- a/source/source_basis/module_nao/test/hydrogen_radials_test.cpp +++ b/source/source_basis/module_nao/test/hydrogen_radials_test.cpp @@ -9,136 +9,140 @@ class HydrogenRadialsTest : public ::testing::Test { - protected: - virtual void SetUp() - { - // set up the test case - itype_ = 1; - charge_ = 1.0; - nmax_ = 3; - rcut_ = 20.0; - dr_ = 0.01; - rank_ = 0; - ptr_log_ = NULL; - } + protected: + virtual void + SetUp () + { + // set up the test case + itype_ = 1; + charge_ = 1.0; + nmax_ = 3; + rcut_ = 20.0; + dr_ = 0.01; + rank_ = 0; + ptr_log_ = nullptr; + } - virtual void TearDown() - { - // tear down the test case - } + virtual void + TearDown () + { + // tear down the test case + } - int itype_; - double charge_; - int nmax_; - double rcut_; - double dr_; - int rank_; - std::ofstream* ptr_log_; + int itype_; + double charge_; + int nmax_; + double rcut_; + double dr_; + int rank_; + std::ofstream* ptr_log_; }; -TEST_F(HydrogenRadialsTest, UnzipStrategy) +TEST_F (HydrogenRadialsTest, UnzipStrategy) { HydrogenRadials hr; std::vector> nl_pairs; // minimal, 1s - nl_pairs = hr.unzip_strategy(1, "minimal-nodeless"); - EXPECT_EQ(nl_pairs.size(), 1); - EXPECT_EQ(nl_pairs[0].first, 1); - EXPECT_EQ(nl_pairs[0].second, 0); + nl_pairs = hr.unzip_strategy (1, "minimal-nodeless"); + EXPECT_EQ (nl_pairs.size (), 1); + EXPECT_EQ (nl_pairs[0].first, 1); + EXPECT_EQ (nl_pairs[0].second, 0); // minimal, 1s, 2p, 3d, 4f - nl_pairs = hr.unzip_strategy(4, "minimal-nodeless"); - EXPECT_EQ(nl_pairs.size(), 4); - EXPECT_EQ(nl_pairs[0].first, 1); - EXPECT_EQ(nl_pairs[0].second, 0); - EXPECT_EQ(nl_pairs[1].first, 2); - EXPECT_EQ(nl_pairs[1].second, 1); - EXPECT_EQ(nl_pairs[2].first, 3); - EXPECT_EQ(nl_pairs[2].second, 2); - EXPECT_EQ(nl_pairs[3].first, 4); - EXPECT_EQ(nl_pairs[3].second, 3); - nl_pairs = hr.unzip_strategy(4, 
"minimal-valence"); - EXPECT_EQ(nl_pairs.size(), 4); - EXPECT_EQ(nl_pairs[0].first, 4); - EXPECT_EQ(nl_pairs[0].second, 0); - EXPECT_EQ(nl_pairs[1].first, 4); - EXPECT_EQ(nl_pairs[1].second, 1); - EXPECT_EQ(nl_pairs[2].first, 4); - EXPECT_EQ(nl_pairs[2].second, 2); - EXPECT_EQ(nl_pairs[3].first, 4); - EXPECT_EQ(nl_pairs[3].second, 3); - nl_pairs = hr.unzip_strategy(1, "energy-full"); // H, 1s1 -> 1s -> [1s] - EXPECT_EQ(nl_pairs.size(), 1); - EXPECT_EQ(nl_pairs[0].first, 1); - EXPECT_EQ(nl_pairs[0].second, 0); - nl_pairs = hr.unzip_strategy(6, "energy-full"); // C, 1s1 2s2 2p2 -> 1s2s2p -> [1s][2s][2p] - EXPECT_EQ(nl_pairs.size(), 3); - EXPECT_EQ(nl_pairs[0].first, 1); // 1s - EXPECT_EQ(nl_pairs[0].second, 0); - EXPECT_EQ(nl_pairs[1].first, 2); // 2s - EXPECT_EQ(nl_pairs[1].second, 0); - EXPECT_EQ(nl_pairs[2].first, 2); // 2p - EXPECT_EQ(nl_pairs[2].second, 1); - nl_pairs = hr.unzip_strategy(29, "energy-full"); // Cu, 1s1 2s2 2p6 3s2 3p6 3d10 4s1 -> 1s2s2p3s3p4s3d -> [1s][2s][2p3s][3p4s][3d] - EXPECT_EQ(nl_pairs.size(), 7); - EXPECT_EQ(nl_pairs[0].first, 1); // 1s - EXPECT_EQ(nl_pairs[0].second, 0); - EXPECT_EQ(nl_pairs[1].first, 2); // 2s - EXPECT_EQ(nl_pairs[1].second, 0); - EXPECT_EQ(nl_pairs[2].first, 2); // 2p - EXPECT_EQ(nl_pairs[2].second, 1); - EXPECT_EQ(nl_pairs[3].first, 3); // 3s - EXPECT_EQ(nl_pairs[3].second, 0); - EXPECT_EQ(nl_pairs[4].first, 3); // 3p - EXPECT_EQ(nl_pairs[4].second, 1); - EXPECT_EQ(nl_pairs[5].first, 4); // 4s - EXPECT_EQ(nl_pairs[5].second, 0); - EXPECT_EQ(nl_pairs[6].first, 3); // 3d - EXPECT_EQ(nl_pairs[6].second, 2); - nl_pairs = hr.unzip_strategy(29, "energy-valence"); // Cu, 1s1 2s2 2p6 3s2 3p6 3d10 4s1 -> 3p4s3d - EXPECT_EQ(nl_pairs.size(), 3); - EXPECT_EQ(nl_pairs[0].first, 3); // 3p - EXPECT_EQ(nl_pairs[0].second, 1); - EXPECT_EQ(nl_pairs[1].first, 4); // 4s - EXPECT_EQ(nl_pairs[1].second, 0); - EXPECT_EQ(nl_pairs[2].first, 3); // 3d - EXPECT_EQ(nl_pairs[2].second, 2); - nl_pairs = hr.unzip_strategy(14, "energy-valence"); // 
Si, 1s1 2s2 2p6 3s2 3p2 -> 3s3p - EXPECT_EQ(nl_pairs.size(), 2); - EXPECT_EQ(nl_pairs[0].first, 3); // 3s - EXPECT_EQ(nl_pairs[0].second, 0); - EXPECT_EQ(nl_pairs[1].first, 3); // 3p - EXPECT_EQ(nl_pairs[1].second, 1); + nl_pairs = hr.unzip_strategy (4, "minimal-nodeless"); + EXPECT_EQ (nl_pairs.size (), 4); + EXPECT_EQ (nl_pairs[0].first, 1); + EXPECT_EQ (nl_pairs[0].second, 0); + EXPECT_EQ (nl_pairs[1].first, 2); + EXPECT_EQ (nl_pairs[1].second, 1); + EXPECT_EQ (nl_pairs[2].first, 3); + EXPECT_EQ (nl_pairs[2].second, 2); + EXPECT_EQ (nl_pairs[3].first, 4); + EXPECT_EQ (nl_pairs[3].second, 3); + nl_pairs = hr.unzip_strategy (4, "minimal-valence"); + EXPECT_EQ (nl_pairs.size (), 4); + EXPECT_EQ (nl_pairs[0].first, 4); + EXPECT_EQ (nl_pairs[0].second, 0); + EXPECT_EQ (nl_pairs[1].first, 4); + EXPECT_EQ (nl_pairs[1].second, 1); + EXPECT_EQ (nl_pairs[2].first, 4); + EXPECT_EQ (nl_pairs[2].second, 2); + EXPECT_EQ (nl_pairs[3].first, 4); + EXPECT_EQ (nl_pairs[3].second, 3); + nl_pairs = hr.unzip_strategy (1, "energy-full"); // H, 1s1 -> 1s -> [1s] + EXPECT_EQ (nl_pairs.size (), 1); + EXPECT_EQ (nl_pairs[0].first, 1); + EXPECT_EQ (nl_pairs[0].second, 0); + nl_pairs = hr.unzip_strategy (6, "energy-full"); // C, 1s1 2s2 2p2 -> 1s2s2p -> [1s][2s][2p] + EXPECT_EQ (nl_pairs.size (), 3); + EXPECT_EQ (nl_pairs[0].first, 1); // 1s + EXPECT_EQ (nl_pairs[0].second, 0); + EXPECT_EQ (nl_pairs[1].first, 2); // 2s + EXPECT_EQ (nl_pairs[1].second, 0); + EXPECT_EQ (nl_pairs[2].first, 2); // 2p + EXPECT_EQ (nl_pairs[2].second, 1); + nl_pairs = hr.unzip_strategy ( + 29, + "energy-full"); // Cu, 1s1 2s2 2p6 3s2 3p6 3d10 4s1 -> 1s2s2p3s3p4s3d -> [1s][2s][2p3s][3p4s][3d] + EXPECT_EQ (nl_pairs.size (), 7); + EXPECT_EQ (nl_pairs[0].first, 1); // 1s + EXPECT_EQ (nl_pairs[0].second, 0); + EXPECT_EQ (nl_pairs[1].first, 2); // 2s + EXPECT_EQ (nl_pairs[1].second, 0); + EXPECT_EQ (nl_pairs[2].first, 2); // 2p + EXPECT_EQ (nl_pairs[2].second, 1); + EXPECT_EQ (nl_pairs[3].first, 3); // 3s + EXPECT_EQ 
(nl_pairs[3].second, 0); + EXPECT_EQ (nl_pairs[4].first, 3); // 3p + EXPECT_EQ (nl_pairs[4].second, 1); + EXPECT_EQ (nl_pairs[5].first, 4); // 4s + EXPECT_EQ (nl_pairs[5].second, 0); + EXPECT_EQ (nl_pairs[6].first, 3); // 3d + EXPECT_EQ (nl_pairs[6].second, 2); + nl_pairs = hr.unzip_strategy (29, "energy-valence"); // Cu, 1s1 2s2 2p6 3s2 3p6 3d10 4s1 -> 3p4s3d + EXPECT_EQ (nl_pairs.size (), 3); + EXPECT_EQ (nl_pairs[0].first, 3); // 3p + EXPECT_EQ (nl_pairs[0].second, 1); + EXPECT_EQ (nl_pairs[1].first, 4); // 4s + EXPECT_EQ (nl_pairs[1].second, 0); + EXPECT_EQ (nl_pairs[2].first, 3); // 3d + EXPECT_EQ (nl_pairs[2].second, 2); + nl_pairs = hr.unzip_strategy (14, "energy-valence"); // Si, 1s1 2s2 2p6 3s2 3p2 -> 3s3p + EXPECT_EQ (nl_pairs.size (), 2); + EXPECT_EQ (nl_pairs[0].first, 3); // 3s + EXPECT_EQ (nl_pairs[0].second, 0); + EXPECT_EQ (nl_pairs[1].first, 3); // 3p + EXPECT_EQ (nl_pairs[1].second, 1); // full, 1s - nl_pairs = hr.unzip_strategy(1, "full"); - EXPECT_EQ(nl_pairs.size(), 1); - EXPECT_EQ(nl_pairs[0].first, 1); - EXPECT_EQ(nl_pairs[0].second, 0); + nl_pairs = hr.unzip_strategy (1, "full"); + EXPECT_EQ (nl_pairs.size (), 1); + EXPECT_EQ (nl_pairs[0].first, 1); + EXPECT_EQ (nl_pairs[0].second, 0); // full, 1s, 2s, 2p, 3s, 3p, 3d, 4s, 4p, 4d, 4f - nl_pairs = hr.unzip_strategy(4, "full"); - EXPECT_EQ(nl_pairs.size(), 10); - EXPECT_EQ(nl_pairs[0].first, 1); - EXPECT_EQ(nl_pairs[0].second, 0); - EXPECT_EQ(nl_pairs[1].first, 2); - EXPECT_EQ(nl_pairs[1].second, 0); - EXPECT_EQ(nl_pairs[2].first, 2); - EXPECT_EQ(nl_pairs[2].second, 1); - EXPECT_EQ(nl_pairs[3].first, 3); - EXPECT_EQ(nl_pairs[3].second, 0); - EXPECT_EQ(nl_pairs[4].first, 3); - EXPECT_EQ(nl_pairs[4].second, 1); - EXPECT_EQ(nl_pairs[5].first, 3); - EXPECT_EQ(nl_pairs[5].second, 2); - EXPECT_EQ(nl_pairs[6].first, 4); - EXPECT_EQ(nl_pairs[6].second, 0); - EXPECT_EQ(nl_pairs[7].first, 4); - EXPECT_EQ(nl_pairs[7].second, 1); - EXPECT_EQ(nl_pairs[8].first, 4); - EXPECT_EQ(nl_pairs[8].second, 2); - 
EXPECT_EQ(nl_pairs[9].first, 4); - EXPECT_EQ(nl_pairs[9].second, 3); + nl_pairs = hr.unzip_strategy (4, "full"); + EXPECT_EQ (nl_pairs.size (), 10); + EXPECT_EQ (nl_pairs[0].first, 1); + EXPECT_EQ (nl_pairs[0].second, 0); + EXPECT_EQ (nl_pairs[1].first, 2); + EXPECT_EQ (nl_pairs[1].second, 0); + EXPECT_EQ (nl_pairs[2].first, 2); + EXPECT_EQ (nl_pairs[2].second, 1); + EXPECT_EQ (nl_pairs[3].first, 3); + EXPECT_EQ (nl_pairs[3].second, 0); + EXPECT_EQ (nl_pairs[4].first, 3); + EXPECT_EQ (nl_pairs[4].second, 1); + EXPECT_EQ (nl_pairs[5].first, 3); + EXPECT_EQ (nl_pairs[5].second, 2); + EXPECT_EQ (nl_pairs[6].first, 4); + EXPECT_EQ (nl_pairs[6].second, 0); + EXPECT_EQ (nl_pairs[7].first, 4); + EXPECT_EQ (nl_pairs[7].second, 1); + EXPECT_EQ (nl_pairs[8].first, 4); + EXPECT_EQ (nl_pairs[8].second, 2); + EXPECT_EQ (nl_pairs[9].first, 4); + EXPECT_EQ (nl_pairs[9].second, 3); } -TEST_F(HydrogenRadialsTest, RadialNorm) +TEST_F (HydrogenRadialsTest, RadialNorm) { HydrogenRadials hr; std::vector r; @@ -146,318 +150,249 @@ TEST_F(HydrogenRadialsTest, RadialNorm) double dr = 0.01; double rmax = 10.0; for (double r_ = 0.0; r_ <= rmax; r_ += dr) - { - r.push_back(r_); - f.push_back(r_); - } + { + r.push_back (r_); + f.push_back (r_); + } // radial norm computes the integral of r^2*f^2 std::vector r2f2; - for (int i = 0; i < r.size(); i++) - { - r2f2.push_back(r[i]*r[i]*f[i]*f[i]); - } - double norm = hr.radial_norm(r, f); - EXPECT_EQ(norm, sqrt(ModuleBase::Integral::simpson(r.size(), r2f2.data(), dr))); + for (int i = 0; i < r.size (); i++) + { + r2f2.push_back (r[i] * r[i] * f[i] * f[i]); + } + double norm = hr.radial_norm (r, f); + EXPECT_EQ (norm, sqrt (ModuleBase::Integral::simpson (r.size (), r2f2.data (), dr))); } -TEST_F(HydrogenRadialsTest, MappingNLLZeta) +TEST_F (HydrogenRadialsTest, MappingNLLZeta) { HydrogenRadials hr; std::map, std::pair> nl_lzeta_map; int l; int lzeta; // minimal, 1s, map (1, 0) to (0, 0) - nl_lzeta_map = hr.mapping_nl_lzeta(1, "minimal-nodeless"); - 
EXPECT_EQ(nl_lzeta_map.size(), 1); - l = nl_lzeta_map[std::make_pair(1, 0)].first; - lzeta = nl_lzeta_map[std::make_pair(1, 0)].second; - EXPECT_EQ(l, 0); - EXPECT_EQ(lzeta, 0); + nl_lzeta_map = hr.mapping_nl_lzeta (1, "minimal-nodeless"); + EXPECT_EQ (nl_lzeta_map.size (), 1); + l = nl_lzeta_map[std::make_pair (1, 0)].first; + lzeta = nl_lzeta_map[std::make_pair (1, 0)].second; + EXPECT_EQ (l, 0); + EXPECT_EQ (lzeta, 0); // minimal, 1s, 2p, 3d, 4f, map (1, 0) to (0, 0), (2, 1) to (1, 0), (3, 2) to (2, 0), (4, 3) to (3, 0) - nl_lzeta_map = hr.mapping_nl_lzeta(4, "minimal-nodeless"); - EXPECT_EQ(nl_lzeta_map.size(), 4); - l = nl_lzeta_map[std::make_pair(1, 0)].first; - lzeta = nl_lzeta_map[std::make_pair(1, 0)].second; - EXPECT_EQ(l, 0); - EXPECT_EQ(lzeta, 0); - l = nl_lzeta_map[std::make_pair(2, 1)].first; - lzeta = nl_lzeta_map[std::make_pair(2, 1)].second; - EXPECT_EQ(l, 1); - EXPECT_EQ(lzeta, 0); - l = nl_lzeta_map[std::make_pair(3, 2)].first; - lzeta = nl_lzeta_map[std::make_pair(3, 2)].second; - EXPECT_EQ(l, 2); - EXPECT_EQ(lzeta, 0); - l = nl_lzeta_map[std::make_pair(4, 3)].first; - lzeta = nl_lzeta_map[std::make_pair(4, 3)].second; - EXPECT_EQ(l, 3); - EXPECT_EQ(lzeta, 0); + nl_lzeta_map = hr.mapping_nl_lzeta (4, "minimal-nodeless"); + EXPECT_EQ (nl_lzeta_map.size (), 4); + l = nl_lzeta_map[std::make_pair (1, 0)].first; + lzeta = nl_lzeta_map[std::make_pair (1, 0)].second; + EXPECT_EQ (l, 0); + EXPECT_EQ (lzeta, 0); + l = nl_lzeta_map[std::make_pair (2, 1)].first; + lzeta = nl_lzeta_map[std::make_pair (2, 1)].second; + EXPECT_EQ (l, 1); + EXPECT_EQ (lzeta, 0); + l = nl_lzeta_map[std::make_pair (3, 2)].first; + lzeta = nl_lzeta_map[std::make_pair (3, 2)].second; + EXPECT_EQ (l, 2); + EXPECT_EQ (lzeta, 0); + l = nl_lzeta_map[std::make_pair (4, 3)].first; + lzeta = nl_lzeta_map[std::make_pair (4, 3)].second; + EXPECT_EQ (l, 3); + EXPECT_EQ (lzeta, 0); // full, 1s, map (1, 0) to (0, 0) - nl_lzeta_map = hr.mapping_nl_lzeta(1, "full"); - 
EXPECT_EQ(nl_lzeta_map.size(), 1); - l = nl_lzeta_map[std::make_pair(1, 0)].first; - lzeta = nl_lzeta_map[std::make_pair(1, 0)].second; - EXPECT_EQ(l, 0); - EXPECT_EQ(lzeta, 0); - // full, 1s, 2s, 2p, 3s, 3p, 3d, 4s, 4p, 4d, 4f, + nl_lzeta_map = hr.mapping_nl_lzeta (1, "full"); + EXPECT_EQ (nl_lzeta_map.size (), 1); + l = nl_lzeta_map[std::make_pair (1, 0)].first; + lzeta = nl_lzeta_map[std::make_pair (1, 0)].second; + EXPECT_EQ (l, 0); + EXPECT_EQ (lzeta, 0); + // full, 1s, 2s, 2p, 3s, 3p, 3d, 4s, 4p, 4d, 4f, // map (1, 0), (2, 0), (3, 0), (4, 0) to (0, 0), (0, 1), (0, 2), (0, 3) // (2, 1), (3, 1), (4, 1) to (1, 0), (1, 1), (1, 2) // (3, 2), (4, 2) to (2, 0), (2, 1) // (4, 3) to (3, 0) - nl_lzeta_map = hr.mapping_nl_lzeta(4, "full"); - EXPECT_EQ(nl_lzeta_map.size(), 10); - l = nl_lzeta_map[std::make_pair(1, 0)].first; - lzeta = nl_lzeta_map[std::make_pair(1, 0)].second; - EXPECT_EQ(l, 0); - EXPECT_EQ(lzeta, 0); - l = nl_lzeta_map[std::make_pair(2, 0)].first; - lzeta = nl_lzeta_map[std::make_pair(2, 0)].second; - EXPECT_EQ(l, 0); - EXPECT_EQ(lzeta, 1); - l = nl_lzeta_map[std::make_pair(3, 0)].first; - lzeta = nl_lzeta_map[std::make_pair(3, 0)].second; - EXPECT_EQ(l, 0); - EXPECT_EQ(lzeta, 2); - l = nl_lzeta_map[std::make_pair(4, 0)].first; - lzeta = nl_lzeta_map[std::make_pair(4, 0)].second; - EXPECT_EQ(l, 0); - EXPECT_EQ(lzeta, 3); - l = nl_lzeta_map[std::make_pair(2, 1)].first; - lzeta = nl_lzeta_map[std::make_pair(2, 1)].second; - EXPECT_EQ(l, 1); - EXPECT_EQ(lzeta, 0); - l = nl_lzeta_map[std::make_pair(3, 1)].first; - lzeta = nl_lzeta_map[std::make_pair(3, 1)].second; - EXPECT_EQ(l, 1); - EXPECT_EQ(lzeta, 1); - l = nl_lzeta_map[std::make_pair(4, 1)].first; - lzeta = nl_lzeta_map[std::make_pair(4, 1)].second; - EXPECT_EQ(l, 1); - EXPECT_EQ(lzeta, 2); - l = nl_lzeta_map[std::make_pair(3, 2)].first; - lzeta = nl_lzeta_map[std::make_pair(3, 2)].second; - EXPECT_EQ(l, 2); - EXPECT_EQ(lzeta, 0); - l = nl_lzeta_map[std::make_pair(4, 2)].first; - lzeta = 
nl_lzeta_map[std::make_pair(4, 2)].second; - EXPECT_EQ(l, 2); - EXPECT_EQ(lzeta, 1); - l = nl_lzeta_map[std::make_pair(4, 3)].first; - lzeta = nl_lzeta_map[std::make_pair(4, 3)].second; - EXPECT_EQ(l, 3); - EXPECT_EQ(lzeta, 0); + nl_lzeta_map = hr.mapping_nl_lzeta (4, "full"); + EXPECT_EQ (nl_lzeta_map.size (), 10); + l = nl_lzeta_map[std::make_pair (1, 0)].first; + lzeta = nl_lzeta_map[std::make_pair (1, 0)].second; + EXPECT_EQ (l, 0); + EXPECT_EQ (lzeta, 0); + l = nl_lzeta_map[std::make_pair (2, 0)].first; + lzeta = nl_lzeta_map[std::make_pair (2, 0)].second; + EXPECT_EQ (l, 0); + EXPECT_EQ (lzeta, 1); + l = nl_lzeta_map[std::make_pair (3, 0)].first; + lzeta = nl_lzeta_map[std::make_pair (3, 0)].second; + EXPECT_EQ (l, 0); + EXPECT_EQ (lzeta, 2); + l = nl_lzeta_map[std::make_pair (4, 0)].first; + lzeta = nl_lzeta_map[std::make_pair (4, 0)].second; + EXPECT_EQ (l, 0); + EXPECT_EQ (lzeta, 3); + l = nl_lzeta_map[std::make_pair (2, 1)].first; + lzeta = nl_lzeta_map[std::make_pair (2, 1)].second; + EXPECT_EQ (l, 1); + EXPECT_EQ (lzeta, 0); + l = nl_lzeta_map[std::make_pair (3, 1)].first; + lzeta = nl_lzeta_map[std::make_pair (3, 1)].second; + EXPECT_EQ (l, 1); + EXPECT_EQ (lzeta, 1); + l = nl_lzeta_map[std::make_pair (4, 1)].first; + lzeta = nl_lzeta_map[std::make_pair (4, 1)].second; + EXPECT_EQ (l, 1); + EXPECT_EQ (lzeta, 2); + l = nl_lzeta_map[std::make_pair (3, 2)].first; + lzeta = nl_lzeta_map[std::make_pair (3, 2)].second; + EXPECT_EQ (l, 2); + EXPECT_EQ (lzeta, 0); + l = nl_lzeta_map[std::make_pair (4, 2)].first; + lzeta = nl_lzeta_map[std::make_pair (4, 2)].second; + EXPECT_EQ (l, 2); + EXPECT_EQ (lzeta, 1); + l = nl_lzeta_map[std::make_pair (4, 3)].first; + lzeta = nl_lzeta_map[std::make_pair (4, 3)].second; + EXPECT_EQ (l, 3); + EXPECT_EQ (lzeta, 0); } -TEST_F(HydrogenRadialsTest, GenerateHydrogenRadialToconv) +TEST_F (HydrogenRadialsTest, GenerateHydrogenRadialToconv) { HydrogenRadials hr; std::vector r; std::vector Rnl; - double rmax_chg1_n1l0 = 
hr.generate_hydrogen_radial_toconv( - 1.0, - false, - 1, - 0, - 1e-7, - 0, - r, - Rnl - ); + double rmax_chg1_n1l0 = hr.generate_hydrogen_radial_toconv (1.0, false, 1, 0, 1e-7, 0, r, Rnl); std::vector r2Rnl2; - for (int i = 0; i < r.size(); i++) - { - r2Rnl2.push_back(r[i]*r[i]*Rnl[i]*Rnl[i]); - } - double norm = ModuleBase::Integral::simpson(r.size(), r2Rnl2.data(), 0.01); - EXPECT_NEAR(norm, 1.0, 1e-6); + for (int i = 0; i < r.size (); i++) + { + r2Rnl2.push_back (r[i] * r[i] * Rnl[i] * Rnl[i]); + } + double norm = ModuleBase::Integral::simpson (r.size (), r2Rnl2.data (), 0.01); + EXPECT_NEAR (norm, 1.0, 1e-6); - double rmax_chg4_n2l1 = hr.generate_hydrogen_radial_toconv( - 4.0, - false, - 2, - 1, - 1e-7, - 0, - r, - Rnl - ); - r2Rnl2.clear(); - for (int i = 0; i < r.size(); i++) - { - r2Rnl2.push_back(r[i]*r[i]*Rnl[i]*Rnl[i]); - } - norm = ModuleBase::Integral::simpson(r.size(), r2Rnl2.data(), 0.01); - EXPECT_NEAR(norm, 1.0, 1e-6); + double rmax_chg4_n2l1 = hr.generate_hydrogen_radial_toconv (4.0, false, 2, 1, 1e-7, 0, r, Rnl); + r2Rnl2.clear (); + for (int i = 0; i < r.size (); i++) + { + r2Rnl2.push_back (r[i] * r[i] * Rnl[i] * Rnl[i]); + } + norm = ModuleBase::Integral::simpson (r.size (), r2Rnl2.data (), 0.01); + EXPECT_NEAR (norm, 1.0, 1e-6); - EXPECT_NE(rmax_chg1_n1l0, rmax_chg4_n2l1); + EXPECT_NE (rmax_chg1_n1l0, rmax_chg4_n2l1); } -TEST_F(HydrogenRadialsTest, Build) +TEST_F (HydrogenRadialsTest, Build) { HydrogenRadials hr; // build 1s 2p 3d - hr.build( - itype_, - charge_, - false, - nmax_, - rcut_, - dr_, - 1e-6, - rank_, - "H", - "minimal-nodeless", - ptr_log_ - ); + hr.build (itype_, charge_, false, nmax_, rcut_, dr_, 1e-6, rank_, "H", "minimal-nodeless", ptr_log_); // nmax = 1, minimal, yields 1s orbital - EXPECT_EQ(hr.lmax(), 2); - EXPECT_EQ(hr.nzeta(0), 1); - EXPECT_EQ(hr.nzeta_max(), 1); - EXPECT_EQ(hr.nchi(), 3); + EXPECT_EQ (hr.lmax (), 2); + EXPECT_EQ (hr.nzeta (0), 1); + EXPECT_EQ (hr.nzeta_max (), 1); + EXPECT_EQ (hr.nchi (), 3); // Cu, 
minimal-valence, 4s 4p 4d - hr.build( - itype_, - charge_, - false, - 4, - rcut_, - dr_, - 1e-6, - rank_, - "Cu", - "minimal-valence", - ptr_log_ - ); + hr.build (itype_, charge_, false, 4, rcut_, dr_, 1e-6, rank_, "Cu", "minimal-valence", ptr_log_); // nmax = 4, minimal-valence, yields 4s 4p 4d 4f orbitals - EXPECT_EQ(hr.lmax(), 3); - EXPECT_EQ(hr.nzeta(0), 1); - EXPECT_EQ(hr.nzeta(1), 1); - EXPECT_EQ(hr.nzeta(2), 1); - EXPECT_EQ(hr.nzeta(3), 1); - EXPECT_EQ(hr.nzeta_max(), 1); - EXPECT_EQ(hr.nchi(), 4); + EXPECT_EQ (hr.lmax (), 3); + EXPECT_EQ (hr.nzeta (0), 1); + EXPECT_EQ (hr.nzeta (1), 1); + EXPECT_EQ (hr.nzeta (2), 1); + EXPECT_EQ (hr.nzeta (3), 1); + EXPECT_EQ (hr.nzeta_max (), 1); + EXPECT_EQ (hr.nchi (), 4); // Cu, energy-full, 1s 2s 2p 3s 3p 4s 3d - hr.build( - itype_, - charge_, - false, - 29, - rcut_, - dr_, - 1e-6, - rank_, - "Cu", - "energy-full", - ptr_log_ - ); + hr.build (itype_, charge_, false, 29, rcut_, dr_, 1e-6, rank_, "Cu", "energy-full", ptr_log_); // nmax = 29, energy-full, yields 1s 2s 2p 3s 3p 4s 3d orbitals - EXPECT_EQ(hr.lmax(), 2); - EXPECT_EQ(hr.nzeta(0), 4); - EXPECT_EQ(hr.nzeta(1), 2); - EXPECT_EQ(hr.nzeta(2), 1); - EXPECT_EQ(hr.nzeta(3), 0); - EXPECT_EQ(hr.nzeta_max(), 4); - EXPECT_EQ(hr.nchi(), 7); + EXPECT_EQ (hr.lmax (), 2); + EXPECT_EQ (hr.nzeta (0), 4); + EXPECT_EQ (hr.nzeta (1), 2); + EXPECT_EQ (hr.nzeta (2), 1); + EXPECT_EQ (hr.nzeta (3), 0); + EXPECT_EQ (hr.nzeta_max (), 4); + EXPECT_EQ (hr.nchi (), 7); // Cu, energy-valence, 3p 4s 3d - printf("Unittest for generating Cu energy-valence orbitals without Slater screening:\n"); - hr.build( - itype_, - 29, // use the real nuclear charge for Cu - false, - 29, - rcut_, - dr_, - 1e-6, - rank_, - "Cu", - "energy-valence", - ptr_log_ - ); + printf ("Unittest for generating Cu energy-valence orbitals without Slater screening:\n"); + hr.build (itype_, + 29, // use the real nuclear charge for Cu + false, + 29, + rcut_, + dr_, + 1e-6, + rank_, + "Cu", + "energy-valence", + ptr_log_); // 
nmax = 29, energy-valence, yields 3p 4s 3d orbitals - EXPECT_EQ(hr.lmax(), 2); - EXPECT_EQ(hr.nzeta(0), 1); - EXPECT_EQ(hr.nzeta(1), 1); - EXPECT_EQ(hr.nzeta(2), 1); - EXPECT_EQ(hr.nzeta(3), 0); - EXPECT_EQ(hr.nzeta_max(), 1); - EXPECT_EQ(hr.nchi(), 3); + EXPECT_EQ (hr.lmax (), 2); + EXPECT_EQ (hr.nzeta (0), 1); + EXPECT_EQ (hr.nzeta (1), 1); + EXPECT_EQ (hr.nzeta (2), 1); + EXPECT_EQ (hr.nzeta (3), 0); + EXPECT_EQ (hr.nzeta_max (), 1); + EXPECT_EQ (hr.nchi (), 3); // test with Slater screening on Cu // Cu, energy-valence, 3p 4s 3d - printf("Unittest for generating Cu energy-valence orbitals with Slater screening:\n"); - hr.build( - itype_, - 29, // use the real nuclear charge for Cu - true, - 29, - rcut_, - dr_, - 1e-6, - rank_, - "Cu", - "energy-valence", - ptr_log_ - ); + printf ("Unittest for generating Cu energy-valence orbitals with Slater screening:\n"); + hr.build (itype_, + 29, // use the real nuclear charge for Cu + true, + 29, + rcut_, + dr_, + 1e-6, + rank_, + "Cu", + "energy-valence", + ptr_log_); // nmax = 29, energy-valence, yields 3p 4s 3d orbitals - EXPECT_EQ(hr.lmax(), 2); - EXPECT_EQ(hr.nzeta(0), 1); - EXPECT_EQ(hr.nzeta(1), 1); - EXPECT_EQ(hr.nzeta(2), 1); - EXPECT_EQ(hr.nzeta(3), 0); - EXPECT_EQ(hr.nzeta_max(), 1); - EXPECT_EQ(hr.nchi(), 3); + EXPECT_EQ (hr.lmax (), 2); + EXPECT_EQ (hr.nzeta (0), 1); + EXPECT_EQ (hr.nzeta (1), 1); + EXPECT_EQ (hr.nzeta (2), 1); + EXPECT_EQ (hr.nzeta (3), 0); + EXPECT_EQ (hr.nzeta_max (), 1); + EXPECT_EQ (hr.nchi (), 3); // build 1s 2s 2p 3s 3p 3d 4s 4p 4d 4f - hr.build( - itype_, - charge_, - false, - 4, - rcut_, - dr_, - 1e-6, - rank_, - "H", - "full", - ptr_log_ - ); + hr.build (itype_, charge_, false, 4, rcut_, dr_, 1e-6, rank_, "H", "full", ptr_log_); // nmax = 4, full, yields 1s 2s 2p 3s 3p 3d 4s 4p 4d 4f orbitals - EXPECT_EQ(hr.lmax(), 3); - EXPECT_EQ(hr.nzeta(0), 4); - EXPECT_EQ(hr.nzeta(1), 3); - EXPECT_EQ(hr.nzeta(2), 2); - EXPECT_EQ(hr.nzeta(3), 1); - EXPECT_EQ(hr.nzeta_max(), 4); - 
EXPECT_EQ(hr.nchi(), 10); + EXPECT_EQ (hr.lmax (), 3); + EXPECT_EQ (hr.nzeta (0), 4); + EXPECT_EQ (hr.nzeta (1), 3); + EXPECT_EQ (hr.nzeta (2), 2); + EXPECT_EQ (hr.nzeta (3), 1); + EXPECT_EQ (hr.nzeta_max (), 4); + EXPECT_EQ (hr.nchi (), 10); } -TEST_F(HydrogenRadialsTest, SlaterScreeningTest) +TEST_F (HydrogenRadialsTest, SlaterScreeningTest) { HydrogenRadials hr; - double sigma = hr.slater_screening("H", 1, 0); - EXPECT_NEAR(sigma, 0.0, 1e-6); - sigma = hr.slater_screening("He", 1, 0); - EXPECT_NEAR(sigma, 0.30, 1e-6); - sigma = hr.slater_screening("F", 2, 1); - EXPECT_NEAR(sigma, 3.8, 1e-6); - sigma = hr.slater_screening("Ca", 4, 0); - EXPECT_NEAR(sigma, 17.15, 1e-6); - sigma = hr.slater_screening("Sc", 4, 0); - EXPECT_NEAR(sigma, 18, 1e-6); - sigma = hr.slater_screening("Cu", 4, 0); - EXPECT_NEAR(sigma, 25.3, 1e-6); - sigma = hr.slater_screening("Cu", 3, 2); - EXPECT_NEAR(sigma, 21.15, 1e-6); - sigma = hr.slater_screening("Pt", 6, 0); - EXPECT_NEAR(sigma, 74.45, 1e-6); - sigma = hr.slater_screening("Pt", 5, 1); - EXPECT_NEAR(sigma, 57.65, 1e-6); - sigma = hr.slater_screening("Os", 1, 0); - EXPECT_NEAR(sigma, 0.3, 1e-6); + double sigma = hr.slater_screening ("H", 1, 0); + EXPECT_NEAR (sigma, 0.0, 1e-6); + sigma = hr.slater_screening ("He", 1, 0); + EXPECT_NEAR (sigma, 0.30, 1e-6); + sigma = hr.slater_screening ("F", 2, 1); + EXPECT_NEAR (sigma, 3.8, 1e-6); + sigma = hr.slater_screening ("Ca", 4, 0); + EXPECT_NEAR (sigma, 17.15, 1e-6); + sigma = hr.slater_screening ("Sc", 4, 0); + EXPECT_NEAR (sigma, 18, 1e-6); + sigma = hr.slater_screening ("Cu", 4, 0); + EXPECT_NEAR (sigma, 25.3, 1e-6); + sigma = hr.slater_screening ("Cu", 3, 2); + EXPECT_NEAR (sigma, 21.15, 1e-6); + sigma = hr.slater_screening ("Pt", 6, 0); + EXPECT_NEAR (sigma, 74.45, 1e-6); + sigma = hr.slater_screening ("Pt", 5, 1); + EXPECT_NEAR (sigma, 57.65, 1e-6); + sigma = hr.slater_screening ("Os", 1, 0); + EXPECT_NEAR (sigma, 0.3, 1e-6); } -int main(int argc, char** argv) +int + main (int argc, 
char** argv) { - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); return result; } \ No newline at end of file diff --git a/source/source_basis/module_nao/test/numerical_radial_test.cpp b/source/source_basis/module_nao/test/numerical_radial_test.cpp index 4f138bc8a10..ab1b2e8033e 100644 --- a/source/source_basis/module_nao/test/numerical_radial_test.cpp +++ b/source/source_basis/module_nao/test/numerical_radial_test.cpp @@ -52,8 +52,8 @@ using ModuleBase::SphericalBesselTransformer; class NumericalRadialTest : public ::testing::Test { protected: - void SetUp(); - void TearDown(); + void SetUp (); + void TearDown (); int sz_max = 10000; //!< size of each buffer double* grid = nullptr; //!< buffer for input grid @@ -65,21 +65,23 @@ class NumericalRadialTest : public ::testing::Test double tol = 1e-8; //!< tolerance for element-wise numerical error }; -void NumericalRadialTest::SetUp() +void + NumericalRadialTest::SetUp () { grid = new double[sz_max]; f = new double[sz_max]; g = new double[sz_max]; } -void NumericalRadialTest::TearDown() +void + NumericalRadialTest::TearDown () { delete[] f; delete[] g; delete[] grid; } -TEST_F(NumericalRadialTest, ConstructAndAssign) +TEST_F (NumericalRadialTest, ConstructAndAssign) { /* * Tests the copy constructor and copy assignment operator. @@ -87,91 +89,91 @@ TEST_F(NumericalRadialTest, ConstructAndAssign) double dk = PI / 50; int sz = 10000; int pk = -2; - double pref = 48 * std::sqrt(2. / PI); + double pref = 48 * std::sqrt (2. 
/ PI); for (int ik = 0; ik != sz; ++ik) - { - double k = ik * dk; - grid[ik] = k; - f[ik] = pref / std::pow(k * k + 1, 4); - } - - chi.build(2, false, sz, grid, f, pk); - chi.set_uniform_grid(true, sz, PI / dk, 't'); - - NumericalRadial chi2(chi); - EXPECT_EQ(chi.symbol(), chi2.symbol()); - EXPECT_EQ(chi.izeta(), chi2.izeta()); - EXPECT_EQ(chi.itype(), chi2.itype()); - EXPECT_EQ(chi.l(), chi2.l()); - - EXPECT_EQ(chi.nr(), chi2.nr()); - EXPECT_EQ(chi.nk(), chi2.nk()); - EXPECT_EQ(chi.rcut(), chi2.rcut()); - EXPECT_EQ(chi.kcut(), chi2.kcut()); - - ASSERT_NE(chi2.rgrid(), nullptr); - ASSERT_NE(chi2.rvalue(), nullptr); + { + double k = ik * dk; + grid[ik] = k; + f[ik] = pref / std::pow (k * k + 1, 4); + } + + chi.build (2, false, sz, grid, f, pk); + chi.set_uniform_grid (true, sz, PI / dk, 't'); + + NumericalRadial chi2 (chi); + EXPECT_EQ (chi.symbol (), chi2.symbol ()); + EXPECT_EQ (chi.izeta (), chi2.izeta ()); + EXPECT_EQ (chi.itype (), chi2.itype ()); + EXPECT_EQ (chi.l (), chi2.l ()); + + EXPECT_EQ (chi.nr (), chi2.nr ()); + EXPECT_EQ (chi.nk (), chi2.nk ()); + EXPECT_EQ (chi.rcut (), chi2.rcut ()); + EXPECT_EQ (chi.kcut (), chi2.kcut ()); + + ASSERT_NE (chi2.rgrid (), nullptr); + ASSERT_NE (chi2.rvalue (), nullptr); for (int ir = 0; ir != sz; ++ir) - { - EXPECT_EQ(chi.rgrid(ir), chi2.rgrid(ir)); - EXPECT_EQ(chi.rvalue(ir), chi2.rvalue(ir)); - } + { + EXPECT_EQ (chi.rgrid (ir), chi2.rgrid (ir)); + EXPECT_EQ (chi.rvalue (ir), chi2.rvalue (ir)); + } - ASSERT_NE(chi2.kgrid(), nullptr); - ASSERT_NE(chi2.kvalue(), nullptr); + ASSERT_NE (chi2.kgrid (), nullptr); + ASSERT_NE (chi2.kvalue (), nullptr); for (int ik = 0; ik != sz; ++ik) - { - EXPECT_EQ(chi.kgrid(ik), chi2.kgrid(ik)); - EXPECT_EQ(chi.kvalue(ik), chi2.kvalue(ik)); - } + { + EXPECT_EQ (chi.kgrid (ik), chi2.kgrid (ik)); + EXPECT_EQ (chi.kvalue (ik), chi2.kvalue (ik)); + } - EXPECT_EQ(chi.pr(), chi2.pr()); - EXPECT_EQ(chi.pk(), chi2.pk()); - EXPECT_EQ(chi.is_fft_compliant(), chi2.is_fft_compliant()); - 
EXPECT_EQ(chi2.sbt(), chi.sbt()); + EXPECT_EQ (chi.pr (), chi2.pr ()); + EXPECT_EQ (chi.pk (), chi2.pk ()); + EXPECT_EQ (chi.is_fft_compliant (), chi2.is_fft_compliant ()); + EXPECT_EQ (chi2.sbt (), chi.sbt ()); NumericalRadial chi3; chi3 = chi; - EXPECT_EQ(chi.symbol(), chi3.symbol()); - EXPECT_EQ(chi.izeta(), chi3.izeta()); - EXPECT_EQ(chi.itype(), chi3.itype()); - EXPECT_EQ(chi.l(), chi3.l()); - - EXPECT_EQ(chi.nr(), chi3.nr()); - EXPECT_EQ(chi.nk(), chi3.nk()); - EXPECT_EQ(chi.rcut(), chi3.rcut()); - EXPECT_EQ(chi.kcut(), chi3.kcut()); - - ASSERT_NE(chi3.rgrid(), nullptr); - ASSERT_NE(chi3.rvalue(), nullptr); + EXPECT_EQ (chi.symbol (), chi3.symbol ()); + EXPECT_EQ (chi.izeta (), chi3.izeta ()); + EXPECT_EQ (chi.itype (), chi3.itype ()); + EXPECT_EQ (chi.l (), chi3.l ()); + + EXPECT_EQ (chi.nr (), chi3.nr ()); + EXPECT_EQ (chi.nk (), chi3.nk ()); + EXPECT_EQ (chi.rcut (), chi3.rcut ()); + EXPECT_EQ (chi.kcut (), chi3.kcut ()); + + ASSERT_NE (chi3.rgrid (), nullptr); + ASSERT_NE (chi3.rvalue (), nullptr); for (int ir = 0; ir != sz; ++ir) - { - EXPECT_EQ(chi.rgrid(ir), chi3.rgrid(ir)); - EXPECT_EQ(chi.rvalue(ir), chi3.rvalue(ir)); - } + { + EXPECT_EQ (chi.rgrid (ir), chi3.rgrid (ir)); + EXPECT_EQ (chi.rvalue (ir), chi3.rvalue (ir)); + } - ASSERT_NE(chi3.kgrid(), nullptr); - ASSERT_NE(chi3.kvalue(), nullptr); + ASSERT_NE (chi3.kgrid (), nullptr); + ASSERT_NE (chi3.kvalue (), nullptr); for (int ik = 0; ik != sz; ++ik) - { - EXPECT_EQ(chi.kgrid(ik), chi3.kgrid(ik)); - EXPECT_EQ(chi.kvalue(ik), chi3.kvalue(ik)); - } + { + EXPECT_EQ (chi.kgrid (ik), chi3.kgrid (ik)); + EXPECT_EQ (chi.kvalue (ik), chi3.kvalue (ik)); + } - EXPECT_EQ(chi.pr(), chi3.pr()); - EXPECT_EQ(chi.pk(), chi3.pk()); - EXPECT_EQ(chi.is_fft_compliant(), chi3.is_fft_compliant()); + EXPECT_EQ (chi.pr (), chi3.pr ()); + EXPECT_EQ (chi.pk (), chi3.pk ()); + EXPECT_EQ (chi.is_fft_compliant (), chi3.is_fft_compliant ()); SphericalBesselTransformer sbt; - chi.set_transformer(sbt, 1); + chi.set_transformer 
(sbt, 1); chi3 = chi; - EXPECT_EQ(chi3.sbt(), chi.sbt()); + EXPECT_EQ (chi3.sbt (), chi.sbt ()); // self assignment is not common, but it should not throw - EXPECT_NO_THROW(chi3 = chi3); + EXPECT_NO_THROW (chi3 = chi3); } -TEST_F(NumericalRadialTest, BuildAndGet) +TEST_F (NumericalRadialTest, BuildAndGet) { /* * Builds a NumericalRadial object and gets access to its members. @@ -184,40 +186,40 @@ TEST_F(NumericalRadialTest, BuildAndGet) int izeta = 5; std::string symbol = "Au"; for (int ir = 0; ir != sz; ++ir) - { - double r = ir * dr; - grid[ir] = r; - f[ir] = std::exp(-r); - } + { + double r = ir * dr; + grid[ir] = r; + f[ir] = std::exp (-r); + } - chi.build(l, true, sz, grid, f, pr, izeta, symbol, itype); + chi.build (l, true, sz, grid, f, pr, izeta, symbol, itype); - EXPECT_EQ(chi.symbol(), symbol); - EXPECT_EQ(chi.izeta(), izeta); - EXPECT_EQ(chi.itype(), itype); - EXPECT_EQ(chi.l(), l); + EXPECT_EQ (chi.symbol (), symbol); + EXPECT_EQ (chi.izeta (), izeta); + EXPECT_EQ (chi.itype (), itype); + EXPECT_EQ (chi.l (), l); - EXPECT_EQ(chi.nr(), sz); - EXPECT_EQ(chi.nk(), 0); - EXPECT_EQ(chi.rmax(), grid[sz - 1]); + EXPECT_EQ (chi.nr (), sz); + EXPECT_EQ (chi.nk (), 0); + EXPECT_EQ (chi.rmax (), grid[sz - 1]); - ASSERT_NE(chi.rgrid(), nullptr); - ASSERT_NE(chi.rvalue(), nullptr); + ASSERT_NE (chi.rgrid (), nullptr); + ASSERT_NE (chi.rvalue (), nullptr); for (int ir = 0; ir != sz; ++ir) - { - EXPECT_EQ(chi.rgrid(ir), grid[ir]); - EXPECT_EQ(chi.rvalue(ir), f[ir]); - } + { + EXPECT_EQ (chi.rgrid (ir), grid[ir]); + EXPECT_EQ (chi.rvalue (ir), f[ir]); + } - EXPECT_EQ(chi.kgrid(), nullptr); - EXPECT_EQ(chi.kvalue(), nullptr); + EXPECT_EQ (chi.kgrid (), nullptr); + EXPECT_EQ (chi.kvalue (), nullptr); - EXPECT_EQ(chi.pr(), pr); - EXPECT_EQ(chi.pk(), 0); - EXPECT_EQ(chi.is_fft_compliant(), false); + EXPECT_EQ (chi.pr (), pr); + EXPECT_EQ (chi.pk (), 0); + EXPECT_EQ (chi.is_fft_compliant (), false); } -TEST_F(NumericalRadialTest, GridSetAndWipe) +TEST_F (NumericalRadialTest, 
GridSetAndWipe) { /* * This test first builds a NumericalRadial object with r-space values @@ -236,50 +238,50 @@ TEST_F(NumericalRadialTest, GridSetAndWipe) double dr = 0.01; int nr = 5000; int pr = -1; - for (int ir = 0; ir != nr ; ++ir) - { - double r = ir * dr; - grid[ir] = r; - f[ir] = std::exp(-r); - } + for (int ir = 0; ir != nr; ++ir) + { + double r = ir * dr; + grid[ir] = r; + f[ir] = std::exp (-r); + } - chi.build(1, true, nr, grid, f, pr); + chi.build (1, true, nr, grid, f, pr); int nk = 2000; double* kgrid = new double[nk]; double dk = 0.01; for (int ik = 0; ik != nk; ++ik) - { - kgrid[ik] = ik * dk; - } + { + kgrid[ik] = ik * dk; + } - chi.set_grid(false, nk, kgrid, 't'); + chi.set_grid (false, nk, kgrid, 't'); - double pref = 8 * std::sqrt(2. / PI); + double pref = 8 * std::sqrt (2. / PI); for (int ik = 0; ik != nk; ++ik) - { - double k = ik * dk; - EXPECT_NEAR(pref * k / std::pow(k * k + 1, 3), chi.kvalue(ik), tol); - } + { + double k = ik * dk; + EXPECT_NEAR (pref * k / std::pow (k * k + 1, 3), chi.kvalue (ik), tol); + } - EXPECT_EQ(chi.is_fft_compliant(), false); + EXPECT_EQ (chi.is_fft_compliant (), false); - chi.wipe(true); - EXPECT_EQ(chi.rgrid(), nullptr); - EXPECT_EQ(chi.rvalue(), nullptr); - EXPECT_EQ(chi.nr(), 0); - EXPECT_EQ(chi.is_fft_compliant(), false); + chi.wipe (true); + EXPECT_EQ (chi.rgrid (), nullptr); + EXPECT_EQ (chi.rvalue (), nullptr); + EXPECT_EQ (chi.nr (), 0); + EXPECT_EQ (chi.is_fft_compliant (), false); - chi.wipe(false); - EXPECT_EQ(chi.kgrid(), nullptr); - EXPECT_EQ(chi.kvalue(), nullptr); - EXPECT_EQ(chi.nk(), 0); + chi.wipe (false); + EXPECT_EQ (chi.kgrid (), nullptr); + EXPECT_EQ (chi.kvalue (), nullptr); + EXPECT_EQ (chi.nk (), 0); delete[] kgrid; } -TEST_F(NumericalRadialTest, SetUniformGrid) +TEST_F (NumericalRadialTest, SetUniformGrid) { /* * This test starts from a NumericalRadial object with k-space values of @@ -294,26 +296,27 @@ TEST_F(NumericalRadialTest, SetUniformGrid) double dk = PI / 50; int sz = 10000; 
int pk = -2; - double pref = 48 * std::sqrt(2. / PI); + double pref = 48 * std::sqrt (2. / PI); for (int ik = 0; ik != sz; ++ik) - { - double k = ik * dk; - grid[ik] = k; - f[ik] = pref / std::pow(k * k + 1, 4); - } + { + double k = ik * dk; + grid[ik] = k; + f[ik] = pref / std::pow (k * k + 1, 4); + } - chi.build(2, false, sz, grid, f, pk); - chi.set_uniform_grid(true, sz, PI / dk, 't', true); + chi.build (2, false, sz, grid, f, pk); + chi.set_uniform_grid (true, sz, PI / dk, 't', true); - double dr = PI / chi.kmax(); + double dr = PI / chi.kmax (); for (int ir = 0; ir != sz; ++ir) - { - double r = ir * dr; - EXPECT_NEAR(r * r * std::exp(-r), chi.rvalue(ir), tol); - } + { + double r = ir * dr; + EXPECT_NEAR (r * r * std::exp (-r), chi.rvalue (ir), tol); + } } -TEST_F(NumericalRadialTest, Interpolate) { +TEST_F (NumericalRadialTest, Interpolate) +{ /* * This test starts with a NumericalRadial object with k-space values * @@ -328,58 +331,62 @@ TEST_F(NumericalRadialTest, Interpolate) { double dk = 0.01; int sz = 10000; int pk = -2; - double pref = 48 * std::sqrt(2./PI); - for (int ik = 0; ik != sz; ++ik) { - double k = ik * dk; - k *= std::exp(0.02*k); - grid[ik] = k; - f[ik] = pref / std::pow(k*k+1, 4); - } + double pref = 48 * std::sqrt (2. 
/ PI); + for (int ik = 0; ik != sz; ++ik) + { + double k = ik * dk; + k *= std::exp (0.02 * k); + grid[ik] = k; + f[ik] = pref / std::pow (k * k + 1, 4); + } - chi.build(2, false, sz, grid, f, pk); + chi.build (2, false, sz, grid, f, pk); - chi.set_uniform_grid(false, sz, PI/50*(sz-1), 'i', true); + chi.set_uniform_grid (false, sz, PI / 50 * (sz - 1), 'i', true); - double dr = PI / chi.kmax(); + double dr = PI / chi.kmax (); for (int ir = 0; ir != sz; ++ir) - { - double r = ir * dr; - EXPECT_NEAR(r*r*std::exp(-r), chi.rvalue(ir), tol*2); // slightly relax the tolerance due to interpolation - } + { + double r = ir * dr; + EXPECT_NEAR (r * r * std::exp (-r), + chi.rvalue (ir), + tol * 2); // slightly relax the tolerance due to interpolation + } } -TEST_F(NumericalRadialTest, ZeroPadding) { +TEST_F (NumericalRadialTest, ZeroPadding) +{ /* * This test checks whether set_grid properly pads the value array. * */ double dk = PI / 50; int sz1 = 2000; int pk = -2; - double pref = 48 * std::sqrt(2. / PI); + double pref = 48 * std::sqrt (2. / PI); for (int ik = 0; ik != sz1; ++ik) - { - double k = ik * dk; - grid[ik] = k; - f[ik] = pref / std::pow(k * k + 1, 4); - } + { + double k = ik * dk; + grid[ik] = k; + f[ik] = pref / std::pow (k * k + 1, 4); + } - chi.build(2, false, sz1, grid, f, pk); + chi.build (2, false, sz1, grid, f, pk); int sz2 = 10000; - chi.set_uniform_grid(false, sz2, dk*(sz2-1), 'i'); + chi.set_uniform_grid (false, sz2, dk * (sz2 - 1), 'i'); for (int ik = 0; ik != sz1; ++ik) - { - EXPECT_EQ(f[ik], chi.kvalue(ik)); - } + { + EXPECT_EQ (f[ik], chi.kvalue (ik)); + } for (int ik = sz1; ik != sz2; ++ik) - { - EXPECT_EQ(0.0, chi.kvalue(ik)); - } + { + EXPECT_EQ (0.0, chi.kvalue (ik)); + } } -TEST_F(NumericalRadialTest, SetValue) +TEST_F (NumericalRadialTest, SetValue) { /* * This test attempts to updates values in a NumericalRadial object. 
@@ -388,45 +395,45 @@ TEST_F(NumericalRadialTest, SetValue) int sz = 5000; int p = -1; for (int i = 0; i != sz; ++i) - { - double r = i * dx; - grid[i] = r; - f[i] = std::exp(-r); - } + { + double r = i * dx; + grid[i] = r; + f[i] = std::exp (-r); + } int sz_cut = 20; - std::fill(f + sz_cut, f + sz, 0.0); + std::fill (f + sz_cut, f + sz, 0.0); - chi.build(1, true, sz, grid, f, p); + chi.build (1, true, sz, grid, f, p); - EXPECT_EQ(chi.rcut(), sz_cut * dx); - EXPECT_EQ(chi.rmax(), (sz-1) * dx); + EXPECT_EQ (chi.rcut (), sz_cut * dx); + EXPECT_EQ (chi.rmax (), (sz - 1) * dx); for (int ir = 0; ir != sz; ++ir) - { - f[ir] *= 2; - } - chi.set_value(true, f, p); + { + f[ir] *= 2; + } + chi.set_value (true, f, p); for (int i = 0; i != sz; ++i) - { - EXPECT_EQ(chi.rvalue(i), f[i]); - } + { + EXPECT_EQ (chi.rvalue (i), f[i]); + } - chi.build(1, false, sz, grid, f, p); + chi.build (1, false, sz, grid, f, p); for (int i = 0; i != sz; ++i) - { - f[i] *= 2; - } - chi.set_value(false, f, p); + { + f[i] *= 2; + } + chi.set_value (false, f, p); for (int i = 0; i != sz; ++i) - { - EXPECT_EQ(chi.kvalue(i), f[i]); - } + { + EXPECT_EQ (chi.kvalue (i), f[i]); + } } -TEST_F(NumericalRadialTest, RadialTable) +TEST_F (NumericalRadialTest, RadialTable) { /* * This test checks the radial table for the two-center integral @@ -447,68 +454,68 @@ TEST_F(NumericalRadialTest, RadialTable) * U(l=0, R) = c * 1/32 * exp(-R*R/2) * * */ - double pref = std::sqrt(2) / 16; + double pref = std::sqrt (2) / 16; int sz = 5000; double dr = 0.01; double dk = PI / ((sz - 1) * dr); for (int ir = 0; ir != sz; ++ir) - { - double r = ir * dr; - grid[ir] = r; - f[ir] = std::exp(-r * r); - } + { + double r = ir * dr; + grid[ir] = r; + f[ir] = std::exp (-r * r); + } NumericalRadial chi1, chi2; - chi1.build(0, true, sz, grid, f, 0); - chi2.build(2, true, sz, grid, f, -2); + chi1.build (0, true, sz, grid, f, 0); + chi2.build (2, true, sz, grid, f, -2); - chi1.set_uniform_grid(false, sz, PI / dr, 't'); - 
chi2.set_uniform_grid(false, sz, PI / dr, 't'); + chi1.set_uniform_grid (false, sz, PI / dr, 't'); + chi2.set_uniform_grid (false, sz, PI / dr, 't'); // make sure chi(k) have expected values for (int ik = 1; ik != sz; ++ik) - { - double k = ik * dk; - ASSERT_NEAR(chi1.kvalue(ik), 4 * pref * std::exp(-k * k / 4), tol); - ASSERT_NEAR(chi2.kvalue(ik), pref * k * k * std::exp(-k * k / 4), tol); - } + { + double k = ik * dk; + ASSERT_NEAR (chi1.kvalue (ik), 4 * pref * std::exp (-k * k / 4), tol); + ASSERT_NEAR (chi2.kvalue (ik), pref * k * k * std::exp (-k * k / 4), tol); + } double* table = new double[sz]; - double table_pref = ModuleBase::FOUR_PI * std::sqrt(ModuleBase::PI / 2.0); - double rmax_tab = chi1.rmax(); + double table_pref = ModuleBase::FOUR_PI * std::sqrt (ModuleBase::PI / 2.0); + double rmax_tab = chi1.rmax (); - chi1.radtab('S', chi2, 0, table, chi1.nr(), rmax_tab); + chi1.radtab ('S', chi2, 0, table, chi1.nr (), rmax_tab); for (int i = 0; i != sz; ++i) - { - double R = i * dr; - EXPECT_NEAR(table[i], table_pref * (3 - R * R) / 32 * std::exp(-R * R / 2), tol); - } + { + double R = i * dr; + EXPECT_NEAR (table[i], table_pref * (3 - R * R) / 32 * std::exp (-R * R / 2), tol); + } - chi1.radtab('S', chi2, 2, table, chi1.nr(), rmax_tab); + chi1.radtab ('S', chi2, 2, table, chi1.nr (), rmax_tab); for (int i = 0; i != sz; ++i) - { - double R = i * dr; - EXPECT_NEAR(table[i], table_pref * R * R / 32 * std::exp(-R * R / 2), tol); - } + { + double R = i * dr; + EXPECT_NEAR (table[i], table_pref * R * R / 32 * std::exp (-R * R / 2), tol); + } - chi1.radtab('T', chi2, 0, table, chi1.nr(), rmax_tab); + chi1.radtab ('T', chi2, 0, table, chi1.nr (), rmax_tab); for (int i = 0; i != sz; ++i) - { - double R = i * dr; - EXPECT_NEAR(table[i], table_pref * (std::pow(R, 4) - 10 * R * R + 15) / 32 * std::exp(-R * R / 2), tol); - } + { + double R = i * dr; + EXPECT_NEAR (table[i], table_pref * (std::pow (R, 4) - 10 * R * R + 15) / 32 * std::exp (-R * R / 2), tol); + } - 
chi1.radtab('U', chi2, 0, table, chi1.nr(), rmax_tab); + chi1.radtab ('U', chi2, 0, table, chi1.nr (), rmax_tab); for (int i = 0; i != sz; ++i) - { - double R = i * dr; - EXPECT_NEAR(table[i], table_pref * 1. / 32 * std::exp(-R * R / 2), tol); - } + { + double R = i * dr; + EXPECT_NEAR (table[i], table_pref * 1. / 32 * std::exp (-R * R / 2), tol); + } delete[] table; } -TEST_F(NumericalRadialTest, ToNumericalOrbitalLm) +TEST_F (NumericalRadialTest, ToNumericalOrbitalLm) { /* * Builds a Numerical_Orbital_Lm object from a NumericalRadial object. @@ -521,69 +528,70 @@ TEST_F(NumericalRadialTest, ToNumericalOrbitalLm) int izeta = 5; std::string symbol = "Au"; for (int ir = 0; ir != nr; ++ir) - { - double r = ir * dr; - grid[ir] = r; - f[ir] = std::exp(-r); - } + { + double r = ir * dr; + grid[ir] = r; + f[ir] = std::exp (-r); + } - chi.build(l, true, nr, grid, f, pr, izeta, symbol, itype); + chi.build (l, true, nr, grid, f, pr, izeta, symbol, itype); int nk = 1001; double kcut = 30; - chi.set_uniform_grid(false, nk, kcut, 't'); + chi.set_uniform_grid (false, nk, kcut, 't'); Numerical_Orbital_Lm nol; double lcao_ecut = 100; double lcao_dk = 0.01; - int nk_legacy = static_cast(std::sqrt(lcao_ecut) / lcao_dk) + 4; + int nk_legacy = static_cast (std::sqrt (lcao_ecut) / lcao_dk) + 4; nk_legacy += 1 - nk_legacy % 2; double kcut_legacy = (nk_legacy - 1) * lcao_dk; - chi.to_numerical_orbital_lm(nol, nk_legacy, lcao_dk); - int nrcut = static_cast(chi.rcut() / dr) + 1; + chi.to_numerical_orbital_lm (nol, nk_legacy, lcao_dk); + int nrcut = static_cast (chi.rcut () / dr) + 1; // check that the orbital_lm has the same values as the chi - EXPECT_EQ(nol.getLabel(), symbol); - EXPECT_EQ(nol.getType(), itype); - EXPECT_EQ(nol.getL(), l); - EXPECT_EQ(nol.getChi(), izeta); - EXPECT_EQ(nol.getNr(), nrcut); - EXPECT_EQ(nol.getNk(), nk_legacy); - - EXPECT_EQ(nol.getRcut(), chi.rcut()); - EXPECT_EQ(nol.getKcut(), kcut_legacy); - - EXPECT_EQ(nol.getRadial(111), grid[111]); - 
EXPECT_EQ(nol.getRadial(777), grid[777]); - EXPECT_EQ(nol.getKpoint(3), 3 * lcao_dk); - - EXPECT_EQ(nol.getRab(123), dr); - EXPECT_EQ(nol.getDk(), lcao_dk); - - EXPECT_EQ(nol.getPsi(55), f[55]); - EXPECT_EQ(nol.getPsi(222), f[222]); - EXPECT_EQ(nol.getPsi(3333), f[3333]); + EXPECT_EQ (nol.getLabel (), symbol); + EXPECT_EQ (nol.getType (), itype); + EXPECT_EQ (nol.getL (), l); + EXPECT_EQ (nol.getChi (), izeta); + EXPECT_EQ (nol.getNr (), nrcut); + EXPECT_EQ (nol.getNk (), nk_legacy); + + EXPECT_EQ (nol.getRcut (), chi.rcut ()); + EXPECT_EQ (nol.getKcut (), kcut_legacy); + + EXPECT_EQ (nol.getRadial (111), grid[111]); + EXPECT_EQ (nol.getRadial (777), grid[777]); + EXPECT_EQ (nol.getKpoint (3), 3 * lcao_dk); + + EXPECT_EQ (nol.getRab (123), dr); + EXPECT_EQ (nol.getDk (), lcao_dk); + + EXPECT_EQ (nol.getPsi (55), f[55]); + EXPECT_EQ (nol.getPsi (222), f[222]); + EXPECT_EQ (nol.getPsi (3333), f[3333]); // k values may have noticable difference due to algorithmic distinction } -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif - fftw_cleanup(); + fftw_cleanup (); return result; } diff --git a/source/source_basis/module_nao/test/projgen_test.cpp b/source/source_basis/module_nao/test/projgen_test.cpp index 620bd7786d6..7e528ae5e5e 100644 --- a/source/source_basis/module_nao/test/projgen_test.cpp +++ b/source/source_basis/module_nao/test/projgen_test.cpp @@ -15,122 +15,114 @@ * Unit test of functions in projgen.cpp * "projgen" : generate the projector coefficients * "smoothgen" : smooth the projector coefficients and optimize the sigma - * + * ***********************************************************/ -TEST(projgen_test, projgen) +TEST (projgen_test, projgen) { // test orbital 
r^2 * exp(-r) int l = 2; double dr = 0.01; double rcut_nao = 10; - int nr_nao = int(rcut_nao / dr) + 1; - std::vector r(nr_nao); - std::vector orb(nr_nao); + int nr_nao = int (rcut_nao / dr) + 1; + std::vector r (nr_nao); + std::vector orb (nr_nao); - for (int i = 0; i < nr_nao; ++i) { - r[i] = i * dr; - orb[i] = r[i] * r[i] * std::exp(-r[i]); - } + for (int i = 0; i < nr_nao; ++i) + { + r[i] = i * dr; + orb[i] = r[i] * r[i] * std::exp (-r[i]); + } // normalize the input orbital - std::vector integrand(nr_nao); - std::transform(r.begin(), r.end(), orb.begin(), integrand.begin(), - [](double r_i, double orb_i) { return std::pow(r_i * orb_i, 2); }); - double N = 1.0 / std::sqrt(ModuleBase::Integral::simpson(nr_nao, integrand.data(), dr)); - std::for_each(orb.begin(), orb.end(), [N](double& chi_i) { chi_i *= N; }); + std::vector integrand (nr_nao); + std::transform (r.begin (), + r.end (), + orb.begin (), + integrand.begin (), + [] (double r_i, double orb_i) { return std::pow (r_i * orb_i, 2); }); + double N = 1.0 / std::sqrt (ModuleBase::Integral::simpson (nr_nao, integrand.data (), dr)); + std::for_each (orb.begin (), orb.end (), [N] (double& chi_i) { chi_i *= N; }); // projector information double rcut_proj = 7.0; int nbes = 7; std::vector alpha; - projgen(l, nr_nao, r.data(), orb.data(), rcut_proj, nbes, alpha); + projgen (l, nr_nao, r.data (), orb.data (), rcut_proj, nbes, alpha); // compare with python script result std::vector ref = { - 0.000000000000e+00, - 2.344902364599e-05, - 9.378381332712e-05, - 2.109675345121e-04, - 3.749388271050e-04, - 5.856118515995e-04, - 8.428763536364e-04, - 1.146597746904e-03, - 1.496617214310e-03, - 1.892751827321e-03, - 2.334794683381e-03, - 2.822515061259e-03, - 3.355658594204e-03, - 3.933947460740e-03, - 4.557080592928e-03, - 5.224733901903e-03, - 5.936560520491e-03, - 6.692191062668e-03, - 7.491233899644e-03, - 8.333275452302e-03, + 0.000000000000e+00, 2.344902364599e-05, 9.378381332712e-05, 2.109675345121e-04, 
3.749388271050e-04, + 5.856118515995e-04, 8.428763536364e-04, 1.146597746904e-03, 1.496617214310e-03, 1.892751827321e-03, + 2.334794683381e-03, 2.822515061259e-03, 3.355658594204e-03, 3.933947460740e-03, 4.557080592928e-03, + 5.224733901903e-03, 5.936560520491e-03, 6.692191062668e-03, 7.491233899644e-03, 8.333275452302e-03, }; - for (int i = 0; i < 20; ++i) { - EXPECT_NEAR(alpha[i], ref[i], 1e-12); - } + for (int i = 0; i < 20; ++i) + { + EXPECT_NEAR (alpha[i], ref[i], 1e-12); + } } -TEST(smoothgen_test, smoothgen) +TEST (smoothgen_test, smoothgen) { // test orbital r^2 * exp(-r) int l = 2; double dr = 0.01; double rcut_nao = 10; - int nr_nao = int(rcut_nao / dr) + 1; - std::vector r(nr_nao); - std::vector orb(nr_nao); + int nr_nao = int (rcut_nao / dr) + 1; + std::vector r (nr_nao); + std::vector orb (nr_nao); - for (int i = 0; i < nr_nao; ++i) { - r[i] = i * dr; - orb[i] = r[i] * r[i] * std::exp(-r[i]); - } + for (int i = 0; i < nr_nao; ++i) + { + r[i] = i * dr; + orb[i] = r[i] * r[i] * std::exp (-r[i]); + } // normalize the input orbital - std::vector integrand(nr_nao); - std::transform(r.begin(), r.end(), orb.begin(), integrand.begin(), - [](double r_i, double orb_i) { return std::pow(r_i * orb_i, 2); }); - double N = 1.0 / std::sqrt(ModuleBase::Integral::simpson(nr_nao, integrand.data(), dr)); - std::for_each(orb.begin(), orb.end(), [N](double& chi_i) { chi_i *= N; }); + std::vector integrand (nr_nao); + std::transform (r.begin (), + r.end (), + orb.begin (), + integrand.begin (), + [] (double r_i, double orb_i) { return std::pow (r_i * orb_i, 2); }); + double N = 1.0 / std::sqrt (ModuleBase::Integral::simpson (nr_nao, integrand.data (), dr)); + std::for_each (orb.begin (), orb.end (), [N] (double& chi_i) { chi_i *= N; }); // projector information double rcut_proj = 7.0; int nbes = 7; std::vector alpha; - smoothgen(nr_nao, r.data(), orb.data(), rcut_proj, alpha); + smoothgen (nr_nao, r.data (), orb.data (), rcut_proj, alpha); // compare with python script 
result - std::vector ref = { - 0, - 4.3350439973614511e-05, - 0.00017167638355532129, - 0.00038242839374460959, - 0.0006731078535961104, - 0.0010412661227313883, - 0.0014845037064463464, - 0.0020004694372387291, - 0.0025868596685825239, - 0.0032414174807783905, - 0.0039619318987114734, - 0.0047462371213502306, - 0.0055922117628221264, - 0.006497778104904167, - 0.0074609013607684835, - 0.0084795889498252546, - 0.0095518897835073727, - 0.010675893561843302, - 0.01184973008066669, - 0.013071568549313281 - }; - - for (int i = 0; i < 20; ++i) { - //std::cout< ref = {0, + 4.3350439973614511e-05, + 0.00017167638355532129, + 0.00038242839374460959, + 0.0006731078535961104, + 0.0010412661227313883, + 0.0014845037064463464, + 0.0020004694372387291, + 0.0025868596685825239, + 0.0032414174807783905, + 0.0039619318987114734, + 0.0047462371213502306, + 0.0055922117628221264, + 0.006497778104904167, + 0.0074609013607684835, + 0.0084795889498252546, + 0.0095518897835073727, + 0.010675893561843302, + 0.01184973008066669, + 0.013071568549313281}; + + for (int i = 0; i < 20; ++i) + { + // std::cout< #endif -class PswfcRadialsTest : public ::testing::Test { - protected: - virtual void SetUp() { +class PswfcRadialsTest : public ::testing::Test +{ + protected: + virtual void + SetUp () + { #ifdef __MPI - MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); #endif - } - virtual void TearDown() { - // code here will be called just after the test completes - // (each TEST_F) - } + } + virtual void + TearDown () + { + // code here will be called just after the test completes + // (each TEST_F) + } }; -TEST_F(PswfcRadialsTest, startswith) { +TEST_F (PswfcRadialsTest, startswith) +{ PswfcRadials pswfc_radials; std::string word = "hello"; std::string pattern = "he"; - bool result = pswfc_radials.startswith(word, pattern); - EXPECT_TRUE(result); + bool result = pswfc_radials.startswith (word, pattern); + EXPECT_TRUE (result); } 
-TEST_F(PswfcRadialsTest, StealFromQuotes) { +TEST_F (PswfcRadialsTest, StealFromQuotes) +{ PswfcRadials pswfc_radials; std::string word = "\"hello\""; - std::string result = pswfc_radials.steal_from_quotes(word); - EXPECT_EQ(result, "hello"); + std::string result = pswfc_radials.steal_from_quotes (word); + EXPECT_EQ (result, "hello"); } -TEST_F(PswfcRadialsTest, StealFromQuotesOverload1) { +TEST_F (PswfcRadialsTest, StealFromQuotesOverload1) +{ PswfcRadials pswfc_radials; std::string pspot = "../../../../../tests/PP_ORB/As_dojo.upf"; std::ifstream ifs; - ifs.open(pspot); + ifs.open (pspot); // check if file is opened - bool is_open = ifs.is_open(); - if(!is_open) {std::cout<<"File path WRONG.\n"; } - ASSERT_TRUE(is_open); + bool is_open = ifs.is_open (); + if (!is_open) + { + std::cout << "File path WRONG.\n"; + } + ASSERT_TRUE (is_open); std::string line; - while(!ifs.eof()) - { - ifs >> line; - if(pswfc_radials.startswith(line, "mesh_size=")) + while (!ifs.eof ()) { - int result = std::stoi(pswfc_radials.steal_from_quotes(ifs, line)); - EXPECT_EQ(result, 1358); + ifs >> line; + if (pswfc_radials.startswith (line, "mesh_size=")) + { + int result = std::stoi (pswfc_radials.steal_from_quotes (ifs, line)); + EXPECT_EQ (result, 1358); + } } - } - ifs.close(); + ifs.close (); } -TEST_F(PswfcRadialsTest, ReadKeywordValue) { +TEST_F (PswfcRadialsTest, ReadKeywordValue) +{ PswfcRadials pswfc_radials; std::string pspot = "../../../../../tests/PP_ORB/As_dojo.upf"; std::ifstream ifs; - ifs.open(pspot); + ifs.open (pspot); // check if file is opened - bool is_open = ifs.is_open(); - if(!is_open) {std::cout<<"File path WRONG.\n"; } - ASSERT_TRUE(is_open); + bool is_open = ifs.is_open (); + if (!is_open) + { + std::cout << "File path WRONG.\n"; + } + ASSERT_TRUE (is_open); std::string line; - while(!ifs.eof()) - { - ifs >> line; - if(pswfc_radials.startswith(line, "mesh_size=")) + while (!ifs.eof ()) { - std::string str_result = pswfc_radials.read_keyword_value(ifs, line); - 
int result = std::stoi(str_result); - EXPECT_EQ(result, 1358); + ifs >> line; + if (pswfc_radials.startswith (line, "mesh_size=")) + { + std::string str_result = pswfc_radials.read_keyword_value (ifs, line); + int result = std::stoi (str_result); + EXPECT_EQ (result, 1358); + } } - } - ifs.close(); + ifs.close (); } -TEST_F(PswfcRadialsTest, ReadUpfPswfc) { +TEST_F (PswfcRadialsTest, ReadUpfPswfc) +{ PswfcRadials pswfc_radials; std::string pspot = "../../../../../tests/PP_ORB/As_dojo.upf"; std::ifstream ifs; - ifs.open(pspot); + ifs.open (pspot); // check if file is opened - bool is_open = ifs.is_open(); - if(!is_open) {std::cout<<"File path WRONG.\n"; } - ASSERT_TRUE(is_open); - - pswfc_radials.read_upf_pswfc(ifs, 0.0, 1e-6); - EXPECT_EQ(pswfc_radials.lmax(), 2); - EXPECT_EQ(pswfc_radials.nzeta(0), 1); - EXPECT_EQ(pswfc_radials.nzeta(1), 1); - EXPECT_EQ(pswfc_radials.nzeta(2), 1); - EXPECT_EQ(pswfc_radials.nzeta_max(), 1); + bool is_open = ifs.is_open (); + if (!is_open) + { + std::cout << "File path WRONG.\n"; + } + ASSERT_TRUE (is_open); + + pswfc_radials.read_upf_pswfc (ifs, 0.0, 1e-6); + EXPECT_EQ (pswfc_radials.lmax (), 2); + EXPECT_EQ (pswfc_radials.nzeta (0), 1); + EXPECT_EQ (pswfc_radials.nzeta (1), 1); + EXPECT_EQ (pswfc_radials.nzeta (2), 1); + EXPECT_EQ (pswfc_radials.nzeta_max (), 1); // l = 0, , 4S - EXPECT_DOUBLE_EQ(pswfc_radials.chi(0, 0).rvalue(0), 5.0672226831E-13); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(0, 0).rvalue(1), 3.0740550920E-04); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(0, 0).rvalue(2), 6.2055866358E-04); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(0, 0).rvalue(3), 9.4519832136E-04); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(0, 0).rvalue(4), 1.2870457911E-03); - - EXPECT_DOUBLE_EQ(pswfc_radials.chi(0, 0).rvalue(39), 6.2747938942E-02); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(0, 0).rvalue(79), 3.2957297188E-01); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(0, 0).rvalue(119), 6.5729325723E-01); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(0, 0).rvalue(159), 7.9744230720E-01); 
- EXPECT_DOUBLE_EQ(pswfc_radials.chi(0, 0).rvalue(199), 7.3512466100E-01); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (0, 0).rvalue (0), 5.0672226831E-13); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (0, 0).rvalue (1), 3.0740550920E-04); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (0, 0).rvalue (2), 6.2055866358E-04); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (0, 0).rvalue (3), 9.4519832136E-04); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (0, 0).rvalue (4), 1.2870457911E-03); + + EXPECT_DOUBLE_EQ (pswfc_radials.chi (0, 0).rvalue (39), 6.2747938942E-02); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (0, 0).rvalue (79), 3.2957297188E-01); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (0, 0).rvalue (119), 6.5729325723E-01); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (0, 0).rvalue (159), 7.9744230720E-01); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (0, 0).rvalue (199), 7.3512466100E-01); // l = 1, , 4P - EXPECT_DOUBLE_EQ(pswfc_radials.chi(1, 0).rvalue(0), -1.2105620326E-12); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(1, 0).rvalue(1), 2.6035363423E-05); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(1, 0).rvalue(2), 1.0416837423E-04); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(1, 0).rvalue(3), 2.3447967866E-04); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(1, 0).rvalue(4), 4.1710331234E-04); - - EXPECT_DOUBLE_EQ(pswfc_radials.chi(1, 0).rvalue(39), 4.3850721200E-02); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(1, 0).rvalue(79), 2.0385391536E-01); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(1, 0).rvalue(119), 4.4174606396E-01); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(1, 0).rvalue(159), 6.1861552438E-01); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(1, 0).rvalue(199), 6.6885272981E-01); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (1, 0).rvalue (0), -1.2105620326E-12); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (1, 0).rvalue (1), 2.6035363423E-05); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (1, 0).rvalue (2), 1.0416837423E-04); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (1, 0).rvalue (3), 2.3447967866E-04); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (1, 0).rvalue (4), 4.1710331234E-04); + + 
EXPECT_DOUBLE_EQ (pswfc_radials.chi (1, 0).rvalue (39), 4.3850721200E-02); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (1, 0).rvalue (79), 2.0385391536E-01); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (1, 0).rvalue (119), 4.4174606396E-01); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (1, 0).rvalue (159), 6.1861552438E-01); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (1, 0).rvalue (199), 6.6885272981E-01); // l = 2, , 3D - EXPECT_DOUBLE_EQ(pswfc_radials.chi(2, 0).rvalue(0), 2.0998757420E-11); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(2, 0).rvalue(1), 1.0346699413E-05); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(2, 0).rvalue(2), 8.2719900770E-05); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(2, 0).rvalue(3), 2.7887785682E-04); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(2, 0).rvalue(4), 6.6004347176E-04); - - EXPECT_DOUBLE_EQ(pswfc_radials.chi(2, 0).rvalue(39), 4.4075591655E-01); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(2, 0).rvalue(79), 1.2790898686E+00); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(2, 0).rvalue(119), 7.5247184647E-01); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(2, 0).rvalue(159), 2.7836191101E-01); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(2, 0).rvalue(199), 1.3889295980E-01); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (2, 0).rvalue (0), 2.0998757420E-11); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (2, 0).rvalue (1), 1.0346699413E-05); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (2, 0).rvalue (2), 8.2719900770E-05); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (2, 0).rvalue (3), 2.7887785682E-04); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (2, 0).rvalue (4), 6.6004347176E-04); + + EXPECT_DOUBLE_EQ (pswfc_radials.chi (2, 0).rvalue (39), 4.4075591655E-01); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (2, 0).rvalue (79), 1.2790898686E+00); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (2, 0).rvalue (119), 7.5247184647E-01); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (2, 0).rvalue (159), 2.7836191101E-01); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (2, 0).rvalue (199), 1.3889295980E-01); } -TEST_F(PswfcRadialsTest, Build) +TEST_F (PswfcRadialsTest, Build) { PswfcRadials 
pswfc_radials; std::string pspot = "../../../../../tests/PP_ORB/As_dojo.upf"; - pswfc_radials.build(pspot, 0, 0.0); - - EXPECT_EQ(pswfc_radials.lmax(), 2); - EXPECT_EQ(pswfc_radials.nzeta(0), 1); - EXPECT_EQ(pswfc_radials.nzeta(1), 1); - EXPECT_EQ(pswfc_radials.nzeta(2), 1); - EXPECT_EQ(pswfc_radials.nzeta_max(), 1); - EXPECT_EQ(pswfc_radials.rcut_max(), 13.57); - - EXPECT_DOUBLE_EQ(pswfc_radials.chi(0, 0).rvalue(0), 5.0672226831E-13); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(0, 0).rvalue(1), 3.0740550920E-04); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(0, 0).rvalue(2), 6.2055866358E-04); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(0, 0).rvalue(3), 9.4519832136E-04); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(0, 0).rvalue(4), 1.2870457911E-03); - - EXPECT_DOUBLE_EQ(pswfc_radials.chi(1, 0).rvalue(0), -1.2105620326E-12); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(1, 0).rvalue(1), 2.6035363423E-05); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(1, 0).rvalue(2), 1.0416837423E-04); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(1, 0).rvalue(3), 2.3447967866E-04); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(1, 0).rvalue(4), 4.1710331234E-04); - - EXPECT_DOUBLE_EQ(pswfc_radials.chi(2, 0).rvalue(0), 2.0998757420E-11); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(2, 0).rvalue(1), 1.0346699413E-05); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(2, 0).rvalue(2), 8.2719900770E-05); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(2, 0).rvalue(3), 2.7887785682E-04); - EXPECT_DOUBLE_EQ(pswfc_radials.chi(2, 0).rvalue(4), 6.6004347176E-04); + pswfc_radials.build (pspot, 0, 0.0); + + EXPECT_EQ (pswfc_radials.lmax (), 2); + EXPECT_EQ (pswfc_radials.nzeta (0), 1); + EXPECT_EQ (pswfc_radials.nzeta (1), 1); + EXPECT_EQ (pswfc_radials.nzeta (2), 1); + EXPECT_EQ (pswfc_radials.nzeta_max (), 1); + EXPECT_EQ (pswfc_radials.rcut_max (), 13.57); + + EXPECT_DOUBLE_EQ (pswfc_radials.chi (0, 0).rvalue (0), 5.0672226831E-13); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (0, 0).rvalue (1), 3.0740550920E-04); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (0, 0).rvalue (2), 6.2055866358E-04); + 
EXPECT_DOUBLE_EQ (pswfc_radials.chi (0, 0).rvalue (3), 9.4519832136E-04); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (0, 0).rvalue (4), 1.2870457911E-03); + + EXPECT_DOUBLE_EQ (pswfc_radials.chi (1, 0).rvalue (0), -1.2105620326E-12); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (1, 0).rvalue (1), 2.6035363423E-05); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (1, 0).rvalue (2), 1.0416837423E-04); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (1, 0).rvalue (3), 2.3447967866E-04); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (1, 0).rvalue (4), 4.1710331234E-04); + + EXPECT_DOUBLE_EQ (pswfc_radials.chi (2, 0).rvalue (0), 2.0998757420E-11); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (2, 0).rvalue (1), 1.0346699413E-05); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (2, 0).rvalue (2), 8.2719900770E-05); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (2, 0).rvalue (3), 2.7887785682E-04); + EXPECT_DOUBLE_EQ (pswfc_radials.chi (2, 0).rvalue (4), 6.6004347176E-04); } -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - std::cout << "Current getcwd: " << getcwd(nullptr, 0) << std::endl; - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + std::cout << "Current getcwd: " << getcwd (nullptr, 0) << std::endl; + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; diff --git a/source/source_basis/module_nao/test/radial_collection_test.cpp b/source/source_basis/module_nao/test/radial_collection_test.cpp index dcb0b49a30e..32d377ae255 100644 --- a/source/source_basis/module_nao/test/radial_collection_test.cpp +++ b/source/source_basis/module_nao/test/radial_collection_test.cpp @@ -32,16 +32,18 @@ class RadialCollectionTest : public ::testing::Test { protected: - void SetUp(); - void TearDown(); + void SetUp (); + void TearDown (); - RadialCollection orb; //!< object under test - int nfile = 0; // number of orbital/pseudopotential 
files - std::string* file = nullptr; //!< orbitals file to read from - std::string log_file = "./test_files/radial_collection.log"; //!< file for logging + RadialCollection orb; //!< object under test + int nfile = 0; // number of orbital/pseudopotential files + std::string* file = nullptr; //!< orbitals file to read from + std::string log_file = "./test_files/radial_collection.log"; //!< file for logging }; -void RadialCollectionTest::SetUp() { +void + RadialCollectionTest::SetUp () +{ std::string dir = "../../../../../tests/PP_ORB/"; nfile = 4; file = new std::string[nfile]; @@ -51,269 +53,285 @@ void RadialCollectionTest::SetUp() { file[3] = dir + "Fe_gga_9au_100Ry_4s2p2d1f.orb"; } -void RadialCollectionTest::TearDown() { +void + RadialCollectionTest::TearDown () +{ delete[] file; } -TEST_F(RadialCollectionTest, BuildAndGet) { - orb.build(nfile, file, 'o'); - - EXPECT_EQ(orb.symbol(0), "C"); - EXPECT_EQ(orb.symbol(1), "H"); - EXPECT_EQ(orb.symbol(2), "O"); - EXPECT_EQ(orb.symbol(3), "Fe"); - - EXPECT_EQ(orb.ntype(), 4); - EXPECT_EQ(orb.lmax(), 3); - EXPECT_DOUBLE_EQ(orb.rcut_max(), 10.0); - - EXPECT_EQ(orb.nzeta(0,0), 2); - EXPECT_EQ(orb.nzeta(0,1), 2); - EXPECT_EQ(orb.nzeta(0,2), 1); - - EXPECT_EQ(orb.nzeta(1,0), 2); - EXPECT_EQ(orb.nzeta(1,1), 1); - - EXPECT_EQ(orb.nzeta(2,0), 2); - EXPECT_EQ(orb.nzeta(2,1), 2); - EXPECT_EQ(orb.nzeta(2,2), 1); - - EXPECT_EQ(orb.nzeta(3,0), 4); - EXPECT_EQ(orb.nzeta(3,1), 2); - EXPECT_EQ(orb.nzeta(3,2), 2); - EXPECT_EQ(orb.nzeta(3,3), 1); - - EXPECT_EQ(orb.nzeta_max(0), 2); - EXPECT_EQ(orb.nzeta_max(1), 2); - EXPECT_EQ(orb.nzeta_max(2), 2); - EXPECT_EQ(orb.nzeta_max(3), 4); - - EXPECT_EQ(orb.nchi(0), 5); - EXPECT_EQ(orb.nchi(1), 3); - EXPECT_EQ(orb.nchi(2), 5); - EXPECT_EQ(orb.nchi(3), 9); - EXPECT_EQ(orb.nchi(), 22); - - for (int itype = 0; itype <= 3; ++itype) { - EXPECT_EQ(orb(itype).itype(), itype); - } - - for (int itype = 0; itype <= 3; ++itype) { - for (int l = 0; l <= orb(itype).lmax(); ++l) { - for (int izeta = 0; 
izeta != orb(itype).nzeta(l); ++izeta) { - EXPECT_EQ(orb(itype, l, izeta).l(), l); - } +TEST_F (RadialCollectionTest, BuildAndGet) +{ + orb.build (nfile, file, 'o'); + + EXPECT_EQ (orb.symbol (0), "C"); + EXPECT_EQ (orb.symbol (1), "H"); + EXPECT_EQ (orb.symbol (2), "O"); + EXPECT_EQ (orb.symbol (3), "Fe"); + + EXPECT_EQ (orb.ntype (), 4); + EXPECT_EQ (orb.lmax (), 3); + EXPECT_DOUBLE_EQ (orb.rcut_max (), 10.0); + + EXPECT_EQ (orb.nzeta (0, 0), 2); + EXPECT_EQ (orb.nzeta (0, 1), 2); + EXPECT_EQ (orb.nzeta (0, 2), 1); + + EXPECT_EQ (orb.nzeta (1, 0), 2); + EXPECT_EQ (orb.nzeta (1, 1), 1); + + EXPECT_EQ (orb.nzeta (2, 0), 2); + EXPECT_EQ (orb.nzeta (2, 1), 2); + EXPECT_EQ (orb.nzeta (2, 2), 1); + + EXPECT_EQ (orb.nzeta (3, 0), 4); + EXPECT_EQ (orb.nzeta (3, 1), 2); + EXPECT_EQ (orb.nzeta (3, 2), 2); + EXPECT_EQ (orb.nzeta (3, 3), 1); + + EXPECT_EQ (orb.nzeta_max (0), 2); + EXPECT_EQ (orb.nzeta_max (1), 2); + EXPECT_EQ (orb.nzeta_max (2), 2); + EXPECT_EQ (orb.nzeta_max (3), 4); + + EXPECT_EQ (orb.nchi (0), 5); + EXPECT_EQ (orb.nchi (1), 3); + EXPECT_EQ (orb.nchi (2), 5); + EXPECT_EQ (orb.nchi (3), 9); + EXPECT_EQ (orb.nchi (), 22); + + for (int itype = 0; itype <= 3; ++itype) + { + EXPECT_EQ (orb (itype).itype (), itype); + } + + for (int itype = 0; itype <= 3; ++itype) + { + for (int l = 0; l <= orb (itype).lmax (); ++l) + { + for (int izeta = 0; izeta != orb (itype).nzeta (l); ++izeta) + { + EXPECT_EQ (orb (itype, l, izeta).l (), l); + } + } } - } } -TEST_F(RadialCollectionTest, BatchSet) { - orb.build(nfile, file, 'o'); +TEST_F (RadialCollectionTest, BatchSet) +{ + orb.build (nfile, file, 'o'); ModuleBase::SphericalBesselTransformer sbt; - orb.set_transformer(sbt); - orb.set_uniform_grid(true, 2001, 20.0); + orb.set_transformer (sbt); + orb.set_uniform_grid (true, 2001, 20.0); // NOTE: cutoff radius is not necessarily the last rgrid point. This is // because the grid might have zero padding for the sake of FFT. rcut // keeps track of the "actual" cutoff radius. 
- EXPECT_EQ(orb.rcut_max(), 10.0); + EXPECT_EQ (orb.rcut_max (), 10.0); std::array rcut = {8, 8, 10, 9}; - for (int itype = 0; itype != orb.ntype(); ++itype) { - for (int l = 0; l <= orb(itype).lmax(); ++l) { - for (int izeta = 0; izeta != orb.nzeta(itype, l); ++izeta) { - EXPECT_EQ(sbt, orb(itype, l, izeta).sbt()); - EXPECT_DOUBLE_EQ(orb(itype, l, izeta).rcut(), rcut[itype]); - } + for (int itype = 0; itype != orb.ntype (); ++itype) + { + for (int l = 0; l <= orb (itype).lmax (); ++l) + { + for (int izeta = 0; izeta != orb.nzeta (itype, l); ++izeta) + { + EXPECT_EQ (sbt, orb (itype, l, izeta).sbt ()); + EXPECT_DOUBLE_EQ (orb (itype, l, izeta).rcut (), rcut[itype]); + } + } } - } double* grid = new double[3]; grid[0] = 0.0; grid[1] = 1.0; grid[2] = 3.14; - orb.set_grid(true, 3, grid, 'i'); - for (int itype = 0; itype != orb.ntype(); ++itype) { - for (int l = 0; l <= orb(itype).lmax(); ++l) { - for (int izeta = 0; izeta != orb.nzeta(itype, l); ++izeta) { - EXPECT_DOUBLE_EQ(orb(itype, l, izeta).rcut(), 3.14); - } + orb.set_grid (true, 3, grid, 'i'); + for (int itype = 0; itype != orb.ntype (); ++itype) + { + for (int l = 0; l <= orb (itype).lmax (); ++l) + { + for (int izeta = 0; izeta != orb.nzeta (itype, l); ++izeta) + { + EXPECT_DOUBLE_EQ (orb (itype, l, izeta).rcut (), 3.14); + } + } } - } delete[] grid; } -TEST_F(RadialCollectionTest, Copy) +TEST_F (RadialCollectionTest, Copy) { - orb.build(nfile, file, 'o'); + orb.build (nfile, file, 'o'); // copy constructor - RadialCollection orb2(orb); + RadialCollection orb2 (orb); - EXPECT_EQ(orb2.symbol(0), "C"); - EXPECT_EQ(orb2.symbol(1), "H"); - EXPECT_EQ(orb2.symbol(2), "O"); - EXPECT_EQ(orb2.symbol(3), "Fe"); + EXPECT_EQ (orb2.symbol (0), "C"); + EXPECT_EQ (orb2.symbol (1), "H"); + EXPECT_EQ (orb2.symbol (2), "O"); + EXPECT_EQ (orb2.symbol (3), "Fe"); - EXPECT_EQ(orb2.ntype(), 4); - EXPECT_EQ(orb2.lmax(), 3); - EXPECT_DOUBLE_EQ(orb2.rcut_max(), 10.0); + EXPECT_EQ (orb2.ntype (), 4); + EXPECT_EQ (orb2.lmax (), 3); + 
EXPECT_DOUBLE_EQ (orb2.rcut_max (), 10.0); - EXPECT_EQ(orb2.nzeta(0, 0), 2); - EXPECT_EQ(orb2.nzeta(0, 1), 2); - EXPECT_EQ(orb2.nzeta(0, 2), 1); + EXPECT_EQ (orb2.nzeta (0, 0), 2); + EXPECT_EQ (orb2.nzeta (0, 1), 2); + EXPECT_EQ (orb2.nzeta (0, 2), 1); - EXPECT_EQ(orb2.nzeta(1, 0), 2); - EXPECT_EQ(orb2.nzeta(1, 1), 1); + EXPECT_EQ (orb2.nzeta (1, 0), 2); + EXPECT_EQ (orb2.nzeta (1, 1), 1); - EXPECT_EQ(orb2.nzeta(2, 0), 2); - EXPECT_EQ(orb2.nzeta(2, 1), 2); - EXPECT_EQ(orb2.nzeta(2, 2), 1); + EXPECT_EQ (orb2.nzeta (2, 0), 2); + EXPECT_EQ (orb2.nzeta (2, 1), 2); + EXPECT_EQ (orb2.nzeta (2, 2), 1); - EXPECT_EQ(orb2.nzeta(3, 0), 4); - EXPECT_EQ(orb2.nzeta(3, 1), 2); - EXPECT_EQ(orb2.nzeta(3, 2), 2); - EXPECT_EQ(orb2.nzeta(3, 3), 1); + EXPECT_EQ (orb2.nzeta (3, 0), 4); + EXPECT_EQ (orb2.nzeta (3, 1), 2); + EXPECT_EQ (orb2.nzeta (3, 2), 2); + EXPECT_EQ (orb2.nzeta (3, 3), 1); - EXPECT_EQ(orb2.nzeta_max(0), 2); - EXPECT_EQ(orb2.nzeta_max(1), 2); - EXPECT_EQ(orb2.nzeta_max(2), 2); - EXPECT_EQ(orb2.nzeta_max(3), 4); + EXPECT_EQ (orb2.nzeta_max (0), 2); + EXPECT_EQ (orb2.nzeta_max (1), 2); + EXPECT_EQ (orb2.nzeta_max (2), 2); + EXPECT_EQ (orb2.nzeta_max (3), 4); - EXPECT_EQ(orb2.nchi(0), 5); - EXPECT_EQ(orb2.nchi(1), 3); - EXPECT_EQ(orb2.nchi(2), 5); - EXPECT_EQ(orb2.nchi(3), 9); - EXPECT_EQ(orb2.nchi(), 22); + EXPECT_EQ (orb2.nchi (0), 5); + EXPECT_EQ (orb2.nchi (1), 3); + EXPECT_EQ (orb2.nchi (2), 5); + EXPECT_EQ (orb2.nchi (3), 9); + EXPECT_EQ (orb2.nchi (), 22); for (int itype = 0; itype <= 3; ++itype) - { - EXPECT_EQ(orb2(itype).itype(), itype); - } + { + EXPECT_EQ (orb2 (itype).itype (), itype); + } for (int itype = 0; itype <= 3; ++itype) - { - for (int l = 0; l <= orb2(itype).lmax(); ++l) { - for (int izeta = 0; izeta != orb2(itype).nzeta(l); ++izeta) - { - EXPECT_EQ(orb2(itype, l, izeta).l(), l); - } + for (int l = 0; l <= orb2 (itype).lmax (); ++l) + { + for (int izeta = 0; izeta != orb2 (itype).nzeta (l); ++izeta) + { + EXPECT_EQ (orb2 (itype, l, izeta).l (), l); 
+ } + } } - } // assignment operator RadialCollection orb3; orb3 = orb; - EXPECT_EQ(orb3.symbol(0), "C"); - EXPECT_EQ(orb3.symbol(1), "H"); - EXPECT_EQ(orb3.symbol(2), "O"); - EXPECT_EQ(orb3.symbol(3), "Fe"); + EXPECT_EQ (orb3.symbol (0), "C"); + EXPECT_EQ (orb3.symbol (1), "H"); + EXPECT_EQ (orb3.symbol (2), "O"); + EXPECT_EQ (orb3.symbol (3), "Fe"); - EXPECT_EQ(orb3.ntype(), 4); - EXPECT_EQ(orb3.lmax(), 3); - EXPECT_DOUBLE_EQ(orb3.rcut_max(), 10.0); + EXPECT_EQ (orb3.ntype (), 4); + EXPECT_EQ (orb3.lmax (), 3); + EXPECT_DOUBLE_EQ (orb3.rcut_max (), 10.0); - EXPECT_EQ(orb3.nzeta(0, 0), 2); - EXPECT_EQ(orb3.nzeta(0, 1), 2); - EXPECT_EQ(orb3.nzeta(0, 2), 1); + EXPECT_EQ (orb3.nzeta (0, 0), 2); + EXPECT_EQ (orb3.nzeta (0, 1), 2); + EXPECT_EQ (orb3.nzeta (0, 2), 1); - EXPECT_EQ(orb3.nzeta(1, 0), 2); - EXPECT_EQ(orb3.nzeta(1, 1), 1); + EXPECT_EQ (orb3.nzeta (1, 0), 2); + EXPECT_EQ (orb3.nzeta (1, 1), 1); - EXPECT_EQ(orb3.nzeta(2, 0), 2); - EXPECT_EQ(orb3.nzeta(2, 1), 2); - EXPECT_EQ(orb3.nzeta(2, 2), 1); + EXPECT_EQ (orb3.nzeta (2, 0), 2); + EXPECT_EQ (orb3.nzeta (2, 1), 2); + EXPECT_EQ (orb3.nzeta (2, 2), 1); - EXPECT_EQ(orb3.nzeta(3, 0), 4); - EXPECT_EQ(orb3.nzeta(3, 1), 2); - EXPECT_EQ(orb3.nzeta(3, 2), 2); - EXPECT_EQ(orb3.nzeta(3, 3), 1); + EXPECT_EQ (orb3.nzeta (3, 0), 4); + EXPECT_EQ (orb3.nzeta (3, 1), 2); + EXPECT_EQ (orb3.nzeta (3, 2), 2); + EXPECT_EQ (orb3.nzeta (3, 3), 1); - EXPECT_EQ(orb3.nzeta_max(0), 2); - EXPECT_EQ(orb3.nzeta_max(1), 2); - EXPECT_EQ(orb3.nzeta_max(2), 2); - EXPECT_EQ(orb3.nzeta_max(3), 4); + EXPECT_EQ (orb3.nzeta_max (0), 2); + EXPECT_EQ (orb3.nzeta_max (1), 2); + EXPECT_EQ (orb3.nzeta_max (2), 2); + EXPECT_EQ (orb3.nzeta_max (3), 4); - EXPECT_EQ(orb3.nchi(0), 5); - EXPECT_EQ(orb3.nchi(1), 3); - EXPECT_EQ(orb3.nchi(2), 5); - EXPECT_EQ(orb3.nchi(3), 9); - EXPECT_EQ(orb3.nchi(), 22); + EXPECT_EQ (orb3.nchi (0), 5); + EXPECT_EQ (orb3.nchi (1), 3); + EXPECT_EQ (orb3.nchi (2), 5); + EXPECT_EQ (orb3.nchi (3), 9); + EXPECT_EQ (orb3.nchi (), 
22); for (int itype = 0; itype <= 3; ++itype) - { - EXPECT_EQ(orb3(itype).itype(), itype); - } + { + EXPECT_EQ (orb3 (itype).itype (), itype); + } for (int itype = 0; itype <= 3; ++itype) - { - for (int l = 0; l <= orb3(itype).lmax(); ++l) { - for (int izeta = 0; izeta != orb3(itype).nzeta(l); ++izeta) - { - EXPECT_EQ(orb3(itype, l, izeta).l(), l); - } + for (int l = 0; l <= orb3 (itype).lmax (); ++l) + { + for (int izeta = 0; izeta != orb3 (itype).nzeta (l); ++izeta) + { + EXPECT_EQ (orb3 (itype, l, izeta).l (), l); + } + } } - } } -TEST_F(RadialCollectionTest, Iteration) +TEST_F (RadialCollectionTest, Iteration) { - orb.build(nfile, file, 'o'); - EXPECT_EQ(*orb.cbegin(), &orb(0, 0, 0)); - EXPECT_EQ(*(orb.cbegin() + 2), &orb(1, 0, 0)); - EXPECT_EQ(*(orb.cbegin() + 9), &orb(3, 0, 3)); - EXPECT_EQ(*(orb.cbegin() + 10), &orb(0, 1, 0)); - EXPECT_EQ(*(orb.cbegin() + 17), &orb(0, 2, 0)); - EXPECT_EQ(*(orb.cbegin() + 21), &orb(3, 3, 0)); - EXPECT_EQ(*(orb.cend() - 1), &orb(3, 3, 0)); - //EXPECT_EQ(*(orb.cbegin() + 5), &orb(1, 0, 0)); - //EXPECT_EQ(*(orb.cbegin() + 8), &orb(2, 0, 0)); - //EXPECT_EQ(*(orb.cbegin() + 13), &orb(3, 0, 0)); - //EXPECT_EQ(*(orb.cend() - 1), &orb(3, 3, 0)); + orb.build (nfile, file, 'o'); + EXPECT_EQ (*orb.cbegin (), &orb (0, 0, 0)); + EXPECT_EQ (*(orb.cbegin () + 2), &orb (1, 0, 0)); + EXPECT_EQ (*(orb.cbegin () + 9), &orb (3, 0, 3)); + EXPECT_EQ (*(orb.cbegin () + 10), &orb (0, 1, 0)); + EXPECT_EQ (*(orb.cbegin () + 17), &orb (0, 2, 0)); + EXPECT_EQ (*(orb.cbegin () + 21), &orb (3, 3, 0)); + EXPECT_EQ (*(orb.cend () - 1), &orb (3, 3, 0)); + // EXPECT_EQ(*(orb.cbegin() + 5), &orb(1, 0, 0)); + // EXPECT_EQ(*(orb.cbegin() + 8), &orb(2, 0, 0)); + // EXPECT_EQ(*(orb.cbegin() + 13), &orb(3, 0, 0)); + // EXPECT_EQ(*(orb.cend() - 1), &orb(3, 3, 0)); } -TEST_F(RadialCollectionTest, Build2) { +TEST_F (RadialCollectionTest, Build2) +{ // build a collection of truncated spherical Bessel functions int lmax = 3; int nbes = 10; double rcut = 10.0; double 
sigma = 0.0; double dr = 0.01; - orb.build(lmax, nbes, rcut, sigma, dr); + orb.build (lmax, nbes, rcut, sigma, dr); - orb.lmax(); + orb.lmax (); - EXPECT_EQ(orb.ntype(), 1); - EXPECT_EQ(orb.lmax(), lmax); - EXPECT_DOUBLE_EQ(orb.rcut_max(), rcut); + EXPECT_EQ (orb.ntype (), 1); + EXPECT_EQ (orb.lmax (), lmax); + EXPECT_DOUBLE_EQ (orb.rcut_max (), rcut); - for (int l = 0; l <= lmax; ++l) { - EXPECT_EQ(orb.nzeta(0, l), nbes); - } + for (int l = 0; l <= lmax; ++l) + { + EXPECT_EQ (orb.nzeta (0, l), nbes); + } - EXPECT_EQ(orb.nzeta_max(0), nbes); - EXPECT_EQ(orb.nchi(0), nbes*(lmax+1)); - EXPECT_EQ(orb.nchi(), nbes*(lmax+1)); + EXPECT_EQ (orb.nzeta_max (0), nbes); + EXPECT_EQ (orb.nchi (0), nbes * (lmax + 1)); + EXPECT_EQ (orb.nchi (), nbes * (lmax + 1)); } - -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; diff --git a/source/source_basis/module_nao/test/real_gaunt_table_test.cpp b/source/source_basis/module_nao/test/real_gaunt_table_test.cpp index 948987b7ba2..cf1c2a903f6 100644 --- a/source/source_basis/module_nao/test/real_gaunt_table_test.cpp +++ b/source/source_basis/module_nao/test/real_gaunt_table_test.cpp @@ -2,7 +2,7 @@ #include "gtest/gtest.h" #include -#include +#include #ifdef __MPI #include @@ -33,134 +33,150 @@ using iclock = std::chrono::high_resolution_clock; class RealGauntTableTest : public ::testing::Test { protected: - void SetUp() { /*rgt.build(lmax);*/ } - void TearDown() {} + void + SetUp () + { /*rgt.build(lmax);*/ + } + void + TearDown () + { + } - int lmax = 10; //!< maximum angular momentum - //RealGauntTable rgt; //!< object under test + int lmax = 10; //!< maximum angular momentum + // RealGauntTable rgt; //!< object under test const 
double tol = 1e-12; //!< numerical error tolerance for individual Gaunt coefficient }; -TEST_F(RealGauntTableTest, LegacyConsistency) +TEST_F (RealGauntTableTest, LegacyConsistency) { - //iclock::time_point start; - //std::chrono::duration dur; + // iclock::time_point start; + // std::chrono::duration dur; // this test checks whether the coefficients in RealGauntTable is consistent with those of ORB_gaunt_table // this test shall be removed in the future once the refactoring is finished ORB_gaunt_table ogt; - RealGauntTable::instance().build(lmax); + RealGauntTable::instance ().build (lmax); - //start = iclock::now(); - ogt.init_Gaunt_CH(lmax); - ogt.init_Gaunt(lmax); - //dur = iclock::now() - start; - //std::cout << "time elased = " << dur.count() << " s" << std::endl; + // start = iclock::now(); + ogt.init_Gaunt_CH (lmax); + ogt.init_Gaunt (lmax); + // dur = iclock::now() - start; + // std::cout << "time elased = " << dur.count() << " s" << std::endl; - //RealGauntTable rgt2; - //start = iclock::now(); - //rgt2.build(lmax); - //dur = iclock::now() - start; - //std::cout << "time elased = " << dur.count() << " s" << std::endl; + // RealGauntTable rgt2; + // start = iclock::now(); + // rgt2.build(lmax); + // dur = iclock::now() - start; + // std::cout << "time elased = " << dur.count() << " s" << std::endl; for (int l1 = 0; l1 <= lmax; ++l1) - { - for (int mm1 = 0; mm1 <= 2*l1; ++mm1) { - int index1 = ogt.get_lm_index(l1, mm1); - for (int l2 = 0; l2 <= lmax; ++l2) - { - for (int mm2 = 0; mm2 <= 2*l2; ++mm2) + for (int mm1 = 0; mm1 <= 2 * l1; ++mm1) { - int index2 = ogt.get_lm_index(l2, mm2); - for (int l3 = 0; l3 <= 2*lmax; ++l3) - //for (int l3 = std::abs(l1-l2); l3 <= 2*lmax; l3 += 2) - { - for (int mm3 = 0; mm3 <= 2*l3; ++mm3) + int index1 = ogt.get_lm_index (l1, mm1); + for (int l2 = 0; l2 <= lmax; ++l2) { - int index3 = ogt.get_lm_index(l3, mm3); - - int m1 = ogt.Index_M(mm1); - int m2 = ogt.Index_M(mm2); - int m3 = ogt.Index_M(mm3); - - 
EXPECT_NEAR(RealGauntTable::instance()(l1, l2, l3, m1, m2, m3), - ogt.Gaunt_Coefficients(index1, index2, index3), tol); + for (int mm2 = 0; mm2 <= 2 * l2; ++mm2) + { + int index2 = ogt.get_lm_index (l2, mm2); + for (int l3 = 0; l3 <= 2 * lmax; ++l3) + // for (int l3 = std::abs(l1-l2); l3 <= 2*lmax; l3 += 2) + { + for (int mm3 = 0; mm3 <= 2 * l3; ++mm3) + { + int index3 = ogt.get_lm_index (l3, mm3); + + int m1 = ogt.Index_M (mm1); + int m2 = ogt.Index_M (mm2); + int m3 = ogt.Index_M (mm3); + + EXPECT_NEAR (RealGauntTable::instance () (l1, l2, l3, m1, m2, m3), + ogt.Gaunt_Coefficients (index1, index2, index3), + tol); + } + } + } } - } } - } } - } } -TEST_F(RealGauntTableTest, SanityCheck) +TEST_F (RealGauntTableTest, SanityCheck) { - EXPECT_EQ(RealGauntTable::instance().lmax(), lmax); - - EXPECT_NEAR(RealGauntTable::instance()(0, 0, 0, 0, 0, 0), ModuleBase::SQRT_INVERSE_FOUR_PI, tol); - - EXPECT_NEAR(RealGauntTable::instance()(4, 0, 4, 3, 0, 3), ModuleBase::SQRT_INVERSE_FOUR_PI, tol); - EXPECT_NEAR(RealGauntTable::instance()(4, 0, 4, -3, 0, -3), ModuleBase::SQRT_INVERSE_FOUR_PI, tol); - - EXPECT_NEAR(RealGauntTable::instance()(2, 2, 2, 2, -1, -1), -std::sqrt(15.0) / 7.0 * ModuleBase::SQRT_INVERSE_FOUR_PI, tol); - EXPECT_NEAR(RealGauntTable::instance()(2, 2, 2, -1, 2, -1), -std::sqrt(15.0) / 7.0 * ModuleBase::SQRT_INVERSE_FOUR_PI, tol); - - EXPECT_NEAR(RealGauntTable::instance()(3, 3, 2, 2, 1, 1), ModuleBase::SQRT_INVERSE_FOUR_PI / std::sqrt(6.0), tol); - EXPECT_NEAR(RealGauntTable::instance()(2, 3, 3, 1, 1, 2), ModuleBase::SQRT_INVERSE_FOUR_PI / std::sqrt(6.0), tol); - - EXPECT_NEAR(RealGauntTable::instance()(4, 5, 7, 3, -2, -5), ModuleBase::SQRT_INVERSE_FOUR_PI * std::sqrt(210.0) / 221.0, tol); + EXPECT_EQ (RealGauntTable::instance ().lmax (), lmax); + + EXPECT_NEAR (RealGauntTable::instance () (0, 0, 0, 0, 0, 0), ModuleBase::SQRT_INVERSE_FOUR_PI, tol); + + EXPECT_NEAR (RealGauntTable::instance () (4, 0, 4, 3, 0, 3), ModuleBase::SQRT_INVERSE_FOUR_PI, tol); + 
EXPECT_NEAR (RealGauntTable::instance () (4, 0, 4, -3, 0, -3), ModuleBase::SQRT_INVERSE_FOUR_PI, tol); + + EXPECT_NEAR (RealGauntTable::instance () (2, 2, 2, 2, -1, -1), + -std::sqrt (15.0) / 7.0 * ModuleBase::SQRT_INVERSE_FOUR_PI, + tol); + EXPECT_NEAR (RealGauntTable::instance () (2, 2, 2, -1, 2, -1), + -std::sqrt (15.0) / 7.0 * ModuleBase::SQRT_INVERSE_FOUR_PI, + tol); + + EXPECT_NEAR (RealGauntTable::instance () (3, 3, 2, 2, 1, 1), + ModuleBase::SQRT_INVERSE_FOUR_PI / std::sqrt (6.0), + tol); + EXPECT_NEAR (RealGauntTable::instance () (2, 3, 3, 1, 1, 2), + ModuleBase::SQRT_INVERSE_FOUR_PI / std::sqrt (6.0), + tol); + + EXPECT_NEAR (RealGauntTable::instance () (4, 5, 7, 3, -2, -5), + ModuleBase::SQRT_INVERSE_FOUR_PI * std::sqrt (210.0) / 221.0, + tol); } /** -* @brief real gaunt function test: -* Using sympy realgaunt function test points set. -* 2 failed test case with a sign error occurred -* on some cases of the actual Gaunt coefficients of 2 negative m -*/ -TEST_F(RealGauntTableTest, Check2) + * @brief real gaunt function test: + * Using sympy realgaunt function test points set. 
+ * 2 failed test case with a sign error occurred + * on some cases of the actual Gaunt coefficients of 2 negative m + */ +TEST_F (RealGauntTableTest, Check2) { - const double PI = 3.14159265358979323846; - for(int i=0;i<3;i++){ - for(int j=-i;j<=i;j++){ - EXPECT_NEAR(RealGauntTable::instance()(0, i, i, 0, j, j), 1/(2*sqrt(PI)), tol); + const double PI = 3.14159265358979323846; + for (int i = 0; i < 3; i++) + { + for (int j = -i; j <= i; j++) + { + EXPECT_NEAR (RealGauntTable::instance () (0, i, i, 0, j, j), 1 / (2 * sqrt (PI)), tol); + } } - } - + // EXPECT_NEAR(rgt(1, 1, 2, -1, 1, -2), -sqrt(15)/(10*sqrt(PI)), tol); //wrong case // EXPECT_NEAR(rgt(1, 1, 2, -1, 1, -2),-sqrt(15)/(10*sqrt(PI)), tol); //wrong case - - EXPECT_NEAR(RealGauntTable::instance()(1, 1, 2, 0, 0, 0),sqrt(5)/(5*sqrt(PI)), tol); - EXPECT_NEAR(RealGauntTable::instance()(1, 1, 2, 1, 1, 0),-sqrt(5)/(10*sqrt(PI)), tol); - EXPECT_NEAR(RealGauntTable::instance()(2, 2, 2, 0, 0, 0),sqrt(5)/(7*sqrt(PI)), tol); - EXPECT_NEAR(RealGauntTable::instance()(2, 2, 2, 0, 2, 2),-sqrt(5)/(7*sqrt(PI)), tol); - EXPECT_NEAR(RealGauntTable::instance()(2, 2, 2, -2, -2, 0),-sqrt(5)/(7*sqrt(PI)), tol); - - EXPECT_NEAR(RealGauntTable::instance()(1, 1, 2, -1, 0, -1),sqrt(15)/(10*sqrt(PI)), tol); - EXPECT_NEAR(RealGauntTable::instance()(1, 1, 2, 0, 1, 1),sqrt(15)/(10*sqrt(PI)), tol); - EXPECT_NEAR(RealGauntTable::instance()(1, 1, 2, 1, 1, 2),sqrt(15)/(10*sqrt(PI)), tol); - - EXPECT_NEAR(RealGauntTable::instance()(1, 1, 2, -1, -1, 2),-sqrt(15)/(10*sqrt(PI)), tol); - - EXPECT_NEAR(RealGauntTable::instance()(2, 2, 2, 0, 1, 1),sqrt(5)/(14*sqrt(PI)), tol); - EXPECT_NEAR(RealGauntTable::instance()(2, 2, 2, 1, 1, 2),sqrt(15)/(14*sqrt(PI)), tol); - EXPECT_NEAR(RealGauntTable::instance()(2, 2, 2, -1, -1, 2),-sqrt(15)/(14*sqrt(PI)), tol); - - -} + EXPECT_NEAR (RealGauntTable::instance () (1, 1, 2, 0, 0, 0), sqrt (5) / (5 * sqrt (PI)), tol); + EXPECT_NEAR (RealGauntTable::instance () (1, 1, 2, 1, 1, 0), -sqrt (5) / (10 * sqrt (PI)), 
tol); + EXPECT_NEAR (RealGauntTable::instance () (2, 2, 2, 0, 0, 0), sqrt (5) / (7 * sqrt (PI)), tol); + EXPECT_NEAR (RealGauntTable::instance () (2, 2, 2, 0, 2, 2), -sqrt (5) / (7 * sqrt (PI)), tol); + EXPECT_NEAR (RealGauntTable::instance () (2, 2, 2, -2, -2, 0), -sqrt (5) / (7 * sqrt (PI)), tol); + + EXPECT_NEAR (RealGauntTable::instance () (1, 1, 2, -1, 0, -1), sqrt (15) / (10 * sqrt (PI)), tol); + EXPECT_NEAR (RealGauntTable::instance () (1, 1, 2, 0, 1, 1), sqrt (15) / (10 * sqrt (PI)), tol); + EXPECT_NEAR (RealGauntTable::instance () (1, 1, 2, 1, 1, 2), sqrt (15) / (10 * sqrt (PI)), tol); + + EXPECT_NEAR (RealGauntTable::instance () (1, 1, 2, -1, -1, 2), -sqrt (15) / (10 * sqrt (PI)), tol); + EXPECT_NEAR (RealGauntTable::instance () (2, 2, 2, 0, 1, 1), sqrt (5) / (14 * sqrt (PI)), tol); + EXPECT_NEAR (RealGauntTable::instance () (2, 2, 2, 1, 1, 2), sqrt (15) / (14 * sqrt (PI)), tol); + EXPECT_NEAR (RealGauntTable::instance () (2, 2, 2, -1, -1, 2), -sqrt (15) / (14 * sqrt (PI)), tol); +} /** -* @brief gaunt function test: -* Use the sympy gaunt function run results as the test set. -* Test set results are stored in Gaunt.txt with data structure -* l1 l2 l3 m1 m2 m3 gaunt(l1,l2,l3,m1,m2,m3). -* Double precision machine error (1e-15) is used to compare the -* accuracy of the function in abacus and the gaunt function in sumpy. -*/ -struct gaunt_ans{ + * @brief gaunt function test: + * Use the sympy gaunt function run results as the test set. + * Test set results are stored in Gaunt.txt with data structure + * l1 l2 l3 m1 m2 m3 gaunt(l1,l2,l3,m1,m2,m3). + * Double precision machine error (1e-15) is used to compare the + * accuracy of the function in abacus and the gaunt function in sumpy. 
+ */ +struct gaunt_ans +{ int l1; int l2; int l3; @@ -168,115 +184,155 @@ struct gaunt_ans{ int m2; int m3; double gaunt; -}typedef gaunt_ans; - - +} typedef gaunt_ans; // the length of the values in gaunt.txt #define len_gaunt 7242 -TEST_F(RealGauntTableTest, Check3) +TEST_F (RealGauntTableTest, Check3) { gaunt_ans ga_ref[len_gaunt]; gaunt_ans ga_func[len_gaunt]; - - int len=len_gaunt; - std::ifstream infile("../../../../../source/source_basis/module_nao/test/gaunt.txt"); - if (!infile) { - EXPECT_NEAR(0,1,tol); - } + int len = len_gaunt; + std::ifstream infile ("../../../../../source/source_basis/module_nao/test/gaunt.txt"); + if (!infile) + { + EXPECT_NEAR (0, 1, tol); + } double tmp; - for(int i=0;i>ga_ref[i].l1>>ga_ref[i].l2>>ga_ref[i].l3>>ga_ref[i].m1>>ga_ref[i].m2>>ga_ref[i].m3>>ga_ref[i].gaunt; - } + for (int i = 0; i < len; i++) + { + infile >> ga_ref[i].l1 >> ga_ref[i].l2 >> ga_ref[i].l3 >> ga_ref[i].m1 >> ga_ref[i].m2 >> ga_ref[i].m3 + >> ga_ref[i].gaunt; + } - int cnt=0; - const double PI = 3.14159265358979323846; + int cnt = 0; + const double PI = 3.14159265358979323846; int l_max = 10; int l3_max = 24; - for(int l1=0;l1<=l_max;l1++){ - for(int l2=l1;l2<=l_max;l2++){ - for(int l3=l2;l3<=l3_max;l3++){ - for(int m1=-l1;m1<=l1;m1++){ - for(int m2=-l2;m2<=l2;m2++){ - for(int m3=0;m3<=l3;m3++){ - double gaunt = RealGauntTable::instance().gaunt(l1,l2,l3,m1,m2,m3); - double gaunt_symmetry[20]; - double tmp; - int cnt_sym = 0; - if(gaunt>tol&&cnt tol && cnt < len_gaunt) + { + ga_func[cnt].l1 = l1, ga_func[cnt].l2 = l2, + ga_func[cnt].l3 = l3; + ga_func[cnt].m1 = m1, ga_func[cnt].m2 = m2, + ga_func[cnt].m3 = m3; + ga_func[cnt++].gaunt = gaunt; + + // Detects whether the values of functions omitted in the + // loop due to symmetry are equal + if (l3 <= 10) + { + + gaunt_symmetry[cnt_sym++] + = RealGauntTable::instance () + .gaunt (l2, l1, l3, m2, m1, m3); + gaunt_symmetry[cnt_sym++] + = RealGauntTable::instance () + .gaunt (l2, l1, l3, -m2, -m1, -m3); + + 
gaunt_symmetry[cnt_sym++] + = RealGauntTable::instance () + .gaunt (l2, l3, l1, m2, m3, m1); + gaunt_symmetry[cnt_sym++] + = RealGauntTable::instance () + .gaunt (l2, l3, l1, -m2, -m3, -m1); + + gaunt_symmetry[cnt_sym++] + = RealGauntTable::instance () + .gaunt (l1, l3, l2, m1, m3, m2); + gaunt_symmetry[cnt_sym++] + = RealGauntTable::instance () + .gaunt (l1, l3, l2, -m1, -m3, -m2); + + gaunt_symmetry[cnt_sym++] + = RealGauntTable::instance () + .gaunt (l3, l2, l1, m3, m2, m1); + gaunt_symmetry[cnt_sym++] + = RealGauntTable::instance () + .gaunt (l3, l2, l1, -m3, -m2, -m1); + + gaunt_symmetry[cnt_sym++] + = RealGauntTable::instance () + .gaunt (l3, l1, l2, m3, m1, m2); + gaunt_symmetry[cnt_sym++] + = RealGauntTable::instance () + .gaunt (l3, l1, l2, -m3, -m1, -m2); + + gaunt_symmetry[cnt_sym++] + = RealGauntTable::instance () + .gaunt (l1, l2, l3, -m1, -m2, -m3); + } + else + { + gaunt_symmetry[cnt_sym++] + = RealGauntTable::instance () + .gaunt (l2, l1, l3, m2, m1, m3); + gaunt_symmetry[cnt_sym++] + = RealGauntTable::instance () + .gaunt (l2, l1, l3, -m2, -m1, -m3); + gaunt_symmetry[cnt_sym++] + = RealGauntTable::instance () + .gaunt (l1, l2, l3, -m1, -m2, -m3); + } + for (int i = 0; i < cnt_sym; i++) + { + EXPECT_NEAR (gaunt_symmetry[i], gaunt, tol); + } + } + } + } } - for(int i=0;i vR0 = {0.0, 0.0, 0.0}; - //double out; - //ovl->calculate(0, 0, 0, 0, 0, 0, 0, 0, vR0, false, &out); - //std::cout << "out = " << out << std::endl; + // ModuleBase::Vector3 vR0 = {0.0, 0.0, 0.0}; + // double out; + // ovl->calculate(0, 0, 0, 0, 0, 0, 0, 0, vR0, false, &out); + // std::cout << "out = " << out << std::endl; - //psibeta->calculate(0, 0, 0, 0, 0, 0, 0, 0, vR0, false, &out); - //std::cout << "out = " << out << std::endl; + // psibeta->calculate(0, 0, 0, 0, 0, 0, 0, 0, vR0, false, &out); + // std::cout << "out = " << out << std::endl; - //kin->calculate(0, 0, 0, 0, 0, 0, 0, 0, vR0, false, &out); - //std::cout << "out = " << out << std::endl; + // kin->calculate(0, 0, 0, 0, 
0, 0, 0, 0, vR0, false, &out); + // std::cout << "out = " << out << std::endl; } -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; diff --git a/source/source_basis/module_nao/test/two_center_integrator_test.cpp b/source/source_basis/module_nao/test/two_center_integrator_test.cpp index 37601fadfc7..3371c4c7808 100644 --- a/source/source_basis/module_nao/test/two_center_integrator_test.cpp +++ b/source/source_basis/module_nao/test/two_center_integrator_test.cpp @@ -28,8 +28,8 @@ using ModuleBase::Sphbes; class TwoCenterIntegratorTest : public ::testing::Test { protected: - void SetUp(); - void TearDown(); + void SetUp (); + void TearDown (); TwoCenterIntegrator S_intor; TwoCenterIntegrator T_intor; @@ -42,10 +42,11 @@ class TwoCenterIntegratorTest : public ::testing::Test double elem_tol = 1e-6; //! 
tolerance for comparison between new and legacy matrix elements }; -void TwoCenterIntegratorTest::SetUp() +void + TwoCenterIntegratorTest::SetUp () { #ifdef __MPI - MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); #endif std::string dir = "../../../../../tests/PP_ORB/"; @@ -60,41 +61,42 @@ void TwoCenterIntegratorTest::SetUp() file[6] = dir + "F_gga_7au_100Ry_2s2p1d.orb"; file[7] = dir + "I_gga_7au_100Ry_2s2p2d1f.orb"; - ModuleBase::Ylm::set_coefficients(); + ModuleBase::Ylm::set_coefficients (); } -void TwoCenterIntegratorTest::TearDown() +void + TwoCenterIntegratorTest::TearDown () { delete[] file; } -TEST_F(TwoCenterIntegratorTest, FiniteDifference) +TEST_F (TwoCenterIntegratorTest, FiniteDifference) { nfile = 3; - orb.build(nfile, file, 'o'); + orb.build (nfile, file, 'o'); ModuleBase::SphericalBesselTransformer sbt; - orb.set_transformer(sbt); + orb.set_transformer (sbt); - double rmax = orb.rcut_max() * 2.0; + double rmax = orb.rcut_max () * 2.0; double dr = 0.01; - int nr = static_cast(rmax / dr) + 1; + int nr = static_cast (rmax / dr) + 1; // ModuleBase::SphericalBesselTransformer sbt; // sbt.set_fftw_plan_flag(FFTW_MEASURE); // not necessarily worth it! 
// orb.set_transformer(&sbt, 0); - orb.set_uniform_grid(true, nr, rmax, 'i', true); + orb.set_uniform_grid (true, nr, rmax, 'i', true); iclock::time_point start; std::chrono::duration dur; - start = iclock::now(); + start = iclock::now (); - S_intor.tabulate(orb, orb, 'S', nr, rmax); - T_intor.tabulate(orb, orb, 'T', nr, rmax); + S_intor.tabulate (orb, orb, 'S', nr, rmax); + T_intor.tabulate (orb, orb, 'T', nr, rmax); - dur = iclock::now() - start; - std::cout << "time elapsed = " << dur.count() << " s" << std::endl; + dur = iclock::now () - start; + std::cout << "time elapsed = " << dur.count () << " s" << std::endl; // check whether analytical derivative and finite difference agree int ntype = nfile; @@ -104,62 +106,122 @@ TEST_F(TwoCenterIntegratorTest, FiniteDifference) ModuleBase::Vector3 vR; for (int t1 = 0; t1 < ntype; t1++) - { - for (int l1 = 0; l1 <= orb(t1).lmax(); l1++) { - for (int izeta1 = 0; izeta1 < orb(t1).nzeta(l1); izeta1++) - { - for (int m1 = -l1; m1 <= l1; ++m1) + for (int l1 = 0; l1 <= orb (t1).lmax (); l1++) { - for (int t2 = t1; t2 < ntype; t2++) - { - for (int l2 = 0; l2 <= orb(t2).lmax(); l2++) + for (int izeta1 = 0; izeta1 < orb (t1).nzeta (l1); izeta1++) { - for (int izeta2 = 0; izeta2 < orb(t2).nzeta(l2); izeta2++) - { - for (int m2 = -l2; m2 <= l2; ++m2) + for (int m1 = -l1; m1 <= l1; ++m1) { - double dx = 1e-4; - double elem_p; - double elem_m; - double grad_elem[3]; - - // S - vR = vR0; - vR[2] += dx; - S_intor.calculate(t1, l1, izeta1, m1, t2, l2, izeta2, m2, vR, &elem_p); - - vR = vR0; - vR[2] -= dx; - S_intor.calculate(t1, l1, izeta1, m1, t2, l2, izeta2, m2, vR, &elem_m); - - S_intor.calculate(t1, l1, izeta1, m1, t2, l2, izeta2, m2, vR, nullptr, grad_elem); - - EXPECT_NEAR((elem_p - elem_m) / (2. 
* dx), grad_elem[2], tol_d); - - // T - vR = vR0; - vR[2] += dx; - T_intor.calculate(t1, l1, izeta1, m1, t2, l2, izeta2, m2, vR, &elem_p); - - vR = vR0; - vR[2] -= dx; - T_intor.calculate(t1, l1, izeta1, m1, t2, l2, izeta2, m2, vR, &elem_m); - - T_intor.calculate(t1, l1, izeta1, m1, t2, l2, izeta2, m2, vR, nullptr, grad_elem); - - EXPECT_NEAR((elem_p - elem_m) / (2. * dx), grad_elem[2], tol_d); + for (int t2 = t1; t2 < ntype; t2++) + { + for (int l2 = 0; l2 <= orb (t2).lmax (); l2++) + { + for (int izeta2 = 0; izeta2 < orb (t2).nzeta (l2); izeta2++) + { + for (int m2 = -l2; m2 <= l2; ++m2) + { + double dx = 1e-4; + double elem_p; + double elem_m; + double grad_elem[3]; + + // S + vR = vR0; + vR[2] += dx; + S_intor.calculate (t1, + l1, + izeta1, + m1, + t2, + l2, + izeta2, + m2, + vR, + &elem_p); + + vR = vR0; + vR[2] -= dx; + S_intor.calculate (t1, + l1, + izeta1, + m1, + t2, + l2, + izeta2, + m2, + vR, + &elem_m); + + S_intor.calculate (t1, + l1, + izeta1, + m1, + t2, + l2, + izeta2, + m2, + vR, + nullptr, + grad_elem); + + EXPECT_NEAR ((elem_p - elem_m) / (2. * dx), + grad_elem[2], + tol_d); + + // T + vR = vR0; + vR[2] += dx; + T_intor.calculate (t1, + l1, + izeta1, + m1, + t2, + l2, + izeta2, + m2, + vR, + &elem_p); + + vR = vR0; + vR[2] -= dx; + T_intor.calculate (t1, + l1, + izeta1, + m1, + t2, + l2, + izeta2, + m2, + vR, + &elem_m); + + T_intor.calculate (t1, + l1, + izeta1, + m1, + t2, + l2, + izeta2, + m2, + vR, + nullptr, + grad_elem); + + EXPECT_NEAR ((elem_p - elem_m) / (2. 
* dx), + grad_elem[2], + tol_d); + } + } + } + } } - } } - } } - } } - } } -TEST_F(TwoCenterIntegratorTest, SphericalBessel) +TEST_F (TwoCenterIntegratorTest, SphericalBessel) { int lmax = 3; int nbes = 5; @@ -173,210 +235,212 @@ TEST_F(TwoCenterIntegratorTest, SphericalBessel) // for dr = 0.01, the error of kinetic matrix element is about 1.5e-3 // for dr = 0.001, the error of kinetic matrix element is about 1.5e-4 - orb.build(lmax, nbes, rcut, sigma, dr); + orb.build (lmax, nbes, rcut, sigma, dr); ModuleBase::SphericalBesselTransformer sbt; - orb.set_transformer(sbt); + orb.set_transformer (sbt); - double rmax = orb.rcut_max() * 2.0; - int nr = static_cast(rmax / dr) + 1; + double rmax = orb.rcut_max () * 2.0; + int nr = static_cast (rmax / dr) + 1; - orb.set_uniform_grid(true, nr, rmax, 'i', true); + orb.set_uniform_grid (true, nr, rmax, 'i', true); - S_intor.tabulate(orb, orb, 'S', nr, rmax); - T_intor.tabulate(orb, orb, 'T', nr, rmax); + S_intor.tabulate (orb, orb, 'S', nr, rmax); + T_intor.tabulate (orb, orb, 'T', nr, rmax); ModuleBase::Vector3 R0 = {0.0, 0.0, 0.0}; // zeros of spherical bessel functions double* zeros = new double[nbes * (lmax + 1)]; - Sphbes::sphbes_zeros(lmax, nbes, zeros, true); + Sphbes::sphbes_zeros (lmax, nbes, zeros, true); // checks the diagonal elements with analytical expression double elem, ref; for (int l = 0; l <= lmax; ++l) - { - for (int zeta = 0; zeta < nbes; ++zeta) { - S_intor.calculate(0, l, zeta, 0, 0, l, zeta, 0, R0, &elem); - ref = 0.5 * std::pow(rcut, 3) * std::pow(Sphbes::sphbesj(l + 1, zeros[l * nbes + zeta]), 2); - EXPECT_NEAR(elem, ref, 1e-5); - - T_intor.calculate(0, l, zeta, 0, 0, l, zeta, 0, R0, &elem); - ref = 0.5 * rcut * std::pow(zeros[l * nbes + zeta] * Sphbes::sphbesj(l + 1, zeros[l * nbes + zeta]), 2); - EXPECT_NEAR(elem, ref, 1e-3); - - // orthogonality - for (int zeta2 = 0; zeta2 < zeta; ++zeta2) - { - S_intor.calculate(0, l, zeta, 0, 0, l, zeta2, 0, R0, &elem); - ref = 0.0; - EXPECT_NEAR(elem, ref, 
1e-5); - } + for (int zeta = 0; zeta < nbes; ++zeta) + { + S_intor.calculate (0, l, zeta, 0, 0, l, zeta, 0, R0, &elem); + ref = 0.5 * std::pow (rcut, 3) * std::pow (Sphbes::sphbesj (l + 1, zeros[l * nbes + zeta]), 2); + EXPECT_NEAR (elem, ref, 1e-5); + + T_intor.calculate (0, l, zeta, 0, 0, l, zeta, 0, R0, &elem); + ref = 0.5 * rcut + * std::pow (zeros[l * nbes + zeta] * Sphbes::sphbesj (l + 1, zeros[l * nbes + zeta]), 2); + EXPECT_NEAR (elem, ref, 1e-3); + + // orthogonality + for (int zeta2 = 0; zeta2 < zeta; ++zeta2) + { + S_intor.calculate (0, l, zeta, 0, 0, l, zeta2, 0, R0, &elem); + ref = 0.0; + EXPECT_NEAR (elem, ref, 1e-5); + } + } } - } delete[] zeros; } -TEST_F(TwoCenterIntegratorTest, HessianSymmetry) +TEST_F (TwoCenterIntegratorTest, HessianSymmetry) { nfile = 3; - orb.build(nfile, file, 'o'); + orb.build (nfile, file, 'o'); ModuleBase::SphericalBesselTransformer sbt; - orb.set_transformer(sbt); + orb.set_transformer (sbt); - double rmax = orb.rcut_max() * 2.0; + double rmax = orb.rcut_max () * 2.0; double dr = 0.01; - int nr = static_cast(rmax / dr) + 1; + int nr = static_cast (rmax / dr) + 1; - orb.set_uniform_grid(true, nr, rmax, 'i', true); + orb.set_uniform_grid (true, nr, rmax, 'i', true); - S_intor.tabulate(orb, orb, 'S', nr, rmax); - T_intor.tabulate(orb, orb, 'T', nr, rmax); + S_intor.tabulate (orb, orb, 'S', nr, rmax); + T_intor.tabulate (orb, orb, 'T', nr, rmax); - ModuleBase::Vector3 R(1.5, 2.0, 1.0); + ModuleBase::Vector3 R (1.5, 2.0, 1.0); double hess[9]; // Test S operator - S_intor.calculate(0, 1, 0, 0, 1, 1, 0, 0, R, nullptr, nullptr, hess); + S_intor.calculate (0, 1, 0, 0, 1, 1, 0, 0, R, nullptr, nullptr, hess); - EXPECT_NEAR(hess[1], hess[3], 1e-10); // H_xy == H_yx - EXPECT_NEAR(hess[2], hess[6], 1e-10); // H_xz == H_zx - EXPECT_NEAR(hess[5], hess[7], 1e-10); // H_yz == H_zy + EXPECT_NEAR (hess[1], hess[3], 1e-10); // H_xy == H_yx + EXPECT_NEAR (hess[2], hess[6], 1e-10); // H_xz == H_zx + EXPECT_NEAR (hess[5], hess[7], 1e-10); // 
H_yz == H_zy // Test T operator - T_intor.calculate(0, 1, 0, 0, 1, 1, 0, 0, R, nullptr, nullptr, hess); + T_intor.calculate (0, 1, 0, 0, 1, 1, 0, 0, R, nullptr, nullptr, hess); - EXPECT_NEAR(hess[1], hess[3], 1e-10); // H_xy == H_yx - EXPECT_NEAR(hess[2], hess[6], 1e-10); // H_xz == H_zx - EXPECT_NEAR(hess[5], hess[7], 1e-10); // H_yz == H_zy + EXPECT_NEAR (hess[1], hess[3], 1e-10); // H_xy == H_yx + EXPECT_NEAR (hess[2], hess[6], 1e-10); // H_xz == H_zx + EXPECT_NEAR (hess[5], hess[7], 1e-10); // H_yz == H_zy } -TEST_F(TwoCenterIntegratorTest, HessianFiniteDifference) +TEST_F (TwoCenterIntegratorTest, HessianFiniteDifference) { nfile = 3; - orb.build(nfile, file, 'o'); + orb.build (nfile, file, 'o'); ModuleBase::SphericalBesselTransformer sbt; - orb.set_transformer(sbt); + orb.set_transformer (sbt); - double rmax = orb.rcut_max() * 2.0; + double rmax = orb.rcut_max () * 2.0; double dr = 0.01; - int nr = static_cast(rmax / dr) + 1; + int nr = static_cast (rmax / dr) + 1; - orb.set_uniform_grid(true, nr, rmax, 'i', true); + orb.set_uniform_grid (true, nr, rmax, 'i', true); - S_intor.tabulate(orb, orb, 'S', nr, rmax); - T_intor.tabulate(orb, orb, 'T', nr, rmax); + S_intor.tabulate (orb, orb, 'S', nr, rmax); + T_intor.tabulate (orb, orb, 'T', nr, rmax); - ModuleBase::Vector3 R(1.5, 2.0, 1.0); + ModuleBase::Vector3 R (1.5, 2.0, 1.0); double hess_analytical[9]; double hess_numerical[9]; double eps = 1e-5; // Test S operator - S_intor.calculate(0, 1, 0, 0, 1, 1, 0, 0, R, nullptr, nullptr, hess_analytical); + S_intor.calculate (0, 1, 0, 0, 1, 1, 0, 0, R, nullptr, nullptr, hess_analytical); // Compute numerical Hessian via finite differences for (int alpha = 0; alpha < 3; ++alpha) - { - for (int beta = 0; beta < 3; ++beta) { - ModuleBase::Vector3 R_plus = R, R_minus = R; - R_plus[beta] += eps; - R_minus[beta] -= eps; + for (int beta = 0; beta < 3; ++beta) + { + ModuleBase::Vector3 R_plus = R, R_minus = R; + R_plus[beta] += eps; + R_minus[beta] -= eps; - double 
grad_plus[3], grad_minus[3]; - S_intor.calculate(0, 1, 0, 0, 1, 1, 0, 0, R_plus, nullptr, grad_plus, nullptr); - S_intor.calculate(0, 1, 0, 0, 1, 1, 0, 0, R_minus, nullptr, grad_minus, nullptr); + double grad_plus[3], grad_minus[3]; + S_intor.calculate (0, 1, 0, 0, 1, 1, 0, 0, R_plus, nullptr, grad_plus, nullptr); + S_intor.calculate (0, 1, 0, 0, 1, 1, 0, 0, R_minus, nullptr, grad_minus, nullptr); - hess_numerical[alpha * 3 + beta] = (grad_plus[alpha] - grad_minus[alpha]) / (2.0 * eps); + hess_numerical[alpha * 3 + beta] = (grad_plus[alpha] - grad_minus[alpha]) / (2.0 * eps); + } } - } // Compare with tolerance appropriate for finite differences for (int i = 0; i < 9; ++i) - { - EXPECT_NEAR(hess_analytical[i], hess_numerical[i], 1e-5); - } + { + EXPECT_NEAR (hess_analytical[i], hess_numerical[i], 1e-5); + } // Test T operator - T_intor.calculate(0, 1, 0, 0, 1, 1, 0, 0, R, nullptr, nullptr, hess_analytical); + T_intor.calculate (0, 1, 0, 0, 1, 1, 0, 0, R, nullptr, nullptr, hess_analytical); for (int alpha = 0; alpha < 3; ++alpha) - { - for (int beta = 0; beta < 3; ++beta) { - ModuleBase::Vector3 R_plus = R, R_minus = R; - R_plus[beta] += eps; - R_minus[beta] -= eps; + for (int beta = 0; beta < 3; ++beta) + { + ModuleBase::Vector3 R_plus = R, R_minus = R; + R_plus[beta] += eps; + R_minus[beta] -= eps; - double grad_plus[3], grad_minus[3]; - T_intor.calculate(0, 1, 0, 0, 1, 1, 0, 0, R_plus, nullptr, grad_plus, nullptr); - T_intor.calculate(0, 1, 0, 0, 1, 1, 0, 0, R_minus, nullptr, grad_minus, nullptr); + double grad_plus[3], grad_minus[3]; + T_intor.calculate (0, 1, 0, 0, 1, 1, 0, 0, R_plus, nullptr, grad_plus, nullptr); + T_intor.calculate (0, 1, 0, 0, 1, 1, 0, 0, R_minus, nullptr, grad_minus, nullptr); - hess_numerical[alpha * 3 + beta] = (grad_plus[alpha] - grad_minus[alpha]) / (2.0 * eps); + hess_numerical[alpha * 3 + beta] = (grad_plus[alpha] - grad_minus[alpha]) / (2.0 * eps); + } } - } for (int i = 0; i < 9; ++i) - { - EXPECT_NEAR(hess_analytical[i], 
hess_numerical[i], 1e-5); - } + { + EXPECT_NEAR (hess_analytical[i], hess_numerical[i], 1e-5); + } } -TEST_F(TwoCenterIntegratorTest, HessianDoesNotBreakGradient) +TEST_F (TwoCenterIntegratorTest, HessianDoesNotBreakGradient) { nfile = 3; - orb.build(nfile, file, 'o'); + orb.build (nfile, file, 'o'); ModuleBase::SphericalBesselTransformer sbt; - orb.set_transformer(sbt); + orb.set_transformer (sbt); - double rmax = orb.rcut_max() * 2.0; + double rmax = orb.rcut_max () * 2.0; double dr = 0.01; - int nr = static_cast(rmax / dr) + 1; + int nr = static_cast (rmax / dr) + 1; - orb.set_uniform_grid(true, nr, rmax, 'i', true); + orb.set_uniform_grid (true, nr, rmax, 'i', true); - S_intor.tabulate(orb, orb, 'S', nr, rmax); - T_intor.tabulate(orb, orb, 'T', nr, rmax); + S_intor.tabulate (orb, orb, 'S', nr, rmax); + T_intor.tabulate (orb, orb, 'T', nr, rmax); - ModuleBase::Vector3 R(1.5, 2.0, 1.0); + ModuleBase::Vector3 R (1.5, 2.0, 1.0); double grad_only[3], grad_with_hess[3], hess[9]; // Test S operator - S_intor.calculate(0, 1, 0, 0, 1, 1, 0, 0, R, nullptr, grad_only, nullptr); - S_intor.calculate(0, 1, 0, 0, 1, 1, 0, 0, R, nullptr, grad_with_hess, hess); + S_intor.calculate (0, 1, 0, 0, 1, 1, 0, 0, R, nullptr, grad_only, nullptr); + S_intor.calculate (0, 1, 0, 0, 1, 1, 0, 0, R, nullptr, grad_with_hess, hess); for (int i = 0; i < 3; ++i) - { - EXPECT_NEAR(grad_only[i], grad_with_hess[i], 1e-12); - } + { + EXPECT_NEAR (grad_only[i], grad_with_hess[i], 1e-12); + } // Test T operator - T_intor.calculate(0, 1, 0, 0, 1, 1, 0, 0, R, nullptr, grad_only, nullptr); - T_intor.calculate(0, 1, 0, 0, 1, 1, 0, 0, R, nullptr, grad_with_hess, hess); + T_intor.calculate (0, 1, 0, 0, 1, 1, 0, 0, R, nullptr, grad_only, nullptr); + T_intor.calculate (0, 1, 0, 0, 1, 1, 0, 0, R, nullptr, grad_with_hess, hess); for (int i = 0; i < 3; ++i) - { - EXPECT_NEAR(grad_only[i], grad_with_hess[i], 1e-12); - } + { + EXPECT_NEAR (grad_only[i], grad_with_hess[i], 1e-12); + } } -int main(int argc, char** 
argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; diff --git a/source/source_basis/module_nao/test/two_center_table_test.cpp b/source/source_basis/module_nao/test/two_center_table_test.cpp index 9b2fba78754..f9c11876e59 100644 --- a/source/source_basis/module_nao/test/two_center_table_test.cpp +++ b/source/source_basis/module_nao/test/two_center_table_test.cpp @@ -25,8 +25,8 @@ using iclock = std::chrono::high_resolution_clock; class TwoCenterTableTest : public ::testing::Test { protected: - void SetUp(); - void TearDown(); + void SetUp (); + void TearDown (); TwoCenterTable S_tab; TwoCenterTable T_tab; @@ -43,10 +43,11 @@ class TwoCenterTableTest : public ::testing::Test double tol_d_rel = 1e-2; /// relative tolerance for derivative table (compared to finite difference) }; -void TwoCenterTableTest::SetUp() +void + TwoCenterTableTest::SetUp () { #ifdef __MPI - MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); #endif std::string dir = "../../../../../tests/PP_ORB/"; @@ -62,150 +63,172 @@ void TwoCenterTableTest::SetUp() file[7] = dir + "I_gga_7au_100Ry_2s2p2d1f.orb"; } -void TwoCenterTableTest::TearDown() +void + TwoCenterTableTest::TearDown () { delete[] file; } -TEST_F(TwoCenterTableTest, BuildOverlapAndKinetic) +TEST_F (TwoCenterTableTest, BuildOverlapAndKinetic) { - orb.build(nfile, file, 'o'); + orb.build (nfile, file, 'o'); ModuleBase::SphericalBesselTransformer sbt; - orb.set_transformer(sbt); + orb.set_transformer (sbt); - double rmax = orb.rcut_max() * 2.0; + double rmax = orb.rcut_max () * 2.0; double dr = 0.01; - int nr = static_cast(rmax / dr) + 1; + int nr = static_cast (rmax / dr) + 1; // 
ModuleBase::SphericalBesselTransformer sbt; // sbt.set_fftw_plan_flag(FFTW_MEASURE); // not necessarily worth it! // orb.set_transformer(&sbt, 0); - orb.set_uniform_grid(true, nr, rmax, 'i', true); + orb.set_uniform_grid (true, nr, rmax, 'i', true); iclock::time_point start; std::chrono::duration dur; - start = iclock::now(); - S_tab.build(orb, orb, 'S', nr, rmax); - T_tab.build(orb, orb, 'T', nr, rmax); - dur = iclock::now() - start; - std::cout << "time elapsed = " << dur.count() << " s" << std::endl; + start = iclock::now (); + S_tab.build (orb, orb, 'S', nr, rmax); + T_tab.build (orb, orb, 'T', nr, rmax); + dur = iclock::now () - start; + std::cout << "time elapsed = " << dur.count () << " s" << std::endl; // check whether the derivative table agrees with the finite difference of table int ntype = nfile; for (int T1 = 0; T1 < ntype; T1++) - { - for (int T2 = T1; T2 < ntype; T2++) { - for (int L1 = 0; L1 <= orb(T1).lmax(); L1++) - { - for (int N1 = 0; N1 < orb(T1).nzeta(L1); N1++) + for (int T2 = T1; T2 < ntype; T2++) { - for (int L2 = 0; L2 <= orb(T2).lmax(); L2++) - { - for (int N2 = 0; N2 < orb(T2).nzeta(L2); N2++) + for (int L1 = 0; L1 <= orb (T1).lmax (); L1++) { - for (int L = std::abs(L1 - L2); L <= (L1 + L2); L += 2) - { - const double* f = S_tab.table(T1, L1, N1, T2, L2, N2, L, false); - const double* df = S_tab.table(T1, L1, N1, T2, L2, N2, L, true); - - for (int ir = 4; ir != S_tab.nr() - 4; ++ir) - { - double df_fd - = (-1.0 / 280 * (f[ir + 4] - f[ir - 4]) + 4.0 / 105 * (f[ir + 3] - f[ir - 3]) - - 0.2 * (f[ir + 2] - f[ir - 2]) + 0.8 * (f[ir + 1] - f[ir - 1])) - / dr; - - // df is (d/dR)(S/R^l), it should be rescaled to have the - // same unit as dS/dR in order to have meaningful error comparison - double err_abs = std::abs(df_fd - df[ir]) * std::pow(ir * dr, L); - double err_rel = std::abs((df_fd - df[ir]) / df[ir]); - if (err_abs > tol_d_abs && err_rel > tol_d_rel) - { - printf("T1 = %i L1 = %i N1 = %i T2 = %i L2 = %i N2 = %i L = %i " - "ir = %2i 
df_fd = % 8.5e df_tab = % 8.5e err_abs = %8.5e " - "err_rel = %8.5e\n", - T1, - L1, - N1, - T2, - L2, - N2, - L, - ir, - df_fd, - df[ir], - err_abs, - err_rel); - } - - EXPECT_TRUE(err_abs < tol_d_abs || err_rel < tol_d_rel); - } - - f = T_tab.table(T1, L1, N1, T2, L2, N2, L, false); - df = T_tab.table(T1, L1, N1, T2, L2, N2, L, true); - - for (int ir = 4; ir != T_tab.nr() - 4; ++ir) + for (int N1 = 0; N1 < orb (T1).nzeta (L1); N1++) { - double df_fd - = (-1.0 / 280 * (f[ir + 4] - f[ir - 4]) + 4.0 / 105 * (f[ir + 3] - f[ir - 3]) - - 0.2 * (f[ir + 2] - f[ir - 2]) + 0.8 * (f[ir + 1] - f[ir - 1])) - / dr; - - // df is (d/dR)(S/R^l), it should be rescaled to have the - // same unit as dS/dR in order to have meaningful error comparison - double err_abs = std::abs(df_fd - df[ir]) * std::pow(ir * dr, L); - double err_rel = std::abs((df_fd - df[ir]) / df[ir]); - if (err_abs > tol_d_abs && err_rel > tol_d_rel) - { - printf("T1 = %i L1 = %i N1 = %i T2 = %i L2 = %i N2 = %i L = %i " - "ir = %2i df_fd = % 8.5e df_tab = % 8.5e err_abs = %8.5e " - "err_rel = %8.5e\n", - T1, - L1, - N1, - T2, - L2, - N2, - L, - ir, - df_fd, - df[ir], - err_abs, - err_rel); - } - - EXPECT_TRUE(err_abs < tol_d_abs || err_rel < tol_d_rel); + for (int L2 = 0; L2 <= orb (T2).lmax (); L2++) + { + for (int N2 = 0; N2 < orb (T2).nzeta (L2); N2++) + { + for (int L = std::abs (L1 - L2); L <= (L1 + L2); L += 2) + { + const double* f + = S_tab.table (T1, L1, N1, T2, L2, N2, L, false); + const double* df + = S_tab.table (T1, L1, N1, T2, L2, N2, L, true); + + for (int ir = 4; ir != S_tab.nr () - 4; ++ir) + { + double df_fd + = (-1.0 / 280 * (f[ir + 4] - f[ir - 4]) + + 4.0 / 105 * (f[ir + 3] - f[ir - 3]) + - 0.2 * (f[ir + 2] - f[ir - 2]) + + 0.8 * (f[ir + 1] - f[ir - 1])) + / dr; + + // df is (d/dR)(S/R^l), it should be rescaled to + // have the same unit as dS/dR in order to have + // meaningful error comparison + double err_abs = std::abs (df_fd - df[ir]) + * std::pow (ir * dr, L); + double err_rel + = 
std::abs ((df_fd - df[ir]) / df[ir]); + if (err_abs > tol_d_abs && err_rel > tol_d_rel) + { + printf ( + "T1 = %i L1 = %i N1 = %i T2 = %i " + " L2 = %i N2 = %i L = %i " + "ir = %2i df_fd = % 8.5e df_tab = " + "% 8.5e err_abs = %8.5e " + "err_rel = %8.5e\n", + T1, + L1, + N1, + T2, + L2, + N2, + L, + ir, + df_fd, + df[ir], + err_abs, + err_rel); + } + + EXPECT_TRUE (err_abs < tol_d_abs + || err_rel < tol_d_rel); + } + + f = T_tab.table (T1, L1, N1, T2, L2, N2, L, false); + df = T_tab.table (T1, L1, N1, T2, L2, N2, L, true); + + for (int ir = 4; ir != T_tab.nr () - 4; ++ir) + { + double df_fd + = (-1.0 / 280 * (f[ir + 4] - f[ir - 4]) + + 4.0 / 105 * (f[ir + 3] - f[ir - 3]) + - 0.2 * (f[ir + 2] - f[ir - 2]) + + 0.8 * (f[ir + 1] - f[ir - 1])) + / dr; + + // df is (d/dR)(S/R^l), it should be rescaled to + // have the same unit as dS/dR in order to have + // meaningful error comparison + double err_abs = std::abs (df_fd - df[ir]) + * std::pow (ir * dr, L); + double err_rel + = std::abs ((df_fd - df[ir]) / df[ir]); + if (err_abs > tol_d_abs && err_rel > tol_d_rel) + { + printf ( + "T1 = %i L1 = %i N1 = %i T2 = %i " + " L2 = %i N2 = %i L = %i " + "ir = %2i df_fd = % 8.5e df_tab = " + "% 8.5e err_abs = %8.5e " + "err_rel = %8.5e\n", + T1, + L1, + N1, + T2, + L2, + N2, + L, + ir, + df_fd, + df[ir], + err_abs, + err_rel); + } + + EXPECT_TRUE (err_abs < tol_d_abs + || err_rel < tol_d_rel); + } + } + } + } } - } } - } } - } } - } - EXPECT_EQ(S_tab.nr(), nr); - EXPECT_EQ(T_tab.nr(), nr); + EXPECT_EQ (S_tab.nr (), nr); + EXPECT_EQ (T_tab.nr (), nr); - EXPECT_EQ(S_tab.rmax(), rmax); - EXPECT_EQ(T_tab.rmax(), rmax); + EXPECT_EQ (S_tab.rmax (), rmax); + EXPECT_EQ (T_tab.rmax (), rmax); } -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef 
__MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; diff --git a/source/source_basis/module_nao/two_center_bundle.cpp b/source/source_basis/module_nao/two_center_bundle.cpp index b7c9dc9cfb0..82de967dd34 100644 --- a/source/source_basis/module_nao/two_center_bundle.cpp +++ b/source/source_basis/module_nao/two_center_bundle.cpp @@ -9,211 +9,241 @@ #include -void TwoCenterBundle::build_orb(int ntype, const std::string* file_orb0) +void + TwoCenterBundle::build_orb (int ntype, const std::string* file_orb0) { - std::vector file_orb(ntype); + std::vector file_orb (ntype); if (GlobalV::MY_RANK == 0) - { - std::transform(file_orb0, file_orb0 + ntype, file_orb.begin(), [](const std::string& file) { - return PARAM.inp.orbital_dir + file; - }); - } + { + std::transform (file_orb0, + file_orb0 + ntype, + file_orb.begin (), + [] (const std::string& file) { return PARAM.inp.orbital_dir + file; }); + } #ifdef __MPI - Parallel_Common::bcast_string(file_orb.data(), ntype); + Parallel_Common::bcast_string (file_orb.data (), ntype); #endif - orb_ = std::unique_ptr(new RadialCollection); - orb_->build(ntype, file_orb.data()); // automatically detect file type + orb_ = std::unique_ptr (new RadialCollection); + orb_->build (ntype, file_orb.data ()); // automatically detect file type } -void TwoCenterBundle::build_beta(int ntype, Numerical_Nonlocal* nl) +void + TwoCenterBundle::build_beta (int ntype, Numerical_Nonlocal* nl) { - beta_ = std::unique_ptr(new RadialCollection); - beta_->build(ntype, nl); + beta_ = std::unique_ptr (new RadialCollection); + beta_->build (ntype, nl); } -void TwoCenterBundle::build_alpha(int ndesc, std::string* file_desc0) +void + TwoCenterBundle::build_alpha (int ndesc, std::string* file_desc0) { if (PARAM.globalv.deepks_setorb) - { - std::vector file_desc(ndesc); - if (GlobalV::MY_RANK == 0) { - std::copy(file_desc0, file_desc0 + ndesc, file_desc.begin()); - } + std::vector file_desc (ndesc); + if (GlobalV::MY_RANK == 0) + { + std::copy 
(file_desc0, file_desc0 + ndesc, file_desc.begin ()); + } #ifdef __MPI - Parallel_Common::bcast_string(file_desc.data(), ndesc); + Parallel_Common::bcast_string (file_desc.data (), ndesc); #endif - alpha_ = std::unique_ptr(new RadialCollection); - alpha_->build(ndesc, file_desc.data()); - } + alpha_ = std::unique_ptr (new RadialCollection); + alpha_->build (ndesc, file_desc.data ()); + } } -void TwoCenterBundle::build_orb_onsite(const double& radius) +void + TwoCenterBundle::build_orb_onsite (const double& radius) { if (radius > 0) - { - orb_onsite_ = std::unique_ptr(new RadialCollection); - orb_onsite_->build(orb_.get(), radius); - } + { + orb_onsite_ = std::unique_ptr (new RadialCollection); + orb_onsite_->build (orb_.get (), radius); + } } -void TwoCenterBundle::tabulate() +void + TwoCenterBundle::tabulate () { - ModuleBase::SphericalBesselTransformer sbt(true); - orb_->set_transformer(sbt); - if (beta_) { beta_->set_transformer(sbt); } - if (alpha_) { - alpha_->set_transformer(sbt); -} - if (orb_onsite_) { - orb_onsite_->set_transformer(sbt); -} + ModuleBase::SphericalBesselTransformer sbt (true); + orb_->set_transformer (sbt); + if (beta_) + { + beta_->set_transformer (sbt); + } + if (alpha_) + { + alpha_->set_transformer (sbt); + } + if (orb_onsite_) + { + orb_onsite_->set_transformer (sbt); + } //================================================================ // build two-center integration tables //================================================================ // set up a universal radial grid - double rmax = orb_->rcut_max(); - if (beta_) { rmax = std::max(rmax, beta_->rcut_max()); } - if (alpha_) { rmax = std::max(rmax, alpha_->rcut_max()); } + double rmax = orb_->rcut_max (); + if (beta_) + { + rmax = std::max (rmax, beta_->rcut_max ()); + } + if (alpha_) + { + rmax = std::max (rmax, alpha_->rcut_max ()); + } double dr = 0.01; double cutoff = 2.0 * rmax; - int nr = static_cast(rmax / dr) + 1; + int nr = static_cast (rmax / dr) + 1; - 
orb_->set_uniform_grid(true, nr, cutoff, 'i', true); - if (beta_) { beta_->set_uniform_grid(true, nr, cutoff, 'i', true); } - if (alpha_) { alpha_->set_uniform_grid(true, nr, cutoff, 'i', true);} - if (orb_onsite_) { orb_onsite_->set_uniform_grid(true, nr, cutoff, 'i', true);} + orb_->set_uniform_grid (true, nr, cutoff, 'i', true); + if (beta_) + { + beta_->set_uniform_grid (true, nr, cutoff, 'i', true); + } + if (alpha_) + { + alpha_->set_uniform_grid (true, nr, cutoff, 'i', true); + } + if (orb_onsite_) + { + orb_onsite_->set_uniform_grid (true, nr, cutoff, 'i', true); + } // build TwoCenterIntegrator objects - kinetic_orb = std::unique_ptr(new TwoCenterIntegrator); - kinetic_orb->tabulate(*orb_, *orb_, 'T', nr, cutoff); - ModuleBase::Memory::record("TwoCenterTable: Kinetic", kinetic_orb->table_memory()); + kinetic_orb = std::unique_ptr (new TwoCenterIntegrator); + kinetic_orb->tabulate (*orb_, *orb_, 'T', nr, cutoff); + ModuleBase::Memory::record ("TwoCenterTable: Kinetic", kinetic_orb->table_memory ()); - overlap_orb = std::unique_ptr(new TwoCenterIntegrator); - overlap_orb->tabulate(*orb_, *orb_, 'S', nr, cutoff); - ModuleBase::Memory::record("TwoCenterTable: Overlap", overlap_orb->table_memory()); + overlap_orb = std::unique_ptr (new TwoCenterIntegrator); + overlap_orb->tabulate (*orb_, *orb_, 'S', nr, cutoff); + ModuleBase::Memory::record ("TwoCenterTable: Overlap", overlap_orb->table_memory ()); if (beta_) - { - overlap_orb_beta = std::unique_ptr(new TwoCenterIntegrator); - overlap_orb_beta->tabulate(*orb_, *beta_, 'S', nr, cutoff); - ModuleBase::Memory::record("TwoCenterTable: Nonlocal", overlap_orb_beta->table_memory()); - } + { + overlap_orb_beta = std::unique_ptr (new TwoCenterIntegrator); + overlap_orb_beta->tabulate (*orb_, *beta_, 'S', nr, cutoff); + ModuleBase::Memory::record ("TwoCenterTable: Nonlocal", overlap_orb_beta->table_memory ()); + } if (alpha_) - { - overlap_orb_alpha = std::unique_ptr(new TwoCenterIntegrator); - 
overlap_orb_alpha->tabulate(*orb_, *alpha_, 'S', nr, cutoff); - ModuleBase::Memory::record("TwoCenterTable: Descriptor", overlap_orb_alpha->table_memory()); - } + { + overlap_orb_alpha = std::unique_ptr (new TwoCenterIntegrator); + overlap_orb_alpha->tabulate (*orb_, *alpha_, 'S', nr, cutoff); + ModuleBase::Memory::record ("TwoCenterTable: Descriptor", overlap_orb_alpha->table_memory ()); + } if (orb_onsite_) - { - overlap_orb_onsite = std::unique_ptr(new TwoCenterIntegrator); - overlap_orb_onsite->tabulate(*orb_, *orb_onsite_, 'S', nr, cutoff); - } + { + overlap_orb_onsite = std::unique_ptr (new TwoCenterIntegrator); + overlap_orb_onsite->tabulate (*orb_, *orb_onsite_, 'S', nr, cutoff); + } - ModuleBase::Memory::record("RealGauntTable", RealGauntTable::instance().memory()); + ModuleBase::Memory::record ("RealGauntTable", RealGauntTable::instance ().memory ()); - sbt.clear(); + sbt.clear (); } -void TwoCenterBundle::tabulate(const double lcao_ecut, +void + TwoCenterBundle::tabulate (const double lcao_ecut, const double lcao_dk, const double lcao_dr, const double lcao_rmax) { - ModuleBase::SphericalBesselTransformer sbt(true); - orb_->set_transformer(sbt); - beta_->set_transformer(sbt); - if (alpha_) { - alpha_->set_transformer(sbt); -} - if (orb_onsite_) { - orb_onsite_->set_transformer(sbt); -} + ModuleBase::SphericalBesselTransformer sbt (true); + orb_->set_transformer (sbt); + beta_->set_transformer (sbt); + if (alpha_) + { + alpha_->set_transformer (sbt); + } + if (orb_onsite_) + { + orb_onsite_->set_transformer (sbt); + } //================================================================ // build two-center integration tables //================================================================ // old formula for the number of k-space grid points - int nk = static_cast(sqrt(lcao_ecut) / lcao_dk) + 4; + int nk = static_cast (sqrt (lcao_ecut) / lcao_dk) + 4; nk += 1 - nk % 2; // make nk odd - std::vector kgrid(nk); + std::vector kgrid (nk); for (int ik = 0; ik < 
nk; ++ik) - { - kgrid[ik] = ik * lcao_dk; - } + { + kgrid[ik] = ik * lcao_dk; + } - orb_->set_grid(false, nk, kgrid.data(), 't'); - beta_->set_grid(false, nk, kgrid.data(), 't'); + orb_->set_grid (false, nk, kgrid.data (), 't'); + beta_->set_grid (false, nk, kgrid.data (), 't'); if (alpha_) - { - alpha_->set_grid(false, nk, kgrid.data(), 't'); - } + { + alpha_->set_grid (false, nk, kgrid.data (), 't'); + } if (orb_onsite_) - { - orb_onsite_->set_grid(false, nk, kgrid.data(), 't'); - } + { + orb_onsite_->set_grid (false, nk, kgrid.data (), 't'); + } // "st" stands for overlap (s) and kinetic (t) - const double cutoff_st = std::min(lcao_rmax, 2.0 * orb_->rcut_max()); - const int nr_st = static_cast(cutoff_st / lcao_dr) + 5; + const double cutoff_st = std::min (lcao_rmax, 2.0 * orb_->rcut_max ()); + const int nr_st = static_cast (cutoff_st / lcao_dr) + 5; - kinetic_orb = std::unique_ptr(new TwoCenterIntegrator); - kinetic_orb->tabulate(*orb_, *orb_, 'T', nr_st, cutoff_st); - ModuleBase::Memory::record("TwoCenterTable: Kinetic", kinetic_orb->table_memory()); + kinetic_orb = std::unique_ptr (new TwoCenterIntegrator); + kinetic_orb->tabulate (*orb_, *orb_, 'T', nr_st, cutoff_st); + ModuleBase::Memory::record ("TwoCenterTable: Kinetic", kinetic_orb->table_memory ()); - overlap_orb = std::unique_ptr(new TwoCenterIntegrator); - overlap_orb->tabulate(*orb_, *orb_, 'S', nr_st, cutoff_st); - ModuleBase::Memory::record("TwoCenterTable: Overlap", overlap_orb->table_memory()); + overlap_orb = std::unique_ptr (new TwoCenterIntegrator); + overlap_orb->tabulate (*orb_, *orb_, 'S', nr_st, cutoff_st); + ModuleBase::Memory::record ("TwoCenterTable: Overlap", overlap_orb->table_memory ()); // overlap between orbital and beta (for nonlocal potential) - const double cutoff_nl = std::min(lcao_rmax, orb_->rcut_max() + beta_->rcut_max()); - const int nr_nl = static_cast(cutoff_nl / lcao_dr) + 5; - overlap_orb_beta = std::unique_ptr(new TwoCenterIntegrator); - 
overlap_orb_beta->tabulate(*orb_, *beta_, 'S', nr_nl, cutoff_nl); - ModuleBase::Memory::record("TwoCenterTable: Nonlocal", overlap_orb_beta->table_memory()); + const double cutoff_nl = std::min (lcao_rmax, orb_->rcut_max () + beta_->rcut_max ()); + const int nr_nl = static_cast (cutoff_nl / lcao_dr) + 5; + overlap_orb_beta = std::unique_ptr (new TwoCenterIntegrator); + overlap_orb_beta->tabulate (*orb_, *beta_, 'S', nr_nl, cutoff_nl); + ModuleBase::Memory::record ("TwoCenterTable: Nonlocal", overlap_orb_beta->table_memory ()); // overlap between orbital and deepks projector if (alpha_) - { - const double cutoff_alpha = std::min(lcao_rmax, orb_->rcut_max() + alpha_->rcut_max()); - const int nr_alpha = static_cast(cutoff_alpha / lcao_dr) + 5; - overlap_orb_alpha = std::unique_ptr(new TwoCenterIntegrator); - overlap_orb_alpha->tabulate(*orb_, *alpha_, 'S', nr_alpha, cutoff_alpha); - ModuleBase::Memory::record("TwoCenterTable: Descriptor", overlap_orb_beta->table_memory()); - } + { + const double cutoff_alpha = std::min (lcao_rmax, orb_->rcut_max () + alpha_->rcut_max ()); + const int nr_alpha = static_cast (cutoff_alpha / lcao_dr) + 5; + overlap_orb_alpha = std::unique_ptr (new TwoCenterIntegrator); + overlap_orb_alpha->tabulate (*orb_, *alpha_, 'S', nr_alpha, cutoff_alpha); + ModuleBase::Memory::record ("TwoCenterTable: Descriptor", overlap_orb_beta->table_memory ()); + } // overlap between orbital and "onsite orbital" (for DFT+U) if (orb_onsite_) - { - const double cutoff_onsite = std::min(lcao_rmax, orb_->rcut_max() + orb_onsite_->rcut_max()); - const int nr_onsite = static_cast(cutoff_onsite / lcao_dr) + 5; - overlap_orb_onsite = std::unique_ptr(new TwoCenterIntegrator); - overlap_orb_onsite->tabulate(*orb_, *orb_onsite_, 'S', nr_onsite, cutoff_onsite); - } + { + const double cutoff_onsite = std::min (lcao_rmax, orb_->rcut_max () + orb_onsite_->rcut_max ()); + const int nr_onsite = static_cast (cutoff_onsite / lcao_dr) + 5; + overlap_orb_onsite = std::unique_ptr 
(new TwoCenterIntegrator); + overlap_orb_onsite->tabulate (*orb_, *orb_onsite_, 'S', nr_onsite, cutoff_onsite); + } - ModuleBase::Memory::record("RealGauntTable", RealGauntTable::instance().memory()); + ModuleBase::Memory::record ("RealGauntTable", RealGauntTable::instance ().memory ()); - sbt.clear(); + sbt.clear (); } -void TwoCenterBundle::to_LCAO_Orbitals(LCAO_Orbitals& ORB, +void + TwoCenterBundle::to_LCAO_Orbitals (LCAO_Orbitals& ORB, const double lcao_ecut, const double lcao_dk, const double lcao_dr, const double lcao_rmax) const { - ORB.ntype = orb_->ntype(); - ORB.lmax = orb_->lmax(); - ORB.nchimax = orb_->nzeta_max(); - ORB.rcutmax_Phi = orb_->rcut_max(); + ORB.ntype = orb_->ntype (); + ORB.lmax = orb_->lmax (); + ORB.nchimax = orb_->nzeta_max (); + ORB.rcutmax_Phi = orb_->rcut_max (); ORB.dR = lcao_dr; ORB.Rmax = lcao_rmax; ORB.dr_uniform = 0.001; @@ -227,32 +257,32 @@ void TwoCenterBundle::to_LCAO_Orbitals(LCAO_Orbitals& ORB, ORB.dk = lcao_dk; if (ORB.ecutwfc < 20) - { - ORB.kmesh = static_cast(2 * sqrt(ORB.ecutwfc) / ORB.dk) + 4; - } + { + ORB.kmesh = static_cast (2 * sqrt (ORB.ecutwfc) / ORB.dk) + 4; + } else - { - ORB.kmesh = static_cast(sqrt(ORB.ecutwfc) / ORB.dk) + 4; - } + { + ORB.kmesh = static_cast (sqrt (ORB.ecutwfc) / ORB.dk) + 4; + } ORB.kmesh += 1 - ORB.kmesh % 2; delete[] ORB.Phi; - ORB.Phi = new Numerical_Orbital[orb_->ntype()]; - for (int itype = 0; itype < orb_->ntype(); ++itype) - { - (*orb_)(itype).to_numerical_orbital(ORB.Phi[itype], ORB.kmesh, ORB.dk); - } + ORB.Phi = new Numerical_Orbital[orb_->ntype ()]; + for (int itype = 0; itype < orb_->ntype (); ++itype) + { + (*orb_) (itype).to_numerical_orbital (ORB.Phi[itype], ORB.kmesh, ORB.dk); + } if (PARAM.globalv.deepks_setorb) - { - ORB.lmax_d = alpha_->lmax(); - ORB.nchimax_d = alpha_->nzeta_max(); - - delete[] ORB.Alpha; - ORB.Alpha = new Numerical_Orbital[alpha_->ntype()]; - for (int itype = 0; itype < alpha_->ntype(); ++itype) { - 
(*alpha_)(itype).to_numerical_orbital(ORB.Alpha[itype], ORB.kmesh, ORB.dk); + ORB.lmax_d = alpha_->lmax (); + ORB.nchimax_d = alpha_->nzeta_max (); + + delete[] ORB.Alpha; + ORB.Alpha = new Numerical_Orbital[alpha_->ntype ()]; + for (int itype = 0; itype < alpha_->ntype (); ++itype) + { + (*alpha_) (itype).to_numerical_orbital (ORB.Alpha[itype], ORB.kmesh, ORB.dk); + } } - } } diff --git a/source/source_basis/module_nao/two_center_bundle.h b/source/source_basis/module_nao/two_center_bundle.h index b2708a9dadf..9658cfcb360 100644 --- a/source/source_basis/module_nao/two_center_bundle.h +++ b/source/source_basis/module_nao/two_center_bundle.h @@ -10,22 +10,22 @@ class TwoCenterBundle { public: - TwoCenterBundle() = default; - ~TwoCenterBundle() = default; - TwoCenterBundle& operator=(TwoCenterBundle&&) = default; + TwoCenterBundle () = default; + ~TwoCenterBundle () = default; + TwoCenterBundle& operator= (TwoCenterBundle&&) = default; // NOTE: some variables might be set only on RANK-0 - void build_orb(int ntype, const std::string* file_orb0); - void build_beta(int ntype, Numerical_Nonlocal* nl); - void build_alpha(int ndesc = 0, std::string* file_desc0 = nullptr); - void build_orb_onsite(const double& radius); + void build_orb (int ntype, const std::string* file_orb0); + void build_beta (int ntype, Numerical_Nonlocal* nl); + void build_alpha (int ndesc = 0, std::string* file_desc0 = nullptr); + void build_orb_onsite (const double& radius); - void tabulate(); + void tabulate (); // Unlike the tabulate() above, this overload function computes // two-center integration table by direct integration with Simpson's // rule, which was the algorithm used prior to v3.3.4. - void tabulate(const double lcao_ecut, const double lcao_dk, const double lcao_dr, const double lcao_rmax); + void tabulate (const double lcao_ecut, const double lcao_dk, const double lcao_dr, const double lcao_rmax); /** * @brief Overwrites the content of a LCAO_Orbitals object (e.g. 
ORB) @@ -33,11 +33,11 @@ class TwoCenterBundle * * This function provides an interface to the corresponding object in the old module_ao. */ - void to_LCAO_Orbitals(LCAO_Orbitals& orb, - const double lcao_ecut, - const double lcao_dk, - const double lcao_dr, - const double lcao_rmax) const; + void to_LCAO_Orbitals (LCAO_Orbitals& orb, + const double lcao_ecut, + const double lcao_dk, + const double lcao_dr, + const double lcao_rmax) const; std::unique_ptr kinetic_orb; std::unique_ptr overlap_orb; diff --git a/source/source_basis/module_nao/two_center_integrator.cpp b/source/source_basis/module_nao/two_center_integrator.cpp index 436f3885ed8..b2c44a3a1b0 100644 --- a/source/source_basis/module_nao/two_center_integrator.cpp +++ b/source/source_basis/module_nao/two_center_integrator.cpp @@ -3,25 +3,23 @@ #include "source_base/vector3.h" #include "source_base/ylm.h" -TwoCenterIntegrator::TwoCenterIntegrator(): - is_tabulated_(false), - op_('\0') -{ -} +TwoCenterIntegrator::TwoCenterIntegrator () : is_tabulated_ (false), op_ ('\0') {} -void TwoCenterIntegrator::tabulate(const RadialCollection& bra, +void + TwoCenterIntegrator::tabulate (const RadialCollection& bra, const RadialCollection& ket, const char op, const int nr, const double cutoff) { op_ = op; - table_.build(bra, ket, op, nr, cutoff); - RealGauntTable::instance().build(std::max(bra.lmax(), ket.lmax())); + table_.build (bra, ket, op, nr, cutoff); + RealGauntTable::instance ().build (std::max (bra.lmax (), ket.lmax ())); is_tabulated_ = true; } -void TwoCenterIntegrator::calculate(const int itype1, +void + TwoCenterIntegrator::calculate (const int itype1, const int l1, const int izeta1, const int m1, @@ -29,52 +27,57 @@ void TwoCenterIntegrator::calculate(const int itype1, const int l2, const int izeta2, const int m2, - const ModuleBase::Vector3& vR, // R = R2 - R1 + const ModuleBase::Vector3& vR, // R = R2 - R1 double* out, double* grad_out, double* hess_out) const { #ifdef __DEBUG - assert( is_tabulated_ ); - 
assert( out || grad_out || hess_out ); + assert (is_tabulated_); + assert (out || grad_out || hess_out); #endif - if (out) *out = 0.0; - if (grad_out) std::fill(grad_out, grad_out + 3, 0.0); - if (hess_out) std::fill(hess_out, hess_out + 9, 0.0); + if (out) + *out = 0.0; + if (grad_out) + std::fill (grad_out, grad_out + 3, 0.0); + if (hess_out) + std::fill (hess_out, hess_out + 9, 0.0); - double R = vR.norm(); - if (R > table_.rmax()) - { - return; - } + double R = vR.norm (); + if (R > table_.rmax ()) + { + return; + } if (m1 > l1 || m1 < -l1 || m2 > l2 || m2 < -l2) - { - ModuleBase::WARNING("TwoCenterIntegrator", "m should be in range [-l, l]."); - return; - } + { + ModuleBase::WARNING ("TwoCenterIntegrator", "m should be in range [-l, l]."); + return; + } // Check angular momentum limitation for Hessian computation if (hess_out && (l1 + l2 > 6)) - { - ModuleBase::WARNING_QUIT("TwoCenterIntegrator::calculate", - "Hessian computation not supported for l1+l2 > 6"); - } + { + ModuleBase::WARNING_QUIT ("TwoCenterIntegrator::calculate", + "Hessian computation not supported for l1+l2 > 6"); + } // unit vector along R - ModuleBase::Vector3 uR = (R == 0.0 ? ModuleBase::Vector3(0., 0., 1.) : vR / R); + ModuleBase::Vector3 uR = (R == 0.0 ? ModuleBase::Vector3 (0., 0., 1.) 
: vR / R); // generate all necessary real (solid) spherical harmonics const int lmax = l1 + l2; - std::vector Rl_Y((lmax+1) * (lmax+1)); - std::vector grad_Rl_Y((lmax+1) * (lmax+1) * 3); + std::vector Rl_Y ((lmax + 1) * (lmax + 1)); + std::vector grad_Rl_Y ((lmax + 1) * (lmax + 1) * 3); std::vector> hess_Rl_Y; // R^l * Y is necessary anyway - ModuleBase::Ylm::rl_sph_harm(l1 + l2, vR[0], vR[1], vR[2], Rl_Y); - if (grad_out || hess_out) ModuleBase::Ylm::grad_rl_sph_harm(l1 + l2, vR[0], vR[1], vR[2], Rl_Y.data(), grad_Rl_Y.data()); - if (hess_out) ModuleBase::Ylm::hes_rl_sph_harm(l1 + l2, vR[0], vR[1], vR[2], hess_Rl_Y); + ModuleBase::Ylm::rl_sph_harm (l1 + l2, vR[0], vR[1], vR[2], Rl_Y); + if (grad_out || hess_out) + ModuleBase::Ylm::grad_rl_sph_harm (l1 + l2, vR[0], vR[1], vR[2], Rl_Y.data (), grad_Rl_Y.data ()); + if (hess_out) + ModuleBase::Ylm::hes_rl_sph_harm (l1 + l2, vR[0], vR[1], vR[2], hess_Rl_Y); double tmp[3] = {0.0, 0.0, 0.0}; double* S_by_Rl = tmp; @@ -82,130 +85,150 @@ void TwoCenterIntegrator::calculate(const int itype1, double* d2_S_by_Rl = hess_out ? tmp + 2 : nullptr; // the sign is given by i^(l1-l2-l) = (-1)^((l1-l2-l)/2) - int sign = (l1 - l2 - std::abs(l1 - l2)) % 4 == 0 ? 1 : -1; - for (int l = std::abs(l1 - l2); l <= l1 + l2; l += 2) - { - // look up S/R^l, (d/dR)(S/R^l), and (d²/dR²)(S/R^l) from the radial table - table_.lookup(itype1, l1, izeta1, itype2, l2, izeta2, l, R, S_by_Rl, d_S_by_Rl, d2_S_by_Rl); - - for (int m = -l; m <= l; ++m) + int sign = (l1 - l2 - std::abs (l1 - l2)) % 4 == 0 ? 
1 : -1; + for (int l = std::abs (l1 - l2); l <= l1 + l2; l += 2) { - double G = RealGauntTable::instance()(l1, l2, l, m1, m2, m); - int lm_idx = ylm_index(l, m); + // look up S/R^l, (d/dR)(S/R^l), and (d²/dR²)(S/R^l) from the radial table + table_.lookup (itype1, l1, izeta1, itype2, l2, izeta2, l, R, S_by_Rl, d_S_by_Rl, d2_S_by_Rl); - if (out) - { - *out += sign * G * (*S_by_Rl) * Rl_Y[lm_idx]; - } - - if (grad_out) - { - for (int i = 0; i < 3; ++i) + for (int m = -l; m <= l; ++m) { - grad_out[i] += sign * G * ( (*d_S_by_Rl) * uR[i] * Rl_Y[lm_idx] - + (*S_by_Rl) * grad_Rl_Y[lm_idx*3 + i] ); + double G = RealGauntTable::instance () (l1, l2, l, m1, m2, m); + int lm_idx = ylm_index (l, m); + + if (out) + { + *out += sign * G * (*S_by_Rl) * Rl_Y[lm_idx]; + } + + if (grad_out) + { + for (int i = 0; i < 3; ++i) + { + grad_out[i] += sign * G + * ((*d_S_by_Rl) * uR[i] * Rl_Y[lm_idx] + + (*S_by_Rl) * grad_Rl_Y[lm_idx * 3 + i]); + } + } + + if (hess_out) + { + // Convert 6-element symmetric format to 9-element full matrix + // hess_Rl_Y[lm_idx] = [H_xx, H_xy, H_xz, H_yy, H_yz, H_zz] + double H_full[9] = {hess_Rl_Y[lm_idx][0], + hess_Rl_Y[lm_idx][1], + hess_Rl_Y[lm_idx][2], + hess_Rl_Y[lm_idx][1], + hess_Rl_Y[lm_idx][3], + hess_Rl_Y[lm_idx][4], + hess_Rl_Y[lm_idx][2], + hess_Rl_Y[lm_idx][4], + hess_Rl_Y[lm_idx][5]}; + + for (int alpha = 0; alpha < 3; ++alpha) + { + for (int beta = 0; beta < 3; ++beta) + { + int idx = alpha * 3 + beta; + + // Product rule: d²(f*g)/dα dβ = f''*g + f'*g'_α + f'*g'_β + f*g'' + double term1 = (*d2_S_by_Rl) * uR[alpha] * uR[beta] * Rl_Y[lm_idx]; + + // Derivative of unit vector: du_α/dR_β = (δ_αβ - u_α*u_β)/R + double du_dR = (alpha == beta ? 
1.0 : 0.0) - uR[alpha] * uR[beta]; + if (R > 1e-10) + du_dR /= R; + else + du_dR = 0.0; + + double term2 + = (*d_S_by_Rl) + * (du_dR * Rl_Y[lm_idx] + uR[alpha] * grad_Rl_Y[lm_idx * 3 + beta] + + uR[beta] * grad_Rl_Y[lm_idx * 3 + alpha]); + double term3 = (*S_by_Rl) * H_full[idx]; + + hess_out[idx] += sign * G * (term1 + term2 + term3); + } + } + } } - } - - if (hess_out) - { - // Convert 6-element symmetric format to 9-element full matrix - // hess_Rl_Y[lm_idx] = [H_xx, H_xy, H_xz, H_yy, H_yz, H_zz] - double H_full[9] = { - hess_Rl_Y[lm_idx][0], hess_Rl_Y[lm_idx][1], hess_Rl_Y[lm_idx][2], - hess_Rl_Y[lm_idx][1], hess_Rl_Y[lm_idx][3], hess_Rl_Y[lm_idx][4], - hess_Rl_Y[lm_idx][2], hess_Rl_Y[lm_idx][4], hess_Rl_Y[lm_idx][5] - }; - - for (int alpha = 0; alpha < 3; ++alpha) - { - for (int beta = 0; beta < 3; ++beta) - { - int idx = alpha * 3 + beta; - - // Product rule: d²(f*g)/dα dβ = f''*g + f'*g'_α + f'*g'_β + f*g'' - double term1 = (*d2_S_by_Rl) * uR[alpha] * uR[beta] * Rl_Y[lm_idx]; - - // Derivative of unit vector: du_α/dR_β = (δ_αβ - u_α*u_β)/R - double du_dR = (alpha == beta ? 1.0 : 0.0) - uR[alpha] * uR[beta]; - if (R > 1e-10) du_dR /= R; - else du_dR = 0.0; - - double term2 = (*d_S_by_Rl) * (du_dR * Rl_Y[lm_idx] - + uR[alpha] * grad_Rl_Y[lm_idx*3 + beta] - + uR[beta] * grad_Rl_Y[lm_idx*3 + alpha]); - double term3 = (*S_by_Rl) * H_full[idx]; - - hess_out[idx] += sign * G * (term1 + term2 + term3); - } - } - } + sign = -sign; } - sign = -sign; - } } -void TwoCenterIntegrator::snap(const int itype1, - const int l1, - const int izeta1, - const int m1, +void + TwoCenterIntegrator::snap (const int itype1, + const int l1, + const int izeta1, + const int m1, const int itype2, - const ModuleBase::Vector3& vR, + const ModuleBase::Vector3& vR, const bool deriv, std::vector>& out) const { #ifdef __DEBUG - assert( is_tabulated_ ); + assert (is_tabulated_); #endif - out.resize(deriv ? 4 : 1); + out.resize (deriv ? 4 : 1); // total number of ket functions (including all m!) 
int num_ket = 0; - for (int l2 = 0; l2 <= table_.lmax_ket(); ++l2) - { - num_ket += (2 * l2 + 1) * table_.nchi_ket(itype2, l2); - } + for (int l2 = 0; l2 <= table_.lmax_ket (); ++l2) + { + num_ket += (2 * l2 + 1) * table_.nchi_ket (itype2, l2); + } if (num_ket == 0) - { - return; - } + { + return; + } - for(size_t i = 0; i < out.size(); ++i) - { - out[i].resize(num_ket); - std::fill(out[i].begin(), out[i].end(), 0.0); - } + for (size_t i = 0; i < out.size (); ++i) + { + out[i].resize (num_ket); + std::fill (out[i].begin (), out[i].end (), 0.0); + } int index = 0; double tmp[3] = {0.0, 0.0, 0.0}; - for (int l2 = 0; l2 <= table_.lmax_ket(); ++l2) - { - for (int izeta2 = 0; izeta2 < table_.nchi_ket(itype2, l2); ++izeta2) + for (int l2 = 0; l2 <= table_.lmax_ket (); ++l2) { - // NOTE: here the order of m is consistent with the rest of ABACUS - // i.e., 0, 1, -1, 2, -2, 3, -3, ... - // whether it should be rearranged to -l, -l+1, ..., l will be studied later - for (int mm2 = 0; mm2 <= 2*l2; ++mm2) - { - int m2 = (mm2 % 2 == 0) ? -mm2 / 2 : (mm2 + 1) / 2; - calculate(itype1, l1, izeta1, m1, itype2, l2, izeta2, m2, vR, &out[0][index], deriv ? tmp : nullptr); - - if (deriv) + for (int izeta2 = 0; izeta2 < table_.nchi_ket (itype2, l2); ++izeta2) { - out[1][index] = tmp[0]; - out[2][index] = tmp[1]; - out[3][index] = tmp[2]; + // NOTE: here the order of m is consistent with the rest of ABACUS + // i.e., 0, 1, -1, 2, -2, 3, -3, ... + // whether it should be rearranged to -l, -l+1, ..., l will be studied later + for (int mm2 = 0; mm2 <= 2 * l2; ++mm2) + { + int m2 = (mm2 % 2 == 0) ? -mm2 / 2 : (mm2 + 1) / 2; + calculate (itype1, + l1, + izeta1, + m1, + itype2, + l2, + izeta2, + m2, + vR, + &out[0][index], + deriv ? 
tmp : nullptr); + + if (deriv) + { + out[1][index] = tmp[0]; + out[2][index] = tmp[1]; + out[3][index] = tmp[2]; + } + + ++index; + } } - - ++index; - } } - } } -int TwoCenterIntegrator::ylm_index(const int l, const int m) const +int + TwoCenterIntegrator::ylm_index (const int l, const int m) const { return l * l + (m > 0 ? 2 * m - 1 : -2 * m); } diff --git a/source/source_basis/module_nao/two_center_integrator.h b/source/source_basis/module_nao/two_center_integrator.h index 285ffe8728c..25e13fd0286 100644 --- a/source/source_basis/module_nao/two_center_integrator.h +++ b/source/source_basis/module_nao/two_center_integrator.h @@ -11,9 +11,9 @@ * * This class computes two-center integrals * - * / + * / * I(R) = | dr phi1(r) (op) phi2(r - R) - * / + * / * * as well as their gradients, where op is 1 (overlap) or minus Laplacian (kinetic), * and phi1, phi2 are "atomic-orbital-like" functions of the form @@ -22,8 +22,8 @@ * * where chi is some numerical radial function and Ylm is some real spherical harmonics. * - * This class is designed to efficiently compute the two-center integrals between - * two "collections" of the above functions with various R, e.g., the overlap integrals + * This class is designed to efficiently compute the two-center integrals between + * two "collections" of the above functions with various R, e.g., the overlap integrals * between all numerical atomic orbitals and all Kleinman-Bylander nonlocal projectors, * the overlap & kinetic integrals between all numerical atomic orbitals, etc. 
* This is done by tabulating the radial part of the integrals on an r-space grid and @@ -34,11 +34,11 @@ class TwoCenterIntegrator { public: - TwoCenterIntegrator(); - TwoCenterIntegrator(const TwoCenterIntegrator&) = delete; - TwoCenterIntegrator& operator=(const TwoCenterIntegrator&) = delete; + TwoCenterIntegrator (); + TwoCenterIntegrator (const TwoCenterIntegrator&) = delete; + TwoCenterIntegrator& operator= (const TwoCenterIntegrator&) = delete; - ~TwoCenterIntegrator() {} + ~TwoCenterIntegrator () {} /*! * @brief Tabulates the radial part of a two-center integral. @@ -49,12 +49,11 @@ class TwoCenterIntegrator * @param[in] nr Number of r-space grid points. * @param[in] cutoff r-space cutoff radius. * */ - void tabulate(const RadialCollection& bra, - const RadialCollection& ket, - const char op, - const int nr, - const double cutoff - ); + void tabulate (const RadialCollection& bra, + const RadialCollection& ket, + const char op, + const int nr, + const double cutoff); /*! * @brief Compute the two-center integrals and optionally their derivatives. @@ -89,19 +88,18 @@ class TwoCenterIntegrator * @note At least one of out, grad_out, or hess_out must be non-nullptr. * @note Hessian computation requires l1 + l2 <= 6 (limitation of hes_rl_sph_harm). * */ - void calculate(const int itype1, - const int l1, - const int izeta1, - const int m1, - const int itype2, - const int l2, - const int izeta2, - const int m2, - const ModuleBase::Vector3& vR, // vR = R2 - R1 - double* out = nullptr, - double* grad_out = nullptr, - double* hess_out = nullptr - ) const; + void calculate (const int itype1, + const int l1, + const int izeta1, + const int m1, + const int itype2, + const int l2, + const int izeta2, + const int m2, + const ModuleBase::Vector3& vR, // vR = R2 - R1 + double* out = nullptr, + double* grad_out = nullptr, + double* hess_out = nullptr) const; /*! * @brief Compute a batch of two-center integrals. 
@@ -109,18 +107,21 @@ class TwoCenterIntegrator * This function calculates the two-center integrals (and optionally their gradients) * between one orbital and all orbitals of a certain type from the other collection. * */ - void snap(const int itype1, - const int l1, - const int izeta1, - const int m1, - const int itype2, - const ModuleBase::Vector3& vR, // vR = R2 - R1 - const bool deriv, - std::vector>& out - ) const; + void snap (const int itype1, + const int l1, + const int izeta1, + const int m1, + const int itype2, + const ModuleBase::Vector3& vR, // vR = R2 - R1 + const bool deriv, + std::vector>& out) const; /// Returns the amount of heap memory used by table_ (in bytes). - size_t table_memory() const { return table_.memory(); } + size_t + table_memory () const + { + return table_.memory (); + } private: bool is_tabulated_; @@ -136,7 +137,7 @@ class TwoCenterIntegrator * l 0 1 1 1 2 2 2 2 2 3 3 3 3 ... * m 0 0 1 -1 0 1 -1 2 -2 0 1 -1 2 ... * */ - int ylm_index(const int l, const int m) const; + int ylm_index (const int l, const int m) const; }; #endif diff --git a/source/source_basis/module_nao/two_center_table.cpp b/source/source_basis/module_nao/two_center_table.cpp index 821e881a458..970601dfea8 100644 --- a/source/source_basis/module_nao/two_center_table.cpp +++ b/source/source_basis/module_nao/two_center_table.cpp @@ -10,67 +10,71 @@ #include #include -void TwoCenterTable::build(const RadialCollection& bra, +void + TwoCenterTable::build (const RadialCollection& bra, const RadialCollection& ket, const char op, const int nr, const double cutoff) { #ifdef __DEBUG - assert(nr >= 3 && cutoff > 0.0); + assert (nr >= 3 && cutoff > 0.0); #endif - cleanup(); + cleanup (); op_ = op; nr_ = nr; rmax_ = cutoff; - nchi_ket_.resize({ket.ntype(), ket.lmax() + 1}); - std::fill(nchi_ket_.data(), nchi_ket_.data() + nchi_ket_.NumElements(), 0); - for (int itype = 0; itype < ket.ntype(); ++itype) - for (int l = 0; l <= ket.lmax(itype); ++l) - nchi_ket_.get_value(itype, l) 
= ket.nzeta(itype, l); + nchi_ket_.resize ({ket.ntype (), ket.lmax () + 1}); + std::fill (nchi_ket_.data (), nchi_ket_.data () + nchi_ket_.NumElements (), 0); + for (int itype = 0; itype < ket.ntype (); ++itype) + for (int l = 0; l <= ket.lmax (itype); ++l) + nchi_ket_.get_value (itype, l) = ket.nzeta (itype, l); rgrid_ = new double[nr_]; double dr = rmax_ / (nr_ - 1); - std::for_each(rgrid_, rgrid_ + nr_, [this, dr](double& r) { r = (&r - rgrid_) * dr; }); + std::for_each (rgrid_, rgrid_ + nr_, [this, dr] (double& r) { r = (&r - rgrid_) * dr; }); // index the table by generating a map from (itype1, l1, izeta1, itype2, l2, izeta2, l) to a row index - index_map_.resize({bra.ntype(), - bra.lmax() + 1, - bra.nzeta_max(), - ket.ntype(), - ket.lmax() + 1, - ket.nzeta_max(), - bra.lmax() + ket.lmax() + 1}); - std::fill(index_map_.data(), index_map_.data() + index_map_.NumElements(), -1); + index_map_.resize ({bra.ntype (), + bra.lmax () + 1, + bra.nzeta_max (), + ket.ntype (), + ket.lmax () + 1, + ket.nzeta_max (), + bra.lmax () + ket.lmax () + 1}); + std::fill (index_map_.data (), index_map_.data () + index_map_.NumElements (), -1); ntab_ = 0; - two_center_loop(bra, ket, &TwoCenterTable::_indexing); + two_center_loop (bra, ket, &TwoCenterTable::_indexing); - table_.resize({ntab_, nr_}); - dtable_.resize({ntab_, nr_}); - two_center_loop(bra, ket, &TwoCenterTable::_tabulate); + table_.resize ({ntab_, nr_}); + dtable_.resize ({ntab_, nr_}); + two_center_loop (bra, ket, &TwoCenterTable::_tabulate); } -const double* TwoCenterTable::table(const int itype1, - const int l1, - const int izeta1, - const int itype2, - const int l2, - const int izeta2, - const int l, - const bool deriv) const +const double* + TwoCenterTable::table (const int itype1, + const int l1, + const int izeta1, + const int itype2, + const int l2, + const int izeta2, + const int l, + const bool deriv) const { #ifdef __DEBUG - assert(is_present(itype1, l1, izeta1, itype2, l2, izeta2, l)); + assert (is_present 
(itype1, l1, izeta1, itype2, l2, izeta2, l)); #endif - return deriv ? dtable_.inner_most_ptr(index_map_.get_value(itype1, l1, izeta1, itype2, l2, izeta2, l)) - : table_.inner_most_ptr(index_map_.get_value(itype1, l1, izeta1, itype2, l2, izeta2, l)); + return deriv + ? dtable_.inner_most_ptr (index_map_.get_value (itype1, l1, izeta1, itype2, l2, izeta2, l)) + : table_.inner_most_ptr (index_map_.get_value (itype1, l1, izeta1, itype2, l2, izeta2, l)); } -void TwoCenterTable::lookup(const int itype1, +void + TwoCenterTable::lookup (const int itype1, const int l1, const int izeta1, const int itype2, @@ -83,31 +87,34 @@ void TwoCenterTable::lookup(const int itype1, double* d2val) const { #ifdef __DEBUG - assert(R >= 0); + assert (R >= 0); #endif - if (R > rmax()) - { - if (val) - *val = 0.0; - if (dval) - *dval = 0.0; - if (d2val) - *d2val = 0.0; - return; - } - - const double* tab = table(itype1, l1, izeta1, itype2, l2, izeta2, l, false); - const double* dtab = table(itype1, l1, izeta1, itype2, l2, izeta2, l, true); - ModuleBase::CubicSpline::eval(nr_, rgrid_, tab, dtab, 1, &R, val, dval, d2val); + if (R > rmax ()) + { + if (val) + *val = 0.0; + if (dval) + *dval = 0.0; + if (d2val) + *d2val = 0.0; + return; + } + + const double* tab = table (itype1, l1, izeta1, itype2, l2, izeta2, l, false); + const double* dtab = table (itype1, l1, izeta1, itype2, l2, izeta2, l, true); + ModuleBase::CubicSpline::eval (nr_, rgrid_, tab, dtab, 1, &R, val, dval, d2val); } -int& TwoCenterTable::table_index(const NumericalRadial* it1, const NumericalRadial* it2, const int l) +int& + TwoCenterTable::table_index (const NumericalRadial* it1, const NumericalRadial* it2, const int l) { - return index_map_.get_value(it1->itype(), it1->l(), it1->izeta(), it2->itype(), it2->l(), it2->izeta(), l); + return index_map_ + .get_value (it1->itype (), it1->l (), it1->izeta (), it2->itype (), it2->l (), it2->izeta (), l); } -void TwoCenterTable::cleanup() +void + TwoCenterTable::cleanup () { op_ = '\0'; 
ntab_ = 0; @@ -116,13 +123,14 @@ void TwoCenterTable::cleanup() delete[] rgrid_; rgrid_ = nullptr; - table_.resize({0}); - dtable_.resize({0}); - index_map_.resize({0}); - nchi_ket_.resize({0}); + table_.resize ({0}); + dtable_.resize ({0}); + index_map_.resize ({0}); + nchi_ket_.resize ({0}); } -bool TwoCenterTable::is_present(const int itype1, +bool + TwoCenterTable::is_present (const int itype1, const int l1, const int izeta1, const int itype2, @@ -132,43 +140,48 @@ bool TwoCenterTable::is_present(const int itype1, { // The given indices map to an entry in the table if they fall within the bounds of index_map_ and // the value of the entry in index_map_ is non-negative - return itype1 >= 0 && itype1 < index_map_.shape().dim_size(0) && l1 >= 0 && l1 < index_map_.shape().dim_size(1) - && izeta1 >= 0 && izeta1 < index_map_.shape().dim_size(2) && itype2 >= 0 - && itype2 < index_map_.shape().dim_size(3) && l2 >= 0 && l2 < index_map_.shape().dim_size(4) && izeta2 >= 0 - && izeta2 < index_map_.shape().dim_size(5) && l >= 0 && l <= index_map_.shape().dim_size(6) - && index_map_.get_value(itype1, l1, izeta1, itype2, l2, izeta2, l) >= 0; + return itype1 >= 0 && itype1 < index_map_.shape ().dim_size (0) && l1 >= 0 && l1 < index_map_.shape ().dim_size (1) + && izeta1 >= 0 && izeta1 < index_map_.shape ().dim_size (2) && itype2 >= 0 + && itype2 < index_map_.shape ().dim_size (3) && l2 >= 0 && l2 < index_map_.shape ().dim_size (4) + && izeta2 >= 0 && izeta2 < index_map_.shape ().dim_size (5) && l >= 0 + && l <= index_map_.shape ().dim_size (6) + && index_map_.get_value (itype1, l1, izeta1, itype2, l2, izeta2, l) >= 0; } -double TwoCenterTable::dfact(int l) const +double + TwoCenterTable::dfact (int l) const { double result = 1.0; for (int i = l; i > 1; i -= 2) - { - result *= i; - } + { + result *= i; + } return result; } -void TwoCenterTable::two_center_loop(const RadialCollection& bra, const RadialCollection& ket, looped_func f) +void + TwoCenterTable::two_center_loop (const 
RadialCollection& bra, const RadialCollection& ket, looped_func f) { - for (int l = 0; l <= bra.lmax() + ket.lmax(); ++l) - for (int l1 = 0; l1 <= bra.lmax(); ++l1) - for (const NumericalRadial** it1 = bra.cbegin(l1); it1 != bra.cend(l1); ++it1) - for (int l2 = std::abs(l1 - l); l2 <= std::min(ket.lmax(), l + l1); l2 += 2) - for (const NumericalRadial** it2 = ket.cbegin(l2); it2 != ket.cend(l2); ++it2) - (this->*f)(*it1, *it2, l); + for (int l = 0; l <= bra.lmax () + ket.lmax (); ++l) + for (int l1 = 0; l1 <= bra.lmax (); ++l1) + for (const NumericalRadial** it1 = bra.cbegin (l1); it1 != bra.cend (l1); ++it1) + for (int l2 = std::abs (l1 - l); l2 <= std::min (ket.lmax (), l + l1); l2 += 2) + for (const NumericalRadial** it2 = ket.cbegin (l2); it2 != ket.cend (l2); ++it2) + (this->*f) (*it1, *it2, l); } -void TwoCenterTable::_indexing(const NumericalRadial* it1, const NumericalRadial* it2, const int l) +void + TwoCenterTable::_indexing (const NumericalRadial* it1, const NumericalRadial* it2, const int l) { - table_index(it1, it2, l) = ntab_++; + table_index (it1, it2, l) = ntab_++; } -void TwoCenterTable::_tabulate(const NumericalRadial* it1, const NumericalRadial* it2, const int l) +void + TwoCenterTable::_tabulate (const NumericalRadial* it1, const NumericalRadial* it2, const int l) { - int itab = table_index(it1, it2, l); - double* tab = table_.inner_most_ptr(itab); - it1->radtab(op_, *it2, l, tab, nr_, rmax_, false); + int itab = table_index (it1, it2, l); + double* tab = table_.inner_most_ptr (itab); + it1->radtab (op_, *it2, l, tab, nr_, rmax_, false); // NOTE: // A radial table stores S(R)/R^l or T(R)/R^l instead of bare S/T. @@ -190,41 +203,41 @@ void TwoCenterTable::_tabulate(const NumericalRadial* it1, const NumericalRadial // See the developer's document for more details. 
double dr = rmax_ / (nr_ - 1); if (l > 0) - { - // divide S(R) by R^l (except the R=0 point) - std::for_each(&tab[1], tab + nr_, [&](double& val) { val /= std::pow(dr * (&val - tab), l); }); - - // special treatment for R=0 - int nk = it1->nk(); - const double* kgrid = it1->kgrid(); - - double* fk = new double[nk]; - double* h = new double[nk]; - std::adjacent_difference(kgrid, kgrid + nk, h); - - int op_exp = l; - switch (op_) - { - case 'S': - op_exp += 2; - break; - case 'T': - op_exp += 4; - break; - default:; // currently not supposed to happen - } - - for (int ik = 0; ik != nk; ++ik) { - fk[ik] = it1->kvalue(ik) * it2->kvalue(ik) * std::pow(kgrid[ik], op_exp); + // divide S(R) by R^l (except the R=0 point) + std::for_each (&tab[1], tab + nr_, [&] (double& val) { val /= std::pow (dr * (&val - tab), l); }); + + // special treatment for R=0 + int nk = it1->nk (); + const double* kgrid = it1->kgrid (); + + double* fk = new double[nk]; + double* h = new double[nk]; + std::adjacent_difference (kgrid, kgrid + nk, h); + + int op_exp = l; + switch (op_) + { + case 'S': + op_exp += 2; + break; + case 'T': + op_exp += 4; + break; + default:; // currently not supposed to happen + } + + for (int ik = 0; ik != nk; ++ik) + { + fk[ik] = it1->kvalue (ik) * it2->kvalue (ik) * std::pow (kgrid[ik], op_exp); + } + + tab[0] = ModuleBase::Integral::simpson (nk, fk, &h[1]) * ModuleBase::FOUR_PI / dfact (2 * l + 1); + + delete[] fk; + delete[] h; } - tab[0] = ModuleBase::Integral::simpson(nk, fk, &h[1]) * ModuleBase::FOUR_PI / dfact(2 * l + 1); - - delete[] fk; - delete[] h; - } - // The derivative table stores the derivative of S(R)/R^l or T(R)/R^l // instead of bare dS(R)/dR or dT(R)/dR, which simplifies further calculation. // @@ -232,10 +245,10 @@ void TwoCenterTable::_tabulate(const NumericalRadial* it1, const NumericalRadial // than two spherical Bessel transforms. By doing so, we achieve a good // consistency between the table and its derivative during interpolation. 
using ModuleBase::CubicSpline; - CubicSpline::build(nr_, - rgrid_, - table_.inner_most_ptr(itab), - {CubicSpline::BoundaryType::first_deriv, 0.0}, - {CubicSpline::BoundaryType::first_deriv, 0.0}, - dtable_.inner_most_ptr(itab)); + CubicSpline::build (nr_, + rgrid_, + table_.inner_most_ptr (itab), + {CubicSpline::BoundaryType::first_deriv, 0.0}, + {CubicSpline::BoundaryType::first_deriv, 0.0}, + dtable_.inner_most_ptr (itab)); } diff --git a/source/source_basis/module_nao/two_center_table.h b/source/source_basis/module_nao/two_center_table.h index dc977872194..b3475cd4be7 100644 --- a/source/source_basis/module_nao/two_center_table.h +++ b/source/source_basis/module_nao/two_center_table.h @@ -7,17 +7,17 @@ class TwoCenterTable { public: - TwoCenterTable() = default; - ~TwoCenterTable() { delete[] rgrid_; } + TwoCenterTable () = default; + ~TwoCenterTable () { delete[] rgrid_; } - TwoCenterTable(const TwoCenterTable&) = delete; - TwoCenterTable& operator=(const TwoCenterTable&) = delete; + TwoCenterTable (const TwoCenterTable&) = delete; + TwoCenterTable& operator= (const TwoCenterTable&) = delete; - void build(const RadialCollection& bra, //!< [in] radial collection involved in - const RadialCollection& ket, //!< [in] radial collection involved in - const char op, //!< [in] operator of the two-center integral - const int nr, //!< [in] number of table grid points - const double cutoff //!< [in] cutoff radius of the table + void build (const RadialCollection& bra, //!< [in] radial collection involved in + const RadialCollection& ket, //!< [in] radial collection involved in + const char op, //!< [in] operator of the two-center integral + const int nr, //!< [in] number of table grid points + const double cutoff //!< [in] cutoff radius of the table ); /*! @@ -25,39 +25,55 @@ class TwoCenterTable * */ //!@{ //! returns the operator of the two-center integral - char op() const { return op_; } + char + op () const + { + return op_; + } //! 
returns the number of radial points of each table - int nr() const { return nr_; } + int + nr () const + { + return nr_; + } // returns the number of table entries - int ntab() const { return ntab_; } + int + ntab () const + { + return ntab_; + } //! returns the radius cutoff of the table - double rmax() const { return rmax_; } + double + rmax () const + { + return rmax_; + } //! gets the read-only pointer to a specific table - const double* table(const int itype1, //!< [in] element index of chi1 - const int l1, //!< [in] angular momentum of chi1 - const int izeta1, //!< [in] zeta number of chi1 - const int itype2, //!< [in] element index of chi2 - const int l2, //!< [in] angular momentum of chi2 - const int izeta2, //!< [in] zeta number of chi2 - const int l, //!< [in] angular momentum of the entry - const bool deriv = false //!< [in] if true, return the derivative table + const double* table (const int itype1, //!< [in] element index of chi1 + const int l1, //!< [in] angular momentum of chi1 + const int izeta1, //!< [in] zeta number of chi1 + const int itype2, //!< [in] element index of chi2 + const int l2, //!< [in] angular momentum of chi2 + const int izeta2, //!< [in] zeta number of chi2 + const int l, //!< [in] angular momentum of the entry + const bool deriv = false //!< [in] if true, return the derivative table ) const; - void lookup(const int itype1, //!< [in] element index of chi1 - const int l1, //!< [in] angular momentum of chi1 - const int izeta1, //!< [in] zeta number of chi1 - const int itype2, //!< [in] element index of chi2 - const int l2, //!< [in] angular momentum of chi2 - const int izeta2, //!< [in] zeta number of chi2 - const int l, //!< [in] angular momentum of the entry - const double R, //!< [in] distance between the two centers - double* val, //!< [out] interpolated values from table_ - double* dval = nullptr, //!< [out] interpolated values from dtable_ - double* d2val = nullptr //!< [out] interpolated second derivatives + void lookup 
(const int itype1, //!< [in] element index of chi1 + const int l1, //!< [in] angular momentum of chi1 + const int izeta1, //!< [in] zeta number of chi1 + const int itype2, //!< [in] element index of chi2 + const int l2, //!< [in] angular momentum of chi2 + const int izeta2, //!< [in] zeta number of chi2 + const int l, //!< [in] angular momentum of the entry + const double R, //!< [in] distance between the two centers + double* val, //!< [out] interpolated values from table_ + double* dval = nullptr, //!< [out] interpolated values from dtable_ + double* d2val = nullptr //!< [out] interpolated second derivatives ) const; //!@} @@ -66,69 +82,75 @@ class TwoCenterTable // This might not be the intended purpose of this class. /// number of NumericalRadial objects in the ket with given itype and l - int nchi_ket(const int itype, const int l) const + int + nchi_ket (const int itype, const int l) const { - assert(itype >= 0 && itype < nchi_ket_.shape().dim_size(0)); - assert(l >= 0 && l < nchi_ket_.shape().dim_size(1)); - return nchi_ket_.get_value(itype, l); + assert (itype >= 0 && itype < nchi_ket_.shape ().dim_size (0)); + assert (l >= 0 && l < nchi_ket_.shape ().dim_size (1)); + return nchi_ket_.get_value (itype, l); } /// maximum angular momentum of the ket - int lmax_ket() const { return nchi_ket_.shape().dim_size(1) - 1; } + int + lmax_ket () const + { + return nchi_ket_.shape ().dim_size (1) - 1; + } /// Returns the amount of heap memory used by this class (in bytes). 
- size_t memory() const { - return (table_.NumElements() + dtable_.NumElements() - + nchi_ket_.NumElements() + index_map_.NumElements() + nr_) * sizeof(double); + size_t + memory () const + { + return (table_.NumElements () + dtable_.NumElements () + nchi_ket_.NumElements () + index_map_.NumElements () + + nr_) + * sizeof (double); } private: - char op_ = '\0'; //!< operator associated with the present table - int ntab_ = 0; //!< number of table entries - int nr_ = 0; //!< number of radial points of each table - double rmax_= 0.0; //!< cutoff radius of the table + char op_ = '\0'; //!< operator associated with the present table + int ntab_ = 0; //!< number of table entries + int nr_ = 0; //!< number of radial points of each table + double rmax_ = 0.0; //!< cutoff radius of the table double* rgrid_ = nullptr; /// Table of size ntype x lmax that stores the number of radial functions of given type and l - container::Tensor nchi_ket_{container::DataType::DT_INT, container::TensorShape({0})}; + container::Tensor nchi_ket_{container::DataType::DT_INT, container::TensorShape ({0})}; /// two-center integral radial table, stored as a row-major matrix - container::Tensor table_{container::DataType::DT_DOUBLE, container::TensorShape({0})}; + container::Tensor table_{container::DataType::DT_DOUBLE, container::TensorShape ({0})}; /// derivative table generated from cubic spline interpolation - container::Tensor dtable_{container::DataType::DT_DOUBLE, container::TensorShape({0})}; + container::Tensor dtable_{container::DataType::DT_DOUBLE, container::TensorShape ({0})}; /// map (itype1, l1, izeta1, itype2, l2, izeta2, l) to a row index in the table - container::Tensor index_map_{container::DataType::DT_INT, container::TensorShape({0})}; + container::Tensor index_map_{container::DataType::DT_INT, container::TensorShape ({0})}; /// returns the row-index of the table corresponding to the given two radial functions and l - int& table_index(const NumericalRadial* ptr_rad1, const 
NumericalRadial* ptr_rad2, const int l); + int& table_index (const NumericalRadial* ptr_rad1, const NumericalRadial* ptr_rad2, const int l); /// deallocates memory and reset variables to default. - void cleanup(); + void cleanup (); /// returns whether the given indices map to an entry in the table - bool is_present(const int itype1, - const int l1, - const int izeta1, - const int itype2, - const int l2, - const int izeta2, - const int l) const; + bool is_present (const int itype1, + const int l1, + const int izeta1, + const int itype2, + const int l2, + const int izeta2, + const int l) const; /// double factorial - double dfact(int l) const; + double dfact (int l) const; - typedef void(TwoCenterTable::*looped_func)(const NumericalRadial*, const NumericalRadial*, const int l); + typedef void (TwoCenterTable::*looped_func) (const NumericalRadial*, const NumericalRadial*, const int l); /// loop-execute a function over all pairwise radial functions & l with non-vanishing Gaunt coefficients - void two_center_loop(const RadialCollection& bra, - const RadialCollection& ket, - looped_func f); + void two_center_loop (const RadialCollection& bra, const RadialCollection& ket, looped_func f); /// various looped functions during the construction of table - void _indexing(const NumericalRadial* it1, const NumericalRadial* it2, const int l); - void _tabulate(const NumericalRadial* it1, const NumericalRadial* it2, const int l); + void _indexing (const NumericalRadial* it1, const NumericalRadial* it2, const int l); + void _tabulate (const NumericalRadial* it1, const NumericalRadial* it2, const int l); }; #endif diff --git a/source/source_basis/module_pw/kernels/cuda/pw_op.cu b/source/source_basis/module_pw/kernels/cuda/pw_op.cu index 58488e1a7df..88449867e24 100644 --- a/source/source_basis/module_pw/kernels/cuda/pw_op.cu +++ b/source/source_basis/module_pw/kernels/cuda/pw_op.cu @@ -4,155 +4,179 @@ #include #include -namespace ModulePW { +namespace ModulePW +{ #define 
THREADS_PER_BLOCK 256 -template -__global__ void set_3d_fft_box( - const int npwk, - const int* box_index, - const thrust::complex* in, - thrust::complex* out) +template +__global__ void + set_3d_fft_box (const int npwk, + const int* box_index, + const thrust::complex* in, + thrust::complex* out) { int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx < npwk) - { - int xx = box_index[idx]; - out[xx] = in[idx]; - } + if (idx < npwk) + { + int xx = box_index[idx]; + out[xx] = in[idx]; + } } -template -__global__ void set_recip_to_real_output( - const int nrxx, - const bool add, - const FPTYPE factor, - const thrust::complex* in, - thrust::complex* out) +template +__global__ void + set_recip_to_real_output (const int nrxx, + const bool add, + const FPTYPE factor, + const thrust::complex* in, + thrust::complex* out) { int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= nrxx) {return;} - if(add) { - out[idx] += factor * in[idx]; - } - else { - out[idx] = in[idx]; - } + if (idx >= nrxx) + { + return; + } + if (add) + { + out[idx] += factor * in[idx]; + } + else + { + out[idx] = in[idx]; + } } -template -__global__ void set_recip_to_real_output( - const int nrxx, - const bool add, - const FPTYPE factor, - const thrust::complex* in, - FPTYPE* out) +template +__global__ void + set_recip_to_real_output (const int nrxx, + const bool add, + const FPTYPE factor, + const thrust::complex* in, + FPTYPE* out) { int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= nrxx) {return;} - if(add) { - out[idx] += factor * in[idx].real(); - } - else { - out[idx] = in[idx].real(); - } + if (idx >= nrxx) + { + return; + } + if (add) + { + out[idx] += factor * in[idx].real (); + } + else + { + out[idx] = in[idx].real (); + } } -template -__global__ void set_real_to_recip_output( - const int npwk, - const int nxyz, - const bool add, - const FPTYPE factor, - const int* box_index, - const thrust::complex* in, - thrust::complex* out) +template +__global__ void + 
set_real_to_recip_output (const int npwk, + const int nxyz, + const bool add, + const FPTYPE factor, + const int* box_index, + const thrust::complex* in, + thrust::complex* out) { int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= npwk) {return;} - if(add) { - out[idx] += factor / nxyz * in[box_index[idx]]; - } - else { - out[idx] = in[box_index[idx]] / nxyz; - } + if (idx >= npwk) + { + return; + } + if (add) + { + out[idx] += factor / nxyz * in[box_index[idx]]; + } + else + { + out[idx] = in[box_index[idx]] / nxyz; + } } -template -__global__ void set_real_to_recip_output( - const int npwk, - const int nxyz, - const bool add, - const FPTYPE factor, - const int* box_index, - const thrust::complex* in, - FPTYPE* out) +template +__global__ void + set_real_to_recip_output (const int npwk, + const int nxyz, + const bool add, + const FPTYPE factor, + const int* box_index, + const thrust::complex* in, + FPTYPE* out) { int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= npwk) {return;} - if(add) { - out[idx] += factor / nxyz * in[box_index[idx]].real(); - } - else { - out[idx] = in[box_index[idx]].real() / nxyz; - } + if (idx >= npwk) + { + return; + } + if (add) + { + out[idx] += factor / nxyz * in[box_index[idx]].real (); + } + else + { + out[idx] = in[box_index[idx]].real () / nxyz; + } } template -void set_3d_fft_box_op::operator()(const int npwk, +void + set_3d_fft_box_op::operator() (const int npwk, const int* box_index, const std::complex* in, std::complex* out) { const int block = (npwk + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - set_3d_fft_box<<>>( - npwk, - box_index, - reinterpret_cast*>(in), - reinterpret_cast*>(out)); + set_3d_fft_box<<>> (npwk, + box_index, + reinterpret_cast*> (in), + reinterpret_cast*> (out)); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } template -void set_recip_to_real_output_op::operator()(const int nrxx, +void + set_recip_to_real_output_op::operator() (const int nrxx, const bool add, const FPTYPE factor, const 
std::complex* in, std::complex* out) { const int block = (nrxx + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - set_recip_to_real_output<<>>( - nrxx, - add, - factor, - reinterpret_cast*>(in), - reinterpret_cast*>(out)); - - CHECK_CUDA_SYNC(); + set_recip_to_real_output + <<>> (nrxx, + add, + factor, + reinterpret_cast*> (in), + reinterpret_cast*> (out)); + + CHECK_CUDA_SYNC (); } template -void set_recip_to_real_output_op::operator()(const int nrxx, +void + set_recip_to_real_output_op::operator() (const int nrxx, const bool add, const FPTYPE factor, const std::complex* in, FPTYPE* out) { const int block = (nrxx + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - set_recip_to_real_output<<>>( - nrxx, - add, - factor, - reinterpret_cast*>(in), - reinterpret_cast(out)); - - CHECK_CUDA_SYNC(); + set_recip_to_real_output + <<>> (nrxx, + add, + factor, + reinterpret_cast*> (in), + reinterpret_cast (out)); + + CHECK_CUDA_SYNC (); } template -void set_real_to_recip_output_op::operator()(const int npwk, +void + set_real_to_recip_output_op::operator() (const int npwk, const int nxyz, const bool add, const FPTYPE factor, @@ -161,20 +185,21 @@ void set_real_to_recip_output_op::operator()(co std::complex* out) { const int block = (npwk + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - set_real_to_recip_output<<>>( - npwk, - nxyz, - add, - factor, - box_index, - reinterpret_cast*>(in), - reinterpret_cast*>(out)); - - CHECK_CUDA_SYNC(); + set_real_to_recip_output + <<>> (npwk, + nxyz, + add, + factor, + box_index, + reinterpret_cast*> (in), + reinterpret_cast*> (out)); + + CHECK_CUDA_SYNC (); } template -void set_real_to_recip_output_op::operator()(const int npwk, +void + set_real_to_recip_output_op::operator() (const int npwk, const int nxyz, const bool add, const FPTYPE factor, @@ -183,16 +208,16 @@ void set_real_to_recip_output_op::operator()(co FPTYPE* out) { const int block = (npwk + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - set_real_to_recip_output<<>>( - npwk, - nxyz, - add, 
- factor, - box_index, - reinterpret_cast*>(in), - reinterpret_cast(out)); - - CHECK_CUDA_SYNC(); + set_real_to_recip_output + <<>> (npwk, + nxyz, + add, + factor, + box_index, + reinterpret_cast*> (in), + reinterpret_cast (out)); + + CHECK_CUDA_SYNC (); } template struct set_3d_fft_box_op; @@ -202,4 +227,4 @@ template struct set_3d_fft_box_op; template struct set_recip_to_real_output_op; template struct set_real_to_recip_output_op; -} // namespace ModulePW +} // namespace ModulePW diff --git a/source/source_basis/module_pw/kernels/pw_op.cpp b/source/source_basis/module_pw/kernels/pw_op.cpp index 21b9820a1d5..191737890ec 100644 --- a/source/source_basis/module_pw/kernels/pw_op.cpp +++ b/source/source_basis/module_pw/kernels/pw_op.cpp @@ -1,70 +1,72 @@ #include "source_basis/module_pw/kernels/pw_op.h" -namespace ModulePW { +namespace ModulePW +{ template struct set_3d_fft_box_op { - void operator()(const int npwk, - const int* box_index, - const std::complex* in, - std::complex* out) + void + operator() (const int npwk, const int* box_index, const std::complex* in, std::complex* out) { for (int ig = 0; ig < npwk; ++ig) - { - out[box_index[ig]] = in[ig]; - } + { + out[box_index[ig]] = in[ig]; + } } }; template struct set_recip_to_real_output_op { - void operator()(const int nrxx, + void + operator() (const int nrxx, const bool add, const FPTYPE factor, const std::complex* in, std::complex* out) { - if(add) { - for(int ir = 0; ir < nrxx ; ++ir) { - out[ir] += factor * in[ir]; + if (add) + { + for (int ir = 0; ir < nrxx; ++ir) + { + out[ir] += factor * in[ir]; + } } - } - else { - for(int ir = 0; ir < nrxx ; ++ir) { - out[ir] = in[ir]; + else + { + for (int ir = 0; ir < nrxx; ++ir) + { + out[ir] = in[ir]; + } } - } } - void operator()(const int nrxx, - const bool add, - const FPTYPE factor, - const std::complex* in, - FPTYPE* out) + void + operator() (const int nrxx, const bool add, const FPTYPE factor, const std::complex* in, FPTYPE* out) { if (add) - { - for (int ir 
= 0; ir < nrxx; ++ir) { - out[ir] += factor * in[ir].real(); + for (int ir = 0; ir < nrxx; ++ir) + { + out[ir] += factor * in[ir].real (); + } } - } else - { - for (int ir = 0; ir < nrxx; ++ir) { - out[ir] = in[ir].real(); + for (int ir = 0; ir < nrxx; ++ir) + { + out[ir] = in[ir].real (); + } } - } } }; template struct set_real_to_recip_output_op { - void operator()(const int npw_k, + void + operator() (const int npw_k, const int nxyz, const bool add, const FPTYPE factor, @@ -72,16 +74,20 @@ struct set_real_to_recip_output_op const std::complex* in, std::complex* out) { - if(add) { - for(int ig = 0; ig < npw_k; ++ig) { - out[ig] += factor / static_cast(nxyz) * in[box_index[ig]]; + if (add) + { + for (int ig = 0; ig < npw_k; ++ig) + { + out[ig] += factor / static_cast (nxyz) * in[box_index[ig]]; + } } - } - else { - for(int ig = 0; ig < npw_k; ++ig) { - out[ig] = in[box_index[ig]] / static_cast(nxyz); + else + { + for (int ig = 0; ig < npw_k; ++ig) + { + out[ig] = in[box_index[ig]] / static_cast (nxyz); + } } - } } }; @@ -92,5 +98,4 @@ template struct set_3d_fft_box_op; template struct set_recip_to_real_output_op; template struct set_real_to_recip_output_op; -} // namespace ModulePW - +} // namespace ModulePW diff --git a/source/source_basis/module_pw/kernels/pw_op.h b/source/source_basis/module_pw/kernels/pw_op.h index 32183b6bf78..336a042302f 100644 --- a/source/source_basis/module_pw/kernels/pw_op.h +++ b/source/source_basis/module_pw/kernels/pw_op.h @@ -4,9 +4,11 @@ #include "source_psi/psi.h" #include -namespace ModulePW { +namespace ModulePW +{ template -struct set_3d_fft_box_op { +struct set_3d_fft_box_op +{ /// @brief Set the 3D fft box for fft transfrom between the recip and real space. 
/// To map the 1D psi(1D continuous array) to 3D box psi(fft box) /// @@ -18,15 +20,12 @@ struct set_3d_fft_box_op { /// /// Output Parameters /// @param out - output psi within the 3D box(in recip space) - void operator() ( - const int npwk, - const int* box_index, - const std::complex* in, - std::complex* out); + void operator() (const int npwk, const int* box_index, const std::complex* in, std::complex* out); }; template -struct set_recip_to_real_output_op { +struct set_recip_to_real_output_op +{ /// @brief Calculate the outputs after the FFT translation of recip_to_real /// /// Input Parameters @@ -37,23 +36,18 @@ struct set_recip_to_real_output_op { /// /// Output Parameters /// @param out - output psi within the 3D box(in real space) - void operator() ( - const int nrxx, - const bool add, - const FPTYPE factor, - const std::complex* in, - std::complex* out); + void operator() (const int nrxx, + const bool add, + const FPTYPE factor, + const std::complex* in, + std::complex* out); - void operator() ( - const int nrxx, - const bool add, - const FPTYPE factor, - const std::complex* in, - FPTYPE* out); + void operator() (const int nrxx, const bool add, const FPTYPE factor, const std::complex* in, FPTYPE* out); }; template -struct set_real_to_recip_output_op { +struct set_real_to_recip_output_op +{ /// @brief Calculate the outputs after the FFT translation of real_to_recip /// /// Input Parameters @@ -66,23 +60,21 @@ struct set_real_to_recip_output_op { /// /// Output Parameters /// @param out - output psi within the 3D box(in recip space) - void operator() ( - const int npw_k, - const int nxyz, - const bool add, - const FPTYPE factor, - const int* box_index, - const std::complex* in, - std::complex* out); + void operator() (const int npw_k, + const int nxyz, + const bool add, + const FPTYPE factor, + const int* box_index, + const std::complex* in, + std::complex* out); - void operator() ( - const int npw_k, - const int nxyz, - const bool add, - const FPTYPE 
factor, - const int* box_index, - const std::complex* in, - FPTYPE* out); + void operator() (const int npw_k, + const int nxyz, + const bool add, + const FPTYPE factor, + const int* box_index, + const std::complex* in, + FPTYPE* out); }; #if __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM @@ -90,47 +82,40 @@ struct set_real_to_recip_output_op { template struct set_3d_fft_box_op { - void operator()(const int npwk, - const int* box_index, - const std::complex* in, - std::complex* out); + void operator() (const int npwk, const int* box_index, const std::complex* in, std::complex* out); }; template struct set_recip_to_real_output_op { - void operator()(const int nrxx, - const bool add, - const FPTYPE factor, - const std::complex* in, - std::complex* out); + void operator() (const int nrxx, + const bool add, + const FPTYPE factor, + const std::complex* in, + std::complex* out); - void operator()(const int nrxx, - const bool add, - const FPTYPE factor, - const std::complex* in, - FPTYPE* out); + void operator() (const int nrxx, const bool add, const FPTYPE factor, const std::complex* in, FPTYPE* out); }; template struct set_real_to_recip_output_op { - void operator()(const int npw_k, - const int nxyz, - const bool add, - const FPTYPE factor, - const int* box_index, - const std::complex* in, - std::complex* out); - void operator()(const int npw_k, - const int nxyz, - const bool add, - const FPTYPE factor, - const int* box_index, - const std::complex* in, - FPTYPE* out); + void operator() (const int npw_k, + const int nxyz, + const bool add, + const FPTYPE factor, + const int* box_index, + const std::complex* in, + std::complex* out); + void operator() (const int npw_k, + const int nxyz, + const bool add, + const FPTYPE factor, + const int* box_index, + const std::complex* in, + FPTYPE* out); }; #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM } // namespace ModulePW -#endif //MODULE_PW_MULTI_DEVICE_H \ No newline at end of file +#endif // 
MODULE_PW_MULTI_DEVICE_H \ No newline at end of file diff --git a/source/source_basis/module_pw/kernels/rocm/pw_op.hip.cu b/source/source_basis/module_pw/kernels/rocm/pw_op.hip.cu index fa8aa17d883..061c46a633f 100644 --- a/source/source_basis/module_pw/kernels/rocm/pw_op.hip.cu +++ b/source/source_basis/module_pw/kernels/rocm/pw_op.hip.cu @@ -5,155 +5,192 @@ #include #include -namespace ModulePW { +namespace ModulePW +{ #define THREADS_PER_BLOCK 256 -template -__global__ void set_3d_fft_box( - const int npwk, - const int* box_index, - const thrust::complex* in, - thrust::complex* out) +template +__global__ void + set_3d_fft_box (const int npwk, + const int* box_index, + const thrust::complex* in, + thrust::complex* out) { int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx < npwk) - { - int xx = box_index[idx]; - out[xx] = in[idx]; - } + if (idx < npwk) + { + int xx = box_index[idx]; + out[xx] = in[idx]; + } } -template -__global__ void set_recip_to_real_output( - const int nrxx, - const bool add, - const FPTYPE factor, - const thrust::complex* in, - thrust::complex* out) +template +__global__ void + set_recip_to_real_output (const int nrxx, + const bool add, + const FPTYPE factor, + const thrust::complex* in, + thrust::complex* out) { int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= nrxx) {return;} - if(add) { - out[idx] += factor * in[idx]; - } - else { - out[idx] = in[idx]; - } + if (idx >= nrxx) + { + return; + } + if (add) + { + out[idx] += factor * in[idx]; + } + else + { + out[idx] = in[idx]; + } } -template -__global__ void set_recip_to_real_output( - const int nrxx, - const bool add, - const FPTYPE factor, - const thrust::complex* in, - FPTYPE* out) +template +__global__ void + set_recip_to_real_output (const int nrxx, + const bool add, + const FPTYPE factor, + const thrust::complex* in, + FPTYPE* out) { int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= nrxx) {return;} - if(add) { - out[idx] += factor * in[idx].real(); - } - 
else { - out[idx] = in[idx].real(); - } + if (idx >= nrxx) + { + return; + } + if (add) + { + out[idx] += factor * in[idx].real (); + } + else + { + out[idx] = in[idx].real (); + } } -template -__global__ void set_real_to_recip_output( - const int npwk, - const int nxyz, - const bool add, - const FPTYPE factor, - const int* box_index, - const thrust::complex* in, - thrust::complex* out) +template +__global__ void + set_real_to_recip_output (const int npwk, + const int nxyz, + const bool add, + const FPTYPE factor, + const int* box_index, + const thrust::complex* in, + thrust::complex* out) { int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= npwk) {return;} - if(add) { - out[idx] += factor / nxyz * in[box_index[idx]]; - } - else { - out[idx] = in[box_index[idx]] / nxyz; - } + if (idx >= npwk) + { + return; + } + if (add) + { + out[idx] += factor / nxyz * in[box_index[idx]]; + } + else + { + out[idx] = in[box_index[idx]] / nxyz; + } } -template -__global__ void set_real_to_recip_output( - const int npwk, - const int nxyz, - const bool add, - const FPTYPE factor, - const int* box_index, - const thrust::complex* in, - FPTYPE* out) +template +__global__ void + set_real_to_recip_output (const int npwk, + const int nxyz, + const bool add, + const FPTYPE factor, + const int* box_index, + const thrust::complex* in, + FPTYPE* out) { int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= npwk) {return;} - if(add) { - out[idx] += factor / nxyz * in[box_index[idx]].real(); - } - else { - out[idx] = in[box_index[idx]].real() / nxyz; - } + if (idx >= npwk) + { + return; + } + if (add) + { + out[idx] += factor / nxyz * in[box_index[idx]].real (); + } + else + { + out[idx] = in[box_index[idx]].real () / nxyz; + } } template -void set_3d_fft_box_op::operator()(const int npwk, +void + set_3d_fft_box_op::operator() (const int npwk, const int* box_index, const std::complex* in, std::complex* out) { const int block = (npwk + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - 
hipLaunchKernelGGL(HIP_KERNEL_NAME(set_3d_fft_box), dim3(block), dim3(THREADS_PER_BLOCK), 0, 0, - npwk, - box_index, - reinterpret_cast*>(in), - reinterpret_cast*>(out)); - - hipCheckOnDebug(); + hipLaunchKernelGGL (HIP_KERNEL_NAME (set_3d_fft_box), + dim3 (block), + dim3 (THREADS_PER_BLOCK), + 0, + 0, + npwk, + box_index, + reinterpret_cast*> (in), + reinterpret_cast*> (out)); + + hipCheckOnDebug (); } template -void set_recip_to_real_output_op::operator()(const int nrxx, +void + set_recip_to_real_output_op::operator() (const int nrxx, const bool add, const FPTYPE factor, const std::complex* in, std::complex* out) { const int block = (nrxx + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - hipLaunchKernelGGL(HIP_KERNEL_NAME(set_recip_to_real_output), dim3(block), dim3(THREADS_PER_BLOCK), 0, 0, - nrxx, - add, - factor, - reinterpret_cast*>(in), - reinterpret_cast*>(out)); - - hipCheckOnDebug(); + hipLaunchKernelGGL (HIP_KERNEL_NAME (set_recip_to_real_output), + dim3 (block), + dim3 (THREADS_PER_BLOCK), + 0, + 0, + nrxx, + add, + factor, + reinterpret_cast*> (in), + reinterpret_cast*> (out)); + + hipCheckOnDebug (); } template -void set_recip_to_real_output_op::operator()(const int nrxx, +void + set_recip_to_real_output_op::operator() (const int nrxx, const bool add, const FPTYPE factor, const std::complex* in, FPTYPE* out) { const int block = (nrxx + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - hipLaunchKernelGGL(HIP_KERNEL_NAME(set_recip_to_real_output), dim3(block), dim3(THREADS_PER_BLOCK), 0, 0, - nrxx, - add, - factor, - reinterpret_cast*>(in), - reinterpret_cast(out)); - - hipCheckOnDebug(); + hipLaunchKernelGGL (HIP_KERNEL_NAME (set_recip_to_real_output), + dim3 (block), + dim3 (THREADS_PER_BLOCK), + 0, + 0, + nrxx, + add, + factor, + reinterpret_cast*> (in), + reinterpret_cast (out)); + + hipCheckOnDebug (); } template -void set_real_to_recip_output_op::operator()(const int npwk, +void + set_real_to_recip_output_op::operator() (const int npwk, const int nxyz, 
const bool add, const FPTYPE factor, @@ -162,20 +199,25 @@ void set_real_to_recip_output_op::operator()(co std::complex* out) { const int block = (npwk + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - hipLaunchKernelGGL(HIP_KERNEL_NAME(set_real_to_recip_output), dim3(block), dim3(THREADS_PER_BLOCK), 0, 0, - npwk, - nxyz, - add, - factor, - box_index, - reinterpret_cast*>(in), - reinterpret_cast*>(out)); - - hipCheckOnDebug(); + hipLaunchKernelGGL (HIP_KERNEL_NAME (set_real_to_recip_output), + dim3 (block), + dim3 (THREADS_PER_BLOCK), + 0, + 0, + npwk, + nxyz, + add, + factor, + box_index, + reinterpret_cast*> (in), + reinterpret_cast*> (out)); + + hipCheckOnDebug (); } template -void set_real_to_recip_output_op::operator()(const int npwk, +void + set_real_to_recip_output_op::operator() (const int npwk, const int nxyz, const bool add, const FPTYPE factor, @@ -184,16 +226,20 @@ void set_real_to_recip_output_op::operator()(co FPTYPE* out) { const int block = (npwk + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - hipLaunchKernelGGL(HIP_KERNEL_NAME(set_real_to_recip_output), dim3(block), dim3(THREADS_PER_BLOCK), 0, 0, - npwk, - nxyz, - add, - factor, - box_index, - reinterpret_cast*>(in), - reinterpret_cast(out)); - - hipCheckOnDebug(); + hipLaunchKernelGGL (HIP_KERNEL_NAME (set_real_to_recip_output), + dim3 (block), + dim3 (THREADS_PER_BLOCK), + 0, + 0, + npwk, + nxyz, + add, + factor, + box_index, + reinterpret_cast*> (in), + reinterpret_cast (out)); + + hipCheckOnDebug (); } template struct set_3d_fft_box_op; @@ -204,4 +250,4 @@ template struct set_3d_fft_box_op; template struct set_recip_to_real_output_op; template struct set_real_to_recip_output_op; -} // namespace ModulePW +} // namespace ModulePW diff --git a/source/source_basis/module_pw/kernels/test/pw_op_test.cpp b/source/source_basis/module_pw/kernels/test/pw_op_test.cpp index 3e16f57e639..4bee4df220b 100644 --- a/source/source_basis/module_pw/kernels/test/pw_op_test.cpp +++ 
b/source/source_basis/module_pw/kernels/test/pw_op_test.cpp @@ -8,20 +8,6971 @@ class TestModulePWPWMultiDevice : public ::testing::Test { protected: - const int npwk = 59; const int nxyz = 1728; const bool add = false; const double factor = 1.0; - const std::vector box_index = {10, 11, 0, 1, 2, 23, 12, 13, 14, 24, 25, 131, 120, 142, 143, 132, 133, 155, 144, 145, 146, 167, 156, 157, 158, 168, 169, 170, 287, 276, 277, 288, 289, 300, 301, 302, 313, 314, 1451, 1440, 1570, 1571, 1582, 1583, 1572, 1594, 1595, 1584, 1585, 1607, 1596, 1597, 1714, 1715, 1704, 1726, 1727, 1716, 1717}; - const std::vector > in_1 = {{-0.0157932, -0}, {0.140385, 0}, {1.15637, 0}, {0.140385, 0}, {-0.0157932, -0}, {-0.00612284, -0}, {0.140385, 0}, {0.0746255, 0}, {-0.0151924, -0}, {-0.0157932, -0}, {-0.0151924, -0}, {-0.0151924, -0}, {-0.0157932, -0}, {-0.0151924, -0}, {0.0746255, 0}, {0.140385, 0}, {-0.00612284, -0}, {-0.00612284, -0}, {0.140385, 0}, {0.0746255, 0}, {-0.0151924, -0}, {-0.0151924, -0}, {0.0746255, 0}, {0.140385, 0}, {-0.00612284, -0}, {-0.0151924, -0}, {-0.00612284, -0}, {-0.0151924, -0}, {-0.0151924, -0}, {-0.00612284, -0}, {-0.0151924, -0}, {-0.0157932, -0}, {-0.0151924, -0}, {-0.0151924, -0}, {-0.00612284, -0}, {-0.0151924, -0}, {-0.0151924, -0}, {-0.0157932, -0}, {-0.0151924, -0}, {-0.0157932, -0}, {-0.0157932, -0}, {-0.0151924, -0}, {-0.0151924, -0}, {-0.00612284, -0}, {-0.0151924, -0}, {-0.0151924, -0}, {0.0746255, 0}, {0.140385, 0}, {-0.00612284, -0}, {-0.0151924, -0}, {-0.00612284, -0}, {-0.0151924, -0}, {-0.0151924, -0}, {-0.00612284, -0}, {-0.0151924, -0}, {-0.00612284, -0}, {0.140385, 0}, {0.0746255, 0}, {-0.0151924, -0}}; - const std::vector > out_1 = {{1.15637, 0}, {0.140385, 0}, {-0.0157932, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.0157932, -0}, {0.140385, 0}, {0.140385, 0}, {0.0746255, 0}, {-0.0151924, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.00612284, -0}, {-0.0157932, -0}, {-0.0151924, -0}, {0, 0}, {0, 0}, 
{0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.0157932, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.0151924, -0}, {0.140385, 0}, {-0.00612284, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.0151924, -0}, {0.0746255, 0}, {0.140385, 0}, {0.0746255, 0}, {-0.0151924, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.00612284, -0}, {0.0746255, 0}, {0.140385, 0}, {-0.00612284, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.0151924, -0}, {-0.0151924, -0}, {-0.00612284, -0}, {-0.0151924, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, 
{0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.00612284, -0}, {-0.0151924, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.0151924, -0}, {-0.0157932, -0}, {-0.0151924, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.0151924, -0}, {-0.00612284, -0}, {-0.0151924, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.0151924, -0}, {-0.0157932, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, 
{0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, 
{0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, 
{0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, 
{0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.0157932, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.0151924, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 
0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.0157932, -0}, {-0.0151924, -0}, {-0.0151924, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.0151924, -0}, {-0.00612284, -0}, {0.140385, 0}, {-0.00612284, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.0151924, -0}, {0.0746255, 0}, {-0.00612284, -0}, {-0.0151924, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.0151924, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.0151924, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.0151924, -0}, {-0.00612284, -0}, {0.0746255, 0}, {-0.0151924, -0}, {0, 0}, {0, 0}, {0, 0}, {0, 
0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {-0.00612284, -0}, {0.140385, 0}}; - const std::vector > in_2 = {{2.16277, 0}, {2.17701, 0}, {2.14383, 0}, {1.92793, 0}, {1.51507, 0}, {1.08798, -2.4037e-17}, {0.905261, 0}, {1.08798, 0}, {1.51507, 0}, {1.92793, 2.4037e-17}, {2.14383, 0}, {2.17701, 0}, {2.17701, 0}, {2.16623, 5.55112e-17}, {2.03322, 0}, {1.69928, 0}, {1.25388, 0}, {0.930725, -2.77556e-17}, {0.930725, 0}, {1.25388, 0}, {1.69928, 0}, {2.03322, -2.77556e-17}, {2.16623, 0}, {2.17701, 0}, {2.14383, 0}, {2.03322, 0}, {1.76522, 0}, {1.36223, 0}, {0.981652, 0}, {0.824264, 0}, {0.981652, 0}, {1.36223, 0}, {1.76522, 0}, {2.03322, 0}, {2.14383, 0}, {2.16623, 0}, {1.92793, 0}, {1.69928, 0}, {1.36223, 0}, {1.00712, 0}, {0.775362, 0}, {0.775362, 1.20185e-17}, {1.00712, 0}, {1.36223, 0}, {1.69928, 0}, {1.92793, -1.20185e-17}, {2.03322, 0}, {2.03322, 0}, {1.51507, 0}, {1.25388, 0}, {0.981652, 0}, {0.775362, 0}, {0.698375, 0}, {0.775362, -4.50694e-18}, {0.981652, 0}, {1.25388, 0}, {1.51507, 0}, {1.69928, 4.50694e-18}, {1.76522, 0}, {1.69928, 0}, {1.08798, 0}, {0.930725, -2.77556e-17}, {0.824264, 0}, {0.775362, 0}, {0.775362, 0}, {0.824264, 1.38778e-17}, {0.930725, 0}, {1.08798, 0}, {1.25388, 0}, {1.36223, 1.38778e-17}, {1.36223, 0}, {1.25388, 0}, {0.905261, 0}, {0.930725, 0}, {0.981652, 0}, {1.00712, 0}, {0.981652, 0}, {0.930725, 0}, {0.905261, 0}, {0.930725, 0}, {0.981652, 0}, {1.00712, 0}, {0.981652, 0}, {0.930725, 0}, {1.08798, 0}, {1.25388, 0}, {1.36223, 0}, {1.36223, 0}, {1.25388, 0}, {1.08798, 0}, {0.930725, 0}, {0.824264, 0}, {0.775362, 0}, {0.775362, 0}, {0.824264, 0}, {0.930725, 0}, {1.51507, 0}, {1.69928, -2.77556e-17}, {1.76522, 0}, {1.69928, 0}, {1.51507, 0}, {1.25388, 1.38778e-17}, {0.981652, 0}, {0.775362, 0}, {0.698375, 0}, {0.775362, 1.38778e-17}, {0.981652, 0}, {1.25388, 0}, {1.92793, 0}, {2.03322, 0}, {2.03322, 0}, {1.92793, 0}, {1.69928, 0}, {1.36223, 0}, {1.00712, 0}, {0.775362, 0}, {0.775362, 0}, {1.00712, 0}, {1.36223, 0}, {1.69928, 0}, {2.14383, 0}, 
{2.16623, -3.54439e-19}, {2.14383, 0}, {2.03322, 0}, {1.76522, 0}, {1.36223, 1.77219e-19}, {0.981652, 0}, {0.824264, 0}, {0.981652, 0}, {1.36223, 1.77219e-19}, {1.76522, 0}, {2.03322, 0}, {2.17701, 0}, {2.17701, 0}, {2.16623, 0}, {2.03322, 0}, {1.69928, 0}, {1.25388, 0}, {0.930725, 0}, {0.930725, 0}, {1.25388, 0}, {1.69928, 0}, {2.03322, 0}, {2.16623, 0}, {2.17701, -6.93889e-18}, {2.16623, -6.93889e-18}, {2.03322, -6.93889e-18}, {1.69928, -6.93889e-18}, {1.25388, -6.93889e-18}, {0.930725, -3.09759e-17}, {0.930725, -6.93889e-18}, {1.25388, -6.93889e-18}, {1.69928, -6.93889e-18}, {2.03322, 1.70981e-17}, {2.16623, -6.93889e-18}, {2.17701, -6.93889e-18}, {2.16623, 0}, {2.07555, 0}, {1.82256, 0}, {1.42048, 0}, {1.03162, 0}, {0.869317, -2.4037e-17}, {1.03162, 0}, {1.42048, 0}, {1.82256, 0}, {2.07555, 2.4037e-17}, {2.16623, 0}, {2.17945, 0}, {2.03322, 0}, {1.82256, -5.55112e-17}, {1.48405, 0}, {1.1084, 0}, {0.85665, 0}, {0.85665, 2.77556e-17}, {1.1084, 0}, {1.48405, 0}, {1.82256, 0}, {2.03322, 2.77556e-17}, {2.11997, 0}, {2.11997, 0}, {1.69928, 0}, {1.42048, 0}, {1.1084, 0}, {0.858837, 0}, {0.763031, 0}, {0.858837, 0}, {1.1084, 0}, {1.42048, 0}, {1.69928, 0}, {1.88226, 0}, {1.94478, 0}, {1.88226, 0}, {1.25388, 3.46945e-18}, {1.03162, -2.42861e-17}, {0.85665, 3.46945e-18}, {0.763031, 3.46945e-18}, {0.763031, 3.46945e-18}, {0.85665, 1.73472e-17}, {1.03162, 3.46945e-18}, {1.25388, 3.46945e-18}, {1.46388, 3.46945e-18}, {1.59252, 1.73472e-17}, {1.59252, 3.46945e-18}, {1.46388, 3.46945e-18}, {0.930725, -2.4037e-17}, {0.869317, -3.79148e-17}, {0.85665, -2.4037e-17}, {0.858837, -1.7563e-17}, {0.85665, -2.4037e-17}, {0.869317, -1.70981e-17}, {0.930725, -2.4037e-17}, {1.04306, -2.72741e-17}, {1.15758, -2.4037e-17}, {1.20632, -1.70981e-17}, {1.15758, -2.4037e-17}, {1.04306, -2.72741e-17}, {0.930725, 0}, {1.03162, 0}, {1.1084, 0}, {1.1084, 0}, {1.03162, 0}, {0.930725, 0}, {0.864823, 0}, {0.851575, 0}, {0.862449, 0}, {0.862449, 0}, {0.851575, 0}, {0.864823, 0}, {1.25388, 
-2.47509e-17}, {1.42048, -3.86287e-17}, {1.48405, -1.08732e-17}, {1.42048, 3.00463e-18}, {1.25388, 1.68824e-17}, {1.04306, 4.78583e-17}, {0.851575, 3.39806e-17}, {0.72366, 2.70417e-17}, {0.679431, 1.68824e-17}, {0.72366, -2.15723e-19}, {0.851575, -1.40935e-17}, {1.04306, -2.10324e-17}, {1.69928, 3.46945e-18}, {1.82256, 1.73472e-17}, {1.82256, 3.46945e-18}, {1.69928, 3.46945e-18}, {1.46388, 3.46945e-18}, {1.15758, -3.46945e-18}, {0.862449, 3.46945e-18}, {0.679431, 3.46945e-18}, {0.679431, 3.46945e-18}, {0.862449, -3.46945e-18}, {1.15758, 3.46945e-18}, {1.46388, 3.46945e-18}, {2.03322, 2.4037e-17}, {2.07555, 2.5603e-17}, {2.03322, 2.4037e-17}, {1.88226, 2.4037e-17}, {1.59252, -2.4037e-17}, {1.20632, 2.32541e-17}, {0.862449, 2.4037e-17}, {0.72366, 2.4037e-17}, {0.862449, 7.21111e-17}, {1.20632, 2.32541e-17}, {1.59252, 2.4037e-17}, {1.88226, 2.4037e-17}, {2.16623, 0}, {2.16623, 0}, {2.11997, 0}, {1.94478, 0}, {1.59252, 0}, {1.15758, -4.80741e-17}, {0.851575, 0}, {0.851575, 0}, {1.15758, 0}, {1.59252, 4.80741e-17}, {1.94478, 0}, {2.11997, 0}, {2.17701, -1.68824e-17}, {2.17945, -3.07602e-17}, {2.11997, -3.00463e-18}, {1.88226, -3.00463e-18}, {1.46388, 3.93426e-18}, {1.04306, -1.31639e-17}, {0.864823, -3.00463e-18}, {1.04306, 9.01389e-18}, {1.46388, 3.93426e-18}, {1.88226, 3.49102e-17}, {2.11997, -3.00463e-18}, {2.17945, -1.50231e-17}, {2.14383, 0}, {2.03322, 0}, {1.76522, 0}, {1.36223, 0}, {0.981652, 0}, {0.824264, 2.4037e-17}, {0.981652, 0}, {1.36223, 0}, {1.76522, 0}, {2.03322, -2.4037e-17}, {2.14383, 0}, {2.16623, 0}, {2.03322, -2.77556e-17}, {1.82256, -2.77556e-17}, {1.48405, -2.77556e-17}, {1.1084, -2.77556e-17}, {0.85665, -2.77556e-17}, {0.85665, -3.71854e-18}, {1.1084, -2.77556e-17}, {1.48405, -2.77556e-17}, {1.82256, -2.77556e-17}, {2.03322, -5.17926e-17}, {2.11997, -2.77556e-17}, {2.11997, -2.77556e-17}, {1.76522, 0}, {1.48405, 0}, {1.16145, 0}, {0.899033, 0}, {0.797429, 0}, {0.899033, 9.01389e-18}, {1.16145, 0}, {1.48405, 0}, {1.76522, 0}, {1.94478, 
-9.01389e-18}, {2.00496, 0}, {1.94478, 0}, {1.36223, 0}, {1.1084, -2.77556e-17}, {0.899033, 0}, {0.782576, 0}, {0.782576, 0}, {0.899033, 1.38778e-17}, {1.1084, 0}, {1.36223, 0}, {1.59252, 0}, {1.7299, 1.38778e-17}, {1.7299, 0}, {1.59252, 0}, {0.981652, 0}, {0.85665, 2.77556e-17}, {0.797429, 0}, {0.782576, 0}, {0.797429, 0}, {0.85665, -1.38778e-17}, {0.981652, 0}, {1.15758, 0}, {1.31866, 0}, {1.38444, -1.38778e-17}, {1.31866, 0}, {1.15758, 0}, {0.824264, 1.38778e-17}, {0.85665, 1.38778e-17}, {0.899033, 1.73472e-17}, {0.899033, 6.93889e-18}, {0.85665, 1.38778e-17}, {0.824264, 1.68824e-17}, {0.851575, 1.77767e-17}, {0.931265, 2.03519e-17}, {1.00096, 1.38778e-17}, {1.00096, 1.08732e-17}, {0.931265, 6.50938e-18}, {0.851575, 1.43426e-17}, {0.981652, 0}, {1.1084, 0}, {1.16145, 0}, {1.1084, 0}, {0.981652, 0}, {0.851575, 0}, {0.771224, 0}, {0.743927, 0}, {0.740598, 0}, {0.743927, 0}, {0.771224, 0}, {0.851575, 0}, {1.36223, 0}, {1.48405, -6.93889e-18}, {1.48405, 0}, {1.36223, 0}, {1.15758, 0}, {0.931265, 3.46945e-18}, {0.743927, 0}, {0.639427, 0}, {0.639427, 0}, {0.743927, 3.46945e-18}, {0.931265, 0}, {1.15758, 0}, {1.76522, 0}, {1.82256, 2.93165e-18}, {1.76522, 0}, {1.59252, 0}, {1.31866, 0}, {1.00096, -4.95399e-17}, {0.740598, 0}, {0.639427, 0}, {0.740598, 0}, {1.00096, 4.66082e-17}, {1.31866, 0}, {1.59252, 0}, {2.03322, 1.38778e-17}, {2.03322, 1.38778e-17}, {1.94478, 1.38778e-17}, {1.7299, 1.38778e-17}, {1.38444, 1.38778e-17}, {1.00096, 1.38778e-17}, {0.743927, 1.38778e-17}, {0.743927, 1.38778e-17}, {1.00096, 1.38778e-17}, {1.38444, 1.38778e-17}, {1.7299, 1.38778e-17}, {1.94478, 1.38778e-17}, {2.14383, 0}, {2.11997, 0}, {2.00496, 0}, {1.7299, 0}, {1.31866, 0}, {0.931265, 4.80741e-17}, {0.771224, 0}, {0.931265, 0}, {1.31866, 0}, {1.7299, -4.80741e-17}, {2.00496, 0}, {2.11997, 0}, {2.16623, 0}, {2.11997, 0}, {1.94478, 0}, {1.59252, 0}, {1.15758, 0}, {0.851575, 0}, {0.851575, 0}, {1.15758, 0}, {1.59252, 0}, {1.94478, 0}, {2.11997, 0}, {2.16623, 0}, {1.92793, 0}, {1.69928, 
0}, {1.36223, 0}, {1.00712, 0}, {0.775362, 0}, {0.775362, 0}, {1.00712, 0}, {1.36223, 0}, {1.69928, 0}, {1.92793, 0}, {2.03322, 0}, {2.03322, 0}, {1.69928, -2.77556e-17}, {1.42048, -2.77556e-17}, {1.1084, -2.77556e-17}, {0.858837, -2.77556e-17}, {0.763031, -2.77556e-17}, {0.858837, -2.77556e-17}, {1.1084, -2.77556e-17}, {1.42048, -2.77556e-17}, {1.69928, -2.77556e-17}, {1.88226, -2.77556e-17}, {1.94478, -2.77556e-17}, {1.88226, -2.77556e-17}, {1.36223, 0}, {1.1084, -2.77556e-17}, {0.899033, 0}, {0.782576, 0}, {0.782576, 0}, {0.899033, 1.38778e-17}, {1.1084, 0}, {1.36223, 0}, {1.59252, 0}, {1.7299, 1.38778e-17}, {1.7299, 0}, {1.59252, 0}, {1.00712, 0}, {0.858837, 0}, {0.782576, 0}, {0.761485, 0}, {0.782576, 0}, {0.858837, 0}, {1.00712, 0}, {1.20632, 0}, {1.38444, 0}, {1.45645, 0}, {1.38444, 0}, {1.20632, 0}, {0.775362, 0}, {0.763031, 0}, {0.782576, 0}, {0.782576, 0}, {0.763031, 0}, {0.775362, 0}, {0.862449, 0}, {1.00096, 0}, {1.10759, 0}, {1.10759, 0}, {1.00096, 0}, {0.862449, 0}, {0.775362, 5.32872e-18}, {0.858837, 8.79816e-18}, {0.899033, 1.85927e-18}, {0.858837, 1.85927e-18}, {0.775362, 1.24548e-19}, {0.72366, -1.61018e-18}, {0.743927, 1.85927e-18}, {0.804389, -4.14999e-18}, {0.836162, 1.24548e-19}, {0.804389, -1.61018e-18}, {0.743927, 1.85927e-18}, {0.72366, 7.86853e-18}, {1.00712, 0}, {1.1084, 6.93889e-18}, {1.1084, 0}, {1.00712, 0}, {0.862449, 0}, {0.743927, -3.46945e-18}, {0.683306, 0}, {0.666068, 0}, {0.666068, 0}, {0.683306, -3.46945e-18}, {0.743927, 0}, {0.862449, 0}, {1.36223, 0}, {1.42048, -2.16167e-19}, {1.36223, 0}, {1.20632, 0}, {1.00096, 0}, {0.804389, 1.08084e-19}, {0.666068, 0}, {0.616617, 0}, {0.666068, 0}, {0.804389, 1.08084e-19}, {1.00096, 0}, {1.20632, 0}, {1.69928, 0}, {1.69928, 0}, {1.59252, 0}, {1.38444, 0}, {1.10759, 0}, {0.836162, 0}, {0.666068, 0}, {0.666068, 0}, {0.836162, 0}, {1.10759, 0}, {1.38444, 0}, {1.59252, 0}, {1.92793, 2.13894e-17}, {1.88226, 2.58963e-17}, {1.7299, 2.58963e-17}, {1.45645, 2.58963e-17}, {1.10759, 2.81498e-17}, 
{0.804389, 2.58963e-17}, {0.683306, 2.58963e-17}, {0.804389, 3.19056e-17}, {1.10759, 2.81498e-17}, {1.45645, 2.58963e-17}, {1.7299, 2.58963e-17}, {1.88226, 1.9887e-17}, {2.03322, 0}, {1.94478, 0}, {1.7299, 0}, {1.38444, 0}, {1.00096, 0}, {0.743927, 2.4037e-17}, {0.743927, 0}, {1.00096, 0}, {1.38444, 0}, {1.7299, -2.4037e-17}, {1.94478, 0}, {2.03322, 0}, {2.03322, 0}, {1.88226, -5.55112e-17}, {1.59252, 0}, {1.20632, 0}, {0.862449, 0}, {0.72366, 2.77556e-17}, {0.862449, 0}, {1.20632, 0}, {1.59252, 0}, {1.88226, 2.77556e-17}, {2.03322, 0}, {2.07555, 0}, {1.51507, 0}, {1.25388, -5.55112e-17}, {0.981652, 0}, {0.775362, 0}, {0.698375, 0}, {0.775362, 3.22625e-17}, {0.981652, 0}, {1.25388, 0}, {1.51507, 0}, {1.69928, 2.32486e-17}, {1.76522, 0}, {1.69928, 0}, {1.25388, 0}, {1.03162, 2.77556e-17}, {0.85665, 0}, {0.763031, 0}, {0.763031, 0}, {0.85665, -1.38778e-17}, {1.03162, 0}, {1.25388, 0}, {1.46388, 0}, {1.59252, -1.38778e-17}, {1.59252, 0}, {1.46388, 0}, {0.981652, 0}, {0.85665, 0}, {0.797429, 0}, {0.782576, 0}, {0.797429, 0}, {0.85665, 0}, {0.981652, 0}, {1.15758, 0}, {1.31866, 0}, {1.38444, 0}, {1.31866, 0}, {1.15758, 0}, {0.775362, 0}, {0.763031, 0}, {0.782576, 0}, {0.782576, 0}, {0.763031, 0}, {0.775362, 0}, {0.862449, 0}, {1.00096, 0}, {1.10759, 0}, {1.10759, 0}, {1.00096, 0}, {0.862449, 0}, {0.698375, 0}, {0.763031, 0}, {0.797429, 0}, {0.763031, 0}, {0.698375, 0}, {0.679431, 7.51157e-19}, {0.740598, 0}, {0.836162, 0}, {0.881874, 0}, {0.836162, -7.51157e-19}, {0.740598, 0}, {0.679431, 0}, {0.775362, 0}, {0.85665, -8.67362e-19}, {0.85665, 0}, {0.775362, 0}, {0.679431, 0}, {0.639427, 4.33681e-19}, {0.666068, 0}, {0.707352, 0}, {0.707352, 0}, {0.666068, 4.33681e-19}, {0.639427, 0}, {0.679431, 0}, {0.981652, 0}, {1.03162, 1.40862e-18}, {0.981652, 0}, {0.862449, 0}, {0.740598, 0}, {0.666068, 1.13142e-17}, {0.641509, 0}, {0.63886, 0}, {0.641509, 0}, {0.666068, -1.27228e-17}, {0.740598, 0}, {0.862449, 0}, {1.25388, 0}, {1.25388, 0}, {1.15758, 0}, {1.00096, 0}, {0.836162, 
0}, {0.707352, -2.4037e-17}, {0.63886, 0}, {0.63886, 0}, {0.707352, 0}, {0.836162, 2.4037e-17}, {1.00096, 0}, {1.15758, 0}, {1.51507, 0}, {1.46388, 0}, {1.31866, 0}, {1.10759, 0}, {0.881874, 0}, {0.707352, 0}, {0.641509, 0}, {0.707352, 0}, {0.881874, 0}, {1.10759, 0}, {1.31866, 0}, {1.46388, 0}, {1.69928, 0}, {1.59252, 0}, {1.38444, 0}, {1.10759, 0}, {0.836162, 0}, {0.666068, 0}, {0.666068, 0}, {0.836162, 0}, {1.10759, 0}, {1.38444, 0}, {1.59252, 0}, {1.69928, 0}, {1.76522, 0}, {1.59252, 0}, {1.31866, 0}, {1.00096, 0}, {0.740598, 0}, {0.639427, 0}, {0.740598, 0}, {1.00096, 0}, {1.31866, 0}, {1.59252, 0}, {1.76522, 0}, {1.82256, 0}, {1.69928, 0}, {1.46388, 0}, {1.15758, 0}, {0.862449, 0}, {0.679431, 0}, {0.679431, 1.20185e-17}, {0.862449, 0}, {1.15758, 0}, {1.46388, 0}, {1.69928, -1.20185e-17}, {1.82256, 0}, {1.82256, 0}, {1.08798, 1.85927e-18}, {0.930725, 1.85927e-18}, {0.824264, 1.85927e-18}, {0.775362, 1.85927e-18}, {0.775362, 1.85927e-18}, {0.824264, 1.85927e-18}, {0.930725, 1.85927e-18}, {1.08798, 1.85927e-18}, {1.25388, 1.85927e-18}, {1.36223, 1.85927e-18}, {1.36223, 1.85927e-18}, {1.25388, 1.85927e-18}, {0.930725, 1.85927e-18}, {0.869317, 1.85927e-18}, {0.85665, 1.57371e-17}, {0.858837, 1.66044e-17}, {0.85665, 1.08732e-17}, {0.869317, 1.38778e-17}, {0.930725, 6.93889e-18}, {1.04306, -5.5133e-18}, {1.15758, -7.15462e-18}, {1.20632, -1.01592e-17}, {1.15758, -1.70981e-17}, {1.04306, -5.5133e-18}, {0.824264, -1.20185e-17}, {0.85665, -5.07962e-18}, {0.899033, -8.54907e-18}, {0.899033, -1.20185e-17}, {0.85665, -1.20185e-17}, {0.824264, -2.14972e-17}, {0.851575, -1.90113e-17}, {0.931265, -1.80278e-17}, {1.00096, -1.20185e-17}, {1.00096, -9.47871e-18}, {0.931265, -8.49514e-18}, {0.851575, -6.00926e-18}, {0.775362, -5.07962e-18}, {0.858837, -5.07962e-18}, {0.899033, -5.07962e-18}, {0.858837, -1.20185e-17}, {0.775362, -1.5488e-17}, {0.72366, -1.5488e-17}, {0.743927, -2.75065e-17}, {0.804389, -1.80278e-17}, {0.836162, -1.5488e-17}, {0.804389, -1.5488e-17}, {0.743927, 
-3.46945e-18}, {0.72366, -6.00926e-18}, {0.775362, -1.20185e-17}, {0.85665, -1.28859e-17}, {0.85665, -1.89574e-17}, {0.775362, -1.89574e-17}, {0.679431, -2.24269e-17}, {0.639427, -3.40117e-17}, {0.666068, -1.89574e-17}, {0.707352, -2.49667e-17}, {0.707352, -2.24269e-17}, {0.666068, -9.97466e-18}, {0.639427, -1.89574e-17}, {0.679431, -1.29482e-17}, {0.824264, -1.10889e-17}, {0.869317, -1.71739e-17}, {0.824264, -1.45583e-17}, {0.72366, -2.14972e-17}, {0.639427, -3.19056e-17}, {0.616617, -5.29001e-17}, {0.63886, -4.21894e-17}, {0.655231, -4.32269e-17}, {0.63886, -3.19056e-17}, {0.616617, -4.82604e-18}, {0.639427, -1.81523e-17}, {0.72366, -1.01759e-17}, {0.930725, -1.20185e-17}, {0.930725, -8.54907e-18}, {0.851575, -1.20185e-17}, {0.743927, -1.20185e-17}, {0.666068, -1.20185e-17}, {0.63886, -1.37532e-17}, {0.640151, -1.20185e-17}, {0.640151, -1.20185e-17}, {0.63886, -1.20185e-17}, {0.666068, -1.37532e-17}, {0.743927, -1.20185e-17}, {0.851575, -1.20185e-17}, {1.08798, 1.85927e-18}, {1.04306, 1.85927e-18}, {0.931265, -1.20185e-17}, {0.804389, -1.20185e-17}, {0.707352, -1.89574e-17}, {0.655231, -1.89574e-17}, {0.640151, -2.4037e-17}, {0.655231, -2.4037e-17}, {0.707352, -1.89574e-17}, {0.804389, -1.89574e-17}, {0.931265, 0}, {1.04306, 0}, {1.25388, -1.20185e-17}, {1.15758, -1.89574e-17}, {1.00096, -1.89574e-17}, {0.836162, -1.89574e-17}, {0.707352, -2.24269e-17}, {0.63886, -6.93889e-18}, {0.63886, -2.49667e-17}, {0.707352, -2.49667e-17}, {0.836162, -2.24269e-17}, {1.00096, -3.09759e-17}, {1.15758, -1.29482e-17}, {1.25388, -1.29482e-17}, {1.36223, -1.29482e-17}, {1.20632, -1.29482e-17}, {1.00096, -4.07037e-17}, {0.804389, -1.29482e-17}, {0.666068, 1.10889e-17}, {0.616617, -1.29482e-17}, {0.666068, -1.10889e-17}, {0.804389, -1.29482e-17}, {1.00096, -3.69852e-17}, {1.20632, -1.29482e-17}, {1.36223, 1.29482e-17}, {1.42048, -1.29482e-17}, {1.36223, -1.20185e-17}, {1.15758, -1.20185e-17}, {0.931265, -1.20185e-17}, {0.743927, -1.20185e-17}, {0.639427, -1.20185e-17}, {0.639427, 
-1.20185e-17}, {0.743927, -1.20185e-17}, {0.931265, -1.20185e-17}, {1.15758, -1.20185e-17}, {1.36223, -1.20185e-17}, {1.48405, -1.20185e-17}, {1.48405, -1.20185e-17}, {1.25388, 1.57371e-17}, {1.04306, 1.57371e-17}, {0.851575, -1.20185e-17}, {0.72366, -1.20185e-17}, {0.679431, -2.58963e-17}, {0.72366, -4.09195e-17}, {0.851575, -2.4037e-17}, {1.04306, -3.60556e-17}, {1.25388, -2.58963e-17}, {1.42048, -1.08732e-17}, {1.48405, 0}, {1.42048, 1.20185e-17}, {0.905261, 0}, {0.930725, 3.46945e-18}, {0.981652, 0}, {1.00712, 0}, {0.981652, 0}, {0.930725, -1.73472e-18}, {0.905261, 0}, {0.930725, 0}, {0.981652, 0}, {1.00712, -1.73472e-18}, {0.981652, 0}, {0.930725, 0}, {0.930725, 0}, {1.03162, 0}, {1.1084, 0}, {1.1084, 1.73472e-18}, {1.03162, 0}, {0.930725, -6.00926e-18}, {0.864823, 0}, {0.851575, -8.67362e-19}, {0.862449, 0}, {0.862449, 6.00926e-18}, {0.851575, 0}, {0.864823, -8.67362e-19}, {0.981652, 0}, {1.1084, 3.46945e-18}, {1.16145, 0}, {1.1084, 0}, {0.981652, 0}, {0.851575, -1.73472e-18}, {0.771224, 0}, {0.743927, 0}, {0.740598, 0}, {0.743927, -1.73472e-18}, {0.771224, 0}, {0.851575, 0}, {1.00712, 0}, {1.1084, 6.93889e-18}, {1.1084, 0}, {1.00712, 0}, {0.862449, 0}, {0.743927, -3.46945e-18}, {0.683306, 0}, {0.666068, 0}, {0.666068, 0}, {0.683306, -3.46945e-18}, {0.743927, 0}, {0.862449, 0}, {0.981652, 0}, {1.03162, -1.38028e-18}, {0.981652, 0}, {0.862449, 0}, {0.740598, 0}, {0.666068, 6.90141e-19}, {0.641509, 0}, {0.63886, 0}, {0.641509, 0}, {0.666068, 6.90141e-19}, {0.740598, 0}, {0.862449, 0}, {0.930725, -8.67362e-18}, {0.930725, -8.67362e-18}, {0.851575, -6.93889e-18}, {0.743927, -6.93889e-18}, {0.666068, 4.33681e-18}, {0.63886, 1.63553e-17}, {0.640151, 9.47871e-18}, {0.640151, 1.24833e-17}, {0.63886, 4.33681e-18}, {0.666068, -7.68171e-18}, {0.743927, -2.53981e-18}, {0.851575, -5.54444e-18}, {0.905261, 0}, {0.864823, -1.73472e-18}, {0.771224, 0}, {0.683306, 0}, {0.641509, 0}, {0.640151, 8.67362e-19}, {0.645833, 0}, {0.640151, 0}, {0.641509, 0}, {0.683306, 8.67362e-19}, 
{0.771224, 0}, {0.864823, 0}, {0.930725, 0}, {0.851575, 0}, {0.743927, 0}, {0.666068, 0}, {0.63886, 0}, {0.640151, 0}, {0.640151, 0}, {0.63886, 0}, {0.666068, 0}, {0.743927, 0}, {0.851575, 0}, {0.930725, 0}, {0.981652, 0}, {0.862449, 0}, {0.740598, 0}, {0.666068, 0}, {0.641509, 0}, {0.63886, 0}, {0.641509, 0}, {0.666068, 0}, {0.740598, 0}, {0.862449, 0}, {0.981652, 0}, {1.03162, 0}, {1.00712, 1.38778e-17}, {0.862449, 1.38778e-17}, {0.743927, 0}, {0.683306, 0}, {0.666068, -6.93889e-18}, {0.666068, -1.29482e-17}, {0.683306, -1.20185e-17}, {0.743927, -1.20185e-17}, {0.862449, -6.93889e-18}, {1.00712, -9.29636e-19}, {1.1084, 1.20185e-17}, {1.1084, 1.20185e-17}, {0.981652, 0}, {0.851575, 1.38778e-17}, {0.771224, 0}, {0.743927, 0}, {0.740598, 0}, {0.743927, -6.93889e-18}, {0.771224, 0}, {0.851575, 0}, {0.981652, 0}, {1.1084, -6.93889e-18}, {1.16145, 0}, {1.1084, 0}, {0.930725, 0}, {0.864823, 0}, {0.851575, 0}, {0.862449, 0}, {0.862449, 0}, {0.851575, 0}, {0.864823, 0}, {0.930725, 0}, {1.03162, 0}, {1.1084, 0}, {1.1084, 0}, {1.03162, 0}, {1.08798, 0}, {1.25388, 0}, {1.36223, 0}, {1.36223, 0}, {1.25388, 0}, {1.08798, 1.20185e-17}, {0.930725, 0}, {0.824264, 0}, {0.775362, 0}, {0.775362, -1.20185e-17}, {0.824264, 0}, {0.930725, 0}, {1.25388, 2.77556e-17}, {1.42048, -1.38778e-17}, {1.48405, -1.38778e-17}, {1.42048, 0}, {1.25388, -1.38778e-17}, {1.04306, 6.93889e-18}, {0.851575, 3.09759e-17}, {0.72366, 0}, {0.679431, -1.38778e-17}, {0.72366, 6.93889e-18}, {0.851575, -1.70981e-17}, {1.04306, 0}, {1.36223, 0}, {1.48405, -6.93889e-18}, {1.48405, 0}, {1.36223, 0}, {1.15758, 0}, {0.931265, 3.46945e-18}, {0.743927, 0}, {0.639427, 0}, {0.639427, 0}, {0.743927, 3.46945e-18}, {0.931265, 0}, {1.15758, 0}, {1.36223, 0}, {1.42048, -2.07544e-18}, {1.36223, 0}, {1.20632, 0}, {1.00096, 0}, {0.804389, 1.03772e-18}, {0.666068, 0}, {0.616617, 0}, {0.666068, 0}, {0.804389, 1.03772e-18}, {1.00096, 0}, {1.20632, 0}, {1.25388, 0}, {1.25388, 0}, {1.15758, 0}, {1.00096, 0}, {0.836162, 0}, {0.707352, 
0}, {0.63886, 0}, {0.63886, 0}, {0.707352, 0}, {0.836162, 0}, {1.00096, 0}, {1.15758, 0}, {1.08798, -4.64818e-19}, {1.04306, 0}, {0.931265, 0}, {0.804389, 0}, {0.707352, 2.32409e-19}, {0.655231, 0}, {0.640151, -6.00926e-18}, {0.655231, -4.50694e-18}, {0.707352, 2.32409e-19}, {0.804389, 0}, {0.931265, 6.00926e-18}, {1.04306, 4.50694e-18}, {0.930725, 0}, {0.851575, 0}, {0.743927, 0}, {0.666068, 0}, {0.63886, 0}, {0.640151, 0}, {0.640151, 0}, {0.63886, 0}, {0.666068, 0}, {0.743927, 0}, {0.851575, 0}, {0.930725, 0}, {0.824264, 0}, {0.72366, 0}, {0.639427, 0}, {0.616617, 0}, {0.63886, 0}, {0.655231, -1.50231e-18}, {0.63886, 0}, {0.616617, 0}, {0.639427, 0}, {0.72366, 1.50231e-18}, {0.824264, 0}, {0.869317, 0}, {0.775362, 0}, {0.679431, 0}, {0.639427, 0}, {0.666068, 0}, {0.707352, 0}, {0.707352, 0}, {0.666068, 0}, {0.639427, 0}, {0.679431, 0}, {0.775362, 0}, {0.85665, 0}, {0.85665, 0}, {0.775362, -6.93889e-18}, {0.72366, -3.46945e-18}, {0.743927, 0}, {0.804389, 0}, {0.836162, 4.64818e-19}, {0.804389, 1.73472e-18}, {0.743927, 0}, {0.72366, 6.00926e-18}, {0.775362, 6.47408e-18}, {0.858837, 1.73472e-18}, {0.899033, 0}, {0.858837, -6.00926e-18}, {0.824264, 0}, {0.851575, 0}, {0.931265, 0}, {1.00096, 0}, {1.00096, 0}, {0.931265, 0}, {0.851575, 0}, {0.824264, 0}, {0.85665, 0}, {0.899033, 0}, {0.899033, 0}, {0.85665, 0}, {0.930725, 0}, {1.04306, 0}, {1.15758, 0}, {1.20632, 0}, {1.15758, 0}, {1.04306, 0}, {0.930725, 0}, {0.869317, 0}, {0.85665, 0}, {0.858837, 0}, {0.85665, 0}, {0.869317, 0}, {1.51507, 0}, {1.69928, 0}, {1.76522, 0}, {1.69928, 0}, {1.51507, 0}, {1.25388, -2.4037e-17}, {0.981652, 0}, {0.775362, 0}, {0.698375, 0}, {0.775362, 2.4037e-17}, {0.981652, 0}, {1.25388, 0}, {1.69928, 0}, {1.82256, -1.38778e-17}, {1.82256, 0}, {1.69928, 0}, {1.46388, 0}, {1.15758, -4.11352e-17}, {0.862449, 0}, {0.679431, 0}, {0.679431, 0}, {0.862449, 5.5013e-17}, {1.15758, 0}, {1.46388, 0}, {1.76522, 0}, {1.82256, 6.6502e-18}, {1.76522, 0}, {1.59252, 0}, {1.31866, 0}, {1.00096, 
-3.3251e-18}, {0.740598, 0}, {0.639427, 0}, {0.740598, 0}, {1.00096, -3.3251e-18}, {1.31866, 0}, {1.59252, 0}, {1.69928, 0}, {1.69928, 0}, {1.59252, 0}, {1.38444, 0}, {1.10759, 0}, {0.836162, 0}, {0.666068, 0}, {0.666068, 0}, {0.836162, 0}, {1.10759, 0}, {1.38444, 0}, {1.59252, 0}, {1.51507, 0}, {1.46388, 0}, {1.31866, 0}, {1.10759, 0}, {0.881874, 0}, {0.707352, 0}, {0.641509, 0}, {0.707352, 0}, {0.881874, 0}, {1.10759, 0}, {1.31866, 0}, {1.46388, 0}, {1.25388, 1.38778e-17}, {1.15758, 0}, {1.00096, 0}, {0.836162, 2.77556e-17}, {0.707352, -6.93889e-18}, {0.63886, -1.20185e-17}, {0.63886, -6.00926e-18}, {0.707352, -2.58963e-17}, {0.836162, -6.93889e-18}, {1.00096, 1.20185e-17}, {1.15758, 6.00926e-18}, {1.25388, -1.85927e-18}, {0.981652, 0}, {0.862449, 1.38778e-17}, {0.740598, 0}, {0.666068, 0}, {0.641509, 0}, {0.63886, -6.93889e-18}, {0.641509, 0}, {0.666068, 0}, {0.740598, 0}, {0.862449, -6.93889e-18}, {0.981652, 0}, {1.03162, 0}, {0.775362, 0}, {0.679431, 0}, {0.639427, 0}, {0.666068, 0}, {0.707352, 0}, {0.707352, 0}, {0.666068, 0}, {0.639427, 0}, {0.679431, 0}, {0.775362, 0}, {0.85665, 0}, {0.85665, 0}, {0.698375, 0}, {0.679431, 0}, {0.740598, 0}, {0.836162, 0}, {0.881874, 0}, {0.836162, -6.00926e-18}, {0.740598, 0}, {0.679431, 0}, {0.698375, 0}, {0.763031, 6.00926e-18}, {0.797429, 0}, {0.763031, 0}, {0.775362, 0}, {0.862449, 0}, {1.00096, 0}, {1.10759, 0}, {1.10759, 0}, {1.00096, 0}, {0.862449, 0}, {0.775362, 0}, {0.763031, 0}, {0.782576, 0}, {0.782576, 0}, {0.763031, 0}, {0.981652, 0}, {1.15758, 0}, {1.31866, 0}, {1.38444, 0}, {1.31866, 0}, {1.15758, 0}, {0.981652, 0}, {0.85665, 0}, {0.797429, 0}, {0.782576, 0}, {0.797429, 0}, {0.85665, 0}, {1.25388, 0}, {1.46388, 0}, {1.59252, 0}, {1.59252, 0}, {1.46388, 0}, {1.25388, 2.4037e-17}, {1.03162, 0}, {0.85665, 0}, {0.763031, 0}, {0.763031, -2.4037e-17}, {0.85665, 0}, {1.03162, 0}, {1.92793, 1.20185e-17}, {2.03322, 2.58963e-17}, {2.03322, 3.97741e-17}, {1.92793, 1.20185e-17}, {1.69928, 3.60556e-17}, {1.36223, 
5.07962e-18}, {1.00712, -1.85927e-18}, {0.775362, 1.20185e-17}, {0.775362, -1.20185e-17}, {1.00712, 5.07962e-18}, {1.36223, -1.85927e-18}, {1.69928, 1.20185e-17}, {2.03322, 2.41616e-17}, {2.07555, 2.2009e-17}, {2.03322, 7.96727e-17}, {1.88226, 1.35184e-16}, {1.59252, 2.41616e-17}, {1.20632, 2.52379e-17}, {0.862449, 4.44801e-17}, {0.72366, -3.13496e-17}, {0.862449, 2.41616e-17}, {1.20632, 2.52379e-17}, {1.59252, -5.16681e-17}, {1.88226, -3.13496e-17}, {2.03322, 3.97741e-17}, {2.03322, 2.58963e-17}, {1.94478, 3.97741e-17}, {1.7299, 3.97741e-17}, {1.38444, 3.97741e-17}, {1.00096, 4.6713e-17}, {0.743927, 3.97741e-17}, {0.743927, 3.97741e-17}, {1.00096, 3.97741e-17}, {1.38444, 4.6713e-17}, {1.7299, 3.97741e-17}, {1.94478, 3.97741e-17}, {1.92793, 3.97741e-17}, {1.88226, 1.20185e-17}, {1.7299, 3.97741e-17}, {1.45645, 3.97741e-17}, {1.10759, 3.97741e-17}, {0.804389, 5.36519e-17}, {0.683306, 3.97741e-17}, {0.804389, 3.97741e-17}, {1.10759, 3.97741e-17}, {1.45645, 5.36519e-17}, {1.7299, 3.97741e-17}, {1.88226, 3.97741e-17}, {1.69928, 3.60556e-17}, {1.59252, 6.38111e-17}, {1.38444, 3.60556e-17}, {1.10759, 3.60556e-17}, {0.836162, 3.60556e-17}, {0.666068, 2.21778e-17}, {0.666068, 3.60556e-17}, {0.836162, 3.60556e-17}, {1.10759, 3.60556e-17}, {1.38444, 2.21778e-17}, {1.59252, 3.60556e-17}, {1.69928, 3.60556e-17}, {1.36223, 7.16174e-17}, {1.20632, 8.54952e-17}, {1.00096, 8.54952e-17}, {0.804389, 5.77396e-17}, {0.666068, 3.32044e-17}, {0.616617, 1.4247e-17}, {0.666068, -3.75829e-17}, {0.804389, -3.19678e-17}, {1.00096, -1.48697e-17}, {1.20632, -9.79007e-18}, {1.36223, 4.20398e-17}, {1.42048, 6.41803e-17}, {1.00712, 2.21778e-17}, {0.862449, 3.60556e-17}, {0.743927, 4.99333e-17}, {0.683306, 4.99333e-17}, {0.666068, 4.62148e-17}, {0.666068, 3.4769e-17}, {0.683306, 2.03185e-17}, {0.743927, 8.29997e-18}, {0.862449, -1.85927e-18}, {1.00712, -4.29122e-18}, {1.1084, -3.71854e-18}, {1.1084, 8.29997e-18}, {0.775362, -1.85927e-18}, {0.72366, 1.20185e-17}, {0.743927, 1.89574e-17}, {0.804389, 
2.58963e-17}, {0.836162, 2.21778e-17}, {0.804389, 1.52389e-17}, {0.743927, -2.49095e-19}, {0.72366, -1.57371e-17}, {0.775362, -2.58963e-17}, {0.858837, -3.28352e-17}, {0.899033, -2.42861e-17}, {0.858837, -1.57371e-17}, {0.775362, -1.20185e-17}, {0.862449, 1.85927e-18}, {1.00096, 1.22676e-17}, {1.10759, 1.57371e-17}, {1.10759, 1.20185e-17}, {1.00096, 2.07499e-18}, {0.862449, -1.21431e-17}, {0.775362, -2.58963e-17}, {0.763031, -3.60556e-17}, {0.782576, -3.99898e-17}, {0.782576, -3.61801e-17}, {0.763031, -2.58963e-17}, {1.00712, -7.36012e-17}, {1.20632, -7.36012e-17}, {1.38444, -1.809e-17}, {1.45645, 1.14003e-17}, {1.38444, 3.67072e-17}, {1.20632, 5.77396e-17}, {1.00712, 2.9984e-17}, {0.858837, 1.52389e-17}, {0.782576, -1.73761e-17}, {0.761485, -3.84085e-17}, {0.782576, -6.61641e-17}, {0.858837, -8.09093e-17}, {1.36223, -2.58963e-17}, {1.59252, 1.85927e-18}, {1.7299, 1.85927e-18}, {1.7299, 1.85927e-18}, {1.59252, -1.85927e-18}, {1.36223, -1.57371e-17}, {1.1084, -1.57371e-17}, {0.899033, -3.97741e-17}, {0.782576, -4.99333e-17}, {0.782576, -6.38111e-17}, {0.899033, -6.38111e-17}, {1.1084, -3.97741e-17}, {1.69928, -1.85927e-18}, {1.88226, 2.58963e-17}, {1.94478, 1.20185e-17}, {1.88226, 2.58963e-17}, {1.69928, 2.21778e-17}, {1.42048, 8.29997e-18}, {1.1084, -8.79816e-18}, {0.858837, -1.57371e-17}, {0.763031, -2.58963e-17}, {0.858837, -3.97741e-17}, {1.1084, -8.79816e-18}, {1.42048, -1.57371e-17}, {2.14383, 0}, {2.16623, -3.54439e-19}, {2.14383, 0}, {2.03322, 0}, {1.76522, 0}, {1.36223, 1.77219e-19}, {0.981652, 0}, {0.824264, 0}, {0.981652, 0}, {1.36223, 1.77219e-19}, {1.76522, 0}, {2.03322, 0}, {2.16623, -6.93889e-18}, {2.16623, -6.93889e-18}, {2.11997, -6.93889e-18}, {1.94478, -6.93889e-18}, {1.59252, -6.93889e-18}, {1.15758, -6.93889e-18}, {0.851575, 1.70981e-17}, {0.851575, -6.93889e-18}, {1.15758, -6.93889e-18}, {1.59252, -6.93889e-18}, {1.94478, -3.09759e-17}, {2.11997, -6.93889e-18}, {2.14383, 0}, {2.11997, 0}, {2.00496, 0}, {1.7299, 0}, {1.31866, 0}, {0.931265, 
4.80741e-17}, {0.771224, 0}, {0.931265, 0}, {1.31866, 0}, {1.7299, -4.80741e-17}, {2.00496, 0}, {2.11997, 0}, {2.03322, 0}, {1.94478, 0}, {1.7299, 0}, {1.38444, 0}, {1.00096, 0}, {0.743927, 0}, {0.743927, 0}, {1.00096, 0}, {1.38444, 0}, {1.7299, 0}, {1.94478, 0}, {2.03322, 0}, {1.76522, 0}, {1.59252, 5.55112e-17}, {1.31866, 0}, {1.00096, 0}, {0.740598, 0}, {0.639427, -2.77556e-17}, {0.740598, 0}, {1.00096, 0}, {1.31866, 0}, {1.59252, -2.77556e-17}, {1.76522, 0}, {1.82256, 0}, {1.36223, 3.46945e-18}, {1.15758, 3.1225e-17}, {0.931265, 3.1225e-17}, {0.743927, 3.1225e-17}, {0.639427, 2.75065e-17}, {0.639427, 1.36287e-17}, {0.743927, 1.61018e-18}, {0.931265, -1.04083e-17}, {1.15758, -2.05676e-17}, {1.36223, -3.44454e-17}, {1.48405, -2.24269e-17}, {1.48405, -1.04083e-17}, {0.981652, 0}, {0.851575, 1.38778e-17}, {0.771224, 0}, {0.743927, 0}, {0.740598, 0}, {0.743927, -6.93889e-18}, {0.771224, 0}, {0.851575, 0}, {0.981652, 0}, {1.1084, -6.93889e-18}, {1.16145, 0}, {1.1084, 0}, {0.824264, 0}, {0.851575, 0}, {0.931265, 0}, {1.00096, 0}, {1.00096, 0}, {0.931265, 0}, {0.851575, 0}, {0.824264, 0}, {0.85665, 0}, {0.899033, 0}, {0.899033, 0}, {0.85665, 0}, {0.981652, 0}, {1.15758, 0}, {1.31866, 0}, {1.38444, 0}, {1.31866, 0}, {1.15758, 0}, {0.981652, 0}, {0.85665, 0}, {0.797429, 0}, {0.782576, 0}, {0.797429, 0}, {0.85665, 0}, {1.36223, 3.1225e-17}, {1.59252, 3.1225e-17}, {1.7299, 1.73472e-17}, {1.7299, -3.46945e-18}, {1.59252, -1.04083e-17}, {1.36223, -1.04083e-17}, {1.1084, -2.75065e-17}, {0.899033, -1.70981e-17}, {0.782576, -1.04083e-17}, {0.782576, -1.04083e-17}, {0.899033, 2.05676e-17}, {1.1084, 3.09759e-17}, {1.76522, 0}, {1.94478, 2.77556e-17}, {2.00496, 0}, {1.94478, 0}, {1.76522, 0}, {1.48405, -1.38778e-17}, {1.16145, 0}, {0.899033, 0}, {0.797429, 0}, {0.899033, -1.38778e-17}, {1.16145, 0}, {1.48405, 0}, {2.03322, 0}, {2.11997, 1.38778e-17}, {2.11997, 0}, {2.03322, 0}, {1.82256, 0}, {1.48405, -6.93889e-18}, {1.1084, 0}, {0.85665, 0}, {0.85665, 0}, {1.1084, -6.93889e-18}, 
{1.48405, 0}, {1.82256, 0}, {2.17701, 0}, {2.17701, 0}, {2.16623, 0}, {2.03322, 0}, {1.69928, 0}, {1.25388, 0}, {0.930725, 0}, {0.930725, 0}, {1.25388, 0}, {1.69928, 0}, {2.03322, 0}, {2.16623, 0}, {2.17701, -5.20417e-18}, {2.17945, -2.77556e-17}, {2.11997, 0}, {1.88226, 0}, {1.46388, 2.60209e-18}, {1.04306, -1.01592e-17}, {0.864823, 0}, {1.04306, 1.20185e-17}, {1.46388, 2.60209e-18}, {1.88226, 3.79148e-17}, {2.11997, 0}, {2.17945, -1.20185e-17}, {2.16623, 0}, {2.11997, 0}, {1.94478, 0}, {1.59252, 0}, {1.15758, 0}, {0.851575, 0}, {0.851575, 0}, {1.15758, 0}, {1.59252, 0}, {1.94478, 0}, {2.11997, 0}, {2.16623, 0}, {2.03322, 0}, {1.88226, 0}, {1.59252, 0}, {1.20632, 0}, {0.862449, 0}, {0.72366, 2.4037e-17}, {0.862449, 0}, {1.20632, 0}, {1.59252, 0}, {1.88226, -2.4037e-17}, {2.03322, 0}, {2.07555, 0}, {1.69928, 0}, {1.46388, 0}, {1.15758, 0}, {0.862449, 0}, {0.679431, 0}, {0.679431, -1.20185e-17}, {0.862449, 0}, {1.15758, 0}, {1.46388, 0}, {1.69928, 1.20185e-17}, {1.82256, 0}, {1.82256, 0}, {1.25388, 0}, {1.04306, 0}, {0.851575, 0}, {0.72366, 0}, {0.679431, 0}, {0.72366, 1.50231e-18}, {0.851575, 0}, {1.04306, 0}, {1.25388, 0}, {1.42048, -1.50231e-18}, {1.48405, 0}, {1.42048, 0}, {0.930725, 0}, {0.864823, 0}, {0.851575, 0}, {0.862449, 0}, {0.862449, 0}, {0.851575, 0}, {0.864823, 0}, {0.930725, 0}, {1.03162, 0}, {1.1084, 0}, {1.1084, 0}, {1.03162, 0}, {0.930725, 0}, {1.04306, 0}, {1.15758, 0}, {1.20632, 0}, {1.15758, 0}, {1.04306, 0}, {0.930725, 0}, {0.869317, 0}, {0.85665, 0}, {0.858837, 0}, {0.85665, 0}, {0.869317, 0}, {1.25388, 0}, {1.46388, 0}, {1.59252, 0}, {1.59252, 0}, {1.46388, 0}, {1.25388, 0}, {1.03162, 0}, {0.85665, 0}, {0.763031, 0}, {0.763031, 0}, {0.85665, 0}, {1.03162, 0}, {1.69928, 0}, {1.88226, 2.77556e-17}, {1.94478, 0}, {1.88226, 0}, {1.69928, 0}, {1.42048, -1.38778e-17}, {1.1084, 0}, {0.858837, 0}, {0.763031, 0}, {0.858837, -1.38778e-17}, {1.1084, 0}, {1.42048, 0}, {2.03322, 0}, {2.11997, 0}, {2.11997, 0}, {2.03322, 0}, {1.82256, 0}, {1.48405, 0}, 
{1.1084, 0}, {0.85665, 0}, {0.85665, 0}, {1.1084, 0}, {1.48405, 0}, {1.82256, 0}, {2.16623, 0}, {2.17945, 5.846e-18}, {2.16623, 0}, {2.07555, 0}, {1.82256, 0}, {1.42048, -2.923e-18}, {1.03162, 0}, {0.869317, 0}, {1.03162, 0}, {1.42048, -2.923e-18}, {1.82256, 0}, {2.07555, 0}}; - const std::vector > out_2 = {{2.16277, 0}, {2.17701, 0}, {2.14383, 0}, {1.92793, 0}, {1.51507, 0}, {1.08798, -2.4037e-17}, {0.905261, 0}, {1.08798, 0}, {1.51507, 0}, {1.92793, 2.4037e-17}, {2.14383, 0}, {2.17701, 0}, {2.17701, 0}, {2.16623, 5.55112e-17}, {2.03322, 0}, {1.69928, 0}, {1.25388, 0}, {0.930725, -2.77556e-17}, {0.930725, 0}, {1.25388, 0}, {1.69928, 0}, {2.03322, -2.77556e-17}, {2.16623, 0}, {2.17701, 0}, {2.14383, 0}, {2.03322, 0}, {1.76522, 0}, {1.36223, 0}, {0.981652, 0}, {0.824264, 0}, {0.981652, 0}, {1.36223, 0}, {1.76522, 0}, {2.03322, 0}, {2.14383, 0}, {2.16623, 0}, {1.92793, 0}, {1.69928, 0}, {1.36223, 0}, {1.00712, 0}, {0.775362, 0}, {0.775362, 1.20185e-17}, {1.00712, 0}, {1.36223, 0}, {1.69928, 0}, {1.92793, -1.20185e-17}, {2.03322, 0}, {2.03322, 0}, {1.51507, 0}, {1.25388, 0}, {0.981652, 0}, {0.775362, 0}, {0.698375, 0}, {0.775362, -4.50694e-18}, {0.981652, 0}, {1.25388, 0}, {1.51507, 0}, {1.69928, 4.50694e-18}, {1.76522, 0}, {1.69928, 0}, {1.08798, 0}, {0.930725, -2.77556e-17}, {0.824264, 0}, {0.775362, 0}, {0.775362, 0}, {0.824264, 1.38778e-17}, {0.930725, 0}, {1.08798, 0}, {1.25388, 0}, {1.36223, 1.38778e-17}, {1.36223, 0}, {1.25388, 0}, {0.905261, 0}, {0.930725, 0}, {0.981652, 0}, {1.00712, 0}, {0.981652, 0}, {0.930725, 0}, {0.905261, 0}, {0.930725, 0}, {0.981652, 0}, {1.00712, 0}, {0.981652, 0}, {0.930725, 0}, {1.08798, 0}, {1.25388, 0}, {1.36223, 0}, {1.36223, 0}, {1.25388, 0}, {1.08798, 0}, {0.930725, 0}, {0.824264, 0}, {0.775362, 0}, {0.775362, 0}, {0.824264, 0}, {0.930725, 0}, {1.51507, 0}, {1.69928, -2.77556e-17}, {1.76522, 0}, {1.69928, 0}, {1.51507, 0}, {1.25388, 1.38778e-17}, {0.981652, 0}, {0.775362, 0}, {0.698375, 0}, {0.775362, 1.38778e-17}, {0.981652, 
0}, {1.25388, 0}, {1.92793, 0}, {2.03322, 0}, {2.03322, 0}, {1.92793, 0}, {1.69928, 0}, {1.36223, 0}, {1.00712, 0}, {0.775362, 0}, {0.775362, 0}, {1.00712, 0}, {1.36223, 0}, {1.69928, 0}, {2.14383, 0}, {2.16623, -3.54439e-19}, {2.14383, 0}, {2.03322, 0}, {1.76522, 0}, {1.36223, 1.77219e-19}, {0.981652, 0}, {0.824264, 0}, {0.981652, 0}, {1.36223, 1.77219e-19}, {1.76522, 0}, {2.03322, 0}, {2.17701, 0}, {2.17701, 0}, {2.16623, 0}, {2.03322, 0}, {1.69928, 0}, {1.25388, 0}, {0.930725, 0}, {0.930725, 0}, {1.25388, 0}, {1.69928, 0}, {2.03322, 0}, {2.16623, 0}, {2.17701, -6.93889e-18}, {2.16623, -6.93889e-18}, {2.03322, -6.93889e-18}, {1.69928, -6.93889e-18}, {1.25388, -6.93889e-18}, {0.930725, -3.09759e-17}, {0.930725, -6.93889e-18}, {1.25388, -6.93889e-18}, {1.69928, -6.93889e-18}, {2.03322, 1.70981e-17}, {2.16623, -6.93889e-18}, {2.17701, -6.93889e-18}, {2.16623, 0}, {2.07555, 0}, {1.82256, 0}, {1.42048, 0}, {1.03162, 0}, {0.869317, -2.4037e-17}, {1.03162, 0}, {1.42048, 0}, {1.82256, 0}, {2.07555, 2.4037e-17}, {2.16623, 0}, {2.17945, 0}, {2.03322, 0}, {1.82256, -5.55112e-17}, {1.48405, 0}, {1.1084, 0}, {0.85665, 0}, {0.85665, 2.77556e-17}, {1.1084, 0}, {1.48405, 0}, {1.82256, 0}, {2.03322, 2.77556e-17}, {2.11997, 0}, {2.11997, 0}, {1.69928, 0}, {1.42048, 0}, {1.1084, 0}, {0.858837, 0}, {0.763031, 0}, {0.858837, 0}, {1.1084, 0}, {1.42048, 0}, {1.69928, 0}, {1.88226, 0}, {1.94478, 0}, {1.88226, 0}, {1.25388, 3.46945e-18}, {1.03162, -2.42861e-17}, {0.85665, 3.46945e-18}, {0.763031, 3.46945e-18}, {0.763031, 3.46945e-18}, {0.85665, 1.73472e-17}, {1.03162, 3.46945e-18}, {1.25388, 3.46945e-18}, {1.46388, 3.46945e-18}, {1.59252, 1.73472e-17}, {1.59252, 3.46945e-18}, {1.46388, 3.46945e-18}, {0.930725, -2.4037e-17}, {0.869317, -3.79148e-17}, {0.85665, -2.4037e-17}, {0.858837, -1.7563e-17}, {0.85665, -2.4037e-17}, {0.869317, -1.70981e-17}, {0.930725, -2.4037e-17}, {1.04306, -2.72741e-17}, {1.15758, -2.4037e-17}, {1.20632, -1.70981e-17}, {1.15758, -2.4037e-17}, {1.04306, 
-2.72741e-17}, {0.930725, 0}, {1.03162, 0}, {1.1084, 0}, {1.1084, 0}, {1.03162, 0}, {0.930725, 0}, {0.864823, 0}, {0.851575, 0}, {0.862449, 0}, {0.862449, 0}, {0.851575, 0}, {0.864823, 0}, {1.25388, -2.47509e-17}, {1.42048, -3.86287e-17}, {1.48405, -1.08732e-17}, {1.42048, 3.00463e-18}, {1.25388, 1.68824e-17}, {1.04306, 4.78583e-17}, {0.851575, 3.39806e-17}, {0.72366, 2.70417e-17}, {0.679431, 1.68824e-17}, {0.72366, -2.15723e-19}, {0.851575, -1.40935e-17}, {1.04306, -2.10324e-17}, {1.69928, 3.46945e-18}, {1.82256, 1.73472e-17}, {1.82256, 3.46945e-18}, {1.69928, 3.46945e-18}, {1.46388, 3.46945e-18}, {1.15758, -3.46945e-18}, {0.862449, 3.46945e-18}, {0.679431, 3.46945e-18}, {0.679431, 3.46945e-18}, {0.862449, -3.46945e-18}, {1.15758, 3.46945e-18}, {1.46388, 3.46945e-18}, {2.03322, 2.4037e-17}, {2.07555, 2.5603e-17}, {2.03322, 2.4037e-17}, {1.88226, 2.4037e-17}, {1.59252, -2.4037e-17}, {1.20632, 2.32541e-17}, {0.862449, 2.4037e-17}, {0.72366, 2.4037e-17}, {0.862449, 7.21111e-17}, {1.20632, 2.32541e-17}, {1.59252, 2.4037e-17}, {1.88226, 2.4037e-17}, {2.16623, 0}, {2.16623, 0}, {2.11997, 0}, {1.94478, 0}, {1.59252, 0}, {1.15758, -4.80741e-17}, {0.851575, 0}, {0.851575, 0}, {1.15758, 0}, {1.59252, 4.80741e-17}, {1.94478, 0}, {2.11997, 0}, {2.17701, -1.68824e-17}, {2.17945, -3.07602e-17}, {2.11997, -3.00463e-18}, {1.88226, -3.00463e-18}, {1.46388, 3.93426e-18}, {1.04306, -1.31639e-17}, {0.864823, -3.00463e-18}, {1.04306, 9.01389e-18}, {1.46388, 3.93426e-18}, {1.88226, 3.49102e-17}, {2.11997, -3.00463e-18}, {2.17945, -1.50231e-17}, {2.14383, 0}, {2.03322, 0}, {1.76522, 0}, {1.36223, 0}, {0.981652, 0}, {0.824264, 2.4037e-17}, {0.981652, 0}, {1.36223, 0}, {1.76522, 0}, {2.03322, -2.4037e-17}, {2.14383, 0}, {2.16623, 0}, {2.03322, -2.77556e-17}, {1.82256, -2.77556e-17}, {1.48405, -2.77556e-17}, {1.1084, -2.77556e-17}, {0.85665, -2.77556e-17}, {0.85665, -3.71854e-18}, {1.1084, -2.77556e-17}, {1.48405, -2.77556e-17}, {1.82256, -2.77556e-17}, {2.03322, -5.17926e-17}, {2.11997, 
-2.77556e-17}, {2.11997, -2.77556e-17}, {1.76522, 0}, {1.48405, 0}, {1.16145, 0}, {0.899033, 0}, {0.797429, 0}, {0.899033, 9.01389e-18}, {1.16145, 0}, {1.48405, 0}, {1.76522, 0}, {1.94478, -9.01389e-18}, {2.00496, 0}, {1.94478, 0}, {1.36223, 0}, {1.1084, -2.77556e-17}, {0.899033, 0}, {0.782576, 0}, {0.782576, 0}, {0.899033, 1.38778e-17}, {1.1084, 0}, {1.36223, 0}, {1.59252, 0}, {1.7299, 1.38778e-17}, {1.7299, 0}, {1.59252, 0}, {0.981652, 0}, {0.85665, 2.77556e-17}, {0.797429, 0}, {0.782576, 0}, {0.797429, 0}, {0.85665, -1.38778e-17}, {0.981652, 0}, {1.15758, 0}, {1.31866, 0}, {1.38444, -1.38778e-17}, {1.31866, 0}, {1.15758, 0}, {0.824264, 1.38778e-17}, {0.85665, 1.38778e-17}, {0.899033, 1.73472e-17}, {0.899033, 6.93889e-18}, {0.85665, 1.38778e-17}, {0.824264, 1.68824e-17}, {0.851575, 1.77767e-17}, {0.931265, 2.03519e-17}, {1.00096, 1.38778e-17}, {1.00096, 1.08732e-17}, {0.931265, 6.50938e-18}, {0.851575, 1.43426e-17}, {0.981652, 0}, {1.1084, 0}, {1.16145, 0}, {1.1084, 0}, {0.981652, 0}, {0.851575, 0}, {0.771224, 0}, {0.743927, 0}, {0.740598, 0}, {0.743927, 0}, {0.771224, 0}, {0.851575, 0}, {1.36223, 0}, {1.48405, -6.93889e-18}, {1.48405, 0}, {1.36223, 0}, {1.15758, 0}, {0.931265, 3.46945e-18}, {0.743927, 0}, {0.639427, 0}, {0.639427, 0}, {0.743927, 3.46945e-18}, {0.931265, 0}, {1.15758, 0}, {1.76522, 0}, {1.82256, 2.93165e-18}, {1.76522, 0}, {1.59252, 0}, {1.31866, 0}, {1.00096, -4.95399e-17}, {0.740598, 0}, {0.639427, 0}, {0.740598, 0}, {1.00096, 4.66082e-17}, {1.31866, 0}, {1.59252, 0}, {2.03322, 1.38778e-17}, {2.03322, 1.38778e-17}, {1.94478, 1.38778e-17}, {1.7299, 1.38778e-17}, {1.38444, 1.38778e-17}, {1.00096, 1.38778e-17}, {0.743927, 1.38778e-17}, {0.743927, 1.38778e-17}, {1.00096, 1.38778e-17}, {1.38444, 1.38778e-17}, {1.7299, 1.38778e-17}, {1.94478, 1.38778e-17}, {2.14383, 0}, {2.11997, 0}, {2.00496, 0}, {1.7299, 0}, {1.31866, 0}, {0.931265, 4.80741e-17}, {0.771224, 0}, {0.931265, 0}, {1.31866, 0}, {1.7299, -4.80741e-17}, {2.00496, 0}, {2.11997, 0}, 
{2.16623, 0}, {2.11997, 0}, {1.94478, 0}, {1.59252, 0}, {1.15758, 0}, {0.851575, 0}, {0.851575, 0}, {1.15758, 0}, {1.59252, 0}, {1.94478, 0}, {2.11997, 0}, {2.16623, 0}, {1.92793, 0}, {1.69928, 0}, {1.36223, 0}, {1.00712, 0}, {0.775362, 0}, {0.775362, 0}, {1.00712, 0}, {1.36223, 0}, {1.69928, 0}, {1.92793, 0}, {2.03322, 0}, {2.03322, 0}, {1.69928, -2.77556e-17}, {1.42048, -2.77556e-17}, {1.1084, -2.77556e-17}, {0.858837, -2.77556e-17}, {0.763031, -2.77556e-17}, {0.858837, -2.77556e-17}, {1.1084, -2.77556e-17}, {1.42048, -2.77556e-17}, {1.69928, -2.77556e-17}, {1.88226, -2.77556e-17}, {1.94478, -2.77556e-17}, {1.88226, -2.77556e-17}, {1.36223, 0}, {1.1084, -2.77556e-17}, {0.899033, 0}, {0.782576, 0}, {0.782576, 0}, {0.899033, 1.38778e-17}, {1.1084, 0}, {1.36223, 0}, {1.59252, 0}, {1.7299, 1.38778e-17}, {1.7299, 0}, {1.59252, 0}, {1.00712, 0}, {0.858837, 0}, {0.782576, 0}, {0.761485, 0}, {0.782576, 0}, {0.858837, 0}, {1.00712, 0}, {1.20632, 0}, {1.38444, 0}, {1.45645, 0}, {1.38444, 0}, {1.20632, 0}, {0.775362, 0}, {0.763031, 0}, {0.782576, 0}, {0.782576, 0}, {0.763031, 0}, {0.775362, 0}, {0.862449, 0}, {1.00096, 0}, {1.10759, 0}, {1.10759, 0}, {1.00096, 0}, {0.862449, 0}, {0.775362, 5.32872e-18}, {0.858837, 8.79816e-18}, {0.899033, 1.85927e-18}, {0.858837, 1.85927e-18}, {0.775362, 1.24548e-19}, {0.72366, -1.61018e-18}, {0.743927, 1.85927e-18}, {0.804389, -4.14999e-18}, {0.836162, 1.24548e-19}, {0.804389, -1.61018e-18}, {0.743927, 1.85927e-18}, {0.72366, 7.86853e-18}, {1.00712, 0}, {1.1084, 6.93889e-18}, {1.1084, 0}, {1.00712, 0}, {0.862449, 0}, {0.743927, -3.46945e-18}, {0.683306, 0}, {0.666068, 0}, {0.666068, 0}, {0.683306, -3.46945e-18}, {0.743927, 0}, {0.862449, 0}, {1.36223, 0}, {1.42048, -2.16167e-19}, {1.36223, 0}, {1.20632, 0}, {1.00096, 0}, {0.804389, 1.08084e-19}, {0.666068, 0}, {0.616617, 0}, {0.666068, 0}, {0.804389, 1.08084e-19}, {1.00096, 0}, {1.20632, 0}, {1.69928, 0}, {1.69928, 0}, {1.59252, 0}, {1.38444, 0}, {1.10759, 0}, {0.836162, 0}, {0.666068, 0}, 
{0.666068, 0}, {0.836162, 0}, {1.10759, 0}, {1.38444, 0}, {1.59252, 0}, {1.92793, 2.13894e-17}, {1.88226, 2.58963e-17}, {1.7299, 2.58963e-17}, {1.45645, 2.58963e-17}, {1.10759, 2.81498e-17}, {0.804389, 2.58963e-17}, {0.683306, 2.58963e-17}, {0.804389, 3.19056e-17}, {1.10759, 2.81498e-17}, {1.45645, 2.58963e-17}, {1.7299, 2.58963e-17}, {1.88226, 1.9887e-17}, {2.03322, 0}, {1.94478, 0}, {1.7299, 0}, {1.38444, 0}, {1.00096, 0}, {0.743927, 2.4037e-17}, {0.743927, 0}, {1.00096, 0}, {1.38444, 0}, {1.7299, -2.4037e-17}, {1.94478, 0}, {2.03322, 0}, {2.03322, 0}, {1.88226, -5.55112e-17}, {1.59252, 0}, {1.20632, 0}, {0.862449, 0}, {0.72366, 2.77556e-17}, {0.862449, 0}, {1.20632, 0}, {1.59252, 0}, {1.88226, 2.77556e-17}, {2.03322, 0}, {2.07555, 0}, {1.51507, 0}, {1.25388, -5.55112e-17}, {0.981652, 0}, {0.775362, 0}, {0.698375, 0}, {0.775362, 3.22625e-17}, {0.981652, 0}, {1.25388, 0}, {1.51507, 0}, {1.69928, 2.32486e-17}, {1.76522, 0}, {1.69928, 0}, {1.25388, 0}, {1.03162, 2.77556e-17}, {0.85665, 0}, {0.763031, 0}, {0.763031, 0}, {0.85665, -1.38778e-17}, {1.03162, 0}, {1.25388, 0}, {1.46388, 0}, {1.59252, -1.38778e-17}, {1.59252, 0}, {1.46388, 0}, {0.981652, 0}, {0.85665, 0}, {0.797429, 0}, {0.782576, 0}, {0.797429, 0}, {0.85665, 0}, {0.981652, 0}, {1.15758, 0}, {1.31866, 0}, {1.38444, 0}, {1.31866, 0}, {1.15758, 0}, {0.775362, 0}, {0.763031, 0}, {0.782576, 0}, {0.782576, 0}, {0.763031, 0}, {0.775362, 0}, {0.862449, 0}, {1.00096, 0}, {1.10759, 0}, {1.10759, 0}, {1.00096, 0}, {0.862449, 0}, {0.698375, 0}, {0.763031, 0}, {0.797429, 0}, {0.763031, 0}, {0.698375, 0}, {0.679431, 7.51157e-19}, {0.740598, 0}, {0.836162, 0}, {0.881874, 0}, {0.836162, -7.51157e-19}, {0.740598, 0}, {0.679431, 0}, {0.775362, 0}, {0.85665, -8.67362e-19}, {0.85665, 0}, {0.775362, 0}, {0.679431, 0}, {0.639427, 4.33681e-19}, {0.666068, 0}, {0.707352, 0}, {0.707352, 0}, {0.666068, 4.33681e-19}, {0.639427, 0}, {0.679431, 0}, {0.981652, 0}, {1.03162, 1.40862e-18}, {0.981652, 0}, {0.862449, 0}, {0.740598, 0}, 
{0.666068, 1.13142e-17}, {0.641509, 0}, {0.63886, 0}, {0.641509, 0}, {0.666068, -1.27228e-17}, {0.740598, 0}, {0.862449, 0}, {1.25388, 0}, {1.25388, 0}, {1.15758, 0}, {1.00096, 0}, {0.836162, 0}, {0.707352, -2.4037e-17}, {0.63886, 0}, {0.63886, 0}, {0.707352, 0}, {0.836162, 2.4037e-17}, {1.00096, 0}, {1.15758, 0}, {1.51507, 0}, {1.46388, 0}, {1.31866, 0}, {1.10759, 0}, {0.881874, 0}, {0.707352, 0}, {0.641509, 0}, {0.707352, 0}, {0.881874, 0}, {1.10759, 0}, {1.31866, 0}, {1.46388, 0}, {1.69928, 0}, {1.59252, 0}, {1.38444, 0}, {1.10759, 0}, {0.836162, 0}, {0.666068, 0}, {0.666068, 0}, {0.836162, 0}, {1.10759, 0}, {1.38444, 0}, {1.59252, 0}, {1.69928, 0}, {1.76522, 0}, {1.59252, 0}, {1.31866, 0}, {1.00096, 0}, {0.740598, 0}, {0.639427, 0}, {0.740598, 0}, {1.00096, 0}, {1.31866, 0}, {1.59252, 0}, {1.76522, 0}, {1.82256, 0}, {1.69928, 0}, {1.46388, 0}, {1.15758, 0}, {0.862449, 0}, {0.679431, 0}, {0.679431, 1.20185e-17}, {0.862449, 0}, {1.15758, 0}, {1.46388, 0}, {1.69928, -1.20185e-17}, {1.82256, 0}, {1.82256, 0}, {1.08798, 1.85927e-18}, {0.930725, 1.85927e-18}, {0.824264, 1.85927e-18}, {0.775362, 1.85927e-18}, {0.775362, 1.85927e-18}, {0.824264, 1.85927e-18}, {0.930725, 1.85927e-18}, {1.08798, 1.85927e-18}, {1.25388, 1.85927e-18}, {1.36223, 1.85927e-18}, {1.36223, 1.85927e-18}, {1.25388, 1.85927e-18}, {0.930725, 1.85927e-18}, {0.869317, 1.85927e-18}, {0.85665, 1.57371e-17}, {0.858837, 1.66044e-17}, {0.85665, 1.08732e-17}, {0.869317, 1.38778e-17}, {0.930725, 6.93889e-18}, {1.04306, -5.5133e-18}, {1.15758, -7.15462e-18}, {1.20632, -1.01592e-17}, {1.15758, -1.70981e-17}, {1.04306, -5.5133e-18}, {0.824264, -1.20185e-17}, {0.85665, -5.07962e-18}, {0.899033, -8.54907e-18}, {0.899033, -1.20185e-17}, {0.85665, -1.20185e-17}, {0.824264, -2.14972e-17}, {0.851575, -1.90113e-17}, {0.931265, -1.80278e-17}, {1.00096, -1.20185e-17}, {1.00096, -9.47871e-18}, {0.931265, -8.49514e-18}, {0.851575, -6.00926e-18}, {0.775362, -5.07962e-18}, {0.858837, -5.07962e-18}, {0.899033, 
-5.07962e-18}, {0.858837, -1.20185e-17}, {0.775362, -1.5488e-17}, {0.72366, -1.5488e-17}, {0.743927, -2.75065e-17}, {0.804389, -1.80278e-17}, {0.836162, -1.5488e-17}, {0.804389, -1.5488e-17}, {0.743927, -3.46945e-18}, {0.72366, -6.00926e-18}, {0.775362, -1.20185e-17}, {0.85665, -1.28859e-17}, {0.85665, -1.89574e-17}, {0.775362, -1.89574e-17}, {0.679431, -2.24269e-17}, {0.639427, -3.40117e-17}, {0.666068, -1.89574e-17}, {0.707352, -2.49667e-17}, {0.707352, -2.24269e-17}, {0.666068, -9.97466e-18}, {0.639427, -1.89574e-17}, {0.679431, -1.29482e-17}, {0.824264, -1.10889e-17}, {0.869317, -1.71739e-17}, {0.824264, -1.45583e-17}, {0.72366, -2.14972e-17}, {0.639427, -3.19056e-17}, {0.616617, -5.29001e-17}, {0.63886, -4.21894e-17}, {0.655231, -4.32269e-17}, {0.63886, -3.19056e-17}, {0.616617, -4.82604e-18}, {0.639427, -1.81523e-17}, {0.72366, -1.01759e-17}, {0.930725, -1.20185e-17}, {0.930725, -8.54907e-18}, {0.851575, -1.20185e-17}, {0.743927, -1.20185e-17}, {0.666068, -1.20185e-17}, {0.63886, -1.37532e-17}, {0.640151, -1.20185e-17}, {0.640151, -1.20185e-17}, {0.63886, -1.20185e-17}, {0.666068, -1.37532e-17}, {0.743927, -1.20185e-17}, {0.851575, -1.20185e-17}, {1.08798, 1.85927e-18}, {1.04306, 1.85927e-18}, {0.931265, -1.20185e-17}, {0.804389, -1.20185e-17}, {0.707352, -1.89574e-17}, {0.655231, -1.89574e-17}, {0.640151, -2.4037e-17}, {0.655231, -2.4037e-17}, {0.707352, -1.89574e-17}, {0.804389, -1.89574e-17}, {0.931265, 0}, {1.04306, 0}, {1.25388, -1.20185e-17}, {1.15758, -1.89574e-17}, {1.00096, -1.89574e-17}, {0.836162, -1.89574e-17}, {0.707352, -2.24269e-17}, {0.63886, -6.93889e-18}, {0.63886, -2.49667e-17}, {0.707352, -2.49667e-17}, {0.836162, -2.24269e-17}, {1.00096, -3.09759e-17}, {1.15758, -1.29482e-17}, {1.25388, -1.29482e-17}, {1.36223, -1.29482e-17}, {1.20632, -1.29482e-17}, {1.00096, -4.07037e-17}, {0.804389, -1.29482e-17}, {0.666068, 1.10889e-17}, {0.616617, -1.29482e-17}, {0.666068, -1.10889e-17}, {0.804389, -1.29482e-17}, {1.00096, -3.69852e-17}, {1.20632, 
-1.29482e-17}, {1.36223, 1.29482e-17}, {1.42048, -1.29482e-17}, {1.36223, -1.20185e-17}, {1.15758, -1.20185e-17}, {0.931265, -1.20185e-17}, {0.743927, -1.20185e-17}, {0.639427, -1.20185e-17}, {0.639427, -1.20185e-17}, {0.743927, -1.20185e-17}, {0.931265, -1.20185e-17}, {1.15758, -1.20185e-17}, {1.36223, -1.20185e-17}, {1.48405, -1.20185e-17}, {1.48405, -1.20185e-17}, {1.25388, 1.57371e-17}, {1.04306, 1.57371e-17}, {0.851575, -1.20185e-17}, {0.72366, -1.20185e-17}, {0.679431, -2.58963e-17}, {0.72366, -4.09195e-17}, {0.851575, -2.4037e-17}, {1.04306, -3.60556e-17}, {1.25388, -2.58963e-17}, {1.42048, -1.08732e-17}, {1.48405, 0}, {1.42048, 1.20185e-17}, {0.905261, 0}, {0.930725, 3.46945e-18}, {0.981652, 0}, {1.00712, 0}, {0.981652, 0}, {0.930725, -1.73472e-18}, {0.905261, 0}, {0.930725, 0}, {0.981652, 0}, {1.00712, -1.73472e-18}, {0.981652, 0}, {0.930725, 0}, {0.930725, 0}, {1.03162, 0}, {1.1084, 0}, {1.1084, 1.73472e-18}, {1.03162, 0}, {0.930725, -6.00926e-18}, {0.864823, 0}, {0.851575, -8.67362e-19}, {0.862449, 0}, {0.862449, 6.00926e-18}, {0.851575, 0}, {0.864823, -8.67362e-19}, {0.981652, 0}, {1.1084, 3.46945e-18}, {1.16145, 0}, {1.1084, 0}, {0.981652, 0}, {0.851575, -1.73472e-18}, {0.771224, 0}, {0.743927, 0}, {0.740598, 0}, {0.743927, -1.73472e-18}, {0.771224, 0}, {0.851575, 0}, {1.00712, 0}, {1.1084, 6.93889e-18}, {1.1084, 0}, {1.00712, 0}, {0.862449, 0}, {0.743927, -3.46945e-18}, {0.683306, 0}, {0.666068, 0}, {0.666068, 0}, {0.683306, -3.46945e-18}, {0.743927, 0}, {0.862449, 0}, {0.981652, 0}, {1.03162, -1.38028e-18}, {0.981652, 0}, {0.862449, 0}, {0.740598, 0}, {0.666068, 6.90141e-19}, {0.641509, 0}, {0.63886, 0}, {0.641509, 0}, {0.666068, 6.90141e-19}, {0.740598, 0}, {0.862449, 0}, {0.930725, -8.67362e-18}, {0.930725, -8.67362e-18}, {0.851575, -6.93889e-18}, {0.743927, -6.93889e-18}, {0.666068, 4.33681e-18}, {0.63886, 1.63553e-17}, {0.640151, 9.47871e-18}, {0.640151, 1.24833e-17}, {0.63886, 4.33681e-18}, {0.666068, -7.68171e-18}, {0.743927, -2.53981e-18}, 
{0.851575, -5.54444e-18}, {0.905261, 0}, {0.864823, -1.73472e-18}, {0.771224, 0}, {0.683306, 0}, {0.641509, 0}, {0.640151, 8.67362e-19}, {0.645833, 0}, {0.640151, 0}, {0.641509, 0}, {0.683306, 8.67362e-19}, {0.771224, 0}, {0.864823, 0}, {0.930725, 0}, {0.851575, 0}, {0.743927, 0}, {0.666068, 0}, {0.63886, 0}, {0.640151, 0}, {0.640151, 0}, {0.63886, 0}, {0.666068, 0}, {0.743927, 0}, {0.851575, 0}, {0.930725, 0}, {0.981652, 0}, {0.862449, 0}, {0.740598, 0}, {0.666068, 0}, {0.641509, 0}, {0.63886, 0}, {0.641509, 0}, {0.666068, 0}, {0.740598, 0}, {0.862449, 0}, {0.981652, 0}, {1.03162, 0}, {1.00712, 1.38778e-17}, {0.862449, 1.38778e-17}, {0.743927, 0}, {0.683306, 0}, {0.666068, -6.93889e-18}, {0.666068, -1.29482e-17}, {0.683306, -1.20185e-17}, {0.743927, -1.20185e-17}, {0.862449, -6.93889e-18}, {1.00712, -9.29636e-19}, {1.1084, 1.20185e-17}, {1.1084, 1.20185e-17}, {0.981652, 0}, {0.851575, 1.38778e-17}, {0.771224, 0}, {0.743927, 0}, {0.740598, 0}, {0.743927, -6.93889e-18}, {0.771224, 0}, {0.851575, 0}, {0.981652, 0}, {1.1084, -6.93889e-18}, {1.16145, 0}, {1.1084, 0}, {0.930725, 0}, {0.864823, 0}, {0.851575, 0}, {0.862449, 0}, {0.862449, 0}, {0.851575, 0}, {0.864823, 0}, {0.930725, 0}, {1.03162, 0}, {1.1084, 0}, {1.1084, 0}, {1.03162, 0}, {1.08798, 0}, {1.25388, 0}, {1.36223, 0}, {1.36223, 0}, {1.25388, 0}, {1.08798, 1.20185e-17}, {0.930725, 0}, {0.824264, 0}, {0.775362, 0}, {0.775362, -1.20185e-17}, {0.824264, 0}, {0.930725, 0}, {1.25388, 2.77556e-17}, {1.42048, -1.38778e-17}, {1.48405, -1.38778e-17}, {1.42048, 0}, {1.25388, -1.38778e-17}, {1.04306, 6.93889e-18}, {0.851575, 3.09759e-17}, {0.72366, 0}, {0.679431, -1.38778e-17}, {0.72366, 6.93889e-18}, {0.851575, -1.70981e-17}, {1.04306, 0}, {1.36223, 0}, {1.48405, -6.93889e-18}, {1.48405, 0}, {1.36223, 0}, {1.15758, 0}, {0.931265, 3.46945e-18}, {0.743927, 0}, {0.639427, 0}, {0.639427, 0}, {0.743927, 3.46945e-18}, {0.931265, 0}, {1.15758, 0}, {1.36223, 0}, {1.42048, -2.07544e-18}, {1.36223, 0}, {1.20632, 0}, {1.00096, 
0}, {0.804389, 1.03772e-18}, {0.666068, 0}, {0.616617, 0}, {0.666068, 0}, {0.804389, 1.03772e-18}, {1.00096, 0}, {1.20632, 0}, {1.25388, 0}, {1.25388, 0}, {1.15758, 0}, {1.00096, 0}, {0.836162, 0}, {0.707352, 0}, {0.63886, 0}, {0.63886, 0}, {0.707352, 0}, {0.836162, 0}, {1.00096, 0}, {1.15758, 0}, {1.08798, -4.64818e-19}, {1.04306, 0}, {0.931265, 0}, {0.804389, 0}, {0.707352, 2.32409e-19}, {0.655231, 0}, {0.640151, -6.00926e-18}, {0.655231, -4.50694e-18}, {0.707352, 2.32409e-19}, {0.804389, 0}, {0.931265, 6.00926e-18}, {1.04306, 4.50694e-18}, {0.930725, 0}, {0.851575, 0}, {0.743927, 0}, {0.666068, 0}, {0.63886, 0}, {0.640151, 0}, {0.640151, 0}, {0.63886, 0}, {0.666068, 0}, {0.743927, 0}, {0.851575, 0}, {0.930725, 0}, {0.824264, 0}, {0.72366, 0}, {0.639427, 0}, {0.616617, 0}, {0.63886, 0}, {0.655231, -1.50231e-18}, {0.63886, 0}, {0.616617, 0}, {0.639427, 0}, {0.72366, 1.50231e-18}, {0.824264, 0}, {0.869317, 0}, {0.775362, 0}, {0.679431, 0}, {0.639427, 0}, {0.666068, 0}, {0.707352, 0}, {0.707352, 0}, {0.666068, 0}, {0.639427, 0}, {0.679431, 0}, {0.775362, 0}, {0.85665, 0}, {0.85665, 0}, {0.775362, -6.93889e-18}, {0.72366, -3.46945e-18}, {0.743927, 0}, {0.804389, 0}, {0.836162, 4.64818e-19}, {0.804389, 1.73472e-18}, {0.743927, 0}, {0.72366, 6.00926e-18}, {0.775362, 6.47408e-18}, {0.858837, 1.73472e-18}, {0.899033, 0}, {0.858837, -6.00926e-18}, {0.824264, 0}, {0.851575, 0}, {0.931265, 0}, {1.00096, 0}, {1.00096, 0}, {0.931265, 0}, {0.851575, 0}, {0.824264, 0}, {0.85665, 0}, {0.899033, 0}, {0.899033, 0}, {0.85665, 0}, {0.930725, 0}, {1.04306, 0}, {1.15758, 0}, {1.20632, 0}, {1.15758, 0}, {1.04306, 0}, {0.930725, 0}, {0.869317, 0}, {0.85665, 0}, {0.858837, 0}, {0.85665, 0}, {0.869317, 0}, {1.51507, 0}, {1.69928, 0}, {1.76522, 0}, {1.69928, 0}, {1.51507, 0}, {1.25388, -2.4037e-17}, {0.981652, 0}, {0.775362, 0}, {0.698375, 0}, {0.775362, 2.4037e-17}, {0.981652, 0}, {1.25388, 0}, {1.69928, 0}, {1.82256, -1.38778e-17}, {1.82256, 0}, {1.69928, 0}, {1.46388, 0}, {1.15758, 
-4.11352e-17}, {0.862449, 0}, {0.679431, 0}, {0.679431, 0}, {0.862449, 5.5013e-17}, {1.15758, 0}, {1.46388, 0}, {1.76522, 0}, {1.82256, 6.6502e-18}, {1.76522, 0}, {1.59252, 0}, {1.31866, 0}, {1.00096, -3.3251e-18}, {0.740598, 0}, {0.639427, 0}, {0.740598, 0}, {1.00096, -3.3251e-18}, {1.31866, 0}, {1.59252, 0}, {1.69928, 0}, {1.69928, 0}, {1.59252, 0}, {1.38444, 0}, {1.10759, 0}, {0.836162, 0}, {0.666068, 0}, {0.666068, 0}, {0.836162, 0}, {1.10759, 0}, {1.38444, 0}, {1.59252, 0}, {1.51507, 0}, {1.46388, 0}, {1.31866, 0}, {1.10759, 0}, {0.881874, 0}, {0.707352, 0}, {0.641509, 0}, {0.707352, 0}, {0.881874, 0}, {1.10759, 0}, {1.31866, 0}, {1.46388, 0}, {1.25388, 1.38778e-17}, {1.15758, 0}, {1.00096, 0}, {0.836162, 2.77556e-17}, {0.707352, -6.93889e-18}, {0.63886, -1.20185e-17}, {0.63886, -6.00926e-18}, {0.707352, -2.58963e-17}, {0.836162, -6.93889e-18}, {1.00096, 1.20185e-17}, {1.15758, 6.00926e-18}, {1.25388, -1.85927e-18}, {0.981652, 0}, {0.862449, 1.38778e-17}, {0.740598, 0}, {0.666068, 0}, {0.641509, 0}, {0.63886, -6.93889e-18}, {0.641509, 0}, {0.666068, 0}, {0.740598, 0}, {0.862449, -6.93889e-18}, {0.981652, 0}, {1.03162, 0}, {0.775362, 0}, {0.679431, 0}, {0.639427, 0}, {0.666068, 0}, {0.707352, 0}, {0.707352, 0}, {0.666068, 0}, {0.639427, 0}, {0.679431, 0}, {0.775362, 0}, {0.85665, 0}, {0.85665, 0}, {0.698375, 0}, {0.679431, 0}, {0.740598, 0}, {0.836162, 0}, {0.881874, 0}, {0.836162, -6.00926e-18}, {0.740598, 0}, {0.679431, 0}, {0.698375, 0}, {0.763031, 6.00926e-18}, {0.797429, 0}, {0.763031, 0}, {0.775362, 0}, {0.862449, 0}, {1.00096, 0}, {1.10759, 0}, {1.10759, 0}, {1.00096, 0}, {0.862449, 0}, {0.775362, 0}, {0.763031, 0}, {0.782576, 0}, {0.782576, 0}, {0.763031, 0}, {0.981652, 0}, {1.15758, 0}, {1.31866, 0}, {1.38444, 0}, {1.31866, 0}, {1.15758, 0}, {0.981652, 0}, {0.85665, 0}, {0.797429, 0}, {0.782576, 0}, {0.797429, 0}, {0.85665, 0}, {1.25388, 0}, {1.46388, 0}, {1.59252, 0}, {1.59252, 0}, {1.46388, 0}, {1.25388, 2.4037e-17}, {1.03162, 0}, {0.85665, 0}, 
{0.763031, 0}, {0.763031, -2.4037e-17}, {0.85665, 0}, {1.03162, 0}, {1.92793, 1.20185e-17}, {2.03322, 2.58963e-17}, {2.03322, 3.97741e-17}, {1.92793, 1.20185e-17}, {1.69928, 3.60556e-17}, {1.36223, 5.07962e-18}, {1.00712, -1.85927e-18}, {0.775362, 1.20185e-17}, {0.775362, -1.20185e-17}, {1.00712, 5.07962e-18}, {1.36223, -1.85927e-18}, {1.69928, 1.20185e-17}, {2.03322, 2.41616e-17}, {2.07555, 2.2009e-17}, {2.03322, 7.96727e-17}, {1.88226, 1.35184e-16}, {1.59252, 2.41616e-17}, {1.20632, 2.52379e-17}, {0.862449, 4.44801e-17}, {0.72366, -3.13496e-17}, {0.862449, 2.41616e-17}, {1.20632, 2.52379e-17}, {1.59252, -5.16681e-17}, {1.88226, -3.13496e-17}, {2.03322, 3.97741e-17}, {2.03322, 2.58963e-17}, {1.94478, 3.97741e-17}, {1.7299, 3.97741e-17}, {1.38444, 3.97741e-17}, {1.00096, 4.6713e-17}, {0.743927, 3.97741e-17}, {0.743927, 3.97741e-17}, {1.00096, 3.97741e-17}, {1.38444, 4.6713e-17}, {1.7299, 3.97741e-17}, {1.94478, 3.97741e-17}, {1.92793, 3.97741e-17}, {1.88226, 1.20185e-17}, {1.7299, 3.97741e-17}, {1.45645, 3.97741e-17}, {1.10759, 3.97741e-17}, {0.804389, 5.36519e-17}, {0.683306, 3.97741e-17}, {0.804389, 3.97741e-17}, {1.10759, 3.97741e-17}, {1.45645, 5.36519e-17}, {1.7299, 3.97741e-17}, {1.88226, 3.97741e-17}, {1.69928, 3.60556e-17}, {1.59252, 6.38111e-17}, {1.38444, 3.60556e-17}, {1.10759, 3.60556e-17}, {0.836162, 3.60556e-17}, {0.666068, 2.21778e-17}, {0.666068, 3.60556e-17}, {0.836162, 3.60556e-17}, {1.10759, 3.60556e-17}, {1.38444, 2.21778e-17}, {1.59252, 3.60556e-17}, {1.69928, 3.60556e-17}, {1.36223, 7.16174e-17}, {1.20632, 8.54952e-17}, {1.00096, 8.54952e-17}, {0.804389, 5.77396e-17}, {0.666068, 3.32044e-17}, {0.616617, 1.4247e-17}, {0.666068, -3.75829e-17}, {0.804389, -3.19678e-17}, {1.00096, -1.48697e-17}, {1.20632, -9.79007e-18}, {1.36223, 4.20398e-17}, {1.42048, 6.41803e-17}, {1.00712, 2.21778e-17}, {0.862449, 3.60556e-17}, {0.743927, 4.99333e-17}, {0.683306, 4.99333e-17}, {0.666068, 4.62148e-17}, {0.666068, 3.4769e-17}, {0.683306, 2.03185e-17}, {0.743927, 
8.29997e-18}, {0.862449, -1.85927e-18}, {1.00712, -4.29122e-18}, {1.1084, -3.71854e-18}, {1.1084, 8.29997e-18}, {0.775362, -1.85927e-18}, {0.72366, 1.20185e-17}, {0.743927, 1.89574e-17}, {0.804389, 2.58963e-17}, {0.836162, 2.21778e-17}, {0.804389, 1.52389e-17}, {0.743927, -2.49095e-19}, {0.72366, -1.57371e-17}, {0.775362, -2.58963e-17}, {0.858837, -3.28352e-17}, {0.899033, -2.42861e-17}, {0.858837, -1.57371e-17}, {0.775362, -1.20185e-17}, {0.862449, 1.85927e-18}, {1.00096, 1.22676e-17}, {1.10759, 1.57371e-17}, {1.10759, 1.20185e-17}, {1.00096, 2.07499e-18}, {0.862449, -1.21431e-17}, {0.775362, -2.58963e-17}, {0.763031, -3.60556e-17}, {0.782576, -3.99898e-17}, {0.782576, -3.61801e-17}, {0.763031, -2.58963e-17}, {1.00712, -7.36012e-17}, {1.20632, -7.36012e-17}, {1.38444, -1.809e-17}, {1.45645, 1.14003e-17}, {1.38444, 3.67072e-17}, {1.20632, 5.77396e-17}, {1.00712, 2.9984e-17}, {0.858837, 1.52389e-17}, {0.782576, -1.73761e-17}, {0.761485, -3.84085e-17}, {0.782576, -6.61641e-17}, {0.858837, -8.09093e-17}, {1.36223, -2.58963e-17}, {1.59252, 1.85927e-18}, {1.7299, 1.85927e-18}, {1.7299, 1.85927e-18}, {1.59252, -1.85927e-18}, {1.36223, -1.57371e-17}, {1.1084, -1.57371e-17}, {0.899033, -3.97741e-17}, {0.782576, -4.99333e-17}, {0.782576, -6.38111e-17}, {0.899033, -6.38111e-17}, {1.1084, -3.97741e-17}, {1.69928, -1.85927e-18}, {1.88226, 2.58963e-17}, {1.94478, 1.20185e-17}, {1.88226, 2.58963e-17}, {1.69928, 2.21778e-17}, {1.42048, 8.29997e-18}, {1.1084, -8.79816e-18}, {0.858837, -1.57371e-17}, {0.763031, -2.58963e-17}, {0.858837, -3.97741e-17}, {1.1084, -8.79816e-18}, {1.42048, -1.57371e-17}, {2.14383, 0}, {2.16623, -3.54439e-19}, {2.14383, 0}, {2.03322, 0}, {1.76522, 0}, {1.36223, 1.77219e-19}, {0.981652, 0}, {0.824264, 0}, {0.981652, 0}, {1.36223, 1.77219e-19}, {1.76522, 0}, {2.03322, 0}, {2.16623, -6.93889e-18}, {2.16623, -6.93889e-18}, {2.11997, -6.93889e-18}, {1.94478, -6.93889e-18}, {1.59252, -6.93889e-18}, {1.15758, -6.93889e-18}, {0.851575, 1.70981e-17}, {0.851575, 
-6.93889e-18}, {1.15758, -6.93889e-18}, {1.59252, -6.93889e-18}, {1.94478, -3.09759e-17}, {2.11997, -6.93889e-18}, {2.14383, 0}, {2.11997, 0}, {2.00496, 0}, {1.7299, 0}, {1.31866, 0}, {0.931265, 4.80741e-17}, {0.771224, 0}, {0.931265, 0}, {1.31866, 0}, {1.7299, -4.80741e-17}, {2.00496, 0}, {2.11997, 0}, {2.03322, 0}, {1.94478, 0}, {1.7299, 0}, {1.38444, 0}, {1.00096, 0}, {0.743927, 0}, {0.743927, 0}, {1.00096, 0}, {1.38444, 0}, {1.7299, 0}, {1.94478, 0}, {2.03322, 0}, {1.76522, 0}, {1.59252, 5.55112e-17}, {1.31866, 0}, {1.00096, 0}, {0.740598, 0}, {0.639427, -2.77556e-17}, {0.740598, 0}, {1.00096, 0}, {1.31866, 0}, {1.59252, -2.77556e-17}, {1.76522, 0}, {1.82256, 0}, {1.36223, 3.46945e-18}, {1.15758, 3.1225e-17}, {0.931265, 3.1225e-17}, {0.743927, 3.1225e-17}, {0.639427, 2.75065e-17}, {0.639427, 1.36287e-17}, {0.743927, 1.61018e-18}, {0.931265, -1.04083e-17}, {1.15758, -2.05676e-17}, {1.36223, -3.44454e-17}, {1.48405, -2.24269e-17}, {1.48405, -1.04083e-17}, {0.981652, 0}, {0.851575, 1.38778e-17}, {0.771224, 0}, {0.743927, 0}, {0.740598, 0}, {0.743927, -6.93889e-18}, {0.771224, 0}, {0.851575, 0}, {0.981652, 0}, {1.1084, -6.93889e-18}, {1.16145, 0}, {1.1084, 0}, {0.824264, 0}, {0.851575, 0}, {0.931265, 0}, {1.00096, 0}, {1.00096, 0}, {0.931265, 0}, {0.851575, 0}, {0.824264, 0}, {0.85665, 0}, {0.899033, 0}, {0.899033, 0}, {0.85665, 0}, {0.981652, 0}, {1.15758, 0}, {1.31866, 0}, {1.38444, 0}, {1.31866, 0}, {1.15758, 0}, {0.981652, 0}, {0.85665, 0}, {0.797429, 0}, {0.782576, 0}, {0.797429, 0}, {0.85665, 0}, {1.36223, 3.1225e-17}, {1.59252, 3.1225e-17}, {1.7299, 1.73472e-17}, {1.7299, -3.46945e-18}, {1.59252, -1.04083e-17}, {1.36223, -1.04083e-17}, {1.1084, -2.75065e-17}, {0.899033, -1.70981e-17}, {0.782576, -1.04083e-17}, {0.782576, -1.04083e-17}, {0.899033, 2.05676e-17}, {1.1084, 3.09759e-17}, {1.76522, 0}, {1.94478, 2.77556e-17}, {2.00496, 0}, {1.94478, 0}, {1.76522, 0}, {1.48405, -1.38778e-17}, {1.16145, 0}, {0.899033, 0}, {0.797429, 0}, {0.899033, -1.38778e-17}, 
{1.16145, 0}, {1.48405, 0}, {2.03322, 0}, {2.11997, 1.38778e-17}, {2.11997, 0}, {2.03322, 0}, {1.82256, 0}, {1.48405, -6.93889e-18}, {1.1084, 0}, {0.85665, 0}, {0.85665, 0}, {1.1084, -6.93889e-18}, {1.48405, 0}, {1.82256, 0}, {2.17701, 0}, {2.17701, 0}, {2.16623, 0}, {2.03322, 0}, {1.69928, 0}, {1.25388, 0}, {0.930725, 0}, {0.930725, 0}, {1.25388, 0}, {1.69928, 0}, {2.03322, 0}, {2.16623, 0}, {2.17701, -5.20417e-18}, {2.17945, -2.77556e-17}, {2.11997, 0}, {1.88226, 0}, {1.46388, 2.60209e-18}, {1.04306, -1.01592e-17}, {0.864823, 0}, {1.04306, 1.20185e-17}, {1.46388, 2.60209e-18}, {1.88226, 3.79148e-17}, {2.11997, 0}, {2.17945, -1.20185e-17}, {2.16623, 0}, {2.11997, 0}, {1.94478, 0}, {1.59252, 0}, {1.15758, 0}, {0.851575, 0}, {0.851575, 0}, {1.15758, 0}, {1.59252, 0}, {1.94478, 0}, {2.11997, 0}, {2.16623, 0}, {2.03322, 0}, {1.88226, 0}, {1.59252, 0}, {1.20632, 0}, {0.862449, 0}, {0.72366, 2.4037e-17}, {0.862449, 0}, {1.20632, 0}, {1.59252, 0}, {1.88226, -2.4037e-17}, {2.03322, 0}, {2.07555, 0}, {1.69928, 0}, {1.46388, 0}, {1.15758, 0}, {0.862449, 0}, {0.679431, 0}, {0.679431, -1.20185e-17}, {0.862449, 0}, {1.15758, 0}, {1.46388, 0}, {1.69928, 1.20185e-17}, {1.82256, 0}, {1.82256, 0}, {1.25388, 0}, {1.04306, 0}, {0.851575, 0}, {0.72366, 0}, {0.679431, 0}, {0.72366, 1.50231e-18}, {0.851575, 0}, {1.04306, 0}, {1.25388, 0}, {1.42048, -1.50231e-18}, {1.48405, 0}, {1.42048, 0}, {0.930725, 0}, {0.864823, 0}, {0.851575, 0}, {0.862449, 0}, {0.862449, 0}, {0.851575, 0}, {0.864823, 0}, {0.930725, 0}, {1.03162, 0}, {1.1084, 0}, {1.1084, 0}, {1.03162, 0}, {0.930725, 0}, {1.04306, 0}, {1.15758, 0}, {1.20632, 0}, {1.15758, 0}, {1.04306, 0}, {0.930725, 0}, {0.869317, 0}, {0.85665, 0}, {0.858837, 0}, {0.85665, 0}, {0.869317, 0}, {1.25388, 0}, {1.46388, 0}, {1.59252, 0}, {1.59252, 0}, {1.46388, 0}, {1.25388, 0}, {1.03162, 0}, {0.85665, 0}, {0.763031, 0}, {0.763031, 0}, {0.85665, 0}, {1.03162, 0}, {1.69928, 0}, {1.88226, 2.77556e-17}, {1.94478, 0}, {1.88226, 0}, {1.69928, 0}, {1.42048, 
-1.38778e-17}, {1.1084, 0}, {0.858837, 0}, {0.763031, 0}, {0.858837, -1.38778e-17}, {1.1084, 0}, {1.42048, 0}, {2.03322, 0}, {2.11997, 0}, {2.11997, 0}, {2.03322, 0}, {1.82256, 0}, {1.48405, 0}, {1.1084, 0}, {0.85665, 0}, {0.85665, 0}, {1.1084, 0}, {1.48405, 0}, {1.82256, 0}, {2.16623, 0}, {2.17945, 5.846e-18}, {2.16623, 0}, {2.07555, 0}, {1.82256, 0}, {1.42048, -2.923e-18}, {1.03162, 0}, {0.869317, 0}, {1.03162, 0}, {1.42048, -2.923e-18}, {1.82256, 0}, {2.07555, 0}}; - const std::vector > in_3 = {{-1922.64, 0}, {-1001.68, 433.439}, {-219.942, 33.69}, {-68.6663, -31.5682}, {-19.01, -3.81031}, {-0.58927, -0.511793}, {0.996488, -7.89357e-17}, {-0.58927, 0.511793}, {-19.01, 3.81031}, {-68.6663, 31.5682}, {-219.942, -33.69}, {-1001.68, -433.439}, {-1001.68, 433.439}, {-517.273, 5.9508e-14}, {-352.926, -115.37}, {-141.346, -11.6655}, {-25.4876, 7.80357}, {-0.501786, 1.37769}, {1.08582, -0.0715093}, {0.894073, 0.768163}, {-2.92014, 2.09061}, {-25.1033, -5.63667}, {-174.402, -55.7416}, {-690.893, -8.6513e-15}, {-219.942, 33.69}, {-352.926, -115.37}, {-281.447, 7.54952e-14}, {-85.5703, 25.4729}, {-13.6457, 2.44125}, {0.99011, 1.07155}, {0.910811, -0.175105}, {0.653152, -0.025503}, {0.169545, -0.652296}, {-11.7051, -3.59868}, {-72.4614, -2.89031e-14}, {-174.402, 55.7416}, {-68.6663, -31.5682}, {-141.346, -11.6655}, {-85.5703, 25.4729}, {-28.2549, -3.81917e-14}, {-9.75792, -2.71861}, {1.33155, -0.16363}, {0.66818, -0.0435261}, {0.136765, 0.00586452}, {0.71571, -0.0128863}, {1.26225, -4.03935e-14}, {-11.7051, 3.59868}, {-25.1033, 5.63667}, {-19.01, -3.81031}, {-25.4876, 7.80357}, {-13.6457, 2.44125}, {-9.75792, -2.71861}, {1.25751, 1.30778e-15}, {1.13756, -0.487619}, {0.344756, -0.0047045}, {0.187321, 0.0707209}, {0.272653, -2.48445e-14}, {0.71571, 0.0128863}, {0.169545, 0.652296}, {-2.92014, -2.09061}, {-0.58927, -0.511793}, {-0.501786, 1.37769}, {0.99011, 1.07155}, {1.33155, -0.16363}, {1.13756, -0.487619}, {0.536417, -2.98492e-14}, {0.198078, 0.0681101}, {0.245083, 
-9.7141e-15}, {0.187321, -0.0707209}, {0.136765, -0.00586452}, {0.653152, 0.025503}, {0.894073, -0.768163}, {0.996488, -5.0257e-15}, {1.08582, -0.0715093}, {0.910811, -0.175105}, {0.66818, -0.0435261}, {0.344756, -0.0047045}, {0.198078, 0.0681101}, {0.256446, -1.4581e-15}, {0.198078, -0.0681101}, {0.344756, 0.0047045}, {0.66818, 0.0435261}, {0.910811, 0.175105}, {1.08582, 0.0715093}, {-0.58927, 0.511793}, {0.894073, 0.768163}, {0.653152, -0.025503}, {0.136765, 0.00586452}, {0.187321, 0.0707209}, {0.245083, -9.70357e-15}, {0.198078, -0.0681101}, {0.536417, 2.82968e-14}, {1.13756, 0.487619}, {1.33155, 0.16363}, {0.99011, -1.07155}, {-0.501786, -1.37769}, {-19.01, 3.81031}, {-2.92014, 2.09061}, {0.169545, -0.652296}, {0.71571, -0.0128863}, {0.272653, 2.48445e-14}, {0.187321, -0.0707209}, {0.344756, 0.0047045}, {1.13756, 0.487619}, {1.25751, -1.30778e-15}, {-9.75792, 2.71861}, {-13.6457, -2.44125}, {-25.4876, -7.80357}, {-68.6663, 31.5682}, {-25.1033, -5.63667}, {-11.7051, -3.59868}, {1.26225, 6.30607e-14}, {0.71571, 0.0128863}, {0.136765, -0.00586452}, {0.66818, 0.0435261}, {1.33155, 0.16363}, {-9.75792, 2.71861}, {-28.2549, 2.77768e-14}, {-85.5703, -25.4729}, {-141.346, 11.6655}, {-219.942, -33.69}, {-174.402, -55.7416}, {-72.4614, -7.99361e-15}, {-11.7051, 3.59868}, {0.169545, 0.652296}, {0.653152, 0.025503}, {0.910811, 0.175105}, {0.99011, -1.07155}, {-13.6457, -2.44125}, {-85.5703, -25.4729}, {-281.447, -5.40755e-14}, {-352.926, 115.37}, {-1001.68, -433.439}, {-690.893, 1.06581e-14}, {-174.402, 55.7416}, {-25.1033, 5.63667}, {-2.92014, -2.09061}, {0.894073, -0.768163}, {1.08582, 0.0715093}, {-0.501786, -1.37769}, {-25.4876, -7.80357}, {-141.346, 11.6655}, {-352.926, 115.37}, {-517.273, -3.16493e-14}, {-1001.68, 433.439}, {-517.273, 7.54952e-14}, {-352.926, -115.37}, {-141.346, -11.6655}, {-25.4876, 7.80357}, {-0.501786, 1.37769}, {1.08582, -0.0715093}, {0.894073, 0.768163}, {-2.92014, 2.09061}, {-25.1033, -5.63667}, {-174.402, -55.7416}, {-690.893, 1.97704e-14}, 
{-517.273, 2.84217e-14}, {-1001.68, -433.439}, {-690.893, -4.9738e-14}, {-174.402, 55.7416}, {-25.1033, 5.63667}, {-2.92014, -2.09061}, {0.894073, -0.768163}, {1.08582, 0.0715093}, {-0.501786, -1.37769}, {-25.4876, -7.80357}, {-141.346, 11.6655}, {-352.926, 115.37}, {-352.926, -115.37}, {-690.893, -2.4841e-14}, {-352.926, 115.37}, {-100.781, 4.56302e-14}, {-39.4244, -13.5375}, {-8.79435, -1.30817}, {0.157364, -0.195195}, {0.548415, 3.66208e-14}, {0.157364, 0.195195}, {-8.79435, 1.30817}, {-39.4244, 13.5375}, {-100.781, 1.20069e-14}, {-141.346, -11.6655}, {-174.402, 55.7416}, {-100.781, 5.10703e-14}, {-85.5703, -25.4729}, {-36.1195, 2.5003e-14}, {-5.30025, 1.65834}, {0.642398, 0.553017}, {0.422501, 0.0375059}, {0.608177, 0.0924076}, {0.984075, -0.568373}, {-6.57219, -1.46426e-14}, {-39.4244, -13.5375}, {-25.4876, 7.80357}, {-25.1033, 5.63667}, {-39.4244, -13.5375}, {-36.1195, 1.14353e-14}, {-9.75792, 2.71861}, {-0.90306, 1.82384e-14}, {0.856068, 0.244528}, {0.331794, -0.0443841}, {0.197596, -0.0729406}, {0.718888, -1.86825e-14}, {0.984075, 0.568373}, {-8.79435, -1.30817}, {-0.501786, 1.37769}, {-2.92014, -2.09061}, {-8.79435, -1.30817}, {-5.30025, 1.65834}, {-0.90306, 6.65646e-14}, {1.13756, 0.487619}, {0.609244, 2.33578e-14}, {0.183862, -0.0625824}, {0.116983, -1.51613e-14}, {0.197596, 0.0729406}, {0.608177, -0.0924076}, {0.157364, -0.195195}, {1.08582, -0.0715093}, {0.894073, -0.768163}, {0.157364, -0.195195}, {0.642398, 0.553017}, {0.856068, 0.244528}, {0.609244, -1.08488e-14}, {0.198078, -0.0681101}, {0.119232, -2.82893e-14}, {0.183862, 0.0625824}, {0.331794, 0.0443841}, {0.422501, -0.0375059}, {0.548415, -1.3122e-14}, {0.894073, 0.768163}, {1.08582, 0.0715093}, {0.548415, 5.96745e-15}, {0.422501, 0.0375059}, {0.331794, -0.0443841}, {0.183862, -0.0625824}, {0.119232, -5.39905e-15}, {0.198078, 0.0681101}, {0.609244, 1.71134e-16}, {0.856068, -0.244528}, {0.642398, -0.553017}, {0.157364, 0.195195}, {-2.92014, 2.09061}, {-0.501786, -1.37769}, {0.157364, 0.195195}, 
{0.608177, 0.0924076}, {0.197596, -0.0729406}, {0.116983, 1.40688e-14}, {0.183862, 0.0625824}, {0.609244, -5.18358e-15}, {1.13756, -0.487619}, {-0.90306, -2.30043e-15}, {-5.30025, -1.65834}, {-8.79435, 1.30817}, {-25.1033, -5.63667}, {-25.4876, -7.80357}, {-8.79435, 1.30817}, {0.984075, -0.568373}, {0.718888, -3.72397e-14}, {0.197596, 0.0729406}, {0.331794, 0.0443841}, {0.856068, -0.244528}, {-0.90306, -2.49328e-14}, {-9.75792, -2.71861}, {-36.1195, -2.84933e-14}, {-39.4244, 13.5375}, {-174.402, -55.7416}, {-141.346, 11.6655}, {-39.4244, 13.5375}, {-6.57219, -3.10862e-15}, {0.984075, 0.568373}, {0.608177, -0.0924076}, {0.422501, -0.0375059}, {0.642398, -0.553017}, {-5.30025, -1.65834}, {-36.1195, -1.1744e-14}, {-85.5703, 25.4729}, {-100.781, -5.24575e-15}, {-690.893, -1.64069e-14}, {-352.926, 115.37}, {-100.781, 4.28486e-15}, {-39.4244, -13.5375}, {-8.79435, -1.30817}, {0.157364, -0.195195}, {0.548415, -2.47213e-15}, {0.157364, 0.195195}, {-8.79435, 1.30817}, {-39.4244, 13.5375}, {-100.781, -2.09326e-14}, {-352.926, -115.37}, {-219.942, 33.69}, {-352.926, -115.37}, {-281.447, -2.30926e-14}, {-85.5703, 25.4729}, {-13.6457, 2.44125}, {0.99011, 1.07155}, {0.910811, -0.175105}, {0.653152, -0.025503}, {0.169545, -0.652296}, {-11.7051, -3.59868}, {-72.4614, -2.45898e-14}, {-174.402, 55.7416}, {-352.926, -115.37}, {-690.893, -6.4928e-14}, {-352.926, 115.37}, {-100.781, 3.07079e-14}, {-39.4244, -13.5375}, {-8.79435, -1.30817}, {0.157364, -0.195195}, {0.548415, 3.23816e-15}, {0.157364, 0.195195}, {-8.79435, 1.30817}, {-39.4244, 13.5375}, {-100.781, 1.55451e-14}, {-281.447, -4.17444e-14}, {-352.926, 115.37}, {-219.942, -33.69}, {-174.402, -55.7416}, {-72.4614, 6.06555e-15}, {-11.7051, 3.59868}, {0.169545, 0.652296}, {0.653152, 0.025503}, {0.910811, 0.175105}, {0.99011, -1.07155}, {-13.6457, -2.44125}, {-85.5703, -25.4729}, {-85.5703, 25.4729}, {-100.781, 3.55271e-15}, {-174.402, -55.7416}, {-141.346, 11.6655}, {-39.4244, 13.5375}, {-6.57219, -3.34301e-15}, {0.984075, 
0.568373}, {0.608177, -0.0924076}, {0.422501, -0.0375059}, {0.642398, -0.553017}, {-5.30025, -1.65834}, {-36.1195, -1.78338e-14}, {-13.6457, 2.44125}, {-39.4244, -13.5375}, {-72.4614, -5.77808e-15}, {-39.4244, 13.5375}, {-13.6457, -2.44125}, {-5.30025, -1.65834}, {1.10754, 3.09516e-15}, {0.438213, -0.0482423}, {0.123572, -2.49539e-14}, {0.438213, 0.0482423}, {1.10754, 2.15556e-14}, {-5.30025, 1.65834}, {0.99011, 1.07155}, {-8.79435, -1.30817}, {-11.7051, 3.59868}, {-6.57219, -5.10703e-15}, {-5.30025, -1.65834}, {1.33155, 0.16363}, {0.856068, -0.244528}, {0.234236, -8.58421e-15}, {0.175034, 0.0639427}, {0.247817, -1.38739e-14}, {0.438213, -0.0482423}, {0.642398, 0.553017}, {0.910811, -0.175105}, {0.157364, -0.195195}, {0.169545, 0.652296}, {0.984075, 0.568373}, {1.10754, 2.34291e-14}, {0.856068, -0.244528}, {0.344756, 0.0047045}, {0.183862, 0.0625824}, {0.22958, 1.50402e-14}, {0.175034, -0.0639427}, {0.123572, -1.0726e-14}, {0.422501, 0.0375059}, {0.653152, -0.025503}, {0.548415, -2.20767e-15}, {0.653152, 0.025503}, {0.608177, -0.0924076}, {0.438213, -0.0482423}, {0.234236, 2.35693e-14}, {0.183862, 0.0625824}, {0.245083, -5.32172e-15}, {0.183862, -0.0625824}, {0.234236, -1.68369e-14}, {0.438213, 0.0482423}, {0.608177, 0.0924076}, {0.169545, -0.652296}, {0.157364, 0.195195}, {0.910811, 0.175105}, {0.422501, -0.0375059}, {0.123572, 1.25992e-14}, {0.175034, 0.0639427}, {0.22958, -2.41685e-15}, {0.183862, -0.0625824}, {0.344756, -0.0047045}, {0.856068, 0.244528}, {1.10754, 1.86174e-15}, {0.984075, -0.568373}, {-11.7051, -3.59868}, {-8.79435, 1.30817}, {0.99011, -1.07155}, {0.642398, -0.553017}, {0.438213, 0.0482423}, {0.247817, 1.27324e-14}, {0.175034, -0.0639427}, {0.234236, -1.14132e-14}, {0.856068, 0.244528}, {1.33155, -0.16363}, {-5.30025, 1.65834}, {-6.57219, -1.79842e-15}, {-72.4614, 1.40308e-14}, {-39.4244, 13.5375}, {-13.6457, -2.44125}, {-5.30025, -1.65834}, {1.10754, 2.81848e-17}, {0.438213, -0.0482423}, {0.123572, 4.57785e-15}, {0.438213, 0.0482423}, 
{1.10754, -6.1253e-15}, {-5.30025, 1.65834}, {-13.6457, 2.44125}, {-39.4244, -13.5375}, {-174.402, 55.7416}, {-100.781, 1.86517e-14}, {-85.5703, -25.4729}, {-36.1195, 3.01981e-14}, {-5.30025, 1.65834}, {0.642398, 0.553017}, {0.422501, 0.0375059}, {0.608177, 0.0924076}, {0.984075, -0.568373}, {-6.57219, -5.76082e-15}, {-39.4244, -13.5375}, {-141.346, -11.6655}, {-68.6663, -31.5682}, {-141.346, -11.6655}, {-85.5703, 25.4729}, {-28.2549, 4.44089e-15}, {-9.75792, -2.71861}, {1.33155, -0.16363}, {0.66818, -0.0435261}, {0.136765, 0.00586452}, {0.71571, -0.0128863}, {1.26225, -1.09583e-14}, {-11.7051, 3.59868}, {-25.1033, 5.63667}, {-141.346, -11.6655}, {-174.402, 55.7416}, {-100.781, 9.10383e-15}, {-85.5703, -25.4729}, {-36.1195, -8.23997e-15}, {-5.30025, 1.65834}, {0.642398, 0.553017}, {0.422501, 0.0375059}, {0.608177, 0.0924076}, {0.984075, -0.568373}, {-6.57219, -6.49073e-15}, {-39.4244, -13.5375}, {-85.5703, 25.4729}, {-100.781, 1.24345e-14}, {-174.402, -55.7416}, {-141.346, 11.6655}, {-39.4244, 13.5375}, {-6.57219, 2.87424e-15}, {0.984075, 0.568373}, {0.608177, -0.0924076}, {0.422501, -0.0375059}, {0.642398, -0.553017}, {-5.30025, -1.65834}, {-36.1195, 3.06972e-14}, {-28.2549, -9.32587e-15}, {-85.5703, -25.4729}, {-141.346, 11.6655}, {-68.6663, 31.5682}, {-25.1033, -5.63667}, {-11.7051, -3.59868}, {1.26225, 9.24574e-15}, {0.71571, 0.0128863}, {0.136765, -0.00586452}, {0.66818, 0.0435261}, {1.33155, 0.16363}, {-9.75792, 2.71861}, {-9.75792, -2.71861}, {-36.1195, -5.77316e-15}, {-39.4244, 13.5375}, {-25.1033, -5.63667}, {-25.4876, -7.80357}, {-8.79435, 1.30817}, {0.984075, -0.568373}, {0.718888, -6.50192e-16}, {0.197596, 0.0729406}, {0.331794, 0.0443841}, {0.856068, -0.244528}, {-0.90306, 2.42655e-15}, {1.33155, -0.16363}, {-5.30025, 1.65834}, {-6.57219, 5.16254e-15}, {-11.7051, -3.59868}, {-8.79435, 1.30817}, {0.99011, -1.07155}, {0.642398, -0.553017}, {0.438213, 0.0482423}, {0.247817, -3.29037e-15}, {0.175034, -0.0639427}, {0.234236, 7.12867e-15}, {0.856068, 
0.244528}, {0.66818, -0.0435261}, {0.642398, 0.553017}, {0.984075, 0.568373}, {1.26225, 6.85556e-15}, {0.984075, -0.568373}, {0.642398, -0.553017}, {0.66818, 0.0435261}, {0.331794, 0.0443841}, {0.175034, -0.0639427}, {0.110194, 2.47264e-15}, {0.175034, 0.0639427}, {0.331794, -0.0443841}, {0.136765, 0.00586452}, {0.422501, 0.0375059}, {0.608177, -0.0924076}, {0.71571, 0.0128863}, {0.718888, -7.88276e-15}, {0.438213, 0.0482423}, {0.331794, 0.0443841}, {0.187321, -0.0707209}, {0.116983, 4.30402e-15}, {0.175034, 0.0639427}, {0.247817, 1.06916e-14}, {0.197596, -0.0729406}, {0.71571, -0.0128863}, {0.608177, 0.0924076}, {0.422501, -0.0375059}, {0.136765, -0.00586452}, {0.197596, 0.0729406}, {0.247817, 1.63574e-14}, {0.175034, -0.0639427}, {0.116983, -5.83814e-16}, {0.187321, 0.0707209}, {0.331794, -0.0443841}, {0.438213, -0.0482423}, {0.718888, 5.76196e-15}, {1.26225, -5.32052e-16}, {0.984075, -0.568373}, {0.642398, -0.553017}, {0.66818, 0.0435261}, {0.331794, 0.0443841}, {0.175034, -0.0639427}, {0.110194, -5.31886e-16}, {0.175034, 0.0639427}, {0.331794, -0.0443841}, {0.66818, -0.0435261}, {0.642398, 0.553017}, {0.984075, 0.568373}, {-11.7051, 3.59868}, {-6.57219, 9.99201e-16}, {-5.30025, -1.65834}, {1.33155, 0.16363}, {0.856068, -0.244528}, {0.234236, 8.80874e-16}, {0.175034, 0.0639427}, {0.247817, 2.06115e-15}, {0.438213, -0.0482423}, {0.642398, 0.553017}, {0.99011, 1.07155}, {-8.79435, -1.30817}, {-25.1033, 5.63667}, {-39.4244, -13.5375}, {-36.1195, -5.55112e-16}, {-9.75792, 2.71861}, {-0.90306, 1.33227e-15}, {0.856068, 0.244528}, {0.331794, -0.0443841}, {0.197596, -0.0729406}, {0.718888, 1.33227e-15}, {0.984075, 0.568373}, {-8.79435, -1.30817}, {-25.4876, 7.80357}, {-19.01, -3.81031}, {-25.4876, 7.80357}, {-13.6457, 2.44125}, {-9.75792, -2.71861}, {1.25751, -4.45683e-15}, {1.13756, -0.487619}, {0.344756, -0.0047045}, {0.187321, 0.0707209}, {0.272653, -7.53357e-15}, {0.71571, 0.0128863}, {0.169545, 0.652296}, {-2.92014, -2.09061}, {-25.4876, 7.80357}, {-25.1033, 
5.63667}, {-39.4244, -13.5375}, {-36.1195, -3.44169e-15}, {-9.75792, 2.71861}, {-0.90306, 4.82121e-15}, {0.856068, 0.244528}, {0.331794, -0.0443841}, {0.197596, -0.0729406}, {0.718888, -7.48575e-15}, {0.984075, 0.568373}, {-8.79435, -1.30817}, {-13.6457, 2.44125}, {-39.4244, -13.5375}, {-72.4614, -9.78397e-15}, {-39.4244, 13.5375}, {-13.6457, -2.44125}, {-5.30025, -1.65834}, {1.10754, 7.85206e-15}, {0.438213, -0.0482423}, {0.123572, -8.69677e-15}, {0.438213, 0.0482423}, {1.10754, 1.69858e-15}, {-5.30025, 1.65834}, {-9.75792, -2.71861}, {-36.1195, -1.76525e-14}, {-39.4244, 13.5375}, {-25.1033, -5.63667}, {-25.4876, -7.80357}, {-8.79435, 1.30817}, {0.984075, -0.568373}, {0.718888, -7.73968e-15}, {0.197596, 0.0729406}, {0.331794, 0.0443841}, {0.856068, -0.244528}, {-0.90306, -1.5862e-15}, {1.25751, -5.77316e-15}, {-9.75792, 2.71861}, {-13.6457, -2.44125}, {-25.4876, -7.80357}, {-19.01, 3.81031}, {-2.92014, 2.09061}, {0.169545, -0.652296}, {0.71571, -0.0128863}, {0.272653, 7.41352e-15}, {0.187321, -0.0707209}, {0.344756, 0.0047045}, {1.13756, 0.487619}, {1.13756, -0.487619}, {-0.90306, 1.44884e-14}, {-5.30025, -1.65834}, {-8.79435, 1.30817}, {-2.92014, 2.09061}, {-0.501786, -1.37769}, {0.157364, 0.195195}, {0.608177, 0.0924076}, {0.197596, -0.0729406}, {0.116983, -1.55166e-14}, {0.183862, 0.0625824}, {0.609244, -4.87742e-15}, {0.344756, -0.0047045}, {0.856068, 0.244528}, {1.10754, 1.37806e-14}, {0.984075, -0.568373}, {0.169545, -0.652296}, {0.157364, 0.195195}, {0.910811, 0.175105}, {0.422501, -0.0375059}, {0.123572, -2.23074e-14}, {0.175034, 0.0639427}, {0.22958, 7.56348e-15}, {0.183862, -0.0625824}, {0.187321, 0.0707209}, {0.331794, -0.0443841}, {0.438213, -0.0482423}, {0.718888, 2.74173e-15}, {0.71571, -0.0128863}, {0.608177, 0.0924076}, {0.422501, -0.0375059}, {0.136765, -0.00586452}, {0.197596, 0.0729406}, {0.247817, 1.79677e-14}, {0.175034, -0.0639427}, {0.116983, 2.32163e-15}, {0.272653, 1.09197e-14}, {0.197596, -0.0729406}, {0.123572, -5.44548e-15}, {0.197596, 
0.0729406}, {0.272653, 1.31859e-14}, {0.197596, -0.0729406}, {0.123572, 7.19444e-15}, {0.197596, 0.0729406}, {0.272653, 2.36674e-14}, {0.197596, -0.0729406}, {0.123572, -1.17805e-14}, {0.197596, 0.0729406}, {0.71571, 0.0128863}, {0.718888, -5.54157e-15}, {0.438213, 0.0482423}, {0.331794, 0.0443841}, {0.187321, -0.0707209}, {0.116983, -2.0205e-15}, {0.175034, 0.0639427}, {0.247817, 1.79555e-14}, {0.197596, -0.0729406}, {0.136765, 0.00586452}, {0.422501, 0.0375059}, {0.608177, -0.0924076}, {0.169545, 0.652296}, {0.984075, 0.568373}, {1.10754, 2.94209e-15}, {0.856068, -0.244528}, {0.344756, 0.0047045}, {0.183862, 0.0625824}, {0.22958, -5.96334e-15}, {0.175034, -0.0639427}, {0.123572, -5.29063e-15}, {0.422501, 0.0375059}, {0.910811, -0.175105}, {0.157364, -0.195195}, {-2.92014, -2.09061}, {-8.79435, -1.30817}, {-5.30025, 1.65834}, {-0.90306, -6.27276e-15}, {1.13756, 0.487619}, {0.609244, -1.51453e-14}, {0.183862, -0.0625824}, {0.116983, -9.54253e-15}, {0.197596, 0.0729406}, {0.608177, -0.0924076}, {0.157364, -0.195195}, {-0.501786, 1.37769}, {-0.58927, -0.511793}, {-0.501786, 1.37769}, {0.99011, 1.07155}, {1.33155, -0.16363}, {1.13756, -0.487619}, {0.536417, 6.39291e-15}, {0.198078, 0.0681101}, {0.245083, -7.14518e-15}, {0.187321, -0.0707209}, {0.136765, -0.00586452}, {0.653152, 0.025503}, {0.894073, -0.768163}, {-0.501786, 1.37769}, {-2.92014, -2.09061}, {-8.79435, -1.30817}, {-5.30025, 1.65834}, {-0.90306, 2.35457e-14}, {1.13756, 0.487619}, {0.609244, -7.22771e-15}, {0.183862, -0.0625824}, {0.116983, -1.22214e-14}, {0.197596, 0.0729406}, {0.608177, -0.0924076}, {0.157364, -0.195195}, {0.99011, 1.07155}, {-8.79435, -1.30817}, {-11.7051, 3.59868}, {-6.57219, 3.69149e-15}, {-5.30025, -1.65834}, {1.33155, 0.16363}, {0.856068, -0.244528}, {0.234236, -2.03201e-14}, {0.175034, 0.0639427}, {0.247817, 4.15067e-15}, {0.438213, -0.0482423}, {0.642398, 0.553017}, {1.33155, -0.16363}, {-5.30025, 1.65834}, {-6.57219, 1.11022e-16}, {-11.7051, -3.59868}, {-8.79435, 1.30817}, 
{0.99011, -1.07155}, {0.642398, -0.553017}, {0.438213, 0.0482423}, {0.247817, 1.74335e-15}, {0.175034, -0.0639427}, {0.234236, 3.04986e-14}, {0.856068, 0.244528}, {1.13756, -0.487619}, {-0.90306, 9.27036e-15}, {-5.30025, -1.65834}, {-8.79435, 1.30817}, {-2.92014, 2.09061}, {-0.501786, -1.37769}, {0.157364, 0.195195}, {0.608177, 0.0924076}, {0.197596, -0.0729406}, {0.116983, 2.00811e-14}, {0.183862, 0.0625824}, {0.609244, 2.11012e-14}, {0.536417, 5.32907e-15}, {1.13756, 0.487619}, {1.33155, 0.16363}, {0.99011, -1.07155}, {-0.501786, -1.37769}, {-0.58927, 0.511793}, {0.894073, 0.768163}, {0.653152, -0.025503}, {0.136765, 0.00586452}, {0.187321, 0.0707209}, {0.245083, 2.79908e-15}, {0.198078, -0.0681101}, {0.198078, 0.0681101}, {0.609244, 7.88258e-15}, {0.856068, -0.244528}, {0.642398, -0.553017}, {0.157364, 0.195195}, {0.894073, 0.768163}, {1.08582, 0.0715093}, {0.548415, 5.03212e-15}, {0.422501, 0.0375059}, {0.331794, -0.0443841}, {0.183862, -0.0625824}, {0.119232, 2.62842e-15}, {0.245083, -6.6277e-15}, {0.183862, -0.0625824}, {0.234236, -7.99412e-15}, {0.438213, 0.0482423}, {0.608177, 0.0924076}, {0.653152, -0.025503}, {0.548415, -3.30034e-15}, {0.653152, 0.025503}, {0.608177, -0.0924076}, {0.438213, -0.0482423}, {0.234236, -1.1971e-15}, {0.183862, 0.0625824}, {0.187321, -0.0707209}, {0.116983, -1.06477e-14}, {0.175034, 0.0639427}, {0.247817, -4.66294e-15}, {0.197596, -0.0729406}, {0.136765, 0.00586452}, {0.422501, 0.0375059}, {0.608177, -0.0924076}, {0.71571, 0.0128863}, {0.718888, -5.45392e-15}, {0.438213, 0.0482423}, {0.331794, 0.0443841}, {0.136765, -0.00586452}, {0.197596, 0.0729406}, {0.247817, -1.46619e-14}, {0.175034, -0.0639427}, {0.116983, 1.57673e-14}, {0.187321, 0.0707209}, {0.331794, -0.0443841}, {0.438213, -0.0482423}, {0.718888, -6.92367e-15}, {0.71571, -0.0128863}, {0.608177, 0.0924076}, {0.422501, -0.0375059}, {0.653152, 0.025503}, {0.608177, -0.0924076}, {0.438213, -0.0482423}, {0.234236, 1.13307e-14}, {0.183862, 0.0625824}, {0.245083, 
3.39358e-15}, {0.183862, -0.0625824}, {0.234236, -2.49775e-14}, {0.438213, 0.0482423}, {0.608177, 0.0924076}, {0.653152, -0.025503}, {0.548415, -7.98332e-15}, {0.894073, -0.768163}, {0.157364, -0.195195}, {0.642398, 0.553017}, {0.856068, 0.244528}, {0.609244, 9.84471e-15}, {0.198078, -0.0681101}, {0.119232, -1.08306e-14}, {0.183862, 0.0625824}, {0.331794, 0.0443841}, {0.422501, -0.0375059}, {0.548415, -1.93693e-15}, {1.08582, -0.0715093}, {0.996488, -8.65654e-15}, {1.08582, -0.0715093}, {0.910811, -0.175105}, {0.66818, -0.0435261}, {0.344756, -0.0047045}, {0.198078, 0.0681101}, {0.256446, 1.87526e-15}, {0.198078, -0.0681101}, {0.344756, 0.0047045}, {0.66818, 0.0435261}, {0.910811, 0.175105}, {1.08582, 0.0715093}, {1.08582, -0.0715093}, {0.894073, -0.768163}, {0.157364, -0.195195}, {0.642398, 0.553017}, {0.856068, 0.244528}, {0.609244, 1.10978e-14}, {0.198078, -0.0681101}, {0.119232, -8.91416e-15}, {0.183862, 0.0625824}, {0.331794, 0.0443841}, {0.422501, -0.0375059}, {0.548415, 3.58509e-15}, {0.910811, -0.175105}, {0.157364, -0.195195}, {0.169545, 0.652296}, {0.984075, 0.568373}, {1.10754, -2.44636e-15}, {0.856068, -0.244528}, {0.344756, 0.0047045}, {0.183862, 0.0625824}, {0.22958, -1.29025e-14}, {0.175034, -0.0639427}, {0.123572, 7.36999e-15}, {0.422501, 0.0375059}, {0.66818, -0.0435261}, {0.642398, 0.553017}, {0.984075, 0.568373}, {1.26225, -1.48159e-14}, {0.984075, -0.568373}, {0.642398, -0.553017}, {0.66818, 0.0435261}, {0.331794, 0.0443841}, {0.175034, -0.0639427}, {0.110194, -1.04245e-14}, {0.175034, 0.0639427}, {0.331794, -0.0443841}, {0.344756, -0.0047045}, {0.856068, 0.244528}, {1.10754, -1.46688e-14}, {0.984075, -0.568373}, {0.169545, -0.652296}, {0.157364, 0.195195}, {0.910811, 0.175105}, {0.422501, -0.0375059}, {0.123572, 1.07388e-15}, {0.175034, 0.0639427}, {0.22958, 6.57697e-16}, {0.183862, -0.0625824}, {0.198078, 0.0681101}, {0.609244, -9.21485e-15}, {0.856068, -0.244528}, {0.642398, -0.553017}, {0.157364, 0.195195}, {0.894073, 0.768163}, {1.08582, 
0.0715093}, {0.548415, 1.74673e-15}, {0.422501, 0.0375059}, {0.331794, -0.0443841}, {0.183862, -0.0625824}, {0.119232, -3.30104e-15}, {0.256446, 7.05931e-16}, {0.198078, -0.0681101}, {0.344756, 0.0047045}, {0.66818, 0.0435261}, {0.910811, 0.175105}, {1.08582, 0.0715093}, {0.996488, 9.09906e-16}, {1.08582, -0.0715093}, {0.910811, -0.175105}, {0.66818, -0.0435261}, {0.344756, -0.0047045}, {0.198078, 0.0681101}, {0.198078, -0.0681101}, {0.119232, 1.58207e-15}, {0.183862, 0.0625824}, {0.331794, 0.0443841}, {0.422501, -0.0375059}, {0.548415, -1.33616e-15}, {1.08582, -0.0715093}, {0.894073, -0.768163}, {0.157364, -0.195195}, {0.642398, 0.553017}, {0.856068, 0.244528}, {0.609244, 8.54405e-15}, {0.344756, 0.0047045}, {0.183862, 0.0625824}, {0.22958, -3.6568e-15}, {0.175034, -0.0639427}, {0.123572, -3.76603e-15}, {0.422501, 0.0375059}, {0.910811, -0.175105}, {0.157364, -0.195195}, {0.169545, 0.652296}, {0.984075, 0.568373}, {1.10754, 1.63819e-14}, {0.856068, -0.244528}, {0.66818, 0.0435261}, {0.331794, 0.0443841}, {0.175034, -0.0639427}, {0.110194, 5.40016e-15}, {0.175034, 0.0639427}, {0.331794, -0.0443841}, {0.66818, -0.0435261}, {0.642398, 0.553017}, {0.984075, 0.568373}, {1.26225, 1.79667e-14}, {0.984075, -0.568373}, {0.642398, -0.553017}, {0.910811, 0.175105}, {0.422501, -0.0375059}, {0.123572, -1.23235e-14}, {0.175034, 0.0639427}, {0.22958, 1.33292e-14}, {0.183862, -0.0625824}, {0.344756, -0.0047045}, {0.856068, 0.244528}, {1.10754, 8.56991e-15}, {0.984075, -0.568373}, {0.169545, -0.652296}, {0.157364, 0.195195}, {1.08582, 0.0715093}, {0.548415, -1.18239e-14}, {0.422501, 0.0375059}, {0.331794, -0.0443841}, {0.183862, -0.0625824}, {0.119232, 1.51533e-14}, {0.198078, 0.0681101}, {0.609244, -6.48772e-15}, {0.856068, -0.244528}, {0.642398, -0.553017}, {0.157364, 0.195195}, {0.894073, 0.768163}, {-0.58927, 0.511793}, {0.894073, 0.768163}, {0.653152, -0.025503}, {0.136765, 0.00586452}, {0.187321, 0.0707209}, {0.245083, 1.57505e-14}, {0.198078, -0.0681101}, {0.536417, 
-9.99313e-15}, {1.13756, 0.487619}, {1.33155, 0.16363}, {0.99011, -1.07155}, {-0.501786, -1.37769}, {0.894073, 0.768163}, {1.08582, 0.0715093}, {0.548415, -7.38298e-15}, {0.422501, 0.0375059}, {0.331794, -0.0443841}, {0.183862, -0.0625824}, {0.119232, 1.81233e-14}, {0.198078, 0.0681101}, {0.609244, -1.0975e-14}, {0.856068, -0.244528}, {0.642398, -0.553017}, {0.157364, 0.195195}, {0.653152, -0.025503}, {0.548415, 1.88434e-15}, {0.653152, 0.025503}, {0.608177, -0.0924076}, {0.438213, -0.0482423}, {0.234236, 2.71911e-14}, {0.183862, 0.0625824}, {0.245083, -1.67171e-15}, {0.183862, -0.0625824}, {0.234236, -8.38368e-15}, {0.438213, 0.0482423}, {0.608177, 0.0924076}, {0.136765, 0.00586452}, {0.422501, 0.0375059}, {0.608177, -0.0924076}, {0.71571, 0.0128863}, {0.718888, 7.24348e-16}, {0.438213, 0.0482423}, {0.331794, 0.0443841}, {0.187321, -0.0707209}, {0.116983, -9.46735e-15}, {0.175034, 0.0639427}, {0.247817, 1.47624e-14}, {0.197596, -0.0729406}, {0.187321, 0.0707209}, {0.331794, -0.0443841}, {0.438213, -0.0482423}, {0.718888, -1.28977e-15}, {0.71571, -0.0128863}, {0.608177, 0.0924076}, {0.422501, -0.0375059}, {0.136765, -0.00586452}, {0.197596, 0.0729406}, {0.247817, 9.21922e-15}, {0.175034, -0.0639427}, {0.116983, 2.0981e-14}, {0.245083, 1.31681e-14}, {0.183862, -0.0625824}, {0.234236, 4.47544e-15}, {0.438213, 0.0482423}, {0.608177, 0.0924076}, {0.653152, -0.025503}, {0.548415, -5.02797e-15}, {0.653152, 0.025503}, {0.608177, -0.0924076}, {0.438213, -0.0482423}, {0.234236, 2.14729e-14}, {0.183862, 0.0625824}, {0.198078, -0.0681101}, {0.119232, 7.21645e-16}, {0.183862, 0.0625824}, {0.331794, 0.0443841}, {0.422501, -0.0375059}, {0.548415, 1.65598e-16}, {1.08582, -0.0715093}, {0.894073, -0.768163}, {0.157364, -0.195195}, {0.642398, 0.553017}, {0.856068, 0.244528}, {0.609244, 5.67284e-15}, {0.536417, -4.88498e-15}, {0.198078, 0.0681101}, {0.245083, -9.15934e-15}, {0.187321, -0.0707209}, {0.136765, -0.00586452}, {0.653152, 0.025503}, {0.894073, -0.768163}, {-0.58927, 
-0.511793}, {-0.501786, 1.37769}, {0.99011, 1.07155}, {1.33155, -0.16363}, {1.13756, -0.487619}, {1.13756, 0.487619}, {0.609244, -2.50355e-14}, {0.183862, -0.0625824}, {0.116983, -1.90958e-14}, {0.197596, 0.0729406}, {0.608177, -0.0924076}, {0.157364, -0.195195}, {-0.501786, 1.37769}, {-2.92014, -2.09061}, {-8.79435, -1.30817}, {-5.30025, 1.65834}, {-0.90306, -8.72338e-15}, {1.33155, 0.16363}, {0.856068, -0.244528}, {0.234236, -3.1225e-14}, {0.175034, 0.0639427}, {0.247817, -5.50801e-15}, {0.438213, -0.0482423}, {0.642398, 0.553017}, {0.99011, 1.07155}, {-8.79435, -1.30817}, {-11.7051, 3.59868}, {-6.57219, 4.93573e-16}, {-5.30025, -1.65834}, {0.99011, -1.07155}, {0.642398, -0.553017}, {0.438213, 0.0482423}, {0.247817, -1.77636e-15}, {0.175034, -0.0639427}, {0.234236, 1.59054e-14}, {0.856068, 0.244528}, {1.33155, -0.16363}, {-5.30025, 1.65834}, {-6.57219, -5.24722e-15}, {-11.7051, -3.59868}, {-8.79435, 1.30817}, {-0.501786, -1.37769}, {0.157364, 0.195195}, {0.608177, 0.0924076}, {0.197596, -0.0729406}, {0.116983, 1.53547e-14}, {0.183862, 0.0625824}, {0.609244, 8.8169e-15}, {1.13756, -0.487619}, {-0.90306, -2.36814e-14}, {-5.30025, -1.65834}, {-8.79435, 1.30817}, {-2.92014, 2.09061}, {-19.01, 3.81031}, {-2.92014, 2.09061}, {0.169545, -0.652296}, {0.71571, -0.0128863}, {0.272653, 7.53357e-15}, {0.187321, -0.0707209}, {0.344756, 0.0047045}, {1.13756, 0.487619}, {1.25751, 4.45683e-15}, {-9.75792, 2.71861}, {-13.6457, -2.44125}, {-25.4876, -7.80357}, {-2.92014, 2.09061}, {-0.501786, -1.37769}, {0.157364, 0.195195}, {0.608177, 0.0924076}, {0.197596, -0.0729406}, {0.116983, 9.80781e-15}, {0.183862, 0.0625824}, {0.609244, 1.49587e-14}, {1.13756, -0.487619}, {-0.90306, 7.06758e-15}, {-5.30025, -1.65834}, {-8.79435, 1.30817}, {0.169545, -0.652296}, {0.157364, 0.195195}, {0.910811, 0.175105}, {0.422501, -0.0375059}, {0.123572, 6.06665e-15}, {0.175034, 0.0639427}, {0.22958, 6.1993e-15}, {0.183862, -0.0625824}, {0.344756, -0.0047045}, {0.856068, 0.244528}, {1.10754, 
-2.28577e-15}, {0.984075, -0.568373}, {0.71571, -0.0128863}, {0.608177, 0.0924076}, {0.422501, -0.0375059}, {0.136765, -0.00586452}, {0.197596, 0.0729406}, {0.247817, -1.83171e-14}, {0.175034, -0.0639427}, {0.116983, 1.17627e-15}, {0.187321, 0.0707209}, {0.331794, -0.0443841}, {0.438213, -0.0482423}, {0.718888, 5.88753e-15}, {0.272653, -1.09197e-14}, {0.197596, -0.0729406}, {0.123572, 1.17805e-14}, {0.197596, 0.0729406}, {0.272653, -2.36674e-14}, {0.197596, -0.0729406}, {0.123572, -7.19444e-15}, {0.197596, 0.0729406}, {0.272653, -1.31859e-14}, {0.197596, -0.0729406}, {0.123572, 5.44548e-15}, {0.197596, 0.0729406}, {0.187321, -0.0707209}, {0.116983, -2.02963e-15}, {0.175034, 0.0639427}, {0.247817, -1.83777e-14}, {0.197596, -0.0729406}, {0.136765, 0.00586452}, {0.422501, 0.0375059}, {0.608177, -0.0924076}, {0.71571, 0.0128863}, {0.718888, -2.82656e-15}, {0.438213, 0.0482423}, {0.331794, 0.0443841}, {0.344756, 0.0047045}, {0.183862, 0.0625824}, {0.22958, -7.32747e-15}, {0.175034, -0.0639427}, {0.123572, 2.2981e-14}, {0.422501, 0.0375059}, {0.910811, -0.175105}, {0.157364, -0.195195}, {0.169545, 0.652296}, {0.984075, 0.568373}, {1.10754, -1.49351e-14}, {0.856068, -0.244528}, {1.13756, 0.487619}, {0.609244, 4.66294e-15}, {0.183862, -0.0625824}, {0.116983, 1.55431e-14}, {0.197596, 0.0729406}, {0.608177, -0.0924076}, {0.157364, -0.195195}, {-0.501786, 1.37769}, {-2.92014, -2.09061}, {-8.79435, -1.30817}, {-5.30025, 1.65834}, {-0.90306, -1.49364e-14}, {1.25751, 5.77316e-15}, {1.13756, -0.487619}, {0.344756, -0.0047045}, {0.187321, 0.0707209}, {0.272653, -7.41352e-15}, {0.71571, 0.0128863}, {0.169545, 0.652296}, {-2.92014, -2.09061}, {-19.01, -3.81031}, {-25.4876, 7.80357}, {-13.6457, 2.44125}, {-9.75792, -2.71861}, {-9.75792, 2.71861}, {-0.90306, -1.11022e-15}, {0.856068, 0.244528}, {0.331794, -0.0443841}, {0.197596, -0.0729406}, {0.718888, 1.06265e-14}, {0.984075, 0.568373}, {-8.79435, -1.30817}, {-25.4876, 7.80357}, {-25.1033, 5.63667}, {-39.4244, -13.5375}, {-36.1195, 
1.53615e-14}, {-13.6457, -2.44125}, {-5.30025, -1.65834}, {1.10754, -3.75347e-16}, {0.438213, -0.0482423}, {0.123572, 6.59532e-15}, {0.438213, 0.0482423}, {1.10754, -9.60557e-15}, {-5.30025, 1.65834}, {-13.6457, 2.44125}, {-39.4244, -13.5375}, {-72.4614, 8.85487e-15}, {-39.4244, 13.5375}, {-25.4876, -7.80357}, {-8.79435, 1.30817}, {0.984075, -0.568373}, {0.718888, 6.66134e-15}, {0.197596, 0.0729406}, {0.331794, 0.0443841}, {0.856068, -0.244528}, {-0.90306, -3.2321e-15}, {-9.75792, -2.71861}, {-36.1195, 3.07691e-15}, {-39.4244, 13.5375}, {-25.1033, -5.63667}, {-68.6663, 31.5682}, {-25.1033, -5.63667}, {-11.7051, -3.59868}, {1.26225, 5.32907e-15}, {0.71571, 0.0128863}, {0.136765, -0.00586452}, {0.66818, 0.0435261}, {1.33155, 0.16363}, {-9.75792, 2.71861}, {-28.2549, -1.30009e-14}, {-85.5703, -25.4729}, {-141.346, 11.6655}, {-25.1033, -5.63667}, {-25.4876, -7.80357}, {-8.79435, 1.30817}, {0.984075, -0.568373}, {0.718888, 1.04645e-15}, {0.197596, 0.0729406}, {0.331794, 0.0443841}, {0.856068, -0.244528}, {-0.90306, -1.12605e-14}, {-9.75792, -2.71861}, {-36.1195, -4.7106e-15}, {-39.4244, 13.5375}, {-11.7051, -3.59868}, {-8.79435, 1.30817}, {0.99011, -1.07155}, {0.642398, -0.553017}, {0.438213, 0.0482423}, {0.247817, -2.82239e-15}, {0.175034, -0.0639427}, {0.234236, -3.56681e-15}, {0.856068, 0.244528}, {1.33155, -0.16363}, {-5.30025, 1.65834}, {-6.57219, -1.16432e-14}, {1.26225, 3.58047e-15}, {0.984075, -0.568373}, {0.642398, -0.553017}, {0.66818, 0.0435261}, {0.331794, 0.0443841}, {0.175034, -0.0639427}, {0.110194, 2.93524e-15}, {0.175034, 0.0639427}, {0.331794, -0.0443841}, {0.66818, -0.0435261}, {0.642398, 0.553017}, {0.984075, 0.568373}, {0.71571, 0.0128863}, {0.718888, -5.34729e-15}, {0.438213, 0.0482423}, {0.331794, 0.0443841}, {0.187321, -0.0707209}, {0.116983, 2.48202e-15}, {0.175034, 0.0639427}, {0.247817, -1.39069e-14}, {0.197596, -0.0729406}, {0.136765, 0.00586452}, {0.422501, 0.0375059}, {0.608177, -0.0924076}, {0.136765, -0.00586452}, {0.197596, 0.0729406}, 
{0.247817, -9.0622e-15}, {0.175034, -0.0639427}, {0.116983, 2.82922e-15}, {0.187321, 0.0707209}, {0.331794, -0.0443841}, {0.438213, -0.0482423}, {0.718888, 3.1417e-15}, {0.71571, -0.0128863}, {0.608177, 0.0924076}, {0.422501, -0.0375059}, {0.66818, 0.0435261}, {0.331794, 0.0443841}, {0.175034, -0.0639427}, {0.110194, 2.79023e-16}, {0.175034, 0.0639427}, {0.331794, -0.0443841}, {0.66818, -0.0435261}, {0.642398, 0.553017}, {0.984075, 0.568373}, {1.26225, -1.87546e-14}, {0.984075, -0.568373}, {0.642398, -0.553017}, {1.33155, 0.16363}, {0.856068, -0.244528}, {0.234236, -4.71845e-15}, {0.175034, 0.0639427}, {0.247817, 1.06316e-14}, {0.438213, -0.0482423}, {0.642398, 0.553017}, {0.99011, 1.07155}, {-8.79435, -1.30817}, {-11.7051, 3.59868}, {-6.57219, -1.15544e-14}, {-5.30025, -1.65834}, {-9.75792, 2.71861}, {-0.90306, -8.43769e-15}, {0.856068, 0.244528}, {0.331794, -0.0443841}, {0.197596, -0.0729406}, {0.718888, 4.79381e-15}, {0.984075, 0.568373}, {-8.79435, -1.30817}, {-25.4876, 7.80357}, {-25.1033, 5.63667}, {-39.4244, -13.5375}, {-36.1195, -2.29847e-15}, {-28.2549, 1.17684e-14}, {-9.75792, -2.71861}, {1.33155, -0.16363}, {0.66818, -0.0435261}, {0.136765, 0.00586452}, {0.71571, -0.0128863}, {1.26225, -6.27057e-15}, {-11.7051, 3.59868}, {-25.1033, 5.63667}, {-68.6663, -31.5682}, {-141.346, -11.6655}, {-85.5703, 25.4729}, {-85.5703, -25.4729}, {-36.1195, -8.88178e-15}, {-5.30025, 1.65834}, {0.642398, 0.553017}, {0.422501, 0.0375059}, {0.608177, 0.0924076}, {0.984075, -0.568373}, {-6.57219, -9.51947e-16}, {-39.4244, -13.5375}, {-141.346, -11.6655}, {-174.402, 55.7416}, {-100.781, -1.32589e-14}, {-141.346, 11.6655}, {-39.4244, 13.5375}, {-6.57219, 9.32587e-15}, {0.984075, 0.568373}, {0.608177, -0.0924076}, {0.422501, -0.0375059}, {0.642398, -0.553017}, {-5.30025, -1.65834}, {-36.1195, -1.51055e-14}, {-85.5703, 25.4729}, {-100.781, -1.03368e-14}, {-174.402, -55.7416}, {-219.942, -33.69}, {-174.402, -55.7416}, {-72.4614, 3.19744e-14}, {-11.7051, 3.59868}, {0.169545, 
0.652296}, {0.653152, 0.025503}, {0.910811, 0.175105}, {0.99011, -1.07155}, {-13.6457, -2.44125}, {-85.5703, -25.4729}, {-281.447, 2.89669e-14}, {-352.926, 115.37}, {-174.402, -55.7416}, {-141.346, 11.6655}, {-39.4244, 13.5375}, {-6.57219, -4.88498e-15}, {0.984075, 0.568373}, {0.608177, -0.0924076}, {0.422501, -0.0375059}, {0.642398, -0.553017}, {-5.30025, -1.65834}, {-36.1195, -1.82141e-14}, {-85.5703, 25.4729}, {-100.781, -9.27444e-15}, {-72.4614, -4.85029e-15}, {-39.4244, 13.5375}, {-13.6457, -2.44125}, {-5.30025, -1.65834}, {1.10754, 3.15158e-15}, {0.438213, -0.0482423}, {0.123572, -1.13913e-14}, {0.438213, 0.0482423}, {1.10754, 1.54585e-14}, {-5.30025, 1.65834}, {-13.6457, 2.44125}, {-39.4244, -13.5375}, {-11.7051, 3.59868}, {-6.57219, -1.02973e-14}, {-5.30025, -1.65834}, {1.33155, 0.16363}, {0.856068, -0.244528}, {0.234236, -8.67871e-16}, {0.175034, 0.0639427}, {0.247817, -9.0156e-15}, {0.438213, -0.0482423}, {0.642398, 0.553017}, {0.99011, 1.07155}, {-8.79435, -1.30817}, {0.169545, 0.652296}, {0.984075, 0.568373}, {1.10754, -9.28424e-15}, {0.856068, -0.244528}, {0.344756, 0.0047045}, {0.183862, 0.0625824}, {0.22958, -2.66014e-15}, {0.175034, -0.0639427}, {0.123572, -1.71764e-14}, {0.422501, 0.0375059}, {0.910811, -0.175105}, {0.157364, -0.195195}, {0.653152, 0.025503}, {0.608177, -0.0924076}, {0.438213, -0.0482423}, {0.234236, 3.00826e-14}, {0.183862, 0.0625824}, {0.245083, 6.85206e-15}, {0.183862, -0.0625824}, {0.234236, -3.07568e-14}, {0.438213, 0.0482423}, {0.608177, 0.0924076}, {0.653152, -0.025503}, {0.548415, 1.36866e-15}, {0.910811, 0.175105}, {0.422501, -0.0375059}, {0.123572, 5.7454e-15}, {0.175034, 0.0639427}, {0.22958, -8.11296e-15}, {0.183862, -0.0625824}, {0.344756, -0.0047045}, {0.856068, 0.244528}, {1.10754, -2.31121e-14}, {0.984075, -0.568373}, {0.169545, -0.652296}, {0.157364, 0.195195}, {0.99011, -1.07155}, {0.642398, -0.553017}, {0.438213, 0.0482423}, {0.247817, 1.40998e-14}, {0.175034, -0.0639427}, {0.234236, 1.26193e-14}, {0.856068, 
0.244528}, {1.33155, -0.16363}, {-5.30025, 1.65834}, {-6.57219, 2.68492e-14}, {-11.7051, -3.59868}, {-8.79435, 1.30817}, {-13.6457, -2.44125}, {-5.30025, -1.65834}, {1.10754, -1.79197e-14}, {0.438213, -0.0482423}, {0.123572, 2.88317e-14}, {0.438213, 0.0482423}, {1.10754, 9.5501e-15}, {-5.30025, 1.65834}, {-13.6457, 2.44125}, {-39.4244, -13.5375}, {-72.4614, -2.75686e-15}, {-39.4244, 13.5375}, {-85.5703, -25.4729}, {-36.1195, 1.73195e-14}, {-5.30025, 1.65834}, {0.642398, 0.553017}, {0.422501, 0.0375059}, {0.608177, 0.0924076}, {0.984075, -0.568373}, {-6.57219, 4.82121e-15}, {-39.4244, -13.5375}, {-141.346, -11.6655}, {-174.402, 55.7416}, {-100.781, -7.48575e-15}, {-281.447, 3.90799e-14}, {-85.5703, 25.4729}, {-13.6457, 2.44125}, {0.99011, 1.07155}, {0.910811, -0.175105}, {0.653152, -0.025503}, {0.169545, -0.652296}, {-11.7051, -3.59868}, {-72.4614, -2.1037e-14}, {-174.402, 55.7416}, {-219.942, 33.69}, {-352.926, -115.37}, {-352.926, 115.37}, {-100.781, -1.07118e-15}, {-39.4244, -13.5375}, {-8.79435, -1.30817}, {0.157364, -0.195195}, {0.548415, -1.6234e-14}, {0.157364, 0.195195}, {-8.79435, 1.30817}, {-39.4244, 13.5375}, {-100.781, -2.85409e-14}, {-352.926, -115.37}, {-690.893, 8.12484e-14}, {-1001.68, -433.439}, {-690.893, -3.19744e-14}, {-174.402, 55.7416}, {-25.1033, 5.63667}, {-2.92014, -2.09061}, {0.894073, -0.768163}, {1.08582, 0.0715093}, {-0.501786, -1.37769}, {-25.4876, -7.80357}, {-141.346, 11.6655}, {-352.926, 115.37}, {-517.273, -7.42819e-14}, {-690.893, -4.996e-15}, {-352.926, 115.37}, {-100.781, 2.12053e-14}, {-39.4244, -13.5375}, {-8.79435, -1.30817}, {0.157364, -0.195195}, {0.548415, 6.04246e-15}, {0.157364, 0.195195}, {-8.79435, 1.30817}, {-39.4244, 13.5375}, {-100.781, -6.2645e-15}, {-352.926, -115.37}, {-174.402, 55.7416}, {-100.781, -3.9968e-15}, {-85.5703, -25.4729}, {-36.1195, 1.77636e-15}, {-5.30025, 1.65834}, {0.642398, 0.553017}, {0.422501, 0.0375059}, {0.608177, 0.0924076}, {0.984075, -0.568373}, {-6.57219, -1.20726e-14}, {-39.4244, 
-13.5375}, {-141.346, -11.6655}, {-25.1033, 5.63667}, {-39.4244, -13.5375}, {-36.1195, 7.10543e-15}, {-9.75792, 2.71861}, {-0.90306, -6.56569e-15}, {0.856068, 0.244528}, {0.331794, -0.0443841}, {0.197596, -0.0729406}, {0.718888, 1.18948e-14}, {0.984075, 0.568373}, {-8.79435, -1.30817}, {-25.4876, 7.80357}, {-2.92014, -2.09061}, {-8.79435, -1.30817}, {-5.30025, 1.65834}, {-0.90306, 1.58207e-14}, {1.13756, 0.487619}, {0.609244, -2.48678e-15}, {0.183862, -0.0625824}, {0.116983, -2.16976e-14}, {0.197596, 0.0729406}, {0.608177, -0.0924076}, {0.157364, -0.195195}, {-0.501786, 1.37769}, {0.894073, -0.768163}, {0.157364, -0.195195}, {0.642398, 0.553017}, {0.856068, 0.244528}, {0.609244, -1.09297e-14}, {0.198078, -0.0681101}, {0.119232, -1.83728e-14}, {0.183862, 0.0625824}, {0.331794, 0.0443841}, {0.422501, -0.0375059}, {0.548415, -2.94779e-14}, {1.08582, -0.0715093}, {1.08582, 0.0715093}, {0.548415, 1.00198e-14}, {0.422501, 0.0375059}, {0.331794, -0.0443841}, {0.183862, -0.0625824}, {0.119232, 1.21228e-14}, {0.198078, 0.0681101}, {0.609244, 4.84268e-15}, {0.856068, -0.244528}, {0.642398, -0.553017}, {0.157364, 0.195195}, {0.894073, 0.768163}, {-0.501786, -1.37769}, {0.157364, 0.195195}, {0.608177, 0.0924076}, {0.197596, -0.0729406}, {0.116983, 5.39933e-14}, {0.183862, 0.0625824}, {0.609244, -1.87156e-15}, {1.13756, -0.487619}, {-0.90306, -6.7538e-14}, {-5.30025, -1.65834}, {-8.79435, 1.30817}, {-2.92014, 2.09061}, {-25.4876, -7.80357}, {-8.79435, 1.30817}, {0.984075, -0.568373}, {0.718888, 4.88498e-14}, {0.197596, 0.0729406}, {0.331794, 0.0443841}, {0.856068, -0.244528}, {-0.90306, -6.80815e-15}, {-9.75792, -2.71861}, {-36.1195, -8.78654e-15}, {-39.4244, 13.5375}, {-25.1033, -5.63667}, {-141.346, 11.6655}, {-39.4244, 13.5375}, {-6.57219, 2.26485e-14}, {0.984075, 0.568373}, {0.608177, -0.0924076}, {0.422501, -0.0375059}, {0.642398, -0.553017}, {-5.30025, -1.65834}, {-36.1195, -1.97408e-15}, {-85.5703, 25.4729}, {-100.781, -2.76711e-14}, {-174.402, -55.7416}, {-352.926, 
115.37}, {-100.781, 4.53139e-15}, {-39.4244, -13.5375}, {-8.79435, -1.30817}, {0.157364, -0.195195}, {0.548415, -2.27175e-14}, {0.157364, 0.195195}, {-8.79435, 1.30817}, {-39.4244, 13.5375}, {-100.781, -5.34849e-14}, {-352.926, -115.37}, {-690.893, 7.92872e-15}, {-517.273, -5.68434e-14}, {-352.926, -115.37}, {-141.346, -11.6655}, {-25.4876, 7.80357}, {-0.501786, 1.37769}, {1.08582, -0.0715093}, {0.894073, 0.768163}, {-2.92014, 2.09061}, {-25.1033, -5.63667}, {-174.402, -55.7416}, {-690.893, 1.78665e-14}, {-1001.68, 433.439}}; - const std::vector > out_3 = {{-0.199195, -0.0194965}, {-0.419867, -0.250833}, {-1.11264, 0}, {-0.419867, 0.250833}, {-0.199195, 0.0194965}, {-0.418409, -5.00654e-18}, {-0.419867, 0.250833}, {-0.18608, 3.44375e-17}, {-0.267653, -0.0667649}, {-0.199195, 0.0194965}, {-0.267653, -0.0667649}, {-0.267653, 0.0667649}, {-0.199195, -0.0194965}, {-0.267653, 0.0667649}, {-0.18608, -1.83156e-17}, {-0.419867, -0.250833}, {-0.418409, 6.16791e-18}, {-0.418409, 1.14412e-17}, {-0.419867, 0.250833}, {-0.18608, 4.36893e-17}, {-0.267653, -0.0667649}, {-0.267653, 0.0667649}, {-0.18608, 1.64477e-17}, {-0.419867, -0.250833}, {-0.418409, -2.87836e-17}, {-0.267653, -0.0667649}, {-0.418409, -1.43756e-17}, {-0.267653, 0.0667649}, {-0.267653, -0.0667649}, {-0.418409, -9.49474e-18}, {-0.267653, 0.0667649}, {-0.199195, 0.0194965}, {-0.267653, -0.0667649}, {-0.267653, -0.0667649}, {-0.418409, -3.75741e-17}, {-0.267653, 0.0667649}, {-0.267653, 0.0667649}, {-0.199195, -0.0194965}, {-0.267653, 0.0667649}, {-0.199195, -0.0194965}, {-0.199195, 0.0194965}, {-0.267653, -0.0667649}, {-0.267653, -0.0667649}, {-0.418409, 4.70188e-17}, {-0.267653, 0.0667649}, {-0.267653, 0.0667649}, {-0.18608, -4.29872e-17}, {-0.419867, -0.250833}, {-0.418409, -1.85037e-17}, {-0.267653, -0.0667649}, {-0.418409, -2.89121e-18}, {-0.267653, 0.0667649}, {-0.267653, -0.0667649}, {-0.418409, 4.58838e-18}, {-0.267653, 0.0667649}, {-0.418409, 1.03394e-17}, {-0.419867, 0.250833}, {-0.18608, -3.28955e-17}, 
{-0.267653, -0.0667649}}; - const std::vector > out_3_init = {{-0.0719135, 0}, {0.15981, 0}, {0, 0}, {0.15981, 0}, {-0.0719135, 0}, {-0.0185867, 0}, {0.15981, 0}, {0.113268, 0}, {-0.0634128, 0}, {-0.0719135, 0}, {-0.0634128, 0}, {-0.0634128, 0}, {-0.0719135, 0}, {-0.0634128, 0}, {0.113268, 0}, {0.15981, 0}, {-0.0185867, 0}, {-0.0185867, 0}, {0.15981, 0}, {0.113268, 0}, {-0.0634128, 0}, {-0.0634128, 0}, {0.113268, 0}, {0.15981, 0}, {-0.0185867, 0}, {-0.0634128, 0}, {-0.0185867, 0}, {-0.0634128, 0}, {-0.0634128, 0}, {-0.0185867, 0}, {-0.0634128, 0}, {-0.0719135, 0}, {-0.0634128, 0}, {-0.0634128, 0}, {-0.0185867, 0}, {-0.0634128, 0}, {-0.0634128, 0}, {-0.0719135, 0}, {-0.0634128, 0}, {-0.0719135, 0}, {-0.0719135, 0}, {-0.0634128, 0}, {-0.0634128, 0}, {-0.0185867, 0}, {-0.0634128, 0}, {-0.0634128, 0}, {0.113268, 0}, {0.15981, 0}, {-0.0185867, 0}, {-0.0634128, 0}, {-0.0185867, 0}, {-0.0634128, 0}, {-0.0634128, 0}, {-0.0185867, 0}, {-0.0634128, 0}, {-0.0185867, 0}, {0.15981, 0}, {0.113268, 0}, {-0.0634128, 0}}; + const std::vector box_index + = {10, 11, 0, 1, 2, 23, 12, 13, 14, 24, 25, 131, 120, 142, 143, + 132, 133, 155, 144, 145, 146, 167, 156, 157, 158, 168, 169, 170, 287, 276, + 277, 288, 289, 300, 301, 302, 313, 314, 1451, 1440, 1570, 1571, 1582, 1583, 1572, + 1594, 1595, 1584, 1585, 1607, 1596, 1597, 1714, 1715, 1704, 1726, 1727, 1716, 1717}; + const std::vector> in_1 = { + {-0.0157932, -0}, {0.140385, 0}, {1.15637, 0}, {0.140385, 0}, {-0.0157932, -0}, {-0.00612284, -0}, + {0.140385, 0}, {0.0746255, 0}, {-0.0151924, -0}, {-0.0157932, -0}, {-0.0151924, -0}, {-0.0151924, -0}, + {-0.0157932, -0}, {-0.0151924, -0}, {0.0746255, 0}, {0.140385, 0}, {-0.00612284, -0}, {-0.00612284, -0}, + {0.140385, 0}, {0.0746255, 0}, {-0.0151924, -0}, {-0.0151924, -0}, {0.0746255, 0}, {0.140385, 0}, + {-0.00612284, -0}, {-0.0151924, -0}, {-0.00612284, -0}, {-0.0151924, -0}, {-0.0151924, -0}, {-0.00612284, -0}, + {-0.0151924, -0}, {-0.0157932, -0}, {-0.0151924, -0}, {-0.0151924, -0}, 
{-0.00612284, -0}, {-0.0151924, -0}, + {-0.0151924, -0}, {-0.0157932, -0}, {-0.0151924, -0}, {-0.0157932, -0}, {-0.0157932, -0}, {-0.0151924, -0}, + {-0.0151924, -0}, {-0.00612284, -0}, {-0.0151924, -0}, {-0.0151924, -0}, {0.0746255, 0}, {0.140385, 0}, + {-0.00612284, -0}, {-0.0151924, -0}, {-0.00612284, -0}, {-0.0151924, -0}, {-0.0151924, -0}, {-0.00612284, -0}, + {-0.0151924, -0}, {-0.00612284, -0}, {0.140385, 0}, {0.0746255, 0}, {-0.0151924, -0}}; + const std::vector> out_1 = {{1.15637, 0}, + {0.140385, 0}, + {-0.0157932, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.0157932, -0}, + {0.140385, 0}, + {0.140385, 0}, + {0.0746255, 0}, + {-0.0151924, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.00612284, -0}, + {-0.0157932, -0}, + {-0.0151924, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.0157932, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.0151924, -0}, + {0.140385, 0}, + {-0.00612284, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + 
{0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.0151924, -0}, + {0.0746255, 0}, + {0.140385, 0}, + {0.0746255, 0}, + {-0.0151924, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.00612284, -0}, + {0.0746255, 0}, + {0.140385, 0}, + {-0.00612284, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.0151924, -0}, + {-0.0151924, -0}, + {-0.00612284, -0}, + {-0.0151924, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.00612284, -0}, + {-0.0151924, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.0151924, -0}, + {-0.0157932, -0}, + {-0.0151924, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.0151924, -0}, + {-0.00612284, -0}, + {-0.0151924, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.0151924, -0}, + {-0.0157932, -0}, + {0, 
0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 
0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 
0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 
0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 
0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 
0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.0157932, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.0151924, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 
0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.0157932, -0}, + {-0.0151924, -0}, + {-0.0151924, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.0151924, -0}, + {-0.00612284, -0}, + {0.140385, 0}, + {-0.00612284, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.0151924, -0}, + {0.0746255, 0}, + {-0.00612284, -0}, + {-0.0151924, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.0151924, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 
0}, + {0, 0}, + {0, 0}, + {-0.0151924, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.0151924, -0}, + {-0.00612284, -0}, + {0.0746255, 0}, + {-0.0151924, -0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {-0.00612284, -0}, + {0.140385, 0}}; + const std::vector> in_2 = {{2.16277, 0}, + {2.17701, 0}, + {2.14383, 0}, + {1.92793, 0}, + {1.51507, 0}, + {1.08798, -2.4037e-17}, + {0.905261, 0}, + {1.08798, 0}, + {1.51507, 0}, + {1.92793, 2.4037e-17}, + {2.14383, 0}, + {2.17701, 0}, + {2.17701, 0}, + {2.16623, 5.55112e-17}, + {2.03322, 0}, + {1.69928, 0}, + {1.25388, 0}, + {0.930725, -2.77556e-17}, + {0.930725, 0}, + {1.25388, 0}, + {1.69928, 0}, + {2.03322, -2.77556e-17}, + {2.16623, 0}, + {2.17701, 0}, + {2.14383, 0}, + {2.03322, 0}, + {1.76522, 0}, + {1.36223, 0}, + {0.981652, 0}, + {0.824264, 0}, + {0.981652, 0}, + {1.36223, 0}, + {1.76522, 0}, + {2.03322, 0}, + {2.14383, 0}, + {2.16623, 0}, + {1.92793, 0}, + {1.69928, 0}, + {1.36223, 0}, + {1.00712, 0}, + {0.775362, 0}, + {0.775362, 1.20185e-17}, + {1.00712, 0}, + {1.36223, 0}, + {1.69928, 0}, + {1.92793, -1.20185e-17}, + {2.03322, 0}, + {2.03322, 0}, + {1.51507, 0}, + {1.25388, 0}, + {0.981652, 0}, + {0.775362, 0}, + {0.698375, 0}, + {0.775362, -4.50694e-18}, + {0.981652, 0}, + {1.25388, 0}, + {1.51507, 0}, + {1.69928, 4.50694e-18}, + {1.76522, 0}, + {1.69928, 0}, + {1.08798, 0}, + {0.930725, -2.77556e-17}, + {0.824264, 0}, + {0.775362, 0}, + {0.775362, 0}, + {0.824264, 1.38778e-17}, + {0.930725, 0}, + {1.08798, 0}, + {1.25388, 0}, + {1.36223, 1.38778e-17}, + {1.36223, 0}, + {1.25388, 0}, + {0.905261, 0}, + {0.930725, 0}, + {0.981652, 0}, + {1.00712, 0}, + {0.981652, 0}, + {0.930725, 0}, + {0.905261, 0}, + {0.930725, 0}, + {0.981652, 0}, + {1.00712, 0}, + {0.981652, 0}, + {0.930725, 0}, + {1.08798, 0}, + {1.25388, 0}, + {1.36223, 0}, + {1.36223, 0}, + {1.25388, 0}, + {1.08798, 0}, + {0.930725, 0}, + {0.824264, 0}, + {0.775362, 
0}, + {0.775362, 0}, + {0.824264, 0}, + {0.930725, 0}, + {1.51507, 0}, + {1.69928, -2.77556e-17}, + {1.76522, 0}, + {1.69928, 0}, + {1.51507, 0}, + {1.25388, 1.38778e-17}, + {0.981652, 0}, + {0.775362, 0}, + {0.698375, 0}, + {0.775362, 1.38778e-17}, + {0.981652, 0}, + {1.25388, 0}, + {1.92793, 0}, + {2.03322, 0}, + {2.03322, 0}, + {1.92793, 0}, + {1.69928, 0}, + {1.36223, 0}, + {1.00712, 0}, + {0.775362, 0}, + {0.775362, 0}, + {1.00712, 0}, + {1.36223, 0}, + {1.69928, 0}, + {2.14383, 0}, + {2.16623, -3.54439e-19}, + {2.14383, 0}, + {2.03322, 0}, + {1.76522, 0}, + {1.36223, 1.77219e-19}, + {0.981652, 0}, + {0.824264, 0}, + {0.981652, 0}, + {1.36223, 1.77219e-19}, + {1.76522, 0}, + {2.03322, 0}, + {2.17701, 0}, + {2.17701, 0}, + {2.16623, 0}, + {2.03322, 0}, + {1.69928, 0}, + {1.25388, 0}, + {0.930725, 0}, + {0.930725, 0}, + {1.25388, 0}, + {1.69928, 0}, + {2.03322, 0}, + {2.16623, 0}, + {2.17701, -6.93889e-18}, + {2.16623, -6.93889e-18}, + {2.03322, -6.93889e-18}, + {1.69928, -6.93889e-18}, + {1.25388, -6.93889e-18}, + {0.930725, -3.09759e-17}, + {0.930725, -6.93889e-18}, + {1.25388, -6.93889e-18}, + {1.69928, -6.93889e-18}, + {2.03322, 1.70981e-17}, + {2.16623, -6.93889e-18}, + {2.17701, -6.93889e-18}, + {2.16623, 0}, + {2.07555, 0}, + {1.82256, 0}, + {1.42048, 0}, + {1.03162, 0}, + {0.869317, -2.4037e-17}, + {1.03162, 0}, + {1.42048, 0}, + {1.82256, 0}, + {2.07555, 2.4037e-17}, + {2.16623, 0}, + {2.17945, 0}, + {2.03322, 0}, + {1.82256, -5.55112e-17}, + {1.48405, 0}, + {1.1084, 0}, + {0.85665, 0}, + {0.85665, 2.77556e-17}, + {1.1084, 0}, + {1.48405, 0}, + {1.82256, 0}, + {2.03322, 2.77556e-17}, + {2.11997, 0}, + {2.11997, 0}, + {1.69928, 0}, + {1.42048, 0}, + {1.1084, 0}, + {0.858837, 0}, + {0.763031, 0}, + {0.858837, 0}, + {1.1084, 0}, + {1.42048, 0}, + {1.69928, 0}, + {1.88226, 0}, + {1.94478, 0}, + {1.88226, 0}, + {1.25388, 3.46945e-18}, + {1.03162, -2.42861e-17}, + {0.85665, 3.46945e-18}, + {0.763031, 3.46945e-18}, + {0.763031, 3.46945e-18}, + {0.85665, 
1.73472e-17}, + {1.03162, 3.46945e-18}, + {1.25388, 3.46945e-18}, + {1.46388, 3.46945e-18}, + {1.59252, 1.73472e-17}, + {1.59252, 3.46945e-18}, + {1.46388, 3.46945e-18}, + {0.930725, -2.4037e-17}, + {0.869317, -3.79148e-17}, + {0.85665, -2.4037e-17}, + {0.858837, -1.7563e-17}, + {0.85665, -2.4037e-17}, + {0.869317, -1.70981e-17}, + {0.930725, -2.4037e-17}, + {1.04306, -2.72741e-17}, + {1.15758, -2.4037e-17}, + {1.20632, -1.70981e-17}, + {1.15758, -2.4037e-17}, + {1.04306, -2.72741e-17}, + {0.930725, 0}, + {1.03162, 0}, + {1.1084, 0}, + {1.1084, 0}, + {1.03162, 0}, + {0.930725, 0}, + {0.864823, 0}, + {0.851575, 0}, + {0.862449, 0}, + {0.862449, 0}, + {0.851575, 0}, + {0.864823, 0}, + {1.25388, -2.47509e-17}, + {1.42048, -3.86287e-17}, + {1.48405, -1.08732e-17}, + {1.42048, 3.00463e-18}, + {1.25388, 1.68824e-17}, + {1.04306, 4.78583e-17}, + {0.851575, 3.39806e-17}, + {0.72366, 2.70417e-17}, + {0.679431, 1.68824e-17}, + {0.72366, -2.15723e-19}, + {0.851575, -1.40935e-17}, + {1.04306, -2.10324e-17}, + {1.69928, 3.46945e-18}, + {1.82256, 1.73472e-17}, + {1.82256, 3.46945e-18}, + {1.69928, 3.46945e-18}, + {1.46388, 3.46945e-18}, + {1.15758, -3.46945e-18}, + {0.862449, 3.46945e-18}, + {0.679431, 3.46945e-18}, + {0.679431, 3.46945e-18}, + {0.862449, -3.46945e-18}, + {1.15758, 3.46945e-18}, + {1.46388, 3.46945e-18}, + {2.03322, 2.4037e-17}, + {2.07555, 2.5603e-17}, + {2.03322, 2.4037e-17}, + {1.88226, 2.4037e-17}, + {1.59252, -2.4037e-17}, + {1.20632, 2.32541e-17}, + {0.862449, 2.4037e-17}, + {0.72366, 2.4037e-17}, + {0.862449, 7.21111e-17}, + {1.20632, 2.32541e-17}, + {1.59252, 2.4037e-17}, + {1.88226, 2.4037e-17}, + {2.16623, 0}, + {2.16623, 0}, + {2.11997, 0}, + {1.94478, 0}, + {1.59252, 0}, + {1.15758, -4.80741e-17}, + {0.851575, 0}, + {0.851575, 0}, + {1.15758, 0}, + {1.59252, 4.80741e-17}, + {1.94478, 0}, + {2.11997, 0}, + {2.17701, -1.68824e-17}, + {2.17945, -3.07602e-17}, + {2.11997, -3.00463e-18}, + {1.88226, -3.00463e-18}, + {1.46388, 3.93426e-18}, + {1.04306, 
-1.31639e-17}, + {0.864823, -3.00463e-18}, + {1.04306, 9.01389e-18}, + {1.46388, 3.93426e-18}, + {1.88226, 3.49102e-17}, + {2.11997, -3.00463e-18}, + {2.17945, -1.50231e-17}, + {2.14383, 0}, + {2.03322, 0}, + {1.76522, 0}, + {1.36223, 0}, + {0.981652, 0}, + {0.824264, 2.4037e-17}, + {0.981652, 0}, + {1.36223, 0}, + {1.76522, 0}, + {2.03322, -2.4037e-17}, + {2.14383, 0}, + {2.16623, 0}, + {2.03322, -2.77556e-17}, + {1.82256, -2.77556e-17}, + {1.48405, -2.77556e-17}, + {1.1084, -2.77556e-17}, + {0.85665, -2.77556e-17}, + {0.85665, -3.71854e-18}, + {1.1084, -2.77556e-17}, + {1.48405, -2.77556e-17}, + {1.82256, -2.77556e-17}, + {2.03322, -5.17926e-17}, + {2.11997, -2.77556e-17}, + {2.11997, -2.77556e-17}, + {1.76522, 0}, + {1.48405, 0}, + {1.16145, 0}, + {0.899033, 0}, + {0.797429, 0}, + {0.899033, 9.01389e-18}, + {1.16145, 0}, + {1.48405, 0}, + {1.76522, 0}, + {1.94478, -9.01389e-18}, + {2.00496, 0}, + {1.94478, 0}, + {1.36223, 0}, + {1.1084, -2.77556e-17}, + {0.899033, 0}, + {0.782576, 0}, + {0.782576, 0}, + {0.899033, 1.38778e-17}, + {1.1084, 0}, + {1.36223, 0}, + {1.59252, 0}, + {1.7299, 1.38778e-17}, + {1.7299, 0}, + {1.59252, 0}, + {0.981652, 0}, + {0.85665, 2.77556e-17}, + {0.797429, 0}, + {0.782576, 0}, + {0.797429, 0}, + {0.85665, -1.38778e-17}, + {0.981652, 0}, + {1.15758, 0}, + {1.31866, 0}, + {1.38444, -1.38778e-17}, + {1.31866, 0}, + {1.15758, 0}, + {0.824264, 1.38778e-17}, + {0.85665, 1.38778e-17}, + {0.899033, 1.73472e-17}, + {0.899033, 6.93889e-18}, + {0.85665, 1.38778e-17}, + {0.824264, 1.68824e-17}, + {0.851575, 1.77767e-17}, + {0.931265, 2.03519e-17}, + {1.00096, 1.38778e-17}, + {1.00096, 1.08732e-17}, + {0.931265, 6.50938e-18}, + {0.851575, 1.43426e-17}, + {0.981652, 0}, + {1.1084, 0}, + {1.16145, 0}, + {1.1084, 0}, + {0.981652, 0}, + {0.851575, 0}, + {0.771224, 0}, + {0.743927, 0}, + {0.740598, 0}, + {0.743927, 0}, + {0.771224, 0}, + {0.851575, 0}, + {1.36223, 0}, + {1.48405, -6.93889e-18}, + {1.48405, 0}, + {1.36223, 0}, + {1.15758, 0}, + 
{0.931265, 3.46945e-18}, + {0.743927, 0}, + {0.639427, 0}, + {0.639427, 0}, + {0.743927, 3.46945e-18}, + {0.931265, 0}, + {1.15758, 0}, + {1.76522, 0}, + {1.82256, 2.93165e-18}, + {1.76522, 0}, + {1.59252, 0}, + {1.31866, 0}, + {1.00096, -4.95399e-17}, + {0.740598, 0}, + {0.639427, 0}, + {0.740598, 0}, + {1.00096, 4.66082e-17}, + {1.31866, 0}, + {1.59252, 0}, + {2.03322, 1.38778e-17}, + {2.03322, 1.38778e-17}, + {1.94478, 1.38778e-17}, + {1.7299, 1.38778e-17}, + {1.38444, 1.38778e-17}, + {1.00096, 1.38778e-17}, + {0.743927, 1.38778e-17}, + {0.743927, 1.38778e-17}, + {1.00096, 1.38778e-17}, + {1.38444, 1.38778e-17}, + {1.7299, 1.38778e-17}, + {1.94478, 1.38778e-17}, + {2.14383, 0}, + {2.11997, 0}, + {2.00496, 0}, + {1.7299, 0}, + {1.31866, 0}, + {0.931265, 4.80741e-17}, + {0.771224, 0}, + {0.931265, 0}, + {1.31866, 0}, + {1.7299, -4.80741e-17}, + {2.00496, 0}, + {2.11997, 0}, + {2.16623, 0}, + {2.11997, 0}, + {1.94478, 0}, + {1.59252, 0}, + {1.15758, 0}, + {0.851575, 0}, + {0.851575, 0}, + {1.15758, 0}, + {1.59252, 0}, + {1.94478, 0}, + {2.11997, 0}, + {2.16623, 0}, + {1.92793, 0}, + {1.69928, 0}, + {1.36223, 0}, + {1.00712, 0}, + {0.775362, 0}, + {0.775362, 0}, + {1.00712, 0}, + {1.36223, 0}, + {1.69928, 0}, + {1.92793, 0}, + {2.03322, 0}, + {2.03322, 0}, + {1.69928, -2.77556e-17}, + {1.42048, -2.77556e-17}, + {1.1084, -2.77556e-17}, + {0.858837, -2.77556e-17}, + {0.763031, -2.77556e-17}, + {0.858837, -2.77556e-17}, + {1.1084, -2.77556e-17}, + {1.42048, -2.77556e-17}, + {1.69928, -2.77556e-17}, + {1.88226, -2.77556e-17}, + {1.94478, -2.77556e-17}, + {1.88226, -2.77556e-17}, + {1.36223, 0}, + {1.1084, -2.77556e-17}, + {0.899033, 0}, + {0.782576, 0}, + {0.782576, 0}, + {0.899033, 1.38778e-17}, + {1.1084, 0}, + {1.36223, 0}, + {1.59252, 0}, + {1.7299, 1.38778e-17}, + {1.7299, 0}, + {1.59252, 0}, + {1.00712, 0}, + {0.858837, 0}, + {0.782576, 0}, + {0.761485, 0}, + {0.782576, 0}, + {0.858837, 0}, + {1.00712, 0}, + {1.20632, 0}, + {1.38444, 0}, + {1.45645, 0}, + 
{1.38444, 0}, + {1.20632, 0}, + {0.775362, 0}, + {0.763031, 0}, + {0.782576, 0}, + {0.782576, 0}, + {0.763031, 0}, + {0.775362, 0}, + {0.862449, 0}, + {1.00096, 0}, + {1.10759, 0}, + {1.10759, 0}, + {1.00096, 0}, + {0.862449, 0}, + {0.775362, 5.32872e-18}, + {0.858837, 8.79816e-18}, + {0.899033, 1.85927e-18}, + {0.858837, 1.85927e-18}, + {0.775362, 1.24548e-19}, + {0.72366, -1.61018e-18}, + {0.743927, 1.85927e-18}, + {0.804389, -4.14999e-18}, + {0.836162, 1.24548e-19}, + {0.804389, -1.61018e-18}, + {0.743927, 1.85927e-18}, + {0.72366, 7.86853e-18}, + {1.00712, 0}, + {1.1084, 6.93889e-18}, + {1.1084, 0}, + {1.00712, 0}, + {0.862449, 0}, + {0.743927, -3.46945e-18}, + {0.683306, 0}, + {0.666068, 0}, + {0.666068, 0}, + {0.683306, -3.46945e-18}, + {0.743927, 0}, + {0.862449, 0}, + {1.36223, 0}, + {1.42048, -2.16167e-19}, + {1.36223, 0}, + {1.20632, 0}, + {1.00096, 0}, + {0.804389, 1.08084e-19}, + {0.666068, 0}, + {0.616617, 0}, + {0.666068, 0}, + {0.804389, 1.08084e-19}, + {1.00096, 0}, + {1.20632, 0}, + {1.69928, 0}, + {1.69928, 0}, + {1.59252, 0}, + {1.38444, 0}, + {1.10759, 0}, + {0.836162, 0}, + {0.666068, 0}, + {0.666068, 0}, + {0.836162, 0}, + {1.10759, 0}, + {1.38444, 0}, + {1.59252, 0}, + {1.92793, 2.13894e-17}, + {1.88226, 2.58963e-17}, + {1.7299, 2.58963e-17}, + {1.45645, 2.58963e-17}, + {1.10759, 2.81498e-17}, + {0.804389, 2.58963e-17}, + {0.683306, 2.58963e-17}, + {0.804389, 3.19056e-17}, + {1.10759, 2.81498e-17}, + {1.45645, 2.58963e-17}, + {1.7299, 2.58963e-17}, + {1.88226, 1.9887e-17}, + {2.03322, 0}, + {1.94478, 0}, + {1.7299, 0}, + {1.38444, 0}, + {1.00096, 0}, + {0.743927, 2.4037e-17}, + {0.743927, 0}, + {1.00096, 0}, + {1.38444, 0}, + {1.7299, -2.4037e-17}, + {1.94478, 0}, + {2.03322, 0}, + {2.03322, 0}, + {1.88226, -5.55112e-17}, + {1.59252, 0}, + {1.20632, 0}, + {0.862449, 0}, + {0.72366, 2.77556e-17}, + {0.862449, 0}, + {1.20632, 0}, + {1.59252, 0}, + {1.88226, 2.77556e-17}, + {2.03322, 0}, + {2.07555, 0}, + {1.51507, 0}, + {1.25388, -5.55112e-17}, 
+ {0.981652, 0}, + {0.775362, 0}, + {0.698375, 0}, + {0.775362, 3.22625e-17}, + {0.981652, 0}, + {1.25388, 0}, + {1.51507, 0}, + {1.69928, 2.32486e-17}, + {1.76522, 0}, + {1.69928, 0}, + {1.25388, 0}, + {1.03162, 2.77556e-17}, + {0.85665, 0}, + {0.763031, 0}, + {0.763031, 0}, + {0.85665, -1.38778e-17}, + {1.03162, 0}, + {1.25388, 0}, + {1.46388, 0}, + {1.59252, -1.38778e-17}, + {1.59252, 0}, + {1.46388, 0}, + {0.981652, 0}, + {0.85665, 0}, + {0.797429, 0}, + {0.782576, 0}, + {0.797429, 0}, + {0.85665, 0}, + {0.981652, 0}, + {1.15758, 0}, + {1.31866, 0}, + {1.38444, 0}, + {1.31866, 0}, + {1.15758, 0}, + {0.775362, 0}, + {0.763031, 0}, + {0.782576, 0}, + {0.782576, 0}, + {0.763031, 0}, + {0.775362, 0}, + {0.862449, 0}, + {1.00096, 0}, + {1.10759, 0}, + {1.10759, 0}, + {1.00096, 0}, + {0.862449, 0}, + {0.698375, 0}, + {0.763031, 0}, + {0.797429, 0}, + {0.763031, 0}, + {0.698375, 0}, + {0.679431, 7.51157e-19}, + {0.740598, 0}, + {0.836162, 0}, + {0.881874, 0}, + {0.836162, -7.51157e-19}, + {0.740598, 0}, + {0.679431, 0}, + {0.775362, 0}, + {0.85665, -8.67362e-19}, + {0.85665, 0}, + {0.775362, 0}, + {0.679431, 0}, + {0.639427, 4.33681e-19}, + {0.666068, 0}, + {0.707352, 0}, + {0.707352, 0}, + {0.666068, 4.33681e-19}, + {0.639427, 0}, + {0.679431, 0}, + {0.981652, 0}, + {1.03162, 1.40862e-18}, + {0.981652, 0}, + {0.862449, 0}, + {0.740598, 0}, + {0.666068, 1.13142e-17}, + {0.641509, 0}, + {0.63886, 0}, + {0.641509, 0}, + {0.666068, -1.27228e-17}, + {0.740598, 0}, + {0.862449, 0}, + {1.25388, 0}, + {1.25388, 0}, + {1.15758, 0}, + {1.00096, 0}, + {0.836162, 0}, + {0.707352, -2.4037e-17}, + {0.63886, 0}, + {0.63886, 0}, + {0.707352, 0}, + {0.836162, 2.4037e-17}, + {1.00096, 0}, + {1.15758, 0}, + {1.51507, 0}, + {1.46388, 0}, + {1.31866, 0}, + {1.10759, 0}, + {0.881874, 0}, + {0.707352, 0}, + {0.641509, 0}, + {0.707352, 0}, + {0.881874, 0}, + {1.10759, 0}, + {1.31866, 0}, + {1.46388, 0}, + {1.69928, 0}, + {1.59252, 0}, + {1.38444, 0}, + {1.10759, 0}, + {0.836162, 0}, + 
{0.666068, 0}, + {0.666068, 0}, + {0.836162, 0}, + {1.10759, 0}, + {1.38444, 0}, + {1.59252, 0}, + {1.69928, 0}, + {1.76522, 0}, + {1.59252, 0}, + {1.31866, 0}, + {1.00096, 0}, + {0.740598, 0}, + {0.639427, 0}, + {0.740598, 0}, + {1.00096, 0}, + {1.31866, 0}, + {1.59252, 0}, + {1.76522, 0}, + {1.82256, 0}, + {1.69928, 0}, + {1.46388, 0}, + {1.15758, 0}, + {0.862449, 0}, + {0.679431, 0}, + {0.679431, 1.20185e-17}, + {0.862449, 0}, + {1.15758, 0}, + {1.46388, 0}, + {1.69928, -1.20185e-17}, + {1.82256, 0}, + {1.82256, 0}, + {1.08798, 1.85927e-18}, + {0.930725, 1.85927e-18}, + {0.824264, 1.85927e-18}, + {0.775362, 1.85927e-18}, + {0.775362, 1.85927e-18}, + {0.824264, 1.85927e-18}, + {0.930725, 1.85927e-18}, + {1.08798, 1.85927e-18}, + {1.25388, 1.85927e-18}, + {1.36223, 1.85927e-18}, + {1.36223, 1.85927e-18}, + {1.25388, 1.85927e-18}, + {0.930725, 1.85927e-18}, + {0.869317, 1.85927e-18}, + {0.85665, 1.57371e-17}, + {0.858837, 1.66044e-17}, + {0.85665, 1.08732e-17}, + {0.869317, 1.38778e-17}, + {0.930725, 6.93889e-18}, + {1.04306, -5.5133e-18}, + {1.15758, -7.15462e-18}, + {1.20632, -1.01592e-17}, + {1.15758, -1.70981e-17}, + {1.04306, -5.5133e-18}, + {0.824264, -1.20185e-17}, + {0.85665, -5.07962e-18}, + {0.899033, -8.54907e-18}, + {0.899033, -1.20185e-17}, + {0.85665, -1.20185e-17}, + {0.824264, -2.14972e-17}, + {0.851575, -1.90113e-17}, + {0.931265, -1.80278e-17}, + {1.00096, -1.20185e-17}, + {1.00096, -9.47871e-18}, + {0.931265, -8.49514e-18}, + {0.851575, -6.00926e-18}, + {0.775362, -5.07962e-18}, + {0.858837, -5.07962e-18}, + {0.899033, -5.07962e-18}, + {0.858837, -1.20185e-17}, + {0.775362, -1.5488e-17}, + {0.72366, -1.5488e-17}, + {0.743927, -2.75065e-17}, + {0.804389, -1.80278e-17}, + {0.836162, -1.5488e-17}, + {0.804389, -1.5488e-17}, + {0.743927, -3.46945e-18}, + {0.72366, -6.00926e-18}, + {0.775362, -1.20185e-17}, + {0.85665, -1.28859e-17}, + {0.85665, -1.89574e-17}, + {0.775362, -1.89574e-17}, + {0.679431, -2.24269e-17}, + {0.639427, -3.40117e-17}, + 
{0.666068, -1.89574e-17}, + {0.707352, -2.49667e-17}, + {0.707352, -2.24269e-17}, + {0.666068, -9.97466e-18}, + {0.639427, -1.89574e-17}, + {0.679431, -1.29482e-17}, + {0.824264, -1.10889e-17}, + {0.869317, -1.71739e-17}, + {0.824264, -1.45583e-17}, + {0.72366, -2.14972e-17}, + {0.639427, -3.19056e-17}, + {0.616617, -5.29001e-17}, + {0.63886, -4.21894e-17}, + {0.655231, -4.32269e-17}, + {0.63886, -3.19056e-17}, + {0.616617, -4.82604e-18}, + {0.639427, -1.81523e-17}, + {0.72366, -1.01759e-17}, + {0.930725, -1.20185e-17}, + {0.930725, -8.54907e-18}, + {0.851575, -1.20185e-17}, + {0.743927, -1.20185e-17}, + {0.666068, -1.20185e-17}, + {0.63886, -1.37532e-17}, + {0.640151, -1.20185e-17}, + {0.640151, -1.20185e-17}, + {0.63886, -1.20185e-17}, + {0.666068, -1.37532e-17}, + {0.743927, -1.20185e-17}, + {0.851575, -1.20185e-17}, + {1.08798, 1.85927e-18}, + {1.04306, 1.85927e-18}, + {0.931265, -1.20185e-17}, + {0.804389, -1.20185e-17}, + {0.707352, -1.89574e-17}, + {0.655231, -1.89574e-17}, + {0.640151, -2.4037e-17}, + {0.655231, -2.4037e-17}, + {0.707352, -1.89574e-17}, + {0.804389, -1.89574e-17}, + {0.931265, 0}, + {1.04306, 0}, + {1.25388, -1.20185e-17}, + {1.15758, -1.89574e-17}, + {1.00096, -1.89574e-17}, + {0.836162, -1.89574e-17}, + {0.707352, -2.24269e-17}, + {0.63886, -6.93889e-18}, + {0.63886, -2.49667e-17}, + {0.707352, -2.49667e-17}, + {0.836162, -2.24269e-17}, + {1.00096, -3.09759e-17}, + {1.15758, -1.29482e-17}, + {1.25388, -1.29482e-17}, + {1.36223, -1.29482e-17}, + {1.20632, -1.29482e-17}, + {1.00096, -4.07037e-17}, + {0.804389, -1.29482e-17}, + {0.666068, 1.10889e-17}, + {0.616617, -1.29482e-17}, + {0.666068, -1.10889e-17}, + {0.804389, -1.29482e-17}, + {1.00096, -3.69852e-17}, + {1.20632, -1.29482e-17}, + {1.36223, 1.29482e-17}, + {1.42048, -1.29482e-17}, + {1.36223, -1.20185e-17}, + {1.15758, -1.20185e-17}, + {0.931265, -1.20185e-17}, + {0.743927, -1.20185e-17}, + {0.639427, -1.20185e-17}, + {0.639427, -1.20185e-17}, + {0.743927, -1.20185e-17}, + 
{0.931265, -1.20185e-17}, + {1.15758, -1.20185e-17}, + {1.36223, -1.20185e-17}, + {1.48405, -1.20185e-17}, + {1.48405, -1.20185e-17}, + {1.25388, 1.57371e-17}, + {1.04306, 1.57371e-17}, + {0.851575, -1.20185e-17}, + {0.72366, -1.20185e-17}, + {0.679431, -2.58963e-17}, + {0.72366, -4.09195e-17}, + {0.851575, -2.4037e-17}, + {1.04306, -3.60556e-17}, + {1.25388, -2.58963e-17}, + {1.42048, -1.08732e-17}, + {1.48405, 0}, + {1.42048, 1.20185e-17}, + {0.905261, 0}, + {0.930725, 3.46945e-18}, + {0.981652, 0}, + {1.00712, 0}, + {0.981652, 0}, + {0.930725, -1.73472e-18}, + {0.905261, 0}, + {0.930725, 0}, + {0.981652, 0}, + {1.00712, -1.73472e-18}, + {0.981652, 0}, + {0.930725, 0}, + {0.930725, 0}, + {1.03162, 0}, + {1.1084, 0}, + {1.1084, 1.73472e-18}, + {1.03162, 0}, + {0.930725, -6.00926e-18}, + {0.864823, 0}, + {0.851575, -8.67362e-19}, + {0.862449, 0}, + {0.862449, 6.00926e-18}, + {0.851575, 0}, + {0.864823, -8.67362e-19}, + {0.981652, 0}, + {1.1084, 3.46945e-18}, + {1.16145, 0}, + {1.1084, 0}, + {0.981652, 0}, + {0.851575, -1.73472e-18}, + {0.771224, 0}, + {0.743927, 0}, + {0.740598, 0}, + {0.743927, -1.73472e-18}, + {0.771224, 0}, + {0.851575, 0}, + {1.00712, 0}, + {1.1084, 6.93889e-18}, + {1.1084, 0}, + {1.00712, 0}, + {0.862449, 0}, + {0.743927, -3.46945e-18}, + {0.683306, 0}, + {0.666068, 0}, + {0.666068, 0}, + {0.683306, -3.46945e-18}, + {0.743927, 0}, + {0.862449, 0}, + {0.981652, 0}, + {1.03162, -1.38028e-18}, + {0.981652, 0}, + {0.862449, 0}, + {0.740598, 0}, + {0.666068, 6.90141e-19}, + {0.641509, 0}, + {0.63886, 0}, + {0.641509, 0}, + {0.666068, 6.90141e-19}, + {0.740598, 0}, + {0.862449, 0}, + {0.930725, -8.67362e-18}, + {0.930725, -8.67362e-18}, + {0.851575, -6.93889e-18}, + {0.743927, -6.93889e-18}, + {0.666068, 4.33681e-18}, + {0.63886, 1.63553e-17}, + {0.640151, 9.47871e-18}, + {0.640151, 1.24833e-17}, + {0.63886, 4.33681e-18}, + {0.666068, -7.68171e-18}, + {0.743927, -2.53981e-18}, + {0.851575, -5.54444e-18}, + {0.905261, 0}, + {0.864823, -1.73472e-18}, 
+ {0.771224, 0}, + {0.683306, 0}, + {0.641509, 0}, + {0.640151, 8.67362e-19}, + {0.645833, 0}, + {0.640151, 0}, + {0.641509, 0}, + {0.683306, 8.67362e-19}, + {0.771224, 0}, + {0.864823, 0}, + {0.930725, 0}, + {0.851575, 0}, + {0.743927, 0}, + {0.666068, 0}, + {0.63886, 0}, + {0.640151, 0}, + {0.640151, 0}, + {0.63886, 0}, + {0.666068, 0}, + {0.743927, 0}, + {0.851575, 0}, + {0.930725, 0}, + {0.981652, 0}, + {0.862449, 0}, + {0.740598, 0}, + {0.666068, 0}, + {0.641509, 0}, + {0.63886, 0}, + {0.641509, 0}, + {0.666068, 0}, + {0.740598, 0}, + {0.862449, 0}, + {0.981652, 0}, + {1.03162, 0}, + {1.00712, 1.38778e-17}, + {0.862449, 1.38778e-17}, + {0.743927, 0}, + {0.683306, 0}, + {0.666068, -6.93889e-18}, + {0.666068, -1.29482e-17}, + {0.683306, -1.20185e-17}, + {0.743927, -1.20185e-17}, + {0.862449, -6.93889e-18}, + {1.00712, -9.29636e-19}, + {1.1084, 1.20185e-17}, + {1.1084, 1.20185e-17}, + {0.981652, 0}, + {0.851575, 1.38778e-17}, + {0.771224, 0}, + {0.743927, 0}, + {0.740598, 0}, + {0.743927, -6.93889e-18}, + {0.771224, 0}, + {0.851575, 0}, + {0.981652, 0}, + {1.1084, -6.93889e-18}, + {1.16145, 0}, + {1.1084, 0}, + {0.930725, 0}, + {0.864823, 0}, + {0.851575, 0}, + {0.862449, 0}, + {0.862449, 0}, + {0.851575, 0}, + {0.864823, 0}, + {0.930725, 0}, + {1.03162, 0}, + {1.1084, 0}, + {1.1084, 0}, + {1.03162, 0}, + {1.08798, 0}, + {1.25388, 0}, + {1.36223, 0}, + {1.36223, 0}, + {1.25388, 0}, + {1.08798, 1.20185e-17}, + {0.930725, 0}, + {0.824264, 0}, + {0.775362, 0}, + {0.775362, -1.20185e-17}, + {0.824264, 0}, + {0.930725, 0}, + {1.25388, 2.77556e-17}, + {1.42048, -1.38778e-17}, + {1.48405, -1.38778e-17}, + {1.42048, 0}, + {1.25388, -1.38778e-17}, + {1.04306, 6.93889e-18}, + {0.851575, 3.09759e-17}, + {0.72366, 0}, + {0.679431, -1.38778e-17}, + {0.72366, 6.93889e-18}, + {0.851575, -1.70981e-17}, + {1.04306, 0}, + {1.36223, 0}, + {1.48405, -6.93889e-18}, + {1.48405, 0}, + {1.36223, 0}, + {1.15758, 0}, + {0.931265, 3.46945e-18}, + {0.743927, 0}, + {0.639427, 0}, + 
{0.639427, 0}, + {0.743927, 3.46945e-18}, + {0.931265, 0}, + {1.15758, 0}, + {1.36223, 0}, + {1.42048, -2.07544e-18}, + {1.36223, 0}, + {1.20632, 0}, + {1.00096, 0}, + {0.804389, 1.03772e-18}, + {0.666068, 0}, + {0.616617, 0}, + {0.666068, 0}, + {0.804389, 1.03772e-18}, + {1.00096, 0}, + {1.20632, 0}, + {1.25388, 0}, + {1.25388, 0}, + {1.15758, 0}, + {1.00096, 0}, + {0.836162, 0}, + {0.707352, 0}, + {0.63886, 0}, + {0.63886, 0}, + {0.707352, 0}, + {0.836162, 0}, + {1.00096, 0}, + {1.15758, 0}, + {1.08798, -4.64818e-19}, + {1.04306, 0}, + {0.931265, 0}, + {0.804389, 0}, + {0.707352, 2.32409e-19}, + {0.655231, 0}, + {0.640151, -6.00926e-18}, + {0.655231, -4.50694e-18}, + {0.707352, 2.32409e-19}, + {0.804389, 0}, + {0.931265, 6.00926e-18}, + {1.04306, 4.50694e-18}, + {0.930725, 0}, + {0.851575, 0}, + {0.743927, 0}, + {0.666068, 0}, + {0.63886, 0}, + {0.640151, 0}, + {0.640151, 0}, + {0.63886, 0}, + {0.666068, 0}, + {0.743927, 0}, + {0.851575, 0}, + {0.930725, 0}, + {0.824264, 0}, + {0.72366, 0}, + {0.639427, 0}, + {0.616617, 0}, + {0.63886, 0}, + {0.655231, -1.50231e-18}, + {0.63886, 0}, + {0.616617, 0}, + {0.639427, 0}, + {0.72366, 1.50231e-18}, + {0.824264, 0}, + {0.869317, 0}, + {0.775362, 0}, + {0.679431, 0}, + {0.639427, 0}, + {0.666068, 0}, + {0.707352, 0}, + {0.707352, 0}, + {0.666068, 0}, + {0.639427, 0}, + {0.679431, 0}, + {0.775362, 0}, + {0.85665, 0}, + {0.85665, 0}, + {0.775362, -6.93889e-18}, + {0.72366, -3.46945e-18}, + {0.743927, 0}, + {0.804389, 0}, + {0.836162, 4.64818e-19}, + {0.804389, 1.73472e-18}, + {0.743927, 0}, + {0.72366, 6.00926e-18}, + {0.775362, 6.47408e-18}, + {0.858837, 1.73472e-18}, + {0.899033, 0}, + {0.858837, -6.00926e-18}, + {0.824264, 0}, + {0.851575, 0}, + {0.931265, 0}, + {1.00096, 0}, + {1.00096, 0}, + {0.931265, 0}, + {0.851575, 0}, + {0.824264, 0}, + {0.85665, 0}, + {0.899033, 0}, + {0.899033, 0}, + {0.85665, 0}, + {0.930725, 0}, + {1.04306, 0}, + {1.15758, 0}, + {1.20632, 0}, + {1.15758, 0}, + {1.04306, 0}, + {0.930725, 0}, + 
{0.869317, 0}, + {0.85665, 0}, + {0.858837, 0}, + {0.85665, 0}, + {0.869317, 0}, + {1.51507, 0}, + {1.69928, 0}, + {1.76522, 0}, + {1.69928, 0}, + {1.51507, 0}, + {1.25388, -2.4037e-17}, + {0.981652, 0}, + {0.775362, 0}, + {0.698375, 0}, + {0.775362, 2.4037e-17}, + {0.981652, 0}, + {1.25388, 0}, + {1.69928, 0}, + {1.82256, -1.38778e-17}, + {1.82256, 0}, + {1.69928, 0}, + {1.46388, 0}, + {1.15758, -4.11352e-17}, + {0.862449, 0}, + {0.679431, 0}, + {0.679431, 0}, + {0.862449, 5.5013e-17}, + {1.15758, 0}, + {1.46388, 0}, + {1.76522, 0}, + {1.82256, 6.6502e-18}, + {1.76522, 0}, + {1.59252, 0}, + {1.31866, 0}, + {1.00096, -3.3251e-18}, + {0.740598, 0}, + {0.639427, 0}, + {0.740598, 0}, + {1.00096, -3.3251e-18}, + {1.31866, 0}, + {1.59252, 0}, + {1.69928, 0}, + {1.69928, 0}, + {1.59252, 0}, + {1.38444, 0}, + {1.10759, 0}, + {0.836162, 0}, + {0.666068, 0}, + {0.666068, 0}, + {0.836162, 0}, + {1.10759, 0}, + {1.38444, 0}, + {1.59252, 0}, + {1.51507, 0}, + {1.46388, 0}, + {1.31866, 0}, + {1.10759, 0}, + {0.881874, 0}, + {0.707352, 0}, + {0.641509, 0}, + {0.707352, 0}, + {0.881874, 0}, + {1.10759, 0}, + {1.31866, 0}, + {1.46388, 0}, + {1.25388, 1.38778e-17}, + {1.15758, 0}, + {1.00096, 0}, + {0.836162, 2.77556e-17}, + {0.707352, -6.93889e-18}, + {0.63886, -1.20185e-17}, + {0.63886, -6.00926e-18}, + {0.707352, -2.58963e-17}, + {0.836162, -6.93889e-18}, + {1.00096, 1.20185e-17}, + {1.15758, 6.00926e-18}, + {1.25388, -1.85927e-18}, + {0.981652, 0}, + {0.862449, 1.38778e-17}, + {0.740598, 0}, + {0.666068, 0}, + {0.641509, 0}, + {0.63886, -6.93889e-18}, + {0.641509, 0}, + {0.666068, 0}, + {0.740598, 0}, + {0.862449, -6.93889e-18}, + {0.981652, 0}, + {1.03162, 0}, + {0.775362, 0}, + {0.679431, 0}, + {0.639427, 0}, + {0.666068, 0}, + {0.707352, 0}, + {0.707352, 0}, + {0.666068, 0}, + {0.639427, 0}, + {0.679431, 0}, + {0.775362, 0}, + {0.85665, 0}, + {0.85665, 0}, + {0.698375, 0}, + {0.679431, 0}, + {0.740598, 0}, + {0.836162, 0}, + {0.881874, 0}, + {0.836162, -6.00926e-18}, + 
{0.740598, 0}, + {0.679431, 0}, + {0.698375, 0}, + {0.763031, 6.00926e-18}, + {0.797429, 0}, + {0.763031, 0}, + {0.775362, 0}, + {0.862449, 0}, + {1.00096, 0}, + {1.10759, 0}, + {1.10759, 0}, + {1.00096, 0}, + {0.862449, 0}, + {0.775362, 0}, + {0.763031, 0}, + {0.782576, 0}, + {0.782576, 0}, + {0.763031, 0}, + {0.981652, 0}, + {1.15758, 0}, + {1.31866, 0}, + {1.38444, 0}, + {1.31866, 0}, + {1.15758, 0}, + {0.981652, 0}, + {0.85665, 0}, + {0.797429, 0}, + {0.782576, 0}, + {0.797429, 0}, + {0.85665, 0}, + {1.25388, 0}, + {1.46388, 0}, + {1.59252, 0}, + {1.59252, 0}, + {1.46388, 0}, + {1.25388, 2.4037e-17}, + {1.03162, 0}, + {0.85665, 0}, + {0.763031, 0}, + {0.763031, -2.4037e-17}, + {0.85665, 0}, + {1.03162, 0}, + {1.92793, 1.20185e-17}, + {2.03322, 2.58963e-17}, + {2.03322, 3.97741e-17}, + {1.92793, 1.20185e-17}, + {1.69928, 3.60556e-17}, + {1.36223, 5.07962e-18}, + {1.00712, -1.85927e-18}, + {0.775362, 1.20185e-17}, + {0.775362, -1.20185e-17}, + {1.00712, 5.07962e-18}, + {1.36223, -1.85927e-18}, + {1.69928, 1.20185e-17}, + {2.03322, 2.41616e-17}, + {2.07555, 2.2009e-17}, + {2.03322, 7.96727e-17}, + {1.88226, 1.35184e-16}, + {1.59252, 2.41616e-17}, + {1.20632, 2.52379e-17}, + {0.862449, 4.44801e-17}, + {0.72366, -3.13496e-17}, + {0.862449, 2.41616e-17}, + {1.20632, 2.52379e-17}, + {1.59252, -5.16681e-17}, + {1.88226, -3.13496e-17}, + {2.03322, 3.97741e-17}, + {2.03322, 2.58963e-17}, + {1.94478, 3.97741e-17}, + {1.7299, 3.97741e-17}, + {1.38444, 3.97741e-17}, + {1.00096, 4.6713e-17}, + {0.743927, 3.97741e-17}, + {0.743927, 3.97741e-17}, + {1.00096, 3.97741e-17}, + {1.38444, 4.6713e-17}, + {1.7299, 3.97741e-17}, + {1.94478, 3.97741e-17}, + {1.92793, 3.97741e-17}, + {1.88226, 1.20185e-17}, + {1.7299, 3.97741e-17}, + {1.45645, 3.97741e-17}, + {1.10759, 3.97741e-17}, + {0.804389, 5.36519e-17}, + {0.683306, 3.97741e-17}, + {0.804389, 3.97741e-17}, + {1.10759, 3.97741e-17}, + {1.45645, 5.36519e-17}, + {1.7299, 3.97741e-17}, + {1.88226, 3.97741e-17}, + {1.69928, 
3.60556e-17}, + {1.59252, 6.38111e-17}, + {1.38444, 3.60556e-17}, + {1.10759, 3.60556e-17}, + {0.836162, 3.60556e-17}, + {0.666068, 2.21778e-17}, + {0.666068, 3.60556e-17}, + {0.836162, 3.60556e-17}, + {1.10759, 3.60556e-17}, + {1.38444, 2.21778e-17}, + {1.59252, 3.60556e-17}, + {1.69928, 3.60556e-17}, + {1.36223, 7.16174e-17}, + {1.20632, 8.54952e-17}, + {1.00096, 8.54952e-17}, + {0.804389, 5.77396e-17}, + {0.666068, 3.32044e-17}, + {0.616617, 1.4247e-17}, + {0.666068, -3.75829e-17}, + {0.804389, -3.19678e-17}, + {1.00096, -1.48697e-17}, + {1.20632, -9.79007e-18}, + {1.36223, 4.20398e-17}, + {1.42048, 6.41803e-17}, + {1.00712, 2.21778e-17}, + {0.862449, 3.60556e-17}, + {0.743927, 4.99333e-17}, + {0.683306, 4.99333e-17}, + {0.666068, 4.62148e-17}, + {0.666068, 3.4769e-17}, + {0.683306, 2.03185e-17}, + {0.743927, 8.29997e-18}, + {0.862449, -1.85927e-18}, + {1.00712, -4.29122e-18}, + {1.1084, -3.71854e-18}, + {1.1084, 8.29997e-18}, + {0.775362, -1.85927e-18}, + {0.72366, 1.20185e-17}, + {0.743927, 1.89574e-17}, + {0.804389, 2.58963e-17}, + {0.836162, 2.21778e-17}, + {0.804389, 1.52389e-17}, + {0.743927, -2.49095e-19}, + {0.72366, -1.57371e-17}, + {0.775362, -2.58963e-17}, + {0.858837, -3.28352e-17}, + {0.899033, -2.42861e-17}, + {0.858837, -1.57371e-17}, + {0.775362, -1.20185e-17}, + {0.862449, 1.85927e-18}, + {1.00096, 1.22676e-17}, + {1.10759, 1.57371e-17}, + {1.10759, 1.20185e-17}, + {1.00096, 2.07499e-18}, + {0.862449, -1.21431e-17}, + {0.775362, -2.58963e-17}, + {0.763031, -3.60556e-17}, + {0.782576, -3.99898e-17}, + {0.782576, -3.61801e-17}, + {0.763031, -2.58963e-17}, + {1.00712, -7.36012e-17}, + {1.20632, -7.36012e-17}, + {1.38444, -1.809e-17}, + {1.45645, 1.14003e-17}, + {1.38444, 3.67072e-17}, + {1.20632, 5.77396e-17}, + {1.00712, 2.9984e-17}, + {0.858837, 1.52389e-17}, + {0.782576, -1.73761e-17}, + {0.761485, -3.84085e-17}, + {0.782576, -6.61641e-17}, + {0.858837, -8.09093e-17}, + {1.36223, -2.58963e-17}, + {1.59252, 1.85927e-18}, + {1.7299, 1.85927e-18}, 
+ {1.7299, 1.85927e-18}, + {1.59252, -1.85927e-18}, + {1.36223, -1.57371e-17}, + {1.1084, -1.57371e-17}, + {0.899033, -3.97741e-17}, + {0.782576, -4.99333e-17}, + {0.782576, -6.38111e-17}, + {0.899033, -6.38111e-17}, + {1.1084, -3.97741e-17}, + {1.69928, -1.85927e-18}, + {1.88226, 2.58963e-17}, + {1.94478, 1.20185e-17}, + {1.88226, 2.58963e-17}, + {1.69928, 2.21778e-17}, + {1.42048, 8.29997e-18}, + {1.1084, -8.79816e-18}, + {0.858837, -1.57371e-17}, + {0.763031, -2.58963e-17}, + {0.858837, -3.97741e-17}, + {1.1084, -8.79816e-18}, + {1.42048, -1.57371e-17}, + {2.14383, 0}, + {2.16623, -3.54439e-19}, + {2.14383, 0}, + {2.03322, 0}, + {1.76522, 0}, + {1.36223, 1.77219e-19}, + {0.981652, 0}, + {0.824264, 0}, + {0.981652, 0}, + {1.36223, 1.77219e-19}, + {1.76522, 0}, + {2.03322, 0}, + {2.16623, -6.93889e-18}, + {2.16623, -6.93889e-18}, + {2.11997, -6.93889e-18}, + {1.94478, -6.93889e-18}, + {1.59252, -6.93889e-18}, + {1.15758, -6.93889e-18}, + {0.851575, 1.70981e-17}, + {0.851575, -6.93889e-18}, + {1.15758, -6.93889e-18}, + {1.59252, -6.93889e-18}, + {1.94478, -3.09759e-17}, + {2.11997, -6.93889e-18}, + {2.14383, 0}, + {2.11997, 0}, + {2.00496, 0}, + {1.7299, 0}, + {1.31866, 0}, + {0.931265, 4.80741e-17}, + {0.771224, 0}, + {0.931265, 0}, + {1.31866, 0}, + {1.7299, -4.80741e-17}, + {2.00496, 0}, + {2.11997, 0}, + {2.03322, 0}, + {1.94478, 0}, + {1.7299, 0}, + {1.38444, 0}, + {1.00096, 0}, + {0.743927, 0}, + {0.743927, 0}, + {1.00096, 0}, + {1.38444, 0}, + {1.7299, 0}, + {1.94478, 0}, + {2.03322, 0}, + {1.76522, 0}, + {1.59252, 5.55112e-17}, + {1.31866, 0}, + {1.00096, 0}, + {0.740598, 0}, + {0.639427, -2.77556e-17}, + {0.740598, 0}, + {1.00096, 0}, + {1.31866, 0}, + {1.59252, -2.77556e-17}, + {1.76522, 0}, + {1.82256, 0}, + {1.36223, 3.46945e-18}, + {1.15758, 3.1225e-17}, + {0.931265, 3.1225e-17}, + {0.743927, 3.1225e-17}, + {0.639427, 2.75065e-17}, + {0.639427, 1.36287e-17}, + {0.743927, 1.61018e-18}, + {0.931265, -1.04083e-17}, + {1.15758, -2.05676e-17}, + {1.36223, 
-3.44454e-17}, + {1.48405, -2.24269e-17}, + {1.48405, -1.04083e-17}, + {0.981652, 0}, + {0.851575, 1.38778e-17}, + {0.771224, 0}, + {0.743927, 0}, + {0.740598, 0}, + {0.743927, -6.93889e-18}, + {0.771224, 0}, + {0.851575, 0}, + {0.981652, 0}, + {1.1084, -6.93889e-18}, + {1.16145, 0}, + {1.1084, 0}, + {0.824264, 0}, + {0.851575, 0}, + {0.931265, 0}, + {1.00096, 0}, + {1.00096, 0}, + {0.931265, 0}, + {0.851575, 0}, + {0.824264, 0}, + {0.85665, 0}, + {0.899033, 0}, + {0.899033, 0}, + {0.85665, 0}, + {0.981652, 0}, + {1.15758, 0}, + {1.31866, 0}, + {1.38444, 0}, + {1.31866, 0}, + {1.15758, 0}, + {0.981652, 0}, + {0.85665, 0}, + {0.797429, 0}, + {0.782576, 0}, + {0.797429, 0}, + {0.85665, 0}, + {1.36223, 3.1225e-17}, + {1.59252, 3.1225e-17}, + {1.7299, 1.73472e-17}, + {1.7299, -3.46945e-18}, + {1.59252, -1.04083e-17}, + {1.36223, -1.04083e-17}, + {1.1084, -2.75065e-17}, + {0.899033, -1.70981e-17}, + {0.782576, -1.04083e-17}, + {0.782576, -1.04083e-17}, + {0.899033, 2.05676e-17}, + {1.1084, 3.09759e-17}, + {1.76522, 0}, + {1.94478, 2.77556e-17}, + {2.00496, 0}, + {1.94478, 0}, + {1.76522, 0}, + {1.48405, -1.38778e-17}, + {1.16145, 0}, + {0.899033, 0}, + {0.797429, 0}, + {0.899033, -1.38778e-17}, + {1.16145, 0}, + {1.48405, 0}, + {2.03322, 0}, + {2.11997, 1.38778e-17}, + {2.11997, 0}, + {2.03322, 0}, + {1.82256, 0}, + {1.48405, -6.93889e-18}, + {1.1084, 0}, + {0.85665, 0}, + {0.85665, 0}, + {1.1084, -6.93889e-18}, + {1.48405, 0}, + {1.82256, 0}, + {2.17701, 0}, + {2.17701, 0}, + {2.16623, 0}, + {2.03322, 0}, + {1.69928, 0}, + {1.25388, 0}, + {0.930725, 0}, + {0.930725, 0}, + {1.25388, 0}, + {1.69928, 0}, + {2.03322, 0}, + {2.16623, 0}, + {2.17701, -5.20417e-18}, + {2.17945, -2.77556e-17}, + {2.11997, 0}, + {1.88226, 0}, + {1.46388, 2.60209e-18}, + {1.04306, -1.01592e-17}, + {0.864823, 0}, + {1.04306, 1.20185e-17}, + {1.46388, 2.60209e-18}, + {1.88226, 3.79148e-17}, + {2.11997, 0}, + {2.17945, -1.20185e-17}, + {2.16623, 0}, + {2.11997, 0}, + {1.94478, 0}, + {1.59252, 0}, + 
{1.15758, 0}, + {0.851575, 0}, + {0.851575, 0}, + {1.15758, 0}, + {1.59252, 0}, + {1.94478, 0}, + {2.11997, 0}, + {2.16623, 0}, + {2.03322, 0}, + {1.88226, 0}, + {1.59252, 0}, + {1.20632, 0}, + {0.862449, 0}, + {0.72366, 2.4037e-17}, + {0.862449, 0}, + {1.20632, 0}, + {1.59252, 0}, + {1.88226, -2.4037e-17}, + {2.03322, 0}, + {2.07555, 0}, + {1.69928, 0}, + {1.46388, 0}, + {1.15758, 0}, + {0.862449, 0}, + {0.679431, 0}, + {0.679431, -1.20185e-17}, + {0.862449, 0}, + {1.15758, 0}, + {1.46388, 0}, + {1.69928, 1.20185e-17}, + {1.82256, 0}, + {1.82256, 0}, + {1.25388, 0}, + {1.04306, 0}, + {0.851575, 0}, + {0.72366, 0}, + {0.679431, 0}, + {0.72366, 1.50231e-18}, + {0.851575, 0}, + {1.04306, 0}, + {1.25388, 0}, + {1.42048, -1.50231e-18}, + {1.48405, 0}, + {1.42048, 0}, + {0.930725, 0}, + {0.864823, 0}, + {0.851575, 0}, + {0.862449, 0}, + {0.862449, 0}, + {0.851575, 0}, + {0.864823, 0}, + {0.930725, 0}, + {1.03162, 0}, + {1.1084, 0}, + {1.1084, 0}, + {1.03162, 0}, + {0.930725, 0}, + {1.04306, 0}, + {1.15758, 0}, + {1.20632, 0}, + {1.15758, 0}, + {1.04306, 0}, + {0.930725, 0}, + {0.869317, 0}, + {0.85665, 0}, + {0.858837, 0}, + {0.85665, 0}, + {0.869317, 0}, + {1.25388, 0}, + {1.46388, 0}, + {1.59252, 0}, + {1.59252, 0}, + {1.46388, 0}, + {1.25388, 0}, + {1.03162, 0}, + {0.85665, 0}, + {0.763031, 0}, + {0.763031, 0}, + {0.85665, 0}, + {1.03162, 0}, + {1.69928, 0}, + {1.88226, 2.77556e-17}, + {1.94478, 0}, + {1.88226, 0}, + {1.69928, 0}, + {1.42048, -1.38778e-17}, + {1.1084, 0}, + {0.858837, 0}, + {0.763031, 0}, + {0.858837, -1.38778e-17}, + {1.1084, 0}, + {1.42048, 0}, + {2.03322, 0}, + {2.11997, 0}, + {2.11997, 0}, + {2.03322, 0}, + {1.82256, 0}, + {1.48405, 0}, + {1.1084, 0}, + {0.85665, 0}, + {0.85665, 0}, + {1.1084, 0}, + {1.48405, 0}, + {1.82256, 0}, + {2.16623, 0}, + {2.17945, 5.846e-18}, + {2.16623, 0}, + {2.07555, 0}, + {1.82256, 0}, + {1.42048, -2.923e-18}, + {1.03162, 0}, + {0.869317, 0}, + {1.03162, 0}, + {1.42048, -2.923e-18}, + {1.82256, 0}, + {2.07555, 0}}; + 
const std::vector> out_2 = {{2.16277, 0}, + {2.17701, 0}, + {2.14383, 0}, + {1.92793, 0}, + {1.51507, 0}, + {1.08798, -2.4037e-17}, + {0.905261, 0}, + {1.08798, 0}, + {1.51507, 0}, + {1.92793, 2.4037e-17}, + {2.14383, 0}, + {2.17701, 0}, + {2.17701, 0}, + {2.16623, 5.55112e-17}, + {2.03322, 0}, + {1.69928, 0}, + {1.25388, 0}, + {0.930725, -2.77556e-17}, + {0.930725, 0}, + {1.25388, 0}, + {1.69928, 0}, + {2.03322, -2.77556e-17}, + {2.16623, 0}, + {2.17701, 0}, + {2.14383, 0}, + {2.03322, 0}, + {1.76522, 0}, + {1.36223, 0}, + {0.981652, 0}, + {0.824264, 0}, + {0.981652, 0}, + {1.36223, 0}, + {1.76522, 0}, + {2.03322, 0}, + {2.14383, 0}, + {2.16623, 0}, + {1.92793, 0}, + {1.69928, 0}, + {1.36223, 0}, + {1.00712, 0}, + {0.775362, 0}, + {0.775362, 1.20185e-17}, + {1.00712, 0}, + {1.36223, 0}, + {1.69928, 0}, + {1.92793, -1.20185e-17}, + {2.03322, 0}, + {2.03322, 0}, + {1.51507, 0}, + {1.25388, 0}, + {0.981652, 0}, + {0.775362, 0}, + {0.698375, 0}, + {0.775362, -4.50694e-18}, + {0.981652, 0}, + {1.25388, 0}, + {1.51507, 0}, + {1.69928, 4.50694e-18}, + {1.76522, 0}, + {1.69928, 0}, + {1.08798, 0}, + {0.930725, -2.77556e-17}, + {0.824264, 0}, + {0.775362, 0}, + {0.775362, 0}, + {0.824264, 1.38778e-17}, + {0.930725, 0}, + {1.08798, 0}, + {1.25388, 0}, + {1.36223, 1.38778e-17}, + {1.36223, 0}, + {1.25388, 0}, + {0.905261, 0}, + {0.930725, 0}, + {0.981652, 0}, + {1.00712, 0}, + {0.981652, 0}, + {0.930725, 0}, + {0.905261, 0}, + {0.930725, 0}, + {0.981652, 0}, + {1.00712, 0}, + {0.981652, 0}, + {0.930725, 0}, + {1.08798, 0}, + {1.25388, 0}, + {1.36223, 0}, + {1.36223, 0}, + {1.25388, 0}, + {1.08798, 0}, + {0.930725, 0}, + {0.824264, 0}, + {0.775362, 0}, + {0.775362, 0}, + {0.824264, 0}, + {0.930725, 0}, + {1.51507, 0}, + {1.69928, -2.77556e-17}, + {1.76522, 0}, + {1.69928, 0}, + {1.51507, 0}, + {1.25388, 1.38778e-17}, + {0.981652, 0}, + {0.775362, 0}, + {0.698375, 0}, + {0.775362, 1.38778e-17}, + {0.981652, 0}, + {1.25388, 0}, + {1.92793, 0}, + {2.03322, 0}, + {2.03322, 0}, + 
{1.92793, 0}, + {1.69928, 0}, + {1.36223, 0}, + {1.00712, 0}, + {0.775362, 0}, + {0.775362, 0}, + {1.00712, 0}, + {1.36223, 0}, + {1.69928, 0}, + {2.14383, 0}, + {2.16623, -3.54439e-19}, + {2.14383, 0}, + {2.03322, 0}, + {1.76522, 0}, + {1.36223, 1.77219e-19}, + {0.981652, 0}, + {0.824264, 0}, + {0.981652, 0}, + {1.36223, 1.77219e-19}, + {1.76522, 0}, + {2.03322, 0}, + {2.17701, 0}, + {2.17701, 0}, + {2.16623, 0}, + {2.03322, 0}, + {1.69928, 0}, + {1.25388, 0}, + {0.930725, 0}, + {0.930725, 0}, + {1.25388, 0}, + {1.69928, 0}, + {2.03322, 0}, + {2.16623, 0}, + {2.17701, -6.93889e-18}, + {2.16623, -6.93889e-18}, + {2.03322, -6.93889e-18}, + {1.69928, -6.93889e-18}, + {1.25388, -6.93889e-18}, + {0.930725, -3.09759e-17}, + {0.930725, -6.93889e-18}, + {1.25388, -6.93889e-18}, + {1.69928, -6.93889e-18}, + {2.03322, 1.70981e-17}, + {2.16623, -6.93889e-18}, + {2.17701, -6.93889e-18}, + {2.16623, 0}, + {2.07555, 0}, + {1.82256, 0}, + {1.42048, 0}, + {1.03162, 0}, + {0.869317, -2.4037e-17}, + {1.03162, 0}, + {1.42048, 0}, + {1.82256, 0}, + {2.07555, 2.4037e-17}, + {2.16623, 0}, + {2.17945, 0}, + {2.03322, 0}, + {1.82256, -5.55112e-17}, + {1.48405, 0}, + {1.1084, 0}, + {0.85665, 0}, + {0.85665, 2.77556e-17}, + {1.1084, 0}, + {1.48405, 0}, + {1.82256, 0}, + {2.03322, 2.77556e-17}, + {2.11997, 0}, + {2.11997, 0}, + {1.69928, 0}, + {1.42048, 0}, + {1.1084, 0}, + {0.858837, 0}, + {0.763031, 0}, + {0.858837, 0}, + {1.1084, 0}, + {1.42048, 0}, + {1.69928, 0}, + {1.88226, 0}, + {1.94478, 0}, + {1.88226, 0}, + {1.25388, 3.46945e-18}, + {1.03162, -2.42861e-17}, + {0.85665, 3.46945e-18}, + {0.763031, 3.46945e-18}, + {0.763031, 3.46945e-18}, + {0.85665, 1.73472e-17}, + {1.03162, 3.46945e-18}, + {1.25388, 3.46945e-18}, + {1.46388, 3.46945e-18}, + {1.59252, 1.73472e-17}, + {1.59252, 3.46945e-18}, + {1.46388, 3.46945e-18}, + {0.930725, -2.4037e-17}, + {0.869317, -3.79148e-17}, + {0.85665, -2.4037e-17}, + {0.858837, -1.7563e-17}, + {0.85665, -2.4037e-17}, + {0.869317, -1.70981e-17}, + 
{0.930725, -2.4037e-17}, + {1.04306, -2.72741e-17}, + {1.15758, -2.4037e-17}, + {1.20632, -1.70981e-17}, + {1.15758, -2.4037e-17}, + {1.04306, -2.72741e-17}, + {0.930725, 0}, + {1.03162, 0}, + {1.1084, 0}, + {1.1084, 0}, + {1.03162, 0}, + {0.930725, 0}, + {0.864823, 0}, + {0.851575, 0}, + {0.862449, 0}, + {0.862449, 0}, + {0.851575, 0}, + {0.864823, 0}, + {1.25388, -2.47509e-17}, + {1.42048, -3.86287e-17}, + {1.48405, -1.08732e-17}, + {1.42048, 3.00463e-18}, + {1.25388, 1.68824e-17}, + {1.04306, 4.78583e-17}, + {0.851575, 3.39806e-17}, + {0.72366, 2.70417e-17}, + {0.679431, 1.68824e-17}, + {0.72366, -2.15723e-19}, + {0.851575, -1.40935e-17}, + {1.04306, -2.10324e-17}, + {1.69928, 3.46945e-18}, + {1.82256, 1.73472e-17}, + {1.82256, 3.46945e-18}, + {1.69928, 3.46945e-18}, + {1.46388, 3.46945e-18}, + {1.15758, -3.46945e-18}, + {0.862449, 3.46945e-18}, + {0.679431, 3.46945e-18}, + {0.679431, 3.46945e-18}, + {0.862449, -3.46945e-18}, + {1.15758, 3.46945e-18}, + {1.46388, 3.46945e-18}, + {2.03322, 2.4037e-17}, + {2.07555, 2.5603e-17}, + {2.03322, 2.4037e-17}, + {1.88226, 2.4037e-17}, + {1.59252, -2.4037e-17}, + {1.20632, 2.32541e-17}, + {0.862449, 2.4037e-17}, + {0.72366, 2.4037e-17}, + {0.862449, 7.21111e-17}, + {1.20632, 2.32541e-17}, + {1.59252, 2.4037e-17}, + {1.88226, 2.4037e-17}, + {2.16623, 0}, + {2.16623, 0}, + {2.11997, 0}, + {1.94478, 0}, + {1.59252, 0}, + {1.15758, -4.80741e-17}, + {0.851575, 0}, + {0.851575, 0}, + {1.15758, 0}, + {1.59252, 4.80741e-17}, + {1.94478, 0}, + {2.11997, 0}, + {2.17701, -1.68824e-17}, + {2.17945, -3.07602e-17}, + {2.11997, -3.00463e-18}, + {1.88226, -3.00463e-18}, + {1.46388, 3.93426e-18}, + {1.04306, -1.31639e-17}, + {0.864823, -3.00463e-18}, + {1.04306, 9.01389e-18}, + {1.46388, 3.93426e-18}, + {1.88226, 3.49102e-17}, + {2.11997, -3.00463e-18}, + {2.17945, -1.50231e-17}, + {2.14383, 0}, + {2.03322, 0}, + {1.76522, 0}, + {1.36223, 0}, + {0.981652, 0}, + {0.824264, 2.4037e-17}, + {0.981652, 0}, + {1.36223, 0}, + {1.76522, 0}, + 
{2.03322, -2.4037e-17}, + {2.14383, 0}, + {2.16623, 0}, + {2.03322, -2.77556e-17}, + {1.82256, -2.77556e-17}, + {1.48405, -2.77556e-17}, + {1.1084, -2.77556e-17}, + {0.85665, -2.77556e-17}, + {0.85665, -3.71854e-18}, + {1.1084, -2.77556e-17}, + {1.48405, -2.77556e-17}, + {1.82256, -2.77556e-17}, + {2.03322, -5.17926e-17}, + {2.11997, -2.77556e-17}, + {2.11997, -2.77556e-17}, + {1.76522, 0}, + {1.48405, 0}, + {1.16145, 0}, + {0.899033, 0}, + {0.797429, 0}, + {0.899033, 9.01389e-18}, + {1.16145, 0}, + {1.48405, 0}, + {1.76522, 0}, + {1.94478, -9.01389e-18}, + {2.00496, 0}, + {1.94478, 0}, + {1.36223, 0}, + {1.1084, -2.77556e-17}, + {0.899033, 0}, + {0.782576, 0}, + {0.782576, 0}, + {0.899033, 1.38778e-17}, + {1.1084, 0}, + {1.36223, 0}, + {1.59252, 0}, + {1.7299, 1.38778e-17}, + {1.7299, 0}, + {1.59252, 0}, + {0.981652, 0}, + {0.85665, 2.77556e-17}, + {0.797429, 0}, + {0.782576, 0}, + {0.797429, 0}, + {0.85665, -1.38778e-17}, + {0.981652, 0}, + {1.15758, 0}, + {1.31866, 0}, + {1.38444, -1.38778e-17}, + {1.31866, 0}, + {1.15758, 0}, + {0.824264, 1.38778e-17}, + {0.85665, 1.38778e-17}, + {0.899033, 1.73472e-17}, + {0.899033, 6.93889e-18}, + {0.85665, 1.38778e-17}, + {0.824264, 1.68824e-17}, + {0.851575, 1.77767e-17}, + {0.931265, 2.03519e-17}, + {1.00096, 1.38778e-17}, + {1.00096, 1.08732e-17}, + {0.931265, 6.50938e-18}, + {0.851575, 1.43426e-17}, + {0.981652, 0}, + {1.1084, 0}, + {1.16145, 0}, + {1.1084, 0}, + {0.981652, 0}, + {0.851575, 0}, + {0.771224, 0}, + {0.743927, 0}, + {0.740598, 0}, + {0.743927, 0}, + {0.771224, 0}, + {0.851575, 0}, + {1.36223, 0}, + {1.48405, -6.93889e-18}, + {1.48405, 0}, + {1.36223, 0}, + {1.15758, 0}, + {0.931265, 3.46945e-18}, + {0.743927, 0}, + {0.639427, 0}, + {0.639427, 0}, + {0.743927, 3.46945e-18}, + {0.931265, 0}, + {1.15758, 0}, + {1.76522, 0}, + {1.82256, 2.93165e-18}, + {1.76522, 0}, + {1.59252, 0}, + {1.31866, 0}, + {1.00096, -4.95399e-17}, + {0.740598, 0}, + {0.639427, 0}, + {0.740598, 0}, + {1.00096, 4.66082e-17}, + {1.31866, 
0}, + {1.59252, 0}, + {2.03322, 1.38778e-17}, + {2.03322, 1.38778e-17}, + {1.94478, 1.38778e-17}, + {1.7299, 1.38778e-17}, + {1.38444, 1.38778e-17}, + {1.00096, 1.38778e-17}, + {0.743927, 1.38778e-17}, + {0.743927, 1.38778e-17}, + {1.00096, 1.38778e-17}, + {1.38444, 1.38778e-17}, + {1.7299, 1.38778e-17}, + {1.94478, 1.38778e-17}, + {2.14383, 0}, + {2.11997, 0}, + {2.00496, 0}, + {1.7299, 0}, + {1.31866, 0}, + {0.931265, 4.80741e-17}, + {0.771224, 0}, + {0.931265, 0}, + {1.31866, 0}, + {1.7299, -4.80741e-17}, + {2.00496, 0}, + {2.11997, 0}, + {2.16623, 0}, + {2.11997, 0}, + {1.94478, 0}, + {1.59252, 0}, + {1.15758, 0}, + {0.851575, 0}, + {0.851575, 0}, + {1.15758, 0}, + {1.59252, 0}, + {1.94478, 0}, + {2.11997, 0}, + {2.16623, 0}, + {1.92793, 0}, + {1.69928, 0}, + {1.36223, 0}, + {1.00712, 0}, + {0.775362, 0}, + {0.775362, 0}, + {1.00712, 0}, + {1.36223, 0}, + {1.69928, 0}, + {1.92793, 0}, + {2.03322, 0}, + {2.03322, 0}, + {1.69928, -2.77556e-17}, + {1.42048, -2.77556e-17}, + {1.1084, -2.77556e-17}, + {0.858837, -2.77556e-17}, + {0.763031, -2.77556e-17}, + {0.858837, -2.77556e-17}, + {1.1084, -2.77556e-17}, + {1.42048, -2.77556e-17}, + {1.69928, -2.77556e-17}, + {1.88226, -2.77556e-17}, + {1.94478, -2.77556e-17}, + {1.88226, -2.77556e-17}, + {1.36223, 0}, + {1.1084, -2.77556e-17}, + {0.899033, 0}, + {0.782576, 0}, + {0.782576, 0}, + {0.899033, 1.38778e-17}, + {1.1084, 0}, + {1.36223, 0}, + {1.59252, 0}, + {1.7299, 1.38778e-17}, + {1.7299, 0}, + {1.59252, 0}, + {1.00712, 0}, + {0.858837, 0}, + {0.782576, 0}, + {0.761485, 0}, + {0.782576, 0}, + {0.858837, 0}, + {1.00712, 0}, + {1.20632, 0}, + {1.38444, 0}, + {1.45645, 0}, + {1.38444, 0}, + {1.20632, 0}, + {0.775362, 0}, + {0.763031, 0}, + {0.782576, 0}, + {0.782576, 0}, + {0.763031, 0}, + {0.775362, 0}, + {0.862449, 0}, + {1.00096, 0}, + {1.10759, 0}, + {1.10759, 0}, + {1.00096, 0}, + {0.862449, 0}, + {0.775362, 5.32872e-18}, + {0.858837, 8.79816e-18}, + {0.899033, 1.85927e-18}, + {0.858837, 1.85927e-18}, + {0.775362, 
1.24548e-19}, + {0.72366, -1.61018e-18}, + {0.743927, 1.85927e-18}, + {0.804389, -4.14999e-18}, + {0.836162, 1.24548e-19}, + {0.804389, -1.61018e-18}, + {0.743927, 1.85927e-18}, + {0.72366, 7.86853e-18}, + {1.00712, 0}, + {1.1084, 6.93889e-18}, + {1.1084, 0}, + {1.00712, 0}, + {0.862449, 0}, + {0.743927, -3.46945e-18}, + {0.683306, 0}, + {0.666068, 0}, + {0.666068, 0}, + {0.683306, -3.46945e-18}, + {0.743927, 0}, + {0.862449, 0}, + {1.36223, 0}, + {1.42048, -2.16167e-19}, + {1.36223, 0}, + {1.20632, 0}, + {1.00096, 0}, + {0.804389, 1.08084e-19}, + {0.666068, 0}, + {0.616617, 0}, + {0.666068, 0}, + {0.804389, 1.08084e-19}, + {1.00096, 0}, + {1.20632, 0}, + {1.69928, 0}, + {1.69928, 0}, + {1.59252, 0}, + {1.38444, 0}, + {1.10759, 0}, + {0.836162, 0}, + {0.666068, 0}, + {0.666068, 0}, + {0.836162, 0}, + {1.10759, 0}, + {1.38444, 0}, + {1.59252, 0}, + {1.92793, 2.13894e-17}, + {1.88226, 2.58963e-17}, + {1.7299, 2.58963e-17}, + {1.45645, 2.58963e-17}, + {1.10759, 2.81498e-17}, + {0.804389, 2.58963e-17}, + {0.683306, 2.58963e-17}, + {0.804389, 3.19056e-17}, + {1.10759, 2.81498e-17}, + {1.45645, 2.58963e-17}, + {1.7299, 2.58963e-17}, + {1.88226, 1.9887e-17}, + {2.03322, 0}, + {1.94478, 0}, + {1.7299, 0}, + {1.38444, 0}, + {1.00096, 0}, + {0.743927, 2.4037e-17}, + {0.743927, 0}, + {1.00096, 0}, + {1.38444, 0}, + {1.7299, -2.4037e-17}, + {1.94478, 0}, + {2.03322, 0}, + {2.03322, 0}, + {1.88226, -5.55112e-17}, + {1.59252, 0}, + {1.20632, 0}, + {0.862449, 0}, + {0.72366, 2.77556e-17}, + {0.862449, 0}, + {1.20632, 0}, + {1.59252, 0}, + {1.88226, 2.77556e-17}, + {2.03322, 0}, + {2.07555, 0}, + {1.51507, 0}, + {1.25388, -5.55112e-17}, + {0.981652, 0}, + {0.775362, 0}, + {0.698375, 0}, + {0.775362, 3.22625e-17}, + {0.981652, 0}, + {1.25388, 0}, + {1.51507, 0}, + {1.69928, 2.32486e-17}, + {1.76522, 0}, + {1.69928, 0}, + {1.25388, 0}, + {1.03162, 2.77556e-17}, + {0.85665, 0}, + {0.763031, 0}, + {0.763031, 0}, + {0.85665, -1.38778e-17}, + {1.03162, 0}, + {1.25388, 0}, + {1.46388, 
0}, + {1.59252, -1.38778e-17}, + {1.59252, 0}, + {1.46388, 0}, + {0.981652, 0}, + {0.85665, 0}, + {0.797429, 0}, + {0.782576, 0}, + {0.797429, 0}, + {0.85665, 0}, + {0.981652, 0}, + {1.15758, 0}, + {1.31866, 0}, + {1.38444, 0}, + {1.31866, 0}, + {1.15758, 0}, + {0.775362, 0}, + {0.763031, 0}, + {0.782576, 0}, + {0.782576, 0}, + {0.763031, 0}, + {0.775362, 0}, + {0.862449, 0}, + {1.00096, 0}, + {1.10759, 0}, + {1.10759, 0}, + {1.00096, 0}, + {0.862449, 0}, + {0.698375, 0}, + {0.763031, 0}, + {0.797429, 0}, + {0.763031, 0}, + {0.698375, 0}, + {0.679431, 7.51157e-19}, + {0.740598, 0}, + {0.836162, 0}, + {0.881874, 0}, + {0.836162, -7.51157e-19}, + {0.740598, 0}, + {0.679431, 0}, + {0.775362, 0}, + {0.85665, -8.67362e-19}, + {0.85665, 0}, + {0.775362, 0}, + {0.679431, 0}, + {0.639427, 4.33681e-19}, + {0.666068, 0}, + {0.707352, 0}, + {0.707352, 0}, + {0.666068, 4.33681e-19}, + {0.639427, 0}, + {0.679431, 0}, + {0.981652, 0}, + {1.03162, 1.40862e-18}, + {0.981652, 0}, + {0.862449, 0}, + {0.740598, 0}, + {0.666068, 1.13142e-17}, + {0.641509, 0}, + {0.63886, 0}, + {0.641509, 0}, + {0.666068, -1.27228e-17}, + {0.740598, 0}, + {0.862449, 0}, + {1.25388, 0}, + {1.25388, 0}, + {1.15758, 0}, + {1.00096, 0}, + {0.836162, 0}, + {0.707352, -2.4037e-17}, + {0.63886, 0}, + {0.63886, 0}, + {0.707352, 0}, + {0.836162, 2.4037e-17}, + {1.00096, 0}, + {1.15758, 0}, + {1.51507, 0}, + {1.46388, 0}, + {1.31866, 0}, + {1.10759, 0}, + {0.881874, 0}, + {0.707352, 0}, + {0.641509, 0}, + {0.707352, 0}, + {0.881874, 0}, + {1.10759, 0}, + {1.31866, 0}, + {1.46388, 0}, + {1.69928, 0}, + {1.59252, 0}, + {1.38444, 0}, + {1.10759, 0}, + {0.836162, 0}, + {0.666068, 0}, + {0.666068, 0}, + {0.836162, 0}, + {1.10759, 0}, + {1.38444, 0}, + {1.59252, 0}, + {1.69928, 0}, + {1.76522, 0}, + {1.59252, 0}, + {1.31866, 0}, + {1.00096, 0}, + {0.740598, 0}, + {0.639427, 0}, + {0.740598, 0}, + {1.00096, 0}, + {1.31866, 0}, + {1.59252, 0}, + {1.76522, 0}, + {1.82256, 0}, + {1.69928, 0}, + {1.46388, 0}, + {1.15758, 
0}, + {0.862449, 0}, + {0.679431, 0}, + {0.679431, 1.20185e-17}, + {0.862449, 0}, + {1.15758, 0}, + {1.46388, 0}, + {1.69928, -1.20185e-17}, + {1.82256, 0}, + {1.82256, 0}, + {1.08798, 1.85927e-18}, + {0.930725, 1.85927e-18}, + {0.824264, 1.85927e-18}, + {0.775362, 1.85927e-18}, + {0.775362, 1.85927e-18}, + {0.824264, 1.85927e-18}, + {0.930725, 1.85927e-18}, + {1.08798, 1.85927e-18}, + {1.25388, 1.85927e-18}, + {1.36223, 1.85927e-18}, + {1.36223, 1.85927e-18}, + {1.25388, 1.85927e-18}, + {0.930725, 1.85927e-18}, + {0.869317, 1.85927e-18}, + {0.85665, 1.57371e-17}, + {0.858837, 1.66044e-17}, + {0.85665, 1.08732e-17}, + {0.869317, 1.38778e-17}, + {0.930725, 6.93889e-18}, + {1.04306, -5.5133e-18}, + {1.15758, -7.15462e-18}, + {1.20632, -1.01592e-17}, + {1.15758, -1.70981e-17}, + {1.04306, -5.5133e-18}, + {0.824264, -1.20185e-17}, + {0.85665, -5.07962e-18}, + {0.899033, -8.54907e-18}, + {0.899033, -1.20185e-17}, + {0.85665, -1.20185e-17}, + {0.824264, -2.14972e-17}, + {0.851575, -1.90113e-17}, + {0.931265, -1.80278e-17}, + {1.00096, -1.20185e-17}, + {1.00096, -9.47871e-18}, + {0.931265, -8.49514e-18}, + {0.851575, -6.00926e-18}, + {0.775362, -5.07962e-18}, + {0.858837, -5.07962e-18}, + {0.899033, -5.07962e-18}, + {0.858837, -1.20185e-17}, + {0.775362, -1.5488e-17}, + {0.72366, -1.5488e-17}, + {0.743927, -2.75065e-17}, + {0.804389, -1.80278e-17}, + {0.836162, -1.5488e-17}, + {0.804389, -1.5488e-17}, + {0.743927, -3.46945e-18}, + {0.72366, -6.00926e-18}, + {0.775362, -1.20185e-17}, + {0.85665, -1.28859e-17}, + {0.85665, -1.89574e-17}, + {0.775362, -1.89574e-17}, + {0.679431, -2.24269e-17}, + {0.639427, -3.40117e-17}, + {0.666068, -1.89574e-17}, + {0.707352, -2.49667e-17}, + {0.707352, -2.24269e-17}, + {0.666068, -9.97466e-18}, + {0.639427, -1.89574e-17}, + {0.679431, -1.29482e-17}, + {0.824264, -1.10889e-17}, + {0.869317, -1.71739e-17}, + {0.824264, -1.45583e-17}, + {0.72366, -2.14972e-17}, + {0.639427, -3.19056e-17}, + {0.616617, -5.29001e-17}, + {0.63886, 
-4.21894e-17}, + {0.655231, -4.32269e-17}, + {0.63886, -3.19056e-17}, + {0.616617, -4.82604e-18}, + {0.639427, -1.81523e-17}, + {0.72366, -1.01759e-17}, + {0.930725, -1.20185e-17}, + {0.930725, -8.54907e-18}, + {0.851575, -1.20185e-17}, + {0.743927, -1.20185e-17}, + {0.666068, -1.20185e-17}, + {0.63886, -1.37532e-17}, + {0.640151, -1.20185e-17}, + {0.640151, -1.20185e-17}, + {0.63886, -1.20185e-17}, + {0.666068, -1.37532e-17}, + {0.743927, -1.20185e-17}, + {0.851575, -1.20185e-17}, + {1.08798, 1.85927e-18}, + {1.04306, 1.85927e-18}, + {0.931265, -1.20185e-17}, + {0.804389, -1.20185e-17}, + {0.707352, -1.89574e-17}, + {0.655231, -1.89574e-17}, + {0.640151, -2.4037e-17}, + {0.655231, -2.4037e-17}, + {0.707352, -1.89574e-17}, + {0.804389, -1.89574e-17}, + {0.931265, 0}, + {1.04306, 0}, + {1.25388, -1.20185e-17}, + {1.15758, -1.89574e-17}, + {1.00096, -1.89574e-17}, + {0.836162, -1.89574e-17}, + {0.707352, -2.24269e-17}, + {0.63886, -6.93889e-18}, + {0.63886, -2.49667e-17}, + {0.707352, -2.49667e-17}, + {0.836162, -2.24269e-17}, + {1.00096, -3.09759e-17}, + {1.15758, -1.29482e-17}, + {1.25388, -1.29482e-17}, + {1.36223, -1.29482e-17}, + {1.20632, -1.29482e-17}, + {1.00096, -4.07037e-17}, + {0.804389, -1.29482e-17}, + {0.666068, 1.10889e-17}, + {0.616617, -1.29482e-17}, + {0.666068, -1.10889e-17}, + {0.804389, -1.29482e-17}, + {1.00096, -3.69852e-17}, + {1.20632, -1.29482e-17}, + {1.36223, 1.29482e-17}, + {1.42048, -1.29482e-17}, + {1.36223, -1.20185e-17}, + {1.15758, -1.20185e-17}, + {0.931265, -1.20185e-17}, + {0.743927, -1.20185e-17}, + {0.639427, -1.20185e-17}, + {0.639427, -1.20185e-17}, + {0.743927, -1.20185e-17}, + {0.931265, -1.20185e-17}, + {1.15758, -1.20185e-17}, + {1.36223, -1.20185e-17}, + {1.48405, -1.20185e-17}, + {1.48405, -1.20185e-17}, + {1.25388, 1.57371e-17}, + {1.04306, 1.57371e-17}, + {0.851575, -1.20185e-17}, + {0.72366, -1.20185e-17}, + {0.679431, -2.58963e-17}, + {0.72366, -4.09195e-17}, + {0.851575, -2.4037e-17}, + {1.04306, -3.60556e-17}, + 
{1.25388, -2.58963e-17}, + {1.42048, -1.08732e-17}, + {1.48405, 0}, + {1.42048, 1.20185e-17}, + {0.905261, 0}, + {0.930725, 3.46945e-18}, + {0.981652, 0}, + {1.00712, 0}, + {0.981652, 0}, + {0.930725, -1.73472e-18}, + {0.905261, 0}, + {0.930725, 0}, + {0.981652, 0}, + {1.00712, -1.73472e-18}, + {0.981652, 0}, + {0.930725, 0}, + {0.930725, 0}, + {1.03162, 0}, + {1.1084, 0}, + {1.1084, 1.73472e-18}, + {1.03162, 0}, + {0.930725, -6.00926e-18}, + {0.864823, 0}, + {0.851575, -8.67362e-19}, + {0.862449, 0}, + {0.862449, 6.00926e-18}, + {0.851575, 0}, + {0.864823, -8.67362e-19}, + {0.981652, 0}, + {1.1084, 3.46945e-18}, + {1.16145, 0}, + {1.1084, 0}, + {0.981652, 0}, + {0.851575, -1.73472e-18}, + {0.771224, 0}, + {0.743927, 0}, + {0.740598, 0}, + {0.743927, -1.73472e-18}, + {0.771224, 0}, + {0.851575, 0}, + {1.00712, 0}, + {1.1084, 6.93889e-18}, + {1.1084, 0}, + {1.00712, 0}, + {0.862449, 0}, + {0.743927, -3.46945e-18}, + {0.683306, 0}, + {0.666068, 0}, + {0.666068, 0}, + {0.683306, -3.46945e-18}, + {0.743927, 0}, + {0.862449, 0}, + {0.981652, 0}, + {1.03162, -1.38028e-18}, + {0.981652, 0}, + {0.862449, 0}, + {0.740598, 0}, + {0.666068, 6.90141e-19}, + {0.641509, 0}, + {0.63886, 0}, + {0.641509, 0}, + {0.666068, 6.90141e-19}, + {0.740598, 0}, + {0.862449, 0}, + {0.930725, -8.67362e-18}, + {0.930725, -8.67362e-18}, + {0.851575, -6.93889e-18}, + {0.743927, -6.93889e-18}, + {0.666068, 4.33681e-18}, + {0.63886, 1.63553e-17}, + {0.640151, 9.47871e-18}, + {0.640151, 1.24833e-17}, + {0.63886, 4.33681e-18}, + {0.666068, -7.68171e-18}, + {0.743927, -2.53981e-18}, + {0.851575, -5.54444e-18}, + {0.905261, 0}, + {0.864823, -1.73472e-18}, + {0.771224, 0}, + {0.683306, 0}, + {0.641509, 0}, + {0.640151, 8.67362e-19}, + {0.645833, 0}, + {0.640151, 0}, + {0.641509, 0}, + {0.683306, 8.67362e-19}, + {0.771224, 0}, + {0.864823, 0}, + {0.930725, 0}, + {0.851575, 0}, + {0.743927, 0}, + {0.666068, 0}, + {0.63886, 0}, + {0.640151, 0}, + {0.640151, 0}, + {0.63886, 0}, + {0.666068, 0}, + 
{0.743927, 0}, + {0.851575, 0}, + {0.930725, 0}, + {0.981652, 0}, + {0.862449, 0}, + {0.740598, 0}, + {0.666068, 0}, + {0.641509, 0}, + {0.63886, 0}, + {0.641509, 0}, + {0.666068, 0}, + {0.740598, 0}, + {0.862449, 0}, + {0.981652, 0}, + {1.03162, 0}, + {1.00712, 1.38778e-17}, + {0.862449, 1.38778e-17}, + {0.743927, 0}, + {0.683306, 0}, + {0.666068, -6.93889e-18}, + {0.666068, -1.29482e-17}, + {0.683306, -1.20185e-17}, + {0.743927, -1.20185e-17}, + {0.862449, -6.93889e-18}, + {1.00712, -9.29636e-19}, + {1.1084, 1.20185e-17}, + {1.1084, 1.20185e-17}, + {0.981652, 0}, + {0.851575, 1.38778e-17}, + {0.771224, 0}, + {0.743927, 0}, + {0.740598, 0}, + {0.743927, -6.93889e-18}, + {0.771224, 0}, + {0.851575, 0}, + {0.981652, 0}, + {1.1084, -6.93889e-18}, + {1.16145, 0}, + {1.1084, 0}, + {0.930725, 0}, + {0.864823, 0}, + {0.851575, 0}, + {0.862449, 0}, + {0.862449, 0}, + {0.851575, 0}, + {0.864823, 0}, + {0.930725, 0}, + {1.03162, 0}, + {1.1084, 0}, + {1.1084, 0}, + {1.03162, 0}, + {1.08798, 0}, + {1.25388, 0}, + {1.36223, 0}, + {1.36223, 0}, + {1.25388, 0}, + {1.08798, 1.20185e-17}, + {0.930725, 0}, + {0.824264, 0}, + {0.775362, 0}, + {0.775362, -1.20185e-17}, + {0.824264, 0}, + {0.930725, 0}, + {1.25388, 2.77556e-17}, + {1.42048, -1.38778e-17}, + {1.48405, -1.38778e-17}, + {1.42048, 0}, + {1.25388, -1.38778e-17}, + {1.04306, 6.93889e-18}, + {0.851575, 3.09759e-17}, + {0.72366, 0}, + {0.679431, -1.38778e-17}, + {0.72366, 6.93889e-18}, + {0.851575, -1.70981e-17}, + {1.04306, 0}, + {1.36223, 0}, + {1.48405, -6.93889e-18}, + {1.48405, 0}, + {1.36223, 0}, + {1.15758, 0}, + {0.931265, 3.46945e-18}, + {0.743927, 0}, + {0.639427, 0}, + {0.639427, 0}, + {0.743927, 3.46945e-18}, + {0.931265, 0}, + {1.15758, 0}, + {1.36223, 0}, + {1.42048, -2.07544e-18}, + {1.36223, 0}, + {1.20632, 0}, + {1.00096, 0}, + {0.804389, 1.03772e-18}, + {0.666068, 0}, + {0.616617, 0}, + {0.666068, 0}, + {0.804389, 1.03772e-18}, + {1.00096, 0}, + {1.20632, 0}, + {1.25388, 0}, + {1.25388, 0}, + {1.15758, 0}, + 
{1.00096, 0}, + {0.836162, 0}, + {0.707352, 0}, + {0.63886, 0}, + {0.63886, 0}, + {0.707352, 0}, + {0.836162, 0}, + {1.00096, 0}, + {1.15758, 0}, + {1.08798, -4.64818e-19}, + {1.04306, 0}, + {0.931265, 0}, + {0.804389, 0}, + {0.707352, 2.32409e-19}, + {0.655231, 0}, + {0.640151, -6.00926e-18}, + {0.655231, -4.50694e-18}, + {0.707352, 2.32409e-19}, + {0.804389, 0}, + {0.931265, 6.00926e-18}, + {1.04306, 4.50694e-18}, + {0.930725, 0}, + {0.851575, 0}, + {0.743927, 0}, + {0.666068, 0}, + {0.63886, 0}, + {0.640151, 0}, + {0.640151, 0}, + {0.63886, 0}, + {0.666068, 0}, + {0.743927, 0}, + {0.851575, 0}, + {0.930725, 0}, + {0.824264, 0}, + {0.72366, 0}, + {0.639427, 0}, + {0.616617, 0}, + {0.63886, 0}, + {0.655231, -1.50231e-18}, + {0.63886, 0}, + {0.616617, 0}, + {0.639427, 0}, + {0.72366, 1.50231e-18}, + {0.824264, 0}, + {0.869317, 0}, + {0.775362, 0}, + {0.679431, 0}, + {0.639427, 0}, + {0.666068, 0}, + {0.707352, 0}, + {0.707352, 0}, + {0.666068, 0}, + {0.639427, 0}, + {0.679431, 0}, + {0.775362, 0}, + {0.85665, 0}, + {0.85665, 0}, + {0.775362, -6.93889e-18}, + {0.72366, -3.46945e-18}, + {0.743927, 0}, + {0.804389, 0}, + {0.836162, 4.64818e-19}, + {0.804389, 1.73472e-18}, + {0.743927, 0}, + {0.72366, 6.00926e-18}, + {0.775362, 6.47408e-18}, + {0.858837, 1.73472e-18}, + {0.899033, 0}, + {0.858837, -6.00926e-18}, + {0.824264, 0}, + {0.851575, 0}, + {0.931265, 0}, + {1.00096, 0}, + {1.00096, 0}, + {0.931265, 0}, + {0.851575, 0}, + {0.824264, 0}, + {0.85665, 0}, + {0.899033, 0}, + {0.899033, 0}, + {0.85665, 0}, + {0.930725, 0}, + {1.04306, 0}, + {1.15758, 0}, + {1.20632, 0}, + {1.15758, 0}, + {1.04306, 0}, + {0.930725, 0}, + {0.869317, 0}, + {0.85665, 0}, + {0.858837, 0}, + {0.85665, 0}, + {0.869317, 0}, + {1.51507, 0}, + {1.69928, 0}, + {1.76522, 0}, + {1.69928, 0}, + {1.51507, 0}, + {1.25388, -2.4037e-17}, + {0.981652, 0}, + {0.775362, 0}, + {0.698375, 0}, + {0.775362, 2.4037e-17}, + {0.981652, 0}, + {1.25388, 0}, + {1.69928, 0}, + {1.82256, -1.38778e-17}, + {1.82256, 
0}, + {1.69928, 0}, + {1.46388, 0}, + {1.15758, -4.11352e-17}, + {0.862449, 0}, + {0.679431, 0}, + {0.679431, 0}, + {0.862449, 5.5013e-17}, + {1.15758, 0}, + {1.46388, 0}, + {1.76522, 0}, + {1.82256, 6.6502e-18}, + {1.76522, 0}, + {1.59252, 0}, + {1.31866, 0}, + {1.00096, -3.3251e-18}, + {0.740598, 0}, + {0.639427, 0}, + {0.740598, 0}, + {1.00096, -3.3251e-18}, + {1.31866, 0}, + {1.59252, 0}, + {1.69928, 0}, + {1.69928, 0}, + {1.59252, 0}, + {1.38444, 0}, + {1.10759, 0}, + {0.836162, 0}, + {0.666068, 0}, + {0.666068, 0}, + {0.836162, 0}, + {1.10759, 0}, + {1.38444, 0}, + {1.59252, 0}, + {1.51507, 0}, + {1.46388, 0}, + {1.31866, 0}, + {1.10759, 0}, + {0.881874, 0}, + {0.707352, 0}, + {0.641509, 0}, + {0.707352, 0}, + {0.881874, 0}, + {1.10759, 0}, + {1.31866, 0}, + {1.46388, 0}, + {1.25388, 1.38778e-17}, + {1.15758, 0}, + {1.00096, 0}, + {0.836162, 2.77556e-17}, + {0.707352, -6.93889e-18}, + {0.63886, -1.20185e-17}, + {0.63886, -6.00926e-18}, + {0.707352, -2.58963e-17}, + {0.836162, -6.93889e-18}, + {1.00096, 1.20185e-17}, + {1.15758, 6.00926e-18}, + {1.25388, -1.85927e-18}, + {0.981652, 0}, + {0.862449, 1.38778e-17}, + {0.740598, 0}, + {0.666068, 0}, + {0.641509, 0}, + {0.63886, -6.93889e-18}, + {0.641509, 0}, + {0.666068, 0}, + {0.740598, 0}, + {0.862449, -6.93889e-18}, + {0.981652, 0}, + {1.03162, 0}, + {0.775362, 0}, + {0.679431, 0}, + {0.639427, 0}, + {0.666068, 0}, + {0.707352, 0}, + {0.707352, 0}, + {0.666068, 0}, + {0.639427, 0}, + {0.679431, 0}, + {0.775362, 0}, + {0.85665, 0}, + {0.85665, 0}, + {0.698375, 0}, + {0.679431, 0}, + {0.740598, 0}, + {0.836162, 0}, + {0.881874, 0}, + {0.836162, -6.00926e-18}, + {0.740598, 0}, + {0.679431, 0}, + {0.698375, 0}, + {0.763031, 6.00926e-18}, + {0.797429, 0}, + {0.763031, 0}, + {0.775362, 0}, + {0.862449, 0}, + {1.00096, 0}, + {1.10759, 0}, + {1.10759, 0}, + {1.00096, 0}, + {0.862449, 0}, + {0.775362, 0}, + {0.763031, 0}, + {0.782576, 0}, + {0.782576, 0}, + {0.763031, 0}, + {0.981652, 0}, + {1.15758, 0}, + {1.31866, 
0}, + {1.38444, 0}, + {1.31866, 0}, + {1.15758, 0}, + {0.981652, 0}, + {0.85665, 0}, + {0.797429, 0}, + {0.782576, 0}, + {0.797429, 0}, + {0.85665, 0}, + {1.25388, 0}, + {1.46388, 0}, + {1.59252, 0}, + {1.59252, 0}, + {1.46388, 0}, + {1.25388, 2.4037e-17}, + {1.03162, 0}, + {0.85665, 0}, + {0.763031, 0}, + {0.763031, -2.4037e-17}, + {0.85665, 0}, + {1.03162, 0}, + {1.92793, 1.20185e-17}, + {2.03322, 2.58963e-17}, + {2.03322, 3.97741e-17}, + {1.92793, 1.20185e-17}, + {1.69928, 3.60556e-17}, + {1.36223, 5.07962e-18}, + {1.00712, -1.85927e-18}, + {0.775362, 1.20185e-17}, + {0.775362, -1.20185e-17}, + {1.00712, 5.07962e-18}, + {1.36223, -1.85927e-18}, + {1.69928, 1.20185e-17}, + {2.03322, 2.41616e-17}, + {2.07555, 2.2009e-17}, + {2.03322, 7.96727e-17}, + {1.88226, 1.35184e-16}, + {1.59252, 2.41616e-17}, + {1.20632, 2.52379e-17}, + {0.862449, 4.44801e-17}, + {0.72366, -3.13496e-17}, + {0.862449, 2.41616e-17}, + {1.20632, 2.52379e-17}, + {1.59252, -5.16681e-17}, + {1.88226, -3.13496e-17}, + {2.03322, 3.97741e-17}, + {2.03322, 2.58963e-17}, + {1.94478, 3.97741e-17}, + {1.7299, 3.97741e-17}, + {1.38444, 3.97741e-17}, + {1.00096, 4.6713e-17}, + {0.743927, 3.97741e-17}, + {0.743927, 3.97741e-17}, + {1.00096, 3.97741e-17}, + {1.38444, 4.6713e-17}, + {1.7299, 3.97741e-17}, + {1.94478, 3.97741e-17}, + {1.92793, 3.97741e-17}, + {1.88226, 1.20185e-17}, + {1.7299, 3.97741e-17}, + {1.45645, 3.97741e-17}, + {1.10759, 3.97741e-17}, + {0.804389, 5.36519e-17}, + {0.683306, 3.97741e-17}, + {0.804389, 3.97741e-17}, + {1.10759, 3.97741e-17}, + {1.45645, 5.36519e-17}, + {1.7299, 3.97741e-17}, + {1.88226, 3.97741e-17}, + {1.69928, 3.60556e-17}, + {1.59252, 6.38111e-17}, + {1.38444, 3.60556e-17}, + {1.10759, 3.60556e-17}, + {0.836162, 3.60556e-17}, + {0.666068, 2.21778e-17}, + {0.666068, 3.60556e-17}, + {0.836162, 3.60556e-17}, + {1.10759, 3.60556e-17}, + {1.38444, 2.21778e-17}, + {1.59252, 3.60556e-17}, + {1.69928, 3.60556e-17}, + {1.36223, 7.16174e-17}, + {1.20632, 8.54952e-17}, + 
{1.00096, 8.54952e-17}, + {0.804389, 5.77396e-17}, + {0.666068, 3.32044e-17}, + {0.616617, 1.4247e-17}, + {0.666068, -3.75829e-17}, + {0.804389, -3.19678e-17}, + {1.00096, -1.48697e-17}, + {1.20632, -9.79007e-18}, + {1.36223, 4.20398e-17}, + {1.42048, 6.41803e-17}, + {1.00712, 2.21778e-17}, + {0.862449, 3.60556e-17}, + {0.743927, 4.99333e-17}, + {0.683306, 4.99333e-17}, + {0.666068, 4.62148e-17}, + {0.666068, 3.4769e-17}, + {0.683306, 2.03185e-17}, + {0.743927, 8.29997e-18}, + {0.862449, -1.85927e-18}, + {1.00712, -4.29122e-18}, + {1.1084, -3.71854e-18}, + {1.1084, 8.29997e-18}, + {0.775362, -1.85927e-18}, + {0.72366, 1.20185e-17}, + {0.743927, 1.89574e-17}, + {0.804389, 2.58963e-17}, + {0.836162, 2.21778e-17}, + {0.804389, 1.52389e-17}, + {0.743927, -2.49095e-19}, + {0.72366, -1.57371e-17}, + {0.775362, -2.58963e-17}, + {0.858837, -3.28352e-17}, + {0.899033, -2.42861e-17}, + {0.858837, -1.57371e-17}, + {0.775362, -1.20185e-17}, + {0.862449, 1.85927e-18}, + {1.00096, 1.22676e-17}, + {1.10759, 1.57371e-17}, + {1.10759, 1.20185e-17}, + {1.00096, 2.07499e-18}, + {0.862449, -1.21431e-17}, + {0.775362, -2.58963e-17}, + {0.763031, -3.60556e-17}, + {0.782576, -3.99898e-17}, + {0.782576, -3.61801e-17}, + {0.763031, -2.58963e-17}, + {1.00712, -7.36012e-17}, + {1.20632, -7.36012e-17}, + {1.38444, -1.809e-17}, + {1.45645, 1.14003e-17}, + {1.38444, 3.67072e-17}, + {1.20632, 5.77396e-17}, + {1.00712, 2.9984e-17}, + {0.858837, 1.52389e-17}, + {0.782576, -1.73761e-17}, + {0.761485, -3.84085e-17}, + {0.782576, -6.61641e-17}, + {0.858837, -8.09093e-17}, + {1.36223, -2.58963e-17}, + {1.59252, 1.85927e-18}, + {1.7299, 1.85927e-18}, + {1.7299, 1.85927e-18}, + {1.59252, -1.85927e-18}, + {1.36223, -1.57371e-17}, + {1.1084, -1.57371e-17}, + {0.899033, -3.97741e-17}, + {0.782576, -4.99333e-17}, + {0.782576, -6.38111e-17}, + {0.899033, -6.38111e-17}, + {1.1084, -3.97741e-17}, + {1.69928, -1.85927e-18}, + {1.88226, 2.58963e-17}, + {1.94478, 1.20185e-17}, + {1.88226, 2.58963e-17}, + 
{1.69928, 2.21778e-17}, + {1.42048, 8.29997e-18}, + {1.1084, -8.79816e-18}, + {0.858837, -1.57371e-17}, + {0.763031, -2.58963e-17}, + {0.858837, -3.97741e-17}, + {1.1084, -8.79816e-18}, + {1.42048, -1.57371e-17}, + {2.14383, 0}, + {2.16623, -3.54439e-19}, + {2.14383, 0}, + {2.03322, 0}, + {1.76522, 0}, + {1.36223, 1.77219e-19}, + {0.981652, 0}, + {0.824264, 0}, + {0.981652, 0}, + {1.36223, 1.77219e-19}, + {1.76522, 0}, + {2.03322, 0}, + {2.16623, -6.93889e-18}, + {2.16623, -6.93889e-18}, + {2.11997, -6.93889e-18}, + {1.94478, -6.93889e-18}, + {1.59252, -6.93889e-18}, + {1.15758, -6.93889e-18}, + {0.851575, 1.70981e-17}, + {0.851575, -6.93889e-18}, + {1.15758, -6.93889e-18}, + {1.59252, -6.93889e-18}, + {1.94478, -3.09759e-17}, + {2.11997, -6.93889e-18}, + {2.14383, 0}, + {2.11997, 0}, + {2.00496, 0}, + {1.7299, 0}, + {1.31866, 0}, + {0.931265, 4.80741e-17}, + {0.771224, 0}, + {0.931265, 0}, + {1.31866, 0}, + {1.7299, -4.80741e-17}, + {2.00496, 0}, + {2.11997, 0}, + {2.03322, 0}, + {1.94478, 0}, + {1.7299, 0}, + {1.38444, 0}, + {1.00096, 0}, + {0.743927, 0}, + {0.743927, 0}, + {1.00096, 0}, + {1.38444, 0}, + {1.7299, 0}, + {1.94478, 0}, + {2.03322, 0}, + {1.76522, 0}, + {1.59252, 5.55112e-17}, + {1.31866, 0}, + {1.00096, 0}, + {0.740598, 0}, + {0.639427, -2.77556e-17}, + {0.740598, 0}, + {1.00096, 0}, + {1.31866, 0}, + {1.59252, -2.77556e-17}, + {1.76522, 0}, + {1.82256, 0}, + {1.36223, 3.46945e-18}, + {1.15758, 3.1225e-17}, + {0.931265, 3.1225e-17}, + {0.743927, 3.1225e-17}, + {0.639427, 2.75065e-17}, + {0.639427, 1.36287e-17}, + {0.743927, 1.61018e-18}, + {0.931265, -1.04083e-17}, + {1.15758, -2.05676e-17}, + {1.36223, -3.44454e-17}, + {1.48405, -2.24269e-17}, + {1.48405, -1.04083e-17}, + {0.981652, 0}, + {0.851575, 1.38778e-17}, + {0.771224, 0}, + {0.743927, 0}, + {0.740598, 0}, + {0.743927, -6.93889e-18}, + {0.771224, 0}, + {0.851575, 0}, + {0.981652, 0}, + {1.1084, -6.93889e-18}, + {1.16145, 0}, + {1.1084, 0}, + {0.824264, 0}, + {0.851575, 0}, + {0.931265, 0}, 
+ {1.00096, 0}, + {1.00096, 0}, + {0.931265, 0}, + {0.851575, 0}, + {0.824264, 0}, + {0.85665, 0}, + {0.899033, 0}, + {0.899033, 0}, + {0.85665, 0}, + {0.981652, 0}, + {1.15758, 0}, + {1.31866, 0}, + {1.38444, 0}, + {1.31866, 0}, + {1.15758, 0}, + {0.981652, 0}, + {0.85665, 0}, + {0.797429, 0}, + {0.782576, 0}, + {0.797429, 0}, + {0.85665, 0}, + {1.36223, 3.1225e-17}, + {1.59252, 3.1225e-17}, + {1.7299, 1.73472e-17}, + {1.7299, -3.46945e-18}, + {1.59252, -1.04083e-17}, + {1.36223, -1.04083e-17}, + {1.1084, -2.75065e-17}, + {0.899033, -1.70981e-17}, + {0.782576, -1.04083e-17}, + {0.782576, -1.04083e-17}, + {0.899033, 2.05676e-17}, + {1.1084, 3.09759e-17}, + {1.76522, 0}, + {1.94478, 2.77556e-17}, + {2.00496, 0}, + {1.94478, 0}, + {1.76522, 0}, + {1.48405, -1.38778e-17}, + {1.16145, 0}, + {0.899033, 0}, + {0.797429, 0}, + {0.899033, -1.38778e-17}, + {1.16145, 0}, + {1.48405, 0}, + {2.03322, 0}, + {2.11997, 1.38778e-17}, + {2.11997, 0}, + {2.03322, 0}, + {1.82256, 0}, + {1.48405, -6.93889e-18}, + {1.1084, 0}, + {0.85665, 0}, + {0.85665, 0}, + {1.1084, -6.93889e-18}, + {1.48405, 0}, + {1.82256, 0}, + {2.17701, 0}, + {2.17701, 0}, + {2.16623, 0}, + {2.03322, 0}, + {1.69928, 0}, + {1.25388, 0}, + {0.930725, 0}, + {0.930725, 0}, + {1.25388, 0}, + {1.69928, 0}, + {2.03322, 0}, + {2.16623, 0}, + {2.17701, -5.20417e-18}, + {2.17945, -2.77556e-17}, + {2.11997, 0}, + {1.88226, 0}, + {1.46388, 2.60209e-18}, + {1.04306, -1.01592e-17}, + {0.864823, 0}, + {1.04306, 1.20185e-17}, + {1.46388, 2.60209e-18}, + {1.88226, 3.79148e-17}, + {2.11997, 0}, + {2.17945, -1.20185e-17}, + {2.16623, 0}, + {2.11997, 0}, + {1.94478, 0}, + {1.59252, 0}, + {1.15758, 0}, + {0.851575, 0}, + {0.851575, 0}, + {1.15758, 0}, + {1.59252, 0}, + {1.94478, 0}, + {2.11997, 0}, + {2.16623, 0}, + {2.03322, 0}, + {1.88226, 0}, + {1.59252, 0}, + {1.20632, 0}, + {0.862449, 0}, + {0.72366, 2.4037e-17}, + {0.862449, 0}, + {1.20632, 0}, + {1.59252, 0}, + {1.88226, -2.4037e-17}, + {2.03322, 0}, + {2.07555, 0}, + 
{1.69928, 0}, + {1.46388, 0}, + {1.15758, 0}, + {0.862449, 0}, + {0.679431, 0}, + {0.679431, -1.20185e-17}, + {0.862449, 0}, + {1.15758, 0}, + {1.46388, 0}, + {1.69928, 1.20185e-17}, + {1.82256, 0}, + {1.82256, 0}, + {1.25388, 0}, + {1.04306, 0}, + {0.851575, 0}, + {0.72366, 0}, + {0.679431, 0}, + {0.72366, 1.50231e-18}, + {0.851575, 0}, + {1.04306, 0}, + {1.25388, 0}, + {1.42048, -1.50231e-18}, + {1.48405, 0}, + {1.42048, 0}, + {0.930725, 0}, + {0.864823, 0}, + {0.851575, 0}, + {0.862449, 0}, + {0.862449, 0}, + {0.851575, 0}, + {0.864823, 0}, + {0.930725, 0}, + {1.03162, 0}, + {1.1084, 0}, + {1.1084, 0}, + {1.03162, 0}, + {0.930725, 0}, + {1.04306, 0}, + {1.15758, 0}, + {1.20632, 0}, + {1.15758, 0}, + {1.04306, 0}, + {0.930725, 0}, + {0.869317, 0}, + {0.85665, 0}, + {0.858837, 0}, + {0.85665, 0}, + {0.869317, 0}, + {1.25388, 0}, + {1.46388, 0}, + {1.59252, 0}, + {1.59252, 0}, + {1.46388, 0}, + {1.25388, 0}, + {1.03162, 0}, + {0.85665, 0}, + {0.763031, 0}, + {0.763031, 0}, + {0.85665, 0}, + {1.03162, 0}, + {1.69928, 0}, + {1.88226, 2.77556e-17}, + {1.94478, 0}, + {1.88226, 0}, + {1.69928, 0}, + {1.42048, -1.38778e-17}, + {1.1084, 0}, + {0.858837, 0}, + {0.763031, 0}, + {0.858837, -1.38778e-17}, + {1.1084, 0}, + {1.42048, 0}, + {2.03322, 0}, + {2.11997, 0}, + {2.11997, 0}, + {2.03322, 0}, + {1.82256, 0}, + {1.48405, 0}, + {1.1084, 0}, + {0.85665, 0}, + {0.85665, 0}, + {1.1084, 0}, + {1.48405, 0}, + {1.82256, 0}, + {2.16623, 0}, + {2.17945, 5.846e-18}, + {2.16623, 0}, + {2.07555, 0}, + {1.82256, 0}, + {1.42048, -2.923e-18}, + {1.03162, 0}, + {0.869317, 0}, + {1.03162, 0}, + {1.42048, -2.923e-18}, + {1.82256, 0}, + {2.07555, 0}}; + const std::vector> in_3 = {{-1922.64, 0}, + {-1001.68, 433.439}, + {-219.942, 33.69}, + {-68.6663, -31.5682}, + {-19.01, -3.81031}, + {-0.58927, -0.511793}, + {0.996488, -7.89357e-17}, + {-0.58927, 0.511793}, + {-19.01, 3.81031}, + {-68.6663, 31.5682}, + {-219.942, -33.69}, + {-1001.68, -433.439}, + {-1001.68, 433.439}, + {-517.273, 
5.9508e-14}, + {-352.926, -115.37}, + {-141.346, -11.6655}, + {-25.4876, 7.80357}, + {-0.501786, 1.37769}, + {1.08582, -0.0715093}, + {0.894073, 0.768163}, + {-2.92014, 2.09061}, + {-25.1033, -5.63667}, + {-174.402, -55.7416}, + {-690.893, -8.6513e-15}, + {-219.942, 33.69}, + {-352.926, -115.37}, + {-281.447, 7.54952e-14}, + {-85.5703, 25.4729}, + {-13.6457, 2.44125}, + {0.99011, 1.07155}, + {0.910811, -0.175105}, + {0.653152, -0.025503}, + {0.169545, -0.652296}, + {-11.7051, -3.59868}, + {-72.4614, -2.89031e-14}, + {-174.402, 55.7416}, + {-68.6663, -31.5682}, + {-141.346, -11.6655}, + {-85.5703, 25.4729}, + {-28.2549, -3.81917e-14}, + {-9.75792, -2.71861}, + {1.33155, -0.16363}, + {0.66818, -0.0435261}, + {0.136765, 0.00586452}, + {0.71571, -0.0128863}, + {1.26225, -4.03935e-14}, + {-11.7051, 3.59868}, + {-25.1033, 5.63667}, + {-19.01, -3.81031}, + {-25.4876, 7.80357}, + {-13.6457, 2.44125}, + {-9.75792, -2.71861}, + {1.25751, 1.30778e-15}, + {1.13756, -0.487619}, + {0.344756, -0.0047045}, + {0.187321, 0.0707209}, + {0.272653, -2.48445e-14}, + {0.71571, 0.0128863}, + {0.169545, 0.652296}, + {-2.92014, -2.09061}, + {-0.58927, -0.511793}, + {-0.501786, 1.37769}, + {0.99011, 1.07155}, + {1.33155, -0.16363}, + {1.13756, -0.487619}, + {0.536417, -2.98492e-14}, + {0.198078, 0.0681101}, + {0.245083, -9.7141e-15}, + {0.187321, -0.0707209}, + {0.136765, -0.00586452}, + {0.653152, 0.025503}, + {0.894073, -0.768163}, + {0.996488, -5.0257e-15}, + {1.08582, -0.0715093}, + {0.910811, -0.175105}, + {0.66818, -0.0435261}, + {0.344756, -0.0047045}, + {0.198078, 0.0681101}, + {0.256446, -1.4581e-15}, + {0.198078, -0.0681101}, + {0.344756, 0.0047045}, + {0.66818, 0.0435261}, + {0.910811, 0.175105}, + {1.08582, 0.0715093}, + {-0.58927, 0.511793}, + {0.894073, 0.768163}, + {0.653152, -0.025503}, + {0.136765, 0.00586452}, + {0.187321, 0.0707209}, + {0.245083, -9.70357e-15}, + {0.198078, -0.0681101}, + {0.536417, 2.82968e-14}, + {1.13756, 0.487619}, + {1.33155, 0.16363}, + {0.99011, 
-1.07155}, + {-0.501786, -1.37769}, + {-19.01, 3.81031}, + {-2.92014, 2.09061}, + {0.169545, -0.652296}, + {0.71571, -0.0128863}, + {0.272653, 2.48445e-14}, + {0.187321, -0.0707209}, + {0.344756, 0.0047045}, + {1.13756, 0.487619}, + {1.25751, -1.30778e-15}, + {-9.75792, 2.71861}, + {-13.6457, -2.44125}, + {-25.4876, -7.80357}, + {-68.6663, 31.5682}, + {-25.1033, -5.63667}, + {-11.7051, -3.59868}, + {1.26225, 6.30607e-14}, + {0.71571, 0.0128863}, + {0.136765, -0.00586452}, + {0.66818, 0.0435261}, + {1.33155, 0.16363}, + {-9.75792, 2.71861}, + {-28.2549, 2.77768e-14}, + {-85.5703, -25.4729}, + {-141.346, 11.6655}, + {-219.942, -33.69}, + {-174.402, -55.7416}, + {-72.4614, -7.99361e-15}, + {-11.7051, 3.59868}, + {0.169545, 0.652296}, + {0.653152, 0.025503}, + {0.910811, 0.175105}, + {0.99011, -1.07155}, + {-13.6457, -2.44125}, + {-85.5703, -25.4729}, + {-281.447, -5.40755e-14}, + {-352.926, 115.37}, + {-1001.68, -433.439}, + {-690.893, 1.06581e-14}, + {-174.402, 55.7416}, + {-25.1033, 5.63667}, + {-2.92014, -2.09061}, + {0.894073, -0.768163}, + {1.08582, 0.0715093}, + {-0.501786, -1.37769}, + {-25.4876, -7.80357}, + {-141.346, 11.6655}, + {-352.926, 115.37}, + {-517.273, -3.16493e-14}, + {-1001.68, 433.439}, + {-517.273, 7.54952e-14}, + {-352.926, -115.37}, + {-141.346, -11.6655}, + {-25.4876, 7.80357}, + {-0.501786, 1.37769}, + {1.08582, -0.0715093}, + {0.894073, 0.768163}, + {-2.92014, 2.09061}, + {-25.1033, -5.63667}, + {-174.402, -55.7416}, + {-690.893, 1.97704e-14}, + {-517.273, 2.84217e-14}, + {-1001.68, -433.439}, + {-690.893, -4.9738e-14}, + {-174.402, 55.7416}, + {-25.1033, 5.63667}, + {-2.92014, -2.09061}, + {0.894073, -0.768163}, + {1.08582, 0.0715093}, + {-0.501786, -1.37769}, + {-25.4876, -7.80357}, + {-141.346, 11.6655}, + {-352.926, 115.37}, + {-352.926, -115.37}, + {-690.893, -2.4841e-14}, + {-352.926, 115.37}, + {-100.781, 4.56302e-14}, + {-39.4244, -13.5375}, + {-8.79435, -1.30817}, + {0.157364, -0.195195}, + {0.548415, 3.66208e-14}, + {0.157364, 
0.195195}, + {-8.79435, 1.30817}, + {-39.4244, 13.5375}, + {-100.781, 1.20069e-14}, + {-141.346, -11.6655}, + {-174.402, 55.7416}, + {-100.781, 5.10703e-14}, + {-85.5703, -25.4729}, + {-36.1195, 2.5003e-14}, + {-5.30025, 1.65834}, + {0.642398, 0.553017}, + {0.422501, 0.0375059}, + {0.608177, 0.0924076}, + {0.984075, -0.568373}, + {-6.57219, -1.46426e-14}, + {-39.4244, -13.5375}, + {-25.4876, 7.80357}, + {-25.1033, 5.63667}, + {-39.4244, -13.5375}, + {-36.1195, 1.14353e-14}, + {-9.75792, 2.71861}, + {-0.90306, 1.82384e-14}, + {0.856068, 0.244528}, + {0.331794, -0.0443841}, + {0.197596, -0.0729406}, + {0.718888, -1.86825e-14}, + {0.984075, 0.568373}, + {-8.79435, -1.30817}, + {-0.501786, 1.37769}, + {-2.92014, -2.09061}, + {-8.79435, -1.30817}, + {-5.30025, 1.65834}, + {-0.90306, 6.65646e-14}, + {1.13756, 0.487619}, + {0.609244, 2.33578e-14}, + {0.183862, -0.0625824}, + {0.116983, -1.51613e-14}, + {0.197596, 0.0729406}, + {0.608177, -0.0924076}, + {0.157364, -0.195195}, + {1.08582, -0.0715093}, + {0.894073, -0.768163}, + {0.157364, -0.195195}, + {0.642398, 0.553017}, + {0.856068, 0.244528}, + {0.609244, -1.08488e-14}, + {0.198078, -0.0681101}, + {0.119232, -2.82893e-14}, + {0.183862, 0.0625824}, + {0.331794, 0.0443841}, + {0.422501, -0.0375059}, + {0.548415, -1.3122e-14}, + {0.894073, 0.768163}, + {1.08582, 0.0715093}, + {0.548415, 5.96745e-15}, + {0.422501, 0.0375059}, + {0.331794, -0.0443841}, + {0.183862, -0.0625824}, + {0.119232, -5.39905e-15}, + {0.198078, 0.0681101}, + {0.609244, 1.71134e-16}, + {0.856068, -0.244528}, + {0.642398, -0.553017}, + {0.157364, 0.195195}, + {-2.92014, 2.09061}, + {-0.501786, -1.37769}, + {0.157364, 0.195195}, + {0.608177, 0.0924076}, + {0.197596, -0.0729406}, + {0.116983, 1.40688e-14}, + {0.183862, 0.0625824}, + {0.609244, -5.18358e-15}, + {1.13756, -0.487619}, + {-0.90306, -2.30043e-15}, + {-5.30025, -1.65834}, + {-8.79435, 1.30817}, + {-25.1033, -5.63667}, + {-25.4876, -7.80357}, + {-8.79435, 1.30817}, + {0.984075, -0.568373}, + 
{0.718888, -3.72397e-14}, + {0.197596, 0.0729406}, + {0.331794, 0.0443841}, + {0.856068, -0.244528}, + {-0.90306, -2.49328e-14}, + {-9.75792, -2.71861}, + {-36.1195, -2.84933e-14}, + {-39.4244, 13.5375}, + {-174.402, -55.7416}, + {-141.346, 11.6655}, + {-39.4244, 13.5375}, + {-6.57219, -3.10862e-15}, + {0.984075, 0.568373}, + {0.608177, -0.0924076}, + {0.422501, -0.0375059}, + {0.642398, -0.553017}, + {-5.30025, -1.65834}, + {-36.1195, -1.1744e-14}, + {-85.5703, 25.4729}, + {-100.781, -5.24575e-15}, + {-690.893, -1.64069e-14}, + {-352.926, 115.37}, + {-100.781, 4.28486e-15}, + {-39.4244, -13.5375}, + {-8.79435, -1.30817}, + {0.157364, -0.195195}, + {0.548415, -2.47213e-15}, + {0.157364, 0.195195}, + {-8.79435, 1.30817}, + {-39.4244, 13.5375}, + {-100.781, -2.09326e-14}, + {-352.926, -115.37}, + {-219.942, 33.69}, + {-352.926, -115.37}, + {-281.447, -2.30926e-14}, + {-85.5703, 25.4729}, + {-13.6457, 2.44125}, + {0.99011, 1.07155}, + {0.910811, -0.175105}, + {0.653152, -0.025503}, + {0.169545, -0.652296}, + {-11.7051, -3.59868}, + {-72.4614, -2.45898e-14}, + {-174.402, 55.7416}, + {-352.926, -115.37}, + {-690.893, -6.4928e-14}, + {-352.926, 115.37}, + {-100.781, 3.07079e-14}, + {-39.4244, -13.5375}, + {-8.79435, -1.30817}, + {0.157364, -0.195195}, + {0.548415, 3.23816e-15}, + {0.157364, 0.195195}, + {-8.79435, 1.30817}, + {-39.4244, 13.5375}, + {-100.781, 1.55451e-14}, + {-281.447, -4.17444e-14}, + {-352.926, 115.37}, + {-219.942, -33.69}, + {-174.402, -55.7416}, + {-72.4614, 6.06555e-15}, + {-11.7051, 3.59868}, + {0.169545, 0.652296}, + {0.653152, 0.025503}, + {0.910811, 0.175105}, + {0.99011, -1.07155}, + {-13.6457, -2.44125}, + {-85.5703, -25.4729}, + {-85.5703, 25.4729}, + {-100.781, 3.55271e-15}, + {-174.402, -55.7416}, + {-141.346, 11.6655}, + {-39.4244, 13.5375}, + {-6.57219, -3.34301e-15}, + {0.984075, 0.568373}, + {0.608177, -0.0924076}, + {0.422501, -0.0375059}, + {0.642398, -0.553017}, + {-5.30025, -1.65834}, + {-36.1195, -1.78338e-14}, + {-13.6457, 
2.44125}, + {-39.4244, -13.5375}, + {-72.4614, -5.77808e-15}, + {-39.4244, 13.5375}, + {-13.6457, -2.44125}, + {-5.30025, -1.65834}, + {1.10754, 3.09516e-15}, + {0.438213, -0.0482423}, + {0.123572, -2.49539e-14}, + {0.438213, 0.0482423}, + {1.10754, 2.15556e-14}, + {-5.30025, 1.65834}, + {0.99011, 1.07155}, + {-8.79435, -1.30817}, + {-11.7051, 3.59868}, + {-6.57219, -5.10703e-15}, + {-5.30025, -1.65834}, + {1.33155, 0.16363}, + {0.856068, -0.244528}, + {0.234236, -8.58421e-15}, + {0.175034, 0.0639427}, + {0.247817, -1.38739e-14}, + {0.438213, -0.0482423}, + {0.642398, 0.553017}, + {0.910811, -0.175105}, + {0.157364, -0.195195}, + {0.169545, 0.652296}, + {0.984075, 0.568373}, + {1.10754, 2.34291e-14}, + {0.856068, -0.244528}, + {0.344756, 0.0047045}, + {0.183862, 0.0625824}, + {0.22958, 1.50402e-14}, + {0.175034, -0.0639427}, + {0.123572, -1.0726e-14}, + {0.422501, 0.0375059}, + {0.653152, -0.025503}, + {0.548415, -2.20767e-15}, + {0.653152, 0.025503}, + {0.608177, -0.0924076}, + {0.438213, -0.0482423}, + {0.234236, 2.35693e-14}, + {0.183862, 0.0625824}, + {0.245083, -5.32172e-15}, + {0.183862, -0.0625824}, + {0.234236, -1.68369e-14}, + {0.438213, 0.0482423}, + {0.608177, 0.0924076}, + {0.169545, -0.652296}, + {0.157364, 0.195195}, + {0.910811, 0.175105}, + {0.422501, -0.0375059}, + {0.123572, 1.25992e-14}, + {0.175034, 0.0639427}, + {0.22958, -2.41685e-15}, + {0.183862, -0.0625824}, + {0.344756, -0.0047045}, + {0.856068, 0.244528}, + {1.10754, 1.86174e-15}, + {0.984075, -0.568373}, + {-11.7051, -3.59868}, + {-8.79435, 1.30817}, + {0.99011, -1.07155}, + {0.642398, -0.553017}, + {0.438213, 0.0482423}, + {0.247817, 1.27324e-14}, + {0.175034, -0.0639427}, + {0.234236, -1.14132e-14}, + {0.856068, 0.244528}, + {1.33155, -0.16363}, + {-5.30025, 1.65834}, + {-6.57219, -1.79842e-15}, + {-72.4614, 1.40308e-14}, + {-39.4244, 13.5375}, + {-13.6457, -2.44125}, + {-5.30025, -1.65834}, + {1.10754, 2.81848e-17}, + {0.438213, -0.0482423}, + {0.123572, 4.57785e-15}, + {0.438213, 
0.0482423}, + {1.10754, -6.1253e-15}, + {-5.30025, 1.65834}, + {-13.6457, 2.44125}, + {-39.4244, -13.5375}, + {-174.402, 55.7416}, + {-100.781, 1.86517e-14}, + {-85.5703, -25.4729}, + {-36.1195, 3.01981e-14}, + {-5.30025, 1.65834}, + {0.642398, 0.553017}, + {0.422501, 0.0375059}, + {0.608177, 0.0924076}, + {0.984075, -0.568373}, + {-6.57219, -5.76082e-15}, + {-39.4244, -13.5375}, + {-141.346, -11.6655}, + {-68.6663, -31.5682}, + {-141.346, -11.6655}, + {-85.5703, 25.4729}, + {-28.2549, 4.44089e-15}, + {-9.75792, -2.71861}, + {1.33155, -0.16363}, + {0.66818, -0.0435261}, + {0.136765, 0.00586452}, + {0.71571, -0.0128863}, + {1.26225, -1.09583e-14}, + {-11.7051, 3.59868}, + {-25.1033, 5.63667}, + {-141.346, -11.6655}, + {-174.402, 55.7416}, + {-100.781, 9.10383e-15}, + {-85.5703, -25.4729}, + {-36.1195, -8.23997e-15}, + {-5.30025, 1.65834}, + {0.642398, 0.553017}, + {0.422501, 0.0375059}, + {0.608177, 0.0924076}, + {0.984075, -0.568373}, + {-6.57219, -6.49073e-15}, + {-39.4244, -13.5375}, + {-85.5703, 25.4729}, + {-100.781, 1.24345e-14}, + {-174.402, -55.7416}, + {-141.346, 11.6655}, + {-39.4244, 13.5375}, + {-6.57219, 2.87424e-15}, + {0.984075, 0.568373}, + {0.608177, -0.0924076}, + {0.422501, -0.0375059}, + {0.642398, -0.553017}, + {-5.30025, -1.65834}, + {-36.1195, 3.06972e-14}, + {-28.2549, -9.32587e-15}, + {-85.5703, -25.4729}, + {-141.346, 11.6655}, + {-68.6663, 31.5682}, + {-25.1033, -5.63667}, + {-11.7051, -3.59868}, + {1.26225, 9.24574e-15}, + {0.71571, 0.0128863}, + {0.136765, -0.00586452}, + {0.66818, 0.0435261}, + {1.33155, 0.16363}, + {-9.75792, 2.71861}, + {-9.75792, -2.71861}, + {-36.1195, -5.77316e-15}, + {-39.4244, 13.5375}, + {-25.1033, -5.63667}, + {-25.4876, -7.80357}, + {-8.79435, 1.30817}, + {0.984075, -0.568373}, + {0.718888, -6.50192e-16}, + {0.197596, 0.0729406}, + {0.331794, 0.0443841}, + {0.856068, -0.244528}, + {-0.90306, 2.42655e-15}, + {1.33155, -0.16363}, + {-5.30025, 1.65834}, + {-6.57219, 5.16254e-15}, + {-11.7051, -3.59868}, + 
{-8.79435, 1.30817}, + {0.99011, -1.07155}, + {0.642398, -0.553017}, + {0.438213, 0.0482423}, + {0.247817, -3.29037e-15}, + {0.175034, -0.0639427}, + {0.234236, 7.12867e-15}, + {0.856068, 0.244528}, + {0.66818, -0.0435261}, + {0.642398, 0.553017}, + {0.984075, 0.568373}, + {1.26225, 6.85556e-15}, + {0.984075, -0.568373}, + {0.642398, -0.553017}, + {0.66818, 0.0435261}, + {0.331794, 0.0443841}, + {0.175034, -0.0639427}, + {0.110194, 2.47264e-15}, + {0.175034, 0.0639427}, + {0.331794, -0.0443841}, + {0.136765, 0.00586452}, + {0.422501, 0.0375059}, + {0.608177, -0.0924076}, + {0.71571, 0.0128863}, + {0.718888, -7.88276e-15}, + {0.438213, 0.0482423}, + {0.331794, 0.0443841}, + {0.187321, -0.0707209}, + {0.116983, 4.30402e-15}, + {0.175034, 0.0639427}, + {0.247817, 1.06916e-14}, + {0.197596, -0.0729406}, + {0.71571, -0.0128863}, + {0.608177, 0.0924076}, + {0.422501, -0.0375059}, + {0.136765, -0.00586452}, + {0.197596, 0.0729406}, + {0.247817, 1.63574e-14}, + {0.175034, -0.0639427}, + {0.116983, -5.83814e-16}, + {0.187321, 0.0707209}, + {0.331794, -0.0443841}, + {0.438213, -0.0482423}, + {0.718888, 5.76196e-15}, + {1.26225, -5.32052e-16}, + {0.984075, -0.568373}, + {0.642398, -0.553017}, + {0.66818, 0.0435261}, + {0.331794, 0.0443841}, + {0.175034, -0.0639427}, + {0.110194, -5.31886e-16}, + {0.175034, 0.0639427}, + {0.331794, -0.0443841}, + {0.66818, -0.0435261}, + {0.642398, 0.553017}, + {0.984075, 0.568373}, + {-11.7051, 3.59868}, + {-6.57219, 9.99201e-16}, + {-5.30025, -1.65834}, + {1.33155, 0.16363}, + {0.856068, -0.244528}, + {0.234236, 8.80874e-16}, + {0.175034, 0.0639427}, + {0.247817, 2.06115e-15}, + {0.438213, -0.0482423}, + {0.642398, 0.553017}, + {0.99011, 1.07155}, + {-8.79435, -1.30817}, + {-25.1033, 5.63667}, + {-39.4244, -13.5375}, + {-36.1195, -5.55112e-16}, + {-9.75792, 2.71861}, + {-0.90306, 1.33227e-15}, + {0.856068, 0.244528}, + {0.331794, -0.0443841}, + {0.197596, -0.0729406}, + {0.718888, 1.33227e-15}, + {0.984075, 0.568373}, + {-8.79435, -1.30817}, 
+ {-25.4876, 7.80357}, + {-19.01, -3.81031}, + {-25.4876, 7.80357}, + {-13.6457, 2.44125}, + {-9.75792, -2.71861}, + {1.25751, -4.45683e-15}, + {1.13756, -0.487619}, + {0.344756, -0.0047045}, + {0.187321, 0.0707209}, + {0.272653, -7.53357e-15}, + {0.71571, 0.0128863}, + {0.169545, 0.652296}, + {-2.92014, -2.09061}, + {-25.4876, 7.80357}, + {-25.1033, 5.63667}, + {-39.4244, -13.5375}, + {-36.1195, -3.44169e-15}, + {-9.75792, 2.71861}, + {-0.90306, 4.82121e-15}, + {0.856068, 0.244528}, + {0.331794, -0.0443841}, + {0.197596, -0.0729406}, + {0.718888, -7.48575e-15}, + {0.984075, 0.568373}, + {-8.79435, -1.30817}, + {-13.6457, 2.44125}, + {-39.4244, -13.5375}, + {-72.4614, -9.78397e-15}, + {-39.4244, 13.5375}, + {-13.6457, -2.44125}, + {-5.30025, -1.65834}, + {1.10754, 7.85206e-15}, + {0.438213, -0.0482423}, + {0.123572, -8.69677e-15}, + {0.438213, 0.0482423}, + {1.10754, 1.69858e-15}, + {-5.30025, 1.65834}, + {-9.75792, -2.71861}, + {-36.1195, -1.76525e-14}, + {-39.4244, 13.5375}, + {-25.1033, -5.63667}, + {-25.4876, -7.80357}, + {-8.79435, 1.30817}, + {0.984075, -0.568373}, + {0.718888, -7.73968e-15}, + {0.197596, 0.0729406}, + {0.331794, 0.0443841}, + {0.856068, -0.244528}, + {-0.90306, -1.5862e-15}, + {1.25751, -5.77316e-15}, + {-9.75792, 2.71861}, + {-13.6457, -2.44125}, + {-25.4876, -7.80357}, + {-19.01, 3.81031}, + {-2.92014, 2.09061}, + {0.169545, -0.652296}, + {0.71571, -0.0128863}, + {0.272653, 7.41352e-15}, + {0.187321, -0.0707209}, + {0.344756, 0.0047045}, + {1.13756, 0.487619}, + {1.13756, -0.487619}, + {-0.90306, 1.44884e-14}, + {-5.30025, -1.65834}, + {-8.79435, 1.30817}, + {-2.92014, 2.09061}, + {-0.501786, -1.37769}, + {0.157364, 0.195195}, + {0.608177, 0.0924076}, + {0.197596, -0.0729406}, + {0.116983, -1.55166e-14}, + {0.183862, 0.0625824}, + {0.609244, -4.87742e-15}, + {0.344756, -0.0047045}, + {0.856068, 0.244528}, + {1.10754, 1.37806e-14}, + {0.984075, -0.568373}, + {0.169545, -0.652296}, + {0.157364, 0.195195}, + {0.910811, 0.175105}, + {0.422501, 
-0.0375059}, + {0.123572, -2.23074e-14}, + {0.175034, 0.0639427}, + {0.22958, 7.56348e-15}, + {0.183862, -0.0625824}, + {0.187321, 0.0707209}, + {0.331794, -0.0443841}, + {0.438213, -0.0482423}, + {0.718888, 2.74173e-15}, + {0.71571, -0.0128863}, + {0.608177, 0.0924076}, + {0.422501, -0.0375059}, + {0.136765, -0.00586452}, + {0.197596, 0.0729406}, + {0.247817, 1.79677e-14}, + {0.175034, -0.0639427}, + {0.116983, 2.32163e-15}, + {0.272653, 1.09197e-14}, + {0.197596, -0.0729406}, + {0.123572, -5.44548e-15}, + {0.197596, 0.0729406}, + {0.272653, 1.31859e-14}, + {0.197596, -0.0729406}, + {0.123572, 7.19444e-15}, + {0.197596, 0.0729406}, + {0.272653, 2.36674e-14}, + {0.197596, -0.0729406}, + {0.123572, -1.17805e-14}, + {0.197596, 0.0729406}, + {0.71571, 0.0128863}, + {0.718888, -5.54157e-15}, + {0.438213, 0.0482423}, + {0.331794, 0.0443841}, + {0.187321, -0.0707209}, + {0.116983, -2.0205e-15}, + {0.175034, 0.0639427}, + {0.247817, 1.79555e-14}, + {0.197596, -0.0729406}, + {0.136765, 0.00586452}, + {0.422501, 0.0375059}, + {0.608177, -0.0924076}, + {0.169545, 0.652296}, + {0.984075, 0.568373}, + {1.10754, 2.94209e-15}, + {0.856068, -0.244528}, + {0.344756, 0.0047045}, + {0.183862, 0.0625824}, + {0.22958, -5.96334e-15}, + {0.175034, -0.0639427}, + {0.123572, -5.29063e-15}, + {0.422501, 0.0375059}, + {0.910811, -0.175105}, + {0.157364, -0.195195}, + {-2.92014, -2.09061}, + {-8.79435, -1.30817}, + {-5.30025, 1.65834}, + {-0.90306, -6.27276e-15}, + {1.13756, 0.487619}, + {0.609244, -1.51453e-14}, + {0.183862, -0.0625824}, + {0.116983, -9.54253e-15}, + {0.197596, 0.0729406}, + {0.608177, -0.0924076}, + {0.157364, -0.195195}, + {-0.501786, 1.37769}, + {-0.58927, -0.511793}, + {-0.501786, 1.37769}, + {0.99011, 1.07155}, + {1.33155, -0.16363}, + {1.13756, -0.487619}, + {0.536417, 6.39291e-15}, + {0.198078, 0.0681101}, + {0.245083, -7.14518e-15}, + {0.187321, -0.0707209}, + {0.136765, -0.00586452}, + {0.653152, 0.025503}, + {0.894073, -0.768163}, + {-0.501786, 1.37769}, + 
{-2.92014, -2.09061}, + {-8.79435, -1.30817}, + {-5.30025, 1.65834}, + {-0.90306, 2.35457e-14}, + {1.13756, 0.487619}, + {0.609244, -7.22771e-15}, + {0.183862, -0.0625824}, + {0.116983, -1.22214e-14}, + {0.197596, 0.0729406}, + {0.608177, -0.0924076}, + {0.157364, -0.195195}, + {0.99011, 1.07155}, + {-8.79435, -1.30817}, + {-11.7051, 3.59868}, + {-6.57219, 3.69149e-15}, + {-5.30025, -1.65834}, + {1.33155, 0.16363}, + {0.856068, -0.244528}, + {0.234236, -2.03201e-14}, + {0.175034, 0.0639427}, + {0.247817, 4.15067e-15}, + {0.438213, -0.0482423}, + {0.642398, 0.553017}, + {1.33155, -0.16363}, + {-5.30025, 1.65834}, + {-6.57219, 1.11022e-16}, + {-11.7051, -3.59868}, + {-8.79435, 1.30817}, + {0.99011, -1.07155}, + {0.642398, -0.553017}, + {0.438213, 0.0482423}, + {0.247817, 1.74335e-15}, + {0.175034, -0.0639427}, + {0.234236, 3.04986e-14}, + {0.856068, 0.244528}, + {1.13756, -0.487619}, + {-0.90306, 9.27036e-15}, + {-5.30025, -1.65834}, + {-8.79435, 1.30817}, + {-2.92014, 2.09061}, + {-0.501786, -1.37769}, + {0.157364, 0.195195}, + {0.608177, 0.0924076}, + {0.197596, -0.0729406}, + {0.116983, 2.00811e-14}, + {0.183862, 0.0625824}, + {0.609244, 2.11012e-14}, + {0.536417, 5.32907e-15}, + {1.13756, 0.487619}, + {1.33155, 0.16363}, + {0.99011, -1.07155}, + {-0.501786, -1.37769}, + {-0.58927, 0.511793}, + {0.894073, 0.768163}, + {0.653152, -0.025503}, + {0.136765, 0.00586452}, + {0.187321, 0.0707209}, + {0.245083, 2.79908e-15}, + {0.198078, -0.0681101}, + {0.198078, 0.0681101}, + {0.609244, 7.88258e-15}, + {0.856068, -0.244528}, + {0.642398, -0.553017}, + {0.157364, 0.195195}, + {0.894073, 0.768163}, + {1.08582, 0.0715093}, + {0.548415, 5.03212e-15}, + {0.422501, 0.0375059}, + {0.331794, -0.0443841}, + {0.183862, -0.0625824}, + {0.119232, 2.62842e-15}, + {0.245083, -6.6277e-15}, + {0.183862, -0.0625824}, + {0.234236, -7.99412e-15}, + {0.438213, 0.0482423}, + {0.608177, 0.0924076}, + {0.653152, -0.025503}, + {0.548415, -3.30034e-15}, + {0.653152, 0.025503}, + {0.608177, 
-0.0924076}, + {0.438213, -0.0482423}, + {0.234236, -1.1971e-15}, + {0.183862, 0.0625824}, + {0.187321, -0.0707209}, + {0.116983, -1.06477e-14}, + {0.175034, 0.0639427}, + {0.247817, -4.66294e-15}, + {0.197596, -0.0729406}, + {0.136765, 0.00586452}, + {0.422501, 0.0375059}, + {0.608177, -0.0924076}, + {0.71571, 0.0128863}, + {0.718888, -5.45392e-15}, + {0.438213, 0.0482423}, + {0.331794, 0.0443841}, + {0.136765, -0.00586452}, + {0.197596, 0.0729406}, + {0.247817, -1.46619e-14}, + {0.175034, -0.0639427}, + {0.116983, 1.57673e-14}, + {0.187321, 0.0707209}, + {0.331794, -0.0443841}, + {0.438213, -0.0482423}, + {0.718888, -6.92367e-15}, + {0.71571, -0.0128863}, + {0.608177, 0.0924076}, + {0.422501, -0.0375059}, + {0.653152, 0.025503}, + {0.608177, -0.0924076}, + {0.438213, -0.0482423}, + {0.234236, 1.13307e-14}, + {0.183862, 0.0625824}, + {0.245083, 3.39358e-15}, + {0.183862, -0.0625824}, + {0.234236, -2.49775e-14}, + {0.438213, 0.0482423}, + {0.608177, 0.0924076}, + {0.653152, -0.025503}, + {0.548415, -7.98332e-15}, + {0.894073, -0.768163}, + {0.157364, -0.195195}, + {0.642398, 0.553017}, + {0.856068, 0.244528}, + {0.609244, 9.84471e-15}, + {0.198078, -0.0681101}, + {0.119232, -1.08306e-14}, + {0.183862, 0.0625824}, + {0.331794, 0.0443841}, + {0.422501, -0.0375059}, + {0.548415, -1.93693e-15}, + {1.08582, -0.0715093}, + {0.996488, -8.65654e-15}, + {1.08582, -0.0715093}, + {0.910811, -0.175105}, + {0.66818, -0.0435261}, + {0.344756, -0.0047045}, + {0.198078, 0.0681101}, + {0.256446, 1.87526e-15}, + {0.198078, -0.0681101}, + {0.344756, 0.0047045}, + {0.66818, 0.0435261}, + {0.910811, 0.175105}, + {1.08582, 0.0715093}, + {1.08582, -0.0715093}, + {0.894073, -0.768163}, + {0.157364, -0.195195}, + {0.642398, 0.553017}, + {0.856068, 0.244528}, + {0.609244, 1.10978e-14}, + {0.198078, -0.0681101}, + {0.119232, -8.91416e-15}, + {0.183862, 0.0625824}, + {0.331794, 0.0443841}, + {0.422501, -0.0375059}, + {0.548415, 3.58509e-15}, + {0.910811, -0.175105}, + {0.157364, -0.195195}, + 
{0.169545, 0.652296}, + {0.984075, 0.568373}, + {1.10754, -2.44636e-15}, + {0.856068, -0.244528}, + {0.344756, 0.0047045}, + {0.183862, 0.0625824}, + {0.22958, -1.29025e-14}, + {0.175034, -0.0639427}, + {0.123572, 7.36999e-15}, + {0.422501, 0.0375059}, + {0.66818, -0.0435261}, + {0.642398, 0.553017}, + {0.984075, 0.568373}, + {1.26225, -1.48159e-14}, + {0.984075, -0.568373}, + {0.642398, -0.553017}, + {0.66818, 0.0435261}, + {0.331794, 0.0443841}, + {0.175034, -0.0639427}, + {0.110194, -1.04245e-14}, + {0.175034, 0.0639427}, + {0.331794, -0.0443841}, + {0.344756, -0.0047045}, + {0.856068, 0.244528}, + {1.10754, -1.46688e-14}, + {0.984075, -0.568373}, + {0.169545, -0.652296}, + {0.157364, 0.195195}, + {0.910811, 0.175105}, + {0.422501, -0.0375059}, + {0.123572, 1.07388e-15}, + {0.175034, 0.0639427}, + {0.22958, 6.57697e-16}, + {0.183862, -0.0625824}, + {0.198078, 0.0681101}, + {0.609244, -9.21485e-15}, + {0.856068, -0.244528}, + {0.642398, -0.553017}, + {0.157364, 0.195195}, + {0.894073, 0.768163}, + {1.08582, 0.0715093}, + {0.548415, 1.74673e-15}, + {0.422501, 0.0375059}, + {0.331794, -0.0443841}, + {0.183862, -0.0625824}, + {0.119232, -3.30104e-15}, + {0.256446, 7.05931e-16}, + {0.198078, -0.0681101}, + {0.344756, 0.0047045}, + {0.66818, 0.0435261}, + {0.910811, 0.175105}, + {1.08582, 0.0715093}, + {0.996488, 9.09906e-16}, + {1.08582, -0.0715093}, + {0.910811, -0.175105}, + {0.66818, -0.0435261}, + {0.344756, -0.0047045}, + {0.198078, 0.0681101}, + {0.198078, -0.0681101}, + {0.119232, 1.58207e-15}, + {0.183862, 0.0625824}, + {0.331794, 0.0443841}, + {0.422501, -0.0375059}, + {0.548415, -1.33616e-15}, + {1.08582, -0.0715093}, + {0.894073, -0.768163}, + {0.157364, -0.195195}, + {0.642398, 0.553017}, + {0.856068, 0.244528}, + {0.609244, 8.54405e-15}, + {0.344756, 0.0047045}, + {0.183862, 0.0625824}, + {0.22958, -3.6568e-15}, + {0.175034, -0.0639427}, + {0.123572, -3.76603e-15}, + {0.422501, 0.0375059}, + {0.910811, -0.175105}, + {0.157364, -0.195195}, + {0.169545, 
0.652296}, + {0.984075, 0.568373}, + {1.10754, 1.63819e-14}, + {0.856068, -0.244528}, + {0.66818, 0.0435261}, + {0.331794, 0.0443841}, + {0.175034, -0.0639427}, + {0.110194, 5.40016e-15}, + {0.175034, 0.0639427}, + {0.331794, -0.0443841}, + {0.66818, -0.0435261}, + {0.642398, 0.553017}, + {0.984075, 0.568373}, + {1.26225, 1.79667e-14}, + {0.984075, -0.568373}, + {0.642398, -0.553017}, + {0.910811, 0.175105}, + {0.422501, -0.0375059}, + {0.123572, -1.23235e-14}, + {0.175034, 0.0639427}, + {0.22958, 1.33292e-14}, + {0.183862, -0.0625824}, + {0.344756, -0.0047045}, + {0.856068, 0.244528}, + {1.10754, 8.56991e-15}, + {0.984075, -0.568373}, + {0.169545, -0.652296}, + {0.157364, 0.195195}, + {1.08582, 0.0715093}, + {0.548415, -1.18239e-14}, + {0.422501, 0.0375059}, + {0.331794, -0.0443841}, + {0.183862, -0.0625824}, + {0.119232, 1.51533e-14}, + {0.198078, 0.0681101}, + {0.609244, -6.48772e-15}, + {0.856068, -0.244528}, + {0.642398, -0.553017}, + {0.157364, 0.195195}, + {0.894073, 0.768163}, + {-0.58927, 0.511793}, + {0.894073, 0.768163}, + {0.653152, -0.025503}, + {0.136765, 0.00586452}, + {0.187321, 0.0707209}, + {0.245083, 1.57505e-14}, + {0.198078, -0.0681101}, + {0.536417, -9.99313e-15}, + {1.13756, 0.487619}, + {1.33155, 0.16363}, + {0.99011, -1.07155}, + {-0.501786, -1.37769}, + {0.894073, 0.768163}, + {1.08582, 0.0715093}, + {0.548415, -7.38298e-15}, + {0.422501, 0.0375059}, + {0.331794, -0.0443841}, + {0.183862, -0.0625824}, + {0.119232, 1.81233e-14}, + {0.198078, 0.0681101}, + {0.609244, -1.0975e-14}, + {0.856068, -0.244528}, + {0.642398, -0.553017}, + {0.157364, 0.195195}, + {0.653152, -0.025503}, + {0.548415, 1.88434e-15}, + {0.653152, 0.025503}, + {0.608177, -0.0924076}, + {0.438213, -0.0482423}, + {0.234236, 2.71911e-14}, + {0.183862, 0.0625824}, + {0.245083, -1.67171e-15}, + {0.183862, -0.0625824}, + {0.234236, -8.38368e-15}, + {0.438213, 0.0482423}, + {0.608177, 0.0924076}, + {0.136765, 0.00586452}, + {0.422501, 0.0375059}, + {0.608177, -0.0924076}, + 
{0.71571, 0.0128863}, + {0.718888, 7.24348e-16}, + {0.438213, 0.0482423}, + {0.331794, 0.0443841}, + {0.187321, -0.0707209}, + {0.116983, -9.46735e-15}, + {0.175034, 0.0639427}, + {0.247817, 1.47624e-14}, + {0.197596, -0.0729406}, + {0.187321, 0.0707209}, + {0.331794, -0.0443841}, + {0.438213, -0.0482423}, + {0.718888, -1.28977e-15}, + {0.71571, -0.0128863}, + {0.608177, 0.0924076}, + {0.422501, -0.0375059}, + {0.136765, -0.00586452}, + {0.197596, 0.0729406}, + {0.247817, 9.21922e-15}, + {0.175034, -0.0639427}, + {0.116983, 2.0981e-14}, + {0.245083, 1.31681e-14}, + {0.183862, -0.0625824}, + {0.234236, 4.47544e-15}, + {0.438213, 0.0482423}, + {0.608177, 0.0924076}, + {0.653152, -0.025503}, + {0.548415, -5.02797e-15}, + {0.653152, 0.025503}, + {0.608177, -0.0924076}, + {0.438213, -0.0482423}, + {0.234236, 2.14729e-14}, + {0.183862, 0.0625824}, + {0.198078, -0.0681101}, + {0.119232, 7.21645e-16}, + {0.183862, 0.0625824}, + {0.331794, 0.0443841}, + {0.422501, -0.0375059}, + {0.548415, 1.65598e-16}, + {1.08582, -0.0715093}, + {0.894073, -0.768163}, + {0.157364, -0.195195}, + {0.642398, 0.553017}, + {0.856068, 0.244528}, + {0.609244, 5.67284e-15}, + {0.536417, -4.88498e-15}, + {0.198078, 0.0681101}, + {0.245083, -9.15934e-15}, + {0.187321, -0.0707209}, + {0.136765, -0.00586452}, + {0.653152, 0.025503}, + {0.894073, -0.768163}, + {-0.58927, -0.511793}, + {-0.501786, 1.37769}, + {0.99011, 1.07155}, + {1.33155, -0.16363}, + {1.13756, -0.487619}, + {1.13756, 0.487619}, + {0.609244, -2.50355e-14}, + {0.183862, -0.0625824}, + {0.116983, -1.90958e-14}, + {0.197596, 0.0729406}, + {0.608177, -0.0924076}, + {0.157364, -0.195195}, + {-0.501786, 1.37769}, + {-2.92014, -2.09061}, + {-8.79435, -1.30817}, + {-5.30025, 1.65834}, + {-0.90306, -8.72338e-15}, + {1.33155, 0.16363}, + {0.856068, -0.244528}, + {0.234236, -3.1225e-14}, + {0.175034, 0.0639427}, + {0.247817, -5.50801e-15}, + {0.438213, -0.0482423}, + {0.642398, 0.553017}, + {0.99011, 1.07155}, + {-8.79435, -1.30817}, + 
{-11.7051, 3.59868}, + {-6.57219, 4.93573e-16}, + {-5.30025, -1.65834}, + {0.99011, -1.07155}, + {0.642398, -0.553017}, + {0.438213, 0.0482423}, + {0.247817, -1.77636e-15}, + {0.175034, -0.0639427}, + {0.234236, 1.59054e-14}, + {0.856068, 0.244528}, + {1.33155, -0.16363}, + {-5.30025, 1.65834}, + {-6.57219, -5.24722e-15}, + {-11.7051, -3.59868}, + {-8.79435, 1.30817}, + {-0.501786, -1.37769}, + {0.157364, 0.195195}, + {0.608177, 0.0924076}, + {0.197596, -0.0729406}, + {0.116983, 1.53547e-14}, + {0.183862, 0.0625824}, + {0.609244, 8.8169e-15}, + {1.13756, -0.487619}, + {-0.90306, -2.36814e-14}, + {-5.30025, -1.65834}, + {-8.79435, 1.30817}, + {-2.92014, 2.09061}, + {-19.01, 3.81031}, + {-2.92014, 2.09061}, + {0.169545, -0.652296}, + {0.71571, -0.0128863}, + {0.272653, 7.53357e-15}, + {0.187321, -0.0707209}, + {0.344756, 0.0047045}, + {1.13756, 0.487619}, + {1.25751, 4.45683e-15}, + {-9.75792, 2.71861}, + {-13.6457, -2.44125}, + {-25.4876, -7.80357}, + {-2.92014, 2.09061}, + {-0.501786, -1.37769}, + {0.157364, 0.195195}, + {0.608177, 0.0924076}, + {0.197596, -0.0729406}, + {0.116983, 9.80781e-15}, + {0.183862, 0.0625824}, + {0.609244, 1.49587e-14}, + {1.13756, -0.487619}, + {-0.90306, 7.06758e-15}, + {-5.30025, -1.65834}, + {-8.79435, 1.30817}, + {0.169545, -0.652296}, + {0.157364, 0.195195}, + {0.910811, 0.175105}, + {0.422501, -0.0375059}, + {0.123572, 6.06665e-15}, + {0.175034, 0.0639427}, + {0.22958, 6.1993e-15}, + {0.183862, -0.0625824}, + {0.344756, -0.0047045}, + {0.856068, 0.244528}, + {1.10754, -2.28577e-15}, + {0.984075, -0.568373}, + {0.71571, -0.0128863}, + {0.608177, 0.0924076}, + {0.422501, -0.0375059}, + {0.136765, -0.00586452}, + {0.197596, 0.0729406}, + {0.247817, -1.83171e-14}, + {0.175034, -0.0639427}, + {0.116983, 1.17627e-15}, + {0.187321, 0.0707209}, + {0.331794, -0.0443841}, + {0.438213, -0.0482423}, + {0.718888, 5.88753e-15}, + {0.272653, -1.09197e-14}, + {0.197596, -0.0729406}, + {0.123572, 1.17805e-14}, + {0.197596, 0.0729406}, + {0.272653, 
-2.36674e-14}, + {0.197596, -0.0729406}, + {0.123572, -7.19444e-15}, + {0.197596, 0.0729406}, + {0.272653, -1.31859e-14}, + {0.197596, -0.0729406}, + {0.123572, 5.44548e-15}, + {0.197596, 0.0729406}, + {0.187321, -0.0707209}, + {0.116983, -2.02963e-15}, + {0.175034, 0.0639427}, + {0.247817, -1.83777e-14}, + {0.197596, -0.0729406}, + {0.136765, 0.00586452}, + {0.422501, 0.0375059}, + {0.608177, -0.0924076}, + {0.71571, 0.0128863}, + {0.718888, -2.82656e-15}, + {0.438213, 0.0482423}, + {0.331794, 0.0443841}, + {0.344756, 0.0047045}, + {0.183862, 0.0625824}, + {0.22958, -7.32747e-15}, + {0.175034, -0.0639427}, + {0.123572, 2.2981e-14}, + {0.422501, 0.0375059}, + {0.910811, -0.175105}, + {0.157364, -0.195195}, + {0.169545, 0.652296}, + {0.984075, 0.568373}, + {1.10754, -1.49351e-14}, + {0.856068, -0.244528}, + {1.13756, 0.487619}, + {0.609244, 4.66294e-15}, + {0.183862, -0.0625824}, + {0.116983, 1.55431e-14}, + {0.197596, 0.0729406}, + {0.608177, -0.0924076}, + {0.157364, -0.195195}, + {-0.501786, 1.37769}, + {-2.92014, -2.09061}, + {-8.79435, -1.30817}, + {-5.30025, 1.65834}, + {-0.90306, -1.49364e-14}, + {1.25751, 5.77316e-15}, + {1.13756, -0.487619}, + {0.344756, -0.0047045}, + {0.187321, 0.0707209}, + {0.272653, -7.41352e-15}, + {0.71571, 0.0128863}, + {0.169545, 0.652296}, + {-2.92014, -2.09061}, + {-19.01, -3.81031}, + {-25.4876, 7.80357}, + {-13.6457, 2.44125}, + {-9.75792, -2.71861}, + {-9.75792, 2.71861}, + {-0.90306, -1.11022e-15}, + {0.856068, 0.244528}, + {0.331794, -0.0443841}, + {0.197596, -0.0729406}, + {0.718888, 1.06265e-14}, + {0.984075, 0.568373}, + {-8.79435, -1.30817}, + {-25.4876, 7.80357}, + {-25.1033, 5.63667}, + {-39.4244, -13.5375}, + {-36.1195, 1.53615e-14}, + {-13.6457, -2.44125}, + {-5.30025, -1.65834}, + {1.10754, -3.75347e-16}, + {0.438213, -0.0482423}, + {0.123572, 6.59532e-15}, + {0.438213, 0.0482423}, + {1.10754, -9.60557e-15}, + {-5.30025, 1.65834}, + {-13.6457, 2.44125}, + {-39.4244, -13.5375}, + {-72.4614, 8.85487e-15}, + {-39.4244, 
13.5375}, + {-25.4876, -7.80357}, + {-8.79435, 1.30817}, + {0.984075, -0.568373}, + {0.718888, 6.66134e-15}, + {0.197596, 0.0729406}, + {0.331794, 0.0443841}, + {0.856068, -0.244528}, + {-0.90306, -3.2321e-15}, + {-9.75792, -2.71861}, + {-36.1195, 3.07691e-15}, + {-39.4244, 13.5375}, + {-25.1033, -5.63667}, + {-68.6663, 31.5682}, + {-25.1033, -5.63667}, + {-11.7051, -3.59868}, + {1.26225, 5.32907e-15}, + {0.71571, 0.0128863}, + {0.136765, -0.00586452}, + {0.66818, 0.0435261}, + {1.33155, 0.16363}, + {-9.75792, 2.71861}, + {-28.2549, -1.30009e-14}, + {-85.5703, -25.4729}, + {-141.346, 11.6655}, + {-25.1033, -5.63667}, + {-25.4876, -7.80357}, + {-8.79435, 1.30817}, + {0.984075, -0.568373}, + {0.718888, 1.04645e-15}, + {0.197596, 0.0729406}, + {0.331794, 0.0443841}, + {0.856068, -0.244528}, + {-0.90306, -1.12605e-14}, + {-9.75792, -2.71861}, + {-36.1195, -4.7106e-15}, + {-39.4244, 13.5375}, + {-11.7051, -3.59868}, + {-8.79435, 1.30817}, + {0.99011, -1.07155}, + {0.642398, -0.553017}, + {0.438213, 0.0482423}, + {0.247817, -2.82239e-15}, + {0.175034, -0.0639427}, + {0.234236, -3.56681e-15}, + {0.856068, 0.244528}, + {1.33155, -0.16363}, + {-5.30025, 1.65834}, + {-6.57219, -1.16432e-14}, + {1.26225, 3.58047e-15}, + {0.984075, -0.568373}, + {0.642398, -0.553017}, + {0.66818, 0.0435261}, + {0.331794, 0.0443841}, + {0.175034, -0.0639427}, + {0.110194, 2.93524e-15}, + {0.175034, 0.0639427}, + {0.331794, -0.0443841}, + {0.66818, -0.0435261}, + {0.642398, 0.553017}, + {0.984075, 0.568373}, + {0.71571, 0.0128863}, + {0.718888, -5.34729e-15}, + {0.438213, 0.0482423}, + {0.331794, 0.0443841}, + {0.187321, -0.0707209}, + {0.116983, 2.48202e-15}, + {0.175034, 0.0639427}, + {0.247817, -1.39069e-14}, + {0.197596, -0.0729406}, + {0.136765, 0.00586452}, + {0.422501, 0.0375059}, + {0.608177, -0.0924076}, + {0.136765, -0.00586452}, + {0.197596, 0.0729406}, + {0.247817, -9.0622e-15}, + {0.175034, -0.0639427}, + {0.116983, 2.82922e-15}, + {0.187321, 0.0707209}, + {0.331794, -0.0443841}, + 
{0.438213, -0.0482423}, + {0.718888, 3.1417e-15}, + {0.71571, -0.0128863}, + {0.608177, 0.0924076}, + {0.422501, -0.0375059}, + {0.66818, 0.0435261}, + {0.331794, 0.0443841}, + {0.175034, -0.0639427}, + {0.110194, 2.79023e-16}, + {0.175034, 0.0639427}, + {0.331794, -0.0443841}, + {0.66818, -0.0435261}, + {0.642398, 0.553017}, + {0.984075, 0.568373}, + {1.26225, -1.87546e-14}, + {0.984075, -0.568373}, + {0.642398, -0.553017}, + {1.33155, 0.16363}, + {0.856068, -0.244528}, + {0.234236, -4.71845e-15}, + {0.175034, 0.0639427}, + {0.247817, 1.06316e-14}, + {0.438213, -0.0482423}, + {0.642398, 0.553017}, + {0.99011, 1.07155}, + {-8.79435, -1.30817}, + {-11.7051, 3.59868}, + {-6.57219, -1.15544e-14}, + {-5.30025, -1.65834}, + {-9.75792, 2.71861}, + {-0.90306, -8.43769e-15}, + {0.856068, 0.244528}, + {0.331794, -0.0443841}, + {0.197596, -0.0729406}, + {0.718888, 4.79381e-15}, + {0.984075, 0.568373}, + {-8.79435, -1.30817}, + {-25.4876, 7.80357}, + {-25.1033, 5.63667}, + {-39.4244, -13.5375}, + {-36.1195, -2.29847e-15}, + {-28.2549, 1.17684e-14}, + {-9.75792, -2.71861}, + {1.33155, -0.16363}, + {0.66818, -0.0435261}, + {0.136765, 0.00586452}, + {0.71571, -0.0128863}, + {1.26225, -6.27057e-15}, + {-11.7051, 3.59868}, + {-25.1033, 5.63667}, + {-68.6663, -31.5682}, + {-141.346, -11.6655}, + {-85.5703, 25.4729}, + {-85.5703, -25.4729}, + {-36.1195, -8.88178e-15}, + {-5.30025, 1.65834}, + {0.642398, 0.553017}, + {0.422501, 0.0375059}, + {0.608177, 0.0924076}, + {0.984075, -0.568373}, + {-6.57219, -9.51947e-16}, + {-39.4244, -13.5375}, + {-141.346, -11.6655}, + {-174.402, 55.7416}, + {-100.781, -1.32589e-14}, + {-141.346, 11.6655}, + {-39.4244, 13.5375}, + {-6.57219, 9.32587e-15}, + {0.984075, 0.568373}, + {0.608177, -0.0924076}, + {0.422501, -0.0375059}, + {0.642398, -0.553017}, + {-5.30025, -1.65834}, + {-36.1195, -1.51055e-14}, + {-85.5703, 25.4729}, + {-100.781, -1.03368e-14}, + {-174.402, -55.7416}, + {-219.942, -33.69}, + {-174.402, -55.7416}, + {-72.4614, 3.19744e-14}, + 
{-11.7051, 3.59868}, + {0.169545, 0.652296}, + {0.653152, 0.025503}, + {0.910811, 0.175105}, + {0.99011, -1.07155}, + {-13.6457, -2.44125}, + {-85.5703, -25.4729}, + {-281.447, 2.89669e-14}, + {-352.926, 115.37}, + {-174.402, -55.7416}, + {-141.346, 11.6655}, + {-39.4244, 13.5375}, + {-6.57219, -4.88498e-15}, + {0.984075, 0.568373}, + {0.608177, -0.0924076}, + {0.422501, -0.0375059}, + {0.642398, -0.553017}, + {-5.30025, -1.65834}, + {-36.1195, -1.82141e-14}, + {-85.5703, 25.4729}, + {-100.781, -9.27444e-15}, + {-72.4614, -4.85029e-15}, + {-39.4244, 13.5375}, + {-13.6457, -2.44125}, + {-5.30025, -1.65834}, + {1.10754, 3.15158e-15}, + {0.438213, -0.0482423}, + {0.123572, -1.13913e-14}, + {0.438213, 0.0482423}, + {1.10754, 1.54585e-14}, + {-5.30025, 1.65834}, + {-13.6457, 2.44125}, + {-39.4244, -13.5375}, + {-11.7051, 3.59868}, + {-6.57219, -1.02973e-14}, + {-5.30025, -1.65834}, + {1.33155, 0.16363}, + {0.856068, -0.244528}, + {0.234236, -8.67871e-16}, + {0.175034, 0.0639427}, + {0.247817, -9.0156e-15}, + {0.438213, -0.0482423}, + {0.642398, 0.553017}, + {0.99011, 1.07155}, + {-8.79435, -1.30817}, + {0.169545, 0.652296}, + {0.984075, 0.568373}, + {1.10754, -9.28424e-15}, + {0.856068, -0.244528}, + {0.344756, 0.0047045}, + {0.183862, 0.0625824}, + {0.22958, -2.66014e-15}, + {0.175034, -0.0639427}, + {0.123572, -1.71764e-14}, + {0.422501, 0.0375059}, + {0.910811, -0.175105}, + {0.157364, -0.195195}, + {0.653152, 0.025503}, + {0.608177, -0.0924076}, + {0.438213, -0.0482423}, + {0.234236, 3.00826e-14}, + {0.183862, 0.0625824}, + {0.245083, 6.85206e-15}, + {0.183862, -0.0625824}, + {0.234236, -3.07568e-14}, + {0.438213, 0.0482423}, + {0.608177, 0.0924076}, + {0.653152, -0.025503}, + {0.548415, 1.36866e-15}, + {0.910811, 0.175105}, + {0.422501, -0.0375059}, + {0.123572, 5.7454e-15}, + {0.175034, 0.0639427}, + {0.22958, -8.11296e-15}, + {0.183862, -0.0625824}, + {0.344756, -0.0047045}, + {0.856068, 0.244528}, + {1.10754, -2.31121e-14}, + {0.984075, -0.568373}, + {0.169545, 
-0.652296}, + {0.157364, 0.195195}, + {0.99011, -1.07155}, + {0.642398, -0.553017}, + {0.438213, 0.0482423}, + {0.247817, 1.40998e-14}, + {0.175034, -0.0639427}, + {0.234236, 1.26193e-14}, + {0.856068, 0.244528}, + {1.33155, -0.16363}, + {-5.30025, 1.65834}, + {-6.57219, 2.68492e-14}, + {-11.7051, -3.59868}, + {-8.79435, 1.30817}, + {-13.6457, -2.44125}, + {-5.30025, -1.65834}, + {1.10754, -1.79197e-14}, + {0.438213, -0.0482423}, + {0.123572, 2.88317e-14}, + {0.438213, 0.0482423}, + {1.10754, 9.5501e-15}, + {-5.30025, 1.65834}, + {-13.6457, 2.44125}, + {-39.4244, -13.5375}, + {-72.4614, -2.75686e-15}, + {-39.4244, 13.5375}, + {-85.5703, -25.4729}, + {-36.1195, 1.73195e-14}, + {-5.30025, 1.65834}, + {0.642398, 0.553017}, + {0.422501, 0.0375059}, + {0.608177, 0.0924076}, + {0.984075, -0.568373}, + {-6.57219, 4.82121e-15}, + {-39.4244, -13.5375}, + {-141.346, -11.6655}, + {-174.402, 55.7416}, + {-100.781, -7.48575e-15}, + {-281.447, 3.90799e-14}, + {-85.5703, 25.4729}, + {-13.6457, 2.44125}, + {0.99011, 1.07155}, + {0.910811, -0.175105}, + {0.653152, -0.025503}, + {0.169545, -0.652296}, + {-11.7051, -3.59868}, + {-72.4614, -2.1037e-14}, + {-174.402, 55.7416}, + {-219.942, 33.69}, + {-352.926, -115.37}, + {-352.926, 115.37}, + {-100.781, -1.07118e-15}, + {-39.4244, -13.5375}, + {-8.79435, -1.30817}, + {0.157364, -0.195195}, + {0.548415, -1.6234e-14}, + {0.157364, 0.195195}, + {-8.79435, 1.30817}, + {-39.4244, 13.5375}, + {-100.781, -2.85409e-14}, + {-352.926, -115.37}, + {-690.893, 8.12484e-14}, + {-1001.68, -433.439}, + {-690.893, -3.19744e-14}, + {-174.402, 55.7416}, + {-25.1033, 5.63667}, + {-2.92014, -2.09061}, + {0.894073, -0.768163}, + {1.08582, 0.0715093}, + {-0.501786, -1.37769}, + {-25.4876, -7.80357}, + {-141.346, 11.6655}, + {-352.926, 115.37}, + {-517.273, -7.42819e-14}, + {-690.893, -4.996e-15}, + {-352.926, 115.37}, + {-100.781, 2.12053e-14}, + {-39.4244, -13.5375}, + {-8.79435, -1.30817}, + {0.157364, -0.195195}, + {0.548415, 6.04246e-15}, + {0.157364, 
0.195195}, + {-8.79435, 1.30817}, + {-39.4244, 13.5375}, + {-100.781, -6.2645e-15}, + {-352.926, -115.37}, + {-174.402, 55.7416}, + {-100.781, -3.9968e-15}, + {-85.5703, -25.4729}, + {-36.1195, 1.77636e-15}, + {-5.30025, 1.65834}, + {0.642398, 0.553017}, + {0.422501, 0.0375059}, + {0.608177, 0.0924076}, + {0.984075, -0.568373}, + {-6.57219, -1.20726e-14}, + {-39.4244, -13.5375}, + {-141.346, -11.6655}, + {-25.1033, 5.63667}, + {-39.4244, -13.5375}, + {-36.1195, 7.10543e-15}, + {-9.75792, 2.71861}, + {-0.90306, -6.56569e-15}, + {0.856068, 0.244528}, + {0.331794, -0.0443841}, + {0.197596, -0.0729406}, + {0.718888, 1.18948e-14}, + {0.984075, 0.568373}, + {-8.79435, -1.30817}, + {-25.4876, 7.80357}, + {-2.92014, -2.09061}, + {-8.79435, -1.30817}, + {-5.30025, 1.65834}, + {-0.90306, 1.58207e-14}, + {1.13756, 0.487619}, + {0.609244, -2.48678e-15}, + {0.183862, -0.0625824}, + {0.116983, -2.16976e-14}, + {0.197596, 0.0729406}, + {0.608177, -0.0924076}, + {0.157364, -0.195195}, + {-0.501786, 1.37769}, + {0.894073, -0.768163}, + {0.157364, -0.195195}, + {0.642398, 0.553017}, + {0.856068, 0.244528}, + {0.609244, -1.09297e-14}, + {0.198078, -0.0681101}, + {0.119232, -1.83728e-14}, + {0.183862, 0.0625824}, + {0.331794, 0.0443841}, + {0.422501, -0.0375059}, + {0.548415, -2.94779e-14}, + {1.08582, -0.0715093}, + {1.08582, 0.0715093}, + {0.548415, 1.00198e-14}, + {0.422501, 0.0375059}, + {0.331794, -0.0443841}, + {0.183862, -0.0625824}, + {0.119232, 1.21228e-14}, + {0.198078, 0.0681101}, + {0.609244, 4.84268e-15}, + {0.856068, -0.244528}, + {0.642398, -0.553017}, + {0.157364, 0.195195}, + {0.894073, 0.768163}, + {-0.501786, -1.37769}, + {0.157364, 0.195195}, + {0.608177, 0.0924076}, + {0.197596, -0.0729406}, + {0.116983, 5.39933e-14}, + {0.183862, 0.0625824}, + {0.609244, -1.87156e-15}, + {1.13756, -0.487619}, + {-0.90306, -6.7538e-14}, + {-5.30025, -1.65834}, + {-8.79435, 1.30817}, + {-2.92014, 2.09061}, + {-25.4876, -7.80357}, + {-8.79435, 1.30817}, + {0.984075, -0.568373}, + 
{0.718888, 4.88498e-14}, + {0.197596, 0.0729406}, + {0.331794, 0.0443841}, + {0.856068, -0.244528}, + {-0.90306, -6.80815e-15}, + {-9.75792, -2.71861}, + {-36.1195, -8.78654e-15}, + {-39.4244, 13.5375}, + {-25.1033, -5.63667}, + {-141.346, 11.6655}, + {-39.4244, 13.5375}, + {-6.57219, 2.26485e-14}, + {0.984075, 0.568373}, + {0.608177, -0.0924076}, + {0.422501, -0.0375059}, + {0.642398, -0.553017}, + {-5.30025, -1.65834}, + {-36.1195, -1.97408e-15}, + {-85.5703, 25.4729}, + {-100.781, -2.76711e-14}, + {-174.402, -55.7416}, + {-352.926, 115.37}, + {-100.781, 4.53139e-15}, + {-39.4244, -13.5375}, + {-8.79435, -1.30817}, + {0.157364, -0.195195}, + {0.548415, -2.27175e-14}, + {0.157364, 0.195195}, + {-8.79435, 1.30817}, + {-39.4244, 13.5375}, + {-100.781, -5.34849e-14}, + {-352.926, -115.37}, + {-690.893, 7.92872e-15}, + {-517.273, -5.68434e-14}, + {-352.926, -115.37}, + {-141.346, -11.6655}, + {-25.4876, 7.80357}, + {-0.501786, 1.37769}, + {1.08582, -0.0715093}, + {0.894073, 0.768163}, + {-2.92014, 2.09061}, + {-25.1033, -5.63667}, + {-174.402, -55.7416}, + {-690.893, 1.78665e-14}, + {-1001.68, 433.439}}; + const std::vector> out_3 + = {{-0.199195, -0.0194965}, {-0.419867, -0.250833}, {-1.11264, 0}, + {-0.419867, 0.250833}, {-0.199195, 0.0194965}, {-0.418409, -5.00654e-18}, + {-0.419867, 0.250833}, {-0.18608, 3.44375e-17}, {-0.267653, -0.0667649}, + {-0.199195, 0.0194965}, {-0.267653, -0.0667649}, {-0.267653, 0.0667649}, + {-0.199195, -0.0194965}, {-0.267653, 0.0667649}, {-0.18608, -1.83156e-17}, + {-0.419867, -0.250833}, {-0.418409, 6.16791e-18}, {-0.418409, 1.14412e-17}, + {-0.419867, 0.250833}, {-0.18608, 4.36893e-17}, {-0.267653, -0.0667649}, + {-0.267653, 0.0667649}, {-0.18608, 1.64477e-17}, {-0.419867, -0.250833}, + {-0.418409, -2.87836e-17}, {-0.267653, -0.0667649}, {-0.418409, -1.43756e-17}, + {-0.267653, 0.0667649}, {-0.267653, -0.0667649}, {-0.418409, -9.49474e-18}, + {-0.267653, 0.0667649}, {-0.199195, 0.0194965}, {-0.267653, -0.0667649}, + {-0.267653, 
-0.0667649}, {-0.418409, -3.75741e-17}, {-0.267653, 0.0667649}, + {-0.267653, 0.0667649}, {-0.199195, -0.0194965}, {-0.267653, 0.0667649}, + {-0.199195, -0.0194965}, {-0.199195, 0.0194965}, {-0.267653, -0.0667649}, + {-0.267653, -0.0667649}, {-0.418409, 4.70188e-17}, {-0.267653, 0.0667649}, + {-0.267653, 0.0667649}, {-0.18608, -4.29872e-17}, {-0.419867, -0.250833}, + {-0.418409, -1.85037e-17}, {-0.267653, -0.0667649}, {-0.418409, -2.89121e-18}, + {-0.267653, 0.0667649}, {-0.267653, -0.0667649}, {-0.418409, 4.58838e-18}, + {-0.267653, 0.0667649}, {-0.418409, 1.03394e-17}, {-0.419867, 0.250833}, + {-0.18608, -3.28955e-17}, {-0.267653, -0.0667649}}; + const std::vector> out_3_init + = {{-0.0719135, 0}, {0.15981, 0}, {0, 0}, {0.15981, 0}, {-0.0719135, 0}, {-0.0185867, 0}, + {0.15981, 0}, {0.113268, 0}, {-0.0634128, 0}, {-0.0719135, 0}, {-0.0634128, 0}, {-0.0634128, 0}, + {-0.0719135, 0}, {-0.0634128, 0}, {0.113268, 0}, {0.15981, 0}, {-0.0185867, 0}, {-0.0185867, 0}, + {0.15981, 0}, {0.113268, 0}, {-0.0634128, 0}, {-0.0634128, 0}, {0.113268, 0}, {0.15981, 0}, + {-0.0185867, 0}, {-0.0634128, 0}, {-0.0185867, 0}, {-0.0634128, 0}, {-0.0634128, 0}, {-0.0185867, 0}, + {-0.0634128, 0}, {-0.0719135, 0}, {-0.0634128, 0}, {-0.0634128, 0}, {-0.0185867, 0}, {-0.0634128, 0}, + {-0.0634128, 0}, {-0.0719135, 0}, {-0.0634128, 0}, {-0.0719135, 0}, {-0.0719135, 0}, {-0.0634128, 0}, + {-0.0634128, 0}, {-0.0185867, 0}, {-0.0634128, 0}, {-0.0634128, 0}, {0.113268, 0}, {0.15981, 0}, + {-0.0185867, 0}, {-0.0634128, 0}, {-0.0185867, 0}, {-0.0634128, 0}, {-0.0634128, 0}, {-0.0185867, 0}, + {-0.0634128, 0}, {-0.0185867, 0}, {0.15981, 0}, {0.113268, 0}, {-0.0634128, 0}}; const base_device::DEVICE_CPU* cpu_ctx = {}; const base_device::DEVICE_GPU* gpu_ctx = {}; @@ -54,9 +7005,13 @@ class TestModulePWPWMultiDevice : public ::testing::Test using synchronize_memory_int_h2d_op = base_device::memory::synchronize_memory_op; - void SetUp() override { + void + SetUp () override + { } - void TearDown() 
override { + void + TearDown () override + { } }; @@ -69,100 +7024,112 @@ class TestModulePWPWMultiDevice : public ::testing::Test // std::complex* out, // const int* box_index); -TEST_F(TestModulePWPWMultiDevice, set_3d_fft_box_op_cpu) +TEST_F (TestModulePWPWMultiDevice, set_3d_fft_box_op_cpu) { - std::vector> res(out_1.size(), std::complex{0, 0}); - set_3d_fft_box_cpu_op()(this->npwk, box_index.data(), in_1.data(), res.data()); - for (int ii = 0; ii < this->nxyz; ii++) { - EXPECT_LT(std::abs(res[ii] - out_1[ii]), 1e-12); - } + std::vector> res (out_1.size (), std::complex{0, 0}); + set_3d_fft_box_cpu_op () (this->npwk, box_index.data (), in_1.data (), res.data ()); + for (int ii = 0; ii < this->nxyz; ii++) + { + EXPECT_LT (std::abs (res[ii] - out_1[ii]), 1e-12); + } } -TEST_F(TestModulePWPWMultiDevice, set_recip_to_real_output_op_cpu) +TEST_F (TestModulePWPWMultiDevice, set_recip_to_real_output_op_cpu) { - std::vector> res(out_2.size(), std::complex{0, 0}); - set_recip_to_real_output_cpu_op()(this->nxyz, this->add, this->factor, in_2.data(), res.data()); - for (int ii = 0; ii < this->nxyz; ii++) { - EXPECT_LT(std::abs(res[ii] - out_2[ii]), 1e-12); - } + std::vector> res (out_2.size (), std::complex{0, 0}); + set_recip_to_real_output_cpu_op () (this->nxyz, this->add, this->factor, in_2.data (), res.data ()); + for (int ii = 0; ii < this->nxyz; ii++) + { + EXPECT_LT (std::abs (res[ii] - out_2[ii]), 1e-12); + } } -TEST_F(TestModulePWPWMultiDevice, set_real_to_recip_output_op_cpu) +TEST_F (TestModulePWPWMultiDevice, set_real_to_recip_output_op_cpu) { std::vector> res = out_3_init; - set_real_to_recip_output_cpu_op()(this->npwk, this->nxyz, true, this->factor, box_index.data(), in_3.data(), res.data()); - for (int ii = 0; ii < out_3.size(); ii++) { - EXPECT_LT(std::abs(res[ii] - out_3[ii]), 5e-6); - } + set_real_to_recip_output_cpu_op () (this->npwk, + this->nxyz, + true, + this->factor, + box_index.data (), + in_3.data (), + res.data ()); + for (int ii = 0; ii < 
out_3.size (); ii++) + { + EXPECT_LT (std::abs (res[ii] - out_3[ii]), 5e-6); + } } #if __UT_USE_CUDA || __UT_USE_ROCM -TEST_F(TestModulePWPWMultiDevice, set_3d_fft_box_op_gpu) +TEST_F (TestModulePWPWMultiDevice, set_3d_fft_box_op_gpu) { - std::vector> res(out_1.size(), std::complex{0, 0}); - int * d_box_index = NULL; - std::complex* d_res = NULL, * d_in_1 = NULL; - resize_memory_int_gpu_op()(d_box_index, box_index.size()); - resize_memory_complex_gpu_op()(d_res, res.size()); - resize_memory_complex_gpu_op()(d_in_1, in_1.size()); - synchronize_memory_int_h2d_op()(d_box_index, box_index.data(), box_index.size()); - synchronize_memory_complex_h2d_op()(d_res, res.data(), res.size()); - synchronize_memory_complex_h2d_op()(d_in_1, in_1.data(), in_1.size()); - - set_3d_fft_box_gpu_op()(this->npwk, d_box_index, d_in_1, d_res); - - synchronize_memory_complex_d2h_op()(res.data(), d_res, res.size()); - - for (int ii = 0; ii < this->nxyz; ii++) { - EXPECT_LT(fabs(res[ii] - out_1[ii]), 1e-12); - } - delete_memory_int_gpu_op()(d_box_index); - delete_memory_complex_gpu_op()(d_res); - delete_memory_complex_gpu_op()(d_in_1); + std::vector> res (out_1.size (), std::complex{0, 0}); + int* d_box_index = NULL; + std::complex*d_res = NULL, *d_in_1 = NULL; + resize_memory_int_gpu_op () (d_box_index, box_index.size ()); + resize_memory_complex_gpu_op () (d_res, res.size ()); + resize_memory_complex_gpu_op () (d_in_1, in_1.size ()); + synchronize_memory_int_h2d_op () (d_box_index, box_index.data (), box_index.size ()); + synchronize_memory_complex_h2d_op () (d_res, res.data (), res.size ()); + synchronize_memory_complex_h2d_op () (d_in_1, in_1.data (), in_1.size ()); + + set_3d_fft_box_gpu_op () (this->npwk, d_box_index, d_in_1, d_res); + + synchronize_memory_complex_d2h_op () (res.data (), d_res, res.size ()); + + for (int ii = 0; ii < this->nxyz; ii++) + { + EXPECT_LT (fabs (res[ii] - out_1[ii]), 1e-12); + } + delete_memory_int_gpu_op () (d_box_index); + delete_memory_complex_gpu_op () 
(d_res); + delete_memory_complex_gpu_op () (d_in_1); } -TEST_F(TestModulePWPWMultiDevice, set_recip_to_real_output_op_gpu) +TEST_F (TestModulePWPWMultiDevice, set_recip_to_real_output_op_gpu) { - std::vector> res(out_2.size(), std::complex{0, 0}); - std::complex* d_res = NULL, * d_in_2 = NULL; - resize_memory_complex_gpu_op()(d_res, res.size()); - resize_memory_complex_gpu_op()(d_in_2, in_2.size()); - synchronize_memory_complex_h2d_op()(d_res, res.data(), res.size()); - synchronize_memory_complex_h2d_op()(d_in_2, in_2.data(), in_2.size()); + std::vector> res (out_2.size (), std::complex{0, 0}); + std::complex*d_res = NULL, *d_in_2 = NULL; + resize_memory_complex_gpu_op () (d_res, res.size ()); + resize_memory_complex_gpu_op () (d_in_2, in_2.size ()); + synchronize_memory_complex_h2d_op () (d_res, res.data (), res.size ()); + synchronize_memory_complex_h2d_op () (d_in_2, in_2.data (), in_2.size ()); - set_recip_to_real_output_gpu_op()(this->nxyz, this->add, this->factor, d_in_2, d_res); + set_recip_to_real_output_gpu_op () (this->nxyz, this->add, this->factor, d_in_2, d_res); - synchronize_memory_complex_d2h_op()(res.data(), d_res, res.size()); + synchronize_memory_complex_d2h_op () (res.data (), d_res, res.size ()); - for (int ii = 0; ii < this->nxyz; ii++) { - EXPECT_LT(fabs(res[ii] - out_2[ii]), 1e-12); - } - delete_memory_complex_gpu_op()(d_res); - delete_memory_complex_gpu_op()(d_in_2); + for (int ii = 0; ii < this->nxyz; ii++) + { + EXPECT_LT (fabs (res[ii] - out_2[ii]), 1e-12); + } + delete_memory_complex_gpu_op () (d_res); + delete_memory_complex_gpu_op () (d_in_2); } -TEST_F(TestModulePWPWMultiDevice, set_real_to_recip_output_op_gpu) +TEST_F (TestModulePWPWMultiDevice, set_real_to_recip_output_op_gpu) { std::vector> res = out_3_init; - int * d_box_index = NULL; - std::complex* d_res = NULL, * d_in_3 = NULL; - resize_memory_int_gpu_op()(d_box_index, box_index.size()); - resize_memory_complex_gpu_op()(d_res, res.size()); - 
resize_memory_complex_gpu_op()(d_in_3, in_3.size()); - synchronize_memory_int_h2d_op()(d_box_index, box_index.data(), box_index.size()); - synchronize_memory_complex_h2d_op()(d_res, res.data(), res.size()); - synchronize_memory_complex_h2d_op()(d_in_3, in_3.data(), in_3.size()); + int* d_box_index = NULL; + std::complex*d_res = NULL, *d_in_3 = NULL; + resize_memory_int_gpu_op () (d_box_index, box_index.size ()); + resize_memory_complex_gpu_op () (d_res, res.size ()); + resize_memory_complex_gpu_op () (d_in_3, in_3.size ()); + synchronize_memory_int_h2d_op () (d_box_index, box_index.data (), box_index.size ()); + synchronize_memory_complex_h2d_op () (d_res, res.data (), res.size ()); + synchronize_memory_complex_h2d_op () (d_in_3, in_3.data (), in_3.size ()); - set_real_to_recip_output_gpu_op()(this->npwk, this->nxyz, true, this->factor, d_box_index, d_in_3, d_res); + set_real_to_recip_output_gpu_op () (this->npwk, this->nxyz, true, this->factor, d_box_index, d_in_3, d_res); - synchronize_memory_complex_d2h_op()(res.data(), d_res, res.size()); + synchronize_memory_complex_d2h_op () (res.data (), d_res, res.size ()); - for (int ii = 0; ii < out_3.size(); ii++) { - EXPECT_LT(fabs(res[ii] - out_3[ii]), 5e-6); - } - delete_memory_int_gpu_op()(d_box_index); - delete_memory_complex_gpu_op()(d_res); - delete_memory_complex_gpu_op()(d_in_3); + for (int ii = 0; ii < out_3.size (); ii++) + { + EXPECT_LT (fabs (res[ii] - out_3[ii]), 5e-6); + } + delete_memory_int_gpu_op () (d_box_index); + delete_memory_complex_gpu_op () (d_res); + delete_memory_complex_gpu_op () (d_in_3); } #endif // __UT_USE_CUDA || __UT_USE_ROCM diff --git a/source/source_basis/module_pw/pw_basis.cpp b/source/source_basis/module_pw/pw_basis.cpp index 549fec8e5a4..325671daefe 100644 --- a/source/source_basis/module_pw/pw_basis.cpp +++ b/source/source_basis/module_pw/pw_basis.cpp @@ -5,22 +5,20 @@ #include "source_base/timer.h" #include "source_base/global_function.h" - namespace ModulePW { 
-PW_Basis::PW_Basis() -{ - classname="PW_Basis"; -} +PW_Basis::PW_Basis () { classname = "PW_Basis"; } -PW_Basis::PW_Basis(std::string device_, std::string precision_) : device(std::move(device_)), precision(std::move(precision_)) { - classname="PW_Basis"; - this->fft_bundle.setfft("cpu",this->precision); +PW_Basis::PW_Basis (std::string device_, std::string precision_) + : device (std::move (device_)), precision (std::move (precision_)) +{ + classname = "PW_Basis"; + this->fft_bundle.setfft ("cpu", this->precision); this->double_data_ = (this->precision == "double") || (this->precision == "mixing"); - this->float_data_ = (this->precision == "single") || (this->precision == "mixing"); + this->float_data_ = (this->precision == "single") || (this->precision == "mixing"); } -PW_Basis:: ~PW_Basis() +PW_Basis::~PW_Basis () { delete[] ig2isz; delete[] istot2ixy; @@ -41,89 +39,110 @@ PW_Basis:: ~PW_Basis() delete[] gg_uniq; #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") - { - delmem_int_op()(this->d_is2fftixy); - delmem_int_op()(this->ig2ixyz_gpu); - } + { + delmem_int_op () (this->d_is2fftixy); + delmem_int_op () (this->ig2ixyz_gpu); + } #endif } -/// +/// /// distribute plane wave basis and real-space grids to different processors /// set up maps for fft and create arrays for MPI_Alltoall /// set up ffts /// -void PW_Basis::setuptransform() +void + PW_Basis::setuptransform () { - ModuleBase::timer::start(this->classname, "setuptransform"); - this->distribute_r(); - this->distribute_g(); - this->getstartgr(); - this->fft_bundle.clear(); - - if(this->xprime) - { - this->fft_bundle.initfft(this->nx,this->ny,this->nz,this->lix,this->rix,this->nst,this->nplane,this->poolnproc,this->gamma_only, this->xprime); - } - else - { - this->fft_bundle.initfft(this->nx,this->ny,this->nz,this->liy,this->riy,this->nst,this->nplane,this->poolnproc,this->gamma_only, this->xprime); - } - this->fft_bundle.setupFFT(); - ModuleBase::timer::end(this->classname, 
"setuptransform"); + ModuleBase::timer::start (this->classname, "setuptransform"); + this->distribute_r (); + this->distribute_g (); + this->getstartgr (); + this->fft_bundle.clear (); + + if (this->xprime) + { + this->fft_bundle.initfft (this->nx, + this->ny, + this->nz, + this->lix, + this->rix, + this->nst, + this->nplane, + this->poolnproc, + this->gamma_only, + this->xprime); + } + else + { + this->fft_bundle.initfft (this->nx, + this->ny, + this->nz, + this->liy, + this->riy, + this->nst, + this->nplane, + this->poolnproc, + this->gamma_only, + this->xprime); + } + this->fft_bundle.setupFFT (); + ModuleBase::timer::end (this->classname, "setuptransform"); } -void PW_Basis::getstartgr() +void + PW_Basis::getstartgr () { - if(this->gamma_only) - { - this->nmaxgr = ( this->npw > (this->nrxx+1)/2 ) ? this->npw : (this->nrxx+1)/2; - } + if (this->gamma_only) + { + this->nmaxgr = (this->npw > (this->nrxx + 1) / 2) ? this->npw : (this->nrxx + 1) / 2; + } else - { - this->nmaxgr = ( this->npw > this->nrxx ) ? this->npw : this->nrxx; - } - - //--------------------------------------------- - // sum : starting plane of FFT box. - //--------------------------------------------- - delete[] this->numg; this->numg = new int[poolnproc]; - delete[] this->startg; this->startg = new int[poolnproc]; - delete[] this->startr; this->startr = new int[poolnproc]; - delete[] this->numr; this->numr = new int[poolnproc]; - - // Each processor has a set of full sticks, - // 'rank_use' processor send a piece(npps[ip]) of these sticks(nst_per[rank_use]) - // to all the other processors in this pool - for (int ip = 0;ip < poolnproc; ++ip) - { - this->numg[ip] = this->nst_per[poolrank] * this->numz[ip]; - } - + { + this->nmaxgr = (this->npw > this->nrxx) ? this->npw : this->nrxx; + } - // Each processor in a pool send a piece of each stick(nst_per[ip]) to - // other processors in this pool - // rank_use processor receive datas in npps[rank_p] planes. 
- for (int ip = 0;ip < poolnproc; ++ip) - { - this->numr[ip] = this->nst_per[ip] * this->numz[poolrank]; - } + //--------------------------------------------- + // sum : starting plane of FFT box. + //--------------------------------------------- + delete[] this->numg; + this->numg = new int[poolnproc]; + delete[] this->startg; + this->startg = new int[poolnproc]; + delete[] this->startr; + this->startr = new int[poolnproc]; + delete[] this->numr; + this->numr = new int[poolnproc]; + // Each processor has a set of full sticks, + // 'rank_use' processor send a piece(npps[ip]) of these sticks(nst_per[rank_use]) + // to all the other processors in this pool + for (int ip = 0; ip < poolnproc; ++ip) + { + this->numg[ip] = this->nst_per[poolrank] * this->numz[ip]; + } - // startg record the starting 'numg' position in each processor. - this->startg[0] = 0; - for (int ip = 1;ip < poolnproc; ++ip) - { - this->startg[ip] = this->startg[ip-1] + this->numg[ip-1]; - } + // Each processor in a pool send a piece of each stick(nst_per[ip]) to + // other processors in this pool + // rank_use processor receive datas in npps[rank_p] planes. + for (int ip = 0; ip < poolnproc; ++ip) + { + this->numr[ip] = this->nst_per[ip] * this->numz[poolrank]; + } + // startg record the starting 'numg' position in each processor. + this->startg[0] = 0; + for (int ip = 1; ip < poolnproc; ++ip) + { + this->startg[ip] = this->startg[ip - 1] + this->numg[ip - 1]; + } - // startr record the starting 'numr' position - this->startr[0] = 0; - for (int ip = 1;ip < poolnproc; ++ip) - { - this->startr[ip] = this->startr[ip-1] + this->numr[ip-1]; - } + // startr record the starting 'numr' position + this->startr[0] = 0; + for (int ip = 1; ip < poolnproc; ++ip) + { + this->startr[ip] = this->startr[ip - 1] + this->numr[ip - 1]; + } return; } @@ -131,57 +150,61 @@ void PW_Basis::getstartgr() /// Collect planewaves on current core, and construct gg, gdirect, gcar according to ig2isz and is2fftixy. 
/// known: ig2isz, is2fftixy /// output: gg, gdirect, gcar -/// -void PW_Basis::collect_local_pw() +/// +void + PW_Basis::collect_local_pw () { - if(this->npw <= 0) - { - return; - } + if (this->npw <= 0) + { + return; + } this->ig_gge0 = -1; - delete[] this->gg; this->gg = new double[this->npw]; - delete[] this->gdirect; this->gdirect = new ModuleBase::Vector3[this->npw]; - delete[] this->gcar; this->gcar = new ModuleBase::Vector3[this->npw]; + delete[] this->gg; + this->gg = new double[this->npw]; + delete[] this->gdirect; + this->gdirect = new ModuleBase::Vector3[this->npw]; + delete[] this->gcar; + this->gcar = new ModuleBase::Vector3[this->npw]; ModuleBase::Vector3 f; int gamma_num = 0; - for(int ig = 0 ; ig < this-> npw ; ++ig) - { - int isz = this->ig2isz[ig]; - int iz = isz % this->nz; - int is = isz / this->nz; - int ixy = this->is2fftixy[is]; - int ix = ixy / this->fftny; - int iy = ixy % this->fftny; - if (ix >= int(this->nx/2) + 1) - { - ix -= this->nx; - } - if (iy >= int(this->ny/2) + 1) - { - iy -= this->ny; - } - if (iz >= int(this->nz/2) + 1) + for (int ig = 0; ig < this->npw; ++ig) { - iz -= this->nz; + int isz = this->ig2isz[ig]; + int iz = isz % this->nz; + int is = isz / this->nz; + int ixy = this->is2fftixy[is]; + int ix = ixy / this->fftny; + int iy = ixy % this->fftny; + if (ix >= int (this->nx / 2) + 1) + { + ix -= this->nx; + } + if (iy >= int (this->ny / 2) + 1) + { + iy -= this->ny; + } + if (iz >= int (this->nz / 2) + 1) + { + iz -= this->nz; + } + f.x = ix; + f.y = iy; + f.z = iz; + this->gg[ig] = f * (this->GGT * f); + this->gdirect[ig] = f; + this->gcar[ig] = f * this->G; + if (this->gg[ig] < 1e-8) + { + this->ig_gge0 = ig; + ++gamma_num; + if (gamma_num > 1) + { + ModuleBase::WARNING_QUIT ("PW_Basis::collect_local_pw", + "More than one gamma point found in the plane wave basis set.\n"); + } + } } - f.x = ix; - f.y = iy; - f.z = iz; - this->gg[ig] = f * (this->GGT * f); - this->gdirect[ig] = f; - this->gcar[ig] = f * this->G; - 
if(this->gg[ig] < 1e-8) - { - this->ig_gge0 = ig; - ++gamma_num; - if (gamma_num > 1) - { - ModuleBase::WARNING_QUIT("PW_Basis::collect_local_pw", - "More than one gamma point found in the plane wave basis set.\n"); - } - } - } return; } @@ -189,116 +212,125 @@ void PW_Basis::collect_local_pw() /// Collect modulus of planewaves on current cores /// known: ig2isz, is2fftixy /// output: ig2igg, gg_uniq, ngg -/// -void PW_Basis::collect_uniqgg() +/// +void + PW_Basis::collect_uniqgg () { - if(this->npw <= 0) - { - return; - } - this->ig_gge0 = -1; - delete[] this->ig2igg; this->ig2igg = new int [this->npw]; - - int *sortindex = new int [this->npw];//Reconstruct the mapping of the plane wave index ig according to the energy size of the plane waves - double *tmpgg = new double [this->npw];//Ranking the plane waves by energy size while ensuring that the same energy is preserved for each wave to correspond - double *tmpgg2 = new double [this->npw];//ranking the plane waves by energy size and removing the duplicates - ModuleBase::Vector3 f; - for(int ig = 0 ; ig < this-> npw ; ++ig) - { - int isz = this->ig2isz[ig]; - int iz = isz % this->nz; - int is = isz / this->nz; - int ixy = this->is2fftixy[is]; - int ix = ixy / this->fftny; - int iy = ixy % this->fftny; - if (ix >= int(this->nx/2) + 1) - { - ix -= this->nx; - } - if (iy >= int(this->ny/2) + 1) + if (this->npw <= 0) { - iy -= this->ny; + return; } - if (iz >= int(this->nz/2) + 1) - { - iz -= this->nz; - } - f.x = ix; - f.y = iy; - f.z = iz; - tmpgg[ig] = f * (this->GGT * f); - if(tmpgg[ig] < 1e-8) + this->ig_gge0 = -1; + delete[] this->ig2igg; + this->ig2igg = new int[this->npw]; + + int* sortindex = new int[this->npw]; // Reconstruct the mapping of the plane wave index ig according to the energy + // size of the plane waves + double* tmpgg = new double[this->npw]; // Ranking the plane waves by energy size while ensuring that the same energy + // is preserved for each wave to correspond + double* tmpgg2 = new 
double[this->npw]; // ranking the plane waves by energy size and removing the duplicates + ModuleBase::Vector3 f; + for (int ig = 0; ig < this->npw; ++ig) { - this->ig_gge0 = ig; + int isz = this->ig2isz[ig]; + int iz = isz % this->nz; + int is = isz / this->nz; + int ixy = this->is2fftixy[is]; + int ix = ixy / this->fftny; + int iy = ixy % this->fftny; + if (ix >= int (this->nx / 2) + 1) + { + ix -= this->nx; + } + if (iy >= int (this->ny / 2) + 1) + { + iy -= this->ny; + } + if (iz >= int (this->nz / 2) + 1) + { + iz -= this->nz; + } + f.x = ix; + f.y = iy; + f.z = iz; + tmpgg[ig] = f * (this->GGT * f); + if (tmpgg[ig] < 1e-8) + { + this->ig_gge0 = ig; + } } - } - ModuleBase::GlobalFunc::ZEROS(sortindex, this->npw); - ModuleBase::heapsort(this->npw, tmpgg, sortindex); - + ModuleBase::GlobalFunc::ZEROS (sortindex, this->npw); + ModuleBase::heapsort (this->npw, tmpgg, sortindex); int igg = 0; this->ig2igg[sortindex[0]] = 0; tmpgg2[0] = tmpgg[0]; - double avg_gg = tmpgg2[igg];//For waves with similar energy,take the average - int avg_n = 1;//The number of waves required to take the average + double avg_gg = tmpgg2[igg]; // For waves with similar energy,take the average + int avg_n = 1; // The number of waves required to take the average for (int ig = 1; ig < this->npw; ++ig) - { - if (std::abs(tmpgg[ig] - tmpgg2[igg]) > 1.0e-8) - { - tmpgg2[igg] = avg_gg / double(avg_n); - ++igg; - tmpgg2[igg] = tmpgg[ig]; - avg_gg = tmpgg2[igg]; - avg_n = 1; - } - else { - avg_n++; - avg_gg += tmpgg[ig]; + if (std::abs (tmpgg[ig] - tmpgg2[igg]) > 1.0e-8) + { + tmpgg2[igg] = avg_gg / double (avg_n); + ++igg; + tmpgg2[igg] = tmpgg[ig]; + avg_gg = tmpgg2[igg]; + avg_n = 1; + } + else + { + avg_n++; + avg_gg += tmpgg[ig]; + } + this->ig2igg[sortindex[ig]] = igg; } - this->ig2igg[sortindex[ig]] = igg; - } - tmpgg2[igg] = avg_gg / double(avg_n); + tmpgg2[igg] = avg_gg / double (avg_n); this->ngg = igg + 1; - delete[] this->gg_uniq; this->gg_uniq = new double [this->ngg]; - for(int igg = 
0 ; igg < this->ngg ; ++igg) - { + delete[] this->gg_uniq; + this->gg_uniq = new double[this->ngg]; + for (int igg = 0; igg < this->ngg; ++igg) + { gg_uniq[igg] = tmpgg2[igg]; - } + } delete[] sortindex; delete[] tmpgg; delete[] tmpgg2; } -void PW_Basis::getfftixy2is(int * fftixy2is) const +void + PW_Basis::getfftixy2is (int* fftixy2is) const { -//Note: please assert when is1 >= is2, fftixy2is[is1] >= fftixy2is[is2]! - for(int ixy = 0 ; ixy < this->fftnxy ; ++ixy) - { - fftixy2is[ixy] = -1; - } + // Note: please assert when is1 >= is2, fftixy2is[is1] >= fftixy2is[is2]! + for (int ixy = 0; ixy < this->fftnxy; ++ixy) + { + fftixy2is[ixy] = -1; + } int ixy = 0; - for(int is = 0; is < this->nst; ++is) - { - for(; ixy < this->fftnxy ; ++ixy) + for (int is = 0; is < this->nst; ++is) { - if(this->is2fftixy[is] == ixy) - { - fftixy2is[ixy] = is; - ++ixy; - break; - } + for (; ixy < this->fftnxy; ++ixy) + { + if (this->is2fftixy[is] == ixy) + { + fftixy2is[ixy] = is; + ++ixy; + break; + } + } } - } } -void PW_Basis::set_device(std::string device_) { - this->device = std::move(device_); +void + PW_Basis::set_device (std::string device_) +{ + this->device = std::move (device_); } -void PW_Basis::set_precision(std::string precision_) { - this->precision = std::move(precision_); +void + PW_Basis::set_precision (std::string precision_) +{ + this->precision = std::move (precision_); } -} +} // namespace ModulePW diff --git a/source/source_basis/module_pw/pw_basis.h b/source/source_basis/module_pw/pw_basis.h index b834cb0e0f4..63c515a5626 100644 --- a/source/source_basis/module_pw/pw_basis.h +++ b/source/source_basis/module_pw/pw_basis.h @@ -35,7 +35,8 @@ namespace ModulePW * pwtest.initgrids(lat0,latvec,gridecut); * pwtest.initgrids(lat0,latvec,N1,N2,N3); * //double lat0: unit length, (unit: bohr) - * //ModuleBase::Matrix3 latvec: lattice vector, (unit: lat0), e.g. 
ModuleBase::Matrix3 latvec(1, 1, 0, 0, 2, 0, 0, 0, 2); + * //ModuleBase::Matrix3 latvec: lattice vector, (unit: lat0), e.g. ModuleBase::Matrix3 latvec(1, 1, 0, 0, 2, 0, 0, 0, + * 2); * //double gridecut: cutoff energy to generate FFT grids, (unit: Ry) * //int N1,N2,N3: FFT grids * 2. init parameters @@ -55,240 +56,231 @@ namespace ModulePW class PW_Basis { -public: + public: std::string classname; - PW_Basis(); - PW_Basis(std::string device_, std::string precision_); - virtual ~PW_Basis(); - //Init mpi parameters + PW_Basis (); + PW_Basis (std::string device_, std::string precision_); + virtual ~PW_Basis (); + // Init mpi parameters #ifdef __MPI - void initmpi( - const int poolnproc_in, // Number of processors in this pool - const int poolrank_in, // Rank in this pool - MPI_Comm pool_world_in //Comm world for pw_basis + void initmpi (const int poolnproc_in, // Number of processors in this pool + const int poolrank_in, // Rank in this pool + MPI_Comm pool_world_in // Comm world for pw_basis ); #endif - //Init the grids for FFT - virtual void initgrids( - const double lat0_in, //unit length (unit in bohr) - const ModuleBase::Matrix3 latvec_in, // Unitcell lattice vectors (unit in lat0) - const double gridecut //unit in Ry, ecut to set up grids - ); - //Init the grids for FFT - virtual void initgrids( - const double lat0_in, - const ModuleBase::Matrix3 latvec_in, // Unitcell lattice vectors - const int nx_in, int ny_in, int nz_in - ); - - //Init some parameters - void initparameters( - const bool gamma_only_in, - const double pwecut_in, //unit in Ry, ecut to decides plane waves - const int distribution_type_in = 1, - const bool xprime_in = true - ); - - //Set parameters about full planewave, only used in OFDFT for now. 
- void setfullpw( - const bool inpt_full_pw = false, - const int inpt_full_pw_dim = 0 + // Init the grids for FFT + virtual void initgrids (const double lat0_in, // unit length (unit in bohr) + const ModuleBase::Matrix3 latvec_in, // Unitcell lattice vectors (unit in lat0) + const double gridecut // unit in Ry, ecut to set up grids ); -//=============================================== -// distribution maps -//=============================================== -public: + // Init the grids for FFT + virtual void initgrids (const double lat0_in, + const ModuleBase::Matrix3 latvec_in, // Unitcell lattice vectors + const int nx_in, + int ny_in, + int nz_in); + + // Init some parameters + void initparameters (const bool gamma_only_in, + const double pwecut_in, // unit in Ry, ecut to decides plane waves + const int distribution_type_in = 1, + const bool xprime_in = true); + + // Set parameters about full planewave, only used in OFDFT for now. + void setfullpw (const bool inpt_full_pw = false, const int inpt_full_pw_dim = 0); + //=============================================== + // distribution maps + //=============================================== + public: #ifdef __MPI - MPI_Comm pool_world=MPI_COMM_NULL; + MPI_Comm pool_world = MPI_COMM_NULL; #endif - int *ig2isz=nullptr; // map ig to (is, iz). - int *ig2ixyz_gpu = nullptr; - int *istot2ixy=nullptr; // istot2ixy[is]: iy + ix * ny of is^th stick among all sticks. - int *is2fftixy=nullptr, * d_is2fftixy = nullptr; // is2fftixy[is]: iy + ix * ny of is^th stick among sticks on current proc. - int *fftixy2ip=nullptr; // fftixy2ip[iy + ix * fftny]: ip of proc which contains stick on (ix, iy). if no stick: -1 - int nst=0; //num. of sticks in current proc. - int *nst_per=nullptr;// nst on each core - int nstnz=0; // nst * nz - int nstot=0; //num. of sticks in total. - int npw=0; //num. of plane waves in current proc. - int *npw_per=nullptr; //npw on each core - int npwtot=0; // total num. of plane waves in all proc. 
in this pool - - //real space - int nrxx=0; //num. of real space grids - int *startz=nullptr; //startz[ip]: starting z plane in the ip-th proc. in current POOL_WORLD - int *numz=nullptr; //numz[ip]: num. of z planes in the ip-th proc. in current POOL_WORLD - int *numg=nullptr; //numg[ip] : nst_per[poolrank] * numz[ip] - int *numr=nullptr; //numr[ip] : numz[poolrank] * nst_per[ip] - int *startg=nullptr; // startg[ip] = numg[ip-1] + startg[ip-1] - int *startr=nullptr; // startr[ip] = numr[ip-1] + startr[ip-1] - int startz_current=0; - int nplane=0; //num. of planes in current proc. - - ModuleBase::Vector3 *gdirect=nullptr; //(= *G1d) ; // ig = new Vector igc[npw] - ModuleBase::Vector3 *gcar=nullptr; //G vectors in cartesian corrdinate - double *gg=nullptr; // modulus (G^2) of G vectors [npw] - //gg[ng]=ig[ng]*GGT*ig[ng]/(lat0*lat0)=g[ng]*g[ng] (/lat0*lat0) - // gg_global dimension: [cutgg_num_now] (save memory skill is used) - int ig_gge0=-1; //ig when gg == 0 - - //distribute plane waves and grids and set up fft - void setuptransform(); - -protected: - int *startnsz_per=nullptr;//useless intermediate variable// startnsz_per[ip]: starting is * nz stick in the ip^th proc. 
- - //distribute plane waves to different processors - void distribute_g(); - - //distribute real-space grids to different processors - virtual void distribute_r(); - - //prepare for MPI_Alltoall - void getstartgr(); - - -public: - //collect gdirect, gcar, gg - void collect_local_pw(); - -public: - int ngg=0; //number of different modulus (G^2) of G vectors - int *ig2igg=nullptr;//[npw] map ig to igg(* gdirect = nullptr; //(= *G1d) ; // ig = new Vector igc[npw] + ModuleBase::Vector3* gcar = nullptr; // G vectors in cartesian corrdinate + double* gg = nullptr; // modulus (G^2) of G vectors [npw] + // gg[ng]=ig[ng]*GGT*ig[ng]/(lat0*lat0)=g[ng]*g[ng] (/lat0*lat0) + // gg_global dimension: [cutgg_num_now] (save memory skill is used) + int ig_gge0 = -1; // ig when gg == 0 + + // distribute plane waves and grids and set up fft + void setuptransform (); + + protected: + int* startnsz_per + = nullptr; // useless intermediate variable// startnsz_per[ip]: starting is * nz stick in the ip^th proc. + + // distribute plane waves to different processors + void distribute_g (); + + // distribute real-space grids to different processors + virtual void distribute_r (); + + // prepare for MPI_Alltoall + void getstartgr (); + + public: + // collect gdirect, gcar, gg + void collect_local_pw (); + + public: + int ngg = 0; // number of different modulus (G^2) of G vectors + int* ig2igg = nullptr; //[npw] map ig to igg(nstot) (in distributeg method1 and method2) - void count_pw_st( - int* st_length2D, // the number of planewaves that belong to the stick located on (x, y). - int* st_bottom2D // the z-coordinate of the bottom of stick on (x, y). + // Count the total number of planewaves (tot_npw) and sticks (this->nstot) (in distributeg method1 and method2) + void count_pw_st (int* st_length2D, // the number of planewaves that belong to the stick located on (x, y). + int* st_bottom2D // the z-coordinate of the bottom of stick on (x, y). 
); - //get ig2isz and is2fftixy - void get_ig2isz_is2fftixy( - int* st_bottom, // minimum z of stick, stored in 1d array with tot_nst elements. - int* st_length // the stick on (x, y) consists of st_length[x*fftny+y] planewaves. + // get ig2isz and is2fftixy + void get_ig2isz_is2fftixy (int* st_bottom, // minimum z of stick, stored in 1d array with tot_nst elements. + int* st_length // the stick on (x, y) consists of st_length[x*fftny+y] planewaves. ); - //Collect the x, y indexs, length of the sticks (in distributeg method1) - void collect_st( - int* st_length2D, // the number of planewaves that belong to the stick located on (x, y), stored in 2d x-y plane. - int* st_bottom2D, // the z-coordinate of the bottom of stick on (x, y), stored in 2d x-y plane. - int* st_i, // x or x + fftnx (if x < 0) of stick. - int* st_j, // y or y + fftny (if y < 0) of stick. - int* st_length // number of planewaves in stick, stored in 1d array with tot_nst elements. + // Collect the x, y indexs, length of the sticks (in distributeg method1) + void collect_st (int* st_length2D, // the number of planewaves that belong to the stick located on (x, y), stored in + // 2d x-y plane. + int* st_bottom2D, // the z-coordinate of the bottom of stick on (x, y), stored in 2d x-y plane. + int* st_i, // x or x + fftnx (if x < 0) of stick. + int* st_j, // y or y + fftny (if y < 0) of stick. + int* st_length // number of planewaves in stick, stored in 1d array with tot_nst elements. ); - //get istot2ixy - void get_istot2ixy( - int* st_i, // x or x + fftnx (if x < 0) of stick. - int* st_j // y or y + fftny (if y < 0) of stick. + // get istot2ixy + void get_istot2ixy (int* st_i, // x or x + fftnx (if x < 0) of stick. + int* st_j // y or y + fftny (if y < 0) of stick. ); - //Create the maps from ixy to (in method 2) - void create_maps( - int* st_length2D // the number of planewaves that belong to the stick located on (x, y), stored in 2d x-y plane. 
+ // Create the maps from ixy to (in method 2) + void create_maps ( + int* st_length2D // the number of planewaves that belong to the stick located on (x, y), stored in 2d x-y plane. ); -//=============================================== -// FFT -//=============================================== -public: - // FFT dimensions for wave functions. - int fftnx=0, fftny=0, fftnz=0, fftnxyz=0, fftnxy=0; - int nx=0, ny=0, nz=0, nxyz=0, nxy=0; // Gamma_only: fftny = int(ny/2)-1 , others: fftny = ny - int liy=0, riy=0;// liy: the left edge of the pw ball; riy: the right edge of the pw ball in the y direction - int lix=0, rix=0;// lix: the left edge of the pw ball; rix: the right edge of the pw ball in the x direction - bool xprime = true; // true: when do recip2real, x-fft will be done last and when doing real2recip, x-fft will be - // done first; false: y-fft For gamma_only, true: we use half x; false: we use half y - int ng_xeq0 = 0; //only used when xprime = true, number of g whose gx = 0 - int nmaxgr = 0; // Gamma_only: max between npw and (nrxx+1)/2, others: max between npw and nrxx - // Thus std::complex[nmaxgr] is able to contain either reciprocal or real data + //=============================================== + // FFT + //=============================================== + public: + // FFT dimensions for wave functions. 
+ int fftnx = 0, fftny = 0, fftnz = 0, fftnxyz = 0, fftnxy = 0; + int nx = 0, ny = 0, nz = 0, nxyz = 0, nxy = 0; // Gamma_only: fftny = int(ny/2)-1 , others: fftny = ny + int liy = 0, riy = 0; // liy: the left edge of the pw ball; riy: the right edge of the pw ball in the y direction + int lix = 0, rix = 0; // lix: the left edge of the pw ball; rix: the right edge of the pw ball in the x direction + bool xprime = true; // true: when do recip2real, x-fft will be done last and when doing real2recip, x-fft will be + // done first; false: y-fft For gamma_only, true: we use half x; false: we use half y + int ng_xeq0 = 0; // only used when xprime = true, number of g whose gx = 0 + int nmaxgr = 0; // Gamma_only: max between npw and (nrxx+1)/2, others: max between npw and nrxx + // Thus std::complex[nmaxgr] is able to contain either reciprocal or real data // FFT ft; ModuleBase::FFT_Bundle fft_bundle; - //The position of pointer in and out can be equal(in-place transform) or different(out-of-place transform). - + // The position of pointer in and out can be equal(in-place transform) or different(out-of-place transform). 
+ template - void real2recip(const FPTYPE* in, - std::complex* out, - const bool add = false, - const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns) + void real2recip (const FPTYPE* in, + std::complex* out, + const bool add = false, + const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns) template - void real2recip(const std::complex* in, - std::complex* out, - const bool add = false, - const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns) + void real2recip (const std::complex* in, + std::complex* out, + const bool add = false, + const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns) template - void recip2real(const std::complex* in, - FPTYPE* out, - const bool add = false, - const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny) + void recip2real (const std::complex* in, + FPTYPE* out, + const bool add = false, + const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny) template - void recip2real(const std::complex* in, - std::complex* out, - const bool add = false, - const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny) - + void recip2real (const std::complex* in, + std::complex* out, + const bool add = false, + const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny) + template - void real2recip_gpu(const FPTYPE* in, - std::complex* out, - const bool add = false, - const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns) + void real2recip_gpu (const FPTYPE* in, + std::complex* out, + const bool add = false, + const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns) template - void real2recip_gpu(const std::complex* in, - std::complex* out, - const bool add = false, - const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns) + void real2recip_gpu (const std::complex* in, + std::complex* out, + const bool add = false, + const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns) template - void 
recip2real_gpu(const std::complex* in, - FPTYPE* out, - const bool add = false, - const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny) + void recip2real_gpu (const std::complex* in, + FPTYPE* out, + const bool add = false, + const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny) template - void recip2real_gpu(const std::complex* in, - std::complex* out, - const bool add = false, - const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny) + void recip2real_gpu (const std::complex* in, + std::complex* out, + const bool add = false, + const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny) /** * @brief Converts data from reciprocal space to real space on Cpu @@ -297,7 +289,8 @@ class PW_Basis * It supports complex types as input. * The output can be either the same fundamental type or the underlying real type of a complex type. * - * @tparam FPTYPE The type of the input data, which can only be a compelx type (e.g., std::complex, std::complex) + * @tparam FPTYPE The type of the input data, which can only be a compelx type (e.g., std::complex, + * std::complex) * @tparam Device The device type, must be base_device::DEVICE_CPU. * @tparam std::enable_if::type>::value, int>::type * SFINAE constraint to ensure that FPTYPE is a complex type. 
@@ -317,11 +310,11 @@ class PW_Basis && (std::is_same::type>::value || std::is_same::value) && std::is_same::value, - int>::type - = 0> - void recip_to_real(TK* in, TR* out, const bool add = false, const typename GetTypeReal::type factor = 1.0) const + int>::type = 0> + void + recip_to_real (TK* in, TR* out, const bool add = false, const typename GetTypeReal::type factor = 1.0) const { - this->recip2real(in, out, add, factor); + this->recip2real (in, out, add, factor); }; /** @@ -348,15 +341,12 @@ class PW_Basis && (std::is_same::type>::value || std::is_same::value) && std::is_same::value, - int>::type - = 0> - void recip_to_real(TK* in, - TR* out, - const bool add = false, - const typename GetTypeReal::type factor = 1.0) const - { - this->recip2real_gpu(in,out,add,factor); - }; + int>::type = 0> + void + recip_to_real (TK* in, TR* out, const bool add = false, const typename GetTypeReal::type factor = 1.0) const + { + this->recip2real_gpu (in, out, add, factor); + }; // template ::type>::value - && (std::is_same::type>::value || std::is_same::value) - && std::is_same::value ,int>::type = 0> - void real_to_recip(TR* in, - TK* out, - const bool add = false, - const typename GetTypeReal::type factor = 1.0) const + template ::type>::value + && (std::is_same::type>::value + || std::is_same::value) + && std::is_same::value, + int>::type = 0> + void + real_to_recip (TR* in, TK* out, const bool add = false, const typename GetTypeReal::type factor = 1.0) const { - this->real2recip(in, out, add, factor); + this->real2recip (in, out, add, factor); } - template ::type>::value - && (std::is_same::type>::value || std::is_same::value) - && std::is_same::value ,int>::type = 0> - void real_to_recip(TR* in, - TK* out, - const bool add = false, - const typename GetTypeReal::type factor = 1.0) const - { - this->real2recip_gpu(in,out,add,factor); - }; + template ::type>::value + && (std::is_same::type>::value + || std::is_same::value) + && std::is_same::value, + int>::type = 0> + void 
+ real_to_recip (TR* in, TK* out, const bool add = false, const typename GetTypeReal::type factor = 1.0) const + { + this->real2recip_gpu (in, out, add, factor); + }; protected: - //gather planes and scatter sticks of all processors + // gather planes and scatter sticks of all processors template - void gatherp_scatters(std::complex* in, std::complex* out) const; + void gatherp_scatters (std::complex* in, std::complex* out) const; // gather sticks of and scatter planes of all processors template - void gathers_scatterp(std::complex* in, std::complex* out) const; + void gathers_scatterp (std::complex* in, std::complex* out) const; public: - //get fftixy2is; - void getfftixy2is(int * fftixy2is) const; + // get fftixy2is; + void getfftixy2is (int* fftixy2is) const; using resmem_int_op = base_device::memory::resize_memory_op; using delmem_int_op = base_device::memory::delete_memory_op; - using syncmem_int_h2d_op = base_device::memory::synchronize_memory_op; + using syncmem_int_h2d_op + = base_device::memory::synchronize_memory_op; // using default_device_cpu = base_device::DEVICE_CPU; - - void set_device(std::string device_); - void set_precision(std::string precision_); - std::string get_device() const { return device; } - std::string get_precision() const { return precision; } + void set_device (std::string device_); + void set_precision (std::string precision_); -protected: + std::string + get_device () const + { + return device; + } + std::string + get_precision () const + { + return precision; + } - std::string device = "cpu"; ///< cpu or gpu - std::string precision = "double"; ///< single, double, mixing - bool double_data_ = true; ///< if has double data - bool float_data_ = false; ///< if has float data + protected: + std::string device = "cpu"; ///< cpu or gpu + std::string precision = "double"; ///< single, double, mixing + bool double_data_ = true; ///< if has double data + bool float_data_ = false; ///< if has float data }; -} +} // namespace ModulePW 
#endif // PWBASIS_H #include "pw_basis_sup.h" #include "pw_basis_big.h" //temporary it will be removed diff --git a/source/source_basis/module_pw/pw_basis_big.h b/source/source_basis/module_pw/pw_basis_big.h index 987af787b3f..7b8ea983d89 100644 --- a/source/source_basis/module_pw/pw_basis_big.h +++ b/source/source_basis/module_pw/pw_basis_big.h @@ -7,7 +7,7 @@ #include "mpi.h" #endif -// temporary class, because previous ABACUS consider big grid for fft grids +// temporary class, because previous ABACUS consider big grid for fft grids // which are used for grid integration in LCAO. // In fact, it is unnecessary. It will be moved after grid integration is refactored. namespace ModulePW @@ -15,19 +15,16 @@ namespace ModulePW class PW_Basis_Big : public PW_Basis_Sup { -public: - // combine [bx,by,bz] FFT grids into a big one - // typical values are bx=2, by=2, bz=2 - // nbx=nx/bx, nby=ny/by, nbz=nz/bz, - PW_Basis_Big() - { - } - PW_Basis_Big(std::string device_, std::string precision_) : PW_Basis_Sup(device_, precision_) - { - } + public: + // combine [bx,by,bz] FFT grids into a big one + // typical values are bx=2, by=2, bz=2 + // nbx=nx/bx, nby=ny/by, nbz=nz/bz, + PW_Basis_Big () {} + PW_Basis_Big (std::string device_, std::string precision_) : PW_Basis_Sup (device_, precision_) {} - ~PW_Basis_Big(){}; - void setbxyz(const int bx_in, const int by_in, const int bz_in) + ~PW_Basis_Big () {}; + void + setbxyz (const int bx_in, const int by_in, const int bz_in) { bx = bx_in; by = by_in; @@ -35,323 +32,341 @@ class PW_Basis_Big : public PW_Basis_Sup bxyz = bx * by * bz; } int bx = 1, by = 1, bz = 1, bxyz = 1; - int nbx=0; - int nby=0; - int nbz=0; - int nbzp=0; - int nbxx=0; - int nbzp_start=0; + int nbx = 0; + int nby = 0; + int nbz = 0; + int nbzp = 0; + int nbxx = 0; + int nbzp_start = 0; - void autoset_big_cell_size(int& b_size, const int& nc_size, const int nproc = 0) + void + autoset_big_cell_size (int& b_size, const int& nc_size, const int nproc = 0) { - 
//original default setting is 4 + // original default setting is 4 b_size = 4; - //only for bz - if(nproc > 0) - { - int candidate_lists[4] = {4, 3, 5, 2}; - int max_bz[4]; - for(int i=0;i<4;i++) + // only for bz + if (nproc > 0) { - int tmp = candidate_lists[i]; - max_bz[i] = nc_size / tmp; - if(nc_size % tmp!=0) - {//ignore candidates which can't be factored by nc_size - max_bz[i]=0; - continue; - } - if(max_bz[i] % nproc == 0) - { - b_size = tmp; - return; - } - } + int candidate_lists[4] = {4, 3, 5, 2}; + int max_bz[4]; + for (int i = 0; i < 4; i++) + { + int tmp = candidate_lists[i]; + max_bz[i] = nc_size / tmp; + if (nc_size % tmp != 0) + { // ignore candidates which can't be factored by nc_size + max_bz[i] = 0; + continue; + } + if (max_bz[i] % nproc == 0) + { + b_size = tmp; + return; + } + } - //choose maximum residual - double res = 0.0; - double res_temp = 0.0; - for(int i=0;i<4;i++) - { - if(max_bz[i]==0) continue; - res_temp = double(max_bz[i] % nproc) / nproc; - if(res < res_temp) - { - res = res_temp; - b_size = candidate_lists[i]; - } + // choose maximum residual + double res = 0.0; + double res_temp = 0.0; + for (int i = 0; i < 4; i++) + { + if (max_bz[i] == 0) + continue; + res_temp = double (max_bz[i] % nproc) / nproc; + if (res < res_temp) + { + res = res_temp; + b_size = candidate_lists[i]; + } + } + return; } - return; - } - //for bx and by, choose maximum residual of (5,4,3) + // for bx and by, choose maximum residual of (5,4,3) else - { - int res = 0; - int res_temp = 0; - for(int i=5;i>2;i--) { - res_temp = nc_size % i; - if(res_temp == 0) - { - b_size = i; - return; - } - else if(res < res_temp) - { - res = res_temp; - b_size = i; - } + int res = 0; + int res_temp = 0; + for (int i = 5; i > 2; i--) + { + res_temp = nc_size % i; + if (res_temp == 0) + { + b_size = i; + return; + } + else if (res < res_temp) + { + res = res_temp; + b_size = i; + } + } + return; } - return; - } } + virtual void + initgrids (const double lat0_in, const 
ModuleBase::Matrix3 latvec_in, const double gridecut) + { + // init lattice + this->lat0 = lat0_in; + this->latvec = latvec_in; + this->omega = std::abs (latvec.Det ()) * lat0 * lat0 * lat0; + this->GT = latvec.Inverse (); + this->G = GT.Transpose (); + this->GGT = G * GT; - virtual void initgrids(const double lat0_in,const ModuleBase::Matrix3 latvec_in, - const double gridecut){ - //init lattice - this->lat0 = lat0_in; - this->latvec = latvec_in; - this->omega = std::abs(latvec.Det()) * lat0 * lat0 * lat0; - this->GT = latvec.Inverse(); - this->G = GT.Transpose(); - this->GGT = G * GT; + //------------------------------------------------------------ + //-------------------------init grids------------------------- + //------------------------------------------------------------ + this->tpiba = ModuleBase::TWO_PI / this->lat0; + this->tpiba2 = this->tpiba * this->tpiba; + this->gridecut_lat = gridecut / tpiba2; + ModuleBase::Vector3 lat; + int* ibox = new int[3]; - //------------------------------------------------------------ - //-------------------------init grids------------------------- - //------------------------------------------------------------ - this->tpiba = ModuleBase::TWO_PI / this->lat0; - this->tpiba2 = this->tpiba * this->tpiba; - this->gridecut_lat = gridecut / tpiba2; - ModuleBase::Vector3 lat; - int *ibox = new int[3]; - - lat.x = latvec.e11; - lat.y = latvec.e12; - lat.z = latvec.e13; - ibox[0] = 2 * int(sqrt(gridecut_lat) * sqrt(lat * lat)) + 1; + lat.x = latvec.e11; + lat.y = latvec.e12; + lat.z = latvec.e13; + ibox[0] = 2 * int (sqrt (gridecut_lat) * sqrt (lat * lat)) + 1; - lat.x = latvec.e21; - lat.y = latvec.e22; - lat.z = latvec.e23; - ibox[1] = 2 * int(sqrt(gridecut_lat) * sqrt(lat * lat)) + 1; + lat.x = latvec.e21; + lat.y = latvec.e22; + lat.z = latvec.e23; + ibox[1] = 2 * int (sqrt (gridecut_lat) * sqrt (lat * lat)) + 1; - lat.x = latvec.e31; - lat.y = latvec.e32; - lat.z = latvec.e33; - ibox[2] = 2 * int(sqrt(gridecut_lat) * sqrt(lat 
* lat)) + 1; + lat.x = latvec.e31; + lat.y = latvec.e32; + lat.z = latvec.e33; + ibox[2] = 2 * int (sqrt (gridecut_lat) * sqrt (lat * lat)) + 1; - // We should check if ibox is the minimum number to cover the planewave ball. - // Find the minimum number of ibox by traveling all possible ibox - int n1,n2,n3; - n1 = n2 = n3 = 0; - for(int igz = -ibox[2]+this->poolrank; igz <= ibox[2]; igz += this->poolnproc) - { - for(int igy = -ibox[1]; igy <= ibox[1]; ++igy) - { - for(int igx = -ibox[0]; igx <= ibox[0]; ++igx) + // We should check if ibox is the minimum number to cover the planewave ball. + // Find the minimum number of ibox by traveling all possible ibox + int n1, n2, n3; + n1 = n2 = n3 = 0; + for (int igz = -ibox[2] + this->poolrank; igz <= ibox[2]; igz += this->poolnproc) { - ModuleBase::Vector3 f; - f.x = igx; - f.y = igy; - f.z = igz; - double modulus = f * (this->GGT * f); - if(modulus <= this->gridecut_lat) - { - if(n1 < std::abs(igx)) n1 = std::abs(igx); - if(n2 < std::abs(igy)) n2 = std::abs(igy); - if(n3 < std::abs(igz)) n3 = std::abs(igz); - } + for (int igy = -ibox[1]; igy <= ibox[1]; ++igy) + { + for (int igx = -ibox[0]; igx <= ibox[0]; ++igx) + { + ModuleBase::Vector3 f; + f.x = igx; + f.y = igy; + f.z = igz; + double modulus = f * (this->GGT * f); + if (modulus <= this->gridecut_lat) + { + if (n1 < std::abs (igx)) + n1 = std::abs (igx); + if (n2 < std::abs (igy)) + n2 = std::abs (igy); + if (n3 < std::abs (igz)) + n3 = std::abs (igz); + } + } + } } - } - } - ibox[0] = 2*n1+1; - ibox[1] = 2*n2+1; - ibox[2] = 2*n3+1; + ibox[0] = 2 * n1 + 1; + ibox[1] = 2 * n2 + 1; + ibox[2] = 2 * n3 + 1; #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, ibox, 3, MPI_INT, MPI_MAX , this->pool_world); + MPI_Allreduce (MPI_IN_PLACE, ibox, 3, MPI_INT, MPI_MAX, this->pool_world); #endif - // Find the minimal FFT box size the factors into the primes (2,3,5,7). 
- for (int i = 0; i < 3; i++) - { - int b = 0; - int n2 = 0; - int n3 = 0; - int n5 = 0; - //int n7 = 0; - bool done_factoring = false; - - // increase ibox[i] by 1 until it is totally factorizable by (2,3,5,7) - do - { - b = ibox[i]; + // Find the minimal FFT box size the factors into the primes (2,3,5,7). + for (int i = 0; i < 3; i++) + { + int b = 0; + int n2 = 0; + int n3 = 0; + int n5 = 0; + // int n7 = 0; + bool done_factoring = false; - //n2 = n3 = n5 = n7 = 0; - n2 = n3 = n5 = 0; - done_factoring = false; - if ((this->full_pw && this->full_pw_dim == 2) && b % 2 != 0) done_factoring = true; // full_pw_dim = 2 means FFT dimensions should be even. - while (!done_factoring) - { - if (b % 2 == 0 && (!this->full_pw || this->full_pw_dim != 1)) // full_pw_dim = 1 means FFT dimension should be odd. - { - n2++; - b /= 2; - continue; - } - if (b % 3 == 0) - { - n3++; - b /= 3; - continue; - } - if (b % 5 == 0) - { - n5++; - b /= 5; - continue; - } - //if (b%7==0) { n7++; b /= 7; continue; } - done_factoring = true; - } - ibox[i] += 1; - } - while (b != 1); - ibox[i] -= 1; - // b==1 means fftbox[i] is (2,3,5,7) factorizable - } - //autoset bx/by/bz if not set in INPUT - if(!this->bz) - { - this->autoset_big_cell_size(this->bz, ibox[2], this->poolnproc); - } - if(!this->bx) - { - //if cz == cx, autoset bx==bz for keeping same symmetry - if(ibox[0] == ibox[2]) - { - this->bx = this->bz; - } - else - { - this->autoset_big_cell_size(this->bx, ibox[0]); - } - } - if(!this->by) - { - //if cz == cy, autoset by==bz for keeping same symmetry - if(ibox[1] == ibox[2]) - { - this->by = this->bz; - } - else - { - this->autoset_big_cell_size(this->by, ibox[1]); - } - } - this->bxyz = this->bx * this->by * this->bz; - if(ibox[0]%this->bx != 0) ibox[0] += (this->bx - ibox[0] % this->bx); - if(ibox[1]%this->by != 0) ibox[1] += (this->by - ibox[1] % this->by); - if(ibox[2]%this->bz != 0) ibox[2] += (this->bz - ibox[2] % this->bz); + // increase ibox[i] by 1 until it is totally 
factorizable by (2,3,5,7) + do + { + b = ibox[i]; - this->nx = ibox[0]; - this->ny = ibox[1]; - this->nz = ibox[2]; - this->nxy =this->nx * this->ny; - this->nxyz = this->nxy * this->nz; - this->nbx = this->nx / bx; - this->nby = this->ny / by; - this->nbz = this->nz / bz; + // n2 = n3 = n5 = n7 = 0; + n2 = n3 = n5 = 0; + done_factoring = false; + if ((this->full_pw && this->full_pw_dim == 2) && b % 2 != 0) + done_factoring = true; // full_pw_dim = 2 means FFT dimensions should be even. + while (!done_factoring) + { + if (b % 2 == 0 + && (!this->full_pw + || this->full_pw_dim + != 1)) // full_pw_dim = 1 means FFT dimension should be odd. + { + n2++; + b /= 2; + continue; + } + if (b % 3 == 0) + { + n3++; + b /= 3; + continue; + } + if (b % 5 == 0) + { + n5++; + b /= 5; + continue; + } + // if (b%7==0) { n7++; b /= 7; continue; } + done_factoring = true; + } + ibox[i] += 1; + } + while (b != 1); + ibox[i] -= 1; + // b==1 means fftbox[i] is (2,3,5,7) factorizable + } + // autoset bx/by/bz if not set in INPUT + if (!this->bz) + { + this->autoset_big_cell_size (this->bz, ibox[2], this->poolnproc); + } + if (!this->bx) + { + // if cz == cx, autoset bx==bz for keeping same symmetry + if (ibox[0] == ibox[2]) + { + this->bx = this->bz; + } + else + { + this->autoset_big_cell_size (this->bx, ibox[0]); + } + } + if (!this->by) + { + // if cz == cy, autoset by==bz for keeping same symmetry + if (ibox[1] == ibox[2]) + { + this->by = this->bz; + } + else + { + this->autoset_big_cell_size (this->by, ibox[1]); + } + } + this->bxyz = this->bx * this->by * this->bz; + if (ibox[0] % this->bx != 0) + ibox[0] += (this->bx - ibox[0] % this->bx); + if (ibox[1] % this->by != 0) + ibox[1] += (this->by - ibox[1] % this->by); + if (ibox[2] % this->bz != 0) + ibox[2] += (this->bz - ibox[2] % this->bz); - delete[] ibox; - return; + this->nx = ibox[0]; + this->ny = ibox[1]; + this->nz = ibox[2]; + this->nxy = this->nx * this->ny; + this->nxyz = this->nxy * this->nz; + this->nbx = this->nx / 
bx; + this->nby = this->ny / by; + this->nbz = this->nz / bz; + delete[] ibox; + return; } - virtual void initgrids( - const double lat0_in, - const ModuleBase::Matrix3 latvec_in, // Unitcell lattice vectors - const int nx_in, int ny_in, int nz_in - ) + virtual void + initgrids (const double lat0_in, + const ModuleBase::Matrix3 latvec_in, // Unitcell lattice vectors + const int nx_in, + int ny_in, + int nz_in) { this->lat0 = lat0_in; this->tpiba = ModuleBase::TWO_PI / this->lat0; - this->tpiba2 = this->tpiba*this->tpiba; + this->tpiba2 = this->tpiba * this->tpiba; this->latvec = latvec_in; - this->omega = std::abs(latvec.Det()) * lat0 * lat0 * lat0; - this->GT = latvec.Inverse(); - this->G = GT.Transpose(); - this->GGT = G * GT; + this->omega = std::abs (latvec.Det ()) * lat0 * lat0 * lat0; + this->GT = latvec.Inverse (); + this->G = GT.Transpose (); + this->GGT = G * GT; this->nx = nx_in; this->ny = ny_in; this->nz = nz_in; // autoset bx/by/bz if not set in INPUT if (!this->bz) - { - this->autoset_big_cell_size(this->bz, nz, this->poolnproc); - } + { + this->autoset_big_cell_size (this->bz, nz, this->poolnproc); + } if (!this->bx) - { - // if cz == cx, autoset bx==bz for keeping same symmetry - if (nx == nz) - { - this->bx = this->bz; - } - else - { - this->autoset_big_cell_size(this->bx, nx); - } - } + { + // if cz == cx, autoset bx==bz for keeping same symmetry + if (nx == nz) + { + this->bx = this->bz; + } + else + { + this->autoset_big_cell_size (this->bx, nx); + } + } if (!this->by) - { - // if cz == cy, autoset by==bz for keeping same symmetry - if (ny == nz) - { - this->by = this->bz; - } - else - { - this->autoset_big_cell_size(this->by, ny); - } - } + { + // if cz == cy, autoset by==bz for keeping same symmetry + if (ny == nz) + { + this->by = this->bz; + } + else + { + this->autoset_big_cell_size (this->by, ny); + } + } this->bxyz = this->bx * this->by * this->bz; - if(this->nx%this->bx != 0) this->nx += (this->bx - this->nx % this->bx); - 
if(this->ny%this->by != 0) this->ny += (this->by - this->ny % this->by); - if(this->nz%this->bz != 0) this->nz += (this->bz - this->nz % this->bz); + if (this->nx % this->bx != 0) + this->nx += (this->bx - this->nx % this->bx); + if (this->ny % this->by != 0) + this->ny += (this->by - this->ny % this->by); + if (this->nz % this->bz != 0) + this->nz += (this->bz - this->nz % this->bz); this->nbx = this->nx / bx; this->nby = this->ny / by; this->nbz = this->nz / bz; this->nxy = this->nx * this->ny; this->nxyz = this->nxy * this->nz; - int *ibox = new int[3]; - ibox[0] = int((this->nx-1)/2)+1; - ibox[1] = int((this->ny-1)/2)+1; - ibox[2] = int((this->nz-1)/2)+1; + int* ibox = new int[3]; + ibox[0] = int ((this->nx - 1) / 2) + 1; + ibox[1] = int ((this->ny - 1) / 2) + 1; + ibox[2] = int ((this->nz - 1) / 2) + 1; this->gridecut_lat = 1e20; int count = 0; - for(int igz = -ibox[2]; igz <= ibox[2]; ++igz) - { - for(int igy = -ibox[1]; igy <= ibox[1]; ++igy) + for (int igz = -ibox[2]; igz <= ibox[2]; ++igz) { - for(int igx = -ibox[0]; igx <= ibox[0]; ++igx) - { - ++count; - if(count%this->poolnproc != this->poolrank) continue; - if(std::abs(igx)<=ibox[0]-1 && std::abs(igy)<=ibox[1]-1 && std::abs(igz)<=ibox[2]-1 ) continue; - ModuleBase::Vector3 f; - f.x = igx; - f.y = igy; - f.z = igz; - double modulus = f * (this->GGT * f); - if(modulus < this->gridecut_lat) + for (int igy = -ibox[1]; igy <= ibox[1]; ++igy) { - this->gridecut_lat = modulus; + for (int igx = -ibox[0]; igx <= ibox[0]; ++igx) + { + ++count; + if (count % this->poolnproc != this->poolrank) + continue; + if (std::abs (igx) <= ibox[0] - 1 && std::abs (igy) <= ibox[1] - 1 + && std::abs (igz) <= ibox[2] - 1) + continue; + ModuleBase::Vector3 f; + f.x = igx; + f.y = igy; + f.z = igz; + double modulus = f * (this->GGT * f); + if (modulus < this->gridecut_lat) + { + this->gridecut_lat = modulus; + } + } } - } } - } #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, &this->gridecut_lat, 1, MPI_DOUBLE, MPI_MIN , 
this->pool_world); + MPI_Allreduce (MPI_IN_PLACE, &this->gridecut_lat, 1, MPI_DOUBLE, MPI_MIN, this->pool_world); #endif this->gridecut_lat -= 1e-6; @@ -360,34 +375,38 @@ class PW_Basis_Big : public PW_Basis_Sup return; } - virtual void distribute_r() - { - delete[] this->numz; this->numz = new int[this->poolnproc]; - delete[] this->startz; this->startz = new int[this->poolnproc]; - ModuleBase::GlobalFunc::ZEROS(this->numz, this->poolnproc); - ModuleBase::GlobalFunc::ZEROS(this->startz, this->poolnproc); + virtual void + distribute_r () + { + delete[] this->numz; + this->numz = new int[this->poolnproc]; + delete[] this->startz; + this->startz = new int[this->poolnproc]; + ModuleBase::GlobalFunc::ZEROS (this->numz, this->poolnproc); + ModuleBase::GlobalFunc::ZEROS (this->startz, this->poolnproc); int npbz = this->nbz / this->poolnproc; int modbz = this->nbz % this->poolnproc; this->startz[0] = 0; - for(int ip = 0 ; ip < this->poolnproc ; ++ip) - { - this->numz[ip] = npbz*this->bz; - if(ip < modbz) this->numz[ip]+=this->bz; - if(ip < this->poolnproc - 1) this->startz[ip+1] = this->startz[ip] + numz[ip]; - if(ip == this->poolrank) + for (int ip = 0; ip < this->poolnproc; ++ip) { - this->nplane = numz[ip]; - this->startz_current = startz[ip]; + this->numz[ip] = npbz * this->bz; + if (ip < modbz) + this->numz[ip] += this->bz; + if (ip < this->poolnproc - 1) + this->startz[ip + 1] = this->startz[ip] + numz[ip]; + if (ip == this->poolrank) + { + this->nplane = numz[ip]; + this->startz_current = startz[ip]; + } } - } this->nbzp = this->nplane / this->bz; this->nrxx = this->numz[this->poolrank] * this->nxy; this->nbxx = this->nbzp * this->nbx * this->nby; this->nbzp_start = this->startz[this->poolrank] / this->bz; return; } - }; -} +} // namespace ModulePW #endif \ No newline at end of file diff --git a/source/source_basis/module_pw/pw_basis_k.cpp b/source/source_basis/module_pw/pw_basis_k.cpp index ece4674c4bf..174aff1ef11 100644 --- 
a/source/source_basis/module_pw/pw_basis_k.cpp +++ b/source/source_basis/module_pw/pw_basis_k.cpp @@ -10,12 +10,12 @@ namespace ModulePW { -PW_Basis_K::PW_Basis_K() +PW_Basis_K::PW_Basis_K () { classname = "PW_Basis_K"; - this->fft_bundle.setfft(this->device, this->precision); + this->fft_bundle.setfft (this->device, this->precision); } -PW_Basis_K::~PW_Basis_K() +PW_Basis_K::~PW_Basis_K () { delete[] kvec_d; delete[] kvec_c; @@ -25,29 +25,30 @@ PW_Basis_K::~PW_Basis_K() delete[] gk2; #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") - { - delmem_sd_op()(this->s_kvec_c); - delmem_sd_op()(this->s_gcar); - delmem_sd_op()(this->s_gk2); - delmem_dd_op()(this->d_gcar); - delmem_dd_op()(this->d_gk2); - delmem_dd_op()(this->d_kvec_c); - delmem_int_op()(this->ig2ixyz_k); - delmem_int_op()(this->d_igl2isz_k); - } + { + delmem_sd_op () (this->s_kvec_c); + delmem_sd_op () (this->s_gcar); + delmem_sd_op () (this->s_gk2); + delmem_dd_op () (this->d_gcar); + delmem_dd_op () (this->d_gk2); + delmem_dd_op () (this->d_kvec_c); + delmem_int_op () (this->ig2ixyz_k); + delmem_int_op () (this->d_igl2isz_k); + } else - { + { #endif - delmem_sh_op()(this->s_kvec_c); - delmem_sh_op()(this->s_gcar); - delmem_sh_op()(this->s_gk2); - // There's no need to delete double pointers while in a CPU environment. + delmem_sh_op () (this->s_kvec_c); + delmem_sh_op () (this->s_gcar); + delmem_sh_op () (this->s_gk2); + // There's no need to delete double pointers while in a CPU environment. 
#if defined(__CUDA) || defined(__ROCM) - } + } #endif } -void PW_Basis_K::initparameters(const bool gamma_only_in, +void + PW_Basis_K::initparameters (const bool gamma_only_in, const double gk_ecut_in, const int nks_in, // number of k points in this pool const ModuleBase::Vector3* kvec_d_in, // Direct coordinates of k points @@ -62,141 +63,144 @@ void PW_Basis_K::initparameters(const bool gamma_only_in, double kmaxmod = 0; for (int ik = 0; ik < this->nks; ++ik) - { - this->kvec_d[ik] = kvec_d_in[ik]; - this->kvec_c[ik] = this->kvec_d[ik] * this->G; - double kmod = sqrt(this->kvec_c[ik] * this->kvec_c[ik]); - if (kmod > kmaxmod) { - kmaxmod = kmod; + this->kvec_d[ik] = kvec_d_in[ik]; + this->kvec_c[ik] = this->kvec_d[ik] * this->G; + double kmod = sqrt (this->kvec_c[ik] * this->kvec_c[ik]); + if (kmod > kmaxmod) + { + kmaxmod = kmod; + } } - } this->gk_ecut = gk_ecut_in / this->tpiba2; - this->ggecut = pow(sqrt(this->gk_ecut) + kmaxmod, 2); + this->ggecut = pow (sqrt (this->gk_ecut) + kmaxmod, 2); if (this->ggecut > this->gridecut_lat) - { - this->ggecut = this->gridecut_lat; - this->gk_ecut = pow(sqrt(this->ggecut) - kmaxmod, 2); - } + { + this->ggecut = this->gridecut_lat; + this->gk_ecut = pow (sqrt (this->ggecut) - kmaxmod, 2); + } this->gamma_only = gamma_only_in; if (kmaxmod > 0) - { - this->gamma_only = false; // if it is not the gamma point, we do not use gamma_only - } + { + this->gamma_only = false; // if it is not the gamma point, we do not use gamma_only + } this->xprime = xprime_in; this->fftny = this->ny; this->fftnx = this->nx; if (this->gamma_only) - { - if (this->xprime) { - this->fftnx = int(this->nx / 2) + 1; - } - else - { - this->fftny = int(this->ny / 2) + 1; + if (this->xprime) + { + this->fftnx = int (this->nx / 2) + 1; + } + else + { + this->fftny = int (this->ny / 2) + 1; + } } - } this->fftnz = this->nz; this->fftnxy = this->fftnx * this->fftny; this->fftnxyz = this->fftnxy * this->fftnz; this->distribution_type = distribution_type_in; #if 
defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") - { - if (this->float_data_) { - resmem_sd_op()(this->s_kvec_c, this->nks * 3); - castmem_d2s_h2d_op()(this->s_kvec_c, reinterpret_cast(&this->kvec_c[0][0]), this->nks * 3); + if (this->float_data_) + { + resmem_sd_op () (this->s_kvec_c, this->nks * 3); + castmem_d2s_h2d_op () (this->s_kvec_c, + reinterpret_cast (&this->kvec_c[0][0]), + this->nks * 3); + } + resmem_dd_op () (this->d_kvec_c, this->nks * 3); + syncmem_d2d_h2d_op () (this->d_kvec_c, reinterpret_cast (&this->kvec_c[0][0]), this->nks * 3); } - resmem_dd_op()(this->d_kvec_c, this->nks * 3); - syncmem_d2d_h2d_op()(this->d_kvec_c, reinterpret_cast(&this->kvec_c[0][0]), this->nks * 3); - } else - { -#endif - if (this->float_data_) { - resmem_sh_op()(this->s_kvec_c, this->nks * 3); - castmem_d2s_h2h_op()(this->s_kvec_c, reinterpret_cast(&this->kvec_c[0][0]), this->nks * 3); - } - this->d_kvec_c = reinterpret_cast(&this->kvec_c[0][0]); - // There's no need to allocate double pointers while in a CPU environment. +#endif + if (this->float_data_) + { + resmem_sh_op () (this->s_kvec_c, this->nks * 3); + castmem_d2s_h2h_op () (this->s_kvec_c, + reinterpret_cast (&this->kvec_c[0][0]), + this->nks * 3); + } + this->d_kvec_c = reinterpret_cast (&this->kvec_c[0][0]); + // There's no need to allocate double pointers while in a CPU environment. 
#if defined(__CUDA) || defined(__ROCM) - } + } #endif } -void PW_Basis_K::setupIndGk() +void + PW_Basis_K::setupIndGk () { // count npwk this->npwk_max = 0; delete[] this->npwk; this->npwk = new int[this->nks]; for (int ik = 0; ik < this->nks; ik++) - { - int ng = 0; - for (int ig = 0; ig < this->npw; ig++) { - const double gk2 = this->cal_GplusK_cartesian(ik, ig).norm2(); - if (gk2 <= this->gk_ecut) - { - ++ng; - } - } - this->npwk[ik] = ng; - int ng_global_k = ng; + int ng = 0; + for (int ig = 0; ig < this->npw; ig++) + { + const double gk2 = this->cal_GplusK_cartesian (ik, ig).norm2 (); + if (gk2 <= this->gk_ecut) + { + ++ng; + } + } + this->npwk[ik] = ng; + int ng_global_k = ng; #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, &ng_global_k, 1, MPI_INT, MPI_SUM, this->pool_world); + MPI_Allreduce (MPI_IN_PLACE, &ng_global_k, 1, MPI_INT, MPI_SUM, this->pool_world); #endif - const char* no_pw_message = "Current core has no plane waves! Please reduce the cores."; - if (ng_global_k == 0) - { - no_pw_message = "No plane waves are available for this k-point across the whole pool. Please increase ecutwfc or check KPT settings."; - } - ModuleBase::CHECK_WARNING_QUIT((ng == 0), - "pw_basis_k.cpp", - PARAM.inp.calculation, - no_pw_message); - if (this->npwk_max < ng) - { - this->npwk_max = ng; + const char* no_pw_message = "Current core has no plane waves! Please reduce the cores."; + if (ng_global_k == 0) + { + no_pw_message = "No plane waves are available for this k-point across the whole pool. 
Please " + "increase ecutwfc or check KPT settings."; + } + ModuleBase::CHECK_WARNING_QUIT ((ng == 0), "pw_basis_k.cpp", PARAM.inp.calculation, no_pw_message); + if (this->npwk_max < ng) + { + this->npwk_max = ng; + } } - } // get igl2isz_k and igl2ig_k if (this->npwk_max <= 0) - { - return; - } + { + return; + } delete[] igl2isz_k; this->igl2isz_k = new int[this->nks * this->npwk_max]; delete[] igl2ig_k; this->igl2ig_k = new int[this->nks * this->npwk_max]; for (int ik = 0; ik < this->nks; ik++) - { - int igl = 0; - for (int ig = 0; ig < this->npw; ig++) { - const double gk2 = this->cal_GplusK_cartesian(ik, ig).norm2(); - if (gk2 <= this->gk_ecut) - { - this->igl2isz_k[ik * npwk_max + igl] = this->ig2isz[ig]; - this->igl2ig_k[ik * npwk_max + igl] = ig; - ++igl; - } + int igl = 0; + for (int ig = 0; ig < this->npw; ig++) + { + const double gk2 = this->cal_GplusK_cartesian (ik, ig).norm2 (); + if (gk2 <= this->gk_ecut) + { + this->igl2isz_k[ik * npwk_max + igl] = this->ig2isz[ig]; + this->igl2ig_k[ik * npwk_max + igl] = ig; + ++igl; + } + } } - } #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") - { - resmem_int_op()(this->d_igl2isz_k, this->npwk_max * this->nks); - syncmem_int_h2d_op()(this->d_igl2isz_k, this->igl2isz_k, this->npwk_max * this->nks); - } + { + resmem_int_op () (this->d_igl2isz_k, this->npwk_max * this->nks); + syncmem_int_h2d_op () (this->d_igl2isz_k, this->igl2isz_k, this->npwk_max * this->nks); + } #endif - this->get_ig2ixyz_k(); + this->get_ig2ixyz_k (); return; } @@ -205,171 +209,175 @@ void PW_Basis_K::setupIndGk() /// set up maps for fft and create arrays for MPI_Alltoall /// set up ffts /// -void PW_Basis_K::setuptransform() +void + PW_Basis_K::setuptransform () { - ModuleBase::timer::start(this->classname, "setuptransform"); - this->distribute_r(); - this->distribute_g(); - this->getstartgr(); - this->setupIndGk(); - this->fft_bundle.clear(); + ModuleBase::timer::start (this->classname, "setuptransform"); + this->distribute_r 
(); + this->distribute_g (); + this->getstartgr (); + this->setupIndGk (); + this->fft_bundle.clear (); std::string fft_device = this->device; #if defined(__DSP) fft_device = "dsp"; - this->fft_bundle.set_dsp_cluster_id(GlobalV::MY_RANK % PARAM.inp.dsp_count); + this->fft_bundle.set_dsp_cluster_id (GlobalV::MY_RANK % PARAM.inp.dsp_count); #endif - this->fft_bundle.setfft(fft_device, this->precision); + this->fft_bundle.setfft (fft_device, this->precision); if (this->xprime) - { - this->fft_bundle.initfft(this->nx, - this->ny, - this->nz, - this->lix, - this->rix, - this->nst, - this->nplane, - this->poolnproc, - this->gamma_only, - this->xprime); - } + { + this->fft_bundle.initfft (this->nx, + this->ny, + this->nz, + this->lix, + this->rix, + this->nst, + this->nplane, + this->poolnproc, + this->gamma_only, + this->xprime); + } else - { - this->fft_bundle.initfft(this->nx, - this->ny, - this->nz, - this->liy, - this->riy, - this->nst, - this->nplane, - this->poolnproc, - this->gamma_only, - this->xprime); - } - this->fft_bundle.setupFFT(); - ModuleBase::timer::end(this->classname, "setuptransform"); + { + this->fft_bundle.initfft (this->nx, + this->ny, + this->nz, + this->liy, + this->riy, + this->nst, + this->nplane, + this->poolnproc, + this->gamma_only, + this->xprime); + } + this->fft_bundle.setupFFT (); + ModuleBase::timer::end (this->classname, "setuptransform"); } -void PW_Basis_K::collect_local_pw(const double& erf_ecut_in, const double& erf_height_in, const double& erf_sigma_in) +void + PW_Basis_K::collect_local_pw (const double& erf_ecut_in, const double& erf_height_in, const double& erf_sigma_in) { this->erf_ecut = erf_ecut_in; this->erf_height = erf_height_in; this->erf_sigma = erf_sigma_in; if (this->npwk_max <= 0) - { - return; - } + { + return; + } delete[] gk2; delete[] gcar; this->gk2 = new double[this->npwk_max * this->nks]; this->gcar = new ModuleBase::Vector3[this->npwk_max * this->nks]; - ModuleBase::Memory::record("PW_B_K::gk2", sizeof(double) 
* this->npwk_max * this->nks); - ModuleBase::Memory::record("PW_B_K::gcar", sizeof(ModuleBase::Vector3) * this->npwk_max * this->nks); + ModuleBase::Memory::record ("PW_B_K::gk2", sizeof (double) * this->npwk_max * this->nks); + ModuleBase::Memory::record ("PW_B_K::gcar", sizeof (ModuleBase::Vector3) * this->npwk_max * this->nks); ModuleBase::Vector3 f; for (int ik = 0; ik < this->nks; ++ik) - { - ModuleBase::Vector3 kv = this->kvec_d[ik]; - for (int igl = 0; igl < this->npwk[ik]; ++igl) { - int isz = this->igl2isz_k[ik * npwk_max + igl]; - int iz = isz % this->nz; - int is = isz / this->nz; - int ixy = this->is2fftixy[is]; - int ix = ixy / this->fftny; - int iy = ixy % this->fftny; - if (ix >= int(this->nx / 2) + 1) - { - ix -= this->nx; - } - if (iy >= int(this->ny / 2) + 1) - { - iy -= this->ny; - } - if (iz >= int(this->nz / 2) + 1) - { - iz -= this->nz; - } - f.x = ix; - f.y = iy; - f.z = iz; + ModuleBase::Vector3 kv = this->kvec_d[ik]; + for (int igl = 0; igl < this->npwk[ik]; ++igl) + { + int isz = this->igl2isz_k[ik * npwk_max + igl]; + int iz = isz % this->nz; + int is = isz / this->nz; + int ixy = this->is2fftixy[is]; + int ix = ixy / this->fftny; + int iy = ixy % this->fftny; + if (ix >= int (this->nx / 2) + 1) + { + ix -= this->nx; + } + if (iy >= int (this->ny / 2) + 1) + { + iy -= this->ny; + } + if (iz >= int (this->nz / 2) + 1) + { + iz -= this->nz; + } + f.x = ix; + f.y = iy; + f.z = iz; - this->gcar[ik * npwk_max + igl] = f * this->G; - double temp_gk2 = (f + kv) * (this->GGT * (f + kv)); - if (erf_height > 0) - { - this->gk2[ik * npwk_max + igl] - = temp_gk2 + erf_height / tpiba2 * (1.0 + std::erf((temp_gk2 * tpiba2 - erf_ecut) / erf_sigma)); - } - else - { - this->gk2[ik * npwk_max + igl] = temp_gk2; - } + this->gcar[ik * npwk_max + igl] = f * this->G; + double temp_gk2 = (f + kv) * (this->GGT * (f + kv)); + if (erf_height > 0) + { + this->gk2[ik * npwk_max + igl] + = temp_gk2 + + erf_height / tpiba2 * (1.0 + std::erf ((temp_gk2 * tpiba2 - 
erf_ecut) / erf_sigma)); + } + else + { + this->gk2[ik * npwk_max + igl] = temp_gk2; + } + } } - } #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") - { - if (this->float_data_) - { - resmem_sd_op()(this->s_gk2, this->npwk_max * this->nks); - resmem_sd_op()(this->s_gcar, this->npwk_max * this->nks * 3); - castmem_d2s_h2d_op()(this->s_gk2, this->gk2, this->npwk_max * this->nks); - castmem_d2s_h2d_op()(this->s_gcar, - reinterpret_cast(&this->gcar[0][0]), - this->npwk_max * this->nks * 3); - } - if (this->double_data_) { - resmem_dd_op()(this->d_gk2, this->npwk_max * this->nks); - resmem_dd_op()(this->d_gcar, this->npwk_max * this->nks * 3); - syncmem_d2d_h2d_op()(this->d_gk2, this->gk2, this->npwk_max * this->nks); - syncmem_d2d_h2d_op()(this->d_gcar, - reinterpret_cast(&this->gcar[0][0]), - this->npwk_max * this->nks * 3); + if (this->float_data_) + { + resmem_sd_op () (this->s_gk2, this->npwk_max * this->nks); + resmem_sd_op () (this->s_gcar, this->npwk_max * this->nks * 3); + castmem_d2s_h2d_op () (this->s_gk2, this->gk2, this->npwk_max * this->nks); + castmem_d2s_h2d_op () (this->s_gcar, + reinterpret_cast (&this->gcar[0][0]), + this->npwk_max * this->nks * 3); + } + if (this->double_data_) + { + resmem_dd_op () (this->d_gk2, this->npwk_max * this->nks); + resmem_dd_op () (this->d_gcar, this->npwk_max * this->nks * 3); + syncmem_d2d_h2d_op () (this->d_gk2, this->gk2, this->npwk_max * this->nks); + syncmem_d2d_h2d_op () (this->d_gcar, + reinterpret_cast (&this->gcar[0][0]), + this->npwk_max * this->nks * 3); + } } - } else - { -#endif - if (this->float_data_) - { - resmem_sh_op()(this->s_gk2, this->npwk_max * this->nks, "PW_B_K::s_gk2"); - resmem_sh_op()(this->s_gcar, this->npwk_max * this->nks * 3, "PW_B_K::s_gcar"); - castmem_d2s_h2h_op()(this->s_gk2, this->gk2, this->npwk_max * this->nks); - castmem_d2s_h2h_op()(this->s_gcar, - reinterpret_cast(&this->gcar[0][0]), - this->npwk_max * this->nks * 3); - } - if (this->double_data_) { - this->d_gcar 
= reinterpret_cast(&this->gcar[0][0]); - this->d_gk2 = this->gk2; - } - // There's no need to allocate double pointers while in a CPU environment. +#endif + if (this->float_data_) + { + resmem_sh_op () (this->s_gk2, this->npwk_max * this->nks, "PW_B_K::s_gk2"); + resmem_sh_op () (this->s_gcar, this->npwk_max * this->nks * 3, "PW_B_K::s_gcar"); + castmem_d2s_h2h_op () (this->s_gk2, this->gk2, this->npwk_max * this->nks); + castmem_d2s_h2h_op () (this->s_gcar, + reinterpret_cast (&this->gcar[0][0]), + this->npwk_max * this->nks * 3); + } + if (this->double_data_) + { + this->d_gcar = reinterpret_cast (&this->gcar[0][0]); + this->d_gk2 = this->gk2; + } + // There's no need to allocate double pointers while in a CPU environment. #if defined(__CUDA) || defined(__ROCM) - } + } #endif } -ModuleBase::Vector3 PW_Basis_K::cal_GplusK_cartesian(const int ik, const int ig) const +ModuleBase::Vector3 + PW_Basis_K::cal_GplusK_cartesian (const int ik, const int ig) const { int isz = this->ig2isz[ig]; int iz = isz % this->nz; int is = isz / this->nz; int ix = this->is2fftixy[is] / this->fftny; int iy = this->is2fftixy[is] % this->fftny; - if (ix >= int(this->nx / 2) + 1) - { - ix -= this->nx; - } - if (iy >= int(this->ny / 2) + 1) - { - iy -= this->ny; - } - if (iz >= int(this->nz / 2) + 1) - { - iz -= this->nz; - } + if (ix >= int (this->nx / 2) + 1) + { + ix -= this->nx; + } + if (iy >= int (this->ny / 2) + 1) + { + iy -= this->ny; + } + if (iz >= int (this->nz / 2) + 1) + { + iz -= this->nz; + } ModuleBase::Vector3 f; f.x = ix; f.y = iy; @@ -379,154 +387,170 @@ ModuleBase::Vector3 PW_Basis_K::cal_GplusK_cartesian(const int ik, const return g_temp_; } -double& PW_Basis_K::getgk2(const int ik, const int igl) const +double& + PW_Basis_K::getgk2 (const int ik, const int igl) const { return this->gk2[ik * this->npwk_max + igl]; } -ModuleBase::Vector3& PW_Basis_K::getgcar(const int ik, const int igl) const +ModuleBase::Vector3& + PW_Basis_K::getgcar (const int ik, const int igl) const 
{ return this->gcar[ik * this->npwk_max + igl]; } -ModuleBase::Vector3 PW_Basis_K::getgdirect(const int ik, const int igl) const +ModuleBase::Vector3 + PW_Basis_K::getgdirect (const int ik, const int igl) const { ModuleBase::Vector3 f = this->latvec * this->gcar[ik * this->npwk_max + igl]; - f.x = std::round(f.x); - f.y = std::round(f.y); - f.z = std::round(f.z); + f.x = std::round (f.x); + f.y = std::round (f.y); + f.z = std::round (f.z); return f; } -ModuleBase::Vector3 PW_Basis_K::getgpluskcar(const int ik, const int igl) const +ModuleBase::Vector3 + PW_Basis_K::getgpluskcar (const int ik, const int igl) const { return this->gcar[ik * this->npwk_max + igl] + this->kvec_c[ik]; } -int& PW_Basis_K::getigl2isz(const int ik, const int igl) const +int& + PW_Basis_K::getigl2isz (const int ik, const int igl) const { return this->igl2isz_k[ik * this->npwk_max + igl]; } -int& PW_Basis_K::getigl2ig(const int ik, const int igl) const +int& + PW_Basis_K::getigl2ig (const int ik, const int igl) const { return this->igl2ig_k[ik * this->npwk_max + igl]; } -void PW_Basis_K::get_ig2ixyz_k() +void + PW_Basis_K::get_ig2ixyz_k () { #if not defined(__DSP) if (this->device != "gpu") - { - // only GPU need to get ig2ixyz_k - return; - } + { + // only GPU need to get ig2ixyz_k + return; + } #endif - ig2ixyz_k_cpu.resize(this->npwk_max * this->nks); - ModuleBase::Memory::record("PW_B_K::ig2ixyz", sizeof(int) * this->npwk_max * this->nks); - assert(gamma_only == false); // We only finish non-gamma_only fft on GPU temperarily. + ig2ixyz_k_cpu.resize (this->npwk_max * this->nks); + ModuleBase::Memory::record ("PW_B_K::ig2ixyz", sizeof (int) * this->npwk_max * this->nks); + assert (gamma_only == false); // We only finish non-gamma_only fft on GPU temperarily. 
for (int ik = 0; ik < this->nks; ++ik) - { - for (int igl = 0; igl < this->npwk[ik]; ++igl) { - int isz = this->igl2isz_k[igl + ik * npwk_max]; - int iz = isz % this->nz; - int is = isz / this->nz; - int ixy = this->is2fftixy[is]; - int iy = ixy % this->ny; - int ix = ixy / this->ny; - ig2ixyz_k_cpu[igl + ik * npwk_max] = iz + iy * nz + ix * ny * nz; + for (int igl = 0; igl < this->npwk[ik]; ++igl) + { + int isz = this->igl2isz_k[igl + ik * npwk_max]; + int iz = isz % this->nz; + int is = isz / this->nz; + int ixy = this->is2fftixy[is]; + int iy = ixy % this->ny; + int ix = ixy / this->ny; + ig2ixyz_k_cpu[igl + ik * npwk_max] = iz + iy * nz + ix * ny * nz; + } } - } - resmem_int_op()(ig2ixyz_k, this->npwk_max * this->nks); - syncmem_int_h2d_op()(this->ig2ixyz_k, ig2ixyz_k_cpu.data(), this->npwk_max * this->nks); + resmem_int_op () (ig2ixyz_k, this->npwk_max * this->nks); + syncmem_int_h2d_op () (this->ig2ixyz_k, ig2ixyz_k_cpu.data (), this->npwk_max * this->nks); } -std::vector PW_Basis_K::get_ig2ix(const int ik) const +std::vector + PW_Basis_K::get_ig2ix (const int ik) const { std::vector ig_to_ix; - ig_to_ix.resize(npwk[ik]); + ig_to_ix.resize (npwk[ik]); for (int ig = 0; ig < npwk[ik]; ig++) - { - int isz = this->igl2isz_k[ig + ik * npwk_max]; - int is = isz / this->nz; - int ixy = this->is2fftixy[is]; - int ix = ixy / this->ny; - if (ix < (nx / 2) + 1) { - ix += nx; + int isz = this->igl2isz_k[ig + ik * npwk_max]; + int is = isz / this->nz; + int ixy = this->is2fftixy[is]; + int ix = ixy / this->ny; + if (ix < (nx / 2) + 1) + { + ix += nx; + } + ig_to_ix[ig] = ix; } - ig_to_ix[ig] = ix; - } return ig_to_ix; } -std::vector PW_Basis_K::get_ig2iy(const int ik) const +std::vector + PW_Basis_K::get_ig2iy (const int ik) const { std::vector ig_to_iy; - ig_to_iy.resize(npwk[ik]); + ig_to_iy.resize (npwk[ik]); for (int ig = 0; ig < npwk[ik]; ig++) - { - int isz = this->igl2isz_k[ig + ik * npwk_max]; - int is = isz / this->nz; - int ixy = this->is2fftixy[is]; - int iy = 
ixy % this->ny; - if (iy < (ny / 2) + 1) { - iy += ny; + int isz = this->igl2isz_k[ig + ik * npwk_max]; + int is = isz / this->nz; + int ixy = this->is2fftixy[is]; + int iy = ixy % this->ny; + if (iy < (ny / 2) + 1) + { + iy += ny; + } + ig_to_iy[ig] = iy; } - ig_to_iy[ig] = iy; - } return ig_to_iy; } -std::vector PW_Basis_K::get_ig2iz(const int ik) const +std::vector + PW_Basis_K::get_ig2iz (const int ik) const { std::vector ig_to_iz; - ig_to_iz.resize(npwk[ik]); + ig_to_iz.resize (npwk[ik]); for (int ig = 0; ig < npwk[ik]; ig++) - { - int isz = this->igl2isz_k[ig + ik * npwk_max]; - int iz = isz % this->nz; - if (iz < (nz / 2) + 1) { - iz += nz; + int isz = this->igl2isz_k[ig + ik * npwk_max]; + int iz = isz % this->nz; + if (iz < (nz / 2) + 1) + { + iz += nz; + } + ig_to_iz[ig] = iz; } - ig_to_iz[ig] = iz; - } return ig_to_iz; } template <> -float* PW_Basis_K::get_kvec_c_data() const +float* + PW_Basis_K::get_kvec_c_data () const { return this->s_kvec_c; } template <> -double* PW_Basis_K::get_kvec_c_data() const +double* + PW_Basis_K::get_kvec_c_data () const { return this->d_kvec_c; } template <> -float* PW_Basis_K::get_gcar_data() const +float* + PW_Basis_K::get_gcar_data () const { return this->s_gcar; } template <> -double* PW_Basis_K::get_gcar_data() const +double* + PW_Basis_K::get_gcar_data () const { return this->d_gcar; } template <> -float* PW_Basis_K::get_gk2_data() const +float* + PW_Basis_K::get_gk2_data () const { return this->s_gk2; } template <> -double* PW_Basis_K::get_gk2_data() const +double* + PW_Basis_K::get_gk2_data () const { return this->d_gk2; } diff --git a/source/source_basis/module_pw/pw_basis_k.h b/source/source_basis/module_pw/pw_basis_k.h index b87da9ca0f6..ecb8f6b5fe9 100644 --- a/source/source_basis/module_pw/pw_basis_k.h +++ b/source/source_basis/module_pw/pw_basis_k.h @@ -56,229 +56,235 @@ namespace ModulePW class PW_Basis_K : public PW_Basis { -public: - PW_Basis_K(); - PW_Basis_K(std::string device_, std::string precision_) : 
PW_Basis(device_, precision_) {classname="PW_Basis_K";} - ~PW_Basis_K(); + public: + PW_Basis_K (); + PW_Basis_K (std::string device_, std::string precision_) : PW_Basis (device_, precision_) + { + classname = "PW_Basis_K"; + } + ~PW_Basis_K (); - //init parameters of pw_basis_k class - void initparameters( - const bool gamma_only_in, - const double ecut_in, - const int nk_in, //number of k points in this pool - const ModuleBase::Vector3 *kvec_d, // Direct coordinates of k points - const int distribution_type_in = 1, - const bool xprime_in = true - ); + // init parameters of pw_basis_k class + void initparameters (const bool gamma_only_in, + const double ecut_in, + const int nk_in, // number of k points in this pool + const ModuleBase::Vector3* kvec_d, // Direct coordinates of k points + const int distribution_type_in = 1, + const bool xprime_in = true); public: - int nks=0;//number of k points in this pool - ModuleBase::Vector3 *kvec_d=nullptr; // Direct coordinates of k points - ModuleBase::Vector3 *kvec_c=nullptr; // Cartesian coordinates of k points - int *npwk=nullptr; //[nks] number of plane waves of different k-points - int npwk_max=0; //max npwk among all nks k-points, it may be smaller than npw - //npw cutoff: (|g|+|k|)^2, npwk in the the npw ball, thus is smaller - double gk_ecut=0; //Energy cut off for (g+k)^2/2 + int nks = 0; // number of k points in this pool + ModuleBase::Vector3* kvec_d = nullptr; // Direct coordinates of k points + ModuleBase::Vector3* kvec_c = nullptr; // Cartesian coordinates of k points + int* npwk = nullptr; //[nks] number of plane waves of different k-points + int npwk_max = 0; // max npwk among all nks k-points, it may be smaller than npw + // npw cutoff: (|g|+|k|)^2, npwk in the the npw ball, thus is smaller + double gk_ecut = 0; // Energy cut off for (g+k)^2/2 -public: - //prepare for transforms between real and reciprocal spaces - void setuptransform(); + public: + // prepare for transforms between real and reciprocal 
spaces + void setuptransform (); - int *igl2isz_k=nullptr, * d_igl2isz_k = nullptr; //[npwk_max*nks] map (igl,ik) to (is,iz) - int *igl2ig_k=nullptr;//[npwk_max*nks] map (igl,ik) to ig - int *ig2ixyz_k=nullptr; ///< [npw] map ig to ixyz - std::vector ig2ixyz_k_cpu; /// [npw] map ig to ixyz,which is used in dsp fft. - double *gk2=nullptr; // modulus (G+K)^2 of G vectors [npwk_max*nks] + int *igl2isz_k = nullptr, *d_igl2isz_k = nullptr; //[npwk_max*nks] map (igl,ik) to (is,iz) + int* igl2ig_k = nullptr; //[npwk_max*nks] map (igl,ik) to ig + int* ig2ixyz_k = nullptr; ///< [npw] map ig to ixyz + std::vector ig2ixyz_k_cpu; /// [npw] map ig to ixyz,which is used in dsp fft. + double* gk2 = nullptr; // modulus (G+K)^2 of G vectors [npwk_max*nks] // liuyu add 2023-09-06 - double erf_ecut=0.0; // the value of the constant energy cutoff - double erf_height=0.0; // the height of the energy step for reciprocal vectors - double erf_sigma=0.0; // the width of the energy step for reciprocal vectors + double erf_ecut = 0.0; // the value of the constant energy cutoff + double erf_height = 0.0; // the height of the energy step for reciprocal vectors + double erf_sigma = 0.0; // the width of the energy step for reciprocal vectors - //collect gdirect, gcar, gg - void collect_local_pw(const double& erf_ecut_in = 0.0, - const double& erf_height_in = 0.0, - const double& erf_sigma_in = 0.1); + // collect gdirect, gcar, gg + void collect_local_pw (const double& erf_ecut_in = 0.0, + const double& erf_height_in = 0.0, + const double& erf_sigma_in = 0.1); private: - float * s_gk2 = nullptr; - double * d_gk2 = nullptr; // modulus (G+K)^2 of G vectors [npwk_max*nks] - //create igl2isz_k map array for fft - void setupIndGk(); + float* s_gk2 = nullptr; + double* d_gk2 = nullptr; // modulus (G+K)^2 of G vectors [npwk_max*nks] + // create igl2isz_k map array for fft + void setupIndGk (); // get ig2ixyz_k - void get_ig2ixyz_k(); - //calculate G+K, it is a private function - ModuleBase::Vector3 
cal_GplusK_cartesian(const int ik, const int ig) const; + void get_ig2ixyz_k (); + // calculate G+K, it is a private function + ModuleBase::Vector3 cal_GplusK_cartesian (const int ik, const int ig) const; public: template - void real2recip(const FPTYPE* in, - std::complex* out, - const int ik, - const bool add = false, - const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns) + void real2recip (const FPTYPE* in, + std::complex* out, + const int ik, + const bool add = false, + const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns) template - void real2recip(const std::complex* in, - std::complex* out, - const int ik, - const bool add = false, - const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns) + void real2recip (const std::complex* in, + std::complex* out, + const int ik, + const bool add = false, + const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns) template - void recip2real(const std::complex* in, - FPTYPE* out, - const int ik, - const bool add = false, - const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny) + void recip2real (const std::complex* in, + FPTYPE* out, + const int ik, + const bool add = false, + const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny) template - void recip2real(const std::complex* in, - std::complex* out, - const int ik, - const bool add = false, - const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny) - #if defined(__DSP) + void recip2real (const std::complex* in, + std::complex* out, + const int ik, + const bool add = false, + const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny) +#if defined(__DSP) template - void convolution(const Device* ctx, + void convolution (const Device* ctx, const int ik, const int size, const std::complex* input, - const FPTYPE* input1, - std::complex* output, + const FPTYPE* input1, + std::complex* output, const bool add = false, - const FPTYPE factor =1.0) const ; + const FPTYPE factor 
= 1.0) const; template - void real2recip_dsp(const std::complex* in, - std::complex* out, - const int ik, - const bool add = false, - const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns) + void real2recip_dsp (const std::complex* in, + std::complex* out, + const int ik, + const bool add = false, + const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns) template - void recip2real_dsp(const std::complex* in, - std::complex* out, - const int ik, - const bool add = false, - const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny) - - #endif + void recip2real_dsp (const std::complex* in, + std::complex* out, + const int ik, + const bool add = false, + const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny) - template - void real_to_recip(const Device* ctx, - const std::complex* in, - std::complex* out, - const int ik, - const bool add = false, - const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns) - template - void recip_to_real(const Device* ctx, - const std::complex* in, - std::complex* out, - const int ik, - const bool add = false, - const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny) +#endif + template + void real_to_recip (const Device* ctx, + const std::complex* in, + std::complex* out, + const int ik, + const bool add = false, + const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns) + template + void recip_to_real (const Device* ctx, + const std::complex* in, + std::complex* out, + const int ik, + const bool add = false, + const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny) template ::value, int>::type = 0> - void real_to_recip(const TK* in, + void + real_to_recip (const TK* in, TK* out, const int ik, const bool add = false, const typename GetTypeReal::type factor = 1.0) const { - #if defined(__DSP) - this->real2recip_dsp(in, out, ik, add, factor); - #else - this->real2recip(in,out,ik,add,factor); - #endif +#if defined(__DSP) + 
this->real2recip_dsp (in, out, ik, add, factor); +#else + this->real2recip (in, out, ik, add, factor); +#endif } template ::value, int>::type = 0> - void recip_to_real(const TK* in, + void + recip_to_real (const TK* in, TK* out, const int ik, const bool add = false, const typename GetTypeReal::type factor = 1.0) const { - - #if defined(__DSP) - this->recip2real_dsp(in,out,ik,add,factor); - #else - this->recip2real(in,out,ik,add,factor); - #endif + +#if defined(__DSP) + this->recip2real_dsp (in, out, ik, add, factor); +#else + this->recip2real (in, out, ik, add, factor); +#endif } template - void real2recip_gpu(const std::complex* in, - std::complex* out, - const int ik, - const bool add = false, - const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns) - + void real2recip_gpu (const std::complex* in, + std::complex* out, + const int ik, + const bool add = false, + const FPTYPE factor = 1.0) const; // in:(nplane,nx*ny) ; out(nz, ns) + template - void recip2real_gpu(const std::complex* in, - std::complex* out, - const int ik, - const bool add = false, - const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny) + void recip2real_gpu (const std::complex* in, + std::complex* out, + const int ik, + const bool add = false, + const FPTYPE factor = 1.0) const; // in:(nz, ns) ; out(nplane,nx*ny) template ::value, int>::type = 0> - void real_to_recip(const FPTYPE* in, + void + real_to_recip (const FPTYPE* in, FPTYPE* out, const int ik, const bool add = false, const typename GetTypeReal::type factor = 1.0) const { - this->real2recip_gpu(in, out, ik, add, factor); + this->real2recip_gpu (in, out, ik, add, factor); } template ::value, int>::type = 0> - void recip_to_real(const TK* in, + void + recip_to_real (const TK* in, TK* out, const int ik, const bool add = false, const typename GetTypeReal::type factor = 1.0) const { - this->recip2real_gpu(in, out, ik, add, factor); + this->recip2real_gpu (in, out, ik, add, factor); } public: - //operator: - //get 
(G+K)^2: - double& getgk2(const int ik, const int igl) const; - //get G - ModuleBase::Vector3& getgcar(const int ik, const int igl) const; - //get G-direct - ModuleBase::Vector3 getgdirect(const int ik, const int igl) const; - //get (G+K) - ModuleBase::Vector3 getgpluskcar(const int ik, const int igl) const; - //get igl2isz_k - int& getigl2isz(const int ik, const int igl) const; - //get igl2ig_k or igk(ik,ig) in older ABACUS - int& getigl2ig(const int ik, const int igl) const; + // operator: + // get (G+K)^2: + double& getgk2 (const int ik, const int igl) const; + // get G + ModuleBase::Vector3& getgcar (const int ik, const int igl) const; + // get G-direct + ModuleBase::Vector3 getgdirect (const int ik, const int igl) const; + // get (G+K) + ModuleBase::Vector3 getgpluskcar (const int ik, const int igl) const; + // get igl2isz_k + int& getigl2isz (const int ik, const int igl) const; + // get igl2ig_k or igk(ik,ig) in older ABACUS + int& getigl2ig (const int ik, const int igl) const; - //get ig_to_ix - std::vector get_ig2ix(const int ik) const; - //get ig_to_iy - std::vector get_ig2iy(const int ik) const; - //get ig_to_iz - std::vector get_ig2iz(const int ik) const; + // get ig_to_ix + std::vector get_ig2ix (const int ik) const; + // get ig_to_iy + std::vector get_ig2iy (const int ik) const; + // get ig_to_iz + std::vector get_ig2iz (const int ik) const; - template FPTYPE * get_gk2_data() const; - template FPTYPE * get_gcar_data() const; - template FPTYPE * get_kvec_c_data() const; + template + FPTYPE* get_gk2_data () const; + template + FPTYPE* get_gcar_data () const; + template + FPTYPE* get_kvec_c_data () const; -private: - float * s_gcar = nullptr, * s_kvec_c = nullptr; - double * d_gcar = nullptr, * d_kvec_c = nullptr; + private: + float *s_gcar = nullptr, *s_kvec_c = nullptr; + double *d_gcar = nullptr, *d_kvec_c = nullptr; }; -} -#endif //PlaneWave_K class +} // namespace ModulePW +#endif // PlaneWave_K class #include "./pw_basis_k_big.h" //temporary it will 
be removed - diff --git a/source/source_basis/module_pw/pw_basis_k_big.h b/source/source_basis/module_pw/pw_basis_k_big.h index 128c1d937d2..d6a84ade324 100644 --- a/source/source_basis/module_pw/pw_basis_k_big.h +++ b/source/source_basis/module_pw/pw_basis_k_big.h @@ -3,41 +3,44 @@ #include "source_base/constants.h" #include "source_base/global_function.h" -// temporary class, because previous ABACUS consider big grid for fft grids +// temporary class, because previous ABACUS consider big grid for fft grids // which are used for grid integration in LCAO. // In fact, it is unnecessary. It will be moved after grid integration is refactored. namespace ModulePW { -class PW_Basis_K_Big: public PW_Basis_K +class PW_Basis_K_Big : public PW_Basis_K { -public: - + public: // combine [bx,by,bz] FFT grids into a big one - // typical values are bx=2, by=2, bz=2 - // nbx=nx/bx, nby=ny/by, nbz=nz/bz, - // Note: this class can only use initgrids(lat0_in, latvec_in, PW_Basis_Big::nx, PW_Basis_Big::ny, PW_Basis_Big::nz)!!! - PW_Basis_K_Big(){ + // typical values are bx=2, by=2, bz=2 + // nbx=nx/bx, nby=ny/by, nbz=nz/bz, + // Note: this class can only use initgrids(lat0_in, latvec_in, PW_Basis_Big::nx, PW_Basis_Big::ny, + // PW_Basis_Big::nz)!!! + PW_Basis_K_Big () + { bx = 1; by = 1; bz = 1; } - PW_Basis_K_Big(std::string device_, std::string precision_) : PW_Basis_K(device_, precision_) {} - ~PW_Basis_K_Big(){}; - void setbxyz(const int bx_in, const int by_in, const int bz_in) + PW_Basis_K_Big (std::string device_, std::string precision_) : PW_Basis_K (device_, precision_) {} + ~PW_Basis_K_Big () {}; + void + setbxyz (const int bx_in, const int by_in, const int bz_in) { bx = bx_in; by = by_in; bz = bz_in; } - int bx=0; - int by=0; - int bz=0; - int nbx=0; - int nby=0; - int nbz=0; + int bx = 0; + int by = 0; + int bz = 0; + int nbx = 0; + int nby = 0; + int nbz = 0; - virtual void distribute_r() + virtual void + distribute_r () { bx = (bx == 0) ? 2 : bx; by = (by == 0) ? 
2 : by; @@ -45,29 +48,34 @@ class PW_Basis_K_Big: public PW_Basis_K this->nbx = this->nx / bx; this->nby = this->ny / by; this->nbz = this->nz / bz; - delete[] this->numz; this->numz = new int[this->poolnproc]; - delete[] this->startz; this->startz = new int[this->poolnproc]; - ModuleBase::GlobalFunc::ZEROS(this->numz, this->poolnproc); - ModuleBase::GlobalFunc::ZEROS(this->startz, this->poolnproc); + delete[] this->numz; + this->numz = new int[this->poolnproc]; + delete[] this->startz; + this->startz = new int[this->poolnproc]; + ModuleBase::GlobalFunc::ZEROS (this->numz, this->poolnproc); + ModuleBase::GlobalFunc::ZEROS (this->startz, this->poolnproc); int npbz = this->nbz / this->poolnproc; int modbz = this->nbz % this->poolnproc; this->startz[0] = 0; - for(int ip = 0 ; ip < this->poolnproc ; ++ip) - { - this->numz[ip] = npbz*this->bz; - if(ip < modbz) { this->numz[ip]+=this->bz;} - if(ip < this->poolnproc - 1) this->startz[ip+1] = this->startz[ip] + numz[ip]; - if(ip == this->poolrank) + for (int ip = 0; ip < this->poolnproc; ++ip) { - this->nplane = numz[ip]; - this->startz_current = startz[ip]; + this->numz[ip] = npbz * this->bz; + if (ip < modbz) + { + this->numz[ip] += this->bz; + } + if (ip < this->poolnproc - 1) + this->startz[ip + 1] = this->startz[ip] + numz[ip]; + if (ip == this->poolrank) + { + this->nplane = numz[ip]; + this->startz_current = startz[ip]; + } } - } this->nrxx = this->numz[this->poolrank] * this->nxy; return; } - }; -} +} // namespace ModulePW #endif \ No newline at end of file diff --git a/source/source_basis/module_pw/pw_basis_sup.cpp b/source/source_basis/module_pw/pw_basis_sup.cpp index 43eb044a5f8..6df69b259e7 100644 --- a/source/source_basis/module_pw/pw_basis_sup.cpp +++ b/source/source_basis/module_pw/pw_basis_sup.cpp @@ -4,50 +4,49 @@ namespace ModulePW { -PW_Basis_Sup::~PW_Basis_Sup() -{ -} +PW_Basis_Sup::~PW_Basis_Sup () {} /// /// distribute plane wave basis and real-space grids to different processors /// set up maps for 
fft and create arrays for MPI_Alltoall /// set up ffts /// -void PW_Basis_Sup::setuptransform(const ModulePW::PW_Basis* pw_rho) +void + PW_Basis_Sup::setuptransform (const ModulePW::PW_Basis* pw_rho) { - ModuleBase::timer::start(this->classname, "setuptransform"); - this->distribute_r(); - this->distribute_g(pw_rho); - this->getstartgr(); - this->fft_bundle.clear(); + ModuleBase::timer::start (this->classname, "setuptransform"); + this->distribute_r (); + this->distribute_g (pw_rho); + this->getstartgr (); + this->fft_bundle.clear (); if (this->xprime) - { - this->fft_bundle.initfft(this->nx, - this->ny, - this->nz, - this->lix, - this->rix, - this->nst, - this->nplane, - this->poolnproc, - this->gamma_only, - this->xprime); - } + { + this->fft_bundle.initfft (this->nx, + this->ny, + this->nz, + this->lix, + this->rix, + this->nst, + this->nplane, + this->poolnproc, + this->gamma_only, + this->xprime); + } else - { - this->fft_bundle.initfft(this->nx, - this->ny, - this->nz, - this->liy, - this->riy, - this->nst, - this->nplane, - this->poolnproc, - this->gamma_only, - this->xprime); - } - this->fft_bundle.setupFFT(); - ModuleBase::timer::end(this->classname, "setuptransform"); + { + this->fft_bundle.initfft (this->nx, + this->ny, + this->nz, + this->liy, + this->riy, + this->nst, + this->nplane, + this->poolnproc, + this->gamma_only, + this->xprime); + } + this->fft_bundle.setupFFT (); + ModuleBase::timer::end (this->classname, "setuptransform"); } /// @@ -55,15 +54,16 @@ void PW_Basis_Sup::setuptransform(const ModulePW::PW_Basis* pw_rho) /// Known: G, GT, GGT, fftnx, fftny, nz, poolnproc, poolrank, ggecut /// output: ig2isz[ig], istot2ixy[is], is2fftixy[is], fftixy2ip[ixy], gg[ig], gcar[ig], gdirect[ig], nst, nstot /// -void PW_Basis_Sup::distribute_g(const ModulePW::PW_Basis* pw_rho) +void + PW_Basis_Sup::distribute_g (const ModulePW::PW_Basis* pw_rho) { - ModuleBase::timer::start(this->classname, "distributeg"); - this->distribution_method3(pw_rho); - 
ModuleBase::CHECK_WARNING_QUIT((this->npw == 0), - "pw_distributeg.cpp", - PARAM.inp.calculation, - "Current core has no plane waves! Please reduce the cores."); - ModuleBase::timer::end(this->classname, "distributeg"); + ModuleBase::timer::start (this->classname, "distributeg"); + this->distribution_method3 (pw_rho); + ModuleBase::CHECK_WARNING_QUIT ((this->npw == 0), + "pw_distributeg.cpp", + PARAM.inp.calculation, + "Current core has no plane waves! Please reduce the cores."); + ModuleBase::timer::end (this->classname, "distributeg"); return; } @@ -89,7 +89,8 @@ void PW_Basis_Sup::distribute_g(const ModulePW::PW_Basis* pw_rho) /// Known: G, GT, GGT, fftny, fftnx, nz, poolnproc, poolrank, ggecut /// output: ig2isz[ig], istot2ixy[is], is2fftixy[is], fftixy2ip[ixy], startnsz_per[ip], nst_per[ip], nst /// -void PW_Basis_Sup::distribution_method3(const ModulePW::PW_Basis* pw_rho) +void + PW_Basis_Sup::distribution_method3 (const ModulePW::PW_Basis* pw_rho) { // initial the variables needed by all process int* st_bottom2D = new int[fftnxy]; // st_bottom2D[ixy], minimum z of stick on (x, y). @@ -100,85 +101,86 @@ void PW_Basis_Sup::distribution_method3(const ModulePW::PW_Basis* pw_rho) this->npw_per = new int[this->poolnproc]; // number of planewaves on each core. delete[] this->fftixy2ip; this->fftixy2ip = new int[this->fftnxy]; // ip of core which contains the stick on (x, y). - for (int ixy = 0; ixy < this->fftnxy; ++ixy) { - this->fftixy2ip[ixy] = -1; // meaning this stick has not been distributed or there is no stick on (x, y). -} + for (int ixy = 0; ixy < this->fftnxy; ++ixy) + { + this->fftixy2ip[ixy] = -1; // meaning this stick has not been distributed or there is no stick on (x, y). + } if (poolrank == 0) - { - // (1) Count the total number of planewaves (tot_npw) and sticks (this->nstot). + { + // (1) Count the total number of planewaves (tot_npw) and sticks (this->nstot). 
- // Actually we will scan [(2 * ibox[0] + 1) * (2 * ibox[1] + 1)] points on x-y plane, - // but we define st_length2D with (fftny * fftnx) points here, because the diameter - // of the sphere should be shorter than the sides of the cube. - // calculate this->nstot and this->npwtot, liy, riy - this->count_pw_st(st_length2D, st_bottom2D); - } + // Actually we will scan [(2 * ibox[0] + 1) * (2 * ibox[1] + 1)] points on x-y plane, + // but we define st_length2D with (fftny * fftnx) points here, because the diameter + // of the sphere should be shorter than the sides of the cube. + // calculate this->nstot and this->npwtot, liy, riy + this->count_pw_st (st_length2D, st_bottom2D); + } #ifdef __MPI - MPI_Bcast(&this->npwtot, 1, MPI_INT, 0, this->pool_world); - MPI_Bcast(&this->nstot, 1, MPI_INT, 0, this->pool_world); - MPI_Bcast(&liy, 1, MPI_INT, 0, this->pool_world); - MPI_Bcast(&riy, 1, MPI_INT, 0, this->pool_world); - MPI_Bcast(&lix, 1, MPI_INT, 0, this->pool_world); - MPI_Bcast(&rix, 1, MPI_INT, 0, this->pool_world); + MPI_Bcast (&this->npwtot, 1, MPI_INT, 0, this->pool_world); + MPI_Bcast (&this->nstot, 1, MPI_INT, 0, this->pool_world); + MPI_Bcast (&liy, 1, MPI_INT, 0, this->pool_world); + MPI_Bcast (&riy, 1, MPI_INT, 0, this->pool_world); + MPI_Bcast (&lix, 1, MPI_INT, 0, this->pool_world); + MPI_Bcast (&rix, 1, MPI_INT, 0, this->pool_world); #endif delete[] this->istot2ixy; this->istot2ixy = new int[this->nstot]; if (poolrank == 0) - { + { #ifdef __MPI - // Parallel line - // (2) Collect the x, y indexs, and length of the sticks. - int* st_i = new int[this->nstot]; // x or x + fftnx (if x < 0) of stick. - int* st_j = new int[this->nstot]; // y or y + fftny (if y < 0) of stick. - int* st_length = new int[this->nstot]; // number of planewaves in stick. - this->collect_st(st_length2D, st_bottom2D, st_i, st_j, st_length); + // Parallel line + // (2) Collect the x, y indexs, and length of the sticks. 
+ int* st_i = new int[this->nstot]; // x or x + fftnx (if x < 0) of stick. + int* st_j = new int[this->nstot]; // y or y + fftny (if y < 0) of stick. + int* st_length = new int[this->nstot]; // number of planewaves in stick. + this->collect_st (st_length2D, st_bottom2D, st_i, st_j, st_length); - // (3) Distribute the sticks to cores. - // get nst_per, npw_per, fftixy2ip, and startnsz_per - this->startnsz_per = new int[this->poolnproc]; - this->divide_sticks_3(st_length2D, st_i, st_j, st_length, pw_rho->fftixy2ip, pw_rho->nx, pw_rho->ny); - delete[] st_length; + // (3) Distribute the sticks to cores. + // get nst_per, npw_per, fftixy2ip, and startnsz_per + this->startnsz_per = new int[this->poolnproc]; + this->divide_sticks_3 (st_length2D, st_i, st_j, st_length, pw_rho->fftixy2ip, pw_rho->nx, pw_rho->ny); + delete[] st_length; - // (4) Get map from istot to (iy, ix) - this->get_istot2ixy(st_i, st_j); - delete[] st_i; - delete[] st_j; - // We do not need startnsz_per after it. - delete[] this->startnsz_per; - this->startnsz_per = nullptr; + // (4) Get map from istot to (iy, ix) + this->get_istot2ixy (st_i, st_j); + delete[] st_i; + delete[] st_j; + // We do not need startnsz_per after it. 
+ delete[] this->startnsz_per; + this->startnsz_per = nullptr; #else - // Serial line - // get nst_per, npw_per, fftixy2ip, and istot2ixy - this->nst_per[0] = this->nstot; - this->npw_per[0] = this->npwtot; - int st_move = 0; - for (int ixy = 0; ixy < fftnxy; ++ixy) - { - if (st_length2D[ixy] > 0) - { - this->istot2ixy[st_move] = ixy / fftny * ny + ixy % fftny; - this->fftixy2ip[ixy] = 0; - st_move++; - } - } + // Serial line + // get nst_per, npw_per, fftixy2ip, and istot2ixy + this->nst_per[0] = this->nstot; + this->npw_per[0] = this->npwtot; + int st_move = 0; + for (int ixy = 0; ixy < fftnxy; ++ixy) + { + if (st_length2D[ixy] > 0) + { + this->istot2ixy[st_move] = ixy / fftny * ny + ixy % fftny; + this->fftixy2ip[ixy] = 0; + st_move++; + } + } #endif - } + } #ifdef __MPI - - MPI_Bcast(st_length2D, this->fftnxy, MPI_INT, 0, this->pool_world); - MPI_Bcast(st_bottom2D, this->fftnxy, MPI_INT, 0, this->pool_world); - MPI_Bcast(this->fftixy2ip, this->fftnxy, MPI_INT, 0, this->pool_world); - MPI_Bcast(this->istot2ixy, this->nstot, MPI_INT, 0, this->pool_world); - MPI_Bcast(this->nst_per, this->poolnproc, MPI_INT, 0, this->pool_world); - MPI_Bcast(this->npw_per, this->poolnproc, MPI_INT, 0, this->pool_world); + + MPI_Bcast (st_length2D, this->fftnxy, MPI_INT, 0, this->pool_world); + MPI_Bcast (st_bottom2D, this->fftnxy, MPI_INT, 0, this->pool_world); + MPI_Bcast (this->fftixy2ip, this->fftnxy, MPI_INT, 0, this->pool_world); + MPI_Bcast (this->istot2ixy, this->nstot, MPI_INT, 0, this->pool_world); + MPI_Bcast (this->nst_per, this->poolnproc, MPI_INT, 0, this->pool_world); + MPI_Bcast (this->npw_per, this->poolnproc, MPI_INT, 0, this->pool_world); #endif this->npw = this->npw_per[this->poolrank]; this->nst = this->nst_per[this->poolrank]; this->nstnz = this->nst * this->nz; // (5) Construct ig2isz and is2fftixy. 
- this->get_ig2isz_is2fftixy(st_bottom2D, st_length2D, pw_rho); + this->get_ig2isz_is2fftixy (st_bottom2D, st_length2D, pw_rho); delete[] st_bottom2D; delete[] st_length2D; @@ -197,101 +199,109 @@ void PW_Basis_Sup::distribution_method3(const ModulePW::PW_Basis* pw_rho) /// known: tot_npw, this->nstot, st_i, st_j, st_length /// output: npw_per, nst_per, this->fftixy2ip, this->startnsz_per /// -void PW_Basis_Sup::divide_sticks_3( - const int* st_length2D, // st_length2D[ixy], number of planewaves in stick on (x, y). - const int* st_i, // x or x + fftnx (if x < 0) of stick. - const int* st_j, // y or y + fftny (if y < 0) of stick. - const int* st_length, // the stick on (x, y) consists of st_length[x*fftny+y] planewaves. - const int* fftixy2ip_s, // fftixy2ip of smooth grids - const int& nx_s, // nx of smooth grids - const int& ny_s) // ny of smooth grids +void + PW_Basis_Sup::divide_sticks_3 ( + const int* st_length2D, // st_length2D[ixy], number of planewaves in stick on (x, y). + const int* st_i, // x or x + fftnx (if x < 0) of stick. + const int* st_j, // y or y + fftny (if y < 0) of stick. + const int* st_length, // the stick on (x, y) consists of st_length[x*fftny+y] planewaves. + const int* fftixy2ip_s, // fftixy2ip of smooth grids + const int& nx_s, // nx of smooth grids + const int& ny_s) // ny of smooth grids { - ModuleBase::GlobalFunc::ZEROS(this->nst_per, poolnproc); - ModuleBase::GlobalFunc::ZEROS(this->npw_per, poolnproc); + ModuleBase::GlobalFunc::ZEROS (this->nst_per, poolnproc); + ModuleBase::GlobalFunc::ZEROS (this->npw_per, poolnproc); int fftny_s = ny_s; int fftnx_s = nx_s; if (this->gamma_only) - { - if (this->xprime) { - fftnx_s = int(nx_s / 2) + 1; - } else { - fftny_s = int(ny_s / 2) + 1; -} - } + { + if (this->xprime) + { + fftnx_s = int (nx_s / 2) + 1; + } + else + { + fftny_s = int (ny_s / 2) + 1; + } + } int fftnxy_s = fftnx_s * fftny_s; // (1) Distribute sticks corresponding to smooth grids first. 
for (int ixy = 0; ixy < fftnxy_s; ++ixy) - { - int ix = ixy / fftny_s; - int iy = ixy % fftny_s; - if (ix >= int(nx_s / 2) + 1) { - ix -= nx_s; -} - if (iy >= int(ny_s / 2) + 1) { - iy -= ny_s; -} - - if (ix < 0) { - ix += nx; -} - if (iy < 0) { - iy += ny; -} - int index = ix * this->fftny + iy; - int ip = fftixy2ip_s[ixy]; - if (ip >= 0) { - this->fftixy2ip[index] = ip; - this->nst_per[ip]++; - this->npw_per[ip] += st_length2D[index]; + int ix = ixy / fftny_s; + int iy = ixy % fftny_s; + if (ix >= int (nx_s / 2) + 1) + { + ix -= nx_s; + } + if (iy >= int (ny_s / 2) + 1) + { + iy -= ny_s; + } + + if (ix < 0) + { + ix += nx; + } + if (iy < 0) + { + iy += ny; + } + int index = ix * this->fftny + iy; + int ip = fftixy2ip_s[ixy]; + if (ip >= 0) + { + this->fftixy2ip[index] = ip; + this->nst_per[ip]++; + this->npw_per[ip] += st_length2D[index]; + } } - } // distribute the longest in the lefted stick preferentially. int ipmin = 0; // The ip of core containing least number of planewaves. for (int is = 0; is < this->nstot; ++is) - { - // skip sticks corresponding to smooth grids. - if (this->fftixy2ip[st_i[is] * this->fftny + st_j[is]] >= 0) { - continue; - } + // skip sticks corresponding to smooth grids. + if (this->fftixy2ip[st_i[is] * this->fftny + st_j[is]] >= 0) + { + continue; + } - // find the ip of core containing the least planewaves. - for (int ip = 0; ip < this->poolnproc; ++ip) - { - const int npwmin = this->npw_per[ipmin]; - const int npw_ip = this->npw_per[ip]; - const int nstmin = nst_per[ipmin]; - const int nst_ip = nst_per[ip]; + // find the ip of core containing the least planewaves. 
+ for (int ip = 0; ip < this->poolnproc; ++ip) + { + const int npwmin = this->npw_per[ipmin]; + const int npw_ip = this->npw_per[ip]; + const int nstmin = nst_per[ipmin]; + const int nst_ip = nst_per[ip]; - if (npw_ip == 0) - { - ipmin = ip; - break; - } - else if (npw_ip < npwmin) - { - ipmin = ip; - } - else if (npw_ip == npwmin && nst_ip < nstmin) - { - ipmin = ip; - } + if (npw_ip == 0) + { + ipmin = ip; + break; + } + else if (npw_ip < npwmin) + { + ipmin = ip; + } + else if (npw_ip == npwmin && nst_ip < nstmin) + { + ipmin = ip; + } + } + this->nst_per[ipmin]++; + this->npw_per[ipmin] += st_length[is]; + this->fftixy2ip[st_i[is] * this->fftny + st_j[is]] = ipmin; } - this->nst_per[ipmin]++; - this->npw_per[ipmin] += st_length[is]; - this->fftixy2ip[st_i[is] * this->fftny + st_j[is]] = ipmin; - } this->startnsz_per[0] = 0; for (int ip = 1; ip < poolnproc; ++ip) - { - this->startnsz_per[ip] = this->startnsz_per[ip - 1] + this->nst_per[ip - 1] * this->nz; - } + { + this->startnsz_per[ip] = this->startnsz_per[ip - 1] + this->nst_per[ip - 1] * this->nz; + } return; } @@ -304,142 +314,154 @@ void PW_Basis_Sup::divide_sticks_3( /// known: this->nstot, st_bottom2D, st_length2D /// output: ig2isz, is2fftixy /// -void PW_Basis_Sup::get_ig2isz_is2fftixy( - int* st_bottom2D, // minimum z of stick, stored in 1d array with this->nstot elements. - int* st_length2D, // the stick on (x, y) consists of st_length[x*fftny+y] planewaves. - const ModulePW::PW_Basis* pw_rho) +void + PW_Basis_Sup::get_ig2isz_is2fftixy ( + int* st_bottom2D, // minimum z of stick, stored in 1d array with this->nstot elements. + int* st_length2D, // the stick on (x, y) consists of st_length[x*fftny+y] planewaves. + const ModulePW::PW_Basis* pw_rho) { if (this->npw == 0) - { - delete[] this->ig2isz; - this->ig2isz = nullptr; // map ig to the z coordinate of this planewave. - delete[] this->is2fftixy; - this->is2fftixy = nullptr; // map is (index of sticks) to ixy (iy + ix * fftny). 
-#if defined(__CUDA) || defined(__ROCM) - if (this->device == "gpu") { - delmem_int_op()(this->d_is2fftixy); - d_is2fftixy = nullptr; - } + delete[] this->ig2isz; + this->ig2isz = nullptr; // map ig to the z coordinate of this planewave. + delete[] this->is2fftixy; + this->is2fftixy = nullptr; // map is (index of sticks) to ixy (iy + ix * fftny). +#if defined(__CUDA) || defined(__ROCM) + if (this->device == "gpu") + { + delmem_int_op () (this->d_is2fftixy); + d_is2fftixy = nullptr; + } #endif - return; - } + return; + } delete[] this->ig2isz; this->ig2isz = new int[this->npw]; // map ig to the z coordinate of this planewave. - ModuleBase::GlobalFunc::ZEROS(this->ig2isz, this->npw); + ModuleBase::GlobalFunc::ZEROS (this->ig2isz, this->npw); delete[] this->is2fftixy; this->is2fftixy = new int[this->nst]; // map is (index of sticks) to ixy (iy + ix * fftny). for (int is = 0; is < this->nst; ++is) - { - this->is2fftixy[is] = -1; - } + { + this->is2fftixy[is] = -1; + } int* fftixy2is = new int[this->fftnxy]; // map ixy to is. for (int ixy = 0; ixy < this->fftnxy; ++ixy) - { - fftixy2is[ixy] = -1; - } + { + fftixy2is[ixy] = -1; + } bool* found = new bool[this->fftnxyz]; // whether the planewave on (x, y, z) has been found on the smooth grid. for (int i = 0; i < this->fftnxyz; ++i) - { - found[i] = false; - } + { + found[i] = false; + } // get is2fftixy int st_move = 0; // this is the st_move^th stick on current core. for (int ixy = 0; ixy < this->fftnxy; ++ixy) - { - if (this->fftixy2ip[ixy] == this->poolrank) { - this->is2fftixy[st_move] = ixy; - fftixy2is[ixy] = st_move; - st_move++; + if (this->fftixy2ip[ixy] == this->poolrank) + { + this->is2fftixy[st_move] = ixy; + fftixy2is[ixy] = st_move; + st_move++; + } + if (st_move == this->nst) + { + break; + } } - if (st_move == this->nst) { - break; -} - } // distribute planewaves in the same order as smooth grids first. int pw_filled = 0; // how many current core's planewaves have been found. 
for (int ig = 0; ig < pw_rho->npw; ig++) - { - int isz = pw_rho->ig2isz[ig]; - int iz = isz % pw_rho->nz; - int is = isz / pw_rho->nz; - int ixy = pw_rho->is2fftixy[is]; - int ix = ixy / pw_rho->fftny; - int iy = ixy % pw_rho->fftny; - if (ix >= int(pw_rho->nx / 2) + 1) { - ix -= pw_rho->nx; -} - if (iy >= int(pw_rho->ny / 2) + 1) { - iy -= pw_rho->ny; -} - if (iz >= int(pw_rho->nz / 2) + 1) { - iz -= pw_rho->nz; -} + { + int isz = pw_rho->ig2isz[ig]; + int iz = isz % pw_rho->nz; + int is = isz / pw_rho->nz; + int ixy = pw_rho->is2fftixy[is]; + int ix = ixy / pw_rho->fftny; + int iy = ixy % pw_rho->fftny; + if (ix >= int (pw_rho->nx / 2) + 1) + { + ix -= pw_rho->nx; + } + if (iy >= int (pw_rho->ny / 2) + 1) + { + iy -= pw_rho->ny; + } + if (iz >= int (pw_rho->nz / 2) + 1) + { + iz -= pw_rho->nz; + } - if (ix < 0) { - ix += this->nx; -} - if (iy < 0) { - iy += this->ny; -} - if (iz < 0) { - iz += this->nz; -} - int ixy_now = ix * this->fftny + iy; - int index = ixy_now * this->nz + iz; - int is_now = fftixy2is[ixy_now]; - int isz_now = is_now * this->nz + iz; - this->ig2isz[ig] = isz_now; - pw_filled++; - found[index] = true; - if (xprime && ix == 0) { - ng_xeq0++; -} - } - assert(pw_filled == pw_rho->npw); + if (ix < 0) + { + ix += this->nx; + } + if (iy < 0) + { + iy += this->ny; + } + if (iz < 0) + { + iz += this->nz; + } + int ixy_now = ix * this->fftny + iy; + int index = ixy_now * this->nz + iz; + int is_now = fftixy2is[ixy_now]; + int isz_now = is_now * this->nz + iz; + this->ig2isz[ig] = isz_now; + pw_filled++; + found[index] = true; + if (xprime && ix == 0) + { + ng_xeq0++; + } + } + assert (pw_filled == pw_rho->npw); // distribute the lefted planewaves. 
for (int ixy = 0; ixy < this->fftnxy; ++ixy) - { - if (this->fftixy2ip[ixy] == this->poolrank) { - int zstart = st_bottom2D[ixy]; - for (int iz = zstart; iz < zstart + st_length2D[ixy]; ++iz) - { - int z = iz; - if (z < 0) { - z += this->nz; -} - if (!found[ixy * this->nz + z]) + if (this->fftixy2ip[ixy] == this->poolrank) { - found[ixy * this->nz + z] = true; - int is = fftixy2is[ixy]; - this->ig2isz[pw_filled] = is * this->nz + z; - pw_filled++; - if (xprime && ixy / fftny == 0) { - ng_xeq0++; -} + int zstart = st_bottom2D[ixy]; + for (int iz = zstart; iz < zstart + st_length2D[ixy]; ++iz) + { + int z = iz; + if (z < 0) + { + z += this->nz; + } + if (!found[ixy * this->nz + z]) + { + found[ixy * this->nz + z] = true; + int is = fftixy2is[ixy]; + this->ig2isz[pw_filled] = is * this->nz + z; + pw_filled++; + if (xprime && ixy / fftny == 0) + { + ng_xeq0++; + } + } + } + } + if (pw_filled == this->npw) + { + break; } - } } - if (pw_filled == this->npw) { - break; -} - } delete[] fftixy2is; delete[] found; #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") - { - resmem_int_op()(d_is2fftixy, this->nst); - syncmem_int_h2d_op()(this->d_is2fftixy, this->is2fftixy, this->nst); - } + { + resmem_int_op () (d_is2fftixy, this->nst); + syncmem_int_h2d_op () (this->d_is2fftixy, this->is2fftixy, this->nst); + } #endif return; } diff --git a/source/source_basis/module_pw/pw_basis_sup.h b/source/source_basis/module_pw/pw_basis_sup.h index b77dfa6bec0..265e5027cc1 100644 --- a/source/source_basis/module_pw/pw_basis_sup.h +++ b/source/source_basis/module_pw/pw_basis_sup.h @@ -1,7 +1,6 @@ #ifndef PWBASIS_SUP_H #define PWBASIS_SUP_H - namespace ModulePW { @@ -22,39 +21,37 @@ class PW_Basis_Sup : public PW_Basis { public: - PW_Basis_Sup() - { - } - PW_Basis_Sup(std::string device_, std::string precision_) : PW_Basis(device_, precision_) + PW_Basis_Sup () {} + PW_Basis_Sup (std::string device_, std::string precision_) : PW_Basis (device_, precision_) { classname = 
"PW_Basis_Sup"; } - ~PW_Basis_Sup(); + ~PW_Basis_Sup (); // distribute plane waves and grids and set up fft according to the smooth grids - void setuptransform(const ModulePW::PW_Basis* pw_rho); + void setuptransform (const ModulePW::PW_Basis* pw_rho); protected: // distribute plane waves to different processors according to the smooth grids - void distribute_g(const ModulePW::PW_Basis* pw_rho); + void distribute_g (const ModulePW::PW_Basis* pw_rho); // method 3: ONLY for dense grids in uspp // consider the consistence of sticks between dense and smooth grids - void distribution_method3(const ModulePW::PW_Basis* pw_rho); + void distribution_method3 (const ModulePW::PW_Basis* pw_rho); // Distribute sticks to cores in method 3. - void divide_sticks_3(const int* st_length2D, // st_length2D[ixy], number of planewaves in stick on (x, y). - const int* st_i, // x or x + fftnx (if x < 0) of stick. - const int* st_j, // y or y + fftny (if y < 0) of stick. - const int* st_length, // the stick on (x, y) consists of st_length[x*fftny+y] planewaves. - const int* fftixy2ip_s, // fftixy2ip of smooth grids - const int& nx_s, // nx of smooth grids - const int& ny_s // ny of smooth grids + void divide_sticks_3 (const int* st_length2D, // st_length2D[ixy], number of planewaves in stick on (x, y). + const int* st_i, // x or x + fftnx (if x < 0) of stick. + const int* st_j, // y or y + fftny (if y < 0) of stick. + const int* st_length, // the stick on (x, y) consists of st_length[x*fftny+y] planewaves. + const int* fftixy2ip_s, // fftixy2ip of smooth grids + const int& nx_s, // nx of smooth grids + const int& ny_s // ny of smooth grids ); - void get_ig2isz_is2fftixy(int* st_bottom2D, // minimum z of stick, stored in 1d array with this->nstot elements. - int* st_length2D, // the stick on (x, y) consists of st_length[x*fftny+y] planewaves. 
- const ModulePW::PW_Basis* pw_rho); + void get_ig2isz_is2fftixy (int* st_bottom2D, // minimum z of stick, stored in 1d array with this->nstot elements. + int* st_length2D, // the stick on (x, y) consists of st_length[x*fftny+y] planewaves. + const ModulePW::PW_Basis* pw_rho); }; // class PW_Basis_Sup } // namespace ModulePW diff --git a/source/source_basis/module_pw/pw_distributeg.cpp b/source/source_basis/module_pw/pw_distributeg.cpp index ea026e88d41..827605d1071 100644 --- a/source/source_basis/module_pw/pw_distributeg.cpp +++ b/source/source_basis/module_pw/pw_distributeg.cpp @@ -10,134 +10,149 @@ namespace ModulePW * @param in: G, GT, GGT, fftnx, fftny, nz, poolnproc, poolrank, ggecut * @param out: ig2isz[ig], istot2ixy[is], is2fftixy[is], fftixy2ip[ixy], gg[ig], gcar[ig], gdirect[ig], nst, nstot */ -void PW_Basis::distribute_g() +void + PW_Basis::distribute_g () { - ModuleBase::timer::start(this->classname, "distributeg"); - if(this->distribution_type == 1) - { - this->distribution_method1(); - } - else if(this->distribution_type == 2) - { - this->distribution_method2(); - } + ModuleBase::timer::start (this->classname, "distributeg"); + if (this->distribution_type == 1) + { + this->distribution_method1 (); + } + else if (this->distribution_type == 2) + { + this->distribution_method2 (); + } else - { - ModuleBase::WARNING_QUIT("divide", "No such division type."); - } + { + ModuleBase::WARNING_QUIT ("divide", "No such division type."); + } const char* no_pw_message = "Current core has no plane waves! 
Please reduce the cores."; - ModuleBase::CHECK_WARNING_QUIT((this->npw == 0), "pw_distributeg.cpp", PARAM.inp.calculation, - no_pw_message); - ModuleBase::timer::end(this->classname, "distributeg"); + ModuleBase::CHECK_WARNING_QUIT ((this->npw == 0), "pw_distributeg.cpp", PARAM.inp.calculation, no_pw_message); + ModuleBase::timer::end (this->classname, "distributeg"); return; } /** * @brief (1) We count the total number of planewaves (tot_npw) and sticks (this->nstot) here. * - * Meanwhile, we record the number of planewaves on (x, y) in st_length2D, and store the smallest z-coordinate of each stick in st_bottom2D, - * so that we can scan a much smaller area in step(2). + * Meanwhile, we record the number of planewaves on (x, y) in st_length2D, and store the smallest z-coordinate of each + * stick in st_bottom2D, so that we can scan a much smaller area in step(2). * * @param in: fftnx, fftny, nz, ggecut, GGT * @param out: tot_npw, this->nstot, st_length2D, st_bottom2D, this->riy, this->liy */ -void PW_Basis::count_pw_st( - int* st_length2D, // the number of planewaves that belong to the stick located on (x, y). - int* st_bottom2D // the z-coordinate of the bottom of stick on (x, y). -) +void + PW_Basis::count_pw_st (int* st_length2D, // the number of planewaves that belong to the stick located on (x, y). + int* st_bottom2D // the z-coordinate of the bottom of stick on (x, y). + ) { - ModuleBase::GlobalFunc::ZEROS(st_length2D, this->fftnxy); - ModuleBase::GlobalFunc::ZEROS(st_bottom2D, this->fftnxy); + ModuleBase::GlobalFunc::ZEROS (st_length2D, this->fftnxy); + ModuleBase::GlobalFunc::ZEROS (st_bottom2D, this->fftnxy); ModuleBase::Vector3 f; // determine the scaning area along x-direct, if gamma-only && xprime, only positive axis is used. - int ix_end = int(this->nx / 2) + 1; + int ix_end = int (this->nx / 2) + 1; int ix_start = -ix_end; // determine the scaning area along y-direct, if gamma-only && !xprime, only positive axis is used. 
- int iy_end = int(this->ny / 2) + 1; + int iy_end = int (this->ny / 2) + 1; int iy_start = -iy_end; - int iz_end = int(this->nz / 2) + 1; + int iz_end = int (this->nz / 2) + 1; int iz_start = -iz_end; if (this->full_pw) - { - ix_end = int(this->nx / 2); - ix_start = ix_end - this->nx + 1; + { + ix_end = int (this->nx / 2); + ix_start = ix_end - this->nx + 1; - iy_end = int(this->ny / 2); - iy_start = iy_end - this->ny + 1; + iy_end = int (this->ny / 2); + iy_start = iy_end - this->ny + 1; - iz_end = int(this->nz / 2); - iz_start = iz_end - this->nz + 1; - } + iz_end = int (this->nz / 2); + iz_start = iz_end - this->nz + 1; + } if (this->gamma_only) - { - if(this->xprime) - { - ix_start = 0; - ix_end = this->fftnx - 1; - } - else { - iy_start = 0; - iy_end = this->fftny - 1; + if (this->xprime) + { + ix_start = 0; + ix_end = this->fftnx - 1; + } + else + { + iy_start = 0; + iy_end = this->fftny - 1; + } } - } this->liy = this->riy = 0; this->lix = this->rix = 0; this->npwtot = 0; this->nstot = 0; for (int ix = ix_start; ix <= ix_end; ++ix) - { - for (int iy = iy_start; iy <= iy_end; ++iy) { - // we shift all sticks to the first quadrant in x-y plane here. - // (ix, iy, iz) is the direct coordinates of planewaves. - // x and y is the coordinates of shifted sticks in x-y plane. - // for example, if fftny = fftnx = 10, we will shift the stick on (-1, 2) to (9, 2), - // so that its index in st_length and st_bottom is 9 * 10 + 2 = 92. - int x = ix; - int y = iy; - if (x < 0) { x += this->nx; -} - if (y < 0) { y += this->ny; -} - int index = x * this->fftny + y; - - int length = 0; // number of planewave on stick (x, y). - for (int iz = iz_start; iz <= iz_end; ++iz) - { - f.x = ix; - f.y = iy; - f.z = iz; - double modulus = f * (this->GGT * f); - if (modulus <= this->ggecut || this->full_pw) + for (int iy = iy_start; iy <= iy_end; ++iy) { - if (length == 0) { st_bottom2D[index] = iz; // length == 0 means this point is the bottom of stick (x, y). 
-} - ++this->npwtot; - ++length; - if(iy < this->riy) { this->riy = iy; -} - if(iy > this->liy) { this->liy = iy; -} - if(ix < this->rix) { this->rix = ix; -} - if(ix > this->lix) { this->lix = ix; -} + // we shift all sticks to the first quadrant in x-y plane here. + // (ix, iy, iz) is the direct coordinates of planewaves. + // x and y is the coordinates of shifted sticks in x-y plane. + // for example, if fftny = fftnx = 10, we will shift the stick on (-1, 2) to (9, 2), + // so that its index in st_length and st_bottom is 9 * 10 + 2 = 92. + int x = ix; + int y = iy; + if (x < 0) + { + x += this->nx; + } + if (y < 0) + { + y += this->ny; + } + int index = x * this->fftny + y; + + int length = 0; // number of planewave on stick (x, y). + for (int iz = iz_start; iz <= iz_end; ++iz) + { + f.x = ix; + f.y = iy; + f.z = iz; + double modulus = f * (this->GGT * f); + if (modulus <= this->ggecut || this->full_pw) + { + if (length == 0) + { + st_bottom2D[index] + = iz; // length == 0 means this point is the bottom of stick (x, y). + } + ++this->npwtot; + ++length; + if (iy < this->riy) + { + this->riy = iy; + } + if (iy > this->liy) + { + this->liy = iy; + } + if (ix < this->rix) + { + this->rix = ix; + } + if (ix > this->lix) + { + this->lix = ix; + } + } + } + if (length > 0) + { + st_length2D[index] = length; + ++this->nstot; + } } - } - if (length > 0) - { - st_length2D[index] = length; - ++this->nstot; - } } - } riy += this->ny; rix += this->nx; return; @@ -154,79 +169,86 @@ void PW_Basis::count_pw_st( * @param out: ig2isz, is2fftixy */ -void PW_Basis::get_ig2isz_is2fftixy( - int* st_bottom2D, // minimum z of stick, stored in 1d array with this->nstot elements. - int* st_length2D // the stick on (x, y) consists of st_length[x*fftny+y] planewaves. -) +void + PW_Basis::get_ig2isz_is2fftixy ( + int* st_bottom2D, // minimum z of stick, stored in 1d array with this->nstot elements. + int* st_length2D // the stick on (x, y) consists of st_length[x*fftny+y] planewaves. 
+ ) { if (this->npw == 0) - { - delete[] this->ig2isz; this->ig2isz = nullptr; // map ig to the z coordinate of this planewave. - delete[] this->is2fftixy; this->is2fftixy = nullptr; // map is (index of sticks) to ixy (iy + ix * fftny). + { + delete[] this->ig2isz; + this->ig2isz = nullptr; // map ig to the z coordinate of this planewave. + delete[] this->is2fftixy; + this->is2fftixy = nullptr; // map is (index of sticks) to ixy (iy + ix * fftny). #if defined(__CUDA) || defined(__ROCM) - if (this->device == "gpu") { - delmem_int_op()(this->d_is2fftixy); - d_is2fftixy = nullptr; - } + if (this->device == "gpu") + { + delmem_int_op () (this->d_is2fftixy); + d_is2fftixy = nullptr; + } #endif - return; - } + return; + } - delete[] this->ig2isz; this->ig2isz = new int[this->npw]; // map ig to the z coordinate of this planewave. - ModuleBase::GlobalFunc::ZEROS(this->ig2isz, this->npw); - delete[] this->is2fftixy; this->is2fftixy = new int[this->nst]; // map is (index of sticks) to ixy (iy + ix * fftny). + delete[] this->ig2isz; + this->ig2isz = new int[this->npw]; // map ig to the z coordinate of this planewave. + ModuleBase::GlobalFunc::ZEROS (this->ig2isz, this->npw); + delete[] this->is2fftixy; + this->is2fftixy = new int[this->nst]; // map is (index of sticks) to ixy (iy + ix * fftny). for (int is = 0; is < this->nst; ++is) - { - this->is2fftixy[is] = -1; - } + { + this->is2fftixy[is] = -1; + } - int st_move = 0; // this is the st_move^th stick on current core. + int st_move = 0; // this is the st_move^th stick on current core. int pw_filled = 0; // how many current core's planewaves have been found. 
for (int ixy = 0; ixy < this->fftnxy; ++ixy) - { - if (this->fftixy2ip[ixy] == this->poolrank) { - int zstart = st_bottom2D[ixy]; - for (int iz = zstart; iz < zstart + st_length2D[ixy]; ++iz) - { - int z = iz; - if (z < 0) + if (this->fftixy2ip[ixy] == this->poolrank) + { + int zstart = st_bottom2D[ixy]; + for (int iz = zstart; iz < zstart + st_length2D[ixy]; ++iz) + { + int z = iz; + if (z < 0) + { + z += this->nz; + } + this->ig2isz[pw_filled] = st_move * this->nz + z; + pw_filled++; + } + this->is2fftixy[st_move] = ixy; + st_move++; + if (xprime && ixy / fftny == 0) + { + ng_xeq0 = pw_filled; + } + } + if (st_move == this->nst && pw_filled == this->npw) { - z += this->nz; + break; } - this->ig2isz[pw_filled] = st_move * this->nz + z; - pw_filled++; - } - this->is2fftixy[st_move] = ixy; - st_move++; - if (xprime && ixy / fftny == 0) - { - ng_xeq0 = pw_filled; - } } - if (st_move == this->nst && pw_filled == this->npw) + std::vector ig2ixyz (this->npw); + for (int igl = 0; igl < this->npw; ++igl) { - break; + int isz = this->ig2isz[igl]; + int iz = isz % this->nz; + int is = isz / this->nz; + int ixy = this->is2fftixy[is]; + int iy = ixy % this->ny; + int ix = ixy / this->ny; + ig2ixyz[igl] = iz + iy * nz + ix * ny * nz; } - } - std::vector ig2ixyz(this->npw); - for (int igl = 0; igl < this->npw; ++igl) - { - int isz = this->ig2isz[igl]; - int iz = isz % this->nz; - int is = isz / this->nz; - int ixy = this->is2fftixy[is]; - int iy = ixy % this->ny; - int ix = ixy / this->ny; - ig2ixyz[igl] = iz + iy * nz + ix * ny * nz; - } #if defined(__CUDA) || defined(__ROCM) - if (this->device == "gpu") { - resmem_int_op()(d_is2fftixy, this->nst); - syncmem_int_h2d_op()(this->d_is2fftixy, this->is2fftixy, this->nst); - resmem_int_op()(ig2ixyz_gpu,this->npw); - syncmem_int_h2d_op()(ig2ixyz_gpu, ig2ixyz.data(), this->npw); - } + if (this->device == "gpu") + { + resmem_int_op () (d_is2fftixy, this->nst); + syncmem_int_h2d_op () (this->d_is2fftixy, this->is2fftixy, this->nst); + 
resmem_int_op () (ig2ixyz_gpu, this->npw); + syncmem_int_h2d_op () (ig2ixyz_gpu, ig2ixyz.data (), this->npw); + } #endif return; } diff --git a/source/source_basis/module_pw/pw_distributeg_method1.cpp b/source/source_basis/module_pw/pw_distributeg_method1.cpp index a3e3e4ac89d..5791b7ee1fd 100644 --- a/source/source_basis/module_pw/pw_distributeg_method1.cpp +++ b/source/source_basis/module_pw/pw_distributeg_method1.cpp @@ -2,7 +2,6 @@ #include "source_base/mymath.h" #include "source_base/global_function.h" - namespace ModulePW { /** @@ -22,95 +21,101 @@ namespace ModulePW * @param out: ig2isz[ig], istot2ixy[is], is2fftixy[is], fftixy2ip[ixy], startnsz_per[ip], nst_per[ip], nst */ -void PW_Basis::distribution_method1() +void + PW_Basis::distribution_method1 () { // initial the variables needed by all process - int *st_bottom2D = new int[fftnxy]; // st_bottom2D[ixy], minimum z of stick on (x, y). - int *st_length2D = new int[fftnxy]; // st_length2D[ixy], number of planewaves in stick on (x, y). - delete[] this->nst_per; this->nst_per = new int[this->poolnproc]; // number of sticks on each core. - delete[] this->npw_per; this->npw_per = new int[this->poolnproc]; // number of planewaves on each core. - delete[] this->fftixy2ip; this->fftixy2ip = new int[this->fftnxy]; // ip of core which contains the stick on (x, y). + int* st_bottom2D = new int[fftnxy]; // st_bottom2D[ixy], minimum z of stick on (x, y). + int* st_length2D = new int[fftnxy]; // st_length2D[ixy], number of planewaves in stick on (x, y). + delete[] this->nst_per; + this->nst_per = new int[this->poolnproc]; // number of sticks on each core. + delete[] this->npw_per; + this->npw_per = new int[this->poolnproc]; // number of planewaves on each core. + delete[] this->fftixy2ip; + this->fftixy2ip = new int[this->fftnxy]; // ip of core which contains the stick on (x, y). 
for (int ixy = 0; ixy < this->fftnxy; ++ixy) - this->fftixy2ip[ixy] = -1; // meaning this stick has not been distributed or there is no stick on (x, y). + { + this->fftixy2ip[ixy] = -1; // meaning this stick has not been distributed or there is no stick on (x, y). + } if (poolrank == 0) - { - /** - * @brief (1) Count the total number of planewaves (tot_npw) and sticks (this->nstot). - * - * @note the funcion here is defined in pw_distributeg.cpp - * Actually we will scan [(2 * ibox[0] + 1) * (2 * ibox[1] + 1)] points on x-y plane, - * but we define st_length2D with (fftny * fftnx) points here, because the diameter - * of the sphere should be shorter than the sides of the cube. - * calculate this->nstot and this->npwtot, liy, riy - */ + { + /** + * @brief (1) Count the total number of planewaves (tot_npw) and sticks (this->nstot). + * + * @note the funcion here is defined in pw_distributeg.cpp + * Actually we will scan [(2 * ibox[0] + 1) * (2 * ibox[1] + 1)] points on x-y plane, + * but we define st_length2D with (fftny * fftnx) points here, because the diameter + * of the sphere should be shorter than the sides of the cube. 
+ * calculate this->nstot and this->npwtot, liy, riy + */ - this->count_pw_st(st_length2D, st_bottom2D); - } + this->count_pw_st (st_length2D, st_bottom2D); + } #ifdef __MPI - MPI_Bcast(&this->npwtot, 1, MPI_INT, 0, this->pool_world); - MPI_Bcast(&this->nstot, 1, MPI_INT, 0, this->pool_world); - MPI_Bcast(&liy, 1, MPI_INT, 0, this->pool_world); - MPI_Bcast(&riy, 1, MPI_INT, 0, this->pool_world); - MPI_Bcast(&lix, 1, MPI_INT, 0, this->pool_world); - MPI_Bcast(&rix, 1, MPI_INT, 0, this->pool_world); + MPI_Bcast (&this->npwtot, 1, MPI_INT, 0, this->pool_world); + MPI_Bcast (&this->nstot, 1, MPI_INT, 0, this->pool_world); + MPI_Bcast (&liy, 1, MPI_INT, 0, this->pool_world); + MPI_Bcast (&riy, 1, MPI_INT, 0, this->pool_world); + MPI_Bcast (&lix, 1, MPI_INT, 0, this->pool_world); + MPI_Bcast (&rix, 1, MPI_INT, 0, this->pool_world); #endif - delete[] this->istot2ixy; + delete[] this->istot2ixy; this->istot2ixy = new int[this->nstot]; - if(poolrank == 0) - { + if (poolrank == 0) + { #ifdef __MPI - // Parallel line - // (2) Collect the x, y indexs, and length of the sticks. - int* st_i = new int[this->nstot]; // x or x + fftnx (if x < 0) of stick. - int* st_j = new int[this->nstot]; // y or y + fftny (if y < 0) of stick. - int* st_length = new int[this->nstot]; // number of planewaves in stick. - this->collect_st(st_length2D, st_bottom2D, st_i, st_j, st_length); + // Parallel line + // (2) Collect the x, y indexs, and length of the sticks. + int* st_i = new int[this->nstot]; // x or x + fftnx (if x < 0) of stick. + int* st_j = new int[this->nstot]; // y or y + fftny (if y < 0) of stick. + int* st_length = new int[this->nstot]; // number of planewaves in stick. + this->collect_st (st_length2D, st_bottom2D, st_i, st_j, st_length); - // (3) Distribute the sticks to cores. 
- //get nst_per, npw_per, fftixy2ip, and startnsz_per - this->startnsz_per = new int[this->poolnproc]; - this->divide_sticks_1(st_i, st_j, st_length); - delete[] st_length; + // (3) Distribute the sticks to cores. + // get nst_per, npw_per, fftixy2ip, and startnsz_per + this->startnsz_per = new int[this->poolnproc]; + this->divide_sticks_1 (st_i, st_j, st_length); + delete[] st_length; - // (4) Get map from istot to (iy, ix) - this->get_istot2ixy(st_i, st_j); - delete[] st_i; - delete[] st_j; - //We do not need startnsz_per after it. - delete[] this->startnsz_per; - this->startnsz_per=nullptr; + // (4) Get map from istot to (iy, ix) + this->get_istot2ixy (st_i, st_j); + delete[] st_i; + delete[] st_j; + // We do not need startnsz_per after it. + delete[] this->startnsz_per; + this->startnsz_per = nullptr; #else - // Serial line - // get nst_per, npw_per, fftixy2ip, and istot2ixy - this->nst_per[0] = this->nstot; - this->npw_per[0] = this->npwtot; - int st_move = 0; - for (int ixy = 0; ixy < fftnxy; ++ixy) - { - if (st_length2D[ixy] > 0) - { - this->istot2ixy[st_move] = ixy / fftny * ny + ixy % fftny; - this->fftixy2ip[ixy] = 0; - st_move++; - } - } + // Serial line + // get nst_per, npw_per, fftixy2ip, and istot2ixy + this->nst_per[0] = this->nstot; + this->npw_per[0] = this->npwtot; + int st_move = 0; + for (int ixy = 0; ixy < fftnxy; ++ixy) + { + if (st_length2D[ixy] > 0) + { + this->istot2ixy[st_move] = ixy / fftny * ny + ixy % fftny; + this->fftixy2ip[ixy] = 0; + st_move++; + } + } #endif - } + } #ifdef __MPI - MPI_Bcast(st_length2D, this->fftnxy, MPI_INT, 0, this->pool_world); - MPI_Bcast(st_bottom2D, this->fftnxy, MPI_INT, 0, this->pool_world); - MPI_Bcast(this->fftixy2ip, this->fftnxy, MPI_INT, 0, this->pool_world); - MPI_Bcast(this->istot2ixy, this->nstot, MPI_INT, 0, this->pool_world); - MPI_Bcast(this->nst_per, this->poolnproc, MPI_INT, 0 , this->pool_world); - MPI_Bcast(this->npw_per, this->poolnproc, MPI_INT, 0 , this->pool_world); + MPI_Bcast 
(st_length2D, this->fftnxy, MPI_INT, 0, this->pool_world); + MPI_Bcast (st_bottom2D, this->fftnxy, MPI_INT, 0, this->pool_world); + MPI_Bcast (this->fftixy2ip, this->fftnxy, MPI_INT, 0, this->pool_world); + MPI_Bcast (this->istot2ixy, this->nstot, MPI_INT, 0, this->pool_world); + MPI_Bcast (this->nst_per, this->poolnproc, MPI_INT, 0, this->pool_world); + MPI_Bcast (this->npw_per, this->poolnproc, MPI_INT, 0, this->pool_world); #endif this->npw = this->npw_per[this->poolrank]; this->nst = this->nst_per[this->poolrank]; this->nstnz = this->nst * this->nz; // (5) Construct ig2isz and is2fftixy. - this->get_ig2isz_is2fftixy(st_bottom2D, st_length2D); + this->get_ig2isz_is2fftixy (st_bottom2D, st_length2D); delete[] st_bottom2D; delete[] st_length2D; @@ -128,113 +133,123 @@ void PW_Basis::distribution_method1() * @param out: st_i, st_j, st_length */ -void PW_Basis::collect_st( - int* st_length2D, // the number of planewaves that belong to the stick located on (x, y), stored in 2d x-y plane. - int* st_bottom2D, // the z-coordinate of the bottom of stick on (x, y), stored in 2d x-y plane. - int* st_i, // x or x + fftnx (if x < 0) of stick. - int* st_j, // y or y + fftny (if y < 0) of stick. - int* st_length // number of planewaves in stick, stored in 1d array with this->nstot elements. -) +void + PW_Basis::collect_st ( + int* + st_length2D, // the number of planewaves that belong to the stick located on (x, y), stored in 2d x-y plane. + int* st_bottom2D, // the z-coordinate of the bottom of stick on (x, y), stored in 2d x-y plane. + int* st_i, // x or x + fftnx (if x < 0) of stick. + int* st_j, // y or y + fftny (if y < 0) of stick. + int* st_length // number of planewaves in stick, stored in 1d array with this->nstot elements. + ) { - int *temp_st_i = new int[this->nstot]; // x or x + fftnx (if x < 0) of stick. - int *temp_st_j = new int[this->nstot]; // y or y + fftny (if y < 0) of stick. - double *temp_st_length = new double[this->nstot]; // length of sticks. 
- ModuleBase::GlobalFunc::ZEROS(temp_st_length, this->nstot); + int* temp_st_i = new int[this->nstot]; // x or x + fftnx (if x < 0) of stick. + int* temp_st_j = new int[this->nstot]; // y or y + fftny (if y < 0) of stick. + double* temp_st_length = new double[this->nstot]; // length of sticks. + ModuleBase::GlobalFunc::ZEROS (temp_st_length, this->nstot); ModuleBase::Vector3 f; int is = 0; // index of stick. - int ix_end = int(this->nx / 2) + 1; + int ix_end = int (this->nx / 2) + 1; int ix_start = -ix_end; - int iy_end = int(this->ny / 2) + 1; + int iy_end = int (this->ny / 2) + 1; int iy_start = -iy_end; if (this->full_pw) - { - ix_end = int(this->nx / 2); - ix_start = ix_end - this->nx + 1; - iy_end = int(this->ny / 2); - iy_start = iy_end - this->ny + 1; - } - - if (this->gamma_only) - { - if(this->xprime) { - ix_start = 0; - ix_end = this->fftnx - 1; + ix_end = int (this->nx / 2); + ix_start = ix_end - this->nx + 1; + iy_end = int (this->ny / 2); + iy_start = iy_end - this->ny + 1; } - else + + if (this->gamma_only) { - iy_start = 0; - iy_end = this->fftny - 1; + if (this->xprime) + { + ix_start = 0; + ix_end = this->fftnx - 1; + } + else + { + iy_start = 0; + iy_end = this->fftny - 1; + } } - } for (int ix = ix_start; ix <= ix_end; ++ix) - { - for (int iy = iy_start; iy <= iy_end; ++iy) { - - // we have shifted all sticks to the first quadrant in x-y plane before. - // (ix, iy, iz) is the direct coordinates of planewaves. - // x and y is the coordinates of shifted sticks in x-y plane. - // for example, if fftnx = fftny = 10, we will shift the stick on (-1, 2) to (9, 2), - // so that its index in st_length and st_bottom is 9 * 10 + 2 = 92. - int x = ix; - int y = iy; - if (x < 0) x += nx; - if (y < 0) y += ny; - int index = x * this->fftny + y; - if (st_length2D[index] > 0) // meaning there is a stick on (x, y) point. 
- { - bool find_stick = false; - if (!this->full_pw) + for (int iy = iy_start; iy <= iy_end; ++iy) { - for (int iz = st_bottom2D[index]; iz < st_bottom2D[index] + st_length2D[index]; ++iz) - { - f.x = ix; - f.y = iy; - f.z = iz; - double modulus = f * (GGT * f); - if (modulus <= ggecut) + + // we have shifted all sticks to the first quadrant in x-y plane before. + // (ix, iy, iz) is the direct coordinates of planewaves. + // x and y is the coordinates of shifted sticks in x-y plane. + // for example, if fftnx = fftny = 10, we will shift the stick on (-1, 2) to (9, 2), + // so that its index in st_length and st_bottom is 9 * 10 + 2 = 92. + int x = ix; + int y = iy; + if (x < 0) { - find_stick = true; - break; + x += nx; } - } - } - else - { - find_stick = true; - } + if (y < 0) + { + y += ny; + } + int index = x * this->fftny + y; + if (st_length2D[index] > 0) // meaning there is a stick on (x, y) point. + { + bool find_stick = false; + if (!this->full_pw) + { + for (int iz = st_bottom2D[index]; iz < st_bottom2D[index] + st_length2D[index]; + ++iz) + { + f.x = ix; + f.y = iy; + f.z = iz; + double modulus = f * (GGT * f); + if (modulus <= ggecut) + { + find_stick = true; + break; + } + } + } + else + { + find_stick = true; + } - if (find_stick) - { - temp_st_i[is] = x; - temp_st_j[is] = y; - temp_st_length[is] = static_cast(st_length2D[index]); - ++is; + if (find_stick) + { + temp_st_i[is] = x; + temp_st_j[is] = y; + temp_st_length[is] = static_cast (st_length2D[index]); + ++is; + } + } } - } } - } - assert(is == this->nstot); + assert (is == this->nstot); // std::cout<<"collect sticks done\n"; - // As we will distribute the longest sticks preferentially in Step(3), we rearrange st_* in the order of length decreasing. + // As we will distribute the longest sticks preferentially in Step(3), we rearrange st_* in the order of length + // decreasing. - int *st_sorted_index = new int[this->nstot]; // indexs in the order of length increasing. 
+ int* st_sorted_index = new int[this->nstot]; // indexs in the order of length increasing. st_sorted_index[0] = 0; - ModuleBase::heapsort(this->nstot, temp_st_length, st_sorted_index); // sort st_* in the order of length increasing. + ModuleBase::heapsort (this->nstot, temp_st_length, st_sorted_index); // sort st_* in the order of length increasing. - int index = 0; // indexs in the order of length decreasing. + int index = 0; // indexs in the order of length decreasing. for (int istot = 0; istot < this->nstot; ++istot) - { - index = (this->nstot - 1) - istot; - st_length[index] = static_cast(temp_st_length[istot]); - st_i[index] = temp_st_i[st_sorted_index[istot]]; - st_j[index] = temp_st_j[st_sorted_index[istot]]; - } + { + index = (this->nstot - 1) - istot; + st_length[index] = static_cast (temp_st_length[istot]); + st_i[index] = temp_st_i[st_sorted_index[istot]]; + st_j[index] = temp_st_j[st_sorted_index[istot]]; + } // std::cout << "st_length "; // for (int is = 0; is < this->nstot; ++is) std::cout << st_length[is] << std::setw(4); // std::cout << "\n"; @@ -246,66 +261,66 @@ void PW_Basis::collect_st( return; } - /** * @brief (3-1) Distribute sticks to cores according to the number of plane waves. * - * We have rearranged sticks in the order of length decreasing, so that we will distribute the longest stick preferentially here. - * For each stick, we find the core that contains the least planewaves firstly, and distribute the stick to it, - * then update npw_per, this->fftixy2ip, and this->startnsz_per. + * We have rearranged sticks in the order of length decreasing, so that we will distribute the longest stick + * preferentially here. For each stick, we find the core that contains the least planewaves firstly, and distribute the + * stick to it, then update npw_per, this->fftixy2ip, and this->startnsz_per. 
* * @param in: tot_npw, this->nstot, st_i, st_j, st_length * @param out: npw_per, nst_per, this->fftixy2ip, this->startnsz_per */ -void PW_Basis::divide_sticks_1( - int* st_i, // x or x + fftnx (if x < 0) of stick. - int* st_j, // y or y + fftny (if y < 0) of stick. - int* st_length // the stick on (x, y) consists of st_length[x*fftny+y] planewaves. -) +void + PW_Basis::divide_sticks_1 (int* st_i, // x or x + fftnx (if x < 0) of stick. + int* st_j, // y or y + fftny (if y < 0) of stick. + int* st_length // the stick on (x, y) consists of st_length[x*fftny+y] planewaves. + ) { - ModuleBase::GlobalFunc::ZEROS(this->nst_per, poolnproc); - ModuleBase::GlobalFunc::ZEROS(this->npw_per, poolnproc); + ModuleBase::GlobalFunc::ZEROS (this->nst_per, poolnproc); + ModuleBase::GlobalFunc::ZEROS (this->npw_per, poolnproc); int ipmin = 0; // The ip of core containing least number of planewaves. for (int is = 0; is < this->nstot; ++is) - { - // find the ip of core containing the least planewaves. - for (int ip = 0; ip < this->poolnproc; ++ip) { - const int npwmin = this->npw_per[ipmin]; - const int npw_ip = this->npw_per[ip]; - const int nstmin = nst_per[ipmin]; - const int nst_ip = nst_per[ip]; + // find the ip of core containing the least planewaves. 
+ for (int ip = 0; ip < this->poolnproc; ++ip) + { + const int npwmin = this->npw_per[ipmin]; + const int npw_ip = this->npw_per[ip]; + const int nstmin = nst_per[ipmin]; + const int nst_ip = nst_per[ip]; - if (npw_ip == 0) - { - ipmin = ip; - break; - } - else if (npw_ip < npwmin) - { - ipmin = ip; - } - else if (npw_ip == npwmin && nst_ip < nstmin) - { - ipmin = ip; - } + if (npw_ip == 0) + { + ipmin = ip; + break; + } + else if (npw_ip < npwmin) + { + ipmin = ip; + } + else if (npw_ip == npwmin && nst_ip < nstmin) + { + ipmin = ip; + } + } + this->nst_per[ipmin]++; + this->npw_per[ipmin] += st_length[is]; + this->fftixy2ip[st_i[is] * this->fftny + st_j[is]] = ipmin; } - this->nst_per[ipmin]++; - this->npw_per[ipmin] += st_length[is]; - this->fftixy2ip[st_i[is] * this->fftny + st_j[is]] = ipmin; - } - this->startnsz_per[0] = 0; + this->startnsz_per[0] = 0; for (int ip = 1; ip < poolnproc; ++ip) - { - this->startnsz_per[ip] = this->startnsz_per[ip - 1] + this->nst_per[ip - 1] * this->nz; - } + { + this->startnsz_per[ip] = this->startnsz_per[ip - 1] + this->nst_per[ip - 1] * this->nz; + } return; } /** - * @brief (3-2) Rearrange sticks in the order of the ip of core increasing, in each core, sticks are sorted in the order of ixy increasing. + * @brief (3-2) Rearrange sticks in the order of the ip of core increasing, in each core, sticks are sorted in the order + * of ixy increasing. * * (st_start + st_move) is the new index of sticks. * Then get istot2ixy (istot2ixy[is]: iy + ix * fftny of is^th stick among all sticks) on the first core @@ -314,27 +329,27 @@ void PW_Basis::divide_sticks_1( * @param out: istot2ixy */ -void PW_Basis::get_istot2ixy( - int* st_i, // x or x + fftnx (if x < 0) of stick. - int* st_j // y or y + fftny (if y < 0) of stick. -) +void + PW_Basis::get_istot2ixy (int* st_i, // x or x + fftnx (if x < 0) of stick. + int* st_j // y or y + fftny (if y < 0) of stick. 
+ ) { - assert(this->poolrank == 0); + assert (this->poolrank == 0); int* st_move = new int[this->poolnproc]; // st_move[ip]: this is the st_move^th stick on ip^th core. - ModuleBase::GlobalFunc::ZEROS(this->istot2ixy, this->nstot); - ModuleBase::GlobalFunc::ZEROS(st_move, this->poolnproc); + ModuleBase::GlobalFunc::ZEROS (this->istot2ixy, this->nstot); + ModuleBase::GlobalFunc::ZEROS (st_move, this->poolnproc); for (int ixy = 0; ixy < this->fftnxy; ++ixy) - { - int ip = this->fftixy2ip[ixy]; - if (ip != -1) { - this->istot2ixy[this->startnsz_per[ip] / this->nz + st_move[ip]] = (ixy / fftny)*ny + ixy % fftny; - st_move[ip]++; + int ip = this->fftixy2ip[ixy]; + if (ip != -1) + { + this->istot2ixy[this->startnsz_per[ip] / this->nz + st_move[ip]] = (ixy / fftny) * ny + ixy % fftny; + st_move[ip]++; + } } - } delete[] st_move; return; } -} +} // namespace ModulePW diff --git a/source/source_basis/module_pw/pw_distributeg_method2.cpp b/source/source_basis/module_pw/pw_distributeg_method2.cpp index 7c2ae76fb19..e5eaadd559c 100644 --- a/source/source_basis/module_pw/pw_distributeg_method2.cpp +++ b/source/source_basis/module_pw/pw_distributeg_method2.cpp @@ -2,7 +2,6 @@ #include "source_base/mymath.h" #include "source_base/global_function.h" - namespace ModulePW { /** @@ -22,88 +21,94 @@ namespace ModulePW * @param out: ig2isz[ig], istot2ixy[is], is2fftixy[is], fftixy2ip[ixy], startnsz_per[ip], nst_per[ip], nst */ -void PW_Basis::distribution_method2() +void + PW_Basis::distribution_method2 () { // initial the variables needed by all proc. - int *st_bottom2D = new int[fftnxy]; // st_bottom2D[ixy], minimum z of stick on (x, y). - int *st_length2D = new int[fftnxy]; // st_length2D[ixy], number of planewaves in stick on (x, y). - delete[] this->nst_per; this->nst_per = new int[this->poolnproc]; // number of sticks on each core. - delete[] this->npw_per; this->npw_per = new int[this->poolnproc]; // number of planewaves on each core. 
- delete[] this->fftixy2ip; this->fftixy2ip = new int[this->fftnxy]; // ip of core which contains the stick on (x, y). + int* st_bottom2D = new int[fftnxy]; // st_bottom2D[ixy], minimum z of stick on (x, y). + int* st_length2D = new int[fftnxy]; // st_length2D[ixy], number of planewaves in stick on (x, y). + delete[] this->nst_per; + this->nst_per = new int[this->poolnproc]; // number of sticks on each core. + delete[] this->npw_per; + this->npw_per = new int[this->poolnproc]; // number of planewaves on each core. + delete[] this->fftixy2ip; + this->fftixy2ip = new int[this->fftnxy]; // ip of core which contains the stick on (x, y). for (int ixy = 0; ixy < this->fftnxy; ++ixy) - this->fftixy2ip[ixy] = -1; // meaning this stick has not been distributed or there is no stick on (x, y). + { + this->fftixy2ip[ixy] = -1; // meaning this stick has not been distributed or there is no stick on (x, y). + } if (poolrank == 0) - { - /** - * @brief (1) Count the total number of planewaves (tot_npw) and sticks (this->nstot). - * - * @note the funcion here is defined in pw_distributeg.cpp - * Actually we will scan [(2 * ibox[0] + 1) * (2 * ibox[1] + 1)] points on x-y plane, - * but we define st_length2D with (fftny * fftnx) points here, because the diameter - * of the sphere should be shorter than the sides of the cube. - * calculate this->nstot and this->npwtot, liy, riy - */ - this->count_pw_st(st_length2D, st_bottom2D); - } + { + /** + * @brief (1) Count the total number of planewaves (tot_npw) and sticks (this->nstot). + * + * @note the funcion here is defined in pw_distributeg.cpp + * Actually we will scan [(2 * ibox[0] + 1) * (2 * ibox[1] + 1)] points on x-y plane, + * but we define st_length2D with (fftny * fftnx) points here, because the diameter + * of the sphere should be shorter than the sides of the cube. 
+ * calculate this->nstot and this->npwtot, liy, riy + */ + this->count_pw_st (st_length2D, st_bottom2D); + } #ifdef __MPI - MPI_Bcast(&this->npwtot, 1, MPI_INT, 0, this->pool_world); - MPI_Bcast(&this->nstot, 1, MPI_INT, 0, this->pool_world); - MPI_Bcast(&liy, 1, MPI_INT, 0, this->pool_world); - MPI_Bcast(&riy, 1, MPI_INT, 0, this->pool_world); - MPI_Bcast(&lix, 1, MPI_INT, 0, this->pool_world); - MPI_Bcast(&rix, 1, MPI_INT, 0, this->pool_world); + MPI_Bcast (&this->npwtot, 1, MPI_INT, 0, this->pool_world); + MPI_Bcast (&this->nstot, 1, MPI_INT, 0, this->pool_world); + MPI_Bcast (&liy, 1, MPI_INT, 0, this->pool_world); + MPI_Bcast (&riy, 1, MPI_INT, 0, this->pool_world); + MPI_Bcast (&lix, 1, MPI_INT, 0, this->pool_world); + MPI_Bcast (&rix, 1, MPI_INT, 0, this->pool_world); #endif - delete[] this->istot2ixy; + delete[] this->istot2ixy; this->istot2ixy = new int[this->nstot]; - if(poolrank == 0) - { + if (poolrank == 0) + { #ifdef __MPI - - // Parallel line - // (2) Devide the sticks to each core, sticks are in the order of ixy increasing. - // get nst_per and startnsz_per - this->startnsz_per = new int[this->poolnproc]; - this->divide_sticks_2(); - // (3) Create the maps from ixy to ip, istot, and from istot to ixy - // get istot2ixy, fftixy2ip, npw_per - this->create_maps(st_length2D); - //We do not need startnsz_per after it. - delete[] this->startnsz_per; - this->startnsz_per=nullptr; + // Parallel line + // (2) Devide the sticks to each core, sticks are in the order of ixy increasing. + // get nst_per and startnsz_per + this->startnsz_per = new int[this->poolnproc]; + this->divide_sticks_2 (); + + // (3) Create the maps from ixy to ip, istot, and from istot to ixy + // get istot2ixy, fftixy2ip, npw_per + this->create_maps (st_length2D); + // We do not need startnsz_per after it. 
+ delete[] this->startnsz_per; + this->startnsz_per = nullptr; #else - // Serial line - // get nst_per, npw_per, fftixy2ip, and istot2ixy - this->nst_per[0] = this->nstot; - this->npw_per[0] = this->npwtot; - int st_move = 0; - for (int ixy = 0; ixy < fftnxy; ++ixy) - { - if (st_length2D[ixy] > 0) - { - this->istot2ixy[st_move] = ixy / fftny * ny + ixy % fftny; - this->fftixy2ip[ixy] = 0; - st_move++; - } - } + // Serial line + // get nst_per, npw_per, fftixy2ip, and istot2ixy + this->nst_per[0] = this->nstot; + this->npw_per[0] = this->npwtot; + int st_move = 0; + for (int ixy = 0; ixy < fftnxy; ++ixy) + { + if (st_length2D[ixy] > 0) + { + this->istot2ixy[st_move] = ixy / fftny * ny + ixy % fftny; + this->fftixy2ip[ixy] = 0; + st_move++; + } + } #endif - } + } #ifdef __MPI - MPI_Bcast(st_length2D, this->fftnxy, MPI_INT, 0, this->pool_world); - MPI_Bcast(st_bottom2D, this->fftnxy, MPI_INT, 0, this->pool_world); - MPI_Bcast(this->fftixy2ip, this->fftnxy, MPI_INT, 0, this->pool_world); - MPI_Bcast(this->istot2ixy, this->nstot, MPI_INT, 0, this->pool_world); - MPI_Bcast(this->nst_per, this->poolnproc, MPI_INT, 0, this->pool_world); - MPI_Bcast(this->npw_per, this->poolnproc, MPI_INT, 0, this->pool_world); + MPI_Bcast (st_length2D, this->fftnxy, MPI_INT, 0, this->pool_world); + MPI_Bcast (st_bottom2D, this->fftnxy, MPI_INT, 0, this->pool_world); + MPI_Bcast (this->fftixy2ip, this->fftnxy, MPI_INT, 0, this->pool_world); + MPI_Bcast (this->istot2ixy, this->nstot, MPI_INT, 0, this->pool_world); + MPI_Bcast (this->nst_per, this->poolnproc, MPI_INT, 0, this->pool_world); + MPI_Bcast (this->npw_per, this->poolnproc, MPI_INT, 0, this->pool_world); #endif this->npw = this->npw_per[this->poolrank]; this->nst = this->nst_per[this->poolrank]; this->nstnz = this->nst * this->nz; // (5) Construct ig2isz and is2fftixy. 
- this->get_ig2isz_is2fftixy(st_bottom2D, st_length2D); + this->get_ig2isz_is2fftixy (st_bottom2D, st_length2D); delete[] st_bottom2D; delete[] st_length2D; @@ -117,20 +122,27 @@ void PW_Basis::distribution_method2() * @param out: nst_per, this->startnsz_per */ -void PW_Basis::divide_sticks_2() +void + PW_Basis::divide_sticks_2 () { - ModuleBase::GlobalFunc::ZEROS(nst_per, this->poolnproc); + ModuleBase::GlobalFunc::ZEROS (nst_per, this->poolnproc); int average_nst = this->nstot / this->poolnproc; int mods = this->nstot % this->poolnproc; this->startnsz_per[0] = 0; for (int ip = 0; ip < this->poolnproc; ++ip) - { - nst_per[ip] = average_nst; - if (ip < mods) nst_per[ip]++; - if (ip >= 1) this->startnsz_per[ip] = this->startnsz_per[ip-1] + this->nst_per[ip-1] * this->nz; - } + { + nst_per[ip] = average_nst; + if (ip < mods) + { + nst_per[ip]++; + } + if (ip >= 1) + { + this->startnsz_per[ip] = this->startnsz_per[ip - 1] + this->nst_per[ip - 1] * this->nz; + } + } } /** @@ -139,28 +151,32 @@ void PW_Basis::divide_sticks_2() * @param out: this->fftixy2ip, this->istot2ixy, npw_per */ -void PW_Basis::create_maps( - int* st_length2D // the number of planewaves that belong to the stick located on (x, y), stored in 2d x-y plane. -) +void + PW_Basis::create_maps ( + int* st_length2D // the number of planewaves that belong to the stick located on (x, y), stored in 2d x-y plane. + ) { - ModuleBase::GlobalFunc::ZEROS(this->istot2ixy, this->nstot); - ModuleBase::GlobalFunc::ZEROS(this->npw_per, poolnproc); + ModuleBase::GlobalFunc::ZEROS (this->istot2ixy, this->nstot); + ModuleBase::GlobalFunc::ZEROS (this->npw_per, poolnproc); int ip = 0; int st_move = 0; // the number of sticks that have been found. 
for (int ixy = 0; ixy < this->fftnxy; ++ixy) - { - if (st_length2D[ixy] > 0) { - this->istot2ixy[st_move] = ixy / fftny * ny + ixy % fftny; - this->fftixy2ip[ixy] = ip; - this->npw_per[ip] += st_length2D[ixy]; - st_move++; - if (ip < this->poolnproc - 1) - { - // all of sticks on current core are found, skip to next core - if (st_move * this->nz >= this->startnsz_per[ip + 1]) ip++; - } + if (st_length2D[ixy] > 0) + { + this->istot2ixy[st_move] = ixy / fftny * ny + ixy % fftny; + this->fftixy2ip[ixy] = ip; + this->npw_per[ip] += st_length2D[ixy]; + st_move++; + if (ip < this->poolnproc - 1) + { + // all of sticks on current core are found, skip to next core + if (st_move * this->nz >= this->startnsz_per[ip + 1]) + { + ip++; + } + } + } } - } -} } +} // namespace ModulePW diff --git a/source/source_basis/module_pw/pw_distributer.cpp b/source/source_basis/module_pw/pw_distributer.cpp index cc2b14f7726..ac015bd5c0b 100644 --- a/source/source_basis/module_pw/pw_distributer.cpp +++ b/source/source_basis/module_pw/pw_distributer.cpp @@ -8,29 +8,38 @@ namespace ModulePW * @param in: nx, ny, nz, poolnproc, poolrank * @param out: nrxx, startz, numz */ -void PW_Basis::distribute_r() +void + PW_Basis::distribute_r () { - delete[] this->numz; this->numz = new int[this->poolnproc]; - delete[] this->startz; this->startz = new int[this->poolnproc]; - ModuleBase::GlobalFunc::ZEROS(this->numz, this->poolnproc); - ModuleBase::GlobalFunc::ZEROS(this->startz, this->poolnproc); + delete[] this->numz; + this->numz = new int[this->poolnproc]; + delete[] this->startz; + this->startz = new int[this->poolnproc]; + ModuleBase::GlobalFunc::ZEROS (this->numz, this->poolnproc); + ModuleBase::GlobalFunc::ZEROS (this->startz, this->poolnproc); int npz = this->nz / this->poolnproc; int modz = this->nz % this->poolnproc; this->startz[0] = 0; - for(int ip = 0 ; ip < this->poolnproc ; ++ip) - { - this->numz[ip] = npz; - if(ip < modz) this->numz[ip]++; - if(ip < this->poolnproc - 1) this->startz[ip+1] 
= this->startz[ip] + numz[ip]; - if(ip == this->poolrank) + for (int ip = 0; ip < this->poolnproc; ++ip) { - this->nplane = numz[ip]; - this->startz_current = startz[ip]; + this->numz[ip] = npz; + if (ip < modz) + { + this->numz[ip]++; + } + if (ip < this->poolnproc - 1) + { + this->startz[ip + 1] = this->startz[ip] + numz[ip]; + } + if (ip == this->poolrank) + { + this->nplane = numz[ip]; + this->startz_current = startz[ip]; + } } - } this->nrxx = this->numz[this->poolrank] * this->nxy; return; } -} +} // namespace ModulePW diff --git a/source/source_basis/module_pw/pw_gatherscatter.h b/source/source_basis/module_pw/pw_gatherscatter.h index 24e3671d4b3..efc5c8151bd 100644 --- a/source/source_basis/module_pw/pw_gatherscatter.h +++ b/source/source_basis/module_pw/pw_gatherscatter.h @@ -12,79 +12,88 @@ namespace ModulePW * @note in[] will be changed */ template -void PW_Basis::gatherp_scatters(std::complex* in, std::complex* out) const +void + PW_Basis::gatherp_scatters (std::complex* in, std::complex* out) const { - //ModuleBase::timer::start(this->classname, "gatherp_scatters"); - - if(this->poolnproc == 1) //In this case nst=nstot, nz = nplane, - { + // ModuleBase::timer::start(this->classname, "gatherp_scatters"); + + if (this->poolnproc == 1) // In this case nst=nstot, nz = nplane, + { #ifdef _OPENMP #pragma omp parallel for #endif - for(int is = 0 ; is < this->nst ; ++is) - { - int ixy = this->istot2ixy[is]; - //int ixy = (ixy / fftny)*ny + ixy % fftny; - std::complex *outp = &out[is*nz]; - std::complex *inp = &in[ixy*nz]; - for(int iz = 0 ; iz < this->nz ; ++iz) - { - outp[iz] = inp[iz]; - } + for (int is = 0; is < this->nst; ++is) + { + int ixy = this->istot2ixy[is]; + // int ixy = (ixy / fftny)*ny + ixy % fftny; + std::complex* outp = &out[is * nz]; + std::complex* inp = &in[ixy * nz]; + for (int iz = 0; iz < this->nz; ++iz) + { + outp[iz] = inp[iz]; + } + } + // ModuleBase::timer::end(this->classname, "gatherp_scatters"); + return; } - 
//ModuleBase::timer::end(this->classname, "gatherp_scatters"); - return; - } #ifdef __MPI - //change (nplane fftnxy) to (nplane,nstot) - // Hence, we can send them at one time. + // change (nplane fftnxy) to (nplane,nstot) + // Hence, we can send them at one time. #ifdef _OPENMP #pragma omp parallel for #endif - for (int istot = 0;istot < nstot; ++istot) - { - int ixy = this->istot2ixy[istot]; - //int ixy = (ixy / fftny)*ny + ixy % fftny; - std::complex *outp = &out[istot*nplane]; - std::complex *inp = &in[ixy*nplane]; - for (int iz = 0; iz < nplane; ++iz) - { - outp[iz] = inp[iz]; - } - } + for (int istot = 0; istot < nstot; ++istot) + { + int ixy = this->istot2ixy[istot]; + // int ixy = (ixy / fftny)*ny + ixy % fftny; + std::complex* outp = &out[istot * nplane]; + std::complex* inp = &in[ixy * nplane]; + for (int iz = 0; iz < nplane; ++iz) + { + outp[iz] = inp[iz]; + } + } - //exchange data + // exchange data //(nplane,nstot) to (numz[ip],ns, poolnproc) - if(typeid(T) == typeid(double)) - { - MPI_Alltoallv(out, numr, startr, MPI_DOUBLE_COMPLEX, in, numg, startg, MPI_DOUBLE_COMPLEX, this->pool_world); - } - else if(typeid(T) == typeid(float)) - { - MPI_Alltoallv(out, numr, startr, MPI_COMPLEX, in, numg, startg, MPI_COMPLEX, this->pool_world); - } + if (typeid (T) == typeid (double)) + { + MPI_Alltoallv (out, + numr, + startr, + MPI_DOUBLE_COMPLEX, + in, + numg, + startg, + MPI_DOUBLE_COMPLEX, + this->pool_world); + } + else if (typeid (T) == typeid (float)) + { + MPI_Alltoallv (out, numr, startr, MPI_COMPLEX, in, numg, startg, MPI_COMPLEX, this->pool_world); + } - // change (nz,ns) to (numz[ip],ns, poolnproc) + // change (nz,ns) to (numz[ip],ns, poolnproc) #ifdef _OPENMP #pragma omp parallel for collapse(2) #endif - for (int ip = 0; ip < this->poolnproc ;++ip) - { - for (int is = 0; is < this->nst; ++is) - { - int nzip = this->numz[ip]; - std::complex *outp0 = &out[startz[ip]]; - std::complex *inp0 = &in[startg[ip]]; - std::complex *outp = &outp0[is * nz]; - 
std::complex *inp = &inp0[is * nzip ]; - for (int izip = 0; izip < nzip; ++izip) - { - outp[izip] = inp[izip]; - } - } - } + for (int ip = 0; ip < this->poolnproc; ++ip) + { + for (int is = 0; is < this->nst; ++is) + { + int nzip = this->numz[ip]; + std::complex* outp0 = &out[startz[ip]]; + std::complex* inp0 = &in[startg[ip]]; + std::complex* outp = &outp0[is * nz]; + std::complex* inp = &inp0[is * nzip]; + for (int izip = 0; izip < nzip; ++izip) + { + outp[izip] = inp[izip]; + } + } + } #endif - //ModuleBase::timer::start(this->classname, "gatherp_scatters"); + // ModuleBase::timer::start(this->classname, "gatherp_scatters"); return; } @@ -96,96 +105,103 @@ void PW_Basis::gatherp_scatters(std::complex* in, std::complex* out) const * @note in[] will be changed */ template -void PW_Basis::gathers_scatterp(std::complex* in, std::complex* out) const +void + PW_Basis::gathers_scatterp (std::complex* in, std::complex* out) const { // ModuleBase::timer::start(this->classname, "gathers_scatterp"); - if(this->poolnproc == 1) //In this case nrxx=fftnx*fftny*nz, nst = nstot, - { + if (this->poolnproc == 1) // In this case nrxx=fftnx*fftny*nz, nst = nstot, + { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for(int i = 0; i < this->nrxx; ++i) - { - out[i] = std::complex(0, 0); - } + for (int i = 0; i < this->nrxx; ++i) + { + out[i] = std::complex (0, 0); + } #ifdef _OPENMP #pragma omp parallel for #endif - for(int is = 0 ; is < this->nst ; ++is) - { - int ixy = istot2ixy[is]; - //int ixy = (ixy / fftny)*ny + ixy % fftny; - std::complex *outp = &out[ixy*nz]; - std::complex *inp = &in[is*nz]; - for(int iz = 0 ; iz < this->nz ; ++iz) - { - outp[iz] = inp[iz]; - } + for (int is = 0; is < this->nst; ++is) + { + int ixy = istot2ixy[is]; + // int ixy = (ixy / fftny)*ny + ixy % fftny; + std::complex* outp = &out[ixy * nz]; + std::complex* inp = &in[is * nz]; + for (int iz = 0; iz < this->nz; ++iz) + { + outp[iz] = inp[iz]; + } + } + // 
ModuleBase::timer::end(this->classname, "gathers_scatterp"); + return; } - // ModuleBase::timer::end(this->classname, "gathers_scatterp"); - return; - } #ifdef __MPI - // change (nz,ns) to (numz[ip],ns, poolnproc) - // Hence, we can send them at one time. + // change (nz,ns) to (numz[ip],ns, poolnproc) + // Hence, we can send them at one time. #ifdef _OPENMP #pragma omp parallel for collapse(2) #endif - for (int ip = 0; ip < this->poolnproc ;++ip) - { - for (int is = 0; is < this->nst; ++is) - { - int nzip = this->numz[ip]; - std::complex *outp0 = &out[startg[ip]]; - std::complex *inp0 = &in[startz[ip]]; - std::complex *outp = &outp0[is * nzip]; - std::complex *inp = &inp0[is * nz ]; - for (int izip = 0; izip < nzip; ++izip) - { - outp[izip] = inp[izip]; - } - } - } + for (int ip = 0; ip < this->poolnproc; ++ip) + { + for (int is = 0; is < this->nst; ++is) + { + int nzip = this->numz[ip]; + std::complex* outp0 = &out[startg[ip]]; + std::complex* inp0 = &in[startz[ip]]; + std::complex* outp = &outp0[is * nzip]; + std::complex* inp = &inp0[is * nz]; + for (int izip = 0; izip < nzip; ++izip) + { + outp[izip] = inp[izip]; + } + } + } - //exchange data + // exchange data //(numz[ip],ns, poolnproc) to (nplane,nstot) - if(typeid(T) == typeid(double)) - { - MPI_Alltoallv(out, numg, startg, MPI_DOUBLE_COMPLEX, in, numr, startr, MPI_DOUBLE_COMPLEX, this->pool_world); - } - else if(typeid(T) == typeid(float)) - { - MPI_Alltoallv(out, numg, startg, MPI_COMPLEX, in, numr, startr, MPI_COMPLEX, this->pool_world); - } + if (typeid (T) == typeid (double)) + { + MPI_Alltoallv (out, + numg, + startg, + MPI_DOUBLE_COMPLEX, + in, + numr, + startr, + MPI_DOUBLE_COMPLEX, + this->pool_world); + } + else if (typeid (T) == typeid (float)) + { + MPI_Alltoallv (out, numg, startg, MPI_COMPLEX, in, numr, startr, MPI_COMPLEX, this->pool_world); + } #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for(int i = 0; i < this->nrxx; ++i) - { - out[i] = std::complex(0, 0); - } - 
//change (nplane,nstot) to (nplane fftnxy) + for (int i = 0; i < this->nrxx; ++i) + { + out[i] = std::complex (0, 0); + } + // change (nplane,nstot) to (nplane fftnxy) #ifdef _OPENMP #pragma omp parallel for #endif - for (int istot = 0;istot < nstot; ++istot) - { - int ixy = this->istot2ixy[istot]; - //int ixy = (ixy / fftny)*ny + ixy % fftny; - std::complex *outp = &out[ixy * nplane]; - std::complex *inp = &in[istot * nplane]; - for (int iz = 0; iz < nplane; ++iz) - { - outp[iz] = inp[iz]; - } - } + for (int istot = 0; istot < nstot; ++istot) + { + int ixy = this->istot2ixy[istot]; + // int ixy = (ixy / fftny)*ny + ixy % fftny; + std::complex* outp = &out[ixy * nplane]; + std::complex* inp = &in[istot * nplane]; + for (int iz = 0; iz < nplane; ++iz) + { + outp[iz] = inp[iz]; + } + } #endif // ModuleBase::timer::start(this->classname, "gathers_scatterp"); return; } - - -} +} // namespace ModulePW diff --git a/source/source_basis/module_pw/pw_init.cpp b/source/source_basis/module_pw/pw_init.cpp index 08c676d39f3..ebfa285b4d4 100644 --- a/source/source_basis/module_pw/pw_init.cpp +++ b/source/source_basis/module_pw/pw_init.cpp @@ -4,203 +4,221 @@ namespace ModulePW { #ifdef __MPI -void PW_Basis:: initmpi( - const int poolnproc_in, - const int poolrank_in, - MPI_Comm pool_world_in -) +void + PW_Basis::initmpi (const int poolnproc_in, const int poolrank_in, MPI_Comm pool_world_in) { - this->poolnproc = poolnproc_in; - this->poolrank = poolrank_in; - this->pool_world = pool_world_in; + this->poolnproc = poolnproc_in; + this->poolrank = poolrank_in; + this->pool_world = pool_world_in; } #endif -/// +/// /// Init the grids for FFT /// Input: lattice vectors of the cell, Energy cut off for G^2/2 /// Output: fftnx, fftny, fftnz, fftnxyz, latvec, G, GT, GGT -/// -void PW_Basis:: initgrids( - const double lat0_in, //unit length (unit in bohr) - const ModuleBase::Matrix3 latvec_in, // Unitcell lattice vectors - const double gridecut -) +/// +void + PW_Basis::initgrids (const 
double lat0_in, // unit length (unit in bohr) + const ModuleBase::Matrix3 latvec_in, // Unitcell lattice vectors + const double gridecut) { - //init lattice + // init lattice this->lat0 = lat0_in; this->tpiba = ModuleBase::TWO_PI / this->lat0; - this->tpiba2 = this->tpiba*this->tpiba; + this->tpiba2 = this->tpiba * this->tpiba; this->latvec = latvec_in; - this->omega = std::abs(latvec.Det()) * lat0 * lat0 * lat0; - this->GT = latvec.Inverse(); - this->G = GT.Transpose(); - this->GGT = G * GT; + this->omega = std::abs (latvec.Det ()) * lat0 * lat0 * lat0; + this->GT = latvec.Inverse (); + this->G = GT.Transpose (); + this->GGT = G * GT; //------------------------------------------------------------ //-------------------------init grids------------------------- //----------------------------------------------------------- this->gridecut_lat = gridecut / this->tpiba2; ModuleBase::Vector3 lat; - int *ibox = new int[3];// ibox[i] are the minimal FFT dimensions, - + int* ibox = new int[3]; // ibox[i] are the minimal FFT dimensions, + lat.x = latvec.e11; lat.y = latvec.e12; lat.z = latvec.e13; - ibox[0] = int(sqrt(this->gridecut_lat) * sqrt(lat * lat)) + 1; + ibox[0] = int (sqrt (this->gridecut_lat) * sqrt (lat * lat)) + 1; lat.x = latvec.e21; lat.y = latvec.e22; lat.z = latvec.e23; - ibox[1] = int(sqrt(this->gridecut_lat) * sqrt(lat * lat)) + 1; + ibox[1] = int (sqrt (this->gridecut_lat) * sqrt (lat * lat)) + 1; lat.x = latvec.e31; lat.y = latvec.e32; lat.z = latvec.e33; - ibox[2] = int(sqrt(this->gridecut_lat) * sqrt(lat * lat)) + 1; - - // We should check if ibox is the minimum number to cover the planewave ball. + ibox[2] = int (sqrt (this->gridecut_lat) * sqrt (lat * lat)) + 1; + + // We should check if ibox is the minimum number to cover the planewave ball. 
// Find the minimum number of ibox by traveling all possible ibox - int n1,n2,n3; + int n1, n2, n3; n1 = n2 = n3 = 0; - for(int igz = -ibox[2]+this->poolrank; igz <= ibox[2]; igz += this->poolnproc) - { - for(int igy = -ibox[1]; igy <= ibox[1]; ++igy) + for (int igz = -ibox[2] + this->poolrank; igz <= ibox[2]; igz += this->poolnproc) { - for(int igx = -ibox[0]; igx <= ibox[0]; ++igx) - { - ModuleBase::Vector3 f; - f.x = igx; - f.y = igy; - f.z = igz; - double modulus = f * (this->GGT * f); - if(modulus <= this->gridecut_lat) + for (int igy = -ibox[1]; igy <= ibox[1]; ++igy) { - if(n1 < std::abs(igx)) n1 = std::abs(igx); - if(n2 < std::abs(igy)) n2 = std::abs(igy); - if(n3 < std::abs(igz)) n3 = std::abs(igz); + for (int igx = -ibox[0]; igx <= ibox[0]; ++igx) + { + ModuleBase::Vector3 f; + f.x = igx; + f.y = igy; + f.z = igz; + double modulus = f * (this->GGT * f); + if (modulus <= this->gridecut_lat) + { + if (n1 < std::abs (igx)) + { + n1 = std::abs (igx); + } + if (n2 < std::abs (igy)) + { + n2 = std::abs (igy); + } + if (n3 < std::abs (igz)) + { + n3 = std::abs (igz); + } + } + } } - } } - } - ibox[0] = 2*n1+1; - ibox[1] = 2*n2+1; - ibox[2] = 2*n3+1; + ibox[0] = 2 * n1 + 1; + ibox[1] = 2 * n2 + 1; + ibox[2] = 2 * n3 + 1; #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, ibox, 3, MPI_INT, MPI_MAX , this->pool_world); + MPI_Allreduce (MPI_IN_PLACE, ibox, 3, MPI_INT, MPI_MAX, this->pool_world); #endif // Find the minimal FFT box size the factors into the primes (2,3,5,7). for (int i = 0; i < 3; i++) - { - int b = 0; - int n2 = 0; - int n3 = 0; - int n5 = 0; - //int n7 = 0; - bool done_factoring = false; - - // increase ibox[i] by 1 until it is totally factorizable by (2,3,5,7) - do { - b = ibox[i]; - - //n2 = n3 = n5 = n7 = 0; - n2 = n3 = n5 = 0; - done_factoring = false; - if ((this->full_pw && this->full_pw_dim == 2) && b % 2 != 0) done_factoring = true; // full_pw_dim = 2 means FFT dimensions should be even. 
- while (!done_factoring) - { - if (b % 2 == 0 && (!this->full_pw || this->full_pw_dim != 1)) // full_pw_dim = 1 means FFT dimension should be odd. - { - n2++; - b /= 2; - continue; - } - if (b % 3 == 0) - { - n3++; - b /= 3; - continue; - } - if (b % 5 == 0) - { - n5++; - b /= 5; - continue; - } - //if (b%7==0) { n7++; b /= 7; continue; } - done_factoring = true; - } - ibox[i] += 1; + int b = 0; + int n2 = 0; + int n3 = 0; + int n5 = 0; + // int n7 = 0; + bool done_factoring = false; + + // increase ibox[i] by 1 until it is totally factorizable by (2,3,5,7) + do + { + b = ibox[i]; + + // n2 = n3 = n5 = n7 = 0; + n2 = n3 = n5 = 0; + done_factoring = false; + if ((this->full_pw && this->full_pw_dim == 2) && b % 2 != 0) + { + done_factoring = true; // full_pw_dim = 2 means FFT dimensions should be even. + } + while (!done_factoring) + { + if (b % 2 == 0 + && (!this->full_pw + || this->full_pw_dim != 1)) // full_pw_dim = 1 means FFT dimension should be odd. + { + n2++; + b /= 2; + continue; + } + if (b % 3 == 0) + { + n3++; + b /= 3; + continue; + } + if (b % 5 == 0) + { + n5++; + b /= 5; + continue; + } + // if (b%7==0) { n7++; b /= 7; continue; } + done_factoring = true; + } + ibox[i] += 1; + } + while (b != 1); + ibox[i] -= 1; + // b==1 means fftbox[i] is (2,3,5,7) factorizable } - while (b != 1); - ibox[i] -= 1; - // b==1 means fftbox[i] is (2,3,5,7) factorizable - } this->nx = ibox[0]; this->ny = ibox[1]; this->nz = ibox[2]; - this->nxy =this->nx * this->ny; + this->nxy = this->nx * this->ny; this->nxyz = this->nxy * this->nz; - delete[] ibox; + delete[] ibox; return; } -/// +/// /// Init the grids for FFT /// Input: lattice vectors of the cell, nx, ny, nz /// Output: nx, ny, nz, nxyz, latvec, G, GT, GGT -/// -void PW_Basis:: initgrids( - const double lat0_in, - const ModuleBase::Matrix3 latvec_in, // Unitcell lattice vectors - const int nx_in, int ny_in, int nz_in -) +/// +void + PW_Basis::initgrids (const double lat0_in, + const ModuleBase::Matrix3 latvec_in, 
// Unitcell lattice vectors + const int nx_in, + int ny_in, + int nz_in) { this->lat0 = lat0_in; this->tpiba = ModuleBase::TWO_PI / this->lat0; - this->tpiba2 = this->tpiba*this->tpiba; + this->tpiba2 = this->tpiba * this->tpiba; this->latvec = latvec_in; - this->omega = std::abs(latvec.Det()) * lat0 * lat0 * lat0; - this->GT = latvec.Inverse(); - this->G = GT.Transpose(); - this->GGT = G * GT; + this->omega = std::abs (latvec.Det ()) * lat0 * lat0 * lat0; + this->GT = latvec.Inverse (); + this->G = GT.Transpose (); + this->GGT = G * GT; this->nx = nx_in; this->ny = ny_in; this->nz = nz_in; this->nxy = this->nx * this->ny; this->nxyz = this->nxy * this->nz; - int *ibox = new int[3]; - ibox[0] = int((this->nx-1)/2)+1; - ibox[1] = int((this->ny-1)/2)+1; - ibox[2] = int((this->nz-1)/2)+1; + int* ibox = new int[3]; + ibox[0] = int ((this->nx - 1) / 2) + 1; + ibox[1] = int ((this->ny - 1) / 2) + 1; + ibox[2] = int ((this->nz - 1) / 2) + 1; this->gridecut_lat = 1e20; int count = 0; - for(int igz = -ibox[2]; igz <= ibox[2]; ++igz) - { - for(int igy = -ibox[1]; igy <= ibox[1]; ++igy) + for (int igz = -ibox[2]; igz <= ibox[2]; ++igz) { - for(int igx = -ibox[0]; igx <= ibox[0]; ++igx) - { - ++count; - if(count%this->poolnproc != this->poolrank) continue; - if(std::abs(igx)<=ibox[0]-1 && std::abs(igy)<=ibox[1]-1 && std::abs(igz)<=ibox[2]-1 ) continue; - ModuleBase::Vector3 f; - f.x = igx; - f.y = igy; - f.z = igz; - double modulus = f * (this->GGT * f); - if(modulus < this->gridecut_lat) + for (int igy = -ibox[1]; igy <= ibox[1]; ++igy) { - this->gridecut_lat = modulus; + for (int igx = -ibox[0]; igx <= ibox[0]; ++igx) + { + ++count; + if (count % this->poolnproc != this->poolrank) + { + continue; + } + if (std::abs (igx) <= ibox[0] - 1 && std::abs (igy) <= ibox[1] - 1 + && std::abs (igz) <= ibox[2] - 1) + { + continue; + } + ModuleBase::Vector3 f; + f.x = igx; + f.y = igy; + f.z = igz; + double modulus = f * (this->GGT * f); + if (modulus < this->gridecut_lat) + { + 
this->gridecut_lat = modulus; + } + } } - } } - } #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, &this->gridecut_lat, 1, MPI_DOUBLE, MPI_MIN , this->pool_world); + MPI_Allreduce (MPI_IN_PLACE, &this->gridecut_lat, 1, MPI_DOUBLE, MPI_MIN, this->pool_world); #endif this->gridecut_lat -= 1e-6; @@ -208,14 +226,12 @@ void PW_Basis:: initgrids( return; } - -//Init some parameters -void PW_Basis:: initparameters( - const bool gamma_only_in, - const double pwecut_in, - const int distribution_type_in, - const bool xprime_in -) +// Init some parameters +void + PW_Basis::initparameters (const bool gamma_only_in, + const double pwecut_in, + const int distribution_type_in, + const bool xprime_in) { this->xprime = xprime_in; this->gamma_only = gamma_only_in; @@ -223,33 +239,40 @@ void PW_Basis:: initparameters( // we have F(-k) = F(k)*, so that only half of planewaves are needed. this->fftny = this->ny; this->fftnx = this->nx; - if (this->gamma_only) - { - if(this->xprime) this->fftnx = int(this->nx / 2) + 1; - else this->fftny = int(this->ny / 2) + 1; - } - + if (this->gamma_only) + { + if (this->xprime) + { + this->fftnx = int (this->nx / 2) + 1; + } + else + { + this->fftny = int (this->ny / 2) + 1; + } + } + this->fftnz = this->nz; this->fftnxy = this->fftnx * this->fftny; this->fftnxyz = this->fftnxy * this->fftnz; this->ggecut = pwecut_in / this->tpiba2; - //ggecut should be no larger than gridecut - if(this->ggecut > this->gridecut_lat) - { - this->ggecut = this->gridecut_lat; - } + // ggecut should be no larger than gridecut + if (this->ggecut > this->gridecut_lat) + { + this->ggecut = this->gridecut_lat; + } this->distribution_type = distribution_type_in; } // Set parameters about full planewave, used only in OFDFT for now. 
sunliang added 2022-08-30 -void PW_Basis::setfullpw( - const bool inpt_full_pw, - const int inpt_full_pw_dim -) +void + PW_Basis::setfullpw (const bool inpt_full_pw, const int inpt_full_pw_dim) { this->full_pw = inpt_full_pw; this->full_pw_dim = inpt_full_pw_dim; - if (!this->full_pw) this->full_pw_dim = 0; + if (!this->full_pw) + { + this->full_pw_dim = 0; + } } -} \ No newline at end of file +} // namespace ModulePW \ No newline at end of file diff --git a/source/source_basis/module_pw/pw_transform.cpp b/source/source_basis/module_pw/pw_transform.cpp index 2195517d49d..8c4a5b59a13 100644 --- a/source/source_basis/module_pw/pw_transform.cpp +++ b/source/source_basis/module_pw/pw_transform.cpp @@ -22,50 +22,51 @@ namespace ModulePW * @param out: (nz, ns), std::complex data */ template -void PW_Basis::real2recip(const std::complex* in, +void + PW_Basis::real2recip (const std::complex* in, std::complex* out, const bool add, const FPTYPE factor) const { - ModuleBase::timer::start(this->classname, "real2recip"); + ModuleBase::timer::start (this->classname, "real2recip"); - assert(this->gamma_only == false); + assert (this->gamma_only == false); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (int ir = 0; ir < this->nrxx; ++ir) - { - this->fft_bundle.get_auxr_data()[ir] = in[ir]; - } - this->fft_bundle.fftxyfor(fft_bundle.get_auxr_data(), fft_bundle.get_auxr_data()); + { + this->fft_bundle.get_auxr_data ()[ir] = in[ir]; + } + this->fft_bundle.fftxyfor (fft_bundle.get_auxr_data (), fft_bundle.get_auxr_data ()); - this->gatherp_scatters(this->fft_bundle.get_auxr_data(), this->fft_bundle.get_auxg_data()); + this->gatherp_scatters (this->fft_bundle.get_auxr_data (), this->fft_bundle.get_auxg_data ()); - this->fft_bundle.fftzfor(fft_bundle.get_auxg_data(), fft_bundle.get_auxg_data()); + this->fft_bundle.fftzfor (fft_bundle.get_auxg_data (), fft_bundle.get_auxg_data ()); if (add) - { - FPTYPE tmpfac = factor / FPTYPE(this->nxyz); + { + FPTYPE tmpfac = 
factor / FPTYPE (this->nxyz); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int ig = 0; ig < this->npw; ++ig) - { - out[ig] += tmpfac * this->fft_bundle.get_auxg_data()[this->ig2isz[ig]]; + for (int ig = 0; ig < this->npw; ++ig) + { + out[ig] += tmpfac * this->fft_bundle.get_auxg_data ()[this->ig2isz[ig]]; + } } - } else - { - FPTYPE tmpfac = 1.0 / FPTYPE(this->nxyz); + { + FPTYPE tmpfac = 1.0 / FPTYPE (this->nxyz); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int ig = 0; ig < this->npw; ++ig) - { - out[ig] = tmpfac * this->fft_bundle.get_auxg_data()[this->ig2isz[ig]]; + for (int ig = 0; ig < this->npw; ++ig) + { + out[ig] = tmpfac * this->fft_bundle.get_auxg_data ()[this->ig2isz[ig]]; + } } - } - ModuleBase::timer::end(this->classname, "real2recip"); + ModuleBase::timer::end (this->classname, "real2recip"); } /** @@ -76,63 +77,64 @@ void PW_Basis::real2recip(const std::complex* in, * @param out: (nz, ns), std::complex data */ template -void PW_Basis::real2recip(const FPTYPE* in, std::complex* out, const bool add, const FPTYPE factor) const +void + PW_Basis::real2recip (const FPTYPE* in, std::complex* out, const bool add, const FPTYPE factor) const { - ModuleBase::timer::start(this->classname, "real2recip"); + ModuleBase::timer::start (this->classname, "real2recip"); if (this->gamma_only) - { - const int npy = this->ny * this->nplane; + { + const int npy = this->ny * this->nplane; #ifdef _OPENMP #pragma omp parallel for collapse(2) schedule(static) #endif - for (int ix = 0; ix < this->nx; ++ix) - { - for (int ipy = 0; ipy < npy; ++ipy) - { - this->fft_bundle.get_rspace_data()[ix * npy + ipy] = in[ix * npy + ipy]; - } - } + for (int ix = 0; ix < this->nx; ++ix) + { + for (int ipy = 0; ipy < npy; ++ipy) + { + this->fft_bundle.get_rspace_data ()[ix * npy + ipy] = in[ix * npy + ipy]; + } + } - this->fft_bundle.fftxyr2c(fft_bundle.get_rspace_data(), fft_bundle.get_auxr_data()); - } + this->fft_bundle.fftxyr2c 
(fft_bundle.get_rspace_data (), fft_bundle.get_auxr_data ()); + } else - { + { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int ir = 0; ir < this->nrxx; ++ir) - { - this->fft_bundle.get_auxr_data()[ir] = std::complex(in[ir], 0); + for (int ir = 0; ir < this->nrxx; ++ir) + { + this->fft_bundle.get_auxr_data ()[ir] = std::complex (in[ir], 0); + } + this->fft_bundle.fftxyfor (fft_bundle.get_auxr_data (), fft_bundle.get_auxr_data ()); } - this->fft_bundle.fftxyfor(fft_bundle.get_auxr_data(), fft_bundle.get_auxr_data()); - } - this->gatherp_scatters(this->fft_bundle.get_auxr_data(), this->fft_bundle.get_auxg_data()); + this->gatherp_scatters (this->fft_bundle.get_auxr_data (), this->fft_bundle.get_auxg_data ()); - this->fft_bundle.fftzfor(fft_bundle.get_auxg_data(), fft_bundle.get_auxg_data()); + this->fft_bundle.fftzfor (fft_bundle.get_auxg_data (), fft_bundle.get_auxg_data ()); if (add) - { - FPTYPE tmpfac = factor / FPTYPE(this->nxyz); + { + FPTYPE tmpfac = factor / FPTYPE (this->nxyz); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int ig = 0; ig < this->npw; ++ig) - { - out[ig] += tmpfac * this->fft_bundle.get_auxg_data()[this->ig2isz[ig]]; + for (int ig = 0; ig < this->npw; ++ig) + { + out[ig] += tmpfac * this->fft_bundle.get_auxg_data ()[this->ig2isz[ig]]; + } } - } else - { - FPTYPE tmpfac = 1.0 / FPTYPE(this->nxyz); + { + FPTYPE tmpfac = 1.0 / FPTYPE (this->nxyz); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int ig = 0; ig < this->npw; ++ig) - { - out[ig] = tmpfac * this->fft_bundle.get_auxg_data()[this->ig2isz[ig]]; + for (int ig = 0; ig < this->npw; ++ig) + { + out[ig] = tmpfac * this->fft_bundle.get_auxg_data ()[this->ig2isz[ig]]; + } } - } - ModuleBase::timer::end(this->classname, "real2recip"); + ModuleBase::timer::end (this->classname, "real2recip"); } /** @@ -143,55 +145,56 @@ void PW_Basis::real2recip(const FPTYPE* in, std::complex* out, const boo * @param out: (nplane, ny, nx), 
std::complex */ template -void PW_Basis::recip2real(const std::complex* in, +void + PW_Basis::recip2real (const std::complex* in, std::complex* out, const bool add, const FPTYPE factor) const { - ModuleBase::timer::start(this->classname, "recip2real"); - assert(this->gamma_only == false); + ModuleBase::timer::start (this->classname, "recip2real"); + assert (this->gamma_only == false); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (int i = 0; i < this->nst * this->nz; ++i) - { - fft_bundle.get_auxg_data()[i] = std::complex(0, 0); - } + { + fft_bundle.get_auxg_data ()[i] = std::complex (0, 0); + } #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (int ig = 0; ig < this->npw; ++ig) - { - this->fft_bundle.get_auxg_data()[this->ig2isz[ig]] = in[ig]; - } - this->fft_bundle.fftzbac(fft_bundle.get_auxg_data(), fft_bundle.get_auxg_data()); + { + this->fft_bundle.get_auxg_data ()[this->ig2isz[ig]] = in[ig]; + } + this->fft_bundle.fftzbac (fft_bundle.get_auxg_data (), fft_bundle.get_auxg_data ()); - this->gathers_scatterp(this->fft_bundle.get_auxg_data(), this->fft_bundle.get_auxr_data()); + this->gathers_scatterp (this->fft_bundle.get_auxg_data (), this->fft_bundle.get_auxr_data ()); - this->fft_bundle.fftxybac(fft_bundle.get_auxr_data(), fft_bundle.get_auxr_data()); + this->fft_bundle.fftxybac (fft_bundle.get_auxr_data (), fft_bundle.get_auxr_data ()); if (add) - { + { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int ir = 0; ir < this->nrxx; ++ir) - { - out[ir] += factor * this->fft_bundle.get_auxr_data()[ir]; + for (int ir = 0; ir < this->nrxx; ++ir) + { + out[ir] += factor * this->fft_bundle.get_auxr_data ()[ir]; + } } - } else - { + { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int ir = 0; ir < this->nrxx; ++ir) - { - out[ir] = this->fft_bundle.get_auxr_data()[ir]; + for (int ir = 0; ir < this->nrxx; ++ir) + { + out[ir] = this->fft_bundle.get_auxr_data ()[ir]; + } } - } - 
ModuleBase::timer::end(this->classname, "recip2real"); + ModuleBase::timer::end (this->classname, "recip2real"); } /** @@ -202,119 +205,121 @@ void PW_Basis::recip2real(const std::complex* in, * @param out: (nplane, ny, nx), double */ template -void PW_Basis::recip2real(const std::complex* in, FPTYPE* out, const bool add, const FPTYPE factor) const +void + PW_Basis::recip2real (const std::complex* in, FPTYPE* out, const bool add, const FPTYPE factor) const { - ModuleBase::timer::start(this->classname, "recip2real"); + ModuleBase::timer::start (this->classname, "recip2real"); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (int i = 0; i < this->nst * this->nz; ++i) - { - fft_bundle.get_auxg_data()[i] = std::complex(0, 0); - } + { + fft_bundle.get_auxg_data ()[i] = std::complex (0, 0); + } #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (int ig = 0; ig < this->npw; ++ig) - { - this->fft_bundle.get_auxg_data()[this->ig2isz[ig]] = in[ig]; - } - this->fft_bundle.fftzbac(fft_bundle.get_auxg_data(), fft_bundle.get_auxg_data()); + { + this->fft_bundle.get_auxg_data ()[this->ig2isz[ig]] = in[ig]; + } + this->fft_bundle.fftzbac (fft_bundle.get_auxg_data (), fft_bundle.get_auxg_data ()); - this->gathers_scatterp(this->fft_bundle.get_auxg_data(), this->fft_bundle.get_auxr_data()); + this->gathers_scatterp (this->fft_bundle.get_auxg_data (), this->fft_bundle.get_auxr_data ()); if (this->gamma_only) - { - this->fft_bundle.fftxyc2r(fft_bundle.get_auxr_data(), fft_bundle.get_rspace_data()); + { + this->fft_bundle.fftxyc2r (fft_bundle.get_auxr_data (), fft_bundle.get_rspace_data ()); - // r2c in place - const int npy = this->ny * this->nplane; + // r2c in place + const int npy = this->ny * this->nplane; - if (add) - { + if (add) + { #ifdef _OPENMP #pragma omp parallel for collapse(2) schedule(static) #endif - for (int ix = 0; ix < this->nx; ++ix) - { - for (int ipy = 0; ipy < npy; ++ipy) - { - out[ix * npy + ipy] += factor * 
this->fft_bundle.get_rspace_data()[ix * npy + ipy]; + for (int ix = 0; ix < this->nx; ++ix) + { + for (int ipy = 0; ipy < npy; ++ipy) + { + out[ix * npy + ipy] + += factor * this->fft_bundle.get_rspace_data ()[ix * npy + ipy]; + } + } } - } - } - else - { + else + { #ifdef _OPENMP #pragma omp parallel for collapse(2) schedule(static) #endif - for (int ix = 0; ix < this->nx; ++ix) - { - for (int ipy = 0; ipy < npy; ++ipy) - { - out[ix * npy + ipy] = this->fft_bundle.get_rspace_data()[ix * npy + ipy]; + for (int ix = 0; ix < this->nx; ++ix) + { + for (int ipy = 0; ipy < npy; ++ipy) + { + out[ix * npy + ipy] = this->fft_bundle.get_rspace_data ()[ix * npy + ipy]; + } + } } - } } - } else - { - this->fft_bundle.fftxybac(fft_bundle.get_auxr_data(), fft_bundle.get_auxr_data()); - if (add) { + this->fft_bundle.fftxybac (fft_bundle.get_auxr_data (), fft_bundle.get_auxr_data ()); + if (add) + { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int ir = 0; ir < this->nrxx; ++ir) - { - out[ir] += factor * this->fft_bundle.get_auxr_data()[ir].real(); - } - } - else - { + for (int ir = 0; ir < this->nrxx; ++ir) + { + out[ir] += factor * this->fft_bundle.get_auxr_data ()[ir].real (); + } + } + else + { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int ir = 0; ir < this->nrxx; ++ir) - { - out[ir] = this->fft_bundle.get_auxr_data()[ir].real(); - } + for (int ir = 0; ir < this->nrxx; ++ir) + { + out[ir] = this->fft_bundle.get_auxr_data ()[ir].real (); + } + } } - } - ModuleBase::timer::end(this->classname, "recip2real"); + ModuleBase::timer::end (this->classname, "recip2real"); } -template void PW_Basis::real2recip(const float* in, - std::complex* out, - const bool add, - const float factor) const; // in:(nplane,nx*ny) ; out(nz, ns) -template void PW_Basis::real2recip(const std::complex* in, - std::complex* out, - const bool add, - const float factor) const; // in:(nplane,nx*ny) ; out(nz, ns) -template void PW_Basis::recip2real(const 
std::complex* in, - float* out, - const bool add, - const float factor) const; // in:(nz, ns) ; out(nplane,nx*ny) -template void PW_Basis::recip2real(const std::complex* in, - std::complex* out, - const bool add, - const float factor) const; - -template void PW_Basis::real2recip(const double* in, - std::complex* out, +template void PW_Basis::real2recip (const float* in, + std::complex* out, const bool add, - const double factor) const; // in:(nplane,nx*ny) ; out(nz, ns) -template void PW_Basis::real2recip(const std::complex* in, - std::complex* out, + const float factor) const; // in:(nplane,nx*ny) ; out(nz, ns) +template void PW_Basis::real2recip (const std::complex* in, + std::complex* out, const bool add, - const double factor) const; // in:(nplane,nx*ny) ; out(nz, ns) -template void PW_Basis::recip2real(const std::complex* in, - double* out, + const float factor) const; // in:(nplane,nx*ny) ; out(nz, ns) +template void PW_Basis::recip2real (const std::complex* in, + float* out, const bool add, - const double factor) const; // in:(nz, ns) ; out(nplane,nx*ny) -template void PW_Basis::recip2real(const std::complex* in, - std::complex* out, + const float factor) const; // in:(nz, ns) ; out(nplane,nx*ny) +template void PW_Basis::recip2real (const std::complex* in, + std::complex* out, const bool add, - const double factor) const; + const float factor) const; + +template void PW_Basis::real2recip (const double* in, + std::complex* out, + const bool add, + const double factor) const; // in:(nplane,nx*ny) ; out(nz, ns) +template void PW_Basis::real2recip (const std::complex* in, + std::complex* out, + const bool add, + const double factor) const; // in:(nplane,nx*ny) ; out(nz, ns) +template void PW_Basis::recip2real (const std::complex* in, + double* out, + const bool add, + const double factor) const; // in:(nz, ns) ; out(nplane,nx*ny) +template void PW_Basis::recip2real (const std::complex* in, + std::complex* out, + const bool add, + const double factor) const; } // 
namespace ModulePW \ No newline at end of file diff --git a/source/source_basis/module_pw/pw_transform_gpu.cpp b/source/source_basis/module_pw/pw_transform_gpu.cpp index 0be22b4244b..365804746e8 100644 --- a/source/source_basis/module_pw/pw_transform_gpu.cpp +++ b/source/source_basis/module_pw/pw_transform_gpu.cpp @@ -5,143 +5,148 @@ namespace ModulePW { #if (defined(__CUDA) || defined(__ROCM)) template -void PW_Basis::real2recip_gpu(const FPTYPE* in, std::complex* out, const bool add, const FPTYPE factor) const +void + PW_Basis::real2recip_gpu (const FPTYPE* in, std::complex* out, const bool add, const FPTYPE factor) const { - ModuleBase::timer::start(this->classname, "real_to_recip gpu"); - assert(this->poolnproc == 1); + ModuleBase::timer::start (this->classname, "real_to_recip gpu"); + assert (this->poolnproc == 1); const size_t size = this->nrxx; - base_device::memory::cast_memory_op, FPTYPE,base_device::DEVICE_GPU, base_device::DEVICE_GPU>()( - this->fft_bundle.get_auxr_3d_data(), - in, - size); + base_device::memory:: + cast_memory_op, FPTYPE, base_device::DEVICE_GPU, base_device::DEVICE_GPU> () ( + this->fft_bundle.get_auxr_3d_data (), + in, + size); - this->fft_bundle.fft3D_forward(this->fft_bundle.get_auxr_3d_data(), - this->fft_bundle.get_auxr_3d_data()); + this->fft_bundle.fft3D_forward (this->fft_bundle.get_auxr_3d_data (), + this->fft_bundle.get_auxr_3d_data ()); - set_real_to_recip_output_op()(npw, - this->nxyz, - add, - factor, - this->ig2ixyz_gpu, - this->fft_bundle.get_auxr_3d_data(), - out); - ModuleBase::timer::end(this->classname, "real_to_recip gpu"); + set_real_to_recip_output_op () (npw, + this->nxyz, + add, + factor, + this->ig2ixyz_gpu, + this->fft_bundle.get_auxr_3d_data (), + out); + ModuleBase::timer::end (this->classname, "real_to_recip gpu"); } template -void PW_Basis::real2recip_gpu(const std::complex* in, +void + PW_Basis::real2recip_gpu (const std::complex* in, std::complex* out, const bool add, const FPTYPE factor) const { - 
ModuleBase::timer::start(this->classname, "real_to_recip gpu"); - assert(this->poolnproc == 1); - base_device::memory::synchronize_memory_op, - base_device::DEVICE_GPU, - base_device::DEVICE_GPU>()(this->fft_bundle.get_auxr_3d_data(), - in, - this->nrxx); - this->fft_bundle.fft3D_forward(this->fft_bundle.get_auxr_3d_data(), - this->fft_bundle.get_auxr_3d_data()); + ModuleBase::timer::start (this->classname, "real_to_recip gpu"); + assert (this->poolnproc == 1); + base_device::memory:: + synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU> () ( + this->fft_bundle.get_auxr_3d_data (), + in, + this->nrxx); + this->fft_bundle.fft3D_forward (this->fft_bundle.get_auxr_3d_data (), + this->fft_bundle.get_auxr_3d_data ()); - set_real_to_recip_output_op()(npw, - this->nxyz, - add, - factor, - this->ig2ixyz_gpu, - this->fft_bundle.get_auxr_3d_data(), - out); - ModuleBase::timer::end(this->classname, "real_to_recip gpu"); + set_real_to_recip_output_op () (npw, + this->nxyz, + add, + factor, + this->ig2ixyz_gpu, + this->fft_bundle.get_auxr_3d_data (), + out); + ModuleBase::timer::end (this->classname, "real_to_recip gpu"); } template -void PW_Basis::recip2real_gpu(const std::complex* in, FPTYPE* out, const bool add, const FPTYPE factor) const +void + PW_Basis::recip2real_gpu (const std::complex* in, FPTYPE* out, const bool add, const FPTYPE factor) const { - ModuleBase::timer::start(this->classname, "recip_to_real gpu"); - assert(this->poolnproc == 1); + ModuleBase::timer::start (this->classname, "recip_to_real gpu"); + assert (this->poolnproc == 1); // ModuleBase::GlobalFunc::ZEROS(fft_bundle.get_auxr_3d_data(), this->nxyz); - base_device::memory::set_memory_op, base_device::DEVICE_GPU>()( - this->fft_bundle.get_auxr_3d_data(), + base_device::memory::set_memory_op, base_device::DEVICE_GPU> () ( + this->fft_bundle.get_auxr_3d_data (), 0, this->nxyz); - set_3d_fft_box_op()(npw, - this->ig2ixyz_gpu, - in, - this->fft_bundle.get_auxr_3d_data()); - 
this->fft_bundle.fft3D_backward(this->fft_bundle.get_auxr_3d_data(), - this->fft_bundle.get_auxr_3d_data()); + set_3d_fft_box_op () (npw, + this->ig2ixyz_gpu, + in, + this->fft_bundle.get_auxr_3d_data ()); + this->fft_bundle.fft3D_backward (this->fft_bundle.get_auxr_3d_data (), + this->fft_bundle.get_auxr_3d_data ()); - set_recip_to_real_output_op()(this->nrxx, - add, - factor, - this->fft_bundle.get_auxr_3d_data(), - out); + set_recip_to_real_output_op () (this->nrxx, + add, + factor, + this->fft_bundle.get_auxr_3d_data (), + out); - ModuleBase::timer::end(this->classname, "recip_to_real gpu"); + ModuleBase::timer::end (this->classname, "recip_to_real gpu"); } template -void PW_Basis::recip2real_gpu(const std::complex* in, +void + PW_Basis::recip2real_gpu (const std::complex* in, std::complex* out, const bool add, const FPTYPE factor) const { - ModuleBase::timer::start(this->classname, "recip_to_real gpu"); - assert(this->poolnproc == 1); + ModuleBase::timer::start (this->classname, "recip_to_real gpu"); + assert (this->poolnproc == 1); // ModuleBase::GlobalFunc::ZEROS(fft_bundle.get_auxr_3d_data(), this->nxyz); - base_device::memory::set_memory_op, base_device::DEVICE_GPU>()( - this->fft_bundle.get_auxr_3d_data(), + base_device::memory::set_memory_op, base_device::DEVICE_GPU> () ( + this->fft_bundle.get_auxr_3d_data (), 0, this->nxyz); - set_3d_fft_box_op()(npw, - this->ig2ixyz_gpu, - in, - this->fft_bundle.get_auxr_3d_data()); - this->fft_bundle.fft3D_backward(this->fft_bundle.get_auxr_3d_data(), - this->fft_bundle.get_auxr_3d_data()); + set_3d_fft_box_op () (npw, + this->ig2ixyz_gpu, + in, + this->fft_bundle.get_auxr_3d_data ()); + this->fft_bundle.fft3D_backward (this->fft_bundle.get_auxr_3d_data (), + this->fft_bundle.get_auxr_3d_data ()); - set_recip_to_real_output_op()(this->nrxx, - add, - factor, - this->fft_bundle.get_auxr_3d_data(), - out); + set_recip_to_real_output_op () (this->nrxx, + add, + factor, + this->fft_bundle.get_auxr_3d_data (), + out); - 
ModuleBase::timer::end(this->classname, "recip_to_real gpu"); + ModuleBase::timer::end (this->classname, "recip_to_real gpu"); } -template void PW_Basis::real2recip_gpu(const double* in, - std::complex* out, +template void PW_Basis::real2recip_gpu (const double* in, + std::complex* out, + const bool add, + const double factor) const; +template void PW_Basis::real2recip_gpu (const float* in, + std::complex* out, const bool add, - const double factor) const; -template void PW_Basis::real2recip_gpu(const float* in, - std::complex* out, - const bool add, - const float factor) const; + const float factor) const; -template void PW_Basis::real2recip_gpu(const std::complex* in, - std::complex* out, +template void PW_Basis::real2recip_gpu (const std::complex* in, + std::complex* out, + const bool add, + const double factor) const; +template void PW_Basis::real2recip_gpu (const std::complex* in, + std::complex* out, const bool add, - const double factor) const; -template void PW_Basis::real2recip_gpu(const std::complex* in, - std::complex* out, - const bool add, - const float factor) const; + const float factor) const; -template void PW_Basis::recip2real_gpu(const std::complex* in, - double* out, +template void PW_Basis::recip2real_gpu (const std::complex* in, + double* out, + const bool add, + const double factor) const; +template void PW_Basis::recip2real_gpu (const std::complex* in, + float* out, const bool add, - const double factor) const; -template void PW_Basis::recip2real_gpu(const std::complex* in, - float* out, - const bool add, - const float factor) const; + const float factor) const; -template void PW_Basis::recip2real_gpu(const std::complex* in, - std::complex* out, +template void PW_Basis::recip2real_gpu (const std::complex* in, + std::complex* out, + const bool add, + const double factor) const; +template void PW_Basis::recip2real_gpu (const std::complex* in, + std::complex* out, const bool add, - const double factor) const; -template void 
PW_Basis::recip2real_gpu(const std::complex* in, - std::complex* out, - const bool add, - const float factor) const; + const float factor) const; #endif } // namespace ModulePW \ No newline at end of file diff --git a/source/source_basis/module_pw/pw_transform_k.cpp b/source/source_basis/module_pw/pw_transform_k.cpp index a09aa2b686f..3ba129960a8 100644 --- a/source/source_basis/module_pw/pw_transform_k.cpp +++ b/source/source_basis/module_pw/pw_transform_k.cpp @@ -23,55 +23,56 @@ namespace ModulePW * @param out: (nz, ns), std::complex data */ template -void PW_Basis_K::real2recip(const std::complex* in, +void + PW_Basis_K::real2recip (const std::complex* in, std::complex* out, const int ik, const bool add, const FPTYPE factor) const { - ModuleBase::timer::start(this->classname, "real2recip"); + ModuleBase::timer::start (this->classname, "real2recip"); - assert(this->gamma_only == false); - auto* auxr = this->fft_bundle.get_auxr_data(); + assert (this->gamma_only == false); + auto* auxr = this->fft_bundle.get_auxr_data (); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (int ir = 0; ir < this->nrxx; ++ir) - { - auxr[ir] = in[ir]; - } - this->fft_bundle.fftxyfor(fft_bundle.get_auxr_data(), fft_bundle.get_auxr_data()); + { + auxr[ir] = in[ir]; + } + this->fft_bundle.fftxyfor (fft_bundle.get_auxr_data (), fft_bundle.get_auxr_data ()); - this->gatherp_scatters(this->fft_bundle.get_auxr_data(), this->fft_bundle.get_auxg_data()); + this->gatherp_scatters (this->fft_bundle.get_auxr_data (), this->fft_bundle.get_auxg_data ()); - this->fft_bundle.fftzfor(fft_bundle.get_auxg_data(), fft_bundle.get_auxg_data()); + this->fft_bundle.fftzfor (fft_bundle.get_auxg_data (), fft_bundle.get_auxg_data ()); const int startig = ik * this->npwk_max; const int npwk = this->npwk[ik]; - auto* auxg = this->fft_bundle.get_auxg_data(); + auto* auxg = this->fft_bundle.get_auxg_data (); if (add) - { - FPTYPE tmpfac = factor / FPTYPE(this->nxyz); + { + FPTYPE tmpfac = factor / 
FPTYPE (this->nxyz); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int igl = 0; igl < npwk; ++igl) - { - out[igl] += tmpfac * auxg[this->igl2isz_k[igl + startig]]; + for (int igl = 0; igl < npwk; ++igl) + { + out[igl] += tmpfac * auxg[this->igl2isz_k[igl + startig]]; + } } - } else - { - FPTYPE tmpfac = 1.0 / FPTYPE(this->nxyz); + { + FPTYPE tmpfac = 1.0 / FPTYPE (this->nxyz); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int igl = 0; igl < npwk; ++igl) - { - out[igl] = tmpfac * auxg[this->igl2isz_k[igl + startig]]; + for (int igl = 0; igl < npwk; ++igl) + { + out[igl] = tmpfac * auxg[this->igl2isz_k[igl + startig]]; + } } - } - ModuleBase::timer::end(this->classname, "real2recip"); + ModuleBase::timer::end (this->classname, "real2recip"); } /** @@ -88,14 +89,15 @@ void PW_Basis_K::real2recip(const std::complex* in, * @param out: (nz, ns), std::complex data */ template -void PW_Basis_K::real2recip(const FPTYPE* in, +void + PW_Basis_K::real2recip (const FPTYPE* in, std::complex* out, const int ik, const bool add, const FPTYPE factor) const { - ModuleBase::timer::start(this->classname, "real2recip"); - assert(this->gamma_only == true); + ModuleBase::timer::start (this->classname, "real2recip"); + assert (this->gamma_only == true); // for(int ir = 0 ; ir < this->nrxx ; ++ir) // { // this->fft_bundle.get_rspace_data()[ir] = in[ir]; @@ -106,45 +108,45 @@ void PW_Basis_K::real2recip(const FPTYPE* in, #pragma omp parallel for collapse(2) schedule(static) #endif for (int ix = 0; ix < this->nx; ++ix) - { - for (int ipy = 0; ipy < npy; ++ipy) { - this->fft_bundle.get_rspace_data()[ix * npy + ipy] = in[ix * npy + ipy]; + for (int ipy = 0; ipy < npy; ++ipy) + { + this->fft_bundle.get_rspace_data ()[ix * npy + ipy] = in[ix * npy + ipy]; + } } - } - this->fft_bundle.fftxyr2c(fft_bundle.get_rspace_data(), fft_bundle.get_auxr_data()); + this->fft_bundle.fftxyr2c (fft_bundle.get_rspace_data (), fft_bundle.get_auxr_data ()); - 
this->gatherp_scatters(this->fft_bundle.get_auxr_data(), this->fft_bundle.get_auxg_data()); + this->gatherp_scatters (this->fft_bundle.get_auxr_data (), this->fft_bundle.get_auxg_data ()); - this->fft_bundle.fftzfor(fft_bundle.get_auxg_data(), fft_bundle.get_auxg_data()); + this->fft_bundle.fftzfor (fft_bundle.get_auxg_data (), fft_bundle.get_auxg_data ()); const int startig = ik * this->npwk_max; const int npwk = this->npwk[ik]; - auto* auxg = this->fft_bundle.get_auxg_data(); + auto* auxg = this->fft_bundle.get_auxg_data (); if (add) - { - FPTYPE tmpfac = factor / FPTYPE(this->nxyz); + { + FPTYPE tmpfac = factor / FPTYPE (this->nxyz); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int igl = 0; igl < npwk; ++igl) - { - out[igl] += tmpfac * auxg[this->igl2isz_k[igl + startig]]; + for (int igl = 0; igl < npwk; ++igl) + { + out[igl] += tmpfac * auxg[this->igl2isz_k[igl + startig]]; + } } - } else - { - FPTYPE tmpfac = 1.0 / FPTYPE(this->nxyz); + { + FPTYPE tmpfac = 1.0 / FPTYPE (this->nxyz); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int igl = 0; igl < npwk; ++igl) - { - out[igl] = tmpfac * auxg[this->igl2isz_k[igl + startig]]; + for (int igl = 0; igl < npwk; ++igl) + { + out[igl] = tmpfac * auxg[this->igl2isz_k[igl + startig]]; + } } - } - ModuleBase::timer::end(this->classname, "real2recip"); + ModuleBase::timer::end (this->classname, "real2recip"); return; } @@ -162,53 +164,54 @@ void PW_Basis_K::real2recip(const FPTYPE* in, * @param out: (nplane, ny, nx), std::complex */ template -void PW_Basis_K::recip2real(const std::complex* in, +void + PW_Basis_K::recip2real (const std::complex* in, std::complex* out, const int ik, const bool add, const FPTYPE factor) const { - ModuleBase::timer::start(this->classname, "recip2real"); - assert(this->gamma_only == false); - ModuleBase::GlobalFunc::ZEROS(fft_bundle.get_auxg_data(), this->nst * this->nz); + ModuleBase::timer::start (this->classname, "recip2real"); + assert 
(this->gamma_only == false); + ModuleBase::GlobalFunc::ZEROS (fft_bundle.get_auxg_data (), this->nst * this->nz); const int startig = ik * this->npwk_max; const int npwk = this->npwk[ik]; - auto* auxg = this->fft_bundle.get_auxg_data(); + auto* auxg = this->fft_bundle.get_auxg_data (); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (int igl = 0; igl < npwk; ++igl) - { - auxg[this->igl2isz_k[igl + startig]] = in[igl]; - } - this->fft_bundle.fftzbac(fft_bundle.get_auxg_data(), fft_bundle.get_auxg_data()); + { + auxg[this->igl2isz_k[igl + startig]] = in[igl]; + } + this->fft_bundle.fftzbac (fft_bundle.get_auxg_data (), fft_bundle.get_auxg_data ()); - this->gathers_scatterp(this->fft_bundle.get_auxg_data(), this->fft_bundle.get_auxr_data()); + this->gathers_scatterp (this->fft_bundle.get_auxg_data (), this->fft_bundle.get_auxr_data ()); - this->fft_bundle.fftxybac(fft_bundle.get_auxr_data(), fft_bundle.get_auxr_data()); - auto* auxr = this->fft_bundle.get_auxr_data(); + this->fft_bundle.fftxybac (fft_bundle.get_auxr_data (), fft_bundle.get_auxr_data ()); + auto* auxr = this->fft_bundle.get_auxr_data (); if (add) - { + { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int ir = 0; ir < this->nrxx; ++ir) - { - out[ir] += factor * auxr[ir]; + for (int ir = 0; ir < this->nrxx; ++ir) + { + out[ir] += factor * auxr[ir]; + } } - } else - { + { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int ir = 0; ir < this->nrxx; ++ir) - { - out[ir] = auxr[ir]; + for (int ir = 0; ir < this->nrxx; ++ir) + { + out[ir] = auxr[ir]; + } } - } - ModuleBase::timer::end(this->classname, "recip2real"); + ModuleBase::timer::end (this->classname, "recip2real"); } /** @@ -225,31 +228,32 @@ void PW_Basis_K::recip2real(const std::complex* in, * @param out: (nplane, ny, nx), double */ template -void PW_Basis_K::recip2real(const std::complex* in, +void + PW_Basis_K::recip2real (const std::complex* in, FPTYPE* out, const int ik, const 
bool add, const FPTYPE factor) const { - ModuleBase::timer::start(this->classname, "recip2real"); - assert(this->gamma_only == true); - ModuleBase::GlobalFunc::ZEROS(fft_bundle.get_auxg_data(), this->nst * this->nz); + ModuleBase::timer::start (this->classname, "recip2real"); + assert (this->gamma_only == true); + ModuleBase::GlobalFunc::ZEROS (fft_bundle.get_auxg_data (), this->nst * this->nz); const int startig = ik * this->npwk_max; const int npwk = this->npwk[ik]; - auto* auxg = this->fft_bundle.get_auxg_data(); + auto* auxg = this->fft_bundle.get_auxg_data (); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (int igl = 0; igl < npwk; ++igl) - { - auxg[this->igl2isz_k[igl + startig]] = in[igl]; - } - this->fft_bundle.fftzbac(fft_bundle.get_auxg_data(), fft_bundle.get_auxg_data()); + { + auxg[this->igl2isz_k[igl + startig]] = in[igl]; + } + this->fft_bundle.fftzbac (fft_bundle.get_auxg_data (), fft_bundle.get_auxg_data ()); - this->gathers_scatterp(this->fft_bundle.get_auxg_data(), this->fft_bundle.get_auxr_data()); + this->gathers_scatterp (this->fft_bundle.get_auxg_data (), this->fft_bundle.get_auxr_data ()); - this->fft_bundle.fftxyc2r(fft_bundle.get_auxr_data(), fft_bundle.get_rspace_data()); + this->fft_bundle.fftxyc2r (fft_bundle.get_auxr_data (), fft_bundle.get_rspace_data ()); // for(int ir = 0 ; ir < this->nrxx ; ++ir) // { @@ -258,347 +262,364 @@ void PW_Basis_K::recip2real(const std::complex* in, // r2c in place const int npy = this->ny * this->nplane; - auto* rspace = this->fft_bundle.get_rspace_data(); + auto* rspace = this->fft_bundle.get_rspace_data (); if (add) - { + { #ifdef _OPENMP #pragma omp parallel for collapse(2) schedule(static) #endif - for (int ix = 0; ix < this->nx; ++ix) - { - for (int ipy = 0; ipy < npy; ++ipy) - { - out[ix * npy + ipy] += factor * rspace[ix * npy + ipy]; - } + for (int ix = 0; ix < this->nx; ++ix) + { + for (int ipy = 0; ipy < npy; ++ipy) + { + out[ix * npy + ipy] += factor * rspace[ix * npy + 
ipy]; + } + } } - } else - { + { #ifdef _OPENMP #pragma omp parallel for collapse(2) schedule(static) #endif - for (int ix = 0; ix < this->nx; ++ix) - { - for (int ipy = 0; ipy < npy; ++ipy) - { - out[ix * npy + ipy] = rspace[ix * npy + ipy]; - } + for (int ix = 0; ix < this->nx; ++ix) + { + for (int ipy = 0; ipy < npy; ++ipy) + { + out[ix * npy + ipy] = rspace[ix * npy + ipy]; + } + } } - } - ModuleBase::timer::end(this->classname, "recip2real"); + ModuleBase::timer::end (this->classname, "recip2real"); } template <> -void PW_Basis_K::real_to_recip(const base_device::DEVICE_CPU* /*dev*/, +void + PW_Basis_K::real_to_recip (const base_device::DEVICE_CPU* /*dev*/, const std::complex* in, std::complex* out, const int ik, const bool add, const float factor) const { - this->real2recip(in, out, ik, add, factor); + this->real2recip (in, out, ik, add, factor); } template <> -void PW_Basis_K::real_to_recip(const base_device::DEVICE_CPU* /*dev*/, +void + PW_Basis_K::real_to_recip (const base_device::DEVICE_CPU* /*dev*/, const std::complex* in, std::complex* out, const int ik, const bool add, const double factor) const { - #if defined(__DSP) - this->real2recip_dsp(in,out,ik,add,factor); - #else - this->real2recip(in, out, ik, add, factor); - #endif +#if defined(__DSP) + this->real2recip_dsp (in, out, ik, add, factor); +#else + this->real2recip (in, out, ik, add, factor); +#endif } template <> -void PW_Basis_K::recip_to_real(const base_device::DEVICE_CPU* /*dev*/, +void + PW_Basis_K::recip_to_real (const base_device::DEVICE_CPU* /*dev*/, const std::complex* in, std::complex* out, const int ik, const bool add, const float factor) const { - this->recip2real(in, out, ik, add, factor); + this->recip2real (in, out, ik, add, factor); } template <> -void PW_Basis_K::recip_to_real(const base_device::DEVICE_CPU* /*dev*/, +void + PW_Basis_K::recip_to_real (const base_device::DEVICE_CPU* /*dev*/, const std::complex* in, std::complex* out, const int ik, const bool add, const double 
factor) const { - #if defined(__DSP) - this->recip2real_dsp(in,out,ik,add,factor); - #else - this->recip2real(in, out, ik, add, factor); - #endif +#if defined(__DSP) + this->recip2real_dsp (in, out, ik, add, factor); +#else + this->recip2real (in, out, ik, add, factor); +#endif } #if (defined(__CUDA) || defined(__ROCM)) template <> -void PW_Basis_K::real_to_recip(const base_device::DEVICE_GPU* ctx, +void + PW_Basis_K::real_to_recip (const base_device::DEVICE_GPU* ctx, const std::complex* in, std::complex* out, const int ik, const bool add, const float factor) const { - ModuleBase::timer::start(this->classname, "real_to_recip gpu"); - assert(this->gamma_only == false); - assert(this->poolnproc == 1); + ModuleBase::timer::start (this->classname, "real_to_recip gpu"); + assert (this->gamma_only == false); + assert (this->poolnproc == 1); - base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>()( - this->fft_bundle.get_auxr_3d_data(), - in, - this->nrxx); + base_device::memory::synchronize_memory_op, + base_device::DEVICE_GPU, + base_device::DEVICE_GPU> () (this->fft_bundle.get_auxr_3d_data (), + in, + this->nrxx); - this->fft_bundle.fft3D_forward(this->fft_bundle.get_auxr_3d_data(), this->fft_bundle.get_auxr_3d_data()); + this->fft_bundle.fft3D_forward (this->fft_bundle.get_auxr_3d_data (), + this->fft_bundle.get_auxr_3d_data ()); const int startig = ik * this->npwk_max; const int npw_k = this->npwk[ik]; - set_real_to_recip_output_op()(npw_k, - this->nxyz, - add, - factor, - this->ig2ixyz_k + startig, - this->fft_bundle.get_auxr_3d_data(), - out); - ModuleBase::timer::end(this->classname, "real_to_recip gpu"); + set_real_to_recip_output_op () (npw_k, + this->nxyz, + add, + factor, + this->ig2ixyz_k + startig, + this->fft_bundle.get_auxr_3d_data (), + out); + ModuleBase::timer::end (this->classname, "real_to_recip gpu"); } template <> -void PW_Basis_K::real_to_recip(const base_device::DEVICE_GPU* ctx, +void + 
PW_Basis_K::real_to_recip (const base_device::DEVICE_GPU* ctx, const std::complex* in, std::complex* out, const int ik, const bool add, const double factor) const { - ModuleBase::timer::start(this->classname, "real_to_recip gpu"); - assert(this->gamma_only == false); - assert(this->poolnproc == 1); + ModuleBase::timer::start (this->classname, "real_to_recip gpu"); + assert (this->gamma_only == false); + assert (this->poolnproc == 1); - base_device::memory::synchronize_memory_op, - base_device::DEVICE_GPU, - base_device::DEVICE_GPU>()(this->fft_bundle.get_auxr_3d_data(), - in, - this->nrxx); + base_device::memory:: + synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU> () ( + this->fft_bundle.get_auxr_3d_data (), + in, + this->nrxx); - this->fft_bundle.fft3D_forward(this->fft_bundle.get_auxr_3d_data(), this->fft_bundle.get_auxr_3d_data()); + this->fft_bundle.fft3D_forward (this->fft_bundle.get_auxr_3d_data (), + this->fft_bundle.get_auxr_3d_data ()); const int startig = ik * this->npwk_max; const int npw_k = this->npwk[ik]; - set_real_to_recip_output_op()(npw_k, - this->nxyz, - add, - factor, - this->ig2ixyz_k + startig, - this->fft_bundle.get_auxr_3d_data(), - out); - ModuleBase::timer::end(this->classname, "real_to_recip gpu"); + set_real_to_recip_output_op () (npw_k, + this->nxyz, + add, + factor, + this->ig2ixyz_k + startig, + this->fft_bundle.get_auxr_3d_data (), + out); + ModuleBase::timer::end (this->classname, "real_to_recip gpu"); } template <> -void PW_Basis_K::recip_to_real(const base_device::DEVICE_GPU* ctx, +void + PW_Basis_K::recip_to_real (const base_device::DEVICE_GPU* ctx, const std::complex* in, std::complex* out, const int ik, const bool add, const float factor) const { - ModuleBase::timer::start(this->classname, "recip_to_real gpu"); - assert(this->gamma_only == false); - assert(this->poolnproc == 1); + ModuleBase::timer::start (this->classname, "recip_to_real gpu"); + assert (this->gamma_only == false); + assert 
(this->poolnproc == 1); // ModuleBase::GlobalFunc::ZEROS(fft_bundle.get_auxr_3d_data(), this->nxyz); - base_device::memory::set_memory_op, base_device::DEVICE_GPU>()( - this->fft_bundle.get_auxr_3d_data(), + base_device::memory::set_memory_op, base_device::DEVICE_GPU> () ( + this->fft_bundle.get_auxr_3d_data (), 0, this->nxyz); const int startig = ik * this->npwk_max; const int npw_k = this->npwk[ik]; - set_3d_fft_box_op()(npw_k, - this->ig2ixyz_k + startig, - in, - this->fft_bundle.get_auxr_3d_data()); - this->fft_bundle.fft3D_backward(this->fft_bundle.get_auxr_3d_data(), this->fft_bundle.get_auxr_3d_data()); + set_3d_fft_box_op () (npw_k, + this->ig2ixyz_k + startig, + in, + this->fft_bundle.get_auxr_3d_data ()); + this->fft_bundle.fft3D_backward (this->fft_bundle.get_auxr_3d_data (), + this->fft_bundle.get_auxr_3d_data ()); - set_recip_to_real_output_op()(this->nrxx, - add, - factor, - this->fft_bundle.get_auxr_3d_data(), - out); + set_recip_to_real_output_op () (this->nrxx, + add, + factor, + this->fft_bundle.get_auxr_3d_data (), + out); - ModuleBase::timer::end(this->classname, "recip_to_real gpu"); + ModuleBase::timer::end (this->classname, "recip_to_real gpu"); } template <> -void PW_Basis_K::recip_to_real(const base_device::DEVICE_GPU* ctx, +void + PW_Basis_K::recip_to_real (const base_device::DEVICE_GPU* ctx, const std::complex* in, std::complex* out, const int ik, const bool add, const double factor) const { - ModuleBase::timer::start(this->classname, "recip_to_real gpu"); - assert(this->gamma_only == false); - assert(this->poolnproc == 1); + ModuleBase::timer::start (this->classname, "recip_to_real gpu"); + assert (this->gamma_only == false); + assert (this->poolnproc == 1); // ModuleBase::GlobalFunc::ZEROS(fft_bundle.get_auxr_3d_data(), this->nxyz); - base_device::memory::set_memory_op, base_device::DEVICE_GPU>()( - this->fft_bundle.get_auxr_3d_data(), + base_device::memory::set_memory_op, base_device::DEVICE_GPU> () ( + 
this->fft_bundle.get_auxr_3d_data (), 0, this->nxyz); const int startig = ik * this->npwk_max; const int npw_k = this->npwk[ik]; - set_3d_fft_box_op()(npw_k, - this->ig2ixyz_k + startig, - in, - this->fft_bundle.get_auxr_3d_data()); - this->fft_bundle.fft3D_backward(this->fft_bundle.get_auxr_3d_data(), this->fft_bundle.get_auxr_3d_data()); + set_3d_fft_box_op () (npw_k, + this->ig2ixyz_k + startig, + in, + this->fft_bundle.get_auxr_3d_data ()); + this->fft_bundle.fft3D_backward (this->fft_bundle.get_auxr_3d_data (), + this->fft_bundle.get_auxr_3d_data ()); - set_recip_to_real_output_op()(this->nrxx, - add, - factor, - this->fft_bundle.get_auxr_3d_data(), - out); + set_recip_to_real_output_op () (this->nrxx, + add, + factor, + this->fft_bundle.get_auxr_3d_data (), + out); - ModuleBase::timer::end(this->classname, "recip_to_real gpu"); + ModuleBase::timer::end (this->classname, "recip_to_real gpu"); } template -void PW_Basis_K::real2recip_gpu(const std::complex* in, - std::complex* out, - const int ik, - const bool add, - const FPTYPE factor) const +void + PW_Basis_K::real2recip_gpu (const std::complex* in, + std::complex* out, + const int ik, + const bool add, + const FPTYPE factor) const { - ModuleBase::timer::start(this->classname, "real_to_recip gpu"); - assert(this->gamma_only == false); - assert(this->poolnproc == 1); + ModuleBase::timer::start (this->classname, "real_to_recip gpu"); + assert (this->gamma_only == false); + assert (this->poolnproc == 1); - base_device::memory::synchronize_memory_op, - base_device::DEVICE_GPU, - base_device::DEVICE_GPU>()(this->fft_bundle.get_auxr_3d_data(), - in, - this->nrxx); + base_device::memory:: + synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU> () ( + this->fft_bundle.get_auxr_3d_data (), + in, + this->nrxx); - this->fft_bundle.fft3D_forward(this->fft_bundle.get_auxr_3d_data(), this->fft_bundle.get_auxr_3d_data()); + this->fft_bundle.fft3D_forward (this->fft_bundle.get_auxr_3d_data (), + 
this->fft_bundle.get_auxr_3d_data ()); const int startig = ik * this->npwk_max; const int npw_k = this->npwk[ik]; - set_real_to_recip_output_op()(npw_k, - this->nxyz, - add, - factor, - this->ig2ixyz_k + startig, - this->fft_bundle.get_auxr_3d_data(), - out); - ModuleBase::timer::end(this->classname, "real_to_recip gpu"); + set_real_to_recip_output_op () (npw_k, + this->nxyz, + add, + factor, + this->ig2ixyz_k + startig, + this->fft_bundle.get_auxr_3d_data (), + out); + ModuleBase::timer::end (this->classname, "real_to_recip gpu"); } template -void PW_Basis_K::recip2real_gpu(const std::complex* in, - std::complex* out, - const int ik, - const bool add, - const FPTYPE factor) const +void + PW_Basis_K::recip2real_gpu (const std::complex* in, + std::complex* out, + const int ik, + const bool add, + const FPTYPE factor) const { - ModuleBase::timer::start(this->classname, "recip_to_real gpu"); - assert(this->gamma_only == false); - assert(this->poolnproc == 1); + ModuleBase::timer::start (this->classname, "recip_to_real gpu"); + assert (this->gamma_only == false); + assert (this->poolnproc == 1); // ModuleBase::GlobalFunc::ZEROS(fft_bundle.get_auxr_3d_data(), this->nxyz); - base_device::memory::set_memory_op, base_device::DEVICE_GPU>()( - this->fft_bundle.get_auxr_3d_data(), + base_device::memory::set_memory_op, base_device::DEVICE_GPU> () ( + this->fft_bundle.get_auxr_3d_data (), 0, this->nxyz); const int startig = ik * this->npwk_max; const int npw_k = this->npwk[ik]; - set_3d_fft_box_op()(npw_k, - this->ig2ixyz_k + startig, - in, - this->fft_bundle.get_auxr_3d_data()); - this->fft_bundle.fft3D_backward(this->fft_bundle.get_auxr_3d_data(), this->fft_bundle.get_auxr_3d_data()); + set_3d_fft_box_op () (npw_k, + this->ig2ixyz_k + startig, + in, + this->fft_bundle.get_auxr_3d_data ()); + this->fft_bundle.fft3D_backward (this->fft_bundle.get_auxr_3d_data (), + this->fft_bundle.get_auxr_3d_data ()); - set_recip_to_real_output_op()(this->nrxx, - add, - factor, - 
this->fft_bundle.get_auxr_3d_data(), - out); + set_recip_to_real_output_op () (this->nrxx, + add, + factor, + this->fft_bundle.get_auxr_3d_data (), + out); - ModuleBase::timer::end(this->classname, "recip_to_real gpu"); + ModuleBase::timer::end (this->classname, "recip_to_real gpu"); } -template void PW_Basis_K::real2recip_gpu(const std::complex*, - std::complex*, - const int, - const bool, - const float) const; - -template void PW_Basis_K::real2recip_gpu(const std::complex*, - std::complex*, +template void PW_Basis_K::real2recip_gpu (const std::complex*, + std::complex*, const int, const bool, - const double) const; + const float) const; -template void PW_Basis_K::recip2real_gpu(const std::complex*, - std::complex*, - const int, - const bool, - const float) const; +template void PW_Basis_K::real2recip_gpu (const std::complex*, + std::complex*, + const int, + const bool, + const double) const; -template void PW_Basis_K::recip2real_gpu(const std::complex*, - std::complex*, +template void PW_Basis_K::recip2real_gpu (const std::complex*, + std::complex*, const int, const bool, - const double) const; + const float) const; + +template void PW_Basis_K::recip2real_gpu (const std::complex*, + std::complex*, + const int, + const bool, + const double) const; #endif -template void PW_Basis_K::real2recip(const float* in, - std::complex* out, - const int ik, - const bool add, - const float factor) const; // in:(nplane,nx*ny) ; out(nz, ns) -template void PW_Basis_K::real2recip(const std::complex* in, - std::complex* out, - const int ik, - const bool add, - const float factor) const; // in:(nplane,nx*ny) ; out(nz, ns) -template void PW_Basis_K::recip2real(const std::complex* in, - float* out, - const int ik, - const bool add, - const float factor) const; // in:(nz, ns) ; out(nplane,nx*ny) -template void PW_Basis_K::recip2real(const std::complex* in, - std::complex* out, - const int ik, - const bool add, - const float factor) const; // in:(nz, ns) ; out(nplane,nx*ny) - -template 
void PW_Basis_K::real2recip(const double* in, - std::complex* out, +template void PW_Basis_K::real2recip (const float* in, + std::complex* out, const int ik, const bool add, - const double factor) const; // in:(nplane,nx*ny) ; out(nz, ns) -template void PW_Basis_K::real2recip(const std::complex* in, - std::complex* out, + const float factor) const; // in:(nplane,nx*ny) ; out(nz, ns) +template void PW_Basis_K::real2recip (const std::complex* in, + std::complex* out, const int ik, const bool add, - const double factor) const; // in:(nplane,nx*ny) ; out(nz, ns) -template void PW_Basis_K::recip2real(const std::complex* in, - double* out, + const float factor) const; // in:(nplane,nx*ny) ; out(nz, ns) +template void PW_Basis_K::recip2real (const std::complex* in, + float* out, const int ik, const bool add, - const double factor) const; // in:(nz, ns) ; out(nplane,nx*ny) -template void PW_Basis_K::recip2real(const std::complex* in, - std::complex* out, + const float factor) const; // in:(nz, ns) ; out(nplane,nx*ny) +template void PW_Basis_K::recip2real (const std::complex* in, + std::complex* out, const int ik, const bool add, - const double factor) const; // in:(nz, ns) ; out(nplane,nx*ny) + const float factor) const; // in:(nz, ns) ; out(nplane,nx*ny) + +template void PW_Basis_K::real2recip (const double* in, + std::complex* out, + const int ik, + const bool add, + const double factor) const; // in:(nplane,nx*ny) ; out(nz, ns) +template void PW_Basis_K::real2recip (const std::complex* in, + std::complex* out, + const int ik, + const bool add, + const double factor) const; // in:(nplane,nx*ny) ; out(nz, ns) +template void PW_Basis_K::recip2real (const std::complex* in, + double* out, + const int ik, + const bool add, + const double factor) const; // in:(nz, ns) ; out(nplane,nx*ny) +template void PW_Basis_K::recip2real (const std::complex* in, + std::complex* out, + const int ik, + const bool add, + const double factor) const; // in:(nz, ns) ; out(nplane,nx*ny) } // 
namespace ModulePW diff --git a/source/source_basis/module_pw/pw_transform_k_dsp.cpp b/source/source_basis/module_pw/pw_transform_k_dsp.cpp index 2ef0ec92950..fad4f970af6 100644 --- a/source/source_basis/module_pw/pw_transform_k_dsp.cpp +++ b/source/source_basis/module_pw/pw_transform_k_dsp.cpp @@ -5,29 +5,30 @@ #include #include -#if defined (__DSP) +#if defined(__DSP) namespace ModulePW { - template <> -void PW_Basis_K::real2recip_dsp(const std::complex* in, +template <> +void + PW_Basis_K::real2recip_dsp (const std::complex* in, std::complex* out, const int ik, const bool add, const float factor) const - { - - } - template <> -void PW_Basis_K::recip2real_dsp(const std::complex* in, +{ +} +template <> +void + PW_Basis_K::recip2real_dsp (const std::complex* in, std::complex* out, const int ik, const bool add, const float factor) const - { - - } +{ +} template <> -void PW_Basis_K::real2recip_dsp(const std::complex* in, +void + PW_Basis_K::real2recip_dsp (const std::complex* in, std::complex* out, const int ik, const bool add, @@ -35,63 +36,64 @@ void PW_Basis_K::real2recip_dsp(const std::complex* in, { const base_device::DEVICE_CPU* ctx = nullptr; const base_device::DEVICE_GPU* gpux = nullptr; - assert(this->gamma_only == false); - auto* auxr = this->fft_bundle.get_auxr_3d_data(); + assert (this->gamma_only == false); + auto* auxr = this->fft_bundle.get_auxr_3d_data (); const int startig = ik * this->npwk_max; const int npw_k = this->npwk[ik]; // copy the in into the auxr with std::complex - memcpy(auxr, in, this->nrxx * 2 * 8); + memcpy (auxr, in, this->nrxx * 2 * 8); // 3d fft - this->fft_bundle.resource_handler(1); - this->fft_bundle.fft3D_forward(auxr, - auxr); - this->fft_bundle.resource_handler(0); + this->fft_bundle.resource_handler (1); + this->fft_bundle.fft3D_forward (auxr, auxr); + this->fft_bundle.resource_handler (0); // copy the result from the auxr to the out ,while consider the add - set_real_to_recip_output_op()(npw_k, - this->nxyz, - add, - 
factor, - this->ig2ixyz_k_cpu.data() + startig, - auxr, - out); + set_real_to_recip_output_op () (npw_k, + this->nxyz, + add, + factor, + this->ig2ixyz_k_cpu.data () + startig, + auxr, + out); } template <> -void PW_Basis_K::recip2real_dsp(const std::complex* in, +void + PW_Basis_K::recip2real_dsp (const std::complex* in, std::complex* out, const int ik, const bool add, const double factor) const { - assert(this->gamma_only == false); + assert (this->gamma_only == false); const base_device::DEVICE_CPU* ctx = nullptr; const base_device::DEVICE_GPU* gpux = nullptr; // memset the auxr of 0 in the auxr,here the len of the auxr is nxyz - auto* auxr = this->fft_bundle.get_auxr_3d_data(); - memset(auxr, 0, this->nxyz * 2 * 8); + auto* auxr = this->fft_bundle.get_auxr_3d_data (); + memset (auxr, 0, this->nxyz * 2 * 8); const int startig = ik * this->npwk_max; const int npw_k = this->npwk[ik]; // copy the mapping form the type of stick to the 3dfft - set_3d_fft_box_op()(npw_k, this->ig2ixyz_k_cpu.data() + startig, in, auxr); + set_3d_fft_box_op () (npw_k, this->ig2ixyz_k_cpu.data () + startig, in, auxr); // use 3d fft backward - this->fft_bundle.resource_handler(1); - this->fft_bundle.fft3D_backward(auxr, auxr); - this->fft_bundle.resource_handler(0); + this->fft_bundle.resource_handler (1); + this->fft_bundle.fft3D_backward (auxr, auxr); + this->fft_bundle.resource_handler (0); if (add) - { - const int one = 1; - const std::complex factor1 = std::complex(factor, 0); - BlasConnector::axpy(nrxx, factor1, auxr, one, out, one); - } + { + const int one = 1; + const std::complex factor1 = std::complex (factor, 0); + BlasConnector::axpy (nrxx, factor1, auxr, one, out, one); + } else - { - memcpy(out, auxr, nrxx * 2 * 8); - } + { + memcpy (out, auxr, nrxx * 2 * 8); + } } template <> -void PW_Basis_K::convolution(const base_device::DEVICE_CPU* ctx, +void + PW_Basis_K::convolution (const base_device::DEVICE_CPU* ctx, const int ik, const int size, const std::complex* input, @@ -103,7 
+105,8 @@ void PW_Basis_K::convolution(const base_device::DEVICE_CPU* ctx, } template <> -void PW_Basis_K::convolution(const base_device::DEVICE_CPU* ctx, +void + PW_Basis_K::convolution (const base_device::DEVICE_CPU* ctx, const int ik, const int size, const std::complex* input, @@ -112,60 +115,60 @@ void PW_Basis_K::convolution(const base_device::DEVICE_CPU* ctx, const bool add, const double factor) const { - ModuleBase::timer::start(this->classname, "convolution"); + ModuleBase::timer::start (this->classname, "convolution"); - assert(this->gamma_only == false); + assert (this->gamma_only == false); const base_device::DEVICE_GPU* gpux = nullptr; // memset the auxr of 0 in the auxr,here the len of the auxr is nxyz - auto* auxr = this->fft_bundle.get_auxr_3d_data(); - memset(auxr, 0, this->nxyz * 2 * 8); + auto* auxr = this->fft_bundle.get_auxr_3d_data (); + memset (auxr, 0, this->nxyz * 2 * 8); const int startig = ik * this->npwk_max; const int npw_k = this->npwk[ik]; // copy the mapping form the type of stick to the 3dfft - set_3d_fft_box_op()(npw_k, this->ig2ixyz_k_cpu.data() + startig, input, auxr); + set_3d_fft_box_op () (npw_k, this->ig2ixyz_k_cpu.data () + startig, input, auxr); // use 3d fft backward - this->fft_bundle.fft3D_backward(auxr, auxr); + this->fft_bundle.fft3D_backward (auxr, auxr); for (int ir = 0; ir < size; ir++) - { - auxr[ir] *= input1[ir]; - } + { + auxr[ir] *= input1[ir]; + } // 3d fft - this->fft_bundle.fft3D_forward(auxr, auxr); + this->fft_bundle.fft3D_forward (auxr, auxr); // copy the result from the auxr to the out ,while consider the add - set_real_to_recip_output_op()(npw_k, - this->nxyz, - add, - factor, - this->ig2ixyz_k_cpu.data() + startig, - auxr, - output); - ModuleBase::timer::end(this->classname, "convolution"); + set_real_to_recip_output_op () (npw_k, + this->nxyz, + add, + factor, + this->ig2ixyz_k_cpu.data () + startig, + auxr, + output); + ModuleBase::timer::end (this->classname, "convolution"); } -template void 
PW_Basis_K::real2recip_dsp(const std::complex* in, - std::complex* out, - const int ik, - const bool add, - const float factor) const; // in:(nplane,nx*ny) ; out(nz, ns) -template void PW_Basis_K::recip2real_dsp(const std::complex* in, - std::complex* out, - const int ik, - const bool add, - const float factor) const; // in:(nz, ns) ; out(nplane,nx*ny) - -template void PW_Basis_K::real2recip_dsp(const std::complex* in, - std::complex* out, +template void PW_Basis_K::real2recip_dsp (const std::complex* in, + std::complex* out, const int ik, const bool add, - const double factor) const; // in:(nplane,nx*ny) ; out(nz, ns) -template void PW_Basis_K::recip2real_dsp(const std::complex* in, - std::complex* out, + const float factor) const; // in:(nplane,nx*ny) ; out(nz, ns) +template void PW_Basis_K::recip2real_dsp (const std::complex* in, + std::complex* out, const int ik, const bool add, - const double factor) const; + const float factor) const; // in:(nz, ns) ; out(nplane,nx*ny) + +template void PW_Basis_K::real2recip_dsp (const std::complex* in, + std::complex* out, + const int ik, + const bool add, + const double factor) const; // in:(nplane,nx*ny) ; out(nz, ns) +template void PW_Basis_K::recip2real_dsp (const std::complex* in, + std::complex* out, + const int ik, + const bool add, + const double factor) const; } // namespace ModulePW #endif diff --git a/source/source_basis/module_pw/test/depend_mock.cpp b/source/source_basis/module_pw/test/depend_mock.cpp index 4fdcfd5f4a5..bab1d8d4bf1 100644 --- a/source/source_basis/module_pw/test/depend_mock.cpp +++ b/source/source_basis/module_pw/test/depend_mock.cpp @@ -4,21 +4,43 @@ #include "depend_mock.h" namespace GlobalV -{ - std::ofstream ofs_running; +{ +std::ofstream ofs_running; } #ifdef __MPI MPI_Comm POOL_WORLD; namespace Parallel_Reduce { - template void reduce_all(T& object) { return; }; - template void reduce_pool(T& object) { return; }; +template +void + reduce_all (T& object) +{ + return; +}; +template +void + 
reduce_pool (T& object) +{ + return; +}; - template<> - void reduce_all(double& object) { return; }; - template<> - void reduce_pool(double& object) { return; }; - template<> - void reduce_pool(float& object) { return; }; -} +template <> +void + reduce_all (double& object) +{ + return; +}; +template <> +void + reduce_pool (double& object) +{ + return; +}; +template <> +void + reduce_pool (float& object) +{ + return; +}; +} // namespace Parallel_Reduce #endif \ No newline at end of file diff --git a/source/source_basis/module_pw/test/depend_mock.h b/source/source_basis/module_pw/test/depend_mock.h index 216233d7dca..93a1ab725fa 100644 --- a/source/source_basis/module_pw/test/depend_mock.h +++ b/source/source_basis/module_pw/test/depend_mock.h @@ -1,19 +1,19 @@ #include -//memory.cpp depends on GlobalV::ofs_running and reduce_all -//GPU depends on reduce_pool +// memory.cpp depends on GlobalV::ofs_running and reduce_all +// GPU depends on reduce_pool #ifdef __MPI #include "mpi.h" extern MPI_Comm POOL_WORLD; namespace Parallel_Reduce { - void reduce_all(double& object); - void reduce_pool(double& object); - void reduce_pool(float& object); -} +void reduce_all (double& object); +void reduce_pool (double& object); +void reduce_pool (float& object); +} // namespace Parallel_Reduce #endif namespace GlobalV -{ - extern std::ofstream ofs_running; +{ +extern std::ofstream ofs_running; } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/generate.cpp b/source/source_basis/module_pw/test/generate.cpp index 31998b5c67b..5e78a2832e0 100644 --- a/source/source_basis/module_pw/test/generate.cpp +++ b/source/source_basis/module_pw/test/generate.cpp @@ -7,18 +7,21 @@ #include "pw_test.h" using namespace std; -void create_pools(const int totnproc, const int myrank, const int nproc) -{ +void + create_pools (const int totnproc, const int myrank, const int nproc) +{ int mypool = 1; - if(myrank < nproc) mypool = 0; - 
MPI_Comm_split(MPI_COMM_WORLD,mypool,1,&POOL_WORLD); + if (myrank < nproc) + mypool = 0; + MPI_Comm_split (MPI_COMM_WORLD, mypool, 1, &POOL_WORLD); return; } -int main(int argc, char **argv) +int + main (int argc, char** argv) { int totnproc, myrank; - setupmpi(argc,argv,totnproc, myrank); + setupmpi (argc, argv, totnproc, myrank); const int nn = totnproc; int npw_per_ref[nn][nn]; int nst_per_ref[nn][nn]; @@ -29,112 +32,115 @@ int main(int argc, char **argv) int ny_ref; int fftny_ref; int nz_ref; - if(myrank == 0) cout<<"Generating ref..."< *kvec_d; + ModuleBase::Vector3* kvec_d; //-------------------------------------------------- lat0 = 3; - ModuleBase::Matrix3 la(1, 1, 0, 0, 1, 0, 0, 0, 1); + ModuleBase::Matrix3 la (1, 1, 0, 0, 1, 0, 0, 0, 1); latvec = la; wfcecut = 20; nks = 2; kvec_d = new ModuleBase::Vector3[nks]; - kvec_d[0].set(0,0,0.5); - kvec_d[1].set(0.5,0.5,0.5); + kvec_d[0].set (0, 0, 0.5); + kvec_d[1].set (0.5, 0.5, 0.5); gamma_only = false; int distribution_type = 1; bool xprime = true; //-------------------------------------------------- - //init //Real parameters + // init //Real parameters #ifdef __MPI - pwtest.initmpi(nproc_in_pool, rank_in_pool, POOL_WORLD); - pwktest.initmpi(nproc_in_pool, rank_in_pool, POOL_WORLD); + pwtest.initmpi (nproc_in_pool, rank_in_pool, POOL_WORLD); + pwktest.initmpi (nproc_in_pool, rank_in_pool, POOL_WORLD); #endif - pwtest.setbxyz(2,2,2); - pwktest.setbxyz(2,2,2); - pwtest.initgrids(lat0,latvec, 11, 11, 11); - EXPECT_EQ(pwtest.nx%2, 0); - EXPECT_EQ(pwtest.ny%2, 0); - EXPECT_EQ(pwtest.nz%2, 0); - pwtest.initgrids(lat0,latvec, 2*wfcecut); - pwtest.initgrids(lat0,latvec, 3*wfcecut); - pwktest.initgrids(lat0,latvec, pwtest.nx, pwtest.ny, pwtest.nz); - pwtest.initparameters(gamma_only,wfcecut,distribution_type,xprime); - pwktest.initparameters(gamma_only,wfcecut,nks,kvec_d,distribution_type, xprime); - static_cast(pwtest).setuptransform(); - pwktest.setuptransform(); - EXPECT_EQ(pwtest.nx%2, 0); - EXPECT_EQ(pwtest.ny%2, 
0); - EXPECT_EQ(pwtest.nz%2, 0); - EXPECT_EQ(pwktest.nx%2, 0); - EXPECT_EQ(pwktest.ny%2, 0); - EXPECT_EQ(pwktest.nz%2, 0); + pwtest.setbxyz (2, 2, 2); + pwktest.setbxyz (2, 2, 2); + pwtest.initgrids (lat0, latvec, 11, 11, 11); + EXPECT_EQ (pwtest.nx % 2, 0); + EXPECT_EQ (pwtest.ny % 2, 0); + EXPECT_EQ (pwtest.nz % 2, 0); + pwtest.initgrids (lat0, latvec, 2 * wfcecut); + pwtest.initgrids (lat0, latvec, 3 * wfcecut); + pwktest.initgrids (lat0, latvec, pwtest.nx, pwtest.ny, pwtest.nz); + pwtest.initparameters (gamma_only, wfcecut, distribution_type, xprime); + pwktest.initparameters (gamma_only, wfcecut, nks, kvec_d, distribution_type, xprime); + static_cast (pwtest).setuptransform (); + pwktest.setuptransform (); + EXPECT_EQ (pwtest.nx % 2, 0); + EXPECT_EQ (pwtest.ny % 2, 0); + EXPECT_EQ (pwtest.nz % 2, 0); + EXPECT_EQ (pwktest.nx % 2, 0); + EXPECT_EQ (pwktest.ny % 2, 0); + EXPECT_EQ (pwktest.nz % 2, 0); int bsize = 0; - pwtest.autoset_big_cell_size(bsize, 12); - EXPECT_EQ(bsize, 4); - pwtest.autoset_big_cell_size(bsize, 12, 4); - EXPECT_EQ(bsize, 3); - pwtest.autoset_big_cell_size(bsize, 14, 4); - EXPECT_EQ(bsize, 2); - + pwtest.autoset_big_cell_size (bsize, 12); + EXPECT_EQ (bsize, 4); + pwtest.autoset_big_cell_size (bsize, 12, 4); + EXPECT_EQ (bsize, 3); + pwtest.autoset_big_cell_size (bsize, 14, 4); + EXPECT_EQ (bsize, 2); delete[] kvec_d; - ModulePW::PW_Basis_Big *p_pw = new ModulePW::PW_Basis_Big(device_flag, precision_flag); - ModulePW::PW_Basis_K_Big *p_pwk = new ModulePW::PW_Basis_K_Big(device_flag, precision_flag); + ModulePW::PW_Basis_Big* p_pw = new ModulePW::PW_Basis_Big (device_flag, precision_flag); + ModulePW::PW_Basis_K_Big* p_pwk = new ModulePW::PW_Basis_K_Big (device_flag, precision_flag); delete p_pw; delete p_pwk; - fftw_cleanup(); + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - fftwf_cleanup(); + fftwf_cleanup (); #endif } class TestPW_Basis_Big : public ::testing::Test { - public: - ModulePW::PW_Basis_Big pwtest = ModulePW::PW_Basis_Big(); + 
public: + ModulePW::PW_Basis_Big pwtest = ModulePW::PW_Basis_Big (); }; // Test the function with nproc = 0 (bx and by) -TEST_F(TestPW_Basis_Big, BxByTest) { +TEST_F (TestPW_Basis_Big, BxByTest) +{ int b_size = 0; int nc_size = 12; - pwtest.autoset_big_cell_size(b_size, nc_size); - EXPECT_EQ(b_size, 4); + pwtest.autoset_big_cell_size (b_size, nc_size); + EXPECT_EQ (b_size, 4); } // Test the function with nproc > 0 (bz) -TEST_F(TestPW_Basis_Big, BzTest) { +TEST_F (TestPW_Basis_Big, BzTest) +{ int b_size = 0; int nc_size = 12; int nproc = 2; - pwtest.autoset_big_cell_size(b_size, nc_size, nproc); - EXPECT_EQ(b_size, 3); + pwtest.autoset_big_cell_size (b_size, nc_size, nproc); + EXPECT_EQ (b_size, 3); } // Test the function with nproc > 0 (bz) and nc_size not factored by any candidate -TEST_F(TestPW_Basis_Big, BzNoFactorTest) { +TEST_F (TestPW_Basis_Big, BzNoFactorTest) +{ int b_size = 0; int nc_size = 11; int nproc = 2; - pwtest.autoset_big_cell_size(b_size, nc_size, nproc); - EXPECT_EQ(b_size, 4); + pwtest.autoset_big_cell_size (b_size, nc_size, nproc); + EXPECT_EQ (b_size, 4); } // Test the function with nproc > 0 (bz) and nc_size not factored by any candidate -TEST_F(TestPW_Basis_Big, BzNoFactorNoResultTest) { +TEST_F (TestPW_Basis_Big, BzNoFactorNoResultTest) +{ int b_size = 0; int nc_size = 11; int nproc = 3; - pwtest.autoset_big_cell_size(b_size, nc_size, nproc); - EXPECT_EQ(b_size, 4); + pwtest.autoset_big_cell_size (b_size, nc_size, nproc); + EXPECT_EQ (b_size, 4); } // Test the function with nproc > 0 (bz) and nc_size smaller than candidates -TEST_F(TestPW_Basis_Big, BzSmallTest) { +TEST_F (TestPW_Basis_Big, BzSmallTest) +{ int b_size = 0; int nc_size = 2; int nproc = 2; - pwtest.autoset_big_cell_size(b_size, nc_size, nproc); - EXPECT_EQ(b_size, 2); + pwtest.autoset_big_cell_size (b_size, nc_size, nproc); + EXPECT_EQ (b_size, 2); } // Test the function with nproc > 0 (bz) and nc_size smaller than candidates -TEST_F(TestPW_Basis_Big, BzSmallNoResultTest) { 
+TEST_F (TestPW_Basis_Big, BzSmallNoResultTest) +{ int b_size = 0; int nc_size = 2; int nproc = 3; - pwtest.autoset_big_cell_size(b_size, nc_size, nproc); - EXPECT_EQ(b_size, 2); + pwtest.autoset_big_cell_size (b_size, nc_size, nproc); + EXPECT_EQ (b_size, 2); } // Test the function with nproc > 0 (bz) and nc_size not divisible by nproc -TEST_F(TestPW_Basis_Big, BzNprocTest) { +TEST_F (TestPW_Basis_Big, BzNprocTest) +{ int b_size = 0; int nc_size = 12; int nproc = 3; - pwtest.autoset_big_cell_size(b_size, nc_size, nproc); - EXPECT_EQ(b_size, 4); + pwtest.autoset_big_cell_size (b_size, nc_size, nproc); + EXPECT_EQ (b_size, 4); } // Test the function with nproc > 0 (bz) and nc_size not divisible by nproc -TEST_F(TestPW_Basis_Big, BzNprocNoResultTest) { +TEST_F (TestPW_Basis_Big, BzNprocNoResultTest) +{ int b_size = 0; int nc_size = 12; int nproc = 5; - pwtest.autoset_big_cell_size(b_size, nc_size, nproc); - EXPECT_EQ(b_size, 3); + pwtest.autoset_big_cell_size (b_size, nc_size, nproc); + EXPECT_EQ (b_size, 3); } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test-other.cpp b/source/source_basis/module_pw/test/test-other.cpp index e6efa90654a..27c121d44e7 100644 --- a/source/source_basis/module_pw/test/test-other.cpp +++ b/source/source_basis/module_pw/test/test-other.cpp @@ -14,190 +14,190 @@ #include "gmock/gmock.h" using namespace std; -TEST_F(PWTEST,test_other) +TEST_F (PWTEST, test_other) { - cout<<"Test other codes"< *kvec_d = new ModuleBase::Vector3[nks]; - kvec_d[0].set(0,0,0.5); - kvec_d[1].set(0.5,0.5,0.5); - pwktest.set_precision("double"); - pwktest.initgrids(2, latvec, 4,4,4); - pwktest.initparameters(true, 20, nks, kvec_d); - pwktest.setuptransform(); - pwktest.collect_local_pw(); + ModuleBase::Vector3* kvec_d = new ModuleBase::Vector3[nks]; + kvec_d[0].set (0, 0, 0.5); + kvec_d[1].set (0.5, 0.5, 0.5); + pwktest.set_precision ("double"); + pwktest.initgrids (2, latvec, 4, 4, 4); + pwktest.initparameters (true, 20, nks, 
kvec_d); + pwktest.setuptransform (); + pwktest.collect_local_pw (); #ifdef __ENABLE_FLOAT_FFTW - pwktest.set_precision("single"); + pwktest.set_precision ("single"); #endif - pwktest.initparameters(true, 8, nks, kvec_d); - pwktest.setuptransform(); - pwktest.collect_local_pw(); + pwktest.initparameters (true, 8, nks, kvec_d); + pwktest.setuptransform (); + pwktest.collect_local_pw (); const int nrxx = pwktest.nrxx; - std::complex * rhor1 = new std::complex [nrxx]; - std::complex * rhor2 = new std::complex [nrxx]; + std::complex* rhor1 = new std::complex[nrxx]; + std::complex* rhor2 = new std::complex[nrxx]; #ifdef __ENABLE_FLOAT_FFTW - complex * rhofr1 = new complex [nrxx]; - complex * rhofr2 = new complex [nrxx]; + complex* rhofr1 = new complex[nrxx]; + complex* rhofr2 = new complex[nrxx]; #endif const base_device::DEVICE_CPU* ctx; - for(int ik = 0; ik < nks; ++ik) - { - const int npwk = pwktest.npwk[ik]; - std::complex * rhog1 = new std::complex [npwk]; - std::complex * rhog2 = new std::complex [npwk]; + for (int ik = 0; ik < nks; ++ik) + { + const int npwk = pwktest.npwk[ik]; + std::complex* rhog1 = new std::complex[npwk]; + std::complex* rhog2 = new std::complex[npwk]; #ifdef __ENABLE_FLOAT_FFTW - complex * rhofg1 = new complex [npwk]; - complex * rhofg2 = new complex [npwk]; + complex* rhofg1 = new complex[npwk]; + complex* rhofg2 = new complex[npwk]; #endif - for(int ig = 0 ; ig < npwk ; ++ig) - { - rhog1[ig] = 1.0/(pwktest.getgk2(ik,ig)+1) + ModuleBase::IMAG_UNIT / (std::abs(pwktest.getgdirect(ik,ig).x+1) + 1); - rhog2[ig] = 1.0/(pwktest.getgk2(ik,ig)+1) + ModuleBase::IMAG_UNIT / (std::abs(pwktest.getgdirect(ik,ig).x+1) + 1); - } + for (int ig = 0; ig < npwk; ++ig) + { + rhog1[ig] = 1.0 / (pwktest.getgk2 (ik, ig) + 1) + + ModuleBase::IMAG_UNIT / (std::abs (pwktest.getgdirect (ik, ig).x + 1) + 1); + rhog2[ig] = 1.0 / (pwktest.getgk2 (ik, ig) + 1) + + ModuleBase::IMAG_UNIT / (std::abs (pwktest.getgdirect (ik, ig).x + 1) + 1); + } #ifdef __ENABLE_FLOAT_FFTW - 
for(int ig = 0 ; ig < npwk ; ++ig) - { - rhofg1[ig] = 1.0/(pwktest.getgk2(ik,ig)+1) + ModuleBase::IMAG_UNIT / (std::abs(pwktest.getgdirect(ik,ig).x+1) + 1); - rhofg2[ig] = 1.0/(pwktest.getgk2(ik,ig)+1) + ModuleBase::IMAG_UNIT / (std::abs(pwktest.getgdirect(ik,ig).x+1) + 1); - } + for (int ig = 0; ig < npwk; ++ig) + { + rhofg1[ig] = 1.0 / (pwktest.getgk2 (ik, ig) + 1) + + ModuleBase::IMAG_UNIT / (std::abs (pwktest.getgdirect (ik, ig).x + 1) + 1); + rhofg2[ig] = 1.0 / (pwktest.getgk2 (ik, ig) + 1) + + ModuleBase::IMAG_UNIT / (std::abs (pwktest.getgdirect (ik, ig).x + 1) + 1); + } #endif - pwktest.recip_to_real(ctx, rhog1, rhor1, ik); - pwktest.recip2real(rhog2, rhor2, ik); - for(int ir = 0 ; ir < nrxx; ++ir) - { - EXPECT_NEAR(std::abs(rhor1[ir]),std::abs(rhor2[ir]),1e-8); - } - pwktest.real_to_recip(ctx, rhor1, rhog1, ik); - pwktest.real2recip(rhor2, rhog2, ik); - for(int ig = 0 ; ig < npwk; ++ig) - { - EXPECT_NEAR(std::abs(rhog1[ig]),std::abs(rhog2[ig]),1e-8); - } + pwktest.recip_to_real (ctx, rhog1, rhor1, ik); + pwktest.recip2real (rhog2, rhor2, ik); + for (int ir = 0; ir < nrxx; ++ir) + { + EXPECT_NEAR (std::abs (rhor1[ir]), std::abs (rhor2[ir]), 1e-8); + } + pwktest.real_to_recip (ctx, rhor1, rhog1, ik); + pwktest.real2recip (rhor2, rhog2, ik); + for (int ig = 0; ig < npwk; ++ig) + { + EXPECT_NEAR (std::abs (rhog1[ig]), std::abs (rhog2[ig]), 1e-8); + } #ifdef __ENABLE_FLOAT_FFTW - pwktest.recip_to_real(ctx, rhofg1, rhofr1, ik); - pwktest.recip2real(rhofg2, rhofr2, ik); - for(int ir = 0 ; ir < nrxx; ++ir) - { - EXPECT_NEAR(std::abs(rhofr1[ir]),std::abs(rhofr2[ir]),1e-6); - } - pwktest.real_to_recip(ctx, rhofr1, rhofg1, ik); - pwktest.real2recip(rhofr2, rhofg2, ik); - for(int ig = 0 ; ig < npwk; ++ig) - { - EXPECT_NEAR(std::abs(rhofg1[ig]),std::abs(rhofg2[ig]),1e-6); - } + pwktest.recip_to_real (ctx, rhofg1, rhofr1, ik); + pwktest.recip2real (rhofg2, rhofr2, ik); + for (int ir = 0; ir < nrxx; ++ir) + { + EXPECT_NEAR (std::abs (rhofr1[ir]), std::abs (rhofr2[ir]), 
1e-6); + } + pwktest.real_to_recip (ctx, rhofr1, rhofg1, ik); + pwktest.real2recip (rhofr2, rhofg2, ik); + for (int ig = 0; ig < npwk; ++ig) + { + EXPECT_NEAR (std::abs (rhofg1[ig]), std::abs (rhofg2[ig]), 1e-6); + } #endif - - - delete [] rhog1; - delete [] rhog2; + delete[] rhog1; + delete[] rhog2; #ifdef __ENABLE_FLOAT_FFTW - delete [] rhofg1; - delete [] rhofg2; + delete[] rhofg1; + delete[] rhofg2; #endif - } - delete [] rhor1; - delete [] rhor2; + } + delete[] rhor1; + delete[] rhor2; #ifdef __ENABLE_FLOAT_FFTW - delete [] rhofr1; - delete [] rhofr2; + delete[] rhofr1; + delete[] rhofr2; #endif - - double* d_kvec_c = pwktest.get_kvec_c_data(); - double* d_gcar = pwktest.get_gcar_data(); - double* d_gk2 = pwktest.get_gk2_data(); + double* d_kvec_c = pwktest.get_kvec_c_data (); + double* d_gcar = pwktest.get_gcar_data (); + double* d_gk2 = pwktest.get_gk2_data (); #ifdef __ENABLE_FLOAT_FFTW - float* s_kvec_c = pwktest.get_kvec_c_data(); - float* s_gcar = pwktest.get_gcar_data(); - float* s_gk2 = pwktest.get_gk2_data(); + float* s_kvec_c = pwktest.get_kvec_c_data (); + float* s_gcar = pwktest.get_gcar_data (); + float* s_gk2 = pwktest.get_gk2_data (); #endif - delete[] kvec_d; - ModulePW::PW_Basis *p_pw = new ModulePW::PW_Basis(device_flag, precision_flag); - ModulePW::PW_Basis_K *p_pwk = new ModulePW::PW_Basis_K(device_flag, precision_flag); + ModulePW::PW_Basis* p_pw = new ModulePW::PW_Basis (device_flag, precision_flag); + ModulePW::PW_Basis_K* p_pwk = new ModulePW::PW_Basis_K (device_flag, precision_flag); delete p_pw; delete p_pwk; - fftw_cleanup(); + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - fftwf_cleanup(); + fftwf_cleanup (); #endif } -TEST_F(PWTEST, test_no_plane_wave_message_global_empty_k) +TEST_F (PWTEST, test_no_plane_wave_message_global_empty_k) { - ModulePW::PW_Basis_K pwktest(device_flag, precision_flag); - ModuleBase::Matrix3 latvec(0.2, 0, 0, 0, 1, 0, 0, 0, 1); + ModulePW::PW_Basis_K pwktest (device_flag, precision_flag); + 
ModuleBase::Matrix3 latvec (0.2, 0, 0, 0, 1, 0, 0, 0, 1); #ifdef __MPI - pwktest.initmpi(nproc_in_pool, rank_in_pool, POOL_WORLD); + pwktest.initmpi (nproc_in_pool, rank_in_pool, POOL_WORLD); #endif const int nks = 1; ModuleBase::Vector3 kvec_d[nks]; - kvec_d[0].set(0.5, 0.5, 0.5); + kvec_d[0].set (0.5, 0.5, 0.5); - pwktest.initgrids(2, latvec, 4, 4, 4); - pwktest.initparameters(true, 1e-4, nks, kvec_d); - testing::internal::CaptureStdout(); - pwktest.setuptransform(); - std::string output = testing::internal::GetCapturedStdout(); + pwktest.initgrids (2, latvec, 4, 4, 4); + pwktest.initparameters (true, 1e-4, nks, kvec_d); + testing::internal::CaptureStdout (); + pwktest.setuptransform (); + std::string output = testing::internal::GetCapturedStdout (); - EXPECT_THAT(output, - testing::HasSubstr("No plane waves are available for this k-point across the whole pool. Please increase ecutwfc or check KPT settings.")); + EXPECT_THAT (output, + testing::HasSubstr ("No plane waves are available for this k-point across the whole pool. 
Please " + "increase ecutwfc or check KPT settings.")); } -TEST_F(PWTEST, test_no_plane_wave_message_parallel_local_empty) +TEST_F (PWTEST, test_no_plane_wave_message_parallel_local_empty) { #ifndef __MPI - GTEST_SKIP() << "Requires MPI ranks to simulate local-empty but global-nonempty case."; + GTEST_SKIP () << "Requires MPI ranks to simulate local-empty but global-nonempty case."; #else if (nproc_in_pool <= 1) - { - GTEST_SKIP() << "Requires more than one MPI rank."; - } + { + GTEST_SKIP () << "Requires more than one MPI rank."; + } - ModulePW::PW_Basis_K pwktest(device_flag, precision_flag); - ModuleBase::Matrix3 latvec(0.2, 0, 0, 0, 1, 0, 0, 0, 1); - pwktest.initmpi(nproc_in_pool, rank_in_pool, POOL_WORLD); + ModulePW::PW_Basis_K pwktest (device_flag, precision_flag); + ModuleBase::Matrix3 latvec (0.2, 0, 0, 0, 1, 0, 0, 0, 1); + pwktest.initmpi (nproc_in_pool, rank_in_pool, POOL_WORLD); const int nks = 1; ModuleBase::Vector3 kvec_d[nks]; - kvec_d[0].set(0.0, 0.0, 0.0); + kvec_d[0].set (0.0, 0.0, 0.0); - pwktest.initgrids(2, latvec, 4, 4, 4); - pwktest.initparameters(true, 8.0, nks, kvec_d); - testing::internal::CaptureStdout(); - pwktest.setuptransform(); - std::string output = testing::internal::GetCapturedStdout(); + pwktest.initgrids (2, latvec, 4, 4, 4); + pwktest.initparameters (true, 8.0, nks, kvec_d); + testing::internal::CaptureStdout (); + pwktest.setuptransform (); + std::string output = testing::internal::GetCapturedStdout (); const int local_npwk = pwktest.npwk[0]; int global_npwk = local_npwk; - MPI_Allreduce(MPI_IN_PLACE, &global_npwk, 1, MPI_INT, MPI_SUM, POOL_WORLD); + MPI_Allreduce (MPI_IN_PLACE, &global_npwk, 1, MPI_INT, MPI_SUM, POOL_WORLD); const int local_target_rank = (local_npwk == 0 && global_npwk > 0) ? 
1 : 0; int any_target_rank = local_target_rank; - MPI_Allreduce(MPI_IN_PLACE, &any_target_rank, 1, MPI_INT, MPI_MAX, POOL_WORLD); - EXPECT_EQ(any_target_rank, 1); + MPI_Allreduce (MPI_IN_PLACE, &any_target_rank, 1, MPI_INT, MPI_MAX, POOL_WORLD); + EXPECT_EQ (any_target_rank, 1); if (local_target_rank == 1) - { - EXPECT_THAT(output, - testing::HasSubstr("Current core has no plane waves! Please reduce the cores.")); - } + { + EXPECT_THAT (output, testing::HasSubstr ("Current core has no plane waves! Please reduce the cores.")); + } #endif } diff --git a/source/source_basis/module_pw/test/test1-1-1.cpp b/source/source_basis/module_pw/test/test1-1-1.cpp index 3eb9d8fd5e4..c3dc5fd2918 100644 --- a/source/source_basis/module_pw/test/test1-1-1.cpp +++ b/source/source_basis/module_pw/test/test1-1-1.cpp @@ -7,14 +7,14 @@ #include "source_base/global_function.h" #include "source_base/constants.h" #include "pw_test.h" -extern int nproc_in_pool,rank_in_pool; +extern int nproc_in_pool, rank_in_pool; using namespace std; -TEST_F(PWTEST,test1_1_1) +TEST_F (PWTEST, test1_1_1) { - cout<<"dividemthd 1, gamma_only: off, check gcar,gdirect,gg,istot2ixy,ig2isz"< f; - f.x = ix; - f.y = iy; - f.z = iz; - if(iz >= int(pwtest.nz/2) +1) f.z -= pwtest.nz; - if(iy >= int(pwtest.ny/2) +1) f.y -= pwtest.ny; - if(ix >= int(pwtest.nx/2) +1) f.x -= pwtest.nx; - double modulus = f * (GGT * f); - if (modulus <= ggecut) - { - EXPECT_EQ(tmpx[iz + iy*pwtest.nz + ix*pwtest.ny*pwtest.nz], int(f.x)); - EXPECT_EQ(tmpy[iz + iy*pwtest.nz + ix*pwtest.ny*pwtest.nz], int(f.y)); - EXPECT_EQ(tmpz[iz + iy*pwtest.nz + ix*pwtest.ny*pwtest.nz], int(f.z)); - } - + for (int iy = 0; iy < pwtest.ny; ++iy) + { + for (int ix = 0; ix < pwtest.nx; ++ix) + { + ModuleBase::Vector3 f; + f.x = ix; + f.y = iy; + f.z = iz; + if (iz >= int (pwtest.nz / 2) + 1) + { + f.z -= pwtest.nz; + } + if (iy >= int (pwtest.ny / 2) + 1) + { + f.y -= pwtest.ny; + } + if (ix >= int (pwtest.nx / 2) + 1) + { + f.x -= pwtest.nx; + } + double modulus 
= f * (GGT * f); + if (modulus <= ggecut) + { + EXPECT_EQ (tmpx[iz + iy * pwtest.nz + ix * pwtest.ny * pwtest.nz], + int (f.x)); + EXPECT_EQ (tmpy[iz + iy * pwtest.nz + ix * pwtest.ny * pwtest.nz], + int (f.y)); + EXPECT_EQ (tmpz[iz + iy * pwtest.nz + ix * pwtest.ny * pwtest.nz], + int (f.z)); + } + } + } } - } } - } - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - ModuleBase::Vector3 f; - f.x = pwtest.gdirect[ig].x; - f.y = pwtest.gdirect[ig].y; - f.z = pwtest.gdirect[ig].z; - ModuleBase::Vector3 gcar; - gcar = f * G; - double modulus = f*GGT*f; - EXPECT_NEAR(gcar.x,pwtest.gcar[ig].x,1e-6); - EXPECT_NEAR(gcar.y,pwtest.gcar[ig].y,1e-6); - EXPECT_NEAR(gcar.z,pwtest.gcar[ig].z,1e-6); - EXPECT_NEAR(modulus,pwtest.gg[ig],1e-6); - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - for(int igg = 1 ; igg < pwtest.ngg ; ++igg) - { - EXPECT_GT(pwtest.gg_uniq[igg], pwtest.gg_uniq[igg-1]); - } - if(pwtest.ig_gge0 >= 0) {EXPECT_NEAR(0.0, pwtest.gg[pwtest.ig_gge0], 1e-8);} - delete [] startnst; - delete [] tmpx; - delete [] tmpy; - delete [] tmpz; + for (int ig = 0; ig < pwtest.npw; ++ig) + { + ModuleBase::Vector3 f; + f.x = pwtest.gdirect[ig].x; + f.y = pwtest.gdirect[ig].y; + f.z = pwtest.gdirect[ig].z; + ModuleBase::Vector3 gcar; + gcar = f * G; + double modulus = f * GGT * f; + EXPECT_NEAR (gcar.x, pwtest.gcar[ig].x, 1e-6); + EXPECT_NEAR (gcar.y, pwtest.gcar[ig].y, 1e-6); + EXPECT_NEAR (gcar.z, pwtest.gcar[ig].z, 1e-6); + EXPECT_NEAR (modulus, pwtest.gg[ig], 1e-6); + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + for (int igg = 1; igg < pwtest.ngg; ++igg) + { + EXPECT_GT (pwtest.gg_uniq[igg], pwtest.gg_uniq[igg - 1]); + } + if (pwtest.ig_gge0 >= 0) + { + EXPECT_NEAR (0.0, pwtest.gg[pwtest.ig_gge0], 1e-8); + } + delete[] startnst; + delete[] tmpx; + delete[] tmpy; + delete[] tmpz; - //Add tests for gg_uniq - ModuleBase::Matrix3 
latvec2(5.1358423233,0.0,0.0,0.1578526541,5.1334159104,0.0,-2.646847675,-2.5667081359,3.5753437737); + // Add tests for gg_uniq + ModuleBase::Matrix3 + latvec2 (5.1358423233, 0.0, 0.0, 0.1578526541, 5.1334159104, 0.0, -2.646847675, -2.5667081359, 3.5753437737); gamma_only = false; wfcecut = 240; lat0 = 1.88972613; distribution_type = 1; //-------------------------------------------------- - pwtest.initgrids(lat0, latvec2, wfcecut); - pwtest.initparameters(gamma_only, wfcecut, distribution_type, xprime); - pwtest.setuptransform(); - pwtest.collect_local_pw(); - pwtest.collect_uniqgg(); - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - int * irindex = new int [pwtest.fftnxy]; - pwtest.getfftixy2is(irindex); - for(int is = 0 ; is < pwtest.nst ;++is) - { - EXPECT_EQ(irindex[pwtest.is2fftixy[is]],is); - } + pwtest.initgrids (lat0, latvec2, wfcecut); + pwtest.initparameters (gamma_only, wfcecut, distribution_type, xprime); + pwtest.setuptransform (); + pwtest.collect_local_pw (); + pwtest.collect_uniqgg (); + for (int ig = 0; ig < pwtest.npw; ++ig) + { + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + int* irindex = new int[pwtest.fftnxy]; + pwtest.getfftixy2is (irindex); + for (int is = 0; is < pwtest.nst; ++is) + { + EXPECT_EQ (irindex[pwtest.is2fftixy[is]], is); + } delete[] irindex; - - } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test1-1-2.cpp b/source/source_basis/module_pw/test/test1-1-2.cpp index 699d70c8030..8380196fa3a 100644 --- a/source/source_basis/module_pw/test/test1-1-2.cpp +++ b/source/source_basis/module_pw/test/test1-1-2.cpp @@ -7,14 +7,14 @@ #include "source_base/global_function.h" #include "source_base/constants.h" #include "pw_test.h" -extern int nproc_in_pool,rank_in_pool; +extern int nproc_in_pool, rank_in_pool; using namespace std; -TEST_F(PWTEST,test1_1_2) +TEST_F (PWTEST, test1_1_2) { - cout<<"dividemthd 1, 
gamma_only: on, xprime: false, check gcar,gdirect,gg,istot2ixy,ig2isz"< f; - f.x = ix; - f.y = iy; - f.z = iz; - if(iz >= int(pwtest.nz/2) +1) f.z -= pwtest.nz; - if(ix >= int(pwtest.nx/2) +1) f.x -= pwtest.nx; - double modulus = f * (GGT * f); - if (modulus <= ggecut) - { - EXPECT_EQ(tmpx[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.x)); - EXPECT_EQ(tmpy[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.y)); - EXPECT_EQ(tmpz[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.z)); - } - + for (int iy = 0; iy < pwtest.fftny; ++iy) + { + for (int ix = 0; ix < pwtest.fftnx; ++ix) + { + ModuleBase::Vector3 f; + f.x = ix; + f.y = iy; + f.z = iz; + if (iz >= int (pwtest.nz / 2) + 1) + { + f.z -= pwtest.nz; + } + if (ix >= int (pwtest.nx / 2) + 1) + { + f.x -= pwtest.nx; + } + double modulus = f * (GGT * f); + if (modulus <= ggecut) + { + EXPECT_EQ (tmpx[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.x)); + EXPECT_EQ (tmpy[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.y)); + EXPECT_EQ (tmpz[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.z)); + } + } + } } - } } - } - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - ModuleBase::Vector3 f; - f.x = pwtest.gdirect[ig].x; - f.y = pwtest.gdirect[ig].y; - f.z = pwtest.gdirect[ig].z; - ModuleBase::Vector3 gcar; - gcar = f * G; - double modulus = f*GGT*f; - EXPECT_NEAR(gcar.x,pwtest.gcar[ig].x,1e-6); - EXPECT_NEAR(gcar.y,pwtest.gcar[ig].y,1e-6); - EXPECT_NEAR(gcar.z,pwtest.gcar[ig].z,1e-6); - EXPECT_NEAR(modulus,pwtest.gg[ig],1e-6); - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - for(int igg = 1 ; igg < pwtest.ngg ; ++igg) - { - EXPECT_GT(pwtest.gg_uniq[igg], pwtest.gg_uniq[igg-1]); - } - if(pwtest.ig_gge0 >= 0) {EXPECT_NEAR(0.0, pwtest.gg[pwtest.ig_gge0], 1e-8);} - delete [] startnst; - delete [] tmpx; - delete [] tmpy; - delete [] tmpz; + for (int ig = 0; ig < pwtest.npw; ++ig) + { + ModuleBase::Vector3 
f; + f.x = pwtest.gdirect[ig].x; + f.y = pwtest.gdirect[ig].y; + f.z = pwtest.gdirect[ig].z; + ModuleBase::Vector3 gcar; + gcar = f * G; + double modulus = f * GGT * f; + EXPECT_NEAR (gcar.x, pwtest.gcar[ig].x, 1e-6); + EXPECT_NEAR (gcar.y, pwtest.gcar[ig].y, 1e-6); + EXPECT_NEAR (gcar.z, pwtest.gcar[ig].z, 1e-6); + EXPECT_NEAR (modulus, pwtest.gg[ig], 1e-6); + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + for (int igg = 1; igg < pwtest.ngg; ++igg) + { + EXPECT_GT (pwtest.gg_uniq[igg], pwtest.gg_uniq[igg - 1]); + } + if (pwtest.ig_gge0 >= 0) + { + EXPECT_NEAR (0.0, pwtest.gg[pwtest.ig_gge0], 1e-8); + } + delete[] startnst; + delete[] tmpx; + delete[] tmpy; + delete[] tmpz; } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test1-2-2.cpp b/source/source_basis/module_pw/test/test1-2-2.cpp index 45df3c906d1..f9b7acf2418 100644 --- a/source/source_basis/module_pw/test/test1-2-2.cpp +++ b/source/source_basis/module_pw/test/test1-2-2.cpp @@ -12,33 +12,33 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test1_2_2) +TEST_F (PWTEST, test1_2_2) { - cout<<"dividemthd 1, gamma_only: off, check fft between double and complex"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix - int(nx/2); - double vy = iy - int(ny/2); - double vz = iz - int(nz/2); - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut) - { - tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulus+1); - if(vy > 0) tmp[ix*ny*nz + iy*nz + iz]+=ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); - else if(vy < 0) tmp[ix*ny*nz + iy*nz + 
iz]-=ModuleBase::IMAG_UNIT / (std::abs(-v.x+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix - int (nx / 2); + double vy = iy - int (ny / 2); + double vz = iz - int (nz / 2); + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut) + { + tmp[ix * ny * nz + iy * nz + iz] = 1.0 / (modulus + 1); + if (vy > 0) + { + tmp[ix * ny * nz + iy * nz + iz] + += ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + else if (vy < 0) + { + tmp[ix * ny * nz + iy * nz + iz] + -= ModuleBase::IMAG_UNIT / (std::abs (-v.x + 1) + 1); + } + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - 
std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1); - if(pwtest.gdirect[ig].y > 0) - { - rhog[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhogr[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - else if(pwtest.gdirect[ig].y < 0) + + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) { - rhog[ig]-=ModuleBase::IMAG_UNIT / (std::abs(-pwtest.gdirect[ig].x+1) + 1); - rhogr[ig]-=ModuleBase::IMAG_UNIT / (std::abs(-pwtest.gdirect[ig].x+1) + 1); + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1); + if (pwtest.gdirect[ig].y > 0) + { + rhog[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhogr[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + else if (pwtest.gdirect[ig].y < 0) + { + rhog[ig] -= ModuleBase::IMAG_UNIT / (std::abs (-pwtest.gdirect[ig].x + 1) + 1); + rhogr[ig] -= ModuleBase::IMAG_UNIT / (std::abs (-pwtest.gdirect[ig].x + 1) + 1); + } } - } - double * rhor = new double [nrxx]; + double* rhor = new double[nrxx]; #ifdef __ENABLE_FLOAT_FFTW - complex * rhofg = new complex [npw]; - complex * rhofgr = new complex [nmaxgr]; - complex * rhofgout = new complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhofg[ig] = 1.0/(pwtest.gg[ig]+1); - rhofgr[ig] = 1.0/(pwtest.gg[ig]+1); - if(pwtest.gdirect[ig].y > 0) + complex* rhofg = new complex[npw]; + complex* rhofgr = new complex[nmaxgr]; + complex* rhofgout = new complex[npw]; + for (int ig = 0; ig < npw; ++ig) { - rhofg[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhofgr[ig]+=ModuleBase::IMAG_UNIT / 
(std::abs(pwtest.gdirect[ig].x+1) + 1); - } - else if(pwtest.gdirect[ig].y < 0) - { - rhofg[ig]-=ModuleBase::IMAG_UNIT / (std::abs(-pwtest.gdirect[ig].x+1) + 1); - rhofgr[ig]-=ModuleBase::IMAG_UNIT / (std::abs(-pwtest.gdirect[ig].x+1) + 1); + rhofg[ig] = 1.0 / (pwtest.gg[ig] + 1); + rhofgr[ig] = 1.0 / (pwtest.gg[ig] + 1); + if (pwtest.gdirect[ig].y > 0) + { + rhofg[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhofgr[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + else if (pwtest.gdirect[ig].y < 0) + { + rhofg[ig] -= ModuleBase::IMAG_UNIT / (std::abs (-pwtest.gdirect[ig].x + 1) + 1); + rhofgr[ig] -= ModuleBase::IMAG_UNIT / (std::abs (-pwtest.gdirect[ig].x + 1) + 1); + } } - } - float * rhofr = new float [nrxx]; + float* rhofr = new float[nrxx]; #endif - - pwtest.recip2real(rhog,rhor);//check out-of-place transform - pwtest.recip2real(rhogr,(double*)rhogr);//check in-place transform + pwtest.recip2real (rhog, rhor); // check out-of-place transform + + pwtest.recip2real (rhogr, (double*)rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.recip2real(rhofg,rhofr);//check out-of-place transform + pwtest.recip2real (rhofg, rhofr); // check out-of-place transform - pwtest.recip2real(rhofgr,(float*)rhofgr);//check in-place transform + pwtest.recip2real (rhofgr, (float*)rhofgr); // check in-place transform #endif int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz],1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((double*)rhogr)[ixy*nplane+iz],1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz], 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), ((double*)rhogr)[ixy * nplane + iz], 1e-6); #ifdef __ENABLE_FLOAT_FFTW - 
EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofr[ixy*nplane+iz],1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((float*)rhofgr)[ixy*nplane+iz],1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofr[ixy * nplane + iz], 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), ((float*)rhofgr)[ixy * nplane + iz], 1e-4); #endif + } } - } - - - pwtest.real2recip(rhor,rhogout);//check out-of-place transform + pwtest.real2recip (rhor, rhogout); // check out-of-place transform - pwtest.real2recip((double*)rhogr,rhogr);//check in-place transform + pwtest.real2recip ((double*)rhogr, rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.real2recip(rhofr,rhofgout);//check out-of-place transform + pwtest.real2recip (rhofr, rhofgout); // check out-of-place transform - pwtest.real2recip((float*)rhofgr,rhofgr);//check in-place transform + pwtest.real2recip ((float*)rhofgr, rhofgr); // check in-place transform #endif - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(rhofg[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofg[ig].imag(),rhofgout[ig].imag(),1e-4); - EXPECT_NEAR(rhofgr[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofgr[ig].imag(),rhofgout[ig].imag(),1e-4); + EXPECT_NEAR (rhofg[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofg[ig].imag (), rhofgout[ig].imag (), 1e-4); + EXPECT_NEAR (rhofgr[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofgr[ig].imag (), 
rhofgout[ig].imag (), 1e-4); #endif - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; - - fftw_cleanup(); + } + + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; + + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - delete [] rhofg; - delete [] rhofgout; - delete [] rhofr; - delete [] rhofgr; - fftwf_cleanup(); + delete[] rhofg; + delete[] rhofgout; + delete[] rhofr; + delete[] rhofgr; + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test1-2.cpp b/source/source_basis/module_pw/test/test1-2.cpp index b5bd1c29aba..d5d364d13fc 100644 --- a/source/source_basis/module_pw/test/test1-2.cpp +++ b/source/source_basis/module_pw/test/test1-2.cpp @@ -12,18 +12,18 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test1_2) +TEST_F (PWTEST, test1_2) { - cout<<"dividemthd 1, gamma_only: off, xprime: false, check fft between complex and complex"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix - int(nx/2); - double vy = iy - int(ny/2); - double vz = iz - int(nz/2); - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut) - { - tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulus+1) + ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix - int (nx / 2); + double vy = iy - int (ny / 2); + double vz = iz - int (nz / 2); + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if 
(modulus <= ggecut) + { + tmp[ix * ny * nz + iy * nz + iz] + = 1.0 / (modulus + 1) + + ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - std::complex * rhor = new std::complex [nrxx]; + + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new 
std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + std::complex* rhor = new std::complex[nrxx]; #ifdef __ENABLE_FLOAT_FFTW - complex * rhofg = new complex [npw]; - complex * rhofgr = new complex [nmaxgr]; - complex * rhofgout = new complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhofg[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhofgr[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - complex * rhofr = new complex [nrxx]; + complex* rhofg = new complex[npw]; + complex* rhofgr = new complex[nmaxgr]; + complex* rhofgout = new complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhofg[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhofgr[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + complex* rhofr = new complex[nrxx]; #endif - - pwtest.recip2real(rhog,rhor);//check out-of-place transform - pwtest.recip2real(rhogr,rhogr);//check in-place transform + pwtest.recip2real (rhog, rhor); // check out-of-place transform + + pwtest.recip2real (rhogr, rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.recip2real(rhofg,rhofr);//check out-of-place transform + pwtest.recip2real (rhofg, rhofr); // check out-of-place transform - pwtest.recip2real(rhofgr,rhofgr);//check in-place transform + pwtest.recip2real (rhofgr, rhofgr); // check in-place transform #endif int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + 
iz].real(),rhor[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhor[ixy*nplane+iz].imag(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhogr[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhogr[ixy*nplane+iz].imag(),1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhor[ixy * nplane + iz].imag (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhogr[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhogr[ixy * nplane + iz].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofr[ixy*nplane+iz].real(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhofr[ixy*nplane+iz].imag(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofgr[ixy*nplane+iz].real(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhofgr[ixy*nplane+iz].imag(),1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofr[ixy * nplane + iz].real (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhofr[ixy * nplane + iz].imag (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofgr[ixy * nplane + iz].real (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhofgr[ixy * nplane + iz].imag (), 1e-4); #endif + } } - } - - - pwtest.real2recip(rhor,rhogout);//check out-of-place transform + pwtest.real2recip (rhor, rhogout); // check out-of-place transform - pwtest.real2recip(rhogr,rhogr);//check in-place transform + pwtest.real2recip (rhogr, rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.real2recip(rhofr,rhofgout);//check out-of-place transform + pwtest.real2recip (rhofr, rhofgout); // check out-of-place transform - pwtest.real2recip(rhofgr,rhofgr);//check in-place transform + pwtest.real2recip (rhofgr, 
rhofgr); // check in-place transform #endif - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(rhofg[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofg[ig].imag(),rhofgout[ig].imag(),1e-4); - EXPECT_NEAR(rhofgr[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofgr[ig].imag(),rhofgout[ig].imag(),1e-4); + EXPECT_NEAR (rhofg[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofg[ig].imag (), rhofgout[ig].imag (), 1e-4); + EXPECT_NEAR (rhofgr[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofgr[ig].imag (), rhofgout[ig].imag (), 1e-4); #endif - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; - - fftw_cleanup(); + } + + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; + + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - delete [] rhofg; - delete [] rhofgout; - delete [] rhofr; - delete [] rhofgr; - fftwf_cleanup(); + delete[] rhofg; + delete[] rhofgout; + delete[] rhofr; + delete[] rhofgr; + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test1-3.cpp b/source/source_basis/module_pw/test/test1-3.cpp index 961260a47d7..8fdcd2b2035 100644 --- a/source/source_basis/module_pw/test/test1-3.cpp +++ b/source/source_basis/module_pw/test/test1-3.cpp @@ -12,33 +12,33 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test1_3) +TEST_F (PWTEST, test1_3) { - cout<<"dividemthd 1, 
gamma_only: on, xprime: false, check fft"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix - int(nx/2); - double vy = iy - int(ny/2); - double vz = iz - int(nz/2); - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut) - { - tmp[ix*ny*nz + iy*nz + iz] = 1.0/(modulus+1); - if(vy > 0) tmp[ix*ny*nz + iy*nz + iz]+=ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); - else if(vy < 0) tmp[ix*ny*nz + iy*nz + iz]-=ModuleBase::IMAG_UNIT / (std::abs(-v.x+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix - int (nx / 2); + double vy = iy - int (ny / 2); + double vz = iz - int (nz / 2); + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut) + { + tmp[ix * ny * nz + iy * nz + iz] = 1.0 / (modulus + 1); + if (vy > 0) + { + tmp[ix * ny * nz + iy * nz + iz] + += ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + else if (vy < 0) + { + tmp[ix * ny * nz + iy * nz + iz] + -= ModuleBase::IMAG_UNIT / (std::abs (-v.x + 1) + 1); + } + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * 
real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1); - if(pwtest.gdirect[ig].y > 0) + + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) { - rhog[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhogr[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1); + if (pwtest.gdirect[ig].y > 0) + { + rhog[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhogr[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } } - } - double * rhor = new double [nrxx]; + double* rhor = new double[nrxx]; #ifdef __ENABLE_FLOAT_FFTW - complex * rhofg = new complex [npw]; - complex * rhofgr = new complex [nmaxgr]; - complex * rhofgout = new complex [npw]; - for(int ig = 0 ; ig < npw 
; ++ig) - { - rhofg[ig] = 1.0/(pwtest.gg[ig]+1); - rhofgr[ig] = 1.0/(pwtest.gg[ig]+1); - if(pwtest.gdirect[ig].y > 0) + complex* rhofg = new complex[npw]; + complex* rhofgr = new complex[nmaxgr]; + complex* rhofgout = new complex[npw]; + for (int ig = 0; ig < npw; ++ig) { - rhofg[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhofgr[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); + rhofg[ig] = 1.0 / (pwtest.gg[ig] + 1); + rhofgr[ig] = 1.0 / (pwtest.gg[ig] + 1); + if (pwtest.gdirect[ig].y > 0) + { + rhofg[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhofgr[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } } - } - float * rhofr = new float [nrxx]; + float* rhofr = new float[nrxx]; #endif - - pwtest.recip2real(rhog,rhor);//check out-of-place transform - pwtest.recip2real(rhogr,(double*)rhogr);//check in-place transform + pwtest.recip2real (rhog, rhor); // check out-of-place transform + + pwtest.recip2real (rhogr, (double*)rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.recip2real(rhofg,rhofr);//check out-of-place transform + pwtest.recip2real (rhofg, rhofr); // check out-of-place transform - pwtest.recip2real(rhofgr,(float*)rhofgr);//check in-place transform + pwtest.recip2real (rhofgr, (float*)rhofgr); // check in-place transform #endif int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz],1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((double*)rhogr)[ixy*nplane+iz],1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz], 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), ((double*)rhogr)[ixy * nplane + iz], 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(tmp[ixy * nz + 
startiz + iz].real(),rhofr[ixy*nplane+iz],1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((float*)rhofgr)[ixy*nplane+iz],1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofr[ixy * nplane + iz], 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), ((float*)rhofgr)[ixy * nplane + iz], 1e-4); #endif + } } - } - - - pwtest.real2recip(rhor,rhogout);//check out-of-place transform - pwtest.real2recip((double*)rhogr,rhogr);//check in-place transform + pwtest.real2recip (rhor, rhogout); // check out-of-place transform + + pwtest.real2recip ((double*)rhogr, rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.real2recip(rhofr,rhofgout);//check out-of-place transform + pwtest.real2recip (rhofr, rhofgout); // check out-of-place transform - pwtest.real2recip((float*)rhofgr,rhofgr);//check in-place transform + pwtest.real2recip ((float*)rhofgr, rhofgr); // check in-place transform #endif - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(rhofg[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofg[ig].imag(),rhofgout[ig].imag(),1e-4); - EXPECT_NEAR(rhofgr[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofgr[ig].imag(),rhofgout[ig].imag(),1e-4); + EXPECT_NEAR (rhofg[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofg[ig].imag (), rhofgout[ig].imag (), 1e-4); + EXPECT_NEAR (rhofgr[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofgr[ig].imag (), rhofgout[ig].imag (), 
1e-4); #endif - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; - - fftw_cleanup(); + } + + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; + + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - delete [] rhofg; - delete [] rhofgout; - delete [] rhofr; - delete [] rhofgr; - fftwf_cleanup(); + delete[] rhofg; + delete[] rhofgout; + delete[] rhofr; + delete[] rhofgr; + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test1-4.cpp b/source/source_basis/module_pw/test/test1-4.cpp index 61d403c30e8..4eacc068e7b 100644 --- a/source/source_basis/module_pw/test/test1-4.cpp +++ b/source/source_basis/module_pw/test/test1-4.cpp @@ -12,24 +12,24 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test1_4) +TEST_F (PWTEST, test1_4) { - cout<<"dividemthd 1, gamma_only: off, xprime: false, 2 kpoints, check fft"< *kvec_d; + ModuleBase::Vector3* kvec_d; int nks; //-------------------------------------------------- lat0 = 2; - ModuleBase::Matrix3 la(1, 1, 0, 0, 2, 0, 0, 0, 2); + ModuleBase::Matrix3 la (1, 1, 0, 0, 2, 0, 0, 0, 2); nks = 2; kvec_d = new ModuleBase::Vector3[nks]; - kvec_d[0].set(0,0,0.5); - kvec_d[1].set(0.5,0.5,0.5); + kvec_d[0].set (0, 0, 0.5); + kvec_d[1].set (0.5, 0.5, 0.5); latvec = la; wfcecut = 10; gamma_only = false; @@ -37,21 +37,20 @@ TEST_F(PWTEST,test1_4) bool xprime = false; //-------------------------------------------------- #ifdef __MPI - pwtest.initmpi(nproc_in_pool, rank_in_pool, POOL_WORLD); + pwtest.initmpi (nproc_in_pool, rank_in_pool, POOL_WORLD); #endif - //Useless, only to test reinit function. 
- pwtest.initgrids(2, latvec, 4,4,4); - pwtest.initparameters(true, 200, nks, kvec_d, 2,xprime); - pwtest.setuptransform(); - pwtest.collect_local_pw(); - EXPECT_EQ(pwtest.nstot,5); - - - //init //real parameter - pwtest.initgrids(lat0,latvec,4*wfcecut); - pwtest.initparameters(gamma_only,wfcecut,nks,kvec_d,distribution_type, xprime); - pwtest.setuptransform(); - pwtest.collect_local_pw(); + // Useless, only to test reinit function. + pwtest.initgrids (2, latvec, 4, 4, 4); + pwtest.initparameters (true, 200, nks, kvec_d, 2, xprime); + pwtest.setuptransform (); + pwtest.collect_local_pw (); + EXPECT_EQ (pwtest.nstot, 5); + + // init //real parameter + pwtest.initgrids (lat0, latvec, 4 * wfcecut); + pwtest.initparameters (gamma_only, wfcecut, nks, kvec_d, distribution_type, xprime); + pwtest.setuptransform (); + pwtest.collect_local_pw (); const int nrxx = pwtest.nrxx; const int nmaxgr = pwtest.nmaxgr; @@ -62,162 +61,176 @@ TEST_F(PWTEST,test1_4) double tpiba2 = ModuleBase::TWO_PI * ModuleBase::TWO_PI / lat0 / lat0; double ggecut = wfcecut / tpiba2; - ModuleBase::Matrix3 GT,G,GGT; - GT = latvec.Inverse(); - G = GT.Transpose(); - GGT = G * GT; - std::complex *tmp = new std::complex [nx*ny*nz]; - std::complex * rhor = new std::complex [nrxx]; - std::complex * rhogr = new std::complex [nmaxgr]; + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + std::complex* rhor = new std::complex[nrxx]; + std::complex* rhogr = new std::complex[nmaxgr]; #ifdef __ENABLE_FLOAT_FFTW - complex * rhofr = new complex [nrxx]; - complex * rhofgr = new complex [nmaxgr]; + complex* rhofr = new complex[nrxx]; + complex* rhofgr = new complex[nmaxgr]; #endif - for(int ik = 0; ik < nks; ++ik) - { - int npwk = pwtest.npwk[ik]; - if(rank_in_pool == 0) + for (int ik = 0; ik < nks; ++ik) { - ModuleBase::Vector3 kk = kvec_d[ik]; - for(int ix = 0 ; ix < nx ; ++ix) - { - for(int iy = 0 ; iy < ny ; ++iy) 
+ int npwk = pwtest.npwk[ik]; + if (rank_in_pool == 0) { - for(int iz = 0 ; iz < nz ; ++iz) - { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix - int(nx/2); - double vy = iy - int(ny/2); - double vz = iz - int(nz/2); - ModuleBase::Vector3 v(vx,vy,vz); - // double modulus = v * (GGT * v); - double modulusgk = (v+kk) * (GGT * (v+kk)); - if (modulusgk <= ggecut) + ModuleBase::Vector3 kk = kvec_d[ik]; + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulusgk+1) + ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix - int (nx / 2); + double vy = iy - int (ny / 2); + double vz = iz - int (nz / 2); + ModuleBase::Vector3 v (vx, vy, vz); + // double modulus = v * (GGT * v); + double modulusgk = (v + kk) * (GGT * (v + kk)); + if (modulusgk <= ggecut) + { + tmp[ix * ny * nz + iy * nz + iz] + = 1.0 / (modulusgk + 1) + + ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + } + } + } + fftw_plan pp = fftw_plan_dft_3d (nx, + ny, + nz, + (fftw_complex*)tmp, + (fftw_complex*)tmp, + FFTW_BACKWARD, + FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; 
++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); } - } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - std::complex * rhog = new std::complex [npwk]; - std::complex * rhogout = new std::complex [npwk]; + std::complex* rhog = new std::complex[npwk]; + std::complex* rhogout = new std::complex[npwk]; #ifdef __ENABLE_FLOAT_FFTW - complex * rhofg = new complex [npwk]; - complex * rhofgout = new complex [npwk]; + complex* rhofg = new complex[npwk]; + complex* rhofgout = new complex[npwk]; #endif - for(int ig = 0 ; ig < npwk ; ++ig) - { - rhog[ig] = 1.0/(pwtest.getgk2(ik,ig)+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.getgdirect(ik,ig).x+1) + 1); - rhogr[ig] = 1.0/(pwtest.getgk2(ik,ig)+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.getgdirect(ik,ig).x+1) + 1); - } + for (int ig = 0; ig < npwk; ++ig) + { + rhog[ig] = 1.0 / (pwtest.getgk2 (ik, ig) + 1) + + ModuleBase::IMAG_UNIT / (std::abs (pwtest.getgdirect (ik, ig).x + 1) + 1); + rhogr[ig] = 1.0 / (pwtest.getgk2 (ik, ig) + 1) + + ModuleBase::IMAG_UNIT / (std::abs (pwtest.getgdirect (ik, ig).x + 1) + 1); + } #ifdef __ENABLE_FLOAT_FFTW - for(int ig = 0 ; ig < npwk ; ++ig) - { - rhofg[ig] = 1.0/(pwtest.getgk2(ik,ig)+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.getgdirect(ik,ig).x+1) + 1); - rhofgr[ig] = 1.0/(pwtest.getgk2(ik,ig)+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.getgdirect(ik,ig).x+1) + 1); - } + for (int ig = 0; ig < npwk; ++ig) + { + rhofg[ig] = 1.0 / (pwtest.getgk2 (ik, ig) + 1) + + ModuleBase::IMAG_UNIT / (std::abs (pwtest.getgdirect (ik, ig).x + 1) + 1); + rhofgr[ig] = 1.0 / (pwtest.getgk2 (ik, ig) + 1) + + ModuleBase::IMAG_UNIT / (std::abs (pwtest.getgdirect (ik, ig).x + 1) + 1); + } #endif - pwtest.recip2real(rhog,rhor,ik); //check 
out-of-place transform + pwtest.recip2real (rhog, rhor, ik); // check out-of-place transform - pwtest.recip2real(rhogr,rhogr,ik); //check in-place transform + pwtest.recip2real (rhogr, rhogr, ik); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.recip2real(rhofg,rhofr,ik); //check out-of-place transform + pwtest.recip2real (rhofg, rhofr, ik); // check out-of-place transform - pwtest.recip2real(rhofgr,rhofgr,ik); //check in-place transform + pwtest.recip2real (rhofgr, rhofgr, ik); // check in-place transform #endif - int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) - { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhor[ixy*nplane+iz].imag(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhogr[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhogr[ixy*nplane+iz].imag(),1e-6); + int startiz = pwtest.startz_current; + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhor[ixy * nplane + iz].imag (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhogr[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhogr[ixy * nplane + iz].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofr[ixy*nplane+iz].real(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhofr[ixy*nplane+iz].imag(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofgr[ixy*nplane+iz].real(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhofgr[ixy*nplane+iz].imag(),1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofr[ixy * nplane + iz].real (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + 
startiz + iz].imag (), rhofr[ixy * nplane + iz].imag (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofgr[ixy * nplane + iz].real (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhofgr[ixy * nplane + iz].imag (), 1e-4); #endif - } - } + } + } - pwtest.real2recip(rhor,rhogout,ik); + pwtest.real2recip (rhor, rhogout, ik); - pwtest.real2recip(rhogr,rhogr,ik); + pwtest.real2recip (rhogr, rhogr, ik); #ifdef __ENABLE_FLOAT_FFTW - pwtest.real2recip(rhofr,rhofgout,ik); + pwtest.real2recip (rhofr, rhofgout, ik); - pwtest.real2recip(rhofgr,rhofgr,ik); + pwtest.real2recip (rhofgr, rhofgr, ik); #endif - for(int ig = 0 ; ig < npwk ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhog[ig].real(),rhogr[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogr[ig].imag(),1e-6); + for (int ig = 0; ig < npwk; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhog[ig].real (), rhogr[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogr[ig].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(rhofg[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofg[ig].imag(),rhofgout[ig].imag(),1e-4); - EXPECT_NEAR(rhofg[ig].real(),rhofgr[ig].real(),1e-4); - EXPECT_NEAR(rhofg[ig].imag(),rhofgr[ig].imag(),1e-4); + EXPECT_NEAR (rhofg[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofg[ig].imag (), rhofgout[ig].imag (), 1e-4); + EXPECT_NEAR (rhofg[ig].real (), rhofgr[ig].real (), 1e-4); + EXPECT_NEAR (rhofg[ig].imag (), rhofgr[ig].imag (), 1e-4); #endif - } - + } - delete [] rhog; - delete [] rhogout; + delete[] rhog; + delete[] rhogout; #ifdef __ENABLE_FLOAT_FFTW - delete [] rhofg; - delete [] rhofgout; + delete[] rhofg; + delete[] rhofgout; #endif - //check igl2ig - for(int igl = 0; igl < npwk ; ++igl) - { - const int isz = pwtest.getigl2isz(ik,igl); - for(int ig 
= 0 ; ig < pwtest.npw; ++ig) - { - if(isz == pwtest.ig2isz[ig]){ - EXPECT_EQ(ig,pwtest.getigl2ig(ik,igl));} - } + // check igl2ig + for (int igl = 0; igl < npwk; ++igl) + { + const int isz = pwtest.getigl2isz (ik, igl); + for (int ig = 0; ig < pwtest.npw; ++ig) + { + if (isz == pwtest.ig2isz[ig]) + { + EXPECT_EQ (ig, pwtest.getigl2ig (ik, igl)); + } + } + } } - - } - delete []tmp; - delete [] rhor; + delete[] tmp; + delete[] rhor; delete[] kvec_d; delete[] rhogr; - fftw_cleanup(); + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW delete[] rhofr; delete[] rhofgr; - fftwf_cleanup(); + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test1-5.cpp b/source/source_basis/module_pw/test/test1-5.cpp index 7f07659d2d1..3cec410a318 100644 --- a/source/source_basis/module_pw/test/test1-5.cpp +++ b/source/source_basis/module_pw/test/test1-5.cpp @@ -12,23 +12,23 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test1_5) +TEST_F (PWTEST, test1_5) { - cout<<"dividemthd 1, gamma_only: on, xprime: false, gamma kpoint, check fft"< *kvec_d; + ModuleBase::Vector3* kvec_d; int nks; //-------------------------------------------------- lat0 = 2.7; - ModuleBase::Matrix3 la(1, 0.3, 0, 0, 2, 0, 0, 0, 2); + ModuleBase::Matrix3 la (1, 0.3, 0, 0, 2, 0, 0, 0, 2); nks = 1; kvec_d = new ModuleBase::Vector3[nks]; - kvec_d[0].set(0,0,0); + kvec_d[0].set (0, 0, 0); latvec = la; wfcecut = 10; gamma_only = true; @@ -36,13 +36,13 @@ TEST_F(PWTEST,test1_5) bool xprime = false; //-------------------------------------------------- #ifdef __MPI - pwtest.initmpi(nproc_in_pool, rank_in_pool, POOL_WORLD); + pwtest.initmpi (nproc_in_pool, rank_in_pool, POOL_WORLD); #endif - //init //real parameter - pwtest.initgrids(lat0,latvec,4*wfcecut); - pwtest.initparameters(gamma_only,wfcecut,nks,kvec_d,distribution_type, xprime); - pwtest.setuptransform(); - pwtest.collect_local_pw(); + // init //real parameter + pwtest.initgrids (lat0, latvec, 4 * wfcecut); + 
pwtest.initparameters (gamma_only, wfcecut, nks, kvec_d, distribution_type, xprime); + pwtest.setuptransform (); + pwtest.collect_local_pw (); const int nrxx = pwtest.nrxx; const int nmaxgr = pwtest.nmaxgr; @@ -53,172 +53,192 @@ TEST_F(PWTEST,test1_5) double tpiba2 = ModuleBase::TWO_PI * ModuleBase::TWO_PI / lat0 / lat0; double ggecut = wfcecut / tpiba2; - ModuleBase::Matrix3 GT,G,GGT; - GT = latvec.Inverse(); - G = GT.Transpose(); - GGT = G * GT; - std::complex *tmp = new std::complex [nx*ny*nz]; - std::complex * rhogr = new std::complex [nmaxgr]; - double * rhor = new double [nrxx]; + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + std::complex* rhogr = new std::complex[nmaxgr]; + double* rhor = new double[nrxx]; #ifdef __ENABLE_FLOAT_FFTW - float * rhofr = new float [nrxx]; - complex * rhofgr = new complex [nmaxgr]; + float* rhofr = new float[nrxx]; + complex* rhofgr = new complex[nmaxgr]; #endif - for(int ik = 0; ik < nks; ++ik) - { - int npwk = pwtest.npwk[ik]; - if(rank_in_pool == 0) + for (int ik = 0; ik < nks; ++ik) { - ModuleBase::Vector3 kk = kvec_d[ik]; - for(int ix = 0 ; ix < nx ; ++ix) - { - for(int iy = 0 ; iy < ny ; ++iy) + int npwk = pwtest.npwk[ik]; + if (rank_in_pool == 0) { - for(int iz = 0 ; iz < nz ; ++iz) - { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix - int(nx/2); - double vy = iy - int(ny/2); - double vz = iz - int(nz/2); - ModuleBase::Vector3 v(vx,vy,vz); - // double modulus = v * (GGT * v); - double modulusgk = (v+kk) * (GGT * (v+kk)); - if (modulusgk <= ggecut) + ModuleBase::Vector3 kk = kvec_d[ik]; + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulusgk+1); - if(vy > 0) tmp[ix*ny*nz + iy*nz + iz]+=ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); - else if(vy < 0) tmp[ix*ny*nz + iy*nz + iz]-=ModuleBase::IMAG_UNIT / (std::abs(-v.x+1) + 1); + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; 
++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix - int (nx / 2); + double vy = iy - int (ny / 2); + double vz = iz - int (nz / 2); + ModuleBase::Vector3 v (vx, vy, vz); + // double modulus = v * (GGT * v); + double modulusgk = (v + kk) * (GGT * (v + kk)); + if (modulusgk <= ggecut) + { + tmp[ix * ny * nz + iy * nz + iz] = 1.0 / (modulusgk + 1); + if (vy > 0) + { + tmp[ix * ny * nz + iy * nz + iz] + += ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + else if (vy < 0) + { + tmp[ix * ny * nz + iy * nz + iz] + -= ModuleBase::IMAG_UNIT / (std::abs (-v.x + 1) + 1); + } + } + } + } + } + fftw_plan pp = fftw_plan_dft_3d (nx, + ny, + nz, + (fftw_complex*)tmp, + (fftw_complex*)tmp, + FFTW_BACKWARD, + FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); } - } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - std::complex * rhog = new std::complex [npwk]; - 
std::complex * rhogout = new std::complex [npwk]; + std::complex* rhog = new std::complex[npwk]; + std::complex* rhogout = new std::complex[npwk]; #ifdef __ENABLE_FLOAT_FFTW - complex * rhofg = new complex [npwk]; - complex * rhofgout = new complex [npwk]; + complex* rhofg = new complex[npwk]; + complex* rhofgout = new complex[npwk]; #endif - for(int ig = 0 ; ig < npwk ; ++ig) - { - rhog[ig] = 1.0/(pwtest.getgk2(ik,ig)+1); - rhogr[ig] = 1.0/(pwtest.getgk2(ik,ig)+1); - ModuleBase::Vector3 f = pwtest.getgdirect(ik,ig); - if(f.y > 0) - { - rhog[ig]+=ModuleBase::IMAG_UNIT / (std::abs(f.x+1) + 1); - rhogr[ig]+=ModuleBase::IMAG_UNIT / (std::abs(f.x+1) + 1); - } - } + for (int ig = 0; ig < npwk; ++ig) + { + rhog[ig] = 1.0 / (pwtest.getgk2 (ik, ig) + 1); + rhogr[ig] = 1.0 / (pwtest.getgk2 (ik, ig) + 1); + ModuleBase::Vector3 f = pwtest.getgdirect (ik, ig); + if (f.y > 0) + { + rhog[ig] += ModuleBase::IMAG_UNIT / (std::abs (f.x + 1) + 1); + rhogr[ig] += ModuleBase::IMAG_UNIT / (std::abs (f.x + 1) + 1); + } + } #ifdef __ENABLE_FLOAT_FFTW - for(int ig = 0 ; ig < npwk ; ++ig) - { - rhofg[ig] = 1.0/(pwtest.getgk2(ik,ig)+1); - rhofgr[ig] = 1.0/(pwtest.getgk2(ik,ig)+1); - ModuleBase::Vector3 f = pwtest.getgdirect(ik,ig); - if(f.y > 0) - { - rhofg[ig]+=ModuleBase::IMAG_UNIT / (std::abs(f.x+1) + 1); - rhofgr[ig]+=ModuleBase::IMAG_UNIT / (std::abs(f.x+1) + 1); - } - } + for (int ig = 0; ig < npwk; ++ig) + { + rhofg[ig] = 1.0 / (pwtest.getgk2 (ik, ig) + 1); + rhofgr[ig] = 1.0 / (pwtest.getgk2 (ik, ig) + 1); + ModuleBase::Vector3 f = pwtest.getgdirect (ik, ig); + if (f.y > 0) + { + rhofg[ig] += ModuleBase::IMAG_UNIT / (std::abs (f.x + 1) + 1); + rhofgr[ig] += ModuleBase::IMAG_UNIT / (std::abs (f.x + 1) + 1); + } + } #endif - pwtest.recip2real(rhog,rhor,ik); //check out-of-place transform + pwtest.recip2real (rhog, rhor, ik); // check out-of-place transform - pwtest.recip2real(rhogr,(double*)rhogr,ik); //check in-place transform + pwtest.recip2real (rhogr, (double*)rhogr, ik); // check 
in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.recip2real(rhofg,rhofr,ik); //check out-of-place transform + pwtest.recip2real (rhofg, rhofr, ik); // check out-of-place transform - pwtest.recip2real(rhofgr,(float*)rhofgr,ik); //check in-place transform + pwtest.recip2real (rhofgr, (float*)rhofgr, ik); // check in-place transform #endif - int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) - { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz],1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((double*)rhogr)[ixy*nplane+iz],1e-6); + int startiz = pwtest.startz_current; + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz], 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), + ((double*)rhogr)[ixy * nplane + iz], + 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofr[ixy*nplane+iz],1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((float*)rhofgr)[ixy*nplane+iz],1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofr[ixy * nplane + iz], 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), + ((float*)rhofgr)[ixy * nplane + iz], + 1e-4); #endif - } - } + } + } - pwtest.real2recip(rhor,rhogout,ik); + pwtest.real2recip (rhor, rhogout, ik); - pwtest.real2recip((double*)rhogr,rhogr,ik); + pwtest.real2recip ((double*)rhogr, rhogr, ik); #ifdef __ENABLE_FLOAT_FFTW - pwtest.real2recip(rhofr,rhofgout,ik); + pwtest.real2recip (rhofr, rhofgout, ik); - pwtest.real2recip((float*)rhofgr,rhofgr,ik); + pwtest.real2recip ((float*)rhofgr, rhofgr, ik); #endif - for(int ig = 0 ; ig < npwk ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhog[ig].real(),rhogr[ig].real(),1e-6); - 
EXPECT_NEAR(rhog[ig].imag(),rhogr[ig].imag(),1e-6); + for (int ig = 0; ig < npwk; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhog[ig].real (), rhogr[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogr[ig].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(rhofg[ig].real(),rhofgout[ig].real(),1e-6); - EXPECT_NEAR(rhofg[ig].imag(),rhofgout[ig].imag(),1e-6); - EXPECT_NEAR(rhofg[ig].real(),rhofgr[ig].real(),1e-6); - EXPECT_NEAR(rhofg[ig].imag(),rhofgr[ig].imag(),1e-6); + EXPECT_NEAR (rhofg[ig].real (), rhofgout[ig].real (), 1e-6); + EXPECT_NEAR (rhofg[ig].imag (), rhofgout[ig].imag (), 1e-6); + EXPECT_NEAR (rhofg[ig].real (), rhofgr[ig].real (), 1e-6); + EXPECT_NEAR (rhofg[ig].imag (), rhofgr[ig].imag (), 1e-6); #endif - } - + } - delete [] rhog; - delete [] rhogout; + delete[] rhog; + delete[] rhogout; #ifdef __ENABLE_FLOAT_FFTW - delete [] rhofg; - delete [] rhofgout; + delete[] rhofg; + delete[] rhofgout; #endif - //check igl2ig - for(int igl = 0; igl < npwk ; ++igl) - { - const int isz = pwtest.getigl2isz(ik,igl); - for(int ig = 0 ; ig < pwtest.npw; ++ig) - { - if(isz == pwtest.ig2isz[ig]){ - EXPECT_EQ(ig,pwtest.getigl2ig(ik,igl));} - } + // check igl2ig + for (int igl = 0; igl < npwk; ++igl) + { + const int isz = pwtest.getigl2isz (ik, igl); + for (int ig = 0; ig < pwtest.npw; ++ig) + { + if (isz == pwtest.ig2isz[ig]) + { + EXPECT_EQ (ig, pwtest.getigl2ig (ik, igl)); + } + } + } } - - } - delete []tmp; - delete [] rhor; + delete[] tmp; + delete[] rhor; delete[] kvec_d; delete[] rhogr; - fftw_cleanup(); + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW delete[] rhofr; delete[] rhofgr; - fftwf_cleanup(); + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test2-1-1.cpp b/source/source_basis/module_pw/test/test2-1-1.cpp index 57a70c6af38..d4b8af05c93 100644 --- 
a/source/source_basis/module_pw/test/test2-1-1.cpp +++ b/source/source_basis/module_pw/test/test2-1-1.cpp @@ -7,14 +7,14 @@ #include "source_base/global_function.h" #include "source_base/constants.h" #include "pw_test.h" -extern int nproc_in_pool,rank_in_pool; +extern int nproc_in_pool, rank_in_pool; using namespace std; -TEST_F(PWTEST,test2_1_1) +TEST_F (PWTEST, test2_1_1) { - cout<<"dividemthd 2, gamma_only: off, check gcar,gdirect,gg,istot2ixy,ig2isz"< f; - f.x = ix; - f.y = iy; - f.z = iz; - if(iz >= int(pwtest.nz/2) +1) f.z -= pwtest.nz; - if(iy >= int(pwtest.ny/2) +1) f.y -= pwtest.ny; - if(ix >= int(pwtest.nx/2) +1) f.x -= pwtest.nx; - double modulus = f * (GGT * f); - if (modulus <= ggecut) - { - EXPECT_EQ(tmpx[iz + iy*pwtest.nz + ix*pwtest.ny*pwtest.nz], int(f.x)); - EXPECT_EQ(tmpy[iz + iy*pwtest.nz + ix*pwtest.ny*pwtest.nz], int(f.y)); - EXPECT_EQ(tmpz[iz + iy*pwtest.nz + ix*pwtest.ny*pwtest.nz], int(f.z)); - } - + for (int iy = 0; iy < pwtest.ny; ++iy) + { + for (int ix = 0; ix < pwtest.nx; ++ix) + { + ModuleBase::Vector3 f; + f.x = ix; + f.y = iy; + f.z = iz; + if (iz >= int (pwtest.nz / 2) + 1) + { + f.z -= pwtest.nz; + } + if (iy >= int (pwtest.ny / 2) + 1) + { + f.y -= pwtest.ny; + } + if (ix >= int (pwtest.nx / 2) + 1) + { + f.x -= pwtest.nx; + } + double modulus = f * (GGT * f); + if (modulus <= ggecut) + { + EXPECT_EQ (tmpx[iz + iy * pwtest.nz + ix * pwtest.ny * pwtest.nz], + int (f.x)); + EXPECT_EQ (tmpy[iz + iy * pwtest.nz + ix * pwtest.ny * pwtest.nz], + int (f.y)); + EXPECT_EQ (tmpz[iz + iy * pwtest.nz + ix * pwtest.ny * pwtest.nz], + int (f.z)); + } + } + } } - } } - } - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - ModuleBase::Vector3 f; - f.x = pwtest.gdirect[ig].x; - f.y = pwtest.gdirect[ig].y; - f.z = pwtest.gdirect[ig].z; - ModuleBase::Vector3 gcar; - gcar = f * G; - double modulus = f*GGT*f; - EXPECT_NEAR(gcar.x,pwtest.gcar[ig].x,1e-6); - EXPECT_NEAR(gcar.y,pwtest.gcar[ig].y,1e-6); - EXPECT_NEAR(gcar.z,pwtest.gcar[ig].z,1e-6); - 
EXPECT_NEAR(modulus,pwtest.gg[ig],1e-6); - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - for(int igg = 1 ; igg < pwtest.ngg ; ++igg) - { - EXPECT_GT(pwtest.gg_uniq[igg], pwtest.gg_uniq[igg-1]); - } - if(pwtest.ig_gge0 >= 0) {EXPECT_NEAR(0.0, pwtest.gg[pwtest.ig_gge0], 1e-8);} - delete [] startnst; - delete [] tmpx; - delete [] tmpy; - delete [] tmpz; + for (int ig = 0; ig < pwtest.npw; ++ig) + { + ModuleBase::Vector3 f; + f.x = pwtest.gdirect[ig].x; + f.y = pwtest.gdirect[ig].y; + f.z = pwtest.gdirect[ig].z; + ModuleBase::Vector3 gcar; + gcar = f * G; + double modulus = f * GGT * f; + EXPECT_NEAR (gcar.x, pwtest.gcar[ig].x, 1e-6); + EXPECT_NEAR (gcar.y, pwtest.gcar[ig].y, 1e-6); + EXPECT_NEAR (gcar.z, pwtest.gcar[ig].z, 1e-6); + EXPECT_NEAR (modulus, pwtest.gg[ig], 1e-6); + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + for (int igg = 1; igg < pwtest.ngg; ++igg) + { + EXPECT_GT (pwtest.gg_uniq[igg], pwtest.gg_uniq[igg - 1]); + } + if (pwtest.ig_gge0 >= 0) + { + EXPECT_NEAR (0.0, pwtest.gg[pwtest.ig_gge0], 1e-8); + } + delete[] startnst; + delete[] tmpx; + delete[] tmpy; + delete[] tmpz; } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test2-1-2.cpp b/source/source_basis/module_pw/test/test2-1-2.cpp index 38247478001..6f7eba0fe43 100644 --- a/source/source_basis/module_pw/test/test2-1-2.cpp +++ b/source/source_basis/module_pw/test/test2-1-2.cpp @@ -7,14 +7,14 @@ #include "source_base/global_function.h" #include "source_base/constants.h" #include "pw_test.h" -extern int nproc_in_pool,rank_in_pool; +extern int nproc_in_pool, rank_in_pool; using namespace std; -TEST_F(PWTEST,test2_1_2) +TEST_F (PWTEST, test2_1_2) { - cout<<"dividemthd 2, gamma_only: on, xprime: false, check gcar,gdirect,gg,istot2ixy,ig2isz"< f; - f.x = ix; - f.y = iy; - f.z = iz; - if(iz >= int(pwtest.nz/2) +1) f.z -= pwtest.nz; - if(ix >= int(pwtest.nx/2) +1) f.x -= pwtest.nx; - double modulus = f * (GGT * f); - if 
(modulus <= ggecut) - { - EXPECT_EQ(tmpx[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.x)); - EXPECT_EQ(tmpy[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.y)); - EXPECT_EQ(tmpz[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.z)); - } - + for (int iy = 0; iy < pwtest.fftny; ++iy) + { + for (int ix = 0; ix < pwtest.fftnx; ++ix) + { + ModuleBase::Vector3 f; + f.x = ix; + f.y = iy; + f.z = iz; + if (iz >= int (pwtest.nz / 2) + 1) + { + f.z -= pwtest.nz; + } + if (ix >= int (pwtest.nx / 2) + 1) + { + f.x -= pwtest.nx; + } + double modulus = f * (GGT * f); + if (modulus <= ggecut) + { + EXPECT_EQ (tmpx[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.x)); + EXPECT_EQ (tmpy[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.y)); + EXPECT_EQ (tmpz[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.z)); + } + } + } } - } } - } - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - ModuleBase::Vector3 f; - f.x = pwtest.gdirect[ig].x; - f.y = pwtest.gdirect[ig].y; - f.z = pwtest.gdirect[ig].z; - ModuleBase::Vector3 gcar; - gcar = f * G; - double modulus = f*GGT*f; - EXPECT_NEAR(gcar.x,pwtest.gcar[ig].x,1e-6); - EXPECT_NEAR(gcar.y,pwtest.gcar[ig].y,1e-6); - EXPECT_NEAR(gcar.z,pwtest.gcar[ig].z,1e-6); - EXPECT_NEAR(modulus,pwtest.gg[ig],1e-6); - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - for(int igg = 1 ; igg < pwtest.ngg ; ++igg) - { - EXPECT_GT(pwtest.gg_uniq[igg], pwtest.gg_uniq[igg-1]); - } - if(pwtest.ig_gge0 >= 0) {EXPECT_NEAR(0.0, pwtest.gg[pwtest.ig_gge0], 1e-8);} - delete [] startnst; - delete [] tmpx; - delete [] tmpy; - delete [] tmpz; + for (int ig = 0; ig < pwtest.npw; ++ig) + { + ModuleBase::Vector3 f; + f.x = pwtest.gdirect[ig].x; + f.y = pwtest.gdirect[ig].y; + f.z = pwtest.gdirect[ig].z; + ModuleBase::Vector3 gcar; + gcar = f * G; + double modulus = f * GGT * f; + EXPECT_NEAR (gcar.x, pwtest.gcar[ig].x, 1e-6); + EXPECT_NEAR (gcar.y, 
pwtest.gcar[ig].y, 1e-6); + EXPECT_NEAR (gcar.z, pwtest.gcar[ig].z, 1e-6); + EXPECT_NEAR (modulus, pwtest.gg[ig], 1e-6); + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + for (int igg = 1; igg < pwtest.ngg; ++igg) + { + EXPECT_GT (pwtest.gg_uniq[igg], pwtest.gg_uniq[igg - 1]); + } + if (pwtest.ig_gge0 >= 0) + { + EXPECT_NEAR (0.0, pwtest.gg[pwtest.ig_gge0], 1e-8); + } + delete[] startnst; + delete[] tmpx; + delete[] tmpy; + delete[] tmpz; } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test2-2.cpp b/source/source_basis/module_pw/test/test2-2.cpp index 63e074ea9a3..66db5a61850 100644 --- a/source/source_basis/module_pw/test/test2-2.cpp +++ b/source/source_basis/module_pw/test/test2-2.cpp @@ -12,33 +12,33 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test2_2) +TEST_F (PWTEST, test2_2) { - cout<<"dividemthd 2, gamma_only: off, xprime: false, check fft"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix - int(nx/2); - double vy = iy - int(ny/2); - double vz = iz - int(nz/2); - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut) - { - tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulus+1) + ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix - int (nx / 2); + double vy = iy - int (ny / 2); + double vz = iz - int (nz / 2); + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut) + { + tmp[ix * ny * nz + iy * nz + iz] + 
= 1.0 / (modulus + 1) + + ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + // output + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - //output - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - std::complex * rhor = new std::complex [nrxx]; - pwtest.recip2real(rhog,rhor); + + std::complex* rhog = new std::complex[npw]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); 
+ } + std::complex* rhor = new std::complex[nrxx]; + pwtest.recip2real (rhog, rhor); int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhor[ixy*nplane+iz].imag(),1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhor[ixy * nplane + iz].imag (), 1e-6); + } } - } - pwtest.real2recip(rhor,rhogout); - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - } - + pwtest.real2recip (rhor, rhogout); + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + } - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete []tmp; - fftw_cleanup(); + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - fftwf_cleanup(); + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test2-3.cpp b/source/source_basis/module_pw/test/test2-3.cpp index 7286f06ca6b..4f5509ab6d2 100644 --- a/source/source_basis/module_pw/test/test2-3.cpp +++ b/source/source_basis/module_pw/test/test2-3.cpp @@ -12,33 +12,33 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test2_3) +TEST_F (PWTEST, test2_3) { - cout<<"dividemthd 2, gamma_only: on, xprime: false, check fft"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new 
std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix - int(nx/2); - double vy = iy - int(ny/2); - double vz = iz - int(nz/2); - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut) - { - tmp[ix*ny*nz + iy*nz + iz] = 1.0/(modulus+1); - if(vy > 0) tmp[ix*ny*nz + iy*nz + iz]+=ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); - else if(vy < 0) tmp[ix*ny*nz + iy*nz + iz]-=ModuleBase::IMAG_UNIT / (std::abs(-v.x+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix - int (nx / 2); + double vy = iy - int (ny / 2); + double vz = iz - int (nz / 2); + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut) + { + tmp[ix * ny * nz + iy * nz + iz] = 1.0 / (modulus + 1); + if (vy > 0) + { + tmp[ix * ny * nz + iy * nz + iz] + += ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + else if (vy < 0) + { + tmp[ix * ny * nz + iy * nz + iz] + -= ModuleBase::IMAG_UNIT / (std::abs (-v.x + 1) + 1); + } + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - 
fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1); - if(pwtest.gdirect[ig].y > 0) rhog[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - double * rhor = new double [nrxx]; - pwtest.recip2real(rhog,rhor); + + std::complex* rhog = new std::complex[npw]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1); + if (pwtest.gdirect[ig].y > 0) + { + rhog[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + } + double* rhor = new double[nrxx]; + pwtest.recip2real (rhog, rhor); int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz], 1e-6); + } + } + + pwtest.real2recip (rhor, rhogout); + for (int ig = 0; ig < npw; ++ig) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz],1e-6); + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); } - } - - - pwtest.real2recip(rhor,rhogout); - for(int ig = 0 ; ig < npw ; ++ig) - { - 
EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - fftw_cleanup(); + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - fftwf_cleanup(); + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test3-1.cpp b/source/source_basis/module_pw/test/test3-1.cpp index 33ac4f46b08..395c1bf5855 100644 --- a/source/source_basis/module_pw/test/test3-1.cpp +++ b/source/source_basis/module_pw/test/test3-1.cpp @@ -7,14 +7,14 @@ #include "source_base/global_function.h" #include "source_base/constants.h" #include "pw_test.h" -extern int nproc_in_pool,rank_in_pool; +extern int nproc_in_pool, rank_in_pool; using namespace std; -TEST_F(PWTEST,test3_1) +TEST_F (PWTEST, test3_1) { - cout<<"dividemthd 1, gamma_only: on, xprime: true, check gcar,gdirect,gg,istot2ixy,ig2isz"< f; + f.x = ix; + f.y = iy; + f.z = iz; + if (iz >= int (pwtest.nz / 2) + 1) + { + f.z -= pwtest.nz; + } + if (ix >= int (pwtest.nx / 2) + 1) + { + f.x -= pwtest.nx; + } + double modulus = f * (GGT * f); + if (modulus <= ggecut) + { + EXPECT_EQ (tmpx[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.x)); + EXPECT_EQ (tmpy[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.y)); + EXPECT_EQ (tmpz[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.z)); + } + } + } + } + } + for (int ig = 0; ig < pwtest.npw; ++ig) + { + ModuleBase::Vector3 f; + f.x = pwtest.gdirect[ig].x; + f.y = pwtest.gdirect[ig].y; + f.z = pwtest.gdirect[ig].z; + ModuleBase::Vector3 gcar; + gcar = f * G; + double modulus = f * GGT * f; + EXPECT_NEAR (gcar.x, pwtest.gcar[ig].x, 1e-6); + EXPECT_NEAR (gcar.y, pwtest.gcar[ig].y, 1e-6); + EXPECT_NEAR (gcar.z, pwtest.gcar[ig].z, 1e-6); + EXPECT_NEAR (modulus, pwtest.gg[ig], 1e-6); + EXPECT_NEAR 
(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + for (int igg = 1; igg < pwtest.ngg; ++igg) + { + EXPECT_GT (pwtest.gg_uniq[igg], pwtest.gg_uniq[igg - 1]); + } + if (pwtest.ig_gge0 >= 0) + { + EXPECT_NEAR (0.0, pwtest.gg[pwtest.ig_gge0], 1e-8); + } + delete[] startnst; + delete[] tmpx; + delete[] tmpy; + delete[] tmpz; + for (int ig = 0; ig < pwtest.npw; ++ig) + { + if (ig < pwtest.ng_xeq0) + { + EXPECT_EQ (pwtest.gdirect[ig].x, 0); + } + else { - ModuleBase::Vector3 f; - f.x = ix; - f.y = iy; - f.z = iz; - if(iz >= int(pwtest.nz/2) +1) f.z -= pwtest.nz; - if(ix >= int(pwtest.nx/2) +1) f.x -= pwtest.nx; - double modulus = f * (GGT * f); - if (modulus <= ggecut) - { - EXPECT_EQ(tmpx[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.x)); - EXPECT_EQ(tmpy[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.y)); - EXPECT_EQ(tmpz[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.z)); - } - + EXPECT_NE (pwtest.gdirect[ig].x, 0); } - } } - } - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - ModuleBase::Vector3 f; - f.x = pwtest.gdirect[ig].x; - f.y = pwtest.gdirect[ig].y; - f.z = pwtest.gdirect[ig].z; - ModuleBase::Vector3 gcar; - gcar = f * G; - double modulus = f*GGT*f; - EXPECT_NEAR(gcar.x,pwtest.gcar[ig].x,1e-6); - EXPECT_NEAR(gcar.y,pwtest.gcar[ig].y,1e-6); - EXPECT_NEAR(gcar.z,pwtest.gcar[ig].z,1e-6); - EXPECT_NEAR(modulus,pwtest.gg[ig],1e-6); - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - for(int igg = 1 ; igg < pwtest.ngg ; ++igg) - { - EXPECT_GT(pwtest.gg_uniq[igg], pwtest.gg_uniq[igg-1]); - } - if(pwtest.ig_gge0 >= 0) {EXPECT_NEAR(0.0, pwtest.gg[pwtest.ig_gge0], 1e-8);} - delete [] startnst; - delete [] tmpx; - delete [] tmpy; - delete [] tmpz; - for(int ig = 0 ; ig < pwtest.npw ; ++ig) - { - if(ig < pwtest.ng_xeq0) EXPECT_EQ(pwtest.gdirect[ig].x, 0); - else EXPECT_NE(pwtest.gdirect[ig].x, 0); - } } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test3-2.cpp 
b/source/source_basis/module_pw/test/test3-2.cpp index 2164cefe536..c9ec73fbe2a 100644 --- a/source/source_basis/module_pw/test/test3-2.cpp +++ b/source/source_basis/module_pw/test/test3-2.cpp @@ -12,32 +12,37 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test3_2) +TEST_F (PWTEST, test3_2) { - cout<<"dividemthd 1, gamma_only: off, xprime: true, check fft between complex and complex, reset ggecut to latecut"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix - int(nx/2); - double vy = iy - int(ny/2); - double vz = iz - int(nz/2); - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut) - { - tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulus+1) + ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix - int (nx / 2); + double vy = iy - int (ny / 2); + double vz = iz - int (nz / 2); + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut) + { + tmp[ix * ny * nz + iy * nz + iz] + = 1.0 / (modulus + 1) + + ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + 
ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - std::complex * rhor = new std::complex [nrxx]; - ModuleBase::GlobalFunc::ZEROS(rhor, nrxx); - - pwtest.recip2real(rhog,rhor);//check out-of-place transform - pwtest.recip2real(rhogr,rhogr);//check in-place transform + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + std::complex* rhor = new std::complex[nrxx]; + ModuleBase::GlobalFunc::ZEROS 
(rhor, nrxx); + + pwtest.recip2real (rhog, rhor); // check out-of-place transform + + pwtest.recip2real (rhogr, rhogr); // check in-place transform int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhor[ixy*nplane+iz].imag(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhogr[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhogr[ixy*nplane+iz].imag(),1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhor[ixy * nplane + iz].imag (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhogr[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhogr[ixy * nplane + iz].imag (), 1e-6); + } } - } - - ModuleBase::GlobalFunc::ZEROS(rhogout, npw); - pwtest.real2recip(rhor,rhogout,true, 1);//check out-of-place transform + ModuleBase::GlobalFunc::ZEROS (rhogout, npw); + pwtest.real2recip (rhor, rhogout, true, 1); // check out-of-place transform - pwtest.real2recip(rhogr,rhogr);//check in-place transform + pwtest.real2recip (rhogr, rhogr); // check in-place transform + + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); + } - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - 
EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; - fftw_cleanup(); + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - fftwf_cleanup(); + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test3-3-2.cpp b/source/source_basis/module_pw/test/test3-3-2.cpp index 8c0afa7de91..67b0183050b 100644 --- a/source/source_basis/module_pw/test/test3-3-2.cpp +++ b/source/source_basis/module_pw/test/test3-3-2.cpp @@ -12,33 +12,33 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test3_3_2) +TEST_F (PWTEST, test3_3_2) { - cout<<"dividemthd 1, gamma_only: off, xprime: true, check fft between r & c"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix - int(nx/2); - double vy = iy - int(ny/2); - double vz = iz - int(nz/2); - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut) - { - tmp[ix*ny*nz + iy*nz + iz] = 1.0/(modulus+1); - if(vx > 0) tmp[ix*ny*nz + iy*nz + iz]+=ModuleBase::IMAG_UNIT / (std::abs(v.y+1) + 1); - else if(vx < 0) tmp[ix*ny*nz + iy*nz + iz]-=ModuleBase::IMAG_UNIT / (std::abs(-v.y+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix - int (nx / 2); + double vy = iy - int (ny / 2); + double vz = iz - int (nz / 2); + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut) 
+ { + tmp[ix * ny * nz + iy * nz + iz] = 1.0 / (modulus + 1); + if (vx > 0) + { + tmp[ix * ny * nz + iy * nz + iz] + += ModuleBase::IMAG_UNIT / (std::abs (v.y + 1) + 1); + } + else if (vx < 0) + { + tmp[ix * ny * nz + iy * nz + iz] + -= ModuleBase::IMAG_UNIT / (std::abs (-v.y + 1) + 1); + } + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1); - if(pwtest.gdirect[ig].x > 0) - { - rhog[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].y+1) + 1); 
- rhogr[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].y+1) + 1); - } - else if(pwtest.gdirect[ig].x < 0) + + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) { - rhog[ig]-=ModuleBase::IMAG_UNIT / (std::abs(-pwtest.gdirect[ig].y+1) + 1); - rhogr[ig]-=ModuleBase::IMAG_UNIT / (std::abs(-pwtest.gdirect[ig].y+1) + 1); + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1); + if (pwtest.gdirect[ig].x > 0) + { + rhog[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].y + 1) + 1); + rhogr[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].y + 1) + 1); + } + else if (pwtest.gdirect[ig].x < 0) + { + rhog[ig] -= ModuleBase::IMAG_UNIT / (std::abs (-pwtest.gdirect[ig].y + 1) + 1); + rhogr[ig] -= ModuleBase::IMAG_UNIT / (std::abs (-pwtest.gdirect[ig].y + 1) + 1); + } } - } - double * rhor = new double [nrxx]; - ModuleBase::GlobalFunc::ZEROS(rhor, nrxx); - - pwtest.recip2real(rhog,rhor,true,1);//check out-of-place transform // test add fft - - pwtest.recip2real(rhogr, (double *)rhogr);//check in-place transform + double* rhor = new double[nrxx]; + ModuleBase::GlobalFunc::ZEROS (rhor, nrxx); + pwtest.recip2real (rhog, rhor, true, 1); // check out-of-place transform // test add fft + pwtest.recip2real (rhogr, (double*)rhogr); // check in-place transform int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz],1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((double*)rhogr)[ixy*nplane+iz],1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz], 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), ((double*)rhogr)[ixy * nplane + iz], 1e-6); 
+ } } - } - ModuleBase::GlobalFunc::ZEROS(rhogout, npw); - pwtest.real2recip(rhor,rhogout,true,1);//check out-of-place transform + ModuleBase::GlobalFunc::ZEROS (rhogout, npw); + pwtest.real2recip (rhor, rhogout, true, 1); // check out-of-place transform + + pwtest.real2recip ((double*)rhogr, rhogr); // check in-place transform - pwtest.real2recip((double *)rhogr,rhogr);//check in-place transform + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); + } - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; - fftw_cleanup(); + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - fftwf_cleanup(); + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test3-3.cpp b/source/source_basis/module_pw/test/test3-3.cpp index 6b4c00b7955..b640e8da38c 100644 --- a/source/source_basis/module_pw/test/test3-3.cpp +++ b/source/source_basis/module_pw/test/test3-3.cpp @@ -12,33 +12,33 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test3_3) +TEST_F (PWTEST, test3_3) { - cout<<"dividemthd 1, gamma_only: on, xprime: true, check fft"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - 
for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix - int(nx/2); - double vy = iy - int(ny/2); - double vz = iz - int(nz/2); - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut) - { - tmp[ix*ny*nz + iy*nz + iz] = 1.0/(modulus+1); - if(vx > 0) tmp[ix*ny*nz + iy*nz + iz]+=ModuleBase::IMAG_UNIT / (std::abs(v.y+1) + 1); - else if(vx < 0) tmp[ix*ny*nz + iy*nz + iz]-=ModuleBase::IMAG_UNIT / (std::abs(-v.y+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix - int (nx / 2); + double vy = iy - int (ny / 2); + double vz = iz - int (nz / 2); + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut) + { + tmp[ix * ny * nz + iy * nz + iz] = 1.0 / (modulus + 1); + if (vx > 0) + { + tmp[ix * ny * nz + iy * nz + iz] + += ModuleBase::IMAG_UNIT / (std::abs (v.y + 1) + 1); + } + else if (vx < 0) + { + tmp[ix * ny * nz + iy * nz + iz] + -= ModuleBase::IMAG_UNIT / (std::abs (-v.y + 1) + 1); + } + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 
delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1); - if(pwtest.gdirect[ig].x > 0) + + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) { - rhog[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].y+1) + 1); - rhogr[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].y+1) + 1); + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1); + if (pwtest.gdirect[ig].x > 0) + { + rhog[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].y + 1) + 1); + rhogr[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].y + 1) + 1); + } } - } - double * rhor = new double [nrxx]; - ModuleBase::GlobalFunc::ZEROS(rhor, nrxx); - - pwtest.recip2real(rhog,rhor,true,1);//check out-of-place transform // test add fft - - pwtest.recip2real(rhogr,(double*)rhogr);//check in-place transform + double* rhor = new double[nrxx]; + ModuleBase::GlobalFunc::ZEROS (rhor, nrxx); + pwtest.recip2real (rhog, rhor, true, 1); // check out-of-place transform // test add fft + pwtest.recip2real (rhogr, (double*)rhogr); // check in-place transform int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * 
ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz],1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((double*)rhogr)[ixy*nplane+iz],1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz], 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), ((double*)rhogr)[ixy * nplane + iz], 1e-6); + } } - } - ModuleBase::GlobalFunc::ZEROS(rhogout, npw); - pwtest.real2recip(rhor,rhogout,true,1);//check out-of-place transform + ModuleBase::GlobalFunc::ZEROS (rhogout, npw); + pwtest.real2recip (rhor, rhogout, true, 1); // check out-of-place transform + + pwtest.real2recip ((double*)rhogr, rhogr); // check in-place transform - pwtest.real2recip((double*)rhogr,rhogr);//check in-place transform + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); + } - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; - fftw_cleanup(); + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - fftwf_cleanup(); + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test4-1.cpp b/source/source_basis/module_pw/test/test4-1.cpp index 0e6e17c82da..2e790cf5081 100644 --- a/source/source_basis/module_pw/test/test4-1.cpp +++ 
b/source/source_basis/module_pw/test/test4-1.cpp @@ -7,14 +7,14 @@ #include "source_base/global_function.h" #include "source_base/constants.h" #include "pw_test.h" -extern int nproc_in_pool,rank_in_pool; +extern int nproc_in_pool, rank_in_pool; using namespace std; -TEST_F(PWTEST,test4_1) +TEST_F (PWTEST, test4_1) { - cout<<"dividemthd 2, gamma_only: on, xprime: true, check gcar,gdirect,gg,istot2ixy,ig2isz"< f; - f.x = ix; - f.y = iy; - f.z = iz; - if(iz >= int(pwtest.nz/2) +1) f.z -= pwtest.nz; - if(ix >= int(pwtest.nx/2) +1) f.x -= pwtest.nx; - double modulus = f * (GGT * f); - if (modulus <= ggecut) - { - EXPECT_EQ(tmpx[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.x)); - EXPECT_EQ(tmpy[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.y)); - EXPECT_EQ(tmpz[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.z)); - } - + for (int iy = 0; iy < pwtest.fftny; ++iy) + { + for (int ix = 0; ix < pwtest.fftnx; ++ix) + { + ModuleBase::Vector3 f; + f.x = ix; + f.y = iy; + f.z = iz; + if (iz >= int (pwtest.nz / 2) + 1) + { + f.z -= pwtest.nz; + } + if (ix >= int (pwtest.nx / 2) + 1) + { + f.x -= pwtest.nx; + } + double modulus = f * (GGT * f); + if (modulus <= ggecut) + { + EXPECT_EQ (tmpx[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.x)); + EXPECT_EQ (tmpy[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.y)); + EXPECT_EQ (tmpz[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.z)); + } + } + } } - } } - } - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - ModuleBase::Vector3 f; - f.x = pwtest.gdirect[ig].x; - f.y = pwtest.gdirect[ig].y; - f.z = pwtest.gdirect[ig].z; - ModuleBase::Vector3 gcar; - gcar = f * G; - double modulus = f*GGT*f; - EXPECT_NEAR(gcar.x,pwtest.gcar[ig].x,1e-6); - EXPECT_NEAR(gcar.y,pwtest.gcar[ig].y,1e-6); - EXPECT_NEAR(gcar.z,pwtest.gcar[ig].z,1e-6); - EXPECT_NEAR(modulus,pwtest.gg[ig],1e-6); - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); 
- } - for(int igg = 1 ; igg < pwtest.ngg ; ++igg) - { - EXPECT_GT(pwtest.gg_uniq[igg], pwtest.gg_uniq[igg-1]); - } - if(pwtest.ig_gge0 >= 0) {EXPECT_NEAR(0.0, pwtest.gg[pwtest.ig_gge0], 1e-8);} - delete [] startnst; - delete [] tmpx; - delete [] tmpy; - delete [] tmpz; + for (int ig = 0; ig < pwtest.npw; ++ig) + { + ModuleBase::Vector3 f; + f.x = pwtest.gdirect[ig].x; + f.y = pwtest.gdirect[ig].y; + f.z = pwtest.gdirect[ig].z; + ModuleBase::Vector3 gcar; + gcar = f * G; + double modulus = f * GGT * f; + EXPECT_NEAR (gcar.x, pwtest.gcar[ig].x, 1e-6); + EXPECT_NEAR (gcar.y, pwtest.gcar[ig].y, 1e-6); + EXPECT_NEAR (gcar.z, pwtest.gcar[ig].z, 1e-6); + EXPECT_NEAR (modulus, pwtest.gg[ig], 1e-6); + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + for (int igg = 1; igg < pwtest.ngg; ++igg) + { + EXPECT_GT (pwtest.gg_uniq[igg], pwtest.gg_uniq[igg - 1]); + } + if (pwtest.ig_gge0 >= 0) + { + EXPECT_NEAR (0.0, pwtest.gg[pwtest.ig_gge0], 1e-8); + } + delete[] startnst; + delete[] tmpx; + delete[] tmpy; + delete[] tmpz; } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test4-2.cpp b/source/source_basis/module_pw/test/test4-2.cpp index 8be35d9afc7..472ee2ac43f 100644 --- a/source/source_basis/module_pw/test/test4-2.cpp +++ b/source/source_basis/module_pw/test/test4-2.cpp @@ -12,33 +12,38 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test4_2) +TEST_F (PWTEST, test4_2) { - cout<<"dividemthd 2, gamma_only: off, xprime: true, check fft between complex and complex, reset ggecut to latecut"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double 
vx = ix - int(nx/2); - double vy = iy - int(ny/2); - double vz = iz - int(nz/2); - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut) - { - tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulus+1) + ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix - int (nx / 2); + double vy = iy - int (ny / 2); + double vz = iz - int (nz / 2); + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut) + { + tmp[ix * ny * nz + iy * nz + iz] + = 1.0 / (modulus + 1) + + ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * 
nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - std::complex * rhor = new std::complex [nrxx]; - ModuleBase::GlobalFunc::ZEROS(rhor, nrxx); + + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + std::complex* rhor = new std::complex[nrxx]; + ModuleBase::GlobalFunc::ZEROS (rhor, nrxx); #ifdef __ENABLE_FLOAT_FFTW - complex * rhofg = new complex [npw]; - complex * rhofgr = new complex [nmaxgr]; - complex * rhofgout = new complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhofg[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhofgr[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - complex * rhofr = new complex [nrxx]; - ModuleBase::GlobalFunc::ZEROS(rhofr, nrxx); + complex* rhofg = new complex[npw]; + complex* rhofgr = new complex[nmaxgr]; + complex* rhofgout = new complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhofg[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhofgr[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + complex* rhofr = new complex[nrxx]; + ModuleBase::GlobalFunc::ZEROS (rhofr, nrxx); #endif - - 
pwtest.recip2real(rhog,rhor,true,1);//check out-of-place transform - pwtest.recip2real(rhogr,rhogr);//check in-place transform + pwtest.recip2real (rhog, rhor, true, 1); // check out-of-place transform + + pwtest.recip2real (rhogr, rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.recip2real(rhofg,rhofr,true, float(1));//check out-of-place transform + pwtest.recip2real (rhofg, rhofr, true, float (1)); // check out-of-place transform - pwtest.recip2real(rhofgr,rhofgr);//check in-place transform + pwtest.recip2real (rhofgr, rhofgr); // check in-place transform #endif int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhor[ixy*nplane+iz].imag(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhogr[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhogr[ixy*nplane+iz].imag(),1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhor[ixy * nplane + iz].imag (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhogr[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhogr[ixy * nplane + iz].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofr[ixy*nplane+iz].real(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhofr[ixy*nplane+iz].imag(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofgr[ixy*nplane+iz].real(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhofgr[ixy*nplane+iz].imag(),1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofr[ixy * nplane + iz].real (), 1e-4); + EXPECT_NEAR 
(tmp[ixy * nz + startiz + iz].imag (), rhofr[ixy * nplane + iz].imag (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofgr[ixy * nplane + iz].real (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhofgr[ixy * nplane + iz].imag (), 1e-4); #endif + } } - } - - ModuleBase::GlobalFunc::ZEROS(rhogout, npw); - pwtest.real2recip(rhor,rhogout,true, 1);//check out-of-place transform + ModuleBase::GlobalFunc::ZEROS (rhogout, npw); + pwtest.real2recip (rhor, rhogout, true, 1); // check out-of-place transform - pwtest.real2recip(rhogr,rhogr);//check in-place transform + pwtest.real2recip (rhogr, rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - ModuleBase::GlobalFunc::ZEROS(rhofgout, npw); - pwtest.real2recip(rhofr,rhofgout, true, float(1));//check out-of-place transform + ModuleBase::GlobalFunc::ZEROS (rhofgout, npw); + pwtest.real2recip (rhofr, rhofgout, true, float (1)); // check out-of-place transform - pwtest.real2recip(rhofgr,rhofgr);//check in-place transform + pwtest.real2recip (rhofgr, rhofgr); // check in-place transform #endif - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(rhofg[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofg[ig].imag(),rhofgout[ig].imag(),1e-4); - EXPECT_NEAR(rhofgr[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofgr[ig].imag(),rhofgout[ig].imag(),1e-4); + EXPECT_NEAR (rhofg[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofg[ig].imag (), 
rhofgout[ig].imag (), 1e-4); + EXPECT_NEAR (rhofgr[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofgr[ig].imag (), rhofgout[ig].imag (), 1e-4); #endif - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; - - fftw_cleanup(); + } + + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; + + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - delete [] rhofg; - delete [] rhofgout; - delete [] rhofr; - delete [] rhofgr; - fftwf_cleanup(); + delete[] rhofg; + delete[] rhofgout; + delete[] rhofr; + delete[] rhofgr; + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test4-3.cpp b/source/source_basis/module_pw/test/test4-3.cpp index a2f60a0b9c1..ef01e2ee871 100644 --- a/source/source_basis/module_pw/test/test4-3.cpp +++ b/source/source_basis/module_pw/test/test4-3.cpp @@ -12,33 +12,33 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test4_3) +TEST_F (PWTEST, test4_3) { - cout<<"dividemthd 2, gamma_only: on, xprime: true, check fft"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix - int(nx/2); - double vy = iy - int(ny/2); - double vz = iz - int(nz/2); - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut) - { - tmp[ix*ny*nz + iy*nz + iz] = 1.0/(modulus+1); - if(vx > 0) tmp[ix*ny*nz + iy*nz + iz]+=ModuleBase::IMAG_UNIT / (std::abs(v.y+1) + 1); - else if(vx < 0) tmp[ix*ny*nz + iy*nz + iz]-=ModuleBase::IMAG_UNIT / (std::abs(-v.y+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; 
++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix - int (nx / 2); + double vy = iy - int (ny / 2); + double vz = iz - int (nz / 2); + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut) + { + tmp[ix * ny * nz + iy * nz + iz] = 1.0 / (modulus + 1); + if (vx > 0) + { + tmp[ix * ny * nz + iy * nz + iz] + += ModuleBase::IMAG_UNIT / (std::abs (v.y + 1) + 1); + } + else if (vx < 0) + { + tmp[ix * ny * nz + iy * nz + iz] + -= ModuleBase::IMAG_UNIT / (std::abs (-v.y + 1) + 1); + } + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = 
new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1); - if(pwtest.gdirect[ig].x > 0) + + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) { - rhog[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].y+1) + 1); - rhogr[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].y+1) + 1); + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1); + if (pwtest.gdirect[ig].x > 0) + { + rhog[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].y + 1) + 1); + rhogr[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].y + 1) + 1); + } } - } - double * rhor = new double [nrxx]; - ModuleBase::GlobalFunc::ZEROS(rhor, nrxx); + double* rhor = new double[nrxx]; + ModuleBase::GlobalFunc::ZEROS (rhor, nrxx); #ifdef __ENABLE_FLOAT_FFTW - complex * rhofg = new complex [npw]; - complex * rhofgr = new complex [nmaxgr]; - complex * rhofgout = new complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhofg[ig] = 1.0/(pwtest.gg[ig]+1); - rhofgr[ig] = 1.0/(pwtest.gg[ig]+1); - if(pwtest.gdirect[ig].x > 0) + complex* rhofg = new complex[npw]; + complex* rhofgr = new complex[nmaxgr]; + complex* rhofgout = new complex[npw]; + for (int ig = 0; ig < npw; ++ig) { - rhofg[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].y+1) + 1); - rhofgr[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].y+1) + 1); + rhofg[ig] = 1.0 / (pwtest.gg[ig] + 1); + rhofgr[ig] = 1.0 / (pwtest.gg[ig] + 1); + if (pwtest.gdirect[ig].x > 0) + { + rhofg[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].y + 1) + 1); + rhofgr[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].y + 1) + 1); + } } - } - float * rhofr = new float [nrxx]; - ModuleBase::GlobalFunc::ZEROS(rhofr, nrxx); + float* rhofr = new float[nrxx]; + 
ModuleBase::GlobalFunc::ZEROS (rhofr, nrxx); #endif - pwtest.recip2real(rhog,rhor,true,1);//check out-of-place transform // test add fft + pwtest.recip2real (rhog, rhor, true, 1); // check out-of-place transform // test add fft - pwtest.recip2real(rhogr,(double*)rhogr);//check in-place transform + pwtest.recip2real (rhogr, (double*)rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.recip2real(rhofg,rhofr,true,float(1));//check out-of-place transform + pwtest.recip2real (rhofg, rhofr, true, float (1)); // check out-of-place transform - pwtest.recip2real(rhofgr,(float*)rhofgr);//check in-place transform + pwtest.recip2real (rhofgr, (float*)rhofgr); // check in-place transform #endif - - int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz],1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((double*)rhogr)[ixy*nplane+iz],1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz], 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), ((double*)rhogr)[ixy * nplane + iz], 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofr[ixy*nplane+iz],1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((float*)rhofgr)[ixy*nplane+iz],1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofr[ixy * nplane + iz], 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), ((float*)rhofgr)[ixy * nplane + iz], 1e-4); #endif + } } - } - ModuleBase::GlobalFunc::ZEROS(rhogout, npw); - pwtest.real2recip(rhor,rhogout,true,1);//check out-of-place transform + ModuleBase::GlobalFunc::ZEROS (rhogout, npw); + pwtest.real2recip (rhor, rhogout, true, 1); // check out-of-place transform - pwtest.real2recip((double*)rhogr,rhogr);//check in-place transform + pwtest.real2recip 
((double*)rhogr, rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - ModuleBase::GlobalFunc::ZEROS(rhofgout, npw); - pwtest.real2recip(rhofr,rhofgout,true,float(1));//check out-of-place transform + ModuleBase::GlobalFunc::ZEROS (rhofgout, npw); + pwtest.real2recip (rhofr, rhofgout, true, float (1)); // check out-of-place transform - pwtest.real2recip((float*)rhofgr,rhofgr);//check in-place transform + pwtest.real2recip ((float*)rhofgr, rhofgr); // check in-place transform #endif - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(rhofg[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofg[ig].imag(),rhofgout[ig].imag(),1e-4); - EXPECT_NEAR(rhofgr[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofgr[ig].imag(),rhofgout[ig].imag(),1e-4); + EXPECT_NEAR (rhofg[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofg[ig].imag (), rhofgout[ig].imag (), 1e-4); + EXPECT_NEAR (rhofgr[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofgr[ig].imag (), rhofgout[ig].imag (), 1e-4); #endif - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; - - fftw_cleanup(); + } + + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; + + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - delete [] rhofg; - delete [] rhofgout; - delete [] rhofr; - delete [] rhofgr; - fftwf_cleanup(); + delete[] rhofg; + delete[] rhofgout; + delete[] rhofr; + delete[] 
rhofgr; + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test4-4.cpp b/source/source_basis/module_pw/test/test4-4.cpp index dd966744401..83d7f42e746 100644 --- a/source/source_basis/module_pw/test/test4-4.cpp +++ b/source/source_basis/module_pw/test/test4-4.cpp @@ -12,24 +12,24 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test4_4) +TEST_F (PWTEST, test4_4) { - cout<<"dividemthd 2, gamma_only: off, xprime: true, 2 kpoints, check fft"< *kvec_d; + ModuleBase::Vector3* kvec_d; int nks; //-------------------------------------------------- lat0 = 4; - ModuleBase::Matrix3 la(1, 5, 0, 0, 1, 0, 0, 0, 1); + ModuleBase::Matrix3 la (1, 5, 0, 0, 1, 0, 0, 0, 1); nks = 2; kvec_d = new ModuleBase::Vector3[nks]; - kvec_d[0].set(0,0,0); - kvec_d[1].set(0,0.5,0.5); + kvec_d[0].set (0, 0, 0); + kvec_d[1].set (0, 0.5, 0.5); latvec = la; wfcecut = 40; gamma_only = false; @@ -37,14 +37,14 @@ TEST_F(PWTEST,test4_4) bool xprime = true; //-------------------------------------------------- - //init //real parameter + // init //real parameter #ifdef __MPI - pwtest.initmpi(nproc_in_pool, rank_in_pool, POOL_WORLD); + pwtest.initmpi (nproc_in_pool, rank_in_pool, POOL_WORLD); #endif - pwtest.initgrids(lat0,latvec,4*wfcecut); - pwtest.initparameters(gamma_only,wfcecut,nks,kvec_d,distribution_type,xprime); - pwtest.setuptransform(); - pwtest.collect_local_pw(); + pwtest.initgrids (lat0, latvec, 4 * wfcecut); + pwtest.initparameters (gamma_only, wfcecut, nks, kvec_d, distribution_type, xprime); + pwtest.setuptransform (); + pwtest.collect_local_pw (); const int nrxx = pwtest.nrxx; const int nmaxgr = pwtest.nmaxgr; @@ -55,173 +55,189 @@ TEST_F(PWTEST,test4_4) double tpiba2 = ModuleBase::TWO_PI * ModuleBase::TWO_PI / lat0 / lat0; double ggecut = wfcecut / tpiba2; - ModuleBase::Matrix3 GT,G,GGT; - GT = latvec.Inverse(); - G = GT.Transpose(); - GGT = G * GT; - std::complex *tmp = new std::complex [nx*ny*nz]; - std::complex * rhor = 
new std::complex [nrxx]; - std::complex * rhogr = new std::complex [nmaxgr]; + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + std::complex* rhor = new std::complex[nrxx]; + std::complex* rhogr = new std::complex[nmaxgr]; #ifdef __ENABLE_FLOAT_FFTW - complex * rhofr = new complex [nrxx]; - complex * rhofgr = new complex [nmaxgr]; + complex* rhofr = new complex[nrxx]; + complex* rhofgr = new complex[nmaxgr]; #endif - for(int ik = 0; ik < nks; ++ik) - { - int npwk = pwtest.npwk[ik]; - if(rank_in_pool == 0) + for (int ik = 0; ik < nks; ++ik) { - ModuleBase::Vector3 kk = kvec_d[ik]; - for(int ix = 0 ; ix < nx ; ++ix) - { - for(int iy = 0 ; iy < ny ; ++iy) + int npwk = pwtest.npwk[ik]; + if (rank_in_pool == 0) { - for(int iz = 0 ; iz < nz ; ++iz) - { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix - int(nx/2); - double vy = iy - int(ny/2); - double vz = iz - int(nz/2); - ModuleBase::Vector3 v(vx,vy,vz); - // double modulus = v * (GGT * v); - double modulusgk = (v+kk) * (GGT * (v+kk)); - if (modulusgk <= ggecut) + ModuleBase::Vector3 kk = kvec_d[ik]; + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulusgk+1) + ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix - int (nx / 2); + double vy = iy - int (ny / 2); + double vz = iz - int (nz / 2); + ModuleBase::Vector3 v (vx, vy, vz); + // double modulus = v * (GGT * v); + double modulusgk = (v + kk) * (GGT * (v + kk)); + if (modulusgk <= ggecut) + { + tmp[ix * ny * nz + iy * nz + iz] + = 1.0 / (modulusgk + 1) + + ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + } + } + } + fftw_plan pp = fftw_plan_dft_3d (nx, + ny, + nz, + (fftw_complex*)tmp, + (fftw_complex*)tmp, + FFTW_BACKWARD, + FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + 
ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); } - } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - std::complex * rhog = new std::complex [npwk]; - std::complex * rhogout = new std::complex [npwk]; + std::complex* rhog = new std::complex[npwk]; + std::complex* rhogout = new std::complex[npwk]; #ifdef __ENABLE_FLOAT_FFTW - complex * rhofg = new complex [npwk]; - complex * rhofgout = new complex [npwk]; + complex* rhofg = new complex[npwk]; + complex* rhofgout = new complex[npwk]; #endif - for(int ig = 0 ; ig < npwk ; ++ig) - { - rhog[ig] = 1.0/(pwtest.getgk2(ik,ig)+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.getgdirect(ik,ig).x+1) + 1); - rhogr[ig] = 1.0/(pwtest.getgk2(ik,ig)+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.getgdirect(ik,ig).x+1) + 1); - } + for (int ig = 0; ig < npwk; ++ig) + { + rhog[ig] = 1.0 / (pwtest.getgk2 (ik, ig) + 1) + + ModuleBase::IMAG_UNIT / (std::abs (pwtest.getgdirect (ik, ig).x + 1) + 1); + rhogr[ig] = 1.0 
/ (pwtest.getgk2 (ik, ig) + 1) + + ModuleBase::IMAG_UNIT / (std::abs (pwtest.getgdirect (ik, ig).x + 1) + 1); + } #ifdef __ENABLE_FLOAT_FFTW - for(int ig = 0 ; ig < npwk ; ++ig) - { - rhofg[ig] = 1.0/(pwtest.getgk2(ik,ig)+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.getgdirect(ik,ig).x+1) + 1); - rhofgr[ig] = 1.0/(pwtest.getgk2(ik,ig)+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.getgdirect(ik,ig).x+1) + 1); - } + for (int ig = 0; ig < npwk; ++ig) + { + rhofg[ig] = 1.0 / (pwtest.getgk2 (ik, ig) + 1) + + ModuleBase::IMAG_UNIT / (std::abs (pwtest.getgdirect (ik, ig).x + 1) + 1); + rhofgr[ig] = 1.0 / (pwtest.getgk2 (ik, ig) + 1) + + ModuleBase::IMAG_UNIT / (std::abs (pwtest.getgdirect (ik, ig).x + 1) + 1); + } #endif - ModuleBase::GlobalFunc::ZEROS(rhor, nrxx); - pwtest.recip2real(rhog,rhor,ik, true, 1); //check out-of-place transform + ModuleBase::GlobalFunc::ZEROS (rhor, nrxx); + pwtest.recip2real (rhog, rhor, ik, true, 1); // check out-of-place transform - pwtest.recip2real(rhogr,rhogr,ik); //check in-place transform + pwtest.recip2real (rhogr, rhogr, ik); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - ModuleBase::GlobalFunc::ZEROS(rhofr, nrxx); - pwtest.recip2real(rhofg,rhofr,ik, true, float(1)); //check out-of-place transform + ModuleBase::GlobalFunc::ZEROS (rhofr, nrxx); + pwtest.recip2real (rhofg, rhofr, ik, true, float (1)); // check out-of-place transform - pwtest.recip2real(rhofgr,rhofgr,ik); //check in-place transform + pwtest.recip2real (rhofgr, rhofgr, ik); // check in-place transform #endif - int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) - { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhor[ixy*nplane+iz].imag(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhogr[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + 
iz].imag(),rhogr[ixy*nplane+iz].imag(),1e-6); + int startiz = pwtest.startz_current; + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhor[ixy * nplane + iz].imag (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhogr[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhogr[ixy * nplane + iz].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofr[ixy*nplane+iz].real(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhofr[ixy*nplane+iz].imag(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofgr[ixy*nplane+iz].real(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhofgr[ixy*nplane+iz].imag(),1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofr[ixy * nplane + iz].real (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhofr[ixy * nplane + iz].imag (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofgr[ixy * nplane + iz].real (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhofgr[ixy * nplane + iz].imag (), 1e-4); #endif - } - } + } + } - ModuleBase::GlobalFunc::ZEROS(rhogout, npwk); - pwtest.real2recip(rhor,rhogout,ik, true, 1); + ModuleBase::GlobalFunc::ZEROS (rhogout, npwk); + pwtest.real2recip (rhor, rhogout, ik, true, 1); - pwtest.real2recip(rhogr,rhogr,ik); + pwtest.real2recip (rhogr, rhogr, ik); #ifdef __ENABLE_FLOAT_FFTW - ModuleBase::GlobalFunc::ZEROS(rhofgout, npwk); - pwtest.real2recip(rhofr,rhofgout,ik, true, float(1)); + ModuleBase::GlobalFunc::ZEROS (rhofgout, npwk); + pwtest.real2recip (rhofr, rhofgout, ik, true, float (1)); - pwtest.real2recip(rhofgr,rhofgr,ik); + pwtest.real2recip (rhofgr, rhofgr, ik); #endif - for(int ig = 0 ; ig < npwk ; ++ig) - { - 
EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhog[ig].real(),rhogr[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogr[ig].imag(),1e-6); + for (int ig = 0; ig < npwk; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhog[ig].real (), rhogr[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogr[ig].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(rhofg[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofg[ig].imag(),rhofgout[ig].imag(),1e-4); - EXPECT_NEAR(rhofg[ig].real(),rhofgr[ig].real(),1e-4); - EXPECT_NEAR(rhofg[ig].imag(),rhofgr[ig].imag(),1e-4); + EXPECT_NEAR (rhofg[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofg[ig].imag (), rhofgout[ig].imag (), 1e-4); + EXPECT_NEAR (rhofg[ig].real (), rhofgr[ig].real (), 1e-4); + EXPECT_NEAR (rhofg[ig].imag (), rhofgr[ig].imag (), 1e-4); #endif - } - + } - delete [] rhog; - delete [] rhogout; + delete[] rhog; + delete[] rhogout; #ifdef __ENABLE_FLOAT_FFTW - delete [] rhofg; - delete [] rhofgout; + delete[] rhofg; + delete[] rhofgout; #endif - //check getgcar(ik,ig) - for(int igl = 0 ; igl < npwk; ++igl) - { - EXPECT_NEAR(pwtest.getgcar(ik,igl).norm2(), (pwtest.getgdirect(ik,igl) * G).norm2(), 1e-8); - EXPECT_NEAR(pwtest.getgpluskcar(ik,igl).norm2(), ((pwtest.getgdirect(ik,igl) + kvec_d[ik]) * G).norm2(), 1e-8); - } + // check getgcar(ik,ig) + for (int igl = 0; igl < npwk; ++igl) + { + EXPECT_NEAR (pwtest.getgcar (ik, igl).norm2 (), (pwtest.getgdirect (ik, igl) * G).norm2 (), 1e-8); + EXPECT_NEAR (pwtest.getgpluskcar (ik, igl).norm2 (), + ((pwtest.getgdirect (ik, igl) + kvec_d[ik]) * G).norm2 (), + 1e-8); + } - //check igl2ig - for(int igl = 0; igl < npwk ; ++igl) - { - const int isz = pwtest.getigl2isz(ik,igl); - for(int ig = 0 ; ig < pwtest.npw; ++ig) - { - if(isz == pwtest.ig2isz[ig]){ - 
EXPECT_EQ(ig,pwtest.getigl2ig(ik,igl));} - } + // check igl2ig + for (int igl = 0; igl < npwk; ++igl) + { + const int isz = pwtest.getigl2isz (ik, igl); + for (int ig = 0; ig < pwtest.npw; ++ig) + { + if (isz == pwtest.ig2isz[ig]) + { + EXPECT_EQ (ig, pwtest.getigl2ig (ik, igl)); + } + } + } } - - } - delete []tmp; - delete [] rhor; + delete[] tmp; + delete[] rhor; delete[] kvec_d; delete[] rhogr; - fftw_cleanup(); + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW delete[] rhofr; delete[] rhofgr; - fftwf_cleanup(); + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test4-5.cpp b/source/source_basis/module_pw/test/test4-5.cpp index 2027e7b3f97..1af17b85df1 100644 --- a/source/source_basis/module_pw/test/test4-5.cpp +++ b/source/source_basis/module_pw/test/test4-5.cpp @@ -12,23 +12,23 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test4_5) +TEST_F (PWTEST, test4_5) { - cout<<"dividemthd 2, gamma_only: on, xprime: true, gamma kpoint, check fft"< *kvec_d; + ModuleBase::Vector3* kvec_d; int nks; //-------------------------------------------------- lat0 = 5; - ModuleBase::Matrix3 la(1, 0.3, 0, 0, 2, 0, 0, 0, 6); + ModuleBase::Matrix3 la (1, 0.3, 0, 0, 2, 0, 0, 0, 6); nks = 1; kvec_d = new ModuleBase::Vector3[nks]; - kvec_d[0].set(0,0,0); + kvec_d[0].set (0, 0, 0); latvec = la; wfcecut = 10; gamma_only = true; @@ -36,14 +36,14 @@ TEST_F(PWTEST,test4_5) bool xprime = true; //-------------------------------------------------- - //init //real parameter + // init //real parameter #ifdef __MPI - pwtest.initmpi(nproc_in_pool, rank_in_pool, POOL_WORLD); + pwtest.initmpi (nproc_in_pool, rank_in_pool, POOL_WORLD); #endif - pwtest.initgrids(lat0,latvec,4*wfcecut); - pwtest.initparameters(gamma_only,wfcecut,nks,kvec_d,distribution_type, xprime); - pwtest.setuptransform(); - pwtest.collect_local_pw(); + pwtest.initgrids (lat0, latvec, 4 * wfcecut); + pwtest.initparameters (gamma_only, wfcecut, nks, kvec_d, 
distribution_type, xprime); + pwtest.setuptransform (); + pwtest.collect_local_pw (); const int nrxx = pwtest.nrxx; const int nmaxgr = pwtest.nmaxgr; @@ -54,176 +54,196 @@ TEST_F(PWTEST,test4_5) double tpiba2 = ModuleBase::TWO_PI * ModuleBase::TWO_PI / lat0 / lat0; double ggecut = wfcecut / tpiba2; - ModuleBase::Matrix3 GT,G,GGT; - GT = latvec.Inverse(); - G = GT.Transpose(); - GGT = G * GT; - std::complex *tmp = new std::complex [nx*ny*nz]; - std::complex * rhogr = new std::complex [nmaxgr]; - double * rhor = new double [nrxx]; + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + std::complex* rhogr = new std::complex[nmaxgr]; + double* rhor = new double[nrxx]; #ifdef __ENABLE_FLOAT_FFTW - float * rhofr = new float [nrxx]; - complex * rhofgr = new complex [nmaxgr]; + float* rhofr = new float[nrxx]; + complex* rhofgr = new complex[nmaxgr]; #endif - for(int ik = 0; ik < nks; ++ik) - { - int npwk = pwtest.npwk[ik]; - if(rank_in_pool == 0) + for (int ik = 0; ik < nks; ++ik) { - ModuleBase::Vector3 kk = kvec_d[ik]; - for(int ix = 0 ; ix < nx ; ++ix) - { - for(int iy = 0 ; iy < ny ; ++iy) + int npwk = pwtest.npwk[ik]; + if (rank_in_pool == 0) { - for(int iz = 0 ; iz < nz ; ++iz) - { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix - int(nx/2); - double vy = iy - int(ny/2); - double vz = iz - int(nz/2); - ModuleBase::Vector3 v(vx,vy,vz); - // double modulus = v * (GGT * v); - double modulusgk = (v+kk) * (GGT * (v+kk)); - if (modulusgk <= ggecut) + ModuleBase::Vector3 kk = kvec_d[ik]; + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulusgk+1); - if(vx > 0) tmp[ix*ny*nz + iy*nz + iz]+=ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); - else if(vx < 0) tmp[ix*ny*nz + iy*nz + iz]-=ModuleBase::IMAG_UNIT / (std::abs(-v.x+1) + 1); + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double 
vx = ix - int (nx / 2); + double vy = iy - int (ny / 2); + double vz = iz - int (nz / 2); + ModuleBase::Vector3 v (vx, vy, vz); + // double modulus = v * (GGT * v); + double modulusgk = (v + kk) * (GGT * (v + kk)); + if (modulusgk <= ggecut) + { + tmp[ix * ny * nz + iy * nz + iz] = 1.0 / (modulusgk + 1); + if (vx > 0) + { + tmp[ix * ny * nz + iy * nz + iz] + += ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + else if (vx < 0) + { + tmp[ix * ny * nz + iy * nz + iz] + -= ModuleBase::IMAG_UNIT / (std::abs (-v.x + 1) + 1); + } + } + } + } + } + fftw_plan pp = fftw_plan_dft_3d (nx, + ny, + nz, + (fftw_complex*)tmp, + (fftw_complex*)tmp, + FFTW_BACKWARD, + FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); } - } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - std::complex * rhog = new std::complex [npwk]; - std::complex * rhogout = new std::complex [npwk]; + std::complex* 
rhog = new std::complex[npwk]; + std::complex* rhogout = new std::complex[npwk]; #ifdef __ENABLE_FLOAT_FFTW - complex * rhofg = new complex [npwk]; - complex * rhofgout = new complex [npwk]; + complex* rhofg = new complex[npwk]; + complex* rhofgout = new complex[npwk]; #endif - for(int ig = 0 ; ig < npwk ; ++ig) - { - rhog[ig] = 1.0/(pwtest.getgk2(ik,ig)+1); - rhogr[ig] = 1.0/(pwtest.getgk2(ik,ig)+1); - ModuleBase::Vector3 f = pwtest.getgdirect(ik,ig); - if(f.x > 0) - { - rhog[ig]+=ModuleBase::IMAG_UNIT / (std::abs(f.x+1) + 1); - rhogr[ig]+=ModuleBase::IMAG_UNIT / (std::abs(f.x+1) + 1); - } - } + for (int ig = 0; ig < npwk; ++ig) + { + rhog[ig] = 1.0 / (pwtest.getgk2 (ik, ig) + 1); + rhogr[ig] = 1.0 / (pwtest.getgk2 (ik, ig) + 1); + ModuleBase::Vector3 f = pwtest.getgdirect (ik, ig); + if (f.x > 0) + { + rhog[ig] += ModuleBase::IMAG_UNIT / (std::abs (f.x + 1) + 1); + rhogr[ig] += ModuleBase::IMAG_UNIT / (std::abs (f.x + 1) + 1); + } + } #ifdef __ENABLE_FLOAT_FFTW - for(int ig = 0 ; ig < npwk ; ++ig) - { - rhofg[ig] = 1.0/(pwtest.getgk2(ik,ig)+1); - rhofgr[ig] = 1.0/(pwtest.getgk2(ik,ig)+1); - ModuleBase::Vector3 f = pwtest.getgdirect(ik,ig); - if(f.x > 0) - { - rhofg[ig]+=ModuleBase::IMAG_UNIT / (std::abs(f.x+1) + 1); - rhofgr[ig]+=ModuleBase::IMAG_UNIT / (std::abs(f.x+1) + 1); - } - } + for (int ig = 0; ig < npwk; ++ig) + { + rhofg[ig] = 1.0 / (pwtest.getgk2 (ik, ig) + 1); + rhofgr[ig] = 1.0 / (pwtest.getgk2 (ik, ig) + 1); + ModuleBase::Vector3 f = pwtest.getgdirect (ik, ig); + if (f.x > 0) + { + rhofg[ig] += ModuleBase::IMAG_UNIT / (std::abs (f.x + 1) + 1); + rhofgr[ig] += ModuleBase::IMAG_UNIT / (std::abs (f.x + 1) + 1); + } + } #endif - ModuleBase::GlobalFunc::ZEROS(rhor, nrxx); - pwtest.recip2real(rhog,rhor,ik,true, 1); //check out-of-place transform + ModuleBase::GlobalFunc::ZEROS (rhor, nrxx); + pwtest.recip2real (rhog, rhor, ik, true, 1); // check out-of-place transform - pwtest.recip2real(rhogr,(double*)rhogr,ik); //check in-place transform + 
pwtest.recip2real (rhogr, (double*)rhogr, ik); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - ModuleBase::GlobalFunc::ZEROS(rhofr, nrxx); - pwtest.recip2real(rhofg,rhofr,ik); //check out-of-place transform + ModuleBase::GlobalFunc::ZEROS (rhofr, nrxx); + pwtest.recip2real (rhofg, rhofr, ik); // check out-of-place transform - pwtest.recip2real(rhofgr,(float*)rhofgr,ik); //check in-place transform + pwtest.recip2real (rhofgr, (float*)rhofgr, ik); // check in-place transform #endif - int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) - { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz],1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((double*)rhogr)[ixy*nplane+iz],1e-6); + int startiz = pwtest.startz_current; + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz], 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), + ((double*)rhogr)[ixy * nplane + iz], + 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofr[ixy*nplane+iz],1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((float*)rhofgr)[ixy*nplane+iz],1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofr[ixy * nplane + iz], 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), + ((float*)rhofgr)[ixy * nplane + iz], + 1e-4); #endif - } - } + } + } - ModuleBase::GlobalFunc::ZEROS(rhogout, npwk); - pwtest.real2recip(rhor,rhogout,ik,true, 1.0); + ModuleBase::GlobalFunc::ZEROS (rhogout, npwk); + pwtest.real2recip (rhor, rhogout, ik, true, 1.0); - pwtest.real2recip((double*)rhogr,rhogr,ik); + pwtest.real2recip ((double*)rhogr, rhogr, ik); #ifdef __ENABLE_FLOAT_FFTW - ModuleBase::GlobalFunc::ZEROS(rhofgout, npwk); - pwtest.real2recip(rhofr,rhofgout,ik,true, 1.0); + ModuleBase::GlobalFunc::ZEROS (rhofgout, npwk); + pwtest.real2recip 
(rhofr, rhofgout, ik, true, 1.0); - pwtest.real2recip((float*)rhofgr,rhofgr,ik); + pwtest.real2recip ((float*)rhofgr, rhofgr, ik); #endif - for(int ig = 0 ; ig < npwk ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhog[ig].real(),rhogr[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogr[ig].imag(),1e-6); + for (int ig = 0; ig < npwk; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhog[ig].real (), rhogr[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogr[ig].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(rhofg[ig].real(),rhofgout[ig].real(),1e-6); - EXPECT_NEAR(rhofg[ig].imag(),rhofgout[ig].imag(),1e-6); - EXPECT_NEAR(rhofg[ig].real(),rhofgr[ig].real(),1e-6); - EXPECT_NEAR(rhofg[ig].imag(),rhofgr[ig].imag(),1e-6); + EXPECT_NEAR (rhofg[ig].real (), rhofgout[ig].real (), 1e-6); + EXPECT_NEAR (rhofg[ig].imag (), rhofgout[ig].imag (), 1e-6); + EXPECT_NEAR (rhofg[ig].real (), rhofgr[ig].real (), 1e-6); + EXPECT_NEAR (rhofg[ig].imag (), rhofgr[ig].imag (), 1e-6); #endif - } - + } - delete [] rhog; - delete [] rhogout; + delete[] rhog; + delete[] rhogout; #ifdef __ENABLE_FLOAT_FFTW - delete [] rhofg; - delete [] rhofgout; + delete[] rhofg; + delete[] rhofgout; #endif - //check igl2ig - for(int igl = 0; igl < npwk ; ++igl) - { - const int isz = pwtest.getigl2isz(ik,igl); - for(int ig = 0 ; ig < pwtest.npw; ++ig) - { - if(isz == pwtest.ig2isz[ig]){ - EXPECT_EQ(ig,pwtest.getigl2ig(ik,igl));} - } + // check igl2ig + for (int igl = 0; igl < npwk; ++igl) + { + const int isz = pwtest.getigl2isz (ik, igl); + for (int ig = 0; ig < pwtest.npw; ++ig) + { + if (isz == pwtest.ig2isz[ig]) + { + EXPECT_EQ (ig, pwtest.getigl2ig (ik, igl)); + } + } + } } - - } - delete []tmp; - delete [] rhor; + delete[] tmp; + delete[] rhor; delete[] kvec_d; delete[] rhogr; - fftw_cleanup(); + 
fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW delete[] rhofr; delete[] rhofgr; - fftwf_cleanup(); + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test5-1-1.cpp b/source/source_basis/module_pw/test/test5-1-1.cpp index 1fb800d9e97..f9ad8355310 100644 --- a/source/source_basis/module_pw/test/test5-1-1.cpp +++ b/source/source_basis/module_pw/test/test5-1-1.cpp @@ -7,14 +7,15 @@ #include "source_base/global_function.h" #include "source_base/constants.h" #include "pw_test.h" -extern int nproc_in_pool,rank_in_pool; +extern int nproc_in_pool, rank_in_pool; using namespace std; -TEST_F(PWTEST,test5_1_1) +TEST_F (PWTEST, test5_1_1) { - cout<<"dividemthd 1, gamma_only: off, full_pw: true, full_pw_dim: 1, check gcar,gdirect,gg,istot2ixy,ig2isz"< f; - f.x = ix; - f.y = iy; - f.z = iz; - if(iz >= int(pwtest.nz/2) +1) f.z -= pwtest.nz; - if(iy >= int(pwtest.ny/2) +1) f.y -= pwtest.ny; - if(ix >= int(pwtest.nx/2) +1) f.x -= pwtest.nx; - double modulus = f * (GGT * f); - if (modulus <= ggecut) - { - EXPECT_EQ(tmpx[iz + iy*pwtest.nz + ix*pwtest.ny*pwtest.nz], int(f.x)); - EXPECT_EQ(tmpy[iz + iy*pwtest.nz + ix*pwtest.ny*pwtest.nz], int(f.y)); - EXPECT_EQ(tmpz[iz + iy*pwtest.nz + ix*pwtest.ny*pwtest.nz], int(f.z)); - } - + for (int iy = 0; iy < pwtest.ny; ++iy) + { + for (int ix = 0; ix < pwtest.nx; ++ix) + { + ModuleBase::Vector3 f; + f.x = ix; + f.y = iy; + f.z = iz; + if (iz >= int (pwtest.nz / 2) + 1) + { + f.z -= pwtest.nz; + } + if (iy >= int (pwtest.ny / 2) + 1) + { + f.y -= pwtest.ny; + } + if (ix >= int (pwtest.nx / 2) + 1) + { + f.x -= pwtest.nx; + } + double modulus = f * (GGT * f); + if (modulus <= ggecut) + { + EXPECT_EQ (tmpx[iz + iy * pwtest.nz + ix * pwtest.ny * pwtest.nz], + int (f.x)); + EXPECT_EQ (tmpy[iz + iy * pwtest.nz + ix * pwtest.ny * pwtest.nz], + int (f.y)); + EXPECT_EQ (tmpz[iz + iy * pwtest.nz + ix * pwtest.ny * pwtest.nz], + int (f.z)); + } + } + } } - } } - } - for(int ig = 0 ;ig < pwtest.npw ; ++ig) 
- { - ModuleBase::Vector3 f; - f.x = pwtest.gdirect[ig].x; - f.y = pwtest.gdirect[ig].y; - f.z = pwtest.gdirect[ig].z; - ModuleBase::Vector3 gcar; - gcar = f * G; - double modulus = f*GGT*f; - EXPECT_NEAR(gcar.x,pwtest.gcar[ig].x,1e-6); - EXPECT_NEAR(gcar.y,pwtest.gcar[ig].y,1e-6); - EXPECT_NEAR(gcar.z,pwtest.gcar[ig].z,1e-6); - EXPECT_NEAR(modulus,pwtest.gg[ig],1e-6); - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - for(int igg = 1 ; igg < pwtest.ngg ; ++igg) - { - EXPECT_GT(pwtest.gg_uniq[igg], pwtest.gg_uniq[igg-1]); - } - if(pwtest.ig_gge0 >= 0) {EXPECT_NEAR(0.0, pwtest.gg[pwtest.ig_gge0], 1e-8);} - delete [] startnst; - delete [] tmpx; - delete [] tmpy; - delete [] tmpz; + for (int ig = 0; ig < pwtest.npw; ++ig) + { + ModuleBase::Vector3 f; + f.x = pwtest.gdirect[ig].x; + f.y = pwtest.gdirect[ig].y; + f.z = pwtest.gdirect[ig].z; + ModuleBase::Vector3 gcar; + gcar = f * G; + double modulus = f * GGT * f; + EXPECT_NEAR (gcar.x, pwtest.gcar[ig].x, 1e-6); + EXPECT_NEAR (gcar.y, pwtest.gcar[ig].y, 1e-6); + EXPECT_NEAR (gcar.z, pwtest.gcar[ig].z, 1e-6); + EXPECT_NEAR (modulus, pwtest.gg[ig], 1e-6); + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + for (int igg = 1; igg < pwtest.ngg; ++igg) + { + EXPECT_GT (pwtest.gg_uniq[igg], pwtest.gg_uniq[igg - 1]); + } + if (pwtest.ig_gge0 >= 0) + { + EXPECT_NEAR (0.0, pwtest.gg[pwtest.ig_gge0], 1e-8); + } + delete[] startnst; + delete[] tmpx; + delete[] tmpy; + delete[] tmpz; - //Add tests for gg_uniq - ModuleBase::Matrix3 latvec2(5.1358423233,0.0,0.0,0.1578526541,5.1334159104,0.0,-2.646847675,-2.5667081359,3.5753437737); + // Add tests for gg_uniq + ModuleBase::Matrix3 + latvec2 (5.1358423233, 0.0, 0.0, 0.1578526541, 5.1334159104, 0.0, -2.646847675, -2.5667081359, 3.5753437737); gamma_only = false; wfcecut = 240; lat0 = 1.88972613; distribution_type = 1; //-------------------------------------------------- - pwtest.initgrids(lat0, latvec2, wfcecut); - 
pwtest.initparameters(gamma_only, wfcecut, distribution_type, xprime); - pwtest.setuptransform(); - pwtest.collect_local_pw(); - pwtest.collect_uniqgg(); - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - int * irindex = new int [pwtest.fftnxy]; - pwtest.getfftixy2is(irindex); - for(int is = 0 ; is < pwtest.nst ;++is) - { - EXPECT_EQ(irindex[pwtest.is2fftixy[is]],is); - } + pwtest.initgrids (lat0, latvec2, wfcecut); + pwtest.initparameters (gamma_only, wfcecut, distribution_type, xprime); + pwtest.setuptransform (); + pwtest.collect_local_pw (); + pwtest.collect_uniqgg (); + for (int ig = 0; ig < pwtest.npw; ++ig) + { + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + int* irindex = new int[pwtest.fftnxy]; + pwtest.getfftixy2is (irindex); + for (int is = 0; is < pwtest.nst; ++is) + { + EXPECT_EQ (irindex[pwtest.is2fftixy[is]], is); + } delete[] irindex; - - } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test5-1-2.cpp b/source/source_basis/module_pw/test/test5-1-2.cpp index 50b6c17c494..0030a1a3a02 100644 --- a/source/source_basis/module_pw/test/test5-1-2.cpp +++ b/source/source_basis/module_pw/test/test5-1-2.cpp @@ -7,14 +7,15 @@ #include "source_base/global_function.h" #include "source_base/constants.h" #include "pw_test.h" -extern int nproc_in_pool,rank_in_pool; +extern int nproc_in_pool, rank_in_pool; using namespace std; -TEST_F(PWTEST,test5_1_2) +TEST_F (PWTEST, test5_1_2) { - cout<<"dividemthd 1, gamma_only: off, full_pw: true, full_pw_dim: 2, check gcar,gdirect,gg,istot2ixy,ig2isz"< f; - f.x = ix; - f.y = iy; - f.z = iz; - if(iz >= int(pwtest.nz/2) +1) f.z -= pwtest.nz; - if(iy >= int(pwtest.ny/2) +1) f.y -= pwtest.ny; - if(ix >= int(pwtest.nx/2) +1) f.x -= pwtest.nx; - double modulus = f * (GGT * f); - if (modulus <= ggecut) - { - EXPECT_EQ(tmpx[iz + iy*pwtest.nz + ix*pwtest.ny*pwtest.nz], int(f.x)); - EXPECT_EQ(tmpy[iz + 
iy*pwtest.nz + ix*pwtest.ny*pwtest.nz], int(f.y)); - EXPECT_EQ(tmpz[iz + iy*pwtest.nz + ix*pwtest.ny*pwtest.nz], int(f.z)); - } - + for (int iy = 0; iy < pwtest.ny; ++iy) + { + for (int ix = 0; ix < pwtest.nx; ++ix) + { + ModuleBase::Vector3 f; + f.x = ix; + f.y = iy; + f.z = iz; + if (iz >= int (pwtest.nz / 2) + 1) + { + f.z -= pwtest.nz; + } + if (iy >= int (pwtest.ny / 2) + 1) + { + f.y -= pwtest.ny; + } + if (ix >= int (pwtest.nx / 2) + 1) + { + f.x -= pwtest.nx; + } + double modulus = f * (GGT * f); + if (modulus <= ggecut) + { + EXPECT_EQ (tmpx[iz + iy * pwtest.nz + ix * pwtest.ny * pwtest.nz], + int (f.x)); + EXPECT_EQ (tmpy[iz + iy * pwtest.nz + ix * pwtest.ny * pwtest.nz], + int (f.y)); + EXPECT_EQ (tmpz[iz + iy * pwtest.nz + ix * pwtest.ny * pwtest.nz], + int (f.z)); + } + } + } } - } } - } - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - ModuleBase::Vector3 f; - f.x = pwtest.gdirect[ig].x; - f.y = pwtest.gdirect[ig].y; - f.z = pwtest.gdirect[ig].z; - ModuleBase::Vector3 gcar; - gcar = f * G; - double modulus = f*GGT*f; - EXPECT_NEAR(gcar.x,pwtest.gcar[ig].x,1e-6); - EXPECT_NEAR(gcar.y,pwtest.gcar[ig].y,1e-6); - EXPECT_NEAR(gcar.z,pwtest.gcar[ig].z,1e-6); - EXPECT_NEAR(modulus,pwtest.gg[ig],1e-6); - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - for(int igg = 1 ; igg < pwtest.ngg ; ++igg) - { - EXPECT_GT(pwtest.gg_uniq[igg], pwtest.gg_uniq[igg-1]); - } - if(pwtest.ig_gge0 >= 0) {EXPECT_NEAR(0.0, pwtest.gg[pwtest.ig_gge0], 1e-8);} - delete [] startnst; - delete [] tmpx; - delete [] tmpy; - delete [] tmpz; + for (int ig = 0; ig < pwtest.npw; ++ig) + { + ModuleBase::Vector3 f; + f.x = pwtest.gdirect[ig].x; + f.y = pwtest.gdirect[ig].y; + f.z = pwtest.gdirect[ig].z; + ModuleBase::Vector3 gcar; + gcar = f * G; + double modulus = f * GGT * f; + EXPECT_NEAR (gcar.x, pwtest.gcar[ig].x, 1e-6); + EXPECT_NEAR (gcar.y, pwtest.gcar[ig].y, 1e-6); + EXPECT_NEAR (gcar.z, pwtest.gcar[ig].z, 1e-6); + EXPECT_NEAR (modulus, pwtest.gg[ig], 1e-6); + 
EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + for (int igg = 1; igg < pwtest.ngg; ++igg) + { + EXPECT_GT (pwtest.gg_uniq[igg], pwtest.gg_uniq[igg - 1]); + } + if (pwtest.ig_gge0 >= 0) + { + EXPECT_NEAR (0.0, pwtest.gg[pwtest.ig_gge0], 1e-8); + } + delete[] startnst; + delete[] tmpx; + delete[] tmpy; + delete[] tmpz; - //Add tests for gg_uniq - ModuleBase::Matrix3 latvec2(5.1358423233,0.0,0.0,0.1578526541,5.1334159104,0.0,-2.646847675,-2.5667081359,3.5753437737); + // Add tests for gg_uniq + ModuleBase::Matrix3 + latvec2 (5.1358423233, 0.0, 0.0, 0.1578526541, 5.1334159104, 0.0, -2.646847675, -2.5667081359, 3.5753437737); gamma_only = false; wfcecut = 240; lat0 = 1.88972613; distribution_type = 1; //-------------------------------------------------- - pwtest.initgrids(lat0, latvec2, wfcecut); - pwtest.initparameters(gamma_only, wfcecut, distribution_type, xprime); - pwtest.setuptransform(); - pwtest.collect_local_pw(); - pwtest.collect_uniqgg(); - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - int * irindex = new int [pwtest.fftnxy]; - pwtest.getfftixy2is(irindex); - for(int is = 0 ; is < pwtest.nst ;++is) - { - EXPECT_EQ(irindex[pwtest.is2fftixy[is]],is); - } + pwtest.initgrids (lat0, latvec2, wfcecut); + pwtest.initparameters (gamma_only, wfcecut, distribution_type, xprime); + pwtest.setuptransform (); + pwtest.collect_local_pw (); + pwtest.collect_uniqgg (); + for (int ig = 0; ig < pwtest.npw; ++ig) + { + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + int* irindex = new int[pwtest.fftnxy]; + pwtest.getfftixy2is (irindex); + for (int is = 0; is < pwtest.nst; ++is) + { + EXPECT_EQ (irindex[pwtest.is2fftixy[is]], is); + } delete[] irindex; - - } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test5-2-1.cpp b/source/source_basis/module_pw/test/test5-2-1.cpp index a05306285d7..fc6de384598 100644 --- 
a/source/source_basis/module_pw/test/test5-2-1.cpp +++ b/source/source_basis/module_pw/test/test5-2-1.cpp @@ -7,14 +7,15 @@ #include "source_base/global_function.h" #include "source_base/constants.h" #include "pw_test.h" -extern int nproc_in_pool,rank_in_pool; +extern int nproc_in_pool, rank_in_pool; using namespace std; -TEST_F(PWTEST,test5_2_1) +TEST_F (PWTEST, test5_2_1) { - cout<<"dividemthd 1, gamma_only: on, full_pw: true, full_pw_dim: 1, check gcar,gdirect,gg,istot2ixy,ig2isz"< f; - f.x = ix; - f.y = iy; - f.z = iz; - if(iz >= int(pwtest.nz/2) +1) f.z -= pwtest.nz; - if(ix >= int(pwtest.nx/2) +1) f.x -= pwtest.nx; - double modulus = f * (GGT * f); - if (modulus <= ggecut) - { - EXPECT_EQ(tmpx[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.x)); - EXPECT_EQ(tmpy[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.y)); - EXPECT_EQ(tmpz[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.z)); - } - + for (int iy = 0; iy < pwtest.fftny; ++iy) + { + for (int ix = 0; ix < pwtest.fftnx; ++ix) + { + ModuleBase::Vector3 f; + f.x = ix; + f.y = iy; + f.z = iz; + if (iz >= int (pwtest.nz / 2) + 1) + { + f.z -= pwtest.nz; + } + if (ix >= int (pwtest.nx / 2) + 1) + { + f.x -= pwtest.nx; + } + double modulus = f * (GGT * f); + if (modulus <= ggecut) + { + EXPECT_EQ (tmpx[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.x)); + EXPECT_EQ (tmpy[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.y)); + EXPECT_EQ (tmpz[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.z)); + } + } + } } - } } - } - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - ModuleBase::Vector3 f; - f.x = pwtest.gdirect[ig].x; - f.y = pwtest.gdirect[ig].y; - f.z = pwtest.gdirect[ig].z; - ModuleBase::Vector3 gcar; - gcar = f * G; - double modulus = f*GGT*f; - EXPECT_NEAR(gcar.x,pwtest.gcar[ig].x,1e-6); - EXPECT_NEAR(gcar.y,pwtest.gcar[ig].y,1e-6); - EXPECT_NEAR(gcar.z,pwtest.gcar[ig].z,1e-6); - 
EXPECT_NEAR(modulus,pwtest.gg[ig],1e-6); - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - for(int igg = 1 ; igg < pwtest.ngg ; ++igg) - { - EXPECT_GT(pwtest.gg_uniq[igg], pwtest.gg_uniq[igg-1]); - } - if(pwtest.ig_gge0 >= 0) {EXPECT_NEAR(0.0, pwtest.gg[pwtest.ig_gge0], 1e-8);} - delete [] startnst; - delete [] tmpx; - delete [] tmpy; - delete [] tmpz; + for (int ig = 0; ig < pwtest.npw; ++ig) + { + ModuleBase::Vector3 f; + f.x = pwtest.gdirect[ig].x; + f.y = pwtest.gdirect[ig].y; + f.z = pwtest.gdirect[ig].z; + ModuleBase::Vector3 gcar; + gcar = f * G; + double modulus = f * GGT * f; + EXPECT_NEAR (gcar.x, pwtest.gcar[ig].x, 1e-6); + EXPECT_NEAR (gcar.y, pwtest.gcar[ig].y, 1e-6); + EXPECT_NEAR (gcar.z, pwtest.gcar[ig].z, 1e-6); + EXPECT_NEAR (modulus, pwtest.gg[ig], 1e-6); + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + for (int igg = 1; igg < pwtest.ngg; ++igg) + { + EXPECT_GT (pwtest.gg_uniq[igg], pwtest.gg_uniq[igg - 1]); + } + if (pwtest.ig_gge0 >= 0) + { + EXPECT_NEAR (0.0, pwtest.gg[pwtest.ig_gge0], 1e-8); + } + delete[] startnst; + delete[] tmpx; + delete[] tmpy; + delete[] tmpz; } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test5-2-2.cpp b/source/source_basis/module_pw/test/test5-2-2.cpp index 080a58af6ff..4e397ba73a6 100644 --- a/source/source_basis/module_pw/test/test5-2-2.cpp +++ b/source/source_basis/module_pw/test/test5-2-2.cpp @@ -7,14 +7,15 @@ #include "source_base/global_function.h" #include "source_base/constants.h" #include "pw_test.h" -extern int nproc_in_pool,rank_in_pool; +extern int nproc_in_pool, rank_in_pool; using namespace std; -TEST_F(PWTEST,test5_2_2) +TEST_F (PWTEST, test5_2_2) { - cout<<"dividemthd 1, gamma_only: on, full_pw: true, full_pw_dim: 2, check gcar,gdirect,gg,istot2ixy,ig2isz"< f; - f.x = ix; - f.y = iy; - f.z = iz; - if(iz >= int(pwtest.nz/2) +1) f.z -= pwtest.nz; - if(ix >= int(pwtest.nx/2) +1) f.x -= pwtest.nx; - double modulus = f * 
(GGT * f); - if (modulus <= ggecut) - { - EXPECT_EQ(tmpx[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.x)); - EXPECT_EQ(tmpy[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.y)); - EXPECT_EQ(tmpz[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.z)); - } - + for (int iy = 0; iy < pwtest.fftny; ++iy) + { + for (int ix = 0; ix < pwtest.fftnx; ++ix) + { + ModuleBase::Vector3 f; + f.x = ix; + f.y = iy; + f.z = iz; + if (iz >= int (pwtest.nz / 2) + 1) + { + f.z -= pwtest.nz; + } + if (ix >= int (pwtest.nx / 2) + 1) + { + f.x -= pwtest.nx; + } + double modulus = f * (GGT * f); + if (modulus <= ggecut) + { + EXPECT_EQ (tmpx[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.x)); + EXPECT_EQ (tmpy[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.y)); + EXPECT_EQ (tmpz[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.z)); + } + } + } } - } } - } - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - ModuleBase::Vector3 f; - f.x = pwtest.gdirect[ig].x; - f.y = pwtest.gdirect[ig].y; - f.z = pwtest.gdirect[ig].z; - ModuleBase::Vector3 gcar; - gcar = f * G; - double modulus = f*GGT*f; - EXPECT_NEAR(gcar.x,pwtest.gcar[ig].x,1e-6); - EXPECT_NEAR(gcar.y,pwtest.gcar[ig].y,1e-6); - EXPECT_NEAR(gcar.z,pwtest.gcar[ig].z,1e-6); - EXPECT_NEAR(modulus,pwtest.gg[ig],1e-6); - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - for(int igg = 1 ; igg < pwtest.ngg ; ++igg) - { - EXPECT_GT(pwtest.gg_uniq[igg], pwtest.gg_uniq[igg-1]); - } - if(pwtest.ig_gge0 >= 0) {EXPECT_NEAR(0.0, pwtest.gg[pwtest.ig_gge0], 1e-8);} - delete [] startnst; - delete [] tmpx; - delete [] tmpy; - delete [] tmpz; + for (int ig = 0; ig < pwtest.npw; ++ig) + { + ModuleBase::Vector3 f; + f.x = pwtest.gdirect[ig].x; + f.y = pwtest.gdirect[ig].y; + f.z = pwtest.gdirect[ig].z; + ModuleBase::Vector3 gcar; + gcar = f * G; + double modulus = f * GGT * f; + EXPECT_NEAR (gcar.x, pwtest.gcar[ig].x, 1e-6); + EXPECT_NEAR 
(gcar.y, pwtest.gcar[ig].y, 1e-6); + EXPECT_NEAR (gcar.z, pwtest.gcar[ig].z, 1e-6); + EXPECT_NEAR (modulus, pwtest.gg[ig], 1e-6); + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + for (int igg = 1; igg < pwtest.ngg; ++igg) + { + EXPECT_GT (pwtest.gg_uniq[igg], pwtest.gg_uniq[igg - 1]); + } + if (pwtest.ig_gge0 >= 0) + { + EXPECT_NEAR (0.0, pwtest.gg[pwtest.ig_gge0], 1e-8); + } + delete[] startnst; + delete[] tmpx; + delete[] tmpy; + delete[] tmpz; } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test5-3-1.cpp b/source/source_basis/module_pw/test/test5-3-1.cpp index 21ffbbd87de..e234a8fc6bd 100644 --- a/source/source_basis/module_pw/test/test5-3-1.cpp +++ b/source/source_basis/module_pw/test/test5-3-1.cpp @@ -12,18 +12,18 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test5_3_1) +TEST_F (PWTEST, test5_3_1) { - cout<<"dividemthd 1, gamma_only: on, xprime: false, full_pw: true, full_pw_dim: 1, check fft"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix + int(nx/2) - nx + 1; - double vy = iy + int(ny/2) - ny + 1; - double vz = iz + int(nz/2) - nz + 1; - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut || full_pw) - { - tmp[ix*ny*nz + iy*nz + iz] = 1.0/(modulus+1); - if(vy > 0) tmp[ix*ny*nz + iy*nz + iz]+=ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); - else if(vy < 0) tmp[ix*ny*nz + iy*nz + iz]-=ModuleBase::IMAG_UNIT / (std::abs(-v.x+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx 
= ix + int (nx / 2) - nx + 1; + double vy = iy + int (ny / 2) - ny + 1; + double vz = iz + int (nz / 2) - nz + 1; + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut || full_pw) + { + tmp[ix * ny * nz + iy * nz + iz] = 1.0 / (modulus + 1); + if (vy > 0) + { + tmp[ix * ny * nz + iy * nz + iz] + += ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + else if (vy < 0) + { + tmp[ix * ny * nz + iy * nz + iz] + -= ModuleBase::IMAG_UNIT / (std::abs (-v.x + 1) + 1); + } + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - 
for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1); - if(pwtest.gdirect[ig].y > 0) + + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) { - rhog[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhogr[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1); + if (pwtest.gdirect[ig].y > 0) + { + rhog[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhogr[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } } - } - double * rhor = new double [nrxx]; + double* rhor = new double[nrxx]; #ifdef __ENABLE_FLOAT_FFTW - complex * rhofg = new complex [npw]; - complex * rhofgr = new complex [nmaxgr]; - complex * rhofgout = new complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhofg[ig] = 1.0/(pwtest.gg[ig]+1); - rhofgr[ig] = 1.0/(pwtest.gg[ig]+1); - if(pwtest.gdirect[ig].y > 0) + complex* rhofg = new complex[npw]; + complex* rhofgr = new complex[nmaxgr]; + complex* rhofgout = new complex[npw]; + for (int ig = 0; ig < npw; ++ig) { - rhofg[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhofgr[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); + rhofg[ig] = 1.0 / (pwtest.gg[ig] + 1); + rhofgr[ig] = 1.0 / (pwtest.gg[ig] + 1); + if (pwtest.gdirect[ig].y > 0) + { + rhofg[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhofgr[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } } - } - float * rhofr = new float [nrxx]; + float* rhofr = new float[nrxx]; #endif - - pwtest.recip2real(rhog,rhor);//check out-of-place transform - pwtest.recip2real(rhogr,(double*)rhogr);//check in-place transform + pwtest.recip2real (rhog, rhor); // check 
out-of-place transform + + pwtest.recip2real (rhogr, (double*)rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.recip2real(rhofg,rhofr);//check out-of-place transform + pwtest.recip2real (rhofg, rhofr); // check out-of-place transform - pwtest.recip2real(rhofgr,(float*)rhofgr);//check in-place transform + pwtest.recip2real (rhofgr, (float*)rhofgr); // check in-place transform #endif int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz],1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((double*)rhogr)[ixy*nplane+iz],1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz], 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), ((double*)rhogr)[ixy * nplane + iz], 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofr[ixy*nplane+iz],1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((float*)rhofgr)[ixy*nplane+iz],1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofr[ixy * nplane + iz], 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), ((float*)rhofgr)[ixy * nplane + iz], 1e-4); #endif + } } - } - - - pwtest.real2recip(rhor,rhogout);//check out-of-place transform - pwtest.real2recip((double*)rhogr,rhogr);//check in-place transform + pwtest.real2recip (rhor, rhogout); // check out-of-place transform + + pwtest.real2recip ((double*)rhogr, rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.real2recip(rhofr,rhofgout);//check out-of-place transform + pwtest.real2recip (rhofr, rhofgout); // check out-of-place transform - pwtest.real2recip((float*)rhofgr,rhofgr);//check in-place transform + pwtest.real2recip ((float*)rhofgr, rhofgr); // check in-place transform #endif - for(int ig = 0 ; ig < npw ; ++ig) - { - 
EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(rhofg[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofg[ig].imag(),rhofgout[ig].imag(),1e-4); - EXPECT_NEAR(rhofgr[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofgr[ig].imag(),rhofgout[ig].imag(),1e-4); + EXPECT_NEAR (rhofg[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofg[ig].imag (), rhofgout[ig].imag (), 1e-4); + EXPECT_NEAR (rhofgr[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofgr[ig].imag (), rhofgout[ig].imag (), 1e-4); #endif - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; - - fftw_cleanup(); + } + + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; + + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - delete [] rhofg; - delete [] rhofgout; - delete [] rhofr; - delete [] rhofgr; - fftwf_cleanup(); + delete[] rhofg; + delete[] rhofgout; + delete[] rhofr; + delete[] rhofgr; + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test5-4-1.cpp b/source/source_basis/module_pw/test/test5-4-1.cpp index ae3dfd647c8..0391b2bf58a 100644 --- a/source/source_basis/module_pw/test/test5-4-1.cpp +++ b/source/source_basis/module_pw/test/test5-4-1.cpp @@ -12,18 +12,20 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test5_4_1) +TEST_F (PWTEST, test5_4_1) { - cout<<"dividemthd 1, gamma_only: off, xprime: false, full_pw: true, full_pw_dim: 1, check fft 
between complex and complex"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix + int(nx/2) - nx + 1; - double vy = iy + int(ny/2) - ny + 1; - double vz = iz + int(nz/2) - nz + 1; - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut || full_pw) - { - tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulus+1) + ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix + int (nx / 2) - nx + 1; + double vy = iy + int (ny / 2) - ny + 1; + double vz = iz + int (nz / 2) - nz + 1; + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut || full_pw) + { + tmp[ix * ny * nz + iy * nz + iz] + = 1.0 / (modulus + 1) + + ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - 
fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - std::complex * rhor = new std::complex [nrxx]; + + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + std::complex* rhor = new std::complex[nrxx]; #ifdef __ENABLE_FLOAT_FFTW - complex * rhofg = new complex [npw]; - complex * rhofgr = new complex [nmaxgr]; - complex * rhofgout = new complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhofg[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhofgr[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - complex * rhofr = new complex [nrxx]; + complex* rhofg = new complex[npw]; + complex* rhofgr = new 
complex[nmaxgr]; + complex* rhofgout = new complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhofg[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhofgr[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + complex* rhofr = new complex[nrxx]; #endif - - pwtest.recip2real(rhog,rhor);//check out-of-place transform - pwtest.recip2real(rhogr,rhogr);//check in-place transform + pwtest.recip2real (rhog, rhor); // check out-of-place transform + + pwtest.recip2real (rhogr, rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.recip2real(rhofg,rhofr);//check out-of-place transform + pwtest.recip2real (rhofg, rhofr); // check out-of-place transform - pwtest.recip2real(rhofgr,rhofgr);//check in-place transform + pwtest.recip2real (rhofgr, rhofgr); // check in-place transform #endif int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhor[ixy*nplane+iz].imag(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhogr[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhogr[ixy*nplane+iz].imag(),1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhor[ixy * nplane + iz].imag (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhogr[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhogr[ixy * nplane + iz].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofr[ixy*nplane+iz].real(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + 
iz].imag(),rhofr[ixy*nplane+iz].imag(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofgr[ixy*nplane+iz].real(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhofgr[ixy*nplane+iz].imag(),1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofr[ixy * nplane + iz].real (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhofr[ixy * nplane + iz].imag (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofgr[ixy * nplane + iz].real (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhofgr[ixy * nplane + iz].imag (), 1e-4); #endif + } } - } - - - pwtest.real2recip(rhor,rhogout);//check out-of-place transform + pwtest.real2recip (rhor, rhogout); // check out-of-place transform - pwtest.real2recip(rhogr,rhogr);//check in-place transform + pwtest.real2recip (rhogr, rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.real2recip(rhofr,rhofgout);//check out-of-place transform + pwtest.real2recip (rhofr, rhofgout); // check out-of-place transform - pwtest.real2recip(rhofgr,rhofgr);//check in-place transform + pwtest.real2recip (rhofgr, rhofgr); // check in-place transform #endif - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(rhofg[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofg[ig].imag(),rhofgout[ig].imag(),1e-4); - EXPECT_NEAR(rhofgr[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofgr[ig].imag(),rhofgout[ig].imag(),1e-4); + 
EXPECT_NEAR (rhofg[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofg[ig].imag (), rhofgout[ig].imag (), 1e-4); + EXPECT_NEAR (rhofgr[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofgr[ig].imag (), rhofgout[ig].imag (), 1e-4); #endif - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; - - fftw_cleanup(); + } + + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; + + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - delete [] rhofg; - delete [] rhofgout; - delete [] rhofr; - delete [] rhofgr; - fftwf_cleanup(); + delete[] rhofg; + delete[] rhofgout; + delete[] rhofr; + delete[] rhofgr; + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test5-4-2.cpp b/source/source_basis/module_pw/test/test5-4-2.cpp index 95bbc3f114a..322a09e854a 100644 --- a/source/source_basis/module_pw/test/test5-4-2.cpp +++ b/source/source_basis/module_pw/test/test5-4-2.cpp @@ -12,18 +12,20 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test5_4_2) +TEST_F (PWTEST, test5_4_2) { - cout<<"dividemthd 1, gamma_only: off, xprime: false, full_pw: true, full_pw_dim: 2, check fft between complex and complex"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix + int(nx/2) - nx + 1; - double vy = iy + int(ny/2) - ny + 1; - double vz = iz + int(nz/2) - nz + 1; - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut || full_pw) - { - tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulus+1) + ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); 
- } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix + int (nx / 2) - nx + 1; + double vy = iy + int (ny / 2) - ny + 1; + double vz = iz + int (nz / 2) - nz + 1; + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut || full_pw) + { + tmp[ix * ny * nz + iy * nz + iz] + = 1.0 / (modulus + 1) + + ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g ((double (int (nx / 2)) - 1.) / nx, + (double (int (ny / 2)) - 1.) / ny, + (double (int (nz / 2)) - 1.) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g((double(int(nx/2))-1.)/nx, (double(int(ny/2))-1.)/ny, (double(int(nz/2))-1.)/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - 
for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - std::complex * rhor = new std::complex [nrxx]; + + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + std::complex* rhor = new std::complex[nrxx]; #ifdef __ENABLE_FLOAT_FFTW - complex * rhofg = new complex [npw]; - complex * rhofgr = new complex [nmaxgr]; - complex * rhofgout = new complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhofg[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhofgr[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - complex * rhofr = new complex [nrxx]; + complex* rhofg = new complex[npw]; + complex* rhofgr = new complex[nmaxgr]; + complex* rhofgout = new complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhofg[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhofgr[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + complex* rhofr = new complex[nrxx]; #endif - - pwtest.recip2real(rhog,rhor);//check out-of-place transform - pwtest.recip2real(rhogr,rhogr);//check in-place transform + pwtest.recip2real (rhog, rhor); // check out-of-place transform + + pwtest.recip2real (rhogr, rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.recip2real(rhofg,rhofr);//check out-of-place transform + pwtest.recip2real (rhofg, rhofr); // 
check out-of-place transform - pwtest.recip2real(rhofgr,rhofgr);//check in-place transform + pwtest.recip2real (rhofgr, rhofgr); // check in-place transform #endif int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhor[ixy*nplane+iz].imag(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhogr[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhogr[ixy*nplane+iz].imag(),1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhor[ixy * nplane + iz].imag (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhogr[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhogr[ixy * nplane + iz].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofr[ixy*nplane+iz].real(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhofr[ixy*nplane+iz].imag(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofgr[ixy*nplane+iz].real(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhofgr[ixy*nplane+iz].imag(),1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofr[ixy * nplane + iz].real (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhofr[ixy * nplane + iz].imag (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofgr[ixy * nplane + iz].real (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhofgr[ixy * nplane + iz].imag (), 1e-4); #endif + } } - } - - - pwtest.real2recip(rhor,rhogout);//check out-of-place transform + pwtest.real2recip (rhor, rhogout); // check out-of-place transform - 
pwtest.real2recip(rhogr,rhogr);//check in-place transform + pwtest.real2recip (rhogr, rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.real2recip(rhofr,rhofgout);//check out-of-place transform + pwtest.real2recip (rhofr, rhofgout); // check out-of-place transform - pwtest.real2recip(rhofgr,rhofgr);//check in-place transform + pwtest.real2recip (rhofgr, rhofgr); // check in-place transform #endif - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(rhofg[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofg[ig].imag(),rhofgout[ig].imag(),1e-4); - EXPECT_NEAR(rhofgr[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofgr[ig].imag(),rhofgout[ig].imag(),1e-4); + EXPECT_NEAR (rhofg[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofg[ig].imag (), rhofgout[ig].imag (), 1e-4); + EXPECT_NEAR (rhofgr[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofgr[ig].imag (), rhofgout[ig].imag (), 1e-4); #endif - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; - - fftw_cleanup(); + } + + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; + + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - delete [] rhofg; - delete [] rhofgout; - delete [] rhofr; - delete [] rhofgr; - fftwf_cleanup(); + delete[] rhofg; + delete[] rhofgout; + delete[] rhofr; + delete[] rhofgr; + fftwf_cleanup (); #endif } \ No newline at end of file diff --git 
a/source/source_basis/module_pw/test/test6-1-1.cpp b/source/source_basis/module_pw/test/test6-1-1.cpp index d7cce63c827..c0d17c9aa00 100644 --- a/source/source_basis/module_pw/test/test6-1-1.cpp +++ b/source/source_basis/module_pw/test/test6-1-1.cpp @@ -7,14 +7,15 @@ #include "source_base/global_function.h" #include "source_base/constants.h" #include "pw_test.h" -extern int nproc_in_pool,rank_in_pool; +extern int nproc_in_pool, rank_in_pool; using namespace std; -TEST_F(PWTEST,test6_1_1) +TEST_F (PWTEST, test6_1_1) { - cout<<"dividemthd 2, gamma_only: off, full_pw: true, full_pw_dim: 1, check gcar,gdirect,gg,istot2ixy,ig2isz"< f; - f.x = ix; - f.y = iy; - f.z = iz; - if(iz >= int(pwtest.nz/2) +1) f.z -= pwtest.nz; - if(iy >= int(pwtest.ny/2) +1) f.y -= pwtest.ny; - if(ix >= int(pwtest.nx/2) +1) f.x -= pwtest.nx; - double modulus = f * (GGT * f); - if (modulus <= ggecut) - { - EXPECT_EQ(tmpx[iz + iy*pwtest.nz + ix*pwtest.ny*pwtest.nz], int(f.x)); - EXPECT_EQ(tmpy[iz + iy*pwtest.nz + ix*pwtest.ny*pwtest.nz], int(f.y)); - EXPECT_EQ(tmpz[iz + iy*pwtest.nz + ix*pwtest.ny*pwtest.nz], int(f.z)); - } - + for (int iy = 0; iy < pwtest.ny; ++iy) + { + for (int ix = 0; ix < pwtest.nx; ++ix) + { + ModuleBase::Vector3 f; + f.x = ix; + f.y = iy; + f.z = iz; + if (iz >= int (pwtest.nz / 2) + 1) + { + f.z -= pwtest.nz; + } + if (iy >= int (pwtest.ny / 2) + 1) + { + f.y -= pwtest.ny; + } + if (ix >= int (pwtest.nx / 2) + 1) + { + f.x -= pwtest.nx; + } + double modulus = f * (GGT * f); + if (modulus <= ggecut) + { + EXPECT_EQ (tmpx[iz + iy * pwtest.nz + ix * pwtest.ny * pwtest.nz], + int (f.x)); + EXPECT_EQ (tmpy[iz + iy * pwtest.nz + ix * pwtest.ny * pwtest.nz], + int (f.y)); + EXPECT_EQ (tmpz[iz + iy * pwtest.nz + ix * pwtest.ny * pwtest.nz], + int (f.z)); + } + } + } } - } } - } - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - ModuleBase::Vector3 f; - f.x = pwtest.gdirect[ig].x; - f.y = pwtest.gdirect[ig].y; - f.z = pwtest.gdirect[ig].z; - ModuleBase::Vector3 gcar; - gcar = f * G; 
- double modulus = f*GGT*f; - EXPECT_NEAR(gcar.x,pwtest.gcar[ig].x,1e-6); - EXPECT_NEAR(gcar.y,pwtest.gcar[ig].y,1e-6); - EXPECT_NEAR(gcar.z,pwtest.gcar[ig].z,1e-6); - EXPECT_NEAR(modulus,pwtest.gg[ig],1e-6); - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - for(int igg = 1 ; igg < pwtest.ngg ; ++igg) - { - EXPECT_GT(pwtest.gg_uniq[igg], pwtest.gg_uniq[igg-1]); - } - if(pwtest.ig_gge0 >= 0) {EXPECT_NEAR(0.0, pwtest.gg[pwtest.ig_gge0], 1e-8);} - delete [] startnst; - delete [] tmpx; - delete [] tmpy; - delete [] tmpz; + for (int ig = 0; ig < pwtest.npw; ++ig) + { + ModuleBase::Vector3 f; + f.x = pwtest.gdirect[ig].x; + f.y = pwtest.gdirect[ig].y; + f.z = pwtest.gdirect[ig].z; + ModuleBase::Vector3 gcar; + gcar = f * G; + double modulus = f * GGT * f; + EXPECT_NEAR (gcar.x, pwtest.gcar[ig].x, 1e-6); + EXPECT_NEAR (gcar.y, pwtest.gcar[ig].y, 1e-6); + EXPECT_NEAR (gcar.z, pwtest.gcar[ig].z, 1e-6); + EXPECT_NEAR (modulus, pwtest.gg[ig], 1e-6); + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + for (int igg = 1; igg < pwtest.ngg; ++igg) + { + EXPECT_GT (pwtest.gg_uniq[igg], pwtest.gg_uniq[igg - 1]); + } + if (pwtest.ig_gge0 >= 0) + { + EXPECT_NEAR (0.0, pwtest.gg[pwtest.ig_gge0], 1e-8); + } + delete[] startnst; + delete[] tmpx; + delete[] tmpy; + delete[] tmpz; - //Add tests for gg_uniq - ModuleBase::Matrix3 latvec2(5.1358423233,0.0,0.0,0.1578526541,5.1334159104,0.0,-2.646847675,-2.5667081359,3.5753437737); + // Add tests for gg_uniq + ModuleBase::Matrix3 + latvec2 (5.1358423233, 0.0, 0.0, 0.1578526541, 5.1334159104, 0.0, -2.646847675, -2.5667081359, 3.5753437737); gamma_only = false; wfcecut = 240; lat0 = 1.88972613; distribution_type = 1; //-------------------------------------------------- - pwtest.initgrids(lat0, latvec2, wfcecut); - pwtest.initparameters(gamma_only, wfcecut, distribution_type, xprime); - pwtest.setuptransform(); - pwtest.collect_local_pw(); - pwtest.collect_uniqgg(); - for(int ig = 0 ;ig < 
pwtest.npw ; ++ig) - { - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - int * irindex = new int [pwtest.fftnxy]; - pwtest.getfftixy2is(irindex); - for(int is = 0 ; is < pwtest.nst ;++is) - { - EXPECT_EQ(irindex[pwtest.is2fftixy[is]],is); - } + pwtest.initgrids (lat0, latvec2, wfcecut); + pwtest.initparameters (gamma_only, wfcecut, distribution_type, xprime); + pwtest.setuptransform (); + pwtest.collect_local_pw (); + pwtest.collect_uniqgg (); + for (int ig = 0; ig < pwtest.npw; ++ig) + { + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + int* irindex = new int[pwtest.fftnxy]; + pwtest.getfftixy2is (irindex); + for (int is = 0; is < pwtest.nst; ++is) + { + EXPECT_EQ (irindex[pwtest.is2fftixy[is]], is); + } delete[] irindex; - - } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test6-1-2.cpp b/source/source_basis/module_pw/test/test6-1-2.cpp index 8aa431441c0..00a92c79c41 100644 --- a/source/source_basis/module_pw/test/test6-1-2.cpp +++ b/source/source_basis/module_pw/test/test6-1-2.cpp @@ -7,14 +7,15 @@ #include "source_base/global_function.h" #include "source_base/constants.h" #include "pw_test.h" -extern int nproc_in_pool,rank_in_pool; +extern int nproc_in_pool, rank_in_pool; using namespace std; -TEST_F(PWTEST,test6_1_2) +TEST_F (PWTEST, test6_1_2) { - cout<<"dividemthd 2, gamma_only: off, full_pw: true, full_pw_dim: 2, check gcar,gdirect,gg,istot2ixy,ig2isz"< f; - f.x = ix; - f.y = iy; - f.z = iz; - if(iz >= int(pwtest.nz/2) +1) f.z -= pwtest.nz; - if(iy >= int(pwtest.ny/2) +1) f.y -= pwtest.ny; - if(ix >= int(pwtest.nx/2) +1) f.x -= pwtest.nx; - double modulus = f * (GGT * f); - if (modulus <= ggecut) - { - EXPECT_EQ(tmpx[iz + iy*pwtest.nz + ix*pwtest.ny*pwtest.nz], int(f.x)); - EXPECT_EQ(tmpy[iz + iy*pwtest.nz + ix*pwtest.ny*pwtest.nz], int(f.y)); - EXPECT_EQ(tmpz[iz + iy*pwtest.nz + ix*pwtest.ny*pwtest.nz], int(f.z)); - } - + for (int iy = 0; iy < pwtest.ny; ++iy) + { + for (int 
ix = 0; ix < pwtest.nx; ++ix) + { + ModuleBase::Vector3 f; + f.x = ix; + f.y = iy; + f.z = iz; + if (iz >= int (pwtest.nz / 2) + 1) + { + f.z -= pwtest.nz; + } + if (iy >= int (pwtest.ny / 2) + 1) + { + f.y -= pwtest.ny; + } + if (ix >= int (pwtest.nx / 2) + 1) + { + f.x -= pwtest.nx; + } + double modulus = f * (GGT * f); + if (modulus <= ggecut) + { + EXPECT_EQ (tmpx[iz + iy * pwtest.nz + ix * pwtest.ny * pwtest.nz], + int (f.x)); + EXPECT_EQ (tmpy[iz + iy * pwtest.nz + ix * pwtest.ny * pwtest.nz], + int (f.y)); + EXPECT_EQ (tmpz[iz + iy * pwtest.nz + ix * pwtest.ny * pwtest.nz], + int (f.z)); + } + } + } } - } } - } - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - ModuleBase::Vector3 f; - f.x = pwtest.gdirect[ig].x; - f.y = pwtest.gdirect[ig].y; - f.z = pwtest.gdirect[ig].z; - ModuleBase::Vector3 gcar; - gcar = f * G; - double modulus = f*GGT*f; - EXPECT_NEAR(gcar.x,pwtest.gcar[ig].x,1e-6); - EXPECT_NEAR(gcar.y,pwtest.gcar[ig].y,1e-6); - EXPECT_NEAR(gcar.z,pwtest.gcar[ig].z,1e-6); - EXPECT_NEAR(modulus,pwtest.gg[ig],1e-6); - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - for(int igg = 1 ; igg < pwtest.ngg ; ++igg) - { - EXPECT_GT(pwtest.gg_uniq[igg], pwtest.gg_uniq[igg-1]); - } - if(pwtest.ig_gge0 >= 0) {EXPECT_NEAR(0.0, pwtest.gg[pwtest.ig_gge0], 1e-8);} - delete [] startnst; - delete [] tmpx; - delete [] tmpy; - delete [] tmpz; + for (int ig = 0; ig < pwtest.npw; ++ig) + { + ModuleBase::Vector3 f; + f.x = pwtest.gdirect[ig].x; + f.y = pwtest.gdirect[ig].y; + f.z = pwtest.gdirect[ig].z; + ModuleBase::Vector3 gcar; + gcar = f * G; + double modulus = f * GGT * f; + EXPECT_NEAR (gcar.x, pwtest.gcar[ig].x, 1e-6); + EXPECT_NEAR (gcar.y, pwtest.gcar[ig].y, 1e-6); + EXPECT_NEAR (gcar.z, pwtest.gcar[ig].z, 1e-6); + EXPECT_NEAR (modulus, pwtest.gg[ig], 1e-6); + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + for (int igg = 1; igg < pwtest.ngg; ++igg) + { + EXPECT_GT (pwtest.gg_uniq[igg], pwtest.gg_uniq[igg - 1]); + } 
+ if (pwtest.ig_gge0 >= 0) + { + EXPECT_NEAR (0.0, pwtest.gg[pwtest.ig_gge0], 1e-8); + } + delete[] startnst; + delete[] tmpx; + delete[] tmpy; + delete[] tmpz; - //Add tests for gg_uniq - ModuleBase::Matrix3 latvec2(5.1358423233,0.0,0.0,0.1578526541,5.1334159104,0.0,-2.646847675,-2.5667081359,3.5753437737); + // Add tests for gg_uniq + ModuleBase::Matrix3 + latvec2 (5.1358423233, 0.0, 0.0, 0.1578526541, 5.1334159104, 0.0, -2.646847675, -2.5667081359, 3.5753437737); gamma_only = false; wfcecut = 240; lat0 = 1.88972613; distribution_type = 1; //-------------------------------------------------- - pwtest.initgrids(lat0, latvec2, wfcecut); - pwtest.initparameters(gamma_only, wfcecut, distribution_type, xprime); - pwtest.setuptransform(); - pwtest.collect_local_pw(); - pwtest.collect_uniqgg(); - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - int * irindex = new int [pwtest.fftnxy]; - pwtest.getfftixy2is(irindex); - for(int is = 0 ; is < pwtest.nst ;++is) - { - EXPECT_EQ(irindex[pwtest.is2fftixy[is]],is); - } + pwtest.initgrids (lat0, latvec2, wfcecut); + pwtest.initparameters (gamma_only, wfcecut, distribution_type, xprime); + pwtest.setuptransform (); + pwtest.collect_local_pw (); + pwtest.collect_uniqgg (); + for (int ig = 0; ig < pwtest.npw; ++ig) + { + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + int* irindex = new int[pwtest.fftnxy]; + pwtest.getfftixy2is (irindex); + for (int is = 0; is < pwtest.nst; ++is) + { + EXPECT_EQ (irindex[pwtest.is2fftixy[is]], is); + } delete[] irindex; - - } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test6-2-1.cpp b/source/source_basis/module_pw/test/test6-2-1.cpp index 8473553e00a..083f48c6088 100644 --- a/source/source_basis/module_pw/test/test6-2-1.cpp +++ b/source/source_basis/module_pw/test/test6-2-1.cpp @@ -7,14 +7,15 @@ #include "source_base/global_function.h" #include "source_base/constants.h" 
#include "pw_test.h" -extern int nproc_in_pool,rank_in_pool; +extern int nproc_in_pool, rank_in_pool; using namespace std; -TEST_F(PWTEST,test6_2_1) +TEST_F (PWTEST, test6_2_1) { - cout<<"dividemthd 2, gamma_only: on, full_pw: true, full_pw_dim: 1, check gcar,gdirect,gg,istot2ixy,ig2isz"< f; - f.x = ix; - f.y = iy; - f.z = iz; - if(iz >= int(pwtest.nz/2) +1) f.z -= pwtest.nz; - if(ix >= int(pwtest.nx/2) +1) f.x -= pwtest.nx; - double modulus = f * (GGT * f); - if (modulus <= ggecut) - { - EXPECT_EQ(tmpx[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.x)); - EXPECT_EQ(tmpy[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.y)); - EXPECT_EQ(tmpz[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.z)); - } - + for (int iy = 0; iy < pwtest.fftny; ++iy) + { + for (int ix = 0; ix < pwtest.fftnx; ++ix) + { + ModuleBase::Vector3 f; + f.x = ix; + f.y = iy; + f.z = iz; + if (iz >= int (pwtest.nz / 2) + 1) + { + f.z -= pwtest.nz; + } + if (ix >= int (pwtest.nx / 2) + 1) + { + f.x -= pwtest.nx; + } + double modulus = f * (GGT * f); + if (modulus <= ggecut) + { + EXPECT_EQ (tmpx[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.x)); + EXPECT_EQ (tmpy[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.y)); + EXPECT_EQ (tmpz[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.z)); + } + } + } } - } } - } - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - ModuleBase::Vector3 f; - f.x = pwtest.gdirect[ig].x; - f.y = pwtest.gdirect[ig].y; - f.z = pwtest.gdirect[ig].z; - ModuleBase::Vector3 gcar; - gcar = f * G; - double modulus = f*GGT*f; - EXPECT_NEAR(gcar.x,pwtest.gcar[ig].x,1e-6); - EXPECT_NEAR(gcar.y,pwtest.gcar[ig].y,1e-6); - EXPECT_NEAR(gcar.z,pwtest.gcar[ig].z,1e-6); - EXPECT_NEAR(modulus,pwtest.gg[ig],1e-6); - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - for(int igg = 1 ; igg < pwtest.ngg ; ++igg) - { - EXPECT_GT(pwtest.gg_uniq[igg], pwtest.gg_uniq[igg-1]); - } - 
if(pwtest.ig_gge0 >= 0) {EXPECT_NEAR(0.0, pwtest.gg[pwtest.ig_gge0], 1e-8);} - delete [] startnst; - delete [] tmpx; - delete [] tmpy; - delete [] tmpz; + for (int ig = 0; ig < pwtest.npw; ++ig) + { + ModuleBase::Vector3 f; + f.x = pwtest.gdirect[ig].x; + f.y = pwtest.gdirect[ig].y; + f.z = pwtest.gdirect[ig].z; + ModuleBase::Vector3 gcar; + gcar = f * G; + double modulus = f * GGT * f; + EXPECT_NEAR (gcar.x, pwtest.gcar[ig].x, 1e-6); + EXPECT_NEAR (gcar.y, pwtest.gcar[ig].y, 1e-6); + EXPECT_NEAR (gcar.z, pwtest.gcar[ig].z, 1e-6); + EXPECT_NEAR (modulus, pwtest.gg[ig], 1e-6); + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + for (int igg = 1; igg < pwtest.ngg; ++igg) + { + EXPECT_GT (pwtest.gg_uniq[igg], pwtest.gg_uniq[igg - 1]); + } + if (pwtest.ig_gge0 >= 0) + { + EXPECT_NEAR (0.0, pwtest.gg[pwtest.ig_gge0], 1e-8); + } + delete[] startnst; + delete[] tmpx; + delete[] tmpy; + delete[] tmpz; } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test6-2-2.cpp b/source/source_basis/module_pw/test/test6-2-2.cpp index 87a8b19af19..b84e291c76c 100644 --- a/source/source_basis/module_pw/test/test6-2-2.cpp +++ b/source/source_basis/module_pw/test/test6-2-2.cpp @@ -7,14 +7,15 @@ #include "source_base/global_function.h" #include "source_base/constants.h" #include "pw_test.h" -extern int nproc_in_pool,rank_in_pool; +extern int nproc_in_pool, rank_in_pool; using namespace std; -TEST_F(PWTEST,test6_2_2) +TEST_F (PWTEST, test6_2_2) { - cout<<"dividemthd 2, gamma_only: on, full_pw: true, full_pw_dim: 2, check gcar,gdirect,gg,istot2ixy,ig2isz"< f; - f.x = ix; - f.y = iy; - f.z = iz; - if(iz >= int(pwtest.nz/2) +1) f.z -= pwtest.nz; - if(ix >= int(pwtest.nx/2) +1) f.x -= pwtest.nx; - double modulus = f * (GGT * f); - if (modulus <= ggecut) - { - EXPECT_EQ(tmpx[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.x)); - EXPECT_EQ(tmpy[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.y)); - EXPECT_EQ(tmpz[iz + 
iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.z)); - } - + for (int iy = 0; iy < pwtest.fftny; ++iy) + { + for (int ix = 0; ix < pwtest.fftnx; ++ix) + { + ModuleBase::Vector3 f; + f.x = ix; + f.y = iy; + f.z = iz; + if (iz >= int (pwtest.nz / 2) + 1) + { + f.z -= pwtest.nz; + } + if (ix >= int (pwtest.nx / 2) + 1) + { + f.x -= pwtest.nx; + } + double modulus = f * (GGT * f); + if (modulus <= ggecut) + { + EXPECT_EQ (tmpx[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.x)); + EXPECT_EQ (tmpy[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.y)); + EXPECT_EQ (tmpz[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.z)); + } + } + } } - } } - } - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - ModuleBase::Vector3 f; - f.x = pwtest.gdirect[ig].x; - f.y = pwtest.gdirect[ig].y; - f.z = pwtest.gdirect[ig].z; - ModuleBase::Vector3 gcar; - gcar = f * G; - double modulus = f*GGT*f; - EXPECT_NEAR(gcar.x,pwtest.gcar[ig].x,1e-6); - EXPECT_NEAR(gcar.y,pwtest.gcar[ig].y,1e-6); - EXPECT_NEAR(gcar.z,pwtest.gcar[ig].z,1e-6); - EXPECT_NEAR(modulus,pwtest.gg[ig],1e-6); - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - for(int igg = 1 ; igg < pwtest.ngg ; ++igg) - { - EXPECT_GT(pwtest.gg_uniq[igg], pwtest.gg_uniq[igg-1]); - } - if(pwtest.ig_gge0 >= 0) {EXPECT_NEAR(0.0, pwtest.gg[pwtest.ig_gge0], 1e-8);} - delete [] startnst; - delete [] tmpx; - delete [] tmpy; - delete [] tmpz; + for (int ig = 0; ig < pwtest.npw; ++ig) + { + ModuleBase::Vector3 f; + f.x = pwtest.gdirect[ig].x; + f.y = pwtest.gdirect[ig].y; + f.z = pwtest.gdirect[ig].z; + ModuleBase::Vector3 gcar; + gcar = f * G; + double modulus = f * GGT * f; + EXPECT_NEAR (gcar.x, pwtest.gcar[ig].x, 1e-6); + EXPECT_NEAR (gcar.y, pwtest.gcar[ig].y, 1e-6); + EXPECT_NEAR (gcar.z, pwtest.gcar[ig].z, 1e-6); + EXPECT_NEAR (modulus, pwtest.gg[ig], 1e-6); + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + for (int igg = 1; igg 
< pwtest.ngg; ++igg) + { + EXPECT_GT (pwtest.gg_uniq[igg], pwtest.gg_uniq[igg - 1]); + } + if (pwtest.ig_gge0 >= 0) + { + EXPECT_NEAR (0.0, pwtest.gg[pwtest.ig_gge0], 1e-8); + } + delete[] startnst; + delete[] tmpx; + delete[] tmpy; + delete[] tmpz; } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test6-3-1.cpp b/source/source_basis/module_pw/test/test6-3-1.cpp index 0ade2569221..f2c0abc5821 100644 --- a/source/source_basis/module_pw/test/test6-3-1.cpp +++ b/source/source_basis/module_pw/test/test6-3-1.cpp @@ -12,18 +12,18 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test6_3_1) +TEST_F (PWTEST, test6_3_1) { - cout<<"dividemthd 2, gamma_only: on, xprime: false, full_pw: true, full_pw_dim: 1, check fft"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix + int(nx/2) - nx + 1; - double vy = iy + int(ny/2) - ny + 1; - double vz = iz + int(nz/2) - nz + 1; - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut || full_pw) - { - tmp[ix*ny*nz + iy*nz + iz] = 1.0/(modulus+1); - if(vy > 0) tmp[ix*ny*nz + iy*nz + iz]+=ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); - else if(vy < 0) tmp[ix*ny*nz + iy*nz + iz]-=ModuleBase::IMAG_UNIT / (std::abs(-v.x+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix + int (nx / 2) - nx + 1; + double vy = iy + int (ny / 2) - ny + 1; + double vz = iz + int (nz / 2) - nz + 1; + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut || full_pw) + { + 
tmp[ix * ny * nz + iy * nz + iz] = 1.0 / (modulus + 1); + if (vy > 0) + { + tmp[ix * ny * nz + iy * nz + iz] + += ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + else if (vy < 0) + { + tmp[ix * ny * nz + iy * nz + iz] + -= ModuleBase::IMAG_UNIT / (std::abs (-v.x + 1) + 1); + } + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1); - if(pwtest.gdirect[ig].y > 0) + + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new 
std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) { - rhog[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhogr[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1); + if (pwtest.gdirect[ig].y > 0) + { + rhog[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhogr[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } } - } - double * rhor = new double [nrxx]; + double* rhor = new double[nrxx]; #ifdef __ENABLE_FLOAT_FFTW - complex * rhofg = new complex [npw]; - complex * rhofgr = new complex [nmaxgr]; - complex * rhofgout = new complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhofg[ig] = 1.0/(pwtest.gg[ig]+1); - rhofgr[ig] = 1.0/(pwtest.gg[ig]+1); - if(pwtest.gdirect[ig].y > 0) + complex* rhofg = new complex[npw]; + complex* rhofgr = new complex[nmaxgr]; + complex* rhofgout = new complex[npw]; + for (int ig = 0; ig < npw; ++ig) { - rhofg[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhofgr[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); + rhofg[ig] = 1.0 / (pwtest.gg[ig] + 1); + rhofgr[ig] = 1.0 / (pwtest.gg[ig] + 1); + if (pwtest.gdirect[ig].y > 0) + { + rhofg[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhofgr[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } } - } - float * rhofr = new float [nrxx]; + float* rhofr = new float[nrxx]; #endif - - pwtest.recip2real(rhog,rhor);//check out-of-place transform - pwtest.recip2real(rhogr,(double*)rhogr);//check in-place transform + pwtest.recip2real (rhog, rhor); // check out-of-place transform + + pwtest.recip2real (rhogr, (double*)rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.recip2real(rhofg,rhofr);//check out-of-place transform + pwtest.recip2real (rhofg, 
rhofr); // check out-of-place transform - pwtest.recip2real(rhofgr,(float*)rhofgr);//check in-place transform + pwtest.recip2real (rhofgr, (float*)rhofgr); // check in-place transform #endif int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz],1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((double*)rhogr)[ixy*nplane+iz],1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz], 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), ((double*)rhogr)[ixy * nplane + iz], 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofr[ixy*nplane+iz],1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((float*)rhofgr)[ixy*nplane+iz],1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofr[ixy * nplane + iz], 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), ((float*)rhofgr)[ixy * nplane + iz], 1e-4); #endif + } } - } - - - pwtest.real2recip(rhor,rhogout);//check out-of-place transform - pwtest.real2recip((double*)rhogr,rhogr);//check in-place transform + pwtest.real2recip (rhor, rhogout); // check out-of-place transform + + pwtest.real2recip ((double*)rhogr, rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.real2recip(rhofr,rhofgout);//check out-of-place transform + pwtest.real2recip (rhofr, rhofgout); // check out-of-place transform - pwtest.real2recip((float*)rhofgr,rhofgr);//check in-place transform + pwtest.real2recip ((float*)rhofgr, rhofgr); // check in-place transform #endif - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - 
EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(rhofg[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofg[ig].imag(),rhofgout[ig].imag(),1e-4); - EXPECT_NEAR(rhofgr[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofgr[ig].imag(),rhofgout[ig].imag(),1e-4); + EXPECT_NEAR (rhofg[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofg[ig].imag (), rhofgout[ig].imag (), 1e-4); + EXPECT_NEAR (rhofgr[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofgr[ig].imag (), rhofgout[ig].imag (), 1e-4); #endif - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; - - fftw_cleanup(); + } + + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; + + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - delete [] rhofg; - delete [] rhofgout; - delete [] rhofr; - delete [] rhofgr; - fftwf_cleanup(); + delete[] rhofg; + delete[] rhofgout; + delete[] rhofr; + delete[] rhofgr; + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test6-4-1.cpp b/source/source_basis/module_pw/test/test6-4-1.cpp index 145071dea01..965e3949072 100644 --- a/source/source_basis/module_pw/test/test6-4-1.cpp +++ b/source/source_basis/module_pw/test/test6-4-1.cpp @@ -12,18 +12,20 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test6_4_1) +TEST_F (PWTEST, test6_4_1) { - cout<<"dividemthd 2, gamma_only: off, xprime: false, full_pw: true, full_pw_dim: 1, check fft between complex and complex"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = 
latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix + int(nx/2) - nx + 1; - double vy = iy + int(ny/2) - ny + 1; - double vz = iz + int(nz/2) - nz + 1; - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut || full_pw) - { - tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulus+1) + ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix + int (nx / 2) - nx + 1; + double vy = iy + int (ny / 2) - ny + 1; + double vz = iz + int (nz / 2) - nz + 1; + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut || full_pw) + { + tmp[ix * ny * nz + iy * nz + iz] + = 1.0 / (modulus + 1) + + ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; 
++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - std::complex * rhor = new std::complex [nrxx]; + + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + std::complex* rhor = new std::complex[nrxx]; #ifdef __ENABLE_FLOAT_FFTW - complex * rhofg = new complex [npw]; - complex * rhofgr = new complex [nmaxgr]; - complex * rhofgout = new complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhofg[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhofgr[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - complex * rhofr = new complex [nrxx]; + complex* rhofg = new complex[npw]; + complex* rhofgr = new complex[nmaxgr]; + complex* rhofgout = new complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhofg[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs 
(pwtest.gdirect[ig].x + 1) + 1); + rhofgr[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + complex* rhofr = new complex[nrxx]; #endif - - pwtest.recip2real(rhog,rhor);//check out-of-place transform - pwtest.recip2real(rhogr,rhogr);//check in-place transform + pwtest.recip2real (rhog, rhor); // check out-of-place transform + + pwtest.recip2real (rhogr, rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.recip2real(rhofg,rhofr);//check out-of-place transform + pwtest.recip2real (rhofg, rhofr); // check out-of-place transform - pwtest.recip2real(rhofgr,rhofgr);//check in-place transform + pwtest.recip2real (rhofgr, rhofgr); // check in-place transform #endif int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhor[ixy*nplane+iz].imag(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhogr[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhogr[ixy*nplane+iz].imag(),1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhor[ixy * nplane + iz].imag (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhogr[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhogr[ixy * nplane + iz].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofr[ixy*nplane+iz].real(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhofr[ixy*nplane+iz].imag(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofgr[ixy*nplane+iz].real(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + 
iz].imag(),rhofgr[ixy*nplane+iz].imag(),1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofr[ixy * nplane + iz].real (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhofr[ixy * nplane + iz].imag (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofgr[ixy * nplane + iz].real (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhofgr[ixy * nplane + iz].imag (), 1e-4); #endif + } } - } - - - pwtest.real2recip(rhor,rhogout);//check out-of-place transform + pwtest.real2recip (rhor, rhogout); // check out-of-place transform - pwtest.real2recip(rhogr,rhogr);//check in-place transform + pwtest.real2recip (rhogr, rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.real2recip(rhofr,rhofgout);//check out-of-place transform + pwtest.real2recip (rhofr, rhofgout); // check out-of-place transform - pwtest.real2recip(rhofgr,rhofgr);//check in-place transform + pwtest.real2recip (rhofgr, rhofgr); // check in-place transform #endif - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(rhofg[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofg[ig].imag(),rhofgout[ig].imag(),1e-4); - EXPECT_NEAR(rhofgr[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofgr[ig].imag(),rhofgout[ig].imag(),1e-4); + EXPECT_NEAR (rhofg[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofg[ig].imag (), rhofgout[ig].imag (), 1e-4); + EXPECT_NEAR (rhofgr[ig].real (), rhofgout[ig].real (), 
1e-4); + EXPECT_NEAR (rhofgr[ig].imag (), rhofgout[ig].imag (), 1e-4); #endif - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; - - fftw_cleanup(); + } + + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; + + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - delete [] rhofg; - delete [] rhofgout; - delete [] rhofr; - delete [] rhofgr; - fftwf_cleanup(); + delete[] rhofg; + delete[] rhofgout; + delete[] rhofr; + delete[] rhofgr; + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test6-4-2.cpp b/source/source_basis/module_pw/test/test6-4-2.cpp index 1f84f987a9f..7e78163bb56 100644 --- a/source/source_basis/module_pw/test/test6-4-2.cpp +++ b/source/source_basis/module_pw/test/test6-4-2.cpp @@ -12,18 +12,20 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test6_4_2) +TEST_F (PWTEST, test6_4_2) { - cout<<"dividemthd 2, gamma_only: off, xprime: false, full_pw: true, full_pw_dim: 2, check fft between complex and complex"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix + int(nx/2) - nx + 1; - double vy = iy + int(ny/2) - ny + 1; - double vz = iz + int(nz/2) - nz + 1; - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut || full_pw) - { - tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulus+1) + ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix + int (nx / 2) - nx + 1; + double vy = iy 
+ int (ny / 2) - ny + 1; + double vz = iz + int (nz / 2) - nz + 1; + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut || full_pw) + { + tmp[ix * ny * nz + iy * nz + iz] + = 1.0 / (modulus + 1) + + ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g ((double (int (nx / 2)) - 1.) / nx, + (double (int (ny / 2)) - 1.) / ny, + (double (int (nz / 2)) - 1.) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g((double(int(nx/2))-1.)/nx, (double(int(ny/2))-1.)/ny, (double(int(nz/2))-1.)/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1) + 
ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - std::complex * rhor = new std::complex [nrxx]; + + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + std::complex* rhor = new std::complex[nrxx]; #ifdef __ENABLE_FLOAT_FFTW - complex * rhofg = new complex [npw]; - complex * rhofgr = new complex [nmaxgr]; - complex * rhofgout = new complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhofg[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhofgr[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - complex * rhofr = new complex [nrxx]; + complex* rhofg = new complex[npw]; + complex* rhofgr = new complex[nmaxgr]; + complex* rhofgout = new complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhofg[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhofgr[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + complex* rhofr = new complex[nrxx]; #endif - - pwtest.recip2real(rhog,rhor);//check out-of-place transform - pwtest.recip2real(rhogr,rhogr);//check in-place transform + pwtest.recip2real (rhog, rhor); // check out-of-place transform + + pwtest.recip2real (rhogr, rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - pwtest.recip2real(rhofg,rhofr);//check out-of-place transform + pwtest.recip2real (rhofg, rhofr); // check out-of-place transform - pwtest.recip2real(rhofgr,rhofgr);//check in-place transform + pwtest.recip2real (rhofgr, rhofgr); // check in-place transform #endif int startiz = 
pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhor[ixy*nplane+iz].imag(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhogr[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhogr[ixy*nplane+iz].imag(),1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhor[ixy * nplane + iz].imag (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhogr[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhogr[ixy * nplane + iz].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofr[ixy*nplane+iz].real(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhofr[ixy*nplane+iz].imag(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhofgr[ixy*nplane+iz].real(),1e-4); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhofgr[ixy*nplane+iz].imag(),1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofr[ixy * nplane + iz].real (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhofr[ixy * nplane + iz].imag (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhofgr[ixy * nplane + iz].real (), 1e-4); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhofgr[ixy * nplane + iz].imag (), 1e-4); #endif + } } - } - - - pwtest.real2recip(rhor,rhogout);//check out-of-place transform + pwtest.real2recip (rhor, rhogout); // check out-of-place transform - pwtest.real2recip(rhogr,rhogr);//check in-place transform + pwtest.real2recip (rhogr, rhogr); // check in-place transform #ifdef __ENABLE_FLOAT_FFTW - 
pwtest.real2recip(rhofr,rhofgout);//check out-of-place transform + pwtest.real2recip (rhofr, rhofgout); // check out-of-place transform - pwtest.real2recip(rhofgr,rhofgr);//check in-place transform + pwtest.real2recip (rhofgr, rhofgr); // check in-place transform #endif - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); #ifdef __ENABLE_FLOAT_FFTW - EXPECT_NEAR(rhofg[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofg[ig].imag(),rhofgout[ig].imag(),1e-4); - EXPECT_NEAR(rhofgr[ig].real(),rhofgout[ig].real(),1e-4); - EXPECT_NEAR(rhofgr[ig].imag(),rhofgout[ig].imag(),1e-4); + EXPECT_NEAR (rhofg[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofg[ig].imag (), rhofgout[ig].imag (), 1e-4); + EXPECT_NEAR (rhofgr[ig].real (), rhofgout[ig].real (), 1e-4); + EXPECT_NEAR (rhofgr[ig].imag (), rhofgout[ig].imag (), 1e-4); #endif - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; - - fftw_cleanup(); + } + + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; + + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - delete [] rhofg; - delete [] rhofgout; - delete [] rhofr; - delete [] rhofgr; - fftwf_cleanup(); + delete[] rhofg; + delete[] rhofgout; + delete[] rhofr; + delete[] rhofgr; + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test7-1.cpp b/source/source_basis/module_pw/test/test7-1.cpp index 8492779792e..2ff679456c8 100644 --- 
a/source/source_basis/module_pw/test/test7-1.cpp +++ b/source/source_basis/module_pw/test/test7-1.cpp @@ -7,14 +7,16 @@ #include "source_base/global_function.h" #include "source_base/constants.h" #include "pw_test.h" -extern int nproc_in_pool,rank_in_pool; +extern int nproc_in_pool, rank_in_pool; using namespace std; -TEST_F(PWTEST,test7_1) +TEST_F (PWTEST, test7_1) { - cout<<"dividemthd 1, gamma_only: on, xprime: true, full_pw: true, full_pw_dim: 0, check gcar,gdirect,gg,istot2ixy,ig2isz"< f; - f.x = ix; - f.y = iy; - f.z = iz; - if(iz >= int(pwtest.nz/2) +1) f.z -= pwtest.nz; - if(ix >= int(pwtest.nx/2) +1) f.x -= pwtest.nx; - double modulus = f * (GGT * f); - if (modulus <= ggecut) - { - EXPECT_EQ(tmpx[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.x)); - EXPECT_EQ(tmpy[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.y)); - EXPECT_EQ(tmpz[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.z)); - } - + for (int iy = 0; iy < pwtest.fftny; ++iy) + { + for (int ix = 0; ix < pwtest.fftnx; ++ix) + { + ModuleBase::Vector3 f; + f.x = ix; + f.y = iy; + f.z = iz; + if (iz >= int (pwtest.nz / 2) + 1) + { + f.z -= pwtest.nz; + } + if (ix >= int (pwtest.nx / 2) + 1) + { + f.x -= pwtest.nx; + } + double modulus = f * (GGT * f); + if (modulus <= ggecut) + { + EXPECT_EQ (tmpx[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.x)); + EXPECT_EQ (tmpy[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.y)); + EXPECT_EQ (tmpz[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.z)); + } + } + } } - } } - } - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - ModuleBase::Vector3 f; - f.x = pwtest.gdirect[ig].x; - f.y = pwtest.gdirect[ig].y; - f.z = pwtest.gdirect[ig].z; - ModuleBase::Vector3 gcar; - gcar = f * G; - double modulus = f*GGT*f; - EXPECT_NEAR(gcar.x,pwtest.gcar[ig].x,1e-6); - EXPECT_NEAR(gcar.y,pwtest.gcar[ig].y,1e-6); - EXPECT_NEAR(gcar.z,pwtest.gcar[ig].z,1e-6); - 
EXPECT_NEAR(modulus,pwtest.gg[ig],1e-6); - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - for(int igg = 1 ; igg < pwtest.ngg ; ++igg) - { - EXPECT_GT(pwtest.gg_uniq[igg], pwtest.gg_uniq[igg-1]); - } - if(pwtest.ig_gge0 >= 0) {EXPECT_NEAR(0.0, pwtest.gg[pwtest.ig_gge0], 1e-8);} - delete [] startnst; - delete [] tmpx; - delete [] tmpy; - delete [] tmpz; + for (int ig = 0; ig < pwtest.npw; ++ig) + { + ModuleBase::Vector3 f; + f.x = pwtest.gdirect[ig].x; + f.y = pwtest.gdirect[ig].y; + f.z = pwtest.gdirect[ig].z; + ModuleBase::Vector3 gcar; + gcar = f * G; + double modulus = f * GGT * f; + EXPECT_NEAR (gcar.x, pwtest.gcar[ig].x, 1e-6); + EXPECT_NEAR (gcar.y, pwtest.gcar[ig].y, 1e-6); + EXPECT_NEAR (gcar.z, pwtest.gcar[ig].z, 1e-6); + EXPECT_NEAR (modulus, pwtest.gg[ig], 1e-6); + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + for (int igg = 1; igg < pwtest.ngg; ++igg) + { + EXPECT_GT (pwtest.gg_uniq[igg], pwtest.gg_uniq[igg - 1]); + } + if (pwtest.ig_gge0 >= 0) + { + EXPECT_NEAR (0.0, pwtest.gg[pwtest.ig_gge0], 1e-8); + } + delete[] startnst; + delete[] tmpx; + delete[] tmpy; + delete[] tmpz; } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test7-2-1.cpp b/source/source_basis/module_pw/test/test7-2-1.cpp index 4a046cd6fa4..82163359cd0 100644 --- a/source/source_basis/module_pw/test/test7-2-1.cpp +++ b/source/source_basis/module_pw/test/test7-2-1.cpp @@ -12,18 +12,18 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test7_2_1) +TEST_F (PWTEST, test7_2_1) { - cout<<"dividemthd 1, gamma_only: on, xprime: true, full_pw: true, full_pw_dim: 1, check fft"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 
0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix + int(nx/2) - nx + 1; - double vy = iy + int(ny/2) - ny + 1; - double vz = iz + int(nz/2) - nz + 1; - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut || full_pw) - { - tmp[ix*ny*nz + iy*nz + iz] = 1.0/(modulus+1); - if(vx > 0) tmp[ix*ny*nz + iy*nz + iz]+=ModuleBase::IMAG_UNIT / (std::abs(v.y+1) + 1); - else if(vx < 0) tmp[ix*ny*nz + iy*nz + iz]-=ModuleBase::IMAG_UNIT / (std::abs(-v.y+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix + int (nx / 2) - nx + 1; + double vy = iy + int (ny / 2) - ny + 1; + double vz = iz + int (nz / 2) - nz + 1; + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut || full_pw) + { + tmp[ix * ny * nz + iy * nz + iz] = 1.0 / (modulus + 1); + if (vx > 0) + { + tmp[ix * ny * nz + iy * nz + iz] + += ModuleBase::IMAG_UNIT / (std::abs (v.y + 1) + 1); + } + else if (vx < 0) + { + tmp[ix * ny * nz + iy * nz + iz] + -= ModuleBase::IMAG_UNIT / (std::abs (-v.y + 1) + 1); + } + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - 
ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1); - if(pwtest.gdirect[ig].x > 0) + + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) { - rhog[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].y+1) + 1); - rhogr[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].y+1) + 1); + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1); + if (pwtest.gdirect[ig].x > 0) + { + rhog[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].y + 1) + 1); + rhogr[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].y + 1) + 1); + } } - } - double * rhor = new double [nrxx]; - ModuleBase::GlobalFunc::ZEROS(rhor, nrxx); - - pwtest.recip2real(rhog,rhor,true,1.0);//check out-of-place transform // test add fft - - pwtest.recip2real(rhogr,(double*)rhogr);//check in-place transform + double* rhor = new double[nrxx]; + ModuleBase::GlobalFunc::ZEROS (rhor, nrxx); + pwtest.recip2real (rhog, rhor, true, 1.0); // check out-of-place transform // test add fft + pwtest.recip2real (rhogr, (double*)rhogr); // check in-place transform int startiz = pwtest.startz_current; - 
for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz],1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((double*)rhogr)[ixy*nplane+iz],1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz], 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), ((double*)rhogr)[ixy * nplane + iz], 1e-6); + } } - } - ModuleBase::GlobalFunc::ZEROS(rhogout, npw); - pwtest.real2recip(rhor,rhogout,true,1.0);//check out-of-place transform + ModuleBase::GlobalFunc::ZEROS (rhogout, npw); + pwtest.real2recip (rhor, rhogout, true, 1.0); // check out-of-place transform + + pwtest.real2recip ((double*)rhogr, rhogr); // check in-place transform - pwtest.real2recip((double*)rhogr,rhogr);//check in-place transform + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); + } - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; - fftw_cleanup(); + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - fftwf_cleanup(); + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test7-3-1.cpp b/source/source_basis/module_pw/test/test7-3-1.cpp index c64ced34b25..c1010ba3b86 100644 --- 
a/source/source_basis/module_pw/test/test7-3-1.cpp +++ b/source/source_basis/module_pw/test/test7-3-1.cpp @@ -12,18 +12,20 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test7_3_1) +TEST_F (PWTEST, test7_3_1) { - cout<<"dividemthd 1, gamma_only: off, xprime: true, full_pw: true, full_pw_dim: 1, check fft between complex and complex, reset ggecut to latecut"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix + int(nx/2) - nx + 1; - double vy = iy + int(ny/2) - ny + 1; - double vz = iz + int(nz/2) - nz + 1; - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut || full_pw) - { - tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulus+1) + ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix + int (nx / 2) - nx + 1; + double vy = iy + int (ny / 2) - ny + 1; + double vz = iz + int (nz / 2) - nz + 1; + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut || full_pw) + { + tmp[ix * ny * nz + iy * nz + iz] + = 1.0 / (modulus + 1) + + ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; 
+ int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - std::complex * rhor = new std::complex [nrxx]; - ModuleBase::GlobalFunc::ZEROS(rhor, nrxx); - - pwtest.recip2real(rhog,rhor);//check out-of-place transform - pwtest.recip2real(rhogr,rhogr);//check in-place transform + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + std::complex* rhor = new std::complex[nrxx]; + 
ModuleBase::GlobalFunc::ZEROS (rhor, nrxx); + + pwtest.recip2real (rhog, rhor); // check out-of-place transform + + pwtest.recip2real (rhogr, rhogr); // check in-place transform int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhor[ixy*nplane+iz].imag(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhogr[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhogr[ixy*nplane+iz].imag(),1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhor[ixy * nplane + iz].imag (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhogr[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhogr[ixy * nplane + iz].imag (), 1e-6); + } } - } - - ModuleBase::GlobalFunc::ZEROS(rhogout, npw); - pwtest.real2recip(rhor,rhogout,true, 1);//check out-of-place transform + ModuleBase::GlobalFunc::ZEROS (rhogout, npw); + pwtest.real2recip (rhor, rhogout, true, 1); // check out-of-place transform - pwtest.real2recip(rhogr,rhogr);//check in-place transform + pwtest.real2recip (rhogr, rhogr); // check in-place transform + + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); + } - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - 
EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; - fftw_cleanup(); + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - fftwf_cleanup(); + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test7-3-2.cpp b/source/source_basis/module_pw/test/test7-3-2.cpp index 345bb42ce01..fa9ef268fc7 100644 --- a/source/source_basis/module_pw/test/test7-3-2.cpp +++ b/source/source_basis/module_pw/test/test7-3-2.cpp @@ -12,18 +12,20 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test7_3_2) +TEST_F (PWTEST, test7_3_2) { - cout<<"dividemthd 1, gamma_only: off, xprime: true, full_pw: true, full_pw_dim: 2, check fft between complex and complex, reset ggecut to latecut"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix + int(nx/2) - nx + 1; - double vy = iy + int(ny/2) - ny + 1; - double vz = iz + int(nz/2) - nz + 1; - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut || full_pw) - { - tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulus+1) + ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix + int (nx / 2) - nx + 1; + double vy = iy + int (ny / 2) - ny + 1; + double vz = iz + int (nz / 2) - nz + 1; + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut 
|| full_pw) + { + tmp[ix * ny * nz + iy * nz + iz] + = 1.0 / (modulus + 1) + + ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + } + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); - ModuleBase::Vector3 delta_g((double(int(nx/2))-1.)/nx, (double(int(ny/2))-1.)/ny, (double(int(nz/2))-1.)/nz); - // ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } + ModuleBase::Vector3 delta_g ((double (int (nx / 2)) - 1.) / nx, + (double (int (ny / 2)) - 1.) / ny, + (double (int (nz / 2)) - 1.) 
/ nz); + // ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } + } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - std::complex * rhor = new std::complex [nrxx]; - ModuleBase::GlobalFunc::ZEROS(rhor, nrxx); - - pwtest.recip2real(rhog,rhor);//check out-of-place transform - pwtest.recip2real(rhogr,rhogr);//check in-place transform + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + std::complex* rhor = new std::complex[nrxx]; + ModuleBase::GlobalFunc::ZEROS (rhor, nrxx); + + pwtest.recip2real (rhog, rhor); // check out-of-place transform + + pwtest.recip2real (rhogr, rhogr); // check in-place transform int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy 
* nz + startiz + iz].real(),rhor[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhor[ixy*nplane+iz].imag(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhogr[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhogr[ixy*nplane+iz].imag(),1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhor[ixy * nplane + iz].imag (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhogr[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhogr[ixy * nplane + iz].imag (), 1e-6); + } } - } - - ModuleBase::GlobalFunc::ZEROS(rhogout, npw); - pwtest.real2recip(rhor,rhogout,true, 1);//check out-of-place transform + ModuleBase::GlobalFunc::ZEROS (rhogout, npw); + pwtest.real2recip (rhor, rhogout, true, 1); // check out-of-place transform - pwtest.real2recip(rhogr,rhogr);//check in-place transform + pwtest.real2recip (rhogr, rhogr); // check in-place transform + + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); + } - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; - fftw_cleanup(); + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - fftwf_cleanup(); + fftwf_cleanup (); #endif } \ No newline at end of file 
diff --git a/source/source_basis/module_pw/test/test8-1.cpp b/source/source_basis/module_pw/test/test8-1.cpp index 0204e14fa41..4cd050163d8 100644 --- a/source/source_basis/module_pw/test/test8-1.cpp +++ b/source/source_basis/module_pw/test/test8-1.cpp @@ -7,14 +7,16 @@ #include "source_base/global_function.h" #include "source_base/constants.h" #include "pw_test.h" -extern int nproc_in_pool,rank_in_pool; +extern int nproc_in_pool, rank_in_pool; using namespace std; -TEST_F(PWTEST,test8_1) +TEST_F (PWTEST, test8_1) { - cout<<"dividemthd 2, gamma_only: on, xprime: true, full_pw: true, full_pw_dim: 0, check gcar,gdirect,gg,istot2ixy,ig2isz"< f; - f.x = ix; - f.y = iy; - f.z = iz; - if(iz >= int(pwtest.nz/2) +1) f.z -= pwtest.nz; - if(ix >= int(pwtest.nx/2) +1) f.x -= pwtest.nx; - double modulus = f * (GGT * f); - if (modulus <= ggecut) - { - EXPECT_EQ(tmpx[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.x)); - EXPECT_EQ(tmpy[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.y)); - EXPECT_EQ(tmpz[iz + iy*pwtest.fftnz + ix*pwtest.fftny*pwtest.fftnz], int(f.z)); - } - + for (int iy = 0; iy < pwtest.fftny; ++iy) + { + for (int ix = 0; ix < pwtest.fftnx; ++ix) + { + ModuleBase::Vector3 f; + f.x = ix; + f.y = iy; + f.z = iz; + if (iz >= int (pwtest.nz / 2) + 1) + { + f.z -= pwtest.nz; + } + if (ix >= int (pwtest.nx / 2) + 1) + { + f.x -= pwtest.nx; + } + double modulus = f * (GGT * f); + if (modulus <= ggecut) + { + EXPECT_EQ (tmpx[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.x)); + EXPECT_EQ (tmpy[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.y)); + EXPECT_EQ (tmpz[iz + iy * pwtest.fftnz + ix * pwtest.fftny * pwtest.fftnz], + int (f.z)); + } + } + } } - } } - } - for(int ig = 0 ;ig < pwtest.npw ; ++ig) - { - ModuleBase::Vector3 f; - f.x = pwtest.gdirect[ig].x; - f.y = pwtest.gdirect[ig].y; - f.z = pwtest.gdirect[ig].z; - ModuleBase::Vector3 gcar; - gcar = f * G; - double modulus = f*GGT*f; - 
EXPECT_NEAR(gcar.x,pwtest.gcar[ig].x,1e-6); - EXPECT_NEAR(gcar.y,pwtest.gcar[ig].y,1e-6); - EXPECT_NEAR(gcar.z,pwtest.gcar[ig].z,1e-6); - EXPECT_NEAR(modulus,pwtest.gg[ig],1e-6); - EXPECT_NEAR(pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]],1e-8); - } - for(int igg = 1 ; igg < pwtest.ngg ; ++igg) - { - EXPECT_GT(pwtest.gg_uniq[igg], pwtest.gg_uniq[igg-1]); - } - if(pwtest.ig_gge0 >= 0) {EXPECT_NEAR(0.0, pwtest.gg[pwtest.ig_gge0], 1e-8);} - delete [] startnst; - delete [] tmpx; - delete [] tmpy; - delete [] tmpz; + for (int ig = 0; ig < pwtest.npw; ++ig) + { + ModuleBase::Vector3 f; + f.x = pwtest.gdirect[ig].x; + f.y = pwtest.gdirect[ig].y; + f.z = pwtest.gdirect[ig].z; + ModuleBase::Vector3 gcar; + gcar = f * G; + double modulus = f * GGT * f; + EXPECT_NEAR (gcar.x, pwtest.gcar[ig].x, 1e-6); + EXPECT_NEAR (gcar.y, pwtest.gcar[ig].y, 1e-6); + EXPECT_NEAR (gcar.z, pwtest.gcar[ig].z, 1e-6); + EXPECT_NEAR (modulus, pwtest.gg[ig], 1e-6); + EXPECT_NEAR (pwtest.gg[ig], pwtest.gg_uniq[pwtest.ig2igg[ig]], 1e-8); + } + for (int igg = 1; igg < pwtest.ngg; ++igg) + { + EXPECT_GT (pwtest.gg_uniq[igg], pwtest.gg_uniq[igg - 1]); + } + if (pwtest.ig_gge0 >= 0) + { + EXPECT_NEAR (0.0, pwtest.gg[pwtest.ig_gge0], 1e-8); + } + delete[] startnst; + delete[] tmpx; + delete[] tmpy; + delete[] tmpz; } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test8-2-1.cpp b/source/source_basis/module_pw/test/test8-2-1.cpp index 1230c7f5ef8..712b733a5b2 100644 --- a/source/source_basis/module_pw/test/test8-2-1.cpp +++ b/source/source_basis/module_pw/test/test8-2-1.cpp @@ -12,18 +12,18 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test8_2_1) +TEST_F (PWTEST, test8_2_1) { - cout<<"dividemthd 2, gamma_only: on, xprime: true, full_pw: true, full_pw_dim: 1, check fft"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * 
GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix + int(nx/2) - nx + 1; - double vy = iy + int(ny/2) - ny + 1; - double vz = iz + int(nz/2) - nz + 1; - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut || full_pw) - { - tmp[ix*ny*nz + iy*nz + iz] = 1.0/(modulus+1); - if(vx > 0) tmp[ix*ny*nz + iy*nz + iz]+=ModuleBase::IMAG_UNIT / (std::abs(v.y+1) + 1); - else if(vx < 0) tmp[ix*ny*nz + iy*nz + iz]-=ModuleBase::IMAG_UNIT / (std::abs(-v.y+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix + int (nx / 2) - nx + 1; + double vy = iy + int (ny / 2) - ny + 1; + double vz = iz + int (nz / 2) - nz + 1; + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut || full_pw) + { + tmp[ix * ny * nz + iy * nz + iz] = 1.0 / (modulus + 1); + if (vx > 0) + { + tmp[ix * ny * nz + iy * nz + iz] + += ModuleBase::IMAG_UNIT / (std::abs (v.y + 1) + 1); + } + else if (vx < 0) + { + tmp[ix * ny * nz + iy * nz + iz] + -= ModuleBase::IMAG_UNIT / (std::abs (-v.y + 1) + 1); + } + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double (int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = 
fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1); - double a = rhog[ig].real(); - if(pwtest.gdirect[ig].x > 0) + + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) { - rhog[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].y+1) + 1); - rhogr[ig]+=ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].y+1) + 1); + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1); + double a = rhog[ig].real (); + if (pwtest.gdirect[ig].x > 0) + { + rhog[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].y + 1) + 1); + rhogr[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].y + 1) + 1); + } } - } - double * rhor = new double [nrxx]; - ModuleBase::GlobalFunc::ZEROS(rhor, nrxx); - - pwtest.recip2real(rhog,rhor,true,1);//check out-of-place transform // test add fft - - pwtest.recip2real(rhogr,(double*)rhogr);//check in-place transform + double* rhor = new double[nrxx]; + ModuleBase::GlobalFunc::ZEROS (rhor, nrxx); + 
pwtest.recip2real (rhog, rhor, true, 1); // check out-of-place transform // test add fft + pwtest.recip2real (rhogr, (double*)rhogr); // check in-place transform int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz],1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((double*)rhogr)[ixy*nplane+iz],1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz], 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), ((double*)rhogr)[ixy * nplane + iz], 1e-6); + } } - } - ModuleBase::GlobalFunc::ZEROS(rhogout, npw); - pwtest.real2recip(rhor,rhogout,true,1);//check out-of-place transform + ModuleBase::GlobalFunc::ZEROS (rhogout, npw); + pwtest.real2recip (rhor, rhogout, true, 1); // check out-of-place transform + + pwtest.real2recip ((double*)rhogr, rhogr); // check in-place transform - pwtest.real2recip((double*)rhogr,rhogr);//check in-place transform + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); + } - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; - fftw_cleanup(); + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - fftwf_cleanup(); + fftwf_cleanup (); #endif } \ No newline at end of file 
diff --git a/source/source_basis/module_pw/test/test8-3-1.cpp b/source/source_basis/module_pw/test/test8-3-1.cpp index 336db418477..b7ca72f8a48 100644 --- a/source/source_basis/module_pw/test/test8-3-1.cpp +++ b/source/source_basis/module_pw/test/test8-3-1.cpp @@ -12,18 +12,20 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test8_3_1) +TEST_F (PWTEST, test8_3_1) { - cout<<"dividemthd 2, gamma_only: off, xprime: true, full_pw: true, full_pw_dim: 1, check fft between complex and complex, reset ggecut to latecut"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix + int(nx/2) - nx + 1; - double vy = iy + int(ny/2) - ny + 1; - double vz = iz + int(nz/2) - nz + 1; - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut || full_pw) - { - tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulus+1) + ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix + int (nx / 2) - nx + 1; + double vy = iy + int (ny / 2) - ny + 1; + double vz = iz + int (nz / 2) - nz + 1; + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut || full_pw) + { + tmp[ix * ny * nz + iy * nz + iz] + = 1.0 / (modulus + 1) + + ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + } + } + } + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + + ModuleBase::Vector3 delta_g (double (int (nx / 2)) / nx, + double 
(int (ny / 2)) / ny, + double (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - - ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - std::complex * rhor = new std::complex [nrxx]; - ModuleBase::GlobalFunc::ZEROS(rhor, nrxx); - - pwtest.recip2real(rhog,rhor);//check out-of-place transform - pwtest.recip2real(rhogr,rhogr);//check in-place transform + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhogr[ig] 
= 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + std::complex* rhor = new std::complex[nrxx]; + ModuleBase::GlobalFunc::ZEROS (rhor, nrxx); + + pwtest.recip2real (rhog, rhor); // check out-of-place transform + + pwtest.recip2real (rhogr, rhogr); // check in-place transform int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhor[ixy*nplane+iz].imag(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhogr[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhogr[ixy*nplane+iz].imag(),1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhor[ixy * nplane + iz].imag (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhogr[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhogr[ixy * nplane + iz].imag (), 1e-6); + } } - } - - ModuleBase::GlobalFunc::ZEROS(rhogout, npw); - pwtest.real2recip(rhor,rhogout,true, 1);//check out-of-place transform + ModuleBase::GlobalFunc::ZEROS (rhogout, npw); + pwtest.real2recip (rhor, rhogout, true, 1); // check out-of-place transform - pwtest.real2recip(rhogr,rhogr);//check in-place transform + pwtest.real2recip (rhogr, rhogr); // check in-place transform + + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); + } - for(int ig = 0 ; ig < npw ; ++ig) - { - 
EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; - fftw_cleanup(); + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - fftwf_cleanup(); + fftwf_cleanup (); #endif } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test8-3-2.cpp b/source/source_basis/module_pw/test/test8-3-2.cpp index 2b43e83b52a..4fe81972ca3 100644 --- a/source/source_basis/module_pw/test/test8-3-2.cpp +++ b/source/source_basis/module_pw/test/test8-3-2.cpp @@ -12,18 +12,20 @@ #include "pw_test.h" using namespace std; -TEST_F(PWTEST,test8_3_2) +TEST_F (PWTEST, test8_3_2) { - cout<<"dividemthd 2, gamma_only: off, xprime: true, full_pw: true, full_pw_dim: 2, check fft between complex and complex, reset ggecut to latecut"< *tmp = new std::complex [nx*ny*nz]; - if(rank_in_pool == 0) - { - for(int ix = 0 ; ix < nx ; ++ix) + ModuleBase::Matrix3 GT, G, GGT; + GT = latvec.Inverse (); + G = GT.Transpose (); + GGT = G * GT; + std::complex* tmp = new std::complex[nx * ny * nz]; + if (rank_in_pool == 0) { - for(int iy = 0 ; iy < ny ; ++iy) - { - for(int iz = 0 ; iz < nz ; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[ix*ny*nz + iy*nz + iz]=0.0; - double vx = ix + int(nx/2) - nx + 1; - double vy = iy + int(ny/2) - ny + 1; - double vz = iz + int(nz/2) - nz + 1; - ModuleBase::Vector3 v(vx,vy,vz); - double modulus = v * (GGT * v); - if (modulus <= ggecut || full_pw) - { - tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulus+1) + ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1); - } + for (int iy = 0; iy < ny; ++iy) + { + for (int iz = 0; iz < nz; ++iz) + { + tmp[ix * ny * nz + iy * nz + iz] = 0.0; + double vx = ix + int (nx / 2) - nx + 1; + double vy 
= iy + int (ny / 2) - ny + 1; + double vz = iz + int (nz / 2) - nz + 1; + ModuleBase::Vector3 v (vx, vy, vz); + double modulus = v * (GGT * v); + if (modulus <= ggecut || full_pw) + { + tmp[ix * ny * nz + iy * nz + iz] + = 1.0 / (modulus + 1) + + ModuleBase::IMAG_UNIT / (std::abs (v.x + 1) + 1); + } + } + } } - } - } - fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); + fftw_plan pp + = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); - ModuleBase::Vector3 delta_g((double(int(nx/2))-1.)/nx, (double(int(ny/2))-1.)/ny, (double(int(nz/2))-1.)/nz); - // ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nz ; ++iz) - { - int ix = ixy / ny; - int iy = ixy % ny; - ModuleBase::Vector3 real_r(ix, iy, iz); - double phase_im = -delta_g * real_r; - std::complex phase(0,ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } + ModuleBase::Vector3 delta_g ((double (int (nx / 2)) - 1.) / nx, + (double (int (ny / 2)) - 1.) / ny, + (double (int (nz / 2)) - 1.) 
/ nz); + // ModuleBase::Vector3 delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + for (int iz = 0; iz < nz; ++iz) + { + int ix = ixy / ny; + int iy = ixy % ny; + ModuleBase::Vector3 real_r (ix, iy, iz); + double phase_im = -delta_g * real_r; + std::complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } + } } - } #ifdef __MPI - MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD); + MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD); #endif - - std::complex * rhog = new std::complex [npw]; - std::complex * rhogr = new std::complex [nmaxgr]; - std::complex * rhogout = new std::complex [npw]; - for(int ig = 0 ; ig < npw ; ++ig) - { - rhog[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - rhogr[ig] = 1.0/(pwtest.gg[ig]+1) + ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x+1) + 1); - } - std::complex * rhor = new std::complex [nrxx]; - ModuleBase::GlobalFunc::ZEROS(rhor, nrxx); - - pwtest.recip2real(rhog,rhor);//check out-of-place transform - pwtest.recip2real(rhogr,rhogr);//check in-place transform + std::complex* rhog = new std::complex[npw]; + std::complex* rhogr = new std::complex[nmaxgr]; + std::complex* rhogout = new std::complex[npw]; + for (int ig = 0; ig < npw; ++ig) + { + rhog[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + rhogr[ig] = 1.0 / (pwtest.gg[ig] + 1) + ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + std::complex* rhor = new std::complex[nrxx]; + ModuleBase::GlobalFunc::ZEROS (rhor, nrxx); + + pwtest.recip2real (rhog, rhor); // check out-of-place transform + + pwtest.recip2real (rhogr, rhogr); // check in-place transform int startiz = pwtest.startz_current; - for(int ixy = 0 ; ixy < nx * ny ; ++ixy) - { - for(int iz = 0 ; iz < nplane ; ++iz) + for (int ixy = 0; ixy < nx * ny; ++ixy) { - EXPECT_NEAR(tmp[ixy 
* nz + startiz + iz].real(),rhor[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhor[ixy*nplane+iz].imag(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhogr[ixy*nplane+iz].real(),1e-6); - EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(),rhogr[ixy*nplane+iz].imag(),1e-6); + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhor[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhor[ixy * nplane + iz].imag (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), rhogr[ixy * nplane + iz].real (), 1e-6); + EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), rhogr[ixy * nplane + iz].imag (), 1e-6); + } } - } - - ModuleBase::GlobalFunc::ZEROS(rhogout, npw); - pwtest.real2recip(rhor,rhogout,true, 1);//check out-of-place transform + ModuleBase::GlobalFunc::ZEROS (rhogout, npw); + pwtest.real2recip (rhor, rhogout, true, 1); // check out-of-place transform - pwtest.real2recip(rhogr,rhogr);//check in-place transform + pwtest.real2recip (rhogr, rhogr); // check in-place transform + + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (rhog[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhog[ig].imag (), rhogout[ig].imag (), 1e-6); + EXPECT_NEAR (rhogr[ig].real (), rhogout[ig].real (), 1e-6); + EXPECT_NEAR (rhogr[ig].imag (), rhogout[ig].imag (), 1e-6); + } - for(int ig = 0 ; ig < npw ; ++ig) - { - EXPECT_NEAR(rhog[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhog[ig].imag(),rhogout[ig].imag(),1e-6); - EXPECT_NEAR(rhogr[ig].real(),rhogout[ig].real(),1e-6); - EXPECT_NEAR(rhogr[ig].imag(),rhogout[ig].imag(),1e-6); - } - - delete [] rhog; - delete [] rhogout; - delete [] rhor; - delete [] tmp; - delete [] rhogr; + delete[] rhog; + delete[] rhogout; + delete[] rhor; + delete[] tmp; + delete[] rhogr; - fftw_cleanup(); + fftw_cleanup (); #ifdef __ENABLE_FLOAT_FFTW - fftwf_cleanup(); + fftwf_cleanup (); #endif } \ No newline at end of file 
diff --git a/source/source_basis/module_pw/test/test_sup.cpp b/source/source_basis/module_pw/test/test_sup.cpp index 188aabc4eda..27dce906674 100644 --- a/source/source_basis/module_pw/test/test_sup.cpp +++ b/source/source_basis/module_pw/test/test_sup.cpp @@ -10,11 +10,11 @@ extern int nproc_in_pool, rank_in_pool; using namespace std; -TEST_F(PWTEST, test_sup) +TEST_F (PWTEST, test_sup) { cout << "dividemthd 3, gamma_only: off, check gcar,gdirect,gg,istot2ixy,ig2isz" << endl; //-------------------------------------------------- - ModuleBase::Matrix3 latvec(20, 1, 0, 0, 1, 0, 0, 0, 5); + ModuleBase::Matrix3 latvec (20, 1, 0, 0, 1, 0, 0, 0, 5); bool gamma_only = false; double wfcecut = 70; double wfcecutdense = 100; @@ -24,30 +24,30 @@ TEST_F(PWTEST, test_sup) //-------------------------------------------------- // smooth grids - ModulePW::PW_Basis pwsmooth(device_flag, precision_flag); + ModulePW::PW_Basis pwsmooth (device_flag, precision_flag); #ifdef __MPI - pwsmooth.initmpi(nproc_in_pool, rank_in_pool, POOL_WORLD); + pwsmooth.initmpi (nproc_in_pool, rank_in_pool, POOL_WORLD); #endif - pwsmooth.initgrids(lat0, latvec, wfcecut); - pwsmooth.initparameters(gamma_only, wfcecut, distribution_type, xprime); - pwsmooth.setuptransform(); - pwsmooth.collect_local_pw(); - pwsmooth.collect_uniqgg(); + pwsmooth.initgrids (lat0, latvec, wfcecut); + pwsmooth.initparameters (gamma_only, wfcecut, distribution_type, xprime); + pwsmooth.setuptransform (); + pwsmooth.collect_local_pw (); + pwsmooth.collect_uniqgg (); // dense grids - ModulePW::PW_Basis_Sup pwdense(device_flag, precision_flag); + ModulePW::PW_Basis_Sup pwdense (device_flag, precision_flag); #ifdef __MPI - pwdense.initmpi(nproc_in_pool, rank_in_pool, POOL_WORLD); + pwdense.initmpi (nproc_in_pool, rank_in_pool, POOL_WORLD); #endif - pwdense.initgrids(lat0, latvec, wfcecutdense); - pwdense.initparameters(gamma_only, wfcecutdense, distribution_type, xprime); - pwdense.setuptransform(&pwsmooth); - 
pwdense.collect_local_pw(); - pwdense.collect_uniqgg(); + pwdense.initgrids (lat0, latvec, wfcecutdense); + pwdense.initparameters (gamma_only, wfcecutdense, distribution_type, xprime); + pwdense.setuptransform (&pwsmooth); + pwdense.collect_local_pw (); + pwdense.collect_uniqgg (); ModuleBase::Matrix3 GT, G, GGT; - GT = latvec.Inverse(); - G = GT.Transpose(); + GT = latvec.Inverse (); + G = GT.Transpose (); GGT = G * GT; double tpiba2 = ModuleBase::TWO_PI * ModuleBase::TWO_PI / lat0 / lat0; double ggecut = wfcecutdense / tpiba2; @@ -60,164 +60,171 @@ TEST_F(PWTEST, test_sup) const int nz_ref = 15; // some results for different number of processors - int npw_per_ref[12][12] = { - {1751}, - {874, 877}, - {583, 584, 584}, - {439, 436, 438, 438}, - {351, 350, 350, 350, 350}, - {293, 293, 293, 291, 291, 290}, - {249, 251, 251, 251, 251, 249, 249}, - {221, 218, 220, 218, 218, 218, 218, 220}, - {196, 196, 196, 194, 194, 194, 194, 193, 194}, - {177, 176, 176, 176, 174, 174, 174, 176, 174, 174}, - {161, 161, 161, 159, 159, 158, 158, 158, 159, 158, 159}, - {147, 148, 148, 145, 146, 146, 145, 145, 145, 146, 145, 145} - }; - int nst_per_ref[12][12] = { - {161}, - {80, 81}, - {53, 54, 54}, - {41, 40, 40, 40}, - {33, 32, 32, 32, 32}, - {27, 27, 27, 27, 27, 26}, - {23, 23, 23, 23, 23, 23, 23}, - {21, 20, 20, 20, 20, 20, 20, 20}, - {18, 18, 18, 18, 18, 18, 18, 17, 18}, - {17, 16, 16, 16, 16, 16, 16, 16, 16, 16}, - {15, 15, 15, 15, 15, 14, 14, 14, 15, 14, 15}, - {13, 14, 14, 13, 14, 14, 13, 13, 13, 14, 13, 13} - }; + int npw_per_ref[12][12] = {{1751}, + {874, 877}, + {583, 584, 584}, + {439, 436, 438, 438}, + {351, 350, 350, 350, 350}, + {293, 293, 293, 291, 291, 290}, + {249, 251, 251, 251, 251, 249, 249}, + {221, 218, 220, 218, 218, 218, 218, 220}, + {196, 196, 196, 194, 194, 194, 194, 193, 194}, + {177, 176, 176, 176, 174, 174, 174, 176, 174, 174}, + {161, 161, 161, 159, 159, 158, 158, 158, 159, 158, 159}, + {147, 148, 148, 145, 146, 146, 145, 145, 145, 146, 145, 145}}; + int 
nst_per_ref[12][12] = {{161}, + {80, 81}, + {53, 54, 54}, + {41, 40, 40, 40}, + {33, 32, 32, 32, 32}, + {27, 27, 27, 27, 27, 26}, + {23, 23, 23, 23, 23, 23, 23}, + {21, 20, 20, 20, 20, 20, 20, 20}, + {18, 18, 18, 18, 18, 18, 18, 17, 18}, + {17, 16, 16, 16, 16, 16, 16, 16, 16, 16}, + {15, 15, 15, 15, 15, 14, 14, 14, 15, 14, 15}, + {13, 14, 14, 13, 14, 14, 13, 13, 13, 14, 13, 13}}; int* npw_per = nullptr; if (rank_in_pool == 0) - { - npw_per = new int[nproc_in_pool]; - } + { + npw_per = new int[nproc_in_pool]; + } #ifdef __MPI - MPI_Gather(&pwdense.npw, 1, MPI_INT, npw_per, 1, MPI_INT, 0, POOL_WORLD); + MPI_Gather (&pwdense.npw, 1, MPI_INT, npw_per, 1, MPI_INT, 0, POOL_WORLD); #else if (rank_in_pool == 0) - npw_per[0] = pwdense.npw; -#endif - if (rank_in_pool == 0) - { - if (nproc_in_pool <= 12) { - for (int ip = 0; ip < nproc_in_pool; ++ip) - { - EXPECT_EQ(npw_per_ref[nproc_in_pool - 1][ip], npw_per[ip]); - EXPECT_EQ(nst_per_ref[nproc_in_pool - 1][ip], pwdense.nst_per[ip]); - } + npw_per[0] = pwdense.npw; } - else +#endif + if (rank_in_pool == 0) { - cout << "Please use mpi processors no more than 12." << endl; + if (nproc_in_pool <= 12) + { + for (int ip = 0; ip < nproc_in_pool; ++ip) + { + EXPECT_EQ (npw_per_ref[nproc_in_pool - 1][ip], npw_per[ip]); + EXPECT_EQ (nst_per_ref[nproc_in_pool - 1][ip], pwdense.nst_per[ip]); + } + } + else + { + cout << "Please use mpi processors no more than 12." 
<< endl; + } + delete[] npw_per; } - delete[] npw_per; - } // results int tot_npw = 0; #ifdef __MPI - MPI_Allreduce(&pwdense.npw, &tot_npw, 1, MPI_INT, MPI_SUM, POOL_WORLD); + MPI_Allreduce (&pwdense.npw, &tot_npw, 1, MPI_INT, MPI_SUM, POOL_WORLD); #else tot_npw = pwdense.npw; #endif - EXPECT_EQ(pwdense.nx, nx_ref); - EXPECT_EQ(pwdense.ny, ny_ref); - EXPECT_EQ(pwdense.nz, nz_ref); - EXPECT_EQ(pwdense.fftnx, nx_ref); - EXPECT_EQ(pwdense.fftny, ny_ref); - EXPECT_EQ(tot_npw, totnpw_ref); - EXPECT_EQ(pwdense.npwtot, totnpw_ref); - EXPECT_EQ(pwdense.nstot, totnst_ref); - EXPECT_EQ(pwdense.nxyz, nx_ref * ny_ref * nz_ref); + EXPECT_EQ (pwdense.nx, nx_ref); + EXPECT_EQ (pwdense.ny, ny_ref); + EXPECT_EQ (pwdense.nz, nz_ref); + EXPECT_EQ (pwdense.fftnx, nx_ref); + EXPECT_EQ (pwdense.fftny, ny_ref); + EXPECT_EQ (tot_npw, totnpw_ref); + EXPECT_EQ (pwdense.npwtot, totnpw_ref); + EXPECT_EQ (pwdense.nstot, totnst_ref); + EXPECT_EQ (pwdense.nxyz, nx_ref * ny_ref * nz_ref); int* tmpx = new int[pwdense.nx * pwdense.ny * pwdense.nz]; int* tmpy = new int[pwdense.nx * pwdense.ny * pwdense.nz]; int* tmpz = new int[pwdense.nx * pwdense.ny * pwdense.nz]; - ModuleBase::GlobalFunc::ZEROS(tmpx, pwdense.nx * pwdense.ny * pwdense.nz); - ModuleBase::GlobalFunc::ZEROS(tmpy, pwdense.nx * pwdense.ny * pwdense.nz); - ModuleBase::GlobalFunc::ZEROS(tmpz, pwdense.nx * pwdense.ny * pwdense.nz); + ModuleBase::GlobalFunc::ZEROS (tmpx, pwdense.nx * pwdense.ny * pwdense.nz); + ModuleBase::GlobalFunc::ZEROS (tmpy, pwdense.nx * pwdense.ny * pwdense.nz); + ModuleBase::GlobalFunc::ZEROS (tmpz, pwdense.nx * pwdense.ny * pwdense.nz); int* startnst = new int[nproc_in_pool]; startnst[0] = 0; for (int ip = 1; ip < nproc_in_pool; ++ip) - { - startnst[ip] = startnst[ip - 1] + pwdense.nst_per[ip - 1]; - } + { + startnst[ip] = startnst[ip - 1] + pwdense.nst_per[ip - 1]; + } for (int ig = 0; ig < pwdense.npw; ++ig) - { - int istot = pwdense.ig2isz[ig] / pwdense.nz + startnst[rank_in_pool]; - // int is = 
pwdense.ig2isz[ig] / pwdense.nz; - int iz = pwdense.ig2isz[ig] % pwdense.nz; - int iy = pwdense.istot2ixy[istot] % pwdense.ny; - int ix = pwdense.istot2ixy[istot] / pwdense.ny; - // int iy = pwdense.is2fftixy[is] % pwdense.ny; - // int ix = pwdense.is2fftixy[is] / pwdense.ny; + { + int istot = pwdense.ig2isz[ig] / pwdense.nz + startnst[rank_in_pool]; + // int is = pwdense.ig2isz[ig] / pwdense.nz; + int iz = pwdense.ig2isz[ig] % pwdense.nz; + int iy = pwdense.istot2ixy[istot] % pwdense.ny; + int ix = pwdense.istot2ixy[istot] / pwdense.ny; + // int iy = pwdense.is2fftixy[is] % pwdense.ny; + // int ix = pwdense.is2fftixy[is] / pwdense.ny; - tmpx[iz + (iy + ix * pwdense.ny) * pwdense.nz] = int(pwdense.gdirect[ig].x); - tmpy[iz + (iy + ix * pwdense.ny) * pwdense.nz] = int(pwdense.gdirect[ig].y); - tmpz[iz + (iy + ix * pwdense.ny) * pwdense.nz] = int(pwdense.gdirect[ig].z); - } + tmpx[iz + (iy + ix * pwdense.ny) * pwdense.nz] = int (pwdense.gdirect[ig].x); + tmpy[iz + (iy + ix * pwdense.ny) * pwdense.nz] = int (pwdense.gdirect[ig].y); + tmpz[iz + (iy + ix * pwdense.ny) * pwdense.nz] = int (pwdense.gdirect[ig].z); + } #ifdef __MPI - MPI_Allreduce(MPI_IN_PLACE, tmpx, pwdense.nxyz, MPI_INT, MPI_SUM, POOL_WORLD); - MPI_Allreduce(MPI_IN_PLACE, tmpy, pwdense.nxyz, MPI_INT, MPI_SUM, POOL_WORLD); - MPI_Allreduce(MPI_IN_PLACE, tmpz, pwdense.nxyz, MPI_INT, MPI_SUM, POOL_WORLD); + MPI_Allreduce (MPI_IN_PLACE, tmpx, pwdense.nxyz, MPI_INT, MPI_SUM, POOL_WORLD); + MPI_Allreduce (MPI_IN_PLACE, tmpy, pwdense.nxyz, MPI_INT, MPI_SUM, POOL_WORLD); + MPI_Allreduce (MPI_IN_PLACE, tmpz, pwdense.nxyz, MPI_INT, MPI_SUM, POOL_WORLD); #endif if (rank_in_pool == 0) - { - for (int iz = 0; iz < pwdense.nz; ++iz) { - for (int iy = 0; iy < pwdense.ny; ++iy) - { - for (int ix = 0; ix < pwdense.nx; ++ix) + for (int iz = 0; iz < pwdense.nz; ++iz) { - ModuleBase::Vector3 f; - f.x = ix; - f.y = iy; - f.z = iz; - if (iz >= int(pwdense.nz / 2) + 1) - f.z -= pwdense.nz; - if (iy >= int(pwdense.ny / 2) + 1) - 
f.y -= pwdense.ny; - if (ix >= int(pwdense.nx / 2) + 1) - f.x -= pwdense.nx; - double modulus = f * (GGT * f); - if (modulus <= ggecut) - { - EXPECT_EQ(tmpx[iz + iy * pwdense.nz + ix * pwdense.ny * pwdense.nz], int(f.x)); - EXPECT_EQ(tmpy[iz + iy * pwdense.nz + ix * pwdense.ny * pwdense.nz], int(f.y)); - EXPECT_EQ(tmpz[iz + iy * pwdense.nz + ix * pwdense.ny * pwdense.nz], int(f.z)); - } + for (int iy = 0; iy < pwdense.ny; ++iy) + { + for (int ix = 0; ix < pwdense.nx; ++ix) + { + ModuleBase::Vector3 f; + f.x = ix; + f.y = iy; + f.z = iz; + if (iz >= int (pwdense.nz / 2) + 1) + { + f.z -= pwdense.nz; + } + if (iy >= int (pwdense.ny / 2) + 1) + { + f.y -= pwdense.ny; + } + if (ix >= int (pwdense.nx / 2) + 1) + { + f.x -= pwdense.nx; + } + double modulus = f * (GGT * f); + if (modulus <= ggecut) + { + EXPECT_EQ (tmpx[iz + iy * pwdense.nz + ix * pwdense.ny * pwdense.nz], + int (f.x)); + EXPECT_EQ (tmpy[iz + iy * pwdense.nz + ix * pwdense.ny * pwdense.nz], + int (f.y)); + EXPECT_EQ (tmpz[iz + iy * pwdense.nz + ix * pwdense.ny * pwdense.nz], + int (f.z)); + } + } + } } - } } - } for (int ig = 0; ig < pwdense.npw; ++ig) - { - ModuleBase::Vector3 f; - f.x = pwdense.gdirect[ig].x; - f.y = pwdense.gdirect[ig].y; - f.z = pwdense.gdirect[ig].z; - ModuleBase::Vector3 gcar; - gcar = f * G; - double modulus = f * GGT * f; - EXPECT_NEAR(gcar.x, pwdense.gcar[ig].x, 1e-6); - EXPECT_NEAR(gcar.y, pwdense.gcar[ig].y, 1e-6); - EXPECT_NEAR(gcar.z, pwdense.gcar[ig].z, 1e-6); - EXPECT_NEAR(modulus, pwdense.gg[ig], 1e-6); - EXPECT_NEAR(pwdense.gg[ig], pwdense.gg_uniq[pwdense.ig2igg[ig]], 1e-8); - } + { + ModuleBase::Vector3 f; + f.x = pwdense.gdirect[ig].x; + f.y = pwdense.gdirect[ig].y; + f.z = pwdense.gdirect[ig].z; + ModuleBase::Vector3 gcar; + gcar = f * G; + double modulus = f * GGT * f; + EXPECT_NEAR (gcar.x, pwdense.gcar[ig].x, 1e-6); + EXPECT_NEAR (gcar.y, pwdense.gcar[ig].y, 1e-6); + EXPECT_NEAR (gcar.z, pwdense.gcar[ig].z, 1e-6); + EXPECT_NEAR (modulus, pwdense.gg[ig], 1e-6); + 
EXPECT_NEAR (pwdense.gg[ig], pwdense.gg_uniq[pwdense.ig2igg[ig]], 1e-8); + } for (int igg = 1; igg < pwdense.ngg; ++igg) - { - EXPECT_GT(pwdense.gg_uniq[igg], pwdense.gg_uniq[igg - 1]); - } + { + EXPECT_GT (pwdense.gg_uniq[igg], pwdense.gg_uniq[igg - 1]); + } if (pwdense.ig_gge0 >= 0) - { - EXPECT_NEAR(0.0, pwdense.gg[pwdense.ig_gge0], 1e-8); - } + { + EXPECT_NEAR (0.0, pwdense.gg[pwdense.ig_gge0], 1e-8); + } delete[] startnst; delete[] tmpx; delete[] tmpy; @@ -225,10 +232,10 @@ TEST_F(PWTEST, test_sup) // the planewaves of dense grids must be consistent with the smooth grids for (int ig = 0; ig < pwsmooth.npw; ++ig) - { - for (int ipol = 0; ipol < 3; ipol++) { - EXPECT_DOUBLE_EQ(pwsmooth.gcar[ig][ipol], pwdense.gcar[ig][ipol]); + for (int ipol = 0; ipol < 3; ipol++) + { + EXPECT_DOUBLE_EQ (pwsmooth.gcar[ig][ipol], pwdense.gcar[ig][ipol]); + } } - } } \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test_tool.cpp b/source/source_basis/module_pw/test/test_tool.cpp index f2f3db11b82..11d5a08fb34 100644 --- a/source/source_basis/module_pw/test/test_tool.cpp +++ b/source/source_basis/module_pw/test/test_tool.cpp @@ -2,66 +2,68 @@ #include "depend_mock.h" #include "mpi.h" #include -void setupmpi(int argc,char **argv,int &nproc, int &myrank) +void + setupmpi (int argc, char** argv, int& nproc, int& myrank) { int provided; - MPI_Init_thread(&argc,&argv,MPI_THREAD_FUNNELED,&provided); - if( provided != MPI_THREAD_FUNNELED ) - std::cout<<"MPI_Init_thread request "<= startpro_pool[i]) + for (int i = 0; i < kpar; i++) { - mypool=i; + if (myrank >= startpro_pool[i]) + { + mypool = i; + } } - } int key = 1; - rank_in_pool = myrank-startpro_pool[mypool]; + rank_in_pool = myrank - startpro_pool[mypool]; - MPI_Comm_split(MPI_COMM_WORLD,mypool,key,&POOL_WORLD); + MPI_Comm_split (MPI_COMM_WORLD, mypool, key, &POOL_WORLD); delete[] nproc_pool; delete[] startpro_pool; return; } -void finishmpi() +void + finishmpi () { - MPI_Comm_free(&POOL_WORLD); - 
MPI_Finalize(); + MPI_Comm_free (&POOL_WORLD); + MPI_Finalize (); } #endif \ No newline at end of file diff --git a/source/source_basis/module_pw/test/test_tool.h b/source/source_basis/module_pw/test/test_tool.h index a3251ed9bd6..e6f70524982 100644 --- a/source/source_basis/module_pw/test/test_tool.h +++ b/source/source_basis/module_pw/test/test_tool.h @@ -1,3 +1,3 @@ -void setupmpi(int argc,char **argv,int &nproc, int &myrank); -void divide_pools(const int &nproc, const int &myrank,int &nproc_in_pool, int &kpar, int&mypool, int &rank_in_pool); -void finishmpi(); \ No newline at end of file +void setupmpi (int argc, char** argv, int& nproc, int& myrank); +void divide_pools (const int& nproc, const int& myrank, int& nproc_in_pool, int& kpar, int& mypool, int& rank_in_pool); +void finishmpi (); \ No newline at end of file diff --git a/source/source_basis/module_pw/test/time.cpp b/source/source_basis/module_pw/test/time.cpp index 7836b34e978..a33b0195cc0 100644 --- a/source/source_basis/module_pw/test/time.cpp +++ b/source/source_basis/module_pw/test/time.cpp @@ -3,246 +3,247 @@ using namespace std; - -int main(int argc, char **argv) +int + main (int argc, char** argv) { int nproc = 1, myrank = 0; - MPI_Init(&argc, &argv); - MPI_Comm_size(MPI_COMM_WORLD,&nproc); - MPI_Comm_rank(MPI_COMM_WORLD,&myrank); - if(myrank == 0) - cout<<"compare time between xprime and yprime"<* vpsic1 = new std::complex[nrxx]; - std::complex* vpsic2 = new std::complex[nrxx]; - for(int i = 0 ; i < nrxx ; ++i) + pwtest3.initmpi (nproc, myrank, MPI_COMM_WORLD); + if (Nx * Ny * Nz == 0) { - vr4[i] = vr3[i] = vr2[i] = vr1[i] = rand()/double(RAND_MAX); + pwtest3.initgrids (lat0, latvec, rhoecut); + pwtest3.initparameters (true, wfcecut, distribution_type, true); } - - std::complex *psi = new std::complex [npw*nbands]; - std::complex *psi2 = new std::complex [npw*nbands]; - std::complex *psi3 = new std::complex [npw_half1*nbands]; - std::complex *psi4 = new std::complex [npw_half2*nbands]; - 
std::complex *psiout = new std::complex [npw*nbands]; - std::complex *psiout2 = new std::complex [npw*nbands]; - std::complex *psiout3 = new std::complex [npw_half1*nbands]; - std::complex *psiout4 = new std::complex [npw_half2*nbands]; - for(int i = 0 ; i < npw*nbands ; ++i) + else { - psi2[i] = psi[i] = std::complex(rand()/double(RAND_MAX), rand()/double(RAND_MAX)); + pwtest3.initgrids (lat0, latvec, Nx, Ny, Nz); + pwtest3.initparameters (true, pwtest1.gridecut_lat * pwtest1.tpiba2 / 4, distribution_type, true); } + pwtest3.setuptransform (); - for(int i = 0 ; i < npw_half1 * nbands ; ++i) + ModulePW::PW_Basis pwtest4; + pwtest4.initmpi (nproc, myrank, MPI_COMM_WORLD); + if (Nx * Ny * Nz == 0) { - psi3[i] = std::complex(rand()/double(RAND_MAX), rand()/double(RAND_MAX)); + pwtest4.initgrids (lat0, latvec, rhoecut); + pwtest4.initparameters (true, wfcecut, distribution_type, false); } - for(int i = 0 ; i < npw_half2 * nbands ; ++i) + else { - psi4[i] = std::complex(rand()/double(RAND_MAX), rand()/double(RAND_MAX)); + pwtest4.initgrids (lat0, latvec, Nx, Ny, Nz); + pwtest4.initparameters (true, pwtest1.gridecut_lat * pwtest1.tpiba2 / 4, distribution_type, false); } + pwtest4.setuptransform (); - - MPI_Barrier(MPI_COMM_WORLD); - start=MPI_Wtime(); - for(int i = 0 ; i < nbands ; ++i) + if (myrank == 0) { - std::complex * tmp = psi; - std::complex * tmpout = psiout; - pwtest1.recip2real(tmp, vpsic1); - for(int j = 0 ; j < nrxx ; ++j) vpsic1[j]*=vr1[j]; - pwtest1.real2recip(vpsic1,tmpout); - tmp+=npw; - tmpout+=npw; + cout << "nx: " << pwtest1.nx << " ny: " << pwtest1.ny << " nz: " << pwtest1.nz << endl; + cout << "fullnpw npw1: " << pwtest1.npw << " nst1: " << pwtest1.nst << "; npw2: " << pwtest2.npw + << " nst2: " << pwtest2.nst << endl; + cout << "halfnpw npw3: " << pwtest3.npw << " nst3: " << pwtest3.nst << "; npw4: " << pwtest4.npw + << " nst4: " << pwtest4.nst << endl; } - MPI_Barrier(MPI_COMM_WORLD); - end=MPI_Wtime(); - t1 += end - start; - - 
MPI_Barrier(MPI_COMM_WORLD); - start=MPI_Wtime(); - for(int i = 0 ; i < nbands ; ++i) + if (pwtest1.nx != pwtest2.nx || pwtest2.nx != pwtest3.nx || pwtest3.nx != pwtest4.nx || pwtest1.ny != pwtest2.ny + || pwtest2.ny != pwtest3.ny || pwtest3.ny != pwtest4.ny || pwtest1.nz != pwtest2.nz || pwtest2.nz != pwtest3.nz + || pwtest3.nz != pwtest4.nz) { - std::complex * tmp = psi2; - std::complex * tmpout = psiout2; - pwtest2.recip2real(tmp, vpsic2); - for(int j = 0 ; j < nrxx ; ++j) vpsic2[j]*=vr2[j]; - pwtest2.real2recip(vpsic2,tmpout); - tmp+=npw; - tmpout+=npw; + cout << "Error" << endl; + exit (0); } - MPI_Barrier(MPI_COMM_WORLD); - end=MPI_Wtime(); - t2 += end - start; - MPI_Barrier(MPI_COMM_WORLD); - start=MPI_Wtime(); - for(int i = 0 ; i < nbands ; ++i) - { - std::complex * tmp = psi3; - std::complex * tmpout = psiout3; - pwtest3.recip2real(tmp, vpsi1); - for(int j = 0 ; j < nrxx ; ++j) vpsi1[j]*=vr3[j]; - pwtest3.real2recip(vpsi1,tmpout); - tmp+=npw_half1; - tmpout+=npw_half1; - } - MPI_Barrier(MPI_COMM_WORLD); - end=MPI_Wtime(); - t3 += end - start; + int nrxx = pwtest1.nrxx; + int npw = pwtest1.npw; + int npw_half1 = pwtest3.npw; + int npw_half2 = pwtest4.npw; - MPI_Barrier(MPI_COMM_WORLD); - start=MPI_Wtime(); - for(int i = 0 ; i < nbands ; ++i) + double start, end; + double t1 = 0, t2 = 0, t3 = 0, t4 = 0; + for (int it = 0; it < 10; ++it) { - std::complex * tmp = psi4; - std::complex * tmpout = psiout4; - pwtest4.recip2real(tmp, vpsi2); - for(int j = 0 ; j < nrxx ; ++j) vpsi2[j]*=vr4[j]; - pwtest4.real2recip(vpsi2,tmpout); - tmp+=npw_half2; - tmpout+=npw_half2; + // init + double* vr1 = new double[nrxx]; + double* vr2 = new double[nrxx]; + double* vr3 = new double[nrxx]; + double* vr4 = new double[nrxx]; + double* vpsi1 = new double[nrxx]; + double* vpsi2 = new double[nrxx]; + std::complex* vpsic1 = new std::complex[nrxx]; + std::complex* vpsic2 = new std::complex[nrxx]; + for (int i = 0; i < nrxx; ++i) + { + vr4[i] = vr3[i] = vr2[i] = vr1[i] = rand () / 
double (RAND_MAX); + } + + std::complex* psi = new std::complex[npw * nbands]; + std::complex* psi2 = new std::complex[npw * nbands]; + std::complex* psi3 = new std::complex[npw_half1 * nbands]; + std::complex* psi4 = new std::complex[npw_half2 * nbands]; + std::complex* psiout = new std::complex[npw * nbands]; + std::complex* psiout2 = new std::complex[npw * nbands]; + std::complex* psiout3 = new std::complex[npw_half1 * nbands]; + std::complex* psiout4 = new std::complex[npw_half2 * nbands]; + for (int i = 0; i < npw * nbands; ++i) + { + psi2[i] = psi[i] = std::complex (rand () / double (RAND_MAX), rand () / double (RAND_MAX)); + } + + for (int i = 0; i < npw_half1 * nbands; ++i) + { + psi3[i] = std::complex (rand () / double (RAND_MAX), rand () / double (RAND_MAX)); + } + for (int i = 0; i < npw_half2 * nbands; ++i) + { + psi4[i] = std::complex (rand () / double (RAND_MAX), rand () / double (RAND_MAX)); + } + + MPI_Barrier (MPI_COMM_WORLD); + start = MPI_Wtime (); + for (int i = 0; i < nbands; ++i) + { + std::complex* tmp = psi; + std::complex* tmpout = psiout; + pwtest1.recip2real (tmp, vpsic1); + for (int j = 0; j < nrxx; ++j) + vpsic1[j] *= vr1[j]; + pwtest1.real2recip (vpsic1, tmpout); + tmp += npw; + tmpout += npw; + } + MPI_Barrier (MPI_COMM_WORLD); + end = MPI_Wtime (); + t1 += end - start; + + MPI_Barrier (MPI_COMM_WORLD); + start = MPI_Wtime (); + for (int i = 0; i < nbands; ++i) + { + std::complex* tmp = psi2; + std::complex* tmpout = psiout2; + pwtest2.recip2real (tmp, vpsic2); + for (int j = 0; j < nrxx; ++j) + vpsic2[j] *= vr2[j]; + pwtest2.real2recip (vpsic2, tmpout); + tmp += npw; + tmpout += npw; + } + MPI_Barrier (MPI_COMM_WORLD); + end = MPI_Wtime (); + t2 += end - start; + + MPI_Barrier (MPI_COMM_WORLD); + start = MPI_Wtime (); + for (int i = 0; i < nbands; ++i) + { + std::complex* tmp = psi3; + std::complex* tmpout = psiout3; + pwtest3.recip2real (tmp, vpsi1); + for (int j = 0; j < nrxx; ++j) + vpsi1[j] *= vr3[j]; + pwtest3.real2recip (vpsi1, 
tmpout); + tmp += npw_half1; + tmpout += npw_half1; + } + MPI_Barrier (MPI_COMM_WORLD); + end = MPI_Wtime (); + t3 += end - start; + + MPI_Barrier (MPI_COMM_WORLD); + start = MPI_Wtime (); + for (int i = 0; i < nbands; ++i) + { + std::complex* tmp = psi4; + std::complex* tmpout = psiout4; + pwtest4.recip2real (tmp, vpsi2); + for (int j = 0; j < nrxx; ++j) + vpsi2[j] *= vr4[j]; + pwtest4.real2recip (vpsi2, tmpout); + tmp += npw_half2; + tmpout += npw_half2; + } + MPI_Barrier (MPI_COMM_WORLD); + end = MPI_Wtime (); + t4 += end - start; + + for (int i = 0; i < nbands * npw; ++i) + { + double error = std::abs (psiout2[i] - psiout[i]); + if (error > 1e-4) + { + cout << "Wrong" << endl; + exit (0); + } + } + + delete[] psi; + delete[] psi2; + delete[] psi3; + delete[] psi4; + delete[] psiout; + delete[] psiout2; + delete[] psiout3; + delete[] psiout4; + delete[] vr1; + delete[] vr2; + delete[] vr3; + delete[] vr4; + delete[] vpsic1; + delete[] vpsic2; + delete[] vpsi1; + delete[] vpsi2; } - MPI_Barrier(MPI_COMM_WORLD); - end=MPI_Wtime(); - t4 += end - start; - for(int i = 0 ; i < nbands*npw; ++i) + if (myrank == 0) { - double error = std::abs(psiout2[i]-psiout[i]); - if(error > 1e-4) - { - cout<<"Wrong"<* h_rhog = nullptr; complex* h_rhogout = nullptr; complex* h_rhor = nullptr; - void init(ModulePW::PW_Basis& pwtest) + void + init (ModulePW::PW_Basis& pwtest) { - ModuleBase::Matrix3 latvec(1, 1, 0, 0, 1, 1, 0, 0, 2); + ModuleBase::Matrix3 latvec (1, 1, 0, 0, 1, 1, 0, 0, 2); T wfcecut; T lat0 = 2.2; @@ -50,12 +51,12 @@ class PW_BASIS_K_GPU_TEST : public ::testing::Test const int nproc_in_pool = 1; const int rank_in_pool = 0; MPI_Comm POOL_WORLD; - MPI_Comm_split(MPI_COMM_WORLD, mypool, key, &POOL_WORLD); - pwtest.initmpi(nproc_in_pool, rank_in_pool, POOL_WORLD); - pwtest.initgrids(lat0, latvec, wfcecut); - pwtest.initparameters(gamma_only, wfcecut, distribution_type, xprime); - pwtest.setuptransform(); - pwtest.collect_local_pw(); + MPI_Comm_split (MPI_COMM_WORLD, 
mypool, key, &POOL_WORLD); + pwtest.initmpi (nproc_in_pool, rank_in_pool, POOL_WORLD); + pwtest.initgrids (lat0, latvec, wfcecut); + pwtest.initparameters (gamma_only, wfcecut, distribution_type, xprime); + pwtest.setuptransform (); + pwtest.collect_local_pw (); const int npw = pwtest.npw; const int nrxx = pwtest.nrxx; @@ -68,140 +69,145 @@ class PW_BASIS_K_GPU_TEST : public ::testing::Test const T tpiba2 = ModuleBase::TWO_PI * ModuleBase::TWO_PI / lat0 / lat0; const T ggecut = wfcecut / tpiba2; ModuleBase::Matrix3 GT, G, GGT; - GT = latvec.Inverse(); - G = GT.Transpose(); + GT = latvec.Inverse (); + G = GT.Transpose (); GGT = G * GT; tmp = new complex[nx * ny * nz]; - if (rank_in_pool == 0) + if (rank_in_pool == 0) { for (int ix = 0; ix < nx; ++ix) - { - const T vx = ix - int(nx / 2); - for (int iy = 0; iy < ny; ++iy) { - const int offset = (ix * ny + iy) * nz; - const T vy = iy - int(ny / 2); - for (int iz = 0; iz < nz; ++iz) - { - tmp[offset + iz] = 0.0; - T vz = iz - int(nz / 2); - ModuleBase::Vector3 v(vx, vy, vz); - T modulus = v * (GGT * v); - if (modulus <= ggecut) + const T vx = ix - int (nx / 2); + for (int iy = 0; iy < ny; ++iy) { - tmp[offset + iz] = 1.0 / (modulus + 1); - if (vy > 0) - { - tmp[offset + iz] - += std::complex(0, 1.0) / (std::abs(static_cast(v.x) + 1) + 1); - } - else if (vy < 0) - { - tmp[offset + iz] - -= std::complex(0, 1.0) / (std::abs(-static_cast(v.x) + 1) + 1); - } + const int offset = (ix * ny + iy) * nz; + const T vy = iy - int (ny / 2); + for (int iz = 0; iz < nz; ++iz) + { + tmp[offset + iz] = 0.0; + T vz = iz - int (nz / 2); + ModuleBase::Vector3 v (vx, vy, vz); + T modulus = v * (GGT * v); + if (modulus <= ggecut) + { + tmp[offset + iz] = 1.0 / (modulus + 1); + if (vy > 0) + { + tmp[offset + iz] += std::complex (0, 1.0) + / (std::abs (static_cast (v.x) + 1) + 1); + } + else if (vy < 0) + { + tmp[offset + iz] + -= std::complex (0, 1.0) + / (std::abs (-static_cast (v.x) + 1) + 1); + } + } + } } - } } - } - if (typeid(T) == 
typeid(double)) - { - fftw_plan pp = fftw_plan_dft_3d(nx, - ny, - nz, - (fftw_complex*)tmp, - (fftw_complex*)tmp, - FFTW_BACKWARD, - FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - } - else if (typeid(T) == typeid(float)) - { - fftwf_plan pp = fftwf_plan_dft_3d(nx, - ny, - nz, - (fftwf_complex*)tmp, - (fftwf_complex*)tmp, - FFTW_BACKWARD, - FFTW_ESTIMATE); - fftwf_execute(pp); - fftwf_destroy_plan(pp); - } - ModuleBase::Vector3 delta_g(T(int(nx / 2)) / nx, T(int(ny / 2)) / ny, T(int(nz / 2)) / nz); + if (typeid (T) == typeid (double)) + { + fftw_plan pp = fftw_plan_dft_3d (nx, + ny, + nz, + (fftw_complex*)tmp, + (fftw_complex*)tmp, + FFTW_BACKWARD, + FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + } + else if (typeid (T) == typeid (float)) + { + fftwf_plan pp = fftwf_plan_dft_3d (nx, + ny, + nz, + (fftwf_complex*)tmp, + (fftwf_complex*)tmp, + FFTW_BACKWARD, + FFTW_ESTIMATE); + fftwf_execute (pp); + fftwf_destroy_plan (pp); + } + ModuleBase::Vector3 delta_g (T (int (nx / 2)) / nx, T (int (ny / 2)) / ny, T (int (nz / 2)) / nz); for (int ixy = 0; ixy < nx * ny; ++ixy) - { - const int ix = ixy / ny; - const int iy = ixy % ny; - for (int iz = 0; iz < nz; ++iz) { - ModuleBase::Vector3 real_r(ix, iy, iz); - T phase_im = -delta_g * real_r; - complex phase(0, ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); + const int ix = ixy / ny; + const int iy = ixy % ny; + for (int iz = 0; iz < nz; ++iz) + { + ModuleBase::Vector3 real_r (ix, iy, iz); + T phase_im = -delta_g * real_r; + complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } } - } h_rhog = new complex[npw]; h_rhogout = new complex[npw]; for (int ig = 0; ig < npw; ++ig) - { - h_rhog[ig] = 1.0 / (pwtest.gg[ig] + 1); - - if (pwtest.gdirect[ig].y > 0) { - h_rhog[ig] += std::complex(0, 1.0) / (std::abs(float(pwtest.gdirect[ig].x) + 1) + 1); - } - else if (pwtest.gdirect[ig].y < 0) - { - h_rhog[ig] -= std::complex(0, 1.0) / 
(std::abs(float(-pwtest.gdirect[ig].x) + 1) + 1); + h_rhog[ig] = 1.0 / (pwtest.gg[ig] + 1); + + if (pwtest.gdirect[ig].y > 0) + { + h_rhog[ig] + += std::complex (0, 1.0) / (std::abs (float (pwtest.gdirect[ig].x) + 1) + 1); + } + else if (pwtest.gdirect[ig].y < 0) + { + h_rhog[ig] -= std::complex (0, 1.0) + / (std::abs (float (-pwtest.gdirect[ig].x) + 1) + 1); + } } - } - cudaMalloc((void**)&d_rhog, npw * sizeof(complex)); - cudaMalloc((void**)&d_rhor, nrxx * sizeof(complex)); - cudaMemcpy(d_rhog, h_rhog, npw * sizeof(complex), cudaMemcpyHostToDevice); + cudaMalloc ((void**)&d_rhog, npw * sizeof (complex)); + cudaMalloc ((void**)&d_rhor, nrxx * sizeof (complex)); + cudaMemcpy (d_rhog, h_rhog, npw * sizeof (complex), cudaMemcpyHostToDevice); h_rhor = new complex[nrxx]; - pwtest.recip_to_real, std::complex,base_device::DEVICE_GPU>(d_rhog, d_rhor); - cudaMemcpy(h_rhor, d_rhor, nrxx * sizeof(complex), cudaMemcpyDeviceToHost); + pwtest.recip_to_real, std::complex, base_device::DEVICE_GPU> (d_rhog, d_rhor); + cudaMemcpy (h_rhor, d_rhor, nrxx * sizeof (complex), cudaMemcpyDeviceToHost); - pwtest.real_to_recip, std::complex,base_device::DEVICE_GPU>(d_rhor, d_rhog); - cudaMemcpy(h_rhogout, d_rhog, npw * sizeof(complex), cudaMemcpyDeviceToHost); + pwtest.real_to_recip, std::complex, base_device::DEVICE_GPU> (d_rhor, d_rhog); + cudaMemcpy (h_rhogout, d_rhog, npw * sizeof (complex), cudaMemcpyDeviceToHost); } } - ModulePW::PW_Basis* access_pw() + ModulePW::PW_Basis* + access_pw () { return &pwtest; } - void TearDown() override + void + TearDown () override { delete[] h_rhog; delete[] h_rhogout; delete[] h_rhor; delete[] tmp; - cudaFree(d_rhog); - cudaFree(d_rhogr); - cudaFree(d_rhogout); - cudaFree(d_rhor); + cudaFree (d_rhog); + cudaFree (d_rhogr); + cudaFree (d_rhogout); + cudaFree (d_rhor); } }; -using MixedTypes = ::testing::Types, - TypePair >; +using MixedTypes + = ::testing::Types, TypePair>; -TYPED_TEST_CASE(PW_BASIS_K_GPU_TEST, MixedTypes); +TYPED_TEST_CASE 
(PW_BASIS_K_GPU_TEST, MixedTypes); -TYPED_TEST(PW_BASIS_K_GPU_TEST, Mixing) +TYPED_TEST (PW_BASIS_K_GPU_TEST, Mixing) { using T = typename TestFixture::T; using Device = typename TestFixture::Device; ModulePW::PW_Basis pwtest; - pwtest.set_device("gpu"); - pwtest.set_precision("mixing"); - pwtest.fft_bundle.setfft("gpu", "mixing"); - this->init(pwtest); + pwtest.set_device ("gpu"); + pwtest.set_precision ("mixing"); + pwtest.fft_bundle.setfft ("gpu", "mixing"); + this->init (pwtest); int startiz = pwtest.startz_current; const int nx = pwtest.nx; const int ny = pwtest.ny; @@ -209,42 +215,42 @@ TYPED_TEST(PW_BASIS_K_GPU_TEST, Mixing) const int nplane = pwtest.nplane; const int npw = pwtest.npw; for (int ixy = 0; ixy < nx * ny; ++ixy) - { - const int offset = ixy * nz + startiz; - const int startz = ixy * nplane; - for (int iz = 0; iz < nplane; ++iz) { - EXPECT_NEAR(this->tmp[offset + iz].real(), this->h_rhor[startz + iz].real(), 1e-4); + const int offset = ixy * nz + startiz; + const int startz = ixy * nplane; + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (this->tmp[offset + iz].real (), this->h_rhor[startz + iz].real (), 1e-4); + } } - } for (int ig = 0; ig < npw; ++ig) - { - EXPECT_NEAR(this->h_rhog[ig].real(), this->h_rhogout[ig].real(), 1e-4); - EXPECT_NEAR(this->h_rhog[ig].imag(), this->h_rhogout[ig].imag(), 1e-4); - } + { + EXPECT_NEAR (this->h_rhog[ig].real (), this->h_rhogout[ig].real (), 1e-4); + EXPECT_NEAR (this->h_rhog[ig].imag (), this->h_rhogout[ig].imag (), 1e-4); + } } -TYPED_TEST(PW_BASIS_K_GPU_TEST, FloatDouble) +TYPED_TEST (PW_BASIS_K_GPU_TEST, FloatDouble) { using T = typename TestFixture::T; using Device = typename TestFixture::Device; ModulePW::PW_Basis pwtest; - pwtest.set_device("gpu"); - pwtest.set_precision("mixing"); - if (typeid(T) == typeid(float)) - { - pwtest.fft_bundle.setfft("gpu", "single"); - } - else if (typeid(T) == typeid(double)) - { - pwtest.fft_bundle.setfft("gpu", "double"); - } + pwtest.set_device ("gpu"); + 
pwtest.set_precision ("mixing"); + if (typeid (T) == typeid (float)) + { + pwtest.fft_bundle.setfft ("gpu", "single"); + } + else if (typeid (T) == typeid (double)) + { + pwtest.fft_bundle.setfft ("gpu", "double"); + } else - { - cout << "Error: Unsupported type" << endl; - return; - } - this->init(pwtest); + { + cout << "Error: Unsupported type" << endl; + return; + } + this->init (pwtest); int startiz = pwtest.startz_current; const int nx = pwtest.nx; const int ny = pwtest.ny; @@ -252,18 +258,18 @@ TYPED_TEST(PW_BASIS_K_GPU_TEST, FloatDouble) const int nplane = pwtest.nplane; const int npw = pwtest.npw; for (int ixy = 0; ixy < nx * ny; ++ixy) - { - const int offset = ixy * nz + startiz; - const int startz = ixy * nplane; - for (int iz = 0; iz < nplane; ++iz) { - EXPECT_NEAR(this->tmp[offset + iz].real(), this->h_rhor[startz + iz].real(), 1e-4); + const int offset = ixy * nz + startiz; + const int startz = ixy * nplane; + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (this->tmp[offset + iz].real (), this->h_rhor[startz + iz].real (), 1e-4); + } } - } for (int ig = 0; ig < npw; ++ig) - { - EXPECT_NEAR(this->h_rhog[ig].real(), this->h_rhogout[ig].real(), 1e-4); - EXPECT_NEAR(this->h_rhog[ig].imag(), this->h_rhogout[ig].imag(), 1e-4); - } + { + EXPECT_NEAR (this->h_rhog[ig].real (), this->h_rhogout[ig].real (), 1e-4); + EXPECT_NEAR (this->h_rhog[ig].imag (), this->h_rhogout[ig].imag (), 1e-4); + } } diff --git a/source/source_basis/module_pw/test_gpu/pw_basis_C2R.cpp b/source/source_basis/module_pw/test_gpu/pw_basis_C2R.cpp index 4df4663a2c9..dcfa0d11df0 100644 --- a/source/source_basis/module_pw/test_gpu/pw_basis_C2R.cpp +++ b/source/source_basis/module_pw/test_gpu/pw_basis_C2R.cpp @@ -31,11 +31,12 @@ class PW_BASIS_C2R_GPU_TEST : public ::testing::Test complex* h_rhog; complex* h_rhogout; T* h_rhor; - void init(ModulePW::PW_Basis& pwtest) + void + init (ModulePW::PW_Basis& pwtest) { cout << "dividemthd 1, gamma_only: off, check fft between T and complex" << 
endl; - - ModuleBase::Matrix3 latvec(1, 1, 0, 0, 1, 1, 0, 0, 2); + + ModuleBase::Matrix3 latvec (1, 1, 0, 0, 1, 1, 0, 0, 2); T wfcecut; T lat0 = 2.2; bool gamma_only = false; @@ -49,13 +50,13 @@ class PW_BASIS_C2R_GPU_TEST : public ::testing::Test const int nproc_in_pool = 1; const int rank_in_pool = 0; MPI_Comm POOL_WORLD; - MPI_Comm_split(MPI_COMM_WORLD, mypool, key, &POOL_WORLD); - pwtest.initmpi(nproc_in_pool, rank_in_pool, POOL_WORLD); + MPI_Comm_split (MPI_COMM_WORLD, mypool, key, &POOL_WORLD); + pwtest.initmpi (nproc_in_pool, rank_in_pool, POOL_WORLD); - pwtest.initgrids(lat0, latvec, wfcecut); - pwtest.initparameters(gamma_only, wfcecut, distribution_type, xprime); - pwtest.setuptransform(); - pwtest.collect_local_pw(); + pwtest.initgrids (lat0, latvec, wfcecut); + pwtest.initparameters (gamma_only, wfcecut, distribution_type, xprime); + pwtest.setuptransform (); + pwtest.collect_local_pw (); const int npw = pwtest.npw; const int nrxx = pwtest.nrxx; @@ -68,132 +69,144 @@ class PW_BASIS_C2R_GPU_TEST : public ::testing::Test const T tpiba2 = ModuleBase::TWO_PI * ModuleBase::TWO_PI / lat0 / lat0; const T ggecut = wfcecut / tpiba2; ModuleBase::Matrix3 GT, G, GGT; - GT = latvec.Inverse(); - G = GT.Transpose(); + GT = latvec.Inverse (); + G = GT.Transpose (); GGT = G * GT; tmp = new complex[nx * ny * nz]; if (rank_in_pool == 0) - { - for (int ix = 0; ix < nx; ++ix) { - const T vx = ix - int(nx / 2); - for (int iy = 0; iy < ny; ++iy) - { - const int offset = (ix * ny + iy) * nz; - const T vy = iy - int(ny / 2); - for (int iz = 0; iz < nz; ++iz) + for (int ix = 0; ix < nx; ++ix) { - tmp[offset + iz] = 0.0; - T vz = iz - int(nz / 2); - ModuleBase::Vector3 v(vx, vy, vz); - T modulus = v * (GGT * v); - if (modulus <= ggecut) - { - tmp[offset + iz] = 1.0 / (modulus + 1); - if (vy > 0) + const T vx = ix - int (nx / 2); + for (int iy = 0; iy < ny; ++iy) { - tmp[offset + iz] += std::complex(0,1.0) / (std::abs(static_cast(v.x) + 1) + 1); + const int offset = (ix * ny + iy) 
* nz; + const T vy = iy - int (ny / 2); + for (int iz = 0; iz < nz; ++iz) + { + tmp[offset + iz] = 0.0; + T vz = iz - int (nz / 2); + ModuleBase::Vector3 v (vx, vy, vz); + T modulus = v * (GGT * v); + if (modulus <= ggecut) + { + tmp[offset + iz] = 1.0 / (modulus + 1); + if (vy > 0) + { + tmp[offset + iz] += std::complex (0, 1.0) + / (std::abs (static_cast (v.x) + 1) + 1); + } + else if (vy < 0) + { + tmp[offset + iz] + -= std::complex (0, 1.0) + / (std::abs (-static_cast (v.x) + 1) + 1); + } + } + } } - else if (vy < 0) + } + if (typeid (T) == typeid (double)) + { + fftw_plan pp = fftw_plan_dft_3d (nx, + ny, + nz, + (fftw_complex*)tmp, + (fftw_complex*)tmp, + FFTW_BACKWARD, + FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + } + else if (typeid (T) == typeid (float)) + { + fftwf_plan pp = fftwf_plan_dft_3d (nx, + ny, + nz, + (fftwf_complex*)tmp, + (fftwf_complex*)tmp, + FFTW_BACKWARD, + FFTW_ESTIMATE); + fftwf_execute (pp); + fftwf_destroy_plan (pp); + } + ModuleBase::Vector3 delta_g (T (int (nx / 2)) / nx, T (int (ny / 2)) / ny, T (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + const int ix = ixy / ny; + const int iy = ixy % ny; + for (int iz = 0; iz < nz; ++iz) { - tmp[offset + iz] -= std::complex(0,1.0) / (std::abs(-static_cast(v.x) + 1) + 1); + ModuleBase::Vector3 real_r (ix, iy, iz); + T phase_im = -delta_g * real_r; + complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); } - } } - } - } - if (typeid(T)==typeid(double)) - { - fftw_plan pp - = fftw_plan_dft_3d(nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - }else if (typeid(T)==typeid(float)){ - fftwf_plan pp - = fftwf_plan_dft_3d(nx, ny, nz, (fftwf_complex*)tmp, (fftwf_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE); - fftwf_execute(pp); - fftwf_destroy_plan(pp); - } - ModuleBase::Vector3 delta_g(T(int(nx / 2)) / nx, - T(int(ny / 2)) / ny, - T(int(nz / 2)) / nz); 
- for (int ixy = 0; ixy < nx * ny; ++ixy) - { - const int ix = ixy / ny; - const int iy = ixy % ny; - for (int iz = 0; iz < nz; ++iz) - { - ModuleBase::Vector3 real_r(ix, iy, iz); - T phase_im = -delta_g * real_r; - complex phase(0, ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } } - } h_rhog = new complex[npw]; h_rhogout = new complex[npw]; - cudaMalloc((void**)&d_rhog, npw * sizeof(complex)); - cudaMalloc((void**)&d_rhogr, npw * sizeof(complex)); - cudaMalloc((void**)&d_rhogout, npw * sizeof(complex)); + cudaMalloc ((void**)&d_rhog, npw * sizeof (complex)); + cudaMalloc ((void**)&d_rhogr, npw * sizeof (complex)); + cudaMalloc ((void**)&d_rhogout, npw * sizeof (complex)); for (int ig = 0; ig < npw; ++ig) - { - h_rhog[ig] = 1.0 / (pwtest.gg[ig] + 1); - if (pwtest.gdirect[ig].y > 0) { - h_rhog[ig] += ModuleBase::IMAG_UNIT / (std::abs(pwtest.gdirect[ig].x + 1) + 1); - } - else if (pwtest.gdirect[ig].y < 0) - { - h_rhog[ig] -= ModuleBase::IMAG_UNIT / (std::abs(-pwtest.gdirect[ig].x + 1) + 1); + h_rhog[ig] = 1.0 / (pwtest.gg[ig] + 1); + if (pwtest.gdirect[ig].y > 0) + { + h_rhog[ig] += ModuleBase::IMAG_UNIT / (std::abs (pwtest.gdirect[ig].x + 1) + 1); + } + else if (pwtest.gdirect[ig].y < 0) + { + h_rhog[ig] -= ModuleBase::IMAG_UNIT / (std::abs (-pwtest.gdirect[ig].x + 1) + 1); + } } - } - cudaMemcpy(d_rhog, h_rhog, npw * sizeof(complex), cudaMemcpyHostToDevice); + cudaMemcpy (d_rhog, h_rhog, npw * sizeof (complex), cudaMemcpyHostToDevice); h_rhor = new T[nrxx]; - cudaMalloc((void**)&d_rhor, nrxx * sizeof(T)); - pwtest.recip_to_real, T, base_device::DEVICE_GPU>(d_rhog, d_rhor); - cudaMemcpy(h_rhor, d_rhor, nrxx * sizeof(T), cudaMemcpyDeviceToHost); + cudaMalloc ((void**)&d_rhor, nrxx * sizeof (T)); + pwtest.recip_to_real, T, base_device::DEVICE_GPU> (d_rhog, d_rhor); + cudaMemcpy (h_rhor, d_rhor, nrxx * sizeof (T), cudaMemcpyDeviceToHost); - pwtest.real_to_recip,base_device::DEVICE_GPU>(d_rhor,d_rhog); - cudaMemcpy(h_rhogout,d_rhog,npw * 
sizeof(complex),cudaMemcpyDeviceToHost); - - + pwtest.real_to_recip, base_device::DEVICE_GPU> (d_rhor, d_rhog); + cudaMemcpy (h_rhogout, d_rhog, npw * sizeof (complex), cudaMemcpyDeviceToHost); } - ModulePW::PW_Basis* access_pw() + ModulePW::PW_Basis* + access_pw () { return &pwtest; } - void TearDown() override + void + TearDown () override { delete[] h_rhog; delete[] h_rhogout; delete[] h_rhor; delete[] tmp; - cudaFree(d_rhog); - cudaFree(d_rhogr); - cudaFree(d_rhogout); - cudaFree(d_rhor); + cudaFree (d_rhog); + cudaFree (d_rhogr); + cudaFree (d_rhogout); + cudaFree (d_rhor); } }; -using MixedTypes = ::testing::Types, - TypePair - >; +using MixedTypes + = ::testing::Types, TypePair>; -TYPED_TEST_CASE(PW_BASIS_C2R_GPU_TEST, MixedTypes); +TYPED_TEST_CASE (PW_BASIS_C2R_GPU_TEST, MixedTypes); -TYPED_TEST(PW_BASIS_C2R_GPU_TEST, Mixing) +TYPED_TEST (PW_BASIS_C2R_GPU_TEST, Mixing) { using T = typename TestFixture::T; using Device = typename TestFixture::Device; ModulePW::PW_Basis pwtest; - pwtest.set_device("gpu"); - pwtest.set_precision("double"); - pwtest.fft_bundle.setfft("gpu", "mixing"); - this->init(pwtest); + pwtest.set_device ("gpu"); + pwtest.set_precision ("double"); + pwtest.fft_bundle.setfft ("gpu", "mixing"); + this->init (pwtest); int startiz = pwtest.startz_current; const int nx = pwtest.nx; const int ny = pwtest.ny; @@ -201,43 +214,42 @@ TYPED_TEST(PW_BASIS_C2R_GPU_TEST, Mixing) const int nplane = pwtest.nplane; const int npw = pwtest.npw; for (int ixy = 0; ixy < nx * ny; ++ixy) - { - const int offset = ixy * nz + startiz; - const int startz = ixy * nplane; - for (int iz = 0; iz < nplane; ++iz) { - EXPECT_NEAR(this->tmp[offset + iz].real(), - this->h_rhor[startz + iz], 1e-4); + const int offset = ixy * nz + startiz; + const int startz = ixy * nplane; + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (this->tmp[offset + iz].real (), this->h_rhor[startz + iz], 1e-4); + } } - } for (int ig = 0; ig < pwtest.npw; ++ig) - { - 
EXPECT_NEAR(this->h_rhog[ig].real(), this->h_rhogout[ig].real(), 1e-4); - EXPECT_NEAR(this->h_rhog[ig].imag(), this->h_rhogout[ig].imag(), 1e-4); - } + { + EXPECT_NEAR (this->h_rhog[ig].real (), this->h_rhogout[ig].real (), 1e-4); + EXPECT_NEAR (this->h_rhog[ig].imag (), this->h_rhogout[ig].imag (), 1e-4); + } } -TYPED_TEST(PW_BASIS_C2R_GPU_TEST, FloatDouble) +TYPED_TEST (PW_BASIS_C2R_GPU_TEST, FloatDouble) { using T = typename TestFixture::T; using Device = typename TestFixture::Device; ModulePW::PW_Basis pwtest; - pwtest.set_device("gpu"); - pwtest.set_precision("double"); - if (typeid(T) == typeid(float)) - { - pwtest.fft_bundle.setfft("gpu", "single"); - } - else if (typeid(T) == typeid(double)) - { - pwtest.fft_bundle.setfft("gpu", "double"); - } + pwtest.set_device ("gpu"); + pwtest.set_precision ("double"); + if (typeid (T) == typeid (float)) + { + pwtest.fft_bundle.setfft ("gpu", "single"); + } + else if (typeid (T) == typeid (double)) + { + pwtest.fft_bundle.setfft ("gpu", "double"); + } else - { - cout << "Error: Unsupported type" << endl; - return; - } - this->init(pwtest); + { + cout << "Error: Unsupported type" << endl; + return; + } + this->init (pwtest); int startiz = pwtest.startz_current; const int nx = pwtest.nx; const int ny = pwtest.ny; @@ -245,19 +257,18 @@ TYPED_TEST(PW_BASIS_C2R_GPU_TEST, FloatDouble) const int nplane = pwtest.nplane; const int npw = pwtest.npw; for (int ixy = 0; ixy < nx * ny; ++ixy) - { - const int offset = ixy * nz + startiz; - const int startz = ixy * nplane; - for (int iz = 0; iz < nplane; ++iz) { - EXPECT_NEAR(this->tmp[offset + iz].real(), - this->h_rhor[startz + iz], 1e-4); + const int offset = ixy * nz + startiz; + const int startz = ixy * nplane; + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (this->tmp[offset + iz].real (), this->h_rhor[startz + iz], 1e-4); + } } - } - for (int ig = 0; ig < pwtest.npw; ++ig) - { - EXPECT_NEAR(this->h_rhog[ig].real(), this->h_rhogout[ig].real(), 1e-4); - 
EXPECT_NEAR(this->h_rhog[ig].imag(), this->h_rhogout[ig].imag(), 1e-4); - } + for (int ig = 0; ig < pwtest.npw; ++ig) + { + EXPECT_NEAR (this->h_rhog[ig].real (), this->h_rhogout[ig].real (), 1e-4); + EXPECT_NEAR (this->h_rhog[ig].imag (), this->h_rhogout[ig].imag (), 1e-4); + } } diff --git a/source/source_basis/module_pw/test_gpu/pw_basis_k_C2C.cpp b/source/source_basis/module_pw/test_gpu/pw_basis_k_C2C.cpp index cf9ee6e23b0..d869cca796f 100644 --- a/source/source_basis/module_pw/test_gpu/pw_basis_k_C2C.cpp +++ b/source/source_basis/module_pw/test_gpu/pw_basis_k_C2C.cpp @@ -32,9 +32,10 @@ class PW_BASIS_K_GPU_TEST : public ::testing::Test complex* h_rhog = nullptr; complex* h_rhogout = nullptr; complex* h_rhor = nullptr; - void init(ModulePW::PW_Basis_K& pwtest) + void + init (ModulePW::PW_Basis_K& pwtest) { - ModuleBase::Matrix3 latvec(1, 1, 0, 0, 1, 1, 0, 0, 2); + ModuleBase::Matrix3 latvec (1, 1, 0, 0, 1, 1, 0, 0, 2); T wfcecut; T lat0 = 2.2; @@ -46,19 +47,19 @@ class PW_BASIS_K_GPU_TEST : public ::testing::Test const int nks = 1; ModuleBase::Vector3* kvec_d; kvec_d = new ModuleBase::Vector3[nks]; - kvec_d[0].set(0, 0, 0); + kvec_d[0].set (0, 0, 0); // init const int mypool = 0; const int key = 1; const int nproc_in_pool = 1; const int rank_in_pool = 0; MPI_Comm POOL_WORLD; - MPI_Comm_split(MPI_COMM_WORLD, mypool, key, &POOL_WORLD); - pwtest.initmpi(nproc_in_pool, rank_in_pool, POOL_WORLD); - pwtest.initgrids(lat0, latvec, wfcecut); - pwtest.initparameters(gamma_only, wfcecut, nks, kvec_d, distribution_type, xprime); - pwtest.setuptransform(); - pwtest.collect_local_pw(); + MPI_Comm_split (MPI_COMM_WORLD, mypool, key, &POOL_WORLD); + pwtest.initmpi (nproc_in_pool, rank_in_pool, POOL_WORLD); + pwtest.initgrids (lat0, latvec, wfcecut); + pwtest.initparameters (gamma_only, wfcecut, nks, kvec_d, distribution_type, xprime); + pwtest.setuptransform (); + pwtest.collect_local_pw (); const int nrxx = pwtest.nrxx; const int nmaxgr = pwtest.nmaxgr; @@ -70,146 +71,152 @@ 
class PW_BASIS_K_GPU_TEST : public ::testing::Test const T tpiba2 = ModuleBase::TWO_PI * ModuleBase::TWO_PI / lat0 / lat0; const T ggecut = wfcecut / tpiba2; ModuleBase::Matrix3 GT, G, GGT; - GT = latvec.Inverse(); - G = GT.Transpose(); + GT = latvec.Inverse (); + G = GT.Transpose (); GGT = G * GT; tmp = new complex[nx * ny * nz]; for (int ik = 0; ik < nks; ik++) - { - int npwk = pwtest.npwk[ik]; - ModuleBase::Vector3 kk = kvec_d[ik]; - if (rank_in_pool == 0) { - for (int ix = 0; ix < nx; ++ix) - { - const T vx = ix - int(nx / 2); - for (int iy = 0; iy < ny; ++iy) + int npwk = pwtest.npwk[ik]; + ModuleBase::Vector3 kk = kvec_d[ik]; + if (rank_in_pool == 0) { - const int offset = (ix * ny + iy) * nz; - const T vy = iy - int(ny / 2); - for (int iz = 0; iz < nz; ++iz) - { - tmp[offset + iz] = 0.0; - T vz = iz - int(nz / 2); - ModuleBase::Vector3 v(vx, vy, vz); - T modulus = (v + kk) * (GGT * v); - if (modulus <= ggecut) + for (int ix = 0; ix < nx; ++ix) { - tmp[offset + iz] = 1.0 / (modulus + 1); - if (vy > 0) - { - tmp[offset + iz] - += std::complex(0, 1.0) / (std::abs(static_cast(v.x) + 1) + 1); - } - else if (vy < 0) - { - tmp[offset + iz] - -= std::complex(0, 1.0) / (std::abs(-static_cast(v.x) + 1) + 1); - } + const T vx = ix - int (nx / 2); + for (int iy = 0; iy < ny; ++iy) + { + const int offset = (ix * ny + iy) * nz; + const T vy = iy - int (ny / 2); + for (int iz = 0; iz < nz; ++iz) + { + tmp[offset + iz] = 0.0; + T vz = iz - int (nz / 2); + ModuleBase::Vector3 v (vx, vy, vz); + T modulus = (v + kk) * (GGT * v); + if (modulus <= ggecut) + { + tmp[offset + iz] = 1.0 / (modulus + 1); + if (vy > 0) + { + tmp[offset + iz] + += std::complex (0, 1.0) + / (std::abs (static_cast (v.x) + 1) + 1); + } + else if (vy < 0) + { + tmp[offset + iz] + -= std::complex (0, 1.0) + / (std::abs (-static_cast (v.x) + 1) + 1); + } + } + } + } } - } - } - } - if (typeid(T) == typeid(double)) - { - fftw_plan pp = fftw_plan_dft_3d(nx, - ny, - nz, - (fftw_complex*)tmp, - 
(fftw_complex*)tmp, - FFTW_BACKWARD, - FFTW_ESTIMATE); - fftw_execute(pp); - fftw_destroy_plan(pp); - } - else if (typeid(T) == typeid(float)) - { - fftwf_plan pp = fftwf_plan_dft_3d(nx, - ny, - nz, - (fftwf_complex*)tmp, - (fftwf_complex*)tmp, - FFTW_BACKWARD, - FFTW_ESTIMATE); - fftwf_execute(pp); - fftwf_destroy_plan(pp); - } - ModuleBase::Vector3 delta_g(T(int(nx / 2)) / nx, T(int(ny / 2)) / ny, T(int(nz / 2)) / nz); - for (int ixy = 0; ixy < nx * ny; ++ixy) - { - const int ix = ixy / ny; - const int iy = ixy % ny; - for (int iz = 0; iz < nz; ++iz) - { - ModuleBase::Vector3 real_r(ix, iy, iz); - T phase_im = -delta_g * real_r; - complex phase(0, ModuleBase::TWO_PI * phase_im); - tmp[ixy * nz + iz] *= exp(phase); - } - } - // const int size = nx * ny * nz; + if (typeid (T) == typeid (double)) + { + fftw_plan pp = fftw_plan_dft_3d (nx, + ny, + nz, + (fftw_complex*)tmp, + (fftw_complex*)tmp, + FFTW_BACKWARD, + FFTW_ESTIMATE); + fftw_execute (pp); + fftw_destroy_plan (pp); + } + else if (typeid (T) == typeid (float)) + { + fftwf_plan pp = fftwf_plan_dft_3d (nx, + ny, + nz, + (fftwf_complex*)tmp, + (fftwf_complex*)tmp, + FFTW_BACKWARD, + FFTW_ESTIMATE); + fftwf_execute (pp); + fftwf_destroy_plan (pp); + } + ModuleBase::Vector3 delta_g (T (int (nx / 2)) / nx, + T (int (ny / 2)) / ny, + T (int (nz / 2)) / nz); + for (int ixy = 0; ixy < nx * ny; ++ixy) + { + const int ix = ixy / ny; + const int iy = ixy % ny; + for (int iz = 0; iz < nz; ++iz) + { + ModuleBase::Vector3 real_r (ix, iy, iz); + T phase_im = -delta_g * real_r; + complex phase (0, ModuleBase::TWO_PI * phase_im); + tmp[ixy * nz + iz] *= exp (phase); + } + } + // const int size = nx * ny * nz; - h_rhog = new complex[npwk]; - h_rhogout = new complex[npwk]; - for (int ig = 0; ig < npwk; ++ig) - { - h_rhog[ig] = 1.0 / (pwtest.getgk2(ik, ig) + 1); - ModuleBase::Vector3 f = pwtest.getgdirect(ik, ig); - if (f.y > 0) - { - h_rhog[ig] += std::complex(0, 1.0) / (std::abs(float(f.x) + 1) + 1); - } - else if (f.y < 0) - 
{ - h_rhog[ig] -= std::complex(0, 1.0) / (std::abs(float(-f.x) + 1) + 1); - } - } + h_rhog = new complex[npwk]; + h_rhogout = new complex[npwk]; + for (int ig = 0; ig < npwk; ++ig) + { + h_rhog[ig] = 1.0 / (pwtest.getgk2 (ik, ig) + 1); + ModuleBase::Vector3 f = pwtest.getgdirect (ik, ig); + if (f.y > 0) + { + h_rhog[ig] += std::complex (0, 1.0) / (std::abs (float (f.x) + 1) + 1); + } + else if (f.y < 0) + { + h_rhog[ig] -= std::complex (0, 1.0) / (std::abs (float (-f.x) + 1) + 1); + } + } - cudaMalloc((void**)&d_rhog, npwk * sizeof(complex)); - cudaMalloc((void**)&d_rhor, nrxx * sizeof(complex)); - cudaMemcpy(d_rhog, h_rhog, npwk * sizeof(complex), cudaMemcpyHostToDevice); + cudaMalloc ((void**)&d_rhog, npwk * sizeof (complex)); + cudaMalloc ((void**)&d_rhor, nrxx * sizeof (complex)); + cudaMemcpy (d_rhog, h_rhog, npwk * sizeof (complex), cudaMemcpyHostToDevice); - h_rhor = new complex[nrxx]; + h_rhor = new complex[nrxx]; - pwtest.recip_to_real, base_device::DEVICE_GPU>(d_rhog, d_rhor, ik); - cudaMemcpy(h_rhor, d_rhor, nrxx * sizeof(complex), cudaMemcpyDeviceToHost); + pwtest.recip_to_real, base_device::DEVICE_GPU> (d_rhog, d_rhor, ik); + cudaMemcpy (h_rhor, d_rhor, nrxx * sizeof (complex), cudaMemcpyDeviceToHost); - pwtest.real_to_recip, base_device::DEVICE_GPU>(d_rhor, d_rhog, ik); - cudaMemcpy(h_rhogout, d_rhog, npwk * sizeof(complex), cudaMemcpyDeviceToHost); + pwtest.real_to_recip, base_device::DEVICE_GPU> (d_rhor, d_rhog, ik); + cudaMemcpy (h_rhogout, d_rhog, npwk * sizeof (complex), cudaMemcpyDeviceToHost); + } } - } } - ModulePW::PW_Basis_K* access_pw() + ModulePW::PW_Basis_K* + access_pw () { return &pwtest; } - void TearDown() override + void + TearDown () override { delete[] h_rhog; delete[] h_rhogout; delete[] h_rhor; delete[] tmp; - cudaFree(d_rhog); - cudaFree(d_rhogr); - cudaFree(d_rhogout); - cudaFree(d_rhor); + cudaFree (d_rhog); + cudaFree (d_rhogr); + cudaFree (d_rhogout); + cudaFree (d_rhor); } }; -using MixedTypes = ::testing::Types, - TypePair 
>; +using MixedTypes + = ::testing::Types, TypePair>; -TYPED_TEST_CASE(PW_BASIS_K_GPU_TEST, MixedTypes); +TYPED_TEST_CASE (PW_BASIS_K_GPU_TEST, MixedTypes); -TYPED_TEST(PW_BASIS_K_GPU_TEST, Mixing) +TYPED_TEST (PW_BASIS_K_GPU_TEST, Mixing) { using T = typename TestFixture::T; using Device = typename TestFixture::Device; ModulePW::PW_Basis_K pwtest; - pwtest.set_device("gpu"); - pwtest.set_precision("mixing"); - pwtest.fft_bundle.setfft("gpu", "mixing"); - this->init(pwtest); + pwtest.set_device ("gpu"); + pwtest.set_precision ("mixing"); + pwtest.fft_bundle.setfft ("gpu", "mixing"); + this->init (pwtest); int startiz = pwtest.startz_current; const int nx = pwtest.nx; const int ny = pwtest.ny; @@ -217,61 +224,62 @@ TYPED_TEST(PW_BASIS_K_GPU_TEST, Mixing) const int nplane = pwtest.nplane; const int npwk = pwtest.npwk[0]; for (int ixy = 0; ixy < nx * ny; ++ixy) - { - const int offset = ixy * nz + startiz; - const int startz = ixy * nplane; - for (int iz = 0; iz < nplane; ++iz) { - EXPECT_NEAR(this->tmp[offset + iz].real(), this->h_rhor[startz + iz].real(), 1e-4); + const int offset = ixy * nz + startiz; + const int startz = ixy * nplane; + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (this->tmp[offset + iz].real (), this->h_rhor[startz + iz].real (), 1e-4); + } } - } for (int ig = 0; ig < npwk; ++ig) - { - EXPECT_NEAR(this->h_rhog[ig].real(), this->h_rhogout[ig].real(), 1e-4); - EXPECT_NEAR(this->h_rhog[ig].imag(), this->h_rhogout[ig].imag(), 1e-4); - } + { + EXPECT_NEAR (this->h_rhog[ig].real (), this->h_rhogout[ig].real (), 1e-4); + EXPECT_NEAR (this->h_rhog[ig].imag (), this->h_rhogout[ig].imag (), 1e-4); + } } -TYPED_TEST(PW_BASIS_K_GPU_TEST, FloatDouble) +TYPED_TEST (PW_BASIS_K_GPU_TEST, FloatDouble) { using T = typename TestFixture::T; using Device = typename TestFixture::Device; ModulePW::PW_Basis_K pwtest; - pwtest.set_device("gpu"); - pwtest.set_precision("mixing"); - if (typeid(T) == typeid(float)) - { - pwtest.fft_bundle.setfft("gpu", "single"); - 
} - else if (typeid(T) == typeid(double)) - { - pwtest.fft_bundle.setfft("gpu", "double"); - } + pwtest.set_device ("gpu"); + pwtest.set_precision ("mixing"); + if (typeid (T) == typeid (float)) + { + pwtest.fft_bundle.setfft ("gpu", "single"); + } + else if (typeid (T) == typeid (double)) + { + pwtest.fft_bundle.setfft ("gpu", "double"); + } else - { - cout << "Error: Unsupported type" << endl; - return; - } - this->init(pwtest); + { + cout << "Error: Unsupported type" << endl; + return; + } + this->init (pwtest); int startiz = pwtest.startz_current; const int nx = pwtest.nx; const int ny = pwtest.ny; const int nz = pwtest.nz; const int nplane = pwtest.nplane; - const int npwk = pwtest.npwk[0];; + const int npwk = pwtest.npwk[0]; + ; for (int ixy = 0; ixy < nx * ny; ++ixy) - { - const int offset = ixy * nz + startiz; - const int startz = ixy * nplane; - for (int iz = 0; iz < nplane; ++iz) { - EXPECT_NEAR(this->tmp[offset + iz].real(), this->h_rhor[startz + iz].real(), 1e-4); + const int offset = ixy * nz + startiz; + const int startz = ixy * nplane; + for (int iz = 0; iz < nplane; ++iz) + { + EXPECT_NEAR (this->tmp[offset + iz].real (), this->h_rhor[startz + iz].real (), 1e-4); + } } - } for (int ig = 0; ig < npwk; ++ig) - { - EXPECT_NEAR(this->h_rhog[ig].real(), this->h_rhogout[ig].real(), 1e-4); - EXPECT_NEAR(this->h_rhog[ig].imag(), this->h_rhogout[ig].imag(), 1e-4); - } + { + EXPECT_NEAR (this->h_rhog[ig].real (), this->h_rhogout[ig].real (), 1e-4); + EXPECT_NEAR (this->h_rhog[ig].imag (), this->h_rhogout[ig].imag (), 1e-4); + } } diff --git a/source/source_basis/module_pw/test_gpu/pw_test.cpp b/source/source_basis/module_pw/test_gpu/pw_test.cpp index fe61624c7f6..5fd84547874 100644 --- a/source/source_basis/module_pw/test_gpu/pw_test.cpp +++ b/source/source_basis/module_pw/test_gpu/pw_test.cpp @@ -11,31 +11,32 @@ using namespace std; int nproc_in_pool, rank_in_pool; string precision_flag, device_flag; -class TestEnv : public testing::Environment +class 
TestEnv : public testing::Environment { -public: - virtual void SetUp() + public: + virtual void + SetUp () { - if(rank_in_pool == 0) - { - cout<<"\033[32m"<<"[ SET UP TESTS ]"<<"\033[0m"< kvec_d_in[3] = { {0.0, 0.0, 0.0}, {0.1, 0.2, 0.3}, {0.4, 0.5, 0.6} }; - const int distribution_type_in = 1; - const bool xprime_in = true; - basis_k.initparameters(gamma_only_in, gk_ecut_in, nks_in,kvec_d_in, distribution_type_in, xprime_in); - EXPECT_EQ(basis_k.nks, nks_in); - EXPECT_NE(basis_k.kvec_d, nullptr); - for(int i=0; i kvec_d_in[3] = {{0.0, 0.0, 0.0}, {0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}}; + const int distribution_type_in = 1; + const bool xprime_in = true; + basis_k.initparameters (gamma_only_in, gk_ecut_in, nks_in, kvec_d_in, distribution_type_in, xprime_in); + EXPECT_EQ (basis_k.nks, nks_in); + EXPECT_NE (basis_k.kvec_d, nullptr); + for (int i = 0; i < nks_in; i++) + { + EXPECT_EQ (basis_k.kvec_d[i], kvec_d_in[i]); + } + EXPECT_NE (basis_k.kvec_c, nullptr); + for (int i = 0; i < nks_in; i++) + { + EXPECT_EQ (basis_k.kvec_c[i], kvec_d_in[i] * basis_k.G); + } + EXPECT_GT (basis_k.gk_ecut, 0.0); + EXPECT_GT (basis_k.ggecut, 0.0); + EXPECT_LE (basis_k.ggecut, basis_k.gridecut_lat); + EXPECT_FALSE (basis_k.gamma_only); + EXPECT_EQ (basis_k.xprime, xprime_in); + if (basis_k.gamma_only) + { + EXPECT_EQ (basis_k.fftny, basis_k.ny); + EXPECT_EQ (basis_k.fftnx, int (basis_k.nx / 2) + 1); + } + else + { + EXPECT_EQ (basis_k.fftny, basis_k.ny); + EXPECT_EQ (basis_k.fftnx, basis_k.nx); + } + EXPECT_EQ (basis_k.fftnz, basis_k.nz); + EXPECT_EQ (basis_k.fftnxy, basis_k.fftnx * basis_k.fftny); + EXPECT_EQ (basis_k.fftnxyz, basis_k.fftnxy * basis_k.fftnz); + EXPECT_EQ (basis_k.distribution_type, distribution_type_in); } -TEST_F(PWBasisKTEST, SetupTransform) +TEST_F (PWBasisKTEST, SetupTransform) { - ModulePW::PW_Basis_K basis_k(device_flag, precision_double); - double lat0 = 1.8897261254578281; - ModuleBase::Matrix3 latvec(10.0,0.0,0.0, - 0.0,10.0,0.0, - 0.0,0.0,10.0); - double 
gridecut=10.0; - basis_k.initgrids(lat0,latvec,gridecut); - const bool gamma_only_in = true; - const double gk_ecut_in = 10.0; - const int nks_in = 3; - const ModuleBase::Vector3 kvec_d_in[3] = { {0.0, 0.0, 0.0}, {0.1, 0.2, 0.3}, {0.4, 0.5, 0.6} }; - const int distribution_type_in = 1; - const bool xprime_in = true; - basis_k.initparameters(gamma_only_in, gk_ecut_in, nks_in,kvec_d_in, distribution_type_in, xprime_in); - EXPECT_NO_THROW(basis_k.setuptransform()); - EXPECT_EQ(basis_k.npw,3695); + ModulePW::PW_Basis_K basis_k (device_flag, precision_double); + double lat0 = 1.8897261254578281; + ModuleBase::Matrix3 latvec (10.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 10.0); + double gridecut = 10.0; + basis_k.initgrids (lat0, latvec, gridecut); + const bool gamma_only_in = true; + const double gk_ecut_in = 10.0; + const int nks_in = 3; + const ModuleBase::Vector3 kvec_d_in[3] = {{0.0, 0.0, 0.0}, {0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}}; + const int distribution_type_in = 1; + const bool xprime_in = true; + basis_k.initparameters (gamma_only_in, gk_ecut_in, nks_in, kvec_d_in, distribution_type_in, xprime_in); + EXPECT_NO_THROW (basis_k.setuptransform ()); + EXPECT_EQ (basis_k.npw, 3695); } -TEST_F(PWBasisKTEST, CollectLocalPW) +TEST_F (PWBasisKTEST, CollectLocalPW) { - ModulePW::PW_Basis_K basis_k(device_flag, precision_double); - double lat0 = 1.8897261254578281; - ModuleBase::Matrix3 latvec(10.0,0.0,0.0, - 0.0,10.0,0.0, - 0.0,0.0,10.0); - double gridecut=10.0; - basis_k.initgrids(lat0,latvec,gridecut); - const bool gamma_only_in = true; - const double gk_ecut_in = 11.0; - const int nks_in = 3; - const ModuleBase::Vector3 kvec_d_in[3] = { {0.0, 0.0, 0.0}, {0.1, 0.2, 0.3}, {0.4, 0.5, 0.6} }; - const int distribution_type_in = 1; - const bool xprime_in = true; - basis_k.initparameters(gamma_only_in, gk_ecut_in, nks_in,kvec_d_in, distribution_type_in, xprime_in); - EXPECT_NO_THROW(basis_k.setuptransform()); - EXPECT_NO_THROW(basis_k.collect_local_pw()); - 
EXPECT_EQ(basis_k.npw,3695); - EXPECT_EQ(basis_k.npwk_max,2721); + ModulePW::PW_Basis_K basis_k (device_flag, precision_double); + double lat0 = 1.8897261254578281; + ModuleBase::Matrix3 latvec (10.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 10.0); + double gridecut = 10.0; + basis_k.initgrids (lat0, latvec, gridecut); + const bool gamma_only_in = true; + const double gk_ecut_in = 11.0; + const int nks_in = 3; + const ModuleBase::Vector3 kvec_d_in[3] = {{0.0, 0.0, 0.0}, {0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}}; + const int distribution_type_in = 1; + const bool xprime_in = true; + basis_k.initparameters (gamma_only_in, gk_ecut_in, nks_in, kvec_d_in, distribution_type_in, xprime_in); + EXPECT_NO_THROW (basis_k.setuptransform ()); + EXPECT_NO_THROW (basis_k.collect_local_pw ()); + EXPECT_EQ (basis_k.npw, 3695); + EXPECT_EQ (basis_k.npwk_max, 2721); } - - diff --git a/source/source_basis/module_pw/test_serial/pw_basis_test.cpp b/source/source_basis/module_pw/test_serial/pw_basis_test.cpp index ea678b9d97c..61863aa812d 100644 --- a/source/source_basis/module_pw/test_serial/pw_basis_test.cpp +++ b/source/source_basis/module_pw/test_serial/pw_basis_test.cpp @@ -41,324 +41,304 @@ #undef private #undef protected -class PWBasisTEST: public testing::Test +class PWBasisTEST : public testing::Test { -public: - std::string precision_flag = "double"; - std::string device_flag = "cpu"; - ModulePW::PW_Basis pwb; - ModulePW::PW_Basis pwb1; + public: + std::string precision_flag = "double"; + std::string device_flag = "cpu"; + ModulePW::PW_Basis pwb; + ModulePW::PW_Basis pwb1; }; -TEST_F(PWBasisTEST,Constructor) +TEST_F (PWBasisTEST, Constructor) { - ModulePW::PW_Basis pwb2(device_flag, precision_flag); - EXPECT_EQ(pwb1.classname,"PW_Basis"); - EXPECT_EQ(pwb2.classname,"PW_Basis"); - EXPECT_EQ(pwb2.device,"cpu"); - EXPECT_EQ(pwb2.precision,"double"); - EXPECT_EQ(pwb2.fft_bundle.device,"cpu"); - EXPECT_EQ(pwb2.fft_bundle.precision,"double"); + ModulePW::PW_Basis pwb2 (device_flag, 
precision_flag); + EXPECT_EQ (pwb1.classname, "PW_Basis"); + EXPECT_EQ (pwb2.classname, "PW_Basis"); + EXPECT_EQ (pwb2.device, "cpu"); + EXPECT_EQ (pwb2.precision, "double"); + EXPECT_EQ (pwb2.fft_bundle.device, "cpu"); + EXPECT_EQ (pwb2.fft_bundle.precision, "double"); } -TEST_F(PWBasisTEST,Initgrids1) +TEST_F (PWBasisTEST, Initgrids1) { - double lat0 = 1.8897261254578281; - ModuleBase::Matrix3 latvec(10.0,0.0,0.0, - 0.0,10.0,0.0, - 0.0,0.0,10.0); - double gridecut=10.0; - pwb.initgrids(lat0,latvec,gridecut); - EXPECT_DOUBLE_EQ(pwb.lat0,lat0); - EXPECT_DOUBLE_EQ(pwb.tpiba,ModuleBase::TWO_PI/lat0); - EXPECT_DOUBLE_EQ(pwb.tpiba2,pwb.tpiba*pwb.tpiba); - EXPECT_DOUBLE_EQ(pwb.latvec.e11,latvec.e11); - EXPECT_DOUBLE_EQ(pwb.GT.e11,latvec.Inverse().e11); - EXPECT_DOUBLE_EQ(pwb.G.e11,pwb.GT.Transpose().e11); - EXPECT_DOUBLE_EQ(pwb.GGT.e11,(pwb.G*pwb.GT).e11); - EXPECT_DOUBLE_EQ(pwb.gridecut_lat,gridecut/pwb.tpiba2); - EXPECT_NEAR(pwb.gridecut_lat,0.904561,1e-4); - EXPECT_EQ(pwb.nx,20); - EXPECT_EQ(pwb.ny,20); - EXPECT_EQ(pwb.nz,20); - EXPECT_TRUE(pwb.nx%2==0 || pwb.nx%3==0 || pwb.nx%5==0); - EXPECT_TRUE(pwb.ny%2==0 || pwb.ny%3==0 || pwb.ny%5==0); - EXPECT_TRUE(pwb.nz%2==0 || pwb.nz%3==0 || pwb.nz%5==0); + double lat0 = 1.8897261254578281; + ModuleBase::Matrix3 latvec (10.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 10.0); + double gridecut = 10.0; + pwb.initgrids (lat0, latvec, gridecut); + EXPECT_DOUBLE_EQ (pwb.lat0, lat0); + EXPECT_DOUBLE_EQ (pwb.tpiba, ModuleBase::TWO_PI / lat0); + EXPECT_DOUBLE_EQ (pwb.tpiba2, pwb.tpiba * pwb.tpiba); + EXPECT_DOUBLE_EQ (pwb.latvec.e11, latvec.e11); + EXPECT_DOUBLE_EQ (pwb.GT.e11, latvec.Inverse ().e11); + EXPECT_DOUBLE_EQ (pwb.G.e11, pwb.GT.Transpose ().e11); + EXPECT_DOUBLE_EQ (pwb.GGT.e11, (pwb.G * pwb.GT).e11); + EXPECT_DOUBLE_EQ (pwb.gridecut_lat, gridecut / pwb.tpiba2); + EXPECT_NEAR (pwb.gridecut_lat, 0.904561, 1e-4); + EXPECT_EQ (pwb.nx, 20); + EXPECT_EQ (pwb.ny, 20); + EXPECT_EQ (pwb.nz, 20); + EXPECT_TRUE (pwb.nx % 2 == 0 || pwb.nx 
% 3 == 0 || pwb.nx % 5 == 0); + EXPECT_TRUE (pwb.ny % 2 == 0 || pwb.ny % 3 == 0 || pwb.ny % 5 == 0); + EXPECT_TRUE (pwb.nz % 2 == 0 || pwb.nz % 3 == 0 || pwb.nz % 5 == 0); } -TEST_F(PWBasisTEST,Initgrids2) +TEST_F (PWBasisTEST, Initgrids2) { - double lat0 = 1.8897261254578281; - ModuleBase::Matrix3 latvec(10.0,0.0,0.0, - 0.0,10.0,0.0, - 0.0,0.0,10.0); - int nx_in = 20; - int ny_in = 20; - int nz_in = 20; - pwb.initgrids(lat0,latvec,nx_in,ny_in,nz_in); - EXPECT_DOUBLE_EQ(pwb.lat0,lat0); - EXPECT_DOUBLE_EQ(pwb.tpiba,ModuleBase::TWO_PI/lat0); - EXPECT_DOUBLE_EQ(pwb.tpiba2,pwb.tpiba*pwb.tpiba); - EXPECT_DOUBLE_EQ(pwb.latvec.e11,latvec.e11); - EXPECT_DOUBLE_EQ(pwb.GT.e11,latvec.Inverse().e11); - EXPECT_DOUBLE_EQ(pwb.G.e11,pwb.GT.Transpose().e11); - EXPECT_DOUBLE_EQ(pwb.GGT.e11,(pwb.G*pwb.GT).e11); - EXPECT_EQ(pwb.nx,nx_in); - EXPECT_EQ(pwb.ny,ny_in); - EXPECT_EQ(pwb.nz,nz_in); - EXPECT_NEAR(pwb.gridecut_lat,0.999999,1e-4); - EXPECT_NEAR(pwb.gridecut_lat*pwb.tpiba2,11.0551,1e-4); + double lat0 = 1.8897261254578281; + ModuleBase::Matrix3 latvec (10.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 10.0); + int nx_in = 20; + int ny_in = 20; + int nz_in = 20; + pwb.initgrids (lat0, latvec, nx_in, ny_in, nz_in); + EXPECT_DOUBLE_EQ (pwb.lat0, lat0); + EXPECT_DOUBLE_EQ (pwb.tpiba, ModuleBase::TWO_PI / lat0); + EXPECT_DOUBLE_EQ (pwb.tpiba2, pwb.tpiba * pwb.tpiba); + EXPECT_DOUBLE_EQ (pwb.latvec.e11, latvec.e11); + EXPECT_DOUBLE_EQ (pwb.GT.e11, latvec.Inverse ().e11); + EXPECT_DOUBLE_EQ (pwb.G.e11, pwb.GT.Transpose ().e11); + EXPECT_DOUBLE_EQ (pwb.GGT.e11, (pwb.G * pwb.GT).e11); + EXPECT_EQ (pwb.nx, nx_in); + EXPECT_EQ (pwb.ny, ny_in); + EXPECT_EQ (pwb.nz, nz_in); + EXPECT_NEAR (pwb.gridecut_lat, 0.999999, 1e-4); + EXPECT_NEAR (pwb.gridecut_lat * pwb.tpiba2, 11.0551, 1e-4); } -TEST_F(PWBasisTEST,Initparameters) +TEST_F (PWBasisTEST, Initparameters) { - double lat0 = 1.8897261254578281; - ModuleBase::Matrix3 latvec(10.0,0.0,0.0, - 0.0,10.0,0.0, - 0.0,0.0,10.0); - double gridecut=10.0; - 
//initparameters is always called after initgrids - //because of nx,ny,nz, and tpiba2 - pwb.initgrids(lat0,latvec,gridecut); - bool gamma_only_in = true; - double pwecut_in = 11.0; - int distribution_type_in = 1; - bool xprime_in = true; - pwb.initparameters(gamma_only_in,pwecut_in,distribution_type_in,xprime_in); - EXPECT_EQ(pwb.xprime,xprime_in); - EXPECT_EQ(pwb.gamma_only,gamma_only_in); - EXPECT_EQ(pwb.xprime,xprime_in); - EXPECT_TRUE(pwb.gamma_only); - EXPECT_TRUE(pwb.xprime); - EXPECT_EQ(pwb.fftnx,int(pwb.nx/2)+1); - EXPECT_EQ(pwb.fftny,pwb.ny); - EXPECT_EQ(pwb.fftnz,pwb.nz); - EXPECT_EQ(pwb.ggecut,pwb.gridecut_lat); - EXPECT_EQ(pwb.distribution_type,distribution_type_in); + double lat0 = 1.8897261254578281; + ModuleBase::Matrix3 latvec (10.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 10.0); + double gridecut = 10.0; + // initparameters is always called after initgrids + // because of nx,ny,nz, and tpiba2 + pwb.initgrids (lat0, latvec, gridecut); + bool gamma_only_in = true; + double pwecut_in = 11.0; + int distribution_type_in = 1; + bool xprime_in = true; + pwb.initparameters (gamma_only_in, pwecut_in, distribution_type_in, xprime_in); + EXPECT_EQ (pwb.xprime, xprime_in); + EXPECT_EQ (pwb.gamma_only, gamma_only_in); + EXPECT_EQ (pwb.xprime, xprime_in); + EXPECT_TRUE (pwb.gamma_only); + EXPECT_TRUE (pwb.xprime); + EXPECT_EQ (pwb.fftnx, int (pwb.nx / 2) + 1); + EXPECT_EQ (pwb.fftny, pwb.ny); + EXPECT_EQ (pwb.fftnz, pwb.nz); + EXPECT_EQ (pwb.ggecut, pwb.gridecut_lat); + EXPECT_EQ (pwb.distribution_type, distribution_type_in); } -TEST_F(PWBasisTEST,Setfullpw) +TEST_F (PWBasisTEST, Setfullpw) { - bool inpt_full_pw = false; - int inpt_full_pw_dim = 2; - pwb.setfullpw(inpt_full_pw,inpt_full_pw_dim); - EXPECT_FALSE(pwb.full_pw); - EXPECT_EQ(pwb.full_pw_dim,0); + bool inpt_full_pw = false; + int inpt_full_pw_dim = 2; + pwb.setfullpw (inpt_full_pw, inpt_full_pw_dim); + EXPECT_FALSE (pwb.full_pw); + EXPECT_EQ (pwb.full_pw_dim, 0); } -TEST_F(PWBasisTEST,DistributeR) +TEST_F 
(PWBasisTEST, DistributeR) { - //distribute_r depends on initgrids - //because of nz - double lat0 = 1.8897261254578281; - ModuleBase::Matrix3 latvec(10.0,0.0,0.0, - 0.0,10.0,0.0, - 0.0,0.0,10.0); - double gridecut=10.0; - //initparameters is always called after initgrids - //because of nx,ny,nz, and tpiba2 - pwb.initgrids(lat0,latvec,gridecut); - //this is serial test, so that - EXPECT_EQ(pwb.poolrank,0); - EXPECT_EQ(pwb.poolnproc,1); - pwb.distribute_r(); - EXPECT_EQ(pwb.startz[0],0); - EXPECT_EQ(pwb.numz[0],pwb.nz); - EXPECT_EQ(pwb.nplane,pwb.nz); - EXPECT_EQ(pwb.nplane,20); - EXPECT_EQ(pwb.nxy,400); - EXPECT_EQ(pwb.nrxx,pwb.numz[0]*pwb.nxy); + // distribute_r depends on initgrids + // because of nz + double lat0 = 1.8897261254578281; + ModuleBase::Matrix3 latvec (10.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 10.0); + double gridecut = 10.0; + // initparameters is always called after initgrids + // because of nx,ny,nz, and tpiba2 + pwb.initgrids (lat0, latvec, gridecut); + // this is serial test, so that + EXPECT_EQ (pwb.poolrank, 0); + EXPECT_EQ (pwb.poolnproc, 1); + pwb.distribute_r (); + EXPECT_EQ (pwb.startz[0], 0); + EXPECT_EQ (pwb.numz[0], pwb.nz); + EXPECT_EQ (pwb.nplane, pwb.nz); + EXPECT_EQ (pwb.nplane, 20); + EXPECT_EQ (pwb.nxy, 400); + EXPECT_EQ (pwb.nrxx, pwb.numz[0] * pwb.nxy); } -TEST_F(PWBasisTEST,DistributeMethod1) +TEST_F (PWBasisTEST, DistributeMethod1) { - double lat0 = 1.8897261254578281; - ModuleBase::Matrix3 latvec(10.0,0.0,0.0, - 0.0,10.0,0.0, - 0.0,0.0,10.0); - double gridecut=10.0; - //initparameters is always called after initgrids - //because of nx,ny,nz, and tpiba2 - //call initgrids - pwb.initgrids(lat0,latvec,gridecut); - bool gamma_only_in = true; - double pwecut_in = 11.0; - int distribution_type_in = 1; - bool xprime_in = true; - //call initparameters - pwb.initparameters(gamma_only_in,pwecut_in,distribution_type_in,xprime_in); - EXPECT_TRUE(pwb.gamma_only); - EXPECT_TRUE(pwb.xprime); - EXPECT_EQ(pwb.fftnx,int(pwb.nx/2)+1); - 
EXPECT_EQ(pwb.fftny,pwb.ny); - EXPECT_EQ(pwb.fftnz,pwb.nz); - EXPECT_EQ(pwb.distribution_type,distribution_type_in); - EXPECT_EQ(pwb.fftnxy,pwb.fftnx*pwb.fftny); - EXPECT_EQ(pwb.fftnx,11); - EXPECT_EQ(pwb.fftny,20); - EXPECT_EQ(pwb.fftnz,20); - //distribute_method1 depends on initparamters - //because of fftnxy - EXPECT_EQ(pwb.fftnxy,220); - EXPECT_EQ(pwb.distribution_type,1); - //call distribute_g - pwb.distribute_g(); - EXPECT_EQ(pwb.npwtot,1994); - EXPECT_EQ(pwb.nstot,156); + double lat0 = 1.8897261254578281; + ModuleBase::Matrix3 latvec (10.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 10.0); + double gridecut = 10.0; + // initparameters is always called after initgrids + // because of nx,ny,nz, and tpiba2 + // call initgrids + pwb.initgrids (lat0, latvec, gridecut); + bool gamma_only_in = true; + double pwecut_in = 11.0; + int distribution_type_in = 1; + bool xprime_in = true; + // call initparameters + pwb.initparameters (gamma_only_in, pwecut_in, distribution_type_in, xprime_in); + EXPECT_TRUE (pwb.gamma_only); + EXPECT_TRUE (pwb.xprime); + EXPECT_EQ (pwb.fftnx, int (pwb.nx / 2) + 1); + EXPECT_EQ (pwb.fftny, pwb.ny); + EXPECT_EQ (pwb.fftnz, pwb.nz); + EXPECT_EQ (pwb.distribution_type, distribution_type_in); + EXPECT_EQ (pwb.fftnxy, pwb.fftnx * pwb.fftny); + EXPECT_EQ (pwb.fftnx, 11); + EXPECT_EQ (pwb.fftny, 20); + EXPECT_EQ (pwb.fftnz, 20); + // distribute_method1 depends on initparamters + // because of fftnxy + EXPECT_EQ (pwb.fftnxy, 220); + EXPECT_EQ (pwb.distribution_type, 1); + // call distribute_g + pwb.distribute_g (); + EXPECT_EQ (pwb.npwtot, 1994); + EXPECT_EQ (pwb.nstot, 156); } -TEST_F(PWBasisTEST,DistributeMethod2) +TEST_F (PWBasisTEST, DistributeMethod2) { - double lat0 = 1.8897261254578281; - ModuleBase::Matrix3 latvec(10.0,0.0,0.0, - 0.0,10.0,0.0, - 0.0,0.0,10.0); - double gridecut=10.0; - //initparameters is always called after initgrids - //because of nx,ny,nz, and tpiba2 - //call initgrids - pwb.initgrids(lat0,latvec,gridecut); - bool 
gamma_only_in = true; - double pwecut_in = 11.0; - int distribution_type_in = 2; - bool xprime_in = true; - //call initparameters - pwb.initparameters(gamma_only_in,pwecut_in,distribution_type_in,xprime_in); - EXPECT_TRUE(pwb.gamma_only); - EXPECT_TRUE(pwb.xprime); - EXPECT_EQ(pwb.fftnx,int(pwb.nx/2)+1); - EXPECT_EQ(pwb.fftny,pwb.ny); - EXPECT_EQ(pwb.fftnz,pwb.nz); - EXPECT_EQ(pwb.distribution_type,distribution_type_in); - EXPECT_EQ(pwb.fftnxy,pwb.fftnx*pwb.fftny); - EXPECT_EQ(pwb.fftnx,11); - EXPECT_EQ(pwb.fftny,20); - EXPECT_EQ(pwb.fftnz,20); - //distribute_method1 depends on initparamters - //because of fftnxy - EXPECT_EQ(pwb.fftnxy,220); - EXPECT_EQ(pwb.distribution_type,2); - //call distribute_g - pwb.distribute_g(); - EXPECT_EQ(pwb.npwtot,1994); - EXPECT_EQ(pwb.nstot,156); - EXPECT_EQ(pwb.npw,1994); - EXPECT_EQ(pwb.nst,156); - EXPECT_EQ(pwb.nstnz,3120); + double lat0 = 1.8897261254578281; + ModuleBase::Matrix3 latvec (10.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 10.0); + double gridecut = 10.0; + // initparameters is always called after initgrids + // because of nx,ny,nz, and tpiba2 + // call initgrids + pwb.initgrids (lat0, latvec, gridecut); + bool gamma_only_in = true; + double pwecut_in = 11.0; + int distribution_type_in = 2; + bool xprime_in = true; + // call initparameters + pwb.initparameters (gamma_only_in, pwecut_in, distribution_type_in, xprime_in); + EXPECT_TRUE (pwb.gamma_only); + EXPECT_TRUE (pwb.xprime); + EXPECT_EQ (pwb.fftnx, int (pwb.nx / 2) + 1); + EXPECT_EQ (pwb.fftny, pwb.ny); + EXPECT_EQ (pwb.fftnz, pwb.nz); + EXPECT_EQ (pwb.distribution_type, distribution_type_in); + EXPECT_EQ (pwb.fftnxy, pwb.fftnx * pwb.fftny); + EXPECT_EQ (pwb.fftnx, 11); + EXPECT_EQ (pwb.fftny, 20); + EXPECT_EQ (pwb.fftnz, 20); + // distribute_method1 depends on initparamters + // because of fftnxy + EXPECT_EQ (pwb.fftnxy, 220); + EXPECT_EQ (pwb.distribution_type, 2); + // call distribute_g + pwb.distribute_g (); + EXPECT_EQ (pwb.npwtot, 1994); + EXPECT_EQ (pwb.nstot, 
156); + EXPECT_EQ (pwb.npw, 1994); + EXPECT_EQ (pwb.nst, 156); + EXPECT_EQ (pwb.nstnz, 3120); } -TEST_F(PWBasisTEST,GetStartGR) +TEST_F (PWBasisTEST, GetStartGR) { - //getstartgr is called after distribute_r and distribute_g in setuptransform - double lat0 = 1.8897261254578281; - ModuleBase::Matrix3 latvec(10.0,0.0,0.0, - 0.0,10.0,0.0, - 0.0,0.0,10.0); - double gridecut=10.0; - //initparameters is always called after initgrids - //because of nx,ny,nz, and tpiba2 - //call initgrids - pwb.initgrids(lat0,latvec,gridecut); - //call distribute_r - pwb.distribute_r(); - bool gamma_only_in = true; - double pwecut_in = 11.0; - int distribution_type_in = 2; - bool xprime_in = true; - //call initparameters - pwb.initparameters(gamma_only_in,pwecut_in,distribution_type_in,xprime_in); - //call distribute_g - pwb.distribute_g(); - //call getstartgr - pwb.getstartgr(); - EXPECT_TRUE(pwb.gamma_only); - EXPECT_EQ(pwb.npw,1994); - EXPECT_EQ(pwb.nz,20); - EXPECT_EQ(pwb.nst,156); - EXPECT_EQ(pwb.nrxx,8000); - EXPECT_EQ(pwb.nxy,400); - EXPECT_EQ(pwb.nplane,20); - EXPECT_EQ(pwb.nmaxgr,4000); - EXPECT_EQ(pwb.numg[0],3120); - EXPECT_EQ(pwb.numr[0],3120); - EXPECT_EQ(pwb.startg[0],0); - EXPECT_EQ(pwb.startr[0],0); + // getstartgr is called after distribute_r and distribute_g in setuptransform + double lat0 = 1.8897261254578281; + ModuleBase::Matrix3 latvec (10.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 10.0); + double gridecut = 10.0; + // initparameters is always called after initgrids + // because of nx,ny,nz, and tpiba2 + // call initgrids + pwb.initgrids (lat0, latvec, gridecut); + // call distribute_r + pwb.distribute_r (); + bool gamma_only_in = true; + double pwecut_in = 11.0; + int distribution_type_in = 2; + bool xprime_in = true; + // call initparameters + pwb.initparameters (gamma_only_in, pwecut_in, distribution_type_in, xprime_in); + // call distribute_g + pwb.distribute_g (); + // call getstartgr + pwb.getstartgr (); + EXPECT_TRUE (pwb.gamma_only); + EXPECT_EQ (pwb.npw, 1994); + 
EXPECT_EQ (pwb.nz, 20); + EXPECT_EQ (pwb.nst, 156); + EXPECT_EQ (pwb.nrxx, 8000); + EXPECT_EQ (pwb.nxy, 400); + EXPECT_EQ (pwb.nplane, 20); + EXPECT_EQ (pwb.nmaxgr, 4000); + EXPECT_EQ (pwb.numg[0], 3120); + EXPECT_EQ (pwb.numr[0], 3120); + EXPECT_EQ (pwb.startg[0], 0); + EXPECT_EQ (pwb.startr[0], 0); } -TEST_F(PWBasisTEST,SetupTransform) +TEST_F (PWBasisTEST, SetupTransform) { - //getstartgr is called after distribute_r and distribute_g in setuptransform - double lat0 = 1.8897261254578281; - ModuleBase::Matrix3 latvec(10.0,0.0,0.0, - 0.0,10.0,0.0, - 0.0,0.0,10.0); - double gridecut=10.0; - //initparameters is always called after initgrids - //because of nx,ny,nz, and tpiba2 - //call initgrids - pwb.initgrids(lat0,latvec,gridecut); - bool gamma_only_in = true; - double pwecut_in = 11.0; - int distribution_type_in = 2; - bool xprime_in = true; - pwb.initparameters(gamma_only_in,pwecut_in,distribution_type_in,xprime_in); - //setuptransform for FFT - //which calls fft planning functions - //currently this is just a trivial test to see its successfull calling - EXPECT_NO_THROW(pwb.setuptransform()); - EXPECT_EQ(pwb.npw,1994); + // getstartgr is called after distribute_r and distribute_g in setuptransform + double lat0 = 1.8897261254578281; + ModuleBase::Matrix3 latvec (10.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 10.0); + double gridecut = 10.0; + // initparameters is always called after initgrids + // because of nx,ny,nz, and tpiba2 + // call initgrids + pwb.initgrids (lat0, latvec, gridecut); + bool gamma_only_in = true; + double pwecut_in = 11.0; + int distribution_type_in = 2; + bool xprime_in = true; + pwb.initparameters (gamma_only_in, pwecut_in, distribution_type_in, xprime_in); + // setuptransform for FFT + // which calls fft planning functions + // currently this is just a trivial test to see its successfull calling + EXPECT_NO_THROW (pwb.setuptransform ()); + EXPECT_EQ (pwb.npw, 1994); } -TEST_F(PWBasisTEST,CollectLocalPW) +TEST_F (PWBasisTEST, CollectLocalPW) { 
- //getstartgr is called after distribute_r and distribute_g in setuptransform - double lat0 = 1.8897261254578281; - ModuleBase::Matrix3 latvec(10.0,0.0,0.0, - 0.0,10.0,0.0, - 0.0,0.0,10.0); - double gridecut=10.0; - //initparameters is always called after initgrids - //because of nx,ny,nz, and tpiba2 - //call initgrids - pwb.initgrids(lat0,latvec,gridecut); - bool gamma_only_in = true; - double pwecut_in = 11.0; - int distribution_type_in = 2; - bool xprime_in = true; - pwb.initparameters(gamma_only_in,pwecut_in,distribution_type_in,xprime_in); - //setuptransform for FFT - //which calls fft planning functions - //currently this is just a trivial test to see its successfull calling - EXPECT_NO_THROW(pwb.setuptransform()); - EXPECT_EQ(pwb.npw,1994); - pwb.collect_local_pw(); - EXPECT_EQ(pwb.ig_gge0,9); + // getstartgr is called after distribute_r and distribute_g in setuptransform + double lat0 = 1.8897261254578281; + ModuleBase::Matrix3 latvec (10.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 10.0); + double gridecut = 10.0; + // initparameters is always called after initgrids + // because of nx,ny,nz, and tpiba2 + // call initgrids + pwb.initgrids (lat0, latvec, gridecut); + bool gamma_only_in = true; + double pwecut_in = 11.0; + int distribution_type_in = 2; + bool xprime_in = true; + pwb.initparameters (gamma_only_in, pwecut_in, distribution_type_in, xprime_in); + // setuptransform for FFT + // which calls fft planning functions + // currently this is just a trivial test to see its successfull calling + EXPECT_NO_THROW (pwb.setuptransform ()); + EXPECT_EQ (pwb.npw, 1994); + pwb.collect_local_pw (); + EXPECT_EQ (pwb.ig_gge0, 9); } -TEST_F(PWBasisTEST,CollectUniqgg) +TEST_F (PWBasisTEST, CollectUniqgg) { - //getstartgr is called after distribute_r and distribute_g in setuptransform - double lat0 = 1.8897261254578281; - ModuleBase::Matrix3 latvec(10.0,0.0,0.0, - 0.0,10.0,0.0, - 0.0,0.0,10.0); - double gridecut=10.0; - //initparameters is always called after initgrids - 
//because of nx,ny,nz, and tpiba2 - //call initgrids - pwb.initgrids(lat0,latvec,gridecut); - bool gamma_only_in = true; - double pwecut_in = 11.0; - int distribution_type_in = 2; - bool xprime_in = true; - pwb.initparameters(gamma_only_in,pwecut_in,distribution_type_in,xprime_in); - //setuptransform for FFT - //which calls fft planning functions - //currently this is just a trivial test to see its successfull calling - EXPECT_NO_THROW(pwb.setuptransform()); - EXPECT_EQ(pwb.npw,1994); - pwb.collect_local_pw(); - EXPECT_EQ(pwb.ig_gge0,9); - pwb.collect_uniqgg(); - EXPECT_EQ(pwb.ngg,78); + // getstartgr is called after distribute_r and distribute_g in setuptransform + double lat0 = 1.8897261254578281; + ModuleBase::Matrix3 latvec (10.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 10.0); + double gridecut = 10.0; + // initparameters is always called after initgrids + // because of nx,ny,nz, and tpiba2 + // call initgrids + pwb.initgrids (lat0, latvec, gridecut); + bool gamma_only_in = true; + double pwecut_in = 11.0; + int distribution_type_in = 2; + bool xprime_in = true; + pwb.initparameters (gamma_only_in, pwecut_in, distribution_type_in, xprime_in); + // setuptransform for FFT + // which calls fft planning functions + // currently this is just a trivial test to see its successfull calling + EXPECT_NO_THROW (pwb.setuptransform ()); + EXPECT_EQ (pwb.npw, 1994); + pwb.collect_local_pw (); + EXPECT_EQ (pwb.ig_gge0, 9); + pwb.collect_uniqgg (); + EXPECT_EQ (pwb.ngg, 78); } diff --git a/source/source_cell/atom_pseudo.cpp b/source/source_cell/atom_pseudo.cpp index 9a917928282..06419385835 100644 --- a/source/source_cell/atom_pseudo.cpp +++ b/source/source_cell/atom_pseudo.cpp @@ -2,24 +2,21 @@ #include "source_io/module_parameter/parameter.h" #include "source_io/module_parameter/parameter.h" -Atom_pseudo::Atom_pseudo() -{ -} +Atom_pseudo::Atom_pseudo () {} -Atom_pseudo::~Atom_pseudo() -{ -} +Atom_pseudo::~Atom_pseudo () {} // mohan add 2021-05-07 -void 
Atom_pseudo::set_d_so(ModuleBase::ComplexMatrix& d_so_in, +void + Atom_pseudo::set_d_so (ModuleBase::ComplexMatrix& d_so_in, const int& nproj_in, const int& nproj_in_so, const bool has_so) { if (this->lmax < -1 || this->lmax > 20) - { - ModuleBase::WARNING_QUIT("Numerical_Nonlocal", "bad input of lmax : should be between -1 and 20"); - } + { + ModuleBase::WARNING_QUIT ("Numerical_Nonlocal", "bad input of lmax : should be between -1 and 20"); + } this->nproj = nproj_in; this->nproj_soc = nproj_in_so; @@ -27,116 +24,122 @@ void Atom_pseudo::set_d_so(ModuleBase::ComplexMatrix& d_so_in, // optimize for (int is = 0; is < spin_dimension; is++) - { - this->non_zero_count_soc[is] = 0; - this->index1_soc[is] = std::vector(nproj_soc * nproj_soc, 0); - this->index2_soc[is] = std::vector(nproj_soc * nproj_soc, 0); - } + { + this->non_zero_count_soc[is] = 0; + this->index1_soc[is] = std::vector (nproj_soc * nproj_soc, 0); + this->index2_soc[is] = std::vector (nproj_soc * nproj_soc, 0); + } if (!has_so) - { - this->d_real.create(nproj_soc + 1, nproj_soc + 1); - this->d_so.create(spin_dimension, nproj_soc + 1, nproj_soc + 1); // for noncollinear-spin only case - - // calculate the number of non-zero elements in dion - for (int L1 = 0; L1 < nproj_soc; L1++) { - for (int L2 = 0; L2 < nproj_soc; L2++) - { - this->d_real(L1, L2) = d_so_in(L1, L2).real(); - if (std::fabs(d_real(L1, L2)) > 1.0e-8) - { - this->index1_soc[0][non_zero_count_soc[0]] = L1; - this->index2_soc[0][non_zero_count_soc[0]] = L2; - this->non_zero_count_soc[0]++; - } - // for noncollinear-spin only case - this->d_so(0, L1, L2) = d_so_in(L1, L2); - this->d_so(3, L1, L2) = d_so_in(L1, L2); - if (std::fabs(d_real(L1, L2)) > 1.0e-8) + this->d_real.create (nproj_soc + 1, nproj_soc + 1); + this->d_so.create (spin_dimension, nproj_soc + 1, nproj_soc + 1); // for noncollinear-spin only case + + // calculate the number of non-zero elements in dion + for (int L1 = 0; L1 < nproj_soc; L1++) { - 
this->index1_soc[3][non_zero_count_soc[3]] = L1; - this->index2_soc[3][non_zero_count_soc[3]] = L2; - this->non_zero_count_soc[3]++; + for (int L2 = 0; L2 < nproj_soc; L2++) + { + this->d_real (L1, L2) = d_so_in (L1, L2).real (); + if (std::fabs (d_real (L1, L2)) > 1.0e-8) + { + this->index1_soc[0][non_zero_count_soc[0]] = L1; + this->index2_soc[0][non_zero_count_soc[0]] = L2; + this->non_zero_count_soc[0]++; + } + // for noncollinear-spin only case + this->d_so (0, L1, L2) = d_so_in (L1, L2); + this->d_so (3, L1, L2) = d_so_in (L1, L2); + if (std::fabs (d_real (L1, L2)) > 1.0e-8) + { + this->index1_soc[3][non_zero_count_soc[3]] = L1; + this->index2_soc[3][non_zero_count_soc[3]] = L2; + this->non_zero_count_soc[3]++; + } + } } - } } - } else // zhengdy-soc - { - this->d_so.create(spin_dimension, nproj_soc + 1, nproj_soc + 1); - // std::cout << "lmax=" << lmax << std::endl; - - if (this->lmax > -1) { - if (PARAM.inp.lspinorb) - { - int is = 0; - for (int is1 = 0; is1 < 2; is1++) + this->d_so.create (spin_dimension, nproj_soc + 1, nproj_soc + 1); + // std::cout << "lmax=" << lmax << std::endl; + + if (this->lmax > -1) { - for (int is2 = 0; is2 < 2; is2++) - { - for (int L1 = 0; L1 < nproj_soc; L1++) + if (PARAM.inp.lspinorb) { - for (int L2 = 0; L2 < nproj_soc; L2++) - { - this->d_so(is, L1, L2) = d_so_in(L1 + nproj_soc * is1, L2 + nproj_soc * is2); - - if (fabs(this->d_so(is, L1, L2).real()) > 1.0e-8 - || fabs(this->d_so(is, L1, L2).imag()) > 1.0e-8) + int is = 0; + for (int is1 = 0; is1 < 2; is1++) { - // std::cout << "tt in atom is=" << is << " L1=" << - //L1 - //<< " L2=" - // << L2 << " " << d_so(is, L1, L2) << std::endl; + for (int is2 = 0; is2 < 2; is2++) + { + for (int L1 = 0; L1 < nproj_soc; L1++) + { + for (int L2 = 0; L2 < nproj_soc; L2++) + { + this->d_so (is, L1, L2) + = d_so_in (L1 + nproj_soc * is1, L2 + nproj_soc * is2); + + if (fabs (this->d_so (is, L1, L2).real ()) > 1.0e-8 + || fabs (this->d_so (is, L1, L2).imag ()) > 1.0e-8) + { + // std::cout << 
"tt + // in atom is=" << is << " L1=" << L1 + //<< " L2=" + // << L2 << " " << + // d_so(is, L1, L2) << std::endl; - this->index1_soc[is][non_zero_count_soc[is]] = L1; - this->index2_soc[is][non_zero_count_soc[is]] = L2; - this->non_zero_count_soc[is]++; + this->index1_soc[is][non_zero_count_soc[is]] = L1; + this->index2_soc[is][non_zero_count_soc[is]] = L2; + this->non_zero_count_soc[is]++; + } + } + } + is++; + } } - } } - is++; - } - } - } - else - { - int is = 0; - for (int is1 = 0; is1 < 2; is1++) - { - for (int is2 = 0; is2 < 2; is2++) - { - if (is >= PARAM.inp.nspin) { - break; -} - for (int L1 = 0; L1 < nproj_soc; L1++) + else { - for (int L2 = 0; L2 < nproj_soc; L2++) - { - if (is == 1 || is == 2) + int is = 0; + for (int is1 = 0; is1 < 2; is1++) { - this->d_so(is, L1, L2) = std::complex(0.0, 0.0); + for (int is2 = 0; is2 < 2; is2++) + { + if (is >= PARAM.inp.nspin) + { + break; + } + for (int L1 = 0; L1 < nproj_soc; L1++) + { + for (int L2 = 0; L2 < nproj_soc; L2++) + { + if (is == 1 || is == 2) + { + this->d_so (is, L1, L2) + = std::complex (0.0, 0.0); + } + else + { + this->d_so (is, L1, L2) + = d_so_in (L1 + nproj_soc * is1, + L2 + nproj_soc * is2); + } + if (std::abs (this->d_so (is, L1, L2).real ()) > 1.0e-8 + || std::abs (this->d_so (is, L1, L2).imag ()) > 1.0e-8) + { + this->index1_soc[is][non_zero_count_soc[is]] = L1; + this->index2_soc[is][non_zero_count_soc[is]] = L2; + this->non_zero_count_soc[is]++; + } + } + } + is++; + } } - else - { - this->d_so(is, L1, L2) = d_so_in(L1 + nproj_soc * is1, L2 + nproj_soc * is2); - } - if (std::abs(this->d_so(is, L1, L2).real()) > 1.0e-8 - || std::abs(this->d_so(is, L1, L2).imag()) > 1.0e-8) - { - this->index1_soc[is][non_zero_count_soc[is]] = L1; - this->index2_soc[is][non_zero_count_soc[is]] = L2; - this->non_zero_count_soc[is]++; - } - } } - is++; - } } - } } - } // 2016-07-19 end, LiuXh return; @@ -145,131 +148,133 @@ void Atom_pseudo::set_d_so(ModuleBase::ComplexMatrix& d_so_in, #include 
"source_base/parallel_common.h" #ifdef __MPI -void Atom_pseudo::bcast_atom_pseudo() +void + Atom_pseudo::bcast_atom_pseudo () { - ModuleBase::TITLE("Atom_pseudo", "bcast_atom_pseudo"); + ModuleBase::TITLE ("Atom_pseudo", "bcast_atom_pseudo"); // == pseudo_h == // int - Parallel_Common::bcast_int(lmax); - Parallel_Common::bcast_int(mesh); - Parallel_Common::bcast_int(nchi); - Parallel_Common::bcast_int(nbeta); - Parallel_Common::bcast_int(nv); - Parallel_Common::bcast_double(zv); + Parallel_Common::bcast_int (lmax); + Parallel_Common::bcast_int (mesh); + Parallel_Common::bcast_int (nchi); + Parallel_Common::bcast_int (nbeta); + Parallel_Common::bcast_int (nv); + Parallel_Common::bcast_double (zv); // double - Parallel_Common::bcast_double(etotps); - Parallel_Common::bcast_double(ecutwfc); - Parallel_Common::bcast_double(ecutrho); + Parallel_Common::bcast_double (etotps); + Parallel_Common::bcast_double (ecutwfc); + Parallel_Common::bcast_double (ecutrho); // bool - Parallel_Common::bcast_bool(tvanp); - Parallel_Common::bcast_bool(nlcc); - Parallel_Common::bcast_bool(has_so); + Parallel_Common::bcast_bool (tvanp); + Parallel_Common::bcast_bool (nlcc); + Parallel_Common::bcast_bool (has_so); // std::string - Parallel_Common::bcast_string(psd); - Parallel_Common::bcast_string(pp_type); - Parallel_Common::bcast_string(xc_func); + Parallel_Common::bcast_string (psd); + Parallel_Common::bcast_string (pp_type); + Parallel_Common::bcast_string (xc_func); if (GlobalV::MY_RANK != 0) - { - jjj = std::vector(nbeta, 0.0); - els = std::vector(nchi, ""); - lchi = std::vector(nchi, 0); - oc = std::vector(nchi, 0.0); - jchi = std::vector(nchi, 0.0); - nn = std::vector(nchi, 0); - } + { + jjj = std::vector (nbeta, 0.0); + els = std::vector (nchi, ""); + lchi = std::vector (nchi, 0); + oc = std::vector (nchi, 0.0); + jchi = std::vector (nchi, 0.0); + nn = std::vector (nchi, 0); + } - Parallel_Common::bcast_double(jjj.data(), nbeta); - Parallel_Common::bcast_string(els.data(), nchi); - 
Parallel_Common::bcast_int(lchi.data(), nchi); - Parallel_Common::bcast_double(oc.data(), nchi); - Parallel_Common::bcast_double(jchi.data(), nchi); - Parallel_Common::bcast_int(nn.data(), nchi); + Parallel_Common::bcast_double (jjj.data (), nbeta); + Parallel_Common::bcast_string (els.data (), nchi); + Parallel_Common::bcast_int (lchi.data (), nchi); + Parallel_Common::bcast_double (oc.data (), nchi); + Parallel_Common::bcast_double (jchi.data (), nchi); + Parallel_Common::bcast_int (nn.data (), nchi); // == end of pseudo_h // == pseudo_atom == - Parallel_Common::bcast_int(msh); - Parallel_Common::bcast_double(rcut); + Parallel_Common::bcast_int (msh); + Parallel_Common::bcast_double (rcut); if (GlobalV::MY_RANK != 0) - { - assert(mesh != 0); - r = std::vector(mesh, 0.0); - rab = std::vector(mesh, 0.0); - rho_atc = std::vector(mesh, 0.0); - rho_at = std::vector(mesh, 0.0); - chi.create(nchi, mesh); - } + { + assert (mesh != 0); + r = std::vector (mesh, 0.0); + rab = std::vector (mesh, 0.0); + rho_atc = std::vector (mesh, 0.0); + rho_at = std::vector (mesh, 0.0); + chi.create (nchi, mesh); + } - Parallel_Common::bcast_double(r.data(), mesh); - Parallel_Common::bcast_double(rab.data(), mesh); - Parallel_Common::bcast_double(rho_atc.data(), mesh); - Parallel_Common::bcast_double(rho_at.data(), mesh); - Parallel_Common::bcast_double(chi.c, nchi * mesh); + Parallel_Common::bcast_double (r.data (), mesh); + Parallel_Common::bcast_double (rab.data (), mesh); + Parallel_Common::bcast_double (rho_atc.data (), mesh); + Parallel_Common::bcast_double (rho_at.data (), mesh); + Parallel_Common::bcast_double (chi.c, nchi * mesh); // == end of pseudo_atom == // == pseudo_vl == if (GlobalV::MY_RANK != 0) - { - vloc_at = std::vector(mesh, 0.0); - } - Parallel_Common::bcast_double(vloc_at.data(), mesh); + { + vloc_at = std::vector (mesh, 0.0); + } + Parallel_Common::bcast_double (vloc_at.data (), mesh); // == end of pseudo_vl == // == pseudo == - if (nbeta == 0) { - return; -} + if 
(nbeta == 0) + { + return; + } if (GlobalV::MY_RANK != 0) - { - lll = std::vector(nbeta, 0); - } - Parallel_Common::bcast_int(lll.data(), nbeta); - Parallel_Common::bcast_int(kkbeta); - Parallel_Common::bcast_int(nh); + { + lll = std::vector (nbeta, 0); + } + Parallel_Common::bcast_int (lll.data (), nbeta); + Parallel_Common::bcast_int (kkbeta); + Parallel_Common::bcast_int (nh); int nr, nc; if (GlobalV::MY_RANK == 0) - { - nr = betar.nr; - nc = betar.nc; - } - Parallel_Common::bcast_int(nr); - Parallel_Common::bcast_int(nc); + { + nr = betar.nr; + nc = betar.nc; + } + Parallel_Common::bcast_int (nr); + Parallel_Common::bcast_int (nc); if (GlobalV::MY_RANK != 0) - { - betar.create(nr, nc); - dion.create(nbeta, nbeta); - } + { + betar.create (nr, nc); + dion.create (nbeta, nbeta); + } // below two 'bcast_double' lines of codes seem to have bugs, // on some computers, the code will stuck here for ever. // mohan note 2021-04-28 - Parallel_Common::bcast_double(dion.c, nbeta * nbeta); - Parallel_Common::bcast_double(betar.c, nr * nc); + Parallel_Common::bcast_double (dion.c, nbeta * nbeta); + Parallel_Common::bcast_double (betar.c, nr * nc); // == end of psesudo_nc == // uspp liuyu 2023-10-03 if (tvanp) - { - Parallel_Common::bcast_int(nqlc); - if (GlobalV::MY_RANK != 0) { - qfuncl.create(nqlc, nbeta * (nbeta + 1) / 2, mesh); - } - const int dim = nqlc * nbeta * (nbeta + 1) / 2 * mesh; - Parallel_Common::bcast_double(qfuncl.ptr, dim); + Parallel_Common::bcast_int (nqlc); + if (GlobalV::MY_RANK != 0) + { + qfuncl.create (nqlc, nbeta * (nbeta + 1) / 2, mesh); + } + const int dim = nqlc * nbeta * (nbeta + 1) / 2 * mesh; + Parallel_Common::bcast_double (qfuncl.ptr, dim); - if (GlobalV::MY_RANK != 0) - { - qqq.create(nbeta, nbeta); + if (GlobalV::MY_RANK != 0) + { + qqq.create (nbeta, nbeta); + } + Parallel_Common::bcast_double (qqq.c, nbeta * nbeta); } - Parallel_Common::bcast_double(qqq.c, nbeta * nbeta); - } return; } diff --git a/source/source_cell/atom_pseudo.h 
b/source/source_cell/atom_pseudo.h index 8f78a64dff2..35925585ac6 100644 --- a/source/source_cell/atom_pseudo.h +++ b/source/source_cell/atom_pseudo.h @@ -6,46 +6,43 @@ #include "source_base/complexmatrix.h" #include "pseudo.h" - class Atom_pseudo : public pseudo { -public: - - Atom_pseudo(); - ~Atom_pseudo(); - - // mohan add 2021-05-07 - ModuleBase::ComplexArray d_so; //(:,:,:), spin-orbit case - ModuleBase::matrix d_real; //(:,:), non-spin-orbit case - int nproj; - int nproj_soc; // dimension of D_ij^so - std::vector non_zero_count_soc = {0, 0, 0, 0}; - std::vector> index1_soc = {{}, {}, {}, {}}; - std::vector> index2_soc = {{}, {}, {}, {}}; - - void set_d_so( // mohan add 2021-05-07 - ModuleBase::ComplexMatrix &d_so_in, - const int &nproj_in, - const int &nproj_in_so, - const bool has_so); - - - inline void get_d(const int& is, const int& p1, const int& p2, const std::complex*& tmp_d) - { - tmp_d = &this->d_so(is, p1, p2); - return; - } - inline void get_d(const int& is, const int& p1, const int& p2, const double*& tmp_d) - { - tmp_d = &this->d_real(p1, p2); - return; - } - + public: + Atom_pseudo (); + ~Atom_pseudo (); + + // mohan add 2021-05-07 + ModuleBase::ComplexArray d_so; //(:,:,:), spin-orbit case + ModuleBase::matrix d_real; //(:,:), non-spin-orbit case + int nproj; + int nproj_soc; // dimension of D_ij^so + std::vector non_zero_count_soc = {0, 0, 0, 0}; + std::vector> index1_soc = {{}, {}, {}, {}}; + std::vector> index2_soc = {{}, {}, {}, {}}; + + void set_d_so ( // mohan add 2021-05-07 + ModuleBase::ComplexMatrix& d_so_in, + const int& nproj_in, + const int& nproj_in_so, + const bool has_so); + + inline void + get_d (const int& is, const int& p1, const int& p2, const std::complex*& tmp_d) + { + tmp_d = &this->d_so (is, p1, p2); + return; + } + inline void + get_d (const int& is, const int& p1, const int& p2, const double*& tmp_d) + { + tmp_d = &this->d_real (p1, p2); + return; + } #ifdef __MPI - void bcast_atom_pseudo(void); // for upf201 + void 
bcast_atom_pseudo (void); // for upf201 #endif - }; #endif diff --git a/source/source_cell/atom_spec.cpp b/source/source_cell/atom_spec.cpp index 5c989191620..c2b69bbd90f 100644 --- a/source/source_cell/atom_spec.cpp +++ b/source/source_cell/atom_spec.cpp @@ -3,64 +3,62 @@ #include "source_io/module_output/output.h" #include -Atom::Atom() -{ -} +Atom::Atom () {} -Atom::~Atom() -{ -} +Atom::~Atom () {} -void Atom::set_index() +void + Atom::set_index () { - assert(nw != 0); - this->iw2l.resize(nw, 0); - this->iw2n.resize(nw, 0); - this->iw2m.resize(nw, 0); - this->iw2_ylm.resize(nw, 0); - this->iw2_new.resize(nw, false); // bool array to check if the local orbital is new + assert (nw != 0); + this->iw2l.resize (nw, 0); + this->iw2n.resize (nw, 0); + this->iw2m.resize (nw, 0); + this->iw2_ylm.resize (nw, 0); + this->iw2_new.resize (nw, false); // bool array to check if the local orbital is new int index = 0; for (int L = 0; L <= nwl; L++) - { - assert(l_nchi[L] >= 0); - for (int N = 0; N < l_nchi[L]; N++) { - for (int m = 0; m < 2 * L + 1; m++) - { - iw2l[index] = L; - iw2n[index] = N; - iw2m[index] = m; - iw2_ylm[index] = L * L + m; - if (m == 0) + assert (l_nchi[L] >= 0); + for (int N = 0; N < l_nchi[L]; N++) { - iw2_new[index] = true; + for (int m = 0; m < 2 * L + 1; m++) + { + iw2l[index] = L; + iw2n[index] = N; + iw2m[index] = m; + iw2_ylm[index] = L * L + m; + if (m == 0) + { + iw2_new[index] = true; + } + else + { + iw2_new[index] = false; + } + ++index; + } } - else - { - iw2_new[index] = false; - } - ++index; - } } - } return; } -void Atom::print_Atom(std::ofstream& ofs) +void + Atom::print_Atom (std::ofstream& ofs) { // OUT(ofs,"print_Atom()"); - ModuleBase::GlobalFunc::OUT(ofs, "label", label); - ModuleBase::GlobalFunc::OUT(ofs, "type", type); - ModuleBase::GlobalFunc::OUT(ofs, "na", na); - ModuleBase::GlobalFunc::OUT(ofs, "nwl", nwl); - ModuleBase::GlobalFunc::OUT(ofs, "Rcut", Rcut); // pengfei Li 16-2-29 - ModuleBase::GlobalFunc::OUT(ofs, "nw", nw); - 
ModuleBase::GlobalFunc::OUT(ofs, "stapos_wf", stapos_wf); - ModuleBase::GlobalFunc::OUT(ofs, "mass", mass); + ModuleBase::GlobalFunc::OUT (ofs, "label", label); + ModuleBase::GlobalFunc::OUT (ofs, "type", type); + ModuleBase::GlobalFunc::OUT (ofs, "na", na); + ModuleBase::GlobalFunc::OUT (ofs, "nwl", nwl); + ModuleBase::GlobalFunc::OUT (ofs, "Rcut", Rcut); // pengfei Li 16-2-29 + ModuleBase::GlobalFunc::OUT (ofs, "nw", nw); + ModuleBase::GlobalFunc::OUT (ofs, "stapos_wf", stapos_wf); + ModuleBase::GlobalFunc::OUT (ofs, "mass", mass); ofs << std::endl; - output::printv31_d(ofs, "atom_position(cartesian)", tau.data(), na); + output::printv31_d (ofs, "atom_position(cartesian)", tau.data (), na); /* for (int i = 0;i < na;i++) { @@ -76,81 +74,83 @@ void Atom::print_Atom(std::ofstream& ofs) #include "source_base/parallel_common.h" #ifdef __MPI -void Atom::bcast_atom() +void + Atom::bcast_atom () { - Parallel_Common::bcast_int(type); - Parallel_Common::bcast_int(na); - Parallel_Common::bcast_int(nwl); - Parallel_Common::bcast_double(Rcut); // pengfei Li 16-2-29 - Parallel_Common::bcast_int(nw); - Parallel_Common::bcast_int(stapos_wf); - Parallel_Common::bcast_string(label); - Parallel_Common::bcast_bool(coulomb_potential); + Parallel_Common::bcast_int (type); + Parallel_Common::bcast_int (na); + Parallel_Common::bcast_int (nwl); + Parallel_Common::bcast_double (Rcut); // pengfei Li 16-2-29 + Parallel_Common::bcast_int (nw); + Parallel_Common::bcast_int (stapos_wf); + Parallel_Common::bcast_string (label); + Parallel_Common::bcast_bool (coulomb_potential); if (GlobalV::MY_RANK != 0) - { - this->l_nchi.resize(nwl + 1, 0); - } - Parallel_Common::bcast_int(l_nchi.data(), nwl + 1); - Parallel_Common::bcast_bool(this->flag_empty_element); - Parallel_Common::bcast_double(mass); - - if (na > 0) - { - if (GlobalV::MY_RANK != 0) { - assert(na != 0); - this->tau.resize(na, ModuleBase::Vector3(0, 0, 0)); - this->dis.resize(na, ModuleBase::Vector3(0, 0, 0)); - this->taud.resize(na, 
ModuleBase::Vector3(0, 0, 0)); - this->boundary_shift.resize(na, ModuleBase::Vector3(0, 0, 0)); - this->vel.resize(na, ModuleBase::Vector3(0, 0, 0)); - this->mag.resize(na, 0); - this->angle1.resize(na, 0); - this->angle2.resize(na, 0); - this->m_loc_.resize(na, ModuleBase::Vector3(0, 0, 0)); - this->mbl.resize(na, ModuleBase::Vector3(0, 0, 0)); - this->lambda.resize(na, ModuleBase::Vector3(0, 0, 0)); - this->constrain.resize(na, ModuleBase::Vector3(0, 0, 0)); + this->l_nchi.resize (nwl + 1, 0); } + Parallel_Common::bcast_int (l_nchi.data (), nwl + 1); + Parallel_Common::bcast_bool (this->flag_empty_element); + Parallel_Common::bcast_double (mass); - for (int i = 0; i < na; i++) + if (na > 0) { - Parallel_Common::bcast_double(tau[i].x); - Parallel_Common::bcast_double(tau[i].y); - Parallel_Common::bcast_double(tau[i].z); - Parallel_Common::bcast_double(taud[i].x); - Parallel_Common::bcast_double(taud[i].y); - Parallel_Common::bcast_double(taud[i].z); - Parallel_Common::bcast_double(dis[i].x); - Parallel_Common::bcast_double(dis[i].y); - Parallel_Common::bcast_double(dis[i].z); - Parallel_Common::bcast_double(vel[i].x); - Parallel_Common::bcast_double(vel[i].y); - Parallel_Common::bcast_double(vel[i].z); - Parallel_Common::bcast_double(mag[i]); - Parallel_Common::bcast_double(angle1[i]); - Parallel_Common::bcast_double(angle2[i]); - Parallel_Common::bcast_double(m_loc_[i].x); - Parallel_Common::bcast_double(m_loc_[i].y); - Parallel_Common::bcast_double(m_loc_[i].z); - Parallel_Common::bcast_int(mbl[i].x); - Parallel_Common::bcast_int(mbl[i].y); - Parallel_Common::bcast_int(mbl[i].z); - Parallel_Common::bcast_double(lambda[i].x); - Parallel_Common::bcast_double(lambda[i].y); - Parallel_Common::bcast_double(lambda[i].z); - Parallel_Common::bcast_int(constrain[i].x); - Parallel_Common::bcast_int(constrain[i].y); - Parallel_Common::bcast_int(constrain[i].z); + if (GlobalV::MY_RANK != 0) + { + assert (na != 0); + this->tau.resize (na, ModuleBase::Vector3 (0, 0, 0)); + 
this->dis.resize (na, ModuleBase::Vector3 (0, 0, 0)); + this->taud.resize (na, ModuleBase::Vector3 (0, 0, 0)); + this->boundary_shift.resize (na, ModuleBase::Vector3 (0, 0, 0)); + this->vel.resize (na, ModuleBase::Vector3 (0, 0, 0)); + this->mag.resize (na, 0); + this->angle1.resize (na, 0); + this->angle2.resize (na, 0); + this->m_loc_.resize (na, ModuleBase::Vector3 (0, 0, 0)); + this->mbl.resize (na, ModuleBase::Vector3 (0, 0, 0)); + this->lambda.resize (na, ModuleBase::Vector3 (0, 0, 0)); + this->constrain.resize (na, ModuleBase::Vector3 (0, 0, 0)); + } + + for (int i = 0; i < na; i++) + { + Parallel_Common::bcast_double (tau[i].x); + Parallel_Common::bcast_double (tau[i].y); + Parallel_Common::bcast_double (tau[i].z); + Parallel_Common::bcast_double (taud[i].x); + Parallel_Common::bcast_double (taud[i].y); + Parallel_Common::bcast_double (taud[i].z); + Parallel_Common::bcast_double (dis[i].x); + Parallel_Common::bcast_double (dis[i].y); + Parallel_Common::bcast_double (dis[i].z); + Parallel_Common::bcast_double (vel[i].x); + Parallel_Common::bcast_double (vel[i].y); + Parallel_Common::bcast_double (vel[i].z); + Parallel_Common::bcast_double (mag[i]); + Parallel_Common::bcast_double (angle1[i]); + Parallel_Common::bcast_double (angle2[i]); + Parallel_Common::bcast_double (m_loc_[i].x); + Parallel_Common::bcast_double (m_loc_[i].y); + Parallel_Common::bcast_double (m_loc_[i].z); + Parallel_Common::bcast_int (mbl[i].x); + Parallel_Common::bcast_int (mbl[i].y); + Parallel_Common::bcast_int (mbl[i].z); + Parallel_Common::bcast_double (lambda[i].x); + Parallel_Common::bcast_double (lambda[i].y); + Parallel_Common::bcast_double (lambda[i].z); + Parallel_Common::bcast_int (constrain[i].x); + Parallel_Common::bcast_int (constrain[i].y); + Parallel_Common::bcast_int (constrain[i].z); + } } - } return; } -void Atom::bcast_atom2() +void + Atom::bcast_atom2 () { - this->ncpp.bcast_atom_pseudo(); + this->ncpp.bcast_atom_pseudo (); } #endif diff --git 
a/source/source_cell/atom_spec.h b/source/source_cell/atom_spec.h index 96730b00de1..46309828f4b 100644 --- a/source/source_cell/atom_spec.h +++ b/source/source_cell/atom_spec.h @@ -6,8 +6,8 @@ class Atom { public: // constructor and destructor - Atom(); - ~Atom(); + Atom (); + ~Atom (); Atom_pseudo ncpp; double mass = 0.0; // the mass of atom @@ -21,7 +21,7 @@ class Atom std::vector iw2_new; int nw = 0; // number of local orbitals (l,n,m) of this type - void set_index(); + void set_index (); int type = 0; // Index of atom type int na = 0; // Number of atoms in this type. @@ -33,12 +33,14 @@ class Atom std::string label = "\0"; // atomic symbol std::vector> tau; // Cartesian coordinates of each atom in this type. - std::vector> dis; // direct displacements of each atom in this type in current step liuyu modift 2023-03-22 - std::vector> taud; // Direct coordinates of each atom in this type. - std::vector> boundary_shift; // record for periodic boundary adjustment. - std::vector> vel; // velocities of each atom in this type. - std::vector> force; // force acting on each atom in this type. - std::vector> lambda; // Lagrange multiplier for each atom in this type. used in deltaspin + std::vector> + dis; // direct displacements of each atom in this type in current step liuyu modift 2023-03-22 + std::vector> taud; // Direct coordinates of each atom in this type. + std::vector> boundary_shift; // record for periodic boundary adjustment. + std::vector> vel; // velocities of each atom in this type. + std::vector> force; // force acting on each atom in this type. + std::vector> + lambda; // Lagrange multiplier for each atom in this type. used in deltaspin std::vector> constrain; // constrain for each atom in this type. used in deltaspin std::string label_orb = "\0"; // atomic Element symbol in the orbital file of lcao @@ -49,11 +51,11 @@ class Atom // Coulomb potential v(r) = z/r // It is a local potentail, and has no non-local potential parts. 
bool coulomb_potential = false; - void print_Atom(std::ofstream& ofs); - void update_force(ModuleBase::matrix& fcs); + void print_Atom (std::ofstream& ofs); + void update_force (ModuleBase::matrix& fcs); #ifdef __MPI - void bcast_atom(); - void bcast_atom2(); + void bcast_atom (); + void bcast_atom2 (); #endif }; diff --git a/source/source_cell/bcast_cell.cpp b/source/source_cell/bcast_cell.cpp index 8f2dcd3300e..f02df943ba4 100644 --- a/source/source_cell/bcast_cell.cpp +++ b/source/source_cell/bcast_cell.cpp @@ -1,4 +1,4 @@ -#include "unitcell.h" +#include "unitcell.h" #include "source_base/parallel_common.h" #include "source_io/module_parameter/parameter.h" #ifdef __EXX @@ -8,121 +8,119 @@ namespace unitcell { - void bcast_atoms_tau(Atom* atoms, - const int ntype) - { - #ifdef __MPI - MPI_Barrier(MPI_COMM_WORLD); - for (int i = 0; i < ntype; i++) { - atoms[i].bcast_atom(); // bcast tau array +void + bcast_atoms_tau (Atom* atoms, const int ntype) +{ +#ifdef __MPI + MPI_Barrier (MPI_COMM_WORLD); + for (int i = 0; i < ntype; i++) + { + atoms[i].bcast_atom (); // bcast tau array } - #endif - } - - void bcast_atoms_pseudo(Atom* atoms, - const int ntype) - { - #ifdef __MPI - MPI_Barrier(MPI_COMM_WORLD); - for (int i = 0; i < ntype; i++) +#endif +} + +void + bcast_atoms_pseudo (Atom* atoms, const int ntype) +{ +#ifdef __MPI + MPI_Barrier (MPI_COMM_WORLD); + for (int i = 0; i < ntype; i++) { - atoms[i].bcast_atom2(); + atoms[i].bcast_atom2 (); } - #endif - } +#endif +} - void bcast_Lattice(Lattice& lat) - { - #ifdef __MPI - MPI_Barrier(MPI_COMM_WORLD); - // distribute lattice parameters. 
- ModuleBase::Matrix3& latvec = lat.latvec; - ModuleBase::Matrix3& latvec_supercell = lat.latvec_supercell; - Parallel_Common::bcast_string(lat.Coordinate); - Parallel_Common::bcast_double(lat.lat0); - Parallel_Common::bcast_double(lat.lat0_angstrom); - Parallel_Common::bcast_double(lat.tpiba); - Parallel_Common::bcast_double(lat.tpiba2); - Parallel_Common::bcast_double(lat.omega); - Parallel_Common::bcast_string(lat.latName); +void + bcast_Lattice (Lattice& lat) +{ +#ifdef __MPI + MPI_Barrier (MPI_COMM_WORLD); + // distribute lattice parameters. + ModuleBase::Matrix3& latvec = lat.latvec; + ModuleBase::Matrix3& latvec_supercell = lat.latvec_supercell; + Parallel_Common::bcast_string (lat.Coordinate); + Parallel_Common::bcast_double (lat.lat0); + Parallel_Common::bcast_double (lat.lat0_angstrom); + Parallel_Common::bcast_double (lat.tpiba); + Parallel_Common::bcast_double (lat.tpiba2); + Parallel_Common::bcast_double (lat.omega); + Parallel_Common::bcast_string (lat.latName); - // distribute lattice vectors. - Parallel_Common::bcast_double(latvec.e11); - Parallel_Common::bcast_double(latvec.e12); - Parallel_Common::bcast_double(latvec.e13); - Parallel_Common::bcast_double(latvec.e21); - Parallel_Common::bcast_double(latvec.e22); - Parallel_Common::bcast_double(latvec.e23); - Parallel_Common::bcast_double(latvec.e31); - Parallel_Common::bcast_double(latvec.e32); - Parallel_Common::bcast_double(latvec.e33); + // distribute lattice vectors. + Parallel_Common::bcast_double (latvec.e11); + Parallel_Common::bcast_double (latvec.e12); + Parallel_Common::bcast_double (latvec.e13); + Parallel_Common::bcast_double (latvec.e21); + Parallel_Common::bcast_double (latvec.e22); + Parallel_Common::bcast_double (latvec.e23); + Parallel_Common::bcast_double (latvec.e31); + Parallel_Common::bcast_double (latvec.e32); + Parallel_Common::bcast_double (latvec.e33); - // distribute lattice vectors. - for (int i = 0; i < 3; i++) + // distribute lattice vectors. 
+ for (int i = 0; i < 3; i++) { - Parallel_Common::bcast_double(lat.a1[i]); - Parallel_Common::bcast_double(lat.a2[i]); - Parallel_Common::bcast_double(lat.a3[i]); - Parallel_Common::bcast_double(lat.latcenter[i]); - Parallel_Common::bcast_int(lat.lc[i]); + Parallel_Common::bcast_double (lat.a1[i]); + Parallel_Common::bcast_double (lat.a2[i]); + Parallel_Common::bcast_double (lat.a3[i]); + Parallel_Common::bcast_double (lat.latcenter[i]); + Parallel_Common::bcast_int (lat.lc[i]); } - // distribute superlattice vectors. - Parallel_Common::bcast_double(latvec_supercell.e11); - Parallel_Common::bcast_double(latvec_supercell.e12); - Parallel_Common::bcast_double(latvec_supercell.e13); - Parallel_Common::bcast_double(latvec_supercell.e21); - Parallel_Common::bcast_double(latvec_supercell.e22); - Parallel_Common::bcast_double(latvec_supercell.e23); - Parallel_Common::bcast_double(latvec_supercell.e31); - Parallel_Common::bcast_double(latvec_supercell.e32); - Parallel_Common::bcast_double(latvec_supercell.e33); + // distribute superlattice vectors. 
+ Parallel_Common::bcast_double (latvec_supercell.e11); + Parallel_Common::bcast_double (latvec_supercell.e12); + Parallel_Common::bcast_double (latvec_supercell.e13); + Parallel_Common::bcast_double (latvec_supercell.e21); + Parallel_Common::bcast_double (latvec_supercell.e22); + Parallel_Common::bcast_double (latvec_supercell.e23); + Parallel_Common::bcast_double (latvec_supercell.e31); + Parallel_Common::bcast_double (latvec_supercell.e32); + Parallel_Common::bcast_double (latvec_supercell.e33); - // distribute Change the lattice vectors or not - #endif - } - - void bcast_magnetism(Magnetism& magnet, const int ntype) - { - #ifdef __MPI - MPI_Barrier(MPI_COMM_WORLD); - Parallel_Common::bcast_double(magnet.start_mag, ntype); - if (PARAM.inp.nspin == 4) + // distribute Change the lattice vectors or not +#endif +} + +void + bcast_magnetism (Magnetism& magnet, const int ntype) +{ +#ifdef __MPI + MPI_Barrier (MPI_COMM_WORLD); + Parallel_Common::bcast_double (magnet.start_mag, ntype); + if (PARAM.inp.nspin == 4) { - Parallel_Common::bcast_double(magnet.ux_[0]); - Parallel_Common::bcast_double(magnet.ux_[1]); - Parallel_Common::bcast_double(magnet.ux_[2]); + Parallel_Common::bcast_double (magnet.ux_[0]); + Parallel_Common::bcast_double (magnet.ux_[1]); + Parallel_Common::bcast_double (magnet.ux_[2]); } - #endif - } +#endif +} - void bcast_unitcell(UnitCell& ucell) - { - #ifdef __MPI - const int ntype = ucell.ntype; - Parallel_Common::bcast_int(ucell.nat); +void + bcast_unitcell (UnitCell& ucell) +{ +#ifdef __MPI + const int ntype = ucell.ntype; + Parallel_Common::bcast_int (ucell.nat); - bcast_Lattice(ucell.lat); - bcast_magnetism(ucell.magnet,ntype); - bcast_atoms_tau(ucell.atoms,ntype); + bcast_Lattice (ucell.lat); + bcast_magnetism (ucell.magnet, ntype); + bcast_atoms_tau (ucell.atoms, ntype); - for (int i = 0; i < ntype; i++) + for (int i = 0; i < ntype; i++) { - Parallel_Common::bcast_string(ucell.orbital_fn[i]); + Parallel_Common::bcast_string 
(ucell.orbital_fn[i]); } - #ifdef __EXX - ModuleBase::bcast_data_cereal(GlobalC::exx_info.info_ri.files_abfs, - MPI_COMM_WORLD, - 0); - ModuleBase::bcast_data_cereal(GlobalC::exx_info.info_opt_abfs.files_abfs, - MPI_COMM_WORLD, - 0); - ModuleBase::bcast_data_cereal(GlobalC::exx_info.info_opt_abfs.files_jles, - MPI_COMM_WORLD, - 0); - #endif - return; - #endif - } +#ifdef __EXX + ModuleBase::bcast_data_cereal (GlobalC::exx_info.info_ri.files_abfs, MPI_COMM_WORLD, 0); + ModuleBase::bcast_data_cereal (GlobalC::exx_info.info_opt_abfs.files_abfs, MPI_COMM_WORLD, 0); + ModuleBase::bcast_data_cereal (GlobalC::exx_info.info_opt_abfs.files_jles, MPI_COMM_WORLD, 0); +#endif + return; +#endif } +} // namespace unitcell diff --git a/source/source_cell/bcast_cell.h b/source/source_cell/bcast_cell.h index 07cfd6474b1..89db5bb9586 100644 --- a/source/source_cell/bcast_cell.h +++ b/source/source_cell/bcast_cell.h @@ -4,47 +4,43 @@ #include "source_cell/unitcell.h" namespace unitcell { - /** - * @brief broadcast the tau array of the atoms - * - * @param atoms: the atoms to be broadcasted [in/out] - * @param ntype: the number of types of the atoms [in] - */ - void bcast_atoms_tau(Atom* atoms, - const int ntype); - - /** - * @brief broadcast the pseduo of the atoms - * - * @param atoms: the atoms to be broadcasted [in/out] - * @param ntype: the number of types of the atoms [in] - */ - void bcast_atoms_pseudo(Atom* atoms, - const int ntype); - /** - * @brief broadcast the lattice - * - * @param lat: the lattice to be broadcasted [in/out] - */ - void bcast_Lattice(Lattice& lat); +/** + * @brief broadcast the tau array of the atoms + * + * @param atoms: the atoms to be broadcasted [in/out] + * @param ntype: the number of types of the atoms [in] + */ +void bcast_atoms_tau (Atom* atoms, const int ntype); - /** - * @brief broadcast the magnetism - * - * @param magnet: the magnetism to be broadcasted [in/out] - * @param nytpe: the number of types of the atoms [in] - */ - void 
bcast_magnetism(Magnetism& magnet, - const int ntype); - - /** - * @brief broadcast the unitcell - * - * @param ucell: the unitcell to be broadcasted [in/out] - */ - void bcast_unitcell(UnitCell& ucell); +/** + * @brief broadcast the pseduo of the atoms + * + * @param atoms: the atoms to be broadcasted [in/out] + * @param ntype: the number of types of the atoms [in] + */ +void bcast_atoms_pseudo (Atom* atoms, const int ntype); +/** + * @brief broadcast the lattice + * + * @param lat: the lattice to be broadcasted [in/out] + */ +void bcast_Lattice (Lattice& lat); +/** + * @brief broadcast the magnetism + * + * @param magnet: the magnetism to be broadcasted [in/out] + * @param nytpe: the number of types of the atoms [in] + */ +void bcast_magnetism (Magnetism& magnet, const int ntype); -} +/** + * @brief broadcast the unitcell + * + * @param ucell: the unitcell to be broadcasted [in/out] + */ +void bcast_unitcell (UnitCell& ucell); + +} // namespace unitcell #endif // BCAST_CELL_H \ No newline at end of file diff --git a/source/source_cell/cal_atoms_info.h b/source/source_cell/cal_atoms_info.h index c7065758437..2e43745d25f 100644 --- a/source/source_cell/cal_atoms_info.h +++ b/source/source_cell/cal_atoms_info.h @@ -5,8 +5,8 @@ class CalAtomsInfo { public: - CalAtomsInfo(){}; - ~CalAtomsInfo(){}; + CalAtomsInfo () {}; + ~CalAtomsInfo () {}; /** * @brief Calculate the atom information from pseudopotential to set Parameter @@ -15,73 +15,73 @@ class CalAtomsInfo * @param ntype [in] number of atom types * @param para [out] Parameter object */ - void cal_atoms_info(const Atom* atoms, const int& ntype, Parameter& para) + void + cal_atoms_info (const Atom* atoms, const int& ntype, Parameter& para) { // calculate initial total magnetization when NSPIN=2 if (para.inp.nspin == 2 && !para.globalv.two_fermi) - { - for (int it = 0; it < ntype; ++it) { - for (int ia = 0; ia < atoms[it].na; ++ia) - { - para.input.nupdown += atoms[it].mag[ia]; - } + for (int it = 0; it < ntype; 
++it) + { + for (int ia = 0; ia < atoms[it].na; ++ia) + { + para.input.nupdown += atoms[it].mag[ia]; + } + } + GlobalV::ofs_running << " The readin total magnetization is " << para.inp.nupdown << std::endl; } - GlobalV::ofs_running << " The readin total magnetization is " << para.inp.nupdown << std::endl; - } - // decide whether to be USPP for (int it = 0; it < ntype; ++it) - { - if (atoms[it].ncpp.tvanp) { - para.sys.use_uspp = true; + if (atoms[it].ncpp.tvanp) + { + para.sys.use_uspp = true; + } } - } // calculate the total number of local basis para.sys.nlocal = 0; for (int it = 0; it < ntype; ++it) - { - const int nlocal_it = atoms[it].nw * atoms[it].na; - if (para.inp.nspin != 4) - { - para.sys.nlocal += nlocal_it; - } - else { - para.sys.nlocal += nlocal_it * 2; // zhengdy-soc + const int nlocal_it = atoms[it].nw * atoms[it].na; + if (para.inp.nspin != 4) + { + para.sys.nlocal += nlocal_it; + } + else + { + para.sys.nlocal += nlocal_it * 2; // zhengdy-soc + } } - } // calculate the total number of electrons - elecstate::cal_nelec(atoms, ntype, para.input.nelec); + elecstate::cal_nelec (atoms, ntype, para.input.nelec); // autoset and check GlobalV::NBANDS - std::vector nelec_spin(2, 0.0); + std::vector nelec_spin (2, 0.0); if (para.inp.nspin == 2) - { - nelec_spin[0] = (para.inp.nelec + para.inp.nupdown ) / 2.0; - nelec_spin[1] = (para.inp.nelec - para.inp.nupdown ) / 2.0; - } - elecstate::cal_nbands(para.inp.nelec, para.sys.nlocal, nelec_spin, para.input.nbands); + { + nelec_spin[0] = (para.inp.nelec + para.inp.nupdown) / 2.0; + nelec_spin[1] = (para.inp.nelec - para.inp.nupdown) / 2.0; + } + elecstate::cal_nbands (para.inp.nelec, para.sys.nlocal, nelec_spin, para.input.nbands); // calculate the number of nbands_local para.sys.nbands_l = para.inp.nbands; if (para.inp.ks_solver == "bpcg") // only bpcg support band parallel - { - para.sys.nbands_l = para.inp.nbands / para.inp.bndpar; - if (GlobalV::MY_BNDGROUP < para.inp.nbands % para.inp.bndpar) { - 
para.sys.nbands_l++; + para.sys.nbands_l = para.inp.nbands / para.inp.bndpar; + if (GlobalV::MY_BNDGROUP < para.inp.nbands % para.inp.bndpar) + { + para.sys.nbands_l++; + } } - } // temporary code if (GlobalV::MY_BNDGROUP == 0 || para.inp.ks_solver == "bpcg") - { - para.sys.ks_run = true; - } + { + para.sys.ks_run = true; + } return; } }; diff --git a/source/source_cell/cell_index.cpp b/source/source_cell/cell_index.cpp index c147afa936f..1db64d4a069 100644 --- a/source/source_cell/cell_index.cpp +++ b/source/source_cell/cell_index.cpp @@ -3,291 +3,312 @@ #include "source_base/name_angular.h" #include "source_base/tool_quit.h" -CellIndex::CellIndex(const std::vector& atomLabels_in, - const std::vector& atomCounts_in, - const std::vector>& lnchiCounts_in, - const int& nspin) - : atomLabels(atomLabels_in), atomCounts(atomCounts_in), lnchiCounts(lnchiCounts_in) +CellIndex::CellIndex (const std::vector& atomLabels_in, + const std::vector& atomCounts_in, + const std::vector>& lnchiCounts_in, + const int& nspin) + : atomLabels (atomLabels_in), atomCounts (atomCounts_in), lnchiCounts (lnchiCounts_in) { - if (this->check_nspin(nspin)) - { - this->npol_ = (nspin == 4) ? 2 : 1; - } - this->check_atomCounts(); - this->cal_orbitalCounts(); + if (this->check_nspin (nspin)) + { + this->npol_ = (nspin == 4) ? 
2 : 1; + } + this->check_atomCounts (); + this->cal_orbitalCounts (); } -int CellIndex::get_ntype() const +int + CellIndex::get_ntype () const { - return this->atomCounts.size(); + return this->atomCounts.size (); } -int CellIndex::get_nat() const +int + CellIndex::get_nat () const { int nat = 0; - for (int it = 0; it < this->atomCounts.size(); ++it) - { - nat += this->atomCounts[it]; - } + for (int it = 0; it < this->atomCounts.size (); ++it) + { + nat += this->atomCounts[it]; + } return nat; } -int CellIndex::get_nat(int it) const +int + CellIndex::get_nat (int it) const { return this->atomCounts[it]; } -int CellIndex::get_nw() const +int + CellIndex::get_nw () const { int nw = 0; - for (int it = 0; it < this->orbitalCounts.size(); ++it) - { - nw += this->orbitalCounts[it] * this->atomCounts[it] * this->npol_; - } + for (int it = 0; it < this->orbitalCounts.size (); ++it) + { + nw += this->orbitalCounts[it] * this->atomCounts[it] * this->npol_; + } return nw; } -int CellIndex::get_nw(int iat) const +int + CellIndex::get_nw (int iat) const { - int it = this->iat2it(iat); + int it = this->iat2it (iat); return this->orbitalCounts[it]; } -int CellIndex::get_iwt(int iat, int orbital_index) const +int + CellIndex::get_iwt (int iat, int orbital_index) const { - if (iat < 0 || iat >= this->get_nat()) - { - ModuleBase::WARNING_QUIT("SpinConstrain::get_iwt", "iat out of range [0, nat)"); - } - int it = this->iat2it(iat); - int ia = this->iat2ia(iat); + if (iat < 0 || iat >= this->get_nat ()) + { + ModuleBase::WARNING_QUIT ("SpinConstrain::get_iwt", "iat out of range [0, nat)"); + } + int it = this->iat2it (iat); + int ia = this->iat2ia (iat); if (orbital_index < 0 || orbital_index >= this->orbitalCounts[it] * this->npol_) - { - ModuleBase::WARNING_QUIT("SpinConstrain::get_iwt", "orbital index out of range [0, atom_nw*npol)"); - } + { + ModuleBase::WARNING_QUIT ("SpinConstrain::get_iwt", "orbital index out of range [0, atom_nw*npol)"); + } int iwt = 0; - for (int it0 = 0; 
it0 < this->orbitalCounts.size(); ++it0) - { - if (it0 == it) + for (int it0 = 0; it0 < this->orbitalCounts.size (); ++it0) { - break; + if (it0 == it) + { + break; + } + iwt += this->orbitalCounts[it0] * this->atomCounts[it0] * this->npol_; } - iwt += this->orbitalCounts[it0] * this->atomCounts[it0] * this->npol_; - } for (int i = 0; i < ia; ++i) - { - iwt += this->orbitalCounts[it] * this->npol_; - } + { + iwt += this->orbitalCounts[it] * this->npol_; + } iwt += orbital_index; return iwt; } -int CellIndex::get_maxL(int iat) const +int + CellIndex::get_maxL (int iat) const { - int it = this->iat2it(iat); - return this->lnchiCounts[it].size() - 1; + int it = this->iat2it (iat); + return this->lnchiCounts[it].size () - 1; } /// @brief get nchi -int CellIndex::get_nchi(int iat, int L) const +int + CellIndex::get_nchi (int iat, int L) const { - int it = this->iat2it(iat); - if (L < 0 || L >= this->lnchiCounts[it].size()) - { - ModuleBase::WARNING_QUIT("CellIndex::get_nchi", "L out of range [0, maxL]"); - } + int it = this->iat2it (iat); + if (L < 0 || L >= this->lnchiCounts[it].size ()) + { + ModuleBase::WARNING_QUIT ("CellIndex::get_nchi", "L out of range [0, maxL]"); + } return this->lnchiCounts[it][L]; } -void CellIndex::check_atomCounts() +void + CellIndex::check_atomCounts () { - if (!this->atomCounts.size()) - { - ModuleBase::WARNING_QUIT("CellIndex::check_atomCounts", "atomCounts is not set"); - } - if (this->get_nat() <= 0) - { - ModuleBase::WARNING_QUIT("CellIndex::check_atomCounts", "nat <= 0"); - } - for (int it = 0; it < this->atomCounts.size(); ++it) - { - if (this->atomCounts[it] <= 0) + if (!this->atomCounts.size ()) + { + ModuleBase::WARNING_QUIT ("CellIndex::check_atomCounts", "atomCounts is not set"); + } + if (this->get_nat () <= 0) { - ModuleBase::WARNING_QUIT("CellIndex::check_atomCounts", "number of atoms <= 0 for some element"); + ModuleBase::WARNING_QUIT ("CellIndex::check_atomCounts", "nat <= 0"); + } + for (int it = 0; it < 
this->atomCounts.size (); ++it) + { + if (this->atomCounts[it] <= 0) + { + ModuleBase::WARNING_QUIT ("CellIndex::check_atomCounts", "number of atoms <= 0 for some element"); + } } - } } -std::string CellIndex::get_atom_label(int iat, bool order) const +std::string + CellIndex::get_atom_label (int iat, bool order) const { - int it = this->iat2it(iat); - int ia = this->iat2ia(iat); + int it = this->iat2it (iat); + int ia = this->iat2ia (iat); std::string atomType = atomLabels[it]; if (order) - return atomType + std::to_string(ia + 1); + { + return atomType + std::to_string (ia + 1); + } return atomType; } -int CellIndex::iat2it(int iat) const +int + CellIndex::iat2it (int iat) const { int running_iat = 0; int it = -1; // Tracks the index of the atom in atomLabels // Find the type of atom associated with the total order - for (int i = 0; i < this->atomCounts.size(); ++i) - { - if (running_iat + atomCounts[i] > iat) + for (int i = 0; i < this->atomCounts.size (); ++i) { - it = i; - break; + if (running_iat + atomCounts[i] > iat) + { + it = i; + break; + } + running_iat += atomCounts[i]; } - running_iat += atomCounts[i]; - } if (it == -1) - { - ModuleBase::WARNING_QUIT("CellIndex::get_atom_label", "iat out of range [0, nat)"); - } + { + ModuleBase::WARNING_QUIT ("CellIndex::get_atom_label", "iat out of range [0, nat)"); + } return it; } -int CellIndex::iat2ia(int iat) const +int + CellIndex::iat2ia (int iat) const { - int it = this->iat2it(iat); + int it = this->iat2it (iat); // sum of atoms of previous types int running_iat = 0; for (int i = 0; i < it; ++i) - { - running_iat += atomCounts[i]; - } + { + running_iat += atomCounts[i]; + } return iat - running_iat; } -int CellIndex::iw2l(int iat, int iw) const +int + CellIndex::iw2l (int iat, int iw) const { - int it = this->iat2it(iat); - int maxL = this->lnchiCounts[it].size() - 1; + int it = this->iat2it (iat); + int maxL = this->lnchiCounts[it].size () - 1; for (int L = 0; L <= maxL; ++L) - { - int nchi = 
this->lnchiCounts[it][L]; - int blockSize = nchi * (2 * L + 1); - if (iw < blockSize) { - return L; + int nchi = this->lnchiCounts[it][L]; + int blockSize = nchi * (2 * L + 1); + if (iw < blockSize) + { + return L; + } + iw -= blockSize; + if (iw < 0) + { + ModuleBase::WARNING_QUIT ("CellIndex::iw2l", "localized wave funciton index out of range [0, nw)"); + } } - iw -= blockSize; - if (iw < 0) + if (iw >= 0) { - ModuleBase::WARNING_QUIT("CellIndex::iw2l", "localized wave funciton index out of range [0, nw)"); + ModuleBase::WARNING_QUIT ("CellIndex::iw2l", "localized wave funciton index out of range [0, nw)"); } - } - if (iw >= 0) - { - ModuleBase::WARNING_QUIT("CellIndex::iw2l", "localized wave funciton index out of range [0, nw)"); - } - ModuleBase::WARNING_QUIT("CellIndex::iw2l", "unreachable code reached"); + ModuleBase::WARNING_QUIT ("CellIndex::iw2l", "unreachable code reached"); } -int CellIndex::iw2z(int iat, int iw) const +int + CellIndex::iw2z (int iat, int iw) const { - int it = this->iat2it(iat); - int maxL = this->lnchiCounts[it].size() - 1; + int it = this->iat2it (iat); + int maxL = this->lnchiCounts[it].size () - 1; for (int L = 0; L <= maxL; ++L) - { - int nchi = this->lnchiCounts[it][L]; - int blockSize = nchi * (2 * L + 1); - if (iw < blockSize) { - return iw / (2 * L + 1); + int nchi = this->lnchiCounts[it][L]; + int blockSize = nchi * (2 * L + 1); + if (iw < blockSize) + { + return iw / (2 * L + 1); + } + iw -= blockSize; + if (iw < 0) + { + ModuleBase::WARNING_QUIT ("CellIndex::iw2l", "localized wave funciton index out of range [0, nw)"); + } } - iw -= blockSize; - if (iw < 0) + if (iw >= 0) { - ModuleBase::WARNING_QUIT("CellIndex::iw2l", "localized wave funciton index out of range [0, nw)"); + ModuleBase::WARNING_QUIT ("CellIndex::iw2z", "localized wave funciton index out of range [0, nw)"); } - } - if (iw >= 0) - { - ModuleBase::WARNING_QUIT("CellIndex::iw2z", "localized wave funciton index out of range [0, nw)"); - } - 
ModuleBase::WARNING_QUIT("CellIndex::iw2z", "unreachable code reached"); + ModuleBase::WARNING_QUIT ("CellIndex::iw2z", "unreachable code reached"); } -int CellIndex::iw2m(int iat, int iw) const +int + CellIndex::iw2m (int iat, int iw) const { - int it = this->iat2it(iat); - int maxL = this->lnchiCounts[it].size() - 1; + int it = this->iat2it (iat); + int maxL = this->lnchiCounts[it].size () - 1; for (int L = 0; L <= maxL; ++L) - { - int nchi = this->lnchiCounts[it][L]; - int blockSize = nchi * (2 * L + 1); - if (iw < blockSize) { - return iw % (2 * L + 1); + int nchi = this->lnchiCounts[it][L]; + int blockSize = nchi * (2 * L + 1); + if (iw < blockSize) + { + return iw % (2 * L + 1); + } + iw -= blockSize; + if (iw < 0) + { + ModuleBase::WARNING_QUIT ("CellIndex::iw2l", "localized wave funciton index out of range [0, nw)"); + } } - iw -= blockSize; - if (iw < 0) + if (iw >= 0) { - ModuleBase::WARNING_QUIT("CellIndex::iw2l", "localized wave funciton index out of range [0, nw)"); + ModuleBase::WARNING_QUIT ("CellIndex::iw2m", "localized wave funciton index out of range [0, nw)"); } - } - if (iw >= 0) - { - ModuleBase::WARNING_QUIT("CellIndex::iw2m", "localized wave funciton index out of range [0, nw)"); - } - ModuleBase::WARNING_QUIT("CellIndex::iw2m", "unreachable code reached"); + ModuleBase::WARNING_QUIT ("CellIndex::iw2m", "unreachable code reached"); } -bool CellIndex::check_nspin(int nspin) +bool + CellIndex::check_nspin (int nspin) { if (nspin != 1 && nspin != 2 && nspin != 4) - { - ModuleBase::WARNING_QUIT("CellIndex::check_nspin", "nspin must be 1, 2, or 4"); - } + { + ModuleBase::WARNING_QUIT ("CellIndex::check_nspin", "nspin must be 1, 2, or 4"); + } return true; } -void CellIndex::cal_orbitalCounts() +void + CellIndex::cal_orbitalCounts () { - int ntype = this->lnchiCounts.size(); - this->orbitalCounts.resize(ntype, 0); + int ntype = this->lnchiCounts.size (); + this->orbitalCounts.resize (ntype, 0); for (int it = 0; it < ntype; ++it) - { - int 
orbitalCount = 0; - for (int L = 0; L < this->lnchiCounts[it].size(); ++L) { - orbitalCount += this->lnchiCounts[it][L] * (2 * L + 1); + int orbitalCount = 0; + for (int L = 0; L < this->lnchiCounts[it].size (); ++L) + { + orbitalCount += this->lnchiCounts[it][L] * (2 * L + 1); + } + this->orbitalCounts[it] = orbitalCount; } - this->orbitalCounts[it] = orbitalCount; - } } -void CellIndex::write_orb_info(const std::string& out_dir) const +void + CellIndex::write_orb_info (const std::string& out_dir) const { std::stringstream os; os << out_dir << "Orbital"; - std::ofstream out(os.str().c_str()); - out << std::setw(5) << "#io" << std::setw(8) << "spec" << std::setw(5) << "l" << std::setw(5) << "m" << std::setw(5) - << "z" << std::setw(5) << "sym" << std::endl; + std::ofstream out (os.str ().c_str ()); + out << std::setw (5) << "#io" << std::setw (8) << "spec" << std::setw (5) << "l" << std::setw (5) << "m" + << std::setw (5) << "z" << std::setw (5) << "sym" << std::endl; - for (int iat = 0; iat < this->get_nat(); iat++) - { - for (int iw = 0; iw < this->get_nw(iat); ++iw) + for (int iat = 0; iat < this->get_nat (); iat++) { - const int L = this->iw2l(iat, iw); - const int Z = this->iw2z(iat, iw); - const int M = this->iw2m(iat, iw); - out << std::setw(5) << iat << std::setw(8) << this->get_atom_label(iat) << std::setw(5) << L << std::setw(5) - << M << std::setw(5) << Z + 1 << std::setw(15) << ModuleBase::Name_Angular[L][M] << std::endl; + for (int iw = 0; iw < this->get_nw (iat); ++iw) + { + const int L = this->iw2l (iat, iw); + const int Z = this->iw2z (iat, iw); + const int M = this->iw2m (iat, iw); + out << std::setw (5) << iat << std::setw (8) << this->get_atom_label (iat) << std::setw (5) << L + << std::setw (5) << M << std::setw (5) << Z + 1 << std::setw (15) + << ModuleBase::Name_Angular[L][M] << std::endl; + } } - } out << std::endl << std::endl; - out << std::setw(5) << "#io" << std::setw(2) << "=" << std::setw(2) << "Orbital index in supercell" << std::endl; 
- out << std::setw(5) << "#spec" << std::setw(2) << "=" << std::setw(2) << "Atomic species label" << std::endl; - out << std::setw(5) << "#l" << std::setw(2) << "=" << std::setw(2) << "Angular mumentum quantum number" + out << std::setw (5) << "#io" << std::setw (2) << "=" << std::setw (2) << "Orbital index in supercell" << std::endl; + out << std::setw (5) << "#spec" << std::setw (2) << "=" << std::setw (2) << "Atomic species label" << std::endl; + out << std::setw (5) << "#l" << std::setw (2) << "=" << std::setw (2) << "Angular mumentum quantum number" << std::endl; - out << std::setw(5) << "#m" << std::setw(2) << "=" << std::setw(2) << "Magnetic quantum number" << std::endl; - out << std::setw(5) << "#z" << std::setw(2) << "=" << std::setw(2) << "Zeta index of orbital" << std::endl; - out << std::setw(5) << "#sym" << std::setw(2) << "=" << std::setw(2) << "Symmetry name of real orbital" + out << std::setw (5) << "#m" << std::setw (2) << "=" << std::setw (2) << "Magnetic quantum number" << std::endl; + out << std::setw (5) << "#z" << std::setw (2) << "=" << std::setw (2) << "Zeta index of orbital" << std::endl; + out << std::setw (5) << "#sym" << std::setw (2) << "=" << std::setw (2) << "Symmetry name of real orbital" << std::endl; - out.close(); + out.close (); } diff --git a/source/source_cell/cell_index.h b/source/source_cell/cell_index.h index ed24e521906..6b922609de0 100644 --- a/source/source_cell/cell_index.h +++ b/source/source_cell/cell_index.h @@ -20,33 +20,33 @@ class CellIndex { public: - CellIndex() = default; - CellIndex(const std::vector& atomLabels_in, - const std::vector& atomCounts_in, - const std::vector>& lnchiCounts_in, - const int& nspin); + CellIndex () = default; + CellIndex (const std::vector& atomLabels_in, + const std::vector& atomCounts_in, + const std::vector>& lnchiCounts_in, + const int& nspin); public: /// @brief the total number of atoms - int get_nat() const; + int get_nat () const; /// @brief the total number of atoms of a given 
type - int get_nat(int it) const; + int get_nat (int it) const; /// @brief get ntype - int get_ntype() const; + int get_ntype () const; /// @brief get nw - int get_nw() const; + int get_nw () const; /// @brief get nw of a given type - int get_nw(int iat) const; + int get_nw (int iat) const; /// @brief get iwt - int get_iwt(int iat, int orbital_index) const; + int get_iwt (int iat, int orbital_index) const; /// @brief get maximum L of a given atom - int get_maxL(int iat) const; + int get_maxL (int iat) const; /// @brief get nchi of a given atom and a give L - int get_nchi(int iat, int L) const; + int get_nchi (int iat, int L) const; /// @brief get atom label of a given atom - std::string get_atom_label(int iat, bool order = false) const; + std::string get_atom_label (int iat, bool order = false) const; /// @brief write orbital info into file - void write_orb_info(const std::string& out_dir) const; + void write_orb_info (const std::string& out_dir) const; private: /// atomCounts is a vector used to store the number of atoms for each type @@ -60,21 +60,21 @@ class CellIndex /// npol is determined by nspin and used in get_iwt and get_nw int npol_ = 1; /// calculate orbitalCounts from lnchiCounts - void cal_orbitalCounts(); + void cal_orbitalCounts (); /// check nspin - bool check_nspin(int nspin); + bool check_nspin (int nspin); /// check if atomCounts is set ok - void check_atomCounts(); + void check_atomCounts (); /// get type of atom from total order - int iat2it(int iat) const; + int iat2it (int iat) const; /// get index of atom in the same type - int iat2ia(int iat) const; + int iat2ia (int iat) const; /// get L from iw - int iw2l(int iat, int iw) const; + int iw2l (int iat, int iw) const; /// get Z from iw - int iw2z(int iat, int iw) const; + int iw2z (int iat, int iw) const; /// get m from iw - int iw2m(int iat, int iw) const; + int iw2m (int iat, int iw) const; }; #endif // CELL_INDEX_H diff --git a/source/source_cell/check_atomic_stru.cpp 
b/source/source_cell/check_atomic_stru.cpp index c8fea41be48..a15a597bb60 100644 --- a/source/source_cell/check_atomic_stru.cpp +++ b/source/source_cell/check_atomic_stru.cpp @@ -6,169 +6,177 @@ namespace unitcell { -void check_atomic_stru(UnitCell& ucell, const double& factor) +void + check_atomic_stru (UnitCell& ucell, const double& factor) { - ModuleBase::timer::start("unitcell", "check_atomic_stru"); + ModuleBase::timer::start ("unitcell", "check_atomic_stru"); // First we calculate all bond length in the structure, // and compare with the covalent_bond_length, // if there has bond length is shorter than covalent_bond_length * factor, // we think this structure is unreasonable. - assert(ucell.ntype > 0); + assert (ucell.ntype > 0); bool all_pass = true; bool no_warning = true; std::stringstream errorlog; - errorlog.setf(std::ios_base::fixed, std::ios_base::floatfield); + errorlog.setf (std::ios_base::fixed, std::ios_base::floatfield); if (GlobalV::MY_RANK == 0) - { - - const int ntype = ucell.ntype; - const double lat0 = ucell.lat0; - const double warning_coef = 0.6; - const double max_factor_coef = std::max(warning_coef, factor); - - std::vector symbol_covalent_radiuss(ntype); - for (int it = 0; it < ntype; it++) { - std::string symbol1 = ""; - for (char ch: ucell.atoms[it].label) - { - if (std::isalpha(ch)) + + const int ntype = ucell.ntype; + const double lat0 = ucell.lat0; + const double warning_coef = 0.6; + const double max_factor_coef = std::max (warning_coef, factor); + + std::vector symbol_covalent_radiuss (ntype); + for (int it = 0; it < ntype; it++) { - symbol1.push_back(ch); - } - } + std::string symbol1 = ""; + for (char ch: ucell.atoms[it].label) + { + if (std::isalpha (ch)) + { + symbol1.push_back (ch); + } + } - if (ModuleBase::CovalentRadius.find(symbol1) != ModuleBase::CovalentRadius.end()) - { - symbol_covalent_radiuss[it] = ModuleBase::CovalentRadius.at(symbol1); - } - else - { - std::stringstream mess; - mess << "Notice: symbol '" << 
symbol1 << "' is not an element symbol!!!! "; - mess << "set the covalent radius to be 0." << std::endl; - GlobalV::ofs_running << mess.str(); - std::cout << mess.str(); - } - } - std::vector latvec (9); - latvec[0] = ucell.a1.x; - latvec[1] = ucell.a2.x; - latvec[2] = ucell.a3.x; - latvec[3] = ucell.a1.y; - latvec[4] = ucell.a2.y; - latvec[5] = ucell.a3.y; - latvec[6] = ucell.a1.z; - latvec[7] = ucell.a2.z; - latvec[8] = ucell.a3.z; - std::vector A(27*3); - std::vector cell(27); - std::vector label(ntype); - for (int i = 0; i < 27; i++) - { - int a = (i / 9) % 3 - 1; - int b = (i / 3) % 3 - 1; - int c = i % 3 - 1; - A[3 * i] = a * latvec[0] + b * latvec[1] + c * latvec[2]; - A[3 * i + 1] = a * latvec[3] + b * latvec[4] + c * latvec[5]; - A[3 * i + 2] = a * latvec[6] + b * latvec[7] + c * latvec[8]; - std::ostringstream tmp_oss; - tmp_oss << " (cell:" << std::setw(2) << a << " " << std::setw(2) << b << " " << std::setw(2) << c - << "), distance= "; - cell[i] = tmp_oss.str(); - } - for (int it = 0; it < ntype; it++) - { - std::ostringstream tmp_oss; - tmp_oss << std::setw(3) << ucell.atoms[it].label; - label[it] = tmp_oss.str(); - } + if (ModuleBase::CovalentRadius.find (symbol1) != ModuleBase::CovalentRadius.end ()) + { + symbol_covalent_radiuss[it] = ModuleBase::CovalentRadius.at (symbol1); + } + else + { + std::stringstream mess; + mess << "Notice: symbol '" << symbol1 << "' is not an element symbol!!!! "; + mess << "set the covalent radius to be 0." 
<< std::endl; + GlobalV::ofs_running << mess.str (); + std::cout << mess.str (); + } + } + std::vector latvec (9); + latvec[0] = ucell.a1.x; + latvec[1] = ucell.a2.x; + latvec[2] = ucell.a3.x; + latvec[3] = ucell.a1.y; + latvec[4] = ucell.a2.y; + latvec[5] = ucell.a3.y; + latvec[6] = ucell.a1.z; + latvec[7] = ucell.a2.z; + latvec[8] = ucell.a3.z; + std::vector A (27 * 3); + std::vector cell (27); + std::vector label (ntype); + for (int i = 0; i < 27; i++) + { + int a = (i / 9) % 3 - 1; + int b = (i / 3) % 3 - 1; + int c = i % 3 - 1; + A[3 * i] = a * latvec[0] + b * latvec[1] + c * latvec[2]; + A[3 * i + 1] = a * latvec[3] + b * latvec[4] + c * latvec[5]; + A[3 * i + 2] = a * latvec[6] + b * latvec[7] + c * latvec[8]; + std::ostringstream tmp_oss; + tmp_oss << " (cell:" << std::setw (2) << a << " " << std::setw (2) << b << " " << std::setw (2) << c + << "), distance= "; + cell[i] = tmp_oss.str (); + } + for (int it = 0; it < ntype; it++) + { + std::ostringstream tmp_oss; + tmp_oss << std::setw (3) << ucell.atoms[it].label; + label[it] = tmp_oss.str (); + } - const double bohr_to_a = ModuleBase::BOHR_TO_A; + const double bohr_to_a = ModuleBase::BOHR_TO_A; #pragma omp parallel - { - std::vector delta_lat(3); -#pragma omp for schedule(dynamic) - for (int iat = 0; iat < ucell.nat; iat++) { - const int it1 = ucell.iat2it[iat]; - const int ia1 = ucell.iat2ia[iat]; - const double symbol1_covalent_radius = symbol_covalent_radiuss[it1]; - double x1 = ucell.atoms[it1].taud[ia1].x; - double y1 = ucell.atoms[it1].taud[ia1].y; - double z1 = ucell.atoms[it1].taud[ia1].z; - for (int it2 = it1; it2 < ntype; it2++) - { - double symbol2_covalent_radius = symbol_covalent_radiuss[it2]; - double covalent_length = (symbol1_covalent_radius + symbol2_covalent_radius) / bohr_to_a; - const double max_error = covalent_length * max_factor_coef / ucell.lat0; - const double max_error_2 = max_error * max_error; - const double factor_error = covalent_length * factor; - for (int ia2 = ia1; ia2 < 
ucell.atoms[it2].na; ia2++) + std::vector delta_lat (3); +#pragma omp for schedule(dynamic) + for (int iat = 0; iat < ucell.nat; iat++) { - const bool is_same_atom = (it1 == it2) && (ia1 == ia2); - double delta_x = ucell.atoms[it2].taud[ia2].x - x1; - double delta_y = ucell.atoms[it2].taud[ia2].y - y1; - double delta_z = ucell.atoms[it2].taud[ia2].z - z1; - delta_lat[0] = delta_x * latvec[0] + delta_y * latvec[1] + delta_z * latvec[2]; - delta_lat[1] = delta_x * latvec[3] + delta_y * latvec[4] + delta_z * latvec[5]; - delta_lat[2] = delta_x * latvec[6] + delta_y * latvec[7] + delta_z * latvec[8]; - for (int i = 0; i < 27; i++) - { - if ((is_same_atom) && (i == 13)) - continue; - const int offset = i * 3; - const double part1 = delta_lat[0] + A[offset]; - const double part2 = delta_lat[1] + A[offset + 1]; - const double part3 = delta_lat[2] + A[offset + 2]; - const double bond_length = part1 * part1 + part2 * part2 + part3 * part3; - const bool flag = bond_length < max_error_2 ? true : false; - if (flag) + const int it1 = ucell.iat2it[iat]; + const int ia1 = ucell.iat2ia[iat]; + const double symbol1_covalent_radius = symbol_covalent_radiuss[it1]; + double x1 = ucell.atoms[it1].taud[ia1].x; + double y1 = ucell.atoms[it1].taud[ia1].y; + double z1 = ucell.atoms[it1].taud[ia1].z; + for (int it2 = it1; it2 < ntype; it2++) { - const double sqrt_bon = sqrt(bond_length) * lat0; - #pragma omp critical - { - no_warning = false; - all_pass = all_pass && (sqrt_bon < factor_error ? 
false : true); - errorlog << std::setw(3) << ia1 + 1 << "-th " << label[it1] << ", " << std::setw(3) - << ia2 + 1 << "-th " << label[it2] << cell[i] << std::setprecision(3) - << sqrt_bon << " Bohr (" << sqrt_bon * bohr_to_a << " Angstrom)\n"; - } - } - } - } // ia2 - } // it2 - } // iat + double symbol2_covalent_radius = symbol_covalent_radiuss[it2]; + double covalent_length + = (symbol1_covalent_radius + symbol2_covalent_radius) / bohr_to_a; + const double max_error = covalent_length * max_factor_coef / ucell.lat0; + const double max_error_2 = max_error * max_error; + const double factor_error = covalent_length * factor; + for (int ia2 = ia1; ia2 < ucell.atoms[it2].na; ia2++) + { + const bool is_same_atom = (it1 == it2) && (ia1 == ia2); + double delta_x = ucell.atoms[it2].taud[ia2].x - x1; + double delta_y = ucell.atoms[it2].taud[ia2].y - y1; + double delta_z = ucell.atoms[it2].taud[ia2].z - z1; + delta_lat[0] = delta_x * latvec[0] + delta_y * latvec[1] + delta_z * latvec[2]; + delta_lat[1] = delta_x * latvec[3] + delta_y * latvec[4] + delta_z * latvec[5]; + delta_lat[2] = delta_x * latvec[6] + delta_y * latvec[7] + delta_z * latvec[8]; + for (int i = 0; i < 27; i++) + { + if ((is_same_atom) && (i == 13)) + { + continue; + } + const int offset = i * 3; + const double part1 = delta_lat[0] + A[offset]; + const double part2 = delta_lat[1] + A[offset + 1]; + const double part3 = delta_lat[2] + A[offset + 2]; + const double bond_length + = part1 * part1 + part2 * part2 + part3 * part3; + const bool flag = bond_length < max_error_2 ? true : false; + if (flag) + { + const double sqrt_bon = sqrt (bond_length) * lat0; +#pragma omp critical + { + no_warning = false; + all_pass + = all_pass && (sqrt_bon < factor_error ? 
false : true); + errorlog << std::setw (3) << ia1 + 1 << "-th " << label[it1] + << ", " << std::setw (3) << ia2 + 1 << "-th " + << label[it2] << cell[i] << std::setprecision (3) + << sqrt_bon << " Bohr (" << sqrt_bon * bohr_to_a + << " Angstrom)\n"; + } + } + } + } // ia2 + } // it2 + } // iat + } } - } if (!all_pass || !no_warning) - { - std::stringstream mess; - mess << "\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; - mess << "%%%%%% WARNING WARNING WARNING WARNING WARNING %%%%%%" << std::endl; - mess << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; - mess << "!!! WARNING: Some atoms are too close!!!" << std::endl; - mess << "!!! Please check the nearest-neighbor list in log file." << std::endl; - mess << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; - mess << "%%%%%% WARNING WARNING WARNING WARNING WARNING %%%%%%" << std::endl; - mess << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; - - GlobalV::ofs_running << mess.str() << mess.str() << mess.str() << errorlog.str(); - std::cout << mess.str() << mess.str() << mess.str() << std::endl; - if (!all_pass) { - mess.clear(); - mess.str(""); - mess << "If this structure is what you want, you can set 'min_dist_coef'\n"; - mess << "as a smaller value (the current value is " << factor << ") in INPUT file." << std::endl; - GlobalV::ofs_running << mess.str(); - std::cout << mess.str(); - ModuleBase::WARNING_QUIT("Input", "The structure is unreasonable!"); + std::stringstream mess; + mess << "\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; + mess << "%%%%%% WARNING WARNING WARNING WARNING WARNING %%%%%%" << std::endl; + mess << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; + mess << "!!! WARNING: Some atoms are too close!!!" << std::endl; + mess << "!!! Please check the nearest-neighbor list in log file." 
<< std::endl; + mess << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; + mess << "%%%%%% WARNING WARNING WARNING WARNING WARNING %%%%%%" << std::endl; + mess << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; + + GlobalV::ofs_running << mess.str () << mess.str () << mess.str () << errorlog.str (); + std::cout << mess.str () << mess.str () << mess.str () << std::endl; + if (!all_pass) + { + mess.clear (); + mess.str (""); + mess << "If this structure is what you want, you can set 'min_dist_coef'\n"; + mess << "as a smaller value (the current value is " << factor << ") in INPUT file." << std::endl; + GlobalV::ofs_running << mess.str (); + std::cout << mess.str (); + ModuleBase::WARNING_QUIT ("Input", "The structure is unreasonable!"); + } } - } - ModuleBase::timer::end("unitcell", "check_atomic_stru"); + ModuleBase::timer::end ("unitcell", "check_atomic_stru"); } -} +} // namespace unitcell diff --git a/source/source_cell/check_atomic_stru.h b/source/source_cell/check_atomic_stru.h index 15cc218f830..c3b12f36527 100644 --- a/source/source_cell/check_atomic_stru.h +++ b/source/source_cell/check_atomic_stru.h @@ -5,7 +5,7 @@ namespace unitcell { - void check_atomic_stru(UnitCell& ucell, const double& factor); +void check_atomic_stru (UnitCell& ucell, const double& factor); }; #endif diff --git a/source/source_cell/k_vector_utils.cpp b/source/source_cell/k_vector_utils.cpp index 43439bb947d..9d8c4cab07c 100644 --- a/source/source_cell/k_vector_utils.cpp +++ b/source/source_cell/k_vector_utils.cpp @@ -14,343 +14,350 @@ namespace KVectorUtils { -void kvec_d2c(K_Vectors& kv, const ModuleBase::Matrix3& reciprocal_vec) +void + kvec_d2c (K_Vectors& kv, const ModuleBase::Matrix3& reciprocal_vec) { // throw std::runtime_error("k_vec_d2c: This function is not implemented in the new codebase. 
Please use the new // implementation."); - if (kv.kvec_d.size() != kv.kvec_c.size()) - { - // ModuleBase::WARNING_QUIT("k_vec_d2c", "Size of Cartesian and Direct K vectors mismatch. "); - kv.kvec_c.resize(kv.kvec_d.size()); - } - int nks = kv.kvec_d.size(); // always convert all k vectors - - for (int i = 0; i < nks; i++) - { - // wrong!! kvec_c[i] = G * kvec_d[i]; - // mohan fixed bug 2010-1-10 - if (std::abs(kv.kvec_d[i].x) < 1.0e-10) + if (kv.kvec_d.size () != kv.kvec_c.size ()) { - kv.kvec_d[i].x = 0.0; + // ModuleBase::WARNING_QUIT("k_vec_d2c", "Size of Cartesian and Direct K vectors mismatch. "); + kv.kvec_c.resize (kv.kvec_d.size ()); } - if (std::abs(kv.kvec_d[i].y) < 1.0e-10) - { - kv.kvec_d[i].y = 0.0; - } - if (std::abs(kv.kvec_d[i].z) < 1.0e-10) + int nks = kv.kvec_d.size (); // always convert all k vectors + + for (int i = 0; i < nks; i++) { - kv.kvec_d[i].z = 0.0; - } + // wrong!! kvec_c[i] = G * kvec_d[i]; + // mohan fixed bug 2010-1-10 + if (std::abs (kv.kvec_d[i].x) < 1.0e-10) + { + kv.kvec_d[i].x = 0.0; + } + if (std::abs (kv.kvec_d[i].y) < 1.0e-10) + { + kv.kvec_d[i].y = 0.0; + } + if (std::abs (kv.kvec_d[i].z) < 1.0e-10) + { + kv.kvec_d[i].z = 0.0; + } - kv.kvec_c[i] = kv.kvec_d[i] * reciprocal_vec; + kv.kvec_c[i] = kv.kvec_d[i] * reciprocal_vec; - // mohan add2012-06-10 - if (std::abs(kv.kvec_c[i].x) < 1.0e-10) - { - kv.kvec_c[i].x = 0.0; - } - if (std::abs(kv.kvec_c[i].y) < 1.0e-10) - { - kv.kvec_c[i].y = 0.0; - } - if (std::abs(kv.kvec_c[i].z) < 1.0e-10) - { - kv.kvec_c[i].z = 0.0; + // mohan add2012-06-10 + if (std::abs (kv.kvec_c[i].x) < 1.0e-10) + { + kv.kvec_c[i].x = 0.0; + } + if (std::abs (kv.kvec_c[i].y) < 1.0e-10) + { + kv.kvec_c[i].y = 0.0; + } + if (std::abs (kv.kvec_c[i].z) < 1.0e-10) + { + kv.kvec_c[i].z = 0.0; + } } - } } -void kvec_c2d(K_Vectors& kv, const ModuleBase::Matrix3& latvec) +void + kvec_c2d (K_Vectors& kv, const ModuleBase::Matrix3& latvec) { - if (kv.kvec_d.size() != kv.kvec_c.size()) - { - 
kv.kvec_d.resize(kv.kvec_c.size()); - } - int nks = kv.kvec_d.size(); // always convert all k vectors + if (kv.kvec_d.size () != kv.kvec_c.size ()) + { + kv.kvec_d.resize (kv.kvec_c.size ()); + } + int nks = kv.kvec_d.size (); // always convert all k vectors - ModuleBase::Matrix3 RT = latvec.Transpose(); + ModuleBase::Matrix3 RT = latvec.Transpose (); for (int i = 0; i < nks; i++) - { - // std::cout << " ik=" << i - // << " kvec.x=" << kvec_c[i].x - // << " kvec.y=" << kvec_c[i].y - // << " kvec.z=" << kvec_c[i].z << std::endl; - // wrong! kvec_d[i] = RT * kvec_c[i]; - // mohan fixed bug 2011-03-07 - kv.kvec_d[i] = kv.kvec_c[i] * RT; - } + { + // std::cout << " ik=" << i + // << " kvec.x=" << kvec_c[i].x + // << " kvec.y=" << kvec_c[i].y + // << " kvec.z=" << kvec_c[i].z << std::endl; + // wrong! kvec_d[i] = RT * kvec_c[i]; + // mohan fixed bug 2011-03-07 + kv.kvec_d[i] = kv.kvec_c[i] * RT; + } } -void set_both_kvec(K_Vectors& kv, const ModuleBase::Matrix3& G, const ModuleBase::Matrix3& R, std::string& skpt) +void + set_both_kvec (K_Vectors& kv, const ModuleBase::Matrix3& G, const ModuleBase::Matrix3& R, std::string& skpt) { if (true) // Originally GlobalV::FINAL_SCF, but we don't have this variable in the new code. - { - if (kv.get_k_nkstot() == 0) - { - kv.kd_done = true; - kv.kc_done = false; - } - else { - if (kv.get_k_kword() == "Cartesian" || kv.get_k_kword() == "C") - { - kv.kc_done = true; - kv.kd_done = false; - } - else if (kv.get_k_kword() == "Direct" || kv.get_k_kword() == "D") - { - kv.kd_done = true; - kv.kc_done = false; - } + if (kv.get_k_nkstot () == 0) + { + kv.kd_done = true; + kv.kc_done = false; + } else - { - GlobalV::ofs_warning << " Error : neither Cartesian nor Direct kpoint." 
<< std::endl; - } + { + if (kv.get_k_kword () == "Cartesian" || kv.get_k_kword () == "C") + { + kv.kc_done = true; + kv.kd_done = false; + } + else if (kv.get_k_kword () == "Direct" || kv.get_k_kword () == "D") + { + kv.kd_done = true; + kv.kc_done = false; + } + else + { + GlobalV::ofs_warning << " Error : neither Cartesian nor Direct kpoint." << std::endl; + } + } } - } // set cartesian k vectors. if (!kv.kc_done && kv.kd_done) - { - KVectorUtils::kvec_d2c(kv, G); - kv.kc_done = true; - } + { + KVectorUtils::kvec_d2c (kv, G); + kv.kc_done = true; + } // set direct k vectors else if (kv.kc_done && !kv.kd_done) - { - KVectorUtils::kvec_c2d(kv, R); - kv.kd_done = true; - } + { + KVectorUtils::kvec_c2d (kv, R); + kv.kd_done = true; + } std::string table; table += " K-POINTS DIRECT COORDINATES\n"; - table += FmtCore::format("%8s%12s%12s%12s%8s\n", "KPOINTS", "DIRECT_X", "DIRECT_Y", "DIRECT_Z", "WEIGHT"); - for (int i = 0; i < kv.get_nkstot(); i++) - { - table += FmtCore::format("%8d%12.8f%12.8f%12.8f%8.4f\n", - i + 1, - kv.kvec_d[i].x, - kv.kvec_d[i].y, - kv.kvec_d[i].z, - kv.wk[i]); - } + table += FmtCore::format ("%8s%12s%12s%12s%8s\n", "KPOINTS", "DIRECT_X", "DIRECT_Y", "DIRECT_Z", "WEIGHT"); + for (int i = 0; i < kv.get_nkstot (); i++) + { + table += FmtCore::format ("%8d%12.8f%12.8f%12.8f%8.4f\n", + i + 1, + kv.kvec_d[i].x, + kv.kvec_d[i].y, + kv.kvec_d[i].z, + kv.wk[i]); + } GlobalV::ofs_running << table << std::endl; if (GlobalV::MY_RANK == 0) - { - std::stringstream ss; - ss << " " << std::setw(40) << "nkstot now" - << " = " << kv.get_nkstot() << std::endl; - ss << table << std::endl; - skpt = ss.str(); - } + { + std::stringstream ss; + ss << " " << std::setw (40) << "nkstot now" + << " = " << kv.get_nkstot () << std::endl; + ss << table << std::endl; + skpt = ss.str (); + } return; } -void set_after_vc(K_Vectors& kv, const int& nspin_in, const ModuleBase::Matrix3& reciprocal_vec) +void + set_after_vc (K_Vectors& kv, const int& nspin_in, const 
ModuleBase::Matrix3& reciprocal_vec) { GlobalV::ofs_running << "\n SETUP K-POINTS" << std::endl; // kv.nspin = nspin_in; - kv.set_nspin(nspin_in); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "nspin", kv.get_nspin()); + kv.set_nspin (nspin_in); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "nspin", kv.get_nspin ()); // set cartesian k vectors. - KVectorUtils::kvec_d2c(kv, reciprocal_vec); + KVectorUtils::kvec_d2c (kv, reciprocal_vec); std::string table; table += "K-POINTS DIRECT COORDINATES\n"; - table += FmtCore::format("%8s%12s%12s%12s%8s\n", "KPOINTS", "DIRECT_X", "DIRECT_Y", "DIRECT_Z", "WEIGHT"); - for (int i = 0; i < kv.get_nks(); i++) - { - table += FmtCore::format("%8d%12.8f%12.8f%12.8f%8.4f\n", - i + 1, - kv.kvec_d[i].x, - kv.kvec_d[i].y, - kv.kvec_d[i].z, - kv.wk[i]); - } + table += FmtCore::format ("%8s%12s%12s%12s%8s\n", "KPOINTS", "DIRECT_X", "DIRECT_Y", "DIRECT_Z", "WEIGHT"); + for (int i = 0; i < kv.get_nks (); i++) + { + table += FmtCore::format ("%8d%12.8f%12.8f%12.8f%8.4f\n", + i + 1, + kv.kvec_d[i].x, + kv.kvec_d[i].y, + kv.kvec_d[i].z, + kv.wk[i]); + } GlobalV::ofs_running << table << std::endl; kv.kd_done = true; kv.kc_done = true; - print_klists(kv, GlobalV::ofs_running); + print_klists (kv, GlobalV::ofs_running); } -void print_klists(const K_Vectors& kv, std::ofstream& ofs) +void + print_klists (const K_Vectors& kv, std::ofstream& ofs) { - ModuleBase::TITLE("KVectorUtils", "print_klists"); - int nks = kv.get_nks(); - int nkstot = kv.get_nkstot(); + ModuleBase::TITLE ("KVectorUtils", "print_klists"); + int nks = kv.get_nks (); + int nkstot = kv.get_nkstot (); if (nkstot < nks) - { - std::cout << "\n nkstot=" << nkstot; - std::cout << "\n nks=" << nks; - ModuleBase::WARNING_QUIT("print_klists", "nkstot < nks"); - } + { + std::cout << "\n nkstot=" << nkstot; + std::cout << "\n nks=" << nks; + ModuleBase::WARNING_QUIT ("print_klists", "nkstot < nks"); + } std::string table; table += " K-POINTS CARTESIAN COORDINATES\n"; - table += 
FmtCore::format("%8s%12s%12s%12s%8s\n", "KPOINTS", "CARTESIAN_X", "CARTESIAN_Y", "CARTESIAN_Z", "WEIGHT"); + table += FmtCore::format ("%8s%12s%12s%12s%8s\n", "KPOINTS", "CARTESIAN_X", "CARTESIAN_Y", "CARTESIAN_Z", "WEIGHT"); for (int i = 0; i < nks; i++) - { - table += FmtCore::format("%8d%12.8f%12.8f%12.8f%8.4f\n", - i + 1, - kv.kvec_c[i].x, - kv.kvec_c[i].y, - kv.kvec_c[i].z, - kv.wk[i]); - } + { + table += FmtCore::format ("%8d%12.8f%12.8f%12.8f%8.4f\n", + i + 1, + kv.kvec_c[i].x, + kv.kvec_c[i].y, + kv.kvec_c[i].z, + kv.wk[i]); + } GlobalV::ofs_running << "\n" << table << std::endl; - table.clear(); + table.clear (); table += " K-POINTS DIRECT COORDINATES\n"; - table += FmtCore::format("%8s%12s%12s%12s%8s\n", "KPOINTS", "DIRECT_X", "DIRECT_Y", "DIRECT_Z", "WEIGHT"); + table += FmtCore::format ("%8s%12s%12s%12s%8s\n", "KPOINTS", "DIRECT_X", "DIRECT_Y", "DIRECT_Z", "WEIGHT"); for (int i = 0; i < nks; i++) - { - table += FmtCore::format("%8d%12.8f%12.8f%12.8f%8.4f\n", - i + 1, - kv.kvec_d[i].x, - kv.kvec_d[i].y, - kv.kvec_d[i].z, - kv.wk[i]); - } + { + table += FmtCore::format ("%8d%12.8f%12.8f%12.8f%8.4f\n", + i + 1, + kv.kvec_d[i].x, + kv.kvec_d[i].y, + kv.kvec_d[i].z, + kv.wk[i]); + } GlobalV::ofs_running << "\n" << table << std::endl; return; } #ifdef __MPI -void kvec_mpi_k(K_Vectors& kv) +void + kvec_mpi_k (K_Vectors& kv) { - ModuleBase::TITLE("KVectorUtils", "kvec_mpi_k"); + ModuleBase::TITLE ("KVectorUtils", "kvec_mpi_k"); - Parallel_Common::bcast_bool(kv.kc_done); + Parallel_Common::bcast_bool (kv.kc_done); - Parallel_Common::bcast_bool(kv.kd_done); + Parallel_Common::bcast_bool (kv.kd_done); - Parallel_Common::bcast_int(kv.nspin); + Parallel_Common::bcast_int (kv.nspin); - Parallel_Common::bcast_int(kv.nkstot); + Parallel_Common::bcast_int (kv.nkstot); - Parallel_Common::bcast_int(kv.nkstot_full); + Parallel_Common::bcast_int (kv.nkstot_full); - Parallel_Common::bcast_int(kv.nmp, 3); + Parallel_Common::bcast_int (kv.nmp, 3); - 
kv.kl_segids.resize(kv.nkstot); - Parallel_Common::bcast_int(kv.kl_segids.data(), kv.nkstot); + kv.kl_segids.resize (kv.nkstot); + Parallel_Common::bcast_int (kv.kl_segids.data (), kv.nkstot); - Parallel_Common::bcast_double(kv.koffset, 3); + Parallel_Common::bcast_double (kv.koffset, 3); kv.nks = kv.para_k.nks_pool[GlobalV::MY_POOL]; GlobalV::ofs_running << std::endl; - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Number of k-points in this process", kv.nks); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Number of k-points in this process", kv.nks); int nks_minimum = kv.nks; - Parallel_Reduce::reduce_min(nks_minimum); + Parallel_Reduce::reduce_min (nks_minimum); if (nks_minimum == 0) - { - ModuleBase::WARNING_QUIT("K_Vectors::mpi_k()", " nks == 0, some processor have no k points!"); - } + { + ModuleBase::WARNING_QUIT ("K_Vectors::mpi_k()", " nks == 0, some processor have no k points!"); + } else - { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Minimum distributed k-point number", nks_minimum); - } + { + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Minimum distributed k-point number", nks_minimum); + } - std::vector isk_aux(kv.nkstot); - std::vector wk_aux(kv.nkstot); - std::vector kvec_c_aux(kv.nkstot * 3); - std::vector kvec_d_aux(kv.nkstot * 3); - std::vector kvec_c_full_aux(kv.nkstot_full * 3); + std::vector isk_aux (kv.nkstot); + std::vector wk_aux (kv.nkstot); + std::vector kvec_c_aux (kv.nkstot * 3); + std::vector kvec_d_aux (kv.nkstot * 3); + std::vector kvec_c_full_aux (kv.nkstot_full * 3); // collect and process in rank 0 if (GlobalV::MY_RANK == 0) - { - for (int ik = 0; ik < kv.nkstot; ik++) { - isk_aux[ik] = kv.isk[ik]; - wk_aux[ik] = kv.wk[ik]; - kvec_c_aux[3 * ik] = kv.kvec_c[ik].x; - kvec_c_aux[3 * ik + 1] = kv.kvec_c[ik].y; - kvec_c_aux[3 * ik + 2] = kv.kvec_c[ik].z; - kvec_d_aux[3 * ik] = kv.kvec_d[ik].x; - kvec_d_aux[3 * ik + 1] = kv.kvec_d[ik].y; - kvec_d_aux[3 * ik + 2] = kv.kvec_d[ik].z; - kvec_c_full_aux[3 * ik] = 
kv.kvec_c_full[ik].x; - kvec_c_full_aux[3 * ik + 1] = kv.kvec_c_full[ik].y; - kvec_c_full_aux[3 * ik + 2] = kv.kvec_c_full[ik].z; + for (int ik = 0; ik < kv.nkstot; ik++) + { + isk_aux[ik] = kv.isk[ik]; + wk_aux[ik] = kv.wk[ik]; + kvec_c_aux[3 * ik] = kv.kvec_c[ik].x; + kvec_c_aux[3 * ik + 1] = kv.kvec_c[ik].y; + kvec_c_aux[3 * ik + 2] = kv.kvec_c[ik].z; + kvec_d_aux[3 * ik] = kv.kvec_d[ik].x; + kvec_d_aux[3 * ik + 1] = kv.kvec_d[ik].y; + kvec_d_aux[3 * ik + 2] = kv.kvec_d[ik].z; + kvec_c_full_aux[3 * ik] = kv.kvec_c_full[ik].x; + kvec_c_full_aux[3 * ik + 1] = kv.kvec_c_full[ik].y; + kvec_c_full_aux[3 * ik + 2] = kv.kvec_c_full[ik].z; + } } - } // broadcast k point data to all processors - Parallel_Common::bcast_int(isk_aux.data(), kv.nkstot); + Parallel_Common::bcast_int (isk_aux.data (), kv.nkstot); - Parallel_Common::bcast_double(wk_aux.data(), kv.nkstot); - Parallel_Common::bcast_double(kvec_c_aux.data(), kv.nkstot * 3); - Parallel_Common::bcast_double(kvec_d_aux.data(), kv.nkstot * 3); - Parallel_Common::bcast_double(kvec_c_full_aux.data(), kv.nkstot_full * 3); + Parallel_Common::bcast_double (wk_aux.data (), kv.nkstot); + Parallel_Common::bcast_double (kvec_c_aux.data (), kv.nkstot * 3); + Parallel_Common::bcast_double (kvec_d_aux.data (), kv.nkstot * 3); + Parallel_Common::bcast_double (kvec_c_full_aux.data (), kv.nkstot_full * 3); // process k point data in each processor - kv.renew(kv.nks * kv.nspin); + kv.renew (kv.nks * kv.nspin); // distribute int k_index = 0; for (int i = 0; i < kv.nks; i++) - { - // 3 is because each k point has three value:kx, ky, kz - k_index = i + kv.para_k.startk_pool[GlobalV::MY_POOL]; - kv.kvec_c[i].x = kvec_c_aux[k_index * 3]; - kv.kvec_c[i].y = kvec_c_aux[k_index * 3 + 1]; - kv.kvec_c[i].z = kvec_c_aux[k_index * 3 + 2]; - kv.kvec_d[i].x = kvec_d_aux[k_index * 3]; - kv.kvec_d[i].y = kvec_d_aux[k_index * 3 + 1]; - kv.kvec_d[i].z = kvec_d_aux[k_index * 3 + 2]; - kv.kvec_c_full[i].x = kvec_c_full_aux[k_index * 3]; - 
kv.kvec_c_full[i].y = kvec_c_full_aux[k_index * 3 + 1]; - kv.kvec_c_full[i].z = kvec_c_full_aux[k_index * 3 + 2]; - kv.wk[i] = wk_aux[k_index]; - kv.isk[i] = isk_aux[k_index]; - } + { + // 3 is because each k point has three value:kx, ky, kz + k_index = i + kv.para_k.startk_pool[GlobalV::MY_POOL]; + kv.kvec_c[i].x = kvec_c_aux[k_index * 3]; + kv.kvec_c[i].y = kvec_c_aux[k_index * 3 + 1]; + kv.kvec_c[i].z = kvec_c_aux[k_index * 3 + 2]; + kv.kvec_d[i].x = kvec_d_aux[k_index * 3]; + kv.kvec_d[i].y = kvec_d_aux[k_index * 3 + 1]; + kv.kvec_d[i].z = kvec_d_aux[k_index * 3 + 2]; + kv.kvec_c_full[i].x = kvec_c_full_aux[k_index * 3]; + kv.kvec_c_full[i].y = kvec_c_full_aux[k_index * 3 + 1]; + kv.kvec_c_full[i].z = kvec_c_full_aux[k_index * 3 + 2]; + kv.wk[i] = wk_aux[k_index]; + kv.isk[i] = isk_aux[k_index]; + } #ifdef __EXX if (ModuleSymmetry::Symmetry::symm_flag == 1) - { // bcast kstars - kv.kstars.resize(kv.nkstot); - for (int ikibz = 0; ikibz < kv.nkstot; ++ikibz) - { - int starsize = kv.kstars[ikibz].size(); - Parallel_Common::bcast_int(starsize); - //GlobalV::ofs_running << "starsize: " << starsize << std::endl; - auto ks = kv.kstars[ikibz].begin(); - for (int ik = 0; ik < starsize; ++ik) - { - int isym = 0; - ModuleBase::Vector3 ks_vec(0, 0, 0); - if (GlobalV::MY_RANK == 0) - { - isym = ks->first; - ks_vec = ks->second; - ++ks; - } - Parallel_Common::bcast_int(isym); - Parallel_Common::bcast_double(ks_vec.x); - Parallel_Common::bcast_double(ks_vec.y); - Parallel_Common::bcast_double(ks_vec.z); - //GlobalV::ofs_running << "isym: " << isym << " ks_vec: " << ks_vec.x << " " << ks_vec.y << " " - // << ks_vec.z << std::endl; - if (GlobalV::MY_RANK != 0) + { // bcast kstars + kv.kstars.resize (kv.nkstot); + for (int ikibz = 0; ikibz < kv.nkstot; ++ikibz) { - kv.kstars[ikibz].insert(std::make_pair(isym, ks_vec)); + int starsize = kv.kstars[ikibz].size (); + Parallel_Common::bcast_int (starsize); + // GlobalV::ofs_running << "starsize: " << starsize << std::endl; + auto ks 
= kv.kstars[ikibz].begin (); + for (int ik = 0; ik < starsize; ++ik) + { + int isym = 0; + ModuleBase::Vector3 ks_vec (0, 0, 0); + if (GlobalV::MY_RANK == 0) + { + isym = ks->first; + ks_vec = ks->second; + ++ks; + } + Parallel_Common::bcast_int (isym); + Parallel_Common::bcast_double (ks_vec.x); + Parallel_Common::bcast_double (ks_vec.y); + Parallel_Common::bcast_double (ks_vec.z); + // GlobalV::ofs_running << "isym: " << isym << " ks_vec: " << ks_vec.x << " " << ks_vec.y << + // " " + // << ks_vec.z << std::endl; + if (GlobalV::MY_RANK != 0) + { + kv.kstars[ikibz].insert (std::make_pair (isym, ks_vec)); + } + } } - } } - } #endif } // END SUBROUTINE #endif - -void kvec_ibz_kpoint(K_Vectors& kv, +void + kvec_ibz_kpoint (K_Vectors& kv, const ModuleSymmetry::Symmetry& symm, bool use_symm, std::string& skpt, @@ -358,34 +365,40 @@ void kvec_ibz_kpoint(K_Vectors& kv, bool& match) { if (GlobalV::MY_RANK != 0) - { - return; - } - ModuleBase::TITLE("K_Vectors", "ibz_kpoint"); + { + return; + } + ModuleBase::TITLE ("K_Vectors", "ibz_kpoint"); // k-lattice: "pricell" of reciprocal space // CAUTION: should fit into all k-input method, not only MP !!! 
// the basis vector of reciprocal lattice: recip_vec1, recip_vec2, recip_vec3 - ModuleBase::Vector3 recip_vec1(ucell.G.e11, ucell.G.e12, ucell.G.e13); - ModuleBase::Vector3 recip_vec2(ucell.G.e21, ucell.G.e22, ucell.G.e23); - ModuleBase::Vector3 recip_vec3(ucell.G.e31, ucell.G.e32, ucell.G.e33); + ModuleBase::Vector3 recip_vec1 (ucell.G.e11, ucell.G.e12, ucell.G.e13); + ModuleBase::Vector3 recip_vec2 (ucell.G.e21, ucell.G.e22, ucell.G.e23); + ModuleBase::Vector3 recip_vec3 (ucell.G.e31, ucell.G.e32, ucell.G.e33); ModuleBase::Vector3 k_vec1, k_vec2, k_vec3; ModuleBase::Matrix3 k_vec; - if (kv.get_is_mp()) - { - k_vec1 = ModuleBase::Vector3(recip_vec1.x / kv.nmp[0], recip_vec1.y / kv.nmp[0], recip_vec1.z / kv.nmp[0]); - k_vec2 = ModuleBase::Vector3(recip_vec2.x / kv.nmp[1], recip_vec2.y / kv.nmp[1], recip_vec2.z / kv.nmp[1]); - k_vec3 = ModuleBase::Vector3(recip_vec3.x / kv.nmp[2], recip_vec3.y / kv.nmp[2], recip_vec3.z / kv.nmp[2]); - k_vec = ModuleBase::Matrix3(k_vec1.x, - k_vec1.y, - k_vec1.z, - k_vec2.x, - k_vec2.y, - k_vec2.z, - k_vec3.x, - k_vec3.y, - k_vec3.z); - } + if (kv.get_is_mp ()) + { + k_vec1 = ModuleBase::Vector3 (recip_vec1.x / kv.nmp[0], + recip_vec1.y / kv.nmp[0], + recip_vec1.z / kv.nmp[0]); + k_vec2 = ModuleBase::Vector3 (recip_vec2.x / kv.nmp[1], + recip_vec2.y / kv.nmp[1], + recip_vec2.z / kv.nmp[1]); + k_vec3 = ModuleBase::Vector3 (recip_vec3.x / kv.nmp[2], + recip_vec3.y / kv.nmp[2], + recip_vec3.z / kv.nmp[2]); + k_vec = ModuleBase::Matrix3 (k_vec1.x, + k_vec1.y, + k_vec1.z, + k_vec2.x, + k_vec2.y, + k_vec2.z, + k_vec3.x, + k_vec3.y, + k_vec3.z); + } //=============================================== // search in all space group operations @@ -393,194 +406,203 @@ void kvec_ibz_kpoint(K_Vectors& kv, // inverse operation, double it. 
//=============================================== bool include_inv = false; - std::vector kgmatrix(48 * 2); - ModuleBase::Matrix3 inv(-1, 0, 0, 0, -1, 0, 0, 0, -1); - ModuleBase::Matrix3 ind(1, 0, 0, 0, 1, 0, 0, 0, 1); + std::vector kgmatrix (48 * 2); + ModuleBase::Matrix3 inv (-1, 0, 0, 0, -1, 0, 0, 0, -1); + ModuleBase::Matrix3 ind (1, 0, 0, 0, 1, 0, 0, 0, 1); int nrotkm = 0; if (use_symm) - { - // bravais type of reciprocal lattice and k-lattice - - double recip_vec_const[6]; - double recip_vec0_const[6]; - double k_vec_const[6]; - double k_vec0_const[6]; - int recip_brav_type = 15; - int k_brav_type = 15; - std::string recip_brav_name; - std::string k_brav_name; - ModuleBase::Vector3 k_vec01 = k_vec1, k_vec02 = k_vec2, k_vec03 = k_vec3; - - // it's not necessary to calculate gb01, gb02, gb03, - // because they are only used as a vector, no need to be assigned values - - // determine the Bravais type and related parameters of the lattice - symm.lattice_type(recip_vec1, - recip_vec2, - recip_vec3, - recip_vec1, - recip_vec2, - recip_vec3, - recip_vec_const, - recip_vec0_const, - recip_brav_type, - recip_brav_name, - ucell.atoms, - false, - nullptr); - GlobalV::ofs_running << "\n For reciprocal-space lattice" << std::endl; - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Bravais lattice type", recip_brav_type); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Bravais lattice name", recip_brav_name); - - // the map of bravis lattice from real to reciprocal space - // for example, 3(fcc) in real space matches 2(bcc) in reciprocal space - std::vector ibrav_a2b{1, 3, 2, 4, 5, 6, 7, 8, 10, 9, 11, 12, 13, 14}; - // check if the reciprocal lattice is compatible with the real space lattice - auto ibrav_match = [&](int ibrav_b) -> bool { - const int& ibrav_a = symm.real_brav; - if (ibrav_a < 1 || ibrav_a > 14) - { - return false; - } - return (ibrav_b == ibrav_a2b[ibrav_a - 1]); - }; - if (!ibrav_match(recip_brav_type)) // if not match, exit and return { - 
GlobalV::ofs_running << "Error: Bravais lattice type of reciprocal lattice is not compatible with that of " - "real space lattice:" - << std::endl; - GlobalV::ofs_running << "ibrav of real space lattice: " << symm.ilattname << std::endl; - GlobalV::ofs_running << "ibrav of reciprocal lattice: " << recip_brav_name << std::endl; - GlobalV::ofs_running << "(which should be " << ibrav_a2b[symm.real_brav - 1] << ")." << std::endl; - match = false; - return; - } + // bravais type of reciprocal lattice and k-lattice + + double recip_vec_const[6]; + double recip_vec0_const[6]; + double k_vec_const[6]; + double k_vec0_const[6]; + int recip_brav_type = 15; + int k_brav_type = 15; + std::string recip_brav_name; + std::string k_brav_name; + ModuleBase::Vector3 k_vec01 = k_vec1, k_vec02 = k_vec2, k_vec03 = k_vec3; + + // it's not necessary to calculate gb01, gb02, gb03, + // because they are only used as a vector, no need to be assigned values + + // determine the Bravais type and related parameters of the lattice + symm.lattice_type (recip_vec1, + recip_vec2, + recip_vec3, + recip_vec1, + recip_vec2, + recip_vec3, + recip_vec_const, + recip_vec0_const, + recip_brav_type, + recip_brav_name, + ucell.atoms, + false, + nullptr); + GlobalV::ofs_running << "\n For reciprocal-space lattice" << std::endl; + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Bravais lattice type", recip_brav_type); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Bravais lattice name", recip_brav_name); + + // the map of bravis lattice from real to reciprocal space + // for example, 3(fcc) in real space matches 2(bcc) in reciprocal space + std::vector ibrav_a2b{1, 3, 2, 4, 5, 6, 7, 8, 10, 9, 11, 12, 13, 14}; + // check if the reciprocal lattice is compatible with the real space lattice + auto ibrav_match = [&] (int ibrav_b) -> bool + { + const int& ibrav_a = symm.real_brav; + if (ibrav_a < 1 || ibrav_a > 14) + { + return false; + } + return (ibrav_b == ibrav_a2b[ibrav_a - 1]); + }; + if 
(!ibrav_match (recip_brav_type)) // if not match, exit and return + { + GlobalV::ofs_running + << "Error: Bravais lattice type of reciprocal lattice is not compatible with that of " + "real space lattice:" + << std::endl; + GlobalV::ofs_running << "ibrav of real space lattice: " << symm.ilattname << std::endl; + GlobalV::ofs_running << "ibrav of reciprocal lattice: " << recip_brav_name << std::endl; + GlobalV::ofs_running << "(which should be " << ibrav_a2b[symm.real_brav - 1] << ")." << std::endl; + match = false; + return; + } - // if match, continue - if (kv.get_is_mp()) - { - symm.lattice_type(k_vec1, - k_vec2, - k_vec3, - k_vec01, - k_vec02, - k_vec03, - k_vec_const, - k_vec0_const, - k_brav_type, - k_brav_name, - ucell.atoms, - false, - nullptr); - GlobalV::ofs_running << "\n For k-vectors" << std::endl; - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Bravais lattice type", k_brav_type); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Bravais lattice name", k_brav_name); - } - // point-group analysis of reciprocal lattice - ModuleBase::Matrix3 bsymop[48]; - int bnop = 0; - // search again - symm.lattice_type(recip_vec1, - recip_vec2, - recip_vec3, - recip_vec1, - recip_vec2, - recip_vec3, - recip_vec_const, - recip_vec0_const, - recip_brav_type, - recip_brav_name, - ucell.atoms, - false, - nullptr); - ModuleBase::Matrix3 b_optlat_new(recip_vec1.x, recip_vec1.y, recip_vec1.z, - recip_vec2.x, recip_vec2.y, recip_vec2.z, - recip_vec3.x, recip_vec3.y, recip_vec3.z); - // set the crystal point-group symmetry operation - symm.setgroup(bsymop, bnop, recip_brav_type); - // transform the above symmetric operation matrices between different coordinate - symm.gmatrix_convert(bsymop, bsymop, bnop, b_optlat_new, ucell.G); - - // check if all the kgmatrix are in bsymop - auto matequal = [&symm](ModuleBase::Matrix3 a, ModuleBase::Matrix3 b) { - return (symm.equal(a.e11, b.e11) && symm.equal(a.e12, b.e12) && symm.equal(a.e13, b.e13) - && symm.equal(a.e21, b.e21) && 
symm.equal(a.e22, b.e22) && symm.equal(a.e23, b.e23) - && symm.equal(a.e31, b.e31) && symm.equal(a.e32, b.e32) && symm.equal(a.e33, b.e33)); - }; - for (int i = 0; i < symm.nrotk; ++i) - { - match = false; - for (int j = 0; j < bnop; ++j) - { - if (matequal(symm.kgmatrix[i], bsymop[j])) + // if match, continue + if (kv.get_is_mp ()) + { + symm.lattice_type (k_vec1, + k_vec2, + k_vec3, + k_vec01, + k_vec02, + k_vec03, + k_vec_const, + k_vec0_const, + k_brav_type, + k_brav_name, + ucell.atoms, + false, + nullptr); + GlobalV::ofs_running << "\n For k-vectors" << std::endl; + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Bravais lattice type", k_brav_type); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Bravais lattice name", k_brav_name); + } + // point-group analysis of reciprocal lattice + ModuleBase::Matrix3 bsymop[48]; + int bnop = 0; + // search again + symm.lattice_type (recip_vec1, + recip_vec2, + recip_vec3, + recip_vec1, + recip_vec2, + recip_vec3, + recip_vec_const, + recip_vec0_const, + recip_brav_type, + recip_brav_name, + ucell.atoms, + false, + nullptr); + ModuleBase::Matrix3 b_optlat_new (recip_vec1.x, + recip_vec1.y, + recip_vec1.z, + recip_vec2.x, + recip_vec2.y, + recip_vec2.z, + recip_vec3.x, + recip_vec3.y, + recip_vec3.z); + // set the crystal point-group symmetry operation + symm.setgroup (bsymop, bnop, recip_brav_type); + // transform the above symmetric operation matrices between different coordinate + symm.gmatrix_convert (bsymop, bsymop, bnop, b_optlat_new, ucell.G); + + // check if all the kgmatrix are in bsymop + auto matequal = [&symm] (ModuleBase::Matrix3 a, ModuleBase::Matrix3 b) + { + return (symm.equal (a.e11, b.e11) && symm.equal (a.e12, b.e12) && symm.equal (a.e13, b.e13) + && symm.equal (a.e21, b.e21) && symm.equal (a.e22, b.e22) && symm.equal (a.e23, b.e23) + && symm.equal (a.e31, b.e31) && symm.equal (a.e32, b.e32) && symm.equal (a.e33, b.e33)); + }; + for (int i = 0; i < symm.nrotk; ++i) { - match = true; - break; + 
match = false; + for (int j = 0; j < bnop; ++j) + { + if (matequal (symm.kgmatrix[i], bsymop[j])) + { + match = true; + break; + } + } + if (!match) + { + return; + } + } + nrotkm = symm.nrotk; // change if inv not included + for (int i = 0; i < nrotkm; ++i) + { + if (symm.kgmatrix[i] == inv) + { + include_inv = true; + } + kgmatrix[i] = symm.kgmatrix[i]; + } + + if (!include_inv) + { + for (int i = 0; i < symm.nrotk; ++i) + { + kgmatrix[i + symm.nrotk] = inv * symm.kgmatrix[i]; + } + nrotkm = 2 * symm.nrotk; } - } - if (!match) - { - return; - } } - nrotkm = symm.nrotk; // change if inv not included - for (int i = 0; i < nrotkm; ++i) + else if (kv.get_is_mp ()) // only include for Monkhorst-Pack grid { - if (symm.kgmatrix[i] == inv) - { - include_inv = true; - } - kgmatrix[i] = symm.kgmatrix[i]; + nrotkm = 2; + kgmatrix[0] = ind; + kgmatrix[1] = inv; } - - if (!include_inv) + else { - for (int i = 0; i < symm.nrotk; ++i) - { - kgmatrix[i + symm.nrotk] = inv * symm.kgmatrix[i]; - } - nrotkm = 2 * symm.nrotk; + return; } - } - else if (kv.get_is_mp()) // only include for Monkhorst-Pack grid - { - nrotkm = 2; - kgmatrix[0] = ind; - kgmatrix[1] = inv; - } - else - { - return; - } // convert kgmatrix to k-lattice ModuleBase::Matrix3* kkmatrix = new ModuleBase::Matrix3[nrotkm]; - if (kv.get_is_mp()) - { - symm.gmatrix_convert(kgmatrix.data(), kkmatrix, nrotkm, ucell.G, k_vec); - } + if (kv.get_is_mp ()) + { + symm.gmatrix_convert (kgmatrix.data (), kkmatrix, nrotkm, ucell.G, k_vec); + } // direct coordinates of k-points in k-lattice - std::vector> kvec_d_k(kv.get_nkstot()); - if (kv.get_is_mp()) - { - for (int i = 0; i < kv.get_nkstot(); ++i) + std::vector> kvec_d_k (kv.get_nkstot ()); + if (kv.get_is_mp ()) { - kvec_d_k[i] = kv.kvec_d[i] * ucell.G * k_vec.Inverse(); + for (int i = 0; i < kv.get_nkstot (); ++i) + { + kvec_d_k[i] = kv.kvec_d[i] * ucell.G * k_vec.Inverse (); + } } - } // use operation : kgmatrix to find // the new set kvec_d : ir_kpt int nkstot_ibz = 0; - 
assert(kv.get_nkstot() > 0); - std::vector> kvec_d_ibz(kv.get_nkstot()); - std::vector wk_ibz(kv.get_nkstot()); // ibz kpoint wk ,weight of k points - std::vector ibz2bz(kv.get_nkstot()); + assert (kv.get_nkstot () > 0); + std::vector> kvec_d_ibz (kv.get_nkstot ()); + std::vector wk_ibz (kv.get_nkstot ()); // ibz kpoint wk ,weight of k points + std::vector ibz2bz (kv.get_nkstot ()); // nkstot is the total input k-points number. - double weight = 1.0 / static_cast(kv.get_nkstot()); + double weight = 1.0 / static_cast (kv.get_nkstot ()); ModuleBase::Vector3 kvec_rot; ModuleBase::Vector3 kvec_rot_k; @@ -589,221 +611,229 @@ void kvec_ibz_kpoint(K_Vectors& kv, // { // out.printM3("rot matrix",kgmatrix[i]); // } - auto restrict_kpt = [&symm](ModuleBase::Vector3& kvec) { - // in (-0.5, 0.5] - kvec.x = fmod(kvec.x + 100.5 - 0.5 * symm.epsilon, 1) - 0.5 + 0.5 * symm.epsilon; - kvec.y = fmod(kvec.y + 100.5 - 0.5 * symm.epsilon, 1) - 0.5 + 0.5 * symm.epsilon; - kvec.z = fmod(kvec.z + 100.5 - 0.5 * symm.epsilon, 1) - 0.5 + 0.5 * symm.epsilon; - // in [0, 1) - // kvec.x = fmod(kvec.x + 100 + symm.epsilon, 1) - symm.epsilon; - // kvec.y = fmod(kvec.y + 100 + symm.epsilon, 1) - symm.epsilon; - // kvec.z = fmod(kvec.z + 100 + symm.epsilon, 1) - symm.epsilon; - if (std::abs(kvec.x) < symm.epsilon) - { - kvec.x = 0.0; - } - if (std::abs(kvec.y) < symm.epsilon) + auto restrict_kpt = [&symm] (ModuleBase::Vector3& kvec) { - kvec.y = 0.0; - } - if (std::abs(kvec.z) < symm.epsilon) - { - kvec.z = 0.0; - } - return; - }; + // in (-0.5, 0.5] + kvec.x = fmod (kvec.x + 100.5 - 0.5 * symm.epsilon, 1) - 0.5 + 0.5 * symm.epsilon; + kvec.y = fmod (kvec.y + 100.5 - 0.5 * symm.epsilon, 1) - 0.5 + 0.5 * symm.epsilon; + kvec.z = fmod (kvec.z + 100.5 - 0.5 * symm.epsilon, 1) - 0.5 + 0.5 * symm.epsilon; + // in [0, 1) + // kvec.x = fmod(kvec.x + 100 + symm.epsilon, 1) - symm.epsilon; + // kvec.y = fmod(kvec.y + 100 + symm.epsilon, 1) - symm.epsilon; + // kvec.z = fmod(kvec.z + 100 + symm.epsilon, 1) - 
symm.epsilon; + if (std::abs (kvec.x) < symm.epsilon) + { + kvec.x = 0.0; + } + if (std::abs (kvec.y) < symm.epsilon) + { + kvec.y = 0.0; + } + if (std::abs (kvec.z) < symm.epsilon) + { + kvec.z = 0.0; + } + return; + }; // update map k -> irreducible k - kv.ibz_index.assign( kv.get_nkstot_full(), -1); // -1 means not in ibz_kpoint list + kv.ibz_index.assign (kv.get_nkstot_full (), -1); // -1 means not in ibz_kpoint list // search in all k-poins. - for (int i = 0; i < kv.get_nkstot(); ++i) - { - if (!kv.get_is_mp()) { weight = kv.wk[i]; } // use the input weight, instead of 1/nkstot - - // restrict to [0, 1) - restrict_kpt(kv.kvec_d[i]); - - // std::cout << "\n kpoint = " << i << std::endl; - // std::cout << "\n kvec_d = " << kvec_d[i].x << " " << kvec_d[i].y << " " << kvec_d[i].z; - bool already_exist = false; - int exist_number = -1; - // search over all symmetry operations - for (int j = 0; j < nrotkm; ++j) + for (int i = 0; i < kv.get_nkstot (); ++i) { + if (!kv.get_is_mp ()) + { + weight = kv.wk[i]; + } // use the input weight, instead of 1/nkstot + + // restrict to [0, 1) + restrict_kpt (kv.kvec_d[i]); + + // std::cout << "\n kpoint = " << i << std::endl; + // std::cout << "\n kvec_d = " << kvec_d[i].x << " " << kvec_d[i].y << " " << kvec_d[i].z; + bool already_exist = false; + int exist_number = -1; + // search over all symmetry operations + for (int j = 0; j < nrotkm; ++j) + { + if (!already_exist) + { + // rotate the kvec_d within all operations. + // here use direct coordinates. + // kvec_rot = kgmatrix[j] * kvec_d[i]; + // mohan modify 2010-01-30. + // mohan modify again 2010-01-31 + // fix the bug like kvec_d * G; is wrong + kvec_rot + = kv.kvec_d[i] * kgmatrix[j]; // wrong for total energy, but correct for nonlocal force. + // kvec_rot = kgmatrix[j] * kvec_d[i]; //correct for total energy, but wrong for nonlocal + // force. 
+ restrict_kpt (kvec_rot); + if (kv.get_is_mp ()) + { + kvec_rot_k = kvec_d_k[i] * kkmatrix[j]; // k-lattice rotation + kvec_rot_k = kvec_rot_k * k_vec * ucell.G.Inverse (); // convert to recip lattice + restrict_kpt (kvec_rot_k); + + assert (symm.equal (kvec_rot.x, kvec_rot_k.x)); + assert (symm.equal (kvec_rot.y, kvec_rot_k.y)); + assert (symm.equal (kvec_rot.z, kvec_rot_k.z)); + // std::cout << "\n kvec_rot (in recip) = " << kvec_rot.x << " " << kvec_rot.y << " + // " << kvec_rot.z; std::cout << "\n kvec_rot(k to recip)= " << kvec_rot_k.x << " " + // << kvec_rot_k.y << " " << kvec_rot_k.z; + kvec_rot_k = kvec_rot_k * ucell.G * k_vec.Inverse (); // convert back to k-latice + } + for (int k = 0; k < nkstot_ibz; ++k) + { + if (symm.equal (kvec_rot.x, kvec_d_ibz[k].x) + && symm.equal (kvec_rot.y, kvec_d_ibz[k].y) + && symm.equal (kvec_rot.z, kvec_d_ibz[k].z)) + { + already_exist = true; + // find another ibz k point, + // but is already in the ibz_kpoint list. + // so the weight need to +1; + wk_ibz[k] += weight; + exist_number = k; + break; + } + } + } // end !already_exist + } + // if really there is no equivalent k point in the list, then add it. if (!already_exist) - { - // rotate the kvec_d within all operations. - // here use direct coordinates. - // kvec_rot = kgmatrix[j] * kvec_d[i]; - // mohan modify 2010-01-30. - // mohan modify again 2010-01-31 - // fix the bug like kvec_d * G; is wrong - kvec_rot = kv.kvec_d[i] * kgmatrix[j]; // wrong for total energy, but correct for nonlocal force. - // kvec_rot = kgmatrix[j] * kvec_d[i]; //correct for total energy, but wrong for nonlocal force. 
- restrict_kpt(kvec_rot); - if (kv.get_is_mp()) { - kvec_rot_k = kvec_d_k[i] * kkmatrix[j]; // k-lattice rotation - kvec_rot_k = kvec_rot_k * k_vec * ucell.G.Inverse(); // convert to recip lattice - restrict_kpt(kvec_rot_k); - - assert(symm.equal(kvec_rot.x, kvec_rot_k.x)); - assert(symm.equal(kvec_rot.y, kvec_rot_k.y)); - assert(symm.equal(kvec_rot.z, kvec_rot_k.z)); - // std::cout << "\n kvec_rot (in recip) = " << kvec_rot.x << " " << kvec_rot.y << " " << kvec_rot.z; - // std::cout << "\n kvec_rot(k to recip)= " << kvec_rot_k.x << " " << kvec_rot_k.y << " " << - // kvec_rot_k.z; - kvec_rot_k = kvec_rot_k * ucell.G * k_vec.Inverse(); // convert back to k-latice + // if it's a new ibz kpoint. + // nkstot_ibz indicate the index of ibz kpoint. + kvec_d_ibz[nkstot_ibz] = kv.kvec_d[i]; + // output in kpoints file + kv.ibz_index[i] = nkstot_ibz; + + // the weight should be averged k-point weight. + wk_ibz[nkstot_ibz] = weight; + + // ibz2bz records the index of origin k points. + ibz2bz[nkstot_ibz] = i; + ++nkstot_ibz; } - for (int k = 0; k < nkstot_ibz; ++k) + else // mohan fix bug 2010-1-30 { - if (symm.equal(kvec_rot.x, kvec_d_ibz[k].x) && symm.equal(kvec_rot.y, kvec_d_ibz[k].y) - && symm.equal(kvec_rot.z, kvec_d_ibz[k].z)) - { - already_exist = true; - // find another ibz k point, - // but is already in the ibz_kpoint list. - // so the weight need to +1; - wk_ibz[k] += weight; - exist_number = k; - break; - } + // std::cout << "\n\n already exist ! "; + + // std::cout << "\n kvec_rot = " << kvec_rot.x << " " << kvec_rot.y << " " << kvec_rot.z; + // std::cout << "\n kvec_d_ibz = " << kvec_d_ibz[exist_number].x + // << " " << kvec_d_ibz[exist_number].y + // << " " << kvec_d_ibz[exist_number].z; + + double kmol_new = kv.kvec_d[i].norm2 (); + double kmol_old = kvec_d_ibz[exist_number].norm2 (); + + kv.ibz_index[i] = exist_number; + + // std::cout << "\n kmol_new = " << kmol_new; + // std::cout << "\n kmol_old = " << kmol_old; + + // why we need this step? 
+ // because in pw_basis.cpp, while calculate ggwfc2, + // if we want to keep the result of symmetry operation is right. + // we need to fix the number of plane wave. + // and the number of plane wave is depending on the |K+G|, + // so we need to |K|max to be the same as 'no symmetry'. + // mohan 2010-01-30 + if (kmol_new > kmol_old) + { + kvec_d_ibz[exist_number] = kv.kvec_d[i]; + } } - } // end !already_exist - } - // if really there is no equivalent k point in the list, then add it. - if (!already_exist) - { - // if it's a new ibz kpoint. - // nkstot_ibz indicate the index of ibz kpoint. - kvec_d_ibz[nkstot_ibz] = kv.kvec_d[i]; - // output in kpoints file - kv.ibz_index[i] = nkstot_ibz; - - // the weight should be averged k-point weight. - wk_ibz[nkstot_ibz] = weight; - - // ibz2bz records the index of origin k points. - ibz2bz[nkstot_ibz] = i; - ++nkstot_ibz; + // BLOCK_HERE("check k point"); } - else // mohan fix bug 2010-1-30 - { - // std::cout << "\n\n already exist ! "; - - // std::cout << "\n kvec_rot = " << kvec_rot.x << " " << kvec_rot.y << " " << kvec_rot.z; - // std::cout << "\n kvec_d_ibz = " << kvec_d_ibz[exist_number].x - // << " " << kvec_d_ibz[exist_number].y - // << " " << kvec_d_ibz[exist_number].z; - - double kmol_new = kv.kvec_d[i].norm2(); - double kmol_old = kvec_d_ibz[exist_number].norm2(); - - kv.ibz_index[i] = exist_number; - - // std::cout << "\n kmol_new = " << kmol_new; - // std::cout << "\n kmol_old = " << kmol_old; - - // why we need this step? - // because in pw_basis.cpp, while calculate ggwfc2, - // if we want to keep the result of symmetry operation is right. - // we need to fix the number of plane wave. - // and the number of plane wave is depending on the |K+G|, - // so we need to |K|max to be the same as 'no symmetry'. 
- // mohan 2010-01-30 - if (kmol_new > kmol_old) - { - kvec_d_ibz[exist_number] = kv.kvec_d[i]; - } - } - // BLOCK_HERE("check k point"); - } delete[] kkmatrix; #ifdef __EXX // setup kstars according to the final (max-norm) kvec_d_ibz - kv.kstars.resize(nkstot_ibz); + kv.kstars.resize (nkstot_ibz); if (ModuleSymmetry::Symmetry::symm_flag == 1) - { - for (int i = 0; i < kv.get_nkstot(); ++i) { - int exist_number = -1; - int isym = 0; - for (int j = 0; j < nrotkm; ++j) - { - kvec_rot = kv.kvec_d[i] * kgmatrix[j]; - restrict_kpt(kvec_rot); - for (int k = 0; k < nkstot_ibz; ++k) + for (int i = 0; i < kv.get_nkstot (); ++i) { - if (symm.equal(kvec_rot.x, kvec_d_ibz[k].x) && symm.equal(kvec_rot.y, kvec_d_ibz[k].y) - && symm.equal(kvec_rot.z, kvec_d_ibz[k].z)) - { - isym = j; - exist_number = k; - break; - } + int exist_number = -1; + int isym = 0; + for (int j = 0; j < nrotkm; ++j) + { + kvec_rot = kv.kvec_d[i] * kgmatrix[j]; + restrict_kpt (kvec_rot); + for (int k = 0; k < nkstot_ibz; ++k) + { + if (symm.equal (kvec_rot.x, kvec_d_ibz[k].x) + && symm.equal (kvec_rot.y, kvec_d_ibz[k].y) + && symm.equal (kvec_rot.z, kvec_d_ibz[k].z)) + { + isym = j; + exist_number = k; + break; + } + } + if (exist_number != -1) + { + break; + } + } + kv.kstars[exist_number].insert (std::make_pair (isym, kv.kvec_d[i])); } - if (exist_number != -1) - { - break; - } - } - kv.kstars[exist_number].insert(std::make_pair(isym, kv.kvec_d[i])); } - } #endif // output in kpoints file std::stringstream ss; - ss << " " << std::setw(40) << "nkstot" - << " = " << kv.get_nkstot() << std::setw(66) << "ibzkpt" << std::endl; + ss << " " << std::setw (40) << "nkstot" + << " = " << kv.get_nkstot () << std::setw (66) << "ibzkpt" << std::endl; std::string table; table += "K-POINTS REDUCTION ACCORDING TO SYMMETRY\n"; - table += FmtCore::format("%8s%12s%12s%12s%8s%12s%12s%12s\n", - "KPT", - "DIRECT_X", - "DIRECT_Y", - "DIRECT_Z", - "IBZ", - "DIRECT_X", - "DIRECT_Y", - "DIRECT_Z"); - for (int i = 0; i < 
kv.get_nkstot(); ++i) - { - table += FmtCore::format("%8d%12.8f%12.8f%12.8f%8d%12.8f%12.8f%12.8f\n", - i + 1, - kv.kvec_d[i].x, - kv.kvec_d[i].y, - kv.kvec_d[i].z, - kv.ibz_index[i] + 1, - kvec_d_ibz[kv.ibz_index[i]].x, - kvec_d_ibz[kv.ibz_index[i]].y, - kvec_d_ibz[kv.ibz_index[i]].z); - } + table += FmtCore::format ("%8s%12s%12s%12s%8s%12s%12s%12s\n", + "KPT", + "DIRECT_X", + "DIRECT_Y", + "DIRECT_Z", + "IBZ", + "DIRECT_X", + "DIRECT_Y", + "DIRECT_Z"); + for (int i = 0; i < kv.get_nkstot (); ++i) + { + table += FmtCore::format ("%8d%12.8f%12.8f%12.8f%8d%12.8f%12.8f%12.8f\n", + i + 1, + kv.kvec_d[i].x, + kv.kvec_d[i].y, + kv.kvec_d[i].z, + kv.ibz_index[i] + 1, + kvec_d_ibz[kv.ibz_index[i]].x, + kvec_d_ibz[kv.ibz_index[i]].y, + kvec_d_ibz[kv.ibz_index[i]].z); + } ss << table << std::endl; - skpt = ss.str(); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Number of irreducible k-points", nkstot_ibz); + skpt = ss.str (); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Number of irreducible k-points", nkstot_ibz); - table.clear(); + table.clear (); table += "\n K-POINTS REDUCTION ACCORDING TO SYMMETRY\n"; - table += FmtCore::format("%8s%12s%12s%12s%8s%8s\n", "IBZ", "DIRECT_X", "DIRECT_Y", "DIRECT_Z", "WEIGHT", "ibz2bz"); + table += FmtCore::format ("%8s%12s%12s%12s%8s%8s\n", "IBZ", "DIRECT_X", "DIRECT_Y", "DIRECT_Z", "WEIGHT", "ibz2bz"); for (int ik = 0; ik < nkstot_ibz; ik++) - { - table += FmtCore::format("%8d%12.8f%12.8f%12.8f%8.4f%8d\n", - ik + 1, - kvec_d_ibz[ik].x, - kvec_d_ibz[ik].y, - kvec_d_ibz[ik].z, - wk_ibz[ik], - ibz2bz[ik]); - } + { + table += FmtCore::format ("%8d%12.8f%12.8f%12.8f%8.4f%8d\n", + ik + 1, + kvec_d_ibz[ik].x, + kvec_d_ibz[ik].y, + kvec_d_ibz[ik].z, + wk_ibz[ik], + ibz2bz[ik]); + } GlobalV::ofs_running << table << std::endl; // resize the kpoint container according to nkstot_ibz - if (use_symm || kv.get_is_mp()) - { - kv.update_use_ibz(nkstot_ibz, kvec_d_ibz, wk_ibz); - } + if (use_symm || kv.get_is_mp ()) + { + kv.update_use_ibz 
(nkstot_ibz, kvec_d_ibz, wk_ibz); + } return; } diff --git a/source/source_cell/k_vector_utils.h b/source/source_cell/k_vector_utils.h index 624012b7d31..c86f9886fcc 100644 --- a/source/source_cell/k_vector_utils.h +++ b/source/source_cell/k_vector_utils.h @@ -12,9 +12,9 @@ class K_Vectors; namespace KVectorUtils { -void kvec_d2c(K_Vectors& kv, const ModuleBase::Matrix3& reciprocal_vec); +void kvec_d2c (K_Vectors& kv, const ModuleBase::Matrix3& reciprocal_vec); -void kvec_c2d(K_Vectors& kv, const ModuleBase::Matrix3& latvec); +void kvec_c2d (K_Vectors& kv, const ModuleBase::Matrix3& latvec); /** * @brief Sets both the direct and Cartesian k-vectors. @@ -36,7 +36,7 @@ void kvec_c2d(K_Vectors& kv, const ModuleBase::Matrix3& latvec); * @note If the function is called by the master process (MY_RANK == 0), the k-point table is also stored in the * string skpt. */ -void set_both_kvec(K_Vectors& kv, const ModuleBase::Matrix3& G, const ModuleBase::Matrix3& R, std::string& skpt); +void set_both_kvec (K_Vectors& kv, const ModuleBase::Matrix3& G, const ModuleBase::Matrix3& R, std::string& skpt); /** * @brief Sets up the k-points after a volume change. @@ -58,7 +58,7 @@ void set_both_kvec(K_Vectors& kv, const ModuleBase::Matrix3& G, const ModuleBase * @note The function calls the print_klists function to print the k-points in both Cartesian and direct * coordinates. */ -void set_after_vc(K_Vectors& kv, const int& nspin, const ModuleBase::Matrix3& G); +void set_after_vc (K_Vectors& kv, const int& nspin, const ModuleBase::Matrix3& G); /** * @brief Prints the k-points in both Cartesian and direct coordinates. @@ -76,7 +76,7 @@ void set_after_vc(K_Vectors& kv, const int& nspin, const ModuleBase::Matrix3& G) * coordinates. * @note The function uses the FmtCore::format function to format the output. */ -void print_klists(const K_Vectors& kv, std::ofstream& ofs); +void print_klists (const K_Vectors& kv, std::ofstream& ofs); // step 3 : mpi kpoints information. 
@@ -100,7 +100,7 @@ void print_klists(const K_Vectors& kv, std::ofstream& ofs); * @note If a process has no k-points to work on, the function will quit with an error message. */ #ifdef __MPI -void kvec_mpi_k(K_Vectors& kv); +void kvec_mpi_k (K_Vectors& kv); #endif // __MPI /** @@ -116,12 +116,12 @@ void kvec_mpi_k(K_Vectors& kv); * @param ucell The unit cell of the crystal. * @param match A boolean flag that indicates if the results matches the real condition. */ -void kvec_ibz_kpoint(K_Vectors& kv, - const ModuleSymmetry::Symmetry& symm, - bool use_symm, - std::string& skpt, - const UnitCell& ucell, - bool& match); +void kvec_ibz_kpoint (K_Vectors& kv, + const ModuleSymmetry::Symmetry& symm, + bool use_symm, + std::string& skpt, + const UnitCell& ucell, + bool& match); } // namespace KVectorUtils #endif // K_VECTOR_UTILS_H diff --git a/source/source_cell/klist.cpp b/source/source_cell/klist.cpp index cca33fb8ba2..946a4fb39ad 100644 --- a/source/source_cell/klist.cpp +++ b/source/source_cell/klist.cpp @@ -10,36 +10,37 @@ #include "source_io/module_unk/berryphase.h" #include "source_io/module_parameter/parameter.h" -void K_Vectors::cal_ik_global() +void + K_Vectors::cal_ik_global () { const int my_pool = this->para_k.my_pool; - this->ik2iktot.resize(this->nks); + this->ik2iktot.resize (this->nks); #ifdef __MPI - if(this->nspin == 2) - { - for (int ik = 0; ik < this->nks / 2; ++ik) + if (this->nspin == 2) { - this->ik2iktot[ik] = this->para_k.startk_pool[my_pool] + ik; - this->ik2iktot[ik + this->nks / 2] = this->nkstot / 2 + this->para_k.startk_pool[my_pool] + ik; + for (int ik = 0; ik < this->nks / 2; ++ik) + { + this->ik2iktot[ik] = this->para_k.startk_pool[my_pool] + ik; + this->ik2iktot[ik + this->nks / 2] = this->nkstot / 2 + this->para_k.startk_pool[my_pool] + ik; + } } - } else - { - for (int ik = 0; ik < this->nks; ++ik) { - this->ik2iktot[ik] = this->para_k.startk_pool[my_pool] + ik; + for (int ik = 0; ik < this->nks; ++ik) + { + this->ik2iktot[ik] = 
this->para_k.startk_pool[my_pool] + ik; + } } - } #else for (int ik = 0; ik < this->nks; ++ik) - { - this->ik2iktot[ik] = ik; - } + { + this->ik2iktot[ik] = ik; + } #endif - } -void K_Vectors::set(const UnitCell& ucell, +void + K_Vectors::set (const UnitCell& ucell, const ModuleSymmetry::Symmetry& symm, const std::string& k_file_name, const int& nspin_in, @@ -47,7 +48,7 @@ void K_Vectors::set(const UnitCell& ucell, const ModuleBase::Matrix3& latvec, std::ofstream& ofs) { - ModuleBase::TITLE("K_Vectors", "set"); + ModuleBase::TITLE ("K_Vectors", "set"); ofs << "\n"; ofs << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; @@ -64,124 +65,135 @@ void K_Vectors::set(const UnitCell& ucell, // (1) set nspin, read kpoints. this->nspin = nspin_in; - ModuleBase::GlobalFunc::OUT(ofs, "nspin", nspin); + ModuleBase::GlobalFunc::OUT (ofs, "nspin", nspin); if (this->nspin != 1 && this->nspin != 2 && this->nspin != 4) - { - ModuleBase::WARNING_QUIT("K_Vectors::set", "Only available for nspin = 1 or 2 or 4"); - } + { + ModuleBase::WARNING_QUIT ("K_Vectors::set", "Only available for nspin = 1 or 2 or 4"); + } this->nspin = (this->nspin == 4) ? 
1 : this->nspin; // read KPT file and generate K-point grid - bool read_succesfully = this->read_kpoints(ucell,k_file_name); + bool read_succesfully = this->read_kpoints (ucell, k_file_name); #ifdef __MPI - Parallel_Common::bcast_bool(read_succesfully); + Parallel_Common::bcast_bool (read_succesfully); #endif if (!read_succesfully) - { - ModuleBase::WARNING_QUIT("K_Vectors::set", "Something wrong while reading KPOINTS."); - } + { + ModuleBase::WARNING_QUIT ("K_Vectors::set", "Something wrong while reading KPOINTS."); + } // output kpoints file std::string skpt1; std::string skpt2; if (!this->kc_done && this->kd_done) - { - for (size_t ik = 0; ik != this->nkstot_full; ++ik) - this->kvec_c_full[ik] = this->kvec_d[ik] * reciprocal_vec; - } + { + for (size_t ik = 0; ik != this->nkstot_full; ++ik) + { + this->kvec_c_full[ik] = this->kvec_d[ik] * reciprocal_vec; + } + } else if (this->kc_done && !this->kd_done) - { - for (size_t ik = 0; ik != this->nkstot_full; ++ik) - this->kvec_c_full[ik] = this->kvec_c[ik]; - } - + { + for (size_t ik = 0; ik != this->nkstot_full; ++ik) + { + this->kvec_c_full[ik] = this->kvec_c[ik]; + } + } // (2) // only berry phase need all kpoints including time-reversal symmetry! // if symm_flag is not set, only time-reversal symmetry would be considered. if (!berryphase::berry_phase_flag && ModuleSymmetry::Symmetry::symm_flag != -1) - { - bool match = true; - // calculate kpoints in IBZ and reduce kpoints according to symmetry - KVectorUtils::kvec_ibz_kpoint(*this, symm, ModuleSymmetry::Symmetry::symm_flag, skpt1, ucell, match); + { + bool match = true; + // calculate kpoints in IBZ and reduce kpoints according to symmetry + KVectorUtils::kvec_ibz_kpoint (*this, symm, ModuleSymmetry::Symmetry::symm_flag, skpt1, ucell, match); #ifdef __MPI - Parallel_Common::bcast_bool(match); + Parallel_Common::bcast_bool (match); #endif - if (!match) - { - std::cout << "Optimized lattice type of reciprocal lattice cannot match the optimized real lattice. 
" - << std::endl; - std::cout << "It is often because the inaccuracy of lattice parameters in STRU." << std::endl; - if (ModuleSymmetry::Symmetry::symm_autoclose) - { - ModuleBase::WARNING("K_Vectors::ibz_kpoint", "Automatically set symmetry to 0 and continue ..."); - std::cout << "Automatically set symmetry to 0 and continue ..." << std::endl; - ModuleSymmetry::Symmetry::symm_flag = 0; - match = true; - KVectorUtils::kvec_ibz_kpoint(*this, symm, ModuleSymmetry::Symmetry::symm_flag, skpt1, ucell, match); - } else { - ModuleBase::WARNING_QUIT("K_Vectors::ibz_kpoint", - "Possible solutions: \n \ + if (!match) + { + std::cout + << "Optimized lattice type of reciprocal lattice cannot match the optimized real lattice. " + << std::endl; + std::cout << "It is often because the inaccuracy of lattice parameters in STRU." << std::endl; + if (ModuleSymmetry::Symmetry::symm_autoclose) + { + ModuleBase::WARNING ("K_Vectors::ibz_kpoint", + "Automatically set symmetry to 0 and continue ..."); + std::cout << "Automatically set symmetry to 0 and continue ..." << std::endl; + ModuleSymmetry::Symmetry::symm_flag = 0; + match = true; + KVectorUtils::kvec_ibz_kpoint (*this, + symm, + ModuleSymmetry::Symmetry::symm_flag, + skpt1, + ucell, + match); + } + else + { + ModuleBase::WARNING_QUIT ("K_Vectors::ibz_kpoint", "Possible solutions: \n \ 1. Refine the lattice parameters in STRU;\n \ 2. Use a different`symmetry_prec`. \n \ 3. Close symemtry: set `symmetry` to 0 in INPUT. \n \ 4. 
Set `symmetry_autoclose` to 1 in INPUT to automatically close symmetry when this error occurs."); - } + } + } } - } // (3) // Improve k point information // Complement the coordinates of k point -// this->set_both_kvec(reciprocal_vec, latvec, skpt2); - KVectorUtils::set_both_kvec(*this, reciprocal_vec, latvec, skpt2); + // this->set_both_kvec(reciprocal_vec, latvec, skpt2); + KVectorUtils::set_both_kvec (*this, reciprocal_vec, latvec, skpt2); if (GlobalV::MY_RANK == 0) - { - // output kpoints file - std::stringstream skpt; - skpt << PARAM.globalv.global_out_dir << "KPT.info"; //mohan modified 20250325 - std::ofstream ofkpt(skpt.str().c_str()); // clear kpoints - ofkpt << skpt2 << skpt1; - ofkpt.close(); - } + { + // output kpoints file + std::stringstream skpt; + skpt << PARAM.globalv.global_out_dir << "KPT.info"; // mohan modified 20250325 + std::ofstream ofkpt (skpt.str ().c_str ()); // clear kpoints + ofkpt << skpt2 << skpt1; + ofkpt.close (); + } int deg = (nspin_in == 1) ? 2 : 1; // normalize k points weights according to nspin - this->normalize_wk(deg); + this->normalize_wk (deg); // It's very important in parallel case, // firstly do the mpi_k() and then // do set_kup_and_kdw() - this->para_k.kinfo(nkstot, - GlobalV::KPAR, - GlobalV::MY_POOL, - GlobalV::RANK_IN_POOL, - GlobalV::NPROC, - nspin_in); // assign k points to several process pools + this->para_k.kinfo (nkstot, + GlobalV::KPAR, + GlobalV::MY_POOL, + GlobalV::RANK_IN_POOL, + GlobalV::NPROC, + nspin_in); // assign k points to several process pools #ifdef __MPI // distribute K point data to the corresponding process - KVectorUtils::kvec_mpi_k(*this); + KVectorUtils::kvec_mpi_k (*this); #endif // set the k vectors for the up and down spin - this->set_kup_and_kdw(); + this->set_kup_and_kdw (); // initialize ibz_index - this->ibz_index.resize(this->nkstot_full); + this->ibz_index.resize (this->nkstot_full); for (int ik = 0; ik < this->nkstot_full; ik++) - { - this->ibz_index[ik] = ik; - } - + { + 
this->ibz_index[ik] = ik; + } + // get ik2iktot - this->cal_ik_global(); + this->cal_ik_global (); - KVectorUtils::print_klists(*this, ofs); + KVectorUtils::print_klists (*this, ofs); // std::cout << " NUMBER OF K-POINTS : " << nkstot << std::endl; @@ -190,317 +202,321 @@ void K_Vectors::set(const UnitCell& ucell, // 1.reset the size of the K-point container according to nspin and nkstot // 2.reserve space for nspin>2 (symmetry) -void K_Vectors::renew(const int& kpoint_number) +void + K_Vectors::renew (const int& kpoint_number) { - kvec_c.resize(kpoint_number); - kvec_d.resize(kpoint_number); - kvec_c_full.resize(kpoint_number); - wk.resize(kpoint_number); - isk.resize(kpoint_number); - ngk.resize(kpoint_number); + kvec_c.resize (kpoint_number); + kvec_d.resize (kpoint_number); + kvec_c_full.resize (kpoint_number); + wk.resize (kpoint_number); + isk.resize (kpoint_number); + ngk.resize (kpoint_number); return; } // Read the KPT file, which contains K-point coordinates, weights, and grid size information // Generate K-point grid according to different parameters of the KPT file -bool K_Vectors::read_kpoints(const UnitCell& ucell, - const std::string& fn) +bool + K_Vectors::read_kpoints (const UnitCell& ucell, const std::string& fn) { - ModuleBase::TITLE("K_Vectors", "read_kpoints"); + ModuleBase::TITLE ("K_Vectors", "read_kpoints"); if (GlobalV::MY_RANK != 0) - { - return true; - } + { + return true; + } // 1. 
Overwrite the KPT file and default K-point information if needed // mohan add 2010-09-04 if (PARAM.globalv.gamma_only_local) - { - GlobalV::ofs_warning << " Auto generating k-points file: " << fn << std::endl; - std::ofstream ofs(fn.c_str()); - ofs << "K_POINTS" << std::endl; - ofs << "0" << std::endl; - ofs << "Gamma" << std::endl; - ofs << "1 1 1 0 0 0" << std::endl; - ofs.close(); - } - else if (PARAM.inp.kspacing[0] > 0.0) - { - if (PARAM.inp.kspacing[1] <= 0 || PARAM.inp.kspacing[2] <= 0) - { - ModuleBase::WARNING_QUIT("K_Vectors", "kspacing should > 0"); - }; - // number of K points = max(1,int(|bi|/KSPACING+1)) - ModuleBase::Matrix3 btmp = ucell.G; - double b1 = sqrt(btmp.e11 * btmp.e11 + btmp.e12 * btmp.e12 + btmp.e13 * btmp.e13); - double b2 = sqrt(btmp.e21 * btmp.e21 + btmp.e22 * btmp.e22 + btmp.e23 * btmp.e23); - double b3 = sqrt(btmp.e31 * btmp.e31 + btmp.e32 * btmp.e32 + btmp.e33 * btmp.e33); - int nk1 - = std::max(1, static_cast(b1 * ModuleBase::TWO_PI / PARAM.inp.kspacing[0] / ucell.lat0 + 1)); - int nk2 - = std::max(1, static_cast(b2 * ModuleBase::TWO_PI / PARAM.inp.kspacing[1] / ucell.lat0 + 1)); - int nk3 - = std::max(1, static_cast(b3 * ModuleBase::TWO_PI / PARAM.inp.kspacing[2] / ucell.lat0 + 1)); - - GlobalV::ofs_warning << " Generate k-points file according to KSPACING: " << fn << std::endl; - std::ofstream ofs(fn.c_str()); - ofs << "K_POINTS" << std::endl; - ofs << "0" << std::endl; - if (PARAM.inp.kmesh_type == "mp") - { - ofs << "Monkhorst-Pack" << std::endl; - } - else { + GlobalV::ofs_warning << " Auto generating k-points file: " << fn << std::endl; + std::ofstream ofs (fn.c_str ()); + ofs << "K_POINTS" << std::endl; + ofs << "0" << std::endl; ofs << "Gamma" << std::endl; + ofs << "1 1 1 0 0 0" << std::endl; + ofs.close (); + } + else if (PARAM.inp.kspacing[0] > 0.0) + { + if (PARAM.inp.kspacing[1] <= 0 || PARAM.inp.kspacing[2] <= 0) + { + ModuleBase::WARNING_QUIT ("K_Vectors", "kspacing should > 0"); + }; + // number of K points = 
max(1,int(|bi|/KSPACING+1)) + ModuleBase::Matrix3 btmp = ucell.G; + double b1 = sqrt (btmp.e11 * btmp.e11 + btmp.e12 * btmp.e12 + btmp.e13 * btmp.e13); + double b2 = sqrt (btmp.e21 * btmp.e21 + btmp.e22 * btmp.e22 + btmp.e23 * btmp.e23); + double b3 = sqrt (btmp.e31 * btmp.e31 + btmp.e32 * btmp.e32 + btmp.e33 * btmp.e33); + int nk1 = std::max (1, static_cast (b1 * ModuleBase::TWO_PI / PARAM.inp.kspacing[0] / ucell.lat0 + 1)); + int nk2 = std::max (1, static_cast (b2 * ModuleBase::TWO_PI / PARAM.inp.kspacing[1] / ucell.lat0 + 1)); + int nk3 = std::max (1, static_cast (b3 * ModuleBase::TWO_PI / PARAM.inp.kspacing[2] / ucell.lat0 + 1)); + + GlobalV::ofs_warning << " Generate k-points file according to KSPACING: " << fn << std::endl; + std::ofstream ofs (fn.c_str ()); + ofs << "K_POINTS" << std::endl; + ofs << "0" << std::endl; + if (PARAM.inp.kmesh_type == "mp") + { + ofs << "Monkhorst-Pack" << std::endl; + } + else + { + ofs << "Gamma" << std::endl; + } + ofs << nk1 << " " << nk2 << " " << nk3 << " " << PARAM.inp.koffset[0] << " " << PARAM.inp.koffset[1] << " " + << PARAM.inp.koffset[2] << std::endl; + ofs.close (); } - ofs << nk1 << " " << nk2 << " " << nk3 << " " << PARAM.inp.koffset[0] << " " << PARAM.inp.koffset[1] << " " - << PARAM.inp.koffset[2] << std::endl; - ofs.close(); - } // 2. 
Generate the K-point grid automatically according to the KPT file // 2.1 read the KPT file - std::ifstream ifk(fn.c_str()); + std::ifstream ifk (fn.c_str ()); if (!ifk) - { - GlobalV::ofs_warning << " Can't find File name : " << fn << std::endl; - return false; - } + { + GlobalV::ofs_warning << " Can't find File name : " << fn << std::endl; + return false; + } - ifk >> std::setiosflags(std::ios::uppercase); + ifk >> std::setiosflags (std::ios::uppercase); - ifk.clear(); - ifk.seekg(0); + ifk.clear (); + ifk.seekg (0); std::string word; std::string kword; int ierr = 0; - ifk.rdstate(); + ifk.rdstate (); - while (ifk.good()) - { - ifk >> word; - ifk.ignore(150, '\n'); // LiuXh add 20180416, fix bug in k-point file when the first line with comments - if (word == "K_POINTS" || word == "KPOINTS" || word == "K") + while (ifk.good ()) { - ierr = 1; - break; - } + ifk >> word; + ifk.ignore (150, '\n'); // LiuXh add 20180416, fix bug in k-point file when the first line with comments + if (word == "K_POINTS" || word == "KPOINTS" || word == "K") + { + ierr = 1; + break; + } - ifk.rdstate(); - } + ifk.rdstate (); + } if (ierr == 0) - { - GlobalV::ofs_warning << " symbol K_POINTS not found." << std::endl; - return false; - } + { + GlobalV::ofs_warning << " symbol K_POINTS not found." 
<< std::endl; + return false; + } // input k-points are in 2pi/a units - ModuleBase::GlobalFunc::READ_VALUE(ifk, nkstot); + ModuleBase::GlobalFunc::READ_VALUE (ifk, nkstot); this->k_nkstot = nkstot; // LiuXh add 20180619 // std::cout << " nkstot = " << nkstot << std::endl; - ModuleBase::GlobalFunc::READ_VALUE(ifk, kword); + ModuleBase::GlobalFunc::READ_VALUE (ifk, kword); this->k_kword = kword; // LiuXh add 20180619 // mohan update 2021-02-22 const int max_kpoints = 100000; if (nkstot > max_kpoints) - { - GlobalV::ofs_warning << " nkstot > MAX_KPOINTS" << std::endl; - return false; - } + { + GlobalV::ofs_warning << " nkstot > MAX_KPOINTS" << std::endl; + return false; + } // 2.2 Select different methods and generate K-point grid int k_type = 0; if (nkstot == 0) // nkstot==0, use monkhorst_pack. add by dwan - { - if (kword == "Gamma") // MP(Gamma) - { - is_mp = true; - k_type = 0; - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Input type of k points", "Monkhorst-Pack(Gamma)"); - } - else if (kword == "Monkhorst-Pack" || kword == "MP" || kword == "mp") { - is_mp = true; - k_type = 1; - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Input type of k points", "Monkhorst-Pack"); - } - else - { - GlobalV::ofs_warning << " Error: neither Gamma nor Monkhorst-Pack." << std::endl; - return false; - } + if (kword == "Gamma") // MP(Gamma) + { + is_mp = true; + k_type = 0; + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, + "Input type of k points", + "Monkhorst-Pack(Gamma)"); + } + else if (kword == "Monkhorst-Pack" || kword == "MP" || kword == "mp") + { + is_mp = true; + k_type = 1; + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Input type of k points", "Monkhorst-Pack"); + } + else + { + GlobalV::ofs_warning << " Error: neither Gamma nor Monkhorst-Pack." 
<< std::endl; + return false; + } - ifk >> nmp[0] >> nmp[1] >> nmp[2]; + ifk >> nmp[0] >> nmp[1] >> nmp[2]; - koffset[0] = 0; - koffset[1] = 0; - koffset[2] = 0; - if (!(ifk >> koffset[0] >> koffset[1] >> koffset[2])) - { - ModuleBase::WARNING("K_Vectors::read_kpoints", "Missing k-point offsets in the k-points file."); - } - - this->Monkhorst_Pack(nmp, koffset, k_type); - } - else if (nkstot > 0) // nkstot>0, the K-point information is clearly set - { - if (kword == "Cartesian" || kword == "C") // Cartesian coordinates - { - this->renew(nkstot * nspin); // mohan fix bug 2009-09-01 - for (int i = 0; i < nkstot; i++) - { - ifk >> kvec_c[i].x >> kvec_c[i].y >> kvec_c[i].z; - ModuleBase::GlobalFunc::READ_VALUE(ifk, wk[i]); - } + koffset[0] = 0; + koffset[1] = 0; + koffset[2] = 0; + if (!(ifk >> koffset[0] >> koffset[1] >> koffset[2])) + { + ModuleBase::WARNING ("K_Vectors::read_kpoints", "Missing k-point offsets in the k-points file."); + } - this->kc_done = true; - } - else if (kword == "Direct" || kword == "D") // Direct coordinates - { - this->renew(nkstot * nspin); // mohan fix bug 2009-09-01 - for (int i = 0; i < nkstot; i++) - { - ifk >> kvec_d[i].x >> kvec_d[i].y >> kvec_d[i].z; - ModuleBase::GlobalFunc::READ_VALUE(ifk, wk[i]); - } - this->kd_done = true; + this->Monkhorst_Pack (nmp, koffset, k_type); } - else if (kword == "Line_Cartesian") + else if (nkstot > 0) // nkstot>0, the K-point information is clearly set { - if (ModuleSymmetry::Symmetry::symm_flag == 1) - { - ModuleBase::WARNING("K_Vectors::read_kpoints", - "Line mode of k-points is open, please set symmetry to 0 or -1."); - return false; - } + if (kword == "Cartesian" || kword == "C") // Cartesian coordinates + { + this->renew (nkstot * nspin); // mohan fix bug 2009-09-01 + for (int i = 0; i < nkstot; i++) + { + ifk >> kvec_c[i].x >> kvec_c[i].y >> kvec_c[i].z; + ModuleBase::GlobalFunc::READ_VALUE (ifk, wk[i]); + } + + this->kc_done = true; + } + else if (kword == "Direct" || kword == "D") // Direct 
coordinates + { + this->renew (nkstot * nspin); // mohan fix bug 2009-09-01 + for (int i = 0; i < nkstot; i++) + { + ifk >> kvec_d[i].x >> kvec_d[i].y >> kvec_d[i].z; + ModuleBase::GlobalFunc::READ_VALUE (ifk, wk[i]); + } + this->kd_done = true; + } + else if (kword == "Line_Cartesian") + { + if (ModuleSymmetry::Symmetry::symm_flag == 1) + { + ModuleBase::WARNING ("K_Vectors::read_kpoints", + "Line mode of k-points is open, please set symmetry to 0 or -1."); + return false; + } - interpolate_k_between(ifk, kvec_c); + interpolate_k_between (ifk, kvec_c); - std::for_each(wk.begin(), wk.end(), [](double& d) { d = 1.0; }); + std::for_each (wk.begin (), wk.end (), [] (double& d) { d = 1.0; }); - this->kc_done = true; - } + this->kc_done = true; + } - else if (kword == "Line_Direct" || kword == "L" || kword == "Line") - { - if (ModuleSymmetry::Symmetry::symm_flag == 1) - { - ModuleBase::WARNING("K_Vectors::read_kpoints", - "Line mode of k-points is open, please set symmetry to 0 or -1."); - return false; - } + else if (kword == "Line_Direct" || kword == "L" || kword == "Line") + { + if (ModuleSymmetry::Symmetry::symm_flag == 1) + { + ModuleBase::WARNING ("K_Vectors::read_kpoints", + "Line mode of k-points is open, please set symmetry to 0 or -1."); + return false; + } - interpolate_k_between(ifk, kvec_d); + interpolate_k_between (ifk, kvec_d); - std::for_each(wk.begin(), wk.end(), [](double& d) { d = 1.0; }); + std::for_each (wk.begin (), wk.end (), [] (double& d) { d = 1.0; }); - this->kd_done = true; - } + this->kd_done = true; + } - else - { - GlobalV::ofs_warning << " Error : neither Cartesian nor Direct kpoint." << std::endl; - return false; + else + { + GlobalV::ofs_warning << " Error : neither Cartesian nor Direct kpoint." 
<< std::endl; + return false; + } } - } this->nkstot_full = this->nks = this->nkstot; - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "nkstot", nkstot); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "nkstot", nkstot); return true; } // END SUBROUTINE -void K_Vectors::interpolate_k_between(std::ifstream& ifk, std::vector>& kvec) +void + K_Vectors::interpolate_k_between (std::ifstream& ifk, std::vector>& kvec) { // how many special points. int nks_special = this->nkstot; // number of points to the next k points - std::vector nkl(nks_special, 0); + std::vector nkl (nks_special, 0); // coordinates of special points. - std::vector> ks(nks_special); + std::vector> ks (nks_special); // recalculate nkstot. nkstot = 0; /* ISSUE#3482: to distinguish different kline segments */ std::vector kpt_segids; - kl_segids.clear(); - kl_segids.shrink_to_fit(); + kl_segids.clear (); + kl_segids.shrink_to_fit (); int kpt_segid = 0; for (int iks = 0; iks < nks_special; iks++) - { - ifk >> ks[iks].x; - ifk >> ks[iks].y; - ifk >> ks[iks].z; - ModuleBase::GlobalFunc::READ_VALUE(ifk, nkl[iks]); - - assert(nkl[iks] >= 0); - nkstot += nkl[iks]; - /* ISSUE#3482: to distinguish different kline segments */ - if ((nkl[iks] == 1) && (iks != (nks_special - 1))) { - kpt_segid++; - } - kpt_segids.push_back(kpt_segid); - } - assert(nkl[nks_special - 1] == 1); + { + ifk >> ks[iks].x; + ifk >> ks[iks].y; + ifk >> ks[iks].z; + ModuleBase::GlobalFunc::READ_VALUE (ifk, nkl[iks]); + + assert (nkl[iks] >= 0); + nkstot += nkl[iks]; + /* ISSUE#3482: to distinguish different kline segments */ + if ((nkl[iks] == 1) && (iks != (nks_special - 1))) + { + kpt_segid++; + } + kpt_segids.push_back (kpt_segid); + } + assert (nkl[nks_special - 1] == 1); // std::cout << " nkstot = " << nkstot << std::endl; - this->renew(nkstot * nspin); // mohan fix bug 2009-09-01 + this->renew (nkstot * nspin); // mohan fix bug 2009-09-01 int count = 0; for (int iks = 1; iks < nks_special; iks++) - { - double dxs = (ks[iks].x - 
ks[iks - 1].x) / nkl[iks - 1]; - double dys = (ks[iks].y - ks[iks - 1].y) / nkl[iks - 1]; - double dzs = (ks[iks].z - ks[iks - 1].z) / nkl[iks - 1]; - for (int is = 0; is < nkl[iks - 1]; is++) { - kvec[count].x = ks[iks - 1].x + is * dxs; - kvec[count].y = ks[iks - 1].y + is * dys; - kvec[count].z = ks[iks - 1].z + is * dzs; - kl_segids.push_back(kpt_segids[iks - 1]); /* ISSUE#3482: to distinguish different kline segments */ - ++count; + double dxs = (ks[iks].x - ks[iks - 1].x) / nkl[iks - 1]; + double dys = (ks[iks].y - ks[iks - 1].y) / nkl[iks - 1]; + double dzs = (ks[iks].z - ks[iks - 1].z) / nkl[iks - 1]; + for (int is = 0; is < nkl[iks - 1]; is++) + { + kvec[count].x = ks[iks - 1].x + is * dxs; + kvec[count].y = ks[iks - 1].y + is * dys; + kvec[count].z = ks[iks - 1].z + is * dzs; + kl_segids.push_back (kpt_segids[iks - 1]); /* ISSUE#3482: to distinguish different kline segments */ + ++count; + } } - } // deal with the last special k point. kvec[count].x = ks[nks_special - 1].x; kvec[count].y = ks[nks_special - 1].y; kvec[count].z = ks[nks_special - 1].z; - kl_segids.push_back(kpt_segids[nks_special - 1]); /* ISSUE#3482: to distinguish different kline segments */ + kl_segids.push_back (kpt_segids[nks_special - 1]); /* ISSUE#3482: to distinguish different kline segments */ ++count; - assert(count == nkstot); - assert(kl_segids.size() == nkstot); /* ISSUE#3482: to distinguish different kline segments */ + assert (count == nkstot); + assert (kl_segids.size () == nkstot); /* ISSUE#3482: to distinguish different kline segments */ } -double K_Vectors::Monkhorst_Pack_formula(const int& k_type, const double& offset, const int& n, const int& dim) +double + K_Vectors::Monkhorst_Pack_formula (const int& k_type, const double& offset, const int& n, const int& dim) { double coordinate = 0.0; if (k_type == 1) - { - coordinate = (offset + 2.0 * (double)n - (double)dim - 1.0) / (2.0 * (double)dim); - } + { + coordinate = (offset + 2.0 * (double)n - (double)dim - 1.0) / (2.0 * 
(double)dim); + } else - { - coordinate = (offset + (double)n - 1.0) / (double)dim; - } + { + coordinate = (offset + (double)n - 1.0) / (double)dim; + } return coordinate; } // add by dwan -void K_Vectors::Monkhorst_Pack(const int* nmp_in, const double* koffset_in, const int k_type) +void + K_Vectors::Monkhorst_Pack (const int* nmp_in, const double* koffset_in, const int k_type) { const int mpnx = nmp_in[0]; const int mpny = nmp_in[1]; @@ -508,107 +524,114 @@ void K_Vectors::Monkhorst_Pack(const int* nmp_in, const double* koffset_in, cons this->nkstot = mpnx * mpny * mpnz; // only can renew after nkstot is estimated. - this->renew(nkstot * nspin); // mohan fix bug 2009-09-01 + this->renew (nkstot * nspin); // mohan fix bug 2009-09-01 for (int x = 1; x <= mpnx; x++) - { - double v1 = Monkhorst_Pack_formula(k_type, koffset_in[0], x, mpnx); - if (std::abs(v1) < 1.0e-10) { - v1 = 0.0; // mohan update 2012-06-10 - } - for (int y = 1; y <= mpny; y++) - { - double v2 = Monkhorst_Pack_formula(k_type, koffset_in[1], y, mpny); - if (std::abs(v2) < 1.0e-10) { - v2 = 0.0; - } - for (int z = 1; z <= mpnz; z++) - { - double v3 = Monkhorst_Pack_formula(k_type, koffset_in[2], z, mpnz); - if (std::abs(v3) < 1.0e-10) { - v3 = 0.0; + { + double v1 = Monkhorst_Pack_formula (k_type, koffset_in[0], x, mpnx); + if (std::abs (v1) < 1.0e-10) + { + v1 = 0.0; // mohan update 2012-06-10 + } + for (int y = 1; y <= mpny; y++) + { + double v2 = Monkhorst_Pack_formula (k_type, koffset_in[1], y, mpny); + if (std::abs (v2) < 1.0e-10) + { + v2 = 0.0; + } + for (int z = 1; z <= mpnz; z++) + { + double v3 = Monkhorst_Pack_formula (k_type, koffset_in[2], z, mpnz); + if (std::abs (v3) < 1.0e-10) + { + v3 = 0.0; + } + // index of nks kpoint + const int i = mpnx * mpny * (z - 1) + mpnx * (y - 1) + (x - 1); + kvec_d[i].set (v1, v2, v3); + } } - // index of nks kpoint - const int i = mpnx * mpny * (z - 1) + mpnx * (y - 1) + (x - 1); - kvec_d[i].set(v1, v2, v3); - } } - } - const double weight = 1.0 / 
static_cast(nkstot); + const double weight = 1.0 / static_cast (nkstot); for (int ik = 0; ik < nkstot; ik++) - { - wk[ik] = weight; - } + { + wk[ik] = weight; + } this->kd_done = true; return; } -void K_Vectors::update_use_ibz(const int& nkstot_ibz, +void + K_Vectors::update_use_ibz (const int& nkstot_ibz, const std::vector>& kvec_d_ibz, const std::vector& wk_ibz) { - if (GlobalV::MY_RANK != 0) { - return; - } - ModuleBase::TITLE("K_Vectors", "update_use_ibz"); - assert(nkstot_ibz > 0); - assert(nkstot_ibz <= kvec_d_ibz.size()); + if (GlobalV::MY_RANK != 0) + { + return; + } + ModuleBase::TITLE ("K_Vectors", "update_use_ibz"); + assert (nkstot_ibz > 0); + assert (nkstot_ibz <= kvec_d_ibz.size ()); // update nkstot this->nks = this->nkstot = nkstot_ibz; - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "nkstot now", nkstot); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "nkstot now", nkstot); - this->kvec_d.resize(this->nkstot * nspin); // qianrui fix a bug 2021-7-13 for nspin=2 in set_kup_and_kdw() + this->kvec_d.resize (this->nkstot * nspin); // qianrui fix a bug 2021-7-13 for nspin=2 in set_kup_and_kdw() for (int i = 0; i < this->nkstot; ++i) - { - this->kvec_d[i] = kvec_d_ibz[i]; + { + this->kvec_d[i] = kvec_d_ibz[i]; - // update weight. - this->wk[i] = wk_ibz[i]; - } + // update weight. + this->wk[i] = wk_ibz[i]; + } this->kd_done = true; this->kc_done = false; return; } -void K_Vectors::normalize_wk(const int& degspin) +void + K_Vectors::normalize_wk (const int& degspin) { - if (GlobalV::MY_RANK != 0) { - return; - } + if (GlobalV::MY_RANK != 0) + { + return; + } double sum = 0.0; for (int ik = 0; ik < nkstot; ik++) - { - sum += this->wk[ik]; - } + { + sum += this->wk[ik]; + } // If sum of weights is zero or very small, set equal weights if (sum < 1e-10) - { - ModuleBase::WARNING("K_Vectors::normalize_wk", - "Sum of k-point weights is zero or very small. 
" - "Setting equal weights for all k-points."); - for (int ik = 0; ik < nkstot; ik++) { - this->wk[ik] = 1.0 / double(nkstot); + ModuleBase::WARNING ("K_Vectors::normalize_wk", + "Sum of k-point weights is zero or very small. " + "Setting equal weights for all k-points."); + for (int ik = 0; ik < nkstot; ik++) + { + this->wk[ik] = 1.0 / double (nkstot); + } + sum = 1.0; } - sum = 1.0; - } for (int ik = 0; ik < nkstot; ik++) - { - this->wk[ik] /= sum; - } + { + this->wk[ik] /= sum; + } for (int ik = 0; ik < nkstot; ik++) - { - this->wk[ik] *= degspin; - } + { + this->wk[ik] *= degspin; + } return; } @@ -617,9 +640,10 @@ void K_Vectors::normalize_wk(const int& degspin) // This routine sets the k vectors for the up and down spin //---------------------------------------------------------- // from set_kup_and_kdw.f90 -void K_Vectors::set_kup_and_kdw() +void + K_Vectors::set_kup_and_kdw () { - ModuleBase::TITLE("K_Vectors", "setup_kup_and_kdw"); + ModuleBase::TITLE ("K_Vectors", "setup_kup_and_kdw"); //========================================================================= // on output: the number of points is doubled and xk and wk in the @@ -627,42 +651,42 @@ void K_Vectors::set_kup_and_kdw() // those in the second (nks/2) ones correspond to down spin //========================================================================= switch (nspin) - { - case 1: - - for (int ik = 0; ik < nks; ik++) { - this->isk[ik] = 0; - } + case 1: - break; + for (int ik = 0; ik < nks; ik++) + { + this->isk[ik] = 0; + } - case 2: + break; - for (int ik = 0; ik < nks; ik++) - { - this->kvec_c[ik + nks] = kvec_c[ik]; - this->kvec_d[ik + nks] = kvec_d[ik]; - this->wk[ik + nks] = wk[ik]; - this->isk[ik] = 0; - this->isk[ik + nks] = 1; - } + case 2: + + for (int ik = 0; ik < nks; ik++) + { + this->kvec_c[ik + nks] = kvec_c[ik]; + this->kvec_d[ik + nks] = kvec_d[ik]; + this->wk[ik + nks] = wk[ik]; + this->isk[ik] = 0; + this->isk[ik + nks] = 1; + } - this->nks *= 2; - this->nkstot *= 2; + 
this->nks *= 2; + this->nkstot *= 2; - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "nks(nspin=2)", nks); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "nkstot(nspin=2)", nkstot); - break; - case 4: + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "nks(nspin=2)", nks); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "nkstot(nspin=2)", nkstot); + break; + case 4: - for (int ik = 0; ik < nks; ik++) - { - this->isk[ik] = 0; - } + for (int ik = 0; ik < nks; ik++) + { + this->isk[ik] = 0; + } - break; - } + break; + } return; } // end subroutine set_kup_and_kdw diff --git a/source/source_cell/klist.h b/source/source_cell/klist.h index 4960ac68463..ab8bf69eb59 100644 --- a/source/source_cell/klist.h +++ b/source/source_cell/klist.h @@ -10,9 +10,9 @@ class K_Vectors { -public: - std::vector> kvec_c; /// Cartesian coordinates of k points - std::vector> kvec_d; /// Direct coordinates of k points + public: + std::vector> kvec_c; /// Cartesian coordinates of k points + std::vector> kvec_d; /// Direct coordinates of k points std::vector> kvec_c_full; // Cartesian coordinates of full k mesh match with nkstot_full std::vector wk; /// wk, weight of k points @@ -20,24 +20,23 @@ class K_Vectors std::vector ngk; /// ngk, number of plane waves for each k point std::vector isk; /// distinguish spin up and down k points - int nmp[3]={0}; /// Number of Monhorst-Pack + int nmp[3] = {0}; /// Number of Monhorst-Pack std::vector kl_segids; /// index of kline segment - /// @brief equal k points to each ibz-kpont, corresponding to a certain symmetry operations. + /// @brief equal k points to each ibz-kpont, corresponding to a certain symmetry operations. 
/// dim: [iks_ibz][(isym, kvec_d)] std::vector>> kstars; bool kc_done = false; bool kd_done = false; - K_Vectors(){}; - ~K_Vectors(){}; - K_Vectors& operator=(const K_Vectors&) = default; - K_Vectors& operator=(K_Vectors&& rhs) = default; + K_Vectors () {}; + ~K_Vectors () {}; + K_Vectors& operator= (const K_Vectors&) = default; + K_Vectors& operator= (K_Vectors&& rhs) = default; Parallel_Kpoints para_k; ///< parallel for kpoints - /** * @brief Set up the k-points for the system. * @@ -57,75 +56,87 @@ class K_Vectors * it will output a warning and suggest possible solutions. * @note Only available for nspin = 1 or 2 or 4. */ - void set(const UnitCell& ucell, - const ModuleSymmetry::Symmetry& symm, - const std::string& k_file_name, - const int& nspin, - const ModuleBase::Matrix3& reciprocal_vec, - const ModuleBase::Matrix3& latvec, - std::ofstream& ofs); - - int get_nks() const + void set (const UnitCell& ucell, + const ModuleSymmetry::Symmetry& symm, + const std::string& k_file_name, + const int& nspin, + const ModuleBase::Matrix3& reciprocal_vec, + const ModuleBase::Matrix3& latvec, + std::ofstream& ofs); + + int + get_nks () const { return this->nks; } - int get_nkstot() const + int + get_nkstot () const { return this->nkstot; } - int get_nkstot_full() const + int + get_nkstot_full () const { return this->nkstot_full; } - double get_koffset(const int i) const + double + get_koffset (const int i) const { return this->koffset[i]; } - int get_k_nkstot() const + int + get_k_nkstot () const { return this->k_nkstot; } - int get_nspin() const + int + get_nspin () const { return this->nspin; } - std::string get_k_kword() const + std::string + get_k_kword () const { return this->k_kword; } - void set_nks(int value) + void + set_nks (int value) { this->nks = value; } - void set_nkstot(int value) + void + set_nkstot (int value) { this->nkstot = value; } - void set_nkstot_full(int value) + void + set_nkstot_full (int value) { this->nkstot_full = value; } - void set_nspin(int 
value) + void + set_nspin (int value) { this->nspin = value; } - bool get_is_mp() const + bool + get_is_mp () const { return is_mp; } - std::vector ik2iktot; ///<[nks] map ik to the global index of k points + std::vector ik2iktot; ///<[nks] map ik to the global index of k points std::vector ibz_index; ///< map k points (before symmetry reduction) to irreducible k-points /** @@ -145,9 +156,9 @@ class K_Vectors * updated, and the flag kc_done is set to false to indicate that the Cartesian coordinates of the k-points need to * be recalculated. */ - void update_use_ibz(const int& nkstot_ibz, - const std::vector>& kvec_d_ibz, - const std::vector& wk_ibz); + void update_use_ibz (const int& nkstot_ibz, + const std::vector>& kvec_d_ibz, + const std::vector& wk_ibz); private: int nks = 0; ///< number of symmetry-reduced k points in this pool(processor, up+dw) @@ -174,7 +185,7 @@ class K_Vectors * @note The memory recording lines are commented out. If you want to track the memory usage, * you can uncomment these lines. */ - void renew(const int& kpoint_number); + void renew (const int& kpoint_number); // step 1 : generate kpoints @@ -197,8 +208,8 @@ class K_Vectors * @note If the k-points type is Line mode and the symmetry flag is 1, it will quit with a warning. * @note If the number of k-points is greater than 100000, it will quit with a warning. */ - bool read_kpoints(const UnitCell& ucell, - const std::string& fn); // return 0: something wrong. + bool read_kpoints (const UnitCell& ucell, + const std::string& fn); // return 0: something wrong. /** * @brief Adds k-points linearly between special points. @@ -221,7 +232,7 @@ class K_Vectors * k-points. * @note The function checks that the size of the segment ID vector matches the total number of k-points. */ - void interpolate_k_between(std::ifstream& ifk, std::vector>& kvec); + void interpolate_k_between (std::ifstream& ifk, std::vector>& kvec); /** * @brief Generates k-points using the Monkhorst-Pack scheme. 
@@ -238,7 +249,7 @@ class K_Vectors * @note The function sets the weight of each k-point to be equal, so that the total weight of all k-points is 1. * @note The function sets the flag kd_done to true to indicate that the k-points have been generated. */ - void Monkhorst_Pack(const int* nmp_in, const double* koffset_in, const int tipo); + void Monkhorst_Pack (const int* nmp_in, const double* koffset_in, const int tipo); /** * @brief Calculates the coordinate of a k-point using the Monkhorst-Pack scheme. @@ -255,7 +266,7 @@ class K_Vectors * * @note The function assumes that the k-points are evenly distributed in the reciprocal space. */ - double Monkhorst_Pack_formula(const int& k_type, const double& offset, const int& n, const int& dim); + double Monkhorst_Pack_formula (const int& k_type, const double& offset, const int& n, const int& dim); // step 2 : set both kvec and kved; normalize weight @@ -278,9 +289,7 @@ class K_Vectors * @note The function first normalizes the weights so that their sum is 1, and then scales them by the degeneracy of * spin. */ - void normalize_wk(const int& degspin); - - + void normalize_wk (const int& degspin); // step 4 : *2 kpoints. @@ -303,15 +312,15 @@ class K_Vectors * @note The function also doubles the total number of k-points (nks and nkstot) for spin-polarized calculations. * @note The function prints the total number of k-points for spin-polarized calculations. */ - void set_kup_and_kdw(); + void set_kup_and_kdw (); /** * @brief Gets the global index of a k-point. 
* @return this->ik2iktot[ik] */ - void cal_ik_global(); + void cal_ik_global (); #ifdef __MPI - friend void KVectorUtils::kvec_mpi_k(K_Vectors& kvec); + friend void KVectorUtils::kvec_mpi_k (K_Vectors& kvec); #endif }; #endif // KVECT_H \ No newline at end of file diff --git a/source/source_cell/module_neighbor/sltk_atom.cpp b/source/source_cell/module_neighbor/sltk_atom.cpp index c6c847a9ddc..7a091563555 100644 --- a/source/source_cell/module_neighbor/sltk_atom.cpp +++ b/source/source_cell/module_neighbor/sltk_atom.cpp @@ -2,14 +2,14 @@ #include /*** Constructors and destructor ***/ -FAtom::FAtom() +FAtom::FAtom () { - x = 0.0; - y = 0.0; - z = 0.0; - type = 0; - natom = 0; - cell_x = 0; - cell_y = 0; - cell_z = 0; + x = 0.0; + y = 0.0; + z = 0.0; + type = 0; + natom = 0; + cell_x = 0; + cell_y = 0; + cell_z = 0; } diff --git a/source/source_cell/module_neighbor/sltk_atom.h b/source/source_cell/module_neighbor/sltk_atom.h index a21055f3f5c..371601b45b5 100644 --- a/source/source_cell/module_neighbor/sltk_atom.h +++ b/source/source_cell/module_neighbor/sltk_atom.h @@ -4,39 +4,42 @@ #include #include -// a class contains the atom position, +// a class contains the atom position, // the type and the index, class FAtom { -public: - double x; - double y; - double z; + public: + double x; + double y; + double z; - int type; - int natom; + int type; + int natom; - int cell_x; - int cell_y; - int cell_z; + int cell_x; + int cell_y; + int cell_z; - FAtom(); - FAtom(const double& x_in, const double& y_in, const double& z_in, - const int& type_in, const int& natom_in, - const int& cell_x_in, const int& cell_y_in, const int& cell_z_in) - { - x = x_in; - y = y_in; - z = z_in; - type = type_in; - natom = natom_in; - cell_x = cell_x_in; - cell_y = cell_y_in; - cell_z = cell_z_in; - } - ~FAtom() - { - } + FAtom (); + FAtom (const double& x_in, + const double& y_in, + const double& z_in, + const int& type_in, + const int& natom_in, + const int& cell_x_in, + const int& cell_y_in, + 
const int& cell_z_in) + { + x = x_in; + y = y_in; + z = z_in; + type = type_in; + natom = natom_in; + cell_x = cell_x_in; + cell_y = cell_y_in; + cell_z = cell_z_in; + } + ~FAtom () {} }; #endif diff --git a/source/source_cell/module_neighbor/sltk_atom_arrange.cpp b/source/source_cell/module_neighbor/sltk_atom_arrange.cpp index 71a47b9eddf..c5c6261417e 100644 --- a/source/source_cell/module_neighbor/sltk_atom_arrange.cpp +++ b/source/source_cell/module_neighbor/sltk_atom_arrange.cpp @@ -8,57 +8,55 @@ // update the followig class in near future #include "source_cell/unitcell.h" -atom_arrange::atom_arrange() -{ -} +atom_arrange::atom_arrange () {} -atom_arrange::~atom_arrange() -{ -} +atom_arrange::~atom_arrange () {} -double atom_arrange::set_sr_NL(std::ofstream& ofs_in, - const std::string& output_level, - const double& rcutmax_Phi, - const double& rcutmax_Beta, - const bool gamma_only_local) +double + atom_arrange::set_sr_NL (std::ofstream& ofs_in, + const std::string& output_level, + const double& rcutmax_Phi, + const double& rcutmax_Beta, + const bool gamma_only_local) { - ModuleBase::TITLE("atom_arrange", "set_sr_NL"); + ModuleBase::TITLE ("atom_arrange", "set_sr_NL"); // check in use_overlap_matrix, double sr = 0.0; if (gamma_only_local) - { - sr = 2 * rcutmax_Phi + 0.001; - } + { + sr = 2 * rcutmax_Phi + 0.001; + } else - { - sr = 2 * (rcutmax_Phi + rcutmax_Beta) + 0.001; // 0.001 is added to make safe. - // sr = 2 * longest_orb_rcut + 0.001; - } + { + sr = 2 * (rcutmax_Phi + rcutmax_Beta) + 0.001; // 0.001 is added to make safe. 
+ // sr = 2 * longest_orb_rcut + 0.001; + } if (output_level != "m") // xiaohui add 'output_level', 2015-09-16 - { - ofs_in << "\n\n"; - ofs_in << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; - ofs_in << " | |" << std::endl; - ofs_in << " | #Search Adjacent Atoms# |" << std::endl; - ofs_in << " | Set the adjacent atoms for each atom and set the periodic boundary |" << std::endl; - ofs_in << " | condition for the atoms on real space FFT grid. For k-dependent |" << std::endl; - ofs_in << " | algorithm, we also need to set the sparse H and S matrix element |" << std::endl; - ofs_in << " | for each atom. |" << std::endl; - ofs_in << " | |" << std::endl; - ofs_in << " <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" << std::endl; - ofs_in << "\n"; - - ofs_in << " SETUP SEARCHING RADIUS" << std::endl; - ofs_in << std::setprecision(3); - ModuleBase::GlobalFunc::OUT(ofs_in, "Orbital max radius cutoff (Bohr)", rcutmax_Phi); - ModuleBase::GlobalFunc::OUT(ofs_in, "Nonlocal proj. max radius cutoff (Bohr)", rcutmax_Beta); - ModuleBase::GlobalFunc::OUT(ofs_in, "Search radius (Bohr)", sr); - } + { + ofs_in << "\n\n"; + ofs_in << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; + ofs_in << " | |" << std::endl; + ofs_in << " | #Search Adjacent Atoms# |" << std::endl; + ofs_in << " | Set the adjacent atoms for each atom and set the periodic boundary |" << std::endl; + ofs_in << " | condition for the atoms on real space FFT grid. For k-dependent |" << std::endl; + ofs_in << " | algorithm, we also need to set the sparse H and S matrix element |" << std::endl; + ofs_in << " | for each atom. 
|" << std::endl; + ofs_in << " | |" << std::endl; + ofs_in << " <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" << std::endl; + ofs_in << "\n"; + + ofs_in << " SETUP SEARCHING RADIUS" << std::endl; + ofs_in << std::setprecision (3); + ModuleBase::GlobalFunc::OUT (ofs_in, "Orbital max radius cutoff (Bohr)", rcutmax_Phi); + ModuleBase::GlobalFunc::OUT (ofs_in, "Nonlocal proj. max radius cutoff (Bohr)", rcutmax_Beta); + ModuleBase::GlobalFunc::OUT (ofs_in, "Search radius (Bohr)", sr); + } return sr; } -void atom_arrange::search(const bool pbc_flag, +void + atom_arrange::search (const bool pbc_flag, std::ofstream& ofs_in, Grid_Driver& grid_d, const UnitCell& ucell, @@ -66,69 +64,76 @@ void atom_arrange::search(const bool pbc_flag, const int& test_atom_in, const bool test_only) { - ModuleBase::TITLE("atom_arrange", "search"); - ModuleBase::timer::start("atom_arrange", "search"); + ModuleBase::TITLE ("atom_arrange", "search"); + ModuleBase::timer::start ("atom_arrange", "search"); if (search_radius_bohr < 0.0) - { - ModuleBase::WARNING_QUIT("atom_arrange::search", " search_radius_bohr < 0,forbidden"); - } + { + ModuleBase::WARNING_QUIT ("atom_arrange::search", " search_radius_bohr < 0,forbidden"); + } ofs_in << " SEARCH ADJACENT ATOMS" << std::endl; - ModuleBase::GlobalFunc::OUT(ofs_in, "searching radius (Bohr)", search_radius_bohr); -// ModuleBase::GlobalFunc::OUT(ofs_in, "searching radius unit is (Bohr)", ucell.lat0); + ModuleBase::GlobalFunc::OUT (ofs_in, "searching radius (Bohr)", search_radius_bohr); + // ModuleBase::GlobalFunc::OUT(ofs_in, "searching radius unit is (Bohr)", ucell.lat0); - assert(ucell.nat > 0); + assert (ucell.nat > 0); /* 2024-12-04 Zhang Haochong The neighboring atom search module has been completely rewritten. - The new algorithm places atoms into boxes with an edge length of twice the atomic radius. The neighboring + The new algorithm places atoms into boxes with an edge length of twice the atomic radius. 
The neighboring atom list stores the data using the atom's type and its index within that type. - By setting pbc_flag = false, periodic boundary conditions can be forcibly disabled. In this case, the search - process will not expand the supercell, and the neighboring atoms will only consider those within the original unit cell. + By setting pbc_flag = false, periodic boundary conditions can be forcibly disabled. In this case, the search + process will not expand the supercell, and the neighboring atoms will only consider those within the original unit + cell. */ const double radius_lat0unit = search_radius_bohr / ucell.lat0; // Atom_input at(ofs_in, ucell, pbc_flag, radius_lat0unit, test_atom_in); - grid_d.init(ofs_in, ucell, radius_lat0unit, pbc_flag); + grid_d.init (ofs_in, ucell, radius_lat0unit, pbc_flag); - // The screen output is very time-consuming. To avoid interfering with the timing, we will insert logging here earlier. - ModuleBase::timer::end("atom_arrange", "search"); + // The screen output is very time-consuming. To avoid interfering with the timing, we will insert logging here + // earlier. 
+ ModuleBase::timer::end ("atom_arrange", "search"); if (test_only) - { - std::cout << "radius_lat0unit = " << radius_lat0unit << std::endl; - std::cout << "search_radius_bohr = " << search_radius_bohr << std::endl; - - ofs_in << " " << std::setw(5) << "Type" << std::setw(5) << "Atom" << std::setw(8) << "AdjNum" << std::endl; - std::cout << std::setw(8) << "Labels" << std::setw(15) << "tau.x" << std::setw(15) << "tau.y" << std::setw(15) - << "tau.z" << std::setw(8) << "box.x" << std::setw(8) << "box.y" << std::setw(8) << "box.z" - << std::endl; - for (int it = 0; it < ucell.ntype; it++) { - for (int ia = 0; ia < ucell.atoms[it].na; ia++) - { - grid_d.Find_atom(ucell, ucell.atoms[it].tau[ia], it, ia); - - ofs_in << " " << std::setw(5) << it << std::setw(5) << ia << std::setw(8) << grid_d.getAdjacentNum() + 1 - << std::endl; - std::cout << " adjacent atoms of " << ucell.atoms[it].label + std::to_string(ia + 1) << ":" << std::endl; - std::cout << "getAdjacentNum: " << grid_d.getAdjacentNum() + 1 << std::endl; - /* - for (int ad = 0; ad < grid_d.getAdjacentNum() + 1; ad++) + std::cout << "radius_lat0unit = " << radius_lat0unit << std::endl; + std::cout << "search_radius_bohr = " << search_radius_bohr << std::endl; + + ofs_in << " " << std::setw (5) << "Type" << std::setw (5) << "Atom" << std::setw (8) << "AdjNum" + << std::endl; + std::cout << std::setw (8) << "Labels" << std::setw (15) << "tau.x" << std::setw (15) << "tau.y" + << std::setw (15) << "tau.z" << std::setw (8) << "box.x" << std::setw (8) << "box.y" + << std::setw (8) << "box.z" << std::endl; + for (int it = 0; it < ucell.ntype; it++) { - ModuleBase::Vector3 tau = grid_d.getAdjacentTau(ad); - ModuleBase::Vector3 box = grid_d.getBox(ad); - std::cout << std::setw(8) << ucell.atoms[it].label + std::to_string(ia + 1) << std::setw(15) - << tau.x << " " << std::setw(15) << tau.y << " " << std::setw(15) << tau.z << " " - << std::setw(8) << box.x << std::setw(8) << box.y << std::setw(8) << box.z << std::endl; - }*/ 
- } + for (int ia = 0; ia < ucell.atoms[it].na; ia++) + { + grid_d.Find_atom (ucell, ucell.atoms[it].tau[ia], it, ia); + + ofs_in << " " << std::setw (5) << it << std::setw (5) << ia << std::setw (8) + << grid_d.getAdjacentNum () + 1 << std::endl; + std::cout << " adjacent atoms of " << ucell.atoms[it].label + std::to_string (ia + 1) << ":" + << std::endl; + std::cout << "getAdjacentNum: " << grid_d.getAdjacentNum () + 1 << std::endl; + /* + for (int ad = 0; ad < grid_d.getAdjacentNum() + 1; ad++) + { + ModuleBase::Vector3 tau = grid_d.getAdjacentTau(ad); + ModuleBase::Vector3 box = grid_d.getBox(ad); + std::cout << std::setw(8) << ucell.atoms[it].label + std::to_string(ia + 1) << + std::setw(15) + << tau.x << " " << std::setw(15) << tau.y << " " << std::setw(15) << tau.z << + " " + << std::setw(8) << box.x << std::setw(8) << box.y << std::setw(8) << box.z << + std::endl; + }*/ + } + } + ofs_in << "search neighboring atoms done." << std::endl; } - ofs_in << "search neighboring atoms done." 
<< std::endl; - } return; } diff --git a/source/source_cell/module_neighbor/sltk_atom_arrange.h b/source/source_cell/module_neighbor/sltk_atom_arrange.h index 2fdd5fc469e..5e53ad6a2e5 100644 --- a/source/source_cell/module_neighbor/sltk_atom_arrange.h +++ b/source/source_cell/module_neighbor/sltk_atom_arrange.h @@ -3,30 +3,26 @@ #include "sltk_grid_driver.h" - class atom_arrange { -public: + public: + atom_arrange (); + ~atom_arrange (); - atom_arrange(); - ~atom_arrange(); - - static void search( - const bool flag, - std::ofstream &ofs, - Grid_Driver &grid_d, - const UnitCell &ucell, - const double& search_radius_bohr, - const int &test_atom_in, - const bool test_only = false); + static void search (const bool flag, + std::ofstream& ofs, + Grid_Driver& grid_d, + const UnitCell& ucell, + const double& search_radius_bohr, + const int& test_atom_in, + const bool test_only = false); - //caoyu modify 2021-05-24 - static double set_sr_NL( - std::ofstream &ofs_in, - const std::string &output_level, - const double& rcutmax_Phi, - const double& rcutmax_Beta, - const bool gamma_only_local); + // caoyu modify 2021-05-24 + static double set_sr_NL (std::ofstream& ofs_in, + const std::string& output_level, + const double& rcutmax_Phi, + const double& rcutmax_Beta, + const bool gamma_only_local); }; #endif diff --git a/source/source_cell/module_neighbor/sltk_grid.cpp b/source/source_cell/module_neighbor/sltk_grid.cpp index 9aad1a64e45..66272b29522 100644 --- a/source/source_cell/module_neighbor/sltk_grid.cpp +++ b/source/source_cell/module_neighbor/sltk_grid.cpp @@ -5,43 +5,40 @@ #include "source_base/memory.h" #include "source_base/timer.h" -Grid::Grid(const int& test_grid_in) : test_grid(test_grid_in) -{ -} +Grid::Grid (const int& test_grid_in) : test_grid (test_grid_in) {} -Grid::~Grid() -{ - this->clear_atoms(); -} +Grid::~Grid () { this->clear_atoms (); } -void Grid::init(std::ofstream& ofs_in, const UnitCell& ucell, const double radius_in, const bool boundary) +void + 
Grid::init (std::ofstream& ofs_in, const UnitCell& ucell, const double radius_in, const bool boundary) { - ModuleBase::TITLE("Grid", "init"); - ModuleBase::timer::start("Grid", "init"); + ModuleBase::TITLE ("Grid", "init"); + ModuleBase::timer::start ("Grid", "init"); this->pbc = boundary; this->sradius2 = radius_in * radius_in; this->sradius = radius_in; -// ModuleBase::GlobalFunc::OUT(ofs_in, "PeriodicBoundary", this->pbc); - ModuleBase::GlobalFunc::OUT(ofs_in, "Radius (unit: lattice constant)", sradius); + // ModuleBase::GlobalFunc::OUT(ofs_in, "PeriodicBoundary", this->pbc); + ModuleBase::GlobalFunc::OUT (ofs_in, "Radius (unit: lattice constant)", sradius); - this->Check_Expand_Condition(ucell); - ModuleBase::GlobalFunc::OUT(ofs_in, "Max number of cells", glayerX, glayerY, glayerZ); - ModuleBase::GlobalFunc::OUT(ofs_in, "Min number of cells", glayerX_minus, glayerY_minus, glayerZ_minus); + this->Check_Expand_Condition (ucell); + ModuleBase::GlobalFunc::OUT (ofs_in, "Max number of cells", glayerX, glayerY, glayerZ); + ModuleBase::GlobalFunc::OUT (ofs_in, "Min number of cells", glayerX_minus, glayerY_minus, glayerZ_minus); - this->setMemberVariables(ofs_in, ucell); - this->Construct_Adjacent(ucell); - ModuleBase::timer::end("Grid", "init"); + this->setMemberVariables (ofs_in, ucell); + this->Construct_Adjacent (ucell); + ModuleBase::timer::end ("Grid", "init"); } -void Grid::Check_Expand_Condition(const UnitCell& ucell) +void + Grid::Check_Expand_Condition (const UnitCell& ucell) { // ModuleBase::TITLE(GlobalV::ofs_running, "Atom_input", "Check_Expand_Condition"); if (!pbc) - { - return; - } + { + return; + } /*2016-07-19, LiuXh // the unit of extent_1DX,Y,Z is lat0. 
@@ -113,24 +110,24 @@ void Grid::Check_Expand_Condition(const UnitCell& ucell) double a23_1 = ucell.latvec.e22 * ucell.latvec.e33 - ucell.latvec.e23 * ucell.latvec.e32; double a23_2 = ucell.latvec.e21 * ucell.latvec.e33 - ucell.latvec.e23 * ucell.latvec.e31; double a23_3 = ucell.latvec.e21 * ucell.latvec.e32 - ucell.latvec.e22 * ucell.latvec.e31; - double a23_norm = sqrt(a23_1 * a23_1 + a23_2 * a23_2 + a23_3 * a23_3); + double a23_norm = sqrt (a23_1 * a23_1 + a23_2 * a23_2 + a23_3 * a23_3); double extend_v = a23_norm * sradius; double extend_d1 = extend_v / ucell.omega * ucell.lat0 * ucell.lat0 * ucell.lat0; - int extend_d11 = std::ceil(extend_d1); + int extend_d11 = std::ceil (extend_d1); double a31_1 = ucell.latvec.e32 * ucell.latvec.e13 - ucell.latvec.e33 * ucell.latvec.e12; double a31_2 = ucell.latvec.e31 * ucell.latvec.e13 - ucell.latvec.e33 * ucell.latvec.e11; double a31_3 = ucell.latvec.e31 * ucell.latvec.e12 - ucell.latvec.e32 * ucell.latvec.e11; - double a31_norm = sqrt(a31_1 * a31_1 + a31_2 * a31_2 + a31_3 * a31_3); + double a31_norm = sqrt (a31_1 * a31_1 + a31_2 * a31_2 + a31_3 * a31_3); double extend_d2 = a31_norm * sradius / ucell.omega * ucell.lat0 * ucell.lat0 * ucell.lat0; - int extend_d22 = std::ceil(extend_d2); + int extend_d22 = std::ceil (extend_d2); double a12_1 = ucell.latvec.e12 * ucell.latvec.e23 - ucell.latvec.e13 * ucell.latvec.e22; double a12_2 = ucell.latvec.e11 * ucell.latvec.e23 - ucell.latvec.e13 * ucell.latvec.e21; double a12_3 = ucell.latvec.e11 * ucell.latvec.e22 - ucell.latvec.e12 * ucell.latvec.e21; - double a12_norm = sqrt(a12_1 * a12_1 + a12_2 * a12_2 + a12_3 * a12_3); + double a12_norm = sqrt (a12_1 * a12_1 + a12_2 * a12_2 + a12_3 * a12_3); double extend_d3 = a12_norm * sradius / ucell.omega * ucell.lat0 * ucell.lat0 * ucell.lat0; - int extend_d33 = std::ceil(extend_d3); + int extend_d33 = std::ceil (extend_d3); // 2016-09-05, LiuXh glayerX = extend_d11 + 1; @@ -140,179 +137,188 @@ void Grid::Check_Expand_Condition(const 
UnitCell& ucell) glayerY_minus = extend_d22; glayerZ_minus = extend_d33; // End, 2016-09-05, LiuXh - } - -void Grid::setMemberVariables(std::ofstream& ofs_in, // output data to ofs +void + Grid::setMemberVariables (std::ofstream& ofs_in, // output data to ofs const UnitCell& ucell) { - ModuleBase::TITLE("SLTK_Grid", "setMemberVariables"); + ModuleBase::TITLE ("SLTK_Grid", "setMemberVariables"); - this->clear_atoms(); + this->clear_atoms (); // random selection, in order to estimate again. for (int it = 0; it < ucell.ntype; it++) - { - if (ucell.atoms[it].na > 0) { - this->x_min = ucell.atoms[it].tau[0].x; - this->y_min = ucell.atoms[it].tau[0].y; - this->z_min = ucell.atoms[it].tau[0].z; - this->x_max = ucell.atoms[it].tau[0].x; - this->y_max = ucell.atoms[it].tau[0].y; - this->z_max = ucell.atoms[it].tau[0].z; - break; + if (ucell.atoms[it].na > 0) + { + this->x_min = ucell.atoms[it].tau[0].x; + this->y_min = ucell.atoms[it].tau[0].y; + this->z_min = ucell.atoms[it].tau[0].z; + this->x_max = ucell.atoms[it].tau[0].x; + this->y_max = ucell.atoms[it].tau[0].y; + this->z_max = ucell.atoms[it].tau[0].z; + break; + } } - } - ModuleBase::Vector3 vec1(ucell.latvec.e11, ucell.latvec.e12, ucell.latvec.e13); - ModuleBase::Vector3 vec2(ucell.latvec.e21, ucell.latvec.e22, ucell.latvec.e23); - ModuleBase::Vector3 vec3(ucell.latvec.e31, ucell.latvec.e32, ucell.latvec.e33); + ModuleBase::Vector3 vec1 (ucell.latvec.e11, ucell.latvec.e12, ucell.latvec.e13); + ModuleBase::Vector3 vec2 (ucell.latvec.e21, ucell.latvec.e22, ucell.latvec.e23); + ModuleBase::Vector3 vec3 (ucell.latvec.e31, ucell.latvec.e32, ucell.latvec.e33); // calculate min & max value for (int ix = -glayerX_minus; ix < glayerX; ix++) - { - for (int iy = -glayerY_minus; iy < glayerY; iy++) { - for (int iz = -glayerZ_minus; iz < glayerZ; iz++) - { - for (int i = 0; i < ucell.ntype; i++) + for (int iy = -glayerY_minus; iy < glayerY; iy++) { - for (int j = 0; j < ucell.atoms[i].na; j++) - { - double x = 
ucell.atoms[i].tau[j].x + vec1[0] * ix + vec2[0] * iy + vec3[0] * iz; - double y = ucell.atoms[i].tau[j].y + vec1[1] * ix + vec2[1] * iy + vec3[1] * iz; - double z = ucell.atoms[i].tau[j].z + vec1[2] * ix + vec2[2] * iy + vec3[2] * iz; - x_min = std::min(x_min, x); - x_max = std::max(x_max, x); - y_min = std::min(y_min, y); - y_max = std::max(y_max, y); - z_min = std::min(z_min, z); - z_max = std::max(z_max, z); - } + for (int iz = -glayerZ_minus; iz < glayerZ; iz++) + { + for (int i = 0; i < ucell.ntype; i++) + { + for (int j = 0; j < ucell.atoms[i].na; j++) + { + double x + = ucell.atoms[i].tau[j].x + vec1[0] * ix + vec2[0] * iy + vec3[0] * iz; + double y + = ucell.atoms[i].tau[j].y + vec1[1] * ix + vec2[1] * iy + vec3[1] * iz; + double z + = ucell.atoms[i].tau[j].z + vec1[2] * ix + vec2[2] * iy + vec3[2] * iz; + x_min = std::min (x_min, x); + x_max = std::max (x_max, x); + y_min = std::min (y_min, y); + y_max = std::max (y_max, y); + z_min = std::min (z_min, z); + z_max = std::max (z_max, z); + } + } + } } - } } - } -// ofs_in << " RANGE OF ATOMIC COORDINATES (unit: lat0)" << std::endl; - ModuleBase::GlobalFunc::OUT(ofs_in, "Min coordinates of atoms", x_min, y_min, z_min); - ModuleBase::GlobalFunc::OUT(ofs_in, "Max coordinates of atoms", x_max, y_max, z_max); + // ofs_in << " RANGE OF ATOMIC COORDINATES (unit: lat0)" << std::endl; + ModuleBase::GlobalFunc::OUT (ofs_in, "Min coordinates of atoms", x_min, y_min, z_min); + ModuleBase::GlobalFunc::OUT (ofs_in, "Max coordinates of atoms", x_max, y_max, z_max); this->box_edge_length = sradius + 0.1; // To avoid edge cases, the size of the box is slightly increased. 
-/* warning box algorithm - this->box_nx = std::ceil((this->x_max - this->x_min) / box_edge_length) + 1; - this->box_ny = std::ceil((this->y_max - this->y_min) / box_edge_length) + 1; - this->box_nz = std::ceil((this->z_max - this->z_min) / box_edge_length) + 1; - ModuleBase::GlobalFunc::OUT(ofs_in, "BoxNumber", box_nx, box_ny, box_nz); + /* warning box algorithm + this->box_nx = std::ceil((this->x_max - this->x_min) / box_edge_length) + 1; + this->box_ny = std::ceil((this->y_max - this->y_min) / box_edge_length) + 1; + this->box_nz = std::ceil((this->z_max - this->z_min) / box_edge_length) + 1; + ModuleBase::GlobalFunc::OUT(ofs_in, "BoxNumber", box_nx, box_ny, box_nz); - atoms_in_box.resize(this->box_nx); - for (int i = 0; i < this->box_nx; i++) - { - atoms_in_box[i].resize(this->box_ny); - for (int j = 0; j < this->box_ny; j++) + atoms_in_box.resize(this->box_nx); + for (int i = 0; i < this->box_nx; i++) { - atoms_in_box[i][j].resize(this->box_nz); + atoms_in_box[i].resize(this->box_ny); + for (int j = 0; j < this->box_ny; j++) + { + atoms_in_box[i][j].resize(this->box_nz); + } } - } - */ + */ this->box_nx = glayerX + glayerX_minus; this->box_ny = glayerY + glayerY_minus; this->box_nz = glayerZ + glayerZ_minus; - ModuleBase::GlobalFunc::OUT(ofs_in, "Number of needed cells", box_nx, box_ny, box_nz); + ModuleBase::GlobalFunc::OUT (ofs_in, "Number of needed cells", box_nx, box_ny, box_nz); - atoms_in_box.resize(this->box_nx); + atoms_in_box.resize (this->box_nx); for (int i = 0; i < this->box_nx; i++) - { - atoms_in_box[i].resize(this->box_ny); - for (int j = 0; j < this->box_ny; j++) { - atoms_in_box[i][j].resize(this->box_nz); + atoms_in_box[i].resize (this->box_ny); + for (int j = 0; j < this->box_ny; j++) + { + atoms_in_box[i][j].resize (this->box_nz); + } } - } for (int ix = -glayerX_minus; ix < glayerX; ix++) - { - for (int iy = -glayerY_minus; iy < glayerY; iy++) { - for (int iz = -glayerZ_minus; iz < glayerZ; iz++) - { - for (int i = 0; i < ucell.ntype; i++) 
+ for (int iy = -glayerY_minus; iy < glayerY; iy++) { - for (int j = 0; j < ucell.atoms[i].na; j++) - { - double x = ucell.atoms[i].tau[j].x + vec1[0] * ix + vec2[0] * iy + vec3[0] * iz; - double y = ucell.atoms[i].tau[j].y + vec1[1] * ix + vec2[1] * iy + vec3[1] * iz; - double z = ucell.atoms[i].tau[j].z + vec1[2] * ix + vec2[2] * iy + vec3[2] * iz; - FAtom atom(x, y, z, i, j, ix, iy, iz); - int box_i_x, box_i_y, box_i_z; - //this->getBox(box_i_x, box_i_y, box_i_z, x, y, z); - box_i_x = ix + glayerX_minus; - box_i_y = iy + glayerY_minus; - box_i_z = iz + glayerZ_minus; - this->atoms_in_box[box_i_x][box_i_y][box_i_z].push_back(atom); - } + for (int iz = -glayerZ_minus; iz < glayerZ; iz++) + { + for (int i = 0; i < ucell.ntype; i++) + { + for (int j = 0; j < ucell.atoms[i].na; j++) + { + double x + = ucell.atoms[i].tau[j].x + vec1[0] * ix + vec2[0] * iy + vec3[0] * iz; + double y + = ucell.atoms[i].tau[j].y + vec1[1] * ix + vec2[1] * iy + vec3[1] * iz; + double z + = ucell.atoms[i].tau[j].z + vec1[2] * ix + vec2[2] * iy + vec3[2] * iz; + FAtom atom (x, y, z, i, j, ix, iy, iz); + int box_i_x, box_i_y, box_i_z; + // this->getBox(box_i_x, box_i_y, box_i_z, x, y, z); + box_i_x = ix + glayerX_minus; + box_i_y = iy + glayerY_minus; + box_i_z = iz + glayerZ_minus; + this->atoms_in_box[box_i_x][box_i_y][box_i_z].push_back (atom); + } + } + } } - } } - } - - this->all_adj_info.resize(ucell.ntype); + + this->all_adj_info.resize (ucell.ntype); for (int i = 0; i < ucell.ntype; i++) - { - this->all_adj_info[i].resize(ucell.atoms[i].na); - } + { + this->all_adj_info[i].resize (ucell.atoms[i].na); + } } -void Grid::Construct_Adjacent(const UnitCell& ucell) +void + Grid::Construct_Adjacent (const UnitCell& ucell) { - ModuleBase::timer::start("Grid", "constru_adj"); + ModuleBase::timer::start ("Grid", "constru_adj"); - for (int i_type = 0; i_type < ucell.ntype; i_type++) - { - for (int j_atom = 0; j_atom < ucell.atoms[i_type].na; j_atom++) + for (int i_type = 0; i_type < 
ucell.ntype; i_type++) { + for (int j_atom = 0; j_atom < ucell.atoms[i_type].na; j_atom++) + { - FAtom atom(ucell.atoms[i_type].tau[j_atom].x, - ucell.atoms[i_type].tau[j_atom].y, - ucell.atoms[i_type].tau[j_atom].z, - i_type, - j_atom, - 0, 0 ,0); + FAtom atom (ucell.atoms[i_type].tau[j_atom].x, + ucell.atoms[i_type].tau[j_atom].y, + ucell.atoms[i_type].tau[j_atom].z, + i_type, + j_atom, + 0, + 0, + 0); - this->Construct_Adjacent_near_box(atom); + this->Construct_Adjacent_near_box (atom); + } } - } - ModuleBase::timer::end("Grid", "constru_adj"); + ModuleBase::timer::end ("Grid", "constru_adj"); } -void Grid::Construct_Adjacent_near_box(const FAtom& fatom) +void + Grid::Construct_Adjacent_near_box (const FAtom& fatom) { - ModuleBase::timer::start("Grid", "adj_near_box"); - int box_i_x=0; - int box_i_y=0; - int box_i_z=0; - this->getBox(box_i_x, box_i_y, box_i_z, fatom.x, fatom.y, fatom.z); + ModuleBase::timer::start ("Grid", "adj_near_box"); + int box_i_x = 0; + int box_i_y = 0; + int box_i_z = 0; + this->getBox (box_i_x, box_i_y, box_i_z, fatom.x, fatom.y, fatom.z); for (int box_i_x_adj = 0; box_i_x_adj < glayerX + glayerX_minus; box_i_x_adj++) - { - for (int box_i_y_adj = 0; box_i_y_adj < glayerY + glayerY_minus; box_i_y_adj++) { - for (int box_i_z_adj = 0; box_i_z_adj < glayerZ + glayerZ_minus; box_i_z_adj++) - { - for (auto &fatom2 : this->atoms_in_box[box_i_x_adj][box_i_y_adj][box_i_z_adj]) + for (int box_i_y_adj = 0; box_i_y_adj < glayerY + glayerY_minus; box_i_y_adj++) { - this->Construct_Adjacent_final(fatom, &fatom2); + for (int box_i_z_adj = 0; box_i_z_adj < glayerZ + glayerZ_minus; box_i_z_adj++) + { + for (auto& fatom2: this->atoms_in_box[box_i_x_adj][box_i_y_adj][box_i_z_adj]) + { + this->Construct_Adjacent_final (fatom, &fatom2); + } + } } - } } - } - ModuleBase::timer::end("Grid", "adj_near_box"); + ModuleBase::timer::end ("Grid", "adj_near_box"); } -void Grid::Construct_Adjacent_final(const FAtom& fatom1, - FAtom* fatom2) +void + 
Grid::Construct_Adjacent_final (const FAtom& fatom1, FAtom* fatom2) { double delta_x = fatom1.x - fatom2->x; double delta_y = fatom1.y - fatom2->y; @@ -320,14 +326,13 @@ void Grid::Construct_Adjacent_final(const FAtom& fatom1, double dr = delta_x * delta_x + delta_y * delta_y + delta_z * delta_z; - // 20241204 zhanghaochong // dr == 0 means the same atom // the atom itself is neighbour atom, but the order itself must on last in the list. // so we will add itself on find atom function, and skip here. // I dont know why, but if we add self here, test 701_LJ_MD_Anderson will assert if (dr != 0.0 && dr <= this->sradius2) - { - all_adj_info[fatom1.type][fatom1.natom].push_back(fatom2); - } + { + all_adj_info[fatom1.type][fatom1.natom].push_back (fatom2); + } } diff --git a/source/source_cell/module_neighbor/sltk_grid.h b/source/source_cell/module_neighbor/sltk_grid.h index 4b2da582800..1351bc316d0 100644 --- a/source/source_cell/module_neighbor/sltk_grid.h +++ b/source/source_cell/module_neighbor/sltk_grid.h @@ -16,99 +16,109 @@ class Grid public: // Constructors and destructor // Grid is Global class,so init it with constant number - Grid() : test_grid(0){}; - Grid(const int& test_grid_in); - virtual ~Grid(); + Grid () : test_grid (0) {}; + Grid (const int& test_grid_in); + virtual ~Grid (); - Grid& operator=(Grid&&) = default; + Grid& operator= (Grid&&) = default; - void init(std::ofstream& ofs, const UnitCell& ucell, const double radius_in, const bool boundary = true); + void init (std::ofstream& ofs, const UnitCell& ucell, const double radius_in, const bool boundary = true); // Data - bool pbc=false; // When pbc is set to false, periodic boundary conditions are explicitly ignored. - double sradius2=0.0; // searching radius squared (unit:lat0) - double sradius=0.0; // searching radius (unit:lat0) - + bool pbc = false; // When pbc is set to false, periodic boundary conditions are explicitly ignored. 
+ double sradius2 = 0.0; // searching radius squared (unit:lat0) + double sradius = 0.0; // searching radius (unit:lat0) + // coordinate range of the input atom (unit:lat0) - double x_min=0.0; - double y_min=0.0; - double z_min=0.0; - double x_max=0.0; - double y_max=0.0; - double z_max=0.0; - - // The algorithm for searching neighboring atoms uses a "box" partitioning method. + double x_min = 0.0; + double y_min = 0.0; + double z_min = 0.0; + double x_max = 0.0; + double y_max = 0.0; + double z_max = 0.0; + + // The algorithm for searching neighboring atoms uses a "box" partitioning method. // Each box has an edge length of sradius, and the number of boxes in each direction is recorded here. - double box_edge_length=0.0; - int box_nx=0; - int box_ny=0; - int box_nz=0; + double box_edge_length = 0.0; + int box_nx = 0; + int box_ny = 0; + int box_nz = 0; - void getBox(int& bx, int& by, int& bz, const double& x, const double& y, const double& z) + void + getBox (int& bx, int& by, int& bz, const double& x, const double& y, const double& z) { - bx = std::floor((x - x_min) / box_edge_length); - by = std::floor((y - y_min) / box_edge_length); - bz = std::floor((z - z_min) / box_edge_length); + bx = std::floor ((x - x_min) / box_edge_length); + by = std::floor ((y - y_min) / box_edge_length); + bz = std::floor ((z - z_min) / box_edge_length); } // Stores the atoms after box partitioning. std::vector>> atoms_in_box; // Stores the adjacent information of atoms. 
[ntype][natom][adj list] - std::vector >> all_adj_info; - void clear_atoms() + std::vector>> all_adj_info; + void + clear_atoms () { // we have to clear the all_adj_info // because the pointers point to the memory in vector atoms_in_box - all_adj_info.clear(); + all_adj_info.clear (); - atoms_in_box.clear(); + atoms_in_box.clear (); } - void clear_adj_info() + void + clear_adj_info () { - // here dont need to free the memory, + // here dont need to free the memory, // because the pointers point to the memory in vector atoms_in_box - all_adj_info.clear(); + all_adj_info.clear (); } - int getGlayerX() const + int + getGlayerX () const { return glayerX; } - int getGlayerY() const + int + getGlayerY () const { return glayerY; } - int getGlayerZ() const + int + getGlayerZ () const { return glayerZ; } - int getGlayerX_minus() const + int + getGlayerX_minus () const { return glayerX_minus; } - int getGlayerY_minus() const + int + getGlayerY_minus () const { return glayerY_minus; } - int getGlayerZ_minus() const + int + getGlayerZ_minus () const { return glayerZ_minus; } + private: int test_grid; - void setMemberVariables(std::ofstream& ofs_in, const UnitCell& ucell); + void setMemberVariables (std::ofstream& ofs_in, const UnitCell& ucell); - void Construct_Adjacent(const UnitCell& ucell); - void Construct_Adjacent_near_box(const FAtom& fatom); - void Construct_Adjacent_final(const FAtom& fatom1, FAtom* fatom2); + void Construct_Adjacent (const UnitCell& ucell); + void Construct_Adjacent_near_box (const FAtom& fatom); + void Construct_Adjacent_final (const FAtom& fatom1, FAtom* fatom2); - void Check_Expand_Condition(const UnitCell& ucell); - int glayerX=0; - int glayerX_minus=0; - int glayerY=0; - int glayerY_minus=0; - int glayerZ=0; - int glayerZ_minus=0; + void Check_Expand_Condition (const UnitCell& ucell); + int glayerX = 0; + int glayerX_minus = 0; + int glayerY = 0; + int glayerY_minus = 0; + int glayerZ = 0; + int glayerZ_minus = 0; }; #endif diff --git 
a/source/source_cell/module_neighbor/sltk_grid_driver.cpp b/source/source_cell/module_neighbor/sltk_grid_driver.cpp index 7986a37eebf..b846182fa83 100644 --- a/source/source_cell/module_neighbor/sltk_grid_driver.cpp +++ b/source/source_cell/module_neighbor/sltk_grid_driver.cpp @@ -9,73 +9,71 @@ #include #endif -Grid_Driver::Grid_Driver( - const int &test_d_in, - const int &test_grid_in) -:test_deconstructor(test_d_in), -Grid(test_grid_in) +Grid_Driver::Grid_Driver (const int& test_d_in, const int& test_grid_in) + : test_deconstructor (test_d_in), Grid (test_grid_in) { - test_deconstructor = test_d_in; + test_deconstructor = test_d_in; } -Grid_Driver::~Grid_Driver() -{ -} +Grid_Driver::~Grid_Driver () {} -void Grid_Driver::Find_atom(const UnitCell& ucell, - const int ntype, - const int nnumber, - AdjacentAtomInfo* adjs) const +void + Grid_Driver::Find_atom (const UnitCell& ucell, const int ntype, const int nnumber, AdjacentAtomInfo* adjs) const { - ModuleBase::timer::start("Grid_Driver", "Find_atom"); + ModuleBase::timer::start ("Grid_Driver", "Find_atom"); // std::cout << "lenght in Find atom = " << atomlink[offset].fatom.getAdjacentSet()->getLength() << std::endl; // store result in member adj_info when parameter adjs is NULL AdjacentAtomInfo* local_adjs = adjs == nullptr ? 
&this->adj_info : adjs; - local_adjs->clear(); + local_adjs->clear (); const std::vector& all_atom = all_adj_info[ntype][nnumber]; for (const FAtom* atom: all_atom) - { - local_adjs->ntype.push_back(atom->type); - local_adjs->natom.push_back(atom->natom); - local_adjs->box.push_back(ModuleBase::Vector3(atom->cell_x, atom->cell_y, atom->cell_z)); - local_adjs->adjacent_tau.push_back(ModuleBase::Vector3(atom->x, atom->y, atom->z)); - local_adjs->adj_num++; - } + { + local_adjs->ntype.push_back (atom->type); + local_adjs->natom.push_back (atom->natom); + local_adjs->box.push_back (ModuleBase::Vector3 (atom->cell_x, atom->cell_y, atom->cell_z)); + local_adjs->adjacent_tau.push_back (ModuleBase::Vector3 (atom->x, atom->y, atom->z)); + local_adjs->adj_num++; + } // 20241204 zhanghaochong // for some unknown reason, the last neighbour atom must be it self // is self must in last, the order cannot be changed. // if self not in last, test 701_LJ_MD_Anderson will assert - local_adjs->ntype.push_back(ntype); - local_adjs->natom.push_back(nnumber); - local_adjs->box.push_back(ModuleBase::Vector3(0, 0, 0)); - local_adjs->adjacent_tau.push_back(ModuleBase::Vector3(ucell.atoms[ntype].tau[nnumber].x, ucell.atoms[ntype].tau[nnumber].y, ucell.atoms[ntype].tau[nnumber].z)); - ModuleBase::timer::end("Grid_Driver", "Find_atom"); + local_adjs->ntype.push_back (ntype); + local_adjs->natom.push_back (nnumber); + local_adjs->box.push_back (ModuleBase::Vector3 (0, 0, 0)); + local_adjs->adjacent_tau.push_back (ModuleBase::Vector3 (ucell.atoms[ntype].tau[nnumber].x, + ucell.atoms[ntype].tau[nnumber].y, + ucell.atoms[ntype].tau[nnumber].z)); + ModuleBase::timer::end ("Grid_Driver", "Find_atom"); return; } -void Grid_Driver::Find_atom(const UnitCell& ucell, - const ModuleBase::Vector3& cartesian_posi, - const int& ntype, - const int& nnumber, - AdjacentAtomInfo* adjs) const +void + Grid_Driver::Find_atom (const UnitCell& ucell, + const ModuleBase::Vector3& cartesian_posi, + const int& ntype, + 
const int& nnumber, + AdjacentAtomInfo* adjs) const { - this->Find_atom(ucell, ntype, nnumber, adjs); + this->Find_atom (ucell, ntype, nnumber, adjs); } // filter_adjs delete not adjacent atoms in adjs -void filter_adjs(const std::vector& is_adj, AdjacentAtomInfo& adjs) +void + filter_adjs (const std::vector& is_adj, AdjacentAtomInfo& adjs) { const int size = adjs.adj_num + 1; for (int i = size - 1; i >= 0; --i) - { - if (!is_adj[i]) { - adjs.adj_num--; - adjs.ntype.erase(adjs.ntype.begin() + i); - adjs.natom.erase(adjs.natom.begin() + i); - adjs.adjacent_tau.erase(adjs.adjacent_tau.begin() + i); // info of adjacent_tau is not used in future - adjs.box.erase(adjs.box.begin() + i); + if (!is_adj[i]) + { + adjs.adj_num--; + adjs.ntype.erase (adjs.ntype.begin () + i); + adjs.natom.erase (adjs.natom.begin () + i); + adjs.adjacent_tau.erase (adjs.adjacent_tau.begin () + + i); // info of adjacent_tau is not used in future + adjs.box.erase (adjs.box.begin () + i); + } } - } } diff --git a/source/source_cell/module_neighbor/sltk_grid_driver.h b/source/source_cell/module_neighbor/sltk_grid_driver.h index 95db2d85c29..1a221966046 100644 --- a/source/source_cell/module_neighbor/sltk_grid_driver.h +++ b/source/source_cell/module_neighbor/sltk_grid_driver.h @@ -16,25 +16,24 @@ class AdjacentAtomInfo { public: - AdjacentAtomInfo() : adj_num(0) - { - } + AdjacentAtomInfo () : adj_num (0) {} int adj_num; std::vector ntype; std::vector natom; std::vector> adjacent_tau; std::vector> box; - void clear() + void + clear () { adj_num = 0; - ntype.clear(); - natom.clear(); - adjacent_tau.clear(); - box.clear(); + ntype.clear (); + natom.clear (); + adjacent_tau.clear (); + box.clear (); } }; -void filter_adjs(const std::vector& is_adj, AdjacentAtomInfo& adjs); +void filter_adjs (const std::vector& is_adj, AdjacentAtomInfo& adjs); class Grid_Driver : public Grid { @@ -46,12 +45,12 @@ class Grid_Driver : public Grid // adjacent of this atom,and store the information // in 
'adj_num','ntype','natom' //========================================================== - Grid_Driver(){ test_deconstructor = false; }; - Grid_Driver(const int& test_d_in, const int& test_grid_in); + Grid_Driver () { test_deconstructor = false; }; + Grid_Driver (const int& test_d_in, const int& test_grid_in); - ~Grid_Driver(); + ~Grid_Driver (); - Grid_Driver& operator=(Grid_Driver&&) = default; + Grid_Driver& operator= (Grid_Driver&&) = default; //========================================================== // EXPLAIN FOR default parameter `adjs = nullptr` @@ -62,18 +61,15 @@ class Grid_Driver : public Grid // 2. And store results into parameter adjs when adjs is // NOT NULL //========================================================== - void Find_atom(const UnitCell& ucell, - const int ntype, - const int nnumber, - AdjacentAtomInfo* adjs = nullptr) const; + void Find_atom (const UnitCell& ucell, const int ntype, const int nnumber, AdjacentAtomInfo* adjs = nullptr) const; // cartesian_posi and ucell is deprecated 20241204 zhanghaochong // this interface is deprecated, please use Find_atom above - void Find_atom(const UnitCell& ucell, - const ModuleBase::Vector3& cartesian_posi, - const int& ntype, - const int& nnumber, - AdjacentAtomInfo* adjs = nullptr) const; + void Find_atom (const UnitCell& ucell, + const ModuleBase::Vector3& cartesian_posi, + const int& ntype, + const int& nnumber, + AdjacentAtomInfo* adjs = nullptr) const; //========================================================== // EXPLAIN : The adjacent information for the input // cartesian_pos @@ -83,23 +79,28 @@ class Grid_Driver : public Grid // NAME : getNatom // NAME : getAdjaentTau //========================================================== - const int& getAdjacentNum() const + const int& + getAdjacentNum () const { return adj_info.adj_num; } - const int& getType(const int i) const + const int& + getType (const int i) const { return adj_info.ntype[i]; } - const int& getNatom(const int i) const + 
const int& + getNatom (const int i) const { return adj_info.natom[i]; } - const ModuleBase::Vector3& getAdjacentTau(const int i) const + const ModuleBase::Vector3& + getAdjacentTau (const int i) const { return adj_info.adjacent_tau[i]; } - const ModuleBase::Vector3& getBox(const int i) const + const ModuleBase::Vector3& + getBox (const int i) const { return adj_info.box[i]; } diff --git a/source/source_cell/module_neighbor/sltk_util.h b/source/source_cell/module_neighbor/sltk_util.h index 0e783899c8b..f30a629af2d 100644 --- a/source/source_cell/module_neighbor/sltk_util.h +++ b/source/source_cell/module_neighbor/sltk_util.h @@ -5,22 +5,47 @@ /*** Data ***/ -static struct { template operator T*() const { return static_cast(0); } } NullPtr; +static struct +{ + template + operator T*() const + { + return static_cast (0); + } +} NullPtr; /*** Function ***/ -template -static inline void affirm(const bool b) - -{ if (!b) throw exception(); } - -template -static inline void affirm(const bool b, const char* const message) - -{ if (!b) throw exception(message); } - -template -static inline void affirm(const bool b, const std::string& message) - -{ if (!b) throw exception(message); } +template +static inline void + affirm (const bool b) + +{ + if (!b) + { + throw exception (); + } +} + +template +static inline void + affirm (const bool b, const char* const message) + +{ + if (!b) + { + throw exception (message); + } +} + +template +static inline void + affirm (const bool b, const std::string& message) + +{ + if (!b) + { + throw exception (message); + } +} #endif diff --git a/source/source_cell/module_neighbor/test/prepare_unitcell.h b/source/source_cell/module_neighbor/test/prepare_unitcell.h index 11bd94ba7bd..8456c78f90b 100644 --- a/source/source_cell/module_neighbor/test/prepare_unitcell.h +++ b/source/source_cell/module_neighbor/test/prepare_unitcell.h @@ -1,328 +1,306 @@ #ifndef PREPARE_UNITCELL_H #define PREPARE_UNITCELL_H -#include -#include +#include +#include 
#include "source_base/mathzone.h" class UcellTestPrepare { -public: - UcellTestPrepare()=default; - UcellTestPrepare(std::string latname_in, - int lmaxmax_in, - bool init_vel_in, - bool selective_dynamics_in, - bool relax_new_in, - std::string fixed_axes_in, - double lat0_in, - std::valarray latvec_in, - std::vector elements_in, - std::vector pp_files_in, - std::vector pp_types_in, - std::vector orb_files_in, - std::valarray natom_in, - std::vector atomic_mass_in, - std::string coor_type_in, - std::valarray coordinates_in); - UcellTestPrepare(std::string latname_in, - int lmaxmax_in, - bool init_vel_in, - bool selective_dynamics_in, - bool relax_new_in, - std::string fixed_axes_in, - double lat0_in, - std::valarray latvec_in, - std::vector elements_in, - std::vector pp_files_in, - std::vector pp_types_in, - std::vector orb_files_in, - std::valarray natom_in, - std::vector atomic_mass_in, - std::string coor_type_in, - std::valarray coordinates_in, - std::valarray mbl_in, - std::valarray velocity_in); - UcellTestPrepare(const UcellTestPrepare &utp); + public: + UcellTestPrepare () = default; + UcellTestPrepare (std::string latname_in, + int lmaxmax_in, + bool init_vel_in, + bool selective_dynamics_in, + bool relax_new_in, + std::string fixed_axes_in, + double lat0_in, + std::valarray latvec_in, + std::vector elements_in, + std::vector pp_files_in, + std::vector pp_types_in, + std::vector orb_files_in, + std::valarray natom_in, + std::vector atomic_mass_in, + std::string coor_type_in, + std::valarray coordinates_in); + UcellTestPrepare (std::string latname_in, + int lmaxmax_in, + bool init_vel_in, + bool selective_dynamics_in, + bool relax_new_in, + std::string fixed_axes_in, + double lat0_in, + std::valarray latvec_in, + std::vector elements_in, + std::vector pp_files_in, + std::vector pp_types_in, + std::vector orb_files_in, + std::valarray natom_in, + std::vector atomic_mass_in, + std::string coor_type_in, + std::valarray coordinates_in, + std::valarray mbl_in, + 
std::valarray velocity_in); + UcellTestPrepare (const UcellTestPrepare& utp); - std::string latname; - int lmaxmax; - bool init_vel; - bool selective_dynamics; - bool relax_new; - std::string fixed_axes; - double lat0; - std::valarray latvec; - std::vector elements; - std::vector pp_files; - std::vector pp_types; - std::vector orb_files; - std::valarray natom; - std::vector atomic_mass; - std::string coor_type; - std::valarray coordinates; - std::valarray mbl; - std::valarray velocity; - // ntype - int ntype; - int atomic_index; + std::string latname; + int lmaxmax; + bool init_vel; + bool selective_dynamics; + bool relax_new; + std::string fixed_axes; + double lat0; + std::valarray latvec; + std::vector elements; + std::vector pp_files; + std::vector pp_types; + std::vector orb_files; + std::valarray natom; + std::vector atomic_mass; + std::string coor_type; + std::valarray coordinates; + std::valarray mbl; + std::valarray velocity; + // ntype + int ntype; + int atomic_index; - UnitCell* SetUcellInfo() - { - //basic info - this->ntype = this->elements.size(); - UnitCell* ucell = new UnitCell; - ucell->setup(this->latname, - this->ntype, - this->lmaxmax, - this->init_vel, - this->fixed_axes); - - delete[] ucell->magnet.start_mag; //mag set here - ucell->atom_label.resize(ucell->ntype); - ucell->atom_mass.resize(ucell->ntype); - ucell->pseudo_fn.resize(ucell->ntype); - ucell->pseudo_type.resize(ucell->ntype); - ucell->orbital_fn.resize(ucell->ntype); - ucell->magnet.start_mag = new double[ucell->ntype]; //mag set here - ucell->magnet.ux_[0] = 0.0; // ux_ set here - ucell->magnet.ux_[1] = 0.0; - ucell->magnet.ux_[2] = 0.0; - for(int it=0;itntype;++it) - { - ucell->atom_label[it] = this->elements[it]; - ucell->atom_mass[it] = this->atomic_mass[it]; - ucell->pseudo_fn[it] = this->pp_files[it]; - ucell->pseudo_type[it] = this->pp_types[it]; - ucell->orbital_fn[it] = this->orb_files[it]; - ucell->magnet.start_mag[it] = 0.0; //mag set here - } - //lattice info - 
ucell->lat0 = this->lat0; - ucell->lat0_angstrom = ucell->lat0 * ModuleBase::BOHR_TO_A; - ucell->tpiba = ModuleBase::TWO_PI/ucell->lat0; - ucell->tpiba2 = ucell->tpiba * ucell->tpiba; - ucell->latvec.e11 = this->latvec[0]; - ucell->latvec.e12 = this->latvec[1]; - ucell->latvec.e13 = this->latvec[2]; - ucell->latvec.e21 = this->latvec[3]; - ucell->latvec.e22 = this->latvec[4]; - ucell->latvec.e23 = this->latvec[5]; - ucell->latvec.e31 = this->latvec[6]; - ucell->latvec.e32 = this->latvec[7]; - ucell->latvec.e33 = this->latvec[8]; - ucell->a1.x = ucell->latvec.e11; - ucell->a1.y = ucell->latvec.e12; - ucell->a1.z = ucell->latvec.e13; - ucell->a2.x = ucell->latvec.e21; - ucell->a2.y = ucell->latvec.e22; - ucell->a2.z = ucell->latvec.e23; - ucell->a3.x = ucell->latvec.e31; - ucell->a3.y = ucell->latvec.e32; - ucell->a3.z = ucell->latvec.e33; - ucell->GT = ucell->latvec.Inverse(); - ucell->G = ucell->GT.Transpose(); - ucell->GGT = ucell->G*ucell->GT; - ucell->invGGT = ucell->GGT.Inverse(); - ucell->omega = std::abs(ucell->latvec.Det())*(ucell->lat0)*(ucell->lat0)*(ucell->lat0); - //atomic info - ucell->Coordinate = this->coor_type; - ucell->atoms = new Atom[ucell->ntype]; - ucell->set_atom_flag = true; - this->atomic_index = 0; - for(int it=0;itntype;++it) - { - ucell->atoms[it].label = this->elements[it]; - ucell->atoms[it].nw = 0; - ucell->atoms[it].nwl = 2; - ucell->atoms[it].l_nchi.resize(ucell->atoms[it].nwl+1); - for(int L=0; Latoms[it].nwl+1; L++) - { - ucell->atoms[it].l_nchi[L] = 1; - ucell->atoms[it].nw += (2*L + 1) * ucell->atoms[it].l_nchi[L]; - } - ucell->atoms[it].na = this->natom[it]; - //coordinates and related physical quantities - ucell->atoms[it].tau.resize(ucell->atoms[it].na); - ucell->atoms[it].dis.resize(ucell->atoms[it].na); - ucell->atoms[it].taud.resize(ucell->atoms[it].na); - ucell->atoms[it].vel.resize(ucell->atoms[it].na); - ucell->atoms[it].mag.resize(ucell->atoms[it].na); - ucell->atoms[it].angle1.resize(ucell->atoms[it].na); - 
ucell->atoms[it].angle2.resize(ucell->atoms[it].na); - ucell->atoms[it].m_loc_.resize(ucell->atoms[it].na); - ucell->atoms[it].mbl.resize(ucell->atoms[it].na); - ucell->atoms[it].mass = ucell->atom_mass[it]; // mass set here + UnitCell* + SetUcellInfo () + { + // basic info + this->ntype = this->elements.size (); + UnitCell* ucell = new UnitCell; + ucell->setup (this->latname, this->ntype, this->lmaxmax, this->init_vel, this->fixed_axes); - for(int ia=0; iaatoms[it].na; ++ia) - { - if (ucell->Coordinate == "Direct") - { - ucell->atoms[it].taud[ia].x = this->coordinates[this->atomic_index*3+0]; - ucell->atoms[it].taud[ia].y = this->coordinates[this->atomic_index*3+1]; - ucell->atoms[it].taud[ia].z = this->coordinates[this->atomic_index*3+2]; - ucell->atoms[it].tau[ia] = ucell->atoms[it].taud[ia]*ucell->latvec; - } - else if (ucell->Coordinate == "Cartesian") - { - ucell->atoms[it].tau[ia].x = this->coordinates[this->atomic_index*3+0]; - ucell->atoms[it].tau[ia].y = this->coordinates[this->atomic_index*3+1]; - ucell->atoms[it].tau[ia].z = this->coordinates[this->atomic_index*3+2]; - ModuleBase::Mathzone::Cartesian_to_Direct( - ucell->atoms[it].tau[ia].x, ucell->atoms[it].tau[ia].y, ucell->atoms[it].tau[ia].z, - ucell->latvec.e11, ucell->latvec.e12, ucell->latvec.e13, - ucell->latvec.e21, ucell->latvec.e22, ucell->latvec.e23, - ucell->latvec.e31, ucell->latvec.e32, ucell->latvec.e33, - ucell->atoms[it].taud[ia].x, ucell->atoms[it].taud[ia].y, ucell->atoms[it].taud[ia].z); - } - ucell->atoms[it].dis[ia].set(0, 0, 0); - if(this->init_vel) - { - ucell->atoms[it].vel[ia].x = this->velocity[this->atomic_index*3+0]; - ucell->atoms[it].vel[ia].y = this->velocity[this->atomic_index*3+1]; - ucell->atoms[it].vel[ia].z = this->velocity[this->atomic_index*3+2]; - } - else - { - ucell->atoms[it].vel[ia].set(0,0,0); - } - ucell->atoms[it].m_loc_[ia].set(0,0,0); - ucell->atoms[it].angle1[ia] = 0; - ucell->atoms[it].angle2[ia] = 0; - if(this->selective_dynamics) - { - 
ucell->atoms[it].mbl[ia].x = this->mbl[this->atomic_index*3+0]; - ucell->atoms[it].mbl[ia].y = this->mbl[this->atomic_index*3+1]; - ucell->atoms[it].mbl[ia].z = this->mbl[this->atomic_index*3+2]; - } - else - { - ucell->atoms[it].mbl[ia] = {1,1,1}; - } - ++(this->atomic_index); - } - } - ucell->nat = this->natom.sum(); - return ucell; - } + delete[] ucell->magnet.start_mag; // mag set here + ucell->atom_label.resize (ucell->ntype); + ucell->atom_mass.resize (ucell->ntype); + ucell->pseudo_fn.resize (ucell->ntype); + ucell->pseudo_type.resize (ucell->ntype); + ucell->orbital_fn.resize (ucell->ntype); + ucell->magnet.start_mag = new double[ucell->ntype]; // mag set here + ucell->magnet.ux_[0] = 0.0; // ux_ set here + ucell->magnet.ux_[1] = 0.0; + ucell->magnet.ux_[2] = 0.0; + for (int it = 0; it < ucell->ntype; ++it) + { + ucell->atom_label[it] = this->elements[it]; + ucell->atom_mass[it] = this->atomic_mass[it]; + ucell->pseudo_fn[it] = this->pp_files[it]; + ucell->pseudo_type[it] = this->pp_types[it]; + ucell->orbital_fn[it] = this->orb_files[it]; + ucell->magnet.start_mag[it] = 0.0; // mag set here + } + // lattice info + ucell->lat0 = this->lat0; + ucell->lat0_angstrom = ucell->lat0 * ModuleBase::BOHR_TO_A; + ucell->tpiba = ModuleBase::TWO_PI / ucell->lat0; + ucell->tpiba2 = ucell->tpiba * ucell->tpiba; + ucell->latvec.e11 = this->latvec[0]; + ucell->latvec.e12 = this->latvec[1]; + ucell->latvec.e13 = this->latvec[2]; + ucell->latvec.e21 = this->latvec[3]; + ucell->latvec.e22 = this->latvec[4]; + ucell->latvec.e23 = this->latvec[5]; + ucell->latvec.e31 = this->latvec[6]; + ucell->latvec.e32 = this->latvec[7]; + ucell->latvec.e33 = this->latvec[8]; + ucell->a1.x = ucell->latvec.e11; + ucell->a1.y = ucell->latvec.e12; + ucell->a1.z = ucell->latvec.e13; + ucell->a2.x = ucell->latvec.e21; + ucell->a2.y = ucell->latvec.e22; + ucell->a2.z = ucell->latvec.e23; + ucell->a3.x = ucell->latvec.e31; + ucell->a3.y = ucell->latvec.e32; + ucell->a3.z = ucell->latvec.e33; + 
ucell->GT = ucell->latvec.Inverse (); + ucell->G = ucell->GT.Transpose (); + ucell->GGT = ucell->G * ucell->GT; + ucell->invGGT = ucell->GGT.Inverse (); + ucell->omega = std::abs (ucell->latvec.Det ()) * (ucell->lat0) * (ucell->lat0) * (ucell->lat0); + // atomic info + ucell->Coordinate = this->coor_type; + ucell->atoms = new Atom[ucell->ntype]; + ucell->set_atom_flag = true; + this->atomic_index = 0; + for (int it = 0; it < ucell->ntype; ++it) + { + ucell->atoms[it].label = this->elements[it]; + ucell->atoms[it].nw = 0; + ucell->atoms[it].nwl = 2; + ucell->atoms[it].l_nchi.resize (ucell->atoms[it].nwl + 1); + for (int L = 0; L < ucell->atoms[it].nwl + 1; L++) + { + ucell->atoms[it].l_nchi[L] = 1; + ucell->atoms[it].nw += (2 * L + 1) * ucell->atoms[it].l_nchi[L]; + } + ucell->atoms[it].na = this->natom[it]; + // coordinates and related physical quantities + ucell->atoms[it].tau.resize (ucell->atoms[it].na); + ucell->atoms[it].dis.resize (ucell->atoms[it].na); + ucell->atoms[it].taud.resize (ucell->atoms[it].na); + ucell->atoms[it].vel.resize (ucell->atoms[it].na); + ucell->atoms[it].mag.resize (ucell->atoms[it].na); + ucell->atoms[it].angle1.resize (ucell->atoms[it].na); + ucell->atoms[it].angle2.resize (ucell->atoms[it].na); + ucell->atoms[it].m_loc_.resize (ucell->atoms[it].na); + ucell->atoms[it].mbl.resize (ucell->atoms[it].na); + ucell->atoms[it].mass = ucell->atom_mass[it]; // mass set here + + for (int ia = 0; ia < ucell->atoms[it].na; ++ia) + { + if (ucell->Coordinate == "Direct") + { + ucell->atoms[it].taud[ia].x = this->coordinates[this->atomic_index * 3 + 0]; + ucell->atoms[it].taud[ia].y = this->coordinates[this->atomic_index * 3 + 1]; + ucell->atoms[it].taud[ia].z = this->coordinates[this->atomic_index * 3 + 2]; + ucell->atoms[it].tau[ia] = ucell->atoms[it].taud[ia] * ucell->latvec; + } + else if (ucell->Coordinate == "Cartesian") + { + ucell->atoms[it].tau[ia].x = this->coordinates[this->atomic_index * 3 + 0]; + ucell->atoms[it].tau[ia].y = 
this->coordinates[this->atomic_index * 3 + 1]; + ucell->atoms[it].tau[ia].z = this->coordinates[this->atomic_index * 3 + 2]; + ModuleBase::Mathzone::Cartesian_to_Direct (ucell->atoms[it].tau[ia].x, + ucell->atoms[it].tau[ia].y, + ucell->atoms[it].tau[ia].z, + ucell->latvec.e11, + ucell->latvec.e12, + ucell->latvec.e13, + ucell->latvec.e21, + ucell->latvec.e22, + ucell->latvec.e23, + ucell->latvec.e31, + ucell->latvec.e32, + ucell->latvec.e33, + ucell->atoms[it].taud[ia].x, + ucell->atoms[it].taud[ia].y, + ucell->atoms[it].taud[ia].z); + } + ucell->atoms[it].dis[ia].set (0, 0, 0); + if (this->init_vel) + { + ucell->atoms[it].vel[ia].x = this->velocity[this->atomic_index * 3 + 0]; + ucell->atoms[it].vel[ia].y = this->velocity[this->atomic_index * 3 + 1]; + ucell->atoms[it].vel[ia].z = this->velocity[this->atomic_index * 3 + 2]; + } + else + { + ucell->atoms[it].vel[ia].set (0, 0, 0); + } + ucell->atoms[it].m_loc_[ia].set (0, 0, 0); + ucell->atoms[it].angle1[ia] = 0; + ucell->atoms[it].angle2[ia] = 0; + if (this->selective_dynamics) + { + ucell->atoms[it].mbl[ia].x = this->mbl[this->atomic_index * 3 + 0]; + ucell->atoms[it].mbl[ia].y = this->mbl[this->atomic_index * 3 + 1]; + ucell->atoms[it].mbl[ia].z = this->mbl[this->atomic_index * 3 + 2]; + } + else + { + ucell->atoms[it].mbl[ia] = {1, 1, 1}; + } + ++(this->atomic_index); + } + } + ucell->nat = this->natom.sum (); + return ucell; + } }; -UcellTestPrepare::UcellTestPrepare(std::string latname_in, - int lmaxmax_in, - bool init_vel_in, - bool selective_dynamics_in, - bool relax_new_in, - std::string fixed_axes_in, - double lat0_in, - std::valarray latvec_in, - std::vector elements_in, - std::vector pp_files_in, - std::vector pp_types_in, - std::vector orb_files_in, - std::valarray natom_in, - std::vector atomic_mass_in, - std::string coor_type_in, - std::valarray coordinates_in): - latname(latname_in), - lmaxmax(lmaxmax_in), - init_vel(init_vel_in), - selective_dynamics(selective_dynamics_in), - 
relax_new(relax_new_in), - fixed_axes(fixed_axes_in), - lat0(lat0_in), - latvec(latvec_in), - elements(elements_in), - pp_files(pp_files_in), - pp_types(pp_types_in), - orb_files(orb_files_in), - natom(natom_in), - atomic_mass(atomic_mass_in), - coor_type(coor_type_in), - coordinates(coordinates_in) +UcellTestPrepare::UcellTestPrepare (std::string latname_in, + int lmaxmax_in, + bool init_vel_in, + bool selective_dynamics_in, + bool relax_new_in, + std::string fixed_axes_in, + double lat0_in, + std::valarray latvec_in, + std::vector elements_in, + std::vector pp_files_in, + std::vector pp_types_in, + std::vector orb_files_in, + std::valarray natom_in, + std::vector atomic_mass_in, + std::string coor_type_in, + std::valarray coordinates_in) + : latname (latname_in), lmaxmax (lmaxmax_in), init_vel (init_vel_in), selective_dynamics (selective_dynamics_in), + relax_new (relax_new_in), fixed_axes (fixed_axes_in), lat0 (lat0_in), latvec (latvec_in), elements (elements_in), + pp_files (pp_files_in), pp_types (pp_types_in), orb_files (orb_files_in), natom (natom_in), + atomic_mass (atomic_mass_in), coor_type (coor_type_in), coordinates (coordinates_in) { - mbl = std::valarray(0.0, coordinates_in.size()); - velocity = std::valarray(0.0, coordinates_in.size()); + mbl = std::valarray (0.0, coordinates_in.size ()); + velocity = std::valarray (0.0, coordinates_in.size ()); } -UcellTestPrepare::UcellTestPrepare(std::string latname_in, - int lmaxmax_in, - bool init_vel_in, - bool selective_dynamics_in, - bool relax_new_in, - std::string fixed_axes_in, - double lat0_in, - std::valarray latvec_in, - std::vector elements_in, - std::vector pp_files_in, - std::vector pp_types_in, - std::vector orb_files_in, - std::valarray natom_in, - std::vector atomic_mass_in, - std::string coor_type_in, - std::valarray coordinates_in, - std::valarray mbl_in, - std::valarray velocity_in): - latname(latname_in), - lmaxmax(lmaxmax_in), - init_vel(init_vel_in), - 
selective_dynamics(selective_dynamics_in), - relax_new(relax_new_in), - fixed_axes(fixed_axes_in), - lat0(lat0_in), - latvec(latvec_in), - elements(elements_in), - pp_files(pp_files_in), - pp_types(pp_types_in), - orb_files(orb_files_in), - natom(natom_in), - atomic_mass(atomic_mass_in), - coor_type(coor_type_in), - coordinates(coordinates_in), - mbl(mbl_in), - velocity(velocity_in) // velocity assume the existence of mbl in print_stru_file() -{} - -UcellTestPrepare::UcellTestPrepare(const UcellTestPrepare &utp): - latname(utp.latname), - lmaxmax(utp.lmaxmax), - init_vel(utp.init_vel), - selective_dynamics(utp.selective_dynamics), - relax_new(utp.relax_new), - fixed_axes(utp.fixed_axes), - lat0(utp.lat0), - latvec(utp.latvec), - elements(utp.elements), - pp_files(utp.pp_files), - pp_types(utp.pp_types), - orb_files(utp.orb_files), - natom(utp.natom), - atomic_mass(utp.atomic_mass), - coor_type(utp.coor_type), - coordinates(utp.coordinates), - mbl(utp.mbl), - velocity(utp.velocity) // velocity assume the existence of mbl in print_stru_file() -{} +UcellTestPrepare::UcellTestPrepare (std::string latname_in, + int lmaxmax_in, + bool init_vel_in, + bool selective_dynamics_in, + bool relax_new_in, + std::string fixed_axes_in, + double lat0_in, + std::valarray latvec_in, + std::vector elements_in, + std::vector pp_files_in, + std::vector pp_types_in, + std::vector orb_files_in, + std::valarray natom_in, + std::vector atomic_mass_in, + std::string coor_type_in, + std::valarray coordinates_in, + std::valarray mbl_in, + std::valarray velocity_in) + : latname (latname_in), lmaxmax (lmaxmax_in), init_vel (init_vel_in), selective_dynamics (selective_dynamics_in), + relax_new (relax_new_in), fixed_axes (fixed_axes_in), lat0 (lat0_in), latvec (latvec_in), elements (elements_in), + pp_files (pp_files_in), pp_types (pp_types_in), orb_files (orb_files_in), natom (natom_in), + atomic_mass (atomic_mass_in), coor_type (coor_type_in), coordinates (coordinates_in), mbl (mbl_in), + 
velocity (velocity_in) // velocity assume the existence of mbl in print_stru_file() +{ +} -std::map UcellTestLib +UcellTestPrepare::UcellTestPrepare (const UcellTestPrepare& utp) + : latname (utp.latname), lmaxmax (utp.lmaxmax), init_vel (utp.init_vel), + selective_dynamics (utp.selective_dynamics), relax_new (utp.relax_new), fixed_axes (utp.fixed_axes), + lat0 (utp.lat0), latvec (utp.latvec), elements (utp.elements), pp_files (utp.pp_files), pp_types (utp.pp_types), + orb_files (utp.orb_files), natom (utp.natom), atomic_mass (utp.atomic_mass), coor_type (utp.coor_type), + coordinates (utp.coordinates), mbl (utp.mbl), + velocity (utp.velocity) // velocity assume the existence of mbl in print_stru_file() { - {"Si", UcellTestPrepare( - "fcc", //latname - 2, //lmaxmax - true, //init_vel - true, //selective_dyanmics - true, //relax_new - "volume", //fixed_axes - 10.2, //lat0 - {-0.5,0.0,0.5, //latvec - 0.0,0.5,0.5, - -0.5,0.5,0.0}, - {"Si"}, //elements - {"Si.upf"}, //upf file - {"upf201"}, //upf types - {"Si.orb"}, //orb file - {2}, //number of each elements - {28.0}, //atomic mass - "Cartesian", //coordination type - {0.0,0.0,0.0, //atomic coordinates - 0.25,0.25,0.25})} -}; +} + +std::map UcellTestLib{{"Si", + UcellTestPrepare ("fcc", // latname + 2, // lmaxmax + true, // init_vel + true, // selective_dyanmics + true, // relax_new + "volume", // fixed_axes + 10.2, // lat0 + {-0.5, + 0.0, + 0.5, // latvec + 0.0, + 0.5, + 0.5, + -0.5, + 0.5, + 0.0}, + {"Si"}, // elements + {"Si.upf"}, // upf file + {"upf201"}, // upf types + {"Si.orb"}, // orb file + {2}, // number of each elements + {28.0}, // atomic mass + "Cartesian", // coordination type + {0.0, + 0.0, + 0.0, // atomic coordinates + 0.25, + 0.25, + 0.25})}}; #endif diff --git a/source/source_cell/module_neighbor/test/sltk_atom_arrange_test.cpp b/source/source_cell/module_neighbor/test/sltk_atom_arrange_test.cpp index 329554d62c3..27de04524dc 100644 --- 
a/source/source_cell/module_neighbor/test/sltk_atom_arrange_test.cpp +++ b/source/source_cell/module_neighbor/test/sltk_atom_arrange_test.cpp @@ -11,29 +11,18 @@ #include "prepare_unitcell.h" #include "source_cell/read_stru.h" #ifdef __LCAO -InfoNonlocal::InfoNonlocal() -{ -} -InfoNonlocal::~InfoNonlocal() -{ -} -LCAO_Orbitals::LCAO_Orbitals() -{ -} -LCAO_Orbitals::~LCAO_Orbitals() -{ -} +InfoNonlocal::InfoNonlocal () {} +InfoNonlocal::~InfoNonlocal () {} +LCAO_Orbitals::LCAO_Orbitals () {} +LCAO_Orbitals::~LCAO_Orbitals () {} #endif -Magnetism::Magnetism() +Magnetism::Magnetism () { this->tot_mag = 0.0; this->abs_mag = 0.0; this->start_mag = nullptr; } -Magnetism::~Magnetism() -{ - delete[] this->start_mag; -} +Magnetism::~Magnetism () { delete[] this->start_mag; } /************************************************ * unit test of atom_arrange @@ -49,7 +38,8 @@ Magnetism::~Magnetism() * - filter AdjacentAtomInfo to the minimized adjacent atoms */ -void SetGlobalV() +void + SetGlobalV () { PARAM.input.test_grid = false; } @@ -65,18 +55,20 @@ class SltkAtomArrangeTest : public testing::Test double radius = ((8 + 5.01) * 2.0 + 0.01) / 10.2; int test_atom_in = 0; std::string output; - void SetUp() + void + SetUp () { - SetGlobalV(); - ucell = utp.SetUcellInfo(); + SetGlobalV (); + ucell = utp.SetUcellInfo (); } - void TearDown() + void + TearDown () { delete ucell; } }; -TEST_F(SltkAtomArrangeTest, setsrNL) +TEST_F (SltkAtomArrangeTest, setsrNL) { atom_arrange test; const std::string teststring = "m"; @@ -85,71 +77,71 @@ TEST_F(SltkAtomArrangeTest, setsrNL) bool gamma_only_local = true; double test_sr = 0; std::ofstream ofs; - ofs.open("./to_test_arrange.txt"); - test_sr = test.set_sr_NL(ofs, teststring, rcutmax_Phi, rcutmax_Beta, gamma_only_local); - EXPECT_DOUBLE_EQ(test_sr, 2.001); + ofs.open ("./to_test_arrange.txt"); + test_sr = test.set_sr_NL (ofs, teststring, rcutmax_Phi, rcutmax_Beta, gamma_only_local); + EXPECT_DOUBLE_EQ (test_sr, 2.001); gamma_only_local = 
false; - test_sr = test.set_sr_NL(ofs, teststring, rcutmax_Phi, rcutmax_Beta, gamma_only_local); - EXPECT_DOUBLE_EQ(test_sr, 6.001); + test_sr = test.set_sr_NL (ofs, teststring, rcutmax_Phi, rcutmax_Beta, gamma_only_local); + EXPECT_DOUBLE_EQ (test_sr, 6.001); const std::string teststring2 = "no"; - test_sr = test.set_sr_NL(ofs, teststring2, rcutmax_Phi, rcutmax_Beta, gamma_only_local); - ofs.close(); + test_sr = test.set_sr_NL (ofs, teststring2, rcutmax_Phi, rcutmax_Beta, gamma_only_local); + ofs.close (); std::ifstream ifs; std::string test2, s; - ifs.open("./to_test_arrange.txt"); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("Orbital max radius cutoff (Bohr) = 1")); - EXPECT_THAT(str, testing::HasSubstr("Nonlocal proj. max radius cutoff (Bohr) = 2")); - ifs.close(); - //remove("./to_test_arrange"); + ifs.open ("./to_test_arrange.txt"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("Orbital max radius cutoff (Bohr) = 1")); + EXPECT_THAT (str, testing::HasSubstr ("Nonlocal proj. 
max radius cutoff (Bohr) = 2")); + ifs.close (); + // remove("./to_test_arrange"); } -TEST_F(SltkAtomArrangeTest, Search) +TEST_F (SltkAtomArrangeTest, Search) { - unitcell::check_dtau(ucell->atoms,ucell->ntype, ucell->lat0, ucell->latvec); - Grid_Driver grid_d(PARAM.input.test_deconstructor, PARAM.input.test_grid); - ofs.open("test.out"); + unitcell::check_dtau (ucell->atoms, ucell->ntype, ucell->lat0, ucell->latvec); + Grid_Driver grid_d (PARAM.input.test_deconstructor, PARAM.input.test_grid); + ofs.open ("test.out"); bool test_only = true; - atom_arrange::search(pbc, ofs, grid_d, *ucell, radius, test_atom_in, test_only); - EXPECT_EQ(grid_d.getType(0),0); - EXPECT_EQ(grid_d.getNatom(0), 1); // adjacent atom is 1 - ofs.close(); - ifs.open("test.out"); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("search neighboring atoms done.")); - remove("test.out"); + atom_arrange::search (pbc, ofs, grid_d, *ucell, radius, test_atom_in, test_only); + EXPECT_EQ (grid_d.getType (0), 0); + EXPECT_EQ (grid_d.getNatom (0), 1); // adjacent atom is 1 + ofs.close (); + ifs.open ("test.out"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("search neighboring atoms done.")); + remove ("test.out"); } -TEST_F(SltkAtomArrangeTest, Filteradjs) +TEST_F (SltkAtomArrangeTest, Filteradjs) { - unitcell::check_dtau(ucell->atoms,ucell->ntype, ucell->lat0, ucell->latvec); - Grid_Driver grid_d(PARAM.input.test_deconstructor, PARAM.input.test_grid); - ofs.open("test.out"); + unitcell::check_dtau (ucell->atoms, ucell->ntype, ucell->lat0, ucell->latvec); + Grid_Driver grid_d (PARAM.input.test_deconstructor, PARAM.input.test_grid); + ofs.open ("test.out"); bool test_only = true; - atom_arrange::search(pbc, ofs, grid_d, *ucell, radius, test_atom_in, test_only); - EXPECT_EQ(grid_d.getType(0),0); - EXPECT_EQ(grid_d.getNatom(0), 1); // adjacent atom is 1 - 
ofs.close(); - ifs.open("test.out"); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("search neighboring atoms done.")); - remove("test.out"); + atom_arrange::search (pbc, ofs, grid_d, *ucell, radius, test_atom_in, test_only); + EXPECT_EQ (grid_d.getType (0), 0); + EXPECT_EQ (grid_d.getNatom (0), 1); // adjacent atom is 1 + ofs.close (); + ifs.open ("test.out"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("search neighboring atoms done.")); + remove ("test.out"); AdjacentAtomInfo adjs; - grid_d.Find_atom(*ucell, ucell->atoms[0].tau[0], 0, 0, &adjs); - EXPECT_EQ(adjs.adj_num, 0); + grid_d.Find_atom (*ucell, ucell->atoms[0].tau[0], 0, 0, &adjs); + EXPECT_EQ (adjs.adj_num, 0); // add one adjacent atom adjs.adj_num++; - adjs.adjacent_tau.push_back(ModuleBase::Vector3(0,0,0)); - adjs.box.push_back(ModuleBase::Vector3(0,0,0)); - adjs.natom.push_back(1); - adjs.ntype.push_back(0); - EXPECT_EQ(adjs.adj_num, 1); + adjs.adjacent_tau.push_back (ModuleBase::Vector3 (0, 0, 0)); + adjs.box.push_back (ModuleBase::Vector3 (0, 0, 0)); + adjs.natom.push_back (1); + adjs.ntype.push_back (0); + EXPECT_EQ (adjs.adj_num, 1); // filter adjs to no adjacent status - std::vector is_adjs(adjs.adj_num + 1, false); + std::vector is_adjs (adjs.adj_num + 1, false); is_adjs[0] = true; - filter_adjs(is_adjs, adjs); - EXPECT_EQ(adjs.adj_num, 0); + filter_adjs (is_adjs, adjs); + EXPECT_EQ (adjs.adj_num, 0); } diff --git a/source/source_cell/module_neighbor/test/sltk_atom_test.cpp b/source/source_cell/module_neighbor/test/sltk_atom_test.cpp index fb11b93be3d..4a02c9cf27a 100644 --- a/source/source_cell/module_neighbor/test/sltk_atom_test.cpp +++ b/source/source_cell/module_neighbor/test/sltk_atom_test.cpp @@ -20,21 +20,20 @@ class SltkAtomTest : public testing::Test { -protected: + protected: FAtom test; }; - -TEST_F(SltkAtomTest, SetterGetters) +TEST_F 
(SltkAtomTest, SetterGetters) { - FAtom test_temp(1.0, 2.0, 3.0, 4, 5, 0, 1, 2); + FAtom test_temp (1.0, 2.0, 3.0, 4, 5, 0, 1, 2); - EXPECT_DOUBLE_EQ(test_temp.x, 1.0); - EXPECT_DOUBLE_EQ(test_temp.y, 2.0); - EXPECT_DOUBLE_EQ(test_temp.z, 3.0); - EXPECT_EQ(test_temp.type, 4); - EXPECT_EQ(test_temp.natom, 5); - EXPECT_EQ(test_temp.cell_x, 0); - EXPECT_EQ(test_temp.cell_y, 1); - EXPECT_EQ(test_temp.cell_z, 2); + EXPECT_DOUBLE_EQ (test_temp.x, 1.0); + EXPECT_DOUBLE_EQ (test_temp.y, 2.0); + EXPECT_DOUBLE_EQ (test_temp.z, 3.0); + EXPECT_EQ (test_temp.type, 4); + EXPECT_EQ (test_temp.natom, 5); + EXPECT_EQ (test_temp.cell_x, 0); + EXPECT_EQ (test_temp.cell_y, 1); + EXPECT_EQ (test_temp.cell_z, 2); } diff --git a/source/source_cell/module_neighbor/test/sltk_grid_test.cpp b/source/source_cell/module_neighbor/test/sltk_grid_test.cpp index 044feafc2de..e45642600e8 100644 --- a/source/source_cell/module_neighbor/test/sltk_grid_test.cpp +++ b/source/source_cell/module_neighbor/test/sltk_grid_test.cpp @@ -8,29 +8,18 @@ #undef private #include "source_cell/read_stru.h" #ifdef __LCAO -InfoNonlocal::InfoNonlocal() -{ -} -InfoNonlocal::~InfoNonlocal() -{ -} -LCAO_Orbitals::LCAO_Orbitals() -{ -} -LCAO_Orbitals::~LCAO_Orbitals() -{ -} +InfoNonlocal::InfoNonlocal () {} +InfoNonlocal::~InfoNonlocal () {} +LCAO_Orbitals::LCAO_Orbitals () {} +LCAO_Orbitals::~LCAO_Orbitals () {} #endif -Magnetism::Magnetism() +Magnetism::Magnetism () { this->tot_mag = 0.0; this->abs_mag = 0.0; this->start_mag = nullptr; } -Magnetism::~Magnetism() -{ - delete[] this->start_mag; -} +Magnetism::~Magnetism () { delete[] this->start_mag; } /************************************************ * unit test of sltk_grid @@ -45,7 +34,8 @@ Magnetism::~Magnetism() * member Cell as a 3D array of CellSet */ -void SetGlobalV() +void + SetGlobalV () { PARAM.input.test_grid = 0; } @@ -61,12 +51,14 @@ class SltkGridTest : public testing::Test double radius = ((8 + 5.01) * 2.0 + 0.01) / 10.2; int test_atom_in = 0; std::string 
output; - void SetUp() + void + SetUp () { - SetGlobalV(); - ucell = utp.SetUcellInfo(); + SetGlobalV (); + ucell = utp.SetUcellInfo (); } - void TearDown() + void + TearDown () { delete ucell; } @@ -74,40 +66,40 @@ class SltkGridTest : public testing::Test using SltkGridDeathTest = SltkGridTest; -TEST_F(SltkGridTest, Init) +TEST_F (SltkGridTest, Init) { - ofs.open("test.out"); - unitcell::check_dtau(ucell->atoms,ucell->ntype, ucell->lat0, ucell->latvec); + ofs.open ("test.out"); + unitcell::check_dtau (ucell->atoms, ucell->ntype, ucell->lat0, ucell->latvec); test_atom_in = 2; PARAM.input.test_grid = 1; - Grid LatGrid(PARAM.input.test_grid); - LatGrid.init(ofs, *ucell, radius, pbc); - EXPECT_EQ(LatGrid.getGlayerX(), 6); - EXPECT_EQ(LatGrid.getGlayerY(), 6); - EXPECT_EQ(LatGrid.getGlayerZ(), 6); - EXPECT_EQ(LatGrid.getGlayerX_minus(), 5); - EXPECT_EQ(LatGrid.getGlayerY_minus(), 5); - EXPECT_EQ(LatGrid.getGlayerZ_minus(), 5); - ofs.close(); - remove("test.out"); + Grid LatGrid (PARAM.input.test_grid); + LatGrid.init (ofs, *ucell, radius, pbc); + EXPECT_EQ (LatGrid.getGlayerX (), 6); + EXPECT_EQ (LatGrid.getGlayerY (), 6); + EXPECT_EQ (LatGrid.getGlayerZ (), 6); + EXPECT_EQ (LatGrid.getGlayerX_minus (), 5); + EXPECT_EQ (LatGrid.getGlayerY_minus (), 5); + EXPECT_EQ (LatGrid.getGlayerZ_minus (), 5); + ofs.close (); + remove ("test.out"); } -TEST_F(SltkGridTest, InitSmall) +TEST_F (SltkGridTest, InitSmall) { - ofs.open("test.out"); - unitcell::check_dtau(ucell->atoms,ucell->ntype, ucell->lat0, ucell->latvec); + ofs.open ("test.out"); + unitcell::check_dtau (ucell->atoms, ucell->ntype, ucell->lat0, ucell->latvec); test_atom_in = 2; PARAM.input.test_grid = 1; radius = 0.5; - Grid LatGrid(PARAM.input.test_grid); - LatGrid.init(ofs, *ucell, radius, pbc); - LatGrid.setMemberVariables(ofs, *ucell); - EXPECT_EQ(LatGrid.pbc, true); - EXPECT_TRUE(LatGrid.pbc); - EXPECT_DOUBLE_EQ(LatGrid.sradius2, radius * radius); - EXPECT_DOUBLE_EQ(LatGrid.sradius2, 0.5 * 0.5); - 
EXPECT_DOUBLE_EQ(LatGrid.sradius, radius); - EXPECT_DOUBLE_EQ(LatGrid.sradius, 0.5); + Grid LatGrid (PARAM.input.test_grid); + LatGrid.init (ofs, *ucell, radius, pbc); + LatGrid.setMemberVariables (ofs, *ucell); + EXPECT_EQ (LatGrid.pbc, true); + EXPECT_TRUE (LatGrid.pbc); + EXPECT_DOUBLE_EQ (LatGrid.sradius2, radius * radius); + EXPECT_DOUBLE_EQ (LatGrid.sradius2, 0.5 * 0.5); + EXPECT_DOUBLE_EQ (LatGrid.sradius, radius); + EXPECT_DOUBLE_EQ (LatGrid.sradius, 0.5); /* // minimal value of x, y, z EXPECT_DOUBLE_EQ(LatGrid.true_cell_x, 1); @@ -118,8 +110,8 @@ TEST_F(SltkGridTest, InitSmall) EXPECT_EQ(LatGrid.cell_ny, 3); EXPECT_EQ(LatGrid.cell_nz, 3); */ - ofs.close(); - remove("test.out"); + ofs.close (); + remove ("test.out"); } /* diff --git a/source/source_cell/module_symmetry/run_symmetry.cpp b/source/source_cell/module_symmetry/run_symmetry.cpp index b950a805a7a..8da934432ca 100644 --- a/source/source_cell/module_symmetry/run_symmetry.cpp +++ b/source/source_cell/module_symmetry/run_symmetry.cpp @@ -6,50 +6,48 @@ #include "source_base/global_variable.h" #include "source_base/parallel_global.h" -void calculate(); +void calculate (); -int main(int argc, char **argv) +int + main (int argc, char** argv) { - std::cout << "Hello, this is the 'symmetry' module of ABACUS." << std::endl; + std::cout << "Hello, this is the 'symmetry' module of ABACUS." << std::endl; - std::cout << "The module does symmetry analysis for an input geometry." << std::endl; - Parallel_Global::read_pal_param(argc,argv); - //std::cout << "Right now, the module is still empty, soon we will have more tests." << std::endl; + std::cout << "The module does symmetry analysis for an input geometry." << std::endl; + Parallel_Global::read_pal_param (argc, argv); + // std::cout << "Right now, the module is still empty, soon we will have more tests." 
<< std::endl; - calculate(); + calculate (); return 0; } - -void calculate() +void + calculate () { - //std::ofstream ofs("log.txt"); - std::ofstream ofs_running("log.txt"); - //std::ofstream ofs("useless.txt"); - std::ofstream ofs_warning("warning.txt"); - std::ifstream ifs("INPUT"); - UnitCell ucell; - ModuleSymmetry::Symmetry symm; - ifs >> ucell.ntype; - ucell.latName = "none"; - ifs.close(); - output out; - ucell.setup_cell( - "STRU", - ofs_running); - std::cout << "set up cell classic done." << std::endl; - symm.analy_sys(ucell.lat, ucell.st, ucell.atoms, ofs_running); - ofs_running.close(); -// ooo.set_orb_tables(); - - //ofs.close(); - - std::cout << "--------------------" << std::endl; - std::cout << " Have a great day! " << std::endl; - std::cout << "--------------------" << std::endl; - + // std::ofstream ofs("log.txt"); + std::ofstream ofs_running ("log.txt"); + // std::ofstream ofs("useless.txt"); + std::ofstream ofs_warning ("warning.txt"); + std::ifstream ifs ("INPUT"); + UnitCell ucell; + ModuleSymmetry::Symmetry symm; + ifs >> ucell.ntype; + ucell.latName = "none"; + ifs.close (); + output out; + ucell.setup_cell ("STRU", ofs_running); + std::cout << "set up cell classic done." << std::endl; + symm.analy_sys (ucell.lat, ucell.st, ucell.atoms, ofs_running); + ofs_running.close (); + // ooo.set_orb_tables(); + + // ofs.close(); + + std::cout << "--------------------" << std::endl; + std::cout << " Have a great day! 
" << std::endl; + std::cout << "--------------------" << std::endl; return; } diff --git a/source/source_cell/module_symmetry/symm_analysis.cpp b/source/source_cell/module_symmetry/symm_analysis.cpp index 16bd470ff33..4d7daaf6212 100644 --- a/source/source_cell/module_symmetry/symm_analysis.cpp +++ b/source/source_cell/module_symmetry/symm_analysis.cpp @@ -4,26 +4,27 @@ using namespace ModuleSymmetry; -void Symmetry::analy_sys(const Lattice& lat, const Statistics& st, Atom* atoms, std::ofstream& ofs_running) +void + Symmetry::analy_sys (const Lattice& lat, const Statistics& st, Atom* atoms, std::ofstream& ofs_running) { - const double MAX_EPS = std::max(1e-3, epsilon_input * 1.001); + const double MAX_EPS = std::max (1e-3, epsilon_input * 1.001); const double MULT_EPS = 2.0; - ModuleBase::TITLE("Symmetry","analy_sys"); - ModuleBase::timer::start("Symmetry","analy_sys"); - - ofs_running << "\n\n"; - ofs_running << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; - ofs_running << " | |" << std::endl; - ofs_running << " | #Symmetry Analysis# |" << std::endl; - ofs_running << " | We calculate the norm of 3 vectors and the angles between them, |" << std::endl; - ofs_running << " | the type of Bravais lattice is given. We can judge if the unticell |" << std::endl; - ofs_running << " | is a primitive cell. Finally we give the point group operation for |" << std::endl; - ofs_running << " | this unitcell. We use the point group operations to perform |" << std::endl; - ofs_running << " | symmetry analysis on given k-point mesh and the charge density. 
|" << std::endl; - ofs_running << " | |" << std::endl; - ofs_running << " <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" << std::endl; - ofs_running << "\n"; + ModuleBase::TITLE ("Symmetry", "analy_sys"); + ModuleBase::timer::start ("Symmetry", "analy_sys"); + + ofs_running << "\n\n"; + ofs_running << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; + ofs_running << " | |" << std::endl; + ofs_running << " | #Symmetry Analysis# |" << std::endl; + ofs_running << " | We calculate the norm of 3 vectors and the angles between them, |" << std::endl; + ofs_running << " | the type of Bravais lattice is given. We can judge if the unticell |" << std::endl; + ofs_running << " | is a primitive cell. Finally we give the point group operation for |" << std::endl; + ofs_running << " | this unitcell. We use the point group operations to perform |" << std::endl; + ofs_running << " | symmetry analysis on given k-point mesh and the charge density. |" << std::endl; + ofs_running << " | |" << std::endl; + ofs_running << " <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" << std::endl; + ofs_running << "\n"; // -------------------------------- // 1. copy data and allocate memory @@ -33,215 +34,263 @@ void Symmetry::analy_sys(const Lattice& lat, const Statistics& st, Atom* atoms, // number of atom species this->ntype = st.ntype; - assert(ntype>0); + assert (ntype > 0); this->na = new int[ntype]; this->istart = new int[ntype]; // start number of atom. - this->index = new int [nat + 2]; // index of atoms + this->index = new int[nat + 2]; // index of atoms - ModuleBase::GlobalFunc::ZEROS(na, ntype); - ModuleBase::GlobalFunc::ZEROS(istart, ntype); - ModuleBase::GlobalFunc::ZEROS(index, nat+2); + ModuleBase::GlobalFunc::ZEROS (na, ntype); + ModuleBase::GlobalFunc::ZEROS (istart, ntype); + ModuleBase::GlobalFunc::ZEROS (index, nat + 2); // atom positions // used in checksym. 
- newpos = new double[3*nat]; // positions of atoms before rotation - rotpos = new double[3*nat]; // positions of atoms after rotation - ModuleBase::GlobalFunc::ZEROS(newpos, 3*nat); - ModuleBase::GlobalFunc::ZEROS(rotpos, 3*nat); + newpos = new double[3 * nat]; // positions of atoms before rotation + rotpos = new double[3 * nat]; // positions of atoms after rotation + ModuleBase::GlobalFunc::ZEROS (newpos, 3 * nat); + ModuleBase::GlobalFunc::ZEROS (rotpos, 3 * nat); this->a1 = lat.a1; this->a2 = lat.a2; this->a3 = lat.a3; - ModuleBase::Matrix3 latvec1; - latvec1.e11 = a1.x; latvec1.e12 = a1.y; latvec1.e13 = a1.z; - latvec1.e21 = a2.x; latvec1.e22 = a2.y; latvec1.e23 = a2.z; - latvec1.e31 = a3.x; latvec1.e32 = a3.y; latvec1.e33 = a3.z; + ModuleBase::Matrix3 latvec1; + latvec1.e11 = a1.x; + latvec1.e12 = a1.y; + latvec1.e13 = a1.z; + latvec1.e21 = a2.x; + latvec1.e22 = a2.y; + latvec1.e23 = a2.z; + latvec1.e31 = a3.x; + latvec1.e32 = a3.y; + latvec1.e33 = a3.z; - output::printM3(ofs_running,"LATTICE VECTORS: (CARTESIAN COORDINATE: IN UNIT OF A0)",latvec1); + output::printM3 (ofs_running, "LATTICE VECTORS: (CARTESIAN COORDINATE: IN UNIT OF A0)", latvec1); istart[0] = 0; this->itmin_type = 0; this->itmin_start = 0; for (int it = 0; it < ntype; ++it) - { - Atom* atom = &atoms[it]; - this->na[it] = atom->na; - if (it > 0) { - istart[it] = istart[it-1] + na[it-1]; - } - //std::cout << "\n istart = " << istart[it]; - if (na[it] < na[itmin_type]) { - this->itmin_type = it; - this->itmin_start = istart[it]; + Atom* atom = &atoms[it]; + this->na[it] = atom->na; + if (it > 0) + { + istart[it] = istart[it - 1] + na[it - 1]; + } + // std::cout << "\n istart = " << istart[it]; + if (na[it] < na[itmin_type]) + { + this->itmin_type = it; + this->itmin_start = istart[it]; + } } - } - //s: input config + // s: input config s1 = a1; s2 = a2; s3 = a3; + auto lattice_to_group = [&, this] (int& nrot_out, int& nrotk_out, std::ofstream& ofs_running) -> void + { + // a: the optimized 
lattice vectors, output + // s: the input lattice vectors, input + // find the real_brav type accordiing to lattice vectors. + this->lattice_type (this->a1, + this->a2, + this->a3, + this->s1, + this->s2, + this->s3, + this->cel_const, + this->pre_const, + this->real_brav, + ilattname, + atoms, + true, + this->newpos); + + ofs_running << " For optimal symmetric configuration:" << std::endl; + ModuleBase::GlobalFunc::OUT (ofs_running, "BRAVAIS TYPE", real_brav); + ModuleBase::GlobalFunc::OUT (ofs_running, "BRAVAIS LATTICE NAME", ilattname); + ModuleBase::GlobalFunc::OUT (ofs_running, "ibrav", real_brav); + Symm_Other::print1 (real_brav, cel_const, ofs_running); + + optlat.e11 = a1.x; + optlat.e12 = a1.y; + optlat.e13 = a1.z; + optlat.e21 = a2.x; + optlat.e22 = a2.y; + optlat.e23 = a2.z; + optlat.e31 = a3.x; + optlat.e32 = a3.y; + optlat.e33 = a3.z; + + // count the number of primitive cells in the supercell + this->pricell (this->newpos, atoms); + + test_brav = true; // output the real ibrav and point group + + // list all possible point group operations + this->setgroup (this->symop, this->nop, this->real_brav); + + // special case for AFM analysis + // which should be loop over all atoms, f.e only loop over spin-up atoms + // -------------------------------- + // AFM analysis Start + if (PARAM.inp.nspin > 1) + { + pricell_loop = this->magmom_same_check (atoms); + } - auto lattice_to_group = [&, this](int& nrot_out, int& nrotk_out, std::ofstream& ofs_running) -> void - { - // a: the optimized lattice vectors, output - // s: the input lattice vectors, input - // find the real_brav type accordiing to lattice vectors. 
- this->lattice_type(this->a1, this->a2, this->a3, this->s1, this->s2, this->s3, - this->cel_const, this->pre_const, this->real_brav, ilattname, atoms, true, this->newpos); - - ofs_running << " For optimal symmetric configuration:" << std::endl; - ModuleBase::GlobalFunc::OUT(ofs_running, "BRAVAIS TYPE", real_brav); - ModuleBase::GlobalFunc::OUT(ofs_running, "BRAVAIS LATTICE NAME", ilattname); - ModuleBase::GlobalFunc::OUT(ofs_running, "ibrav", real_brav); - Symm_Other::print1(real_brav, cel_const, ofs_running); - - optlat.e11 = a1.x; optlat.e12 = a1.y; optlat.e13 = a1.z; - optlat.e21 = a2.x; optlat.e22 = a2.y; optlat.e23 = a2.z; - optlat.e31 = a3.x; optlat.e32 = a3.y; optlat.e33 = a3.z; - - // count the number of primitive cells in the supercell - this->pricell(this->newpos, atoms); - - test_brav = true; // output the real ibrav and point group - - // list all possible point group operations - this->setgroup(this->symop, this->nop, this->real_brav); - - // special case for AFM analysis - // which should be loop over all atoms, f.e only loop over spin-up atoms - // -------------------------------- - // AFM analysis Start - if (PARAM.inp.nspin > 1) - { - pricell_loop = this->magmom_same_check(atoms); - } - - if (!pricell_loop && PARAM.inp.nspin == 2) - { - this->analyze_magnetic_group(atoms, st, nrot_out, nrotk_out); - } - else - { - // get the real symmetry operations according to the input structure - // nrot_out: the number of pure point group rotations - // nrotk_out: the number of all space group operations - this->getgroup(nrot_out, nrotk_out, ofs_running, this->nop, this->symop, - this->gmatrix, this->gtrans, this->newpos, this->rotpos, this->index, - this->ntype, this->itmin_type, this->itmin_start, this->istart, this->na); - } - }; + if (!pricell_loop && PARAM.inp.nspin == 2) + { + this->analyze_magnetic_group (atoms, st, nrot_out, nrotk_out); + } + else + { + // get the real symmetry operations according to the input structure + // nrot_out: the number of 
pure point group rotations + // nrotk_out: the number of all space group operations + this->getgroup (nrot_out, + nrotk_out, + ofs_running, + this->nop, + this->symop, + this->gmatrix, + this->gtrans, + this->newpos, + this->rotpos, + this->index, + this->ntype, + this->itmin_type, + this->itmin_start, + this->istart, + this->na); + } + }; // -------------------------------- // 2. analyze the symmetry // -------------------------------- // 2.1 skip the symmetry analysis if the symmetry has been analyzed if (PARAM.inp.calculation == "cell-relax" && nrotk > 0) - { - std::ofstream no_out; // to screen the output when trying new epsilon + { + std::ofstream no_out; // to screen the output when trying new epsilon - // For the cases where cell-relax cause the number of symmetry operations to increase - if (this->nrotk > this->max_nrotk) { - this->max_nrotk = this->nrotk; - } + // For the cases where cell-relax cause the number of symmetry operations to increase + if (this->nrotk > this->max_nrotk) + { + this->max_nrotk = this->nrotk; + } - int tmp_nrot, tmp_nrotk; - lattice_to_group(tmp_nrot, tmp_nrotk, ofs_running); // get the real symmetry operations - - // Actually, the analysis of symmetry has been done now - // Following implementation is find the best epsilon to keep the symmetry - // some different method to enlarge symmetry_prec - bool eps_enlarged = false; - auto eps_mult = [this](double mult) {epsilon *= mult;}; - auto eps_to = [this](double new_eps) {epsilon = new_eps;}; - - // store the symmetry_prec and nrotk for each try - std::vector precs_try; - std::vector nrotks_try; - // store the initial result - precs_try.push_back(epsilon); - nrotks_try.push_back(tmp_nrotk); - //enlarge epsilon and regenerate pointgroup - // Try to find the symmetry operations by increasing epsilon - while (tmp_nrotk < this->max_nrotk && epsilon < MAX_EPS) - { - eps_mult(MULT_EPS); - eps_enlarged = true; - // lattice_to_group(tmp_nrot, tmp_nrotk, no_out); - lattice_to_group(tmp_nrot, 
tmp_nrotk, no_out); - precs_try.push_back(epsilon); - nrotks_try.push_back(tmp_nrotk); - } - if (tmp_nrotk > this->nrotk) - { - this->nrotk = tmp_nrotk; - ofs_running << " Find new symmtry operations during cell-relax." << std::endl; - if (this->nrotk > this->max_nrotk) - { - this->max_nrotk = this->nrotk; - } - } - if (eps_enlarged) - { - if (epsilon > MAX_EPS) - { - ofs_running << " WARNING: Symmetry cannot be kept due to the lost of accuracy with atom position during cell-relax." << std::endl; - ofs_running << " Continue cell-relax with a lower symmetry. " << std::endl; - // find the smallest epsilon that gives the current number of symmetry operations - int valid_index = nrotks_try.size() - 1; - while (valid_index > 0 - && tmp_nrotk <= nrotks_try[valid_index - 1]) { - --valid_index; + int tmp_nrot, tmp_nrotk; + lattice_to_group (tmp_nrot, tmp_nrotk, ofs_running); // get the real symmetry operations + + // Actually, the analysis of symmetry has been done now + // Following implementation is find the best epsilon to keep the symmetry + // some different method to enlarge symmetry_prec + bool eps_enlarged = false; + auto eps_mult = [this] (double mult) { epsilon *= mult; }; + auto eps_to = [this] (double new_eps) { epsilon = new_eps; }; + + // store the symmetry_prec and nrotk for each try + std::vector precs_try; + std::vector nrotks_try; + // store the initial result + precs_try.push_back (epsilon); + nrotks_try.push_back (tmp_nrotk); + // enlarge epsilon and regenerate pointgroup + // Try to find the symmetry operations by increasing epsilon + while (tmp_nrotk < this->max_nrotk && epsilon < MAX_EPS) + { + eps_mult (MULT_EPS); + eps_enlarged = true; + // lattice_to_group(tmp_nrot, tmp_nrotk, no_out); + lattice_to_group (tmp_nrot, tmp_nrotk, no_out); + precs_try.push_back (epsilon); + nrotks_try.push_back (tmp_nrotk); } - eps_to(precs_try[valid_index]); - if (valid_index > 0) { - ofs_running << " Enlarging `symmetry_prec` to " << epsilon - << " ..." 
<< std::endl; - } else { - eps_enlarged = false; + if (tmp_nrotk > this->nrotk) + { + this->nrotk = tmp_nrotk; + ofs_running << " Find new symmtry operations during cell-relax." << std::endl; + if (this->nrotk > this->max_nrotk) + { + this->max_nrotk = this->nrotk; + } } - // regenerate pointgroup after change epsilon (may not give the same result) - lattice_to_group(tmp_nrot, tmp_nrotk, ofs_running); - this->nrotk = tmp_nrotk; - } else { - ofs_running << " Enlarging `symmetry_prec` to " << epsilon - << " ..." << std::endl; - } - } - if (!eps_enlarged && epsilon > epsilon_input * 1.001) // not "else" here. "eps_enlarged" can be set to false in the above "if" - { // try a smaller symmetry_prec until the number of symmetry operations decreases - precs_try.erase(precs_try.begin() + 1, precs_try.end()); - nrotks_try.erase(nrotks_try.begin() + 1, nrotks_try.end()); - double eps_current = epsilon; // record the current symmetry_prec - do { - eps_mult(1 / MULT_EPS); - lattice_to_group(tmp_nrot, tmp_nrotk, no_out); - precs_try.push_back(epsilon); - nrotks_try.push_back(tmp_nrotk); - } while (tmp_nrotk >= nrotks_try[0] && epsilon > epsilon_input * 1.001 && precs_try.size() < 5); - int valid_index = (tmp_nrotk < nrotks_try[0]) ? nrotks_try.size() - 2 : nrotks_try.size() - 1; + if (eps_enlarged) + { + if (epsilon > MAX_EPS) + { + ofs_running << " WARNING: Symmetry cannot be kept due to the lost of accuracy with atom " + "position during cell-relax." + << std::endl; + ofs_running << " Continue cell-relax with a lower symmetry. " << std::endl; + // find the smallest epsilon that gives the current number of symmetry operations + int valid_index = nrotks_try.size () - 1; + while (valid_index > 0 && tmp_nrotk <= nrotks_try[valid_index - 1]) + { + --valid_index; + } + eps_to (precs_try[valid_index]); + if (valid_index > 0) + { + ofs_running << " Enlarging `symmetry_prec` to " << epsilon << " ..." 
<< std::endl; + } + else + { + eps_enlarged = false; + } + // regenerate pointgroup after change epsilon (may not give the same result) + lattice_to_group (tmp_nrot, tmp_nrotk, ofs_running); + this->nrotk = tmp_nrotk; + } + else + { + ofs_running << " Enlarging `symmetry_prec` to " << epsilon << " ..." << std::endl; + } + } + if (!eps_enlarged + && epsilon + > epsilon_input * 1.001) // not "else" here. "eps_enlarged" can be set to false in the above "if" + { // try a smaller symmetry_prec until the number of symmetry operations decreases + precs_try.erase (precs_try.begin () + 1, precs_try.end ()); + nrotks_try.erase (nrotks_try.begin () + 1, nrotks_try.end ()); + double eps_current = epsilon; // record the current symmetry_prec + do + { + eps_mult (1 / MULT_EPS); + lattice_to_group (tmp_nrot, tmp_nrotk, no_out); + precs_try.push_back (epsilon); + nrotks_try.push_back (tmp_nrotk); + } + while (tmp_nrotk >= nrotks_try[0] && epsilon > epsilon_input * 1.001 && precs_try.size () < 5); + int valid_index = (tmp_nrotk < nrotks_try[0]) ? nrotks_try.size () - 2 : nrotks_try.size () - 1; #ifdef __DEBUG - assert(valid_index >= 0); - assert(nrotks_try[valid_index] >= nrotks_try[0]); + assert (valid_index >= 0); + assert (nrotks_try[valid_index] >= nrotks_try[0]); #endif - epsilon = precs_try[valid_index]; - // regenerate pointgroup after change epsilon - lattice_to_group(tmp_nrot, tmp_nrotk, ofs_running); - this->nrotk = tmp_nrotk; - if (valid_index > 0) { // epsilon is set smaller - ofs_running << " Narrowing `symmetry_prec` from " << eps_current - << " to " << epsilon << " ..." << std::endl; - } + epsilon = precs_try[valid_index]; + // regenerate pointgroup after change epsilon + lattice_to_group (tmp_nrot, tmp_nrotk, ofs_running); + this->nrotk = tmp_nrotk; + if (valid_index > 0) + { // epsilon is set smaller + ofs_running << " Narrowing `symmetry_prec` from " << eps_current << " to " << epsilon + << " ..." 
<< std::endl; + } + } + } + else + { + lattice_to_group (this->nrot, this->nrotk, ofs_running); } - } else { - lattice_to_group(this->nrot, this->nrotk, ofs_running); - } - // Symmetry analysis End! - //------------------------------------------- + // Symmetry analysis End! + //------------------------------------------- - // final number of symmetry operations + // final number of symmetry operations #ifdef __DEBUG ofs_running << "symmetry_prec(epsilon) in current ion step: " << this->epsilon << std::endl; ofs_running << "number of symmetry operations in current ion step: " << this->nrotk << std::endl; @@ -250,63 +299,62 @@ void Symmetry::analy_sys(const Lattice& lat, const Statistics& st, Atom* atoms, // 3. output to running.log //---------------------------------- // output the point group - bool valid_group = this->pointgroup(this->nrot, this->pgnumber, this->pgname, this->gmatrix, ofs_running); - ModuleBase::GlobalFunc::OUT(ofs_running,"POINT GROUP", this->pgname); + bool valid_group = this->pointgroup (this->nrot, this->pgnumber, this->pgname, this->gmatrix, ofs_running); + ModuleBase::GlobalFunc::OUT (ofs_running, "POINT GROUP", this->pgname); // output the space group - valid_group = this->pointgroup(this->nrotk, this->spgnumber, this->spgname, this->gmatrix, ofs_running); - ModuleBase::GlobalFunc::OUT(ofs_running, "POINT GROUP IN SPACE GROUP", this->spgname); + valid_group = this->pointgroup (this->nrotk, this->spgnumber, this->spgname, this->gmatrix, ofs_running); + ModuleBase::GlobalFunc::OUT (ofs_running, "POINT GROUP IN SPACE GROUP", this->spgname); //----------------------------- // 4. 
For the case where point group is not complete due to symmetry_prec //----------------------------- if (!valid_group) - { // select the operations that have the inverse - std::vectorinvmap(this->nrotk, -1); - this->gmatrix_invmap(this->gmatrix, this->nrotk, invmap.data()); - int nrotk_new = 0; - for (int isym = 0;isym < this->nrotk;++isym) - { - if (invmap[isym] != -1) - { - if(nrotk_new < isym) + { // select the operations that have the inverse + std::vector invmap (this->nrotk, -1); + this->gmatrix_invmap (this->gmatrix, this->nrotk, invmap.data ()); + int nrotk_new = 0; + for (int isym = 0; isym < this->nrotk; ++isym) { - this->gmatrix[nrotk_new] = this->gmatrix[isym]; - this->gtrans[nrotk_new] = this->gtrans[isym]; + if (invmap[isym] != -1) + { + if (nrotk_new < isym) + { + this->gmatrix[nrotk_new] = this->gmatrix[isym]; + this->gtrans[nrotk_new] = this->gtrans[isym]; + } + ++nrotk_new; + } } - ++nrotk_new; - } + this->nrotk = nrotk_new; } - this->nrotk = nrotk_new; - } // convert gmatrix to reciprocal space - this->gmatrix_convert_int(gmatrix, kgmatrix, nrotk, optlat, lat.G); - - // convert the symmetry operations from the basis of optimal symmetric configuration + this->gmatrix_convert_int (gmatrix, kgmatrix, nrotk, optlat, lat.G); + + // convert the symmetry operations from the basis of optimal symmetric configuration // to the basis of input configuration - this->gmatrix_convert_int(gmatrix, gmatrix, nrotk, optlat, latvec1); - this->gtrans_convert(gtrans, gtrans, nrotk, optlat, latvec1); + this->gmatrix_convert_int (gmatrix, gmatrix, nrotk, optlat, latvec1); + this->gtrans_convert (gtrans, gtrans, nrotk, optlat, latvec1); - this->set_atom_map(atoms); // find the atom mapping according to the symmetry operations + this->set_atom_map (atoms); // find the atom mapping according to the symmetry operations // Do this here for debug if (PARAM.inp.calculation == "relax") - { - this->all_mbl = this->is_all_movable(atoms, st); - if (!this->all_mbl) { - std::cout << 
"WARNING: Symmetry cannot be kept when not all atoms are movable.\n "; - std::cout << "Continue with symmetry=0 ... \n"; - ModuleSymmetry::Symmetry::symm_flag = 0; + this->all_mbl = this->is_all_movable (atoms, st); + if (!this->all_mbl) + { + std::cout << "WARNING: Symmetry cannot be kept when not all atoms are movable.\n "; + std::cout << "Continue with symmetry=0 ... \n"; + ModuleSymmetry::Symmetry::symm_flag = 0; + } } - } delete[] newpos; delete[] na; delete[] rotpos; delete[] index; delete[] istart; - ModuleBase::timer::end("Symmetry","analy_sys"); + ModuleBase::timer::end ("Symmetry", "analy_sys"); return; } - diff --git a/source/source_cell/module_symmetry/symm_check.cpp b/source/source_cell/module_symmetry/symm_check.cpp index bd51cf9c546..52c2417693e 100644 --- a/source/source_cell/module_symmetry/symm_check.cpp +++ b/source/source_cell/module_symmetry/symm_check.cpp @@ -1,161 +1,155 @@ #include "symmetry.h" using namespace ModuleSymmetry; -bool Symmetry::checksym(const ModuleBase::Matrix3 &s, - ModuleBase::Vector3& gtrans, - double* pos, double* rotpos, int* index, - const int ntype, const int itmin_type, const int itmin_start, - int* istart, int* na)const +bool + Symmetry::checksym (const ModuleBase::Matrix3& s, + ModuleBase::Vector3& gtrans, + double* pos, + double* rotpos, + int* index, + const int ntype, + const int itmin_type, + const int itmin_start, + int* istart, + int* na) const { - //---------------------------------------------- - // checks whether a point group symmetry element - // is a valid symmetry operation on a supercell - //---------------------------------------------- + //---------------------------------------------- + // checks whether a point group symmetry element + // is a valid symmetry operation on a supercell + //---------------------------------------------- // the start atom index. 
bool no_diff = false; - ModuleBase::Vector3 trans(2.0, 2.0, 2.0); + ModuleBase::Vector3 trans (2.0, 2.0, 2.0); bool s_flag = false; for (int it = 0; it < ntype; it++) - { - //------------------------------------ - // impose periodic boundary condition - // 0.5 -> -0.5 - //------------------------------------ - for (int j = istart[it]; j < istart[it] + na[it]; ++j) { - this->check_boundary(pos[j*3+0]); - this->check_boundary(pos[j*3+1]); - this->check_boundary(pos[j*3+2]); - } - //order original atomic positions for current species - this->atom_ordering_new(pos + istart[it] * 3, na[it], index + istart[it]); - - //Rotate atoms of current species - for (int j = istart[it]; j < istart[it] + na[it]; ++j) - { - const int xx=j*3; - const int yy=j*3+1; - const int zz=j*3+2; + //------------------------------------ + // impose periodic boundary condition + // 0.5 -> -0.5 + //------------------------------------ + for (int j = istart[it]; j < istart[it] + na[it]; ++j) + { + this->check_boundary (pos[j * 3 + 0]); + this->check_boundary (pos[j * 3 + 1]); + this->check_boundary (pos[j * 3 + 2]); + } + // order original atomic positions for current species + this->atom_ordering_new (pos + istart[it] * 3, na[it], index + istart[it]); + // Rotate atoms of current species + for (int j = istart[it]; j < istart[it] + na[it]; ++j) + { + const int xx = j * 3; + const int yy = j * 3 + 1; + const int zz = j * 3 + 2; - rotpos[xx] = pos[xx] * s.e11 - + pos[yy] * s.e21 - + pos[zz] * s.e31; + rotpos[xx] = pos[xx] * s.e11 + pos[yy] * s.e21 + pos[zz] * s.e31; - rotpos[yy] = pos[xx] * s.e12 - + pos[yy] * s.e22 - + pos[zz] * s.e32; + rotpos[yy] = pos[xx] * s.e12 + pos[yy] * s.e22 + pos[zz] * s.e32; - rotpos[zz] = pos[xx] * s.e13 - + pos[yy] * s.e23 - + pos[zz] * s.e33; + rotpos[zz] = pos[xx] * s.e13 + pos[yy] * s.e23 + pos[zz] * s.e33; - rotpos[xx] = fmod(rotpos[xx] + 100.5,1) - 0.5; - rotpos[yy] = fmod(rotpos[yy] + 100.5,1) - 0.5; - rotpos[zz] = fmod(rotpos[zz] + 100.5,1) - 0.5; - 
this->check_boundary(rotpos[xx]); - this->check_boundary(rotpos[yy]); - this->check_boundary(rotpos[zz]); + rotpos[xx] = fmod (rotpos[xx] + 100.5, 1) - 0.5; + rotpos[yy] = fmod (rotpos[yy] + 100.5, 1) - 0.5; + rotpos[zz] = fmod (rotpos[zz] + 100.5, 1) - 0.5; + this->check_boundary (rotpos[xx]); + this->check_boundary (rotpos[yy]); + this->check_boundary (rotpos[zz]); + } + // order rotated atomic positions for current species + this->atom_ordering_new (rotpos + istart[it] * 3, na[it], index + istart[it]); } - //order rotated atomic positions for current species - this->atom_ordering_new(rotpos + istart[it] * 3, na[it], index + istart[it]); - } ModuleBase::Vector3 diff; - //--------------------------------------------------------- + //--------------------------------------------------------- // itmin_start = the start atom positions of species itmin - //--------------------------------------------------------- + //--------------------------------------------------------- // (s)tart (p)osition of atom (t)ype which has (min)inal number. 
- ModuleBase::Vector3 sptmin(rotpos[itmin_start * 3], rotpos[itmin_start * 3 + 1], rotpos[itmin_start * 3 + 2]); + ModuleBase::Vector3 sptmin (rotpos[itmin_start * 3], + rotpos[itmin_start * 3 + 1], + rotpos[itmin_start * 3 + 2]); for (int i = itmin_start; i < itmin_start + na[itmin_type]; ++i) - { - //set up the current test std::vector "gtrans" - //and "gtrans" could possibly contain trivial translations: - gtrans.x = this->get_translation_vector( sptmin.x, pos[i*3+0]); - gtrans.y = this->get_translation_vector( sptmin.y, pos[i*3+1]); - gtrans.z = this->get_translation_vector( sptmin.z, pos[i*3+2]); - - //If we had already detected some translation, - //we must only look at the vectors with coordinates smaller than those - //of the previously detected std::vector (find the smallest) - if (gtrans.x > trans.x + epsilon || - gtrans.y > trans.y + epsilon || - gtrans.z > trans.z + epsilon - ) { - continue; - } + // set up the current test std::vector "gtrans" + // and "gtrans" could possibly contain trivial translations: + gtrans.x = this->get_translation_vector (sptmin.x, pos[i * 3 + 0]); + gtrans.y = this->get_translation_vector (sptmin.y, pos[i * 3 + 1]); + gtrans.z = this->get_translation_vector (sptmin.z, pos[i * 3 + 2]); + + // If we had already detected some translation, + // we must only look at the vectors with coordinates smaller than those + // of the previously detected std::vector (find the smallest) + if (gtrans.x > trans.x + epsilon || gtrans.y > trans.y + epsilon || gtrans.z > trans.z + epsilon) + { + continue; + } - //translate all the atomic coordinates BACK by "gtrans" - for (int it = 0; it < ntype; it++) - { - for (int ia = istart[it]; ia < na[it] + istart[it]; ia++) - { - this->check_translation( rotpos[ia*3+0], gtrans.x ); - this->check_translation( rotpos[ia*3+1], gtrans.y ); - this->check_translation( rotpos[ia*3+2], gtrans.z ); - - this->check_boundary( rotpos[ia*3+0] ); - this->check_boundary( rotpos[ia*3+1] ); - this->check_boundary( 
rotpos[ia*3+2] ); - } - //order translated atomic positions for current species - this->atom_ordering_new(rotpos + istart[it] * 3, na[it], index + istart[it]); - } + // translate all the atomic coordinates BACK by "gtrans" + for (int it = 0; it < ntype; it++) + { + for (int ia = istart[it]; ia < na[it] + istart[it]; ia++) + { + this->check_translation (rotpos[ia * 3 + 0], gtrans.x); + this->check_translation (rotpos[ia * 3 + 1], gtrans.y); + this->check_translation (rotpos[ia * 3 + 2], gtrans.z); + + this->check_boundary (rotpos[ia * 3 + 0]); + this->check_boundary (rotpos[ia * 3 + 1]); + this->check_boundary (rotpos[ia * 3 + 2]); + } + // order translated atomic positions for current species + this->atom_ordering_new (rotpos + istart[it] * 3, na[it], index + istart[it]); + } - no_diff = true; - //compare the two lattices 'one-by-one' whether they are identical - for (int it = 0; it < ntype; it++) - { - for (int ia = istart[it]; ia < na[it] + istart[it]; ia++) - { - //take the difference of the rotated and the original coordinates - diff.x = this->check_diff( pos[ia*3+0], rotpos[ia*3+0]); - diff.y = this->check_diff( pos[ia*3+1], rotpos[ia*3+1]); - diff.z = this->check_diff( pos[ia*3+2], rotpos[ia*3+2]); - //only if all "diff" are zero vectors, flag will remain "1" - if ( no_diff == false|| - !equal(diff.x,0.0)|| - !equal(diff.y,0.0)|| - !equal(diff.z,0.0) - ) + no_diff = true; + // compare the two lattices 'one-by-one' whether they are identical + for (int it = 0; it < ntype; it++) { - no_diff = false; + for (int ia = istart[it]; ia < na[it] + istart[it]; ia++) + { + // take the difference of the rotated and the original coordinates + diff.x = this->check_diff (pos[ia * 3 + 0], rotpos[ia * 3 + 0]); + diff.y = this->check_diff (pos[ia * 3 + 1], rotpos[ia * 3 + 1]); + diff.z = this->check_diff (pos[ia * 3 + 2], rotpos[ia * 3 + 2]); + // only if all "diff" are zero vectors, flag will remain "1" + if (no_diff == false || !equal (diff.x, 0.0) || !equal (diff.y, 0.0) + 
|| !equal (diff.z, 0.0)) + { + no_diff = false; + } + } } - } - } - - //the current test is successful - if (no_diff == true) - { - s_flag = true; - //save the detected translation std::vector temporarily - trans.x = gtrans.x; - trans.y = gtrans.y; - trans.z = gtrans.z; - } - //restore the original rotated coordinates by subtracting "gtrans" - for (int it = 0; it < ntype; it++) - { - for (int ia = istart[it]; ia < na[it] + istart[it]; ia++) - { - rotpos[ia*3+0] -= gtrans.x; - rotpos[ia*3+1] -= gtrans.y; - rotpos[ia*3+2] -= gtrans.z; - } + // the current test is successful + if (no_diff == true) + { + s_flag = true; + // save the detected translation std::vector temporarily + trans.x = gtrans.x; + trans.y = gtrans.y; + trans.z = gtrans.z; + } + + // restore the original rotated coordinates by subtracting "gtrans" + for (int it = 0; it < ntype; it++) + { + for (int ia = istart[it]; ia < na[it] + istart[it]; ia++) + { + rotpos[ia * 3 + 0] -= gtrans.x; + rotpos[ia * 3 + 1] -= gtrans.y; + rotpos[ia * 3 + 2] -= gtrans.z; + } + } } - } if (s_flag == 1) - { - gtrans.x = trans.x; - gtrans.y = trans.y; - gtrans.z = trans.z; - } + { + gtrans.x = trans.x; + gtrans.y = trans.y; + gtrans.z = trans.z; + } return s_flag; } - diff --git a/source/source_cell/module_symmetry/symm_getgroup.cpp b/source/source_cell/module_symmetry/symm_getgroup.cpp index 7c7085e6ed3..6fc91ccd9be 100644 --- a/source/source_cell/module_symmetry/symm_getgroup.cpp +++ b/source/source_cell/module_symmetry/symm_getgroup.cpp @@ -1,108 +1,118 @@ #include "symmetry.h" using namespace ModuleSymmetry; -void Symmetry::getgroup(int& nrot, int& nrotk, std::ofstream& ofs_running, - const int& nop, const ModuleBase::Matrix3* symop, ModuleBase::Matrix3* gmatrix, - ModuleBase::Vector3* gtrans, double* pos, double* rotpos, - int* index, const int ntype, const int itmin_type, - const int itmin_start, int* istart, int* na)const +void + Symmetry::getgroup (int& nrot, + int& nrotk, + std::ofstream& ofs_running, + const int& 
nop, + const ModuleBase::Matrix3* symop, + ModuleBase::Matrix3* gmatrix, + ModuleBase::Vector3* gtrans, + double* pos, + double* rotpos, + int* index, + const int ntype, + const int itmin_type, + const int itmin_start, + int* istart, + int* na) const { - ModuleBase::TITLE("Symmetry", "getgroup"); + ModuleBase::TITLE ("Symmetry", "getgroup"); - //-------------------------------------------------------------------------------- - //return all possible space group operators that reproduce a lattice with basis - //out of a (maximum) pool of point group operations that is compatible with - //the symmetry of the pure translation lattice without any basic. - //-------------------------------------------------------------------------------- + //-------------------------------------------------------------------------------- + // return all possible space group operators that reproduce a lattice with basis + // out of a (maximum) pool of point group operations that is compatible with + // the symmetry of the pure translation lattice without any basic. 
+ //-------------------------------------------------------------------------------- - ModuleBase::Matrix3 zero(0,0,0,0,0,0,0,0,0); + ModuleBase::Matrix3 zero (0, 0, 0, 0, 0, 0, 0, 0, 0); ModuleBase::Matrix3 help[48]; ModuleBase::Vector3 temp[48]; nrot = 0; nrotk = 0; - //------------------------------------------------------------------------- - //pass through the pool of (possibly allowed) symmetry operations and - //check each operation whether it can reproduce the lattice with basis - //------------------------------------------------------------------------- - //std::cout << "nop = " < h(1, 1) - epsilon) - { - for(int r=0;r<3;++r) - { - h(r, 0) -= h(r, 1); - b(r, 0) -= b(r, 1); - } - } - while (h(1, 0) < -epsilon) - { - for(int r=0;r<3;++r) - { - h(r, 0) += h(r, 1); - b(r, 0) += b(r, 1); - } - } - for(int j=0;j<2;++j) - { - while (h(2, j) > h(2, 2) - epsilon) - { - for(int r=0;r<3;++r) - { - h(r, j) -= h(r, 2); - b(r, j) -= b(r, 2); - } + while (h (1, 0) > h (1, 1) - epsilon) + { + for (int r = 0; r < 3; ++r) + { + h (r, 0) -= h (r, 1); + b (r, 0) -= b (r, 1); + } } - while (h(2, j) < -epsilon) - { - for(int r=0;r<3;++r) - { - h(r, j) += h(r, 2); - b(r, j) += b(r, 2); - } + while (h (1, 0) < -epsilon) + { + for (int r = 0; r < 3; ++r) + { + h (r, 0) += h (r, 1); + b (r, 0) += b (r, 1); + } } - } - - //convert matrix to Matrix3 - h3.e11=h(0, 0); h3.e12=h(0, 1); h3.e13=h(0, 2); - h3.e21=h(1, 0); h3.e22=h(1, 1); h3.e23=h(1, 2); - h3.e31=h(2, 0); h3.e32=h(2, 1); h3.e33=h(2, 2); - b3.e11=b(0, 0); b3.e12=b(0, 1); b3.e13=b(0, 2); - b3.e21=b(1, 0); b3.e22=b(1, 1); b3.e23=b(1, 2); - b3.e31=b(2, 0); b3.e32=b(2, 1); b3.e33=b(2, 2); - - //check s*b=h - ModuleBase::matrix check_zeros = s3.to_matrix() * b - h; + for (int j = 0; j < 2; ++j) + { + while (h (2, j) > h (2, 2) - epsilon) + { + for (int r = 0; r < 3; ++r) + { + h (r, j) -= h (r, 2); + b (r, j) -= b (r, 2); + } + } + while (h (2, j) < -epsilon) + { + for (int r = 0; r < 3; ++r) + { + h (r, j) += h (r, 2); + b (r, 
j) += b (r, 2); + } + } + } + + // convert matrix to Matrix3 + h3.e11 = h (0, 0); + h3.e12 = h (0, 1); + h3.e13 = h (0, 2); + h3.e21 = h (1, 0); + h3.e22 = h (1, 1); + h3.e23 = h (1, 2); + h3.e31 = h (2, 0); + h3.e32 = h (2, 1); + h3.e33 = h (2, 2); + b3.e11 = b (0, 0); + b3.e12 = b (0, 1); + b3.e13 = b (0, 2); + b3.e21 = b (1, 0); + b3.e22 = b (1, 1); + b3.e23 = b (1, 2); + b3.e31 = b (2, 0); + b3.e32 = b (2, 1); + b3.e33 = b (2, 2); + + // check s*b=h + ModuleBase::matrix check_zeros = s3.to_matrix () * b - h; #ifdef __DEBUG - for (int i = 0;i < 3;++i) - { - for(int j=0;j<3;++j) - { - assert(near_equal(check_zeros(i, j), 0)); - } - } + for (int i = 0; i < 3; ++i) + { + for (int j = 0; j < 3; ++j) + { + assert (near_equal (check_zeros (i, j), 0)); + } + } #endif return; } diff --git a/source/source_cell/module_symmetry/symm_lattice.cpp b/source/source_cell/module_symmetry/symm_lattice.cpp index 5d2899c02bf..17a583b9faa 100644 --- a/source/source_cell/module_symmetry/symm_lattice.cpp +++ b/source/source_cell/module_symmetry/symm_lattice.cpp @@ -9,268 +9,257 @@ using namespace ModuleSymmetry; // 'origin' and 'transformed' lattice vectors will // be givin in matrix form //--------------------------------------------------- -int Symmetry::standard_lat( - ModuleBase::Vector3 &a, - ModuleBase::Vector3 &b, - ModuleBase::Vector3 &c, - double *cel_const) const +int + Symmetry::standard_lat (ModuleBase::Vector3& a, + ModuleBase::Vector3& b, + ModuleBase::Vector3& c, + double* cel_const) const { static bool first = true; // there are only 14 types of Bravais lattice. 
int type = 15; - //---------------------------------------------------- - // used to calculte the volume to judge whether - // the lattice vectors corrispond the right-hand-sense - //---------------------------------------------------- + //---------------------------------------------------- + // used to calculte the volume to judge whether + // the lattice vectors corrispond the right-hand-sense + //---------------------------------------------------- double volume = 0; - //the lattice vectors have not been changed + // the lattice vectors have not been changed const double aa = a * a; const double bb = b * b; const double cc = c * c; - const double ab = a * b; //std::vector: a * b * cos(alpha) - const double bc = b * c; //std::vector: b * c * cos(beta) - const double ca = c * a; //std::vector: c * a * cos(gamma) - double norm_a = a.norm(); - double norm_b = b.norm(); - double norm_c = c.norm(); - double gamma = ab /( norm_a * norm_b ); // cos(gamma) - double alpha = bc /( norm_b * norm_c ); // cos(alpha) - double beta = ca /( norm_a * norm_c ); // cos(beta) - double amb = sqrt( aa + bb - 2 * ab ); //amb = |a - b| - double bmc = sqrt( bb + cc - 2 * bc ); - double cma = sqrt( cc + aa - 2 * ca ); - double apb = sqrt( aa + bb + 2 * ab ); //amb = |a + b| - double bpc = sqrt( bb + cc + 2 * bc ); - double cpa = sqrt( cc + aa + 2 * ca ); - double apbmc = sqrt( aa + bb + cc + 2 * ab - 2 * bc - 2 * ca ); //apbmc = |a + b - c| - double bpcma = sqrt( bb + cc + aa + 2 * bc - 2 * ca - 2 * ab ); - double cpamb = sqrt( cc + aa + bb + 2 * ca - 2 * ab - 2 * bc ); + const double ab = a * b; // std::vector: a * b * cos(alpha) + const double bc = b * c; // std::vector: b * c * cos(beta) + const double ca = c * a; // std::vector: c * a * cos(gamma) + double norm_a = a.norm (); + double norm_b = b.norm (); + double norm_c = c.norm (); + double gamma = ab / (norm_a * norm_b); // cos(gamma) + double alpha = bc / (norm_b * norm_c); // cos(alpha) + double beta = ca / (norm_a * norm_c); // 
cos(beta) + double amb = sqrt (aa + bb - 2 * ab); // amb = |a - b| + double bmc = sqrt (bb + cc - 2 * bc); + double cma = sqrt (cc + aa - 2 * ca); + double apb = sqrt (aa + bb + 2 * ab); // amb = |a + b| + double bpc = sqrt (bb + cc + 2 * bc); + double cpa = sqrt (cc + aa + 2 * ca); + double apbmc = sqrt (aa + bb + cc + 2 * ab - 2 * bc - 2 * ca); // apbmc = |a + b - c| + double bpcma = sqrt (bb + cc + aa + 2 * bc - 2 * ca - 2 * ab); + double cpamb = sqrt (cc + aa + bb + 2 * ca - 2 * ab - 2 * bc); double abc = ab + bc + ca; - if (first) - { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"NORM_A",norm_a); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"NORM_B",norm_b); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"NORM_C",norm_c); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"ALPHA (DEGREE)", acos(alpha)/ModuleBase::PI*180.0 ); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"BETA (DEGREE)" ,acos(beta)/ModuleBase::PI*180.0 ); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"GAMMA (DEGREE)" ,acos(gamma)/ModuleBase::PI*180.0 ); - first = false; - } + if (first) + { + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "NORM_A", norm_a); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "NORM_B", norm_b); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "NORM_C", norm_c); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "ALPHA (DEGREE)", acos (alpha) / ModuleBase::PI * 180.0); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "BETA (DEGREE)", acos (beta) / ModuleBase::PI * 180.0); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "GAMMA (DEGREE)", acos (gamma) / ModuleBase::PI * 180.0); + first = false; + } - Symm_Other::right_hand_sense(a, b, c); - ModuleBase::GlobalFunc::ZEROS(cel_const, 6); - const double small = PARAM.inp.symmetry_prec; + Symm_Other::right_hand_sense (a, b, c); + ModuleBase::GlobalFunc::ZEROS (cel_const, 6); + const double small = PARAM.inp.symmetry_prec; - //--------------------------- - // 1. 
alpha == beta == gamma - //--------------------------- - if( equal(alpha, gamma) && equal(alpha, beta) ) - { - //-------------- - // a == b == c - //-------------- - if( equal(norm_a, norm_b) && equal(norm_b, norm_c)) - { - //--------------------------------------- - // alpha == beta == gamma == 90 degree - //--------------------------------------- - if ( equal(alpha,0.0) ) - { - type=1; - cel_const[0]=norm_a; - } - //---------------------------------------- - // cos(alpha) = -1.0/3.0 - //---------------------------------------- - else if( equal(alpha, -1.0/3.0) ) - { - type=2; - cel_const[0]=norm_a*2.0/sqrt(3.0); - } - //---------------------------------------- - // cos(alpha) = 0.5 - //---------------------------------------- - else if( equal(alpha, 0.5) ) - { - type=3; - cel_const[0]=norm_a*sqrt(2.0); - } - //---------------------------------------- - // cos(alpha) = all the others - //---------------------------------------- - else - { - type=7; - cel_const[0]=norm_a; - cel_const[3]=alpha; - } - } - // Crystal classes with inequal length of lattice vectors but also with - // A1*A2=A1*A3=A2*A3: - // Orthogonal axes: - else if(equal(gamma,0.0)) - { - // Two axes with equal lengths means simple tetragonal: (IBRAV=5) - // Adjustment: 'c-axis' shall be the special axis. 
- if (equal(norm_a, norm_b)) - { - type=5; - cel_const[0]=norm_a; - cel_const[2]=norm_c/norm_a; - // No axes with equal lengths means simple orthorhombic (IBRAV=8): - // Adjustment: Sort the axis by increasing lengths: - } - else if(((norm_c-norm_b)>small) && ((norm_b-norm_a)>small) ) - { - type=8; - cel_const[0]=norm_a; - cel_const[1]=norm_b/norm_a; - cel_const[2]=norm_c/norm_a; - } - // Crystal classes with A1*A3=A2*A3=/A1*A2: - } - }//end alpha=beta=gamma - //----------------------- - // TWO EQUAL ANGLES - // alpha == beta != gamma (gamma is special) - //------------------------ - else if (equal(alpha-beta, 0)) - { - //--------------------------------------------------------- - // alpha = beta = 90 degree - // One axis orthogonal with respect to the other two axes: - //--------------------------------------------------------- - if (equal(alpha, 0.0)) - { - //----------------------------------------------- - // a == b - // Equal length of the two nonorthogonal axes: - //----------------------------------------------- - if (equal(norm_a, norm_b)) - { - // Cosine(alpha) equal to -1/2 means hexagonal: (IBRAV=4) - // Adjustment: 'c-axis' shall be the special axis. - if ( equal(gamma, -0.5)) //gamma = 120 degree - { - type=4; - cel_const[0]=norm_a; - cel_const[2]=norm_c/norm_a; - // Other angles mean base-centered orthorhombic: (IBRAV=11) - // Adjustment: Cosine between A1 and A2 shall be lower than zero, the - // 'c-axis' shall be the special axis. - } - else if(gamma<(-1.0*small)) //gamma > 90 degree - { - type=11; - cel_const[0]=apb; - cel_const[1]=amb/apb; - cel_const[2]=norm_c/apb; - cel_const[5]=gamma; - } - // Different length of the two axes means simple monoclinic (IBRAV=12): - // Adjustment: Cosine(gamma) should be lower than zero, special axis - // shall be the 'b-axis'(!!!) 
and |A1|<|A3|: - } - //---------- - // a!=b!=c - //---------- - else if( gamma<(-1.0*small) && (norm_a-norm_b)>small) - { - type=12; - cel_const[0]=norm_b; - cel_const[1]=norm_c/norm_b; - cel_const[2]=norm_a/norm_b; - cel_const[4]=gamma; - //adjust: a->c, b->a, c->b - ModuleBase::Vector3 tmp=c; - c=a; - a=b; - b=tmp; - } - }//end gamma) - cel_const[4]=(a+b)*c/apb/norm_c; - } - } - } //end alpha==beta - //------------------------------- - // three angles are not equal - //------------------------------- - else - { - // Crystal classes with A1*A2=/A1*A3=/A2*A3 - // |A1|=|A2|=|A3| means body-centered orthorhombic (IBRAV=9): - // Further additional criterions are: (A1+A2), (A1+A3) and (A2+A3) are - // orthogonal to one another and (adjustment//): |A1+A2|>|A1+A3|>|A2+A3| - if (equal(norm_a, norm_b) && - equal(norm_b, norm_c) && - ((cpa-bpc)>small) && - ((apb-cpa)>small) && - equal(norm_c*norm_c+abc, 0)) - { - type=9; - cel_const[0]=bpc; - cel_const[1]=cpa/bpc; - cel_const[2]=apb/bpc; - } - // |A1|=|A2-A3| and |A2|=|A1-A3| and |A3|=|A1-A2| means face-centered - // orthorhombic (IBRAV=10): - // Adjustment: |A1+A2-A3|>|A1+A3-A2|>|A2+A3-A1| - else if(equal(amb, norm_c) && - equal(cma, norm_b) && - equal(bmc, norm_a) && - ((apbmc-cpamb)>small) && - ((cpamb-bpcma)>small)) - { - type=10; - cel_const[0]=bpcma; - cel_const[1]=cpamb/bpcma; - cel_const[2]=apbmc/bpcma; - } - // Now there exists only one further possibility - triclinic (IBRAV=14): - // Adjustment: All three cosines shall be greater than zero and ordered: - else if((gamma>beta) && (beta>alpha) && (alpha>small)) - { - type=14; - cel_const[0]=norm_a; - cel_const[1]=norm_b/norm_a; - cel_const[2]=norm_c/norm_a; - cel_const[3]=alpha; - cel_const[4]=beta; - cel_const[5]=gamma; - } - } - - return type; + //--------------------------- + // 1. 
alpha == beta == gamma + //--------------------------- + if (equal (alpha, gamma) && equal (alpha, beta)) + { + //-------------- + // a == b == c + //-------------- + if (equal (norm_a, norm_b) && equal (norm_b, norm_c)) + { + //--------------------------------------- + // alpha == beta == gamma == 90 degree + //--------------------------------------- + if (equal (alpha, 0.0)) + { + type = 1; + cel_const[0] = norm_a; + } + //---------------------------------------- + // cos(alpha) = -1.0/3.0 + //---------------------------------------- + else if (equal (alpha, -1.0 / 3.0)) + { + type = 2; + cel_const[0] = norm_a * 2.0 / sqrt (3.0); + } + //---------------------------------------- + // cos(alpha) = 0.5 + //---------------------------------------- + else if (equal (alpha, 0.5)) + { + type = 3; + cel_const[0] = norm_a * sqrt (2.0); + } + //---------------------------------------- + // cos(alpha) = all the others + //---------------------------------------- + else + { + type = 7; + cel_const[0] = norm_a; + cel_const[3] = alpha; + } + } + // Crystal classes with inequal length of lattice vectors but also with + // A1*A2=A1*A3=A2*A3: + // Orthogonal axes: + else if (equal (gamma, 0.0)) + { + // Two axes with equal lengths means simple tetragonal: (IBRAV=5) + // Adjustment: 'c-axis' shall be the special axis. 
+ if (equal (norm_a, norm_b)) + { + type = 5; + cel_const[0] = norm_a; + cel_const[2] = norm_c / norm_a; + // No axes with equal lengths means simple orthorhombic (IBRAV=8): + // Adjustment: Sort the axis by increasing lengths: + } + else if (((norm_c - norm_b) > small) && ((norm_b - norm_a) > small)) + { + type = 8; + cel_const[0] = norm_a; + cel_const[1] = norm_b / norm_a; + cel_const[2] = norm_c / norm_a; + } + // Crystal classes with A1*A3=A2*A3=/A1*A2: + } + } // end alpha=beta=gamma + //----------------------- + // TWO EQUAL ANGLES + // alpha == beta != gamma (gamma is special) + //------------------------ + else if (equal (alpha - beta, 0)) + { + //--------------------------------------------------------- + // alpha = beta = 90 degree + // One axis orthogonal with respect to the other two axes: + //--------------------------------------------------------- + if (equal (alpha, 0.0)) + { + //----------------------------------------------- + // a == b + // Equal length of the two nonorthogonal axes: + //----------------------------------------------- + if (equal (norm_a, norm_b)) + { + // Cosine(alpha) equal to -1/2 means hexagonal: (IBRAV=4) + // Adjustment: 'c-axis' shall be the special axis. + if (equal (gamma, -0.5)) // gamma = 120 degree + { + type = 4; + cel_const[0] = norm_a; + cel_const[2] = norm_c / norm_a; + // Other angles mean base-centered orthorhombic: (IBRAV=11) + // Adjustment: Cosine between A1 and A2 shall be lower than zero, the + // 'c-axis' shall be the special axis. + } + else if (gamma < (-1.0 * small)) // gamma > 90 degree + { + type = 11; + cel_const[0] = apb; + cel_const[1] = amb / apb; + cel_const[2] = norm_c / apb; + cel_const[5] = gamma; + } + // Different length of the two axes means simple monoclinic (IBRAV=12): + // Adjustment: Cosine(gamma) should be lower than zero, special axis + // shall be the 'b-axis'(!!!) 
and |A1|<|A3|: + } + //---------- + // a!=b!=c + //---------- + else if (gamma < (-1.0 * small) && (norm_a - norm_b) > small) + { + type = 12; + cel_const[0] = norm_b; + cel_const[1] = norm_c / norm_b; + cel_const[2] = norm_a / norm_b; + cel_const[4] = gamma; + // adjust: a->c, b->a, c->b + ModuleBase::Vector3 tmp = c; + c = a; + a = b; + b = tmp; + } + } // end gamma) + cel_const[4] = (a + b) * c / apb / norm_c; + } + } + } // end alpha==beta + //------------------------------- + // three angles are not equal + //------------------------------- + else + { + // Crystal classes with A1*A2=/A1*A3=/A2*A3 + // |A1|=|A2|=|A3| means body-centered orthorhombic (IBRAV=9): + // Further additional criterions are: (A1+A2), (A1+A3) and (A2+A3) are + // orthogonal to one another and (adjustment//): |A1+A2|>|A1+A3|>|A2+A3| + if (equal (norm_a, norm_b) && equal (norm_b, norm_c) && ((cpa - bpc) > small) && ((apb - cpa) > small) + && equal (norm_c * norm_c + abc, 0)) + { + type = 9; + cel_const[0] = bpc; + cel_const[1] = cpa / bpc; + cel_const[2] = apb / bpc; + } + // |A1|=|A2-A3| and |A2|=|A1-A3| and |A3|=|A1-A2| means face-centered + // orthorhombic (IBRAV=10): + // Adjustment: |A1+A2-A3|>|A1+A3-A2|>|A2+A3-A1| + else if (equal (amb, norm_c) && equal (cma, norm_b) && equal (bmc, norm_a) && ((apbmc - cpamb) > small) + && ((cpamb - bpcma) > small)) + { + type = 10; + cel_const[0] = bpcma; + cel_const[1] = cpamb / bpcma; + cel_const[2] = apbmc / bpcma; + } + // Now there exists only one further possibility - triclinic (IBRAV=14): + // Adjustment: All three cosines shall be greater than zero and ordered: + else if ((gamma > beta) && (beta > alpha) && (alpha > small)) + { + type = 14; + cel_const[0] = norm_a; + cel_const[1] = norm_b / norm_a; + cel_const[2] = norm_c / norm_a; + cel_const[3] = alpha; + cel_const[4] = beta; + cel_const[5] = gamma; + } + } + + return type; } //--------------------------------------------------- @@ -281,155 +270,174 @@ int Symmetry::standard_lat( // must 
be called before symmetry analysis // only need to called once for each ion step //--------------------------------------------------- -void Symmetry::lattice_type( - ModuleBase::Vector3 &v1, - ModuleBase::Vector3 &v2, - ModuleBase::Vector3 &v3, - ModuleBase::Vector3 &v01, - ModuleBase::Vector3 &v02, - ModuleBase::Vector3 &v03, - double *cel_const, - double *pre_const, - int& real_brav, - std::string& bravname, - const Atom* atoms, - bool convert_atoms, - double* newpos)const +void + Symmetry::lattice_type (ModuleBase::Vector3& v1, + ModuleBase::Vector3& v2, + ModuleBase::Vector3& v3, + ModuleBase::Vector3& v01, + ModuleBase::Vector3& v02, + ModuleBase::Vector3& v03, + double* cel_const, + double* pre_const, + int& real_brav, + std::string& bravname, + const Atom* atoms, + bool convert_atoms, + double* newpos) const { - ModuleBase::TITLE("Symmetry","lattice_type"); + ModuleBase::TITLE ("Symmetry", "lattice_type"); - //---------------------------------------------- - // (1) adjustement of the basis to right hand - // sense by inversion of all three lattice - // vectors if necessary - //---------------------------------------------- - const bool right = Symm_Other::right_hand_sense(v1, v2, v3); + //---------------------------------------------- + // (1) adjustement of the basis to right hand + // sense by inversion of all three lattice + // vectors if necessary + //---------------------------------------------- + const bool right = Symm_Other::right_hand_sense (v1, v2, v3); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"Right-handed lattice",right); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Right-handed lattice", right); - //------------------------------------------------- - // (2) save and copy the original lattice vectors. - //------------------------------------------------- + //------------------------------------------------- + // (2) save and copy the original lattice vectors. 
+ //------------------------------------------------- v01 = v1; v02 = v2; v03 = v3; - - //-------------------------------------------- - // (3) calculate the 'pre_const' - //-------------------------------------------- - ModuleBase::GlobalFunc::ZEROS(pre_const, 6); - int pre_brav = standard_lat(v1, v2, v3, cel_const); + //-------------------------------------------- + // (3) calculate the 'pre_const' + //-------------------------------------------- + ModuleBase::GlobalFunc::ZEROS (pre_const, 6); - for ( int i = 0; i < 6; ++i) - { - pre_const[i] = cel_const[i]; - } + int pre_brav = standard_lat (v1, v2, v3, cel_const); + + for (int i = 0; i < 6; ++i) + { + pre_const[i] = cel_const[i]; + } // find the shortest basis vectors of the lattice - this->get_shortest_latvec(v1, v2, v3); + this->get_shortest_latvec (v1, v2, v3); - Symm_Other::right_hand_sense(v1, v2, v3); + Symm_Other::right_hand_sense (v1, v2, v3); real_brav = 15; double temp_const[6]; - //then we should find the best lattice vectors to make much easier the determination of the lattice symmetry - //the method is to contrast the combination of the shortest vectors and determine their symmmetry + // then we should find the best lattice vectors to make much easier the determination of the lattice symmetry + // the method is to contrast the combination of the shortest vectors and determine their symmmetry ModuleBase::Vector3 w1, w2, w3; ModuleBase::Vector3 q1, q2, q3; - this->get_optlat(v1, v2, v3, w1, w2, w3, real_brav, cel_const, temp_const); + this->get_optlat (v1, v2, v3, w1, w2, w3, real_brav, cel_const, temp_const); - //now, the highest symmetry of the combination of the shortest vectors has been found - //then we compare it with the original symmetry - - bool change_flag=false; - for (int i = 0; i < 6; ++i) { - if(!equal(cel_const[i], pre_const[i])) - {change_flag=true; break; - } - } + // now, the highest symmetry of the combination of the shortest vectors has been found + // then we compare it with the 
original symmetry - if ( real_brav < pre_brav || change_flag ) - { - //if the symmetry of the new vectors is higher, store the new ones - for (int i = 0; i < 6; ++i) + bool change_flag = false; + for (int i = 0; i < 6; ++i) { - cel_const[i] = temp_const[i]; + if (!equal (cel_const[i], pre_const[i])) + { + change_flag = true; + break; + } } - q1 = w1; - q2 = w2; - q3 = w3; - if(convert_atoms) + + if (real_brav < pre_brav || change_flag) { - GlobalV::ofs_running <ntype; ++it) - { - for (int ia = 0; ia < this->na[it]; ++ia) - { - ModuleBase::Mathzone::Cartesian_to_Direct(atoms[it].tau[ia].x, - atoms[it].tau[ia].y, - atoms[it].tau[ia].z, - q1.x, q1.y, q1.z, - q2.x, q2.y, q2.z, - q3.x, q3.y, q3.z, - newpos[3*at],newpos[3*at+1],newpos[3*at+2]); + // if the symmetry of the new vectors is higher, store the new ones + for (int i = 0; i < 6; ++i) + { + cel_const[i] = temp_const[i]; + } + q1 = w1; + q2 = w2; + q3 = w3; + if (convert_atoms) + { + GlobalV::ofs_running << std::endl; + GlobalV::ofs_running << " The lattice vectors have been changed (STRU_SIMPLE.cif)" << std::endl; + GlobalV::ofs_running << std::endl; + int at = 0; + for (int it = 0; it < this->ntype; ++it) + { + for (int ia = 0; ia < this->na[it]; ++ia) + { + ModuleBase::Mathzone::Cartesian_to_Direct (atoms[it].tau[ia].x, + atoms[it].tau[ia].y, + atoms[it].tau[ia].z, + q1.x, + q1.y, + q1.z, + q2.x, + q2.y, + q2.z, + q3.x, + q3.y, + q3.z, + newpos[3 * at], + newpos[3 * at + 1], + newpos[3 * at + 2]); - for(int k=0; k<3; ++k) - { - this->check_translation( newpos[at*3+k], -floor(newpos[at*3+k])); - this->check_boundary( newpos[at*3+k] ); - } - ++at; - } - } + for (int k = 0; k < 3; ++k) + { + this->check_translation (newpos[at * 3 + k], -floor (newpos[at * 3 + k])); + this->check_boundary (newpos[at * 3 + k]); + } + ++at; + } + } + } + // return the optimized lattice in v1, v2, v3 + v1 = q1; + v2 = q2; + v3 = q3; } - // return the optimized lattice in v1, v2, v3 - v1=q1; - v2=q2; - v3=q3; - } else - { - //else, 
store the original ones - for (int i = 0; i < 6; ++i) { - cel_const[i] = pre_const[i]; - } - //newpos also need to be set - if(convert_atoms) - { - int at=0; - for (int it = 0; it < this->ntype; ++it) - { - for (int ia = 0; ia < this->na[it]; ++ia) + // else, store the original ones + for (int i = 0; i < 6; ++i) { - ModuleBase::Mathzone::Cartesian_to_Direct(atoms[it].tau[ia].x, - atoms[it].tau[ia].y, - atoms[it].tau[ia].z, - v1.x, v1.y, v1.z, - v2.x, v2.y, v2.z, - v3.x, v3.y, v3.z, - newpos[3*at],newpos[3*at+1],newpos[3*at+2]); - for(int k=0; k<3; ++k) - { - this->check_translation( newpos[at*3+k], -floor(newpos[at*3+k])); - this->check_boundary( newpos[at*3+k] ); - } - ++at; + cel_const[i] = pre_const[i]; + } + // newpos also need to be set + if (convert_atoms) + { + int at = 0; + for (int it = 0; it < this->ntype; ++it) + { + for (int ia = 0; ia < this->na[it]; ++ia) + { + ModuleBase::Mathzone::Cartesian_to_Direct (atoms[it].tau[ia].x, + atoms[it].tau[ia].y, + atoms[it].tau[ia].z, + v1.x, + v1.y, + v1.z, + v2.x, + v2.y, + v2.z, + v3.x, + v3.y, + v3.z, + newpos[3 * at], + newpos[3 * at + 1], + newpos[3 * at + 2]); + for (int k = 0; k < 3; ++k) + { + this->check_translation (newpos[at * 3 + k], -floor (newpos[at * 3 + k])); + this->check_boundary (newpos[at * 3 + k]); + } + ++at; + } + } } - } } - } /* bool flag3; - if (pre_brav == temp_brav) - { + if (pre_brav == temp_brav) + { flag3 = 0; if (!equal(temp_const[0], pre_const[0]) || !equal(temp_const[1], pre_const[1]) || @@ -447,10 +455,10 @@ void Symmetry::lattice_type( v1 = s1; v2 = s2; v3 = s3; - change=0; - GlobalV::ofs_running<<" The lattice vectors have been set back!"< -void Symmetry::analyze_magnetic_group(const Atom* atoms, const Statistics& st, int& nrot_out, int& nrotk_out) +void + Symmetry::analyze_magnetic_group (const Atom* atoms, const Statistics& st, int& nrot_out, int& nrotk_out) { // 1. 
classify atoms with different magmom // (use symmetry_prec to judge if two magmoms are the same) std::vector> mag_type_atoms; - for (int it = 0;it < ntype;++it) - { - for (int ia = 0; ia < atoms[it].na; ++ia) + for (int it = 0; it < ntype; ++it) { - bool find = false; - for (auto& mt : mag_type_atoms) - { - const int mag_iat = *mt.begin(); - const int mag_it = st.iat2it[mag_iat]; - const int mag_ia = st.iat2ia[mag_iat]; - if (it == mag_it && this->equal(atoms[it].mag[ia], atoms[mag_it].mag[mag_ia])) + for (int ia = 0; ia < atoms[it].na; ++ia) { - mt.insert(st.itia2iat(it, ia)); - find = true; - break; + bool find = false; + for (auto& mt: mag_type_atoms) + { + const int mag_iat = *mt.begin (); + const int mag_it = st.iat2it[mag_iat]; + const int mag_ia = st.iat2ia[mag_iat]; + if (it == mag_it && this->equal (atoms[it].mag[ia], atoms[mag_it].mag[mag_ia])) + { + mt.insert (st.itia2iat (it, ia)); + find = true; + break; + } + } + if (!find) + { + mag_type_atoms.push_back (std::set ({st.itia2iat (it, ia)})); + } } - } - if (!find) - { - mag_type_atoms.push_back(std::set({ st.itia2iat(it,ia) })); - } } - } // 2. get the start index, number of atoms and positions for each mag_type - std::vector mag_istart(mag_type_atoms.size()); - std::vector mag_na(mag_type_atoms.size()); + std::vector mag_istart (mag_type_atoms.size ()); + std::vector mag_na (mag_type_atoms.size ()); std::vector mag_pos; - int mag_itmin_type = 0; - int mag_itmin_start = 0; - for (int mag_it = 0;mag_it < mag_type_atoms.size(); ++mag_it) - { - mag_na[mag_it] = mag_type_atoms.at(mag_it).size(); - if (mag_it > 0) - { - mag_istart[mag_it] = mag_istart[mag_it - 1] + mag_na[mag_it - 1]; - } - if (mag_na[mag_it] < mag_na[itmin_type]) - { - mag_itmin_type = mag_it; - mag_itmin_start = mag_istart[mag_it]; - } - for (auto& mag_iat : mag_type_atoms.at(mag_it)) - { - // this->newpos have been ordered by original structure(ntype, na), it cannot be directly used here. 
- // we need to reset the calculate again the coordinate of the new structure. - const ModuleBase::Vector3 direct_tmp = atoms[st.iat2it[mag_iat]].tau[st.iat2ia[mag_iat]] * this->optlat.Inverse(); - std::array direct = { direct_tmp.x, direct_tmp.y, direct_tmp.z }; - for (int i = 0; i < 3; ++i) - { - this->check_translation(direct[i], -floor(direct[i])); - this->check_boundary(direct[i]); - mag_pos.push_back(direct[i]); - } - } - } - - // 3. analyze the effective structure - this->getgroup(nrot_out, nrotk_out, GlobalV::ofs_running, - this->nop, this->symop, this->gmatrix, - this->gtrans, mag_pos.data(), this->rotpos, - this->index, mag_type_atoms.size(), mag_itmin_type, - mag_itmin_start, mag_istart.data(), mag_na.data()); + int mag_itmin_type = 0; + int mag_itmin_start = 0; + for (int mag_it = 0; mag_it < mag_type_atoms.size (); ++mag_it) + { + mag_na[mag_it] = mag_type_atoms.at (mag_it).size (); + if (mag_it > 0) + { + mag_istart[mag_it] = mag_istart[mag_it - 1] + mag_na[mag_it - 1]; + } + if (mag_na[mag_it] < mag_na[itmin_type]) + { + mag_itmin_type = mag_it; + mag_itmin_start = mag_istart[mag_it]; + } + for (auto& mag_iat: mag_type_atoms.at (mag_it)) + { + // this->newpos have been ordered by original structure(ntype, na), it cannot be directly used here. + // we need to reset the calculate again the coordinate of the new structure. + const ModuleBase::Vector3 direct_tmp + = atoms[st.iat2it[mag_iat]].tau[st.iat2ia[mag_iat]] * this->optlat.Inverse (); + std::array direct = {direct_tmp.x, direct_tmp.y, direct_tmp.z}; + for (int i = 0; i < 3; ++i) + { + this->check_translation (direct[i], -floor (direct[i])); + this->check_boundary (direct[i]); + mag_pos.push_back (direct[i]); + } + } + } + // 3. 
analyze the effective structure + this->getgroup (nrot_out, + nrotk_out, + GlobalV::ofs_running, + this->nop, + this->symop, + this->gmatrix, + this->gtrans, + mag_pos.data (), + this->rotpos, + this->index, + mag_type_atoms.size (), + mag_itmin_type, + mag_itmin_start, + mag_istart.data (), + mag_na.data ()); } -bool Symmetry::magmom_same_check(const Atom* atoms)const +bool + Symmetry::magmom_same_check (const Atom* atoms) const { - ModuleBase::TITLE("Symmetry", "magmom_same_check"); + ModuleBase::TITLE ("Symmetry", "magmom_same_check"); bool pricell_loop = true; - for (int it = 0;it < ntype;++it) - { - if (pricell_loop) { - for (int ia = 1;ia < atoms[it].na;++ia) - { - if (!equal(atoms[it].m_loc_[ia].x, atoms[it].m_loc_[0].x) || - !equal(atoms[it].m_loc_[ia].y, atoms[it].m_loc_[0].y) || - !equal(atoms[it].m_loc_[ia].z, atoms[it].m_loc_[0].z)) + for (int it = 0; it < ntype; ++it) + { + if (pricell_loop) { - pricell_loop = false; - break; + for (int ia = 1; ia < atoms[it].na; ++ia) + { + if (!equal (atoms[it].m_loc_[ia].x, atoms[it].m_loc_[0].x) + || !equal (atoms[it].m_loc_[ia].y, atoms[it].m_loc_[0].y) + || !equal (atoms[it].m_loc_[ia].z, atoms[it].m_loc_[0].z)) + { + pricell_loop = false; + break; + } + } } - } } - } return pricell_loop; } - diff --git a/source/source_cell/module_symmetry/symm_other.cpp b/source/source_cell/module_symmetry/symm_other.cpp index 1b7e09c02af..f05ff88fa3d 100644 --- a/source/source_cell/module_symmetry/symm_other.cpp +++ b/source/source_cell/module_symmetry/symm_other.cpp @@ -2,131 +2,135 @@ namespace ModuleSymmetry { -void Symm_Other::print1(const int &ibrav, const double *cel_const, std::ofstream &ofs_running) +void + Symm_Other::print1 (const int& ibrav, const double* cel_const, std::ofstream& ofs_running) { - ModuleBase::TITLE("Symm_Other","print1"); + ModuleBase::TITLE ("Symm_Other", "print1"); - ModuleBase::GlobalFunc::OUT(ofs_running,"IBRAV",ibrav); - if(ibrav==1) - { - 
ModuleBase::GlobalFunc::OUT(ofs_running,"BRAVAIS","SIMPLE CUBIC"); - ModuleBase::GlobalFunc::OUT(ofs_running,"LATTICE CONSTANT A",cel_const[0]); - } - else if(ibrav==2) - { - ModuleBase::GlobalFunc::OUT(ofs_running,"BRAVAIS","BODY CENTERED CUBIC"); - ModuleBase::GlobalFunc::OUT(ofs_running,"LATTICE CONSTANT A",cel_const[0]); - } - else if(ibrav==3) - { - ModuleBase::GlobalFunc::OUT(ofs_running,"BRAVAIS","FACE CENTERED CUBIC"); - ModuleBase::GlobalFunc::OUT(ofs_running,"LATTICE CONSTANT A",cel_const[0]); - } - else if(ibrav==4) - { - ModuleBase::GlobalFunc::OUT(ofs_running,"BRAVAIS","HEXAGONAL CELL"); - ModuleBase::GlobalFunc::OUT(ofs_running,"LATTICE CONSTANT A",cel_const[0]); - ModuleBase::GlobalFunc::OUT(ofs_running,"C/A RATIO",cel_const[2]); - } - else if(ibrav==5) - { - ModuleBase::GlobalFunc::OUT(ofs_running,"BRAVAIS","SIMPLE TETROGONAL CELL"); - ModuleBase::GlobalFunc::OUT(ofs_running,"LATTICE CONSTANT A",cel_const[0]); - ModuleBase::GlobalFunc::OUT(ofs_running,"C/A RATIO",cel_const[2]); - } - else if(ibrav==6) - { - ModuleBase::GlobalFunc::OUT(ofs_running,"BRAVAIS","BODY CENTERED TETROGONAL CELL"); - ModuleBase::GlobalFunc::OUT(ofs_running,"LATTICE CONSTANT A",cel_const[0]); - ModuleBase::GlobalFunc::OUT(ofs_running,"C/A RATIO",cel_const[2]); - } - else if(ibrav==7) - { - ModuleBase::GlobalFunc::OUT(ofs_running,"BRAVAIS","TRIGONAL (RHOMBOEDRIC) CELL"); - ModuleBase::GlobalFunc::OUT(ofs_running,"LATTICE CONSTANT A",cel_const[0]); - ModuleBase::GlobalFunc::OUT(ofs_running,"COS(ALPHA)",cel_const[3]); - } - else if(ibrav==8) - { - ModuleBase::GlobalFunc::OUT(ofs_running,"BRAVAIS","SIMPLE ORTHORHOMBIC CELL"); - ModuleBase::GlobalFunc::OUT(ofs_running,"LATTICE CONSTANT A",cel_const[0]); - ModuleBase::GlobalFunc::OUT(ofs_running,"B/A RATIO",cel_const[1]); - ModuleBase::GlobalFunc::OUT(ofs_running,"C/A RATIO",cel_const[2]); - } - else if(ibrav==9) - { - ModuleBase::GlobalFunc::OUT(ofs_running,"BRAVAIS","BODY CENTERED ORTHORHOMBIC CELL"); - 
ModuleBase::GlobalFunc::OUT(ofs_running,"LATTICE CONSTANT A",cel_const[0]); - ModuleBase::GlobalFunc::OUT(ofs_running,"B/A RATIO",cel_const[1]); - ModuleBase::GlobalFunc::OUT(ofs_running,"C/A RATIO",cel_const[2]); - } - else if(ibrav==10) - { - ModuleBase::GlobalFunc::OUT(ofs_running,"BRAVAIS","FACE CENTERED ORTHORHOMBIC CELL"); - ModuleBase::GlobalFunc::OUT(ofs_running,"LATTICE CONSTANT A",cel_const[0]); - ModuleBase::GlobalFunc::OUT(ofs_running,"B/A RATIO",cel_const[1]); - ModuleBase::GlobalFunc::OUT(ofs_running,"C/A RATIO",cel_const[2]); - } - else if(ibrav==11) - { - ModuleBase::GlobalFunc::OUT(ofs_running,"BRAVAIS","BASE CENTERED ORTHORHOMBIC CELL"); - ModuleBase::GlobalFunc::OUT(ofs_running,"LATTICE CONSTANT A",cel_const[0]); - ModuleBase::GlobalFunc::OUT(ofs_running,"B/A RATIO",cel_const[1]); - ModuleBase::GlobalFunc::OUT(ofs_running,"C/A RATIO",cel_const[2]); - } - else if(ibrav==12) - { - ModuleBase::GlobalFunc::OUT(ofs_running,"BRAVAIS","SIMPLE MONOLINIC CELL"); - ModuleBase::GlobalFunc::OUT(ofs_running,"LATTICE CONSTANT A",cel_const[0]); - ModuleBase::GlobalFunc::OUT(ofs_running,"B/A RATIO",cel_const[1]); - ModuleBase::GlobalFunc::OUT(ofs_running,"C/A RATIO",cel_const[2]); - ModuleBase::GlobalFunc::OUT(ofs_running,"COS(BETA)",cel_const[4]); - } - else if(ibrav==13) - { - ModuleBase::GlobalFunc::OUT(ofs_running,"BRAVAIS","BASE CENTERED MONOLINIC CELL"); - ModuleBase::GlobalFunc::OUT(ofs_running,"LATTICE CONSTANT A",cel_const[0]); - ModuleBase::GlobalFunc::OUT(ofs_running,"B/A RATIO",cel_const[1]); - ModuleBase::GlobalFunc::OUT(ofs_running,"C/A RATIO",cel_const[2]); - ModuleBase::GlobalFunc::OUT(ofs_running,"COS(BETA)",cel_const[4]); - } - else if(ibrav==14) - { - ModuleBase::GlobalFunc::OUT(ofs_running,"BRAVAIS","TRICLINIC CELL"); - ModuleBase::GlobalFunc::OUT(ofs_running,"LATTICE CONSTANT A",cel_const[0]); - ModuleBase::GlobalFunc::OUT(ofs_running,"B/A RATIO",cel_const[1]); - ModuleBase::GlobalFunc::OUT(ofs_running,"C/A RATIO",cel_const[2]); - 
ModuleBase::GlobalFunc::OUT(ofs_running,"COS(ALPHA)",cel_const[3]); - ModuleBase::GlobalFunc::OUT(ofs_running,"COS(BETA)",cel_const[4]); - ModuleBase::GlobalFunc::OUT(ofs_running,"COS(GAMMA)",cel_const[5]); - } - else - { - ModuleBase::WARNING_QUIT("Symm_Other::print1","ibrav is wrong."); - } - return; + ModuleBase::GlobalFunc::OUT (ofs_running, "IBRAV", ibrav); + if (ibrav == 1) + { + ModuleBase::GlobalFunc::OUT (ofs_running, "BRAVAIS", "SIMPLE CUBIC"); + ModuleBase::GlobalFunc::OUT (ofs_running, "LATTICE CONSTANT A", cel_const[0]); + } + else if (ibrav == 2) + { + ModuleBase::GlobalFunc::OUT (ofs_running, "BRAVAIS", "BODY CENTERED CUBIC"); + ModuleBase::GlobalFunc::OUT (ofs_running, "LATTICE CONSTANT A", cel_const[0]); + } + else if (ibrav == 3) + { + ModuleBase::GlobalFunc::OUT (ofs_running, "BRAVAIS", "FACE CENTERED CUBIC"); + ModuleBase::GlobalFunc::OUT (ofs_running, "LATTICE CONSTANT A", cel_const[0]); + } + else if (ibrav == 4) + { + ModuleBase::GlobalFunc::OUT (ofs_running, "BRAVAIS", "HEXAGONAL CELL"); + ModuleBase::GlobalFunc::OUT (ofs_running, "LATTICE CONSTANT A", cel_const[0]); + ModuleBase::GlobalFunc::OUT (ofs_running, "C/A RATIO", cel_const[2]); + } + else if (ibrav == 5) + { + ModuleBase::GlobalFunc::OUT (ofs_running, "BRAVAIS", "SIMPLE TETROGONAL CELL"); + ModuleBase::GlobalFunc::OUT (ofs_running, "LATTICE CONSTANT A", cel_const[0]); + ModuleBase::GlobalFunc::OUT (ofs_running, "C/A RATIO", cel_const[2]); + } + else if (ibrav == 6) + { + ModuleBase::GlobalFunc::OUT (ofs_running, "BRAVAIS", "BODY CENTERED TETROGONAL CELL"); + ModuleBase::GlobalFunc::OUT (ofs_running, "LATTICE CONSTANT A", cel_const[0]); + ModuleBase::GlobalFunc::OUT (ofs_running, "C/A RATIO", cel_const[2]); + } + else if (ibrav == 7) + { + ModuleBase::GlobalFunc::OUT (ofs_running, "BRAVAIS", "TRIGONAL (RHOMBOEDRIC) CELL"); + ModuleBase::GlobalFunc::OUT (ofs_running, "LATTICE CONSTANT A", cel_const[0]); + ModuleBase::GlobalFunc::OUT (ofs_running, "COS(ALPHA)", cel_const[3]); + } + 
else if (ibrav == 8) + { + ModuleBase::GlobalFunc::OUT (ofs_running, "BRAVAIS", "SIMPLE ORTHORHOMBIC CELL"); + ModuleBase::GlobalFunc::OUT (ofs_running, "LATTICE CONSTANT A", cel_const[0]); + ModuleBase::GlobalFunc::OUT (ofs_running, "B/A RATIO", cel_const[1]); + ModuleBase::GlobalFunc::OUT (ofs_running, "C/A RATIO", cel_const[2]); + } + else if (ibrav == 9) + { + ModuleBase::GlobalFunc::OUT (ofs_running, "BRAVAIS", "BODY CENTERED ORTHORHOMBIC CELL"); + ModuleBase::GlobalFunc::OUT (ofs_running, "LATTICE CONSTANT A", cel_const[0]); + ModuleBase::GlobalFunc::OUT (ofs_running, "B/A RATIO", cel_const[1]); + ModuleBase::GlobalFunc::OUT (ofs_running, "C/A RATIO", cel_const[2]); + } + else if (ibrav == 10) + { + ModuleBase::GlobalFunc::OUT (ofs_running, "BRAVAIS", "FACE CENTERED ORTHORHOMBIC CELL"); + ModuleBase::GlobalFunc::OUT (ofs_running, "LATTICE CONSTANT A", cel_const[0]); + ModuleBase::GlobalFunc::OUT (ofs_running, "B/A RATIO", cel_const[1]); + ModuleBase::GlobalFunc::OUT (ofs_running, "C/A RATIO", cel_const[2]); + } + else if (ibrav == 11) + { + ModuleBase::GlobalFunc::OUT (ofs_running, "BRAVAIS", "BASE CENTERED ORTHORHOMBIC CELL"); + ModuleBase::GlobalFunc::OUT (ofs_running, "LATTICE CONSTANT A", cel_const[0]); + ModuleBase::GlobalFunc::OUT (ofs_running, "B/A RATIO", cel_const[1]); + ModuleBase::GlobalFunc::OUT (ofs_running, "C/A RATIO", cel_const[2]); + } + else if (ibrav == 12) + { + ModuleBase::GlobalFunc::OUT (ofs_running, "BRAVAIS", "SIMPLE MONOLINIC CELL"); + ModuleBase::GlobalFunc::OUT (ofs_running, "LATTICE CONSTANT A", cel_const[0]); + ModuleBase::GlobalFunc::OUT (ofs_running, "B/A RATIO", cel_const[1]); + ModuleBase::GlobalFunc::OUT (ofs_running, "C/A RATIO", cel_const[2]); + ModuleBase::GlobalFunc::OUT (ofs_running, "COS(BETA)", cel_const[4]); + } + else if (ibrav == 13) + { + ModuleBase::GlobalFunc::OUT (ofs_running, "BRAVAIS", "BASE CENTERED MONOLINIC CELL"); + ModuleBase::GlobalFunc::OUT (ofs_running, "LATTICE CONSTANT A", cel_const[0]); + 
ModuleBase::GlobalFunc::OUT (ofs_running, "B/A RATIO", cel_const[1]); + ModuleBase::GlobalFunc::OUT (ofs_running, "C/A RATIO", cel_const[2]); + ModuleBase::GlobalFunc::OUT (ofs_running, "COS(BETA)", cel_const[4]); + } + else if (ibrav == 14) + { + ModuleBase::GlobalFunc::OUT (ofs_running, "BRAVAIS", "TRICLINIC CELL"); + ModuleBase::GlobalFunc::OUT (ofs_running, "LATTICE CONSTANT A", cel_const[0]); + ModuleBase::GlobalFunc::OUT (ofs_running, "B/A RATIO", cel_const[1]); + ModuleBase::GlobalFunc::OUT (ofs_running, "C/A RATIO", cel_const[2]); + ModuleBase::GlobalFunc::OUT (ofs_running, "COS(ALPHA)", cel_const[3]); + ModuleBase::GlobalFunc::OUT (ofs_running, "COS(BETA)", cel_const[4]); + ModuleBase::GlobalFunc::OUT (ofs_running, "COS(GAMMA)", cel_const[5]); + } + else + { + ModuleBase::WARNING_QUIT ("Symm_Other::print1", "ibrav is wrong."); + } + return; } -bool Symm_Other::right_hand_sense(ModuleBase::Vector3 &v1,ModuleBase::Vector3 &v2,ModuleBase::Vector3 &v3) +bool + Symm_Other::right_hand_sense (ModuleBase::Vector3& v1, + ModuleBase::Vector3& v2, + ModuleBase::Vector3& v3) { - double volume = Symm_Other::celvol(v1,v2,v3); - //OUT(ofs_running,"volume = ",volume); - if(volume < 0) - { - v1.reverse(); - v2.reverse(); - v3.reverse(); - return false; - } - return true; + double volume = Symm_Other::celvol (v1, v2, v3); + // OUT(ofs_running,"volume = ",volume); + if (volume < 0) + { + v1.reverse (); + v2.reverse (); + v3.reverse (); + return false; + } + return true; } -//calculate the volume of the cell spanned by the vectors -double Symm_Other::celvol(const ModuleBase::Vector3 &a, -const ModuleBase::Vector3 &b, const ModuleBase::Vector3 &c) +// calculate the volume of the cell spanned by the vectors +double + Symm_Other::celvol (const ModuleBase::Vector3& a, + const ModuleBase::Vector3& b, + const ModuleBase::Vector3& c) { - return a.x * ( b.y * c.z - b.z * c.y ) + a.y * ( b.z * c.x - b.x * c.z ) - + a.z * ( b.x * c.y - b.y * c.x ); + return a.x * (b.y * c.z - b.z * 
c.y) + a.y * (b.z * c.x - b.x * c.z) + a.z * (b.x * c.y - b.y * c.x); } -} - +} // namespace ModuleSymmetry diff --git a/source/source_cell/module_symmetry/symm_other.h b/source/source_cell/module_symmetry/symm_other.h index a2295ded183..8e39063efda 100644 --- a/source/source_cell/module_symmetry/symm_other.h +++ b/source/source_cell/module_symmetry/symm_other.h @@ -7,15 +7,17 @@ namespace ModuleSymmetry { namespace Symm_Other { - void print1(const int &ibrav, const double *cel_const, std::ofstream &ofs_running); +void print1 (const int& ibrav, const double* cel_const, std::ofstream& ofs_running); - bool right_hand_sense(ModuleBase::Vector3 &v1,ModuleBase::Vector3 &v2,ModuleBase::Vector3 &v3); +bool right_hand_sense (ModuleBase::Vector3& v1, + ModuleBase::Vector3& v2, + ModuleBase::Vector3& v3); - double celvol(const ModuleBase::Vector3 &a, - const ModuleBase::Vector3 &b, const ModuleBase::Vector3 &c); +double celvol (const ModuleBase::Vector3& a, + const ModuleBase::Vector3& b, + const ModuleBase::Vector3& c); - -} -} +} // namespace Symm_Other +} // namespace ModuleSymmetry #endif diff --git a/source/source_cell/module_symmetry/symm_pricell.cpp b/source/source_cell/module_symmetry/symm_pricell.cpp index c4dfad8f06e..0fb55a6dd0e 100644 --- a/source/source_cell/module_symmetry/symm_pricell.cpp +++ b/source/source_cell/module_symmetry/symm_pricell.cpp @@ -1,322 +1,323 @@ #include "symmetry.h" using namespace ModuleSymmetry; -void Symmetry::pricell(double* pos, const Atom* atoms) +void + Symmetry::pricell (double* pos, const Atom* atoms) { bool no_diff = false; - ptrans.clear(); + ptrans.clear (); for (int it = 0; it < ntype; it++) - { - //------------------------------------ - // impose periodic boundary condition - // 0.5 -> -0.5 - //------------------------------------ - for (int j = istart[it]; j < istart[it] + na[it]; ++j) { - this->check_boundary(pos[j*3+0]); - this->check_boundary(pos[j*3+1]); - this->check_boundary(pos[j*3+2]); - } + 
//------------------------------------ + // impose periodic boundary condition + // 0.5 -> -0.5 + //------------------------------------ + for (int j = istart[it]; j < istart[it] + na[it]; ++j) + { + this->check_boundary (pos[j * 3 + 0]); + this->check_boundary (pos[j * 3 + 1]); + this->check_boundary (pos[j * 3 + 2]); + } - //order original atomic positions for current species - this->atom_ordering_new(pos + istart[it] * 3, na[it], index + istart[it]); - //copy pos to rotpos - for (int j = istart[it]; j < istart[it] + na[it]; ++j) - { - const int xx=j*3; - const int yy=j*3+1; - const int zz=j*3+2; - rotpos[xx] = pos[xx]; - rotpos[yy] = pos[yy]; - rotpos[zz] = pos[zz]; + // order original atomic positions for current species + this->atom_ordering_new (pos + istart[it] * 3, na[it], index + istart[it]); + // copy pos to rotpos + for (int j = istart[it]; j < istart[it] + na[it]; ++j) + { + const int xx = j * 3; + const int yy = j * 3 + 1; + const int zz = j * 3 + 2; + rotpos[xx] = pos[xx]; + rotpos[yy] = pos[yy]; + rotpos[zz] = pos[zz]; + } } - } ModuleBase::Vector3 diff; double tmp_ptrans[3]; - //--------------------------------------------------------- + //--------------------------------------------------------- // itmin_start = the start atom positions of species itmin //--------------------------------------------------------- // (s)tart (p)osition of atom (t)ype which has (min)inal number. 
- ModuleBase::Vector3 sptmin(pos[itmin_start * 3], pos[itmin_start * 3 + 1], pos[itmin_start * 3 + 2]); + ModuleBase::Vector3 sptmin (pos[itmin_start * 3], pos[itmin_start * 3 + 1], pos[itmin_start * 3 + 2]); for (int i = itmin_start; i < itmin_start + na[itmin_type]; ++i) - { - //set up the current test std::vector "gtrans" - //and "gtrans" could possibly contain trivial translations: - tmp_ptrans[0] = this->get_translation_vector( pos[i*3+0], sptmin.x); - tmp_ptrans[1] = this->get_translation_vector( pos[i*3+1], sptmin.y); - tmp_ptrans[2] = this->get_translation_vector( pos[i*3+2], sptmin.z); - //translate all the atomic coordinates by "gtrans" - for (int it = 0; it < ntype; it++) { - for (int ia = istart[it]; ia < na[it] + istart[it]; ia++) - { - this->check_translation( rotpos[ia*3+0], tmp_ptrans[0] ); - this->check_translation( rotpos[ia*3+1], tmp_ptrans[1] ); - this->check_translation( rotpos[ia*3+2], tmp_ptrans[2] ); + // set up the current test std::vector "gtrans" + // and "gtrans" could possibly contain trivial translations: + tmp_ptrans[0] = this->get_translation_vector (pos[i * 3 + 0], sptmin.x); + tmp_ptrans[1] = this->get_translation_vector (pos[i * 3 + 1], sptmin.y); + tmp_ptrans[2] = this->get_translation_vector (pos[i * 3 + 2], sptmin.z); + // translate all the atomic coordinates by "gtrans" + for (int it = 0; it < ntype; it++) + { + for (int ia = istart[it]; ia < na[it] + istart[it]; ia++) + { + this->check_translation (rotpos[ia * 3 + 0], tmp_ptrans[0]); + this->check_translation (rotpos[ia * 3 + 1], tmp_ptrans[1]); + this->check_translation (rotpos[ia * 3 + 2], tmp_ptrans[2]); - this->check_boundary( rotpos[ia*3+0] ); - this->check_boundary( rotpos[ia*3+1] ); - this->check_boundary( rotpos[ia*3+2] ); - } - //order translated atomic positions for current species - this->atom_ordering_new(rotpos + istart[it] * 3, na[it], index + istart[it]); - } + this->check_boundary (rotpos[ia * 3 + 0]); + this->check_boundary (rotpos[ia * 3 + 1]); + 
this->check_boundary (rotpos[ia * 3 + 2]); + } + // order translated atomic positions for current species + this->atom_ordering_new (rotpos + istart[it] * 3, na[it], index + istart[it]); + } - no_diff = true; - //compare the two lattices 'one-by-one' whether they are identical - for (int it = 0; it < ntype; it++) - { - for (int ia = istart[it]; ia < na[it] + istart[it]; ia++) - { - //take the difference of the rotated and the original coordinates - diff.x = this->check_diff( pos[ia*3+0], rotpos[ia*3+0]); - diff.y = this->check_diff( pos[ia*3+1], rotpos[ia*3+1]); - diff.z = this->check_diff( pos[ia*3+2], rotpos[ia*3+2]); - //only if all "diff" are zero vectors, flag will remain "1" - if (!equal(diff.x,0.0)|| - !equal(diff.y,0.0)|| - !equal(diff.z,0.0)) + no_diff = true; + // compare the two lattices 'one-by-one' whether they are identical + for (int it = 0; it < ntype; it++) { - no_diff = false; - break; + for (int ia = istart[it]; ia < na[it] + istart[it]; ia++) + { + // take the difference of the rotated and the original coordinates + diff.x = this->check_diff (pos[ia * 3 + 0], rotpos[ia * 3 + 0]); + diff.y = this->check_diff (pos[ia * 3 + 1], rotpos[ia * 3 + 1]); + diff.z = this->check_diff (pos[ia * 3 + 2], rotpos[ia * 3 + 2]); + // only if all "diff" are zero vectors, flag will remain "1" + if (!equal (diff.x, 0.0) || !equal (diff.y, 0.0) || !equal (diff.z, 0.0)) + { + no_diff = false; + break; + } + } + if (!no_diff) + { + break; + } } - } - if (!no_diff) { - break; - } - } - //the current test is successful - if (no_diff) { - ptrans.push_back(ModuleBase::Vector3(tmp_ptrans[0], - tmp_ptrans[1], - tmp_ptrans[2])); - } - //restore the original rotated coordinates by subtracting "ptrans" - for (int it = 0; it < ntype; it++) - { - for (int ia = istart[it]; ia < na[it] + istart[it]; ia++) - { - rotpos[ia*3+0] -= tmp_ptrans[0]; - rotpos[ia*3+1] -= tmp_ptrans[1]; - rotpos[ia*3+2] -= tmp_ptrans[2]; - } + // the current test is successful + if (no_diff) + { + 
ptrans.push_back (ModuleBase::Vector3 (tmp_ptrans[0], tmp_ptrans[1], tmp_ptrans[2])); + } + // restore the original rotated coordinates by subtracting "ptrans" + for (int it = 0; it < ntype; it++) + { + for (int ia = istart[it]; ia < na[it] + istart[it]; ia++) + { + rotpos[ia * 3 + 0] -= tmp_ptrans[0]; + rotpos[ia * 3 + 1] -= tmp_ptrans[1]; + rotpos[ia * 3 + 2] -= tmp_ptrans[2]; + } + } } - } - int ntrans=ptrans.size(); + int ntrans = ptrans.size (); if (ntrans <= 1) - { - GlobalV::ofs_running<<"\n Original cell was already a primitive cell."<p1=this->a1; - this->p2=this->a2; - this->p3=this->a3; - this->pbrav=this->real_brav; - this->ncell=1; - for (int i = 0; i < 6; ++i) { - this->pcel_const[i] = this->cel_const[i]; + { + GlobalV::ofs_running << "\n Original cell was already a primitive cell." << std::endl; + this->p1 = this->a1; + this->p2 = this->a2; + this->p3 = this->a3; + this->pbrav = this->real_brav; + this->ncell = 1; + for (int i = 0; i < 6; ++i) + { + this->pcel_const[i] = this->cel_const[i]; + } + return; } - return; - } - //sort ptrans: - double* ptrans_array = new double[ntrans*3]; - for(int i=0;iatom_ordering_new(ptrans_array, ntrans, index); + // sort ptrans: + double* ptrans_array = new double[ntrans * 3]; + for (int i = 0; i < ntrans; ++i) + { + ptrans_array[i * 3] = ptrans[i].x; + ptrans_array[i * 3 + 1] = ptrans[i].y; + ptrans_array[i * 3 + 2] = ptrans[i].z; + } + this->atom_ordering_new (ptrans_array, ntrans, index); // std::cout<<"final ptrans:"< b1, b2, b3; - int iplane=0, jplane=0, kplane=0; - //1. kplane for b3 - while (kplane < ntrans - && std::abs(ptrans[kplane].z - ptrans[0].z) < this->epsilon) { - ++kplane; - } - if (kplane == ntrans) { - kplane = 0; // a3-direction have no smaller pricell - } - b3=kplane>0 ? - ModuleBase::Vector3(ptrans[kplane].x, ptrans[kplane].y, ptrans[kplane].z) : - ModuleBase::Vector3(0, 0, 1); - //2. jplane for b2 (not collinear with b3) - jplane=kplane+1; + int iplane = 0, jplane = 0, kplane = 0; + // 1. 
kplane for b3 + while (kplane < ntrans && std::abs (ptrans[kplane].z - ptrans[0].z) < this->epsilon) + { + ++kplane; + } + if (kplane == ntrans) + { + kplane = 0; // a3-direction have no smaller pricell + } + b3 = kplane > 0 ? ModuleBase::Vector3 (ptrans[kplane].x, ptrans[kplane].y, ptrans[kplane].z) + : ModuleBase::Vector3 (0, 0, 1); + // 2. jplane for b2 (not collinear with b3) + jplane = kplane + 1; while (jplane < ntrans - && (std::abs(ptrans[jplane].y - ptrans[0].y) < this->epsilon - || equal((ptrans[jplane] ^ b3).norm(), 0))) { - ++jplane; - } - if (jplane == ntrans) { - jplane = kplane; // a2-direction have no smaller pricell - } - b2=jplane>kplane ? - ModuleBase::Vector3(ptrans[jplane].x, ptrans[jplane].y, ptrans[jplane].z) : - ModuleBase::Vector3(0, 1, 0); - //3. iplane for b1 (not coplane with ) - iplane=jplane+1; - while (iplane < ntrans - && (std::abs(ptrans[iplane].x - ptrans[0].x) < this->epsilon - || equal(ptrans[iplane] * (b2 ^ b3), 0))) { - ++iplane; - } - b1=(iplane>jplane && iplane(ptrans[iplane].x, ptrans[iplane].y, ptrans[iplane].z) : - ModuleBase::Vector3(1, 0, 0); //a1-direction have no smaller pricell - - - ModuleBase::Matrix3 coeff(b1.x, b1.y, b1.z, b2.x, b2.y, b2.z, b3.x, b3.y, b3.z); - this->plat=coeff*this->optlat; - - //deal with collineation caused by default b1, b2, b3 - if(equal(plat.Det(), 0)) - { - if(kplane==0) //try a new b3 + && (std::abs (ptrans[jplane].y - ptrans[0].y) < this->epsilon || equal ((ptrans[jplane] ^ b3).norm (), 0))) { - std::cout<<"try a new b3"<kplane) // use default b2 - { - coeff.e31=0; - coeff.e32=1; - coeff.e33=0; - } - else //use default b1 - { - coeff.e31=1; - coeff.e32=0; - coeff.e33=0; - } + ++jplane; } - else if(jplane<=kplane) + if (jplane == ntrans) { - coeff.e21=0; - coeff.e22=0; - coeff.e23=1; + jplane = kplane; // a2-direction have no smaller pricell } - else + b2 = jplane > kplane ? 
ModuleBase::Vector3 (ptrans[jplane].x, ptrans[jplane].y, ptrans[jplane].z) + : ModuleBase::Vector3 (0, 1, 0); + // 3. iplane for b1 (not coplane with ) + iplane = jplane + 1; + while (iplane < ntrans + && (std::abs (ptrans[iplane].x - ptrans[0].x) < this->epsilon || equal (ptrans[iplane] * (b2 ^ b3), 0))) { - coeff.e11=0; - coeff.e12=0; - coeff.e13=1; + ++iplane; } - this->plat=coeff*this->optlat; - assert(!equal(plat.Det(), 0)); - } + b1 = (iplane > jplane && iplane < ntrans) + ? ModuleBase::Vector3 (ptrans[iplane].x, ptrans[iplane].y, ptrans[iplane].z) + : ModuleBase::Vector3 (1, 0, 0); // a1-direction have no smaller pricell - this->p1.x=plat.e11; - this->p1.y=plat.e12; - this->p1.z=plat.e13; - this->p2.x=plat.e21; - this->p2.y=plat.e22; - this->p2.z=plat.e23; - this->p3.x=plat.e31; - this->p3.y=plat.e32; - this->p3.z=plat.e33; + ModuleBase::Matrix3 coeff (b1.x, b1.y, b1.z, b2.x, b2.y, b2.z, b3.x, b3.y, b3.z); + this->plat = coeff * this->optlat; + + // deal with collineation caused by default b1, b2, b3 + if (equal (plat.Det (), 0)) + { + if (kplane == 0) // try a new b3 + { + std::cout << "try a new b3" << std::endl; + if (jplane > kplane) // use default b2 + { + coeff.e31 = 0; + coeff.e32 = 1; + coeff.e33 = 0; + } + else // use default b1 + { + coeff.e31 = 1; + coeff.e32 = 0; + coeff.e33 = 0; + } + } + else if (jplane <= kplane) + { + coeff.e21 = 0; + coeff.e22 = 0; + coeff.e23 = 1; + } + else + { + coeff.e11 = 0; + coeff.e12 = 0; + coeff.e13 = 1; + } + this->plat = coeff * this->optlat; + assert (!equal (plat.Det (), 0)); + } + + this->p1.x = plat.e11; + this->p1.y = plat.e12; + this->p1.z = plat.e13; + this->p2.x = plat.e21; + this->p2.y = plat.e22; + this->p2.z = plat.e23; + this->p3.x = plat.e31; + this->p3.y = plat.e32; + this->p3.z = plat.e33; #ifdef __DEBUG - GlobalV::ofs_running<<"lattice vectors of primitive cell (initial):"< p01=p1, p02=p2, p03=p3; + ModuleBase::Vector3 p01 = p1, p02 = p2, p03 = p3; double pcel_pre_const[6]; - for (int i = 0; i < 6; 
++i) { - pcel_pre_const[i] = pcel_const[i]; - } - this->lattice_type(p1, p2, p3, p01, p02, p03, pcel_const, pcel_pre_const, pbrav, pbravname, atoms, false, nullptr); + for (int i = 0; i < 6; ++i) + { + pcel_pre_const[i] = pcel_const[i]; + } + this->lattice_type (p1, p2, p3, p01, p02, p03, pcel_const, pcel_pre_const, pbrav, pbravname, atoms, false, nullptr); - this->plat.e11=p1.x; - this->plat.e12=p1.y; - this->plat.e13=p1.z; - this->plat.e21=p2.x; - this->plat.e22=p2.y; - this->plat.e23=p2.z; - this->plat.e31=p3.x; - this->plat.e32=p3.y; - this->plat.e33=p3.z; + this->plat.e11 = p1.x; + this->plat.e12 = p1.y; + this->plat.e13 = p1.z; + this->plat.e21 = p2.x; + this->plat.e22 = p2.y; + this->plat.e23 = p2.z; + this->plat.e31 = p3.x; + this->plat.e32 = p3.y; + this->plat.e33 = p3.z; #ifdef __DEBUG - GlobalV::ofs_running<<"lattice vectors of primitive cell (optimized):"<pbrav, this->pcel_const, GlobalV::ofs_running); + GlobalV::ofs_running << "(for primitive cell:)" << std::endl; + Symm_Other::print1 (this->pbrav, this->pcel_const, GlobalV::ofs_running); - //count the number of pricells - GlobalV::ofs_running<<"optimized lattice volume: "<optlat.Det()<plat.Det()<optlat.Det()/this->plat.Det()); - this->ncell=floor(ncell_double+0.5); + // count the number of pricells + GlobalV::ofs_running << "optimized lattice volume: " << this->optlat.Det () << std::endl; + GlobalV::ofs_running << "optimized primitive cell volume: " << this->plat.Det () << std::endl; + double ncell_double = std::abs (this->optlat.Det () / this->plat.Det ()); + this->ncell = floor (ncell_double + 0.5); - auto reset_pcell = [this]() -> void { - std::cout << " Now regard the structure as a primitive cell." << std::endl; - this->ncell = 1; - this->ptrans = std::vector >(1, ModuleBase::Vector3(0, 0, 0)); - GlobalV::ofs_running << "WARNING: Original cell may have more than one primitive cells, \ - but we have to treat it as a primitive cell. Use a larger `symmetry_prec`to avoid this warning." 
<< std::endl; + auto reset_pcell = [this] () -> void + { + std::cout << " Now regard the structure as a primitive cell." << std::endl; + this->ncell = 1; + this->ptrans = std::vector> (1, ModuleBase::Vector3 (0, 0, 0)); + GlobalV::ofs_running << "WARNING: Original cell may have more than one primitive cells, \ + but we have to treat it as a primitive cell. Use a larger `symmetry_prec`to avoid this warning." + << std::endl; }; if (this->ncell != ntrans) - { - std::cout << " WARNING: PRICELL: NCELL != NTRANS !" << std::endl; - std::cout << " NCELL=" << ncell << ", NTRANS=" << ntrans << std::endl; - std::cout << " Suggest solution: Use a larger `symmetry_prec`. " << std::endl; - reset_pcell(); - return; - } - if(std::abs(ncell_double-double(this->ncell)) > this->epsilon*100) - { - std::cout << " WARNING: THE NUMBER OF PRIMITIVE CELL IS NOT AN INTEGER !" << std::endl; - std::cout << " NCELL(double)=" << ncell_double << ", NTRANS=" << ncell << std::endl; - std::cout << " Suggest solution: Use a larger `symmetry_prec`. " << std::endl; - reset_pcell(); - return; - } - GlobalV::ofs_running<<"Original cell was built up by "<ncell<<" primitive cells."<ncell)) > this->epsilon * 100) + { + std::cout << " WARNING: THE NUMBER OF PRIMITIVE CELL IS NOT AN INTEGER !" << std::endl; + std::cout << " NCELL(double)=" << ncell_double << ", NTRANS=" << ncell << std::endl; + std::cout << " Suggest solution: Use a larger `symmetry_prec`. " << std::endl; + reset_pcell (); + return; + } + GlobalV::ofs_running << "Original cell was built up by " << this->ncell << " primitive cells." 
<< std::endl; + + // convert ptrans to input configuration + ModuleBase::Matrix3 inputlat (s1.x, s1.y, s1.z, s2.x, s2.y, s2.z, s3.x, s3.y, s3.z); + this->gtrans_convert (ptrans.data (), ptrans.data (), ntrans, this->optlat, inputlat); - //convert ptrans to input configuration - ModuleBase::Matrix3 inputlat(s1.x, s1.y, s1.z, s2.x, s2.y, s2.z, s3.x, s3.y, s3.z); - this->gtrans_convert(ptrans.data(), ptrans.data(), ntrans, this->optlat, inputlat ); - - //how many pcell in supercell - int n1=0; - int n2=0; - int n3=0; + // how many pcell in supercell + int n1 = 0; + int n2 = 0; + int n3 = 0; - ModuleBase::Matrix3 nummat0=this->optlat*this->plat.Inverse(); + ModuleBase::Matrix3 nummat0 = this->optlat * this->plat.Inverse (); ModuleBase::Matrix3 nummat, transmat; - hermite_normal_form(nummat0, nummat, transmat); - n1=floor (nummat.e11 + epsilon); - n2=floor (nummat.e22 + epsilon); - n3=floor (nummat.e33 + epsilon); - if(n1*n2*n3 != this->ncell) - { - std::cout << " WARNING: Number of cells and number of vectors did not agree."; - std::cout<<"Try to change symmetry_prec in INPUT." << std::endl; - reset_pcell(); - } + hermite_normal_form (nummat0, nummat, transmat); + n1 = floor (nummat.e11 + epsilon); + n2 = floor (nummat.e22 + epsilon); + n3 = floor (nummat.e33 + epsilon); + if (n1 * n2 * n3 != this->ncell) + { + std::cout << " WARNING: Number of cells and number of vectors did not agree."; + std::cout << "Try to change symmetry_prec in INPUT." 
<< std::endl; + reset_pcell (); + } return; } - diff --git a/source/source_cell/module_symmetry/symm_rho.cpp b/source/source_cell/module_symmetry/symm_rho.cpp index 48aeaffc779..c8587e8bf5d 100644 --- a/source/source_cell/module_symmetry/symm_rho.cpp +++ b/source/source_cell/module_symmetry/symm_rho.cpp @@ -4,306 +4,330 @@ using namespace ModuleSymmetry; #include "source_base/libm/libm.h" #include "source_io/module_parameter/parameter.h" -void Symmetry::rho_symmetry( double *rho, - const int &nr1, const int &nr2, const int &nr3) +void + Symmetry::rho_symmetry (double* rho, const int& nr1, const int& nr2, const int& nr3) { - ModuleBase::timer::start("Symmetry","rho_symmetry"); + ModuleBase::timer::start ("Symmetry", "rho_symmetry"); - assert(nr1>0); - assert(nr2>0); - assert(nr3>0); + assert (nr1 > 0); + assert (nr2 > 0); + assert (nr3 > 0); - // allocate flag for each FFT grid. + // allocate flag for each FFT grid. bool* symflag = new bool[nr1 * nr2 * nr3]; - for (int i=0; i0 ); - assert(nrotk <=48 ); - int *ri = new int[nrotk]; - int *rj = new int[nrotk]; - int *rk = new int[nrotk]; + assert (nrotk > 0); + assert (nrotk <= 48); + int* ri = new int[nrotk]; + int* rj = new int[nrotk]; + int* rk = new int[nrotk]; int ci = 0; - for (int i = 0; i< nr1; ++i) - { - for (int j = 0; j< nr2; ++j) + for (int i = 0; i < nr1; ++i) { - for (int k = 0; k< nr3; ++k) - { - if (!symflag[i * nr2 * nr3 + j * nr3 + k]) + for (int j = 0; j < nr2; ++j) { - double sum = 0; - - for (int isym = 0; isym < nrotk; ++isym) - { - this->rotate(gmatrix[isym], gtrans[isym], i, j, k, nr1, nr2, nr3, ri[isym], rj[isym], rk[isym]); - const int index = ri[isym] * nr2 * nr3 + rj[isym] * nr3 + rk[isym]; - sum += rho[ index ]; - } - sum /= nrotk; - - for (int isym = 0; isym < nrotk; ++isym) - { - const int index = ri[isym] * nr2 * nr3 + rj[isym] * nr3 + rk[isym]; - rho[index] = sum; - symflag[index] = true; - } + for (int k = 0; k < nr3; ++k) + { + if (!symflag[i * nr2 * nr3 + j * nr3 + k]) + { + double 
sum = 0; + + for (int isym = 0; isym < nrotk; ++isym) + { + this->rotate (gmatrix[isym], + gtrans[isym], + i, + j, + k, + nr1, + nr2, + nr3, + ri[isym], + rj[isym], + rk[isym]); + const int index = ri[isym] * nr2 * nr3 + rj[isym] * nr3 + rk[isym]; + sum += rho[index]; + } + sum /= nrotk; + + for (int isym = 0; isym < nrotk; ++isym) + { + const int index = ri[isym] * nr2 * nr3 + rj[isym] * nr3 + rk[isym]; + rho[index] = sum; + symflag[index] = true; + } + } + } } - } } - } delete[] symflag; delete[] ri; delete[] rj; delete[] rk; - ModuleBase::timer::end("Symmetry","rho_symmetry"); + ModuleBase::timer::end ("Symmetry", "rho_symmetry"); } -void Symmetry::rhog_symmetry(std::complex *rhogtot, - int* ixyz2ipw, const int &nx, const int &ny, const int &nz, - const int &fftnx, const int &fftny, const int &fftnz) +void + Symmetry::rhog_symmetry (std::complex* rhogtot, + int* ixyz2ipw, + const int& nx, + const int& ny, + const int& nz, + const int& fftnx, + const int& fftny, + const int& fftnz) { - ModuleBase::timer::start("Symmetry","rhog_symmetry"); - // ---------------------------------------------------------------------- - // the current way is to cluster the FFT grid points into groups in advance. - // and use OpenMP to realize parallel calculation, one thread works in one group. - // ---------------------------------------------------------------------- + ModuleBase::timer::start ("Symmetry", "rhog_symmetry"); + // ---------------------------------------------------------------------- + // the current way is to cluster the FFT grid points into groups in advance. + // and use OpenMP to realize parallel calculation, one thread works in one group. + // ---------------------------------------------------------------------- - const int nxyz = fftnx*fftny*fftnz; - assert(nxyz>0); + const int nxyz = fftnx * fftny * fftnz; + assert (nxyz > 0); - // allocate flag for each FFT grid. + // allocate flag for each FFT grid. 
// which group the grid belongs to - int* symflag = new int[nxyz]; + int* symflag = new int[nxyz]; // which rotration operation the grid corresponds to - int(*isymflag)[48] = new int[nxyz][48]; + int (*isymflag)[48] = new int[nxyz][48]; // group information - int(*table_xyz)[48] = new int[nxyz][48]; + int (*table_xyz)[48] = new int[nxyz][48]; // how many symmetry operations have been covered - int* count_xyz = new int[nxyz]; - - for (int i = 0; i < nxyz; i++) - { - symflag[i] = -1; - } - int group_index = 0; - - assert(nrotk >0 ); - assert(nrotk <=48 ); - - //map the gmatrix to inv - std::vectorinvmap(this->nrotk, -1); - this->gmatrix_invmap(kgmatrix, nrotk, invmap.data()); - - // ------------------------------------------------------------------------ - // This code defines a lambda function called "rotate_recip" that takes - // a 3x3 matrix and a 3D vector as input. It performs a rotation operation - // on the vector using the matrix and returns the rotated vector. - // Specifically, it calculates the new coordinates of the vector after - // the rotation and applies periodic boundary conditions to ensure that - // the coordinates are within the FFT-grid dimensions. - // The rotated vector is returned by modifying the input vector. 
- // ------------------------------------------------------------------------ - //rotate function (different from real space, without scaling gmatrix) - auto rotate_recip = [&] (ModuleBase::Matrix3& g, ModuleBase::Vector3& g0, int& ii, int& jj, int& kk) - { - ii = int(g.e11 * g0.x + g.e21 * g0.y + g.e31 * g0.z) ; - if (ii < 0) - { - ii += 10 * nx; - } - ii = ii%nx; - jj = int(g.e12 * g0.x + g.e22 * g0.y + g.e32 * g0.z) ; - if (jj < 0) + int* count_xyz = new int[nxyz]; + + for (int i = 0; i < nxyz; i++) { - jj += 10 * ny; + symflag[i] = -1; } - jj = jj%ny; - kk = int(g.e13 * g0.x + g.e23 * g0.y + g.e33 * g0.z); - if (kk < 0) + int group_index = 0; + + assert (nrotk > 0); + assert (nrotk <= 48); + + // map the gmatrix to inv + std::vector invmap (this->nrotk, -1); + this->gmatrix_invmap (kgmatrix, nrotk, invmap.data ()); + + // ------------------------------------------------------------------------ + // This code defines a lambda function called "rotate_recip" that takes + // a 3x3 matrix and a 3D vector as input. It performs a rotation operation + // on the vector using the matrix and returns the rotated vector. + // Specifically, it calculates the new coordinates of the vector after + // the rotation and applies periodic boundary conditions to ensure that + // the coordinates are within the FFT-grid dimensions. + // The rotated vector is returned by modifying the input vector. 
+ // ------------------------------------------------------------------------ + // rotate function (different from real space, without scaling gmatrix) + auto rotate_recip = [&] (ModuleBase::Matrix3& g, ModuleBase::Vector3& g0, int& ii, int& jj, int& kk) { - kk += 10 * nz; - } - kk = kk%nz; - return; - }; + ii = int (g.e11 * g0.x + g.e21 * g0.y + g.e31 * g0.z); + if (ii < 0) + { + ii += 10 * nx; + } + ii = ii % nx; + jj = int (g.e12 * g0.x + g.e22 * g0.y + g.e32 * g0.z); + if (jj < 0) + { + jj += 10 * ny; + } + jj = jj % ny; + kk = int (g.e13 * g0.x + g.e23 * g0.y + g.e33 * g0.z); + if (kk < 0) + { + kk += 10 * nz; + } + kk = kk % nz; + return; + }; - // ------------------------------------------------------------------------ + // ------------------------------------------------------------------------ // Trying to group fft grids first. - // It iterates over each FFT-grid point and checks if it is within the - // PW-sphere. If it is, put all the FFT-grid points connected by the + // It iterates over each FFT-grid point and checks if it is within the + // PW-sphere. If it is, put all the FFT-grid points connected by the // rotation operation into one group( the index is stored in int(*table_xyz)). // The code marks the point as processed to avoid redundant calculations // by using int* symflag. 
- // ------------------------------------------------------------------------ - - ModuleBase::timer::start("Symmetry","group_fft_grids"); - for (int i = 0; i< fftnx; ++i) - { - //tmp variable - ModuleBase::Vector3 tmp_gdirect0(0, 0, 0); - tmp_gdirect0.x=(i>int(nx/2)+1)?(i-nx):i; - for (int j = 0; j< fftny; ++j) + // ------------------------------------------------------------------------ + + ModuleBase::timer::start ("Symmetry", "group_fft_grids"); + for (int i = 0; i < fftnx; ++i) { - tmp_gdirect0.y=(j>int(ny/2)+1)?(j-ny):j; - for (int k = 0; k< fftnz; ++k) - { - int ixyz0=(i*fftny+j)*fftnz+k; - if (symflag[ixyz0] == -1) + // tmp variable + ModuleBase::Vector3 tmp_gdirect0 (0, 0, 0); + tmp_gdirect0.x = (i > int (nx / 2) + 1) ? (i - nx) : i; + for (int j = 0; j < fftny; ++j) { - int ipw0=ixyz2ipw[ixyz0]; - //if a fft-grid is not in pw-sphere, just do not consider it. - if (ipw0 == -1) { - continue; - } - tmp_gdirect0.z=(k>int(nz/2)+1)?(k-nz):k; - int rot_count=0; - for (int isym = 0; isym < nrotk; ++isym) - { - if (invmap[isym] < 0 || invmap[isym] > nrotk) { continue; } - //tmp variables - int ii, jj, kk=0; - rotate_recip(kgmatrix[invmap[isym]], tmp_gdirect0, ii, jj, kk); - if(ii>=fftnx || jj>=fftny || kk>= fftnz) + tmp_gdirect0.y = (j > int (ny / 2) + 1) ? (j - ny) : j; + for (int k = 0; k < fftnz; ++k) { - if(!PARAM.globalv.gamma_only_pw) - { - std::cout << " ROTATE OUT OF FFT-GRID IN RHOG_SYMMETRY !" << std::endl; - ModuleBase::QUIT(); - } - // for gamma_only_pw, just do not consider this rotation. - continue; + int ixyz0 = (i * fftny + j) * fftnz + k; + if (symflag[ixyz0] == -1) + { + int ipw0 = ixyz2ipw[ixyz0]; + // if a fft-grid is not in pw-sphere, just do not consider it. + if (ipw0 == -1) + { + continue; + } + tmp_gdirect0.z = (k > int (nz / 2) + 1) ? 
(k - nz) : k; + int rot_count = 0; + for (int isym = 0; isym < nrotk; ++isym) + { + if (invmap[isym] < 0 || invmap[isym] > nrotk) + { + continue; + } + // tmp variables + int ii, jj, kk = 0; + rotate_recip (kgmatrix[invmap[isym]], tmp_gdirect0, ii, jj, kk); + if (ii >= fftnx || jj >= fftny || kk >= fftnz) + { + if (!PARAM.globalv.gamma_only_pw) + { + std::cout << " ROTATE OUT OF FFT-GRID IN RHOG_SYMMETRY !" + << std::endl; + ModuleBase::QUIT (); + } + // for gamma_only_pw, just do not consider this rotation. + continue; + } + int ixyz = (ii * fftny + jj) * fftnz + kk; + // fft-grid index to (ip, ig) + int ipw = ixyz2ipw[ixyz]; + if (ipw == -1) // not in pw-sphere + { + continue; // else, just skip it + } + symflag[ixyz] = group_index; + isymflag[group_index][rot_count] = invmap[isym]; + table_xyz[group_index][rot_count] = ixyz; + ++rot_count; + assert (rot_count <= nrotk); + count_xyz[group_index] = rot_count; + } + group_index++; + } } - int ixyz=(ii*fftny+jj)*fftnz+kk; - //fft-grid index to (ip, ig) - int ipw=ixyz2ipw[ixyz]; - if(ipw==-1) //not in pw-sphere - { - continue; //else, just skip it - } - symflag[ixyz] = group_index; - isymflag[group_index][rot_count] = invmap[isym]; - table_xyz[group_index][rot_count] = ixyz; - ++rot_count; - assert(rot_count <= nrotk); - count_xyz[group_index] = rot_count; - } - group_index++; } - } } - } - ModuleBase::timer::end("Symmetry","group_fft_grids"); - - // ------------------------------------------------------------------- - // This code performs symmetry operations on the reciprocal space - // charge density using FFT-grids. It iterates over each FFT-grid - // point in a particular group, applies a phase factor and sums the - // charge density over the symmetry operations, and then divides by - // the number of symmetry operations. Finally, it updates the charge - // density for each FFT-grid point using the calculated sum. 
- // ------------------------------------------------------------------- + ModuleBase::timer::end ("Symmetry", "group_fft_grids"); + + // ------------------------------------------------------------------- + // This code performs symmetry operations on the reciprocal space + // charge density using FFT-grids. It iterates over each FFT-grid + // point in a particular group, applies a phase factor and sums the + // charge density over the symmetry operations, and then divides by + // the number of symmetry operations. Finally, it updates the charge + // density for each FFT-grid point using the calculated sum. + // ------------------------------------------------------------------- #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int g_index = 0; g_index < group_index; g_index++) - { - // record the index and gphase but not the final gdirect for each symm-opt - int *ipw_record = new int[nrotk]; - int *ixyz_record = new int[nrotk]; - std::complex* gphase_record = new std::complex [nrotk]; - std::complex sum(0, 0); - int rot_count=0; - - for (int c_index = 0; c_index < count_xyz[g_index]; ++c_index) - { - int ixyz0 = table_xyz[g_index][c_index]; - int ipw0 = ixyz2ipw[ixyz0]; - - if (symflag[ixyz0] == g_index) - { - // note : do not use PBC after rotation. - // we need a real gdirect to get the correspoding rhogtot. 
- int k = ixyz0%fftnz; - int j = ((ixyz0-k)/fftnz)%fftny; - int i = ((ixyz0-k)/fftnz-j)/fftny; - - //fft-grid index to gdirect - ModuleBase::Vector3 tmp_gdirect_double(0.0, 0.0, 0.0); - tmp_gdirect_double.x=static_cast((i>int(nx/2)+1)?(i-nx):i); - tmp_gdirect_double.y=static_cast((j>int(ny/2)+1)?(j-ny):j); - tmp_gdirect_double.z=static_cast((k>int(nz/2)+1)?(k-nz):k); - - //calculate phase factor - tmp_gdirect_double = tmp_gdirect_double * ModuleBase::TWO_PI; - - double cos_arg = 0.0, sin_arg = 0.0; - double arg_gtrans = tmp_gdirect_double * gtrans[isymflag[g_index][c_index]]; - - std::complex phase_gtrans (ModuleBase::libm::cos(arg_gtrans), - ModuleBase::libm::sin(arg_gtrans)); - - // for each pricell in supercell: - for (int ipt = 0;ipt < ((ModuleSymmetry::Symmetry::pricell_loop) ? this->ncell : 1);++ipt) - { - double arg = tmp_gdirect_double * ptrans[ipt]; - double tmp_cos = 0.0, tmp_sin = 0.0; - ModuleBase::libm::sincos(arg, &tmp_sin, &tmp_cos); - cos_arg += tmp_cos; - sin_arg += tmp_sin; - } - - // add nothing to sum, so don't consider this isym into rot_count - cos_arg/=static_cast(ncell); - sin_arg/=static_cast(ncell); - - //deal with double-zero - if (equal(cos_arg, 0.0) && equal(sin_arg, 0.0)) - { - continue; - } - - std::complex gphase(cos_arg, sin_arg); - gphase = phase_gtrans * gphase; - - //deal with small difference from 1 - if (equal(gphase.real(), 1.0) && equal(gphase.imag(), 0)) - { - gphase = std::complex(1.0, 0.0); - } - - gphase_record[rot_count]=gphase; - sum += rhogtot[ipw0]*gphase; - //record - ipw_record[rot_count]=ipw0; - ixyz_record[rot_count]=ixyz0; - ++rot_count; - //assert(rot_count<=nrotk); - }//end if section - }//end c_index loop - if (rot_count!=0) sum/= rot_count; - for (int isym = 0; isym < rot_count; ++isym) - { - rhogtot[ipw_record[isym]] = sum/gphase_record[isym]; - } - - //Clean the records variables for each fft grid point - delete[] ipw_record; - delete[] ixyz_record; - delete[] gphase_record; - }//end g_index loop - - 
delete[] symflag; - delete[] isymflag; - delete[] table_xyz; - delete[] count_xyz; - ModuleBase::timer::end("Symmetry","rhog_symmetry"); + for (int g_index = 0; g_index < group_index; g_index++) + { + // record the index and gphase but not the final gdirect for each symm-opt + int* ipw_record = new int[nrotk]; + int* ixyz_record = new int[nrotk]; + std::complex* gphase_record = new std::complex[nrotk]; + std::complex sum (0, 0); + int rot_count = 0; + + for (int c_index = 0; c_index < count_xyz[g_index]; ++c_index) + { + int ixyz0 = table_xyz[g_index][c_index]; + int ipw0 = ixyz2ipw[ixyz0]; + + if (symflag[ixyz0] == g_index) + { + // note : do not use PBC after rotation. + // we need a real gdirect to get the correspoding rhogtot. + int k = ixyz0 % fftnz; + int j = ((ixyz0 - k) / fftnz) % fftny; + int i = ((ixyz0 - k) / fftnz - j) / fftny; + + // fft-grid index to gdirect + ModuleBase::Vector3 tmp_gdirect_double (0.0, 0.0, 0.0); + tmp_gdirect_double.x = static_cast ((i > int (nx / 2) + 1) ? (i - nx) : i); + tmp_gdirect_double.y = static_cast ((j > int (ny / 2) + 1) ? (j - ny) : j); + tmp_gdirect_double.z = static_cast ((k > int (nz / 2) + 1) ? (k - nz) : k); + + // calculate phase factor + tmp_gdirect_double = tmp_gdirect_double * ModuleBase::TWO_PI; + + double cos_arg = 0.0, sin_arg = 0.0; + double arg_gtrans = tmp_gdirect_double * gtrans[isymflag[g_index][c_index]]; + + std::complex phase_gtrans (ModuleBase::libm::cos (arg_gtrans), + ModuleBase::libm::sin (arg_gtrans)); + + // for each pricell in supercell: + for (int ipt = 0; ipt < ((ModuleSymmetry::Symmetry::pricell_loop) ? 
this->ncell : 1); ++ipt) + { + double arg = tmp_gdirect_double * ptrans[ipt]; + double tmp_cos = 0.0, tmp_sin = 0.0; + ModuleBase::libm::sincos (arg, &tmp_sin, &tmp_cos); + cos_arg += tmp_cos; + sin_arg += tmp_sin; + } + + // add nothing to sum, so don't consider this isym into rot_count + cos_arg /= static_cast (ncell); + sin_arg /= static_cast (ncell); + + // deal with double-zero + if (equal (cos_arg, 0.0) && equal (sin_arg, 0.0)) + { + continue; + } + + std::complex gphase (cos_arg, sin_arg); + gphase = phase_gtrans * gphase; + + // deal with small difference from 1 + if (equal (gphase.real (), 1.0) && equal (gphase.imag (), 0)) + { + gphase = std::complex (1.0, 0.0); + } + + gphase_record[rot_count] = gphase; + sum += rhogtot[ipw0] * gphase; + // record + ipw_record[rot_count] = ipw0; + ixyz_record[rot_count] = ixyz0; + ++rot_count; + // assert(rot_count<=nrotk); + } // end if section + } // end c_index loop + if (rot_count != 0) + { + sum /= rot_count; + } + for (int isym = 0; isym < rot_count; ++isym) + { + rhogtot[ipw_record[isym]] = sum / gphase_record[isym]; + } + + // Clean the records variables for each fft grid point + delete[] ipw_record; + delete[] ixyz_record; + delete[] gphase_record; + } // end g_index loop + + delete[] symflag; + delete[] isymflag; + delete[] table_xyz; + delete[] count_xyz; + ModuleBase::timer::end ("Symmetry", "rhog_symmetry"); } diff --git a/source/source_cell/module_symmetry/symmetry.cpp b/source/source_cell/module_symmetry/symmetry.cpp index 30c479d16aa..b0294c33157 100644 --- a/source/source_cell/module_symmetry/symmetry.cpp +++ b/source/source_cell/module_symmetry/symmetry.cpp @@ -8,247 +8,279 @@ int Symmetry::symm_flag = 0; bool Symmetry::symm_autoclose = false; bool Symmetry::pricell_loop = true; -void Symmetry::set_atom_map(const Atom* atoms) +void + Symmetry::set_atom_map (const Atom* atoms) { - ModuleBase::TITLE("Symmetry", "set_atom_map"); - if (this->isym_rotiat_.size() == this->nrotk) { - return; - } - 
this->isym_rotiat_.resize(this->nrotk); - for (int i = 0; i < this->nrotk; ++i) { - this->isym_rotiat_[i].resize(this->nat, -1); - } + ModuleBase::TITLE ("Symmetry", "set_atom_map"); + if (this->isym_rotiat_.size () == this->nrotk) + { + return; + } + this->isym_rotiat_.resize (this->nrotk); + for (int i = 0; i < this->nrotk; ++i) + { + this->isym_rotiat_[i].resize (this->nat, -1); + } double* pos = this->newpos; double* rotpos = this->rotpos; - ModuleBase::GlobalFunc::ZEROS(pos, this->nat * 3); + ModuleBase::GlobalFunc::ZEROS (pos, this->nat * 3); int iat = 0; for (int it = 0; it < this->ntype; it++) - { - for (int ia = 0; ia < this->na[it]; ia++) { - pos[3 * iat] = atoms[it].taud[ia].x; - pos[3 * iat + 1] = atoms[it].taud[ia].y; - pos[3 * iat + 2] = atoms[it].taud[ia].z; - for (int k = 0; k < 3; ++k) - { - this->check_translation(pos[iat * 3 + k], -floor(pos[iat * 3 + k])); - this->check_boundary(pos[iat * 3 + k]); - } - iat++; + for (int ia = 0; ia < this->na[it]; ia++) + { + pos[3 * iat] = atoms[it].taud[ia].x; + pos[3 * iat + 1] = atoms[it].taud[ia].y; + pos[3 * iat + 2] = atoms[it].taud[ia].z; + for (int k = 0; k < 3; ++k) + { + this->check_translation (pos[iat * 3 + k], -floor (pos[iat * 3 + k])); + this->check_boundary (pos[iat * 3 + k]); + } + iat++; + } } - } for (int it = 0; it < this->ntype; it++) - { - for (int ia = istart[it]; ia < istart[it] + na[it]; ++ia) { - const int xx = ia * 3; - const int yy = ia * 3 + 1; - const int zz = ia * 3 + 2; + for (int ia = istart[it]; ia < istart[it] + na[it]; ++ia) + { + const int xx = ia * 3; + const int yy = ia * 3 + 1; + const int zz = ia * 3 + 2; - for (int k = 0;k < this->nrotk;++k) - { - rotpos[xx] = pos[xx] * gmatrix[k].e11 - + pos[yy] * gmatrix[k].e21 - + pos[zz] * gmatrix[k].e31 + gtrans[k].x; - rotpos[yy] = pos[xx] * gmatrix[k].e12 - + pos[yy] * gmatrix[k].e22 - + pos[zz] * gmatrix[k].e32 + gtrans[k].y; - rotpos[zz] = pos[xx] * gmatrix[k].e13 - + pos[yy] * gmatrix[k].e23 - + pos[zz] * gmatrix[k].e33 + 
gtrans[k].z; + for (int k = 0; k < this->nrotk; ++k) + { + rotpos[xx] = pos[xx] * gmatrix[k].e11 + pos[yy] * gmatrix[k].e21 + pos[zz] * gmatrix[k].e31 + + gtrans[k].x; + rotpos[yy] = pos[xx] * gmatrix[k].e12 + pos[yy] * gmatrix[k].e22 + pos[zz] * gmatrix[k].e32 + + gtrans[k].y; + rotpos[zz] = pos[xx] * gmatrix[k].e13 + pos[yy] * gmatrix[k].e23 + pos[zz] * gmatrix[k].e33 + + gtrans[k].z; - check_translation(rotpos[xx], -floor(rotpos[xx])); - check_boundary(rotpos[xx]); - check_translation(rotpos[yy], -floor(rotpos[yy])); - check_boundary(rotpos[yy]); - check_translation(rotpos[zz], -floor(rotpos[zz])); - check_boundary(rotpos[zz]); + check_translation (rotpos[xx], -floor (rotpos[xx])); + check_boundary (rotpos[xx]); + check_translation (rotpos[yy], -floor (rotpos[yy])); + check_boundary (rotpos[yy]); + check_translation (rotpos[zz], -floor (rotpos[zz])); + check_boundary (rotpos[zz]); - for (int ja = istart[it]; ja < istart[it] + na[it]; ++ja) - { - double diff1 = check_diff(pos[ja * 3], rotpos[xx]); - double diff2 = check_diff(pos[ja * 3 + 1], rotpos[yy]); - double diff3 = check_diff(pos[ja * 3 + 2], rotpos[zz]); - if (equal(diff1, 0.0) && equal(diff2, 0.0) && equal(diff3, 0.0)) - { - this->isym_rotiat_[k][ia] = ja; + for (int ja = istart[it]; ja < istart[it] + na[it]; ++ja) + { + double diff1 = check_diff (pos[ja * 3], rotpos[xx]); + double diff2 = check_diff (pos[ja * 3 + 1], rotpos[yy]); + double diff3 = check_diff (pos[ja * 3 + 2], rotpos[zz]); + if (equal (diff1, 0.0) && equal (diff2, 0.0) && equal (diff3, 0.0)) + { + this->isym_rotiat_[k][ia] = ja; - break; - } + break; + } + } + } } - } } - } } -void Symmetry::symmetrize_vec3_nat(double* v)const // pengfei 2016-12-20 +void + Symmetry::symmetrize_vec3_nat (double* v) const // pengfei 2016-12-20 { - ModuleBase::TITLE("Symmetry", "symmetrize_vec3_nat"); + ModuleBase::TITLE ("Symmetry", "symmetrize_vec3_nat"); double* vtot = nullptr; int* n = nullptr; - vtot = new double[nat * 3]; 
ModuleBase::GlobalFunc::ZEROS(vtot, nat * 3); - n = new int[nat]; ModuleBase::GlobalFunc::ZEROS(n, nat); + vtot = new double[nat * 3]; + ModuleBase::GlobalFunc::ZEROS (vtot, nat * 3); + n = new int[nat]; + ModuleBase::GlobalFunc::ZEROS (n, nat); - for (int j = 0;j < nat; ++j) - { - const int jx = j * 3; const int jy = j * 3 + 1; const int jz = j * 3 + 2; - for (int k = 0; k < nrotk; ++k) + for (int j = 0; j < nat; ++j) + { + const int jx = j * 3; + const int jy = j * 3 + 1; + const int jz = j * 3 + 2; + for (int k = 0; k < nrotk; ++k) + { + int l = this->isym_rotiat_[k][j]; + if (l < 0) + { + continue; + } + vtot[l * 3] + = vtot[l * 3] + v[jx] * gmatrix[k].e11 + v[jy] * gmatrix[k].e21 + v[jz] * gmatrix[k].e31; + vtot[l * 3 + 1] + = vtot[l * 3 + 1] + v[jx] * gmatrix[k].e12 + v[jy] * gmatrix[k].e22 + v[jz] * gmatrix[k].e32; + vtot[l * 3 + 2] + = vtot[l * 3 + 2] + v[jx] * gmatrix[k].e13 + v[jy] * gmatrix[k].e23 + v[jz] * gmatrix[k].e33; + n[l]++; + } + } + for (int j = 0; j < nat; ++j) { - int l = this->isym_rotiat_[k][j]; - if (l < 0) { - continue; - } - vtot[l*3] = vtot[l*3] + v[jx] * gmatrix[k].e11 + v[jy] * gmatrix[k].e21 + v[jz] * gmatrix[k].e31; - vtot[l*3+1] = vtot[l*3+1] + v[jx] * gmatrix[k].e12 + v[jy] * gmatrix[k].e22 + v[jz] * gmatrix[k].e32; - vtot[l*3+2] = vtot[l*3+2] + v[jx] * gmatrix[k].e13 + v[jy] * gmatrix[k].e23 + v[jz] * gmatrix[k].e33; - n[l]++; + v[j * 3] = vtot[j * 3] / n[j]; + v[j * 3 + 1] = vtot[j * 3 + 1] / n[j]; + v[j * 3 + 2] = vtot[j * 3 + 2] / n[j]; } - } - for (int j = 0;j < nat; ++j) - { - v[j * 3] = vtot[j * 3] / n[j]; - v[j * 3 + 1] = vtot[j * 3 + 1] / n[j]; - v[j * 3 + 2] = vtot[j * 3 + 2] / n[j]; - } delete[] vtot; delete[] n; - return; + return; } -void Symmetry::symmetrize_mat3(ModuleBase::matrix& sigma, const Lattice& lat)const //zhengdy added 2017 +void + Symmetry::symmetrize_mat3 (ModuleBase::matrix& sigma, const Lattice& lat) const // zhengdy added 2017 { - ModuleBase::matrix A = lat.latvec.to_matrix(); - ModuleBase::matrix AT 
= lat.latvec.Transpose().to_matrix(); - ModuleBase::matrix invA = lat.GT.to_matrix(); - ModuleBase::matrix invAT = lat.G.to_matrix(); - ModuleBase::matrix tot_sigma(3, 3, true); + ModuleBase::matrix A = lat.latvec.to_matrix (); + ModuleBase::matrix AT = lat.latvec.Transpose ().to_matrix (); + ModuleBase::matrix invA = lat.GT.to_matrix (); + ModuleBase::matrix invAT = lat.G.to_matrix (); + ModuleBase::matrix tot_sigma (3, 3, true); sigma = A * sigma * AT; - for (int k = 0; k < nrotk; ++k) { - tot_sigma += invA * gmatrix[k].to_matrix() * sigma - * gmatrix[k].Transpose().to_matrix() * invAT; - } - sigma = tot_sigma * static_cast(1.0 / nrotk); - return; + for (int k = 0; k < nrotk; ++k) + { + tot_sigma += invA * gmatrix[k].to_matrix () * sigma * gmatrix[k].Transpose ().to_matrix () * invAT; + } + sigma = tot_sigma * static_cast (1.0 / nrotk); + return; } -void Symmetry::gmatrix_convert_int(const ModuleBase::Matrix3* sa, ModuleBase::Matrix3* sb, - const int n, const ModuleBase::Matrix3 &a, const ModuleBase::Matrix3 &b) const +void + Symmetry::gmatrix_convert_int (const ModuleBase::Matrix3* sa, + ModuleBase::Matrix3* sb, + const int n, + const ModuleBase::Matrix3& a, + const ModuleBase::Matrix3& b) const { - auto round = [](double x){return (x>0.0)?floor(x+0.5):ceil(x-0.5);}; - ModuleBase::Matrix3 ai = a.Inverse(); - ModuleBase::Matrix3 bi = b.Inverse(); - for (int i=0;i 0.0) ? 
floor (x + 0.5) : ceil (x - 0.5); }; + ModuleBase::Matrix3 ai = a.Inverse (); + ModuleBase::Matrix3 bi = b.Inverse (); + for (int i = 0; i < n; ++i) + { + sb[i] = b * ai * sa[i] * a * bi; + // to int + sb[i].e11 = round (sb[i].e11); + sb[i].e12 = round (sb[i].e12); + sb[i].e13 = round (sb[i].e13); + sb[i].e21 = round (sb[i].e21); + sb[i].e22 = round (sb[i].e22); + sb[i].e23 = round (sb[i].e23); + sb[i].e31 = round (sb[i].e31); + sb[i].e32 = round (sb[i].e32); + sb[i].e33 = round (sb[i].e33); + } } -void Symmetry::gmatrix_convert(const ModuleBase::Matrix3* sa, ModuleBase::Matrix3* sb, - const int n, const ModuleBase::Matrix3 &a, const ModuleBase::Matrix3 &b)const +void + Symmetry::gmatrix_convert (const ModuleBase::Matrix3* sa, + ModuleBase::Matrix3* sb, + const int n, + const ModuleBase::Matrix3& a, + const ModuleBase::Matrix3& b) const { - ModuleBase::Matrix3 ai = a.Inverse(); - ModuleBase::Matrix3 bi = b.Inverse(); - for (int i=0;i* va, ModuleBase::Vector3* vb, - const int n, const ModuleBase::Matrix3 &a, const ModuleBase::Matrix3 &b)const +void + Symmetry::gtrans_convert (const ModuleBase::Vector3* va, + ModuleBase::Vector3* vb, + const int n, + const ModuleBase::Matrix3& a, + const ModuleBase::Matrix3& b) const { - ModuleBase::Matrix3 bi = b.Inverse(); - for (int i=0;i &a1, - ModuleBase::Vector3 &a2, ModuleBase::Vector3 &a3) const +void + Symmetry::get_shortest_latvec (ModuleBase::Vector3& a1, + ModuleBase::Vector3& a2, + ModuleBase::Vector3& a3) const { - double len1=a1.norm(); - double len2=a2.norm(); - double len3=a3.norm(); - bool flag=true; //at least one iter - auto loop = [this, &flag](ModuleBase::Vector3 &v1, ModuleBase::Vector3&v2, double &len) - { - bool fa=false, fb=false; - // loop a - double tmp_len=(v1-v2).norm(); - while (tmp_len < len-epsilon) + double len1 = a1.norm (); + double len2 = a2.norm (); + double len3 = a3.norm (); + bool flag = true; // at least one iter + auto loop = [this, &flag] (ModuleBase::Vector3& v1, ModuleBase::Vector3& v2, 
double& len) { - v1=v1-v2; - len=v1.norm(); - tmp_len=(v1-v2).norm(); - fa=true; - } - // loop b - tmp_len=(v1+v2).norm(); - while(tmp_len < len-epsilon) + bool fa = false, fb = false; + // loop a + double tmp_len = (v1 - v2).norm (); + while (tmp_len < len - epsilon) + { + v1 = v1 - v2; + len = v1.norm (); + tmp_len = (v1 - v2).norm (); + fa = true; + } + // loop b + tmp_len = (v1 + v2).norm (); + while (tmp_len < len - epsilon) + { + assert (!fa); + v1 = v1 + v2; + len = v1.norm (); + tmp_len = (v1 + v2).norm (); + fb = true; + } + if (fa || fb) + { + flag = true; + } + return; + }; + while (flag) // iter { - assert(!fa); - v1=v1+v2; - len=v1.norm(); - tmp_len=(v1+v2).norm(); - fb=true; - } - if (fa || fb) { - flag = true; + flag = false; + // if any of a1, a2, a3 is updated, flag will become true. + // which means a further search is needed. + loop (a1, a2, len1); + loop (a1, a3, len1); + loop (a2, a1, len2); + loop (a2, a3, len2); + loop (a3, a1, len3); + loop (a3, a2, len3); } - return; - }; - while(flag) //iter - { - flag=false; - // if any of a1, a2, a3 is updated, flag will become true. - // which means a further search is needed. 
- loop(a1, a2, len1); - loop(a1, a3, len1); - loop(a2, a1, len2); - loop(a2, a3, len2); - loop(a3, a1, len3); - loop(a3, a2, len3); - } return; } -void Symmetry::get_optlat(ModuleBase::Vector3 &v1, ModuleBase::Vector3 &v2, - ModuleBase::Vector3 &v3, ModuleBase::Vector3 &w1, - ModuleBase::Vector3 &w2, ModuleBase::Vector3 &w3, - int& real_brav, double* cel_const, double* tmp_const) const +void + Symmetry::get_optlat (ModuleBase::Vector3& v1, + ModuleBase::Vector3& v2, + ModuleBase::Vector3& v3, + ModuleBase::Vector3& w1, + ModuleBase::Vector3& w2, + ModuleBase::Vector3& w3, + int& real_brav, + double* cel_const, + double* tmp_const) const { ModuleBase::Vector3 r1, r2, r3; double cos1 = 1; @@ -257,88 +289,119 @@ void Symmetry::get_optlat(ModuleBase::Vector3 &v1, ModuleBase::Vector3epsilon = 1e-6; - }; - ~Symmetry() {}; - - //symmetry flag for levels - //-1 : no symmetry at all, k points would be total nks in KPT - //0 : only basic time-reversal symmetry is considered, point k and -k would fold to k - //1 : point group symmetry is considered + public: + Symmetry () { this->epsilon = 1e-6; }; + ~Symmetry () {}; + + // symmetry flag for levels + //-1 : no symmetry at all, k points would be total nks in KPT + // 0 : only basic time-reversal symmetry is considered, point k and -k would fold to k + // 1 : point group symmetry is considered static int symm_flag; static bool symm_autoclose; // controled by INPUT static bool pricell_loop; ///< whether to loop primitive cell in rhog_symmetry, Only for AFM /// @brief analyze the symmetry of the system /// @param lat structure of lattice - /// @param st + /// @param st /// @param atoms all atoms - /// @param ofs_running - /// get the symmetry information of the system, gmatries (rotation 3*3 matrixs), gtrans (transfer a collections vector3), etc. 
- void analy_sys(const Lattice& lat, const Statistics& st, Atom* atoms, std::ofstream& ofs_running); - - ModuleBase::Vector3 s1, s2, s3; - ModuleBase::Vector3 a1, a2, a3; //primitive cell vectors(might be changed during the process of the program) - ModuleBase::Vector3 p1, p2, p3; //primitive cell vectors - - int ntype=0; //the number of atomic species - int nat =0; //the number of all atoms - int *na =nullptr;//number of atoms for each species - int *istart=nullptr; //start number of atom. - int itmin_type=0; //the type has smallest number of atoms - int itmin_start=0; - - // direct coordinates of atoms. - double *newpos=nullptr; - // positions of atoms after rotation. - double *rotpos=nullptr; - - - std::vector> ptrans; // the translation vectors of the primitive cell in the input structure - int ncell=1; //the number of primitive cells within one supercell - int *index=nullptr; - - double cel_const[6]={0.0}; - double pcel_const[6]={0.0}; //cel_const of primitive cell - double pre_const[6]={0.0}; //cel_const of input configuration, first 3 is moduli of a1, a2, a3, last 3 is eular angle - - bool symflag_fft[48]={false}; - int sym_test=0; - int pbrav=0; //ibrav of primitive cell - int real_brav=0; // the real ibrav for the cell pengfei Li 3-15-2022 - std::string ilattname; //the bravais lattice type of the supercell - std::string plattname; //the bravais lattice type of the primitive cell - - ModuleBase::Matrix3 gmatrix[48]; //the rotation matrices for all space group operations - ModuleBase::Matrix3 kgmatrix[48]; //the rotation matrices in reciprocal space - ModuleBase::Vector3 gtrans[48]; - - ModuleBase::Matrix3 symop[48]; //the rotation matrices for the pure bravais lattice - int nop=0; //the number of point group operations of the pure bravais lattice without basis - int nrot=0; //the number of pure point group rotations - int nrotk = -1; //the number of all space group operations, >0 means the nrotk has been analyzed - int max_nrotk = -1; ///< record the 
maximum number of symmetry operations during cell-relax - int pgnumber=0; //the serial number of point group - int spgnumber=0; //the serial number of point group in space group - std::string pgname; //the Schoenflies name of the point group R in {R|0} - std::string spgname; //the Schoenflies name of the point group R in the space group {R|t} - - ModuleBase::Matrix3 optlat; //the optimized-symmetry lattice - ModuleBase::Matrix3 plat; //the primitive lattice - - bool all_mbl = true; ///< whether all the atoms are movable in all the directions - - int standard_lat(ModuleBase::Vector3& a, - ModuleBase::Vector3& b, - ModuleBase::Vector3& c, - double* celconst)const; - - void lattice_type(ModuleBase::Vector3 &v1, - ModuleBase::Vector3 &v2, - ModuleBase::Vector3 &v3, - ModuleBase::Vector3 &v01, - ModuleBase::Vector3 &v02, - ModuleBase::Vector3 &v03, - double* cel_const, - double* pre_const, - int& real_brav, - std::string& bravname, - const Atom* atoms, - bool convert_atoms, - double* newpos = nullptr)const; - - void getgroup(int& nrot, - int& nrotk, - std::ofstream& ofs_running, - const int& nop, - const ModuleBase::Matrix3* symop, - ModuleBase::Matrix3* gmatrix, - ModuleBase::Vector3* gtrans, - double* pos, double* rotpos, int* index, - const int ntype, const int itmin_type, const int itmin_start, - int* istart, int* na)const; - - bool checksym(const ModuleBase::Matrix3 &s, - ModuleBase::Vector3& gtrans, - double* pos, double* rotpos, int* index, - const int itmin_type, const int ntype, const int itmin_start, - int* istart, int* na)const; + /// @param ofs_running + /// get the symmetry information of the system, gmatries (rotation 3*3 matrixs), gtrans (transfer a collections + /// vector3), etc. 
+ void analy_sys (const Lattice& lat, const Statistics& st, Atom* atoms, std::ofstream& ofs_running); + + ModuleBase::Vector3 s1, s2, s3; + ModuleBase::Vector3 a1, a2, a3; // primitive cell vectors(might be changed during the process of the + // program) + ModuleBase::Vector3 p1, p2, p3; // primitive cell vectors + + int ntype = 0; // the number of atomic species + int nat = 0; // the number of all atoms + int* na = nullptr; // number of atoms for each species + int* istart = nullptr; // start number of atom. + int itmin_type = 0; // the type has smallest number of atoms + int itmin_start = 0; + + // direct coordinates of atoms. + double* newpos = nullptr; + // positions of atoms after rotation. + double* rotpos = nullptr; + + std::vector> + ptrans; // the translation vectors of the primitive cell in the input structure + int ncell = 1; // the number of primitive cells within one supercell + int* index = nullptr; + + double cel_const[6] = {0.0}; + double pcel_const[6] = {0.0}; // cel_const of primitive cell + double pre_const[6] + = {0.0}; // cel_const of input configuration, first 3 is moduli of a1, a2, a3, last 3 is eular angle + + bool symflag_fft[48] = {false}; + int sym_test = 0; + int pbrav = 0; // ibrav of primitive cell + int real_brav = 0; // the real ibrav for the cell pengfei Li 3-15-2022 + std::string ilattname; // the bravais lattice type of the supercell + std::string plattname; // the bravais lattice type of the primitive cell + + ModuleBase::Matrix3 gmatrix[48]; // the rotation matrices for all space group operations + ModuleBase::Matrix3 kgmatrix[48]; // the rotation matrices in reciprocal space + ModuleBase::Vector3 gtrans[48]; + + ModuleBase::Matrix3 symop[48]; // the rotation matrices for the pure bravais lattice + int nop = 0; // the number of point group operations of the pure bravais lattice without basis + int nrot = 0; // the number of pure point group rotations + int nrotk = -1; // the number of all space group operations, >0 means the 
nrotk has been analyzed + int max_nrotk = -1; ///< record the maximum number of symmetry operations during cell-relax + int pgnumber = 0; // the serial number of point group + int spgnumber = 0; // the serial number of point group in space group + std::string pgname; // the Schoenflies name of the point group R in {R|0} + std::string spgname; // the Schoenflies name of the point group R in the space group {R|t} + + ModuleBase::Matrix3 optlat; // the optimized-symmetry lattice + ModuleBase::Matrix3 plat; // the primitive lattice + + bool all_mbl = true; ///< whether all the atoms are movable in all the directions + + int standard_lat (ModuleBase::Vector3& a, + ModuleBase::Vector3& b, + ModuleBase::Vector3& c, + double* celconst) const; + + void lattice_type (ModuleBase::Vector3& v1, + ModuleBase::Vector3& v2, + ModuleBase::Vector3& v3, + ModuleBase::Vector3& v01, + ModuleBase::Vector3& v02, + ModuleBase::Vector3& v03, + double* cel_const, + double* pre_const, + int& real_brav, + std::string& bravname, + const Atom* atoms, + bool convert_atoms, + double* newpos = nullptr) const; + + void getgroup (int& nrot, + int& nrotk, + std::ofstream& ofs_running, + const int& nop, + const ModuleBase::Matrix3* symop, + ModuleBase::Matrix3* gmatrix, + ModuleBase::Vector3* gtrans, + double* pos, + double* rotpos, + int* index, + const int ntype, + const int itmin_type, + const int itmin_start, + int* istart, + int* na) const; + + bool checksym (const ModuleBase::Matrix3& s, + ModuleBase::Vector3& gtrans, + double* pos, + double* rotpos, + int* index, + const int itmin_type, + const int ntype, + const int itmin_start, + int* istart, + int* na) const; /// @brief primitive cell analysis - void pricell(double* pos, const Atom* atoms); - - /// ----------------------- - /// Symmetrize the charge density, the forces, and the stress - /// ----------------------- - void rho_symmetry(double *rho, const int &nr1, const int &nr2, const int &nr3); - - void rhog_symmetry(std::complex *rhogtot, 
int* ixyz2ipw, const int &nx, - const int &ny, const int &nz, const int & fftnx, const int &fftny, const int &fftnz); + void pricell (double* pos, const Atom* atoms); + + /// ----------------------- + /// Symmetrize the charge density, the forces, and the stress + /// ----------------------- + void rho_symmetry (double* rho, const int& nr1, const int& nr2, const int& nr3); + + void rhog_symmetry (std::complex* rhogtot, + int* ixyz2ipw, + const int& nx, + const int& ny, + const int& nz, + const int& fftnx, + const int& fftny, + const int& fftnz); /// symmetrize a vector3 with nat elements, which can be forces or variation of atom positions in relax - void symmetrize_vec3_nat(double* v)const; // force + void symmetrize_vec3_nat (double* v) const; // force /// symmetrize a 3*3 tensor, which can be stress or variation of unitcell in cell-relax - void symmetrize_mat3(ModuleBase::matrix& sigma, const Lattice& lat)const; // stress - - //convert n rotation-matrices from sa on basis {a1, a2, a3} to sb on basis {b1, b2, b3} - void gmatrix_convert(const ModuleBase::Matrix3* sa, ModuleBase::Matrix3* sb, - const int n, const ModuleBase::Matrix3 &a, const ModuleBase::Matrix3 &b)const; - - void gmatrix_convert_int(const ModuleBase::Matrix3* sa, ModuleBase::Matrix3* sb, - const int n, const ModuleBase::Matrix3 &a, const ModuleBase::Matrix3 &b)const; - - //convert n translation-vectors from va on basis {a1, a2, a3} to vb on basis {b1, b2, b3} - void gtrans_convert(const ModuleBase::Vector3* va, ModuleBase::Vector3* vb, - const int n, const ModuleBase::Matrix3 &a, const ModuleBase::Matrix3 &b)const; - - void gmatrix_invmap(const ModuleBase::Matrix3* s, const int n, int* invmap) const; - - void hermite_normal_form(const ModuleBase::Matrix3 &s, ModuleBase::Matrix3 &H, ModuleBase::Matrix3 &b) const; - - int get_rotated_atom(int isym, int iat)const + void symmetrize_mat3 (ModuleBase::matrix& sigma, const Lattice& lat) const; // stress + + // convert n rotation-matrices from sa on basis 
{a1, a2, a3} to sb on basis {b1, b2, b3} + void gmatrix_convert (const ModuleBase::Matrix3* sa, + ModuleBase::Matrix3* sb, + const int n, + const ModuleBase::Matrix3& a, + const ModuleBase::Matrix3& b) const; + + void gmatrix_convert_int (const ModuleBase::Matrix3* sa, + ModuleBase::Matrix3* sb, + const int n, + const ModuleBase::Matrix3& a, + const ModuleBase::Matrix3& b) const; + + // convert n translation-vectors from va on basis {a1, a2, a3} to vb on basis {b1, b2, b3} + void gtrans_convert (const ModuleBase::Vector3* va, + ModuleBase::Vector3* vb, + const int n, + const ModuleBase::Matrix3& a, + const ModuleBase::Matrix3& b) const; + + void gmatrix_invmap (const ModuleBase::Matrix3* s, const int n, int* invmap) const; + + void hermite_normal_form (const ModuleBase::Matrix3& s, ModuleBase::Matrix3& H, ModuleBase::Matrix3& b) const; + + int + get_rotated_atom (int isym, int iat) const { - if (!this->isym_rotiat_.empty()) { return this->isym_rotiat_[isym][iat]; } - else { return -1; } + if (!this->isym_rotiat_.empty ()) + { + return this->isym_rotiat_[isym][iat]; + } + else + { + return -1; + } } - private: - + private: /// atom-map for each symmetry operation: isym_rotiat[isym][iat]=rotiat std::vector> isym_rotiat_; /// @brief set atom map for each symmetry operation - void set_atom_map(const Atom* atoms); + void set_atom_map (const Atom* atoms); /// @brief check if all the atoms are movable /// delta_pos symmetrization in relax is only meaningful when all the atoms are movable in all the directions. 
- bool is_all_movable(const Atom* atoms, const Statistics& st)const; + bool is_all_movable (const Atom* atoms, const Statistics& st) const; // to be called in lattice_type - void get_shortest_latvec(ModuleBase::Vector3 &a1, - ModuleBase::Vector3 &a2, ModuleBase::Vector3 &a3)const; - - void get_optlat(ModuleBase::Vector3 &v1, ModuleBase::Vector3 &v2, - ModuleBase::Vector3 &v3, ModuleBase::Vector3 &w1, - ModuleBase::Vector3 &w2, ModuleBase::Vector3 &w3, - int& real_brav, double* cel_const, double* tmp_const)const; - - /// Loop the magmom of each atoms in its type when NSPIN>1. + void get_shortest_latvec (ModuleBase::Vector3& a1, + ModuleBase::Vector3& a2, + ModuleBase::Vector3& a3) const; + + void get_optlat (ModuleBase::Vector3& v1, + ModuleBase::Vector3& v2, + ModuleBase::Vector3& v3, + ModuleBase::Vector3& w1, + ModuleBase::Vector3& w2, + ModuleBase::Vector3& w3, + int& real_brav, + double* cel_const, + double* tmp_const) const; + + /// Loop the magmom of each atoms in its type when NSPIN>1. /// If not all the same, primitive cells should not be looped in rhog_symmetry. 
- bool magmom_same_check(const Atom* atoms)const; + bool magmom_same_check (const Atom* atoms) const; - /// Analyze magnetic group without time-reversal symmetry + /// Analyze magnetic group without time-reversal symmetry /// (because currently the charge density symmetrization does not support it) /// Method: treat atoms with different magmom as atoms of different type - void analyze_magnetic_group(const Atom* atoms, const Statistics& st, int& nrot_out, int& nrotk_out); + void analyze_magnetic_group (const Atom* atoms, const Statistics& st, int& nrot_out, int& nrotk_out); }; -} +} // namespace ModuleSymmetry #endif diff --git a/source/source_cell/module_symmetry/symmetry_basic.cpp b/source/source_cell/module_symmetry/symmetry_basic.cpp index 1c4e3e84855..4ecd5a8fb4d 100644 --- a/source/source_cell/module_symmetry/symmetry_basic.cpp +++ b/source/source_cell/module_symmetry/symmetry_basic.cpp @@ -3,320 +3,346 @@ #include "source_io/module_parameter/parameter.h" #include "source_base/formatter.h" -bool ModuleSymmetry::test_brav = 0; +bool ModuleSymmetry::test_brav = false; namespace ModuleSymmetry { // Find the type of bravais lattice. -std::string Symmetry_Basic::get_brav_name(const int ibrav) const +std::string + Symmetry_Basic::get_brav_name (const int ibrav) const { - switch(ibrav) - { - case 1: return "01. Cubic P (simple)"; - case 2: return "02. Cubic I (body-centered)"; - case 3: return "03. Cubic F (face-centered)"; - case 4: return "04. Hexagonal cell"; - case 5: return "05. Tetrogonal P (simple)"; - case 6: return "06. Tetrogonal I (body-centered)"; - case 7: return "07. Rhombohedral (Trigonal) cell"; - case 8: return "08. Orthorhombic P(simple)"; - case 9: return "09. Orthorhombic I (body-centered)"; - case 10: return "10. Orthorhombic F (face-centered)"; - case 11: return "11. Orthorhombic C (base-centered)"; - case 12: return "12. Monoclinic P (simple)"; - case 13: return "13. Monoclinic A (base-center)"; - case 14: return "14. 
Triclinic cell"; - case 15: return "wrong !! "; - } - // return "Congratulations! You have found a bravais lattice that never existed!"; - return "Unknown Bravais lattice"; + switch (ibrav) + { + case 1: + return "01. Cubic P (simple)"; + case 2: + return "02. Cubic I (body-centered)"; + case 3: + return "03. Cubic F (face-centered)"; + case 4: + return "04. Hexagonal cell"; + case 5: + return "05. Tetrogonal P (simple)"; + case 6: + return "06. Tetrogonal I (body-centered)"; + case 7: + return "07. Rhombohedral (Trigonal) cell"; + case 8: + return "08. Orthorhombic P(simple)"; + case 9: + return "09. Orthorhombic I (body-centered)"; + case 10: + return "10. Orthorhombic F (face-centered)"; + case 11: + return "11. Orthorhombic C (base-centered)"; + case 12: + return "12. Monoclinic P (simple)"; + case 13: + return "13. Monoclinic A (base-center)"; + case 14: + return "14. Triclinic cell"; + case 15: + return "wrong !! "; + } + // return "Congratulations! You have found a bravais lattice that never existed!"; + return "Unknown Bravais lattice"; } // Control the accuracy -bool Symmetry_Basic::equal(const double &m, const double &n) const +bool + Symmetry_Basic::equal (const double& m, const double& n) const { - //if( fabs(m-n) < 1.0e-5 ) - if (fabs(m-n) < epsilon) //LiuXh add 2021-08-12, use accuracy for symmetry - { - return true; - } - return false; + // if( fabs(m-n) < 1.0e-5 ) + if (fabs (m - n) < epsilon) // LiuXh add 2021-08-12, use accuracy for symmetry + { + return true; + } + return false; } // check the boundary condition of atom positions. 
-void Symmetry_Basic::check_boundary(double &x)const +void + Symmetry_Basic::check_boundary (double& x) const { - if(equal(x,-0.5) || equal(x,0.5)) x=-0.5; + if (equal (x, -0.5) || equal (x, 0.5)) + { + x = -0.5; + } } -double Symmetry_Basic::get_translation_vector(const double& x1, const double& x2) const +double + Symmetry_Basic::get_translation_vector (const double& x1, const double& x2) const { - double t=0.0; // "t"ranslation - t = x2 - x1; - t = fmod(t+100.0, 1.0); - if( fabs(t-1) < epsilon * 0.5) { t = 0.0; } - return t; + double t = 0.0; // "t"ranslation + t = x2 - x1; + t = fmod (t + 100.0, 1.0); + if (fabs (t - 1) < epsilon * 0.5) + { + t = 0.0; + } + return t; } -void Symmetry_Basic::check_translation(double &x, const double &t) const +void + Symmetry_Basic::check_translation (double& x, const double& t) const { - x += t; - //impose the periodic boundary condition - x = fmod(x + 100.5,1) - 0.5; - return; + x += t; + // impose the periodic boundary condition + x = fmod (x + 100.5, 1) - 0.5; + return; } -double Symmetry_Basic::check_diff(const double& x1, const double& x2)const +double + Symmetry_Basic::check_diff (const double& x1, const double& x2) const { - double diff = x1 - x2; - diff = fmod(diff + 100,1); - //for reasons of safety - if(fabs(diff - 1.0) < epsilon) - { - diff = 0; - } - return diff; + double diff = x1 - x2; + diff = fmod (diff + 100, 1); + // for reasons of safety + if (fabs (diff - 1.0) < epsilon) + { + diff = 0; + } + return diff; } - -void Symmetry_Basic::order_atoms(double* pos, const int& nat, const int* index) const +void + Symmetry_Basic::order_atoms (double* pos, const int& nat, const int* index) const { - double** tmp = new double*[nat]; - for(int ia=0; ia &old1, - const ModuleBase::Vector3 &old2, - const ModuleBase::Vector3 &old3, - const ModuleBase::Vector3 &new1, - const ModuleBase::Vector3 &new2, - const ModuleBase::Vector3 &new3 - ) +void + Symmetry_Basic::veccon (double* carpos, + double* rotpos, + const int num, + const 
ModuleBase::Vector3& old1, + const ModuleBase::Vector3& old2, + const ModuleBase::Vector3& old3, + const ModuleBase::Vector3& new1, + const ModuleBase::Vector3& new2, + const ModuleBase::Vector3& new3) { - GlobalV::ofs_running << "\n old1:" << old1.x << " " << old1.y << " " << old1.z; - GlobalV::ofs_running << "\n old2:" << old2.x << " " << old2.y << " " << old2.z; - GlobalV::ofs_running << "\n old3:" << old3.x << " " << old3.y << " " << old3.z; - - GlobalV::ofs_running << "\n new1:" << new1.x << " " << new1.y << " " << new1.z; - GlobalV::ofs_running << "\n new2:" << new2.x << " " << new2.y << " " << new2.z; - GlobalV::ofs_running << "\n new3:" << new3.x << " " << new3.y << " " << new3.z; - - ModuleBase::Matrix3 oldlat; - oldlat.e11 = old1.x; - oldlat.e12 = old1.y; - oldlat.e13 = old1.z; - oldlat.e21 = old2.x; - oldlat.e22 = old2.y; - oldlat.e23 = old2.z; - oldlat.e31 = old3.x; - oldlat.e32 = old3.y; - oldlat.e33 = old3.z; - - ModuleBase::Matrix3 newlat; - newlat.e11 = new1.x; - newlat.e12 = new1.y; - newlat.e13 = new1.z; - newlat.e21 = new2.x; - newlat.e22 = new2.y; - newlat.e23 = new2.z; - newlat.e31 = new3.x; - newlat.e32 = new3.y; - newlat.e33 = new3.z; - - ModuleBase::Matrix3 GT = newlat.Inverse(); - - ModuleBase::Vector3 car; - ModuleBase::Vector3 direct_old; - ModuleBase::Vector3 direct_new; - - //calculate the reciprocal vectors rb1, rb2, rb3 for the vectors new1, new2, new3 - //this->recip(1.0, new1, new2, new3, rb1, rb2, rb3); - - for(int i = 0; i < num; ++i) - { - direct_old.x = carpos[i * 3 + 0]; - direct_old.y = carpos[i * 3 + 1]; - direct_old.z = carpos[i * 3 + 2]; - - car = direct_old * oldlat; - direct_new = car * GT; - - rotpos[i * 3 + 0] = direct_new.x; - rotpos[i * 3 + 1] = direct_new.y; - rotpos[i * 3 + 2] = direct_new.z; - } - return; -} + GlobalV::ofs_running << "\n old1:" << old1.x << " " << old1.y << " " << old1.z; + GlobalV::ofs_running << "\n old2:" << old2.x << " " << old2.y << " " << old2.z; + GlobalV::ofs_running << "\n old3:" << old3.x 
<< " " << old3.y << " " << old3.z; + + GlobalV::ofs_running << "\n new1:" << new1.x << " " << new1.y << " " << new1.z; + GlobalV::ofs_running << "\n new2:" << new2.x << " " << new2.y << " " << new2.z; + GlobalV::ofs_running << "\n new3:" << new3.x << " " << new3.y << " " << new3.z; + + ModuleBase::Matrix3 oldlat; + oldlat.e11 = old1.x; + oldlat.e12 = old1.y; + oldlat.e13 = old1.z; + oldlat.e21 = old2.x; + oldlat.e22 = old2.y; + oldlat.e23 = old2.z; + oldlat.e31 = old3.x; + oldlat.e32 = old3.y; + oldlat.e33 = old3.z; + + ModuleBase::Matrix3 newlat; + newlat.e11 = new1.x; + newlat.e12 = new1.y; + newlat.e13 = new1.z; + newlat.e21 = new2.x; + newlat.e22 = new2.y; + newlat.e23 = new2.z; + newlat.e31 = new3.x; + newlat.e32 = new3.y; + newlat.e33 = new3.z; + + ModuleBase::Matrix3 GT = newlat.Inverse (); + + ModuleBase::Vector3 car; + ModuleBase::Vector3 direct_old; + ModuleBase::Vector3 direct_new; + + // calculate the reciprocal vectors rb1, rb2, rb3 for the vectors new1, new2, new3 + // this->recip(1.0, new1, new2, new3, rb1, rb2, rb3); + + for (int i = 0; i < num; ++i) + { + direct_old.x = carpos[i * 3 + 0]; + direct_old.y = carpos[i * 3 + 1]; + direct_old.z = carpos[i * 3 + 2]; + car = direct_old * oldlat; + direct_new = car * GT; + + rotpos[i * 3 + 0] = direct_new.x; + rotpos[i * 3 + 1] = direct_new.y; + rotpos[i * 3 + 2] = direct_new.z; + } + return; +} // generate all point group symmetry operations from the generation group -void Symmetry_Basic::matrigen(ModuleBase::Matrix3 *symgen, const int ngen, ModuleBase::Matrix3* symop, int &nop) const +void + Symmetry_Basic::matrigen (ModuleBase::Matrix3* symgen, const int ngen, ModuleBase::Matrix3* symop, int& nop) const { - int m1 = 0; + int m1 = 0; int m2 = 0; - int n = 0; - - // allocate memory for the symmetry operations - ModuleBase::Matrix3 iden(1,0,0,0,1,0,0,0,1); - ModuleBase::Matrix3 sig(1,0,0,0,1,0,0,0,1); - ModuleBase::Matrix3 temp1(1,0,0,0,1,0,0,0,1); - ModuleBase::Matrix3 temp2(1,0,0,0,1,0,0,0,1); - - bool 
flag = false; // mark whether the symmetry operation is a new one - int order = 0; - int now = 0; - - symop[0] = iden; //identity (the trivial element) - nop = 1; // counter of the symmetry operations - - // take all generators - for (int i = 0; i < ngen; ++i) - { - sig = symgen[i]; - flag = true; // assume it is a new symmetry operation - // search if the symmetry operation already exists among the found symmetry operations - // if so, skip it - for (int j = 0; j < nop; ++j) - { - if (symop[j] == sig) - { - flag = 0; // not a new symmetry operation - break; - } - } - if (flag == 0) // if old, return - { - continue; - } - // otherwise - - // determine the order of the operation: by which power will the operation return - // to the identity operation. - temp1 = sig; - for (int j = 1; j < 100; ++j) - { - order = j; - if (temp1 == iden) - { - break; - } - temp1 = sig * temp1; - } - now = nop; - for (int j = 0; j < nop; ++j) - { - temp1 = symop[j]; - for (int k = 1; k < order; ++k) - { - temp1 = sig * temp1; - - for (int l = 0; l < nop; ++l) - { - temp2 = symop[l] * temp1; - flag = 1; - for (int m = 0; m < now; ++m) - { - if (symop[m] == temp2) - { - flag = 0; - break; - } - } - if (flag == 0) - { - continue; //the newly-found element has already existed. - } - - ++now; // the number of elements we found - if (now > 48) // number of symm_op cannot be more than 48 (of O_h point group) - { - std::cout << "\n a: now= "< 48) - { - std::cout << "\n b: now= "< 48) // number of symm_op cannot be more than 48 (of O_h point group) + { + std::cout << "\n a: now= " << now << std::endl; + std::cout << "\n There are too many symmetrical matrices!" 
<< std::endl; + return; + } + symop[now - 1] = temp2; + } + } + if (j == 0) + { + n = now; + } + } + + m1 = nop; + m2 = now; + for (int j = 1; j < 50; ++j) + { + for (int k = nop; k < n; ++k) + { + for (int m = m1; m < m2; ++m) + { + temp1 = symop[k] * symop[m]; + flag = true; + for (int l = 0; l < now; ++l) + { + if (symop[l] == temp1) + { + flag = false; + break; + } + } + if (flag == 0) + { + continue; // the new-found element has already existed + } + + ++now; + if (now > 48) + { + std::cout << "\n b: now= " << now << std::endl; + std::cout << "\n There are too many symmetrical matrices!" << std::endl; + return; + } + symop[now - 1] = temp1; + } + } + if (now == m2) + { + break; // if no more new element could be found, stop the loop + } + m1 = m2; + m2 = now; + } + nop = now; + } } //-------------------------------------------------------------- @@ -325,652 +351,788 @@ void Symmetry_Basic::matrigen(ModuleBase::Matrix3 *symgen, const int ngen, Modul // given in crystal coordinates) // of a lattice with some arbitrary basis (atomic arrangement). 
//-------------------------------------------------------------- -void Symmetry_Basic::setgroup(ModuleBase::Matrix3* symop, int &nop, const int &ibrav) const +void + Symmetry_Basic::setgroup (ModuleBase::Matrix3* symop, int& nop, const int& ibrav) const { - if(PARAM.inp.cal_symm_repr[0] > 1) { - ModuleBase::TITLE("Symmetry_Basic", "setgroup"); - } - ModuleBase::Matrix3 symgen[3]; // the number of generators is up to 3 - - ModuleBase::Matrix3 inv(-1, 0, 0, 0,-1, 0, 0, 0,-1); // (x, y, z) -> (-x, -y, -z) - ModuleBase::Matrix3 r3d( 0, 1, 0, 0, 0, 1, 1, 0, 0); // (x, y, z) -> (y, z, x) - ModuleBase::Matrix3 r6z( 1, 1, 0,-1, 0, 0, 0, 0, 1); // (x, y, z) -> (x+y, -x, z) - ModuleBase::Matrix3 r2hex( 1, 0, 0,-1,-1, 0, 0, 0,-1); // (x, y, z) -> (x, -x-y, -z) - ModuleBase::Matrix3 r2tri(-1, 0, 0, 0, 0,-1, 0,-1, 0); // (x, y, z) -> (-x, -z, -y) - ModuleBase::Matrix3 r4zp( 0, 1, 0,-1, 0, 0, 0, 0, 1); // (x, y, z) -> (y, -x, z) - ModuleBase::Matrix3 r2yp(-1, 0, 0, 0, 1, 0, 0, 0,-1); // (x, y, z) -> (-x, y, -z) - ModuleBase::Matrix3 r4zbc( 0, 0,-1, 1, 1, 1, 0,-1, 0); // (x, y, z) -> (-z, x+y+z, -y) - ModuleBase::Matrix3 r4zfc( 1, 0,-1, 1, 0, 0, 1,-1, 0); // (x, y, z) -> (x-z, x, x-y) - ModuleBase::Matrix3 r2zp(-1, 0, 0, 0,-1, 0, 0, 0, 1); // (x, y, z) -> (-x, -y, z) - ModuleBase::Matrix3 r2ybc( 0, 0, 1,-1,-1,-1, 1, 0, 0); // (x, y, z) -> (z, -x-y-z, x) - ModuleBase::Matrix3 r2zbc( 0, 1, 0, 1, 0, 0,-1,-1,-1); // (x, y, z) -> (y, x, -x-y-z) - ModuleBase::Matrix3 r2ybas( 0,-1, 0,-1, 0, 0, 0, 0,-1); // (x, y, z) -> (-y, -x, -z) - ModuleBase::Matrix3 r2yfc( 0,-1, 1, 0,-1, 0, 1,-1, 0); // (x, y, z) -> (-y+z, -y, x-y) - ModuleBase::Matrix3 r2zfc( 0, 1,-1, 1, 0,-1, 0, 0,-1); // (x, y, z) -> (y-z, x-z, -z) - - //the pure translation lattice (bravais lattice) has some maximum symmetry - //set first up the point group operations for this symmetry. 
- symgen[0] = inv; - // generate the point group operations for the bravais lattice - // rewrite with switch-case to get better performance and readability - switch (ibrav) { - case 1: - symgen[1] = r3d; - symgen[2] = r4zp; - this->matrigen(symgen, 3, symop, nop); - break; - case 2: - symgen[1] = r3d; - symgen[2] = r4zbc; - this->matrigen(symgen, 3, symop, nop); - break; - case 3: - symgen[1] = r3d; - symgen[2] = r4zfc; - this->matrigen(symgen, 3, symop, nop); - break; - case 4: - symgen[1] = r6z; - symgen[2] = r2hex; - this->matrigen(symgen, 3, symop, nop); - break; - case 5: - symgen[1] = r4zp; - symgen[2] = r2yp; - this->matrigen(symgen, 3, symop, nop); - break; - case 6: - symgen[1] = r4zbc; - symgen[2] = r2ybc; - this->matrigen(symgen, 3, symop, nop); - break; - case 7: - symgen[1] = r2tri; - symgen[2] = r3d; - this->matrigen(symgen, 3, symop, nop); - break; - case 8: - symgen[1] = r2zp; - symgen[2] = r2yp; - this->matrigen(symgen, 3, symop, nop); - break; - case 9: - symgen[1] = r2zbc; - symgen[2] = r2ybc; - this->matrigen(symgen, 3, symop, nop); - break; - case 10: - symgen[1] = r2zfc; - symgen[2] = r2yfc; - this->matrigen(symgen, 3, symop, nop); - break; - case 11: - symgen[1] = r2zp; - symgen[2] = r2ybas; - this->matrigen(symgen, 3, symop, nop); - break; - case 12: - symgen[1] = r2yp; - this->matrigen(symgen, 2, symop, nop); - break; - case 13: - symgen[1] = r2ybas; - this->matrigen(symgen, 2, symop, nop); - break; - case 14: - this->matrigen(symgen, 1, symop, nop); - break; - default: - ModuleBase::WARNING_QUIT("Symmetry_Basic::setgroup", - "ibrav = " + std::to_string(ibrav) + " is not supported."); - break; - } - - // print - if (test_brav) - { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Number of rotation matrices", nop); - } - - // print the symmetry operations - if (PARAM.inp.cal_symm_repr[0] > 0) - { - GlobalV::ofs_running << std::endl - << " ======================================================================\n" - << " MATRIX 
REPRESENTATION OF SYMMETRY OPERATION\n" - << " ======================================================================\n" - << " There are " << nop << " symmetry operation representation matrices.\n" - << " For each matrix, the elements are arranged like: \n" - << " [[e11, e12, e13], [e21, e22, e23], [e31, e32, e33]].reshape(3, 3)\n" - << std::endl; - - // control the digits - const int precision = PARAM.inp.cal_symm_repr[1]; - const int width = precision + 4; - std::string fmtstr = " %" + std::to_string(width) + "." + std::to_string(precision) + "f"; - fmtstr += fmtstr + fmtstr + "\n"; - - // print the symmetry operations - std::string mat; - for (int i = 0; i < nop; ++i) - { - mat = " " + FmtCore::format("No. %3d", i + 1) + "\n" - + FmtCore::format(fmtstr.c_str(), symop[i].e11, symop[i].e12, symop[i].e13) - + FmtCore::format(fmtstr.c_str(), symop[i].e21, symop[i].e22, symop[i].e23) - + FmtCore::format(fmtstr.c_str(), symop[i].e31, symop[i].e32, symop[i].e33); - GlobalV::ofs_running << mat << std::endl; - } - GlobalV::ofs_running << " ======================================================================\n"; - } - - return; + if (PARAM.inp.cal_symm_repr[0] > 1) + { + ModuleBase::TITLE ("Symmetry_Basic", "setgroup"); + } + ModuleBase::Matrix3 symgen[3]; // the number of generators is up to 3 + + ModuleBase::Matrix3 inv (-1, 0, 0, 0, -1, 0, 0, 0, -1); // (x, y, z) -> (-x, -y, -z) + ModuleBase::Matrix3 r3d (0, 1, 0, 0, 0, 1, 1, 0, 0); // (x, y, z) -> (y, z, x) + ModuleBase::Matrix3 r6z (1, 1, 0, -1, 0, 0, 0, 0, 1); // (x, y, z) -> (x+y, -x, z) + ModuleBase::Matrix3 r2hex (1, 0, 0, -1, -1, 0, 0, 0, -1); // (x, y, z) -> (x, -x-y, -z) + ModuleBase::Matrix3 r2tri (-1, 0, 0, 0, 0, -1, 0, -1, 0); // (x, y, z) -> (-x, -z, -y) + ModuleBase::Matrix3 r4zp (0, 1, 0, -1, 0, 0, 0, 0, 1); // (x, y, z) -> (y, -x, z) + ModuleBase::Matrix3 r2yp (-1, 0, 0, 0, 1, 0, 0, 0, -1); // (x, y, z) -> (-x, y, -z) + ModuleBase::Matrix3 r4zbc (0, 0, -1, 1, 1, 1, 0, -1, 0); // (x, y, z) -> (-z, 
x+y+z, -y) + ModuleBase::Matrix3 r4zfc (1, 0, -1, 1, 0, 0, 1, -1, 0); // (x, y, z) -> (x-z, x, x-y) + ModuleBase::Matrix3 r2zp (-1, 0, 0, 0, -1, 0, 0, 0, 1); // (x, y, z) -> (-x, -y, z) + ModuleBase::Matrix3 r2ybc (0, 0, 1, -1, -1, -1, 1, 0, 0); // (x, y, z) -> (z, -x-y-z, x) + ModuleBase::Matrix3 r2zbc (0, 1, 0, 1, 0, 0, -1, -1, -1); // (x, y, z) -> (y, x, -x-y-z) + ModuleBase::Matrix3 r2ybas (0, -1, 0, -1, 0, 0, 0, 0, -1); // (x, y, z) -> (-y, -x, -z) + ModuleBase::Matrix3 r2yfc (0, -1, 1, 0, -1, 0, 1, -1, 0); // (x, y, z) -> (-y+z, -y, x-y) + ModuleBase::Matrix3 r2zfc (0, 1, -1, 1, 0, -1, 0, 0, -1); // (x, y, z) -> (y-z, x-z, -z) + + // the pure translation lattice (bravais lattice) has some maximum symmetry + // set first up the point group operations for this symmetry. + symgen[0] = inv; + // generate the point group operations for the bravais lattice + // rewrite with switch-case to get better performance and readability + switch (ibrav) + { + case 1: + symgen[1] = r3d; + symgen[2] = r4zp; + this->matrigen (symgen, 3, symop, nop); + break; + case 2: + symgen[1] = r3d; + symgen[2] = r4zbc; + this->matrigen (symgen, 3, symop, nop); + break; + case 3: + symgen[1] = r3d; + symgen[2] = r4zfc; + this->matrigen (symgen, 3, symop, nop); + break; + case 4: + symgen[1] = r6z; + symgen[2] = r2hex; + this->matrigen (symgen, 3, symop, nop); + break; + case 5: + symgen[1] = r4zp; + symgen[2] = r2yp; + this->matrigen (symgen, 3, symop, nop); + break; + case 6: + symgen[1] = r4zbc; + symgen[2] = r2ybc; + this->matrigen (symgen, 3, symop, nop); + break; + case 7: + symgen[1] = r2tri; + symgen[2] = r3d; + this->matrigen (symgen, 3, symop, nop); + break; + case 8: + symgen[1] = r2zp; + symgen[2] = r2yp; + this->matrigen (symgen, 3, symop, nop); + break; + case 9: + symgen[1] = r2zbc; + symgen[2] = r2ybc; + this->matrigen (symgen, 3, symop, nop); + break; + case 10: + symgen[1] = r2zfc; + symgen[2] = r2yfc; + this->matrigen (symgen, 3, symop, nop); + break; + case 11: + 
symgen[1] = r2zp; + symgen[2] = r2ybas; + this->matrigen (symgen, 3, symop, nop); + break; + case 12: + symgen[1] = r2yp; + this->matrigen (symgen, 2, symop, nop); + break; + case 13: + symgen[1] = r2ybas; + this->matrigen (symgen, 2, symop, nop); + break; + case 14: + this->matrigen (symgen, 1, symop, nop); + break; + default: + ModuleBase::WARNING_QUIT ("Symmetry_Basic::setgroup", + "ibrav = " + std::to_string (ibrav) + " is not supported."); + break; + } + + // print + if (test_brav) + { + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Number of rotation matrices", nop); + } + + // print the symmetry operations + if (PARAM.inp.cal_symm_repr[0] > 0) + { + GlobalV::ofs_running << std::endl + << " ======================================================================\n" + << " MATRIX REPRESENTATION OF SYMMETRY OPERATION\n" + << " ======================================================================\n" + << " There are " << nop << " symmetry operation representation matrices.\n" + << " For each matrix, the elements are arranged like: \n" + << " [[e11, e12, e13], [e21, e22, e23], [e31, e32, e33]].reshape(3, 3)\n" + << std::endl; + + // control the digits + const int precision = PARAM.inp.cal_symm_repr[1]; + const int width = precision + 4; + std::string fmtstr = " %" + std::to_string (width) + "." + std::to_string (precision) + "f"; + fmtstr += fmtstr + fmtstr + "\n"; + + // print the symmetry operations + std::string mat; + for (int i = 0; i < nop; ++i) + { + mat = " " + FmtCore::format ("No. 
%3d", i + 1) + "\n" + + FmtCore::format (fmtstr.c_str (), symop[i].e11, symop[i].e12, symop[i].e13) + + FmtCore::format (fmtstr.c_str (), symop[i].e21, symop[i].e22, symop[i].e23) + + FmtCore::format (fmtstr.c_str (), symop[i].e31, symop[i].e32, symop[i].e33); + GlobalV::ofs_running << mat << std::endl; + } + GlobalV::ofs_running << " ======================================================================\n"; + } + + return; } -int Symmetry_Basic::subgroup(const int& nrot, const int& ninv, - const int& nc2, const int& nc3, const int& nc4, const int& nc6, - const int& ns1, const int& ns3, const int& ns4, const int& ns6)const +int + Symmetry_Basic::subgroup (const int& nrot, + const int& ninv, + const int& nc2, + const int& nc3, + const int& nc4, + const int& nc6, + const int& ns1, + const int& ns3, + const int& ns4, + const int& ns6) const { if (nrot > 24) - { - if (ninv) { - // if (nc2 >= 7 && nc3 >= 2 && nc6 >= 2 && ns1 >= 7 && ns3 >= 2 && ns6 >= 2) { return 27; } //D_6h - if (nc2 >= 3 && nc3 >= 8 && ns1 >= 3 && ns6 >= 8) { return 29; } //T_h + if (ninv) + { + // if (nc2 >= 7 && nc3 >= 2 && nc6 >= 2 && ns1 >= 7 && ns3 >= 2 && ns6 >= 2) { return 27; } //D_6h + if (nc2 >= 3 && nc3 >= 8 && ns1 >= 3 && ns6 >= 8) + { + return 29; + } // T_h + } + else + { + if (nc2 >= 9 && nc3 >= 8 && nc4 >= 6) + { + return 30; + } // O + if (nc2 >= 3 && nc3 >= 8 && ns1 >= 6 && ns4 >= 6) + { + return 31; + } // T_d + } } - else + if (nrot > 16) // not else if: nrot>24 can also fall in this part and below { - if (nc2 >= 9 && nc3 >= 8 && nc4 >= 6) { return 30; } //O - if (nc2 >= 3 && nc3 >= 8 && ns1 >= 6 && ns4 >= 6) { return 31; } //T_d + if (ninv && nc2 >= 5 && nc4 >= 2 && ns1 >= 5 && ns4 >= 2) + { + return 20; + } // D_4h } - } - if (nrot > 16)//not else if: nrot>24 can also fall in this part and below - { - if (ninv && nc2 >= 5 && nc4 >= 2 && ns1 >= 5 && ns4 >= 2) { return 20; } //D_4h - } if (nrot > 12) - { - if (ninv) - { - if (nc2 >= 1 && nc3 >= 2 && nc6 >= 2 && ns1 >= 1 && ns3 >= 
2 && ns6 >= 2) { return 23; } //C_6h - if (nc2 >= 3 && nc3 >= 2 && ns1 >= 3 && ns6 >= 2) { return 13; } //D_3d - } - else { - if (nc2 >= 3 && nc3 >= 8) { return 28; } //T - if (nc2 >= 3 && nc3 >= 2 && ns1 >= 4 && ns3 >= 2) { return 26; } //D_3h - if (nc2 >= 1 && nc3 >= 2 && nc6 >= 2 && ns1 >= 6) { return 25; } //C_6v - if (nc2 >= 7 && nc3 >= 2 && nc6 >= 2) { return 24; } //D_6 + if (ninv) + { + if (nc2 >= 1 && nc3 >= 2 && nc6 >= 2 && ns1 >= 1 && ns3 >= 2 && ns6 >= 2) + { + return 23; + } // C_6h + if (nc2 >= 3 && nc3 >= 2 && ns1 >= 3 && ns6 >= 2) + { + return 13; + } // D_3d + } + else + { + if (nc2 >= 3 && nc3 >= 8) + { + return 28; + } // T + if (nc2 >= 3 && nc3 >= 2 && ns1 >= 4 && ns3 >= 2) + { + return 26; + } // D_3h + if (nc2 >= 1 && nc3 >= 2 && nc6 >= 2 && ns1 >= 6) + { + return 25; + } // C_6v + if (nc2 >= 7 && nc3 >= 2 && nc6 >= 2) + { + return 24; + } // D_6 + } } - } if (nrot > 8) - { - if (ninv) { - if (nc2 >= 1 && nc4 >= 2 && ns1 >= 1 && ns4 >= 2) { return 16; } //C_4h - if (nc2 >= 3 && ns1 >= 3) { return 8; } //D_2h + if (ninv) + { + if (nc2 >= 1 && nc4 >= 2 && ns1 >= 1 && ns4 >= 2) + { + return 16; + } // C_4h + if (nc2 >= 3 && ns1 >= 3) + { + return 8; + } // D_2h + } + else + { + if (nc2 >= 3 && ns1 >= 2 && ns4 >= 2) + { + return 19; + } // D_2d + if (nc2 >= 1 && nc4 >= 2 && ns1 >= 4) + { + return 18; + } // C_4v + if (nc2 >= 5 && nc4 >= 2) + { + return 17; + } // D_4 + } } - else + if (nrot > 6) { - if (nc2 >= 3 && ns1 >= 2 && ns4 >= 2) { return 19; } //D_2d - if (nc2 >= 1 && nc4 >= 2 && ns1 >= 4) { return 18; } //C_4v - if (nc2 >= 5 && nc4 >= 2) { return 17; } //D_4 + if (nc3 >= 2 && ns1 >= 1 && ns3 >= 2) + { + return 22; + } // C_3h + if (nc2 >= 1 && nc3 >= 2 && nc6 >= 2) + { + return 21; + } // C_6 + if (nc3 >= 2 && ns1 >= 3) + { + return 12; + } // C_3v + if (nc2 >= 3 && nc3 >= 2) + { + return 11; + } // D_3 + if (ninv && nc3 >= 2 && ns3 >= 2) + { + return 10; + } // S_6 } - } - if (nrot > 6) - { - if (nc3 >= 2 && ns1 >= 1 && ns3 >= 2) { 
return 22; } //C_3h - if (nc2 >= 1 && nc3 >= 2 && nc6 >= 2) { return 21; } //C_6 - if (nc3 >= 2 && ns1 >= 3) { return 12; } //C_3v - if (nc2 >= 3 && nc3 >= 2) { return 11; } //D_3 - if (ninv && nc3 >= 2 && ns3 >= 2) { return 10; }//S_6 - } if (nrot > 4) - { - if (nc2 >= 1 && ns4 >= 2) { return 15; } //S_4 - if (nc2 >= 1 && nc4 >= 2) { return 14; } //C_4 - if (nc2 >= 1 && ns1 >= 2) { return 7; } //C_2v - if (nc2 >= 3) { return 6; } //D_2 - if (ninv && nc2 >= 1 && ns1 >= 1) { return 5; } //C_2h - } + { + if (nc2 >= 1 && ns4 >= 2) + { + return 15; + } // S_4 + if (nc2 >= 1 && nc4 >= 2) + { + return 14; + } // C_4 + if (nc2 >= 1 && ns1 >= 2) + { + return 7; + } // C_2v + if (nc2 >= 3) + { + return 6; + } // D_2 + if (ninv && nc2 >= 1 && ns1 >= 1) + { + return 5; + } // C_2h + } if (nrot > 3) - { - if (nc3 >= 2) { return 9; } //C_3 - } + { + if (nc3 >= 2) + { + return 9; + } // C_3 + } if (nrot > 2) - { - if (ns1 >= 1) { return 4; } //C_1h - if (nc2 >= 1) { return 3; } //C_2 - if (ninv) { return 2; } //S_2 - } - return 1; //C_1 + { + if (ns1 >= 1) + { + return 4; + } // C_1h + if (nc2 >= 1) + { + return 3; + } // C_2 + if (ninv) + { + return 2; + } // S_2 + } + return 1; // C_1 } - -bool Symmetry_Basic::pointgroup(const int& nrot, int& pgnumber, - std::string& pgname, const ModuleBase::Matrix3* gmatrix, std::ofstream& ofs_running)const +bool + Symmetry_Basic::pointgroup (const int& nrot, + int& pgnumber, + std::string& pgname, + const ModuleBase::Matrix3* gmatrix, + std::ofstream& ofs_running) const { - //------------------------------------------------------------------------- - //return the name of the point group - //the "name" (Schoenflies mark) of the group defined by following key: - // 1 --> C_1 9 --> C_3 17 --> D_4 25 --> C_6v * - // 2 --> S_2 10 --> S_6 18 --> C_4v 26 --> D_3h * - // 3 --> C_2 11 --> D_3 19 --> D_2d 27 --> D_6h * - // 4 --> C_1h 12 --> C_3v 20 --> D_4h 28 --> T * - // 5 --> C_2h 13 --> D_3d 21 --> C_6 29 --> T_h * - // 6 --> D_2 14 --> C_4 22 
--> C_3h 30 --> O * - // 7 --> C_2v 15 --> S_4 23 --> C_6h 31 --> T_d * - // 8 --> D_2h 16 --> C_4h 24 --> D_6 32 --> O_h * - //------------------------------------------------------------------------- - - //there are four trivial cases which could be easily determined - //because the number of their elements are exclusive - if (PARAM.inp.cal_symm_repr[0] > 1) { - ModuleBase::TITLE("Symmetry_Basic", "pointgroup"); - } - - std::vector pgdict = { "none", "C_1", "S_2", "C_2", "C_1h", "C_2h", - "D_2", "C_2v", "D_2h", "C_3", "S_6", "D_3", "C_3v", "D_3d", "C_4", "S_4", "C_4h", - "D_4", "C_4v", "D_2d", "D_4h", "C_6", "C_3h", "C_6h", "D_6", "C_6v", "D_3h", "D_6h", - "T", "T_h", "O", "T_d", "O_h" }; - - if(nrot == 1) - { - pgnumber = 1; - pgname="C_1"; - return true; - } - if(nrot == 3) - { - pgnumber = 9; - pgname="C_3"; - return true; - } - if(nrot == 16) - { - pgnumber = 20; - pgname="D_4h"; - return true; - } - if(nrot == 48) - { - pgnumber = 32; - pgname="O_h"; - return true; - } - - //------------------------------------------------------------------------------- - //all other groups need further investigations and detailed analysis - //first determine the type of elements and count them - //Possible elements are E, I, C_2, C_3, C_4, C_6 and S_1, S_3, S_4, S_6 (S_1 = m) - //The type of a symmetry operation can be identified simply by - //calculating the trace and the determinant of the rotation matrix. 
The - //combination of these two quantities is specific for specific elements: - //------------------------------------------------------------------------------- - - // Element: E I C_2 C_3 C_4 C_6 S_1 S_6 S_4 S_3 - // Trace: +3 -3 -1 0 +1 +2 +1 0 -1 -2 - // Determinant: +1 -1 +1 +1 +1 +1 -1 -1 -1 -1 - - int trace = 0; - int det = 0; - int ninv = 0; - - int nc2 = 0; - int nc3 = 0; - int nc4 = 0; - int nc6 = 0; - int ns1 = 0; - int ns3 = 0; //mohan add 2012-01-15 - int ns4 = 0; - int ns6 = 0; //mohan add 2012-01-15 - - for(int i = 0; i < nrot; ++i) - { - //calculate the trace of a matrix - trace = int(gmatrix[i].e11+gmatrix[i].e22+gmatrix[i].e33); - //calculate the determinant of a matrix - det = int(gmatrix[i].Det()); - - if(trace == 3) - { - continue; //found unity operator (trivial) - } - //found inversion - if(trace == -3) - { - ninv = 1; - continue; - } - - if(trace == -1 && det == 1) { ++nc2; } - else if(trace == 0 && det == 1) { ++nc3; } - else if(trace == 1 && det == 1) { ++nc4; } - else if(trace == 2 && det == 1) { ++nc6; } - else if(trace == 1 && det == -1) { ++ns1; } - else if(trace == 0 && det == -1) { ++ns6; } //mohan add 2012-01-15 - else if(trace == -1 && det == -1) { ++ns4; } - else if(trace == -2 && det == -1) { ++ns3; } //mohan add 2012-01-15 - } - - if(test_brav) - { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "C2", nc2); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "C3", nc3); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "C4", nc4); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "C6", nc6); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "S1", ns1); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "S3", ns3); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "S4", ns4); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "S6", ns6); - } - - if(nrot == 2) - { - if(ninv == 1) - { - pgnumber = 2; - pgname="S_2"; - return true; - } - if(nc2 == 1) - { - pgnumber = 3; - pgname="C_2"; - return true; - } - if(ns1 == 
1) - { - pgnumber = 4; - pgname="C_1h"; - return true; - } - } - if(nrot == 4) - { - if(ninv == 1) - { - pgnumber = 5; - pgname="C_2h"; - return true; - } - if(nc2 == 3) - { - pgnumber = 6; - pgname="D_2"; - return true; - } - if(ns1 == 2) - { - pgnumber = 7; - pgname="C_2v"; - return true; - } - if(nc4 == 2) - { - pgnumber = 14; - pgname="C_4"; - return true; - } - if(ns4 == 2) - { - pgnumber = 15; - pgname="S_4"; - return true; - } - } - if(nrot == 6) - { - if(ninv == 1) - { - pgnumber = 10; - pgname="S_6"; - return true; - } - if(nc2 == 3) - { - pgnumber = 11; - pgname="D_3"; - return true; - } - if(ns1 == 3) - { - pgnumber = 12; - pgname="C_3v"; - return true; - } - if(nc2 == 1) - { - pgnumber = 21; - pgname="C_6"; - return true; - } - if(ns1 == 1) - { - pgnumber = 22; - pgname="C_3h"; - return true; - } - } - if(nrot == 8) - { - if(ns1 == 3) - { - pgnumber = 8; - pgname="D_2h"; - return true; - } - if(ns1 == 1) - { - pgnumber = 16; - pgname="C_4h"; - return true; - } - if(ns1 == 0) - { - pgnumber = 17; - pgname="D_4"; - return true; - } - if(ns1 == 4) - { - pgnumber = 18; - pgname="C_4v"; - return true; - } - if(ns1 == 2) - { - pgnumber = 19; - pgname="D_2d"; - return true; - } - } - if(nrot == 12) - { - if(ns1 == 3) - { - pgnumber = 13; - pgname="D_3d"; - return true; - } - if(ns1 == 1) - { - pgnumber = 23; - pgname="C_6h"; - return true; - } - if(nc2 == 7) - { - pgnumber = 24; - pgname="D_6"; - return true; - } - if(ns1 == 6) - { - pgnumber = 25; - pgname="C_6v"; - return true; - } - if(ns1 == 4) - { - pgnumber = 26; - pgname="D_3h"; - return true; - } - if(nc3 == 8) - { - pgnumber = 28; - pgname="T"; - return true; - } - } - if(nrot == 24) - { - if(nc6 == 2) - { - pgnumber = 27; - pgname="D_6h"; + //------------------------------------------------------------------------- + // return the name of the point group + // the "name" (Schoenflies mark) of the group defined by following key: + // 1 --> C_1 9 --> C_3 17 --> D_4 25 --> C_6v * + // 2 --> S_2 10 --> 
S_6 18 --> C_4v 26 --> D_3h * + // 3 --> C_2 11 --> D_3 19 --> D_2d 27 --> D_6h * + // 4 --> C_1h 12 --> C_3v 20 --> D_4h 28 --> T * + // 5 --> C_2h 13 --> D_3d 21 --> C_6 29 --> T_h * + // 6 --> D_2 14 --> C_4 22 --> C_3h 30 --> O * + // 7 --> C_2v 15 --> S_4 23 --> C_6h 31 --> T_d * + // 8 --> D_2h 16 --> C_4h 24 --> D_6 32 --> O_h * + //------------------------------------------------------------------------- + + // there are four trivial cases which could be easily determined + // because the number of their elements are exclusive + if (PARAM.inp.cal_symm_repr[0] > 1) + { + ModuleBase::TITLE ("Symmetry_Basic", "pointgroup"); + } + + std::vector pgdict + = {"none", "C_1", "S_2", "C_2", "C_1h", "C_2h", "D_2", "C_2v", "D_2h", "C_3", "S_6", + "D_3", "C_3v", "D_3d", "C_4", "S_4", "C_4h", "D_4", "C_4v", "D_2d", "D_4h", "C_6", + "C_3h", "C_6h", "D_6", "C_6v", "D_3h", "D_6h", "T", "T_h", "O", "T_d", "O_h"}; + + if (nrot == 1) + { + pgnumber = 1; + pgname = "C_1"; return true; - } - if(ninv == 1) - { - pgnumber = 29; - pgname="T_h"; + } + if (nrot == 3) + { + pgnumber = 9; + pgname = "C_3"; return true; - } - if(nc4 == 6) - { - pgnumber = 30; - pgname="O"; + } + if (nrot == 16) + { + pgnumber = 20; + pgname = "D_4h"; return true; - } - if(ns4 == 6) - { - pgnumber = 31; - pgname="T_d"; + } + if (nrot == 48) + { + pgnumber = 32; + pgname = "O_h"; return true; - } - } + } + + //------------------------------------------------------------------------------- + // all other groups need further investigations and detailed analysis + // first determine the type of elements and count them + // Possible elements are E, I, C_2, C_3, C_4, C_6 and S_1, S_3, S_4, S_6 (S_1 = m) + // The type of a symmetry operation can be identified simply by + // calculating the trace and the determinant of the rotation matrix. 
The + // combination of these two quantities is specific for specific elements: + //------------------------------------------------------------------------------- + + // Element: E I C_2 C_3 C_4 C_6 S_1 S_6 S_4 S_3 + // Trace: +3 -3 -1 0 +1 +2 +1 0 -1 -2 + // Determinant: +1 -1 +1 +1 +1 +1 -1 -1 -1 -1 + + int trace = 0; + int det = 0; + int ninv = 0; + + int nc2 = 0; + int nc3 = 0; + int nc4 = 0; + int nc6 = 0; + int ns1 = 0; + int ns3 = 0; // mohan add 2012-01-15 + int ns4 = 0; + int ns6 = 0; // mohan add 2012-01-15 + + for (int i = 0; i < nrot; ++i) + { + // calculate the trace of a matrix + trace = int (gmatrix[i].e11 + gmatrix[i].e22 + gmatrix[i].e33); + // calculate the determinant of a matrix + det = int (gmatrix[i].Det ()); + + if (trace == 3) + { + continue; // found unity operator (trivial) + } + // found inversion + if (trace == -3) + { + ninv = 1; + continue; + } + + if (trace == -1 && det == 1) + { + ++nc2; + } + else if (trace == 0 && det == 1) + { + ++nc3; + } + else if (trace == 1 && det == 1) + { + ++nc4; + } + else if (trace == 2 && det == 1) + { + ++nc6; + } + else if (trace == 1 && det == -1) + { + ++ns1; + } + else if (trace == 0 && det == -1) + { + ++ns6; + } // mohan add 2012-01-15 + else if (trace == -1 && det == -1) + { + ++ns4; + } + else if (trace == -2 && det == -1) + { + ++ns3; + } // mohan add 2012-01-15 + } + + if (test_brav) + { + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "C2", nc2); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "C3", nc3); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "C4", nc4); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "C6", nc6); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "S1", ns1); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "S3", ns3); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "S4", ns4); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "S6", ns6); + } + + if (nrot == 2) + { + if (ninv == 1) + { + pgnumber = 2; + pgname = "S_2"; + return true; 
+ } + if (nc2 == 1) + { + pgnumber = 3; + pgname = "C_2"; + return true; + } + if (ns1 == 1) + { + pgnumber = 4; + pgname = "C_1h"; + return true; + } + } + if (nrot == 4) + { + if (ninv == 1) + { + pgnumber = 5; + pgname = "C_2h"; + return true; + } + if (nc2 == 3) + { + pgnumber = 6; + pgname = "D_2"; + return true; + } + if (ns1 == 2) + { + pgnumber = 7; + pgname = "C_2v"; + return true; + } + if (nc4 == 2) + { + pgnumber = 14; + pgname = "C_4"; + return true; + } + if (ns4 == 2) + { + pgnumber = 15; + pgname = "S_4"; + return true; + } + } + if (nrot == 6) + { + if (ninv == 1) + { + pgnumber = 10; + pgname = "S_6"; + return true; + } + if (nc2 == 3) + { + pgnumber = 11; + pgname = "D_3"; + return true; + } + if (ns1 == 3) + { + pgnumber = 12; + pgname = "C_3v"; + return true; + } + if (nc2 == 1) + { + pgnumber = 21; + pgname = "C_6"; + return true; + } + if (ns1 == 1) + { + pgnumber = 22; + pgname = "C_3h"; + return true; + } + } + if (nrot == 8) + { + if (ns1 == 3) + { + pgnumber = 8; + pgname = "D_2h"; + return true; + } + if (ns1 == 1) + { + pgnumber = 16; + pgname = "C_4h"; + return true; + } + if (ns1 == 0) + { + pgnumber = 17; + pgname = "D_4"; + return true; + } + if (ns1 == 4) + { + pgnumber = 18; + pgname = "C_4v"; + return true; + } + if (ns1 == 2) + { + pgnumber = 19; + pgname = "D_2d"; + return true; + } + } + if (nrot == 12) + { + if (ns1 == 3) + { + pgnumber = 13; + pgname = "D_3d"; + return true; + } + if (ns1 == 1) + { + pgnumber = 23; + pgname = "C_6h"; + return true; + } + if (nc2 == 7) + { + pgnumber = 24; + pgname = "D_6"; + return true; + } + if (ns1 == 6) + { + pgnumber = 25; + pgname = "C_6v"; + return true; + } + if (ns1 == 4) + { + pgnumber = 26; + pgname = "D_3h"; + return true; + } + if (nc3 == 8) + { + pgnumber = 28; + pgname = "T"; + return true; + } + } + if (nrot == 24) + { + if (nc6 == 2) + { + pgnumber = 27; + pgname = "D_6h"; + return true; + } + if (ninv == 1) + { + pgnumber = 29; + pgname = "T_h"; + return true; + } + if (nc4 
== 6) + { + pgnumber = 30; + pgname = "O"; + return true; + } + if (ns4 == 6) + { + pgnumber = 31; + pgname = "T_d"; + return true; + } + } GlobalV::ofs_running << "\n WARNING: Symmetry operations cannot completely constitute a point group.\n\ - It'll be better to try another `symmetry_prec`.\n Now search the subgroups ..." << std::endl; - pgnumber = this->subgroup(nrot, ninv, nc2, nc3, nc4, nc6, ns1, ns3, ns4, ns6); + It'll be better to try another `symmetry_prec`.\n Now search the subgroups ..." + << std::endl; + pgnumber = this->subgroup (nrot, ninv, nc2, nc3, nc4, nc6, ns1, ns3, ns4, ns6); pgname = pgdict[pgnumber]; return false; } - -void Symmetry_Basic::rotate( ModuleBase::Matrix3 &gmatrix, ModuleBase::Vector3 >rans, - int i, int j, int k, // FFT grid index. - const int nr1, const int nr2, const int nr3, // dimension of FFT grid. - int &ri, int &rj, int &rk) +void + Symmetry_Basic::rotate (ModuleBase::Matrix3& gmatrix, + ModuleBase::Vector3& gtrans, + int i, + int j, + int k, // FFT grid index. + const int nr1, + const int nr2, + const int nr3, // dimension of FFT grid. 
+ int& ri, + int& rj, + int& rk) { - static ModuleBase::Matrix3 g; - g.e11 = gmatrix.e11; - g.e21 = gmatrix.e21 * (double)nr1 / (double)nr2; - g.e31 = gmatrix.e31 * (double)nr1 / (double)nr3; - g.e12 = gmatrix.e12 * (double)nr2 / (double)nr1; - g.e22 = gmatrix.e22; - g.e32 = gmatrix.e32 * (double)nr2 / (double)nr3; - g.e13 = gmatrix.e13 * (double)nr3 / (double)nr1; - g.e23 = gmatrix.e23 * (double)nr3 / (double)nr2; - g.e33 = gmatrix.e33; - - ri = int(g.e11 * i + g.e21 * j + g.e31 * k) + (int)(gtrans.x * nr1); - if (ri < 0) - { - ri += 10 * nr1; - } - ri = ri%nr1; - rj = static_cast(g.e12 * i + g.e22 * j + g.e32 * k) + static_cast(gtrans.y * nr2); - if (rj < 0) - { - rj += 10 * nr2; - } - rj = rj%nr2; - rk = static_cast(g.e13 * i + g.e23 * j + g.e33 * k) + static_cast(gtrans.z * nr3); - if (rk < 0) - { - rk += 10 * nr3; - } - rk = rk%nr3; - return; + static ModuleBase::Matrix3 g; + g.e11 = gmatrix.e11; + g.e21 = gmatrix.e21 * (double)nr1 / (double)nr2; + g.e31 = gmatrix.e31 * (double)nr1 / (double)nr3; + g.e12 = gmatrix.e12 * (double)nr2 / (double)nr1; + g.e22 = gmatrix.e22; + g.e32 = gmatrix.e32 * (double)nr2 / (double)nr3; + g.e13 = gmatrix.e13 * (double)nr3 / (double)nr1; + g.e23 = gmatrix.e23 * (double)nr3 / (double)nr2; + g.e33 = gmatrix.e33; + + ri = int (g.e11 * i + g.e21 * j + g.e31 * k) + (int)(gtrans.x * nr1); + if (ri < 0) + { + ri += 10 * nr1; + } + ri = ri % nr1; + rj = static_cast (g.e12 * i + g.e22 * j + g.e32 * k) + static_cast (gtrans.y * nr2); + if (rj < 0) + { + rj += 10 * nr2; + } + rj = rj % nr2; + rk = static_cast (g.e13 * i + g.e23 * j + g.e33 * k) + static_cast (gtrans.z * nr3); + if (rk < 0) + { + rk += 10 * nr3; + } + rk = rk % nr3; + return; } // atom ordering for each atom type // by a "weighted function" f // (instead of ordering by x, y, z directly) -void Symmetry_Basic::atom_ordering_new(double *posi, const int natom, int *subindex) const +void + Symmetry_Basic::atom_ordering_new (double* posi, const int natom, int* subindex) const { - 
//order the atomic positions inside a supercell by a unique ordering scheme - subindex[0] = 0; - - if(natom == 1) - { - //if there is only one atom, it is not necessary to order - return; - } - - std::vector tmpx(natom); - std::vector tmpy(natom); - std::vector tmpz(natom); - for(int i=0; iorder_atoms(posi, natom, subindex); - for(int i=0; i1) //need a new sort - { - subindex[0] = 0; - for(int j=0; jorder_atoms(&posi[i*3], nxequal, subindex); - } - i=ix_right; - } - - delete[] weighted_func; - return; + // order the atomic positions inside a supercell by a unique ordering scheme + subindex[0] = 0; + + if (natom == 1) + { + // if there is only one atom, it is not necessary to order + return; + } + + std::vector tmpx (natom); + std::vector tmpy (natom); + std::vector tmpz (natom); + for (int i = 0; i < natom; i++) + { + tmpx[i] = posi[i * 3]; + tmpy[i] = posi[i * 3 + 1]; + tmpz[i] = posi[i * 3 + 2]; + } + double x_max = *max_element (tmpx.begin (), tmpx.end ()); + double x_min = *min_element (tmpx.begin (), tmpx.end ()); + double y_max = *max_element (tmpy.begin (), tmpy.end ()); + double y_min = *min_element (tmpy.begin (), tmpy.end ()); + double z_max = *max_element (tmpz.begin (), tmpz.end ()); + double z_min = *min_element (tmpz.begin (), tmpz.end ()); + + double* weighted_func = new double[natom]; + + // the first time: f(x, y, z) + for (int i = 0; i < natom; i++) + { + weighted_func[i] = 1 / epsilon / epsilon * tmpx[i] + 1 / epsilon * tmpy[i] + tmpz[i]; + } + ModuleBase::heapsort (natom, weighted_func, subindex); + this->order_atoms (posi, natom, subindex); + for (int i = 0; i < natom; i++) + { + tmpx[i] = posi[i * 3]; + tmpy[i] = posi[i * 3 + 1]; + tmpz[i] = posi[i * 3 + 2]; + } + + // the second time: f(y, z) for fixed x + for (int i = 0; i < natom - 1;) + { + int ix_right = i + 1; // right bound is no included + while (ix_right < natom && equal (tmpx[ix_right], tmpx[i])) + { + ++ix_right; + } + + int nxequal = ix_right - i; + if (nxequal > 1) // need a new 
sort + { + subindex[0] = 0; + for (int j = 0; j < nxequal; ++j) + { + weighted_func[j] = 1 / epsilon * tmpy[i + j] + tmpz[i + j]; + } + ModuleBase::heapsort (nxequal, weighted_func, subindex); + this->order_atoms (&posi[i * 3], nxequal, subindex); + } + i = ix_right; + } + + delete[] weighted_func; + return; } -void Symmetry_Basic::test_atom_ordering(double *posi, const int natom, int *subindex) const +void + Symmetry_Basic::test_atom_ordering (double* posi, const int natom, int* subindex) const { - //an interface to test a protected function - this->atom_ordering_new(posi, natom, subindex); -} + // an interface to test a protected function + this->atom_ordering_new (posi, natom, subindex); } +} // namespace ModuleSymmetry diff --git a/source/source_cell/module_symmetry/symmetry_basic.h b/source/source_cell/module_symmetry/symmetry_basic.h index 7308a20df9f..7a6603e9477 100644 --- a/source/source_cell/module_symmetry/symmetry_basic.h +++ b/source/source_cell/module_symmetry/symmetry_basic.h @@ -12,60 +12,76 @@ namespace ModuleSymmetry { class Symmetry_Basic { - public: + public: + Symmetry_Basic () {}; + ~Symmetry_Basic () {}; - Symmetry_Basic() {}; - ~Symmetry_Basic() {}; + double epsilon; ///< the precision of symmetry operation + double epsilon_input; ///< the input value of symmetry_prec, should not be changed - double epsilon; ///< the precision of symmetry operation - double epsilon_input; ///< the input value of symmetry_prec, should not be changed + // control accuray + bool equal (const double& m, const double& n) const; + void check_boundary (double& x) const; + double get_translation_vector (const double& x1, const double& x2) const; + void check_translation (double& x, const double& t) const; + double check_diff (const double& x1, const double& x2) const; - // control accuray - bool equal(const double &m, const double &n)const; - void check_boundary(double &x)const; - double get_translation_vector(const double& x1, const double& x2)const; - void 
check_translation(double &x, const double &t) const; - double check_diff(const double& x1, const double& x2) const; - - void veccon( - double *va, - double *vb, - const int num, - const ModuleBase::Vector3 &aa1, - const ModuleBase::Vector3 &aa2, - const ModuleBase::Vector3 &aa3, - const ModuleBase::Vector3 &bb1, - const ModuleBase::Vector3 &bb2, - const ModuleBase::Vector3 &bb3 - ); - void matrigen(ModuleBase::Matrix3 *symgen, const int ngen, ModuleBase::Matrix3* symop, int &nop) const; - void setgroup(ModuleBase::Matrix3 *symop, int &nop, const int &ibrav) const; - void rotate( - ModuleBase::Matrix3 &gmatrix, ModuleBase::Vector3 >rans, - int i, int j, int k, const int, const int, const int, int&, int&, int&); - void test_atom_ordering(double *posi, const int natom, int *subindex) const; + void veccon (double* va, + double* vb, + const int num, + const ModuleBase::Vector3& aa1, + const ModuleBase::Vector3& aa2, + const ModuleBase::Vector3& aa3, + const ModuleBase::Vector3& bb1, + const ModuleBase::Vector3& bb2, + const ModuleBase::Vector3& bb3); + void matrigen (ModuleBase::Matrix3* symgen, const int ngen, ModuleBase::Matrix3* symop, int& nop) const; + void setgroup (ModuleBase::Matrix3* symop, int& nop, const int& ibrav) const; + void rotate (ModuleBase::Matrix3& gmatrix, + ModuleBase::Vector3& gtrans, + int i, + int j, + int k, + const int, + const int, + const int, + int&, + int&, + int&); + void test_atom_ordering (double* posi, const int natom, int* subindex) const; /// find out the greatest subgrop according to the number of operations of certain type. 
/// used to deal with incomplete group due to a subtle`symmetry_prec` - int subgroup(const int& nrot, const int& ninv, const int& nc2, const int& nc3, const int& nc4, const int& nc6, - const int& ns1, const int& ns3, const int& ns4, const int& ns6)const; - bool pointgroup(const int& nrot, int& pgnumber, std::string& pgname, const ModuleBase::Matrix3* gmatrix, std::ofstream& ofs_running)const; + int subgroup (const int& nrot, + const int& ninv, + const int& nc2, + const int& nc3, + const int& nc4, + const int& nc6, + const int& ns1, + const int& ns3, + const int& ns4, + const int& ns6) const; + bool pointgroup (const int& nrot, + int& pgnumber, + std::string& pgname, + const ModuleBase::Matrix3* gmatrix, + std::ofstream& ofs_running) const; -protected: - std::string get_brav_name(const int ibrav) const; - void atom_ordering(double *posi, const int natom, int *subindex); - void atom_ordering_new(double *posi, const int natom, int *subindex) const; + protected: + std::string get_brav_name (const int ibrav) const; + void atom_ordering (double* posi, const int natom, int* subindex); + void atom_ordering_new (double* posi, const int natom, int* subindex) const; - private: - - void order_atoms(double* pos, const int &nat, const int *index) const; - void order_y(double *pos, const int &oldpos, const int &newpos); - void order_z(double *pos, const int &oldpos, const int &newpos); + private: + void order_atoms (double* pos, const int& nat, const int* index) const; + void order_y (double* pos, const int& oldpos, const int& newpos); + void order_z (double* pos, const int& oldpos, const int& newpos); }; -//for test only +// for test only extern bool test_brav; -}//end of define namespace +} // namespace ModuleSymmetry #endif diff --git a/source/source_cell/module_symmetry/test/symmetry_test.cpp b/source/source_cell/module_symmetry/test/symmetry_test.cpp index 30323195cae..f545fa458a9 100644 --- a/source/source_cell/module_symmetry/test/symmetry_test.cpp +++ 
b/source/source_cell/module_symmetry/test/symmetry_test.cpp @@ -1,80 +1,83 @@ #include "symmetry_test.h" -void SymmetryTest::construct_ucell(stru_ &stru) +void + SymmetryTest::construct_ucell (stru_& stru) { std::vector coord = stru.all_type; - ucell.a1 = ModuleBase::Vector3(stru.cell[0], stru.cell[1], stru.cell[2]); - ucell.a2 = ModuleBase::Vector3(stru.cell[3], stru.cell[4], stru.cell[5]); - ucell.a3 = ModuleBase::Vector3(stru.cell[6], stru.cell[7], stru.cell[8]); - ucell.latvec.e11=ucell.a1.x; - ucell.latvec.e12=ucell.a1.y; - ucell.latvec.e13=ucell.a1.z; - ucell.latvec.e21=ucell.a2.x; - ucell.latvec.e22=ucell.a2.y; - ucell.latvec.e23=ucell.a2.z; - ucell.latvec.e31=ucell.a3.x; - ucell.latvec.e32=ucell.a3.y; - ucell.latvec.e33=ucell.a3.z; - ucell.GT = ucell.latvec.Inverse(); - ucell.G = ucell.GT.Transpose(); - ucell.ntype = stru.all_type.size(); + ucell.a1 = ModuleBase::Vector3 (stru.cell[0], stru.cell[1], stru.cell[2]); + ucell.a2 = ModuleBase::Vector3 (stru.cell[3], stru.cell[4], stru.cell[5]); + ucell.a3 = ModuleBase::Vector3 (stru.cell[6], stru.cell[7], stru.cell[8]); + ucell.latvec.e11 = ucell.a1.x; + ucell.latvec.e12 = ucell.a1.y; + ucell.latvec.e13 = ucell.a1.z; + ucell.latvec.e21 = ucell.a2.x; + ucell.latvec.e22 = ucell.a2.y; + ucell.latvec.e23 = ucell.a2.z; + ucell.latvec.e31 = ucell.a3.x; + ucell.latvec.e32 = ucell.a3.y; + ucell.latvec.e33 = ucell.a3.z; + ucell.GT = ucell.latvec.Inverse (); + ucell.G = ucell.GT.Transpose (); + ucell.ntype = stru.all_type.size (); ucell.atoms = new Atom[ucell.ntype]; ucell.nat = 0; - for (int i = 0; i < coord.size(); i++) - { - ucell.atoms[i].label = coord[i].atomname; - ucell.atoms[i].na = coord[i].coordinate.size(); - ucell.atoms[i].tau.resize(ucell.atoms[i].na); - ucell.atoms[i].taud.resize(ucell.atoms[i].na); - for (int j = 0; j < ucell.atoms[i].na; j++) + for (int i = 0; i < coord.size (); i++) { - std::vector this_atom = coord[i].coordinate[j]; - if (stru.coordtype == "C") - { - ucell.atoms[i].tau[j] = 
ModuleBase::Vector3(this_atom[0], this_atom[1], this_atom[2]); - ModuleBase::Mathzone::Cartesian_to_Direct(ucell.atoms[i].tau[j].x, - ucell.atoms[i].tau[j].y, - ucell.atoms[i].tau[j].z, - ucell.a1.x, - ucell.a1.y, - ucell.a1.z, - ucell.a2.x, - ucell.a2.y, - ucell.a2.z, - ucell.a3.x, - ucell.a3.y, - ucell.a3.z, - ucell.atoms[i].taud[j].x, - ucell.atoms[i].taud[j].y, - ucell.atoms[i].taud[j].z); - } - else - { - ucell.atoms[i].taud[j] = ModuleBase::Vector3(this_atom[0], this_atom[1], this_atom[2]); - ModuleBase::Mathzone::Direct_to_Cartesian(ucell.atoms[i].taud[j].x, - ucell.atoms[i].taud[j].y, - ucell.atoms[i].taud[j].z, - ucell.a1.x, - ucell.a1.y, - ucell.a1.z, - ucell.a2.x, - ucell.a2.y, - ucell.a2.z, - ucell.a3.x, - ucell.a3.y, - ucell.a3.z, - ucell.atoms[i].tau[j].x, - ucell.atoms[i].tau[j].y, - ucell.atoms[i].tau[j].z); - } - + ucell.atoms[i].label = coord[i].atomname; + ucell.atoms[i].na = coord[i].coordinate.size (); + ucell.atoms[i].tau.resize (ucell.atoms[i].na); + ucell.atoms[i].taud.resize (ucell.atoms[i].na); + for (int j = 0; j < ucell.atoms[i].na; j++) + { + std::vector this_atom = coord[i].coordinate[j]; + if (stru.coordtype == "C") + { + ucell.atoms[i].tau[j] + = ModuleBase::Vector3 (this_atom[0], this_atom[1], this_atom[2]); + ModuleBase::Mathzone::Cartesian_to_Direct (ucell.atoms[i].tau[j].x, + ucell.atoms[i].tau[j].y, + ucell.atoms[i].tau[j].z, + ucell.a1.x, + ucell.a1.y, + ucell.a1.z, + ucell.a2.x, + ucell.a2.y, + ucell.a2.z, + ucell.a3.x, + ucell.a3.y, + ucell.a3.z, + ucell.atoms[i].taud[j].x, + ucell.atoms[i].taud[j].y, + ucell.atoms[i].taud[j].z); + } + else + { + ucell.atoms[i].taud[j] + = ModuleBase::Vector3 (this_atom[0], this_atom[1], this_atom[2]); + ModuleBase::Mathzone::Direct_to_Cartesian (ucell.atoms[i].taud[j].x, + ucell.atoms[i].taud[j].y, + ucell.atoms[i].taud[j].z, + ucell.a1.x, + ucell.a1.y, + ucell.a1.z, + ucell.a2.x, + ucell.a2.y, + ucell.a2.z, + ucell.a3.x, + ucell.a3.y, + ucell.a3.z, + ucell.atoms[i].tau[j].x, + 
ucell.atoms[i].tau[j].y, + ucell.atoms[i].tau[j].z); + } + } + ucell.nat += ucell.atoms[i].na; } - ucell.nat += ucell.atoms[i].na; - } } -void SymmetryTest::ClearUcell() +void + SymmetryTest::ClearUcell () { delete[] ucell.atoms; } \ No newline at end of file diff --git a/source/source_cell/module_symmetry/test/symmetry_test.h b/source/source_cell/module_symmetry/test/symmetry_test.h index b49091b665e..aa63c3e2d98 100644 --- a/source/source_cell/module_symmetry/test/symmetry_test.h +++ b/source/source_cell/module_symmetry/test/symmetry_test.h @@ -14,24 +14,25 @@ struct atomtype_ struct stru_ { int ibrav; - std::string point_group; // Schoenflies symbol + std::string point_group; // Schoenflies symbol std::string point_group_hm; // Hermann–Mauguin notation. std::string space_group; std::vector cell; std::vector all_type; - std::string coordtype; // caltesian or direct - std::vector force_zero_iat; // the index of atoms whose force should be zero - std::map force_oppo_iat; // the index of atoms pairs whose forces should be opposite + std::string coordtype; // caltesian or direct + std::vector force_zero_iat; // the index of atoms whose force should be zero + std::map force_oppo_iat; // the index of atoms pairs whose forces should be opposite std::vector> force_oppo_iat_xyz; //{ia1, ia2, xoppo(1)/eq(0), yoppo, zoppo} - std::vector> stress_zero; //a set of elements in the stress tensor that should be zero - std::vector>> stress_eq; //a set of elements in the stress tensor that should be equal + std::vector> stress_zero; // a set of elements in the stress tensor that should be zero + std::vector>> + stress_eq; // a set of elements in the stress tensor that should be equal }; class SymmetryTest : public testing::Test { -protected: + protected: UnitCell ucell; std::ofstream ofs_running; - void construct_ucell(stru_& stru); - void ClearUcell(); + void construct_ucell (stru_& stru); + void ClearUcell (); }; \ No newline at end of file diff --git 
a/source/source_cell/module_symmetry/test/symmetry_test_analysis.cpp b/source/source_cell/module_symmetry/test/symmetry_test_analysis.cpp index 1ca197c032d..436d56034ff 100644 --- a/source/source_cell/module_symmetry/test/symmetry_test_analysis.cpp +++ b/source/source_cell/module_symmetry/test/symmetry_test_analysis.cpp @@ -20,270 +20,293 @@ * is different from its point group. ***********************************************/ // mock the useless functions -void output::printM3(std::ofstream &ofs, const std::string &description, const ModuleBase::Matrix3 &m){} -pseudo::pseudo() +void + output::printM3 (std::ofstream& ofs, const std::string& description, const ModuleBase::Matrix3& m) { } -pseudo::~pseudo() -{ -} -Atom::Atom(){} -Atom::~Atom(){} -Atom_pseudo::Atom_pseudo(){} -Atom_pseudo::~Atom_pseudo(){} -UnitCell::UnitCell(){} -UnitCell::~UnitCell(){} -Magnetism::Magnetism(){} -Magnetism::~Magnetism() {} -SepPot::SepPot(){} -SepPot::~SepPot(){} -Sep_Cell::Sep_Cell() noexcept {} -Sep_Cell::~Sep_Cell() noexcept {} +pseudo::pseudo () {} +pseudo::~pseudo () {} +Atom::Atom () {} +Atom::~Atom () {} +Atom_pseudo::Atom_pseudo () {} +Atom_pseudo::~Atom_pseudo () {} +UnitCell::UnitCell () {} +UnitCell::~UnitCell () {} +Magnetism::Magnetism () {} +Magnetism::~Magnetism () {} +SepPot::SepPot () {} +SepPot::~SepPot () {} +Sep_Cell::Sep_Cell () noexcept {} +Sep_Cell::~Sep_Cell () noexcept {} -TEST_F(SymmetryTest, AnalySys) +TEST_F (SymmetryTest, AnalySys) { - for (int stru = 0; stru < stru_lib.size(); stru++) - { - ModuleSymmetry::Symmetry symm; - construct_ucell(stru_lib[stru]); - symm.analy_sys(ucell.lat, ucell.st, ucell.atoms, ofs_running); - - //1. ibrav - std::string ref_point_group = stru_lib[stru].point_group; - std::string cal_point_group = symm.pgname; - int ref_ibrav = stru_lib[stru].ibrav; - int cal_ibrav = symm.real_brav; - EXPECT_EQ(cal_ibrav, ref_ibrav); - EXPECT_EQ(cal_point_group, ref_point_group) << "ibrav=" << stru_lib[stru].ibrav; - - //2. 
input and optimized lattice, gtrans_convert and veccon - //input lattice - EXPECT_EQ(symm.s1, ucell.a1); - EXPECT_EQ(symm.s2, ucell.a2); - EXPECT_EQ(symm.s3, ucell.a3); - //optimized lattice - EXPECT_EQ(symm.a1, ModuleBase::Vector3(symm.optlat.e11, symm.optlat.e12, symm.optlat.e13)); - EXPECT_EQ(symm.a2, ModuleBase::Vector3(symm.optlat.e21, symm.optlat.e22, symm.optlat.e23)); - EXPECT_EQ(symm.a3, ModuleBase::Vector3(symm.optlat.e31, symm.optlat.e32, symm.optlat.e33)); - //gtrans_convert - std::vector> gtrans_optconf(symm.nrotk); - double* gtrans_veccon=new double [symm.nrotk*3]; - for (int i=0;i(gtrans_optconf_veccon[i*3], - gtrans_optconf_veccon[i*3+1], gtrans_optconf_veccon[i*3+2])); - delete[] gtrans_veccon; - delete[] gtrans_optconf_veccon; + ModuleSymmetry::Symmetry symm; + construct_ucell (stru_lib[stru]); + symm.analy_sys (ucell.lat, ucell.st, ucell.atoms, ofs_running); - //3. invmap - int* ivmp=new int[symm.nrotk]; - symm.gmatrix_invmap(symm.gmatrix, symm.nrotk, ivmp); - ModuleBase::Matrix3 test; + // 1. 
ibrav + std::string ref_point_group = stru_lib[stru].point_group; + std::string cal_point_group = symm.pgname; + int ref_ibrav = stru_lib[stru].ibrav; + int cal_ibrav = symm.real_brav; + EXPECT_EQ (cal_ibrav, ref_ibrav); + EXPECT_EQ (cal_point_group, ref_point_group) << "ibrav=" << stru_lib[stru].ibrav; - for (int i=0;i (symm.optlat.e11, symm.optlat.e12, symm.optlat.e13)); + EXPECT_EQ (symm.a2, ModuleBase::Vector3 (symm.optlat.e21, symm.optlat.e22, symm.optlat.e23)); + EXPECT_EQ (symm.a3, ModuleBase::Vector3 (symm.optlat.e31, symm.optlat.e32, symm.optlat.e33)); + // gtrans_convert + std::vector> gtrans_optconf (symm.nrotk); + double* gtrans_veccon = new double[symm.nrotk * 3]; + for (int i = 0; i < symm.nrotk; ++i) + { + gtrans_veccon[3 * i] = symm.gtrans[i].x; + gtrans_veccon[3 * i + 1] = symm.gtrans[i].y; + gtrans_veccon[3 * i + 2] = symm.gtrans[i].z; + } + double* gtrans_optconf_veccon = new double[symm.nrotk * 3]; + symm.gtrans_convert (symm.gtrans, gtrans_optconf.data (), symm.nrotk, ucell.latvec, symm.optlat); + symm.veccon (gtrans_veccon, + gtrans_optconf_veccon, + symm.nrotk, + symm.s1, + symm.s2, + symm.s3, + symm.a1, + symm.a2, + symm.a3); + for (int i = 0; i < symm.nrotk; ++i) + EXPECT_EQ (gtrans_optconf[i], + ModuleBase::Vector3 (gtrans_optconf_veccon[i * 3], + gtrans_optconf_veccon[i * 3 + 1], + gtrans_optconf_veccon[i * 3 + 2])); + delete[] gtrans_veccon; + delete[] gtrans_optconf_veccon; - } - delete[] ivmp; + // 3. invmap + int* ivmp = new int[symm.nrotk]; + symm.gmatrix_invmap (symm.gmatrix, symm.nrotk, ivmp); + ModuleBase::Matrix3 test; - //4. 
gmatrix_convert : input(gmatrix) -> opt(gmatrix_opt) ->input(gmatrix_input_back) - //-> opt(gmatrix_optback) <-> reciprocal(int or non-int) - ModuleBase::Matrix3* gmatrix_input_back=new ModuleBase::Matrix3[symm.nrotk];//3 - ModuleBase::Matrix3* gmatrix_opt=new ModuleBase::Matrix3[symm.nrotk];//2 - ModuleBase::Matrix3* gmatrix_opt_back=new ModuleBase::Matrix3[symm.nrotk];//4 - ModuleBase::Matrix3* kgmatrix_nonint=new ModuleBase::Matrix3[symm.nrotk]; - symm.gmatrix_convert_int(symm.gmatrix, gmatrix_opt, symm.nrotk, ucell.latvec, symm.optlat); //1->2 - symm.gmatrix_convert_int(gmatrix_opt, gmatrix_input_back, symm.nrotk, symm.optlat, ucell.latvec); //2->3 - symm.gmatrix_convert_int(gmatrix_input_back, gmatrix_opt_back, symm.nrotk, ucell.latvec, symm.optlat); //3->4 + for (int i = 0; i < symm.nrotk; ++i) + { + test = symm.gmatrix[i] * symm.gmatrix[ivmp[i]]; + EXPECT_NEAR (test.e11, 1, DOUBLETHRESHOLD); + EXPECT_NEAR (test.e22, 1, DOUBLETHRESHOLD); + EXPECT_NEAR (test.e33, 1, DOUBLETHRESHOLD); + EXPECT_NEAR (test.e12, 0, DOUBLETHRESHOLD); + EXPECT_NEAR (test.e21, 0, DOUBLETHRESHOLD); + EXPECT_NEAR (test.e13, 0, DOUBLETHRESHOLD); + EXPECT_NEAR (test.e31, 0, DOUBLETHRESHOLD); + EXPECT_NEAR (test.e23, 0, DOUBLETHRESHOLD); + EXPECT_NEAR (test.e32, 0, DOUBLETHRESHOLD); + } + delete[] ivmp; - symm.gmatrix_convert(symm.gmatrix, kgmatrix_nonint, symm.nrotk, ucell.latvec, ucell.G); - for (int i=0;i opt(gmatrix_opt) ->input(gmatrix_input_back) + //-> opt(gmatrix_optback) <-> reciprocal(int or non-int) + ModuleBase::Matrix3* gmatrix_input_back = new ModuleBase::Matrix3[symm.nrotk]; // 3 + ModuleBase::Matrix3* gmatrix_opt = new ModuleBase::Matrix3[symm.nrotk]; // 2 + ModuleBase::Matrix3* gmatrix_opt_back = new ModuleBase::Matrix3[symm.nrotk]; // 4 + ModuleBase::Matrix3* kgmatrix_nonint = new ModuleBase::Matrix3[symm.nrotk]; + symm.gmatrix_convert_int (symm.gmatrix, gmatrix_opt, symm.nrotk, ucell.latvec, symm.optlat); // 1->2 + symm.gmatrix_convert_int (gmatrix_opt, 
gmatrix_input_back, symm.nrotk, symm.optlat, ucell.latvec); // 2->3 + symm.gmatrix_convert_int (gmatrix_input_back, + gmatrix_opt_back, + symm.nrotk, + ucell.latvec, + symm.optlat); // 3->4 - ModuleBase::Matrix3 tmpA=symm.optlat.Inverse()*gmatrix_opt[i]*symm.optlat; //A^-1*SA*A - ModuleBase::Matrix3 tmpB=ucell.latvec.Inverse()*symm.gmatrix[i]*ucell.latvec;//B^-1*SB*B - ModuleBase::Matrix3 tmpG_int=ucell.G.Inverse()*symm.kgmatrix[i]*ucell.G;//G^-1*SG*G - ModuleBase::Matrix3 tmpG=ucell.G.Inverse()*kgmatrix_nonint[i]*ucell.G;//G^-1*SG*G - EXPECT_NEAR(tmpA.e11, tmpB.e11, DOUBLETHRESHOLD); - EXPECT_NEAR(tmpA.e22, tmpB.e22, DOUBLETHRESHOLD); - EXPECT_NEAR(tmpA.e33, tmpB.e33, DOUBLETHRESHOLD); - EXPECT_NEAR(tmpA.e12, tmpB.e12, DOUBLETHRESHOLD); - EXPECT_NEAR(tmpA.e21, tmpB.e21, DOUBLETHRESHOLD); - EXPECT_NEAR(tmpA.e13, tmpB.e13, DOUBLETHRESHOLD); - EXPECT_NEAR(tmpA.e31, tmpB.e31, DOUBLETHRESHOLD); - EXPECT_NEAR(tmpA.e23, tmpB.e23, DOUBLETHRESHOLD); - EXPECT_NEAR(tmpA.e32, tmpB.e32, DOUBLETHRESHOLD); + symm.gmatrix_convert (symm.gmatrix, kgmatrix_nonint, symm.nrotk, ucell.latvec, ucell.G); + for (int i = 0; i < symm.nrotk; ++i) + { + EXPECT_NEAR (symm.gmatrix[i].e11, gmatrix_input_back[i].e11, DOUBLETHRESHOLD); + EXPECT_NEAR (symm.gmatrix[i].e22, gmatrix_input_back[i].e22, DOUBLETHRESHOLD); + EXPECT_NEAR (symm.gmatrix[i].e33, gmatrix_input_back[i].e33, DOUBLETHRESHOLD); + EXPECT_NEAR (symm.gmatrix[i].e12, gmatrix_input_back[i].e12, DOUBLETHRESHOLD); + EXPECT_NEAR (symm.gmatrix[i].e21, gmatrix_input_back[i].e21, DOUBLETHRESHOLD); + EXPECT_NEAR (symm.gmatrix[i].e13, gmatrix_input_back[i].e13, DOUBLETHRESHOLD); + EXPECT_NEAR (symm.gmatrix[i].e31, gmatrix_input_back[i].e31, DOUBLETHRESHOLD); + EXPECT_NEAR (symm.gmatrix[i].e23, gmatrix_input_back[i].e23, DOUBLETHRESHOLD); + EXPECT_NEAR (symm.gmatrix[i].e32, gmatrix_input_back[i].e32, DOUBLETHRESHOLD); + EXPECT_NEAR (gmatrix_opt[i].e11, gmatrix_opt_back[i].e11, DOUBLETHRESHOLD); + EXPECT_NEAR (gmatrix_opt[i].e22, 
gmatrix_opt_back[i].e22, DOUBLETHRESHOLD); + EXPECT_NEAR (gmatrix_opt[i].e33, gmatrix_opt_back[i].e33, DOUBLETHRESHOLD); + EXPECT_NEAR (gmatrix_opt[i].e12, gmatrix_opt_back[i].e12, DOUBLETHRESHOLD); + EXPECT_NEAR (gmatrix_opt[i].e21, gmatrix_opt_back[i].e21, DOUBLETHRESHOLD); + EXPECT_NEAR (gmatrix_opt[i].e13, gmatrix_opt_back[i].e13, DOUBLETHRESHOLD); + EXPECT_NEAR (gmatrix_opt[i].e31, gmatrix_opt_back[i].e31, DOUBLETHRESHOLD); + EXPECT_NEAR (gmatrix_opt[i].e23, gmatrix_opt_back[i].e23, DOUBLETHRESHOLD); + EXPECT_NEAR (gmatrix_opt[i].e32, gmatrix_opt_back[i].e32, DOUBLETHRESHOLD); - if(!symm.equal(tmpG.e13, tmpG_int.e13) || !symm.equal(tmpG.e23, tmpG_int.e23) || !symm.equal(tmpG.e12, tmpG_int.e12)) - { - std::cout<<"stru_ibrav:"< stru_lib{ std::vector{atomtype_{"C", std::vector>{ {0., 0., 0.}, - }}}, "C"}, + }}}, + "C"}, stru_{2, "O_h", "m-3m", @@ -39,7 +40,8 @@ std::vector stru_lib{ std::vector{atomtype_{"C", std::vector>{ {0., 0., 0.}, - }}}, "C"}, + }}}, + "C"}, stru_{3, "O_h", "m-3m", @@ -48,7 +50,8 @@ std::vector stru_lib{ std::vector{atomtype_{"C", std::vector>{ {0., 0., 0.}, - }}}, "C"}, + }}}, + "C"}, stru_{4, "D_6h", "6/mmm", @@ -57,7 +60,8 @@ std::vector stru_lib{ std::vector{atomtype_{"C", std::vector>{ {0., 0., 0.}, - }}}, "C"}, + }}}, + "C"}, stru_{5, "D_4h", "4/mmm", @@ -66,7 +70,8 @@ std::vector stru_lib{ std::vector{atomtype_{"C", std::vector>{ {0., 0., 0.}, - }}}, "C"}, + }}}, + "C"}, stru_{6, "D_4h", "4/mmm", @@ -75,7 +80,8 @@ std::vector stru_lib{ std::vector{atomtype_{"C", std::vector>{ {0., 0., 0.}, - }}}, "C"}, + }}}, + "C"}, stru_{7, "D_3d", "-3m", @@ -92,7 +98,8 @@ std::vector stru_lib{ std::vector{atomtype_{"C", std::vector>{ {-0., 0., 0.}, - }}}, "C"}, + }}}, + "C"}, stru_{8, "D_2h", "mmm", @@ -101,7 +108,8 @@ std::vector stru_lib{ std::vector{atomtype_{"C", std::vector>{ {0., 0., 0.}, - }}}, "C"}, + }}}, + "C"}, stru_{9, "D_2h", "mmm", @@ -110,7 +118,8 @@ std::vector stru_lib{ std::vector{atomtype_{"C", std::vector>{ {0., 0., 0.}, - }}}, 
"C"}, + }}}, + "C"}, stru_{10, "D_2h", "mmm", @@ -119,7 +128,8 @@ std::vector stru_lib{ std::vector{atomtype_{"C", std::vector>{ {0., 0., 0.}, - }}}, "C"}, + }}}, + "C"}, stru_{11, "D_2h", "mmm", @@ -128,7 +138,8 @@ std::vector stru_lib{ std::vector{atomtype_{"C", std::vector>{ {0., 0., 0.}, - }}}, "C"}, + }}}, + "C"}, stru_{12, "C_2h", "2/m", @@ -137,198 +148,228 @@ std::vector stru_lib{ std::vector{atomtype_{"C", std::vector>{ {0., 0., 0.}, - }}}, "C"}, - stru_{13, - "C_2h", - "2/m", - "C2/m", - std::vector{0.5, -1., 0., 0.5, 1., 0., -0.40192379, 0., 1.5}, - std::vector{atomtype_{"C", - std::vector>{ - {0., 0., 0.}, - }}}, "C"}, - stru_{14, - "S_2", - "-1", - "P-1", - std::vector{1., 0., 0., -0.28989928, 1.53691386, 0., -0.31595971, -0.66789914, 1.75670135}, - std::vector{atomtype_{"C", - std::vector>{ - {0., 0., 0.}, - }}}, "C"}, + }}}, + "C"}, + stru_{13, + "C_2h", + "2/m", + "C2/m", + std::vector{0.5, -1., 0., 0.5, 1., 0., -0.40192379, 0., 1.5}, + std::vector{atomtype_{"C", + std::vector>{ + {0., 0., 0.}, + }}}, + "C"}, + stru_{14, + "S_2", + "-1", + "P-1", + std::vector{1., 0., 0., -0.28989928, 1.53691386, 0., -0.31595971, -0.66789914, 1.75670135}, + std::vector{atomtype_{"C", + std::vector>{ + {0., 0., 0.}, + }}}, + "C"}, }; // test cases for space group and primitive cell analysis -// ibrav here means the number of primitive cells +// ibrav here means the number of primitive cells std::vector supercell_lib{ // bcc, 2 primitive cells stru_{2, - "O_h", - "", - "O_h", - std::vector{1., 0., 0., 0., 1., 0., 0., 0., 1.}, - std::vector{atomtype_{"C", - std::vector>{ - {0., 0., 0.}, {0.5, 0.5, 0.5}}}}, "D", - std::vector{0, 1}, - {}, - {}, - {{0, 1}, {0, 2}, {1, 0}, {1, 2}, {2, 0}, {2, 1}}, - {{{0, 0}, {1, 1}, {2, 2}}}}, + "O_h", + "", + "O_h", + std::vector{1., 0., 0., 0., 1., 0., 0., 0., 1.}, + std::vector{atomtype_{"C", std::vector>{{0., 0., 0.}, {0.5, 0.5, 0.5}}}}, + "D", + std::vector{0, 1}, + {}, + {}, + {{0, 1}, {0, 2}, {1, 0}, {1, 2}, {2, 0}, {2, 1}}, + 
{{{0, 0}, {1, 1}, {2, 2}}}}, // bct, 2 stru_{2, - "D_4h", - "", - "D_4h", - std::vector{1.2, 0., 0., 0., 1., 0., 0., 0., 1.}, - std::vector{atomtype_{"C", - std::vector>{ - {0., 0., 0.}, {0.5, 0.5, 0.5},}}} , "D", - std::vector{0, 1}, - {}, - {}, - {{0, 1}, {0, 2}, {1, 0}, {1, 2}, {2, 0}, {2, 1}}, - {{{1, 1}, {2, 2}}}}, - //bct, 2 + "D_4h", + "", + "D_4h", + std::vector{1.2, 0., 0., 0., 1., 0., 0., 0., 1.}, + std::vector{atomtype_{"C", + std::vector>{ + {0., 0., 0.}, + {0.5, 0.5, 0.5}, + }}}, + "D", + std::vector{0, 1}, + {}, + {}, + {{0, 1}, {0, 2}, {1, 0}, {1, 2}, {2, 0}, {2, 1}}, + {{{1, 1}, {2, 2}}}}, + // bct, 2 stru_{2, - "D_2h", - "", - "D_2h", - std::vector{1.2, 0., 0., 0., 1.1, 0., 0., 0., 1.}, - std::vector{atomtype_{"C", - std::vector>{ - {0., 0., 0.}, {0.5, 0.5, 0.5}}}} , "D", - std::vector{0, 1}, - {}, - {}, - {{0, 1}, {0, 2}, {1, 0}, {1, 2}, {2, 0}, {2, 1}}, - {}}, - //fcc, 4 + "D_2h", + "", + "D_2h", + std::vector{1.2, 0., 0., 0., 1.1, 0., 0., 0., 1.}, + std::vector{atomtype_{"C", std::vector>{{0., 0., 0.}, {0.5, 0.5, 0.5}}}}, + "D", + std::vector{0, 1}, + {}, + {}, + {{0, 1}, {0, 2}, {1, 0}, {1, 2}, {2, 0}, {2, 1}}, + {}}, + // fcc, 4 stru_{4, - "O_h", - "", - "O_h", - std::vector{1., 0., 0., 0., 1., 0., 0., 0., 1.}, - std::vector{atomtype_{"C", - std::vector>{ - {0., 0., 0.}, {0.5, 0.5, 0.},{0.5, 0., 0.5},{0., 0.5, 0.5}}}} , "D", - std::vector{0, 1, 2, 3}, - {}, - {}, - {{0, 1}, {0, 2}, {1, 0}, {1, 2}, {2, 0}, {2, 1}}, - {{{0, 0}, {1, 1}, {2, 2}}}}, - //fco, 4 + "O_h", + "", + "O_h", + std::vector{1., 0., 0., 0., 1., 0., 0., 0., 1.}, + std::vector{atomtype_{ + "C", + std::vector>{{0., 0., 0.}, {0.5, 0.5, 0.}, {0.5, 0., 0.5}, {0., 0.5, 0.5}}}}, + "D", + std::vector{0, 1, 2, 3}, + {}, + {}, + {{0, 1}, {0, 2}, {1, 0}, {1, 2}, {2, 0}, {2, 1}}, + {{{0, 0}, {1, 1}, {2, 2}}}}, + // fco, 4 stru_{4, - "D_2h", - "", - "D_2h", - std::vector{1.2, 0., 0., 0., 1.1, 0., 0., 0., 1.}, - std::vector{atomtype_{"C", - std::vector>{ - {0., 0., 0.}, {0.5, 0.5, 0.},{0.5, 
0., 0.5},{0., 0.5, 0.5}}}} , "D", - std::vector{0, 1, 2, 3}, - {}, - {}, - {{0, 1}, {0, 2}, {1, 0}, {1, 2}, {2, 0}, {2,1}}, - {}}, - //3 in x - stru_{3, + "D_2h", + "", + "D_2h", + std::vector{1.2, 0., 0., 0., 1.1, 0., 0., 0., 1.}, + std::vector{atomtype_{ + "C", + std::vector>{{0., 0., 0.}, {0.5, 0.5, 0.}, {0.5, 0., 0.5}, {0., 0.5, 0.5}}}}, + "D", + std::vector{0, 1, 2, 3}, + {}, + {}, + {{0, 1}, {0, 2}, {1, 0}, {1, 2}, {2, 0}, {2, 1}}, + {}}, + // 3 in x + stru_{ + 3, "C_1h", "", "D_2h", std::vector{3., -3., -3., -1., 1., -1., -1., -1., 1.}, - std::vector{atomtype_{"C", - std::vector>{ - {0., 0.1, 0.5}, {1. / 3., 0.1, 0.5},{2. / 3., 0.1, 0.5}}}} , "D", + std::vector{ + atomtype_{"C", std::vector>{{0., 0.1, 0.5}, {1. / 3., 0.1, 0.5}, {2. / 3., 0.1, 0.5}}}}, + "D", std::vector{1}, - std::map{{0, 2}}, + std::map{{0, 2}}, {}, {{0, 1}, {0, 2}, {1, 0}, {2, 0}}, {{{1, 2}, {2, 1}}, {{1, 1}, {2, 2}}}}, - //3 in y - stru_{3, + // 3 in y + stru_{ + 3, "C_1h", "", "D_2h", std::vector{1., -1., -1., -3., 3., -3., -1., -1., 1.}, - std::vector{atomtype_{"C", - std::vector>{ - {0.4, 0., 0.1}, {0.4, 1. / 3., 0.1},{0.4, 2. / 3., 0.1}}}} , "D", + std::vector{ + atomtype_{"C", std::vector>{{0.4, 0., 0.1}, {0.4, 1. / 3., 0.1}, {0.4, 2. / 3., 0.1}}}}, + "D", std::vector{2}, - std::map{{0, 1}}, + std::map{{0, 1}}, {}, {{0, 1}, {1, 0}, {1, 2}, {2, 1}}, {{{0, 2}, {2, 0}}, {{0, 0}, {2, 2}}}}, - //3 in z - stru_{3, + // 3 in z + stru_{ + 3, "C_1h", "", "D_2h", std::vector{1., -1., -1., -1., 1., -1., -3., -3., 3.}, - std::vector{atomtype_{"C", - std::vector>{ - {0.3, 0.1, 0.}, {0.3, 0.1, 1. / 3.},{0.3, 0.1, 2. / 3.}}}} , "D", + std::vector{ + atomtype_{"C", std::vector>{{0.3, 0.1, 0.}, {0.3, 0.1, 1. / 3.}, {0.3, 0.1, 2. 
/ 3.}}}}, + "D", std::vector{1}, - std::map{{0, 2}}, + std::map{{0, 2}}, {}, {{0, 2}, {1, 2}, {2, 0}, {2, 1}}, - {{{0, 0}, {1, 1}}, {{0, 1}, {1, 0}}} }, - //6 in xy + {{{0, 0}, {1, 1}}, {{0, 1}, {1, 0}}}}, + // 6 in xy stru_{6, - "C_1", - "", - "S_2", - std::vector{2., -2., -2., -3., 3., -3., -1., -1., 1.}, - std::vector{atomtype_{"C", - std::vector>{ - {0., 0., 0.1}, {0., 1. / 3., 0.1},{0., 2. / 3., 0.1},{0.5, 0., 0.1}, {0.5, 1. / 3., 0.1},{0.5, 2. / 3., 0.1}}}} , "D", - std::vector{0, 3}, - std::map{{1, 2}, {4, 5}}, - {} }, - //6 in yz + "C_1", + "", + "S_2", + std::vector{2., -2., -2., -3., 3., -3., -1., -1., 1.}, + std::vector{atomtype_{"C", + std::vector>{{0., 0., 0.1}, + {0., 1. / 3., 0.1}, + {0., 2. / 3., 0.1}, + {0.5, 0., 0.1}, + {0.5, 1. / 3., 0.1}, + {0.5, 2. / 3., 0.1}}}}, + "D", + std::vector{0, 3}, + std::map{{1, 2}, {4, 5}}, + {}}, + // 6 in yz stru_{6, - "C_1", - "", - "S_2", - std::vector{1., -1., -1., -2., 2., -2., -3., -3., 3.}, - std::vector{atomtype_{"C", - std::vector>{ - {0.1, 0., 0.}, {0.1, 0., 1. / 3.},{0.1, 0., 2. / 3.},{0.1, 0.5, 0.}, {0.1, 0.5, 1. / 3.},{0.1, 0.5, 2. / 3.}}}} , "D", - std::vector{2, 5}, - std::map{{0, 1}, {3, 4}}, - {} }, - //6 in zx + "C_1", + "", + "S_2", + std::vector{1., -1., -1., -2., 2., -2., -3., -3., 3.}, + std::vector{atomtype_{"C", + std::vector>{{0.1, 0., 0.}, + {0.1, 0., 1. / 3.}, + {0.1, 0., 2. / 3.}, + {0.1, 0.5, 0.}, + {0.1, 0.5, 1. / 3.}, + {0.1, 0.5, 2. / 3.}}}}, + "D", + std::vector{2, 5}, + std::map{{0, 1}, {3, 4}}, + {}}, + // 6 in zx stru_{6, - "C_1", - "", - "S_2", - std::vector{3., -3., -3., -1., 1., -1., -2., -2., 2.}, - std::vector{atomtype_{"C", - std::vector>{ - {0., 0.1, 0.}, {1. / 3., 0.1, 0.},{2. / 3., 0.1, 0.}, {0., 0.1, 0.5}, {1. / 3., 0.1, 0.5},{2. 
/ 3., 0.1, 0.5}}}} , "D", - std::vector{0, 3}, - std::map{{1, 2}, {4, 5}}, - {} }, - //hex: 3 in a1 - 231 + "C_1", + "", + "S_2", + std::vector{3., -3., -3., -1., 1., -1., -2., -2., 2.}, + std::vector{atomtype_{"C", + std::vector>{{0., 0.1, 0.}, + {1. / 3., 0.1, 0.}, + {2. / 3., 0.1, 0.}, + {0., 0.1, 0.5}, + {1. / 3., 0.1, 0.5}, + {2. / 3., 0.1, 0.5}}}}, + "D", + std::vector{0, 3}, + std::map{{1, 2}, {4, 5}}, + {}}, + // hex: 3 in a1 - 231 stru_{3, - "C_1h", - "", - "C_2v", - std::vector{0., 1.59516, 2.76289, 20., 0., 0., 0., 9.57096, 0.}, - std::vector{atomtype_{"Mo", - std::vector>{ - {2./3., 0.1859875, 0.22222222}, {2./3., 0.1859875, 0.55555555},{2./3., 0.1859875, 0.88888888}, - }}, - atomtype_{"S", - std::vector>{ - {1./3., 0.2642317, 0.11111111}, {1./3., 0.1077433, 0.11111111},{1./3., 0.2642317, 0.44444444}, - {1./3., 0.1077433, 0.44444444}, {1./3., 0.2642317, 0.77777777},{1./3., 0.1077433, 0.77777777}, - }}} - , "D", - {}, - {}, - std::vector>{{3, 4, 0, 1, 0}, {5, 6, 0, 1, 0}, {7, 8, 0, 1, 0}}, - {{0, 1}, {0, 2}, {1, 0}, {2, 0}}, - {} }, + "C_1h", + "", + "C_2v", + std::vector{0., 1.59516, 2.76289, 20., 0., 0., 0., 9.57096, 0.}, + std::vector{atomtype_{"Mo", + std::vector>{ + {2. / 3., 0.1859875, 0.22222222}, + {2. / 3., 0.1859875, 0.55555555}, + {2. / 3., 0.1859875, 0.88888888}, + }}, + atomtype_{"S", + std::vector>{ + {1. / 3., 0.2642317, 0.11111111}, + {1. / 3., 0.1077433, 0.11111111}, + {1. / 3., 0.2642317, 0.44444444}, + {1. / 3., 0.1077433, 0.44444444}, + {1. / 3., 0.2642317, 0.77777777}, + {1. 
/ 3., 0.1077433, 0.77777777}, + }}}, + "D", + {}, + {}, + std::vector>{{3, 4, 0, 1, 0}, {5, 6, 0, 1, 0}, {7, 8, 0, 1, 0}}, + {{0, 1}, {0, 2}, {1, 0}, {2, 0}}, + {}}, }; \ No newline at end of file diff --git a/source/source_cell/module_symmetry/test/symmetry_test_symtrz.cpp b/source/source_cell/module_symmetry/test/symmetry_test_symtrz.cpp index 45ea873e3f7..b48217fc812 100644 --- a/source/source_cell/module_symmetry/test/symmetry_test_symtrz.cpp +++ b/source/source_cell/module_symmetry/test/symmetry_test_symtrz.cpp @@ -7,121 +7,127 @@ * 4. function: `symmetrize_vec3_nat` * 5. function `symmetrize_mat3` * -***********************************************/ + ***********************************************/ // mock the useless functions -void output::printM3(std::ofstream& ofs, const std::string& description, const ModuleBase::Matrix3& m) {} -pseudo::pseudo() {} -pseudo::~pseudo() {} -Atom::Atom() {} -Atom::~Atom() {} -Atom_pseudo::Atom_pseudo() {} -Atom_pseudo::~Atom_pseudo() {} -UnitCell::UnitCell() {} -UnitCell::~UnitCell() {} -Magnetism::Magnetism() {} -Magnetism::~Magnetism() {} -SepPot::SepPot(){} -SepPot::~SepPot(){} -Sep_Cell::Sep_Cell() noexcept {} -Sep_Cell::~Sep_Cell() noexcept {} +void + output::printM3 (std::ofstream& ofs, const std::string& description, const ModuleBase::Matrix3& m) +{ +} +pseudo::pseudo () {} +pseudo::~pseudo () {} +Atom::Atom () {} +Atom::~Atom () {} +Atom_pseudo::Atom_pseudo () {} +Atom_pseudo::~Atom_pseudo () {} +UnitCell::UnitCell () {} +UnitCell::~UnitCell () {} +Magnetism::Magnetism () {} +Magnetism::~Magnetism () {} +SepPot::SepPot () {} +SepPot::~SepPot () {} +Sep_Cell::Sep_Cell () noexcept {} +Sep_Cell::~Sep_Cell () noexcept {} -inline std::vector allocate_pos(ModuleSymmetry::Symmetry& symm, UnitCell& ucell) +inline std::vector + allocate_pos (ModuleSymmetry::Symmetry& symm, UnitCell& ucell) { - std::vector pos(ucell.nat * 3, 0.0); + std::vector pos (ucell.nat * 3, 0.0); int iat = 0; for (int it = 0; it < ucell.ntype; it++) - { 
- for (int ia = 0; ia < ucell.atoms[it].na; ia++) { - pos[3 * iat] = ucell.atoms[it].taud[ia].x; - pos[3 * iat + 1] = ucell.atoms[it].taud[ia].y; - pos[3 * iat + 2] = ucell.atoms[it].taud[ia].z; - for (int k = 0; k < 3; ++k) - { - symm.check_translation(pos[iat * 3 + k], -floor(pos[iat * 3 + k])); - symm.check_boundary(pos[iat * 3 + k]); - } - ++iat; + for (int ia = 0; ia < ucell.atoms[it].na; ia++) + { + pos[3 * iat] = ucell.atoms[it].taud[ia].x; + pos[3 * iat + 1] = ucell.atoms[it].taud[ia].y; + pos[3 * iat + 2] = ucell.atoms[it].taud[ia].z; + for (int k = 0; k < 3; ++k) + { + symm.check_translation (pos[iat * 3 + k], -floor (pos[iat * 3 + k])); + symm.check_boundary (pos[iat * 3 + k]); + } + ++iat; + } } - } return pos; } -TEST_F(SymmetryTest, ForceSymmetry) +TEST_F (SymmetryTest, ForceSymmetry) { - auto check_force = [](stru_& conf, ModuleBase::matrix& force) - { - // 1. check zeros - for (auto iat : conf.force_zero_iat) - for (int j = 0; j < 3; ++j) - EXPECT_NEAR(force(iat, j), 0.0, DOUBLETHRESHOLD); - // 2. check opposites - for (auto oppo_pair : conf.force_oppo_iat) - for (int j = 0; j < 3; ++j) - EXPECT_NEAR(force(oppo_pair.first, j), -force(oppo_pair.second, j), DOUBLETHRESHOLD); - for (auto oppo_xyz : conf.force_oppo_iat_xyz) - for (int j = 0;j < 3;++j) - if (oppo_xyz[j + 2] == 1) - EXPECT_NEAR(force(oppo_xyz[0], j), -force(oppo_xyz[1], j), DOUBLETHRESHOLD); - else - EXPECT_NEAR(force(oppo_xyz[0], j), force(oppo_xyz[1], j), DOUBLETHRESHOLD); - }; + auto check_force = [] (stru_& conf, ModuleBase::matrix& force) + { + // 1. check zeros + for (auto iat: conf.force_zero_iat) + for (int j = 0; j < 3; ++j) + EXPECT_NEAR (force (iat, j), 0.0, DOUBLETHRESHOLD); + // 2. 
check opposites + for (auto oppo_pair: conf.force_oppo_iat) + for (int j = 0; j < 3; ++j) + EXPECT_NEAR (force (oppo_pair.first, j), -force (oppo_pair.second, j), DOUBLETHRESHOLD); + for (auto oppo_xyz: conf.force_oppo_iat_xyz) + for (int j = 0; j < 3; ++j) + if (oppo_xyz[j + 2] == 1) + EXPECT_NEAR (force (oppo_xyz[0], j), -force (oppo_xyz[1], j), DOUBLETHRESHOLD); + else + EXPECT_NEAR (force (oppo_xyz[0], j), force (oppo_xyz[1], j), DOUBLETHRESHOLD); + }; - for (int stru = 0; stru < supercell_lib.size(); ++stru) - { - ModuleSymmetry::Symmetry symm; - construct_ucell(supercell_lib[stru]); - symm.analy_sys(ucell.lat, ucell.st, ucell.atoms, ofs_running); + for (int stru = 0; stru < supercell_lib.size (); ++stru) + { + ModuleSymmetry::Symmetry symm; + construct_ucell (supercell_lib[stru]); + symm.analy_sys (ucell.lat, ucell.st, ucell.atoms, ofs_running); - ModuleBase::matrix force(ucell.nat, 3, true); - //generate random number for force and restrict to [-100,100) - for (int i = 0;i < ucell.nat;++i) - for (int j = 0;j < 3;++j) - force(i, j) = double(rand()) / double(RAND_MAX) * 200 - 100; + ModuleBase::matrix force (ucell.nat, 3, true); + // generate random number for force and restrict to [-100,100) + for (int i = 0; i < ucell.nat; ++i) + for (int j = 0; j < 3; ++j) + force (i, j) = double (rand ()) / double (RAND_MAX) * 200 - 100; - std::vector pos = allocate_pos(symm, ucell); - symm.symmetrize_vec3_nat(force.c); - check_force(supercell_lib[stru], force); - } + std::vector pos = allocate_pos (symm, ucell); + symm.symmetrize_vec3_nat (force.c); + check_force (supercell_lib[stru], force); + } } -TEST_F(SymmetryTest, StressSymmetry) +TEST_F (SymmetryTest, StressSymmetry) { - auto check_stress = [](stru_& conf, ModuleBase::matrix& stress) + auto check_stress = [] (stru_& conf, ModuleBase::matrix& stress) { // 1. 
check zeros - for (auto elm : conf.stress_zero) - EXPECT_NEAR(stress(elm.first, elm.second), 0.0, DOUBLETHRESHOLD); + for (auto elm: conf.stress_zero) + EXPECT_NEAR (stress (elm.first, elm.second), 0.0, DOUBLETHRESHOLD); // 2. check equals - for (auto eq_set : conf.stress_eq) - for (int i = 1;i < eq_set.size();++i) - EXPECT_NEAR(stress(eq_set[i].first, eq_set[i].second), stress(eq_set[0].first, eq_set[0].second), DOUBLETHRESHOLD); + for (auto eq_set: conf.stress_eq) + for (int i = 1; i < eq_set.size (); ++i) + EXPECT_NEAR (stress (eq_set[i].first, eq_set[i].second), + stress (eq_set[0].first, eq_set[0].second), + DOUBLETHRESHOLD); }; - for (int stru = 0; stru < supercell_lib.size(); ++stru) - { - ModuleSymmetry::Symmetry symm; - construct_ucell(supercell_lib[stru]); - symm.analy_sys(ucell.lat, ucell.st, ucell.atoms, ofs_running); - - ModuleBase::matrix stress(3, 3, true); - //generate random number for stress and restrict to [-1e5,1e5) - for (int i = 0;i < 3;++i) - for (int j = 0;j < 3;++j) - stress(i, j) = double(rand()) / double(RAND_MAX) * 2e5 - 1e5; + for (int stru = 0; stru < supercell_lib.size (); ++stru) + { + ModuleSymmetry::Symmetry symm; + construct_ucell (supercell_lib[stru]); + symm.analy_sys (ucell.lat, ucell.st, ucell.atoms, ofs_running); - symm.symmetrize_mat3(stress, ucell.lat); - check_stress(supercell_lib[stru], stress); + ModuleBase::matrix stress (3, 3, true); + // generate random number for stress and restrict to [-1e5,1e5) + for (int i = 0; i < 3; ++i) + for (int j = 0; j < 3; ++j) + stress (i, j) = double (rand ()) / double (RAND_MAX) * 2e5 - 1e5; - } + symm.symmetrize_mat3 (stress, ucell.lat); + check_stress (supercell_lib[stru], stress); + } } -int main(int argc, char** argv) +int + main (int argc, char** argv) { - srand(time(NULL)); // for random number generator - MPI_Init(&argc, &argv); - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); - MPI_Finalize(); + srand (time (NULL)); // for random number generator + 
MPI_Init (&argc, &argv); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); + MPI_Finalize (); return result; } diff --git a/source/source_cell/parallel_kpoints.cpp b/source/source_cell/parallel_kpoints.cpp index 2ca14090fba..ba385aebe9c 100644 --- a/source/source_cell/parallel_kpoints.cpp +++ b/source/source_cell/parallel_kpoints.cpp @@ -4,7 +4,8 @@ #include "source_base/parallel_global.h" // the kpoints here are reduced after symmetry applied. -void Parallel_Kpoints::kinfo(int& nkstot_in, +void + Parallel_Kpoints::kinfo (int& nkstot_in, const int& kpar_in, const int& my_pool_in, const int& rank_in_pool_in, @@ -19,12 +20,12 @@ void Parallel_Kpoints::kinfo(int& nkstot_in, this->nproc = nproc_in; this->nspin = nspin_in; - Parallel_Common::bcast_int(nkstot_in); - this->get_nks_pool(nkstot_in); // assign k-points to each pool - this->get_startk_pool(nkstot_in); // get the start k-point index for each pool - this->get_whichpool(nkstot_in); // get the pool index for each k-point + Parallel_Common::bcast_int (nkstot_in); + this->get_nks_pool (nkstot_in); // assign k-points to each pool + this->get_startk_pool (nkstot_in); // get the start k-point index for each pool + this->get_whichpool (nkstot_in); // get the pool index for each k-point - this->set_startpro_pool(); // get the start processor index for each pool + this->set_startpro_pool (); // get the start processor index for each pool this->nkstot_np = nkstot_in; @@ -44,92 +45,97 @@ void Parallel_Kpoints::kinfo(int& nkstot_in, } #ifdef __MPI -void Parallel_Kpoints::get_whichpool(const int& nkstot) +void + Parallel_Kpoints::get_whichpool (const int& nkstot) { - this->whichpool.resize(nkstot, 0); + this->whichpool.resize (nkstot, 0); for (int i = 0; i < this->kpar; i++) - { - for (int ik = 0; ik < this->nks_pool[i]; ik++) { - const int k_now = ik + startk_pool[i]; - this->whichpool[k_now] = i; + for (int ik = 0; ik < this->nks_pool[i]; ik++) + { + const int k_now = ik + startk_pool[i]; + 
this->whichpool[k_now] = i; + } } - } return; } -void Parallel_Kpoints::get_nks_pool(const int& nkstot) +void + Parallel_Kpoints::get_nks_pool (const int& nkstot) { - nks_pool.resize(this->kpar, 0); + nks_pool.resize (this->kpar, 0); const int nks_ave = nkstot / this->kpar; const int remain = nkstot % this->kpar; for (int i = 0; i < this->kpar; i++) - { - this->nks_pool[i] = nks_ave; - if (i < remain) { - nks_pool[i]++; + this->nks_pool[i] = nks_ave; + if (i < remain) + { + nks_pool[i]++; + } } - } return; } -void Parallel_Kpoints::get_startk_pool(const int& nkstot) +void + Parallel_Kpoints::get_startk_pool (const int& nkstot) { - startk_pool.resize(this->kpar, 0); + startk_pool.resize (this->kpar, 0); startk_pool[0] = 0; for (int i = 1; i < this->kpar; i++) - { - startk_pool[i] = startk_pool[i - 1] + nks_pool[i - 1]; - } + { + startk_pool[i] = startk_pool[i - 1] + nks_pool[i - 1]; + } return; } -void Parallel_Kpoints::set_startpro_pool() +void + Parallel_Kpoints::set_startpro_pool () { - startpro_pool.resize(this->kpar, 0); + startpro_pool.resize (this->kpar, 0); const int nproc_ave = this->nproc / this->kpar; const int remain = this->nproc % this->kpar; startpro_pool[0] = 0; for (int i = 1; i < this->kpar; i++) - { - startpro_pool[i] = startpro_pool[i - 1] + nproc_ave; - if (i - 1 < remain) { - startpro_pool[i]++; + startpro_pool[i] = startpro_pool[i - 1] + nproc_ave; + if (i - 1 < remain) + { + startpro_pool[i]++; + } } - } return; } - // gather kpoints from all processor pools, only need to be called by the first processor of each pool. 
-void Parallel_Kpoints::gatherkvec(const std::vector>& vec_local, +void + Parallel_Kpoints::gatherkvec (const std::vector>& vec_local, std::vector>& vec_global) const { - vec_global.resize(this->nkstot_np, ModuleBase::Vector3(0.0, 0.0, 0.0)); + vec_global.resize (this->nkstot_np, ModuleBase::Vector3 (0.0, 0.0, 0.0)); for (int i = 0; i < this->nks_np; ++i) - { - - if (this->rank_in_pool == 0) { - vec_global[i + startk_pool[this->my_pool]] = vec_local[i]; + + if (this->rank_in_pool == 0) + { + vec_global[i + startk_pool[this->my_pool]] = vec_local[i]; + } } - } - MPI_Allreduce(MPI_IN_PLACE, &vec_global[0], 3 * this->nkstot_np, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce (MPI_IN_PLACE, &vec_global[0], 3 * this->nkstot_np, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); return; } #endif -void Parallel_Kpoints::pool_collection(double& value, const double* wk, const int& ik) +void + Parallel_Kpoints::pool_collection (double& value, const double* wk, const int& ik) { #ifdef __MPI @@ -138,71 +144,75 @@ void Parallel_Kpoints::pool_collection(double& value, const double* wk, const in const int pool = this->whichpool[ik]; if (this->rank_in_pool == 0) - { - if (this->my_pool == 0) { - if (pool == 0) - - { - value = wk[ik_now]; - } + if (this->my_pool == 0) + { + if (pool == 0) + + { + value = wk[ik_now]; + } + else + { + MPI_Status ierror; + MPI_Recv (&value, 1, MPI_DOUBLE, this->startpro_pool[pool], ik, MPI_COMM_WORLD, &ierror); + } + } else - { - MPI_Status ierror; - MPI_Recv(&value, 1, MPI_DOUBLE, this->startpro_pool[pool], ik, MPI_COMM_WORLD, &ierror); - - } + { + if (this->my_pool == pool) + { + MPI_Send (&wk[ik_now], 1, MPI_DOUBLE, 0, ik, MPI_COMM_WORLD); + } + } } - else + else { - if (this->my_pool == pool) - { - MPI_Send(&wk[ik_now], 1, MPI_DOUBLE, 0, ik, MPI_COMM_WORLD); - } } - } - else - { - } - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier (MPI_COMM_WORLD); #else value = wk[ik]; #endif return; } -void Parallel_Kpoints::pool_collection(double* value_re, +void + 
Parallel_Kpoints::pool_collection (double* value_re, double* value_im, const ModuleBase::realArray& re, const ModuleBase::realArray& im, const int& ik) { - const int dim2 = re.getBound2(); - const int dim3 = re.getBound3(); - const int dim4 = re.getBound4(); - assert(re.getBound2() == im.getBound2()); - assert(re.getBound3() == im.getBound3()); - assert(re.getBound4() == im.getBound4()); + const int dim2 = re.getBound2 (); + const int dim3 = re.getBound3 (); + const int dim4 = re.getBound4 (); + assert (re.getBound2 () == im.getBound2 ()); + assert (re.getBound3 () == im.getBound3 ()); + assert (re.getBound4 () == im.getBound4 ()); const int dim = dim2 * dim3 * dim4; - pool_collection_aux(value_re, re, dim, ik); - pool_collection_aux(value_im, im, dim, ik); + pool_collection_aux (value_re, re, dim, ik); + pool_collection_aux (value_im, im, dim, ik); return; } -void Parallel_Kpoints::pool_collection(std::complex* value, const ModuleBase::ComplexArray& w, const int& ik) const +void + Parallel_Kpoints::pool_collection (std::complex* value, + const ModuleBase::ComplexArray& w, + const int& ik) const { - const int dim2 = w.getBound2(); - const int dim3 = w.getBound3(); - const int dim4 = w.getBound4(); + const int dim2 = w.getBound2 (); + const int dim3 = w.getBound3 (); + const int dim4 = w.getBound4 (); const int dim = dim2 * dim3 * dim4; - pool_collection_aux(value, w, dim, ik); + pool_collection_aux (value, w, dim, ik); } template -void Parallel_Kpoints::pool_collection_aux(T* value, const V& w, const int& dim, const int& ik) const +void + Parallel_Kpoints::pool_collection_aux (T* value, const V& w, const int& dim, const int& ik) const { #ifdef __MPI const int ik_now = ik - this->startk_pool[this->my_pool]; @@ -211,52 +221,58 @@ void Parallel_Kpoints::pool_collection_aux(T* value, const V& w, const int& dim, T* p = &w.ptr[begin]; // temprary restrict kpar=1 for NSPIN=2 case for generating_orbitals int pool = 0; - if (this->nspin != 2) - { - pool = 
this->whichpool[ik]; - } + if (this->nspin != 2) + { + pool = this->whichpool[ik]; + } if (this->rank_in_pool == 0) - { - if (this->my_pool == 0) - { - if (pool == 0) - { - for (int i = 0; i < dim; i++) + if (this->my_pool == 0) + { - value[i] = *p; - ++p; + if (pool == 0) + { + for (int i = 0; i < dim; i++) + { + value[i] = *p; + ++p; + } + } + else + { + MPI_Status ierror; + MPI_Recv (value, + dim, + MPI_DOUBLE, + this->startpro_pool[pool], + ik * 2 + 0, + MPI_COMM_WORLD, + &ierror); + } } - } else - { - MPI_Status ierror; - MPI_Recv(value, dim, MPI_DOUBLE, this->startpro_pool[pool], ik * 2 + 0, MPI_COMM_WORLD, &ierror); - } + { + if (this->my_pool == pool) + { + MPI_Send (p, dim, MPI_DOUBLE, 0, ik * 2 + 0, MPI_COMM_WORLD); + } + } } - else + else { - if (this->my_pool == pool) - { - MPI_Send(p, dim, MPI_DOUBLE, 0, ik * 2 + 0, MPI_COMM_WORLD); - } } - } - else - { - } - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier (MPI_COMM_WORLD); #else // data transfer ends. const int begin = ik * dim; T* p = &w.ptr[begin]; for (int i = 0; i < dim; i++) - { - value[i] = *p; - ++p; - } - // data transfer ends. + { + value[i] = *p; + ++p; + } + // data transfer ends. #endif } diff --git a/source/source_cell/parallel_kpoints.h b/source/source_cell/parallel_kpoints.h index f29ae540fb7..6b02f00759b 100644 --- a/source/source_cell/parallel_kpoints.h +++ b/source/source_cell/parallel_kpoints.h @@ -9,28 +9,28 @@ class Parallel_Kpoints { public: - Parallel_Kpoints(){}; - ~Parallel_Kpoints(){}; + Parallel_Kpoints () {}; + ~Parallel_Kpoints () {}; - void kinfo(int& nkstot_in, - const int& kpar_in, - const int& my_pool_in, - const int& rank_in_pool_in, - const int& nproc_in, - const int& nspin_in); + void kinfo (int& nkstot_in, + const int& kpar_in, + const int& my_pool_in, + const int& rank_in_pool_in, + const int& nproc_in, + const int& nspin_in); // collect value from each pool to wk. 
- void pool_collection(double& value, const double* wk, const int& ik); + void pool_collection (double& value, const double* wk, const int& ik); // collect value from each pool to overlap. - void pool_collection(double* valuea, - double* valueb, - const ModuleBase::realArray& a, - const ModuleBase::realArray& b, - const int& ik); - void pool_collection(std::complex* value, const ModuleBase::ComplexArray& w, const int& ik) const; + void pool_collection (double* valuea, + double* valueb, + const ModuleBase::realArray& a, + const ModuleBase::realArray& b, + const int& ik); + void pool_collection (std::complex* value, const ModuleBase::ComplexArray& w, const int& ik) const; template - void pool_collection_aux(T* value, const V& w, const int& dim, const int& ik) const; + void pool_collection_aux (T* value, const V& w, const int& dim, const int& ik) const; #ifdef __MPI /** * @brief gather kpoints from all processors @@ -38,8 +38,8 @@ class Parallel_Kpoints * @param vec_local kpoint vector in local processor * @param vec_global kpoint vector in all processors */ - void gatherkvec(const std::vector>& vec_local, - std::vector>& vec_global) const; + void gatherkvec (const std::vector>& vec_local, + std::vector>& vec_global) const; #endif // information about pool, dim: KPAR @@ -56,16 +56,18 @@ class Parallel_Kpoints int nks_np = 0; // number of k-points without spin in the present pool // get the first processor in the pool - int get_startpro_pool(const int& pool) const + int + get_startpro_pool (const int& pool) const { return startpro_pool[pool]; } // get the maximum number of k-points in all pools - int get_max_nks_pool() const + int + get_max_nks_pool () const { - return *std::max_element(nks_pool.begin(), nks_pool.end()); + return *std::max_element (nks_pool.begin (), nks_pool.end ()); } public: @@ -77,11 +79,11 @@ class Parallel_Kpoints private: std::vector startpro_pool; // the first processor in each pool #ifdef __MPI - void get_nks_pool(const int& nkstot); - void 
get_startk_pool(const int& nkstot); - void get_whichpool(const int& nkstot); + void get_nks_pool (const int& nkstot); + void get_startk_pool (const int& nkstot); + void get_whichpool (const int& nkstot); - void set_startpro_pool(); + void set_startpro_pool (); #endif }; diff --git a/source/source_cell/print_cell.cpp b/source/source_cell/print_cell.cpp index b9c3abdf211..cbb0c181c53 100644 --- a/source/source_cell/print_cell.cpp +++ b/source/source_cell/print_cell.cpp @@ -8,162 +8,164 @@ namespace unitcell { - void print_tau(Atom* atoms, - const std::string& Coordinate, - const int ntype, - const double lat0, - std::ofstream &ofs) - { - ModuleBase::TITLE("UnitCell", "print_tau"); - // assert (direct || Coordinate == "Cartesian" || Coordinate == "Cartesian_angstrom"); // this line causes abort in unittest ReadAtomPositionsCACXY. - // previously there are two if-statements, the first is `if(Coordinate == "Direct")` and the second is `if(Coordinate == "Cartesian" || Coordiante == "Cartesian_angstrom")` - // however the Coordinate can also be value among Cartesian_angstrom_center_xy, Cartesian_angstrom_center_xz, Cartesian_angstrom_center_yz and Cartesian_angstrom_center_xyz +void + print_tau (Atom* atoms, const std::string& Coordinate, const int ntype, const double lat0, std::ofstream& ofs) +{ + ModuleBase::TITLE ("UnitCell", "print_tau"); + // assert (direct || Coordinate == "Cartesian" || Coordinate == "Cartesian_angstrom"); // this line causes abort in + // unittest ReadAtomPositionsCACXY. previously there are two if-statements, the first is `if(Coordinate == + // "Direct")` and the second is `if(Coordinate == "Cartesian" || Coordiante == "Cartesian_angstrom")` however the + // Coordinate can also be value among Cartesian_angstrom_center_xy, Cartesian_angstrom_center_xz, + // Cartesian_angstrom_center_yz and Cartesian_angstrom_center_xyz - // if Coordinate has value one of them, this print_tau will not print anything. 
- std::regex pattern("Direct|Cartesian(_angstrom)?(_center_(xy|xz|yz|xyz))?"); - assert(std::regex_search(Coordinate, pattern)); - bool direct = (Coordinate == "Direct"); + // if Coordinate has value one of them, this print_tau will not print anything. + std::regex pattern ("Direct|Cartesian(_angstrom)?(_center_(xy|xz|yz|xyz))?"); + assert (std::regex_search (Coordinate, pattern)); + bool direct = (Coordinate == "Direct"); - //---------------------- - // print atom positions - //---------------------- - std::string table; - table += direct? " DIRECT COORDINATES\n": FmtCore::format(" CARTESIAN COORDINATES ( UNIT = %15.8f Bohr )\n", lat0); - table += FmtCore::format("%5s%19s%19s%19s%8s\n", "atom", "x", "y", "z", "mag"); - for(int it = 0; it < ntype; it++) + //---------------------- + // print atom positions + //---------------------- + std::string table; + table + += direct ? " DIRECT COORDINATES\n" : FmtCore::format (" CARTESIAN COORDINATES ( UNIT = %15.8f Bohr )\n", lat0); + table += FmtCore::format ("%5s%19s%19s%19s%8s\n", "atom", "x", "y", "z", "mag"); + for (int it = 0; it < ntype; it++) { for (int ia = 0; ia < atoms[it].na; ia++) - { - const double& x = direct? atoms[it].taud[ia].x: atoms[it].tau[ia].x; - const double& y = direct? atoms[it].taud[ia].y: atoms[it].tau[ia].y; - const double& z = direct? atoms[it].taud[ia].z: atoms[it].tau[ia].z; - table += FmtCore::format("%5s%19.12f%19.12f%19.12f%8.4f\n", - atoms[it].label, - x, - y, - z, - atoms[it].mag[ia]); - } + { + const double& x = direct ? atoms[it].taud[ia].x : atoms[it].tau[ia].x; + const double& y = direct ? atoms[it].taud[ia].y : atoms[it].tau[ia].y; + const double& z = direct ? 
atoms[it].taud[ia].z : atoms[it].tau[ia].z; + table += FmtCore::format ("%5s%19.12f%19.12f%19.12f%8.4f\n", + atoms[it].label, + x, + y, + z, + atoms[it].mag[ia]); + } } - table += "\n"; - ofs << table; + table += "\n"; + ofs << table; + // print velocities + ofs << " ATOMIC VELOCITIES" << std::endl; + ofs << std::setprecision (12); + ofs << std::setw (5) << "atom" << std::setw (19) << "vx" << std::setw (19) << "vy" << std::setw (19) << "vz" + << std::endl; - // print velocities - ofs << " ATOMIC VELOCITIES" << std::endl; - ofs << std::setprecision(12); - ofs << std::setw(5) << "atom" - << std::setw(19) << "vx" - << std::setw(19) << "vy" - << std::setw(19) << "vz" - << std::endl; - - for(int it = 0; it < ntype; it++) - { - for (int ia = 0; ia < atoms[it].na; ia++) - { - ofs << std::setw(5) << atoms[it].label; - ofs << " " << std::setw(18) << atoms[it].vel[ia].x; - ofs << " " << std::setw(18) << atoms[it].vel[ia].y; - ofs << " " << std::setw(18) << atoms[it].vel[ia].z; - ofs << std::endl; - } - } - ofs << std::endl; - ofs << std::setprecision(6); // return to 6, as original - + for (int it = 0; it < ntype; it++) + { + for (int ia = 0; ia < atoms[it].na; ia++) + { + ofs << std::setw (5) << atoms[it].label; + ofs << " " << std::setw (18) << atoms[it].vel[ia].x; + ofs << " " << std::setw (18) << atoms[it].vel[ia].y; + ofs << " " << std::setw (18) << atoms[it].vel[ia].z; + ofs << std::endl; + } + } + ofs << std::endl; + ofs << std::setprecision (6); // return to 6, as original - return; - } + return; +} - void print_stru_file(const UnitCell& ucell, - const Atom* atoms, - const ModuleBase::Matrix3& latvec, - const std::string& fn, - const int& nspin, - const bool& direct, - const bool& vel, - const bool& magmom, - const bool& orb, - const bool& dpks_desc, - const int& iproc) - { - ModuleBase::TITLE("UnitCell","print_stru_file"); - if (iproc != 0) +void + print_stru_file (const UnitCell& ucell, + const Atom* atoms, + const ModuleBase::Matrix3& latvec, + const std::string& 
fn, + const int& nspin, + const bool& direct, + const bool& vel, + const bool& magmom, + const bool& orb, + const bool& dpks_desc, + const int& iproc) +{ + ModuleBase::TITLE ("UnitCell", "print_stru_file"); + if (iproc != 0) { return; // old: if(GlobalV::MY_RANK != 0) return; } - // ATOMIC_SPECIES - std::string str = "ATOMIC_SPECIES\n"; - for(int it=0; it -pseudo::pseudo() -{ -} +pseudo::pseudo () {} -pseudo::~pseudo() -{ -} +pseudo::~pseudo () {} -void pseudo::check_betar() +void + pseudo::check_betar () { - bool min_flag = false; - for (int ib = 0; ib < nbeta; ib++) - { - for (int ir = 0; ir < mesh; ir++) - { - // Get the bit representation of the double - uint64_t bits = *(uint64_t*)&betar(ib, ir); - // Extract exponent field (bits 52-62) - uint64_t exponent = (bits >> 52) & 0x7FF; - // Define exponent threshold for 1e-30 - // Calculated as: bias + floor(log2(1e-30)) - // Where bias = 1023 and log2(1e-30) ≈ -99.657 - // Thus threshold is approximately 923 - if ((exponent <= 923)) - { - min_flag = true; - betar(ib, ir) = 0.0; - } - } - } - if (min_flag) - { - std::cout << " WARNING: some of potential function is set to zero cause of less than 1e-30.\n"; - } + bool min_flag = false; + for (int ib = 0; ib < nbeta; ib++) + { + for (int ir = 0; ir < mesh; ir++) + { + // Get the bit representation of the double + uint64_t bits = *(uint64_t*)&betar (ib, ir); + // Extract exponent field (bits 52-62) + uint64_t exponent = (bits >> 52) & 0x7FF; + // Define exponent threshold for 1e-30 + // Calculated as: bias + floor(log2(1e-30)) + // Where bias = 1023 and log2(1e-30) ≈ -99.657 + // Thus threshold is approximately 923 + if ((exponent <= 923)) + { + min_flag = true; + betar (ib, ir) = 0.0; + } + } + } + if (min_flag) + { + std::cout << " WARNING: some of potential function is set to zero cause of less than 1e-30.\n"; + } } -void pseudo::print_pseudo(std::ofstream& ofs) const +void + pseudo::print_pseudo (std::ofstream& ofs) const { - print_pseudo_vl(ofs); - ofs << "\n 
pseudo : "; - ofs << "\n kkbeta " << kkbeta; - ofs << "\n nh " << nh; - output::printr1_d(ofs, " lll : ", lll.data(), nbeta); - output::printrm(ofs, " betar : ", betar); - output::printrm(ofs, " dion : ", dion); - ofs << "\n ----------------------"; + print_pseudo_vl (ofs); + ofs << "\n pseudo : "; + ofs << "\n kkbeta " << kkbeta; + ofs << "\n nh " << nh; + output::printr1_d (ofs, " lll : ", lll.data (), nbeta); + output::printrm (ofs, " betar : ", betar); + output::printrm (ofs, " dion : ", dion); + ofs << "\n ----------------------"; } -void pseudo::print_pseudo_atom(std::ofstream& ofs) const +void + pseudo::print_pseudo_atom (std::ofstream& ofs) const { - print_pseudo_h(ofs); - ofs << "\n pseudo_atom : "; - ofs << "\n msh " << msh; -// ofs << "\n nchi " << nchi; - output::printr1_d(ofs, " r : ", r.data(), mesh); - output::printr1_d(ofs, " rab : ", rab.data(), mesh); - output::printr1_d(ofs, " rho_atc : ", rho_atc.data(), mesh); - output::printr1_d(ofs, " rho_at : ", rho_at.data(), mesh); - output::printr1_d(ofs," jchi : ", jchi.data(), nchi); - output::printrm(ofs, " chi : ", chi); - ofs << "\n ----------------------"; + print_pseudo_h (ofs); + ofs << "\n pseudo_atom : "; + ofs << "\n msh " << msh; + // ofs << "\n nchi " << nchi; + output::printr1_d (ofs, " r : ", r.data (), mesh); + output::printr1_d (ofs, " rab : ", rab.data (), mesh); + output::printr1_d (ofs, " rho_atc : ", rho_atc.data (), mesh); + output::printr1_d (ofs, " rho_at : ", rho_at.data (), mesh); + output::printr1_d (ofs, " jchi : ", jchi.data (), nchi); + output::printrm (ofs, " chi : ", chi); + ofs << "\n ----------------------"; } - -void pseudo::print_pseudo_vl(std::ofstream& ofs) const +void + pseudo::print_pseudo_vl (std::ofstream& ofs) const { - ofs << "\n pseudo_vl:"; - print_pseudo_atom(ofs); - output::printr1_d(ofs, "vloc_at : ", vloc_at.data(), mesh); - ofs << "\n ----------------------------------- "; + ofs << "\n pseudo_vl:"; + print_pseudo_atom (ofs); + output::printr1_d (ofs, 
"vloc_at : ", vloc_at.data (), mesh); + ofs << "\n ----------------------------------- "; } -void pseudo::print_pseudo_h(std::ofstream& ofs) const +void + pseudo::print_pseudo_h (std::ofstream& ofs) const { ofs << "\n pseudo_info :"; ofs << "\n nv " << nv; @@ -92,9 +92,8 @@ void pseudo::print_pseudo_h(std::ofstream& ofs) const ofs << "\n mesh " << mesh; ofs << "\n nchi " << nchi; ofs << "\n nbeta " << nbeta; -// out.printr1_d(ofs," els: ", els, nchi); - output::printr1_d(ofs, " lchi: ", lchi.data(), nchi); - output::printr1_d(ofs, " oc: ", oc.data(), nchi); + // out.printr1_d(ofs," els: ", els, nchi); + output::printr1_d (ofs, " lchi: ", lchi.data (), nchi); + output::printr1_d (ofs, " oc: ", oc.data (), nchi); ofs << "\n ----------------------"; } - diff --git a/source/source_cell/pseudo.h b/source/source_cell/pseudo.h index 11dee21af36..51aef8af9c0 100644 --- a/source/source_cell/pseudo.h +++ b/source/source_cell/pseudo.h @@ -13,8 +13,8 @@ class pseudo { public: - pseudo(); - ~pseudo(); + pseudo (); + ~pseudo (); // bool has_so = false; // if .true. includes spin-orbit @@ -24,7 +24,7 @@ class pseudo bool tvanp = false; // .true. if Ultrasoft bool nlcc = false; // Non linear core corrections(bool) std::string xc_func; // Exch-Corr type - double zv = 0; // z valence + double zv = 0; // z valence double etotps = 0.0; // total energy double ecutwfc = 0.0; // suggested cut-off for wfc double ecutrho = 0.0; // suggested cut-off for rho @@ -81,12 +81,12 @@ class pseudo * Subsequent values following non-normal numbers will be reset to zero * to prevent potential computational issues arising from invalid data. 
*/ - void check_betar(); + void check_betar (); - void print_pseudo_h(std::ofstream& ofs) const; - void print_pseudo_atom(std::ofstream& ofs) const; - void print_pseudo_vl(std::ofstream& ofs) const; - void print_pseudo(std::ofstream& ofs) const; + void print_pseudo_h (std::ofstream& ofs) const; + void print_pseudo_atom (std::ofstream& ofs) const; + void print_pseudo_vl (std::ofstream& ofs) const; + void print_pseudo (std::ofstream& ofs) const; }; #endif // PSEUDO_H diff --git a/source/source_cell/read_atom_species.cpp b/source/source_cell/read_atom_species.cpp index 4c78551c9bb..cc8c286c183 100644 --- a/source/source_cell/read_atom_species.cpp +++ b/source/source_cell/read_atom_species.cpp @@ -9,11 +9,10 @@ namespace unitcell { -bool read_atom_species(std::ifstream& ifa, - std::ofstream& ofs_running, - UnitCell& ucell) +bool + read_atom_species (std::ifstream& ifa, std::ofstream& ofs_running, UnitCell& ucell) { - ModuleBase::TITLE("UnitCell","read_atom_species"); + ModuleBase::TITLE ("UnitCell", "read_atom_species"); const int ntype = ucell.ntype; std::string word; @@ -21,111 +20,115 @@ bool read_atom_species(std::ifstream& ifa, //========================================== // read in information of each type of atom //========================================== - if( ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "ATOMIC_SPECIES") ) - { - ModuleBase::GlobalFunc::OUT(ofs_running,"Number of elements",ntype); - for (int i = 0;i < ntype;i++) + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "ATOMIC_SPECIES")) { - std::string one_line; - std::string one_string; - std::getline(ifa, one_line); - std::stringstream ss; - ss << one_line; - ss >> ucell.atom_label[i] >> ucell.atom_mass[i]; - ucell.pseudo_fn[i] = "auto"; - ucell.pseudo_type[i] = "auto"; - - bool end = false; - if (ss >> one_string) - { - if (one_string[0] != '#') - { - ucell.pseudo_fn[i] = one_string; - } - else + ModuleBase::GlobalFunc::OUT (ofs_running, "Number of elements", ntype); + for (int i = 0; i < 
ntype; i++) { - end = true; + std::string one_line; + std::string one_string; + std::getline (ifa, one_line); + std::stringstream ss; + ss << one_line; + ss >> ucell.atom_label[i] >> ucell.atom_mass[i]; + ucell.pseudo_fn[i] = "auto"; + ucell.pseudo_type[i] = "auto"; + + bool end = false; + if (ss >> one_string) + { + if (one_string[0] != '#') + { + ucell.pseudo_fn[i] = one_string; + } + else + { + end = true; + } + } + + if (!end && ss >> one_string && one_string[0] != '#') + { + if (one_string == "auto" || one_string == "upf" || one_string == "vwr" + || one_string == "upf201" || one_string == "blps") + { + ucell.pseudo_type[i] = one_string; + } + else if (one_string == "1/r") + { + ucell.atoms[i].coulomb_potential = true; + } + else + { + GlobalV::ofs_warning << "unrecognized pseudopotential type: " << one_string + << ", check your STRU file." << std::endl; + ModuleBase::WARNING_QUIT ("read_atom_species", + "unrecognized pseudopotential type."); + } + } + + // Peize Lin test for bsse 2021.04.07 + const std::string bsse_label = "empty"; + ucell.atoms[i].flag_empty_element = (search (ucell.atom_label[i].begin (), + ucell.atom_label[i].end (), + bsse_label.begin (), + bsse_label.end ()) + != ucell.atom_label[i].end ()) + ? 
true + : false; } - } + } - if (!end && ss >> one_string && one_string[0] != '#') - { - if (one_string == "auto" || one_string == "upf" - || one_string == "vwr" || one_string == "upf201" || one_string == "blps") + if ((PARAM.inp.basis_type == "lcao") || (PARAM.inp.basis_type == "lcao_in_pw") + || ((PARAM.inp.basis_type == "pw") && (PARAM.inp.init_wfc.substr (0, 3) == "nao")) + || PARAM.inp.onsite_radius > 0.0) + { + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "NUMERICAL_ORBITAL")) { - ucell.pseudo_type[i] = one_string; + for (int i = 0; i < ntype; i++) + { + ifa >> ucell.orbital_fn[i]; + } } - else if (one_string == "1/r") + // caoyu add 2021-03-16 + if (PARAM.globalv.deepks_setorb) { - ucell.atoms[i].coulomb_potential = true; + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "NUMERICAL_DESCRIPTOR")) + { + ifa >> ucell.descriptor_file; + } } - else + else { - GlobalV::ofs_warning << "unrecognized pseudopotential type: " - << one_string << ", check your STRU file." << std::endl; - ModuleBase::WARNING_QUIT("read_atom_species", "unrecognized pseudopotential type."); + ucell.descriptor_file = PARAM.inp.orbital_dir + ucell.orbital_fn[0]; } - } - - // Peize Lin test for bsse 2021.04.07 - const std::string bsse_label = "empty"; - ucell.atoms[i].flag_empty_element = - (search( ucell.atom_label[i].begin(), ucell.atom_label[i].end(), - bsse_label.begin(), bsse_label.end() ) != ucell.atom_label[i].end()) - ? 
true : false; } - } - - if((PARAM.inp.basis_type == "lcao") - ||(PARAM.inp.basis_type == "lcao_in_pw") - ||((PARAM.inp.basis_type == "pw")&&(PARAM.inp.init_wfc.substr(0, 3) == "nao")) - || PARAM.inp.onsite_radius > 0.0) - { - if( ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "NUMERICAL_ORBITAL") ) - { - for(int i=0; i> ucell.orbital_fn[i]; - } - } - // caoyu add 2021-03-16 - if(PARAM.globalv.deepks_setorb) - { - if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "NUMERICAL_DESCRIPTOR")) { - ifa >> ucell.descriptor_file; - } - } - else - { - ucell.descriptor_file = PARAM.inp.orbital_dir + ucell.orbital_fn[0]; - } - } #ifdef __LCAO - // Peize Lin add 2016-09-23 -#ifdef __MPI + // Peize Lin add 2016-09-23 +#ifdef __MPI #ifdef __EXX - if( GlobalC::exx_info.info_global.cal_exx || PARAM.inp.rpa ) - { - if( ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "ABFS_ORBITAL") ) - { - for(int i=0; i> ofile; - GlobalC::exx_info.info_ri.files_abfs.push_back(ofile); - GlobalC::exx_info.info_opt_abfs.files_abfs.push_back(ofile); - } - } - if( ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "ABFS_JLES_ORBITAL") ) + if (GlobalC::exx_info.info_global.cal_exx || PARAM.inp.rpa) { - for(int i=0; i> ofile; - GlobalC::exx_info.info_opt_abfs.files_jles.push_back(ofile); - } + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "ABFS_ORBITAL")) + { + for (int i = 0; i < ntype; i++) + { + std::string ofile; + ifa >> ofile; + GlobalC::exx_info.info_ri.files_abfs.push_back (ofile); + GlobalC::exx_info.info_opt_abfs.files_abfs.push_back (ofile); + } + } + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "ABFS_JLES_ORBITAL")) + { + for (int i = 0; i < ntype; i++) + { + std::string ofile; + ifa >> ofile; + GlobalC::exx_info.info_opt_abfs.files_jles.push_back (ofile); + } + } } - } #endif // __EXX #endif // __MPI @@ -133,256 +136,342 @@ bool read_atom_species(std::ifstream& ifa, return true; } -bool read_lattice_constant(std::ifstream& ifa, - std::ofstream& ofs_running, - Lattice& lat) +bool + 
read_lattice_constant (std::ifstream& ifa, std::ofstream& ofs_running, Lattice& lat) { //========================== // read in lattice constant //========================== double& lat0 = lat.lat0; - double& lat0_angstrom =lat.lat0_angstrom; + double& lat0_angstrom = lat.lat0_angstrom; std::string& latName = lat.latName; ModuleBase::Matrix3& latvec = lat.latvec; - if( ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "LATTICE_CONSTANT") ) - { - ModuleBase::GlobalFunc::READ_VALUE(ifa, lat0); - if(lat0<=0.0) + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "LATTICE_CONSTANT")) { - ModuleBase::WARNING_QUIT("read_atom_species","Lattice constant <= 0.0"); + ModuleBase::GlobalFunc::READ_VALUE (ifa, lat0); + if (lat0 <= 0.0) + { + ModuleBase::WARNING_QUIT ("read_atom_species", "Lattice constant <= 0.0"); + } + lat0_angstrom = lat0 * ModuleBase::BOHR_TO_A; + ModuleBase::GlobalFunc::OUT (ofs_running, "Lattice constant (Bohr)", lat0); + ModuleBase::GlobalFunc::OUT (ofs_running, "Lattice constant (Angstrom)", lat0_angstrom); + lat.tpiba = ModuleBase::TWO_PI / lat0; + lat.tpiba2 = lat.tpiba * lat.tpiba; } - lat0_angstrom = lat0 * ModuleBase::BOHR_TO_A; - ModuleBase::GlobalFunc::OUT(ofs_running,"Lattice constant (Bohr)",lat0); - ModuleBase::GlobalFunc::OUT(ofs_running,"Lattice constant (Angstrom)",lat0_angstrom); - lat.tpiba = ModuleBase::TWO_PI / lat0; - lat.tpiba2 = lat.tpiba * lat.tpiba; - } //=========================== // Read in latticies vector //=========================== - if(latName=="user_defined_lattice") - { - // check the existence of keyword "LATTICE_PARAMETERS" - if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, - "LATTICE_PARAMETERS", - true, - false)) + if (latName == "user_defined_lattice") { - ModuleBase::WARNING_QUIT("unitcell::read_lattice_constant", - "do not use LATTICE_PARAMETERS without explicit specification of lattice type"); - } + // check the existence of keyword "LATTICE_PARAMETERS" + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, 
"LATTICE_PARAMETERS", true, false)) + { + ModuleBase::WARNING_QUIT ( + "unitcell::read_lattice_constant", + "do not use LATTICE_PARAMETERS without explicit specification of lattice type"); + } - // check the existence of keyword "LATTICE_VECTORS" - if( !ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "LATTICE_VECTORS") ) - { - ModuleBase::WARNING_QUIT("unitcell::read_lattice_constant", - "Please set LATTICE_VECTORS in the STRU file"); - } - else if( ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "LATTICE_VECTORS") ) - { - // Reading lattice vectors. notice - // here that only one cpu read these - // parameters. - ifa >> latvec.e11 >> latvec.e12; - ModuleBase::GlobalFunc::READ_VALUE(ifa, latvec.e13); - ifa >> latvec.e21 >> latvec.e22; - ModuleBase::GlobalFunc::READ_VALUE(ifa, latvec.e23); - ifa >> latvec.e31 >> latvec.e32; - ModuleBase::GlobalFunc::READ_VALUE(ifa, latvec.e33); - } - }//supply lattice vectors + // check the existence of keyword "LATTICE_VECTORS" + if (!ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "LATTICE_VECTORS")) + { + ModuleBase::WARNING_QUIT ("unitcell::read_lattice_constant", + "Please set LATTICE_VECTORS in the STRU file"); + } + else if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "LATTICE_VECTORS")) + { + // Reading lattice vectors. notice + // here that only one cpu read these + // parameters. 
+ ifa >> latvec.e11 >> latvec.e12; + ModuleBase::GlobalFunc::READ_VALUE (ifa, latvec.e13); + ifa >> latvec.e21 >> latvec.e22; + ModuleBase::GlobalFunc::READ_VALUE (ifa, latvec.e23); + ifa >> latvec.e31 >> latvec.e32; + ModuleBase::GlobalFunc::READ_VALUE (ifa, latvec.e33); + } + } // supply lattice vectors else - { - if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, - "LATTICE_VECTORS", - true, - false)) { - ModuleBase::WARNING_QUIT("unitcell::read_lattice_constant", - "do not use LATTICE_VECTORS along with explicit specification of lattice type"); - } - if(latName=="sc") - {//simple-cubic, ibrav = 1 - latvec.e11 = 1.0; latvec.e12 = 0.0; latvec.e13 = 0.0; - latvec.e21 = 0.0; latvec.e22 = 1.0; latvec.e23 = 0.0; - latvec.e31 = 0.0; latvec.e32 = 0.0; latvec.e33 = 1.0; - } - else if(latName=="fcc") - {//face-centered cubic, ibrav = 2 - latvec.e11 =-0.5; latvec.e12 = 0.0; latvec.e13 = 0.5; - latvec.e21 = 0.0; latvec.e22 = 0.5; latvec.e23 = 0.5; - latvec.e31 =-0.5; latvec.e32 = 0.5; latvec.e33 = 0.0; - } - else if(latName=="bcc") - {//body-centered cubic, ibrav = 3 - latvec.e11 = 0.5; latvec.e12 = 0.5; latvec.e13 = 0.5; - latvec.e21 =-0.5; latvec.e22 = 0.5; latvec.e23 = 0.5; - latvec.e31 =-0.5; latvec.e32 =-0.5; latvec.e33 = 0.5; - } - else if(latName=="hexagonal") - {//hexagonal, ibrav = 4 - double e22 = sqrt(3.0) / 2.0; - latvec.e11 = 1.0; latvec.e12 = 0.0; latvec.e13 = 0.0; - latvec.e21 =-0.5; latvec.e22 = e22; latvec.e23 = 0.0; - latvec.e31 = 0.0; latvec.e32 = 0.0; latvec.e33 = 0.0; - if( ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "LATTICE_PARAMETERS") ) - { - ModuleBase::GlobalFunc::READ_VALUE(ifa, latvec.e33); - } - } - else if(latName=="trigonal") - {//trigonal, ibrav = 5 - double t1 = 0.0; - double t2 = 0.0; - if( ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "LATTICE_PARAMETERS") ) - { - double cosab=0.0; - ModuleBase::GlobalFunc::READ_VALUE(ifa, cosab); - t1 = sqrt(1.0 + 2.0*cosab); - t2 = sqrt(1.0 - cosab); - } - double e11 = t2 / sqrt(2.0); - double e12 = -t2 
/ sqrt(6.0); - double e13 = t1 / sqrt(3.0); - double e22 = sqrt(2.0) * t2 / sqrt(3.0); - latvec.e11 = e11; latvec.e12 = e12; latvec.e13 = e13; - latvec.e21 = 0.0; latvec.e22 = e22; latvec.e23 = e13; - latvec.e31 =-e11; latvec.e32 = e12; latvec.e33 = e13; - } - else if(latName=="st") - {//simple tetragonal, ibrav= 6 - latvec.e11 = 1.0; latvec.e12 = 0.0; latvec.e13 = 0.0; - latvec.e21 = 0.0; latvec.e22 = 1.0; latvec.e23 = 0.0; - latvec.e31 = 0.0; latvec.e32 = 0.0; latvec.e33 = 0.0; - if( ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "LATTICE_PARAMETERS") ) - { - ModuleBase::GlobalFunc::READ_VALUE(ifa, latvec.e33); - } - } - else if(latName=="bct") - {//body-centered tetragonal, ibrav = 7 - double cba = 0.0; - if( ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "LATTICE_PARAMETERS") ) - { - ModuleBase::GlobalFunc::READ_VALUE(ifa, cba); - cba = cba / 2.0; - } - latvec.e11 = 0.5; latvec.e12 =-0.5; latvec.e13 = cba; - latvec.e21 = 0.5; latvec.e22 = 0.5; latvec.e23 = cba; - latvec.e31 =-0.5; latvec.e32 =-0.5; latvec.e33 = cba; - } - else if(latName=="so") - {//simple orthorhombic, ibrav = 8 - latvec.e11 = 1.0; latvec.e12 = 0.0; latvec.e13 = 0.0; - latvec.e21 = 0.0; latvec.e22 = 0.0; latvec.e23 = 0.0; - latvec.e31 = 0.0; latvec.e32 = 0.0; latvec.e33 = 0.0; - if( ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "LATTICE_PARAMETERS") ) - { - ifa >> latvec.e22; - ModuleBase::GlobalFunc::READ_VALUE(ifa, latvec.e33); - } - } - else if(latName=="baco") - {//base-centered orthorhombic, ibrav = 9 - latvec.e11 = 0.5; latvec.e12 = 0.0; latvec.e13 = 0.0; - latvec.e21 =-0.5; latvec.e22 = 0.0; latvec.e23 = 0.0; - latvec.e31 = 0.0; latvec.e32 = 0.0; latvec.e33 = 0.0; - if( ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "LATTICE_PARAMETERS") ) - { - ifa >> latvec.e12; - latvec.e12 = latvec.e12 / 2.0; - latvec.e22 = latvec.e12; - ModuleBase::GlobalFunc::READ_VALUE(ifa, latvec.e33); - } - } - else if(latName=="fco") - {//face-centered orthorhombic, ibrav = 10 - double bba = 0.0; double cba = 0.0; - 
if( ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "LATTICE_PARAMETERS") ) - { - ifa >> bba; - ModuleBase::GlobalFunc::READ_VALUE(ifa, cba); - bba = bba / 2.0; cba = cba / 2.0; - } - latvec.e11 = 0.5; latvec.e12 = 0.0; latvec.e13 = cba; - latvec.e21 = 0.5; latvec.e22 = bba; latvec.e23 = 0.0; - latvec.e31 = 0.0; latvec.e32 = bba; latvec.e33 = cba; - } - else if(latName=="bco") - {//body-centered orthorhombic, ibrav = 11 - double bba = 0.0; double cba = 0.0; - if( ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "LATTICE_PARAMETERS") ) - { - ifa >> bba; - ModuleBase::GlobalFunc::READ_VALUE(ifa, cba); - bba = bba / 2.0; cba = cba / 2.0; - } - latvec.e11 = 0.5; latvec.e12 = bba; latvec.e13 = cba; - latvec.e21 =-0.5; latvec.e22 = bba; latvec.e23 = cba; - latvec.e31 =-0.5; latvec.e32 =-bba; latvec.e33 = cba; - } - else if(latName=="sm") - {//simple monoclinic, ibrav = 12 - double bba = 0.0; double cba = 0.0; - double cosab = 0.0; - double e21 = 0.0; double e22 = 0.0; - if( ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "LATTICE_PARAMETERS") ) - { - ifa >> bba >> cba; - ModuleBase::GlobalFunc::READ_VALUE(ifa, cosab); - e21 = bba * cosab; - e22 = bba * sqrt(1.0-cosab*cosab); - } - latvec.e11 = 1.0; latvec.e12 = 0.0; latvec.e13 = 0.0; - latvec.e21 = e21; latvec.e22 = e22; latvec.e23 = 0.0; - latvec.e31 = 0.0; latvec.e32 = 0.0; latvec.e33 = cba; - } - else if(latName=="bacm") - {//base-centered monoclinic, ibrav = 13 - double bba = 0.0; double cba = 0.0; - double cosab = 0.0; - double e21 = 0.0; double e22 = 0.0; - if( ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "LATTICE_PARAMETERS") ) - { - ifa >> bba >> cba; - ModuleBase::GlobalFunc::READ_VALUE(ifa, cosab); - e21 = bba * cosab; - e22 = bba * sqrt(1.0-cosab*cosab); - cba = cba / 2.0; - } - latvec.e11 = 0.5; latvec.e12 = 0.0; latvec.e13 =-cba; - latvec.e21 = e21; latvec.e22 = e22; latvec.e23 = 0.0; - latvec.e31 = 0.5; latvec.e32 = 0.0; latvec.e33 = cba; - } - else if(latName=="triclinic") - {//triclinic, ibrav = 14 - double bba = 
0.0; - double cba = 0.0; - double cosab = 0.0; - double cosac = 0.0; - double cosbc = 0.0; - double sinab = 0.0; - double term = 0.0; - if( ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifa, "LATTICE_PARAMETERS") ) - { - ifa >> bba >> cba >> cosab >> cosac; - ModuleBase::GlobalFunc::READ_VALUE(ifa, cosbc); - sinab = sqrt(1.0-cosab*cosab); - } - latvec.e11 = 1.0; latvec.e12 = 0.0; latvec.e13 = 0.0; - latvec.e21 = bba * cosab; latvec.e22 = bba * sinab; latvec.e23 = 0.0; - latvec.e31 = cba * cosac; latvec.e32 = cba * (cosbc - cosac*cosab) / sinab; - term = 1.0 + 2.0 * cosab*cosac*cosbc - cosab*cosab - cosac*cosac - cosbc*cosbc; - term = sqrt(term)/sinab; - latvec.e33 = cba * term; - } - else - { - ModuleBase::WARNING_QUIT("unitcell::read_lattice_constant","latname not supported!"); + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "LATTICE_VECTORS", true, false)) + { + ModuleBase::WARNING_QUIT ( + "unitcell::read_lattice_constant", + "do not use LATTICE_VECTORS along with explicit specification of lattice type"); + } + if (latName == "sc") + { // simple-cubic, ibrav = 1 + latvec.e11 = 1.0; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = 0.0; + latvec.e22 = 1.0; + latvec.e23 = 0.0; + latvec.e31 = 0.0; + latvec.e32 = 0.0; + latvec.e33 = 1.0; + } + else if (latName == "fcc") + { // face-centered cubic, ibrav = 2 + latvec.e11 = -0.5; + latvec.e12 = 0.0; + latvec.e13 = 0.5; + latvec.e21 = 0.0; + latvec.e22 = 0.5; + latvec.e23 = 0.5; + latvec.e31 = -0.5; + latvec.e32 = 0.5; + latvec.e33 = 0.0; + } + else if (latName == "bcc") + { // body-centered cubic, ibrav = 3 + latvec.e11 = 0.5; + latvec.e12 = 0.5; + latvec.e13 = 0.5; + latvec.e21 = -0.5; + latvec.e22 = 0.5; + latvec.e23 = 0.5; + latvec.e31 = -0.5; + latvec.e32 = -0.5; + latvec.e33 = 0.5; + } + else if (latName == "hexagonal") + { // hexagonal, ibrav = 4 + double e22 = sqrt (3.0) / 2.0; + latvec.e11 = 1.0; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = -0.5; + latvec.e22 = e22; + latvec.e23 = 0.0; + 
latvec.e31 = 0.0; + latvec.e32 = 0.0; + latvec.e33 = 0.0; + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "LATTICE_PARAMETERS")) + { + ModuleBase::GlobalFunc::READ_VALUE (ifa, latvec.e33); + } + } + else if (latName == "trigonal") + { // trigonal, ibrav = 5 + double t1 = 0.0; + double t2 = 0.0; + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "LATTICE_PARAMETERS")) + { + double cosab = 0.0; + ModuleBase::GlobalFunc::READ_VALUE (ifa, cosab); + t1 = sqrt (1.0 + 2.0 * cosab); + t2 = sqrt (1.0 - cosab); + } + double e11 = t2 / sqrt (2.0); + double e12 = -t2 / sqrt (6.0); + double e13 = t1 / sqrt (3.0); + double e22 = sqrt (2.0) * t2 / sqrt (3.0); + latvec.e11 = e11; + latvec.e12 = e12; + latvec.e13 = e13; + latvec.e21 = 0.0; + latvec.e22 = e22; + latvec.e23 = e13; + latvec.e31 = -e11; + latvec.e32 = e12; + latvec.e33 = e13; + } + else if (latName == "st") + { // simple tetragonal, ibrav= 6 + latvec.e11 = 1.0; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = 0.0; + latvec.e22 = 1.0; + latvec.e23 = 0.0; + latvec.e31 = 0.0; + latvec.e32 = 0.0; + latvec.e33 = 0.0; + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "LATTICE_PARAMETERS")) + { + ModuleBase::GlobalFunc::READ_VALUE (ifa, latvec.e33); + } + } + else if (latName == "bct") + { // body-centered tetragonal, ibrav = 7 + double cba = 0.0; + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "LATTICE_PARAMETERS")) + { + ModuleBase::GlobalFunc::READ_VALUE (ifa, cba); + cba = cba / 2.0; + } + latvec.e11 = 0.5; + latvec.e12 = -0.5; + latvec.e13 = cba; + latvec.e21 = 0.5; + latvec.e22 = 0.5; + latvec.e23 = cba; + latvec.e31 = -0.5; + latvec.e32 = -0.5; + latvec.e33 = cba; + } + else if (latName == "so") + { // simple orthorhombic, ibrav = 8 + latvec.e11 = 1.0; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = 0.0; + latvec.e22 = 0.0; + latvec.e23 = 0.0; + latvec.e31 = 0.0; + latvec.e32 = 0.0; + latvec.e33 = 0.0; + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "LATTICE_PARAMETERS")) + { + ifa >> 
latvec.e22; + ModuleBase::GlobalFunc::READ_VALUE (ifa, latvec.e33); + } + } + else if (latName == "baco") + { // base-centered orthorhombic, ibrav = 9 + latvec.e11 = 0.5; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = -0.5; + latvec.e22 = 0.0; + latvec.e23 = 0.0; + latvec.e31 = 0.0; + latvec.e32 = 0.0; + latvec.e33 = 0.0; + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "LATTICE_PARAMETERS")) + { + ifa >> latvec.e12; + latvec.e12 = latvec.e12 / 2.0; + latvec.e22 = latvec.e12; + ModuleBase::GlobalFunc::READ_VALUE (ifa, latvec.e33); + } + } + else if (latName == "fco") + { // face-centered orthorhombic, ibrav = 10 + double bba = 0.0; + double cba = 0.0; + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "LATTICE_PARAMETERS")) + { + ifa >> bba; + ModuleBase::GlobalFunc::READ_VALUE (ifa, cba); + bba = bba / 2.0; + cba = cba / 2.0; + } + latvec.e11 = 0.5; + latvec.e12 = 0.0; + latvec.e13 = cba; + latvec.e21 = 0.5; + latvec.e22 = bba; + latvec.e23 = 0.0; + latvec.e31 = 0.0; + latvec.e32 = bba; + latvec.e33 = cba; + } + else if (latName == "bco") + { // body-centered orthorhombic, ibrav = 11 + double bba = 0.0; + double cba = 0.0; + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "LATTICE_PARAMETERS")) + { + ifa >> bba; + ModuleBase::GlobalFunc::READ_VALUE (ifa, cba); + bba = bba / 2.0; + cba = cba / 2.0; + } + latvec.e11 = 0.5; + latvec.e12 = bba; + latvec.e13 = cba; + latvec.e21 = -0.5; + latvec.e22 = bba; + latvec.e23 = cba; + latvec.e31 = -0.5; + latvec.e32 = -bba; + latvec.e33 = cba; + } + else if (latName == "sm") + { // simple monoclinic, ibrav = 12 + double bba = 0.0; + double cba = 0.0; + double cosab = 0.0; + double e21 = 0.0; + double e22 = 0.0; + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "LATTICE_PARAMETERS")) + { + ifa >> bba >> cba; + ModuleBase::GlobalFunc::READ_VALUE (ifa, cosab); + e21 = bba * cosab; + e22 = bba * sqrt (1.0 - cosab * cosab); + } + latvec.e11 = 1.0; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = e21; + 
latvec.e22 = e22; + latvec.e23 = 0.0; + latvec.e31 = 0.0; + latvec.e32 = 0.0; + latvec.e33 = cba; + } + else if (latName == "bacm") + { // base-centered monoclinic, ibrav = 13 + double bba = 0.0; + double cba = 0.0; + double cosab = 0.0; + double e21 = 0.0; + double e22 = 0.0; + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "LATTICE_PARAMETERS")) + { + ifa >> bba >> cba; + ModuleBase::GlobalFunc::READ_VALUE (ifa, cosab); + e21 = bba * cosab; + e22 = bba * sqrt (1.0 - cosab * cosab); + cba = cba / 2.0; + } + latvec.e11 = 0.5; + latvec.e12 = 0.0; + latvec.e13 = -cba; + latvec.e21 = e21; + latvec.e22 = e22; + latvec.e23 = 0.0; + latvec.e31 = 0.5; + latvec.e32 = 0.0; + latvec.e33 = cba; + } + else if (latName == "triclinic") + { // triclinic, ibrav = 14 + double bba = 0.0; + double cba = 0.0; + double cosab = 0.0; + double cosac = 0.0; + double cosbc = 0.0; + double sinab = 0.0; + double term = 0.0; + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifa, "LATTICE_PARAMETERS")) + { + ifa >> bba >> cba >> cosab >> cosac; + ModuleBase::GlobalFunc::READ_VALUE (ifa, cosbc); + sinab = sqrt (1.0 - cosab * cosab); + } + latvec.e11 = 1.0; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = bba * cosab; + latvec.e22 = bba * sinab; + latvec.e23 = 0.0; + latvec.e31 = cba * cosac; + latvec.e32 = cba * (cosbc - cosac * cosab) / sinab; + term = 1.0 + 2.0 * cosab * cosac * cosbc - cosab * cosab - cosac * cosac - cosbc * cosbc; + term = sqrt (term) / sinab; + latvec.e33 = cba * term; + } + else + { + ModuleBase::WARNING_QUIT ("unitcell::read_lattice_constant", "latname not supported!"); + } } - } // lattice vectors in another form. 
lat.a1.x = latvec.e11; @@ -399,4 +488,4 @@ bool read_lattice_constant(std::ifstream& ifa, return true; } -} +} // namespace unitcell diff --git a/source/source_cell/read_atoms.cpp b/source/source_cell/read_atoms.cpp index 748be95c015..7e7bebe06fc 100644 --- a/source/source_cell/read_atoms.cpp +++ b/source/source_cell/read_atoms.cpp @@ -1,4 +1,4 @@ -#include // Peize Lin fix bug about strcmp 2016-08-02 +#include // Peize Lin fix bug about strcmp 2016-08-02 #include #include #include @@ -18,104 +18,123 @@ #include "source_basis/module_ao/ORB_read.h" // to use 'ORB' -- mohan 2021-01-30 #endif -bool unitcell::read_atom_positions(UnitCell& ucell, - std::ifstream &ifpos, - std::ofstream &ofs_running, - std::ofstream &ofs_warning) +bool + unitcell::read_atom_positions (UnitCell& ucell, + std::ifstream& ifpos, + std::ofstream& ofs_running, + std::ofstream& ofs_warning) { - ModuleBase::TITLE("UnitCell","read_atom_positions"); + ModuleBase::TITLE ("UnitCell", "read_atom_positions"); - std::string& Coordinate = ucell.Coordinate; + std::string& Coordinate = ucell.Coordinate; const int ntype = ucell.ntype; const int nspin = PARAM.inp.nspin; - assert (nspin==1 || nspin==2 || nspin==4); + assert (nspin == 1 || nspin == 2 || nspin == 4); - if( ModuleBase::GlobalFunc::SCAN_LINE_BEGIN(ifpos, "ATOMIC_POSITIONS")) - { - ModuleBase::GlobalFunc::READ_VALUE(ifpos, Coordinate); - - if (!unitcell::validate_coordinate_system(Coordinate, ofs_warning)) + if (ModuleBase::GlobalFunc::SCAN_LINE_BEGIN (ifpos, "ATOMIC_POSITIONS")) { - return false; - } + ModuleBase::GlobalFunc::READ_VALUE (ifpos, Coordinate); + + if (!unitcell::validate_coordinate_system (Coordinate, ofs_warning)) + { + return false; + } - ucell.nat = 0; + ucell.nat = 0; - //====================================== - // calculate total number of ucell.atoms - // and adjust the order of atom species - //====================================== - for (int it = 0;it < ntype; it++) - { - ofs_running << "\n READING ATOM TYPE " << it+1 << 
std::endl; - - bool set_element_mag_zero = false; - if (!unitcell::read_atom_type_header(it, ucell, ifpos, ofs_running, - ofs_warning, set_element_mag_zero)) - { - return false; - } - - int na = ucell.atoms[it].na; - ucell.nat += na; - - if (na > 0) - { - unitcell::allocate_atom_properties(ucell.atoms[it], na, ucell.atom_mass[it]); - for (int ia = 0;ia < na; ia++) + //====================================== + // calculate total number of ucell.atoms + // and adjust the order of atom species + //====================================== + for (int it = 0; it < ntype; it++) { - // modify the reading of frozen ions and velocities -- Yuanbo Li 2021/8/20 - ModuleBase::Vector3 v; - ModuleBase::Vector3 mv; - ifpos >> v.x >> v.y >> v.z; - mv.x = true ; - mv.y = true ; - mv.z = true ; - ucell.atoms[it].vel[ia].set(0,0,0); - ucell.atoms[it].mag[ia]=ucell.magnet.start_mag[it]; - //if this line is used, default startmag_type would be 2 - ucell.atoms[it].angle1[ia]=0; - ucell.atoms[it].angle2[ia]=0; - ucell.atoms[it].m_loc_[ia].set(0,0,0); - ucell.atoms[it].lambda[ia].set(0,0,0); - ucell.atoms[it].constrain[ia].set(0,0,0); - - bool input_vec_mag=false; - bool input_angle_mag=false; - - // Parse optional properties - if (!unitcell::parse_atom_properties(ifpos, ucell.atoms[it], ia, mv, - input_vec_mag, input_angle_mag, - set_element_mag_zero)) - { - return false; - } - - // Process magnetization - unitcell::process_magnetization(ucell.atoms[it], it, ia, nspin, - input_vec_mag, input_angle_mag, ofs_running); - - // Transform coordinates - unitcell::transform_atom_coordinates(ucell.atoms[it], ia, Coordinate, - v, ucell.latvec, ucell.lat0, ucell.latcenter); - - // Set movement flags - unitcell::set_atom_movement_flags(ucell.atoms[it], ia, mv); - ucell.atoms[it].dis[ia].set(0, 0, 0); - }//endj - } // end na - // reset some useless parameters - if (set_element_mag_zero) - { - ucell.magnet.start_mag[it] = 0.0; - } - } // end for ntype - - // Auto-set magnetization if needed - 
unitcell::autoset_magnetization(ucell, nspin, ofs_running); - } // end scan_begin + ofs_running << "\n READING ATOM TYPE " << it + 1 << std::endl; + + bool set_element_mag_zero = false; + if (!unitcell::read_atom_type_header (it, + ucell, + ifpos, + ofs_running, + ofs_warning, + set_element_mag_zero)) + { + return false; + } + + int na = ucell.atoms[it].na; + ucell.nat += na; + + if (na > 0) + { + unitcell::allocate_atom_properties (ucell.atoms[it], na, ucell.atom_mass[it]); + for (int ia = 0; ia < na; ia++) + { + // modify the reading of frozen ions and velocities -- Yuanbo Li 2021/8/20 + ModuleBase::Vector3 v; + ModuleBase::Vector3 mv; + ifpos >> v.x >> v.y >> v.z; + mv.x = true; + mv.y = true; + mv.z = true; + ucell.atoms[it].vel[ia].set (0, 0, 0); + ucell.atoms[it].mag[ia] = ucell.magnet.start_mag[it]; + // if this line is used, default startmag_type would be 2 + ucell.atoms[it].angle1[ia] = 0; + ucell.atoms[it].angle2[ia] = 0; + ucell.atoms[it].m_loc_[ia].set (0, 0, 0); + ucell.atoms[it].lambda[ia].set (0, 0, 0); + ucell.atoms[it].constrain[ia].set (0, 0, 0); + + bool input_vec_mag = false; + bool input_angle_mag = false; + + // Parse optional properties + if (!unitcell::parse_atom_properties (ifpos, + ucell.atoms[it], + ia, + mv, + input_vec_mag, + input_angle_mag, + set_element_mag_zero)) + { + return false; + } + + // Process magnetization + unitcell::process_magnetization (ucell.atoms[it], + it, + ia, + nspin, + input_vec_mag, + input_angle_mag, + ofs_running); + + // Transform coordinates + unitcell::transform_atom_coordinates (ucell.atoms[it], + ia, + Coordinate, + v, + ucell.latvec, + ucell.lat0, + ucell.latcenter); + + // Set movement flags + unitcell::set_atom_movement_flags (ucell.atoms[it], ia, mv); + ucell.atoms[it].dis[ia].set (0, 0, 0); + } // endj + } // end na + // reset some useless parameters + if (set_element_mag_zero) + { + ucell.magnet.start_mag[it] = 0.0; + } + } // end for ntype + + // Auto-set magnetization if needed + 
unitcell::autoset_magnetization (ucell, nspin, ofs_running); + } // end scan_begin // Final validation and output - return unitcell::finalize_atom_positions(ucell, ofs_running, ofs_warning); + return unitcell::finalize_atom_positions (ucell, ofs_running, ofs_warning); -}//end read_atom_positions +} // end read_atom_positions diff --git a/source/source_cell/read_atoms_helper.cpp b/source/source_cell/read_atoms_helper.cpp index 4fc3dfe6cb0..70b8d9a7a3a 100644 --- a/source/source_cell/read_atoms_helper.cpp +++ b/source/source_cell/read_atoms_helper.cpp @@ -10,504 +10,514 @@ #include #include -namespace { - // Magic number constants for character code checks - constexpr char DIGIT_START = '0'; // ASCII 48 - constexpr char DIGIT_END = '9'; // ASCII 57 - constexpr char LOWER_A = 'a'; - constexpr char LOWER_Z = 'z'; - constexpr char MINUS_SIGN = '-'; -} - -namespace unitcell { +namespace +{ +// Magic number constants for character code checks +constexpr char DIGIT_START = '0'; // ASCII 48 +constexpr char DIGIT_END = '9'; // ASCII 57 +constexpr char LOWER_A = 'a'; +constexpr char LOWER_Z = 'z'; +constexpr char MINUS_SIGN = '-'; +} // namespace + +namespace unitcell +{ -bool validate_coordinate_system(const std::string& Coordinate, - std::ofstream& ofs_warning) +bool + validate_coordinate_system (const std::string& Coordinate, std::ofstream& ofs_warning) { - if(Coordinate != "Cartesian" - && Coordinate != "Direct" - && Coordinate != "Cartesian_angstrom" - && Coordinate != "Cartesian_au" - && Coordinate != "Cartesian_angstrom_center_xy" - && Coordinate != "Cartesian_angstrom_center_xz" - && Coordinate != "Cartesian_angstrom_center_yz" - && Coordinate != "Cartesian_angstrom_center_xyz" - ) - { - ModuleBase::WARNING("read_atom_position","Cartesian or Direct?"); - ofs_warning << " There are several options for you:" << std::endl; - ofs_warning << " Direct" << std::endl; - ofs_warning << " Cartesian_angstrom" << std::endl; - ofs_warning << " Cartesian_au" << std::endl; - 
ofs_warning << " Cartesian_angstrom_center_xy" << std::endl; - ofs_warning << " Cartesian_angstrom_center_xz" << std::endl; - ofs_warning << " Cartesian_angstrom_center_yz" << std::endl; - ofs_warning << " Cartesian_angstrom_center_xyz" << std::endl; - return false; - } + if (Coordinate != "Cartesian" && Coordinate != "Direct" && Coordinate != "Cartesian_angstrom" + && Coordinate != "Cartesian_au" && Coordinate != "Cartesian_angstrom_center_xy" + && Coordinate != "Cartesian_angstrom_center_xz" && Coordinate != "Cartesian_angstrom_center_yz" + && Coordinate != "Cartesian_angstrom_center_xyz") + { + ModuleBase::WARNING ("read_atom_position", "Cartesian or Direct?"); + ofs_warning << " There are several options for you:" << std::endl; + ofs_warning << " Direct" << std::endl; + ofs_warning << " Cartesian_angstrom" << std::endl; + ofs_warning << " Cartesian_au" << std::endl; + ofs_warning << " Cartesian_angstrom_center_xy" << std::endl; + ofs_warning << " Cartesian_angstrom_center_xz" << std::endl; + ofs_warning << " Cartesian_angstrom_center_yz" << std::endl; + ofs_warning << " Cartesian_angstrom_center_xyz" << std::endl; + return false; + } return true; } -void allocate_atom_properties(Atom& atom, int na, double mass) +void + allocate_atom_properties (Atom& atom, int na, double mass) { - atom.tau.resize(na, ModuleBase::Vector3(0,0,0)); - atom.dis.resize(na, ModuleBase::Vector3(0,0,0)); - atom.taud.resize(na, ModuleBase::Vector3(0,0,0)); - atom.boundary_shift.resize(na, ModuleBase::Vector3(0,0,0)); - atom.vel.resize(na, ModuleBase::Vector3(0,0,0)); - atom.mbl.resize(na, ModuleBase::Vector3(0,0,0)); - atom.mag.resize(na, 0); - atom.angle1.resize(na, 0); - atom.angle2.resize(na, 0); - atom.m_loc_.resize(na, ModuleBase::Vector3(0,0,0)); - atom.lambda.resize(na, ModuleBase::Vector3(0,0,0)); - atom.constrain.resize(na, ModuleBase::Vector3(0,0,0)); + atom.tau.resize (na, ModuleBase::Vector3 (0, 0, 0)); + atom.dis.resize (na, ModuleBase::Vector3 (0, 0, 0)); + atom.taud.resize 
(na, ModuleBase::Vector3 (0, 0, 0)); + atom.boundary_shift.resize (na, ModuleBase::Vector3 (0, 0, 0)); + atom.vel.resize (na, ModuleBase::Vector3 (0, 0, 0)); + atom.mbl.resize (na, ModuleBase::Vector3 (0, 0, 0)); + atom.mag.resize (na, 0); + atom.angle1.resize (na, 0); + atom.angle2.resize (na, 0); + atom.m_loc_.resize (na, ModuleBase::Vector3 (0, 0, 0)); + atom.lambda.resize (na, ModuleBase::Vector3 (0, 0, 0)); + atom.constrain.resize (na, ModuleBase::Vector3 (0, 0, 0)); atom.mass = mass; } -void set_atom_movement_flags(Atom& atom, int ia, - const ModuleBase::Vector3& mv) +void + set_atom_movement_flags (Atom& atom, int ia, const ModuleBase::Vector3& mv) { - if(!PARAM.inp.fixed_atoms) - { - atom.mbl[ia] = mv; - } + if (!PARAM.inp.fixed_atoms) + { + atom.mbl[ia] = mv; + } else - { - atom.mbl[ia] = 0.0; - atom.mbl[ia].print(); - } + { + atom.mbl[ia] = 0.0; + atom.mbl[ia].print (); + } } -void autoset_magnetization(UnitCell& ucell, int nspin, - std::ofstream& ofs_running) +void + autoset_magnetization (UnitCell& ucell, int nspin, std::ofstream& ofs_running) { const int ntype = ucell.ntype; // Check if any atom has non-zero magnetization int autoset_mag = 1; for (int it = 0; it < ntype; it++) - { - for (int ia = 0; ia < ucell.atoms[it].na; ia++) { - if(std::abs(ucell.atoms[it].mag[ia]) > 1e-5) - { - autoset_mag = 0; - break; - } + for (int ia = 0; ia < ucell.atoms[it].na; ia++) + { + if (std::abs (ucell.atoms[it].mag[ia]) > 1e-5) + { + autoset_mag = 0; + break; + } + } } - } if (autoset_mag) - { - if(nspin==4) { - for (int it = 0; it < ntype; it++) - { - for (int ia = 0; ia < ucell.atoms[it].na; ia++) + if (nspin == 4) { - ucell.atoms[it].m_loc_[ia].x = 1.0; - ucell.atoms[it].m_loc_[ia].y = 1.0; - ucell.atoms[it].m_loc_[ia].z = 1.0; - ucell.atoms[it].mag[ia] = sqrt(pow(ucell.atoms[it].m_loc_[ia].x,2) - +pow(ucell.atoms[it].m_loc_[ia].y,2) - +pow(ucell.atoms[it].m_loc_[ia].z,2)); - ModuleBase::GlobalFunc::OUT(ofs_running,"Autoset magnetism for this atom", 1.0, 1.0, 
1.0); + for (int it = 0; it < ntype; it++) + { + for (int ia = 0; ia < ucell.atoms[it].na; ia++) + { + ucell.atoms[it].m_loc_[ia].x = 1.0; + ucell.atoms[it].m_loc_[ia].y = 1.0; + ucell.atoms[it].m_loc_[ia].z = 1.0; + ucell.atoms[it].mag[ia] = sqrt (pow (ucell.atoms[it].m_loc_[ia].x, 2) + + pow (ucell.atoms[it].m_loc_[ia].y, 2) + + pow (ucell.atoms[it].m_loc_[ia].z, 2)); + ModuleBase::GlobalFunc::OUT (ofs_running, + "Autoset magnetism for this atom", + 1.0, + 1.0, + 1.0); + } + } } - } - } - else if(nspin==2) - { - for (int it = 0; it < ntype; it++) - { - for (int ia = 0; ia < ucell.atoms[it].na; ia++) + else if (nspin == 2) { - ucell.atoms[it].mag[ia] = 1.0; - ucell.atoms[it].m_loc_[ia].x = ucell.atoms[it].mag[ia]; - ModuleBase::GlobalFunc::OUT(ofs_running,"Autoset magnetism for this atom", 1.0); + for (int it = 0; it < ntype; it++) + { + for (int ia = 0; ia < ucell.atoms[it].na; ia++) + { + ucell.atoms[it].mag[ia] = 1.0; + ucell.atoms[it].m_loc_[ia].x = ucell.atoms[it].mag[ia]; + ModuleBase::GlobalFunc::OUT (ofs_running, "Autoset magnetism for this atom", 1.0); + } + } } - } } - } } -bool finalize_atom_positions(UnitCell& ucell, - std::ofstream& ofs_running, - std::ofstream& ofs_warning) +bool + finalize_atom_positions (UnitCell& ucell, std::ofstream& ofs_running, std::ofstream& ofs_warning) { // Check if any atom can move in MD - if(!ucell.if_atoms_can_move() && PARAM.inp.calculation=="md" && PARAM.inp.esolver_type!="tddft") - { - ModuleBase::WARNING("read_atoms", "no atoms can move in MD simulations!"); - return false; - } + if (!ucell.if_atoms_can_move () && PARAM.inp.calculation == "md" && PARAM.inp.esolver_type != "tddft") + { + ModuleBase::WARNING ("read_atoms", "no atoms can move in MD simulations!"); + return false; + } ofs_running << std::endl; - ModuleBase::GlobalFunc::OUT(ofs_running,"TOTAL ATOM NUMBER",ucell.nat); + ModuleBase::GlobalFunc::OUT (ofs_running, "TOTAL ATOM NUMBER", ucell.nat); ofs_running << std::endl; if (ucell.nat == 0) - { - 
ModuleBase::WARNING("read_atom_positions","no atoms found in the system!"); - return false; - } + { + ModuleBase::WARNING ("read_atom_positions", "no atoms found in the system!"); + return false; + } // Check atom positions - unitcell::check_dtau(ucell.atoms, ucell.ntype, ucell.lat0, ucell.latvec); + unitcell::check_dtau (ucell.atoms, ucell.ntype, ucell.lat0, ucell.latvec); - if (unitcell::check_tau(ucell.atoms, ucell.ntype, ucell.lat0)) - { - print_tau(ucell.atoms, ucell.Coordinate, ucell.ntype, ucell.lat0, ofs_running); - return true; - } + if (unitcell::check_tau (ucell.atoms, ucell.ntype, ucell.lat0)) + { + print_tau (ucell.atoms, ucell.Coordinate, ucell.ntype, ucell.lat0, ofs_running); + return true; + } return false; } -ModuleBase::Vector3 calculate_lattice_center( - const ModuleBase::Matrix3& latvec, - const std::string& center_mode) +ModuleBase::Vector3 + calculate_lattice_center (const ModuleBase::Matrix3& latvec, const std::string& center_mode) { - ModuleBase::Vector3 latcenter(0.0, 0.0, 0.0); + ModuleBase::Vector3 latcenter (0.0, 0.0, 0.0); if (center_mode == "xy" || center_mode == "xyz") - { - latcenter.x = (latvec.e11 + latvec.e21 + latvec.e31) / 2.0; - latcenter.y = (latvec.e12 + latvec.e22 + latvec.e32) / 2.0; - } + { + latcenter.x = (latvec.e11 + latvec.e21 + latvec.e31) / 2.0; + latcenter.y = (latvec.e12 + latvec.e22 + latvec.e32) / 2.0; + } if (center_mode == "xz" || center_mode == "xyz") - { - latcenter.x = (latvec.e11 + latvec.e21 + latvec.e31) / 2.0; - latcenter.z = (latvec.e13 + latvec.e23 + latvec.e33) / 2.0; - } + { + latcenter.x = (latvec.e11 + latvec.e21 + latvec.e31) / 2.0; + latcenter.z = (latvec.e13 + latvec.e23 + latvec.e33) / 2.0; + } if (center_mode == "yz" || center_mode == "xyz") - { - latcenter.y = (latvec.e12 + latvec.e22 + latvec.e32) / 2.0; - latcenter.z = (latvec.e13 + latvec.e23 + latvec.e33) / 2.0; - } + { + latcenter.y = (latvec.e12 + latvec.e22 + latvec.e32) / 2.0; + latcenter.z = (latvec.e13 + latvec.e23 + latvec.e33) / 
2.0; + } return latcenter; } -void transform_atom_coordinates(Atom& atom, int ia, - const std::string& Coordinate, - const ModuleBase::Vector3& v, - const ModuleBase::Matrix3& latvec, - double lat0, - ModuleBase::Vector3& latcenter) +void + transform_atom_coordinates (Atom& atom, + int ia, + const std::string& Coordinate, + const ModuleBase::Vector3& v, + const ModuleBase::Matrix3& latvec, + double lat0, + ModuleBase::Vector3& latcenter) { - if(Coordinate=="Direct") - { - // change v from direct to cartesian, - // the unit is GlobalC::sf.ucell.lat0 - atom.taud[ia] = v; - atom.tau[ia] = v * latvec; - } - else if(Coordinate=="Cartesian") - { - atom.tau[ia] = v; // in unit ucell.lat0 - } - else if(Coordinate=="Cartesian_angstrom") - { - atom.tau[ia] = v / ModuleBase::BOHR_TO_A / lat0; - } - else if(Coordinate=="Cartesian_angstrom_center_xy") - { - latcenter = calculate_lattice_center(latvec, "xy"); - atom.tau[ia] = v / ModuleBase::BOHR_TO_A / lat0 + latcenter; - } - else if(Coordinate=="Cartesian_angstrom_center_xz") - { - latcenter = calculate_lattice_center(latvec, "xz"); - atom.tau[ia] = v / ModuleBase::BOHR_TO_A / lat0 + latcenter; - } - else if(Coordinate=="Cartesian_angstrom_center_yz") - { - latcenter = calculate_lattice_center(latvec, "yz"); - atom.tau[ia] = v / ModuleBase::BOHR_TO_A / lat0 + latcenter; - } - else if(Coordinate=="Cartesian_angstrom_center_xyz") - { - latcenter = calculate_lattice_center(latvec, "xyz"); - atom.tau[ia] = v / ModuleBase::BOHR_TO_A / lat0 + latcenter; - } - else if(Coordinate=="Cartesian_au") - { - atom.tau[ia] = v / lat0; - } + if (Coordinate == "Direct") + { + // change v from direct to cartesian, + // the unit is GlobalC::sf.ucell.lat0 + atom.taud[ia] = v; + atom.tau[ia] = v * latvec; + } + else if (Coordinate == "Cartesian") + { + atom.tau[ia] = v; // in unit ucell.lat0 + } + else if (Coordinate == "Cartesian_angstrom") + { + atom.tau[ia] = v / ModuleBase::BOHR_TO_A / lat0; + } + else if (Coordinate == 
"Cartesian_angstrom_center_xy") + { + latcenter = calculate_lattice_center (latvec, "xy"); + atom.tau[ia] = v / ModuleBase::BOHR_TO_A / lat0 + latcenter; + } + else if (Coordinate == "Cartesian_angstrom_center_xz") + { + latcenter = calculate_lattice_center (latvec, "xz"); + atom.tau[ia] = v / ModuleBase::BOHR_TO_A / lat0 + latcenter; + } + else if (Coordinate == "Cartesian_angstrom_center_yz") + { + latcenter = calculate_lattice_center (latvec, "yz"); + atom.tau[ia] = v / ModuleBase::BOHR_TO_A / lat0 + latcenter; + } + else if (Coordinate == "Cartesian_angstrom_center_xyz") + { + latcenter = calculate_lattice_center (latvec, "xyz"); + atom.tau[ia] = v / ModuleBase::BOHR_TO_A / lat0 + latcenter; + } + else if (Coordinate == "Cartesian_au") + { + atom.tau[ia] = v / lat0; + } // Convert to direct coordinates if using Cartesian - if(Coordinate=="Cartesian" || - Coordinate=="Cartesian_angstrom" || - Coordinate=="Cartesian_angstrom_center_xy" || - Coordinate=="Cartesian_angstrom_center_xz" || - Coordinate=="Cartesian_angstrom_center_yz" || - Coordinate=="Cartesian_angstrom_center_xyz" || - Coordinate=="Cartesian_au") - { - double dx=0.0; - double dy=0.0; - double dz=0.0; - ModuleBase::Mathzone::Cartesian_to_Direct(atom.tau[ia].x, - atom.tau[ia].y, - atom.tau[ia].z, - latvec.e11, latvec.e12, latvec.e13, - latvec.e21, latvec.e22, latvec.e23, - latvec.e31, latvec.e32, latvec.e33, - dx,dy,dz); - - atom.taud[ia].x = dx; - atom.taud[ia].y = dy; - atom.taud[ia].z = dz; - } + if (Coordinate == "Cartesian" || Coordinate == "Cartesian_angstrom" || Coordinate == "Cartesian_angstrom_center_xy" + || Coordinate == "Cartesian_angstrom_center_xz" || Coordinate == "Cartesian_angstrom_center_yz" + || Coordinate == "Cartesian_angstrom_center_xyz" || Coordinate == "Cartesian_au") + { + double dx = 0.0; + double dy = 0.0; + double dz = 0.0; + ModuleBase::Mathzone::Cartesian_to_Direct (atom.tau[ia].x, + atom.tau[ia].y, + atom.tau[ia].z, + latvec.e11, + latvec.e12, + latvec.e13, + latvec.e21, 
+ latvec.e22, + latvec.e23, + latvec.e31, + latvec.e32, + latvec.e33, + dx, + dy, + dz); + + atom.taud[ia].x = dx; + atom.taud[ia].y = dy; + atom.taud[ia].z = dz; + } } -void process_magnetization(Atom& atom, int it, int ia, - int nspin, bool input_vec_mag, - bool input_angle_mag, - std::ofstream& ofs_running) +void + process_magnetization (Atom& atom, + int it, + int ia, + int nspin, + bool input_vec_mag, + bool input_angle_mag, + std::ofstream& ofs_running) { // Recalculate mag and m_loc_ from read in angle1, angle2 and mag or mx, my, mz - if(input_angle_mag) - { - // angle1 or angle2 are given, calculate mx, my, mz from angle1 and angle2 and mag - atom.m_loc_[ia].z = atom.mag[ia] * cos(atom.angle1[ia]); - if(std::abs(sin(atom.angle1[ia])) > 1e-10) - { - atom.m_loc_[ia].x = atom.mag[ia] * - sin(atom.angle1[ia]) * cos(atom.angle2[ia]); - atom.m_loc_[ia].y = atom.mag[ia] * - sin(atom.angle1[ia]) * sin(atom.angle2[ia]); - } - } + if (input_angle_mag) + { + // angle1 or angle2 are given, calculate mx, my, mz from angle1 and angle2 and mag + atom.m_loc_[ia].z = atom.mag[ia] * cos (atom.angle1[ia]); + if (std::abs (sin (atom.angle1[ia])) > 1e-10) + { + atom.m_loc_[ia].x = atom.mag[ia] * sin (atom.angle1[ia]) * cos (atom.angle2[ia]); + atom.m_loc_[ia].y = atom.mag[ia] * sin (atom.angle1[ia]) * sin (atom.angle2[ia]); + } + } else if (input_vec_mag) - { - // mx, my, mz are given, calculate angle1 and angle2 from mx, my, mz - double mxy=sqrt(pow(atom.m_loc_[ia].x,2)+pow(atom.m_loc_[ia].y,2)); - atom.angle1[ia]=atan2(mxy,atom.m_loc_[ia].z); - if(mxy>1e-8) { - atom.angle2[ia]=atan2(atom.m_loc_[ia].y,atom.m_loc_[ia].x); + // mx, my, mz are given, calculate angle1 and angle2 from mx, my, mz + double mxy = sqrt (pow (atom.m_loc_[ia].x, 2) + pow (atom.m_loc_[ia].y, 2)); + atom.angle1[ia] = atan2 (mxy, atom.m_loc_[ia].z); + if (mxy > 1e-8) + { + atom.angle2[ia] = atan2 (atom.m_loc_[ia].y, atom.m_loc_[ia].x); + } } - } else - { - // only one mag is given, assume it is z - 
atom.m_loc_[ia].x = 0; - atom.m_loc_[ia].y = 0; - atom.m_loc_[ia].z = atom.mag[ia]; - } - - if(nspin==4) - { - if(!PARAM.inp.noncolin) - { - // collinear case with nspin = 4, only z component is used + { + // only one mag is given, assume it is z atom.m_loc_[ia].x = 0; atom.m_loc_[ia].y = 0; + atom.m_loc_[ia].z = atom.mag[ia]; } - // print only ia==0 && mag>0 to avoid too much output - // print when ia!=0 && mag[ia] != mag[0] to avoid too much output - if(ia==0 || (atom.m_loc_[ia].x != atom.m_loc_[0].x - || atom.m_loc_[ia].y != atom.m_loc_[0].y + + if (nspin == 4) + { + if (!PARAM.inp.noncolin) + { + // collinear case with nspin = 4, only z component is used + atom.m_loc_[ia].x = 0; + atom.m_loc_[ia].y = 0; + } + // print only ia==0 && mag>0 to avoid too much output + // print when ia!=0 && mag[ia] != mag[0] to avoid too much output + if (ia == 0 + || (atom.m_loc_[ia].x != atom.m_loc_[0].x || atom.m_loc_[ia].y != atom.m_loc_[0].y || atom.m_loc_[ia].z != atom.m_loc_[0].z)) + { + std::stringstream ss; + ss << "Magnetization for this type"; + if (ia != 0) + { + ss << " (atom" << ia + 1 << ")"; + } + ModuleBase::GlobalFunc::OUT (ofs_running, + ss.str (), + atom.m_loc_[ia].x, + atom.m_loc_[ia].y, + atom.m_loc_[ia].z); + } + // Note: The original code had ZEROS(ucell.magnet.ux_, 3) here + // but ucell is not available in this function scope + } + else if (nspin == 2) { - std::stringstream ss; - ss << "Magnetization for this type"; - if(ia!=0) - { - ss<<" (atom"<0 to avoid too much output - // print when ia!=0 && mag[ia] != mag[0] to avoid too much output - if(ia==0 || (atom.mag[ia] != atom.mag[0])) - { - std::stringstream ss; - ss << "magnetization of element " << it+1; - if(ia!=0) - { - ss<<" (atom"<0 to avoid too much output + // print when ia!=0 && mag[ia] != mag[0] to avoid too much output + if (ia == 0 || (atom.mag[ia] != atom.mag[0])) + { + std::stringstream ss; + ss << "magnetization of element " << it + 1; + if (ia != 0) + { + ss << " (atom" << ia + 1 << ")"; + } 
+ ModuleBase::GlobalFunc::OUT (ofs_running, ss.str (), atom.mag[ia]); + } + } } -bool parse_atom_properties(std::ifstream& ifpos, - Atom& atom, int ia, - ModuleBase::Vector3& mv, - bool& input_vec_mag, - bool& input_angle_mag, - bool& set_element_mag_zero) +bool + parse_atom_properties (std::ifstream& ifpos, + Atom& atom, + int ia, + ModuleBase::Vector3& mv, + bool& input_vec_mag, + bool& input_angle_mag, + bool& set_element_mag_zero) { std::string tmpid; - tmpid = ifpos.get(); - - if( (int)tmpid[0] < 0 ) - { - std::cout << "read_atom_positions, mismatch in atom number for atom type: " - << atom.label << std::endl; - exit(1); - } + tmpid = ifpos.get (); - // read if catch goodbit before "\n" and "#" - while ( (tmpid != "\n") && (ifpos.good()) && (tmpid !="#") ) - { - tmpid = ifpos.get(); - // old method of reading frozen ions - char tmp = (char)tmpid[0]; - if ( tmp >= DIGIT_START && tmp <= DIGIT_END ) - { - mv.x = std::stoi(tmpid); - ifpos >> mv.y >> mv.z; - } - // new method of reading frozen ions and velocities - if ( tmp >= LOWER_A && tmp <= LOWER_Z) + if ((int)tmpid[0] < 0) { - ifpos.putback(tmp); - ifpos >> tmpid; + std::cout << "read_atom_positions, mismatch in atom number for atom type: " << atom.label << std::endl; + exit (1); } - if ( tmpid == "m" ) - { - ifpos >> mv.x >> mv.y >> mv.z; - } - else if ( tmpid == "v" ||tmpid == "vel" || tmpid == "velocity" ) + + // read if catch goodbit before "\n" and "#" + while ((tmpid != "\n") && (ifpos.good ()) && (tmpid != "#")) { - ifpos >> atom.vel[ia].x >> atom.vel[ia].y >> atom.vel[ia].z; + tmpid = ifpos.get (); + // old method of reading frozen ions + char tmp = (char)tmpid[0]; + if (tmp >= DIGIT_START && tmp <= DIGIT_END) + { + mv.x = std::stoi (tmpid); + ifpos >> mv.y >> mv.z; + } + // new method of reading frozen ions and velocities + if (tmp >= LOWER_A && tmp <= LOWER_Z) + { + ifpos.putback (tmp); + ifpos >> tmpid; + } + if (tmpid == "m") + { + ifpos >> mv.x >> mv.y >> mv.z; + } + else if (tmpid == "v" || tmpid 
== "vel" || tmpid == "velocity") + { + ifpos >> atom.vel[ia].x >> atom.vel[ia].y >> atom.vel[ia].z; + } + else if (tmpid == "mag" || tmpid == "magmom") + { + set_element_mag_zero = true; + double tmpamg = 0; + ifpos >> tmpamg; + tmp = ifpos.get (); + while (tmp == ' ') + { + tmp = ifpos.get (); + } + + if ((tmp >= DIGIT_START && tmp <= DIGIT_END) or tmp == MINUS_SIGN) + { + ifpos.putback (tmp); + ifpos >> atom.m_loc_[ia].y >> atom.m_loc_[ia].z; + atom.m_loc_[ia].x = tmpamg; + atom.mag[ia] = sqrt (pow (atom.m_loc_[ia].x, 2) + pow (atom.m_loc_[ia].y, 2) + + pow (atom.m_loc_[ia].z, 2)); + input_vec_mag = true; + } + else + { + ifpos.putback (tmp); + atom.mag[ia] = tmpamg; + } + } + else if (tmpid == "angle1") + { + ifpos >> atom.angle1[ia]; + atom.angle1[ia] = atom.angle1[ia] / 180 * ModuleBase::PI; + input_angle_mag = true; + set_element_mag_zero = true; + } + else if (tmpid == "angle2") + { + ifpos >> atom.angle2[ia]; + atom.angle2[ia] = atom.angle2[ia] / 180 * ModuleBase::PI; + input_angle_mag = true; + set_element_mag_zero = true; + } + else if (tmpid == "lambda") + { + double tmplam = 0; + ifpos >> tmplam; + tmp = ifpos.get (); + while (tmp == ' ') + { + tmp = ifpos.get (); + } + if ((tmp >= DIGIT_START && tmp <= DIGIT_END) or tmp == MINUS_SIGN) + { + ifpos.putback (tmp); + ifpos >> atom.lambda[ia].y >> atom.lambda[ia].z; + atom.lambda[ia].x = tmplam; + } + else + { + ifpos.putback (tmp); + atom.lambda[ia].z = tmplam; + } + atom.lambda[ia].x /= ModuleBase::Ry_to_eV; + atom.lambda[ia].y /= ModuleBase::Ry_to_eV; + atom.lambda[ia].z /= ModuleBase::Ry_to_eV; + } + else if (tmpid == "sc") + { + double tmplam = 0; + ifpos >> tmplam; + tmp = ifpos.get (); + while (tmp == ' ') + { + tmp = ifpos.get (); + } + if ((tmp >= DIGIT_START && tmp <= DIGIT_END) or tmp == MINUS_SIGN) + { + ifpos.putback (tmp); + ifpos >> atom.constrain[ia].y >> atom.constrain[ia].z; + atom.constrain[ia].x = tmplam; + } + else + { + ifpos.putback (tmp); + atom.constrain[ia].z = tmplam; + } + } } - 
else if ( tmpid == "mag" || tmpid == "magmom") + // move to next line + while ((tmpid != "\n") && (ifpos.good ())) { - set_element_mag_zero = true; - double tmpamg=0; - ifpos >> tmpamg; - tmp=ifpos.get(); - while (tmp==' ') - { - tmp=ifpos.get(); - } - - if((tmp >= DIGIT_START && tmp <= DIGIT_END) or tmp==MINUS_SIGN) - { - ifpos.putback(tmp); - ifpos >> atom.m_loc_[ia].y>>atom.m_loc_[ia].z; - atom.m_loc_[ia].x=tmpamg; - atom.mag[ia]=sqrt(pow(atom.m_loc_[ia].x,2) - +pow(atom.m_loc_[ia].y,2) - +pow(atom.m_loc_[ia].z,2)); - input_vec_mag=true; - - } - else - { - ifpos.putback(tmp); - atom.mag[ia]=tmpamg; - } - } - else if ( tmpid == "angle1") - { - ifpos >> atom.angle1[ia]; - atom.angle1[ia]=atom.angle1[ia]/180 *ModuleBase::PI; - input_angle_mag=true; - set_element_mag_zero = true; - } - else if ( tmpid == "angle2") - { - ifpos >> atom.angle2[ia]; - atom.angle2[ia]=atom.angle2[ia]/180 *ModuleBase::PI; - input_angle_mag=true; - set_element_mag_zero = true; - } - else if ( tmpid == "lambda") - { - double tmplam=0; - ifpos >> tmplam; - tmp=ifpos.get(); - while (tmp==' ') - { - tmp=ifpos.get(); - } - if((tmp >= DIGIT_START && tmp <= DIGIT_END) or tmp==MINUS_SIGN) - { - ifpos.putback(tmp); - ifpos >> atom.lambda[ia].y>>atom.lambda[ia].z; - atom.lambda[ia].x=tmplam; - } - else - { - ifpos.putback(tmp); - atom.lambda[ia].z=tmplam; - } - atom.lambda[ia].x /= ModuleBase::Ry_to_eV; - atom.lambda[ia].y /= ModuleBase::Ry_to_eV; - atom.lambda[ia].z /= ModuleBase::Ry_to_eV; - } - else if ( tmpid == "sc") - { - double tmplam=0; - ifpos >> tmplam; - tmp=ifpos.get(); - while (tmp==' ') - { - tmp=ifpos.get(); - } - if((tmp >= DIGIT_START && tmp <= DIGIT_END) or tmp==MINUS_SIGN) - { - ifpos.putback(tmp); - ifpos >> atom.constrain[ia].y>>atom.constrain[ia].z; - atom.constrain[ia].x=tmplam; - } - else - { - ifpos.putback(tmp); - atom.constrain[ia].z=tmplam; - } + tmpid = ifpos.get (); } - } - // move to next line - while ( (tmpid != "\n") && (ifpos.good()) ) - { - tmpid = ifpos.get(); - } 
return true; } -bool read_atom_type_header(int it, UnitCell& ucell, - std::ifstream& ifpos, - std::ofstream& ofs_running, - std::ofstream& ofs_warning, - bool& set_element_mag_zero) +bool + read_atom_type_header (int it, + UnitCell& ucell, + std::ifstream& ifpos, + std::ofstream& ofs_running, + std::ofstream& ofs_warning, + bool& set_element_mag_zero) { //======================================= // (1) read in atom label // start magnetization //======================================= - ModuleBase::GlobalFunc::READ_VALUE(ifpos, ucell.atoms[it].label); + ModuleBase::GlobalFunc::READ_VALUE (ifpos, ucell.atoms[it].label); - if(ucell.atoms[it].label != ucell.atom_label[it]) - { - ofs_warning << " Label orders in ATOMIC_POSITIONS and ATOMIC_SPECIES sections do not match!" << std::endl; - ofs_warning << " Label read from ATOMIC_POSITIONS is " << ucell.atoms[it].label << std::endl; - ofs_warning << " Label from ATOMIC_SPECIES is " << ucell.atom_label[it] << std::endl; - return false; - } - ModuleBase::GlobalFunc::OUT(ofs_running, "Atom label", ucell.atoms[it].label); + if (ucell.atoms[it].label != ucell.atom_label[it]) + { + ofs_warning << " Label orders in ATOMIC_POSITIONS and ATOMIC_SPECIES sections do not match!" 
<< std::endl; + ofs_warning << " Label read from ATOMIC_POSITIONS is " << ucell.atoms[it].label << std::endl; + ofs_warning << " Label from ATOMIC_SPECIES is " << ucell.atom_label[it] << std::endl; + return false; + } + ModuleBase::GlobalFunc::OUT (ofs_running, "Atom label", ucell.atoms[it].label); set_element_mag_zero = false; - ModuleBase::GlobalFunc::READ_VALUE(ifpos, ucell.magnet.start_mag[it]); + ModuleBase::GlobalFunc::READ_VALUE (ifpos, ucell.magnet.start_mag[it]); #ifndef __SYMMETRY //=========================================== @@ -516,56 +526,56 @@ bool read_atom_type_header(int it, UnitCell& ucell, // int* ucell.atoms[it].l_nchi; //=========================================== - if ((PARAM.inp.basis_type == "lcao")||(PARAM.inp.basis_type == "lcao_in_pw")) - { - std::string orbital_file = PARAM.inp.orbital_dir + ucell.orbital_fn[it]; - bool normal = elecstate::read_orb_file(it, orbital_file, ofs_running, &(ucell.atoms[it])); - if(!normal) + if ((PARAM.inp.basis_type == "lcao") || (PARAM.inp.basis_type == "lcao_in_pw")) { - return false; + std::string orbital_file = PARAM.inp.orbital_dir + ucell.orbital_fn[it]; + bool normal = elecstate::read_orb_file (it, orbital_file, ofs_running, &(ucell.atoms[it])); + if (!normal) + { + return false; + } } - } - else if(PARAM.inp.basis_type == "pw") - { - if ((PARAM.inp.init_wfc.substr(0, 3) == "nao") || PARAM.inp.onsite_radius > 0.0) + else if (PARAM.inp.basis_type == "pw") { - std::string orbital_file = PARAM.inp.orbital_dir + ucell.orbital_fn[it]; - bool normal = elecstate::read_orb_file(it, orbital_file, ofs_running, &(ucell.atoms[it])); - if(!normal) - { - return false; - } - } - else - { - ucell.atoms[it].nw = 0; - ucell.atoms[it].nwl = 2; - if ( ucell.lmaxmax != 2 ) - { - ucell.atoms[it].nwl = ucell.lmaxmax; - } - ucell.atoms[it].l_nchi.resize(ucell.atoms[it].nwl+1, 0); - for(int L=0; L 0.0) + { + std::string orbital_file = PARAM.inp.orbital_dir + ucell.orbital_fn[it]; + bool normal = elecstate::read_orb_file (it, 
orbital_file, ofs_running, &(ucell.atoms[it])); + if (!normal) + { + return false; + } + } + else + { + ucell.atoms[it].nw = 0; + ucell.atoms[it].nwl = 2; + if (ucell.lmaxmax != 2) + { + ucell.atoms[it].nwl = ucell.lmaxmax; + } + ucell.atoms[it].l_nchi.resize (ucell.atoms[it].nwl + 1, 0); + for (int L = 0; L < ucell.atoms[it].nwl + 1; L++) + { + ucell.atoms[it].l_nchi[L] = 1; + // calculate the number of local basis(3D) + ucell.atoms[it].nw += (2 * L + 1) * ucell.atoms[it].l_nchi[L]; + std::stringstream ss; + ss << "L=" << L << ", number of zeta"; + ModuleBase::GlobalFunc::OUT (ofs_running, ss.str (), ucell.atoms[it].l_nchi[L]); + } + } + } // end basis type #endif //========================= // (3) read in atom number //========================= int na = 0; - ModuleBase::GlobalFunc::READ_VALUE(ifpos, na); + ModuleBase::GlobalFunc::READ_VALUE (ifpos, na); ucell.atoms[it].na = na; - ModuleBase::GlobalFunc::OUT(ofs_running,"Number of atoms for this type",na); + ModuleBase::GlobalFunc::OUT (ofs_running, "Number of atoms for this type", na); /** * liuyu update 2023-05-11 @@ -574,21 +584,21 @@ bool read_atom_type_header(int it, UnitCell& ucell, * even if the number of ucell.atoms is zero! */ if (na < 0) - { - ModuleBase::WARNING("read_atom_positions", " atom number < 0."); - return false; - } + { + ModuleBase::WARNING ("read_atom_positions", " atom number < 0."); + return false; + } else if (na == 0) - { - std::cout << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; - std::cout << " Warning: atom number is 0 for atom type: " << ucell.atoms[it].label << std::endl; - std::cout << " If you are confident that this is not a mistake, please ignore this warning." 
<< std::endl; - std::cout << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; - ofs_running << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; - ofs_running << " Warning: atom number is 0 for atom type: " << ucell.atoms[it].label << std::endl; - ofs_running << " If you are confident that this is not a mistake, please ignore this warning." << std::endl; - ofs_running << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; - } + { + std::cout << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; + std::cout << " Warning: atom number is 0 for atom type: " << ucell.atoms[it].label << std::endl; + std::cout << " If you are confident that this is not a mistake, please ignore this warning." << std::endl; + std::cout << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; + ofs_running << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; + ofs_running << " Warning: atom number is 0 for atom type: " << ucell.atoms[it].label << std::endl; + ofs_running << " If you are confident that this is not a mistake, please ignore this warning." 
<< std::endl; + ofs_running << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; + } return true; } diff --git a/source/source_cell/read_atoms_helper.h b/source/source_cell/read_atoms_helper.h index 7049549986c..8c27d199605 100644 --- a/source/source_cell/read_atoms_helper.h +++ b/source/source_cell/read_atoms_helper.h @@ -7,7 +7,8 @@ #include "source_base/vector3.h" #include "source_base/matrix3.h" -namespace unitcell { +namespace unitcell +{ /** * @brief Validate coordinate system type @@ -15,8 +16,7 @@ namespace unitcell { * @param ofs_warning Output stream for warnings * @return true if valid, false otherwise */ -bool validate_coordinate_system(const std::string& Coordinate, - std::ofstream& ofs_warning); +bool validate_coordinate_system (const std::string& Coordinate, std::ofstream& ofs_warning); /** * @brief Allocate and initialize atom property vectors @@ -24,7 +24,7 @@ bool validate_coordinate_system(const std::string& Coordinate, * @param na Number of atoms * @param mass Atomic mass */ -void allocate_atom_properties(Atom& atom, int na, double mass); +void allocate_atom_properties (Atom& atom, int na, double mass); /** * @brief Set atom movement constraints based on fixed_atoms parameter @@ -32,8 +32,7 @@ void allocate_atom_properties(Atom& atom, int na, double mass); * @param ia Atom index * @param mv Movement vector (1=movable, 0=fixed) */ -void set_atom_movement_flags(Atom& atom, int ia, - const ModuleBase::Vector3& mv); +void set_atom_movement_flags (Atom& atom, int ia, const ModuleBase::Vector3& mv); /** * @brief Set default magnetization if not explicitly specified @@ -41,8 +40,7 @@ void set_atom_movement_flags(Atom& atom, int ia, * @param nspin Number of spin components * @param ofs_running Output stream for running information */ -void autoset_magnetization(UnitCell& ucell, int nspin, - std::ofstream& ofs_running); +void autoset_magnetization (UnitCell& ucell, int nspin, std::ofstream& ofs_running); /** * 
@brief Perform final validation and output @@ -51,9 +49,7 @@ void autoset_magnetization(UnitCell& ucell, int nspin, * @param ofs_warning Output stream for warnings * @return true if validation passes, false otherwise */ -bool finalize_atom_positions(UnitCell& ucell, - std::ofstream& ofs_running, - std::ofstream& ofs_warning); +bool finalize_atom_positions (UnitCell& ucell, std::ofstream& ofs_running, std::ofstream& ofs_warning); /** * @brief Calculate lattice center for different centering modes @@ -61,9 +57,8 @@ bool finalize_atom_positions(UnitCell& ucell, * @param center_mode Centering mode: "xy", "xz", "yz", or "xyz" * @return Lattice center coordinates */ -ModuleBase::Vector3 calculate_lattice_center( - const ModuleBase::Matrix3& latvec, - const std::string& center_mode); +ModuleBase::Vector3 calculate_lattice_center (const ModuleBase::Matrix3& latvec, + const std::string& center_mode); /** * @brief Convert between different coordinate systems @@ -75,12 +70,13 @@ ModuleBase::Vector3 calculate_lattice_center( * @param lat0 Lattice constant * @param latcenter Lattice center (output parameter) */ -void transform_atom_coordinates(Atom& atom, int ia, - const std::string& Coordinate, - const ModuleBase::Vector3& v, - const ModuleBase::Matrix3& latvec, - double lat0, - ModuleBase::Vector3& latcenter); +void transform_atom_coordinates (Atom& atom, + int ia, + const std::string& Coordinate, + const ModuleBase::Vector3& v, + const ModuleBase::Matrix3& latvec, + double lat0, + ModuleBase::Vector3& latcenter); /** * @brief Convert between magnetization representations and output @@ -92,10 +88,13 @@ void transform_atom_coordinates(Atom& atom, int ia, * @param input_angle_mag Whether angle magnetization was input * @param ofs_running Output stream for running information */ -void process_magnetization(Atom& atom, int it, int ia, - int nspin, bool input_vec_mag, - bool input_angle_mag, - std::ofstream& ofs_running); +void process_magnetization (Atom& atom, + int it, + int 
ia, + int nspin, + bool input_vec_mag, + bool input_angle_mag, + std::ofstream& ofs_running); /** * @brief Parse optional atom properties (mag, angle1, angle2, lambda, sc, m, v) @@ -108,12 +107,13 @@ void process_magnetization(Atom& atom, int it, int ia, * @param set_element_mag_zero Whether to reset element magnetization (output parameter) * @return true if parsing succeeds, false otherwise */ -bool parse_atom_properties(std::ifstream& ifpos, - Atom& atom, int ia, - ModuleBase::Vector3& mv, - bool& input_vec_mag, - bool& input_angle_mag, - bool& set_element_mag_zero); +bool parse_atom_properties (std::ifstream& ifpos, + Atom& atom, + int ia, + ModuleBase::Vector3& mv, + bool& input_vec_mag, + bool& input_angle_mag, + bool& set_element_mag_zero); /** * @brief Read atom type metadata (label, magnetization, orbital info, atom count) @@ -125,11 +125,12 @@ bool parse_atom_properties(std::ifstream& ifpos, * @param set_element_mag_zero Whether to reset element magnetization (output parameter) * @return true if reading succeeds, false otherwise */ -bool read_atom_type_header(int it, UnitCell& ucell, - std::ifstream& ifpos, - std::ofstream& ofs_running, - std::ofstream& ofs_warning, - bool& set_element_mag_zero); +bool read_atom_type_header (int it, + UnitCell& ucell, + std::ifstream& ifpos, + std::ofstream& ofs_running, + std::ofstream& ofs_warning, + bool& set_element_mag_zero); } // namespace unitcell diff --git a/source/source_cell/read_pp.cpp b/source/source_cell/read_pp.cpp index a6b2e1d4a84..84c0b88aeb5 100644 --- a/source/source_cell/read_pp.cpp +++ b/source/source_cell/read_pp.cpp @@ -11,403 +11,409 @@ #include "source_base/math_integral.h" // for numerical integration -Pseudopot_upf::Pseudopot_upf() -{ -} +Pseudopot_upf::Pseudopot_upf () {} -Pseudopot_upf::~Pseudopot_upf() -{ -} +Pseudopot_upf::~Pseudopot_upf () {} -int Pseudopot_upf::init_pseudo_reader(const std::string &fn, std::string &type, Atom_pseudo& pp) +int + Pseudopot_upf::init_pseudo_reader (const 
std::string& fn, std::string& type, Atom_pseudo& pp) { - ModuleBase::TITLE("Pseudopot_upf","init"); + ModuleBase::TITLE ("Pseudopot_upf", "init"); // First check if this pseudo-potential has spin-orbit information - std::ifstream ifs(fn.c_str(), std::ios::in); - - // can't find the file. - if (!ifs) - { - return 1; - } - - if (type == "auto") - { - set_pseudo_type(fn, type); - } - - int info = -1; - if (type == "upf") - { - info = read_pseudo_upf(ifs, pp); - } - else if (type == "vwr") - { - info = read_pseudo_vwr(ifs, pp); - } - else if (type == "upf201") - { - info = read_pseudo_upf201(ifs, pp); - } - else if (type == "blps") - { - info = read_pseudo_blps(ifs, pp); - } + std::ifstream ifs (fn.c_str (), std::ios::in); + + // can't find the file. + if (!ifs) + { + return 1; + } + + if (type == "auto") + { + set_pseudo_type (fn, type); + } + + int info = -1; + if (type == "upf") + { + info = read_pseudo_upf (ifs, pp); + } + else if (type == "vwr") + { + info = read_pseudo_vwr (ifs, pp); + } + else if (type == "upf201") + { + info = read_pseudo_upf201 (ifs, pp); + } + else if (type == "blps") + { + info = read_pseudo_blps (ifs, pp); + } else - { - return 4; - } + { + return 4; + } - return info; + return info; } - //---------------------------------------------------------- // setting the type of the pseudopotential file //---------------------------------------------------------- -int Pseudopot_upf::set_pseudo_type(const std::string &fn, std::string &type) //zws add +int + Pseudopot_upf::set_pseudo_type (const std::string& fn, std::string& type) // zws add { - std::ifstream pptype_ifs(fn.c_str(), std::ios::in); + std::ifstream pptype_ifs (fn.c_str (), std::ios::in); std::string dummy; - std::string strversion; - - if (pptype_ifs.good()) - { - getline(pptype_ifs,dummy); - - std::stringstream wdsstream(dummy); - getline(wdsstream,strversion,'"'); - getline(wdsstream,strversion,'"'); - - if ( trim(strversion) == "2.0.1" ) - { - type = "upf201"; - } - else - { - type = 
"upf"; - } - } - return 0; + std::string strversion; + + if (pptype_ifs.good ()) + { + getline (pptype_ifs, dummy); + + std::stringstream wdsstream (dummy); + getline (wdsstream, strversion, '"'); + getline (wdsstream, strversion, '"'); + + if (trim (strversion) == "2.0.1") + { + type = "upf201"; + } + else + { + type = "upf"; + } + } + return 0; } -std::string& Pseudopot_upf::trim(std::string &in_str) +std::string& + Pseudopot_upf::trim (std::string& in_str) { - static const std::string deltri = " \t" ; // delete tab or space - std::string::size_type position = in_str.find_first_of(deltri, 0); + static const std::string deltri = " \t"; // delete tab or space + std::string::size_type position = in_str.find_first_of (deltri, 0); if (position == std::string::npos) - { - return in_str; - } - return trim(in_str.erase(position, 1) ); + { + return in_str; + } + return trim (in_str.erase (position, 1)); } -std::string Pseudopot_upf::trimend(std::string &in_str) +std::string + Pseudopot_upf::trimend (std::string& in_str) { - const std::string &deltri =" \t" ; - std::string::size_type position = in_str.find_last_not_of(deltri)+1; - std::string tmpstr=in_str.erase(position); - return tmpstr.erase(0,tmpstr.find_first_not_of(deltri)); -} //zws - - -int Pseudopot_upf::average_p(const double& lambda, Atom_pseudo& pp) + const std::string& deltri = " \t"; + std::string::size_type position = in_str.find_last_not_of (deltri) + 1; + std::string tmpstr = in_str.erase (position); + return tmpstr.erase (0, tmpstr.find_first_not_of (deltri)); +} // zws + +int + Pseudopot_upf::average_p (const double& lambda, Atom_pseudo& pp) { int error = 0; double lambda_ = lambda; - if(!PARAM.inp.lspinorb) { lambda_ = 0.0; } + if (!PARAM.inp.lspinorb) + { + lambda_ = 0.0; + } if (pp.has_so && pp.tvanp) - { - error++; - std::cout << "------------------------------------------------------" << std::endl; - std::cout << " FR-USPP please use lspinorb=.true." 
<< std::endl; - std::cout << "------------------------------------------------------" << std::endl; - return error; - } + { + error++; + std::cout << "------------------------------------------------------" << std::endl; + std::cout << " FR-USPP please use lspinorb=.true." << std::endl; + std::cout << "------------------------------------------------------" << std::endl; + return error; + } if (!pp.has_so && PARAM.inp.lspinorb) - { - error++; - std::cout << "warning_quit! no soc upf used for lspinorb calculation, error!" << std::endl; - return error; - } + { + error++; + std::cout << "warning_quit! no soc upf used for lspinorb calculation, error!" << std::endl; + return error; + } // ModuleBase::WARNING_QUIT("average_p", "no soc upf used for lspinorb calculation, error!"); - if (!pp.has_so || (PARAM.inp.lspinorb && std::abs(lambda_ - 1.0) < 1.0e-8)) - { - return error; - } - - //if(std::abs(lambda_)<1.0e-8) - if(!PARAM.inp.lspinorb) - { - int new_nbeta = 0; //calculate the new nbeta - for(int nb=0; nb< pp.nbeta; nb++) - { - new_nbeta++; - if(pp.lll[nb] != 0 && std::abs(pp.jjj[nb] - pp.lll[nb] - 0.5) < 1e-6) //two J = l +- 0.5 average to one - { - new_nbeta--; - } - } - - pp.nbeta = new_nbeta; - ModuleBase::matrix dion_new; - dion_new.create(pp.nbeta, pp.nbeta); - - int old_nbeta=-1; - for(int nb=0; nb1e-6) - { - error = 1; - std::cout<<"warning_quit! error beta function 1 !" <1e-6) - { - error = 1; - std::cout<<"warning_quit! error beta function 2 !" <1e-6) - { - error++; - std::cout<<"warning_quit! error chi function 1 !"<1e-6) - { - error++; - std::cout<<"warning_quit! error chi function 2 !"<1e-6) - { - error = 1; - std::cout<<"warning_quit! error beta function 1 !" <1e-6) - { - error = 1; - std::cout<<"warning_quit! error beta function 2 !" <1e-6) - { - error++; - std::cout<<"warning_quit! error chi function 1 !"<1e-6) - { - error++; - std::cout<<"warning_quit! error chi function 2 !"< 1e-6) + { + error = 1; + std::cout << "warning_quit! 
error beta function 1 !" << std::endl; + return error; + } + ind = old_nbeta + 1; + ind1 = old_nbeta; + } + else + { + if (std::abs (pp.jjj[old_nbeta + 1] - pp.lll[old_nbeta + 1] + 0.5) > 1e-6) + { + error = 1; + std::cout << "warning_quit! error beta function 2 !" << std::endl; + return error; + } + ind = old_nbeta; + ind1 = old_nbeta + 1; + } + double vion1 + = ((l + 1.0) * pp.dion (ind, ind) + l * pp.dion (ind1, ind1)) / (2.0 * l + 1.0); + if (std::abs (vion1) < 1.0e-8) + { + vion1 = 0.1; + } + + // average beta (betar) + for (int ir = 0; ir < pp.mesh; ir++) + { + pp.betar (nb, ir) = 1.0 / (2.0 * l + 1.0) + * ((l + 1.0) * sqrt (std::abs (pp.dion (ind, ind) / vion1)) + * pp.betar (ind, ir) + + l * sqrt (std::abs (pp.dion (ind1, ind1) / vion1)) + * pp.betar (ind1, ir)); + } + + // average the dion matrix + pp.dion (nb, nb) = vion1; + old_nbeta++; + } + else + { + for (int ir = 0; ir < pp.mesh; ir++) + { + pp.betar (nb, ir) = pp.betar (old_nbeta, ir); + } + pp.dion (nb, nb) = pp.dion (old_nbeta, old_nbeta); + } + pp.lll[nb] = pp.lll[old_nbeta]; // reset the lll index, ignore jjj index + } + + // store the old dion and then recreate dion + for (int i = 0; i < pp.nbeta; i++) + { + for (int j = 0; j < pp.nbeta; j++) + { + dion_new (i, j) = pp.dion (i, j); + } + } + + pp.dion = dion_new; + // pp.dion.create(pp.nbeta, pp.nbeta); + // for(int i=0;i 1e-6) + { + error++; + std::cout << "warning_quit! error chi function 1 !" << std::endl; + return error; + } + ind = old_nwfc + 1; + ind1 = old_nwfc; + } + else + { + if (std::abs (pp.jchi[old_nwfc + 1] - pp.lchi[old_nwfc + 1] + 0.5) > 1e-6) + { + error++; + std::cout << "warning_quit! error chi function 2 !" 
<< std::endl; + return error; + } + ind = old_nwfc; + ind1 = old_nwfc + 1; + } + // average chi + for (int ir = 0; ir < pp.mesh; ir++) + { + pp.chi (nb, ir) = 1.0 / (2.0 * l + 1.0) + * ((l + 1.0) * pp.chi (ind, ir) + (l * pp.chi (ind1, ir))); + } + old_nwfc++; + } + else + { + for (int ir = 0; ir < pp.mesh; ir++) + { + pp.chi (nb, ir) = pp.chi (old_nwfc, ir); + } + } + pp.lchi[nb] = pp.lchi[old_nwfc]; // reset lchi index + } + pp.has_so = false; + return error; + } + else // lambda_ != 0, modulate the soc effect in pseudopotential + { + for (int nb = 0; nb < pp.nbeta; nb++) + { + int l = pp.lll[nb]; + int ind = 0, ind1 = 0; + if (l != 0) + { + if (std::abs (pp.jjj[nb] - pp.lll[nb] + 0.5) < 1e-6) + { + if (std::abs (pp.jjj[nb + 1] - pp.lll[nb + 1] - 0.5) > 1e-6) + { + error = 1; + std::cout << "warning_quit! error beta function 1 !" << std::endl; + return error; + } + ind = nb + 1; + ind1 = nb; + } + else + { + if (std::abs (pp.jjj[nb + 1] - pp.lll[nb + 1] + 0.5) > 1e-6) + { + error = 1; + std::cout << "warning_quit! error beta function 2 !" 
<< std::endl; + return error; + } + ind = nb; + ind1 = nb + 1; + } + double vion1 + = ((l + 1.0) * pp.dion (ind, ind) + l * pp.dion (ind1, ind1)) / (2.0 * l + 1.0); + if (std::abs (vion1) < 1.0e-10) + { + vion1 = 0.1; + } + // average beta (betar) + const double sqrtDplus = sqrt (std::abs (pp.dion (ind, ind) / vion1)); + const double sqrtDminus = sqrt (std::abs (pp.dion (ind1, ind1) / vion1)); + pp.dion (ind, ind) = vion1; + pp.dion (ind1, ind1) = vion1; + for (int ir = 0; ir < pp.mesh; ir++) + { + double avera = 1.0 / (2.0 * l + 1.0) + * ((l + 1.0) * sqrtDplus * pp.betar (ind, ir) + + l * sqrtDminus * pp.betar (ind1, ir)); + double delta + = 1.0 / (2.0 * l + 1.0) + * (sqrtDplus * pp.betar (ind, ir) - sqrtDminus * pp.betar (ind1, ir)); + pp.betar (ind, ir) = (avera + l * delta * lambda_); + pp.betar (ind1, ir) = (avera - (l + 1) * delta * lambda_); + } + nb++; + } + } + + for (int nb = 0; nb < pp.nchi; nb++) + { + int l = pp.lchi[nb]; + int ind = 0, ind1 = 0; + if (l != 0) + { + if (std::abs (pp.jchi[nb] - pp.lchi[nb] + 0.5) < 1e-6) + { + if (std::abs (pp.jchi[nb + 1] - pp.lchi[nb + 1] - 0.5) > 1e-6) + { + error++; + std::cout << "warning_quit! error chi function 1 !" << std::endl; + return error; + } + ind = nb + 1; + ind1 = nb; + } + else + { + if (std::abs (pp.jchi[nb + 1] - pp.lchi[nb + 1] + 0.5) > 1e-6) + { + error++; + std::cout << "warning_quit! error chi function 2 !" 
<< std::endl; + return error; + } + ind = nb; + ind1 = nb + 1; + } + // average chi + for (int ir = 0; ir < pp.mesh; ir++) + { + double avera = 0.5 * (pp.chi (ind, ir) + pp.chi (ind1, ir)); + double delta = 0.5 * (pp.chi (ind, ir) - pp.chi (ind1, ir)); + pp.chi (ind, ir) = avera + delta * lambda_; + pp.chi (ind1, ir) = avera - delta * lambda_; + } + nb++; + } + } + return error; + } } // Peize Lin add for bsse 2021.04.07 -void Pseudopot_upf::set_empty_element(Atom_pseudo& pp) +void + Pseudopot_upf::set_empty_element (Atom_pseudo& pp) { - pp.zv = 0; - for(double &value : pp.vloc_at) - { value = 0; } - for(double &value : pp.rho_atc) - { value = 0; } - for(double &value : pp.rho_at) - { value = 0; } - pp.chi.zero_out(); - pp.dion.zero_out(); - pp.betar.zero_out(); + pp.zv = 0; + for (double& value: pp.vloc_at) + { + value = 0; + } + for (double& value: pp.rho_atc) + { + value = 0; + } + for (double& value: pp.rho_at) + { + value = 0; + } + pp.chi.zero_out (); + pp.dion.zero_out (); + pp.betar.zero_out (); } /** @@ -423,51 +429,59 @@ void Pseudopot_upf::set_empty_element(Atom_pseudo& pp) * This requires a little extra memory but unifies the treatment of q_l(r) * and allows further weaking with the augmentation charge. 
*/ -void Pseudopot_upf::set_upf_q(Atom_pseudo& pp) +void + Pseudopot_upf::set_upf_q (Atom_pseudo& pp) { if (pp.tvanp && !q_with_l) - { - pp.qfuncl.create(pp.nqlc, pp.nbeta * (pp.nbeta + 1) / 2, pp.mesh); - for (int nb = 0; nb < pp.nbeta; nb++) { - int ln = pp.lll[nb]; - for (int mb = nb; mb < pp.nbeta; mb++) - { - int lm = pp.lll[mb]; - int nmb = mb * (mb + 1) / 2 + nb; - - for (int l = std::abs(ln - lm); l <= ln + lm; l += 2) + pp.qfuncl.create (pp.nqlc, pp.nbeta * (pp.nbeta + 1) / 2, pp.mesh); + for (int nb = 0; nb < pp.nbeta; nb++) { - // copy q(r) to the l-dependent grid - for (int ir = 0; ir < pp.mesh; ir++) - { - pp.qfuncl(l, nmb, ir) = qfunc(nmb, ir); - } - - // adjust the inner values on the l-dependent grid if nqf and rinner are defined - if (nqf > 0 && rinner[l] > 0.0) - { - int ilast = 0; - for (int ir = 0; ir < pp.kkbeta; ++ir) + int ln = pp.lll[nb]; + for (int mb = nb; mb < pp.nbeta; mb++) { - if (pp.r[ir] < rinner[l]) - { - ilast = ir + 1; - } - else - { - break; - } + int lm = pp.lll[mb]; + int nmb = mb * (mb + 1) / 2 + nb; + + for (int l = std::abs (ln - lm); l <= ln + lm; l += 2) + { + // copy q(r) to the l-dependent grid + for (int ir = 0; ir < pp.mesh; ir++) + { + pp.qfuncl (l, nmb, ir) = qfunc (nmb, ir); + } + + // adjust the inner values on the l-dependent grid if nqf and rinner are defined + if (nqf > 0 && rinner[l] > 0.0) + { + int ilast = 0; + for (int ir = 0; ir < pp.kkbeta; ++ir) + { + if (pp.r[ir] < rinner[l]) + { + ilast = ir + 1; + } + else + { + break; + } + } + this->setqfnew (nqf, + ilast, + l, + 2, + &(qfcoef (nb, mb, l, 0)), + pp.r.data (), + &(pp.qfuncl (l, nmb, 0))); + } + } } - this->setqfnew(nqf, ilast, l, 2, &(qfcoef(nb, mb, l, 0)), pp.r.data(), &(pp.qfuncl(l, nmb, 0))); - } } - } } - } } -void Pseudopot_upf::setqfnew(const int& nqf, +void + Pseudopot_upf::setqfnew (const int& nqf, const int& mesh, const int& l, const int& n, @@ -476,22 +490,23 @@ void Pseudopot_upf::setqfnew(const int& nqf, double* rho) { for (int ir = 0; ir 
< mesh; ++ir) - { - double rr = r[ir] * r[ir]; - rho[ir] = qfcoef[0]; - for (int iq = 1; iq < nqf; ++iq) { - rho[ir] += qfcoef[iq] * pow(rr, iq); + double rr = r[ir] * r[ir]; + rho[ir] = qfcoef[0]; + for (int iq = 1; iq < nqf; ++iq) + { + rho[ir] += qfcoef[iq] * pow (rr, iq); + } + rho[ir] *= pow (r[ir], l + n); } - rho[ir] *= pow(r[ir], l + n); - } } -void Pseudopot_upf::skip_number(std::ifstream& ifs, bool mesh_changed) +void + Pseudopot_upf::skip_number (std::ifstream& ifs, bool mesh_changed) { - if (mesh_changed) - { - double temp = 0.; - ifs >> temp; - } + if (mesh_changed) + { + double temp = 0.; + ifs >> temp; + } } diff --git a/source/source_cell/read_pp.h b/source/source_cell/read_pp.h index 1290950e92e..ca224e2efad 100644 --- a/source/source_cell/read_pp.h +++ b/source/source_cell/read_pp.h @@ -9,19 +9,19 @@ class Pseudopot_upf { -public: - //PP_INFO - //PP_HEADER - //PP_MESH - //PP_NLCC - //PP_LOCAL - //PP_NONLOCAL - //PP_PSWFC - //PP_PSRHOATOM - //addinfo - - Pseudopot_upf(); - ~Pseudopot_upf(); + public: + // PP_INFO + // PP_HEADER + // PP_MESH + // PP_NLCC + // PP_LOCAL + // PP_NONLOCAL + // PP_PSWFC + // PP_PSRHOATOM + // addinfo + + Pseudopot_upf (); + ~Pseudopot_upf (); std::string relativistic; // relativistic: no, scalar, full int lmax_rho; // maximum angular momentum component in rho (should be 2*lmax) @@ -33,13 +33,13 @@ class Pseudopot_upf int lloc; // L of channel used to generate local potential // (if < 0 it was generated by smoothing AE potential) // double rcloc; // vloc = v_ae for r > rcloc - bool q_with_l; // if .true. qfunc is pseudized in - int nqf; // number of Q coefficients + bool q_with_l; // if .true. 
qfunc is pseudized in + int nqf; // number of Q coefficients // bool has_wfc; // if true, UPF contain AE and PS wfc for each beta // need 'new' and 'delete' - bool coulomb_potential = false; // coulomb potentail : z/r - ModuleBase::matrix chi; // chi(nwfc,mesh) atomic wavefcts + bool coulomb_potential = false; // coulomb potentail : z/r + ModuleBase::matrix chi; // chi(nwfc,mesh) atomic wavefcts std::vector kbeta = {}; // kbeta(nbeta):number of mesh points for projector i (must be .le. mesh ) std::vector els_beta = {}; // els_beta(nwfc):label for the beta std::vector nchi = {}; // nchi(nwfc) value of pseudo-n for wavefcts @@ -47,12 +47,12 @@ class Pseudopot_upf std::vector rcut_chi = {}; // rcut_chi(nwfc) cutoff inner radius std::vector rcutus_chi = {}; // rcutus_chi(nwfc) ultrasoft outer radius std::vector rinner = {}; // rinner(2*lmax+1) r_L - ModuleBase::matrix qfunc; // qfunc(nbeta*(nbeta+1)/2,mesh) Q_{mu,nu}(|r|) function for |r|> r_L - ModuleBase::realArray qfcoef; // qfcoef(nbeta,nbeta,2*lmax+1,nqf) coefficients for Q for |r| r_L + ModuleBase::realArray qfcoef; // qfcoef(nbeta,nbeta,2*lmax+1,nqf) coefficients for Q for |r| rcut = {}; // cut-off radius(nbeta) - std::vector rcutus = {}; // ultrasoft cut-off radius (nbeta) + std::vector rcut = {}; // cut-off radius(nbeta) + std::vector rcutus = {}; // ultrasoft cut-off radius (nbeta) int nd; // nl_5 // Number of nonzero Dij @@ -63,44 +63,44 @@ class Pseudopot_upf int iTB_d; // return error - int init_pseudo_reader(const std::string& fn, std::string& type, Atom_pseudo& pp); - void print_pseudo_upf(std::ofstream& ofs, Atom_pseudo& pp); + int init_pseudo_reader (const std::string& fn, std::string& type, Atom_pseudo& pp); + void print_pseudo_upf (std::ofstream& ofs, Atom_pseudo& pp); - int average_p(const double& lambda, Atom_pseudo& pp); // zhengdy add 2020-10-20 - void set_empty_element(Atom_pseudo& pp); // Peize Lin add for bsse 2022.04.07 - void set_upf_q(Atom_pseudo& pp); // liuyu add 2023-09-21 - void 
complete_default(Atom_pseudo& pp); + int average_p (const double& lambda, Atom_pseudo& pp); // zhengdy add 2020-10-20 + void set_empty_element (Atom_pseudo& pp); // Peize Lin add for bsse 2022.04.07 + void set_upf_q (Atom_pseudo& pp); // liuyu add 2023-09-21 + void complete_default (Atom_pseudo& pp); private: - bool mesh_changed = false; // if the mesh is even, it will be changed to odd - void skip_number(std::ifstream& ifs, bool mesh_changed); // skip the last number if the mesh is even - - int set_pseudo_type(const std::string& fn, std::string& type); - std::string& trim(std::string& in_str); - std::string trimend(std::string& in_str); - - int read_pseudo_upf(std::ifstream& ifs, Atom_pseudo& pp); - int read_pseudo_vwr(std::ifstream& ifs, Atom_pseudo& pp); - int read_pseudo_blps(std::ifstream& ifs, Atom_pseudo& pp); // sunliang added 2021.07.08 - void read_pseudo_header(std::ifstream& ifs, Atom_pseudo& pp); - void read_pseudo_mesh(std::ifstream& ifs, Atom_pseudo& pp); - void read_pseudo_nlcc(std::ifstream& ifs, Atom_pseudo& pp); - void read_pseudo_local(std::ifstream& ifs, Atom_pseudo& pp); - void read_pseudo_nl(std::ifstream& ifs, Atom_pseudo& pp); - void read_pseudo_pswfc(std::ifstream& ifs, Atom_pseudo& pp); - void read_pseudo_rhoatom(std::ifstream& ifs, Atom_pseudo& pp); - void read_pseudo_addinfo(std::ifstream& ifs, Atom_pseudo& pp); - void read_pseudo_so(std::ifstream& ifs, Atom_pseudo& pp); + bool mesh_changed = false; // if the mesh is even, it will be changed to odd + void skip_number (std::ifstream& ifs, bool mesh_changed); // skip the last number if the mesh is even + + int set_pseudo_type (const std::string& fn, std::string& type); + std::string& trim (std::string& in_str); + std::string trimend (std::string& in_str); + + int read_pseudo_upf (std::ifstream& ifs, Atom_pseudo& pp); + int read_pseudo_vwr (std::ifstream& ifs, Atom_pseudo& pp); + int read_pseudo_blps (std::ifstream& ifs, Atom_pseudo& pp); // sunliang added 2021.07.08 + void 
read_pseudo_header (std::ifstream& ifs, Atom_pseudo& pp); + void read_pseudo_mesh (std::ifstream& ifs, Atom_pseudo& pp); + void read_pseudo_nlcc (std::ifstream& ifs, Atom_pseudo& pp); + void read_pseudo_local (std::ifstream& ifs, Atom_pseudo& pp); + void read_pseudo_nl (std::ifstream& ifs, Atom_pseudo& pp); + void read_pseudo_pswfc (std::ifstream& ifs, Atom_pseudo& pp); + void read_pseudo_rhoatom (std::ifstream& ifs, Atom_pseudo& pp); + void read_pseudo_addinfo (std::ifstream& ifs, Atom_pseudo& pp); + void read_pseudo_so (std::ifstream& ifs, Atom_pseudo& pp); // upf201 - int read_pseudo_upf201(std::ifstream& ifs, Atom_pseudo& pp); - void read_pseudo_upf201_header(std::ifstream& ifs, Atom_pseudo& pp); - void read_pseudo_upf201_mesh(std::ifstream& ifs, Atom_pseudo& pp); - void read_pseudo_upf201_nonlocal(std::ifstream& ifs, Atom_pseudo& pp); - void read_pseudo_upf201_pswfc(std::ifstream& ifs, Atom_pseudo& pp); + int read_pseudo_upf201 (std::ifstream& ifs, Atom_pseudo& pp); + void read_pseudo_upf201_header (std::ifstream& ifs, Atom_pseudo& pp); + void read_pseudo_upf201_mesh (std::ifstream& ifs, Atom_pseudo& pp); + void read_pseudo_upf201_nonlocal (std::ifstream& ifs, Atom_pseudo& pp); + void read_pseudo_upf201_pswfc (std::ifstream& ifs, Atom_pseudo& pp); // void read_pseudo_upf201_fullwfc(std::ifstream& ifs); - void read_pseudo_upf201_so(std::ifstream& ifs, Atom_pseudo& pp); - void getnameval(std::ifstream&, int&, std::string*, std::string*); + void read_pseudo_upf201_so (std::ifstream& ifs, Atom_pseudo& pp); + void getnameval (std::ifstream&, int&, std::string*, std::string*); /** * @brief Computes the Q function from its polynomial expansion (r < rinner) @@ -112,19 +112,19 @@ class Pseudopot_upf * @param r radial mesh * @param rho output: r^n * Q(r) */ - void setqfnew(const int& nqf, - const int& mesh, - const int& l, - const int& n, - const double* qfcoef, - const double* r, - double* rho); + void setqfnew (const int& nqf, + const int& mesh, + const int& l, + 
const int& n, + const double* qfcoef, + const double* r, + double* rho); // complete default // void complete_default(Atom_pseudo& pp); - void complete_default_h(Atom_pseudo& pp); - void complete_default_atom(Atom_pseudo& pp); - void complete_default_vl(Atom_pseudo& pp); + void complete_default_h (Atom_pseudo& pp); + void complete_default_atom (Atom_pseudo& pp); + void complete_default_vl (Atom_pseudo& pp); }; -#endif //pseudopot_upf class +#endif // pseudopot_upf class diff --git a/source/source_cell/read_pp_blps.cpp b/source/source_cell/read_pp_blps.cpp index 5370dbc41b4..e3b079366f9 100644 --- a/source/source_cell/read_pp_blps.cpp +++ b/source/source_cell/read_pp_blps.cpp @@ -2,7 +2,8 @@ #include "source_base/atom_in.h" #include "source_base/element_name.h" -int Pseudopot_upf::read_pseudo_blps(std::ifstream &ifs, Atom_pseudo& pp) +int + Pseudopot_upf::read_pseudo_blps (std::ifstream& ifs, Atom_pseudo& pp) { // double bohr2a = 0.529177249; pp.nlcc = false; @@ -11,149 +12,149 @@ int Pseudopot_upf::read_pseudo_blps(std::ifstream &ifs, Atom_pseudo& pp) pp.nbeta = 0; pp.kkbeta = 0; - pp.lll = std::vector(pp.nbeta, 0); - pp.betar.create(0, 0); - pp.dion.create(pp.nbeta, pp.nbeta); + pp.lll = std::vector (pp.nbeta, 0); + pp.betar.create (0, 0); + pp.dion.create (pp.nbeta, pp.nbeta); pp.nchi = 0; - pp.nn = std::vector(pp.nchi, 0); - pp.jchi = std::vector(pp.nchi, 0.0); - pp.jjj = std::vector(pp.nchi, 0.0); + pp.nn = std::vector (pp.nchi, 0); + pp.jchi = std::vector (pp.nchi, 0.0); + pp.jjj = std::vector (pp.nchi, 0.0); ifs >> pp.psd; // if(!SCAN_BEGIN(ifs,"BLPS")) WARNING_QUIT("read_pp_blps","Find no PP_HEADER"); - ifs.ignore(300, '\n'); + ifs.ignore (300, '\n'); double zatom = 0.0; double zion = 0.0; ifs >> zatom >> zion; pp.zv = zion; - ifs.ignore(300, '\n'); + ifs.ignore (300, '\n'); atom_in ai; - for (auto each_type: ModuleBase::element_name) - { - if (zatom == ai.atom_Z[each_type]) + for (auto each_type: ModuleBase::element_name) { - pp.psd = each_type; - break; + 
if (zatom == ai.atom_Z[each_type]) + { + pp.psd = each_type; + break; + } } - } int pspcod, pspxc, lloc, r2well; ifs >> pspcod >> pspxc >> pp.lmax >> lloc >> pp.mesh >> r2well; this->mesh_changed = false; - if (pp.mesh%2 == 0) - { - pp.mesh -= 1; - this->mesh_changed = true; - } + if (pp.mesh % 2 == 0) + { + pp.mesh -= 1; + this->mesh_changed = true; + } if (pspxc == 2) - { - pp.xc_func = "PZ"; - } + { + pp.xc_func = "PZ"; + } else if (pspxc == 11) - { - pp.xc_func = "PBE"; - } + { + pp.xc_func = "PBE"; + } else - { - std::string msg = "Unknown pspxc: " + std::to_string(pspxc); - ModuleBase::WARNING_QUIT("Pseudopot_upf::read_pseudo_blps", msg); - } + { + std::string msg = "Unknown pspxc: " + std::to_string (pspxc); + ModuleBase::WARNING_QUIT ("Pseudopot_upf::read_pseudo_blps", msg); + } if (pspcod == 8) - { - for (int i = 0; i < 5; ++i) { - ifs.ignore(300, '\n'); + for (int i = 0; i < 5; ++i) + { + ifs.ignore (300, '\n'); + } } - } else if (pspcod == 6) - { - for (int i = 0; i < 17; ++i) { - ifs.ignore(300, '\n'); + for (int i = 0; i < 17; ++i) + { + ifs.ignore (300, '\n'); + } } - } else - { - std::string msg = "Unknown pspcod: " + std::to_string(pspcod); - ModuleBase::WARNING_QUIT("Pseudopot_upf::read_pseudo_blps", msg); - } + { + std::string msg = "Unknown pspcod: " + std::to_string (pspcod); + ModuleBase::WARNING_QUIT ("Pseudopot_upf::read_pseudo_blps", msg); + } - assert(pp.mesh > 0); + assert (pp.mesh > 0); - pp.r = std::vector(pp.mesh, 0.0); // Bohr - pp.rab = std::vector(pp.mesh, 0.0); - pp.vloc_at = std::vector(pp.mesh, 0.0); // Hartree + pp.r = std::vector (pp.mesh, 0.0); // Bohr + pp.rab = std::vector (pp.mesh, 0.0); + pp.vloc_at = std::vector (pp.mesh, 0.0); // Hartree int num = 0; if (pspcod == 8) - { - for(int i = 0;i < pp.mesh; ++i) { - ifs >> num >> pp.r[i] >> pp.vloc_at[i]; - pp.vloc_at[i] = pp.vloc_at[i]*2; // Hartree to Ry + for (int i = 0; i < pp.mesh; ++i) + { + ifs >> num >> pp.r[i] >> pp.vloc_at[i]; + pp.vloc_at[i] = pp.vloc_at[i] * 2; // 
Hartree to Ry + } } - } else if (pspcod == 6) - { - double temp = 0.; - for(int i = 0;i < pp.mesh; ++i) { - ifs >> num >> pp.r[i] >> temp >> pp.vloc_at[i]; - pp.vloc_at[i] = pp.vloc_at[i]*2; // Hartree to Ry + double temp = 0.; + for (int i = 0; i < pp.mesh; ++i) + { + ifs >> num >> pp.r[i] >> temp >> pp.vloc_at[i]; + pp.vloc_at[i] = pp.vloc_at[i] * 2; // Hartree to Ry + } } - } pp.rab[0] = pp.r[1] - pp.r[0]; - for(int i = 1; i < pp.mesh - 1; ++i) - { - pp.rab[i] = (pp.r[i+1] - pp.r[i-1])/2.0; - } + for (int i = 1; i < pp.mesh - 1; ++i) + { + pp.rab[i] = (pp.r[i + 1] - pp.r[i - 1]) / 2.0; + } pp.rab[pp.mesh - 1] = pp.r[pp.mesh - 1] - pp.r[pp.mesh - 2]; - pp.rho_at = std::vector(pp.mesh, 0.0); - double charge = zion/pp.r[pp.mesh - 1]; - for(int i = 0;i < pp.mesh; ++i) - { - pp.rho_at[i] = charge; - } + pp.rho_at = std::vector (pp.mesh, 0.0); + double charge = zion / pp.r[pp.mesh - 1]; + for (int i = 0; i < pp.mesh; ++i) + { + pp.rho_at[i] = charge; + } return 0; } -//parameters -//read_pp.h <--> blps_real -//nv - -//psd head -//pp_type(NC or US) - -//tvanp False -//nlcc False -//dft pspxc 2->lda, 11->gga -//zp zion -//etotps - -//ecutwfc - -//ecutrho - -//lmax lmax -//mesh mmax -//nwfc - -//nbeta - -//els - -//lchi - -//oc - - -//rab rab[ir]=(r[ir+1]-r[ir-1])/2.0 -//rho_atc(nonlocal) - -//vloc -//chi - -//rho_at - +// parameters +// read_pp.h <--> blps_real +// nv - +// psd head +// pp_type(NC or US) - +// tvanp False +// nlcc False +// dft pspxc 2->lda, 11->gga +// zp zion +// etotps - +// ecutwfc - +// ecutrho - +// lmax lmax +// mesh mmax +// nwfc - +// nbeta - +// els - +// lchi - +// oc - + +// rab rab[ir]=(r[ir+1]-r[ir-1])/2.0 +// rho_atc(nonlocal) - +// vloc +// chi - +// rho_at - // lll - // kbeta - // beta - // dion - -//nn - -//jchi - -//jjj - -//nd - +// nn - +// jchi - +// jjj - +// nd - diff --git a/source/source_cell/read_pp_complete.cpp b/source/source_cell/read_pp_complete.cpp index 6adeca1d2f1..6c4fcd9762a 100644 --- 
a/source/source_cell/read_pp_complete.cpp +++ b/source/source_cell/read_pp_complete.cpp @@ -1,164 +1,178 @@ #include "read_pp.h" #include "source_io/module_parameter/parameter.h" -void Pseudopot_upf::complete_default(Atom_pseudo& pp) +void + Pseudopot_upf::complete_default (Atom_pseudo& pp) { - ModuleBase::TITLE("Pseudopot_upf", "complete_default"); + ModuleBase::TITLE ("Pseudopot_upf", "complete_default"); // call subroutines - this->complete_default_h(pp); - this->complete_default_atom(pp); - this->complete_default_vl(pp); - - if (pp.nbeta == 0) { - return; - } - - if (pp.lll.empty()) - { - pp.lll = std::vector(pp.nbeta, 0); - } - - pp.nh = 0; - - for (int nb = 0; nb < pp.nbeta;nb++) - { - pp.nh += 2 * pp.lll [nb] + 1; - } - - return; + this->complete_default_h (pp); + this->complete_default_atom (pp); + this->complete_default_vl (pp); + + if (pp.nbeta == 0) + { + return; + } + + if (pp.lll.empty ()) + { + pp.lll = std::vector (pp.nbeta, 0); + } + + pp.nh = 0; + + for (int nb = 0; nb < pp.nbeta; nb++) + { + pp.nh += 2 * pp.lll[nb] + 1; + } + + return; } -void Pseudopot_upf::complete_default_h(Atom_pseudo& pp) +void + Pseudopot_upf::complete_default_h (Atom_pseudo& pp) { - ModuleBase::TITLE("Pseudopot_upf","complete_default_h"); - - // mohan update 2021-02-22 - // max number of points in the atomic radial mesh - int ndmx = 200000; - if (pp.mesh > ndmx) - { - std::cout << "\n complete_default_h, too many grid points,"; - } - - if (pp.els.empty()) - { - pp.els = std::vector(pp.nchi, ""); - } - - if (pp.lchi.empty()) - { - pp.lchi = std::vector(pp.nchi, 0); - } - - if (pp.oc.empty()) - { - pp.oc = std::vector(pp.nchi, 0.0); - } - - if (pp.jjj.empty()) { - pp.jjj = std::vector(pp.nbeta, 0.0); - assert(!pp.has_so or pp.nbeta == 0); - for (int i=0; i(pp.nchi, 0); - assert(!pp.has_so or pp.nchi == 0); - for (int i=0; i(pp.nchi, 0.0); - assert(!pp.has_so or pp.nchi == 0); - for (int i=0; i ndmx) + { + std::cout << "\n complete_default_h, too many grid points,"; + } + + if 
(pp.els.empty ()) + { + pp.els = std::vector (pp.nchi, ""); + } + + if (pp.lchi.empty ()) + { + pp.lchi = std::vector (pp.nchi, 0); + } + + if (pp.oc.empty ()) + { + pp.oc = std::vector (pp.nchi, 0.0); + } + + if (pp.jjj.empty ()) + { + pp.jjj = std::vector (pp.nbeta, 0.0); + assert (!pp.has_so or pp.nbeta == 0); + for (int i = 0; i < pp.nbeta; i++) + { + pp.jjj[i] = 0; + } + } + + if (pp.nn.empty ()) + { + pp.nn = std::vector (pp.nchi, 0); + assert (!pp.has_so or pp.nchi == 0); + for (int i = 0; i < pp.nchi; i++) + { + pp.nn[i] = 0; + } + } + + if (pp.jchi.empty ()) + { + pp.jchi = std::vector (pp.nchi, 0.0); + assert (!pp.has_so or pp.nchi == 0); + for (int i = 0; i < pp.nchi; i++) + { + pp.jchi[i] = 0; + } + } return; } -void Pseudopot_upf::complete_default_atom(Atom_pseudo& pp) +void + Pseudopot_upf::complete_default_atom (Atom_pseudo& pp) { - ModuleBase::TITLE("Pseudopot_upf","complete_default_atom"); - - // mohan 2009-12-15 - // mohan update again 2011-05-23, - // in order to calculate more accurate Vna. - pp.rcut = PARAM.inp.pseudo_rcut;//(a.u.); - - // remember to update here if you need it. 
- // rcut = 25.0; - - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"PAO radial cut off (Bohr)", pp.rcut); - if(pp.rcut <= 0.0) - { - ModuleBase::WARNING_QUIT("Pseudopot_upf::complete_default_atom","PAO rcut<=0.0"); - } - - // chi.create(nchi, mesh); - - if (pp.r.empty()) { - pp.r = std::vector(pp.mesh, 0.0); - } - - if (pp.rab.empty()) { - pp.rab = std::vector(pp.mesh, 0.0); - } - - if (pp.rho_at.empty()) { - pp.rho_at = std::vector(pp.mesh, 0.0); - } - - if (pp.rho_atc.empty()) { - pp.rho_atc = std::vector(pp.mesh, 0.0); - assert(!pp.nlcc or pp.mesh == 0); - } - - bool br = false; - - pp.msh = 0; - - for (int ir = 0;ir < pp.mesh;ir++) - { - if (pp.r [ir] > pp.rcut) - { - pp.msh = ir + 1; - br = true; - break; - } - } - - if (br) - { - // force msh to be odd for simpson integration - pp.msh = 2 * static_cast((pp.msh + 1) / 2) - 1; // Use static_cast instead of C-style cast for type safety - } - else - { - pp.msh = pp.mesh ; - } - - return; + ModuleBase::TITLE ("Pseudopot_upf", "complete_default_atom"); + + // mohan 2009-12-15 + // mohan update again 2011-05-23, + // in order to calculate more accurate Vna. + pp.rcut = PARAM.inp.pseudo_rcut; //(a.u.); + + // remember to update here if you need it. 
+ // rcut = 25.0; + + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "PAO radial cut off (Bohr)", pp.rcut); + if (pp.rcut <= 0.0) + { + ModuleBase::WARNING_QUIT ("Pseudopot_upf::complete_default_atom", "PAO rcut<=0.0"); + } + + // chi.create(nchi, mesh); + + if (pp.r.empty ()) + { + pp.r = std::vector (pp.mesh, 0.0); + } + + if (pp.rab.empty ()) + { + pp.rab = std::vector (pp.mesh, 0.0); + } + + if (pp.rho_at.empty ()) + { + pp.rho_at = std::vector (pp.mesh, 0.0); + } + + if (pp.rho_atc.empty ()) + { + pp.rho_atc = std::vector (pp.mesh, 0.0); + assert (!pp.nlcc or pp.mesh == 0); + } + + bool br = false; + + pp.msh = 0; + + for (int ir = 0; ir < pp.mesh; ir++) + { + if (pp.r[ir] > pp.rcut) + { + pp.msh = ir + 1; + br = true; + break; + } + } + + if (br) + { + // force msh to be odd for simpson integration + pp.msh = 2 * static_cast ((pp.msh + 1) / 2) + - 1; // Use static_cast instead of C-style cast for type safety + } + else + { + pp.msh = pp.mesh; + } + + return; } -void Pseudopot_upf::complete_default_vl(Atom_pseudo& pp) +void + Pseudopot_upf::complete_default_vl (Atom_pseudo& pp) { - ModuleBase::TITLE("Pseudopot_upf","complete_default_vl"); + ModuleBase::TITLE ("Pseudopot_upf", "complete_default_vl"); - assert(pp.mesh>0);//mohan add 2021-05-01 + assert (pp.mesh > 0); // mohan add 2021-05-01 - if (pp.vloc_at.empty()) { - pp.vloc_at = std::vector(pp.mesh, 0.0); - } + if (pp.vloc_at.empty ()) + { + pp.vloc_at = std::vector (pp.mesh, 0.0); + } - return; -} + return; +} diff --git a/source/source_cell/read_pp_upf100.cpp b/source/source_cell/read_pp_upf100.cpp index 30cc94fa825..b998a712e74 100644 --- a/source/source_cell/read_pp_upf100.cpp +++ b/source/source_cell/read_pp_upf100.cpp @@ -2,7 +2,8 @@ // read pseudopot_upf potential "upf" in the Unified // Pseudopot_upfpotential Format -int Pseudopot_upf::read_pseudo_upf(std::ifstream& ifs, Atom_pseudo& pp) +int + Pseudopot_upf::read_pseudo_upf (std::ifstream& ifs, Atom_pseudo& pp) { std::string dummy; pp.has_so = 
false; @@ -12,26 +13,26 @@ int Pseudopot_upf::read_pseudo_upf(std::ifstream& ifs, Atom_pseudo& pp) this->nd = 0; // addinfo_loop - ifs.rdstate(); + ifs.rdstate (); - while (ifs.good()) - { - ifs >> dummy; - if (dummy == "") + while (ifs.good ()) { - pp.has_so = true; - } - else if (dummy == "") - { - this->q_with_l = true; - } + ifs >> dummy; + if (dummy == "") + { + pp.has_so = true; + } + else if (dummy == "") + { + this->q_with_l = true; + } - if (pp.has_so && q_with_l) - { - break; + if (pp.has_so && q_with_l) + { + break; + } + ifs.rdstate (); } - ifs.rdstate(); - } // Search for Header // This version doesn't use the new routine SCAN_BEGIN @@ -40,251 +41,256 @@ int Pseudopot_upf::read_pseudo_upf(std::ifstream& ifs, Atom_pseudo& pp) int ierr = 0; - ifs.clear(); - ifs.seekg(0); - ifs.rdstate(); + ifs.clear (); + ifs.seekg (0); + ifs.rdstate (); // header_loop: - while (ifs.good()) - { - ifs >> dummy; - if (dummy == "") + while (ifs.good ()) { - ierr = 1; - //--------------------- - // call member function - //--------------------- - read_pseudo_header(ifs, pp); - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - break; + ifs >> dummy; + if (dummy == "") + { + ierr = 1; + //--------------------- + // call member function + //--------------------- + read_pseudo_header (ifs, pp); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); + break; + } } - } if (ierr == 0) - { - // 2: something in pseudopotential file not match. - return 2; - } + { + // 2: something in pseudopotential file not match. 
+ return 2; + } // Search for mesh information - if (ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "")) - { - //--------------------- - // call member function - //--------------------- - read_pseudo_mesh(ifs, pp); - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - } + if (ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "")) + { + //--------------------- + // call member function + //--------------------- + read_pseudo_mesh (ifs, pp); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); + } // If present, search for nlcc if (pp.nlcc) - { - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, ""); - //--------------------- - // call member function - //--------------------- - read_pseudo_nlcc(ifs, pp); - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - } + { + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, ""); + //--------------------- + // call member function + //--------------------- + read_pseudo_nlcc (ifs, pp); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); + } if (!this->coulomb_potential) - { - // Search for Local potential - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, ""); - //--------------------- - // call member function - //--------------------- - read_pseudo_local(ifs, pp); - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - } + { + // Search for Local potential + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, ""); + //--------------------- + // call member function + //--------------------- + read_pseudo_local (ifs, pp); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); + } // Search for Nonlocal potential - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, ""); + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, ""); //--------------------- // call member function //--------------------- - read_pseudo_nl(ifs, pp); - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); + read_pseudo_nl (ifs, pp); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); // Search for atomic wavefunctions - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, ""); + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, ""); //--------------------- // call member function //--------------------- - 
read_pseudo_pswfc(ifs, pp); - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); + read_pseudo_pswfc (ifs, pp); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); // Search for atomic charge - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, ""); + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, ""); //--------------------- // call member function //--------------------- - read_pseudo_rhoatom(ifs, pp); - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); + read_pseudo_rhoatom (ifs, pp); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); // Search for add_info if (pp.has_so) - { - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, ""); // added by zhengdy-soc - //--------------------- - // call member function - //--------------------- - read_pseudo_so(ifs, pp); - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - } - - ifs.clear(); - ifs.seekg(0); + { + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, ""); // added by zhengdy-soc + //--------------------- + // call member function + //--------------------- + read_pseudo_so (ifs, pp); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); + } + + ifs.clear (); + ifs.seekg (0); // return 0: read in sucessfully. 
return 0; } // end subroutine read_pseudopot_upf -void Pseudopot_upf::read_pseudo_header(std::ifstream& ifs, Atom_pseudo& pp) +void + Pseudopot_upf::read_pseudo_header (std::ifstream& ifs, Atom_pseudo& pp) { - ModuleBase::GlobalFunc::READ_VALUE(ifs, pp.nv); // Version number - ModuleBase::GlobalFunc::READ_VALUE(ifs, pp.psd); // Element label + ModuleBase::GlobalFunc::READ_VALUE (ifs, pp.nv); // Version number + ModuleBase::GlobalFunc::READ_VALUE (ifs, pp.psd); // Element label // Type of pseudo : NC or US - ModuleBase::GlobalFunc::READ_VALUE(ifs, pp.pp_type); + ModuleBase::GlobalFunc::READ_VALUE (ifs, pp.pp_type); if (pp.pp_type == "US") - { - pp.tvanp = true; - this->coulomb_potential = false; - } + { + pp.tvanp = true; + this->coulomb_potential = false; + } else if (pp.pp_type == "NC") - { - pp.tvanp = false; - this->coulomb_potential = false; - } + { + pp.tvanp = false; + this->coulomb_potential = false; + } else if (pp.pp_type == "1/r") - { - pp.tvanp = false; - this->coulomb_potential = true; - } + { + pp.tvanp = false; + this->coulomb_potential = true; + } else - { - // A bug here!!! can't quit together. - std::cout << " pp_type=" << pp.pp_type << std::endl; - ModuleBase::WARNING_QUIT("Pseudopot_upf::read_pseudo_header", "unknown pseudo type"); - } + { + // A bug here!!! can't quit together. 
+ std::cout << " pp_type=" << pp.pp_type << std::endl; + ModuleBase::WARNING_QUIT ("Pseudopot_upf::read_pseudo_header", "unknown pseudo type"); + } // If use nlcc std::string nlc; - ModuleBase::GlobalFunc::READ_VALUE(ifs, nlc); + ModuleBase::GlobalFunc::READ_VALUE (ifs, nlc); if (nlc == "T") - { - pp.nlcc = true; - } + { + pp.nlcc = true; + } else - { - pp.nlcc = false; - } + { + pp.nlcc = false; + } // mohan modify 2009-12-15 std::string junk; ifs >> junk >> junk >> junk >> junk; - ModuleBase::GlobalFunc::READ_VALUE(ifs, pp.xc_func); + ModuleBase::GlobalFunc::READ_VALUE (ifs, pp.xc_func); - ModuleBase::GlobalFunc::READ_VALUE(ifs, pp.zv); - ModuleBase::GlobalFunc::READ_VALUE(ifs, pp.etotps); + ModuleBase::GlobalFunc::READ_VALUE (ifs, pp.zv); + ModuleBase::GlobalFunc::READ_VALUE (ifs, pp.etotps); ifs >> pp.ecutwfc >> pp.ecutrho; - ifs.ignore(75, '\n'); + ifs.ignore (75, '\n'); - ModuleBase::GlobalFunc::READ_VALUE(ifs, pp.lmax); - ModuleBase::GlobalFunc::READ_VALUE(ifs, pp.mesh); + ModuleBase::GlobalFunc::READ_VALUE (ifs, pp.lmax); + ModuleBase::GlobalFunc::READ_VALUE (ifs, pp.mesh); if (pp.mesh % 2 == 0) - { - pp.mesh -= 1; - this->mesh_changed = true; - } + { + pp.mesh -= 1; + this->mesh_changed = true; + } ifs >> pp.nchi >> pp.nbeta; - ifs.ignore(75, '\n'); - ifs.ignore(75, '\n'); + ifs.ignore (75, '\n'); + ifs.ignore (75, '\n'); - pp.els = std::vector(pp.nchi, ""); - pp.lchi = std::vector(pp.nchi, 0); - pp.oc = std::vector(pp.nchi, 0.0); + pp.els = std::vector (pp.nchi, ""); + pp.lchi = std::vector (pp.nchi, 0); + pp.oc = std::vector (pp.nchi, 0.0); for (int i = 0; i < pp.nchi; i++) - { - ifs >> pp.els[i] >> pp.lchi[i] >> pp.oc[i]; - } + { + ifs >> pp.els[i] >> pp.lchi[i] >> pp.oc[i]; + } if (this->coulomb_potential) - { - pp.nbeta = 0; - pp.lmax = 0; - this->lloc = 0; - } + { + pp.nbeta = 0; + pp.lmax = 0; + this->lloc = 0; + } else if (pp.nbeta == 0 && pp.lmax < 0) - { - // Some legacy UPF100 files use lmax = -1 when no projectors exist. 
- // Normalize to 0 to avoid negative-size propagation in downstream code. - pp.lmax = 0; - } + { + // Some legacy UPF100 files use lmax = -1 when no projectors exist. + // Normalize to 0 to avoid negative-size propagation in downstream code. + pp.lmax = 0; + } return; } -void Pseudopot_upf::read_pseudo_mesh(std::ifstream& ifs, Atom_pseudo& pp) +void + Pseudopot_upf::read_pseudo_mesh (std::ifstream& ifs, Atom_pseudo& pp) { - assert(pp.mesh > 0); + assert (pp.mesh > 0); - pp.r = std::vector(pp.mesh, 0.0); - pp.rab = std::vector(pp.mesh, 0.0); + pp.r = std::vector (pp.mesh, 0.0); + pp.rab = std::vector (pp.mesh, 0.0); int ir = 0; - if (ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "", false)) - { - for (ir = 0; ir < pp.mesh; ir++) + if (ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "", false)) { - ifs >> pp.r[ir]; + for (ir = 0; ir < pp.mesh; ir++) + { + ifs >> pp.r[ir]; + } + this->skip_number (ifs, this->mesh_changed); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); } - this->skip_number(ifs, this->mesh_changed); - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - } - if (ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "", false)) - { - for (ir = 0; ir < pp.mesh; ir++) + if (ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "", false)) { - ifs >> pp.rab[ir]; + for (ir = 0; ir < pp.mesh; ir++) + { + ifs >> pp.rab[ir]; + } + this->skip_number (ifs, this->mesh_changed); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); } - this->skip_number(ifs, this->mesh_changed); - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - } return; } -void Pseudopot_upf::read_pseudo_nlcc(std::ifstream& ifs, Atom_pseudo& pp) +void + Pseudopot_upf::read_pseudo_nlcc (std::ifstream& ifs, Atom_pseudo& pp) { - assert(pp.mesh > 0); - pp.rho_atc = std::vector(pp.mesh, 0.0); + assert (pp.mesh > 0); + pp.rho_atc = std::vector (pp.mesh, 0.0); for (int ir = 0; ir < pp.mesh; ir++) - { - ifs >> pp.rho_atc[ir]; - } - this->skip_number(ifs, this->mesh_changed); + { + ifs >> pp.rho_atc[ir]; + } + this->skip_number (ifs, this->mesh_changed); return; 
} -void Pseudopot_upf::read_pseudo_local(std::ifstream& ifs, Atom_pseudo& pp) +void + Pseudopot_upf::read_pseudo_local (std::ifstream& ifs, Atom_pseudo& pp) { - assert(pp.mesh > 0); - pp.vloc_at = std::vector(pp.mesh, 0.0); + assert (pp.mesh > 0); + pp.vloc_at = std::vector (pp.mesh, 0.0); for (int ir = 0; ir < pp.mesh; ir++) - { - ifs >> pp.vloc_at[ir]; - } - this->skip_number(ifs, this->mesh_changed); + { + ifs >> pp.vloc_at[ir]; + } + this->skip_number (ifs, this->mesh_changed); return; } -void Pseudopot_upf::read_pseudo_nl(std::ifstream& ifs, Atom_pseudo& pp) +void + Pseudopot_upf::read_pseudo_nl (std::ifstream& ifs, Atom_pseudo& pp) { // int nb, mb, n, ir, idum, ldum, lp, i, ikk; int nb = 0; @@ -294,226 +300,230 @@ void Pseudopot_upf::read_pseudo_nl(std::ifstream& ifs, Atom_pseudo& pp) int ldum = 0; if (pp.nbeta == 0) - { - this->nqf = 0; - pp.nqlc = 0; - pp.kkbeta = 0; - return; - } - else - { - this->kbeta = std::vector(pp.nbeta, 0); - pp.lll = std::vector(pp.nbeta, 0); - pp.betar.create(pp.nbeta, pp.mesh); - pp.dion.create(pp.nbeta, pp.nbeta); - pp.kkbeta = 0; - - for (int i = 0; i < pp.nbeta; i++) { - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "", false); - ifs >> idum; - ModuleBase::GlobalFunc::READ_VALUE(ifs, pp.lll[i]); // nl_1 - ModuleBase::GlobalFunc::READ_VALUE(ifs, this->kbeta[i]); // nl_2 - if (this->kbeta[i] > pp.mesh) - { - this->kbeta[i] = pp.mesh; - } - // number of mesh points for projectors - - for (ir = 0; ir < this->kbeta[i]; ir++) - { - ifs >> pp.betar(i, ir); // nl_3 - } - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - pp.kkbeta = (this->kbeta[i] > pp.kkbeta) ? 
this->kbeta[i] : pp.kkbeta; + this->nqf = 0; + pp.nqlc = 0; + pp.kkbeta = 0; + return; } - //check the betar for non-normal number - pp.check_betar(); - // DIJ - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "", false); - ModuleBase::GlobalFunc::READ_VALUE(ifs, this->nd); // nl_4 - for (int i = 0; i < this->nd; i++) + else { - double swap = 0.0; - ifs >> nb >> mb >> swap; - nb--; - mb--; - if (nb < 0 || mb < 0 || nb >= pp.nbeta || mb >= pp.nbeta) - { - ModuleBase::WARNING_QUIT( - "Pseudopot_upf::read_pseudo_nl", - "PP_DIJ index out of range: nb=" + std::to_string(nb) + ", mb=" + std::to_string(mb) - + ", nbeta=" + std::to_string(pp.nbeta)); - } - pp.dion(mb, nb) = swap; // nl_5 - pp.dion(nb, mb) = swap; - } - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); + this->kbeta = std::vector (pp.nbeta, 0); + pp.lll = std::vector (pp.nbeta, 0); + pp.betar.create (pp.nbeta, pp.mesh); + pp.dion.create (pp.nbeta, pp.nbeta); + pp.kkbeta = 0; - // QIJ - if (pp.tvanp) - { - if (!ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "", false)) - { - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "", false); - } - // If nqf is not zero, Qij's inside rinner are computed using qfcoef's - ModuleBase::GlobalFunc::READ_VALUE(ifs, this->nqf); - pp.nqlc = 2 * pp.lmax + 1; - this->rinner = std::vector(pp.nqlc, 0.0); - pp.qqq.create(pp.nbeta, pp.nbeta); - if (q_with_l) - { - pp.qfuncl.create(2 * pp.lmax + 1, pp.nbeta * (pp.nbeta + 1) / 2, pp.mesh); - } - else - { - this->qfunc.create(pp.nbeta * (pp.nbeta + 1) / 2, pp.mesh); - } - - if (nqf <= 0) - { - this->qfcoef.create(1, 1, 1, 1); - } - else - { - this->qfcoef.create(pp.nbeta, pp.nbeta, pp.nqlc, nqf); - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "", false); - for (int i = 0; i < pp.nqlc; i++) + for (int i = 0; i < pp.nbeta; i++) { - ifs >> idum >> rinner[i]; - } - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - } + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "", false); + ifs >> idum; + ModuleBase::GlobalFunc::READ_VALUE (ifs, pp.lll[i]); // nl_1 + 
ModuleBase::GlobalFunc::READ_VALUE (ifs, this->kbeta[i]); // nl_2 + if (this->kbeta[i] > pp.mesh) + { + this->kbeta[i] = pp.mesh; + } + // number of mesh points for projectors - for (int nb = 0; nb < pp.nbeta; nb++) - { - int ln = pp.lll[nb]; - for (int mb = nb; mb < pp.nbeta; mb++) + for (ir = 0; ir < this->kbeta[i]; ir++) + { + ifs >> pp.betar (i, ir); // nl_3 + } + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); + pp.kkbeta = (this->kbeta[i] > pp.kkbeta) ? this->kbeta[i] : pp.kkbeta; + } + // check the betar for non-normal number + pp.check_betar (); + // DIJ + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "", false); + ModuleBase::GlobalFunc::READ_VALUE (ifs, this->nd); // nl_4 + for (int i = 0; i < this->nd; i++) { - int lm = pp.lll[mb]; - int nmb = mb * (mb + 1) / 2 + nb; - ifs >> idum >> idum >> ldum; // i j (l(j)) - ifs.ignore(75, '\n'); - - if (ldum != lm) - { - ModuleBase::WARNING_QUIT("Pseudopot_upf::read_pseudo_nl", - "inconsistent angular momentum for Q_ij"); - } - - ModuleBase::GlobalFunc::READ_VALUE(ifs, pp.qqq(nb, mb)); - pp.qqq(mb, nb) = pp.qqq(nb, mb); + double swap = 0.0; + ifs >> nb >> mb >> swap; + nb--; + mb--; + if (nb < 0 || mb < 0 || nb >= pp.nbeta || mb >= pp.nbeta) + { + ModuleBase::WARNING_QUIT ("Pseudopot_upf::read_pseudo_nl", + "PP_DIJ index out of range: nb=" + std::to_string (nb) + + ", mb=" + std::to_string (mb) + + ", nbeta=" + std::to_string (pp.nbeta)); + } + pp.dion (mb, nb) = swap; // nl_5 + pp.dion (nb, mb) = swap; + } + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); + // QIJ + if (pp.tvanp) + { + if (!ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "", false)) + { + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "", false); + } + // If nqf is not zero, Qij's inside rinner are computed using qfcoef's + ModuleBase::GlobalFunc::READ_VALUE (ifs, this->nqf); + pp.nqlc = 2 * pp.lmax + 1; + this->rinner = std::vector (pp.nqlc, 0.0); + pp.qqq.create (pp.nbeta, pp.nbeta); if (q_with_l) - { - for (int l = std::abs(ln - lm); l <= ln + lm; l += 2) { - for (int 
ir = 0; ir < pp.mesh; ir++) - { - ifs >> pp.qfuncl(l, nmb, ir); - } + pp.qfuncl.create (2 * pp.lmax + 1, pp.nbeta * (pp.nbeta + 1) / 2, pp.mesh); } - } else - { - for (int ir = 0; ir < pp.mesh; ir++) { - ifs >> qfunc(nmb, ir); + this->qfunc.create (pp.nbeta * (pp.nbeta + 1) / 2, pp.mesh); } - } - this->skip_number(ifs, this->mesh_changed); - if (this->nqf > 0) - { - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "", false); - for (int k = 0; k < pp.nqlc; k++) + if (nqf <= 0) + { + this->qfcoef.create (1, 1, 1, 1); + } + else { - for (int l = 0; l < nqf; l++) - { - ifs >> qfcoef(nb, mb, k, l); - qfcoef(mb, nb, k, l) = qfcoef(nb, mb, k, l); - } + this->qfcoef.create (pp.nbeta, pp.nbeta, pp.nqlc, nqf); + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "", false); + for (int i = 0; i < pp.nqlc; i++) + { + ifs >> idum >> rinner[i]; + } + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); } - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - } + + for (int nb = 0; nb < pp.nbeta; nb++) + { + int ln = pp.lll[nb]; + for (int mb = nb; mb < pp.nbeta; mb++) + { + int lm = pp.lll[mb]; + int nmb = mb * (mb + 1) / 2 + nb; + ifs >> idum >> idum >> ldum; // i j (l(j)) + ifs.ignore (75, '\n'); + + if (ldum != lm) + { + ModuleBase::WARNING_QUIT ("Pseudopot_upf::read_pseudo_nl", + "inconsistent angular momentum for Q_ij"); + } + + ModuleBase::GlobalFunc::READ_VALUE (ifs, pp.qqq (nb, mb)); + pp.qqq (mb, nb) = pp.qqq (nb, mb); + + if (q_with_l) + { + for (int l = std::abs (ln - lm); l <= ln + lm; l += 2) + { + for (int ir = 0; ir < pp.mesh; ir++) + { + ifs >> pp.qfuncl (l, nmb, ir); + } + } + } + else + { + for (int ir = 0; ir < pp.mesh; ir++) + { + ifs >> qfunc (nmb, ir); + } + } + this->skip_number (ifs, this->mesh_changed); + + if (this->nqf > 0) + { + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "", false); + for (int k = 0; k < pp.nqlc; k++) + { + for (int l = 0; l < nqf; l++) + { + ifs >> qfcoef (nb, mb, k, l); + qfcoef (mb, nb, k, l) = qfcoef (nb, mb, k, l); + } + } + ModuleBase::GlobalFunc::SCAN_END (ifs, 
""); + } + } + } + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); + } + else // not tvanp + { } - } - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - } - else // not tvanp - { } - } return; } -void Pseudopot_upf::read_pseudo_pswfc(std::ifstream& ifs, Atom_pseudo& pp) +void + Pseudopot_upf::read_pseudo_pswfc (std::ifstream& ifs, Atom_pseudo& pp) { - pp.chi.create(pp.nchi, pp.mesh); + pp.chi.create (pp.nchi, pp.mesh); for (int i = 0; i < pp.nchi; i++) - { - std::string OrbitalName; - int BelongToL = 0; - double occupation = 0.0; - std::string dummy; - ifs >> OrbitalName >> BelongToL >> occupation >> dummy; - for (int ir = 0; ir < pp.mesh; ir++) { - ifs >> pp.chi(i, ir); + std::string OrbitalName; + int BelongToL = 0; + double occupation = 0.0; + std::string dummy; + ifs >> OrbitalName >> BelongToL >> occupation >> dummy; + for (int ir = 0; ir < pp.mesh; ir++) + { + ifs >> pp.chi (i, ir); + } + this->skip_number (ifs, this->mesh_changed); } - this->skip_number(ifs, this->mesh_changed); - } return; } -void Pseudopot_upf::read_pseudo_rhoatom(std::ifstream& ifs, Atom_pseudo& pp) +void + Pseudopot_upf::read_pseudo_rhoatom (std::ifstream& ifs, Atom_pseudo& pp) { - pp.rho_at = std::vector(pp.mesh, 0.0); + pp.rho_at = std::vector (pp.mesh, 0.0); for (int ir = 0; ir < pp.mesh; ir++) - { - ifs >> pp.rho_at[ir]; - } - this->skip_number(ifs, this->mesh_changed); + { + ifs >> pp.rho_at[ir]; + } + this->skip_number (ifs, this->mesh_changed); return; } -void Pseudopot_upf::read_pseudo_so(std::ifstream& ifs, Atom_pseudo& pp) +void + Pseudopot_upf::read_pseudo_so (std::ifstream& ifs, Atom_pseudo& pp) { // read soc info from upf, added by zhengdy-soc if (!pp.has_so) - { - return; - } - pp.nn = std::vector(pp.nchi, 0); - pp.jchi = std::vector(pp.nchi, 0.0); - pp.jjj = std::vector(pp.nbeta, 0.0); + { + return; + } + pp.nn = std::vector (pp.nchi, 0); + pp.jchi = std::vector (pp.nchi, 0.0); + pp.jjj = std::vector (pp.nbeta, 0.0); // RELWFC for (int nw = 0; nw < pp.nchi; nw++) - { - ifs >> 
pp.els[nw] >> pp.nn[nw] >> pp.lchi[nw] >> pp.jchi[nw] >> pp.oc[nw]; - if (std::abs(std::abs(pp.lchi[nw] - pp.jchi[nw]) - 0.5) > 1e-7) { - std::cout << "Ignore ADDINFO section" << std::endl; - pp.has_so = false; + ifs >> pp.els[nw] >> pp.nn[nw] >> pp.lchi[nw] >> pp.jchi[nw] >> pp.oc[nw]; + if (std::abs (std::abs (pp.lchi[nw] - pp.jchi[nw]) - 0.5) > 1e-7) + { + std::cout << "Ignore ADDINFO section" << std::endl; + pp.has_so = false; + } } - } // RELBETA for (int nb = 0; nb < pp.nbeta; nb++) - { - ifs >> pp.lll[nb] >> pp.jjj[nb]; - if (std::abs(std::abs(pp.lll[nb] - pp.jjj[nb]) - 0.5) > 1e-7) { - std::cout << "Ignore ADDINFO section" << std::endl; - pp.has_so = false; + ifs >> pp.lll[nb] >> pp.jjj[nb]; + if (std::abs (std::abs (pp.lll[nb] - pp.jjj[nb]) - 0.5) > 1e-7) + { + std::cout << "Ignore ADDINFO section" << std::endl; + pp.has_so = false; + } } - } return; } -void Pseudopot_upf::print_pseudo_upf(std::ofstream& ofs, Atom_pseudo& pp) +void + Pseudopot_upf::print_pseudo_upf (std::ofstream& ofs, Atom_pseudo& pp) { - ModuleBase::TITLE("Pseudopot_upf", "print_pseudo_upf"); + ModuleBase::TITLE ("Pseudopot_upf", "print_pseudo_upf"); ofs << " ==== read_pseudo_upf === " << std::endl; // print header @@ -533,9 +543,9 @@ void Pseudopot_upf::print_pseudo_upf(std::ofstream& ofs, Atom_pseudo& pp) ofs << " nwfc: " << pp.nchi << std::endl; ofs << " nbeta: " << pp.nbeta << std::endl; for (int i = 0; i < pp.nchi; ++i) - { - ofs << " iw=" << i << " els=" << pp.els[i] << " lchi=" << pp.lchi[i] << " oc=" << pp.oc[i] << std::endl; - } + { + ofs << " iw=" << i << " els=" << pp.els[i] << " lchi=" << pp.lchi[i] << " oc=" << pp.oc[i] << std::endl; + } ofs << " End of pseudopot_upf." 
<< std::endl; diff --git a/source/source_cell/read_pp_upf201.cpp b/source/source_cell/read_pp_upf201.cpp index d02bb8e6d51..f9a0c0d283a 100644 --- a/source/source_cell/read_pp_upf201.cpp +++ b/source/source_cell/read_pp_upf201.cpp @@ -2,71 +2,72 @@ // qianrui rewrite it 2021-5-10 // liuyu update 2023-09-17 add uspp support -int Pseudopot_upf::read_pseudo_upf201(std::ifstream &ifs, Atom_pseudo& pp) +int + Pseudopot_upf::read_pseudo_upf201 (std::ifstream& ifs, Atom_pseudo& pp) { //-------------------------------------- //- PP_HEADER - //-------------------------------------- - this->read_pseudo_upf201_header(ifs, pp); + this->read_pseudo_upf201_header (ifs, pp); //-------------------------------------- //- PP_MESH - //-------------------------------------- - this->read_pseudo_upf201_mesh(ifs, pp); + this->read_pseudo_upf201_mesh (ifs, pp); //-------------------------------------- //- PP_NLCC - //-------------------------------------- if (pp.nlcc) - { - if (ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "'); // skip type, size, columns and so on. - } - else { - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, ""); - } - pp.rho_atc = std::vector(pp.mesh, 0.0); - for (int ir = 0; ir < pp.mesh; ir++) - { - ifs >> pp.rho_atc[ir]; + if (ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "'); // skip type, size, columns and so on. + } + else + { + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, ""); + } + pp.rho_atc = std::vector (pp.mesh, 0.0); + for (int ir = 0; ir < pp.mesh; ir++) + { + ifs >> pp.rho_atc[ir]; + } + this->skip_number (ifs, this->mesh_changed); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); } - this->skip_number(ifs, this->mesh_changed); - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - } //-------------------------------------- //- PP_LOCAL - //-------------------------------------- if (!this->coulomb_potential) - { - if (ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "'); // skip type, size, columns and so on. 
- } - else - { - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, ""); - } - pp.vloc_at = std::vector(pp.mesh, 0.0); - for (int ir = 0; ir < pp.mesh; ir++) - { - ifs >> pp.vloc_at[ir]; + if (ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "'); // skip type, size, columns and so on. + } + else + { + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, ""); + } + pp.vloc_at = std::vector (pp.mesh, 0.0); + for (int ir = 0; ir < pp.mesh; ir++) + { + ifs >> pp.vloc_at[ir]; + } + this->skip_number (ifs, this->mesh_changed); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); } - this->skip_number(ifs, this->mesh_changed); - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - } //-------------------------------------- //- PP_NONLOCAL - //-------------------------------------- - this->read_pseudo_upf201_nonlocal(ifs, pp); + this->read_pseudo_upf201_nonlocal (ifs, pp); //-------------------------------------- //- PP_PSWFC - //-------------------------------------- - this->read_pseudo_upf201_pswfc(ifs, pp); + this->read_pseudo_upf201_pswfc (ifs, pp); //-------------------------------------- //- PP_FULL_WFC - @@ -79,35 +80,36 @@ int Pseudopot_upf::read_pseudo_upf201(std::ifstream &ifs, Atom_pseudo& pp) //-------------------------------------- //- PP_RHOATOM - //-------------------------------------- - if (ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "'); - } + if (ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "'); + } else - { - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, ""); - } - pp.rho_at = std::vector(pp.mesh, 0.0); + { + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, ""); + } + pp.rho_at = std::vector (pp.mesh, 0.0); for (int ir = 0; ir < pp.mesh; ir++) - { - ifs >> pp.rho_at[ir]; - } - this->skip_number(ifs, this->mesh_changed); - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); + { + ifs >> pp.rho_at[ir]; + } + this->skip_number (ifs, this->mesh_changed); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); //-------------------------------------- //- PP_SPIN_ORB - //-------------------------------------- if (pp.has_so) - { - 
this->read_pseudo_upf201_so(ifs, pp); - } + { + this->read_pseudo_upf201_so (ifs, pp); + } - ModuleBase::GlobalFunc::SCAN_END(ifs, "", false); + ModuleBase::GlobalFunc::SCAN_END (ifs, "", false); return 0; } -void Pseudopot_upf::getnameval(std::ifstream& ifs, int& n, std::string* name, std::string* val) +void + Pseudopot_upf::getnameval (std::ifstream& ifs, int& n, std::string* name, std::string* val) { std::string txt; std::string word; @@ -115,689 +117,706 @@ void Pseudopot_upf::getnameval(std::ifstream& ifs, int& n, std::string* name, st // get long txt ifs >> txt; while (ifs >> word) - { - size_t wl = word.length() - 1; - txt = txt + " " + word; - if (word.substr(wl, 1) == ">") { - break; + size_t wl = word.length () - 1; + txt = txt + " " + word; + if (word.substr (wl, 1) == ">") + { + break; + } } - } // count number of parameters according to "=" size_t pos = 0; n = 0; - while (1) - { - pos = txt.find("=", pos); - if (pos == std::string::npos) - { - break; - } - pos++; - n++; - } + while (true) + { + pos = txt.find ("=", pos); + if (pos == std::string::npos) + { + break; + } + pos++; + n++; + } // get name & value pos = 0; - size_t pos2=0; - size_t ll=0; + size_t pos2 = 0; + size_t ll = 0; for (int i = 0; i < n; ++i) - { - pos2 = txt.find("=", pos); - for (; pos2 > pos; --pos2) // There may be a space before "="; - { - if (txt.substr(pos2 - 1, 1) != " ") - { - break; - } - } - ll = pos2 - pos; - name[i] = txt.substr(pos, ll); - std::string mark; - bool findmark = false; - for (int j = 0; j < 100; ++j) // The mark can be ' or " or . - { - mark = txt.substr(pos2, 1); - pos2++; - if (mark == "\"" || mark == "\'" || mark == ".") - { - findmark = true; - break; - } - } - if (!findmark) - { - ModuleBase::WARNING_QUIT( - "Pseudopot_upf::getnameval", - "The values are not in \' or \". 
Please improve the program in read_pp_upf201.cpp"); - } - pos = pos2; - pos2 = txt.find(mark, pos); - ll = pos2 - pos; - std::string tmpval = txt.substr(pos, ll); - tmpval = trim(tmpval); - val[i] = tmpval; - pos = pos2 + 1; - for (int j = 0; j < 100; ++j) - { - if (txt.substr(pos, 1) == " " || txt.substr(pos, 1) == ",") - { - pos++; - } - else - { - break; - } - } - //std::cout<getnameval(ifs, nparameter, name, val); + this->getnameval (ifs, nparameter, name, val); for (int ip = 0; ip < nparameter; ++ip) - { - if (name[ip] == "generated") - { - // add something// - } - else if (name[ip] == "author") { - } - else if (name[ip] == "date") - { - } - else if (name[ip] == "comment") - { - } - else if (name[ip] == "element") - { - pp.psd = val[ip]; - } - else if (name[ip] == "pseudo_type") - { - pp.pp_type = val[ip]; - if (pp.pp_type == "SL") - { - ModuleBase::WARNING_QUIT("Pseudopot_upf::read_pseudo_upf201_header", - "SEMI-LOCAL PSEUDOPOTENTIAL IS NOT SUPPORTED"); - } - } - else if (name[ip] == "relativistic") - { - relativistic = val[ip]; - } - else if (name[ip] == "is_ultrasoft") - { - if (val[ip] == "T" || val[ip] == "TRUE" || val[ip] == "True" || val[ip] == "true") - { - pp.tvanp = true; - } - else - { - pp.tvanp = false; - } - } - else if (name[ip] == "is_paw") - { - if (val[ip] == "T" || val[ip] == "TRUE" || val[ip] == "True" || val[ip] == "true") - { - ModuleBase::WARNING_QUIT("Pseudopot_upf::read_pseudo_upf201_header", "PAW POTENTIAL IS NOT SUPPORTED"); - } - } - else if (name[ip] == "is_coulomb") - { - if (val[ip] == "T" || val[ip] == "TRUE" || val[ip] == "True" || val[ip] == "true") - { - this->coulomb_potential = true; - } - } - else if (name[ip] == "has_so") - { - if (val[ip] == "T" || val[ip] == "TRUE" || val[ip] == "True" || val[ip] == "true") - pp.has_so = true; - else - pp.has_so = false; - } - else if (name[ip] == "has_wfc") - { - // if (val[ip] == "T" || val[ip] == "TRUE" || val[ip] == "True" || val[ip] == "true") - // { - // has_wfc = true; - // } - 
// else - // { - // has_wfc = false; - // } - } - else if (name[ip] == "has_gipaw") - { - } - else if (name[ip] == "paw_as_gipaw") - { - } - else if (name[ip] == "core_correction") - { - if (val[ip] == "T" || val[ip] == "TRUE" || val[ip] == "True" || val[ip] == "true") - pp.nlcc = true; + if (name[ip] == "generated") + { + // add something// + } + else if (name[ip] == "author") + { + } + else if (name[ip] == "date") + { + } + else if (name[ip] == "comment") + { + } + else if (name[ip] == "element") + { + pp.psd = val[ip]; + } + else if (name[ip] == "pseudo_type") + { + pp.pp_type = val[ip]; + if (pp.pp_type == "SL") + { + ModuleBase::WARNING_QUIT ("Pseudopot_upf::read_pseudo_upf201_header", + "SEMI-LOCAL PSEUDOPOTENTIAL IS NOT SUPPORTED"); + } + } + else if (name[ip] == "relativistic") + { + relativistic = val[ip]; + } + else if (name[ip] == "is_ultrasoft") + { + if (val[ip] == "T" || val[ip] == "TRUE" || val[ip] == "True" || val[ip] == "true") + { + pp.tvanp = true; + } + else + { + pp.tvanp = false; + } + } + else if (name[ip] == "is_paw") + { + if (val[ip] == "T" || val[ip] == "TRUE" || val[ip] == "True" || val[ip] == "true") + { + ModuleBase::WARNING_QUIT ("Pseudopot_upf::read_pseudo_upf201_header", + "PAW POTENTIAL IS NOT SUPPORTED"); + } + } + else if (name[ip] == "is_coulomb") + { + if (val[ip] == "T" || val[ip] == "TRUE" || val[ip] == "True" || val[ip] == "true") + { + this->coulomb_potential = true; + } + } + else if (name[ip] == "has_so") + { + if (val[ip] == "T" || val[ip] == "TRUE" || val[ip] == "True" || val[ip] == "true") + { + pp.has_so = true; + } + else + { + pp.has_so = false; + } + } + else if (name[ip] == "has_wfc") + { + // if (val[ip] == "T" || val[ip] == "TRUE" || val[ip] == "True" || val[ip] == "true") + // { + // has_wfc = true; + // } + // else + // { + // has_wfc = false; + // } + } + else if (name[ip] == "has_gipaw") + { + } + else if (name[ip] == "paw_as_gipaw") + { + } + else if (name[ip] == "core_correction") + { + if (val[ip] == "T" 
|| val[ip] == "TRUE" || val[ip] == "True" || val[ip] == "true") + { + pp.nlcc = true; + } + else + { + pp.nlcc = false; + } + } + else if (name[ip] == "functional") + { + pp.xc_func = val[ip]; + } + else if (name[ip] == "z_valence") + { + pp.zv = std::stod (val[ip]); + } + else if (name[ip] == "total_psenergy") + { + pp.etotps = atof (val[ip].c_str ()); + } + else if (name[ip] == "wfc_cutoff") + { + pp.ecutwfc = atof (val[ip].c_str ()); + } + else if (name[ip] == "rho_cutoff") + { + pp.ecutrho = atof (val[ip].c_str ()); + } + else if (name[ip] == "l_max") + { + pp.lmax = atoi (val[ip].c_str ()); + } + else if (name[ip] == "l_max_rho") + { + this->lmax_rho = atoi (val[ip].c_str ()); + } + else if (name[ip] == "l_local") + { + this->lloc = atoi (val[ip].c_str ()); + } + else if (name[ip] == "mesh_size") + { + pp.mesh = atoi (val[ip].c_str ()); + this->mesh_changed = false; + if (pp.mesh % 2 == 0) + { + pp.mesh -= 1; + this->mesh_changed = true; + } + } + else if (name[ip] == "number_of_wfc") + { + pp.nchi = atoi (val[ip].c_str ()); + } + else if (name[ip] == "number_of_proj") + { + pp.nbeta = atoi (val[ip].c_str ()); + } else - pp.nlcc = false; - } - else if (name[ip] == "functional") - { - pp.xc_func = val[ip]; - } - else if (name[ip] == "z_valence") - { - pp.zv = std::stod(val[ip]); - } - else if (name[ip] == "total_psenergy") - { - pp.etotps = atof(val[ip].c_str()); + { + std::string warningstr + = name[ip] + " is not read in. 
Please add this parameter in read_pp_upf201.cpp if needed."; + ModuleBase::WARNING ("PP_HEADRER reading", warningstr); + } } - else if (name[ip] == "wfc_cutoff") + if (this->coulomb_potential) { - pp.ecutwfc = atof(val[ip].c_str()); + pp.nbeta = 0; + pp.lmax = 0; + this->lloc = 0; } - else if (name[ip] == "rho_cutoff") +} + +void + Pseudopot_upf::read_pseudo_upf201_mesh (std::ifstream& ifs, Atom_pseudo& pp) +{ + const int max_n = 100; + std::string name[max_n]; + std::string val[max_n]; + int nparameter = 0; + + if (ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "getnameval (ifs, nparameter, name, val); + + for (int ip = 0; ip < nparameter; ++ip) + { + if (name[ip] == "dx") + { + dx = atof (val[ip].c_str ()); + } + else if (name[ip] == "mesh") + { + pp.mesh = atoi (val[ip].c_str ()); + + this->mesh_changed = false; + if (pp.mesh % 2 == 0) + { + pp.mesh -= 1; + this->mesh_changed = true; + } + } + else if (name[ip] == "xmin") + { + xmin = atof (val[ip].c_str ()); + } + else if (name[ip] == "rmax") + { + rmax = atof (val[ip].c_str ()); + } + else if (name[ip] == "zmesh") + { + zmesh = atof (val[ip].c_str ()); + } + else + { + std::string warningstr + = name[ip] + + " is not read in. 
Please add this parameter in read_pp_upf201.cpp if needed."; + ModuleBase::WARNING ("PP_MESH reading", warningstr); + } + } } - else if (name[ip] == "l_max") + else { - pp.lmax = atoi(val[ip].c_str()); + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, ""); } - else if (name[ip] == "l_max_rho") + + if (ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "lmax_rho = atoi(val[ip].c_str()); + ifs.ignore (150, '>'); } - else if (name[ip] == "l_local") + else { - this->lloc = atoi(val[ip].c_str()); + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, ""); } - else if (name[ip] == "mesh_size") + assert (pp.mesh > 0); + pp.r = std::vector (pp.mesh, 0.0); + pp.rab = std::vector (pp.mesh, 0.0); + for (int ir = 0; ir < pp.mesh; ir++) { - pp.mesh = atoi(val[ip].c_str()); - this->mesh_changed = false; - if (pp.mesh % 2 == 0) - { - pp.mesh -= 1; - this->mesh_changed = true; - } + ifs >> pp.r[ir]; } - else if (name[ip] == "number_of_wfc") + this->skip_number (ifs, this->mesh_changed); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); + + if (ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "'); } - else if (name[ip] == "number_of_proj") + else { - pp.nbeta = atoi(val[ip].c_str()); + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, ""); } - else + for (int ir = 0; ir < pp.mesh; ir++) { - std::string warningstr - = name[ip] + " is not read in. 
Please add this parameter in read_pp_upf201.cpp if needed."; - ModuleBase::WARNING("PP_HEADRER reading", warningstr); + ifs >> pp.rab[ir]; } - } - if (this->coulomb_potential) - { - pp.nbeta = 0; - pp.lmax = 0; - this->lloc = 0; - } + this->skip_number (ifs, this->mesh_changed); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); } -void Pseudopot_upf::read_pseudo_upf201_mesh(std::ifstream& ifs, Atom_pseudo& pp) +void + Pseudopot_upf::read_pseudo_upf201_nonlocal (std::ifstream& ifs, Atom_pseudo& pp) { - const int max_n = 100; - std::string name[max_n]; - std::string val[max_n]; - int nparameter=0; - - if (ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "getnameval(ifs, nparameter, name, val); - - for (int ip = 0; ip < nparameter; ++ip) - { - if (name[ip] == "dx") - { - dx = atof(val[ip].c_str()); - } - else if (name[ip] == "mesh") - { - pp.mesh = atoi(val[ip].c_str()); - - this->mesh_changed = false; - if (pp.mesh % 2 == 0) - { - pp.mesh -= 1; - this->mesh_changed = true; - } - } - else if (name[ip] == "xmin") - { - xmin = atof(val[ip].c_str()); - } - else if (name[ip] == "rmax") - { - rmax = atof(val[ip].c_str()); - } - else if (name[ip] == "zmesh") - { - zmesh = atof(val[ip].c_str()); - } - else - { - std::string warningstr - = name[ip] + " is not read in. 
Please add this parameter in read_pp_upf201.cpp if needed."; - ModuleBase::WARNING("PP_MESH reading", warningstr); - } - } - } - else - { - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, ""); - } - - if (ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "'); - } - else - { - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, ""); - } - assert(pp.mesh > 0); - pp.r = std::vector(pp.mesh, 0.0); - pp.rab = std::vector(pp.mesh, 0.0); - for (int ir = 0; ir < pp.mesh; ir++) - { - ifs >> pp.r[ir]; - } - this->skip_number(ifs, this->mesh_changed); - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - - if (ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "'); - } - else - { - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, ""); - } - for (int ir = 0; ir < pp.mesh; ir++) - { - ifs >> pp.rab[ir]; - } - this->skip_number(ifs, this->mesh_changed); - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); -} - -void Pseudopot_upf::read_pseudo_upf201_nonlocal(std::ifstream& ifs, Atom_pseudo& pp) -{ - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, ""); + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, ""); if (pp.nbeta == 0) - { - return; - } + { + return; + } std::string word; const int max_n = 100; std::string name[max_n]; std::string val[max_n]; - int nparameter=0; - - this->kbeta = std::vector(pp.nbeta); - pp.lll = std::vector(pp.nbeta); - this->els_beta = std::vector(pp.nbeta); - this->rcut = std::vector(pp.nbeta, 0.0); - this->rcutus = std::vector(pp.nbeta, 0.0); - pp.betar.create(pp.nbeta, pp.mesh); - pp.dion.create(pp.nbeta, pp.nbeta); + int nparameter = 0; + + this->kbeta = std::vector (pp.nbeta); + pp.lll = std::vector (pp.nbeta); + this->els_beta = std::vector (pp.nbeta); + this->rcut = std::vector (pp.nbeta, 0.0); + this->rcutus = std::vector (pp.nbeta, 0.0); + pp.betar.create (pp.nbeta, pp.mesh); + pp.dion.create (pp.nbeta, pp.nbeta); for (int ib = 0; ib < pp.nbeta; ib++) - { - word = "getnameval(ifs, nparameter, name, val); - // default value - els_beta[ib] = "Xn"; - this->kbeta[ib] = pp.mesh; - 
rcut[ib] = 0.0; - rcutus[ib] = 0.0; - for (int ip = 0; ip < nparameter; ++ip) - { - if (name[ip] == "type") - { - } - else if (name[ip] == "size") - { - } - else if (name[ip] == "columns") - { - } - else if (name[ip] == "index") - { - } - else if (name[ip] == "label") - { - els_beta[ib] = val[ip]; - } - else if (name[ip] == "angular_momentum") - { - pp.lll[ib] = atoi(val[ip].c_str()); - } - else if (name[ip] == "cutoff_radius_index") - { - this->kbeta[ib] = atoi(val[ip].c_str()); - } - else if (name[ip] == "cutoff_radius") - { - rcut[ib] = atof(val[ip].c_str()); - } - else if (name[ip] == "ultrasoft_cutoff_radius") - { - rcutus[ib] = atof(val[ip].c_str()); - } - else - { - std::string warningstr - = name[ip] + " is not read in. Please add this parameter in read_pp_upf201.cpp if needed."; - ModuleBase::WARNING("PP_BETA reading", warningstr); - } - } - for (int ir = 0; ir < pp.mesh; ir++) { - ifs >> pp.betar(ib, ir); + word = "getnameval (ifs, nparameter, name, val); + // default value + els_beta[ib] = "Xn"; + this->kbeta[ib] = pp.mesh; + rcut[ib] = 0.0; + rcutus[ib] = 0.0; + for (int ip = 0; ip < nparameter; ++ip) + { + if (name[ip] == "type") + { + } + else if (name[ip] == "size") + { + } + else if (name[ip] == "columns") + { + } + else if (name[ip] == "index") + { + } + else if (name[ip] == "label") + { + els_beta[ib] = val[ip]; + } + else if (name[ip] == "angular_momentum") + { + pp.lll[ib] = atoi (val[ip].c_str ()); + } + else if (name[ip] == "cutoff_radius_index") + { + this->kbeta[ib] = atoi (val[ip].c_str ()); + } + else if (name[ip] == "cutoff_radius") + { + rcut[ib] = atof (val[ip].c_str ()); + } + else if (name[ip] == "ultrasoft_cutoff_radius") + { + rcutus[ib] = atof (val[ip].c_str ()); + } + else + { + std::string warningstr + = name[ip] + + " is not read in. 
Please add this parameter in read_pp_upf201.cpp if needed."; + ModuleBase::WARNING ("PP_BETA reading", warningstr); + } + } + for (int ir = 0; ir < pp.mesh; ir++) + { + ifs >> pp.betar (ib, ir); + } + this->skip_number (ifs, this->mesh_changed); + word = ""; + ModuleBase::GlobalFunc::SCAN_END (ifs, word); } - this->skip_number(ifs, this->mesh_changed); - word = ""; - ModuleBase::GlobalFunc::SCAN_END(ifs, word); - } - //check the betar for non-normal number - pp.check_betar(); + // check the betar for non-normal number + pp.check_betar (); // Read the hamiltonian terms D_ij - if (ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "'); - } - else - { - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, ""); - } - this->nd = pp.nbeta * pp.nbeta; - for (int i = 0; i < pp.nbeta; i++) - { - for (int j = 0; j < pp.nbeta; j++) + if (ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "> pp.dion(i, j); + ifs.ignore (150, '>'); } - } - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - - // Read the augmentation charge section need by uspp - if (pp.tvanp && ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "getnameval(ifs, nparameter, name, val); - // default value - pp.nqlc = 2 * pp.lmax + 1; - for (int ip = 0; ip < nparameter; ++ip) - { - if (name[ip] == "q_with_l") - { - if (val[ip] == "T" || val[ip] == "TRUE" || val[ip] == "True" || val[ip] == "true") - { - q_with_l = true; - } - else - { - q_with_l = false; - } - } - else if (name[ip] == "nqf") - { - nqf = atoi(val[ip].c_str()); - } - else if (name[ip] == "nqlc") - { - pp.nqlc = atoi(val[ip].c_str()); - } - else - { - std::string warningstr - = name[ip] + " is not read in. 
Please add this parameter in read_pp_upf201.cpp if needed."; - ModuleBase::WARNING("PP_AUGMENTATION reading", warningstr); - } - } - - // PP_Q - if (ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "'); - } - else + else { - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, ""); + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, ""); } - pp.qqq.create(pp.nbeta, pp.nbeta); - for (int i = 0; i < pp.nbeta; i++) + this->nd = pp.nbeta * pp.nbeta; + for (int i = 0; i < pp.nbeta; i++) { for (int j = 0; j < pp.nbeta; j++) - { - ifs >> pp.qqq(i, j); - } + { + ifs >> pp.dion (i, j); + } } - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); - // Read polinomial coefficients for Q_ij expansion at small radius - this->rinner = std::vector(pp.nqlc, 0.0); - if (nqf <= 0) - { - this->qfcoef.create(1, 1, 1, 1); - } - else + // Read the augmentation charge section need by uspp + if (pp.tvanp && ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "qfcoef.create(pp.nbeta, pp.nbeta, pp.nqlc, nqf); - if (ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "")) - { - for (int i = 0; i < pp.nbeta; i++) + this->getnameval (ifs, nparameter, name, val); + // default value + pp.nqlc = 2 * pp.lmax + 1; + for (int ip = 0; ip < nparameter; ++ip) { - for (int j = 0; j < pp.nbeta; j++) - { - for (int k = 0; k < pp.nqlc; k++) + if (name[ip] == "q_with_l") + { + if (val[ip] == "T" || val[ip] == "TRUE" || val[ip] == "True" || val[ip] == "true") + { + q_with_l = true; + } + else + { + q_with_l = false; + } + } + else if (name[ip] == "nqf") + { + nqf = atoi (val[ip].c_str ()); + } + else if (name[ip] == "nqlc") { - for (int l = 0; l < nqf; l++) - { - ifs >> qfcoef(i, j, k, l); - } + pp.nqlc = atoi (val[ip].c_str ()); + } + else + { + std::string warningstr + = name[ip] + + " is not read in. 
Please add this parameter in read_pp_upf201.cpp if needed."; + ModuleBase::WARNING ("PP_AUGMENTATION reading", warningstr); } - } } - } - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - if (ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "")) - { - for (int i = 0; i < pp.nqlc; i++) + + // PP_Q + if (ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "> rinner[i]; + ifs.ignore (150, '>'); } - } - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - } - - // Read augmentation charge Q_ij - if (q_with_l) - { - pp.qfuncl.create(2 * pp.lmax + 1, pp.nbeta * (pp.nbeta + 1) / 2, pp.mesh); - } - else - { - this->qfunc.create(pp.nbeta * (pp.nbeta + 1) / 2, pp.mesh); - } + else + { + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, ""); + } + pp.qqq.create (pp.nbeta, pp.nbeta); + for (int i = 0; i < pp.nbeta; i++) + { + for (int j = 0; j < pp.nbeta; j++) + { + ifs >> pp.qqq (i, j); + } + } + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); - for (int nb = 0; nb < pp.nbeta; nb++) - { - int ln = pp.lll[nb]; - for (int mb = nb; mb < pp.nbeta; mb++) - { - int lm = pp.lll[mb]; - int nmb = mb * (mb + 1) / 2 + nb; - if (q_with_l) + // Read polinomial coefficients for Q_ij expansion at small radius + this->rinner = std::vector (pp.nqlc, 0.0); + if (nqf <= 0) + { + this->qfcoef.create (1, 1, 1, 1); + } + else { - for (int l = std::abs(ln - lm); l <= ln + lm; l += 2) - { - word = "'); - for (int ir = 0; ir < pp.mesh; ir++) + this->qfcoef.create (pp.nbeta, pp.nbeta, pp.nqlc, nqf); + if (ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "")) + { + for (int i = 0; i < pp.nbeta; i++) + { + for (int j = 0; j < pp.nbeta; j++) + { + for (int k = 0; k < pp.nqlc; k++) + { + for (int l = 0; l < nqf; l++) + { + ifs >> qfcoef (i, j, k, l); + } + } + } + } + } + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); + if (ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "")) { - ifs >> pp.qfuncl(l, nmb, ir); + for (int i = 0; i < pp.nqlc; i++) + { + ifs >> rinner[i]; + } } - this->skip_number(ifs, this->mesh_changed); - word = ""; - 
ModuleBase::GlobalFunc::SCAN_END(ifs, word); - } + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); + } + + // Read augmentation charge Q_ij + if (q_with_l) + { + pp.qfuncl.create (2 * pp.lmax + 1, pp.nbeta * (pp.nbeta + 1) / 2, pp.mesh); + } + else + { + this->qfunc.create (pp.nbeta * (pp.nbeta + 1) / 2, pp.mesh); } - else + + for (int nb = 0; nb < pp.nbeta; nb++) { - word = "'); - for (int ir = 0; ir < pp.mesh; ir++) - { - ifs >> this->qfunc(nmb, ir); - } - this->skip_number(ifs, this->mesh_changed); - word = ""; - ModuleBase::GlobalFunc::SCAN_END(ifs, word); + int ln = pp.lll[nb]; + for (int mb = nb; mb < pp.nbeta; mb++) + { + int lm = pp.lll[mb]; + int nmb = mb * (mb + 1) / 2 + nb; + if (q_with_l) + { + for (int l = std::abs (ln - lm); l <= ln + lm; l += 2) + { + word = "'); + for (int ir = 0; ir < pp.mesh; ir++) + { + ifs >> pp.qfuncl (l, nmb, ir); + } + this->skip_number (ifs, this->mesh_changed); + word = ""; + ModuleBase::GlobalFunc::SCAN_END (ifs, word); + } + } + else + { + word = "'); + for (int ir = 0; ir < pp.mesh; ir++) + { + ifs >> this->qfunc (nmb, ir); + } + this->skip_number (ifs, this->mesh_changed); + word = ""; + ModuleBase::GlobalFunc::SCAN_END (ifs, word); + } + } } - } + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); } - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - } - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); pp.kkbeta = 0; for (int nb = 0; nb < pp.nbeta; nb++) - { - pp.kkbeta = (this->kbeta[nb] > pp.kkbeta) ? this->kbeta[nb] : pp.kkbeta; - } + { + pp.kkbeta = (this->kbeta[nb] > pp.kkbeta) ? 
this->kbeta[nb] : pp.kkbeta; + } } -void Pseudopot_upf::read_pseudo_upf201_pswfc(std::ifstream& ifs, Atom_pseudo& pp) +void + Pseudopot_upf::read_pseudo_upf201_pswfc (std::ifstream& ifs, Atom_pseudo& pp) { std::string word; const int max_n = 100; std::string name[max_n]; std::string val[max_n]; - int nparameter=0; - - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, ""); - pp.els = std::vector(pp.nchi, ""); - pp.lchi = std::vector(pp.nchi, 0); - this->nchi = std::vector(pp.nchi, 0); - pp.oc = std::vector(pp.nchi, 0.0); - this->epseu = std::vector(pp.nchi, 0.0); - this->rcut_chi = std::vector(pp.nchi, 0.0); - this->rcutus_chi = std::vector(pp.nchi, 0.0); - pp.chi.create(pp.nchi, pp.mesh); + int nparameter = 0; + + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, ""); + pp.els = std::vector (pp.nchi, ""); + pp.lchi = std::vector (pp.nchi, 0); + this->nchi = std::vector (pp.nchi, 0); + pp.oc = std::vector (pp.nchi, 0.0); + this->epseu = std::vector (pp.nchi, 0.0); + this->rcut_chi = std::vector (pp.nchi, 0.0); + this->rcutus_chi = std::vector (pp.nchi, 0.0); + pp.chi.create (pp.nchi, pp.mesh); for (int iw = 0; iw < pp.nchi; iw++) - { - // default value - pp.els[iw] = "Xn"; - nchi[iw] = -1; - epseu[iw] = 0.0; - rcut_chi[iw] = 0.0; - rcutus_chi[iw] = 0.0; - - word = "getnameval(ifs, nparameter, name, val); - for (int ip = 0; ip < nparameter; ++ip) - { - if (name[ip] == "type") - { - } - else if (name[ip] == "size") - { - } - else if (name[ip] == "columns") - { - } - else if (name[ip] == "index") - { - } - else if (name[ip] == "label") - { - pp.els[iw] = val[ip]; - } - else if (name[ip] == "l") - { - pp.lchi[iw] = atoi(val[ip].c_str()); - if (nchi[iw] == -1) - { - nchi[iw] = pp.lchi[iw] - 1; - } - } - else if (name[ip] == "occupation") - { - pp.oc[iw] = atof(val[ip].c_str()); - } - else if (name[ip] == "n") - { - nchi[iw] = atoi(val[ip].c_str()); - } - else if (name[ip] == "pseudo_energy") - { - epseu[iw] = atof(val[ip].c_str()); - } - else if (name[ip] == "cutoff_radius") - { - 
rcut_chi[iw] = atof(val[ip].c_str()); - } - else if (name[ip] == "ultrasoft_cutoff_radius") - { - rcutus_chi[iw] = atof(val[ip].c_str()); - } - else - { - std::string warningstr - = name[ip] + " is not read in. Please add this parameter in read_pp_upf201.cpp if needed."; - ModuleBase::WARNING("PP_CHI reading", warningstr); - } - } - for (int ir = 0; ir < pp.mesh; ir++) - { - ifs >> pp.chi(iw, ir); - } - for (int ir = 0; ir < pp.mesh; ir++) { - assert(pp.chi.c[iw * pp.mesh + ir] == pp.chi(iw, ir)); + // default value + pp.els[iw] = "Xn"; + nchi[iw] = -1; + epseu[iw] = 0.0; + rcut_chi[iw] = 0.0; + rcutus_chi[iw] = 0.0; + + word = "getnameval (ifs, nparameter, name, val); + for (int ip = 0; ip < nparameter; ++ip) + { + if (name[ip] == "type") + { + } + else if (name[ip] == "size") + { + } + else if (name[ip] == "columns") + { + } + else if (name[ip] == "index") + { + } + else if (name[ip] == "label") + { + pp.els[iw] = val[ip]; + } + else if (name[ip] == "l") + { + pp.lchi[iw] = atoi (val[ip].c_str ()); + if (nchi[iw] == -1) + { + nchi[iw] = pp.lchi[iw] - 1; + } + } + else if (name[ip] == "occupation") + { + pp.oc[iw] = atof (val[ip].c_str ()); + } + else if (name[ip] == "n") + { + nchi[iw] = atoi (val[ip].c_str ()); + } + else if (name[ip] == "pseudo_energy") + { + epseu[iw] = atof (val[ip].c_str ()); + } + else if (name[ip] == "cutoff_radius") + { + rcut_chi[iw] = atof (val[ip].c_str ()); + } + else if (name[ip] == "ultrasoft_cutoff_radius") + { + rcutus_chi[iw] = atof (val[ip].c_str ()); + } + else + { + std::string warningstr + = name[ip] + + " is not read in. 
Please add this parameter in read_pp_upf201.cpp if needed."; + ModuleBase::WARNING ("PP_CHI reading", warningstr); + } + } + for (int ir = 0; ir < pp.mesh; ir++) + { + ifs >> pp.chi (iw, ir); + } + for (int ir = 0; ir < pp.mesh; ir++) + { + assert (pp.chi.c[iw * pp.mesh + ir] == pp.chi (iw, ir)); + } + this->skip_number (ifs, this->mesh_changed); + word = ""; + ModuleBase::GlobalFunc::SCAN_END (ifs, word); } - this->skip_number(ifs, this->mesh_changed); - word = ""; - ModuleBase::GlobalFunc::SCAN_END(ifs, word); - } - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); } /* @@ -840,88 +859,91 @@ void Pseudopot_upf::read_pseudo_upf201_fullwfc(std::ifstream& ifs) ModuleBase::GlobalFunc::SCAN_END(ifs, ""); }*/ -void Pseudopot_upf::read_pseudo_upf201_so(std::ifstream& ifs, Atom_pseudo& pp) +void + Pseudopot_upf::read_pseudo_upf201_so (std::ifstream& ifs, Atom_pseudo& pp) { std::string word; const int max_n = 100; std::string name[max_n]; std::string val[max_n]; - int nparameter=0; + int nparameter = 0; - ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, ""); - pp.jchi = std::vector(pp.nchi, 0.0); - pp.jjj = std::vector(pp.nbeta, 0.0); - pp.nn = std::vector(pp.nchi, 0); + ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, ""); + pp.jchi = std::vector (pp.nchi, 0.0); + pp.jjj = std::vector (pp.nbeta, 0.0); + pp.nn = std::vector (pp.nchi, 0); for (int nw = 0; nw < pp.nchi; nw++) - { - word = "getnameval(ifs, nparameter, name, val); - for (int ip = 0; ip < nparameter; ++ip) - { - if (name[ip] == "index") - { - } - else if (name[ip] == "els") - { - pp.els[nw] = val[ip]; - } - else if (name[ip] == "nn") - { - pp.nn[nw] = atoi(val[ip].c_str()); - } - else if (name[ip] == "lchi") - { - pp.lchi[nw] = atoi(val[ip].c_str()); - } - else if (name[ip] == "jchi") - { - pp.jchi[nw] = atof(val[ip].c_str()); - } - else if (name[ip] == "oc") - { - pp.oc[nw] = atof(val[ip].c_str()); - } - else - { - std::string warningstr - = name[ip] + " is not read in. 
Please add this parameter in read_pp_upf201.cpp if needed."; - ModuleBase::WARNING("PP_RELWFC reading", warningstr); - } + { + word = "getnameval (ifs, nparameter, name, val); + for (int ip = 0; ip < nparameter; ++ip) + { + if (name[ip] == "index") + { + } + else if (name[ip] == "els") + { + pp.els[nw] = val[ip]; + } + else if (name[ip] == "nn") + { + pp.nn[nw] = atoi (val[ip].c_str ()); + } + else if (name[ip] == "lchi") + { + pp.lchi[nw] = atoi (val[ip].c_str ()); + } + else if (name[ip] == "jchi") + { + pp.jchi[nw] = atof (val[ip].c_str ()); + } + else if (name[ip] == "oc") + { + pp.oc[nw] = atof (val[ip].c_str ()); + } + else + { + std::string warningstr + = name[ip] + + " is not read in. Please add this parameter in read_pp_upf201.cpp if needed."; + ModuleBase::WARNING ("PP_RELWFC reading", warningstr); + } + } + word = ""; + ModuleBase::GlobalFunc::SCAN_END (ifs, word); } - word = ""; - ModuleBase::GlobalFunc::SCAN_END(ifs, word); - } for (int nb = 0; nb < pp.nbeta; nb++) - { - word = "getnameval(ifs, nparameter, name, val); - for (int ip = 0; ip < nparameter; ++ip) - { - if (name[ip] == "index") - { - } - else if (name[ip] == "lll") - { - pp.lll[nb] = atoi(val[ip].c_str()); - } - else if (name[ip] == "jjj") - { - pp.jjj[nb] = atof(val[ip].c_str()); - } - else - { - std::string warningstr - = name[ip] + " is not read in. Please add this parameter in read_pp_upf201.cpp if needed."; - ModuleBase::WARNING("PP_RELBETA reading", warningstr); - } + { + word = "getnameval (ifs, nparameter, name, val); + for (int ip = 0; ip < nparameter; ++ip) + { + if (name[ip] == "index") + { + } + else if (name[ip] == "lll") + { + pp.lll[nb] = atoi (val[ip].c_str ()); + } + else if (name[ip] == "jjj") + { + pp.jjj[nb] = atof (val[ip].c_str ()); + } + else + { + std::string warningstr + = name[ip] + + " is not read in. 
Please add this parameter in read_pp_upf201.cpp if needed."; + ModuleBase::WARNING ("PP_RELBETA reading", warningstr); + } + } + word = ""; + ModuleBase::GlobalFunc::SCAN_END (ifs, word); } - word = ""; - ModuleBase::GlobalFunc::SCAN_END(ifs, word); - } - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); } diff --git a/source/source_cell/read_pp_vwr.cpp b/source/source_cell/read_pp_vwr.cpp index b6e0e7847ed..0b2608c0de2 100644 --- a/source/source_cell/read_pp_vwr.cpp +++ b/source/source_cell/read_pp_vwr.cpp @@ -2,379 +2,521 @@ //---------------------------------------------------------- // This code is used to read in vwr pseudopotential format, -// Now we only use LDA, so if PBE or other functionals are used, -// one needs to change the following code. The vwr format -// needs we to generate NL projectors by ourself. +// Now we only use LDA, so if PBE or other functionals are used, +// one needs to change the following code. The vwr format +// needs we to generate NL projectors by ourself. // One way to check this is correct -// is to use opium to generate .ncpp pseudopotential first, +// is to use opium to generate .ncpp pseudopotential first, // which contains the same informaiton in vwr, then we had // both UPF format from upftools in Quantum Espresso and // we can write a short code to transform ncpp to vwr. // Then compare the two results. // mohan 2013-05-25 //----------------------------------------------------------- -int Pseudopot_upf::read_pseudo_vwr(std::ifstream &ifs, Atom_pseudo& pp) +int + Pseudopot_upf::read_pseudo_vwr (std::ifstream& ifs, Atom_pseudo& pp) { - GlobalV::ofs_running << " -------------------------------------------------" << std::endl; - std::cout << " READ IN VWR TYPE PSEUDOPOTENTIALS." 
<< std::endl; - GlobalV::ofs_running << " Read in vwr type pseudopotentials " << std::endl; + GlobalV::ofs_running << " -------------------------------------------------" << std::endl; + std::cout << " READ IN VWR TYPE PSEUDOPOTENTIALS." << std::endl; + GlobalV::ofs_running << " Read in vwr type pseudopotentials " << std::endl; - // -------------------------------------- - // (1) read in data - // -------------------------------------- - pp.xc_func="PZ"; - pp.pp_type="NC"; + // -------------------------------------- + // (1) read in data + // -------------------------------------- + pp.xc_func = "PZ"; + pp.pp_type = "NC"; pp.tvanp = false; // (1) read in mesh - std::string value; - size_t length=0; - ifs >> value; length = value.find(","); value.erase(length,1); - pp.mesh = std::atoi( value.c_str() ); - //the mesh should be odd, which is forced in Simpson integration - this->mesh_changed = false; - if(pp.mesh%2==0) - { - pp.mesh=pp.mesh-1; - this->mesh_changed = true; - GlobalV::ofs_running << " Mesh number - 1, we need odd number, \n this may affect some polar atomic orbitals." 
<< std::endl; - } - GlobalV::ofs_running << std::setw(15) << "MESH" << std::setw(15) << pp.mesh << std::endl; - // (2) read in nlcc: nonlinear core correction - ifs >> value; length = value.find(","); value.erase(length,1); - pp.nlcc = std::atoi( value.c_str() ); - GlobalV::ofs_running << std::setw(15) << "NLCC" << std::setw(15) << pp.nlcc << std::endl; - // (3) iatom : index for atom - ifs >> value; length = value.find(","); value.erase(length,1); - pp.psd = value; - GlobalV::ofs_running << std::setw(15) << "ATOM" << std::setw(15) << pp.psd << std::endl; - // (4) valence electron number - ifs >> value; length = value.find(","); value.erase(length,1); - pp.zv = std::stod( value ); - GlobalV::ofs_running << std::setw(15) << "Z(VALENCE)" << std::setw(15) << pp.zv << std::endl; - // (5) spd_loc, which local pseudopotential should I choose - ifs >> value; length = value.find(","); value.erase(length,1); - spd_loc = std::atoi( value.c_str() ); - GlobalV::ofs_running << std::setw(15) << "LOC(spd)" << std::setw(15) << spd_loc << std::endl; - // (6) read in the occupations - std::vector tmp_oc(3, 0.0); - ifs >> value; length = value.find(","); value.erase(length,1); - tmp_oc[0]= std::atoi( value.c_str() ); - ifs >> value; length = value.find(","); value.erase(length,1); - tmp_oc[1]= std::atoi( value.c_str() ); - ifs >> value; length = value.find(","); value.erase(length,1); - tmp_oc[2]= std::atoi( value.c_str() ); - GlobalV::ofs_running << std::setw(15) << "OCCUPATION" << std::setw(15) << tmp_oc[0] - << std::setw(15) << tmp_oc[1] << std::setw(15) << tmp_oc[2] << std::endl; - // (7) spin orbital - ifs >> pp.has_so; + std::string value; + size_t length = 0; + ifs >> value; + length = value.find (","); + value.erase (length, 1); + pp.mesh = std::atoi (value.c_str ()); + // the mesh should be odd, which is forced in Simpson integration + this->mesh_changed = false; + if (pp.mesh % 2 == 0) + { + pp.mesh = pp.mesh - 1; + this->mesh_changed = true; + GlobalV::ofs_running + << " 
Mesh number - 1, we need odd number, \n this may affect some polar atomic orbitals." << std::endl; + } + GlobalV::ofs_running << std::setw (15) << "MESH" << std::setw (15) << pp.mesh << std::endl; + // (2) read in nlcc: nonlinear core correction + ifs >> value; + length = value.find (","); + value.erase (length, 1); + pp.nlcc = std::atoi (value.c_str ()); + GlobalV::ofs_running << std::setw (15) << "NLCC" << std::setw (15) << pp.nlcc << std::endl; + // (3) iatom : index for atom + ifs >> value; + length = value.find (","); + value.erase (length, 1); + pp.psd = value; + GlobalV::ofs_running << std::setw (15) << "ATOM" << std::setw (15) << pp.psd << std::endl; + // (4) valence electron number + ifs >> value; + length = value.find (","); + value.erase (length, 1); + pp.zv = std::stod (value); + GlobalV::ofs_running << std::setw (15) << "Z(VALENCE)" << std::setw (15) << pp.zv << std::endl; + // (5) spd_loc, which local pseudopotential should I choose + ifs >> value; + length = value.find (","); + value.erase (length, 1); + spd_loc = std::atoi (value.c_str ()); + GlobalV::ofs_running << std::setw (15) << "LOC(spd)" << std::setw (15) << spd_loc << std::endl; + // (6) read in the occupations + std::vector tmp_oc (3, 0.0); + ifs >> value; + length = value.find (","); + value.erase (length, 1); + tmp_oc[0] = std::atoi (value.c_str ()); + ifs >> value; + length = value.find (","); + value.erase (length, 1); + tmp_oc[1] = std::atoi (value.c_str ()); + ifs >> value; + length = value.find (","); + value.erase (length, 1); + tmp_oc[2] = std::atoi (value.c_str ()); + GlobalV::ofs_running << std::setw (15) << "OCCUPATION" << std::setw (15) << tmp_oc[0] << std::setw (15) << tmp_oc[1] + << std::setw (15) << tmp_oc[2] << std::endl; + // (7) spin orbital + ifs >> pp.has_so; + // label to count the projector or atomic wave functions + getline (ifs, value); + int iref_s, iref_p, iref_d; + ifs >> iref_s >> iref_p >> iref_d; + GlobalV::ofs_running << std::setw (15) << "Vnl_USED" << 
std::setw (15) << iref_s << std::setw (15) << iref_p + << std::setw (15) << iref_d << std::endl; + if (spd_loc == 1) + { + iref_s = 0; + } + else if (spd_loc == 2) + { + iref_p = 0; + } + else if (spd_loc == 3) + { + iref_d = 0; + } + ifs >> iTB_s >> iTB_p >> iTB_d; + GlobalV::ofs_running << std::setw (15) << "Orb_USED" << std::setw (15) << iTB_s << std::setw (15) << iTB_p + << std::setw (15) << iTB_d << std::endl; - // label to count the projector or atomic wave functions - getline(ifs,value); - int iref_s, iref_p, iref_d; - ifs >> iref_s >> iref_p >> iref_d; - GlobalV::ofs_running << std::setw(15) << "Vnl_USED" << std::setw(15) << iref_s - << std::setw(15) << iref_p << std::setw(15) << iref_d << std::endl; - if(spd_loc==1) { iref_s=0; - } else if(spd_loc==2) { iref_p=0; - } else if(spd_loc==3) { iref_d=0; -} - ifs >> iTB_s >> iTB_p >> iTB_d; - GlobalV::ofs_running << std::setw(15) << "Orb_USED" << std::setw(15) << iTB_s - << std::setw(15) << iTB_p << std::setw(15) << iTB_d << std::endl; - - - // calculate the number of wave functions - pp.nchi = 0; - if(iTB_s) { ++pp.nchi; -} - if(iTB_p) { ++pp.nchi; -} - if(iTB_d) { ++pp.nchi; -} - GlobalV::ofs_running << std::setw(15) << "NWFC" << std::setw(15) << pp.nchi << std::endl; - // allocate occupation number array for wave functions - pp.oc = std::vector(pp.nchi, 0.0); - pp.els = std::vector(pp.nchi, ""); - // set the value of occupations - pp.lchi = std::vector(pp.nchi, 0); - int iwfc=0; - if(iTB_s){pp.oc[iwfc]=tmp_oc[0];pp.lchi[iwfc]=0;pp.els[iwfc]="S";++iwfc;} - if(iTB_p){pp.oc[iwfc]=tmp_oc[1];pp.lchi[iwfc]=1;pp.els[iwfc]="P";++iwfc;} - if(iTB_d){pp.oc[iwfc]=tmp_oc[2];pp.lchi[iwfc]=2;pp.els[iwfc]="D";++iwfc;} - getline(ifs,value); - - - // global variables that will be used - // in other classes. 
- pp.r = std::vector(pp.mesh, 0.0); - pp.rab = std::vector(pp.mesh, 0.0); - pp.vloc_at = std::vector(pp.mesh, 0.0); - pp.rho_at = std::vector(pp.mesh, 0.0); - pp.rho_atc = std::vector(pp.mesh, 0.0); - // local variables in this function - std::vector vs = std::vector(pp.mesh, 0.0); // local pseudopotential for s, unit is Hartree - std::vector vp = std::vector(pp.mesh, 0.0); // local pseudopotential for p - std::vector vd = std::vector(pp.mesh, 0.0); // local pseudopotential for d - std::vector ws = std::vector(pp.mesh, 0.0); // wave function for s - std::vector wp = std::vector(pp.mesh, 0.0); // wave function for p - std::vector wd = std::vector(pp.mesh, 0.0); // wave function for d - std::string line; - if(spd_loc>0 && pp.nlcc==0) - { - for(int ir=0; ir> pp.r[ir] >> vs[ir] >> vp[ir] >> vd[ir] - >> ws[ir] >> wp[ir] >> wd[ir]; - getline(ifs, line); - } - } - else if(spd_loc==0 && pp.nlcc==0) - { - for(int ir=0; ir> pp.r[ir] >> vs[ir] >> vp[ir] >> vd[ir] - >> ws[ir] >> wp[ir] >> wd[ir] >> pp.vloc_at[ir]; - getline(ifs, line); - } - } - else if(spd_loc>0 && pp.nlcc==1) - { - for(int ir=0; ir> pp.r[ir] >> vs[ir] >> vp[ir] >> vd[ir] - >> ws[ir] >> wp[ir] >> wd[ir] >> pp.rho_atc[ir]; - getline(ifs, line); - } - } - else if(spd_loc==0 && pp.nlcc==1) - { - for(int ir=0; ir> pp.r[ir] >> vs[ir] >> vp[ir] >> vd[ir] - >> ws[ir] >> wp[ir] >> wd[ir] >> pp.vloc_at[ir] >> pp.rho_atc[ir]; - getline(ifs, line); - } - } - // Hartree to Rydberg - for(int ir=0; ir (pp.nchi, 0.0); + pp.els = std::vector (pp.nchi, ""); + // set the value of occupations + pp.lchi = std::vector (pp.nchi, 0); + int iwfc = 0; + if (iTB_s) + { + pp.oc[iwfc] = tmp_oc[0]; + pp.lchi[iwfc] = 0; + pp.els[iwfc] = "S"; + ++iwfc; + } + if (iTB_p) + { + pp.oc[iwfc] = tmp_oc[1]; + pp.lchi[iwfc] = 1; + pp.els[iwfc] = "P"; + ++iwfc; + } + if (iTB_d) + { + pp.oc[iwfc] = tmp_oc[2]; + pp.lchi[iwfc] = 2; + pp.els[iwfc] = "D"; + ++iwfc; + } + getline (ifs, value); + // global variables that will be used + // in other classes. 
+ pp.r = std::vector (pp.mesh, 0.0); + pp.rab = std::vector (pp.mesh, 0.0); + pp.vloc_at = std::vector (pp.mesh, 0.0); + pp.rho_at = std::vector (pp.mesh, 0.0); + pp.rho_atc = std::vector (pp.mesh, 0.0); + // local variables in this function + std::vector vs = std::vector (pp.mesh, 0.0); // local pseudopotential for s, unit is Hartree + std::vector vp = std::vector (pp.mesh, 0.0); // local pseudopotential for p + std::vector vd = std::vector (pp.mesh, 0.0); // local pseudopotential for d + std::vector ws = std::vector (pp.mesh, 0.0); // wave function for s + std::vector wp = std::vector (pp.mesh, 0.0); // wave function for p + std::vector wd = std::vector (pp.mesh, 0.0); // wave function for d + std::string line; + if (spd_loc > 0 && pp.nlcc == 0) + { + for (int ir = 0; ir < pp.mesh; ++ir) + { + // it's an interesting question whether + // ws[ir] has 1/sqrt(4pi) + ifs >> pp.r[ir] >> vs[ir] >> vp[ir] >> vd[ir] >> ws[ir] >> wp[ir] >> wd[ir]; + getline (ifs, line); + } + } + else if (spd_loc == 0 && pp.nlcc == 0) + { + for (int ir = 0; ir < pp.mesh; ++ir) + { + ifs >> pp.r[ir] >> vs[ir] >> vp[ir] >> vd[ir] >> ws[ir] >> wp[ir] >> wd[ir] >> pp.vloc_at[ir]; + getline (ifs, line); + } + } + else if (spd_loc > 0 && pp.nlcc == 1) + { + for (int ir = 0; ir < pp.mesh; ++ir) + { + ifs >> pp.r[ir] >> vs[ir] >> vp[ir] >> vd[ir] >> ws[ir] >> wp[ir] >> wd[ir] >> pp.rho_atc[ir]; + getline (ifs, line); + } + } + else if (spd_loc == 0 && pp.nlcc == 1) + { + for (int ir = 0; ir < pp.mesh; ++ir) + { + ifs >> pp.r[ir] >> vs[ir] >> vp[ir] >> vd[ir] >> ws[ir] >> wp[ir] >> wd[ir] >> pp.vloc_at[ir] + >> pp.rho_atc[ir]; + getline (ifs, line); + } + } + // Hartree to Rydberg + for (int ir = 0; ir < pp.mesh; ++ir) + { + vs[ir] *= 2.0; + vp[ir] *= 2.0; + vd[ir] *= 2.0; + pp.vloc_at[ir] *= 2.0; + } - // because only the rank=0 procesor read the pseudopotential - // information, in order to make all the processors to stop - // the job, we need to return the error information first. 
- // we need to choose a threshold for the deviation of the - // norm of pseudo atomic orbitals, I set 0.2 - // mohan 2013-06-28 - if( std::abs(units-1.0) > 0.2 && (iTB_s==1 || iref_s==1)) {return 3;} - if( std::abs(unitp-1.0) > 0.2 && (iTB_p==1 || iref_p==1)) {return 3;} - if( std::abs(unitd-1.0) > 0.2 && (iTB_d==1 || iref_d==1)) {return 3;} + // -------------------------------------- + // (2) check unit + // -------------------------------------- + // calculate rab; + // rab may not be accurate enough + pp.rab[0] = pp.r[0]; + for (int ir = 1; ir < pp.mesh - 1; ++ir) + { + pp.rab[ir] = (pp.r[ir + 1] - pp.r[ir - 1]) / 2.0; + } + // check unit of vs, vp, vd + double units = 0.0; + double unitp = 0.0; + double unitd = 0.0; + for (int ir = 1; ir < pp.mesh - 1; ++ir) + { + double dr = (pp.r[ir + 1] - pp.r[ir - 1]) / 2.0; + units += ws[ir] * ws[ir] * pp.r[ir] * pp.r[ir] * dr; + unitp += wp[ir] * wp[ir] * pp.r[ir] * pp.r[ir] * dr; + unitd += wd[ir] * wd[ir] * pp.r[ir] * pp.r[ir] * dr; + } + GlobalV::ofs_running << std::setw (15) << "WFC_UNIT" << std::setw (15) << units << std::setw (15) << unitp + << std::setw (15) << unitd << std::endl; + // because only the rank=0 procesor read the pseudopotential + // information, in order to make all the processors to stop + // the job, we need to return the error information first. 
+ // we need to choose a threshold for the deviation of the + // norm of pseudo atomic orbitals, I set 0.2 + // mohan 2013-06-28 + if (std::abs (units - 1.0) > 0.2 && (iTB_s == 1 || iref_s == 1)) + { + return 3; + } + if (std::abs (unitp - 1.0) > 0.2 && (iTB_p == 1 || iref_p == 1)) + { + return 3; + } + if (std::abs (unitd - 1.0) > 0.2 && (iTB_d == 1 || iref_d == 1)) + { + return 3; + } - // calculate the phi*r*sqrt(4pi) - pp.chi.create(pp.nchi,pp.mesh); - for(int ir=0; irnd = pp.nbeta; - GlobalV::ofs_running << std::setw(15) << "N-Dij" << std::setw(15) << nd << std::endl; - // calculate the angular momentum for each pp.betar - pp.lll = std::vector(pp.nbeta, 0); - int icount=0; - if(iref_s==1) {pp.lll[icount]=0; ++icount;}// s projector - if(iref_p==1) {pp.lll[icount]=1; ++icount;}// p projector - if(iref_d==1) {pp.lll[icount]=2; ++icount;}// p projector - for(int i=0; ind = pp.nbeta; + GlobalV::ofs_running << std::setw (15) << "N-Dij" << std::setw (15) << nd << std::endl; + // calculate the angular momentum for each pp.betar + pp.lll = std::vector (pp.nbeta, 0); + int icount = 0; + if (iref_s == 1) + { + pp.lll[icount] = 0; + ++icount; + } // s projector + if (iref_p == 1) + { + pp.lll[icount] = 1; + ++icount; + } // p projector + if (iref_d == 1) + { + pp.lll[icount] = 2; + ++icount; + } // p projector + for (int i = 0; i < pp.nbeta; ++i) + { + GlobalV::ofs_running << " lll[" << i << "]=" << pp.lll[i] << std::endl; + } // this->kbeta(pp.nbeta): number of mesh points for projector i (must be .le.mesh ) - this->kbeta = std::vector(pp.nbeta, 0); + this->kbeta = std::vector (pp.nbeta, 0); pp.kkbeta = 0; for (int ib = 0; ib < pp.nbeta; ++ib) - { - this->kbeta[ib] = pp.mesh; - pp.kkbeta = (this->kbeta[ib] > pp.kkbeta) ? this->kbeta[ib] : pp.kkbeta; - } + { + this->kbeta[ib] = pp.mesh; + pp.kkbeta = (this->kbeta[ib] > pp.kkbeta) ? 
this->kbeta[ib] : pp.kkbeta; + } // nonlocal projector - pp.betar.create(pp.nbeta,pp.mesh); - // coefficients - pp.dion.create(pp.nbeta,pp.nbeta); - + pp.betar.create (pp.nbeta, pp.mesh); + // coefficients + pp.dion.create (pp.nbeta, pp.nbeta); - // -------------------------------------- - // (6) generate nonlocal pseudopotentials - // -------------------------------------- - // tmp function to evaluate < pp.betar | delta_v | pp.betar> - std::vector func = std::vector(pp.mesh, 0.0); - // tmp value (vs, vp or vd) - std::vector vl = std::vector(pp.mesh, 0.0); - // tmp wave function (ws, wp or wd with r) - std::vector wlr = std::vector(pp.mesh, 0.0); - double rcut = 5.0/1.03; - GlobalV::ofs_running << std::setw(15) << "RCUT_NL" << std::setw(15) << rcut << std::endl; - for(int ib=0; ib integration must have 4pi, - // this 4pi is also needed in < phi | phi > = 1 integration. - // However, this phi has sqrt(sphi) already because I - // found < phi | phi > = 1 directly. - GlobalV::ofs_running << " Projector index = " << ib+1 << ", L = " << lnow << std::endl; - for(int ir=2; ir + std::vector func = std::vector (pp.mesh, 0.0); + // tmp value (vs, vp or vd) + std::vector vl = std::vector (pp.mesh, 0.0); + // tmp wave function (ws, wp or wd with r) + std::vector wlr = std::vector (pp.mesh, 0.0); + double rcut = 5.0 / 1.03; + GlobalV::ofs_running << std::setw (15) << "RCUT_NL" << std::setw (15) << rcut << std::endl; + for (int ib = 0; ib < pp.nbeta; ++ib) + { + double coef = 0.0; + const int lnow = pp.lll[ib]; + if (lnow == 0) + { + for (int ir = 0; ir < pp.mesh; ++ir) + { + vl[ir] = vs[ir]; + wlr[ir] = ws[ir] * pp.r[ir]; + } + } + else if (lnow == 1) + { + for (int ir = 0; ir < pp.mesh; ++ir) + { + vl[ir] = vp[ir]; + wlr[ir] = wp[ir] * pp.r[ir]; + } + } + else if (lnow == 2) + { + for (int ir = 0; ir < pp.mesh; ++ir) + { + vl[ir] = vd[ir]; + wlr[ir] = wd[ir] * pp.r[ir]; + } + } + // for non-local projectors + // note that < phi | dV | phi > integration must have 4pi, + // 
this 4pi is also needed in < phi | phi > = 1 integration. + // However, this phi has sqrt(sphi) already because I + // found < phi | phi > = 1 directly. + GlobalV::ofs_running << " Projector index = " << ib + 1 << ", L = " << lnow << std::endl; + for (int ir = 2; ir < pp.mesh - 1; ++ir) + { + // p nl + pp.betar (ib, ir) = (vl[ir] - pp.vloc_at[ir]) * wlr[ir]; + if (pp.r[ir] < rcut) + { + coef + = coef + + (vl[ir] - pp.vloc_at[ir]) * wlr[ir] * wlr[ir] * (pp.r[ir + 1] - pp.r[ir - 1]) / 2.0; + } + } + // In pw they did this: + // pp.dion(ib,ib)=1.0/coef; -// In pw they did this: -// pp.dion(ib,ib)=1.0/coef; + if (coef < 0.0) + { + pp.dion (ib, ib) = -1.0; + } + if (coef >= 0.0) + { + pp.dion (ib, ib) = 1.0; + } + //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + // suppose wave function have sqrt(4pi) already + //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + coef = 1.0 / sqrt (std::abs (coef)); + GlobalV::ofs_running << std::setw (25) << "1/sqrt()" << std::setw (15) << coef << std::endl; + for (int ir = 0; ir < pp.mesh; ++ir) + { + pp.betar (ib, ir) *= coef; + // --------- FOR TEST --------- + if (ib > 2) + { + // pp.betar(ib,ir) *= 0.0; // for test, disable Non-local + } + // --------- FOR TEST --------- + } + } - if(coef<0.0) { pp.dion(ib,ib) = -1.0; -} - if(coef>=0.0) { pp.dion(ib,ib) = 1.0; -} - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // suppose wave function have sqrt(4pi) already - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - coef=1.0/sqrt(std::abs(coef)); - GlobalV::ofs_running << std::setw(25) << "1/sqrt()" << std::setw(15) << coef << std::endl; - for(int ir=0; ir2) - { -// pp.betar(ib,ir) *= 0.0; // for test, disable Non-local - } - // --------- FOR TEST --------- - } - - } - - // print out the projector. 
- /* - GlobalV::ofs_running << " Nonlocal projector : " << std::endl; - for(int ir=0; ir diff = 0.0; - double norm = 0.0; - double tolerence_bohr = 1.0e-3; + ModuleBase::Vector3 diff = 0.0; + double norm = 0.0; + double tolerence_bohr = 1.0e-3; - for(int T1=0; T1< ntype; T1++) - { - for(int I1=0; I1< atoms[T1].na; I1++) - { - double shortest_norm = 10000.0; // a large number - for(int T2=0; T2 norm ) - { - shortest_norm = norm; - } - if( norm < tolerence_bohr ) // unit is Bohr - { - GlobalV::ofs_warning << " two atoms are too close!" << std::endl; - GlobalV::ofs_warning << " type:" << atoms[T1].label << " atom " << I1 + 1 << std::endl; - GlobalV::ofs_warning << " type:" << atoms[T2].label << " atom " << I2 + 1 << std::endl; - GlobalV::ofs_warning << " distance = " << norm << " Bohr" << std::endl; - ModuleBase::timer::end("UnitCell","check_tau"); - return false; - } - } - } - } - } - } - ModuleBase::timer::end("UnitCell","check_tau"); - return true; + for (int T1 = 0; T1 < ntype; T1++) + { + for (int I1 = 0; I1 < atoms[T1].na; I1++) + { + double shortest_norm = 10000.0; // a large number + for (int T2 = 0; T2 < ntype; T2++) + { + for (int I2 = 0; I2 < atoms[T2].na; I2++) + { + if (T1 == T2 && I1 == I2) + { + shortest_norm = 0.0; + } + else + { + diff = atoms[T1].tau[I1] - atoms[T2].tau[I2]; + norm = diff.norm () * lat0; + if (shortest_norm > norm) + { + shortest_norm = norm; + } + if (norm < tolerence_bohr) // unit is Bohr + { + GlobalV::ofs_warning << " two atoms are too close!" 
<< std::endl; + GlobalV::ofs_warning << " type:" << atoms[T1].label << " atom " + << I1 + 1 << std::endl; + GlobalV::ofs_warning << " type:" << atoms[T2].label << " atom " + << I2 + 1 << std::endl; + GlobalV::ofs_warning << " distance = " << norm << " Bohr" + << std::endl; + ModuleBase::timer::end ("UnitCell", "check_tau"); + return false; + } + } + } + } + } + } + ModuleBase::timer::end ("UnitCell", "check_tau"); + return true; } - -void unitcell::check_dtau(Atom* atoms, - const int& ntype, - const double& lat0, - ModuleBase::Matrix3& latvec) +void + unitcell::check_dtau (Atom* atoms, const int& ntype, const double& lat0, ModuleBase::Matrix3& latvec) { - for(int it=0; ittaud[ia].x=fmod(atom1->taud[ia].x + 10000,1.0); - atom1->taud[ia].y=fmod(atom1->taud[ia].y + 10000,1.0); - atom1->taud[ia].z=fmod(atom1->taud[ia].z + 10000,1.0); - - double cx2=0.0; - double cy2=0.0; - double cz2=0.0; + for (int it = 0; it < ntype; it++) + { + Atom* atom1 = &atoms[it]; + for (int ia = 0; ia < atoms[it].na; ia++) + { + // mohan add 2011-04-07 + // fmod(x,1.0) set the result between the [0,1.0), + // while the x may be the negtivate value,thus we add 10000. 
+ atom1->taud[ia].x = fmod (atom1->taud[ia].x + 10000, 1.0); + atom1->taud[ia].y = fmod (atom1->taud[ia].y + 10000, 1.0); + atom1->taud[ia].z = fmod (atom1->taud[ia].z + 10000, 1.0); - ModuleBase::Mathzone::Direct_to_Cartesian( - atom1->taud[ia].x, atom1->taud[ia].y, atom1->taud[ia].z, - latvec.e11, latvec.e12, latvec.e13, - latvec.e21, latvec.e22, latvec.e23, - latvec.e31, latvec.e32, latvec.e33, - cx2, cy2, cz2); + double cx2 = 0.0; + double cy2 = 0.0; + double cz2 = 0.0; - atom1->tau[ia].x = cx2; - atom1->tau[ia].y = cy2; - atom1->tau[ia].z = cz2; + ModuleBase::Mathzone::Direct_to_Cartesian (atom1->taud[ia].x, + atom1->taud[ia].y, + atom1->taud[ia].z, + latvec.e11, + latvec.e12, + latvec.e13, + latvec.e21, + latvec.e22, + latvec.e23, + latvec.e31, + latvec.e32, + latvec.e33, + cx2, + cy2, + cz2); - } - } - return; + atom1->tau[ia].x = cx2; + atom1->tau[ia].y = cy2; + atom1->tau[ia].z = cz2; + } + } + return; } diff --git a/source/source_cell/read_stru.h b/source/source_cell/read_stru.h index 402256a3dbf..cc9352ff2a7 100644 --- a/source/source_cell/read_stru.h +++ b/source/source_cell/read_stru.h @@ -5,29 +5,18 @@ #include "source_cell/unitcell.h" namespace unitcell { - bool check_tau(const Atom* atoms, - const int& ntype, - const double& lat0); - void check_dtau(Atom* atoms, - const int& ntype, - const double& lat0, - ModuleBase::Matrix3& latvec); - - // read in the atom information for each type of atom - bool read_atom_species(std::ifstream& ifa, - std::ofstream& ofs_running, - UnitCell& ucell); - - bool read_lattice_constant(std::ifstream& ifa, - std::ofstream& ofs_running, - Lattice& lat); - - // Read atomic positions - // return 1: no problem. - // return 0: some problems. 
- bool read_atom_positions(UnitCell& ucell, - std::ifstream &ifpos, - std::ofstream &ofs_running, - std::ofstream &ofs_warning); -} +bool check_tau (const Atom* atoms, const int& ntype, const double& lat0); +void check_dtau (Atom* atoms, const int& ntype, const double& lat0, ModuleBase::Matrix3& latvec); + +// read in the atom information for each type of atom +bool read_atom_species (std::ifstream& ifa, std::ofstream& ofs_running, UnitCell& ucell); + +bool read_lattice_constant (std::ifstream& ifa, std::ofstream& ofs_running, Lattice& lat); + +// Read atomic positions +// return 1: no problem. +// return 0: some problems. +bool + read_atom_positions (UnitCell& ucell, std::ifstream& ifpos, std::ofstream& ofs_running, std::ofstream& ofs_warning); +} // namespace unitcell #endif // READ_STRU_H \ No newline at end of file diff --git a/source/source_cell/sep.cpp b/source/source_cell/sep.cpp index bd55f3c06b7..d267e12870c 100644 --- a/source/source_cell/sep.cpp +++ b/source/source_cell/sep.cpp @@ -9,11 +9,9 @@ #include #include -SepPot::SepPot() -{ -} +SepPot::SepPot () {} -SepPot::~SepPot() +SepPot::~SepPot () { delete[] r; r = nullptr; @@ -21,60 +19,62 @@ SepPot::~SepPot() rv = nullptr; } -int SepPot::read_sep(std::ifstream& ifs) +int + SepPot::read_sep (std::ifstream& ifs) { std::string line; - while (std::getline(ifs, line)) - { - std::istringstream iss(line); - std::string key; - iss >> key; - - if (key == "Sep.Element") - { - iss >> label; - } - else if (key == "Sep.XcType") - { - iss >> xc_type; - } - else if (key == "Sep.Orbital") - { - iss >> orbital; - } - else if (key == "Sep.Points") - { - iss >> mesh; - delete[] r; - r = new double[mesh]; - delete[] rv; - rv = new double[mesh]; - } - else if (key == "Sep.StripAmount") - { - iss >> strip_elec; - } - else if (key == "> r_val >> rv_val) + std::istringstream iss (line); + std::string key; + iss >> key; + + if (key == "Sep.Element") + { + iss >> label; + } + else if (key == "Sep.XcType") + { + iss >> xc_type; + 
} + else if (key == "Sep.Orbital") + { + iss >> orbital; + } + else if (key == "Sep.Points") { - r[idx] = r_val; - rv[idx] = rv_val; - idx++; + iss >> mesh; + delete[] r; + r = new double[mesh]; + delete[] rv; + rv = new double[mesh]; + } + else if (key == "Sep.StripAmount") + { + iss >> strip_elec; + } + else if (key == "> r_val >> rv_val) + { + r[idx] = r_val; + rv[idx] = rv_val; + idx++; + } + } + break; } - } - break; } - } return 0; } -void SepPot::print_sep_info(std::ofstream& ofs) const +void + SepPot::print_sep_info (std::ofstream& ofs) const { ofs << "\n sep_vl:"; ofs << "\n sep_info:"; @@ -84,38 +84,40 @@ void SepPot::print_sep_info(std::ofstream& ofs) const ofs << "\n strip electron" << strip_elec; } -void SepPot::print_sep_vsep(std::ofstream& ofs) const +void + SepPot::print_sep_vsep (std::ofstream& ofs) const { ofs << "\n mesh " << mesh; - output::printr1_d(ofs, " r : ", r, mesh); - output::printr1_d(ofs, " vsep : ", rv, mesh); + output::printr1_d (ofs, " r : ", r, mesh); + output::printr1_d (ofs, " vsep : ", rv, mesh); ofs << "\n -----------------------------"; } #ifdef __MPI -void SepPot::bcast_sep() +void + SepPot::bcast_sep () { - ModuleBase::TITLE("SepPot", "bcast_sep"); - Parallel_Common::bcast_bool(is_enable); - Parallel_Common::bcast_double(r_in); - Parallel_Common::bcast_double(r_out); - Parallel_Common::bcast_double(r_power); - Parallel_Common::bcast_double(enhence_a); - Parallel_Common::bcast_string(label); - Parallel_Common::bcast_string(xc_type); - Parallel_Common::bcast_string(orbital); - Parallel_Common::bcast_int(strip_elec); - Parallel_Common::bcast_int(mesh); + ModuleBase::TITLE ("SepPot", "bcast_sep"); + Parallel_Common::bcast_bool (is_enable); + Parallel_Common::bcast_double (r_in); + Parallel_Common::bcast_double (r_out); + Parallel_Common::bcast_double (r_power); + Parallel_Common::bcast_double (enhence_a); + Parallel_Common::bcast_string (label); + Parallel_Common::bcast_string (xc_type); + Parallel_Common::bcast_string 
(orbital); + Parallel_Common::bcast_int (strip_elec); + Parallel_Common::bcast_int (mesh); if (GlobalV::MY_RANK != 0 && mesh > 0) - { - r = new double[mesh]; - rv = new double[mesh]; - } + { + r = new double[mesh]; + rv = new double[mesh]; + } - Parallel_Common::bcast_double(r, mesh); - Parallel_Common::bcast_double(rv, mesh); + Parallel_Common::bcast_double (r, mesh); + Parallel_Common::bcast_double (rv, mesh); return; } diff --git a/source/source_cell/sep.h b/source/source_cell/sep.h index ae6fb2679c7..32957c7096e 100644 --- a/source/source_cell/sep.h +++ b/source/source_cell/sep.h @@ -12,8 +12,8 @@ class SepPot { public: - SepPot(); - ~SepPot(); + SepPot (); + ~SepPot (); bool is_enable = false; double r_in = 0.0; /**< cut-off radius inner */ @@ -28,11 +28,11 @@ class SepPot double* r = nullptr; /**< ridial mesh */ double* rv = nullptr; /**< sep potential, but rV, unit: Ry */ - int read_sep(std::ifstream& is); - void print_sep_info(std::ofstream& ofs) const; - void print_sep_vsep(std::ofstream& ofs) const; + int read_sep (std::ifstream& is); + void print_sep_info (std::ofstream& ofs) const; + void print_sep_vsep (std::ofstream& ofs) const; #ifdef __MPI - void bcast_sep(); + void bcast_sep (); #endif /* ifdef __MPI */ }; diff --git a/source/source_cell/sep_cell.cpp b/source/source_cell/sep_cell.cpp index e7ec5f1bafa..e0e9eb5b2f6 100644 --- a/source/source_cell/sep_cell.cpp +++ b/source/source_cell/sep_cell.cpp @@ -14,21 +14,21 @@ // Sep_Cell sep_cell; // } -Sep_Cell::Sep_Cell() noexcept : ntype(0), omega(0.0), tpiba2(0.0) -{ -} +Sep_Cell::Sep_Cell () noexcept : ntype (0), omega (0.0), tpiba2 (0.0) {} -Sep_Cell::~Sep_Cell() noexcept = default; +Sep_Cell::~Sep_Cell () noexcept = default; -void Sep_Cell::init(const int ntype_in) +void + Sep_Cell::init (const int ntype_in) { this->ntype = ntype_in; - this->seps.resize(ntype); - this->sep_enable.resize(ntype); - std::fill(this->sep_enable.begin(), this->sep_enable.end(), false); + this->seps.resize (ntype); + 
this->sep_enable.resize (ntype); + std::fill (this->sep_enable.begin (), this->sep_enable.end (), false); } -void Sep_Cell::set_omega(const double omega_in, const double tpiba2_in) +void + Sep_Cell::set_omega (const double omega_in, const double tpiba2_in) { this->omega = omega_in; this->tpiba2 = tpiba2_in; @@ -45,83 +45,85 @@ void Sep_Cell::set_omega(const double omega_in, const double tpiba2_in) * Li 0 * F 1 F_pbe_50.sep 0.0 2.0 20.0 1.0 */ -int Sep_Cell::read_sep_potentials(std::ifstream& ifpos, - const std::string& pp_dir, - std::ofstream& ofs_running, - std::vector& ucell_atom_label) +int + Sep_Cell::read_sep_potentials (std::ifstream& ifpos, + const std::string& pp_dir, + std::ofstream& ofs_running, + std::vector& ucell_atom_label) { - ModuleBase::TITLE("Sep_Cell", "read_sep_potentials"); - - if (!ModuleBase::GlobalFunc::SCAN_BEGIN(ifpos, "SEP_FILES")) - { - GlobalV::ofs_running << "Cannot find SEP_FILES section in STRU" << std::endl; - return false; - } - - ifpos.ignore(300, '\n'); - - for (int i = 0; i < this->ntype; ++i) - { - std::string one_line, atom_label; - std::getline(ifpos, one_line); - std::stringstream ss(one_line); + ModuleBase::TITLE ("Sep_Cell", "read_sep_potentials"); - // read the label of the atom - bool enable_tmp; - ss >> atom_label >> enable_tmp; - - // Validate atom label - if (atom_label != ucell_atom_label[i]) + if (!ModuleBase::GlobalFunc::SCAN_BEGIN (ifpos, "SEP_FILES")) { - GlobalV::ofs_running << "Sep potential and atom order do not match. 
" - << "Expected: " << ucell_atom_label[i] << ", Got: " << atom_label << std::endl; + GlobalV::ofs_running << "Cannot find SEP_FILES section in STRU" << std::endl; return false; } - this->sep_enable[i] = enable_tmp; - if (this->sep_enable[i]) + + ifpos.ignore (300, '\n'); + + for (int i = 0; i < this->ntype; ++i) { - this->seps[i].is_enable = this->sep_enable[i]; - std::string sep_filename; - ss >> sep_filename; - ss >> this->seps[i].r_in >> this->seps[i].r_out >> this->seps[i].r_power >> this->seps[i].enhence_a; - std::string sep_addr = pp_dir + sep_filename; - std::ifstream sep_ifs(sep_addr.c_str(), std::ios::in); - if (!sep_ifs) - { - GlobalV::ofs_running << "Cannot find sep potential file: " << sep_addr << std::endl; - return false; - } - this->seps[i].read_sep(sep_ifs); + std::string one_line, atom_label; + std::getline (ifpos, one_line); + std::stringstream ss (one_line); + + // read the label of the atom + bool enable_tmp; + ss >> atom_label >> enable_tmp; + + // Validate atom label + if (atom_label != ucell_atom_label[i]) + { + GlobalV::ofs_running << "Sep potential and atom order do not match. 
" + << "Expected: " << ucell_atom_label[i] << ", Got: " << atom_label << std::endl; + return false; + } + this->sep_enable[i] = enable_tmp; + if (this->sep_enable[i]) + { + this->seps[i].is_enable = this->sep_enable[i]; + std::string sep_filename; + ss >> sep_filename; + ss >> this->seps[i].r_in >> this->seps[i].r_out >> this->seps[i].r_power >> this->seps[i].enhence_a; + std::string sep_addr = pp_dir + sep_filename; + std::ifstream sep_ifs (sep_addr.c_str (), std::ios::in); + if (!sep_ifs) + { + GlobalV::ofs_running << "Cannot find sep potential file: " << sep_addr << std::endl; + return false; + } + this->seps[i].read_sep (sep_ifs); + } } - } return true; } #ifdef __MPI -void Sep_Cell::bcast_sep_cell() +void + Sep_Cell::bcast_sep_cell () { - ModuleBase::TITLE("Sep_Cell", "bcast_sep_cell"); - Parallel_Common::bcast_int(this->ntype); + ModuleBase::TITLE ("Sep_Cell", "bcast_sep_cell"); + Parallel_Common::bcast_int (this->ntype); if (GlobalV::MY_RANK != 0) - { - this->seps.resize(this->ntype); - this->sep_enable.resize(this->ntype); - } - for (int i = 0; i < this->ntype; ++i) - { - bool tmp = false; - if (GlobalV::MY_RANK == 0) { - tmp = this->sep_enable[i]; + this->seps.resize (this->ntype); + this->sep_enable.resize (this->ntype); } - Parallel_Common::bcast_bool(tmp); - if (GlobalV::MY_RANK != 0) + for (int i = 0; i < this->ntype; ++i) { - this->sep_enable[i] = tmp; + bool tmp = false; + if (GlobalV::MY_RANK == 0) + { + tmp = this->sep_enable[i]; + } + Parallel_Common::bcast_bool (tmp); + if (GlobalV::MY_RANK != 0) + { + this->sep_enable[i] = tmp; + } + this->seps[i].bcast_sep (); } - this->seps[i].bcast_sep(); - } } #endif // __MPI diff --git a/source/source_cell/sep_cell.h b/source/source_cell/sep_cell.h index 03cddca2ea0..1a260a31ab7 100644 --- a/source/source_cell/sep_cell.h +++ b/source/source_cell/sep_cell.h @@ -12,46 +12,51 @@ class Sep_Cell { public: - Sep_Cell() noexcept; - ~Sep_Cell() noexcept; + Sep_Cell () noexcept; + ~Sep_Cell () noexcept; // Sets the 
number of atom types and initializes internal vectors - void init(const int ntype_in); + void init (const int ntype_in); - void set_omega(const double omega_in, const double tpiba2_in); + void set_omega (const double omega_in, const double tpiba2_in); // Reads self potentials from STRU file and xx.sep files // Returns true if successful, false otherwise - int read_sep_potentials(std::ifstream& ifpos, - const std::string& pp_dir, - std::ofstream& ofs_running, - std::vector& ucell_atom_label); + int read_sep_potentials (std::ifstream& ifpos, + const std::string& pp_dir, + std::ofstream& ofs_running, + std::vector& ucell_atom_label); #ifdef __MPI // Broadcasts the Sep_Cell object to all processes - void bcast_sep_cell(); + void bcast_sep_cell (); #endif // __MPI // Getter methods - const std::vector& get_seps() const + const std::vector& + get_seps () const { return seps; } - int get_ntype() const + int + get_ntype () const { return ntype; } - const std::vector& get_sep_enable() const + const std::vector& + get_sep_enable () const { return sep_enable; } - double get_omega() const + double + get_omega () const { return omega; } - double get_tpiba2() const + double + get_tpiba2 () const { return tpiba2; } diff --git a/source/source_cell/setup_nonlocal.cpp b/source/source_cell/setup_nonlocal.cpp index 96bf8988645..2a295222937 100644 --- a/source/source_cell/setup_nonlocal.cpp +++ b/source/source_cell/setup_nonlocal.cpp @@ -8,21 +8,22 @@ // mohan add 2013-08-02 // In order to get rid of the read in file .NONLOCAL. 
-InfoNonlocal::InfoNonlocal() +InfoNonlocal::InfoNonlocal () { this->Beta = new Numerical_Nonlocal[1]; this->nproj = nullptr; this->nprojmax = 0; this->rcutmax_Beta = 0.0; } -InfoNonlocal::~InfoNonlocal() +InfoNonlocal::~InfoNonlocal () { delete[] Beta; delete[] nproj; } #include "../source_base/complexmatrix.h" -void InfoNonlocal::Set_NonLocal(const int& it, +void + InfoNonlocal::Set_NonLocal (const int& it, Atom* atom, int& n_projectors, const int& kmesh, @@ -30,7 +31,7 @@ void InfoNonlocal::Set_NonLocal(const int& it, const double& dr_uniform, std::ofstream& log) { - ModuleBase::TITLE("InfoNonlocal", "Set_NonLocal"); + ModuleBase::TITLE ("InfoNonlocal", "Set_NonLocal"); // set a pointer // Atom* atom = &ucell.atoms[it]; @@ -43,121 +44,136 @@ void InfoNonlocal::Set_NonLocal(const int& it, // set the nonlocal projector objects Numerical_Nonlocal_Lm* tmpBeta_lm = new Numerical_Nonlocal_Lm[n_projectors]; - ModuleBase::ComplexMatrix coefficient_D_nc_in(nh * 2, nh * 2); // zhengdy-soc + ModuleBase::ComplexMatrix coefficient_D_nc_in (nh * 2, nh * 2); // zhengdy-soc int lmaxkb = -1; for (int ibeta = 0; ibeta < atom->ncpp.nbeta; ibeta++) - { - lmaxkb = std::max(lmaxkb, atom->ncpp.lll[ibeta]); - } + { + lmaxkb = std::max (lmaxkb, atom->ncpp.lll[ibeta]); + } Soc soc; if (atom->ncpp.has_so) - { - soc.rot_ylm(lmaxkb); - soc.fcoef.create(1, atom->ncpp.nh, atom->ncpp.nh); - } + { + soc.rot_ylm (lmaxkb); + soc.fcoef.create (1, atom->ncpp.nh, atom->ncpp.nh); + } int ip1 = 0; for (int p1 = 0; p1 < n_projectors; p1++) // nbeta - { - const int lnow = atom->ncpp.lll[p1]; - - const int l1 = atom->ncpp.lll[p1]; - const double j1 = atom->ncpp.jjj[p1]; - for (int m1 = 0; m1 < 2 * l1 + 1; m1++) { - int ip2 = 0; - for (int p2 = 0; p2 < n_projectors; p2++) - { - const int l2 = atom->ncpp.lll[p2]; - const double j2 = atom->ncpp.jjj[p2]; - for (int m2 = 0; m2 < 2 * l2 + 1; m2++) + const int lnow = atom->ncpp.lll[p1]; + + const int l1 = atom->ncpp.lll[p1]; + const double j1 = 
atom->ncpp.jjj[p1]; + for (int m1 = 0; m1 < 2 * l1 + 1; m1++) { - if (l1 == l2 && fabs(j1 - j2) < 1e-7) - { - for (int is1 = 0; is1 < 2; is1++) + int ip2 = 0; + for (int p2 = 0; p2 < n_projectors; p2++) { - for (int is2 = 0; is2 < 2; is2++) - { - if (atom->ncpp.has_so) - { - soc.set_fcoef(l1, l2, is1, is2, m1, m2, j1, j2, 0, ip1, ip2); - - coefficient_D_nc_in(ip1 + nh * is1, ip2 + nh * is2) - = atom->ncpp.dion(p1, p2) * soc.fcoef(0, is1, is2, ip1, ip2); - if (p1 != p2) - { - soc.fcoef(0, is1, is2, ip1, ip2) = std::complex(0.0, 0.0); - } - } - else + const int l2 = atom->ncpp.lll[p2]; + const double j2 = atom->ncpp.jjj[p2]; + for (int m2 = 0; m2 < 2 * l2 + 1; m2++) { - if (is1 == is2 && m1 == m2) - { - coefficient_D_nc_in(ip1 + nh * is1, ip2 + nh * is2) = atom->ncpp.dion(p1, p2); - } - } - } // end is2 - } // end is1 - } // end l1==l2 - ip2++; - } // end m2 - } // end p2 - assert(ip2 == nh); - ip1++; - } // end m1 - - // only keep the nonzero part. - int cut_mesh = atom->ncpp.mesh; - for (int ir = atom->ncpp.mesh - 1; ir >= 0; --ir) - { - if (std::abs(atom->ncpp.betar(p1, ir)) > 1.0e-10) - { - cut_mesh = ir; - break; - } - } - if (cut_mesh % 2 == 0) - { - ++cut_mesh; - } + if (l1 == l2 && fabs (j1 - j2) < 1e-7) + { + for (int is1 = 0; is1 < 2; is1++) + { + for (int is2 = 0; is2 < 2; is2++) + { + if (atom->ncpp.has_so) + { + soc.set_fcoef (l1, + l2, + is1, + is2, + m1, + m2, + j1, + j2, + 0, + ip1, + ip2); + + coefficient_D_nc_in (ip1 + nh * is1, ip2 + nh * is2) + = atom->ncpp.dion (p1, p2) + * soc.fcoef (0, is1, is2, ip1, ip2); + if (p1 != p2) + { + soc.fcoef (0, is1, is2, ip1, ip2) + = std::complex (0.0, 0.0); + } + } + else + { + if (is1 == is2 && m1 == m2) + { + coefficient_D_nc_in (ip1 + nh * is1, + ip2 + nh * is2) + = atom->ncpp.dion (p1, p2); + } + } + } // end is2 + } // end is1 + } // end l1==l2 + ip2++; + } // end m2 + } // end p2 + assert (ip2 == nh); + ip1++; + } // end m1 + + // only keep the nonzero part. 
+ int cut_mesh = atom->ncpp.mesh; + for (int ir = atom->ncpp.mesh - 1; ir >= 0; --ir) + { + if (std::abs (atom->ncpp.betar (p1, ir)) > 1.0e-10) + { + cut_mesh = ir; + break; + } + } + if (cut_mesh % 2 == 0) + { + ++cut_mesh; + } - double* beta_r = new double[cut_mesh]; - ModuleBase::GlobalFunc::ZEROS(beta_r, cut_mesh); - for (int ir = 0; ir < cut_mesh; ++ir) - { - beta_r[ir] = atom->ncpp.betar(p1, ir); - } + double* beta_r = new double[cut_mesh]; + ModuleBase::GlobalFunc::ZEROS (beta_r, cut_mesh); + for (int ir = 0; ir < cut_mesh; ++ir) + { + beta_r[ir] = atom->ncpp.betar (p1, ir); + } - tmpBeta_lm[p1].set_NL_proj(atom->label, - it, // type - lnow, // angular momentum L - cut_mesh, // number of radial mesh - atom->ncpp.rab.data(), - atom->ncpp.r.data(), // radial mesh value (a.u.) - beta_r, - kmesh, - dk, - dr_uniform); // delta k mesh in reciprocal space - - if (PARAM.inp.out_element_info) { - tmpBeta_lm[p1].plot(GlobalV::MY_RANK); - } + tmpBeta_lm[p1].set_NL_proj (atom->label, + it, // type + lnow, // angular momentum L + cut_mesh, // number of radial mesh + atom->ncpp.rab.data (), + atom->ncpp.r.data (), // radial mesh value (a.u.) 
+ beta_r, + kmesh, + dk, + dr_uniform); // delta k mesh in reciprocal space + + if (PARAM.inp.out_element_info) + { + tmpBeta_lm[p1].plot (GlobalV::MY_RANK); + } - delete[] beta_r; - } + delete[] beta_r; + } - assert(ip1 == nh); + assert (ip1 == nh); - this->Beta[it].set_type_info(it, - atom->label, - atom->ncpp.pp_type, - atom->ncpp.lmax, - n_projectors, - tmpBeta_lm); // zhengdy-soc 2018-09-10 + this->Beta[it].set_type_info (it, + atom->label, + atom->ncpp.pp_type, + atom->ncpp.lmax, + n_projectors, + tmpBeta_lm); // zhengdy-soc 2018-09-10 // mohan add 2021-05-07 - atom->ncpp.set_d_so(coefficient_D_nc_in, n_projectors, nh, atom->ncpp.has_so); + atom->ncpp.set_d_so (coefficient_D_nc_in, n_projectors, nh, atom->ncpp.has_so); delete[] tmpBeta_lm; @@ -165,7 +181,8 @@ void InfoNonlocal::Set_NonLocal(const int& it, return; } -void InfoNonlocal::Read_NonLocal(const int& it, +void + InfoNonlocal::Read_NonLocal (const int& it, Atom* atom, int& n_projectors, const int& my_rank, @@ -174,7 +191,7 @@ void InfoNonlocal::Read_NonLocal(const int& it, const double& dr_uniform, const std::string& nonlocalFile) { - ModuleBase::TITLE("InfoNonlocal", "Read_NonLocal"); + ModuleBase::TITLE ("InfoNonlocal", "Read_NonLocal"); std::ifstream ifs; @@ -182,25 +199,25 @@ void InfoNonlocal::Read_NonLocal(const int& it, // check if the non-local pseudopotential file exist. 
bool open = false; if (my_rank == 0) - { - ifs.open(nonlocalFile.c_str()); - if (ifs) { - open = true; + ifs.open (nonlocalFile.c_str ()); + if (ifs) + { + open = true; + } } - } #ifdef __MPI - Parallel_Common::bcast_bool(open); + Parallel_Common::bcast_bool (open); #endif if (!open) - { - std::cout << " Non-local File : " << nonlocalFile << std::endl; - ModuleBase::WARNING_QUIT("InfoNonlocal::Read_NonLocal", "Can not find the NONLOCAL file."); - } + { + std::cout << " Non-local File : " << nonlocalFile << std::endl; + ModuleBase::WARNING_QUIT ("InfoNonlocal::Read_NonLocal", "Can not find the NONLOCAL file."); + } else - { -// GlobalV::ofs_running << " Open nonlocal pseudopotential file: " << nonlocalFile << std::endl; - } + { + // GlobalV::ofs_running << " Open nonlocal pseudopotential file: " << nonlocalFile << std::endl; + } std::string label; std::string ps_type; @@ -209,192 +226,192 @@ void InfoNonlocal::Read_NonLocal(const int& it, int nlmax = 0; if (my_rank == 0) - { - if (ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "
")) { - ModuleBase::GlobalFunc::READ_VALUE(ifs, label); - ModuleBase::GlobalFunc::READ_VALUE(ifs, ps_type); - if (ps_type != "NC") - { - ModuleBase::WARNING_QUIT("InfoNonlocal::Read_NonLocal", - "Only available for NC nonlocal pseudopotential"); - } - ModuleBase::GlobalFunc::READ_VALUE(ifs, nlmax); - // std::cout << " " << label << " " << ps_type << " " << nlmax << std::endl; - assert(nlmax >= -1); - ModuleBase::GlobalFunc::SCAN_END(ifs, "
"); + if (ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "
")) + { + ModuleBase::GlobalFunc::READ_VALUE (ifs, label); + ModuleBase::GlobalFunc::READ_VALUE (ifs, ps_type); + if (ps_type != "NC") + { + ModuleBase::WARNING_QUIT ("InfoNonlocal::Read_NonLocal", + "Only available for NC nonlocal pseudopotential"); + } + ModuleBase::GlobalFunc::READ_VALUE (ifs, nlmax); + // std::cout << " " << label << " " << ps_type << " " << nlmax << std::endl; + assert (nlmax >= -1); + ModuleBase::GlobalFunc::SCAN_END (ifs, "
"); + } } - } #ifdef __MPI - Parallel_Common::bcast_string(label); - Parallel_Common::bcast_string(ps_type); - Parallel_Common::bcast_int(nlmax); + Parallel_Common::bcast_string (label); + Parallel_Common::bcast_string (ps_type); + Parallel_Common::bcast_int (nlmax); #endif // mohan add 2012-06-09 if (nlmax != -1) - { - bool find_lmax = false; - for (int ic = 0; ic < atom->ncpp.nbeta; ic++) { - if (nlmax == atom->ncpp.lll[ic]) - { - find_lmax = true; - break; - } - } + bool find_lmax = false; + for (int ic = 0; ic < atom->ncpp.nbeta; ic++) + { + if (nlmax == atom->ncpp.lll[ic]) + { + find_lmax = true; + break; + } + } - if (!find_lmax) - { - std::cout << " For element " << label << std::endl; - std::cout << " Max L Read in from NONLOCAL = " << nlmax << std::endl; - for (int ib = 0; ib < atom->ncpp.nbeta; ++ib) - { - std::cout << " Max L Read in from pseudopotential file = " << atom->ncpp.lll[ib] << std::endl; - } - ModuleBase::WARNING_QUIT("InfoNonlocal::Read_NonLocal", "nlmax != atom->lll"); + if (!find_lmax) + { + std::cout << " For element " << label << std::endl; + std::cout << " Max L Read in from NONLOCAL = " << nlmax << std::endl; + for (int ib = 0; ib < atom->ncpp.nbeta; ++ib) + { + std::cout << " Max L Read in from pseudopotential file = " << atom->ncpp.lll[ib] + << std::endl; + } + ModuleBase::WARNING_QUIT ("InfoNonlocal::Read_NonLocal", "nlmax != atom->lll"); + } } - } - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "label", label); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "nlmax", nlmax); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "label", label); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "nlmax", nlmax); //------------------------------------------- // if each L has projectors more than once, // this needed to be modified. 
//------------------------------------------- int nproj_allowed = nlmax + 1; - ModuleBase::matrix coefficient_D_in(nproj_allowed, nproj_allowed); - ModuleBase::ComplexMatrix coefficient_D_nc_in(nproj_allowed * 2, nproj_allowed * 2); + ModuleBase::matrix coefficient_D_in (nproj_allowed, nproj_allowed); + ModuleBase::ComplexMatrix coefficient_D_nc_in (nproj_allowed * 2, nproj_allowed * 2); if (my_rank == 0) - { - if (ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "")) { - //-------------------------------------- - // this parameter is very important!!! - //-------------------------------------- - ModuleBase::GlobalFunc::READ_VALUE(ifs, n_projectors); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "n_projectors", n_projectors); - - for (int p1 = 0; p1 < n_projectors; p1++) - { - for (int p2 = 0; p2 < n_projectors; p2++) + if (ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "")) { - int L1_read, L2_read; + //-------------------------------------- + // this parameter is very important!!! + //-------------------------------------- + ModuleBase::GlobalFunc::READ_VALUE (ifs, n_projectors); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "n_projectors", n_projectors); - ifs >> L1_read >> L2_read; + for (int p1 = 0; p1 < n_projectors; p1++) + { + for (int p2 = 0; p2 < n_projectors; p2++) + { + int L1_read, L2_read; - assert(L1_read <= nlmax); - assert(L2_read <= nlmax); + ifs >> L1_read >> L2_read; - ifs >> coefficient_D_in(L1_read, L2_read); + assert (L1_read <= nlmax); + assert (L2_read <= nlmax); + ifs >> coefficient_D_in (L1_read, L2_read); + } + } + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); } - } - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); } - } #ifdef __MPI - Parallel_Common::bcast_int(n_projectors); // mohan add 2010-12-20 + Parallel_Common::bcast_int (n_projectors); // mohan add 2010-12-20 #endif Numerical_Nonlocal_Lm* tmpBeta_lm = new Numerical_Nonlocal_Lm[n_projectors]; int* LfromBeta = new int[n_projectors]; - ModuleBase::GlobalFunc::ZEROS(LfromBeta, n_projectors); 
+ ModuleBase::GlobalFunc::ZEROS (LfromBeta, n_projectors); for (int p1 = 0; p1 < n_projectors; p1++) - { - int meshr_ps = 0; - if (my_rank == 0) { - if (ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "", false)) - { - int iproj = 0; - ModuleBase::GlobalFunc::READ_VALUE(ifs, iproj); - if (iproj != p1) + int meshr_ps = 0; + if (my_rank == 0) { - std::cout << " iproj=" << iproj << " p1=" << p1 << std::endl; - ModuleBase::WARNING_QUIT("InfoNonlocal::Read_NonLocal", "Check non-local projector index."); - } + if (ModuleBase::GlobalFunc::SCAN_BEGIN (ifs, "", false)) + { + int iproj = 0; + ModuleBase::GlobalFunc::READ_VALUE (ifs, iproj); + if (iproj != p1) + { + std::cout << " iproj=" << iproj << " p1=" << p1 << std::endl; + ModuleBase::WARNING_QUIT ("InfoNonlocal::Read_NonLocal", + "Check non-local projector index."); + } - ModuleBase::GlobalFunc::READ_VALUE(ifs, LfromBeta[p1]); - assert(LfromBeta[p1] >= 0); - assert(LfromBeta[p1] <= nlmax); + ModuleBase::GlobalFunc::READ_VALUE (ifs, LfromBeta[p1]); + assert (LfromBeta[p1] >= 0); + assert (LfromBeta[p1] <= nlmax); - ModuleBase::GlobalFunc::READ_VALUE(ifs, meshr_ps); - if (meshr_ps % 2 == 0) - { - std::cout << " meshr_ps = " << meshr_ps << std::endl; - ModuleBase::WARNING_QUIT("InfoNonlocal::Read_NonLocal", "meshr_ps must be odd!"); - } - } - else - { - ModuleBase::WARNING_QUIT("InfoNonlocal::Read_NonLocal", " doesn't match!"); - } - } // end my_rank==0 + ModuleBase::GlobalFunc::READ_VALUE (ifs, meshr_ps); + if (meshr_ps % 2 == 0) + { + std::cout << " meshr_ps = " << meshr_ps << std::endl; + ModuleBase::WARNING_QUIT ("InfoNonlocal::Read_NonLocal", "meshr_ps must be odd!"); + } + } + else + { + ModuleBase::WARNING_QUIT ("InfoNonlocal::Read_NonLocal", " doesn't match!"); + } + } // end my_rank==0 #ifdef __MPI - Parallel_Common::bcast_int(meshr_ps); - Parallel_Common::bcast_int(LfromBeta[p1]); + Parallel_Common::bcast_int (meshr_ps); + Parallel_Common::bcast_int (LfromBeta[p1]); #endif - double* radial_ps = new double[meshr_ps]; - 
double* rab_ps = new double[meshr_ps]; - double* beta_r = new double[meshr_ps]; - ModuleBase::GlobalFunc::ZEROS(radial_ps, meshr_ps); - ModuleBase::GlobalFunc::ZEROS(rab_ps, meshr_ps); - ModuleBase::GlobalFunc::ZEROS(beta_r, meshr_ps); + double* radial_ps = new double[meshr_ps]; + double* rab_ps = new double[meshr_ps]; + double* beta_r = new double[meshr_ps]; + ModuleBase::GlobalFunc::ZEROS (radial_ps, meshr_ps); + ModuleBase::GlobalFunc::ZEROS (rab_ps, meshr_ps); + ModuleBase::GlobalFunc::ZEROS (beta_r, meshr_ps); - if (my_rank == 0) - { - for (int ir = 0; ir < meshr_ps; ir++) - { - ifs >> radial_ps[ir]; - ifs >> beta_r[ir]; - ifs >> rab_ps[ir]; - } - } + if (my_rank == 0) + { + for (int ir = 0; ir < meshr_ps; ir++) + { + ifs >> radial_ps[ir]; + ifs >> beta_r[ir]; + ifs >> rab_ps[ir]; + } + } #ifdef __MPI - Parallel_Common::bcast_double(radial_ps, meshr_ps); - Parallel_Common::bcast_double(beta_r, meshr_ps); - Parallel_Common::bcast_double(rab_ps, meshr_ps); + Parallel_Common::bcast_double (radial_ps, meshr_ps); + Parallel_Common::bcast_double (beta_r, meshr_ps); + Parallel_Common::bcast_double (rab_ps, meshr_ps); #endif + tmpBeta_lm[p1].set_NL_proj (label, + it, // type + LfromBeta[p1], // angular momentum L + meshr_ps, // number of radial mesh + rab_ps, + radial_ps, // radial mesh value(a.u.) + beta_r, + kmesh, + dk, + dr_uniform); // delta k mesh in reciprocal space + + if (PARAM.inp.out_element_info) + { + tmpBeta_lm[p1].plot (my_rank); + } - tmpBeta_lm[p1].set_NL_proj(label, - it, // type - LfromBeta[p1], // angular momentum L - meshr_ps, // number of radial mesh - rab_ps, - radial_ps, // radial mesh value(a.u.) 
- beta_r, - kmesh, - dk, - dr_uniform); // delta k mesh in reciprocal space - - if (PARAM.inp.out_element_info) - { - tmpBeta_lm[p1].plot(my_rank); - } - - delete[] radial_ps; - delete[] rab_ps; - delete[] beta_r; + delete[] radial_ps; + delete[] rab_ps; + delete[] beta_r; - if (my_rank == 0) - { - ModuleBase::GlobalFunc::SCAN_END(ifs, ""); - } - } // end projectors. + if (my_rank == 0) + { + ModuleBase::GlobalFunc::SCAN_END (ifs, ""); + } + } // end projectors. - this->Beta[it].set_type_info(it, label, ps_type, nlmax, n_projectors, tmpBeta_lm); + this->Beta[it].set_type_info (it, label, ps_type, nlmax, n_projectors, tmpBeta_lm); - ifs.close(); + ifs.close (); delete[] LfromBeta; delete[] tmpBeta_lm; @@ -402,7 +419,8 @@ void InfoNonlocal::Read_NonLocal(const int& it, return; } -void InfoNonlocal::setupNonlocal(const int& ntype, Atom* atoms, std::ofstream& log, LCAO_Orbitals& orb) +void + InfoNonlocal::setupNonlocal (const int& ntype, Atom* atoms, std::ofstream& log, LCAO_Orbitals& orb) { //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> //~~~~~~~~~~~~~~~~~~~~~~ 2 ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -413,46 +431,51 @@ void InfoNonlocal::setupNonlocal(const int& ntype, Atom* atoms, std::ofstream& l // mohan note 2011-03-04 //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> if (PARAM.inp.basis_type == "lcao" || PARAM.inp.basis_type == "lcao_in_pw") - { - delete[] this->Beta; - this->Beta = new Numerical_Nonlocal[ntype]; + { + delete[] this->Beta; + this->Beta = new Numerical_Nonlocal[ntype]; - delete[] this->nproj; - this->nproj = new int[ntype]; - ModuleBase::GlobalFunc::ZEROS(this->nproj, ntype); + delete[] this->nproj; + this->nproj = new int[ntype]; + ModuleBase::GlobalFunc::ZEROS (this->nproj, ntype); - this->nprojmax = 0; + this->nprojmax = 0; - // if true: read in the nonlocal file from file. - // if false: get nonlocal information from .upf or .vwr directly - bool readin_nonlocal = false; + // if true: read in the nonlocal file from file. 
+ // if false: get nonlocal information from .upf or .vwr directly + bool readin_nonlocal = false; - for (int it = 0; it < ntype; it++) - { - Atom* atom = &atoms[it]; - if (readin_nonlocal) - { - this->Read_NonLocal(it, - atom, - this->nproj[it], - GlobalV::MY_RANK, - orb.get_kmesh(), - orb.get_dk(), - orb.get_dr_uniform(), - orb.orbital_file[it]); - } - else - { - this->Set_NonLocal(it, atom, this->nproj[it], orb.get_kmesh(), orb.get_dk(), orb.get_dr_uniform(), log); - } - this->nprojmax = std::max(this->nprojmax, this->nproj[it]); - // caoyu add 2021-05-24 to reconstruct atom_arrange::set_sr_NL - this->rcutmax_Beta = std::max(this->rcutmax_Beta, this->Beta[it].get_rcut_max()); - } + for (int it = 0; it < ntype; it++) + { + Atom* atom = &atoms[it]; + if (readin_nonlocal) + { + this->Read_NonLocal (it, + atom, + this->nproj[it], + GlobalV::MY_RANK, + orb.get_kmesh (), + orb.get_dk (), + orb.get_dr_uniform (), + orb.orbital_file[it]); + } + else + { + this->Set_NonLocal (it, + atom, + this->nproj[it], + orb.get_kmesh (), + orb.get_dk (), + orb.get_dr_uniform (), + log); + } + this->nprojmax = std::max (this->nprojmax, this->nproj[it]); + // caoyu add 2021-05-24 to reconstruct atom_arrange::set_sr_NL + this->rcutmax_Beta = std::max (this->rcutmax_Beta, this->Beta[it].get_rcut_max ()); + } - ModuleBase::GlobalFunc::OUT(log, "Max number of nonlocal projectors (all elements)", this->nprojmax); - - } + ModuleBase::GlobalFunc::OUT (log, "Max number of nonlocal projectors (all elements)", this->nprojmax); + } return; } diff --git a/source/source_cell/setup_nonlocal.h b/source/source_cell/setup_nonlocal.h index b9a3ad9ea4b..390d9ca5f7d 100644 --- a/source/source_cell/setup_nonlocal.h +++ b/source/source_cell/setup_nonlocal.h @@ -6,43 +6,40 @@ #include "../source_basis/module_ao/ORB_read.h" class InfoNonlocal { - public: - InfoNonlocal(); - ~InfoNonlocal(); - /// - ///NON-LOCAL part for LCAO - /// - Numerical_Nonlocal* Beta = nullptr;/// nonlocal projectors (1-dimension 
array) - int * nproj = nullptr; //mohan add 2010-12-19 - int nprojmax; // mohan add 2010-03-07 - double rcutmax_Beta; //caoyu add 2021-05-24 - const double& get_rcutmax_Beta(void) const { return rcutmax_Beta; } - /// in order to get rid of the .NONLOCAL file. - void Set_NonLocal( - const int &it, - Atom* atom, - int &n_projectors, - const int& kmesh, - const double& dk, - const double& dr_uniform, - std::ofstream &log); - /// read in the NONLOCAL projector from file. - void Read_NonLocal( - const int &it, - Atom* atom, - int &n_projectors, - const int &my_rank, - const int& kmesh, - const double& dk, - const double& dr_uniform, - const std::string& nonlocalFile); - //workflow to setup nonlocal part for LCAO - void setupNonlocal( - const int& ntype, - Atom* atoms, - std::ofstream &log, - LCAO_Orbitals &orb - ); + public: + InfoNonlocal (); + ~InfoNonlocal (); + /// + /// NON-LOCAL part for LCAO + /// + Numerical_Nonlocal* Beta = nullptr; /// nonlocal projectors (1-dimension array) + int* nproj = nullptr; // mohan add 2010-12-19 + int nprojmax; // mohan add 2010-03-07 + double rcutmax_Beta; // caoyu add 2021-05-24 + const double& + get_rcutmax_Beta () const + { + return rcutmax_Beta; + } + /// in order to get rid of the .NONLOCAL file. + void Set_NonLocal (const int& it, + Atom* atom, + int& n_projectors, + const int& kmesh, + const double& dk, + const double& dr_uniform, + std::ofstream& log); + /// read in the NONLOCAL projector from file. 
+ void Read_NonLocal (const int& it, + Atom* atom, + int& n_projectors, + const int& my_rank, + const int& kmesh, + const double& dk, + const double& dr_uniform, + const std::string& nonlocalFile); + // workflow to setup nonlocal part for LCAO + void setupNonlocal (const int& ntype, Atom* atoms, std::ofstream& log, LCAO_Orbitals& orb); }; #endif \ No newline at end of file diff --git a/source/source_cell/test/atom_pseudo_test.cpp b/source/source_cell/test/atom_pseudo_test.cpp index 502c54ac2b9..b1385da06ae 100644 --- a/source/source_cell/test/atom_pseudo_test.cpp +++ b/source/source_cell/test/atom_pseudo_test.cpp @@ -3,7 +3,7 @@ #define private public #include "source_io/module_parameter/parameter.h" #undef private -#include +#include #ifdef __MPI #include "mpi.h" #endif @@ -31,74 +31,76 @@ #undef private class AtomPseudoTest : public testing::Test { -protected: - std::unique_ptr upf{new Pseudopot_upf}; - std::unique_ptr atom_pseudo{new Atom_pseudo}; + protected: + std::unique_ptr upf{new Pseudopot_upf}; + std::unique_ptr atom_pseudo{new Atom_pseudo}; }; -TEST_F(AtomPseudoTest, SetDSo) +TEST_F (AtomPseudoTest, SetDSo) { #ifdef __MPI - if(GlobalV::MY_RANK==0) - { + if (GlobalV::MY_RANK == 0) + { #endif - std::ifstream ifs; - ifs.open("./support/C.upf"); - PARAM.input.pseudo_rcut = 15.0; - upf->read_pseudo_upf201(ifs, *atom_pseudo); - upf->complete_default(*atom_pseudo); - ifs.close(); - EXPECT_EQ(atom_pseudo->nh,14); - EXPECT_TRUE(atom_pseudo->has_so); - ModuleBase::ComplexMatrix d_so_in(atom_pseudo->nh*2,atom_pseudo->nh*2); - int nproj = 6; - int nproj_soc = 4; - bool has_so = true; - PARAM.input.nspin = 4; - atom_pseudo->set_d_so(d_so_in,nproj,nproj_soc,has_so); - EXPECT_NEAR(atom_pseudo->d_so(0,0,0).real(),1e-8,1e-7); - EXPECT_NEAR(atom_pseudo->d_so(0,0,0).imag(),1e-8,1e-7); - PARAM.input.lspinorb = true; - atom_pseudo->set_d_so(d_so_in,nproj,nproj_soc,has_so); - EXPECT_NEAR(atom_pseudo->d_so(0,0,0).real(),1e-8,1e-7); - 
EXPECT_NEAR(atom_pseudo->d_so(0,0,0).imag(),1e-8,1e-7); + std::ifstream ifs; + ifs.open ("./support/C.upf"); + PARAM.input.pseudo_rcut = 15.0; + upf->read_pseudo_upf201 (ifs, *atom_pseudo); + upf->complete_default (*atom_pseudo); + ifs.close (); + EXPECT_EQ (atom_pseudo->nh, 14); + EXPECT_TRUE (atom_pseudo->has_so); + ModuleBase::ComplexMatrix d_so_in (atom_pseudo->nh * 2, atom_pseudo->nh * 2); + int nproj = 6; + int nproj_soc = 4; + bool has_so = true; + PARAM.input.nspin = 4; + atom_pseudo->set_d_so (d_so_in, nproj, nproj_soc, has_so); + EXPECT_NEAR (atom_pseudo->d_so (0, 0, 0).real (), 1e-8, 1e-7); + EXPECT_NEAR (atom_pseudo->d_so (0, 0, 0).imag (), 1e-8, 1e-7); + PARAM.input.lspinorb = true; + atom_pseudo->set_d_so (d_so_in, nproj, nproj_soc, has_so); + EXPECT_NEAR (atom_pseudo->d_so (0, 0, 0).real (), 1e-8, 1e-7); + EXPECT_NEAR (atom_pseudo->d_so (0, 0, 0).imag (), 1e-8, 1e-7); #ifdef __MPI - } + } #endif } #ifdef __MPI -TEST_F(AtomPseudoTest, BcastAtomPseudo) +TEST_F (AtomPseudoTest, BcastAtomPseudo) { - if(GlobalV::MY_RANK==0) - { - std::ifstream ifs; - ifs.open("./support/C.upf"); - PARAM.input.pseudo_rcut = 15.0; - upf->read_pseudo_upf201(ifs, *atom_pseudo); - upf->complete_default(*atom_pseudo);; - ifs.close(); - } - atom_pseudo->bcast_atom_pseudo(); - if(GlobalV::MY_RANK!=0) - { - EXPECT_EQ(atom_pseudo->nbeta,6); - EXPECT_EQ(atom_pseudo->nchi,3); - EXPECT_DOUBLE_EQ(atom_pseudo->rho_atc[0],8.7234550809E-01); - } + if (GlobalV::MY_RANK == 0) + { + std::ifstream ifs; + ifs.open ("./support/C.upf"); + PARAM.input.pseudo_rcut = 15.0; + upf->read_pseudo_upf201 (ifs, *atom_pseudo); + upf->complete_default (*atom_pseudo); + ; + ifs.close (); + } + atom_pseudo->bcast_atom_pseudo (); + if (GlobalV::MY_RANK != 0) + { + EXPECT_EQ (atom_pseudo->nbeta, 6); + EXPECT_EQ (atom_pseudo->nchi, 3); + EXPECT_DOUBLE_EQ (atom_pseudo->rho_atc[0], 8.7234550809E-01); + } } -int main(int argc, char **argv) +int + main (int argc, char** argv) { - MPI_Init(&argc, &argv); - 
testing::InitGoogleTest(&argc, argv); + MPI_Init (&argc, &argv); + testing::InitGoogleTest (&argc, argv); - MPI_Comm_size(MPI_COMM_WORLD,&GlobalV::NPROC); - MPI_Comm_rank(MPI_COMM_WORLD,&GlobalV::MY_RANK); - int result = RUN_ALL_TESTS(); - - MPI_Finalize(); - - return result; + MPI_Comm_size (MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); + int result = RUN_ALL_TESTS (); + + MPI_Finalize (); + + return result; } #endif diff --git a/source/source_cell/test/atom_spec_test.cpp b/source/source_cell/test/atom_spec_test.cpp index 4fc633dc760..877c1b22b6d 100644 --- a/source/source_cell/test/atom_spec_test.cpp +++ b/source/source_cell/test/atom_spec_test.cpp @@ -3,7 +3,7 @@ #define private public #include "source_io/module_parameter/parameter.h" #undef private -#include +#include #ifdef __MPI #include "mpi.h" #endif @@ -36,180 +36,180 @@ #undef private class AtomSpecTest : public testing::Test { -protected: - Atom atom; - Pseudopot_upf upf; - std::ofstream ofs; - std::ifstream ifs; + protected: + Atom atom; + Pseudopot_upf upf; + std::ofstream ofs; + std::ifstream ifs; }; -TEST_F(AtomSpecTest, PrintAtom) +TEST_F (AtomSpecTest, PrintAtom) { #ifdef __MPI - if(GlobalV::MY_RANK==0) - { + if (GlobalV::MY_RANK == 0) + { #endif - ofs.open("tmp_atom_info"); - atom.label = "C"; - atom.type = 1; - atom.na = 2; - atom.nwl = 2; - atom.Rcut = 1.1; - atom.nw = 14; - atom.stapos_wf = 0; - atom.mass = 12.0; - atom.tau.resize(atom.na); - atom.tau[0].x = 0.2; - atom.tau[0].y = 0.2; - atom.tau[0].z = 0.2; - atom.tau[1].x = 0.4; - atom.tau[1].y = 0.4; - atom.tau[1].z = 0.4; - atom.print_Atom(ofs); - ofs.close(); - ifs.open("tmp_atom_info"); - std::string str((std::istreambuf_iterator(ifs)),std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("label = C")); - EXPECT_THAT(str, testing::HasSubstr("type = 1")); - EXPECT_THAT(str, testing::HasSubstr("na = 2")); - EXPECT_THAT(str, testing::HasSubstr("nwl = 2")); - EXPECT_THAT(str, 
testing::HasSubstr("Rcut = 1.1")); - EXPECT_THAT(str, testing::HasSubstr("nw = 14")); - EXPECT_THAT(str, testing::HasSubstr("stapos_wf = 0")); - EXPECT_THAT(str, testing::HasSubstr("mass = 12")); - EXPECT_THAT(str, testing::HasSubstr("atom_position(cartesian) Dimension = 2")); - ifs.close(); - remove("tmp_atom_info"); + ofs.open ("tmp_atom_info"); + atom.label = "C"; + atom.type = 1; + atom.na = 2; + atom.nwl = 2; + atom.Rcut = 1.1; + atom.nw = 14; + atom.stapos_wf = 0; + atom.mass = 12.0; + atom.tau.resize (atom.na); + atom.tau[0].x = 0.2; + atom.tau[0].y = 0.2; + atom.tau[0].z = 0.2; + atom.tau[1].x = 0.4; + atom.tau[1].y = 0.4; + atom.tau[1].z = 0.4; + atom.print_Atom (ofs); + ofs.close (); + ifs.open ("tmp_atom_info"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("label = C")); + EXPECT_THAT (str, testing::HasSubstr ("type = 1")); + EXPECT_THAT (str, testing::HasSubstr ("na = 2")); + EXPECT_THAT (str, testing::HasSubstr ("nwl = 2")); + EXPECT_THAT (str, testing::HasSubstr ("Rcut = 1.1")); + EXPECT_THAT (str, testing::HasSubstr ("nw = 14")); + EXPECT_THAT (str, testing::HasSubstr ("stapos_wf = 0")); + EXPECT_THAT (str, testing::HasSubstr ("mass = 12")); + EXPECT_THAT (str, testing::HasSubstr ("atom_position(cartesian) Dimension = 2")); + ifs.close (); + remove ("tmp_atom_info"); #ifdef __MPI - } + } #endif } -TEST_F(AtomSpecTest, SetIndex) +TEST_F (AtomSpecTest, SetIndex) { #ifdef __MPI - if(GlobalV::MY_RANK==0) - { + if (GlobalV::MY_RANK == 0) + { #endif - atom.nw = 0; - atom.nwl = 1; - atom.l_nchi.resize(atom.nwl+1); - atom.l_nchi[0] = 2; // l:0, N:2 (arbitrary) - atom.nw += 1*atom.l_nchi[0]; // m = 2*0+1 = 1 - atom.l_nchi[1] = 4; // l:1, N:4 (arbitrary) - atom.nw += 3*atom.l_nchi[1]; // m = 2*1+1 = 3 - atom.set_index(); - EXPECT_EQ(atom.iw2l[13],1); - EXPECT_EQ(atom.iw2n[13],3); - EXPECT_EQ(atom.iw2m[13],2); - EXPECT_EQ(atom.iw2_ylm[13],3); - EXPECT_TRUE(atom.iw2_new[11]); - // here 
is the table: - // nw = 2 + 3*4 = 14 - // L N m L*L+m - // 0 0 0 0 0 - // 1 0 1 0 0 - // 2 1 0 0 1 - // 3 1 0 1 2 - // 4 1 0 2 3 - // 5 1 1 0 1 - // 6 1 1 1 2 - // 7 1 1 2 3 - // 8 1 2 0 1 - // 9 1 2 1 2 - // 10 1 2 2 3 - // 11 1 3 0 1 - // 12 1 3 1 2 - // 13 1 3 2 3 + atom.nw = 0; + atom.nwl = 1; + atom.l_nchi.resize (atom.nwl + 1); + atom.l_nchi[0] = 2; // l:0, N:2 (arbitrary) + atom.nw += 1 * atom.l_nchi[0]; // m = 2*0+1 = 1 + atom.l_nchi[1] = 4; // l:1, N:4 (arbitrary) + atom.nw += 3 * atom.l_nchi[1]; // m = 2*1+1 = 3 + atom.set_index (); + EXPECT_EQ (atom.iw2l[13], 1); + EXPECT_EQ (atom.iw2n[13], 3); + EXPECT_EQ (atom.iw2m[13], 2); + EXPECT_EQ (atom.iw2_ylm[13], 3); + EXPECT_TRUE (atom.iw2_new[11]); + // here is the table: + // nw = 2 + 3*4 = 14 + // L N m L*L+m + // 0 0 0 0 0 + // 1 0 1 0 0 + // 2 1 0 0 1 + // 3 1 0 1 2 + // 4 1 0 2 3 + // 5 1 1 0 1 + // 6 1 1 1 2 + // 7 1 1 2 3 + // 8 1 2 0 1 + // 9 1 2 1 2 + // 10 1 2 2 3 + // 11 1 3 0 1 + // 12 1 3 1 2 + // 13 1 3 2 3 #ifdef __MPI - } + } #endif } #ifdef __MPI -TEST_F(AtomSpecTest, BcastAtom) +TEST_F (AtomSpecTest, BcastAtom) { - if(GlobalV::MY_RANK==0) - { - atom.label = "C"; - atom.type = 1; - atom.na = 2; - atom.nw = 0; - atom.nwl = 1; - atom.Rcut = 1.1; - atom.l_nchi.resize(atom.nwl+1); - atom.l_nchi[0] = 2; - atom.nw += atom.l_nchi[0]; - atom.l_nchi[1] = 4; - atom.nw += 3*atom.l_nchi[1]; - atom.stapos_wf = 0; - atom.mass = 12.0; - atom.tau.resize(atom.na); - atom.taud.resize(atom.na); - atom.dis.resize(atom.na); - atom.vel.resize(atom.na); - atom.mag.resize(atom.na); - atom.angle1.resize(atom.na); - atom.angle2.resize(atom.na); - atom.m_loc_.resize(atom.na); - atom.mbl.resize(atom.na); - atom.lambda.resize(atom.na); - atom.constrain.resize(atom.na); - atom.tau[0].x = 0.2; - atom.tau[0].y = 0.2; - atom.tau[0].z = 0.2; - atom.tau[1].x = 0.4; - atom.tau[1].y = 0.4; - atom.tau[1].z = 0.4; - } - atom.bcast_atom(); - if(GlobalV::MY_RANK!=0) - { - EXPECT_EQ(atom.label,"C"); - EXPECT_EQ(atom.type,1); - 
EXPECT_EQ(atom.na,2); - EXPECT_EQ(atom.nwl,1); - EXPECT_DOUBLE_EQ(atom.Rcut,1.1); - EXPECT_EQ(atom.nw,14); - EXPECT_EQ(atom.stapos_wf,0); - EXPECT_DOUBLE_EQ(atom.mass,12.0); - EXPECT_DOUBLE_EQ(atom.tau[0].x,0.2); - EXPECT_DOUBLE_EQ(atom.tau[1].z,0.4); - } + if (GlobalV::MY_RANK == 0) + { + atom.label = "C"; + atom.type = 1; + atom.na = 2; + atom.nw = 0; + atom.nwl = 1; + atom.Rcut = 1.1; + atom.l_nchi.resize (atom.nwl + 1); + atom.l_nchi[0] = 2; + atom.nw += atom.l_nchi[0]; + atom.l_nchi[1] = 4; + atom.nw += 3 * atom.l_nchi[1]; + atom.stapos_wf = 0; + atom.mass = 12.0; + atom.tau.resize (atom.na); + atom.taud.resize (atom.na); + atom.dis.resize (atom.na); + atom.vel.resize (atom.na); + atom.mag.resize (atom.na); + atom.angle1.resize (atom.na); + atom.angle2.resize (atom.na); + atom.m_loc_.resize (atom.na); + atom.mbl.resize (atom.na); + atom.lambda.resize (atom.na); + atom.constrain.resize (atom.na); + atom.tau[0].x = 0.2; + atom.tau[0].y = 0.2; + atom.tau[0].z = 0.2; + atom.tau[1].x = 0.4; + atom.tau[1].y = 0.4; + atom.tau[1].z = 0.4; + } + atom.bcast_atom (); + if (GlobalV::MY_RANK != 0) + { + EXPECT_EQ (atom.label, "C"); + EXPECT_EQ (atom.type, 1); + EXPECT_EQ (atom.na, 2); + EXPECT_EQ (atom.nwl, 1); + EXPECT_DOUBLE_EQ (atom.Rcut, 1.1); + EXPECT_EQ (atom.nw, 14); + EXPECT_EQ (atom.stapos_wf, 0); + EXPECT_DOUBLE_EQ (atom.mass, 12.0); + EXPECT_DOUBLE_EQ (atom.tau[0].x, 0.2); + EXPECT_DOUBLE_EQ (atom.tau[1].z, 0.4); + } } -TEST_F(AtomSpecTest, BcastAtom2) +TEST_F (AtomSpecTest, BcastAtom2) { - if(GlobalV::MY_RANK==0) - { - ifs.open("./support/C.upf"); - PARAM.input.pseudo_rcut = 15.0; - upf.read_pseudo_upf201(ifs, atom.ncpp); - upf.complete_default(atom.ncpp); - ifs.close(); - EXPECT_TRUE(atom.ncpp.has_so); - } - atom.bcast_atom2(); - if(GlobalV::MY_RANK!=0) - { - EXPECT_EQ(atom.ncpp.nbeta,6); - EXPECT_EQ(atom.ncpp.nchi,3); - EXPECT_DOUBLE_EQ(atom.ncpp.rho_atc[0],8.7234550809E-01); - } + if (GlobalV::MY_RANK == 0) + { + ifs.open ("./support/C.upf"); + 
PARAM.input.pseudo_rcut = 15.0; + upf.read_pseudo_upf201 (ifs, atom.ncpp); + upf.complete_default (atom.ncpp); + ifs.close (); + EXPECT_TRUE (atom.ncpp.has_so); + } + atom.bcast_atom2 (); + if (GlobalV::MY_RANK != 0) + { + EXPECT_EQ (atom.ncpp.nbeta, 6); + EXPECT_EQ (atom.ncpp.nchi, 3); + EXPECT_DOUBLE_EQ (atom.ncpp.rho_atc[0], 8.7234550809E-01); + } } -int main(int argc, char **argv) +int + main (int argc, char** argv) { - MPI_Init(&argc, &argv); - testing::InitGoogleTest(&argc, argv); + MPI_Init (&argc, &argv); + testing::InitGoogleTest (&argc, argv); - MPI_Comm_size(MPI_COMM_WORLD,&GlobalV::NPROC); - MPI_Comm_rank(MPI_COMM_WORLD,&GlobalV::MY_RANK); - int result = RUN_ALL_TESTS(); - - MPI_Finalize(); - - return result; + MPI_Comm_size (MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); + int result = RUN_ALL_TESTS (); + + MPI_Finalize (); + + return result; } #endif - diff --git a/source/source_cell/test/cell_index_test.cpp b/source/source_cell/test/cell_index_test.cpp index e0767ad6aaf..82650a0ebe4 100644 --- a/source/source_cell/test/cell_index_test.cpp +++ b/source/source_cell/test/cell_index_test.cpp @@ -19,48 +19,48 @@ class CellIndexTest : public testing::Test std::vector atom_labels = {"C", "H"}; std::vector atom_counts = {1, 2}; std::vector> lnchi_counts = {{1, 1, 1}, {1, 1, 1}}; - CellIndex cell_index = CellIndex(atom_labels, atom_counts, lnchi_counts, 1); + CellIndex cell_index = CellIndex (atom_labels, atom_counts, lnchi_counts, 1); }; -TEST_F(CellIndexTest, EmptyTest) +TEST_F (CellIndexTest, EmptyTest) { CellIndex cell_index1; - EXPECT_EQ(0, cell_index1.get_ntype()); - EXPECT_EQ(0, cell_index1.get_nw()); - testing::internal::CaptureStdout(); - EXPECT_EXIT(cell_index1.get_atom_label(0), ::testing::ExitedWithCode(1), ""); - std::string output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("iat out of range [0, nat)")); + EXPECT_EQ (0, cell_index1.get_ntype ()); + EXPECT_EQ (0, 
cell_index1.get_nw ()); + testing::internal::CaptureStdout (); + EXPECT_EXIT (cell_index1.get_atom_label (0), ::testing::ExitedWithCode (1), ""); + std::string output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("iat out of range [0, nat)")); } -TEST_F(CellIndexTest, Index) +TEST_F (CellIndexTest, Index) { - EXPECT_EQ(2, cell_index.get_ntype()); - EXPECT_EQ(3, cell_index.get_nat()); - EXPECT_EQ(1, cell_index.get_nat(0)); - EXPECT_EQ(2, cell_index.get_nat(1)); - EXPECT_EQ(27, cell_index.get_nw()); - EXPECT_EQ(9, cell_index.get_nw(0)); - EXPECT_EQ(9, cell_index.get_nw(1)); - EXPECT_EQ(0, cell_index.get_iwt(0, 0)); - EXPECT_EQ(9, cell_index.get_iwt(1, 0)); - EXPECT_EQ(18, cell_index.get_iwt(2, 0)); - EXPECT_EQ(2, cell_index.get_maxL(0)); - EXPECT_EQ(2, cell_index.get_maxL(1)); - EXPECT_EQ(2, cell_index.get_maxL(2)); - EXPECT_EQ(1, cell_index.get_nchi(0, 0)); - EXPECT_EQ("C", cell_index.get_atom_label(0)); - EXPECT_EQ("H", cell_index.get_atom_label(1)); + EXPECT_EQ (2, cell_index.get_ntype ()); + EXPECT_EQ (3, cell_index.get_nat ()); + EXPECT_EQ (1, cell_index.get_nat (0)); + EXPECT_EQ (2, cell_index.get_nat (1)); + EXPECT_EQ (27, cell_index.get_nw ()); + EXPECT_EQ (9, cell_index.get_nw (0)); + EXPECT_EQ (9, cell_index.get_nw (1)); + EXPECT_EQ (0, cell_index.get_iwt (0, 0)); + EXPECT_EQ (9, cell_index.get_iwt (1, 0)); + EXPECT_EQ (18, cell_index.get_iwt (2, 0)); + EXPECT_EQ (2, cell_index.get_maxL (0)); + EXPECT_EQ (2, cell_index.get_maxL (1)); + EXPECT_EQ (2, cell_index.get_maxL (2)); + EXPECT_EQ (1, cell_index.get_nchi (0, 0)); + EXPECT_EQ ("C", cell_index.get_atom_label (0)); + EXPECT_EQ ("H", cell_index.get_atom_label (1)); } -TEST_F(CellIndexTest, WriteOrbInfo) +TEST_F (CellIndexTest, WriteOrbInfo) { - cell_index.write_orb_info("./"); - std::ifstream ifs("./Orbital"); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("#io spec l m z sym")); - 
EXPECT_THAT(str, testing::HasSubstr("0 C 2 4 1 dxy")); - EXPECT_THAT(str, testing::HasSubstr("1 H 2 4 1 dxy")); - EXPECT_THAT(str, testing::HasSubstr("2 H 2 4 1 dxy")); - remove("./Orbital"); + cell_index.write_orb_info ("./"); + std::ifstream ifs ("./Orbital"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("#io spec l m z sym")); + EXPECT_THAT (str, testing::HasSubstr ("0 C 2 4 1 dxy")); + EXPECT_THAT (str, testing::HasSubstr ("1 H 2 4 1 dxy")); + EXPECT_THAT (str, testing::HasSubstr ("2 H 2 4 1 dxy")); + remove ("./Orbital"); } \ No newline at end of file diff --git a/source/source_cell/test/klist_test.cpp b/source/source_cell/test/klist_test.cpp index 57b7eac90ad..37c33292b5b 100644 --- a/source/source_cell/test/klist_test.cpp +++ b/source/source_cell/test/klist_test.cpp @@ -23,71 +23,30 @@ #include "source_cell/parallel_kpoints.h" bool berryphase::berry_phase_flag = false; -pseudo::pseudo() -{ -} -pseudo::~pseudo() -{ -} -Atom::Atom() -{ -} -Atom::~Atom() -{ -} -Atom_pseudo::Atom_pseudo() -{ -} -Atom_pseudo::~Atom_pseudo() -{ -} -InfoNonlocal::InfoNonlocal() -{ -} -InfoNonlocal::~InfoNonlocal() -{ -} -UnitCell::UnitCell() -{ -} -UnitCell::~UnitCell() -{ -} -Magnetism::Magnetism() -{ -} -Magnetism::~Magnetism() -{ -} -ORB_gaunt_table::ORB_gaunt_table() -{ -} -ORB_gaunt_table::~ORB_gaunt_table() -{ -} -pseudopot_cell_vl::pseudopot_cell_vl() -{ -} -pseudopot_cell_vl::~pseudopot_cell_vl() -{ -} -pseudopot_cell_vnl::pseudopot_cell_vnl() -{ -} -pseudopot_cell_vnl::~pseudopot_cell_vnl() -{ -} -Soc::~Soc() -{ -} -Fcoef::~Fcoef() -{ -} -SepPot::SepPot(){} -SepPot::~SepPot(){} -Sep_Cell::Sep_Cell() noexcept {} -Sep_Cell::~Sep_Cell() noexcept {} - +pseudo::pseudo () {} +pseudo::~pseudo () {} +Atom::Atom () {} +Atom::~Atom () {} +Atom_pseudo::Atom_pseudo () {} +Atom_pseudo::~Atom_pseudo () {} +InfoNonlocal::InfoNonlocal () {} +InfoNonlocal::~InfoNonlocal () {} +UnitCell::UnitCell () {} 
+UnitCell::~UnitCell () {} +Magnetism::Magnetism () {} +Magnetism::~Magnetism () {} +ORB_gaunt_table::ORB_gaunt_table () {} +ORB_gaunt_table::~ORB_gaunt_table () {} +pseudopot_cell_vl::pseudopot_cell_vl () {} +pseudopot_cell_vl::~pseudopot_cell_vl () {} +pseudopot_cell_vnl::pseudopot_cell_vnl () {} +pseudopot_cell_vnl::~pseudopot_cell_vnl () {} +Soc::~Soc () {} +Fcoef::~Fcoef () {} +SepPot::SepPot () {} +SepPot::~SepPot () {} +Sep_Cell::Sep_Cell () noexcept {} +Sep_Cell::~Sep_Cell () noexcept {} /************************************************ * unit test of class K_Vectors @@ -171,13 +130,14 @@ class KlistTest : public testing::Test // used to construct cell and analyse its symmetry UnitCell ucell; - void construct_ucell(stru_& stru) + void + construct_ucell (stru_& stru) { std::vector coord = stru.all_type; - ucell.a1 = ModuleBase::Vector3(stru.cell[0], stru.cell[1], stru.cell[2]); - ucell.a2 = ModuleBase::Vector3(stru.cell[3], stru.cell[4], stru.cell[5]); - ucell.a3 = ModuleBase::Vector3(stru.cell[6], stru.cell[7], stru.cell[8]); - ucell.ntype = stru.all_type.size(); + ucell.a1 = ModuleBase::Vector3 (stru.cell[0], stru.cell[1], stru.cell[2]); + ucell.a2 = ModuleBase::Vector3 (stru.cell[3], stru.cell[4], stru.cell[5]); + ucell.a3 = ModuleBase::Vector3 (stru.cell[6], stru.cell[7], stru.cell[8]); + ucell.ntype = stru.all_type.size (); ucell.atoms = new Atom[ucell.ntype]; ucell.nat = 0; ucell.latvec.e11 = ucell.a1.x; @@ -189,40 +149,41 @@ class KlistTest : public testing::Test ucell.latvec.e31 = ucell.a3.x; ucell.latvec.e32 = ucell.a3.y; ucell.latvec.e33 = ucell.a3.z; - ucell.GT = ucell.latvec.Inverse(); - ucell.G = ucell.GT.Transpose(); + ucell.GT = ucell.latvec.Inverse (); + ucell.G = ucell.GT.Transpose (); ucell.lat0 = 1.8897261254578281; - for (int i = 0; i < coord.size(); i++) - { - ucell.atoms[i].label = coord[i].atomname; - ucell.atoms[i].na = coord[i].coordinate.size(); - ucell.atoms[i].tau.resize(ucell.atoms[i].na); - 
ucell.atoms[i].taud.resize(ucell.atoms[i].na); - for (int j = 0; j < ucell.atoms[i].na; j++) + for (int i = 0; i < coord.size (); i++) { - std::vector this_atom = coord[i].coordinate[j]; - ucell.atoms[i].tau[j] = ModuleBase::Vector3(this_atom[0], this_atom[1], this_atom[2]); - ModuleBase::Mathzone::Cartesian_to_Direct(ucell.atoms[i].tau[j].x, - ucell.atoms[i].tau[j].y, - ucell.atoms[i].tau[j].z, - ucell.a1.x, - ucell.a1.y, - ucell.a1.z, - ucell.a2.x, - ucell.a2.y, - ucell.a2.z, - ucell.a3.x, - ucell.a3.y, - ucell.a3.z, - ucell.atoms[i].taud[j].x, - ucell.atoms[i].taud[j].y, - ucell.atoms[i].taud[j].z); + ucell.atoms[i].label = coord[i].atomname; + ucell.atoms[i].na = coord[i].coordinate.size (); + ucell.atoms[i].tau.resize (ucell.atoms[i].na); + ucell.atoms[i].taud.resize (ucell.atoms[i].na); + for (int j = 0; j < ucell.atoms[i].na; j++) + { + std::vector this_atom = coord[i].coordinate[j]; + ucell.atoms[i].tau[j] = ModuleBase::Vector3 (this_atom[0], this_atom[1], this_atom[2]); + ModuleBase::Mathzone::Cartesian_to_Direct (ucell.atoms[i].tau[j].x, + ucell.atoms[i].tau[j].y, + ucell.atoms[i].tau[j].z, + ucell.a1.x, + ucell.a1.y, + ucell.a1.z, + ucell.a2.x, + ucell.a2.y, + ucell.a2.z, + ucell.a3.x, + ucell.a3.y, + ucell.a3.z, + ucell.atoms[i].taud[j].x, + ucell.atoms[i].taud[j].y, + ucell.atoms[i].taud[j].z); + } + ucell.nat += ucell.atoms[i].na; } - ucell.nat += ucell.atoms[i].na; - } } - void setucell() + void + setucell () { ucell.latvec.e11 = 10.0; ucell.latvec.e12 = 0.0; @@ -233,29 +194,30 @@ class KlistTest : public testing::Test ucell.latvec.e31 = 0.0; ucell.latvec.e32 = 0.0; ucell.latvec.e33 = 10.0; - ucell.GT = ucell.latvec.Inverse(); - ucell.G = ucell.GT.Transpose(); + ucell.GT = ucell.latvec.Inverse (); + ucell.G = ucell.GT.Transpose (); ucell.lat0 = 1.8897261254578281; } // clear ucell - void ClearUcell() + void + ClearUcell () { delete[] ucell.atoms; } }; -TEST_F(KlistTest, Construct) +TEST_F (KlistTest, Construct) { - EXPECT_EQ(kv->get_nks(), 0); - 
EXPECT_EQ(kv->get_nkstot(), 0); - EXPECT_EQ(kv->nspin, 0); - EXPECT_EQ(kv->k_nkstot, 0); - EXPECT_FALSE(kv->kc_done); - EXPECT_FALSE(kv->kd_done); + EXPECT_EQ (kv->get_nks (), 0); + EXPECT_EQ (kv->get_nkstot (), 0); + EXPECT_EQ (kv->nspin, 0); + EXPECT_EQ (kv->k_nkstot, 0); + EXPECT_FALSE (kv->kc_done); + EXPECT_FALSE (kv->kd_done); // just to set ucell info here, however it is used in the following tests } -TEST_F(KlistTest, MP) +TEST_F (KlistTest, MP) { kv->nmp[0] = 2; kv->nmp[1] = 2; @@ -265,7 +227,7 @@ TEST_F(KlistTest, MP) kv->koffset[2] = 0; kv->nspin = 1; int k_type = 0; - kv->Monkhorst_Pack(kv->nmp, kv->koffset, k_type); + kv->Monkhorst_Pack (kv->nmp, kv->koffset, k_type); /* std::cout << " " <get_nkstot();ik++) @@ -282,32 +244,32 @@ TEST_F(KlistTest, MP) kv1->koffset[2] = 1; kv1->nspin = 1; k_type = 1; - kv1->Monkhorst_Pack(kv1->nmp, kv1->koffset, k_type); + kv1->Monkhorst_Pack (kv1->nmp, kv1->koffset, k_type); // std::cout << " " <nkstot; ik++) - { - EXPECT_EQ(kv->kvec_d[ik].x, kv1->kvec_d[ik].x); - EXPECT_EQ(kv->kvec_d[ik].y, kv1->kvec_d[ik].y); - EXPECT_EQ(kv->kvec_d[ik].z, kv1->kvec_d[ik].z); - // std::cout<kvec_d[ik]<kvec_d[ik].x, kv1->kvec_d[ik].x); + EXPECT_EQ (kv->kvec_d[ik].y, kv1->kvec_d[ik].y); + EXPECT_EQ (kv->kvec_d[ik].z, kv1->kvec_d[ik].z); + // std::cout<kvec_d[ik]<nspin = 1; - kv->read_kpoints(ucell,k_file); - ifs.open("KPT_GO"); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("Gamma")); - EXPECT_THAT(str, testing::HasSubstr("1 1 1 0 0 0")); - ifs.close(); + kv->read_kpoints (ucell, k_file); + ifs.open ("KPT_GO"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("Gamma")); + EXPECT_THAT (str, testing::HasSubstr ("1 1 1 0 0 0")); + ifs.close (); PARAM.sys.gamma_only_local = false; // this is important for the following tests because it is global } -TEST_F(KlistTest, ReadKpointsKspacing) +TEST_F 
(KlistTest, ReadKpointsKspacing) { kv->nspin = 1; PARAM.input.kspacing[0] = 0.052918; // 0.52918/Bohr = 1/A @@ -317,16 +279,16 @@ TEST_F(KlistTest, ReadKpointsKspacing) PARAM.input.koffset[0] = 0.0; PARAM.input.koffset[1] = 0.0; PARAM.input.koffset[2] = 0.0; - setucell(); + setucell (); std::string k_file = "./support/KPT3"; - kv->read_kpoints(ucell,k_file); - EXPECT_EQ(kv->get_nkstot(), 343); + kv->read_kpoints (ucell, k_file); + EXPECT_EQ (kv->get_nkstot (), 343); PARAM.input.kspacing[0] = 0.0; PARAM.input.kspacing[1] = 0.0; PARAM.input.kspacing[2] = 0.0; } -TEST_F(KlistTest, ReadKpointsKspacing3values) +TEST_F (KlistTest, ReadKpointsKspacing3values) { kv->nspin = 1; PARAM.input.kspacing[0] = 0.052918; // 0.52918/Bohr = 1/A @@ -336,16 +298,16 @@ TEST_F(KlistTest, ReadKpointsKspacing3values) PARAM.input.koffset[0] = 0.0; PARAM.input.koffset[1] = 0.0; PARAM.input.koffset[2] = 0.0; - setucell(); + setucell (); std::string k_file = "./support/KPT3"; - kv->read_kpoints(ucell,k_file); - EXPECT_EQ(kv->get_nkstot(), 210); + kv->read_kpoints (ucell, k_file); + EXPECT_EQ (kv->get_nkstot (), 210); PARAM.input.kspacing[0] = 0.0; PARAM.input.kspacing[1] = 0.0; PARAM.input.kspacing[2] = 0.0; } -TEST_F(KlistTest, ReadKpointsInvalidKspacing3values) +TEST_F (KlistTest, ReadKpointsInvalidKspacing3values) { kv->nspin = 1; PARAM.input.kspacing[0] = 0.052918; // 0.52918/Bohr = 1/A @@ -356,15 +318,15 @@ TEST_F(KlistTest, ReadKpointsInvalidKspacing3values) PARAM.input.koffset[1] = 0.0; PARAM.input.koffset[2] = 0.0; std::string k_file = "./support/KPT3"; - testing::internal::CaptureStdout(); - EXPECT_EXIT(kv->read_kpoints(ucell,k_file), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); + testing::internal::CaptureStdout (); + EXPECT_EXIT (kv->read_kpoints (ucell, k_file), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); PARAM.input.kspacing[0] = 0.0; PARAM.input.kspacing[1] = 0.0; PARAM.input.kspacing[2] = 0.0; } 
-TEST_F(KlistTest, ReadKpointsKspacingShiftedGamma) +TEST_F (KlistTest, ReadKpointsKspacingShiftedGamma) { kv->nspin = 1; PARAM.input.kspacing[0] = 0.052918; // 0.52918/Bohr = 1/A @@ -374,19 +336,19 @@ TEST_F(KlistTest, ReadKpointsKspacingShiftedGamma) PARAM.input.koffset[0] = 0.5; PARAM.input.koffset[1] = 0.5; PARAM.input.koffset[2] = 0.5; - setucell(); + setucell (); std::string k_file = "./support/KPT3"; - kv->read_kpoints(ucell, k_file); + kv->read_kpoints (ucell, k_file); - EXPECT_EQ(kv->get_nkstot(), 343); - EXPECT_EQ(kv->get_k_kword(), "Gamma"); - EXPECT_DOUBLE_EQ(kv->get_koffset(0), 0.5); - EXPECT_DOUBLE_EQ(kv->get_koffset(1), 0.5); - EXPECT_DOUBLE_EQ(kv->get_koffset(2), 0.5); - EXPECT_NEAR(kv->kvec_d[0].x, 1.0 / 14.0, 1e-12); - EXPECT_NEAR(kv->kvec_d[0].y, 1.0 / 14.0, 1e-12); - EXPECT_NEAR(kv->kvec_d[0].z, 1.0 / 14.0, 1e-12); + EXPECT_EQ (kv->get_nkstot (), 343); + EXPECT_EQ (kv->get_k_kword (), "Gamma"); + EXPECT_DOUBLE_EQ (kv->get_koffset (0), 0.5); + EXPECT_DOUBLE_EQ (kv->get_koffset (1), 0.5); + EXPECT_DOUBLE_EQ (kv->get_koffset (2), 0.5); + EXPECT_NEAR (kv->kvec_d[0].x, 1.0 / 14.0, 1e-12); + EXPECT_NEAR (kv->kvec_d[0].y, 1.0 / 14.0, 1e-12); + EXPECT_NEAR (kv->kvec_d[0].z, 1.0 / 14.0, 1e-12); PARAM.input.kspacing[0] = 0.0; PARAM.input.kspacing[1] = 0.0; @@ -397,7 +359,7 @@ TEST_F(KlistTest, ReadKpointsKspacingShiftedGamma) PARAM.input.kmesh_type = "gamma"; } -TEST_F(KlistTest, ReadKpointsKspacingShiftedMP) +TEST_F (KlistTest, ReadKpointsKspacingShiftedMP) { kv->nspin = 1; PARAM.input.kspacing[0] = 0.052918; // 0.52918/Bohr = 1/A @@ -407,19 +369,19 @@ TEST_F(KlistTest, ReadKpointsKspacingShiftedMP) PARAM.input.koffset[0] = 0.5; PARAM.input.koffset[1] = 0.5; PARAM.input.koffset[2] = 0.5; - setucell(); + setucell (); std::string k_file = "./support/KPT3"; - kv->read_kpoints(ucell, k_file); + kv->read_kpoints (ucell, k_file); - EXPECT_EQ(kv->get_nkstot(), 343); - EXPECT_EQ(kv->get_k_kword(), "Monkhorst-Pack"); - EXPECT_DOUBLE_EQ(kv->get_koffset(0), 0.5); - 
EXPECT_DOUBLE_EQ(kv->get_koffset(1), 0.5); - EXPECT_DOUBLE_EQ(kv->get_koffset(2), 0.5); - EXPECT_NEAR(kv->kvec_d[0].x, -5.5 / 14.0, 1e-12); - EXPECT_NEAR(kv->kvec_d[0].y, -5.5 / 14.0, 1e-12); - EXPECT_NEAR(kv->kvec_d[0].z, -5.5 / 14.0, 1e-12); + EXPECT_EQ (kv->get_nkstot (), 343); + EXPECT_EQ (kv->get_k_kword (), "Monkhorst-Pack"); + EXPECT_DOUBLE_EQ (kv->get_koffset (0), 0.5); + EXPECT_DOUBLE_EQ (kv->get_koffset (1), 0.5); + EXPECT_DOUBLE_EQ (kv->get_koffset (2), 0.5); + EXPECT_NEAR (kv->kvec_d[0].x, -5.5 / 14.0, 1e-12); + EXPECT_NEAR (kv->kvec_d[0].y, -5.5 / 14.0, 1e-12); + EXPECT_NEAR (kv->kvec_d[0].z, -5.5 / 14.0, 1e-12); PARAM.input.kspacing[0] = 0.0; PARAM.input.kspacing[1] = 0.0; @@ -430,302 +392,302 @@ TEST_F(KlistTest, ReadKpointsKspacingShiftedMP) PARAM.input.kmesh_type = "gamma"; } -TEST_F(KlistTest, ReadKpointsGamma) +TEST_F (KlistTest, ReadKpointsGamma) { std::string k_file = "./support/KPT"; kv->nspin = 1; - kv->read_kpoints(ucell,k_file); - EXPECT_EQ(kv->get_nkstot(), 512); + kv->read_kpoints (ucell, k_file); + EXPECT_EQ (kv->get_nkstot (), 512); } -TEST_F(KlistTest, ReadKpointsMP) +TEST_F (KlistTest, ReadKpointsMP) { std::string k_file = "./support/KPT1"; kv->nspin = 1; - kv->read_kpoints(ucell,k_file); - EXPECT_EQ(kv->get_nkstot(), 512); + kv->read_kpoints (ucell, k_file); + EXPECT_EQ (kv->get_nkstot (), 512); } -TEST_F(KlistTest, ReadKpointsLine) +TEST_F (KlistTest, ReadKpointsLine) { ModuleSymmetry::Symmetry::symm_flag = 0; // symm_flag is required in read_kpoints for a k list std::string k_file = "./support/KPT2"; kv->nspin = 1; - kv->read_kpoints(ucell,k_file); - EXPECT_EQ(kv->get_nkstot(), 122); + kv->read_kpoints (ucell, k_file); + EXPECT_EQ (kv->get_nkstot (), 122); } -TEST_F(KlistTest, ReadKpointsCartesian) +TEST_F (KlistTest, ReadKpointsCartesian) { std::string k_file = "./support/KPT4"; // Cartesian: non-spin case nspin=1 kv->nspin = 1; - kv->read_kpoints(ucell,k_file); - EXPECT_EQ(kv->kvec_c.size(), 5); + kv->read_kpoints (ucell, 
k_file); + EXPECT_EQ (kv->kvec_c.size (), 5); // spin case nspin=2 kv->nspin = 2; - kv->read_kpoints(ucell,k_file); - EXPECT_EQ(kv->kvec_c.size(), 10); + kv->read_kpoints (ucell, k_file); + EXPECT_EQ (kv->kvec_c.size (), 10); } -TEST_F(KlistTest, ReadKpointsLineCartesian) +TEST_F (KlistTest, ReadKpointsLineCartesian) { std::string k_file = "./support/KPT5"; // Line Cartesian: non-spin case nspin=1 kv->nspin = 1; - kv->set_kup_and_kdw(); + kv->set_kup_and_kdw (); // Read from k point file under the case of Line_Cartesian. - kv->read_kpoints(ucell,k_file); - EXPECT_EQ(kv->get_nkstot(), 51); - EXPECT_EQ(kv->kvec_c.size(), 51); + kv->read_kpoints (ucell, k_file); + EXPECT_EQ (kv->get_nkstot (), 51); + EXPECT_EQ (kv->kvec_c.size (), 51); // Line Cartesian: spin case nspin=2 kv->nspin = 2; // Read from k point file under the case of Line_Cartesian. - kv->read_kpoints(ucell,k_file); - EXPECT_EQ(kv->get_nkstot(), 51); - EXPECT_EQ(kv->kvec_c.size(), 102); + kv->read_kpoints (ucell, k_file); + EXPECT_EQ (kv->get_nkstot (), 51); + EXPECT_EQ (kv->kvec_c.size (), 102); } -TEST_F(KlistTest, ReadKpointsDirect) +TEST_F (KlistTest, ReadKpointsDirect) { std::string k_file = "./support/KPT6"; kv->nspin = 1; - kv->set_kup_and_kdw(); + kv->set_kup_and_kdw (); // Read from k point file under the case of Direct - kv->read_kpoints(ucell,k_file); - EXPECT_EQ(kv->get_nkstot(), 6); - EXPECT_TRUE(kv->kd_done); + kv->read_kpoints (ucell, k_file); + EXPECT_EQ (kv->get_nkstot (), 6); + EXPECT_TRUE (kv->kd_done); } -TEST_F(KlistTest, ReadKpointsWarning1) +TEST_F (KlistTest, ReadKpointsWarning1) { std::string k_file = "arbitrary_1"; kv->nspin = 1; - GlobalV::ofs_warning.open("klist_tmp_warning_1"); - EXPECT_NO_THROW(kv->read_kpoints(ucell,k_file)); - GlobalV::ofs_warning.close(); - ifs.open("klist_tmp_warning_1"); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("Can't find File name : arbitrary_1")); - ifs.close(); - 
remove("klist_tmp_warning_1"); + GlobalV::ofs_warning.open ("klist_tmp_warning_1"); + EXPECT_NO_THROW (kv->read_kpoints (ucell, k_file)); + GlobalV::ofs_warning.close (); + ifs.open ("klist_tmp_warning_1"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("Can't find File name : arbitrary_1")); + ifs.close (); + remove ("klist_tmp_warning_1"); } -TEST_F(KlistTest, ReadKpointsWarning2) +TEST_F (KlistTest, ReadKpointsWarning2) { std::string k_file = "arbitrary_2"; - ofs.open(k_file.c_str()); + ofs.open (k_file.c_str ()); ofs << "ARBITRARY"; - ofs.close(); + ofs.close (); kv->nspin = 1; - GlobalV::ofs_warning.open("klist_tmp_warning_2"); - EXPECT_NO_THROW(kv->read_kpoints(ucell,k_file)); - GlobalV::ofs_warning.close(); - ifs.open("klist_tmp_warning_2"); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("symbol K_POINTS not found.")); - ifs.close(); - remove("klist_tmp_warning_2"); - remove("arbitrary_2"); + GlobalV::ofs_warning.open ("klist_tmp_warning_2"); + EXPECT_NO_THROW (kv->read_kpoints (ucell, k_file)); + GlobalV::ofs_warning.close (); + ifs.open ("klist_tmp_warning_2"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("symbol K_POINTS not found.")); + ifs.close (); + remove ("klist_tmp_warning_2"); + remove ("arbitrary_2"); } -TEST_F(KlistTest, ReadKpointsWarning3) +TEST_F (KlistTest, ReadKpointsWarning3) { std::string k_file = "arbitrary_3"; - ofs.open(k_file.c_str()); + ofs.open (k_file.c_str ()); ofs << "KPOINTS" << std::endl; ofs << "100001" << std::endl; - ofs.close(); + ofs.close (); kv->nspin = 1; - GlobalV::ofs_warning.open("klist_tmp_warning_3"); - EXPECT_NO_THROW(kv->read_kpoints(ucell,k_file)); - GlobalV::ofs_warning.close(); - ifs.open("klist_tmp_warning_3"); - std::string str((std::istreambuf_iterator(ifs)), 
std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("nkstot > MAX_KPOINTS")); - ifs.close(); - remove("klist_tmp_warning_3"); - remove("arbitrary_3"); + GlobalV::ofs_warning.open ("klist_tmp_warning_3"); + EXPECT_NO_THROW (kv->read_kpoints (ucell, k_file)); + GlobalV::ofs_warning.close (); + ifs.open ("klist_tmp_warning_3"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("nkstot > MAX_KPOINTS")); + ifs.close (); + remove ("klist_tmp_warning_3"); + remove ("arbitrary_3"); } -TEST_F(KlistTest, ReadKpointsWarning4) +TEST_F (KlistTest, ReadKpointsWarning4) { std::string k_file = "arbitrary_4"; - ofs.open(k_file.c_str()); + ofs.open (k_file.c_str ()); ofs << "KPOINTS" << std::endl; ofs << "0" << std::endl; ofs << "arbitrary" << std::endl; - ofs.close(); + ofs.close (); kv->nspin = 1; - GlobalV::ofs_warning.open("klist_tmp_warning_4"); - EXPECT_NO_THROW(kv->read_kpoints(ucell,k_file)); - GlobalV::ofs_warning.close(); - ifs.open("klist_tmp_warning_4"); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("Error: neither Gamma nor Monkhorst-Pack.")); - ifs.close(); - remove("klist_tmp_warning_4"); - remove("arbitrary_4"); + GlobalV::ofs_warning.open ("klist_tmp_warning_4"); + EXPECT_NO_THROW (kv->read_kpoints (ucell, k_file)); + GlobalV::ofs_warning.close (); + ifs.open ("klist_tmp_warning_4"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("Error: neither Gamma nor Monkhorst-Pack.")); + ifs.close (); + remove ("klist_tmp_warning_4"); + remove ("arbitrary_4"); } -TEST_F(KlistTest, ReadKpointsWarning5) +TEST_F (KlistTest, ReadKpointsWarning5) { std::string k_file = "arbitrary_5"; - ofs.open(k_file.c_str()); + ofs.open (k_file.c_str ()); ofs << "KPOINTS" << std::endl; ofs << "100000" << std::endl; ofs << "arbitrary" << std::endl; - ofs.close(); + 
ofs.close (); // Cartesian: non-spin case nspin=1 kv->nspin = 1; - GlobalV::ofs_warning.open("klist_tmp_warning_5"); - EXPECT_NO_THROW(kv->read_kpoints(ucell,k_file)); - GlobalV::ofs_warning.close(); - ifs.open("klist_tmp_warning_5"); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("Error : neither Cartesian nor Direct kpoint")); - ifs.close(); - remove("klist_tmp_warning_5"); - remove("arbitrary_5"); + GlobalV::ofs_warning.open ("klist_tmp_warning_5"); + EXPECT_NO_THROW (kv->read_kpoints (ucell, k_file)); + GlobalV::ofs_warning.close (); + ifs.open ("klist_tmp_warning_5"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("Error : neither Cartesian nor Direct kpoint")); + ifs.close (); + remove ("klist_tmp_warning_5"); + remove ("arbitrary_5"); } -TEST_F(KlistTest, ReadKpointsWarning6) +TEST_F (KlistTest, ReadKpointsWarning6) { std::string k_file = "arbitrary_6"; - ofs.open(k_file.c_str()); + ofs.open (k_file.c_str ()); ofs << "KPOINTS" << std::endl; ofs << "100000" << std::endl; ofs << "Line_Cartesian" << std::endl; - ofs.close(); + ofs.close (); // Cartesian: non-spin case nspin=1 kv->nspin = 1; ModuleSymmetry::Symmetry::symm_flag = 1; - GlobalV::ofs_warning.open("klist_tmp_warning_6"); - EXPECT_NO_THROW(kv->read_kpoints(ucell,k_file)); - GlobalV::ofs_warning.close(); - ifs.open("klist_tmp_warning_6"); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("Line mode of k-points is open, please set symmetry to 0 or -1")); - ifs.close(); - remove("klist_tmp_warning_6"); - remove("arbitrary_6"); + GlobalV::ofs_warning.open ("klist_tmp_warning_6"); + EXPECT_NO_THROW (kv->read_kpoints (ucell, k_file)); + GlobalV::ofs_warning.close (); + ifs.open ("klist_tmp_warning_6"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + 
EXPECT_THAT (str, testing::HasSubstr ("Line mode of k-points is open, please set symmetry to 0 or -1")); + ifs.close (); + remove ("klist_tmp_warning_6"); + remove ("arbitrary_6"); ModuleSymmetry::Symmetry::symm_flag = 0; } -TEST_F(KlistTest, ReadKpointsWarning7) +TEST_F (KlistTest, ReadKpointsWarning7) { std::string k_file = "arbitrary_7"; - ofs.open(k_file.c_str()); + ofs.open (k_file.c_str ()); ofs << "KPOINTS" << std::endl; ofs << "100000" << std::endl; ofs << "Line_Direct" << std::endl; - ofs.close(); + ofs.close (); kv->nspin = 1; ModuleSymmetry::Symmetry::symm_flag = 1; - GlobalV::ofs_warning.open("klist_tmp_warning_7"); - EXPECT_NO_THROW(kv->read_kpoints(ucell,k_file)); - GlobalV::ofs_warning.close(); - ifs.open("klist_tmp_warning_7"); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("Line mode of k-points is open, please set symmetry to 0 or -1")); - ifs.close(); - remove("klist_tmp_warning_7"); - remove("arbitrary_7"); + GlobalV::ofs_warning.open ("klist_tmp_warning_7"); + EXPECT_NO_THROW (kv->read_kpoints (ucell, k_file)); + GlobalV::ofs_warning.close (); + ifs.open ("klist_tmp_warning_7"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("Line mode of k-points is open, please set symmetry to 0 or -1")); + ifs.close (); + remove ("klist_tmp_warning_7"); + remove ("arbitrary_7"); ModuleSymmetry::Symmetry::symm_flag = 0; } -TEST_F(KlistTest, SetKupKdown) +TEST_F (KlistTest, SetKupKdown) { std::string k_file = "./support/KPT4"; // Cartesian: non-spin case nspin=1 kv->nspin = 1; - kv->read_kpoints(ucell,k_file); - kv->set_kup_and_kdw(); + kv->read_kpoints (ucell, k_file); + kv->set_kup_and_kdw (); for (int ik = 0; ik < 5; ik++) - { - EXPECT_EQ(kv->isk[ik], 0); - } + { + EXPECT_EQ (kv->isk[ik], 0); + } kv->nspin = 4; - kv->read_kpoints(ucell,k_file); - kv->set_kup_and_kdw(); + kv->read_kpoints (ucell, k_file); + 
kv->set_kup_and_kdw (); for (int ik = 0; ik < 5; ik++) - { - EXPECT_EQ(kv->isk[ik], 0); - EXPECT_EQ(kv->isk[ik + 5], 0); - EXPECT_EQ(kv->isk[ik + 10], 0); - EXPECT_EQ(kv->isk[ik + 15], 0); - } + { + EXPECT_EQ (kv->isk[ik], 0); + EXPECT_EQ (kv->isk[ik + 5], 0); + EXPECT_EQ (kv->isk[ik + 10], 0); + EXPECT_EQ (kv->isk[ik + 15], 0); + } kv->nspin = 2; - kv->read_kpoints(ucell,k_file); - kv->set_kup_and_kdw(); + kv->read_kpoints (ucell, k_file); + kv->set_kup_and_kdw (); for (int ik = 0; ik < 5; ik++) - { - EXPECT_EQ(kv->isk[ik], 0); - EXPECT_EQ(kv->isk[ik + 5], 1); - } + { + EXPECT_EQ (kv->isk[ik], 0); + EXPECT_EQ (kv->isk[ik + 5], 1); + } } -TEST_F(KlistTest, SetAfterVC) +TEST_F (KlistTest, SetAfterVC) { kv->nspin = 1; - kv->set_nkstot(1); - GlobalV::ofs_running.open("tmp_klist_1"); - kv->renew(kv->get_nkstot()); + kv->set_nkstot (1); + GlobalV::ofs_running.open ("tmp_klist_1"); + kv->renew (kv->get_nkstot ()); kv->kvec_c[0].x = 0; kv->kvec_c[0].y = 0; kv->kvec_c[0].z = 0; -// kv->set_after_vc(PARAM.input.nspin, ucell.G, ucell.latvec); - KVectorUtils::set_after_vc(*kv, PARAM.input.nspin, ucell.G); - - EXPECT_TRUE(kv->kd_done); - EXPECT_TRUE(kv->kc_done); - EXPECT_DOUBLE_EQ(kv->kvec_d[0].x, 0); - EXPECT_DOUBLE_EQ(kv->kvec_d[0].y, 0); - EXPECT_DOUBLE_EQ(kv->kvec_d[0].z, 0); - GlobalV::ofs_running.close(); - remove("tmp_klist_1"); + // kv->set_after_vc(PARAM.input.nspin, ucell.G, ucell.latvec); + KVectorUtils::set_after_vc (*kv, PARAM.input.nspin, ucell.G); + + EXPECT_TRUE (kv->kd_done); + EXPECT_TRUE (kv->kc_done); + EXPECT_DOUBLE_EQ (kv->kvec_d[0].x, 0); + EXPECT_DOUBLE_EQ (kv->kvec_d[0].y, 0); + EXPECT_DOUBLE_EQ (kv->kvec_d[0].z, 0); + GlobalV::ofs_running.close (); + remove ("tmp_klist_1"); } -TEST_F(KlistTest, PrintKlists) +TEST_F (KlistTest, PrintKlists) { kv->nspin = 1; - kv->set_nkstot(1); - kv->set_nks(1); - GlobalV::ofs_running.open("tmp_klist_2"); - kv->renew(kv->get_nkstot()); + kv->set_nkstot (1); + kv->set_nks (1); + GlobalV::ofs_running.open 
("tmp_klist_2"); + kv->renew (kv->get_nkstot ()); kv->kvec_c[0].x = 0; kv->kvec_c[0].y = 0; kv->kvec_c[0].z = 0; -// kv->set_after_vc(PARAM.input.nspin, ucell.G, ucell.latvec); - KVectorUtils::set_after_vc(*kv, PARAM.input.nspin, ucell.G); - EXPECT_TRUE(kv->kd_done); - KVectorUtils::print_klists(*kv, GlobalV::ofs_running); - GlobalV::ofs_running.close(); - remove("tmp_klist_2"); + // kv->set_after_vc(PARAM.input.nspin, ucell.G, ucell.latvec); + KVectorUtils::set_after_vc (*kv, PARAM.input.nspin, ucell.G); + EXPECT_TRUE (kv->kd_done); + KVectorUtils::print_klists (*kv, GlobalV::ofs_running); + GlobalV::ofs_running.close (); + remove ("tmp_klist_2"); } -TEST_F(KlistTest, PrintKlistsWarnigQuit) +TEST_F (KlistTest, PrintKlistsWarnigQuit) { kv->nspin = 1; - kv->set_nkstot(1); - kv->set_nks(2); - kv->renew(kv->get_nkstot()); + kv->set_nkstot (1); + kv->set_nks (2); + kv->renew (kv->get_nkstot ()); kv->kvec_c[0].x = 0; kv->kvec_c[0].y = 0; kv->kvec_c[0].z = 0; - testing::internal::CaptureStdout(); - EXPECT_EXIT(KVectorUtils::print_klists(*kv, GlobalV::ofs_running), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("nkstot < nks")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (KVectorUtils::print_klists (*kv, GlobalV::ofs_running), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("nkstot < nks")); } -TEST_F(KlistTest, SetBothKvecFinalSCF) +TEST_F (KlistTest, SetBothKvecFinalSCF) { kv->nspin = 1; - kv->set_nkstot(1); - kv->set_nks(1); - kv->renew(kv->get_nkstot()); + kv->set_nkstot (1); + kv->set_nks (1); + kv->renew (kv->get_nkstot ()); kv->kvec_d[0].x = 0.0; kv->kvec_d[0].y = 0.0; kv->kvec_d[0].z = 0.0; @@ -733,184 +695,183 @@ TEST_F(KlistTest, SetBothKvecFinalSCF) kv->kvec_c[0].y = 0.0; kv->kvec_c[0].z = 0.0; std::string skpt; -// PARAM.input.final_scf = true; + // PARAM.input.final_scf = true; kv->kd_done 
= false; kv->kc_done = false; // case 1 kv->k_nkstot = 0; -// kv->set_both_kvec(ucell.G, ucell.latvec, skpt); - KVectorUtils::set_both_kvec(*kv, ucell.G, ucell.latvec, skpt); - EXPECT_TRUE(kv->kd_done); - EXPECT_TRUE(kv->kc_done); + // kv->set_both_kvec(ucell.G, ucell.latvec, skpt); + KVectorUtils::set_both_kvec (*kv, ucell.G, ucell.latvec, skpt); + EXPECT_TRUE (kv->kd_done); + EXPECT_TRUE (kv->kc_done); // case 2 kv->k_nkstot = 1; kv->k_kword = "D"; -// kv->set_both_kvec(ucell.G, ucell.latvec, skpt); - KVectorUtils::set_both_kvec(*kv, ucell.G, ucell.latvec, skpt); - EXPECT_TRUE(kv->kd_done); - EXPECT_TRUE(kv->kc_done); + // kv->set_both_kvec(ucell.G, ucell.latvec, skpt); + KVectorUtils::set_both_kvec (*kv, ucell.G, ucell.latvec, skpt); + EXPECT_TRUE (kv->kd_done); + EXPECT_TRUE (kv->kc_done); // case 3 kv->k_kword = "C"; -// kv->set_both_kvec(ucell.G, ucell.latvec, skpt); - KVectorUtils::set_both_kvec(*kv, ucell.G, ucell.latvec, skpt); - EXPECT_TRUE(kv->kc_done); - EXPECT_TRUE(kv->kd_done); + // kv->set_both_kvec(ucell.G, ucell.latvec, skpt); + KVectorUtils::set_both_kvec (*kv, ucell.G, ucell.latvec, skpt); + EXPECT_TRUE (kv->kc_done); + EXPECT_TRUE (kv->kd_done); // case 4 - GlobalV::ofs_warning.open("klist_tmp_warning_8"); + GlobalV::ofs_warning.open ("klist_tmp_warning_8"); kv->k_kword = "arbitrary"; -// kv->set_both_kvec(ucell.G, ucell.latvec, skpt); - KVectorUtils::set_both_kvec(*kv, ucell.G, ucell.latvec, skpt); - GlobalV::ofs_warning.close(); - ifs.open("klist_tmp_warning_8"); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("Error : neither Cartesian nor Direct kpoint.")); - ifs.close(); - remove("klist_tmp_warning_8"); + // kv->set_both_kvec(ucell.G, ucell.latvec, skpt); + KVectorUtils::set_both_kvec (*kv, ucell.G, ucell.latvec, skpt); + GlobalV::ofs_warning.close (); + ifs.open ("klist_tmp_warning_8"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + 
EXPECT_THAT (str, testing::HasSubstr ("Error : neither Cartesian nor Direct kpoint.")); + ifs.close (); + remove ("klist_tmp_warning_8"); } -TEST_F(KlistTest, SetBothKvec) +TEST_F (KlistTest, SetBothKvec) { kv->nspin = 1; - kv->set_nkstot(1); - kv->set_nks(1); - kv->renew(kv->get_nkstot()); + kv->set_nkstot (1); + kv->set_nks (1); + kv->renew (kv->get_nkstot ()); kv->kvec_d[0].x = 0.0; kv->kvec_d[0].y = 0.0; kv->kvec_d[0].z = 0.0; kv->kc_done = false; kv->kd_done = true; std::string skpt; -// PARAM.input.final_scf = false; -// kv->set_both_kvec(ucell.G, ucell.latvec, skpt); - KVectorUtils::set_both_kvec(*kv, ucell.G, ucell.latvec, skpt); - EXPECT_TRUE(kv->kc_done); + // PARAM.input.final_scf = false; + // kv->set_both_kvec(ucell.G, ucell.latvec, skpt); + KVectorUtils::set_both_kvec (*kv, ucell.G, ucell.latvec, skpt); + EXPECT_TRUE (kv->kc_done); kv->kc_done = true; kv->kd_done = false; -// kv->set_both_kvec(ucell.G, ucell.latvec, skpt); - KVectorUtils::set_both_kvec(*kv, ucell.G, ucell.latvec, skpt); - EXPECT_TRUE(kv->kd_done); + // kv->set_both_kvec(ucell.G, ucell.latvec, skpt); + KVectorUtils::set_both_kvec (*kv, ucell.G, ucell.latvec, skpt); + EXPECT_TRUE (kv->kd_done); } -TEST_F(KlistTest, NormalizeWk) +TEST_F (KlistTest, NormalizeWk) { kv->nspin = 1; - kv->set_nkstot(2); - kv->set_nks(2); - kv->renew(kv->get_nkstot()); + kv->set_nkstot (2); + kv->set_nks (2); + kv->renew (kv->get_nkstot ()); kv->wk[0] = 1.0; kv->wk[1] = 1.0; int deg = 2; - kv->normalize_wk(deg); - EXPECT_DOUBLE_EQ(kv->wk[0], 1.0); - EXPECT_DOUBLE_EQ(kv->wk[1], 1.0); + kv->normalize_wk (deg); + EXPECT_DOUBLE_EQ (kv->wk[0], 1.0); + EXPECT_DOUBLE_EQ (kv->wk[1], 1.0); } -TEST_F(KlistTest, NormalizeWkZeroWeights) +TEST_F (KlistTest, NormalizeWkZeroWeights) { // Test that zero weights are handled correctly kv->nspin = 1; - kv->set_nkstot(3); - kv->set_nks(3); - kv->renew(kv->get_nkstot()); + kv->set_nkstot (3); + kv->set_nks (3); + kv->renew (kv->get_nkstot ()); kv->wk[0] = 0.0; kv->wk[1] = 0.0; 
kv->wk[2] = 0.0; int deg = 2; // Should not crash and should set equal weights - kv->normalize_wk(deg); + kv->normalize_wk (deg); // Each k-point should have weight = deg / nkstot = 2 / 3 - EXPECT_NEAR(kv->wk[0], 2.0 / 3.0, 1e-10); - EXPECT_NEAR(kv->wk[1], 2.0 / 3.0, 1e-10); - EXPECT_NEAR(kv->wk[2], 2.0 / 3.0, 1e-10); + EXPECT_NEAR (kv->wk[0], 2.0 / 3.0, 1e-10); + EXPECT_NEAR (kv->wk[1], 2.0 / 3.0, 1e-10); + EXPECT_NEAR (kv->wk[2], 2.0 / 3.0, 1e-10); // Sum should equal deg double sum = kv->wk[0] + kv->wk[1] + kv->wk[2]; - EXPECT_NEAR(sum, 2.0, 1e-10); + EXPECT_NEAR (sum, 2.0, 1e-10); } - -TEST_F(KlistTest, UpdateUseIBZ) +TEST_F (KlistTest, UpdateUseIBZ) { kv->nspin = 1; - kv->set_nkstot(3); - kv->set_nks(3); - kv->renew(kv->get_nkstot()); - kv->update_use_ibz(2, std::vector>(2, {0, 0, 0}), std::vector(2, 0.0)); - EXPECT_EQ(kv->get_nkstot(), 2); - EXPECT_EQ(kv->kvec_d.size(), 2); - EXPECT_TRUE(kv->kd_done); - EXPECT_FALSE(kv->kc_done); + kv->set_nkstot (3); + kv->set_nks (3); + kv->renew (kv->get_nkstot ()); + kv->update_use_ibz (2, std::vector> (2, {0, 0, 0}), std::vector (2, 0.0)); + EXPECT_EQ (kv->get_nkstot (), 2); + EXPECT_EQ (kv->kvec_d.size (), 2); + EXPECT_TRUE (kv->kd_done); + EXPECT_FALSE (kv->kc_done); } -TEST_F(KlistTest, IbzKpoint) +TEST_F (KlistTest, IbzKpoint) { // construct cell and symmetry ModuleSymmetry::Symmetry symm; - construct_ucell(stru_lib[0]); - GlobalV::ofs_running.open("tmp_klist_3"); - symm.analy_sys(ucell.lat, ucell.st, ucell.atoms, GlobalV::ofs_running); + construct_ucell (stru_lib[0]); + GlobalV::ofs_running.open ("tmp_klist_3"); + symm.analy_sys (ucell.lat, ucell.st, ucell.atoms, GlobalV::ofs_running); // read KPT std::string k_file = "./support/KPT1"; kv->nspin = 1; - kv->read_kpoints(ucell,k_file); - EXPECT_EQ(kv->get_nkstot(), 512); + kv->read_kpoints (ucell, k_file); + EXPECT_EQ (kv->get_nkstot (), 512); // calculate ibz_kpoint std::string skpt; ModuleSymmetry::Symmetry::symm_flag = 1; bool match = true; - 
KVectorUtils::kvec_ibz_kpoint(*kv, symm, ModuleSymmetry::Symmetry::symm_flag, skpt, ucell, match); - EXPECT_EQ(kv->get_nkstot(), 35); + KVectorUtils::kvec_ibz_kpoint (*kv, symm, ModuleSymmetry::Symmetry::symm_flag, skpt, ucell, match); + EXPECT_EQ (kv->get_nkstot (), 35); GlobalV::ofs_running << skpt << std::endl; - GlobalV::ofs_running.close(); - ClearUcell(); - remove("tmp_klist_3"); + GlobalV::ofs_running.close (); + ClearUcell (); + remove ("tmp_klist_3"); } -TEST_F(KlistTest, IbzKpointIsMP) +TEST_F (KlistTest, IbzKpointIsMP) { // construct cell and symmetry ModuleSymmetry::Symmetry symm; - construct_ucell(stru_lib[0]); - GlobalV::ofs_running.open("tmp_klist_4"); - symm.analy_sys(ucell.lat, ucell.st, ucell.atoms, GlobalV::ofs_running); + construct_ucell (stru_lib[0]); + GlobalV::ofs_running.open ("tmp_klist_4"); + symm.analy_sys (ucell.lat, ucell.st, ucell.atoms, GlobalV::ofs_running); // read KPT std::string k_file = "./support/KPT1"; kv->nspin = 1; - kv->read_kpoints(ucell,k_file); - EXPECT_EQ(kv->get_nkstot(), 512); - EXPECT_TRUE(kv->is_mp); + kv->read_kpoints (ucell, k_file); + EXPECT_EQ (kv->get_nkstot (), 512); + EXPECT_TRUE (kv->is_mp); // calculate ibz_kpoint std::string skpt; ModuleSymmetry::Symmetry::symm_flag = 0; bool match = true; - KVectorUtils::kvec_ibz_kpoint(*kv, symm, ModuleSymmetry::Symmetry::symm_flag, skpt, ucell, match); - EXPECT_EQ(kv->get_nks(), 260); + KVectorUtils::kvec_ibz_kpoint (*kv, symm, ModuleSymmetry::Symmetry::symm_flag, skpt, ucell, match); + EXPECT_EQ (kv->get_nks (), 260); GlobalV::ofs_running << skpt << std::endl; - GlobalV::ofs_running.close(); - ClearUcell(); - remove("tmp_klist_4"); + GlobalV::ofs_running.close (); + ClearUcell (); + remove ("tmp_klist_4"); } -TEST_F(KlistTest, IbzKpointCustomWeights) +TEST_F (KlistTest, IbzKpointCustomWeights) { // This test verifies the fix for issue #6552: k-point weights should not be overwritten // during IBZ reduction for non-Monkhorst-Pack k-point lists. 
ModuleSymmetry::Symmetry symm; - construct_ucell(stru_lib[0]); - GlobalV::ofs_running.open("tmp_klist_custom_weights"); - symm.analy_sys(ucell.lat, ucell.st, ucell.atoms, GlobalV::ofs_running); + construct_ucell (stru_lib[0]); + GlobalV::ofs_running.open ("tmp_klist_custom_weights"); + symm.analy_sys (ucell.lat, ucell.st, ucell.atoms, GlobalV::ofs_running); // Test 1: Non-MP k-points with uniform weights (KPT4) { K_Vectors kv_test1; std::string k_file = "./support/KPT4"; kv_test1.nspin = 1; - kv_test1.read_kpoints(ucell, k_file); - EXPECT_EQ(kv_test1.get_nkstot(), 5); - EXPECT_FALSE(kv_test1.is_mp); // Should be non-MP + kv_test1.read_kpoints (ucell, k_file); + EXPECT_EQ (kv_test1.get_nkstot (), 5); + EXPECT_FALSE (kv_test1.is_mp); // Should be non-MP // Store original weights before IBZ reduction std::vector original_weights = kv_test1.wk; @@ -919,17 +880,17 @@ TEST_F(KlistTest, IbzKpointCustomWeights) std::string skpt; ModuleSymmetry::Symmetry::symm_flag = 1; bool match = true; - KVectorUtils::kvec_ibz_kpoint(kv_test1, symm, ModuleSymmetry::Symmetry::symm_flag, skpt, ucell, match); + KVectorUtils::kvec_ibz_kpoint (kv_test1, symm, ModuleSymmetry::Symmetry::symm_flag, skpt, ucell, match); // Verify that weights are preserved (not overwritten with 1/nkstot) // After IBZ reduction, weights should still reflect the input weights double total_weight = 0.0; - for (int i = 0; i < kv_test1.get_nkstot(); ++i) - { - total_weight += kv_test1.wk[i]; - } + for (int i = 0; i < kv_test1.get_nkstot (); ++i) + { + total_weight += kv_test1.wk[i]; + } // Weights should sum to approximately the number of original k-points (before normalization) - EXPECT_GT(total_weight, 0.0); + EXPECT_GT (total_weight, 0.0); } // Test 2: Non-MP k-points with non-uniform custom weights @@ -937,56 +898,56 @@ TEST_F(KlistTest, IbzKpointCustomWeights) K_Vectors kv_test2; std::string k_file = "./support/KPT_custom_weights"; kv_test2.nspin = 1; - kv_test2.read_kpoints(ucell, k_file); - 
EXPECT_EQ(kv_test2.get_nkstot(), 5); - EXPECT_FALSE(kv_test2.is_mp); // Should be non-MP + kv_test2.read_kpoints (ucell, k_file); + EXPECT_EQ (kv_test2.get_nkstot (), 5); + EXPECT_FALSE (kv_test2.is_mp); // Should be non-MP // Verify custom weights were read correctly - EXPECT_DOUBLE_EQ(kv_test2.wk[0], 0.1); - EXPECT_DOUBLE_EQ(kv_test2.wk[1], 0.2); - EXPECT_DOUBLE_EQ(kv_test2.wk[2], 0.3); - EXPECT_DOUBLE_EQ(kv_test2.wk[3], 0.2); - EXPECT_DOUBLE_EQ(kv_test2.wk[4], 0.2); + EXPECT_DOUBLE_EQ (kv_test2.wk[0], 0.1); + EXPECT_DOUBLE_EQ (kv_test2.wk[1], 0.2); + EXPECT_DOUBLE_EQ (kv_test2.wk[2], 0.3); + EXPECT_DOUBLE_EQ (kv_test2.wk[3], 0.2); + EXPECT_DOUBLE_EQ (kv_test2.wk[4], 0.2); // Store original weights std::vector original_weights = kv_test2.wk; double original_sum = 0.0; - for (double w : original_weights) - { - original_sum += w; - } + for (double w: original_weights) + { + original_sum += w; + } // Apply IBZ reduction std::string skpt; ModuleSymmetry::Symmetry::symm_flag = 1; bool match = true; - KVectorUtils::kvec_ibz_kpoint(kv_test2, symm, ModuleSymmetry::Symmetry::symm_flag, skpt, ucell, match); + KVectorUtils::kvec_ibz_kpoint (kv_test2, symm, ModuleSymmetry::Symmetry::symm_flag, skpt, ucell, match); // After IBZ reduction, the weights should be based on the custom input weights, // not uniform 1/nkstot weights. The total weight should be preserved. 
double total_weight_after = 0.0; - for (int i = 0; i < kv_test2.get_nkstot(); ++i) - { - total_weight_after += kv_test2.wk[i]; - } + for (int i = 0; i < kv_test2.get_nkstot (); ++i) + { + total_weight_after += kv_test2.wk[i]; + } // The sum of weights after IBZ reduction should equal the sum before // (accounting for symmetry operations that may combine k-points) - EXPECT_NEAR(total_weight_after, original_sum, 1e-10); + EXPECT_NEAR (total_weight_after, original_sum, 1e-10); // Verify that at least one weight is NOT equal to 1/5 (which would indicate // the bug where custom weights are overwritten with uniform weights) bool has_custom_weight = false; double uniform_weight = 1.0 / 5.0; - for (int i = 0; i < kv_test2.get_nkstot(); ++i) - { - if (std::abs(kv_test2.wk[i] - uniform_weight) > 1e-10) + for (int i = 0; i < kv_test2.get_nkstot (); ++i) { - has_custom_weight = true; - break; + if (std::abs (kv_test2.wk[i] - uniform_weight) > 1e-10) + { + has_custom_weight = true; + break; + } } - } - EXPECT_TRUE(has_custom_weight) << "Custom weights were overwritten with uniform weights!"; + EXPECT_TRUE (has_custom_weight) << "Custom weights were overwritten with uniform weights!"; } // Test 3: MP grid (regression test - should still work correctly) @@ -994,26 +955,26 @@ TEST_F(KlistTest, IbzKpointCustomWeights) K_Vectors kv_test3; std::string k_file = "./support/KPT1"; kv_test3.nspin = 1; - kv_test3.read_kpoints(ucell, k_file); - EXPECT_EQ(kv_test3.get_nkstot(), 512); - EXPECT_TRUE(kv_test3.is_mp); // Should be MP + kv_test3.read_kpoints (ucell, k_file); + EXPECT_EQ (kv_test3.get_nkstot (), 512); + EXPECT_TRUE (kv_test3.is_mp); // Should be MP // Apply IBZ reduction std::string skpt; ModuleSymmetry::Symmetry::symm_flag = 1; bool match = true; - KVectorUtils::kvec_ibz_kpoint(kv_test3, symm, ModuleSymmetry::Symmetry::symm_flag, skpt, ucell, match); + KVectorUtils::kvec_ibz_kpoint (kv_test3, symm, ModuleSymmetry::Symmetry::symm_flag, skpt, ucell, match); // For MP grids, all 
weights should be uniform after IBZ reduction - EXPECT_EQ(kv_test3.get_nkstot(), 35); // Known result from existing test + EXPECT_EQ (kv_test3.get_nkstot (), 35); // Known result from existing test // Verify weights sum correctly double total_weight = 0.0; - for (int i = 0; i < kv_test3.get_nkstot(); ++i) - { - total_weight += kv_test3.wk[i]; - } - EXPECT_GT(total_weight, 0.0); + for (int i = 0; i < kv_test3.get_nkstot (); ++i) + { + total_weight += kv_test3.wk[i]; + } + EXPECT_GT (total_weight, 0.0); } // Test 4: Weight normalization verification @@ -1021,29 +982,28 @@ TEST_F(KlistTest, IbzKpointCustomWeights) K_Vectors kv_test4; std::string k_file = "./support/KPT_custom_weights"; kv_test4.nspin = 1; - kv_test4.read_kpoints(ucell, k_file); + kv_test4.read_kpoints (ucell, k_file); // Apply IBZ reduction std::string skpt; ModuleSymmetry::Symmetry::symm_flag = 1; bool match = true; - KVectorUtils::kvec_ibz_kpoint(kv_test4, symm, ModuleSymmetry::Symmetry::symm_flag, skpt, ucell, match); + KVectorUtils::kvec_ibz_kpoint (kv_test4, symm, ModuleSymmetry::Symmetry::symm_flag, skpt, ucell, match); // Normalize weights int degspin = (kv_test4.nspin == 2) ? 
1 : 2; - kv_test4.normalize_wk(degspin); + kv_test4.normalize_wk (degspin); // After normalization, weights should sum to degspin double total_weight = 0.0; - for (int i = 0; i < kv_test4.get_nkstot(); ++i) - { - total_weight += kv_test4.wk[i]; - } - EXPECT_NEAR(total_weight, degspin, 1e-10); + for (int i = 0; i < kv_test4.get_nkstot (); ++i) + { + total_weight += kv_test4.wk[i]; + } + EXPECT_NEAR (total_weight, degspin, 1e-10); } - GlobalV::ofs_running.close(); - ClearUcell(); - remove("tmp_klist_custom_weights"); + GlobalV::ofs_running.close (); + ClearUcell (); + remove ("tmp_klist_custom_weights"); } - diff --git a/source/source_cell/test/klist_test_para.cpp b/source/source_cell/test/klist_test_para.cpp index d37d2da5168..1380bf878ff 100644 --- a/source/source_cell/test/klist_test_para.cpp +++ b/source/source_cell/test/klist_test_para.cpp @@ -27,71 +27,30 @@ #undef private bool berryphase::berry_phase_flag = false; -pseudo::pseudo() -{ -} -pseudo::~pseudo() -{ -} -Atom::Atom() -{ -} -Atom::~Atom() -{ -} -Atom_pseudo::Atom_pseudo() -{ -} -Atom_pseudo::~Atom_pseudo() -{ -} -InfoNonlocal::InfoNonlocal() -{ -} -InfoNonlocal::~InfoNonlocal() -{ -} -UnitCell::UnitCell() -{ -} -UnitCell::~UnitCell() -{ -} -Magnetism::Magnetism() -{ -} -Magnetism::~Magnetism() -{ -} -ORB_gaunt_table::ORB_gaunt_table() -{ -} -ORB_gaunt_table::~ORB_gaunt_table() -{ -} -pseudopot_cell_vl::pseudopot_cell_vl() -{ -} -pseudopot_cell_vl::~pseudopot_cell_vl() -{ -} -pseudopot_cell_vnl::pseudopot_cell_vnl() -{ -} -pseudopot_cell_vnl::~pseudopot_cell_vnl() -{ -} -Soc::~Soc() -{ -} -Fcoef::~Fcoef() -{ -} -SepPot::SepPot(){} -SepPot::~SepPot(){} -Sep_Cell::Sep_Cell() noexcept {} -Sep_Cell::~Sep_Cell() noexcept {} - +pseudo::pseudo () {} +pseudo::~pseudo () {} +Atom::Atom () {} +Atom::~Atom () {} +Atom_pseudo::Atom_pseudo () {} +Atom_pseudo::~Atom_pseudo () {} +InfoNonlocal::InfoNonlocal () {} +InfoNonlocal::~InfoNonlocal () {} +UnitCell::UnitCell () {} +UnitCell::~UnitCell () {} 
+Magnetism::Magnetism () {} +Magnetism::~Magnetism () {} +ORB_gaunt_table::ORB_gaunt_table () {} +ORB_gaunt_table::~ORB_gaunt_table () {} +pseudopot_cell_vl::pseudopot_cell_vl () {} +pseudopot_cell_vl::~pseudopot_cell_vl () {} +pseudopot_cell_vnl::pseudopot_cell_vnl () {} +pseudopot_cell_vnl::~pseudopot_cell_vnl () {} +Soc::~Soc () {} +Fcoef::~Fcoef () {} +SepPot::SepPot () {} +SepPot::~SepPot () {} +Sep_Cell::Sep_Cell () noexcept {} +Sep_Cell::~Sep_Cell () noexcept {} /************************************************ * unit test of class K_Vectors @@ -149,13 +108,14 @@ class KlistParaTest : public testing::Test std::string output; UnitCell ucell; // used to construct cell and analyse its symmetry - void construct_ucell(stru_& stru) + void + construct_ucell (stru_& stru) { std::vector coord = stru.all_type; - ucell.a1 = ModuleBase::Vector3(stru.cell[0], stru.cell[1], stru.cell[2]); - ucell.a2 = ModuleBase::Vector3(stru.cell[3], stru.cell[4], stru.cell[5]); - ucell.a3 = ModuleBase::Vector3(stru.cell[6], stru.cell[7], stru.cell[8]); - ucell.ntype = stru.all_type.size(); + ucell.a1 = ModuleBase::Vector3 (stru.cell[0], stru.cell[1], stru.cell[2]); + ucell.a2 = ModuleBase::Vector3 (stru.cell[3], stru.cell[4], stru.cell[5]); + ucell.a3 = ModuleBase::Vector3 (stru.cell[6], stru.cell[7], stru.cell[8]); + ucell.ntype = stru.all_type.size (); ucell.atoms = new Atom[ucell.ntype]; ucell.nat = 0; ucell.latvec.e11 = ucell.a1.x; @@ -167,236 +127,248 @@ class KlistParaTest : public testing::Test ucell.latvec.e31 = ucell.a3.x; ucell.latvec.e32 = ucell.a3.y; ucell.latvec.e33 = ucell.a3.z; - ucell.GT = ucell.latvec.Inverse(); - ucell.G = ucell.GT.Transpose(); + ucell.GT = ucell.latvec.Inverse (); + ucell.G = ucell.GT.Transpose (); ucell.lat0 = 1.8897261254578281; - for (int i = 0; i < coord.size(); i++) - { - ucell.atoms[i].label = coord[i].atomname; - ucell.atoms[i].na = coord[i].coordinate.size(); - ucell.atoms[i].tau.resize(ucell.atoms[i].na); - 
ucell.atoms[i].taud.resize(ucell.atoms[i].na); - for (int j = 0; j < ucell.atoms[i].na; j++) + for (int i = 0; i < coord.size (); i++) { - std::vector this_atom = coord[i].coordinate[j]; - ucell.atoms[i].tau[j] = ModuleBase::Vector3(this_atom[0], this_atom[1], this_atom[2]); - ModuleBase::Mathzone::Cartesian_to_Direct(ucell.atoms[i].tau[j].x, - ucell.atoms[i].tau[j].y, - ucell.atoms[i].tau[j].z, - ucell.a1.x, - ucell.a1.y, - ucell.a1.z, - ucell.a2.x, - ucell.a2.y, - ucell.a2.z, - ucell.a3.x, - ucell.a3.y, - ucell.a3.z, - ucell.atoms[i].taud[j].x, - ucell.atoms[i].taud[j].y, - ucell.atoms[i].taud[j].z); + ucell.atoms[i].label = coord[i].atomname; + ucell.atoms[i].na = coord[i].coordinate.size (); + ucell.atoms[i].tau.resize (ucell.atoms[i].na); + ucell.atoms[i].taud.resize (ucell.atoms[i].na); + for (int j = 0; j < ucell.atoms[i].na; j++) + { + std::vector this_atom = coord[i].coordinate[j]; + ucell.atoms[i].tau[j] = ModuleBase::Vector3 (this_atom[0], this_atom[1], this_atom[2]); + ModuleBase::Mathzone::Cartesian_to_Direct (ucell.atoms[i].tau[j].x, + ucell.atoms[i].tau[j].y, + ucell.atoms[i].tau[j].z, + ucell.a1.x, + ucell.a1.y, + ucell.a1.z, + ucell.a2.x, + ucell.a2.y, + ucell.a2.z, + ucell.a3.x, + ucell.a3.y, + ucell.a3.z, + ucell.atoms[i].taud[j].x, + ucell.atoms[i].taud[j].y, + ucell.atoms[i].taud[j].z); + } + ucell.nat += ucell.atoms[i].na; } - ucell.nat += ucell.atoms[i].na; - } } // clear ucell - void ClearUcell() + void + ClearUcell () { delete[] ucell.atoms; } }; #ifdef __MPI -TEST_F(KlistParaTest, Set) +TEST_F (KlistParaTest, Set) { // construct cell and symmetry ModuleSymmetry::Symmetry symm; - construct_ucell(stru_lib[0]); - if (GlobalV::MY_RANK == 0) { - GlobalV::ofs_running.open("tmp_klist_5"); -} - symm.analy_sys(ucell.lat, ucell.st, ucell.atoms, GlobalV::ofs_running); + construct_ucell (stru_lib[0]); + if (GlobalV::MY_RANK == 0) + { + GlobalV::ofs_running.open ("tmp_klist_5"); + } + symm.analy_sys (ucell.lat, ucell.st, ucell.atoms, 
GlobalV::ofs_running); // read KPT std::string k_file = "./support/KPT1"; // set klist kv->nspin = 1; PARAM.input.nspin = 1; if (GlobalV::NPROC == 4) - { - GlobalV::KPAR = 2; - } - Parallel_Global::init_pools(GlobalV::NPROC, - GlobalV::MY_RANK, - PARAM.input.bndpar, - GlobalV::KPAR, - GlobalV::NPROC_IN_BNDGROUP, - GlobalV::RANK_IN_BPGROUP, - GlobalV::MY_BNDGROUP, - GlobalV::NPROC_IN_POOL, - GlobalV::RANK_IN_POOL, - GlobalV::MY_POOL); + { + GlobalV::KPAR = 2; + } + Parallel_Global::init_pools (GlobalV::NPROC, + GlobalV::MY_RANK, + PARAM.input.bndpar, + GlobalV::KPAR, + GlobalV::NPROC_IN_BNDGROUP, + GlobalV::RANK_IN_BPGROUP, + GlobalV::MY_BNDGROUP, + GlobalV::NPROC_IN_POOL, + GlobalV::RANK_IN_POOL, + GlobalV::MY_POOL); ModuleSymmetry::Symmetry::symm_flag = 1; - kv->set(ucell,symm, k_file, kv->nspin, ucell.G, ucell.latvec, GlobalV::ofs_running); - EXPECT_EQ(kv->get_nkstot(), 35); - EXPECT_EQ(kv->get_nkstot_full(), 512); - EXPECT_GT(kv->get_nkstot_full(), kv->get_nkstot()); - EXPECT_TRUE(kv->kc_done); - EXPECT_TRUE(kv->kd_done); + kv->set (ucell, symm, k_file, kv->nspin, ucell.G, ucell.latvec, GlobalV::ofs_running); + EXPECT_EQ (kv->get_nkstot (), 35); + EXPECT_EQ (kv->get_nkstot_full (), 512); + EXPECT_GT (kv->get_nkstot_full (), kv->get_nkstot ()); + EXPECT_TRUE (kv->kc_done); + EXPECT_TRUE (kv->kd_done); if (GlobalV::NPROC == 4) - { - if (GlobalV::MY_RANK == 0) { - EXPECT_EQ(kv->get_nks(), 18); -} - if (GlobalV::MY_RANK == 1) { - EXPECT_EQ(kv->get_nks(), 18); -} - if (GlobalV::MY_RANK == 2) { - EXPECT_EQ(kv->get_nks(), 17); -} - if (GlobalV::MY_RANK == 3) { - EXPECT_EQ(kv->get_nks(), 17); -} - } - std::vector local_kvec_c_full(kv->kvec_c_full.size() * 3); - for (size_t ik = 0; ik < kv->kvec_c_full.size(); ++ik) - { - local_kvec_c_full[3 * ik] = kv->kvec_c_full[ik].x; - local_kvec_c_full[3 * ik + 1] = kv->kvec_c_full[ik].y; - local_kvec_c_full[3 * ik + 2] = kv->kvec_c_full[ik].z; - } - const int local_count = static_cast(local_kvec_c_full.size()); + { + if 
(GlobalV::MY_RANK == 0) + { + EXPECT_EQ (kv->get_nks (), 18); + } + if (GlobalV::MY_RANK == 1) + { + EXPECT_EQ (kv->get_nks (), 18); + } + if (GlobalV::MY_RANK == 2) + { + EXPECT_EQ (kv->get_nks (), 17); + } + if (GlobalV::MY_RANK == 3) + { + EXPECT_EQ (kv->get_nks (), 17); + } + } + std::vector local_kvec_c_full (kv->kvec_c_full.size () * 3); + for (size_t ik = 0; ik < kv->kvec_c_full.size (); ++ik) + { + local_kvec_c_full[3 * ik] = kv->kvec_c_full[ik].x; + local_kvec_c_full[3 * ik + 1] = kv->kvec_c_full[ik].y; + local_kvec_c_full[3 * ik + 2] = kv->kvec_c_full[ik].z; + } + const int local_count = static_cast (local_kvec_c_full.size ()); std::vector counts; std::vector displs; std::vector pools; if (GlobalV::MY_RANK == 0) - { - counts.resize(GlobalV::NPROC); - pools.resize(GlobalV::NPROC); - } - MPI_Gather(&local_count, 1, MPI_INT, counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD); - MPI_Gather(&GlobalV::MY_POOL, 1, MPI_INT, pools.data(), 1, MPI_INT, 0, MPI_COMM_WORLD); + { + counts.resize (GlobalV::NPROC); + pools.resize (GlobalV::NPROC); + } + MPI_Gather (&local_count, 1, MPI_INT, counts.data (), 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Gather (&GlobalV::MY_POOL, 1, MPI_INT, pools.data (), 1, MPI_INT, 0, MPI_COMM_WORLD); std::vector gathered_kvec_c_full; if (GlobalV::MY_RANK == 0) - { - displs.resize(GlobalV::NPROC, 0); - for (int irank = 1; irank < GlobalV::NPROC; ++irank) { - displs[irank] = displs[irank - 1] + counts[irank - 1]; + displs.resize (GlobalV::NPROC, 0); + for (int irank = 1; irank < GlobalV::NPROC; ++irank) + { + displs[irank] = displs[irank - 1] + counts[irank - 1]; + } + gathered_kvec_c_full.resize (displs.back () + counts.back ()); } - gathered_kvec_c_full.resize(displs.back() + counts.back()); - } - MPI_Gatherv(local_kvec_c_full.data(), - local_count, - MPI_DOUBLE, - gathered_kvec_c_full.data(), - counts.data(), - displs.data(), - MPI_DOUBLE, - 0, - MPI_COMM_WORLD); + MPI_Gatherv (local_kvec_c_full.data (), + local_count, + MPI_DOUBLE, + 
gathered_kvec_c_full.data (), + counts.data (), + displs.data (), + MPI_DOUBLE, + 0, + MPI_COMM_WORLD); if (GlobalV::MY_RANK == 0) - { - for (int irank = 0; irank < GlobalV::NPROC; ++irank) { - for (int jrank = irank + 1; jrank < GlobalV::NPROC; ++jrank) - { - if (pools[irank] != pools[jrank]) + for (int irank = 0; irank < GlobalV::NPROC; ++irank) { - continue; + for (int jrank = irank + 1; jrank < GlobalV::NPROC; ++jrank) + { + if (pools[irank] != pools[jrank]) + { + continue; + } + ASSERT_EQ (counts[irank], counts[jrank]); + for (int i = 0; i < counts[irank]; ++i) + { + EXPECT_NEAR (gathered_kvec_c_full[displs[irank] + i], + gathered_kvec_c_full[displs[jrank] + i], + 1e-12); + } + } } - ASSERT_EQ(counts[irank], counts[jrank]); - for (int i = 0; i < counts[irank]; ++i) - { - EXPECT_NEAR(gathered_kvec_c_full[displs[irank] + i], - gathered_kvec_c_full[displs[jrank] + i], - 1e-12); - } - } } - } - ClearUcell(); + ClearUcell (); if (GlobalV::MY_RANK == 0) - { - GlobalV::ofs_running.close(); - remove("tmp_klist_5"); - remove("kpoints"); - } + { + GlobalV::ofs_running.close (); + remove ("tmp_klist_5"); + remove ("kpoints"); + } } -TEST_F(KlistParaTest, SetAfterVC) +TEST_F (KlistParaTest, SetAfterVC) { // construct cell and symmetry ModuleSymmetry::Symmetry symm; - construct_ucell(stru_lib[0]); - if (GlobalV::MY_RANK == 0) { - GlobalV::ofs_running.open("tmp_klist_6"); -} - symm.analy_sys(ucell.lat, ucell.st, ucell.atoms, GlobalV::ofs_running); + construct_ucell (stru_lib[0]); + if (GlobalV::MY_RANK == 0) + { + GlobalV::ofs_running.open ("tmp_klist_6"); + } + symm.analy_sys (ucell.lat, ucell.st, ucell.atoms, GlobalV::ofs_running); // read KPT std::string k_file = "./support/KPT1"; // set klist kv->nspin = 1; PARAM.input.nspin = 1; if (GlobalV::NPROC == 4) - { - GlobalV::KPAR = 1; - } - Parallel_Global::init_pools(GlobalV::NPROC, - GlobalV::MY_RANK, - PARAM.input.bndpar, - GlobalV::KPAR, - GlobalV::NPROC_IN_BNDGROUP, - GlobalV::RANK_IN_BPGROUP, - GlobalV::MY_BNDGROUP, - 
GlobalV::NPROC_IN_POOL, - GlobalV::RANK_IN_POOL, - GlobalV::MY_POOL); + { + GlobalV::KPAR = 1; + } + Parallel_Global::init_pools (GlobalV::NPROC, + GlobalV::MY_RANK, + PARAM.input.bndpar, + GlobalV::KPAR, + GlobalV::NPROC_IN_BNDGROUP, + GlobalV::RANK_IN_BPGROUP, + GlobalV::MY_BNDGROUP, + GlobalV::NPROC_IN_POOL, + GlobalV::RANK_IN_POOL, + GlobalV::MY_POOL); ModuleSymmetry::Symmetry::symm_flag = 1; - kv->set(ucell,symm, k_file, kv->nspin, ucell.G, ucell.latvec, GlobalV::ofs_running); - EXPECT_EQ(kv->get_nkstot(), 35); - EXPECT_TRUE(kv->kc_done); - EXPECT_TRUE(kv->kd_done); + kv->set (ucell, symm, k_file, kv->nspin, ucell.G, ucell.latvec, GlobalV::ofs_running); + EXPECT_EQ (kv->get_nkstot (), 35); + EXPECT_TRUE (kv->kc_done); + EXPECT_TRUE (kv->kd_done); if (GlobalV::NPROC == 4) - { - if (GlobalV::MY_RANK == 0) { - EXPECT_EQ(kv->get_nks(), 35); -} - if (GlobalV::MY_RANK == 1) { - EXPECT_EQ(kv->get_nks(), 35); -} - if (GlobalV::MY_RANK == 2) { - EXPECT_EQ(kv->get_nks(), 35); -} - if (GlobalV::MY_RANK == 3) { - EXPECT_EQ(kv->get_nks(), 35); -} - } + { + if (GlobalV::MY_RANK == 0) + { + EXPECT_EQ (kv->get_nks (), 35); + } + if (GlobalV::MY_RANK == 1) + { + EXPECT_EQ (kv->get_nks (), 35); + } + if (GlobalV::MY_RANK == 2) + { + EXPECT_EQ (kv->get_nks (), 35); + } + if (GlobalV::MY_RANK == 3) + { + EXPECT_EQ (kv->get_nks (), 35); + } + } // call set_after_vc here kv->kc_done = false; -// kv->set_after_vc(kv->nspin, ucell.G, ucell.latvec); - KVectorUtils::set_after_vc(*kv, kv->nspin, ucell.G); - EXPECT_TRUE(kv->kc_done); - EXPECT_TRUE(kv->kd_done); + // kv->set_after_vc(kv->nspin, ucell.G, ucell.latvec); + KVectorUtils::set_after_vc (*kv, kv->nspin, ucell.G); + EXPECT_TRUE (kv->kc_done); + EXPECT_TRUE (kv->kd_done); // clear - ClearUcell(); + ClearUcell (); if (GlobalV::MY_RANK == 0) - { - GlobalV::ofs_running.close(); - remove("tmp_klist_6"); - } + { + GlobalV::ofs_running.close (); + remove ("tmp_klist_6"); + } } -int main(int argc, char** argv) +int + main (int argc, 
char** argv) { - MPI_Init(&argc, &argv); - testing::InitGoogleTest(&argc, argv); + MPI_Init (&argc, &argv); + testing::InitGoogleTest (&argc, argv); - MPI_Comm_size(MPI_COMM_WORLD, &GlobalV::NPROC); - MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK); - int result = RUN_ALL_TESTS(); + MPI_Comm_size (MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); + int result = RUN_ALL_TESTS (); - MPI_Finalize(); + MPI_Finalize (); return result; } diff --git a/source/source_cell/test/parallel_kpoints_test.cpp b/source/source_cell/test/parallel_kpoints_test.cpp index fe1ab848888..017165f59dd 100644 --- a/source/source_cell/test/parallel_kpoints_test.cpp +++ b/source/source_cell/test/parallel_kpoints_test.cpp @@ -31,17 +31,19 @@ class MPIContext { public: - MPIContext() + MPIContext () { - MPI_Comm_rank(MPI_COMM_WORLD, &_rank); - MPI_Comm_size(MPI_COMM_WORLD, &_size); + MPI_Comm_rank (MPI_COMM_WORLD, &_rank); + MPI_Comm_size (MPI_COMM_WORLD, &_size); } - int GetRank() const + int + GetRank () const { return _rank; } - int GetSize() const + int + GetSize () const { return _size; } @@ -59,74 +61,75 @@ class MPIContext class ParaPrepare { public: - ParaPrepare(int KPAR_in, int nkstot_in) : KPAR_(KPAR_in), nkstot_(nkstot_in) - { - } + ParaPrepare (int KPAR_in, int nkstot_in) : KPAR_ (KPAR_in), nkstot_ (nkstot_in) {} int KPAR_; int nkstot_; - void test_init_pools(const int& NPROC, - const int& MY_RANK, - const int& MY_POOL, - const int& RANK_IN_POOL, - const int& NPROC_IN_POOL); - void test_kinfo(const Parallel_Kpoints* Pkpts); - void test_gatherkvec(const Parallel_Kpoints* Pkpts, const MPIContext& mpi); + void test_init_pools (const int& NPROC, + const int& MY_RANK, + const int& MY_POOL, + const int& RANK_IN_POOL, + const int& NPROC_IN_POOL); + void test_kinfo (const Parallel_Kpoints* Pkpts); + void test_gatherkvec (const Parallel_Kpoints* Pkpts, const MPIContext& mpi); }; -void ParaPrepare::test_gatherkvec(const Parallel_Kpoints* Pkpts, const 
MPIContext& mpi) +void + ParaPrepare::test_gatherkvec (const Parallel_Kpoints* Pkpts, const MPIContext& mpi) { - std::vector> vec_local(Pkpts->nks_np); + std::vector> vec_local (Pkpts->nks_np); std::vector> vec_global; for (int i = 0; i < Pkpts->nks_np; ++i) - { - int k_now = i + Pkpts->startk_pool[mpi.MY_POOL]; - vec_local[i] = ModuleBase::Vector3(k_now, k_now, k_now); - } - Pkpts->gatherkvec(vec_local, vec_global); + { + int k_now = i + Pkpts->startk_pool[mpi.MY_POOL]; + vec_local[i] = ModuleBase::Vector3 (k_now, k_now, k_now); + } + Pkpts->gatherkvec (vec_local, vec_global); for (int i = 0; i < Pkpts->nkstot_np; ++i) - { - EXPECT_DOUBLE_EQ(vec_global[i].x, i); - EXPECT_DOUBLE_EQ(vec_global[i].y, i); - EXPECT_DOUBLE_EQ(vec_global[i].z, i); - } + { + EXPECT_DOUBLE_EQ (vec_global[i].x, i); + EXPECT_DOUBLE_EQ (vec_global[i].y, i); + EXPECT_DOUBLE_EQ (vec_global[i].z, i); + } } -void ParaPrepare::test_kinfo(const Parallel_Kpoints* Pkpts) +void + ParaPrepare::test_kinfo (const Parallel_Kpoints* Pkpts) { - std::vector nks_pool_(KPAR_, 0); - std::vector startk_pool_(KPAR_, 0); - std::vector whichpool_(nkstot_, 0); + std::vector nks_pool_ (KPAR_, 0); + std::vector startk_pool_ (KPAR_, 0); + std::vector whichpool_ (nkstot_, 0); int quotient = nkstot_ / KPAR_; int residue = nkstot_ % KPAR_; // the previous "residue" pools have (quotient+1) kpoints for (int i = 0; i < KPAR_; i++) - { - nks_pool_[i] = quotient; - if (i < residue) { - nks_pool_[i]++; + nks_pool_[i] = quotient; + if (i < residue) + { + nks_pool_[i]++; + } + // number of kpoints in each pool + EXPECT_EQ (Pkpts->nks_pool[i], nks_pool_[i]); + // + if (i > 0) + { + startk_pool_[i] = startk_pool_[i - 1] + nks_pool_[i - 1]; + } + // the rank of the 1st process of each pool in MPI_COMM_WORLD + EXPECT_EQ (Pkpts->startk_pool[i], startk_pool_[i]); + // + for (int ik = 0; ik < nks_pool_[i]; ik++) + { + int k_now = ik + startk_pool_[i]; + // the pool where this kpoint (k_now) resides + EXPECT_EQ (Pkpts->whichpool[k_now], 
i); + } } - // number of kpoints in each pool - EXPECT_EQ(Pkpts->nks_pool[i], nks_pool_[i]); - // - if (i > 0) - { - startk_pool_[i] = startk_pool_[i - 1] + nks_pool_[i - 1]; - } - // the rank of the 1st process of each pool in MPI_COMM_WORLD - EXPECT_EQ(Pkpts->startk_pool[i], startk_pool_[i]); - // - for (int ik = 0; ik < nks_pool_[i]; ik++) - { - int k_now = ik + startk_pool_[i]; - // the pool where this kpoint (k_now) resides - EXPECT_EQ(Pkpts->whichpool[k_now], i); - } - } } -void ParaPrepare::test_init_pools(const int& NPROC, +void + ParaPrepare::test_init_pools (const int& NPROC, const int& MY_RANK, const int& MY_POOL, const int& RANK_IN_POOL, @@ -137,38 +140,38 @@ void ParaPrepare::test_init_pools(const int& NPROC, int residue = NPROC % KPAR_; // the previous "residue" pools have (quotient+1) processes for (int i = 0; i < KPAR_; i++) - { - nproc_pool_[i] = quotient; - if (i < residue) { - ++nproc_pool_[i]; + nproc_pool_[i] = quotient; + if (i < residue) + { + ++nproc_pool_[i]; + } } - } int color = -1; int np_now = 0; for (int i = 0; i < KPAR_; i++) - { - np_now += nproc_pool_[i]; - if (MY_RANK < np_now) { - color = i; - // MY_POOL is the pool where this process resides - EXPECT_EQ(MY_POOL, i); - break; + np_now += nproc_pool_[i]; + if (MY_RANK < np_now) + { + color = i; + // MY_POOL is the pool where this process resides + EXPECT_EQ (MY_POOL, i); + break; + } } - } MPI_Comm test_comm; int test_rank, test_size; - MPI_Comm_split(MPI_COMM_WORLD, color, MY_RANK, &test_comm); - MPI_Comm_rank(test_comm, &test_rank); - MPI_Comm_size(test_comm, &test_size); + MPI_Comm_split (MPI_COMM_WORLD, color, MY_RANK, &test_comm); + MPI_Comm_rank (test_comm, &test_rank); + MPI_Comm_size (test_comm, &test_size); // RANK_IN_POOL is the rank of this process in MY_POOL - EXPECT_EQ(RANK_IN_POOL, test_rank); + EXPECT_EQ (RANK_IN_POOL, test_rank); // NPROC_IN_POOL is the number of processes in MY_POOL where this process resides - EXPECT_EQ(NPROC_IN_POOL, test_size); + EXPECT_EQ 
(NPROC_IN_POOL, test_size); // printf("my_rank: %d \t test rank/size: %d/%d \t pool rank/size: %d/%d\n", // MY_RANK,test_rank,test_size,RANK_IN_POOL,NPROC_IN_POOL); - MPI_Comm_free(&test_comm); + MPI_Comm_free (&test_comm); } class ParaKpoints : public ::testing::TestWithParam @@ -177,103 +180,105 @@ class ParaKpoints : public ::testing::TestWithParam MPIContext mpi; int NPROC; int MY_RANK; - void SetUp() override + void + SetUp () override { - NPROC = mpi.GetSize(); - MY_RANK = mpi.GetRank(); + NPROC = mpi.GetSize (); + MY_RANK = mpi.GetRank (); } }; -TEST_P(ParaKpoints, GatherkvecTest) +TEST_P (ParaKpoints, GatherkvecTest) { - ParaPrepare pp = GetParam(); + ParaPrepare pp = GetParam (); Parallel_Kpoints* Pkpoints; Pkpoints = new Parallel_Kpoints; mpi.KPAR = pp.KPAR_; if (mpi.KPAR > NPROC) - { - std::string output; - testing::internal::CaptureStdout(); - EXPECT_EXIT(Parallel_Global::divide_mpi_groups(this->NPROC, - mpi.KPAR, - this->MY_RANK, - mpi.NPROC_IN_POOL, - mpi.MY_POOL, - mpi.RANK_IN_POOL), - testing::ExitedWithCode(1), - ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("must be greater than the number of groups")); - } + { + std::string output; + testing::internal::CaptureStdout (); + EXPECT_EXIT (Parallel_Global::divide_mpi_groups (this->NPROC, + mpi.KPAR, + this->MY_RANK, + mpi.NPROC_IN_POOL, + mpi.MY_POOL, + mpi.RANK_IN_POOL), + testing::ExitedWithCode (1), + ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("must be greater than the number of groups")); + } else - { - Parallel_Global::divide_mpi_groups(this->NPROC, - mpi.KPAR, - this->MY_RANK, - mpi.NPROC_IN_POOL, - mpi.MY_POOL, - mpi.RANK_IN_POOL); - MPI_Comm_split(MPI_COMM_WORLD, mpi.MY_POOL, mpi.RANK_IN_POOL, &POOL_WORLD); - pp.test_init_pools(this->NPROC, this->MY_RANK, mpi.MY_POOL, mpi.RANK_IN_POOL, mpi.NPROC_IN_POOL); - Pkpoints->kinfo(pp.nkstot_, mpi.KPAR, mpi.MY_POOL, mpi.RANK_IN_POOL, this->NPROC, 
1); - pp.test_kinfo(Pkpoints); - pp.test_gatherkvec(Pkpoints, mpi); - } + { + Parallel_Global::divide_mpi_groups (this->NPROC, + mpi.KPAR, + this->MY_RANK, + mpi.NPROC_IN_POOL, + mpi.MY_POOL, + mpi.RANK_IN_POOL); + MPI_Comm_split (MPI_COMM_WORLD, mpi.MY_POOL, mpi.RANK_IN_POOL, &POOL_WORLD); + pp.test_init_pools (this->NPROC, this->MY_RANK, mpi.MY_POOL, mpi.RANK_IN_POOL, mpi.NPROC_IN_POOL); + Pkpoints->kinfo (pp.nkstot_, mpi.KPAR, mpi.MY_POOL, mpi.RANK_IN_POOL, this->NPROC, 1); + pp.test_kinfo (Pkpoints); + pp.test_gatherkvec (Pkpoints, mpi); + } delete Pkpoints; } -TEST_P(ParaKpoints, DividePools) +TEST_P (ParaKpoints, DividePools) { - ParaPrepare pp = GetParam(); + ParaPrepare pp = GetParam (); Parallel_Kpoints* Pkpoints; Pkpoints = new Parallel_Kpoints; mpi.KPAR = pp.KPAR_; if (mpi.KPAR > NPROC) - { - std::string output; - testing::internal::CaptureStdout(); - EXPECT_EXIT(Parallel_Global::divide_mpi_groups(this->NPROC, - mpi.KPAR, - this->MY_RANK, - mpi.NPROC_IN_POOL, - mpi.MY_POOL, - mpi.RANK_IN_POOL), - testing::ExitedWithCode(1), - ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("must be greater than the number of groups")); - } + { + std::string output; + testing::internal::CaptureStdout (); + EXPECT_EXIT (Parallel_Global::divide_mpi_groups (this->NPROC, + mpi.KPAR, + this->MY_RANK, + mpi.NPROC_IN_POOL, + mpi.MY_POOL, + mpi.RANK_IN_POOL), + testing::ExitedWithCode (1), + ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("must be greater than the number of groups")); + } else - { - Parallel_Global::divide_mpi_groups(this->NPROC, - mpi.KPAR, - this->MY_RANK, - mpi.NPROC_IN_POOL, - mpi.MY_POOL, - mpi.RANK_IN_POOL); - MPI_Comm_split(MPI_COMM_WORLD, mpi.MY_POOL, mpi.RANK_IN_POOL, &POOL_WORLD); - pp.test_init_pools(this->NPROC, this->MY_RANK, mpi.MY_POOL, mpi.RANK_IN_POOL, mpi.NPROC_IN_POOL); - Pkpoints->kinfo(pp.nkstot_, mpi.KPAR, mpi.MY_POOL, mpi.RANK_IN_POOL, 
this->NPROC, 1); - pp.test_kinfo(Pkpoints); - } + { + Parallel_Global::divide_mpi_groups (this->NPROC, + mpi.KPAR, + this->MY_RANK, + mpi.NPROC_IN_POOL, + mpi.MY_POOL, + mpi.RANK_IN_POOL); + MPI_Comm_split (MPI_COMM_WORLD, mpi.MY_POOL, mpi.RANK_IN_POOL, &POOL_WORLD); + pp.test_init_pools (this->NPROC, this->MY_RANK, mpi.MY_POOL, mpi.RANK_IN_POOL, mpi.NPROC_IN_POOL); + Pkpoints->kinfo (pp.nkstot_, mpi.KPAR, mpi.MY_POOL, mpi.RANK_IN_POOL, this->NPROC, 1); + pp.test_kinfo (Pkpoints); + } delete Pkpoints; } -INSTANTIATE_TEST_SUITE_P(TESTPK, - ParaKpoints, - ::testing::Values( - // KPAR, nkstot - ParaPrepare(2, 57), - ParaPrepare(3, 67), - ParaPrepare(5, 97), - ParaPrepare(97, 97))); +INSTANTIATE_TEST_SUITE_P (TESTPK, + ParaKpoints, + ::testing::Values ( + // KPAR, nkstot + ParaPrepare (2, 57), + ParaPrepare (3, 67), + ParaPrepare (5, 97), + ParaPrepare (97, 97))); -int main(int argc, char** argv) +int + main (int argc, char** argv) { - MPI_Init(&argc, &argv); - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); - MPI_Finalize(); + MPI_Init (&argc, &argv); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); + MPI_Finalize (); return result; } #endif diff --git a/source/source_cell/test/prepare_unitcell.h b/source/source_cell/test/prepare_unitcell.h index 73914644086..912ee3b60b7 100644 --- a/source/source_cell/test/prepare_unitcell.h +++ b/source/source_cell/test/prepare_unitcell.h @@ -1,484 +1,559 @@ #ifndef PREPARE_UNITCELL_H #define PREPARE_UNITCELL_H -#include -#include +#include +#include class UcellTestPrepare { -public: - UcellTestPrepare()=default; - UcellTestPrepare(std::string latname_in, - int lmaxmax_in, - bool init_vel_in, - bool selective_dynamics_in, - bool relax_new_in, - std::string fixed_axes_in, - double lat0_in, - std::valarray latvec_in, - std::vector elements_in, - std::vector pp_files_in, - std::vector pp_types_in, - std::vector orb_files_in, - std::valarray natom_in, - std::vector atomic_mass_in, - 
std::string coor_type_in, - std::valarray coordinates_in); - UcellTestPrepare(std::string latname_in, - int lmaxmax_in, - bool init_vel_in, - bool selective_dynamics_in, - bool relax_new_in, - std::string fixed_axes_in, - double lat0_in, - std::valarray latvec_in, - std::vector elements_in, - std::vector pp_files_in, - std::vector pp_types_in, - std::vector orb_files_in, - std::valarray natom_in, - std::vector atomic_mass_in, - std::string coor_type_in, - std::valarray coordinates_in, - std::valarray mbl_in, - std::valarray velocity_in); - UcellTestPrepare(const UcellTestPrepare &utp); + public: + UcellTestPrepare () = default; + UcellTestPrepare (std::string latname_in, + int lmaxmax_in, + bool init_vel_in, + bool selective_dynamics_in, + bool relax_new_in, + std::string fixed_axes_in, + double lat0_in, + std::valarray latvec_in, + std::vector elements_in, + std::vector pp_files_in, + std::vector pp_types_in, + std::vector orb_files_in, + std::valarray natom_in, + std::vector atomic_mass_in, + std::string coor_type_in, + std::valarray coordinates_in); + UcellTestPrepare (std::string latname_in, + int lmaxmax_in, + bool init_vel_in, + bool selective_dynamics_in, + bool relax_new_in, + std::string fixed_axes_in, + double lat0_in, + std::valarray latvec_in, + std::vector elements_in, + std::vector pp_files_in, + std::vector pp_types_in, + std::vector orb_files_in, + std::valarray natom_in, + std::vector atomic_mass_in, + std::string coor_type_in, + std::valarray coordinates_in, + std::valarray mbl_in, + std::valarray velocity_in); + UcellTestPrepare (const UcellTestPrepare& utp); - std::string latname; - int lmaxmax; - bool init_vel; - bool selective_dynamics; - bool relax_new; - std::string fixed_axes; - double lat0; - std::valarray latvec; - std::vector elements; - std::vector pp_files; - std::vector pp_types; - std::vector orb_files; - std::valarray natom; - std::vector atomic_mass; - std::string coor_type; - std::valarray coordinates; - std::valarray mbl; - 
std::valarray velocity; - // ntype - int ntype; - int atomic_index; + std::string latname; + int lmaxmax; + bool init_vel; + bool selective_dynamics; + bool relax_new; + std::string fixed_axes; + double lat0; + std::valarray latvec; + std::vector elements; + std::vector pp_files; + std::vector pp_types; + std::vector orb_files; + std::valarray natom; + std::vector atomic_mass; + std::string coor_type; + std::valarray coordinates; + std::valarray mbl; + std::valarray velocity; + // ntype + int ntype; + int atomic_index; - std::unique_ptr SetUcellInfo() - { - //basic info - this->ntype = this->elements.size(); - std::unique_ptr ucell(new UnitCell); - ucell->setup(this->latname, - this->ntype, - this->lmaxmax, - this->init_vel, - this->fixed_axes); - - delete[] ucell->magnet.start_mag; //mag set here - ucell->atom_label.resize(ucell->ntype); - ucell->atom_mass.resize(ucell->ntype); - ucell->pseudo_fn.resize(ucell->ntype); - ucell->pseudo_type.resize(ucell->ntype); - ucell->orbital_fn.resize(ucell->ntype); - ucell->magnet.start_mag = new double[ucell->ntype]; //mag set here - ucell->magnet.ux_[0] = 0.0; // ux_ set here - ucell->magnet.ux_[1] = 0.0; - ucell->magnet.ux_[2] = 0.0; - for(int it=0;itntype;++it) - { - ucell->atom_label[it] = this->elements[it]; - ucell->atom_mass[it] = this->atomic_mass[it]; - ucell->pseudo_fn[it] = this->pp_files[it]; - ucell->pseudo_type[it] = this->pp_types[it]; - ucell->orbital_fn[it] = this->orb_files[it]; - ucell->magnet.start_mag[it] = 0.0; //mag set here - } - //lattice info - ucell->lat0 = this->lat0; - ucell->lat0_angstrom = ucell->lat0 * ModuleBase::BOHR_TO_A; - ucell->tpiba = ModuleBase::TWO_PI/ucell->lat0; - ucell->tpiba2 = ucell->tpiba * ucell->tpiba; - ucell->latvec.e11 = this->latvec[0]; - ucell->latvec.e12 = this->latvec[1]; - ucell->latvec.e13 = this->latvec[2]; - ucell->latvec.e21 = this->latvec[3]; - ucell->latvec.e22 = this->latvec[4]; - ucell->latvec.e23 = this->latvec[5]; - ucell->latvec.e31 = this->latvec[6]; - 
ucell->latvec.e32 = this->latvec[7]; - ucell->latvec.e33 = this->latvec[8]; - ucell->a1.x = ucell->latvec.e11; - ucell->a1.y = ucell->latvec.e12; - ucell->a1.z = ucell->latvec.e13; - ucell->a2.x = ucell->latvec.e21; - ucell->a2.y = ucell->latvec.e22; - ucell->a2.z = ucell->latvec.e23; - ucell->a3.x = ucell->latvec.e31; - ucell->a3.y = ucell->latvec.e32; - ucell->a3.z = ucell->latvec.e33; - ucell->GT = ucell->latvec.Inverse(); - ucell->G = ucell->GT.Transpose(); - ucell->GGT = ucell->G*ucell->GT; - ucell->invGGT = ucell->GGT.Inverse(); - ucell->omega = std::abs(ucell->latvec.Det())*(ucell->lat0)*(ucell->lat0)*(ucell->lat0); - //atomic info - ucell->Coordinate = this->coor_type; - ucell->atoms = new Atom[ucell->ntype]; - ucell->set_atom_flag = true; - this->atomic_index = 0; - for(int it=0;itntype;++it) - { - ucell->atoms[it].label = this->elements[it]; - ucell->atoms[it].nw = 0; - ucell->atoms[it].nwl = 2; - ucell->atoms[it].l_nchi.resize(ucell->atoms[it].nwl+1); - for(int L=0; Latoms[it].nwl+1; L++) - { - ucell->atoms[it].l_nchi[L] = 1; - ucell->atoms[it].nw += (2*L + 1) * ucell->atoms[it].l_nchi[L]; - } - ucell->atoms[it].na = this->natom[it]; - //coordinates and related physical quantities - ucell->atoms[it].tau.resize(ucell->atoms[it].na); - ucell->atoms[it].dis.resize(ucell->atoms[it].na); - ucell->atoms[it].taud.resize(ucell->atoms[it].na); - ucell->atoms[it].vel.resize(ucell->atoms[it].na); - ucell->atoms[it].mag.resize(ucell->atoms[it].na); - ucell->atoms[it].angle1.resize(ucell->atoms[it].na); - ucell->atoms[it].angle2.resize(ucell->atoms[it].na); - ucell->atoms[it].m_loc_.resize(ucell->atoms[it].na); - ucell->atoms[it].mbl.resize(ucell->atoms[it].na); - ucell->atoms[it].lambda.resize(ucell->atoms[it].na); - ucell->atoms[it].constrain.resize(ucell->atoms[it].na); - ucell->atoms[it].mass = ucell->atom_mass[it]; // mass set here - for(int ia=0; iaatoms[it].na; ++ia) - { - if (ucell->Coordinate == "Direct") - { - ucell->atoms[it].taud[ia].x = 
this->coordinates[this->atomic_index*3+0]; - ucell->atoms[it].taud[ia].y = this->coordinates[this->atomic_index*3+1]; - ucell->atoms[it].taud[ia].z = this->coordinates[this->atomic_index*3+2]; - ucell->atoms[it].tau[ia] = ucell->atoms[it].taud[ia]*ucell->latvec; - } - else if (ucell->Coordinate == "Cartesian") - { - ucell->atoms[it].tau[ia].x = this->coordinates[this->atomic_index*3+0]; - ucell->atoms[it].tau[ia].y = this->coordinates[this->atomic_index*3+1]; - ucell->atoms[it].tau[ia].z = this->coordinates[this->atomic_index*3+2]; - ModuleBase::Mathzone::Cartesian_to_Direct( - ucell->atoms[it].tau[ia].x, ucell->atoms[it].tau[ia].y, ucell->atoms[it].tau[ia].z, - ucell->latvec.e11, ucell->latvec.e12, ucell->latvec.e13, - ucell->latvec.e21, ucell->latvec.e22, ucell->latvec.e23, - ucell->latvec.e31, ucell->latvec.e32, ucell->latvec.e33, - ucell->atoms[it].taud[ia].x, ucell->atoms[it].taud[ia].y, ucell->atoms[it].taud[ia].z); - } - ucell->atoms[it].dis[ia].set(0, 0, 0); - if(this->init_vel) - { - ucell->atoms[it].vel[ia].x = this->velocity[this->atomic_index*3+0]; - ucell->atoms[it].vel[ia].y = this->velocity[this->atomic_index*3+1]; - ucell->atoms[it].vel[ia].z = this->velocity[this->atomic_index*3+2]; - } - else - { - ucell->atoms[it].vel[ia].set(0,0,0); - } - ucell->atoms[it].m_loc_[ia].set(0,0,0); - ucell->atoms[it].angle1[ia] = 0; - ucell->atoms[it].angle2[ia] = 0; - if(this->selective_dynamics) - { - ucell->atoms[it].mbl[ia].x = this->mbl[this->atomic_index*3+0]; - ucell->atoms[it].mbl[ia].y = this->mbl[this->atomic_index*3+1]; - ucell->atoms[it].mbl[ia].z = this->mbl[this->atomic_index*3+2]; - } - else - { - ucell->atoms[it].mbl[ia] = {1,1,1}; - } - ++(this->atomic_index); - } - } - ucell->nat = this->natom.sum(); - return ucell; - } + std::unique_ptr + SetUcellInfo () + { + // basic info + this->ntype = this->elements.size (); + std::unique_ptr ucell (new UnitCell); + ucell->setup (this->latname, this->ntype, this->lmaxmax, this->init_vel, this->fixed_axes); + 
+ delete[] ucell->magnet.start_mag; // mag set here + ucell->atom_label.resize (ucell->ntype); + ucell->atom_mass.resize (ucell->ntype); + ucell->pseudo_fn.resize (ucell->ntype); + ucell->pseudo_type.resize (ucell->ntype); + ucell->orbital_fn.resize (ucell->ntype); + ucell->magnet.start_mag = new double[ucell->ntype]; // mag set here + ucell->magnet.ux_[0] = 0.0; // ux_ set here + ucell->magnet.ux_[1] = 0.0; + ucell->magnet.ux_[2] = 0.0; + for (int it = 0; it < ucell->ntype; ++it) + { + ucell->atom_label[it] = this->elements[it]; + ucell->atom_mass[it] = this->atomic_mass[it]; + ucell->pseudo_fn[it] = this->pp_files[it]; + ucell->pseudo_type[it] = this->pp_types[it]; + ucell->orbital_fn[it] = this->orb_files[it]; + ucell->magnet.start_mag[it] = 0.0; // mag set here + } + // lattice info + ucell->lat0 = this->lat0; + ucell->lat0_angstrom = ucell->lat0 * ModuleBase::BOHR_TO_A; + ucell->tpiba = ModuleBase::TWO_PI / ucell->lat0; + ucell->tpiba2 = ucell->tpiba * ucell->tpiba; + ucell->latvec.e11 = this->latvec[0]; + ucell->latvec.e12 = this->latvec[1]; + ucell->latvec.e13 = this->latvec[2]; + ucell->latvec.e21 = this->latvec[3]; + ucell->latvec.e22 = this->latvec[4]; + ucell->latvec.e23 = this->latvec[5]; + ucell->latvec.e31 = this->latvec[6]; + ucell->latvec.e32 = this->latvec[7]; + ucell->latvec.e33 = this->latvec[8]; + ucell->a1.x = ucell->latvec.e11; + ucell->a1.y = ucell->latvec.e12; + ucell->a1.z = ucell->latvec.e13; + ucell->a2.x = ucell->latvec.e21; + ucell->a2.y = ucell->latvec.e22; + ucell->a2.z = ucell->latvec.e23; + ucell->a3.x = ucell->latvec.e31; + ucell->a3.y = ucell->latvec.e32; + ucell->a3.z = ucell->latvec.e33; + ucell->GT = ucell->latvec.Inverse (); + ucell->G = ucell->GT.Transpose (); + ucell->GGT = ucell->G * ucell->GT; + ucell->invGGT = ucell->GGT.Inverse (); + ucell->omega = std::abs (ucell->latvec.Det ()) * (ucell->lat0) * (ucell->lat0) * (ucell->lat0); + // atomic info + ucell->Coordinate = this->coor_type; + ucell->atoms = new 
Atom[ucell->ntype]; + ucell->set_atom_flag = true; + this->atomic_index = 0; + for (int it = 0; it < ucell->ntype; ++it) + { + ucell->atoms[it].label = this->elements[it]; + ucell->atoms[it].nw = 0; + ucell->atoms[it].nwl = 2; + ucell->atoms[it].l_nchi.resize (ucell->atoms[it].nwl + 1); + for (int L = 0; L < ucell->atoms[it].nwl + 1; L++) + { + ucell->atoms[it].l_nchi[L] = 1; + ucell->atoms[it].nw += (2 * L + 1) * ucell->atoms[it].l_nchi[L]; + } + ucell->atoms[it].na = this->natom[it]; + // coordinates and related physical quantities + ucell->atoms[it].tau.resize (ucell->atoms[it].na); + ucell->atoms[it].dis.resize (ucell->atoms[it].na); + ucell->atoms[it].taud.resize (ucell->atoms[it].na); + ucell->atoms[it].vel.resize (ucell->atoms[it].na); + ucell->atoms[it].mag.resize (ucell->atoms[it].na); + ucell->atoms[it].angle1.resize (ucell->atoms[it].na); + ucell->atoms[it].angle2.resize (ucell->atoms[it].na); + ucell->atoms[it].m_loc_.resize (ucell->atoms[it].na); + ucell->atoms[it].mbl.resize (ucell->atoms[it].na); + ucell->atoms[it].lambda.resize (ucell->atoms[it].na); + ucell->atoms[it].constrain.resize (ucell->atoms[it].na); + ucell->atoms[it].mass = ucell->atom_mass[it]; // mass set here + for (int ia = 0; ia < ucell->atoms[it].na; ++ia) + { + if (ucell->Coordinate == "Direct") + { + ucell->atoms[it].taud[ia].x = this->coordinates[this->atomic_index * 3 + 0]; + ucell->atoms[it].taud[ia].y = this->coordinates[this->atomic_index * 3 + 1]; + ucell->atoms[it].taud[ia].z = this->coordinates[this->atomic_index * 3 + 2]; + ucell->atoms[it].tau[ia] = ucell->atoms[it].taud[ia] * ucell->latvec; + } + else if (ucell->Coordinate == "Cartesian") + { + ucell->atoms[it].tau[ia].x = this->coordinates[this->atomic_index * 3 + 0]; + ucell->atoms[it].tau[ia].y = this->coordinates[this->atomic_index * 3 + 1]; + ucell->atoms[it].tau[ia].z = this->coordinates[this->atomic_index * 3 + 2]; + ModuleBase::Mathzone::Cartesian_to_Direct (ucell->atoms[it].tau[ia].x, + 
ucell->atoms[it].tau[ia].y, + ucell->atoms[it].tau[ia].z, + ucell->latvec.e11, + ucell->latvec.e12, + ucell->latvec.e13, + ucell->latvec.e21, + ucell->latvec.e22, + ucell->latvec.e23, + ucell->latvec.e31, + ucell->latvec.e32, + ucell->latvec.e33, + ucell->atoms[it].taud[ia].x, + ucell->atoms[it].taud[ia].y, + ucell->atoms[it].taud[ia].z); + } + ucell->atoms[it].dis[ia].set (0, 0, 0); + if (this->init_vel) + { + ucell->atoms[it].vel[ia].x = this->velocity[this->atomic_index * 3 + 0]; + ucell->atoms[it].vel[ia].y = this->velocity[this->atomic_index * 3 + 1]; + ucell->atoms[it].vel[ia].z = this->velocity[this->atomic_index * 3 + 2]; + } + else + { + ucell->atoms[it].vel[ia].set (0, 0, 0); + } + ucell->atoms[it].m_loc_[ia].set (0, 0, 0); + ucell->atoms[it].angle1[ia] = 0; + ucell->atoms[it].angle2[ia] = 0; + if (this->selective_dynamics) + { + ucell->atoms[it].mbl[ia].x = this->mbl[this->atomic_index * 3 + 0]; + ucell->atoms[it].mbl[ia].y = this->mbl[this->atomic_index * 3 + 1]; + ucell->atoms[it].mbl[ia].z = this->mbl[this->atomic_index * 3 + 2]; + } + else + { + ucell->atoms[it].mbl[ia] = {1, 1, 1}; + } + ++(this->atomic_index); + } + } + ucell->nat = this->natom.sum (); + return ucell; + } }; -UcellTestPrepare::UcellTestPrepare(std::string latname_in, - int lmaxmax_in, - bool init_vel_in, - bool selective_dynamics_in, - bool relax_new_in, - std::string fixed_axes_in, - double lat0_in, - std::valarray latvec_in, - std::vector elements_in, - std::vector pp_files_in, - std::vector pp_types_in, - std::vector orb_files_in, - std::valarray natom_in, - std::vector atomic_mass_in, - std::string coor_type_in, - std::valarray coordinates_in): - latname(latname_in), - lmaxmax(lmaxmax_in), - init_vel(init_vel_in), - selective_dynamics(selective_dynamics_in), - relax_new(relax_new_in), - fixed_axes(fixed_axes_in), - lat0(lat0_in), - latvec(latvec_in), - elements(elements_in), - pp_files(pp_files_in), - pp_types(pp_types_in), - orb_files(orb_files_in), - natom(natom_in), - 
atomic_mass(atomic_mass_in), - coor_type(coor_type_in), - coordinates(coordinates_in) +UcellTestPrepare::UcellTestPrepare (std::string latname_in, + int lmaxmax_in, + bool init_vel_in, + bool selective_dynamics_in, + bool relax_new_in, + std::string fixed_axes_in, + double lat0_in, + std::valarray latvec_in, + std::vector elements_in, + std::vector pp_files_in, + std::vector pp_types_in, + std::vector orb_files_in, + std::valarray natom_in, + std::vector atomic_mass_in, + std::string coor_type_in, + std::valarray coordinates_in) + : latname (latname_in), lmaxmax (lmaxmax_in), init_vel (init_vel_in), selective_dynamics (selective_dynamics_in), + relax_new (relax_new_in), fixed_axes (fixed_axes_in), lat0 (lat0_in), latvec (latvec_in), elements (elements_in), + pp_files (pp_files_in), pp_types (pp_types_in), orb_files (orb_files_in), natom (natom_in), + atomic_mass (atomic_mass_in), coor_type (coor_type_in), coordinates (coordinates_in) { - mbl = std::valarray(0.0, coordinates_in.size()); - velocity = std::valarray(0.0, coordinates_in.size()); + mbl = std::valarray (0.0, coordinates_in.size ()); + velocity = std::valarray (0.0, coordinates_in.size ()); } -UcellTestPrepare::UcellTestPrepare(std::string latname_in, - int lmaxmax_in, - bool init_vel_in, - bool selective_dynamics_in, - bool relax_new_in, - std::string fixed_axes_in, - double lat0_in, - std::valarray latvec_in, - std::vector elements_in, - std::vector pp_files_in, - std::vector pp_types_in, - std::vector orb_files_in, - std::valarray natom_in, - std::vector atomic_mass_in, - std::string coor_type_in, - std::valarray coordinates_in, - std::valarray mbl_in, - std::valarray velocity_in): - latname(latname_in), - lmaxmax(lmaxmax_in), - init_vel(init_vel_in), - selective_dynamics(selective_dynamics_in), - relax_new(relax_new_in), - fixed_axes(fixed_axes_in), - lat0(lat0_in), - latvec(latvec_in), - elements(elements_in), - pp_files(pp_files_in), - pp_types(pp_types_in), - orb_files(orb_files_in), - 
natom(natom_in), - atomic_mass(atomic_mass_in), - coor_type(coor_type_in), - coordinates(coordinates_in), - mbl(mbl_in), - velocity(velocity_in) // velocity assume the existence of mbl in print_stru_file() -{} - -UcellTestPrepare::UcellTestPrepare(const UcellTestPrepare &utp): - latname(utp.latname), - lmaxmax(utp.lmaxmax), - init_vel(utp.init_vel), - selective_dynamics(utp.selective_dynamics), - relax_new(utp.relax_new), - fixed_axes(utp.fixed_axes), - lat0(utp.lat0), - latvec(utp.latvec), - elements(utp.elements), - pp_files(utp.pp_files), - pp_types(utp.pp_types), - orb_files(utp.orb_files), - natom(utp.natom), - atomic_mass(utp.atomic_mass), - coor_type(utp.coor_type), - coordinates(utp.coordinates), - mbl(utp.mbl), - velocity(utp.velocity) // velocity assume the existence of mbl in print_stru_file() -{} +UcellTestPrepare::UcellTestPrepare (std::string latname_in, + int lmaxmax_in, + bool init_vel_in, + bool selective_dynamics_in, + bool relax_new_in, + std::string fixed_axes_in, + double lat0_in, + std::valarray latvec_in, + std::vector elements_in, + std::vector pp_files_in, + std::vector pp_types_in, + std::vector orb_files_in, + std::valarray natom_in, + std::vector atomic_mass_in, + std::string coor_type_in, + std::valarray coordinates_in, + std::valarray mbl_in, + std::valarray velocity_in) + : latname (latname_in), lmaxmax (lmaxmax_in), init_vel (init_vel_in), selective_dynamics (selective_dynamics_in), + relax_new (relax_new_in), fixed_axes (fixed_axes_in), lat0 (lat0_in), latvec (latvec_in), elements (elements_in), + pp_files (pp_files_in), pp_types (pp_types_in), orb_files (orb_files_in), natom (natom_in), + atomic_mass (atomic_mass_in), coor_type (coor_type_in), coordinates (coordinates_in), mbl (mbl_in), + velocity (velocity_in) // velocity assume the existence of mbl in print_stru_file() +{ +} -std::map UcellTestLib +UcellTestPrepare::UcellTestPrepare (const UcellTestPrepare& utp) + : latname (utp.latname), lmaxmax (utp.lmaxmax), init_vel 
(utp.init_vel), + selective_dynamics (utp.selective_dynamics), relax_new (utp.relax_new), fixed_axes (utp.fixed_axes), + lat0 (utp.lat0), latvec (utp.latvec), elements (utp.elements), pp_files (utp.pp_files), pp_types (utp.pp_types), + orb_files (utp.orb_files), natom (utp.natom), atomic_mass (utp.atomic_mass), coor_type (utp.coor_type), + coordinates (utp.coordinates), mbl (utp.mbl), + velocity (utp.velocity) // velocity assume the existence of mbl in print_stru_file() { - {"C1H2-Index", UcellTestPrepare( - "bcc", //latname - 2, //lmaxmax - true, //init_vel - true, //selective_dyanmics - true, //relax_new - "volume", //fixed_axes - 1.8897261254578281, //lat0 - {10.0,0.0,0.0, //latvec - 0.0,10.0,0.0, - 0.0,0.0,10.0}, - {"C","H"}, //elements - {"C.upf","H.upf"}, //upf file - {"upf201","upf201"}, //upf types - {"C.orb","H.orb"}, //orb file - {1,2}, //number of each elements - {12.0,1.0}, //atomic mass - "Direct", //coordination type - {0.1,0.1,0.1, //atomic coordinates - 0.15,0.15,0.15, - 0.05,0.05,0.05}, - {1,1,1, //if atom can move: mbl - 0,0,0, - 0,0,1}, - {0.1,0.1,0.1, //velocity: vel - 0.1,0.1,0.1, - 0.1,0.1,0.1})}, - {"C1H2-Cartesian", UcellTestPrepare( - "bcc", //latname - 2, //lmaxmax - true, //init_vel - true, //selective_dyanmics - true, //relax_new - "volume", //fixed_axes - 1.8897261254578281, //lat0 - {10.0,0.0,0.0, //latvec - 0.0,10.0,0.0, - 0.0,0.0,10.0}, - {"C","H"}, //elements - {"C.upf","H.upf"}, //upf file - {"upf201","upf201"}, //upf types - {"C.orb","H.orb"}, //orb file - {1,2}, //number of each elements - {12.0,1.0}, //atomic mass - "Cartesian", //coordination type - {1,1,1, //atomic coordinates - 1.5,1.5,1.5, - 0.5,0.5,0.5})}, - {"C1H2-CheckDTau", UcellTestPrepare( - "bcc", //latname - 2, //lmaxmax - false, //init_vel - false, //selective_dyanmics - true, //relax_new - "volume", //fixed_axes - 1.8897261254578281, //lat0 - {0.1,0.1,0.1, //latvec - 0.15,0.15,0.15, - 0.05,0.05,0.05}, - {"C","H"}, //elements - {"C.upf","H.upf"}, //upf file - 
{"upf201","upf201"}, //upf types - {"C.orb","H.orb"}, //orb file - {1,2}, //number of each elements - {12.0,1.0}, //atomic mass - "Direct", //coordination type - {1.6,2.5,3.8, //atomic coordinates - -0.15,1.0,-0.15, - -3.05,-2.8,0.0})}, - {"C1H2-CheckTau", UcellTestPrepare( - "bcc", //latname - 2, //lmaxmax - false, //init_vel - false, //selective_dyanmics - true, //relax_new - "volume", //fixed_axes - 1.8897261254578281, //lat0 - {0.1,0.1,0.1, //latvec - 0.15,0.15,0.15, - 0.05,0.05,0.05}, - {"C","H"}, //elements - {"C.upf","H.upf"}, //upf file - {"upf201","upf201"}, //upf types - {"C.orb","H.orb"}, //orb file - {1,2}, //number of each elements - {12.0,1.0}, //atomic mass - "Direct", //coordination type - {0.0,0.0,0.0, //atomic coordinates - 0.00001,0.00001,0.00001, - -3.05,-2.8,0.0})}, - {"C1H2-SD", UcellTestPrepare( - "bcc", //latname - 2, //lmaxmax - false, //init_vel - false, //selective_dyanmics - true, //relax_new - "volume", //fixed_axes - 1.8897261254578281, //lat0 - {0.1,0.1,0.1, //latvec - 0.15,0.15,0.15, - 0.05,0.05,0.05}, - {"C","H"}, //elements - {"C.upf","H.upf"}, //upf file - {"upf201","upf201"}, //upf types - {"C.orb","H.orb"}, //orb file - {1,2}, //number of each elements - {12.0,1.0}, //atomic mass - "Direct", //coordination type - {0.1,0.1,0.1, //atomic coordinates - 0.15,0.15,0.15, - 0.05,0.05,0.05})}, - {"C1H2-PBA", UcellTestPrepare( - "bcc", //latname - 2, //lmaxmax - false, //init_vel - false, //selective_dyanmics - true, //relax_new - "volume", //fixed_axes - 1.8897261254578281, //lat0 - {0.1,0.1,0.1, //latvec - 0.15,0.15,0.15, - 0.05,0.05,0.05}, - {"C","H"}, //elements - {"C.upf","H.upf"}, //upf file - {"upf201","upf201"}, //upf types - {"C.orb","H.orb"}, //orb file - {1,2}, //number of each elements - {12.0,1.0}, //atomic mass - "Direct", //coordination type - {-0.1,-0.1,-0.1, //atomic coordinates - 1.2,1.2,1.2, - -3.05,-2.8,0.0})}, - {"C1H2-Read", UcellTestPrepare( - "bcc", //latname - 2, //lmaxmax - true, //init_vel - true, 
//selective_dyanmics - true, //relax_new - "volume", //fixed_axes - 1.8897261254578281, //lat0 - {10.0,0.0,0.0, //latvec - 0.0,10.0,0.0, - 0.0,0.0,10.0}, - {"C","H"}, //elements - {"C.upf","H.upf"}, //upf file - {"upf201","upf201"}, //upf types - {"C.orb","H.orb"}, //orb file - {1,2}, //number of each elements - {12.0,1.0}, //atomic mass - "Direct", //coordination type - {0.1,0.1,0.1, //atomic coordinates - 0.12,0.12,0.12, - 0.08,0.08,0.08})}, - {"flz-Read", UcellTestPrepare( - "bcc", //latname - 2, //lmaxmax - false, //init_vel - false, //selective_dyanmics - false, //relax_new - "volume", //fixed_axes - 1.8897261254578281, //lat0 - {10.0,0.0,0.0, //latvec - 0.0,10.0,0.0, - 0.0,0.0,10.0}, - {"C","H"}, //elements - {"C.upf","H.upf"}, //upf file - {"upf201","upf201"}, //upf types - {"C_gga_8au_100Ry_2s2p1d.orb","H_gga_8au_100Ry_2s1p.orb"}, //orb file - {1,2}, //number of each elements - {12.0,1.0}, //atomic mass - "Direct", //coordination type - {0.1,0.1,0.1, //atomic coordinates - 0.12,0.12,0.12, - 0.08,0.08,0.08} - ) - } -}; +} + +std::map UcellTestLib{ + {"C1H2-Index", + UcellTestPrepare ("bcc", // latname + 2, // lmaxmax + true, // init_vel + true, // selective_dyanmics + true, // relax_new + "volume", // fixed_axes + 1.8897261254578281, // lat0 + {10.0, + 0.0, + 0.0, // latvec + 0.0, + 10.0, + 0.0, + 0.0, + 0.0, + 10.0}, + {"C", "H"}, // elements + {"C.upf", "H.upf"}, // upf file + {"upf201", "upf201"}, // upf types + {"C.orb", "H.orb"}, // orb file + {1, 2}, // number of each elements + {12.0, 1.0}, // atomic mass + "Direct", // coordination type + {0.1, + 0.1, + 0.1, // atomic coordinates + 0.15, + 0.15, + 0.15, + 0.05, + 0.05, + 0.05}, + {1, + 1, + 1, // if atom can move: mbl + 0, + 0, + 0, + 0, + 0, + 1}, + {0.1, + 0.1, + 0.1, // velocity: vel + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1})}, + {"C1H2-Cartesian", + UcellTestPrepare ("bcc", // latname + 2, // lmaxmax + true, // init_vel + true, // selective_dyanmics + true, // relax_new + "volume", // fixed_axes 
+ 1.8897261254578281, // lat0 + {10.0, + 0.0, + 0.0, // latvec + 0.0, + 10.0, + 0.0, + 0.0, + 0.0, + 10.0}, + {"C", "H"}, // elements + {"C.upf", "H.upf"}, // upf file + {"upf201", "upf201"}, // upf types + {"C.orb", "H.orb"}, // orb file + {1, 2}, // number of each elements + {12.0, 1.0}, // atomic mass + "Cartesian", // coordination type + {1, + 1, + 1, // atomic coordinates + 1.5, + 1.5, + 1.5, + 0.5, + 0.5, + 0.5})}, + {"C1H2-CheckDTau", + UcellTestPrepare ("bcc", // latname + 2, // lmaxmax + false, // init_vel + false, // selective_dyanmics + true, // relax_new + "volume", // fixed_axes + 1.8897261254578281, // lat0 + {0.1, + 0.1, + 0.1, // latvec + 0.15, + 0.15, + 0.15, + 0.05, + 0.05, + 0.05}, + {"C", "H"}, // elements + {"C.upf", "H.upf"}, // upf file + {"upf201", "upf201"}, // upf types + {"C.orb", "H.orb"}, // orb file + {1, 2}, // number of each elements + {12.0, 1.0}, // atomic mass + "Direct", // coordination type + {1.6, + 2.5, + 3.8, // atomic coordinates + -0.15, + 1.0, + -0.15, + -3.05, + -2.8, + 0.0})}, + {"C1H2-CheckTau", + UcellTestPrepare ("bcc", // latname + 2, // lmaxmax + false, // init_vel + false, // selective_dyanmics + true, // relax_new + "volume", // fixed_axes + 1.8897261254578281, // lat0 + {0.1, + 0.1, + 0.1, // latvec + 0.15, + 0.15, + 0.15, + 0.05, + 0.05, + 0.05}, + {"C", "H"}, // elements + {"C.upf", "H.upf"}, // upf file + {"upf201", "upf201"}, // upf types + {"C.orb", "H.orb"}, // orb file + {1, 2}, // number of each elements + {12.0, 1.0}, // atomic mass + "Direct", // coordination type + {0.0, + 0.0, + 0.0, // atomic coordinates + 0.00001, + 0.00001, + 0.00001, + -3.05, + -2.8, + 0.0})}, + {"C1H2-SD", + UcellTestPrepare ("bcc", // latname + 2, // lmaxmax + false, // init_vel + false, // selective_dyanmics + true, // relax_new + "volume", // fixed_axes + 1.8897261254578281, // lat0 + {0.1, + 0.1, + 0.1, // latvec + 0.15, + 0.15, + 0.15, + 0.05, + 0.05, + 0.05}, + {"C", "H"}, // elements + {"C.upf", "H.upf"}, // upf file + 
{"upf201", "upf201"}, // upf types + {"C.orb", "H.orb"}, // orb file + {1, 2}, // number of each elements + {12.0, 1.0}, // atomic mass + "Direct", // coordination type + {0.1, + 0.1, + 0.1, // atomic coordinates + 0.15, + 0.15, + 0.15, + 0.05, + 0.05, + 0.05})}, + {"C1H2-PBA", + UcellTestPrepare ("bcc", // latname + 2, // lmaxmax + false, // init_vel + false, // selective_dyanmics + true, // relax_new + "volume", // fixed_axes + 1.8897261254578281, // lat0 + {0.1, + 0.1, + 0.1, // latvec + 0.15, + 0.15, + 0.15, + 0.05, + 0.05, + 0.05}, + {"C", "H"}, // elements + {"C.upf", "H.upf"}, // upf file + {"upf201", "upf201"}, // upf types + {"C.orb", "H.orb"}, // orb file + {1, 2}, // number of each elements + {12.0, 1.0}, // atomic mass + "Direct", // coordination type + {-0.1, + -0.1, + -0.1, // atomic coordinates + 1.2, + 1.2, + 1.2, + -3.05, + -2.8, + 0.0})}, + {"C1H2-Read", + UcellTestPrepare ("bcc", // latname + 2, // lmaxmax + true, // init_vel + true, // selective_dyanmics + true, // relax_new + "volume", // fixed_axes + 1.8897261254578281, // lat0 + {10.0, + 0.0, + 0.0, // latvec + 0.0, + 10.0, + 0.0, + 0.0, + 0.0, + 10.0}, + {"C", "H"}, // elements + {"C.upf", "H.upf"}, // upf file + {"upf201", "upf201"}, // upf types + {"C.orb", "H.orb"}, // orb file + {1, 2}, // number of each elements + {12.0, 1.0}, // atomic mass + "Direct", // coordination type + {0.1, + 0.1, + 0.1, // atomic coordinates + 0.12, + 0.12, + 0.12, + 0.08, + 0.08, + 0.08})}, + {"flz-Read", + UcellTestPrepare ("bcc", // latname + 2, // lmaxmax + false, // init_vel + false, // selective_dyanmics + false, // relax_new + "volume", // fixed_axes + 1.8897261254578281, // lat0 + {10.0, + 0.0, + 0.0, // latvec + 0.0, + 10.0, + 0.0, + 0.0, + 0.0, + 10.0}, + {"C", "H"}, // elements + {"C.upf", "H.upf"}, // upf file + {"upf201", "upf201"}, // upf types + {"C_gga_8au_100Ry_2s2p1d.orb", "H_gga_8au_100Ry_2s1p.orb"}, // orb file + {1, 2}, // number of each elements + {12.0, 1.0}, // atomic mass + "Direct", // 
coordination type + {0.1, + 0.1, + 0.1, // atomic coordinates + 0.12, + 0.12, + 0.12, + 0.08, + 0.08, + 0.08})}}; #endif diff --git a/source/source_cell/test/pseudo_nc_test.cpp b/source/source_cell/test/pseudo_nc_test.cpp index 3ed2af5bd2d..9d13e7f84ab 100644 --- a/source/source_cell/test/pseudo_nc_test.cpp +++ b/source/source_cell/test/pseudo_nc_test.cpp @@ -3,7 +3,7 @@ #define private public #include "source_io/module_parameter/parameter.h" #undef private -#include +#include /************************************************ * unit test of pseudo @@ -29,103 +29,102 @@ #undef private class NCPPTest : public testing::Test { -protected: - std::unique_ptr upf{new Pseudopot_upf}; - std::unique_ptr ncpp{new Atom_pseudo}; + protected: + std::unique_ptr upf{new Pseudopot_upf}; + std::unique_ptr ncpp{new Atom_pseudo}; }; -TEST_F(NCPPTest, SetPseudoH) +TEST_F (NCPPTest, SetPseudoH) { - std::ifstream ifs; - //set - ifs.open("./support/C.upf"); - PARAM.input.pseudo_rcut = 15.0; - upf->read_pseudo_upf201(ifs, *ncpp); - //set_pseudo_h - upf->complete_default_h(*ncpp); + std::ifstream ifs; + // set + ifs.open ("./support/C.upf"); + PARAM.input.pseudo_rcut = 15.0; + upf->read_pseudo_upf201 (ifs, *ncpp); + // set_pseudo_h + upf->complete_default_h (*ncpp); - if(!ncpp->has_so) - { - for (int i=0;inchi;i++) - { - EXPECT_EQ(ncpp->nn[i],0); - EXPECT_EQ(ncpp->jchi[i],0); - } - for (int i=0;inbeta;i++) - { - EXPECT_EQ(ncpp->jjj[i],0); - } - } - ifs.close(); + if (!ncpp->has_so) + { + for (int i = 0; i < ncpp->nchi; i++) + { + EXPECT_EQ (ncpp->nn[i], 0); + EXPECT_EQ (ncpp->jchi[i], 0); + } + for (int i = 0; i < ncpp->nbeta; i++) + { + EXPECT_EQ (ncpp->jjj[i], 0); + } + } + ifs.close (); } -TEST_F(NCPPTest, SetPseudoAtom) +TEST_F (NCPPTest, SetPseudoAtom) { - std::ifstream ifs; - //set - ifs.open("./support/C.upf"); - PARAM.input.pseudo_rcut = 15.0; - upf->read_pseudo_upf201(ifs, *ncpp); - //set_pseudo_atom - upf->complete_default_h(*ncpp); - upf->complete_default_atom(*ncpp); - 
EXPECT_EQ(ncpp->rcut,PARAM.input.pseudo_rcut); + std::ifstream ifs; + // set + ifs.open ("./support/C.upf"); + PARAM.input.pseudo_rcut = 15.0; + upf->read_pseudo_upf201 (ifs, *ncpp); + // set_pseudo_atom + upf->complete_default_h (*ncpp); + upf->complete_default_atom (*ncpp); + EXPECT_EQ (ncpp->rcut, PARAM.input.pseudo_rcut); - if(!ncpp->nlcc) - { - for(int i=0;imesh;i++) - { - EXPECT_EQ(ncpp->rho_atc[i],0.0); - } - } - EXPECT_EQ(ncpp->msh,ncpp->mesh); - ifs.close(); + if (!ncpp->nlcc) + { + for (int i = 0; i < ncpp->mesh; i++) + { + EXPECT_EQ (ncpp->rho_atc[i], 0.0); + } + } + EXPECT_EQ (ncpp->msh, ncpp->mesh); + ifs.close (); } -TEST_F(NCPPTest, SetPseudoNC) +TEST_F (NCPPTest, SetPseudoNC) { - std::ifstream ifs; - //set - ifs.open("./support/C.upf"); - PARAM.input.pseudo_rcut = 15.0; - // set pseudo nbeta = 0 - upf->read_pseudo_upf201(ifs, *ncpp); - ncpp->nbeta = 0; - upf->complete_default(*ncpp); - EXPECT_EQ(ncpp->nh,0); + std::ifstream ifs; + // set + ifs.open ("./support/C.upf"); + PARAM.input.pseudo_rcut = 15.0; + // set pseudo nbeta = 0 + upf->read_pseudo_upf201 (ifs, *ncpp); + ncpp->nbeta = 0; + upf->complete_default (*ncpp); + EXPECT_EQ (ncpp->nh, 0); // set pseudo nbeta > 0 - upf->read_pseudo_upf201(ifs, *ncpp); - upf->complete_default(*ncpp); - EXPECT_EQ(ncpp->nh,14); - EXPECT_EQ(ncpp->kkbeta,132); - ifs.close(); - + upf->read_pseudo_upf201 (ifs, *ncpp); + upf->complete_default (*ncpp); + EXPECT_EQ (ncpp->nh, 14); + EXPECT_EQ (ncpp->kkbeta, 132); + ifs.close (); } -TEST_F(NCPPTest, PrintNC) +TEST_F (NCPPTest, PrintNC) { - std::ifstream ifs; - //set - ifs.open("./support/C.upf"); - PARAM.input.pseudo_rcut = 15.0; - upf->read_pseudo_upf201(ifs, *ncpp); - upf->complete_default(*ncpp); - ifs.close(); - //print - std::ofstream ofs; - ofs.open("./tmp_log"); - ncpp->print_pseudo(ofs); - ofs.close(); - ifs.open("./tmp_log"); - std::string str((std::istreambuf_iterator(ifs)),std::istreambuf_iterator()); - EXPECT_THAT(str,testing::HasSubstr("psd C")); - 
EXPECT_THAT(str,testing::HasSubstr("pp_type NC")); - EXPECT_THAT(str,testing::HasSubstr("dft PBE")); - EXPECT_THAT(str,testing::HasSubstr("zv 4")); - EXPECT_THAT(str,testing::HasSubstr("nchi 3")); - EXPECT_THAT(str,testing::HasSubstr("nbeta 6")); - EXPECT_THAT(str,testing::HasSubstr("dion : nr=6 nc=6")); - EXPECT_THAT(str,testing::HasSubstr("msh\t1247")); - ifs.close(); - remove("./tmp_log"); + std::ifstream ifs; + // set + ifs.open ("./support/C.upf"); + PARAM.input.pseudo_rcut = 15.0; + upf->read_pseudo_upf201 (ifs, *ncpp); + upf->complete_default (*ncpp); + ifs.close (); + // print + std::ofstream ofs; + ofs.open ("./tmp_log"); + ncpp->print_pseudo (ofs); + ofs.close (); + ifs.open ("./tmp_log"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("psd C")); + EXPECT_THAT (str, testing::HasSubstr ("pp_type NC")); + EXPECT_THAT (str, testing::HasSubstr ("dft PBE")); + EXPECT_THAT (str, testing::HasSubstr ("zv 4")); + EXPECT_THAT (str, testing::HasSubstr ("nchi 3")); + EXPECT_THAT (str, testing::HasSubstr ("nbeta 6")); + EXPECT_THAT (str, testing::HasSubstr ("dion : nr=6 nc=6")); + EXPECT_THAT (str, testing::HasSubstr ("msh\t1247")); + ifs.close (); + remove ("./tmp_log"); } diff --git a/source/source_cell/test/read_atoms_helper_test.cpp b/source/source_cell/test/read_atoms_helper_test.cpp index 39336cd4a0c..a7047b7caa3 100644 --- a/source/source_cell/test/read_atoms_helper_test.cpp +++ b/source/source_cell/test/read_atoms_helper_test.cpp @@ -8,57 +8,71 @@ #include // Mock implementations for missing functions that are not in the linked sources -namespace elecstate { - bool read_orb_file(int it, std::string& orbital_file, std::ofstream& ofs_running, Atom* atom) { - // Mock implementation - just return true - return true; - } +namespace elecstate +{ +bool + read_orb_file (int it, std::string& orbital_file, std::ofstream& ofs_running, Atom* atom) +{ + // Mock implementation - just return true + 
return true; } +} // namespace elecstate // Mock output class methods -void output::printM3(std::ofstream& ofs, const std::string& description, const ModuleBase::Matrix3& m) { +void + output::printM3 (std::ofstream& ofs, const std::string& description, const ModuleBase::Matrix3& m) +{ // Mock implementation } -void output::printrm(std::ofstream& ofs, const std::string& description, const ModuleBase::matrix& m, const double& limit) { +void + output::printrm (std::ofstream& ofs, + const std::string& description, + const ModuleBase::matrix& m, + const double& limit) +{ // Mock implementation } // Mock InfoNonlocal class -InfoNonlocal::InfoNonlocal() {} -InfoNonlocal::~InfoNonlocal() {} +InfoNonlocal::InfoNonlocal () {} +InfoNonlocal::~InfoNonlocal () {} // Mock Magnetism class -Magnetism::Magnetism() {} -Magnetism::~Magnetism() {} +Magnetism::Magnetism () {} +Magnetism::~Magnetism () {} // Mock read_atom_positions function (we're testing the helpers, not the main function) -namespace unitcell { - bool read_atom_positions(UnitCell& ucell, std::ifstream& ifpos, - std::ofstream& ofs_running, std::ofstream& ofs_warning) { - // Mock implementation - return true; - } +namespace unitcell +{ +bool + read_atom_positions (UnitCell& ucell, std::ifstream& ifpos, std::ofstream& ofs_running, std::ofstream& ofs_warning) +{ + // Mock implementation + return true; } +} // namespace unitcell // Test fixture for read_atoms_helper tests class ReadAtomsHelperTest : public ::testing::Test { -protected: - void SetUp() override + protected: + void + SetUp () override { // Create temporary output streams - ofs_warning.open("test_warning.log"); - ofs_running.open("test_running.log"); + ofs_warning.open ("test_warning.log"); + ofs_running.open ("test_running.log"); } - void TearDown() override + void + TearDown () override { - ofs_warning.close(); - ofs_running.close(); + ofs_warning.close (); + ofs_running.close (); // Clean up temporary files - std::remove("test_warning.log"); - 
std::remove("test_running.log"); + std::remove ("test_warning.log"); + std::remove ("test_running.log"); } std::ofstream ofs_warning; @@ -66,264 +80,306 @@ class ReadAtomsHelperTest : public ::testing::Test }; // Test validate_coordinate_system function -TEST_F(ReadAtomsHelperTest, ValidateCoordinateSystem_ValidInputs) +TEST_F (ReadAtomsHelperTest, ValidateCoordinateSystem_ValidInputs) { - EXPECT_TRUE(unitcell::validate_coordinate_system("Direct", ofs_warning)); - EXPECT_TRUE(unitcell::validate_coordinate_system("Cartesian", ofs_warning)); - EXPECT_TRUE(unitcell::validate_coordinate_system("Cartesian_angstrom", ofs_warning)); - EXPECT_TRUE(unitcell::validate_coordinate_system("Cartesian_au", ofs_warning)); - EXPECT_TRUE(unitcell::validate_coordinate_system("Cartesian_angstrom_center_xy", ofs_warning)); - EXPECT_TRUE(unitcell::validate_coordinate_system("Cartesian_angstrom_center_xz", ofs_warning)); - EXPECT_TRUE(unitcell::validate_coordinate_system("Cartesian_angstrom_center_yz", ofs_warning)); - EXPECT_TRUE(unitcell::validate_coordinate_system("Cartesian_angstrom_center_xyz", ofs_warning)); + EXPECT_TRUE (unitcell::validate_coordinate_system ("Direct", ofs_warning)); + EXPECT_TRUE (unitcell::validate_coordinate_system ("Cartesian", ofs_warning)); + EXPECT_TRUE (unitcell::validate_coordinate_system ("Cartesian_angstrom", ofs_warning)); + EXPECT_TRUE (unitcell::validate_coordinate_system ("Cartesian_au", ofs_warning)); + EXPECT_TRUE (unitcell::validate_coordinate_system ("Cartesian_angstrom_center_xy", ofs_warning)); + EXPECT_TRUE (unitcell::validate_coordinate_system ("Cartesian_angstrom_center_xz", ofs_warning)); + EXPECT_TRUE (unitcell::validate_coordinate_system ("Cartesian_angstrom_center_yz", ofs_warning)); + EXPECT_TRUE (unitcell::validate_coordinate_system ("Cartesian_angstrom_center_xyz", ofs_warning)); } -TEST_F(ReadAtomsHelperTest, ValidateCoordinateSystem_InvalidInputs) +TEST_F (ReadAtomsHelperTest, ValidateCoordinateSystem_InvalidInputs) { - 
EXPECT_FALSE(unitcell::validate_coordinate_system("Invalid", ofs_warning)); - EXPECT_FALSE(unitcell::validate_coordinate_system("direct", ofs_warning)); // case sensitive - EXPECT_FALSE(unitcell::validate_coordinate_system("", ofs_warning)); - EXPECT_FALSE(unitcell::validate_coordinate_system("Cartesian_angstrom_center", ofs_warning)); + EXPECT_FALSE (unitcell::validate_coordinate_system ("Invalid", ofs_warning)); + EXPECT_FALSE (unitcell::validate_coordinate_system ("direct", ofs_warning)); // case sensitive + EXPECT_FALSE (unitcell::validate_coordinate_system ("", ofs_warning)); + EXPECT_FALSE (unitcell::validate_coordinate_system ("Cartesian_angstrom_center", ofs_warning)); } // Test calculate_lattice_center function -TEST_F(ReadAtomsHelperTest, CalculateLatticeCenterXY) +TEST_F (ReadAtomsHelperTest, CalculateLatticeCenterXY) { ModuleBase::Matrix3 latvec; - latvec.e11 = 10.0; latvec.e12 = 0.0; latvec.e13 = 0.0; - latvec.e21 = 0.0; latvec.e22 = 10.0; latvec.e23 = 0.0; - latvec.e31 = 0.0; latvec.e32 = 0.0; latvec.e33 = 10.0; - - auto center = unitcell::calculate_lattice_center(latvec, "xy"); - - EXPECT_DOUBLE_EQ(center.x, 5.0); - EXPECT_DOUBLE_EQ(center.y, 5.0); - EXPECT_DOUBLE_EQ(center.z, 0.0); + latvec.e11 = 10.0; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = 0.0; + latvec.e22 = 10.0; + latvec.e23 = 0.0; + latvec.e31 = 0.0; + latvec.e32 = 0.0; + latvec.e33 = 10.0; + + auto center = unitcell::calculate_lattice_center (latvec, "xy"); + + EXPECT_DOUBLE_EQ (center.x, 5.0); + EXPECT_DOUBLE_EQ (center.y, 5.0); + EXPECT_DOUBLE_EQ (center.z, 0.0); } -TEST_F(ReadAtomsHelperTest, CalculateLatticeCenterXZ) +TEST_F (ReadAtomsHelperTest, CalculateLatticeCenterXZ) { ModuleBase::Matrix3 latvec; - latvec.e11 = 10.0; latvec.e12 = 0.0; latvec.e13 = 0.0; - latvec.e21 = 0.0; latvec.e22 = 10.0; latvec.e23 = 0.0; - latvec.e31 = 0.0; latvec.e32 = 0.0; latvec.e33 = 10.0; - - auto center = unitcell::calculate_lattice_center(latvec, "xz"); - - EXPECT_DOUBLE_EQ(center.x, 5.0); - 
EXPECT_DOUBLE_EQ(center.y, 0.0); - EXPECT_DOUBLE_EQ(center.z, 5.0); + latvec.e11 = 10.0; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = 0.0; + latvec.e22 = 10.0; + latvec.e23 = 0.0; + latvec.e31 = 0.0; + latvec.e32 = 0.0; + latvec.e33 = 10.0; + + auto center = unitcell::calculate_lattice_center (latvec, "xz"); + + EXPECT_DOUBLE_EQ (center.x, 5.0); + EXPECT_DOUBLE_EQ (center.y, 0.0); + EXPECT_DOUBLE_EQ (center.z, 5.0); } -TEST_F(ReadAtomsHelperTest, CalculateLatticeCenterYZ) +TEST_F (ReadAtomsHelperTest, CalculateLatticeCenterYZ) { ModuleBase::Matrix3 latvec; - latvec.e11 = 10.0; latvec.e12 = 0.0; latvec.e13 = 0.0; - latvec.e21 = 0.0; latvec.e22 = 10.0; latvec.e23 = 0.0; - latvec.e31 = 0.0; latvec.e32 = 0.0; latvec.e33 = 10.0; - - auto center = unitcell::calculate_lattice_center(latvec, "yz"); - - EXPECT_DOUBLE_EQ(center.x, 0.0); - EXPECT_DOUBLE_EQ(center.y, 5.0); - EXPECT_DOUBLE_EQ(center.z, 5.0); + latvec.e11 = 10.0; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = 0.0; + latvec.e22 = 10.0; + latvec.e23 = 0.0; + latvec.e31 = 0.0; + latvec.e32 = 0.0; + latvec.e33 = 10.0; + + auto center = unitcell::calculate_lattice_center (latvec, "yz"); + + EXPECT_DOUBLE_EQ (center.x, 0.0); + EXPECT_DOUBLE_EQ (center.y, 5.0); + EXPECT_DOUBLE_EQ (center.z, 5.0); } -TEST_F(ReadAtomsHelperTest, CalculateLatticeCenterXYZ) +TEST_F (ReadAtomsHelperTest, CalculateLatticeCenterXYZ) { ModuleBase::Matrix3 latvec; - latvec.e11 = 10.0; latvec.e12 = 0.0; latvec.e13 = 0.0; - latvec.e21 = 0.0; latvec.e22 = 10.0; latvec.e23 = 0.0; - latvec.e31 = 0.0; latvec.e32 = 0.0; latvec.e33 = 10.0; - - auto center = unitcell::calculate_lattice_center(latvec, "xyz"); - - EXPECT_DOUBLE_EQ(center.x, 5.0); - EXPECT_DOUBLE_EQ(center.y, 5.0); - EXPECT_DOUBLE_EQ(center.z, 5.0); + latvec.e11 = 10.0; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = 0.0; + latvec.e22 = 10.0; + latvec.e23 = 0.0; + latvec.e31 = 0.0; + latvec.e32 = 0.0; + latvec.e33 = 10.0; + + auto center = 
unitcell::calculate_lattice_center (latvec, "xyz"); + + EXPECT_DOUBLE_EQ (center.x, 5.0); + EXPECT_DOUBLE_EQ (center.y, 5.0); + EXPECT_DOUBLE_EQ (center.z, 5.0); } -TEST_F(ReadAtomsHelperTest, CalculateLatticeCenterNonCubic) +TEST_F (ReadAtomsHelperTest, CalculateLatticeCenterNonCubic) { ModuleBase::Matrix3 latvec; - latvec.e11 = 8.0; latvec.e12 = 0.0; latvec.e13 = 0.0; - latvec.e21 = 2.0; latvec.e22 = 6.0; latvec.e23 = 0.0; - latvec.e31 = 1.0; latvec.e32 = 1.0; latvec.e33 = 10.0; - - auto center = unitcell::calculate_lattice_center(latvec, "xyz"); - - EXPECT_DOUBLE_EQ(center.x, (8.0 + 2.0 + 1.0) / 2.0); - EXPECT_DOUBLE_EQ(center.y, (0.0 + 6.0 + 1.0) / 2.0); - EXPECT_DOUBLE_EQ(center.z, (0.0 + 0.0 + 10.0) / 2.0); + latvec.e11 = 8.0; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = 2.0; + latvec.e22 = 6.0; + latvec.e23 = 0.0; + latvec.e31 = 1.0; + latvec.e32 = 1.0; + latvec.e33 = 10.0; + + auto center = unitcell::calculate_lattice_center (latvec, "xyz"); + + EXPECT_DOUBLE_EQ (center.x, (8.0 + 2.0 + 1.0) / 2.0); + EXPECT_DOUBLE_EQ (center.y, (0.0 + 6.0 + 1.0) / 2.0); + EXPECT_DOUBLE_EQ (center.z, (0.0 + 0.0 + 10.0) / 2.0); } // Test allocate_atom_properties function -TEST_F(ReadAtomsHelperTest, AllocateAtomProperties) +TEST_F (ReadAtomsHelperTest, AllocateAtomProperties) { Atom atom; int na = 5; double mass = 12.0; - unitcell::allocate_atom_properties(atom, na, mass); - - EXPECT_EQ(atom.tau.size(), na); - EXPECT_EQ(atom.dis.size(), na); - EXPECT_EQ(atom.taud.size(), na); - EXPECT_EQ(atom.boundary_shift.size(), na); - EXPECT_EQ(atom.vel.size(), na); - EXPECT_EQ(atom.mbl.size(), na); - EXPECT_EQ(atom.mag.size(), na); - EXPECT_EQ(atom.angle1.size(), na); - EXPECT_EQ(atom.angle2.size(), na); - EXPECT_EQ(atom.m_loc_.size(), na); - EXPECT_EQ(atom.lambda.size(), na); - EXPECT_EQ(atom.constrain.size(), na); - EXPECT_DOUBLE_EQ(atom.mass, mass); + unitcell::allocate_atom_properties (atom, na, mass); + + EXPECT_EQ (atom.tau.size (), na); + EXPECT_EQ (atom.dis.size (), 
na); + EXPECT_EQ (atom.taud.size (), na); + EXPECT_EQ (atom.boundary_shift.size (), na); + EXPECT_EQ (atom.vel.size (), na); + EXPECT_EQ (atom.mbl.size (), na); + EXPECT_EQ (atom.mag.size (), na); + EXPECT_EQ (atom.angle1.size (), na); + EXPECT_EQ (atom.angle2.size (), na); + EXPECT_EQ (atom.m_loc_.size (), na); + EXPECT_EQ (atom.lambda.size (), na); + EXPECT_EQ (atom.constrain.size (), na); + EXPECT_DOUBLE_EQ (atom.mass, mass); } // Test transform_atom_coordinates for Direct coordinates -TEST_F(ReadAtomsHelperTest, TransformAtomCoordinatesDirect) +TEST_F (ReadAtomsHelperTest, TransformAtomCoordinatesDirect) { Atom atom; - atom.tau.resize(1); - atom.taud.resize(1); + atom.tau.resize (1); + atom.taud.resize (1); - ModuleBase::Vector3 v(0.5, 0.5, 0.5); + ModuleBase::Vector3 v (0.5, 0.5, 0.5); ModuleBase::Matrix3 latvec; - latvec.e11 = 10.0; latvec.e12 = 0.0; latvec.e13 = 0.0; - latvec.e21 = 0.0; latvec.e22 = 10.0; latvec.e23 = 0.0; - latvec.e31 = 0.0; latvec.e32 = 0.0; latvec.e33 = 10.0; + latvec.e11 = 10.0; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = 0.0; + latvec.e22 = 10.0; + latvec.e23 = 0.0; + latvec.e31 = 0.0; + latvec.e32 = 0.0; + latvec.e33 = 10.0; double lat0 = 1.0; ModuleBase::Vector3 latcenter; - unitcell::transform_atom_coordinates(atom, 0, "Direct", v, latvec, lat0, latcenter); + unitcell::transform_atom_coordinates (atom, 0, "Direct", v, latvec, lat0, latcenter); - EXPECT_DOUBLE_EQ(atom.taud[0].x, 0.5); - EXPECT_DOUBLE_EQ(atom.taud[0].y, 0.5); - EXPECT_DOUBLE_EQ(atom.taud[0].z, 0.5); - EXPECT_DOUBLE_EQ(atom.tau[0].x, 5.0); - EXPECT_DOUBLE_EQ(atom.tau[0].y, 5.0); - EXPECT_DOUBLE_EQ(atom.tau[0].z, 5.0); + EXPECT_DOUBLE_EQ (atom.taud[0].x, 0.5); + EXPECT_DOUBLE_EQ (atom.taud[0].y, 0.5); + EXPECT_DOUBLE_EQ (atom.taud[0].z, 0.5); + EXPECT_DOUBLE_EQ (atom.tau[0].x, 5.0); + EXPECT_DOUBLE_EQ (atom.tau[0].y, 5.0); + EXPECT_DOUBLE_EQ (atom.tau[0].z, 5.0); } // Test transform_atom_coordinates for Cartesian coordinates -TEST_F(ReadAtomsHelperTest, 
TransformAtomCoordinatesCartesian) +TEST_F (ReadAtomsHelperTest, TransformAtomCoordinatesCartesian) { Atom atom; - atom.tau.resize(1); - atom.taud.resize(1); + atom.tau.resize (1); + atom.taud.resize (1); - ModuleBase::Vector3 v(5.0, 5.0, 5.0); + ModuleBase::Vector3 v (5.0, 5.0, 5.0); ModuleBase::Matrix3 latvec; - latvec.e11 = 10.0; latvec.e12 = 0.0; latvec.e13 = 0.0; - latvec.e21 = 0.0; latvec.e22 = 10.0; latvec.e23 = 0.0; - latvec.e31 = 0.0; latvec.e32 = 0.0; latvec.e33 = 10.0; + latvec.e11 = 10.0; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = 0.0; + latvec.e22 = 10.0; + latvec.e23 = 0.0; + latvec.e31 = 0.0; + latvec.e32 = 0.0; + latvec.e33 = 10.0; double lat0 = 1.0; ModuleBase::Vector3 latcenter; - unitcell::transform_atom_coordinates(atom, 0, "Cartesian", v, latvec, lat0, latcenter); + unitcell::transform_atom_coordinates (atom, 0, "Cartesian", v, latvec, lat0, latcenter); - EXPECT_DOUBLE_EQ(atom.tau[0].x, 5.0); - EXPECT_DOUBLE_EQ(atom.tau[0].y, 5.0); - EXPECT_DOUBLE_EQ(atom.tau[0].z, 5.0); - EXPECT_DOUBLE_EQ(atom.taud[0].x, 0.5); - EXPECT_DOUBLE_EQ(atom.taud[0].y, 0.5); - EXPECT_DOUBLE_EQ(atom.taud[0].z, 0.5); + EXPECT_DOUBLE_EQ (atom.tau[0].x, 5.0); + EXPECT_DOUBLE_EQ (atom.tau[0].y, 5.0); + EXPECT_DOUBLE_EQ (atom.tau[0].z, 5.0); + EXPECT_DOUBLE_EQ (atom.taud[0].x, 0.5); + EXPECT_DOUBLE_EQ (atom.taud[0].y, 0.5); + EXPECT_DOUBLE_EQ (atom.taud[0].z, 0.5); } // Test process_magnetization for nspin=2 -TEST_F(ReadAtomsHelperTest, ProcessMagnetizationNspin2) +TEST_F (ReadAtomsHelperTest, ProcessMagnetizationNspin2) { Atom atom; - atom.mag.resize(1); - atom.m_loc_.resize(1); - atom.angle1.resize(1); - atom.angle2.resize(1); + atom.mag.resize (1); + atom.m_loc_.resize (1); + atom.angle1.resize (1); + atom.angle2.resize (1); atom.mag[0] = 2.0; - atom.m_loc_[0].set(0, 0, 0); + atom.m_loc_[0].set (0, 0, 0); - unitcell::process_magnetization(atom, 0, 0, 2, false, false, ofs_running); + unitcell::process_magnetization (atom, 0, 0, 2, false, false, ofs_running); 
// For nspin=2, only z component should be set - EXPECT_DOUBLE_EQ(atom.m_loc_[0].x, 0.0); - EXPECT_DOUBLE_EQ(atom.m_loc_[0].y, 0.0); - EXPECT_DOUBLE_EQ(atom.m_loc_[0].z, 2.0); - EXPECT_DOUBLE_EQ(atom.mag[0], 2.0); + EXPECT_DOUBLE_EQ (atom.m_loc_[0].x, 0.0); + EXPECT_DOUBLE_EQ (atom.m_loc_[0].y, 0.0); + EXPECT_DOUBLE_EQ (atom.m_loc_[0].z, 2.0); + EXPECT_DOUBLE_EQ (atom.mag[0], 2.0); } // Test process_magnetization for nspin=4 with vector input -TEST_F(ReadAtomsHelperTest, ProcessMagnetizationNspin4VectorInput) +TEST_F (ReadAtomsHelperTest, ProcessMagnetizationNspin4VectorInput) { Atom atom; - atom.mag.resize(1); - atom.m_loc_.resize(1); - atom.angle1.resize(1); - atom.angle2.resize(1); + atom.mag.resize (1); + atom.m_loc_.resize (1); + atom.angle1.resize (1); + atom.angle2.resize (1); - atom.m_loc_[0].set(1.0, 1.0, 1.0); - atom.mag[0] = sqrt(3.0); + atom.m_loc_[0].set (1.0, 1.0, 1.0); + atom.mag[0] = sqrt (3.0); // Set noncolin to true to allow non-collinear magnetization // Note: This requires PARAM to be properly initialized - unitcell::process_magnetization(atom, 0, 0, 4, true, false, ofs_running); + unitcell::process_magnetization (atom, 0, 0, 4, true, false, ofs_running); // Angles should be calculated from vector components - EXPECT_GT(atom.angle1[0], 0.0); - EXPECT_GT(atom.angle2[0], 0.0); + EXPECT_GT (atom.angle1[0], 0.0); + EXPECT_GT (atom.angle2[0], 0.0); } // Test process_magnetization with angle input -TEST_F(ReadAtomsHelperTest, ProcessMagnetizationAngleInput) +TEST_F (ReadAtomsHelperTest, ProcessMagnetizationAngleInput) { Atom atom; - atom.mag.resize(1); - atom.m_loc_.resize(1); - atom.angle1.resize(1); - atom.angle2.resize(1); + atom.mag.resize (1); + atom.m_loc_.resize (1); + atom.angle1.resize (1); + atom.angle2.resize (1); atom.mag[0] = 2.0; - atom.angle1[0] = M_PI / 2.0; // 90 degrees + atom.angle1[0] = M_PI / 2.0; // 90 degrees atom.angle2[0] = 0.0; - atom.m_loc_[0].set(0, 0, 0); + atom.m_loc_[0].set (0, 0, 0); // Note: For nspin=4, if noncolin 
is false (default), x and y components are zeroed // So we test with nspin=2 instead to verify the angle calculation works - unitcell::process_magnetization(atom, 0, 0, 2, false, true, ofs_running); + unitcell::process_magnetization (atom, 0, 0, 2, false, true, ofs_running); // For nspin=2, only z component is used, which should be mag[0] * cos(angle1) // With angle1 = PI/2, cos(PI/2) = 0 - EXPECT_NEAR(atom.m_loc_[0].z, 0.0, 1e-10); - EXPECT_DOUBLE_EQ(atom.mag[0], atom.m_loc_[0].z); + EXPECT_NEAR (atom.m_loc_[0].z, 0.0, 1e-10); + EXPECT_DOUBLE_EQ (atom.mag[0], atom.m_loc_[0].z); } // Test parse_atom_properties with movement flags -TEST_F(ReadAtomsHelperTest, ParseAtomPropertiesMovementFlags) +TEST_F (ReadAtomsHelperTest, ParseAtomPropertiesMovementFlags) { std::string input_str = "1.0 2.0 3.0 m 1 0 1\n"; - std::istringstream iss(input_str); + std::istringstream iss (input_str); // Create a temporary file for testing - std::ofstream temp_file("test_input.tmp"); + std::ofstream temp_file ("test_input.tmp"); temp_file << input_str; - temp_file.close(); + temp_file.close (); - std::ifstream ifpos("test_input.tmp"); + std::ifstream ifpos ("test_input.tmp"); Atom atom; atom.label = "C"; - atom.vel.resize(1); - atom.mag.resize(1); - atom.m_loc_.resize(1); - atom.angle1.resize(1); - atom.angle2.resize(1); - atom.lambda.resize(1); - atom.constrain.resize(1); - - ModuleBase::Vector3 mv(1, 1, 1); + atom.vel.resize (1); + atom.mag.resize (1); + atom.m_loc_.resize (1); + atom.angle1.resize (1); + atom.angle2.resize (1); + atom.lambda.resize (1); + atom.constrain.resize (1); + + ModuleBase::Vector3 mv (1, 1, 1); bool input_vec_mag = false; bool input_angle_mag = false; bool set_element_mag_zero = false; @@ -332,41 +388,40 @@ TEST_F(ReadAtomsHelperTest, ParseAtomPropertiesMovementFlags) double x, y, z; ifpos >> x >> y >> z; - bool result = unitcell::parse_atom_properties(ifpos, atom, 0, mv, - input_vec_mag, input_angle_mag, - set_element_mag_zero); + bool result + = 
unitcell::parse_atom_properties (ifpos, atom, 0, mv, input_vec_mag, input_angle_mag, set_element_mag_zero); - EXPECT_TRUE(result); - EXPECT_EQ(mv.x, 1); - EXPECT_EQ(mv.y, 0); - EXPECT_EQ(mv.z, 1); + EXPECT_TRUE (result); + EXPECT_EQ (mv.x, 1); + EXPECT_EQ (mv.y, 0); + EXPECT_EQ (mv.z, 1); - ifpos.close(); - std::remove("test_input.tmp"); + ifpos.close (); + std::remove ("test_input.tmp"); } // Test parse_atom_properties with velocity -TEST_F(ReadAtomsHelperTest, ParseAtomPropertiesVelocity) +TEST_F (ReadAtomsHelperTest, ParseAtomPropertiesVelocity) { std::string input_str = "1.0 2.0 3.0 v 0.1 0.2 0.3\n"; - std::ofstream temp_file("test_input.tmp"); + std::ofstream temp_file ("test_input.tmp"); temp_file << input_str; - temp_file.close(); + temp_file.close (); - std::ifstream ifpos("test_input.tmp"); + std::ifstream ifpos ("test_input.tmp"); Atom atom; atom.label = "C"; - atom.vel.resize(1); - atom.mag.resize(1); - atom.m_loc_.resize(1); - atom.angle1.resize(1); - atom.angle2.resize(1); - atom.lambda.resize(1); - atom.constrain.resize(1); - - ModuleBase::Vector3 mv(1, 1, 1); + atom.vel.resize (1); + atom.mag.resize (1); + atom.m_loc_.resize (1); + atom.angle1.resize (1); + atom.angle2.resize (1); + atom.lambda.resize (1); + atom.constrain.resize (1); + + ModuleBase::Vector3 mv (1, 1, 1); bool input_vec_mag = false; bool input_angle_mag = false; bool set_element_mag_zero = false; @@ -375,41 +430,40 @@ TEST_F(ReadAtomsHelperTest, ParseAtomPropertiesVelocity) double x, y, z; ifpos >> x >> y >> z; - bool result = unitcell::parse_atom_properties(ifpos, atom, 0, mv, - input_vec_mag, input_angle_mag, - set_element_mag_zero); + bool result + = unitcell::parse_atom_properties (ifpos, atom, 0, mv, input_vec_mag, input_angle_mag, set_element_mag_zero); - EXPECT_TRUE(result); - EXPECT_DOUBLE_EQ(atom.vel[0].x, 0.1); - EXPECT_DOUBLE_EQ(atom.vel[0].y, 0.2); - EXPECT_DOUBLE_EQ(atom.vel[0].z, 0.3); + EXPECT_TRUE (result); + EXPECT_DOUBLE_EQ (atom.vel[0].x, 0.1); + EXPECT_DOUBLE_EQ 
(atom.vel[0].y, 0.2); + EXPECT_DOUBLE_EQ (atom.vel[0].z, 0.3); - ifpos.close(); - std::remove("test_input.tmp"); + ifpos.close (); + std::remove ("test_input.tmp"); } // Test parse_atom_properties with scalar magnetization -TEST_F(ReadAtomsHelperTest, ParseAtomPropertiesScalarMag) +TEST_F (ReadAtomsHelperTest, ParseAtomPropertiesScalarMag) { std::string input_str = "1.0 2.0 3.0 mag 2.5\n"; - std::ofstream temp_file("test_input.tmp"); + std::ofstream temp_file ("test_input.tmp"); temp_file << input_str; - temp_file.close(); + temp_file.close (); - std::ifstream ifpos("test_input.tmp"); + std::ifstream ifpos ("test_input.tmp"); Atom atom; atom.label = "C"; - atom.vel.resize(1); - atom.mag.resize(1); - atom.m_loc_.resize(1); - atom.angle1.resize(1); - atom.angle2.resize(1); - atom.lambda.resize(1); - atom.constrain.resize(1); - - ModuleBase::Vector3 mv(1, 1, 1); + atom.vel.resize (1); + atom.mag.resize (1); + atom.m_loc_.resize (1); + atom.angle1.resize (1); + atom.angle2.resize (1); + atom.lambda.resize (1); + atom.constrain.resize (1); + + ModuleBase::Vector3 mv (1, 1, 1); bool input_vec_mag = false; bool input_angle_mag = false; bool set_element_mag_zero = false; @@ -418,41 +472,40 @@ TEST_F(ReadAtomsHelperTest, ParseAtomPropertiesScalarMag) double x, y, z; ifpos >> x >> y >> z; - bool result = unitcell::parse_atom_properties(ifpos, atom, 0, mv, - input_vec_mag, input_angle_mag, - set_element_mag_zero); + bool result + = unitcell::parse_atom_properties (ifpos, atom, 0, mv, input_vec_mag, input_angle_mag, set_element_mag_zero); - EXPECT_TRUE(result); - EXPECT_DOUBLE_EQ(atom.mag[0], 2.5); - EXPECT_TRUE(set_element_mag_zero); - EXPECT_FALSE(input_vec_mag); + EXPECT_TRUE (result); + EXPECT_DOUBLE_EQ (atom.mag[0], 2.5); + EXPECT_TRUE (set_element_mag_zero); + EXPECT_FALSE (input_vec_mag); - ifpos.close(); - std::remove("test_input.tmp"); + ifpos.close (); + std::remove ("test_input.tmp"); } // Test parse_atom_properties with vector magnetization 
-TEST_F(ReadAtomsHelperTest, ParseAtomPropertiesVectorMag) +TEST_F (ReadAtomsHelperTest, ParseAtomPropertiesVectorMag) { std::string input_str = "1.0 2.0 3.0 mag 1.0 2.0 3.0\n"; - std::ofstream temp_file("test_input.tmp"); + std::ofstream temp_file ("test_input.tmp"); temp_file << input_str; - temp_file.close(); + temp_file.close (); - std::ifstream ifpos("test_input.tmp"); + std::ifstream ifpos ("test_input.tmp"); Atom atom; atom.label = "C"; - atom.vel.resize(1); - atom.mag.resize(1); - atom.m_loc_.resize(1); - atom.angle1.resize(1); - atom.angle2.resize(1); - atom.lambda.resize(1); - atom.constrain.resize(1); - - ModuleBase::Vector3 mv(1, 1, 1); + atom.vel.resize (1); + atom.mag.resize (1); + atom.m_loc_.resize (1); + atom.angle1.resize (1); + atom.angle2.resize (1); + atom.lambda.resize (1); + atom.constrain.resize (1); + + ModuleBase::Vector3 mv (1, 1, 1); bool input_vec_mag = false; bool input_angle_mag = false; bool set_element_mag_zero = false; @@ -461,24 +514,24 @@ TEST_F(ReadAtomsHelperTest, ParseAtomPropertiesVectorMag) double x, y, z; ifpos >> x >> y >> z; - bool result = unitcell::parse_atom_properties(ifpos, atom, 0, mv, - input_vec_mag, input_angle_mag, - set_element_mag_zero); + bool result + = unitcell::parse_atom_properties (ifpos, atom, 0, mv, input_vec_mag, input_angle_mag, set_element_mag_zero); - EXPECT_TRUE(result); - EXPECT_DOUBLE_EQ(atom.m_loc_[0].x, 1.0); - EXPECT_DOUBLE_EQ(atom.m_loc_[0].y, 2.0); - EXPECT_DOUBLE_EQ(atom.m_loc_[0].z, 3.0); - EXPECT_NEAR(atom.mag[0], sqrt(1.0 + 4.0 + 9.0), 1e-10); - EXPECT_TRUE(input_vec_mag); - EXPECT_TRUE(set_element_mag_zero); + EXPECT_TRUE (result); + EXPECT_DOUBLE_EQ (atom.m_loc_[0].x, 1.0); + EXPECT_DOUBLE_EQ (atom.m_loc_[0].y, 2.0); + EXPECT_DOUBLE_EQ (atom.m_loc_[0].z, 3.0); + EXPECT_NEAR (atom.mag[0], sqrt (1.0 + 4.0 + 9.0), 1e-10); + EXPECT_TRUE (input_vec_mag); + EXPECT_TRUE (set_element_mag_zero); - ifpos.close(); - std::remove("test_input.tmp"); + ifpos.close (); + std::remove 
("test_input.tmp"); } -int main(int argc, char **argv) +int + main (int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + ::testing::InitGoogleTest (&argc, argv); + return RUN_ALL_TESTS (); } diff --git a/source/source_cell/test/read_pp_test.cpp b/source/source_cell/test/read_pp_test.cpp index 069eaa1ac7f..f3b3334937f 100644 --- a/source/source_cell/test/read_pp_test.cpp +++ b/source/source_cell/test/read_pp_test.cpp @@ -3,7 +3,7 @@ #define private public #include "source_io/module_parameter/parameter.h" #undef private -#include +#include /************************************************ * unit test of read_pp ***********************************************/ @@ -68,616 +68,613 @@ #undef private class ReadPPTest : public testing::Test { -protected: - std::string output; - std::unique_ptr read_pp{new Pseudopot_upf}; - std::unique_ptr upf{new Atom_pseudo}; + protected: + std::string output; + std::unique_ptr read_pp{new Pseudopot_upf}; + std::unique_ptr upf{new Atom_pseudo}; }; -TEST_F(ReadPPTest, ReadUPF100_Coulomb) +TEST_F (ReadPPTest, ReadUPF100_Coulomb) { - std::ifstream ifs; - ifs.open("./support/Te.pbe-coulomb.UPF"); - read_pp->read_pseudo_upf(ifs, *upf); - EXPECT_TRUE(upf->vloc_at.empty()); - EXPECT_EQ(read_pp->coulomb_potential, true); - EXPECT_EQ(upf->tvanp, false); - EXPECT_EQ(upf->nbeta, 0); - EXPECT_EQ(upf->lmax, 0); - EXPECT_EQ(read_pp->lloc, 0); - ifs.close(); + std::ifstream ifs; + ifs.open ("./support/Te.pbe-coulomb.UPF"); + read_pp->read_pseudo_upf (ifs, *upf); + EXPECT_TRUE (upf->vloc_at.empty ()); + EXPECT_EQ (read_pp->coulomb_potential, true); + EXPECT_EQ (upf->tvanp, false); + EXPECT_EQ (upf->nbeta, 0); + EXPECT_EQ (upf->lmax, 0); + EXPECT_EQ (read_pp->lloc, 0); + ifs.close (); } -TEST_F(ReadPPTest, ReadUPF100) +TEST_F (ReadPPTest, ReadUPF100) { - std::ifstream ifs; - ifs.open("./support/Te.pbe-rrkj.UPF"); - read_pp->read_pseudo_upf(ifs, *upf); - EXPECT_FALSE(upf->has_so); // no soc info - 
EXPECT_EQ(upf->nv,0); // number of version - EXPECT_EQ(upf->psd,"Te"); // element label - EXPECT_EQ(upf->pp_type,"NC"); // pp_type - EXPECT_FALSE(upf->tvanp); // not ultrasoft - EXPECT_FALSE(upf->nlcc); // no Nonlinear core correction - EXPECT_EQ(upf->xc_func,"PBE"); // Exchange-Correlation functional - EXPECT_EQ(upf->zv,6); // Z valence - EXPECT_DOUBLE_EQ(upf->etotps,-15.54533017755); // total energy - EXPECT_DOUBLE_EQ(upf->ecutwfc,0.0); // suggested cutoff for wfc - EXPECT_DOUBLE_EQ(upf->ecutrho,0.0); // suggested cutoff for rho - EXPECT_EQ(upf->lmax,2); // max angular momentum component - EXPECT_EQ(upf->mesh,1191); // Number of points in mesh - EXPECT_EQ(upf->nchi,3); // Number of wavefunctions - EXPECT_EQ(upf->nbeta,3); // Number of projectors - EXPECT_EQ(upf->els[0],"5S"); // label for i-th atomic orbital - EXPECT_EQ(upf->els[1],"5P"); // label for i-th atomic orbital - EXPECT_EQ(upf->els[2],"5D"); // label for i-th atomic orbital - EXPECT_EQ(upf->lchi[0],0); // angluar momentum of each atomic orbital - EXPECT_EQ(upf->lchi[1],1); // angluar momentum of each atomic orbital - EXPECT_EQ(upf->lchi[2],2); // angluar momentum of each atomic orbital - EXPECT_DOUBLE_EQ(upf->oc[0],2.0); // occupation of each atomic orbital - EXPECT_DOUBLE_EQ(upf->oc[1],3.0); // occupation of each atomic orbital - EXPECT_DOUBLE_EQ(upf->oc[2],1.0); // occupation of each atomic orbital - EXPECT_DOUBLE_EQ(upf->r[0],1.75361916453E-05); // r - EXPECT_DOUBLE_EQ(upf->r[upf->mesh-1],5.05901190442E+01); // r - EXPECT_DOUBLE_EQ(upf->rab[0],2.19202395566E-07); // rab - EXPECT_DOUBLE_EQ(upf->rab[upf->mesh-1],6.32376488053E-01); // rab - EXPECT_DOUBLE_EQ(upf->vloc_at[0],-5.00890143222E+00); // vloc - EXPECT_DOUBLE_EQ(upf->vloc_at[upf->mesh-1],-2.37200471955E-01); // vloc - EXPECT_EQ(upf->lll[0],0); // BETA - EXPECT_EQ(read_pp->kbeta[0], 957); - EXPECT_DOUBLE_EQ(upf->betar(0, 0), -1.82560984478E-03); - EXPECT_DOUBLE_EQ(upf->betar(0, read_pp->kbeta[0] - 1), -1.61398366674E-03); - 
EXPECT_EQ(upf->lll[1], 0); // BETA - EXPECT_EQ(read_pp->kbeta[1], 957); - EXPECT_DOUBLE_EQ(upf->betar(1, 0), 1.51692136792E-03); - EXPECT_DOUBLE_EQ(upf->betar(1, read_pp->kbeta[1] - 1), 1.51458412218E-03); - EXPECT_EQ(upf->lll[2], 2); // BETA - EXPECT_EQ(read_pp->kbeta[2], 957); - EXPECT_DOUBLE_EQ(upf->betar(2, 0), -3.10582746893E-13); - EXPECT_DOUBLE_EQ(upf->betar(2, read_pp->kbeta[2] - 1), -4.17131335030E-04); - EXPECT_EQ(read_pp->nd,4); // DIJ - EXPECT_DOUBLE_EQ(upf->dion(0,0),-1.70394647943E-01); - EXPECT_DOUBLE_EQ(upf->dion(0,1),-1.76521654672E-01); - EXPECT_DOUBLE_EQ(upf->dion(1,1),-1.80323263809E-01); - EXPECT_DOUBLE_EQ(upf->dion(2,2),1.16612440320E-02); - EXPECT_DOUBLE_EQ(upf->chi(0,0),1.40252610787E-06); // PSWFC - EXPECT_DOUBLE_EQ(upf->chi(0,upf->mesh-1),6.15962544650E-25); - EXPECT_DOUBLE_EQ(upf->chi(1,0),7.65306256201E-11); - EXPECT_DOUBLE_EQ(upf->chi(1,upf->mesh-1),1.44320361049E-17); - EXPECT_DOUBLE_EQ(upf->chi(2,0),4.37015997370E-16); - EXPECT_DOUBLE_EQ(upf->chi(2,upf->mesh-1),6.20093850585E-05); - EXPECT_DOUBLE_EQ(upf->rho_at[0],3.93415898407E-12); // RhoAtom - EXPECT_DOUBLE_EQ(upf->rho_at[upf->mesh-1],3.84516383534E-09); - EXPECT_EQ(upf->nn[0],1); // nn - EXPECT_EQ(upf->nn[1],2); - EXPECT_EQ(upf->nn[2],3); - EXPECT_DOUBLE_EQ(upf->jchi[0],0.0); // jchi - EXPECT_DOUBLE_EQ(upf->jchi[1],0.0); - EXPECT_DOUBLE_EQ(upf->jchi[2],0.0); - EXPECT_DOUBLE_EQ(upf->jjj[0],0.0); // jjj - EXPECT_DOUBLE_EQ(upf->jjj[1],0.0); - EXPECT_DOUBLE_EQ(upf->jjj[2],0.0); - //EXPECT_EQ - ifs.close(); - std::ofstream ofs; - ofs.open("tmp"); - read_pp->print_pseudo_upf(ofs, *upf); - ofs.close(); - ifs.open("tmp"); - getline(ifs, output); - EXPECT_THAT(output, testing::HasSubstr("==== read_pseudo_upf ===")); - ifs.close(); + std::ifstream ifs; + ifs.open ("./support/Te.pbe-rrkj.UPF"); + read_pp->read_pseudo_upf (ifs, *upf); + EXPECT_FALSE (upf->has_so); // no soc info + EXPECT_EQ (upf->nv, 0); // number of version + EXPECT_EQ (upf->psd, "Te"); // element label + EXPECT_EQ 
(upf->pp_type, "NC"); // pp_type + EXPECT_FALSE (upf->tvanp); // not ultrasoft + EXPECT_FALSE (upf->nlcc); // no Nonlinear core correction + EXPECT_EQ (upf->xc_func, "PBE"); // Exchange-Correlation functional + EXPECT_EQ (upf->zv, 6); // Z valence + EXPECT_DOUBLE_EQ (upf->etotps, -15.54533017755); // total energy + EXPECT_DOUBLE_EQ (upf->ecutwfc, 0.0); // suggested cutoff for wfc + EXPECT_DOUBLE_EQ (upf->ecutrho, 0.0); // suggested cutoff for rho + EXPECT_EQ (upf->lmax, 2); // max angular momentum component + EXPECT_EQ (upf->mesh, 1191); // Number of points in mesh + EXPECT_EQ (upf->nchi, 3); // Number of wavefunctions + EXPECT_EQ (upf->nbeta, 3); // Number of projectors + EXPECT_EQ (upf->els[0], "5S"); // label for i-th atomic orbital + EXPECT_EQ (upf->els[1], "5P"); // label for i-th atomic orbital + EXPECT_EQ (upf->els[2], "5D"); // label for i-th atomic orbital + EXPECT_EQ (upf->lchi[0], 0); // angluar momentum of each atomic orbital + EXPECT_EQ (upf->lchi[1], 1); // angluar momentum of each atomic orbital + EXPECT_EQ (upf->lchi[2], 2); // angluar momentum of each atomic orbital + EXPECT_DOUBLE_EQ (upf->oc[0], 2.0); // occupation of each atomic orbital + EXPECT_DOUBLE_EQ (upf->oc[1], 3.0); // occupation of each atomic orbital + EXPECT_DOUBLE_EQ (upf->oc[2], 1.0); // occupation of each atomic orbital + EXPECT_DOUBLE_EQ (upf->r[0], 1.75361916453E-05); // r + EXPECT_DOUBLE_EQ (upf->r[upf->mesh - 1], 5.05901190442E+01); // r + EXPECT_DOUBLE_EQ (upf->rab[0], 2.19202395566E-07); // rab + EXPECT_DOUBLE_EQ (upf->rab[upf->mesh - 1], 6.32376488053E-01); // rab + EXPECT_DOUBLE_EQ (upf->vloc_at[0], -5.00890143222E+00); // vloc + EXPECT_DOUBLE_EQ (upf->vloc_at[upf->mesh - 1], -2.37200471955E-01); // vloc + EXPECT_EQ (upf->lll[0], 0); // BETA + EXPECT_EQ (read_pp->kbeta[0], 957); + EXPECT_DOUBLE_EQ (upf->betar (0, 0), -1.82560984478E-03); + EXPECT_DOUBLE_EQ (upf->betar (0, read_pp->kbeta[0] - 1), -1.61398366674E-03); + EXPECT_EQ (upf->lll[1], 0); // BETA + EXPECT_EQ 
(read_pp->kbeta[1], 957); + EXPECT_DOUBLE_EQ (upf->betar (1, 0), 1.51692136792E-03); + EXPECT_DOUBLE_EQ (upf->betar (1, read_pp->kbeta[1] - 1), 1.51458412218E-03); + EXPECT_EQ (upf->lll[2], 2); // BETA + EXPECT_EQ (read_pp->kbeta[2], 957); + EXPECT_DOUBLE_EQ (upf->betar (2, 0), -3.10582746893E-13); + EXPECT_DOUBLE_EQ (upf->betar (2, read_pp->kbeta[2] - 1), -4.17131335030E-04); + EXPECT_EQ (read_pp->nd, 4); // DIJ + EXPECT_DOUBLE_EQ (upf->dion (0, 0), -1.70394647943E-01); + EXPECT_DOUBLE_EQ (upf->dion (0, 1), -1.76521654672E-01); + EXPECT_DOUBLE_EQ (upf->dion (1, 1), -1.80323263809E-01); + EXPECT_DOUBLE_EQ (upf->dion (2, 2), 1.16612440320E-02); + EXPECT_DOUBLE_EQ (upf->chi (0, 0), 1.40252610787E-06); // PSWFC + EXPECT_DOUBLE_EQ (upf->chi (0, upf->mesh - 1), 6.15962544650E-25); + EXPECT_DOUBLE_EQ (upf->chi (1, 0), 7.65306256201E-11); + EXPECT_DOUBLE_EQ (upf->chi (1, upf->mesh - 1), 1.44320361049E-17); + EXPECT_DOUBLE_EQ (upf->chi (2, 0), 4.37015997370E-16); + EXPECT_DOUBLE_EQ (upf->chi (2, upf->mesh - 1), 6.20093850585E-05); + EXPECT_DOUBLE_EQ (upf->rho_at[0], 3.93415898407E-12); // RhoAtom + EXPECT_DOUBLE_EQ (upf->rho_at[upf->mesh - 1], 3.84516383534E-09); + EXPECT_EQ (upf->nn[0], 1); // nn + EXPECT_EQ (upf->nn[1], 2); + EXPECT_EQ (upf->nn[2], 3); + EXPECT_DOUBLE_EQ (upf->jchi[0], 0.0); // jchi + EXPECT_DOUBLE_EQ (upf->jchi[1], 0.0); + EXPECT_DOUBLE_EQ (upf->jchi[2], 0.0); + EXPECT_DOUBLE_EQ (upf->jjj[0], 0.0); // jjj + EXPECT_DOUBLE_EQ (upf->jjj[1], 0.0); + EXPECT_DOUBLE_EQ (upf->jjj[2], 0.0); + // EXPECT_EQ + ifs.close (); + std::ofstream ofs; + ofs.open ("tmp"); + read_pp->print_pseudo_upf (ofs, *upf); + ofs.close (); + ifs.open ("tmp"); + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("==== read_pseudo_upf ===")); + ifs.close (); } -TEST_F(ReadPPTest, ReadUPF100USPP) +TEST_F (ReadPPTest, ReadUPF100USPP) { std::ifstream ifs; - ifs.open("./support/fe_pbe_v1.5.uspp.F.UPF"); - read_pp->read_pseudo_upf(ifs, *upf); - EXPECT_FALSE(upf->has_so); // 
has soc info - EXPECT_FALSE(read_pp->q_with_l); // q_with_l - EXPECT_EQ(upf->nv, 0); // number of version - EXPECT_EQ(upf->psd, "Fe"); // element label - EXPECT_EQ(upf->pp_type, "US"); // pp_type - EXPECT_TRUE(upf->tvanp); // not ultrasoft - EXPECT_TRUE(upf->nlcc); // no Nonlinear core correction - EXPECT_EQ(upf->xc_func, "PBE"); // Exchange-Correlation functional - EXPECT_EQ(upf->zv, 16); // Z valence - EXPECT_DOUBLE_EQ(upf->etotps, -248.63387366200); // total energy - EXPECT_DOUBLE_EQ(upf->ecutwfc, 0.0); // suggested cutoff for wfc - EXPECT_DOUBLE_EQ(upf->ecutrho, 0.0); // suggested cutoff for rho - EXPECT_EQ(upf->lmax, 2); // max angular momentum component - EXPECT_EQ(upf->mesh, 861); // Number of points in mesh - EXPECT_EQ(upf->nchi, 5); // Number of wavefunctions - EXPECT_EQ(upf->nbeta, 6); // Number of projectors - EXPECT_EQ(upf->els[0], "3S"); // label for i-th atomic orbital - EXPECT_EQ(upf->els[1], "3P"); // label for i-th atomic orbital - EXPECT_EQ(upf->els[2], "3D"); // label for i-th atomic orbital - EXPECT_EQ(upf->els[3], "4S"); // label for i-th atomic orbital - EXPECT_EQ(upf->els[4], "4P"); // label for i-th atomic orbital - EXPECT_EQ(upf->lchi[0], 0); // angluar momentum of each atomic orbital - EXPECT_EQ(upf->lchi[1], 1); // angluar momentum of each atomic orbital - EXPECT_EQ(upf->lchi[2], 2); // angluar momentum of each atomic orbital - EXPECT_EQ(upf->lchi[3], 0); // angluar momentum of each atomic orbital - EXPECT_EQ(upf->lchi[4], 1); // angluar momentum of each atomic orbital - EXPECT_DOUBLE_EQ(upf->oc[0], 2.0); // occupation of each atomic orbital - EXPECT_DOUBLE_EQ(upf->oc[1], 6.0); // occupation of each atomic orbital - EXPECT_DOUBLE_EQ(upf->oc[2], 5.0); // occupation of each atomic orbital - EXPECT_DOUBLE_EQ(upf->oc[3], 2.0); // occupation of each atomic orbital - EXPECT_DOUBLE_EQ(upf->oc[4], 0.0); // occupation of each atomic orbital - EXPECT_DOUBLE_EQ(upf->r[0], 0.00000000000E+00); // r - EXPECT_DOUBLE_EQ(upf->r[upf->mesh - 1], 
2.04011054501E+02); // r - EXPECT_DOUBLE_EQ(upf->rab[0], 1.61587495219E-06); // rab - EXPECT_DOUBLE_EQ(upf->rab[upf->mesh - 1], 3.45781609895E+00); // rab - EXPECT_DOUBLE_EQ(upf->rho_atc[1], 3.48284864470E+00); // nlcc - EXPECT_DOUBLE_EQ(upf->rho_atc[upf->mesh - 1], 0.00000000000E+00); // nlcc - EXPECT_DOUBLE_EQ(upf->vloc_at[0], -5.13189996435E+01); // vloc - EXPECT_DOUBLE_EQ(upf->vloc_at[upf->mesh - 1], -1.56854245366E-01); // vloc - EXPECT_EQ(upf->lll[0], 0); // BETA - EXPECT_EQ(read_pp->kbeta[0], 607); - EXPECT_DOUBLE_EQ(upf->betar(0, 0), 0.00000000000E+00); - EXPECT_DOUBLE_EQ(upf->betar(0, read_pp->kbeta[0] - 1), 0.00000000000E+00); - EXPECT_EQ(upf->lll[1], 0); // BETA - EXPECT_EQ(read_pp->kbeta[1], 607); - EXPECT_DOUBLE_EQ(upf->betar(1, 0), 0.00000000000E+00); - EXPECT_DOUBLE_EQ(upf->betar(1, read_pp->kbeta[0] - 1), 0.00000000000E+00); - EXPECT_EQ(upf->lll[2], 1); // BETA - EXPECT_EQ(read_pp->kbeta[2], 607); - EXPECT_DOUBLE_EQ(upf->betar(2, 1), 5.76970159520E-12); - EXPECT_DOUBLE_EQ(upf->betar(2, read_pp->kbeta[2] - 1), 0.00000000000E+00); - EXPECT_EQ(upf->lll[5], 2); // BETA - EXPECT_EQ(read_pp->kbeta[5], 607); - EXPECT_DOUBLE_EQ(upf->betar(5, 2), -3.30692076673E-11); - EXPECT_DOUBLE_EQ(upf->betar(5, read_pp->kbeta[2] - 1), 0.00000000000E+00); - EXPECT_EQ(read_pp->nd, 9); // DIJ - EXPECT_DOUBLE_EQ(upf->dion(0, 0), -2.44502386412E-02); - EXPECT_DOUBLE_EQ(upf->dion(0, 1), 1.88646719481E+00); - EXPECT_DOUBLE_EQ(upf->dion(1, 1), 2.82162386984E+00); - EXPECT_DOUBLE_EQ(upf->dion(2, 2), 9.10650114165E+00); - EXPECT_DOUBLE_EQ(upf->dion(2, 3), -1.66542402638E+01); - EXPECT_DOUBLE_EQ(upf->dion(3, 3), 2.82741263018E+01); - EXPECT_DOUBLE_EQ(upf->dion(4, 4), 5.48893904730E+01); - EXPECT_DOUBLE_EQ(upf->dion(4, 5), 6.28094728901E+01); - EXPECT_DOUBLE_EQ(upf->dion(5, 5), 7.17648258086E+01); - EXPECT_EQ(read_pp->nqf, 8); // QIJ - EXPECT_EQ(upf->nqlc, 5); // nqlc - EXPECT_DOUBLE_EQ(read_pp->rinner[0], 1.0); // rinner - EXPECT_DOUBLE_EQ(read_pp->rinner[1], 1.0); - 
EXPECT_DOUBLE_EQ(read_pp->rinner[2], 1.0); - EXPECT_DOUBLE_EQ(read_pp->rinner[3], 1.0); - EXPECT_DOUBLE_EQ(read_pp->rinner[4], 1.0); - EXPECT_DOUBLE_EQ(upf->qqq(0, 0), 9.61156723771E-02); - EXPECT_DOUBLE_EQ(read_pp->qfunc(0, 1), -2.94880294140E-11); - EXPECT_DOUBLE_EQ(read_pp->qfunc(0, upf->mesh - 1), 0.00000000000E+00); - EXPECT_DOUBLE_EQ(read_pp->qfcoef(0, 0, 0, 0), -1.11034753554E+01); - EXPECT_DOUBLE_EQ(read_pp->qfcoef(0, 0, 4, 7), 0.00000000000E+00); - EXPECT_DOUBLE_EQ(upf->qqq(0, 1), 6.30706989525E-02); - EXPECT_DOUBLE_EQ(read_pp->qfunc(1, 1), -9.73254487126E-12); - EXPECT_DOUBLE_EQ(read_pp->qfunc(0, upf->mesh - 1), 0.00000000000E+00); - EXPECT_DOUBLE_EQ(read_pp->qfcoef(0, 1, 0, 0), -3.66470985929E+00); - EXPECT_DOUBLE_EQ(read_pp->qfcoef(0, 1, 4, 7), 0.00000000000E+00); - EXPECT_DOUBLE_EQ(upf->qqq(5, 5), 4.88172232559E+00); - EXPECT_DOUBLE_EQ(read_pp->qfunc(20, 1), 1.69461625558E-10); - EXPECT_DOUBLE_EQ(read_pp->qfunc(0, upf->mesh - 1), 0.00000000000E+00); - EXPECT_DOUBLE_EQ(read_pp->qfcoef(5, 5, 0, 0), 6.38093836870E+01); - EXPECT_DOUBLE_EQ(read_pp->qfcoef(5, 5, 4, 7), 8.40128670914E+03); - EXPECT_DOUBLE_EQ(upf->chi(0, 1), 4.36429934825E-06); // PSWFC - EXPECT_DOUBLE_EQ(upf->chi(0, upf->mesh - 1), 0.00000000000E+00); - EXPECT_DOUBLE_EQ(upf->chi(1, 1), 6.46349114028E-12); - EXPECT_DOUBLE_EQ(upf->chi(1, upf->mesh - 1), 0.00000000000E+00); - EXPECT_DOUBLE_EQ(upf->chi(2, 1), 7.06492930658E-18); - EXPECT_DOUBLE_EQ(upf->chi(2, upf->mesh - 1), 0.00000000000E+00); - EXPECT_DOUBLE_EQ(upf->chi(3, 1), 1.45872860322E-06); - EXPECT_DOUBLE_EQ(upf->chi(3, upf->mesh - 1), 0.00000000000E+00); - EXPECT_DOUBLE_EQ(upf->chi(4, 1), 1.27728623256E-12); - EXPECT_DOUBLE_EQ(upf->chi(4, upf->mesh - 1), 0.00000000000E+00); - EXPECT_DOUBLE_EQ(upf->rho_at[1], 7.94937989763E-11); // RhoAtom - EXPECT_DOUBLE_EQ(upf->rho_at[upf->mesh - 1], 0.00000000000E+00); + ifs.open ("./support/fe_pbe_v1.5.uspp.F.UPF"); + read_pp->read_pseudo_upf (ifs, *upf); + EXPECT_FALSE (upf->has_so); // has soc info 
+ EXPECT_FALSE (read_pp->q_with_l); // q_with_l + EXPECT_EQ (upf->nv, 0); // number of version + EXPECT_EQ (upf->psd, "Fe"); // element label + EXPECT_EQ (upf->pp_type, "US"); // pp_type + EXPECT_TRUE (upf->tvanp); // not ultrasoft + EXPECT_TRUE (upf->nlcc); // no Nonlinear core correction + EXPECT_EQ (upf->xc_func, "PBE"); // Exchange-Correlation functional + EXPECT_EQ (upf->zv, 16); // Z valence + EXPECT_DOUBLE_EQ (upf->etotps, -248.63387366200); // total energy + EXPECT_DOUBLE_EQ (upf->ecutwfc, 0.0); // suggested cutoff for wfc + EXPECT_DOUBLE_EQ (upf->ecutrho, 0.0); // suggested cutoff for rho + EXPECT_EQ (upf->lmax, 2); // max angular momentum component + EXPECT_EQ (upf->mesh, 861); // Number of points in mesh + EXPECT_EQ (upf->nchi, 5); // Number of wavefunctions + EXPECT_EQ (upf->nbeta, 6); // Number of projectors + EXPECT_EQ (upf->els[0], "3S"); // label for i-th atomic orbital + EXPECT_EQ (upf->els[1], "3P"); // label for i-th atomic orbital + EXPECT_EQ (upf->els[2], "3D"); // label for i-th atomic orbital + EXPECT_EQ (upf->els[3], "4S"); // label for i-th atomic orbital + EXPECT_EQ (upf->els[4], "4P"); // label for i-th atomic orbital + EXPECT_EQ (upf->lchi[0], 0); // angluar momentum of each atomic orbital + EXPECT_EQ (upf->lchi[1], 1); // angluar momentum of each atomic orbital + EXPECT_EQ (upf->lchi[2], 2); // angluar momentum of each atomic orbital + EXPECT_EQ (upf->lchi[3], 0); // angluar momentum of each atomic orbital + EXPECT_EQ (upf->lchi[4], 1); // angluar momentum of each atomic orbital + EXPECT_DOUBLE_EQ (upf->oc[0], 2.0); // occupation of each atomic orbital + EXPECT_DOUBLE_EQ (upf->oc[1], 6.0); // occupation of each atomic orbital + EXPECT_DOUBLE_EQ (upf->oc[2], 5.0); // occupation of each atomic orbital + EXPECT_DOUBLE_EQ (upf->oc[3], 2.0); // occupation of each atomic orbital + EXPECT_DOUBLE_EQ (upf->oc[4], 0.0); // occupation of each atomic orbital + EXPECT_DOUBLE_EQ (upf->r[0], 0.00000000000E+00); // r + EXPECT_DOUBLE_EQ 
(upf->r[upf->mesh - 1], 2.04011054501E+02); // r + EXPECT_DOUBLE_EQ (upf->rab[0], 1.61587495219E-06); // rab + EXPECT_DOUBLE_EQ (upf->rab[upf->mesh - 1], 3.45781609895E+00); // rab + EXPECT_DOUBLE_EQ (upf->rho_atc[1], 3.48284864470E+00); // nlcc + EXPECT_DOUBLE_EQ (upf->rho_atc[upf->mesh - 1], 0.00000000000E+00); // nlcc + EXPECT_DOUBLE_EQ (upf->vloc_at[0], -5.13189996435E+01); // vloc + EXPECT_DOUBLE_EQ (upf->vloc_at[upf->mesh - 1], -1.56854245366E-01); // vloc + EXPECT_EQ (upf->lll[0], 0); // BETA + EXPECT_EQ (read_pp->kbeta[0], 607); + EXPECT_DOUBLE_EQ (upf->betar (0, 0), 0.00000000000E+00); + EXPECT_DOUBLE_EQ (upf->betar (0, read_pp->kbeta[0] - 1), 0.00000000000E+00); + EXPECT_EQ (upf->lll[1], 0); // BETA + EXPECT_EQ (read_pp->kbeta[1], 607); + EXPECT_DOUBLE_EQ (upf->betar (1, 0), 0.00000000000E+00); + EXPECT_DOUBLE_EQ (upf->betar (1, read_pp->kbeta[0] - 1), 0.00000000000E+00); + EXPECT_EQ (upf->lll[2], 1); // BETA + EXPECT_EQ (read_pp->kbeta[2], 607); + EXPECT_DOUBLE_EQ (upf->betar (2, 1), 5.76970159520E-12); + EXPECT_DOUBLE_EQ (upf->betar (2, read_pp->kbeta[2] - 1), 0.00000000000E+00); + EXPECT_EQ (upf->lll[5], 2); // BETA + EXPECT_EQ (read_pp->kbeta[5], 607); + EXPECT_DOUBLE_EQ (upf->betar (5, 2), -3.30692076673E-11); + EXPECT_DOUBLE_EQ (upf->betar (5, read_pp->kbeta[2] - 1), 0.00000000000E+00); + EXPECT_EQ (read_pp->nd, 9); // DIJ + EXPECT_DOUBLE_EQ (upf->dion (0, 0), -2.44502386412E-02); + EXPECT_DOUBLE_EQ (upf->dion (0, 1), 1.88646719481E+00); + EXPECT_DOUBLE_EQ (upf->dion (1, 1), 2.82162386984E+00); + EXPECT_DOUBLE_EQ (upf->dion (2, 2), 9.10650114165E+00); + EXPECT_DOUBLE_EQ (upf->dion (2, 3), -1.66542402638E+01); + EXPECT_DOUBLE_EQ (upf->dion (3, 3), 2.82741263018E+01); + EXPECT_DOUBLE_EQ (upf->dion (4, 4), 5.48893904730E+01); + EXPECT_DOUBLE_EQ (upf->dion (4, 5), 6.28094728901E+01); + EXPECT_DOUBLE_EQ (upf->dion (5, 5), 7.17648258086E+01); + EXPECT_EQ (read_pp->nqf, 8); // QIJ + EXPECT_EQ (upf->nqlc, 5); // nqlc + EXPECT_DOUBLE_EQ (read_pp->rinner[0], 
1.0); // rinner + EXPECT_DOUBLE_EQ (read_pp->rinner[1], 1.0); + EXPECT_DOUBLE_EQ (read_pp->rinner[2], 1.0); + EXPECT_DOUBLE_EQ (read_pp->rinner[3], 1.0); + EXPECT_DOUBLE_EQ (read_pp->rinner[4], 1.0); + EXPECT_DOUBLE_EQ (upf->qqq (0, 0), 9.61156723771E-02); + EXPECT_DOUBLE_EQ (read_pp->qfunc (0, 1), -2.94880294140E-11); + EXPECT_DOUBLE_EQ (read_pp->qfunc (0, upf->mesh - 1), 0.00000000000E+00); + EXPECT_DOUBLE_EQ (read_pp->qfcoef (0, 0, 0, 0), -1.11034753554E+01); + EXPECT_DOUBLE_EQ (read_pp->qfcoef (0, 0, 4, 7), 0.00000000000E+00); + EXPECT_DOUBLE_EQ (upf->qqq (0, 1), 6.30706989525E-02); + EXPECT_DOUBLE_EQ (read_pp->qfunc (1, 1), -9.73254487126E-12); + EXPECT_DOUBLE_EQ (read_pp->qfunc (0, upf->mesh - 1), 0.00000000000E+00); + EXPECT_DOUBLE_EQ (read_pp->qfcoef (0, 1, 0, 0), -3.66470985929E+00); + EXPECT_DOUBLE_EQ (read_pp->qfcoef (0, 1, 4, 7), 0.00000000000E+00); + EXPECT_DOUBLE_EQ (upf->qqq (5, 5), 4.88172232559E+00); + EXPECT_DOUBLE_EQ (read_pp->qfunc (20, 1), 1.69461625558E-10); + EXPECT_DOUBLE_EQ (read_pp->qfunc (0, upf->mesh - 1), 0.00000000000E+00); + EXPECT_DOUBLE_EQ (read_pp->qfcoef (5, 5, 0, 0), 6.38093836870E+01); + EXPECT_DOUBLE_EQ (read_pp->qfcoef (5, 5, 4, 7), 8.40128670914E+03); + EXPECT_DOUBLE_EQ (upf->chi (0, 1), 4.36429934825E-06); // PSWFC + EXPECT_DOUBLE_EQ (upf->chi (0, upf->mesh - 1), 0.00000000000E+00); + EXPECT_DOUBLE_EQ (upf->chi (1, 1), 6.46349114028E-12); + EXPECT_DOUBLE_EQ (upf->chi (1, upf->mesh - 1), 0.00000000000E+00); + EXPECT_DOUBLE_EQ (upf->chi (2, 1), 7.06492930658E-18); + EXPECT_DOUBLE_EQ (upf->chi (2, upf->mesh - 1), 0.00000000000E+00); + EXPECT_DOUBLE_EQ (upf->chi (3, 1), 1.45872860322E-06); + EXPECT_DOUBLE_EQ (upf->chi (3, upf->mesh - 1), 0.00000000000E+00); + EXPECT_DOUBLE_EQ (upf->chi (4, 1), 1.27728623256E-12); + EXPECT_DOUBLE_EQ (upf->chi (4, upf->mesh - 1), 0.00000000000E+00); + EXPECT_DOUBLE_EQ (upf->rho_at[1], 7.94937989763E-11); // RhoAtom + EXPECT_DOUBLE_EQ (upf->rho_at[upf->mesh - 1], 0.00000000000E+00); // EXPECT_EQ - 
ifs.close(); + ifs.close (); std::ofstream ofs; - ofs.open("tmp"); - read_pp->print_pseudo_upf(ofs, *upf); - ofs.close(); - ifs.open("tmp"); - getline(ifs, output); - EXPECT_THAT(output, testing::HasSubstr("==== read_pseudo_upf ===")); - ifs.close(); + ofs.open ("tmp"); + read_pp->print_pseudo_upf (ofs, *upf); + ofs.close (); + ifs.open ("tmp"); + getline (ifs, output); + EXPECT_THAT (output, testing::HasSubstr ("==== read_pseudo_upf ===")); + ifs.close (); } -TEST_F(ReadPPTest, ReadUPF201_Coulomb) +TEST_F (ReadPPTest, ReadUPF201_Coulomb) { - std::ifstream ifs; - ifs.open("./support/Al.pbe-coulomb.UPF"); - read_pp->read_pseudo_upf201(ifs, *upf); - EXPECT_TRUE(upf->vloc_at.empty()); - EXPECT_EQ(read_pp->coulomb_potential, true); - EXPECT_EQ(upf->nbeta, 0); - EXPECT_EQ(upf->lmax, 0); - EXPECT_EQ(read_pp->lloc, 0); - ifs.close(); + std::ifstream ifs; + ifs.open ("./support/Al.pbe-coulomb.UPF"); + read_pp->read_pseudo_upf201 (ifs, *upf); + EXPECT_TRUE (upf->vloc_at.empty ()); + EXPECT_EQ (read_pp->coulomb_potential, true); + EXPECT_EQ (upf->nbeta, 0); + EXPECT_EQ (upf->lmax, 0); + EXPECT_EQ (read_pp->lloc, 0); + ifs.close (); } -TEST_F(ReadPPTest, ReadUPF201) +TEST_F (ReadPPTest, ReadUPF201) { - std::ifstream ifs; - ifs.open("./support/Cu_ONCV_PBE-1.0.upf"); - read_pp->read_pseudo_upf201(ifs, *upf); - EXPECT_EQ(upf->psd,"Cu"); - EXPECT_EQ(upf->pp_type,"NC"); - EXPECT_FALSE(upf->has_so); - EXPECT_FALSE(upf->nlcc); - EXPECT_EQ(upf->xc_func,"PBE"); - EXPECT_EQ(upf->zv,19); - EXPECT_DOUBLE_EQ(upf->etotps,-1.82394100797E+02); - EXPECT_EQ(upf->lmax,2); - EXPECT_EQ(upf->mesh,601); // mesh -= 1 at line 388 (why? 
Let's see) - EXPECT_EQ(upf->nchi,0); - EXPECT_EQ(upf->nbeta,6); - EXPECT_DOUBLE_EQ(upf->r[0],0.0); - EXPECT_DOUBLE_EQ(upf->r[600],6.00); - EXPECT_DOUBLE_EQ(upf->rab[0],0.01); - EXPECT_DOUBLE_EQ(upf->rab[600],0.01); - EXPECT_DOUBLE_EQ(upf->vloc_at[0],-5.3426582174E+01); - EXPECT_DOUBLE_EQ(upf->vloc_at[600],-6.3333339776E+00); - EXPECT_EQ(upf->lll[0],0); - EXPECT_EQ(read_pp->kbeta[0], 196); - EXPECT_DOUBLE_EQ(upf->betar(0,0),0.0); - EXPECT_DOUBLE_EQ(upf->betar(0,600),0.0); - EXPECT_EQ(upf->lll[1],0); - EXPECT_EQ(read_pp->kbeta[1], 196); - EXPECT_DOUBLE_EQ(upf->betar(1,0),0.0); - EXPECT_DOUBLE_EQ(upf->betar(1,600),0.0); - EXPECT_EQ(upf->lll[2],1); - EXPECT_EQ(read_pp->kbeta[2], 196); - EXPECT_DOUBLE_EQ(upf->betar(2,0),0.0); - EXPECT_DOUBLE_EQ(upf->betar(2,600),0.0); - EXPECT_EQ(upf->lll[3],1); - EXPECT_EQ(read_pp->kbeta[3], 196); - EXPECT_DOUBLE_EQ(upf->betar(3,0),0.0); - EXPECT_DOUBLE_EQ(upf->betar(3,600),0.0); - EXPECT_EQ(upf->lll[4],2); - EXPECT_EQ(read_pp->kbeta[4], 196); - EXPECT_DOUBLE_EQ(upf->betar(4,0),0.0); - EXPECT_DOUBLE_EQ(upf->betar(4,600),0.0); - EXPECT_EQ(upf->lll[5],2); - EXPECT_EQ(read_pp->kbeta[5], 196); - EXPECT_DOUBLE_EQ(upf->betar(5,0),0.0); - EXPECT_DOUBLE_EQ(upf->betar(5,600),0.0); - EXPECT_DOUBLE_EQ(upf->dion(0,0),-6.6178420255E+00); - EXPECT_DOUBLE_EQ(upf->dion(5,5),-7.0938557228E+00); - EXPECT_DOUBLE_EQ(upf->rho_at[0],0.0); - EXPECT_DOUBLE_EQ(upf->rho_at[600],3.2115793029E-02); - ifs.close(); + std::ifstream ifs; + ifs.open ("./support/Cu_ONCV_PBE-1.0.upf"); + read_pp->read_pseudo_upf201 (ifs, *upf); + EXPECT_EQ (upf->psd, "Cu"); + EXPECT_EQ (upf->pp_type, "NC"); + EXPECT_FALSE (upf->has_so); + EXPECT_FALSE (upf->nlcc); + EXPECT_EQ (upf->xc_func, "PBE"); + EXPECT_EQ (upf->zv, 19); + EXPECT_DOUBLE_EQ (upf->etotps, -1.82394100797E+02); + EXPECT_EQ (upf->lmax, 2); + EXPECT_EQ (upf->mesh, 601); // mesh -= 1 at line 388 (why? 
Let's see) + EXPECT_EQ (upf->nchi, 0); + EXPECT_EQ (upf->nbeta, 6); + EXPECT_DOUBLE_EQ (upf->r[0], 0.0); + EXPECT_DOUBLE_EQ (upf->r[600], 6.00); + EXPECT_DOUBLE_EQ (upf->rab[0], 0.01); + EXPECT_DOUBLE_EQ (upf->rab[600], 0.01); + EXPECT_DOUBLE_EQ (upf->vloc_at[0], -5.3426582174E+01); + EXPECT_DOUBLE_EQ (upf->vloc_at[600], -6.3333339776E+00); + EXPECT_EQ (upf->lll[0], 0); + EXPECT_EQ (read_pp->kbeta[0], 196); + EXPECT_DOUBLE_EQ (upf->betar (0, 0), 0.0); + EXPECT_DOUBLE_EQ (upf->betar (0, 600), 0.0); + EXPECT_EQ (upf->lll[1], 0); + EXPECT_EQ (read_pp->kbeta[1], 196); + EXPECT_DOUBLE_EQ (upf->betar (1, 0), 0.0); + EXPECT_DOUBLE_EQ (upf->betar (1, 600), 0.0); + EXPECT_EQ (upf->lll[2], 1); + EXPECT_EQ (read_pp->kbeta[2], 196); + EXPECT_DOUBLE_EQ (upf->betar (2, 0), 0.0); + EXPECT_DOUBLE_EQ (upf->betar (2, 600), 0.0); + EXPECT_EQ (upf->lll[3], 1); + EXPECT_EQ (read_pp->kbeta[3], 196); + EXPECT_DOUBLE_EQ (upf->betar (3, 0), 0.0); + EXPECT_DOUBLE_EQ (upf->betar (3, 600), 0.0); + EXPECT_EQ (upf->lll[4], 2); + EXPECT_EQ (read_pp->kbeta[4], 196); + EXPECT_DOUBLE_EQ (upf->betar (4, 0), 0.0); + EXPECT_DOUBLE_EQ (upf->betar (4, 600), 0.0); + EXPECT_EQ (upf->lll[5], 2); + EXPECT_EQ (read_pp->kbeta[5], 196); + EXPECT_DOUBLE_EQ (upf->betar (5, 0), 0.0); + EXPECT_DOUBLE_EQ (upf->betar (5, 600), 0.0); + EXPECT_DOUBLE_EQ (upf->dion (0, 0), -6.6178420255E+00); + EXPECT_DOUBLE_EQ (upf->dion (5, 5), -7.0938557228E+00); + EXPECT_DOUBLE_EQ (upf->rho_at[0], 0.0); + EXPECT_DOUBLE_EQ (upf->rho_at[600], 3.2115793029E-02); + ifs.close (); } -TEST_F(ReadPPTest, ReadUSPPUPF201) +TEST_F (ReadPPTest, ReadUSPPUPF201) { std::ifstream ifs; - ifs.open("./support/Al.pbe-sp-van.UPF"); - read_pp->read_pseudo_upf201(ifs, *upf); - EXPECT_EQ(upf->psd, "Al"); - EXPECT_EQ(upf->pp_type, "US"); - EXPECT_EQ(read_pp->relativistic, "no"); - EXPECT_TRUE(upf->tvanp); - EXPECT_FALSE(upf->has_so); - EXPECT_FALSE(upf->nlcc); - EXPECT_EQ(upf->xc_func, "PBE"); - EXPECT_EQ(upf->zv, 11); - EXPECT_EQ(upf->nv, 0); - 
EXPECT_DOUBLE_EQ(upf->etotps, -1.596432307730e2); - EXPECT_DOUBLE_EQ(upf->ecutwfc, 0.0); - EXPECT_DOUBLE_EQ(upf->ecutrho, 0.0); - EXPECT_EQ(upf->lmax, 2); - EXPECT_EQ(read_pp->lmax_rho, 0); - EXPECT_EQ(read_pp->lloc, 0); - EXPECT_EQ(upf->mesh, 893); - EXPECT_EQ(upf->nchi, 4); - EXPECT_EQ(upf->nbeta, 5); - EXPECT_EQ(upf->kkbeta, 617); - EXPECT_DOUBLE_EQ(read_pp->rmax, 2.006810756590e2); - EXPECT_DOUBLE_EQ(read_pp->zmesh, 13.0); - EXPECT_FALSE(read_pp->q_with_l); - EXPECT_EQ(read_pp->nqf, 8); - EXPECT_EQ(upf->nqlc, 5); - EXPECT_DOUBLE_EQ(upf->r[0], 0.0); - EXPECT_DOUBLE_EQ(upf->r[892], 2.006810756590000e2); - EXPECT_DOUBLE_EQ(upf->rab[0], 1.169079443020000e-6); - EXPECT_DOUBLE_EQ(upf->rab[892], 3.344685763390000e0); - EXPECT_DOUBLE_EQ(upf->vloc_at[0], 3.456089057550000e0); - EXPECT_DOUBLE_EQ(upf->vloc_at[892], -1.096266796840000e-1); - EXPECT_TRUE(upf->rho_atc.empty()); - EXPECT_EQ(upf->lll[0], 0); - EXPECT_EQ(read_pp->kbeta[0], 617); - EXPECT_EQ(read_pp->els_beta[0], "2S"); - EXPECT_DOUBLE_EQ(read_pp->rcut[0], 0.0); - EXPECT_DOUBLE_EQ(read_pp->rcutus[0], 1.4); - EXPECT_DOUBLE_EQ(upf->betar(0, 0), 0.0); - EXPECT_DOUBLE_EQ(upf->betar(0, 892), 0.0); - EXPECT_EQ(upf->lll[1], 0); - EXPECT_EQ(read_pp->kbeta[1], 617); - EXPECT_EQ(read_pp->els_beta[1], "2P"); - EXPECT_DOUBLE_EQ(read_pp->rcut[1], 0.0); - EXPECT_DOUBLE_EQ(read_pp->rcutus[1], 1.42); - EXPECT_DOUBLE_EQ(upf->betar(1, 0), 0.0); - EXPECT_DOUBLE_EQ(upf->betar(1, 892), 0.0); - EXPECT_DOUBLE_EQ(upf->dion(0, 0), -2.182408428460000e2); - EXPECT_DOUBLE_EQ(upf->dion(4, 4), -3.087562171130000e1); - EXPECT_DOUBLE_EQ(upf->qqq(0, 0), 3.896280866700000e0); - EXPECT_DOUBLE_EQ(upf->qqq(4, 4), 7.218068659650000e-1); - EXPECT_DOUBLE_EQ(read_pp->qfcoef(0, 0, 0, 0), 8.705252055130002e1); - EXPECT_DOUBLE_EQ(read_pp->qfcoef(4, 4, 4, 7), 9.910935792140002e1); - EXPECT_DOUBLE_EQ(read_pp->rinner[0], 1.1); - EXPECT_DOUBLE_EQ(read_pp->rinner[4], 1.1); - EXPECT_DOUBLE_EQ(read_pp->qfunc(0, 0), 0.0); - EXPECT_DOUBLE_EQ(read_pp->qfunc(0, 
892), 0.0); - EXPECT_EQ(upf->els[0], "2S"); - EXPECT_EQ(upf->lchi[0], 0); - EXPECT_EQ(read_pp->nchi[0], 0); - EXPECT_DOUBLE_EQ(upf->oc[0], 2.0); - EXPECT_DOUBLE_EQ(read_pp->epseu[0], 0.0); - EXPECT_DOUBLE_EQ(read_pp->rcut_chi[0], 0.0); - EXPECT_DOUBLE_EQ(read_pp->rcutus_chi[0], 1.4); - EXPECT_DOUBLE_EQ(upf->chi(0, 0), 0.0); - EXPECT_DOUBLE_EQ(upf->chi(0, 892), 0.0); - EXPECT_DOUBLE_EQ(upf->rho_at[0], 0.0); - EXPECT_DOUBLE_EQ(upf->rho_at[892], 0.0); - EXPECT_TRUE(upf->jchi.empty()); - EXPECT_TRUE(upf->jjj.empty()); - EXPECT_TRUE(upf->nn.empty()); - ifs.close(); + ifs.open ("./support/Al.pbe-sp-van.UPF"); + read_pp->read_pseudo_upf201 (ifs, *upf); + EXPECT_EQ (upf->psd, "Al"); + EXPECT_EQ (upf->pp_type, "US"); + EXPECT_EQ (read_pp->relativistic, "no"); + EXPECT_TRUE (upf->tvanp); + EXPECT_FALSE (upf->has_so); + EXPECT_FALSE (upf->nlcc); + EXPECT_EQ (upf->xc_func, "PBE"); + EXPECT_EQ (upf->zv, 11); + EXPECT_EQ (upf->nv, 0); + EXPECT_DOUBLE_EQ (upf->etotps, -1.596432307730e2); + EXPECT_DOUBLE_EQ (upf->ecutwfc, 0.0); + EXPECT_DOUBLE_EQ (upf->ecutrho, 0.0); + EXPECT_EQ (upf->lmax, 2); + EXPECT_EQ (read_pp->lmax_rho, 0); + EXPECT_EQ (read_pp->lloc, 0); + EXPECT_EQ (upf->mesh, 893); + EXPECT_EQ (upf->nchi, 4); + EXPECT_EQ (upf->nbeta, 5); + EXPECT_EQ (upf->kkbeta, 617); + EXPECT_DOUBLE_EQ (read_pp->rmax, 2.006810756590e2); + EXPECT_DOUBLE_EQ (read_pp->zmesh, 13.0); + EXPECT_FALSE (read_pp->q_with_l); + EXPECT_EQ (read_pp->nqf, 8); + EXPECT_EQ (upf->nqlc, 5); + EXPECT_DOUBLE_EQ (upf->r[0], 0.0); + EXPECT_DOUBLE_EQ (upf->r[892], 2.006810756590000e2); + EXPECT_DOUBLE_EQ (upf->rab[0], 1.169079443020000e-6); + EXPECT_DOUBLE_EQ (upf->rab[892], 3.344685763390000e0); + EXPECT_DOUBLE_EQ (upf->vloc_at[0], 3.456089057550000e0); + EXPECT_DOUBLE_EQ (upf->vloc_at[892], -1.096266796840000e-1); + EXPECT_TRUE (upf->rho_atc.empty ()); + EXPECT_EQ (upf->lll[0], 0); + EXPECT_EQ (read_pp->kbeta[0], 617); + EXPECT_EQ (read_pp->els_beta[0], "2S"); + EXPECT_DOUBLE_EQ (read_pp->rcut[0], 0.0); + 
EXPECT_DOUBLE_EQ (read_pp->rcutus[0], 1.4); + EXPECT_DOUBLE_EQ (upf->betar (0, 0), 0.0); + EXPECT_DOUBLE_EQ (upf->betar (0, 892), 0.0); + EXPECT_EQ (upf->lll[1], 0); + EXPECT_EQ (read_pp->kbeta[1], 617); + EXPECT_EQ (read_pp->els_beta[1], "2P"); + EXPECT_DOUBLE_EQ (read_pp->rcut[1], 0.0); + EXPECT_DOUBLE_EQ (read_pp->rcutus[1], 1.42); + EXPECT_DOUBLE_EQ (upf->betar (1, 0), 0.0); + EXPECT_DOUBLE_EQ (upf->betar (1, 892), 0.0); + EXPECT_DOUBLE_EQ (upf->dion (0, 0), -2.182408428460000e2); + EXPECT_DOUBLE_EQ (upf->dion (4, 4), -3.087562171130000e1); + EXPECT_DOUBLE_EQ (upf->qqq (0, 0), 3.896280866700000e0); + EXPECT_DOUBLE_EQ (upf->qqq (4, 4), 7.218068659650000e-1); + EXPECT_DOUBLE_EQ (read_pp->qfcoef (0, 0, 0, 0), 8.705252055130002e1); + EXPECT_DOUBLE_EQ (read_pp->qfcoef (4, 4, 4, 7), 9.910935792140002e1); + EXPECT_DOUBLE_EQ (read_pp->rinner[0], 1.1); + EXPECT_DOUBLE_EQ (read_pp->rinner[4], 1.1); + EXPECT_DOUBLE_EQ (read_pp->qfunc (0, 0), 0.0); + EXPECT_DOUBLE_EQ (read_pp->qfunc (0, 892), 0.0); + EXPECT_EQ (upf->els[0], "2S"); + EXPECT_EQ (upf->lchi[0], 0); + EXPECT_EQ (read_pp->nchi[0], 0); + EXPECT_DOUBLE_EQ (upf->oc[0], 2.0); + EXPECT_DOUBLE_EQ (read_pp->epseu[0], 0.0); + EXPECT_DOUBLE_EQ (read_pp->rcut_chi[0], 0.0); + EXPECT_DOUBLE_EQ (read_pp->rcutus_chi[0], 1.4); + EXPECT_DOUBLE_EQ (upf->chi (0, 0), 0.0); + EXPECT_DOUBLE_EQ (upf->chi (0, 892), 0.0); + EXPECT_DOUBLE_EQ (upf->rho_at[0], 0.0); + EXPECT_DOUBLE_EQ (upf->rho_at[892], 0.0); + EXPECT_TRUE (upf->jchi.empty ()); + EXPECT_TRUE (upf->jjj.empty ()); + EXPECT_TRUE (upf->nn.empty ()); + ifs.close (); } -TEST_F(ReadPPTest, HeaderErr2011) +TEST_F (ReadPPTest, HeaderErr2011) { - std::ifstream ifs; - // 1st - ifs.open("./support/HeaderError1"); - //read_pp->read_pseudo_upf201(ifs, *upf); - testing::internal::CaptureStdout(); - EXPECT_EXIT(read_pp->read_pseudo_upf201(ifs, *upf), - ::testing::ExitedWithCode(1),""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("Found no 
PP_HEADER")); - ifs.close(); + std::ifstream ifs; + // 1st + ifs.open ("./support/HeaderError1"); + // read_pp->read_pseudo_upf201(ifs, *upf); + testing::internal::CaptureStdout (); + EXPECT_EXIT (read_pp->read_pseudo_upf201 (ifs, *upf), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("Found no PP_HEADER")); + ifs.close (); } -TEST_F(ReadPPTest, HeaderErr2012) +TEST_F (ReadPPTest, HeaderErr2012) { - std::ifstream ifs; - // 2nd - ifs.open("./support/HeaderError2"); - //read_pp->read_pseudo_upf201(ifs, *upf); - testing::internal::CaptureStdout(); - EXPECT_EXIT(read_pp->read_pseudo_upf201(ifs, *upf), - ::testing::ExitedWithCode(1),""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("SEMI-LOCAL PSEUDOPOTENTIAL IS NOT SUPPORTED")); - ifs.close(); + std::ifstream ifs; + // 2nd + ifs.open ("./support/HeaderError2"); + // read_pp->read_pseudo_upf201(ifs, *upf); + testing::internal::CaptureStdout (); + EXPECT_EXIT (read_pp->read_pseudo_upf201 (ifs, *upf), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("SEMI-LOCAL PSEUDOPOTENTIAL IS NOT SUPPORTED")); + ifs.close (); } -TEST_F(ReadPPTest, HeaderErr2013) +TEST_F (ReadPPTest, HeaderErr2013) { - std::ifstream ifs; - // 3rd - ifs.open("./support/HeaderError3"); - //read_pp->read_pseudo_upf201(ifs, *upf); - testing::internal::CaptureStdout(); - EXPECT_EXIT(read_pp->read_pseudo_upf201(ifs, *upf), - ::testing::ExitedWithCode(1),""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("PAW POTENTIAL IS NOT SUPPORTED")); - ifs.close(); + std::ifstream ifs; + // 3rd + ifs.open ("./support/HeaderError3"); + // read_pp->read_pseudo_upf201(ifs, *upf); + testing::internal::CaptureStdout (); + EXPECT_EXIT (read_pp->read_pseudo_upf201 (ifs, *upf), ::testing::ExitedWithCode (1), ""); + output = 
testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("PAW POTENTIAL IS NOT SUPPORTED")); + ifs.close (); } -TEST_F(ReadPPTest, HeaderErr2015) +TEST_F (ReadPPTest, HeaderErr2015) { std::ifstream ifs; // 4th - GlobalV::ofs_warning.open("warning.log"); - ifs.open("./support/HeaderError5"); - upf->mesh = 1; // avoid assert(pp.mesh > 0) in line 406 of read_pp_upf201.cpp - read_pp->read_pseudo_upf201(ifs, *upf); - GlobalV::ofs_warning.close(); - ifs.close(); - ifs.open("warning.log"); - getline(ifs, output); - EXPECT_THAT( + GlobalV::ofs_warning.open ("warning.log"); + ifs.open ("./support/HeaderError5"); + upf->mesh = 1; // avoid assert(pp.mesh > 0) in line 406 of read_pp_upf201.cpp + read_pp->read_pseudo_upf201 (ifs, *upf); + GlobalV::ofs_warning.close (); + ifs.close (); + ifs.open ("warning.log"); + getline (ifs, output); + EXPECT_THAT ( output, - testing::HasSubstr("arbitrary is not read in. Please add this parameter in read_pp_upf201.cpp if needed.")); - ifs.close(); - remove("warning.log"); + testing::HasSubstr ("arbitrary is not read in. 
Please add this parameter in read_pp_upf201.cpp if needed.")); + ifs.close (); + remove ("warning.log"); } -TEST_F(ReadPPTest, ReadUPF201FR) +TEST_F (ReadPPTest, ReadUPF201FR) { - std::ifstream ifs; - // this is a dojo full-relativisitic pp - ifs.open("./support/C.upf"); - read_pp->read_pseudo_upf201(ifs, *upf); - EXPECT_EQ(upf->psd,"C"); - EXPECT_TRUE(upf->has_so); - EXPECT_TRUE(upf->nlcc); - EXPECT_EQ(upf->mesh,1247); - //RELBETA - EXPECT_EQ(upf->nbeta,6); - EXPECT_EQ(upf->lll[0],0); - EXPECT_EQ(upf->lll[1],0); - EXPECT_EQ(upf->lll[2],1); - EXPECT_EQ(upf->lll[3],1); - EXPECT_EQ(upf->lll[4],1); - EXPECT_EQ(upf->lll[5],1); - EXPECT_DOUBLE_EQ(upf->jjj[0],0.5); - EXPECT_DOUBLE_EQ(upf->jjj[1],0.5); - EXPECT_DOUBLE_EQ(upf->jjj[2],0.5); - EXPECT_DOUBLE_EQ(upf->jjj[3],1.5); - EXPECT_DOUBLE_EQ(upf->jjj[4],0.5); - EXPECT_DOUBLE_EQ(upf->jjj[5],1.5); - //RELWFC - EXPECT_EQ(upf->nchi,3); - EXPECT_EQ(upf->nn[0],1); - EXPECT_EQ(upf->nn[1],2); - EXPECT_EQ(upf->nn[2],2); - EXPECT_EQ(upf->lchi[0],0); - EXPECT_EQ(upf->lchi[1],1); - EXPECT_EQ(upf->lchi[2],1); - EXPECT_DOUBLE_EQ(upf->jchi[0],0.5); - EXPECT_DOUBLE_EQ(upf->jchi[1],1.5); - EXPECT_DOUBLE_EQ(upf->jchi[2],0.5); - //PSWFC - EXPECT_EQ(upf->els[0],"2S"); - EXPECT_EQ(upf->lchi[0],0); - EXPECT_DOUBLE_EQ(upf->oc[0],2.0); - EXPECT_EQ(upf->els[1],"2P"); - EXPECT_EQ(upf->lchi[1],1); - EXPECT_DOUBLE_EQ(upf->oc[1],1.333); - EXPECT_EQ(upf->els[2],"2P"); - EXPECT_EQ(upf->lchi[2],1); - EXPECT_DOUBLE_EQ(upf->oc[2],0.667); - EXPECT_DOUBLE_EQ(upf->chi(0,0),2.0715339166E-12); - EXPECT_DOUBLE_EQ(upf->chi(2,upf->mesh-1),1.1201306967E-03); - //NLCC - EXPECT_DOUBLE_EQ(upf->rho_atc[0],8.7234550809E-01); - EXPECT_DOUBLE_EQ(upf->rho_atc[upf->mesh-1],0.0); - ifs.close(); + std::ifstream ifs; + // this is a dojo full-relativisitic pp + ifs.open ("./support/C.upf"); + read_pp->read_pseudo_upf201 (ifs, *upf); + EXPECT_EQ (upf->psd, "C"); + EXPECT_TRUE (upf->has_so); + EXPECT_TRUE (upf->nlcc); + EXPECT_EQ (upf->mesh, 1247); + // RELBETA + EXPECT_EQ 
(upf->nbeta, 6); + EXPECT_EQ (upf->lll[0], 0); + EXPECT_EQ (upf->lll[1], 0); + EXPECT_EQ (upf->lll[2], 1); + EXPECT_EQ (upf->lll[3], 1); + EXPECT_EQ (upf->lll[4], 1); + EXPECT_EQ (upf->lll[5], 1); + EXPECT_DOUBLE_EQ (upf->jjj[0], 0.5); + EXPECT_DOUBLE_EQ (upf->jjj[1], 0.5); + EXPECT_DOUBLE_EQ (upf->jjj[2], 0.5); + EXPECT_DOUBLE_EQ (upf->jjj[3], 1.5); + EXPECT_DOUBLE_EQ (upf->jjj[4], 0.5); + EXPECT_DOUBLE_EQ (upf->jjj[5], 1.5); + // RELWFC + EXPECT_EQ (upf->nchi, 3); + EXPECT_EQ (upf->nn[0], 1); + EXPECT_EQ (upf->nn[1], 2); + EXPECT_EQ (upf->nn[2], 2); + EXPECT_EQ (upf->lchi[0], 0); + EXPECT_EQ (upf->lchi[1], 1); + EXPECT_EQ (upf->lchi[2], 1); + EXPECT_DOUBLE_EQ (upf->jchi[0], 0.5); + EXPECT_DOUBLE_EQ (upf->jchi[1], 1.5); + EXPECT_DOUBLE_EQ (upf->jchi[2], 0.5); + // PSWFC + EXPECT_EQ (upf->els[0], "2S"); + EXPECT_EQ (upf->lchi[0], 0); + EXPECT_DOUBLE_EQ (upf->oc[0], 2.0); + EXPECT_EQ (upf->els[1], "2P"); + EXPECT_EQ (upf->lchi[1], 1); + EXPECT_DOUBLE_EQ (upf->oc[1], 1.333); + EXPECT_EQ (upf->els[2], "2P"); + EXPECT_EQ (upf->lchi[2], 1); + EXPECT_DOUBLE_EQ (upf->oc[2], 0.667); + EXPECT_DOUBLE_EQ (upf->chi (0, 0), 2.0715339166E-12); + EXPECT_DOUBLE_EQ (upf->chi (2, upf->mesh - 1), 1.1201306967E-03); + // NLCC + EXPECT_DOUBLE_EQ (upf->rho_atc[0], 8.7234550809E-01); + EXPECT_DOUBLE_EQ (upf->rho_atc[upf->mesh - 1], 0.0); + ifs.close (); } -TEST_F(ReadPPTest, ReadUPF201MESH2) +TEST_F (ReadPPTest, ReadUPF201MESH2) { - std::ifstream ifs; - // this pp file has gipaw, thus a different header - ifs.open("./support/Fe.pbe-sp-mt_gipaw.UPF"); - read_pp->read_pseudo_upf201(ifs, *upf); - EXPECT_EQ(upf->psd,"Fe"); - ifs.close(); + std::ifstream ifs; + // this pp file has gipaw, thus a different header + ifs.open ("./support/Fe.pbe-sp-mt_gipaw.UPF"); + read_pp->read_pseudo_upf201 (ifs, *upf); + EXPECT_EQ (upf->psd, "Fe"); + ifs.close (); } -TEST_F(ReadPPTest, VWR) +TEST_F (ReadPPTest, VWR) { - std::ifstream ifs; - // this pp file is a vwr type of pp - ifs.open("./support/vwr.Si"); - 
read_pp->read_pseudo_vwr(ifs, *upf); - EXPECT_EQ(upf->xc_func,"PZ"); - EXPECT_EQ(upf->pp_type,"NC"); - EXPECT_FALSE(upf->tvanp); - EXPECT_EQ(upf->mesh,1073); - EXPECT_FALSE(upf->nlcc); - EXPECT_EQ(upf->psd,"14"); - EXPECT_EQ(upf->zv,4); - EXPECT_EQ(read_pp->spd_loc,2); - EXPECT_FALSE(upf->has_so); - EXPECT_EQ(read_pp->iTB_s,1); - EXPECT_EQ(read_pp->iTB_p,1); - EXPECT_EQ(read_pp->iTB_d,0); - EXPECT_EQ(upf->nchi,2); - EXPECT_DOUBLE_EQ(upf->oc[0],2); - EXPECT_DOUBLE_EQ(upf->oc[1],2); - EXPECT_EQ(upf->lchi[0],0); - EXPECT_EQ(upf->lchi[1],1); - EXPECT_EQ(upf->els[0],"S"); - EXPECT_EQ(upf->els[1],"P"); - EXPECT_DOUBLE_EQ(upf->r[0],.22270617E-05); - EXPECT_DOUBLE_EQ(upf->r[upf->mesh-1],.11832572E+03); - EXPECT_NEAR(upf->rho_at[0],6.18479e-13,1.0e-17); - EXPECT_NEAR(upf->rho_at[upf->mesh-1],3.46232e-56,1.0e-60); - EXPECT_EQ(upf->nbeta,1); - EXPECT_NEAR(upf->betar(0,2),2.67501e-05,1.0e-9); - EXPECT_EQ(upf->lll[0],0); - ifs.close(); + std::ifstream ifs; + // this pp file is a vwr type of pp + ifs.open ("./support/vwr.Si"); + read_pp->read_pseudo_vwr (ifs, *upf); + EXPECT_EQ (upf->xc_func, "PZ"); + EXPECT_EQ (upf->pp_type, "NC"); + EXPECT_FALSE (upf->tvanp); + EXPECT_EQ (upf->mesh, 1073); + EXPECT_FALSE (upf->nlcc); + EXPECT_EQ (upf->psd, "14"); + EXPECT_EQ (upf->zv, 4); + EXPECT_EQ (read_pp->spd_loc, 2); + EXPECT_FALSE (upf->has_so); + EXPECT_EQ (read_pp->iTB_s, 1); + EXPECT_EQ (read_pp->iTB_p, 1); + EXPECT_EQ (read_pp->iTB_d, 0); + EXPECT_EQ (upf->nchi, 2); + EXPECT_DOUBLE_EQ (upf->oc[0], 2); + EXPECT_DOUBLE_EQ (upf->oc[1], 2); + EXPECT_EQ (upf->lchi[0], 0); + EXPECT_EQ (upf->lchi[1], 1); + EXPECT_EQ (upf->els[0], "S"); + EXPECT_EQ (upf->els[1], "P"); + EXPECT_DOUBLE_EQ (upf->r[0], .22270617E-05); + EXPECT_DOUBLE_EQ (upf->r[upf->mesh - 1], .11832572E+03); + EXPECT_NEAR (upf->rho_at[0], 6.18479e-13, 1.0e-17); + EXPECT_NEAR (upf->rho_at[upf->mesh - 1], 3.46232e-56, 1.0e-60); + EXPECT_EQ (upf->nbeta, 1); + EXPECT_NEAR (upf->betar (0, 2), 2.67501e-05, 1.0e-9); + EXPECT_EQ 
(upf->lll[0], 0); + ifs.close (); } -TEST_F(ReadPPTest, BLPS) +TEST_F (ReadPPTest, BLPS) { - std::ifstream ifs; - // this pp file is a vwr type of pp - ifs.open("./support/si.lda.lps"); - PARAM.input.dft_functional="default"; - read_pp->read_pseudo_blps(ifs, *upf); - EXPECT_FALSE(upf->nlcc); - EXPECT_FALSE(upf->tvanp); - EXPECT_FALSE(upf->has_so); - EXPECT_EQ(upf->nbeta,0); - EXPECT_EQ(upf->psd,"Si"); - EXPECT_EQ(upf->zv,4); - EXPECT_EQ(upf->lmax,0); - EXPECT_EQ(upf->mesh,1601); - EXPECT_EQ(upf->xc_func,"PZ"); - EXPECT_DOUBLE_EQ(upf->r[0],0.0); - EXPECT_DOUBLE_EQ(upf->r[upf->mesh-1],16.0); - EXPECT_DOUBLE_EQ(upf->vloc_at[0],2.4189229665506291*2.0); - EXPECT_DOUBLE_EQ(upf->vloc_at[upf->mesh-1],-0.25*2.0); - EXPECT_DOUBLE_EQ(upf->rho_at[0],0.25); - EXPECT_DOUBLE_EQ(upf->rho_at[upf->mesh-1],0.25); - ifs.close(); + std::ifstream ifs; + // this pp file is a vwr type of pp + ifs.open ("./support/si.lda.lps"); + PARAM.input.dft_functional = "default"; + read_pp->read_pseudo_blps (ifs, *upf); + EXPECT_FALSE (upf->nlcc); + EXPECT_FALSE (upf->tvanp); + EXPECT_FALSE (upf->has_so); + EXPECT_EQ (upf->nbeta, 0); + EXPECT_EQ (upf->psd, "Si"); + EXPECT_EQ (upf->zv, 4); + EXPECT_EQ (upf->lmax, 0); + EXPECT_EQ (upf->mesh, 1601); + EXPECT_EQ (upf->xc_func, "PZ"); + EXPECT_DOUBLE_EQ (upf->r[0], 0.0); + EXPECT_DOUBLE_EQ (upf->r[upf->mesh - 1], 16.0); + EXPECT_DOUBLE_EQ (upf->vloc_at[0], 2.4189229665506291 * 2.0); + EXPECT_DOUBLE_EQ (upf->vloc_at[upf->mesh - 1], -0.25 * 2.0); + EXPECT_DOUBLE_EQ (upf->rho_at[0], 0.25); + EXPECT_DOUBLE_EQ (upf->rho_at[upf->mesh - 1], 0.25); + ifs.close (); } -TEST_F(ReadPPTest, SetPseudoType) +TEST_F (ReadPPTest, SetPseudoType) { - std::string pp_address = "./support/Cu_ONCV_PBE-1.0.upf"; - std::string type = "auto"; - read_pp->set_pseudo_type(pp_address,type); - EXPECT_EQ(type,"upf201"); - pp_address = "./support/Te.pbe-rrkj.UPF"; - read_pp->set_pseudo_type(pp_address,type); - EXPECT_EQ(type,"upf"); + std::string pp_address = 
"./support/Cu_ONCV_PBE-1.0.upf"; + std::string type = "auto"; + read_pp->set_pseudo_type (pp_address, type); + EXPECT_EQ (type, "upf201"); + pp_address = "./support/Te.pbe-rrkj.UPF"; + read_pp->set_pseudo_type (pp_address, type); + EXPECT_EQ (type, "upf"); } -TEST_F(ReadPPTest, Trim) +TEST_F (ReadPPTest, Trim) { - std::string tmp_string = " aaa \t bbb\t "; - output = read_pp->trim(tmp_string); - EXPECT_EQ(output,"aaabbb"); - tmp_string = " \taaa\tbbb\t "; - output = read_pp->trimend(tmp_string); - EXPECT_EQ(output,"aaa\tbbb"); + std::string tmp_string = " aaa \t bbb\t "; + output = read_pp->trim (tmp_string); + EXPECT_EQ (output, "aaabbb"); + tmp_string = " \taaa\tbbb\t "; + output = read_pp->trimend (tmp_string); + EXPECT_EQ (output, "aaa\tbbb"); } -TEST_F(ReadPPTest, SetEmptyElement) +TEST_F (ReadPPTest, SetEmptyElement) { - upf->mesh = 10; - upf->nbeta = 10; - upf->vloc_at = std::vector(upf->mesh, 0.0); - upf->rho_at = std::vector(upf->mesh, 0.0); - upf->dion.create(upf->nbeta,upf->nbeta); - read_pp->set_empty_element(*upf); - for(int ir=0;irmesh;++ir) - { - EXPECT_DOUBLE_EQ(upf->vloc_at[ir],0.0); - EXPECT_DOUBLE_EQ(upf->rho_at[ir],0.0); - } - for(int i=0;inbeta;++i) - { - for(int j=0;jnbeta;++j) - { - EXPECT_DOUBLE_EQ(upf->dion(i,j),0.0); - } - } + upf->mesh = 10; + upf->nbeta = 10; + upf->vloc_at = std::vector (upf->mesh, 0.0); + upf->rho_at = std::vector (upf->mesh, 0.0); + upf->dion.create (upf->nbeta, upf->nbeta); + read_pp->set_empty_element (*upf); + for (int ir = 0; ir < upf->mesh; ++ir) + { + EXPECT_DOUBLE_EQ (upf->vloc_at[ir], 0.0); + EXPECT_DOUBLE_EQ (upf->rho_at[ir], 0.0); + } + for (int i = 0; i < upf->nbeta; ++i) + { + for (int j = 0; j < upf->nbeta; ++j) + { + EXPECT_DOUBLE_EQ (upf->dion (i, j), 0.0); + } + } } -TEST_F(ReadPPTest, SetUpfQ) +TEST_F (ReadPPTest, SetUpfQ) { std::ifstream ifs; - ifs.open("./support/Al.pbe-sp-van.UPF"); - read_pp->read_pseudo_upf201(ifs, *upf); - read_pp->set_upf_q(*upf); - EXPECT_DOUBLE_EQ(upf->qfuncl(0, 0, 0), 0.0); 
- EXPECT_DOUBLE_EQ(upf->qfuncl(0, 0, 100), 7.8994151918886213e-06); - EXPECT_DOUBLE_EQ(upf->qfuncl(0, 1, 0), 0.0); - EXPECT_DOUBLE_EQ(upf->qfuncl(0, 1, 100), -2.1915710970869145e-05); - EXPECT_DOUBLE_EQ(upf->qfuncl(0, 2, 0), 0.0); - EXPECT_DOUBLE_EQ(upf->qfuncl(0, 2, 100), 5.9614487166963409e-05); - EXPECT_DOUBLE_EQ(upf->qfuncl(1, 0, 0), 0.0); - EXPECT_DOUBLE_EQ(upf->qfuncl(1, 0, 100), 0.0); - EXPECT_DOUBLE_EQ(upf->qfuncl(1, 1, 0), 0.0); - EXPECT_DOUBLE_EQ(upf->qfuncl(1, 1, 100), 0.0); - EXPECT_DOUBLE_EQ(upf->qfuncl(1, 2, 0), 0.0); - EXPECT_DOUBLE_EQ(upf->qfuncl(1, 2, 100), 0.0); + ifs.open ("./support/Al.pbe-sp-van.UPF"); + read_pp->read_pseudo_upf201 (ifs, *upf); + read_pp->set_upf_q (*upf); + EXPECT_DOUBLE_EQ (upf->qfuncl (0, 0, 0), 0.0); + EXPECT_DOUBLE_EQ (upf->qfuncl (0, 0, 100), 7.8994151918886213e-06); + EXPECT_DOUBLE_EQ (upf->qfuncl (0, 1, 0), 0.0); + EXPECT_DOUBLE_EQ (upf->qfuncl (0, 1, 100), -2.1915710970869145e-05); + EXPECT_DOUBLE_EQ (upf->qfuncl (0, 2, 0), 0.0); + EXPECT_DOUBLE_EQ (upf->qfuncl (0, 2, 100), 5.9614487166963409e-05); + EXPECT_DOUBLE_EQ (upf->qfuncl (1, 0, 0), 0.0); + EXPECT_DOUBLE_EQ (upf->qfuncl (1, 0, 100), 0.0); + EXPECT_DOUBLE_EQ (upf->qfuncl (1, 1, 0), 0.0); + EXPECT_DOUBLE_EQ (upf->qfuncl (1, 1, 100), 0.0); + EXPECT_DOUBLE_EQ (upf->qfuncl (1, 2, 0), 0.0); + EXPECT_DOUBLE_EQ (upf->qfuncl (1, 2, 100), 0.0); - ifs.close(); + ifs.close (); } -TEST_F(ReadPPTest, SetQfNew) +TEST_F (ReadPPTest, SetQfNew) { // Set up input data int nqf = 3; @@ -690,123 +687,123 @@ TEST_F(ReadPPTest, SetQfNew) double rho[mesh]; for (int i = 0; i < mesh; ++i) - { - r[i] = i + 1; // Assuming some values for r - } + { + r[i] = i + 1; // Assuming some values for r + } // Call the function under test - read_pp->setqfnew(nqf, mesh, l, n, qfcoef, r, rho); + read_pp->setqfnew (nqf, mesh, l, n, qfcoef, r, rho); // Validate the output for (int ir = 0; ir < mesh; ++ir) - { - double rr = r[ir] * r[ir]; - double expectedValue = qfcoef[0]; - for (int iq = 1; iq < nqf; 
++iq) { - expectedValue += qfcoef[iq] * pow(rr, iq); + double rr = r[ir] * r[ir]; + double expectedValue = qfcoef[0]; + for (int iq = 1; iq < nqf; ++iq) + { + expectedValue += qfcoef[iq] * pow (rr, iq); + } + expectedValue *= pow (r[ir], l + n); + EXPECT_DOUBLE_EQ (expectedValue, rho[ir]); } - expectedValue *= pow(r[ir], l + n); - EXPECT_DOUBLE_EQ(expectedValue, rho[ir]); - } } -TEST_F(ReadPPTest, InitReader) +TEST_F (ReadPPTest, InitReader) { - std::string pp_file = "arbitrary"; - std::string type = "auto"; - int info = read_pp->init_pseudo_reader(pp_file,type,*upf); - EXPECT_EQ(info,1); - pp_file = "./support/Te.pbe-rrkj.UPF"; - info = read_pp->init_pseudo_reader(pp_file,type,*upf); - EXPECT_EQ(type,"upf"); - EXPECT_EQ(info,0); - pp_file = "./support/Cu_ONCV_PBE-1.0.upf"; - info = read_pp->init_pseudo_reader(pp_file,type,*upf); - EXPECT_EQ(info,2); - pp_file = "./support/Cu_ONCV_PBE-1.0.upf"; - type = "auto"; - info = read_pp->init_pseudo_reader(pp_file,type,*upf); - EXPECT_EQ(type,"upf201"); - EXPECT_EQ(info,0); - pp_file = "./support/vwr.Si"; - type = "vwr"; - info = read_pp->init_pseudo_reader(pp_file,type,*upf); - EXPECT_EQ(info,0); - pp_file = "./support/si.lda.lps"; - type = "blps"; - info = read_pp->init_pseudo_reader(pp_file,type,*upf); - EXPECT_EQ(info,0); + std::string pp_file = "arbitrary"; + std::string type = "auto"; + int info = read_pp->init_pseudo_reader (pp_file, type, *upf); + EXPECT_EQ (info, 1); + pp_file = "./support/Te.pbe-rrkj.UPF"; + info = read_pp->init_pseudo_reader (pp_file, type, *upf); + EXPECT_EQ (type, "upf"); + EXPECT_EQ (info, 0); + pp_file = "./support/Cu_ONCV_PBE-1.0.upf"; + info = read_pp->init_pseudo_reader (pp_file, type, *upf); + EXPECT_EQ (info, 2); + pp_file = "./support/Cu_ONCV_PBE-1.0.upf"; + type = "auto"; + info = read_pp->init_pseudo_reader (pp_file, type, *upf); + EXPECT_EQ (type, "upf201"); + EXPECT_EQ (info, 0); + pp_file = "./support/vwr.Si"; + type = "vwr"; + info = read_pp->init_pseudo_reader (pp_file, type, 
*upf); + EXPECT_EQ (info, 0); + pp_file = "./support/si.lda.lps"; + type = "blps"; + info = read_pp->init_pseudo_reader (pp_file, type, *upf); + EXPECT_EQ (info, 0); } -TEST_F(ReadPPTest, AverageSimpleReturns) +TEST_F (ReadPPTest, AverageSimpleReturns) { - int ierr; - double lambda = 1.0; - // first return - PARAM.input.lspinorb = 1; - upf->has_so = 0; - ierr = read_pp->average_p(lambda, *upf); - EXPECT_EQ(ierr,1); - // second return - upf->has_so = 1; - ierr = read_pp->average_p(lambda, *upf); - EXPECT_EQ(ierr,0); - upf->has_so = 1; - upf->tvanp = 1; - ierr = read_pp->average_p(lambda, *upf); - EXPECT_EQ(ierr, 1); + int ierr; + double lambda = 1.0; + // first return + PARAM.input.lspinorb = true; + upf->has_so = false; + ierr = read_pp->average_p (lambda, *upf); + EXPECT_EQ (ierr, 1); + // second return + upf->has_so = true; + ierr = read_pp->average_p (lambda, *upf); + EXPECT_EQ (ierr, 0); + upf->has_so = true; + upf->tvanp = true; + ierr = read_pp->average_p (lambda, *upf); + EXPECT_EQ (ierr, 1); } -TEST_F(ReadPPTest, AverageErrReturns) +TEST_F (ReadPPTest, AverageErrReturns) { - int ierr; - double lambda = 1.0; - // LSPINORB = 0 - std::ifstream ifs; - ifs.open("./support/Si.rel-pbe-rrkj.UPF"); - read_pp->read_pseudo_upf(ifs, *upf); - EXPECT_TRUE(upf->has_so); // has soc info - PARAM.input.lspinorb = 0; - ierr = read_pp->average_p(lambda, *upf); - EXPECT_EQ(upf->nbeta,2); - EXPECT_EQ(ierr,0); - // LSPINORB = 1 - ierr = read_pp->average_p(lambda, *upf); - EXPECT_EQ(ierr,0); - ifs.close(); + int ierr; + double lambda = 1.0; + // LSPINORB = 0 + std::ifstream ifs; + ifs.open ("./support/Si.rel-pbe-rrkj.UPF"); + read_pp->read_pseudo_upf (ifs, *upf); + EXPECT_TRUE (upf->has_so); // has soc info + PARAM.input.lspinorb = false; + ierr = read_pp->average_p (lambda, *upf); + EXPECT_EQ (upf->nbeta, 2); + EXPECT_EQ (ierr, 0); + // LSPINORB = 1 + ierr = read_pp->average_p (lambda, *upf); + EXPECT_EQ (ierr, 0); + ifs.close (); } -TEST_F(ReadPPTest, AverageLSPINORB0) +TEST_F 
(ReadPPTest, AverageLSPINORB0) { - std::ifstream ifs; - // this is a dojo full-relativisitic pp - ifs.open("./support/C.upf"); - read_pp->read_pseudo_upf201(ifs, *upf); - EXPECT_TRUE(upf->has_so); // has soc info - int ierr; - double lambda = 1.0; - // LSPINORB = 0 - PARAM.input.lspinorb = 0; - ierr = read_pp->average_p(lambda, *upf); - EXPECT_EQ(ierr,0); - EXPECT_EQ(upf->nbeta,4); - EXPECT_FALSE(upf->has_so); // has not soc info,why? + std::ifstream ifs; + // this is a dojo fully relativistic pp + ifs.open ("./support/C.upf"); + read_pp->read_pseudo_upf201 (ifs, *upf); + EXPECT_TRUE (upf->has_so); // has soc info + int ierr; + double lambda = 1.0; + // LSPINORB = 0 + PARAM.input.lspinorb = false; + ierr = read_pp->average_p (lambda, *upf); + EXPECT_EQ (ierr, 0); + EXPECT_EQ (upf->nbeta, 4); + EXPECT_FALSE (upf->has_so); // no soc info after averaging with lspinorb = false } -TEST_F(ReadPPTest, AverageLSPINORB1) +TEST_F (ReadPPTest, AverageLSPINORB1) { - std::ifstream ifs; - // this is a dojo full-relativisitic pp - ifs.open("./support/C.upf"); - read_pp->read_pseudo_upf201(ifs, *upf); - EXPECT_TRUE(upf->has_so); // has soc info - int ierr; - double lambda = 1.1; - // LSPINORB = 0 - PARAM.input.lspinorb = 1; - ierr = read_pp->average_p(lambda, *upf); - EXPECT_EQ(ierr,0); - EXPECT_EQ(upf->nbeta,6); - EXPECT_TRUE(upf->has_so); // has soc info + std::ifstream ifs; + // this is a dojo fully relativistic pp + ifs.open ("./support/C.upf"); + read_pp->read_pseudo_upf201 (ifs, *upf); + EXPECT_TRUE (upf->has_so); // has soc info + int ierr; + double lambda = 1.1; + // LSPINORB = 1 + PARAM.input.lspinorb = true; + ierr = read_pp->average_p (lambda, *upf); + EXPECT_EQ (ierr, 0); + EXPECT_EQ (upf->nbeta, 6); + EXPECT_TRUE (upf->has_so); // has soc info } diff --git a/source/source_cell/test/read_sep_test.cpp b/source/source_cell/test/read_sep_test.cpp index 0bfada1a36d..c4f7ce8f26a 100644 --- a/source/source_cell/test/read_sep_test.cpp +++ b/source/source_cell/test/read_sep_test.cpp @@ -19,128 +19,131 @@
class ReadSepTest : public testing::Test std::string output; std::unique_ptr read_sep{new SepPot}; - void SetUp() override + void + SetUp () override { // Initialization default check - EXPECT_FALSE(read_sep->is_enable); - EXPECT_DOUBLE_EQ(read_sep->r_in, 0.0); - EXPECT_DOUBLE_EQ(read_sep->r_out, 0.0); - EXPECT_DOUBLE_EQ(read_sep->r_power, 20.0); - EXPECT_DOUBLE_EQ(read_sep->enhence_a, 1.0); - EXPECT_EQ(read_sep->mesh, 0); - EXPECT_EQ(read_sep->strip_elec, 0); - EXPECT_EQ(read_sep->r, nullptr); - EXPECT_EQ(read_sep->rv, nullptr); + EXPECT_FALSE (read_sep->is_enable); + EXPECT_DOUBLE_EQ (read_sep->r_in, 0.0); + EXPECT_DOUBLE_EQ (read_sep->r_out, 0.0); + EXPECT_DOUBLE_EQ (read_sep->r_power, 20.0); + EXPECT_DOUBLE_EQ (read_sep->enhence_a, 1.0); + EXPECT_EQ (read_sep->mesh, 0); + EXPECT_EQ (read_sep->strip_elec, 0); + EXPECT_EQ (read_sep->r, nullptr); + EXPECT_EQ (read_sep->rv, nullptr); } - void TearDown() override + void + TearDown () override { // Cleaning is done automatically in the destructor } }; -TEST_F(ReadSepTest, ReadSep) +TEST_F (ReadSepTest, ReadSep) { #ifdef __MPI if (GlobalV::MY_RANK == 0) - { + { #endif // !__MPI - std::ifstream ifs; - ifs.open("./support/F_pbe_50.sep"); - ASSERT_TRUE(ifs.is_open()); - read_sep->read_sep(ifs); - ifs.close(); - EXPECT_EQ(read_sep->label, "F"); - EXPECT_EQ(read_sep->mesh, 1038); - EXPECT_EQ(read_sep->xc_type, "pbe"); - EXPECT_EQ(read_sep->strip_elec, 50); - - EXPECT_EQ(read_sep->r[0], 3.4643182373e-06); - EXPECT_NE(read_sep->r, nullptr); - EXPECT_NE(read_sep->rv, nullptr); + std::ifstream ifs; + ifs.open ("./support/F_pbe_50.sep"); + ASSERT_TRUE (ifs.is_open ()); + read_sep->read_sep (ifs); + ifs.close (); + EXPECT_EQ (read_sep->label, "F"); + EXPECT_EQ (read_sep->mesh, 1038); + EXPECT_EQ (read_sep->xc_type, "pbe"); + EXPECT_EQ (read_sep->strip_elec, 50); + + EXPECT_EQ (read_sep->r[0], 3.4643182373e-06); + EXPECT_NE (read_sep->r, nullptr); + EXPECT_NE (read_sep->rv, nullptr); #ifdef __MPI - } + } #endif // __MPI } 
-TEST_F(ReadSepTest, PrintSep) +TEST_F (ReadSepTest, PrintSep) { #ifdef __MPI if (GlobalV::MY_RANK == 0) - { -#endif - // 设置测试数据 - read_sep->label = "F"; - read_sep->xc_type = "pbe"; - read_sep->orbital = "p"; - read_sep->strip_elec = 50; - read_sep->mesh = 2; - read_sep->r = new double[2]{0.1, 0.2}; - read_sep->rv = new double[2]{1.0, 2.0}; - - // 测试打印功能 - std::ofstream ofs("test_sep.out"); - read_sep->print_sep_info(ofs); - read_sep->print_sep_vsep(ofs); - ofs.close(); - - // 验证输出文件 - std::ifstream ifs("test_sep.out"); - std::string line; - std::vector lines; - while (std::getline(ifs, line)) { - lines.push_back(line); - } - ifs.close(); - - EXPECT_THAT(lines, testing::Contains(" label F")); - EXPECT_THAT(lines, testing::Contains(" xc pbe")); - EXPECT_THAT(lines, testing::Contains(" orbital p")); - EXPECT_THAT(lines, testing::Contains(" strip electron50")); - EXPECT_THAT(lines, testing::Contains(" mesh 2")); - - std::remove("test_sep.out"); +#endif + // Set up the test data + read_sep->label = "F"; + read_sep->xc_type = "pbe"; + read_sep->orbital = "p"; + read_sep->strip_elec = 50; + read_sep->mesh = 2; + read_sep->r = new double[2]{0.1, 0.2}; + read_sep->rv = new double[2]{1.0, 2.0}; + + // Exercise the print functions + std::ofstream ofs ("test_sep.out"); + read_sep->print_sep_info (ofs); + read_sep->print_sep_vsep (ofs); + ofs.close (); + + // Verify the output file + std::ifstream ifs ("test_sep.out"); + std::string line; + std::vector lines; + while (std::getline (ifs, line)) + { + lines.push_back (line); + } + ifs.close (); + + EXPECT_THAT (lines, testing::Contains (" label F")); + EXPECT_THAT (lines, testing::Contains (" xc pbe")); + EXPECT_THAT (lines, testing::Contains (" orbital p")); + EXPECT_THAT (lines, testing::Contains (" strip electron50")); + EXPECT_THAT (lines, testing::Contains (" mesh 2")); + + std::remove ("test_sep.out"); #ifdef __MPI - } + } #endif } #ifdef __MPI -TEST_F(ReadSepTest, BcastSep) +TEST_F (ReadSepTest, BcastSep) { if (GlobalV::MY_RANK == 0) - { - std::ifstream ifs; -
ifs.open("./support/F_pbe_50.sep"); - ASSERT_TRUE(ifs.is_open()); - read_sep->read_sep(ifs); - ifs.close(); - } - read_sep->bcast_sep(); + { + std::ifstream ifs; + ifs.open ("./support/F_pbe_50.sep"); + ASSERT_TRUE (ifs.is_open ()); + read_sep->read_sep (ifs); + ifs.close (); + } + read_sep->bcast_sep (); if (GlobalV::MY_RANK != 0) - { - EXPECT_EQ(read_sep->label, "F"); - EXPECT_EQ(read_sep->mesh, 1038); - EXPECT_EQ(read_sep->xc_type, "pbe"); - EXPECT_EQ(read_sep->strip_elec, 50); - EXPECT_DOUBLE_EQ(read_sep->r[0], 3.4643182373e-06); - EXPECT_NE(read_sep->r, nullptr); - EXPECT_NE(read_sep->rv, nullptr); - } + { + EXPECT_EQ (read_sep->label, "F"); + EXPECT_EQ (read_sep->mesh, 1038); + EXPECT_EQ (read_sep->xc_type, "pbe"); + EXPECT_EQ (read_sep->strip_elec, 50); + EXPECT_DOUBLE_EQ (read_sep->r[0], 3.4643182373e-06); + EXPECT_NE (read_sep->r, nullptr); + EXPECT_NE (read_sep->rv, nullptr); + } } -int main(int argc, char** argv) +int + main (int argc, char** argv) { - MPI_Init(&argc, &argv); - testing::InitGoogleTest(&argc, argv); + MPI_Init (&argc, &argv); + testing::InitGoogleTest (&argc, argv); - MPI_Comm_size(MPI_COMM_WORLD, &GlobalV::NPROC); - MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK); + MPI_Comm_size (MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); - int result = RUN_ALL_TESTS(); + int result = RUN_ALL_TESTS (); - MPI_Finalize(); + MPI_Finalize (); return result; } #endif // __MPI diff --git a/source/source_cell/test/sepcell_test.cpp b/source/source_cell/test/sepcell_test.cpp index 11316355e1c..b0e81d00548 100644 --- a/source/source_cell/test/sepcell_test.cpp +++ b/source/source_cell/test/sepcell_test.cpp @@ -15,48 +15,20 @@ #include "source_cell/sep_cell.h" #include "source_cell/unitcell.h" #undef private -pseudo::pseudo() -{ -} -pseudo::~pseudo() -{ -} -Atom_pseudo::Atom_pseudo() -{ -} -Atom_pseudo::~Atom_pseudo() -{ -} -Atom::Atom() -{ -} -Atom::~Atom() -{ -} -InfoNonlocal::InfoNonlocal() -{ -} 
-InfoNonlocal::~InfoNonlocal() -{ -} -LCAO_Orbitals::LCAO_Orbitals() -{ -} -LCAO_Orbitals::~LCAO_Orbitals() -{ -} -Magnetism::Magnetism() -{ -} -Magnetism::~Magnetism() -{ -} -UnitCell::UnitCell() -{ -} -UnitCell::~UnitCell() -{ -} +pseudo::pseudo () {} +pseudo::~pseudo () {} +Atom_pseudo::Atom_pseudo () {} +Atom_pseudo::~Atom_pseudo () {} +Atom::Atom () {} +Atom::~Atom () {} +InfoNonlocal::InfoNonlocal () {} +InfoNonlocal::~InfoNonlocal () {} +LCAO_Orbitals::LCAO_Orbitals () {} +LCAO_Orbitals::~LCAO_Orbitals () {} +Magnetism::Magnetism () {} +Magnetism::~Magnetism () {} +UnitCell::UnitCell () {} +UnitCell::~UnitCell () {} // Test fixture for Sep_Cell tests class SepCellTest : public ::testing::Test @@ -71,12 +43,13 @@ class SepCellTest : public ::testing::Test std::string f_sep_filename = "F_pbe_50.sep"; std::string pp_dir = "support/"; // Directory for pseudopotential files - void SetUp() override + void + SetUp () override { // Initialize UnitCell for tests that need it. // This setup is common for many read_sep_potentials tests. 
ucell.ntype = 2; - ucell.atom_label.resize(ucell.ntype); + ucell.atom_label.resize (ucell.ntype); ucell.atom_label[0] = "Li"; ucell.atom_label[1] = "F"; ucell.atoms = new Atom[ucell.ntype]; @@ -86,201 +59,203 @@ class SepCellTest : public ::testing::Test ucell.atoms[1].na = 1; } - void TearDown() override + void + TearDown () override { delete[] ucell.atoms; ucell.atoms = nullptr; } }; -TEST_F(SepCellTest, Constructor) +TEST_F (SepCellTest, Constructor) { - EXPECT_EQ(sep_cell.get_ntype(), 0); - EXPECT_DOUBLE_EQ(sep_cell.get_omega(), 0.0); - EXPECT_DOUBLE_EQ(sep_cell.get_tpiba2(), 0.0); - EXPECT_TRUE(sep_cell.get_seps().empty()); - EXPECT_TRUE(sep_cell.get_sep_enable().empty()); + EXPECT_EQ (sep_cell.get_ntype (), 0); + EXPECT_DOUBLE_EQ (sep_cell.get_omega (), 0.0); + EXPECT_DOUBLE_EQ (sep_cell.get_tpiba2 (), 0.0); + EXPECT_TRUE (sep_cell.get_seps ().empty ()); + EXPECT_TRUE (sep_cell.get_sep_enable ().empty ()); } -TEST_F(SepCellTest, Init) +TEST_F (SepCellTest, Init) { - sep_cell.init(2); - EXPECT_EQ(sep_cell.get_ntype(), 2); - ASSERT_EQ(sep_cell.get_seps().size(), 2); - ASSERT_EQ(sep_cell.get_sep_enable().size(), 2); - EXPECT_FALSE(sep_cell.get_sep_enable()[0]); - EXPECT_FALSE(sep_cell.get_sep_enable()[1]); + sep_cell.init (2); + EXPECT_EQ (sep_cell.get_ntype (), 2); + ASSERT_EQ (sep_cell.get_seps ().size (), 2); + ASSERT_EQ (sep_cell.get_sep_enable ().size (), 2); + EXPECT_FALSE (sep_cell.get_sep_enable ()[0]); + EXPECT_FALSE (sep_cell.get_sep_enable ()[1]); // Check default values of SepPot within seps - EXPECT_EQ(sep_cell.get_seps()[0].mesh, 0); - EXPECT_FALSE(sep_cell.get_seps()[0].is_enable); + EXPECT_EQ (sep_cell.get_seps ()[0].mesh, 0); + EXPECT_FALSE (sep_cell.get_seps ()[0].is_enable); } -TEST_F(SepCellTest, SetOmega) +TEST_F (SepCellTest, SetOmega) { - sep_cell.set_omega(100.0, 0.25); - EXPECT_DOUBLE_EQ(sep_cell.get_omega(), 100.0); - EXPECT_DOUBLE_EQ(sep_cell.get_tpiba2(), 0.25); + sep_cell.set_omega (100.0, 0.25); + EXPECT_DOUBLE_EQ 
(sep_cell.get_omega (), 100.0); + EXPECT_DOUBLE_EQ (sep_cell.get_tpiba2 (), 0.25); } -TEST_F(SepCellTest, ReadSepPotentialsSuccess) +TEST_F (SepCellTest, ReadSepPotentialsSuccess) { #ifdef __MPI if (GlobalV::MY_RANK == 0) - { + { #endif - std::ifstream ifs(pp_dir + stru_filename); - ASSERT_TRUE(ifs.is_open()); - - sep_cell.init(ucell.ntype); - std::ofstream ofs_running_dummy("dummy_ofs_running.tmp"); - int result = sep_cell.read_sep_potentials(ifs, pp_dir, ofs_running_dummy, ucell.atom_label); - ifs.close(); - std::remove("dummy_ofs_running.tmp"); - - EXPECT_EQ(result, 1); // Expect success (true) - - // Due to the bug mentioned (this->sep_enable[i] is always false), - // SEP data won't actually be loaded. - ASSERT_EQ(sep_cell.get_sep_enable().size(), 2); - EXPECT_FALSE(sep_cell.get_sep_enable()[0]); // Stays false from init - EXPECT_TRUE(sep_cell.get_sep_enable()[1]); // Stays false from init - - const auto& seps = sep_cell.get_seps(); - ASSERT_EQ(seps.size(), 2); - EXPECT_FALSE(seps[0].is_enable); // Default value, not set from file - EXPECT_EQ(seps[0].mesh, 0); // Default value - EXPECT_EQ(seps[0].label, ""); // Default value - - EXPECT_TRUE(seps[1].is_enable); // Default value - EXPECT_EQ(seps[1].mesh, 1038); // Default value - EXPECT_EQ(seps[1].label, "F"); // Default value - EXPECT_DOUBLE_EQ(seps[1].r_in, 0.0); - EXPECT_DOUBLE_EQ(seps[1].r_out, 2.5); - EXPECT_DOUBLE_EQ(seps[1].r_power, 20.0); - EXPECT_DOUBLE_EQ(seps[1].enhence_a, 1.0); + std::ifstream ifs (pp_dir + stru_filename); + ASSERT_TRUE (ifs.is_open ()); + + sep_cell.init (ucell.ntype); + std::ofstream ofs_running_dummy ("dummy_ofs_running.tmp"); + int result = sep_cell.read_sep_potentials (ifs, pp_dir, ofs_running_dummy, ucell.atom_label); + ifs.close (); + std::remove ("dummy_ofs_running.tmp"); + + EXPECT_EQ (result, 1); // Expect success (true) + + // Due to the bug mentioned (this->sep_enable[i] is always false), + // SEP data won't actually be loaded. 
+ ASSERT_EQ (sep_cell.get_sep_enable ().size (), 2); + EXPECT_FALSE (sep_cell.get_sep_enable ()[0]); // Stays false from init + EXPECT_TRUE (sep_cell.get_sep_enable ()[1]); // Set to true by read_sep_potentials + + const auto& seps = sep_cell.get_seps (); + ASSERT_EQ (seps.size (), 2); + EXPECT_FALSE (seps[0].is_enable); // Default value, not set from file + EXPECT_EQ (seps[0].mesh, 0); // Default value + EXPECT_EQ (seps[0].label, ""); // Default value + + EXPECT_TRUE (seps[1].is_enable); // Set by read_sep_potentials + EXPECT_EQ (seps[1].mesh, 1038); // Loaded, no longer the default (0) + EXPECT_EQ (seps[1].label, "F"); // Loaded, no longer the default ("") + EXPECT_DOUBLE_EQ (seps[1].r_in, 0.0); + EXPECT_DOUBLE_EQ (seps[1].r_out, 2.5); + EXPECT_DOUBLE_EQ (seps[1].r_power, 20.0); + EXPECT_DOUBLE_EQ (seps[1].enhence_a, 1.0); #ifdef __MPI - } - // If run in MPI, other ranks might need to know the outcome or have sep_cell state consistent. - // For this specific test, only rank 0 performs the read. - // A broadcast test would cover data consistency across ranks. + } + // If run in MPI, other ranks might need to know the outcome or have sep_cell state consistent. + // For this specific test, only rank 0 performs the read. + // A broadcast test would cover data consistency across ranks.
#endif } -TEST_F(SepCellTest, ReadSepPotentialsNoSepFilesSection) +TEST_F (SepCellTest, ReadSepPotentialsNoSepFilesSection) { #ifdef __MPI if (GlobalV::MY_RANK == 0) - { + { #endif - std::ifstream ifs(pp_dir + stru_noLi_filename); - ASSERT_TRUE(ifs.is_open()); - std::ofstream ofs_running_dummy("dummy_ofs_running.tmp"); + std::ifstream ifs (pp_dir + stru_noLi_filename); + ASSERT_TRUE (ifs.is_open ()); + std::ofstream ofs_running_dummy ("dummy_ofs_running.tmp"); - sep_cell.init(ucell.ntype); - int result = sep_cell.read_sep_potentials(ifs, pp_dir, ofs_running_dummy, ucell.atom_label); - ifs.close(); - std::remove("dummy_ofs_running.tmp"); + sep_cell.init (ucell.ntype); + int result = sep_cell.read_sep_potentials (ifs, pp_dir, ofs_running_dummy, ucell.atom_label); + ifs.close (); + std::remove ("dummy_ofs_running.tmp"); - EXPECT_EQ(result, 0); // Expect failure (false) because "SEP_FILES" not found + EXPECT_EQ (result, 0); // Expect failure (false) because "SEP_FILES" not found #ifdef __MPI - } + } #endif } #ifdef __MPI -TEST_F(SepCellTest, BcastSepCell) +TEST_F (SepCellTest, BcastSepCell) { - sep_cell.init(2); // ntype = 2 + sep_cell.init (2); // ntype = 2 // Rank 0 prepares some data (or reads from file) if (GlobalV::MY_RANK == 0) - { - sep_cell.set_omega(150.0, 0.75); - std::ifstream ifs(pp_dir + stru_filename); - ASSERT_TRUE(ifs.is_open()); + { + sep_cell.set_omega (150.0, 0.75); + std::ifstream ifs (pp_dir + stru_filename); + ASSERT_TRUE (ifs.is_open ()); - sep_cell.init(ucell.ntype); - std::ofstream ofs_running_dummy("dummy_ofs_running.tmp"); - int result = sep_cell.read_sep_potentials(ifs, pp_dir, ofs_running_dummy, ucell.atom_label); - ifs.close(); - std::remove("dummy_ofs_running.tmp"); + sep_cell.init (ucell.ntype); + std::ofstream ofs_running_dummy ("dummy_ofs_running.tmp"); + int result = sep_cell.read_sep_potentials (ifs, pp_dir, ofs_running_dummy, ucell.atom_label); + ifs.close (); + std::remove ("dummy_ofs_running.tmp"); - EXPECT_EQ(result, 1); // 
Expect success (true) - } + EXPECT_EQ (result, 1); // Expect success (true) + } - sep_cell.bcast_sep_cell(); + sep_cell.bcast_sep_cell (); // All ranks should have the same data - EXPECT_EQ(sep_cell.get_ntype(), 2); + EXPECT_EQ (sep_cell.get_ntype (), 2); // Omega and tpiba2 are NOT part of Sep_Cell::bcast_sep_cell, so they remain default on non-zero ranks if (GlobalV::MY_RANK == 0) - { - EXPECT_DOUBLE_EQ(sep_cell.get_omega(), 150.0); - EXPECT_DOUBLE_EQ(sep_cell.get_tpiba2(), 0.75); - } + { + EXPECT_DOUBLE_EQ (sep_cell.get_omega (), 150.0); + EXPECT_DOUBLE_EQ (sep_cell.get_tpiba2 (), 0.75); + } else - { - EXPECT_DOUBLE_EQ(sep_cell.get_omega(), 0.0); // Default - EXPECT_DOUBLE_EQ(sep_cell.get_tpiba2(), 0.0); // Default - } + { + EXPECT_DOUBLE_EQ (sep_cell.get_omega (), 0.0); // Default + EXPECT_DOUBLE_EQ (sep_cell.get_tpiba2 (), 0.0); // Default + } - ASSERT_EQ(sep_cell.get_sep_enable().size(), 2); + ASSERT_EQ (sep_cell.get_sep_enable ().size (), 2); // sep_enable will be broadcast as false from rank 0 due to read_sep_potentials bug - EXPECT_FALSE(sep_cell.get_sep_enable()[0]); - EXPECT_TRUE(sep_cell.get_sep_enable()[1]); + EXPECT_FALSE (sep_cell.get_sep_enable ()[0]); + EXPECT_TRUE (sep_cell.get_sep_enable ()[1]); - const auto& seps = sep_cell.get_seps(); - ASSERT_EQ(seps.size(), 2); + const auto& seps = sep_cell.get_seps (); + ASSERT_EQ (seps.size (), 2); // Check SepPot data (will be default values due to bug and current test setup) - EXPECT_EQ(seps[0].label, ""); // Default broadcasted - EXPECT_EQ(seps[0].mesh, 0); // Default broadcasted - EXPECT_FALSE(seps[0].is_enable); // Default broadcasted + EXPECT_EQ (seps[0].label, ""); // Default broadcasted + EXPECT_EQ (seps[0].mesh, 0); // Default broadcasted + EXPECT_FALSE (seps[0].is_enable); // Default broadcasted - EXPECT_EQ(seps[1].label, "F"); // Default broadcasted - EXPECT_EQ(seps[1].mesh, 1038); // Default broadcasted - EXPECT_TRUE(seps[1].is_enable); // Default broadcasted + EXPECT_EQ (seps[1].label, "F"); // 
Default broadcasted + EXPECT_EQ (seps[1].mesh, 1038); // Default broadcasted + EXPECT_TRUE (seps[1].is_enable); // Default broadcasted // Note: SepPot::bcast_sep() allocates memory for r and rv on all ranks // whenever mesh > 0, regardless of is_enable status if (seps[0].mesh > 0) - { - EXPECT_NE(seps[0].r, nullptr); - EXPECT_NE(seps[0].rv, nullptr); - } + { + EXPECT_NE (seps[0].r, nullptr); + EXPECT_NE (seps[0].rv, nullptr); + } else - { - EXPECT_EQ(seps[0].r, nullptr); - EXPECT_EQ(seps[0].rv, nullptr); - } - EXPECT_NE(seps[1].r, nullptr); - EXPECT_NE(seps[1].rv, nullptr); - EXPECT_DOUBLE_EQ(seps[1].r[0], 3.4643182373e-06); - EXPECT_DOUBLE_EQ(seps[1].rv[0], -2.0868200000e-05); - EXPECT_DOUBLE_EQ(seps[1].r[7], 2.8965849122e-05); - EXPECT_DOUBLE_EQ(seps[1].rv[7], -1.9723800000e-05); + { + EXPECT_EQ (seps[0].r, nullptr); + EXPECT_EQ (seps[0].rv, nullptr); + } + EXPECT_NE (seps[1].r, nullptr); + EXPECT_NE (seps[1].rv, nullptr); + EXPECT_DOUBLE_EQ (seps[1].r[0], 3.4643182373e-06); + EXPECT_DOUBLE_EQ (seps[1].rv[0], -2.0868200000e-05); + EXPECT_DOUBLE_EQ (seps[1].r[7], 2.8965849122e-05); + EXPECT_DOUBLE_EQ (seps[1].rv[7], -1.9723800000e-05); } #endif // __MPI // Main function for running tests -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); - MPI_Comm_size(MPI_COMM_WORLD, &GlobalV::NPROC); - MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK); + MPI_Init (&argc, &argv); + MPI_Comm_size (MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); #endif - testing::InitGoogleTest(&argc, argv); + testing::InitGoogleTest (&argc, argv); // Potentially initialize GlobalV::ofs_running here if not handled by test infra // e.g., if (GlobalV::MY_RANK == 0) GlobalV::ofs_running.open("sep_cell_test.log"); // For now, assume it's usable or output to console/dev_null is acceptable. 
- int result = RUN_ALL_TESTS(); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; } diff --git a/source/source_cell/test/support/mock_unitcell.cpp b/source/source_cell/test/support/mock_unitcell.cpp index 67fabe5a9fb..4a932c4fd45 100644 --- a/source/source_cell/test/support/mock_unitcell.cpp +++ b/source/source_cell/test/support/mock_unitcell.cpp @@ -8,36 +8,65 @@ to avoid using UnitCell functions because there is GLobalC, which will bring endless compile troubles like undefined behavior" */ -void UnitCell::set_iat2iwt(const int& npol_in) {} -UnitCell::UnitCell() { - itia2iat.create(1, 1); +void + UnitCell::set_iat2iwt (const int& npol_in) +{ } -UnitCell::~UnitCell() { - if (set_atom_flag) { - delete[] atoms; - } +UnitCell::UnitCell () { itia2iat.create (1, 1); } +UnitCell::~UnitCell () +{ + if (set_atom_flag) + { + delete[] atoms; + } } -SepPot::SepPot(){} -SepPot::~SepPot(){} -Sep_Cell::Sep_Cell() noexcept {} -Sep_Cell::~Sep_Cell() noexcept {} +SepPot::SepPot () {} +SepPot::~SepPot () {} +Sep_Cell::Sep_Cell () noexcept {} +Sep_Cell::~Sep_Cell () noexcept {} -void UnitCell::print_cell(std::ofstream& ofs) const {} +void + UnitCell::print_cell (std::ofstream& ofs) const +{ +} -void UnitCell::set_iat2itia() {} +void + UnitCell::set_iat2itia () +{ +} -void UnitCell::setup_cell(const std::string& fn, std::ofstream& log) {} +void + UnitCell::setup_cell (const std::string& fn, std::ofstream& log) +{ +} -bool UnitCell::if_atoms_can_move() const { return true; } +bool + UnitCell::if_atoms_can_move () const +{ + return true; +} -bool UnitCell::if_cell_can_change() const { return true; } +bool + UnitCell::if_cell_can_change () const +{ + return true; +} -void UnitCell::setup(const std::string& latname_in, +void + UnitCell::setup (const std::string& latname_in, const int& ntype_in, const int& lmaxmax_in, const bool& init_vel_in, - const std::string& fixed_axes_in) {} + const std::string& fixed_axes_in) +{ +} -void 
cal_nelec(const Atom* atoms, const int& ntype, double& nelec) {} +void + cal_nelec (const Atom* atoms, const int& ntype, double& nelec) +{ +} -void UnitCell::compare_atom_labels(const std::string &label1, const std::string &label2) const {} +void + UnitCell::compare_atom_labels (const std::string& label1, const std::string& label2) const +{ +} diff --git a/source/source_cell/test/unitcell_test.cpp b/source/source_cell/test/unitcell_test.cpp index 47642b22a1d..2ea8b5ebd19 100644 --- a/source/source_cell/test/unitcell_test.cpp +++ b/source/source_cell/test/unitcell_test.cpp @@ -21,29 +21,18 @@ #ifdef __LCAO #include "source_basis/module_ao/ORB_read.h" -InfoNonlocal::InfoNonlocal() -{ -} -InfoNonlocal::~InfoNonlocal() -{ -} -LCAO_Orbitals::LCAO_Orbitals() -{ -} -LCAO_Orbitals::~LCAO_Orbitals() -{ -} +InfoNonlocal::InfoNonlocal () {} +InfoNonlocal::~InfoNonlocal () {} +LCAO_Orbitals::LCAO_Orbitals () {} +LCAO_Orbitals::~LCAO_Orbitals () {} #endif -Magnetism::Magnetism() +Magnetism::Magnetism () { this->tot_mag = 0.0; this->abs_mag = 0.0; this->start_mag = nullptr; } -Magnetism::~Magnetism() -{ - delete[] this->start_mag; -} +Magnetism::~Magnetism () { delete[] this->start_mag; } /************************************************ * unit test of class UnitCell @@ -154,7 +143,8 @@ Magnetism::~Magnetism() // mock function #ifdef __LCAO -void LCAO_Orbitals::bcast_files(const int& ntype_in, const int& my_rank) +void + LCAO_Orbitals::bcast_files (const int& ntype_in, const int& my_rank) { return; } @@ -169,27 +159,27 @@ class UcellTest : public ::testing::Test using UcellDeathTest = UcellTest; -TEST_F(UcellTest, Constructor) +TEST_F (UcellTest, Constructor) { - EXPECT_EQ(ucell->Coordinate, "Direct"); - EXPECT_EQ(ucell->latName, "user_defined_lattice"); - EXPECT_DOUBLE_EQ(ucell->lat0, 0.0); - EXPECT_DOUBLE_EQ(ucell->lat0_angstrom, 0.0); - EXPECT_EQ(ucell->ntype, 0); - EXPECT_EQ(ucell->nat, 0); - EXPECT_EQ(ucell->namax, 0); - EXPECT_EQ(ucell->nwmax, 0); - 
EXPECT_EQ(ucell->iat2it, nullptr); - EXPECT_EQ(ucell->iat2ia, nullptr); - EXPECT_EQ(ucell->iwt2iat, nullptr); - EXPECT_EQ(ucell->iwt2iw, nullptr); - EXPECT_DOUBLE_EQ(ucell->tpiba, 0.0); - EXPECT_DOUBLE_EQ(ucell->tpiba2, 0.0); - EXPECT_DOUBLE_EQ(ucell->omega, 0.0); - EXPECT_FALSE(ucell->set_atom_flag); + EXPECT_EQ (ucell->Coordinate, "Direct"); + EXPECT_EQ (ucell->latName, "user_defined_lattice"); + EXPECT_DOUBLE_EQ (ucell->lat0, 0.0); + EXPECT_DOUBLE_EQ (ucell->lat0_angstrom, 0.0); + EXPECT_EQ (ucell->ntype, 0); + EXPECT_EQ (ucell->nat, 0); + EXPECT_EQ (ucell->namax, 0); + EXPECT_EQ (ucell->nwmax, 0); + EXPECT_EQ (ucell->iat2it, nullptr); + EXPECT_EQ (ucell->iat2ia, nullptr); + EXPECT_EQ (ucell->iwt2iat, nullptr); + EXPECT_EQ (ucell->iwt2iw, nullptr); + EXPECT_DOUBLE_EQ (ucell->tpiba, 0.0); + EXPECT_DOUBLE_EQ (ucell->tpiba2, 0.0); + EXPECT_DOUBLE_EQ (ucell->omega, 0.0); + EXPECT_FALSE (ucell->set_atom_flag); } -TEST_F(UcellTest, Setup) +TEST_F (UcellTest, Setup) { std::string latname_in = "bcc"; int ntype_in = 1; @@ -197,98 +187,98 @@ TEST_F(UcellTest, Setup) bool init_vel_in = false; std::vector fixed_axes_in = {"None", "volume", "shape", "a", "b", "c", "ab", "ac", "bc", "abc"}; PARAM.input.relax_new = true; - for (int i = 0; i < fixed_axes_in.size(); ++i) - { - ucell->setup(latname_in, ntype_in, lmaxmax_in, init_vel_in, fixed_axes_in[i]); - EXPECT_EQ(ucell->latName, latname_in); - EXPECT_EQ(ucell->ntype, ntype_in); - EXPECT_EQ(ucell->lmaxmax, lmaxmax_in); - EXPECT_EQ(ucell->init_vel, init_vel_in); - if (fixed_axes_in[i] == "None" || fixed_axes_in[i] == "volume" || fixed_axes_in[i] == "shape") - { - EXPECT_EQ(ucell->lc[0], 1); - EXPECT_EQ(ucell->lc[1], 1); - EXPECT_EQ(ucell->lc[2], 1); - EXPECT_TRUE(ucell->if_cell_can_change()); - } - else if (fixed_axes_in[i] == "a") - { - EXPECT_EQ(ucell->lc[0], 0); - EXPECT_EQ(ucell->lc[1], 1); - EXPECT_EQ(ucell->lc[2], 1); - EXPECT_TRUE(ucell->if_cell_can_change()); - } - else if (fixed_axes_in[i] == "b") - { - 
EXPECT_EQ(ucell->lc[0], 1); - EXPECT_EQ(ucell->lc[1], 0); - EXPECT_EQ(ucell->lc[2], 1); - EXPECT_TRUE(ucell->if_cell_can_change()); - } - else if (fixed_axes_in[i] == "c") - { - EXPECT_EQ(ucell->lc[0], 1); - EXPECT_EQ(ucell->lc[1], 1); - EXPECT_EQ(ucell->lc[2], 0); - EXPECT_TRUE(ucell->if_cell_can_change()); - } - else if (fixed_axes_in[i] == "ab") - { - EXPECT_EQ(ucell->lc[0], 0); - EXPECT_EQ(ucell->lc[1], 0); - EXPECT_EQ(ucell->lc[2], 1); - EXPECT_TRUE(ucell->if_cell_can_change()); - } - else if (fixed_axes_in[i] == "ac") - { - EXPECT_EQ(ucell->lc[0], 0); - EXPECT_EQ(ucell->lc[1], 1); - EXPECT_EQ(ucell->lc[2], 0); - EXPECT_TRUE(ucell->if_cell_can_change()); - } - else if (fixed_axes_in[i] == "bc") + for (int i = 0; i < fixed_axes_in.size (); ++i) { - EXPECT_EQ(ucell->lc[0], 1); - EXPECT_EQ(ucell->lc[1], 0); - EXPECT_EQ(ucell->lc[2], 0); - EXPECT_TRUE(ucell->if_cell_can_change()); - } - else if (fixed_axes_in[i] == "abc") - { - EXPECT_EQ(ucell->lc[0], 0); - EXPECT_EQ(ucell->lc[1], 0); - EXPECT_EQ(ucell->lc[2], 0); - EXPECT_FALSE(ucell->if_cell_can_change()); + ucell->setup (latname_in, ntype_in, lmaxmax_in, init_vel_in, fixed_axes_in[i]); + EXPECT_EQ (ucell->latName, latname_in); + EXPECT_EQ (ucell->ntype, ntype_in); + EXPECT_EQ (ucell->lmaxmax, lmaxmax_in); + EXPECT_EQ (ucell->init_vel, init_vel_in); + if (fixed_axes_in[i] == "None" || fixed_axes_in[i] == "volume" || fixed_axes_in[i] == "shape") + { + EXPECT_EQ (ucell->lc[0], 1); + EXPECT_EQ (ucell->lc[1], 1); + EXPECT_EQ (ucell->lc[2], 1); + EXPECT_TRUE (ucell->if_cell_can_change ()); + } + else if (fixed_axes_in[i] == "a") + { + EXPECT_EQ (ucell->lc[0], 0); + EXPECT_EQ (ucell->lc[1], 1); + EXPECT_EQ (ucell->lc[2], 1); + EXPECT_TRUE (ucell->if_cell_can_change ()); + } + else if (fixed_axes_in[i] == "b") + { + EXPECT_EQ (ucell->lc[0], 1); + EXPECT_EQ (ucell->lc[1], 0); + EXPECT_EQ (ucell->lc[2], 1); + EXPECT_TRUE (ucell->if_cell_can_change ()); + } + else if (fixed_axes_in[i] == "c") + { + EXPECT_EQ 
(ucell->lc[0], 1); + EXPECT_EQ (ucell->lc[1], 1); + EXPECT_EQ (ucell->lc[2], 0); + EXPECT_TRUE (ucell->if_cell_can_change ()); + } + else if (fixed_axes_in[i] == "ab") + { + EXPECT_EQ (ucell->lc[0], 0); + EXPECT_EQ (ucell->lc[1], 0); + EXPECT_EQ (ucell->lc[2], 1); + EXPECT_TRUE (ucell->if_cell_can_change ()); + } + else if (fixed_axes_in[i] == "ac") + { + EXPECT_EQ (ucell->lc[0], 0); + EXPECT_EQ (ucell->lc[1], 1); + EXPECT_EQ (ucell->lc[2], 0); + EXPECT_TRUE (ucell->if_cell_can_change ()); + } + else if (fixed_axes_in[i] == "bc") + { + EXPECT_EQ (ucell->lc[0], 1); + EXPECT_EQ (ucell->lc[1], 0); + EXPECT_EQ (ucell->lc[2], 0); + EXPECT_TRUE (ucell->if_cell_can_change ()); + } + else if (fixed_axes_in[i] == "abc") + { + EXPECT_EQ (ucell->lc[0], 0); + EXPECT_EQ (ucell->lc[1], 0); + EXPECT_EQ (ucell->lc[2], 0); + EXPECT_FALSE (ucell->if_cell_can_change ()); + } } - } } // These tests are removed because fixed_axes="volume" and fixed_axes="shape" // are now supported with relax_new=false (see commit cdc3457f5a8546cda869655c3faabd8b29687aff) // The old implementation now properly handles these constraints via post-update enforcement -TEST_F(UcellDeathTest, CompareAatomLabel) +TEST_F (UcellDeathTest, CompareAatomLabel) { std::string stru_label[] = {"Ag", "Ag", "Ag", "47", "47", "47", "Silver", "Silver", "Silver", "Ag", "Ag", "Ag", "Ag_empty"}; std::string pseudo_label[] = {"Ag", "47", "Silver", "Ag", "47", "Silver", "Ag", "47", "Silver", "Ag1", "ag", "ag_locpsp", "Ag"}; for (int it = 0; it < 12; it++) - { - ucell->compare_atom_labels(stru_label[it], pseudo_label[it]); - } + { + ucell->compare_atom_labels (stru_label[it], pseudo_label[it]); + } stru_label[0] = "Fe"; pseudo_label[0] = "O"; std::string atom_label_in_orbtial = "atom label in orbital file "; std::string mismatch_with_pseudo = " mismatch with pseudo file of "; - testing::internal::CaptureStdout(); - EXPECT_EXIT(ucell->compare_atom_labels(stru_label[0], pseudo_label[0]), ::testing::ExitedWithCode(1), ""); - 
output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, - testing::HasSubstr(atom_label_in_orbtial + stru_label[0] + mismatch_with_pseudo + pseudo_label[0])); + testing::internal::CaptureStdout (); + EXPECT_EXIT (ucell->compare_atom_labels (stru_label[0], pseudo_label[0]), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, + testing::HasSubstr (atom_label_in_orbtial + stru_label[0] + mismatch_with_pseudo + pseudo_label[0])); } -TEST_F(UcellTest, RemakeCell) +TEST_F (UcellTest, RemakeCell) { std::vector latname_in = {"sc", "fcc", @@ -304,314 +294,330 @@ TEST_F(UcellTest, RemakeCell) "sm", "bacm", "triclinic"}; - for (int i = 0; i < latname_in.size(); ++i) - { - ucell->latvec.e11 = 10.0; - ucell->latvec.e12 = 0.00; - ucell->latvec.e13 = 0.00; - ucell->latvec.e21 = 0.00; - ucell->latvec.e22 = 10.0; - ucell->latvec.e23 = 0.00; - ucell->latvec.e31 = 0.00; - ucell->latvec.e32 = 0.00; - ucell->latvec.e33 = 10.0; - ucell->latName = latname_in[i]; - unitcell::remake_cell(ucell->lat); - if (latname_in[i] == "sc") - { - double celldm - = std::sqrt(pow(ucell->latvec.e11, 2) + pow(ucell->latvec.e12, 2) + pow(ucell->latvec.e13, 2)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, celldm); - } - else if (latname_in[i] == "fcc") - { - double celldm = std::sqrt(pow(ucell->latvec.e11, 2) + pow(ucell->latvec.e12, 2) + pow(ucell->latvec.e13, 2)) - / std::sqrt(2.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, -celldm); - EXPECT_DOUBLE_EQ(ucell->latvec.e12, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e13, celldm); - EXPECT_DOUBLE_EQ(ucell->latvec.e21, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, celldm); - EXPECT_DOUBLE_EQ(ucell->latvec.e23, celldm); - EXPECT_DOUBLE_EQ(ucell->latvec.e31, -celldm); - EXPECT_DOUBLE_EQ(ucell->latvec.e32, celldm); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 0.0); - } - else if (latname_in[i] == "bcc") - { - double celldm = std::sqrt(pow(ucell->latvec.e11, 2) + pow(ucell->latvec.e12, 2) + pow(ucell->latvec.e13, 
2)) - / std::sqrt(3.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, celldm); - EXPECT_DOUBLE_EQ(ucell->latvec.e12, celldm); - EXPECT_DOUBLE_EQ(ucell->latvec.e13, celldm); - EXPECT_DOUBLE_EQ(ucell->latvec.e21, -celldm); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, celldm); - EXPECT_DOUBLE_EQ(ucell->latvec.e23, celldm); - EXPECT_DOUBLE_EQ(ucell->latvec.e31, -celldm); - EXPECT_DOUBLE_EQ(ucell->latvec.e32, -celldm); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, celldm); - } - else if (latname_in[i] == "hexagonal") - { - double celldm1 - = std::sqrt(pow(ucell->latvec.e11, 2) + pow(ucell->latvec.e12, 2) + pow(ucell->latvec.e13, 2)); - double celldm3 - = std::sqrt(pow(ucell->latvec.e31, 2) + pow(ucell->latvec.e32, 2) + pow(ucell->latvec.e33, 2)); - double mathfoo = sqrt(3.0) / 2.0; - EXPECT_DOUBLE_EQ(ucell->latvec.e11, celldm1); - EXPECT_DOUBLE_EQ(ucell->latvec.e12, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e13, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e21, -0.5 * celldm1); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, celldm1 * mathfoo); - EXPECT_DOUBLE_EQ(ucell->latvec.e23, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e31, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e32, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, celldm3); - } - else if (latname_in[i] == "trigonal") - { - double a1 = std::sqrt(pow(ucell->latvec.e11, 2) + pow(ucell->latvec.e12, 2) + pow(ucell->latvec.e13, 2)); - double a2 = std::sqrt(pow(ucell->latvec.e21, 2) + pow(ucell->latvec.e22, 2) + pow(ucell->latvec.e23, 2)); - double a1da2 = (ucell->latvec.e11 * ucell->latvec.e21 + ucell->latvec.e12 * ucell->latvec.e22 - + ucell->latvec.e13 * ucell->latvec.e23); - double cosgamma = a1da2 / (a1 * a2); - double tx = std::sqrt((1.0 - cosgamma) / 2.0); - double ty = std::sqrt((1.0 - cosgamma) / 6.0); - double tz = std::sqrt((1.0 + 2.0 * cosgamma) / 3.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, a1 * tx); - EXPECT_DOUBLE_EQ(ucell->latvec.e12, -a1 * ty); - EXPECT_DOUBLE_EQ(ucell->latvec.e13, a1 * tz); - EXPECT_DOUBLE_EQ(ucell->latvec.e21, 0.0); - 
EXPECT_DOUBLE_EQ(ucell->latvec.e22, 2.0 * a1 * ty); - EXPECT_DOUBLE_EQ(ucell->latvec.e23, a1 * tz); - EXPECT_DOUBLE_EQ(ucell->latvec.e31, -a1 * tx); - EXPECT_DOUBLE_EQ(ucell->latvec.e32, -a1 * ty); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, a1 * tz); - } - else if (latname_in[i] == "st") - { - double a1 = std::sqrt(pow(ucell->latvec.e11, 2) + pow(ucell->latvec.e12, 2) + pow(ucell->latvec.e13, 2)); - double a3 = std::sqrt(pow(ucell->latvec.e31, 2) + pow(ucell->latvec.e32, 2) + pow(ucell->latvec.e33, 2)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, a1); - EXPECT_DOUBLE_EQ(ucell->latvec.e12, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e13, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e21, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, a1); - EXPECT_DOUBLE_EQ(ucell->latvec.e23, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e31, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e32, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, a3); - } - else if (latname_in[i] == "bct") - { - double d1 = std::abs(ucell->latvec.e11); - double d2 = std::abs(ucell->latvec.e13); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, d1); - EXPECT_DOUBLE_EQ(ucell->latvec.e12, -d1); - EXPECT_DOUBLE_EQ(ucell->latvec.e13, d2); - EXPECT_DOUBLE_EQ(ucell->latvec.e21, d1); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, d1); - EXPECT_DOUBLE_EQ(ucell->latvec.e23, d2); - EXPECT_DOUBLE_EQ(ucell->latvec.e31, -d1); - EXPECT_DOUBLE_EQ(ucell->latvec.e32, -d1); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, d2); - } - else if (latname_in[i] == "so") - { - double a1 = std::sqrt(pow(ucell->latvec.e11, 2) + pow(ucell->latvec.e12, 2) + pow(ucell->latvec.e13, 2)); - double a2 = std::sqrt(pow(ucell->latvec.e21, 2) + pow(ucell->latvec.e22, 2) + pow(ucell->latvec.e23, 2)); - double a3 = std::sqrt(pow(ucell->latvec.e31, 2) + pow(ucell->latvec.e32, 2) + pow(ucell->latvec.e33, 2)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, a1); - EXPECT_DOUBLE_EQ(ucell->latvec.e12, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e13, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e21, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, a2); 
- EXPECT_DOUBLE_EQ(ucell->latvec.e23, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e31, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e32, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, a3); - } - else if (latname_in[i] == "baco") - { - double d1 = std::abs(ucell->latvec.e11); - double d2 = std::abs(ucell->latvec.e22); - double d3 = std::abs(ucell->latvec.e33); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, d1); - EXPECT_DOUBLE_EQ(ucell->latvec.e12, d2); - EXPECT_DOUBLE_EQ(ucell->latvec.e13, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e21, -d1); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, d2); - EXPECT_DOUBLE_EQ(ucell->latvec.e23, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e31, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e32, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, d3); - } - else if (latname_in[i] == "fco") - { - double d1 = std::abs(ucell->latvec.e11); - double d2 = std::abs(ucell->latvec.e22); - double d3 = std::abs(ucell->latvec.e33); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, d1); - EXPECT_DOUBLE_EQ(ucell->latvec.e12, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e13, d3); - EXPECT_DOUBLE_EQ(ucell->latvec.e21, d1); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, d2); - EXPECT_DOUBLE_EQ(ucell->latvec.e23, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e31, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e32, d2); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, d3); - } - else if (latname_in[i] == "bco") + for (int i = 0; i < latname_in.size (); ++i) { - double d1 = std::abs(ucell->latvec.e11); - double d2 = std::abs(ucell->latvec.e22); - double d3 = std::abs(ucell->latvec.e33); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, d1); - EXPECT_DOUBLE_EQ(ucell->latvec.e12, d2); - EXPECT_DOUBLE_EQ(ucell->latvec.e13, d3); - EXPECT_DOUBLE_EQ(ucell->latvec.e21, -d1); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, d2); - EXPECT_DOUBLE_EQ(ucell->latvec.e23, d3); - EXPECT_DOUBLE_EQ(ucell->latvec.e31, -d1); - EXPECT_DOUBLE_EQ(ucell->latvec.e32, -d2); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, d3); - } - else if (latname_in[i] == "sm") - { - double a1 = std::sqrt(pow(ucell->latvec.e11, 
2) + pow(ucell->latvec.e12, 2) + pow(ucell->latvec.e13, 2)); - double a2 = std::sqrt(pow(ucell->latvec.e21, 2) + pow(ucell->latvec.e22, 2) + pow(ucell->latvec.e23, 2)); - double a3 = std::sqrt(pow(ucell->latvec.e31, 2) + pow(ucell->latvec.e32, 2) + pow(ucell->latvec.e33, 2)); - double a1da2 = (ucell->latvec.e11 * ucell->latvec.e21 + ucell->latvec.e12 * ucell->latvec.e22 - + ucell->latvec.e13 * ucell->latvec.e23); - double cosgamma = a1da2 / (a1 * a2); - double d1 = a2 * cosgamma; - double d2 = a2 * std::sqrt(1.0 - cosgamma * cosgamma); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, a1); - EXPECT_DOUBLE_EQ(ucell->latvec.e12, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e13, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e21, d1); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, d2); - EXPECT_DOUBLE_EQ(ucell->latvec.e23, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e31, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e32, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, a3); - } - else if (latname_in[i] == "bacm") - { - double d1 = std::abs(ucell->latvec.e11); - double a2 = std::sqrt(pow(ucell->latvec.e21, 2) + pow(ucell->latvec.e22, 2) + pow(ucell->latvec.e23, 2)); - double d3 = std::abs(ucell->latvec.e13); - double cosgamma = ucell->latvec.e21 / a2; - double f1 = a2 * cosgamma; - double f2 = a2 * std::sqrt(1.0 - cosgamma * cosgamma); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, d1); - EXPECT_DOUBLE_EQ(ucell->latvec.e12, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e13, -d3); - EXPECT_DOUBLE_EQ(ucell->latvec.e21, f1); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, f2); - EXPECT_DOUBLE_EQ(ucell->latvec.e23, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e31, d1); - EXPECT_DOUBLE_EQ(ucell->latvec.e32, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, d3); - } - else if (latname_in[i] == "triclinic") - { - double a1 = std::sqrt(pow(ucell->latvec.e11, 2) + pow(ucell->latvec.e12, 2) + pow(ucell->latvec.e13, 2)); - double a2 = std::sqrt(pow(ucell->latvec.e21, 2) + pow(ucell->latvec.e22, 2) + pow(ucell->latvec.e23, 2)); - double a3 = 
std::sqrt(pow(ucell->latvec.e31, 2) + pow(ucell->latvec.e32, 2) + pow(ucell->latvec.e33, 2)); - double a1da2 = (ucell->latvec.e11 * ucell->latvec.e21 + ucell->latvec.e12 * ucell->latvec.e22 - + ucell->latvec.e13 * ucell->latvec.e23); - double a1da3 = (ucell->latvec.e11 * ucell->latvec.e31 + ucell->latvec.e12 * ucell->latvec.e32 - + ucell->latvec.e13 * ucell->latvec.e33); - double a2da3 = (ucell->latvec.e21 * ucell->latvec.e31 + ucell->latvec.e22 * ucell->latvec.e32 - + ucell->latvec.e23 * ucell->latvec.e33); - double cosgamma = a1da2 / a1 / a2; - double singamma = std::sqrt(1.0 - cosgamma * cosgamma); - double cosbeta = a1da3 / a1 / a3; - double cosalpha = a2da3 / a2 / a3; - double d1 = std::sqrt(1.0 + 2.0 * cosgamma * cosbeta * cosalpha - cosgamma * cosgamma - cosbeta * cosbeta - - cosalpha * cosalpha) - / singamma; - EXPECT_DOUBLE_EQ(ucell->latvec.e11, a1); - EXPECT_DOUBLE_EQ(ucell->latvec.e12, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e13, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e21, a2 * cosgamma); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, a2 * singamma); - EXPECT_DOUBLE_EQ(ucell->latvec.e23, 0.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e31, a3 * cosbeta); - EXPECT_DOUBLE_EQ(ucell->latvec.e32, a3 * (cosalpha - cosbeta * cosgamma) / singamma); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, a3 * d1); + ucell->latvec.e11 = 10.0; + ucell->latvec.e12 = 0.00; + ucell->latvec.e13 = 0.00; + ucell->latvec.e21 = 0.00; + ucell->latvec.e22 = 10.0; + ucell->latvec.e23 = 0.00; + ucell->latvec.e31 = 0.00; + ucell->latvec.e32 = 0.00; + ucell->latvec.e33 = 10.0; + ucell->latName = latname_in[i]; + unitcell::remake_cell (ucell->lat); + if (latname_in[i] == "sc") + { + double celldm = std::sqrt (pow (ucell->latvec.e11, 2) + pow (ucell->latvec.e12, 2) + + pow (ucell->latvec.e13, 2)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, celldm); + } + else if (latname_in[i] == "fcc") + { + double celldm = std::sqrt (pow (ucell->latvec.e11, 2) + pow (ucell->latvec.e12, 2) + + pow (ucell->latvec.e13, 2)) + / std::sqrt 
(2.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, -celldm); + EXPECT_DOUBLE_EQ (ucell->latvec.e12, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e13, celldm); + EXPECT_DOUBLE_EQ (ucell->latvec.e21, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, celldm); + EXPECT_DOUBLE_EQ (ucell->latvec.e23, celldm); + EXPECT_DOUBLE_EQ (ucell->latvec.e31, -celldm); + EXPECT_DOUBLE_EQ (ucell->latvec.e32, celldm); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 0.0); + } + else if (latname_in[i] == "bcc") + { + double celldm = std::sqrt (pow (ucell->latvec.e11, 2) + pow (ucell->latvec.e12, 2) + + pow (ucell->latvec.e13, 2)) + / std::sqrt (3.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, celldm); + EXPECT_DOUBLE_EQ (ucell->latvec.e12, celldm); + EXPECT_DOUBLE_EQ (ucell->latvec.e13, celldm); + EXPECT_DOUBLE_EQ (ucell->latvec.e21, -celldm); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, celldm); + EXPECT_DOUBLE_EQ (ucell->latvec.e23, celldm); + EXPECT_DOUBLE_EQ (ucell->latvec.e31, -celldm); + EXPECT_DOUBLE_EQ (ucell->latvec.e32, -celldm); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, celldm); + } + else if (latname_in[i] == "hexagonal") + { + double celldm1 = std::sqrt (pow (ucell->latvec.e11, 2) + pow (ucell->latvec.e12, 2) + + pow (ucell->latvec.e13, 2)); + double celldm3 = std::sqrt (pow (ucell->latvec.e31, 2) + pow (ucell->latvec.e32, 2) + + pow (ucell->latvec.e33, 2)); + double mathfoo = sqrt (3.0) / 2.0; + EXPECT_DOUBLE_EQ (ucell->latvec.e11, celldm1); + EXPECT_DOUBLE_EQ (ucell->latvec.e12, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e13, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e21, -0.5 * celldm1); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, celldm1 * mathfoo); + EXPECT_DOUBLE_EQ (ucell->latvec.e23, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e31, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e32, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, celldm3); + } + else if (latname_in[i] == "trigonal") + { + double a1 = std::sqrt (pow (ucell->latvec.e11, 2) + pow (ucell->latvec.e12, 2) + + pow (ucell->latvec.e13, 2)); + double a2 = std::sqrt 
(pow (ucell->latvec.e21, 2) + pow (ucell->latvec.e22, 2) + + pow (ucell->latvec.e23, 2)); + double a1da2 = (ucell->latvec.e11 * ucell->latvec.e21 + ucell->latvec.e12 * ucell->latvec.e22 + + ucell->latvec.e13 * ucell->latvec.e23); + double cosgamma = a1da2 / (a1 * a2); + double tx = std::sqrt ((1.0 - cosgamma) / 2.0); + double ty = std::sqrt ((1.0 - cosgamma) / 6.0); + double tz = std::sqrt ((1.0 + 2.0 * cosgamma) / 3.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, a1 * tx); + EXPECT_DOUBLE_EQ (ucell->latvec.e12, -a1 * ty); + EXPECT_DOUBLE_EQ (ucell->latvec.e13, a1 * tz); + EXPECT_DOUBLE_EQ (ucell->latvec.e21, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 2.0 * a1 * ty); + EXPECT_DOUBLE_EQ (ucell->latvec.e23, a1 * tz); + EXPECT_DOUBLE_EQ (ucell->latvec.e31, -a1 * tx); + EXPECT_DOUBLE_EQ (ucell->latvec.e32, -a1 * ty); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, a1 * tz); + } + else if (latname_in[i] == "st") + { + double a1 = std::sqrt (pow (ucell->latvec.e11, 2) + pow (ucell->latvec.e12, 2) + + pow (ucell->latvec.e13, 2)); + double a3 = std::sqrt (pow (ucell->latvec.e31, 2) + pow (ucell->latvec.e32, 2) + + pow (ucell->latvec.e33, 2)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, a1); + EXPECT_DOUBLE_EQ (ucell->latvec.e12, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e13, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e21, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, a1); + EXPECT_DOUBLE_EQ (ucell->latvec.e23, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e31, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e32, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, a3); + } + else if (latname_in[i] == "bct") + { + double d1 = std::abs (ucell->latvec.e11); + double d2 = std::abs (ucell->latvec.e13); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, d1); + EXPECT_DOUBLE_EQ (ucell->latvec.e12, -d1); + EXPECT_DOUBLE_EQ (ucell->latvec.e13, d2); + EXPECT_DOUBLE_EQ (ucell->latvec.e21, d1); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, d1); + EXPECT_DOUBLE_EQ (ucell->latvec.e23, d2); + EXPECT_DOUBLE_EQ (ucell->latvec.e31, -d1); + 
EXPECT_DOUBLE_EQ (ucell->latvec.e32, -d1); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, d2); + } + else if (latname_in[i] == "so") + { + double a1 = std::sqrt (pow (ucell->latvec.e11, 2) + pow (ucell->latvec.e12, 2) + + pow (ucell->latvec.e13, 2)); + double a2 = std::sqrt (pow (ucell->latvec.e21, 2) + pow (ucell->latvec.e22, 2) + + pow (ucell->latvec.e23, 2)); + double a3 = std::sqrt (pow (ucell->latvec.e31, 2) + pow (ucell->latvec.e32, 2) + + pow (ucell->latvec.e33, 2)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, a1); + EXPECT_DOUBLE_EQ (ucell->latvec.e12, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e13, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e21, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, a2); + EXPECT_DOUBLE_EQ (ucell->latvec.e23, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e31, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e32, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, a3); + } + else if (latname_in[i] == "baco") + { + double d1 = std::abs (ucell->latvec.e11); + double d2 = std::abs (ucell->latvec.e22); + double d3 = std::abs (ucell->latvec.e33); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, d1); + EXPECT_DOUBLE_EQ (ucell->latvec.e12, d2); + EXPECT_DOUBLE_EQ (ucell->latvec.e13, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e21, -d1); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, d2); + EXPECT_DOUBLE_EQ (ucell->latvec.e23, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e31, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e32, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, d3); + } + else if (latname_in[i] == "fco") + { + double d1 = std::abs (ucell->latvec.e11); + double d2 = std::abs (ucell->latvec.e22); + double d3 = std::abs (ucell->latvec.e33); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, d1); + EXPECT_DOUBLE_EQ (ucell->latvec.e12, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e13, d3); + EXPECT_DOUBLE_EQ (ucell->latvec.e21, d1); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, d2); + EXPECT_DOUBLE_EQ (ucell->latvec.e23, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e31, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e32, d2); + EXPECT_DOUBLE_EQ 
(ucell->latvec.e33, d3); + } + else if (latname_in[i] == "bco") + { + double d1 = std::abs (ucell->latvec.e11); + double d2 = std::abs (ucell->latvec.e22); + double d3 = std::abs (ucell->latvec.e33); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, d1); + EXPECT_DOUBLE_EQ (ucell->latvec.e12, d2); + EXPECT_DOUBLE_EQ (ucell->latvec.e13, d3); + EXPECT_DOUBLE_EQ (ucell->latvec.e21, -d1); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, d2); + EXPECT_DOUBLE_EQ (ucell->latvec.e23, d3); + EXPECT_DOUBLE_EQ (ucell->latvec.e31, -d1); + EXPECT_DOUBLE_EQ (ucell->latvec.e32, -d2); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, d3); + } + else if (latname_in[i] == "sm") + { + double a1 = std::sqrt (pow (ucell->latvec.e11, 2) + pow (ucell->latvec.e12, 2) + + pow (ucell->latvec.e13, 2)); + double a2 = std::sqrt (pow (ucell->latvec.e21, 2) + pow (ucell->latvec.e22, 2) + + pow (ucell->latvec.e23, 2)); + double a3 = std::sqrt (pow (ucell->latvec.e31, 2) + pow (ucell->latvec.e32, 2) + + pow (ucell->latvec.e33, 2)); + double a1da2 = (ucell->latvec.e11 * ucell->latvec.e21 + ucell->latvec.e12 * ucell->latvec.e22 + + ucell->latvec.e13 * ucell->latvec.e23); + double cosgamma = a1da2 / (a1 * a2); + double d1 = a2 * cosgamma; + double d2 = a2 * std::sqrt (1.0 - cosgamma * cosgamma); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, a1); + EXPECT_DOUBLE_EQ (ucell->latvec.e12, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e13, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e21, d1); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, d2); + EXPECT_DOUBLE_EQ (ucell->latvec.e23, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e31, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e32, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, a3); + } + else if (latname_in[i] == "bacm") + { + double d1 = std::abs (ucell->latvec.e11); + double a2 = std::sqrt (pow (ucell->latvec.e21, 2) + pow (ucell->latvec.e22, 2) + + pow (ucell->latvec.e23, 2)); + double d3 = std::abs (ucell->latvec.e13); + double cosgamma = ucell->latvec.e21 / a2; + double f1 = a2 * cosgamma; + double f2 = a2 * std::sqrt 
(1.0 - cosgamma * cosgamma); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, d1); + EXPECT_DOUBLE_EQ (ucell->latvec.e12, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e13, -d3); + EXPECT_DOUBLE_EQ (ucell->latvec.e21, f1); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, f2); + EXPECT_DOUBLE_EQ (ucell->latvec.e23, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e31, d1); + EXPECT_DOUBLE_EQ (ucell->latvec.e32, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, d3); + } + else if (latname_in[i] == "triclinic") + { + double a1 = std::sqrt (pow (ucell->latvec.e11, 2) + pow (ucell->latvec.e12, 2) + + pow (ucell->latvec.e13, 2)); + double a2 = std::sqrt (pow (ucell->latvec.e21, 2) + pow (ucell->latvec.e22, 2) + + pow (ucell->latvec.e23, 2)); + double a3 = std::sqrt (pow (ucell->latvec.e31, 2) + pow (ucell->latvec.e32, 2) + + pow (ucell->latvec.e33, 2)); + double a1da2 = (ucell->latvec.e11 * ucell->latvec.e21 + ucell->latvec.e12 * ucell->latvec.e22 + + ucell->latvec.e13 * ucell->latvec.e23); + double a1da3 = (ucell->latvec.e11 * ucell->latvec.e31 + ucell->latvec.e12 * ucell->latvec.e32 + + ucell->latvec.e13 * ucell->latvec.e33); + double a2da3 = (ucell->latvec.e21 * ucell->latvec.e31 + ucell->latvec.e22 * ucell->latvec.e32 + + ucell->latvec.e23 * ucell->latvec.e33); + double cosgamma = a1da2 / a1 / a2; + double singamma = std::sqrt (1.0 - cosgamma * cosgamma); + double cosbeta = a1da3 / a1 / a3; + double cosalpha = a2da3 / a2 / a3; + double d1 = std::sqrt (1.0 + 2.0 * cosgamma * cosbeta * cosalpha - cosgamma * cosgamma + - cosbeta * cosbeta - cosalpha * cosalpha) + / singamma; + EXPECT_DOUBLE_EQ (ucell->latvec.e11, a1); + EXPECT_DOUBLE_EQ (ucell->latvec.e12, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e13, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e21, a2 * cosgamma); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, a2 * singamma); + EXPECT_DOUBLE_EQ (ucell->latvec.e23, 0.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e31, a3 * cosbeta); + EXPECT_DOUBLE_EQ (ucell->latvec.e32, a3 * (cosalpha - cosbeta * cosgamma) / singamma); + 
EXPECT_DOUBLE_EQ (ucell->latvec.e33, a3 * d1); + } } - } } -TEST_F(UcellDeathTest, RemakeCellWarnings) +TEST_F (UcellDeathTest, RemakeCellWarnings) { std::vector latname_in = {"user_defined_lattice", "trigonal", "bacm", "triclinic", "arbitrary"}; - for (int i = 0; i < latname_in.size(); ++i) - { - ucell->latvec.e11 = 10.0; - ucell->latvec.e12 = 0.00; - ucell->latvec.e13 = 0.00; - ucell->latvec.e21 = 10.0; - ucell->latvec.e22 = 0.00; - ucell->latvec.e23 = 0.00; - ucell->latvec.e31 = 0.00; - ucell->latvec.e32 = 0.00; - ucell->latvec.e33 = 10.0; - ucell->latName = latname_in[i]; - testing::internal::CaptureStdout(); - EXPECT_EXIT(unitcell::remake_cell(ucell->lat), ::testing::ExitedWithCode(1), ""); - std::string output = testing::internal::GetCapturedStdout(); - if (latname_in[i] == "user_defined_lattice") - { - EXPECT_THAT(output, testing::HasSubstr("to use fixed_ibrav, latname must be provided")); - } - else if (latname_in[i] == "trigonal" || latname_in[i] == "bacm" || latname_in[i] == "triclinic") - { - EXPECT_THAT(output, testing::HasSubstr("wrong cos12!")); - } - else + for (int i = 0; i < latname_in.size (); ++i) { - EXPECT_THAT(output, testing::HasSubstr("latname type not supported!")); + ucell->latvec.e11 = 10.0; + ucell->latvec.e12 = 0.00; + ucell->latvec.e13 = 0.00; + ucell->latvec.e21 = 10.0; + ucell->latvec.e22 = 0.00; + ucell->latvec.e23 = 0.00; + ucell->latvec.e31 = 0.00; + ucell->latvec.e32 = 0.00; + ucell->latvec.e33 = 10.0; + ucell->latName = latname_in[i]; + testing::internal::CaptureStdout (); + EXPECT_EXIT (unitcell::remake_cell (ucell->lat), ::testing::ExitedWithCode (1), ""); + std::string output = testing::internal::GetCapturedStdout (); + if (latname_in[i] == "user_defined_lattice") + { + EXPECT_THAT (output, testing::HasSubstr ("to use fixed_ibrav, latname must be provided")); + } + else if (latname_in[i] == "trigonal" || latname_in[i] == "bacm" || latname_in[i] == "triclinic") + { + EXPECT_THAT (output, testing::HasSubstr ("wrong cos12!")); + 
} + else + { + EXPECT_THAT (output, testing::HasSubstr ("latname type not supported!")); + } } - } } -TEST_F(UcellTest, JudgeParallel) +TEST_F (UcellTest, JudgeParallel) { - ModuleBase::Vector3 b(1.0, 1.0, 1.0); + ModuleBase::Vector3 b (1.0, 1.0, 1.0); double a[3] = {1.0, 1.0, 1.0}; - EXPECT_TRUE(elecstate::judge_parallel(a, b)); + EXPECT_TRUE (elecstate::judge_parallel (a, b)); } -TEST_F(UcellTest, Index) +TEST_F (UcellTest, Index) { UcellTestPrepare utp = UcellTestLib["C1H2-Index"]; PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); + ucell = utp.SetUcellInfo (); // test set_iat2itia - ucell->set_iat2itia(); + ucell->set_iat2itia (); int iat = 0; - for (int it = 0; it < utp.natom.size(); ++it) - { - for (int ia = 0; ia < utp.natom[it]; ++ia) + for (int it = 0; it < utp.natom.size (); ++it) { - EXPECT_EQ(ucell->iat2it[iat], it); - EXPECT_EQ(ucell->iat2ia[iat], ia); - // test iat2iait - int ia_beg, it_beg; - ucell->iat2iait(iat, &ia_beg, &it_beg); - EXPECT_EQ(it_beg, it); - EXPECT_EQ(ia_beg, ia); - ++iat; + for (int ia = 0; ia < utp.natom[it]; ++ia) + { + EXPECT_EQ (ucell->iat2it[iat], it); + EXPECT_EQ (ucell->iat2ia[iat], ia); + // test iat2iait + int ia_beg, it_beg; + ucell->iat2iait (iat, &ia_beg, &it_beg); + EXPECT_EQ (it_beg, it); + EXPECT_EQ (ia_beg, ia); + ++iat; + } } - } // test iat2iait: case of (iat >= nat) int ia_beg2; int it_beg2; long long iat2 = ucell->nat + 1; - EXPECT_FALSE(ucell->iat2iait(iat2, &ia_beg2, &it_beg2)); + EXPECT_FALSE (ucell->iat2iait (iat2, &ia_beg2, &it_beg2)); // test ijat2iaitjajt, step_jajtiait, step_iat, step_ia, step_it int ia_test; int it_test; @@ -622,159 +628,159 @@ TEST_F(UcellTest, Index) int ja_test2 = 0; int jt_test2 = 0; long long ijat = 0; - for (int it = 0; it < utp.natom.size(); ++it) - { - for (int ia = 0; ia < utp.natom[it]; ++ia) + for (int it = 0; it < utp.natom.size (); ++it) { - for (int jt = 0; jt < utp.natom.size(); ++jt) - { - for (int ja = 0; ja < utp.natom[jt]; ++ja) + for (int ia = 0; ia 
< utp.natom[it]; ++ia) { - ucell->ijat2iaitjajt(ijat, &ia_test, &it_test, &ja_test, &jt_test); - EXPECT_EQ(ia_test, ia); - EXPECT_EQ(it_test, it); - EXPECT_EQ(ja_test, ja); - EXPECT_EQ(jt_test, jt); - ++ijat; - if (it_test == utp.natom.size() - 1 && ia_test == utp.natom[it] - 1 - && jt_test == utp.natom.size() - 1 && ja_test == utp.natom[jt] - 1) - { - EXPECT_TRUE(ucell->step_jajtiait(&ja_test, &jt_test, &ia_test, &it_test)); - } - else - { - EXPECT_FALSE(ucell->step_jajtiait(&ja_test, &jt_test, &ia_test, &it_test)); - } + for (int jt = 0; jt < utp.natom.size (); ++jt) + { + for (int ja = 0; ja < utp.natom[jt]; ++ja) + { + ucell->ijat2iaitjajt (ijat, &ia_test, &it_test, &ja_test, &jt_test); + EXPECT_EQ (ia_test, ia); + EXPECT_EQ (it_test, it); + EXPECT_EQ (ja_test, ja); + EXPECT_EQ (jt_test, jt); + ++ijat; + if (it_test == utp.natom.size () - 1 && ia_test == utp.natom[it] - 1 + && jt_test == utp.natom.size () - 1 && ja_test == utp.natom[jt] - 1) + { + EXPECT_TRUE (ucell->step_jajtiait (&ja_test, &jt_test, &ia_test, &it_test)); + } + else + { + EXPECT_FALSE ( + ucell->step_jajtiait (&ja_test, &jt_test, &ia_test, &it_test)); + } + } + } } - } } - } } -TEST_F(UcellTest, GetAtomCounts) +TEST_F (UcellTest, GetAtomCounts) { UcellTestPrepare utp = UcellTestLib["C1H2-Index"]; PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); + ucell = utp.SetUcellInfo (); // test set_iat2itia - ucell->set_iat2itia(); - std::map atomCounts = ucell->get_atom_Counts(); - EXPECT_EQ(atomCounts[0], 1); - EXPECT_EQ(atomCounts[1], 2); + ucell->set_iat2itia (); + std::map atomCounts = ucell->get_atom_Counts (); + EXPECT_EQ (atomCounts[0], 1); + EXPECT_EQ (atomCounts[1], 2); /// atomCounts as vector - std::vector atomCounts2 = ucell->get_atomCounts(); - EXPECT_EQ(atomCounts2[0], 1); - EXPECT_EQ(atomCounts2[1], 2); + std::vector atomCounts2 = ucell->get_atomCounts (); + EXPECT_EQ (atomCounts2[0], 1); + EXPECT_EQ (atomCounts2[1], 2); } -TEST_F(UcellTest, GetOrbitalCounts) +TEST_F 
(UcellTest, GetOrbitalCounts) { UcellTestPrepare utp = UcellTestLib["C1H2-Index"]; PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); + ucell = utp.SetUcellInfo (); // test set_iat2itia - ucell->set_iat2itia(); - std::map orbitalCounts = ucell->get_orbital_Counts(); - EXPECT_EQ(orbitalCounts[0], 9); - EXPECT_EQ(orbitalCounts[1], 9); + ucell->set_iat2itia (); + std::map orbitalCounts = ucell->get_orbital_Counts (); + EXPECT_EQ (orbitalCounts[0], 9); + EXPECT_EQ (orbitalCounts[1], 9); } -TEST_F(UcellTest, GetLnchiCounts) +TEST_F (UcellTest, GetLnchiCounts) { UcellTestPrepare utp = UcellTestLib["C1H2-Index"]; PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); + ucell = utp.SetUcellInfo (); // test set_iat2itia - ucell->set_iat2itia(); - std::map> LnchiCounts = ucell->get_lnchi_Counts(); - EXPECT_EQ(LnchiCounts[0][0], 1); - EXPECT_EQ(LnchiCounts[0][1], 1); - EXPECT_EQ(LnchiCounts[0][2], 1); - EXPECT_EQ(LnchiCounts[1][0], 1); - EXPECT_EQ(LnchiCounts[1][1], 1); - EXPECT_EQ(LnchiCounts[1][2], 1); + ucell->set_iat2itia (); + std::map> LnchiCounts = ucell->get_lnchi_Counts (); + EXPECT_EQ (LnchiCounts[0][0], 1); + EXPECT_EQ (LnchiCounts[0][1], 1); + EXPECT_EQ (LnchiCounts[0][2], 1); + EXPECT_EQ (LnchiCounts[1][0], 1); + EXPECT_EQ (LnchiCounts[1][1], 1); + EXPECT_EQ (LnchiCounts[1][2], 1); /// LnchiCounts as vector - std::vector> LnchiCounts2 = ucell->get_lnchiCounts(); - EXPECT_EQ(LnchiCounts2[0][0], 1); - EXPECT_EQ(LnchiCounts2[0][1], 1); - EXPECT_EQ(LnchiCounts2[0][2], 1); - EXPECT_EQ(LnchiCounts2[1][0], 1); - EXPECT_EQ(LnchiCounts2[1][1], 1); - EXPECT_EQ(LnchiCounts2[1][2], 1); + std::vector> LnchiCounts2 = ucell->get_lnchiCounts (); + EXPECT_EQ (LnchiCounts2[0][0], 1); + EXPECT_EQ (LnchiCounts2[0][1], 1); + EXPECT_EQ (LnchiCounts2[0][2], 1); + EXPECT_EQ (LnchiCounts2[1][0], 1); + EXPECT_EQ (LnchiCounts2[1][1], 1); + EXPECT_EQ (LnchiCounts2[1][2], 1); } -TEST_F(UcellTest, CheckDTau) +TEST_F (UcellTest, CheckDTau) { UcellTestPrepare utp 
= UcellTestLib["C1H2-CheckDTau"]; PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); - unitcell::check_dtau(ucell->atoms,ucell->ntype, ucell->lat0, ucell->latvec); - for (int it = 0; it < utp.natom.size(); ++it) - { - for (int ia = 0; ia < utp.natom[it]; ++ia) + ucell = utp.SetUcellInfo (); + unitcell::check_dtau (ucell->atoms, ucell->ntype, ucell->lat0, ucell->latvec); + for (int it = 0; it < utp.natom.size (); ++it) { - EXPECT_GE(ucell->atoms[it].taud[ia].x, 0); - EXPECT_GE(ucell->atoms[it].taud[ia].y, 0); - EXPECT_GE(ucell->atoms[it].taud[ia].z, 0); - EXPECT_LT(ucell->atoms[it].taud[ia].x, 1); - EXPECT_LT(ucell->atoms[it].taud[ia].y, 1); - EXPECT_LT(ucell->atoms[it].taud[ia].z, 1); + for (int ia = 0; ia < utp.natom[it]; ++ia) + { + EXPECT_GE (ucell->atoms[it].taud[ia].x, 0); + EXPECT_GE (ucell->atoms[it].taud[ia].y, 0); + EXPECT_GE (ucell->atoms[it].taud[ia].z, 0); + EXPECT_LT (ucell->atoms[it].taud[ia].x, 1); + EXPECT_LT (ucell->atoms[it].taud[ia].y, 1); + EXPECT_LT (ucell->atoms[it].taud[ia].z, 1); + } } - } } -TEST_F(UcellTest, CheckTauFalse) +TEST_F (UcellTest, CheckTauFalse) { UcellTestPrepare utp = UcellTestLib["C1H2-CheckTau"]; PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); - GlobalV::ofs_warning.open("checktau_warning"); - unitcell::check_tau(ucell->atoms ,ucell->ntype, ucell->lat0); - GlobalV::ofs_warning.close(); + ucell = utp.SetUcellInfo (); + GlobalV::ofs_warning.open ("checktau_warning"); + unitcell::check_tau (ucell->atoms, ucell->ntype, ucell->lat0); + GlobalV::ofs_warning.close (); std::ifstream ifs; - ifs.open("checktau_warning"); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("two atoms are too close!")); - ifs.close(); - remove("checktau_warning"); + ifs.open ("checktau_warning"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("two atoms are too close!")); + 
ifs.close (); + remove ("checktau_warning"); } -TEST_F(UcellTest, CheckTauTrue) +TEST_F (UcellTest, CheckTauTrue) { UcellTestPrepare utp = UcellTestLib["C1H2-CheckTau"]; PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); - GlobalV::ofs_warning.open("checktau_warning"); - int atom=0; - //cause the ucell->lat0 is 0.5,if the type of the check_tau has - //an int type,it will set to zero,and it will not pass the unittest - ucell->lat0=0.5; - ucell->nat=3; - for (int it=0;itntype;it++) - { - for(int ia=0; iaatoms[it].na; ++ia) + ucell = utp.SetUcellInfo (); + GlobalV::ofs_warning.open ("checktau_warning"); + int atom = 0; + // cause the ucell->lat0 is 0.5,if the type of the check_tau has + // an int type,it will set to zero,and it will not pass the unittest + ucell->lat0 = 0.5; + ucell->nat = 3; + for (int it = 0; it < ucell->ntype; it++) { - - for (int i=0;i<3;i++) - { - ucell->atoms[it].tau[ia][i]=((atom+i)/(ucell->nat*3.0)); - } - atom+=3; + for (int ia = 0; ia < ucell->atoms[it].na; ++ia) + { + + for (int i = 0; i < 3; i++) + { + ucell->atoms[it].tau[ia][i] = ((atom + i) / (ucell->nat * 3.0)); + } + atom += 3; + } } - } - EXPECT_EQ(unitcell::check_tau(ucell->atoms ,ucell->ntype, ucell->lat0),true); - GlobalV::ofs_warning.close(); + EXPECT_EQ (unitcell::check_tau (ucell->atoms, ucell->ntype, ucell->lat0), true); + GlobalV::ofs_warning.close (); } -TEST_F(UcellTest, SelectiveDynamics) +TEST_F (UcellTest, SelectiveDynamics) { UcellTestPrepare utp = UcellTestLib["C1H2-SD"]; PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); - EXPECT_TRUE(ucell->if_atoms_can_move()); + ucell = utp.SetUcellInfo (); + EXPECT_TRUE (ucell->if_atoms_can_move ()); } - // mohan comment out 2025-07-14 /* TEST_F(UcellDeathTest, PeriodicBoundaryAdjustment1) @@ -791,58 +797,57 @@ TEST_F(UcellDeathTest, PeriodicBoundaryAdjustment1) } */ -TEST_F(UcellTest, PeriodicBoundaryAdjustment2) +TEST_F (UcellTest, PeriodicBoundaryAdjustment2) { UcellTestPrepare utp = 
UcellTestLib["C1H2-Index"]; PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); - EXPECT_NO_THROW(unitcell::periodic_boundary_adjustment( - ucell->atoms,ucell->latvec,ucell->ntype)); + ucell = utp.SetUcellInfo (); + EXPECT_NO_THROW (unitcell::periodic_boundary_adjustment (ucell->atoms, ucell->latvec, ucell->ntype)); } -TEST_F(UcellTest, PrintCell) +TEST_F (UcellTest, PrintCell) { UcellTestPrepare utp = UcellTestLib["C1H2-Index"]; PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); + ucell = utp.SetUcellInfo (); std::ofstream ofs; - ofs.open("printcell.log"); - ucell->print_cell(ofs); - ofs.close(); + ofs.open ("printcell.log"); + ucell->print_cell (ofs); + ofs.close (); std::ifstream ifs; - ifs.open("printcell.log"); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("latName = bcc")); - EXPECT_THAT(str, testing::HasSubstr("ntype = 2")); - EXPECT_THAT(str, testing::HasSubstr("nat = 3")); - EXPECT_THAT(str, testing::HasSubstr("GGT :")); - EXPECT_THAT(str, testing::HasSubstr("omega = 6748.33")); - remove("printcell.log"); + ifs.open ("printcell.log"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("latName = bcc")); + EXPECT_THAT (str, testing::HasSubstr ("ntype = 2")); + EXPECT_THAT (str, testing::HasSubstr ("nat = 3")); + EXPECT_THAT (str, testing::HasSubstr ("GGT :")); + EXPECT_THAT (str, testing::HasSubstr ("omega = 6748.33")); + remove ("printcell.log"); } -TEST_F(UcellTest, PrintUnitcellPseudo) +TEST_F (UcellTest, PrintUnitcellPseudo) { UcellTestPrepare utp = UcellTestLib["C1H2-Index"]; PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); + ucell = utp.SetUcellInfo (); PARAM.input.test_pseudo_cell = 1; std::string fn = "printcell.log"; - elecstate::print_unitcell_pseudo(fn, *ucell); + elecstate::print_unitcell_pseudo (fn, *ucell); std::ifstream ifs; - 
ifs.open("printcell.log"); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("latName = bcc")); - EXPECT_THAT(str, testing::HasSubstr("ntype = 2")); - EXPECT_THAT(str, testing::HasSubstr("nat = 3")); - EXPECT_THAT(str, testing::HasSubstr("GGT :")); - EXPECT_THAT(str, testing::HasSubstr("omega = 6748.33")); - EXPECT_THAT(str, testing::HasSubstr("label = C")); - EXPECT_THAT(str, testing::HasSubstr("mass = 12")); - EXPECT_THAT(str, testing::HasSubstr("atom_position(cartesian) Dimension = 1")); - EXPECT_THAT(str, testing::HasSubstr("label = H")); - EXPECT_THAT(str, testing::HasSubstr("mass = 1")); - EXPECT_THAT(str, testing::HasSubstr("atom_position(cartesian) Dimension = 2")); - remove("printcell.log"); + ifs.open ("printcell.log"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("latName = bcc")); + EXPECT_THAT (str, testing::HasSubstr ("ntype = 2")); + EXPECT_THAT (str, testing::HasSubstr ("nat = 3")); + EXPECT_THAT (str, testing::HasSubstr ("GGT :")); + EXPECT_THAT (str, testing::HasSubstr ("omega = 6748.33")); + EXPECT_THAT (str, testing::HasSubstr ("label = C")); + EXPECT_THAT (str, testing::HasSubstr ("mass = 12")); + EXPECT_THAT (str, testing::HasSubstr ("atom_position(cartesian) Dimension = 1")); + EXPECT_THAT (str, testing::HasSubstr ("label = H")); + EXPECT_THAT (str, testing::HasSubstr ("mass = 1")); + EXPECT_THAT (str, testing::HasSubstr ("atom_position(cartesian) Dimension = 2")); + remove ("printcell.log"); } // Comments and suggestions on the refactor of UnitCell class @@ -856,11 +861,11 @@ TEST_F(UcellTest, PrintUnitcellPseudo) // In summmary, there are two cents: // 1. STRU file needed to be re-designed to be more well-organized // 2. 
STRU file parser can therefore be programmed more succinctly -TEST_F(UcellTest, PrintSTRU) +TEST_F (UcellTest, PrintSTRU) { UcellTestPrepare utp = UcellTestLib["C1H2-Index"]; PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); + ucell = utp.SetUcellInfo (); // Cartesian type of coordinates std::string fn = "C1H2_STRU"; PARAM.input.calculation = "md"; // print velocity in STRU, not needed anymore after refactor of this function @@ -869,71 +874,69 @@ TEST_F(UcellTest, PrintSTRU) * CASE: nspin1|Cartesian|no vel|no mag|no orb|no dpks_desc|rank0 * */ - unitcell::print_stru_file(*ucell,ucell->atoms,ucell->latvec, - fn, 1, false, false, false, false, false, 0); + unitcell::print_stru_file (*ucell, ucell->atoms, ucell->latvec, fn, 1, false, false, false, false, false, 0); std::ifstream ifs; - ifs.open("C1H2_STRU"); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("ATOMIC_SPECIES")); - EXPECT_THAT(str, testing::HasSubstr("C 12.0000 C.upf upf201")); - EXPECT_THAT(str, testing::HasSubstr("H 1.0000 H.upf upf201")); - EXPECT_THAT(str, testing::HasSubstr("LATTICE_CONSTANT")); - EXPECT_THAT(str, testing::HasSubstr("1.8897261255")); - EXPECT_THAT(str, testing::HasSubstr("LATTICE_VECTORS")); - EXPECT_THAT(str, testing::HasSubstr("10.0000000000 0.0000000000 0.0000000000")); - EXPECT_THAT(str, testing::HasSubstr(" 0.0000000000 10.0000000000 0.0000000000")); - EXPECT_THAT(str, testing::HasSubstr(" 0.0000000000 0.0000000000 10.0000000000")); - EXPECT_THAT(str, testing::HasSubstr("ATOMIC_POSITIONS")); - EXPECT_THAT(str, testing::HasSubstr("Cartesian")); - EXPECT_THAT(str, testing::HasSubstr("C #label")); - EXPECT_THAT(str, testing::HasSubstr("0.0000 #magnetism")); - EXPECT_THAT(str, testing::HasSubstr("1 #number of atoms")); - EXPECT_THAT(str, testing::HasSubstr(" 1.0000000000 1.0000000000 1.0000000000 m 1 1 1")); - EXPECT_THAT(str, testing::HasSubstr("H #label")); - EXPECT_THAT(str, 
testing::HasSubstr("0.0000 #magnetism")); - EXPECT_THAT(str, testing::HasSubstr("2 #number of atoms")); - EXPECT_THAT(str, testing::HasSubstr(" 1.5000000000 1.5000000000 1.5000000000 m 0 0 0")); - EXPECT_THAT(str, testing::HasSubstr(" 0.5000000000 0.5000000000 0.5000000000 m 0 0 1")); - str.clear(); - ifs.close(); - remove("C1H2_STRU"); + ifs.open ("C1H2_STRU"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("ATOMIC_SPECIES")); + EXPECT_THAT (str, testing::HasSubstr ("C 12.0000 C.upf upf201")); + EXPECT_THAT (str, testing::HasSubstr ("H 1.0000 H.upf upf201")); + EXPECT_THAT (str, testing::HasSubstr ("LATTICE_CONSTANT")); + EXPECT_THAT (str, testing::HasSubstr ("1.8897261255")); + EXPECT_THAT (str, testing::HasSubstr ("LATTICE_VECTORS")); + EXPECT_THAT (str, testing::HasSubstr ("10.0000000000 0.0000000000 0.0000000000")); + EXPECT_THAT (str, testing::HasSubstr (" 0.0000000000 10.0000000000 0.0000000000")); + EXPECT_THAT (str, testing::HasSubstr (" 0.0000000000 0.0000000000 10.0000000000")); + EXPECT_THAT (str, testing::HasSubstr ("ATOMIC_POSITIONS")); + EXPECT_THAT (str, testing::HasSubstr ("Cartesian")); + EXPECT_THAT (str, testing::HasSubstr ("C #label")); + EXPECT_THAT (str, testing::HasSubstr ("0.0000 #magnetism")); + EXPECT_THAT (str, testing::HasSubstr ("1 #number of atoms")); + EXPECT_THAT (str, testing::HasSubstr (" 1.0000000000 1.0000000000 1.0000000000 m 1 1 1")); + EXPECT_THAT (str, testing::HasSubstr ("H #label")); + EXPECT_THAT (str, testing::HasSubstr ("0.0000 #magnetism")); + EXPECT_THAT (str, testing::HasSubstr ("2 #number of atoms")); + EXPECT_THAT (str, testing::HasSubstr (" 1.5000000000 1.5000000000 1.5000000000 m 0 0 0")); + EXPECT_THAT (str, testing::HasSubstr (" 0.5000000000 0.5000000000 0.5000000000 m 0 0 1")); + str.clear (); + ifs.close (); + remove ("C1H2_STRU"); /** * CASE: nspin2|Direct|vel|no mag|no orb|no dpks_desc|rank0 * */ - 
unitcell::print_stru_file(*ucell,ucell->atoms,ucell->latvec, - fn, 2, true, true, false, false, false, 0); - ifs.open("C1H2_STRU"); - str = {(std::istreambuf_iterator(ifs)), std::istreambuf_iterator()}; - EXPECT_THAT(str, testing::HasSubstr("ATOMIC_SPECIES")); - EXPECT_THAT(str, testing::HasSubstr("C 12.0000 C.upf upf201")); - EXPECT_THAT(str, testing::HasSubstr("H 1.0000 H.upf upf201")); - EXPECT_THAT(str, testing::HasSubstr("LATTICE_CONSTANT")); - EXPECT_THAT(str, testing::HasSubstr("1.8897261255")); - EXPECT_THAT(str, testing::HasSubstr("LATTICE_VECTORS")); - EXPECT_THAT(str, testing::HasSubstr("10.0000000000 0.0000000000 0.0000000000")); - EXPECT_THAT(str, testing::HasSubstr(" 0.0000000000 10.0000000000 0.0000000000")); - EXPECT_THAT(str, testing::HasSubstr(" 0.0000000000 0.0000000000 10.0000000000")); - EXPECT_THAT(str, testing::HasSubstr("ATOMIC_POSITIONS")); - EXPECT_THAT(str, testing::HasSubstr("Direct")); - EXPECT_THAT(str, testing::HasSubstr("C #label")); - EXPECT_THAT(str, testing::HasSubstr("0.0000 #magnetism")); - EXPECT_THAT(str, testing::HasSubstr("1 #number of atoms")); - EXPECT_THAT(str, - testing::HasSubstr(" 0.1000000000 0.1000000000 0.1000000000 m 1 1 1 v " - "0.1000000000 0.1000000000 0.1000000000")); - EXPECT_THAT(str, testing::HasSubstr("H #label")); - EXPECT_THAT(str, testing::HasSubstr("0.0000 #magnetism")); - EXPECT_THAT(str, testing::HasSubstr("2 #number of atoms")); - EXPECT_THAT(str, - testing::HasSubstr(" 0.1500000000 0.1500000000 0.1500000000 m 0 0 0 v " - "0.1000000000 0.1000000000 0.1000000000")); - EXPECT_THAT(str, - testing::HasSubstr(" 0.0500000000 0.0500000000 0.0500000000 m 0 0 1 v " - "0.1000000000 0.1000000000 0.1000000000")); - str.clear(); - ifs.close(); - remove("C1H2_STRU"); + unitcell::print_stru_file (*ucell, ucell->atoms, ucell->latvec, fn, 2, true, true, false, false, false, 0); + ifs.open ("C1H2_STRU"); + str = {(std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()}; + EXPECT_THAT (str, testing::HasSubstr 
("ATOMIC_SPECIES")); + EXPECT_THAT (str, testing::HasSubstr ("C 12.0000 C.upf upf201")); + EXPECT_THAT (str, testing::HasSubstr ("H 1.0000 H.upf upf201")); + EXPECT_THAT (str, testing::HasSubstr ("LATTICE_CONSTANT")); + EXPECT_THAT (str, testing::HasSubstr ("1.8897261255")); + EXPECT_THAT (str, testing::HasSubstr ("LATTICE_VECTORS")); + EXPECT_THAT (str, testing::HasSubstr ("10.0000000000 0.0000000000 0.0000000000")); + EXPECT_THAT (str, testing::HasSubstr (" 0.0000000000 10.0000000000 0.0000000000")); + EXPECT_THAT (str, testing::HasSubstr (" 0.0000000000 0.0000000000 10.0000000000")); + EXPECT_THAT (str, testing::HasSubstr ("ATOMIC_POSITIONS")); + EXPECT_THAT (str, testing::HasSubstr ("Direct")); + EXPECT_THAT (str, testing::HasSubstr ("C #label")); + EXPECT_THAT (str, testing::HasSubstr ("0.0000 #magnetism")); + EXPECT_THAT (str, testing::HasSubstr ("1 #number of atoms")); + EXPECT_THAT (str, + testing::HasSubstr (" 0.1000000000 0.1000000000 0.1000000000 m 1 1 1 v " + "0.1000000000 0.1000000000 0.1000000000")); + EXPECT_THAT (str, testing::HasSubstr ("H #label")); + EXPECT_THAT (str, testing::HasSubstr ("0.0000 #magnetism")); + EXPECT_THAT (str, testing::HasSubstr ("2 #number of atoms")); + EXPECT_THAT (str, + testing::HasSubstr (" 0.1500000000 0.1500000000 0.1500000000 m 0 0 0 v " + "0.1000000000 0.1000000000 0.1000000000")); + EXPECT_THAT (str, + testing::HasSubstr (" 0.0500000000 0.0500000000 0.0500000000 m 0 0 1 v " + "0.1000000000 0.1000000000 0.1000000000")); + str.clear (); + ifs.close (); + remove ("C1H2_STRU"); /** * CASE: nspin2|Direct|no vel|mag|orb|dpks_desc|rank0 * @@ -943,159 +946,161 @@ TEST_F(UcellTest, PrintSTRU) ucell->orbital_fn[1] = "__unittest_orbital_fn_1__"; ucell->atom_mulliken = {{-1, 0.5}, {-1, 0.4}, {-1, 0.3}}; // first index is iat, the second is components, starts seems from 1 - unitcell::print_stru_file(*ucell,ucell->atoms,ucell->latvec, - fn, 2, true, false, true, true, true, 0); - ifs.open("C1H2_STRU"); - str = 
{(std::istreambuf_iterator(ifs)), std::istreambuf_iterator()}; - EXPECT_THAT(str, testing::HasSubstr("ATOMIC_SPECIES")); - EXPECT_THAT(str, testing::HasSubstr("C 12.0000 C.upf upf201")); - EXPECT_THAT(str, testing::HasSubstr("H 1.0000 H.upf upf201")); - EXPECT_THAT(str, testing::HasSubstr("NUMERICAL_ORBITAL")); - EXPECT_THAT(str, testing::HasSubstr("__unittest_orbital_fn_0__")); - EXPECT_THAT(str, testing::HasSubstr("__unittest_orbital_fn_1__")); - EXPECT_THAT(str, testing::HasSubstr("NUMERICAL_DESCRIPTOR")); - EXPECT_THAT(str, testing::HasSubstr("__unittest_numerical_descriptor__")); - EXPECT_THAT(str, testing::HasSubstr("LATTICE_CONSTANT")); - EXPECT_THAT(str, testing::HasSubstr("1.8897261255")); - EXPECT_THAT(str, testing::HasSubstr("LATTICE_VECTORS")); - EXPECT_THAT(str, testing::HasSubstr("10.0000000000 0.0000000000 0.0000000000")); - EXPECT_THAT(str, testing::HasSubstr(" 0.0000000000 10.0000000000 0.0000000000")); - EXPECT_THAT(str, testing::HasSubstr(" 0.0000000000 0.0000000000 10.0000000000")); - EXPECT_THAT(str, testing::HasSubstr("ATOMIC_POSITIONS")); - EXPECT_THAT(str, testing::HasSubstr("Direct")); - EXPECT_THAT(str, testing::HasSubstr("C #label")); - EXPECT_THAT(str, testing::HasSubstr("0.0000 #magnetism")); - EXPECT_THAT(str, testing::HasSubstr("1 #number of atoms")); - EXPECT_THAT(str, - testing::HasSubstr(" 0.1000000000 0.1000000000 0.1000000000 m 1 1 1 mag 0.5000")); - EXPECT_THAT(str, testing::HasSubstr("H #label")); - EXPECT_THAT(str, testing::HasSubstr("0.0000 #magnetism")); - EXPECT_THAT(str, testing::HasSubstr("2 #number of atoms")); - EXPECT_THAT(str, - testing::HasSubstr(" 0.1500000000 0.1500000000 0.1500000000 m 0 0 0 mag 0.4000")); - EXPECT_THAT(str, - testing::HasSubstr(" 0.0500000000 0.0500000000 0.0500000000 m 0 0 1 mag 0.3000")); - ifs.close(); - remove("C1H2_STRU"); + unitcell::print_stru_file (*ucell, ucell->atoms, ucell->latvec, fn, 2, true, false, true, true, true, 0); + ifs.open ("C1H2_STRU"); + str = {(std::istreambuf_iterator 
(ifs)), std::istreambuf_iterator ()}; + EXPECT_THAT (str, testing::HasSubstr ("ATOMIC_SPECIES")); + EXPECT_THAT (str, testing::HasSubstr ("C 12.0000 C.upf upf201")); + EXPECT_THAT (str, testing::HasSubstr ("H 1.0000 H.upf upf201")); + EXPECT_THAT (str, testing::HasSubstr ("NUMERICAL_ORBITAL")); + EXPECT_THAT (str, testing::HasSubstr ("__unittest_orbital_fn_0__")); + EXPECT_THAT (str, testing::HasSubstr ("__unittest_orbital_fn_1__")); + EXPECT_THAT (str, testing::HasSubstr ("NUMERICAL_DESCRIPTOR")); + EXPECT_THAT (str, testing::HasSubstr ("__unittest_numerical_descriptor__")); + EXPECT_THAT (str, testing::HasSubstr ("LATTICE_CONSTANT")); + EXPECT_THAT (str, testing::HasSubstr ("1.8897261255")); + EXPECT_THAT (str, testing::HasSubstr ("LATTICE_VECTORS")); + EXPECT_THAT (str, testing::HasSubstr ("10.0000000000 0.0000000000 0.0000000000")); + EXPECT_THAT (str, testing::HasSubstr (" 0.0000000000 10.0000000000 0.0000000000")); + EXPECT_THAT (str, testing::HasSubstr (" 0.0000000000 0.0000000000 10.0000000000")); + EXPECT_THAT (str, testing::HasSubstr ("ATOMIC_POSITIONS")); + EXPECT_THAT (str, testing::HasSubstr ("Direct")); + EXPECT_THAT (str, testing::HasSubstr ("C #label")); + EXPECT_THAT (str, testing::HasSubstr ("0.0000 #magnetism")); + EXPECT_THAT (str, testing::HasSubstr ("1 #number of atoms")); + EXPECT_THAT ( + str, + testing::HasSubstr (" 0.1000000000 0.1000000000 0.1000000000 m 1 1 1 mag 0.5000")); + EXPECT_THAT (str, testing::HasSubstr ("H #label")); + EXPECT_THAT (str, testing::HasSubstr ("0.0000 #magnetism")); + EXPECT_THAT (str, testing::HasSubstr ("2 #number of atoms")); + EXPECT_THAT ( + str, + testing::HasSubstr (" 0.1500000000 0.1500000000 0.1500000000 m 0 0 0 mag 0.4000")); + EXPECT_THAT ( + str, + testing::HasSubstr (" 0.0500000000 0.0500000000 0.0500000000 m 0 0 1 mag 0.3000")); + ifs.close (); + remove ("C1H2_STRU"); } -TEST_F(UcellTest, PrintTauDirect) +TEST_F (UcellTest, PrintTauDirect) { UcellTestPrepare utp = UcellTestLib["C1H2-Index"]; 
PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); - EXPECT_EQ(ucell->Coordinate, "Direct"); + ucell = utp.SetUcellInfo (); + EXPECT_EQ (ucell->Coordinate, "Direct"); // open a file - std::ofstream ofs("print_tau_direct"); - unitcell::print_tau(ucell->atoms,ucell->Coordinate,ucell->ntype,ucell->lat0,ofs); - ofs.close(); - + std::ofstream ofs ("print_tau_direct"); + unitcell::print_tau (ucell->atoms, ucell->Coordinate, ucell->ntype, ucell->lat0, ofs); + ofs.close (); + // readin the data std::ifstream ifs; - ifs.open("print_tau_direct"); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("DIRECT COORDINATES")); - EXPECT_THAT(str, testing::HasSubstr(" C 0.100000000000 0.100000000000 0.100000000000 0.0000")); - EXPECT_THAT(str, testing::HasSubstr(" H 0.150000000000 0.150000000000 0.150000000000 0.0000")); - ifs.close(); - - remove("print_tau_direct"); + ifs.open ("print_tau_direct"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("DIRECT COORDINATES")); + EXPECT_THAT (str, testing::HasSubstr (" C 0.100000000000 0.100000000000 0.100000000000 0.0000")); + EXPECT_THAT (str, testing::HasSubstr (" H 0.150000000000 0.150000000000 0.150000000000 0.0000")); + ifs.close (); + + remove ("print_tau_direct"); } -TEST_F(UcellTest, PrintTauCartesian) +TEST_F (UcellTest, PrintTauCartesian) { UcellTestPrepare utp = UcellTestLib["C1H2-Cartesian"]; PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); - EXPECT_EQ(ucell->Coordinate, "Cartesian"); + ucell = utp.SetUcellInfo (); + EXPECT_EQ (ucell->Coordinate, "Cartesian"); // open a file - std::ofstream ofs("print_tau_Cartesian"); - unitcell::print_tau(ucell->atoms,ucell->Coordinate,ucell->ntype,ucell->lat0,ofs); - ofs.close(); + std::ofstream ofs ("print_tau_Cartesian"); + unitcell::print_tau (ucell->atoms, ucell->Coordinate, ucell->ntype, ucell->lat0, ofs); + 
ofs.close (); // readin the data std::ifstream ifs; - ifs.open("print_tau_Cartesian"); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("CARTESIAN COORDINATES")); - EXPECT_THAT(str, testing::HasSubstr(" C 1.000000000000 1.000000000000 1.000000000000 0.0000")); - EXPECT_THAT(str, testing::HasSubstr(" H 1.500000000000 1.500000000000 1.500000000000 0.0000")); - ifs.close(); + ifs.open ("print_tau_Cartesian"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("CARTESIAN COORDINATES")); + EXPECT_THAT (str, testing::HasSubstr (" C 1.000000000000 1.000000000000 1.000000000000 0.0000")); + EXPECT_THAT (str, testing::HasSubstr (" H 1.500000000000 1.500000000000 1.500000000000 0.0000")); + ifs.close (); // remove the file - remove("print_tau_Cartesian"); + remove ("print_tau_Cartesian"); } -TEST_F(UcellTest, UpdateVel) +TEST_F (UcellTest, UpdateVel) { UcellTestPrepare utp = UcellTestLib["C1H2-Index"]; PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); + ucell = utp.SetUcellInfo (); ModuleBase::Vector3* vel_in = new ModuleBase::Vector3[ucell->nat]; for (int iat = 0; iat < ucell->nat; ++iat) - { - vel_in[iat].set(iat * 0.1, iat * 0.1, iat * 0.1); - } - unitcell::update_vel(vel_in,ucell->ntype,ucell->nat,ucell->atoms); + { + vel_in[iat].set (iat * 0.1, iat * 0.1, iat * 0.1); + } + unitcell::update_vel (vel_in, ucell->ntype, ucell->nat, ucell->atoms); for (int iat = 0; iat < ucell->nat; ++iat) - { - EXPECT_DOUBLE_EQ(vel_in[iat].x, 0.1 * iat); - EXPECT_DOUBLE_EQ(vel_in[iat].y, 0.1 * iat); - EXPECT_DOUBLE_EQ(vel_in[iat].z, 0.1 * iat); - } + { + EXPECT_DOUBLE_EQ (vel_in[iat].x, 0.1 * iat); + EXPECT_DOUBLE_EQ (vel_in[iat].y, 0.1 * iat); + EXPECT_DOUBLE_EQ (vel_in[iat].z, 0.1 * iat); + } delete[] vel_in; } -TEST_F(UcellTest, CalUx1) +TEST_F (UcellTest, CalUx1) { UcellTestPrepare utp = UcellTestLib["C1H2-Read"]; 
PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); - ucell->atoms[0].m_loc_[0].set(0, -1, 0); - ucell->atoms[1].m_loc_[0].set(1, 1, 1); - ucell->atoms[1].m_loc_[1].set(0, 0, 0); + ucell = utp.SetUcellInfo (); + ucell->atoms[0].m_loc_[0].set (0, -1, 0); + ucell->atoms[1].m_loc_[0].set (1, 1, 1); + ucell->atoms[1].m_loc_[1].set (0, 0, 0); PARAM.input.nspin = 4; - elecstate::cal_ux(*ucell); - EXPECT_FALSE(ucell->magnet.lsign_); - EXPECT_DOUBLE_EQ(ucell->magnet.ux_[0], 0); - EXPECT_DOUBLE_EQ(ucell->magnet.ux_[1], -1); - EXPECT_DOUBLE_EQ(ucell->magnet.ux_[2], 0); + elecstate::cal_ux (*ucell); + EXPECT_FALSE (ucell->magnet.lsign_); + EXPECT_DOUBLE_EQ (ucell->magnet.ux_[0], 0); + EXPECT_DOUBLE_EQ (ucell->magnet.ux_[1], -1); + EXPECT_DOUBLE_EQ (ucell->magnet.ux_[2], 0); } -TEST_F(UcellTest, CalUx2) +TEST_F (UcellTest, CalUx2) { UcellTestPrepare utp = UcellTestLib["C1H2-Read"]; PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); - ucell->atoms[0].m_loc_[0].set(0, 0, 0); - ucell->atoms[1].m_loc_[0].set(1, 1, 1); - ucell->atoms[1].m_loc_[1].set(0, 0, 0); + ucell = utp.SetUcellInfo (); + ucell->atoms[0].m_loc_[0].set (0, 0, 0); + ucell->atoms[1].m_loc_[0].set (1, 1, 1); + ucell->atoms[1].m_loc_[1].set (0, 0, 0); //(0,0,0) is also parallel to (1,1,1) PARAM.input.nspin = 4; - elecstate::cal_ux(*ucell); - EXPECT_TRUE(ucell->magnet.lsign_); - EXPECT_NEAR(ucell->magnet.ux_[0], 0.57735, 1e-5); - EXPECT_NEAR(ucell->magnet.ux_[1], 0.57735, 1e-5); - EXPECT_NEAR(ucell->magnet.ux_[2], 0.57735, 1e-5); + elecstate::cal_ux (*ucell); + EXPECT_TRUE (ucell->magnet.lsign_); + EXPECT_NEAR (ucell->magnet.ux_[0], 0.57735, 1e-5); + EXPECT_NEAR (ucell->magnet.ux_[1], 0.57735, 1e-5); + EXPECT_NEAR (ucell->magnet.ux_[2], 0.57735, 1e-5); } #ifdef __LCAO -TEST_F(UcellTest, ReadOrbFile) +TEST_F (UcellTest, ReadOrbFile) { UcellTestPrepare utp = UcellTestLib["C1H2-Read"]; PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); + ucell = utp.SetUcellInfo (); 
std::string orb_file = "./support/C.orb"; std::ofstream ofs_running; - ofs_running.open("tmp_readorbfile"); - bool result = elecstate::read_orb_file(0, orb_file, ofs_running, &(ucell->atoms[0])); + ofs_running.open ("tmp_readorbfile"); + bool result = elecstate::read_orb_file (0, orb_file, ofs_running, &(ucell->atoms[0])); ofs_running << " result=" << result << std::endl; - EXPECT_TRUE(result); - ofs_running.close(); - EXPECT_EQ(ucell->atoms[0].nw, 25); - remove("tmp_readorbfile"); + EXPECT_TRUE (result); + ofs_running.close (); + EXPECT_EQ (ucell->atoms[0].nw, 25); + remove ("tmp_readorbfile"); } class UcellTestReadStru : public ::testing::Test @@ -1103,119 +1108,121 @@ class UcellTestReadStru : public ::testing::Test protected: std::unique_ptr ucell{new UnitCell}; std::string output; - void SetUp() override + void + SetUp () override { - ucell->ntype = 2; - ucell->atom_mass.resize(ucell->ntype); - ucell->atom_label.resize(ucell->ntype); - ucell->pseudo_fn.resize(ucell->ntype); - ucell->pseudo_type.resize(ucell->ntype); - ucell->orbital_fn.resize(ucell->ntype); + ucell->ntype = 2; + ucell->atom_mass.resize (ucell->ntype); + ucell->atom_label.resize (ucell->ntype); + ucell->pseudo_fn.resize (ucell->ntype); + ucell->pseudo_type.resize (ucell->ntype); + ucell->orbital_fn.resize (ucell->ntype); } - void TearDown() override + void + TearDown () override { - ucell->orbital_fn.shrink_to_fit(); + ucell->orbital_fn.shrink_to_fit (); } }; -TEST_F(UcellTestReadStru, ReadAtomSpecies) +TEST_F (UcellTestReadStru, ReadAtomSpecies) { std::string fn = "./support/STRU_MgO"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; - ofs_running.open("read_atom_species.tmp"); + ofs_running.open ("read_atom_species.tmp"); ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; ucell->set_atom_flag = true; PARAM.input.test_pseudo_cell = 2; PARAM.input.basis_type = "lcao"; PARAM.sys.deepks_setorb = true; - 
EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running, ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 4.27957); - ofs_running.close(); - ifa.close(); - remove("read_atom_species.tmp"); + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); + ofs_running.close (); + ifa.close (); + remove ("read_atom_species.tmp"); } -TEST_F(UcellTestReadStru, ReadAtomSpeciesWarning1) +TEST_F (UcellTestReadStru, ReadAtomSpeciesWarning1) { std::string fn = "./support/STRU_MgO_Warning1"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; - ofs_running.open("read_atom_species.txt"); + ofs_running.open ("read_atom_species.txt"); ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; ucell->set_atom_flag = true; - testing::internal::CaptureStdout(); - EXPECT_EXIT(unitcell::read_atom_species(ifa, ofs_running,*ucell), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("unrecognized pseudopotential type.")); - ofs_running.close(); - ifa.close(); - //remove("read_atom_species.txt"); + testing::internal::CaptureStdout (); + EXPECT_EXIT (unitcell::read_atom_species (ifa, ofs_running, *ucell), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("unrecognized pseudopotential type.")); + ofs_running.close (); + ifa.close (); + // remove("read_atom_species.txt"); } -TEST_F(UcellTestReadStru, ReadLatticeConstantWarning1) +TEST_F (UcellTestReadStru, 
ReadLatticeConstantWarning1) { std::string fn = "./support/STRU_MgO_Warning2"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; - ofs_running.open("read_atom_species1.tmp"); + ofs_running.open ("read_atom_species1.tmp"); ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; ucell->set_atom_flag = true; - testing::internal::CaptureStdout(); - EXPECT_EXIT(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("Lattice constant <= 0.0")); - ofs_running.close(); - ifa.close(); - remove("read_atom_species1.tmp"); + testing::internal::CaptureStdout (); + EXPECT_EXIT (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("Lattice constant <= 0.0")); + ofs_running.close (); + ifa.close (); + remove ("read_atom_species1.tmp"); } -TEST_F(UcellTestReadStru, ReadLatticeConstantWarning2) +TEST_F (UcellTestReadStru, ReadLatticeConstantWarning2) { std::string fn = "./support/STRU_MgO_Warning3"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; - ofs_running.open("read_atom_species.tmp"); + ofs_running.open ("read_atom_species.tmp"); ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; ucell->set_atom_flag = true; - testing::internal::CaptureStdout(); - EXPECT_EXIT(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, - testing::HasSubstr("do not use LATTICE_PARAMETERS without explicit specification of lattice type")); - ofs_running.close(); - ifa.close(); - remove("read_atom_species.tmp"); + testing::internal::CaptureStdout (); + EXPECT_EXIT (unitcell::read_lattice_constant (ifa, ofs_running, 
ucell->lat), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, + testing::HasSubstr ("do not use LATTICE_PARAMETERS without explicit specification of lattice type")); + ofs_running.close (); + ifa.close (); + remove ("read_atom_species.tmp"); } -TEST_F(UcellTestReadStru, ReadLatticeConstantWarning3) +TEST_F (UcellTestReadStru, ReadLatticeConstantWarning3) { std::string fn = "./support/STRU_MgO_Warning4"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; - ofs_running.open("read_atom_species.tmp"); + ofs_running.open ("read_atom_species.tmp"); ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; ucell->set_atom_flag = true; ucell->latName = "bcc"; - testing::internal::CaptureStdout(); - EXPECT_EXIT(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, - testing::HasSubstr("do not use LATTICE_VECTORS along with explicit specification of lattice type")); - ofs_running.close(); - ifa.close(); - remove("read_atom_species.tmp"); + testing::internal::CaptureStdout (); + EXPECT_EXIT (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, + testing::HasSubstr ("do not use LATTICE_VECTORS along with explicit specification of lattice type")); + ofs_running.close (); + ifa.close (); + remove ("read_atom_species.tmp"); } -TEST_F(UcellTestReadStru, ReadAtomSpeciesLatName) +TEST_F (UcellTestReadStru, ReadAtomSpeciesLatName) { ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; @@ -1234,53 +1241,53 @@ TEST_F(UcellTestReadStru, ReadAtomSpeciesLatName) "sm", "bacm", "triclinic"}; - for (int i = 0; i < latName_in.size(); ++i) - { - std::string fn = "./support/STRU_MgO_LatName"; - std::ifstream ifa(fn.c_str()); - std::ofstream ofs_running; 
- ofs_running.open("read_atom_species.tmp"); - ucell->latName = latName_in[i]; - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - if (ucell->latName == "sc") + for (int i = 0; i < latName_in.size (); ++i) { - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 1.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 1.0); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 1.0); + std::string fn = "./support/STRU_MgO_LatName"; + std::ifstream ifa (fn.c_str ()); + std::ofstream ofs_running; + ofs_running.open ("read_atom_species.tmp"); + ucell->latName = latName_in[i]; + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + if (ucell->latName == "sc") + { + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 1.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 1.0); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 1.0); + } + ofs_running.close (); + ifa.close (); + remove ("read_atom_species.tmp"); } - ofs_running.close(); - ifa.close(); - remove("read_atom_species.tmp"); - } } -TEST_F(UcellDeathTest, ReadAtomSpeciesWarning5) +TEST_F (UcellDeathTest, ReadAtomSpeciesWarning5) { std::string fn = "./support/STRU_MgO_LatName"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; - ofs_running.open("read_atom_species.tmp"); + ofs_running.open ("read_atom_species.tmp"); ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; ucell->set_atom_flag = true; ucell->latName = "arbitrary"; - testing::internal::CaptureStdout(); - EXPECT_EXIT(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("latname not supported")); - ofs_running.close(); - ifa.close(); - remove("read_atom_species.tmp"); + testing::internal::CaptureStdout (); + EXPECT_EXIT (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, 
testing::HasSubstr ("latname not supported")); + ofs_running.close (); + ifa.close (); + remove ("read_atom_species.tmp"); } -TEST_F(UcellTestReadStru, ReadAtomPositionsS1) +TEST_F (UcellTestReadStru, ReadAtomPositionsS1) { std::string fn = "./support/STRU_MgO"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; std::ofstream ofs_warning; - ofs_running.open("read_atom_positions.tmp"); - ofs_warning.open("read_atom_positions.warn"); + ofs_running.open ("read_atom_positions.tmp"); + ofs_warning.open ("read_atom_positions.warn"); // mandatory preliminaries ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; @@ -1289,30 +1296,30 @@ TEST_F(UcellTestReadStru, ReadAtomPositionsS1) PARAM.input.basis_type = "lcao"; PARAM.sys.deepks_setorb = true; PARAM.input.nspin = 1; - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 4.27957); + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); // mandatory preliminaries delete[] ucell->magnet.start_mag; ucell->magnet.start_mag = new double[ucell->ntype]; - unitcell::read_atom_positions(*ucell,ifa, ofs_running, ofs_warning); - ofs_running.close(); - ofs_warning.close(); - ifa.close(); - remove("read_atom_positions.tmp"); - remove("read_atom_positions.warn"); + unitcell::read_atom_positions (*ucell, ifa, ofs_running, ofs_warning); + ofs_running.close (); + ofs_warning.close (); + ifa.close (); + remove ("read_atom_positions.tmp"); + remove ("read_atom_positions.warn"); } -TEST_F(UcellTestReadStru, 
ReadAtomPositionsS2) +TEST_F (UcellTestReadStru, ReadAtomPositionsS2) { std::string fn = "./support/STRU_MgO"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; std::ofstream ofs_warning; - ofs_running.open("read_atom_positions.tmp"); - ofs_warning.open("read_atom_positions.warn"); + ofs_running.open ("read_atom_positions.tmp"); + ofs_warning.open ("read_atom_positions.warn"); // mandatory preliminaries ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; @@ -1321,30 +1328,30 @@ TEST_F(UcellTestReadStru, ReadAtomPositionsS2) PARAM.input.basis_type = "lcao"; PARAM.sys.deepks_setorb = true; PARAM.input.nspin = 2; - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 4.27957); + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); // mandatory preliminaries delete[] ucell->magnet.start_mag; ucell->magnet.start_mag = new double[ucell->ntype]; - unitcell::read_atom_positions(*ucell,ifa, ofs_running, ofs_warning); - ofs_running.close(); - ofs_warning.close(); - ifa.close(); - remove("read_atom_positions.tmp"); - remove("read_atom_positions.warn"); + unitcell::read_atom_positions (*ucell, ifa, ofs_running, ofs_warning); + ofs_running.close (); + ofs_warning.close (); + ifa.close (); + remove ("read_atom_positions.tmp"); + remove ("read_atom_positions.warn"); } -TEST_F(UcellTestReadStru, ReadAtomPositionsS4Noncolin) +TEST_F (UcellTestReadStru, ReadAtomPositionsS4Noncolin) { std::string fn = "./support/STRU_MgO"; - std::ifstream ifa(fn.c_str()); + 
std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; std::ofstream ofs_warning; - ofs_running.open("read_atom_positions.tmp"); - ofs_warning.open("read_atom_positions.warn"); + ofs_running.open ("read_atom_positions.tmp"); + ofs_warning.open ("read_atom_positions.warn"); // mandatory preliminaries ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; @@ -1354,30 +1361,30 @@ TEST_F(UcellTestReadStru, ReadAtomPositionsS4Noncolin) PARAM.sys.deepks_setorb = true; PARAM.input.nspin = 4; PARAM.input.noncolin = true; - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 4.27957); + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); // mandatory preliminaries delete[] ucell->magnet.start_mag; ucell->magnet.start_mag = new double[ucell->ntype]; - unitcell::read_atom_positions(*ucell,ifa, ofs_running, ofs_warning); - ofs_running.close(); - ofs_warning.close(); - ifa.close(); - remove("read_atom_positions.tmp"); - remove("read_atom_positions.warn"); + unitcell::read_atom_positions (*ucell, ifa, ofs_running, ofs_warning); + ofs_running.close (); + ofs_warning.close (); + ifa.close (); + remove ("read_atom_positions.tmp"); + remove ("read_atom_positions.warn"); } -TEST_F(UcellTestReadStru, ReadAtomPositionsS4Colin) +TEST_F (UcellTestReadStru, ReadAtomPositionsS4Colin) { std::string fn = "./support/STRU_MgO"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; std::ofstream ofs_warning; - ofs_running.open("read_atom_positions.tmp"); - 
ofs_warning.open("read_atom_positions.warn"); + ofs_running.open ("read_atom_positions.tmp"); + ofs_warning.open ("read_atom_positions.warn"); // mandatory preliminaries ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; @@ -1387,30 +1394,30 @@ TEST_F(UcellTestReadStru, ReadAtomPositionsS4Colin) PARAM.sys.deepks_setorb = true; PARAM.input.nspin = 4; PARAM.input.noncolin = false; - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 4.27957); + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); // mandatory preliminaries delete[] ucell->magnet.start_mag; ucell->magnet.start_mag = new double[ucell->ntype]; - unitcell::read_atom_positions(*ucell,ifa, ofs_running, ofs_warning); - ofs_running.close(); - ofs_warning.close(); - ifa.close(); - remove("read_atom_positions.tmp"); - remove("read_atom_positions.warn"); + unitcell::read_atom_positions (*ucell, ifa, ofs_running, ofs_warning); + ofs_running.close (); + ofs_warning.close (); + ifa.close (); + remove ("read_atom_positions.tmp"); + remove ("read_atom_positions.warn"); } -TEST_F(UcellTestReadStru, ReadAtomPositionsC) +TEST_F (UcellTestReadStru, ReadAtomPositionsC) { std::string fn = "./support/STRU_MgO_c"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; std::ofstream ofs_warning; - ofs_running.open("read_atom_positions.tmp"); - ofs_warning.open("read_atom_positions.warn"); + ofs_running.open ("read_atom_positions.tmp"); + ofs_warning.open ("read_atom_positions.warn"); // mandatory 
preliminaries ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; @@ -1419,30 +1426,30 @@ TEST_F(UcellTestReadStru, ReadAtomPositionsC) PARAM.input.basis_type = "lcao"; PARAM.sys.deepks_setorb = true; PARAM.input.nspin = 1; - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 4.27957); + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); // mandatory preliminaries delete[] ucell->magnet.start_mag; ucell->magnet.start_mag = new double[ucell->ntype]; - unitcell::read_atom_positions(*ucell,ifa, ofs_running, ofs_warning); - ofs_running.close(); - ofs_warning.close(); - ifa.close(); - remove("read_atom_positions.tmp"); - remove("read_atom_positions.warn"); + unitcell::read_atom_positions (*ucell, ifa, ofs_running, ofs_warning); + ofs_running.close (); + ofs_warning.close (); + ifa.close (); + remove ("read_atom_positions.tmp"); + remove ("read_atom_positions.warn"); } -TEST_F(UcellTestReadStru, ReadAtomPositionsCA) +TEST_F (UcellTestReadStru, ReadAtomPositionsCA) { std::string fn = "./support/STRU_MgO_ca"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; std::ofstream ofs_warning; - ofs_running.open("read_atom_positions.tmp"); - ofs_warning.open("read_atom_positions.warn"); + ofs_running.open ("read_atom_positions.tmp"); + ofs_warning.open ("read_atom_positions.warn"); // mandatory preliminaries ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; @@ -1451,30 +1458,30 @@ TEST_F(UcellTestReadStru, ReadAtomPositionsCA) 
PARAM.input.basis_type = "lcao"; PARAM.sys.deepks_setorb = true; PARAM.input.nspin = 1; - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 4.27957); + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); // mandatory preliminaries delete[] ucell->magnet.start_mag; ucell->magnet.start_mag = new double[ucell->ntype]; - unitcell::read_atom_positions(*ucell,ifa, ofs_running, ofs_warning); - ofs_running.close(); - ofs_warning.close(); - ifa.close(); - remove("read_atom_positions.tmp"); - remove("read_atom_positions.warn"); + unitcell::read_atom_positions (*ucell, ifa, ofs_running, ofs_warning); + ofs_running.close (); + ofs_warning.close (); + ifa.close (); + remove ("read_atom_positions.tmp"); + remove ("read_atom_positions.warn"); } -TEST_F(UcellTestReadStru, ReadAtomPositionsCACXY) +TEST_F (UcellTestReadStru, ReadAtomPositionsCACXY) { std::string fn = "./support/STRU_MgO_cacxy"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; std::ofstream ofs_warning; - ofs_running.open("read_atom_positions.tmp"); - ofs_warning.open("read_atom_positions.warn"); + ofs_running.open ("read_atom_positions.tmp"); + ofs_warning.open ("read_atom_positions.warn"); // mandatory preliminaries ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; @@ -1483,30 +1490,30 @@ TEST_F(UcellTestReadStru, ReadAtomPositionsCACXY) PARAM.input.basis_type = "lcao"; PARAM.sys.deepks_setorb = true; PARAM.input.nspin = 1; - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, 
ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 4.27957); + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); // mandatory preliminaries delete[] ucell->magnet.start_mag; ucell->magnet.start_mag = new double[ucell->ntype]; - unitcell::read_atom_positions(*ucell,ifa, ofs_running, ofs_warning); - ofs_running.close(); - ofs_warning.close(); - ifa.close(); - remove("read_atom_positions.tmp"); - remove("read_atom_positions.warn"); + unitcell::read_atom_positions (*ucell, ifa, ofs_running, ofs_warning); + ofs_running.close (); + ofs_warning.close (); + ifa.close (); + remove ("read_atom_positions.tmp"); + remove ("read_atom_positions.warn"); } -TEST_F(UcellTestReadStru, ReadAtomPositionsCACXZ) +TEST_F (UcellTestReadStru, ReadAtomPositionsCACXZ) { std::string fn = "./support/STRU_MgO_cacxz"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; std::ofstream ofs_warning; - ofs_running.open("read_atom_positions.tmp"); - ofs_warning.open("read_atom_positions.warn"); + ofs_running.open ("read_atom_positions.tmp"); + ofs_warning.open ("read_atom_positions.warn"); // mandatory preliminaries ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; @@ -1515,30 +1522,30 @@ TEST_F(UcellTestReadStru, ReadAtomPositionsCACXZ) PARAM.input.basis_type = "lcao"; PARAM.sys.deepks_setorb = true; PARAM.input.nspin = 1; - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 
4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 4.27957); + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); // mandatory preliminaries delete[] ucell->magnet.start_mag; ucell->magnet.start_mag = new double[ucell->ntype]; - unitcell::read_atom_positions(*ucell,ifa, ofs_running, ofs_warning); - ofs_running.close(); - ofs_warning.close(); - ifa.close(); - remove("read_atom_positions.tmp"); - remove("read_atom_positions.warn"); + unitcell::read_atom_positions (*ucell, ifa, ofs_running, ofs_warning); + ofs_running.close (); + ofs_warning.close (); + ifa.close (); + remove ("read_atom_positions.tmp"); + remove ("read_atom_positions.warn"); } -TEST_F(UcellTestReadStru, ReadAtomPositionsCACYZ) +TEST_F (UcellTestReadStru, ReadAtomPositionsCACYZ) { std::string fn = "./support/STRU_MgO_cacyz"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; std::ofstream ofs_warning; - ofs_running.open("read_atom_positions.tmp"); - ofs_warning.open("read_atom_positions.warn"); + ofs_running.open ("read_atom_positions.tmp"); + ofs_warning.open ("read_atom_positions.warn"); // mandatory preliminaries ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; @@ -1547,30 +1554,30 @@ TEST_F(UcellTestReadStru, ReadAtomPositionsCACYZ) PARAM.input.basis_type = "lcao"; PARAM.sys.deepks_setorb = true; PARAM.input.nspin = 1; - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 4.27957); + EXPECT_NO_THROW 
(unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); // mandatory preliminaries delete[] ucell->magnet.start_mag; ucell->magnet.start_mag = new double[ucell->ntype]; - unitcell::read_atom_positions(*ucell,ifa, ofs_running, ofs_warning); - ofs_running.close(); - ofs_warning.close(); - ifa.close(); - remove("read_atom_positions.tmp"); - remove("read_atom_positions.warn"); + unitcell::read_atom_positions (*ucell, ifa, ofs_running, ofs_warning); + ofs_running.close (); + ofs_warning.close (); + ifa.close (); + remove ("read_atom_positions.tmp"); + remove ("read_atom_positions.warn"); } -TEST_F(UcellTestReadStru, ReadAtomPositionsCACXYZ) +TEST_F (UcellTestReadStru, ReadAtomPositionsCACXYZ) { std::string fn = "./support/STRU_MgO_cacxyz"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; std::ofstream ofs_warning; - ofs_running.open("read_atom_positions.tmp"); - ofs_warning.open("read_atom_positions.warn"); + ofs_running.open ("read_atom_positions.tmp"); + ofs_warning.open ("read_atom_positions.warn"); // mandatory preliminaries ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; @@ -1579,30 +1586,30 @@ TEST_F(UcellTestReadStru, ReadAtomPositionsCACXYZ) PARAM.input.basis_type = "lcao"; PARAM.sys.deepks_setorb = true; PARAM.input.nspin = 1; - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 4.27957); + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + 
EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); // mandatory preliminaries delete[] ucell->magnet.start_mag; ucell->magnet.start_mag = new double[ucell->ntype]; - unitcell::read_atom_positions(*ucell,ifa, ofs_running, ofs_warning); - ofs_running.close(); - ofs_warning.close(); - ifa.close(); - remove("read_atom_positions.tmp"); - remove("read_atom_positions.warn"); + unitcell::read_atom_positions (*ucell, ifa, ofs_running, ofs_warning); + ofs_running.close (); + ofs_warning.close (); + ifa.close (); + remove ("read_atom_positions.tmp"); + remove ("read_atom_positions.warn"); } -TEST_F(UcellTestReadStru, ReadAtomPositionsCAU) +TEST_F (UcellTestReadStru, ReadAtomPositionsCAU) { std::string fn = "./support/STRU_MgO_cau"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; std::ofstream ofs_warning; - ofs_running.open("read_atom_positions.tmp"); - ofs_warning.open("read_atom_positions.warn"); + ofs_running.open ("read_atom_positions.tmp"); + ofs_warning.open ("read_atom_positions.warn"); // mandatory preliminaries ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; @@ -1612,30 +1619,30 @@ TEST_F(UcellTestReadStru, ReadAtomPositionsCAU) PARAM.sys.deepks_setorb = true; PARAM.input.nspin = 1; PARAM.input.fixed_atoms = true; - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 4.27957); + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); // mandatory 
preliminaries delete[] ucell->magnet.start_mag; ucell->magnet.start_mag = new double[ucell->ntype]; - unitcell::read_atom_positions(*ucell,ifa, ofs_running, ofs_warning); - ofs_running.close(); - ofs_warning.close(); - ifa.close(); - remove("read_atom_positions.tmp"); - remove("read_atom_positions.warn"); + unitcell::read_atom_positions (*ucell, ifa, ofs_running, ofs_warning); + ofs_running.close (); + ofs_warning.close (); + ifa.close (); + remove ("read_atom_positions.tmp"); + remove ("read_atom_positions.warn"); } -TEST_F(UcellTestReadStru, ReadAtomPositionsAutosetMag) +TEST_F (UcellTestReadStru, ReadAtomPositionsAutosetMag) { std::string fn = "./support/STRU_MgO"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; std::ofstream ofs_warning; - ofs_running.open("read_atom_positions.tmp"); - ofs_warning.open("read_atom_positions.warn"); + ofs_running.open ("read_atom_positions.tmp"); + ofs_warning.open ("read_atom_positions.warn"); // mandatory preliminaries ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; @@ -1644,53 +1651,53 @@ TEST_F(UcellTestReadStru, ReadAtomPositionsAutosetMag) PARAM.input.basis_type = "lcao"; PARAM.sys.deepks_setorb = true; PARAM.input.nspin = 2; - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 4.27957); + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); // mandatory preliminaries delete[] ucell->magnet.start_mag; ucell->magnet.start_mag = new double[ucell->ntype]; - 
unitcell::read_atom_positions(*ucell,ifa, ofs_running, ofs_warning); + unitcell::read_atom_positions (*ucell, ifa, ofs_running, ofs_warning); for (int it = 0; it < ucell->ntype; it++) - { - for (int ia = 0; ia < ucell->atoms[it].na; ia++) { - EXPECT_DOUBLE_EQ(ucell->atoms[it].mag[ia], 1.0); - EXPECT_DOUBLE_EQ(ucell->atoms[it].m_loc_[ia].x, 1.0); + for (int ia = 0; ia < ucell->atoms[it].na; ia++) + { + EXPECT_DOUBLE_EQ (ucell->atoms[it].mag[ia], 1.0); + EXPECT_DOUBLE_EQ (ucell->atoms[it].m_loc_[ia].x, 1.0); + } } - } // for nspin == 4 PARAM.input.nspin = 4; delete[] ucell->magnet.start_mag; ucell->magnet.start_mag = new double[ucell->ntype]; - unitcell::read_atom_positions(*ucell,ifa, ofs_running, ofs_warning); + unitcell::read_atom_positions (*ucell, ifa, ofs_running, ofs_warning); for (int it = 0; it < ucell->ntype; it++) - { - for (int ia = 0; ia < ucell->atoms[it].na; ia++) { - EXPECT_DOUBLE_EQ(ucell->atoms[it].mag[ia], sqrt(pow(1.0, 2) + pow(1.0, 2) + pow(1.0, 2))); - EXPECT_DOUBLE_EQ(ucell->atoms[it].m_loc_[ia].x, 1.0); - EXPECT_DOUBLE_EQ(ucell->atoms[it].m_loc_[ia].y, 1.0); - EXPECT_DOUBLE_EQ(ucell->atoms[it].m_loc_[ia].z, 1.0); + for (int ia = 0; ia < ucell->atoms[it].na; ia++) + { + EXPECT_DOUBLE_EQ (ucell->atoms[it].mag[ia], sqrt (pow (1.0, 2) + pow (1.0, 2) + pow (1.0, 2))); + EXPECT_DOUBLE_EQ (ucell->atoms[it].m_loc_[ia].x, 1.0); + EXPECT_DOUBLE_EQ (ucell->atoms[it].m_loc_[ia].y, 1.0); + EXPECT_DOUBLE_EQ (ucell->atoms[it].m_loc_[ia].z, 1.0); + } } - } - ofs_running.close(); - ofs_warning.close(); - ifa.close(); - remove("read_atom_positions.tmp"); - remove("read_atom_positions.warn"); + ofs_running.close (); + ofs_warning.close (); + ifa.close (); + remove ("read_atom_positions.tmp"); + remove ("read_atom_positions.warn"); } -TEST_F(UcellTestReadStru, ReadAtomPositionsWarning1) +TEST_F (UcellTestReadStru, ReadAtomPositionsWarning1) { std::string fn = "./support/STRU_MgO_WarningC1"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); 
std::ofstream ofs_running; std::ofstream ofs_warning; - ofs_running.open("read_atom_positions.tmp"); - ofs_warning.open("read_atom_positions.warn"); + ofs_running.open ("read_atom_positions.tmp"); + ofs_warning.open ("read_atom_positions.warn"); // mandatory preliminaries ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; @@ -1698,43 +1705,43 @@ TEST_F(UcellTestReadStru, ReadAtomPositionsWarning1) PARAM.input.test_pseudo_cell = 2; PARAM.input.basis_type = "lcao"; PARAM.sys.deepks_setorb = true; - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 4.27957); + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); // mandatory preliminaries delete[] ucell->magnet.start_mag; ucell->magnet.start_mag = new double[ucell->ntype]; - EXPECT_NO_THROW(unitcell::read_atom_positions(*ucell,ifa, ofs_running, ofs_warning)); - ofs_running.close(); - ofs_warning.close(); - ifa.close(); + EXPECT_NO_THROW (unitcell::read_atom_positions (*ucell, ifa, ofs_running, ofs_warning)); + ofs_running.close (); + ofs_warning.close (); + ifa.close (); // check warning file std::ifstream ifs_tmp; - ifs_tmp.open("read_atom_positions.warn"); - std::string str((std::istreambuf_iterator(ifs_tmp)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("There are several options for you:")); - EXPECT_THAT(str, testing::HasSubstr("Direct")); - EXPECT_THAT(str, testing::HasSubstr("Cartesian_angstrom")); - EXPECT_THAT(str, testing::HasSubstr("Cartesian_au")); - EXPECT_THAT(str, 
testing::HasSubstr("Cartesian_angstrom_center_xy")); - EXPECT_THAT(str, testing::HasSubstr("Cartesian_angstrom_center_xz")); - EXPECT_THAT(str, testing::HasSubstr("Cartesian_angstrom_center_yz")); - EXPECT_THAT(str, testing::HasSubstr("Cartesian_angstrom_center_xyz")); - ifs_tmp.close(); - remove("read_atom_positions.tmp"); - remove("read_atom_positions.warn"); + ifs_tmp.open ("read_atom_positions.warn"); + std::string str ((std::istreambuf_iterator (ifs_tmp)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("There are several options for you:")); + EXPECT_THAT (str, testing::HasSubstr ("Direct")); + EXPECT_THAT (str, testing::HasSubstr ("Cartesian_angstrom")); + EXPECT_THAT (str, testing::HasSubstr ("Cartesian_au")); + EXPECT_THAT (str, testing::HasSubstr ("Cartesian_angstrom_center_xy")); + EXPECT_THAT (str, testing::HasSubstr ("Cartesian_angstrom_center_xz")); + EXPECT_THAT (str, testing::HasSubstr ("Cartesian_angstrom_center_yz")); + EXPECT_THAT (str, testing::HasSubstr ("Cartesian_angstrom_center_xyz")); + ifs_tmp.close (); + remove ("read_atom_positions.tmp"); + remove ("read_atom_positions.warn"); } -TEST_F(UcellTestReadStru, ReadAtomPositionsWarning2) +TEST_F (UcellTestReadStru, ReadAtomPositionsWarning2) { std::string fn = "./support/STRU_MgO_WarningC2"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; std::ofstream ofs_warning; - ofs_running.open("read_atom_positions.tmp"); - ofs_warning.open("read_atom_positions.warn"); + ofs_running.open ("read_atom_positions.tmp"); + ofs_warning.open ("read_atom_positions.warn"); // mandatory preliminaries ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; @@ -1742,36 +1749,36 @@ TEST_F(UcellTestReadStru, ReadAtomPositionsWarning2) PARAM.input.test_pseudo_cell = 2; PARAM.input.basis_type = "lcao"; PARAM.sys.deepks_setorb = true; - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - 
EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 4.27957); + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); // mandatory preliminaries delete[] ucell->magnet.start_mag; ucell->magnet.start_mag = new double[ucell->ntype]; - EXPECT_NO_THROW(unitcell::read_atom_positions(*ucell,ifa, ofs_running, ofs_warning)); - ofs_running.close(); - ofs_warning.close(); - ifa.close(); + EXPECT_NO_THROW (unitcell::read_atom_positions (*ucell, ifa, ofs_running, ofs_warning)); + ofs_running.close (); + ofs_warning.close (); + ifa.close (); // check warning file std::ifstream ifs_tmp; - ifs_tmp.open("read_atom_positions.warn"); - std::string str((std::istreambuf_iterator(ifs_tmp)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("Label read from ATOMIC_POSITIONS is Mo")); - EXPECT_THAT(str, testing::HasSubstr("Label from ATOMIC_SPECIES is Mg")); - ifs_tmp.close(); - remove("read_atom_positions.tmp"); - remove("read_atom_positions.warn"); + ifs_tmp.open ("read_atom_positions.warn"); + std::string str ((std::istreambuf_iterator (ifs_tmp)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("Label read from ATOMIC_POSITIONS is Mo")); + EXPECT_THAT (str, testing::HasSubstr ("Label from ATOMIC_SPECIES is Mg")); + ifs_tmp.close (); + remove ("read_atom_positions.tmp"); + remove ("read_atom_positions.warn"); } -TEST_F(UcellTestReadStru, ReadAtomPositionsWarning3) +TEST_F (UcellTestReadStru, ReadAtomPositionsWarning3) { std::string fn = "./support/STRU_MgO_WarningC3"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); 
std::ofstream ofs_running; - ofs_running.open("read_atom_positions.tmp"); - GlobalV::ofs_warning.open("read_atom_positions.warn"); + ofs_running.open ("read_atom_positions.tmp"); + GlobalV::ofs_warning.open ("read_atom_positions.warn"); // mandatory preliminaries ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; @@ -1779,70 +1786,72 @@ TEST_F(UcellTestReadStru, ReadAtomPositionsWarning3) PARAM.input.test_pseudo_cell = 2; PARAM.input.basis_type = "lcao"; PARAM.sys.deepks_setorb = true; - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 4.27957); + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); // mandatory preliminaries delete[] ucell->magnet.start_mag; ucell->magnet.start_mag = new double[ucell->ntype]; - EXPECT_NO_THROW(unitcell::read_atom_positions(*ucell,ifa, ofs_running, GlobalV::ofs_warning)); - ofs_running.close(); - GlobalV::ofs_warning.close(); - ifa.close(); + EXPECT_NO_THROW (unitcell::read_atom_positions (*ucell, ifa, ofs_running, GlobalV::ofs_warning)); + ofs_running.close (); + GlobalV::ofs_warning.close (); + ifa.close (); // check warning file std::ifstream ifs_tmp; - ifs_tmp.open("read_atom_positions.warn"); - std::string str((std::istreambuf_iterator(ifs_tmp)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("read_atom_positions warning : atom number < 0.")); - ifs_tmp.close(); - remove("read_atom_positions.tmp"); - remove("read_atom_positions.warn"); + ifs_tmp.open ("read_atom_positions.warn"); + std::string str 
((std::istreambuf_iterator (ifs_tmp)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("read_atom_positions warning : atom number < 0.")); + ifs_tmp.close (); + remove ("read_atom_positions.tmp"); + remove ("read_atom_positions.warn"); } -TEST_F(UcellTestReadStru, ReadAtomPositionsWarning4) +TEST_F (UcellTestReadStru, ReadAtomPositionsWarning4) { std::string fn = "./support/STRU_MgO_WarningC4"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; std::ofstream ofs_warning; - ofs_running.open("read_atom_positions.tmp"); - ofs_warning.open("read_atom_positions.warn"); + ofs_running.open ("read_atom_positions.tmp"); + ofs_warning.open ("read_atom_positions.warn"); // mandatory preliminaries ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; - ucell->orbital_fn.resize(ucell->ntype); + ucell->orbital_fn.resize (ucell->ntype); ucell->set_atom_flag = true; PARAM.input.test_pseudo_cell = 2; PARAM.input.basis_type = "lcao"; PARAM.sys.deepks_setorb = true; - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 4.27957); + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); // mandatory preliminaries delete[] ucell->magnet.start_mag; ucell->magnet.start_mag = new double[ucell->ntype]; - testing::internal::CaptureStdout(); - EXPECT_EXIT(unitcell::read_atom_positions(*ucell,ifa, ofs_running, ofs_warning), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, 
testing::HasSubstr("read_atom_positions, mismatch in atom number for atom type: Mg")); - ofs_running.close(); - ofs_warning.close(); - ifa.close(); - remove("read_atom_positions.tmp"); - remove("read_atom_positions.warn"); + testing::internal::CaptureStdout (); + EXPECT_EXIT (unitcell::read_atom_positions (*ucell, ifa, ofs_running, ofs_warning), + ::testing::ExitedWithCode (1), + ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("read_atom_positions, mismatch in atom number for atom type: Mg")); + ofs_running.close (); + ofs_warning.close (); + ifa.close (); + remove ("read_atom_positions.tmp"); + remove ("read_atom_positions.warn"); } -TEST_F(UcellTestReadStru, ReadAtomPositionsWarning5) +TEST_F (UcellTestReadStru, ReadAtomPositionsWarning5) { std::string fn = "./support/STRU_MgO"; - std::ifstream ifa(fn.c_str()); + std::ifstream ifa (fn.c_str ()); std::ofstream ofs_running; - ofs_running.open("read_atom_positions.tmp"); - GlobalV::ofs_warning.open("read_atom_positions.warn"); + ofs_running.open ("read_atom_positions.tmp"); + GlobalV::ofs_warning.open ("read_atom_positions.warn"); // mandatory preliminaries ucell->ntype = 2; ucell->atoms = new Atom[ucell->ntype]; @@ -1852,43 +1861,43 @@ TEST_F(UcellTestReadStru, ReadAtomPositionsWarning5) PARAM.sys.deepks_setorb = true; PARAM.input.calculation = "md"; PARAM.input.esolver_type = "arbitrary"; - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22, 4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33, 4.27957); + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ 
(ucell->latvec.e33, 4.27957); // mandatory preliminaries delete[] ucell->magnet.start_mag; ucell->magnet.start_mag = new double[ucell->ntype]; - EXPECT_NO_THROW(unitcell::read_atom_positions(*ucell,ifa, ofs_running, GlobalV::ofs_warning)); - ofs_running.close(); - GlobalV::ofs_warning.close(); - ifa.close(); + EXPECT_NO_THROW (unitcell::read_atom_positions (*ucell, ifa, ofs_running, GlobalV::ofs_warning)); + ofs_running.close (); + GlobalV::ofs_warning.close (); + ifa.close (); // check warning file std::ifstream ifs_tmp; - ifs_tmp.open("read_atom_positions.warn"); - std::string str((std::istreambuf_iterator(ifs_tmp)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("read_atoms warning : no atoms can move in MD simulations!")); - ifs_tmp.close(); - remove("read_atom_positions.tmp"); - remove("read_atom_positions.warn"); + ifs_tmp.open ("read_atom_positions.warn"); + std::string str ((std::istreambuf_iterator (ifs_tmp)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("read_atoms warning : no atoms can move in MD simulations!")); + ifs_tmp.close (); + remove ("read_atom_positions.tmp"); + remove ("read_atom_positions.warn"); } #endif -TEST_F(UcellTest, ReadOrbFileWarning) +TEST_F (UcellTest, ReadOrbFileWarning) { UcellTestPrepare utp = UcellTestLib["C1H2-Read"]; PARAM.input.relax_new = utp.relax_new; - ucell = utp.SetUcellInfo(); + ucell = utp.SetUcellInfo (); std::string orb_file = "./support/CC.orb"; std::ofstream ofs_running; - ofs_running.open("tmp_readorbfilewarning"); - testing::internal::CaptureStdout(); - bool result = elecstate::read_orb_file(0, orb_file, ofs_running, &(ucell->atoms[0])); - output = testing::internal::GetCapturedStdout(); + ofs_running.open ("tmp_readorbfilewarning"); + testing::internal::CaptureStdout (); + bool result = elecstate::read_orb_file (0, orb_file, ofs_running, &(ucell->atoms[0])); + output = testing::internal::GetCapturedStdout (); ofs_running << output << std::endl; - 
EXPECT_FALSE(result); - EXPECT_THAT(output, testing::HasSubstr("Element index 1")); - EXPECT_THAT(output, testing::HasSubstr("orbital file: ./support/CC.orb")); - ofs_running.close(); - remove("tmp_readorbfilewarning"); + EXPECT_FALSE (result); + EXPECT_THAT (output, testing::HasSubstr ("Element index 1")); + EXPECT_THAT (output, testing::HasSubstr ("orbital file: ./support/CC.orb")); + ofs_running.close (); + remove ("tmp_readorbfilewarning"); } diff --git a/source/source_cell/test/unitcell_test_para.cpp b/source/source_cell/test/unitcell_test_para.cpp index ca6b3bad1f0..5f1e564c7ec 100644 --- a/source/source_cell/test/unitcell_test_para.cpp +++ b/source/source_cell/test/unitcell_test_para.cpp @@ -20,23 +20,16 @@ #include "../update_cell.h" #include "../bcast_cell.h" #ifdef __LCAO -InfoNonlocal::InfoNonlocal() -{ -} -InfoNonlocal::~InfoNonlocal() -{ -} +InfoNonlocal::InfoNonlocal () {} +InfoNonlocal::~InfoNonlocal () {} #endif -Magnetism::Magnetism() +Magnetism::Magnetism () { this->tot_mag = 0.0; this->abs_mag = 0.0; this->start_mag = nullptr; } -Magnetism::~Magnetism() -{ - delete[] this->start_mag; -} +Magnetism::~Magnetism () { delete[] this->start_mag; } #define private public #include "source_io/module_parameter/parameter.h" #undef private @@ -62,7 +55,8 @@ Magnetism::~Magnetism() // mock function #ifdef __LCAO -void LCAO_Orbitals::bcast_files(const int& ntype_in, const int& my_rank) +void + LCAO_Orbitals::bcast_files (const int& ntype_in, const int& my_rank) { return; } @@ -76,12 +70,13 @@ class UcellTest : public ::testing::Test std::ofstream ofs; std::string pp_dir; std::string output; - void SetUp() + void + SetUp () { - ofs.open("running.log"); + ofs.open ("running.log"); PARAM.input.relax_new = utp.relax_new; PARAM.sys.global_out_dir = "./"; - ucell = utp.SetUcellInfo(); + ucell = utp.SetUcellInfo (); PARAM.input.lspinorb = false; pp_dir = "./support/"; PARAM.input.pseudo_rcut = 15.0; @@ -90,207 +85,206 @@ class UcellTest : public ::testing::Test 
PARAM.input.nspin = 1; PARAM.input.basis_type = "pw"; } - void TearDown() + void + TearDown () { - ofs.close(); + ofs.close (); } }; #ifdef __MPI -TEST_F(UcellTest, BcastUnitcell) +TEST_F (UcellTest, BcastUnitcell) { PARAM.input.nspin = 4; - unitcell::bcast_unitcell(*ucell); + unitcell::bcast_unitcell (*ucell); if (GlobalV::MY_RANK != 0) - { - EXPECT_EQ(ucell->Coordinate, "Direct"); - EXPECT_DOUBLE_EQ(ucell->a1.x, 10.0); - EXPECT_EQ(ucell->atoms[0].na, 1); - EXPECT_EQ(ucell->atoms[1].na, 2); - /// this is to ensure all processes have the atom label info - auto atom_labels = ucell->get_atomLabels(); - std::string atom_type1_expected = "C"; - std::string atom_type2_expected = "H"; - EXPECT_EQ(atom_labels[0], atom_type1_expected); - EXPECT_EQ(atom_labels[1], atom_type2_expected); - } + { + EXPECT_EQ (ucell->Coordinate, "Direct"); + EXPECT_DOUBLE_EQ (ucell->a1.x, 10.0); + EXPECT_EQ (ucell->atoms[0].na, 1); + EXPECT_EQ (ucell->atoms[1].na, 2); + /// this is to ensure all processes have the atom label info + auto atom_labels = ucell->get_atomLabels (); + std::string atom_type1_expected = "C"; + std::string atom_type2_expected = "H"; + EXPECT_EQ (atom_labels[0], atom_type1_expected); + EXPECT_EQ (atom_labels[1], atom_type2_expected); + } } -TEST_F(UcellTest, BcastLattice) +TEST_F (UcellTest, BcastLattice) { - unitcell::bcast_Lattice(ucell->lat); + unitcell::bcast_Lattice (ucell->lat); if (GlobalV::MY_RANK != 0) - { - EXPECT_EQ(ucell->Coordinate, "Direct"); - EXPECT_DOUBLE_EQ(ucell->a1.x, 10.0); - EXPECT_EQ(ucell->atoms[0].na, 1); - EXPECT_EQ(ucell->atoms[1].na, 2); - /// this is to ensure all processes have the atom label info - auto atom_labels = ucell->get_atomLabels(); - std::string atom_type1_expected = "C"; - std::string atom_type2_expected = "H"; - EXPECT_EQ(atom_labels[0], atom_type1_expected); - EXPECT_EQ(atom_labels[1], atom_type2_expected); - } + { + EXPECT_EQ (ucell->Coordinate, "Direct"); + EXPECT_DOUBLE_EQ (ucell->a1.x, 10.0); + EXPECT_EQ (ucell->atoms[0].na, 
1); + EXPECT_EQ (ucell->atoms[1].na, 2); + /// this is to ensure all processes have the atom label info + auto atom_labels = ucell->get_atomLabels (); + std::string atom_type1_expected = "C"; + std::string atom_type2_expected = "H"; + EXPECT_EQ (atom_labels[0], atom_type1_expected); + EXPECT_EQ (atom_labels[1], atom_type2_expected); + } } -TEST_F(UcellTest, BcastMagnitism) +TEST_F (UcellTest, BcastMagnitism) { - unitcell::bcast_magnetism(ucell->magnet, ucell->ntype); + unitcell::bcast_magnetism (ucell->magnet, ucell->ntype); PARAM.input.nspin = 4; if (GlobalV::MY_RANK != 0) - { - EXPECT_DOUBLE_EQ(ucell->magnet.start_mag[0], 0.0); - EXPECT_DOUBLE_EQ(ucell->magnet.start_mag[1], 0.0); - for (int i = 0; i < 3; ++i) { - EXPECT_DOUBLE_EQ(ucell->magnet.ux_[i], 0.0); + EXPECT_DOUBLE_EQ (ucell->magnet.start_mag[0], 0.0); + EXPECT_DOUBLE_EQ (ucell->magnet.start_mag[1], 0.0); + for (int i = 0; i < 3; ++i) + { + EXPECT_DOUBLE_EQ (ucell->magnet.ux_[i], 0.0); + } } - } } -TEST_F(UcellTest, UpdatePosTau) +TEST_F (UcellTest, UpdatePosTau) { double* pos_in = new double[ucell->nat * 3]; - ucell->set_iat2itia(); - std::fill(pos_in, pos_in + ucell->nat * 3, 0); + ucell->set_iat2itia (); + std::fill (pos_in, pos_in + ucell->nat * 3, 0); for (int iat = 0; iat < ucell->nat; ++iat) - { - int it, ia; - ucell->iat2iait(iat, &ia, &it); - for (int ik = 0; ik < 3; ++ik) { - ucell->atoms[it].mbl[ia][ik] = true; - pos_in[iat * 3 + ik] = (iat * 3 + ik) / (ucell->nat * 3.0) * (ucell->lat.lat0); + int it, ia; + ucell->iat2iait (iat, &ia, &it); + for (int ik = 0; ik < 3; ++ik) + { + ucell->atoms[it].mbl[ia][ik] = true; + pos_in[iat * 3 + ik] = (iat * 3 + ik) / (ucell->nat * 3.0) * (ucell->lat.lat0); + } } - } - unitcell::update_pos_tau(ucell->lat,pos_in,ucell->ntype,ucell->nat,ucell->atoms); + unitcell::update_pos_tau (ucell->lat, pos_in, ucell->ntype, ucell->nat, ucell->atoms); for (int iat = 0; iat < ucell->nat; ++iat) - { - int it, ia; - ucell->iat2iait(iat, &ia, &it); - for (int ik = 0; ik < 3; 
++ik) { - EXPECT_DOUBLE_EQ(ucell->atoms[it].tau[ia][ik], - (iat*3+ik)/(ucell->nat*3.0)); + int it, ia; + ucell->iat2iait (iat, &ia, &it); + for (int ik = 0; ik < 3; ++ik) + { + EXPECT_DOUBLE_EQ (ucell->atoms[it].tau[ia][ik], (iat * 3 + ik) / (ucell->nat * 3.0)); + } } - } delete[] pos_in; } -TEST_F(UcellTest, UpdatePosTaud_pointer) +TEST_F (UcellTest, UpdatePosTaud_pointer) { double* pos_in = new double[ucell->nat * 3]; ModuleBase::Vector3* tmp = new ModuleBase::Vector3[ucell->nat]; - ucell->set_iat2itia(); + ucell->set_iat2itia (); for (int iat = 0; iat < ucell->nat; ++iat) - { - pos_in[iat * 3] = 0.01; - pos_in[iat * 3 + 1] = 0.01; - pos_in[iat * 3 + 2] = 0.01; - int it, ia; - ucell->iat2iait(iat, &ia, &it); - tmp[iat] = ucell->atoms[it].taud[ia]; - } - unitcell::update_pos_taud(ucell->lat,pos_in,ucell->ntype, - ucell->nat,ucell->atoms); + { + pos_in[iat * 3] = 0.01; + pos_in[iat * 3 + 1] = 0.01; + pos_in[iat * 3 + 2] = 0.01; + int it, ia; + ucell->iat2iait (iat, &ia, &it); + tmp[iat] = ucell->atoms[it].taud[ia]; + } + unitcell::update_pos_taud (ucell->lat, pos_in, ucell->ntype, ucell->nat, ucell->atoms); for (int iat = 0; iat < ucell->nat; ++iat) - { - int it, ia; - ucell->iat2iait(iat, &ia, &it); - EXPECT_DOUBLE_EQ(ucell->atoms[it].taud[ia].x, tmp[iat].x + 0.01); - EXPECT_DOUBLE_EQ(ucell->atoms[it].taud[ia].y, tmp[iat].y + 0.01); - EXPECT_DOUBLE_EQ(ucell->atoms[it].taud[ia].z, tmp[iat].z + 0.01); - } + { + int it, ia; + ucell->iat2iait (iat, &ia, &it); + EXPECT_DOUBLE_EQ (ucell->atoms[it].taud[ia].x, tmp[iat].x + 0.01); + EXPECT_DOUBLE_EQ (ucell->atoms[it].taud[ia].y, tmp[iat].y + 0.01); + EXPECT_DOUBLE_EQ (ucell->atoms[it].taud[ia].z, tmp[iat].z + 0.01); + } delete[] tmp; delete[] pos_in; } -//test update_pos_taud with ModuleBase::Vector3 version -TEST_F(UcellTest, UpdatePosTaud_Vector3) +// test update_pos_taud with ModuleBase::Vector3 version +TEST_F (UcellTest, UpdatePosTaud_Vector3) { ModuleBase::Vector3* pos_in = new ModuleBase::Vector3[ucell->nat]; 
ModuleBase::Vector3* tmp = new ModuleBase::Vector3[ucell->nat]; - ucell->set_iat2itia(); + ucell->set_iat2itia (); for (int iat = 0; iat < ucell->nat; ++iat) - { - for (int ik = 0; ik < 3; ++ik) { - pos_in[iat][ik] = 0.01; + for (int ik = 0; ik < 3; ++ik) + { + pos_in[iat][ik] = 0.01; + } + int it = 0; + int ia = 0; + ucell->iat2iait (iat, &ia, &it); + tmp[iat] = ucell->atoms[it].taud[ia]; } - int it=0; - int ia=0; - ucell->iat2iait(iat, &ia, &it); - tmp[iat] = ucell->atoms[it].taud[ia]; - } - unitcell::update_pos_taud(ucell->lat,pos_in,ucell->ntype, - ucell->nat,ucell->atoms); + unitcell::update_pos_taud (ucell->lat, pos_in, ucell->ntype, ucell->nat, ucell->atoms); for (int iat = 0; iat < ucell->nat; ++iat) - { - int it, ia; - ucell->iat2iait(iat, &ia, &it); - for (int ik = 0; ik < 3; ++ik) { - EXPECT_DOUBLE_EQ(ucell->atoms[it].taud[ia][ik], tmp[iat][ik] + 0.01); + int it, ia; + ucell->iat2iait (iat, &ia, &it); + for (int ik = 0; ik < 3; ++ik) + { + EXPECT_DOUBLE_EQ (ucell->atoms[it].taud[ia][ik], tmp[iat][ik] + 0.01); + } } - } delete[] tmp; delete[] pos_in; } -TEST_F(UcellTest, ReadPseudo) +TEST_F (UcellTest, ReadPseudo) { PARAM.input.pseudo_dir = pp_dir; PARAM.input.out_element_info = true; - elecstate::read_pseudo(ofs, *ucell); + elecstate::read_pseudo (ofs, *ucell); // check_structure will print some warning info // output nonlocal file if (GlobalV::MY_RANK == 0) - { - std::ifstream ifs; - ifs.open("./C/C.NONLOCAL"); - EXPECT_TRUE(ifs.good()); - ifs.close(); - ifs.open("./H/H.NONLOCAL"); - EXPECT_TRUE(ifs.good()); - ifs.close(); - - struct stat st; - int ret1 = stat("C", &st); - EXPECT_EQ(ret1, 0); - EXPECT_TRUE(S_ISDIR(st.st_mode)); - rmdir("C"); - - int ret2 = stat("H", &st); - EXPECT_EQ(ret2, 0); - EXPECT_TRUE(S_ISDIR(st.st_mode)); - rmdir("H"); - } + { + std::ifstream ifs; + ifs.open ("./C/C.NONLOCAL"); + EXPECT_TRUE (ifs.good ()); + ifs.close (); + ifs.open ("./H/H.NONLOCAL"); + EXPECT_TRUE (ifs.good ()); + ifs.close (); + + struct stat st; + int ret1 = 
stat ("C", &st); + EXPECT_EQ (ret1, 0); + EXPECT_TRUE (S_ISDIR (st.st_mode)); + rmdir ("C"); + + int ret2 = stat ("H", &st); + EXPECT_EQ (ret2, 0); + EXPECT_TRUE (S_ISDIR (st.st_mode)); + rmdir ("H"); + } // read_cell_pseudopots - EXPECT_FALSE(ucell->atoms[0].ncpp.has_so); - EXPECT_FALSE(ucell->atoms[1].ncpp.has_so); - EXPECT_EQ(ucell->atoms[0].ncpp.nbeta, 4); - EXPECT_EQ(ucell->atoms[0].ncpp.nchi, 2); - EXPECT_EQ(ucell->atoms[1].ncpp.nbeta, 3); - EXPECT_EQ(ucell->atoms[1].ncpp.nchi, 1); + EXPECT_FALSE (ucell->atoms[0].ncpp.has_so); + EXPECT_FALSE (ucell->atoms[1].ncpp.has_so); + EXPECT_EQ (ucell->atoms[0].ncpp.nbeta, 4); + EXPECT_EQ (ucell->atoms[0].ncpp.nchi, 2); + EXPECT_EQ (ucell->atoms[1].ncpp.nbeta, 3); + EXPECT_EQ (ucell->atoms[1].ncpp.nchi, 1); // cal_meshx - EXPECT_EQ(ucell->meshx, 1247); + EXPECT_EQ (ucell->meshx, 1247); // cal_natomwfc - EXPECT_EQ(ucell->natomwfc, (1 + 3) * 1 + 1 * 2); + EXPECT_EQ (ucell->natomwfc, (1 + 3) * 1 + 1 * 2); // cal_nwfc - EXPECT_EQ(ucell->lmax, 2); - EXPECT_EQ(ucell->lmax_ppwf, 1); + EXPECT_EQ (ucell->lmax, 2); + EXPECT_EQ (ucell->lmax_ppwf, 1); } #include "mpi.h" -int main(int argc, char** argv) +int + main (int argc, char** argv) { - MPI_Init(&argc, &argv); - testing::InitGoogleTest(&argc, argv); + MPI_Init (&argc, &argv); + testing::InitGoogleTest (&argc, argv); - MPI_Comm_size(MPI_COMM_WORLD, &GlobalV::NPROC); - MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK); + MPI_Comm_size (MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); - int result = RUN_ALL_TESTS(); - MPI_Finalize(); + int result = RUN_ALL_TESTS (); + MPI_Finalize (); return result; } #endif diff --git a/source/source_cell/test/unitcell_test_readpp.cpp b/source/source_cell/test/unitcell_test_readpp.cpp index 77ab7a21a8b..20bdc3e1342 100644 --- a/source/source_cell/test/unitcell_test_readpp.cpp +++ b/source/source_cell/test/unitcell_test_readpp.cpp @@ -12,22 +12,23 @@ #include "source_estate/read_pseudo.h" #include #include 
-#include "string.h" +#include #ifdef __MPI #include "mpi.h" #endif #include "prepare_unitcell.h" #ifdef __LCAO -InfoNonlocal::InfoNonlocal() {} -InfoNonlocal::~InfoNonlocal() {} +InfoNonlocal::InfoNonlocal () {} +InfoNonlocal::~InfoNonlocal () {} #endif -Magnetism::Magnetism() { +Magnetism::Magnetism () +{ this->tot_mag = 0.0; this->abs_mag = 0.0; this->start_mag = nullptr; } -Magnetism::~Magnetism() { delete[] this->start_mag; } +Magnetism::~Magnetism () { delete[] this->start_mag; } #define private public #include "source_io/module_parameter/parameter.h" #undef private @@ -98,23 +99,28 @@ Magnetism::~Magnetism() { delete[] this->start_mag; } // mock function #ifdef __LCAO -void LCAO_Orbitals::bcast_files(const int& ntype_in, const int& my_rank) { +void + LCAO_Orbitals::bcast_files (const int& ntype_in, const int& my_rank) +{ return; } #endif -class UcellTest : public ::testing::Test { +class UcellTest : public ::testing::Test +{ protected: UcellTestPrepare utp = UcellTestLib["C1H2-Read"]; std::unique_ptr ucell; std::ofstream ofs; std::string pp_dir; std::string output; - void SetUp() { - ofs.open("running.log"); + void + SetUp () + { + ofs.open ("running.log"); PARAM.input.relax_new = utp.relax_new; PARAM.sys.global_out_dir = "./"; - ucell = utp.SetUcellInfo(); + ucell = utp.SetUcellInfo (); PARAM.input.lspinorb = false; pp_dir = "./support/"; PARAM.input.pseudo_rcut = 15.0; @@ -124,156 +130,155 @@ class UcellTest : public ::testing::Test { PARAM.input.nspin = 1; PARAM.input.basis_type = "pw"; PARAM.input.nelec = 10.0; - PARAM.input.nupdown = 0.0; + PARAM.input.nupdown = 0.0; PARAM.sys.two_fermi = false; PARAM.input.nbands = 6; PARAM.sys.nlocal = 6; PARAM.input.lspinorb = false; } - void TearDown() { ofs.close(); } + void + TearDown () + { + ofs.close (); + } }; using UcellDeathTest = UcellTest; -TEST_F(UcellDeathTest, ReadCellPPWarning1) { +TEST_F (UcellDeathTest, ReadCellPPWarning1) +{ PARAM.input.lspinorb = true; ucell->pseudo_fn[1] = "H_sr.upf"; - 
testing::internal::CaptureStdout(); - EXPECT_EXIT(elecstate::read_cell_pseudopots(pp_dir, ofs, *ucell), - ::testing::ExitedWithCode(1), - ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, - testing::HasSubstr("error when average the pseudopotential.")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (elecstate::read_cell_pseudopots (pp_dir, ofs, *ucell), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("error when average the pseudopotential.")); } -TEST_F(UcellDeathTest, ReadCellPPWarning2) { +TEST_F (UcellDeathTest, ReadCellPPWarning2) +{ pp_dir = "./arbitrary/"; - testing::internal::CaptureStdout(); - EXPECT_EXIT(elecstate::read_cell_pseudopots(pp_dir, ofs, *ucell), - ::testing::ExitedWithCode(1), - ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, - testing::HasSubstr("Couldn't find pseudopotential file")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (elecstate::read_cell_pseudopots (pp_dir, ofs, *ucell), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("Couldn't find pseudopotential file")); } -TEST_F(UcellDeathTest, ReadCellPPWarning3) { +TEST_F (UcellDeathTest, ReadCellPPWarning3) +{ ucell->pseudo_type[0] = "upf"; - testing::internal::CaptureStdout(); - EXPECT_EXIT(elecstate::read_cell_pseudopots(pp_dir, ofs, *ucell), - ::testing::ExitedWithCode(1), - ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, - testing::HasSubstr("Pseudopotential data do not match.")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (elecstate::read_cell_pseudopots (pp_dir, ofs, *ucell), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("Pseudopotential data do not match.")); } -TEST_F(UcellDeathTest, ReadCellPPWarning4) { +TEST_F 
(UcellDeathTest, ReadCellPPWarning4) +{ PARAM.input.dft_functional = "LDA"; - testing::internal::CaptureStdout(); - EXPECT_NO_THROW(elecstate::read_cell_pseudopots(pp_dir, ofs, *ucell)); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("DFT FUNC. (PSEUDO) : PBE")); - EXPECT_THAT(output, testing::HasSubstr("DFT FUNC. (SET TO) : LDA")); + testing::internal::CaptureStdout (); + EXPECT_NO_THROW (elecstate::read_cell_pseudopots (pp_dir, ofs, *ucell)); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("DFT FUNC. (PSEUDO) : PBE")); + EXPECT_THAT (output, testing::HasSubstr ("DFT FUNC. (SET TO) : LDA")); } -TEST_F(UcellDeathTest, ReadCellPPWarning5) { +TEST_F (UcellDeathTest, ReadCellPPWarning5) +{ ucell->pseudo_type[0] = "upf0000"; - testing::internal::CaptureStdout(); - EXPECT_EXIT(elecstate::read_cell_pseudopots(pp_dir, ofs, *ucell), - ::testing::ExitedWithCode(1), - ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("Unknown pseudopotential type.")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (elecstate::read_cell_pseudopots (pp_dir, ofs, *ucell), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("Unknown pseudopotential type.")); } -TEST_F(UcellTest, ReadCellPP) { +TEST_F (UcellTest, ReadCellPP) +{ ucell->atoms[1].flag_empty_element = true; - elecstate::read_cell_pseudopots(pp_dir, ofs, *ucell); - EXPECT_EQ(ucell->atoms[0].ncpp.pp_type, "NC"); - EXPECT_FALSE(ucell->atoms[0].ncpp.has_so); // becomes false in average_p - EXPECT_FALSE(ucell->atoms[1].ncpp.has_so); - EXPECT_EQ(ucell->atoms[0].ncpp.nchi, 2); // 3=>2 in average_p - EXPECT_EQ(ucell->atoms[1].ncpp.nchi, 1); - ofs.close(); + elecstate::read_cell_pseudopots (pp_dir, ofs, *ucell); + EXPECT_EQ (ucell->atoms[0].ncpp.pp_type, "NC"); + EXPECT_FALSE (ucell->atoms[0].ncpp.has_so); // becomes false 
in average_p + EXPECT_FALSE (ucell->atoms[1].ncpp.has_so); + EXPECT_EQ (ucell->atoms[0].ncpp.nchi, 2); // 3=>2 in average_p + EXPECT_EQ (ucell->atoms[1].ncpp.nchi, 1); + ofs.close (); std::ifstream ifs; - ifs.open("running.log"); - std::string str((std::istreambuf_iterator(ifs)), - std::istreambuf_iterator()); - EXPECT_THAT(str, - testing::HasSubstr("Pseudopotential file = C.upf")); - EXPECT_THAT(str, testing::HasSubstr("Pseudopotential type = NC")); - EXPECT_THAT(str, - testing::HasSubstr("Exchange-correlation functional = PBE")); - EXPECT_THAT(str, testing::HasSubstr("Valence electrons = 4")); - EXPECT_THAT(str, - testing::HasSubstr("Pseudopotential file = H.upf")); - EXPECT_THAT(str, testing::HasSubstr("Valence electrons = 0")); -} - -TEST_F(UcellTest, CalMeshx) { - elecstate::read_cell_pseudopots(pp_dir, ofs, *ucell); - elecstate::cal_meshx(ucell->meshx,ucell->atoms,ucell->ntype); - EXPECT_EQ(ucell->atoms[0].ncpp.msh, 1247); - EXPECT_EQ(ucell->atoms[1].ncpp.msh, 1165); - EXPECT_EQ(ucell->meshx, 1247); -} - -TEST_F(UcellTest, CalNatomwfc1) { - elecstate::read_cell_pseudopots(pp_dir, ofs, *ucell); - EXPECT_FALSE(ucell->atoms[0].ncpp.has_so); - EXPECT_FALSE(ucell->atoms[1].ncpp.has_so); - elecstate::cal_natomwfc(ofs,ucell->natomwfc,ucell->ntype,ucell->atoms); - EXPECT_EQ(ucell->atoms[0].ncpp.nchi, 2); - EXPECT_EQ(ucell->atoms[1].ncpp.nchi, 1); - EXPECT_EQ(ucell->atoms[0].na, 1); - EXPECT_EQ(ucell->atoms[1].na, 2); - EXPECT_EQ(ucell->natomwfc, (1 + 3) * 1 + 1 * 2); -} - -TEST_F(UcellTest, CalNatomwfc2) { + ifs.open ("running.log"); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("Pseudopotential file = C.upf")); + EXPECT_THAT (str, testing::HasSubstr ("Pseudopotential type = NC")); + EXPECT_THAT (str, testing::HasSubstr ("Exchange-correlation functional = PBE")); + EXPECT_THAT (str, testing::HasSubstr ("Valence electrons = 4")); + EXPECT_THAT (str, testing::HasSubstr ("Pseudopotential file = 
H.upf")); + EXPECT_THAT (str, testing::HasSubstr ("Valence electrons = 0")); +} + +TEST_F (UcellTest, CalMeshx) +{ + elecstate::read_cell_pseudopots (pp_dir, ofs, *ucell); + elecstate::cal_meshx (ucell->meshx, ucell->atoms, ucell->ntype); + EXPECT_EQ (ucell->atoms[0].ncpp.msh, 1247); + EXPECT_EQ (ucell->atoms[1].ncpp.msh, 1165); + EXPECT_EQ (ucell->meshx, 1247); +} + +TEST_F (UcellTest, CalNatomwfc1) +{ + elecstate::read_cell_pseudopots (pp_dir, ofs, *ucell); + EXPECT_FALSE (ucell->atoms[0].ncpp.has_so); + EXPECT_FALSE (ucell->atoms[1].ncpp.has_so); + elecstate::cal_natomwfc (ofs, ucell->natomwfc, ucell->ntype, ucell->atoms); + EXPECT_EQ (ucell->atoms[0].ncpp.nchi, 2); + EXPECT_EQ (ucell->atoms[1].ncpp.nchi, 1); + EXPECT_EQ (ucell->atoms[0].na, 1); + EXPECT_EQ (ucell->atoms[1].na, 2); + EXPECT_EQ (ucell->natomwfc, (1 + 3) * 1 + 1 * 2); +} + +TEST_F (UcellTest, CalNatomwfc2) +{ PARAM.input.lspinorb = false; PARAM.input.nspin = 4; - elecstate::read_cell_pseudopots(pp_dir, ofs, *ucell); - EXPECT_FALSE(ucell->atoms[0].ncpp.has_so); - EXPECT_FALSE(ucell->atoms[1].ncpp.has_so); - elecstate::cal_natomwfc(ofs,ucell->natomwfc,ucell->ntype,ucell->atoms); - EXPECT_EQ(ucell->atoms[0].ncpp.nchi, 2); - EXPECT_EQ(ucell->atoms[1].ncpp.nchi, 1); - EXPECT_EQ(ucell->atoms[0].na, 1); - EXPECT_EQ(ucell->atoms[1].na, 2); - EXPECT_EQ(ucell->natomwfc, ((1 + 3) * 1 + 1 * 2) * 2); -} - -TEST_F(UcellTest, CalNatomwfc3) { + elecstate::read_cell_pseudopots (pp_dir, ofs, *ucell); + EXPECT_FALSE (ucell->atoms[0].ncpp.has_so); + EXPECT_FALSE (ucell->atoms[1].ncpp.has_so); + elecstate::cal_natomwfc (ofs, ucell->natomwfc, ucell->ntype, ucell->atoms); + EXPECT_EQ (ucell->atoms[0].ncpp.nchi, 2); + EXPECT_EQ (ucell->atoms[1].ncpp.nchi, 1); + EXPECT_EQ (ucell->atoms[0].na, 1); + EXPECT_EQ (ucell->atoms[1].na, 2); + EXPECT_EQ (ucell->natomwfc, ((1 + 3) * 1 + 1 * 2) * 2); +} + +TEST_F (UcellTest, CalNatomwfc3) +{ PARAM.input.lspinorb = true; PARAM.input.nspin = 4; - 
elecstate::read_cell_pseudopots(pp_dir, ofs, *ucell); - EXPECT_TRUE(ucell->atoms[0].ncpp.has_so); - EXPECT_TRUE(ucell->atoms[1].ncpp.has_so); - elecstate::cal_natomwfc(ofs,ucell->natomwfc,ucell->ntype,ucell->atoms); - EXPECT_EQ(ucell->atoms[0].ncpp.nchi, 3); - EXPECT_EQ(ucell->atoms[1].ncpp.nchi, 1); - EXPECT_EQ(ucell->atoms[0].na, 1); - EXPECT_EQ(ucell->atoms[1].na, 2); - EXPECT_EQ(ucell->natomwfc, - ((2 * 0 + 2) + (2 * 1 + 2) + (2 * 1)) * 1 + (2 * 0 + 2) * 2); -} - -TEST_F(UcellTest, CalNwfc1) { - elecstate::read_cell_pseudopots(pp_dir, ofs, *ucell); - EXPECT_FALSE(ucell->atoms[0].ncpp.has_so); - EXPECT_FALSE(ucell->atoms[1].ncpp.has_so); + elecstate::read_cell_pseudopots (pp_dir, ofs, *ucell); + EXPECT_TRUE (ucell->atoms[0].ncpp.has_so); + EXPECT_TRUE (ucell->atoms[1].ncpp.has_so); + elecstate::cal_natomwfc (ofs, ucell->natomwfc, ucell->ntype, ucell->atoms); + EXPECT_EQ (ucell->atoms[0].ncpp.nchi, 3); + EXPECT_EQ (ucell->atoms[1].ncpp.nchi, 1); + EXPECT_EQ (ucell->atoms[0].na, 1); + EXPECT_EQ (ucell->atoms[1].na, 2); + EXPECT_EQ (ucell->natomwfc, ((2 * 0 + 2) + (2 * 1 + 2) + (2 * 1)) * 1 + (2 * 0 + 2) * 2); +} + +TEST_F (UcellTest, CalNwfc1) +{ + elecstate::read_cell_pseudopots (pp_dir, ofs, *ucell); + EXPECT_FALSE (ucell->atoms[0].ncpp.has_so); + EXPECT_FALSE (ucell->atoms[1].ncpp.has_so); PARAM.sys.nlocal = 3 * 9; - elecstate::cal_nwfc(ofs,*ucell,ucell->atoms); - EXPECT_EQ(ucell->atoms[0].iw2l[8], 2); - EXPECT_EQ(ucell->atoms[0].iw2n[8], 0); - EXPECT_EQ(ucell->atoms[0].iw2m[8], 4); - EXPECT_EQ(ucell->atoms[1].iw2l[8], 2); - EXPECT_EQ(ucell->atoms[1].iw2n[8], 0); - EXPECT_EQ(ucell->atoms[1].iw2m[8], 4); - EXPECT_EQ(ucell->atoms[1].iw2_ylm[8], 8); + elecstate::cal_nwfc (ofs, *ucell, ucell->atoms); + EXPECT_EQ (ucell->atoms[0].iw2l[8], 2); + EXPECT_EQ (ucell->atoms[0].iw2n[8], 0); + EXPECT_EQ (ucell->atoms[0].iw2m[8], 4); + EXPECT_EQ (ucell->atoms[1].iw2l[8], 2); + EXPECT_EQ (ucell->atoms[1].iw2n[8], 0); + EXPECT_EQ (ucell->atoms[1].iw2m[8], 4); + EXPECT_EQ 
(ucell->atoms[1].iw2_ylm[8], 8); // here is the default table for pw basis calculation // nw = 1*1 + 3*1 + 5*1 = 9 // L N m L*L+m @@ -286,300 +291,315 @@ TEST_F(UcellTest, CalNwfc1) { // 6 2 0 2 6 // 7 2 0 3 7 // 8 2 0 4 8 - EXPECT_EQ(ucell->atoms[0].na, 1); - EXPECT_EQ(ucell->atoms[1].na, 2); - EXPECT_EQ(ucell->namax, 2); - EXPECT_EQ(ucell->atoms[0].nw, 9); - EXPECT_EQ(ucell->atoms[1].nw, 9); - EXPECT_EQ(ucell->nwmax, 9); + EXPECT_EQ (ucell->atoms[0].na, 1); + EXPECT_EQ (ucell->atoms[1].na, 2); + EXPECT_EQ (ucell->namax, 2); + EXPECT_EQ (ucell->atoms[0].nw, 9); + EXPECT_EQ (ucell->atoms[1].nw, 9); + EXPECT_EQ (ucell->nwmax, 9); // check itia2iat - EXPECT_EQ(ucell->itia2iat.getSize(), 4); - EXPECT_EQ(ucell->itia2iat(0, 0), 0); - EXPECT_EQ(ucell->itia2iat(0, 1), 0); - EXPECT_EQ(ucell->itia2iat(1, 0), 1); - EXPECT_EQ(ucell->itia2iat(1, 1), 2); + EXPECT_EQ (ucell->itia2iat.getSize (), 4); + EXPECT_EQ (ucell->itia2iat (0, 0), 0); + EXPECT_EQ (ucell->itia2iat (0, 1), 0); + EXPECT_EQ (ucell->itia2iat (1, 0), 1); + EXPECT_EQ (ucell->itia2iat (1, 1), 2); // check iat2iwt - EXPECT_EQ(ucell->get_npol(), 1); - EXPECT_EQ(ucell->get_iat2iwt()[0], 0); - EXPECT_EQ(ucell->get_iat2iwt()[1], 9); - EXPECT_EQ(ucell->get_iat2iwt()[2], 18); + EXPECT_EQ (ucell->get_npol (), 1); + EXPECT_EQ (ucell->get_iat2iwt ()[0], 0); + EXPECT_EQ (ucell->get_iat2iwt ()[1], 9); + EXPECT_EQ (ucell->get_iat2iwt ()[2], 18); // check itiaiw2iwt - EXPECT_EQ(ucell->itiaiw2iwt(0, 0, 0), 0); - EXPECT_EQ(ucell->itiaiw2iwt(0, 0, 1), 1); - EXPECT_EQ(ucell->itiaiw2iwt(0, 0, 8), 8); - EXPECT_EQ(ucell->itiaiw2iwt(1, 0, 0), 9); - EXPECT_EQ(ucell->itiaiw2iwt(1, 1, 0), 18); + EXPECT_EQ (ucell->itiaiw2iwt (0, 0, 0), 0); + EXPECT_EQ (ucell->itiaiw2iwt (0, 0, 1), 1); + EXPECT_EQ (ucell->itiaiw2iwt (0, 0, 8), 8); + EXPECT_EQ (ucell->itiaiw2iwt (1, 0, 0), 9); + EXPECT_EQ (ucell->itiaiw2iwt (1, 1, 0), 18); // check itia2iat - EXPECT_EQ(ucell->itia2iat.getSize(), 4); - EXPECT_EQ(ucell->itia2iat(0, 0), 0); - 
EXPECT_EQ(ucell->itia2iat(0, 1), 0); - EXPECT_EQ(ucell->itia2iat(1, 0), 1); - EXPECT_EQ(ucell->itia2iat(1, 1), 2); + EXPECT_EQ (ucell->itia2iat.getSize (), 4); + EXPECT_EQ (ucell->itia2iat (0, 0), 0); + EXPECT_EQ (ucell->itia2iat (0, 1), 0); + EXPECT_EQ (ucell->itia2iat (1, 0), 1); + EXPECT_EQ (ucell->itia2iat (1, 1), 2); // check iwt2iat - EXPECT_EQ(ucell->iwt2iat[0], 0); - EXPECT_EQ(ucell->iwt2iat[10], 1); - EXPECT_EQ(ucell->iwt2iat[20], 2); + EXPECT_EQ (ucell->iwt2iat[0], 0); + EXPECT_EQ (ucell->iwt2iat[10], 1); + EXPECT_EQ (ucell->iwt2iat[20], 2); // check iwt2iw - EXPECT_EQ(ucell->iwt2iw[0], 0); - EXPECT_EQ(ucell->iwt2iw[10], 1); - EXPECT_EQ(ucell->iwt2iw[20], 2); + EXPECT_EQ (ucell->iwt2iw[0], 0); + EXPECT_EQ (ucell->iwt2iw[10], 1); + EXPECT_EQ (ucell->iwt2iw[20], 2); } -TEST_F(UcellTest, CalNwfc2) { +TEST_F (UcellTest, CalNwfc2) +{ PARAM.input.nspin = 4; PARAM.input.basis_type = "lcao"; - elecstate::read_cell_pseudopots(pp_dir, ofs, *ucell); - EXPECT_FALSE(ucell->atoms[0].ncpp.has_so); - EXPECT_FALSE(ucell->atoms[1].ncpp.has_so); + elecstate::read_cell_pseudopots (pp_dir, ofs, *ucell); + EXPECT_FALSE (ucell->atoms[0].ncpp.has_so); + EXPECT_FALSE (ucell->atoms[1].ncpp.has_so); PARAM.sys.nlocal = 3 * 9 * 2; - EXPECT_NO_THROW(elecstate::cal_nwfc(ofs,*ucell,ucell->atoms)); + EXPECT_NO_THROW (elecstate::cal_nwfc (ofs, *ucell, ucell->atoms)); } -TEST_F(UcellDeathTest, CheckStructure) { - elecstate::read_cell_pseudopots(pp_dir, ofs, *ucell); - EXPECT_FALSE(ucell->atoms[0].ncpp.has_so); - EXPECT_FALSE(ucell->atoms[1].ncpp.has_so); +TEST_F (UcellDeathTest, CheckStructure) +{ + elecstate::read_cell_pseudopots (pp_dir, ofs, *ucell); + EXPECT_FALSE (ucell->atoms[0].ncpp.has_so); + EXPECT_FALSE (ucell->atoms[1].ncpp.has_so); // trial 1 - - testing::internal::CaptureStdout(); + + testing::internal::CaptureStdout (); double factor = 0.2; - ucell->set_iat2itia(); - EXPECT_NO_THROW(unitcell::check_atomic_stru(*ucell, factor)); - output = 
testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("WARNING: Some atoms are too close!!!")); + ucell->set_iat2itia (); + EXPECT_NO_THROW (unitcell::check_atomic_stru (*ucell, factor)); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("WARNING: Some atoms are too close!!!")); // trial 2 - GlobalV::ofs_running.open("CheckStructure2.txt"); + GlobalV::ofs_running.open ("CheckStructure2.txt"); ::testing::FLAGS_gtest_death_test_style = "threadsafe"; factor = 0.4; - EXPECT_EXIT(unitcell::check_atomic_stru(*ucell, factor), - ::testing::ExitedWithCode(1), - ""); - std::ifstream ifs("CheckStructure2.txt"); - if (ifs.is_open()) - { - std::string line; - while (std::getline(ifs, line)) { - output+=line; + EXPECT_EXIT (unitcell::check_atomic_stru (*ucell, factor), ::testing::ExitedWithCode (1), ""); + std::ifstream ifs ("CheckStructure2.txt"); + if (ifs.is_open ()) + { + std::string line; + while (std::getline (ifs, line)) + { + output += line; + } } - } - EXPECT_THAT(output, testing::HasSubstr("The structure is unreasonable!")); - GlobalV::ofs_running.open("running.log"); + EXPECT_THAT (output, testing::HasSubstr ("The structure is unreasonable!")); + GlobalV::ofs_running.open ("running.log"); // trial 3 ucell->atoms[0].label = "arbitrary"; - testing::internal::CaptureStdout(); + testing::internal::CaptureStdout (); factor = 0.2; - EXPECT_NO_THROW(unitcell::check_atomic_stru(*ucell, factor)); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("Notice: symbol 'arbitrary' is not an element " - "symbol!!!! set the covalent radius to be 0.")); + EXPECT_NO_THROW (unitcell::check_atomic_stru (*ucell, factor)); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, + testing::HasSubstr ("Notice: symbol 'arbitrary' is not an element " + "symbol!!!! 
set the covalent radius to be 0.")); // trial 4 ucell->atoms[0].label = "Fe1"; - testing::internal::CaptureStdout(); + testing::internal::CaptureStdout (); factor = 0.2; - EXPECT_NO_THROW(unitcell::check_atomic_stru(*ucell, factor)); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("WARNING: Some atoms are too close!!!")); + EXPECT_NO_THROW (unitcell::check_atomic_stru (*ucell, factor)); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("WARNING: Some atoms are too close!!!")); } -TEST_F(UcellDeathTest, ReadPseudoWarning1) { +TEST_F (UcellDeathTest, ReadPseudoWarning1) +{ PARAM.input.pseudo_dir = pp_dir; PARAM.input.out_element_info = true; ucell->pseudo_fn[1] = "H_sr_lda.upf"; - testing::internal::CaptureStdout(); - EXPECT_EXIT(elecstate::read_pseudo(ofs, *ucell), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, - testing::HasSubstr("All DFT functional must consistent.")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (elecstate::read_pseudo (ofs, *ucell), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("All DFT functional must consistent.")); } -TEST_F(UcellDeathTest, ReadPseudoWarning2) { +TEST_F (UcellDeathTest, ReadPseudoWarning2) +{ PARAM.input.pseudo_dir = pp_dir; PARAM.input.out_element_info = true; ucell->pseudo_fn[0] = "Al_ONCV_PBE-1.0.upf"; - testing::internal::CaptureStdout(); - EXPECT_NO_THROW(elecstate::read_pseudo(ofs, *ucell)); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT( - output, - testing::HasSubstr("Warning: the number of valence electrons in " - "pseudopotential > 3 for Al: [Ne] 3s2 3p1")); -} - -TEST_F(UcellTest, CalNelec) { - elecstate::read_cell_pseudopots(pp_dir, ofs, *ucell); - EXPECT_EQ(4, ucell->atoms[0].ncpp.zv); - EXPECT_EQ(1, ucell->atoms[1].ncpp.zv); - EXPECT_EQ(1, 
ucell->atoms[0].na); - EXPECT_EQ(2, ucell->atoms[1].na); + testing::internal::CaptureStdout (); + EXPECT_NO_THROW (elecstate::read_pseudo (ofs, *ucell)); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, + testing::HasSubstr ("Warning: the number of valence electrons in " + "pseudopotential > 3 for Al: [Ne] 3s2 3p1")); +} + +TEST_F (UcellTest, CalNelec) +{ + elecstate::read_cell_pseudopots (pp_dir, ofs, *ucell); + EXPECT_EQ (4, ucell->atoms[0].ncpp.zv); + EXPECT_EQ (1, ucell->atoms[1].ncpp.zv); + EXPECT_EQ (1, ucell->atoms[0].na); + EXPECT_EQ (2, ucell->atoms[1].na); double nelec = 0; - elecstate::cal_nelec(ucell->atoms, ucell->ntype, nelec); - EXPECT_DOUBLE_EQ(6, nelec); + elecstate::cal_nelec (ucell->atoms, ucell->ntype, nelec); + EXPECT_DOUBLE_EQ (6, nelec); } -TEST_F(UcellTest, CalNbands) +TEST_F (UcellTest, CalNbands) { - std::vector nelec_spin(2, 5.0); - elecstate::cal_nbands(PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands); - EXPECT_EQ(PARAM.input.nbands, 6); + std::vector nelec_spin (2, 5.0); + elecstate::cal_nbands (PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands); + EXPECT_EQ (PARAM.input.nbands, 6); } -TEST_F(UcellTest, CalNbandsFractionElec) +TEST_F (UcellTest, CalNbandsFractionElec) { PARAM.input.nelec = 9.5; - std::vector nelec_spin(2, 5.0); - elecstate::cal_nbands(PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands); - EXPECT_EQ(PARAM.input.nbands, 6); + std::vector nelec_spin (2, 5.0); + elecstate::cal_nbands (PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands); + EXPECT_EQ (PARAM.input.nbands, 6); } -TEST_F(UcellTest, CalNbandsSOC) +TEST_F (UcellTest, CalNbandsSOC) { PARAM.input.lspinorb = true; PARAM.input.nbands = 0; - std::vector nelec_spin(2, 5.0); - elecstate::cal_nbands(PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands); - EXPECT_EQ(PARAM.input.nbands, 20); + std::vector nelec_spin (2, 5.0); + elecstate::cal_nbands 
(PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands); + EXPECT_EQ (PARAM.input.nbands, 20); } -TEST_F(UcellTest, CalNbandsSDFT) +TEST_F (UcellTest, CalNbandsSDFT) { PARAM.input.esolver_type = "sdft"; - std::vector nelec_spin(2, 5.0); - EXPECT_NO_THROW(elecstate::cal_nbands(PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands)); + std::vector nelec_spin (2, 5.0); + EXPECT_NO_THROW (elecstate::cal_nbands (PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands)); } -TEST_F(UcellTest, CalNbandsLCAO) +TEST_F (UcellTest, CalNbandsLCAO) { PARAM.input.basis_type = "lcao"; - std::vector nelec_spin(2, 5.0); - EXPECT_NO_THROW(elecstate::cal_nbands(PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands)); + std::vector nelec_spin (2, 5.0); + EXPECT_NO_THROW (elecstate::cal_nbands (PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands)); } -TEST_F(UcellTest, CalNbandsLCAOINPW) +TEST_F (UcellTest, CalNbandsLCAOINPW) { PARAM.input.basis_type = "lcao_in_pw"; PARAM.sys.nlocal = PARAM.input.nbands - 1; - std::vector nelec_spin(2, 5.0); - testing::internal::CaptureStdout(); - EXPECT_EXIT(elecstate::cal_nbands(PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("Number of basis (NLOCAL) < Number of electronic states (NBANDS)")); + std::vector nelec_spin (2, 5.0); + testing::internal::CaptureStdout (); + EXPECT_EXIT (elecstate::cal_nbands (PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands), + ::testing::ExitedWithCode (1), + ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("Number of basis (NLOCAL) < Number of electronic states (NBANDS)")); } -TEST_F(UcellTest, CalNbandsWarning1) +TEST_F (UcellTest, CalNbandsWarning1) { PARAM.input.nbands = PARAM.input.nelec / 2 - 1; - std::vector nelec_spin(2, 5.0); - 
testing::internal::CaptureStdout(); - EXPECT_EXIT(elecstate::cal_nbands(PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("Too few bands!")); + std::vector nelec_spin (2, 5.0); + testing::internal::CaptureStdout (); + EXPECT_EXIT (elecstate::cal_nbands (PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands), + ::testing::ExitedWithCode (1), + ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("Too few bands!")); } -TEST_F(UcellTest, CalNbandsWarning2) +TEST_F (UcellTest, CalNbandsWarning2) { PARAM.input.nspin = 2; - PARAM.input.nupdown = 4.0; - std::vector nelec_spin(2); - nelec_spin[0] = (PARAM.input.nelec + PARAM.input.nupdown ) / 2.0; - nelec_spin[1] = (PARAM.input.nelec - PARAM.input.nupdown ) / 2.0; - testing::internal::CaptureStdout(); - EXPECT_EXIT(elecstate::cal_nbands(PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("Too few spin up bands!")); -} - -TEST_F(UcellTest, CalNbandsWarning3) + PARAM.input.nupdown = 4.0; + std::vector nelec_spin (2); + nelec_spin[0] = (PARAM.input.nelec + PARAM.input.nupdown) / 2.0; + nelec_spin[1] = (PARAM.input.nelec - PARAM.input.nupdown) / 2.0; + testing::internal::CaptureStdout (); + EXPECT_EXIT (elecstate::cal_nbands (PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands), + ::testing::ExitedWithCode (1), + ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("Too few spin up bands!")); +} + +TEST_F (UcellTest, CalNbandsWarning3) { PARAM.input.nspin = 2; - PARAM.input.nupdown = -4.0; - std::vector nelec_spin(2); - nelec_spin[0] = (PARAM.input.nelec + PARAM.input.nupdown ) / 2.0; - nelec_spin[1] = 
(PARAM.input.nelec - PARAM.input.nupdown ) / 2.0; - testing::internal::CaptureStdout(); - EXPECT_EXIT(elecstate::cal_nbands(PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("Too few spin down bands!")); -} - -TEST_F(UcellTest, CalNbandsSpin1) + PARAM.input.nupdown = -4.0; + std::vector nelec_spin (2); + nelec_spin[0] = (PARAM.input.nelec + PARAM.input.nupdown) / 2.0; + nelec_spin[1] = (PARAM.input.nelec - PARAM.input.nupdown) / 2.0; + testing::internal::CaptureStdout (); + EXPECT_EXIT (elecstate::cal_nbands (PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands), + ::testing::ExitedWithCode (1), + ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("Too few spin down bands!")); +} + +TEST_F (UcellTest, CalNbandsSpin1) { PARAM.input.nspin = 1; PARAM.input.nbands = 0; - std::vector nelec_spin(2, 5.0); - elecstate::cal_nbands(PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands); - EXPECT_EQ(PARAM.input.nbands, 15); + std::vector nelec_spin (2, 5.0); + elecstate::cal_nbands (PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands); + EXPECT_EQ (PARAM.input.nbands, 15); } -TEST_F(UcellTest, CalNbandsSpin1LCAO) +TEST_F (UcellTest, CalNbandsSpin1LCAO) { PARAM.input.nspin = 1; PARAM.input.nbands = 0; PARAM.input.basis_type = "lcao"; - std::vector nelec_spin(2, 5.0); - elecstate::cal_nbands(PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands); - EXPECT_EQ(PARAM.input.nbands, 6); + std::vector nelec_spin (2, 5.0); + elecstate::cal_nbands (PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands); + EXPECT_EQ (PARAM.input.nbands, 6); } -TEST_F(UcellTest, CalNbandsSpin4) +TEST_F (UcellTest, CalNbandsSpin4) { PARAM.input.nspin = 4; PARAM.input.nbands = 0; - std::vector nelec_spin(2, 5.0); - 
elecstate::cal_nbands(PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands); - EXPECT_EQ(PARAM.input.nbands, 30); + std::vector nelec_spin (2, 5.0); + elecstate::cal_nbands (PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands); + EXPECT_EQ (PARAM.input.nbands, 30); } -TEST_F(UcellTest, CalNbandsSpin4LCAO) +TEST_F (UcellTest, CalNbandsSpin4LCAO) { PARAM.input.nspin = 4; PARAM.input.nbands = 0; PARAM.input.basis_type = "lcao"; - std::vector nelec_spin(2, 5.0); - elecstate::cal_nbands(PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands); - EXPECT_EQ(PARAM.input.nbands, 6); + std::vector nelec_spin (2, 5.0); + elecstate::cal_nbands (PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands); + EXPECT_EQ (PARAM.input.nbands, 6); } -TEST_F(UcellTest, CalNbandsSpin2) +TEST_F (UcellTest, CalNbandsSpin2) { PARAM.input.nspin = 2; PARAM.input.nbands = 0; - std::vector nelec_spin(2, 5.0); - elecstate::cal_nbands(PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands); - EXPECT_EQ(PARAM.input.nbands, 16); + std::vector nelec_spin (2, 5.0); + elecstate::cal_nbands (PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands); + EXPECT_EQ (PARAM.input.nbands, 16); } -TEST_F(UcellTest, CalNbandsSpin2LCAO) +TEST_F (UcellTest, CalNbandsSpin2LCAO) { PARAM.input.nspin = 2; PARAM.input.nbands = 0; PARAM.input.basis_type = "lcao"; - std::vector nelec_spin(2, 5.0); - elecstate::cal_nbands(PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands); - EXPECT_EQ(PARAM.input.nbands, 6); + std::vector nelec_spin (2, 5.0); + elecstate::cal_nbands (PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands); + EXPECT_EQ (PARAM.input.nbands, 6); } -TEST_F(UcellTest, CalNbandsGaussWarning) +TEST_F (UcellTest, CalNbandsGaussWarning) { PARAM.input.nbands = 5; - std::vector nelec_spin(2, 5.0); + std::vector nelec_spin (2, 5.0); PARAM.input.smearing_method = "gaussian"; - 
testing::internal::CaptureStdout(); - EXPECT_EXIT(elecstate::cal_nbands(PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("for smearing, num. of bands > num. of occupied bands")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (elecstate::cal_nbands (PARAM.input.nelec, PARAM.sys.nlocal, nelec_spin, PARAM.input.nbands), + ::testing::ExitedWithCode (1), + ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("for smearing, num. of bands > num. of occupied bands")); } #ifdef __MPI #include "mpi.h" -int main(int argc, char** argv) { - MPI_Init(&argc, &argv); - testing::InitGoogleTest(&argc, argv); +int + main (int argc, char** argv) +{ + MPI_Init (&argc, &argv); + testing::InitGoogleTest (&argc, argv); - MPI_Comm_size(MPI_COMM_WORLD, &GlobalV::NPROC); - MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK); + MPI_Comm_size (MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); - int result = RUN_ALL_TESTS(); - MPI_Finalize(); + int result = RUN_ALL_TESTS (); + MPI_Finalize (); return result; } #endif diff --git a/source/source_cell/test/unitcell_test_setupcell.cpp b/source/source_cell/test/unitcell_test_setupcell.cpp index 3bb2cd7e859..352d50f6069 100644 --- a/source/source_cell/test/unitcell_test_setupcell.cpp +++ b/source/source_cell/test/unitcell_test_setupcell.cpp @@ -7,28 +7,25 @@ #include "source_base/mathzone.h" #include "source_base/global_variable.h" #include "source_cell/unitcell.h" -#include -#include +#include +#include #include #include "prepare_unitcell.h" #include "source_cell/update_cell.h" #ifdef __LCAO #include "source_basis/module_ao/ORB_read.h" -InfoNonlocal::InfoNonlocal(){} -InfoNonlocal::~InfoNonlocal(){} -LCAO_Orbitals::LCAO_Orbitals(){} -LCAO_Orbitals::~LCAO_Orbitals(){} +InfoNonlocal::InfoNonlocal () {} 
+InfoNonlocal::~InfoNonlocal () {} +LCAO_Orbitals::LCAO_Orbitals () {} +LCAO_Orbitals::~LCAO_Orbitals () {} #endif -Magnetism::Magnetism() +Magnetism::Magnetism () { - this->tot_mag = 0.0; - this->abs_mag = 0.0; - this->start_mag = nullptr; -} -Magnetism::~Magnetism() -{ - delete[] this->start_mag; + this->tot_mag = 0.0; + this->abs_mag = 0.0; + this->start_mag = nullptr; } +Magnetism::~Magnetism () { delete[] this->start_mag; } /************************************************ * unit test of class UnitCell @@ -50,166 +47,167 @@ Magnetism::~Magnetism() * - setup_cell_after_vc */ -//mock function +// mock function #ifdef __LCAO -void LCAO_Orbitals::bcast_files( - const int &ntype_in, - const int &my_rank) +void + LCAO_Orbitals::bcast_files (const int& ntype_in, const int& my_rank) { - return; + return; } class UcellTest : public ::testing::Test { -protected: - std::unique_ptr ucell{new UnitCell}; - std::string output; - void SetUp() + protected: + std::unique_ptr ucell{new UnitCell}; + std::string output; + void + SetUp () { - ucell->lmaxmax = 2; - ucell->ntype = 2; - ucell->atom_mass.resize(ucell->ntype); - ucell->atom_label.resize(ucell->ntype); - ucell->pseudo_fn.resize(ucell->ntype); - ucell->pseudo_type.resize(ucell->ntype); - ucell->orbital_fn.resize(ucell->ntype); + ucell->lmaxmax = 2; + ucell->ntype = 2; + ucell->atom_mass.resize (ucell->ntype); + ucell->atom_label.resize (ucell->ntype); + ucell->pseudo_fn.resize (ucell->ntype); + ucell->pseudo_type.resize (ucell->ntype); + ucell->orbital_fn.resize (ucell->ntype); } }; using UcellDeathTest = UcellTest; -TEST_F(UcellTest,SetupCellS1) +TEST_F (UcellTest, SetupCellS1) { - std::string fn = "./support/STRU_MgO"; - std::ofstream ofs_running; - ofs_running.open("setup_cell.tmp"); - PARAM.input.nspin = 1; - - ucell->setup_cell(fn,ofs_running); - ofs_running.close(); - remove("setup_cell.tmp"); + std::string fn = "./support/STRU_MgO"; + std::ofstream ofs_running; + ofs_running.open ("setup_cell.tmp"); + 
PARAM.input.nspin = 1; + + ucell->setup_cell (fn, ofs_running); + ofs_running.close (); + remove ("setup_cell.tmp"); } -TEST_F(UcellTest,SetupCellS2) +TEST_F (UcellTest, SetupCellS2) { - std::string fn = "./support/STRU_MgO"; - std::ofstream ofs_running; - ofs_running.open("setup_cell.tmp"); - PARAM.input.nspin = 2; - - ucell->setup_cell(fn,ofs_running); - ofs_running.close(); - remove("setup_cell.tmp"); + std::string fn = "./support/STRU_MgO"; + std::ofstream ofs_running; + ofs_running.open ("setup_cell.tmp"); + PARAM.input.nspin = 2; + + ucell->setup_cell (fn, ofs_running); + ofs_running.close (); + remove ("setup_cell.tmp"); } -TEST_F(UcellTest,SetupCellS4) +TEST_F (UcellTest, SetupCellS4) { - std::string fn = "./support/STRU_MgO"; - std::ofstream ofs_running; - ofs_running.open("setup_cell.tmp"); - PARAM.input.nspin = 4; - - ucell->setup_cell(fn,ofs_running); - ofs_running.close(); - remove("setup_cell.tmp"); + std::string fn = "./support/STRU_MgO"; + std::ofstream ofs_running; + ofs_running.open ("setup_cell.tmp"); + PARAM.input.nspin = 4; + + ucell->setup_cell (fn, ofs_running); + ofs_running.close (); + remove ("setup_cell.tmp"); } -TEST_F(UcellDeathTest,SetupCellWarning1) +TEST_F (UcellDeathTest, SetupCellWarning1) { - std::string fn = "./STRU_MgO"; - std::ofstream ofs_running; - ofs_running.open("setup_cell.tmp"); - - testing::internal::CaptureStdout(); - EXPECT_EXIT(ucell->setup_cell(fn,ofs_running),::testing::ExitedWithCode(1),""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("Can not find the file containing atom positions.!")); - ofs_running.close(); - remove("setup_cell.tmp"); + std::string fn = "./STRU_MgO"; + std::ofstream ofs_running; + ofs_running.open ("setup_cell.tmp"); + + testing::internal::CaptureStdout (); + EXPECT_EXIT (ucell->setup_cell (fn, ofs_running), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("Can not 
find the file containing atom positions.!")); + ofs_running.close (); + remove ("setup_cell.tmp"); } -TEST_F(UcellDeathTest,SetupCellWarning2) +TEST_F (UcellDeathTest, SetupCellWarning2) { - std::string fn = "./support/STRU_MgO_WarningC2"; - std::ofstream ofs_running; - ofs_running.open("setup_cell.tmp"); - - testing::internal::CaptureStdout(); - EXPECT_EXIT(ucell->setup_cell(fn,ofs_running),::testing::ExitedWithCode(1),""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("Something wrong during read_atom_positions")); - ofs_running.close(); - remove("setup_cell.tmp"); + std::string fn = "./support/STRU_MgO_WarningC2"; + std::ofstream ofs_running; + ofs_running.open ("setup_cell.tmp"); + + testing::internal::CaptureStdout (); + EXPECT_EXIT (ucell->setup_cell (fn, ofs_running), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("Something wrong during read_atom_positions")); + ofs_running.close (); + remove ("setup_cell.tmp"); } -TEST_F(UcellTest,SetupCellAfterVC) +TEST_F (UcellTest, SetupCellAfterVC) { - std::string fn = "./support/STRU_MgO"; - std::ofstream ofs_running; - ofs_running.open("setup_cell.tmp"); - PARAM.input.nspin = 1; - - delete[] ucell->magnet.start_mag; - ucell->magnet.start_mag = new double[ucell->ntype]; - - - ucell->setup_cell(fn,ofs_running); - ucell->lat0 = 1.0; - ucell->latvec.Zero(); - ucell->latvec.e11 = 10.0; - ucell->latvec.e22 = 10.0; - ucell->latvec.e33 = 10.0; - for (int i =0;intype;i++) - { - ucell->atoms[i].na = 1; - ucell->atoms[i].taud.resize(ucell->atoms[i].na); - ucell->atoms[i].tau.resize(ucell->atoms[i].na); - ucell->atoms[i].taud[0].x = 0.1; - ucell->atoms[i].taud[0].y = 0.1; - ucell->atoms[i].taud[0].z = 0.1; - } - - unitcell::setup_cell_after_vc(*ucell,ofs_running); - EXPECT_EQ(ucell->lat0_angstrom,0.529177); - EXPECT_EQ(ucell->tpiba,ModuleBase::TWO_PI); - 
EXPECT_EQ(ucell->tpiba2,ModuleBase::TWO_PI*ModuleBase::TWO_PI); - EXPECT_EQ(ucell->a1.x ,10.0); - EXPECT_EQ(ucell->a2.y ,10.0); - EXPECT_EQ(ucell->a3.z ,10.0); - EXPECT_EQ(ucell->omega,1000.0); - EXPECT_EQ(ucell->GT.e11,0.1); - EXPECT_EQ(ucell->GT.e22,0.1); - EXPECT_EQ(ucell->GT.e33,0.1); - EXPECT_EQ(ucell->G.e11,0.1); - EXPECT_EQ(ucell->G.e22,0.1); - EXPECT_EQ(ucell->G.e33,0.1); - - for (int it = 0; it < ucell->ntype; it++) { - Atom* atom = &ucell->atoms[it]; - for (int ia = 0; ia < atom->na; ia++) { - EXPECT_EQ(atom->tau[ia].x,1); - EXPECT_EQ(atom->tau[ia].y,1); - EXPECT_EQ(atom->tau[ia].z,1); + std::string fn = "./support/STRU_MgO"; + std::ofstream ofs_running; + ofs_running.open ("setup_cell.tmp"); + PARAM.input.nspin = 1; + + delete[] ucell->magnet.start_mag; + ucell->magnet.start_mag = new double[ucell->ntype]; + + ucell->setup_cell (fn, ofs_running); + ucell->lat0 = 1.0; + ucell->latvec.Zero (); + ucell->latvec.e11 = 10.0; + ucell->latvec.e22 = 10.0; + ucell->latvec.e33 = 10.0; + for (int i = 0; i < ucell->ntype; i++) + { + ucell->atoms[i].na = 1; + ucell->atoms[i].taud.resize (ucell->atoms[i].na); + ucell->atoms[i].tau.resize (ucell->atoms[i].na); + ucell->atoms[i].taud[0].x = 0.1; + ucell->atoms[i].taud[0].y = 0.1; + ucell->atoms[i].taud[0].z = 0.1; } - } - ofs_running.close(); - remove("setup_cell.tmp"); -} + unitcell::setup_cell_after_vc (*ucell, ofs_running); + EXPECT_EQ (ucell->lat0_angstrom, 0.529177); + EXPECT_EQ (ucell->tpiba, ModuleBase::TWO_PI); + EXPECT_EQ (ucell->tpiba2, ModuleBase::TWO_PI * ModuleBase::TWO_PI); + EXPECT_EQ (ucell->a1.x, 10.0); + EXPECT_EQ (ucell->a2.y, 10.0); + EXPECT_EQ (ucell->a3.z, 10.0); + EXPECT_EQ (ucell->omega, 1000.0); + EXPECT_EQ (ucell->GT.e11, 0.1); + EXPECT_EQ (ucell->GT.e22, 0.1); + EXPECT_EQ (ucell->GT.e33, 0.1); + EXPECT_EQ (ucell->G.e11, 0.1); + EXPECT_EQ (ucell->G.e22, 0.1); + EXPECT_EQ (ucell->G.e33, 0.1); + + for (int it = 0; it < ucell->ntype; it++) + { + Atom* atom = &ucell->atoms[it]; + for (int ia = 0; ia 
< atom->na; ia++) + { + EXPECT_EQ (atom->tau[ia].x, 1); + EXPECT_EQ (atom->tau[ia].y, 1); + EXPECT_EQ (atom->tau[ia].z, 1); + } + } + ofs_running.close (); + remove ("setup_cell.tmp"); +} #ifdef __MPI #include "mpi.h" -int main(int argc, char **argv) +int + main (int argc, char** argv) { - MPI_Init(&argc, &argv); - testing::InitGoogleTest(&argc, argv); + MPI_Init (&argc, &argv); + testing::InitGoogleTest (&argc, argv); - MPI_Comm_size(MPI_COMM_WORLD,&GlobalV::NPROC); - MPI_Comm_rank(MPI_COMM_WORLD,&GlobalV::MY_RANK); + MPI_Comm_size (MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); - int result = RUN_ALL_TESTS(); - MPI_Finalize(); - return result; + int result = RUN_ALL_TESTS (); + MPI_Finalize (); + return result; } #endif #endif diff --git a/source/source_cell/test_pw/unitcell_test_pw.cpp b/source/source_cell/test_pw/unitcell_test_pw.cpp index 52d52236a73..3d0e4a512aa 100644 --- a/source/source_cell/test_pw/unitcell_test_pw.cpp +++ b/source/source_cell/test_pw/unitcell_test_pw.cpp @@ -8,19 +8,16 @@ #include "source_base/global_variable.h" #include "source_cell/unitcell.h" #include "source_cell/read_stru.h" -#include -#include +#include +#include -Magnetism::Magnetism() +Magnetism::Magnetism () { - this->tot_mag = 0.0; - this->abs_mag = 0.0; - this->start_mag = nullptr; -} -Magnetism::~Magnetism() -{ - delete[] this->start_mag; + this->tot_mag = 0.0; + this->abs_mag = 0.0; + this->start_mag = nullptr; } +Magnetism::~Magnetism () { delete[] this->start_mag; } /************************************************ * unit test of class UnitCell @@ -38,107 +35,109 @@ Magnetism::~Magnetism() class UcellTest : public ::testing::Test { -protected: - std::unique_ptr ucell{new UnitCell}; - std::string output; - void SetUp() + protected: + std::unique_ptr ucell{new UnitCell}; + std::string output; + void + SetUp () { - ucell->lmaxmax = 2; - ucell->ntype = 2; - ucell->atom_mass.resize(ucell->ntype); - ucell->atom_label.resize(ucell->ntype); - 
ucell->pseudo_fn.resize(ucell->ntype); - ucell->pseudo_type.resize(ucell->ntype); - ucell->orbital_fn.resize(ucell->ntype); + ucell->lmaxmax = 2; + ucell->ntype = 2; + ucell->atom_mass.resize (ucell->ntype); + ucell->atom_label.resize (ucell->ntype); + ucell->pseudo_fn.resize (ucell->ntype); + ucell->pseudo_type.resize (ucell->ntype); + ucell->orbital_fn.resize (ucell->ntype); } }; -TEST_F(UcellTest,ReadAtomSpecies) +TEST_F (UcellTest, ReadAtomSpecies) { #ifdef __MPI -if(GlobalV::MY_RANK==0) -{ + if (GlobalV::MY_RANK == 0) + { #endif - std::string fn = "./support/STRU_MgO"; - std::ifstream ifa(fn.c_str()); - std::ofstream ofs_running; - ofs_running.open("read_atom_species.tmp"); - ucell->atoms = new Atom[ucell->ntype]; - ucell->set_atom_flag = true; - PARAM.input.test_pseudo_cell = 2; - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11,4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22,4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33,4.27957); - ofs_running.close(); - ifa.close(); - remove("read_atom_species.tmp"); + std::string fn = "./support/STRU_MgO"; + std::ifstream ifa (fn.c_str ()); + std::ofstream ofs_running; + ofs_running.open ("read_atom_species.tmp"); + ucell->atoms = new Atom[ucell->ntype]; + ucell->set_atom_flag = true; + PARAM.input.test_pseudo_cell = 2; + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); + ofs_running.close (); + ifa.close (); + remove ("read_atom_species.tmp"); #ifdef __MPI -} + } #endif } -TEST_F(UcellTest,ReadAtomPositions) +TEST_F (UcellTest, ReadAtomPositions) { #ifdef __MPI -if(GlobalV::MY_RANK==0) -{ + if (GlobalV::MY_RANK == 0) + { #endif - 
std::string fn = "./support/STRU_MgO"; - std::ifstream ifa(fn.c_str()); - std::ofstream ofs_running; - std::ofstream ofs_warning; - ofs_running.open("read_atom_species.tmp"); - ofs_warning.open("read_atom_species.warn"); - ucell->atoms = new Atom[ucell->ntype]; - ucell->set_atom_flag = true; - PARAM.input.test_pseudo_cell = 2; - PARAM.input.basis_type = "pw"; - //call read_atom_species - EXPECT_NO_THROW(unitcell::read_atom_species(ifa, ofs_running,*ucell)); - EXPECT_NO_THROW(unitcell::read_lattice_constant(ifa, ofs_running,ucell->lat)); - EXPECT_DOUBLE_EQ(ucell->latvec.e11,4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e22,4.27957); - EXPECT_DOUBLE_EQ(ucell->latvec.e33,4.27957); - //mandatory preliminaries - delete[] ucell->magnet.start_mag; - ucell->magnet.start_mag = new double[ucell->ntype]; - //call read_atom_positions - EXPECT_NO_THROW(unitcell::read_atom_positions(*ucell,ifa, ofs_running, ofs_warning)); - ofs_running.close(); - ofs_warning.close(); - ifa.close(); - remove("read_atom_species.tmp"); - remove("read_atom_species.warn"); + std::string fn = "./support/STRU_MgO"; + std::ifstream ifa (fn.c_str ()); + std::ofstream ofs_running; + std::ofstream ofs_warning; + ofs_running.open ("read_atom_species.tmp"); + ofs_warning.open ("read_atom_species.warn"); + ucell->atoms = new Atom[ucell->ntype]; + ucell->set_atom_flag = true; + PARAM.input.test_pseudo_cell = 2; + PARAM.input.basis_type = "pw"; + // call read_atom_species + EXPECT_NO_THROW (unitcell::read_atom_species (ifa, ofs_running, *ucell)); + EXPECT_NO_THROW (unitcell::read_lattice_constant (ifa, ofs_running, ucell->lat)); + EXPECT_DOUBLE_EQ (ucell->latvec.e11, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e22, 4.27957); + EXPECT_DOUBLE_EQ (ucell->latvec.e33, 4.27957); + // mandatory preliminaries + delete[] ucell->magnet.start_mag; + ucell->magnet.start_mag = new double[ucell->ntype]; + // call read_atom_positions + EXPECT_NO_THROW (unitcell::read_atom_positions (*ucell, ifa, ofs_running, ofs_warning)); + 
ofs_running.close (); + ofs_warning.close (); + ifa.close (); + remove ("read_atom_species.tmp"); + remove ("read_atom_species.warn"); #ifdef __MPI -} + } #endif } -TEST_F(UcellTest,SetupCell) +TEST_F (UcellTest, SetupCell) { - std::string fn = "./support/STRU_MgO"; - std::ofstream ofs_running; - ofs_running.open("setup_cell.tmp"); - PARAM.input.nspin = 1; - ucell->setup_cell(fn,ofs_running); - ofs_running.close(); - remove("setup_cell.tmp"); + std::string fn = "./support/STRU_MgO"; + std::ofstream ofs_running; + ofs_running.open ("setup_cell.tmp"); + PARAM.input.nspin = 1; + ucell->setup_cell (fn, ofs_running); + ofs_running.close (); + remove ("setup_cell.tmp"); } #ifdef __MPI #include "mpi.h" -int main(int argc, char **argv) +int + main (int argc, char** argv) { - MPI_Init(&argc, &argv); - testing::InitGoogleTest(&argc, argv); + MPI_Init (&argc, &argv); + testing::InitGoogleTest (&argc, argv); - MPI_Comm_size(MPI_COMM_WORLD,&GlobalV::NPROC); - MPI_Comm_rank(MPI_COMM_WORLD,&GlobalV::MY_RANK); + MPI_Comm_size (MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); - int result = RUN_ALL_TESTS(); - MPI_Finalize(); - return result; + int result = RUN_ALL_TESTS (); + MPI_Finalize (); + return result; } #endif diff --git a/source/source_cell/unitcell.cpp b/source/source_cell/unitcell.cpp index f2c15102748..98a3359982f 100644 --- a/source/source_cell/unitcell.cpp +++ b/source/source_cell/unitcell.cpp @@ -26,170 +26,196 @@ #endif #include "update_cell.h" -UnitCell::UnitCell() -{ - itia2iat.create(1, 1); -} +UnitCell::UnitCell () { itia2iat.create (1, 1); } -UnitCell::~UnitCell() +UnitCell::~UnitCell () { if (set_atom_flag) - { - delete[] atoms; - } + { + delete[] atoms; + } } +void + UnitCell::print_cell (std::ofstream& ofs) const +{ -void UnitCell::print_cell(std::ofstream& ofs) const { - - ModuleBase::GlobalFunc::OUT(ofs, "print_unitcell()"); + ModuleBase::GlobalFunc::OUT (ofs, "print_unitcell()"); - ModuleBase::GlobalFunc::OUT(ofs, 
"latName", latName); - ModuleBase::GlobalFunc::OUT(ofs, "ntype", ntype); - ModuleBase::GlobalFunc::OUT(ofs, "nat", nat); - ModuleBase::GlobalFunc::OUT(ofs, "lat0", lat0); - ModuleBase::GlobalFunc::OUT(ofs, "lat0_angstrom", lat0_angstrom); - ModuleBase::GlobalFunc::OUT(ofs, "tpiba", tpiba); - ModuleBase::GlobalFunc::OUT(ofs, "omega", omega); + ModuleBase::GlobalFunc::OUT (ofs, "latName", latName); + ModuleBase::GlobalFunc::OUT (ofs, "ntype", ntype); + ModuleBase::GlobalFunc::OUT (ofs, "nat", nat); + ModuleBase::GlobalFunc::OUT (ofs, "lat0", lat0); + ModuleBase::GlobalFunc::OUT (ofs, "lat0_angstrom", lat0_angstrom); + ModuleBase::GlobalFunc::OUT (ofs, "tpiba", tpiba); + ModuleBase::GlobalFunc::OUT (ofs, "omega", omega); - output::printM3(ofs, "Lattices Vector (R) : ", latvec); - output::printM3(ofs, "Supercell lattice vector : ", latvec_supercell); - output::printM3(ofs, "Reciprocal lattice Vector (G): ", G); - output::printM3(ofs, "GGT : ", GGT); + output::printM3 (ofs, "Lattices Vector (R) : ", latvec); + output::printM3 (ofs, "Supercell lattice vector : ", latvec_supercell); + output::printM3 (ofs, "Reciprocal lattice Vector (G): ", G); + output::printM3 (ofs, "GGT : ", GGT); ofs << std::endl; return; } - -void UnitCell::set_iat2itia() { - assert(nat > 0); +void + UnitCell::set_iat2itia () +{ + assert (nat > 0); delete[] iat2it; delete[] iat2ia; this->iat2it = new int[nat]; this->iat2ia = new int[nat]; int iat = 0; - for (int it = 0; it < ntype; it++) { - for (int ia = 0; ia < atoms[it].na; ia++) { - this->iat2it[iat] = it; - this->iat2ia[iat] = ia; - ++iat; + for (int it = 0; it < ntype; it++) + { + for (int ia = 0; ia < atoms[it].na; ia++) + { + this->iat2it[iat] = it; + this->iat2ia[iat] = ia; + ++iat; + } } - } return; } -std::map UnitCell::get_atom_Counts() const { +std::map + UnitCell::get_atom_Counts () const +{ std::map atomCounts; - for (int it = 0; it < this->ntype; it++) { - atomCounts.insert(std::pair(it, this->atoms[it].na)); - } + for (int it = 0; it 
< this->ntype; it++) + { + atomCounts.insert (std::pair (it, this->atoms[it].na)); + } return atomCounts; } -std::map UnitCell::get_orbital_Counts() const { +std::map + UnitCell::get_orbital_Counts () const +{ std::map orbitalCounts; - for (int it = 0; it < this->ntype; it++) { - orbitalCounts.insert(std::pair(it, this->atoms[it].nw)); - } + for (int it = 0; it < this->ntype; it++) + { + orbitalCounts.insert (std::pair (it, this->atoms[it].nw)); + } return orbitalCounts; } -std::map> UnitCell::get_lnchi_Counts() const { +std::map> + UnitCell::get_lnchi_Counts () const +{ std::map> lnchiCounts; - for (int it = 0; it < this->ntype; it++) { - for (int L = 0; L < this->atoms[it].nwl + 1; L++) { - // Check if the key 'it' exists in the outer map - if (lnchiCounts.find(it) == lnchiCounts.end()) { - // If it doesn't exist, initialize an empty inner map - lnchiCounts[it] = std::map(); - } - int l_nchi = this->atoms[it].l_nchi[L]; - // Insert the key-value pair into the inner map - lnchiCounts[it].insert(std::pair(L, l_nchi)); - } - } + for (int it = 0; it < this->ntype; it++) + { + for (int L = 0; L < this->atoms[it].nwl + 1; L++) + { + // Check if the key 'it' exists in the outer map + if (lnchiCounts.find (it) == lnchiCounts.end ()) + { + // If it doesn't exist, initialize an empty inner map + lnchiCounts[it] = std::map (); + } + int l_nchi = this->atoms[it].l_nchi[L]; + // Insert the key-value pair into the inner map + lnchiCounts[it].insert (std::pair (L, l_nchi)); + } + } return lnchiCounts; } -std::vector UnitCell::get_atomLabels() const { - std::vector atomLabels(this->ntype); - for (int it = 0; it < this->ntype; it++) { - atomLabels[it] = this->atoms[it].label; - } +std::vector + UnitCell::get_atomLabels () const +{ + std::vector atomLabels (this->ntype); + for (int it = 0; it < this->ntype; it++) + { + atomLabels[it] = this->atoms[it].label; + } return atomLabels; } -std::vector UnitCell::get_atomCounts() const { - std::vector atomCounts(this->ntype); - for (int 
it = 0; it < this->ntype; it++) { - atomCounts[it] = this->atoms[it].na; - } +std::vector + UnitCell::get_atomCounts () const +{ + std::vector atomCounts (this->ntype); + for (int it = 0; it < this->ntype; it++) + { + atomCounts[it] = this->atoms[it].na; + } return atomCounts; } -std::vector> UnitCell::get_lnchiCounts() const { - std::vector> lnchiCounts(this->ntype); - for (int it = 0; it < this->ntype; it++) { - lnchiCounts[it].resize(this->atoms[it].nwl + 1); - for (int L = 0; L < this->atoms[it].nwl + 1; L++) { - lnchiCounts[it][L] = this->atoms[it].l_nchi[L]; +std::vector> + UnitCell::get_lnchiCounts () const +{ + std::vector> lnchiCounts (this->ntype); + for (int it = 0; it < this->ntype; it++) + { + lnchiCounts[it].resize (this->atoms[it].nwl + 1); + for (int L = 0; L < this->atoms[it].nwl + 1; L++) + { + lnchiCounts[it][L] = this->atoms[it].l_nchi[L]; + } } - } return lnchiCounts; } -std::vector> UnitCell::get_target_mag() const +std::vector> + UnitCell::get_target_mag () const { - std::vector> target_mag(this->nat); - for (int it = 0; it < this->ntype; it++) - { - for (int ia = 0; ia < this->atoms[it].na; ia++) - { - int iat = itia2iat(it, ia); - target_mag[iat] = this->atoms[it].m_loc_[ia]; - } - } - return target_mag; + std::vector> target_mag (this->nat); + for (int it = 0; it < this->ntype; it++) + { + for (int ia = 0; ia < this->atoms[it].na; ia++) + { + int iat = itia2iat (it, ia); + target_mag[iat] = this->atoms[it].m_loc_[ia]; + } + } + return target_mag; } -std::vector> UnitCell::get_lambda() const +std::vector> + UnitCell::get_lambda () const { - std::vector> lambda(this->nat); - for (int it = 0; it < this->ntype; it++) - { - for (int ia = 0; ia < this->atoms[it].na; ia++) - { - int iat = itia2iat(it, ia); - lambda[iat] = this->atoms[it].lambda[ia]; - } - } - return lambda; + std::vector> lambda (this->nat); + for (int it = 0; it < this->ntype; it++) + { + for (int ia = 0; ia < this->atoms[it].na; ia++) + { + int iat = itia2iat (it, ia); + 
lambda[iat] = this->atoms[it].lambda[ia]; + } + } + return lambda; } -std::vector> UnitCell::get_constrain() const +std::vector> + UnitCell::get_constrain () const { - std::vector> constrain(this->nat); - for (int it = 0; it < this->ntype; it++) - { - for (int ia = 0; ia < this->atoms[it].na; ia++) - { - int iat = itia2iat(it, ia); - constrain[iat] = this->atoms[it].constrain[ia]; - } - } - return constrain; + std::vector> constrain (this->nat); + for (int it = 0; it < this->ntype; it++) + { + for (int ia = 0; ia < this->atoms[it].na; ia++) + { + int iat = itia2iat (it, ia); + constrain[iat] = this->atoms[it].constrain[ia]; + } + } + return constrain; } //============================================================== // Calculate various lattice related quantities for given latvec //============================================================== -void UnitCell::setup_cell(const std::string& fn, std::ofstream& log) +void + UnitCell::setup_cell (const std::string& fn, std::ofstream& log) { - ModuleBase::TITLE("UnitCell", "setup_cell"); + ModuleBase::TITLE ("UnitCell", "setup_cell"); // (1) init mag - assert(ntype > 0); + assert (ntype > 0); delete[] magnet.start_mag; magnet.start_mag = new double[this->ntype]; @@ -206,87 +232,92 @@ void UnitCell::setup_cell(const std::string& fn, std::ofstream& log) bool ok3 = true; // for sep potential in DFT-1/2 // (3) read in atom information - this->atom_mass.resize(ntype); - this->atom_label.resize(ntype); - this->pseudo_fn.resize(ntype); - this->pseudo_type.resize(ntype); - this->orbital_fn.resize(ntype); + this->atom_mass.resize (ntype); + this->atom_label.resize (ntype); + this->pseudo_fn.resize (ntype); + this->pseudo_type.resize (ntype); + this->orbital_fn.resize (ntype); if (GlobalV::MY_RANK == 0) - { - // open "atom_unitcell" file. 
- std::ifstream ifa(fn.c_str(), std::ios::in); - if (!ifa) - { - GlobalV::ofs_warning << fn; - ok = false; - } - - if (ok) - { - log << "\n\n"; - log << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; - log << " | |" << std::endl; - log << " | #Setup Unitcell# |" << std::endl; - log << " | From the input file and the structure file we know the number of |" << std::endl; - log << " | different elments in this unitcell, then we list the detail |" << std::endl; - log << " | information for each element, especially the zeta and polar atomic |" << std::endl; - log << " | orbital number for each element. The total atom number is counted. |" << std::endl; - log << " | We calculate the nearest atom distance for each atom and show the |" << std::endl; - log << " | Cartesian and Direct coordinates for each atom. We list the file |" << std::endl; - log << " | address for atomic orbitals. The volume and the lattice vectors |" << std::endl; - log << " | in real and reciprocal space is also shown. 
|" << std::endl; - log << " | |" << std::endl; - log << " <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" << std::endl; - log << "\n"; - - log << " READING UNITCELL INFORMATION" << std::endl; - //======================== - // call read_atom_species - //======================== - const bool read_atom_species = unitcell::read_atom_species(ifa, log ,*this); - //======================== - // call read_lattice_constant - //======================== - const bool read_lattice_constant = unitcell::read_lattice_constant(ifa, log ,this->lat); - //========================== - // readl sep potential, currently using the pseudopotential folder (pseudo_dir in INPUT) - //========================== - if (PARAM.inp.dfthalf_type > 0) { - // GlobalC::sep_cell.init(this->ntype); - // ok3 = GlobalC::sep_cell.read_sep_potentials(ifa, PARAM.inp.pseudo_dir, GlobalV::ofs_warning, this->atom_label); - - sep_cell.init(this->ntype); - ok3 = sep_cell.read_sep_potentials(ifa, PARAM.inp.pseudo_dir, GlobalV::ofs_warning, this->atom_label); - } - //========================== - // call read_atom_positions - //========================== - ok2 = unitcell::read_atom_positions(*this, ifa, log, GlobalV::ofs_warning); - } - } + { + // open "atom_unitcell" file. + std::ifstream ifa (fn.c_str (), std::ios::in); + if (!ifa) + { + GlobalV::ofs_warning << fn; + ok = false; + } + + if (ok) + { + log << "\n\n"; + log << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; + log << " | |" << std::endl; + log << " | #Setup Unitcell# |" << std::endl; + log << " | From the input file and the structure file we know the number of |" << std::endl; + log << " | different elments in this unitcell, then we list the detail |" << std::endl; + log << " | information for each element, especially the zeta and polar atomic |" << std::endl; + log << " | orbital number for each element. The total atom number is counted. 
|" << std::endl; + log << " | We calculate the nearest atom distance for each atom and show the |" << std::endl; + log << " | Cartesian and Direct coordinates for each atom. We list the file |" << std::endl; + log << " | address for atomic orbitals. The volume and the lattice vectors |" << std::endl; + log << " | in real and reciprocal space is also shown. |" << std::endl; + log << " | |" << std::endl; + log << " <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" << std::endl; + log << "\n"; + + log << " READING UNITCELL INFORMATION" << std::endl; + //======================== + // call read_atom_species + //======================== + const bool read_atom_species = unitcell::read_atom_species (ifa, log, *this); + //======================== + // call read_lattice_constant + //======================== + const bool read_lattice_constant = unitcell::read_lattice_constant (ifa, log, this->lat); + //========================== + // readl sep potential, currently using the pseudopotential folder (pseudo_dir in INPUT) + //========================== + if (PARAM.inp.dfthalf_type > 0) + { + // GlobalC::sep_cell.init(this->ntype); + // ok3 = GlobalC::sep_cell.read_sep_potentials(ifa, PARAM.inp.pseudo_dir, + // GlobalV::ofs_warning, this->atom_label); + + sep_cell.init (this->ntype); + ok3 = sep_cell.read_sep_potentials (ifa, + PARAM.inp.pseudo_dir, + GlobalV::ofs_warning, + this->atom_label); + } + //========================== + // call read_atom_positions + //========================== + ok2 = unitcell::read_atom_positions (*this, ifa, log, GlobalV::ofs_warning); + } + } #ifdef __MPI - Parallel_Common::bcast_bool(ok); - Parallel_Common::bcast_bool(ok2); - Parallel_Common::bcast_bool(ok3); + Parallel_Common::bcast_bool (ok); + Parallel_Common::bcast_bool (ok2); + Parallel_Common::bcast_bool (ok3); #endif - if (!ok) { - ModuleBase::WARNING_QUIT( - "UnitCell::setup_cell", - "Can not find the file containing atom positions.!"); - } - if (!ok2) { - 
ModuleBase::WARNING_QUIT("UnitCell::setup_cell", - "Something wrong during read_atom_positions."); - } - if (!ok3) { - ModuleBase::WARNING_QUIT("UnitCell::setup_cell", "Something wrong during read_sep_potentials"); - } + if (!ok) + { + ModuleBase::WARNING_QUIT ("UnitCell::setup_cell", "Can not find the file containing atom positions.!"); + } + if (!ok2) + { + ModuleBase::WARNING_QUIT ("UnitCell::setup_cell", "Something wrong during read_atom_positions."); + } + if (!ok3) + { + ModuleBase::WARNING_QUIT ("UnitCell::setup_cell", "Something wrong during read_sep_potentials"); + } #ifdef __MPI - unitcell::bcast_unitcell(*this); + unitcell::bcast_unitcell (*this); // GlobalC::sep_cell.bcast_sep_cell(); - sep_cell.bcast_sep_cell(); + sep_cell.bcast_sep_cell (); #endif //======================================================== @@ -294,251 +325,252 @@ void UnitCell::setup_cell(const std::string& fn, std::ofstream& log) // the reason to calculate volume here is // Firstly, latvec must be read in. //======================================================== - assert(lat0 > 0.0); - this->omega = latvec.Det() * this->lat0 * this->lat0 * this->lat0; - + assert (lat0 > 0.0); + this->omega = latvec.Det () * this->lat0 * this->lat0 * this->lat0; if (this->omega < 0) - { - std::cout << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; - std::cout << " Warning: The lattice vector is left-handed; a right-handed vector is prefered." << std::endl; - std::cout << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; - GlobalV::ofs_warning << - "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; - GlobalV::ofs_warning << - " Warning: The lattice vector is left-handed; a right-handed vector is prefered." 
<< std::endl; - GlobalV::ofs_warning << - "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; - this->omega = std::abs(this->omega); - } + { + std::cout << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; + std::cout << " Warning: The lattice vector is left-handed; a right-handed vector is prefered." << std::endl; + std::cout << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; + GlobalV::ofs_warning << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" + << std::endl; + GlobalV::ofs_warning << " Warning: The lattice vector is left-handed; a right-handed vector is prefered." + << std::endl; + GlobalV::ofs_warning << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" + << std::endl; + this->omega = std::abs (this->omega); + } else if (this->omega == 0) - { - ModuleBase::WARNING_QUIT("setup_cell", "The volume is zero."); - } + { + ModuleBase::WARNING_QUIT ("setup_cell", "The volume is zero."); + } else - { - ModuleBase::GlobalFunc::OUT(log, "Cell volume (Bohr^3)", this->omega); - ModuleBase::GlobalFunc::OUT(log, "Cell volume (A^3)", this->omega * pow(ModuleBase::BOHR_TO_A, 3)); - } + { + ModuleBase::GlobalFunc::OUT (log, "Cell volume (Bohr^3)", this->omega); + ModuleBase::GlobalFunc::OUT (log, "Cell volume (A^3)", this->omega * pow (ModuleBase::BOHR_TO_A, 3)); + } //========================================================== // Calculate recip. 
lattice vectors and dot products // latvec have the unit of lat0, but G has the unit 2Pi/lat0 //========================================================== - this->GT = latvec.Inverse(); - this->G = GT.Transpose(); + this->GT = latvec.Inverse (); + this->G = GT.Transpose (); this->GGT = G * GT; - this->invGGT = GGT.Inverse(); + this->invGGT = GGT.Inverse (); // LiuXh add 20180515 - this->GT0 = latvec.Inverse(); - this->G0 = GT.Transpose(); + this->GT0 = latvec.Inverse (); + this->G0 = GT.Transpose (); this->GGT0 = G * GT; - this->invGGT0 = GGT.Inverse(); + this->invGGT0 = GGT.Inverse (); log << std::endl; - output::printM3(log, - "Lattice vectors: (Cartesian coordinate: in unit of a_0)", - latvec); - output::printM3( - log, - "Reciprocal vectors: (Cartesian coordinate: in unit of 2 pi/a_0)", - G); + output::printM3 (log, "Lattice vectors: (Cartesian coordinate: in unit of a_0)", latvec); + output::printM3 (log, "Reciprocal vectors: (Cartesian coordinate: in unit of 2 pi/a_0)", G); //=================================== // set index for iat2it, iat2ia //=================================== - this->set_iat2itia(); + this->set_iat2itia (); // GlobalC::sep_cell.set_omega(this->omega, this->tpiba2); - sep_cell.set_omega(this->omega, this->tpiba2); + sep_cell.set_omega (this->omega, this->tpiba2); return; } - -void UnitCell::set_iat2iwt(const int& npol_in) +void + UnitCell::set_iat2iwt (const int& npol_in) { #ifdef __DEBUG - assert(npol_in == 1 || npol_in == 2); - assert(this->nat > 0); - assert(this->ntype > 0); + assert (npol_in == 1 || npol_in == 2); + assert (this->nat > 0); + assert (this->ntype > 0); #endif - this->iat2iwt.resize(this->nat); + this->iat2iwt.resize (this->nat); this->npol = npol_in; int iat = 0; int iwt = 0; for (int it = 0; it < this->ntype; it++) - { - for (int ia = 0; ia < atoms[it].na; ia++) { - this->iat2iwt[iat] = iwt; - iwt += atoms[it].nw * this->npol; - ++iat; + for (int ia = 0; ia < atoms[it].na; ia++) + { + this->iat2iwt[iat] = iwt; + iwt += 
atoms[it].nw * this->npol; + ++iat; + } } - } return; } - - // check if any atom can be moved -bool UnitCell::if_atoms_can_move() const +bool + UnitCell::if_atoms_can_move () const { for (int it = 0; it < this->ntype; it++) - { - Atom* atom = &atoms[it]; - for (int ia = 0; ia < atom->na; ia++) - { - if (atom->mbl[ia].x || atom->mbl[ia].y || atom->mbl[ia].z) - { - return true; - } - } - } + { + Atom* atom = &atoms[it]; + for (int ia = 0; ia < atom->na; ia++) + { + if (atom->mbl[ia].x || atom->mbl[ia].y || atom->mbl[ia].z) + { + return true; + } + } + } return false; } // check if lattice vector can be changed -bool UnitCell::if_cell_can_change() const +bool + UnitCell::if_cell_can_change () const { - // need to be fixed next - if (this->lc[0] || this->lc[1] || this->lc[2]) - { - return true; - } - return false; + // need to be fixed next + if (this->lc[0] || this->lc[1] || this->lc[2]) + { + return true; + } + return false; } -void UnitCell::setup(const std::string& latname_in, +void + UnitCell::setup (const std::string& latname_in, const int& ntype_in, const int& lmaxmax_in, const bool& init_vel_in, - const std::string& fixed_axes_in) { + const std::string& fixed_axes_in) +{ this->latName = latname_in; this->ntype = ntype_in; this->lmaxmax = lmaxmax_in; this->init_vel = init_vel_in; // pengfei Li add 2018-11-11 - if (fixed_axes_in == "None") { - this->lc[0] = 1; - this->lc[1] = 1; - this->lc[2] = 1; - } else if (fixed_axes_in == "volume") { - this->lc[0] = 1; - this->lc[1] = 1; - this->lc[2] = 1; - } else if (fixed_axes_in == "shape") { - this->lc[0] = 1; - this->lc[1] = 1; - this->lc[2] = 1; - } else if (fixed_axes_in == "a") { - this->lc[0] = 0; - this->lc[1] = 1; - this->lc[2] = 1; - } else if (fixed_axes_in == "b") { - this->lc[0] = 1; - this->lc[1] = 0; - this->lc[2] = 1; - } else if (fixed_axes_in == "c") { - this->lc[0] = 1; - this->lc[1] = 1; - this->lc[2] = 0; - } else if (fixed_axes_in == "ab") { - this->lc[0] = 0; - this->lc[1] = 0; - this->lc[2] = 1; - 
} else if (fixed_axes_in == "ac") { - this->lc[0] = 0; - this->lc[1] = 1; - this->lc[2] = 0; - } else if (fixed_axes_in == "bc") { - this->lc[0] = 1; - this->lc[1] = 0; - this->lc[2] = 0; - } else if (fixed_axes_in == "abc") { - this->lc[0] = 0; - this->lc[1] = 0; - this->lc[2] = 0; - } else { - ModuleBase::WARNING_QUIT( - "Input", - "fixed_axes should be none, volume, shape, a, b, c, ab, ac, bc or abc!"); - } + if (fixed_axes_in == "None") + { + this->lc[0] = 1; + this->lc[1] = 1; + this->lc[2] = 1; + } + else if (fixed_axes_in == "volume") + { + this->lc[0] = 1; + this->lc[1] = 1; + this->lc[2] = 1; + } + else if (fixed_axes_in == "shape") + { + this->lc[0] = 1; + this->lc[1] = 1; + this->lc[2] = 1; + } + else if (fixed_axes_in == "a") + { + this->lc[0] = 0; + this->lc[1] = 1; + this->lc[2] = 1; + } + else if (fixed_axes_in == "b") + { + this->lc[0] = 1; + this->lc[1] = 0; + this->lc[2] = 1; + } + else if (fixed_axes_in == "c") + { + this->lc[0] = 1; + this->lc[1] = 1; + this->lc[2] = 0; + } + else if (fixed_axes_in == "ab") + { + this->lc[0] = 0; + this->lc[1] = 0; + this->lc[2] = 1; + } + else if (fixed_axes_in == "ac") + { + this->lc[0] = 0; + this->lc[1] = 1; + this->lc[2] = 0; + } + else if (fixed_axes_in == "bc") + { + this->lc[0] = 1; + this->lc[1] = 0; + this->lc[2] = 0; + } + else if (fixed_axes_in == "abc") + { + this->lc[0] = 0; + this->lc[1] = 0; + this->lc[2] = 0; + } + else + { + ModuleBase::WARNING_QUIT ("Input", "fixed_axes should be none, volume, shape, a, b, c, ab, ac, bc or abc!"); + } return; } - -void UnitCell::compare_atom_labels(const std::string& label1, const std::string& label2) const +void + UnitCell::compare_atom_labels (const std::string& label1, const std::string& label2) const { - if (label1!= label2) //'!( "Ag" == "Ag" || "47" == "47" || "Silver" == Silver" )' - { - atom_in ai; - if (!(std::to_string(ai.atom_Z[label1]) == label2 - || // '!( "Ag" == "47" )' - ai.atom_symbol[label1] == label2 || // '!( "Ag" == "Silver" )' - label1 == 
std::to_string(ai.atom_Z[label2]) - || // '!( "47" == "Ag" )' - label1 == std::to_string(ai.symbol_Z[label2]) - || // '!( "47" == "Silver" )' - label1 == ai.atom_symbol[label2] || // '!( "Silver" == "Ag" )' - std::to_string(ai.symbol_Z[label1]) - == label2)) // '!( "Silver" == "47" )' - { - std::string stru_label = ""; - std::string psuedo_label = ""; - for (int ip = 0; ip < label1.length(); ip++) - { - if (!(isdigit(label1[ip]) || label1[ip] == '_')) - { - stru_label += label1[ip]; - } - else - { - break; - } - } - stru_label[0] = toupper(stru_label[0]); - - for (int ip = 0; ip < label2.length(); ip++) - { - if (!(isdigit(label2[ip]) || label2[ip] == '_')) - { - psuedo_label += label2[ip]; - } - else - { - break; - } - } - psuedo_label[0] = toupper(psuedo_label[0]); - - if (!(stru_label == psuedo_label - || //' !("Ag1" == "ag_locpsp" || "47" == "47" || "Silver" == - //Silver" )' - std::to_string(ai.atom_Z[stru_label]) == psuedo_label - || // ' !("Ag1" == "47" )' - ai.atom_symbol[stru_label] == psuedo_label - || // ' !("Ag1" == "Silver")' - stru_label == std::to_string(ai.atom_Z[psuedo_label]) - || // ' !("47" == "Ag1" )' - stru_label == std::to_string(ai.symbol_Z[psuedo_label]) - || // ' !("47" == "Silver1" )' - stru_label == ai.atom_symbol[psuedo_label] - || // ' !("Silver1" == "Ag" )' - std::to_string(ai.symbol_Z[stru_label]) - == psuedo_label)) // ' !("Silver1" == "47" )' - - { - std::string atom_label_in_orbtial - = "atom label in orbital file "; - std::string mismatch_with_pseudo - = " mismatch with pseudo file of "; - ModuleBase::WARNING_QUIT("UnitCell::read_pseudo", - atom_label_in_orbtial + label1 - + mismatch_with_pseudo + label2); - } - } - } + if (label1 != label2) //'!( "Ag" == "Ag" || "47" == "47" || "Silver" == Silver" )' + { + atom_in ai; + if (!(std::to_string (ai.atom_Z[label1]) == label2 || // '!( "Ag" == "47" )' + ai.atom_symbol[label1] == label2 || // '!( "Ag" == "Silver" )' + label1 == std::to_string (ai.atom_Z[label2]) || // '!( "47" == "Ag" 
)' + label1 == std::to_string (ai.symbol_Z[label2]) || // '!( "47" == "Silver" )' + label1 == ai.atom_symbol[label2] || // '!( "Silver" == "Ag" )' + std::to_string (ai.symbol_Z[label1]) == label2)) // '!( "Silver" == "47" )' + { + std::string stru_label = ""; + std::string psuedo_label = ""; + for (int ip = 0; ip < label1.length (); ip++) + { + if (!(isdigit (label1[ip]) || label1[ip] == '_')) + { + stru_label += label1[ip]; + } + else + { + break; + } + } + stru_label[0] = toupper (stru_label[0]); + + for (int ip = 0; ip < label2.length (); ip++) + { + if (!(isdigit (label2[ip]) || label2[ip] == '_')) + { + psuedo_label += label2[ip]; + } + else + { + break; + } + } + psuedo_label[0] = toupper (psuedo_label[0]); + + if (!(stru_label == psuedo_label || //' !("Ag1" == "ag_locpsp" || "47" == "47" || "Silver" == + // Silver" )' + std::to_string (ai.atom_Z[stru_label]) == psuedo_label || // ' !("Ag1" == "47" )' + ai.atom_symbol[stru_label] == psuedo_label || // ' !("Ag1" == "Silver")' + stru_label == std::to_string (ai.atom_Z[psuedo_label]) || // ' !("47" == "Ag1" )' + stru_label == std::to_string (ai.symbol_Z[psuedo_label]) || // ' !("47" == "Silver1" )' + stru_label == ai.atom_symbol[psuedo_label] || // ' !("Silver1" == "Ag" )' + std::to_string (ai.symbol_Z[stru_label]) == psuedo_label)) // ' !("Silver1" == "47" )' + + { + std::string atom_label_in_orbtial = "atom label in orbital file "; + std::string mismatch_with_pseudo = " mismatch with pseudo file of "; + ModuleBase::WARNING_QUIT ("UnitCell::read_pseudo", + atom_label_in_orbtial + label1 + mismatch_with_pseudo + label2); + } + } + } } diff --git a/source/source_cell/unitcell.h b/source/source_cell/unitcell.h index e2256b78363..fee6f477a13 100644 --- a/source/source_cell/unitcell.h +++ b/source/source_cell/unitcell.h @@ -11,7 +11,8 @@ #endif // provide the basic information about unitcell. 
-class UnitCell { +class UnitCell +{ public: Atom* atoms = nullptr; Sep_Cell sep_cell; @@ -64,20 +65,29 @@ class UnitCell { // indexing tool for find orbital global index from it,ia,iw template inline Tiait - itiaiw2iwt(const Tiait& it, const Tiait& ia, const Tiait& iw) const { - return Tiait(this->iat2iwt[this->itia2iat(it, ia)] + iw); + itiaiw2iwt (const Tiait& it, const Tiait& ia, const Tiait& iw) const + { + return Tiait (this->iat2iwt[this->itia2iat (it, ia)] + iw); } // initialize iat2iwt - void set_iat2iwt(const int& npol_in); + void set_iat2iwt (const int& npol_in); // get iat2iwt - inline const int* get_iat2iwt() const { return iat2iwt.data(); } + inline const int* + get_iat2iwt () const + { + return iat2iwt.data (); + } // get npol - inline const int& get_npol() const { return npol; } + inline const int& + get_npol () const + { + return npol; + } private: std::vector iat2iwt; // iat ==> iwt, the first global index for orbital of this atom - int npol = 1; // number of spin polarizations, initialized in set_iat2iwt - // ----------------- END of iat2iwt part ----------------- + int npol = 1; // number of spin polarizations, initialized in set_iat2iwt + // ----------------- END of iat2iwt part ----------------- public: //======================================================== @@ -85,77 +95,89 @@ class UnitCell { // return true if the last out is reset //======================================================== template - inline bool iat2iait(const Tiat iat, Tiait* ia, Tiait* it) const { - if (iat >= nat) { - *ia = 0; - *it = ntype; - return false; - } + inline bool + iat2iait (const Tiat iat, Tiait* ia, Tiait* it) const + { + if (iat >= nat) + { + *ia = 0; + *it = ntype; + return false; + } *ia = (Tiait)iat2ia[iat]; *it = (Tiait)iat2it[iat]; return true; } template - inline bool ijat2iaitjajt(const Tiat ijat, - Tiait* ia, - Tiait* it, - Tiait* ja, - Tiait* jt) const { + inline bool + ijat2iaitjajt (const Tiat ijat, Tiait* ia, Tiait* it, Tiait* ja, Tiait* jt) 
const + { Tiat iat = ijat / nat; Tiat jat = ijat % nat; - iat2iait(iat, ia, it); - iat2iait(jat, ja, jt); + iat2iait (iat, ia, it); + iat2iait (jat, ja, jt); return true; } template - inline bool step_it(Tiait* it) const { - if (++(*it) >= ntype) { - *it = 0; - return true; - } + inline bool + step_it (Tiait* it) const + { + if (++(*it) >= ntype) + { + *it = 0; + return true; + } return false; } template - inline bool step_ia(const Tiait it, Tiait* ia) const { - if (++(*ia) >= atoms[it].na) { - *ia = 0; - return true; - } + inline bool + step_ia (const Tiait it, Tiait* ia) const + { + if (++(*ia) >= atoms[it].na) + { + *ia = 0; + return true; + } return false; } template - inline bool step_iait(Tiait* ia, Tiait* it) const { - if (step_ia(*it, ia)) { - return step_it(it); - } + inline bool + step_iait (Tiait* ia, Tiait* it) const + { + if (step_ia (*it, ia)) + { + return step_it (it); + } return false; } template inline bool - step_jajtiait(Tiait* ja, Tiait* jt, Tiait* ia, Tiait* it) const { - if (step_iait(ja, jt)) { - return step_iait(ia, it); - } + step_jajtiait (Tiait* ja, Tiait* jt, Tiait* ia, Tiait* it) const + { + if (step_iait (ja, jt)) + { + return step_iait (ia, it); + } return false; } // get tau for atom iat - inline const ModuleBase::Vector3& get_tau(const int& iat) const { + inline const ModuleBase::Vector3& + get_tau (const int& iat) const + { return atoms[iat2it[iat]].tau[iat2ia[iat]]; } // calculate vector between two atoms with R cell inline const ModuleBase::Vector3 - cal_dtau(const int& iat1, - const int& iat2, - const ModuleBase::Vector3& R) const { - return get_tau(iat2) + double(R.x) * a1 + double(R.y) * a2 - + double(R.z) * a3 - get_tau(iat1); + cal_dtau (const int& iat1, const int& iat2, const ModuleBase::Vector3& R) const + { + return get_tau (iat2) + double (R.x) * a1 + double (R.y) * a2 + double (R.z) * a3 - get_tau (iat1); } // LiuXh add 20180515 @@ -165,10 +187,8 @@ class UnitCell { ModuleBase::Matrix3 invGGT0; // I'm doing a bad thing 
here! Will change later - bool ionic_position_updated - = false; // whether the ionic position has been updated - bool cell_parameter_updated - = false; // whether the cell parameters are updated + bool ionic_position_updated = false; // whether the ionic position has been updated + bool cell_parameter_updated = false; // whether the cell parameters are updated //============================================================ // meshx : max number of mesh point in pseudopotential file @@ -185,29 +205,29 @@ class UnitCell { int nmax = 0; int nmax_total = 0; // mohan add 2009-09-10 int lmax_ppwf = 0; - int lmaxmax = 0; // liuyu 2021-07-04 + int lmaxmax = 0; // liuyu 2021-07-04 bool init_vel = false; // liuyu 2021-07-15 - // double nelec; + // double nelec; private: ModuleBase::Matrix3 stress; // calculate stress on the cell public: - UnitCell(); - ~UnitCell(); - void print_cell(std::ofstream& ofs) const; + UnitCell (); + ~UnitCell (); + void print_cell (std::ofstream& ofs) const; - std::vector atom_mass; + std::vector atom_mass; std::vector atom_label; std::vector pseudo_fn; std::vector pseudo_type; - std::vector orbital_fn; // filenames of orbitals, liuyu add 2022-10-19 - std::string descriptor_file; // filenames of descriptor_file, liuyu add 2023-04-06 + std::vector orbital_fn; // filenames of orbitals, liuyu add 2022-10-19 + std::string descriptor_file; // filenames of descriptor_file, liuyu add 2023-04-06 - void set_iat2itia(); + void set_iat2itia (); - void setup_cell(const std::string& fn, std::ofstream& log); + void setup_cell (const std::string& fn, std::ofstream& log); #ifdef __LCAO InfoNonlocal infoNL; // store nonlocal information of lcao, added by zhengdy @@ -222,41 +242,41 @@ class UnitCell { // cal_nwfc : calculate total number of local basis and lmax // cal_meshx : calculate max number of mesh points in pp file //================================================================ - bool if_atoms_can_move() const; - bool if_cell_can_change() const; - void 
setup(const std::string& latname_in, - const int& ntype_in, - const int& lmaxmax_in, - const bool& init_vel_in, - const std::string& fixed_axes_in); + bool if_atoms_can_move () const; + bool if_cell_can_change () const; + void setup (const std::string& latname_in, + const int& ntype_in, + const int& lmaxmax_in, + const bool& init_vel_in, + const std::string& fixed_axes_in); /// @brief check consistency between two atom labels from STRU and pseudo or /// orb file - void compare_atom_labels(const std::string& label1, const std::string& label2) const; + void compare_atom_labels (const std::string& label1, const std::string& label2) const; /// @brief get atomCounts, which is a map from element type to atom number - std::map get_atom_Counts() const; + std::map get_atom_Counts () const; /// @brief get orbitalCounts, which is a map from element type to orbital /// number - std::map get_orbital_Counts() const; + std::map get_orbital_Counts () const; /// @brief get lnchiCounts, which is a map from element type to the l:nchi /// map - std::map> get_lnchi_Counts() const; + std::map> get_lnchi_Counts () const; /// these are newly added functions, the three above functions are /// deprecated and will be removed in the future /// @brief get atom labels - std::vector get_atomLabels() const; + std::vector get_atomLabels () const; /// @brief get atomCounts, which is a vector of element type with atom /// number - std::vector get_atomCounts() const; + std::vector get_atomCounts () const; /// @brief get lnchiCounts, which is a vector of element type with the /// l:nchi vector - std::vector> get_lnchiCounts() const; + std::vector> get_lnchiCounts () const; /// @brief get target magnetic moment for deltaspin - std::vector> get_target_mag() const; + std::vector> get_target_mag () const; /// @brief get lagrange multiplier for deltaspin - std::vector> get_lambda() const; + std::vector> get_lambda () const; /// @brief get constrain for deltaspin - std::vector> get_constrain() const; + 
std::vector> get_constrain () const; }; #endif // unitcell class diff --git a/source/source_cell/unitcell_data.h b/source/source_cell/unitcell_data.h index 522d638fd26..4d8715d116d 100644 --- a/source/source_cell/unitcell_data.h +++ b/source/source_cell/unitcell_data.h @@ -6,28 +6,25 @@ /// @brief info of lattice struct Lattice { - std::string Coordinate = "Direct"; // "Direct" or "Cartesian" or "Cartesian_angstrom" - std::string latName = "user_defined_lattice"; // Lattice name - double lat0 = 0.0; // Lattice constant(bohr)(a.u.) - double lat0_angstrom = 0.0; // Lattice constant(angstrom) - double tpiba = 0.0; // 2*pi / lat0; - double tpiba2 = 0.0; // tpiba ^ 2 - double omega = 0.0; // the volume of the unit cell - int* lc = new int[3]; // Change the lattice vectors or not + std::string Coordinate = "Direct"; // "Direct" or "Cartesian" or "Cartesian_angstrom" + std::string latName = "user_defined_lattice"; // Lattice name + double lat0 = 0.0; // Lattice constant(bohr)(a.u.) + double lat0_angstrom = 0.0; // Lattice constant(angstrom) + double tpiba = 0.0; // 2*pi / lat0; + double tpiba2 = 0.0; // tpiba ^ 2 + double omega = 0.0; // the volume of the unit cell + int* lc = new int[3]; // Change the lattice vectors or not - ModuleBase::Matrix3 latvec = ModuleBase::Matrix3(); // Unitcell lattice vectors - ModuleBase::Vector3 a1, a2, a3; // Same as latvec, just at another form. 
- ModuleBase::Vector3 latcenter; // (a1+a2+a3)/2 the center of vector - ModuleBase::Matrix3 latvec_supercell = ModuleBase::Matrix3(); // Supercell lattice vectors - ModuleBase::Matrix3 G = ModuleBase::Matrix3(); // reciprocal lattice vector (2pi*inv(R) ) - ModuleBase::Matrix3 GT = ModuleBase::Matrix3(); // traspose of G - ModuleBase::Matrix3 GGT = ModuleBase::Matrix3(); // GGT = G*GT - ModuleBase::Matrix3 invGGT = ModuleBase::Matrix3(); // inverse G + ModuleBase::Matrix3 latvec = ModuleBase::Matrix3 (); // Unitcell lattice vectors + ModuleBase::Vector3 a1, a2, a3; // Same as latvec, just at another form. + ModuleBase::Vector3 latcenter; // (a1+a2+a3)/2 the center of vector + ModuleBase::Matrix3 latvec_supercell = ModuleBase::Matrix3 (); // Supercell lattice vectors + ModuleBase::Matrix3 G = ModuleBase::Matrix3 (); // reciprocal lattice vector (2pi*inv(R) ) + ModuleBase::Matrix3 GT = ModuleBase::Matrix3 (); // traspose of G + ModuleBase::Matrix3 GGT = ModuleBase::Matrix3 (); // GGT = G*GT + ModuleBase::Matrix3 invGGT = ModuleBase::Matrix3 (); // inverse G - ~Lattice() - { - delete[] lc; - } + ~Lattice () { delete[] lc; } }; //======================================================== @@ -55,7 +52,7 @@ struct Statistics int namax = 0; // the max na among all atom species int nwmax = 0; // the max nw among all atom species - ~Statistics() + ~Statistics () { delete[] iat2it; delete[] iat2ia; diff --git a/source/source_cell/update_cell.cpp b/source/source_cell/update_cell.cpp index 4da4013d6af..1eb4fae4179 100644 --- a/source/source_cell/update_cell.cpp +++ b/source/source_cell/update_cell.cpp @@ -6,319 +6,296 @@ namespace unitcell { -void remake_cell(Lattice& lat) +void + remake_cell (Lattice& lat) { - ModuleBase::TITLE("Lattice", "rmake_cell"); + ModuleBase::TITLE ("Lattice", "rmake_cell"); // The idea is as follows: for each type of lattice, first calculate // from current latvec the lattice parameters, then use the parameters // to reconstruct latvec std::string& 
latName = lat.latName; - ModuleBase::Matrix3& latvec = lat.latvec; + ModuleBase::Matrix3& latvec = lat.latvec; if (latName == "user_defined_lattice") - { - ModuleBase::WARNING_QUIT("UnitCell", "to use fixed_ibrav, latname must be provided"); - } + { + ModuleBase::WARNING_QUIT ("UnitCell", "to use fixed_ibrav, latname must be provided"); + } else if (latName == "sc") // ibrav = 1 - { - double celldm = std::sqrt(pow(latvec.e11, 2) + pow(latvec.e12, 2) - + pow(latvec.e13, 2)); + { + double celldm = std::sqrt (pow (latvec.e11, 2) + pow (latvec.e12, 2) + pow (latvec.e13, 2)); - latvec.Zero(); - latvec.e11 = latvec.e22 = latvec.e33 = celldm; - } + latvec.Zero (); + latvec.e11 = latvec.e22 = latvec.e33 = celldm; + } else if (latName == "fcc") // ibrav = 2 - { - double celldm = std::sqrt(pow(latvec.e11, 2) + pow(latvec.e12, 2) - + pow(latvec.e13, 2)) / std::sqrt(2.0); - - latvec.e11 = -celldm; - latvec.e12 = 0.0; - latvec.e13 = celldm; - latvec.e21 = 0.0; - latvec.e22 = celldm; - latvec.e23 = celldm; - latvec.e31 = -celldm; - latvec.e32 = celldm; - latvec.e33 = 0.0; - } + { + double celldm + = std::sqrt (pow (latvec.e11, 2) + pow (latvec.e12, 2) + pow (latvec.e13, 2)) / std::sqrt (2.0); + + latvec.e11 = -celldm; + latvec.e12 = 0.0; + latvec.e13 = celldm; + latvec.e21 = 0.0; + latvec.e22 = celldm; + latvec.e23 = celldm; + latvec.e31 = -celldm; + latvec.e32 = celldm; + latvec.e33 = 0.0; + } else if (latName == "bcc") // ibrav = 3 - { - double celldm = std::sqrt(pow(latvec.e11, 2) + pow(latvec.e12, 2) - + pow(latvec.e13, 2)) - / std::sqrt(3.0); - - latvec.e11 = celldm; - latvec.e12 = celldm; - latvec.e13 = celldm; - latvec.e21 = -celldm; - latvec.e22 = celldm; - latvec.e23 = celldm; - latvec.e31 = -celldm; - latvec.e32 = -celldm; - latvec.e33 = celldm; - } + { + double celldm + = std::sqrt (pow (latvec.e11, 2) + pow (latvec.e12, 2) + pow (latvec.e13, 2)) / std::sqrt (3.0); + + latvec.e11 = celldm; + latvec.e12 = celldm; + latvec.e13 = celldm; + latvec.e21 = -celldm; + 
latvec.e22 = celldm; + latvec.e23 = celldm; + latvec.e31 = -celldm; + latvec.e32 = -celldm; + latvec.e33 = celldm; + } else if (latName == "hexagonal") // ibrav = 4 - { - double celldm1 = std::sqrt(pow(latvec.e11, 2) + pow(latvec.e12, 2) - + pow(latvec.e13, 2)); - double celldm3 = std::sqrt(pow(latvec.e31, 2) + pow(latvec.e32, 2) - + pow(latvec.e33, 2)); - double e22 = sqrt(3.0) / 2.0; - - latvec.e11 = celldm1; - latvec.e12 = 0.0; - latvec.e13 = 0.0; - latvec.e21 = -0.5 * celldm1; - latvec.e22 = celldm1 * e22; - latvec.e23 = 0.0; - latvec.e31 = 0.0; - latvec.e32 = 0.0; - latvec.e33 = celldm3; - } + { + double celldm1 = std::sqrt (pow (latvec.e11, 2) + pow (latvec.e12, 2) + pow (latvec.e13, 2)); + double celldm3 = std::sqrt (pow (latvec.e31, 2) + pow (latvec.e32, 2) + pow (latvec.e33, 2)); + double e22 = sqrt (3.0) / 2.0; + + latvec.e11 = celldm1; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = -0.5 * celldm1; + latvec.e22 = celldm1 * e22; + latvec.e23 = 0.0; + latvec.e31 = 0.0; + latvec.e32 = 0.0; + latvec.e33 = celldm3; + } else if (latName == "trigonal") // ibrav = 5 - { - double celldm1 = std::sqrt(pow(latvec.e11, 2) + pow(latvec.e12, 2) - + pow(latvec.e13, 2)); - double celldm2 = std::sqrt(pow(latvec.e21, 2) + pow(latvec.e22, 2) - + pow(latvec.e23, 2)); - double celldm12 = (latvec.e11 * latvec.e21 + latvec.e12 * latvec.e22 - + latvec.e13 * latvec.e23); - double cos12 = celldm12 / celldm1 / celldm2; - - if (cos12 <= -0.5 || cos12 >= 1.0) - { - ModuleBase::WARNING_QUIT("unitcell", "wrong cos12!"); - } - double t1 = sqrt(1.0 + 2.0 * cos12); - double t2 = sqrt(1.0 - cos12); - - double e11 = celldm1 * t2 / sqrt(2.0); - double e12 = -celldm1 * t2 / sqrt(6.0); - double e13 = celldm1 * t1 / sqrt(3.0); - double e22 = celldm1 * sqrt(2.0) * t2 / sqrt(3.0); - - latvec.e11 = e11; - latvec.e12 = e12; - latvec.e13 = e13; - latvec.e21 = 0.0; - latvec.e22 = e22; - latvec.e23 = e13; - latvec.e31 = -e11; - latvec.e32 = e12; - latvec.e33 = e13; - } + { + double celldm1 = 
std::sqrt (pow (latvec.e11, 2) + pow (latvec.e12, 2) + pow (latvec.e13, 2)); + double celldm2 = std::sqrt (pow (latvec.e21, 2) + pow (latvec.e22, 2) + pow (latvec.e23, 2)); + double celldm12 = (latvec.e11 * latvec.e21 + latvec.e12 * latvec.e22 + latvec.e13 * latvec.e23); + double cos12 = celldm12 / celldm1 / celldm2; + + if (cos12 <= -0.5 || cos12 >= 1.0) + { + ModuleBase::WARNING_QUIT ("unitcell", "wrong cos12!"); + } + double t1 = sqrt (1.0 + 2.0 * cos12); + double t2 = sqrt (1.0 - cos12); + + double e11 = celldm1 * t2 / sqrt (2.0); + double e12 = -celldm1 * t2 / sqrt (6.0); + double e13 = celldm1 * t1 / sqrt (3.0); + double e22 = celldm1 * sqrt (2.0) * t2 / sqrt (3.0); + + latvec.e11 = e11; + latvec.e12 = e12; + latvec.e13 = e13; + latvec.e21 = 0.0; + latvec.e22 = e22; + latvec.e23 = e13; + latvec.e31 = -e11; + latvec.e32 = e12; + latvec.e33 = e13; + } else if (latName == "st") // ibrav = 6 - { - double celldm1 = std::sqrt(pow(latvec.e11, 2) + pow(latvec.e12, 2) - + pow(latvec.e13, 2)); - double celldm3 = std::sqrt(pow(latvec.e31, 2) + pow(latvec.e32, 2) - + pow(latvec.e33, 2)); - latvec.e11 = celldm1; - latvec.e12 = 0.0; - latvec.e13 = 0.0; - latvec.e21 = 0.0; - latvec.e22 = celldm1; - latvec.e23 = 0.0; - latvec.e31 = 0.0; - latvec.e32 = 0.0; - latvec.e33 = celldm3; - } + { + double celldm1 = std::sqrt (pow (latvec.e11, 2) + pow (latvec.e12, 2) + pow (latvec.e13, 2)); + double celldm3 = std::sqrt (pow (latvec.e31, 2) + pow (latvec.e32, 2) + pow (latvec.e33, 2)); + latvec.e11 = celldm1; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = 0.0; + latvec.e22 = celldm1; + latvec.e23 = 0.0; + latvec.e31 = 0.0; + latvec.e32 = 0.0; + latvec.e33 = celldm3; + } else if (latName == "bct") // ibrav = 7 - { - double celldm1 = std::abs(latvec.e11); - double celldm2 = std::abs(latvec.e13); - - latvec.e11 = celldm1; - latvec.e12 = -celldm1; - latvec.e13 = celldm2; - latvec.e21 = celldm1; - latvec.e22 = celldm1; - latvec.e23 = celldm2; - latvec.e31 = -celldm1; - latvec.e32 = 
-celldm1; - latvec.e33 = celldm2; - } + { + double celldm1 = std::abs (latvec.e11); + double celldm2 = std::abs (latvec.e13); + + latvec.e11 = celldm1; + latvec.e12 = -celldm1; + latvec.e13 = celldm2; + latvec.e21 = celldm1; + latvec.e22 = celldm1; + latvec.e23 = celldm2; + latvec.e31 = -celldm1; + latvec.e32 = -celldm1; + latvec.e33 = celldm2; + } else if (latName == "so") // ibrav = 8 - { - double celldm1 = std::sqrt(pow(latvec.e11, 2) + pow(latvec.e12, 2) - + pow(latvec.e13, 2)); - double celldm2 = std::sqrt(pow(latvec.e21, 2) + pow(latvec.e22, 2) - + pow(latvec.e23, 2)); - double celldm3 = std::sqrt(pow(latvec.e31, 2) + pow(latvec.e32, 2) - + pow(latvec.e33, 2)); - - latvec.e11 = celldm1; - latvec.e12 = 0.0; - latvec.e13 = 0.0; - latvec.e21 = 0.0; - latvec.e22 = celldm2; - latvec.e23 = 0.0; - latvec.e31 = 0.0; - latvec.e32 = 0.0; - latvec.e33 = celldm3; - } + { + double celldm1 = std::sqrt (pow (latvec.e11, 2) + pow (latvec.e12, 2) + pow (latvec.e13, 2)); + double celldm2 = std::sqrt (pow (latvec.e21, 2) + pow (latvec.e22, 2) + pow (latvec.e23, 2)); + double celldm3 = std::sqrt (pow (latvec.e31, 2) + pow (latvec.e32, 2) + pow (latvec.e33, 2)); + + latvec.e11 = celldm1; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = 0.0; + latvec.e22 = celldm2; + latvec.e23 = 0.0; + latvec.e31 = 0.0; + latvec.e32 = 0.0; + latvec.e33 = celldm3; + } else if (latName == "baco") // ibrav = 9 - { - double celldm1 = std::abs(latvec.e11); - double celldm2 = std::abs(latvec.e22); - double celldm3 = std::abs(latvec.e33); - - latvec.e11 = celldm1; - latvec.e12 = celldm2; - latvec.e13 = 0.0; - latvec.e21 = -celldm1; - latvec.e22 = celldm2; - latvec.e23 = 0.0; - latvec.e31 = 0.0; - latvec.e32 = 0.0; - latvec.e33 = celldm3; - } + { + double celldm1 = std::abs (latvec.e11); + double celldm2 = std::abs (latvec.e22); + double celldm3 = std::abs (latvec.e33); + + latvec.e11 = celldm1; + latvec.e12 = celldm2; + latvec.e13 = 0.0; + latvec.e21 = -celldm1; + latvec.e22 = celldm2; + 
latvec.e23 = 0.0; + latvec.e31 = 0.0; + latvec.e32 = 0.0; + latvec.e33 = celldm3; + } else if (latName == "fco") // ibrav = 10 - { - double celldm1 = std::abs(latvec.e11); - double celldm2 = std::abs(latvec.e22); - double celldm3 = std::abs(latvec.e33); - - latvec.e11 = celldm1; - latvec.e12 = 0.0; - latvec.e13 = celldm3; - latvec.e21 = celldm1; - latvec.e22 = celldm2; - latvec.e23 = 0.0; - latvec.e31 = 0.0; - latvec.e32 = celldm2; - latvec.e33 = celldm3; - } + { + double celldm1 = std::abs (latvec.e11); + double celldm2 = std::abs (latvec.e22); + double celldm3 = std::abs (latvec.e33); + + latvec.e11 = celldm1; + latvec.e12 = 0.0; + latvec.e13 = celldm3; + latvec.e21 = celldm1; + latvec.e22 = celldm2; + latvec.e23 = 0.0; + latvec.e31 = 0.0; + latvec.e32 = celldm2; + latvec.e33 = celldm3; + } else if (latName == "bco") // ibrav = 11 - { - double celldm1 = std::abs(latvec.e11); - double celldm2 = std::abs(latvec.e12); - double celldm3 = std::abs(latvec.e13); - - latvec.e11 = celldm1; - latvec.e12 = celldm2; - latvec.e13 = celldm3; - latvec.e21 = -celldm1; - latvec.e22 = celldm2; - latvec.e23 = celldm3; - latvec.e31 = -celldm1; - latvec.e32 = -celldm2; - latvec.e33 = celldm3; - } + { + double celldm1 = std::abs (latvec.e11); + double celldm2 = std::abs (latvec.e12); + double celldm3 = std::abs (latvec.e13); + + latvec.e11 = celldm1; + latvec.e12 = celldm2; + latvec.e13 = celldm3; + latvec.e21 = -celldm1; + latvec.e22 = celldm2; + latvec.e23 = celldm3; + latvec.e31 = -celldm1; + latvec.e32 = -celldm2; + latvec.e33 = celldm3; + } else if (latName == "sm") // ibrav = 12 - { - double celldm1 = std::sqrt(pow(latvec.e11, 2) + pow(latvec.e12, 2) - + pow(latvec.e13, 2)); - double celldm2 = std::sqrt(pow(latvec.e21, 2) + pow(latvec.e22, 2) - + pow(latvec.e23, 2)); - double celldm3 = std::sqrt(pow(latvec.e31, 2) + pow(latvec.e32, 2) - + pow(latvec.e33, 2)); - double celldm12 = (latvec.e11 * latvec.e21 + latvec.e12 * latvec.e22 - + latvec.e13 * latvec.e23); - double cos12 = 
celldm12 / celldm1 / celldm2; - - double e21 = celldm2 * cos12; - double e22 = celldm2 * std::sqrt(1.0 - cos12 * cos12); - - latvec.e11 = celldm1; - latvec.e12 = 0.0; - latvec.e13 = 0.0; - latvec.e21 = e21; - latvec.e22 = e22; - latvec.e23 = 0.0; - latvec.e31 = 0.0; - latvec.e32 = 0.0; - latvec.e33 = celldm3; - } + { + double celldm1 = std::sqrt (pow (latvec.e11, 2) + pow (latvec.e12, 2) + pow (latvec.e13, 2)); + double celldm2 = std::sqrt (pow (latvec.e21, 2) + pow (latvec.e22, 2) + pow (latvec.e23, 2)); + double celldm3 = std::sqrt (pow (latvec.e31, 2) + pow (latvec.e32, 2) + pow (latvec.e33, 2)); + double celldm12 = (latvec.e11 * latvec.e21 + latvec.e12 * latvec.e22 + latvec.e13 * latvec.e23); + double cos12 = celldm12 / celldm1 / celldm2; + + double e21 = celldm2 * cos12; + double e22 = celldm2 * std::sqrt (1.0 - cos12 * cos12); + + latvec.e11 = celldm1; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = e21; + latvec.e22 = e22; + latvec.e23 = 0.0; + latvec.e31 = 0.0; + latvec.e32 = 0.0; + latvec.e33 = celldm3; + } else if (latName == "bacm") // ibrav = 13 - { - double celldm1 = std::abs(latvec.e11); - double celldm2 = std::sqrt(pow(latvec.e21, 2) + pow(latvec.e22, 2) - + pow(latvec.e23, 2)); - double celldm3 = std::abs(latvec.e13); - - double cos12 = latvec.e21 / celldm2; - if (cos12 >= 1.0) - { - ModuleBase::WARNING_QUIT("unitcell", "wrong cos12!"); - } - - double e21 = celldm2 * cos12; - double e22 = celldm2 * std::sqrt(1.0 - cos12 * cos12); - - latvec.e11 = celldm1; - latvec.e12 = 0.0; - latvec.e13 = -celldm3; - latvec.e21 = e21; - latvec.e22 = e22; - latvec.e23 = 0.0; - latvec.e31 = celldm1; - latvec.e32 = 0.0; - latvec.e33 = celldm3; - } + { + double celldm1 = std::abs (latvec.e11); + double celldm2 = std::sqrt (pow (latvec.e21, 2) + pow (latvec.e22, 2) + pow (latvec.e23, 2)); + double celldm3 = std::abs (latvec.e13); + + double cos12 = latvec.e21 / celldm2; + if (cos12 >= 1.0) + { + ModuleBase::WARNING_QUIT ("unitcell", "wrong cos12!"); + } + + 
double e21 = celldm2 * cos12; + double e22 = celldm2 * std::sqrt (1.0 - cos12 * cos12); + + latvec.e11 = celldm1; + latvec.e12 = 0.0; + latvec.e13 = -celldm3; + latvec.e21 = e21; + latvec.e22 = e22; + latvec.e23 = 0.0; + latvec.e31 = celldm1; + latvec.e32 = 0.0; + latvec.e33 = celldm3; + } else if (latName == "triclinic") // ibrav = 14 - { - double celldm1 = std::sqrt(pow(latvec.e11, 2) + pow(latvec.e12, 2) - + pow(latvec.e13, 2)); - double celldm2 = std::sqrt(pow(latvec.e21, 2) + pow(latvec.e22, 2) - + pow(latvec.e23, 2)); - double celldm3 = std::sqrt(pow(latvec.e31, 2) + pow(latvec.e32, 2) - + pow(latvec.e33, 2)); - double celldm12 = (latvec.e11 * latvec.e21 + latvec.e12 * latvec.e22 - + latvec.e13 * latvec.e23); - double cos12 = celldm12 / celldm1 / celldm2; - double celldm13 = (latvec.e11 * latvec.e31 + latvec.e12 * latvec.e32 - + latvec.e13 * latvec.e33); - double cos13 = celldm13 / celldm1 / celldm3; - double celldm23 = (latvec.e21 * latvec.e31 + latvec.e22 * latvec.e32 - + latvec.e23 * latvec.e33); - double cos23 = celldm23 / celldm2 / celldm3; - - double sin12 = std::sqrt(1.0 - cos12 * cos12); - if (cos12 >= 1.0) - { - ModuleBase::WARNING_QUIT("unitcell", "wrong cos12!"); - } - - latvec.e11 = celldm1; - latvec.e12 = 0.0; - latvec.e13 = 0.0; - latvec.e21 = celldm2 * cos12; - latvec.e22 = celldm2 * sin12; - latvec.e23 = 0.0; - latvec.e31 = celldm3 * cos13; - latvec.e32 = celldm3 * (cos23 - cos13 * cos12) / sin12; - double term = 1.0 + 2.0 * cos12 * cos13 * cos23 - cos12 * cos12 - - cos13 * cos13 - cos23 * cos23; - term = sqrt(term) / sin12; - latvec.e33 = celldm3 * term; - } - else - { - std::cout << "latname is : " << latName << std::endl; - ModuleBase::WARNING_QUIT("unitcell::remake_cell", - "latname type not supported!"); - } + { + double celldm1 = std::sqrt (pow (latvec.e11, 2) + pow (latvec.e12, 2) + pow (latvec.e13, 2)); + double celldm2 = std::sqrt (pow (latvec.e21, 2) + pow (latvec.e22, 2) + pow (latvec.e23, 2)); + double celldm3 = std::sqrt (pow 
(latvec.e31, 2) + pow (latvec.e32, 2) + pow (latvec.e33, 2)); + double celldm12 = (latvec.e11 * latvec.e21 + latvec.e12 * latvec.e22 + latvec.e13 * latvec.e23); + double cos12 = celldm12 / celldm1 / celldm2; + double celldm13 = (latvec.e11 * latvec.e31 + latvec.e12 * latvec.e32 + latvec.e13 * latvec.e33); + double cos13 = celldm13 / celldm1 / celldm3; + double celldm23 = (latvec.e21 * latvec.e31 + latvec.e22 * latvec.e32 + latvec.e23 * latvec.e33); + double cos23 = celldm23 / celldm2 / celldm3; + + double sin12 = std::sqrt (1.0 - cos12 * cos12); + if (cos12 >= 1.0) + { + ModuleBase::WARNING_QUIT ("unitcell", "wrong cos12!"); + } + + latvec.e11 = celldm1; + latvec.e12 = 0.0; + latvec.e13 = 0.0; + latvec.e21 = celldm2 * cos12; + latvec.e22 = celldm2 * sin12; + latvec.e23 = 0.0; + latvec.e31 = celldm3 * cos13; + latvec.e32 = celldm3 * (cos23 - cos13 * cos12) / sin12; + double term = 1.0 + 2.0 * cos12 * cos13 * cos23 - cos12 * cos12 - cos13 * cos13 - cos23 * cos23; + term = sqrt (term) / sin12; + latvec.e33 = celldm3 * term; + } + else + { + std::cout << "latname is : " << latName << std::endl; + ModuleBase::WARNING_QUIT ("unitcell::remake_cell", "latname type not supported!"); + } } // LiuXh add a new function here, // 20180515 -void setup_cell_after_vc(UnitCell& ucell, std::ofstream& log) +void + setup_cell_after_vc (UnitCell& ucell, std::ofstream& log) { - ModuleBase::TITLE("unitcell", "setup_cell_after_vc"); - assert(ucell.lat0 > 0.0); - ucell.omega = std::abs(ucell.latvec.Det()) * - pow(ucell.lat0, 3); - if (ucell.omega <= 0) - { - ModuleBase::WARNING_QUIT("setup_cell_after_vc", "Cell volume <= 0 ."); - } else { - ModuleBase::GlobalFunc::OUT(log, "Cell volume (Bohr^3)", ucell.omega); - ModuleBase::GlobalFunc::OUT(log, "Cell volume (A^3)", - ucell.omega * pow(ModuleBase::BOHR_TO_A, 3)); - } + ModuleBase::TITLE ("unitcell", "setup_cell_after_vc"); + assert (ucell.lat0 > 0.0); + ucell.omega = std::abs (ucell.latvec.Det ()) * pow (ucell.lat0, 3); + if (ucell.omega <= 
0) + { + ModuleBase::WARNING_QUIT ("setup_cell_after_vc", "Cell volume <= 0 ."); + } + else + { + ModuleBase::GlobalFunc::OUT (log, "Cell volume (Bohr^3)", ucell.omega); + ModuleBase::GlobalFunc::OUT (log, "Cell volume (A^3)", ucell.omega * pow (ModuleBase::BOHR_TO_A, 3)); + } ucell.lat0_angstrom = ucell.lat0 * ModuleBase::BOHR_TO_A; ucell.tpiba = ModuleBase::TWO_PI / ucell.lat0; @@ -341,138 +318,126 @@ void setup_cell_after_vc(UnitCell& ucell, std::ofstream& log) // Calculate recip. lattice vectors and dot products // latvec has the unit of lat0, but G has the unit 2Pi/lat0 //========================================================== - ucell.GT = ucell.latvec.Inverse(); - ucell.G = ucell.GT.Transpose(); + ucell.GT = ucell.latvec.Inverse (); + ucell.G = ucell.GT.Transpose (); ucell.GGT = ucell.G * ucell.GT; - ucell.invGGT = ucell.GGT.Inverse(); - - for (int it = 0; it < ucell.ntype; it++) - { - Atom* atom = &ucell.atoms[it]; - for (int ia = 0; ia < atom->na; ia++) - { - atom->tau[ia] = atom->taud[ia] * ucell.latvec; + ucell.invGGT = ucell.GGT.Inverse (); + + for (int it = 0; it < ucell.ntype; it++) + { + Atom* atom = &ucell.atoms[it]; + for (int ia = 0; ia < atom->na; ia++) + { + atom->tau[ia] = atom->taud[ia] * ucell.latvec; + } } - } #ifdef __MPI - bcast_unitcell(ucell); + bcast_unitcell (ucell); #endif log << std::endl; - output::printM3(log, - "Lattice vectors: (Cartesian coordinate: in unit of a_0)", - ucell.latvec); - output::printM3(log, - "Reciprocal vectors: (Cartesian coordinate: in unit of 2 pi/a_0)", - ucell.G); + output::printM3 (log, "Lattice vectors: (Cartesian coordinate: in unit of a_0)", ucell.latvec); + output::printM3 (log, "Reciprocal vectors: (Cartesian coordinate: in unit of 2 pi/a_0)", ucell.G); return; } -void update_pos_tau(const Lattice& lat, - const double* pos, - const int ntype, - const int nat, - Atom* atoms) +void + update_pos_tau (const Lattice& lat, const double* pos, const int ntype, const int nat, Atom* atoms) { int iat = 0; - 
for (int it = 0; it < ntype; it++) - { - Atom* atom = &atoms[it]; - for (int ia = 0; ia < atom->na; ia++) - { - for (int ik = 0; ik < 3; ++ik) - { - if (atom->mbl[ia][ik]) + for (int it = 0; it < ntype; it++) + { + Atom* atom = &atoms[it]; + for (int ia = 0; ia < atom->na; ia++) { - atom->dis[ia][ik] = pos[3 * iat + ik] / lat.lat0 - atom->tau[ia][ik]; - atom->tau[ia][ik] = pos[3 * iat + ik] / lat.lat0; + for (int ik = 0; ik < 3; ++ik) + { + if (atom->mbl[ia][ik]) + { + atom->dis[ia][ik] = pos[3 * iat + ik] / lat.lat0 - atom->tau[ia][ik]; + atom->tau[ia][ik] = pos[3 * iat + ik] / lat.lat0; + } + } + // the direct coordinates also need to be updated. + atom->dis[ia] = atom->dis[ia] * lat.GT; + atom->taud[ia] = atom->tau[ia] * lat.GT; + iat++; } - } - // the direct coordinates also need to be updated. - atom->dis[ia] = atom->dis[ia] * lat.GT; - atom->taud[ia] = atom->tau[ia] * lat.GT; - iat++; } - } - assert(iat == nat); - periodic_boundary_adjustment(atoms,lat.latvec,ntype); - bcast_atoms_tau(atoms, ntype); + assert (iat == nat); + periodic_boundary_adjustment (atoms, lat.latvec, ntype); + bcast_atoms_tau (atoms, ntype); } -void update_pos_taud(const Lattice& lat, - const double* posd_in, - const int ntype, - const int nat, - Atom* atoms) +void + update_pos_taud (const Lattice& lat, const double* posd_in, const int ntype, const int nat, Atom* atoms) { int iat = 0; - for (int it = 0; it < ntype; it++) - { - Atom* atom = &atoms[it]; - for (int ia = 0; ia < atom->na; ia++) + for (int it = 0; it < ntype; it++) { - for (int ik = 0; ik < 3; ++ik) - { - atom->taud[ia][ik] += posd_in[3 * iat + ik]; - atom->dis[ia][ik] = posd_in[3 * iat + ik]; - } - iat++; + Atom* atom = &atoms[it]; + for (int ia = 0; ia < atom->na; ia++) + { + for (int ik = 0; ik < 3; ++ik) + { + atom->taud[ia][ik] += posd_in[3 * iat + ik]; + atom->dis[ia][ik] = posd_in[3 * iat + ik]; + } + iat++; + } } - } - assert(iat == nat); - periodic_boundary_adjustment(atoms,lat.latvec,ntype); - bcast_atoms_tau(atoms, 
ntype); + assert (iat == nat); + periodic_boundary_adjustment (atoms, lat.latvec, ntype); + bcast_atoms_tau (atoms, ntype); } // posd_in is atomic displacements here liuyu 2023-03-22 -void update_pos_taud(const Lattice& lat, +void + update_pos_taud (const Lattice& lat, const ModuleBase::Vector3* posd_in, const int ntype, const int nat, Atom* atoms) { int iat = 0; - for (int it = 0; it < ntype; it++) - { - Atom* atom = &atoms[it]; - for (int ia = 0; ia < atom->na; ia++) + for (int it = 0; it < ntype; it++) { - for (int ik = 0; ik < 3; ++ik) - { - atom->taud[ia][ik] += posd_in[iat][ik]; - atom->dis[ia][ik] = posd_in[iat][ik]; - } - iat++; + Atom* atom = &atoms[it]; + for (int ia = 0; ia < atom->na; ia++) + { + for (int ik = 0; ik < 3; ++ik) + { + atom->taud[ia][ik] += posd_in[iat][ik]; + atom->dis[ia][ik] = posd_in[iat][ik]; + } + iat++; + } } - } - assert(iat == nat); - periodic_boundary_adjustment(atoms,lat.latvec,ntype); - bcast_atoms_tau(atoms, ntype); + assert (iat == nat); + periodic_boundary_adjustment (atoms, lat.latvec, ntype); + bcast_atoms_tau (atoms, ntype); } -void update_vel(const ModuleBase::Vector3* vel_in, - const int ntype, - const int nat, - Atom* atoms) +void + update_vel (const ModuleBase::Vector3* vel_in, const int ntype, const int nat, Atom* atoms) { int iat = 0; - for (int it = 0; it < ntype; ++it) - { - Atom* atom = &atoms[it]; - for (int ia = 0; ia < atom->na; ++ia) + for (int it = 0; it < ntype; ++it) { - atoms[it].vel[ia] = vel_in[iat]; - ++iat; + Atom* atom = &atoms[it]; + for (int ia = 0; ia < atom->na; ++ia) + { + atoms[it].vel[ia] = vel_in[iat]; + ++iat; + } } - } - assert(iat == nat); + assert (iat == nat); } -void periodic_boundary_adjustment(Atom* atoms, - const ModuleBase::Matrix3& latvec, - const int ntype) +void + periodic_boundary_adjustment (Atom* atoms, const ModuleBase::Matrix3& latvec, const int ntype) { //---------------------------------------------- // because of the periodic boundary condition @@ -480,45 +445,41 @@ void 
periodic_boundary_adjustment(Atom* atoms, // first adjust direct coordinates, // then update them into cartesian coordinates, //---------------------------------------------- - for (int it = 0; it < ntype; it++) - { - Atom* atom = &atoms[it]; - atom->boundary_shift.assign(atom->na, {0,0,0}); - for (int ia = 0; ia < atom->na; ia++) - { - // mohan update 2011-03-21 - for (int ik = 0; ik < 3; ik++) - { - if (atom->taud[ia][ik] < 0) - { - atom->boundary_shift[ia][ik] += 1; - atom->taud[ia][ik] += 1.0; - } - if (atom->taud[ia][ik] >= 1.0) + for (int it = 0; it < ntype; it++) + { + Atom* atom = &atoms[it]; + atom->boundary_shift.assign (atom->na, {0, 0, 0}); + for (int ia = 0; ia < atom->na; ia++) { - atom->boundary_shift[ia][ik] -= 1; - atom->taud[ia][ik] -= 1.0; + // mohan update 2011-03-21 + for (int ik = 0; ik < 3; ik++) + { + if (atom->taud[ia][ik] < 0) + { + atom->boundary_shift[ia][ik] += 1; + atom->taud[ia][ik] += 1.0; + } + if (atom->taud[ia][ik] >= 1.0) + { + atom->boundary_shift[ia][ik] -= 1; + atom->taud[ia][ik] -= 1.0; + } + } + const double eps = 1e-12; + if (atom->taud[ia].x < -eps || atom->taud[ia].y < -eps || atom->taud[ia].z < -eps + || atom->taud[ia].x >= 1.0 + eps || atom->taud[ia].y >= 1.0 + eps + || atom->taud[ia].z >= 1.0 + eps) + { + GlobalV::ofs_warning << " atom type=" << it + 1 << " atom index=" << ia + 1 << std::endl; + GlobalV::ofs_warning << " direct coordinate=" << atom->taud[ia].x << " " << atom->taud[ia].y + << " " << atom->taud[ia].z << std::endl; + ModuleBase::WARNING_QUIT ("unitcell::periodic_boundary_adjustment", + "Movement of atom is larger than the cell length"); + } + + atom->tau[ia] = atom->taud[ia] * latvec; } - } - const double eps = 1e-12; - if (atom->taud[ia].x < -eps - || atom->taud[ia].y < -eps - || atom->taud[ia].z < -eps - || atom->taud[ia].x >= 1.0+eps - || atom->taud[ia].y >= 1.0+eps - || atom->taud[ia].z >= 1.0+eps) - { - GlobalV::ofs_warning << " atom type=" << it + 1 << " atom index=" << ia + 1 << std::endl; - 
GlobalV::ofs_warning << " direct coordinate=" << atom->taud[ia].x << " " - << atom->taud[ia].y << " " - << atom->taud[ia].z << std::endl; - ModuleBase::WARNING_QUIT("unitcell::periodic_boundary_adjustment", - "Movement of atom is larger than the cell length"); - } - - atom->tau[ia] = atom->taud[ia] * latvec; } - } return; } diff --git a/source/source_cell/update_cell.h b/source/source_cell/update_cell.h index a13bf585461..18561af3b89 100644 --- a/source/source_cell/update_cell.h +++ b/source/source_cell/update_cell.h @@ -2,11 +2,11 @@ #define UPDATE_CELL_H #include "unitcell_data.h" -#include "unitcell.h" +#include "unitcell.h" /* this file is used to update the cell,contains the following functions: -1. remake_cell: for constrained vc-relaxation where type of lattice +1. remake_cell: for constrained vc-relaxation where type of lattice is fixed, adjust the lattice vectors 2. setup_cell_after_vc: setup cell after vc-relaxation the functions are defined in the namespace UnitCell, @@ -17,79 +17,66 @@ of the UnitCell class. 
*/ namespace unitcell { - // for constrained vc-relaxation where type of lattice - // is fixed, adjust the lattice vectors - void remake_cell(Lattice& lat); +// for constrained vc-relaxation where type of lattice +// is fixed, adjust the lattice vectors +void remake_cell (Lattice& lat); - void setup_cell_after_vc(UnitCell& ucell, std::ofstream& log); - - /** - * @brief check the boundary of the cell, for each atom,the taud - * in three directions should be in the range of [-1,1) - * @param atoms: the atoms to be adjusted [in] - * @param latvec: the lattice of the atoms [in] - * @param ntype: the number of types of the atoms [in] - */ - void periodic_boundary_adjustment(Atom* atoms, - const ModuleBase::Matrix3& latvec, - const int ntype); +void setup_cell_after_vc (UnitCell& ucell, std::ofstream& log); - /** - * @brief update the position and tau of the atoms - * - * @param lat: the lattice of the atoms [in] - * @param pos: the position of the atoms [in] - * @param ntype: the number of types of the atoms [in] - * @param nat: the number of atoms [in] - * @param atoms: the atoms to be updated [out] - */ - void update_pos_tau(const Lattice& lat, - const double* pos, - const int ntype, - const int nat, - Atom* atoms); - - /** - * @brief update the position and tau of the atoms - * - * @param lat: the lattice of the atoms [in] - * @param pos_in: the position of the atoms in direct coordinate system [in] - * @param ntype: the number of types of the atoms [in] - * @param nat: the number of atoms [in] - * @param atoms: the atoms to be updated [out] - */ - void update_pos_taud(const Lattice& lat, - const double* posd_in, - const int ntype, - const int nat, - Atom* atoms); - /** - * @brief update the velocity of the atoms - * - * @param lat: the lattice of the atoms [in] - * @param pos_in: the position of the atoms in direct coordinate system - * in ModuleBase::Vector3 version [in] - * @param ntype: the number of types of the atoms [in] - * @param nat: the number of atoms 
[in] - * @param atoms: the atoms to be updated [out] - */ - void update_pos_taud(const Lattice& lat, - const ModuleBase::Vector3* posd_in, - const int ntype, - const int nat, - Atom* atoms); - /** - * @brief update the velocity of the atoms - * - * @param vel_in: the velocity of the atoms [in] - * @param ntype: the number of types of the atoms [in] - * @param nat: the number of atoms [in] - * @param atoms: the atoms to be updated [out] - */ - void update_vel(const ModuleBase::Vector3* vel_in, - const int ntype, - const int nat, - Atom* atoms); -} +/** + * @brief check the boundary of the cell, for each atom,the taud + * in three directions should be in the range of [-1,1) + * @param atoms: the atoms to be adjusted [in] + * @param latvec: the lattice of the atoms [in] + * @param ntype: the number of types of the atoms [in] + */ +void periodic_boundary_adjustment (Atom* atoms, const ModuleBase::Matrix3& latvec, const int ntype); + +/** + * @brief update the position and tau of the atoms + * + * @param lat: the lattice of the atoms [in] + * @param pos: the position of the atoms [in] + * @param ntype: the number of types of the atoms [in] + * @param nat: the number of atoms [in] + * @param atoms: the atoms to be updated [out] + */ +void update_pos_tau (const Lattice& lat, const double* pos, const int ntype, const int nat, Atom* atoms); + +/** + * @brief update the position and tau of the atoms + * + * @param lat: the lattice of the atoms [in] + * @param pos_in: the position of the atoms in direct coordinate system [in] + * @param ntype: the number of types of the atoms [in] + * @param nat: the number of atoms [in] + * @param atoms: the atoms to be updated [out] + */ +void update_pos_taud (const Lattice& lat, const double* posd_in, const int ntype, const int nat, Atom* atoms); +/** + * @brief update the velocity of the atoms + * + * @param lat: the lattice of the atoms [in] + * @param pos_in: the position of the atoms in direct coordinate system + * in 
ModuleBase::Vector3 version [in] + * @param ntype: the number of types of the atoms [in] + * @param nat: the number of atoms [in] + * @param atoms: the atoms to be updated [out] + */ +void update_pos_taud (const Lattice& lat, + const ModuleBase::Vector3* posd_in, + const int ntype, + const int nat, + Atom* atoms); +/** + * @brief update the velocity of the atoms + * + * @param vel_in: the velocity of the atoms [in] + * @param ntype: the number of types of the atoms [in] + * @param nat: the number of atoms [in] + * @param atoms: the atoms to be updated [out] + */ +void update_vel (const ModuleBase::Vector3* vel_in, const int ntype, const int nat, Atom* atoms); +} // namespace unitcell // #endif // UPDATE_CELL_H \ No newline at end of file diff --git a/source/source_esolver/esolver.cpp b/source/source_esolver/esolver.cpp index 0e932465f81..f6b309d4f50 100644 --- a/source/source_esolver/esolver.cpp +++ b/source/source_esolver/esolver.cpp @@ -26,97 +26,100 @@ namespace ModuleESolver { -std::string determine_type() +std::string + determine_type () { std::string esolver_type = "none"; if (PARAM.inp.basis_type == "pw") - { - if (PARAM.inp.esolver_type == "sdft") { - esolver_type = "sdft_pw"; + if (PARAM.inp.esolver_type == "sdft") + { + esolver_type = "sdft_pw"; + } + else if (PARAM.inp.esolver_type == "ofdft") + { + esolver_type = "ofdft"; + } + else if (PARAM.inp.esolver_type == "tdofdft") + { + esolver_type = "tdofdft"; + } + else if (PARAM.inp.esolver_type == "ksdft") + { + esolver_type = "ksdft_pw"; + } } - else if (PARAM.inp.esolver_type == "ofdft") - { - esolver_type = "ofdft"; - } - else if (PARAM.inp.esolver_type == "tdofdft") - { - esolver_type = "tdofdft"; - } - else if (PARAM.inp.esolver_type == "ksdft") - { - esolver_type = "ksdft_pw"; - } - } else if (PARAM.inp.basis_type == "lcao_in_pw") - { -#ifdef __LCAO - if (PARAM.inp.esolver_type == "sdft") - { - esolver_type = "sdft_pw"; - } - else if (PARAM.inp.esolver_type == "ksdft") { - esolver_type = "ksdft_lip"; 
- } +#ifdef __LCAO + if (PARAM.inp.esolver_type == "sdft") + { + esolver_type = "sdft_pw"; + } + else if (PARAM.inp.esolver_type == "ksdft") + { + esolver_type = "ksdft_lip"; + } #else - ModuleBase::WARNING_QUIT("ESolver", "Calculation involving numerical orbitals must be compiled with __LCAO"); + ModuleBase::WARNING_QUIT ("ESolver", + "Calculation involving numerical orbitals must be compiled with __LCAO"); #endif - } + } else if (PARAM.inp.basis_type == "lcao") - { -#ifdef __LCAO - if (PARAM.inp.esolver_type == "tddft") { - esolver_type = "ksdft_lcao_tddft"; +#ifdef __LCAO + if (PARAM.inp.esolver_type == "tddft") + { + esolver_type = "ksdft_lcao_tddft"; + } + else if (PARAM.inp.esolver_type == "ksdft") + { + esolver_type = "ksdft_lcao"; + } + else if (PARAM.inp.esolver_type == "ks-lr") + { + esolver_type = "ksdft_lr_lcao"; + } + else if (PARAM.inp.esolver_type == "lr") + { + esolver_type = "lr_lcao"; + } +#else + ModuleBase::WARNING_QUIT ("ESolver", + "Calculation involving numerical orbitals must be compiled with __LCAO"); +#endif } - else if (PARAM.inp.esolver_type == "ksdft") + + if (PARAM.inp.esolver_type == "lj") { - esolver_type = "ksdft_lcao"; + esolver_type = "lj_pot"; } - else if (PARAM.inp.esolver_type == "ks-lr") + else if (PARAM.inp.esolver_type == "dp") { - esolver_type = "ksdft_lr_lcao"; + esolver_type = "dp_pot"; } - else if (PARAM.inp.esolver_type == "lr") + else if (PARAM.inp.esolver_type == "nep") { - esolver_type = "lr_lcao"; + esolver_type = "nep_pot"; } -#else - ModuleBase::WARNING_QUIT("ESolver", "Calculation involving numerical orbitals must be compiled with __LCAO"); -#endif - } - - if (PARAM.inp.esolver_type == "lj") - { - esolver_type = "lj_pot"; - } - else if (PARAM.inp.esolver_type == "dp") - { - esolver_type = "dp_pot"; - } - else if (PARAM.inp.esolver_type == "nep") - { - esolver_type = "nep_pot"; - } else if (esolver_type == "none") - { - ModuleBase::WARNING_QUIT("ESolver", "No such esolver_type combined with basis_type"); - } + { + 
ModuleBase::WARNING_QUIT ("ESolver", "No such esolver_type combined with basis_type"); + } GlobalV::ofs_running << "\n #ENERGY SOLVER# " << esolver_type << std::endl; auto device_info = PARAM.inp.device; for (char& c: device_info) - { - if (std::islower(c)) { - c = std::toupper(c); + if (std::islower (c)) + { + c = std::toupper (c); + } } - } - base_device::information::output_device_info(std::cout, PARAM.inp.device); - base_device::information::output_device_info(GlobalV::ofs_running, PARAM.inp.device); + base_device::information::output_device_info (std::cout, PARAM.inp.device); + base_device::information::output_device_info (GlobalV::ofs_running, PARAM.inp.device); /***auto end_time = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast>(end_time - start_time); std::cout << "hipGetDeviceInfo took " << duration.count() << " seconds" << std::endl;***/ @@ -124,233 +127,234 @@ std::string determine_type() } // Some API to operate E_Solver -ESolver* init_esolver(const Input_para& inp, UnitCell& ucell) +ESolver* + init_esolver (const Input_para& inp, UnitCell& ucell) { // determine type of esolver based on INPUT information - const std::string esolver_type = determine_type(); + const std::string esolver_type = determine_type (); // initialize the corresponding Esolver child class if (esolver_type == "ksdft_pw") - { -#if ((defined __CUDA) || (defined __ROCM)) - if (PARAM.inp.device == "gpu") { +#if ((defined __CUDA) || (defined __ROCM)) + if (PARAM.inp.device == "gpu") + { + if (PARAM.inp.precision == "single") + { + return new ESolver_KS_PW, base_device::DEVICE_GPU> (); + } + else + { + return new ESolver_KS_PW, base_device::DEVICE_GPU> (); + } + } +#endif if (PARAM.inp.precision == "single") - { - return new ESolver_KS_PW, base_device::DEVICE_GPU>(); - } + { + return new ESolver_KS_PW, base_device::DEVICE_CPU> (); + } else - { - return new ESolver_KS_PW, base_device::DEVICE_GPU>(); - } + { + return new ESolver_KS_PW, 
base_device::DEVICE_CPU> (); + } } -#endif - if (PARAM.inp.precision == "single") - { - return new ESolver_KS_PW, base_device::DEVICE_CPU>(); - } - else - { - return new ESolver_KS_PW, base_device::DEVICE_CPU>(); - } - } else if (esolver_type == "sdft_pw") - { -#if ((defined __CUDA) || (defined __ROCM)) - if (PARAM.inp.device == "gpu") { +#if ((defined __CUDA) || (defined __ROCM)) + if (PARAM.inp.device == "gpu") + { + // if (PARAM.inp.precision == "single") + // { + // return new ESolver_SDFT_PW, base_device::DEVICE_GPU>(); + // } + // else + // { + return new ESolver_SDFT_PW, base_device::DEVICE_GPU> (); + // } + } +#endif // if (PARAM.inp.precision == "single") // { - // return new ESolver_SDFT_PW, base_device::DEVICE_GPU>(); + // return new ESolver_SDFT_PW, base_device::DEVICE_CPU>(); // } // else // { - return new ESolver_SDFT_PW, base_device::DEVICE_GPU>(); + return new ESolver_SDFT_PW, base_device::DEVICE_CPU> (); // } } -#endif - // if (PARAM.inp.precision == "single") - // { - // return new ESolver_SDFT_PW, base_device::DEVICE_CPU>(); - // } - // else - // { - return new ESolver_SDFT_PW, base_device::DEVICE_CPU>(); - // } - } #ifdef __LCAO else if (esolver_type == "ksdft_lip") - { - if (PARAM.inp.precision == "single") { - return new ESolver_KS_LIP>(); - } - else - { - return new ESolver_KS_LIP>(); + if (PARAM.inp.precision == "single") + { + return new ESolver_KS_LIP> (); + } + else + { + return new ESolver_KS_LIP> (); + } } - } else if (esolver_type == "ksdft_lcao") - { - if (PARAM.inp.calculation == "get_s") { - if (PARAM.globalv.gamma_only_local) - { - ModuleBase::WARNING_QUIT("ESolver", "get_s is not implemented for gamma_only"); - } + if (PARAM.inp.calculation == "get_s") + { + if (PARAM.globalv.gamma_only_local) + { + ModuleBase::WARNING_QUIT ("ESolver", "get_s is not implemented for gamma_only"); + } + else + { + return new ESolver_GetS (); + } + } + else if (PARAM.inp.deepks_out_base != "none") + { + if (PARAM.globalv.gamma_only_local) + { + 
return new ESolver_DoubleXC (); + } + else if (PARAM.inp.nspin < 4) + { + return new ESolver_DoubleXC, double> (); + } + else + { + return new ESolver_DoubleXC, std::complex> (); + } + } + else if (PARAM.inp.dm_to_rho) + { + if (PARAM.globalv.gamma_only_local) + { + ModuleBase::WARNING_QUIT ("ESolver", "dm_to_rho is not implemented for gamma_only"); + } + else if (PARAM.inp.nspin < 4) + { + return new ESolver_DM2rho, double> (); + } + else + { + return new ESolver_DM2rho, std::complex> (); + } + } else - { - return new ESolver_GetS(); - } + { + if (PARAM.globalv.gamma_only_local) + { + return new ESolver_KS_LCAO (); + } + else if (PARAM.inp.nspin < 4) + { + return new ESolver_KS_LCAO, double> (); + } + else + { + return new ESolver_KS_LCAO, std::complex> (); + } + } } - else if (PARAM.inp.deepks_out_base != "none") + else if (esolver_type == "ksdft_lcao_tddft") { - if (PARAM.globalv.gamma_only_local) - { - return new ESolver_DoubleXC(); - } - else if (PARAM.inp.nspin < 4) - { - return new ESolver_DoubleXC, double>(); - } + if (PARAM.inp.nspin < 4) + { +#if ((defined __CUDA) /* || (defined __ROCM) */) + if (PARAM.inp.device == "gpu") + { + return new ESolver_KS_LCAO_TDDFT (); + } +#endif + return new ESolver_KS_LCAO_TDDFT (); + } else - { - return new ESolver_DoubleXC, std::complex>(); - } + { +#if ((defined __CUDA) /* || (defined __ROCM) */) + if (PARAM.inp.device == "gpu") + { + return new ESolver_KS_LCAO_TDDFT, base_device::DEVICE_GPU> (); + } +#endif + return new ESolver_KS_LCAO_TDDFT, base_device::DEVICE_CPU> (); + } } - else if (PARAM.inp.dm_to_rho) + else if (esolver_type == "lr_lcao") { + // use constructor rather than Init function to initialize reference (instead of pointers) to ucell if (PARAM.globalv.gamma_only_local) - { - ModuleBase::WARNING_QUIT("ESolver", "dm_to_rho is not implemented for gamma_only"); - } - else if (PARAM.inp.nspin < 4) - { - return new ESolver_DM2rho, double>(); - } + { + return new LR::ESolver_LR (inp, ucell); + } else - { - 
return new ESolver_DM2rho, std::complex>(); - } + { + return new LR::ESolver_LR, double> (inp, ucell); + } } - else + else if (esolver_type == "ksdft_lr_lcao") { + // initialize the 1st ESolver_KS + ModuleESolver::ESolver* p_esolver = nullptr; if (PARAM.globalv.gamma_only_local) - { - return new ESolver_KS_LCAO(); - } + { + p_esolver = new ESolver_KS_LCAO (); + } else if (PARAM.inp.nspin < 4) - { - return new ESolver_KS_LCAO, double>(); - } + { + p_esolver = new ESolver_KS_LCAO, double> (); + } else - { - return new ESolver_KS_LCAO, std::complex>(); - } - } - } - else if (esolver_type == "ksdft_lcao_tddft") - { - if (PARAM.inp.nspin < 4) - { -#if ((defined __CUDA) /* || (defined __ROCM) */) - if (PARAM.inp.device == "gpu") - { - return new ESolver_KS_LCAO_TDDFT(); - } -#endif - return new ESolver_KS_LCAO_TDDFT(); + { + p_esolver = new ESolver_KS_LCAO, std::complex> (); + } + p_esolver->before_all_runners (ucell, inp); + p_esolver->runner (ucell, 0); // scf-only + + // force and stress is not needed currently, + // they will be supported after the analytical gradient + // of LR-TDDFT is implemented. + std::cout << " PREPARING FOR EXCITED STATES." 
<< std::endl; + // initialize the 2nd ESolver_LR at the temporary pointer + ModuleESolver::ESolver* p_esolver_lr = nullptr; + if (PARAM.globalv.gamma_only_local) + { + p_esolver_lr = new LR::ESolver_LR ( + std::move (*dynamic_cast*> (p_esolver)), + inp, + ucell); + } + else + { + p_esolver_lr = new LR::ESolver_LR, double> ( + std::move ( + *dynamic_cast, double>*> (p_esolver)), + inp, + ucell); + } + // clean the 1st ESolver_KS and swap the pointer + delete p_esolver; + return p_esolver_lr; } - else - { -#if ((defined __CUDA) /* || (defined __ROCM) */) - if (PARAM.inp.device == "gpu") - { - return new ESolver_KS_LCAO_TDDFT, base_device::DEVICE_GPU>(); - } #endif - return new ESolver_KS_LCAO_TDDFT, base_device::DEVICE_CPU>(); - } - } - else if (esolver_type == "lr_lcao") - { - // use constructor rather than Init function to initialize reference (instead of pointers) to ucell - if (PARAM.globalv.gamma_only_local) + else if (esolver_type == "ofdft") { - return new LR::ESolver_LR(inp, ucell); + return new ESolver_OF (); } - else + else if (esolver_type == "tdofdft") { - return new LR::ESolver_LR, double>(inp, ucell); + return new ESolver_OF_TDDFT (); } - } - else if (esolver_type == "ksdft_lr_lcao") - { - // initialize the 1st ESolver_KS - ModuleESolver::ESolver* p_esolver = nullptr; - if (PARAM.globalv.gamma_only_local) + else if (esolver_type == "lj_pot") { - p_esolver = new ESolver_KS_LCAO(); + return new ESolver_LJ (); } - else if (PARAM.inp.nspin < 4) + else if (esolver_type == "dp_pot") { - p_esolver = new ESolver_KS_LCAO, double>(); + return new ESolver_DP (PARAM.mdp.pot_file); } - else + else if (esolver_type == "nep_pot") { - p_esolver = new ESolver_KS_LCAO, std::complex>(); + return new ESolver_NEP (PARAM.mdp.pot_file); } - p_esolver->before_all_runners(ucell, inp); - p_esolver->runner(ucell, 0); // scf-only - - // force and stress is not needed currently, - // they will be supported after the analytical gradient - // of LR-TDDFT is implemented. 
- std::cout << " PREPARING FOR EXCITED STATES." << std::endl; - // initialize the 2nd ESolver_LR at the temporary pointer - ModuleESolver::ESolver* p_esolver_lr = nullptr; - if (PARAM.globalv.gamma_only_local) - { - p_esolver_lr = new LR::ESolver_LR( - std::move(*dynamic_cast*>(p_esolver)), - inp, - ucell); - } - else - { - p_esolver_lr = new LR::ESolver_LR, double>( - std::move(*dynamic_cast, double>*>(p_esolver)), - inp, - ucell); - } - // clean the 1st ESolver_KS and swap the pointer - delete p_esolver; - return p_esolver_lr; - } -#endif - else if (esolver_type == "ofdft") - { - return new ESolver_OF(); - } - else if (esolver_type == "tdofdft") - { - return new ESolver_OF_TDDFT(); - } - else if (esolver_type == "lj_pot") - { - return new ESolver_LJ(); - } - else if (esolver_type == "dp_pot") - { - return new ESolver_DP(PARAM.mdp.pot_file); - } - else if (esolver_type == "nep_pot") - { - return new ESolver_NEP(PARAM.mdp.pot_file); - } - throw std::invalid_argument("esolver_type = " + std::string(esolver_type) + ". Wrong in " + std::string(__FILE__) - + " line " + std::to_string(__LINE__)); + throw std::invalid_argument ("esolver_type = " + std::string (esolver_type) + ". Wrong in " + std::string (__FILE__) + + " line " + std::to_string (__LINE__)); } - } // namespace ModuleESolver diff --git a/source/source_esolver/esolver.h b/source/source_esolver/esolver.h index d1f2b1ae782..343b5d01cb3 100644 --- a/source/source_esolver/esolver.h +++ b/source/source_esolver/esolver.h @@ -11,12 +11,9 @@ namespace ModuleESolver class ESolver { public: - ESolver() - { - classname = "ESolver"; - } + ESolver () { classname = "ESolver"; } - virtual ~ESolver() + virtual ~ESolver () { //**************************************************** // do not add any codes in this deconstructor funcion @@ -24,26 +21,26 @@ class ESolver } //! 
initialize the energy solver by using input parameters and cell modules - virtual void before_all_runners(UnitCell& ucell, const Input_para& inp) = 0; + virtual void before_all_runners (UnitCell& ucell, const Input_para& inp) = 0; //! run energy solver - virtual void runner(UnitCell& cell, const int istep) = 0; + virtual void runner (UnitCell& cell, const int istep) = 0; //! perform post processing calculations - virtual void after_all_runners(UnitCell& ucell) = 0; + virtual void after_all_runners (UnitCell& ucell) = 0; //! deal with exx and other calculation than scf/md/relax/cell-relax: //! such as nscf, get_wf and get_pchg - virtual void others(UnitCell& ucell, const int istep) {}; + virtual void others (UnitCell& ucell, const int istep) {}; //! calculate total energy of a given system - virtual double cal_energy() = 0; + virtual double cal_energy () = 0; //! calcualte forces for the atoms in the given cell - virtual void cal_force(UnitCell& ucell, ModuleBase::matrix& force) = 0; + virtual void cal_force (UnitCell& ucell, ModuleBase::matrix& force) = 0; //! calcualte stress of given cell - virtual void cal_stress(UnitCell& ucell, ModuleBase::matrix& stress) = 0; + virtual void cal_stress (UnitCell& ucell, ModuleBase::matrix& stress) = 0; bool conv_esolver = true; // whether esolver is converged @@ -54,10 +51,10 @@ class ESolver * @brief A subrutine called in init_esolver() * This function returns type of ESolver * Based on PARAM.inp.basis_type and PARAM.inp.esolver_type - * + * * @return [out] std::string The type of ESolver */ -std::string determine_type(); +std::string determine_type (); /** * @brief Determine and initialize an ESolver based on input information. @@ -68,9 +65,7 @@ std::string determine_type(); * * @return [out] A pointer to an ESolver object that will be initialized. 
*/ -ESolver* init_esolver(const Input_para& inp, UnitCell& ucell); - - +ESolver* init_esolver (const Input_para& inp, UnitCell& ucell); } // namespace ModuleESolver diff --git a/source/source_esolver/esolver_dm2rho.cpp b/source/source_esolver/esolver_dm2rho.cpp index 0903afb29a9..185381c233d 100644 --- a/source/source_esolver/esolver_dm2rho.cpp +++ b/source/source_esolver/esolver_dm2rho.cpp @@ -16,84 +16,87 @@ namespace ModuleESolver { template -ESolver_DM2rho::ESolver_DM2rho() +ESolver_DM2rho::ESolver_DM2rho () { this->classname = "ESolver_DM2rho"; this->basisname = "LCAO"; } template -ESolver_DM2rho::~ESolver_DM2rho() +ESolver_DM2rho::~ESolver_DM2rho () { } template -void ESolver_DM2rho::before_all_runners(UnitCell& ucell, const Input_para& inp) +void + ESolver_DM2rho::before_all_runners (UnitCell& ucell, const Input_para& inp) { - ModuleBase::TITLE("ESolver_DM2rho", "before_all_runners"); - ModuleBase::timer::start("ESolver_DM2rho", "before_all_runners"); + ModuleBase::TITLE ("ESolver_DM2rho", "before_all_runners"); + ModuleBase::timer::start ("ESolver_DM2rho", "before_all_runners"); - ESolver_KS_LCAO::before_all_runners(ucell, inp); + ESolver_KS_LCAO::before_all_runners (ucell, inp); - ModuleBase::timer::end("ESolver_DM2rho", "before_all_runners"); + ModuleBase::timer::end ("ESolver_DM2rho", "before_all_runners"); } template -void ESolver_DM2rho::runner(UnitCell& ucell, const int istep) +void + ESolver_DM2rho::runner (UnitCell& ucell, const int istep) { - ModuleBase::TITLE("ESolver_DM2rho", "runner"); - ModuleBase::timer::start("ESolver_DM2rho", "runner"); + ModuleBase::TITLE ("ESolver_DM2rho", "runner"); + ModuleBase::timer::start ("ESolver_DM2rho", "runner"); - ESolver_KS_LCAO::before_scf(ucell, istep); + ESolver_KS_LCAO::before_scf (ucell, istep); // file name of DM std::string zipname = "output_DM0.npz"; // read DM from file - ModuleIO::read_mat_npz(&(this->pv), ucell, zipname, *(this->dmat.dm->get_DMR_pointer(1))); + ModuleIO::read_mat_npz (&(this->pv), 
ucell, zipname, *(this->dmat.dm->get_DMR_pointer (1))); // if nspin=2, need extra reading if (PARAM.inp.nspin == 2) - { - zipname = "output_DM1.npz"; - ModuleIO::read_mat_npz(&(this->pv), ucell, zipname, *(this->dmat.dm->get_DMR_pointer(2))); - } + { + zipname = "output_DM1.npz"; + ModuleIO::read_mat_npz (&(this->pv), ucell, zipname, *(this->dmat.dm->get_DMR_pointer (2))); + } // it's dangerous to design psiToRho function like this, mohan note 20251024 // this->pelec->psiToRho(*this->psi); - LCAO_domain::dm2rho(this->dmat.dm->get_DMR_vector(), PARAM.inp.nspin, &this->chr); + LCAO_domain::dm2rho (this->dmat.dm->get_DMR_vector (), PARAM.inp.nspin, &this->chr); int nspin0 = PARAM.inp.nspin == 2 ? 2 : 1; for (int is = 0; is < nspin0; is++) - { - std::string fn = PARAM.globalv.global_out_dir + "/SPIN" + std::to_string(is + 1) + "_CHG.cube"; - - // write electron density - ModuleIO::write_vdata_palgrid(this->Pgrid, - this->chr.rho[is], - is, - PARAM.inp.nspin, - istep, - fn, - this->pelec->eferm.get_efval(is), - &(ucell), - 3, - 1); - } - - ModuleBase::timer::end("ESolver_DM2rho", "runner"); + { + std::string fn = PARAM.globalv.global_out_dir + "/SPIN" + std::to_string (is + 1) + "_CHG.cube"; + + // write electron density + ModuleIO::write_vdata_palgrid (this->Pgrid, + this->chr.rho[is], + is, + PARAM.inp.nspin, + istep, + fn, + this->pelec->eferm.get_efval (is), + &(ucell), + 3, + 1); + } + + ModuleBase::timer::end ("ESolver_DM2rho", "runner"); } template -void ESolver_DM2rho::after_all_runners(UnitCell& ucell) +void + ESolver_DM2rho::after_all_runners (UnitCell& ucell) { - ModuleBase::TITLE("ESolver_DM2rho", "after_all_runners"); - ModuleBase::timer::start("ESolver_DM2rho", "after_all_runners"); + ModuleBase::TITLE ("ESolver_DM2rho", "after_all_runners"); + ModuleBase::timer::start ("ESolver_DM2rho", "after_all_runners"); - ESolver_KS_LCAO::after_all_runners(ucell); + ESolver_KS_LCAO::after_all_runners (ucell); - ModuleBase::timer::end("ESolver_DM2rho", 
"after_all_runners"); + ModuleBase::timer::end ("ESolver_DM2rho", "after_all_runners"); }; template class ESolver_DM2rho, double>; diff --git a/source/source_esolver/esolver_dm2rho.h b/source/source_esolver/esolver_dm2rho.h index 5d596de3bee..9651e87c581 100644 --- a/source/source_esolver/esolver_dm2rho.h +++ b/source/source_esolver/esolver_dm2rho.h @@ -12,14 +12,14 @@ template class ESolver_DM2rho : public ESolver_KS_LCAO { public: - ESolver_DM2rho(); - ~ESolver_DM2rho(); + ESolver_DM2rho (); + ~ESolver_DM2rho (); - void before_all_runners(UnitCell& ucell, const Input_para& inp) override; + void before_all_runners (UnitCell& ucell, const Input_para& inp) override; - void after_all_runners(UnitCell& ucell) override; + void after_all_runners (UnitCell& ucell) override; - void runner(UnitCell& ucell, const int istep) override; + void runner (UnitCell& ucell, const int istep) override; }; } // namespace ModuleESolver #endif diff --git a/source/source_esolver/esolver_double_xc.cpp b/source/source_esolver/esolver_double_xc.cpp index 14a86779edc..ef62b3b5508 100644 --- a/source/source_esolver/esolver_double_xc.cpp +++ b/source/source_esolver/esolver_double_xc.cpp @@ -16,20 +16,20 @@ #include "source_hsolver/hsolver_lcao.h" #include "source_io/module_parameter/parameter.h" #include "source_io/module_hs/write_HS.h" // use ModuleIO::write_hsk() -#include "source_lcao/setup_deepks.h" // use deepks, mohan add 2025-10-10 +#include "source_lcao/setup_deepks.h" // use deepks, mohan add 2025-10-10 namespace ModuleESolver { template -ESolver_DoubleXC::ESolver_DoubleXC() +ESolver_DoubleXC::ESolver_DoubleXC () { this->classname = "ESolver_DoubleXC"; this->basisname = "LCAO"; } template -ESolver_DoubleXC::~ESolver_DoubleXC() +ESolver_DoubleXC::~ESolver_DoubleXC () { delete this->psi_base; delete this->p_hamilt_base; @@ -37,382 +37,402 @@ ESolver_DoubleXC::~ESolver_DoubleXC() } template -void ESolver_DoubleXC::before_all_runners(UnitCell& ucell, const Input_para& inp) +void + 
ESolver_DoubleXC::before_all_runners (UnitCell& ucell, const Input_para& inp) { - ModuleBase::TITLE("ESolver_DoubleXC", "before_all_runners"); - ModuleBase::timer::start("ESolver_DoubleXC", "before_all_runners"); + ModuleBase::TITLE ("ESolver_DoubleXC", "before_all_runners"); + ModuleBase::timer::start ("ESolver_DoubleXC", "before_all_runners"); - ESolver_KS_LCAO::before_all_runners(ucell, inp); + ESolver_KS_LCAO::before_all_runners (ucell, inp); // init some items for base functional // 2) init ElecState if (this->pelec_base == nullptr) - { - this->pelec_base = new elecstate::ElecStateLCAO(&(this->chr_base), // use which parameter? - &(this->kv), - this->kv.get_nks(), - this->pw_big); - } + { + this->pelec_base = new elecstate::ElecStateLCAO (&(this->chr_base), // use which parameter? + &(this->kv), + this->kv.get_nks (), + this->pw_big); + } // 4) initialize electronic wave function psi if (this->psi_base == nullptr) - { - int nsk = 0; - int ncol = 0; - if (PARAM.globalv.gamma_only_local) - { - nsk = PARAM.inp.nspin; - ncol = this->pv.ncol_bands; - if (PARAM.inp.ks_solver == "genelpa" || PARAM.inp.ks_solver == "elpa" || PARAM.inp.ks_solver == "lapack" - || PARAM.inp.ks_solver == "pexsi" || PARAM.inp.ks_solver == "cusolver" - || PARAM.inp.ks_solver == "cusolvermp") - { - ncol = this->pv.ncol; - } - } - else { - nsk = this->kv.get_nks(); + int nsk = 0; + int ncol = 0; + if (PARAM.globalv.gamma_only_local) + { + nsk = PARAM.inp.nspin; + ncol = this->pv.ncol_bands; + if (PARAM.inp.ks_solver == "genelpa" || PARAM.inp.ks_solver == "elpa" + || PARAM.inp.ks_solver == "lapack" || PARAM.inp.ks_solver == "pexsi" + || PARAM.inp.ks_solver == "cusolver" || PARAM.inp.ks_solver == "cusolvermp") + { + ncol = this->pv.ncol; + } + } + else + { + nsk = this->kv.get_nks (); #ifdef __MPI - ncol = this->pv.ncol_bands; + ncol = this->pv.ncol_bands; #else - ncol = PARAM.inp.nbands; + ncol = PARAM.inp.nbands; #endif + } + this->psi_base = new psi::Psi (nsk, ncol, this->pv.nrow, 
this->kv.ngk, true); } - this->psi_base = new psi::Psi(nsk, ncol, this->pv.nrow, this->kv.ngk, true); - } // 6) initialize the density matrix - this->dmat_base.allocate_dm(&this->kv, &this->pv, PARAM.inp.nspin); + this->dmat_base.allocate_dm (&this->kv, &this->pv, PARAM.inp.nspin); // 10) inititlize the charge density - this->chr_base.set_rhopw(this->pw_rhod); // mohan add 20251130 - const bool kin_den = this->chr_base.kin_density(); // mohan add 20251202 - this->chr_base.allocate(PARAM.inp.nspin, kin_den); - this->chr_base.init_rho(ucell, this->Pgrid, this->sf.strucFac, ucell.symm, &this->kv); - this->chr_base.check_rho(); + this->chr_base.set_rhopw (this->pw_rhod); // mohan add 20251130 + const bool kin_den = this->chr_base.kin_density (); // mohan add 20251202 + this->chr_base.allocate (PARAM.inp.nspin, kin_den); + this->chr_base.init_rho (ucell, this->Pgrid, this->sf.strucFac, ucell.symm, &this->kv); + this->chr_base.check_rho (); // 11) initialize the potential if (this->pelec_base->pot == nullptr) - { - this->pelec_base->pot = new elecstate::Potential(this->pw_rhod, - this->pw_rho, - &ucell, - &(this->locpp.vloc), - &(this->sf), - &(this->solvent), - &(this->pelec_base->f_en.etxc), - &(this->pelec_base->f_en.vtxc)); - } - - ModuleBase::timer::end("ESolver_DoubleXC", "before_all_runners"); + { + this->pelec_base->pot = new elecstate::Potential (this->pw_rhod, + this->pw_rho, + &ucell, + &(this->locpp.vloc), + &(this->sf), + &(this->solvent), + &(this->pelec_base->f_en.etxc), + &(this->pelec_base->f_en.vtxc)); + } + + ModuleBase::timer::end ("ESolver_DoubleXC", "before_all_runners"); } template -void ESolver_DoubleXC::before_scf(UnitCell& ucell, const int istep) +void + ESolver_DoubleXC::before_scf (UnitCell& ucell, const int istep) { - ModuleBase::TITLE("ESolver_DoubleXC", "before_scf"); - ModuleBase::timer::start("ESolver_DoubleXC", "before_scf"); + ModuleBase::TITLE ("ESolver_DoubleXC", "before_scf"); + ModuleBase::timer::start ("ESolver_DoubleXC", 
"before_scf"); - ESolver_KS_LCAO::before_scf(ucell, istep); + ESolver_KS_LCAO::before_scf (ucell, istep); //---------------------------------------------------------- //! calculate D2 or D3 vdW //---------------------------------------------------------- - auto vdw_solver = vdw::make_vdw(ucell, PARAM.inp, &(GlobalV::ofs_running)); + auto vdw_solver = vdw::make_vdw (ucell, PARAM.inp, &(GlobalV::ofs_running)); if (vdw_solver != nullptr) - { - this->pelec_base->f_en.evdw = vdw_solver->get_energy(); - } + { + this->pelec_base->f_en.evdw = vdw_solver->get_energy (); + } //---------------------------------------------------------- //! calculate ewald energy //---------------------------------------------------------- if (!PARAM.inp.test_skip_ewald) - { - //this->pelec_base->f_en.ewald_energy = H_Ewald_pw::compute_ewald(ucell, this->pw_rhod, this->sf.strucFac); - this->pelec_base->f_en.ewald_energy = this->pelec->f_en.ewald_energy; - } + { + // this->pelec_base->f_en.ewald_energy = H_Ewald_pw::compute_ewald(ucell, this->pw_rhod, this->sf.strucFac); + this->pelec_base->f_en.ewald_energy = this->pelec->f_en.ewald_energy; + } if (this->p_hamilt_base != nullptr) - { - delete this->p_hamilt_base; - this->p_hamilt_base = nullptr; - } + { + delete this->p_hamilt_base; + this->p_hamilt_base = nullptr; + } if (this->p_hamilt_base == nullptr) - { - this->p_hamilt_base = new hamilt::HamiltLCAO( - ucell, - this->gd, - &this->pv, - this->pelec_base->pot, - this->kv, - this->two_center_bundle_, - this->orb_, - this->dmat_base.dm, - &this->dftu, - this->deepks, - istep, - this->exx_nao); - } - - XC_Functional::set_xc_type(PARAM.inp.deepks_out_base); - this->pelec_base->init_scf(ucell, this->Pgrid, this->sf.strucFac, this->locpp.numeric, ucell.symm); - XC_Functional::set_xc_type(ucell.atoms[0].ncpp.xc_func); + { + this->p_hamilt_base = new hamilt::HamiltLCAO (ucell, + this->gd, + &this->pv, + this->pelec_base->pot, + this->kv, + this->two_center_bundle_, + this->orb_, + 
this->dmat_base.dm, + &this->dftu, + this->deepks, + istep, + this->exx_nao); + } + + XC_Functional::set_xc_type (PARAM.inp.deepks_out_base); + this->pelec_base->init_scf (ucell, this->Pgrid, this->sf.strucFac, this->locpp.numeric, ucell.symm); + XC_Functional::set_xc_type (ucell.atoms[0].ncpp.xc_func); // DMR should be same size with Hamiltonian(R) - this->dmat_base.dm->init_DMR(*(dynamic_cast*>(this->p_hamilt_base)->getHR())); + this->dmat_base.dm->init_DMR (*(dynamic_cast*> (this->p_hamilt_base)->getHR ())); if (istep > 0) - { - this->dmat_base.dm->cal_DMR(); - } + { + this->dmat_base.dm->cal_DMR (); + } - ModuleBase::timer::end("ESolver_DoubleXC", "before_scf"); - return; + ModuleBase::timer::end ("ESolver_DoubleXC", "before_scf"); + return; } template -void ESolver_DoubleXC::iter_finish(UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) +void + ESolver_DoubleXC::iter_finish (UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) { - ModuleBase::TITLE("ESolver_DoubleXC", "iter_finish"); - ModuleBase::timer::start("ESolver_DoubleXC", "iter_finish"); + ModuleBase::TITLE ("ESolver_DoubleXC", "iter_finish"); + ModuleBase::timer::start ("ESolver_DoubleXC", "iter_finish"); - bool output_iter = PARAM.inp.deepks_out_labels >0 && PARAM.inp.deepks_out_freq_elec && - (iter % PARAM.inp.deepks_out_freq_elec == 0); + bool output_iter = PARAM.inp.deepks_out_labels > 0 && PARAM.inp.deepks_out_freq_elec + && (iter % PARAM.inp.deepks_out_freq_elec == 0); - if ( output_iter ) - { - // save output charge density (density after diagnonalization) - for (int is = 0; is < PARAM.inp.nspin; is++) + if (output_iter) { - ModuleBase::GlobalFunc::DCOPY(this->chr.rho[is], this->chr_base.rho[is], this->chr.rhopw->nrxx); - if (XC_Functional::get_ked_flag()) - { - ModuleBase::GlobalFunc::DCOPY(this->chr.kin_r[is], this->chr_base.kin_r[is], this->chr.rhopw->nrxx); - } - } - } + // save output charge density (density after diagnonalization) + for (int is = 0; is < 
PARAM.inp.nspin; is++) + { + ModuleBase::GlobalFunc::DCOPY (this->chr.rho[is], this->chr_base.rho[is], this->chr.rhopw->nrxx); + if (XC_Functional::get_ked_flag ()) + { + ModuleBase::GlobalFunc::DCOPY (this->chr.kin_r[is], + this->chr_base.kin_r[is], + this->chr.rhopw->nrxx); + } + } + } - ESolver_KS_LCAO::iter_finish(ucell, istep, iter, conv_esolver); + ESolver_KS_LCAO::iter_finish (ucell, istep, iter, conv_esolver); // for deepks, output labels during electronic steps (after conv_esolver is renewed) - if ( output_iter) - { - // ---------- update etot and htot ---------- - // get etot of output charge density, now the etot is of density after charge mixing - this->pelec->pot->update_from_charge(&this->chr_base, &ucell); - this->pelec->f_en.descf = 0.0; - this->pelec->cal_energies(2); - // std::cout<<"in deepks etot------"<pelec->f_en.print_all(); - // std::cout<<"in deepks etot------"<pelec->f_en.etot << std::endl; - - // update p_hamilt using output charge density - // Note!!! - // This will change the result of out_mat_hs - // The original result of out_mat_hs is H of input density, but this change H to that of output density - // When converged, these two should be close - if (PARAM.inp.deepks_v_delta > 0 && PARAM.inp.vl_in_h) + if (output_iter) { - // update real space Hamiltonian - this->p_hamilt->refresh(); - } + // ---------- update etot and htot ---------- + // get etot of output charge density, now the etot is of density after charge mixing + this->pelec->pot->update_from_charge (&this->chr_base, &ucell); + this->pelec->f_en.descf = 0.0; + this->pelec->cal_energies (2); + // std::cout<<"in deepks etot------"<pelec->f_en.print_all(); + // std::cout<<"in deepks etot------"<pelec->f_en.etot << std::endl; + + // update p_hamilt using output charge density + // Note!!! 
+ // This will change the result of out_mat_hs + // The original result of out_mat_hs is H of input density, but this change H to that of output density + // When converged, these two should be close + if (PARAM.inp.deepks_v_delta > 0 && PARAM.inp.vl_in_h) + { + // update real space Hamiltonian + this->p_hamilt->refresh (); + } #ifdef __MLALGO - // ---------- output tot and precalc ---------- - hamilt::HamiltLCAO* p_ham_deepks = dynamic_cast*>(this->p_hamilt); - std::shared_ptr> ld_shared_ptr(&this->deepks.ld, [](LCAO_Deepks*) {}); - LCAO_Deepks_Interface deepks_interface(ld_shared_ptr); - - deepks_interface.out_deepks_labels(this->pelec->f_en.etot, - this->kv.get_nks(), - ucell.nat, - PARAM.globalv.nlocal, - this->pelec->ekb, - this->kv.kvec_d, - ucell, - this->orb_, - this->gd, - &(this->pv), - *(this->psi), - this->dmat.dm, - p_ham_deepks, - iter, - conv_esolver, - GlobalV::MY_RANK, - GlobalV::ofs_running); + // ---------- output tot and precalc ---------- + hamilt::HamiltLCAO* p_ham_deepks = dynamic_cast*> (this->p_hamilt); + std::shared_ptr> ld_shared_ptr (&this->deepks.ld, [] (LCAO_Deepks*) {}); + LCAO_Deepks_Interface deepks_interface (ld_shared_ptr); + + deepks_interface.out_deepks_labels (this->pelec->f_en.etot, + this->kv.get_nks (), + ucell.nat, + PARAM.globalv.nlocal, + this->pelec->ekb, + this->kv.kvec_d, + ucell, + this->orb_, + this->gd, + &(this->pv), + *(this->psi), + this->dmat.dm, + p_ham_deepks, + iter, + conv_esolver, + GlobalV::MY_RANK, + GlobalV::ofs_running); #endif - - // restore to density after charge mixing - this->pelec->pot->update_from_charge(&this->chr, &ucell); - // ---------- prepare for base ---------- - // set as base functional Temporarily - XC_Functional::set_xc_type(PARAM.inp.deepks_out_base); + // restore to density after charge mixing + this->pelec->pot->update_from_charge (&this->chr, &ucell); - // update pot of pelec_base according to chr_base - if (!conv_esolver) - { - 
this->pelec_base->pot->update_from_charge(&this->chr_base, &ucell); - } - else - { - this->pelec_base->cal_converged(); - } + // ---------- prepare for base ---------- + // set as base functional Temporarily + XC_Functional::set_xc_type (PARAM.inp.deepks_out_base); + + // update pot of pelec_base according to chr_base + if (!conv_esolver) + { + this->pelec_base->pot->update_from_charge (&this->chr_base, &ucell); + } + else + { + this->pelec_base->cal_converged (); + } - // ---------- e_base ---------- - // ebase use the same output density with etot, just different in xc - this->pelec_base->f_en.eband = this->pelec->f_en.eband; - this->pelec_base->f_en.deband = this->pelec->f_en.deband; - this->pelec_base->f_en.demet = this->pelec->f_en.demet; - this->pelec_base->f_en.descf = 0.0; // set descf to 0 - this->pelec_base->cal_energies(2); // 2 means Kohn-Sham functional - // std::cout<<"in double_xc------"<pelec_base->f_en.print_all(); - // std::cout<<"in double_xc------"<f_en.etot << std::endl; - -#ifdef __MLALGO - const std::string file_ebase = deepks_interface.get_filename("ebase", PARAM.inp.deepks_out_labels, iter); - LCAO_deepks_io::save_npy_e(pelec_base->f_en.etot, file_ebase, GlobalV::MY_RANK); + // ---------- e_base ---------- + // ebase use the same output density with etot, just different in xc + this->pelec_base->f_en.eband = this->pelec->f_en.eband; + this->pelec_base->f_en.deband = this->pelec->f_en.deband; + this->pelec_base->f_en.demet = this->pelec->f_en.demet; + this->pelec_base->f_en.descf = 0.0; // set descf to 0 + this->pelec_base->cal_energies (2); // 2 means Kohn-Sham functional + // std::cout<<"in double_xc------"<pelec_base->f_en.print_all(); + // std::cout<<"in double_xc------"<f_en.etot << std::endl; + +#ifdef __MLALGO + const std::string file_ebase = deepks_interface.get_filename ("ebase", PARAM.inp.deepks_out_labels, iter); + LCAO_deepks_io::save_npy_e (pelec_base->f_en.etot, file_ebase, GlobalV::MY_RANK); #endif - // ---------- h_base 
---------- - if (PARAM.inp.deepks_v_delta > 0) - { - if (PARAM.inp.vl_in_h) - { - // update real space Hamiltonian - this->p_hamilt_base->refresh(); - } + // ---------- h_base ---------- + if (PARAM.inp.deepks_v_delta > 0) + { + if (PARAM.inp.vl_in_h) + { + // update real space Hamiltonian + this->p_hamilt_base->refresh (); + } - // Note!!! - // should not use ModuleIO::write_hsk() to output h_base, because it will call get_hs_pointers() - // which will change the hsolver::DiagoElpa::DecomposedState, influencing the following SCF steps + // Note!!! + // should not use ModuleIO::write_hsk() to output h_base, because it will call get_hs_pointers() + // which will change the hsolver::DiagoElpa::DecomposedState, influencing the following + // SCF steps #ifdef __MLALGO - using TH = std::conditional_t::value, ModuleBase::matrix, ModuleBase::ComplexMatrix>; - hamilt::HamiltLCAO* p_ham_deepks_base = dynamic_cast*>(this->p_hamilt_base); - int nks = this->kv.get_nks(); - std::vector h_tot(nks); - DeePKS_domain::get_h_tot(this->pv, p_ham_deepks_base, h_tot, PARAM.globalv.nlocal, nks, 'H'); - - const std::string file_htot = deepks_interface.get_filename("hbase", PARAM.inp.deepks_out_labels, iter); - LCAO_deepks_io::save_npy_h(h_tot, file_htot, PARAM.globalv.nlocal, nks, GlobalV::MY_RANK); + using TH = std:: + conditional_t::value, ModuleBase::matrix, ModuleBase::ComplexMatrix>; + hamilt::HamiltLCAO* p_ham_deepks_base + = dynamic_cast*> (this->p_hamilt_base); + int nks = this->kv.get_nks (); + std::vector h_tot (nks); + DeePKS_domain::get_h_tot (this->pv, + p_ham_deepks_base, + h_tot, + PARAM.globalv.nlocal, + nks, + 'H'); + + const std::string file_htot + = deepks_interface.get_filename ("hbase", PARAM.inp.deepks_out_labels, iter); + LCAO_deepks_io::save_npy_h (h_tot, file_htot, PARAM.globalv.nlocal, nks, GlobalV::MY_RANK); #endif - } + } - // ---------- o_base ---------- - if ( PARAM.inp.deepks_bandgap > 0 ) - { - // obase isn't implemented yet - // don't need to solve 
p_hamilt_base - // just dm*p_hamilt_base, similar to cal_o_delta - } - - // restore to original xc - XC_Functional::set_xc_type(ucell.atoms[0].ncpp.xc_func); + // ---------- o_base ---------- + if (PARAM.inp.deepks_bandgap > 0) + { + // obase isn't implemented yet + // don't need to solve p_hamilt_base + // just dm*p_hamilt_base, similar to cal_o_delta + } - } + // restore to original xc + XC_Functional::set_xc_type (ucell.atoms[0].ncpp.xc_func); + } // ---------- prepare for f_base ---------- - else if ( PARAM.inp.cal_force && conv_esolver ) - { - // vnew must be updated for force_scc() even if not output_iter - // set as base functional Temporarily - XC_Functional::set_xc_type(PARAM.inp.deepks_out_base); - this->pelec_base->cal_converged(); - // restore to original xc - XC_Functional::set_xc_type(ucell.atoms[0].ncpp.xc_func); - } - - if ( PARAM.inp.cal_force ) - { - if ( ! conv_esolver ) + else if (PARAM.inp.cal_force && conv_esolver) { - // use chr after mixing to restore veff, useful for vnew when converged - this->pelec_base->pot->update_from_charge(&this->chr, &ucell); + // vnew must be updated for force_scc() even if not output_iter + // set as base functional Temporarily + XC_Functional::set_xc_type (PARAM.inp.deepks_out_base); + this->pelec_base->cal_converged (); + // restore to original xc + XC_Functional::set_xc_type (ucell.atoms[0].ncpp.xc_func); } - else + + if (PARAM.inp.cal_force) { - // copy charge - for (int is = 0; is < PARAM.inp.nspin; is++) - { - ModuleBase::GlobalFunc::DCOPY(this->chr.rho[is], this->chr_base.rho[is], this->chr.rhopw->nrxx); - if (XC_Functional::get_ked_flag()) + if (!conv_esolver) { - ModuleBase::GlobalFunc::DCOPY(this->chr.kin_r[is], this->chr_base.kin_r[is], this->chr.rhopw->nrxx); + // use chr after mixing to restore veff, useful for vnew when converged + this->pelec_base->pot->update_from_charge (&this->chr, &ucell); } - } - - // copy dm - int nks = this->kv.get_nks(); - auto _pes_lcao_base = 
dynamic_cast*>(this->pelec_base); - auto _pes_lcao = dynamic_cast*>(this->pelec); - for (int ik = 0; ik < nks; ik++) - { -// mohan update 2025-11-03 - this->dmat_base.dm->set_DMK_pointer(ik, this->dmat.dm->get_DMK_pointer(ik)); -// _pes_lcao_base->get_DM()->set_DMK_pointer(ik, _pes_lcao->get_DM()->get_DMK_pointer(ik)); - } - this->dmat_base.dm->cal_DMR(); -// _pes_lcao_base->get_DM()->cal_DMR(); - _pes_lcao_base->ekb = _pes_lcao->ekb; - _pes_lcao_base->wg = _pes_lcao->wg; - } - } - ModuleBase::timer::end("ESolver_DoubleXC", "iter_finish"); + else + { + // copy charge + for (int is = 0; is < PARAM.inp.nspin; is++) + { + ModuleBase::GlobalFunc::DCOPY (this->chr.rho[is], + this->chr_base.rho[is], + this->chr.rhopw->nrxx); + if (XC_Functional::get_ked_flag ()) + { + ModuleBase::GlobalFunc::DCOPY (this->chr.kin_r[is], + this->chr_base.kin_r[is], + this->chr.rhopw->nrxx); + } + } + + // copy dm + int nks = this->kv.get_nks (); + auto _pes_lcao_base = dynamic_cast*> (this->pelec_base); + auto _pes_lcao = dynamic_cast*> (this->pelec); + for (int ik = 0; ik < nks; ik++) + { + // mohan update 2025-11-03 + this->dmat_base.dm->set_DMK_pointer (ik, this->dmat.dm->get_DMK_pointer (ik)); + // _pes_lcao_base->get_DM()->set_DMK_pointer(ik, + // _pes_lcao->get_DM()->get_DMK_pointer(ik)); + } + this->dmat_base.dm->cal_DMR (); + // _pes_lcao_base->get_DM()->cal_DMR(); + _pes_lcao_base->ekb = _pes_lcao->ekb; + _pes_lcao_base->wg = _pes_lcao->wg; + } + } + ModuleBase::timer::end ("ESolver_DoubleXC", "iter_finish"); } template -void ESolver_DoubleXC::cal_force(UnitCell& ucell, ModuleBase::matrix& force) +void + ESolver_DoubleXC::cal_force (UnitCell& ucell, ModuleBase::matrix& force) { - ModuleBase::TITLE("ESolver_DoubleXC", "cal_force"); - ModuleBase::timer::start("ESolver_DoubleXC", "cal_force"); + ModuleBase::TITLE ("ESolver_DoubleXC", "cal_force"); + ModuleBase::timer::start ("ESolver_DoubleXC", "cal_force"); ModuleBase::matrix force_base; ModuleBase::matrix stress_base; - 
Force_Stress_LCAO fsl(this->RA, ucell.nat); + Force_Stress_LCAO fsl (this->RA, ucell.nat); // set as base functional Temporarily - XC_Functional::set_xc_type(PARAM.inp.deepks_out_base); - - this->deepks.dpks_out_type = "base"; // for deepks method - - fsl.getForceStress(ucell, - PARAM.inp.cal_force, - PARAM.inp.cal_stress, - PARAM.inp.test_force, - PARAM.inp.test_stress, - this->gd, - this->pv, - this->pelec_base, - this->dmat_base, // mohan add 2025-11-03 - this->psi, - this->two_center_bundle_, - this->orb_, - force_base, - stress_base, - this->locpp, - this->sf, - this->kv, - this->pw_rho, - this->solvent, - this->dftu, - this->deepks, - this->exx_nao, - &ucell.symm); + XC_Functional::set_xc_type (PARAM.inp.deepks_out_base); + + this->deepks.dpks_out_type = "base"; // for deepks method + + fsl.getForceStress (ucell, + PARAM.inp.cal_force, + PARAM.inp.cal_stress, + PARAM.inp.test_force, + PARAM.inp.test_stress, + this->gd, + this->pv, + this->pelec_base, + this->dmat_base, // mohan add 2025-11-03 + this->psi, + this->two_center_bundle_, + this->orb_, + force_base, + stress_base, + this->locpp, + this->sf, + this->kv, + this->pw_rho, + this->solvent, + this->dftu, + this->deepks, + this->exx_nao, + &ucell.symm); // restore to original xc - XC_Functional::set_xc_type(ucell.atoms[0].ncpp.xc_func); + XC_Functional::set_xc_type (ucell.atoms[0].ncpp.xc_func); // this will delete RA, so call it later - ESolver_KS_LCAO::cal_force(ucell, force); + ESolver_KS_LCAO::cal_force (ucell, force); - ModuleBase::timer::end("ESolver_DoubleXC", "cal_force"); + ModuleBase::timer::end ("ESolver_DoubleXC", "cal_force"); } template class ESolver_DoubleXC; diff --git a/source/source_esolver/esolver_double_xc.h b/source/source_esolver/esolver_double_xc.h index bff9e28bc70..e4dc96a3696 100644 --- a/source/source_esolver/esolver_double_xc.h +++ b/source/source_esolver/esolver_double_xc.h @@ -10,18 +10,17 @@ template class ESolver_DoubleXC : public ESolver_KS_LCAO { public: - 
ESolver_DoubleXC(); - ~ESolver_DoubleXC(); + ESolver_DoubleXC (); + ~ESolver_DoubleXC (); - void before_all_runners(UnitCell& ucell, const Input_para& inp) override; + void before_all_runners (UnitCell& ucell, const Input_para& inp) override; - void cal_force(UnitCell& ucell, ModuleBase::matrix& force) override; + void cal_force (UnitCell& ucell, ModuleBase::matrix& force) override; protected: + void before_scf (UnitCell& ucell, const int istep) override; - void before_scf(UnitCell& ucell, const int istep) override; - - void iter_finish(UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) override; + void iter_finish (UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) override; //! Hamiltonian hamilt::Hamilt* p_hamilt_base = nullptr; diff --git a/source/source_esolver/esolver_dp.cpp b/source/source_esolver/esolver_dp.cpp index 879193e668b..e156d3b6bf8 100644 --- a/source/source_esolver/esolver_dp.cpp +++ b/source/source_esolver/esolver_dp.cpp @@ -31,18 +31,19 @@ using namespace ModuleESolver; -void ESolver_DP::before_all_runners(UnitCell& ucell, const Input_para& inp) +void + ESolver_DP::before_all_runners (UnitCell& ucell, const Input_para& inp) { dp_potential = 0; - dp_force.create(ucell.nat, 3); - dp_virial.create(3, 3); + dp_force.create (ucell.nat, 3); + dp_virial.create (3, 3); - ModuleIO::CifParser::write(PARAM.globalv.global_out_dir + "STRU.cif", - ucell, - "# Generated by ABACUS ModuleIO::CifParser", - "data_?"); + ModuleIO::CifParser::write (PARAM.globalv.global_out_dir + "STRU.cif", + ucell, + "# Generated by ABACUS ModuleIO::CifParser", + "data_?"); - atype.resize(ucell.nat); + atype.resize (ucell.nat); rescaling = inp.mdp.dp_rescaling; fparam = inp.mdp.dp_fparam; @@ -50,16 +51,17 @@ void ESolver_DP::before_all_runners(UnitCell& ucell, const Input_para& inp) #ifdef __DPMD /// determine the type map from STRU to DP model - type_map(ucell); + type_map (ucell); #endif } -void ESolver_DP::runner(UnitCell& ucell, const int istep) 
+void + ESolver_DP::runner (UnitCell& ucell, const int istep) { - ModuleBase::TITLE("ESolver_DP", "runner"); - ModuleBase::timer::start("ESolver_DP", "runner"); + ModuleBase::TITLE ("ESolver_DP", "runner"); + ModuleBase::timer::start ("ESolver_DP", "runner"); - std::vector cell(9, 0.0); + std::vector cell (9, 0.0); cell[0] = ucell.latvec.e11 * ucell.lat0_angstrom; cell[1] = ucell.latvec.e12 * ucell.lat0_angstrom; cell[2] = ucell.latvec.e13 * ucell.lat0_angstrom; @@ -70,27 +72,27 @@ void ESolver_DP::runner(UnitCell& ucell, const int istep) cell[7] = ucell.latvec.e32 * ucell.lat0_angstrom; cell[8] = ucell.latvec.e33 * ucell.lat0_angstrom; - std::vector coord(3 * ucell.nat, 0.0); + std::vector coord (3 * ucell.nat, 0.0); int iat = 0; for (int it = 0; it < ucell.ntype; ++it) - { - for (int ia = 0; ia < ucell.atoms[it].na; ++ia) { - coord[3 * iat] = ucell.atoms[it].tau[ia].x * ucell.lat0_angstrom; - coord[3 * iat + 1] = ucell.atoms[it].tau[ia].y * ucell.lat0_angstrom; - coord[3 * iat + 2] = ucell.atoms[it].tau[ia].z * ucell.lat0_angstrom; - iat++; + for (int ia = 0; ia < ucell.atoms[it].na; ++ia) + { + coord[3 * iat] = ucell.atoms[it].tau[ia].x * ucell.lat0_angstrom; + coord[3 * iat + 1] = ucell.atoms[it].tau[ia].y * ucell.lat0_angstrom; + coord[3 * iat + 2] = ucell.atoms[it].tau[ia].z * ucell.lat0_angstrom; + iat++; + } } - } - assert(ucell.nat == iat); + assert (ucell.nat == iat); #ifdef __DPMD std::vector f, v; dp_potential = 0; - dp_force.zero_out(); - dp_virial.zero_out(); + dp_force.zero_out (); + dp_virial.zero_out (); - dp.compute(dp_potential, f, v, coord, atype, cell, fparam, aparam); + dp.compute (dp_potential, f, v, coord, atype, cell, fparam, aparam); // rescale the energy, force, and stress const double fact_e = rescaling / ModuleBase::Ry_to_eV; @@ -98,108 +100,114 @@ void ESolver_DP::runner(UnitCell& ucell, const int istep) const double fact_v = rescaling / (ucell.omega * ModuleBase::Ry_to_eV); dp_potential *= fact_e; - GlobalV::ofs_running << " #TOTAL 
ENERGY# " << std::setprecision(11) << dp_potential * ModuleBase::Ry_to_eV << " eV" + GlobalV::ofs_running << " #TOTAL ENERGY# " << std::setprecision (11) << dp_potential * ModuleBase::Ry_to_eV << " eV" << std::endl; for (int i = 0; i < ucell.nat; ++i) - { - dp_force(i, 0) = f[3 * i] * fact_f; - dp_force(i, 1) = f[3 * i + 1] * fact_f; - dp_force(i, 2) = f[3 * i + 2] * fact_f; - } + { + dp_force (i, 0) = f[3 * i] * fact_f; + dp_force (i, 1) = f[3 * i + 1] * fact_f; + dp_force (i, 2) = f[3 * i + 2] * fact_f; + } for (int i = 0; i < 3; ++i) - { - for (int j = 0; j < 3; ++j) { - dp_virial(i, j) = v[3 * i + j] * fact_v; + for (int j = 0; j < 3; ++j) + { + dp_virial (i, j) = v[3 * i + j] * fact_v; + } } - } #else - ModuleBase::WARNING_QUIT("ESolver_DP", "Please recompile with -D__DPMD"); + ModuleBase::WARNING_QUIT ("ESolver_DP", "Please recompile with -D__DPMD"); #endif - ModuleBase::timer::end("ESolver_DP", "runner"); + ModuleBase::timer::end ("ESolver_DP", "runner"); } -double ESolver_DP::cal_energy() +double + ESolver_DP::cal_energy () { return dp_potential; } -void ESolver_DP::cal_force(UnitCell& ucell, ModuleBase::matrix& force) +void + ESolver_DP::cal_force (UnitCell& ucell, ModuleBase::matrix& force) { force = dp_force; - ModuleIO::print_force(GlobalV::ofs_running, ucell, "TOTAL-FORCE (eV/Angstrom)", force, false); + ModuleIO::print_force (GlobalV::ofs_running, ucell, "TOTAL-FORCE (eV/Angstrom)", force, false); } -void ESolver_DP::cal_stress(UnitCell& ucell, ModuleBase::matrix& stress) +void + ESolver_DP::cal_stress (UnitCell& ucell, ModuleBase::matrix& stress) { stress = dp_virial; - ModuleIO::print_stress("TOTAL-STRESS", stress, true, false, GlobalV::ofs_running); + ModuleIO::print_stress ("TOTAL-STRESS", stress, true, false, GlobalV::ofs_running); // external stress - double unit_transform = ModuleBase::RYDBERG_SI / pow(ModuleBase::BOHR_RADIUS_SI, 3) * 1.0e-8; + double unit_transform = ModuleBase::RYDBERG_SI / pow (ModuleBase::BOHR_RADIUS_SI, 3) * 1.0e-8; double 
external_stress[3] = {PARAM.inp.press1, PARAM.inp.press2, PARAM.inp.press3}; for (int i = 0; i < 3; i++) - { - stress(i, i) -= external_stress[i] / unit_transform; - } + { + stress (i, i) -= external_stress[i] / unit_transform; + } } -void ESolver_DP::after_all_runners(UnitCell& ucell) +void + ESolver_DP::after_all_runners (UnitCell& ucell) { GlobalV::ofs_running << "\n --------------------------------------------" << std::endl; - GlobalV::ofs_running << std::setprecision(16); + GlobalV::ofs_running << std::setprecision (16); GlobalV::ofs_running << " !FINAL_ETOT_IS " << dp_potential * ModuleBase::Ry_to_eV << " eV" << std::endl; GlobalV::ofs_running << " --------------------------------------------\n\n" << std::endl; } #ifdef __DPMD -void ESolver_DP::type_map(const UnitCell& ucell) +void + ESolver_DP::type_map (const UnitCell& ucell) { std::string type = ""; - dp.get_type_map(type); - std::stringstream ss(type); + dp.get_type_map (type); + std::stringstream ss (type); std::unordered_map label; std::string temp; int index = 0; while (ss >> temp) - { - label[temp] = index; - index++; - } + { + label[temp] = index; + index++; + } std::cout << "\n type map of model file " << dp_file << " " << std::endl; std::cout << " ----------------------------------------------------------------"; int count = 0; - for (auto it = label.begin(); it != label.end(); ++it) - { - if (count % 5 == 0) + for (auto it = label.begin (); it != label.end (); ++it) { - std::cout << std::endl; - std::cout << " "; + if (count % 5 == 0) + { + std::cout << std::endl; + std::cout << " "; + } + count++; + temp = it->first + ": " + std::to_string (it->second); + std::cout << std::left << std::setw (10) << temp; } - count++; - temp = it->first + ": " + std::to_string(it->second); - std::cout << std::left << std::setw(10) << temp; - } std::cout << "\n -----------------------------------------------------------------" << std::endl; int iat = 0; for (int it = 0; it < ucell.ntype; ++it) - { - for (int ia = 
0; ia < ucell.atoms[it].na; ++ia) { - if (label.find(ucell.atoms[it].label) == label.end()) - { - ModuleBase::WARNING_QUIT("ESolver_DP", - "The label " + ucell.atoms[it].label + " is not found in the type map."); - } - atype[iat] = label[ucell.atoms[it].label]; - iat++; + for (int ia = 0; ia < ucell.atoms[it].na; ++ia) + { + if (label.find (ucell.atoms[it].label) == label.end ()) + { + ModuleBase::WARNING_QUIT ("ESolver_DP", + "The label " + ucell.atoms[it].label + + " is not found in the type map."); + } + atype[iat] = label[ucell.atoms[it].label]; + iat++; + } } - } - assert(ucell.nat == iat); + assert (ucell.nat == iat); } #endif diff --git a/source/source_esolver/esolver_dp.h b/source/source_esolver/esolver_dp.h index 405bae44461..942c50f469d 100644 --- a/source/source_esolver/esolver_dp.h +++ b/source/source_esolver/esolver_dp.h @@ -17,13 +17,13 @@ class ESolver_DP : public ESolver { public: #ifdef __DPMD - ESolver_DP(const std::string& pot_file) : dp(pot_file) + ESolver_DP (const std::string& pot_file) : dp (pot_file) { classname = "ESolver_DP"; dp_file = pot_file; } #else - ESolver_DP(const std::string& pot_file) + ESolver_DP (const std::string& pot_file) { classname = "ESolver_DP"; dp_file = pot_file; @@ -36,7 +36,7 @@ class ESolver_DP : public ESolver * @param inp input parameters * @param cell unitcell information */ - void before_all_runners(UnitCell& ucell, const Input_para& inp) override; + void before_all_runners (UnitCell& ucell, const Input_para& inp) override; /** * @brief Run the DP solver for a given ion/md step and unit cell @@ -44,7 +44,7 @@ class ESolver_DP : public ESolver * @param istep the current ion/md step * @param cell unitcell information */ - void runner(UnitCell& cell, const int istep) override; + void runner (UnitCell& cell, const int istep) override; /** * @brief get the total energy without ion kinetic energy @@ -52,28 +52,28 @@ class ESolver_DP : public ESolver * @param etot the computed energy * @return total energy without ion 
kinetic energy */ - double cal_energy() override; + double cal_energy () override; /** * @brief get the computed atomic forces * * @param force the computed atomic forces */ - void cal_force(UnitCell& ucell, ModuleBase::matrix& force) override; + void cal_force (UnitCell& ucell, ModuleBase::matrix& force) override; /** * @brief get the computed lattice virials * * @param stress the computed lattice virials */ - void cal_stress(UnitCell& ucell, ModuleBase::matrix& stress) override; + void cal_stress (UnitCell& ucell, ModuleBase::matrix& stress) override; /** * @brief Prints the final total energy of the DP model to the output file * * This function prints the final total energy of the DP model in eV to the output file along with some formatting. */ - void after_all_runners(UnitCell& ucell) override; + void after_all_runners (UnitCell& ucell) override; private: /** @@ -81,7 +81,7 @@ class ESolver_DP : public ESolver * * @param ucell unitcell information */ - void type_map(const UnitCell& ucell); + void type_map (const UnitCell& ucell); /** * @brief DeePMD related variables for ESolver_DP class diff --git a/source/source_esolver/esolver_fp.cpp b/source/source_esolver/esolver_fp.cpp index 238c093580d..2e06e69069d 100644 --- a/source/source_esolver/esolver_fp.cpp +++ b/source/source_esolver/esolver_fp.cpp @@ -14,7 +14,7 @@ #include "source_io/module_chgpot/rhog_io.h" #include "source_io/module_parameter/parameter.h" -#include "source_pw/module_pwdft/setup_pwrho.h" // mohan 20251005 +#include "source_pw/module_pwdft/setup_pwrho.h" // mohan 20251005 #include "source_hamilt/module_xc/xc_functional.h" // mohan 20251005 #include "source_io/module_ctrl/ctrl_output_fp.h" #include "source_io/module_chgpot/write_init.h" // write_chg_init, write_pot_init @@ -22,218 +22,243 @@ namespace ModuleESolver { -ESolver_FP::ESolver_FP() -{ -} +ESolver_FP::ESolver_FP () {} -ESolver_FP::~ESolver_FP() +ESolver_FP::~ESolver_FP () { - //**************************************************** - // 
do not add any codes in this deconstructor funcion - //**************************************************** + //**************************************************** + // do not add any codes in this deconstructor funcion + //**************************************************** // mohan add 20251005 - pw::teardown_pwrho(this->pw_rho_flag, PARAM.globalv.double_grid, this->pw_rho, this->pw_rhod); + pw::teardown_pwrho (this->pw_rho_flag, PARAM.globalv.double_grid, this->pw_rho, this->pw_rhod); - delete this->pelec; + delete this->pelec; } -void ESolver_FP::before_all_runners(UnitCell& ucell, const Input_para& inp) +void + ESolver_FP::before_all_runners (UnitCell& ucell, const Input_para& inp) { - ModuleBase::TITLE("ESolver_FP", "before_all_runners"); + ModuleBase::TITLE ("ESolver_FP", "before_all_runners"); //! 1) read pseudopotentials - elecstate::read_pseudo(GlobalV::ofs_running, ucell); + elecstate::read_pseudo (GlobalV::ofs_running, ucell); //! 2) setup pw_rho, pw_rhod, pw_big, sf, and read_pseudopotentials - pw::setup_pwrho(ucell, PARAM.globalv.double_grid, this->pw_rho_flag, - this->pw_rho, this->pw_rhod, this->pw_big, this->classname, inp); + pw::setup_pwrho (ucell, + PARAM.globalv.double_grid, + this->pw_rho_flag, + this->pw_rho, + this->pw_rhod, + this->pw_big, + this->classname, + inp); //! 3) setup structure factors - this->sf.set(this->pw_rhod, inp.nbspline); + this->sf.set (this->pw_rhod, inp.nbspline); //! 4) write geometry file - ModuleIO::CifParser::write(PARAM.globalv.global_out_dir + "STRU.cif", - ucell, "# Generated by ABACUS ModuleIO::CifParser", "data_?"); + ModuleIO::CifParser::write (PARAM.globalv.global_out_dir + "STRU.cif", + ucell, + "# Generated by ABACUS ModuleIO::CifParser", + "data_?"); //! 5) init charge extrapolation - this->CE.Init_CE(inp.nspin, ucell.nat, this->pw_rhod->nrxx, inp.chg_extrap); + this->CE.Init_CE (inp.nspin, ucell.nat, this->pw_rhod->nrxx, inp.chg_extrap); //! 
6) symmetry analysis should be performed every time the cell is changed if (ModuleSymmetry::Symmetry::symm_flag == 1) - { - ucell.symm.analy_sys(ucell.lat, ucell.st, ucell.atoms, GlobalV::ofs_running); - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "SYMMETRY"); - } + { + ucell.symm.analy_sys (ucell.lat, ucell.st, ucell.atoms, GlobalV::ofs_running); + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "SYMMETRY"); + } - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "SETUP UNITCELL"); + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "SETUP UNITCELL"); //! 7) setup k points in the Brillouin zone according to symmetry. - this->kv.set(ucell,ucell.symm, inp.kpoint_file, inp.nspin, ucell.G, ucell.latvec, GlobalV::ofs_running); - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "INIT K-POINTS"); + this->kv.set (ucell, ucell.symm, inp.kpoint_file, inp.nspin, ucell.G, ucell.latvec, GlobalV::ofs_running); + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "INIT K-POINTS"); //! 8) print information - ModuleIO::print_parameters(ucell, this->kv, inp); + ModuleIO::print_parameters (ucell, this->kv, inp); //! 9) parallel of FFT grid - this->Pgrid.init(this->pw_rhod->nx, this->pw_rhod->ny, this->pw_rhod->nz, - this->pw_rhod->nplane, this->pw_rhod->nrxx, pw_big->nbz, pw_big->bz); + this->Pgrid.init (this->pw_rhod->nx, + this->pw_rhod->ny, + this->pw_rhod->nz, + this->pw_rhod->nplane, + this->pw_rhod->nrxx, + pw_big->nbz, + pw_big->bz); //! 10) calculate the structure factor - this->sf.setup(&ucell, Pgrid, this->pw_rhod); + this->sf.setup (&ucell, Pgrid, this->pw_rhod); //! 
11) setup the xc functional - XC_Functional::set_xc_type(ucell.atoms[0].ncpp.xc_func); - GlobalV::ofs_running<chr.set_rhopw(this->pw_rhod); // mohan add 20251130 - const bool kin_den = this->chr.kin_density(); // mohan add 20251202 - this->chr.allocate(inp.nspin, kin_den); // mohan move this from setup_estate_pw, 20251128 - + this->chr.set_rhopw (this->pw_rhod); // mohan add 20251130 + const bool kin_den = this->chr.kin_density (); // mohan add 20251202 + this->chr.allocate (inp.nspin, kin_den); // mohan move this from setup_estate_pw, 20251128 return; } -void ESolver_FP::after_scf(UnitCell& ucell, const int istep, const bool conv_esolver) +void + ESolver_FP::after_scf (UnitCell& ucell, const int istep, const bool conv_esolver) { - ModuleBase::TITLE("ESolver_FP", "after_scf"); + ModuleBase::TITLE ("ESolver_FP", "after_scf"); //! Output convergence information - ModuleIO::output_convergence_after_scf(conv_esolver, this->pelec->f_en.etot); + ModuleIO::output_convergence_after_scf (conv_esolver, this->pelec->f_en.etot); //! Write Fermi energy - ModuleIO::output_efermi(conv_esolver, this->pelec->eferm.ef); + ModuleIO::output_efermi (conv_esolver, this->pelec->eferm.ef); //! Update delta_rho for charge extrapolation - CE.update_delta_rho(ucell, &(this->chr), &(this->sf)); + CE.update_delta_rho (ucell, &(this->chr), &(this->sf)); //! print out charge density, potential, elf, etc. 
- ModuleIO::ctrl_output_fp(ucell, this->pelec, this->pw_big, this->pw_rhod, - this->chr, this->solvent, this->Pgrid, istep); - + ModuleIO::ctrl_output_fp (ucell, + this->pelec, + this->pw_big, + this->pw_rhod, + this->chr, + this->solvent, + this->Pgrid, + istep); } -void ESolver_FP::before_scf(UnitCell& ucell, const int istep) +void + ESolver_FP::before_scf (UnitCell& ucell, const int istep) { - ModuleBase::TITLE("ESolver_FP", "before_scf"); + ModuleBase::TITLE ("ESolver_FP", "before_scf"); // if the cell has changed if (ucell.cell_parameter_updated) - { - // only G-vector and K-vector are changed due to the change of lattice - // vector FFT grids do not change!! - this->pw_rho->initgrids(ucell.lat0, ucell.latvec, pw_rho->nx, pw_rho->ny, pw_rho->nz); - this->pw_rho->collect_local_pw(); - this->pw_rho->collect_uniqgg(); - - // if double grid used in USPP, update related quantities in dense grid - if (PARAM.globalv.double_grid) { - this->pw_rhod->initgrids(ucell.lat0, ucell.latvec, pw_rhod->nx, pw_rhod->ny, pw_rhod->nz); - this->pw_rhod->collect_local_pw(); - this->pw_rhod->collect_uniqgg(); - } + // only G-vector and K-vector are changed due to the change of lattice + // vector FFT grids do not change!! 
+ this->pw_rho->initgrids (ucell.lat0, ucell.latvec, pw_rho->nx, pw_rho->ny, pw_rho->nz); + this->pw_rho->collect_local_pw (); + this->pw_rho->collect_uniqgg (); + + // if double grid used in USPP, update related quantities in dense grid + if (PARAM.globalv.double_grid) + { + this->pw_rhod->initgrids (ucell.lat0, ucell.latvec, pw_rhod->nx, pw_rhod->ny, pw_rhod->nz); + this->pw_rhod->collect_local_pw (); + this->pw_rhod->collect_uniqgg (); + } - // reset local pseudopotentials - this->locpp.init_vloc(ucell, this->pw_rhod); - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "LOCAL POTENTIAL"); + // reset local pseudopotentials + this->locpp.init_vloc (ucell, this->pw_rhod); + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "LOCAL POTENTIAL"); - // perform symmetry analysis - if (ModuleSymmetry::Symmetry::symm_flag == 1) - { - ucell.symm.analy_sys(ucell.lat, ucell.st, ucell.atoms, GlobalV::ofs_running); - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "SYMMETRY"); - } + // perform symmetry analysis + if (ModuleSymmetry::Symmetry::symm_flag == 1) + { + ucell.symm.analy_sys (ucell.lat, ucell.st, ucell.atoms, GlobalV::ofs_running); + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "SYMMETRY"); + } - // reset k-points - KVectorUtils::set_after_vc(kv, PARAM.inp.nspin, ucell.G); - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "INIT K-POINTS"); - } + // reset k-points + KVectorUtils::set_after_vc (kv, PARAM.inp.nspin, ucell.G); + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "INIT K-POINTS"); + } // charge extrapolation if (ucell.ionic_position_updated) - { - this->CE.update_all_dis(ucell); - this->CE.extrapolate_charge(&this->Pgrid, ucell, &this->chr, &this->sf, - GlobalV::ofs_running, GlobalV::ofs_warning); - } + { + this->CE.update_all_dis (ucell); + this->CE.extrapolate_charge (&this->Pgrid, + ucell, + &this->chr, + &this->sf, + GlobalV::ofs_running, + GlobalV::ofs_warning); + } //! 
calculate D2 or D3 vdW - auto vdw_solver = vdw::make_vdw(ucell, PARAM.inp, &(GlobalV::ofs_running)); + auto vdw_solver = vdw::make_vdw (ucell, PARAM.inp, &(GlobalV::ofs_running)); if (vdw_solver != nullptr) - { - this->pelec->f_en.evdw = vdw_solver->get_energy(); - } + { + this->pelec->f_en.evdw = vdw_solver->get_energy (); + } //! calculate ewald energy if (!PARAM.inp.test_skip_ewald) - { - this->pelec->f_en.ewald_energy = H_Ewald_pw::compute_ewald(ucell, this->pw_rhod, this->sf.strucFac); - } + { + this->pelec->f_en.ewald_energy = H_Ewald_pw::compute_ewald (ucell, this->pw_rhod, this->sf.strucFac); + } - //! set direction of magnetism, used in non-collinear case - elecstate::cal_ux(ucell); + //! set direction of magnetism, used in non-collinear case + elecstate::cal_ux (ucell); //! output the initial charge density and potential - ModuleIO::write_chg_init(ucell, this->Pgrid, this->chr, this->pelec->eferm, istep, PARAM.inp); -// ModuleIO::write_pot_init(ucell, this->Pgrid, this->pelec, istep, PARAM.inp); + ModuleIO::write_chg_init (ucell, this->Pgrid, this->chr, this->pelec->eferm, istep, PARAM.inp); + // ModuleIO::write_pot_init(ucell, this->Pgrid, this->pelec, istep, PARAM.inp); return; } -void ESolver_FP::iter_finish(UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) +void + ESolver_FP::iter_finish (UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) { //! 
output charge density in G-space, or if available, kinetic energy density in G-space if (PARAM.inp.out_chg[0] != -1) - { - if (iter % PARAM.inp.out_freq_elec == 0 || iter == PARAM.inp.scf_nmax || conv_esolver) { - for (int is = 0; is < PARAM.inp.nspin; is++) - { - this->pw_rhod->real2recip(this->chr.rho_save[is], this->chr.rhog_save[is]); - } - ModuleIO::write_rhog(PARAM.globalv.global_out_dir + PARAM.inp.suffix + "-CHARGE-DENSITY.restart", - PARAM.globalv.gamma_only_pw, - this->pw_rhod, - PARAM.inp.nspin, - ucell.GT, - this->chr.rhog_save, - GlobalV::MY_POOL, - GlobalV::RANK_IN_POOL, - GlobalV::NPROC_IN_POOL); - - if (XC_Functional::get_ked_flag()) - { - std::vector> kin_g_space(PARAM.inp.nspin * this->chr.ngmc, {0.0, 0.0}); - std::vector*> kin_g; - for (int is = 0; is < PARAM.inp.nspin; is++) + if (iter % PARAM.inp.out_freq_elec == 0 || iter == PARAM.inp.scf_nmax || conv_esolver) { - kin_g.push_back(kin_g_space.data() + is * this->chr.ngmc); - this->pw_rhod->real2recip(this->chr.kin_r_save[is], kin_g[is]); + for (int is = 0; is < PARAM.inp.nspin; is++) + { + this->pw_rhod->real2recip (this->chr.rho_save[is], this->chr.rhog_save[is]); + } + ModuleIO::write_rhog (PARAM.globalv.global_out_dir + PARAM.inp.suffix + "-CHARGE-DENSITY.restart", + PARAM.globalv.gamma_only_pw, + this->pw_rhod, + PARAM.inp.nspin, + ucell.GT, + this->chr.rhog_save, + GlobalV::MY_POOL, + GlobalV::RANK_IN_POOL, + GlobalV::NPROC_IN_POOL); + + if (XC_Functional::get_ked_flag ()) + { + std::vector> kin_g_space (PARAM.inp.nspin * this->chr.ngmc, + {0.0, 0.0}); + std::vector*> kin_g; + for (int is = 0; is < PARAM.inp.nspin; is++) + { + kin_g.push_back (kin_g_space.data () + is * this->chr.ngmc); + this->pw_rhod->real2recip (this->chr.kin_r_save[is], kin_g[is]); + } + ModuleIO::write_rhog (PARAM.globalv.global_out_dir + PARAM.inp.suffix + + "-TAU-DENSITY.restart", + PARAM.globalv.gamma_only_pw, + this->pw_rhod, + PARAM.inp.nspin, + ucell.GT, + kin_g.data (), + GlobalV::MY_POOL, + 
GlobalV::RANK_IN_POOL, + GlobalV::NPROC_IN_POOL); + } } - ModuleIO::write_rhog(PARAM.globalv.global_out_dir + PARAM.inp.suffix + "-TAU-DENSITY.restart", - PARAM.globalv.gamma_only_pw, - this->pw_rhod, - PARAM.inp.nspin, - ucell.GT, - kin_g.data(), - GlobalV::MY_POOL, - GlobalV::RANK_IN_POOL, - GlobalV::NPROC_IN_POOL); - } } - } } -void ESolver_FP::after_all_runners(UnitCell& ucell) +void + ESolver_FP::after_all_runners (UnitCell& ucell) { // print out the final total energy GlobalV::ofs_running << "\n --------------------------------------------" << std::endl; - GlobalV::ofs_running << std::setprecision(16); + GlobalV::ofs_running << std::setprecision (16); GlobalV::ofs_running << " !FINAL_ETOT_IS " << this->pelec->f_en.etot * ModuleBase::Ry_to_eV << " eV" << std::endl; GlobalV::ofs_running << " --------------------------------------------\n\n" << std::endl; - } } // namespace ModuleESolver diff --git a/source/source_esolver/esolver_fp.h b/source/source_esolver/esolver_fp.h index 501bef9b681..cadea8b44ff 100644 --- a/source/source_esolver/esolver_fp.h +++ b/source/source_esolver/esolver_fp.h @@ -5,16 +5,15 @@ #include "source_base/timer_wrapper.h" -#include "source_basis/module_pw/pw_basis.h" // plane wave basis -#include "source_estate/elecstate.h" // electronic states +#include "source_basis/module_pw/pw_basis.h" // plane wave basis +#include "source_estate/elecstate.h" // electronic states #include "source_estate/module_charge/charge_extra.h" // charge extrapolation -#include "source_hamilt/module_surchem/surchem.h" // solvation model -#include "source_pw/module_pwdft/vl_pw.h" // local pseudopotential -#include "source_pw/module_pwdft/structure_factor.h" // structure factor +#include "source_hamilt/module_surchem/surchem.h" // solvation model +#include "source_pw/module_pwdft/vl_pw.h" // local pseudopotential +#include "source_pw/module_pwdft/structure_factor.h" // structure factor #include - //! 
The First-Principles (FP) Energy Solver Class /** * This class represents components that needed in @@ -25,24 +24,24 @@ namespace ModuleESolver { -class ESolver_FP: public ESolver +class ESolver_FP : public ESolver { public: - ESolver_FP(); + ESolver_FP (); - virtual ~ESolver_FP(); + virtual ~ESolver_FP (); //! Initialize of the first-principels energy solver - virtual void before_all_runners(UnitCell& ucell, const Input_para& inp) override; + virtual void before_all_runners (UnitCell& ucell, const Input_para& inp) override; - virtual void after_all_runners(UnitCell& ucell) override; + virtual void after_all_runners (UnitCell& ucell) override; protected: - virtual void before_scf(UnitCell& ucell, const int istep); + virtual void before_scf (UnitCell& ucell, const int istep); - virtual void after_scf(UnitCell& ucell, const int istep, const bool conv_esolver); + virtual void after_scf (UnitCell& ucell, const int istep, const bool conv_esolver); - virtual void iter_finish(UnitCell& ucell, const int istep, int& iter, bool &conv_esolver); + virtual void iter_finish (UnitCell& ucell, const int istep, int& iter, bool& conv_esolver); //! These pointers will be deleted in the free_pointers() function every ion step. elecstate::ElecState* pelec = nullptr; ///< Electronic states @@ -77,7 +76,7 @@ class ESolver_FP: public ESolver //! solvent model surchem solvent; - bool pw_rho_flag = false; ///< flag for pw_rho, 0: not initialized, 1: initialized + bool pw_rho_flag = false; ///< flag for pw_rho, 0: not initialized, 1: initialized //! 
the start time of scf iteration ModuleBase::TimePoint iter_time; diff --git a/source/source_esolver/esolver_gets.cpp b/source/source_esolver/esolver_gets.cpp index c66e33bce6f..5251010d321 100644 --- a/source/source_esolver/esolver_gets.cpp +++ b/source/source_esolver/esolver_gets.cpp @@ -14,153 +14,156 @@ namespace ModuleESolver { -ESolver_GetS::ESolver_GetS() +ESolver_GetS::ESolver_GetS () { this->classname = "ESolver_GetS"; this->basisname = "LCAO"; } -ESolver_GetS::~ESolver_GetS() -{ -} +ESolver_GetS::~ESolver_GetS () {} -void ESolver_GetS::before_all_runners(UnitCell& ucell, const Input_para& inp) +void + ESolver_GetS::before_all_runners (UnitCell& ucell, const Input_para& inp) { - ModuleBase::TITLE("ESolver_GetS", "before_all_runners"); - ModuleBase::timer::start("ESolver_GetS", "before_all_runners"); + ModuleBase::TITLE ("ESolver_GetS", "before_all_runners"); + ModuleBase::timer::start ("ESolver_GetS", "before_all_runners"); // 1.1) read pseudopotentials - elecstate::read_pseudo(GlobalV::ofs_running, ucell); + elecstate::read_pseudo (GlobalV::ofs_running, ucell); // 1.2) symmetrize things if (ModuleSymmetry::Symmetry::symm_flag == 1) - { - ucell.symm.analy_sys(ucell.lat, ucell.st, ucell.atoms, GlobalV::ofs_running); - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "SYMMETRY"); - } + { + ucell.symm.analy_sys (ucell.lat, ucell.st, ucell.atoms, GlobalV::ofs_running); + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "SYMMETRY"); + } // 1.3) Setup k-points according to symmetry. 
- this->kv.set(ucell, ucell.symm, inp.kpoint_file, inp.nspin, ucell.G, ucell.latvec, GlobalV::ofs_running); - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "INIT K-POINTS"); + this->kv.set (ucell, ucell.symm, inp.kpoint_file, inp.nspin, ucell.G, ucell.latvec, GlobalV::ofs_running); + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "INIT K-POINTS"); - ModuleIO::print_parameters(ucell, this->kv, inp); + ModuleIO::print_parameters (ucell, this->kv, inp); // 2) init ElecState // autoset nbands in ElecState, it should before basis_init (for Psi 2d division) if (this->pelec == nullptr) - { - // TK stands for double and std::complex? - this->pelec = new elecstate::ElecStateLCAO>(&(this->chr), // use which parameter? - &(this->kv), - this->kv.get_nks(), - this->pw_big); - } + { + // TK stands for double and std::complex? + this->pelec = new elecstate::ElecStateLCAO> (&(this->chr), // use which parameter? + &(this->kv), + this->kv.get_nks (), + this->pw_big); + } // 3) init LCAO basis // reading the localized orbitals/projectors // construct the interpolation tables. - LCAO_domain::init_basis_lcao(this->pv, - inp.onsite_radius, - inp.lcao_ecut, - inp.lcao_dk, - inp.lcao_dr, - inp.lcao_rmax, - ucell, - two_center_bundle_, - orb_); - - ModuleBase::timer::end("ESolver_GetS", "before_all_runners"); + LCAO_domain::init_basis_lcao (this->pv, + inp.onsite_radius, + inp.lcao_ecut, + inp.lcao_dk, + inp.lcao_dr, + inp.lcao_rmax, + ucell, + two_center_bundle_, + orb_); + + ModuleBase::timer::end ("ESolver_GetS", "before_all_runners"); } -void ESolver_GetS::runner(UnitCell& ucell, const int istep) +void + ESolver_GetS::runner (UnitCell& ucell, const int istep) { - ModuleBase::TITLE("ESolver_GetS", "runner"); - ModuleBase::timer::start("ESolver_GetS", "runner"); + ModuleBase::TITLE ("ESolver_GetS", "runner"); + ModuleBase::timer::start ("ESolver_GetS", "runner"); // (1) Find adjacent atoms for each atom. 
double search_radius = -1.0; - search_radius = atom_arrange::set_sr_NL(GlobalV::ofs_running, - PARAM.inp.out_level, - orb_.get_rcutmax_Phi(), - ucell.infoNL.get_rcutmax_Beta(), - PARAM.globalv.gamma_only_local); + search_radius = atom_arrange::set_sr_NL (GlobalV::ofs_running, + PARAM.inp.out_level, + orb_.get_rcutmax_Phi (), + ucell.infoNL.get_rcutmax_Beta (), + PARAM.globalv.gamma_only_local); Grid_Driver gd; - atom_arrange::search(PARAM.globalv.search_pbc, - GlobalV::ofs_running, - gd, - ucell, - search_radius, - PARAM.inp.test_atom_input); + atom_arrange::search (PARAM.globalv.search_pbc, + GlobalV::ofs_running, + gd, + ucell, + search_radius, + PARAM.inp.test_atom_input); Record_adj RA; - RA.for_2d(ucell, gd, this->pv, PARAM.globalv.gamma_only_local, orb_.cutoffs()); + RA.for_2d (ucell, gd, this->pv, PARAM.globalv.gamma_only_local, orb_.cutoffs ()); if (this->p_hamilt == nullptr) - { - if (PARAM.inp.nspin == 4) { - this->p_hamilt - = new hamilt::HamiltLCAO, std::complex>(ucell, - gd, - &this->pv, - this->kv, - *(two_center_bundle_.overlap_orb), - orb_.cutoffs()); - auto* hamilt_ptr = static_cast>*>(this->p_hamilt); - auto* ops_ptr = dynamic_cast, std::complex>*>(hamilt_ptr->ops); - ops_ptr->contributeHR(); + if (PARAM.inp.nspin == 4) + { + this->p_hamilt = new hamilt::HamiltLCAO, std::complex> ( + ucell, + gd, + &this->pv, + this->kv, + *(two_center_bundle_.overlap_orb), + orb_.cutoffs ()); + auto* hamilt_ptr = static_cast>*> (this->p_hamilt); + auto* ops_ptr = dynamic_cast, std::complex>*> ( + hamilt_ptr->ops); + ops_ptr->contributeHR (); + } + else + { + this->p_hamilt + = new hamilt::HamiltLCAO, double> (ucell, + gd, + &this->pv, + this->kv, + *(two_center_bundle_.overlap_orb), + orb_.cutoffs ()); + auto* hamilt_ptr = static_cast>*> (this->p_hamilt); + auto* ops_ptr = dynamic_cast, double>*> (hamilt_ptr->ops); + ops_ptr->contributeHR (); + } } - else - { - this->p_hamilt = new hamilt::HamiltLCAO, double>(ucell, - gd, - &this->pv, - this->kv, - 
*(two_center_bundle_.overlap_orb), - orb_.cutoffs()); - auto* hamilt_ptr = static_cast>*>(this->p_hamilt); - auto* ops_ptr = dynamic_cast, double>*>(hamilt_ptr->ops); - ops_ptr->contributeHR(); - } - } const std::string fn = PARAM.globalv.global_out_dir + "sr_nao.csr"; - auto* hamilt_ptr = static_cast>*>(this->p_hamilt); - ModuleIO::output_SR(pv, gd, hamilt_ptr, fn); + auto* hamilt_ptr = static_cast>*> (this->p_hamilt); + ModuleIO::output_SR (pv, gd, hamilt_ptr, fn); if (PARAM.inp.out_mat_r[0]) - { - cal_r_overlap_R r_matrix; - r_matrix.init(ucell, pv, orb_); - r_matrix.out_rR(ucell, gd, istep); - } + { + cal_r_overlap_R r_matrix; + r_matrix.init (ucell, pv, orb_); + r_matrix.out_rR (ucell, gd, istep); + } if (PARAM.inp.out_mat_ds[0]) - { - LCAO_HS_Arrays HS_Arrays; // store sparse arrays - //! Print out sparse matrix - ModuleIO::output_dSR(istep, - ucell, - this->pv, - HS_Arrays, - gd, // mohan add 2024-04-06 - two_center_bundle_, - orb_, - kv); - } - - ModuleBase::timer::end("ESolver_GetS", "runner"); + { + LCAO_HS_Arrays HS_Arrays; // store sparse arrays + //! 
Print out sparse matrix + ModuleIO::output_dSR (istep, + ucell, + this->pv, + HS_Arrays, + gd, // mohan add 2024-04-06 + two_center_bundle_, + orb_, + kv); + } + + ModuleBase::timer::end ("ESolver_GetS", "runner"); } -void ESolver_GetS::after_all_runners(UnitCell& ucell) {}; -double ESolver_GetS::cal_energy() +void ESolver_GetS::after_all_runners (UnitCell& ucell) {}; +double + ESolver_GetS::cal_energy () { return 0.0; }; -void ESolver_GetS::cal_force(UnitCell& ucell, ModuleBase::matrix& force) {}; -void ESolver_GetS::cal_stress(UnitCell& ucell, ModuleBase::matrix& stress) {}; +void ESolver_GetS::cal_force (UnitCell& ucell, ModuleBase::matrix& force) {}; +void ESolver_GetS::cal_stress (UnitCell& ucell, ModuleBase::matrix& stress) {}; } // namespace ModuleESolver diff --git a/source/source_esolver/esolver_gets.h b/source/source_esolver/esolver_gets.h index 7a7fb1d34bd..dbf592ef0bb 100644 --- a/source/source_esolver/esolver_gets.h +++ b/source/source_esolver/esolver_gets.h @@ -13,23 +13,23 @@ namespace ModuleESolver class ESolver_GetS : public ESolver_KS { public: - ESolver_GetS(); - ~ESolver_GetS(); + ESolver_GetS (); + ~ESolver_GetS (); - void before_all_runners(UnitCell& ucell, const Input_para& inp) override; + void before_all_runners (UnitCell& ucell, const Input_para& inp) override; - void after_all_runners(UnitCell& ucell) override; + void after_all_runners (UnitCell& ucell) override; - void runner(UnitCell& ucell, const int istep) override; + void runner (UnitCell& ucell, const int istep) override; //! calculate total energy of a given system - double cal_energy() override; + double cal_energy () override; //! calcualte forces for the atoms in the given cell - void cal_force(UnitCell& ucell, ModuleBase::matrix& force) override; + void cal_force (UnitCell& ucell, ModuleBase::matrix& force) override; //! 
calcualte stress of given cell - void cal_stress(UnitCell& ucell, ModuleBase::matrix& stress) override; + void cal_stress (UnitCell& ucell, ModuleBase::matrix& stress) override; protected: // 2d block - cyclic distribution info diff --git a/source/source_esolver/esolver_ks.cpp b/source/source_esolver/esolver_ks.cpp index 007184773b9..c7b612df301 100644 --- a/source/source_esolver/esolver_ks.cpp +++ b/source/source_esolver/esolver_ks.cpp @@ -5,45 +5,44 @@ #include "source_io/module_json/init_info.h" #include "source_io/module_json/output_info.h" -#include "source_estate/update_pot.h" // mohan add 20251016 +#include "source_estate/update_pot.h" // mohan add 20251016 #include "source_estate/module_charge/chgmixing.h" // mohan add 20251018 -#include "source_pw/module_pwdft/setup_pwwfc.h" // mohan add 20251018 +#include "source_pw/module_pwdft/setup_pwwfc.h" // mohan add 20251018 #include "source_hsolver/hsolver.h" #include "source_io/module_energy/write_eig_occ.h" #include "source_io/module_energy/write_bands.h" #include "source_hamilt/module_xc/xc_functional.h" #include "source_io/module_output/output_log.h" // use write_head -#include "source_estate/elecstate_print.h" // print_etot +#include "source_estate/elecstate_print.h" // print_etot #include "source_io/module_output/print_info.h" // print_parameters -#include "source_lcao/module_dftu/dftu.h" // mohan add 2025-11-07 +#include "source_lcao/module_dftu/dftu.h" // mohan add 2025-11-07 namespace ModuleESolver { -ESolver_KS::ESolver_KS() {} +ESolver_KS::ESolver_KS () {} - -ESolver_KS::~ESolver_KS() +ESolver_KS::~ESolver_KS () { //**************************************************** // do not add any codes in this deconstructor funcion //**************************************************** delete this->p_hamilt; delete this->p_chgmix; - this->ppcell.release_memory(); + this->ppcell.release_memory (); // mohan add 2025-10-18, should be put int clean() function - pw::teardown_pwwfc(this->pw_wfc); + pw::teardown_pwwfc 
(this->pw_wfc); } - -void ESolver_KS::before_all_runners(UnitCell& ucell, const Input_para& inp) +void + ESolver_KS::before_all_runners (UnitCell& ucell, const Input_para& inp) { - ModuleBase::TITLE("ESolver_KS", "before_all_runners"); + ModuleBase::TITLE ("ESolver_KS", "before_all_runners"); //! 1) setup "before_all_runniers" in ESolver_FP - ESolver_FP::before_all_runners(ucell, inp); - + ESolver_FP::before_all_runners (ucell, inp); + //! 2) setup some parameters classname = "ESolver_KS"; basisname = ""; @@ -58,30 +57,41 @@ void ESolver_KS::before_all_runners(UnitCell& ucell, const Input_para& inp) this->ppcell.cell_factor = inp.cell_factor; //! 3) setup charge mixing - p_chgmix = new Charge_Mixing(); - p_chgmix->set_rhopw(this->pw_rho, this->pw_rhod); - p_chgmix->set_mixing(inp.mixing_mode, inp.mixing_beta, inp.mixing_ndim, - inp.mixing_gg0, inp.mixing_tau, inp.mixing_beta_mag, inp.mixing_gg0_mag, - inp.mixing_gg0_min, inp.mixing_angle, inp.mixing_dmr, ucell.omega, ucell.tpiba); - p_chgmix->init_mixing(); + p_chgmix = new Charge_Mixing (); + p_chgmix->set_rhopw (this->pw_rho, this->pw_rhod); + p_chgmix->set_mixing (inp.mixing_mode, + inp.mixing_beta, + inp.mixing_ndim, + inp.mixing_gg0, + inp.mixing_tau, + inp.mixing_beta_mag, + inp.mixing_gg0_mag, + inp.mixing_gg0_min, + inp.mixing_angle, + inp.mixing_dmr, + ucell.omega, + ucell.tpiba); + p_chgmix->init_mixing (); //! 4) setup plane wave for electronic wave functions - pw::setup_pwwfc(inp, ucell, *this->pw_rho, this->kv, this->pw_wfc); + pw::setup_pwwfc (inp, ucell, *this->pw_rho, this->kv, this->pw_wfc); //! 5) read in charge density, mohan add 2025-11-28 //! Inititlize the charge density. 
- this->chr.init_rho(ucell, this->Pgrid, this->sf.strucFac, ucell.symm, &this->kv, this->pw_wfc); - this->chr.check_rho(); // check the rho - + this->chr.init_rho (ucell, this->Pgrid, this->sf.strucFac, ucell.symm, &this->kv, this->pw_wfc); + this->chr.check_rho (); // check the rho } -void ESolver_KS::hamilt2rho_single(UnitCell& ucell, const int istep, const int iter, const double ethr) -{} +void + ESolver_KS::hamilt2rho_single (UnitCell& ucell, const int istep, const int iter, const double ethr) +{ +} -void ESolver_KS::hamilt2rho(UnitCell& ucell, const int istep, const int iter, const double ethr) +void + ESolver_KS::hamilt2rho (UnitCell& ucell, const int istep, const int iter, const double ethr) { // 1) use Hamiltonian to obtain charge density - this->hamilt2rho_single(ucell, istep, iter, diag_ethr); + this->hamilt2rho_single (ucell, istep, iter, diag_ethr); // 2) for MPI: STOGROUP? need to rewrite // It may be changed when more clever parallel algorithm is put forward. @@ -91,41 +101,51 @@ void ESolver_KS::hamilt2rho(UnitCell& ucell, const int istep, const int iter, co // parallel algorithms, in which they do not occupy all processors, for // example wavefunctions uses 20 processors while density uses 10. if (PARAM.globalv.ks_run) - { - drho = p_chgmix->get_drho(&this->chr, PARAM.inp.nelec); - hsolver_error = 0.0; - if (iter == 1 && PARAM.inp.calculation != "nscf") { - hsolver_error - = hsolver::cal_hsolve_error(PARAM.inp.basis_type, PARAM.inp.esolver_type, diag_ethr, PARAM.inp.nelec); - - // The error of HSolver is larger than drho, - // so a more precise HSolver should be executed. 
- if (hsolver_error > drho) - { - diag_ethr = hsolver::reset_diag_ethr(GlobalV::ofs_running, PARAM.inp.basis_type, - PARAM.inp.esolver_type, PARAM.inp.precision, hsolver_error, - drho, diag_ethr, PARAM.inp.nelec); - - this->hamilt2rho_single(ucell, istep, iter, diag_ethr); - - drho = p_chgmix->get_drho(&this->chr, PARAM.inp.nelec); - - hsolver_error = hsolver::cal_hsolve_error(PARAM.inp.basis_type, - PARAM.inp.esolver_type, diag_ethr, PARAM.inp.nelec); - } + drho = p_chgmix->get_drho (&this->chr, PARAM.inp.nelec); + hsolver_error = 0.0; + if (iter == 1 && PARAM.inp.calculation != "nscf") + { + hsolver_error = hsolver::cal_hsolve_error (PARAM.inp.basis_type, + PARAM.inp.esolver_type, + diag_ethr, + PARAM.inp.nelec); + + // The error of HSolver is larger than drho, + // so a more precise HSolver should be executed. + if (hsolver_error > drho) + { + diag_ethr = hsolver::reset_diag_ethr (GlobalV::ofs_running, + PARAM.inp.basis_type, + PARAM.inp.esolver_type, + PARAM.inp.precision, + hsolver_error, + drho, + diag_ethr, + PARAM.inp.nelec); + + this->hamilt2rho_single (ucell, istep, iter, diag_ethr); + + drho = p_chgmix->get_drho (&this->chr, PARAM.inp.nelec); + + hsolver_error = hsolver::cal_hsolve_error (PARAM.inp.basis_type, + PARAM.inp.esolver_type, + diag_ethr, + PARAM.inp.nelec); + } + } } - } } -void ESolver_KS::runner(UnitCell& ucell, const int istep) +void + ESolver_KS::runner (UnitCell& ucell, const int istep) { - ModuleBase::TITLE("ESolver_KS", "runner"); - ModuleBase::timer::start(this->classname, "runner"); + ModuleBase::TITLE ("ESolver_KS", "runner"); + ModuleBase::timer::start (this->classname, "runner"); // 1) before_scf (electronic iteration loops) - this->before_scf(ucell, istep); - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "INIT SCF"); + this->before_scf (ucell, istep); + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "INIT SCF"); // 2) SCF iterations bool conv_esolver = false; @@ -133,97 +153,119 @@ void ESolver_KS::runner(UnitCell& 
ucell, const int istep) this->diag_ethr = PARAM.inp.pw_diag_thr; this->scf_nmax_flag = false; // mohan add 2025-09-21 for (int iter = 1; iter <= this->maxniter; ++iter) - { - if(iter == this->maxniter) - { - this->scf_nmax_flag=true; - } - - // 3) initialization of SCF iterations - this->iter_init(ucell, istep, iter); - - // 4) use Hamiltonian to obtain charge density - this->hamilt2rho(ucell, istep, iter, diag_ethr); - - // 5) finish scf iterations - this->iter_finish(ucell, istep, iter, conv_esolver); - - // 6) check convergence - if (conv_esolver || this->oscillate_esolver) { - this->niter = iter; - if (this->oscillate_esolver) - { - std::cout << " !! Density oscillation is found, STOP HERE !!" << std::endl; - } - break; - } - } // end scf iterations + if (iter == this->maxniter) + { + this->scf_nmax_flag = true; + } + + // 3) initialization of SCF iterations + this->iter_init (ucell, istep, iter); + + // 4) use Hamiltonian to obtain charge density + this->hamilt2rho (ucell, istep, iter, diag_ethr); + + // 5) finish scf iterations + this->iter_finish (ucell, istep, iter, conv_esolver); + + // 6) check convergence + if (conv_esolver || this->oscillate_esolver) + { + this->niter = iter; + if (this->oscillate_esolver) + { + std::cout << " !! Density oscillation is found, STOP HERE !!" 
<< std::endl; + } + break; + } + } // end scf iterations // 7) after scf - this->after_scf(ucell, istep, conv_esolver); + this->after_scf (ucell, istep, conv_esolver); - ModuleBase::timer::end(this->classname, "runner"); + ModuleBase::timer::end (this->classname, "runner"); return; }; -void ESolver_KS::before_scf(UnitCell& ucell, const int istep) +void + ESolver_KS::before_scf (UnitCell& ucell, const int istep) { - ModuleBase::TITLE("ESolver_KS", "before_scf"); - ESolver_FP::before_scf(ucell, istep); + ModuleBase::TITLE ("ESolver_KS", "before_scf"); + ESolver_FP::before_scf (ucell, istep); } -void ESolver_KS::iter_init(UnitCell& ucell, const int istep, const int iter) +void + ESolver_KS::iter_init (UnitCell& ucell, const int istep, const int iter) { - if(PARAM.inp.esolver_type != "tddft") - { - ModuleIO::write_head(GlobalV::ofs_running, istep, iter, this->basisname); - } + if (PARAM.inp.esolver_type != "tddft") + { + ModuleIO::write_head (GlobalV::ofs_running, istep, iter, this->basisname); + } - iter_time = ModuleBase::get_time(); + iter_time = ModuleBase::get_time (); if (PARAM.inp.esolver_type == "ksdft") - { - diag_ethr = hsolver::set_diagethr_ks(PARAM.inp.basis_type, PARAM.inp.esolver_type, - PARAM.inp.calculation, PARAM.inp.init_chg, PARAM.inp.precision, istep, iter, - drho, PARAM.inp.pw_diag_thr, diag_ethr, PARAM.inp.nelec); - } + { + diag_ethr = hsolver::set_diagethr_ks (PARAM.inp.basis_type, + PARAM.inp.esolver_type, + PARAM.inp.calculation, + PARAM.inp.init_chg, + PARAM.inp.precision, + istep, + iter, + drho, + PARAM.inp.pw_diag_thr, + diag_ethr, + PARAM.inp.nelec); + } else if (PARAM.inp.esolver_type == "sdft") - { - diag_ethr = hsolver::set_diagethr_sdft(PARAM.inp.basis_type, PARAM.inp.esolver_type, - PARAM.inp.calculation, PARAM.inp.init_chg, istep, iter, drho, - PARAM.inp.pw_diag_thr, diag_ethr, PARAM.inp.nbands, esolver_KS_ne); - } + { + diag_ethr = hsolver::set_diagethr_sdft (PARAM.inp.basis_type, + PARAM.inp.esolver_type, + PARAM.inp.calculation, + 
PARAM.inp.init_chg, + istep, + iter, + drho, + PARAM.inp.pw_diag_thr, + diag_ethr, + PARAM.inp.nbands, + esolver_KS_ne); + } // save input charge density (rho) - this->chr.save_rho_before_sum_band(); + this->chr.save_rho_before_sum_band (); } -void ESolver_KS::iter_finish(UnitCell& ucell, const int istep, int& iter, bool &conv_esolver) +void + ESolver_KS::iter_finish (UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) { - // 1.1) print out band gap + // 1.1) print out band gap if (!PARAM.globalv.two_fermi) - { - this->pelec->cal_bandgap(); - } + { + this->pelec->cal_bandgap (); + } else - { - this->pelec->cal_bandgap_updw(); - } + { + this->pelec->cal_bandgap_updw (); + } // 1.2) print out eigenvalues and occupations if (PARAM.inp.out_band[0]) - { - if (iter % PARAM.inp.out_freq_elec == 0 || iter == PARAM.inp.scf_nmax || conv_esolver) { - ModuleIO::write_eig_iter(this->pelec->ekb,this->pelec->wg,*this->pelec->klist); + if (iter % PARAM.inp.out_freq_elec == 0 || iter == PARAM.inp.scf_nmax || conv_esolver) + { + ModuleIO::write_eig_iter (this->pelec->ekb, this->pelec->wg, *this->pelec->klist); + } } - } // 2.1) compute magnetization, only for spin==2 - ucell.magnet.compute_mag(ucell.omega, this->chr.nrxx, this->chr.nxyz, this->chr.rho, - this->pelec->nelec_spin.data()); + ucell.magnet.compute_mag (ucell.omega, + this->chr.nrxx, + this->chr.nxyz, + this->chr.rho, + this->pelec->nelec_spin.data ()); // 2.2) charge mixing // SCF will continue if U is not converged for uramping calculation @@ -231,87 +273,97 @@ void ESolver_KS::iter_finish(UnitCell& ucell, const int istep, int& iter, bool & // to avoid unnecessary dependence on dft+u, refactor is needed #ifdef __LCAO if (PARAM.inp.dft_plus_u) - { - converged_u = this->dftu.u_converged(); - } + { + converged_u = this->dftu.u_converged (); + } #endif - module_charge::chgmixing_ks(iter, ucell, this->pelec, this->chr, this->p_chgmix, - this->pw_rhod->nrxx, this->drho, this->oscillate_esolver, conv_esolver, 
hsolver_error, - this->scf_thr, this->scf_ene_thr, converged_u, PARAM.inp); + module_charge::chgmixing_ks (iter, + ucell, + this->pelec, + this->chr, + this->p_chgmix, + this->pw_rhod->nrxx, + this->drho, + this->oscillate_esolver, + conv_esolver, + hsolver_error, + this->scf_thr, + this->scf_ene_thr, + converged_u, + PARAM.inp); // 2.3) Update potentials (should be done every SF iter) - elecstate::update_pot(ucell, this->pelec, this->chr, conv_esolver); + elecstate::update_pot (ucell, this->pelec, this->chr, conv_esolver); // 3.1) calculate energies - this->pelec->cal_energies(1); // Harris-Foulkes functional - this->pelec->cal_energies(2); // Kohn-Sham functional + this->pelec->cal_energies (1); // Harris-Foulkes functional + this->pelec->cal_energies (2); // Kohn-Sham functional if (iter == 1) - { - this->pelec->f_en.etot_old = this->pelec->f_en.etot; - } + { + this->pelec->f_en.etot_old = this->pelec->f_en.etot; + } this->pelec->f_en.etot_delta = this->pelec->f_en.etot - this->pelec->f_en.etot_old; this->pelec->f_en.etot_old = this->pelec->f_en.etot; // 4) get meta-GGA related parameters double dkin = 0.0; // for meta-GGA - if (XC_Functional::get_ked_flag()) - { - dkin = p_chgmix->get_dkin(&this->chr, PARAM.inp.nelec); - } - - // Iter finish - ESolver_FP::iter_finish(ucell, istep, iter, conv_esolver); + if (XC_Functional::get_ked_flag ()) + { + dkin = p_chgmix->get_dkin (&this->chr, PARAM.inp.nelec); + } + // Iter finish + ESolver_FP::iter_finish (ucell, istep, iter, conv_esolver); // the end, print time - double duration = ModuleBase::get_duration(iter_time, ModuleBase::get_time()); + double duration = ModuleBase::get_duration (iter_time, ModuleBase::get_time ()); // print energies - elecstate::print_etot(ucell.magnet, *pelec, conv_esolver, iter, drho, - dkin, duration, diag_ethr); - + elecstate::print_etot (ucell.magnet, *pelec, conv_esolver, iter, drho, dkin, duration, diag_ethr); #ifdef __RAPIDJSON // add Json of scf mag - 
Json::add_output_scf_mag(ucell.magnet.tot_mag, ucell.magnet.abs_mag, - this->pelec->f_en.etot * ModuleBase::Ry_to_eV, - this->pelec->f_en.etot_delta * ModuleBase::Ry_to_eV, - drho, duration); + Json::add_output_scf_mag (ucell.magnet.tot_mag, + ucell.magnet.abs_mag, + this->pelec->f_en.etot * ModuleBase::Ry_to_eV, + this->pelec->f_en.etot_delta * ModuleBase::Ry_to_eV, + drho, + duration); #endif //__RAPIDJSON - } //! Something to do after SCF iterations when SCF is converged or comes to the max iter step. -void ESolver_KS::after_scf(UnitCell& ucell, const int istep, const bool conv_esolver) +void + ESolver_KS::after_scf (UnitCell& ucell, const int istep, const bool conv_esolver) { - ModuleBase::TITLE("ESolver_KS", "after_scf"); - -/* - // 1) calculate the kinetic energy density tau - if (PARAM.inp.out_elf[0] > 0) - { - assert(this->psi != nullptr); - this->pelec->cal_tau(*(this->psi)); - } -*/ - + ModuleBase::TITLE ("ESolver_KS", "after_scf"); + + /* + // 1) calculate the kinetic energy density tau + if (PARAM.inp.out_elf[0] > 0) + { + assert(this->psi != nullptr); + this->pelec->cal_tau(*(this->psi)); + } + */ + // 2) call after_scf() of ESolver_FP - ESolver_FP::after_scf(ucell, istep, conv_esolver); + ESolver_FP::after_scf (ucell, istep, conv_esolver); // 3) write eigenvalues and occupations to eig_occ.txt - ModuleIO::write_eig_file(this->pelec->ekb, this->pelec->wg, this->kv, istep); + ModuleIO::write_eig_file (this->pelec->ekb, this->pelec->wg, this->kv, istep); // 4) write band information to band.txt - ModuleIO::write_bands(PARAM.inp, this->pelec->ekb, this->kv); - + ModuleIO::write_bands (PARAM.inp, this->pelec->ekb, this->kv); } -void ESolver_KS::after_all_runners(UnitCell& ucell) +void + ESolver_KS::after_all_runners (UnitCell& ucell) { // 1) write Etot information - ESolver_FP::after_all_runners(ucell); + ESolver_FP::after_all_runners (ucell); } } // namespace ModuleESolver diff --git a/source/source_esolver/esolver_ks.h 
b/source/source_esolver/esolver_ks.h index c480d238ad1..976aea29138 100644 --- a/source/source_esolver/esolver_ks.h +++ b/source/source_esolver/esolver_ks.h @@ -1,13 +1,13 @@ #ifndef ESOLVER_KS_H #define ESOLVER_KS_H -#include "esolver_fp.h" // first-principles esolver -#include "source_basis/module_pw/pw_basis_k.h" // use plane wave -#include "source_cell/klist.h" // use k-points in Brillouin zone +#include "esolver_fp.h" // first-principles esolver +#include "source_basis/module_pw/pw_basis_k.h" // use plane wave +#include "source_cell/klist.h" // use k-points in Brillouin zone #include "source_estate/module_charge/charge_mixing.h" // use charge mixing -#include "source_hamilt/hamilt.h" // use Hamiltonian -#include "source_hamilt/hamilt_base.h" // use Hamiltonian base class -#include "source_lcao/module_dftu/dftu.h" // mohan add 20251107 +#include "source_hamilt/hamilt.h" // use Hamiltonian +#include "source_hamilt/hamilt_base.h" // use Hamiltonian base class +#include "source_lcao/module_dftu/dftu.h" // mohan add 20251107 #include "source_pw/module_pwdft/vnl_pw.h" namespace ModuleESolver @@ -17,35 +17,35 @@ class ESolver_KS : public ESolver_FP { public: //! Constructor - ESolver_KS(); + ESolver_KS (); //! Deconstructor - virtual ~ESolver_KS(); + virtual ~ESolver_KS (); - virtual void before_all_runners(UnitCell& ucell, const Input_para& inp) override; + virtual void before_all_runners (UnitCell& ucell, const Input_para& inp) override; - virtual void runner(UnitCell& ucell, const int istep) override; + virtual void runner (UnitCell& ucell, const int istep) override; - virtual void after_all_runners(UnitCell& ucell) override; + virtual void after_all_runners (UnitCell& ucell) override; protected: //! Something to do before SCF iterations. - virtual void before_scf(UnitCell& ucell, const int istep) override; + virtual void before_scf (UnitCell& ucell, const int istep) override; //! Something to do before hamilt2rho function in each iter loop. 
- virtual void iter_init(UnitCell& ucell, const int istep, const int iter); + virtual void iter_init (UnitCell& ucell, const int istep, const int iter); //! Something to do after hamilt2rho function in each iter loop. - virtual void iter_finish(UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) override; + virtual void iter_finish (UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) override; // calculate electron density from a specific Hamiltonian with ethr - virtual void hamilt2rho_single(UnitCell& ucell, const int istep, const int iter, const double ethr); + virtual void hamilt2rho_single (UnitCell& ucell, const int istep, const int iter, const double ethr); // calculate electron density from a specific Hamiltonian - void hamilt2rho(UnitCell& ucell, const int istep, const int iter, const double ethr); + void hamilt2rho (UnitCell& ucell, const int istep, const int iter, const double ethr); //! Something to do after SCF iterations when SCF is converged or comes to the max iter step. - virtual void after_scf(UnitCell& ucell, const int istep, const bool conv_esolver) override; + virtual void after_scf (UnitCell& ucell, const int istep, const bool conv_esolver) override; //! Hamiltonian (base class pointer, actual type determined at runtime) hamilt::HamiltBase* p_hamilt = nullptr; @@ -62,17 +62,17 @@ class ESolver_KS : public ESolver_FP //! DFT+U method, mohan add 2025-11-07 Plus_U dftu; - std::string basisname; //! esolver_ks_lcao.cpp - double esolver_KS_ne = 0.0; //! number of electrons - double diag_ethr; //! the threshold for diagonalization - double scf_thr; //! scf density threshold - double scf_ene_thr; //! scf energy threshold - double drho; //! the difference between rho_in (before HSolver) and rho_out (After HSolver) - double hsolver_error; //! the error of HSolver - int maxniter; //! maximum iter steps for scf - int niter; //! iter steps actually used in scf + std::string basisname; //! 
esolver_ks_lcao.cpp + double esolver_KS_ne = 0.0; //! number of electrons + double diag_ethr; //! the threshold for diagonalization + double scf_thr; //! scf density threshold + double scf_ene_thr; //! scf energy threshold + double drho; //! the difference between rho_in (before HSolver) and rho_out (After HSolver) + double hsolver_error; //! the error of HSolver + int maxniter; //! maximum iter steps for scf + int niter; //! iter steps actually used in scf bool oscillate_esolver = false; // whether esolver is oscillated - bool scf_nmax_flag = false; // whether scf has reached nmax, mohan add 20250921 + bool scf_nmax_flag = false; // whether scf has reached nmax, mohan add 20250921 }; } // namespace ModuleESolver #endif diff --git a/source/source_esolver/esolver_ks_lcao.cpp b/source/source_esolver/esolver_ks_lcao.cpp index 9d3906d6ebf..436b0bd0179 100644 --- a/source/source_esolver/esolver_ks_lcao.cpp +++ b/source/source_esolver/esolver_ks_lcao.cpp @@ -16,381 +16,484 @@ #include "../source_lcao/module_ri/exx_opt_orb.h" #endif #include "source_lcao/module_rdmft/rdmft.h" -#include "source_estate/module_charge/chgmixing.h" // use charge mixing, mohan add 20251006 -#include "source_estate/module_dm/init_dm.h" // init dm from electronic wave functions -#include "source_io/module_ctrl/ctrl_runner_lcao.h" // use ctrl_runner_lcao() -#include "source_io/module_ctrl/ctrl_iter_lcao.h" // use ctrl_iter_lcao() -#include "source_io/module_ctrl/ctrl_scf_lcao.h" // use ctrl_scf_lcao() +#include "source_estate/module_charge/chgmixing.h" // use charge mixing, mohan add 20251006 +#include "source_estate/module_dm/init_dm.h" // init dm from electronic wave functions +#include "source_io/module_ctrl/ctrl_runner_lcao.h" // use ctrl_runner_lcao() +#include "source_io/module_ctrl/ctrl_iter_lcao.h" // use ctrl_iter_lcao() +#include "source_io/module_ctrl/ctrl_scf_lcao.h" // use ctrl_scf_lcao() #include "source_io/module_output/print_info.h" #include "source_lcao/rho_tau_lcao.h" // mohan 
add 20251024 -#include "source_lcao/LCAO_set.h" // mohan add 20251111 -#include "source_psi/setup_psi.h" // use Setup_Psi for deallocate_psi +#include "source_lcao/LCAO_set.h" // mohan add 20251111 +#include "source_psi/setup_psi.h" // use Setup_Psi for deallocate_psi namespace ModuleESolver { template -ESolver_KS_LCAO::ESolver_KS_LCAO() +ESolver_KS_LCAO::ESolver_KS_LCAO () { this->classname = "ESolver_KS_LCAO"; this->basisname = "LCAO"; - this->exx_nao.init(); // mohan add 20251008 + this->exx_nao.init (); // mohan add 20251008 } template -ESolver_KS_LCAO::~ESolver_KS_LCAO() +ESolver_KS_LCAO::~ESolver_KS_LCAO () { - //**************************************************** - // do not add any codes in this deconstructor funcion - //**************************************************** - Setup_Psi::deallocate_psi(this->psi); + //**************************************************** + // do not add any codes in this deconstructor funcion + //**************************************************** + Setup_Psi::deallocate_psi (this->psi); } template -void ESolver_KS_LCAO::before_all_runners(UnitCell& ucell, const Input_para& inp) +void + ESolver_KS_LCAO::before_all_runners (UnitCell& ucell, const Input_para& inp) { - ModuleBase::TITLE("ESolver_KS_LCAO", "before_all_runners"); - ModuleBase::timer::start("ESolver_KS_LCAO", "before_all_runners"); + ModuleBase::TITLE ("ESolver_KS_LCAO", "before_all_runners"); + ModuleBase::timer::start ("ESolver_KS_LCAO", "before_all_runners"); // 1) before_all_runners in ESolver_KS - ESolver_KS::before_all_runners(ucell, inp); + ESolver_KS::before_all_runners (ucell, inp); // 2) autoset nbands in ElecState before init_basis (for Psi 2d division) if (this->pelec == nullptr) - { - // TK stands for double and std::complex? - this->pelec = new elecstate::ElecStateLCAO(&(this->chr), &(this->kv), - this->kv.get_nks(), this->pw_big); - } + { + // TK stands for double and std::complex? 
+ this->pelec + = new elecstate::ElecStateLCAO (&(this->chr), &(this->kv), this->kv.get_nks (), this->pw_big); + } // 3) read LCAO orbitals/projectors and construct the interpolation tables. - LCAO_domain::init_basis_lcao(this->pv, inp.onsite_radius, inp.lcao_ecut, - inp.lcao_dk, inp.lcao_dr, inp.lcao_rmax, ucell, two_center_bundle_, orb_); + LCAO_domain::init_basis_lcao (this->pv, + inp.onsite_radius, + inp.lcao_ecut, + inp.lcao_dk, + inp.lcao_dr, + inp.lcao_rmax, + ucell, + two_center_bundle_, + orb_); // 4) setup EXX calculations if (inp.calculation == "gen_opt_abfs") - { + { #ifdef __EXX - Exx_Opt_Orb exx_opt_orb; - exx_opt_orb.generate_matrix(GlobalC::exx_info.info_opt_abfs, this->kv, ucell, this->orb_); + Exx_Opt_Orb exx_opt_orb; + exx_opt_orb.generate_matrix (GlobalC::exx_info.info_opt_abfs, this->kv, ucell, this->orb_); #else - ModuleBase::WARNING_QUIT("ESolver_KS_LCAO::before_all_runners", "calculation=gen_opt_abfs must compile __EXX"); + ModuleBase::WARNING_QUIT ("ESolver_KS_LCAO::before_all_runners", + "calculation=gen_opt_abfs must compile __EXX"); #endif - return; - } - - LCAO_domain::set_psi_occ_dm_chg(this->kv, this->psi, this->pv, this->pelec, - this->dmat, this->chr, inp); + return; + } - LCAO_domain::set_pot(ucell, this->kv, this->sf, *this->pw_rho, *this->pw_rhod, - this->pelec, this->orb_, this->pv, this->locpp, this->dftu, - this->solvent, this->exx_nao, this->deepks, inp); + LCAO_domain::set_psi_occ_dm_chg (this->kv, this->psi, this->pv, this->pelec, this->dmat, this->chr, inp); + + LCAO_domain::set_pot (ucell, + this->kv, + this->sf, + *this->pw_rho, + *this->pw_rhod, + this->pelec, + this->orb_, + this->pv, + this->locpp, + this->dftu, + this->solvent, + this->exx_nao, + this->deepks, + inp); //! if kpar is not divisible by nks, print a warning - ModuleIO::print_kpar(this->kv.get_nks(), PARAM.globalv.kpar_lcao); + ModuleIO::print_kpar (this->kv.get_nks (), PARAM.globalv.kpar_lcao); //! 
init rdmft, added by jghan if (inp.rdmft == true) - { - rdmft_solver.init(this->pv, ucell, - this->gd, this->kv, *(this->pelec), this->orb_, - two_center_bundle_, inp.dft_functional, inp.rdmft_power_alpha); - } + { + rdmft_solver.init (this->pv, + ucell, + this->gd, + this->kv, + *(this->pelec), + this->orb_, + two_center_bundle_, + inp.dft_functional, + inp.rdmft_power_alpha); + } - ModuleBase::timer::end("ESolver_KS_LCAO", "before_all_runners"); + ModuleBase::timer::end ("ESolver_KS_LCAO", "before_all_runners"); return; } - template -void ESolver_KS_LCAO::before_scf(UnitCell& ucell, const int istep) +void + ESolver_KS_LCAO::before_scf (UnitCell& ucell, const int istep) { - ModuleBase::TITLE("ESolver_KS_LCAO", "before_scf"); - ModuleBase::timer::start("ESolver_KS_LCAO", "before_scf"); + ModuleBase::TITLE ("ESolver_KS_LCAO", "before_scf"); + ModuleBase::timer::start ("ESolver_KS_LCAO", "before_scf"); //! 1) call before_scf() of ESolver_KS. - ESolver_KS::before_scf(ucell, istep); + ESolver_KS::before_scf (ucell, istep); //! 2) find search radius - double search_radius = atom_arrange::set_sr_NL(GlobalV::ofs_running, - PARAM.inp.out_level, orb_.get_rcutmax_Phi(), ucell.infoNL.get_rcutmax_Beta(), - PARAM.globalv.gamma_only_local); + double search_radius = atom_arrange::set_sr_NL (GlobalV::ofs_running, + PARAM.inp.out_level, + orb_.get_rcutmax_Phi (), + ucell.infoNL.get_rcutmax_Beta (), + PARAM.globalv.gamma_only_local); //! 3) use search_radius to search adj atoms - atom_arrange::search(PARAM.globalv.search_pbc, GlobalV::ofs_running, - this->gd, ucell, search_radius, PARAM.inp.test_atom_input); + atom_arrange::search (PARAM.globalv.search_pbc, + GlobalV::ofs_running, + this->gd, + ucell, + search_radius, + PARAM.inp.test_atom_input); //! 
4) initialize NAO basis set // here new is a unique pointer, which will be deleted automatically - gint_info_.reset( - new ModuleGint::GintInfo( - this->pw_big->nbx, this->pw_big->nby, this->pw_big->nbz, - this->pw_rho->nx, this->pw_rho->ny, this->pw_rho->nz, - 0, 0, this->pw_big->nbzp_start, - this->pw_big->nbx, this->pw_big->nby, this->pw_big->nbzp, - orb_.Phi, ucell, this->gd)); - ModuleGint::Gint::set_gint_info(gint_info_.get()); + gint_info_.reset (new ModuleGint::GintInfo (this->pw_big->nbx, + this->pw_big->nby, + this->pw_big->nbz, + this->pw_rho->nx, + this->pw_rho->ny, + this->pw_rho->nz, + 0, + 0, + this->pw_big->nbzp_start, + this->pw_big->nbx, + this->pw_big->nby, + this->pw_big->nbzp, + orb_.Phi, + ucell, + this->gd)); + ModuleGint::Gint::set_gint_info (gint_info_.get ()); // 7) For each atom, calculate the adjacent atoms in different cells // and allocate the space for H(R) and S(R). // If k point is used here, allocate HlocR after atom_arrange. - this->RA.for_2d(ucell, this->gd, this->pv, PARAM.globalv.gamma_only_local, orb_.cutoffs()); + this->RA.for_2d (ucell, this->gd, this->pv, PARAM.globalv.gamma_only_local, orb_.cutoffs ()); // 8) initialize the Hamiltonian operators // if atom moves, then delete old pointer and add a new one if (this->p_hamilt != nullptr) - { - delete this->p_hamilt; - this->p_hamilt = nullptr; - } + { + delete this->p_hamilt; + this->p_hamilt = nullptr; + } if (this->p_hamilt == nullptr) - { - this->p_hamilt = new hamilt::HamiltLCAO( - ucell, this->gd, &this->pv, this->pelec->pot, this->kv, - two_center_bundle_, orb_, this->dmat.dm, &this->dftu, this->deepks, istep, exx_nao); - } + { + this->p_hamilt = new hamilt::HamiltLCAO (ucell, + this->gd, + &this->pv, + this->pelec->pot, + this->kv, + two_center_bundle_, + orb_, + this->dmat.dm, + &this->dftu, + this->deepks, + istep, + exx_nao); + } // 9) for each ionic step, the overlap must be rebuilt // since it depends on ionic positions - this->deepks.build_overlap(ucell, orb_, 
pv, gd, *(two_center_bundle_.overlap_orb_alpha), PARAM.inp); + this->deepks.build_overlap (ucell, orb_, pv, gd, *(two_center_bundle_.overlap_orb_alpha), PARAM.inp); // 10) prepare sc calculation - init_deltaspin_lcao(ucell, PARAM.inp, &(this->pv), this->kv, this->p_hamilt, this->psi, this->dmat.dm, this->pelec); + init_deltaspin_lcao (ucell, + PARAM.inp, + &(this->pv), + this->kv, + this->p_hamilt, + this->psi, + this->dmat.dm, + this->pelec); // 11) set xc type before the first cal of xc in pelec->init_scf, Peize Lin add 2016-12-03 - this->exx_nao.before_scf(ucell, this->kv, orb_, this->p_chgmix, istep, PARAM.inp); + this->exx_nao.before_scf (ucell, this->kv, orb_, this->p_chgmix, istep, PARAM.inp); // 12) initalize DM(R), which has the same size with Hamiltonian(R) - auto* hamilt_lcao = dynamic_cast*>(this->p_hamilt); + auto* hamilt_lcao = dynamic_cast*> (this->p_hamilt); - if(!hamilt_lcao) - { - ModuleBase::WARNING_QUIT("ESolver_KS_LCAO::before_scf","p_hamilt does not exist"); - } - this->dmat.dm->init_DMR(*hamilt_lcao->getHR()); + if (!hamilt_lcao) + { + ModuleBase::WARNING_QUIT ("ESolver_KS_LCAO::before_scf", "p_hamilt does not exist"); + } + this->dmat.dm->init_DMR (*hamilt_lcao->getHR ()); // 13.1) decide the strategy for initializing DMR and HR - if(istep == 0)//if the first scf step, readin DMR from file, - { - //calculate or readin the density matrix DMR - if(PARAM.inp.init_chg == "dm") + if (istep == 0) // if the first scf step, readin DMR from file, { - //! 13.1.1) init charge density from density matrix file - LCAO_domain::init_chg_dm(PARAM.globalv.global_readin_dir, PARAM.inp.nspin, - this->dmat, ucell, &(this->pv), this->pelec->charge); + // calculate or readin the density matrix DMR + if (PARAM.inp.init_chg == "dm") + { + //! 
13.1.1) init charge density from density matrix file + LCAO_domain::init_chg_dm (PARAM.globalv.global_readin_dir, + PARAM.inp.nspin, + this->dmat, + ucell, + &(this->pv), + this->pelec->charge); + } + if (PARAM.inp.init_chg == "hr") + { + //! 13.1.2) init charge density from Hamiltonian matrix file + LCAO_domain::init_chg_hr (PARAM.globalv.global_readin_dir, + PARAM.inp.nspin, + static_cast*> (this->p_hamilt), + ucell, + &(this->pv), + this->psi[0], + this->pelec, + *this->dmat.dm, + this->chr, + PARAM.inp.ks_solver); + } } - if(PARAM.inp.init_chg == "hr") + else if (PARAM.inp.esolver_type != "tddft") // if not, use the DMR calculated from last step { - //! 13.1.2) init charge density from Hamiltonian matrix file - LCAO_domain::init_chg_hr(PARAM.globalv.global_readin_dir, PARAM.inp.nspin, - static_cast*>(this->p_hamilt), ucell, &(this->pv), this->psi[0], this->pelec, *this->dmat.dm, - this->chr, PARAM.inp.ks_solver); + // 13.1.2) two cases are considered: + // 1. DMK in DensityMatrix is not empty (istep > 0), then DMR is initialized by DMK + // 2. DMK in DensityMatrix is empty (istep == 0), then DMR is initialized by zeros + this->dmat.dm->cal_DMR (); } - } - else if(PARAM.inp.esolver_type!="tddft")//if not, use the DMR calculated from last step - { - // 13.1.2) two cases are considered: - // 1. DMK in DensityMatrix is not empty (istep > 0), then DMR is initialized by DMK - // 2. DMK in DensityMatrix is empty (istep == 0), then DMR is initialized by zeros - this->dmat.dm->cal_DMR(); - } // 13.2) init_scf, should be before_scf? 
mohan add 2025-03-10 - this->pelec->init_scf(ucell, this->Pgrid, this->sf.strucFac, this->locpp.numeric, ucell.symm); + this->pelec->init_scf (ucell, this->Pgrid, this->sf.strucFac, this->locpp.numeric, ucell.symm); #ifdef __MLALGO // 14) initialize DM2(R) of DeePKS, the DM2(R) is different from DM(R) - this->deepks.ld.init_DMR(ucell, orb_, this->pv, this->gd); + this->deepks.ld.init_DMR (ucell, orb_, this->pv, this->gd); #endif // 16) the electron charge density should be symmetrized, - Symmetry_rho::symmetrize_rho(PARAM.inp.nspin, this->chr, this->pw_rho, ucell.symm); + Symmetry_rho::symmetrize_rho (PARAM.inp.nspin, this->chr, this->pw_rho, ucell.symm); // 17) update of RDMFT, added by jghan if (PARAM.inp.rdmft == true) - { - rdmft_solver.update_ion(ucell, *(this->pw_rho), this->locpp.vloc, this->sf.strucFac); - } + { + rdmft_solver.update_ion (ucell, *(this->pw_rho), this->locpp.vloc, this->sf.strucFac); + } - ModuleBase::timer::end("ESolver_KS_LCAO", "before_scf"); + ModuleBase::timer::end ("ESolver_KS_LCAO", "before_scf"); return; } - template -double ESolver_KS_LCAO::cal_energy() +double + ESolver_KS_LCAO::cal_energy () { return this->pelec->f_en.etot; } template -void ESolver_KS_LCAO::cal_force(UnitCell& ucell, ModuleBase::matrix& force) +void + ESolver_KS_LCAO::cal_force (UnitCell& ucell, ModuleBase::matrix& force) { - ModuleBase::TITLE("ESolver_KS_LCAO", "cal_force"); - ModuleBase::timer::start("ESolver_KS_LCAO", "cal_force"); - - Force_Stress_LCAO fsl(this->RA, ucell.nat); - - deepks.dpks_out_type = "tot"; // for deepks method - - fsl.getForceStress(ucell, PARAM.inp.cal_force, PARAM.inp.cal_stress, - PARAM.inp.test_force, PARAM.inp.test_stress, - this->gd, this->pv, this->pelec, this->dmat, this->psi, - two_center_bundle_, orb_, force, this->scs, - this->locpp, this->sf, this->kv, - this->pw_rho, this->solvent, this->dftu, this->deepks, - this->exx_nao, &ucell.symm); + ModuleBase::TITLE ("ESolver_KS_LCAO", "cal_force"); + ModuleBase::timer::start 
("ESolver_KS_LCAO", "cal_force"); + + Force_Stress_LCAO fsl (this->RA, ucell.nat); + + deepks.dpks_out_type = "tot"; // for deepks method + + fsl.getForceStress (ucell, + PARAM.inp.cal_force, + PARAM.inp.cal_stress, + PARAM.inp.test_force, + PARAM.inp.test_stress, + this->gd, + this->pv, + this->pelec, + this->dmat, + this->psi, + two_center_bundle_, + orb_, + force, + this->scs, + this->locpp, + this->sf, + this->kv, + this->pw_rho, + this->solvent, + this->dftu, + this->deepks, + this->exx_nao, + &ucell.symm); // delete RA after cal_force - this->RA.delete_grid(); + this->RA.delete_grid (); this->have_force = true; - ModuleBase::timer::end("ESolver_KS_LCAO", "cal_force"); + ModuleBase::timer::end ("ESolver_KS_LCAO", "cal_force"); } template -void ESolver_KS_LCAO::cal_stress(UnitCell& ucell, ModuleBase::matrix& stress) +void + ESolver_KS_LCAO::cal_stress (UnitCell& ucell, ModuleBase::matrix& stress) { - ModuleBase::TITLE("ESolver_KS_LCAO", "cal_stress"); - ModuleBase::timer::start("ESolver_KS_LCAO", "cal_stress"); + ModuleBase::TITLE ("ESolver_KS_LCAO", "cal_stress"); + ModuleBase::timer::start ("ESolver_KS_LCAO", "cal_stress"); if (!this->have_force) - { - ModuleBase::matrix fcs; - this->cal_force(ucell, fcs); - } + { + ModuleBase::matrix fcs; + this->cal_force (ucell, fcs); + } // the stress has been calculated in 'cal_force' stress = this->scs; this->have_force = false; - ModuleBase::timer::end("ESolver_KS_LCAO", "cal_stress"); + ModuleBase::timer::end ("ESolver_KS_LCAO", "cal_stress"); } template -void ESolver_KS_LCAO::after_all_runners(UnitCell& ucell) +void + ESolver_KS_LCAO::after_all_runners (UnitCell& ucell) { - ModuleBase::TITLE("ESolver_KS_LCAO", "after_all_runners"); - ModuleBase::timer::start("ESolver_KS_LCAO", "after_all_runners"); - - ESolver_KS::after_all_runners(ucell); + ModuleBase::TITLE ("ESolver_KS_LCAO", "after_all_runners"); + ModuleBase::timer::start ("ESolver_KS_LCAO", "after_all_runners"); - auto* hamilt_lcao = 
dynamic_cast*>(this->p_hamilt); - if(!hamilt_lcao) - { - ModuleBase::WARNING_QUIT("ESolver_KS_LCAO::after_all_runners","p_hamilt does not exist"); - } + ESolver_KS::after_all_runners (ucell); - ModuleIO::ctrl_runner_lcao(ucell, - PARAM.inp, this->kv, this->pelec, this->dmat, this->pv, this->Pgrid, - this->gd, this->psi, this->chr, hamilt_lcao, - this->two_center_bundle_, - this->orb_, this->pw_rho, this->pw_rhod, - this->sf, this->locpp.vloc, this->exx_nao, this->solvent); + auto* hamilt_lcao = dynamic_cast*> (this->p_hamilt); + if (!hamilt_lcao) + { + ModuleBase::WARNING_QUIT ("ESolver_KS_LCAO::after_all_runners", "p_hamilt does not exist"); + } + ModuleIO::ctrl_runner_lcao (ucell, + PARAM.inp, + this->kv, + this->pelec, + this->dmat, + this->pv, + this->Pgrid, + this->gd, + this->psi, + this->chr, + hamilt_lcao, + this->two_center_bundle_, + this->orb_, + this->pw_rho, + this->pw_rhod, + this->sf, + this->locpp.vloc, + this->exx_nao, + this->solvent); #ifdef __MPI #ifdef __LCAO // Exit BLACS environment for LCAO calculations - Cblacs_exit(1); + Cblacs_exit (1); #endif #endif - ModuleBase::timer::end("ESolver_KS_LCAO", "after_all_runners"); + ModuleBase::timer::end ("ESolver_KS_LCAO", "after_all_runners"); } template -void ESolver_KS_LCAO::iter_init(UnitCell& ucell, const int istep, const int iter) +void + ESolver_KS_LCAO::iter_init (UnitCell& ucell, const int istep, const int iter) { - ModuleBase::TITLE("ESolver_KS_LCAO", "iter_init"); + ModuleBase::TITLE ("ESolver_KS_LCAO", "iter_init"); // call iter_init() of ESolver_KS - ESolver_KS::iter_init(ucell, istep, iter); + ESolver_KS::iter_init (ucell, istep, iter); - module_charge::chgmixing_ks_lcao(iter, this->p_chgmix, this->dftu, - this->dmat.dm->get_DMR_pointer(1)->get_nnr(), PARAM.inp); + module_charge::chgmixing_ks_lcao (iter, + this->p_chgmix, + this->dftu, + this->dmat.dm->get_DMR_pointer (1)->get_nnr (), + PARAM.inp); if (iter == 1) - { - this->gint_precision_controller_.set_mode(PARAM.inp.gint_precision); - 
this->gint_precision_controller_.reset_for_new_scf(); - this->gint_info_->set_exec_precision(this->gint_precision_controller_.current_precision()); - if (PARAM.inp.gint_precision == "mix") - { - GlobalV::ofs_running << "\n >> Gint mixed-precision mode: starting SCF with fp32" - << " (will switch to fp64 when drho is small enough)" << std::endl; - std::cout << " >> NOTICE: Gint grid-integration starts with fp32 (mixed-precision mode)" << std::endl; - } - else if (PARAM.inp.gint_precision == "single") { - GlobalV::ofs_running << "\n >> Gint single-precision mode: using fp32 throughout SCF" << std::endl; - std::cout << " >> NOTICE: Gint grid-integration uses fp32 throughout SCF (single-precision mode)" << std::endl; + this->gint_precision_controller_.set_mode (PARAM.inp.gint_precision); + this->gint_precision_controller_.reset_for_new_scf (); + this->gint_info_->set_exec_precision (this->gint_precision_controller_.current_precision ()); + if (PARAM.inp.gint_precision == "mix") + { + GlobalV::ofs_running << "\n >> Gint mixed-precision mode: starting SCF with fp32" + << " (will switch to fp64 when drho is small enough)" << std::endl; + std::cout << " >> NOTICE: Gint grid-integration starts with fp32 (mixed-precision mode)" + << std::endl; + } + else if (PARAM.inp.gint_precision == "single") + { + GlobalV::ofs_running << "\n >> Gint single-precision mode: using fp32 throughout SCF" << std::endl; + std::cout << " >> NOTICE: Gint grid-integration uses fp32 throughout SCF (single-precision mode)" + << std::endl; + } } - } // mohan update 2012-06-05 - this->pelec->f_en.deband_harris = this->pelec->cal_delta_eband(ucell); + this->pelec->f_en.deband_harris = this->pelec->cal_delta_eband (ucell); if (istep == 0 && PARAM.inp.init_wfc == "file") - { - int exx_two_level_step = 0; + { + int exx_two_level_step = 0; #ifdef __EXX - if (GlobalC::exx_info.info_global.cal_exx) - { - // the following steps are only needed in the first outer exx loop - exx_two_level_step - = 
GlobalC::exx_info.info_ri.real_number ? - this->exx_nao.exd->two_level_step : this->exx_nao.exc->two_level_step; - } + if (GlobalC::exx_info.info_global.cal_exx) + { + // the following steps are only needed in the first outer exx loop + exx_two_level_step = GlobalC::exx_info.info_ri.real_number ? this->exx_nao.exd->two_level_step + : this->exx_nao.exc->two_level_step; + } #endif - elecstate::init_dm(ucell, this->pelec, this->dmat, this->psi, this->chr, iter, exx_two_level_step); - } + elecstate::init_dm (ucell, this->pelec, this->dmat, this->psi, this->chr, iter, exx_two_level_step); + } #ifdef __EXX // calculate exact-exchange if (PARAM.inp.calculation != "nscf") - { - if (GlobalC::exx_info.info_ri.real_number) { - this->exx_nao.exd->exx_eachiterinit(istep, ucell, *this->dmat.dm, this->kv, iter); + if (GlobalC::exx_info.info_ri.real_number) + { + this->exx_nao.exd->exx_eachiterinit (istep, ucell, *this->dmat.dm, this->kv, iter); + } + else + { + this->exx_nao.exc->exx_eachiterinit (istep, ucell, *this->dmat.dm, this->kv, iter); + } } - else - { - this->exx_nao.exc->exx_eachiterinit(istep, ucell, *this->dmat.dm, this->kv, iter); - } - } #endif - init_dftu_lcao(istep, iter, PARAM.inp, &(this->dftu), this->dmat.dm, ucell, this->chr.rho, this->pw_rho->nrxx); + init_dftu_lcao (istep, iter, PARAM.inp, &(this->dftu), this->dmat.dm, ucell, this->chr.rho, this->pw_rho->nrxx); #ifdef __MLALGO // the density matrixes of DeePKS have been updated in each iter - this->deepks.ld.set_hr_cal(true); + this->deepks.ld.set_hr_cal (true); // HR in HamiltLCAO should be recalculate if (PARAM.inp.deepks_scf) - { - this->p_hamilt->refresh(); - } + { + this->p_hamilt->refresh (); + } #endif if (PARAM.inp.vl_in_h) - { - // update real space Hamiltonian - this->p_hamilt->refresh(); - } + { + // update real space Hamiltonian + this->p_hamilt->refresh (); + } // save density matrix DMR for mixing if (PARAM.inp.mixing_restart > 0 && PARAM.inp.mixing_dmr && this->p_chgmix->mixing_restart_count > 
0) - { - this->dmat.dm->save_DMR(); - } + { + this->dmat.dm->save_DMR (); + } } template -void ESolver_KS_LCAO::hamilt2rho_single(UnitCell& ucell, int istep, int iter, double ethr) +void + ESolver_KS_LCAO::hamilt2rho_single (UnitCell& ucell, int istep, int iter, double ethr) { - ModuleBase::TITLE("ESolver_KS_LCAO", "hamilt2rho_single"); + ModuleBase::TITLE ("ESolver_KS_LCAO", "hamilt2rho_single"); // 1) reset energy this->pelec->f_en.eband = 0.0; @@ -398,129 +501,173 @@ void ESolver_KS_LCAO::hamilt2rho_single(UnitCell& ucell, int istep, int bool skip_charge = PARAM.inp.calculation == "nscf" ? true : false; // 2) run the inner lambda loop to contrain atomic moments with the DeltaSpin method - bool skip_solve = run_deltaspin_lambda_loop_lcao(iter - 1, this->drho, PARAM.inp); + bool skip_solve = run_deltaspin_lambda_loop_lcao (iter - 1, this->drho, PARAM.inp); // 3) run Hsolver if (!skip_solve) - { - hsolver::HSolverLCAO hsolver_lcao_obj(&(this->pv), PARAM.inp.ks_solver); - hsolver_lcao_obj.solve(static_cast*>(this->p_hamilt), this->psi[0], this->pelec, *this->dmat.dm, - this->chr, PARAM.inp.nspin, skip_charge); - } + { + hsolver::HSolverLCAO hsolver_lcao_obj (&(this->pv), PARAM.inp.ks_solver); + hsolver_lcao_obj.solve (static_cast*> (this->p_hamilt), + this->psi[0], + this->pelec, + *this->dmat.dm, + this->chr, + PARAM.inp.nspin, + skip_charge); + } - // 4) EXX + // 4) EXX #ifdef __EXX if (PARAM.inp.calculation != "nscf") - { - if (GlobalC::exx_info.info_ri.real_number) { - this->exx_nao.exd->exx_hamilt2rho(*this->pelec, this->pv, iter); + if (GlobalC::exx_info.info_ri.real_number) + { + this->exx_nao.exd->exx_hamilt2rho (*this->pelec, this->pv, iter); + } + else + { + this->exx_nao.exc->exx_hamilt2rho (*this->pelec, this->pv, iter); + } } - else - { - this->exx_nao.exc->exx_hamilt2rho(*this->pelec, this->pv, iter); - } - } #endif // 5) symmetrize the charge density - Symmetry_rho::symmetrize_rho(PARAM.inp.nspin, this->chr, this->pw_rho, ucell.symm); + 
Symmetry_rho::symmetrize_rho (PARAM.inp.nspin, this->chr, this->pw_rho, ucell.symm); // 6) calculate delta energy - this->pelec->f_en.deband = this->pelec->cal_delta_eband(ucell); + this->pelec->f_en.deband = this->pelec->cal_delta_eband (ucell); } - template -void ESolver_KS_LCAO::iter_finish(UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) +void + ESolver_KS_LCAO::iter_finish (UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) { - ModuleBase::TITLE("ESolver_KS_LCAO", "iter_finish"); + ModuleBase::TITLE ("ESolver_KS_LCAO", "iter_finish"); - auto* hamilt_lcao = dynamic_cast*>(this->p_hamilt); + auto* hamilt_lcao = dynamic_cast*> (this->p_hamilt); - if(!hamilt_lcao) - { - ModuleBase::WARNING_QUIT("ESolver_KS_LCAO::iter_finish","p_hamilt does not exist"); - } + if (!hamilt_lcao) + { + ModuleBase::WARNING_QUIT ("ESolver_KS_LCAO::iter_finish", "p_hamilt does not exist"); + } - const std::vector>& dm_vec = this->dmat.dm->get_DMK_vector(); + const std::vector>& dm_vec = this->dmat.dm->get_DMK_vector (); // 1) calculate the local occupation number matrix and energy correction in DFT+U - finish_dftu_lcao(iter, conv_esolver, PARAM.inp, &(this->dftu), ucell, dm_vec, this->kv, this->p_chgmix->get_mixing_beta(), hamilt_lcao); + finish_dftu_lcao (iter, + conv_esolver, + PARAM.inp, + &(this->dftu), + ucell, + dm_vec, + this->kv, + this->p_chgmix->get_mixing_beta (), + hamilt_lcao); // 2) for deepks, calculate delta_e, output labels during electronic steps - this->deepks.delta_e(ucell, this->kv, this->orb_, this->pv, this->gd, dm_vec, this->pelec->f_en, PARAM.inp); + this->deepks.delta_e (ucell, this->kv, this->orb_, this->pv, this->gd, dm_vec, this->pelec->f_en, PARAM.inp); // 3) for delta spin - cal_mi_lcao_wrapper(iter, PARAM.inp); + cal_mi_lcao_wrapper (iter, PARAM.inp); // call iter_finish() of ESolver_KS, where band gap is printed, // eig and occ are printed, magnetization is calculated, - // charge mixing is performed, potential is updated, + 
// charge mixing is performed, potential is updated, // HF and kS energies are computed, meta-GGA, Jason and restart - ESolver_KS::iter_finish(ucell, istep, iter, conv_esolver); - const bool precision_switched = this->gint_precision_controller_.update_after_iteration(this->drho, this->scf_thr); - this->gint_info_->set_exec_precision(this->gint_precision_controller_.current_precision()); + ESolver_KS::iter_finish (ucell, istep, iter, conv_esolver); + const bool precision_switched = this->gint_precision_controller_.update_after_iteration (this->drho, this->scf_thr); + this->gint_info_->set_exec_precision (this->gint_precision_controller_.current_precision ()); if (precision_switched) - { - GlobalV::ofs_running << "\n >> Gint precision switched: fp32 -> fp64 (drho = " - << this->drho << ")" << std::endl; - std::cout << " >> NOTICE: Gint grid-integration precision switched from fp32 to fp64" << std::endl; - } + { + GlobalV::ofs_running << "\n >> Gint precision switched: fp32 -> fp64 (drho = " << this->drho << ")" + << std::endl; + std::cout << " >> NOTICE: Gint grid-integration precision switched from fp32 to fp64" << std::endl; + } // mix density matrix if mixing_restart + mixing_dmr + not first // mixing_restart at every iter except the last iter - if(iter != PARAM.inp.scf_nmax && !conv_esolver) - { - if (PARAM.inp.mixing_restart > 0 && this->p_chgmix->mixing_restart_count > 0 && PARAM.inp.mixing_dmr) + if (iter != PARAM.inp.scf_nmax && !conv_esolver) { - this->p_chgmix->mix_dmr(this->dmat.dm); + if (PARAM.inp.mixing_restart > 0 && this->p_chgmix->mixing_restart_count > 0 && PARAM.inp.mixing_dmr) + { + this->p_chgmix->mix_dmr (this->dmat.dm); + } } - } // control the output related to the finished iteration - ModuleIO::ctrl_iter_lcao(ucell, PARAM.inp, this->kv, this->pelec, *this->dmat.dm, - this->pv, this->gd, this->psi, this->chr, this->p_chgmix, - hamilt_lcao, this->orb_, this->deepks, - this->exx_nao, iter, istep, conv_esolver, this->scf_ene_thr); + 
ModuleIO::ctrl_iter_lcao (ucell, + PARAM.inp, + this->kv, + this->pelec, + *this->dmat.dm, + this->pv, + this->gd, + this->psi, + this->chr, + this->p_chgmix, + hamilt_lcao, + this->orb_, + this->deepks, + this->exx_nao, + iter, + istep, + conv_esolver, + this->scf_ene_thr); } template -void ESolver_KS_LCAO::after_scf(UnitCell& ucell, const int istep, const bool conv_esolver) +void + ESolver_KS_LCAO::after_scf (UnitCell& ucell, const int istep, const bool conv_esolver) { - ModuleBase::TITLE("ESolver_KS_LCAO", "after_scf"); - ModuleBase::timer::start("ESolver_KS_LCAO", "after_scf"); + ModuleBase::TITLE ("ESolver_KS_LCAO", "after_scf"); + ModuleBase::timer::start ("ESolver_KS_LCAO", "after_scf"); - auto* hamilt_lcao = dynamic_cast*>(this->p_hamilt); + auto* hamilt_lcao = dynamic_cast*> (this->p_hamilt); - if(!hamilt_lcao) - { - ModuleBase::WARNING_QUIT("ESolver_KS_LCAO::after_scf","p_hamilt does not exist"); - } + if (!hamilt_lcao) + { + ModuleBase::WARNING_QUIT ("ESolver_KS_LCAO::after_scf", "p_hamilt does not exist"); + } if (PARAM.inp.out_elf[0] > 0) - { - LCAO_domain::dm2tau(this->dmat.dm->get_DMR_vector(), PARAM.inp.nspin, this->pelec->charge); - } + { + LCAO_domain::dm2tau (this->dmat.dm->get_DMR_vector (), PARAM.inp.nspin, this->pelec->charge); + } //! 1) call after_scf() of ESolver_KS - ESolver_KS::after_scf(ucell, istep, conv_esolver); + ESolver_KS::after_scf (ucell, istep, conv_esolver); //! 
2) output of lcao every few ionic steps - ModuleIO::ctrl_scf_lcao(ucell, - PARAM.inp, this->kv, this->pelec, this->dmat.dm, this->pv, - this->gd, this->psi, hamilt_lcao, this->dftu, this->two_center_bundle_, - this->orb_, this->pw_wfc, this->pw_rho, this->pw_big, this->sf, - this->rdmft_solver, this->deepks, this->exx_nao, - this->conv_esolver, this->scf_nmax_flag, istep); + ModuleIO::ctrl_scf_lcao (ucell, + PARAM.inp, + this->kv, + this->pelec, + this->dmat.dm, + this->pv, + this->gd, + this->psi, + hamilt_lcao, + this->dftu, + this->two_center_bundle_, + this->orb_, + this->pw_wfc, + this->pw_rho, + this->pw_big, + this->sf, + this->rdmft_solver, + this->deepks, + this->exx_nao, + this->conv_esolver, + this->scf_nmax_flag, + istep); //! 3) Clean up RA, which is used to serach for adjacent atoms if (!PARAM.inp.cal_force && !PARAM.inp.cal_stress) - { - this->RA.delete_grid(); - } + { + this->RA.delete_grid (); + } - ModuleBase::timer::end("ESolver_KS_LCAO", "after_scf"); + ModuleBase::timer::end ("ESolver_KS_LCAO", "after_scf"); } template class ESolver_KS_LCAO; diff --git a/source/source_esolver/esolver_ks_lcao.h b/source/source_esolver/esolver_ks_lcao.h index bfd80e50fe3..a93a1c1c183 100644 --- a/source/source_esolver/esolver_ks_lcao.h +++ b/source/source_esolver/esolver_ks_lcao.h @@ -2,19 +2,18 @@ #define ESOLVER_KS_LCAO_H #include "esolver_ks.h" -#include "source_lcao/record_adj.h" // adjacent atoms +#include "source_lcao/record_adj.h" // adjacent atoms #include "source_basis/module_nao/two_center_bundle.h" // nao basis -#include "source_lcao/module_gint/gint.h" // gint +#include "source_lcao/module_gint/gint.h" // gint #include "source_lcao/module_gint/gint_info.h" #include "source_estate/module_charge/gint_precision_controller.h" -#include "source_lcao/setup_deepks.h" // for deepks, mohan add 20251008 -#include "source_lcao/setup_exx.h" // for exx, mohan add 20251008 +#include "source_lcao/setup_deepks.h" // for deepks, mohan add 20251008 +#include 
"source_lcao/setup_exx.h" // for exx, mohan add 20251008 #include "source_lcao/module_rdmft/rdmft.h" // rdmft -#include "source_lcao/setup_dm.h" // mohan add 2025-10-30 +#include "source_lcao/setup_dm.h" // mohan add 2025-10-30 #include - // for Linear Response namespace LR { @@ -32,39 +31,39 @@ template class ESolver_KS_LCAO : public ESolver_KS { public: - ESolver_KS_LCAO(); - ~ESolver_KS_LCAO(); + ESolver_KS_LCAO (); + ~ESolver_KS_LCAO (); - void before_all_runners(UnitCell& ucell, const Input_para& inp) override; + void before_all_runners (UnitCell& ucell, const Input_para& inp) override; - double cal_energy() override; + double cal_energy () override; - void cal_force(UnitCell& ucell, ModuleBase::matrix& force) override; + void cal_force (UnitCell& ucell, ModuleBase::matrix& force) override; - void cal_stress(UnitCell& ucell, ModuleBase::matrix& stress) override; + void cal_stress (UnitCell& ucell, ModuleBase::matrix& stress) override; - void after_all_runners(UnitCell& ucell) override; + void after_all_runners (UnitCell& ucell) override; protected: - virtual void before_scf(UnitCell& ucell, const int istep) override; + virtual void before_scf (UnitCell& ucell, const int istep) override; - virtual void iter_init(UnitCell& ucell, const int istep, const int iter) override; + virtual void iter_init (UnitCell& ucell, const int istep, const int iter) override; - virtual void hamilt2rho_single(UnitCell& ucell, const int istep, const int iter, const double ethr) override; + virtual void hamilt2rho_single (UnitCell& ucell, const int istep, const int iter, const double ethr) override; - virtual void iter_finish(UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) override; + virtual void iter_finish (UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) override; - virtual void after_scf(UnitCell& ucell, const int istep, const bool conv_esolver) override; + virtual void after_scf (UnitCell& ucell, const int istep, const bool conv_esolver) 
override; - virtual void others(UnitCell& ucell, const int istep) override; + virtual void others (UnitCell& ucell, const int istep) override; //! Electronic wave functions (moved from base class) psi::Psi* psi = nullptr; - //! Store information about Adjacent Atoms + //! Store information about Adjacent Atoms Record_adj RA; - //! Store information about Adjacent Atoms + //! Store information about Adjacent Atoms Grid_Driver gd; //! NAO orbitals: 2d block-cyclic distribution info @@ -73,7 +72,7 @@ class ESolver_KS_LCAO : public ESolver_KS //! GintInfo: used to store some basic infomation about module_gint std::unique_ptr gint_info_; - //! NAO: store related information + //! NAO: store related information LCAO_Orbitals orb_; //! NAO orbitals: two-center integrations @@ -82,14 +81,13 @@ class ESolver_KS_LCAO : public ESolver_KS //! Add density matrix class, mohan add 2025-10-30 LCAO_domain::Setup_DM dmat; - // For deepks method, mohan add 2025-10-08 Setup_DeePKS deepks; // For exact-exchange energy, mohan add 2025-10-08 Exx_NAO exx_nao; - //! For RDMFT calculations, added by jghan, 2024-03-16 + //! For RDMFT calculations, added by jghan, 2024-03-16 rdmft::RDMFT rdmft_solver; //! For linear-response TDDFT @@ -100,21 +98,60 @@ class ESolver_KS_LCAO : public ESolver_KS // because it's hard to seperate force and stress calculation in LCAO. 
ModuleBase::matrix scs; bool have_force = false; - - GintPrecisionController gint_precision_controller_; + GintPrecisionController gint_precision_controller_; public: - const Record_adj & get_RA() const { return RA; } - const Grid_Driver & get_gd() const { return gd; } - const Parallel_Orbitals & get_pv() const { return pv; } - const std::unique_ptr & get_gint_info() const { return gint_info_; } - const TwoCenterBundle & get_two_center_bundle() const { return two_center_bundle_; } - const rdmft::RDMFT & get_rdmft_solver() const { return rdmft_solver; } - const LCAO_Orbitals & get_orb() const { return orb_; } - const ModuleBase::matrix & get_scs() const { return scs; } - const Setup_DeePKS & get_deepks() const { return deepks; } - const Exx_NAO & get_exx_nao() const { return exx_nao; } + const Record_adj& + get_RA () const + { + return RA; + } + const Grid_Driver& + get_gd () const + { + return gd; + } + const Parallel_Orbitals& + get_pv () const + { + return pv; + } + const std::unique_ptr& + get_gint_info () const + { + return gint_info_; + } + const TwoCenterBundle& + get_two_center_bundle () const + { + return two_center_bundle_; + } + const rdmft::RDMFT& + get_rdmft_solver () const + { + return rdmft_solver; + } + const LCAO_Orbitals& + get_orb () const + { + return orb_; + } + const ModuleBase::matrix& + get_scs () const + { + return scs; + } + const Setup_DeePKS& + get_deepks () const + { + return deepks; + } + const Exx_NAO& + get_exx_nao () const + { + return exx_nao; + } }; } // namespace ModuleESolver #endif diff --git a/source/source_esolver/esolver_ks_lcao_tddft.cpp b/source/source_esolver/esolver_ks_lcao_tddft.cpp index 61e43797e13..8275c47ff86 100644 --- a/source/source_esolver/esolver_ks_lcao_tddft.cpp +++ b/source/source_esolver/esolver_ks_lcao_tddft.cpp @@ -21,7 +21,7 @@ namespace ModuleESolver { template -ESolver_KS_LCAO_TDDFT::ESolver_KS_LCAO_TDDFT() +ESolver_KS_LCAO_TDDFT::ESolver_KS_LCAO_TDDFT () { this->classname = "ESolver_rtTDDFT"; 
this->basisname = "LCAO"; @@ -29,254 +29,290 @@ ESolver_KS_LCAO_TDDFT::ESolver_KS_LCAO_TDDFT() // If the device is GPU, we must open use_tensor and use_lapack ct::DeviceType ct_device_type = ct::DeviceTypeToEnum::value; if (ct_device_type == ct::DeviceType::GpuDevice) - { - use_tensor = true; - if (PARAM.inp.ks_solver != "cusolvermp") { - use_lapack = true; + use_tensor = true; + if (PARAM.inp.ks_solver != "cusolvermp") + { + use_lapack = true; + } } - } } template -ESolver_KS_LCAO_TDDFT::~ESolver_KS_LCAO_TDDFT() +ESolver_KS_LCAO_TDDFT::~ESolver_KS_LCAO_TDDFT () { //************************************************* // Do not add any code in this destructor function //************************************************* if (psi_laststep != nullptr) - { - delete psi_laststep; - psi_laststep = nullptr; - } + { + delete psi_laststep; + psi_laststep = nullptr; + } if (td_p != nullptr) - { - delete td_p; - } + { + delete td_p; + } TD_info::td_vel_op = nullptr; if (td_mg_ != nullptr) - { - delete td_mg_; - td_mg_ = nullptr; - } + { + delete td_mg_; + td_mg_ = nullptr; + } } template -void ESolver_KS_LCAO_TDDFT::before_all_runners(UnitCell& ucell, const Input_para& inp) +void + ESolver_KS_LCAO_TDDFT::before_all_runners (UnitCell& ucell, const Input_para& inp) { // Run before_all_runners in ESolver_KS_LCAO - ESolver_KS_LCAO, TR>::before_all_runners(ucell, inp); + ESolver_KS_LCAO, TR>::before_all_runners (ucell, inp); - td_p = new TD_info(&ucell, this->pv, this->orb_); + td_p = new TD_info (&ucell, this->pv, this->orb_); TD_info::td_vel_op = td_p; totstep += TD_info::estep_shift; if (PARAM.inp.init_wfc == "file") - { - if (!ModuleIO::read_wfc_nao(PARAM.globalv.global_readin_dir, - this->pv, - *(this->psi), - this->pelec->ekb, - this->pelec->wg, - this->kv.ik2iktot, - this->kv.get_nkstot(), - PARAM.inp.nspin, - 0, - TD_info::estep_shift)) { - ModuleBase::WARNING_QUIT("ESolver_KS_LCAO_TDDFT", "Read electronic wavefunction from file failed!"); + if (!ModuleIO::read_wfc_nao 
(PARAM.globalv.global_readin_dir, + this->pv, + *(this->psi), + this->pelec->ekb, + this->pelec->wg, + this->kv.ik2iktot, + this->kv.get_nkstot (), + PARAM.inp.nspin, + 0, + TD_info::estep_shift)) + { + ModuleBase::WARNING_QUIT ("ESolver_KS_LCAO_TDDFT", + "Read electronic wavefunction from file failed!"); + } } - } } template -void ESolver_KS_LCAO_TDDFT::runner(UnitCell& ucell, const int istep) +void + ESolver_KS_LCAO_TDDFT::runner (UnitCell& ucell, const int istep) { - ModuleBase::TITLE("ESolver_KS_LCAO_TDDFT", "runner"); - ModuleBase::timer::start(this->classname, "runner"); + ModuleBase::TITLE ("ESolver_KS_LCAO_TDDFT", "runner"); + ModuleBase::timer::start (this->classname, "runner"); //---------------------------------------------------------------- // 1) before_scf (electronic iteration loops) //---------------------------------------------------------------- - this->before_scf(ucell, istep); // From ESolver_KS_LCAO + this->before_scf (ucell, istep); // From ESolver_KS_LCAO // Initialize the moving spatial gauge if (use_td_moving_gauge && this->td_mg_ == nullptr) - { - this->td_mg_ = new module_rt::TD_MovingGauge(); - auto* hamilt_lcao = dynamic_cast, TR>*>(this->p_hamilt); - const hamilt::HContainer* sR_template = hamilt_lcao->getSR(); - this->td_mg_->init_DR(sR_template, &ucell, &this->pv, this->two_center_bundle_.overlap_orb.get()); - } + { + this->td_mg_ = new module_rt::TD_MovingGauge (); + auto* hamilt_lcao = dynamic_cast, TR>*> (this->p_hamilt); + const hamilt::HContainer* sR_template = hamilt_lcao->getSR (); + this->td_mg_->init_DR (sR_template, &ucell, &this->pv, this->two_center_bundle_.overlap_orb.get ()); + } if (PARAM.inp.td_stype == 2) - { - this->dmat.dm->cal_DMR_td(ucell, TD_info::cart_At); - } + { + this->dmat.dm->cal_DMR_td (ucell, TD_info::cart_At); + } else - { - this->dmat.dm->cal_DMR(); - } - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "INIT SCF"); + { + this->dmat.dm->cal_DMR (); + } + ModuleBase::GlobalFunc::DONE 
(GlobalV::ofs_running, "INIT SCF"); // Initialize velocity operator for current calculation if (PARAM.inp.td_stype != 1 && TD_info::out_current == 1) - { - // initialize the velocity operator - velocity_mat = new Velocity_op(&ucell, - &(this->gd), - &this->pv, - this->orb_, - this->two_center_bundle_.overlap_orb.get()); - // calculate velocity operator - velocity_mat->calculate_grad_term(); - velocity_mat->calculate_vcomm_r(); - } + { + // initialize the velocity operator + velocity_mat = new Velocity_op (&ucell, + &(this->gd), + &this->pv, + this->orb_, + this->two_center_bundle_.overlap_orb.get ()); + // calculate velocity operator + velocity_mat->calculate_grad_term (); + velocity_mat->calculate_vcomm_r (); + } int estep_max = (istep == 0 && !PARAM.inp.mdp.md_restart) ? 1 : PARAM.inp.estep_per_md; // mohan change md_nstep from 0 to 1, 2026-01-04 if (PARAM.inp.mdp.md_nstep == 1) - { - estep_max = PARAM.inp.estep_per_md + 1; - } + { + estep_max = PARAM.inp.estep_per_md + 1; + } // Reset laststep matrix and wfc, if any atom cross the boundary // Apply a phase correction to H, S, and psi to keep consistency when atoms cross periodic boundaries const size_t len_hs_ik = use_tensor && use_lapack ? 
PARAM.globalv.nlocal * PARAM.globalv.nlocal : this->pv.nloc; - module_rt::reset_matrix_boundary(ucell, - this->kv, - &(this->pv), - this->Hk_laststep, - this->Sk_laststep, - this->psi_laststep, - len_hs_ik); + module_rt::reset_matrix_boundary (ucell, + this->kv, + &(this->pv), + this->Hk_laststep, + this->Sk_laststep, + this->psi_laststep, + len_hs_ik); for (int estep = 0; estep < estep_max; estep++) - { - // calculate total time step - this->totstep++; - this->print_step(); - // update At - if (PARAM.inp.td_stype > 0) - { - elecstate::H_TDDFT_pw::update_At(); - td_p->cal_cart_At(elecstate::H_TDDFT_pw::At); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Cartesian vector potential Ax(t)", TD_info::cart_At[0]); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Cartesian vector potential Ay(t)", TD_info::cart_At[1]); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Cartesian vector potential Az(t)", TD_info::cart_At[2]); - } - - if (estep != 0) - { - this->CE.update_all_dis(ucell); - this->CE.extrapolate_charge(&this->Pgrid, - ucell, - &this->chr, - &this->sf, - GlobalV::ofs_running, - GlobalV::ofs_warning); - this->exx_nao.before_scf(ucell, this->kv, this->orb_, this->p_chgmix, totstep, PARAM.inp); - this->pelec->init_scf(ucell, this->Pgrid, this->sf.strucFac, this->locpp.numeric, ucell.symm); - - if (totstep <= PARAM.inp.td_tend + 1) - { - TD_info::evolve_once = true; - } - } - //---------------------------------------------------------------- - // 2) SCF iterations - //---------------------------------------------------------------- - bool conv_esolver = false; - this->niter = this->maxniter; - this->diag_ethr = PARAM.inp.pw_diag_thr; - for (int iter = 1; iter <= this->maxniter; ++iter) { - ModuleIO::write_head_td(GlobalV::ofs_running, istep, totstep, iter, this->basisname); - - // 3) Initialization of SCF iterations - this->iter_init(ucell, totstep, iter); // From ESolver_KS_LCAO - - // 4) Use Hamiltonian to obtain charge density - 
this->hamilt2rho(ucell, totstep, iter, this->diag_ethr); // From ESolver_KS - - // 5) Finish SCF iterations - this->iter_finish(ucell, totstep, estep, estep_max, iter, conv_esolver); + // calculate total time step + this->totstep++; + this->print_step (); + // update At + if (PARAM.inp.td_stype > 0) + { + elecstate::H_TDDFT_pw::update_At (); + td_p->cal_cart_At (elecstate::H_TDDFT_pw::At); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, + "Cartesian vector potential Ax(t)", + TD_info::cart_At[0]); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, + "Cartesian vector potential Ay(t)", + TD_info::cart_At[1]); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, + "Cartesian vector potential Az(t)", + TD_info::cart_At[2]); + } - // 6) Check convergence - if (conv_esolver || this->oscillate_esolver) - { - this->niter = iter; - if (this->oscillate_esolver) + if (estep != 0) { - std::cout << " !! Density oscillation is found, STOP HERE !!" << std::endl; + this->CE.update_all_dis (ucell); + this->CE.extrapolate_charge (&this->Pgrid, + ucell, + &this->chr, + &this->sf, + GlobalV::ofs_running, + GlobalV::ofs_warning); + this->exx_nao.before_scf (ucell, this->kv, this->orb_, this->p_chgmix, totstep, PARAM.inp); + this->pelec->init_scf (ucell, this->Pgrid, this->sf.strucFac, this->locpp.numeric, ucell.symm); + + if (totstep <= PARAM.inp.td_tend + 1) + { + TD_info::evolve_once = true; + } + } + //---------------------------------------------------------------- + // 2) SCF iterations + //---------------------------------------------------------------- + bool conv_esolver = false; + this->niter = this->maxniter; + this->diag_ethr = PARAM.inp.pw_diag_thr; + for (int iter = 1; iter <= this->maxniter; ++iter) + { + ModuleIO::write_head_td (GlobalV::ofs_running, istep, totstep, iter, this->basisname); + + // 3) Initialization of SCF iterations + this->iter_init (ucell, totstep, iter); // From ESolver_KS_LCAO + + // 4) Use Hamiltonian to obtain charge density + 
this->hamilt2rho (ucell, totstep, iter, this->diag_ethr); // From ESolver_KS + + // 5) Finish SCF iterations + this->iter_finish (ucell, totstep, estep, estep_max, iter, conv_esolver); + + // 6) Check convergence + if (conv_esolver || this->oscillate_esolver) + { + this->niter = iter; + if (this->oscillate_esolver) + { + std::cout << " !! Density oscillation is found, STOP HERE !!" << std::endl; + } + break; + } + } // end SCF iterations + + //---------------------------------------------------------------- + // 7) after_scf + //---------------------------------------------------------------- + this->after_scf (ucell, totstep, conv_esolver); + if (!restart_done && PARAM.inp.mdp.md_restart) + { + restart_done = true; + estep += TD_info::estep_shift % PARAM.inp.estep_per_md; + if (estep == 0) + { + break; + } + // mohan add 2026-01-04, change md_nstep!=0 to md_nstep!=1 + if (PARAM.inp.mdp.md_nstep != 1) + { + estep -= 1; + } } - break; - } - } // end SCF iterations - - //---------------------------------------------------------------- - // 7) after_scf - //---------------------------------------------------------------- - this->after_scf(ucell, totstep, conv_esolver); - if (!restart_done && PARAM.inp.mdp.md_restart) - { - restart_done = true; - estep += TD_info::estep_shift % PARAM.inp.estep_per_md; - if (estep == 0) - { - break; - } - // mohan add 2026-01-04, change md_nstep!=0 to md_nstep!=1 - if (PARAM.inp.mdp.md_nstep != 1) - { - estep -= 1; - } } - } if (PARAM.inp.td_stype != 1 && TD_info::out_current == 1) - { - delete velocity_mat; - } + { + delete velocity_mat; + } - ModuleBase::timer::end(this->classname, "runner"); + ModuleBase::timer::end (this->classname, "runner"); return; } // Output electronic step information template -void ESolver_KS_LCAO_TDDFT::print_step() +void + ESolver_KS_LCAO_TDDFT::print_step () { std::cout << " -------------------------------------------" << std::endl; - std::cout << " STEP OF ELECTRON EVOLVE : " << unsigned(totstep) << 
std::endl; + std::cout << " STEP OF ELECTRON EVOLVE : " << unsigned (totstep) << std::endl; std::cout << " -------------------------------------------" << std::endl; } template -void ESolver_KS_LCAO_TDDFT::hamilt2rho_single(UnitCell& ucell, +void + ESolver_KS_LCAO_TDDFT::hamilt2rho_single (UnitCell& ucell, const int istep, const int iter, const double ethr) { // Update the moving spatial gauge if (use_td_moving_gauge) - { - auto* hamilt_lcao = dynamic_cast, TR>*>(this->p_hamilt); - const hamilt::HContainer* sR_template = hamilt_lcao->getSR(); - this->td_mg_->update_DR(sR_template, &ucell, &this->pv, this->two_center_bundle_.overlap_orb.get()); - } + { + auto* hamilt_lcao = dynamic_cast, TR>*> (this->p_hamilt); + const hamilt::HContainer* sR_template = hamilt_lcao->getSR (); + this->td_mg_->update_DR (sR_template, &ucell, &this->pv, this->two_center_bundle_.overlap_orb.get ()); + } if (PARAM.inp.init_wfc == "file") - { - if (istep >= TD_info::estep_shift + 1) { - module_rt::Evolve_elec::solve_psi( + if (istep >= TD_info::estep_shift + 1) + { + module_rt::Evolve_elec::solve_psi ( + istep, + PARAM.inp.nbands, + PARAM.globalv.nlocal, + this->kv.get_nks (), + static_cast>*> (this->p_hamilt), + this->pv, + this->psi, + this->psi_laststep, + this->Hk_laststep, + this->Sk_laststep, + this->pelec->ekb, + GlobalV::ofs_running, + PARAM.inp.propagator, + use_tensor, + use_lapack, + this->td_mg_, + &ucell, + this->kv.kvec_d, + use_td_moving_gauge); + } + this->weight_dm_rho (ucell); + } + else if (istep >= 1) + { + module_rt::Evolve_elec::solve_psi ( istep, PARAM.inp.nbands, PARAM.globalv.nlocal, - this->kv.get_nks(), - static_cast>*>(this->p_hamilt), + this->kv.get_nks (), + static_cast>*> (this->p_hamilt), this->pv, this->psi, this->psi_laststep, @@ -291,69 +327,46 @@ void ESolver_KS_LCAO_TDDFT::hamilt2rho_single(UnitCell& ucell, &ucell, this->kv.kvec_d, use_td_moving_gauge); + this->weight_dm_rho (ucell); } - this->weight_dm_rho(ucell); - } - else if (istep >= 1) - { - 
module_rt::Evolve_elec::solve_psi(istep, - PARAM.inp.nbands, - PARAM.globalv.nlocal, - this->kv.get_nks(), - static_cast>*>(this->p_hamilt), - this->pv, - this->psi, - this->psi_laststep, - this->Hk_laststep, - this->Sk_laststep, - this->pelec->ekb, - GlobalV::ofs_running, - PARAM.inp.propagator, - use_tensor, - use_lapack, - this->td_mg_, - &ucell, - this->kv.kvec_d, - use_td_moving_gauge); - this->weight_dm_rho(ucell); - } else - { - // For the first step, do normal SCF calculation to get initial state - this->pelec->f_en.eband = 0.0; - this->pelec->f_en.demet = 0.0; - if (this->psi != nullptr) { - bool skip_charge = PARAM.inp.calculation == "nscf" ? true : false; - hsolver::HSolverLCAO> hsolver_lcao_obj(&this->pv, PARAM.inp.ks_solver); - hsolver_lcao_obj.solve(static_cast>*>(this->p_hamilt), - this->psi[0], - this->pelec, - *this->dmat.dm, - this->chr, - PARAM.inp.nspin, - skip_charge); + // For the first step, do normal SCF calculation to get initial state + this->pelec->f_en.eband = 0.0; + this->pelec->f_en.demet = 0.0; + if (this->psi != nullptr) + { + bool skip_charge = PARAM.inp.calculation == "nscf" ? 
true : false; + hsolver::HSolverLCAO> hsolver_lcao_obj (&this->pv, PARAM.inp.ks_solver); + hsolver_lcao_obj.solve (static_cast>*> (this->p_hamilt), + this->psi[0], + this->pelec, + *this->dmat.dm, + this->chr, + PARAM.inp.nspin, + skip_charge); + } } - } // Symmetrize the charge density only for ground state if (istep <= 1) - { - Symmetry_rho::symmetrize_rho(PARAM.inp.nspin, this->chr, this->pw_rho, ucell.symm); - } + { + Symmetry_rho::symmetrize_rho (PARAM.inp.nspin, this->chr, this->pw_rho, ucell.symm); + } #ifdef __EXX if (GlobalC::exx_info.info_ri.real_number) - this->exx_nao.exd->exx_hamilt2rho(*this->pelec, this->pv, iter); + this->exx_nao.exd->exx_hamilt2rho (*this->pelec, this->pv, iter); else - this->exx_nao.exc->exx_hamilt2rho(*this->pelec, this->pv, iter); + this->exx_nao.exc->exx_hamilt2rho (*this->pelec, this->pv, iter); #endif // Calculate delta energy - this->pelec->f_en.deband = this->pelec->cal_delta_eband(ucell); + this->pelec->f_en.deband = this->pelec->cal_delta_eband (ucell); } template -void ESolver_KS_LCAO_TDDFT::iter_finish(UnitCell& ucell, +void + ESolver_KS_LCAO_TDDFT::iter_finish (UnitCell& ucell, const int istep, const int estep, const int estep_max, @@ -362,53 +375,54 @@ void ESolver_KS_LCAO_TDDFT::iter_finish(UnitCell& ucell, { // Print occupation of each band if (iter == 1 && istep <= 2) - { - GlobalV::ofs_running << " k-point State Occupations" << std::endl; - GlobalV::ofs_running << std::setiosflags(std::ios::showpoint); - GlobalV::ofs_running << std::left; - std::setprecision(6); - for (int ik = 0; ik < this->kv.get_nks(); ik++) { - for (int ib = 0; ib < PARAM.inp.nbands; ib++) - { - GlobalV::ofs_running << " " << std::setw(9) << ik + 1 << std::setw(8) << ib + 1 << std::setw(12) - << this->pelec->wg(ik, ib) << std::endl; - } + GlobalV::ofs_running << " k-point State Occupations" << std::endl; + GlobalV::ofs_running << std::setiosflags (std::ios::showpoint); + GlobalV::ofs_running << std::left; + std::setprecision (6); + for (int ik 
= 0; ik < this->kv.get_nks (); ik++) + { + for (int ib = 0; ib < PARAM.inp.nbands; ib++) + { + GlobalV::ofs_running << " " << std::setw (9) << ik + 1 << std::setw (8) << ib + 1 + << std::setw (12) << this->pelec->wg (ik, ib) << std::endl; + } + } + GlobalV::ofs_running << std::endl; } - GlobalV::ofs_running << std::endl; - } - ESolver_KS_LCAO, TR>::iter_finish(ucell, istep, iter, conv_esolver); + ESolver_KS_LCAO, TR>::iter_finish (ucell, istep, iter, conv_esolver); // Store wave function, Hamiltonian and Overlap matrix, to be used in next time step // Store when converged or reach max iteration bool force_save = conv_esolver || (iter == this->maxniter); - this->store_h_s_psi(ucell, istep, iter, force_save); + this->store_h_s_psi (ucell, istep, iter, force_save); // Calculate energy-density matrix for RT-TDDFT if (conv_esolver && estep == estep_max - 1 && istep >= (PARAM.inp.init_wfc == "file" ? 0 : 1) && PARAM.inp.td_edm == 0) - { - if (use_tensor && use_lapack) - { - elecstate::cal_edm_tddft_tensor_lapack( - this->pv, - this->dmat, - this->kv, - static_cast>*>(this->p_hamilt)); - } - else { - elecstate::cal_edm_tddft(this->pv, - this->dmat, - this->kv, - static_cast>*>(this->p_hamilt)); + if (use_tensor && use_lapack) + { + elecstate::cal_edm_tddft_tensor_lapack ( + this->pv, + this->dmat, + this->kv, + static_cast>*> (this->p_hamilt)); + } + else + { + elecstate::cal_edm_tddft (this->pv, + this->dmat, + this->kv, + static_cast>*> (this->p_hamilt)); + } } - } } template -void ESolver_KS_LCAO_TDDFT::store_h_s_psi(UnitCell& ucell, +void + ESolver_KS_LCAO_TDDFT::store_h_s_psi (UnitCell& ucell, const int istep, const int iter, const bool conv_esolver) @@ -418,190 +432,197 @@ void ESolver_KS_LCAO_TDDFT::store_h_s_psi(UnitCell& ucell, // Store wave function, Hamiltonian and Overlap matrix if (conv_esolver) - { - if (this->psi_laststep == nullptr) { - this->psi_laststep = new psi::Psi>(this->kv.get_nks(), + if (this->psi_laststep == nullptr) + { + this->psi_laststep = 
new psi::Psi> (this->kv.get_nks (), #ifdef __MPI - this->pv.ncol_bands, - this->pv.nrow, + this->pv.ncol_bands, + this->pv.nrow, #else - nbands, - nlocal, + nbands, + nlocal, #endif - this->kv.ngk, - true); - } - - // Length of Hk_laststep and Sk_laststep, nlocal * nlocal for global, nloc for local - const int len_HS_ik = use_tensor && use_lapack ? nlocal * nlocal : this->pv.nloc; - const int len_HS_all = this->kv.get_nks() * len_HS_ik; - - // Allocate memory for Hk_laststep, if (use_tensor && use_lapack), should be global - if (this->Hk_laststep.NumElements() != len_HS_all) - { - this->Hk_laststep = ct::Tensor(ct::DataType::DT_COMPLEX_DOUBLE, - ct_device_type_hs, - ct::TensorShape({this->kv.get_nks(), len_HS_ik})); - this->Hk_laststep.zero(); - } - - // Allocate memory for Sk_laststep, if (use_tensor && use_lapack), should be global - if (this->Sk_laststep.NumElements() != len_HS_all) - { - this->Sk_laststep = ct::Tensor(ct::DataType::DT_COMPLEX_DOUBLE, - ct_device_type_hs, - ct::TensorShape({this->kv.get_nks(), len_HS_ik})); - this->Sk_laststep.zero(); - } - - // Put information into psi_laststep, Hk_laststep and Sk_laststep - for (int ik = 0; ik < this->kv.get_nks(); ++ik) - { - this->psi->fix_k(ik); - this->psi_laststep->fix_k(ik); - - // Copy data from psi to psi_laststep at k-point ik - const int len_psi_ik = this->psi->get_nbands() * this->psi->get_nbasis(); - for (int index = 0; index < len_psi_ik; ++index) - { - psi_laststep[0].get_pointer()[index] = this->psi[0].get_pointer()[index]; - } - - // Get H and S matrices at k-point ik - this->p_hamilt->updateHk(ik); - hamilt::MatrixBlock> h_mat; - hamilt::MatrixBlock> s_mat; - static_cast>*>(this->p_hamilt)->matrix(h_mat, s_mat); - - // Store H and S matrices to Hk_laststep and Sk_laststep - if (use_tensor && use_lapack) - { -#ifdef __MPI - int myid = 0; - int num_procs = 1; - MPI_Comm_rank(MPI_COMM_WORLD, &myid); - MPI_Comm_size(MPI_COMM_WORLD, &num_procs); - - std::complex* h_ptr = nullptr; - std::complex* 
s_ptr = nullptr; + this->kv.ngk, + true); + } - // Define containers for gathered data (only needed for multi-process) - module_rt::Matrix_g> h_mat_g; - module_rt::Matrix_g> s_mat_g; + // Length of Hk_laststep and Sk_laststep, nlocal * nlocal for global, nloc for local + const int len_HS_ik = use_tensor && use_lapack ? nlocal * nlocal : this->pv.nloc; + const int len_HS_all = this->kv.get_nks () * len_HS_ik; - if (num_procs == 1) + // Allocate memory for Hk_laststep, if (use_tensor && use_lapack), should be global + if (this->Hk_laststep.NumElements () != len_HS_all) { - // Single process: directly point to local data without gather - h_ptr = h_mat.p; - s_ptr = s_mat.p; + this->Hk_laststep = ct::Tensor (ct::DataType::DT_COMPLEX_DOUBLE, + ct_device_type_hs, + ct::TensorShape ({this->kv.get_nks (), len_HS_ik})); + this->Hk_laststep.zero (); } - else + + // Allocate memory for Sk_laststep, if (use_tensor && use_lapack), should be global + if (this->Sk_laststep.NumElements () != len_HS_all) { - // Multiple processes: gather data to the root process (myid == 0) and point to the gathered data - module_rt::gatherMatrix(myid, 0, h_mat, h_mat_g); - module_rt::gatherMatrix(myid, 0, s_mat, s_mat_g); - if (myid == 0) - { - h_ptr = h_mat_g.p.get(); - s_ptr = s_mat_g.p.get(); - } + this->Sk_laststep = ct::Tensor (ct::DataType::DT_COMPLEX_DOUBLE, + ct_device_type_hs, + ct::TensorShape ({this->kv.get_nks (), len_HS_ik})); + this->Sk_laststep.zero (); } - // Only the root process (myid == 0) performs the copy - if (myid == 0 && h_ptr != nullptr && s_ptr != nullptr) + // Put information into psi_laststep, Hk_laststep and Sk_laststep + for (int ik = 0; ik < this->kv.get_nks (); ++ik) { - BlasConnector::copy(len_HS_ik, - h_ptr, - 1, - this->Hk_laststep.template data>() + ik * len_HS_ik, - 1); - BlasConnector::copy(len_HS_ik, - s_ptr, - 1, - this->Sk_laststep.template data>() + ik * len_HS_ik, - 1); - } + this->psi->fix_k (ik); + this->psi_laststep->fix_k (ik); + + // Copy data from 
psi to psi_laststep at k-point ik + const int len_psi_ik = this->psi->get_nbands () * this->psi->get_nbasis (); + for (int index = 0; index < len_psi_ik; ++index) + { + psi_laststep[0].get_pointer ()[index] = this->psi[0].get_pointer ()[index]; + } + + // Get H and S matrices at k-point ik + this->p_hamilt->updateHk (ik); + hamilt::MatrixBlock> h_mat; + hamilt::MatrixBlock> s_mat; + static_cast>*> (this->p_hamilt)->matrix (h_mat, s_mat); + + // Store H and S matrices to Hk_laststep and Sk_laststep + if (use_tensor && use_lapack) + { +#ifdef __MPI + int myid = 0; + int num_procs = 1; + MPI_Comm_rank (MPI_COMM_WORLD, &myid); + MPI_Comm_size (MPI_COMM_WORLD, &num_procs); + + std::complex* h_ptr = nullptr; + std::complex* s_ptr = nullptr; + + // Define containers for gathered data (only needed for multi-process) + module_rt::Matrix_g> h_mat_g; + module_rt::Matrix_g> s_mat_g; + + if (num_procs == 1) + { + // Single process: directly point to local data without gather + h_ptr = h_mat.p; + s_ptr = s_mat.p; + } + else + { + // Multiple processes: gather data to the root process (myid == 0) and point to the + // gathered data + module_rt::gatherMatrix (myid, 0, h_mat, h_mat_g); + module_rt::gatherMatrix (myid, 0, s_mat, s_mat_g); + if (myid == 0) + { + h_ptr = h_mat_g.p.get (); + s_ptr = s_mat_g.p.get (); + } + } + + // Only the root process (myid == 0) performs the copy + if (myid == 0 && h_ptr != nullptr && s_ptr != nullptr) + { + BlasConnector::copy (len_HS_ik, + h_ptr, + 1, + this->Hk_laststep.template data> () + + ik * len_HS_ik, + 1); + BlasConnector::copy (len_HS_ik, + s_ptr, + 1, + this->Sk_laststep.template data> () + + ik * len_HS_ik, + 1); + } #endif - } - else - { - BlasConnector::copy(len_HS_ik, - h_mat.p, - 1, - this->Hk_laststep.template data>() + ik * len_HS_ik, - 1); - BlasConnector::copy(len_HS_ik, - s_mat.p, - 1, - this->Sk_laststep.template data>() + ik * len_HS_ik, - 1); - } // end use_tensor - } // end ik - } // conv_esolver + } + else + { + 
BlasConnector::copy (len_HS_ik, + h_mat.p, + 1, + this->Hk_laststep.template data> () + + ik * len_HS_ik, + 1); + BlasConnector::copy (len_HS_ik, + s_mat.p, + 1, + this->Sk_laststep.template data> () + + ik * len_HS_ik, + 1); + } // end use_tensor + } // end ik + } // conv_esolver } template -void ESolver_KS_LCAO_TDDFT::after_scf(UnitCell& ucell, const int istep, const bool conv_esolver) +void + ESolver_KS_LCAO_TDDFT::after_scf (UnitCell& ucell, const int istep, const bool conv_esolver) { - ModuleBase::TITLE("ESolver_LCAO_TDDFT", "after_scf"); - ModuleBase::timer::start(this->classname, "after_scf"); + ModuleBase::TITLE ("ESolver_LCAO_TDDFT", "after_scf"); + ModuleBase::timer::start (this->classname, "after_scf"); - ESolver_KS_LCAO, TR>::after_scf(ucell, istep, conv_esolver); + ESolver_KS_LCAO, TR>::after_scf (ucell, istep, conv_esolver); // Output energy for sub-loop (electronic step) - std::cout << " Potential (Ry): " << std::setprecision(15) << this->pelec->f_en.etot << std::endl; + std::cout << " Potential (Ry): " << std::setprecision (15) << this->pelec->f_en.etot << std::endl; // Output dipole, current, etc. 
- auto* hamilt_lcao = dynamic_cast, TR>*>(this->p_hamilt); - ModuleIO::ctrl_output_td(ucell, - this->chr.rho_save, - this->chr.rhopw, - istep, - this->psi, - this->pelec, - this->kv, - this->two_center_bundle_.overlap_orb.get(), - &this->pv, - this->orb_, - this->velocity_mat, - this->gd, - hamilt_lcao, - this->RA, - this->td_p, - this->exx_nao); - - ModuleBase::timer::end(this->classname, "after_scf"); + auto* hamilt_lcao = dynamic_cast, TR>*> (this->p_hamilt); + ModuleIO::ctrl_output_td (ucell, + this->chr.rho_save, + this->chr.rhopw, + istep, + this->psi, + this->pelec, + this->kv, + this->two_center_bundle_.overlap_orb.get (), + &this->pv, + this->orb_, + this->velocity_mat, + this->gd, + hamilt_lcao, + this->RA, + this->td_p, + this->exx_nao); + + ModuleBase::timer::end (this->classname, "after_scf"); } template -void ESolver_KS_LCAO_TDDFT::weight_dm_rho(const UnitCell& ucell) +void + ESolver_KS_LCAO_TDDFT::weight_dm_rho (const UnitCell& ucell) { if (PARAM.inp.ocp == 1) - { - elecstate::fixed_weights(PARAM.inp.ocp_kb, - PARAM.inp.nbands, - PARAM.inp.nelec, - this->pelec->klist, - this->pelec->wg, - this->pelec->skip_weights); - } + { + elecstate::fixed_weights (PARAM.inp.ocp_kb, + PARAM.inp.nbands, + PARAM.inp.nelec, + this->pelec->klist, + this->pelec->wg, + this->pelec->skip_weights); + } // Calculate Eband energy - elecstate::calEBand(this->pelec->ekb, this->pelec->wg, this->pelec->f_en); + elecstate::calEBand (this->pelec->ekb, this->pelec->wg, this->pelec->f_en); - elecstate::cal_dm_psi(this->dmat.dm->get_paraV_pointer(), this->pelec->wg, this->psi[0], *this->dmat.dm); + elecstate::cal_dm_psi (this->dmat.dm->get_paraV_pointer (), this->pelec->wg, this->psi[0], *this->dmat.dm); if (PARAM.inp.td_stype == 2) - { - this->dmat.dm->cal_DMR_td(ucell, TD_info::cart_At); - } + { + this->dmat.dm->cal_DMR_td (ucell, TD_info::cart_At); + } else - { - this->dmat.dm->cal_DMR(); - } + { + this->dmat.dm->cal_DMR (); + } // get the real-space charge density, mohan add 
2025-10-24 - LCAO_domain::dm2rho(this->dmat.dm->get_DMR_vector(), PARAM.inp.nspin, &this->chr); + LCAO_domain::dm2rho (this->dmat.dm->get_DMR_vector (), PARAM.inp.nspin, &this->chr); } template class ESolver_KS_LCAO_TDDFT; diff --git a/source/source_esolver/esolver_ks_lcao_tddft.h b/source/source_esolver/esolver_ks_lcao_tddft.h index 07b049b9fbb..8dd609cf9b2 100644 --- a/source/source_esolver/esolver_ks_lcao_tddft.h +++ b/source/source_esolver/esolver_ks_lcao_tddft.h @@ -15,38 +15,38 @@ template class ESolver_KS_LCAO_TDDFT : public ESolver_KS_LCAO, TR> { public: - ESolver_KS_LCAO_TDDFT(); + ESolver_KS_LCAO_TDDFT (); - ~ESolver_KS_LCAO_TDDFT(); + ~ESolver_KS_LCAO_TDDFT (); - void before_all_runners(UnitCell& ucell, const Input_para& inp) override; + void before_all_runners (UnitCell& ucell, const Input_para& inp) override; protected: - virtual void runner(UnitCell& cell, const int istep) override; + virtual void runner (UnitCell& cell, const int istep) override; - virtual void hamilt2rho_single(UnitCell& ucell, const int istep, const int iter, const double ethr) override; + virtual void hamilt2rho_single (UnitCell& ucell, const int istep, const int iter, const double ethr) override; - void store_h_s_psi(UnitCell& ucell, const int istep, const int iter, const bool conv_esolver); + void store_h_s_psi (UnitCell& ucell, const int istep, const int iter, const bool conv_esolver); - void iter_finish(UnitCell& ucell, - const int istep, - const int estep, - const int estep_max, - int& iter, - bool& conv_esolver); + void iter_finish (UnitCell& ucell, + const int istep, + const int estep, + const int estep_max, + int& iter, + bool& conv_esolver); - virtual void after_scf(UnitCell& ucell, const int istep, const bool conv_esolver) override; + virtual void after_scf (UnitCell& ucell, const int istep, const bool conv_esolver) override; - void print_step(); + void print_step (); //! Wave function for all k-points of last time step psi::Psi>* psi_laststep = nullptr; //! 
Hamiltonian for all k-points of last time step - ct::Tensor Hk_laststep = ct::Tensor(ct::DataType::DT_COMPLEX_DOUBLE); + ct::Tensor Hk_laststep = ct::Tensor (ct::DataType::DT_COMPLEX_DOUBLE); //! Overlap matrix for all k-points of last time step - ct::Tensor Sk_laststep = ct::Tensor(ct::DataType::DT_COMPLEX_DOUBLE); + ct::Tensor Sk_laststep = ct::Tensor (ct::DataType::DT_COMPLEX_DOUBLE); //! Control heterogeneous computing of the TDDFT solver bool use_tensor = false; @@ -73,7 +73,7 @@ class ESolver_KS_LCAO_TDDFT : public ESolver_KS_LCAO, TR> bool restart_done = false; private: - void weight_dm_rho(const UnitCell& ucell); + void weight_dm_rho (const UnitCell& ucell); }; } // namespace ModuleESolver diff --git a/source/source_esolver/esolver_ks_lcaopw.cpp b/source/source_esolver/esolver_ks_lcaopw.cpp index c6154ab5b8b..3ea46c0cf90 100644 --- a/source/source_esolver/esolver_ks_lcaopw.cpp +++ b/source/source_esolver/esolver_ks_lcaopw.cpp @@ -42,222 +42,241 @@ namespace ModuleESolver { - template - ESolver_KS_LIP::ESolver_KS_LIP() - { - this->classname = "ESolver_KS_LIP"; - this->basisname = "LIP"; - } - template - ESolver_KS_LIP::~ESolver_KS_LIP() - { - //**************************************************** - // do not add any codes in this deconstructor funcion - //**************************************************** - delete this->psi_local; - // delete Hamilt - if (this->p_hamilt != nullptr) +template +ESolver_KS_LIP::ESolver_KS_LIP () +{ + this->classname = "ESolver_KS_LIP"; + this->basisname = "LIP"; +} +template +ESolver_KS_LIP::~ESolver_KS_LIP () +{ + //**************************************************** + // do not add any codes in this deconstructor funcion + //**************************************************** + delete this->psi_local; + // delete Hamilt + if (this->p_hamilt != nullptr) { delete this->p_hamilt; this->p_hamilt = nullptr; } - } +} - template - void ESolver_KS_LIP::allocate_hamilt(const UnitCell& ucell) - { - this->p_hamilt = new 
hamilt::HamiltLIP(this->pelec->pot, this->pw_wfc, &this->kv, &this->ppcell, &ucell +template +void + ESolver_KS_LIP::allocate_hamilt (const UnitCell& ucell) +{ + this->p_hamilt = new hamilt::HamiltLIP (this->pelec->pot, + this->pw_wfc, + &this->kv, + &this->ppcell, + &ucell #ifdef __EXX - , *this->exx_lip + , + *this->exx_lip #endif - ); - } + ); +} - template - void ESolver_KS_LIP::before_scf(UnitCell& ucell, const int istep) - { - ESolver_KS_PW::before_scf(ucell, istep); - auto* p_psi_init = static_cast*>(this->stp.p_psi_init); - p_psi_init->initialize_lcao_in_pw(this->psi_local, GlobalV::ofs_running); - } +template +void + ESolver_KS_LIP::before_scf (UnitCell& ucell, const int istep) +{ + ESolver_KS_PW::before_scf (ucell, istep); + auto* p_psi_init = static_cast*> (this->stp.p_psi_init); + p_psi_init->initialize_lcao_in_pw (this->psi_local, GlobalV::ofs_running); +} - template - void ESolver_KS_LIP::before_all_runners(UnitCell& ucell, const Input_para& inp) - { - ESolver_KS_PW::before_all_runners(ucell, inp); - auto* p_psi_init = static_cast*>(this->stp.p_psi_init); - delete this->psi_local; - this->psi_local = new psi::Psi(this->stp.psi_cpu->get_nk(), - p_psi_init->psi_initer->nbands_start(), - this->stp.psi_cpu->get_nbasis(), - this->kv.ngk, - true); +template +void + ESolver_KS_LIP::before_all_runners (UnitCell& ucell, const Input_para& inp) +{ + ESolver_KS_PW::before_all_runners (ucell, inp); + auto* p_psi_init = static_cast*> (this->stp.p_psi_init); + delete this->psi_local; + this->psi_local = new psi::Psi (this->stp.psi_cpu->get_nk (), + p_psi_init->psi_initer->nbands_start (), + this->stp.psi_cpu->get_nbasis (), + this->kv.ngk, + true); #ifdef __EXX - if (inp.calculation == "scf" || inp.calculation == "relax" - || inp.calculation == "cell-relax" - || inp.calculation == "md") { + if (inp.calculation == "scf" || inp.calculation == "relax" || inp.calculation == "cell-relax" + || inp.calculation == "md") + { if (GlobalC::exx_info.info_global.cal_exx) - { - 
XC_Functional::set_xc_first_loop(ucell); - this->exx_lip = std::unique_ptr>(new Exx_Lip(GlobalC::exx_info.info_lip, - ucell.symm, - &this->kv, - this->psi_local, - this->stp.template get_psi_t(), - this->pw_wfc, - this->pw_rho, - this->sf, - &ucell, - this->pelec)); - } -} + { + XC_Functional::set_xc_first_loop (ucell); + this->exx_lip = std::unique_ptr> ( + new Exx_Lip (GlobalC::exx_info.info_lip, + ucell.symm, + &this->kv, + this->psi_local, + this->stp.template get_psi_t (), + this->pw_wfc, + this->pw_rho, + this->sf, + &ucell, + this->pelec)); + } + } #endif - } +} - template - void ESolver_KS_LIP::iter_init(UnitCell& ucell, const int istep, const int iter) - { - ESolver_KS_PW::iter_init(ucell, istep, iter); +template +void + ESolver_KS_LIP::iter_init (UnitCell& ucell, const int istep, const int iter) +{ + ESolver_KS_PW::iter_init (ucell, istep, iter); #ifdef __EXX - if (GlobalC::exx_info.info_global.cal_exx && !GlobalC::exx_info.info_global.separate_loop && this->two_level_step) { - this->exx_lip->cal_exx(); -} + if (GlobalC::exx_info.info_global.cal_exx && !GlobalC::exx_info.info_global.separate_loop && this->two_level_step) + { + this->exx_lip->cal_exx (); + } #endif - } +} - template - void ESolver_KS_LIP::hamilt2rho_single(UnitCell& ucell, const int istep, const int iter, const double ethr) - { - ModuleBase::TITLE("ESolver_KS_LIP", "hamilt2rho_single"); - ModuleBase::timer::start("ESolver_KS_LIP", "hamilt2rho_single"); +template +void + ESolver_KS_LIP::hamilt2rho_single (UnitCell& ucell, const int istep, const int iter, const double ethr) +{ + ModuleBase::TITLE ("ESolver_KS_LIP", "hamilt2rho_single"); + ModuleBase::timer::start ("ESolver_KS_LIP", "hamilt2rho_single"); - // reset energy - this->pelec->f_en.eband = 0.0; - this->pelec->f_en.demet = 0.0; - // choose if psi should be diag in subspace - // be careful that istep start from 0 and iter start from 1 - // if (iter == 1) - hsolver::DiagoIterAssist::need_subspace = ((istep == 0 || istep == 1) && iter 
== 1) ? false : true; - hsolver::DiagoIterAssist::SCF_ITER = iter; - hsolver::DiagoIterAssist::PW_DIAG_THR = ethr; - hsolver::DiagoIterAssist::PW_DIAG_NMAX = PARAM.inp.pw_diag_nmax; - bool skip_charge = PARAM.inp.calculation == "nscf" ? true : false; + // reset energy + this->pelec->f_en.eband = 0.0; + this->pelec->f_en.demet = 0.0; + // choose if psi should be diag in subspace + // be careful that istep start from 0 and iter start from 1 + // if (iter == 1) + hsolver::DiagoIterAssist::need_subspace = ((istep == 0 || istep == 1) && iter == 1) ? false : true; + hsolver::DiagoIterAssist::SCF_ITER = iter; + hsolver::DiagoIterAssist::PW_DIAG_THR = ethr; + hsolver::DiagoIterAssist::PW_DIAG_NMAX = PARAM.inp.pw_diag_nmax; + bool skip_charge = PARAM.inp.calculation == "nscf" ? true : false; - hsolver::HSolverLIP hsolver_lip_obj(this->pw_wfc); - hsolver_lip_obj.solve(static_cast*>(this->p_hamilt), *this->stp.template get_psi_t(), this->pelec, - *this->psi_local, skip_charge,ucell.tpiba,ucell.nat); + hsolver::HSolverLIP hsolver_lip_obj (this->pw_wfc); + hsolver_lip_obj.solve (static_cast*> (this->p_hamilt), + *this->stp.template get_psi_t (), + this->pelec, + *this->psi_local, + skip_charge, + ucell.tpiba, + ucell.nat); - // add exx + // add exx #ifdef __EXX - if (GlobalC::exx_info.info_global.cal_exx) + if (GlobalC::exx_info.info_global.cal_exx) { - this->pelec->set_exx(this->exx_lip->get_exx_energy()); // Peize Lin add 2019-03-09 + this->pelec->set_exx (this->exx_lip->get_exx_energy ()); // Peize Lin add 2019-03-09 } #endif - Symmetry_rho::symmetrize_rho(PARAM.inp.nspin, this->chr, this->pw_rhod, ucell.symm); + Symmetry_rho::symmetrize_rho (PARAM.inp.nspin, this->chr, this->pw_rhod, ucell.symm); - // deband is calculated from "output" charge density calculated - // in sum_band - // need 'rho(out)' and 'vr (v_h(in) and v_xc(in))' - this->pelec->f_en.deband = this->pelec->cal_delta_eband(ucell); + // deband is calculated from "output" charge density calculated + // in 
sum_band + // need 'rho(out)' and 'vr (v_h(in) and v_xc(in))' + this->pelec->f_en.deband = this->pelec->cal_delta_eband (ucell); - ModuleBase::timer::end("ESolver_KS_LIP", "hamilt2rho_single"); - } + ModuleBase::timer::end ("ESolver_KS_LIP", "hamilt2rho_single"); +} - template - void ESolver_KS_LIP::iter_finish(UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) - { - ESolver_KS_PW::iter_finish(ucell, istep, iter, conv_esolver); +template +void + ESolver_KS_LIP::iter_finish (UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) +{ + ESolver_KS_PW::iter_finish (ucell, istep, iter, conv_esolver); #ifdef __EXX - if (GlobalC::exx_info.info_global.cal_exx && conv_esolver) + if (GlobalC::exx_info.info_global.cal_exx && conv_esolver) { // no separate_loop case if (!GlobalC::exx_info.info_global.separate_loop) - { - GlobalC::exx_info.info_global.hybrid_step = 1; + { + GlobalC::exx_info.info_global.hybrid_step = 1; - // in no_separate_loop case, scf loop only did twice - // in first scf loop, exx updated once in beginning, - // in second scf loop, exx updated every iter + // in no_separate_loop case, scf loop only did twice + // in first scf loop, exx updated once in beginning, + // in second scf loop, exx updated every iter - if (!this->two_level_step) - { - // update exx and redo scf - XC_Functional::set_xc_type(ucell.atoms[0].ncpp.xc_func); - iter = 0; - std::cout << " Entering 2nd SCF, where EXX is updated" << std::endl; - this->two_level_step++; - conv_esolver = false; + if (!this->two_level_step) + { + // update exx and redo scf + XC_Functional::set_xc_type (ucell.atoms[0].ncpp.xc_func); + iter = 0; + std::cout << " Entering 2nd SCF, where EXX is updated" << std::endl; + this->two_level_step++; + conv_esolver = false; + } } - } // has separate_loop case // exx converged or get max exx steps else if (this->two_level_step == GlobalC::exx_info.info_global.hybrid_step || (iter == 1 && this->two_level_step != 0)) - { - conv_esolver = true; - } - 
else - { - // update exx and redo scf - if (this->two_level_step == 0) { - XC_Functional::set_xc_type(ucell.atoms[0].ncpp.xc_func); + conv_esolver = true; } + else + { + // update exx and redo scf + if (this->two_level_step == 0) + { + XC_Functional::set_xc_type (ucell.atoms[0].ncpp.xc_func); + } - std::cout << " Updating EXX " << std::flush; - timeval t_start; - gettimeofday(&t_start, nullptr); + std::cout << " Updating EXX " << std::flush; + timeval t_start; + gettimeofday (&t_start, nullptr); - this->exx_lip->cal_exx(); - iter = 0; - this->two_level_step++; + this->exx_lip->cal_exx (); + iter = 0; + this->two_level_step++; - timeval t_end; - gettimeofday(&t_end, nullptr); - std::cout << "and rerun SCF\t" << std::setprecision(3) << std::setiosflags(std::ios::scientific) - << (double)(t_end.tv_sec - t_start.tv_sec) - + (double)(t_end.tv_usec - t_start.tv_usec) / 1000000.0 - << std::defaultfloat << " (s)" << std::endl; - conv_esolver = false; - } + timeval t_end; + gettimeofday (&t_end, nullptr); + std::cout << "and rerun SCF\t" << std::setprecision (3) << std::setiosflags (std::ios::scientific) + << (double)(t_end.tv_sec - t_start.tv_sec) + + (double)(t_end.tv_usec - t_start.tv_usec) / 1000000.0 + << std::defaultfloat << " (s)" << std::endl; + conv_esolver = false; + } } #endif - } +} - template - void ESolver_KS_LIP::after_all_runners(UnitCell& ucell) - { - ESolver_KS_PW::after_all_runners(ucell); +template +void + ESolver_KS_LIP::after_all_runners (UnitCell& ucell) +{ + ESolver_KS_PW::after_all_runners (ucell); #ifdef __LCAO - if (PARAM.inp.out_mat_xc) + if (PARAM.inp.out_mat_xc) { - ModuleIO::write_Vxc(PARAM.inp.nspin, - PARAM.globalv.nlocal, - GlobalV::DRANK, - *this->stp.template get_psi_t(), - ucell, - this->sf, - this->solvent, - *this->pw_wfc, - *this->pw_rho, - *this->pw_rhod, - this->locpp.vloc, - this->chr, - this->kv, - this->pelec->wg + ModuleIO::write_Vxc (PARAM.inp.nspin, + PARAM.globalv.nlocal, + GlobalV::DRANK, + *this->stp.template get_psi_t (), 
+ ucell, + this->sf, + this->solvent, + *this->pw_wfc, + *this->pw_rho, + *this->pw_rhod, + this->locpp.vloc, + this->chr, + this->kv, + this->pelec->wg #ifdef __EXX - , - *this->exx_lip + , + *this->exx_lip #endif ); } #endif - } - template class ESolver_KS_LIP>; - template class ESolver_KS_LIP>; - // LIP is not supported on GPU yet. +} +template class ESolver_KS_LIP>; +template class ESolver_KS_LIP>; +// LIP is not supported on GPU yet. } // namespace ModuleESolver diff --git a/source/source_esolver/esolver_ks_lcaopw.h b/source/source_esolver/esolver_ks_lcaopw.h index b8d6811f765..b385885c1d2 100644 --- a/source/source_esolver/esolver_ks_lcaopw.h +++ b/source/source_esolver/esolver_ks_lcaopw.h @@ -9,41 +9,37 @@ namespace ModuleESolver { - template - class ESolver_KS_LIP : public ESolver_KS_PW - { - private: - using Real = typename GetTypeReal::type; +template +class ESolver_KS_LIP : public ESolver_KS_PW +{ + private: + using Real = typename GetTypeReal::type; + + public: + ESolver_KS_LIP (); - public: - ESolver_KS_LIP(); + ~ESolver_KS_LIP (); - ~ESolver_KS_LIP(); + void before_all_runners (UnitCell& ucell, const Input_para& inp) override; + void after_all_runners (UnitCell& ucell) override; - void before_all_runners(UnitCell& ucell, const Input_para& inp) override; - void after_all_runners(UnitCell& ucell) override; + virtual void before_scf (UnitCell& ucell, const int istep) override; - virtual void before_scf(UnitCell& ucell, const int istep) override; + protected: + virtual void iter_init (UnitCell& ucell, const int istep, const int iter) override; + virtual void iter_finish (UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) override; - protected: - virtual void iter_init(UnitCell& ucell, const int istep, const int iter) override; - virtual void iter_finish(UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) override; + /// All the other interfaces except this one are the same as ESolver_KS_PW. 
+ virtual void hamilt2rho_single (UnitCell& ucell, const int istep, const int iter, const double ethr) override; - /// All the other interfaces except this one are the same as ESolver_KS_PW. - virtual void hamilt2rho_single(UnitCell& ucell, - const int istep, - const int iter, - const double ethr) override; + virtual void allocate_hamilt (const UnitCell& ucell) override; - virtual void allocate_hamilt(const UnitCell& ucell) override; - - psi::Psi* psi_local = nullptr; ///< psi for all local NAOs + psi::Psi* psi_local = nullptr; ///< psi for all local NAOs #ifdef __EXX - std::unique_ptr> exx_lip; - int two_level_step = 0; + std::unique_ptr> exx_lip; + int two_level_step = 0; #endif - - }; +}; } // namespace ModuleESolver #endif diff --git a/source/source_esolver/esolver_ks_pw.cpp b/source/source_esolver/esolver_ks_pw.cpp index 6714821d02f..d188ddb7526 100644 --- a/source/source_esolver/esolver_ks_pw.cpp +++ b/source/source_esolver/esolver_ks_pw.cpp @@ -24,14 +24,14 @@ #include "source_base/kernels/dsp/dsp_connector.h" #endif -#include "source_pw/module_pwdft/setup_pot.h" // mohan add 20250929 -#include "source_estate/setup_estate_pw.h" // mohan add 20251005 -#include "source_io/module_ctrl/ctrl_output_pw.h" // mohan add 20250927 -#include "source_estate/module_charge/chgmixing.h" // use charge mixing, mohan add 20251006 -#include "source_estate/update_pot.h" // mohan add 20251016 +#include "source_pw/module_pwdft/setup_pot.h" // mohan add 20250929 +#include "source_estate/setup_estate_pw.h" // mohan add 20251005 +#include "source_io/module_ctrl/ctrl_output_pw.h" // mohan add 20250927 +#include "source_estate/module_charge/chgmixing.h" // use charge mixing, mohan add 20251006 +#include "source_estate/update_pot.h" // mohan add 20251016 #include "source_pw/module_pwdft/update_cell_pw.h" // mohan add 20250309 -#include "source_pw/module_pwdft/dftu_pw.h" // mohan add 20250309 -#include "source_pw/module_pwdft/deltaspin_pw.h" // mohan add 20250309 +#include 
"source_pw/module_pwdft/dftu_pw.h" // mohan add 20250309 +#include "source_pw/module_pwdft/deltaspin_pw.h" // mohan add 20250309 #include "source_hamilt/module_xc/exx_info.h" // use GlobalC::exx_info @@ -39,63 +39,73 @@ namespace ModuleESolver { template -ESolver_KS_PW::ESolver_KS_PW() +ESolver_KS_PW::ESolver_KS_PW () { this->classname = "ESolver_KS_PW"; this->basisname = "PW"; } template -ESolver_KS_PW::~ESolver_KS_PW() +ESolver_KS_PW::~ESolver_KS_PW () { //**************************************************** // do not add any codes in this deconstructor funcion //**************************************************** // delete Hamilt if (this->p_hamilt != nullptr) - { - delete this->p_hamilt; - this->p_hamilt = nullptr; - } + { + delete this->p_hamilt; + this->p_hamilt = nullptr; + } // delete exx_helper if (this->exx_helper != nullptr) - { - delete this->exx_helper; - this->exx_helper = nullptr; - } + { + delete this->exx_helper; + this->exx_helper = nullptr; + } // mohan add 2025-10-12 - this->stp.clean(); + this->stp.clean (); } template -void ESolver_KS_PW::allocate_hamilt(const UnitCell& ucell) +void + ESolver_KS_PW::allocate_hamilt (const UnitCell& ucell) { - this->p_hamilt = new hamilt::HamiltPW( - this->pelec->pot, - this->pw_wfc, - &this->kv, - &this->ppcell, - &this->dftu, - &ucell); + this->p_hamilt = new hamilt::HamiltPW (this->pelec->pot, + this->pw_wfc, + &this->kv, + &this->ppcell, + &this->dftu, + &ucell); } - - template -void ESolver_KS_PW::before_all_runners(UnitCell& ucell, const Input_para& inp) +void + ESolver_KS_PW::before_all_runners (UnitCell& ucell, const Input_para& inp) { - ESolver_KS::before_all_runners(ucell, inp); - - //! setup and allocation for pelec, potentials, etc. 
- elecstate::setup_estate_pw(ucell, this->kv, this->sf, this->pelec, this->chr, - this->locpp, this->ppcell, this->vsep_cell, this->pw_wfc, this->pw_rho, - this->pw_rhod, this->pw_big, this->solvent, inp); - - this->stp.before_runner(ucell, this->kv, this->sf, *this->pw_wfc, this->ppcell, PARAM.inp); - - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "INIT BASIS"); + ESolver_KS::before_all_runners (ucell, inp); + + //! setup and allocation for pelec, potentials, etc. + elecstate::setup_estate_pw (ucell, + this->kv, + this->sf, + this->pelec, + this->chr, + this->locpp, + this->ppcell, + this->vsep_cell, + this->pw_wfc, + this->pw_rho, + this->pw_rhod, + this->pw_big, + this->solvent, + inp); + + this->stp.before_runner (ucell, this->kv, this->sf, *this->pw_wfc, this->ppcell, PARAM.inp); + + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "INIT BASIS"); //! Create exx_helper based on device and precision const bool is_gpu = (inp.device == "gpu"); @@ -103,247 +113,321 @@ void ESolver_KS_PW::before_all_runners(UnitCell& ucell, const Input_p #if ((defined __CUDA) || (defined __ROCM)) if (is_gpu) - { - if (is_single) - { - this->exx_helper = new Exx_Helper, base_device::DEVICE_GPU>(); - } - else { - this->exx_helper = new Exx_Helper, base_device::DEVICE_GPU>(); + if (is_single) + { + this->exx_helper = new Exx_Helper, base_device::DEVICE_GPU> (); + } + else + { + this->exx_helper = new Exx_Helper, base_device::DEVICE_GPU> (); + } } - } else #endif - { - if (is_single) - { - this->exx_helper = new Exx_Helper, base_device::DEVICE_CPU>(); - } - else { - this->exx_helper = new Exx_Helper, base_device::DEVICE_CPU>(); + if (is_single) + { + this->exx_helper = new Exx_Helper, base_device::DEVICE_CPU> (); + } + else + { + this->exx_helper = new Exx_Helper, base_device::DEVICE_CPU> (); + } } - } //! 
Initialize exx pw - this->exx_helper->init(ucell, inp, this->pelec->wg); + this->exx_helper->init (ucell, inp, this->pelec->wg); } template -void ESolver_KS_PW::before_scf(UnitCell& ucell, const int istep) +void + ESolver_KS_PW::before_scf (UnitCell& ucell, const int istep) { - ModuleBase::TITLE("ESolver_KS_PW", "before_scf"); - ModuleBase::timer::start("ESolver_KS_PW", "before_scf"); + ModuleBase::TITLE ("ESolver_KS_PW", "before_scf"); + ModuleBase::timer::start ("ESolver_KS_PW", "before_scf"); - ESolver_KS::before_scf(ucell, istep); + ESolver_KS::before_scf (ucell, istep); //! Init variables (once the cell has changed) - pw::update_cell_pw(ucell, this->ppcell, this->kv, this->pw_wfc, PARAM.inp); + pw::update_cell_pw (ucell, this->ppcell, this->kv, this->pw_wfc, PARAM.inp); if (ucell.cell_parameter_updated) - { - this->stp.p_psi_init->prepare_init(PARAM.inp.pw_seed); - } + { + this->stp.p_psi_init->prepare_init (PARAM.inp.pw_seed); + } //! Init Hamiltonian (cell changed) //! Operators in HamiltPW should be reallocated once cell changed //! delete Hamilt if not first scf if (this->p_hamilt != nullptr) - { - delete this->p_hamilt; - this->p_hamilt = nullptr; - } + { + delete this->p_hamilt; + this->p_hamilt = nullptr; + } //! Allocate HamiltPW - this->allocate_hamilt(ucell); + this->allocate_hamilt (ucell); //! Setup potentials (local, non-local, sc, +U, DFT-1/2) - // note: init DFT+U is done here for pw basis for every scf iteration, however, + // note: init DFT+U is done here for pw basis for every scf iteration, however, // init DFT+U is done in "before_all_runners" in LCAO basis. 
This should be refactored, mohan note 2025-11-06 - pw::setup_pot(istep, ucell, this->kv, this->sf, this->pelec, this->Pgrid, - this->chr, this->locpp, this->ppcell, this->dftu, this->vsep_cell, - this->stp.template get_psi_t(), - this->p_hamilt, - this->pw_wfc, this->pw_rhod, PARAM.inp); + pw::setup_pot (istep, + ucell, + this->kv, + this->sf, + this->pelec, + this->Pgrid, + this->chr, + this->locpp, + this->ppcell, + this->dftu, + this->vsep_cell, + this->stp.template get_psi_t (), + this->p_hamilt, + this->pw_wfc, + this->pw_rhod, + PARAM.inp); // setup psi (electronic wave functions) - this->stp.init(this->p_hamilt); + this->stp.init (this->p_hamilt); //! Setup EXX helper for Hamiltonian and psi - exx_helper->before_scf(this->p_hamilt, this->stp.template get_psi_t(), PARAM.inp); + exx_helper->before_scf (this->p_hamilt, this->stp.template get_psi_t (), PARAM.inp); - ModuleBase::timer::end("ESolver_KS_PW", "before_scf"); + ModuleBase::timer::end ("ESolver_KS_PW", "before_scf"); } template -void ESolver_KS_PW::iter_init(UnitCell& ucell, const int istep, const int iter) +void + ESolver_KS_PW::iter_init (UnitCell& ucell, const int istep, const int iter) { - ESolver_KS::iter_init(ucell, istep, iter); + ESolver_KS::iter_init (ucell, istep, iter); - module_charge::chgmixing_ks_pw(iter, this->p_chgmix, this->dftu, PARAM.inp); + module_charge::chgmixing_ks_pw (iter, this->p_chgmix, this->dftu, PARAM.inp); // mohan move harris functional here, 2012-06-05 // use 'rho(in)' and 'v_h and v_xc'(in) - this->pelec->f_en.deband_harris = this->pelec->cal_delta_eband(ucell); + this->pelec->f_en.deband_harris = this->pelec->cal_delta_eband (ucell); // update local occupations for DFT+U // should before lambda loop in DeltaSpin - pw::iter_init_dftu_pw(iter, istep, this->dftu, this->stp.template get_psi_t(), this->pelec->wg, ucell, PARAM.inp); + pw::iter_init_dftu_pw (iter, + istep, + this->dftu, + this->stp.template get_psi_t (), + this->pelec->wg, + ucell, + PARAM.inp); } // 
Temporary, it should be replaced by hsolver later. template -void ESolver_KS_PW::hamilt2rho_single(UnitCell& ucell, const int istep, const int iter, const double ethr) +void + ESolver_KS_PW::hamilt2rho_single (UnitCell& ucell, const int istep, const int iter, const double ethr) { - ModuleBase::timer::start("ESolver_KS_PW", "hamilt2rho_single"); + ModuleBase::timer::start ("ESolver_KS_PW", "hamilt2rho_single"); // reset energy this->pelec->f_en.eband = 0.0; this->pelec->f_en.demet = 0.0; // setup diagonalization parameters - hsolver::setup_diago_params_pw(istep, iter, ethr, PARAM.inp); + hsolver::setup_diago_params_pw (istep, iter, ethr, PARAM.inp); bool skip_charge = PARAM.inp.calculation == "nscf" ? true : false; // run the inner lambda loop to contrain atomic moments with the DeltaSpin method - bool skip_solve = pw::run_deltaspin_lambda_loop(iter - 1, this->drho, PARAM.inp); + bool skip_solve = pw::run_deltaspin_lambda_loop (iter - 1, this->drho, PARAM.inp); if (!skip_solve) - { - hsolver::HSolverPW hsolver_pw_obj(this->pw_wfc, - PARAM.inp.calculation, - PARAM.inp.basis_type, - PARAM.inp.ks_solver, - PARAM.globalv.use_uspp, - PARAM.inp.nspin, - hsolver::DiagoIterAssist::SCF_ITER, - hsolver::DiagoIterAssist::PW_DIAG_NMAX, - hsolver::DiagoIterAssist::PW_DIAG_THR, - hsolver::DiagoIterAssist::need_subspace, - PARAM.inp.use_k_continuity); - - hsolver_pw_obj.solve(static_cast*>(this->p_hamilt), *this->stp.template get_psi_t(), this->pelec, this->pelec->ekb.c, - GlobalV::RANK_IN_POOL, GlobalV::NPROC_IN_POOL, skip_charge, ucell.tpiba, ucell.nat); - } + { + hsolver::HSolverPW hsolver_pw_obj (this->pw_wfc, + PARAM.inp.calculation, + PARAM.inp.basis_type, + PARAM.inp.ks_solver, + PARAM.globalv.use_uspp, + PARAM.inp.nspin, + hsolver::DiagoIterAssist::SCF_ITER, + hsolver::DiagoIterAssist::PW_DIAG_NMAX, + hsolver::DiagoIterAssist::PW_DIAG_THR, + hsolver::DiagoIterAssist::need_subspace, + PARAM.inp.use_k_continuity); + + hsolver_pw_obj.solve (static_cast*> (this->p_hamilt), + 
*this->stp.template get_psi_t (), + this->pelec, + this->pelec->ekb.c, + GlobalV::RANK_IN_POOL, + GlobalV::NPROC_IN_POOL, + skip_charge, + ucell.tpiba, + ucell.nat); + } // symmetrize the charge density - Symmetry_rho::symmetrize_rho(PARAM.inp.nspin, this->chr, this->pw_rhod, ucell.symm); + Symmetry_rho::symmetrize_rho (PARAM.inp.nspin, this->chr, this->pw_rhod, ucell.symm); - ModuleBase::timer::end("ESolver_KS_PW", "hamilt2rho_single"); + ModuleBase::timer::end ("ESolver_KS_PW", "hamilt2rho_single"); } - template -void ESolver_KS_PW::iter_finish(UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) +void + ESolver_KS_PW::iter_finish (UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) { // Related to EXX - if (GlobalC::exx_info.info_global.cal_exx && !exx_helper->get_op_first_iter()) - { - this->pelec->set_exx(exx_helper->cal_exx_energy(this->stp.template get_psi_t())); - } + if (GlobalC::exx_info.info_global.cal_exx && !exx_helper->get_op_first_iter ()) + { + this->pelec->set_exx (exx_helper->cal_exx_energy (this->stp.template get_psi_t ())); + } // deband is calculated from "output" charge density - this->pelec->f_en.deband = this->pelec->cal_delta_eband(ucell); + this->pelec->f_en.deband = this->pelec->cal_delta_eband (ucell); // Call iter_finish() of ESolver_KS - ESolver_KS::iter_finish(ucell, istep, iter, conv_esolver); + ESolver_KS::iter_finish (ucell, istep, iter, conv_esolver); // D in USPP needs vloc, thus needs update when veff updated // calculate the effective coefficient matrix for non-local // pp projectors, liuyu 2023-10-24 if (PARAM.globalv.use_uspp) - { - ModuleBase::matrix veff = this->pelec->pot->get_eff_v(); - this->ppcell.cal_effective_D(veff, this->pw_rhod, ucell); - } + { + ModuleBase::matrix veff = this->pelec->pot->get_eff_v (); + this->ppcell.cal_effective_D (veff, this->pw_rhod, ucell); + } // Handle EXX-related operations after SCF iteration - exx_helper->iter_finish(this->pelec, &this->chr, this->stp.template 
get_psi_t(), ucell, PARAM.inp, conv_esolver, iter); + exx_helper->iter_finish (this->pelec, + &this->chr, + this->stp.template get_psi_t (), + ucell, + PARAM.inp, + conv_esolver, + iter); // check if oscillate for delta_spin method - pw::check_deltaspin_oscillation(iter, this->drho, this->p_chgmix, PARAM.inp); + pw::check_deltaspin_oscillation (iter, this->drho, this->p_chgmix, PARAM.inp); // the output quantities - ModuleIO::ctrl_iter_pw(istep, iter, conv_esolver, this->stp.psi_cpu, - this->kv, this->pw_wfc, PARAM.inp); + ModuleIO::ctrl_iter_pw (istep, iter, conv_esolver, this->stp.psi_cpu, this->kv, this->pw_wfc, PARAM.inp); } template -void ESolver_KS_PW::after_scf(UnitCell& ucell, const int istep, const bool conv_esolver) +void + ESolver_KS_PW::after_scf (UnitCell& ucell, const int istep, const bool conv_esolver) { - ModuleBase::TITLE("ESolver_KS_PW", "after_scf"); - ModuleBase::timer::start("ESolver_KS_PW", "after_scf"); + ModuleBase::TITLE ("ESolver_KS_PW", "after_scf"); + ModuleBase::timer::start ("ESolver_KS_PW", "after_scf"); // Calculate kinetic energy density tau for ELF if needed if (PARAM.inp.out_elf[0] > 0) - { - this->pelec->cal_tau(*(this->stp.psi_cpu)); - } + { + this->pelec->cal_tau (*(this->stp.psi_cpu)); + } - ESolver_KS::after_scf(ucell, istep, conv_esolver); + ESolver_KS::after_scf (ucell, istep, conv_esolver); // Output quantities - ModuleIO::ctrl_scf_pw(istep, ucell, this->pelec, this->chr, this->kv, this->pw_wfc, - this->pw_rho, this->pw_rhod, this->pw_big, this->stp, - this->Pgrid, PARAM.inp); - - ModuleBase::timer::end("ESolver_KS_PW", "after_scf"); + ModuleIO::ctrl_scf_pw (istep, + ucell, + this->pelec, + this->chr, + this->kv, + this->pw_wfc, + this->pw_rho, + this->pw_rhod, + this->pw_big, + this->stp, + this->Pgrid, + PARAM.inp); + + ModuleBase::timer::end ("ESolver_KS_PW", "after_scf"); } template -double ESolver_KS_PW::cal_energy() +double + ESolver_KS_PW::cal_energy () { return this->pelec->f_en.etot; } template -void 
ESolver_KS_PW::cal_force(UnitCell& ucell, ModuleBase::matrix& force) +void + ESolver_KS_PW::cal_force (UnitCell& ucell, ModuleBase::matrix& force) { - Forces ff(ucell.nat); + Forces ff (ucell.nat); // mohan add 2025-10-12 - this->stp.update_psi_d(); + this->stp.update_psi_d (); // Calculate forces - ff.cal_force(ucell, force, *this->pelec, this->pw_rhod, &ucell.symm, - &this->sf, this->solvent, &this->dftu, &this->locpp, &this->ppcell, - &this->kv, this->pw_wfc, this->stp.template get_psi_d()); + ff.cal_force (ucell, + force, + *this->pelec, + this->pw_rhod, + &ucell.symm, + &this->sf, + this->solvent, + &this->dftu, + &this->locpp, + &this->ppcell, + &this->kv, + this->pw_wfc, + this->stp.template get_psi_d ()); } template -void ESolver_KS_PW::cal_stress(UnitCell& ucell, ModuleBase::matrix& stress) +void + ESolver_KS_PW::cal_stress (UnitCell& ucell, ModuleBase::matrix& stress) { - Stress_PW ss(this->pelec); + Stress_PW ss (this->pelec); // mohan add 2025-10-12 - this->stp.update_psi_d(); - - ss.cal_stress(stress, ucell, this->dftu, this->locpp, this->ppcell, this->pw_rhod, - &ucell.symm, &this->sf, &this->kv, this->pw_wfc, this->stp.template get_psi_d()); + this->stp.update_psi_d (); + + ss.cal_stress (stress, + ucell, + this->dftu, + this->locpp, + this->ppcell, + this->pw_rhod, + &ucell.symm, + &this->sf, + &this->kv, + this->pw_wfc, + this->stp.template get_psi_d ()); // external stress double unit_transform = 0.0; - unit_transform = ModuleBase::RYDBERG_SI / pow(ModuleBase::BOHR_RADIUS_SI, 3) * 1.0e-8; + unit_transform = ModuleBase::RYDBERG_SI / pow (ModuleBase::BOHR_RADIUS_SI, 3) * 1.0e-8; double external_stress[3] = {PARAM.inp.press1, PARAM.inp.press2, PARAM.inp.press3}; for (int i = 0; i < 3; i++) - { - stress(i, i) -= external_stress[i] / unit_transform; - } + { + stress (i, i) -= external_stress[i] / unit_transform; + } } template -void ESolver_KS_PW::after_all_runners(UnitCell& ucell) +void + ESolver_KS_PW::after_all_runners (UnitCell& ucell) { - 
ESolver_KS::after_all_runners(ucell); - - ModuleIO::ctrl_runner_pw(ucell, this->pelec, this->pw_wfc, - this->pw_rho, this->pw_rhod, this->chr, this->kv, this->stp, - this->sf, this->ppcell, this->solvent, this->Pgrid, PARAM.inp); - - elecstate::teardown_estate_pw(this->pelec, this->vsep_cell); - + ESolver_KS::after_all_runners (ucell); + + ModuleIO::ctrl_runner_pw (ucell, + this->pelec, + this->pw_wfc, + this->pw_rho, + this->pw_rhod, + this->chr, + this->kv, + this->stp, + this->sf, + this->ppcell, + this->solvent, + this->Pgrid, + PARAM.inp); + + elecstate::teardown_estate_pw (this->pelec, this->vsep_cell); } template class ESolver_KS_PW, base_device::DEVICE_CPU>; diff --git a/source/source_esolver/esolver_ks_pw.h b/source/source_esolver/esolver_ks_pw.h index 9fd2600ce37..36f089170b3 100644 --- a/source/source_esolver/esolver_ks_pw.h +++ b/source/source_esolver/esolver_ks_pw.h @@ -18,43 +18,42 @@ class ESolver_KS_PW : public ESolver_KS using Real = typename GetTypeReal::type; public: - ESolver_KS_PW(); + ESolver_KS_PW (); - ~ESolver_KS_PW(); + ~ESolver_KS_PW (); - void before_all_runners(UnitCell& ucell, const Input_para& inp) override; + void before_all_runners (UnitCell& ucell, const Input_para& inp) override; - double cal_energy() override; + double cal_energy () override; - void cal_force(UnitCell& ucell, ModuleBase::matrix& force) override; + void cal_force (UnitCell& ucell, ModuleBase::matrix& force) override; - void cal_stress(UnitCell& ucell, ModuleBase::matrix& stress) override; + void cal_stress (UnitCell& ucell, ModuleBase::matrix& stress) override; - void after_all_runners(UnitCell& ucell) override; + void after_all_runners (UnitCell& ucell) override; Exx_HelperBase* exx_helper = nullptr; protected: - virtual void before_scf(UnitCell& ucell, const int istep) override; + virtual void before_scf (UnitCell& ucell, const int istep) override; - virtual void iter_init(UnitCell& ucell, const int istep, const int iter) override; + virtual void iter_init 
(UnitCell& ucell, const int istep, const int iter) override; - virtual void iter_finish(UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) override; + virtual void iter_finish (UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) override; - virtual void after_scf(UnitCell& ucell, const int istep, const bool conv_esolver) override; + virtual void after_scf (UnitCell& ucell, const int istep, const bool conv_esolver) override; - virtual void others(UnitCell& ucell, const int istep) override; + virtual void others (UnitCell& ucell, const int istep) override; - virtual void hamilt2rho_single(UnitCell& ucell, const int istep, const int iter, const double ethr) override; + virtual void hamilt2rho_single (UnitCell& ucell, const int istep, const int iter, const double ethr) override; - virtual void allocate_hamilt(const UnitCell& ucell); + virtual void allocate_hamilt (const UnitCell& ucell); // Electronic wave function psi Setup_Psi_pw stp; // DFT-1/2 method VSep* vsep_cell = nullptr; - }; } // namespace ModuleESolver #endif diff --git a/source/source_esolver/esolver_lj.cpp b/source/source_esolver/esolver_lj.cpp index f71698586ea..9c32a9e7e2f 100644 --- a/source/source_esolver/esolver_lj.cpp +++ b/source/source_esolver/esolver_lj.cpp @@ -5,286 +5,299 @@ #include "source_io/module_output/output_log.h" #include "source_io/module_output/cif_io.h" - namespace ModuleESolver { -void ESolver_LJ::before_all_runners(UnitCell& ucell, const Input_para& inp) +void + ESolver_LJ::before_all_runners (UnitCell& ucell, const Input_para& inp) { lj_potential = 0; - lj_force.create(ucell.nat, 3); - lj_virial.create(3, 3); + lj_force.create (ucell.nat, 3); + lj_virial.create (3, 3); - ModuleIO::CifParser::write(PARAM.globalv.global_out_dir + "STRU.cif", - ucell, - "# Generated by ABACUS ModuleIO::CifParser", - "data_?"); + ModuleIO::CifParser::write (PARAM.globalv.global_out_dir + "STRU.cif", + ucell, + "# Generated by ABACUS ModuleIO::CifParser", + "data_?"); // 
determine the maximum rcut and lj_rcut - rcut_search_radius(ucell.ntype, inp.mdp.lj_rcut); + rcut_search_radius (ucell.ntype, inp.mdp.lj_rcut); // determine the LJ parameters - set_c6_c12(ucell.ntype, inp.mdp.lj_rule, inp.mdp.lj_epsilon, inp.mdp.lj_sigma); + set_c6_c12 (ucell.ntype, inp.mdp.lj_rule, inp.mdp.lj_epsilon, inp.mdp.lj_sigma); // calculate the energy shift so that LJ energy is zero at rcut - cal_en_shift(ucell.ntype, inp.mdp.lj_eshift); + cal_en_shift (ucell.ntype, inp.mdp.lj_eshift); } -void ESolver_LJ::runner(UnitCell& ucell, const int istep) +void + ESolver_LJ::runner (UnitCell& ucell, const int istep) { - Grid_Driver grid_neigh(PARAM.inp.test_deconstructor, PARAM.inp.test_grid); - atom_arrange::search(PARAM.globalv.search_pbc, - GlobalV::ofs_running, - grid_neigh, - ucell, - search_radius, - PARAM.inp.test_atom_input); + Grid_Driver grid_neigh (PARAM.inp.test_deconstructor, PARAM.inp.test_grid); + atom_arrange::search (PARAM.globalv.search_pbc, + GlobalV::ofs_running, + grid_neigh, + ucell, + search_radius, + PARAM.inp.test_atom_input); double distance = 0.0; int index = 0; // Important! 
potential, force, virial must be zero per step lj_potential = 0; - lj_force.zero_out(); - lj_virial.zero_out(); + lj_force.zero_out (); + lj_virial.zero_out (); ModuleBase::Vector3 tau1, tau2, dtau; for (int it = 0; it < ucell.ntype; ++it) - { - Atom* atom1 = &ucell.atoms[it]; - for (int ia = 0; ia < atom1->na; ++ia) { - tau1 = atom1->tau[ia]; - grid_neigh.Find_atom(ucell, tau1, it, ia); - for (int ad = 0; ad < grid_neigh.getAdjacentNum(); ++ad) - { - tau2 = grid_neigh.getAdjacentTau(ad); - int it2 = grid_neigh.getType(ad); - dtau = (tau1 - tau2) * ucell.lat0; - distance = dtau.norm(); - if (distance < lj_rcut(it, it2)) + Atom* atom1 = &ucell.atoms[it]; + for (int ia = 0; ia < atom1->na; ++ia) { - lj_potential += LJ_energy(distance, it, it2) - en_shift(it, it2); - ModuleBase::Vector3 f_ij = LJ_force(dtau, it, it2); - lj_force(index, 0) += f_ij.x; - lj_force(index, 1) += f_ij.y; - lj_force(index, 2) += f_ij.z; - LJ_virial(f_ij, dtau); + tau1 = atom1->tau[ia]; + grid_neigh.Find_atom (ucell, tau1, it, ia); + for (int ad = 0; ad < grid_neigh.getAdjacentNum (); ++ad) + { + tau2 = grid_neigh.getAdjacentTau (ad); + int it2 = grid_neigh.getType (ad); + dtau = (tau1 - tau2) * ucell.lat0; + distance = dtau.norm (); + if (distance < lj_rcut (it, it2)) + { + lj_potential += LJ_energy (distance, it, it2) - en_shift (it, it2); + ModuleBase::Vector3 f_ij = LJ_force (dtau, it, it2); + lj_force (index, 0) += f_ij.x; + lj_force (index, 1) += f_ij.y; + lj_force (index, 2) += f_ij.z; + LJ_virial (f_ij, dtau); + } + } + index++; } - } - index++; } - } lj_potential /= 2.0; - GlobalV::ofs_running << " #TOTAL ENERGY# " << std::setprecision(11) << lj_potential * ModuleBase::Ry_to_eV << " eV" + GlobalV::ofs_running << " #TOTAL ENERGY# " << std::setprecision (11) << lj_potential * ModuleBase::Ry_to_eV << " eV" << std::endl; // Post treatment for virial for (int i = 0; i < 3; ++i) - { - for (int j = 0; j < 3; ++j) { - lj_virial(i, j) /= (2.0 * ucell.omega); + for (int j = 0; j < 3; ++j) + { + 
lj_virial (i, j) /= (2.0 * ucell.omega); + } } - } - } - - double ESolver_LJ::cal_energy() - { - return lj_potential; - } - - void ESolver_LJ::cal_force(UnitCell& ucell, ModuleBase::matrix& force) - { - force = lj_force; - ModuleIO::print_force(GlobalV::ofs_running, ucell, "TOTAL-FORCE (eV/Angstrom)", force, false); - } - - void ESolver_LJ::cal_stress(UnitCell& ucell, ModuleBase::matrix& stress) - { - stress = lj_virial; - - const bool screen = true; - const bool ry = false; - ModuleIO::print_stress("TOTAL-STRESS", stress, screen, ry, GlobalV::ofs_running); - - // external stress - double unit_transform = ModuleBase::RYDBERG_SI / pow(ModuleBase::BOHR_RADIUS_SI, 3) * 1.0e-8; - double external_stress[3] = {PARAM.inp.press1, PARAM.inp.press2, PARAM.inp.press3}; - for (int i = 0; i < 3; i++) +} + +double + ESolver_LJ::cal_energy () +{ + return lj_potential; +} + +void + ESolver_LJ::cal_force (UnitCell& ucell, ModuleBase::matrix& force) +{ + force = lj_force; + ModuleIO::print_force (GlobalV::ofs_running, ucell, "TOTAL-FORCE (eV/Angstrom)", force, false); +} + +void + ESolver_LJ::cal_stress (UnitCell& ucell, ModuleBase::matrix& stress) +{ + stress = lj_virial; + + const bool screen = true; + const bool ry = false; + ModuleIO::print_stress ("TOTAL-STRESS", stress, screen, ry, GlobalV::ofs_running); + + // external stress + double unit_transform = ModuleBase::RYDBERG_SI / pow (ModuleBase::BOHR_RADIUS_SI, 3) * 1.0e-8; + double external_stress[3] = {PARAM.inp.press1, PARAM.inp.press2, PARAM.inp.press3}; + for (int i = 0; i < 3; i++) { - stress(i, i) -= external_stress[i] / unit_transform; + stress (i, i) -= external_stress[i] / unit_transform; } - } - - void ESolver_LJ::after_all_runners(UnitCell& ucell) - { - GlobalV::ofs_running << "\n --------------------------------------------" << std::endl; - GlobalV::ofs_running << std::setprecision(16); - GlobalV::ofs_running << " !FINAL_ETOT_IS " << lj_potential * ModuleBase::Ry_to_eV << " eV" << std::endl; - GlobalV::ofs_running 
<< " --------------------------------------------\n\n" << std::endl; - } - - double ESolver_LJ::LJ_energy(const double& d, const int& i, const int& j) const - { - assert(d > 1e-6); // avoid atom overlap - const double r2 = d * d; - const double r4 = r2 * r2; - const double r6 = r2 * r4; - return lj_c12(i, j) / (r6 * r6) - lj_c6(i, j) / r6; - } - - ModuleBase::Vector3 ESolver_LJ::LJ_force(const ModuleBase::Vector3& dr, const int& i, const int& j) const - { - const double d = dr.norm(); - assert(d > 1e-6); // avoid atom overlap - const double r2 = d * d; - const double r4 = r2 * r2; - const double r8 = r4 * r4; - const double r14 = r8 * r4 * r2; - double coff = 12.0 * lj_c12(i, j) / r14 - 6.0 * lj_c6(i, j) / r8; - return dr * coff; - } - - void ESolver_LJ::LJ_virial(const ModuleBase::Vector3& force, const ModuleBase::Vector3& dtau) - { - for (int i = 0; i < 3; ++i) +} + +void + ESolver_LJ::after_all_runners (UnitCell& ucell) +{ + GlobalV::ofs_running << "\n --------------------------------------------" << std::endl; + GlobalV::ofs_running << std::setprecision (16); + GlobalV::ofs_running << " !FINAL_ETOT_IS " << lj_potential * ModuleBase::Ry_to_eV << " eV" << std::endl; + GlobalV::ofs_running << " --------------------------------------------\n\n" << std::endl; +} + +double + ESolver_LJ::LJ_energy (const double& d, const int& i, const int& j) const +{ + assert (d > 1e-6); // avoid atom overlap + const double r2 = d * d; + const double r4 = r2 * r2; + const double r6 = r2 * r4; + return lj_c12 (i, j) / (r6 * r6) - lj_c6 (i, j) / r6; +} + +ModuleBase::Vector3 + ESolver_LJ::LJ_force (const ModuleBase::Vector3& dr, const int& i, const int& j) const +{ + const double d = dr.norm (); + assert (d > 1e-6); // avoid atom overlap + const double r2 = d * d; + const double r4 = r2 * r2; + const double r8 = r4 * r4; + const double r14 = r8 * r4 * r2; + double coff = 12.0 * lj_c12 (i, j) / r14 - 6.0 * lj_c6 (i, j) / r8; + return dr * coff; +} + +void + ESolver_LJ::LJ_virial (const 
ModuleBase::Vector3& force, const ModuleBase::Vector3& dtau) +{ + for (int i = 0; i < 3; ++i) { for (int j = 0; j < 3; ++j) - { - lj_virial(i, j) += dtau[i] * force[j]; - } + { + lj_virial (i, j) += dtau[i] * force[j]; + } } - } +} - void ESolver_LJ::rcut_search_radius(const int& ntype, const std::vector& rcut) - { - lj_rcut.create(ntype, ntype); - double rcut_max = 0.0; +void + ESolver_LJ::rcut_search_radius (const int& ntype, const std::vector& rcut) +{ + lj_rcut.create (ntype, ntype); + double rcut_max = 0.0; - if (rcut.size() == 1) + if (rcut.size () == 1) { rcut_max = rcut[0] * ModuleBase::ANGSTROM_AU; for (int i = 0; i < ntype; i++) - { - for (int j = 0; j <= i; j++) { - lj_rcut(i, j) = rcut_max; - lj_rcut(j, i) = rcut_max; + for (int j = 0; j <= i; j++) + { + lj_rcut (i, j) = rcut_max; + lj_rcut (j, i) = rcut_max; + } } - } } - else if (rcut.size() == ntype * (ntype + 1) / 2) + else if (rcut.size () == ntype * (ntype + 1) / 2) { for (int i = 0; i < ntype; i++) - { - for (int j = 0; j <= i; j++) { - int k = i * (i + 1) / 2 + j; - lj_rcut(i, j) = rcut[k] * ModuleBase::ANGSTROM_AU; - lj_rcut(j, i) = lj_rcut(i, j); - rcut_max = std::max(rcut_max, lj_rcut(i, j)); + for (int j = 0; j <= i; j++) + { + int k = i * (i + 1) / 2 + j; + lj_rcut (i, j) = rcut[k] * ModuleBase::ANGSTROM_AU; + lj_rcut (j, i) = lj_rcut (i, j); + rcut_max = std::max (rcut_max, lj_rcut (i, j)); + } } - } } - // set the search radius - search_radius = rcut_max + 0.01; - } - - void ESolver_LJ::set_c6_c12(const int& ntype, - const int& rule, - const std::vector& epsilon, - const std::vector& sigma) - { - lj_c6.create(ntype, ntype); - lj_c12.create(ntype, ntype); - - std::vector lj_epsilon = epsilon; - std::vector lj_sigma = sigma; - - std::transform(begin(lj_epsilon), end(lj_epsilon), begin(lj_epsilon), [](double x) { - return x / ModuleBase::Ry_to_eV; - }); - std::transform(begin(lj_sigma), end(lj_sigma), begin(lj_sigma), [](double x) { - return x * ModuleBase::ANGSTROM_AU; - }); - - if 
(lj_epsilon.size() != lj_sigma.size()) + // set the search radius + search_radius = rcut_max + 0.01; +} + +void + ESolver_LJ::set_c6_c12 (const int& ntype, + const int& rule, + const std::vector& epsilon, + const std::vector& sigma) +{ + lj_c6.create (ntype, ntype); + lj_c12.create (ntype, ntype); + + std::vector lj_epsilon = epsilon; + std::vector lj_sigma = sigma; + + std::transform (begin (lj_epsilon), + end (lj_epsilon), + begin (lj_epsilon), + [] (double x) { return x / ModuleBase::Ry_to_eV; }); + std::transform (begin (lj_sigma), + end (lj_sigma), + begin (lj_sigma), + [] (double x) { return x * ModuleBase::ANGSTROM_AU; }); + + if (lj_epsilon.size () != lj_sigma.size ()) { - ModuleBase::WARNING_QUIT("ESolver_LJ", " the number of lj_epsilon should be equal to lj_sigma "); + ModuleBase::WARNING_QUIT ("ESolver_LJ", " the number of lj_epsilon should be equal to lj_sigma "); } - // do not need any combination rules - else if (lj_sigma.size() == ntype * (ntype + 1) / 2) + // do not need any combination rules + else if (lj_sigma.size () == ntype * (ntype + 1) / 2) { for (int i = 0; i < ntype; i++) - { - for (int j = 0; j <= i; j++) { - int k = i * (i + 1) / 2 + j; - double temp = pow(lj_sigma[k], 6); - lj_c6(i, j) = 4.0 * lj_epsilon[k] * temp; - lj_c12(i, j) = lj_c6(i, j) * temp; - lj_c6(j, i) = lj_c6(i, j); - lj_c12(j, i) = lj_c12(i, j); + for (int j = 0; j <= i; j++) + { + int k = i * (i + 1) / 2 + j; + double temp = pow (lj_sigma[k], 6); + lj_c6 (i, j) = 4.0 * lj_epsilon[k] * temp; + lj_c12 (i, j) = lj_c6 (i, j) * temp; + lj_c6 (j, i) = lj_c6 (i, j); + lj_c12 (j, i) = lj_c12 (i, j); + } } - } } - // combination rule 1 - else if (lj_sigma.size() == ntype && rule == 1) + // combination rule 1 + else if (lj_sigma.size () == ntype && rule == 1) { for (int i = 0; i < ntype; i++) - { - // first determine the diagonal elements - double temp = pow(lj_sigma[i], 6); - lj_c6(i, i) = 4.0 * lj_epsilon[i] * temp; - lj_c12(i, i) = lj_c6(i, i) * temp; - - // then determine the 
non-diagonal elements - for (int j = 0; j < i; j++) { - lj_c6(i, j) = std::sqrt(lj_c6(i, i) * lj_c6(j, j)); - lj_c12(i, j) = std::sqrt(lj_c12(i, i) * lj_c12(j, j)); - lj_c6(j, i) = lj_c6(i, j); - lj_c12(j, i) = lj_c12(i, j); + // first determine the diagonal elements + double temp = pow (lj_sigma[i], 6); + lj_c6 (i, i) = 4.0 * lj_epsilon[i] * temp; + lj_c12 (i, i) = lj_c6 (i, i) * temp; + + // then determine the non-diagonal elements + for (int j = 0; j < i; j++) + { + lj_c6 (i, j) = std::sqrt (lj_c6 (i, i) * lj_c6 (j, j)); + lj_c12 (i, j) = std::sqrt (lj_c12 (i, i) * lj_c12 (j, j)); + lj_c6 (j, i) = lj_c6 (i, j); + lj_c12 (j, i) = lj_c12 (i, j); + } } - } } - // combination rule 2 - else if (lj_sigma.size() == ntype && rule == 2) + // combination rule 2 + else if (lj_sigma.size () == ntype && rule == 2) { for (int i = 0; i < ntype; i++) - { - for (int j = 0; j <= i; j++) { - double sigma_ij = (lj_sigma[i] + lj_sigma[j]) / 2.0; - double epsilon_ij = std::sqrt(lj_epsilon[i] * lj_epsilon[j]); - - double temp = pow(sigma_ij, 6); - lj_c6(i, j) = 4.0 * epsilon_ij * temp; - lj_c12(i, j) = lj_c6(i, j) * temp; - lj_c6(j, i) = lj_c6(i, j); - lj_c12(j, i) = lj_c12(i, j); + for (int j = 0; j <= i; j++) + { + double sigma_ij = (lj_sigma[i] + lj_sigma[j]) / 2.0; + double epsilon_ij = std::sqrt (lj_epsilon[i] * lj_epsilon[j]); + + double temp = pow (sigma_ij, 6); + lj_c6 (i, j) = 4.0 * epsilon_ij * temp; + lj_c12 (i, j) = lj_c6 (i, j) * temp; + lj_c6 (j, i) = lj_c6 (i, j); + lj_c12 (j, i) = lj_c12 (i, j); + } } - } } - } +} - void ESolver_LJ::cal_en_shift(const int& ntype, const bool& is_shift) - { - en_shift.create(ntype, ntype); +void + ESolver_LJ::cal_en_shift (const int& ntype, const bool& is_shift) +{ + en_shift.create (ntype, ntype); - if (is_shift) + if (is_shift) { for (int i = 0; i < ntype; i++) - { - for (int j = 0; j <= i; j++) { - en_shift(i, j) = LJ_energy(lj_rcut(i, j), i, j); - en_shift(j, i) = en_shift(i, j); + for (int j = 0; j <= i; j++) + { + en_shift (i, j) = 
LJ_energy (lj_rcut (i, j), i, j); + en_shift (j, i) = en_shift (i, j); + } } - } } - } } +} // namespace ModuleESolver diff --git a/source/source_esolver/esolver_lj.h b/source/source_esolver/esolver_lj.h index ca23a8bed80..fe56e3366f8 100644 --- a/source/source_esolver/esolver_lj.h +++ b/source/source_esolver/esolver_lj.h @@ -6,53 +6,50 @@ namespace ModuleESolver { - class ESolver_LJ : public ESolver - { - public: - ESolver_LJ() - { - classname = "ESolver_LJ"; - } +class ESolver_LJ : public ESolver +{ + public: + ESolver_LJ () { classname = "ESolver_LJ"; } - void before_all_runners(UnitCell& ucell, const Input_para& inp) override; + void before_all_runners (UnitCell& ucell, const Input_para& inp) override; - void runner(UnitCell& cell, const int istep) override; + void runner (UnitCell& cell, const int istep) override; - double cal_energy() override; + double cal_energy () override; - void cal_force(UnitCell& ucell, ModuleBase::matrix& force) override; + void cal_force (UnitCell& ucell, ModuleBase::matrix& force) override; - void cal_stress(UnitCell& ucell, ModuleBase::matrix& stress) override; + void cal_stress (UnitCell& ucell, ModuleBase::matrix& stress) override; - void after_all_runners(UnitCell& ucell) override; + void after_all_runners (UnitCell& ucell) override; - private: - double LJ_energy(const double& d, const int& i, const int& j) const; + private: + double LJ_energy (const double& d, const int& i, const int& j) const; - ModuleBase::Vector3 LJ_force(const ModuleBase::Vector3& dr, const int& i, const int& j) const; + ModuleBase::Vector3 LJ_force (const ModuleBase::Vector3& dr, const int& i, const int& j) const; - void LJ_virial(const ModuleBase::Vector3& force, const ModuleBase::Vector3& dtau); + void LJ_virial (const ModuleBase::Vector3& force, const ModuleBase::Vector3& dtau); - void rcut_search_radius(const int& ntype, const std::vector& rcut); + void rcut_search_radius (const int& ntype, const std::vector& rcut); - void set_c6_c12(const int& ntype, 
- const int& rule, - const std::vector& epsilon, - const std::vector& sigma); + void set_c6_c12 (const int& ntype, + const int& rule, + const std::vector& epsilon, + const std::vector& sigma); - void cal_en_shift(const int& ntype, const bool& is_shift); + void cal_en_shift (const int& ntype, const bool& is_shift); - //--------------temporary---------------------------- - double search_radius=-1.0; - ModuleBase::matrix lj_rcut; - ModuleBase::matrix lj_c12; - ModuleBase::matrix lj_c6; - ModuleBase::matrix en_shift; + //--------------temporary---------------------------- + double search_radius = -1.0; + ModuleBase::matrix lj_rcut; + ModuleBase::matrix lj_c12; + ModuleBase::matrix lj_c6; + ModuleBase::matrix en_shift; - double lj_potential=0.0; - ModuleBase::matrix lj_force; - ModuleBase::matrix lj_virial; - //--------------------------------------------------- - }; -} + double lj_potential = 0.0; + ModuleBase::matrix lj_force; + ModuleBase::matrix lj_virial; + //--------------------------------------------------- +}; +} // namespace ModuleESolver #endif diff --git a/source/source_esolver/esolver_nep.cpp b/source/source_esolver/esolver_nep.cpp index 8944776aaa6..c4779539ee8 100644 --- a/source/source_esolver/esolver_nep.cpp +++ b/source/source_esolver/esolver_nep.cpp @@ -28,35 +28,37 @@ using namespace ModuleESolver; -void ESolver_NEP::before_all_runners(UnitCell& ucell, const Input_para& inp) -{ +void + ESolver_NEP::before_all_runners (UnitCell& ucell, const Input_para& inp) +{ nep_potential = 0.0; - nep_force.create(ucell.nat, 3); - nep_virial.create(3, 3); - atype.resize(ucell.nat); - _e.resize(ucell.nat); - _f.resize(3 * ucell.nat); - _v.resize(9 * ucell.nat); - - ModuleIO::CifParser::write(PARAM.globalv.global_out_dir + "STRU.cif", - ucell, - "# Generated by ABACUS ModuleIO::CifParser", - "data_?"); + nep_force.create (ucell.nat, 3); + nep_virial.create (3, 3); + atype.resize (ucell.nat); + _e.resize (ucell.nat); + _f.resize (3 * ucell.nat); + _v.resize (9 * 
ucell.nat); + + ModuleIO::CifParser::write (PARAM.globalv.global_out_dir + "STRU.cif", + ucell, + "# Generated by ABACUS ModuleIO::CifParser", + "data_?"); #ifdef __NEP /// determine the type map from STRU to NEP model - type_map(ucell); + type_map (ucell); #endif } -void ESolver_NEP::runner(UnitCell& ucell, const int istep) +void + ESolver_NEP::runner (UnitCell& ucell, const int istep) { - ModuleBase::TITLE("ESolver_NEP", "runner"); - ModuleBase::timer::start("ESolver_NEP", "runner"); + ModuleBase::TITLE ("ESolver_NEP", "runner"); + ModuleBase::timer::start ("ESolver_NEP", "runner"); // note that NEP are column major, thus a transpose is needed // cell - std::vector cell(9, 0.0); + std::vector cell (9, 0.0); cell[0] = ucell.latvec.e11 * ucell.lat0_angstrom; cell[1] = ucell.latvec.e21 * ucell.lat0_angstrom; cell[2] = ucell.latvec.e31 * ucell.lat0_angstrom; @@ -68,147 +70,152 @@ void ESolver_NEP::runner(UnitCell& ucell, const int istep) cell[8] = ucell.latvec.e33 * ucell.lat0_angstrom; // coord - std::vector coord(3 * ucell.nat, 0.0); + std::vector coord (3 * ucell.nat, 0.0); int iat = 0; const int nat = ucell.nat; for (int it = 0; it < ucell.ntype; ++it) - { - for (int ia = 0; ia < ucell.atoms[it].na; ++ia) { - coord[iat] = ucell.atoms[it].tau[ia].x * ucell.lat0_angstrom; - coord[iat + nat] = ucell.atoms[it].tau[ia].y * ucell.lat0_angstrom; - coord[iat + 2 * nat] = ucell.atoms[it].tau[ia].z * ucell.lat0_angstrom; - iat++; + for (int ia = 0; ia < ucell.atoms[it].na; ++ia) + { + coord[iat] = ucell.atoms[it].tau[ia].x * ucell.lat0_angstrom; + coord[iat + nat] = ucell.atoms[it].tau[ia].y * ucell.lat0_angstrom; + coord[iat + 2 * nat] = ucell.atoms[it].tau[ia].z * ucell.lat0_angstrom; + iat++; + } } - } - assert(ucell.nat == iat); + assert (ucell.nat == iat); #ifdef __NEP nep_potential = 0.0; - nep_force.zero_out(); - nep_virial.zero_out(); + nep_force.zero_out (); + nep_virial.zero_out (); - nep.compute(atype, cell, coord, _e, _f, _v); + nep.compute (atype, cell, coord, 
_e, _f, _v); // unit conversion const double fact_e = 1.0 / ModuleBase::Ry_to_eV; const double fact_f = 1.0 / (ModuleBase::Ry_to_eV * ModuleBase::ANGSTROM_AU); const double fact_v = 1.0 / (ucell.omega * ModuleBase::Ry_to_eV); - // potential energy - nep_potential = fact_e * std::accumulate(_e.begin(), _e.end(), 0.0) ; - GlobalV::ofs_running << " #TOTAL ENERGY# " << std::setprecision(11) << nep_potential * ModuleBase::Ry_to_eV << " eV" - << std::endl; - + nep_potential = fact_e * std::accumulate (_e.begin (), _e.end (), 0.0); + GlobalV::ofs_running << " #TOTAL ENERGY# " << std::setprecision (11) << nep_potential * ModuleBase::Ry_to_eV + << " eV" << std::endl; + // forces for (int i = 0; i < nat; ++i) - { - nep_force(i, 0) = _f[i] * fact_f; - nep_force(i, 1) = _f[i + nat] * fact_f; - nep_force(i, 2) = _f[i + 2 * nat] * fact_f; - } + { + nep_force (i, 0) = _f[i] * fact_f; + nep_force (i, 1) = _f[i + nat] * fact_f; + nep_force (i, 2) = _f[i + 2 * nat] * fact_f; + } // virial - std::vector v_sum(9, 0.0); + std::vector v_sum (9, 0.0); for (int j = 0; j < 9; ++j) - { - for (int i = 0; i < nat; ++i) { - int index = j * nat + i; - v_sum[j] += _v[index]; + for (int i = 0; i < nat; ++i) + { + int index = j * nat + i; + v_sum[j] += _v[index]; + } } - } // virial -> stress for (int i = 0; i < 3; ++i) - { - for (int j = 0; j < 3; ++j) { - nep_virial(i, j) = v_sum[3 * i + j] * fact_v; + for (int j = 0; j < 3; ++j) + { + nep_virial (i, j) = v_sum[3 * i + j] * fact_v; + } } - } #else - ModuleBase::WARNING_QUIT("ESolver_NEP", "Please recompile with -D__NEP"); + ModuleBase::WARNING_QUIT ("ESolver_NEP", "Please recompile with -D__NEP"); #endif - ModuleBase::timer::end("ESolver_NEP", "runner"); + ModuleBase::timer::end ("ESolver_NEP", "runner"); } -double ESolver_NEP::cal_energy() +double + ESolver_NEP::cal_energy () { return nep_potential; } -void ESolver_NEP::cal_force(UnitCell& ucell, ModuleBase::matrix& force) +void + ESolver_NEP::cal_force (UnitCell& ucell, ModuleBase::matrix& 
force) { force = nep_force; - ModuleIO::print_force(GlobalV::ofs_running, ucell, "TOTAL-FORCE (eV/Angstrom)", force, false); + ModuleIO::print_force (GlobalV::ofs_running, ucell, "TOTAL-FORCE (eV/Angstrom)", force, false); } -void ESolver_NEP::cal_stress(UnitCell& ucell, ModuleBase::matrix& stress) +void + ESolver_NEP::cal_stress (UnitCell& ucell, ModuleBase::matrix& stress) { stress = nep_virial; - ModuleIO::print_stress("TOTAL-STRESS", stress, true, false, GlobalV::ofs_running); + ModuleIO::print_stress ("TOTAL-STRESS", stress, true, false, GlobalV::ofs_running); // external stress - double unit_transform = ModuleBase::RYDBERG_SI / pow(ModuleBase::BOHR_RADIUS_SI, 3) * 1.0e-8; + double unit_transform = ModuleBase::RYDBERG_SI / pow (ModuleBase::BOHR_RADIUS_SI, 3) * 1.0e-8; double external_stress[3] = {PARAM.inp.press1, PARAM.inp.press2, PARAM.inp.press3}; for (int i = 0; i < 3; i++) - { - stress(i, i) -= external_stress[i] / unit_transform; - } + { + stress (i, i) -= external_stress[i] / unit_transform; + } } -void ESolver_NEP::after_all_runners(UnitCell& ucell) +void + ESolver_NEP::after_all_runners (UnitCell& ucell) { GlobalV::ofs_running << "\n --------------------------------------------" << std::endl; - GlobalV::ofs_running << std::setprecision(16); + GlobalV::ofs_running << std::setprecision (16); GlobalV::ofs_running << " !FINAL_ETOT_IS " << nep_potential * ModuleBase::Ry_to_eV << " eV" << std::endl; GlobalV::ofs_running << " --------------------------------------------\n\n" << std::endl; } #ifdef __NEP -void ESolver_NEP::type_map(const UnitCell& ucell) -{ +void + ESolver_NEP::type_map (const UnitCell& ucell) +{ // parse the element list from NEP model file std::unordered_map label; std::string temp; - for (int i = 0; i < nep.element_list.size(); ++i) - { - label[nep.element_list[i]] = i; //> label: map from element string to index int. 
- } + for (int i = 0; i < nep.element_list.size (); ++i) + { + label[nep.element_list[i]] = i; //> label: map from element string to index int. + } std::cout << "\n Element list of model file " << nep_file << " " << std::endl; std::cout << " ----------------------------------------------------------------"; int count = 0; - for (auto it = label.begin(); it != label.end(); ++it) - { - if (count % 5 == 0) + for (auto it = label.begin (); it != label.end (); ++it) { - std::cout << std::endl; - std::cout << " "; + if (count % 5 == 0) + { + std::cout << std::endl; + std::cout << " "; + } + count++; + temp = it->first + ": " + std::to_string (it->second); + std::cout << std::left << std::setw (10) << temp; } - count++; - temp = it->first + ": " + std::to_string(it->second); - std::cout << std::left << std::setw(10) << temp; - } std::cout << "\n -----------------------------------------------------------------" << std::endl; // parse the atype based on the element list int iat = 0; for (int it = 0; it < ucell.ntype; ++it) - { - for (int ia = 0; ia < ucell.atoms[it].na; ++ia) { - if (label.find(ucell.atoms[it].label) == label.end()) - { - ModuleBase::WARNING_QUIT("ESolver_NEP", - "The label " + ucell.atoms[it].label + " is not found in the type map."); - } - atype[iat] = label[ucell.atoms[it].label]; - iat++; + for (int ia = 0; ia < ucell.atoms[it].na; ++ia) + { + if (label.find (ucell.atoms[it].label) == label.end ()) + { + ModuleBase::WARNING_QUIT ("ESolver_NEP", + "The label " + ucell.atoms[it].label + + " is not found in the type map."); + } + atype[iat] = label[ucell.atoms[it].label]; + iat++; + } } - } - assert(ucell.nat == iat); + assert (ucell.nat == iat); } #endif diff --git a/source/source_esolver/esolver_nep.h b/source/source_esolver/esolver_nep.h index dfec17a83c2..5284017eb85 100644 --- a/source/source_esolver/esolver_nep.h +++ b/source/source_esolver/esolver_nep.h @@ -15,17 +15,17 @@ class ESolver_NEP : public ESolver { public: #ifdef __NEP - 
ESolver_NEP(const std::string& pot_file): nep(pot_file) - { - classname = "ESolver_NEP"; - nep_file = pot_file; - } + ESolver_NEP (const std::string& pot_file) : nep (pot_file) + { + classname = "ESolver_NEP"; + nep_file = pot_file; + } #else - ESolver_NEP(const std::string& pot_file) - { - classname = "ESolver_NEP"; - nep_file = pot_file; - } + ESolver_NEP (const std::string& pot_file) + { + classname = "ESolver_NEP"; + nep_file = pot_file; + } #endif /** @@ -34,15 +34,15 @@ class ESolver_NEP : public ESolver * @param inp input parameters * @param cell unitcell information */ - void before_all_runners(UnitCell& ucell, const Input_para& inp) override; - + void before_all_runners (UnitCell& ucell, const Input_para& inp) override; + /** * @brief Run the NEP solver for a given ion/md step and unit cell * * @param istep the current ion/md step * @param cell unitcell information */ - void runner(UnitCell& ucell, const int istep) override; + void runner (UnitCell& ucell, const int istep) override; /** * @brief get the total energy without ion kinetic energy @@ -50,28 +50,28 @@ class ESolver_NEP : public ESolver * @param etot the computed energy * @return total energy without ion kinetic energy */ - double cal_energy() override; + double cal_energy () override; /** * @brief get the computed atomic forces * * @param force the computed atomic forces */ - void cal_force(UnitCell& ucell, ModuleBase::matrix& force) override; + void cal_force (UnitCell& ucell, ModuleBase::matrix& force) override; /** * @brief get the computed lattice virials * * @param stress the computed lattice virials */ - void cal_stress(UnitCell& ucell, ModuleBase::matrix& stress) override; + void cal_stress (UnitCell& ucell, ModuleBase::matrix& stress) override; /** * @brief Prints the final total energy of the NEP model to the output file * * This function prints the final total energy of the NEP model in eV to the output file along with some formatting. 
*/ - void after_all_runners(UnitCell& ucell) override; + void after_all_runners (UnitCell& ucell) override; private: /** @@ -79,7 +79,7 @@ class ESolver_NEP : public ESolver * * @param ucell unitcell information */ - void type_map(const UnitCell& ucell); + void type_map (const UnitCell& ucell); /** * @brief NEP related variables for ESolver_NEP class @@ -93,14 +93,14 @@ class ESolver_NEP : public ESolver NEP nep; ///< NEP object for NEP calculations #endif - std::string nep_file; ///< directory of NEP model file - std::vector atype = {}; ///< atom type mapping for NEP model - double nep_potential; ///< computed potential energy - ModuleBase::matrix nep_force; ///< computed atomic forces - ModuleBase::matrix nep_virial; ///< computed lattice virials - std::vector _e; ///< temporary storage for energy computation - std::vector _f; ///< temporary storage for force computation - std::vector _v; ///< temporary storage for virial computation + std::string nep_file; ///< directory of NEP model file + std::vector atype = {}; ///< atom type mapping for NEP model + double nep_potential; ///< computed potential energy + ModuleBase::matrix nep_force; ///< computed atomic forces + ModuleBase::matrix nep_virial; ///< computed lattice virials + std::vector _e; ///< temporary storage for energy computation + std::vector _f; ///< temporary storage for force computation + std::vector _v; ///< temporary storage for virial computation }; } // namespace ModuleESolver diff --git a/source/source_esolver/esolver_of.cpp b/source/source_esolver/esolver_of.cpp index bf6f7ffde29..7ae0da8ccf0 100644 --- a/source/source_esolver/esolver_of.cpp +++ b/source/source_esolver/esolver_of.cpp @@ -15,17 +15,16 @@ #include "source_pw/module_ofdft/of_print_info.h" #include "source_hamilt/module_xc/xc_functional.h" - namespace ModuleESolver { -ESolver_OF::ESolver_OF() +ESolver_OF::ESolver_OF () { this->classname = "ESolver_OF"; this->task_ = new char[60]; } -ESolver_OF::~ESolver_OF() 
+ESolver_OF::~ESolver_OF () { //**************************************************** // do not add any codes in this deconstructor funcion @@ -34,12 +33,12 @@ ESolver_OF::~ESolver_OF() delete[] this->pphi_; for (int i = 0; i < PARAM.inp.nspin; ++i) - { - delete[] this->pdirect_[i]; - delete[] this->pdLdphi_[i]; - delete[] this->pdEdphi_[i]; - delete[] this->precip_dir_[i]; - } + { + delete[] this->pdirect_[i]; + delete[] this->pdLdphi_[i]; + delete[] this->pdEdphi_[i]; + delete[] this->precip_dir_[i]; + } delete[] this->pdirect_; delete[] this->pdLdphi_; delete[] this->pdEdphi_; @@ -58,9 +57,10 @@ ESolver_OF::~ESolver_OF() delete this->opt_cg_mag_; } -void ESolver_OF::before_all_runners(UnitCell& ucell, const Input_para& inp) +void + ESolver_OF::before_all_runners (UnitCell& ucell, const Input_para& inp) { - ESolver_FP::before_all_runners(ucell, inp); + ESolver_FP::before_all_runners (ucell, inp); // save necessary parameters this->of_kinetic_ = inp.of_kinetic; @@ -71,105 +71,104 @@ void ESolver_OF::before_all_runners(UnitCell& ucell, const Input_para& inp) this->max_iter_ = inp.scf_nmax; this->dV_ = ucell.omega / this->pw_rho->nxyz; this->bound_cal_potential_ - = std::bind(&ESolver_OF::cal_potential, this, std::placeholders::_1, std::placeholders::_2, std::ref(ucell)); + = std::bind (&ESolver_OF::cal_potential, this, std::placeholders::_1, std::placeholders::_2, std::ref (ucell)); - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "SETUP UNITCELL"); + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "SETUP UNITCELL"); -// XC_Functional::set_xc_type(ucell.atoms[0].ncpp.xc_func); - int func_type = XC_Functional::get_func_type(); + // XC_Functional::set_xc_type(ucell.atoms[0].ncpp.xc_func); + int func_type = XC_Functional::get_func_type (); if (func_type > 2) - { - ModuleBase::WARNING_QUIT("esolver_of", "meta-GGA and Hybrid functionals are not supported by OFDFT."); - } + { + ModuleBase::WARNING_QUIT ("esolver_of", "meta-GGA and Hybrid functionals are not 
supported by OFDFT."); + } - this->chr.init_rho(ucell, this->Pgrid, this->sf.strucFac, ucell.symm, &this->kv); - this->chr.check_rho(); // check the rho + this->chr.init_rho (ucell, this->Pgrid, this->sf.strucFac, ucell.symm, &this->kv); + this->chr.check_rho (); // check the rho // initialize local pseudopotential - this->locpp.init_vloc(ucell,pw_rho); - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "LOCAL POTENTIAL"); - + this->locpp.init_vloc (ucell, pw_rho); + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "LOCAL POTENTIAL"); // initialize elecstate, including potential - this->init_elecstate(ucell); + this->init_elecstate (ucell); // calculate the total local pseudopotential in real space - this->pelec->init_scf(ucell, Pgrid, sf.strucFac, locpp.numeric, ucell.symm); // atomic_rho, v_of_rho, set_vrs + this->pelec->init_scf (ucell, Pgrid, sf.strucFac, locpp.numeric, ucell.symm); // atomic_rho, v_of_rho, set_vrs // liuyu move here 2023-10-09 // D in uspp need vloc, thus behind init_scf() // calculate the effective coefficient matrix for non-local pseudopotential projectors - ModuleBase::matrix veff = this->pelec->pot->get_eff_v(); + ModuleBase::matrix veff = this->pelec->pot->get_eff_v (); - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "INIT POTENTIAL"); + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "INIT POTENTIAL"); // Initialize KEDF // Calculate electron numbers, which will be used to initialize WT KEDF this->nelec_ = new double[inp.nspin]; if (inp.nspin == 1) - { - this->nelec_[0] = inp.nelec; - } + { + this->nelec_[0] = inp.nelec; + } else if (inp.nspin == 2) - { - // in fact, nelec_spin will not be used anymore - this->pelec->init_nelec_spin(); - this->nelec_[0] = this->pelec->nelec_spin[0]; - this->nelec_[1] = this->pelec->nelec_spin[1]; - } + { + // in fact, nelec_spin will not be used anymore + this->pelec->init_nelec_spin (); + this->nelec_[0] = this->pelec->nelec_spin[0]; + this->nelec_[1] = this->pelec->nelec_spin[1]; + } 
delete[] this->kedf_manager_; - this->kedf_manager_ = new KEDF_Manager(); - this->kedf_manager_->init(inp, this->pw_rho, this->dV_, this->nelec_[0]); - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "INIT KEDF"); + this->kedf_manager_ = new KEDF_Manager (); + this->kedf_manager_->init (inp, this->pw_rho, this->dV_, this->nelec_[0]); + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "INIT KEDF"); // Initialize optimization methods - this->init_opt(); - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "INIT OPTIMIZATION"); + this->init_opt (); + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "INIT OPTIMIZATION"); - this->allocate_array(); + this->allocate_array (); } -void ESolver_OF::runner(UnitCell& ucell, const int istep) +void + ESolver_OF::runner (UnitCell& ucell, const int istep) { - ModuleBase::timer::start("ESolver_OF", "runner"); + ModuleBase::timer::start ("ESolver_OF", "runner"); // get Ewald energy, initial rho and phi if necessary - this->before_opt(istep, ucell); + this->before_opt (istep, ucell); this->iter_ = 0; - bool conv_esolver = false; // this conv_esolver is added by mohan 20250302 - this->iter_time = ModuleBase::get_time(); + bool conv_esolver = false; // this conv_esolver is added by mohan 20250302 + this->iter_time = ModuleBase::get_time (); while (true) - { - // once we get a new rho and phi, update potential - this->update_potential(ucell); - - // calculate the energy of new rho and phi - this->energy_llast_ = this->energy_last_; - this->energy_last_ = this->energy_current_; - this->energy_current_ = this->cal_energy(); + { + // once we get a new rho and phi, update potential + this->update_potential (ucell); + // calculate the energy of new rho and phi + this->energy_llast_ = this->energy_last_; + this->energy_last_ = this->energy_current_; + this->energy_current_ = this->cal_energy (); - // check if the job is done - if (this->check_exit(conv_esolver)) - { - break; - } + // check if the job is done + if (this->check_exit 
(conv_esolver)) + { + break; + } - // find the optimization direction and step lenghth theta according to the potential - this->optimize(ucell); + // find the optimization direction and step lenghth theta according to the potential + this->optimize (ucell); - // update the rho and phi based on the direction and theta - this->update_rho(); + // update the rho and phi based on the direction and theta + this->update_rho (); - this->iter_++; + this->iter_++; - ESolver_FP::iter_finish(ucell, istep, this->iter_, conv_esolver); - } + ESolver_FP::iter_finish (ucell, istep, this->iter_, conv_esolver); + } - this->after_opt(istep, ucell, conv_esolver); + this->after_opt (istep, ucell, conv_esolver); - ModuleBase::timer::end("ESolver_OF", "runner"); + ModuleBase::timer::end ("ESolver_OF", "runner"); } /** @@ -180,97 +179,95 @@ void ESolver_OF::runner(UnitCell& ucell, const int istep) * @param istep * @param ucell */ -void ESolver_OF::before_opt(const int istep, UnitCell& ucell) +void + ESolver_OF::before_opt (const int istep, UnitCell& ucell) { - ModuleBase::TITLE("ESolver_OF", "before_opt"); - ModuleBase::timer::start("ESolver_OF", "before_opt"); + ModuleBase::TITLE ("ESolver_OF", "before_opt"); + ModuleBase::timer::start ("ESolver_OF", "before_opt"); //! 
1) call before_scf() of ESolver_FP - ESolver_FP::before_scf(ucell, istep); - - + ESolver_FP::before_scf (ucell, istep); if (ucell.cell_parameter_updated) - { - this->dV_ = ucell.omega / this->pw_rho->nxyz; - - // initialize elecstate, including potential - this->init_elecstate(ucell); - - // Initialize KEDF - this->kedf_manager_->init(PARAM.inp, this->pw_rho, this->dV_, this->nelec_[0]); - - // Initialize optimization methods - this->init_opt(); - - // Refresh the arrays - delete this->psi_; - this->psi_ = new psi::Psi(1, PARAM.inp.nspin, - this->pw_rho->nrxx, this->pw_rho->nrxx, true); - - for (int is = 0; is < PARAM.inp.nspin; ++is) { - this->pphi_[is] = this->psi_->get_pointer(is); + this->dV_ = ucell.omega / this->pw_rho->nxyz; + + // initialize elecstate, including potential + this->init_elecstate (ucell); + + // Initialize KEDF + this->kedf_manager_->init (PARAM.inp, this->pw_rho, this->dV_, this->nelec_[0]); + + // Initialize optimization methods + this->init_opt (); + + // Refresh the arrays + delete this->psi_; + this->psi_ = new psi::Psi (1, PARAM.inp.nspin, this->pw_rho->nrxx, this->pw_rho->nrxx, true); + + for (int is = 0; is < PARAM.inp.nspin; ++is) + { + this->pphi_[is] = this->psi_->get_pointer (is); + } + + delete this->ptemp_rho_; + this->ptemp_rho_ = new Charge (); + this->ptemp_rho_->set_rhopw (this->pw_rho); + const bool kin_den = this->ptemp_rho_->kin_density (); // mohan add 20251202 + this->ptemp_rho_->allocate (PARAM.inp.nspin, kin_den); + + for (int is = 0; is < PARAM.inp.nspin; ++is) + { + delete[] this->pdLdphi_[is]; + delete[] this->pdEdphi_[is]; + delete[] this->pdirect_[is]; + delete[] this->precip_dir_[is]; + this->pdLdphi_[is] = new double[this->pw_rho->nrxx]; + this->pdEdphi_[is] = new double[this->pw_rho->nrxx]; + this->pdirect_[is] = new double[this->pw_rho->nrxx]; + this->precip_dir_[is] = new std::complex[pw_rho->npw]; + } } - delete this->ptemp_rho_; - this->ptemp_rho_ = new Charge(); - 
this->ptemp_rho_->set_rhopw(this->pw_rho); - const bool kin_den = this->ptemp_rho_->kin_density(); // mohan add 20251202 - this->ptemp_rho_->allocate(PARAM.inp.nspin, kin_den); + this->pelec->init_scf (ucell, Pgrid, sf.strucFac, locpp.numeric, ucell.symm); - for (int is = 0; is < PARAM.inp.nspin; ++is) + Symmetry_rho::symmetrize_rho (PARAM.inp.nspin, this->chr, this->pw_rho, ucell.symm); + + for (int is = 0; is < PARAM.inp.nspin; ++is) { - delete[] this->pdLdphi_[is]; - delete[] this->pdEdphi_[is]; - delete[] this->pdirect_[is]; - delete[] this->precip_dir_[is]; - this->pdLdphi_[is] = new double[this->pw_rho->nrxx]; - this->pdEdphi_[is] = new double[this->pw_rho->nrxx]; - this->pdirect_[is] = new double[this->pw_rho->nrxx]; - this->precip_dir_[is] = new std::complex[pw_rho->npw]; + if (PARAM.inp.init_chg != "file") + { + for (int ibs = 0; ibs < this->pw_rho->nrxx; ++ibs) + { + // Here we initialize rho to be uniform, + // because the rho got by pot.init_pot -> Charge::atomic_rho may contain minus elements. + this->chr.rho[is][ibs] = this->nelec_[is] / ucell.omega; + this->pphi_[is][ibs] = sqrt (this->chr.rho[is][ibs]); + } + } + else + { + for (int ibs = 0; ibs < this->pw_rho->nrxx; ++ibs) + { + this->pphi_[is][ibs] = sqrt (this->chr.rho[is][ibs]); + } + } } - } - - this->pelec->init_scf(ucell, Pgrid, sf.strucFac, locpp.numeric, ucell.symm); - - Symmetry_rho::symmetrize_rho(PARAM.inp.nspin, this->chr, this->pw_rho, ucell.symm); for (int is = 0; is < PARAM.inp.nspin; ++is) - { - if (PARAM.inp.init_chg != "file") { - for (int ibs = 0; ibs < this->pw_rho->nrxx; ++ibs) - { - // Here we initialize rho to be uniform, - // because the rho got by pot.init_pot -> Charge::atomic_rho may contain minus elements. 
- this->chr.rho[is][ibs] = this->nelec_[is] / ucell.omega; - this->pphi_[is][ibs] = sqrt(this->chr.rho[is][ibs]); - } + this->pelec->eferm.set_efval (is, 0); + this->theta_[is] = 0.; + ModuleBase::GlobalFunc::ZEROS (this->pdLdphi_[is], this->pw_rho->nrxx); + ModuleBase::GlobalFunc::ZEROS (this->pdEdphi_[is], this->pw_rho->nrxx); + ModuleBase::GlobalFunc::ZEROS (this->pdirect_[is], this->pw_rho->nrxx); } - else + if (PARAM.inp.nspin == 1) { - for (int ibs = 0; ibs < this->pw_rho->nrxx; ++ibs) - { - this->pphi_[is][ibs] = sqrt(this->chr.rho[is][ibs]); - } + this->theta_[0] = 0.2; } - } - for (int is = 0; is < PARAM.inp.nspin; ++is) - { - this->pelec->eferm.set_efval(is, 0); - this->theta_[is] = 0.; - ModuleBase::GlobalFunc::ZEROS(this->pdLdphi_[is], this->pw_rho->nrxx); - ModuleBase::GlobalFunc::ZEROS(this->pdEdphi_[is], this->pw_rho->nrxx); - ModuleBase::GlobalFunc::ZEROS(this->pdirect_[is], this->pw_rho->nrxx); - } - if (PARAM.inp.nspin == 1) - { - this->theta_[0] = 0.2; - } - - ModuleBase::timer::end("ESolver_OF", "before_opt"); + ModuleBase::timer::end ("ESolver_OF", "before_opt"); } /** @@ -279,30 +276,31 @@ void ESolver_OF::before_opt(const int istep, UnitCell& ucell) * * @param ucell */ -void ESolver_OF::update_potential(UnitCell& ucell) +void + ESolver_OF::update_potential (UnitCell& ucell) { // (1) get dL/dphi - elecstate::cal_ux(ucell); + elecstate::cal_ux (ucell); - this->pelec->pot->update_from_charge(&this->chr, &ucell); // Hartree + XC + external - this->kedf_manager_->get_potential(this->chr.rho, - this->pphi_, - this->pw_rho, - this->pelec->pot->get_eff_v()); // KEDF potential + this->pelec->pot->update_from_charge (&this->chr, &ucell); // Hartree + XC + external + this->kedf_manager_->get_potential (this->chr.rho, + this->pphi_, + this->pw_rho, + this->pelec->pot->get_eff_v ()); // KEDF potential for (int is = 0; is < PARAM.inp.nspin; ++is) - { - const double* vr_eff = this->pelec->pot->get_eff_v(is); - for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) 
- { - this->pdEdphi_[is][ir] = vr_eff[ir]; - } - this->pelec->eferm.set_efval(is, this->cal_mu(this->pphi_[is], this->pdEdphi_[is], this->nelec_[is])); - for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) { - this->pdLdphi_[is][ir] - = this->pdEdphi_[is][ir] - 2. * this->pelec->eferm.get_efval(is) * this->pphi_[is][ir]; + const double* vr_eff = this->pelec->pot->get_eff_v (is); + for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) + { + this->pdEdphi_[is][ir] = vr_eff[ir]; + } + this->pelec->eferm.set_efval (is, this->cal_mu (this->pphi_[is], this->pdEdphi_[is], this->nelec_[is])); + for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) + { + this->pdLdphi_[is][ir] + = this->pdEdphi_[is][ir] - 2. * this->pelec->eferm.get_efval (is) * this->pphi_[is][ir]; + } } - } // (2) get the norm of dLdphi // ===== temporary solution of potential convergence when of_full_pw = 0 ===== @@ -312,11 +310,11 @@ void ESolver_OF::update_potential(UnitCell& ucell) this->normdLdphi_ = 0.; for (int is = 0; is < PARAM.inp.nspin; ++is) - { - this->normdLdphi_ += this->inner_product(this->pdLdphi_[is], this->pdLdphi_[is], this->pw_rho->nrxx, 1.0); - } - Parallel_Reduce::reduce_all(this->normdLdphi_); - this->normdLdphi_ = sqrt(this->normdLdphi_ / this->pw_rho->nxyz / PARAM.inp.nspin); + { + this->normdLdphi_ += this->inner_product (this->pdLdphi_[is], this->pdLdphi_[is], this->pw_rho->nrxx, 1.0); + } + Parallel_Reduce::reduce_all (this->normdLdphi_); + this->normdLdphi_ = sqrt (this->normdLdphi_ / this->pw_rho->nxyz / PARAM.inp.nspin); } /** @@ -324,39 +322,40 @@ void ESolver_OF::update_potential(UnitCell& ucell) * * @param ucell */ -void ESolver_OF::optimize(UnitCell& ucell) +void + ESolver_OF::optimize (UnitCell& ucell) { // (1) get |d0> with optimization algorithm - this->get_direction(ucell); + this->get_direction (ucell); // initialize temp_phi and temp_rho used in line search double** ptemp_phi = new double*[PARAM.inp.nspin]; for (int is = 0; is < PARAM.inp.nspin; ++is) - { - ptemp_phi[is] = new 
double[this->pw_rho->nrxx]; - for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) { - ptemp_phi[is][ir] = this->pphi_[is][ir]; - this->ptemp_rho_->rho[is][ir] = ptemp_phi[is][ir] * ptemp_phi[is][ir]; + ptemp_phi[is] = new double[this->pw_rho->nrxx]; + for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) + { + ptemp_phi[is][ir] = this->pphi_[is][ir]; + this->ptemp_rho_->rho[is][ir] = ptemp_phi[is][ir] * ptemp_phi[is][ir]; + } } - } // (2) rotate and renormalize the direction - this->adjust_direction(); + this->adjust_direction (); // (3) make sure that dEdtheta<0 at theta = 0 double* dEdtheta = new double[PARAM.inp.nspin]; // dE/dtheta of tempPhi - ModuleBase::GlobalFunc::ZEROS(dEdtheta, PARAM.inp.nspin); + ModuleBase::GlobalFunc::ZEROS (dEdtheta, PARAM.inp.nspin); - this->check_direction(dEdtheta, ptemp_phi, ucell); + this->check_direction (dEdtheta, ptemp_phi, ucell); // this->test_direction(dEdtheta, ptemp_phi, ucell); // (4) call line search to find the best theta (step length) - this->get_step_length(dEdtheta, ptemp_phi, ucell); + this->get_step_length (dEdtheta, ptemp_phi, ucell); for (int is = 0; is < PARAM.inp.nspin; ++is) - { - delete[] ptemp_phi[is]; - } + { + delete[] ptemp_phi[is]; + } delete[] ptemp_phi; delete[] dEdtheta; } @@ -366,17 +365,18 @@ void ESolver_OF::optimize(UnitCell& ucell) * phi = cos(theta) * phi + sin(theta) * direction, * rho = phi^2 */ -void ESolver_OF::update_rho() +void + ESolver_OF::update_rho () { for (int is = 0; is < PARAM.inp.nspin; ++is) - { - for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) { - this->pphi_[is][ir] - = this->pphi_[is][ir] * cos(this->theta_[is]) + this->pdirect_[is][ir] * sin(this->theta_[is]); - this->chr.rho[is][ir] = this->pphi_[is][ir] * this->pphi_[is][ir]; + for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) + { + this->pphi_[is][ir] = this->pphi_[is][ir] * cos (this->theta_[is]) + + this->pdirect_[is][ir] * sin (this->theta_[is]); + this->chr.rho[is][ir] = this->pphi_[is][ir] * this->pphi_[is][ir]; + } } - } // // 
------------ turn on symmetry may cause instability in optimization ------------ // if (ModuleSymmetry::Symmetry::symm_flag == 1) // { @@ -399,7 +399,8 @@ void ESolver_OF::update_rho() * * @return exit or not */ -bool ESolver_OF::check_exit(bool& conv_esolver) +bool + ESolver_OF::check_exit (bool& conv_esolver) { conv_esolver = false; bool potConv = false; @@ -407,45 +408,52 @@ bool ESolver_OF::check_exit(bool& conv_esolver) bool energyConv = false; if (this->normdLdphi_ < this->of_tolp_) - { - potConv = true; - } - if (this->iter_ >= 3 && std::abs(this->normdLdphi_ - this->normdLdphi_last_) < 1e-10 - && std::abs(this->normdLdphi_ - this->normdLdphi_llast_) < 1e-10) - { - potHold = true; - } - - if (this->iter_ >= 3 && std::abs(this->energy_current_ - this->energy_last_) < this->of_tole_ - && std::abs(this->energy_current_ - this->energy_llast_) < this->of_tole_) - { - energyConv = true; - } + { + potConv = true; + } + if (this->iter_ >= 3 && std::abs (this->normdLdphi_ - this->normdLdphi_last_) < 1e-10 + && std::abs (this->normdLdphi_ - this->normdLdphi_llast_) < 1e-10) + { + potHold = true; + } + + if (this->iter_ >= 3 && std::abs (this->energy_current_ - this->energy_last_) < this->of_tole_ + && std::abs (this->energy_current_ - this->energy_llast_) < this->of_tole_) + { + energyConv = true; + } conv_esolver = (this->of_conv_ == "energy" && energyConv) || (this->of_conv_ == "potential" && potConv) - || (this->of_conv_ == "both" && potConv && energyConv); + || (this->of_conv_ == "both" && potConv && energyConv); - OFDFT::print_info(this->iter_, this->iter_time, this->energy_current_, this->energy_last_, - this->normdLdphi_, this->pelec, this->kedf_manager_, conv_esolver); + OFDFT::print_info (this->iter_, + this->iter_time, + this->energy_current_, + this->energy_last_, + this->normdLdphi_, + this->pelec, + this->kedf_manager_, + conv_esolver); if (conv_esolver || this->iter_ >= this->max_iter_) - { - return true; - } + { + return true; + } // ============ 
temporary solution of potential convergence =========== else if (this->of_conv_ == "potential" && potHold) - { - GlobalV::ofs_warning << "ESolver_OF WARNING: " - << "The convergence of potential has not been reached, but the norm of potential nearly " - "remains unchanged, set of_full_pw = 1 may work." - << std::endl; - return true; - } + { + GlobalV::ofs_warning + << "ESolver_OF WARNING: " + << "The convergence of potential has not been reached, but the norm of potential nearly " + "remains unchanged, set of_full_pw = 1 may work." + << std::endl; + return true; + } // ==================================================================== else - { - return false; - } + { + return false; + } } /** @@ -454,64 +462,66 @@ bool ESolver_OF::check_exit(bool& conv_esolver) * @param istep * @param ucell */ -void ESolver_OF::after_opt(const int istep, UnitCell& ucell, const bool conv_esolver) +void + ESolver_OF::after_opt (const int istep, UnitCell& ucell, const bool conv_esolver) { - ModuleBase::TITLE("ESolver_OF", "after_opt"); - ModuleBase::timer::start("ESolver_OF", "after_opt"); + ModuleBase::TITLE ("ESolver_OF", "after_opt"); + ModuleBase::timer::start ("ESolver_OF", "after_opt"); //------------------------------------------------------------------ // 1) calculate kinetic energy density and ELF //------------------------------------------------------------------ if (PARAM.inp.out_elf[0] > 0) - { - this->kedf_manager_->get_energy_density(this->chr.rho, this->pphi_, this->pw_rho, this->chr.kin_r); - } + { + this->kedf_manager_->get_energy_density (this->chr.rho, this->pphi_, this->pw_rho, this->chr.kin_r); + } // should not be here? 
mohan note 2025-03-03 for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) - { - this->chr.rho_save[0][ir] = this->chr.rho[0][ir]; - } + { + this->chr.rho_save[0][ir] = this->chr.rho[0][ir]; + } //------------------------------------------------------------------ // 2) call after_scf() of ESolver_FP //------------------------------------------------------------------ - ESolver_FP::after_scf(ucell, istep, conv_esolver); + ESolver_FP::after_scf (ucell, istep, conv_esolver); #ifdef __MLALGO //------------------------------------------------------------------ // Generate data if needed //------------------------------------------------------------------ if (PARAM.inp.of_ml_gene_data) - { - this->pelec->pot->update_from_charge(&this->chr, &ucell); // Hartree + XC + external - this->kedf_manager_->get_potential(this->chr.rho, - this->pphi_, - this->pw_rho, - this->pelec->pot->get_eff_v()); // KEDF potential - - const double* vr_eff = this->pelec->pot->get_eff_v(0); - for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) { - this->pdEdphi_[0][ir] = vr_eff[ir]; + this->pelec->pot->update_from_charge (&this->chr, &ucell); // Hartree + XC + external + this->kedf_manager_->get_potential (this->chr.rho, + this->pphi_, + this->pw_rho, + this->pelec->pot->get_eff_v ()); // KEDF potential + + const double* vr_eff = this->pelec->pot->get_eff_v (0); + for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) + { + this->pdEdphi_[0][ir] = vr_eff[ir]; + } + this->pelec->eferm.set_efval (0, this->cal_mu (this->pphi_[0], this->pdEdphi_[0], this->nelec_[0])); + + std::cout << "Generating Training data..." << std::endl; + std::cout << "mu = " << this->pelec->eferm.get_efval (0) << std::endl; + this->kedf_manager_->generate_ml_target (this->chr.rho, this->pw_rho, vr_eff); } - this->pelec->eferm.set_efval(0, this->cal_mu(this->pphi_[0], this->pdEdphi_[0], this->nelec_[0])); - - std::cout << "Generating Training data..." 
<< std::endl; - std::cout << "mu = " << this->pelec->eferm.get_efval(0) << std::endl; - this->kedf_manager_->generate_ml_target(this->chr.rho, this->pw_rho, vr_eff); - } #endif - ModuleBase::timer::end("ESolver_OF", "after_opt"); + ModuleBase::timer::end ("ESolver_OF", "after_opt"); } /** * @brief Output the FINAL_ETOT */ -void ESolver_OF::after_all_runners(UnitCell& ucell) +void + ESolver_OF::after_all_runners (UnitCell& ucell) { - ESolver_FP::after_all_runners(ucell); + ESolver_FP::after_all_runners (ucell); } /** @@ -520,19 +530,20 @@ void ESolver_OF::after_all_runners(UnitCell& ucell) * * @return total energy */ -double ESolver_OF::cal_energy() +double + ESolver_OF::cal_energy () { - this->pelec->cal_energies(2); - double kinetic_energy = this->kedf_manager_->get_energy(); // kinetic energy - double pseudopot_energy = 0.; // electron-ion interaction energy + this->pelec->cal_energies (2); + double kinetic_energy = this->kedf_manager_->get_energy (); // kinetic energy + double pseudopot_energy = 0.; // electron-ion interaction energy for (int is = 0; is < PARAM.inp.nspin; ++is) - { - pseudopot_energy += this->inner_product(this->pelec->pot->get_fixed_v(), - this->chr.rho[is], - this->pw_rho->nrxx, - this->dV_); - } - Parallel_Reduce::reduce_pool(pseudopot_energy); + { + pseudopot_energy += this->inner_product (this->pelec->pot->get_fixed_v (), + this->chr.rho[is], + this->pw_rho->nrxx, + this->dV_); + } + Parallel_Reduce::reduce_pool (pseudopot_energy); this->pelec->f_en.ekinetic = kinetic_energy; this->pelec->f_en.e_local_pp = pseudopot_energy; this->pelec->f_en.etot += kinetic_energy + pseudopot_energy; @@ -544,13 +555,14 @@ double ESolver_OF::cal_energy() * * @param [out] force */ -void ESolver_OF::cal_force(UnitCell& ucell, ModuleBase::matrix& force) +void + ESolver_OF::cal_force (UnitCell& ucell, ModuleBase::matrix& force) { - Forces ff(ucell.nat); - + Forces ff (ucell.nat); + // here nullptr is for DFT+U, which may cause bugs, mohan note 2025-11-07 // 
solvent can be used? mohan ask 2025-11-07 - ff.cal_force(ucell, force, *pelec, this->pw_rho, &ucell.symm, &sf, this->solvent, nullptr, &this->locpp); + ff.cal_force (ucell, force, *pelec, this->pw_rho, &ucell.symm, &sf, this->solvent, nullptr, &this->locpp); } /** @@ -558,14 +570,18 @@ void ESolver_OF::cal_force(UnitCell& ucell, ModuleBase::matrix& force) * * @param [out] stress */ -void ESolver_OF::cal_stress(UnitCell& ucell, ModuleBase::matrix& stress) +void + ESolver_OF::cal_stress (UnitCell& ucell, ModuleBase::matrix& stress) { ModuleBase::matrix kinetic_stress_; - kinetic_stress_.create(3, 3); - this->kedf_manager_->get_stress(ucell.omega, this->chr.rho, - this->pphi_, this->pw_rho, kinetic_stress_); // kinetic stress - - OF_Stress_PW ss(this->pelec, this->pw_rho); - ss.cal_stress(stress, kinetic_stress_, ucell, &ucell.symm, this->locpp, &sf, &kv); + kinetic_stress_.create (3, 3); + this->kedf_manager_->get_stress (ucell.omega, + this->chr.rho, + this->pphi_, + this->pw_rho, + kinetic_stress_); // kinetic stress + + OF_Stress_PW ss (this->pelec, this->pw_rho); + ss.cal_stress (stress, kinetic_stress_, ucell, &ucell.symm, this->locpp, &sf, &kv); } } // namespace ModuleESolver diff --git a/source/source_esolver/esolver_of.h b/source/source_esolver/esolver_of.h index df4b96543c6..d35d93ef791 100644 --- a/source/source_esolver/esolver_of.h +++ b/source/source_esolver/esolver_of.h @@ -12,20 +12,20 @@ namespace ModuleESolver class ESolver_OF : public ESolver_FP { public: - ESolver_OF(); - ~ESolver_OF(); + ESolver_OF (); + ~ESolver_OF (); - virtual void before_all_runners(UnitCell& ucell, const Input_para& inp) override; + virtual void before_all_runners (UnitCell& ucell, const Input_para& inp) override; - virtual void runner(UnitCell& ucell, const int istep) override; + virtual void runner (UnitCell& ucell, const int istep) override; - virtual void after_all_runners(UnitCell& ucell) override; + virtual void after_all_runners (UnitCell& ucell) override; - virtual 
double cal_energy() override; + virtual double cal_energy () override; - virtual void cal_force(UnitCell& ucell, ModuleBase::matrix& force) override; + virtual void cal_force (UnitCell& ucell, ModuleBase::matrix& force) override; - virtual void cal_stress(UnitCell& ucell, ModuleBase::matrix& stress) override; + virtual void cal_stress (UnitCell& ucell, ModuleBase::matrix& stress) override; protected: // ======================= variables ========================== @@ -74,42 +74,43 @@ class ESolver_OF : public ESolver_FP double normdLdphi_ = 100.; // ==================== main process of OFDFT ====================== - void before_opt(const int istep, UnitCell& ucell); - void update_potential(UnitCell& ucell); - void optimize(UnitCell& ucell); - void update_rho(); - bool check_exit(bool& conv_esolver); - void after_opt(const int istep, UnitCell& ucell, const bool conv_esolver); + void before_opt (const int istep, UnitCell& ucell); + void update_potential (UnitCell& ucell); + void optimize (UnitCell& ucell); + void update_rho (); + bool check_exit (bool& conv_esolver); + void after_opt (const int istep, UnitCell& ucell, const bool conv_esolver); // ============================ tools =============================== // --------------------- initialize --------------------------------- - void init_elecstate(UnitCell& ucell); - void allocate_array(); + void init_elecstate (UnitCell& ucell); + void allocate_array (); // --------------------- calculate physical qualities --------------- - std::function bound_cal_potential_; - void cal_potential_wrapper(double* ptemp_phi, double* rdLdphi); - void cal_potential(double* ptemp_phi, double* rdLdphi, UnitCell& ucell); - void cal_dEdtheta(double** ptemp_phi, Charge* temp_rho, UnitCell& ucell, double* ptheta, double* rdEdtheta); - double cal_mu(double* pphi, double* pdEdphi, double nelec); + std::function bound_cal_potential_; + void cal_potential_wrapper (double* ptemp_phi, double* rdLdphi); + void cal_potential (double* ptemp_phi, 
double* rdLdphi, UnitCell& ucell); + void cal_dEdtheta (double** ptemp_phi, Charge* temp_rho, UnitCell& ucell, double* ptheta, double* rdEdtheta); + double cal_mu (double* pphi, double* pdEdphi, double nelec); // --------------------- determine the optimization direction ------- - void adjust_direction(); - void check_direction(double* dEdtheta, double** ptemp_phi, UnitCell& ucell); - void test_direction(double* dEdtheta, double** ptemp_phi, UnitCell& ucell); + void adjust_direction (); + void check_direction (double* dEdtheta, double** ptemp_phi, UnitCell& ucell); + void test_direction (double* dEdtheta, double** ptemp_phi, UnitCell& ucell); // --------------------- interface to blas -------------------------- - double inner_product(double* pa, double* pb, int length, double dV = 1) const + double + inner_product (double* pa, double* pb, int length, double dV = 1) const { - double innerproduct = BlasConnector::dot(length, pa, 1, pb, 1); + double innerproduct = BlasConnector::dot (length, pa, 1, pb, 1); innerproduct *= dV; return innerproduct; } // ---------------------- interfaces to optimization methods -------- - void init_opt(); - void get_direction(UnitCell& ucell); - void get_step_length(double* dEdtheta, double** ptemp_phi, UnitCell& ucell); + void init_opt (); + void get_direction (UnitCell& ucell); + void get_step_length (double* dEdtheta, double** ptemp_phi, UnitCell& ucell); }; } // namespace ModuleESolver diff --git a/source/source_esolver/esolver_of_interface.cpp b/source/source_esolver/esolver_of_interface.cpp index d428b795069..75a2251d5e7 100644 --- a/source/source_esolver/esolver_of_interface.cpp +++ b/source/source_esolver/esolver_of_interface.cpp @@ -8,86 +8,89 @@ namespace ModuleESolver * @brief [Interface to opt] * Initialize the opts */ -void ESolver_OF::init_opt() +void + ESolver_OF::init_opt () { if (this->opt_dcsrch_ == nullptr) - { - this->opt_dcsrch_ = new ModuleBase::Opt_DCsrch(); - } + { + this->opt_dcsrch_ = new ModuleBase::Opt_DCsrch 
(); + } if (this->of_method_ == "tn") - { - if (this->opt_tn_ == nullptr) { - this->opt_tn_ = new ModuleBase::Opt_TN(); + if (this->opt_tn_ == nullptr) + { + this->opt_tn_ = new ModuleBase::Opt_TN (); + } + this->opt_tn_->allocate (this->pw_rho->nrxx); + this->opt_tn_->set_para (this->dV_); } - this->opt_tn_->allocate(this->pw_rho->nrxx); - this->opt_tn_->set_para(this->dV_); - } else if (this->of_method_ == "cg1" || this->of_method_ == "cg2") - { - if (this->opt_cg_ == nullptr) { - this->opt_cg_ = new ModuleBase::Opt_CG(); + if (this->opt_cg_ == nullptr) + { + this->opt_cg_ = new ModuleBase::Opt_CG (); + } + this->opt_cg_->allocate (this->pw_rho->nrxx); + this->opt_cg_->set_para (this->dV_); + this->opt_dcsrch_->set_paras (1e-4, 1e-2); } - this->opt_cg_->allocate(this->pw_rho->nrxx); - this->opt_cg_->set_para(this->dV_); - this->opt_dcsrch_->set_paras(1e-4, 1e-2); - } else if (this->of_method_ == "bfgs") - { - ModuleBase::WARNING_QUIT("esolver_of", "BFGS is not supported now."); - return; - } + { + ModuleBase::WARNING_QUIT ("esolver_of", "BFGS is not supported now."); + return; + } // optimize theta if nspin=2 if (PARAM.inp.nspin == 2) - { - this->opt_cg_mag_ = new ModuleBase::Opt_CG; - this->opt_cg_mag_->allocate(PARAM.inp.nspin); - } + { + this->opt_cg_mag_ = new ModuleBase::Opt_CG; + this->opt_cg_mag_->allocate (PARAM.inp.nspin); + } } -void ESolver_OF::cal_potential_wrapper(double* ptemp_phi, double* rdLdphi) +void + ESolver_OF::cal_potential_wrapper (double* ptemp_phi, double* rdLdphi) { - this->bound_cal_potential_(ptemp_phi, rdLdphi); + this->bound_cal_potential_ (ptemp_phi, rdLdphi); } /** * @brief [Interface to opt] * Call optimization methods to get the optimization direction */ -void ESolver_OF::get_direction(UnitCell& ucell) +void + ESolver_OF::get_direction (UnitCell& ucell) { for (int is = 0; is < PARAM.inp.nspin; ++is) - { - if (this->of_method_ == "tn") - { - this->tn_spin_flag_ = is; - opt_tn_->next_direct(this->pphi_[is], - this->pdLdphi_[is], - 
this->flag_, - this->pdirect_[is], - this, - &ESolver_OF::cal_potential_wrapper); - } - else if (this->of_method_ == "cg1") - { - opt_cg_->next_direct(this->pdLdphi_[is], 1, this->pdirect_[is]); - } - else if (this->of_method_ == "cg2") - { - opt_cg_->next_direct(this->pdLdphi_[is], 2, this->pdirect_[is]); - } - else if (this->of_method_ == "bfgs") - { - return; - } - else { - ModuleBase::WARNING_QUIT("ESolver_OF", "of_method must be one of CG, TN, or BFGS."); + if (this->of_method_ == "tn") + { + this->tn_spin_flag_ = is; + opt_tn_->next_direct (this->pphi_[is], + this->pdLdphi_[is], + this->flag_, + this->pdirect_[is], + this, + &ESolver_OF::cal_potential_wrapper); + } + else if (this->of_method_ == "cg1") + { + opt_cg_->next_direct (this->pdLdphi_[is], 1, this->pdirect_[is]); + } + else if (this->of_method_ == "cg2") + { + opt_cg_->next_direct (this->pdLdphi_[is], 2, this->pdirect_[is]); + } + else if (this->of_method_ == "bfgs") + { + return; + } + else + { + ModuleBase::WARNING_QUIT ("ESolver_OF", "of_method must be one of CG, TN, or BFGS."); + } } - } } /** @@ -98,185 +101,186 @@ void ESolver_OF::get_direction(UnitCell& ucell) * @param ptemp_phi * @param ucell */ -void ESolver_OF::get_step_length(double* dEdtheta, double** ptemp_phi, UnitCell& ucell) +void + ESolver_OF::get_step_length (double* dEdtheta, double** ptemp_phi, UnitCell& ucell) { double temp_energy = 0.0; // energy of temp_phi and temp_rho double kinetic_energy = 0.0; // kinetic energy double pseudopot_energy = 0.0; // electron-ion interaction energy if (PARAM.inp.nspin == 1) - { - int numDC = 0; // iteration number of line search - strcpy(this->task_, "START"); - while (true) { - // update energy - this->pelec->cal_energies(2); - temp_energy = this->pelec->f_en.etot; - kinetic_energy = this->kedf_manager_->get_energy(); // kinetic energy - pseudopot_energy = this->inner_product(this->pelec->pot->get_fixed_v(), - this->ptemp_rho_->rho[0], - this->pw_rho->nrxx, - this->dV_); - 
Parallel_Reduce::reduce_all(pseudopot_energy); - temp_energy += kinetic_energy + pseudopot_energy; + int numDC = 0; // iteration number of line search + strcpy (this->task_, "START"); + while (true) + { + // update energy + this->pelec->cal_energies (2); + temp_energy = this->pelec->f_en.etot; + kinetic_energy = this->kedf_manager_->get_energy (); // kinetic energy + pseudopot_energy = this->inner_product (this->pelec->pot->get_fixed_v (), + this->ptemp_rho_->rho[0], + this->pw_rho->nrxx, + this->dV_); + Parallel_Reduce::reduce_all (pseudopot_energy); + temp_energy += kinetic_energy + pseudopot_energy; - // line search to update theta[0] - this->opt_dcsrch_->dcSrch(temp_energy, dEdtheta[0], this->theta_[0], this->task_); - numDC++; + // line search to update theta[0] + this->opt_dcsrch_->dcSrch (temp_energy, dEdtheta[0], this->theta_[0], this->task_); + numDC++; - // decide what to do next according to the output of line search - if (strncmp(this->task_, "FG", 2) == 0) // continue line search - { - // update tempPhi and tempRho - for (int i = 0; i < this->pw_rho->nrxx; ++i) - { - ptemp_phi[0][i] - = this->pphi_[0][i] * cos(this->theta_[0]) + this->pdirect_[0][i] * sin(this->theta_[0]); - this->ptemp_rho_->rho[0][i] = ptemp_phi[0][i] * ptemp_phi[0][i]; - } + // decide what to do next according to the output of line search + if (strncmp (this->task_, "FG", 2) == 0) // continue line search + { + // update tempPhi and tempRho + for (int i = 0; i < this->pw_rho->nrxx; ++i) + { + ptemp_phi[0][i] = this->pphi_[0][i] * cos (this->theta_[0]) + + this->pdirect_[0][i] * sin (this->theta_[0]); + this->ptemp_rho_->rho[0][i] = ptemp_phi[0][i] * ptemp_phi[0][i]; + } - // get dEdtheta of new tempPhi and tempRho - this->cal_dEdtheta(ptemp_phi, this->ptemp_rho_, ucell, this->theta_, dEdtheta); + // get dEdtheta of new tempPhi and tempRho + this->cal_dEdtheta (ptemp_phi, this->ptemp_rho_, ucell, this->theta_, dEdtheta); - if (numDC > this->max_dcsrch_) - { - GlobalV::ofs_warning << 
"ESolver_OF linesearch: WARNING " - << "excedd the max iter number." << std::endl; - break; + if (numDC > this->max_dcsrch_) + { + GlobalV::ofs_warning << "ESolver_OF linesearch: WARNING " + << "excedd the max iter number." << std::endl; + break; + } + } + else if (strncmp (this->task_, "CO", 2) == 0) // convergence achieved + { + break; + } + else if (strncmp (this->task_, "WA", 2) == 0) // warning of line search + { + GlobalV::ofs_warning << "ESolver_OF linesearch: WARNING " << this->task_ << std::endl; + std::cout << this->task_ << std::endl; + break; + } + else if (strncmp (this->task_, "ER", 2) == 0) // ERROR in line search + { + GlobalV::ofs_warning << "ESolver_OF linesearch: ERROR " << this->task_ << std::endl; + std::cout << this->task_ << std::endl; + break; + } } - } - else if (strncmp(this->task_, "CO", 2) == 0) // convergence achieved - { - break; - } - else if (strncmp(this->task_, "WA", 2) == 0) // warning of line search - { - GlobalV::ofs_warning << "ESolver_OF linesearch: WARNING " << this->task_ << std::endl; - std::cout << this->task_ << std::endl; - break; - } - else if (strncmp(this->task_, "ER", 2) == 0) // ERROR in line search - { - GlobalV::ofs_warning << "ESolver_OF linesearch: ERROR " << this->task_ << std::endl; - std::cout << this->task_ << std::endl; - break; - } } - } else if (PARAM.inp.nspin == 2) - { - ModuleBase::WARNING_QUIT("esolver_of", "Sorry, SPIN2 case is not supported by OFDFT for now."); - // ========================== Under testing ========================== - // this->opt_cg_mag_->refresh(); + { + ModuleBase::WARNING_QUIT ("esolver_of", "Sorry, SPIN2 case is not supported by OFDFT for now."); + // ========================== Under testing ========================== + // this->opt_cg_mag_->refresh(); - // double *pthetaDir = new double[PARAM.inp.nspin]; - // double *temp_theta = new double[PARAM.inp.nspin]; - // ModuleBase::GlobalFunc::ZEROS(pthetaDir, PARAM.inp.nspin); - // ModuleBase::GlobalFunc::ZEROS(temp_theta, 
PARAM.inp.nspin); - // double thetaAlpha = 0.; - // double alphaTol = 1e-4; - // double maxThetaDir = 0.; - // double dEdalpha = 0.; - // int thetaIter = 0; - // int numDC = 0; + // double *pthetaDir = new double[PARAM.inp.nspin]; + // double *temp_theta = new double[PARAM.inp.nspin]; + // ModuleBase::GlobalFunc::ZEROS(pthetaDir, PARAM.inp.nspin); + // ModuleBase::GlobalFunc::ZEROS(temp_theta, PARAM.inp.nspin); + // double thetaAlpha = 0.; + // double alphaTol = 1e-4; + // double maxThetaDir = 0.; + // double dEdalpha = 0.; + // int thetaIter = 0; + // int numDC = 0; - // while (true) - // { - // this->opt_cg_mag_->next_direct(dEdtheta, 1, pthetaDir); + // while (true) + // { + // this->opt_cg_mag_->next_direct(dEdtheta, 1, pthetaDir); - // dEdalpha = this->inner_product(dEdtheta, pthetaDir, 2, 1.); + // dEdalpha = this->inner_product(dEdtheta, pthetaDir, 2, 1.); - // if (dEdalpha >= 0.) - // { - // for (int is = 0; is < PARAM.inp.nspin; ++is) - // { - // pthetaDir[is] = -dEdtheta[is]; - // } - // dEdalpha = this->inner_product(dEdtheta, pthetaDir, 2, - // 1); - // } + // if (dEdalpha >= 0.) 
+ // { + // for (int is = 0; is < PARAM.inp.nspin; ++is) + // { + // pthetaDir[is] = -dEdtheta[is]; + // } + // dEdalpha = this->inner_product(dEdtheta, pthetaDir, 2, + // 1); + // } - // maxThetaDir = max(abs(pthetaDir[0]), abs(pthetaDir[1])); - // thetaAlpha = min(0.1, 0.1*ModuleBase::PI/maxThetaDir); + // maxThetaDir = max(abs(pthetaDir[0]), abs(pthetaDir[1])); + // thetaAlpha = min(0.1, 0.1*ModuleBase::PI/maxThetaDir); - // // line search along thetaDir to find thetaAlpha - // this->opt_dcsrch_->set_paras(1e-4, 1e-2, 1e-12, 0., - // ModuleBase::PI/maxThetaDir); strcpy(this->task_, "START"); - // numDC = 0; - // while(true) - // { - // this->pelec->f_en.calculate_etot(this->pw_rho->nrxx, - // this->pw_rho->nxyz); temp_energy = - // this->pelec->f_en.etot; kinetic_energy = - // this->kinetic_energy(); pseudopot_energy = 0.; for (int - // is = 0; is < PARAM.inp.nspin; ++is) { - // pseudopot_energy += - // this->inner_product(GlobalC::pot.vltot, - // ptemp_rho_[is], this->pw_rho->nrxx, this->dV_); - // } - // Parallel_Reduce::reduce_all(pseudopot_energy); - // temp_energy += kinetic_energy + pseudopot_energy; - // this->opt_dcsrch_->dcSrch(temp_energy, dEdalpha, - // thetaAlpha, this->task_); numDC++; + // // line search along thetaDir to find thetaAlpha + // this->opt_dcsrch_->set_paras(1e-4, 1e-2, 1e-12, 0., + // ModuleBase::PI/maxThetaDir); strcpy(this->task_, "START"); + // numDC = 0; + // while(true) + // { + // this->pelec->f_en.calculate_etot(this->pw_rho->nrxx, + // this->pw_rho->nxyz); temp_energy = + // this->pelec->f_en.etot; kinetic_energy = + // this->kinetic_energy(); pseudopot_energy = 0.; for (int + // is = 0; is < PARAM.inp.nspin; ++is) { + // pseudopot_energy += + // this->inner_product(GlobalC::pot.vltot, + // ptemp_rho_[is], this->pw_rho->nrxx, this->dV_); + // } + // Parallel_Reduce::reduce_all(pseudopot_energy); + // temp_energy += kinetic_energy + pseudopot_energy; + // this->opt_dcsrch_->dcSrch(temp_energy, dEdalpha, + // thetaAlpha, 
this->task_); numDC++; - // if (strncmp(this->task_, "FG", 2) == 0) - // { - // for (int is = 0; is < PARAM.inp.nspin; ++is) - // { - // temp_theta[is] = this->theta_[is] + thetaAlpha * - // pthetaDir[is]; for (int ir = 0; ir < - // this->pw_rho->nrxx; ++ir) - // { - // ptemp_phi[is][ir] = this->pphi_[is][ir] * - // cos(temp_theta[is]) + this->pdirect_[is][ir] - // * sin(temp_theta[is]); ptemp_rho_[is][ir] = - // ptemp_phi[is][ir] - // * ptemp_phi[is][ir]; - // } - // } - // this->cal_dEdtheta(ptemp_phi, ptemp_rho_, temp_theta, - // dEdtheta); dEdalpha = this->inner_product(dEdtheta, - // pthetaDir, 2, 1); + // if (strncmp(this->task_, "FG", 2) == 0) + // { + // for (int is = 0; is < PARAM.inp.nspin; ++is) + // { + // temp_theta[is] = this->theta_[is] + thetaAlpha * + // pthetaDir[is]; for (int ir = 0; ir < + // this->pw_rho->nrxx; ++ir) + // { + // ptemp_phi[is][ir] = this->pphi_[is][ir] * + // cos(temp_theta[is]) + this->pdirect_[is][ir] + // * sin(temp_theta[is]); ptemp_rho_[is][ir] = + // ptemp_phi[is][ir] + // * ptemp_phi[is][ir]; + // } + // } + // this->cal_dEdtheta(ptemp_phi, ptemp_rho_, temp_theta, + // dEdtheta); dEdalpha = this->inner_product(dEdtheta, + // pthetaDir, 2, 1); - // if (numDC > 10) - // { - // GlobalV::ofs_warning << "ESolver_OF linesearch: - // WARNING " << "excedd the max iter number." << - // endl; break; - // } - // } - // else if (strncmp(this->task_, "CO", 2) == 0) - // { - // break; - // } - // else if (strncmp(this->task_, "WA", 2) == 0) - // { - // GlobalV::ofs_warning << "ESolver_OF linesearch: - // WARNING " << this->task_ << std::endl; cout << - // this->task_ << endl; break; - // } - // else if (strncmp(this->task_, "ER", 2) == 0) - // { - // GlobalV::ofs_warning << "ESolver_OF linesearch: ERROR - // " << this->task_ << std::endl; cout << this->task_ << - // endl; break; - // } - // } + // if (numDC > 10) + // { + // GlobalV::ofs_warning << "ESolver_OF linesearch: + // WARNING " << "excedd the max iter number." 
<< + // endl; break; + // } + // } + // else if (strncmp(this->task_, "CO", 2) == 0) + // { + // break; + // } + // else if (strncmp(this->task_, "WA", 2) == 0) + // { + // GlobalV::ofs_warning << "ESolver_OF linesearch: + // WARNING " << this->task_ << std::endl; cout << + // this->task_ << endl; break; + // } + // else if (strncmp(this->task_, "ER", 2) == 0) + // { + // GlobalV::ofs_warning << "ESolver_OF linesearch: ERROR + // " << this->task_ << std::endl; cout << this->task_ << + // endl; break; + // } + // } - // for (int is = 0; is < PARAM.inp.nspin; ++is) this->theta_[is] - // += thetaAlpha * pthetaDir[is]; if (sqrt(dEdtheta[0] * - // dEdtheta[0] + dEdtheta[1] * dEdtheta[1]) < alphaTol) break; - // thetaIter++; - // if (thetaIter > 2) break; - // } - // delete[] temp_theta; - // delete[] pthetaDir; - // ========================== Under testing ========================== - } + // for (int is = 0; is < PARAM.inp.nspin; ++is) this->theta_[is] + // += thetaAlpha * pthetaDir[is]; if (sqrt(dEdtheta[0] * + // dEdtheta[0] + dEdtheta[1] * dEdtheta[1]) < alphaTol) break; + // thetaIter++; + // if (thetaIter > 2) break; + // } + // delete[] temp_theta; + // delete[] pthetaDir; + // ========================== Under testing ========================== + } else if (PARAM.inp.nspin == 4) - { - ModuleBase::WARNING_QUIT("esolver_of", "Sorry, SPIN4 case is not supported by OFDFT for now."); - } + { + ModuleBase::WARNING_QUIT ("esolver_of", "Sorry, SPIN4 case is not supported by OFDFT for now."); + } } } // namespace ModuleESolver diff --git a/source/source_esolver/esolver_of_tddft.cpp b/source/source_esolver/esolver_of_tddft.cpp index d3bfbb28f4c..88fb607ae4a 100644 --- a/source/source_esolver/esolver_of_tddft.cpp +++ b/source/source_esolver/esolver_of_tddft.cpp @@ -20,108 +20,104 @@ namespace ModuleESolver { -ESolver_OF_TDDFT::ESolver_OF_TDDFT() +ESolver_OF_TDDFT::ESolver_OF_TDDFT () { this->classname = "ESolver_OF_TDDFT"; - this->evolve_ofdft=new Evolve_OFDFT(); + 
this->evolve_ofdft = new Evolve_OFDFT (); } -ESolver_OF_TDDFT::~ESolver_OF_TDDFT() -{ - delete this->evolve_ofdft; -} +ESolver_OF_TDDFT::~ESolver_OF_TDDFT () { delete this->evolve_ofdft; } - -void ESolver_OF_TDDFT::runner(UnitCell& ucell, const int istep) +void + ESolver_OF_TDDFT::runner (UnitCell& ucell, const int istep) { - ModuleBase::timer::start("ESolver_OF_TDDFT", "runner"); + ModuleBase::timer::start ("ESolver_OF_TDDFT", "runner"); // get Ewald energy, initial rho and phi if necessary - this->before_opt(istep, ucell); + this->before_opt (istep, ucell); this->iter_ = 0; - bool conv_esolver = false; // this conv_esolver is added by mohan 20250302 - this->iter_time = ModuleBase::get_time(); - - if (this->phi_td.empty()) - { - const int size = PARAM.inp.nspin * this->pw_rho->nrxx; - this->phi_td.resize(size, std::complex(0.0, 0.0)); - } + bool conv_esolver = false; // this conv_esolver is added by mohan 20250302 + this->iter_time = ModuleBase::get_time (); - if ((istep==0) && PARAM.inp.init_chg != "file") - { - while (true) + if (this->phi_td.empty ()) { - // once we get a new rho and phi, update potential - this->update_potential(ucell); + const int size = PARAM.inp.nspin * this->pw_rho->nrxx; + this->phi_td.resize (size, std::complex (0.0, 0.0)); + } - // calculate the energy of new rho and phi - this->energy_llast_ = this->energy_last_; - this->energy_last_ = this->energy_current_; - this->energy_current_ = this->cal_energy(); + if ((istep == 0) && PARAM.inp.init_chg != "file") + { + while (true) + { + // once we get a new rho and phi, update potential + this->update_potential (ucell); + // calculate the energy of new rho and phi + this->energy_llast_ = this->energy_last_; + this->energy_last_ = this->energy_current_; + this->energy_current_ = this->cal_energy (); - // check if the job is done - if (this->check_exit(conv_esolver)) - { - break; - } + // check if the job is done + if (this->check_exit (conv_esolver)) + { + break; + } - // find the optimization 
direction and step lenghth theta according to the potential - this->optimize(ucell); + // find the optimization direction and step lenghth theta according to the potential + this->optimize (ucell); - // update the rho and phi based on the direction and theta - this->update_rho(); + // update the rho and phi based on the direction and theta + this->update_rho (); - this->iter_++; + this->iter_++; - ESolver_FP::iter_finish(ucell, istep, this->iter_, conv_esolver); - } + ESolver_FP::iter_finish (ucell, istep, this->iter_, conv_esolver); + } #ifdef _OPENMP #pragma omp parallel for collapse(2) #endif - for (int is = 0; is < PARAM.inp.nspin; ++is) - { - for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) - { - phi_td[is*this->pw_rho->nrxx+ir]=pphi_[is][ir]; - } + for (int is = 0; is < PARAM.inp.nspin; ++is) + { + for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) + { + phi_td[is * this->pw_rho->nrxx + ir] = pphi_[is][ir]; + } + } } - } - else if ((istep==0) && PARAM.inp.init_chg == "file") - { + else if ((istep == 0) && PARAM.inp.init_chg == "file") + { #ifdef _OPENMP #pragma omp parallel for collapse(2) #endif - for (int is = 0; is < PARAM.inp.nspin; ++is) - { - for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) - { - phi_td[is*this->pw_rho->nrxx+ir]=pphi_[is][ir]; - } - } - conv_esolver=true; - } + for (int is = 0; is < PARAM.inp.nspin; ++is) + { + for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) + { + phi_td[is * this->pw_rho->nrxx + ir] = pphi_[is][ir]; + } + } + conv_esolver = true; + } else - { - this->evolve_ofdft->propagate_psi_RK4(this->pelec, this->chr, ucell, this->phi_td, this->pw_rho); + { + this->evolve_ofdft->propagate_psi_RK4 (this->pelec, this->chr, ucell, this->phi_td, this->pw_rho); #ifdef _OPENMP #pragma omp parallel for collapse(2) #endif - for (int is = 0; is < PARAM.inp.nspin; ++is) - { - for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) - { - pphi_[is][ir]=std::abs(phi_td[is*this->pw_rho->nrxx+ir]); - } + for (int is = 0; is < PARAM.inp.nspin; ++is) + { + for 
(int ir = 0; ir < this->pw_rho->nrxx; ++ir) + { + pphi_[is][ir] = std::abs (phi_td[is * this->pw_rho->nrxx + ir]); + } + } + conv_esolver = true; } - conv_esolver=true; - } - this->after_opt(istep, ucell, conv_esolver); + this->after_opt (istep, ucell, conv_esolver); - ModuleBase::timer::end("ESolver_OF_TDDFT", "runner"); + ModuleBase::timer::end ("ESolver_OF_TDDFT", "runner"); } } // namespace ModuleESolver diff --git a/source/source_esolver/esolver_of_tddft.h b/source/source_esolver/esolver_of_tddft.h index 85293b1761a..70d36b69104 100644 --- a/source/source_esolver/esolver_of_tddft.h +++ b/source/source_esolver/esolver_of_tddft.h @@ -9,14 +9,14 @@ namespace ModuleESolver class ESolver_OF_TDDFT : public ESolver_OF { public: - ESolver_OF_TDDFT(); - ~ESolver_OF_TDDFT(); + ESolver_OF_TDDFT (); + ~ESolver_OF_TDDFT (); - virtual void runner(UnitCell& ucell, const int istep) override; + virtual void runner (UnitCell& ucell, const int istep) override; protected: - std::vector> phi_td; // time dependent wavefunction - Evolve_OFDFT* evolve_ofdft=nullptr; + std::vector> phi_td; // time dependent wavefunction + Evolve_OFDFT* evolve_ofdft = nullptr; }; } // namespace ModuleESolver diff --git a/source/source_esolver/esolver_of_tool.cpp b/source/source_esolver/esolver_of_tool.cpp index b54f4de53af..07d19c0ee13 100644 --- a/source/source_esolver/esolver_of_tool.cpp +++ b/source/source_esolver/esolver_of_tool.cpp @@ -14,83 +14,81 @@ namespace ModuleESolver * * @param ucell */ -void ESolver_OF::init_elecstate(UnitCell& ucell) +void + ESolver_OF::init_elecstate (UnitCell& ucell) { if (this->pelec == nullptr) - { - this->pelec = new elecstate::ElecState((Charge*)(&chr), this->pw_rho, pw_big); - } + { + this->pelec = new elecstate::ElecState ((Charge*)(&chr), this->pw_rho, pw_big); + } delete this->pelec->pot; - this->pelec->pot = new elecstate::Potential(this->pw_rhod, - this->pw_rho, - &ucell, - &(this->locpp.vloc), - &(this->sf), - &(this->solvent), - &(this->pelec->f_en.etxc), - 
&(this->pelec->f_en.vtxc)); + this->pelec->pot = new elecstate::Potential (this->pw_rhod, + this->pw_rho, + &ucell, + &(this->locpp.vloc), + &(this->sf), + &(this->solvent), + &(this->pelec->f_en.etxc), + &(this->pelec->f_en.vtxc)); // There is no Operator in ESolver_OF, register Potentials here! std::vector pot_register_in; if (PARAM.inp.vion_in_h) - { - pot_register_in.push_back("local"); - } + { + pot_register_in.push_back ("local"); + } if (PARAM.inp.vh_in_h) - { - pot_register_in.push_back("hartree"); - } + { + pot_register_in.push_back ("hartree"); + } // no variable can choose xc, maybe it is necessary - pot_register_in.push_back("xc"); + pot_register_in.push_back ("xc"); if (PARAM.inp.imp_sol) - { - pot_register_in.push_back("surchem"); - } + { + pot_register_in.push_back ("surchem"); + } if (PARAM.inp.efield_flag) - { - pot_register_in.push_back("efield"); - } + { + pot_register_in.push_back ("efield"); + } if (PARAM.inp.gate_flag) - { - pot_register_in.push_back("gatefield"); - } + { + pot_register_in.push_back ("gatefield"); + } if (PARAM.inp.ml_exx) - { - pot_register_in.push_back("ml_exx"); - } + { + pot_register_in.push_back ("ml_exx"); + } // only Potential is not empty, Veff and Meta are available - if (pot_register_in.size() > 0) - { - // register Potential by gathered operator - this->pelec->pot->pot_register(pot_register_in); - } + if (pot_register_in.size () > 0) + { + // register Potential by gathered operator + this->pelec->pot->pot_register (pot_register_in); + } } /** * @brief Allocate the arrays, as well as this->psi_ and this->ptemp_rho_. 
*/ -void ESolver_OF::allocate_array() +void + ESolver_OF::allocate_array () { // Initialize the "wavefunction", which is sqrt(rho) - this->psi_ = new psi::Psi(1, - PARAM.inp.nspin, - this->pw_rho->nrxx, - this->pw_rho->nrxx, - true); - ModuleBase::Memory::record("OFDFT::Psi", sizeof(double) * PARAM.inp.nspin * this->pw_rho->nrxx); + this->psi_ = new psi::Psi (1, PARAM.inp.nspin, this->pw_rho->nrxx, this->pw_rho->nrxx, true); + ModuleBase::Memory::record ("OFDFT::Psi", sizeof (double) * PARAM.inp.nspin * this->pw_rho->nrxx); this->pphi_ = new double*[PARAM.inp.nspin]; for (int is = 0; is < PARAM.inp.nspin; ++is) - { - this->pphi_[is] = this->psi_->get_pointer(is); - } - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "INIT PHI"); + { + this->pphi_[is] = this->psi_->get_pointer (is); + } + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "INIT PHI"); // initialize chemical potential, step length, ... delete this->ptemp_rho_; - this->ptemp_rho_ = new Charge(); - this->ptemp_rho_->set_rhopw(this->pw_rho); - const bool kin_den = this->ptemp_rho_->kin_density(); // mohan add 20251202 - this->ptemp_rho_->allocate(PARAM.inp.nspin, kin_den); + this->ptemp_rho_ = new Charge (); + this->ptemp_rho_->set_rhopw (this->pw_rho); + const bool kin_den = this->ptemp_rho_->kin_density (); // mohan add 20251202 + this->ptemp_rho_->allocate (PARAM.inp.nspin, kin_den); this->theta_ = new double[PARAM.inp.nspin]; this->pdLdphi_ = new double*[PARAM.inp.nspin]; @@ -99,16 +97,17 @@ void ESolver_OF::allocate_array() this->precip_dir_ = new std::complex*[PARAM.inp.nspin]; for (int is = 0; is < PARAM.inp.nspin; ++is) - { - this->pdLdphi_[is] = new double[this->pw_rho->nrxx]; - this->pdEdphi_[is] = new double[this->pw_rho->nrxx]; - this->pdirect_[is] = new double[this->pw_rho->nrxx]; - this->precip_dir_[is] = new std::complex[pw_rho->npw]; - } - ModuleBase::Memory::record("OFDFT::pdLdphi_", sizeof(double) * PARAM.inp.nspin * this->pw_rho->nrxx); - 
ModuleBase::Memory::record("OFDFT::pdEdphi_", sizeof(double) * PARAM.inp.nspin * this->pw_rho->nrxx); - ModuleBase::Memory::record("OFDFT::pdirect_", sizeof(double) * PARAM.inp.nspin * this->pw_rho->nrxx); - ModuleBase::Memory::record("OFDFT::precip_dir_", sizeof(std::complex) * PARAM.inp.nspin * this->pw_rho->npw); + { + this->pdLdphi_[is] = new double[this->pw_rho->nrxx]; + this->pdEdphi_[is] = new double[this->pw_rho->nrxx]; + this->pdirect_[is] = new double[this->pw_rho->nrxx]; + this->precip_dir_[is] = new std::complex[pw_rho->npw]; + } + ModuleBase::Memory::record ("OFDFT::pdLdphi_", sizeof (double) * PARAM.inp.nspin * this->pw_rho->nrxx); + ModuleBase::Memory::record ("OFDFT::pdEdphi_", sizeof (double) * PARAM.inp.nspin * this->pw_rho->nrxx); + ModuleBase::Memory::record ("OFDFT::pdirect_", sizeof (double) * PARAM.inp.nspin * this->pw_rho->nrxx); + ModuleBase::Memory::record ("OFDFT::precip_dir_", + sizeof (std::complex) * PARAM.inp.nspin * this->pw_rho->npw); } /** @@ -118,49 +117,48 @@ void ESolver_OF::allocate_array() * @param [in] ptemp_phi phi * @param [out] rdLdphi dL/dphi */ -void ESolver_OF::cal_potential(double* ptemp_phi, double* rdLdphi, UnitCell& ucell) +void + ESolver_OF::cal_potential (double* ptemp_phi, double* rdLdphi, UnitCell& ucell) { double** dEdtemp_phi = new double*[PARAM.inp.nspin]; double** temp_phi = new double*[PARAM.inp.nspin]; for (int is = 0; is < PARAM.inp.nspin; ++is) - { - dEdtemp_phi[is] = new double[this->pw_rho->nrxx]; - if (is == this->tn_spin_flag_) - { - temp_phi[is] = ptemp_phi; - } - else { - temp_phi[is] = this->pphi_[is]; - } - for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) - { - this->ptemp_rho_->rho[is][ir] = temp_phi[is][ir] * temp_phi[is][ir]; + dEdtemp_phi[is] = new double[this->pw_rho->nrxx]; + if (is == this->tn_spin_flag_) + { + temp_phi[is] = ptemp_phi; + } + else + { + temp_phi[is] = this->pphi_[is]; + } + for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) + { + this->ptemp_rho_->rho[is][ir] = 
temp_phi[is][ir] * temp_phi[is][ir]; + } } - } - elecstate::cal_ux(ucell); - this->pelec->pot->update_from_charge(this->ptemp_rho_, &ucell); - ModuleBase::matrix& vr_eff = this->pelec->pot->get_eff_v(); + elecstate::cal_ux (ucell); + this->pelec->pot->update_from_charge (this->ptemp_rho_, &ucell); + ModuleBase::matrix& vr_eff = this->pelec->pot->get_eff_v (); - this->kedf_manager_->get_potential(this->ptemp_rho_->rho, - temp_phi, - this->pw_rho, - vr_eff); // KEDF potential + this->kedf_manager_->get_potential (this->ptemp_rho_->rho, temp_phi, this->pw_rho, + vr_eff); // KEDF potential for (int i = 0; i < this->pw_rho->nrxx; ++i) - { - dEdtemp_phi[this->tn_spin_flag_][i] = vr_eff(this->tn_spin_flag_, i); - } - double temp_mu = this->cal_mu(ptemp_phi, dEdtemp_phi[this->tn_spin_flag_], this->nelec_[this->tn_spin_flag_]); + { + dEdtemp_phi[this->tn_spin_flag_][i] = vr_eff (this->tn_spin_flag_, i); + } + double temp_mu = this->cal_mu (ptemp_phi, dEdtemp_phi[this->tn_spin_flag_], this->nelec_[this->tn_spin_flag_]); for (int i = 0; i < this->pw_rho->nrxx; ++i) - { - rdLdphi[i] = dEdtemp_phi[this->tn_spin_flag_][i] - 2. * temp_mu * ptemp_phi[i]; - } + { + rdLdphi[i] = dEdtemp_phi[this->tn_spin_flag_][i] - 2. 
* temp_mu * ptemp_phi[i]; + } for (int is = 0; is < PARAM.inp.nspin; ++is) - { - delete[] dEdtemp_phi[is]; - } + { + delete[] dEdtemp_phi[is]; + } delete[] dEdtemp_phi; delete[] temp_phi; } @@ -176,28 +174,28 @@ void ESolver_OF::cal_potential(double* ptemp_phi, double* rdLdphi, UnitCell& uce * @param [in] ptheta * @param [out] rdEdtheta dE/dTheta */ -void ESolver_OF::cal_dEdtheta(double** ptemp_phi, Charge* temp_rho, UnitCell& ucell, double* ptheta, double* rdEdtheta) +void + ESolver_OF::cal_dEdtheta (double** ptemp_phi, Charge* temp_rho, UnitCell& ucell, double* ptheta, double* rdEdtheta) { double* dphi_dtheta = new double[this->pw_rho->nrxx]; - elecstate::cal_ux(ucell); - this->pelec->pot->update_from_charge(temp_rho, &ucell); - ModuleBase::matrix& vr_eff = this->pelec->pot->get_eff_v(); + elecstate::cal_ux (ucell); + this->pelec->pot->update_from_charge (temp_rho, &ucell); + ModuleBase::matrix& vr_eff = this->pelec->pot->get_eff_v (); - this->kedf_manager_->get_potential(temp_rho->rho, - ptemp_phi, - this->pw_rho, - vr_eff); // KEDF potential + this->kedf_manager_->get_potential (temp_rho->rho, ptemp_phi, this->pw_rho, + vr_eff); // KEDF potential for (int is = 0; is < PARAM.inp.nspin; ++is) - { - for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) { - this->pdEdphi_[is][ir] = vr_eff(is, ir); - dphi_dtheta[ir] = -this->pphi_[is][ir] * sin(ptheta[is]) + this->pdirect_[is][ir] * cos(ptheta[is]); + for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) + { + this->pdEdphi_[is][ir] = vr_eff (is, ir); + dphi_dtheta[ir] + = -this->pphi_[is][ir] * sin (ptheta[is]) + this->pdirect_[is][ir] * cos (ptheta[is]); + } + rdEdtheta[is] = this->inner_product (this->pdEdphi_[is], dphi_dtheta, this->pw_rho->nrxx, this->dV_); + Parallel_Reduce::reduce_all (rdEdtheta[is]); } - rdEdtheta[is] = this->inner_product(this->pdEdphi_[is], dphi_dtheta, this->pw_rho->nrxx, this->dV_); - Parallel_Reduce::reduce_all(rdEdtheta[is]); - } delete[] dphi_dtheta; } @@ -210,10 +208,11 @@ void 
ESolver_OF::cal_dEdtheta(double** ptemp_phi, Charge* temp_rho, UnitCell& uc * @param nelec * @return mu */ -double ESolver_OF::cal_mu(double* pphi, double* pdEdphi, double nelec) +double + ESolver_OF::cal_mu (double* pphi, double* pdEdphi, double nelec) { - double mu = this->inner_product(pphi, pdEdphi, this->pw_rho->nrxx, this->dV_); - Parallel_Reduce::reduce_all(mu); + double mu = this->inner_product (pphi, pdEdphi, this->pw_rho->nrxx, this->dV_); + Parallel_Reduce::reduce_all (mu); mu = mu / (2.0 * nelec); return mu; } @@ -222,77 +221,80 @@ double ESolver_OF::cal_mu(double* pphi, double* pdEdphi, double nelec) * @brief Rotate and renormalize the direction |d>, * make it orthogonal to phi ( = 0), and = nelec */ -void ESolver_OF::adjust_direction(void) +void + ESolver_OF::adjust_direction () { // filter the high frequency term in direction if of_full_pw = false if (!PARAM.inp.of_full_pw) - { - for (int is = 0; is < PARAM.inp.nspin; ++is) { - pw_rho->real2recip(this->pdirect_[is], this->precip_dir_[is]); - pw_rho->recip2real(this->precip_dir_[is], this->pdirect_[is]); + for (int is = 0; is < PARAM.inp.nspin; ++is) + { + pw_rho->real2recip (this->pdirect_[is], this->precip_dir_[is]); + pw_rho->recip2real (this->precip_dir_[is], this->pdirect_[is]); + } } - } if (PARAM.inp.nspin == 1) - { - double temp_theta = 0; // temp_theta = |d'|/|d0 + phi|, theta = min(theta, temp_theta) - - // (1) make direction orthogonal to phi - // |d'> = |d0> - |phi>/nelec - double inner_phi_direction - = this->inner_product(this->pphi_[0], this->pdirect_[0], this->pw_rho->nrxx, this->dV_); - Parallel_Reduce::reduce_all(inner_phi_direction); - for (int i = 0; i < this->pw_rho->nrxx; ++i) - { - temp_theta += pow(this->pdirect_[0][i] + this->pphi_[0][i], 2); - this->pdirect_[0][i] = this->pdirect_[0][i] - this->pphi_[0][i] * inner_phi_direction / this->nelec_[0]; - } - Parallel_Reduce::reduce_all(temp_theta); - temp_theta = std::sqrt(temp_theta); - - // (2) renormalize direction - // |d> = 
|d'> * \sqrt(nelec) / - double norm_direction - = this->inner_product(this->pdirect_[0], this->pdirect_[0], this->pw_rho->nrxx, this->dV_); - Parallel_Reduce::reduce_all(norm_direction); - norm_direction = std::sqrt(norm_direction); - for (int i = 0; i < this->pw_rho->nrxx; ++i) { - this->pdirect_[0][i] = std::sqrt(this->nelec_[0]) * this->pdirect_[0][i] / norm_direction; - } + double temp_theta = 0; // temp_theta = |d'|/|d0 + phi|, theta = min(theta, temp_theta) - temp_theta = norm_direction / temp_theta; - this->theta_[0] = std::min(this->theta_[0], temp_theta); - } - else if (PARAM.inp.nspin == 2) // theta = 0 - { - for (int is = 0; is < PARAM.inp.nspin; ++is) - { // (1) make direction orthogonal to phi // |d'> = |d0> - |phi>/nelec double inner_phi_direction - = this->inner_product(this->pphi_[is], this->pdirect_[is], this->pw_rho->nrxx, this->dV_); - Parallel_Reduce::reduce_all(inner_phi_direction); + = this->inner_product (this->pphi_[0], this->pdirect_[0], this->pw_rho->nrxx, this->dV_); + Parallel_Reduce::reduce_all (inner_phi_direction); for (int i = 0; i < this->pw_rho->nrxx; ++i) - { - this->pdirect_[is][i] - = this->pdirect_[is][i] - this->pphi_[is][i] * inner_phi_direction / this->nelec_[is]; - } + { + temp_theta += pow (this->pdirect_[0][i] + this->pphi_[0][i], 2); + this->pdirect_[0][i] + = this->pdirect_[0][i] - this->pphi_[0][i] * inner_phi_direction / this->nelec_[0]; + } + Parallel_Reduce::reduce_all (temp_theta); + temp_theta = std::sqrt (temp_theta); // (2) renormalize direction // |d> = |d'> * \sqrt(nelec) / double norm_direction - = this->inner_product(this->pdirect_[is], this->pdirect_[is], this->pw_rho->nrxx, this->dV_); - Parallel_Reduce::reduce_all(norm_direction); - norm_direction = std::sqrt(norm_direction); + = this->inner_product (this->pdirect_[0], this->pdirect_[0], this->pw_rho->nrxx, this->dV_); + Parallel_Reduce::reduce_all (norm_direction); + norm_direction = std::sqrt (norm_direction); for (int i = 0; i < this->pw_rho->nrxx; 
++i) - { - this->pdirect_[is][i] = std::sqrt(this->nelec_[is]) * this->pdirect_[is][i] / norm_direction; - } - this->theta_[is] = 0.; + { + this->pdirect_[0][i] = std::sqrt (this->nelec_[0]) * this->pdirect_[0][i] / norm_direction; + } + + temp_theta = norm_direction / temp_theta; + this->theta_[0] = std::min (this->theta_[0], temp_theta); + } + else if (PARAM.inp.nspin == 2) // theta = 0 + { + for (int is = 0; is < PARAM.inp.nspin; ++is) + { + // (1) make direction orthogonal to phi + // |d'> = |d0> - |phi>/nelec + double inner_phi_direction + = this->inner_product (this->pphi_[is], this->pdirect_[is], this->pw_rho->nrxx, this->dV_); + Parallel_Reduce::reduce_all (inner_phi_direction); + for (int i = 0; i < this->pw_rho->nrxx; ++i) + { + this->pdirect_[is][i] + = this->pdirect_[is][i] - this->pphi_[is][i] * inner_phi_direction / this->nelec_[is]; + } + + // (2) renormalize direction + // |d> = |d'> * \sqrt(nelec) / + double norm_direction + = this->inner_product (this->pdirect_[is], this->pdirect_[is], this->pw_rho->nrxx, this->dV_); + Parallel_Reduce::reduce_all (norm_direction); + norm_direction = std::sqrt (norm_direction); + for (int i = 0; i < this->pw_rho->nrxx; ++i) + { + this->pdirect_[is][i] + = std::sqrt (this->nelec_[is]) * this->pdirect_[is][i] / norm_direction; + } + this->theta_[is] = 0.; + } } - } } /** @@ -303,47 +305,48 @@ void ESolver_OF::adjust_direction(void) * @param ptemp_phi * @param ucell */ -void ESolver_OF::check_direction(double* dEdtheta, double** ptemp_phi, UnitCell& ucell) +void + ESolver_OF::check_direction (double* dEdtheta, double** ptemp_phi, UnitCell& ucell) { - assert(PARAM.inp.nspin > 0); + assert (PARAM.inp.nspin > 0); double* temp_theta = new double[PARAM.inp.nspin]; - ModuleBase::GlobalFunc::ZEROS(temp_theta, PARAM.inp.nspin); + ModuleBase::GlobalFunc::ZEROS (temp_theta, PARAM.inp.nspin); double max_dEdtheta = 1e5; // threshould of dEdtheta, avoid the unstable optimization - this->cal_dEdtheta(ptemp_phi, this->ptemp_rho_, 
ucell, temp_theta, dEdtheta); + this->cal_dEdtheta (ptemp_phi, this->ptemp_rho_, ucell, temp_theta, dEdtheta); // Assert dEdtheta(theta = 0) < 0, otherwise line search will not work. for (int is = 0; is < PARAM.inp.nspin; ++is) - { - if (dEdtheta[is] > max_dEdtheta) - { - std::cout << "dEdtheta " << dEdtheta[is] << std::endl; - ModuleBase::WARNING_QUIT("esolver_of.cpp", "dE/dtheta is too large."); - } - else if (dEdtheta[is] > 0) { - GlobalV::ofs_warning << "ESolver_OF: WARNING " - << "dEdphi > 0, replace direct with steepest descent method." << std::endl; - for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) - { - this->pdirect_[is][ir] = -this->pdLdphi_[is][ir]; - } - this->adjust_direction(); - this->cal_dEdtheta(ptemp_phi, this->ptemp_rho_, ucell, temp_theta, dEdtheta); if (dEdtheta[is] > max_dEdtheta) - { - std::cout << "dEdtheta " << dEdtheta[is] << std::endl; - ModuleBase::WARNING_QUIT("esolver_of.cpp", "dE/dtheta is too large."); - } + { + std::cout << "dEdtheta " << dEdtheta[is] << std::endl; + ModuleBase::WARNING_QUIT ("esolver_of.cpp", "dE/dtheta is too large."); + } else if (dEdtheta[is] > 0) - { - GlobalV::ofs_warning << "ESolver_OF: WARNING " - << "when use steepest dencent method, " - "dEdphi > 0, so we might get minimum." - << std::endl; - } + { + GlobalV::ofs_warning << "ESolver_OF: WARNING " + << "dEdphi > 0, replace direct with steepest descent method." << std::endl; + for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) + { + this->pdirect_[is][ir] = -this->pdLdphi_[is][ir]; + } + this->adjust_direction (); + this->cal_dEdtheta (ptemp_phi, this->ptemp_rho_, ucell, temp_theta, dEdtheta); + if (dEdtheta[is] > max_dEdtheta) + { + std::cout << "dEdtheta " << dEdtheta[is] << std::endl; + ModuleBase::WARNING_QUIT ("esolver_of.cpp", "dE/dtheta is too large."); + } + else if (dEdtheta[is] > 0) + { + GlobalV::ofs_warning << "ESolver_OF: WARNING " + << "when use steepest dencent method, " + "dEdphi > 0, so we might get minimum." 
+ << std::endl; + } + } } - } delete[] temp_theta; } @@ -355,40 +358,41 @@ void ESolver_OF::check_direction(double* dEdtheta, double** ptemp_phi, UnitCell& * @param ptemp_phi * @param ucell */ -void ESolver_OF::test_direction(double* dEdtheta, double** ptemp_phi, UnitCell& ucell) +void + ESolver_OF::test_direction (double* dEdtheta, double** ptemp_phi, UnitCell& ucell) { double temp_energy = 0.; if (this->iter_ == 0) - { - for (int i = -100; i < 100; ++i) { - this->theta_[0] = 0.001 * i; - for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) - { - ptemp_phi[0][ir] - = this->pphi_[0][ir] * cos(this->theta_[0]) + this->pdirect_[0][ir] * sin(this->theta_[0]); - ptemp_rho_->rho[0][ir] = ptemp_phi[0][ir] * ptemp_phi[0][ir]; - } - this->cal_dEdtheta(ptemp_phi, ptemp_rho_, ucell, this->theta_, dEdtheta); - this->pelec->cal_energies(2); - temp_energy = this->pelec->f_en.etot; - double kinetic_energy = 0.; - double pseudopot_energy = 0.; - kinetic_energy = this->kedf_manager_->get_energy(); - pseudopot_energy = this->inner_product(this->pelec->pot->get_fixed_v(), - this->ptemp_rho_->rho[0], - this->pw_rho->nrxx, - this->dV_); - Parallel_Reduce::reduce_all(pseudopot_energy); - temp_energy += kinetic_energy + pseudopot_energy; - GlobalV::ofs_warning << i << " " << dEdtheta[0] << " " << temp_energy << std::endl; - if (this->theta_[0] == 0) - { - std::cout << "dEdtheta " << dEdtheta[0] << std::endl; - } - } - exit(0); - } + for (int i = -100; i < 100; ++i) + { + this->theta_[0] = 0.001 * i; + for (int ir = 0; ir < this->pw_rho->nrxx; ++ir) + { + ptemp_phi[0][ir] = this->pphi_[0][ir] * cos (this->theta_[0]) + + this->pdirect_[0][ir] * sin (this->theta_[0]); + ptemp_rho_->rho[0][ir] = ptemp_phi[0][ir] * ptemp_phi[0][ir]; + } + this->cal_dEdtheta (ptemp_phi, ptemp_rho_, ucell, this->theta_, dEdtheta); + this->pelec->cal_energies (2); + temp_energy = this->pelec->f_en.etot; + double kinetic_energy = 0.; + double pseudopot_energy = 0.; + kinetic_energy = this->kedf_manager_->get_energy 
(); + pseudopot_energy = this->inner_product (this->pelec->pot->get_fixed_v (), + this->ptemp_rho_->rho[0], + this->pw_rho->nrxx, + this->dV_); + Parallel_Reduce::reduce_all (pseudopot_energy); + temp_energy += kinetic_energy + pseudopot_energy; + GlobalV::ofs_warning << i << " " << dEdtheta[0] << " " << temp_energy << std::endl; + if (this->theta_[0] == 0) + { + std::cout << "dEdtheta " << dEdtheta[0] << std::endl; + } + } + exit (0); + } } } // namespace ModuleESolver diff --git a/source/source_esolver/esolver_sdft_pw.cpp b/source/source_esolver/esolver_sdft_pw.cpp index 02300eb3c58..34d450203a2 100644 --- a/source/source_esolver/esolver_sdft_pw.cpp +++ b/source/source_esolver/esolver_sdft_pw.cpp @@ -18,293 +18,301 @@ namespace ModuleESolver { template -ESolver_SDFT_PW::ESolver_SDFT_PW() - : stoche(PARAM.inp.nche_sto, PARAM.inp.method_sto, PARAM.inp.emax_sto, PARAM.inp.emin_sto) +ESolver_SDFT_PW::ESolver_SDFT_PW () + : stoche (PARAM.inp.nche_sto, PARAM.inp.method_sto, PARAM.inp.emax_sto, PARAM.inp.emin_sto) { this->classname = "ESolver_SDFT_PW"; this->basisname = "PW"; } template -ESolver_SDFT_PW::~ESolver_SDFT_PW() +ESolver_SDFT_PW::~ESolver_SDFT_PW () { - //**************************************************** - // do not add any codes in this deconstructor funcion - //**************************************************** + //**************************************************** + // do not add any codes in this deconstructor funcion + //**************************************************** } template -void ESolver_SDFT_PW::before_all_runners(UnitCell& ucell, const Input_para& inp) +void + ESolver_SDFT_PW::before_all_runners (UnitCell& ucell, const Input_para& inp) { // 1) initialize parameters from int Input class this->nche_sto = inp.nche_sto; this->method_sto = inp.method_sto; // 2) run "before_all_runners" in ESolver_KS - ESolver_KS_PW::before_all_runners(ucell, inp); + ESolver_KS_PW::before_all_runners (ucell, inp); // 3) initialize the stochastic wave 
functions - this->stowf.init(&this->kv, this->pw_wfc->npwk_max); + this->stowf.init (&this->kv, this->pw_wfc->npwk_max); if (inp.nbands_sto != 0) - { - if (inp.initsto_ecut < inp.ecutwfc) { - this->stowf.init_sto_orbitals(inp.seed_sto); + if (inp.initsto_ecut < inp.ecutwfc) + { + this->stowf.init_sto_orbitals (inp.seed_sto); + } + else + { + this->stowf.init_sto_orbitals_Ecut (inp.seed_sto, this->kv, *this->pw_wfc, inp.initsto_ecut); + } } - else + else { - this->stowf.init_sto_orbitals_Ecut(inp.seed_sto, this->kv, *this->pw_wfc, inp.initsto_ecut); + this->stowf.init_com_orbitals (); } - } - else - { - this->stowf.init_com_orbitals(); - } if (this->method_sto == 2) - { - this->stowf.allocate_chiallorder(this->nche_sto); - } - this->stowf.sync_chi0(); + { + this->stowf.allocate_chiallorder (this->nche_sto); + } + this->stowf.sync_chi0 (); // 4) allocate spaces for \sqrt(f(H))|chi> and |\tilde{chi}> - size_t size = stowf.chi0->size(); - this->stowf.shchi - = new psi::Psi(this->kv.get_nks(), - this->stowf.nchip_max, - this->pw_wfc->npwk_max, - this->kv.ngk, - true); - ModuleBase::Memory::record("SDFT::shchi", size * sizeof(T)); + size_t size = stowf.chi0->size (); + this->stowf.shchi = new psi::Psi (this->kv.get_nks (), + this->stowf.nchip_max, + this->pw_wfc->npwk_max, + this->kv.ngk, + true); + ModuleBase::Memory::record ("SDFT::shchi", size * sizeof (T)); if (inp.nbands > 0) - { - this->stowf.chiortho - = new psi::Psi(this->kv.get_nks(), - this->stowf.nchip_max, - this->pw_wfc->npwk_max, - this->kv.ngk, true); - ModuleBase::Memory::record("SDFT::chiortho", size * sizeof(T)); - } + { + this->stowf.chiortho = new psi::Psi (this->kv.get_nks (), + this->stowf.nchip_max, + this->pw_wfc->npwk_max, + this->kv.ngk, + true); + ModuleBase::Memory::record ("SDFT::chiortho", size * sizeof (T)); + } return; } template -void ESolver_SDFT_PW::before_scf(UnitCell& ucell, const int istep) +void + ESolver_SDFT_PW::before_scf (UnitCell& ucell, const int istep) { - 
ModuleBase::TITLE("ESolver_SDFT_PW", "before_scf"); - ModuleBase::timer::start("ESolver_SDFT_PW", "before_scf"); - - ESolver_KS_PW::before_scf(ucell, istep); - delete reinterpret_cast*>(this->p_hamilt); - this->p_hamilt = new hamilt::HamiltSdftPW(this->pelec->pot, - this->pw_wfc, - &this->kv, - &this->ppcell, - &ucell, - PARAM.globalv.npol, - &this->stoche.emin_sto, - &this->stoche.emax_sto); - this->p_hamilt_sto = static_cast*>(this->p_hamilt); + ModuleBase::TITLE ("ESolver_SDFT_PW", "before_scf"); + ModuleBase::timer::start ("ESolver_SDFT_PW", "before_scf"); + + ESolver_KS_PW::before_scf (ucell, istep); + delete reinterpret_cast*> (this->p_hamilt); + this->p_hamilt = new hamilt::HamiltSdftPW (this->pelec->pot, + this->pw_wfc, + &this->kv, + &this->ppcell, + &ucell, + PARAM.globalv.npol, + &this->stoche.emin_sto, + &this->stoche.emax_sto); + this->p_hamilt_sto = static_cast*> (this->p_hamilt); if (istep > 0 && PARAM.inp.nbands_sto != 0 && PARAM.inp.initsto_freq > 0 && istep % PARAM.inp.initsto_freq == 0) - { - this->stowf.update_sto_orbitals(PARAM.inp.seed_sto); - } + { + this->stowf.update_sto_orbitals (PARAM.inp.seed_sto); + } - ModuleBase::timer::end("ESolver_SDFT_PW", "before_scf"); + ModuleBase::timer::end ("ESolver_SDFT_PW", "before_scf"); } template -void ESolver_SDFT_PW::iter_finish(UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) +void + ESolver_SDFT_PW::iter_finish (UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) { // call iter_finish() of ESolver_KS - ESolver_KS::iter_finish(ucell, istep, iter, conv_esolver); + ESolver_KS::iter_finish (ucell, istep, iter, conv_esolver); } template -void ESolver_SDFT_PW::after_scf(UnitCell& ucell, const int istep, const bool conv_esolver) +void + ESolver_SDFT_PW::after_scf (UnitCell& ucell, const int istep, const bool conv_esolver) { - ModuleBase::TITLE("ESolver_SDFT_PW", "after_scf"); - ModuleBase::timer::start("ESolver_SDFT_PW", "after_scf"); + ModuleBase::TITLE ("ESolver_SDFT_PW", 
"after_scf"); + ModuleBase::timer::start ("ESolver_SDFT_PW", "after_scf"); // 1) call after_scf() of ESolver_KS_PW - ESolver_KS_PW::after_scf(ucell, istep, conv_esolver); + ESolver_KS_PW::after_scf (ucell, istep, conv_esolver); - ModuleBase::timer::end("ESolver_SDFT_PW", "after_scf"); + ModuleBase::timer::end ("ESolver_SDFT_PW", "after_scf"); } template -void ESolver_SDFT_PW::hamilt2rho_single(UnitCell& ucell, int istep, int iter, double ethr) +void + ESolver_SDFT_PW::hamilt2rho_single (UnitCell& ucell, int istep, int iter, double ethr) { - ModuleBase::TITLE("ESolver_SDFT_PW", "hamilt2rho"); - ModuleBase::timer::start("ESolver_SDFT_PW", "hamilt2rho"); + ModuleBase::TITLE ("ESolver_SDFT_PW", "hamilt2rho"); + ModuleBase::timer::start ("ESolver_SDFT_PW", "hamilt2rho"); // reset energy this->pelec->f_en.eband = 0.0; this->pelec->f_en.demet = 0.0; // setup diagonalization parameters for SDFT - hsolver::setup_diago_params_sdft(istep, iter, ethr, PARAM.inp); + hsolver::setup_diago_params_sdft (istep, iter, ethr, PARAM.inp); bool skip_charge = PARAM.inp.calculation == "nscf" ? 
true : false; // hsolver only exists in this function - hsolver::HSolverPW_SDFT hsolver_pw_sdft_obj(&this->kv, - this->pw_wfc, - this->stowf, - this->stoche, - this->p_hamilt_sto, - PARAM.inp.calculation, - PARAM.inp.basis_type, - PARAM.inp.ks_solver, - PARAM.globalv.use_uspp, - PARAM.inp.nspin, - hsolver::DiagoIterAssist::SCF_ITER, - hsolver::DiagoIterAssist::PW_DIAG_NMAX, - hsolver::DiagoIterAssist::PW_DIAG_THR, - hsolver::DiagoIterAssist::need_subspace); - - hsolver_pw_sdft_obj.solve(ucell, - static_cast*>(this->p_hamilt), - *this->stp.template get_psi_t(), - this->stp.psi_cpu[0], - this->pelec, - this->pw_wfc, - this->stowf, - istep, - iter, - skip_charge); + hsolver::HSolverPW_SDFT hsolver_pw_sdft_obj (&this->kv, + this->pw_wfc, + this->stowf, + this->stoche, + this->p_hamilt_sto, + PARAM.inp.calculation, + PARAM.inp.basis_type, + PARAM.inp.ks_solver, + PARAM.globalv.use_uspp, + PARAM.inp.nspin, + hsolver::DiagoIterAssist::SCF_ITER, + hsolver::DiagoIterAssist::PW_DIAG_NMAX, + hsolver::DiagoIterAssist::PW_DIAG_THR, + hsolver::DiagoIterAssist::need_subspace); + + hsolver_pw_sdft_obj.solve (ucell, + static_cast*> (this->p_hamilt), + *this->stp.template get_psi_t (), + this->stp.psi_cpu[0], + this->pelec, + this->pw_wfc, + this->stowf, + istep, + iter, + skip_charge); // set_diagethr need it this->esolver_KS_ne = hsolver_pw_sdft_obj.stoiter.KS_ne; if (PARAM.globalv.ks_run) - { - Symmetry_rho::symmetrize_rho(PARAM.inp.nspin, this->chr, this->pw_rho, ucell.symm); - this->pelec->f_en.deband = this->pelec->cal_delta_eband(ucell); - } - else - { -#ifdef __MPI - if (ModuleSymmetry::Symmetry::symm_flag == 1) { - MPI_Barrier(MPI_COMM_WORLD); + Symmetry_rho::symmetrize_rho (PARAM.inp.nspin, this->chr, this->pw_rho, ucell.symm); + this->pelec->f_en.deband = this->pelec->cal_delta_eband (ucell); } + else + { +#ifdef __MPI + if (ModuleSymmetry::Symmetry::symm_flag == 1) + { + MPI_Barrier (MPI_COMM_WORLD); + } #endif - } + } #ifdef __MPI - 
MPI_Bcast(&(this->pelec->f_en.deband), 1, MPI_DOUBLE, 0, BP_WORLD); + MPI_Bcast (&(this->pelec->f_en.deband), 1, MPI_DOUBLE, 0, BP_WORLD); #endif - ModuleBase::timer::end("ESolver_SDFT_PW", "hamilt2rho"); + ModuleBase::timer::end ("ESolver_SDFT_PW", "hamilt2rho"); } template -double ESolver_SDFT_PW::cal_energy() +double + ESolver_SDFT_PW::cal_energy () { return this->pelec->f_en.etot; } template -void ESolver_SDFT_PW::cal_force(UnitCell& ucell, ModuleBase::matrix& force) +void + ESolver_SDFT_PW::cal_force (UnitCell& ucell, ModuleBase::matrix& force) { - Sto_Forces ff(ucell.nat); - - ff.cal_stoforce(force, - *this->pelec, - this->pw_rho, - &ucell.symm, - &this->sf, - &this->kv, - this->pw_wfc, - this->locpp, - this->ppcell, - ucell, - *this->stp.template get_psi_t(), - this->stowf); + Sto_Forces ff (ucell.nat); + + ff.cal_stoforce (force, + *this->pelec, + this->pw_rho, + &ucell.symm, + &this->sf, + &this->kv, + this->pw_wfc, + this->locpp, + this->ppcell, + ucell, + *this->stp.template get_psi_t (), + this->stowf); } template -void ESolver_SDFT_PW::cal_stress(UnitCell& ucell, ModuleBase::matrix& stress) +void + ESolver_SDFT_PW::cal_stress (UnitCell& ucell, ModuleBase::matrix& stress) { Sto_Stress_PW ss; - ss.cal_stress(stress, - *this->pelec, - this->pw_rho, - &ucell.symm, - &this->sf, - &this->kv, - this->pw_wfc, - *this->stp.template get_psi_t(), - this->stowf, - &this->chr, - &this->locpp, - &this->ppcell, - ucell); + ss.cal_stress (stress, + *this->pelec, + this->pw_rho, + &ucell.symm, + &this->sf, + &this->kv, + this->pw_wfc, + *this->stp.template get_psi_t (), + this->stowf, + &this->chr, + &this->locpp, + &this->ppcell, + ucell); } template -void ESolver_SDFT_PW::after_all_runners(UnitCell& ucell) +void + ESolver_SDFT_PW::after_all_runners (UnitCell& ucell) { // 1) write down etot and eigenvalues (for MDFT) information - ESolver_FP::after_all_runners(ucell); + ESolver_FP::after_all_runners (ucell); // 2) release memory if (this->method_sto == 2) - { - 
stowf.clean_chiallorder(); // release lots of memories - } + { + stowf.clean_chiallorder (); // release lots of memories + } // 3) write down DOS if (PARAM.inp.out_dos) - { - if(!std::is_same>::value || !std::is_same::value) { - ModuleBase::WARNING_QUIT("ESolver_SDFT_PW", "DOS does not support complex float or GPU yet."); + if (!std::is_same>::value || !std::is_same::value) + { + ModuleBase::WARNING_QUIT ("ESolver_SDFT_PW", "DOS does not support complex float or GPU yet."); + } + Sto_DOS sto_dos ( + this->pw_wfc, + &this->kv, + this->pelec, + reinterpret_cast>*> (this->stp.psi_cpu), + reinterpret_cast>*> (this->p_hamilt), + this->stoche, + reinterpret_cast, base_device::DEVICE_CPU>*> (&stowf)); + sto_dos.decide_param (PARAM.inp.dos_nche, + PARAM.inp.emin_sto, + PARAM.inp.emax_sto, + PARAM.globalv.dos_setemin, + PARAM.globalv.dos_setemax, + PARAM.inp.dos_emin_ev, + PARAM.inp.dos_emax_ev, + PARAM.inp.dos_scale); + sto_dos.caldos (PARAM.inp.dos_sigma, PARAM.inp.dos_edelta_ev, PARAM.inp.npart_sto); } - Sto_DOS sto_dos( - this->pw_wfc, - &this->kv, - this->pelec, - reinterpret_cast>*>(this->stp.psi_cpu), - reinterpret_cast>*>(this->p_hamilt), - this->stoche, - reinterpret_cast, base_device::DEVICE_CPU>*>(&stowf)); - sto_dos.decide_param(PARAM.inp.dos_nche, - PARAM.inp.emin_sto, - PARAM.inp.emax_sto, - PARAM.globalv.dos_setemin, - PARAM.globalv.dos_setemax, - PARAM.inp.dos_emin_ev, - PARAM.inp.dos_emax_ev, - PARAM.inp.dos_scale); - sto_dos.caldos(PARAM.inp.dos_sigma, PARAM.inp.dos_edelta_ev, PARAM.inp.npart_sto); - } // 4) sKG cost memory, and it should be placed at the end of the program if (PARAM.inp.cal_cond) - { - Sto_EleCond sto_elecond(&ucell, - &this->kv, - this->pelec, - this->pw_wfc, - this->stp.template get_psi_t(), - &this->ppcell, - static_cast, Device>*>(this->p_hamilt), - this->stoche, - &stowf); - sto_elecond.decide_nche(PARAM.inp.cond_dt, 1e-8, this->nche_sto, PARAM.inp.emin_sto, PARAM.inp.emax_sto); - sto_elecond.sKG(PARAM.inp.cond_smear, - 
PARAM.inp.cond_fwhm, - PARAM.inp.cond_wcut, - PARAM.inp.cond_dw, - PARAM.inp.cond_dt, - PARAM.inp.cond_nonlocal, - PARAM.inp.npart_sto); - } + { + Sto_EleCond sto_elecond ( + &ucell, + &this->kv, + this->pelec, + this->pw_wfc, + this->stp.template get_psi_t (), + &this->ppcell, + static_cast, Device>*> (this->p_hamilt), + this->stoche, + &stowf); + sto_elecond.decide_nche (PARAM.inp.cond_dt, 1e-8, this->nche_sto, PARAM.inp.emin_sto, PARAM.inp.emax_sto); + sto_elecond.sKG (PARAM.inp.cond_smear, + PARAM.inp.cond_fwhm, + PARAM.inp.cond_wcut, + PARAM.inp.cond_dw, + PARAM.inp.cond_dt, + PARAM.inp.cond_nonlocal, + PARAM.inp.npart_sto); + } } - // template class ESolver_SDFT_PW, base_device::DEVICE_CPU>; template class ESolver_SDFT_PW, base_device::DEVICE_CPU>; #if ((defined __CUDA) || (defined __ROCM)) diff --git a/source/source_esolver/esolver_sdft_pw.h b/source/source_esolver/esolver_sdft_pw.h index 68350da43e4..5137b7ab770 100644 --- a/source/source_esolver/esolver_sdft_pw.h +++ b/source/source_esolver/esolver_sdft_pw.h @@ -15,17 +15,18 @@ class ESolver_SDFT_PW : public ESolver_KS_PW { private: using Real = typename GetTypeReal::type; + public: - ESolver_SDFT_PW(); - ~ESolver_SDFT_PW(); + ESolver_SDFT_PW (); + ~ESolver_SDFT_PW (); - void before_all_runners(UnitCell& ucell, const Input_para& inp) override; + void before_all_runners (UnitCell& ucell, const Input_para& inp) override; - double cal_energy() override; + double cal_energy () override; - void cal_force(UnitCell& ucell, ModuleBase::matrix& force) override; + void cal_force (UnitCell& ucell, ModuleBase::matrix& force) override; - void cal_stress(UnitCell& ucell, ModuleBase::matrix& stress) override; + void cal_stress (UnitCell& ucell, ModuleBase::matrix& stress) override; public: Stochastic_WF stowf; @@ -33,15 +34,15 @@ class ESolver_SDFT_PW : public ESolver_KS_PW hamilt::HamiltSdftPW* p_hamilt_sto = nullptr; protected: - virtual void before_scf(UnitCell& ucell, const int istep) override; + virtual void 
before_scf (UnitCell& ucell, const int istep) override; - virtual void hamilt2rho_single(UnitCell& ucell, const int istep, const int iter, const double ethr) override; + virtual void hamilt2rho_single (UnitCell& ucell, const int istep, const int iter, const double ethr) override; - virtual void iter_finish(UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) override; + virtual void iter_finish (UnitCell& ucell, const int istep, int& iter, bool& conv_esolver) override; - virtual void after_scf(UnitCell& ucell, const int istep, const bool conv_esolver) override; + virtual void after_scf (UnitCell& ucell, const int istep, const bool conv_esolver) override; - virtual void after_all_runners(UnitCell& ucell) override; + virtual void after_all_runners (UnitCell& ucell) override; private: int nche_sto; ///< norder of Chebyshev diff --git a/source/source_esolver/lcao_others.cpp b/source/source_esolver/lcao_others.cpp index b3ad0c71499..c04acaf7ded 100644 --- a/source/source_esolver/lcao_others.cpp +++ b/source/source_esolver/lcao_others.cpp @@ -32,83 +32,84 @@ namespace ModuleESolver { template -void ESolver_KS_LCAO::others(UnitCell& ucell, const int istep) +void + ESolver_KS_LCAO::others (UnitCell& ucell, const int istep) { - ModuleBase::TITLE("ESolver_KS_LCAO", "others"); - ModuleBase::timer::start("ESolver_KS_LCAO", "others"); + ModuleBase::TITLE ("ESolver_KS_LCAO", "others"); + ModuleBase::timer::start ("ESolver_KS_LCAO", "others"); const std::string cal_type = PARAM.inp.calculation; if (cal_type == "test_memory") - { - std::cout << FmtCore::format("\n * * * * * *\n << Start %s.\n", "testing memory"); - Cal_Test::test_memory(ucell.nat, - ucell.ntype, - ucell.GGT, - this->pw_rho, - this->pw_wfc, - this->p_chgmix->get_mixing_mode(), - this->p_chgmix->get_mixing_ndim()); - std::cout << FmtCore::format(" >> Finish %s.\n * * * * * *\n", "testing memory"); - return; - } + { + std::cout << FmtCore::format ("\n * * * * * *\n << Start %s.\n", "testing memory"); + 
Cal_Test::test_memory (ucell.nat, + ucell.ntype, + ucell.GGT, + this->pw_rho, + this->pw_wfc, + this->p_chgmix->get_mixing_mode (), + this->p_chgmix->get_mixing_ndim ()); + std::cout << FmtCore::format (" >> Finish %s.\n * * * * * *\n", "testing memory"); + return; + } else if (cal_type == "test_neighbour") - { - // test_search_neighbor(); - std::cout << FmtCore::format("\n * * * * * *\n << Start %s.\n", "testing neighbour"); - double search_radius = PARAM.inp.search_radius; - atom_arrange::search(PARAM.globalv.search_pbc, - GlobalV::ofs_running, - this->gd, - ucell, - search_radius, - PARAM.inp.test_atom_input, - true); - std::cout << FmtCore::format(" >> Finish %s.\n * * * * * *\n", "testing neighbour"); - return; - } + { + // test_search_neighbor(); + std::cout << FmtCore::format ("\n * * * * * *\n << Start %s.\n", "testing neighbour"); + double search_radius = PARAM.inp.search_radius; + atom_arrange::search (PARAM.globalv.search_pbc, + GlobalV::ofs_running, + this->gd, + ucell, + search_radius, + PARAM.inp.test_atom_input, + true); + std::cout << FmtCore::format (" >> Finish %s.\n * * * * * *\n", "testing neighbour"); + return; + } else if (cal_type == "gen_opt_abfs") - { - return; - } + { + return; + } // 1. prepare HS matrices, prepare grid integral // (1) Find adjacent atoms for each atom. 
- double search_radius = atom_arrange::set_sr_NL(GlobalV::ofs_running, - PARAM.inp.out_level, - orb_.get_rcutmax_Phi(), - ucell.infoNL.get_rcutmax_Beta(), - PARAM.globalv.gamma_only_local); + double search_radius = atom_arrange::set_sr_NL (GlobalV::ofs_running, + PARAM.inp.out_level, + orb_.get_rcutmax_Phi (), + ucell.infoNL.get_rcutmax_Beta (), + PARAM.globalv.gamma_only_local); - atom_arrange::search(PARAM.globalv.search_pbc, - GlobalV::ofs_running, - this->gd, - ucell, - search_radius, - PARAM.inp.test_atom_input); + atom_arrange::search (PARAM.globalv.search_pbc, + GlobalV::ofs_running, + this->gd, + ucell, + search_radius, + PARAM.inp.test_atom_input); // (3) Periodic condition search for each grid. - gint_info_.reset(new ModuleGint::GintInfo(this->pw_big->nbx, - this->pw_big->nby, - this->pw_big->nbz, - this->pw_rho->nx, - this->pw_rho->ny, - this->pw_rho->nz, - 0, - 0, - this->pw_big->nbzp_start, - this->pw_big->nbx, - this->pw_big->nby, - this->pw_big->nbzp, - orb_.Phi, - ucell, - this->gd)); - ModuleGint::Gint::set_gint_info(gint_info_.get()); + gint_info_.reset (new ModuleGint::GintInfo (this->pw_big->nbx, + this->pw_big->nby, + this->pw_big->nbz, + this->pw_rho->nx, + this->pw_rho->ny, + this->pw_rho->nz, + 0, + 0, + this->pw_big->nbzp_start, + this->pw_big->nbx, + this->pw_big->nby, + this->pw_big->nbzp, + orb_.Phi, + ucell, + this->gd)); + ModuleGint::Gint::set_gint_info (gint_info_.get ()); // (2)For each atom, calculate the adjacent atoms in different cells // and allocate the space for H(R) and S(R). // If k point is used here, allocate HlocR after atom_arrange. - this->RA.for_2d(ucell, this->gd, this->pv, PARAM.globalv.gamma_only_local, orb_.cutoffs()); + this->RA.for_2d (ucell, this->gd, this->pv, PARAM.globalv.gamma_only_local, orb_.cutoffs ()); // 2. 
density matrix extrapolation @@ -122,151 +123,151 @@ void ESolver_KS_LCAO::others(UnitCell& ucell, const int istep) // init Hamiltonian if (this->p_hamilt != nullptr) - { - delete this->p_hamilt; - this->p_hamilt = nullptr; - } + { + delete this->p_hamilt; + this->p_hamilt = nullptr; + } if (this->p_hamilt == nullptr) - { - this->p_hamilt = new hamilt::HamiltLCAO(ucell, - this->gd, - &this->pv, - this->pelec->pot, - this->kv, - two_center_bundle_, - orb_, - this->dmat.dm, - &this->dftu, - this->deepks, - istep, - this->exx_nao); - } + { + this->p_hamilt = new hamilt::HamiltLCAO (ucell, + this->gd, + &this->pv, + this->pelec->pot, + this->kv, + two_center_bundle_, + orb_, + this->dmat.dm, + &this->dftu, + this->deepks, + istep, + this->exx_nao); + } // for each ionic step, the overlap must be rebuilt // since it depends on ionic positions - this->deepks.build_overlap(ucell, orb_, pv, gd, *(two_center_bundle_.overlap_orb_alpha), PARAM.inp); + this->deepks.build_overlap (ucell, orb_, pv, gd, *(two_center_bundle_.overlap_orb_alpha), PARAM.inp); if (PARAM.inp.sc_mag_switch) - { - spinconstrain::SpinConstrain& sc = spinconstrain::SpinConstrain::getScInstance(); - sc.init_sc(PARAM.inp.sc_thr, - PARAM.inp.nsc, - PARAM.inp.nsc_min, - PARAM.inp.alpha_trial, - PARAM.inp.sccut, - PARAM.inp.sc_drop_thr, - ucell, - &(this->pv), - PARAM.inp.nspin, - this->kv, - this->p_hamilt, - this->psi, - this->dmat.dm, - this->pelec); - } + { + spinconstrain::SpinConstrain& sc = spinconstrain::SpinConstrain::getScInstance (); + sc.init_sc (PARAM.inp.sc_thr, + PARAM.inp.nsc, + PARAM.inp.nsc_min, + PARAM.inp.alpha_trial, + PARAM.inp.sccut, + PARAM.inp.sc_drop_thr, + ucell, + &(this->pv), + PARAM.inp.nspin, + this->kv, + this->p_hamilt, + this->psi, + this->dmat.dm, + this->pelec); + } //========================================================= // cal_ux should be called before init_scf because // the direction of ux is used in noncoline_rho 
//========================================================= - elecstate::cal_ux(ucell); + elecstate::cal_ux (ucell); // pelec should be initialized before these calculations - this->pelec->init_scf(ucell, this->Pgrid, this->sf.strucFac, this->locpp.numeric, ucell.symm); + this->pelec->init_scf (ucell, this->Pgrid, this->sf.strucFac, this->locpp.numeric, ucell.symm); // self consistent calculations for electronic ground state if (cal_type == "get_pchg") - { - std::cout << FmtCore::format("\n * * * * * *\n << Start %s.\n", "getting partial charge"); - Get_pchg_lcao get_pchg(this->psi, &(this->pv)); - if (PARAM.globalv.gamma_only_local) { - get_pchg.begin(this->chr.rho, - this->pelec->wg, - this->pelec->eferm.get_all_ef(), - this->pw_rhod->nrxx, - PARAM.inp.out_pchg, - PARAM.inp.nbands, - PARAM.inp.nelec, - PARAM.inp.nspin, - &ucell, - this->Pgrid, - &this->gd, - this->kv, - PARAM.globalv.global_out_dir, - GlobalV::ofs_running); + std::cout << FmtCore::format ("\n * * * * * *\n << Start %s.\n", "getting partial charge"); + Get_pchg_lcao get_pchg (this->psi, &(this->pv)); + if (PARAM.globalv.gamma_only_local) + { + get_pchg.begin (this->chr.rho, + this->pelec->wg, + this->pelec->eferm.get_all_ef (), + this->pw_rhod->nrxx, + PARAM.inp.out_pchg, + PARAM.inp.nbands, + PARAM.inp.nelec, + PARAM.inp.nspin, + &ucell, + this->Pgrid, + &this->gd, + this->kv, + PARAM.globalv.global_out_dir, + GlobalV::ofs_running); + } + else + { + get_pchg.begin (this->chr.rho, + this->chr.rhog, + this->pelec->wg, + this->pelec->eferm.get_all_ef (), + this->pw_rhod, + this->pw_rhod->nrxx, + PARAM.inp.out_pchg, + PARAM.inp.nbands, + PARAM.inp.nelec, + PARAM.inp.nspin, + &ucell, + this->Pgrid, + &this->gd, + this->kv, + PARAM.globalv.global_out_dir, + GlobalV::ofs_running, + PARAM.inp.if_separate_k, + this->chr.ngmc); + } + std::cout << FmtCore::format (" >> Finish %s.\n * * * * * *\n", "getting partial charge"); } - else - { - get_pchg.begin(this->chr.rho, - this->chr.rhog, - this->pelec->wg, - 
this->pelec->eferm.get_all_ef(), - this->pw_rhod, - this->pw_rhod->nrxx, - PARAM.inp.out_pchg, - PARAM.inp.nbands, - PARAM.inp.nelec, - PARAM.inp.nspin, - &ucell, - this->Pgrid, - &this->gd, - this->kv, - PARAM.globalv.global_out_dir, - GlobalV::ofs_running, - PARAM.inp.if_separate_k, - this->chr.ngmc); - } - std::cout << FmtCore::format(" >> Finish %s.\n * * * * * *\n", "getting partial charge"); - } else if (cal_type == "get_wf") - { - std::cout << FmtCore::format("\n * * * * * *\n << Start %s.\n", "getting wave function"); - Get_wf_lcao get_wf(this->pelec); - if (PARAM.globalv.gamma_only_local) { - get_wf.begin(ucell, - this->psi, - this->pw_wfc, - this->Pgrid, - this->pv, - PARAM.inp.out_wfc_pw, - this->kv, - PARAM.inp.nelec, - PARAM.inp.out_wfc_norm, - PARAM.inp.out_wfc_re_im, - PARAM.inp.nbands, - PARAM.inp.nspin, - PARAM.globalv.nlocal, - PARAM.globalv.global_out_dir, - GlobalV::ofs_running); + std::cout << FmtCore::format ("\n * * * * * *\n << Start %s.\n", "getting wave function"); + Get_wf_lcao get_wf (this->pelec); + if (PARAM.globalv.gamma_only_local) + { + get_wf.begin (ucell, + this->psi, + this->pw_wfc, + this->Pgrid, + this->pv, + PARAM.inp.out_wfc_pw, + this->kv, + PARAM.inp.nelec, + PARAM.inp.out_wfc_norm, + PARAM.inp.out_wfc_re_im, + PARAM.inp.nbands, + PARAM.inp.nspin, + PARAM.globalv.nlocal, + PARAM.globalv.global_out_dir, + GlobalV::ofs_running); + } + else + { + get_wf.begin (ucell, + this->psi, + this->pw_wfc, + this->Pgrid, + this->pv, + PARAM.inp.out_wfc_pw, + this->kv, + PARAM.inp.nelec, + PARAM.inp.out_wfc_norm, + PARAM.inp.out_wfc_re_im, + PARAM.inp.nbands, + PARAM.inp.nspin, + PARAM.globalv.nlocal, + PARAM.globalv.global_out_dir, + GlobalV::ofs_running); + } + std::cout << FmtCore::format (" >> Finish %s.\n * * * * * *\n", "getting wave function"); } - else + else { - get_wf.begin(ucell, - this->psi, - this->pw_wfc, - this->Pgrid, - this->pv, - PARAM.inp.out_wfc_pw, - this->kv, - PARAM.inp.nelec, - PARAM.inp.out_wfc_norm, - 
PARAM.inp.out_wfc_re_im, - PARAM.inp.nbands, - PARAM.inp.nspin, - PARAM.globalv.nlocal, - PARAM.globalv.global_out_dir, - GlobalV::ofs_running); + ModuleBase::WARNING_QUIT ("ESolver_KS_LCAO::others", "CALCULATION type not supported"); } - std::cout << FmtCore::format(" >> Finish %s.\n * * * * * *\n", "getting wave function"); - } - else - { - ModuleBase::WARNING_QUIT("ESolver_KS_LCAO::others", "CALCULATION type not supported"); - } - ModuleBase::timer::end("ESolver_KS_LCAO", "others"); + ModuleBase::timer::end ("ESolver_KS_LCAO", "others"); return; } diff --git a/source/source_esolver/pw_others.cpp b/source/source_esolver/pw_others.cpp index 49f7465b46e..2dac9a32bc1 100644 --- a/source/source_esolver/pw_others.cpp +++ b/source/source_esolver/pw_others.cpp @@ -9,41 +9,42 @@ // mohan add 2025-03-06 #include "source_io/module_output/cal_test.h" -namespace ModuleESolver { +namespace ModuleESolver +{ template -void ESolver_KS_PW::others(UnitCell& ucell, const int istep) +void + ESolver_KS_PW::others (UnitCell& ucell, const int istep) { - ModuleBase::TITLE("ESolver_KS_PW", "others"); + ModuleBase::TITLE ("ESolver_KS_PW", "others"); const std::string cal_type = PARAM.inp.calculation; - if (cal_type == "test_memory") - { - Cal_Test::test_memory(ucell.nat, - ucell.ntype, - ucell.GGT, - this->pw_rho, - this->pw_wfc, - this->p_chgmix->get_mixing_mode(), - this->p_chgmix->get_mixing_ndim()); - } - else if (cal_type == "gen_bessel") - { - Numerical_Descriptor nc; - nc.output_descriptor(ucell, - *(this->stp.psi_cpu), - PARAM.inp.bessel_descriptor_lmax, - PARAM.inp.bessel_descriptor_rcut, - PARAM.inp.bessel_descriptor_tolerence, - this->kv.get_nks()); - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "GENERATE DESCRIPTOR FOR DEEPKS"); - } - else - { - ModuleBase::WARNING_QUIT("ESolver_KS_PW::others", - "CALCULATION type not supported"); - } + if (cal_type == "test_memory") + { + Cal_Test::test_memory (ucell.nat, + ucell.ntype, + ucell.GGT, + this->pw_rho, + this->pw_wfc, + 
this->p_chgmix->get_mixing_mode (), + this->p_chgmix->get_mixing_ndim ()); + } + else if (cal_type == "gen_bessel") + { + Numerical_Descriptor nc; + nc.output_descriptor (ucell, + *(this->stp.psi_cpu), + PARAM.inp.bessel_descriptor_lmax, + PARAM.inp.bessel_descriptor_rcut, + PARAM.inp.bessel_descriptor_tolerence, + this->kv.get_nks ()); + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "GENERATE DESCRIPTOR FOR DEEPKS"); + } + else + { + ModuleBase::WARNING_QUIT ("ESolver_KS_PW::others", "CALCULATION type not supported"); + } return; } diff --git a/source/source_esolver/test/esolver_dp_test.cpp b/source/source_esolver/test/esolver_dp_test.cpp index f25b89ab755..52dd621d884 100644 --- a/source/source_esolver/test/esolver_dp_test.cpp +++ b/source/source_esolver/test/esolver_dp_test.cpp @@ -25,10 +25,11 @@ class ESolverDPTest : public ::testing::Test { protected: - void SetUp() override + void + SetUp () override { // Initialize variables before each test - esolver = new ModuleESolver::ESolver_DP("./support/case_1.pb"); + esolver = new ModuleESolver::ESolver_DP ("./support/case_1.pb"); ucell.iat2it = new int[2]; ucell.iat2it[0] = 0; ucell.iat2it[1] = 1; @@ -40,16 +41,17 @@ class ESolverDPTest : public ::testing::Test ucell.atoms = new Atom[2]; ucell.atoms[0].na = 1; ucell.atoms[1].na = 1; - ucell.atoms[0].taud.resize(1, ModuleBase::Vector3(0.0, 0.0, 0.0)); - ucell.atoms[1].taud.resize(1, ModuleBase::Vector3(0.0, 0.0, 0.0)); + ucell.atoms[0].taud.resize (1, ModuleBase::Vector3 (0.0, 0.0, 0.0)); + ucell.atoms[1].taud.resize (1, ModuleBase::Vector3 (0.0, 0.0, 0.0)); - ucell.atom_label.resize(ucell.ntype); + ucell.atom_label.resize (ucell.ntype); ucell.atom_label[0] = "Cu"; ucell.atom_label[1] = "Al"; - esolver->before_all_runners(ucell, inp); + esolver->before_all_runners (ucell, inp); } - void TearDown() override + void + TearDown () override { // Clean up after each test delete esolver; @@ -62,117 +64,117 @@ class ESolverDPTest : public ::testing::Test }; // Test 
the Init() funciton case 1 -TEST_F(ESolverDPTest, InitCase1) +TEST_F (ESolverDPTest, InitCase1) { // Check the initialized variables - EXPECT_DOUBLE_EQ(esolver->dp_potential, 0.0); + EXPECT_DOUBLE_EQ (esolver->dp_potential, 0.0); for (int i = 0; i < 3; ++i) - { - for (int j = 0; j < 3; ++j) { - EXPECT_DOUBLE_EQ(esolver->dp_virial(i, j), 0.0); + for (int j = 0; j < 3; ++j) + { + EXPECT_DOUBLE_EQ (esolver->dp_virial (i, j), 0.0); + } } - } for (int i = 0; i < ucell.nat; ++i) - { - for (int j = 0; j < 3; ++j) { - EXPECT_DOUBLE_EQ(esolver->dp_force(i, j), 0.0); + for (int j = 0; j < 3; ++j) + { + EXPECT_DOUBLE_EQ (esolver->dp_force (i, j), 0.0); + } } - } - EXPECT_EQ(esolver->atype[0], 0); - EXPECT_EQ(esolver->atype[1], 0); + EXPECT_EQ (esolver->atype[0], 0); + EXPECT_EQ (esolver->atype[1], 0); } // Test the Run() funciton WARNING_QUIT -TEST_F(ESolverDPTest, RunWarningQuit) +TEST_F (ESolverDPTest, RunWarningQuit) { int istep = 0; - testing::internal::CaptureStdout(); + testing::internal::CaptureStdout (); - EXPECT_EXIT(esolver->runner(ucell, istep), ::testing::ExitedWithCode(1), ""); + EXPECT_EXIT (esolver->runner (ucell, istep), ::testing::ExitedWithCode (1), ""); - std::string output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("Please recompile with -D__DPMD")); + std::string output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("Please recompile with -D__DPMD")); } // Test the cal_energy() funciton -TEST_F(ESolverDPTest, CalEnergy) +TEST_F (ESolverDPTest, CalEnergy) { double etot = 0.0; esolver->dp_potential = 9.8; - etot = esolver->cal_energy(); + etot = esolver->cal_energy (); // Check the results - EXPECT_DOUBLE_EQ(etot, 9.8); + EXPECT_DOUBLE_EQ (etot, 9.8); } // Test the cal_Force() funciton -TEST_F(ESolverDPTest, CalForce) +TEST_F (ESolverDPTest, CalForce) { - ModuleBase::matrix force(ucell.nat, 3); + ModuleBase::matrix force (ucell.nat, 3); for (int i = 0; i < ucell.nat; ++i) - { - 
for (int j = 0; j < 3; ++j) { - esolver->dp_force(i, j) = 3.0 * i + j; + for (int j = 0; j < 3; ++j) + { + esolver->dp_force (i, j) = 3.0 * i + j; + } } - } - esolver->cal_force(ucell, force); + esolver->cal_force (ucell, force); // Check the results for (int i = 0; i < ucell.nat; ++i) - { - for (int j = 0; j < 3; ++j) { - EXPECT_DOUBLE_EQ(force(i, j), 3.0 * i + j); + for (int j = 0; j < 3; ++j) + { + EXPECT_DOUBLE_EQ (force (i, j), 3.0 * i + j); + } } - } } // Test the cal_Stress() funciton -TEST_F(ESolverDPTest, CalStress) +TEST_F (ESolverDPTest, CalStress) { - ModuleBase::matrix stress(3, 3); + ModuleBase::matrix stress (3, 3); for (int i = 0; i < 3; ++i) - { - for (int j = 0; j < 3; ++j) { - esolver->dp_virial(i, j) = 3.0 * i + j; + for (int j = 0; j < 3; ++j) + { + esolver->dp_virial (i, j) = 3.0 * i + j; + } } - } - esolver->cal_stress(ucell, stress); + esolver->cal_stress (ucell, stress); // Check the results for (int i = 0; i < 3; ++i) - { - for (int j = 0; j < 3; ++j) { - EXPECT_DOUBLE_EQ(stress(i, j), 3.0 * i + j); + for (int j = 0; j < 3; ++j) + { + EXPECT_DOUBLE_EQ (stress (i, j), 3.0 * i + j); + } } - } } // Test the postprocess() funciton -TEST_F(ESolverDPTest, Postprocess) +TEST_F (ESolverDPTest, Postprocess) { esolver->dp_potential = 9.8; // Check the results - GlobalV::ofs_running.open("log"); - esolver->after_all_runners(ucell); - GlobalV::ofs_running.close(); + GlobalV::ofs_running.open ("log"); + esolver->after_all_runners (ucell); + GlobalV::ofs_running.close (); std::string expected_output = "\n --------------------------------------------\n !FINAL_ETOT_IS 133.3358404000000235 eV\n " "--------------------------------------------\n\n\n"; - std::ifstream ifs("log"); - std::string output((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - ifs.close(); - std::remove("log"); + std::ifstream ifs ("log"); + std::string output ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + ifs.close (); + std::remove ("log"); - 
EXPECT_EQ(expected_output, output); + EXPECT_EQ (expected_output, output); } diff --git a/source/source_esolver/test/for_test.h b/source/source_esolver/test/for_test.h index ba8c9bb4151..c583d695af8 100644 --- a/source/source_esolver/test/for_test.h +++ b/source/source_esolver/test/for_test.h @@ -3,7 +3,7 @@ #include "source_cell/unitcell.h" -UnitCell::UnitCell() +UnitCell::UnitCell () { Coordinate = "Direct"; latName = "none"; @@ -22,7 +22,7 @@ UnitCell::UnitCell() ntype = 2; nat = 2; - atom_label.resize(ntype); + atom_label.resize (ntype); atom_label[0] = "Al"; atom_label[1] = "Cu"; @@ -30,49 +30,33 @@ UnitCell::UnitCell() set_atom_flag = true; for (int it = 0; it < ntype; it++) - { - Atom* atom = &atoms[it]; - for (int ia = 0; ia < atom->na; ia++) { - for (int ik = 0; ik < 3; ++ik) - { - atom->tau[ia][ik] = 3.0 * ia + ik; - } + Atom* atom = &atoms[it]; + for (int ia = 0; ia < atom->na; ia++) + { + for (int ik = 0; ik < 3; ++ik) + { + atom->tau[ia][ik] = 3.0 * ia + ik; + } + } } - } } -UnitCell::~UnitCell() -{ -} -Magnetism::Magnetism() -{ -} -Magnetism::~Magnetism() -{ -} -Atom::Atom() +UnitCell::~UnitCell () {} +Magnetism::Magnetism () {} +Magnetism::~Magnetism () {} +Atom::Atom () { na = 1; - tau.resize(na); - mbl.resize(na); -} -Atom::~Atom() -{ -} -Atom_pseudo::Atom_pseudo() -{ -} -Atom_pseudo::~Atom_pseudo() -{ -} -pseudo::pseudo() -{ -} -pseudo::~pseudo() -{ -} -SepPot::SepPot(){} -SepPot::~SepPot(){} -Sep_Cell::Sep_Cell() noexcept {} -Sep_Cell::~Sep_Cell() noexcept {} + tau.resize (na); + mbl.resize (na); +} +Atom::~Atom () {} +Atom_pseudo::Atom_pseudo () {} +Atom_pseudo::~Atom_pseudo () {} +pseudo::pseudo () {} +pseudo::~pseudo () {} +SepPot::SepPot () {} +SepPot::~SepPot () {} +Sep_Cell::Sep_Cell () noexcept {} +Sep_Cell::~Sep_Cell () noexcept {} #endif diff --git a/source/source_estate/cal_dm.h b/source/source_estate/cal_dm.h index aede5980e0a..fbe1fe34443 100644 --- a/source/source_estate/cal_dm.h +++ b/source/source_estate/cal_dm.h @@ -10,120 +10,130 
@@ namespace elecstate { // for gamma_only(double case) and multi-k(complex case) -inline void cal_dm(const Parallel_Orbitals* ParaV, const ModuleBase::matrix& wg, const psi::Psi& wfc, std::vector& dm) +inline void + cal_dm (const Parallel_Orbitals* ParaV, + const ModuleBase::matrix& wg, + const psi::Psi& wfc, + std::vector& dm) { - ModuleBase::TITLE("elecstate", "cal_dm"); - ModuleBase::timer::start("elecstate","cal_dm"); + ModuleBase::TITLE ("elecstate", "cal_dm"); + ModuleBase::timer::start ("elecstate", "cal_dm"); - //dm.resize(wfc.get_nk(), ParaV->ncol, ParaV->nrow); - const int nbands_local = wfc.get_nbands(); - const int nbasis_local = wfc.get_nbasis(); + // dm.resize(wfc.get_nk(), ParaV->ncol, ParaV->nrow); + const int nbands_local = wfc.get_nbands (); + const int nbasis_local = wfc.get_nbasis (); // dm = wfc.T * wg * wfc.conj() // dm[is](iw1,iw2) = \sum_{ib} wfc[is](ib,iw1).T * wg(is,ib) * wfc[is](ib,iw2).conj() - for (int ik = 0; ik < wfc.get_nk(); ++ik) - { - wfc.fix_k(ik); - //dm.fix_k(ik); - dm[ik].create(ParaV->ncol, ParaV->nrow); - // wg_wfc(ib,iw) = wg[ib] * wfc(ib,iw); - psi::Psi wg_wfc(1, - wfc.get_nbands(), - wfc.get_nbasis(), - wfc.get_nbasis(), - true); - wg_wfc.set_all_psi(wfc.get_pointer(), wg_wfc.size()); - - int ib_global = 0; - for (int ib_local = 0; ib_local < nbands_local; ++ib_local) + for (int ik = 0; ik < wfc.get_nk (); ++ik) { - while (ib_local != ParaV->global2local_col(ib_global)) - { - ++ib_global; - if (ib_global >= wg.nc) + wfc.fix_k (ik); + // dm.fix_k(ik); + dm[ik].create (ParaV->ncol, ParaV->nrow); + // wg_wfc(ib,iw) = wg[ib] * wfc(ib,iw); + psi::Psi wg_wfc (1, wfc.get_nbands (), wfc.get_nbasis (), wfc.get_nbasis (), true); + wg_wfc.set_all_psi (wfc.get_pointer (), wg_wfc.size ()); + + int ib_global = 0; + for (int ib_local = 0; ib_local < nbands_local; ++ib_local) { - break; - ModuleBase::WARNING_QUIT("ElecStateLCAO::cal_dm", "please check global2local_col!"); + while (ib_local != ParaV->global2local_col (ib_global)) + { + 
++ib_global; + if (ib_global >= wg.nc) + { + break; + ModuleBase::WARNING_QUIT ("ElecStateLCAO::cal_dm", + "please check global2local_col!"); + } + } + if (ib_global >= wg.nc) + { + continue; + } + const double wg_local = wg (ik, ib_global); + double* wg_wfc_pointer = &(wg_wfc (0, ib_local, 0)); + BlasConnector::scal (nbasis_local, wg_local, wg_wfc_pointer, 1); } - } - if (ib_global >= wg.nc) { continue; -} - const double wg_local = wg(ik, ib_global); - double* wg_wfc_pointer = &(wg_wfc(0, ib_local, 0)); - BlasConnector::scal(nbasis_local, wg_local, wg_wfc_pointer, 1); - } - // C++: dm(iw1,iw2) = wfc(ib,iw1).T * wg_wfc(ib,iw2) + // C++: dm(iw1,iw2) = wfc(ib,iw1).T * wg_wfc(ib,iw2) #ifdef __MPI - psiMulPsiMpi(wg_wfc, wfc, dm[ik], ParaV->desc_wfc, ParaV->desc); + psiMulPsiMpi (wg_wfc, wfc, dm[ik], ParaV->desc_wfc, ParaV->desc); #else - psiMulPsi(wg_wfc, wfc, dm[ik]); + psiMulPsi (wg_wfc, wfc, dm[ik]); #endif - } - ModuleBase::timer::end("elecstate","cal_dm"); + } + ModuleBase::timer::end ("elecstate", "cal_dm"); return; } -inline void cal_dm(const Parallel_Orbitals* ParaV, const ModuleBase::matrix& wg, const psi::Psi>& wfc, std::vector& dm) +inline void + cal_dm (const Parallel_Orbitals* ParaV, + const ModuleBase::matrix& wg, + const psi::Psi>& wfc, + std::vector& dm) { - ModuleBase::TITLE("elecstate", "cal_dm"); - ModuleBase::timer::start("elecstate","cal_dm"); + ModuleBase::TITLE ("elecstate", "cal_dm"); + ModuleBase::timer::start ("elecstate", "cal_dm"); - //dm.resize(wfc.get_nk(), ParaV->ncol, ParaV->nrow); - const int nbands_local = wfc.get_nbands(); - const int nbasis_local = wfc.get_nbasis(); + // dm.resize(wfc.get_nk(), ParaV->ncol, ParaV->nrow); + const int nbands_local = wfc.get_nbands (); + const int nbasis_local = wfc.get_nbasis (); // dm = wfc.T * wg * wfc.conj() // dm[is](iw1,iw2) = \sum_{ib} wfc[is](ib,iw1).T * wg(is,ib) * wfc[is](ib,iw2).conj() - for (int ik = 0; ik < wfc.get_nk(); ++ik) - { - wfc.fix_k(ik); - //dm.fix_k(ik); - 
dm[ik].create(ParaV->ncol, ParaV->nrow); - // wg_wfc(ib,iw) = wg[ib] * wfc(ib,iw); - psi::Psi> wg_wfc(1, wfc.get_nbands(), wfc.get_nbasis(), wfc.get_nbasis(), true); - const std::complex* pwfc = wfc.get_pointer(); - std::complex* pwg_wfc = wg_wfc.get_pointer(); + for (int ik = 0; ik < wfc.get_nk (); ++ik) + { + wfc.fix_k (ik); + // dm.fix_k(ik); + dm[ik].create (ParaV->ncol, ParaV->nrow); + // wg_wfc(ib,iw) = wg[ib] * wfc(ib,iw); + psi::Psi> wg_wfc (1, wfc.get_nbands (), wfc.get_nbasis (), wfc.get_nbasis (), true); + const std::complex* pwfc = wfc.get_pointer (); + std::complex* pwg_wfc = wg_wfc.get_pointer (); #ifdef _OPENMP #pragma omp parallel for schedule(static, 1024) #endif - for(int i = 0;iglobal2local_col(ib_global)) - { - ++ib_global; - if (ib_global >= wg.nc) + int ib_global = 0; + for (int ib_local = 0; ib_local < nbands_local; ++ib_local) { - break; - ModuleBase::WARNING_QUIT("ElecStateLCAO::cal_dm", "please check global2local_col!"); + while (ib_local != ParaV->global2local_col (ib_global)) + { + ++ib_global; + if (ib_global >= wg.nc) + { + break; + ModuleBase::WARNING_QUIT ("ElecStateLCAO::cal_dm", + "please check global2local_col!"); + } + } + if (ib_global >= wg.nc) + { + continue; + } + const double wg_local = wg (ik, ib_global); + std::complex* wg_wfc_pointer = &(wg_wfc (0, ib_local, 0)); + BlasConnector::scal (nbasis_local, wg_local, wg_wfc_pointer, 1); } - } - if (ib_global >= wg.nc) { continue; -} - const double wg_local = wg(ik, ib_global); - std::complex* wg_wfc_pointer = &(wg_wfc(0, ib_local, 0)); - BlasConnector::scal(nbasis_local, wg_local, wg_wfc_pointer, 1); - } - // C++: dm(iw1,iw2) = wfc(ib,iw1).T * wg_wfc(ib,iw2) + // C++: dm(iw1,iw2) = wfc(ib,iw1).T * wg_wfc(ib,iw2) #ifdef __MPI - psiMulPsiMpi(wg_wfc, wfc, dm[ik], ParaV->desc_wfc, ParaV->desc); + psiMulPsiMpi (wg_wfc, wfc, dm[ik], ParaV->desc_wfc, ParaV->desc); #else - psiMulPsi(wg_wfc, wfc, dm[ik]); + psiMulPsi (wg_wfc, wfc, dm[ik]); #endif - } + } - 
ModuleBase::timer::end("elecstate","cal_dm"); + ModuleBase::timer::end ("elecstate", "cal_dm"); return; } -}//namespace elecstate +} // namespace elecstate #endif diff --git a/source/source_estate/cal_nelec_nband.cpp b/source/source_estate/cal_nelec_nband.cpp index 24b7bca9c57..465d4fcbcaa 100644 --- a/source/source_estate/cal_nelec_nband.cpp +++ b/source/source_estate/cal_nelec_nband.cpp @@ -2,140 +2,148 @@ #include "source_base/constants.h" #include "source_io/module_parameter/parameter.h" -namespace elecstate { +namespace elecstate +{ -void cal_nelec(const Atom* atoms, const int& ntype, double& nelec) +void + cal_nelec (const Atom* atoms, const int& ntype, double& nelec) { - ModuleBase::TITLE("UnitCell", "cal_nelec"); - //GlobalV::ofs_running << "\n Setup number of electrons" << std::endl; + ModuleBase::TITLE ("UnitCell", "cal_nelec"); + // GlobalV::ofs_running << "\n Setup number of electrons" << std::endl; if (nelec == 0) - { - for (int it = 0; it < ntype; it++) { - std::stringstream ss1, ss2; - ss1 << "Electron number of element " << atoms[it].label; - const double nelec_it = atoms[it].ncpp.zv * atoms[it].na; - nelec += nelec_it; - ss2 << "Total electron number of element " << atoms[it].label; + for (int it = 0; it < ntype; it++) + { + std::stringstream ss1, ss2; + ss1 << "Electron number of element " << atoms[it].label; + const double nelec_it = atoms[it].ncpp.zv * atoms[it].na; + nelec += nelec_it; + ss2 << "Total electron number of element " << atoms[it].label; - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, ss1.str(), atoms[it].ncpp.zv); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, ss2.str(), nelec_it); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, ss1.str (), atoms[it].ncpp.zv); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, ss2.str (), nelec_it); + } + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Autoset the number of electrons", nelec); } - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Autoset the number of 
electrons", nelec); - } if (PARAM.inp.nelec_delta != 0) - { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, - "nelec_delta is NOT zero, please make sure you know what you are " - "doing! nelec_delta: ", - PARAM.inp.nelec_delta); - nelec += PARAM.inp.nelec_delta; - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "nelec now: ", nelec); - } + { + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, + "nelec_delta is NOT zero, please make sure you know what you are " + "doing! nelec_delta: ", + PARAM.inp.nelec_delta); + nelec += PARAM.inp.nelec_delta; + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "nelec now: ", nelec); + } return; } -void cal_nbands(const int& nelec, const int& nlocal, const std::vector& nelec_spin, int& nbands) +void + cal_nbands (const int& nelec, const int& nlocal, const std::vector& nelec_spin, int& nbands) { if (PARAM.inp.esolver_type == "sdft") // qianrui 2021-2-20 - { - return; - } + { + return; + } //======================================= // calculate number of bands (setup.f90) //======================================= - double occupied_bands = static_cast(nelec / ModuleBase::DEGSPIN); - if (PARAM.inp.lspinorb == 1) - { - occupied_bands = static_cast(nelec); - } - - if ((occupied_bands - std::floor(occupied_bands)) > 0.0) - { - occupied_bands = std::floor(occupied_bands) + 1.0; // mohan fix 2012-04-16 - } - - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Occupied electronic states", occupied_bands); - - if (nbands == 0) - { - if (PARAM.inp.nspin == 1) + double occupied_bands = static_cast (nelec / ModuleBase::DEGSPIN); + if (PARAM.inp.lspinorb == 1) { - const int nbands1 = static_cast(occupied_bands) + 10; - const int nbands2 = static_cast(1.2 * occupied_bands) + 1; - nbands = std::max(nbands1, nbands2); - if (PARAM.inp.basis_type != "pw") { - nbands = std::min(nbands, nlocal); - } + occupied_bands = static_cast (nelec); } - else if (PARAM.inp.nspin == 4) + + if ((occupied_bands - std::floor (occupied_bands)) > 0.0) { - const 
int nbands3 = nelec + 20; - const int nbands4 = static_cast(1.2 * nelec) + 1; - nbands = std::max(nbands3, nbands4); - if (PARAM.inp.basis_type != "pw") { - nbands = std::min(nbands, nlocal); - } + occupied_bands = std::floor (occupied_bands) + 1.0; // mohan fix 2012-04-16 } - else if (PARAM.inp.nspin == 2) + + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Occupied electronic states", occupied_bands); + + if (nbands == 0) { - const double max_occ = std::max(nelec_spin[0], nelec_spin[1]); - const int nbands3 = static_cast(max_occ) + 11; - const int nbands4 = static_cast(1.2 * max_occ) + 1; - nbands = std::max(nbands3, nbands4); - if (PARAM.inp.basis_type != "pw") { - nbands = std::min(nbands, nlocal); - } + if (PARAM.inp.nspin == 1) + { + const int nbands1 = static_cast (occupied_bands) + 10; + const int nbands2 = static_cast (1.2 * occupied_bands) + 1; + nbands = std::max (nbands1, nbands2); + if (PARAM.inp.basis_type != "pw") + { + nbands = std::min (nbands, nlocal); + } + } + else if (PARAM.inp.nspin == 4) + { + const int nbands3 = nelec + 20; + const int nbands4 = static_cast (1.2 * nelec) + 1; + nbands = std::max (nbands3, nbands4); + if (PARAM.inp.basis_type != "pw") + { + nbands = std::min (nbands, nlocal); + } + } + else if (PARAM.inp.nspin == 2) + { + const double max_occ = std::max (nelec_spin[0], nelec_spin[1]); + const int nbands3 = static_cast (max_occ) + 11; + const int nbands4 = static_cast (1.2 * max_occ) + 1; + nbands = std::max (nbands3, nbands4); + if (PARAM.inp.basis_type != "pw") + { + nbands = std::min (nbands, nlocal); + } + } + ModuleBase::GlobalFunc::AUTO_SET ("NBANDS", nbands); } - ModuleBase::GlobalFunc::AUTO_SET("NBANDS", nbands); - } else - { - if (nbands < occupied_bands) { - ModuleBase::WARNING_QUIT("unitcell", "Too few bands!"); - } - if (PARAM.inp.nspin == 2) { - if (nbands < nelec_spin[0]) - { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "nelec_up", nelec_spin[0]); - ModuleBase::WARNING_QUIT("ElecState::cal_nbands", 
"Too few spin up bands!"); - } - if (nbands < nelec_spin[1]) - { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "nelec_down", nelec_spin[1]); - ModuleBase::WARNING_QUIT("ElecState::cal_nbands", "Too few spin down bands!"); - } + if (nbands < occupied_bands) + { + ModuleBase::WARNING_QUIT ("unitcell", "Too few bands!"); + } + if (PARAM.inp.nspin == 2) + { + if (nbands < nelec_spin[0]) + { + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "nelec_up", nelec_spin[0]); + ModuleBase::WARNING_QUIT ("ElecState::cal_nbands", "Too few spin up bands!"); + } + if (nbands < nelec_spin[1]) + { + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "nelec_down", nelec_spin[1]); + ModuleBase::WARNING_QUIT ("ElecState::cal_nbands", "Too few spin down bands!"); + } + } } - } // mohan add 2010-09-04 if (nbands == occupied_bands) - { - if (PARAM.inp.smearing_method != "fixed") { - ModuleBase::WARNING_QUIT("ElecState::cal_nbands", "for smearing, num. of bands > num. of occupied bands"); + if (PARAM.inp.smearing_method != "fixed") + { + ModuleBase::WARNING_QUIT ("ElecState::cal_nbands", + "for smearing, num. of bands > num. 
of occupied bands"); + } } - } // mohan update 2021-02-19 // mohan add 2011-01-5 if (PARAM.inp.basis_type == "lcao" || PARAM.inp.basis_type == "lcao_in_pw") - { - if (nbands > nlocal) - { - ModuleBase::WARNING_QUIT("ElecState::cal_nbands", - "Number of basis (NLOCAL) < Number of electronic states (NBANDS)"); - } - else { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Number of basis (NLOCAL)", nlocal); + if (nbands > nlocal) + { + ModuleBase::WARNING_QUIT ("ElecState::cal_nbands", + "Number of basis (NLOCAL) < Number of electronic states (NBANDS)"); + } + else + { + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Number of basis (NLOCAL)", nlocal); + } } - } - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Number of electronic states (NBANDS)", nbands); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Number of electronic states (NBANDS)", nbands); } -} +} // namespace elecstate diff --git a/source/source_estate/cal_nelec_nband.h b/source/source_estate/cal_nelec_nband.h index a9f84d59db1..ae6b7762412 100644 --- a/source/source_estate/cal_nelec_nband.h +++ b/source/source_estate/cal_nelec_nband.h @@ -3,27 +3,28 @@ #include "source_cell/atom_spec.h" -namespace elecstate { +namespace elecstate +{ - /** - * @brief calculate the total number of electrons in system - * - * @param atoms [in] atom pointer - * @param ntype [in] number of atom types - * @param nelec [out] total number of electrons - */ - void cal_nelec(const Atom* atoms, const int& ntype, double& nelec); +/** + * @brief calculate the total number of electrons in system + * + * @param atoms [in] atom pointer + * @param ntype [in] number of atom types + * @param nelec [out] total number of electrons + */ +void cal_nelec (const Atom* atoms, const int& ntype, double& nelec); - /** - * @brief Calculate the number of bands. 
- * - * @param nelec [in] total number of electrons - * @param nlocal [in] total number of local basis - * @param nelec_spin [in] number of electrons for each spin - * @param nbands [out] number of bands - */ - void cal_nbands(const int& nelec, const int& nlocal, const std::vector& nelec_spin, int& nbands); +/** + * @brief Calculate the number of bands. + * + * @param nelec [in] total number of electrons + * @param nlocal [in] total number of local basis + * @param nelec_spin [in] number of electrons for each spin + * @param nbands [out] number of bands + */ +void cal_nbands (const int& nelec, const int& nlocal, const std::vector& nelec_spin, int& nbands); -} +} // namespace elecstate #endif \ No newline at end of file diff --git a/source/source_estate/cal_ux.cpp b/source/source_estate/cal_ux.cpp index f7ea8563864..a3be04a1b34 100644 --- a/source/source_estate/cal_ux.cpp +++ b/source/source_estate/cal_ux.cpp @@ -1,14 +1,17 @@ #include "cal_ux.h" #include "source_io/module_parameter/parameter.h" -namespace elecstate { +namespace elecstate +{ -void cal_ux(UnitCell& ucell) { +void + cal_ux (UnitCell& ucell) +{ if (PARAM.inp.nspin != 4) - { - return; - } + { + return; + } const double absolute_mag_thr = 1.0e-6; @@ -21,83 +24,80 @@ void cal_ux(UnitCell& ucell) { // do not sign feature in teh general case ucell.magnet.lsign_ = false; - ModuleBase::GlobalFunc::ZEROS(ucell.magnet.ux_, 3); - - for (int it = 0; it < ucell.ntype; it++) - { - for (int ia = 0; ia < ucell.atoms[it].na; ia++) - { - // m_loc_: local magnetization vector for each atom - amag = pow(ucell.atoms[it].m_loc_[ia].x, 2) - + pow(ucell.atoms[it].m_loc_[ia].y, 2) - + pow(ucell.atoms[it].m_loc_[ia].z, 2); - - // find the first atom (it,ia) whose magnetism is not zero - // compute ux - if (amag > absolute_mag_thr) - { - ucell.magnet.ux_[0] = ucell.atoms[it].m_loc_[ia].x; - ucell.magnet.ux_[1] = ucell.atoms[it].m_loc_[ia].y; - ucell.magnet.ux_[2] = ucell.atoms[it].m_loc_[ia].z; - - starting_it = it; - 
starting_ia = ia; - - ucell.magnet.lsign_ = true; - break; - } - } - - // if any atom has magnetism, then break the for iteration - if (ucell.magnet.lsign_) - { - break; - } - } + ModuleBase::GlobalFunc::ZEROS (ucell.magnet.ux_, 3); + + for (int it = 0; it < ucell.ntype; it++) + { + for (int ia = 0; ia < ucell.atoms[it].na; ia++) + { + // m_loc_: local magnetization vector for each atom + amag = pow (ucell.atoms[it].m_loc_[ia].x, 2) + pow (ucell.atoms[it].m_loc_[ia].y, 2) + + pow (ucell.atoms[it].m_loc_[ia].z, 2); + + // find the first atom (it,ia) whose magnetism is not zero + // compute ux + if (amag > absolute_mag_thr) + { + ucell.magnet.ux_[0] = ucell.atoms[it].m_loc_[ia].x; + ucell.magnet.ux_[1] = ucell.atoms[it].m_loc_[ia].y; + ucell.magnet.ux_[2] = ucell.atoms[it].m_loc_[ia].z; + + starting_it = it; + starting_ia = ia; + + ucell.magnet.lsign_ = true; + break; + } + } + + // if any atom has magnetism, then break the for iteration + if (ucell.magnet.lsign_) + { + break; + } + } // whether the initial magnetizations is parallel - for (int it = starting_it; it < ucell.ntype; it++) - { - for (int ia = 0; ia < ucell.atoms[it].na; ia++) - { - if (it > starting_it || ia > starting_ia) - { - ucell.magnet.lsign_ - = ucell.magnet.lsign_ - && judge_parallel(ucell.magnet.ux_, ucell.atoms[it].m_loc_[ia]); - } - } - } + for (int it = starting_it; it < ucell.ntype; it++) + { + for (int ia = 0; ia < ucell.atoms[it].na; ia++) + { + if (it > starting_it || ia > starting_ia) + { + ucell.magnet.lsign_ + = ucell.magnet.lsign_ && judge_parallel (ucell.magnet.ux_, ucell.atoms[it].m_loc_[ia]); + } + } + } // if all of the atoms have the same parallel magnetism direction, // then set the direction to a unit vector - if (ucell.magnet.lsign_) - { - uxmod = pow(ucell.magnet.ux_[0], 2) - + pow(ucell.magnet.ux_[1], 2) - + pow(ucell.magnet.ux_[2], 2); - - if (uxmod < absolute_mag_thr) - { - ModuleBase::WARNING_QUIT("elecstate::cal_ux", "wrong uxmod"); - } - - // reset the magnetism for 
each direction - for (int i = 0; i < 3; i++) - { - ucell.magnet.ux_[i] *= 1 / sqrt(uxmod); - } - } - return; + if (ucell.magnet.lsign_) + { + uxmod = pow (ucell.magnet.ux_[0], 2) + pow (ucell.magnet.ux_[1], 2) + pow (ucell.magnet.ux_[2], 2); + + if (uxmod < absolute_mag_thr) + { + ModuleBase::WARNING_QUIT ("elecstate::cal_ux", "wrong uxmod"); + } + + // reset the magnetism for each direction + for (int i = 0; i < 3; i++) + { + ucell.magnet.ux_[i] *= 1 / sqrt (uxmod); + } + } + return; } -bool judge_parallel(double a[3], ModuleBase::Vector3 b) { +bool + judge_parallel (double a[3], ModuleBase::Vector3 b) +{ bool jp = false; double cross = 0.0; - cross = pow((a[1] * b.z - a[2] * b.y), 2) - + pow((a[2] * b.x - a[0] * b.z), 2) - + pow((a[0] * b.y - a[1] * b.x), 2); - jp = (fabs(cross) < 1e-6); + cross + = pow ((a[1] * b.z - a[2] * b.y), 2) + pow ((a[2] * b.x - a[0] * b.z), 2) + pow ((a[0] * b.y - a[1] * b.x), 2); + jp = (fabs (cross) < 1e-6); return jp; } -} +} // namespace elecstate diff --git a/source/source_estate/cal_ux.h b/source/source_estate/cal_ux.h index fda1f07a5b0..826a1cb2c80 100644 --- a/source/source_estate/cal_ux.h +++ b/source/source_estate/cal_ux.h @@ -3,13 +3,14 @@ #include "source_cell/unitcell.h" -namespace elecstate { +namespace elecstate +{ - // Only for npsin = 4 - void cal_ux(UnitCell& ucell); - - bool judge_parallel(double a[3], ModuleBase::Vector3 b); +// Only for npsin = 4 +void cal_ux (UnitCell& ucell); -} +bool judge_parallel (double a[3], ModuleBase::Vector3 b); + +} // namespace elecstate #endif \ No newline at end of file diff --git a/source/source_estate/cal_wfc.cpp b/source/source_estate/cal_wfc.cpp index 846ac965bb0..17fdbf183b0 100644 --- a/source/source_estate/cal_wfc.cpp +++ b/source/source_estate/cal_wfc.cpp @@ -4,114 +4,130 @@ namespace elecstate { - void cal_nwfc(std::ofstream& log,UnitCell& ucell,Atom* atoms) - { - ModuleBase::TITLE("UnitCell", "cal_nwfc"); - const int ntype = ucell.ntype; - const int nat = ucell.nat; - 
assert(ntype > 0); - assert(nat > 0); +void + cal_nwfc (std::ofstream& log, UnitCell& ucell, Atom* atoms) +{ + ModuleBase::TITLE ("UnitCell", "cal_nwfc"); + const int ntype = ucell.ntype; + const int nat = ucell.nat; + assert (ntype > 0); + assert (nat > 0); - //=========================== - // (1) set iw2l, iw2n, iw2m - //=========================== - for (int it = 0; it < ntype; it++) + //=========================== + // (1) set iw2l, iw2n, iw2m + //=========================== + for (int it = 0; it < ntype; it++) { - ucell.atoms[it].set_index(); + ucell.atoms[it].set_index (); } - //=========================== - // (2) set namax and nwmax - //=========================== - ucell.namax = 0; - ucell.nwmax = 0; - for (int it = 0; it < ntype; it++) { - ucell.namax = std::max(atoms[it].na, ucell.namax); - ucell.nwmax = std::max(atoms[it].nw, ucell.nwmax); + //=========================== + // (2) set namax and nwmax + //=========================== + ucell.namax = 0; + ucell.nwmax = 0; + for (int it = 0; it < ntype; it++) + { + ucell.namax = std::max (atoms[it].na, ucell.namax); + ucell.nwmax = std::max (atoms[it].nw, ucell.nwmax); } - //=========================== - // (3) set nwfc and stapos_wf - //=========================== - int nlocal_tmp = 0; - for (int it = 0; it < ntype; it++) { + //=========================== + // (3) set nwfc and stapos_wf + //=========================== + int nlocal_tmp = 0; + for (int it = 0; it < ntype; it++) + { atoms[it].stapos_wf = nlocal_tmp; const int nlocal_it = atoms[it].nw * atoms[it].na; - if (PARAM.inp.nspin != 4) { - nlocal_tmp += nlocal_it; - } else { - nlocal_tmp += nlocal_it * 2; // zhengdy-soc - } + if (PARAM.inp.nspin != 4) + { + nlocal_tmp += nlocal_it; + } + else + { + nlocal_tmp += nlocal_it * 2; // zhengdy-soc + } } -// log << " " << std::setw(40) << "NLOCAL" - // << " = " << nlocal_tmp << std::endl; - //======================================================== - // (4) set index for itia2iat, itiaiw2iwt - 
//======================================================== + // log << " " << std::setw(40) << "NLOCAL" + // << " = " << nlocal_tmp << std::endl; + //======================================================== + // (4) set index for itia2iat, itiaiw2iwt + //======================================================== - // mohan add 2010-09-26 - assert(nlocal_tmp > 0); - assert(nlocal_tmp == PARAM.globalv.nlocal); - delete[] ucell.iwt2iat; - delete[] ucell.iwt2iw; - ucell.iwt2iat = new int[nlocal_tmp]; - ucell.iwt2iw = new int[nlocal_tmp]; + // mohan add 2010-09-26 + assert (nlocal_tmp > 0); + assert (nlocal_tmp == PARAM.globalv.nlocal); + delete[] ucell.iwt2iat; + delete[] ucell.iwt2iw; + ucell.iwt2iat = new int[nlocal_tmp]; + ucell.iwt2iw = new int[nlocal_tmp]; - ucell.itia2iat.create(ntype, ucell.namax); - ucell.set_iat2iwt(PARAM.globalv.npol); - int iat = 0; - int iwt = 0; - for (int it = 0; it < ntype; it++) { - for (int ia = 0; ia < atoms[it].na; ia++) { - ucell.itia2iat(it, ia) = iat; - for (int iw = 0; iw < atoms[it].nw * PARAM.globalv.npol; iw++) { - ucell.iwt2iat[iwt] = iat; - ucell.iwt2iw[iwt] = iw; - ++iwt; + ucell.itia2iat.create (ntype, ucell.namax); + ucell.set_iat2iwt (PARAM.globalv.npol); + int iat = 0; + int iwt = 0; + for (int it = 0; it < ntype; it++) + { + for (int ia = 0; ia < atoms[it].na; ia++) + { + ucell.itia2iat (it, ia) = iat; + for (int iw = 0; iw < atoms[it].nw * PARAM.globalv.npol; iw++) + { + ucell.iwt2iat[iwt] = iat; + ucell.iwt2iw[iwt] = iw; + ++iwt; + } + ++iat; } - ++iat; - } } - //======================== - // (5) set lmax and nmax - //======================== - ucell.lmax = 0; - ucell.nmax = 0; - ucell.nmax_total = 0; - for (int it = 0; it < ntype; it++) { - ucell.lmax = std::max(ucell.lmax, atoms[it].nwl); - for (int l = 0; l < atoms[it].nwl + 1; l++) { - ucell.nmax = std::max(ucell.nmax, atoms[it].l_nchi[l]); - } + //======================== + // (5) set lmax and nmax + //======================== + ucell.lmax = 0; + ucell.nmax = 0; + 
ucell.nmax_total = 0; + for (int it = 0; it < ntype; it++) + { + ucell.lmax = std::max (ucell.lmax, atoms[it].nwl); + for (int l = 0; l < atoms[it].nwl + 1; l++) + { + ucell.nmax = std::max (ucell.nmax, atoms[it].l_nchi[l]); + } int nchi = 0; - for (int l = 0; l < atoms[it].nwl + 1; l++) { - nchi += atoms[it].l_nchi[l]; - } - ucell.nmax_total = std::max(ucell.nmax_total, nchi); + for (int l = 0; l < atoms[it].nwl + 1; l++) + { + nchi += atoms[it].l_nchi[l]; + } + ucell.nmax_total = std::max (ucell.nmax_total, nchi); } - //======================= - // (6) set lmax_ppwf - //======================= - ucell.lmax_ppwf = 0; - for (int it = 0; it < ntype; it++) { - for (int ic = 0; ic < atoms[it].ncpp.nchi; ic++) { - if (ucell.lmax_ppwf < atoms[it].ncpp.lchi[ic]) { - ucell.lmax_ppwf = atoms[it].ncpp.lchi[ic]; + //======================= + // (6) set lmax_ppwf + //======================= + ucell.lmax_ppwf = 0; + for (int it = 0; it < ntype; it++) + { + for (int ic = 0; ic < atoms[it].ncpp.nchi; ic++) + { + if (ucell.lmax_ppwf < atoms[it].ncpp.lchi[ic]) + { + ucell.lmax_ppwf = atoms[it].ncpp.lchi[ic]; + } } - } } - //===================== - // Use localized basis - //===================== - if ((PARAM.inp.basis_type == "lcao") || (PARAM.inp.basis_type == "lcao_in_pw") - || ((PARAM.inp.basis_type == "pw") && (PARAM.inp.init_wfc.substr(0, 3) == "nao") - && (PARAM.inp.esolver_type == "ksdft"))) // xiaohui add 2013-09-02 + //===================== + // Use localized basis + //===================== + if ((PARAM.inp.basis_type == "lcao") || (PARAM.inp.basis_type == "lcao_in_pw") + || ((PARAM.inp.basis_type == "pw") && (PARAM.inp.init_wfc.substr (0, 3) == "nao") + && (PARAM.inp.esolver_type == "ksdft"))) // xiaohui add 2013-09-02 { - ModuleBase::GlobalFunc::AUTO_SET("NBANDS", PARAM.inp.nbands); - } else // plane wave basis + ModuleBase::GlobalFunc::AUTO_SET ("NBANDS", PARAM.inp.nbands); + } + else // plane wave basis { // if(winput::after_iter && winput::sph_proj) //{ @@ -123,57 
+139,61 @@ namespace elecstate // } } - return; - } + return; +} - void cal_meshx(int& meshx,const Atom* atoms, const int ntype) - { - meshx = 0; - for (int it = 0; it < ntype; it++) { +void + cal_meshx (int& meshx, const Atom* atoms, const int ntype) +{ + meshx = 0; + for (int it = 0; it < ntype; it++) + { const int mesh = atoms[it].ncpp.msh; - if (mesh > meshx) - { - meshx = mesh; - } + if (mesh > meshx) + { + meshx = mesh; + } } - } - +} - void cal_natomwfc(std::ofstream& log,int& natomwfc,const int ntype,const Atom* atoms) - { - natomwfc = 0; - for (int it = 0; it < ntype; it++) - { - //============================ - // Use pseudo-atomic orbitals - //============================ - int tmp = 0; - for (int l = 0; l < atoms[it].ncpp.nchi; l++) - { - if (atoms[it].ncpp.oc[l] >= 0) - { - if (PARAM.inp.nspin == 4) - { - if (atoms[it].ncpp.has_so) - { - tmp += 2 * atoms[it].ncpp.lchi[l]; - if (fabs(atoms[it].ncpp.jchi[l] - atoms[it].ncpp.lchi[l] - 0.5)< 1e-6) - { - tmp += 2; - } - } else - { - tmp += 2 * (2 * atoms[it].ncpp.lchi[l] + 1); - } - } else - { - tmp += 2 * atoms[it].ncpp.lchi[l] + 1; - } - } - } - natomwfc += tmp * atoms[it].na; - } - ModuleBase::GlobalFunc::OUT(log, "Number of pseudo atomic orbitals", natomwfc); - return; - } +void + cal_natomwfc (std::ofstream& log, int& natomwfc, const int ntype, const Atom* atoms) +{ + natomwfc = 0; + for (int it = 0; it < ntype; it++) + { + //============================ + // Use pseudo-atomic orbitals + //============================ + int tmp = 0; + for (int l = 0; l < atoms[it].ncpp.nchi; l++) + { + if (atoms[it].ncpp.oc[l] >= 0) + { + if (PARAM.inp.nspin == 4) + { + if (atoms[it].ncpp.has_so) + { + tmp += 2 * atoms[it].ncpp.lchi[l]; + if (fabs (atoms[it].ncpp.jchi[l] - atoms[it].ncpp.lchi[l] - 0.5) < 1e-6) + { + tmp += 2; + } + } + else + { + tmp += 2 * (2 * atoms[it].ncpp.lchi[l] + 1); + } + } + else + { + tmp += 2 * atoms[it].ncpp.lchi[l] + 1; + } + } + } + natomwfc += tmp * atoms[it].na; + } + 
ModuleBase::GlobalFunc::OUT (log, "Number of pseudo atomic orbitals", natomwfc); + return; } +} // namespace elecstate diff --git a/source/source_estate/elecstate.cpp b/source/source_estate/elecstate.cpp index 5e2c6a8ced9..de20d7c6865 100644 --- a/source/source_estate/elecstate.cpp +++ b/source/source_estate/elecstate.cpp @@ -10,42 +10,43 @@ namespace elecstate { -const double* ElecState::getRho(int spin) const +const double* + ElecState::getRho (int spin) const { return &(this->charge->rho[spin][0]); } - - -void ElecState::init_nelec_spin() +void + ElecState::init_nelec_spin () { - this->nelec_spin.resize(PARAM.inp.nspin); + this->nelec_spin.resize (PARAM.inp.nspin); if (PARAM.inp.nspin == 2) - { - this->nelec_spin[0] = (PARAM.inp.nelec + PARAM.inp.nupdown) / 2.0; - this->nelec_spin[1] = (PARAM.inp.nelec - PARAM.inp.nupdown) / 2.0; - } + { + this->nelec_spin[0] = (PARAM.inp.nelec + PARAM.inp.nupdown) / 2.0; + this->nelec_spin[1] = (PARAM.inp.nelec - PARAM.inp.nupdown) / 2.0; + } } -void ElecState::init_scf(const UnitCell& ucell, +void + ElecState::init_scf (const UnitCell& ucell, const Parallel_Grid& pgrid, - const ModuleBase::ComplexMatrix& strucfac, + const ModuleBase::ComplexMatrix& strucfac, const bool* numeric, - ModuleSymmetry::Symmetry& symm, + ModuleSymmetry::Symmetry& symm, const void* wfcpw) { //! core correction potential. - this->charge->set_rho_core(ucell,strucfac, numeric); + this->charge->set_rho_core (ucell, strucfac, numeric); //! renormalize the charge density - this->charge->renormalize_rho(); + this->charge->renormalize_rho (); //! 
initialize the potential - this->pot->init_pot(this->charge); + this->pot->init_pot (this->charge); } - -void ElecState::init_ks(Charge* chr_in, // pointer for class Charge +void + ElecState::init_ks (Charge* chr_in, // pointer for class Charge const K_Vectors* klist_in, int nk_in, const ModulePW::PW_Basis_Big* bigpw_in) @@ -54,10 +55,10 @@ void ElecState::init_ks(Charge* chr_in, // pointer for class Charge this->klist = klist_in; this->bigpw = bigpw_in; // init nelec_spin with nelec and nupdown - this->init_nelec_spin(); + this->init_nelec_spin (); // initialize ekb and wg - this->ekb.create(nk_in, PARAM.globalv.nbands_l); - this->wg.create(nk_in, PARAM.globalv.nbands_l); + this->ekb.create (nk_in, PARAM.globalv.nbands_l); + this->wg.create (nk_in, PARAM.globalv.nbands_l); } } // namespace elecstate diff --git a/source/source_estate/elecstate.h b/source/source_estate/elecstate.h index 888b3b95dc0..5fb9a33a14d 100644 --- a/source/source_estate/elecstate.h +++ b/source/source_estate/elecstate.h @@ -14,51 +14,54 @@ namespace elecstate class ElecState { public: - ElecState() - { - } - ElecState(Charge* chr_in, ModulePW::PW_Basis* rhopw_in, ModulePW::PW_Basis_Big* bigpw_in) + ElecState () {} + ElecState (Charge* chr_in, ModulePW::PW_Basis* rhopw_in, ModulePW::PW_Basis_Big* bigpw_in) { this->charge = chr_in; - this->charge->set_rhopw(rhopw_in); + this->charge->set_rhopw (rhopw_in); this->bigpw = bigpw_in; this->eferm.two_efermi = PARAM.globalv.two_fermi; } - virtual ~ElecState() + virtual ~ElecState () { if (this->pot != nullptr) - { - delete this->pot; - this->pot = nullptr; - } + { + delete this->pot; + this->pot = nullptr; + } } - void init_ks(Charge* chr_in, // pointer for class Charge - const K_Vectors* klist_in, - int nk_in, // number of k points - const ModulePW::PW_Basis_Big* bigpw_in); + void init_ks (Charge* chr_in, // pointer for class Charge + const K_Vectors* klist_in, + int nk_in, // number of k points + const ModulePW::PW_Basis_Big* bigpw_in); // return 
current electronic density rho, as a input for constructing Hamiltonian - virtual const double* getRho(int spin) const; + virtual const double* getRho (int spin) const; // calculate electronic charge density on grid points or density matrix in real space // the consequence charge density rho saved into rho_out, preparing for charge mixing. - virtual void psiToRho(const psi::Psi>& psi) + virtual void + psiToRho (const psi::Psi>& psi) { return; } - virtual void psiToRho(const psi::Psi& psi) + virtual void + psiToRho (const psi::Psi& psi) { return; } - virtual void cal_tau(const psi::Psi>& psi) + virtual void + cal_tau (const psi::Psi>& psi) { return; } - virtual void cal_tau(const psi::Psi& psi) + virtual void + cal_tau (const psi::Psi& psi) { return; } - virtual void cal_tau(const psi::Psi>& psi) + virtual void + cal_tau (const psi::Psi>& psi) { return; } @@ -69,27 +72,30 @@ class ElecState // 1. input rho would be store to file for restart // 2. calculated rho should be near with input rho when convergence has achieved // 3. new rho should be input rho for next scf step. 
- virtual void getNewRho() + virtual void + getNewRho () { return; } // use occupied weights from INPUT and skip calculate_weights // mohan updated on 2024-06-08 - + // if nupdown is not 0(TWO_EFERMI case), // nelec_spin will be fixed and weights will be constrained - void init_nelec_spin(); + void init_nelec_spin (); // used to record number of electrons per spin index // for NSPIN=2, it will record number of spin up and number of spin down // for NSPIN=4, it will record total number, magnetization for x, y, z direction std::vector nelec_spin; - virtual void print_psi(const psi::Psi& psi_in, const int istep = -1) + virtual void + print_psi (const psi::Psi& psi_in, const int istep = -1) { return; } - virtual void print_psi(const psi::Psi>& psi_in, const int istep = -1) + virtual void + print_psi (const psi::Psi>& psi_in, const int istep = -1) { return; } @@ -102,12 +108,12 @@ class ElecState * @param symm symmetry * @param wfcpw PW basis for wave function if needed */ - void init_scf(const UnitCell& ucell, - const Parallel_Grid& pgrid, - const ModuleBase::ComplexMatrix& strucfac, - const bool* numeric, - ModuleSymmetry::Symmetry& symm, - const void* wfcpw = nullptr); + void init_scf (const UnitCell& ucell, + const Parallel_Grid& pgrid, + const ModuleBase::ComplexMatrix& strucfac, + const bool* numeric, + ModuleSymmetry::Symmetry& symm, + const void* wfcpw = nullptr); std::string classname = "elecstate"; int iter = 0; ///< scf iteration @@ -117,33 +123,34 @@ class ElecState const ModulePW::PW_Basis_Big* bigpw = nullptr; ///< bigpw will be removed later public: // something aboud energies. 
See elecstate_energy.cpp - void cal_bandgap(); - void cal_bandgap_updw(); + void cal_bandgap (); + void cal_bandgap_updw (); - double cal_delta_eband(const UnitCell& ucell) const; - double cal_delta_escf() const; + double cal_delta_eband (const UnitCell& ucell) const; + double cal_delta_escf () const; ModuleBase::matrix vnew; bool vnew_exist = false; - void cal_converged(); - void cal_energies(const int type); - void set_exx(const double& Eexx); - void set_exx(const std::complex& Eexx); + void cal_converged (); + void cal_energies (const int type); + void set_exx (const double& Eexx); + void set_exx (const std::complex& Eexx); - double get_hartree_energy(); - double get_etot_efield(); - double get_etot_gatefield(); + double get_hartree_energy (); + double get_etot_efield (); + double get_etot_gatefield (); - double get_solvent_model_Ael(); - double get_solvent_model_Acav(); + double get_solvent_model_Ael (); + double get_solvent_model_Acav (); - virtual double get_spin_constrain_energy() + virtual double + get_spin_constrain_energy () { return 0.0; } - double get_dftu_energy(); - double get_local_pp_energy(); + double get_dftu_energy (); + double get_local_pp_energy (); fenergy f_en; ///< energies contribute to the total free energy Efermi eferm; ///< fermi energies @@ -158,7 +165,6 @@ class ElecState ModuleBase::matrix wg; ///< occupation weight for each k-point and band public: - bool skip_weights = false; }; diff --git a/source/source_estate/elecstate_energy.cpp b/source/source_estate/elecstate_energy.cpp index 3c58360b902..438e1b9e222 100644 --- a/source/source_estate/elecstate_energy.cpp +++ b/source/source_estate/elecstate_energy.cpp @@ -11,193 +11,196 @@ namespace elecstate { /// @brief calculate band gap -void ElecState::cal_bandgap() +void + ElecState::cal_bandgap () { if (this->ekb.nr == 0 || this->ekb.nc == 0) - { // which means no vbm and no cbm - this->bandgap = 0.0; - return; - } + { // which means no vbm and no cbm + this->bandgap = 0.0; + return; + 
} int nbands = this->ekb.nc; - int nks = this->klist->get_nks(); - double vbm = -std::numeric_limits::infinity(); // Valence Band Maximum - double cbm = std::numeric_limits::infinity(); // Conduction Band Minimum - const double threshold = 1.0e-5; // threshold to avoid E_gap(k) = 0 + int nks = this->klist->get_nks (); + double vbm = -std::numeric_limits::infinity (); // Valence Band Maximum + double cbm = std::numeric_limits::infinity (); // Conduction Band Minimum + const double threshold = 1.0e-5; // threshold to avoid E_gap(k) = 0 for (int ib = 0; ib < nbands; ib++) - { - for (int ik = 0; ik < nks; ik++) { - if (this->ekb(ik, ib) <= this->eferm.ef + threshold && this->ekb(ik, ib) > vbm) - { - vbm = this->ekb(ik, ib); - } - if (this->ekb(ik, ib) > this->eferm.ef + threshold && this->ekb(ik, ib) < cbm) - { - cbm = this->ekb(ik, ib); - } + for (int ik = 0; ik < nks; ik++) + { + if (this->ekb (ik, ib) <= this->eferm.ef + threshold && this->ekb (ik, ib) > vbm) + { + vbm = this->ekb (ik, ib); + } + if (this->ekb (ik, ib) > this->eferm.ef + threshold && this->ekb (ik, ib) < cbm) + { + cbm = this->ekb (ik, ib); + } + } } - } // Assign fermi level to CBM if it's still infinity - if(cbm == std::numeric_limits::infinity()) - { - cbm =this->eferm.ef; - } + if (cbm == std::numeric_limits::infinity ()) + { + cbm = this->eferm.ef; + } // Assign fermi level to VBM if it's still negative infinity - if(vbm ==-std::numeric_limits::infinity()) - { - vbm =this->eferm.ef; - } - #ifdef __MPI - Parallel_Reduce::reduce_max(vbm); - Parallel_Reduce::reduce_min(cbm); - #endif + if (vbm == -std::numeric_limits::infinity ()) + { + vbm = this->eferm.ef; + } +#ifdef __MPI + Parallel_Reduce::reduce_max (vbm); + Parallel_Reduce::reduce_min (cbm); +#endif this->bandgap = cbm - vbm; } /// @brief calculate spin up & down band gap /// @todo add isk[ik] so as to discriminate different spins -void ElecState::cal_bandgap_updw() +void + ElecState::cal_bandgap_updw () { if (this->ekb.nr == 0 || 
this->ekb.nc == 0) - { // which means no vbm and no cbm - this->bandgap_up = 0.0; - this->bandgap_dw = 0.0; - return; - } + { // which means no vbm and no cbm + this->bandgap_up = 0.0; + this->bandgap_dw = 0.0; + return; + } // int nbands = PARAM.inp.nbands; int nbands = this->ekb.nc; - int nks = this->klist->get_nks(); - double vbm_up = -std::numeric_limits::infinity(); - double cbm_up = std::numeric_limits::infinity(); - double vbm_dw = -std::numeric_limits::infinity(); - double cbm_dw = std::numeric_limits::infinity(); + int nks = this->klist->get_nks (); + double vbm_up = -std::numeric_limits::infinity (); + double cbm_up = std::numeric_limits::infinity (); + double vbm_dw = -std::numeric_limits::infinity (); + double cbm_dw = std::numeric_limits::infinity (); const double threshold = 1.0e-5; for (int ib = 0; ib < nbands; ib++) - { - for (int ik = 0; ik < nks; ik++) { - if (this->klist->isk[ik] == 0) - { - if (this->ekb(ik, ib) <= this->eferm.ef_up + threshold && this->ekb(ik, ib) > vbm_up) + for (int ik = 0; ik < nks; ik++) { - vbm_up = this->ekb(ik, ib); + if (this->klist->isk[ik] == 0) + { + if (this->ekb (ik, ib) <= this->eferm.ef_up + threshold && this->ekb (ik, ib) > vbm_up) + { + vbm_up = this->ekb (ik, ib); + } + if (this->ekb (ik, ib) > this->eferm.ef_up + threshold && this->ekb (ik, ib) < cbm_up) + { + cbm_up = this->ekb (ik, ib); + } + } + if (this->klist->isk[ik] == 1) + { + if (this->ekb (ik, ib) <= this->eferm.ef_dw + threshold && this->ekb (ik, ib) > vbm_dw) + { + vbm_dw = this->ekb (ik, ib); + } + if (this->ekb (ik, ib) > this->eferm.ef_dw + threshold && this->ekb (ik, ib) < cbm_dw) + { + cbm_dw = this->ekb (ik, ib); + } + } } - if (this->ekb(ik, ib) > this->eferm.ef_up + threshold && this->ekb(ik, ib) < cbm_up) - { - cbm_up = this->ekb(ik, ib); - } - } - if (this->klist->isk[ik] == 1) - { - if (this->ekb(ik, ib) <= this->eferm.ef_dw + threshold && this->ekb(ik, ib) > vbm_dw) - { - vbm_dw = this->ekb(ik, ib); - } - if (this->ekb(ik, ib) > 
this->eferm.ef_dw + threshold && this->ekb(ik, ib) < cbm_dw) - { - cbm_dw = this->ekb(ik, ib); - } - } } - } - // Assign fermi level to CBM if it's still infinity - if (cbm_up == std::numeric_limits::infinity()) - { - cbm_up =this->eferm.ef_up; - } - if (cbm_dw == std::numeric_limits::infinity()) - { - cbm_dw =this->eferm.ef_dw; - } + // Assign fermi level to CBM if it's still infinity + if (cbm_up == std::numeric_limits::infinity ()) + { + cbm_up = this->eferm.ef_up; + } + if (cbm_dw == std::numeric_limits::infinity ()) + { + cbm_dw = this->eferm.ef_dw; + } // Assign fermi level to VBM if it's still negative infinity - if(vbm_up ==-std::numeric_limits::infinity()) - { - vbm_up =this->eferm.ef_up; - } - if(vbm_dw ==-std::numeric_limits::infinity()) - { - vbm_dw =this->eferm.ef_dw; - } - #ifdef __MPI - Parallel_Reduce::reduce_max(vbm_up); - Parallel_Reduce::reduce_min(cbm_up); - Parallel_Reduce::reduce_max(vbm_dw); - Parallel_Reduce::reduce_min(cbm_dw); - #endif + if (vbm_up == -std::numeric_limits::infinity ()) + { + vbm_up = this->eferm.ef_up; + } + if (vbm_dw == -std::numeric_limits::infinity ()) + { + vbm_dw = this->eferm.ef_dw; + } +#ifdef __MPI + Parallel_Reduce::reduce_max (vbm_up); + Parallel_Reduce::reduce_min (cbm_up); + Parallel_Reduce::reduce_max (vbm_dw); + Parallel_Reduce::reduce_min (cbm_dw); +#endif this->bandgap_up = cbm_up - vbm_up; this->bandgap_dw = cbm_dw - vbm_dw; } /// @brief calculate deband -double ElecState::cal_delta_eband(const UnitCell& ucell) const +double + ElecState::cal_delta_eband (const UnitCell& ucell) const { - ModuleBase::timer::start("ElecState", "cal_delta_eband"); - // out potentials from potential mixing - // total energy and band energy corrections + ModuleBase::timer::start ("ElecState", "cal_delta_eband"); + // out potentials from potential mixing + // total energy and band energy corrections double deband0 = 0.0; double deband_aux = 0.0; // only potential related with charge is used here for energy correction // on the 
fly calculate it here by v_eff - v_fixed - const double* v_eff = this->pot->get_eff_v(0); - const double* v_fixed = this->pot->get_fixed_v(); + const double* v_eff = this->pot->get_eff_v (0); + const double* v_fixed = this->pot->get_fixed_v (); const double* v_ofk = nullptr; - const bool v_ofk_flag = (XC_Functional::get_ked_flag()); + const bool v_ofk_flag = (XC_Functional::get_ked_flag ()); for (int ir = 0; ir < this->charge->rhopw->nrxx; ir++) - { - deband_aux -= this->charge->rho[0][ir] * (v_eff[ir] - v_fixed[ir]); - } - - if (v_ofk_flag) - { - v_ofk = this->pot->get_eff_vofk(0); - // cause in the get_eff_vofk, the func will return nullptr - if (v_ofk == nullptr && this->charge->rhopw->nrxx > 0) { - ModuleBase::WARNING_QUIT("ElecState::cal_delta_eband", "v_ofk is nullptr"); + deband_aux -= this->charge->rho[0][ir] * (v_eff[ir] - v_fixed[ir]); } - for (int ir = 0; ir < this->charge->rhopw->nrxx; ir++) + + if (v_ofk_flag) { - deband_aux -= this->charge->kin_r[0][ir] * v_ofk[ir]; + v_ofk = this->pot->get_eff_vofk (0); + // cause in the get_eff_vofk, the func will return nullptr + if (v_ofk == nullptr && this->charge->rhopw->nrxx > 0) + { + ModuleBase::WARNING_QUIT ("ElecState::cal_delta_eband", "v_ofk is nullptr"); + } + for (int ir = 0; ir < this->charge->rhopw->nrxx; ir++) + { + deband_aux -= this->charge->kin_r[0][ir] * v_ofk[ir]; + } } - } if (PARAM.inp.nspin == 2) - { - v_eff = this->pot->get_eff_v(1); - for (int ir = 0; ir < this->charge->rhopw->nrxx; ir++) { - deband_aux -= this->charge->rho[1][ir] * (v_eff[ir] - v_fixed[ir]); - } - if (v_ofk_flag) - { - v_ofk = this->pot->get_eff_vofk(1); - if (v_ofk == nullptr && this->charge->rhopw->nrxx > 0) - { - ModuleBase::WARNING_QUIT("ElecState::cal_delta_eband", "v_ofk is nullptr"); - } + v_eff = this->pot->get_eff_v (1); for (int ir = 0; ir < this->charge->rhopw->nrxx; ir++) - { - deband_aux -= this->charge->kin_r[1][ir] * v_ofk[ir]; - } + { + deband_aux -= this->charge->rho[1][ir] * (v_eff[ir] - v_fixed[ir]); + } 
+ if (v_ofk_flag) + { + v_ofk = this->pot->get_eff_vofk (1); + if (v_ofk == nullptr && this->charge->rhopw->nrxx > 0) + { + ModuleBase::WARNING_QUIT ("ElecState::cal_delta_eband", "v_ofk is nullptr"); + } + for (int ir = 0; ir < this->charge->rhopw->nrxx; ir++) + { + deband_aux -= this->charge->kin_r[1][ir] * v_ofk[ir]; + } + } } - } else if (PARAM.inp.nspin == 4) - { - for (int is = 1; is < 4; is++) { - v_eff = this->pot->get_eff_v(is); - for (int ir = 0; ir < this->charge->rhopw->nrxx; ir++) - { - deband_aux -= this->charge->rho[is][ir] * v_eff[ir]; - } + for (int is = 1; is < 4; is++) + { + v_eff = this->pot->get_eff_v (is); + for (int ir = 0; ir < this->charge->rhopw->nrxx; ir++) + { + deband_aux -= this->charge->rho[is][ir] * v_eff[ir]; + } + } } - } #ifdef __MPI - MPI_Allreduce(&deband_aux, &deband0, 1, MPI_DOUBLE, MPI_SUM, POOL_WORLD); + MPI_Allreduce (&deband_aux, &deband0, 1, MPI_DOUBLE, MPI_SUM, POOL_WORLD); #else deband0 = deband_aux; #endif @@ -207,15 +210,16 @@ double ElecState::cal_delta_eband(const UnitCell& ucell) const // \int rho(r) v_{exx}(r) dr = 2 E_{exx}[rho] deband0 -= 2 * this->f_en.exx; // Peize Lin add 2017-10-16 - ModuleBase::timer::end("ElecState", "cal_delta_eband"); + ModuleBase::timer::end ("ElecState", "cal_delta_eband"); return deband0; } /// @brief calculate descf -double ElecState::cal_delta_escf() const +double + ElecState::cal_delta_escf () const { - ModuleBase::TITLE("ElecState", "cal_delta_escf"); - ModuleBase::timer::start("ElecState", "cal_delta_escf"); + ModuleBase::TITLE ("ElecState", "cal_delta_escf"); + ModuleBase::timer::start ("ElecState", "cal_delta_escf"); double descf = 0.0; // now rho1 is "mixed" charge density @@ -224,77 +228,77 @@ double ElecState::cal_delta_escf() const // so here is the correction. 
// only potential related with charge is used here for energy correction // on the fly calculate it here by v_eff - v_fixed - const double* v_eff = this->pot->get_eff_v(0); - const double* v_fixed = this->pot->get_fixed_v(); + const double* v_eff = this->pot->get_eff_v (0); + const double* v_fixed = this->pot->get_fixed_v (); const double* v_ofk = nullptr; - if (XC_Functional::get_ked_flag()) - { - v_ofk = this->pot->get_eff_vofk(0); - } + if (XC_Functional::get_ked_flag ()) + { + v_ofk = this->pot->get_eff_vofk (0); + } for (int ir = 0; ir < this->charge->rhopw->nrxx; ir++) - { - descf -= (this->charge->rho[0][ir] - this->charge->rho_save[0][ir]) * (v_eff[ir] - v_fixed[ir]); - if (XC_Functional::get_ked_flag()) { - // cause in the get_eff_vofk, the func will return nullptr - assert(v_ofk != nullptr); - descf -= (this->charge->kin_r[0][ir] - this->charge->kin_r_save[0][ir]) * v_ofk[ir]; + descf -= (this->charge->rho[0][ir] - this->charge->rho_save[0][ir]) * (v_eff[ir] - v_fixed[ir]); + if (XC_Functional::get_ked_flag ()) + { + // cause in the get_eff_vofk, the func will return nullptr + assert (v_ofk != nullptr); + descf -= (this->charge->kin_r[0][ir] - this->charge->kin_r_save[0][ir]) * v_ofk[ir]; + } } - } if (PARAM.inp.nspin == 2) - { - v_eff = this->pot->get_eff_v(1); - if (XC_Functional::get_ked_flag()) { - v_ofk = this->pot->get_eff_vofk(1); - } - for (int ir = 0; ir < this->charge->rhopw->nrxx; ir++) - { - descf -= (this->charge->rho[1][ir] - this->charge->rho_save[1][ir]) * (v_eff[ir] - v_fixed[ir]); - if (XC_Functional::get_ked_flag()) - { - descf -= (this->charge->kin_r[1][ir] - this->charge->kin_r_save[1][ir]) * v_ofk[ir]; - } + v_eff = this->pot->get_eff_v (1); + if (XC_Functional::get_ked_flag ()) + { + v_ofk = this->pot->get_eff_vofk (1); + } + for (int ir = 0; ir < this->charge->rhopw->nrxx; ir++) + { + descf -= (this->charge->rho[1][ir] - this->charge->rho_save[1][ir]) * (v_eff[ir] - v_fixed[ir]); + if (XC_Functional::get_ked_flag ()) + { + descf -= 
(this->charge->kin_r[1][ir] - this->charge->kin_r_save[1][ir]) * v_ofk[ir]; + } + } } - } if (PARAM.inp.nspin == 4) - { - for (int is = 1; is < 4; is++) { - v_eff = this->pot->get_eff_v(is); - for (int ir = 0; ir < this->charge->rhopw->nrxx; ir++) - { - descf -= (this->charge->rho[is][ir] - this->charge->rho_save[is][ir]) * v_eff[ir]; - } + for (int is = 1; is < 4; is++) + { + v_eff = this->pot->get_eff_v (is); + for (int ir = 0; ir < this->charge->rhopw->nrxx; ir++) + { + descf -= (this->charge->rho[is][ir] - this->charge->rho_save[is][ir]) * v_eff[ir]; + } + } } - } #ifdef __MPI - Parallel_Reduce::reduce_pool(descf); + Parallel_Reduce::reduce_pool (descf); #endif - assert(this->charge->rhopw->nxyz > 0); + assert (this->charge->rhopw->nxyz > 0); descf *= this->charge->rhopw->omega / this->charge->rhopw->nxyz; // mohan move the code here, 2025-11-28 #ifdef __MPI - MPI_Bcast(&descf, 1, MPI_DOUBLE, 0, BP_WORLD); + MPI_Bcast (&descf, 1, MPI_DOUBLE, 0, BP_WORLD); #endif - - ModuleBase::timer::end("ElecState", "cal_delta_escf"); + ModuleBase::timer::end ("ElecState", "cal_delta_escf"); return descf; } /// @brief calculation if converged -void ElecState::cal_converged() +void + ElecState::cal_converged () { // update etxc and vtxc // allocate vnew in get_vnew() - this->pot->get_vnew(this->charge, this->vnew); + this->pot->get_vnew (this->charge, this->vnew); this->vnew_exist = true; // vnew will be used in force_scc() @@ -308,54 +312,55 @@ void ElecState::cal_converged() * @param type: 1 means Harris-Foulkes functinoal; * @param type: 2 means Kohn-Sham functional; */ -void ElecState::cal_energies(const int type) +void + ElecState::cal_energies (const int type) { //! Hartree energy - this->f_en.hartree_energy = get_hartree_energy(); + this->f_en.hartree_energy = get_hartree_energy (); //! energy from E-field - this->f_en.efield = get_etot_efield(); + this->f_en.efield = get_etot_efield (); //! 
energy from gate-field - this->f_en.gatefield = get_etot_gatefield(); + this->f_en.gatefield = get_etot_gatefield (); //! energy from implicit solvation model if (PARAM.inp.imp_sol) - { - this->f_en.esol_el = get_solvent_model_Ael(); - this->f_en.esol_cav = get_solvent_model_Acav(); - } + { + this->f_en.esol_el = get_solvent_model_Ael (); + this->f_en.esol_cav = get_solvent_model_Acav (); + } //! spin constrained energy if (PARAM.inp.sc_mag_switch) - { - this->f_en.escon = get_spin_constrain_energy(); - } + { + this->f_en.escon = get_spin_constrain_energy (); + } // energy from DFT+U if (PARAM.inp.dft_plus_u) - { - this->f_en.edftu = get_dftu_energy(); - } + { + this->f_en.edftu = get_dftu_energy (); + } - this->f_en.e_local_pp = get_local_pp_energy(); + this->f_en.e_local_pp = get_local_pp_energy (); #ifdef __MLALGO - this->f_en.ml_exx = this->pot->get_ml_exx_energy(); + this->f_en.ml_exx = this->pot->get_ml_exx_energy (); #endif if (type == 1) // Harris-Foulkes functional - { - this->f_en.calculate_harris(); - } + { + this->f_en.calculate_harris (); + } else if (type == 2) // Kohn-Sham functional - { - this->f_en.calculate_etot(); - } + { + this->f_en.calculate_etot (); + } else - { - ModuleBase::WARNING_QUIT("ElecState::cal_energies", "The form of total energy functional is unknown!"); - } + { + ModuleBase::WARNING_QUIT ("ElecState::cal_energies", "The form of total energy functional is unknown!"); + } } } // namespace elecstate diff --git a/source/source_estate/elecstate_energy_terms.cpp b/source/source_estate/elecstate_energy_terms.cpp index ebe0068c27b..c15ace545f4 100644 --- a/source/source_estate/elecstate_energy_terms.cpp +++ b/source/source_estate/elecstate_energy_terms.cpp @@ -10,49 +10,56 @@ namespace elecstate { -double ElecState::get_hartree_energy() +double + ElecState::get_hartree_energy () { return H_Hartree_pw::hartree_energy; } -double ElecState::get_etot_efield() +double + ElecState::get_etot_efield () { return Efield::etotefield; } -double 
ElecState::get_etot_gatefield() +double + ElecState::get_etot_gatefield () { return Gatefield::etotgatefield; } -double ElecState::get_solvent_model_Ael() +double + ElecState::get_solvent_model_Ael () { return surchem::Ael; } -double ElecState::get_solvent_model_Acav() +double + ElecState::get_solvent_model_Acav () { return surchem::Acav; } -double ElecState::get_dftu_energy() +double + ElecState::get_dftu_energy () { - return Plus_U::get_energy(); + return Plus_U::get_energy (); } -double ElecState::get_local_pp_energy() +double + ElecState::get_local_pp_energy () { double local_pseudopot_energy = 0.; // electron-ion interaction energy from local pseudopotential for (int is = 0; is < PARAM.inp.nspin; ++is) - { - local_pseudopot_energy - += BlasConnector::dot(this->charge->rhopw->nrxx, - this->pot->get_fixed_v(), - 1, - this->charge->rho[is], 1) - * this->charge->rhopw->omega / this->charge->rhopw->nxyz; - } - Parallel_Reduce::reduce_pool(local_pseudopot_energy); + { + local_pseudopot_energy += BlasConnector::dot (this->charge->rhopw->nrxx, + this->pot->get_fixed_v (), + 1, + this->charge->rho[is], + 1) + * this->charge->rhopw->omega / this->charge->rhopw->nxyz; + } + Parallel_Reduce::reduce_pool (local_pseudopot_energy); return local_pseudopot_energy; } diff --git a/source/source_estate/elecstate_exx.cpp b/source/source_estate/elecstate_exx.cpp index addc7a03da0..84542f51cef 100644 --- a/source/source_estate/elecstate_exx.cpp +++ b/source/source_estate/elecstate_exx.cpp @@ -6,15 +6,16 @@ namespace elecstate /// @brief calculation if converged /// @date Peize Lin add 2016-12-03 -void ElecState::set_exx(const double& Eexx) +void + ElecState::set_exx (const double& Eexx) { - ModuleBase::TITLE("energy", "set_exx"); + ModuleBase::TITLE ("energy", "set_exx"); if (GlobalC::exx_info.info_global.cal_exx) - { - this->f_en.exx = GlobalC::exx_info.info_global.hybrid_alpha * Eexx; - } + { + this->f_en.exx = GlobalC::exx_info.info_global.hybrid_alpha * Eexx; + } return; } -} +} 
// namespace elecstate diff --git a/source/source_estate/elecstate_lcao.cpp b/source/source_estate/elecstate_lcao.cpp index 2040ee769e1..0656ea04463 100644 --- a/source/source_estate/elecstate_lcao.cpp +++ b/source/source_estate/elecstate_lcao.cpp @@ -13,77 +13,79 @@ namespace elecstate { - template <> -double ElecStateLCAO::get_spin_constrain_energy() +double + ElecStateLCAO::get_spin_constrain_energy () { - spinconstrain::SpinConstrain& sc = spinconstrain::SpinConstrain::getScInstance(); - return sc.cal_escon(); + spinconstrain::SpinConstrain& sc = spinconstrain::SpinConstrain::getScInstance (); + return sc.cal_escon (); } template <> -double ElecStateLCAO>::get_spin_constrain_energy() +double + ElecStateLCAO>::get_spin_constrain_energy () { spinconstrain::SpinConstrain>& sc - = spinconstrain::SpinConstrain>::getScInstance(); - return sc.cal_escon(); + = spinconstrain::SpinConstrain>::getScInstance (); + return sc.cal_escon (); } template <> -void ElecStateLCAO::dm2rho(std::vector pexsi_DM, - std::vector pexsi_EDM, - DensityMatrix* dm) +void + ElecStateLCAO::dm2rho (std::vector pexsi_DM, + std::vector pexsi_EDM, + DensityMatrix* dm) { - ModuleBase::timer::start("ElecStateLCAO", "dm2rho"); + ModuleBase::timer::start ("ElecStateLCAO", "dm2rho"); int nspin = PARAM.inp.nspin; if (PARAM.inp.nspin == 4) - { - nspin = 1; - } + { + nspin = 1; + } #ifdef __PEXSI dm->pexsi_EDM = pexsi_EDM; #endif for (int is = 0; is < nspin; is++) - { - dm->set_DMK_pointer(is, pexsi_DM[is]); - } - dm->cal_DMR(); + { + dm->set_DMK_pointer (is, pexsi_DM[is]); + } + dm->cal_DMR (); for (int is = 0; is < PARAM.inp.nspin; is++) - { - ModuleBase::GlobalFunc::ZEROS(this->charge->rho[is], - this->charge->nrxx); // mohan 2009-11-10 - } - - ModuleBase::GlobalFunc::NOTE("Calculate the charge on real space grid!"); - ModuleGint::cal_gint_rho(dm->get_DMR_vector(), PARAM.inp.nspin, this->charge->rho); - if (XC_Functional::get_ked_flag()) - { - for (int is = 0; is < PARAM.inp.nspin; is++) { - 
ModuleBase::GlobalFunc::ZEROS(this->charge->kin_r[0], this->charge->nrxx); + ModuleBase::GlobalFunc::ZEROS (this->charge->rho[is], + this->charge->nrxx); // mohan 2009-11-10 } - ModuleGint::cal_gint_tau(dm->get_DMR_vector(), PARAM.inp.nspin, this->charge->kin_r); - } - this->charge->renormalize_rho(); + ModuleBase::GlobalFunc::NOTE ("Calculate the charge on real space grid!"); + ModuleGint::cal_gint_rho (dm->get_DMR_vector (), PARAM.inp.nspin, this->charge->rho); + if (XC_Functional::get_ked_flag ()) + { + for (int is = 0; is < PARAM.inp.nspin; is++) + { + ModuleBase::GlobalFunc::ZEROS (this->charge->kin_r[0], this->charge->nrxx); + } + ModuleGint::cal_gint_tau (dm->get_DMR_vector (), PARAM.inp.nspin, this->charge->kin_r); + } - ModuleBase::timer::end("ElecStateLCAO", "dm2rho"); + this->charge->renormalize_rho (); + + ModuleBase::timer::end ("ElecStateLCAO", "dm2rho"); return; } template <> -void ElecStateLCAO>::dm2rho(std::vector*> pexsi_DM, - std::vector*> pexsi_EDM, - DensityMatrix, double>* dm) +void + ElecStateLCAO>::dm2rho (std::vector*> pexsi_DM, + std::vector*> pexsi_EDM, + DensityMatrix, double>* dm) { - ModuleBase::WARNING_QUIT("ElecStateLCAO", "pexsi is not completed for multi-k case"); + ModuleBase::WARNING_QUIT ("ElecStateLCAO", "pexsi is not completed for multi-k case"); } - template class ElecStateLCAO; // Gamma_only case template class ElecStateLCAO>; // multi-k case diff --git a/source/source_estate/elecstate_lcao.h b/source/source_estate/elecstate_lcao.h index bf1f11e1f7e..ebfa3154b43 100644 --- a/source/source_estate/elecstate_lcao.h +++ b/source/source_estate/elecstate_lcao.h @@ -12,21 +12,14 @@ template class ElecStateLCAO : public ElecState { public: - ElecStateLCAO() + ElecStateLCAO () {} // will be called by ElecStateLCAO_TDDFT + ElecStateLCAO (Charge* chr_in, const K_Vectors* klist_in, int nks_in, ModulePW::PW_Basis_Big* bigpw_in) { - } // will be called by ElecStateLCAO_TDDFT - ElecStateLCAO(Charge* chr_in, - const K_Vectors* klist_in, - 
int nks_in, - ModulePW::PW_Basis_Big* bigpw_in) - { - init_ks(chr_in, klist_in, nks_in, bigpw_in); + init_ks (chr_in, klist_in, nks_in, bigpw_in); this->classname = "ElecStateLCAO"; } - virtual ~ElecStateLCAO() - { - } + virtual ~ElecStateLCAO () {} // update charge density for next scf step // void getNewRho() override; @@ -34,7 +27,7 @@ class ElecStateLCAO : public ElecState static int out_wfc_lcao; static bool need_psi_grid; - double get_spin_constrain_energy() override; + double get_spin_constrain_energy () override; // use for pexsi @@ -44,10 +37,7 @@ class ElecStateLCAO : public ElecState * @param pexsi_EDM: pointers of energy-weighed density matrix (EDMK) calculated by pexsi, needed by MD, will be * stored in DensityMatrix::pexsi_EDM */ - void dm2rho(std::vector pexsi_DM, - std::vector pexsi_EDM, - DensityMatrix* dm); - + void dm2rho (std::vector pexsi_DM, std::vector pexsi_EDM, DensityMatrix* dm); }; template diff --git a/source/source_estate/elecstate_print.cpp b/source/source_estate/elecstate_print.cpp index 84c7972a41d..da801922d45 100644 --- a/source/source_estate/elecstate_print.cpp +++ b/source/source_estate/elecstate_print.cpp @@ -34,7 +34,8 @@ namespace elecstate * 4. densities: DRHO, DKIN(optional) * 5. 
time: TIME */ -void print_scf_iterinfo(const std::string& ks_solver, +void + print_scf_iterinfo (const std::string& ks_solver, const int& istep, const int& witer, const std::vector& mag, @@ -61,95 +62,94 @@ void print_scf_iterinfo(const std::string& ks_solver, {"pexsi", "PE"}, {"cusolvermp", "CM"}}; // I change the key of "cg_in_lcao" to "CG" because all the other are only two letters // ITER column - std::vector th_fmt = {" %-" + std::to_string(witer) + "s"}; // table header: th: ITER + std::vector th_fmt = {" %-" + std::to_string (witer) + "s"}; // table header: th: ITER std::vector td_fmt - = {" " + iter_header_dict[ks_solver] + "%-" + std::to_string(witer - 2) + ".0f"}; // table data: td: GE10086 + = {" " + iter_header_dict[ks_solver] + "%-" + std::to_string (witer - 2) + ".0f"}; // table data: td: GE10086 // magnetization column, might be non-exist, but size of mag can only be 0, 2 or 4 - for (int i = 0; i < mag.size(); i++) - { - th_fmt.emplace_back(" %" + std::to_string(wmag) + "s"); - } - for (int i = 0; i < mag.size(); i++) - { - td_fmt.emplace_back(" %" + std::to_string(wmag) + ".2e"); - } // hard-code precision here + for (int i = 0; i < mag.size (); i++) + { + th_fmt.emplace_back (" %" + std::to_string (wmag) + "s"); + } + for (int i = 0; i < mag.size (); i++) + { + td_fmt.emplace_back (" %" + std::to_string (wmag) + ".2e"); + } // hard-code precision here // energies for (int i = 0; i < 2; i++) - { - th_fmt.emplace_back(" %" + std::to_string(wener) + "s"); - } + { + th_fmt.emplace_back (" %" + std::to_string (wener) + "s"); + } for (int i = 0; i < 2; i++) - { - td_fmt.emplace_back(" %" + std::to_string(wener) + ".8e"); - } + { + td_fmt.emplace_back (" %" + std::to_string (wener) + ".8e"); + } // densities column, size can be 1 or 2, DRHO or DRHO, DKIN - for (int i = 0; i < drho.size(); i++) - { - th_fmt.emplace_back(" %" + std::to_string(wrho) + "s"); - } - for (int i = 0; i < drho.size(); i++) - { - td_fmt.emplace_back(" %" + std::to_string(wrho) + 
".4e"); - } + for (int i = 0; i < drho.size (); i++) + { + th_fmt.emplace_back (" %" + std::to_string (wrho) + "s"); + } + for (int i = 0; i < drho.size (); i++) + { + td_fmt.emplace_back (" %" + std::to_string (wrho) + ".4e"); + } // time column, trivial - th_fmt.emplace_back(" %" + std::to_string(wtime) + "s\n"); - td_fmt.emplace_back(" %" + std::to_string(wtime) + ".2f\n"); + th_fmt.emplace_back (" %" + std::to_string (wtime) + "s\n"); + td_fmt.emplace_back (" %" + std::to_string (wtime) + ".2f\n"); // contents std::vector titles; std::vector values; - switch (mag.size()) - { - case 2: - titles = {"ITER", - FmtCore::center("TMAG", wmag), - FmtCore::center("AMAG", wmag), - FmtCore::center("ETOT/eV", wener), - FmtCore::center("EDIFF/eV", wener), - FmtCore::center("DRHO", wrho)}; - values = {double(istep), mag[0], mag[1], etot, ediff, drho[0]}; - break; - case 4: - titles = {"ITER", - FmtCore::center("TMAGX", wmag), - FmtCore::center("TMAGY", wmag), - FmtCore::center("TMAGZ", wmag), - FmtCore::center("AMAG", wmag), - FmtCore::center("ETOT/eV", wener), - FmtCore::center("EDIFF/eV", wener), - FmtCore::center("DRHO", wrho)}; - values = {double(istep), mag[0], mag[1], mag[2], mag[3], etot, ediff, drho[0]}; - break; - default: - titles = {"ITER", - FmtCore::center("ETOT/eV", wener), - FmtCore::center("EDIFF/eV", wener), - FmtCore::center("DRHO", wrho)}; - values = {double(istep), etot, ediff, drho[0]}; - break; - } - if (drho.size() > 1) - { - titles.push_back(FmtCore::center("DKIN", wrho)); - values.push_back(drho[1]); - } - titles.push_back(FmtCore::center("TIME/s", wtime)); - values.push_back(time); + switch (mag.size ()) + { + case 2: + titles = {"ITER", + FmtCore::center ("TMAG", wmag), + FmtCore::center ("AMAG", wmag), + FmtCore::center ("ETOT/eV", wener), + FmtCore::center ("EDIFF/eV", wener), + FmtCore::center ("DRHO", wrho)}; + values = {double (istep), mag[0], mag[1], etot, ediff, drho[0]}; + break; + case 4: + titles = {"ITER", + FmtCore::center ("TMAGX", 
wmag), + FmtCore::center ("TMAGY", wmag), + FmtCore::center ("TMAGZ", wmag), + FmtCore::center ("AMAG", wmag), + FmtCore::center ("ETOT/eV", wener), + FmtCore::center ("EDIFF/eV", wener), + FmtCore::center ("DRHO", wrho)}; + values = {double (istep), mag[0], mag[1], mag[2], mag[3], etot, ediff, drho[0]}; + break; + default: + titles = {"ITER", + FmtCore::center ("ETOT/eV", wener), + FmtCore::center ("EDIFF/eV", wener), + FmtCore::center ("DRHO", wrho)}; + values = {double (istep), etot, ediff, drho[0]}; + break; + } + if (drho.size () > 1) + { + titles.push_back (FmtCore::center ("DKIN", wrho)); + values.push_back (drho[1]); + } + titles.push_back (FmtCore::center ("TIME/s", wtime)); + values.push_back (time); std::string buf; if (istep == 1) - { - for (int i = 0; i < titles.size(); i++) { - buf += FmtCore::format(th_fmt[i].c_str(), titles[i]); + for (int i = 0; i < titles.size (); i++) + { + buf += FmtCore::format (th_fmt[i].c_str (), titles[i]); + } + } + for (int i = 0; i < values.size (); i++) + { + buf += FmtCore::format (td_fmt[i].c_str (), values[i]); } - } - for (int i = 0; i < values.size(); i++) - { - buf += FmtCore::format(td_fmt[i].c_str(), values[i]); - } std::cout << buf << std::flush; } - /// @brief print total free energy and other energies /// @param ucell: unit cell /// @param converged: if converged @@ -159,7 +159,8 @@ void print_scf_iterinfo(const std::string& ks_solver, /// @param pw_diag_thr: threshold for diagonalization /// @param avg_iter: averaged diagonalization iteration of each scf iteration /// @param print: if print to screen -void print_etot(const Magnetism& magnet, +void + print_etot (const Magnetism& magnet, const ElecState& elec, const bool converged, const int& iter_in, @@ -170,235 +171,232 @@ void print_etot(const Magnetism& magnet, const double& avg_iter, const bool print) { - ModuleBase::TITLE("energy", "print_etot"); + ModuleBase::TITLE ("energy", "print_etot"); const int iter = iter_in; const int nrxx = elec.charge->nrxx; 
const int nxyz = elec.charge->nxyz; - GlobalV::ofs_running << std::setprecision(6); - GlobalV::ofs_running << std::setiosflags(std::ios::right); + GlobalV::ofs_running << std::setprecision (6); + GlobalV::ofs_running << std::setiosflags (std::ios::right); GlobalV::ofs_running << " Electron density deviation " << scf_thr << std::endl; if (PARAM.inp.basis_type == "pw") - { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Diago Threshold", pw_diag_thr); - } + { + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Diago Threshold", pw_diag_thr); + } std::vector titles; std::vector energies_Ry; std::vector energies_eV; - if( (iter % PARAM.inp.out_freq_elec == 0) || converged || iter == PARAM.inp.scf_nmax ) - { - int n_order = std::max(0, Occupy::gaussian_type); - - //! Kohn-Sham functional energy - titles.push_back("E_KohnSham"); - energies_Ry.push_back(elec.f_en.etot); + if ((iter % PARAM.inp.out_freq_elec == 0) || converged || iter == PARAM.inp.scf_nmax) + { + int n_order = std::max (0, Occupy::gaussian_type); - //! Kohn-Sham energy with sigma->0 - titles.push_back("E_KS(sigma->0)"); - energies_Ry.push_back(elec.f_en.etot - elec.f_en.demet / (2 + n_order)); + //! Kohn-Sham functional energy + titles.push_back ("E_KohnSham"); + energies_Ry.push_back (elec.f_en.etot); - //! Harris functional energy - titles.push_back("E_Harris"); - energies_Ry.push_back(elec.f_en.etot_harris); + //! Kohn-Sham energy with sigma->0 + titles.push_back ("E_KS(sigma->0)"); + energies_Ry.push_back (elec.f_en.etot - elec.f_en.demet / (2 + n_order)); - //! band energy - titles.push_back("E_band"); - energies_Ry.push_back(elec.f_en.eband); + //! Harris functional energy + titles.push_back ("E_Harris"); + energies_Ry.push_back (elec.f_en.etot_harris); - //! one-electron energy - titles.push_back("E_one_elec"); - energies_Ry.push_back(elec.f_en.eband + elec.f_en.deband); + //! band energy + titles.push_back ("E_band"); + energies_Ry.push_back (elec.f_en.eband); - //! 
Hartree energy - titles.push_back("E_Hartree"); - energies_Ry.push_back(elec.f_en.hartree_energy); + //! one-electron energy + titles.push_back ("E_one_elec"); + energies_Ry.push_back (elec.f_en.eband + elec.f_en.deband); - //! exchange-correlation energy - titles.push_back("E_xc"); - energies_Ry.push_back(elec.f_en.etxc - elec.f_en.etxcc); + //! Hartree energy + titles.push_back ("E_Hartree"); + energies_Ry.push_back (elec.f_en.hartree_energy); - //! Ewald energy - titles.push_back("E_Ewald"); - energies_Ry.push_back(elec.f_en.ewald_energy); + //! exchange-correlation energy + titles.push_back ("E_xc"); + energies_Ry.push_back (elec.f_en.etxc - elec.f_en.etxcc); - //! entropy energy - titles.push_back("E_entropy(-TS)"); - energies_Ry.push_back(elec.f_en.demet); + //! Ewald energy + titles.push_back ("E_Ewald"); + energies_Ry.push_back (elec.f_en.ewald_energy); - //! correction energy for scf - titles.push_back("E_descf"); - energies_Ry.push_back(elec.f_en.descf); + //! entropy energy + titles.push_back ("E_entropy(-TS)"); + energies_Ry.push_back (elec.f_en.demet); - //! local potential energy - titles.push_back("E_localpp"); - energies_Ry.push_back(elec.f_en.e_local_pp); + //! correction energy for scf + titles.push_back ("E_descf"); + energies_Ry.push_back (elec.f_en.descf); - //! vdw energy - std::string vdw_method = PARAM.inp.vdw_method; - if (vdw_method == "d2") // Peize Lin add 2014-04, update 2021-03-09 - { - titles.push_back("E_vdwD2"); - energies_Ry.push_back(elec.f_en.evdw); - } - else if (vdw_method == "d3_0" || vdw_method == "d3_bj") // jiyy add 2019-05, update 2021-05-02 - { - titles.push_back("E_vdwD3"); - energies_Ry.push_back(elec.f_en.evdw); - } + //! local potential energy + titles.push_back ("E_localpp"); + energies_Ry.push_back (elec.f_en.e_local_pp); - // mohan add 20251108 - if (PARAM.inp.dft_plus_u) - { - titles.push_back("E_plusU"); - energies_Ry.push_back(elec.f_en.edftu); - } + //! 
vdw energy + std::string vdw_method = PARAM.inp.vdw_method; + if (vdw_method == "d2") // Peize Lin add 2014-04, update 2021-03-09 + { + titles.push_back ("E_vdwD2"); + energies_Ry.push_back (elec.f_en.evdw); + } + else if (vdw_method == "d3_0" || vdw_method == "d3_bj") // jiyy add 2019-05, update 2021-05-02 + { + titles.push_back ("E_vdwD3"); + energies_Ry.push_back (elec.f_en.evdw); + } - //! hybrid functional energy - titles.push_back("E_exx"); - energies_Ry.push_back(elec.f_en.exx); + // mohan add 20251108 + if (PARAM.inp.dft_plus_u) + { + titles.push_back ("E_plusU"); + energies_Ry.push_back (elec.f_en.edftu); + } - //! solvation energy - if (PARAM.inp.imp_sol) - { - titles.push_back("E_sol_el"); - energies_Ry.push_back(elec.f_en.esol_el); - titles.push_back("E_sol_cav"); - energies_Ry.push_back(elec.f_en.esol_cav); - } + //! hybrid functional energy + titles.push_back ("E_exx"); + energies_Ry.push_back (elec.f_en.exx); - //! electric field energy - if (PARAM.inp.efield_flag) - { - titles.push_back("E_efield"); - energies_Ry.push_back(elecstate::Efield::etotefield); - } - - //! gate energy - if (PARAM.inp.gate_flag) - { - titles.push_back("E_gatefield"); - energies_Ry.push_back(elecstate::Gatefield::etotgatefield); - } + //! solvation energy + if (PARAM.inp.imp_sol) + { + titles.push_back ("E_sol_el"); + energies_Ry.push_back (elec.f_en.esol_el); + titles.push_back ("E_sol_cav"); + energies_Ry.push_back (elec.f_en.esol_cav); + } + + //! electric field energy + if (PARAM.inp.efield_flag) + { + titles.push_back ("E_efield"); + energies_Ry.push_back (elecstate::Efield::etotefield); + } + + //! gate energy + if (PARAM.inp.gate_flag) + { + titles.push_back ("E_gatefield"); + energies_Ry.push_back (elecstate::Gatefield::etotgatefield); + } - //! deepks energy + //! 
deepks energy #ifdef __MLALGO - if (PARAM.inp.deepks_scf) - { - titles.push_back("E_DeePKS"); - energies_Ry.push_back(elec.f_en.edeepks_delta); + if (PARAM.inp.deepks_scf) + { + titles.push_back ("E_DeePKS"); + energies_Ry.push_back (elec.f_en.edeepks_delta); + } + if (PARAM.inp.ml_exx) + { + titles.push_back ("E_ML-EXX"); + energies_Ry.push_back (elec.f_en.ml_exx); + } +#endif } - if (PARAM.inp.ml_exx) + else { - titles.push_back("E_ML-EXX"); - energies_Ry.push_back(elec.f_en.ml_exx); + titles.push_back ("E_KohnSham"); + energies_Ry.push_back (elec.f_en.etot); + titles.push_back ("E_Harris"); + energies_Ry.push_back (elec.f_en.etot_harris); } -#endif - } - else - { - titles.push_back("E_KohnSham"); - energies_Ry.push_back(elec.f_en.etot); - titles.push_back("E_Harris"); - energies_Ry.push_back(elec.f_en.etot_harris); - } // print out the Fermi energy if needed if (PARAM.globalv.two_fermi) - { - titles.push_back("E_Fermi_up"); - energies_Ry.push_back(elec.eferm.ef_up); - titles.push_back("E_Fermi_dw"); - energies_Ry.push_back(elec.eferm.ef_dw); - } + { + titles.push_back ("E_Fermi_up"); + energies_Ry.push_back (elec.eferm.ef_up); + titles.push_back ("E_Fermi_dw"); + energies_Ry.push_back (elec.eferm.ef_dw); + } else - { - titles.push_back("E_Fermi"); - energies_Ry.push_back(elec.eferm.ef); - } + { + titles.push_back ("E_Fermi"); + energies_Ry.push_back (elec.eferm.ef); + } // print out the band gap if needed if (!PARAM.globalv.two_fermi) - { - titles.push_back("E_gap(k)"); // gap of given k-points - energies_Ry.push_back(elec.bandgap); - } + { + titles.push_back ("E_gap(k)"); // gap of given k-points + energies_Ry.push_back (elec.bandgap); + } else - { - titles.push_back("E_gap_up(k)"); - energies_Ry.push_back(elec.bandgap_up); - titles.push_back("E_gap_dw(k)"); - energies_Ry.push_back(elec.bandgap_dw); - } - energies_eV.resize(energies_Ry.size()); - std::transform(energies_Ry.begin(), energies_Ry.end(), energies_eV.begin(), [](double ener) { - return ener * 
ModuleBase::Ry_to_eV; - }); + { + titles.push_back ("E_gap_up(k)"); + energies_Ry.push_back (elec.bandgap_up); + titles.push_back ("E_gap_dw(k)"); + energies_Ry.push_back (elec.bandgap_dw); + } + energies_eV.resize (energies_Ry.size ()); + std::transform (energies_Ry.begin (), + energies_Ry.end (), + energies_eV.begin (), + [] (double ener) { return ener * ModuleBase::Ry_to_eV; }); // for each SCF step, we print out energy - FmtTable table(/*titles=*/{"Energy", "Rydberg", "eV"}, - /*nrows=*/titles.size(), - /*formats=*/{"%-14s", "%20.10f", "%20.10f"}, - /*indents=*/1, - /*align=*/{/*value*/FmtTable::Align::LEFT, /*title*/FmtTable::Align::CENTER}); + FmtTable table (/*titles=*/{"Energy", "Rydberg", "eV"}, + /*nrows=*/titles.size (), + /*formats=*/{"%-14s", "%20.10f", "%20.10f"}, + /*indents=*/1, + /*align=*/{/*value*/ FmtTable::Align::LEFT, /*title*/ FmtTable::Align::CENTER}); // print out the titles table << titles << energies_Ry << energies_eV; - GlobalV::ofs_running << table.str() << std::endl; + GlobalV::ofs_running << table.str () << std::endl; - - if (PARAM.inp.out_level == "ie" || PARAM.inp.out_level == "m") - { - std::vector mag; - switch (PARAM.inp.nspin) - { - case 2: - mag = {magnet.tot_mag, magnet.abs_mag}; - break; - case 4: - mag = {magnet.tot_mag_nc[0], - magnet.tot_mag_nc[1], - magnet.tot_mag_nc[2], - magnet.abs_mag}; - break; - default: - mag = {}; - break; - } - std::vector drho = {scf_thr}; - if (XC_Functional::get_ked_flag()) { - drho.push_back(scf_thr_kin); + std::vector mag; + switch (PARAM.inp.nspin) + { + case 2: + mag = {magnet.tot_mag, magnet.abs_mag}; + break; + case 4: + mag = {magnet.tot_mag_nc[0], magnet.tot_mag_nc[1], magnet.tot_mag_nc[2], magnet.abs_mag}; + break; + default: + mag = {}; + break; + } + std::vector drho = {scf_thr}; + if (XC_Functional::get_ked_flag ()) + { + drho.push_back (scf_thr_kin); + } + elecstate::print_scf_iterinfo (PARAM.inp.ks_solver, + iter, + 6, + mag, + 10, + elec.f_en.etot * ModuleBase::Ry_to_eV, + 
elec.f_en.etot_delta * ModuleBase::Ry_to_eV, + 16, + drho, + 12, + duration, + 6); } - elecstate::print_scf_iterinfo(PARAM.inp.ks_solver, - iter, - 6, - mag, - 10, - elec.f_en.etot * ModuleBase::Ry_to_eV, - elec.f_en.etot_delta * ModuleBase::Ry_to_eV, - 16, - drho, - 12, - duration, - 6); - } return; } /// @brief function to print name, value and value*Ry_to_eV /// @param name: name /// @param value: value -void print_format(const std::string& name, const double& value) +void + print_format (const std::string& name, const double& value) { - GlobalV::ofs_running << std::setiosflags(std::ios::showpos); - GlobalV::ofs_running << " " << std::setw(16) << name << std::setw(30) << value << std::setw(30) + GlobalV::ofs_running << std::setiosflags (std::ios::showpos); + GlobalV::ofs_running << " " << std::setw (16) << name << std::setw (30) << value << std::setw (30) << value * ModuleBase::Ry_to_eV << std::endl; - GlobalV::ofs_running << std::resetiosflags(std::ios::showpos); + GlobalV::ofs_running << std::resetiosflags (std::ios::showpos); return; } } // namespace elecstate diff --git a/source/source_estate/elecstate_print.h b/source/source_estate/elecstate_print.h index 8b54c4fb643..d60ac60bf85 100644 --- a/source/source_estate/elecstate_print.h +++ b/source/source_estate/elecstate_print.h @@ -5,18 +5,17 @@ namespace elecstate { - void print_format(const std::string& name, - const double& value); - - void print_etot(const Magnetism& magnet, - const ElecState& elec, - const bool converged, - const int& iter_in, - const double& scf_thr, - const double& scf_thr_kin, - const double& duration, - const double& pw_diag_thr = 0, - const double& avg_iter = 0, - bool print = true); -} +void print_format (const std::string& name, const double& value); + +void print_etot (const Magnetism& magnet, + const ElecState& elec, + const bool converged, + const int& iter_in, + const double& scf_thr, + const double& scf_thr_kin, + const double& duration, + const double& pw_diag_thr = 0, + 
const double& avg_iter = 0, + bool print = true); +} // namespace elecstate #endif diff --git a/source/source_estate/elecstate_pw.cpp b/source/source_estate/elecstate_pw.cpp index de05d441b58..702fc618d54 100644 --- a/source/source_estate/elecstate_pw.cpp +++ b/source/source_estate/elecstate_pw.cpp @@ -10,168 +10,175 @@ #include "source_io/module_parameter/parameter.h" #include "source_pw/module_pwdft/vnl_pw.h" -namespace elecstate { +namespace elecstate +{ template -ElecStatePW::ElecStatePW(ModulePW::PW_Basis_K* wfc_basis_in, - Charge* chr_in, - K_Vectors* pkv_in, - UnitCell* ucell_in, - pseudopot_cell_vnl* ppcell_in, - ModulePW::PW_Basis* rhopw_in, - ModulePW::PW_Basis_Big* bigpw_in) - : basis(wfc_basis_in) +ElecStatePW::ElecStatePW (ModulePW::PW_Basis_K* wfc_basis_in, + Charge* chr_in, + K_Vectors* pkv_in, + UnitCell* ucell_in, + pseudopot_cell_vnl* ppcell_in, + ModulePW::PW_Basis* rhopw_in, + ModulePW::PW_Basis_Big* bigpw_in) + : basis (wfc_basis_in) { this->classname = "ElecStatePW"; this->rhopw_smooth = rhopw_in; this->ppcell = ppcell_in; this->ucell = ucell_in; - this->init_ks(chr_in, pkv_in, pkv_in->get_nks(), bigpw_in); + this->init_ks (chr_in, pkv_in, pkv_in->get_nks (), bigpw_in); } -template -ElecStatePW::~ElecStatePW() +template +ElecStatePW::~ElecStatePW () { if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") - { - delmem_var_op()(this->rho_data); - delete[] this->rho; - - if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) { - delmem_complex_op()(this->rhog_data); - delete[] this->rhog; + delmem_var_op () (this->rho_data); + delete[] this->rho; + + if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) + { + delmem_complex_op () (this->rhog_data); + delete[] this->rhog; + } + if (XC_Functional::get_func_type () == 3 || PARAM.inp.out_elf[0] > 0) + { + delmem_var_op () (this->kin_r_data); + delete[] this->kin_r; + } } - if (XC_Functional::get_func_type() == 3 || PARAM.inp.out_elf[0] > 0) + if (PARAM.globalv.use_uspp) { - 
delmem_var_op()(this->kin_r_data); - delete[] this->kin_r; + delmem_var_op () (this->becsum); } - } - if (PARAM.globalv.use_uspp) - { - delmem_var_op()(this->becsum); - } - delmem_complex_op()(this->wfcr); - delmem_complex_op()(this->wfcr_another_spin); + delmem_complex_op () (this->wfcr); + delmem_complex_op () (this->wfcr_another_spin); } -template -void ElecStatePW::init_rho_data() +template +void + ElecStatePW::init_rho_data () { if (this->init_rho) - { - return; - } - - if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") - { - this->rho = new Real*[this->charge->nspin]; - resmem_var_op()(this->rho_data, this->charge->nspin * this->charge->nrxx); - for (int ii = 0; ii < this->charge->nspin; ii++) { - this->rho[ii] = this->rho_data + ii * this->charge->nrxx; + return; } - if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) + + if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") { - this->rhog = new T*[this->charge->nspin]; - resmem_complex_op()(this->rhog_data, this->charge->nspin * this->charge->rhopw->npw); + this->rho = new Real*[this->charge->nspin]; + resmem_var_op () (this->rho_data, this->charge->nspin * this->charge->nrxx); for (int ii = 0; ii < this->charge->nspin; ii++) - { - this->rhog[ii] = this->rhog_data + ii * this->charge->rhopw->npw; - } - } - if (XC_Functional::get_func_type() == 3 || PARAM.inp.out_elf[0] > 0) - { - this->kin_r = new Real*[this->charge->nspin]; - resmem_var_op()(this->kin_r_data, this->charge->nspin * this->charge->nrxx); - for (int ii = 0; ii < this->charge->nspin; ii++) { - this->kin_r[ii] = this->kin_r_data + ii * this->charge->nrxx; - } + { + this->rho[ii] = this->rho_data + ii * this->charge->nrxx; + } + if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) + { + this->rhog = new T*[this->charge->nspin]; + resmem_complex_op () (this->rhog_data, this->charge->nspin * this->charge->rhopw->npw); + for (int ii = 0; ii < this->charge->nspin; ii++) + { + this->rhog[ii] = this->rhog_data + ii * 
this->charge->rhopw->npw; + } + } + if (XC_Functional::get_func_type () == 3 || PARAM.inp.out_elf[0] > 0) + { + this->kin_r = new Real*[this->charge->nspin]; + resmem_var_op () (this->kin_r_data, this->charge->nspin * this->charge->nrxx); + for (int ii = 0; ii < this->charge->nspin; ii++) + { + this->kin_r[ii] = this->kin_r_data + ii * this->charge->nrxx; + } + } } - } else - { - this->rho = reinterpret_cast(this->charge->rho); - if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) - { - this->rhog = reinterpret_cast(this->charge->rhog); - } - if (XC_Functional::get_func_type() == 3 || PARAM.inp.out_elf[0] > 0) { - this->kin_r = reinterpret_cast(this->charge->kin_r); + this->rho = reinterpret_cast (this->charge->rho); + if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) + { + this->rhog = reinterpret_cast (this->charge->rhog); + } + if (XC_Functional::get_func_type () == 3 || PARAM.inp.out_elf[0] > 0) + { + this->kin_r = reinterpret_cast (this->charge->kin_r); + } } - } - resmem_complex_op()(this->wfcr, this->basis->nmaxgr, "ElecSPW::wfcr"); - resmem_complex_op()(this->wfcr_another_spin, this->basis->nrxx, "ElecSPW::wfcr_a"); + resmem_complex_op () (this->wfcr, this->basis->nmaxgr, "ElecSPW::wfcr"); + resmem_complex_op () (this->wfcr_another_spin, this->basis->nrxx, "ElecSPW::wfcr_a"); this->init_rho = true; } -template -void ElecStatePW::psiToRho(const psi::Psi& psi) +template +void + ElecStatePW::psiToRho (const psi::Psi& psi) { - ModuleBase::TITLE("ElecStatePW", "psiToRho"); - ModuleBase::timer::start("ElecStatePW", "psiToRho"); + ModuleBase::TITLE ("ElecStatePW", "psiToRho"); + ModuleBase::timer::start ("ElecStatePW", "psiToRho"); - this->init_rho_data(); + this->init_rho_data (); - for(int is=0; isrho[is], this->charge->nrxx); - setmem_var_op()(this->rho[is], 0, this->charge->nrxx); - if (XC_Functional::get_func_type() == 3) + for (int is = 0; is < PARAM.inp.nspin; is++) { - // ModuleBase::GlobalFunc::ZEROS(this->charge->kin_r[is], 
this->charge->nrxx); - setmem_var_op()(this->kin_r[is], 0, this->charge->nrxx); + // denghui replaced at 20221110 + // ModuleBase::GlobalFunc::ZEROS(this->rho[is], this->charge->nrxx); + setmem_var_op () (this->rho[is], 0, this->charge->nrxx); + if (XC_Functional::get_func_type () == 3) + { + // ModuleBase::GlobalFunc::ZEROS(this->charge->kin_r[is], this->charge->nrxx); + setmem_var_op () (this->kin_r[is], 0, this->charge->nrxx); + } + if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) + { + setmem_complex_op () (this->rhog[is], 0, this->charge->rhopw->npw); + } } - if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) + + for (int ik = 0; ik < psi.get_nk (); ++ik) { - setmem_complex_op()(this->rhog[is], 0, this->charge->rhopw->npw); + psi.fix_k (ik); + this->updateRhoK (psi); } - } - for (int ik = 0; ik < psi.get_nk(); ++ik) - { - psi.fix_k(ik); - this->updateRhoK(psi); - } - - this->add_usrho(psi); + this->add_usrho (psi); if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") - { - for (int ii = 0; ii < PARAM.inp.nspin; ii++) { - castmem_var_d2h_op()(this->charge->rho[ii], this->rho[ii], this->charge->nrxx); - if (XC_Functional::get_func_type() == 3) - { - castmem_var_d2h_op()(this->charge->kin_r[ii], this->kin_r[ii], this->charge->nrxx); - } + for (int ii = 0; ii < PARAM.inp.nspin; ii++) + { + castmem_var_d2h_op () (this->charge->rho[ii], this->rho[ii], this->charge->nrxx); + if (XC_Functional::get_func_type () == 3) + { + castmem_var_d2h_op () (this->charge->kin_r[ii], this->kin_r[ii], this->charge->nrxx); + } + } } - } - this->parallelK(); - ModuleBase::timer::end("ElecStatePW", "psiToRho"); + this->parallelK (); + ModuleBase::timer::end ("ElecStatePW", "psiToRho"); } -template -void ElecStatePW::updateRhoK(const psi::Psi& psi) +template +void + ElecStatePW::updateRhoK (const psi::Psi& psi) { - this->rhoBandK(psi); + this->rhoBandK (psi); } -template -void ElecStatePW::parallelK() +template +void + ElecStatePW::parallelK () { #ifdef __MPI 
- this->charge->rho_mpi(); + this->charge->rho_mpi (); #endif } -template -void ElecStatePW::rhoBandK(const psi::Psi& psi) +template +void + ElecStatePW::rhoBandK (const psi::Psi& psi) { - ModuleBase::TITLE("ElecStatePW", "rhoBandK"); + ModuleBase::TITLE ("ElecStatePW", "rhoBandK"); // moved by denghui to constructor at 20221110 // used for plane wavefunction FFT3D to real space @@ -182,279 +189,293 @@ void ElecStatePW::rhoBandK(const psi::Psi& psi) // if (PARAM.inp.nspin == 4) // wfcr_another_spin.resize(this->charge->nrxx); - this->init_rho_data(); - int ik = psi.get_current_k(); - int npw = psi.get_current_ngk(); + this->init_rho_data (); + int ik = psi.get_current_k (); + int npw = psi.get_current_ngk (); int current_spin = 0; if (PARAM.inp.nspin == 2) - { - current_spin = this->klist->isk[ik]; - } - int nbands = psi.get_nbands(); + { + current_spin = this->klist->isk[ik]; + } + int nbands = psi.get_nbands (); // here we compute the band energy: the sum of the eigenvalues if (PARAM.inp.nspin == 4) - { - int npwx = npw / 2; - for (int ibnd = 0; ibnd < nbands; ibnd++) { - /// - /// only occupied band should be calculated. - /// be care of when smearing_sigma is large, wg would less than 0 - /// - - this->basis->recip_to_real(this->ctx, &psi(ibnd,0), this->wfcr, ik); - - this->basis->recip_to_real(this->ctx, &psi(ibnd,npwx), this->wfcr_another_spin, ik); - - const auto w1 = static_cast(this->wg(ik, ibnd) / ucell->omega); - - if (w1 != 0.0) - { - // replaced by denghui at 20221110 - elecstate_pw_op()(this->ctx, - PARAM.globalv.domag, - PARAM.globalv.domag_z, - this->basis->nrxx, - w1, - this->rho, - this->wfcr, - this->wfcr_another_spin); - } + int npwx = npw / 2; + for (int ibnd = 0; ibnd < nbands; ibnd++) + { + /// + /// only occupied band should be calculated. 
+ /// be care of when smearing_sigma is large, wg would less than 0 + /// + + this->basis->recip_to_real (this->ctx, &psi (ibnd, 0), this->wfcr, ik); + + this->basis->recip_to_real (this->ctx, &psi (ibnd, npwx), this->wfcr_another_spin, ik); + + const auto w1 = static_cast (this->wg (ik, ibnd) / ucell->omega); + + if (w1 != 0.0) + { + // replaced by denghui at 20221110 + elecstate_pw_op () (this->ctx, + PARAM.globalv.domag, + PARAM.globalv.domag_z, + this->basis->nrxx, + w1, + this->rho, + this->wfcr, + this->wfcr_another_spin); + } + } } - } else - { - for (int ibnd = 0; ibnd < nbands; ibnd++) { - /// - /// only occupied band should be calculated. - /// + for (int ibnd = 0; ibnd < nbands; ibnd++) + { + /// + /// only occupied band should be calculated. + /// - this->basis->recip_to_real(this->ctx, &psi(ibnd,0), this->wfcr, ik); + this->basis->recip_to_real (this->ctx, &psi (ibnd, 0), this->wfcr, ik); - const auto w1 = static_cast(this->wg(ik, ibnd) / ucell->omega); + const auto w1 = static_cast (this->wg (ik, ibnd) / ucell->omega); - if (w1 != 0.0) - { - // replaced by denghui at 20221110 - elecstate_pw_op()(this->ctx, current_spin, this->basis->nrxx, w1, this->rho, this->wfcr); - } + if (w1 != 0.0) + { + // replaced by denghui at 20221110 + elecstate_pw_op () (this->ctx, current_spin, this->basis->nrxx, w1, this->rho, this->wfcr); + } - // kinetic energy density - if (XC_Functional::get_func_type() == 3) - { - for (int j = 0; j < 3; j++) - { - setmem_complex_op()(this->wfcr, 0, this->charge->nrxx); - - meta_op()(this->ctx, - ik, - j, - npw, - this->basis->npwk_max, - static_cast(ucell->tpiba), - this->basis->template get_gcar_data(), - this->basis->template get_kvec_c_data(), - &psi(ibnd, 0), - this->wfcr); - - this->basis->recip_to_real(this->ctx, this->wfcr, this->wfcr, ik); - - elecstate_pw_op()(this->ctx, current_spin, this->charge->nrxx, w1, this->kin_r, this->wfcr); + // kinetic energy density + if (XC_Functional::get_func_type () == 3) + { + for (int j = 
0; j < 3; j++) + { + setmem_complex_op () (this->wfcr, 0, this->charge->nrxx); + + meta_op () (this->ctx, + ik, + j, + npw, + this->basis->npwk_max, + static_cast (ucell->tpiba), + this->basis->template get_gcar_data (), + this->basis->template get_kvec_c_data (), + &psi (ibnd, 0), + this->wfcr); + + this->basis->recip_to_real (this->ctx, this->wfcr, this->wfcr, ik); + + elecstate_pw_op () (this->ctx, + current_spin, + this->charge->nrxx, + w1, + this->kin_r, + this->wfcr); + } + } } - } } - } } template -void ElecStatePW::cal_becsum(const psi::Psi& psi) +void + ElecStatePW::cal_becsum (const psi::Psi& psi) { const T one{1, 0}; const T zero{0, 0}; - const int npol = psi.get_npol(); - const int npwx = psi.get_nbasis() / npol; - const int nbands = psi.get_nbands() * npol; + const int npol = psi.get_npol (); + const int npwx = psi.get_nbasis () / npol; + const int nbands = psi.get_nbands () * npol; const int nkb = this->ppcell->nkb; - this->vkb = this->ppcell->template get_vkb_data(); + this->vkb = this->ppcell->template get_vkb_data (); T* becp = nullptr; - resmem_complex_op()(becp, nbands * nkb, "ElecState::becp"); + resmem_complex_op () (becp, nbands * nkb, "ElecState::becp"); const int nh_tot = this->ppcell->nhm * (this->ppcell->nhm + 1) / 2; - resmem_var_op()(becsum, nh_tot * ucell->nat * PARAM.inp.nspin, "ElecState::becsum"); - setmem_var_op()(becsum, 0, nh_tot * ucell->nat * PARAM.inp.nspin); - - for (int ik = 0; ik < psi.get_nk(); ++ik) - { - psi.fix_k(ik); - const T* psi_now = psi.get_pointer(); - const int currect_spin = this->klist->isk[ik]; - const int npw = psi.get_current_ngk(); - - // get |beta> - if (this->ppcell->nkb > 0) - { - this->ppcell->getvnl(this->ctx, *ucell,ik, this->vkb); - } + resmem_var_op () (becsum, nh_tot * ucell->nat * PARAM.inp.nspin, "ElecState::becsum"); + setmem_var_op () (becsum, 0, nh_tot * ucell->nat * PARAM.inp.nspin); - // becp = - char transa = 'C'; - char transb = 'N'; - if (nbands == 1) + for (int ik = 0; ik < psi.get_nk 
(); ++ik) { - int inc = 1; - gemv_op()(transa, - npw, - this->ppcell->nkb, - &one, - this->vkb, - this->ppcell->vkb.nc, - psi_now, - inc, - &zero, - becp, - inc); - } - else - { - gemm_op()(transa, - transb, - this->ppcell->nkb, - nbands, - npw, - &one, - this->vkb, - this->ppcell->vkb.nc, - psi_now, - npwx, - &zero, - becp, - this->ppcell->nkb); - } - Parallel_Reduce::reduce_pool(becp, this->ppcell->nkb * nbands); + psi.fix_k (ik); + const T* psi_now = psi.get_pointer (); + const int currect_spin = this->klist->isk[ik]; + const int npw = psi.get_current_ngk (); - // sum over bands: \sum_i w_i - for (int it = 0; it < ucell->ntype; it++) - { - Atom* atom = &ucell->atoms[it]; - if (atom->ncpp.tvanp) - { - T *auxk1 = nullptr, *auxk2 = nullptr, *aux_gk = nullptr; - resmem_complex_op()(auxk1, nbands * atom->ncpp.nh, "ElecState::auxk1"); - resmem_complex_op()(auxk2, nbands * atom->ncpp.nh, "ElecState::auxk2"); - resmem_complex_op()(aux_gk, - atom->ncpp.nh * atom->ncpp.nh * npol * npol, - "ElecState::aux_gk"); - for (int ia = 0; ia < atom->na; ia++) + // get |beta> + if (this->ppcell->nkb > 0) { - const int iat = ucell->itia2iat(it, ia); - if (PARAM.inp.noncolin) - { - // noncolinear case - } - else - { - for (int ih = 0; ih < atom->ncpp.nh; ih++) - { - const int ikb = this->ppcell->indv_ijkb0[iat] + ih; - for (int ib = 0; ib < nbands; ib++) - { - auxk1[ih * nbands + ib] = becp[ib * this->ppcell->nkb + ikb]; - auxk2[ih * nbands + ib] - = becp[ib * this->ppcell->nkb + ikb] * static_cast(this->wg(ik, ib)); - } - } + this->ppcell->getvnl (this->ctx, *ucell, ik, this->vkb); + } + + // becp = + char transa = 'C'; + char transb = 'N'; + if (nbands == 1) + { + int inc = 1; + gemv_op () (transa, + npw, + this->ppcell->nkb, + &one, + this->vkb, + this->ppcell->vkb.nc, + psi_now, + inc, + &zero, + becp, + inc); + } + else + { + gemm_op () (transa, + transb, + this->ppcell->nkb, + nbands, + npw, + &one, + this->vkb, + this->ppcell->vkb.nc, + psi_now, + npwx, + &zero, + becp, + 
this->ppcell->nkb); + } + Parallel_Reduce::reduce_pool (becp, this->ppcell->nkb * nbands); - char transa = 'C'; - char transb = 'N'; - gemm_op()(transa, - transb, - atom->ncpp.nh, - atom->ncpp.nh, - nbands, - &one, - auxk1, - nbands, - auxk2, - nbands, - &zero, - aux_gk, - atom->ncpp.nh); - } - - // copy output from GEMM into desired format - if (PARAM.inp.noncolin && !atom->ncpp.has_so) - { - } - else if (PARAM.inp.noncolin && atom->ncpp.has_so) - { - } - else - { - int ijh = 0; - const int index = currect_spin * ucell->nat * nh_tot + iat * nh_tot; - for (int ih = 0; ih < atom->ncpp.nh; ih++) + // sum over bands: \sum_i w_i + for (int it = 0; it < ucell->ntype; it++) + { + Atom* atom = &ucell->atoms[it]; + if (atom->ncpp.tvanp) { - for (int jh = ih; jh < atom->ncpp.nh; jh++) - { - // nondiagonal terms summed and collapsed into a - // single index (matrix is symmetric wrt (ih,jh)) - if (ih == jh) - { - becsum[index + ijh] += std::real(aux_gk[ih * atom->ncpp.nh + jh]); - } - else + T *auxk1 = nullptr, *auxk2 = nullptr, *aux_gk = nullptr; + resmem_complex_op () (auxk1, nbands * atom->ncpp.nh, "ElecState::auxk1"); + resmem_complex_op () (auxk2, nbands * atom->ncpp.nh, "ElecState::auxk2"); + resmem_complex_op () (aux_gk, + atom->ncpp.nh * atom->ncpp.nh * npol * npol, + "ElecState::aux_gk"); + for (int ia = 0; ia < atom->na; ia++) { - becsum[index + ijh] += 2.0 * std::real(aux_gk[ih * atom->ncpp.nh + jh]); + const int iat = ucell->itia2iat (it, ia); + if (PARAM.inp.noncolin) + { + // noncolinear case + } + else + { + for (int ih = 0; ih < atom->ncpp.nh; ih++) + { + const int ikb = this->ppcell->indv_ijkb0[iat] + ih; + for (int ib = 0; ib < nbands; ib++) + { + auxk1[ih * nbands + ib] + = becp[ib * this->ppcell->nkb + ikb]; + auxk2[ih * nbands + ib] + = becp[ib * this->ppcell->nkb + ikb] + * static_cast (this->wg (ik, ib)); + } + } + + char transa = 'C'; + char transb = 'N'; + gemm_op () (transa, + transb, + atom->ncpp.nh, + atom->ncpp.nh, + nbands, + &one, + auxk1, + 
nbands, + auxk2, + nbands, + &zero, + aux_gk, + atom->ncpp.nh); + } + + // copy output from GEMM into desired format + if (PARAM.inp.noncolin && !atom->ncpp.has_so) + { + } + else if (PARAM.inp.noncolin && atom->ncpp.has_so) + { + } + else + { + int ijh = 0; + const int index = currect_spin * ucell->nat * nh_tot + iat * nh_tot; + for (int ih = 0; ih < atom->ncpp.nh; ih++) + { + for (int jh = ih; jh < atom->ncpp.nh; jh++) + { + // nondiagonal terms summed and collapsed into a + // single index (matrix is symmetric wrt (ih,jh)) + if (ih == jh) + { + becsum[index + ijh] + += std::real (aux_gk[ih * atom->ncpp.nh + jh]); + } + else + { + becsum[index + ijh] + += 2.0 + * std::real ( + aux_gk[ih * atom->ncpp.nh + jh]); + } + ijh++; + } + } + } } - ijh++; - } + delmem_complex_op () (auxk1); + delmem_complex_op () (auxk2); + delmem_complex_op () (aux_gk); } - } } - delmem_complex_op()(auxk1); - delmem_complex_op()(auxk2); - delmem_complex_op()(aux_gk); - } } - } - delmem_complex_op()(becp); + delmem_complex_op () (becp); } template -void ElecStatePW::add_usrho(const psi::Psi& psi) +void + ElecStatePW::add_usrho (const psi::Psi& psi) { if (PARAM.globalv.use_uspp) - { - this->cal_becsum(psi); - } + { + this->cal_becsum (psi); + } // transform soft charge to recip space using smooth grids if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) - { - for (int is = 0; is < PARAM.inp.nspin; is++) { - this->rhopw_smooth->real2recip(this->rho[is], this->rhog[is]); + for (int is = 0; is < PARAM.inp.nspin; is++) + { + this->rhopw_smooth->real2recip (this->rho[is], this->rhog[is]); + } } - } // \sum_lm Q_lm(r) \sum_i w_i // add to the charge density in reciprocal space the part which is due to the US augmentation. 
if (PARAM.globalv.use_uspp) - { - this->addusdens_g(becsum, rhog); - } + { + this->addusdens_g (becsum, rhog); + } // transform back to real space using dense grids if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) - { - for (int is = 0; is < PARAM.inp.nspin; is++) { - this->charge->rhopw->recip2real(this->rhog[is], this->rho[is]); + for (int is = 0; is < PARAM.inp.nspin; is++) + { + this->charge->rhopw->recip2real (this->rhog[is], this->rho[is]); + } } - } } template -void ElecStatePW::addusdens_g(const Real* becsum, T** rhog) +void + ElecStatePW::addusdens_g (const Real* becsum, T** rhog) { const T one{1, 0}; const T zero{0, 0}; @@ -465,93 +486,93 @@ void ElecStatePW::addusdens_g(const Real* becsum, T** rhog) const std::complex ci_tpi = ModuleBase::NEG_IMAG_UNIT * ModuleBase::TWO_PI; Real* qmod = nullptr; - resmem_var_op()(qmod, npw, "ElecState::qmod"); + resmem_var_op () (qmod, npw, "ElecState::qmod"); T* qgm = nullptr; - resmem_complex_op()(qgm, npw, "ElecState::qgm"); + resmem_complex_op () (qgm, npw, "ElecState::qgm"); Real* ylmk0 = nullptr; - resmem_var_op()(ylmk0, npw * lmaxq * lmaxq, "ElecState::ylmk0"); - Real* g = reinterpret_cast(this->charge->rhopw->gcar); + resmem_var_op () (ylmk0, npw * lmaxq * lmaxq, "ElecState::ylmk0"); + Real* g = reinterpret_cast (this->charge->rhopw->gcar); - ModuleBase::YlmReal::Ylm_Real(this->ctx, lmaxq * lmaxq, npw, g, ylmk0); + ModuleBase::YlmReal::Ylm_Real (this->ctx, lmaxq * lmaxq, npw, g, ylmk0); for (int ig = 0; ig < npw; ig++) - { - qmod[ig] = static_cast(this->charge->rhopw->gcar[ig].norm() * ucell->tpiba); - } + { + qmod[ig] = static_cast (this->charge->rhopw->gcar[ig].norm () * ucell->tpiba); + } for (int it = 0; it < ucell->ntype; it++) - { - Atom* atom = &ucell->atoms[it]; - if (atom->ncpp.tvanp) { - // nij = max number of (ih,jh) pairs per atom type nt - const int nij = atom->ncpp.nh * (atom->ncpp.nh + 1) / 2; - - T *skk = nullptr, *aux2 = nullptr, *tbecsum = nullptr; - resmem_complex_op()(skk, atom->na * 
npw, "ElecState::skk"); - resmem_complex_op()(aux2, nij * npw, "ElecState::aux2"); - resmem_complex_op()(tbecsum, PARAM.inp.nspin * atom->na * nij, "ElecState::tbecsum"); - for (int ia = 0; ia < atom->na; ia++) - { - const int iat = ucell->itia2iat(it, ia); - for (int is = 0; is < PARAM.inp.nspin; is++) - { - for (int ij = 0; ij < nij; ij++) - { - tbecsum[is * atom->na * nij + ia * nij + ij] - = static_cast(becsum[is * ucell->nat * nh_tot + iat * nh_tot + ij]); - } - } - for (int ig = 0; ig < npw; ig++) + Atom* atom = &ucell->atoms[it]; + if (atom->ncpp.tvanp) { - double arg = this->charge->rhopw->gcar[ig] * atom->tau[ia]; - skk[ia * npw + ig] = static_cast(ModuleBase::libm::exp(ci_tpi * arg)); - } - } + // nij = max number of (ih,jh) pairs per atom type nt + const int nij = atom->ncpp.nh * (atom->ncpp.nh + 1) / 2; + + T *skk = nullptr, *aux2 = nullptr, *tbecsum = nullptr; + resmem_complex_op () (skk, atom->na * npw, "ElecState::skk"); + resmem_complex_op () (aux2, nij * npw, "ElecState::aux2"); + resmem_complex_op () (tbecsum, PARAM.inp.nspin * atom->na * nij, "ElecState::tbecsum"); + for (int ia = 0; ia < atom->na; ia++) + { + const int iat = ucell->itia2iat (it, ia); + for (int is = 0; is < PARAM.inp.nspin; is++) + { + for (int ij = 0; ij < nij; ij++) + { + tbecsum[is * atom->na * nij + ia * nij + ij] + = static_cast (becsum[is * ucell->nat * nh_tot + iat * nh_tot + ij]); + } + } + for (int ig = 0; ig < npw; ig++) + { + double arg = this->charge->rhopw->gcar[ig] * atom->tau[ia]; + skk[ia * npw + ig] = static_cast (ModuleBase::libm::exp (ci_tpi * arg)); + } + } - for (int is = 0; is < PARAM.inp.nspin; is++) - { - // sum over atoms - char transa = 'N'; - char transb = 'T'; - gemm_op()(transa, - transb, - npw, - nij, - atom->na, - &one, - skk, - npw, - &tbecsum[is * atom->na * nij], - nij, - &zero, - aux2, - npw); - - // sum over lm indices of Q_{lm} - int ijh = 0; - for (int ih = 0; ih < atom->ncpp.nh; ih++) - { - for (int jh = ih; jh < atom->ncpp.nh; jh++) - { - 
this->ppcell->radial_fft_q(this->ctx, npw, ih, jh, it, qmod, ylmk0, qgm); - for (int ig = 0; ig < npw; ig++) + for (int is = 0; is < PARAM.inp.nspin; is++) { - rhog[is][ig] += qgm[ig] * aux2[ijh * npw + ig]; + // sum over atoms + char transa = 'N'; + char transb = 'T'; + gemm_op () (transa, + transb, + npw, + nij, + atom->na, + &one, + skk, + npw, + &tbecsum[is * atom->na * nij], + nij, + &zero, + aux2, + npw); + + // sum over lm indices of Q_{lm} + int ijh = 0; + for (int ih = 0; ih < atom->ncpp.nh; ih++) + { + for (int jh = ih; jh < atom->ncpp.nh; jh++) + { + this->ppcell->radial_fft_q (this->ctx, npw, ih, jh, it, qmod, ylmk0, qgm); + for (int ig = 0; ig < npw; ig++) + { + rhog[is][ig] += qgm[ig] * aux2[ijh * npw + ig]; + } + ijh++; + } + } } - ijh++; - } + delmem_complex_op () (skk); + delmem_complex_op () (aux2); + delmem_complex_op () (tbecsum); } - } - delmem_complex_op()(skk); - delmem_complex_op()(aux2); - delmem_complex_op()(tbecsum); } - } - delmem_var_op()(qmod); - delmem_complex_op()(qgm); - delmem_var_op()(ylmk0); + delmem_var_op () (qmod); + delmem_complex_op () (qgm); + delmem_var_op () (ylmk0); } template class ElecStatePW, base_device::DEVICE_CPU>; @@ -559,6 +580,6 @@ template class ElecStatePW, base_device::DEVICE_CPU>; #if ((defined __CUDA) || (defined __ROCM)) template class ElecStatePW, base_device::DEVICE_GPU>; template class ElecStatePW, base_device::DEVICE_GPU>; -#endif +#endif } // namespace elecstate diff --git a/source/source_estate/elecstate_pw.h b/source/source_estate/elecstate_pw.h index e8e4b95af36..331b9630614 100644 --- a/source/source_estate/elecstate_pw.h +++ b/source/source_estate/elecstate_pw.h @@ -21,28 +21,28 @@ class ElecStatePW : public ElecState using Real = typename GetTypeReal::type; public: - ElecStatePW(ModulePW::PW_Basis_K* wfc_basis_in, - Charge* chr_in, - K_Vectors* pkv_in, - UnitCell* ucell_in, - pseudopot_cell_vnl* ppcell_in, - ModulePW::PW_Basis* rhopw_in, - ModulePW::PW_Basis_Big* bigpw_in); + ElecStatePW 
(ModulePW::PW_Basis_K* wfc_basis_in, + Charge* chr_in, + K_Vectors* pkv_in, + UnitCell* ucell_in, + pseudopot_cell_vnl* ppcell_in, + ModulePW::PW_Basis* rhopw_in, + ModulePW::PW_Basis_Big* bigpw_in); - ~ElecStatePW(); + ~ElecStatePW (); //! interface for HSolver to calculate rho from Psi - virtual void psiToRho(const psi::Psi& psi); + virtual void psiToRho (const psi::Psi& psi); - virtual void cal_tau(const psi::Psi& psi); + virtual void cal_tau (const psi::Psi& psi); //! calculate becsum for uspp - void cal_becsum(const psi::Psi& psi); + void cal_becsum (const psi::Psi& psi); Real* becsum = nullptr; //! init rho_data and kin_r_data - void init_rho_data(); + void init_rho_data (); Real** rho = nullptr; // [Device] [spin][nrxx] rho T** rhog = nullptr; // [Device] [spin][nrxx] rhog Real** kin_r = nullptr; // [Device] [spin][nrxx] kin_r @@ -50,7 +50,6 @@ class ElecStatePW : public ElecState ModulePW::PW_Basis_K* basis = nullptr; protected: - ModulePW::PW_Basis* rhopw_smooth = nullptr; UnitCell* ucell = nullptr; @@ -59,22 +58,22 @@ class ElecStatePW : public ElecState //! calculate electronic charge density on grid points or density matrix in real space //! the consequence charge density rho saved into rho_out, preparing for charge mixing. - void updateRhoK(const psi::Psi& psi); // override; - + void updateRhoK (const psi::Psi& psi); // override; + //! sum over all pools for rho and ebands - void parallelK(); - + void parallelK (); + //! calcualte rho for each k - void rhoBandK(const psi::Psi& psi); + void rhoBandK (const psi::Psi& psi); //! add to the charge density in reciprocal space the part which is due to the US augmentation. - void add_usrho(const psi::Psi& psi); + void add_usrho (const psi::Psi& psi); //! Non-local pseudopotentials //! 
\sum_lm Q_lm(r) \sum_i w_i - void addusdens_g(const Real* becsum, T** rhog); + void addusdens_g (const Real* becsum, T** rhog); - Device * ctx = {}; + Device* ctx = {}; bool init_rho = false; @@ -83,7 +82,7 @@ class ElecStatePW : public ElecState Real* rho_data = nullptr; T* rhog_data = nullptr; Real* kin_r_data = nullptr; - T* wfcr = nullptr; + T* wfcr = nullptr; T* wfcr_another_spin = nullptr; private: diff --git a/source/source_estate/elecstate_pw_cal_tau.cpp b/source/source_estate/elecstate_pw_cal_tau.cpp index a59990600a3..c620a89f799 100644 --- a/source/source_estate/elecstate_pw_cal_tau.cpp +++ b/source/source_estate/elecstate_pw_cal_tau.cpp @@ -1,63 +1,72 @@ #include "elecstate_pw.h" -namespace elecstate { - -template -void ElecStatePW::cal_tau(const psi::Psi& psi) +namespace elecstate { - ModuleBase::TITLE("ElecStatePW", "cal_tau"); - for(int is=0; iskin_r[is], 0, this->charge->nrxx); - } - for (int ik = 0; ik < psi.get_nk(); ++ik) - { - psi.fix_k(ik); - int npw = psi.get_current_ngk(); - int current_spin = 0; - if (PARAM.inp.nspin == 2) +template +void + ElecStatePW::cal_tau (const psi::Psi& psi) +{ + ModuleBase::TITLE ("ElecStatePW", "cal_tau"); + for (int is = 0; is < PARAM.inp.nspin; is++) { - current_spin = this->klist->isk[ik]; + setmem_var_op () (this->kin_r[is], 0, this->charge->nrxx); } - int nbands = psi.get_nbands(); - for (int ibnd = 0; ibnd < nbands; ibnd++) + + for (int ik = 0; ik < psi.get_nk (); ++ik) { - this->basis->recip_to_real(this->ctx, &psi(ibnd,0), this->wfcr, ik); + psi.fix_k (ik); + int npw = psi.get_current_ngk (); + int current_spin = 0; + if (PARAM.inp.nspin == 2) + { + current_spin = this->klist->isk[ik]; + } + int nbands = psi.get_nbands (); + for (int ibnd = 0; ibnd < nbands; ibnd++) + { + this->basis->recip_to_real (this->ctx, &psi (ibnd, 0), this->wfcr, ik); - const auto w1 = static_cast(this->wg(ik, ibnd) / ucell->omega); + const auto w1 = static_cast (this->wg (ik, ibnd) / ucell->omega); - // kinetic energy density - for 
(int j = 0; j < 3; j++) - { - setmem_complex_op()(this->wfcr, 0, this->charge->nrxx); + // kinetic energy density + for (int j = 0; j < 3; j++) + { + setmem_complex_op () (this->wfcr, 0, this->charge->nrxx); - meta_op()(this->ctx, - ik, - j, - npw, - this->basis->npwk_max, - static_cast(ucell->tpiba), - this->basis->template get_gcar_data(), - this->basis->template get_kvec_c_data(), - &psi(ibnd, 0), - this->wfcr); + meta_op () (this->ctx, + ik, + j, + npw, + this->basis->npwk_max, + static_cast (ucell->tpiba), + this->basis->template get_gcar_data (), + this->basis->template get_kvec_c_data (), + &psi (ibnd, 0), + this->wfcr); - this->basis->recip_to_real(this->ctx, this->wfcr, this->wfcr, ik); + this->basis->recip_to_real (this->ctx, this->wfcr, this->wfcr, ik); - elecstate_pw_op()(this->ctx, current_spin, this->charge->nrxx, w1, this->kin_r, this->wfcr); - } + elecstate_pw_op () (this->ctx, + current_spin, + this->charge->nrxx, + w1, + this->kin_r, + this->wfcr); + } + } } - } - if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") { - for (int ii = 0; ii < PARAM.inp.nspin; ii++) { - castmem_var_d2h_op()(this->charge->kin_r[ii], this->kin_r[ii], this->charge->nrxx); + if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") + { + for (int ii = 0; ii < PARAM.inp.nspin; ii++) + { + castmem_var_d2h_op () (this->charge->kin_r[ii], this->kin_r[ii], this->charge->nrxx); + } } - } #ifdef __MPI - this->charge->kin_r_mpi(); + this->charge->kin_r_mpi (); #endif - ModuleBase::TITLE("ElecStatePW", "cal_tau"); + ModuleBase::TITLE ("ElecStatePW", "cal_tau"); } template class ElecStatePW, base_device::DEVICE_CPU>; @@ -65,5 +74,5 @@ template class ElecStatePW, base_device::DEVICE_CPU>; #if ((defined __CUDA) || (defined __ROCM)) template class ElecStatePW, base_device::DEVICE_GPU>; template class ElecStatePW, base_device::DEVICE_GPU>; -#endif +#endif } // namespace elecstate diff --git a/source/source_estate/elecstate_pw_sdft.cpp 
b/source/source_estate/elecstate_pw_sdft.cpp index f4061f733a5..666f677ddae 100644 --- a/source/source_estate/elecstate_pw_sdft.cpp +++ b/source/source_estate/elecstate_pw_sdft.cpp @@ -9,31 +9,34 @@ namespace elecstate { template -void ElecStatePW_SDFT::psiToRho(const psi::Psi& psi) +void + ElecStatePW_SDFT::psiToRho (const psi::Psi& psi) { - ModuleBase::TITLE(this->classname, "psiToRho"); - ModuleBase::timer::start(this->classname, "psiToRho"); + ModuleBase::TITLE (this->classname, "psiToRho"); + ModuleBase::timer::start (this->classname, "psiToRho"); const int nspin = PARAM.inp.nspin; for (int is = 0; is < nspin; is++) - { - setmem_var_op()(this->rho[is], 0, this->charge->nrxx); - } + { + setmem_var_op () (this->rho[is], 0, this->charge->nrxx); + } if (PARAM.globalv.ks_run) - { - for (int ik = 0; ik < psi.get_nk(); ++ik) { - psi.fix_k(ik); - this->updateRhoK(psi); - } - if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") { - for (int ii = 0; ii < nspin; ii++) { - castmem_var_d2h_op()(this->charge->rho[ii], this->rho[ii], this->charge->nrxx); - } + for (int ik = 0; ik < psi.get_nk (); ++ik) + { + psi.fix_k (ik); + this->updateRhoK (psi); + } + if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") + { + for (int ii = 0; ii < nspin; ii++) + { + castmem_var_d2h_op () (this->charge->rho[ii], this->rho[ii], this->charge->nrxx); + } + } + this->parallelK (); } - this->parallelK(); - } - ModuleBase::timer::end(this->classname, "psiToRho"); + ModuleBase::timer::end (this->classname, "psiToRho"); return; } diff --git a/source/source_estate/elecstate_pw_sdft.h b/source/source_estate/elecstate_pw_sdft.h index 0ffa00efc48..92589d5bb75 100644 --- a/source/source_estate/elecstate_pw_sdft.h +++ b/source/source_estate/elecstate_pw_sdft.h @@ -7,19 +7,19 @@ template class ElecStatePW_SDFT : public ElecStatePW { public: - ElecStatePW_SDFT(ModulePW::PW_Basis_K* wfc_basis_in, - Charge* chr_in, - K_Vectors* pkv_in, - UnitCell* ucell_in, - pseudopot_cell_vnl* 
ppcell_in, - ModulePW::PW_Basis* rhopw_in, - ModulePW::PW_Basis_Big* bigpw_in) - : ElecStatePW(wfc_basis_in, chr_in, pkv_in, ucell_in, ppcell_in, rhopw_in, bigpw_in) + ElecStatePW_SDFT (ModulePW::PW_Basis_K* wfc_basis_in, + Charge* chr_in, + K_Vectors* pkv_in, + UnitCell* ucell_in, + pseudopot_cell_vnl* ppcell_in, + ModulePW::PW_Basis* rhopw_in, + ModulePW::PW_Basis_Big* bigpw_in) + : ElecStatePW (wfc_basis_in, chr_in, pkv_in, ucell_in, ppcell_in, rhopw_in, bigpw_in) { this->classname = "ElecStatePW_SDFT"; } - virtual void psiToRho(const psi::Psi& psi) override; + virtual void psiToRho (const psi::Psi& psi) override; + private: using Real = typename GetTypeReal::type; using setmem_var_op = base_device::memory::set_memory_op; diff --git a/source/source_estate/elecstate_tools.cpp b/source/source_estate/elecstate_tools.cpp index 3076c6b437e..4a0f34041d4 100644 --- a/source/source_estate/elecstate_tools.cpp +++ b/source/source_estate/elecstate_tools.cpp @@ -3,181 +3,162 @@ #include "source_base/parallel_reduce.h" namespace elecstate { - void calEBand(const ModuleBase::matrix& ekb,const ModuleBase::matrix& wg,fenergy& f_en) - { - ModuleBase::TITLE("ElecState", "calEBand"); - // calculate ebands using wg and ekb - double eband = 0.0; - #ifdef _OPENMP - #pragma omp parallel for collapse(2) reduction(+ : eband) - #endif - for (int ik = 0; ik < ekb.nr; ++ik) +void + calEBand (const ModuleBase::matrix& ekb, const ModuleBase::matrix& wg, fenergy& f_en) +{ + ModuleBase::TITLE ("ElecState", "calEBand"); + // calculate ebands using wg and ekb + double eband = 0.0; +#ifdef _OPENMP +#pragma omp parallel for collapse(2) reduction(+ : eband) +#endif + for (int ik = 0; ik < ekb.nr; ++ik) { for (int ibnd = 0; ibnd < ekb.nc; ibnd++) - { - eband += ekb(ik, ibnd) * wg(ik, ibnd); - } + { + eband += ekb (ik, ibnd) * wg (ik, ibnd); + } } - f_en.eband = eband; + f_en.eband = eband; - #ifdef __MPI - const int npool = GlobalV::KPAR * PARAM.inp.bndpar; - 
Parallel_Reduce::reduce_double_allpool(npool, GlobalV::NPROC_IN_POOL, f_en.eband); - #endif - return; - } +#ifdef __MPI + const int npool = GlobalV::KPAR * PARAM.inp.bndpar; + Parallel_Reduce::reduce_double_allpool (npool, GlobalV::NPROC_IN_POOL, f_en.eband); +#endif + return; +} - void calculate_weights(const ModuleBase::matrix& ekb, - ModuleBase::matrix& wg, - const K_Vectors* klist, - Efermi& eferm, - fenergy& f_en, - std::vector& nelec_spin, - const bool skip_weights=false) - { - ModuleBase::TITLE("ElecState", "calculate_weights"); - if (skip_weights==true) return; - - const int nbands = ekb.nc; - const int nks = ekb.nr; - if (!(Occupy::use_gaussian_broadening || Occupy::fixed_occupations)) +void + calculate_weights (const ModuleBase::matrix& ekb, + ModuleBase::matrix& wg, + const K_Vectors* klist, + Efermi& eferm, + fenergy& f_en, + std::vector& nelec_spin, + const bool skip_weights = false) +{ + ModuleBase::TITLE ("ElecState", "calculate_weights"); + if (skip_weights == true) + { + return; + } + + const int nbands = ekb.nc; + const int nks = ekb.nr; + if (!(Occupy::use_gaussian_broadening || Occupy::fixed_occupations)) { if (PARAM.globalv.two_fermi) - { - Occupy::iweights(nks, - klist->wk, - nbands, - nelec_spin[0], - ekb, - eferm.ef_up, - wg, - 0, - klist->isk); - Occupy::iweights(nks, - klist->wk, - nbands, - nelec_spin[1], - ekb, - eferm.ef_dw, - wg, - 1, - klist->isk); - // ef = ( ef_up + ef_dw ) / 2.0_dp need??? mohan add 2012-04-16 - } + { + Occupy::iweights (nks, klist->wk, nbands, nelec_spin[0], ekb, eferm.ef_up, wg, 0, klist->isk); + Occupy::iweights (nks, klist->wk, nbands, nelec_spin[1], ekb, eferm.ef_dw, wg, 1, klist->isk); + // ef = ( ef_up + ef_dw ) / 2.0_dp need??? mohan add 2012-04-16 + } else - { - // -1 means don't need to consider spin. - Occupy::iweights(nks, - klist->wk, - nbands, - PARAM.inp.nelec, - ekb, - eferm.ef, - wg, - -1, - klist->isk); - } + { + // -1 means don't need to consider spin. 
+ Occupy::iweights (nks, klist->wk, nbands, PARAM.inp.nelec, ekb, eferm.ef, wg, -1, klist->isk); + } } - else if (Occupy::use_gaussian_broadening) + else if (Occupy::use_gaussian_broadening) { if (PARAM.globalv.two_fermi) - { - double demet_up = 0.0; - double demet_dw = 0.0; - Occupy::gweights(nks, - klist->wk, - nbands, - nelec_spin[0], - Occupy::gaussian_parameter, - Occupy::gaussian_type, - ekb, - eferm.ef_up, - demet_up, - wg, - 0, - klist->isk); - Occupy::gweights(nks, - klist->wk, - nbands, - nelec_spin[1], - Occupy::gaussian_parameter, - Occupy::gaussian_type, - ekb, - eferm.ef_dw, - demet_dw, - wg, - 1, - klist->isk); - f_en.demet = demet_up + demet_dw; - } + { + double demet_up = 0.0; + double demet_dw = 0.0; + Occupy::gweights (nks, + klist->wk, + nbands, + nelec_spin[0], + Occupy::gaussian_parameter, + Occupy::gaussian_type, + ekb, + eferm.ef_up, + demet_up, + wg, + 0, + klist->isk); + Occupy::gweights (nks, + klist->wk, + nbands, + nelec_spin[1], + Occupy::gaussian_parameter, + Occupy::gaussian_type, + ekb, + eferm.ef_dw, + demet_dw, + wg, + 1, + klist->isk); + f_en.demet = demet_up + demet_dw; + } else - { - // -1 means is no related to spin. - Occupy::gweights(nks, - klist->wk, - nbands, - PARAM.inp.nelec, - Occupy::gaussian_parameter, - Occupy::gaussian_type, - ekb, - eferm.ef, - f_en.demet, - wg, - -1, - klist->isk); - } - #ifdef __MPI + { + // -1 means is no related to spin. 
+ Occupy::gweights (nks, + klist->wk, + nbands, + PARAM.inp.nelec, + Occupy::gaussian_parameter, + Occupy::gaussian_type, + ekb, + eferm.ef, + f_en.demet, + wg, + -1, + klist->isk); + } +#ifdef __MPI const int npool = GlobalV::KPAR * PARAM.inp.bndpar; - Parallel_Reduce::reduce_double_allpool(npool, GlobalV::NPROC_IN_POOL, f_en.demet); - #endif + Parallel_Reduce::reduce_double_allpool (npool, GlobalV::NPROC_IN_POOL, f_en.demet); +#endif } - else if (Occupy::fixed_occupations) + else if (Occupy::fixed_occupations) { - ModuleBase::WARNING_QUIT("calculate_weights", "other occupations, not implemented"); + ModuleBase::WARNING_QUIT ("calculate_weights", "other occupations, not implemented"); } - return; - } + return; +} - void fixed_weights(const std::vector& ocp_kb, - const int& nbands, - const double& nelec, - const K_Vectors* klist, - ModuleBase::matrix& wg, - bool& skip_weights) - { - assert(nbands > 0); - assert(nelec > 0.0); +void + fixed_weights (const std::vector& ocp_kb, + const int& nbands, + const double& nelec, + const K_Vectors* klist, + ModuleBase::matrix& wg, + bool& skip_weights) +{ + assert (nbands > 0); + assert (nelec > 0.0); - const double ne_thr = 1.0e-5; + const double ne_thr = 1.0e-5; - const int num = klist->get_nks() * nbands; - if (num != ocp_kb.size()) + const int num = klist->get_nks () * nbands; + if (num != ocp_kb.size ()) { - ModuleBase::WARNING_QUIT("ElecState::fixed_weights", - "size of occupation array is wrong , please check ocp_set"); + ModuleBase::WARNING_QUIT ("ElecState::fixed_weights", + "size of occupation array is wrong , please check ocp_set"); } - double num_elec = 0.0; - for (int i = 0; i < ocp_kb.size(); ++i) + double num_elec = 0.0; + for (int i = 0; i < ocp_kb.size (); ++i) { num_elec += ocp_kb[i]; } - if (std::abs(num_elec - nelec) > ne_thr) + if (std::abs (num_elec - nelec) > ne_thr) { - ModuleBase::WARNING_QUIT("ElecState::fixed_weights", - "total number of occupations is wrong , please check ocp_set"); + 
ModuleBase::WARNING_QUIT ("ElecState::fixed_weights", + "total number of occupations is wrong , please check ocp_set"); } - for (int ik = 0; ik < wg.nr; ++ik) + for (int ik = 0; ik < wg.nr; ++ik) { for (int ib = 0; ib < wg.nc; ++ib) - { - wg(ik, ib) = ocp_kb[ik * wg.nc + ib]; - } + { + wg (ik, ib) = ocp_kb[ik * wg.nc + ib]; + } } - skip_weights = true; - - } + skip_weights = true; } +} // namespace elecstate diff --git a/source/source_estate/elecstate_tools.h b/source/source_estate/elecstate_tools.h index 1486c414526..2778723778e 100644 --- a/source/source_estate/elecstate_tools.h +++ b/source/source_estate/elecstate_tools.h @@ -5,22 +5,22 @@ namespace elecstate { -void calEBand(const ModuleBase::matrix& ekb, const ModuleBase::matrix& wg, fenergy& f_en); +void calEBand (const ModuleBase::matrix& ekb, const ModuleBase::matrix& wg, fenergy& f_en); -void calculate_weights(const ModuleBase::matrix& ekb, - ModuleBase::matrix& wg, - const K_Vectors* klist, - Efermi& eferm, - fenergy& f_en, - std::vector& nelec_spin, - const bool skip_weights); +void calculate_weights (const ModuleBase::matrix& ekb, + ModuleBase::matrix& wg, + const K_Vectors* klist, + Efermi& eferm, + fenergy& f_en, + std::vector& nelec_spin, + const bool skip_weights); -void fixed_weights(const std::vector& ocp_kb, - const int& nbands, - const double& nelec, - const K_Vectors* klist, - ModuleBase::matrix& wg, - bool& skip_weights); +void fixed_weights (const std::vector& ocp_kb, + const int& nbands, + const double& nelec, + const K_Vectors* klist, + ModuleBase::matrix& wg, + bool& skip_weights); } // namespace elecstate #endif diff --git a/source/source_estate/fp_energy.cpp b/source/source_estate/fp_energy.cpp index a8d3124b4f7..3b7cb2d4b03 100644 --- a/source/source_estate/fp_energy.cpp +++ b/source/source_estate/fp_energy.cpp @@ -3,7 +3,6 @@ #include "source_io/module_parameter/parameter.h" #include "source_base/global_variable.h" - #include "source_base/tool_quit.h" #include @@ -13,23 +12,26 @@ 
namespace elecstate { /// @brief calculate etot -double fenergy::calculate_etot() +double + fenergy::calculate_etot () { - etot = eband + deband + (etxc - etxcc) + ewald_energy + hartree_energy + demet + descf + exx + efield - + gatefield + evdw + esol_el + esol_cav + edftu + edeepks_scf + escon + ml_exx; + etot = eband + deband + (etxc - etxcc) + ewald_energy + hartree_energy + demet + descf + exx + efield + gatefield + + evdw + esol_el + esol_cav + edftu + edeepks_scf + escon + ml_exx; return etot; } /// @brief calculate etot_harris -double fenergy::calculate_harris() +double + fenergy::calculate_harris () { - etot_harris = eband + deband_harris + (etxc - etxcc) + ewald_energy + hartree_energy + demet + descf + exx - + efield + gatefield + evdw + esol_el + esol_cav + edftu + edeepks_scf + escon + ml_exx; + etot_harris = eband + deband_harris + (etxc - etxcc) + ewald_energy + hartree_energy + demet + descf + exx + efield + + gatefield + evdw + esol_el + esol_cav + edftu + edeepks_scf + escon + ml_exx; return etot_harris; } /// @brief set all energies to zero -void fenergy::clear_all() +void + fenergy::clear_all () { etot = etot_old = eband = deband = etxc = etxcc = vtxc = ewald_energy = hartree_energy = demet = descf = exx = efield = gatefield = evdw = etot_harris = deband_harris = esol_el = esol_cav = edftu = edeepks_scf = escon @@ -37,10 +39,11 @@ void fenergy::clear_all() } /// @brief print all energies -void fenergy::print_all() const +void + fenergy::print_all () const { - std::cout << std::resetiosflags(std::ios::scientific) << std::endl; - std::cout << std::setprecision(16) << std::endl; + std::cout << std::resetiosflags (std::ios::scientific) << std::endl; + std::cout << std::setprecision (16) << std::endl; std::cout << " eband=" << eband << std::endl; std::cout << " deband=" << deband << std::endl; std::cout << " etxc-etxcc=" << etxc - etxcc << std::endl; @@ -65,63 +68,66 @@ void fenergy::print_all() const /// @brief set Efermi of a specific spin /// 
@param is SPIN /// @param ef_in fermi(is) -void Efermi::set_efval(const int& is, const double& ef_in) +void + Efermi::set_efval (const int& is, const double& ef_in) { if (!two_efermi) - { - this->ef = ef_in; - } + { + this->ef = ef_in; + } else if (is == 0) - { - this->ef_up = ef_in; - } + { + this->ef_up = ef_in; + } else if (is == 1) - { - this->ef_dw = ef_in; - } + { + this->ef_dw = ef_in; + } else - { - ModuleBase::WARNING_QUIT("energy", "Please check NSPIN when TWO_EFERMI is true"); - __builtin_unreachable(); - } + { + ModuleBase::WARNING_QUIT ("energy", "Please check NSPIN when TWO_EFERMI is true"); + __builtin_unreachable (); + } } /// @brief get the value of fermi of a specific spin /// @param is SPIN /// @return value of fermi(is) -double Efermi::get_efval(const int& is) const +double + Efermi::get_efval (const int& is) const { if (!two_efermi) - { - return this->ef; - } + { + return this->ef; + } else if (is == 0) - { - return this->ef_up; - } + { + return this->ef_up; + } else if (is == 1) - { - return this->ef_dw; - } + { + return this->ef_dw; + } else - { - ModuleBase::WARNING_QUIT("energy", "Please check NSPIN when TWO_EFERMI is true"); - __builtin_unreachable(); - } + { + ModuleBase::WARNING_QUIT ("energy", "Please check NSPIN when TWO_EFERMI is true"); + __builtin_unreachable (); + } } /// @brief get all fermi energies for all spins /// @return all fermi energies for all spins -std::vector Efermi::get_all_ef() const +std::vector + Efermi::get_all_ef () const { if (two_efermi) - { - return {ef_up, ef_dw}; - } + { + return {ef_up, ef_dw}; + } else - { - return {ef, ef}; // For NSPIN=1, ef_up=ef_dw=ef - } + { + return {ef, ef}; // For NSPIN=1, ef_up=ef_dw=ef + } } } // namespace elecstate diff --git a/source/source_estate/fp_energy.h b/source/source_estate/fp_energy.h index 3f37ca26563..978ec96ce1d 100644 --- a/source/source_estate/fp_energy.h +++ b/source/source_estate/fp_energy.h @@ -50,10 +50,10 @@ struct fenergy double ekinetic = 0.0; /// kinetic 
energy, used in OFDFT double e_local_pp = 0.0; /// ion-electron interaction energy contributed by local pp, used in OFDFT - double calculate_etot(); - double calculate_harris(); - void clear_all(); - void print_all() const; + double calculate_etot (); + double calculate_harris (); + void clear_all (); + void print_all () const; }; /** @@ -62,13 +62,13 @@ struct fenergy */ struct Efermi { - double ef = 0.0; ///< Fermi energy - double ef_up = 0.0; ///< spin up Fermi energy - double ef_dw = 0.0; ///< spin down Fermi energy - bool two_efermi = false; - void set_efval(const int& is, const double& ef_in); - double get_efval(const int& is) const; - std::vector get_all_ef() const; + double ef = 0.0; ///< Fermi energy + double ef_up = 0.0; ///< spin up Fermi energy + double ef_dw = 0.0; ///< spin down Fermi energy + bool two_efermi = false; + void set_efval (const int& is, const double& ef_in); + double get_efval (const int& is) const; + std::vector get_all_ef () const; }; } // namespace elecstate diff --git a/source/source_estate/kernels/cuda/elecstate_op.cu b/source/source_estate/kernels/cuda/elecstate_op.cu index 4597a1f1ad8..300cfe96949 100644 --- a/source/source_estate/kernels/cuda/elecstate_op.cu +++ b/source/source_estate/kernels/cuda/elecstate_op.cu @@ -6,76 +6,85 @@ #define THREADS_PER_BLOCK 256 -namespace elecstate { +namespace elecstate +{ -template -__global__ void elecstate_pw( - const int spin, - const int nrxx, - const FPTYPE w1, - FPTYPE* rho, - const thrust::complex* wfcr) +template +__global__ void + elecstate_pw (const int spin, const int nrxx, const FPTYPE w1, FPTYPE* rho, const thrust::complex* wfcr) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= nrxx) {return;} - rho[spin * nrxx + idx] += w1 * norm(wfcr[idx]); + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= nrxx) + { + return; + } + rho[spin * nrxx + idx] += w1 * norm (wfcr[idx]); } -template -__global__ void elecstate_pw( - const bool DOMAG, - const bool DOMAG_Z, - 
const int nrxx, - const FPTYPE w1, - FPTYPE* rho, - const thrust::complex* wfcr, - const thrust::complex* wfcr_another_spin) +template +__global__ void + elecstate_pw (const bool DOMAG, + const bool DOMAG_Z, + const int nrxx, + const FPTYPE w1, + FPTYPE* rho, + const thrust::complex* wfcr, + const thrust::complex* wfcr_another_spin) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= nrxx) {return;} - rho[0 * nrxx + idx] += w1 * (norm(wfcr[idx]) + norm(wfcr_another_spin[idx])); + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= nrxx) + { + return; + } + rho[0 * nrxx + idx] += w1 * (norm (wfcr[idx]) + norm (wfcr_another_spin[idx])); - if (DOMAG) { - rho[1 * nrxx + idx] += w1 * 2.0 - * (wfcr[idx].real() * wfcr_another_spin[idx].real() - + wfcr[idx].imag() * wfcr_another_spin[idx].imag()); - rho[2 * nrxx + idx] += w1 * 2.0 - * (wfcr[idx].real() * wfcr_another_spin[idx].imag() - - wfcr_another_spin[idx].real() * wfcr[idx].imag()); - rho[3 * nrxx + idx] += w1 * (norm(wfcr[idx]) - norm(wfcr_another_spin[idx])); - } - else if(DOMAG_Z) { - rho[1 * nrxx + idx] = 0; - rho[2 * nrxx + idx] = 0; - rho[3 * nrxx + idx] += w1 * (norm(wfcr[idx]) - norm(wfcr_another_spin[idx])); - } - else { - rho[0 * nrxx + idx] = 0; - rho[1 * nrxx + idx] = 0; - rho[2 * nrxx + idx] = 0; - rho[3 * nrxx + idx] = 0; - } + if (DOMAG) + { + rho[1 * nrxx + idx] += w1 * 2.0 + * (wfcr[idx].real () * wfcr_another_spin[idx].real () + + wfcr[idx].imag () * wfcr_another_spin[idx].imag ()); + rho[2 * nrxx + idx] += w1 * 2.0 + * (wfcr[idx].real () * wfcr_another_spin[idx].imag () + - wfcr_another_spin[idx].real () * wfcr[idx].imag ()); + rho[3 * nrxx + idx] += w1 * (norm (wfcr[idx]) - norm (wfcr_another_spin[idx])); + } + else if (DOMAG_Z) + { + rho[1 * nrxx + idx] = 0; + rho[2 * nrxx + idx] = 0; + rho[3 * nrxx + idx] += w1 * (norm (wfcr[idx]) - norm (wfcr_another_spin[idx])); + } + else + { + rho[0 * nrxx + idx] = 0; + rho[1 * nrxx + idx] = 0; + rho[2 * nrxx + idx] = 0; + rho[3 * nrxx 
+ idx] = 0; + } } template -void elecstate_pw_op::operator()(const base_device::DEVICE_GPU* ctx, +void + elecstate_pw_op::operator() (const base_device::DEVICE_GPU* ctx, const int& spin, const int& nrxx, const FPTYPE& w1, FPTYPE** rho, const std::complex* wfcr) { - const int block = (nrxx + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - elecstate_pw<<>>( - spin, nrxx, w1, rho[0], - reinterpret_cast*>(wfcr) - ); + const int block = (nrxx + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + elecstate_pw<<>> (spin, + nrxx, + w1, + rho[0], + reinterpret_cast*> (wfcr)); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } template -void elecstate_pw_op::operator()(const base_device::DEVICE_GPU* ctx, +void + elecstate_pw_op::operator() (const base_device::DEVICE_GPU* ctx, const bool& DOMAG, const bool& DOMAG_Z, const int& nrxx, @@ -84,17 +93,20 @@ void elecstate_pw_op::operator()(const base_dev const std::complex* wfcr, const std::complex* wfcr_another_spin) { - const int block = (nrxx + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - elecstate_pw<<>>( - DOMAG, DOMAG_Z, nrxx, w1, rho[0], - reinterpret_cast*>(wfcr), - reinterpret_cast*>(wfcr_another_spin) - ); + const int block = (nrxx + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + elecstate_pw + <<>> (DOMAG, + DOMAG_Z, + nrxx, + w1, + rho[0], + reinterpret_cast*> (wfcr), + reinterpret_cast*> (wfcr_another_spin)); - CHECK_CUDA_SYNC(); + CHECK_CUDA_SYNC (); } template struct elecstate_pw_op; template struct elecstate_pw_op; -} // namespace elecstate \ No newline at end of file +} // namespace elecstate \ No newline at end of file diff --git a/source/source_estate/kernels/elecstate_op.cpp b/source/source_estate/kernels/elecstate_op.cpp index c3933319fc8..b371ee45597 100644 --- a/source/source_estate/kernels/elecstate_op.cpp +++ b/source/source_estate/kernels/elecstate_op.cpp @@ -1,31 +1,34 @@ #include "source_estate/kernels/elecstate_op.h" -namespace elecstate{ +namespace elecstate +{ template struct elecstate_pw_op { - void 
operator()(const base_device::DEVICE_CPU* /*ctx*/, + void + operator() (const base_device::DEVICE_CPU* /*ctx*/, const int& spin, const int& nrxx, const FPTYPE& w1, FPTYPE** rho, const std::complex* wfcr) { - // for (int ir = 0; ir < nrxx; ir++) - // { - // rho[spin][ir] += weight * norm(wfcr[ir]); - // } + // for (int ir = 0; ir < nrxx; ir++) + // { + // rho[spin][ir] += weight * norm(wfcr[ir]); + // } #ifdef _OPENMP #pragma omp parallel for #endif - for (int ir = 0; ir < nrxx; ir++) - { - rho[spin][ir] += w1 * norm(wfcr[ir]); - } + for (int ir = 0; ir < nrxx; ir++) + { + rho[spin][ir] += w1 * norm (wfcr[ir]); + } } - void operator()(const base_device::DEVICE_CPU* ctx, + void + operator() (const base_device::DEVICE_CPU* ctx, const bool& DOMAG, const bool& DOMAG_Z, const int& nrxx, @@ -37,50 +40,55 @@ struct elecstate_pw_op #ifdef _OPENMP #pragma omp parallel for #endif - for (int ir = 0; ir < nrxx; ir++) { - rho[0][ir] += w1 * (norm(wfcr[ir]) + norm(wfcr_another_spin[ir])); - } - // In this case, calculate the three components of the magnetization - if (DOMAG) - { + for (int ir = 0; ir < nrxx; ir++) + { + rho[0][ir] += w1 * (norm (wfcr[ir]) + norm (wfcr_another_spin[ir])); + } + // In this case, calculate the three components of the magnetization + if (DOMAG) + { #ifdef _OPENMP #pragma omp parallel for #endif - for (int ir = 0; ir < nrxx; ir++) { - rho[1][ir] += w1 * 2.0 - * (wfcr[ir].real() * wfcr_another_spin[ir].real() - + wfcr[ir].imag() * wfcr_another_spin[ir].imag()); - rho[2][ir] += w1 * 2.0 - * (wfcr[ir].real() * wfcr_another_spin[ir].imag() - - wfcr_another_spin[ir].real() * wfcr[ir].imag()); - rho[3][ir] += w1 * (norm(wfcr[ir]) - norm(wfcr_another_spin[ir])); - } - } - else if (DOMAG_Z) - { + for (int ir = 0; ir < nrxx; ir++) + { + rho[1][ir] += w1 * 2.0 + * (wfcr[ir].real () * wfcr_another_spin[ir].real () + + wfcr[ir].imag () * wfcr_another_spin[ir].imag ()); + rho[2][ir] += w1 * 2.0 + * (wfcr[ir].real () * wfcr_another_spin[ir].imag () + - 
wfcr_another_spin[ir].real () * wfcr[ir].imag ()); + rho[3][ir] += w1 * (norm (wfcr[ir]) - norm (wfcr_another_spin[ir])); + } + } + else if (DOMAG_Z) + { #ifdef _OPENMP #pragma omp parallel for #endif - for (int ir = 0; ir < nrxx; ir++) - { - rho[1][ir] = 0; - rho[2][ir] = 0; - rho[3][ir] += w1 * (norm(wfcr[ir]) - norm(wfcr_another_spin[ir])); - } - } - else { + for (int ir = 0; ir < nrxx; ir++) + { + rho[1][ir] = 0; + rho[2][ir] = 0; + rho[3][ir] += w1 * (norm (wfcr[ir]) - norm (wfcr_another_spin[ir])); + } + } + else + { #ifdef _OPENMP #pragma omp parallel for collapse(2) schedule(static) #endif - for (int is = 1; is < 4; is++) - { - for (int ir = 0; ir < nrxx; ir++) - rho[is][ir] = 0; - } - } + for (int is = 1; is < 4; is++) + { + for (int ir = 0; ir < nrxx; ir++) + { + rho[is][ir] = 0; + } + } + } } }; template struct elecstate_pw_op; template struct elecstate_pw_op; -} // namespace elecstate \ No newline at end of file +} // namespace elecstate \ No newline at end of file diff --git a/source/source_estate/kernels/elecstate_op.h b/source/source_estate/kernels/elecstate_op.h index f7b8d48c2ee..c8389597439 100644 --- a/source/source_estate/kernels/elecstate_op.h +++ b/source/source_estate/kernels/elecstate_op.h @@ -5,72 +5,72 @@ #include #include "source_psi/psi.h" -namespace elecstate{ +namespace elecstate +{ -template -struct elecstate_pw_op { - /// @brief Calculate psiToRho output within the band-by-band loop, NSPIN != 4 - /// - /// Input Parameters - /// @param ctx - which device this function runs on - /// @param spin - current spin - /// @param nrxx - number of planewaves - /// @param weight - input constant - /// @param wfcr - input array, psi in real space - /// - /// Output Parameters - /// @param rho - electronic densities - void operator() ( - const Device* ctx, - const int& spin, - const int& nrxx, - const FPTYPE& weight, - FPTYPE** rho, - const std::complex* wfcr); +template +struct elecstate_pw_op +{ + /// @brief Calculate psiToRho output within the 
band-by-band loop, NSPIN != 4 + /// + /// Input Parameters + /// @param ctx - which device this function runs on + /// @param spin - current spin + /// @param nrxx - number of planewaves + /// @param weight - input constant + /// @param wfcr - input array, psi in real space + /// + /// Output Parameters + /// @param rho - electronic densities + void operator() (const Device* ctx, + const int& spin, + const int& nrxx, + const FPTYPE& weight, + FPTYPE** rho, + const std::complex* wfcr); - /// @brief Calculate psiToRho output within the band-by-band loop, NSPIN == 4 - /// - /// Input Parameters - /// @param ctx - which device this function runs on - /// @param DOMAG - PARAM.globalv.domag - /// @param DOMAG_Z - PARAM.globalv.domag_z - /// @param nrxx - number of planewaves - /// @param weight - input constant - /// @param wfcr - input array, psi in real space - /// @param wfcr_another_spin - input array, psi in real space - /// - /// Output Parameters - /// @param rho - electronic densities - void operator() ( - const Device* ctx, - const bool& DOMAG, - const bool& DOMAG_Z, - const int& nrxx, - const FPTYPE& weight, - FPTYPE** rho, - const std::complex* wfcr, - const std::complex* wfcr_another_spin); + /// @brief Calculate psiToRho output within the band-by-band loop, NSPIN == 4 + /// + /// Input Parameters + /// @param ctx - which device this function runs on + /// @param DOMAG - PARAM.globalv.domag + /// @param DOMAG_Z - PARAM.globalv.domag_z + /// @param nrxx - number of planewaves + /// @param weight - input constant + /// @param wfcr - input array, psi in real space + /// @param wfcr_another_spin - input array, psi in real space + /// + /// Output Parameters + /// @param rho - electronic densities + void operator() (const Device* ctx, + const bool& DOMAG, + const bool& DOMAG_Z, + const int& nrxx, + const FPTYPE& weight, + FPTYPE** rho, + const std::complex* wfcr, + const std::complex* wfcr_another_spin); }; #if __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM 
template struct elecstate_pw_op { - void operator()(const base_device::DEVICE_GPU* ctx, - const int& spin, - const int& nrxx, - const FPTYPE& w1, - FPTYPE** rho, - const std::complex* wfcr); + void operator() (const base_device::DEVICE_GPU* ctx, + const int& spin, + const int& nrxx, + const FPTYPE& w1, + FPTYPE** rho, + const std::complex* wfcr); - void operator()(const base_device::DEVICE_GPU* ctx, - const bool& DOMAG, - const bool& DOMAG_Z, - const int& nrxx, - const FPTYPE& w1, - FPTYPE** rho, - const std::complex* wfcr, - const std::complex* wfcr_another_spin); + void operator() (const base_device::DEVICE_GPU* ctx, + const bool& DOMAG, + const bool& DOMAG_Z, + const int& nrxx, + const FPTYPE& w1, + FPTYPE** rho, + const std::complex* wfcr, + const std::complex* wfcr_another_spin); }; #endif } // namespace elecstate diff --git a/source/source_estate/kernels/rocm/elecstate_op.hip.cu b/source/source_estate/kernels/rocm/elecstate_op.hip.cu index 90fbe5b0cd1..836bf7be007 100644 --- a/source/source_estate/kernels/rocm/elecstate_op.hip.cu +++ b/source/source_estate/kernels/rocm/elecstate_op.hip.cu @@ -6,76 +6,90 @@ #define THREADS_PER_BLOCK 256 -namespace elecstate { +namespace elecstate +{ -template -__global__ void elecstate_pw( - const int spin, - const int nrxx, - const FPTYPE w1, - FPTYPE* rho, - const thrust::complex* wfcr) +template +__global__ void + elecstate_pw (const int spin, const int nrxx, const FPTYPE w1, FPTYPE* rho, const thrust::complex* wfcr) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= nrxx) {return;} - rho[spin * nrxx + idx] += w1 * norm(wfcr[idx]); + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= nrxx) + { + return; + } + rho[spin * nrxx + idx] += w1 * norm (wfcr[idx]); } -template -__global__ void elecstate_pw( - const bool DOMAG, - const bool DOMAG_Z, - const int nrxx, - const FPTYPE w1, - FPTYPE* rho, - const thrust::complex* wfcr, - const thrust::complex* wfcr_another_spin) +template +__global__ void + 
elecstate_pw (const bool DOMAG, + const bool DOMAG_Z, + const int nrxx, + const FPTYPE w1, + FPTYPE* rho, + const thrust::complex* wfcr, + const thrust::complex* wfcr_another_spin) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= nrxx) {return;} - rho[0 * nrxx + idx] += w1 * (norm(wfcr[idx]) + norm(wfcr_another_spin[idx])); + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= nrxx) + { + return; + } + rho[0 * nrxx + idx] += w1 * (norm (wfcr[idx]) + norm (wfcr_another_spin[idx])); - if (DOMAG) { - rho[1 * nrxx + idx] += w1 * 2.0 - * (wfcr[idx].real() * wfcr_another_spin[idx].real() - + wfcr[idx].imag() * wfcr_another_spin[idx].imag()); - rho[2 * nrxx + idx] += w1 * 2.0 - * (wfcr[idx].real() * wfcr_another_spin[idx].imag() - - wfcr_another_spin[idx].real() * wfcr[idx].imag()); - rho[3 * nrxx + idx] += w1 * (norm(wfcr[idx]) - norm(wfcr_another_spin[idx])); - } - else if(DOMAG_Z) { - rho[1 * nrxx + idx] = 0; - rho[2 * nrxx + idx] = 0; - rho[3 * nrxx + idx] += w1 * (norm(wfcr[idx]) - norm(wfcr_another_spin[idx])); - } - else { - rho[0 * nrxx + idx] = 0; - rho[1 * nrxx + idx] = 0; - rho[2 * nrxx + idx] = 0; - rho[3 * nrxx + idx] = 0; - } + if (DOMAG) + { + rho[1 * nrxx + idx] += w1 * 2.0 + * (wfcr[idx].real () * wfcr_another_spin[idx].real () + + wfcr[idx].imag () * wfcr_another_spin[idx].imag ()); + rho[2 * nrxx + idx] += w1 * 2.0 + * (wfcr[idx].real () * wfcr_another_spin[idx].imag () + - wfcr_another_spin[idx].real () * wfcr[idx].imag ()); + rho[3 * nrxx + idx] += w1 * (norm (wfcr[idx]) - norm (wfcr_another_spin[idx])); + } + else if (DOMAG_Z) + { + rho[1 * nrxx + idx] = 0; + rho[2 * nrxx + idx] = 0; + rho[3 * nrxx + idx] += w1 * (norm (wfcr[idx]) - norm (wfcr_another_spin[idx])); + } + else + { + rho[0 * nrxx + idx] = 0; + rho[1 * nrxx + idx] = 0; + rho[2 * nrxx + idx] = 0; + rho[3 * nrxx + idx] = 0; + } } template -void elecstate_pw_op::operator()(const base_device::DEVICE_GPU* ctx, +void + elecstate_pw_op::operator() (const 
base_device::DEVICE_GPU* ctx, const int& spin, const int& nrxx, const FPTYPE& w1, FPTYPE** rho, const std::complex* wfcr) { - const int block = (nrxx + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - hipLaunchKernelGGL(HIP_KERNEL_NAME(elecstate_pw), dim3(block), dim3(THREADS_PER_BLOCK), 0, 0, - spin, nrxx, w1, rho[0], - reinterpret_cast*>(wfcr) - ); + const int block = (nrxx + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + hipLaunchKernelGGL (HIP_KERNEL_NAME (elecstate_pw), + dim3 (block), + dim3 (THREADS_PER_BLOCK), + 0, + 0, + spin, + nrxx, + w1, + rho[0], + reinterpret_cast*> (wfcr)); - hipCheckOnDebug(); + hipCheckOnDebug (); } template -void elecstate_pw_op::operator()(const base_device::DEVICE_GPU* ctx, +void + elecstate_pw_op::operator() (const base_device::DEVICE_GPU* ctx, const bool& DOMAG, const bool& DOMAG_Z, const int& nrxx, @@ -84,16 +98,23 @@ void elecstate_pw_op::operator()(const base_dev const std::complex* wfcr, const std::complex* wfcr_another_spin) { - const int block = (nrxx + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - hipLaunchKernelGGL(HIP_KERNEL_NAME(elecstate_pw), dim3(block), dim3(THREADS_PER_BLOCK), 0, 0, - DOMAG, DOMAG_Z, nrxx, w1, rho[0], - reinterpret_cast*>(wfcr), - reinterpret_cast*>(wfcr_another_spin) - ); + const int block = (nrxx + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + hipLaunchKernelGGL (HIP_KERNEL_NAME (elecstate_pw), + dim3 (block), + dim3 (THREADS_PER_BLOCK), + 0, + 0, + DOMAG, + DOMAG_Z, + nrxx, + w1, + rho[0], + reinterpret_cast*> (wfcr), + reinterpret_cast*> (wfcr_another_spin)); - hipCheckOnDebug(); + hipCheckOnDebug (); } template struct elecstate_pw_op; template struct elecstate_pw_op; -} \ No newline at end of file +} // namespace elecstate \ No newline at end of file diff --git a/source/source_estate/kernels/test/elecstate_op_test.cpp b/source/source_estate/kernels/test/elecstate_op_test.cpp index 24f2097cf0f..f953fb1d3fd 100644 --- a/source/source_estate/kernels/test/elecstate_op_test.cpp +++ 
b/source/source_estate/kernels/test/elecstate_op_test.cpp @@ -16,18 +16,2534 @@ class TestModuleElecstateMultiDevice : public ::testing::Test const bool DOMAG_Z = true; const double w1 = 0.00011779029181838057; const double w2 = 5.8895145909190286e-05; - const std::vector expected_rho = {0.000169776, 0.000178569, 0.000193478, 0.000190963, 0.000162992, 0.00012976, 0.000115885, 0.000130059, 0.00016338, 0.000191117, 0.000193323, 0.000178366, 0.000179064, 0.000204228, 0.000227792, 0.000219629, 0.000183672, 0.000153516, 0.000153676, 0.000183952, 0.000219538, 0.000227101, 0.00020339, 0.000178705, 0.000194839, 0.000228625, 0.000243426, 0.000219702, 0.000181133, 0.000163767, 0.00018138, 0.000219691, 0.000242614, 0.000227273, 0.000193857, 0.00017819, 0.000192755, 0.000220801, 0.000220073, 0.00019118, 0.000164701, 0.000164856, 0.000191352, 0.000219611, 0.000219514, 0.000191386, 0.000165518, 0.000166079, 0.00016434, 0.000184444, 0.00018134, 0.000164698, 0.000156019, 0.000164894, 0.000181257, 0.000183649, 0.00016312, 0.000135954, 0.000124381, 0.000136818, 0.000130335, 0.00015378, 0.000163817, 0.000164888, 0.000164963, 0.000163834, 0.000153372, 0.000129503, 0.000101264, 8.32226e-05, 8.35355e-05, 0.000102059, 0.000116058, 0.000153881, 0.000181671, 0.00019171, 0.000181563, 0.000153518, 0.000115431, 7.94008e-05, 5.55859e-05, 4.77218e-05, 5.59624e-05, 8.00408e-05, 0.000130395, 0.000184659, 0.000220604, 0.000220398, 0.000184101, 0.000129668, 7.94357e-05, 4.63696e-05, 3.15498e-05, 3.16778e-05, 4.67709e-05, 8.00837e-05, 0.000164257, 0.000220901, 0.000243906, 0.000220232, 0.000163282, 0.000101221, 5.55696e-05, 3.15687e-05, 2.46811e-05, 3.17951e-05, 5.61029e-05, 0.00010208, 0.000192313, 0.000228466, 0.000228055, 0.00019132, 0.000135475, 8.28695e-05, 4.76363e-05, 3.17207e-05, 3.18329e-05, 4.80319e-05, 8.3641e-05, 0.000136545, 0.000194159, 0.000203962, 0.000193564, 0.000164464, 0.000123269, 8.29177e-05, 5.58212e-05, 4.6815e-05, 5.60991e-05, 8.35257e-05, 0.000124162, 0.000165391, 
0.000178568, 0.000178447, 0.000177044, 0.000164353, 0.000135345, 0.000101334, 7.98722e-05, 8.00263e-05, 0.00010178, 0.000135967, 0.000164933, 0.000177413, 0.00017896, 0.000203835, 0.000227343, 0.000219346, 0.000183589, 0.000153552, 0.000153811, 0.000184272, 0.000220133, 0.000227869, 0.000204034, 0.000178996, 0.000204641, 0.000245599, 0.000266381, 0.000242499, 0.000200025, 0.000180655, 0.000200547, 0.000243195, 0.000266727, 0.000245475, 0.000204416, 0.000185313, 0.00022891, 0.000267126, 0.000266303, 0.000227029, 0.000191299, 0.000191546, 0.000227565, 0.000266585, 0.000266819, 0.000228355, 0.000192972, 0.000193226, 0.000220892, 0.000243298, 0.000227224, 0.000193116, 0.000176958, 0.00019338, 0.000227396, 0.000242974, 0.000220189, 0.00018224, 0.000165126, 0.000182777, 0.000184452, 0.000200435, 0.000191399, 0.000176968, 0.000176991, 0.000191342, 0.000200057, 0.000183759, 0.000149563, 0.000123756, 0.000124008, 0.000150211, 0.000153857, 0.000180864, 0.000191681, 0.000193468, 0.000191384, 0.000180279, 0.000153109, 0.000115507, 8.4576e-05, 7.32605e-05, 8.49347e-05, 0.000116163, 0.000154093, 0.000200993, 0.000227986, 0.000227634, 0.000200117, 0.000153102, 0.000101786, 6.24711e-05, 4.30082e-05, 4.3128e-05, 6.28796e-05, 0.000102542, 0.000184984, 0.000244115, 0.000267228, 0.000243129, 0.000183648, 0.000115422, 6.24715e-05, 3.36981e-05, 2.53705e-05, 3.38925e-05, 6.3015e-05, 0.00011646, 0.000221235, 0.000267707, 0.000267073, 0.000219788, 0.000149084, 8.43875e-05, 4.30356e-05, 2.54251e-05, 2.55042e-05, 4.33728e-05, 8.52167e-05, 0.000150469, 0.000228728, 0.000245742, 0.000227673, 0.000181087, 0.000122902, 7.29834e-05, 4.31838e-05, 3.39974e-05, 4.3403e-05, 7.35933e-05, 0.000124051, 0.000182539, 0.000204196, 0.000203779, 0.00019146, 0.000163418, 0.000122899, 8.45893e-05, 6.29222e-05, 6.30623e-05, 8.50736e-05, 0.000123809, 0.000164615, 0.000192509, 0.000178745, 0.000184257, 0.000191376, 0.000180885, 0.000149064, 0.000115799, 0.000102469, 0.000116203, 0.000149836, 0.000181829, 
0.000192182, 0.000184698, 0.000194197, 0.000227602, 0.000242531, 0.000219264, 0.000181092, 0.000163967, 0.000181826, 0.000220523, 0.000243831, 0.000228498, 0.000194599, 0.000178222, 0.00022836, 0.000266431, 0.000265778, 0.000226824, 0.000191362, 0.000191812, 0.000228048, 0.000267291, 0.000267602, 0.00022895, 0.000193197, 0.000193038, 0.000243596, 0.000266133, 0.000244166, 0.00020288, 0.000183901, 0.00020362, 0.00024537, 0.000267252, 0.000244248, 0.000202034, 0.000182436, 0.000201798, 0.000219914, 0.000226984, 0.000202831, 0.000177819, 0.000178022, 0.000203359, 0.000227581, 0.000220314, 0.000184942, 0.000154877, 0.000154845, 0.000184784, 0.000181151, 0.000191228, 0.00018371, 0.000177896, 0.00018366, 0.000191138, 0.000181046, 0.000149503, 0.000116298, 0.000102854, 0.000116343, 0.000149588, 0.000163791, 0.000191618, 0.000203375, 0.000203111, 0.00019099, 0.000163151, 0.000122788, 8.45394e-05, 6.28441e-05, 6.28824e-05, 8.47057e-05, 0.000123192, 0.000181825, 0.000228007, 0.0002451, 0.000227175, 0.000180756, 0.000122709, 7.28545e-05, 4.30544e-05, 3.38231e-05, 4.31296e-05, 7.31592e-05, 0.000123445, 0.000220837, 0.000267297, 0.000266739, 0.000219626, 0.000149104, 8.44935e-05, 4.312e-05, 2.54477e-05, 2.54607e-05, 4.32413e-05, 8.49761e-05, 0.000150132, 0.00024411, 0.000267192, 0.00024319, 0.000183917, 0.000115847, 6.28857e-05, 3.39986e-05, 2.55682e-05, 3.40292e-05, 6.31138e-05, 0.000116538, 0.000185036, 0.000228282, 0.000227879, 0.000200457, 0.000153624, 0.000102398, 6.30086e-05, 4.34058e-05, 4.34269e-05, 6.31469e-05, 0.000102846, 0.000154464, 0.00020137, 0.000193957, 0.000191754, 0.000180646, 0.000153539, 0.000115928, 8.48887e-05, 7.34535e-05, 8.5078e-05, 0.000116387, 0.000154285, 0.000181475, 0.000192302, 0.000177651, 0.000191707, 0.000200165, 0.000183666, 0.000149307, 0.000123383, 0.000123605, 0.00014996, 0.00018462, 0.000201118, 0.000192363, 0.000177876, 0.000191783, 0.000219569, 0.00021921, 0.000190941, 0.000164941, 0.000165437, 0.000192324, 0.000221049, 0.000221193, 
0.000192766, 0.000166159, 0.00016585, 0.00021996, 0.00024227, 0.00022661, 0.000193081, 0.000177415, 0.000194268, 0.0002287, 0.000244543, 0.00022165, 0.000183213, 0.000165438, 0.000182397, 0.000219454, 0.000226546, 0.00020266, 0.000177976, 0.000178476, 0.000204075, 0.000228484, 0.000221241, 0.000185699, 0.000155343, 0.000154979, 0.00018457, 0.00019067, 0.00019267, 0.000177686, 0.000169169, 0.000178188, 0.000193597, 0.000191769, 0.000164278, 0.000131062, 0.000116874, 0.000130584, 0.000163371, 0.000164266, 0.000176694, 0.000177869, 0.000177851, 0.000176716, 0.000164456, 0.000135833, 0.000101933, 8.03218e-05, 8.02043e-05, 0.000101624, 0.000135492, 0.000164644, 0.000193371, 0.000203198, 0.000192973, 0.00016421, 0.000123358, 8.31557e-05, 5.59935e-05, 4.6818e-05, 5.58866e-05, 8.30724e-05, 0.000123516, 0.000191606, 0.000227705, 0.000227401, 0.000190999, 0.000135555, 8.31647e-05, 4.78966e-05, 3.18287e-05, 3.17699e-05, 4.77766e-05, 8.31897e-05, 0.000135944, 0.00022038, 0.000243396, 0.000220002, 0.00016352, 0.000101796, 5.61586e-05, 3.19688e-05, 2.48775e-05, 3.18148e-05, 5.59491e-05, 0.000101777, 0.000163838, 0.000220371, 0.000220283, 0.000184384, 0.000130411, 8.03645e-05, 4.71541e-05, 3.20674e-05, 3.19629e-05, 4.68692e-05, 8.00456e-05, 0.000130283, 0.000184483, 0.000191758, 0.000181752, 0.000154082, 0.000116363, 8.04071e-05, 5.6379e-05, 4.82162e-05, 5.6199e-05, 8.01134e-05, 0.000116105, 0.000153974, 0.000181753, 0.000165222, 0.000164124, 0.000153901, 0.000130258, 0.000101987, 8.36893e-05, 8.37044e-05, 0.000102039, 0.000130355, 0.000154029, 0.000164235, 0.000165266, 0.000165291, 0.000181461, 0.000183847, 0.000163329, 0.000136024, 0.000124216, 0.000136499, 0.000164162, 0.00018475, 0.000182137, 0.000165623, 0.00015673, 0.000163569, 0.00018354, 0.000180875, 0.000164863, 0.000156711, 0.000166032, 0.000182847, 0.000185594, 0.000165052, 0.000137407, 0.000125083, 0.000136701, 0.000183577, 0.000199544, 0.000191064, 0.000177383, 0.000178094, 0.000193021, 0.000202132, 0.000185895, 
0.000151357, 0.000124942, 0.00012448, 0.000149918, 0.000180558, 0.000190738, 0.000183719, 0.000178516, 0.000184812, 0.000192645, 0.000182663, 0.000150976, 0.000117447, 0.000103605, 0.00011663, 0.000149349, 0.000164042, 0.000176567, 0.000178014, 0.00017829, 0.000177355, 0.000165163, 0.000136503, 0.000102511, 8.07849e-05, 8.05329e-05, 0.000101763, 0.000135398, 0.000155504, 0.000176838, 0.00018384, 0.000176902, 0.000155733, 0.000122862, 8.78198e-05, 6.26008e-05, 5.37126e-05, 6.23463e-05, 8.73915e-05, 0.000122452, 0.000164634, 0.000191497, 0.000191418, 0.00016453, 0.000122719, 8.18656e-05, 5.29609e-05, 3.90106e-05, 3.89192e-05, 5.27242e-05, 8.16226e-05, 0.000122647, 0.000181433, 0.000200572, 0.000181486, 0.000136005, 8.78125e-05, 5.30691e-05, 3.46193e-05, 2.90604e-05, 3.43956e-05, 5.26804e-05, 8.74193e-05, 0.00013578, 0.000184272, 0.000184487, 0.000150048, 0.000102277, 6.28005e-05, 3.92835e-05, 2.92168e-05, 2.90551e-05, 3.88043e-05, 6.20696e-05, 0.000101486, 0.000149456, 0.000163873, 0.000150183, 0.000116786, 8.07673e-05, 5.40762e-05, 3.93422e-05, 3.47e-05, 3.89455e-05, 5.3295e-05, 7.971e-05, 0.000115732, 0.000149515, 0.000136441, 0.00012402, 0.000103157, 8.06774e-05, 6.28601e-05, 5.33021e-05, 5.31329e-05, 6.23436e-05, 7.98387e-05, 0.000102157, 0.000123181, 0.000136111, 0.000124411, 0.000123824, 0.000116395, 0.000102099, 8.80972e-05, 8.23993e-05, 8.80746e-05, 0.000101969, 0.000116088, 0.000123418, 0.00012412, 0.000123477, 0.000136384, 0.00014958, 0.000149401, 0.000135994, 0.000123413, 0.000123714, 0.000136778, 0.00015033, 0.000150286, 0.000136742, 0.000123842, 0.000123745, 0.000129876, 0.000153196, 0.000163656, 0.000165404, 0.000166112, 0.000165473, 0.00015535, 0.000131594, 0.00010315, 8.46425e-05, 8.43506e-05, 0.000102184, 0.000153082, 0.000180108, 0.000191611, 0.000194349, 0.000193074, 0.000182444, 0.000155389, 0.000117587, 8.6239e-05, 7.44187e-05, 8.54958e-05, 0.000115985, 0.000163108, 0.000191128, 0.000203621, 0.000204191, 0.000192609, 0.000164915, 0.000124426, 
8.59286e-05, 6.39458e-05, 6.36734e-05, 8.50474e-05, 0.000122922, 0.00016427, 0.000193218, 0.000203544, 0.000193735, 0.000165102, 0.000124183, 8.38875e-05, 5.66566e-05, 4.74156e-05, 5.63674e-05, 8.32827e-05, 0.000123342, 0.000164511, 0.000191479, 0.000191563, 0.00016474, 0.000122895, 8.20151e-05, 5.31381e-05, 3.92361e-05, 3.91765e-05, 5.29581e-05, 8.17365e-05, 0.000122592, 0.000163723, 0.000180812, 0.000163935, 0.00012388, 8.20223e-05, 5.22662e-05, 3.65683e-05, 3.18566e-05, 3.64529e-05, 5.2034e-05, 8.16996e-05, 0.000123551, 0.0001538, 0.000154107, 0.000123809, 8.38231e-05, 5.32741e-05, 3.6669e-05, 3.01209e-05, 3.00101e-05, 3.63253e-05, 5.26796e-05, 8.30355e-05, 0.000123075, 0.000130433, 0.000116752, 8.56391e-05, 5.67519e-05, 3.94538e-05, 3.2037e-05, 3.0099e-05, 3.16854e-05, 3.87272e-05, 5.56478e-05, 8.43587e-05, 0.000115836, 0.00010236, 8.57183e-05, 6.38156e-05, 4.75674e-05, 3.94416e-05, 3.67055e-05, 3.65041e-05, 3.88229e-05, 4.65113e-05, 6.24189e-05, 8.44039e-05, 0.000101803, 8.41204e-05, 7.4067e-05, 6.36153e-05, 5.65795e-05, 5.33297e-05, 5.24404e-05, 5.30374e-05, 5.59239e-05, 6.25876e-05, 7.28955e-05, 8.33033e-05, 8.80873e-05, 8.4031e-05, 8.5275e-05, 8.51082e-05, 8.36835e-05, 8.23938e-05, 8.24628e-05, 8.37687e-05, 8.49763e-05, 8.48556e-05, 8.35455e-05, 8.2405e-05, 8.26193e-05, 0.000102056, 0.000115945, 0.000123228, 0.000124114, 0.000123743, 0.000124869, 0.000124326, 0.000116858, 0.000102542, 8.85546e-05, 8.28387e-05, 8.83948e-05, 0.000115515, 0.000153178, 0.000181461, 0.000192345, 0.000182929, 0.000155283, 0.000117307, 8.11734e-05, 5.70951e-05, 4.88782e-05, 5.66632e-05, 8.01132e-05, 0.000153026, 0.000199999, 0.000227897, 0.000228681, 0.000201896, 0.000155064, 0.000103597, 6.39952e-05, 4.4215e-05, 4.40134e-05, 6.32707e-05, 0.00010215, 0.00018076, 0.000227277, 0.000245333, 0.000228261, 0.000182143, 0.000123998, 7.39655e-05, 4.40085e-05, 3.46431e-05, 4.37634e-05, 7.3327e-05, 0.000122863, 0.000190936, 0.00022738, 0.000227646, 0.000191525, 0.000136033, 8.35705e-05, 
4.83429e-05, 3.23506e-05, 3.23385e-05, 4.8263e-05, 8.33041e-05, 0.000135512, 0.000181134, 0.000200401, 0.000181425, 0.000135892, 8.76473e-05, 5.30001e-05, 3.47412e-05, 2.93641e-05, 3.48117e-05, 5.30612e-05, 8.75565e-05, 0.000135598, 0.000153668, 0.000153945, 0.000123591, 8.35558e-05, 5.30393e-05, 3.65508e-05, 3.01554e-05, 3.01799e-05, 3.65648e-05, 5.28929e-05, 8.31275e-05, 0.000123022, 0.00011622, 0.000103006, 7.38389e-05, 4.83881e-05, 3.4772e-05, 3.01478e-05, 2.93687e-05, 3.00549e-05, 3.44657e-05, 4.77231e-05, 7.28621e-05, 0.000102207, 8.0567e-05, 6.37096e-05, 4.39393e-05, 3.23335e-05, 2.93244e-05, 3.0128e-05, 3.00339e-05, 2.89846e-05, 3.16325e-05, 4.28335e-05, 6.25121e-05, 8.00199e-05, 5.67399e-05, 4.39985e-05, 3.45114e-05, 3.22339e-05, 3.47269e-05, 3.65371e-05, 3.45117e-05, 3.17005e-05, 3.35899e-05, 4.28176e-05, 5.58289e-05, 6.26212e-05, 4.8614e-05, 4.37771e-05, 4.35783e-05, 4.81697e-05, 5.31171e-05, 5.31103e-05, 4.80256e-05, 4.31076e-05, 4.2978e-05, 4.77782e-05, 5.30685e-05, 5.34388e-05, 5.64569e-05, 6.30598e-05, 7.32512e-05, 8.3521e-05, 8.81195e-05, 8.39039e-05, 7.36216e-05, 6.30901e-05, 5.61824e-05, 5.32249e-05, 5.26704e-05, 5.35023e-05, 8.0015e-05, 0.00010211, 0.000123169, 0.000136376, 0.000136928, 0.000124447, 0.00010336, 8.07737e-05, 6.3059e-05, 5.36874e-05, 5.36252e-05, 6.27624e-05, 0.000129242, 0.000183365, 0.00022001, 0.000220842, 0.000185267, 0.000131075, 8.08007e-05, 4.75724e-05, 3.25427e-05, 3.24257e-05, 4.70969e-05, 7.96796e-05, 0.000183131, 0.000242521, 0.000266806, 0.000243863, 0.000184888, 0.000116656, 6.35454e-05, 3.45949e-05, 2.61054e-05, 3.44031e-05, 6.29404e-05, 0.000115378, 0.000219078, 0.000266116, 0.000266622, 0.000220161, 0.000149719, 8.50357e-05, 4.36645e-05, 2.60092e-05, 2.60116e-05, 4.35972e-05, 8.46568e-05, 0.000148832, 0.000219273, 0.000242736, 0.000219796, 0.00016338, 0.0001016, 5.6097e-05, 3.21594e-05, 2.52787e-05, 3.23251e-05, 5.6299e-05, 0.000101538, 0.0001629, 0.0001837, 0.000183984, 0.0001495, 0.000101615, 6.22155e-05, 
3.89983e-05, 2.92949e-05, 2.94338e-05, 3.93295e-05, 6.24674e-05, 0.000101468, 0.000149009, 0.000130057, 0.000116254, 8.50146e-05, 5.61045e-05, 3.89505e-05, 3.18085e-05, 3.01956e-05, 3.20324e-05, 3.91534e-05, 5.5959e-05, 8.44065e-05, 0.000115619, 8.03256e-05, 6.34002e-05, 4.35985e-05, 3.20142e-05, 2.90869e-05, 3.00446e-05, 3.01262e-05, 2.91771e-05, 3.18231e-05, 4.29453e-05, 6.24943e-05, 7.98732e-05, 4.73275e-05, 3.44193e-05, 2.57751e-05, 2.49389e-05, 2.90501e-05, 3.17653e-05, 2.90994e-05, 2.47974e-05, 2.52566e-05, 3.35478e-05, 4.65605e-05, 5.36004e-05, 3.23057e-05, 2.58073e-05, 2.56053e-05, 3.18332e-05, 3.89116e-05, 3.89798e-05, 3.18589e-05, 2.53347e-05, 2.51871e-05, 3.15495e-05, 3.88849e-05, 3.92491e-05, 3.21381e-05, 3.39911e-05, 4.31034e-05, 5.5901e-05, 6.23845e-05, 5.61972e-05, 4.32888e-05, 3.38106e-05, 3.16759e-05, 3.44439e-05, 3.66118e-05, 3.48588e-05, 4.6815e-05, 6.25659e-05, 8.44258e-05, 0.000101752, 0.000102168, 8.5279e-05, 6.31952e-05, 4.69861e-05, 3.91033e-05, 3.67077e-05, 3.67876e-05, 3.92044e-05, 7.95397e-05, 0.000115317, 0.000149266, 0.00016403, 0.000150495, 0.000116888, 8.06518e-05, 5.39992e-05, 3.9481e-05, 3.504e-05, 3.93308e-05, 5.34902e-05, 0.000162245, 0.000218994, 0.000242976, 0.000220373, 0.000163988, 0.000102049, 5.63514e-05, 3.22528e-05, 2.52347e-05, 3.21075e-05, 5.58345e-05, 0.000100854, 0.000218578, 0.000265718, 0.0002664, 0.000220051, 0.000149616, 8.49083e-05, 4.35382e-05, 2.58968e-05, 2.5888e-05, 4.3411e-05, 8.4347e-05, 0.000148382, 0.000241876, 0.000265842, 0.000242791, 0.00018387, 0.000115791, 6.29115e-05, 3.42048e-05, 2.59007e-05, 3.43398e-05, 6.29914e-05, 0.000115408, 0.000182897, 0.000219059, 0.000219486, 0.000183833, 0.000129775, 7.9756e-05, 4.68533e-05, 3.21433e-05, 3.23118e-05, 4.72629e-05, 8.00321e-05, 0.00012946, 0.000183057, 0.000163127, 0.00014946, 0.000115913, 7.97889e-05, 5.32883e-05, 3.89858e-05, 3.48174e-05, 3.94074e-05, 5.38133e-05, 7.98969e-05, 0.000115367, 0.000148786, 0.000101745, 8.49632e-05, 6.29319e-05, 4.67002e-05, 
3.88037e-05, 3.64789e-05, 3.67183e-05, 3.92949e-05, 4.6957e-05, 6.25904e-05, 8.4203e-05, 0.000101331, 5.62433e-05, 4.34449e-05, 3.39337e-05, 3.16952e-05, 3.43572e-05, 3.64712e-05, 3.47172e-05, 3.19884e-05, 3.37851e-05, 4.28128e-05, 5.55935e-05, 6.22174e-05, 3.20578e-05, 2.55638e-05, 2.53584e-05, 3.16044e-05, 3.87762e-05, 3.89851e-05, 3.19469e-05, 2.54096e-05, 2.51908e-05, 3.14512e-05, 3.86897e-05, 3.90057e-05, 2.48989e-05, 2.53537e-05, 3.35944e-05, 4.64923e-05, 5.33536e-05, 4.69089e-05, 3.39239e-05, 2.52956e-05, 2.45176e-05, 2.87455e-05, 3.16431e-05, 2.91355e-05, 3.16775e-05, 4.27794e-05, 6.2334e-05, 7.97092e-05, 8.00862e-05, 6.30546e-05, 4.32192e-05, 3.16837e-05, 2.8838e-05, 2.98786e-05, 3.0009e-05, 2.90585e-05, 5.54665e-05, 8.39616e-05, 0.000115411, 0.000130106, 0.000116386, 8.50888e-05, 5.6121e-05, 3.89503e-05, 3.17868e-05, 3.00998e-05, 3.18029e-05, 3.87681e-05, 0.00010074, 0.000148546, 0.000183762, 0.000184484, 0.000150126, 0.000102135, 6.25699e-05, 3.91984e-05, 2.93405e-05, 2.92906e-05, 3.89402e-05, 6.18243e-05, 0.000189968, 0.000226648, 0.000227274, 0.000191338, 0.000135795, 8.32333e-05, 4.79955e-05, 3.20438e-05, 3.20297e-05, 4.78504e-05, 8.26649e-05, 0.000134614, 0.000226111, 0.000244162, 0.000227213, 0.000181152, 0.000123019, 7.31013e-05, 4.33658e-05, 3.41949e-05, 4.34179e-05, 7.2966e-05, 0.000122298, 0.000179849, 0.00022641, 0.000227013, 0.000200217, 0.000153416, 0.000102096, 6.283e-05, 4.34486e-05, 4.35768e-05, 6.30788e-05, 0.000102009, 0.000152574, 0.00019898, 0.000190797, 0.000181278, 0.000153628, 0.000115686, 7.97449e-05, 5.60445e-05, 4.82651e-05, 5.64563e-05, 8.01779e-05, 0.000115496, 0.00015265, 0.000180303, 0.000135822, 0.000123417, 0.000102333, 7.97159e-05, 6.21003e-05, 5.30044e-05, 5.33078e-05, 6.27516e-05, 8.00614e-05, 0.000101861, 0.000122438, 0.000135312, 8.34484e-05, 7.3298e-05, 6.27092e-05, 5.56834e-05, 5.27088e-05, 5.22875e-05, 5.32935e-05, 5.62897e-05, 6.27241e-05, 7.26334e-05, 8.2733e-05, 8.7425e-05, 4.79875e-05, 4.31295e-05, 4.28933e-05, 
4.75375e-05, 5.2729e-05, 5.30717e-05, 4.81982e-05, 4.32335e-05, 4.28738e-05, 4.73926e-05, 5.24865e-05, 5.28037e-05, 3.1743e-05, 3.36193e-05, 4.27264e-05, 5.55719e-05, 6.22051e-05, 5.61732e-05, 4.32948e-05, 3.37181e-05, 3.14217e-05, 3.40299e-05, 3.61221e-05, 3.44009e-05, 3.15416e-05, 4.26766e-05, 6.22434e-05, 7.96303e-05, 8.00287e-05, 6.30029e-05, 4.31349e-05, 3.15386e-05, 2.8622e-05, 2.96133e-05, 2.97547e-05, 2.88637e-05, 4.73524e-05, 7.24157e-05, 0.000101778, 0.000115845, 0.000102578, 7.3278e-05, 4.77667e-05, 3.41975e-05, 2.96799e-05, 2.90223e-05, 2.97829e-05, 3.41793e-05, 8.24296e-05, 0.000122382, 0.000153314, 0.000153853, 0.000123517, 8.33491e-05, 5.27466e-05, 3.6259e-05, 2.98878e-05, 2.98951e-05, 3.61821e-05, 5.23392e-05, 0.000134706, 0.000180492, 0.00020022, 0.000181553, 0.000136011, 8.76068e-05, 5.28457e-05, 3.45468e-05, 2.91322e-05, 3.44743e-05, 5.25143e-05, 8.67563e-05, 0.000192476, 0.000202953, 0.000193269, 0.000164542, 0.000123406, 8.30327e-05, 5.5932e-05, 4.68833e-05, 5.59711e-05, 8.29089e-05, 0.000122833, 0.000163568, 0.000202578, 0.000203146, 0.000191496, 0.000163576, 0.000122925, 8.45632e-05, 6.29273e-05, 6.30011e-05, 8.4632e-05, 0.000122567, 0.000162534, 0.000190239, 0.000193094, 0.000191727, 0.000180882, 0.000153543, 0.000115707, 8.47041e-05, 7.33911e-05, 8.49485e-05, 0.00011576, 0.000152812, 0.000179443, 0.000190543, 0.000164985, 0.00016428, 0.000153902, 0.000129894, 0.000101524, 8.34307e-05, 8.36724e-05, 0.000101963, 0.000129796, 0.000152824, 0.000162829, 0.000164318, 0.000124187, 0.000123646, 0.000115948, 0.000101433, 8.75308e-05, 8.21529e-05, 8.80862e-05, 0.000101914, 0.000115608, 0.000122509, 0.000123176, 0.000122883, 8.35386e-05, 8.47608e-05, 8.44441e-05, 8.29716e-05, 8.18669e-05, 8.22377e-05, 8.3701e-05, 8.47534e-05, 8.42682e-05, 8.26769e-05, 8.15331e-05, 8.19515e-05, 5.59126e-05, 6.25578e-05, 7.27056e-05, 8.2978e-05, 8.77087e-05, 8.36486e-05, 7.33599e-05, 6.26251e-05, 5.54514e-05, 5.23311e-05, 5.18043e-05, 5.28019e-05, 4.65013e-05, 
6.23551e-05, 8.42158e-05, 0.000101497, 0.000101889, 8.49668e-05, 6.27798e-05, 4.64125e-05, 3.83834e-05, 3.59317e-05, 3.60993e-05, 3.87052e-05, 5.55151e-05, 8.41575e-05, 0.000115586, 0.000130113, 0.000116211, 8.47823e-05, 5.5718e-05, 3.84676e-05, 3.1255e-05, 2.95943e-05, 3.14276e-05, 3.85969e-05, 8.27265e-05, 0.000122767, 0.000153602, 0.000153933, 0.000123423, 8.31643e-05, 5.25199e-05, 3.60119e-05, 2.96459e-05, 2.97056e-05, 3.61091e-05, 5.24442e-05, 0.0001231, 0.000163422, 0.000180773, 0.000163969, 0.000123715, 8.16183e-05, 5.17916e-05, 3.61707e-05, 3.15677e-05, 3.62194e-05, 5.17592e-05, 8.13068e-05, 0.000163955, 0.000191154, 0.000191505, 0.000164707, 0.000122651, 8.15762e-05, 5.26784e-05, 3.88668e-05, 3.88879e-05, 5.26703e-05, 8.13391e-05, 0.000122042, 0.000177949, 0.000178224, 0.000177098, 0.000164478, 0.000135415, 0.000101353, 7.98668e-05, 7.99553e-05, 0.000101514, 0.000135364, 0.000164031, 0.0001765, 0.000178256, 0.000184406, 0.000191805, 0.000181199, 0.000149144, 0.00011576, 0.000102371, 0.000115917, 0.000149093, 0.000180508, 0.00019063, 0.000183509, 0.000177763, 0.000192413, 0.000200895, 0.000183997, 0.000149287, 0.000123244, 0.000123385, 0.000149437, 0.000183498, 0.0001995, 0.000190872, 0.00017711, 0.000165789, 0.000182296, 0.000184419, 0.000163397, 0.0001358, 0.000123931, 0.000136118, 0.000163418, 0.00018347, 0.00018064, 0.000164539, 0.000156455, 0.000136691, 0.000150007, 0.000149512, 0.000135719, 0.000122961, 0.000123217, 0.000136137, 0.000149324, 0.000148911, 0.000135376, 0.000122941, 0.00012351, 0.000101983, 0.000115965, 0.00012304, 0.000123648, 0.000123107, 0.000124119, 0.000123373, 0.000115602, 0.000101093, 8.72159e-05, 8.18811e-05, 8.79275e-05, 7.9881e-05, 0.000102134, 0.000123047, 0.000135942, 0.000136207, 0.00012351, 0.000102223, 7.94634e-05, 6.17064e-05, 5.24766e-05, 5.27153e-05, 6.22537e-05, 7.97623e-05, 0.000115769, 0.000149534, 0.000163824, 0.000149842, 0.000115968, 7.95996e-05, 5.29026e-05, 3.84306e-05, 3.41378e-05, 3.87003e-05, 5.32672e-05, 
0.000101581, 0.000149594, 0.000184453, 0.000184526, 0.000149665, 0.000101448, 6.18208e-05, 3.84618e-05, 2.86739e-05, 2.87762e-05, 3.87297e-05, 6.21174e-05, 0.000135957, 0.00018172, 0.000200931, 0.000181658, 0.000135758, 8.72314e-05, 5.24467e-05, 3.4166e-05, 2.88205e-05, 3.43373e-05, 5.27495e-05, 8.75491e-05, 0.000164855, 0.000191857, 0.000191831, 0.000164733, 0.00012255, 8.14472e-05, 5.25465e-05, 3.87505e-05, 3.88353e-05, 5.27865e-05, 8.17652e-05, 0.000122809, 0.000176973, 0.000184073, 0.000177107, 0.000155669, 0.000122434, 8.72103e-05, 6.20709e-05, 5.33865e-05, 6.22328e-05, 8.744e-05, 0.000122554, 0.000155593}; - const std::vector> wfcr = {{0.923838, -0.766723}, {0.946855, -0.787058}, {0.98573, -0.819083}, {0.979491, -0.813515}, {0.904645, -0.751906}, {0.806637, -0.67153}, {0.762282, -0.634627}, {0.808631, -0.671024}, {0.907874, -0.750202}, {0.982864, -0.810243}, {0.988344, -0.815121}, {0.948235, -0.784292}, {0.94822, -0.78808}, {1.01275, -0.841533}, {1.07019, -0.888017}, {1.05126, -0.871455}, {0.961291, -0.797015}, {0.878556, -0.728999}, {0.879115, -0.729255}, {0.962554, -0.796984}, {1.05236, -0.869683}, {1.07057, -0.884247}, {1.01257, -0.837506}, {0.948058, -0.786339}, {0.98916, -0.822}, {1.07203, -0.889774}, {1.10691, -0.917254}, {1.05194, -0.870988}, {0.954991, -0.791043}, {0.907701, -0.752602}, {0.955251, -0.792053}, {1.05176, -0.871156}, {1.1057, -0.914953}, {1.07012, -0.88562}, {0.987688, -0.818688}, {0.946156, -0.785852}, {0.984195, -0.817182}, {1.05394, -0.87392}, {1.0527, -0.871877}, {0.981212, -0.812574}, {0.91036, -0.754655}, {0.910444, -0.755421}, {0.981031, -0.81369}, {1.05148, -0.871097}, {1.0516, -0.870478}, {0.981773, -0.812974}, {0.912427, -0.756748}, {0.913464, -0.758645}, {0.908971, -0.754295}, {0.963366, -0.79862}, {0.955384, -0.791681}, {0.910268, -0.754744}, {0.885649, -0.734965}, {0.910563, -0.755494}, {0.95523, -0.791423}, {0.962139, -0.795868}, {0.907001, -0.749786}, {0.827693, -0.684931}, {0.791008, -0.655941}, {0.829214, -0.688433}, 
{0.809457, -0.671775}, {0.879509, -0.729387}, {0.907706, -0.752877}, {0.910496, -0.755542}, {0.910841, -0.755549}, {0.908354, -0.752191}, {0.879698, -0.726779}, {0.808843, -0.66724}, {0.715068, -0.590231}, {0.647503, -0.535978}, {0.647961, -0.537899}, {0.716026, -0.594774}, {0.76381, -0.633945}, {0.879729, -0.729707}, {0.955803, -0.792947}, {0.981976, -0.814417}, {0.956255, -0.791826}, {0.880226, -0.72699}, {0.763934, -0.629583}, {0.633519, -0.522245}, {0.529231, -0.437973}, {0.489342, -0.407046}, {0.529559, -0.441213}, {0.633801, -0.527082}, {0.809945, -0.671563}, {0.963921, -0.799094}, {1.05348, -0.873514}, {1.05332, -0.872713}, {0.963433, -0.796712}, {0.809205, -0.667849}, {0.633275, -0.522824}, {0.482815, -0.40069}, {0.396853, -0.332197}, {0.397105, -0.333528}, {0.483473, -0.404133}, {0.6341, -0.527068}, {0.909689, -0.752964}, {1.05459, -0.873619}, {1.10791, -0.918271}, {1.05302, -0.872265}, {0.90709, -0.750598}, {0.713992, -0.591227}, {0.527792, -0.439548}, {0.395934, -0.333532}, {0.348973, -0.296229}, {0.397168, -0.334944}, {0.530053, -0.441972}, {0.716797, -0.593993}, {0.984821, -0.814125}, {1.07256, -0.888377}, {1.07113, -0.888141}, {0.981027, -0.813528}, {0.825252, -0.684905}, {0.644281, -0.537062}, {0.486491, -0.409564}, {0.395306, -0.3362}, {0.396422, -0.336303}, {0.489654, -0.409894}, {0.648876, -0.537628}, {0.830161, -0.685605}, {0.989527, -0.818036}, {1.01305, -0.839816}, {0.986307, -0.818834}, {0.908865, -0.75512}, {0.786138, -0.6546}, {0.643264, -0.53866}, {0.526119, -0.443962}, {0.48143, -0.407024}, {0.528918, -0.443292}, {0.648267, -0.537452}, {0.792158, -0.653131}, {0.914289, -0.753784}, {0.948358, -0.785236}, {0.946951, -0.786282}, {0.942838, -0.783644}, {0.908251, -0.755238}, {0.823591, -0.686102}, {0.711542, -0.594982}, {0.630997, -0.529084}, {0.632407, -0.528638}, {0.715391, -0.593543}, {0.828847, -0.683616}, {0.913505, -0.752152}, {0.946682, -0.781005}, {0.94745, -0.788449}, {1.01127, -0.841328}, {1.06872, -0.887642}, {1.05023, -0.871317}, 
{0.960676, -0.797313}, {0.878136, -0.729712}, {0.878906, -0.730291}, {0.962866, -0.798309}, {1.05343, -0.871288}, {1.07215, -0.886011}, {1.01393, -0.839118}, {0.948459, -0.787425}, {1.01296, -0.84335}, {1.11059, -0.922846}, {1.15768, -0.959828}, {1.10507, -0.915186}, {1.00344, -0.831421}, {0.953133, -0.790722}, {1.00417, -0.833196}, {1.1063, -0.916922}, {1.15903, -0.959721}, {1.11171, -0.920924}, {1.01359, -0.841467}, {0.964068, -0.802384}, {1.07183, -0.891375}, {1.15889, -0.961665}, {1.1579, -0.959214}, {1.06925, -0.885504}, {0.981048, -0.813396}, {0.981203, -0.814497}, {1.06956, -0.887687}, {1.15808, -0.960244}, {1.15877, -0.960442}, {1.07156, -0.889051}, {0.98419, -0.818312}, {0.984327, -0.819464}, {1.05334, -0.875089}, {1.10617, -0.917553}, {1.0693, -0.886373}, {0.985548, -0.817425}, {0.943012, -0.782968}, {0.985796, -0.818496}, {1.06949, -0.886967}, {1.10601, -0.916251}, {1.05287, -0.87224}, {0.957248, -0.794249}, {0.910429, -0.756958}, {0.95766, -0.796621}, {0.962589, -0.799601}, {1.00376, -0.833124}, {0.980864, -0.814135}, {0.943034, -0.782996}, {0.943291, -0.782814}, {0.981453, -0.813129}, {1.0043, -0.830548}, {0.962815, -0.795639}, {0.868239, -0.718263}, {0.788907, -0.654425}, {0.788959, -0.655994}, {0.86827, -0.722043}, {0.878994, -0.730451}, {0.953312, -0.791624}, {0.981566, -0.814761}, {0.986549, -0.818046}, {0.982076, -0.812596}, {0.954135, -0.787486}, {0.879849, -0.725061}, {0.763971, -0.630051}, {0.652763, -0.540298}, {0.606386, -0.504236}, {0.652418, -0.543523}, {0.763313, -0.635249}, {0.879898, -0.730735}, {1.00539, -0.834003}, {1.07124, -0.887672}, {1.07125, -0.885982}, {1.00547, -0.829434}, {0.88019, -0.724606}, {0.71756, -0.590964}, {0.56108, -0.464272}, {0.464024, -0.387048}, {0.463816, -0.388608}, {0.560587, -0.468581}, {0.717063, -0.596964}, {0.964907, -0.79963}, {1.10884, -0.918112}, {1.16062, -0.96002}, {1.1078, -0.914798}, {0.963434, -0.794295}, {0.763661, -0.629857}, {0.560587, -0.464871}, {0.409729, -0.343813}, {0.354046, -0.300064}, 
{0.409886, -0.34602}, {0.56118, -0.469098}, {0.764854, -0.635378}, {1.05615, -0.873364}, {1.16173, -0.96079}, {1.16047, -0.959515}, {1.05297, -0.870166}, {0.866966, -0.716971}, {0.650974, -0.540976}, {0.462596, -0.389054}, {0.353371, -0.301627}, {0.353899, -0.302122}, {0.464307, -0.390691}, {0.653947, -0.543888}, {0.870675, -0.720663}, {1.07423, -0.887613}, {1.11286, -0.920772}, {1.07081, -0.886703}, {0.954616, -0.791248}, {0.78542, -0.653081}, {0.603294, -0.505609}, {0.461777, -0.391636}, {0.408783, -0.348602}, {0.463771, -0.391654}, {0.607272, -0.505968}, {0.790837, -0.65401}, {0.960022, -0.792498}, {1.01446, -0.839305}, {1.01249, -0.839574}, {0.980906, -0.814407}, {0.905738, -0.752993}, {0.784415, -0.654267}, {0.64917, -0.544714}, {0.558676, -0.471242}, {0.559894, -0.471059}, {0.652772, -0.544183}, {0.789988, -0.653468}, {0.912063, -0.752107}, {0.986013, -0.813704}, {0.947943, -0.786696}, {0.961747, -0.799577}, {0.98013, -0.8149}, {0.952881, -0.792257}, {0.864487, -0.719835}, {0.761105, -0.635464}, {0.715723, -0.598056}, {0.763315, -0.635511}, {0.868656, -0.719369}, {0.958234, -0.790858}, {0.985232, -0.812944}, {0.964889, -0.798132}, {0.986425, -0.821971}, {1.06872, -0.888871}, {1.10427, -0.916299}, {1.05044, -0.870663}, {0.954341, -0.791611}, {0.907489, -0.753982}, {0.955531, -0.794102}, {1.05292, -0.873803}, {1.10779, -0.918069}, {1.07234, -0.888798}, {0.988752, -0.82125}, {0.945192, -0.787184}, {1.07002, -0.890928}, {1.15705, -0.960808}, {1.15661, -0.958449}, {1.06867, -0.885218}, {0.981026, -0.813747}, {0.981551, -0.815461}, {1.07025, -0.889167}, {1.15912, -0.962105}, {1.15997, -0.962456}, {1.07242, -0.890862}, {0.984159, -0.819515}, {0.983228, -0.819809}, {1.10577, -0.919416}, {1.15678, -0.959814}, {1.10845, -0.918813}, {1.01016, -0.837834}, {0.961188, -0.798356}, {1.01121, -0.840304}, {1.11041, -0.922003}, {1.15925, -0.961781}, {1.10809, -0.919633}, {1.00705, -0.837288}, {0.956173, -0.796589}, {1.00563, -0.83779}, {1.0509, -0.873278}, {1.06811, -0.886659}, 
{1.0097, -0.838137}, {0.945196, -0.785}, {0.945791, -0.785383}, {1.01137, -0.838798}, {1.07049, -0.886646}, {1.0534, -0.872207}, {0.964632, -0.799738}, {0.881846, -0.732942}, {0.881132, -0.733616}, {0.962726, -0.801192}, {0.953683, -0.792715}, {0.980082, -0.81419}, {0.9608, -0.79781}, {0.945936, -0.784524}, {0.96199, -0.796106}, {0.982244, -0.811106}, {0.956339, -0.788946}, {0.868699, -0.71735}, {0.765224, -0.633848}, {0.718571, -0.597372}, {0.763758, -0.635912}, {0.866261, -0.720794}, {0.906923, -0.753674}, {0.981456, -0.814563}, {1.01186, -0.838282}, {1.01232, -0.836397}, {0.982821, -0.809633}, {0.909065, -0.747462}, {0.78849, -0.648623}, {0.653285, -0.539379}, {0.561887, -0.466699}, {0.56111, -0.467981}, {0.651279, -0.5431}, {0.786027, -0.654231}, {0.95633, -0.79314}, {1.07174, -0.887175}, {1.11221, -0.918584}, {1.07195, -0.882926}, {0.957053, -0.786518}, {0.788597, -0.647974}, {0.606688, -0.500441}, {0.464746, -0.38669}, {0.410517, -0.344417}, {0.463565, -0.388925}, {0.604997, -0.50505}, {0.787204, -0.654458}, {1.0551, -0.872699}, {1.16145, -0.959325}, {1.16097, -0.957426}, {1.05411, -0.867994}, {0.868563, -0.715153}, {0.652854, -0.53954}, {0.46448, -0.387728}, {0.354829, -0.300232}, {0.354491, -0.300814}, {0.463852, -0.389801}, {0.652697, -0.543512}, {0.869216, -0.720441}, {1.11001, -0.916672}, {1.16138, -0.95894}, {1.10807, -0.914757}, {0.963429, -0.795738}, {0.763685, -0.63268}, {0.560783, -0.468402}, {0.410032, -0.347146}, {0.354347, -0.302496}, {0.410228, -0.347288}, {0.56176, -0.469298}, {0.765819, -0.634738}, {0.966148, -0.798407}, {1.07317, -0.886755}, {1.07168, -0.886629}, {1.00467, -0.832134}, {0.87882, -0.729311}, {0.716169, -0.597018}, {0.5599, -0.470567}, {0.463163, -0.392404}, {0.463558, -0.392165}, {0.561382, -0.470049}, {0.719143, -0.596626}, {0.88289, -0.729284}, {1.00835, -0.832346}, {0.988023, -0.818802}, {0.981672, -0.815011}, {0.952472, -0.791468}, {0.877698, -0.730165}, {0.761775, -0.635523}, {0.650706, -0.545214}, {0.604801, -0.507751}, 
{0.65197, -0.545178}, {0.764633, -0.635155}, {0.882053, -0.729255}, {0.957158, -0.790261}, {0.984764, -0.814137}, {0.944125, -0.785381}, {0.980634, -0.816017}, {1.00247, -0.833302}, {0.960503, -0.797932}, {0.865661, -0.719861}, {0.786294, -0.655153}, {0.786934, -0.655821}, {0.867738, -0.72121}, {0.964114, -0.798652}, {1.00697, -0.832725}, {0.9845, -0.814772}, {0.945659, -0.784752}, {0.980221, -0.81691}, {1.04993, -0.87276}, {1.04997, -0.870966}, {0.980039, -0.812739}, {0.910231, -0.756159}, {0.910895, -0.758138}, {0.98214, -0.817416}, {1.05352, -0.87563}, {1.05427, -0.875431}, {0.98389, -0.817603}, {0.912607, -0.760119}, {0.911159, -0.760133}, {1.0503, -0.874212}, {1.10335, -0.916192}, {1.06756, -0.885521}, {0.985103, -0.817782}, {0.943571, -0.784771}, {0.987028, -0.821611}, {1.07126, -0.891067}, {1.10818, -0.920883}, {1.05497, -0.876791}, {0.958468, -0.797971}, {0.910028, -0.759186}, {0.955566, -0.797109}, {1.04949, -0.872734}, {1.06685, -0.886073}, {1.00903, -0.838073}, {0.945229, -0.785811}, {0.946373, -0.787131}, {1.01229, -0.841303}, {1.07162, -0.889596}, {1.05463, -0.87523}, {0.965725, -0.80243}, {0.882442, -0.73492}, {0.880931, -0.734627}, {0.961731, -0.801255}, {0.978262, -0.813467}, {0.983539, -0.81753}, {0.944576, -0.785031}, {0.921942, -0.765645}, {0.946845, -0.78501}, {0.987626, -0.817416}, {0.983227, -0.813215}, {0.909684, -0.753089}, {0.81169, -0.673669}, {0.765633, -0.6372}, {0.808995, -0.673899}, {0.905172, -0.753411}, {0.908055, -0.754985}, {0.942135, -0.782595}, {0.945962, -0.784348}, {0.946988, -0.78301}, {0.945025, -0.779221}, {0.912205, -0.751041}, {0.828845, -0.682783}, {0.717196, -0.592456}, {0.635566, -0.527221}, {0.634306, -0.527791}, {0.713839, -0.594297}, {0.824462, -0.685961}, {0.909756, -0.755058}, {0.986802, -0.817238}, {1.01281, -0.836246}, {0.988323, -0.813324}, {0.912603, -0.749164}, {0.791099, -0.649177}, {0.648844, -0.533821}, {0.531268, -0.439455}, {0.48471, -0.403144}, {0.529169, -0.440952}, {0.64542, -0.537299}, {0.787479, 
-0.654588}, {0.982643, -0.813071}, {1.07224, -0.885117}, {1.07268, -0.883124}, {0.984031, -0.80821}, {0.82931, -0.68049}, {0.649052, -0.53364}, {0.491369, -0.406426}, {0.399245, -0.332894}, {0.398288, -0.333291}, {0.488944, -0.408094}, {0.646189, -0.537301}, {0.826964, -0.685747}, {1.05484, -0.870787}, {1.10917, -0.91438}, {1.05505, -0.868682}, {0.909755, -0.748716}, {0.717298, -0.591352}, {0.531557, -0.440698}, {0.399524, -0.334343}, {0.351496, -0.296061}, {0.398033, -0.334165}, {0.529442, -0.441226}, {0.715595, -0.593279}, {0.908919, -0.751527}, {1.05492, -0.870641}, {1.05462, -0.870578}, {0.964648, -0.796749}, {0.810731, -0.670714}, {0.635379, -0.527789}, {0.485236, -0.406039}, {0.398919, -0.336311}, {0.398286, -0.335741}, {0.483848, -0.404716}, {0.634293, -0.526529}, {0.810581, -0.670086}, {0.965132, -0.79669}, {0.983137, -0.813267}, {0.956533, -0.792503}, {0.880212, -0.730296}, {0.764332, -0.635357}, {0.63446, -0.529235}, {0.530208, -0.44443}, {0.489803, -0.411621}, {0.529553, -0.44349}, {0.633996, -0.527433}, {0.764852, -0.633001}, {0.881578, -0.728017}, {0.957708, -0.791087}, {0.911065, -0.756731}, {0.90759, -0.754743}, {0.878865, -0.730867}, {0.808498, -0.672442}, {0.714952, -0.595549}, {0.646991, -0.540274}, {0.646913, -0.540487}, {0.71513, -0.595704}, {0.809667, -0.671647}, {0.881103, -0.72891}, {0.909906, -0.752574}, {0.912056, -0.755782}, {0.910096, -0.758283}, {0.953976, -0.794023}, {0.960955, -0.79835}, {0.906061, -0.752102}, {0.826491, -0.686809}, {0.789153, -0.657105}, {0.827152, -0.688951}, {0.907821, -0.754678}, {0.963956, -0.799534}, {0.957414, -0.793498}, {0.912436, -0.757324}, {0.886661, -0.737845}, {0.905238, -0.754449}, {0.9598, -0.798109}, {0.953186, -0.791836}, {0.909625, -0.756445}, {0.886077, -0.738438}, {0.911681, -0.76052}, {0.95712, -0.797639}, {0.964931, -0.802832}, {0.910193, -0.756825}, {0.830028, -0.691083}, {0.791203, -0.660238}, {0.826951, -0.690435}, {0.959333, -0.798864}, {1.00072, -0.832245}, {0.979131, -0.814474}, {0.942911, 
-0.785392}, {0.94444, -0.787394}, {0.983443, -0.819466}, {1.00692, -0.837934}, {0.96591, -0.803247}, {0.871256, -0.725178}, {0.79087, -0.659725}, {0.788973, -0.659029}, {0.866207, -0.722797}, {0.951502, -0.792164}, {0.978073, -0.81405}, {0.959796, -0.799062}, {0.946133, -0.787637}, {0.96307, -0.800928}, {0.983832, -0.817045}, {0.958292, -0.79525}, {0.870968, -0.723293}, {0.767495, -0.63878}, {0.720162, -0.600778}, {0.763979, -0.637559}, {0.864966, -0.720942}, {0.907024, -0.754961}, {0.941191, -0.78304}, {0.945516, -0.78567}, {0.94707, -0.785292}, {0.945425, -0.782215}, {0.912778, -0.75433}, {0.829649, -0.685964}, {0.718325, -0.595226}, {0.636876, -0.529364}, {0.635356, -0.529169}, {0.714159, -0.594905}, {0.82393, -0.686022}, {0.883695, -0.734345}, {0.943046, -0.78228}, {0.962661, -0.796255}, {0.945518, -0.779638}, {0.887909, -0.730577}, {0.78874, -0.648806}, {0.666353, -0.549122}, {0.56184, -0.464538}, {0.519736, -0.431134}, {0.559555, -0.464971}, {0.662326, -0.55068}, {0.783983, -0.651881}, {0.910466, -0.754149}, {0.982984, -0.812088}, {0.984028, -0.810411}, {0.913295, -0.750132}, {0.789162, -0.647354}, {0.644338, -0.528999}, {0.51765, -0.426214}, {0.44362, -0.36659}, {0.442638, -0.366719}, {0.514944, -0.427134}, {0.640588, -0.531597}, {0.785351, -0.6515}, {0.956983, -0.790247}, {1.00705, -0.829835}, {0.958737, -0.788403}, {0.830394, -0.681967}, {0.667196, -0.54804}, {0.51823, -0.426587}, {0.417992, -0.345236}, {0.382564, -0.316793}, {0.416077, -0.344799}, {0.514972, -0.426665}, {0.663527, -0.549448}, {0.827298, -0.684327}, {0.964904, -0.795846}, {0.965713, -0.796009}, {0.87102, -0.71776}, {0.718984, -0.592754}, {0.563014, -0.464941}, {0.444786, -0.368332}, {0.383195, -0.318124}, {0.382071, -0.317315}, {0.441832, -0.36636}, {0.55925, -0.462805}, {0.715535, -0.591263}, {0.868694, -0.717077}, {0.909407, -0.751137}, {0.870192, -0.719561}, {0.766991, -0.634981}, {0.637511, -0.528456}, {0.521302, -0.43282}, {0.444302, -0.369591}, {0.417088, -0.347316}, {0.442097, 
-0.367674}, {0.517827, -0.429316}, {0.634041, -0.524121}, {0.764456, -0.630976}, {0.868927, -0.717143}, {0.828542, -0.686918}, {0.789339, -0.655616}, {0.719679, -0.598187}, {0.636449, -0.529015}, {0.561686, -0.467087}, {0.516942, -0.430452}, {0.515986, -0.429929}, {0.559338, -0.465207}, {0.633862, -0.525378}, {0.71781, -0.593316}, {0.788424, -0.651268}, {0.828303, -0.685161}, {0.789856, -0.65752}, {0.78791, -0.656065}, {0.764306, -0.635606}, {0.716127, -0.594935}, {0.665021, -0.552867}, {0.642661, -0.535285}, {0.664276, -0.553589}, {0.715337, -0.594958}, {0.764184, -0.633695}, {0.78849, -0.652733}, {0.79048, -0.654887}, {0.787598, -0.65419}, {0.826413, -0.689125}, {0.866134, -0.720901}, {0.866302, -0.719642}, {0.8266, -0.686494}, {0.786849, -0.654679}, {0.787158, -0.656254}, {0.827716, -0.689988}, {0.868472, -0.722502}, {0.869048, -0.721551}, {0.829017, -0.688201}, {0.788291, -0.655726}, {0.787238, -0.65636}, {0.806404, -0.672542}, {0.876255, -0.729902}, {0.905549, -0.754561}, {0.909824, -0.759239}, {0.911367, -0.761343}, {0.909808, -0.759645}, {0.88218, -0.735273}, {0.812453, -0.676096}, {0.719265, -0.598635}, {0.650973, -0.542973}, {0.649308, -0.542686}, {0.714759, -0.597186}, {0.875528, -0.730116}, {0.949787, -0.791812}, {0.979432, -0.816962}, {0.986245, -0.822968}, {0.983221, -0.820008}, {0.956268, -0.79652}, {0.882883, -0.734657}, {0.767908, -0.639209}, {0.657054, -0.548106}, {0.609749, -0.509898}, {0.653488, -0.546611}, {0.76165, -0.636052}, {0.903865, -0.7535}, {0.978521, -0.815542}, {1.01024, -0.841477}, {1.01218, -0.842025}, {0.98366, -0.817067}, {0.91053, -0.755649}, {0.79074, -0.656558}, {0.656537, -0.546318}, {0.565677, -0.472109}, {0.564151, -0.471486}, {0.652219, -0.544642}, {0.784485, -0.654334}, {0.907658, -0.755482}, {0.984871, -0.818772}, {1.0117, -0.839332}, {0.987933, -0.817759}, {0.912552, -0.754259}, {0.791414, -0.654168}, {0.650012, -0.538202}, {0.53361, -0.443007}, {0.487743, -0.40577}, {0.531668, -0.442571}, {0.646238, -0.537976}, {0.78641, 
-0.654744}, {0.90948, -0.754644}, {0.982071, -0.8131}, {0.983366, -0.811973}, {0.912734, -0.751998}, {0.788623, -0.64916}, {0.644089, -0.530499}, {0.518145, -0.427377}, {0.445015, -0.367508}, {0.444493, -0.367452}, {0.516436, -0.427656}, {0.641086, -0.531906}, {0.784847, -0.65175}, {0.908595, -0.751269}, {0.95569, -0.788474}, {0.910793, -0.749804}, {0.792214, -0.651229}, {0.644767, -0.529732}, {0.51472, -0.422831}, {0.430638, -0.353559}, {0.402011, -0.329908}, {0.42975, -0.353253}, {0.512703, -0.422949}, {0.641682, -0.530892}, {0.788888, -0.653118}, {0.881405, -0.727209}, {0.882682, -0.727454}, {0.791455, -0.651686}, {0.651413, -0.535995}, {0.519546, -0.427026}, {0.431407, -0.35383}, {0.391367, -0.320231}, {0.390692, -0.319587}, {0.42934, -0.352217}, {0.516202, -0.425168}, {0.647468, -0.534536}, {0.788186, -0.65087}, {0.811606, -0.669797}, {0.767676, -0.633926}, {0.657384, -0.543041}, {0.535325, -0.44185}, {0.446838, -0.367812}, {0.403221, -0.330752}, {0.391118, -0.320245}, {0.401141, -0.328762}, {0.443013, -0.364033}, {0.53056, -0.436964}, {0.652954, -0.538357}, {0.764991, -0.63103}, {0.718148, -0.594358}, {0.656668, -0.544524}, {0.566534, -0.469907}, {0.489575, -0.405152}, {0.446437, -0.368157}, {0.431026, -0.354731}, {0.429791, -0.353818}, {0.443038, -0.365117}, {0.484873, -0.399705}, {0.561764, -0.462965}, {0.653134, -0.538495}, {0.716845, -0.591951}, {0.64991, -0.540158}, {0.609517, -0.50724}, {0.565223, -0.469676}, {0.533674, -0.442191}, {0.518439, -0.42892}, {0.513936, -0.425524}, {0.516595, -0.428252}, {0.530587, -0.439604}, {0.561827, -0.464433}, {0.606721, -0.500747}, {0.648357, -0.535584}, {0.665914, -0.551716}, {0.648763, -0.540834}, {0.653791, -0.54453}, {0.653749, -0.543279}, {0.64854, -0.538368}, {0.643221, -0.534567}, {0.642987, -0.535396}, {0.648008, -0.539679}, {0.653278, -0.542816}, {0.653614, -0.541465}, {0.64882, -0.536941}, {0.643841, -0.533909}, {0.643781, -0.535683}, {0.714768, -0.596264}, {0.762447, -0.634828}, {0.786361, -0.654068}, 
{0.788876, -0.656779}, {0.787038, -0.65659}, {0.790289, -0.659955}, {0.788985, -0.658021}, {0.765755, -0.636952}, {0.717875, -0.595989}, {0.666946, -0.55406}, {0.644335, -0.536754}, {0.66505, -0.555112}, {0.760297, -0.634533}, {0.875585, -0.730602}, {0.952714, -0.795538}, {0.980608, -0.819361}, {0.956432, -0.798904}, {0.881715, -0.735447}, {0.766887, -0.638576}, {0.638077, -0.53103}, {0.53475, -0.445826}, {0.494181, -0.413213}, {0.531879, -0.445148}, {0.632794, -0.528872}, {0.875148, -0.730244}, {1.00053, -0.83478}, {1.06809, -0.891045}, {1.0702, -0.892241}, {1.00602, -0.837831}, {0.881951, -0.733898}, {0.720757, -0.600013}, {0.565916, -0.472267}, {0.469701, -0.393385}, {0.468381, -0.392782}, {0.562022, -0.470403}, {0.714748, -0.596952}, {0.951724, -0.792978}, {1.0675, -0.888794}, {1.10965, -0.922755}, {1.07095, -0.889341}, {0.956984, -0.794048}, {0.789425, -0.655373}, {0.609099, -0.506893}, {0.469117, -0.39185}, {0.415847, -0.348108}, {0.467634, -0.390967}, {0.60581, -0.505486}, {0.784489, -0.653943}, {0.979257, -0.813659}, {1.06928, -0.887139}, {1.0707, -0.886703}, {0.982609, -0.812689}, {0.828142, -0.684875}, {0.648738, -0.537238}, {0.492989, -0.409118}, {0.4031, -0.334897}, {0.403108, -0.334733}, {0.492489, -0.40889}, {0.646821, -0.537445}, {0.824799, -0.685681}, {0.955085, -0.790936}, {1.0053, -0.831093}, {0.957134, -0.790022}, {0.828631, -0.683406}, {0.665481, -0.548845}, {0.517532, -0.426748}, {0.419298, -0.345152}, {0.385841, -0.316888}, {0.419992, -0.345177}, {0.51773, -0.427116}, {0.664126, -0.549783}, {0.826087, -0.684663}, {0.880649, -0.727353}, {0.881829, -0.727543}, {0.790393, -0.651558}, {0.650103, -0.535468}, {0.518382, -0.426105}, {0.431098, -0.352788}, {0.392378, -0.31945}, {0.392765, -0.319301}, {0.43158, -0.352366}, {0.517685, -0.425494}, {0.64784, -0.534816}, {0.787744, -0.651055}, {0.766142, -0.632214}, {0.721214, -0.595266}, {0.610714, -0.50388}, {0.494908, -0.407265}, {0.42059, -0.343957}, {0.392784, -0.318851}, {0.388285, -0.313951}, 
{0.392534, -0.317921}, {0.419281, -0.341768}, {0.492016, -0.403823}, {0.607014, -0.500108}, {0.718572, -0.592754}, {0.637533, -0.52682}, {0.566598, -0.46887}, {0.470787, -0.389088}, {0.404848, -0.332564}, {0.386843, -0.31513}, {0.392913, -0.318428}, {0.392354, -0.317861}, {0.384923, -0.312895}, {0.401295, -0.32789}, {0.466159, -0.382542}, {0.562529, -0.462891}, {0.635899, -0.52438}, {0.534317, -0.442955}, {0.470272, -0.390355}, {0.417093, -0.344997}, {0.404226, -0.332048}, {0.420386, -0.34365}, {0.431316, -0.352355}, {0.418891, -0.342817}, {0.401172, -0.328919}, {0.412834, -0.338725}, {0.465949, -0.38262}, {0.531553, -0.437516}, {0.562139, -0.464363}, {0.493902, -0.410825}, {0.468791, -0.389727}, {0.468461, -0.387955}, {0.493162, -0.407106}, {0.517902, -0.427462}, {0.517514, -0.427864}, {0.491941, -0.407082}, {0.466408, -0.385269}, {0.466295, -0.383977}, {0.491793, -0.404673}, {0.517697, -0.427228}, {0.518528, -0.42989}, {0.531833, -0.443231}, {0.562443, -0.46799}, {0.606581, -0.503922}, {0.647608, -0.538209}, {0.664726, -0.553393}, {0.648354, -0.540326}, {0.607691, -0.505701}, {0.563439, -0.467066}, {0.532447, -0.439851}, {0.518209, -0.428159}, {0.514734, -0.426852}, {0.51796, -0.431201}, {0.63291, -0.527945}, {0.715274, -0.596042}, {0.785456, -0.654767}, {0.826009, -0.689563}, {0.827315, -0.691394}, {0.788912, -0.658884}, {0.719733, -0.599566}, {0.637066, -0.529045}, {0.563173, -0.467104}, {0.519211, -0.431518}, {0.518209, -0.432111}, {0.560322, -0.467836}, {0.804534, -0.670781}, {0.958095, -0.799225}, {1.04923, -0.875744}, {1.05128, -0.877321}, {0.963287, -0.803076}, {0.810688, -0.674956}, {0.636615, -0.529804}, {0.488106, -0.406971}, {0.403106, -0.337317}, {0.402144, -0.336993}, {0.485081, -0.405626}, {0.631544, -0.526882}, {0.958055, -0.79803}, {1.10253, -0.918344}, {1.15661, -0.962985}, {1.10614, -0.920206}, {0.963386, -0.80095}, {0.765061, -0.636437}, {0.563985, -0.470531}, {0.415273, -0.348206}, {0.360323, -0.302975}, {0.414176, -0.347174}, {0.561166, 
-0.46844}, {0.760355, -0.633544}, {1.04883, -0.871692}, {1.15629, -0.960328}, {1.15785, -0.960684}, {1.05242, -0.872646}, {0.867692, -0.719841}, {0.653288, -0.543268}, {0.46733, -0.390256}, {0.360202, -0.301767}, {0.360488, -0.301459}, {0.467436, -0.389396}, {0.651834, -0.542053}, {0.864392, -0.718579}, {1.05051, -0.870624}, {1.10574, -0.91547}, {1.05255, -0.870705}, {0.907469, -0.750694}, {0.71528, -0.592387}, {0.53113, -0.44062}, {0.402088, -0.333687}, {0.356814, -0.295451}, {0.403793, -0.333737}, {0.53271, -0.440658}, {0.71494, -0.592355}, {0.905299, -0.750602}, {0.962527, -0.795674}, {0.963539, -0.795964}, {0.868658, -0.717384}, {0.716134, -0.591464}, {0.560512, -0.462618}, {0.444341, -0.365573}, {0.385921, -0.315861}, {0.38723, -0.316126}, {0.447067, -0.366095}, {0.562225, -0.46285}, {0.715526, -0.591141}, {0.866741, -0.716797}, {0.810385, -0.668894}, {0.766125, -0.632463}, {0.655207, -0.540786}, {0.532737, -0.438747}, {0.444947, -0.364279}, {0.403385, -0.327604}, {0.393803, -0.318229}, {0.405352, -0.328076}, {0.446911, -0.364238}, {0.532732, -0.437343}, {0.653233, -0.538395}, {0.764153, -0.630586}, {0.636884, -0.525658}, {0.56563, -0.467236}, {0.469448, -0.386983}, {0.403469, -0.330156}, {0.386116, -0.312813}, {0.393426, -0.316676}, {0.394081, -0.316956}, {0.387161, -0.312747}, {0.403133, -0.328102}, {0.467077, -0.382661}, {0.562605, -0.462635}, {0.635504, -0.52367}, {0.488623, -0.403785}, {0.416667, -0.34438}, {0.361438, -0.296958}, {0.356958, -0.290352}, {0.386262, -0.312133}, {0.404112, -0.326145}, {0.386502, -0.312506}, {0.356316, -0.289069}, {0.359029, -0.292435}, {0.413136, -0.337829}, {0.485974, -0.398889}, {0.52064, -0.428934}, {0.403403, -0.333963}, {0.360809, -0.298182}, {0.360349, -0.295854}, {0.402612, -0.328872}, {0.445255, -0.363448}, {0.445368, -0.364105}, {0.402454, -0.329396}, {0.359032, -0.293563}, {0.35822, -0.292418}, {0.400701, -0.327542}, {0.444074, -0.36458}, {0.445233, -0.367395}, {0.402024, -0.333495}, {0.413791, -0.342564}, 
{0.466418, -0.385211}, {0.531149, -0.438704}, {0.560721, -0.463913}, {0.531951, -0.440594}, {0.467203, -0.386301}, {0.413744, -0.340377}, {0.401158, -0.328617}, {0.418162, -0.342867}, {0.430282, -0.354513}, {0.41901, -0.346944}, {0.484747, -0.403068}, {0.560524, -0.465807}, {0.650941, -0.541315}, {0.714135, -0.594855}, {0.715245, -0.596488}, {0.653683, -0.544691}, {0.56358, -0.46785}, {0.487006, -0.402145}, {0.444771, -0.366269}, {0.430556, -0.355326}, {0.430243, -0.356659}, {0.443622, -0.368824}, {0.631331, -0.52601}, {0.760034, -0.633521}, {0.864246, -0.721319}, {0.905595, -0.756611}, {0.867511, -0.724622}, {0.765155, -0.63787}, {0.636401, -0.528868}, {0.521239, -0.43214}, {0.44557, -0.369658}, {0.419224, -0.348898}, {0.443767, -0.370103}, {0.51758, -0.431538}, {0.902427, -0.750354}, {1.0479, -0.872403}, {1.10353, -0.919235}, {1.05113, -0.875217}, {0.907076, -0.754595}, {0.715588, -0.595226}, {0.531267, -0.4429}, {0.401164, -0.335981}, {0.354514, -0.29758}, {0.400529, -0.3349}, {0.529216, -0.440394}, {0.71172, -0.591331}, {1.04799, -0.870268}, {1.1553, -0.959763}, {1.15687, -0.960888}, {1.05157, -0.873127}, {0.86693, -0.720155}, {0.652405, -0.543334}, {0.466182, -0.390255}, {0.358867, -0.301778}, {0.359218, -0.301236}, {0.466438, -0.388562}, {0.651091, -0.540517}, {0.863696, -0.716755}, {1.10337, -0.914346}, {1.15682, -0.958479}, {1.10563, -0.915858}, {0.962001, -0.79721}, {0.762856, -0.63331}, {0.561523, -0.46775}, {0.41347, -0.345587}, {0.359925, -0.300571}, {0.41527, -0.345087}, {0.563153, -0.466513}, {0.762393, -0.631297}, {0.959567, -0.794961}, {1.05093, -0.869067}, {1.05202, -0.869833}, {0.962698, -0.796173}, {0.808564, -0.669304}, {0.633566, -0.525067}, {0.48557, -0.40248}, {0.402562, -0.33291}, {0.404139, -0.33315}, {0.488902, -0.402767}, {0.635809, -0.524588}, {0.808142, -0.667819}, {0.960699, -0.794449}, {0.907461, -0.749272}, {0.868488, -0.717353}, {0.764704, -0.631891}, {0.63457, -0.52412}, {0.519165, -0.427631}, {0.444969, -0.364663}, {0.421215, 
-0.343753}, {0.448056, -0.365789}, {0.522742, -0.428483}, {0.63591, -0.52337}, {0.763448, -0.629741}, {0.866741, -0.715477}, {0.716895, -0.591475}, {0.654918, -0.540733}, {0.563903, -0.465064}, {0.486687, -0.399506}, {0.444944, -0.362568}, {0.432395, -0.350326}, {0.433953, -0.351298}, {0.448218, -0.364282}, {0.488829, -0.399619}, {0.563356, -0.462603}, {0.652812, -0.5373}, {0.715764, -0.589874}, {0.533085, -0.439668}, {0.468573, -0.386357}, {0.414961, -0.340431}, {0.40239, -0.327359}, {0.419967, -0.339572}, {0.43294, -0.349558}, {0.422056, -0.341476}, {0.404494, -0.328566}, {0.415001, -0.338524}, {0.466539, -0.381848}, {0.531036, -0.435857}, {0.561168, -0.461839}, {0.402562, -0.331819}, {0.35989, -0.295816}, {0.359447, -0.293397}, {0.402073, -0.326571}, {0.445463, -0.361607}, {0.446387, -0.362917}, {0.403877, -0.328788}, {0.360206, -0.293206}, {0.35868, -0.291907}, {0.400427, -0.3266}, {0.44338, -0.363148}, {0.444425, -0.365557}, {0.354795, -0.292411}, {0.358461, -0.294534}, {0.413036, -0.338535}, {0.485775, -0.398405}, {0.519963, -0.427309}, {0.487338, -0.400927}, {0.414766, -0.340547}, {0.358915, -0.29314}, {0.353852, -0.287985}, {0.38281, -0.312243}, {0.400808, -0.328623}, {0.38392, -0.316159}, {0.399797, -0.330293}, {0.464635, -0.383794}, {0.560507, -0.46371}, {0.633218, -0.525109}, {0.63433, -0.526812}, {0.563097, -0.467156}, {0.467112, -0.385647}, {0.40105, -0.328851}, {0.38306, -0.313193}, {0.389456, -0.319348}, {0.389569, -0.32094}, {0.38292, -0.316338}, {0.528231, -0.438023}, {0.649458, -0.539454}, {0.760684, -0.633373}, {0.807088, -0.673172}, {0.763381, -0.636653}, {0.653433, -0.543507}, {0.531722, -0.440136}, {0.443716, -0.365774}, {0.400837, -0.330437}, {0.389558, -0.322152}, {0.400043, -0.331604}, {0.441644, -0.366167}, {0.711186, -0.591151}, {0.862936, -0.718641}, {0.95913, -0.800092}, {0.960843, -0.801868}, {0.867175, -0.72286}, {0.715938, -0.595418}, {0.560808, -0.465502}, {0.443789, -0.368554}, {0.383526, -0.319373}, {0.383013, -0.319325}, 
{0.44195, -0.36779}, {0.557256, -0.462962}, {0.977648, -0.810535}, {1.06704, -0.886339}, {1.06819, -0.887951}, {0.980195, -0.814622}, {0.825755, -0.686283}, {0.645961, -0.53792}, {0.489583, -0.409603}, {0.399407, -0.335432}, {0.399886, -0.334684}, {0.490293, -0.407243}, {0.645521, -0.533948}, {0.823687, -0.681448}, {1.06705, -0.883747}, {1.10838, -0.918884}, {1.06907, -0.886589}, {0.95443, -0.791821}, {0.786003, -0.653136}, {0.605002, -0.504557}, {0.465166, -0.389591}, {0.413125, -0.345878}, {0.466744, -0.38827}, {0.606417, -0.501713}, {0.785557, -0.648974}, {0.952244, -0.787455}, {1.06833, -0.883635}, {1.06952, -0.885097}, {1.00419, -0.831485}, {0.878647, -0.728307}, {0.716209, -0.594817}, {0.561322, -0.46725}, {0.466711, -0.388645}, {0.468032, -0.388457}, {0.564014, -0.466269}, {0.717716, -0.592374}, {0.87765, -0.724591}, {1.00188, -0.827957}, {0.981157, -0.810639}, {0.956112, -0.790469}, {0.879911, -0.728015}, {0.763366, -0.631984}, {0.633787, -0.524711}, {0.531582, -0.439568}, {0.493744, -0.407396}, {0.534328, -0.440214}, {0.636773, -0.524598}, {0.764022, -0.629912}, {0.878061, -0.724537}, {0.954022, -0.78775}, {0.828059, -0.683667}, {0.789068, -0.652029}, {0.71852, -0.593719}, {0.634587, -0.523508}, {0.560808, -0.4612}, {0.51873, -0.425334}, {0.520357, -0.426373}, {0.564205, -0.463048}, {0.636747, -0.523687}, {0.717815, -0.59119}, {0.786736, -0.648461}, {0.826821, -0.681998}, {0.649228, -0.535679}, {0.608408, -0.50211}, {0.563277, -0.463787}, {0.531714, -0.435906}, {0.518103, -0.423142}, {0.516232, -0.421199}, {0.520815, -0.425671}, {0.534699, -0.438152}, {0.564022, -0.463018}, {0.606713, -0.49853}, {0.647261, -0.532381}, {0.66494, -0.547781}, {0.492575, -0.405916}, {0.467289, -0.384442}, {0.4668, -0.382423}, {0.492099, -0.401766}, {0.518402, -0.422978}, {0.519799, -0.424699}, {0.495055, -0.405101}, {0.468805, -0.383745}, {0.466945, -0.38203}, {0.490834, -0.401783}, {0.516067, -0.423401}, {0.517019, -0.425414}, {0.400898, -0.329801}, {0.413008, -0.338882}, 
{0.465946, -0.381611}, {0.531245, -0.435391}, {0.561639, -0.461154}, {0.533497, -0.43849}, {0.468642, -0.384621}, {0.414223, -0.338637}, {0.400306, -0.326367}, {0.416338, -0.339949}, {0.428254, -0.351089}, {0.417374, -0.343294}, {0.399594, -0.328789}, {0.464844, -0.3824}, {0.560931, -0.462366}, {0.633766, -0.523808}, {0.634952, -0.525598}, {0.563632, -0.466039}, {0.467259, -0.384538}, {0.400536, -0.327602}, {0.38188, -0.311703}, {0.387949, -0.317652}, {0.388236, -0.319187}, {0.382124, -0.314681}, {0.489021, -0.403565}, {0.604161, -0.499775}, {0.715285, -0.593659}, {0.762404, -0.634212}, {0.717409, -0.596802}, {0.607089, -0.503536}, {0.491217, -0.405252}, {0.416324, -0.342052}, {0.387747, -0.318786}, {0.382927, -0.315842}, {0.387675, -0.320242}, {0.415449, -0.34289}, {0.644393, -0.53344}, {0.784142, -0.65123}, {0.876642, -0.730128}, {0.877801, -0.731866}, {0.786903, -0.655285}, {0.647201, -0.537342}, {0.515433, -0.426767}, {0.427312, -0.35388}, {0.387554, -0.321775}, {0.387507, -0.321928}, {0.426776, -0.353605}, {0.513737, -0.424754}, {0.823246, -0.682551}, {0.951828, -0.791418}, {1.00178, -0.83441}, {0.953937, -0.794564}, {0.82609, -0.687217}, {0.663244, -0.551235}, {0.514813, -0.428497}, {0.415598, -0.347231}, {0.381434, -0.319109}, {0.415698, -0.346224}, {0.514178, -0.42597}, {0.661235, -0.547082}, {0.984781, -0.815022}, {1.01025, -0.838096}, {0.985469, -0.818317}, {0.909201, -0.755153}, {0.787025, -0.654425}, {0.644702, -0.537847}, {0.52825, -0.442488}, {0.483731, -0.405004}, {0.530046, -0.440713}, {0.646902, -0.534216}, {0.788132, -0.649354}, {0.908919, -0.750002}, {1.01037, -0.83604}, {1.01118, -0.837953}, {0.98149, -0.813885}, {0.906876, -0.752516}, {0.785619, -0.652988}, {0.650901, -0.54244}, {0.561214, -0.468264}, {0.562294, -0.467636}, {0.653163, -0.540255}, {0.787078, -0.64889}, {0.906451, -0.747133}, {0.979986, -0.80913}, {0.98654, -0.816116}, {0.98265, -0.813695}, {0.954239, -0.790607}, {0.878967, -0.728661}, {0.762746, -0.632876}, {0.652418, -0.54172}, 
{0.607499, -0.503995}, {0.654281, -0.541388}, {0.764528, -0.631079}, {0.878747, -0.724656}, {0.952076, -0.785469}, {0.980567, -0.810022}, {0.912, -0.754273}, {0.909769, -0.752995}, {0.880516, -0.728879}, {0.809042, -0.669481}, {0.715417, -0.59168}, {0.648686, -0.536196}, {0.649766, -0.5368}, {0.717448, -0.592368}, {0.809641, -0.668138}, {0.878603, -0.7249}, {0.906776, -0.748411}, {0.910568, -0.752242}, {0.791359, -0.654261}, {0.789521, -0.65297}, {0.764816, -0.631996}, {0.715789, -0.590571}, {0.665251, -0.548223}, {0.644496, -0.531108}, {0.66716, -0.5502}, {0.717495, -0.591957}, {0.764264, -0.630378}, {0.786857, -0.648783}, {0.788898, -0.650666}, {0.78759, -0.650334}, {0.649246, -0.536372}, {0.654126, -0.540102}, {0.653381, -0.538512}, {0.648056, -0.533315}, {0.643744, -0.529732}, {0.644924, -0.531266}, {0.650434, -0.536217}, {0.654632, -0.539431}, {0.653068, -0.537504}, {0.647008, -0.53224}, {0.642242, -0.528879}, {0.64337, -0.530862}, {0.531406, -0.438506}, {0.562406, -0.463459}, {0.606572, -0.499316}, {0.647903, -0.533552}, {0.665775, -0.548963}, {0.650015, -0.536311}, {0.60901, -0.501905}, {0.563297, -0.462993}, {0.530511, -0.435111}, {0.515271, -0.422811}, {0.51211, -0.42136}, {0.516491, -0.426036}, {0.484781, -0.39971}, {0.561479, -0.462725}, {0.652189, -0.538158}, {0.715419, -0.591481}, {0.716504, -0.592977}, {0.654612, -0.541131}, {0.563513, -0.464147}, {0.48534, -0.398085}, {0.441579, -0.361759}, {0.426772, -0.350591}, {0.427162, -0.352141}, {0.442105, -0.364881}, {0.529563, -0.436884}, {0.651593, -0.538419}, {0.762769, -0.632033}, {0.808653, -0.671341}, {0.764284, -0.634396}, {0.653533, -0.540988}, {0.530723, -0.437448}, {0.441427, -0.362931}, {0.39759, -0.327517}, {0.38631, -0.319391}, {0.397944, -0.329317}, {0.441357, -0.364525}, {0.646085, -0.533756}, {0.78606, -0.651431}, {0.878182, -0.729949}, {0.878711, -0.731235}, {0.787196, -0.654323}, {0.646905, -0.536238}, {0.514485, -0.425654}, {0.425728, -0.352824}, {0.385668, -0.320849}, {0.385984, 
-0.321259}, {0.426281, -0.353325}, {0.51451, -0.424868}, {0.787758, -0.651546}, {0.906306, -0.752333}, {0.952243, -0.792423}, {0.906773, -0.754853}, {0.788039, -0.655204}, {0.640298, -0.531913}, {0.509624, -0.424236}, {0.425075, -0.355511}, {0.396859, -0.332418}, {0.426052, -0.354922}, {0.510788, -0.422509}, {0.640808, -0.528803}, {0.908923, -0.752185}, {0.980113, -0.813766}, {0.980349, -0.815309}, {0.909182, -0.756104}, {0.784639, -0.652386}, {0.639432, -0.532617}, {0.512855, -0.429188}, {0.439865, -0.369439}, {0.440679, -0.368711}, {0.514694, -0.426899}, {0.641037, -0.528785}, {0.785194, -0.647739}, {0.946748, -0.783834}, {0.946529, -0.785583}, {0.943249, -0.783444}, {0.908934, -0.755117}, {0.824279, -0.685704}, {0.712322, -0.594182}, {0.631914, -0.527946}, {0.6331, -0.527236}, {0.71521, -0.591859}, {0.827387, -0.681631}, {0.911075, -0.750006}, {0.944203, -0.779044}, {0.947184, -0.784971}, {0.962878, -0.799005}, {0.982007, -0.814874}, {0.954503, -0.791986}, {0.865704, -0.718843}, {0.762315, -0.63375}, {0.71698, -0.595846}, {0.763879, -0.63292}, {0.867535, -0.716331}, {0.955232, -0.78739}, {0.981437, -0.809425}, {0.962042, -0.795239}, {0.945708, -0.784084}, {0.983782, -0.815902}, {1.00546, -0.833419}, {0.962426, -0.797379}, {0.866852, -0.718308}, {0.787475, -0.652825}, {0.78806, -0.653039}, {0.867819, -0.718022}, {0.96225, -0.794931}, {1.00355, -0.828595}, {0.981294, -0.810865}, {0.944566, -0.781922}, {0.913382, -0.757119}, {0.957874, -0.793793}, {0.963784, -0.797984}, {0.907436, -0.750829}, {0.827225, -0.684541}, {0.790036, -0.654195}, {0.827928, -0.685662}, {0.907448, -0.750932}, {0.961898, -0.795203}, {0.954595, -0.788875}, {0.910793, -0.753218}, {0.887621, -0.735102}, {0.829546, -0.687252}, {0.869225, -0.719695}, {0.86813, -0.718096}, {0.827234, -0.684027}, {0.787186, -0.651334}, {0.787726, -0.652348}, {0.828031, -0.685654}, {0.86761, -0.717612}, {0.866847, -0.716087}, {0.826627, -0.68263}, {0.787418, -0.650923}, {0.788735, -0.653033}, {0.716677, -0.593447}, 
{0.764484, -0.632507}, {0.787625, -0.651319}, {0.789417, -0.653108}, {0.787396, -0.652028}, {0.790602, -0.654735}, {0.788655, -0.652241}, {0.764062, -0.63058}, {0.714914, -0.589188}, {0.663901, -0.54742}, {0.642708, -0.531102}, {0.665509, -0.550974}, {0.634351, -0.52513}, {0.717483, -0.593546}, {0.78737, -0.651674}, {0.827261, -0.685377}, {0.827992, -0.686135}, {0.788917, -0.652818}, {0.718532, -0.592914}, {0.634171, -0.521962}, {0.558888, -0.459903}, {0.51483, -0.424804}, {0.515329, -0.426581}, {0.559799, -0.46383}, {0.633949, -0.524656}, {0.763686, -0.632159}, {0.867462, -0.719031}, {0.90766, -0.75297}, {0.86835, -0.719775}, {0.76471, -0.632255}, {0.634321, -0.522887}, {0.517281, -0.426082}, {0.440303, -0.363863}, {0.414205, -0.343879}, {0.440819, -0.366376}, {0.517661, -0.429242}, {0.715567, -0.591908}, {0.867818, -0.718954}, {0.962938, -0.799185}, {0.962965, -0.799541}, {0.867722, -0.719489}, {0.715028, -0.5916}, {0.558272, -0.461703}, {0.439627, -0.365042}, {0.378583, -0.316396}, {0.379025, -0.317238}, {0.440662, -0.366905}, {0.559279, -0.463209}, {0.828, -0.684575}, {0.95621, -0.79272}, {1.00468, -0.834531}, {0.955213, -0.793591}, {0.826086, -0.685655}, {0.66216, -0.549645}, {0.512582, -0.427217}, {0.412397, -0.346391}, {0.37819, -0.318824}, {0.413906, -0.34669}, {0.515009, -0.427307}, {0.664666, -0.549073}, {0.911802, -0.753779}, {0.982307, -0.814784}, {0.98149, -0.815631}, {0.909447, -0.75593}, {0.784343, -0.652083}, {0.638684, -0.532487}, {0.511624, -0.429351}, {0.438347, -0.369906}, {0.439502, -0.369508}, {0.514655, -0.4281}, {0.642532, -0.530388}, {0.787852, -0.649535}, {0.94453, -0.781219}, {0.962011, -0.79828}, {0.94307, -0.783709}, {0.884037, -0.734886}, {0.783612, -0.652211}, {0.66033, -0.55168}, {0.555968, -0.466755}, {0.515608, -0.432876}, {0.558439, -0.465275}, {0.664239, -0.548746}, {0.787542, -0.648241}, {0.886935, -0.730943}}; - - const std::vector> wfcr_2 = {{-1.04493, 0.615688}, {-1.06977, 0.631876}, {-1.11054, 0.657279}, {-1.09964, 
0.652241}, {-1.01219, 0.601667}, {-0.900492, 0.535509}, {-0.851148, 0.504342}, {-0.905186, 0.532964}, {-1.01925, 0.597305}, {-1.10611, 0.647275}, {-1.11469, 0.65304}, {-1.07155, 0.629455}, {-1.06919, 0.632572}, {-1.13929, 0.676268}, {-1.20108, 0.714099}, {-1.17741, 0.70052}, {-1.07525, 0.639493}, {-0.983179, 0.583099}, {-0.986424, 0.581855}, {-1.0835, 0.635772}, {-1.1872, 0.694674}, {-1.20917, 0.707372}, {-1.14414, 0.670785}, {-1.07073, 0.630447}, {-1.1097, 0.659275}, {-1.20066, 0.715109}, {-1.23859, 0.737959}, {-1.17679, 0.700538}, {-1.06941, 0.635141}, {-1.01939, 0.602723}, {-1.07668, 0.633135}, {-1.18832, 0.695962}, {-1.25024, 0.730935}, {-1.20931, 0.707519}, {-1.11409, 0.654257}, {-1.06425, 0.628851}, {-1.09915, 0.655024}, {-1.17668, 0.701808}, {-1.17599, 0.700685}, {-1.09784, 0.652789}, {-1.02166, 0.605407}, {-1.02577, 0.604855}, {-1.1085, 0.650449}, {-1.18919, 0.69552}, {-1.1883, 0.694368}, {-1.10665, 0.648136}, {-1.02461, 0.603665}, {-1.02207, 0.606491}, {-1.01184, 0.603829}, {-1.07352, 0.639851}, {-1.06674, 0.63446}, {-1.01946, 0.604735}, {-0.995659, 0.588429}, {-1.02691, 0.604062}, {-1.0787, 0.631808}, {-1.08586, 0.634447}, {-1.02126, 0.597121}, {-0.928339, 0.545466}, {-0.883314, 0.523111}, {-0.923441, 0.550196}, {-0.899158, 0.535895}, {-0.979266, 0.581943}, {-1.01385, 0.601005}, {-1.02051, 0.603424}, {-1.02385, 0.603299}, {-1.02247, 0.599959}, {-0.98985, 0.578848}, {-0.908372, 0.530875}, {-0.800363, 0.469599}, {-0.72164, 0.426891}, {-0.719673, 0.428941}, {-0.794487, 0.474495}, {-0.847753, 0.503045}, {-0.979917, 0.579784}, {-1.06852, 0.631067}, {-1.10083, 0.648709}, {-1.07326, 0.630403}, {-0.987296, 0.577914}, {-0.854969, 0.499752}, {-0.706649, 0.41444}, {-0.587992, 0.347976}, {-0.541792, 0.323726}, {-0.585514, 0.350606}, {-0.70153, 0.418275}, {-0.899908, 0.531212}, {-1.07557, 0.634031}, {-1.17943, 0.694395}, {-1.18113, 0.693788}, {-1.07976, 0.632306}, {-0.904516, 0.528696}, {-0.704858, 0.413064}, {-0.53476, 0.316459}, {-0.437762, 0.262493}, {-0.437345, 
0.263282}, {-0.533149, 0.31855}, {-0.701541, 0.415762}, {-1.01368, 0.596358}, {-1.18014, 0.694045}, {-1.24311, 0.730044}, {-1.18206, 0.692411}, {-1.01616, 0.593956}, {-0.796338, 0.466089}, {-0.585168, 0.345371}, {-0.43641, 0.261419}, {-0.383443, 0.231703}, {-0.436798, 0.261883}, {-0.584997, 0.346602}, {-0.794811, 0.468186}, {-1.10211, 0.647408}, {-1.20487, 0.707561}, {-1.20547, 0.706679}, {-1.10324, 0.645443}, {-0.9249, 0.54126}, {-0.718091, 0.422601}, {-0.538746, 0.320817}, {-0.435585, 0.262183}, {-0.436396, 0.261854}, {-0.540445, 0.320361}, {-0.719187, 0.423026}, {-0.924581, 0.542895}, {-1.113, 0.653554}, {-1.14286, 0.670879}, {-1.11329, 0.652757}, {-1.02355, 0.60014}, {-0.881332, 0.518509}, {-0.717047, 0.425022}, {-0.583374, 0.348633}, {-0.532489, 0.318429}, {-0.585722, 0.347216}, {-0.720151, 0.423379}, {-0.883396, 0.517889}, {-1.02402, 0.600711}, {-1.0714, 0.629639}, {-1.07118, 0.629932}, {-1.06498, 0.626702}, {-1.02219, 0.60281}, {-0.922587, 0.54634}, {-0.793452, 0.472108}, {-0.701665, 0.418007}, {-0.703508, 0.416851}, {-0.797895, 0.469267}, {-0.927254, 0.543205}, {-1.02524, 0.600537}, {-1.06618, 0.625584}, {-1.07066, 0.632364}, {-1.14196, 0.676121}, {-1.20383, 0.713861}, {-1.17911, 0.700059}, {-1.07535, 0.638753}, {-0.981919, 0.582124}, {-0.984444, 0.580758}, {-1.0813, 0.634679}, {-1.18499, 0.693687}, {-1.20712, 0.706558}, {-1.14271, 0.670185}, {-1.07059, 0.630066}, {-1.14113, 0.677179}, {-1.24972, 0.743092}, {-1.30064, 0.77366}, {-1.23971, 0.737078}, {-1.12538, 0.667908}, {-1.07096, 0.633052}, {-1.13158, 0.665463}, {-1.24888, 0.731698}, {-1.30874, 0.765641}, {-1.25458, 0.734583}, {-1.14303, 0.671605}, {-1.08667, 0.641951}, {-1.20279, 0.716023}, {-1.30017, 0.774582}, {-1.29903, 0.773363}, {-1.20048, 0.713583}, {-1.10409, 0.654302}, {-1.10804, 0.65355}, {-1.21047, 0.710638}, {-1.31077, 0.767229}, {-1.3097, 0.766012}, {-1.20837, 0.708204}, {-1.10709, 0.652335}, {-1.10534, 0.655524}, {-1.17844, 0.702482}, {-1.23876, 0.738047}, {-1.19943, 0.713613}, {-1.10865, 
0.658222}, {-1.06494, 0.630025}, {-1.11649, 0.657392}, {-1.21185, 0.710495}, {-1.25126, 0.731794}, {-1.18768, 0.694769}, {-1.0758, 0.631837}, {-1.01978, 0.603169}, {-1.07114, 0.637261}, {-1.07458, 0.640304}, {-1.12296, 0.667999}, {-1.10069, 0.653581}, {-1.06231, 0.629345}, {-1.06618, 0.629337}, {-1.11071, 0.652682}, {-1.13538, 0.664695}, {-1.08553, 0.634604}, {-0.975045, 0.571348}, {-0.882132, 0.520308}, {-0.879736, 0.522713}, {-0.967961, 0.577041}, {-0.979676, 0.582276}, {-1.06587, 0.632036}, {-1.10142, 0.651953}, {-1.11062, 0.655856}, {-1.1075, 0.65173}, {-1.07562, 0.630549}, {-0.989725, 0.578822}, {-0.856301, 0.501377}, {-0.728391, 0.429099}, {-0.674072, 0.40052}, {-0.724417, 0.43233}, {-0.848649, 0.505872}, {-0.979843, 0.579951}, {-1.12384, 0.663901}, {-1.20145, 0.708602}, {-1.20388, 0.708299}, {-1.13006, 0.662763}, {-0.987367, 0.57766}, {-0.802035, 0.469571}, {-0.624138, 0.367771}, {-0.513752, 0.306069}, {-0.512452, 0.307222}, {-0.620172, 0.37061}, {-0.795639, 0.472713}, {-1.07506, 0.633833}, {-1.2403, 0.730333}, {-1.3018, 0.765187}, {-1.24365, 0.729105}, {-1.08006, 0.631723}, {-0.852938, 0.499052}, {-0.622565, 0.366591}, {-0.45203, 0.269866}, {-0.388963, 0.234878}, {-0.450838, 0.27105}, {-0.619713, 0.368657}, {-0.848402, 0.501463}, {-1.17916, 0.693359}, {-1.30199, 0.764703}, {-1.30342, 0.763947}, {-1.18252, 0.691534}, {-0.97088, 0.56762}, {-0.724971, 0.42592}, {-0.511241, 0.304164}, {-0.387754, 0.234222}, {-0.38784, 0.234322}, {-0.510919, 0.304677}, {-0.723214, 0.42719}, {-0.967602, 0.569545}, {-1.20358, 0.706572}, {-1.25131, 0.733695}, {-1.20587, 0.705777}, {-1.07361, 0.628004}, {-0.879535, 0.516072}, {-0.671022, 0.397126}, {-0.509763, 0.305332}, {-0.449406, 0.270429}, {-0.510742, 0.304617}, {-0.671518, 0.396441}, {-0.878216, 0.516153}, {-1.07083, 0.628829}, {-1.14175, 0.670052}, {-1.14289, 0.67034}, {-1.10746, 0.649425}, {-1.01958, 0.599055}, {-0.878265, 0.518646}, {-0.722197, 0.429517}, {-0.618564, 0.369407}, {-0.619678, 0.368504}, {-0.724415, 0.427276}, 
{-0.879343, 0.516117}, {-1.01841, 0.597142}, {-1.10517, 0.648415}, {-1.07089, 0.629711}, {-1.08784, 0.640443}, {-1.10671, 0.65259}, {-1.07164, 0.63361}, {-0.967419, 0.573961}, {-0.848169, 0.504282}, {-0.796655, 0.472531}, {-0.851287, 0.501844}, {-0.971227, 0.56971}, {-1.07353, 0.628657}, {-1.10617, 0.648262}, {-1.08661, 0.637928}, {-1.11352, 0.659089}, {-1.20587, 0.714958}, {-1.24309, 0.737564}, {-1.17881, 0.699564}, {-1.06849, 0.633478}, {-1.0163, 0.600639}, {-1.0726, 0.63113}, {-1.18407, 0.694443}, {-1.24624, 0.729989}, {-1.20607, 0.706959}, {-1.11256, 0.653862}, {-1.06537, 0.62855}, {-1.20489, 0.715789}, {-1.30261, 0.774429}, {-1.30082, 0.773075}, {-1.20094, 0.712933}, {-1.10316, 0.653288}, {-1.10616, 0.652442}, {-1.2082, 0.709777}, {-1.30851, 0.766763}, {-1.30772, 0.765808}, {-1.20699, 0.708014}, {-1.10674, 0.652034}, {-1.10632, 0.655189}, {-1.24196, 0.738968}, {-1.30045, 0.773641}, {-1.24726, 0.741281}, {-1.13861, 0.675603}, {-1.08631, 0.642631}, {-1.14508, 0.674583}, {-1.25718, 0.737854}, {-1.30991, 0.767062}, {-1.24841, 0.731}, {-1.13115, 0.664375}, {-1.07224, 0.633464}, {-1.12816, 0.669934}, {-1.17821, 0.700917}, {-1.20023, 0.71342}, {-1.13771, 0.675491}, {-1.06857, 0.633331}, {-1.07204, 0.633353}, {-1.14667, 0.674633}, {-1.21128, 0.710071}, {-1.18787, 0.695054}, {-1.08323, 0.634527}, {-0.986582, 0.580744}, {-0.984499, 0.583235}, {-1.07704, 0.640412}, {-1.06784, 0.634178}, {-1.10102, 0.653052}, {-1.08316, 0.641724}, {-1.0696, 0.632519}, {-1.08898, 0.641911}, {-1.11056, 0.652098}, {-1.07799, 0.631063}, {-0.974995, 0.570435}, {-0.854717, 0.501743}, {-0.799953, 0.472682}, {-0.850384, 0.505045}, {-0.966944, 0.574877}, {-1.01415, 0.600477}, {-1.10138, 0.651245}, {-1.13891, 0.672627}, {-1.14134, 0.672702}, {-1.1078, 0.650989}, {-1.02241, 0.599037}, {-0.883392, 0.516958}, {-0.728201, 0.427206}, {-0.623299, 0.368098}, {-0.621454, 0.369282}, {-0.722947, 0.429979}, {-0.875709, 0.519656}, {-1.06812, 0.630338}, {-1.20095, 0.707869}, {-1.24904, 0.735132}, {-1.20449, 
0.707355}, {-1.07385, 0.629166}, {-0.881776, 0.516164}, {-0.674754, 0.396097}, {-0.513634, 0.303952}, {-0.45194, 0.269769}, {-0.511184, 0.305241}, {-0.670112, 0.398045}, {-0.875788, 0.517966}, {-1.17817, 0.693416}, {-1.30084, 0.764601}, {-1.30242, 0.7641}, {-1.18214, 0.69212}, {-0.971424, 0.568318}, {-0.726289, 0.426163}, {-0.512779, 0.303672}, {-0.388904, 0.233297}, {-0.3883, 0.233598}, {-0.510769, 0.304568}, {-0.722696, 0.427561}, {-0.966851, 0.569916}, {-1.24109, 0.728966}, {-1.3023, 0.763707}, {-1.24407, 0.728168}, {-1.08036, 0.631761}, {-0.852769, 0.499772}, {-0.621637, 0.367132}, {-0.45052, 0.269493}, {-0.387539, 0.233563}, {-0.450217, 0.269405}, {-0.620134, 0.367305}, {-0.849488, 0.500483}, {-1.07618, 0.63282}, {-1.20341, 0.705967}, {-1.20521, 0.706043}, {-1.13058, 0.661754}, {-0.986483, 0.578209}, {-0.799278, 0.470884}, {-0.619924, 0.36839}, {-0.50952, 0.304951}, {-0.509893, 0.304472}, {-0.62015, 0.367231}, {-0.797705, 0.469649}, {-0.982737, 0.577377}, {-1.12646, 0.661401}, {-1.11236, 0.652928}, {-1.10779, 0.650208}, {-1.07401, 0.631043}, {-0.985667, 0.580886}, {-0.850086, 0.503365}, {-0.721652, 0.429122}, {-0.669176, 0.397736}, {-0.723064, 0.427427}, {-0.850822, 0.500367}, {-0.983821, 0.57733}, {-1.07008, 0.627881}, {-1.10454, 0.648347}, {-1.06606, 0.627901}, {-1.10813, 0.653696}, {-1.13019, 0.667865}, {-1.07805, 0.638393}, {-0.966779, 0.573489}, {-0.875544, 0.519073}, {-0.877057, 0.517943}, {-0.969825, 0.57009}, {-1.07964, 0.633012}, {-1.1288, 0.661633}, {-1.10523, 0.648594}, {-1.0646, 0.625919}, {-1.10452, 0.654055}, {-1.18302, 0.701075}, {-1.18079, 0.699708}, {-1.09935, 0.651024}, {-1.01976, 0.602869}, {-1.0216, 0.602244}, {-1.10343, 0.648611}, {-1.18411, 0.694744}, {-1.18376, 0.694186}, {-1.10341, 0.647791}, {-1.02382, 0.602792}, {-1.02457, 0.605343}, {-1.182, 0.701323}, {-1.24273, 0.737311}, {-1.20227, 0.712744}, {-1.10938, 0.656802}, {-1.0635, 0.628242}, {-1.11361, 0.655887}, {-1.20837, 0.709731}, {-1.24781, 0.731578}, {-1.18471, 0.694405}, 
{-1.07387, 0.630771}, {-1.01956, 0.601495}, {-1.07302, 0.63559}, {-1.17998, 0.700155}, {-1.20233, 0.713009}, {-1.13938, 0.675057}, {-1.06929, 0.6327}, {-1.07172, 0.632702}, {-1.1456, 0.674243}, {-1.20984, 0.709956}, {-1.18637, 0.694859}, {-1.08187, 0.633818}, {-0.985647, 0.579423}, {-0.984349, 0.581663}, {-1.07791, 0.639116}, {-1.0996, 0.651507}, {-1.10962, 0.65697}, {-1.06894, 0.632458}, {-1.04524, 0.617755}, {-1.07315, 0.632808}, {-1.11669, 0.656378}, {-1.10742, 0.648933}, {-1.01953, 0.596458}, {-0.905076, 0.530223}, {-0.851508, 0.501191}, {-0.901462, 0.533148}, {-1.01305, 0.600326}, {-1.0203, 0.60305}, {-1.06264, 0.627693}, {-1.06956, 0.631499}, {-1.07127, 0.631833}, {-1.06741, 0.628131}, {-1.02698, 0.602525}, {-0.928732, 0.543495}, {-0.79891, 0.46733}, {-0.704322, 0.413299}, {-0.702406, 0.414261}, {-0.793746, 0.469485}, {-0.921849, 0.545315}, {-1.02066, 0.601756}, {-1.11019, 0.654216}, {-1.14079, 0.67186}, {-1.11263, 0.654448}, {-1.02511, 0.601767}, {-0.885157, 0.518572}, {-0.7219, 0.42267}, {-0.587234, 0.344633}, {-0.533765, 0.314734}, {-0.584189, 0.345469}, {-0.716846, 0.423727}, {-0.879668, 0.519176}, {-1.09999, 0.647331}, {-1.20253, 0.707144}, {-1.20356, 0.707013}, {-1.10282, 0.646961}, {-0.926639, 0.543028}, {-0.721512, 0.423028}, {-0.542383, 0.319125}, {-0.437928, 0.259211}, {-0.436833, 0.259377}, {-0.539418, 0.319564}, {-0.717516, 0.423597}, {-0.922708, 0.543563}, {-1.17912, 0.692928}, {-1.24174, 0.728813}, {-1.18134, 0.692403}, {-1.01687, 0.595516}, {-0.798379, 0.468055}, {-0.587552, 0.346047}, {-0.438091, 0.260123}, {-0.384093, 0.229365}, {-0.43687, 0.260139}, {-0.585073, 0.346261}, {-0.794943, 0.468596}, {-1.0134, 0.59624}, {-1.17953, 0.69234}, {-1.18111, 0.692221}, {-1.08034, 0.632519}, {-0.905602, 0.530613}, {-0.705634, 0.41509}, {-0.534527, 0.316801}, {-0.436732, 0.260698}, {-0.436544, 0.260534}, {-0.533502, 0.316457}, {-0.703025, 0.414865}, {-0.901596, 0.53067}, {-1.07652, 0.632739}, {-1.10169, 0.646366}, {-1.07386, 0.629462}, {-0.987618, 
0.579122}, {-0.854223, 0.502221}, {-0.704195, 0.416081}, {-0.584345, 0.34711}, {-0.538627, 0.320402}, {-0.584518, 0.346398}, {-0.703109, 0.414803}, {-0.850786, 0.500677}, {-0.982751, 0.577725}, {-1.07026, 0.628631}, {-1.02438, 0.601959}, {-1.02192, 0.600836}, {-0.98774, 0.581656}, {-0.904114, 0.533809}, {-0.79445, 0.470359}, {-0.716003, 0.4243}, {-0.716668, 0.423722}, {-0.79527, 0.468538}, {-0.902875, 0.530746}, {-0.983777, 0.577929}, {-1.01729, 0.597687}, {-1.02232, 0.600718}, {-1.02563, 0.605009}, {-1.07541, 0.635206}, {-1.0804, 0.638865}, {-1.01412, 0.600199}, {-0.92128, 0.545227}, {-0.878868, 0.519145}, {-0.923384, 0.543748}, {-1.01594, 0.596785}, {-1.07964, 0.63358}, {-1.07226, 0.629465}, {-1.02276, 0.601203}, {-0.996484, 0.586831}, {-1.01764, 0.601283}, {-1.08006, 0.638125}, {-1.07164, 0.632736}, {-1.02117, 0.602376}, {-0.994184, 0.585759}, {-1.02338, 0.602105}, {-1.07441, 0.63122}, {-1.08158, 0.634721}, {-1.01751, 0.596905}, {-0.925899, 0.543716}, {-0.883295, 0.51999}, {-0.926588, 0.546849}, {-1.07928, 0.638003}, {-1.12852, 0.666717}, {-1.1052, 0.652351}, {-1.06444, 0.627723}, {-1.06573, 0.627793}, {-1.10846, 0.651937}, {-1.13229, 0.664716}, {-1.08227, 0.634346}, {-0.97208, 0.569569}, {-0.880115, 0.516712}, {-0.879597, 0.518299}, {-0.970382, 0.573297}, {-1.07181, 0.632878}, {-1.10599, 0.65259}, {-1.08756, 0.64134}, {-1.07226, 0.632033}, {-1.08965, 0.641667}, {-1.10974, 0.652268}, {-1.07634, 0.631105}, {-0.972939, 0.569345}, {-0.852578, 0.498928}, {-0.798308, 0.468614}, {-0.850135, 0.501029}, {-0.968836, 0.572095}, {-1.02308, 0.602803}, {-1.06604, 0.627856}, {-1.07256, 0.631719}, {-1.07321, 0.63208}, {-1.06817, 0.628492}, {-1.02686, 0.602845}, {-0.928066, 0.543288}, {-0.797879, 0.466131}, {-0.703114, 0.411117}, {-0.701461, 0.411711}, {-0.793734, 0.467404}, {-0.923278, 0.544201}, {-0.996552, 0.585947}, {-1.06638, 0.627066}, {-1.08855, 0.640374}, {-1.06695, 0.627687}, {-0.998424, 0.586746}, {-0.882562, 0.517475}, {-0.740561, 0.433051}, {-0.619411, 0.361755}, 
{-0.570359, 0.333737}, {-0.616389, 0.361889}, {-0.736227, 0.433006}, {-0.878979, 0.516988}, {-1.02387, 0.601419}, {-1.10642, 0.650034}, {-1.10644, 0.650222}, {-1.02442, 0.60194}, {-0.881877, 0.517732}, {-0.716058, 0.419767}, {-0.570909, 0.334288}, {-0.485779, 0.284589}, {-0.484422, 0.284385}, {-0.567689, 0.333735}, {-0.712813, 0.419026}, {-0.879963, 0.517007}, {-1.07186, 0.629553}, {-1.12788, 0.662279}, {-1.07234, 0.62956}, {-0.926374, 0.543807}, {-0.741121, 0.435112}, {-0.571991, 0.336067}, {-0.458048, 0.269567}, {-0.417794, 0.246316}, {-0.456244, 0.269076}, {-0.569351, 0.335394}, {-0.738834, 0.434614}, {-0.925054, 0.543618}, {-1.07728, 0.632698}, {-1.07801, 0.632597}, {-0.971053, 0.569618}, {-0.799145, 0.46903}, {-0.622477, 0.366075}, {-0.488434, 0.288236}, {-0.418855, 0.247976}, {-0.418257, 0.247793}, {-0.486751, 0.287841}, {-0.620063, 0.365782}, {-0.796599, 0.469021}, {-0.969105, 0.569791}, {-1.01413, 0.595421}, {-0.970685, 0.569339}, {-0.85451, 0.501219}, {-0.70756, 0.415738}, {-0.574974, 0.339005}, {-0.487221, 0.288358}, {-0.456992, 0.270943}, {-0.486623, 0.288223}, {-0.573232, 0.338789}, {-0.704434, 0.415518}, {-0.850838, 0.501053}, {-0.968158, 0.569254}, {-0.92519, 0.543229}, {-0.882194, 0.51778}, {-0.803052, 0.471756}, {-0.706815, 0.416138}, {-0.619971, 0.365984}, {-0.568649, 0.336233}, {-0.568759, 0.336209}, {-0.619567, 0.365764}, {-0.704601, 0.415476}, {-0.798918, 0.470675}, {-0.87795, 0.516724}, {-0.923379, 0.542786}, {-0.884667, 0.520219}, {-0.883259, 0.51966}, {-0.854793, 0.503429}, {-0.797038, 0.469961}, {-0.736807, 0.434731}, {-0.711432, 0.419602}, {-0.737571, 0.434541}, {-0.796883, 0.469038}, {-0.852281, 0.501427}, {-0.879063, 0.517115}, {-0.881391, 0.518406}, {-0.879834, 0.517378}, {-0.927968, 0.547149}, {-0.97308, 0.574077}, {-0.971079, 0.572991}, {-0.923322, 0.544691}, {-0.876938, 0.516942}, {-0.877867, 0.516857}, {-0.925038, 0.543949}, {-0.97148, 0.570775}, {-0.971238, 0.570472}, {-0.925198, 0.54363}, {-0.880027, 0.517624}, {-0.881342, 
0.519098}, {-0.905261, 0.532232}, {-0.986474, 0.579813}, {-1.01989, 0.599225}, {-1.0238, 0.601451}, {-1.02426, 0.601747}, {-1.02099, 0.599699}, {-0.987607, 0.579669}, {-0.905969, 0.531174}, {-0.798101, 0.467542}, {-0.72016, 0.42201}, {-0.720125, 0.422628}, {-0.797852, 0.468896}, {-0.986293, 0.579881}, {-1.07406, 0.631167}, {-1.10871, 0.651354}, {-1.11517, 0.655243}, {-1.10903, 0.651682}, {-1.07501, 0.631306}, {-0.988028, 0.579365}, {-0.85407, 0.499886}, {-0.725942, 0.424484}, {-0.672192, 0.393574}, {-0.724529, 0.425328}, {-0.852022, 0.500918}, {-1.02151, 0.599866}, {-1.11025, 0.651759}, {-1.14672, 0.673324}, {-1.14635, 0.673516}, {-1.10985, 0.652165}, {-1.02238, 0.600139}, {-0.882131, 0.516646}, {-0.726101, 0.424177}, {-0.620723, 0.36236}, {-0.619458, 0.362492}, {-0.72324, 0.42442}, {-0.879634, 0.516698}, {-1.02751, 0.602403}, {-1.11779, 0.655467}, {-1.14703, 0.673228}, {-1.11639, 0.655914}, {-1.02655, 0.603206}, {-0.88504, 0.519278}, {-0.720782, 0.42163}, {-0.585325, 0.341387}, {-0.531499, 0.309997}, {-0.582725, 0.340861}, {-0.717742, 0.420747}, {-0.883837, 0.518304}, {-1.02796, 0.602183}, {-1.11047, 0.651014}, {-1.10939, 0.651263}, {-1.02593, 0.602986}, {-0.882233, 0.518555}, {-0.715708, 0.419907}, {-0.570063, 0.333351}, {-0.484523, 0.282643}, {-0.483116, 0.282058}, {-0.567128, 0.331875}, {-0.713848, 0.418182}, {-0.882855, 0.517167}, {-1.02199, 0.599068}, {-1.07366, 0.629911}, {-1.02043, 0.599504}, {-0.884401, 0.520205}, {-0.716345, 0.421387}, {-0.56779, 0.333442}, {-0.47071, 0.275723}, {-0.436787, 0.255572}, {-0.468615, 0.274475}, {-0.565496, 0.331577}, {-0.715802, 0.419703}, {-0.885756, 0.519153}, {-0.985163, 0.578322}, {-0.984799, 0.578416}, {-0.880689, 0.517839}, {-0.722333, 0.425213}, {-0.5733, 0.337599}, {-0.473054, 0.278322}, {-0.426851, 0.250876}, {-0.425935, 0.250373}, {-0.47102, 0.277097}, {-0.57163, 0.336279}, {-0.721943, 0.424302}, {-0.881215, 0.517432}, {-0.902077, 0.530131}, {-0.851875, 0.500612}, {-0.727772, 0.42798}, {-0.590537, 0.347631}, 
{-0.490625, 0.289009}, {-0.440981, 0.259799}, {-0.427392, 0.251893}, {-0.439815, 0.259496}, {-0.488748, 0.288593}, {-0.588595, 0.347321}, {-0.726286, 0.427859}, {-0.851099, 0.500605}, {-0.795741, 0.467606}, {-0.727098, 0.427028}, {-0.626118, 0.367814}, {-0.539122, 0.316956}, {-0.489706, 0.288146}, {-0.472129, 0.278046}, {-0.471836, 0.27824}, {-0.488623, 0.28861}, {-0.536973, 0.317434}, {-0.623193, 0.368112}, {-0.724465, 0.427131}, {-0.794673, 0.46762}, {-0.720678, 0.423073}, {-0.676324, 0.396777}, {-0.626258, 0.367444}, {-0.589273, 0.345945}, {-0.570823, 0.335379}, {-0.566018, 0.332904}, {-0.570768, 0.336172}, {-0.588258, 0.346969}, {-0.623633, 0.368075}, {-0.672792, 0.396846}, {-0.718107, 0.422911}, {-0.737603, 0.433613}, {-0.722283, 0.423834}, {-0.728986, 0.42768}, {-0.727991, 0.427139}, {-0.720161, 0.422664}, {-0.713041, 0.418684}, {-0.713393, 0.41919}, {-0.720525, 0.423738}, {-0.727079, 0.427877}, {-0.726446, 0.427577}, {-0.719392, 0.423225}, {-0.713255, 0.419219}, {-0.714531, 0.419555}, {-0.799373, 0.469489}, {-0.854376, 0.501776}, {-0.880596, 0.517094}, {-0.881919, 0.517847}, {-0.879022, 0.516233}, {-0.882802, 0.518575}, {-0.881332, 0.517737}, {-0.853975, 0.501544}, {-0.797942, 0.468444}, {-0.73916, 0.433814}, {-0.71441, 0.419334}, {-0.740444, 0.434774}, {-0.854991, 0.500054}, {-0.988714, 0.578671}, {-1.07639, 0.630481}, {-1.106, 0.648363}, {-1.07561, 0.630936}, {-0.98788, 0.579518}, {-0.854852, 0.501122}, {-0.706168, 0.413345}, {-0.587145, 0.343059}, {-0.541148, 0.315861}, {-0.586502, 0.342447}, {-0.705576, 0.412342}, {-0.989327, 0.579265}, {-1.13501, 0.664759}, {-1.21114, 0.709777}, {-1.2101, 0.70974}, {-1.13284, 0.664816}, {-0.988005, 0.57971}, {-0.801632, 0.469741}, {-0.622966, 0.36422}, {-0.511953, 0.298751}, {-0.511106, 0.298363}, {-0.621344, 0.363315}, {-0.801058, 0.468872}, {-1.0793, 0.631768}, {-1.21307, 0.710265}, {-1.25875, 0.737693}, {-1.21017, 0.710106}, {-1.07603, 0.631937}, {-0.881893, 0.517766}, {-0.673716, 0.394781}, {-0.51159, 0.298897}, 
{-0.449334, 0.26217}, {-0.509696, 0.297844}, {-0.672192, 0.393401}, {-0.882901, 0.516871}, {-1.1106, 0.649654}, {-1.2128, 0.709962}, {-1.21081, 0.70994}, {-1.10632, 0.649904}, {-0.927359, 0.545417}, {-0.720736, 0.423647}, {-0.540637, 0.316915}, {-0.435294, 0.25434}, {-0.434083, 0.253519}, {-0.538503, 0.314981}, {-0.720599, 0.421735}, {-0.930431, 0.544346}, {-1.07923, 0.631452}, {-1.13403, 0.664443}, {-1.07582, 0.631816}, {-0.927302, 0.545923}, {-0.740516, 0.436481}, {-0.570648, 0.335906}, {-0.456112, 0.26754}, {-0.415358, 0.242998}, {-0.454166, 0.265765}, {-0.569239, 0.333348}, {-0.741985, 0.434341}, {-0.931255, 0.544775}, {-0.988196, 0.5792}, {-0.98678, 0.579449}, {-0.881273, 0.518924}, {-0.721905, 0.426146}, {-0.572437, 0.338054}, {-0.472017, 0.278022}, {-0.425594, 0.249832}, {-0.424547, 0.24896}, {-0.47006, 0.275868}, {-0.571895, 0.33565}, {-0.723826, 0.424366}, {-0.88424, 0.518015}, {-0.852081, 0.500741}, {-0.799693, 0.470788}, {-0.674951, 0.398321}, {-0.545042, 0.322126}, {-0.46119, 0.272203}, {-0.428512, 0.252143}, {-0.422002, 0.247952}, {-0.426989, 0.251169}, {-0.459488, 0.270718}, {-0.544554, 0.320731}, {-0.675807, 0.39739}, {-0.800728, 0.470367}, {-0.702968, 0.413776}, {-0.623145, 0.367063}, {-0.516467, 0.304462}, {-0.443168, 0.261075}, {-0.42274, 0.248531}, {-0.428859, 0.25187}, {-0.428239, 0.251868}, {-0.421193, 0.248522}, {-0.441405, 0.261049}, {-0.515109, 0.304423}, {-0.622407, 0.367025}, {-0.702752, 0.41376}, {-0.586221, 0.344444}, {-0.515515, 0.302523}, {-0.456974, 0.267827}, {-0.442766, 0.259231}, {-0.460513, 0.269644}, {-0.472793, 0.277306}, {-0.459831, 0.270586}, {-0.441288, 0.260678}, {-0.454798, 0.269209}, {-0.513224, 0.303469}, {-0.584727, 0.344884}, {-0.617478, 0.363388}, {-0.542921, 0.317484}, {-0.516465, 0.301428}, {-0.516762, 0.301423}, {-0.544064, 0.31763}, {-0.571358, 0.334191}, {-0.571299, 0.334997}, {-0.54357, 0.319674}, {-0.515301, 0.303841}, {-0.514096, 0.303413}, {-0.540611, 0.318664}, {-0.568043, 0.333992}, {-0.56901, 0.333618}, 
{-0.589065, 0.343441}, {-0.625492, 0.364584}, {-0.675477, 0.39411}, {-0.720855, 0.421284}, {-0.739418, 0.432921}, {-0.720916, 0.42282}, {-0.675078, 0.396514}, {-0.624235, 0.366955}, {-0.587287, 0.345145}, {-0.569362, 0.334102}, {-0.565215, 0.330882}, {-0.57068, 0.333266}, {-0.706877, 0.41252}, {-0.802308, 0.468612}, {-0.881907, 0.515721}, {-0.926581, 0.542539}, {-0.926536, 0.543113}, {-0.881726, 0.517212}, {-0.80192, 0.470464}, {-0.706327, 0.414159}, {-0.62062, 0.363464}, {-0.57, 0.333274}, {-0.570207, 0.332912}, {-0.621126, 0.362399}, {-0.90873, 0.530699}, {-1.08565, 0.635072}, {-1.188, 0.695829}, {-1.18662, 0.69561}, {-1.08251, 0.634875}, {-0.905949, 0.531375}, {-0.705979, 0.413922}, {-0.535547, 0.313587}, {-0.437947, 0.255777}, {-0.437593, 0.254904}, {-0.535213, 0.3116}, {-0.707025, 0.412153}, {-1.08689, 0.635973}, {-1.25264, 0.73346}, {-1.31127, 0.768447}, {-1.24889, 0.732614}, {-1.08208, 0.635364}, {-0.853578, 0.501501}, {-0.622781, 0.365838}, {-0.45166, 0.264924}, {-0.388031, 0.227055}, {-0.450773, 0.263432}, {-0.622945, 0.364108}, {-0.856448, 0.500854}, {-1.19134, 0.697143}, {-1.31299, 0.768776}, {-1.31039, 0.768294}, {-1.18545, 0.696338}, {-0.971812, 0.571913}, {-0.725656, 0.427572}, {-0.511728, 0.301479}, {-0.387194, 0.227697}, {-0.386477, 0.226781}, {-0.510983, 0.29949}, {-0.727508, 0.426065}, {-0.976979, 0.571804}, {-1.19068, 0.696587}, {-1.25079, 0.732704}, {-1.1858, 0.696342}, {-1.01738, 0.599244}, {-0.796976, 0.470654}, {-0.585708, 0.346253}, {-0.435927, 0.257375}, {-0.381444, 0.224607}, {-0.434799, 0.255504}, {-0.585954, 0.343788}, {-0.800727, 0.469028}, {-1.02363, 0.598893}, {-1.08479, 0.635221}, {-1.08247, 0.635402}, {-0.971785, 0.572516}, {-0.797441, 0.471572}, {-0.620067, 0.367457}, {-0.486057, 0.287752}, {-0.41627, 0.245654}, {-0.415492, 0.244604}, {-0.485065, 0.2852}, {-0.621396, 0.364724}, {-0.801868, 0.469708}, {-0.97699, 0.571703}, {-0.905153, 0.53149}, {-0.852577, 0.502286}, {-0.726367, 0.429708}, {-0.588237, 0.349011}, {-0.488417, 
0.289583}, {-0.438975, 0.259306}, {-0.425261, 0.250486}, {-0.437732, 0.257719}, {-0.487727, 0.28711}, {-0.589745, 0.346591}, {-0.729669, 0.427989}, {-0.855327, 0.501453}, {-0.703319, 0.41442}, {-0.62243, 0.367877}, {-0.515097, 0.305252}, {-0.441756, 0.26166}, {-0.421583, 0.248768}, {-0.42783, 0.251684}, {-0.427143, 0.251292}, {-0.420185, 0.247711}, {-0.440973, 0.260248}, {-0.515615, 0.303875}, {-0.623635, 0.366892}, {-0.703934, 0.414065}, {-0.533671, 0.314612}, {-0.453954, 0.267697}, {-0.39369, 0.231851}, {-0.38951, 0.228701}, {-0.421824, 0.247209}, {-0.440785, 0.258508}, {-0.420829, 0.247546}, {-0.388011, 0.229143}, {-0.392316, 0.232138}, {-0.453072, 0.267754}, {-0.533292, 0.314564}, {-0.57029, 0.336116}, {-0.438553, 0.256808}, {-0.392833, 0.229247}, {-0.393994, 0.229459}, {-0.441827, 0.257486}, {-0.489145, 0.285717}, {-0.488796, 0.286388}, {-0.440773, 0.259222}, {-0.392313, 0.231597}, {-0.390874, 0.231116}, {-0.436942, 0.258006}, {-0.483803, 0.28496}, {-0.48443, 0.284558}, {-0.440034, 0.255265}, {-0.455729, 0.263975}, {-0.516139, 0.299649}, {-0.58876, 0.343042}, {-0.621226, 0.363101}, {-0.588249, 0.344708}, {-0.51498, 0.302475}, {-0.45396, 0.267145}, {-0.438129, 0.257929}, {-0.455412, 0.267611}, {-0.468271, 0.274171}, {-0.45667, 0.266107}, {-0.537417, 0.311164}, {-0.625532, 0.363069}, {-0.728288, 0.424258}, {-0.798674, 0.46667}, {-0.798327, 0.467367}, {-0.727316, 0.426259}, {-0.62415, 0.366026}, {-0.535974, 0.31438}, {-0.486437, 0.285064}, {-0.469332, 0.274268}, {-0.469741, 0.273311}, {-0.487532, 0.282546}, {-0.707721, 0.411529}, {-0.856201, 0.49932}, {-0.974282, 0.569611}, {-1.01899, 0.596722}, {-0.972922, 0.570185}, {-0.854309, 0.500753}, {-0.706267, 0.413877}, {-0.573897, 0.336038}, {-0.486771, 0.284451}, {-0.456904, 0.266092}, {-0.486865, 0.282677}, {-0.574523, 0.333405}, {-1.02299, 0.598635}, {-1.1894, 0.697064}, {-1.2498, 0.733129}, {-1.18546, 0.695737}, {-1.01748, 0.597491}, {-0.79733, 0.468729}, {-0.58658, 0.345381}, {-0.437813, 0.257914}, {-0.384434, 
0.225752}, {-0.438174, 0.256085}, {-0.588516, 0.343368}, {-0.801632, 0.468227}, {-1.19078, 0.697726}, {-1.31196, 0.769254}, {-1.30916, 0.768367}, {-1.18415, 0.695981}, {-0.970579, 0.571599}, {-0.72483, 0.427978}, {-0.511733, 0.302925}, {-0.388224, 0.229839}, {-0.388251, 0.228873}, {-0.512822, 0.300964}, {-0.72872, 0.426904}, {-0.977243, 0.572377}, {-1.25252, 0.733774}, {-1.31023, 0.768428}, {-1.24677, 0.732767}, {-1.0792, 0.636165}, {-0.850589, 0.503134}, {-0.620269, 0.368135}, {-0.449893, 0.267523}, {-0.386958, 0.229629}, {-0.450238, 0.265791}, {-0.622845, 0.366137}, {-0.856684, 0.502408}, {-1.08719, 0.636906}, {-1.18796, 0.696076}, {-1.18495, 0.696012}, {-1.07929, 0.636323}, {-0.902007, 0.534065}, {-0.702217, 0.417304}, {-0.532352, 0.316932}, {-0.435207, 0.258779}, {-0.435193, 0.257762}, {-0.53342, 0.314548}, {-0.706287, 0.414984}, {-0.909094, 0.532826}, {-1.08639, 0.636101}, {-1.01814, 0.597751}, {-0.970386, 0.572001}, {-0.850826, 0.503959}, {-0.702987, 0.41806}, {-0.571434, 0.34027}, {-0.484887, 0.288176}, {-0.455049, 0.269532}, {-0.484962, 0.286385}, {-0.573141, 0.337476}, {-0.707446, 0.41532}, {-0.856843, 0.502007}, {-0.97477, 0.571052}, {-0.796031, 0.469208}, {-0.724431, 0.429006}, {-0.621865, 0.369769}, {-0.53516, 0.318509}, {-0.486935, 0.288976}, {-0.470153, 0.277962}, {-0.469968, 0.277278}, {-0.487042, 0.287093}, {-0.536759, 0.315898}, {-0.62514, 0.367115}, {-0.727891, 0.427035}, {-0.797522, 0.468481}, {-0.584838, 0.345936}, {-0.512712, 0.304136}, {-0.453815, 0.269255}, {-0.440236, 0.260336}, {-0.458798, 0.270305}, {-0.471412, 0.277328}, {-0.458391, 0.269826}, {-0.440037, 0.259275}, {-0.454387, 0.267663}, {-0.5139, 0.302439}, {-0.585817, 0.344822}, {-0.617751, 0.364292}, {-0.437406, 0.257946}, {-0.391269, 0.230261}, {-0.392486, 0.230267}, {-0.440697, 0.258108}, {-0.48835, 0.286095}, {-0.488075, 0.286356}, {-0.43998, 0.258676}, {-0.391587, 0.230699}, {-0.390463, 0.230296}, {-0.436875, 0.25772}, {-0.483753, 0.285394}, {-0.483935, 0.28553}, {-0.385785, 
0.224752}, {-0.392231, 0.227792}, {-0.455032, 0.264624}, {-0.536732, 0.313147}, {-0.57402, 0.335814}, {-0.536115, 0.314239}, {-0.453803, 0.266436}, {-0.390558, 0.229759}, {-0.384116, 0.226347}, {-0.415576, 0.244881}, {-0.435087, 0.255852}, {-0.416633, 0.244006}, {-0.440106, 0.254204}, {-0.515621, 0.298598}, {-0.624637, 0.363576}, {-0.705867, 0.412455}, {-0.705416, 0.413}, {-0.623349, 0.365204}, {-0.513732, 0.301146}, {-0.438066, 0.257127}, {-0.416682, 0.244805}, {-0.423233, 0.248207}, {-0.423834, 0.247287}, {-0.41827, 0.242427}, {-0.589561, 0.341428}, {-0.729069, 0.424294}, {-0.855077, 0.49969}, {-0.905735, 0.530443}, {-0.853543, 0.500136}, {-0.726651, 0.425682}, {-0.587148, 0.344}, {-0.486337, 0.285155}, {-0.437067, 0.256122}, {-0.424451, 0.24771}, {-0.437945, 0.253996}, {-0.488099, 0.282088}, {-0.801556, 0.467116}, {-0.97579, 0.570585}, {-1.08388, 0.635122}, {-1.08247, 0.634772}, {-0.972363, 0.570176}, {-0.797905, 0.467856}, {-0.620246, 0.363894}, {-0.486552, 0.285597}, {-0.417809, 0.244731}, {-0.418033, 0.243582}, {-0.487599, 0.283007}, {-0.622706, 0.361654}, {-1.10965, 0.65101}, {-1.21079, 0.711131}, {-1.20826, 0.710059}, {-1.10336, 0.648844}, {-0.924243, 0.544416}, {-0.7183, 0.424519}, {-0.540084, 0.320516}, {-0.43731, 0.259747}, {-0.438113, 0.258732}, {-0.54291, 0.318508}, {-0.723657, 0.423584}, {-0.93133, 0.545568}, {-1.21182, 0.711241}, {-1.256, 0.73798}, {-1.20609, 0.709803}, {-1.07111, 0.631903}, {-0.877161, 0.519296}, {-0.670496, 0.398656}, {-0.510842, 0.304668}, {-0.451047, 0.268372}, {-0.512848, 0.302991}, {-0.675332, 0.396892}, {-0.884895, 0.519056}, {-1.07967, 0.633252}, {-1.20972, 0.710039}, {-1.20645, 0.709786}, {-1.12718, 0.66538}, {-0.981426, 0.581637}, {-0.795717, 0.473475}, {-0.618989, 0.369437}, {-0.510313, 0.304479}, {-0.511346, 0.30353}, {-0.62263, 0.367283}, {-0.802631, 0.471574}, {-0.990581, 0.580938}, {-1.1353, 0.66563}, {-1.10424, 0.649018}, {-1.07113, 0.632162}, {-0.981394, 0.581887}, {-0.848011, 0.504798}, {-0.700556, 0.417972}, 
{-0.583449, 0.347961}, {-0.539142, 0.320392}, {-0.585668, 0.346231}, {-0.705652, 0.415267}, {-0.855872, 0.502135}, {-0.989913, 0.579978}, {-1.07677, 0.631228}, {-0.924009, 0.545039}, {-0.876862, 0.519905}, {-0.796041, 0.473948}, {-0.701154, 0.418152}, {-0.617103, 0.367554}, {-0.567934, 0.337124}, {-0.56887, 0.336345}, {-0.620183, 0.36534}, {-0.706598, 0.414928}, {-0.802914, 0.470492}, {-0.8828, 0.517229}, {-0.926374, 0.544028}, {-0.717393, 0.425178}, {-0.670585, 0.399036}, {-0.620299, 0.369539}, {-0.585078, 0.347812}, {-0.568818, 0.33689}, {-0.565262, 0.333697}, {-0.570292, 0.335869}, {-0.588104, 0.345608}, {-0.624537, 0.366309}, {-0.674901, 0.395645}, {-0.720158, 0.42297}, {-0.737567, 0.434958}, {-0.539442, 0.320262}, {-0.511871, 0.303883}, {-0.512536, 0.303441}, {-0.541221, 0.31934}, {-0.569817, 0.335458}, {-0.570279, 0.335327}, {-0.542523, 0.318698}, {-0.514408, 0.301887}, {-0.51385, 0.301549}, {-0.540959, 0.318058}, {-0.56805, 0.335129}, {-0.56748, 0.33604}, {-0.43746, 0.257892}, {-0.452672, 0.265945}, {-0.51339, 0.301114}, {-0.586792, 0.344189}, {-0.619905, 0.363743}, {-0.587106, 0.344438}, {-0.513736, 0.301232}, {-0.452759, 0.26559}, {-0.437264, 0.257159}, {-0.454851, 0.268413}, {-0.467503, 0.276449}, {-0.455078, 0.269016}, {-0.438703, 0.255565}, {-0.513974, 0.299445}, {-0.623115, 0.364126}, {-0.704706, 0.412806}, {-0.704563, 0.413009}, {-0.622574, 0.364719}, {-0.512886, 0.300346}, {-0.437213, 0.25657}, {-0.415979, 0.245083}, {-0.422697, 0.249493}, {-0.423244, 0.249186}, {-0.417325, 0.244282}, {-0.543151, 0.315196}, {-0.675047, 0.393466}, {-0.801031, 0.46879}, {-0.853036, 0.500088}, {-0.799929, 0.468793}, {-0.67305, 0.394009}, {-0.540705, 0.316669}, {-0.455555, 0.267677}, {-0.423304, 0.249443}, {-0.418287, 0.245957}, {-0.424648, 0.247973}, {-0.457794, 0.265668}, {-0.723195, 0.421314}, {-0.883309, 0.516813}, {-0.987705, 0.579312}, {-0.986577, 0.578825}, {-0.880376, 0.516046}, {-0.719542, 0.421598}, {-0.569005, 0.334029}, {-0.468952, 0.27624}, {-0.424209, 
0.249912}, {-0.42493, 0.248861}, {-0.47107, 0.273931}, {-0.572251, 0.332207}, {-0.930661, 0.544704}, {-1.07783, 0.632375}, {-1.13263, 0.665132}, {-1.07442, 0.630815}, {-0.925447, 0.543264}, {-0.738253, 0.433963}, {-0.568962, 0.335582}, {-0.456279, 0.269836}, {-0.417773, 0.246317}, {-0.457798, 0.267946}, {-0.572286, 0.333571}, {-0.743182, 0.433634}, {-1.11706, 0.656463}, {-1.14464, 0.673436}, {-1.11251, 0.6551}, {-1.02147, 0.602427}, {-0.879756, 0.520502}, {-0.717022, 0.426186}, {-0.584683, 0.348691}, {-0.534343, 0.317812}, {-0.587821, 0.346913}, {-0.722973, 0.424257}, {-0.887441, 0.520088}, {-1.02882, 0.603639}, {-1.14503, 0.673245}, {-1.14215, 0.67305}, {-1.10347, 0.651965}, {-1.01495, 0.60155}, {-0.875566, 0.520781}, {-0.722298, 0.430863}, {-0.620614, 0.369979}, {-0.622518, 0.368974}, {-0.727856, 0.428557}, {-0.884004, 0.518671}, {-1.02437, 0.60064}, {-1.11098, 0.65206}, {-1.11276, 0.655192}, {-1.10335, 0.652221}, {-1.06692, 0.633092}, {-0.979481, 0.583027}, {-0.847324, 0.505374}, {-0.722435, 0.430777}, {-0.672011, 0.399129}, {-0.726718, 0.428976}, {-0.855318, 0.502484}, {-0.989619, 0.580052}, {-1.07648, 0.630829}, {-1.10921, 0.651047}, {-1.02138, 0.603333}, {-1.01464, 0.602294}, {-0.97934, 0.583296}, {-0.898166, 0.535699}, {-0.792642, 0.472433}, {-0.717578, 0.426351}, {-0.719855, 0.425531}, {-0.79909, 0.470018}, {-0.907516, 0.531987}, {-0.989315, 0.579107}, {-1.02234, 0.598925}, {-1.02428, 0.602035}, {-0.879469, 0.521897}, {-0.875239, 0.521424}, {-0.847141, 0.505252}, {-0.792492, 0.47204}, {-0.736017, 0.437054}, {-0.713142, 0.421708}, {-0.740207, 0.435787}, {-0.799879, 0.469154}, {-0.855876, 0.500826}, {-0.882982, 0.516646}, {-0.884015, 0.518738}, {-0.878991, 0.518584}, {-0.716712, 0.426845}, {-0.721954, 0.430288}, {-0.721974, 0.429445}, {-0.716853, 0.425024}, {-0.712423, 0.420965}, {-0.714245, 0.420629}, {-0.721741, 0.423623}, {-0.728444, 0.426391}, {-0.728185, 0.425916}, {-0.721121, 0.422809}, {-0.713604, 0.420658}, {-0.712023, 0.422312}, {-0.584246, 
0.347455}, {-0.619863, 0.367543}, {-0.670681, 0.396429}, {-0.717902, 0.423371}, {-0.738138, 0.43445}, {-0.720411, 0.423002}, {-0.674652, 0.395097}, {-0.623858, 0.36491}, {-0.587194, 0.344227}, {-0.569305, 0.335587}, {-0.564246, 0.334606}, {-0.567819, 0.337851}, {-0.533451, 0.314664}, {-0.620866, 0.365225}, {-0.724074, 0.425719}, {-0.795665, 0.467732}, {-0.796426, 0.467626}, {-0.725876, 0.425255}, {-0.622675, 0.364176}, {-0.534507, 0.313134}, {-0.485271, 0.285983}, {-0.468406, 0.277793}, {-0.468399, 0.278379}, {-0.484976, 0.287408}, {-0.58629, 0.34312}, {-0.725238, 0.424946}, {-0.851612, 0.499892}, {-0.903225, 0.530304}, {-0.851867, 0.499358}, {-0.725244, 0.424248}, {-0.585612, 0.342671}, {-0.484763, 0.28516}, {-0.435776, 0.258224}, {-0.42345, 0.251509}, {-0.436708, 0.258139}, {-0.485944, 0.285239}, {-0.721074, 0.421389}, {-0.881011, 0.516467}, {-0.98582, 0.578833}, {-0.985376, 0.578198}, {-0.87965, 0.515168}, {-0.718866, 0.42065}, {-0.568146, 0.333548}, {-0.468024, 0.276761}, {-0.423433, 0.251491}, {-0.42428, 0.250931}, {-0.470191, 0.275668}, {-0.570757, 0.33309}, {-0.885804, 0.518833}, {-1.0219, 0.600042}, {-1.07367, 0.630722}, {-1.0198, 0.598373}, {-0.882177, 0.517091}, {-0.712562, 0.418363}, {-0.563839, 0.332924}, {-0.468411, 0.27824}, {-0.436962, 0.259253}, {-0.470481, 0.276772}, {-0.567437, 0.33153}, {-0.716733, 0.418563}, {-1.02799, 0.603482}, {-1.10961, 0.652286}, {-1.10785, 0.651229}, {-1.02335, 0.60134}, {-0.878572, 0.516856}, {-0.711954, 0.420536}, {-0.567972, 0.337443}, {-0.485389, 0.288975}, {-0.486686, 0.287995}, {-0.571684, 0.335511}, {-0.717425, 0.419661}, {-0.884505, 0.518018}, {-1.07232, 0.630852}, {-1.07056, 0.630918}, {-1.0632, 0.627608}, {-1.0203, 0.60372}, {-0.92158, 0.547172}, {-0.793655, 0.472723}, {-0.702726, 0.41841}, {-0.704923, 0.417269}, {-0.799497, 0.469998}, {-0.929183, 0.544382}, {-1.02747, 0.602005}, {-1.06817, 0.627034}, {-1.07036, 0.631004}, {-1.0844, 0.641468}, {-1.10189, 0.653626}, {-1.06773, 0.634808}, {-0.965988, 0.575278}, 
{-0.849279, 0.505626}, {-0.799301, 0.473928}, {-0.85452, 0.503455}, {-0.974792, 0.571657}, {-1.07746, 0.630846}, {-1.10995, 0.650385}, {-1.08889, 0.639677}, {-1.06271, 0.628629}, {-1.10156, 0.654342}, {-1.12316, 0.668716}, {-1.07351, 0.639698}, {-0.966127, 0.575326}, {-0.878149, 0.521311}, {-0.88134, 0.520328}, {-0.974653, 0.57238}, {-1.08471, 0.635055}, {-1.13383, 0.663357}, {-1.10905, 0.649966}, {-1.06538, 0.626932}, {-1.01991, 0.605809}, {-1.06726, 0.636002}, {-1.07311, 0.639927}, {-1.01044, 0.601935}, {-0.921916, 0.54774}, {-0.882583, 0.522}, {-0.928425, 0.546209}, {-1.02128, 0.598346}, {-1.08496, 0.634306}, {-1.07694, 0.629854}, {-1.02525, 0.601719}, {-0.995061, 0.587574}, {-0.92145, 0.549256}, {-0.965175, 0.575824}, {-0.964778, 0.574777}, {-0.920679, 0.547078}, {-0.878039, 0.519858}, {-0.881313, 0.519436}, {-0.929312, 0.545241}, {-0.97583, 0.570653}, {-0.975371, 0.569835}, {-0.928372, 0.54369}, {-0.88077, 0.518931}, {-0.878351, 0.521218}, {-0.793243, 0.473276}, {-0.847351, 0.504481}, {-0.875002, 0.519428}, {-0.879228, 0.520414}, {-0.879135, 0.518658}, {-0.884544, 0.51981}, {-0.883567, 0.517258}, {-0.856226, 0.500205}, {-0.800052, 0.468039}, {-0.740536, 0.435682}, {-0.713862, 0.423286}, {-0.736976, 0.439354}, {-0.701142, 0.416783}, {-0.795548, 0.471179}, {-0.876019, 0.517617}, {-0.922854, 0.544232}, {-0.924957, 0.544023}, {-0.881358, 0.516598}, {-0.801881, 0.468728}, {-0.706368, 0.413108}, {-0.620816, 0.365091}, {-0.570009, 0.33812}, {-0.569014, 0.339552}, {-0.617693, 0.368557}, {-0.701801, 0.414316}, {-0.849151, 0.500379}, {-0.967938, 0.570028}, {-1.01459, 0.596709}, {-0.970387, 0.569212}, {-0.852688, 0.498733}, {-0.704774, 0.412047}, {-0.572499, 0.336444}, {-0.48579, 0.288353}, {-0.456139, 0.272769}, {-0.485267, 0.289806}, {-0.570888, 0.338727}, {-0.795518, 0.467383}, {-0.969111, 0.569707}, {-1.07832, 0.63391}, {-1.07892, 0.633197}, {-0.97037, 0.567968}, {-0.796351, 0.465497}, {-0.618458, 0.36283}, {-0.484734, 0.28727}, {-0.416415, 0.249265}, {-0.416842, 
0.249412}, {-0.485523, 0.287876}, {-0.61865, 0.364149}, {-0.92568, 0.54321}, {-1.07309, 0.6305}, {-1.12941, 0.66324}, {-1.07294, 0.628773}, {-0.924866, 0.541062}, {-0.737495, 0.43222}, {-0.567629, 0.335295}, {-0.45474, 0.271532}, {-0.41644, 0.249404}, {-0.456398, 0.270956}, {-0.56995, 0.33514}, {-0.739305, 0.433371}, {-1.02543, 0.602078}, {-1.10773, 0.650928}, {-1.10713, 0.649962}, {-1.02353, 0.600065}, {-0.878905, 0.515649}, {-0.711811, 0.419746}, {-0.567276, 0.337426}, {-0.484466, 0.289703}, {-0.485758, 0.288954}, {-0.570555, 0.336006}, {-0.715695, 0.419294}, {-0.8821, 0.516913}, {-1.06758, 0.627366}, {-1.08843, 0.640068}, {-1.06526, 0.626339}, {-0.994871, 0.585351}, {-0.877686, 0.518046}, {-0.736047, 0.436923}, {-0.617298, 0.368225}, {-0.571703, 0.340401}, {-0.620535, 0.36644}, {-0.741319, 0.434861}, {-0.883244, 0.517288}, {-0.999225, 0.5861}}; + const std::vector expected_rho + = {0.000169776, 0.000178569, 0.000193478, 0.000190963, 0.000162992, 0.00012976, 0.000115885, 0.000130059, + 0.00016338, 0.000191117, 0.000193323, 0.000178366, 0.000179064, 0.000204228, 0.000227792, 0.000219629, + 0.000183672, 0.000153516, 0.000153676, 0.000183952, 0.000219538, 0.000227101, 0.00020339, 0.000178705, + 0.000194839, 0.000228625, 0.000243426, 0.000219702, 0.000181133, 0.000163767, 0.00018138, 0.000219691, + 0.000242614, 0.000227273, 0.000193857, 0.00017819, 0.000192755, 0.000220801, 0.000220073, 0.00019118, + 0.000164701, 0.000164856, 0.000191352, 0.000219611, 0.000219514, 0.000191386, 0.000165518, 0.000166079, + 0.00016434, 0.000184444, 0.00018134, 0.000164698, 0.000156019, 0.000164894, 0.000181257, 0.000183649, + 0.00016312, 0.000135954, 0.000124381, 0.000136818, 0.000130335, 0.00015378, 0.000163817, 0.000164888, + 0.000164963, 0.000163834, 0.000153372, 0.000129503, 0.000101264, 8.32226e-05, 8.35355e-05, 0.000102059, + 0.000116058, 0.000153881, 0.000181671, 0.00019171, 0.000181563, 0.000153518, 0.000115431, 7.94008e-05, + 5.55859e-05, 4.77218e-05, 5.59624e-05, 8.00408e-05, 
0.000130395, 0.000184659, 0.000220604, 0.000220398, + 0.000184101, 0.000129668, 7.94357e-05, 4.63696e-05, 3.15498e-05, 3.16778e-05, 4.67709e-05, 8.00837e-05, + 0.000164257, 0.000220901, 0.000243906, 0.000220232, 0.000163282, 0.000101221, 5.55696e-05, 3.15687e-05, + 2.46811e-05, 3.17951e-05, 5.61029e-05, 0.00010208, 0.000192313, 0.000228466, 0.000228055, 0.00019132, + 0.000135475, 8.28695e-05, 4.76363e-05, 3.17207e-05, 3.18329e-05, 4.80319e-05, 8.3641e-05, 0.000136545, + 0.000194159, 0.000203962, 0.000193564, 0.000164464, 0.000123269, 8.29177e-05, 5.58212e-05, 4.6815e-05, + 5.60991e-05, 8.35257e-05, 0.000124162, 0.000165391, 0.000178568, 0.000178447, 0.000177044, 0.000164353, + 0.000135345, 0.000101334, 7.98722e-05, 8.00263e-05, 0.00010178, 0.000135967, 0.000164933, 0.000177413, + 0.00017896, 0.000203835, 0.000227343, 0.000219346, 0.000183589, 0.000153552, 0.000153811, 0.000184272, + 0.000220133, 0.000227869, 0.000204034, 0.000178996, 0.000204641, 0.000245599, 0.000266381, 0.000242499, + 0.000200025, 0.000180655, 0.000200547, 0.000243195, 0.000266727, 0.000245475, 0.000204416, 0.000185313, + 0.00022891, 0.000267126, 0.000266303, 0.000227029, 0.000191299, 0.000191546, 0.000227565, 0.000266585, + 0.000266819, 0.000228355, 0.000192972, 0.000193226, 0.000220892, 0.000243298, 0.000227224, 0.000193116, + 0.000176958, 0.00019338, 0.000227396, 0.000242974, 0.000220189, 0.00018224, 0.000165126, 0.000182777, + 0.000184452, 0.000200435, 0.000191399, 0.000176968, 0.000176991, 0.000191342, 0.000200057, 0.000183759, + 0.000149563, 0.000123756, 0.000124008, 0.000150211, 0.000153857, 0.000180864, 0.000191681, 0.000193468, + 0.000191384, 0.000180279, 0.000153109, 0.000115507, 8.4576e-05, 7.32605e-05, 8.49347e-05, 0.000116163, + 0.000154093, 0.000200993, 0.000227986, 0.000227634, 0.000200117, 0.000153102, 0.000101786, 6.24711e-05, + 4.30082e-05, 4.3128e-05, 6.28796e-05, 0.000102542, 0.000184984, 0.000244115, 0.000267228, 0.000243129, + 0.000183648, 0.000115422, 6.24715e-05, 
3.36981e-05, 2.53705e-05, 3.38925e-05, 6.3015e-05, 0.00011646, + 0.000221235, 0.000267707, 0.000267073, 0.000219788, 0.000149084, 8.43875e-05, 4.30356e-05, 2.54251e-05, + 2.55042e-05, 4.33728e-05, 8.52167e-05, 0.000150469, 0.000228728, 0.000245742, 0.000227673, 0.000181087, + 0.000122902, 7.29834e-05, 4.31838e-05, 3.39974e-05, 4.3403e-05, 7.35933e-05, 0.000124051, 0.000182539, + 0.000204196, 0.000203779, 0.00019146, 0.000163418, 0.000122899, 8.45893e-05, 6.29222e-05, 6.30623e-05, + 8.50736e-05, 0.000123809, 0.000164615, 0.000192509, 0.000178745, 0.000184257, 0.000191376, 0.000180885, + 0.000149064, 0.000115799, 0.000102469, 0.000116203, 0.000149836, 0.000181829, 0.000192182, 0.000184698, + 0.000194197, 0.000227602, 0.000242531, 0.000219264, 0.000181092, 0.000163967, 0.000181826, 0.000220523, + 0.000243831, 0.000228498, 0.000194599, 0.000178222, 0.00022836, 0.000266431, 0.000265778, 0.000226824, + 0.000191362, 0.000191812, 0.000228048, 0.000267291, 0.000267602, 0.00022895, 0.000193197, 0.000193038, + 0.000243596, 0.000266133, 0.000244166, 0.00020288, 0.000183901, 0.00020362, 0.00024537, 0.000267252, + 0.000244248, 0.000202034, 0.000182436, 0.000201798, 0.000219914, 0.000226984, 0.000202831, 0.000177819, + 0.000178022, 0.000203359, 0.000227581, 0.000220314, 0.000184942, 0.000154877, 0.000154845, 0.000184784, + 0.000181151, 0.000191228, 0.00018371, 0.000177896, 0.00018366, 0.000191138, 0.000181046, 0.000149503, + 0.000116298, 0.000102854, 0.000116343, 0.000149588, 0.000163791, 0.000191618, 0.000203375, 0.000203111, + 0.00019099, 0.000163151, 0.000122788, 8.45394e-05, 6.28441e-05, 6.28824e-05, 8.47057e-05, 0.000123192, + 0.000181825, 0.000228007, 0.0002451, 0.000227175, 0.000180756, 0.000122709, 7.28545e-05, 4.30544e-05, + 3.38231e-05, 4.31296e-05, 7.31592e-05, 0.000123445, 0.000220837, 0.000267297, 0.000266739, 0.000219626, + 0.000149104, 8.44935e-05, 4.312e-05, 2.54477e-05, 2.54607e-05, 4.32413e-05, 8.49761e-05, 0.000150132, + 0.00024411, 0.000267192, 0.00024319, 
0.000183917, 0.000115847, 6.28857e-05, 3.39986e-05, 2.55682e-05, + 3.40292e-05, 6.31138e-05, 0.000116538, 0.000185036, 0.000228282, 0.000227879, 0.000200457, 0.000153624, + 0.000102398, 6.30086e-05, 4.34058e-05, 4.34269e-05, 6.31469e-05, 0.000102846, 0.000154464, 0.00020137, + 0.000193957, 0.000191754, 0.000180646, 0.000153539, 0.000115928, 8.48887e-05, 7.34535e-05, 8.5078e-05, + 0.000116387, 0.000154285, 0.000181475, 0.000192302, 0.000177651, 0.000191707, 0.000200165, 0.000183666, + 0.000149307, 0.000123383, 0.000123605, 0.00014996, 0.00018462, 0.000201118, 0.000192363, 0.000177876, + 0.000191783, 0.000219569, 0.00021921, 0.000190941, 0.000164941, 0.000165437, 0.000192324, 0.000221049, + 0.000221193, 0.000192766, 0.000166159, 0.00016585, 0.00021996, 0.00024227, 0.00022661, 0.000193081, + 0.000177415, 0.000194268, 0.0002287, 0.000244543, 0.00022165, 0.000183213, 0.000165438, 0.000182397, + 0.000219454, 0.000226546, 0.00020266, 0.000177976, 0.000178476, 0.000204075, 0.000228484, 0.000221241, + 0.000185699, 0.000155343, 0.000154979, 0.00018457, 0.00019067, 0.00019267, 0.000177686, 0.000169169, + 0.000178188, 0.000193597, 0.000191769, 0.000164278, 0.000131062, 0.000116874, 0.000130584, 0.000163371, + 0.000164266, 0.000176694, 0.000177869, 0.000177851, 0.000176716, 0.000164456, 0.000135833, 0.000101933, + 8.03218e-05, 8.02043e-05, 0.000101624, 0.000135492, 0.000164644, 0.000193371, 0.000203198, 0.000192973, + 0.00016421, 0.000123358, 8.31557e-05, 5.59935e-05, 4.6818e-05, 5.58866e-05, 8.30724e-05, 0.000123516, + 0.000191606, 0.000227705, 0.000227401, 0.000190999, 0.000135555, 8.31647e-05, 4.78966e-05, 3.18287e-05, + 3.17699e-05, 4.77766e-05, 8.31897e-05, 0.000135944, 0.00022038, 0.000243396, 0.000220002, 0.00016352, + 0.000101796, 5.61586e-05, 3.19688e-05, 2.48775e-05, 3.18148e-05, 5.59491e-05, 0.000101777, 0.000163838, + 0.000220371, 0.000220283, 0.000184384, 0.000130411, 8.03645e-05, 4.71541e-05, 3.20674e-05, 3.19629e-05, + 4.68692e-05, 8.00456e-05, 0.000130283, 
0.000184483, 0.000191758, 0.000181752, 0.000154082, 0.000116363, + 8.04071e-05, 5.6379e-05, 4.82162e-05, 5.6199e-05, 8.01134e-05, 0.000116105, 0.000153974, 0.000181753, + 0.000165222, 0.000164124, 0.000153901, 0.000130258, 0.000101987, 8.36893e-05, 8.37044e-05, 0.000102039, + 0.000130355, 0.000154029, 0.000164235, 0.000165266, 0.000165291, 0.000181461, 0.000183847, 0.000163329, + 0.000136024, 0.000124216, 0.000136499, 0.000164162, 0.00018475, 0.000182137, 0.000165623, 0.00015673, + 0.000163569, 0.00018354, 0.000180875, 0.000164863, 0.000156711, 0.000166032, 0.000182847, 0.000185594, + 0.000165052, 0.000137407, 0.000125083, 0.000136701, 0.000183577, 0.000199544, 0.000191064, 0.000177383, + 0.000178094, 0.000193021, 0.000202132, 0.000185895, 0.000151357, 0.000124942, 0.00012448, 0.000149918, + 0.000180558, 0.000190738, 0.000183719, 0.000178516, 0.000184812, 0.000192645, 0.000182663, 0.000150976, + 0.000117447, 0.000103605, 0.00011663, 0.000149349, 0.000164042, 0.000176567, 0.000178014, 0.00017829, + 0.000177355, 0.000165163, 0.000136503, 0.000102511, 8.07849e-05, 8.05329e-05, 0.000101763, 0.000135398, + 0.000155504, 0.000176838, 0.00018384, 0.000176902, 0.000155733, 0.000122862, 8.78198e-05, 6.26008e-05, + 5.37126e-05, 6.23463e-05, 8.73915e-05, 0.000122452, 0.000164634, 0.000191497, 0.000191418, 0.00016453, + 0.000122719, 8.18656e-05, 5.29609e-05, 3.90106e-05, 3.89192e-05, 5.27242e-05, 8.16226e-05, 0.000122647, + 0.000181433, 0.000200572, 0.000181486, 0.000136005, 8.78125e-05, 5.30691e-05, 3.46193e-05, 2.90604e-05, + 3.43956e-05, 5.26804e-05, 8.74193e-05, 0.00013578, 0.000184272, 0.000184487, 0.000150048, 0.000102277, + 6.28005e-05, 3.92835e-05, 2.92168e-05, 2.90551e-05, 3.88043e-05, 6.20696e-05, 0.000101486, 0.000149456, + 0.000163873, 0.000150183, 0.000116786, 8.07673e-05, 5.40762e-05, 3.93422e-05, 3.47e-05, 3.89455e-05, + 5.3295e-05, 7.971e-05, 0.000115732, 0.000149515, 0.000136441, 0.00012402, 0.000103157, 8.06774e-05, + 6.28601e-05, 5.33021e-05, 5.31329e-05, 
6.23436e-05, 7.98387e-05, 0.000102157, 0.000123181, 0.000136111, + 0.000124411, 0.000123824, 0.000116395, 0.000102099, 8.80972e-05, 8.23993e-05, 8.80746e-05, 0.000101969, + 0.000116088, 0.000123418, 0.00012412, 0.000123477, 0.000136384, 0.00014958, 0.000149401, 0.000135994, + 0.000123413, 0.000123714, 0.000136778, 0.00015033, 0.000150286, 0.000136742, 0.000123842, 0.000123745, + 0.000129876, 0.000153196, 0.000163656, 0.000165404, 0.000166112, 0.000165473, 0.00015535, 0.000131594, + 0.00010315, 8.46425e-05, 8.43506e-05, 0.000102184, 0.000153082, 0.000180108, 0.000191611, 0.000194349, + 0.000193074, 0.000182444, 0.000155389, 0.000117587, 8.6239e-05, 7.44187e-05, 8.54958e-05, 0.000115985, + 0.000163108, 0.000191128, 0.000203621, 0.000204191, 0.000192609, 0.000164915, 0.000124426, 8.59286e-05, + 6.39458e-05, 6.36734e-05, 8.50474e-05, 0.000122922, 0.00016427, 0.000193218, 0.000203544, 0.000193735, + 0.000165102, 0.000124183, 8.38875e-05, 5.66566e-05, 4.74156e-05, 5.63674e-05, 8.32827e-05, 0.000123342, + 0.000164511, 0.000191479, 0.000191563, 0.00016474, 0.000122895, 8.20151e-05, 5.31381e-05, 3.92361e-05, + 3.91765e-05, 5.29581e-05, 8.17365e-05, 0.000122592, 0.000163723, 0.000180812, 0.000163935, 0.00012388, + 8.20223e-05, 5.22662e-05, 3.65683e-05, 3.18566e-05, 3.64529e-05, 5.2034e-05, 8.16996e-05, 0.000123551, + 0.0001538, 0.000154107, 0.000123809, 8.38231e-05, 5.32741e-05, 3.6669e-05, 3.01209e-05, 3.00101e-05, + 3.63253e-05, 5.26796e-05, 8.30355e-05, 0.000123075, 0.000130433, 0.000116752, 8.56391e-05, 5.67519e-05, + 3.94538e-05, 3.2037e-05, 3.0099e-05, 3.16854e-05, 3.87272e-05, 5.56478e-05, 8.43587e-05, 0.000115836, + 0.00010236, 8.57183e-05, 6.38156e-05, 4.75674e-05, 3.94416e-05, 3.67055e-05, 3.65041e-05, 3.88229e-05, + 4.65113e-05, 6.24189e-05, 8.44039e-05, 0.000101803, 8.41204e-05, 7.4067e-05, 6.36153e-05, 5.65795e-05, + 5.33297e-05, 5.24404e-05, 5.30374e-05, 5.59239e-05, 6.25876e-05, 7.28955e-05, 8.33033e-05, 8.80873e-05, + 8.4031e-05, 8.5275e-05, 8.51082e-05, 
8.36835e-05, 8.23938e-05, 8.24628e-05, 8.37687e-05, 8.49763e-05, + 8.48556e-05, 8.35455e-05, 8.2405e-05, 8.26193e-05, 0.000102056, 0.000115945, 0.000123228, 0.000124114, + 0.000123743, 0.000124869, 0.000124326, 0.000116858, 0.000102542, 8.85546e-05, 8.28387e-05, 8.83948e-05, + 0.000115515, 0.000153178, 0.000181461, 0.000192345, 0.000182929, 0.000155283, 0.000117307, 8.11734e-05, + 5.70951e-05, 4.88782e-05, 5.66632e-05, 8.01132e-05, 0.000153026, 0.000199999, 0.000227897, 0.000228681, + 0.000201896, 0.000155064, 0.000103597, 6.39952e-05, 4.4215e-05, 4.40134e-05, 6.32707e-05, 0.00010215, + 0.00018076, 0.000227277, 0.000245333, 0.000228261, 0.000182143, 0.000123998, 7.39655e-05, 4.40085e-05, + 3.46431e-05, 4.37634e-05, 7.3327e-05, 0.000122863, 0.000190936, 0.00022738, 0.000227646, 0.000191525, + 0.000136033, 8.35705e-05, 4.83429e-05, 3.23506e-05, 3.23385e-05, 4.8263e-05, 8.33041e-05, 0.000135512, + 0.000181134, 0.000200401, 0.000181425, 0.000135892, 8.76473e-05, 5.30001e-05, 3.47412e-05, 2.93641e-05, + 3.48117e-05, 5.30612e-05, 8.75565e-05, 0.000135598, 0.000153668, 0.000153945, 0.000123591, 8.35558e-05, + 5.30393e-05, 3.65508e-05, 3.01554e-05, 3.01799e-05, 3.65648e-05, 5.28929e-05, 8.31275e-05, 0.000123022, + 0.00011622, 0.000103006, 7.38389e-05, 4.83881e-05, 3.4772e-05, 3.01478e-05, 2.93687e-05, 3.00549e-05, + 3.44657e-05, 4.77231e-05, 7.28621e-05, 0.000102207, 8.0567e-05, 6.37096e-05, 4.39393e-05, 3.23335e-05, + 2.93244e-05, 3.0128e-05, 3.00339e-05, 2.89846e-05, 3.16325e-05, 4.28335e-05, 6.25121e-05, 8.00199e-05, + 5.67399e-05, 4.39985e-05, 3.45114e-05, 3.22339e-05, 3.47269e-05, 3.65371e-05, 3.45117e-05, 3.17005e-05, + 3.35899e-05, 4.28176e-05, 5.58289e-05, 6.26212e-05, 4.8614e-05, 4.37771e-05, 4.35783e-05, 4.81697e-05, + 5.31171e-05, 5.31103e-05, 4.80256e-05, 4.31076e-05, 4.2978e-05, 4.77782e-05, 5.30685e-05, 5.34388e-05, + 5.64569e-05, 6.30598e-05, 7.32512e-05, 8.3521e-05, 8.81195e-05, 8.39039e-05, 7.36216e-05, 6.30901e-05, + 5.61824e-05, 5.32249e-05, 5.26704e-05, 
5.35023e-05, 8.0015e-05, 0.00010211, 0.000123169, 0.000136376, + 0.000136928, 0.000124447, 0.00010336, 8.07737e-05, 6.3059e-05, 5.36874e-05, 5.36252e-05, 6.27624e-05, + 0.000129242, 0.000183365, 0.00022001, 0.000220842, 0.000185267, 0.000131075, 8.08007e-05, 4.75724e-05, + 3.25427e-05, 3.24257e-05, 4.70969e-05, 7.96796e-05, 0.000183131, 0.000242521, 0.000266806, 0.000243863, + 0.000184888, 0.000116656, 6.35454e-05, 3.45949e-05, 2.61054e-05, 3.44031e-05, 6.29404e-05, 0.000115378, + 0.000219078, 0.000266116, 0.000266622, 0.000220161, 0.000149719, 8.50357e-05, 4.36645e-05, 2.60092e-05, + 2.60116e-05, 4.35972e-05, 8.46568e-05, 0.000148832, 0.000219273, 0.000242736, 0.000219796, 0.00016338, + 0.0001016, 5.6097e-05, 3.21594e-05, 2.52787e-05, 3.23251e-05, 5.6299e-05, 0.000101538, 0.0001629, + 0.0001837, 0.000183984, 0.0001495, 0.000101615, 6.22155e-05, 3.89983e-05, 2.92949e-05, 2.94338e-05, + 3.93295e-05, 6.24674e-05, 0.000101468, 0.000149009, 0.000130057, 0.000116254, 8.50146e-05, 5.61045e-05, + 3.89505e-05, 3.18085e-05, 3.01956e-05, 3.20324e-05, 3.91534e-05, 5.5959e-05, 8.44065e-05, 0.000115619, + 8.03256e-05, 6.34002e-05, 4.35985e-05, 3.20142e-05, 2.90869e-05, 3.00446e-05, 3.01262e-05, 2.91771e-05, + 3.18231e-05, 4.29453e-05, 6.24943e-05, 7.98732e-05, 4.73275e-05, 3.44193e-05, 2.57751e-05, 2.49389e-05, + 2.90501e-05, 3.17653e-05, 2.90994e-05, 2.47974e-05, 2.52566e-05, 3.35478e-05, 4.65605e-05, 5.36004e-05, + 3.23057e-05, 2.58073e-05, 2.56053e-05, 3.18332e-05, 3.89116e-05, 3.89798e-05, 3.18589e-05, 2.53347e-05, + 2.51871e-05, 3.15495e-05, 3.88849e-05, 3.92491e-05, 3.21381e-05, 3.39911e-05, 4.31034e-05, 5.5901e-05, + 6.23845e-05, 5.61972e-05, 4.32888e-05, 3.38106e-05, 3.16759e-05, 3.44439e-05, 3.66118e-05, 3.48588e-05, + 4.6815e-05, 6.25659e-05, 8.44258e-05, 0.000101752, 0.000102168, 8.5279e-05, 6.31952e-05, 4.69861e-05, + 3.91033e-05, 3.67077e-05, 3.67876e-05, 3.92044e-05, 7.95397e-05, 0.000115317, 0.000149266, 0.00016403, + 0.000150495, 0.000116888, 8.06518e-05, 
5.39992e-05, 3.9481e-05, 3.504e-05, 3.93308e-05, 5.34902e-05, + 0.000162245, 0.000218994, 0.000242976, 0.000220373, 0.000163988, 0.000102049, 5.63514e-05, 3.22528e-05, + 2.52347e-05, 3.21075e-05, 5.58345e-05, 0.000100854, 0.000218578, 0.000265718, 0.0002664, 0.000220051, + 0.000149616, 8.49083e-05, 4.35382e-05, 2.58968e-05, 2.5888e-05, 4.3411e-05, 8.4347e-05, 0.000148382, + 0.000241876, 0.000265842, 0.000242791, 0.00018387, 0.000115791, 6.29115e-05, 3.42048e-05, 2.59007e-05, + 3.43398e-05, 6.29914e-05, 0.000115408, 0.000182897, 0.000219059, 0.000219486, 0.000183833, 0.000129775, + 7.9756e-05, 4.68533e-05, 3.21433e-05, 3.23118e-05, 4.72629e-05, 8.00321e-05, 0.00012946, 0.000183057, + 0.000163127, 0.00014946, 0.000115913, 7.97889e-05, 5.32883e-05, 3.89858e-05, 3.48174e-05, 3.94074e-05, + 5.38133e-05, 7.98969e-05, 0.000115367, 0.000148786, 0.000101745, 8.49632e-05, 6.29319e-05, 4.67002e-05, + 3.88037e-05, 3.64789e-05, 3.67183e-05, 3.92949e-05, 4.6957e-05, 6.25904e-05, 8.4203e-05, 0.000101331, + 5.62433e-05, 4.34449e-05, 3.39337e-05, 3.16952e-05, 3.43572e-05, 3.64712e-05, 3.47172e-05, 3.19884e-05, + 3.37851e-05, 4.28128e-05, 5.55935e-05, 6.22174e-05, 3.20578e-05, 2.55638e-05, 2.53584e-05, 3.16044e-05, + 3.87762e-05, 3.89851e-05, 3.19469e-05, 2.54096e-05, 2.51908e-05, 3.14512e-05, 3.86897e-05, 3.90057e-05, + 2.48989e-05, 2.53537e-05, 3.35944e-05, 4.64923e-05, 5.33536e-05, 4.69089e-05, 3.39239e-05, 2.52956e-05, + 2.45176e-05, 2.87455e-05, 3.16431e-05, 2.91355e-05, 3.16775e-05, 4.27794e-05, 6.2334e-05, 7.97092e-05, + 8.00862e-05, 6.30546e-05, 4.32192e-05, 3.16837e-05, 2.8838e-05, 2.98786e-05, 3.0009e-05, 2.90585e-05, + 5.54665e-05, 8.39616e-05, 0.000115411, 0.000130106, 0.000116386, 8.50888e-05, 5.6121e-05, 3.89503e-05, + 3.17868e-05, 3.00998e-05, 3.18029e-05, 3.87681e-05, 0.00010074, 0.000148546, 0.000183762, 0.000184484, + 0.000150126, 0.000102135, 6.25699e-05, 3.91984e-05, 2.93405e-05, 2.92906e-05, 3.89402e-05, 6.18243e-05, + 0.000189968, 0.000226648, 0.000227274, 
0.000191338, 0.000135795, 8.32333e-05, 4.79955e-05, 3.20438e-05, + 3.20297e-05, 4.78504e-05, 8.26649e-05, 0.000134614, 0.000226111, 0.000244162, 0.000227213, 0.000181152, + 0.000123019, 7.31013e-05, 4.33658e-05, 3.41949e-05, 4.34179e-05, 7.2966e-05, 0.000122298, 0.000179849, + 0.00022641, 0.000227013, 0.000200217, 0.000153416, 0.000102096, 6.283e-05, 4.34486e-05, 4.35768e-05, + 6.30788e-05, 0.000102009, 0.000152574, 0.00019898, 0.000190797, 0.000181278, 0.000153628, 0.000115686, + 7.97449e-05, 5.60445e-05, 4.82651e-05, 5.64563e-05, 8.01779e-05, 0.000115496, 0.00015265, 0.000180303, + 0.000135822, 0.000123417, 0.000102333, 7.97159e-05, 6.21003e-05, 5.30044e-05, 5.33078e-05, 6.27516e-05, + 8.00614e-05, 0.000101861, 0.000122438, 0.000135312, 8.34484e-05, 7.3298e-05, 6.27092e-05, 5.56834e-05, + 5.27088e-05, 5.22875e-05, 5.32935e-05, 5.62897e-05, 6.27241e-05, 7.26334e-05, 8.2733e-05, 8.7425e-05, + 4.79875e-05, 4.31295e-05, 4.28933e-05, 4.75375e-05, 5.2729e-05, 5.30717e-05, 4.81982e-05, 4.32335e-05, + 4.28738e-05, 4.73926e-05, 5.24865e-05, 5.28037e-05, 3.1743e-05, 3.36193e-05, 4.27264e-05, 5.55719e-05, + 6.22051e-05, 5.61732e-05, 4.32948e-05, 3.37181e-05, 3.14217e-05, 3.40299e-05, 3.61221e-05, 3.44009e-05, + 3.15416e-05, 4.26766e-05, 6.22434e-05, 7.96303e-05, 8.00287e-05, 6.30029e-05, 4.31349e-05, 3.15386e-05, + 2.8622e-05, 2.96133e-05, 2.97547e-05, 2.88637e-05, 4.73524e-05, 7.24157e-05, 0.000101778, 0.000115845, + 0.000102578, 7.3278e-05, 4.77667e-05, 3.41975e-05, 2.96799e-05, 2.90223e-05, 2.97829e-05, 3.41793e-05, + 8.24296e-05, 0.000122382, 0.000153314, 0.000153853, 0.000123517, 8.33491e-05, 5.27466e-05, 3.6259e-05, + 2.98878e-05, 2.98951e-05, 3.61821e-05, 5.23392e-05, 0.000134706, 0.000180492, 0.00020022, 0.000181553, + 0.000136011, 8.76068e-05, 5.28457e-05, 3.45468e-05, 2.91322e-05, 3.44743e-05, 5.25143e-05, 8.67563e-05, + 0.000192476, 0.000202953, 0.000193269, 0.000164542, 0.000123406, 8.30327e-05, 5.5932e-05, 4.68833e-05, + 5.59711e-05, 8.29089e-05, 0.000122833, 
0.000163568, 0.000202578, 0.000203146, 0.000191496, 0.000163576, + 0.000122925, 8.45632e-05, 6.29273e-05, 6.30011e-05, 8.4632e-05, 0.000122567, 0.000162534, 0.000190239, + 0.000193094, 0.000191727, 0.000180882, 0.000153543, 0.000115707, 8.47041e-05, 7.33911e-05, 8.49485e-05, + 0.00011576, 0.000152812, 0.000179443, 0.000190543, 0.000164985, 0.00016428, 0.000153902, 0.000129894, + 0.000101524, 8.34307e-05, 8.36724e-05, 0.000101963, 0.000129796, 0.000152824, 0.000162829, 0.000164318, + 0.000124187, 0.000123646, 0.000115948, 0.000101433, 8.75308e-05, 8.21529e-05, 8.80862e-05, 0.000101914, + 0.000115608, 0.000122509, 0.000123176, 0.000122883, 8.35386e-05, 8.47608e-05, 8.44441e-05, 8.29716e-05, + 8.18669e-05, 8.22377e-05, 8.3701e-05, 8.47534e-05, 8.42682e-05, 8.26769e-05, 8.15331e-05, 8.19515e-05, + 5.59126e-05, 6.25578e-05, 7.27056e-05, 8.2978e-05, 8.77087e-05, 8.36486e-05, 7.33599e-05, 6.26251e-05, + 5.54514e-05, 5.23311e-05, 5.18043e-05, 5.28019e-05, 4.65013e-05, 6.23551e-05, 8.42158e-05, 0.000101497, + 0.000101889, 8.49668e-05, 6.27798e-05, 4.64125e-05, 3.83834e-05, 3.59317e-05, 3.60993e-05, 3.87052e-05, + 5.55151e-05, 8.41575e-05, 0.000115586, 0.000130113, 0.000116211, 8.47823e-05, 5.5718e-05, 3.84676e-05, + 3.1255e-05, 2.95943e-05, 3.14276e-05, 3.85969e-05, 8.27265e-05, 0.000122767, 0.000153602, 0.000153933, + 0.000123423, 8.31643e-05, 5.25199e-05, 3.60119e-05, 2.96459e-05, 2.97056e-05, 3.61091e-05, 5.24442e-05, + 0.0001231, 0.000163422, 0.000180773, 0.000163969, 0.000123715, 8.16183e-05, 5.17916e-05, 3.61707e-05, + 3.15677e-05, 3.62194e-05, 5.17592e-05, 8.13068e-05, 0.000163955, 0.000191154, 0.000191505, 0.000164707, + 0.000122651, 8.15762e-05, 5.26784e-05, 3.88668e-05, 3.88879e-05, 5.26703e-05, 8.13391e-05, 0.000122042, + 0.000177949, 0.000178224, 0.000177098, 0.000164478, 0.000135415, 0.000101353, 7.98668e-05, 7.99553e-05, + 0.000101514, 0.000135364, 0.000164031, 0.0001765, 0.000178256, 0.000184406, 0.000191805, 0.000181199, + 0.000149144, 0.00011576, 
0.000102371, 0.000115917, 0.000149093, 0.000180508, 0.00019063, 0.000183509, + 0.000177763, 0.000192413, 0.000200895, 0.000183997, 0.000149287, 0.000123244, 0.000123385, 0.000149437, + 0.000183498, 0.0001995, 0.000190872, 0.00017711, 0.000165789, 0.000182296, 0.000184419, 0.000163397, + 0.0001358, 0.000123931, 0.000136118, 0.000163418, 0.00018347, 0.00018064, 0.000164539, 0.000156455, + 0.000136691, 0.000150007, 0.000149512, 0.000135719, 0.000122961, 0.000123217, 0.000136137, 0.000149324, + 0.000148911, 0.000135376, 0.000122941, 0.00012351, 0.000101983, 0.000115965, 0.00012304, 0.000123648, + 0.000123107, 0.000124119, 0.000123373, 0.000115602, 0.000101093, 8.72159e-05, 8.18811e-05, 8.79275e-05, + 7.9881e-05, 0.000102134, 0.000123047, 0.000135942, 0.000136207, 0.00012351, 0.000102223, 7.94634e-05, + 6.17064e-05, 5.24766e-05, 5.27153e-05, 6.22537e-05, 7.97623e-05, 0.000115769, 0.000149534, 0.000163824, + 0.000149842, 0.000115968, 7.95996e-05, 5.29026e-05, 3.84306e-05, 3.41378e-05, 3.87003e-05, 5.32672e-05, + 0.000101581, 0.000149594, 0.000184453, 0.000184526, 0.000149665, 0.000101448, 6.18208e-05, 3.84618e-05, + 2.86739e-05, 2.87762e-05, 3.87297e-05, 6.21174e-05, 0.000135957, 0.00018172, 0.000200931, 0.000181658, + 0.000135758, 8.72314e-05, 5.24467e-05, 3.4166e-05, 2.88205e-05, 3.43373e-05, 5.27495e-05, 8.75491e-05, + 0.000164855, 0.000191857, 0.000191831, 0.000164733, 0.00012255, 8.14472e-05, 5.25465e-05, 3.87505e-05, + 3.88353e-05, 5.27865e-05, 8.17652e-05, 0.000122809, 0.000176973, 0.000184073, 0.000177107, 0.000155669, + 0.000122434, 8.72103e-05, 6.20709e-05, 5.33865e-05, 6.22328e-05, 8.744e-05, 0.000122554, 0.000155593}; + const std::vector> wfcr + = {{0.923838, -0.766723}, {0.946855, -0.787058}, {0.98573, -0.819083}, {0.979491, -0.813515}, + {0.904645, -0.751906}, {0.806637, -0.67153}, {0.762282, -0.634627}, {0.808631, -0.671024}, + {0.907874, -0.750202}, {0.982864, -0.810243}, {0.988344, -0.815121}, {0.948235, -0.784292}, + {0.94822, -0.78808}, {1.01275, 
-0.841533}, {1.07019, -0.888017}, {1.05126, -0.871455}, + {0.961291, -0.797015}, {0.878556, -0.728999}, {0.879115, -0.729255}, {0.962554, -0.796984}, + {1.05236, -0.869683}, {1.07057, -0.884247}, {1.01257, -0.837506}, {0.948058, -0.786339}, + {0.98916, -0.822}, {1.07203, -0.889774}, {1.10691, -0.917254}, {1.05194, -0.870988}, + {0.954991, -0.791043}, {0.907701, -0.752602}, {0.955251, -0.792053}, {1.05176, -0.871156}, + {1.1057, -0.914953}, {1.07012, -0.88562}, {0.987688, -0.818688}, {0.946156, -0.785852}, + {0.984195, -0.817182}, {1.05394, -0.87392}, {1.0527, -0.871877}, {0.981212, -0.812574}, + {0.91036, -0.754655}, {0.910444, -0.755421}, {0.981031, -0.81369}, {1.05148, -0.871097}, + {1.0516, -0.870478}, {0.981773, -0.812974}, {0.912427, -0.756748}, {0.913464, -0.758645}, + {0.908971, -0.754295}, {0.963366, -0.79862}, {0.955384, -0.791681}, {0.910268, -0.754744}, + {0.885649, -0.734965}, {0.910563, -0.755494}, {0.95523, -0.791423}, {0.962139, -0.795868}, + {0.907001, -0.749786}, {0.827693, -0.684931}, {0.791008, -0.655941}, {0.829214, -0.688433}, + {0.809457, -0.671775}, {0.879509, -0.729387}, {0.907706, -0.752877}, {0.910496, -0.755542}, + {0.910841, -0.755549}, {0.908354, -0.752191}, {0.879698, -0.726779}, {0.808843, -0.66724}, + {0.715068, -0.590231}, {0.647503, -0.535978}, {0.647961, -0.537899}, {0.716026, -0.594774}, + {0.76381, -0.633945}, {0.879729, -0.729707}, {0.955803, -0.792947}, {0.981976, -0.814417}, + {0.956255, -0.791826}, {0.880226, -0.72699}, {0.763934, -0.629583}, {0.633519, -0.522245}, + {0.529231, -0.437973}, {0.489342, -0.407046}, {0.529559, -0.441213}, {0.633801, -0.527082}, + {0.809945, -0.671563}, {0.963921, -0.799094}, {1.05348, -0.873514}, {1.05332, -0.872713}, + {0.963433, -0.796712}, {0.809205, -0.667849}, {0.633275, -0.522824}, {0.482815, -0.40069}, + {0.396853, -0.332197}, {0.397105, -0.333528}, {0.483473, -0.404133}, {0.6341, -0.527068}, + {0.909689, -0.752964}, {1.05459, -0.873619}, {1.10791, -0.918271}, {1.05302, -0.872265}, + 
{0.90709, -0.750598}, {0.713992, -0.591227}, {0.527792, -0.439548}, {0.395934, -0.333532}, + {0.348973, -0.296229}, {0.397168, -0.334944}, {0.530053, -0.441972}, {0.716797, -0.593993}, + {0.984821, -0.814125}, {1.07256, -0.888377}, {1.07113, -0.888141}, {0.981027, -0.813528}, + {0.825252, -0.684905}, {0.644281, -0.537062}, {0.486491, -0.409564}, {0.395306, -0.3362}, + {0.396422, -0.336303}, {0.489654, -0.409894}, {0.648876, -0.537628}, {0.830161, -0.685605}, + {0.989527, -0.818036}, {1.01305, -0.839816}, {0.986307, -0.818834}, {0.908865, -0.75512}, + {0.786138, -0.6546}, {0.643264, -0.53866}, {0.526119, -0.443962}, {0.48143, -0.407024}, + {0.528918, -0.443292}, {0.648267, -0.537452}, {0.792158, -0.653131}, {0.914289, -0.753784}, + {0.948358, -0.785236}, {0.946951, -0.786282}, {0.942838, -0.783644}, {0.908251, -0.755238}, + {0.823591, -0.686102}, {0.711542, -0.594982}, {0.630997, -0.529084}, {0.632407, -0.528638}, + {0.715391, -0.593543}, {0.828847, -0.683616}, {0.913505, -0.752152}, {0.946682, -0.781005}, + {0.94745, -0.788449}, {1.01127, -0.841328}, {1.06872, -0.887642}, {1.05023, -0.871317}, + {0.960676, -0.797313}, {0.878136, -0.729712}, {0.878906, -0.730291}, {0.962866, -0.798309}, + {1.05343, -0.871288}, {1.07215, -0.886011}, {1.01393, -0.839118}, {0.948459, -0.787425}, + {1.01296, -0.84335}, {1.11059, -0.922846}, {1.15768, -0.959828}, {1.10507, -0.915186}, + {1.00344, -0.831421}, {0.953133, -0.790722}, {1.00417, -0.833196}, {1.1063, -0.916922}, + {1.15903, -0.959721}, {1.11171, -0.920924}, {1.01359, -0.841467}, {0.964068, -0.802384}, + {1.07183, -0.891375}, {1.15889, -0.961665}, {1.1579, -0.959214}, {1.06925, -0.885504}, + {0.981048, -0.813396}, {0.981203, -0.814497}, {1.06956, -0.887687}, {1.15808, -0.960244}, + {1.15877, -0.960442}, {1.07156, -0.889051}, {0.98419, -0.818312}, {0.984327, -0.819464}, + {1.05334, -0.875089}, {1.10617, -0.917553}, {1.0693, -0.886373}, {0.985548, -0.817425}, + {0.943012, -0.782968}, {0.985796, -0.818496}, {1.06949, -0.886967}, 
{1.10601, -0.916251}, + {1.05287, -0.87224}, {0.957248, -0.794249}, {0.910429, -0.756958}, {0.95766, -0.796621}, + {0.962589, -0.799601}, {1.00376, -0.833124}, {0.980864, -0.814135}, {0.943034, -0.782996}, + {0.943291, -0.782814}, {0.981453, -0.813129}, {1.0043, -0.830548}, {0.962815, -0.795639}, + {0.868239, -0.718263}, {0.788907, -0.654425}, {0.788959, -0.655994}, {0.86827, -0.722043}, + {0.878994, -0.730451}, {0.953312, -0.791624}, {0.981566, -0.814761}, {0.986549, -0.818046}, + {0.982076, -0.812596}, {0.954135, -0.787486}, {0.879849, -0.725061}, {0.763971, -0.630051}, + {0.652763, -0.540298}, {0.606386, -0.504236}, {0.652418, -0.543523}, {0.763313, -0.635249}, + {0.879898, -0.730735}, {1.00539, -0.834003}, {1.07124, -0.887672}, {1.07125, -0.885982}, + {1.00547, -0.829434}, {0.88019, -0.724606}, {0.71756, -0.590964}, {0.56108, -0.464272}, + {0.464024, -0.387048}, {0.463816, -0.388608}, {0.560587, -0.468581}, {0.717063, -0.596964}, + {0.964907, -0.79963}, {1.10884, -0.918112}, {1.16062, -0.96002}, {1.1078, -0.914798}, + {0.963434, -0.794295}, {0.763661, -0.629857}, {0.560587, -0.464871}, {0.409729, -0.343813}, + {0.354046, -0.300064}, {0.409886, -0.34602}, {0.56118, -0.469098}, {0.764854, -0.635378}, + {1.05615, -0.873364}, {1.16173, -0.96079}, {1.16047, -0.959515}, {1.05297, -0.870166}, + {0.866966, -0.716971}, {0.650974, -0.540976}, {0.462596, -0.389054}, {0.353371, -0.301627}, + {0.353899, -0.302122}, {0.464307, -0.390691}, {0.653947, -0.543888}, {0.870675, -0.720663}, + {1.07423, -0.887613}, {1.11286, -0.920772}, {1.07081, -0.886703}, {0.954616, -0.791248}, + {0.78542, -0.653081}, {0.603294, -0.505609}, {0.461777, -0.391636}, {0.408783, -0.348602}, + {0.463771, -0.391654}, {0.607272, -0.505968}, {0.790837, -0.65401}, {0.960022, -0.792498}, + {1.01446, -0.839305}, {1.01249, -0.839574}, {0.980906, -0.814407}, {0.905738, -0.752993}, + {0.784415, -0.654267}, {0.64917, -0.544714}, {0.558676, -0.471242}, {0.559894, -0.471059}, + {0.652772, -0.544183}, {0.789988, 
-0.653468}, {0.912063, -0.752107}, {0.986013, -0.813704}, + {0.947943, -0.786696}, {0.961747, -0.799577}, {0.98013, -0.8149}, {0.952881, -0.792257}, + {0.864487, -0.719835}, {0.761105, -0.635464}, {0.715723, -0.598056}, {0.763315, -0.635511}, + {0.868656, -0.719369}, {0.958234, -0.790858}, {0.985232, -0.812944}, {0.964889, -0.798132}, + {0.986425, -0.821971}, {1.06872, -0.888871}, {1.10427, -0.916299}, {1.05044, -0.870663}, + {0.954341, -0.791611}, {0.907489, -0.753982}, {0.955531, -0.794102}, {1.05292, -0.873803}, + {1.10779, -0.918069}, {1.07234, -0.888798}, {0.988752, -0.82125}, {0.945192, -0.787184}, + {1.07002, -0.890928}, {1.15705, -0.960808}, {1.15661, -0.958449}, {1.06867, -0.885218}, + {0.981026, -0.813747}, {0.981551, -0.815461}, {1.07025, -0.889167}, {1.15912, -0.962105}, + {1.15997, -0.962456}, {1.07242, -0.890862}, {0.984159, -0.819515}, {0.983228, -0.819809}, + {1.10577, -0.919416}, {1.15678, -0.959814}, {1.10845, -0.918813}, {1.01016, -0.837834}, + {0.961188, -0.798356}, {1.01121, -0.840304}, {1.11041, -0.922003}, {1.15925, -0.961781}, + {1.10809, -0.919633}, {1.00705, -0.837288}, {0.956173, -0.796589}, {1.00563, -0.83779}, + {1.0509, -0.873278}, {1.06811, -0.886659}, {1.0097, -0.838137}, {0.945196, -0.785}, + {0.945791, -0.785383}, {1.01137, -0.838798}, {1.07049, -0.886646}, {1.0534, -0.872207}, + {0.964632, -0.799738}, {0.881846, -0.732942}, {0.881132, -0.733616}, {0.962726, -0.801192}, + {0.953683, -0.792715}, {0.980082, -0.81419}, {0.9608, -0.79781}, {0.945936, -0.784524}, + {0.96199, -0.796106}, {0.982244, -0.811106}, {0.956339, -0.788946}, {0.868699, -0.71735}, + {0.765224, -0.633848}, {0.718571, -0.597372}, {0.763758, -0.635912}, {0.866261, -0.720794}, + {0.906923, -0.753674}, {0.981456, -0.814563}, {1.01186, -0.838282}, {1.01232, -0.836397}, + {0.982821, -0.809633}, {0.909065, -0.747462}, {0.78849, -0.648623}, {0.653285, -0.539379}, + {0.561887, -0.466699}, {0.56111, -0.467981}, {0.651279, -0.5431}, {0.786027, -0.654231}, + {0.95633, 
-0.79314}, {1.07174, -0.887175}, {1.11221, -0.918584}, {1.07195, -0.882926}, + {0.957053, -0.786518}, {0.788597, -0.647974}, {0.606688, -0.500441}, {0.464746, -0.38669}, + {0.410517, -0.344417}, {0.463565, -0.388925}, {0.604997, -0.50505}, {0.787204, -0.654458}, + {1.0551, -0.872699}, {1.16145, -0.959325}, {1.16097, -0.957426}, {1.05411, -0.867994}, + {0.868563, -0.715153}, {0.652854, -0.53954}, {0.46448, -0.387728}, {0.354829, -0.300232}, + {0.354491, -0.300814}, {0.463852, -0.389801}, {0.652697, -0.543512}, {0.869216, -0.720441}, + {1.11001, -0.916672}, {1.16138, -0.95894}, {1.10807, -0.914757}, {0.963429, -0.795738}, + {0.763685, -0.63268}, {0.560783, -0.468402}, {0.410032, -0.347146}, {0.354347, -0.302496}, + {0.410228, -0.347288}, {0.56176, -0.469298}, {0.765819, -0.634738}, {0.966148, -0.798407}, + {1.07317, -0.886755}, {1.07168, -0.886629}, {1.00467, -0.832134}, {0.87882, -0.729311}, + {0.716169, -0.597018}, {0.5599, -0.470567}, {0.463163, -0.392404}, {0.463558, -0.392165}, + {0.561382, -0.470049}, {0.719143, -0.596626}, {0.88289, -0.729284}, {1.00835, -0.832346}, + {0.988023, -0.818802}, {0.981672, -0.815011}, {0.952472, -0.791468}, {0.877698, -0.730165}, + {0.761775, -0.635523}, {0.650706, -0.545214}, {0.604801, -0.507751}, {0.65197, -0.545178}, + {0.764633, -0.635155}, {0.882053, -0.729255}, {0.957158, -0.790261}, {0.984764, -0.814137}, + {0.944125, -0.785381}, {0.980634, -0.816017}, {1.00247, -0.833302}, {0.960503, -0.797932}, + {0.865661, -0.719861}, {0.786294, -0.655153}, {0.786934, -0.655821}, {0.867738, -0.72121}, + {0.964114, -0.798652}, {1.00697, -0.832725}, {0.9845, -0.814772}, {0.945659, -0.784752}, + {0.980221, -0.81691}, {1.04993, -0.87276}, {1.04997, -0.870966}, {0.980039, -0.812739}, + {0.910231, -0.756159}, {0.910895, -0.758138}, {0.98214, -0.817416}, {1.05352, -0.87563}, + {1.05427, -0.875431}, {0.98389, -0.817603}, {0.912607, -0.760119}, {0.911159, -0.760133}, + {1.0503, -0.874212}, {1.10335, -0.916192}, {1.06756, -0.885521}, {0.985103, 
-0.817782}, + {0.943571, -0.784771}, {0.987028, -0.821611}, {1.07126, -0.891067}, {1.10818, -0.920883}, + {1.05497, -0.876791}, {0.958468, -0.797971}, {0.910028, -0.759186}, {0.955566, -0.797109}, + {1.04949, -0.872734}, {1.06685, -0.886073}, {1.00903, -0.838073}, {0.945229, -0.785811}, + {0.946373, -0.787131}, {1.01229, -0.841303}, {1.07162, -0.889596}, {1.05463, -0.87523}, + {0.965725, -0.80243}, {0.882442, -0.73492}, {0.880931, -0.734627}, {0.961731, -0.801255}, + {0.978262, -0.813467}, {0.983539, -0.81753}, {0.944576, -0.785031}, {0.921942, -0.765645}, + {0.946845, -0.78501}, {0.987626, -0.817416}, {0.983227, -0.813215}, {0.909684, -0.753089}, + {0.81169, -0.673669}, {0.765633, -0.6372}, {0.808995, -0.673899}, {0.905172, -0.753411}, + {0.908055, -0.754985}, {0.942135, -0.782595}, {0.945962, -0.784348}, {0.946988, -0.78301}, + {0.945025, -0.779221}, {0.912205, -0.751041}, {0.828845, -0.682783}, {0.717196, -0.592456}, + {0.635566, -0.527221}, {0.634306, -0.527791}, {0.713839, -0.594297}, {0.824462, -0.685961}, + {0.909756, -0.755058}, {0.986802, -0.817238}, {1.01281, -0.836246}, {0.988323, -0.813324}, + {0.912603, -0.749164}, {0.791099, -0.649177}, {0.648844, -0.533821}, {0.531268, -0.439455}, + {0.48471, -0.403144}, {0.529169, -0.440952}, {0.64542, -0.537299}, {0.787479, -0.654588}, + {0.982643, -0.813071}, {1.07224, -0.885117}, {1.07268, -0.883124}, {0.984031, -0.80821}, + {0.82931, -0.68049}, {0.649052, -0.53364}, {0.491369, -0.406426}, {0.399245, -0.332894}, + {0.398288, -0.333291}, {0.488944, -0.408094}, {0.646189, -0.537301}, {0.826964, -0.685747}, + {1.05484, -0.870787}, {1.10917, -0.91438}, {1.05505, -0.868682}, {0.909755, -0.748716}, + {0.717298, -0.591352}, {0.531557, -0.440698}, {0.399524, -0.334343}, {0.351496, -0.296061}, + {0.398033, -0.334165}, {0.529442, -0.441226}, {0.715595, -0.593279}, {0.908919, -0.751527}, + {1.05492, -0.870641}, {1.05462, -0.870578}, {0.964648, -0.796749}, {0.810731, -0.670714}, + {0.635379, -0.527789}, {0.485236, 
-0.406039}, {0.398919, -0.336311}, {0.398286, -0.335741}, + {0.483848, -0.404716}, {0.634293, -0.526529}, {0.810581, -0.670086}, {0.965132, -0.79669}, + {0.983137, -0.813267}, {0.956533, -0.792503}, {0.880212, -0.730296}, {0.764332, -0.635357}, + {0.63446, -0.529235}, {0.530208, -0.44443}, {0.489803, -0.411621}, {0.529553, -0.44349}, + {0.633996, -0.527433}, {0.764852, -0.633001}, {0.881578, -0.728017}, {0.957708, -0.791087}, + {0.911065, -0.756731}, {0.90759, -0.754743}, {0.878865, -0.730867}, {0.808498, -0.672442}, + {0.714952, -0.595549}, {0.646991, -0.540274}, {0.646913, -0.540487}, {0.71513, -0.595704}, + {0.809667, -0.671647}, {0.881103, -0.72891}, {0.909906, -0.752574}, {0.912056, -0.755782}, + {0.910096, -0.758283}, {0.953976, -0.794023}, {0.960955, -0.79835}, {0.906061, -0.752102}, + {0.826491, -0.686809}, {0.789153, -0.657105}, {0.827152, -0.688951}, {0.907821, -0.754678}, + {0.963956, -0.799534}, {0.957414, -0.793498}, {0.912436, -0.757324}, {0.886661, -0.737845}, + {0.905238, -0.754449}, {0.9598, -0.798109}, {0.953186, -0.791836}, {0.909625, -0.756445}, + {0.886077, -0.738438}, {0.911681, -0.76052}, {0.95712, -0.797639}, {0.964931, -0.802832}, + {0.910193, -0.756825}, {0.830028, -0.691083}, {0.791203, -0.660238}, {0.826951, -0.690435}, + {0.959333, -0.798864}, {1.00072, -0.832245}, {0.979131, -0.814474}, {0.942911, -0.785392}, + {0.94444, -0.787394}, {0.983443, -0.819466}, {1.00692, -0.837934}, {0.96591, -0.803247}, + {0.871256, -0.725178}, {0.79087, -0.659725}, {0.788973, -0.659029}, {0.866207, -0.722797}, + {0.951502, -0.792164}, {0.978073, -0.81405}, {0.959796, -0.799062}, {0.946133, -0.787637}, + {0.96307, -0.800928}, {0.983832, -0.817045}, {0.958292, -0.79525}, {0.870968, -0.723293}, + {0.767495, -0.63878}, {0.720162, -0.600778}, {0.763979, -0.637559}, {0.864966, -0.720942}, + {0.907024, -0.754961}, {0.941191, -0.78304}, {0.945516, -0.78567}, {0.94707, -0.785292}, + {0.945425, -0.782215}, {0.912778, -0.75433}, {0.829649, -0.685964}, {0.718325, 
-0.595226}, + {0.636876, -0.529364}, {0.635356, -0.529169}, {0.714159, -0.594905}, {0.82393, -0.686022}, + {0.883695, -0.734345}, {0.943046, -0.78228}, {0.962661, -0.796255}, {0.945518, -0.779638}, + {0.887909, -0.730577}, {0.78874, -0.648806}, {0.666353, -0.549122}, {0.56184, -0.464538}, + {0.519736, -0.431134}, {0.559555, -0.464971}, {0.662326, -0.55068}, {0.783983, -0.651881}, + {0.910466, -0.754149}, {0.982984, -0.812088}, {0.984028, -0.810411}, {0.913295, -0.750132}, + {0.789162, -0.647354}, {0.644338, -0.528999}, {0.51765, -0.426214}, {0.44362, -0.36659}, + {0.442638, -0.366719}, {0.514944, -0.427134}, {0.640588, -0.531597}, {0.785351, -0.6515}, + {0.956983, -0.790247}, {1.00705, -0.829835}, {0.958737, -0.788403}, {0.830394, -0.681967}, + {0.667196, -0.54804}, {0.51823, -0.426587}, {0.417992, -0.345236}, {0.382564, -0.316793}, + {0.416077, -0.344799}, {0.514972, -0.426665}, {0.663527, -0.549448}, {0.827298, -0.684327}, + {0.964904, -0.795846}, {0.965713, -0.796009}, {0.87102, -0.71776}, {0.718984, -0.592754}, + {0.563014, -0.464941}, {0.444786, -0.368332}, {0.383195, -0.318124}, {0.382071, -0.317315}, + {0.441832, -0.36636}, {0.55925, -0.462805}, {0.715535, -0.591263}, {0.868694, -0.717077}, + {0.909407, -0.751137}, {0.870192, -0.719561}, {0.766991, -0.634981}, {0.637511, -0.528456}, + {0.521302, -0.43282}, {0.444302, -0.369591}, {0.417088, -0.347316}, {0.442097, -0.367674}, + {0.517827, -0.429316}, {0.634041, -0.524121}, {0.764456, -0.630976}, {0.868927, -0.717143}, + {0.828542, -0.686918}, {0.789339, -0.655616}, {0.719679, -0.598187}, {0.636449, -0.529015}, + {0.561686, -0.467087}, {0.516942, -0.430452}, {0.515986, -0.429929}, {0.559338, -0.465207}, + {0.633862, -0.525378}, {0.71781, -0.593316}, {0.788424, -0.651268}, {0.828303, -0.685161}, + {0.789856, -0.65752}, {0.78791, -0.656065}, {0.764306, -0.635606}, {0.716127, -0.594935}, + {0.665021, -0.552867}, {0.642661, -0.535285}, {0.664276, -0.553589}, {0.715337, -0.594958}, + {0.764184, -0.633695}, {0.78849, 
-0.652733}, {0.79048, -0.654887}, {0.787598, -0.65419}, + {0.826413, -0.689125}, {0.866134, -0.720901}, {0.866302, -0.719642}, {0.8266, -0.686494}, + {0.786849, -0.654679}, {0.787158, -0.656254}, {0.827716, -0.689988}, {0.868472, -0.722502}, + {0.869048, -0.721551}, {0.829017, -0.688201}, {0.788291, -0.655726}, {0.787238, -0.65636}, + {0.806404, -0.672542}, {0.876255, -0.729902}, {0.905549, -0.754561}, {0.909824, -0.759239}, + {0.911367, -0.761343}, {0.909808, -0.759645}, {0.88218, -0.735273}, {0.812453, -0.676096}, + {0.719265, -0.598635}, {0.650973, -0.542973}, {0.649308, -0.542686}, {0.714759, -0.597186}, + {0.875528, -0.730116}, {0.949787, -0.791812}, {0.979432, -0.816962}, {0.986245, -0.822968}, + {0.983221, -0.820008}, {0.956268, -0.79652}, {0.882883, -0.734657}, {0.767908, -0.639209}, + {0.657054, -0.548106}, {0.609749, -0.509898}, {0.653488, -0.546611}, {0.76165, -0.636052}, + {0.903865, -0.7535}, {0.978521, -0.815542}, {1.01024, -0.841477}, {1.01218, -0.842025}, + {0.98366, -0.817067}, {0.91053, -0.755649}, {0.79074, -0.656558}, {0.656537, -0.546318}, + {0.565677, -0.472109}, {0.564151, -0.471486}, {0.652219, -0.544642}, {0.784485, -0.654334}, + {0.907658, -0.755482}, {0.984871, -0.818772}, {1.0117, -0.839332}, {0.987933, -0.817759}, + {0.912552, -0.754259}, {0.791414, -0.654168}, {0.650012, -0.538202}, {0.53361, -0.443007}, + {0.487743, -0.40577}, {0.531668, -0.442571}, {0.646238, -0.537976}, {0.78641, -0.654744}, + {0.90948, -0.754644}, {0.982071, -0.8131}, {0.983366, -0.811973}, {0.912734, -0.751998}, + {0.788623, -0.64916}, {0.644089, -0.530499}, {0.518145, -0.427377}, {0.445015, -0.367508}, + {0.444493, -0.367452}, {0.516436, -0.427656}, {0.641086, -0.531906}, {0.784847, -0.65175}, + {0.908595, -0.751269}, {0.95569, -0.788474}, {0.910793, -0.749804}, {0.792214, -0.651229}, + {0.644767, -0.529732}, {0.51472, -0.422831}, {0.430638, -0.353559}, {0.402011, -0.329908}, + {0.42975, -0.353253}, {0.512703, -0.422949}, {0.641682, -0.530892}, {0.788888, 
-0.653118}, + {0.881405, -0.727209}, {0.882682, -0.727454}, {0.791455, -0.651686}, {0.651413, -0.535995}, + {0.519546, -0.427026}, {0.431407, -0.35383}, {0.391367, -0.320231}, {0.390692, -0.319587}, + {0.42934, -0.352217}, {0.516202, -0.425168}, {0.647468, -0.534536}, {0.788186, -0.65087}, + {0.811606, -0.669797}, {0.767676, -0.633926}, {0.657384, -0.543041}, {0.535325, -0.44185}, + {0.446838, -0.367812}, {0.403221, -0.330752}, {0.391118, -0.320245}, {0.401141, -0.328762}, + {0.443013, -0.364033}, {0.53056, -0.436964}, {0.652954, -0.538357}, {0.764991, -0.63103}, + {0.718148, -0.594358}, {0.656668, -0.544524}, {0.566534, -0.469907}, {0.489575, -0.405152}, + {0.446437, -0.368157}, {0.431026, -0.354731}, {0.429791, -0.353818}, {0.443038, -0.365117}, + {0.484873, -0.399705}, {0.561764, -0.462965}, {0.653134, -0.538495}, {0.716845, -0.591951}, + {0.64991, -0.540158}, {0.609517, -0.50724}, {0.565223, -0.469676}, {0.533674, -0.442191}, + {0.518439, -0.42892}, {0.513936, -0.425524}, {0.516595, -0.428252}, {0.530587, -0.439604}, + {0.561827, -0.464433}, {0.606721, -0.500747}, {0.648357, -0.535584}, {0.665914, -0.551716}, + {0.648763, -0.540834}, {0.653791, -0.54453}, {0.653749, -0.543279}, {0.64854, -0.538368}, + {0.643221, -0.534567}, {0.642987, -0.535396}, {0.648008, -0.539679}, {0.653278, -0.542816}, + {0.653614, -0.541465}, {0.64882, -0.536941}, {0.643841, -0.533909}, {0.643781, -0.535683}, + {0.714768, -0.596264}, {0.762447, -0.634828}, {0.786361, -0.654068}, {0.788876, -0.656779}, + {0.787038, -0.65659}, {0.790289, -0.659955}, {0.788985, -0.658021}, {0.765755, -0.636952}, + {0.717875, -0.595989}, {0.666946, -0.55406}, {0.644335, -0.536754}, {0.66505, -0.555112}, + {0.760297, -0.634533}, {0.875585, -0.730602}, {0.952714, -0.795538}, {0.980608, -0.819361}, + {0.956432, -0.798904}, {0.881715, -0.735447}, {0.766887, -0.638576}, {0.638077, -0.53103}, + {0.53475, -0.445826}, {0.494181, -0.413213}, {0.531879, -0.445148}, {0.632794, -0.528872}, + {0.875148, -0.730244}, 
{1.00053, -0.83478}, {1.06809, -0.891045}, {1.0702, -0.892241}, + {1.00602, -0.837831}, {0.881951, -0.733898}, {0.720757, -0.600013}, {0.565916, -0.472267}, + {0.469701, -0.393385}, {0.468381, -0.392782}, {0.562022, -0.470403}, {0.714748, -0.596952}, + {0.951724, -0.792978}, {1.0675, -0.888794}, {1.10965, -0.922755}, {1.07095, -0.889341}, + {0.956984, -0.794048}, {0.789425, -0.655373}, {0.609099, -0.506893}, {0.469117, -0.39185}, + {0.415847, -0.348108}, {0.467634, -0.390967}, {0.60581, -0.505486}, {0.784489, -0.653943}, + {0.979257, -0.813659}, {1.06928, -0.887139}, {1.0707, -0.886703}, {0.982609, -0.812689}, + {0.828142, -0.684875}, {0.648738, -0.537238}, {0.492989, -0.409118}, {0.4031, -0.334897}, + {0.403108, -0.334733}, {0.492489, -0.40889}, {0.646821, -0.537445}, {0.824799, -0.685681}, + {0.955085, -0.790936}, {1.0053, -0.831093}, {0.957134, -0.790022}, {0.828631, -0.683406}, + {0.665481, -0.548845}, {0.517532, -0.426748}, {0.419298, -0.345152}, {0.385841, -0.316888}, + {0.419992, -0.345177}, {0.51773, -0.427116}, {0.664126, -0.549783}, {0.826087, -0.684663}, + {0.880649, -0.727353}, {0.881829, -0.727543}, {0.790393, -0.651558}, {0.650103, -0.535468}, + {0.518382, -0.426105}, {0.431098, -0.352788}, {0.392378, -0.31945}, {0.392765, -0.319301}, + {0.43158, -0.352366}, {0.517685, -0.425494}, {0.64784, -0.534816}, {0.787744, -0.651055}, + {0.766142, -0.632214}, {0.721214, -0.595266}, {0.610714, -0.50388}, {0.494908, -0.407265}, + {0.42059, -0.343957}, {0.392784, -0.318851}, {0.388285, -0.313951}, {0.392534, -0.317921}, + {0.419281, -0.341768}, {0.492016, -0.403823}, {0.607014, -0.500108}, {0.718572, -0.592754}, + {0.637533, -0.52682}, {0.566598, -0.46887}, {0.470787, -0.389088}, {0.404848, -0.332564}, + {0.386843, -0.31513}, {0.392913, -0.318428}, {0.392354, -0.317861}, {0.384923, -0.312895}, + {0.401295, -0.32789}, {0.466159, -0.382542}, {0.562529, -0.462891}, {0.635899, -0.52438}, + {0.534317, -0.442955}, {0.470272, -0.390355}, {0.417093, -0.344997}, {0.404226, 
-0.332048}, + {0.420386, -0.34365}, {0.431316, -0.352355}, {0.418891, -0.342817}, {0.401172, -0.328919}, + {0.412834, -0.338725}, {0.465949, -0.38262}, {0.531553, -0.437516}, {0.562139, -0.464363}, + {0.493902, -0.410825}, {0.468791, -0.389727}, {0.468461, -0.387955}, {0.493162, -0.407106}, + {0.517902, -0.427462}, {0.517514, -0.427864}, {0.491941, -0.407082}, {0.466408, -0.385269}, + {0.466295, -0.383977}, {0.491793, -0.404673}, {0.517697, -0.427228}, {0.518528, -0.42989}, + {0.531833, -0.443231}, {0.562443, -0.46799}, {0.606581, -0.503922}, {0.647608, -0.538209}, + {0.664726, -0.553393}, {0.648354, -0.540326}, {0.607691, -0.505701}, {0.563439, -0.467066}, + {0.532447, -0.439851}, {0.518209, -0.428159}, {0.514734, -0.426852}, {0.51796, -0.431201}, + {0.63291, -0.527945}, {0.715274, -0.596042}, {0.785456, -0.654767}, {0.826009, -0.689563}, + {0.827315, -0.691394}, {0.788912, -0.658884}, {0.719733, -0.599566}, {0.637066, -0.529045}, + {0.563173, -0.467104}, {0.519211, -0.431518}, {0.518209, -0.432111}, {0.560322, -0.467836}, + {0.804534, -0.670781}, {0.958095, -0.799225}, {1.04923, -0.875744}, {1.05128, -0.877321}, + {0.963287, -0.803076}, {0.810688, -0.674956}, {0.636615, -0.529804}, {0.488106, -0.406971}, + {0.403106, -0.337317}, {0.402144, -0.336993}, {0.485081, -0.405626}, {0.631544, -0.526882}, + {0.958055, -0.79803}, {1.10253, -0.918344}, {1.15661, -0.962985}, {1.10614, -0.920206}, + {0.963386, -0.80095}, {0.765061, -0.636437}, {0.563985, -0.470531}, {0.415273, -0.348206}, + {0.360323, -0.302975}, {0.414176, -0.347174}, {0.561166, -0.46844}, {0.760355, -0.633544}, + {1.04883, -0.871692}, {1.15629, -0.960328}, {1.15785, -0.960684}, {1.05242, -0.872646}, + {0.867692, -0.719841}, {0.653288, -0.543268}, {0.46733, -0.390256}, {0.360202, -0.301767}, + {0.360488, -0.301459}, {0.467436, -0.389396}, {0.651834, -0.542053}, {0.864392, -0.718579}, + {1.05051, -0.870624}, {1.10574, -0.91547}, {1.05255, -0.870705}, {0.907469, -0.750694}, + {0.71528, -0.592387}, {0.53113, 
-0.44062}, {0.402088, -0.333687}, {0.356814, -0.295451}, + {0.403793, -0.333737}, {0.53271, -0.440658}, {0.71494, -0.592355}, {0.905299, -0.750602}, + {0.962527, -0.795674}, {0.963539, -0.795964}, {0.868658, -0.717384}, {0.716134, -0.591464}, + {0.560512, -0.462618}, {0.444341, -0.365573}, {0.385921, -0.315861}, {0.38723, -0.316126}, + {0.447067, -0.366095}, {0.562225, -0.46285}, {0.715526, -0.591141}, {0.866741, -0.716797}, + {0.810385, -0.668894}, {0.766125, -0.632463}, {0.655207, -0.540786}, {0.532737, -0.438747}, + {0.444947, -0.364279}, {0.403385, -0.327604}, {0.393803, -0.318229}, {0.405352, -0.328076}, + {0.446911, -0.364238}, {0.532732, -0.437343}, {0.653233, -0.538395}, {0.764153, -0.630586}, + {0.636884, -0.525658}, {0.56563, -0.467236}, {0.469448, -0.386983}, {0.403469, -0.330156}, + {0.386116, -0.312813}, {0.393426, -0.316676}, {0.394081, -0.316956}, {0.387161, -0.312747}, + {0.403133, -0.328102}, {0.467077, -0.382661}, {0.562605, -0.462635}, {0.635504, -0.52367}, + {0.488623, -0.403785}, {0.416667, -0.34438}, {0.361438, -0.296958}, {0.356958, -0.290352}, + {0.386262, -0.312133}, {0.404112, -0.326145}, {0.386502, -0.312506}, {0.356316, -0.289069}, + {0.359029, -0.292435}, {0.413136, -0.337829}, {0.485974, -0.398889}, {0.52064, -0.428934}, + {0.403403, -0.333963}, {0.360809, -0.298182}, {0.360349, -0.295854}, {0.402612, -0.328872}, + {0.445255, -0.363448}, {0.445368, -0.364105}, {0.402454, -0.329396}, {0.359032, -0.293563}, + {0.35822, -0.292418}, {0.400701, -0.327542}, {0.444074, -0.36458}, {0.445233, -0.367395}, + {0.402024, -0.333495}, {0.413791, -0.342564}, {0.466418, -0.385211}, {0.531149, -0.438704}, + {0.560721, -0.463913}, {0.531951, -0.440594}, {0.467203, -0.386301}, {0.413744, -0.340377}, + {0.401158, -0.328617}, {0.418162, -0.342867}, {0.430282, -0.354513}, {0.41901, -0.346944}, + {0.484747, -0.403068}, {0.560524, -0.465807}, {0.650941, -0.541315}, {0.714135, -0.594855}, + {0.715245, -0.596488}, {0.653683, -0.544691}, {0.56358, -0.46785}, 
{0.487006, -0.402145}, + {0.444771, -0.366269}, {0.430556, -0.355326}, {0.430243, -0.356659}, {0.443622, -0.368824}, + {0.631331, -0.52601}, {0.760034, -0.633521}, {0.864246, -0.721319}, {0.905595, -0.756611}, + {0.867511, -0.724622}, {0.765155, -0.63787}, {0.636401, -0.528868}, {0.521239, -0.43214}, + {0.44557, -0.369658}, {0.419224, -0.348898}, {0.443767, -0.370103}, {0.51758, -0.431538}, + {0.902427, -0.750354}, {1.0479, -0.872403}, {1.10353, -0.919235}, {1.05113, -0.875217}, + {0.907076, -0.754595}, {0.715588, -0.595226}, {0.531267, -0.4429}, {0.401164, -0.335981}, + {0.354514, -0.29758}, {0.400529, -0.3349}, {0.529216, -0.440394}, {0.71172, -0.591331}, + {1.04799, -0.870268}, {1.1553, -0.959763}, {1.15687, -0.960888}, {1.05157, -0.873127}, + {0.86693, -0.720155}, {0.652405, -0.543334}, {0.466182, -0.390255}, {0.358867, -0.301778}, + {0.359218, -0.301236}, {0.466438, -0.388562}, {0.651091, -0.540517}, {0.863696, -0.716755}, + {1.10337, -0.914346}, {1.15682, -0.958479}, {1.10563, -0.915858}, {0.962001, -0.79721}, + {0.762856, -0.63331}, {0.561523, -0.46775}, {0.41347, -0.345587}, {0.359925, -0.300571}, + {0.41527, -0.345087}, {0.563153, -0.466513}, {0.762393, -0.631297}, {0.959567, -0.794961}, + {1.05093, -0.869067}, {1.05202, -0.869833}, {0.962698, -0.796173}, {0.808564, -0.669304}, + {0.633566, -0.525067}, {0.48557, -0.40248}, {0.402562, -0.33291}, {0.404139, -0.33315}, + {0.488902, -0.402767}, {0.635809, -0.524588}, {0.808142, -0.667819}, {0.960699, -0.794449}, + {0.907461, -0.749272}, {0.868488, -0.717353}, {0.764704, -0.631891}, {0.63457, -0.52412}, + {0.519165, -0.427631}, {0.444969, -0.364663}, {0.421215, -0.343753}, {0.448056, -0.365789}, + {0.522742, -0.428483}, {0.63591, -0.52337}, {0.763448, -0.629741}, {0.866741, -0.715477}, + {0.716895, -0.591475}, {0.654918, -0.540733}, {0.563903, -0.465064}, {0.486687, -0.399506}, + {0.444944, -0.362568}, {0.432395, -0.350326}, {0.433953, -0.351298}, {0.448218, -0.364282}, + {0.488829, -0.399619}, {0.563356, 
-0.462603}, {0.652812, -0.5373}, {0.715764, -0.589874}, + {0.533085, -0.439668}, {0.468573, -0.386357}, {0.414961, -0.340431}, {0.40239, -0.327359}, + {0.419967, -0.339572}, {0.43294, -0.349558}, {0.422056, -0.341476}, {0.404494, -0.328566}, + {0.415001, -0.338524}, {0.466539, -0.381848}, {0.531036, -0.435857}, {0.561168, -0.461839}, + {0.402562, -0.331819}, {0.35989, -0.295816}, {0.359447, -0.293397}, {0.402073, -0.326571}, + {0.445463, -0.361607}, {0.446387, -0.362917}, {0.403877, -0.328788}, {0.360206, -0.293206}, + {0.35868, -0.291907}, {0.400427, -0.3266}, {0.44338, -0.363148}, {0.444425, -0.365557}, + {0.354795, -0.292411}, {0.358461, -0.294534}, {0.413036, -0.338535}, {0.485775, -0.398405}, + {0.519963, -0.427309}, {0.487338, -0.400927}, {0.414766, -0.340547}, {0.358915, -0.29314}, + {0.353852, -0.287985}, {0.38281, -0.312243}, {0.400808, -0.328623}, {0.38392, -0.316159}, + {0.399797, -0.330293}, {0.464635, -0.383794}, {0.560507, -0.46371}, {0.633218, -0.525109}, + {0.63433, -0.526812}, {0.563097, -0.467156}, {0.467112, -0.385647}, {0.40105, -0.328851}, + {0.38306, -0.313193}, {0.389456, -0.319348}, {0.389569, -0.32094}, {0.38292, -0.316338}, + {0.528231, -0.438023}, {0.649458, -0.539454}, {0.760684, -0.633373}, {0.807088, -0.673172}, + {0.763381, -0.636653}, {0.653433, -0.543507}, {0.531722, -0.440136}, {0.443716, -0.365774}, + {0.400837, -0.330437}, {0.389558, -0.322152}, {0.400043, -0.331604}, {0.441644, -0.366167}, + {0.711186, -0.591151}, {0.862936, -0.718641}, {0.95913, -0.800092}, {0.960843, -0.801868}, + {0.867175, -0.72286}, {0.715938, -0.595418}, {0.560808, -0.465502}, {0.443789, -0.368554}, + {0.383526, -0.319373}, {0.383013, -0.319325}, {0.44195, -0.36779}, {0.557256, -0.462962}, + {0.977648, -0.810535}, {1.06704, -0.886339}, {1.06819, -0.887951}, {0.980195, -0.814622}, + {0.825755, -0.686283}, {0.645961, -0.53792}, {0.489583, -0.409603}, {0.399407, -0.335432}, + {0.399886, -0.334684}, {0.490293, -0.407243}, {0.645521, -0.533948}, {0.823687, 
-0.681448}, + {1.06705, -0.883747}, {1.10838, -0.918884}, {1.06907, -0.886589}, {0.95443, -0.791821}, + {0.786003, -0.653136}, {0.605002, -0.504557}, {0.465166, -0.389591}, {0.413125, -0.345878}, + {0.466744, -0.38827}, {0.606417, -0.501713}, {0.785557, -0.648974}, {0.952244, -0.787455}, + {1.06833, -0.883635}, {1.06952, -0.885097}, {1.00419, -0.831485}, {0.878647, -0.728307}, + {0.716209, -0.594817}, {0.561322, -0.46725}, {0.466711, -0.388645}, {0.468032, -0.388457}, + {0.564014, -0.466269}, {0.717716, -0.592374}, {0.87765, -0.724591}, {1.00188, -0.827957}, + {0.981157, -0.810639}, {0.956112, -0.790469}, {0.879911, -0.728015}, {0.763366, -0.631984}, + {0.633787, -0.524711}, {0.531582, -0.439568}, {0.493744, -0.407396}, {0.534328, -0.440214}, + {0.636773, -0.524598}, {0.764022, -0.629912}, {0.878061, -0.724537}, {0.954022, -0.78775}, + {0.828059, -0.683667}, {0.789068, -0.652029}, {0.71852, -0.593719}, {0.634587, -0.523508}, + {0.560808, -0.4612}, {0.51873, -0.425334}, {0.520357, -0.426373}, {0.564205, -0.463048}, + {0.636747, -0.523687}, {0.717815, -0.59119}, {0.786736, -0.648461}, {0.826821, -0.681998}, + {0.649228, -0.535679}, {0.608408, -0.50211}, {0.563277, -0.463787}, {0.531714, -0.435906}, + {0.518103, -0.423142}, {0.516232, -0.421199}, {0.520815, -0.425671}, {0.534699, -0.438152}, + {0.564022, -0.463018}, {0.606713, -0.49853}, {0.647261, -0.532381}, {0.66494, -0.547781}, + {0.492575, -0.405916}, {0.467289, -0.384442}, {0.4668, -0.382423}, {0.492099, -0.401766}, + {0.518402, -0.422978}, {0.519799, -0.424699}, {0.495055, -0.405101}, {0.468805, -0.383745}, + {0.466945, -0.38203}, {0.490834, -0.401783}, {0.516067, -0.423401}, {0.517019, -0.425414}, + {0.400898, -0.329801}, {0.413008, -0.338882}, {0.465946, -0.381611}, {0.531245, -0.435391}, + {0.561639, -0.461154}, {0.533497, -0.43849}, {0.468642, -0.384621}, {0.414223, -0.338637}, + {0.400306, -0.326367}, {0.416338, -0.339949}, {0.428254, -0.351089}, {0.417374, -0.343294}, + {0.399594, -0.328789}, {0.464844, 
-0.3824}, {0.560931, -0.462366}, {0.633766, -0.523808}, + {0.634952, -0.525598}, {0.563632, -0.466039}, {0.467259, -0.384538}, {0.400536, -0.327602}, + {0.38188, -0.311703}, {0.387949, -0.317652}, {0.388236, -0.319187}, {0.382124, -0.314681}, + {0.489021, -0.403565}, {0.604161, -0.499775}, {0.715285, -0.593659}, {0.762404, -0.634212}, + {0.717409, -0.596802}, {0.607089, -0.503536}, {0.491217, -0.405252}, {0.416324, -0.342052}, + {0.387747, -0.318786}, {0.382927, -0.315842}, {0.387675, -0.320242}, {0.415449, -0.34289}, + {0.644393, -0.53344}, {0.784142, -0.65123}, {0.876642, -0.730128}, {0.877801, -0.731866}, + {0.786903, -0.655285}, {0.647201, -0.537342}, {0.515433, -0.426767}, {0.427312, -0.35388}, + {0.387554, -0.321775}, {0.387507, -0.321928}, {0.426776, -0.353605}, {0.513737, -0.424754}, + {0.823246, -0.682551}, {0.951828, -0.791418}, {1.00178, -0.83441}, {0.953937, -0.794564}, + {0.82609, -0.687217}, {0.663244, -0.551235}, {0.514813, -0.428497}, {0.415598, -0.347231}, + {0.381434, -0.319109}, {0.415698, -0.346224}, {0.514178, -0.42597}, {0.661235, -0.547082}, + {0.984781, -0.815022}, {1.01025, -0.838096}, {0.985469, -0.818317}, {0.909201, -0.755153}, + {0.787025, -0.654425}, {0.644702, -0.537847}, {0.52825, -0.442488}, {0.483731, -0.405004}, + {0.530046, -0.440713}, {0.646902, -0.534216}, {0.788132, -0.649354}, {0.908919, -0.750002}, + {1.01037, -0.83604}, {1.01118, -0.837953}, {0.98149, -0.813885}, {0.906876, -0.752516}, + {0.785619, -0.652988}, {0.650901, -0.54244}, {0.561214, -0.468264}, {0.562294, -0.467636}, + {0.653163, -0.540255}, {0.787078, -0.64889}, {0.906451, -0.747133}, {0.979986, -0.80913}, + {0.98654, -0.816116}, {0.98265, -0.813695}, {0.954239, -0.790607}, {0.878967, -0.728661}, + {0.762746, -0.632876}, {0.652418, -0.54172}, {0.607499, -0.503995}, {0.654281, -0.541388}, + {0.764528, -0.631079}, {0.878747, -0.724656}, {0.952076, -0.785469}, {0.980567, -0.810022}, + {0.912, -0.754273}, {0.909769, -0.752995}, {0.880516, -0.728879}, {0.809042, 
-0.669481}, + {0.715417, -0.59168}, {0.648686, -0.536196}, {0.649766, -0.5368}, {0.717448, -0.592368}, + {0.809641, -0.668138}, {0.878603, -0.7249}, {0.906776, -0.748411}, {0.910568, -0.752242}, + {0.791359, -0.654261}, {0.789521, -0.65297}, {0.764816, -0.631996}, {0.715789, -0.590571}, + {0.665251, -0.548223}, {0.644496, -0.531108}, {0.66716, -0.5502}, {0.717495, -0.591957}, + {0.764264, -0.630378}, {0.786857, -0.648783}, {0.788898, -0.650666}, {0.78759, -0.650334}, + {0.649246, -0.536372}, {0.654126, -0.540102}, {0.653381, -0.538512}, {0.648056, -0.533315}, + {0.643744, -0.529732}, {0.644924, -0.531266}, {0.650434, -0.536217}, {0.654632, -0.539431}, + {0.653068, -0.537504}, {0.647008, -0.53224}, {0.642242, -0.528879}, {0.64337, -0.530862}, + {0.531406, -0.438506}, {0.562406, -0.463459}, {0.606572, -0.499316}, {0.647903, -0.533552}, + {0.665775, -0.548963}, {0.650015, -0.536311}, {0.60901, -0.501905}, {0.563297, -0.462993}, + {0.530511, -0.435111}, {0.515271, -0.422811}, {0.51211, -0.42136}, {0.516491, -0.426036}, + {0.484781, -0.39971}, {0.561479, -0.462725}, {0.652189, -0.538158}, {0.715419, -0.591481}, + {0.716504, -0.592977}, {0.654612, -0.541131}, {0.563513, -0.464147}, {0.48534, -0.398085}, + {0.441579, -0.361759}, {0.426772, -0.350591}, {0.427162, -0.352141}, {0.442105, -0.364881}, + {0.529563, -0.436884}, {0.651593, -0.538419}, {0.762769, -0.632033}, {0.808653, -0.671341}, + {0.764284, -0.634396}, {0.653533, -0.540988}, {0.530723, -0.437448}, {0.441427, -0.362931}, + {0.39759, -0.327517}, {0.38631, -0.319391}, {0.397944, -0.329317}, {0.441357, -0.364525}, + {0.646085, -0.533756}, {0.78606, -0.651431}, {0.878182, -0.729949}, {0.878711, -0.731235}, + {0.787196, -0.654323}, {0.646905, -0.536238}, {0.514485, -0.425654}, {0.425728, -0.352824}, + {0.385668, -0.320849}, {0.385984, -0.321259}, {0.426281, -0.353325}, {0.51451, -0.424868}, + {0.787758, -0.651546}, {0.906306, -0.752333}, {0.952243, -0.792423}, {0.906773, -0.754853}, + {0.788039, -0.655204}, 
{0.640298, -0.531913}, {0.509624, -0.424236}, {0.425075, -0.355511}, + {0.396859, -0.332418}, {0.426052, -0.354922}, {0.510788, -0.422509}, {0.640808, -0.528803}, + {0.908923, -0.752185}, {0.980113, -0.813766}, {0.980349, -0.815309}, {0.909182, -0.756104}, + {0.784639, -0.652386}, {0.639432, -0.532617}, {0.512855, -0.429188}, {0.439865, -0.369439}, + {0.440679, -0.368711}, {0.514694, -0.426899}, {0.641037, -0.528785}, {0.785194, -0.647739}, + {0.946748, -0.783834}, {0.946529, -0.785583}, {0.943249, -0.783444}, {0.908934, -0.755117}, + {0.824279, -0.685704}, {0.712322, -0.594182}, {0.631914, -0.527946}, {0.6331, -0.527236}, + {0.71521, -0.591859}, {0.827387, -0.681631}, {0.911075, -0.750006}, {0.944203, -0.779044}, + {0.947184, -0.784971}, {0.962878, -0.799005}, {0.982007, -0.814874}, {0.954503, -0.791986}, + {0.865704, -0.718843}, {0.762315, -0.63375}, {0.71698, -0.595846}, {0.763879, -0.63292}, + {0.867535, -0.716331}, {0.955232, -0.78739}, {0.981437, -0.809425}, {0.962042, -0.795239}, + {0.945708, -0.784084}, {0.983782, -0.815902}, {1.00546, -0.833419}, {0.962426, -0.797379}, + {0.866852, -0.718308}, {0.787475, -0.652825}, {0.78806, -0.653039}, {0.867819, -0.718022}, + {0.96225, -0.794931}, {1.00355, -0.828595}, {0.981294, -0.810865}, {0.944566, -0.781922}, + {0.913382, -0.757119}, {0.957874, -0.793793}, {0.963784, -0.797984}, {0.907436, -0.750829}, + {0.827225, -0.684541}, {0.790036, -0.654195}, {0.827928, -0.685662}, {0.907448, -0.750932}, + {0.961898, -0.795203}, {0.954595, -0.788875}, {0.910793, -0.753218}, {0.887621, -0.735102}, + {0.829546, -0.687252}, {0.869225, -0.719695}, {0.86813, -0.718096}, {0.827234, -0.684027}, + {0.787186, -0.651334}, {0.787726, -0.652348}, {0.828031, -0.685654}, {0.86761, -0.717612}, + {0.866847, -0.716087}, {0.826627, -0.68263}, {0.787418, -0.650923}, {0.788735, -0.653033}, + {0.716677, -0.593447}, {0.764484, -0.632507}, {0.787625, -0.651319}, {0.789417, -0.653108}, + {0.787396, -0.652028}, {0.790602, -0.654735}, {0.788655, 
-0.652241}, {0.764062, -0.63058}, + {0.714914, -0.589188}, {0.663901, -0.54742}, {0.642708, -0.531102}, {0.665509, -0.550974}, + {0.634351, -0.52513}, {0.717483, -0.593546}, {0.78737, -0.651674}, {0.827261, -0.685377}, + {0.827992, -0.686135}, {0.788917, -0.652818}, {0.718532, -0.592914}, {0.634171, -0.521962}, + {0.558888, -0.459903}, {0.51483, -0.424804}, {0.515329, -0.426581}, {0.559799, -0.46383}, + {0.633949, -0.524656}, {0.763686, -0.632159}, {0.867462, -0.719031}, {0.90766, -0.75297}, + {0.86835, -0.719775}, {0.76471, -0.632255}, {0.634321, -0.522887}, {0.517281, -0.426082}, + {0.440303, -0.363863}, {0.414205, -0.343879}, {0.440819, -0.366376}, {0.517661, -0.429242}, + {0.715567, -0.591908}, {0.867818, -0.718954}, {0.962938, -0.799185}, {0.962965, -0.799541}, + {0.867722, -0.719489}, {0.715028, -0.5916}, {0.558272, -0.461703}, {0.439627, -0.365042}, + {0.378583, -0.316396}, {0.379025, -0.317238}, {0.440662, -0.366905}, {0.559279, -0.463209}, + {0.828, -0.684575}, {0.95621, -0.79272}, {1.00468, -0.834531}, {0.955213, -0.793591}, + {0.826086, -0.685655}, {0.66216, -0.549645}, {0.512582, -0.427217}, {0.412397, -0.346391}, + {0.37819, -0.318824}, {0.413906, -0.34669}, {0.515009, -0.427307}, {0.664666, -0.549073}, + {0.911802, -0.753779}, {0.982307, -0.814784}, {0.98149, -0.815631}, {0.909447, -0.75593}, + {0.784343, -0.652083}, {0.638684, -0.532487}, {0.511624, -0.429351}, {0.438347, -0.369906}, + {0.439502, -0.369508}, {0.514655, -0.4281}, {0.642532, -0.530388}, {0.787852, -0.649535}, + {0.94453, -0.781219}, {0.962011, -0.79828}, {0.94307, -0.783709}, {0.884037, -0.734886}, + {0.783612, -0.652211}, {0.66033, -0.55168}, {0.555968, -0.466755}, {0.515608, -0.432876}, + {0.558439, -0.465275}, {0.664239, -0.548746}, {0.787542, -0.648241}, {0.886935, -0.730943}}; - std::vector> wfcr_another_spin_2 = { - {-0.000850659, 0.00221376}, {-0.000824667, 0.00233258}, {-0.00025408, 0.00206122}, {0.000478631, 0.00162836}, {0.000857409, 0.00122164}, {0.000690617, 0.000865747}, 
{0.000252532, 0.000500086}, {-1.97358e-05, 0.000150978}, {3.69117e-05, -3.74248e-06}, {0.000177712, 0.000233336}, {4.52095e-05, 0.00087035}, {-0.000415234, 0.00165232}, {5.94142e-05, 0.00267949}, {0.00105166, 0.00268062}, {0.00175924, 0.00262548}, {0.00167774, 0.00273249}, {0.000846263, 0.00287114}, {-0.0001803, 0.00269879}, {-0.000812061, 0.00205795}, {-0.000897011, 0.00122219}, {-0.000744479, 0.000720935}, {-0.000710161, 0.000892154}, {-0.000785981, 0.00158813}, {-0.00061953, 0.00231689}, {0.0014205, 0.00323172}, {0.00245194, 0.00326343}, {0.00254339, 0.00338285}, {0.00164597, 0.00365304}, {0.000352848, 0.00378036}, {-0.000612417, 0.00341444}, {-0.0009668, 0.00255828}, {-0.000968044, 0.00166251}, {-0.000992893, 0.00128342}, {-0.00105907, 0.00161763}, {-0.000796136, 0.00235437}, {0.000102526, 0.00297995}, {0.00151406, 0.00372578}, {0.00180885, 0.00385756}, {0.00133698, 0.00394148}, {0.000458959, 0.00402696}, {-0.000265637, 0.00393919}, {-0.00055238, 0.00347791}, {-0.000558492, 0.00269481}, {-0.000606658, 0.00195161}, {-0.000774242, 0.00167125}, {-0.000782284, 0.00200075}, {-0.000304306, 0.00269993}, {0.000621885, 0.00335765}, {-0.000255288, 0.00377188}, {-0.000578666, 0.00396859}, {-0.000801606, 0.00387581}, {-0.0007455, 0.00376145}, {-0.000428125, 0.00363368}, {-5.61308e-05, 0.00326119}, {0.000151935, 0.00250074}, {0.000143064, 0.00157857}, {4.12433e-05, 0.00100732}, {-7.37259e-06, 0.00118311}, {7.50117e-06, 0.00203634}, {-3.97326e-05, 0.00307286}, {-0.00262995, 0.00309697}, {-0.00275212, 0.00329879}, {-0.00201295, 0.00311649}, {-0.000892036, 0.00305148}, {9.92763e-05, 0.00304081}, {0.000765938, 0.00255441}, {0.00121087, 0.00127004}, {0.00152584, -0.000387784}, {0.00155589, -0.00144191}, {0.0010114, -0.00115733}, {-0.000172708, 0.000309335}, {-0.00160859, 0.00203217}, {-0.00361979, 0.00187857}, {-0.00293702, 0.0020949}, {-0.00142119, 0.00205592}, {-8.49879e-06, 0.00220368}, {0.000950259, 0.00208594}, {0.00175599, 0.000889985}, {0.00272285, -0.00149533}, 
{0.00356402, -0.00401823}, {0.00349632, -0.00515843}, {0.00200782, -0.00417881}, {-0.000474875, -0.00175451}, {-0.00272874, 0.000620624}, {-0.00238927, 0.000700564}, {-0.00121046, 0.00101474}, {7.99908e-05, 0.00125414}, {0.000848624, 0.00143781}, {0.00148146, 0.000696302}, {0.00268859, -0.0016751}, {0.00443425, -0.00512418}, {0.00565872, -0.00790653}, {0.00515424, -0.00837321}, {0.0027683, -0.00631599}, {-0.000267813, -0.00310631}, {-0.00226091, -0.000507626}, {-5.16118e-05, 0.000139308}, {0.00064619, 0.000615711}, {0.000801952, 0.00101293}, {0.000697033, 0.000835273}, {0.00136019, -0.000850858}, {0.00329127, -0.00418559}, {0.00564976, -0.00788611}, {0.00680305, -0.00997969}, {0.00576458, -0.00931341}, {0.00313569, -0.00645509}, {0.000621442, -0.00315179}, {-0.000426931, -0.000880551}, {0.00141297, 0.000348494}, {0.000943319, 0.000937462}, {-2.78181e-08, 0.00120799}, {-0.000355539, 0.000441123}, {0.000843933, -0.00190748}, {0.00334012, -0.00531184}, {0.00565542, -0.00821328}, {0.00629755, -0.00908454}, {0.00503133, -0.00758886}, {0.00300479, -0.00477347}, {0.00163284, -0.00214256}, {0.00134622, -0.000504425}, {0.00104173, 0.00101466}, {-0.000272058, 0.00155855}, {-0.00137594, 0.00151119}, {-0.00120731, 0.000368374}, {0.000486366, -0.00186183}, {0.00278219, -0.00436307}, {0.00429818, -0.00594823}, {0.0043304, -0.00589442}, {0.00333931, -0.00443482}, {0.00235727, -0.00247746}, {0.00194516, -0.000844953}, {0.00174431, 0.00024405}, {-0.000282354, 0.00169861}, {-0.00135553, 0.00204457}, {-0.00167071, 0.00175434}, {-0.000877419, 0.000760329}, {0.000573651, -0.000631027}, {0.00178955, -0.00186146}, {0.00217847, -0.00245592}, {0.00187432, -0.00229522}, {0.00146274, -0.00160787}, {0.00132002, -0.000723407}, {0.00124627, 0.000161286}, {0.000757131, 0.000994601}, {-0.000411389, 0.002483}, {0.000664762, 0.00256688}, {0.00147098, 0.00225153}, {0.00160136, 0.00163982}, {0.0011106, 0.00093562}, {0.000409698, 0.00034921}, {-0.000123655, 1.93428e-05}, {-0.000436175, -5.55788e-06}, 
{-0.000723632, 0.000259591}, {-0.00109888, 0.000762119}, {-0.00137175, 0.00140734}, {-0.00118961, 0.00204395}, {0.00181892, 0.00296487}, {0.00312759, 0.00289344}, {0.00315861, 0.0026423}, {0.00193264, 0.00233801}, {0.00025056, 0.00195006}, {-0.000993451, 0.00141113}, {-0.00149501, 0.000806377}, {-0.00159212, 0.000409872}, {-0.00170222, 0.000499137}, {-0.00176687, 0.0011095}, {-0.00129634, 0.00196539}, {2.67417e-05, 0.00266619}, {0.0034084, 0.00333155}, {0.0038427, 0.00313401}, {0.00269018, 0.00290156}, {0.000672062, 0.00269719}, {-0.00105739, 0.00236236}, {-0.00183363, 0.00177282}, {-0.00186106, 0.00108585}, {-0.00174584, 0.000698806}, {-0.00171999, 0.000929376}, {-0.00137804, 0.00171646}, {-0.000198099, 0.00263587}, {0.0017048, 0.00322729}, {0.00268187, 0.00333104}, {0.00199517, 0.00308239}, {0.000412822, 0.00283221}, {-0.00110048, 0.00265381}, {-0.0017948, 0.00239084}, {-0.00168035, 0.00190336}, {-0.00132851, 0.00131573}, {-0.00117801, 0.00098954}, {-0.0010729, 0.0012226}, {-0.00050065, 0.00195873}, {0.000729861, 0.00279654}, {0.00209266, 0.00330113}, {2.19272e-05, 0.00277353}, {-0.000957074, 0.00256523}, {-0.00174148, 0.00235975}, {-0.00187231, 0.00231918}, {-0.00139096, 0.00225199}, {-0.00075164, 0.00186676}, {-0.000374476, 0.00115971}, {-0.00028408, 0.000522671}, {-0.00017942, 0.000435763}, {0.000160266, 0.00102929}, {0.000567512, 0.00194182}, {0.000607946, 0.00261943}, {-0.0024215, 0.00177225}, {-0.00263682, 0.00167568}, {-0.00209535, 0.00164535}, {-0.00114102, 0.00184997}, {-0.000248941, 0.00187832}, {0.000321309, 0.0012058}, {0.000615769, -0.000147999}, {0.00077493, -0.00146392}, {0.000776697, -0.00187237}, {0.000421758, -0.00110328}, {-0.000414171, 0.000280854}, {-0.00152766, 0.00139216}, {-0.00274443, 0.000749227}, {-0.00197066, 0.000793853}, {-0.000775567, 0.00100805}, {0.000155336, 0.0013392}, {0.000657256, 0.00104115}, {0.00104563, -0.000468441}, {0.00158167, -0.00278986}, {0.0020371, -0.00464365}, {0.00183998, -0.0048709}, {0.000677836, -0.00340594}, 
{-0.0010533, -0.00129872}, {-0.00243964, 0.000223927}, {-0.00104825, 0.000168518}, {-9.99667e-05, 0.000343194}, {0.000542457, 0.000711696}, {0.000631678, 0.000824043}, {0.000711702, -0.000259321}, {0.0014042, -0.00280632}, {0.00259882, -0.00578326}, {0.00340756, -0.007483}, {0.0029706, -0.0068919}, {0.00132982, -0.00451958}, {-0.000507229, -0.00191114}, {-0.00140519, -0.000323743}, {0.000775353, 0.000221899}, {0.000887833, 0.000489889}, {0.000405415, 0.000801136}, {-0.000135239, 0.00037554}, {0.000194675, -0.00156994}, {0.00168329, -0.00471438}, {0.00352434, -0.00751854}, {0.00444058, -0.00833396}, {0.00380996, -0.00674179}, {0.00220887, -0.00386533}, {0.000850235, -0.00137242}, {0.000474651, -0.0001314}, {0.000988472, 0.000741314}, {0.000104752, 0.00105288}, {-0.000928353, 0.00112477}, {-0.00119781, 0.000146563}, {-8.73e-05, -0.00221071}, {0.00201349, -0.00511534}, {0.00390545, -0.00699787}, {0.00453751, -0.0067628}, {0.00383244, -0.0046732}, {0.00261536, -0.00208014}, {0.00175651, -0.000269502}, {0.0013939, 0.000473638}, {-0.000278078, 0.00139766}, {-0.00127652, 0.00169414}, {-0.0017797, 0.00150281}, {-0.00123359, 0.000298155}, {0.000314453, -0.00178583}, {0.00212936, -0.00380278}, {0.00330561, -0.00463548}, {0.00342906, -0.00387201}, {0.00278692, -0.00210491}, {0.00197045, -0.000384446}, {0.00129739, 0.000639583}, {0.000627818, 0.00108077}, {-0.00124773, 0.00197888}, {-0.00129968, 0.00219642}, {-0.000820366, 0.00186852}, {6.2613e-05, 0.000856692}, {0.000997415, -0.000513844}, {0.0016175, -0.00159544}, {0.0017559, -0.00187196}, {0.00149077, -0.00132339}, {0.00100921, -0.000377133}, {0.000441083, 0.000486924}, {-0.000176634, 0.00109135}, {-0.000794482, 0.0015546}, {0.000812117, 0.00215362}, {0.00217721, 0.00235972}, {0.00250808, 0.00206105}, {0.00185072, 0.00130905}, {0.000847902, 0.000425322}, {0.000100345, -0.000204685}, {-0.00032629, -0.00038357}, {-0.000784331, -0.000183488}, {-0.00148305, 0.00019328}, {-0.00210058, 0.000617194}, {-0.00200482, 0.00109475}, 
{-0.000889417, 0.00164666}, {0.00344363, 0.00240202}, {0.00412826, 0.00234947}, {0.0030609, 0.00205558}, {0.00102515, 0.00158366}, {-0.000727613, 0.000985268}, {-0.00151951, 0.000361307}, {-0.00163598, -0.000105598}, {-0.00175167, -0.000215457}, {-0.00204384, 0.00012061}, {-0.00193658, 0.000797118}, {-0.000752743, 0.00155791}, {0.00139869, 0.0021397}, {0.00429725, 0.00243424}, {0.00348847, 0.00225357}, {0.00123152, 0.00205561}, {-0.00107539, 0.0017845}, {-0.00224766, 0.00130311}, {-0.00219818, 0.00063953}, {-0.00173366, 8.04705e-05}, {-0.00154505, -1.44906e-05}, {-0.00145589, 0.00048098}, {-0.000696593, 0.00132523}, {0.00109584, 0.00208225}, {0.00321415, 0.00244832}, {0.00244031, 0.0022015}, {0.000620396, 0.00202706}, {-0.00149422, 0.00192451}, {-0.00265921, 0.0017784}, {-0.00248193, 0.0014026}, {-0.00160714, 0.000815412}, {-0.000946948, 0.000317101}, {-0.000758728, 0.000266482}, {-0.000495671, 0.000759492}, {0.000449455, 0.00152111}, {0.00190388, 0.00212198}, {0.00288065, 0.0023187}, {-0.000688485, 0.0016919}, {-0.00217813, 0.00155269}, {-0.00290026, 0.00151515}, {-0.0024853, 0.00150412}, {-0.00140646, 0.00128748}, {-0.000490928, 0.000773425}, {-0.000160101, 0.000184896}, {-0.000140597, -8.53298e-05}, {0.000100468, 0.000185019}, {0.000674583, 0.000838289}, {0.00106589, 0.00146436}, {0.00063142, 0.00174561}, {-0.00260573, 0.0010079}, {-0.00281905, 0.000850244}, {-0.00211981, 0.000891521}, {-0.00101254, 0.0010516}, {-0.000171228, 0.000910086}, {0.000118564, 0.000196445}, {9.63571e-05, -0.000829039}, {0.000145403, -0.00151408}, {0.00031269, -0.00136667}, {0.000236222, -0.000495381}, {-0.000440811, 0.000488826}, {-0.00159713, 0.00101378}, {-0.00209376, 0.00037292}, {-0.00132175, 0.000178588}, {-0.000357631, 0.000357329}, {0.000135574, 0.000621458}, {6.97704e-05, 0.0002626}, {-8.86416e-05, -0.0010209}, {0.000107486, -0.00264534}, {0.000561535, -0.00353785}, {0.000703078, -0.00306147}, {8.94859e-05, -0.00158364}, {-0.00107076, -0.00013794}, {-0.00201802, 0.000490441}, 
{-0.000230884, 3.1371e-05}, {0.000383229, -0.000104351}, {0.000428404, 0.000237989}, {-0.000165206, 0.000391835}, {-0.000709238, -0.000511882}, {-0.00047087, -0.00248403}, {0.000543768, -0.00442134}, {0.0015335, -0.00497688}, {0.00164608, -0.00374876}, {0.000793595, -0.00164089}, {-0.000266218, -4.59147e-05}, {-0.000662338, 0.000355773}, {0.000838677, 0.000108863}, {0.000500847, 0.000184552}, {-0.000462377, 0.000611161}, {-0.00142202, 0.000407798}, {-0.0014377, -0.00112731}, {-0.00019289, -0.00347252}, {0.00160799, -0.00513285}, {0.00279916, -0.00490715}, {0.00274911, -0.00296621}, {0.00184213, -0.000696689}, {0.00100447, 0.00053703}, {0.000772938, 0.000514692}, {0.000124405, 0.000544474}, {-0.000809288, 0.000897244}, {-0.00179564, 0.00124025}, {-0.00206921, 0.000585713}, {-0.00109742, -0.00130986}, {0.000817474, -0.00346153}, {0.00266122, -0.00439688}, {0.0034805, -0.00344643}, {0.00309768, -0.00134185}, {0.00211648, 0.000457057}, {0.00125909, 0.0010494}, {0.000713378, 0.000753483}, {-0.00124118, 0.00114723}, {-0.00166679, 0.0016648}, {-0.00172845, 0.00177442}, {-0.00111678, 0.000815018}, {0.000181631, -0.00098021}, {0.00170334, -0.00252241}, {0.00274777, -0.00276665}, {0.00285775, -0.00162362}, {0.00211934, -7.41674e-06}, {0.00102324, 0.00101721}, {4.04591e-05, 0.00115183}, {-0.000681188, 0.000968203}, {-0.00124712, 0.00172152}, {-0.000503703, 0.00217425}, {0.000211257, 0.00202485}, {0.000767816, 0.00105073}, {0.00119491, -0.00030522}, {0.00149454, -0.00123516}, {0.00152478, -0.00122728}, {0.00109947, -0.000465569}, {0.000216125, 0.000401356}, {-0.000826921, 0.000878568}, {-0.00157212, 0.00102008}, {-0.00170337, 0.00123249}, {0.00184474, 0.001294}, {0.00283925, 0.00159354}, {0.00251948, 0.00129289}, {0.00143807, 0.000500437}, {0.0004614, -0.000345234}, {-3.9144e-05, -0.000819144}, {-0.000396374, -0.000822161}, {-0.00109119, -0.000579709}, {-0.00204712, -0.000355138}, {-0.00254108, -0.000180772}, {-0.00186409, 0.000122903}, {-8.78541e-05, 0.000672901}, 
{0.00383137, 0.0011233}, {0.00350436, 0.00127091}, {0.00169583, 0.00120797}, {-0.000300211, 0.000879763}, {-0.0013451, 0.000336338}, {-0.00136446, -0.000243029}, {-0.0011625, -0.000631526}, {-0.00139716, -0.000687156}, {-0.00179999, -0.000428682}, {-0.00145363, 3.06954e-06}, {0.000152943, 0.00045451}, {0.00238365, 0.000838344}, {0.00342454, 0.00105368}, {0.00153576, 0.00130977}, {-0.000938952, 0.00152474}, {-0.00244623, 0.00141073}, {-0.00236447, 0.000855341}, {-0.00138811, 9.4757e-05}, {-0.000669021, -0.000436948}, {-0.000617606, -0.000454379}, {-0.000575121, -4.02195e-05}, {0.000337306, 0.000464277}, {0.00209292, 0.000781006}, {0.00354879, 0.000912453}, {0.000717081, 0.0011431}, {-0.00162143, 0.00147068}, {-0.00320893, 0.00170016}, {-0.00312581, 0.00153008}, {-0.00175628, 0.000924823}, {-0.000323504, 0.000205368}, {0.000294365, -0.000195537}, {0.000293711, -8.92853e-05}, {0.000530904, 0.00033686}, {0.00143664, 0.000728738}, {0.00240627, 0.000900161}, {0.0023238, 0.000963991}, {-0.00217873, 0.0011431}, {-0.00348199, 0.00126879}, {-0.0033341, 0.0012921}, {-0.00193091, 0.00108826}, {-0.000318486, 0.000656389}, {0.000557299, 0.000175283}, {0.000618021, -0.000107607}, {0.00051748, -6.10013e-05}, {0.000796446, 0.000239548}, {0.00122393, 0.000594966}, {0.00101531, 0.000854539}, {-0.000287338, 0.00101178}, {-0.00308394, 0.00078988}, {-0.00282581, 0.000545468}, {-0.00157908, 0.000472976}, {-0.000254897, 0.000507047}, {0.000361377, 0.000400072}, {0.000251192, 2.70184e-05}, {2.11797e-05, -0.000427374}, {0.000163505, -0.000602871}, {0.000493456, -0.000298751}, {0.000345515, 0.000318715}, {-0.000671211, 0.000845599}, {-0.00213771, 0.000987026}, {-0.00173982, 0.000138796}, {-0.000746363, -0.000276445}, {0.000115931, -7.95682e-05}, {0.00015399, 0.000363158}, {-0.000466155, 0.000372438}, {-0.000940082, -0.000301362}, {-0.000678774, -0.00116418}, {7.15204e-05, -0.00142417}, {0.000471506, -0.000764501}, {-4.80198e-05, 0.000324541}, {-0.00117162, 0.000990485}, {-0.00195998, 
0.000807742}, {0.00010666, -0.000411199}, {0.000559929, -0.000534979}, {0.000124969, 0.000177635}, {-0.00103976, 0.000812724}, {-0.00194435, 0.00045067}, {-0.0017105, -0.000837987}, {-0.000442787, -0.00198498}, {0.000841294, -0.00193352}, {0.00116176, -0.000670932}, {0.00047464, 0.000741505}, {-0.000358119, 0.00117588}, {-0.000477469, 0.000489336}, {0.000557711, -0.000448939}, {2.14563e-05, 5.1863e-05}, {-0.00125621, 0.00111261}, {-0.00244309, 0.00143189}, {-0.00247485, 0.000335699}, {-0.00108655, -0.00145352}, {0.000859953, -0.00243917}, {0.0020968, -0.00176869}, {0.00203615, -5.04466e-05}, {0.00118411, 0.00123797}, {0.000515273, 0.00116288}, {0.000465298, 0.000173111}, {-0.000523707, 7.49832e-05}, {-0.00129984, 0.00109782}, {-0.00218758, 0.00201665}, {-0.00238082, 0.00163377}, {-0.00134821, -8.95559e-05}, {0.000544868, -0.00188137}, {0.00222292, -0.00231004}, {0.00275675, -0.00111937}, {0.00209201, 0.000561396}, {0.00097507, 0.00133038}, {0.000184764, 0.000840209}, {-0.000155834, 3.24209e-05}, {-0.001461, 0.000795685}, {-0.00141069, 0.00188087}, {-0.00124781, 0.00225242}, {-0.000643685, 0.0012467}, {0.000498695, -0.000580925}, {0.0017598, -0.00189078}, {0.00242939, -0.00176781}, {0.00206917, -0.000530854}, {0.000889091, 0.000638683}, {-0.00042233, 0.000863347}, {-0.0012548, 0.000368828}, {-0.00149871, 0.000137595}, {-0.000600014, 0.00125626}, {0.000440209, 0.0019785}, {0.00101314, 0.0018198}, {0.00122965, 0.000677783}, {0.0013687, -0.00072864}, {0.00146561, -0.00146476}, {0.00123002, -0.00118843}, {0.000388083, -0.000386593}, {-0.000907184, 0.000168035}, {-0.00204432, 0.000181923}, {-0.00238166, 6.42756e-05}, {-0.00175623, 0.000404465}, {0.00199557, 0.000263447}, {0.00247435, 0.000538173}, {0.00180618, 0.000268668}, {0.000754365, -0.000364617}, {4.72554e-05, -0.000926542}, {-0.00026345, -0.00110933}, {-0.000678776, -0.000972234}, {-0.00153395, -0.000817447}, {-0.00246206, -0.000843655}, {-0.00263013, -0.000935596}, {-0.00154663, -0.000803184}, {0.000360354, 
-0.000324686}, {0.00282182, -0.000181848}, {0.00180775, 0.000362276}, {7.00146e-06, 0.000732201}, {-0.00126844, 0.000697148}, {-0.00137827, 0.000289147}, {-0.000824003, -0.000228184}, {-0.000583612, -0.000584569}, {-0.00102006, -0.00070715}, {-0.00148547, -0.000717379}, {-0.00102567, -0.000744172}, {0.000534689, -0.000757712}, {0.00224657, -0.000600078}, {0.00133015, 0.000104398}, {-0.000843026, 0.00102754}, {-0.00256504, 0.00167641}, {-0.00270407, 0.00161494}, {-0.00144203, 0.000923632}, {-6.64642e-06, 0.000113986}, {0.000547055, -0.000334838}, {0.000289877, -0.000366624}, {0.000180305, -0.000299112}, {0.000918105, -0.000412609}, {0.00208642, -0.000612658}, {0.00248263, -0.000519398}, {-0.0015507, 0.00082568}, {-0.0034411, 0.00164957}, {-0.00371457, 0.0019666}, {-0.00222345, 0.0015614}, {-8.25219e-05, 0.000764932}, {0.00134496, 0.00014339}, {0.00160211, -6.27988e-06}, {0.00133526, 0.00013365}, {0.00140551, 0.0001729}, {0.00186843, -2.94509e-05}, {0.00188474, -0.000196806}, {0.000660482, 6.84618e-05}, {-0.00359162, 0.00117955}, {-0.00394685, 0.00137818}, {-0.0025872, 0.00121969}, {-0.000423216, 0.000818799}, {0.00123655, 0.000463904}, {0.00176489, 0.000358813}, {0.00156645, 0.000459431}, {0.00142312, 0.000557829}, {0.00155579, 0.000518922}, {0.00138269, 0.00042539}, {0.000221478, 0.000483263}, {-0.00179918, 0.000785753}, {-0.00331757, 0.000681835}, {-0.00220791, 0.000308687}, {-0.000444879, 0.000212208}, {0.000802004, 0.000435465}, {0.00101794, 0.000751507}, {0.000661517, 0.000891308}, {0.000525362, 0.000800192}, {0.000826648, 0.000669702}, {0.000968204, 0.000719148}, {0.000214973, 0.000952495}, {-0.00141191, 0.00114009}, {-0.00296008, 0.00104732}, {-0.00133102, -0.00031431}, {-1.68337e-05, -0.000566196}, {0.000632161, 9.58244e-05}, {0.000179505, 0.00109555}, {-0.000724178, 0.00159265}, {-0.00104486, 0.0012803}, {-0.00043404, 0.000641017}, {0.000415266, 0.000418534}, {0.000483599, 0.000846099}, {-0.000509911, 0.00141033}, {-0.00176972, 0.0013876}, {-0.00216637, 
0.000610358}, {0.000378534, -0.00093279}, {0.000695835, -0.000328522}, {-0.000191029, 0.00120703}, {-0.00165606, 0.00235426}, {-0.0024236, 0.00215362}, {-0.00179325, 0.000914903}, {-0.000322277, -0.000121804}, {0.000712069, -2.65834e-05}, {0.000537899, 0.00091902}, {-0.000409751, 0.00155347}, {-0.000993859, 0.00105462}, {-0.000563175, -0.000187827}, {0.00042485, -0.000618514}, {-0.000277729, 0.000935412}, {-0.00173402, 0.00267169}, {-0.00280059, 0.00302497}, {-0.00248115, 0.0016338}, {-0.000904025, -0.000295759}, {0.000754963, -0.00114656}, {0.00134079, -0.000423828}, {0.000739223, 0.00084546}, {-0.000135323, 0.00122109}, {-0.000338016, 0.000335609}, {0.000142723, -0.000740402}, {-0.000675449, 0.0003245}, {-0.00135006, 0.0021808}, {-0.00207717, 0.00321564}, {-0.00200301, 0.00234654}, {-0.000837009, 0.000130504}, {0.000759899, -0.0017094}, {0.00170088, -0.00187498}, {0.00142337, -0.000645998}, {0.000350221, 0.000518958}, {-0.000572076, 0.000502558}, {-0.000785952, -0.00036669}, {-0.0005826, -0.00072658}, {-0.00117033, 0.0010344}, {-0.000890367, 0.00239042}, {-0.000582166, 0.00238781}, {5.51232e-05, 0.000768668}, {0.000968865, -0.00134512}, {0.00164892, -0.00244056}, {0.00152962, -0.00196554}, {0.000524819, -0.000735357}, {-0.000830946, -2.62602e-05}, {-0.00179096, -0.000286841}, {-0.00197987, -0.000759252}, {-0.00161236, -0.000370295}, {9.67618e-07, 0.000931598}, {0.000995034, 0.00153071}, {0.00139055, 0.000986414}, {0.00141007, -0.000436504}, {0.00130878, -0.00174739}, {0.00104571, -0.00210457}, {0.000362801, -0.00153128}, {-0.000817367, -0.000790513}, {-0.00211045, -0.000571193}, {-0.00284145, -0.000813133}, {-0.00255406, -0.000842124}, {-0.00139341, -0.000159969}, {0.00145723, -0.00051303}, {0.00164934, -0.000286049}, {0.000923179, -0.000373457}, {-1.50408e-05, -0.000625309}, {-0.000612659, -0.00074709}, {-0.000919935, -0.00060566}, {-0.00136618, -0.000365436}, {-0.00213309, -0.000317819}, {-0.00280477, -0.000576078}, {-0.00268961, -0.00094449}, {-0.0015172, 
-0.00109731}, {0.000187925, -0.000893071}, {0.00129587, -0.000798419}, {0.000158906, 0.00020614}, {-0.00117883, 0.000975943}, {-0.00170154, 0.00115875}, {-0.00124111, 0.000867268}, {-0.000550201, 0.000487262}, {-0.000471313, 0.000266527}, {-0.00105364, 0.000102517}, {-0.00151183, -0.000268484}, {-0.00106577, -0.000893038}, {0.000193081, -0.00144211}, {0.00129811, -0.0014609}, {-0.000694365, 0.000111926}, {-0.00241287, 0.00150284}, {-0.00303923, 0.00224209}, {-0.00209269, 0.00203013}, {-0.00032754, 0.00130537}, {0.000969676, 0.000745078}, {0.00113847, 0.000600864}, {0.000634264, 0.000529758}, {0.000405736, 6.77302e-05}, {0.000827027, -0.000762891}, {0.00128459, -0.00135793}, {0.000842255, -0.00107487}, {-0.00314295, 0.00121727}, {-0.00398818, 0.00210678}, {-0.00298255, 0.00216122}, {-0.000715846, 0.00155526}, {0.00143399, 0.000959614}, {0.00244066, 0.000853001}, {0.00235477, 0.00107475}, {0.00196991, 0.0010571}, {0.00183944, 0.00048565}, {0.001678, -0.000297241}, {0.000758259, -0.000573442}, {-0.00112358, 5.01733e-05}, {-0.00410814, 0.00137059}, {-0.00330837, 0.00137364}, {-0.00115352, 0.00106281}, {0.00107898, 0.000850138}, {0.0023451, 0.00103993}, {0.00255984, 0.00149989}, {0.00236571, 0.00177804}, {0.00226054, 0.00155252}, {0.00201827, 0.000965194}, {0.00100376, 0.000502181}, {-0.000956968, 0.000534863}, {-0.00308429, 0.000973054}, {-0.0028829, 0.000436756}, {-0.00110773, 0.000167191}, {0.000651311, 0.000473464}, {0.00146196, 0.00129255}, {0.00138454, 0.00213227}, {0.00117829, 0.0024759}, {0.00135942, 0.00221233}, {0.00160184, 0.00168487}, {0.00110211, 0.0013266}, {-0.000466811, 0.00125253}, {-0.00240698, 0.00120983}, {-0.00346005, 0.0009182}, {-0.000632782, -0.000577207}, {0.000635365, -3.94854e-05}, {0.000771369, 0.0014756}, {-3.73475e-05, 0.00296403}, {-0.000741049, 0.00343415}, {-0.000542003, 0.00276521}, {0.000282592, 0.00173449}, {0.000678746, 0.00121102}, {-6.88672e-05, 0.00132984}, {-0.00153606, 0.00146107}, {-0.00249666, 0.000976697}, {-0.0020841, 
1.17346e-05}, {0.000753646, -0.000548754}, {0.000617308, 0.00129148}, {-0.000695988, 0.0034929}, {-0.00203612, 0.00442797}, {-0.00223574, 0.00347947}, {-0.00123133, 0.00157252}, {-9.02653e-05, 0.000255751}, {7.34624e-05, 0.000248659}, {-0.000794045, 0.00088217}, {-0.00166211, 0.00094888}, {-0.00148988, 6.42361e-05}, {-0.00031348, -0.000863744}, {0.000456483, 0.000608535}, {-0.000604918, 0.00313614}, {-0.0020511, 0.00475253}, {-0.00264988, 0.00410325}, {-0.0019219, 0.00164859}, {-0.000588009, -0.000746064}, {0.000149688, -0.00151773}, {-0.000225379, -0.000745959}, {-0.00108693, 0.000154799}, {-0.00135942, 2.18475e-05}, {-0.000650927, -0.000822873}, {0.000322916, -0.000945364}, {-0.000607399, 0.00182776}, {-0.00131512, 0.0038504}, {-0.00175392, 0.00399247}, {-0.00134636, 0.00187678}, {-0.000332892, -0.00105811}, {0.000423803, -0.00281499}, {0.000261107, -0.00258398}, {-0.000646136, -0.00129865}, {-0.00146371, -0.000514196}, {-0.00151269, -0.000779952}, {-0.000911194, -0.00115304}, {-0.000411512, -0.000322679}, {-0.000867282, 0.00193138}, {-0.000486437, 0.00276038}, {-1.35831e-05, 0.00172419}, {0.000544091, -0.000666089}, {0.00087913, -0.00279809}, {0.000624222, -0.00336212}, {-0.000271843, -0.00244049}, {-0.00141094, -0.00125204}, {-0.0022103, -0.000853659}, {-0.00233624, -0.00111864}, {-0.00191303, -0.00100408}, {-0.00133161, 0.000197752}, {0.000194878, 0.000797578}, {0.00110693, 0.000825472}, {0.0013971, -0.000212241}, {0.00116602, -0.00165516}, {0.000612288, -0.00249766}, {-0.000157435, -0.00227736}, {-0.00111529, -0.00143563}, {-0.0021413, -0.000817017}, {-0.00291883, -0.000823905}, {-0.00306116, -0.0010721}, {-0.00239347, -0.00087611}, {-0.00113639, -5.18101e-05}, {0.00102125, -0.000807487}, {0.00105345, -0.000485227}, {0.000166927, -0.000213507}, {-0.00094218, 1.24723e-05}, {-0.00166322, 0.000317754}, {-0.0019481, 0.000728524}, {-0.0021791, 0.00105711}, {-0.00260779, 0.00102987}, {-0.00297418, 0.000546126}, {-0.0027215, -0.000186904}, {-0.00160397, 
-0.00078721}, {-6.61755e-05, -0.000985953}, {0.000329234, -0.000542752}, {-0.000719155, 0.000767918}, {-0.00174749, 0.00168844}, {-0.00198482, 0.00193268}, {-0.00145141, 0.00181473}, {-0.000869482, 0.00178525}, {-0.000890393, 0.00189163}, {-0.00142466, 0.00172581}, {-0.00174984, 0.000913185}, {-0.00128338, -0.000368005}, {-0.000234127, -0.00139491}, {0.000537466, -0.00147333}, {-0.00168221, 0.000800791}, {-0.00280472, 0.00215657}, {-0.00273725, 0.00261712}, {-0.00145783, 0.00229386}, {0.000145804, 0.00192936}, {0.001059, 0.00204242}, {0.000997031, 0.00236793}, {0.000521408, 0.00216271}, {0.000344534, 0.00106061}, {0.000553847, -0.000408225}, {0.000548347, -0.00119428}, {-0.000255449, -0.0006658}, {-0.003496, 0.00170968}, {-0.00344178, 0.00222347}, {-0.00187325, 0.00199334}, {0.000319292, 0.00161855}, {0.00200026, 0.00177115}, {0.00264115, 0.00244428}, {0.00254026, 0.00291995}, {0.00227272, 0.0024991}, {0.0019883, 0.00126046}, {0.00129319, 7.35466e-05}, {-0.000174447, -0.000171995}, {-0.00209945, 0.000623649}, {-0.00360734, 0.00133873}, {-0.0021703, 0.00118988}, {-5.05481e-05, 0.00113971}, {0.00168386, 0.00162029}, {0.00255812, 0.00257233}, {0.002828, 0.00339162}, {0.00292124, 0.00344}, {0.00282294, 0.00263503}, {0.00206942, 0.00154151}, {0.000362512, 0.000871077}, {-0.00185049, 0.000872856}, {-0.00348659, 0.00119725}, {-0.00194572, 0.000339146}, {-0.000244247, 0.000657071}, {0.000915061, 0.00177077}, {0.00126201, 0.00321189}, {0.00131316, 0.00416785}, {0.00162389, 0.00412598}, {0.00207051, 0.00326743}, {0.00192382, 0.00224869}, {0.000654039, 0.00159925}, {-0.00135816, 0.00132319}, {-0.00296409, 0.00106805}, {-0.00316435, 0.000645663}, {6.55117e-05, 0.000132232}, {0.000617656, 0.00185297}, {0.000100959, 0.00404774}, {-0.000654945, 0.00534757}, {-0.000700874, 0.00498998}, {3.09265e-05, 0.00343747}, {0.000653185, 0.00188692}, {0.000252531, 0.00116799}, {-0.00111913, 0.00110664}, {-0.00240307, 0.000938406}, {-0.00250864, 0.000294188}, {-0.00135208, -0.000286716}, 
{0.000868648, 0.00137508}, {-0.000104883, 0.00421796}, {-0.00161118, 0.00620505}, {-0.00236684, 0.00592248}, {-0.00188737, 0.00362859}, {-0.000899015, 0.00100207}, {-0.000546404, -0.000371853}, {-0.00120603, -0.000307679}, {-0.00210338, 0.000113344}, {-0.0021182, -0.000103027}, {-0.000964516, -0.000716191}, {0.000450775, -0.000497793}, {0.000207818, 0.00318285}, {-0.00130497, 0.00579402}, {-0.00243518, 0.00617887}, {-0.00240675, 0.0038693}, {-0.00157165, 0.000398834}, {-0.00097118, -0.00202301}, {-0.00123945, -0.00238234}, {-0.00196024, -0.00149752}, {-0.00212286, -0.000885259}, {-0.00121054, -0.00107458}, {0.00018794, -0.0011001}, {0.0008803, 0.00032464}, {-0.000761788, 0.0039177}, {-0.00145842, 0.00507368}, {-0.00148637, 0.00364586}, {-0.000942266, 0.0003575}, {-0.000538492, -0.00269366}, {-0.000845693, -0.0038141}, {-0.00167743, -0.00304428}, {-0.00224425, -0.00180712}, {-0.00193182, -0.00131005}, {-0.000927982, -0.00134559}, {-6.83017e-05, -0.000687589}, {-4.55839e-05, 0.00136361}, {-0.000761573, 0.00278107}, {-0.000234762, 0.00248914}, {0.000317487, 0.000440221}, {0.0004477, -0.00213936}, {-0.000121671, -0.00363042}, {-0.00118122, -0.00338277}, {-0.00214753, -0.00214008}, {-0.00251903, -0.00116332}, {-0.00225303, -0.000964868}, {-0.00172029, -0.000920589}, {-0.00130623, -0.00011436}, {-0.00106494, 0.00148846}, {0.00024908, 0.000629334}, {0.00112484, 5.72396e-05}, {0.00116563, -0.00109332}, {0.000382475, -0.00203896}, {-0.000789633, -0.00211082}, {-0.00183343, -0.00134922}, {-0.00249117, -0.000433324}, {-0.00281178, -2.4032e-05}, {-0.00291254, -0.000171425}, {-0.00274451, -0.000360928}, {-0.00212756, -0.000126269}, {-0.00102379, 0.000410084}, {0.00121868, -0.000670007}, {0.000845468, -7.05047e-05}, {-0.000563981, 0.000589868}, {-0.00205899, 0.00117309}, {-0.00281552, 0.00173963}, {-0.0027804, 0.00231553}, {-0.00253607, 0.00270756}, {-0.00257146, 0.00259391}, {-0.00272752, 0.0018272}, {-0.00238943, 0.000651798}, {-0.00122431, -0.0004006}, {0.000306085, 
-0.000870168}, {0.00023735, 0.000169826}, {-0.000975848, 0.00148156}, {-0.00212441, 0.00227317}, {-0.00246224, 0.00252934}, {-0.002011, 0.00276566}, {-0.00144212, 0.00330621}, {-0.00134638, 0.00380937}, {-0.00163725, 0.00355609}, {-0.00168626, 0.00222682}, {-0.00104194, 0.000374539}, {1.08809e-05, -0.000920227}, {0.000639579, -0.000923125}, {-0.00167471, 0.00144179}, {-0.00256276, 0.0023415}, {-0.00241464, 0.0024758}, {-0.00133867, 0.00239653}, {-8.33317e-05, 0.00284382}, {0.000623536, 0.00382888}, {0.000661179, 0.00449995}, {0.000479659, 0.00395831}, {0.000519103, 0.00221804}, {0.000700286, 0.000333267}, {0.000514513, -0.000472097}, {-0.000379928, 0.000148812}, {-0.00299063, 0.00178471}, {-0.00263682, 0.00188112}, {-0.00124121, 0.00176059}, {0.00045278, 0.00214826}, {0.0017348, 0.0033027}, {0.00237436, 0.00458413}, {0.00256959, 0.00495634}, {0.00252461, 0.00395717}, {0.00213843, 0.00218478}, {0.00114506, 0.000806929}, {-0.000445777, 0.000555252}, {-0.00209478, 0.00116675}, {-0.00268127, 0.00118712}, {-0.00141003, 0.00128755}, {8.66201e-05, 0.00196049}, {0.00129488, 0.00328712}, {0.00216343, 0.00471442}, {0.00284479, 0.00540091}, {0.00326913, 0.00490115}, {0.00305032, 0.00354661}, {0.00186178, 0.00215928}, {-9.08147e-05, 0.00137236}, {-0.00201181, 0.00119917}, {-0.00300126, 0.00122708}, {-0.00116756, 0.000909944}, {-0.000296202, 0.0021814}, {0.000102549, 0.00404616}, {0.000382626, 0.00560597}, {0.00101114, 0.00604626}, {0.00190104, 0.00525103}, {0.0023531, 0.00383196}, {0.00170404, 0.00256591}, {4.64022e-05, 0.00179014}, {-0.00171486, 0.00131392}, {-0.00256568, 0.000868226}, {-0.00219669, 0.000574464}, {0.000130329, 0.00204781}, {-0.000447043, 0.00472096}, {-0.00131336, 0.00684164}, {-0.00148165, 0.00714546}, {-0.00069716, 0.00559183}, {0.000284722, 0.00333404}, {0.000446043, 0.00167319}, {-0.000466081, 0.00101348}, {-0.00166205, 0.000794723}, {-0.00206656, 0.000379372}, {-0.00136156, -8.04534e-05}, {-0.000281149, 0.000275429}, {0.000188126, 0.0042154}, 
{-0.00159028, 0.00716199}, {-0.0027869, 0.00795655}, {-0.00266121, 0.00605702}, {-0.00171094, 0.00277782}, {-0.00107464, 0.000132255}, {-0.00134268, -0.000841632}, {-0.00197922, -0.000688349}, {-0.00192975, -0.000583599}, {-0.000787651, -0.000884621}, {0.000677016, -0.000705298}, {0.00118786, 0.00104095}, {-0.000671626, 0.00573111}, {-0.00233448, 0.00738883}, {-0.0028127, 0.00606795}, {-0.00226063, 0.00253427}, {-0.00168196, -0.000987316}, {-0.00180508, -0.00268707}, {-0.00231453, -0.00252145}, {-0.00221803, -0.00181284}, {-0.000968108, -0.00161727}, {0.000817859, -0.00157524}, {0.00180554, -0.000419976}, {0.00115696, 0.0024051}, {-0.001204, 0.00516846}, {-0.00162698, 0.00494292}, {-0.0012682, 0.00229898}, {-0.000945128, -0.00114084}, {-0.00133418, -0.00334519}, {-0.00219961, -0.00352788}, {-0.00258819, -0.00255407}, {-0.00180596, -0.00179433}, {-0.000193999, -0.00164776}, {0.00109442, -0.00123954}, {0.00112421, 0.0004306}, {1.89061e-05, 0.00311067}, {-0.000589831, 0.00275862}, {0.000110773, 0.00154341}, {0.000437935, -0.000699136}, {-0.000204417, -0.00254751}, {-0.00155355, -0.00296892}, {-0.00264895, -0.00213355}, {-0.00268869, -0.00108587}, {-0.00175274, -0.000644117}, {-0.000690906, -0.000643559}, {-0.000296111, -0.000263718}, {-0.00058353, 0.000931149}, {-0.00087713, 0.00233777}, {0.000668283, 0.000248119}, {0.00132806, -0.000437232}, {0.000738037, -0.00109884}, {-0.000816317, -0.0012108}, {-0.0023579, -0.000613787}, {-0.00304042, 0.000312979}, {-0.00280838, 0.000972553}, {-0.00228455, 0.00105885}, {-0.00204254, 0.000768028}, {-0.0020198, 0.000526613}, {-0.00166182, 0.000528644}, {-0.000631287, 0.00055431}, {0.00176357, -0.000286297}, {0.000612084, 0.000609432}, {-0.00143759, 0.00145769}, {-0.0030684, 0.00211654}, {-0.00343171, 0.0027218}, {-0.00278635, 0.00333831}, {-0.00207308, 0.00371553}, {-0.00189469, 0.00344003}, {-0.0019428, 0.00235859}, {-0.00143668, 0.000846352}, {-0.000100277, -0.00037913}, {0.00135214, -0.000762469}, {0.000434445, 0.000783103}, 
{-0.00120828, 0.00181673}, {-0.00258284, 0.00235325}, {-0.00292445, 0.00271486}, {-0.00234709, 0.00345433}, {-0.00160745, 0.00455722}, {-0.00130022, 0.00526206}, {-0.00130219, 0.00473033}, {-0.00101084, 0.0029208}, {-0.000107747, 0.000784285}, {0.000959583, -0.000457078}, {0.00130316, -0.00028696}, {-0.00140426, 0.00159201}, {-0.00234945, 0.00198777}, {-0.00232965, 0.0020839}, {-0.00145429, 0.00262293}, {-0.000353497, 0.00398803}, {0.000386349, 0.00557276}, {0.000671157, 0.00618422}, {0.000820179, 0.00513094}, {0.00108491, 0.00293582}, {0.00129026, 0.000955324}, {0.000985641, 0.000247738}, {-3.87389e-05, 0.000761993}, {-0.00237468, 0.00153928}, {-0.00215182, 0.0015805}, {-0.00114885, 0.00201655}, {0.000170784, 0.00331119}, {0.00136561, 0.00515003}, {0.0022223, 0.00649228}, {0.00270601, 0.00639769}, {0.00277501, 0.00485909}, {0.00230399, 0.00283682}, {0.00122444, 0.00145334}, {-0.000262247, 0.00111188}, {-0.00165088, 0.00135585}, {-0.00201851, 0.00137731}, {-0.00137103, 0.00211839}, {-0.000484952, 0.0035597}, {0.000571075, 0.0053218}, {0.00176368, 0.00661389}, {0.00285414, 0.00676431}, {0.00338151, 0.005726}, {0.00295488, 0.00409583}, {0.00160775, 0.00263604}, {-0.000130185, 0.00174453}, {-0.00154364, 0.00134154}, {-0.00217304, 0.0012096}, {-0.00111807, 0.00227216}, {-0.00127286, 0.00436788}, {-0.00124095, 0.00639557}, {-0.000527549, 0.00739678}, {0.000797411, 0.00700213}, {0.00199679, 0.00563142}, {0.00224587, 0.00406707}, {0.00135669, 0.00282933}, {-5.03695e-05, 0.00193582}, {-0.00109495, 0.00121081}, {-0.00137594, 0.000747473}, {-0.00120017, 0.000984002}, {-0.000799825, 0.00436847}, {-0.00223745, 0.00722247}, {-0.00281127, 0.00846958}, {-0.00205333, 0.0075343}, {-0.000625014, 0.00524057}, {0.000318493, 0.00301769}, {0.00021019, 0.00170262}, {-0.000462619, 0.00107437}, {-0.00074718, 0.000486608}, {-0.000253058, -0.000144515}, {0.000429062, -7.65462e-05}, {0.000343932, 0.00148698}, {-0.00134733, 0.00636626}, {-0.00330364, 0.00850293}, {-0.00364703, 0.00783554}, 
{-0.00265319, 0.00497093}, {-0.00153655, 0.00181632}, {-0.00117674, -7.14351e-05}, {-0.00130248, -0.000614516}, {-0.000931936, -0.000807034}, {0.000416027, -0.00130901}, {0.00201253, -0.00155793}, {0.00248166, -0.000347455}, {0.00111728, 0.00270967}, {-0.00192699, 0.00660756}, {-0.00312919, 0.00695557}, {-0.00279919, 0.00456761}, {-0.00199951, 0.0011128}, {-0.00175629, -0.00136787}, {-0.00197247, -0.00212369}, {-0.00161602, -0.0019814}, {2.94031e-05, -0.0020944}, {0.00236069, -0.00251844}, {0.00377906, -0.00211064}, {0.00309261, 0.000135028}, {0.000647509, 0.00370285}, {-0.00149831, 0.0046433}, {-0.001407, 0.00349878}, {-0.000897936, 0.000836171}, {-0.00104484, -0.00156051}, {-0.00187465, -0.00248865}, {-0.00229402, -0.00220971}, {-0.00125325, -0.00186899}, {0.00103509, -0.00211821}, {0.00308684, -0.00238043}, {0.0034149, -0.00151485}, {0.00186506, 0.000807039}, {-0.000276379, 0.00345293}, {2.21473e-05, 0.00173809}, {0.000698765, 0.000473142}, {0.000419186, -0.000968488}, {-0.000989215, -0.00160609}, {-0.00244581, -0.00126079}, {-0.00260952, -0.000611524}, {-0.00118894, -0.000426697}, {0.000732035, -0.000763853}, {0.0016879, -0.000946242}, {0.001175, -0.000330561}, {3.39965e-05, 0.000926242}, {-0.000478581, 0.001901}, {0.00154008, -0.000206973}, {0.00156732, -0.000400041}, {7.41494e-05, -0.000267091}, {-0.00201443, 0.000269015}, {-0.00325338, 0.00100147}, {-0.00295024, 0.00155809}, {-0.00171113, 0.00167696}, {-0.000753479, 0.0013894}, {-0.000694384, 0.000948383}, {-0.00102477, 0.000584833}, {-0.000770711, 0.000333006}, {0.000360217, 8.44586e-05}, {0.00186264, 0.00022094}, {-5.23372e-05, 0.00119935}, {-0.00227965, 0.00190951}, {-0.00342172, 0.00236577}, {-0.00302498, 0.00282837}, {-0.00184894, 0.00335118}, {-0.00100949, 0.00358948}, {-0.000879113, 0.00309693}, {-0.000839542, 0.00182768}, {-0.000100609, 0.000326464}, {0.00128725, -0.000621994}, {0.00229895, -0.000585715}, {0.000200884, 0.00111429}, {-0.00163536, 0.00176005}, {-0.00279638, 0.00209268}, {-0.00274624, 
0.00264774}, {-0.00188855, 0.00379729}, {-0.0010681, 0.00512943}, {-0.000729571, 0.00566254}, {-0.000573505, 0.00473298}, {-2.67328e-05, 0.0026937}, {0.000989068, 0.000715804}, {0.00181155, -0.000149942}, {0.00160535, 0.000224563}, {-0.00136764, 0.00148465}, {-0.00217036, 0.0016419}, {-0.00200225, 0.00199952}, {-0.00109176, 0.00316358}, {-5.77449e-05, 0.0050243}, {0.000631244, 0.0065833}, {0.000974006, 0.00672095}, {0.00125359, 0.00519826}, {0.00158657, 0.00292302}, {0.00170031, 0.00120678}, {0.00118943, 0.000711506}, {-6.45869e-07, 0.00107013}, {-0.00195023, 0.00154243}, {-0.00179105, 0.00189847}, {-0.000956701, 0.0029312}, {0.000222618, 0.00469048}, {0.00137826, 0.0065037}, {0.00225456, 0.00735722}, {0.00272337, 0.00669574}, {0.00271171, 0.00489771}, {0.00216871, 0.00297272}, {0.00113323, 0.00176315}, {-0.000169929, 0.00139191}, {-0.00133759, 0.00143066}, {-0.0017792, 0.00218462}, {-0.00163111, 0.00352628}, {-0.00097876, 0.00523263}, {0.000195696, 0.00673274}, {0.00161796, 0.00741956}, {0.0027547, 0.00701766}, {0.00310866, 0.00574842}, {0.00254461, 0.00415704}, {0.00136593, 0.00276132}, {8.11069e-05, 0.00182156}, {-0.000925202, 0.00138445}, {-0.00153561, 0.00147572}, {-0.00176118, 0.00384196}, {-0.00248835, 0.00613248}, {-0.00225474, 0.00760546}, {-0.000938517, 0.00774121}, {0.000779032, 0.00678386}, {0.00192464, 0.00539543}, {0.00200691, 0.00407296}, {0.00133241, 0.0028903}, {0.000594913, 0.00175883}, {0.000176473, 0.000856728}, {-0.000139405, 0.000706218}, {-0.000781128, 0.00175313}, {-0.00237342, 0.00576071}, {-0.00385602, 0.00799681}, {-0.0035961, 0.00813768}, {-0.00200032, 0.00653029}, {-0.000315142, 0.00444386}, {0.000526001, 0.00289121}, {0.000634738, 0.00190366}, {0.000800818, 0.000907301}, {0.00146162, -0.000284669}, {0.00210308, -0.00096813}, {0.00173442, -0.000121442}, {-2.74434e-05, 0.00248231}, {-0.00306346, 0.00651427}, {-0.00433561, 0.00755447}, {-0.00360454, 0.00609212}, {-0.00205619, 0.00351098}, {-0.000954652, 0.00144609}, {-0.000454266, 
0.00040217}, {0.000300936, -0.000332987}, {0.00195782, -0.0015058}, {0.0039316, -0.00275347}, {0.00470337, -0.00274976}, {0.0032171, -0.000552245}, {1.18578e-05, 0.0032017}, {-0.00278559, 0.00529792}, {-0.00304365, 0.00488066}, {-0.00208558, 0.00269857}, {-0.00131795, 0.000494252}, {-0.00115003, -0.00066359}, {-0.000680505, -0.00114459}, {0.00111467, -0.00197779}, {0.00395805, -0.00341799}, {0.00614145, -0.00439941}, {0.00592998, -0.00350115}, {0.00317673, -0.000485857}, {-0.000434266, 0.00315866}, {-0.00114186, 0.00274722}, {-0.000603376, 0.00171376}, {-0.000292862, 0.000142556}, {-0.000859221, -0.000796188}, {-0.00144753, -0.000996808}, {-0.000631289, -0.00130061}, {0.0019365, -0.0023754}, {0.00486464, -0.0037873}, {0.00609966, -0.00426294}, {0.00474494, -0.00292566}, {0.00186663, -0.000279677}, {-0.000495147, 0.00207097}, {0.0010389, 0.000458118}, {0.00129929, -3.2414e-05}, {0.000254368, -0.00029649}, {-0.00145191, -0.000101111}, {-0.00218416, 0.000116184}, {-0.000931996, -0.000250261}, {0.0016051, -0.0012645}, {0.00356913, -0.00226958}, {0.00361965, -0.00244876}, {0.00210709, -0.00159178}, {0.000614925, -0.000313465}, {0.000357166, 0.000496235}, {0.00231154, -0.000353224}, {0.00139957, 0.000153351}, {-0.000782743, 0.00081739}, {-0.00271041, 0.0013913}, {-0.00298066, 0.00163455}, {-0.00157505, 0.00146486}, {0.000189534, 0.000997357}, {0.000954008, 0.000444072}, {0.00056752, -2.9647e-05}, {7.38775e-05, -0.00036653}, {0.000491613, -0.000563068}, {0.00165481, -0.000583696}, {0.00107511, 0.000844773}, {-0.000968891, 0.00160569}, {-0.0025605, 0.00194004}, {-0.00274394, 0.00205876}, {-0.00171173, 0.00227382}, {-0.000511421, 0.00257089}, {6.06774e-06, 0.00256985}, {-5.62534e-05, 0.00192717}, {8.92828e-05, 0.00077179}, {0.000932978, -0.000286196}, {0.0020071, -0.000619504}, {0.00225286, -9.55433e-05}, {-0.000457187, 0.00142148}, {-0.00175798, 0.00174224}, {-0.00213, 0.00196691}, {-0.00155549, 0.00262817}, {-0.000700077, 0.00381462}, {-0.000222762, 0.00488127}, 
{-0.000169522, 0.00494942}, {-4.29217e-05, 0.00373502}, {0.000552901, 0.00189056}, {0.00137638, 0.000503154}, {0.00169516, 0.000210339}, {0.000994046, 0.000763479}, {-0.00126864, 0.00169309}, {-0.00142429, 0.0018686}, {-0.000845566, 0.00247045}, {2.08382e-05, 0.00381255}, {0.000659182, 0.00545956}, {0.00090646, 0.00641924}, {0.000988042, 0.00595926}, {0.00116566, 0.00427861}, {0.00138367, 0.00237854}, {0.00129204, 0.00124335}, {0.000631733, 0.00110148}, {-0.000412285, 0.00144003}, {-0.00135563, 0.00217956}, {-0.000962677, 0.0028371}, {-0.000158956, 0.00400678}, {0.000746208, 0.00550259}, {0.00149912, 0.0066804}, {0.00198086, 0.00685389}, {0.00215632, 0.00584917}, {0.00198962, 0.00417877}, {0.00144263, 0.00266101}, {0.000567934, 0.00182998}, {-0.00041124, 0.00166503}, {-0.00114971, 0.00183961}, {-0.00156959, 0.00329811}, {-0.00154812, 0.00472258}, {-0.000922947, 0.00600033}, {0.0002038, 0.00677764}, {0.00142044, 0.00685166}, {0.00222583, 0.00621927}, {0.00233875, 0.00506127}, {0.00183873, 0.00368253}, {0.00103489, 0.00243575}, {0.000208034, 0.00164366}, {-0.00053006, 0.00152392}, {-0.00115678, 0.00212534}, {-0.00251414, 0.00470392}, {-0.0031696, 0.00646804}, {-0.00249403, 0.00706199}, {-0.000867051, 0.00662023}, {0.000772703, 0.00571651}, {0.00168179, 0.0047547}, {0.00181524, 0.0036989}, {0.00163996, 0.00237664}, {0.00150699, 0.000980486}, {0.00125336, 0.000178167}, {0.000444341, 0.000640027}, {-0.00100736, 0.00240404}, {-0.00372239, 0.00542758}, {-0.00448722, 0.00671962}, {-0.00334363, 0.00621626}, {-0.00136296, 0.00486449}, {0.00023217, 0.00366658}, {0.00108734, 0.00277141}, {0.00173833, 0.00160672}, {0.0027026, -0.000155468}, {0.00364879, -0.00187089}, {0.00355844, -0.00226777}, {0.00176387, -0.00059971}, {-0.00118108, 0.00251424}, {-0.00399583, 0.00472759}, {-0.00409661, 0.00507554}, {-0.00257529, 0.00385367}, {-0.000928577, 0.00244868}, {0.000117921, 0.00154953}, {0.00108849, 0.000650323}, {0.00281213, -0.0010673}, {0.00511138, -0.00345559}, {0.00656547, 
-0.00512779}, {0.00569569, -0.00457654}, {0.00245072, -0.00163705}, {-0.00149525, 0.00215623}, {-0.00260025, 0.00283664}, {-0.0019566, 0.00252812}, {-0.000833706, 0.0014622}, {-0.000274303, 0.000695937}, {0.000132097, 0.000143541}, {0.00157885, -0.0011095}, {0.00442016, -0.00349888}, {0.00733432, -0.00609325}, {0.00823824, -0.00713493}, {0.00614784, -0.00557337}, {0.00218363, -0.00211808}, {-0.00128994, 0.00124256}, {-0.000123401, 0.000854128}, {0.000453969, 0.000646113}, {0.00030688, 0.000373685}, {-0.000391903, 0.000283943}, {-0.000231813, -0.000262111}, {0.00183423, -0.00195531}, {0.00511927, -0.00451608}, {0.00751976, -0.00659521}, {0.00730252, -0.00679844}, {0.00465911, -0.00489722}, {0.00152116, -0.00206403}, {-0.000180551, 0.000104839}, {0.0018963, -0.000141144}, {0.0014379, 0.000268168}, {-0.000127957, 0.000715867}, {-0.00147232, 0.000855295}, {-0.00105054, 0.000142885}, {0.00129946, -0.00150999}, {0.00403861, -0.00343374}, {0.00524854, -0.00460669}, {0.00432995, -0.00443504}, {0.00243836, -0.00319148}, {0.00126506, -0.00171543}, {0.0013995, -0.000681907}, {0.00225673, 7.23815e-05}, {0.000585681, 0.000943757}, {-0.0015829, 0.00154213}, {-0.00263207, 0.00158107}, {-0.00177623, 0.00103909}, {0.000223906, 0.000176443}, {0.00179653, -0.000649631}, {0.00201579, -0.00120673}, {0.00135432, -0.00146062}, {0.00102523, -0.00146935}, {0.00162118, -0.00124164}, {0.0024511, -0.000723483}, {-0.000154943, 0.00156692}, {-0.00142295, 0.00195714}, {-0.00184099, 0.00190483}, {-0.00121448, 0.00170676}, {-0.000172926, 0.0016225}, {0.000460365, 0.00159834}, {0.000433118, 0.00135799}, {0.000214275, 0.000759356}, {0.000406697, 3.91325e-05}, {0.00104141, -0.000326933}, {0.0014867, -3.42658e-05}, {0.00107903, 0.000762432}, {-0.000753044, 0.00198762}, {-0.000877387, 0.00210716}, {-0.000388019, 0.0022298}, {0.000247307, 0.00273539}, {0.000477011, 0.00351262}, {0.000194824, 0.00396464}, {-0.000187014, 0.00355761}, {-0.000184846, 0.00237644}, {0.000245467, 0.00112622}, {0.000651997, 
0.000554658}, {0.000544468, 0.000839059}, {-9.30268e-05, 0.00151463}, {-0.000403431, 0.00243266}, {0.000286467, 0.00262747}, {0.00100229, 0.00314263}, {0.00127252, 0.00408365}, {0.00100513, 0.00499286}, {0.000531865, 0.00517755}, {0.000259253, 0.00434466}, {0.000279959, 0.0029229}, {0.000328336, 0.00174231}, {0.000111703, 0.0013628}, {-0.00033151, 0.00168054}, {-0.000622446, 0.00216128}, {-0.000137769, 0.00316293}, {0.000474186, 0.00373584}, {0.000923713, 0.00446389}, {0.00107932, 0.0052212}, {0.00103894, 0.00562663}, {0.000964015, 0.00532408}, {0.000885674, 0.00432542}, {0.000685293, 0.00307646}, {0.000265918, 0.00215933}, {-0.000271117, 0.00188898}, {-0.000642302, 0.00215959}, {-0.000597533, 0.00265122}, {-0.00100542, 0.00399852}, {-0.00100453, 0.00490882}, {-0.00062628, 0.0054026}, {3.83936e-05, 0.00552668}, {0.000727159, 0.00534067}, {0.00116081, 0.00480813}, {0.00121274, 0.00389263}, {0.000953263, 0.00274662}, {0.000546559, 0.00176346}, {0.000112018, 0.0013857}, {-0.00031816, 0.00180742}, {-0.000723358, 0.00282935}, {-0.00281408, 0.00435114}, {-0.00311088, 0.00524849}, {-0.00222225, 0.00520514}, {-0.000745311, 0.00477071}, {0.000542117, 0.00434811}, {0.00127201, 0.00379908}, {0.00160782, 0.0027327}, {0.00184164, 0.00114821}, {0.00193386, -0.000278334}, {0.00150077, -0.000626659}, {0.000276326, 0.000483073}, {-0.00143443, 0.002502}, {-0.00414115, 0.00374612}, {-0.00399791, 0.00430666}, {-0.00240286, 0.00385926}, {-0.000554662, 0.00332491}, {0.000764381, 0.00295489}, {0.00168498, 0.00215531}, {0.00273173, 0.000339295}, {0.00391123, -0.00211464}, {0.00442328, -0.00385595}, {0.00335944, -0.00361691}, {0.000717651, -0.00135349}, {-0.00231005, 0.0016348}, {-0.00365285, 0.00232984}, {-0.00278792, 0.00256214}, {-0.00109614, 0.00216252}, {0.000240346, 0.00189326}, {0.0011766, 0.00143774}, {0.00250374, -0.0001259}, {0.00459497, -0.00303389}, {0.00657677, -0.00611828}, {0.00687616, -0.00750867}, {0.00468409, -0.00617734}, {0.000901089, -0.00286973}, {-0.00241721, 
0.000483923}, {-0.00143984, 0.00084618}, {-0.000447711, 0.00106599}, {0.000337655, 0.00107168}, {0.000600646, 0.00100688}, {0.00127956, 6.66812e-05}, {0.00330641, -0.00246444}, {0.00624328, -0.00605344}, {0.00829448, -0.00891638}, {0.00779645, -0.00931868}, {0.00477746, -0.00700737}, {0.00105523, -0.00342405}, {-0.00125896, -0.000496316}, {0.000966925, 7.28597e-05}, {0.00111309, 0.000576299}, {0.000480653, 0.000944169}, {-7.74375e-05, 0.00073422}, {0.000814661, -0.000830886}, {0.00346075, -0.00382169}, {0.00646833, -0.00709314}, {0.00780522, -0.00893505}, {0.00657037, -0.00836397}, {0.00377812, -0.0058766}, {0.00140297, -0.00297302}, {0.000624171, -0.000921167}, {0.00194438, 0.000232843}, {0.000836254, 0.00099782}, {-0.000729045, 0.00137722}, {-0.0012933, 0.000831765}, {6.42774e-05, -0.000917899}, {0.00271073, -0.00339083}, {0.00489816, -0.00548679}, {0.00527083, -0.00621866}, {0.00399808, -0.00540245}, {0.00245975, -0.00369449}, {0.00183763, -0.00197794}, {0.00202921, -0.000700615}, {0.00119042, 0.000918855}, {-0.000561453, 0.00163398}, {-0.00195718, 0.0017527}, {-0.00189511, 0.00112586}, {-0.000381752, -3.45243e-05}, {0.00145983, -0.00125615}, {0.0024085, -0.00209436}, {0.00219995, -0.00236859}, {0.00160762, -0.00216675}, {0.00151792, -0.00166024}, {0.00196498, -0.000940752}, {0.00211146, -3.96837e-05} - }; + const std::vector> wfcr_2 + = {{-1.04493, 0.615688}, {-1.06977, 0.631876}, {-1.11054, 0.657279}, {-1.09964, 0.652241}, + {-1.01219, 0.601667}, {-0.900492, 0.535509}, {-0.851148, 0.504342}, {-0.905186, 0.532964}, + {-1.01925, 0.597305}, {-1.10611, 0.647275}, {-1.11469, 0.65304}, {-1.07155, 0.629455}, + {-1.06919, 0.632572}, {-1.13929, 0.676268}, {-1.20108, 0.714099}, {-1.17741, 0.70052}, + {-1.07525, 0.639493}, {-0.983179, 0.583099}, {-0.986424, 0.581855}, {-1.0835, 0.635772}, + {-1.1872, 0.694674}, {-1.20917, 0.707372}, {-1.14414, 0.670785}, {-1.07073, 0.630447}, + {-1.1097, 0.659275}, {-1.20066, 0.715109}, {-1.23859, 0.737959}, {-1.17679, 0.700538}, + 
{-1.06941, 0.635141}, {-1.01939, 0.602723}, {-1.07668, 0.633135}, {-1.18832, 0.695962}, + {-1.25024, 0.730935}, {-1.20931, 0.707519}, {-1.11409, 0.654257}, {-1.06425, 0.628851}, + {-1.09915, 0.655024}, {-1.17668, 0.701808}, {-1.17599, 0.700685}, {-1.09784, 0.652789}, + {-1.02166, 0.605407}, {-1.02577, 0.604855}, {-1.1085, 0.650449}, {-1.18919, 0.69552}, + {-1.1883, 0.694368}, {-1.10665, 0.648136}, {-1.02461, 0.603665}, {-1.02207, 0.606491}, + {-1.01184, 0.603829}, {-1.07352, 0.639851}, {-1.06674, 0.63446}, {-1.01946, 0.604735}, + {-0.995659, 0.588429}, {-1.02691, 0.604062}, {-1.0787, 0.631808}, {-1.08586, 0.634447}, + {-1.02126, 0.597121}, {-0.928339, 0.545466}, {-0.883314, 0.523111}, {-0.923441, 0.550196}, + {-0.899158, 0.535895}, {-0.979266, 0.581943}, {-1.01385, 0.601005}, {-1.02051, 0.603424}, + {-1.02385, 0.603299}, {-1.02247, 0.599959}, {-0.98985, 0.578848}, {-0.908372, 0.530875}, + {-0.800363, 0.469599}, {-0.72164, 0.426891}, {-0.719673, 0.428941}, {-0.794487, 0.474495}, + {-0.847753, 0.503045}, {-0.979917, 0.579784}, {-1.06852, 0.631067}, {-1.10083, 0.648709}, + {-1.07326, 0.630403}, {-0.987296, 0.577914}, {-0.854969, 0.499752}, {-0.706649, 0.41444}, + {-0.587992, 0.347976}, {-0.541792, 0.323726}, {-0.585514, 0.350606}, {-0.70153, 0.418275}, + {-0.899908, 0.531212}, {-1.07557, 0.634031}, {-1.17943, 0.694395}, {-1.18113, 0.693788}, + {-1.07976, 0.632306}, {-0.904516, 0.528696}, {-0.704858, 0.413064}, {-0.53476, 0.316459}, + {-0.437762, 0.262493}, {-0.437345, 0.263282}, {-0.533149, 0.31855}, {-0.701541, 0.415762}, + {-1.01368, 0.596358}, {-1.18014, 0.694045}, {-1.24311, 0.730044}, {-1.18206, 0.692411}, + {-1.01616, 0.593956}, {-0.796338, 0.466089}, {-0.585168, 0.345371}, {-0.43641, 0.261419}, + {-0.383443, 0.231703}, {-0.436798, 0.261883}, {-0.584997, 0.346602}, {-0.794811, 0.468186}, + {-1.10211, 0.647408}, {-1.20487, 0.707561}, {-1.20547, 0.706679}, {-1.10324, 0.645443}, + {-0.9249, 0.54126}, {-0.718091, 0.422601}, {-0.538746, 0.320817}, {-0.435585, 
0.262183}, + {-0.436396, 0.261854}, {-0.540445, 0.320361}, {-0.719187, 0.423026}, {-0.924581, 0.542895}, + {-1.113, 0.653554}, {-1.14286, 0.670879}, {-1.11329, 0.652757}, {-1.02355, 0.60014}, + {-0.881332, 0.518509}, {-0.717047, 0.425022}, {-0.583374, 0.348633}, {-0.532489, 0.318429}, + {-0.585722, 0.347216}, {-0.720151, 0.423379}, {-0.883396, 0.517889}, {-1.02402, 0.600711}, + {-1.0714, 0.629639}, {-1.07118, 0.629932}, {-1.06498, 0.626702}, {-1.02219, 0.60281}, + {-0.922587, 0.54634}, {-0.793452, 0.472108}, {-0.701665, 0.418007}, {-0.703508, 0.416851}, + {-0.797895, 0.469267}, {-0.927254, 0.543205}, {-1.02524, 0.600537}, {-1.06618, 0.625584}, + {-1.07066, 0.632364}, {-1.14196, 0.676121}, {-1.20383, 0.713861}, {-1.17911, 0.700059}, + {-1.07535, 0.638753}, {-0.981919, 0.582124}, {-0.984444, 0.580758}, {-1.0813, 0.634679}, + {-1.18499, 0.693687}, {-1.20712, 0.706558}, {-1.14271, 0.670185}, {-1.07059, 0.630066}, + {-1.14113, 0.677179}, {-1.24972, 0.743092}, {-1.30064, 0.77366}, {-1.23971, 0.737078}, + {-1.12538, 0.667908}, {-1.07096, 0.633052}, {-1.13158, 0.665463}, {-1.24888, 0.731698}, + {-1.30874, 0.765641}, {-1.25458, 0.734583}, {-1.14303, 0.671605}, {-1.08667, 0.641951}, + {-1.20279, 0.716023}, {-1.30017, 0.774582}, {-1.29903, 0.773363}, {-1.20048, 0.713583}, + {-1.10409, 0.654302}, {-1.10804, 0.65355}, {-1.21047, 0.710638}, {-1.31077, 0.767229}, + {-1.3097, 0.766012}, {-1.20837, 0.708204}, {-1.10709, 0.652335}, {-1.10534, 0.655524}, + {-1.17844, 0.702482}, {-1.23876, 0.738047}, {-1.19943, 0.713613}, {-1.10865, 0.658222}, + {-1.06494, 0.630025}, {-1.11649, 0.657392}, {-1.21185, 0.710495}, {-1.25126, 0.731794}, + {-1.18768, 0.694769}, {-1.0758, 0.631837}, {-1.01978, 0.603169}, {-1.07114, 0.637261}, + {-1.07458, 0.640304}, {-1.12296, 0.667999}, {-1.10069, 0.653581}, {-1.06231, 0.629345}, + {-1.06618, 0.629337}, {-1.11071, 0.652682}, {-1.13538, 0.664695}, {-1.08553, 0.634604}, + {-0.975045, 0.571348}, {-0.882132, 0.520308}, {-0.879736, 0.522713}, {-0.967961, 
0.577041}, + {-0.979676, 0.582276}, {-1.06587, 0.632036}, {-1.10142, 0.651953}, {-1.11062, 0.655856}, + {-1.1075, 0.65173}, {-1.07562, 0.630549}, {-0.989725, 0.578822}, {-0.856301, 0.501377}, + {-0.728391, 0.429099}, {-0.674072, 0.40052}, {-0.724417, 0.43233}, {-0.848649, 0.505872}, + {-0.979843, 0.579951}, {-1.12384, 0.663901}, {-1.20145, 0.708602}, {-1.20388, 0.708299}, + {-1.13006, 0.662763}, {-0.987367, 0.57766}, {-0.802035, 0.469571}, {-0.624138, 0.367771}, + {-0.513752, 0.306069}, {-0.512452, 0.307222}, {-0.620172, 0.37061}, {-0.795639, 0.472713}, + {-1.07506, 0.633833}, {-1.2403, 0.730333}, {-1.3018, 0.765187}, {-1.24365, 0.729105}, + {-1.08006, 0.631723}, {-0.852938, 0.499052}, {-0.622565, 0.366591}, {-0.45203, 0.269866}, + {-0.388963, 0.234878}, {-0.450838, 0.27105}, {-0.619713, 0.368657}, {-0.848402, 0.501463}, + {-1.17916, 0.693359}, {-1.30199, 0.764703}, {-1.30342, 0.763947}, {-1.18252, 0.691534}, + {-0.97088, 0.56762}, {-0.724971, 0.42592}, {-0.511241, 0.304164}, {-0.387754, 0.234222}, + {-0.38784, 0.234322}, {-0.510919, 0.304677}, {-0.723214, 0.42719}, {-0.967602, 0.569545}, + {-1.20358, 0.706572}, {-1.25131, 0.733695}, {-1.20587, 0.705777}, {-1.07361, 0.628004}, + {-0.879535, 0.516072}, {-0.671022, 0.397126}, {-0.509763, 0.305332}, {-0.449406, 0.270429}, + {-0.510742, 0.304617}, {-0.671518, 0.396441}, {-0.878216, 0.516153}, {-1.07083, 0.628829}, + {-1.14175, 0.670052}, {-1.14289, 0.67034}, {-1.10746, 0.649425}, {-1.01958, 0.599055}, + {-0.878265, 0.518646}, {-0.722197, 0.429517}, {-0.618564, 0.369407}, {-0.619678, 0.368504}, + {-0.724415, 0.427276}, {-0.879343, 0.516117}, {-1.01841, 0.597142}, {-1.10517, 0.648415}, + {-1.07089, 0.629711}, {-1.08784, 0.640443}, {-1.10671, 0.65259}, {-1.07164, 0.63361}, + {-0.967419, 0.573961}, {-0.848169, 0.504282}, {-0.796655, 0.472531}, {-0.851287, 0.501844}, + {-0.971227, 0.56971}, {-1.07353, 0.628657}, {-1.10617, 0.648262}, {-1.08661, 0.637928}, + {-1.11352, 0.659089}, {-1.20587, 0.714958}, {-1.24309, 0.737564}, 
{-1.17881, 0.699564}, + {-1.06849, 0.633478}, {-1.0163, 0.600639}, {-1.0726, 0.63113}, {-1.18407, 0.694443}, + {-1.24624, 0.729989}, {-1.20607, 0.706959}, {-1.11256, 0.653862}, {-1.06537, 0.62855}, + {-1.20489, 0.715789}, {-1.30261, 0.774429}, {-1.30082, 0.773075}, {-1.20094, 0.712933}, + {-1.10316, 0.653288}, {-1.10616, 0.652442}, {-1.2082, 0.709777}, {-1.30851, 0.766763}, + {-1.30772, 0.765808}, {-1.20699, 0.708014}, {-1.10674, 0.652034}, {-1.10632, 0.655189}, + {-1.24196, 0.738968}, {-1.30045, 0.773641}, {-1.24726, 0.741281}, {-1.13861, 0.675603}, + {-1.08631, 0.642631}, {-1.14508, 0.674583}, {-1.25718, 0.737854}, {-1.30991, 0.767062}, + {-1.24841, 0.731}, {-1.13115, 0.664375}, {-1.07224, 0.633464}, {-1.12816, 0.669934}, + {-1.17821, 0.700917}, {-1.20023, 0.71342}, {-1.13771, 0.675491}, {-1.06857, 0.633331}, + {-1.07204, 0.633353}, {-1.14667, 0.674633}, {-1.21128, 0.710071}, {-1.18787, 0.695054}, + {-1.08323, 0.634527}, {-0.986582, 0.580744}, {-0.984499, 0.583235}, {-1.07704, 0.640412}, + {-1.06784, 0.634178}, {-1.10102, 0.653052}, {-1.08316, 0.641724}, {-1.0696, 0.632519}, + {-1.08898, 0.641911}, {-1.11056, 0.652098}, {-1.07799, 0.631063}, {-0.974995, 0.570435}, + {-0.854717, 0.501743}, {-0.799953, 0.472682}, {-0.850384, 0.505045}, {-0.966944, 0.574877}, + {-1.01415, 0.600477}, {-1.10138, 0.651245}, {-1.13891, 0.672627}, {-1.14134, 0.672702}, + {-1.1078, 0.650989}, {-1.02241, 0.599037}, {-0.883392, 0.516958}, {-0.728201, 0.427206}, + {-0.623299, 0.368098}, {-0.621454, 0.369282}, {-0.722947, 0.429979}, {-0.875709, 0.519656}, + {-1.06812, 0.630338}, {-1.20095, 0.707869}, {-1.24904, 0.735132}, {-1.20449, 0.707355}, + {-1.07385, 0.629166}, {-0.881776, 0.516164}, {-0.674754, 0.396097}, {-0.513634, 0.303952}, + {-0.45194, 0.269769}, {-0.511184, 0.305241}, {-0.670112, 0.398045}, {-0.875788, 0.517966}, + {-1.17817, 0.693416}, {-1.30084, 0.764601}, {-1.30242, 0.7641}, {-1.18214, 0.69212}, + {-0.971424, 0.568318}, {-0.726289, 0.426163}, {-0.512779, 0.303672}, {-0.388904, 
0.233297}, + {-0.3883, 0.233598}, {-0.510769, 0.304568}, {-0.722696, 0.427561}, {-0.966851, 0.569916}, + {-1.24109, 0.728966}, {-1.3023, 0.763707}, {-1.24407, 0.728168}, {-1.08036, 0.631761}, + {-0.852769, 0.499772}, {-0.621637, 0.367132}, {-0.45052, 0.269493}, {-0.387539, 0.233563}, + {-0.450217, 0.269405}, {-0.620134, 0.367305}, {-0.849488, 0.500483}, {-1.07618, 0.63282}, + {-1.20341, 0.705967}, {-1.20521, 0.706043}, {-1.13058, 0.661754}, {-0.986483, 0.578209}, + {-0.799278, 0.470884}, {-0.619924, 0.36839}, {-0.50952, 0.304951}, {-0.509893, 0.304472}, + {-0.62015, 0.367231}, {-0.797705, 0.469649}, {-0.982737, 0.577377}, {-1.12646, 0.661401}, + {-1.11236, 0.652928}, {-1.10779, 0.650208}, {-1.07401, 0.631043}, {-0.985667, 0.580886}, + {-0.850086, 0.503365}, {-0.721652, 0.429122}, {-0.669176, 0.397736}, {-0.723064, 0.427427}, + {-0.850822, 0.500367}, {-0.983821, 0.57733}, {-1.07008, 0.627881}, {-1.10454, 0.648347}, + {-1.06606, 0.627901}, {-1.10813, 0.653696}, {-1.13019, 0.667865}, {-1.07805, 0.638393}, + {-0.966779, 0.573489}, {-0.875544, 0.519073}, {-0.877057, 0.517943}, {-0.969825, 0.57009}, + {-1.07964, 0.633012}, {-1.1288, 0.661633}, {-1.10523, 0.648594}, {-1.0646, 0.625919}, + {-1.10452, 0.654055}, {-1.18302, 0.701075}, {-1.18079, 0.699708}, {-1.09935, 0.651024}, + {-1.01976, 0.602869}, {-1.0216, 0.602244}, {-1.10343, 0.648611}, {-1.18411, 0.694744}, + {-1.18376, 0.694186}, {-1.10341, 0.647791}, {-1.02382, 0.602792}, {-1.02457, 0.605343}, + {-1.182, 0.701323}, {-1.24273, 0.737311}, {-1.20227, 0.712744}, {-1.10938, 0.656802}, + {-1.0635, 0.628242}, {-1.11361, 0.655887}, {-1.20837, 0.709731}, {-1.24781, 0.731578}, + {-1.18471, 0.694405}, {-1.07387, 0.630771}, {-1.01956, 0.601495}, {-1.07302, 0.63559}, + {-1.17998, 0.700155}, {-1.20233, 0.713009}, {-1.13938, 0.675057}, {-1.06929, 0.6327}, + {-1.07172, 0.632702}, {-1.1456, 0.674243}, {-1.20984, 0.709956}, {-1.18637, 0.694859}, + {-1.08187, 0.633818}, {-0.985647, 0.579423}, {-0.984349, 0.581663}, {-1.07791, 
0.639116}, + {-1.0996, 0.651507}, {-1.10962, 0.65697}, {-1.06894, 0.632458}, {-1.04524, 0.617755}, + {-1.07315, 0.632808}, {-1.11669, 0.656378}, {-1.10742, 0.648933}, {-1.01953, 0.596458}, + {-0.905076, 0.530223}, {-0.851508, 0.501191}, {-0.901462, 0.533148}, {-1.01305, 0.600326}, + {-1.0203, 0.60305}, {-1.06264, 0.627693}, {-1.06956, 0.631499}, {-1.07127, 0.631833}, + {-1.06741, 0.628131}, {-1.02698, 0.602525}, {-0.928732, 0.543495}, {-0.79891, 0.46733}, + {-0.704322, 0.413299}, {-0.702406, 0.414261}, {-0.793746, 0.469485}, {-0.921849, 0.545315}, + {-1.02066, 0.601756}, {-1.11019, 0.654216}, {-1.14079, 0.67186}, {-1.11263, 0.654448}, + {-1.02511, 0.601767}, {-0.885157, 0.518572}, {-0.7219, 0.42267}, {-0.587234, 0.344633}, + {-0.533765, 0.314734}, {-0.584189, 0.345469}, {-0.716846, 0.423727}, {-0.879668, 0.519176}, + {-1.09999, 0.647331}, {-1.20253, 0.707144}, {-1.20356, 0.707013}, {-1.10282, 0.646961}, + {-0.926639, 0.543028}, {-0.721512, 0.423028}, {-0.542383, 0.319125}, {-0.437928, 0.259211}, + {-0.436833, 0.259377}, {-0.539418, 0.319564}, {-0.717516, 0.423597}, {-0.922708, 0.543563}, + {-1.17912, 0.692928}, {-1.24174, 0.728813}, {-1.18134, 0.692403}, {-1.01687, 0.595516}, + {-0.798379, 0.468055}, {-0.587552, 0.346047}, {-0.438091, 0.260123}, {-0.384093, 0.229365}, + {-0.43687, 0.260139}, {-0.585073, 0.346261}, {-0.794943, 0.468596}, {-1.0134, 0.59624}, + {-1.17953, 0.69234}, {-1.18111, 0.692221}, {-1.08034, 0.632519}, {-0.905602, 0.530613}, + {-0.705634, 0.41509}, {-0.534527, 0.316801}, {-0.436732, 0.260698}, {-0.436544, 0.260534}, + {-0.533502, 0.316457}, {-0.703025, 0.414865}, {-0.901596, 0.53067}, {-1.07652, 0.632739}, + {-1.10169, 0.646366}, {-1.07386, 0.629462}, {-0.987618, 0.579122}, {-0.854223, 0.502221}, + {-0.704195, 0.416081}, {-0.584345, 0.34711}, {-0.538627, 0.320402}, {-0.584518, 0.346398}, + {-0.703109, 0.414803}, {-0.850786, 0.500677}, {-0.982751, 0.577725}, {-1.07026, 0.628631}, + {-1.02438, 0.601959}, {-1.02192, 0.600836}, {-0.98774, 0.581656}, 
{-0.904114, 0.533809}, + {-0.79445, 0.470359}, {-0.716003, 0.4243}, {-0.716668, 0.423722}, {-0.79527, 0.468538}, + {-0.902875, 0.530746}, {-0.983777, 0.577929}, {-1.01729, 0.597687}, {-1.02232, 0.600718}, + {-1.02563, 0.605009}, {-1.07541, 0.635206}, {-1.0804, 0.638865}, {-1.01412, 0.600199}, + {-0.92128, 0.545227}, {-0.878868, 0.519145}, {-0.923384, 0.543748}, {-1.01594, 0.596785}, + {-1.07964, 0.63358}, {-1.07226, 0.629465}, {-1.02276, 0.601203}, {-0.996484, 0.586831}, + {-1.01764, 0.601283}, {-1.08006, 0.638125}, {-1.07164, 0.632736}, {-1.02117, 0.602376}, + {-0.994184, 0.585759}, {-1.02338, 0.602105}, {-1.07441, 0.63122}, {-1.08158, 0.634721}, + {-1.01751, 0.596905}, {-0.925899, 0.543716}, {-0.883295, 0.51999}, {-0.926588, 0.546849}, + {-1.07928, 0.638003}, {-1.12852, 0.666717}, {-1.1052, 0.652351}, {-1.06444, 0.627723}, + {-1.06573, 0.627793}, {-1.10846, 0.651937}, {-1.13229, 0.664716}, {-1.08227, 0.634346}, + {-0.97208, 0.569569}, {-0.880115, 0.516712}, {-0.879597, 0.518299}, {-0.970382, 0.573297}, + {-1.07181, 0.632878}, {-1.10599, 0.65259}, {-1.08756, 0.64134}, {-1.07226, 0.632033}, + {-1.08965, 0.641667}, {-1.10974, 0.652268}, {-1.07634, 0.631105}, {-0.972939, 0.569345}, + {-0.852578, 0.498928}, {-0.798308, 0.468614}, {-0.850135, 0.501029}, {-0.968836, 0.572095}, + {-1.02308, 0.602803}, {-1.06604, 0.627856}, {-1.07256, 0.631719}, {-1.07321, 0.63208}, + {-1.06817, 0.628492}, {-1.02686, 0.602845}, {-0.928066, 0.543288}, {-0.797879, 0.466131}, + {-0.703114, 0.411117}, {-0.701461, 0.411711}, {-0.793734, 0.467404}, {-0.923278, 0.544201}, + {-0.996552, 0.585947}, {-1.06638, 0.627066}, {-1.08855, 0.640374}, {-1.06695, 0.627687}, + {-0.998424, 0.586746}, {-0.882562, 0.517475}, {-0.740561, 0.433051}, {-0.619411, 0.361755}, + {-0.570359, 0.333737}, {-0.616389, 0.361889}, {-0.736227, 0.433006}, {-0.878979, 0.516988}, + {-1.02387, 0.601419}, {-1.10642, 0.650034}, {-1.10644, 0.650222}, {-1.02442, 0.60194}, + {-0.881877, 0.517732}, {-0.716058, 0.419767}, {-0.570909, 
0.334288}, {-0.485779, 0.284589}, + {-0.484422, 0.284385}, {-0.567689, 0.333735}, {-0.712813, 0.419026}, {-0.879963, 0.517007}, + {-1.07186, 0.629553}, {-1.12788, 0.662279}, {-1.07234, 0.62956}, {-0.926374, 0.543807}, + {-0.741121, 0.435112}, {-0.571991, 0.336067}, {-0.458048, 0.269567}, {-0.417794, 0.246316}, + {-0.456244, 0.269076}, {-0.569351, 0.335394}, {-0.738834, 0.434614}, {-0.925054, 0.543618}, + {-1.07728, 0.632698}, {-1.07801, 0.632597}, {-0.971053, 0.569618}, {-0.799145, 0.46903}, + {-0.622477, 0.366075}, {-0.488434, 0.288236}, {-0.418855, 0.247976}, {-0.418257, 0.247793}, + {-0.486751, 0.287841}, {-0.620063, 0.365782}, {-0.796599, 0.469021}, {-0.969105, 0.569791}, + {-1.01413, 0.595421}, {-0.970685, 0.569339}, {-0.85451, 0.501219}, {-0.70756, 0.415738}, + {-0.574974, 0.339005}, {-0.487221, 0.288358}, {-0.456992, 0.270943}, {-0.486623, 0.288223}, + {-0.573232, 0.338789}, {-0.704434, 0.415518}, {-0.850838, 0.501053}, {-0.968158, 0.569254}, + {-0.92519, 0.543229}, {-0.882194, 0.51778}, {-0.803052, 0.471756}, {-0.706815, 0.416138}, + {-0.619971, 0.365984}, {-0.568649, 0.336233}, {-0.568759, 0.336209}, {-0.619567, 0.365764}, + {-0.704601, 0.415476}, {-0.798918, 0.470675}, {-0.87795, 0.516724}, {-0.923379, 0.542786}, + {-0.884667, 0.520219}, {-0.883259, 0.51966}, {-0.854793, 0.503429}, {-0.797038, 0.469961}, + {-0.736807, 0.434731}, {-0.711432, 0.419602}, {-0.737571, 0.434541}, {-0.796883, 0.469038}, + {-0.852281, 0.501427}, {-0.879063, 0.517115}, {-0.881391, 0.518406}, {-0.879834, 0.517378}, + {-0.927968, 0.547149}, {-0.97308, 0.574077}, {-0.971079, 0.572991}, {-0.923322, 0.544691}, + {-0.876938, 0.516942}, {-0.877867, 0.516857}, {-0.925038, 0.543949}, {-0.97148, 0.570775}, + {-0.971238, 0.570472}, {-0.925198, 0.54363}, {-0.880027, 0.517624}, {-0.881342, 0.519098}, + {-0.905261, 0.532232}, {-0.986474, 0.579813}, {-1.01989, 0.599225}, {-1.0238, 0.601451}, + {-1.02426, 0.601747}, {-1.02099, 0.599699}, {-0.987607, 0.579669}, {-0.905969, 0.531174}, + {-0.798101, 
0.467542}, {-0.72016, 0.42201}, {-0.720125, 0.422628}, {-0.797852, 0.468896}, + {-0.986293, 0.579881}, {-1.07406, 0.631167}, {-1.10871, 0.651354}, {-1.11517, 0.655243}, + {-1.10903, 0.651682}, {-1.07501, 0.631306}, {-0.988028, 0.579365}, {-0.85407, 0.499886}, + {-0.725942, 0.424484}, {-0.672192, 0.393574}, {-0.724529, 0.425328}, {-0.852022, 0.500918}, + {-1.02151, 0.599866}, {-1.11025, 0.651759}, {-1.14672, 0.673324}, {-1.14635, 0.673516}, + {-1.10985, 0.652165}, {-1.02238, 0.600139}, {-0.882131, 0.516646}, {-0.726101, 0.424177}, + {-0.620723, 0.36236}, {-0.619458, 0.362492}, {-0.72324, 0.42442}, {-0.879634, 0.516698}, + {-1.02751, 0.602403}, {-1.11779, 0.655467}, {-1.14703, 0.673228}, {-1.11639, 0.655914}, + {-1.02655, 0.603206}, {-0.88504, 0.519278}, {-0.720782, 0.42163}, {-0.585325, 0.341387}, + {-0.531499, 0.309997}, {-0.582725, 0.340861}, {-0.717742, 0.420747}, {-0.883837, 0.518304}, + {-1.02796, 0.602183}, {-1.11047, 0.651014}, {-1.10939, 0.651263}, {-1.02593, 0.602986}, + {-0.882233, 0.518555}, {-0.715708, 0.419907}, {-0.570063, 0.333351}, {-0.484523, 0.282643}, + {-0.483116, 0.282058}, {-0.567128, 0.331875}, {-0.713848, 0.418182}, {-0.882855, 0.517167}, + {-1.02199, 0.599068}, {-1.07366, 0.629911}, {-1.02043, 0.599504}, {-0.884401, 0.520205}, + {-0.716345, 0.421387}, {-0.56779, 0.333442}, {-0.47071, 0.275723}, {-0.436787, 0.255572}, + {-0.468615, 0.274475}, {-0.565496, 0.331577}, {-0.715802, 0.419703}, {-0.885756, 0.519153}, + {-0.985163, 0.578322}, {-0.984799, 0.578416}, {-0.880689, 0.517839}, {-0.722333, 0.425213}, + {-0.5733, 0.337599}, {-0.473054, 0.278322}, {-0.426851, 0.250876}, {-0.425935, 0.250373}, + {-0.47102, 0.277097}, {-0.57163, 0.336279}, {-0.721943, 0.424302}, {-0.881215, 0.517432}, + {-0.902077, 0.530131}, {-0.851875, 0.500612}, {-0.727772, 0.42798}, {-0.590537, 0.347631}, + {-0.490625, 0.289009}, {-0.440981, 0.259799}, {-0.427392, 0.251893}, {-0.439815, 0.259496}, + {-0.488748, 0.288593}, {-0.588595, 0.347321}, {-0.726286, 0.427859}, 
{-0.851099, 0.500605}, + {-0.795741, 0.467606}, {-0.727098, 0.427028}, {-0.626118, 0.367814}, {-0.539122, 0.316956}, + {-0.489706, 0.288146}, {-0.472129, 0.278046}, {-0.471836, 0.27824}, {-0.488623, 0.28861}, + {-0.536973, 0.317434}, {-0.623193, 0.368112}, {-0.724465, 0.427131}, {-0.794673, 0.46762}, + {-0.720678, 0.423073}, {-0.676324, 0.396777}, {-0.626258, 0.367444}, {-0.589273, 0.345945}, + {-0.570823, 0.335379}, {-0.566018, 0.332904}, {-0.570768, 0.336172}, {-0.588258, 0.346969}, + {-0.623633, 0.368075}, {-0.672792, 0.396846}, {-0.718107, 0.422911}, {-0.737603, 0.433613}, + {-0.722283, 0.423834}, {-0.728986, 0.42768}, {-0.727991, 0.427139}, {-0.720161, 0.422664}, + {-0.713041, 0.418684}, {-0.713393, 0.41919}, {-0.720525, 0.423738}, {-0.727079, 0.427877}, + {-0.726446, 0.427577}, {-0.719392, 0.423225}, {-0.713255, 0.419219}, {-0.714531, 0.419555}, + {-0.799373, 0.469489}, {-0.854376, 0.501776}, {-0.880596, 0.517094}, {-0.881919, 0.517847}, + {-0.879022, 0.516233}, {-0.882802, 0.518575}, {-0.881332, 0.517737}, {-0.853975, 0.501544}, + {-0.797942, 0.468444}, {-0.73916, 0.433814}, {-0.71441, 0.419334}, {-0.740444, 0.434774}, + {-0.854991, 0.500054}, {-0.988714, 0.578671}, {-1.07639, 0.630481}, {-1.106, 0.648363}, + {-1.07561, 0.630936}, {-0.98788, 0.579518}, {-0.854852, 0.501122}, {-0.706168, 0.413345}, + {-0.587145, 0.343059}, {-0.541148, 0.315861}, {-0.586502, 0.342447}, {-0.705576, 0.412342}, + {-0.989327, 0.579265}, {-1.13501, 0.664759}, {-1.21114, 0.709777}, {-1.2101, 0.70974}, + {-1.13284, 0.664816}, {-0.988005, 0.57971}, {-0.801632, 0.469741}, {-0.622966, 0.36422}, + {-0.511953, 0.298751}, {-0.511106, 0.298363}, {-0.621344, 0.363315}, {-0.801058, 0.468872}, + {-1.0793, 0.631768}, {-1.21307, 0.710265}, {-1.25875, 0.737693}, {-1.21017, 0.710106}, + {-1.07603, 0.631937}, {-0.881893, 0.517766}, {-0.673716, 0.394781}, {-0.51159, 0.298897}, + {-0.449334, 0.26217}, {-0.509696, 0.297844}, {-0.672192, 0.393401}, {-0.882901, 0.516871}, + {-1.1106, 0.649654}, 
{-1.2128, 0.709962}, {-1.21081, 0.70994}, {-1.10632, 0.649904}, + {-0.927359, 0.545417}, {-0.720736, 0.423647}, {-0.540637, 0.316915}, {-0.435294, 0.25434}, + {-0.434083, 0.253519}, {-0.538503, 0.314981}, {-0.720599, 0.421735}, {-0.930431, 0.544346}, + {-1.07923, 0.631452}, {-1.13403, 0.664443}, {-1.07582, 0.631816}, {-0.927302, 0.545923}, + {-0.740516, 0.436481}, {-0.570648, 0.335906}, {-0.456112, 0.26754}, {-0.415358, 0.242998}, + {-0.454166, 0.265765}, {-0.569239, 0.333348}, {-0.741985, 0.434341}, {-0.931255, 0.544775}, + {-0.988196, 0.5792}, {-0.98678, 0.579449}, {-0.881273, 0.518924}, {-0.721905, 0.426146}, + {-0.572437, 0.338054}, {-0.472017, 0.278022}, {-0.425594, 0.249832}, {-0.424547, 0.24896}, + {-0.47006, 0.275868}, {-0.571895, 0.33565}, {-0.723826, 0.424366}, {-0.88424, 0.518015}, + {-0.852081, 0.500741}, {-0.799693, 0.470788}, {-0.674951, 0.398321}, {-0.545042, 0.322126}, + {-0.46119, 0.272203}, {-0.428512, 0.252143}, {-0.422002, 0.247952}, {-0.426989, 0.251169}, + {-0.459488, 0.270718}, {-0.544554, 0.320731}, {-0.675807, 0.39739}, {-0.800728, 0.470367}, + {-0.702968, 0.413776}, {-0.623145, 0.367063}, {-0.516467, 0.304462}, {-0.443168, 0.261075}, + {-0.42274, 0.248531}, {-0.428859, 0.25187}, {-0.428239, 0.251868}, {-0.421193, 0.248522}, + {-0.441405, 0.261049}, {-0.515109, 0.304423}, {-0.622407, 0.367025}, {-0.702752, 0.41376}, + {-0.586221, 0.344444}, {-0.515515, 0.302523}, {-0.456974, 0.267827}, {-0.442766, 0.259231}, + {-0.460513, 0.269644}, {-0.472793, 0.277306}, {-0.459831, 0.270586}, {-0.441288, 0.260678}, + {-0.454798, 0.269209}, {-0.513224, 0.303469}, {-0.584727, 0.344884}, {-0.617478, 0.363388}, + {-0.542921, 0.317484}, {-0.516465, 0.301428}, {-0.516762, 0.301423}, {-0.544064, 0.31763}, + {-0.571358, 0.334191}, {-0.571299, 0.334997}, {-0.54357, 0.319674}, {-0.515301, 0.303841}, + {-0.514096, 0.303413}, {-0.540611, 0.318664}, {-0.568043, 0.333992}, {-0.56901, 0.333618}, + {-0.589065, 0.343441}, {-0.625492, 0.364584}, {-0.675477, 0.39411}, 
{-0.720855, 0.421284}, + {-0.739418, 0.432921}, {-0.720916, 0.42282}, {-0.675078, 0.396514}, {-0.624235, 0.366955}, + {-0.587287, 0.345145}, {-0.569362, 0.334102}, {-0.565215, 0.330882}, {-0.57068, 0.333266}, + {-0.706877, 0.41252}, {-0.802308, 0.468612}, {-0.881907, 0.515721}, {-0.926581, 0.542539}, + {-0.926536, 0.543113}, {-0.881726, 0.517212}, {-0.80192, 0.470464}, {-0.706327, 0.414159}, + {-0.62062, 0.363464}, {-0.57, 0.333274}, {-0.570207, 0.332912}, {-0.621126, 0.362399}, + {-0.90873, 0.530699}, {-1.08565, 0.635072}, {-1.188, 0.695829}, {-1.18662, 0.69561}, + {-1.08251, 0.634875}, {-0.905949, 0.531375}, {-0.705979, 0.413922}, {-0.535547, 0.313587}, + {-0.437947, 0.255777}, {-0.437593, 0.254904}, {-0.535213, 0.3116}, {-0.707025, 0.412153}, + {-1.08689, 0.635973}, {-1.25264, 0.73346}, {-1.31127, 0.768447}, {-1.24889, 0.732614}, + {-1.08208, 0.635364}, {-0.853578, 0.501501}, {-0.622781, 0.365838}, {-0.45166, 0.264924}, + {-0.388031, 0.227055}, {-0.450773, 0.263432}, {-0.622945, 0.364108}, {-0.856448, 0.500854}, + {-1.19134, 0.697143}, {-1.31299, 0.768776}, {-1.31039, 0.768294}, {-1.18545, 0.696338}, + {-0.971812, 0.571913}, {-0.725656, 0.427572}, {-0.511728, 0.301479}, {-0.387194, 0.227697}, + {-0.386477, 0.226781}, {-0.510983, 0.29949}, {-0.727508, 0.426065}, {-0.976979, 0.571804}, + {-1.19068, 0.696587}, {-1.25079, 0.732704}, {-1.1858, 0.696342}, {-1.01738, 0.599244}, + {-0.796976, 0.470654}, {-0.585708, 0.346253}, {-0.435927, 0.257375}, {-0.381444, 0.224607}, + {-0.434799, 0.255504}, {-0.585954, 0.343788}, {-0.800727, 0.469028}, {-1.02363, 0.598893}, + {-1.08479, 0.635221}, {-1.08247, 0.635402}, {-0.971785, 0.572516}, {-0.797441, 0.471572}, + {-0.620067, 0.367457}, {-0.486057, 0.287752}, {-0.41627, 0.245654}, {-0.415492, 0.244604}, + {-0.485065, 0.2852}, {-0.621396, 0.364724}, {-0.801868, 0.469708}, {-0.97699, 0.571703}, + {-0.905153, 0.53149}, {-0.852577, 0.502286}, {-0.726367, 0.429708}, {-0.588237, 0.349011}, + {-0.488417, 0.289583}, {-0.438975, 
0.259306}, {-0.425261, 0.250486}, {-0.437732, 0.257719}, + {-0.487727, 0.28711}, {-0.589745, 0.346591}, {-0.729669, 0.427989}, {-0.855327, 0.501453}, + {-0.703319, 0.41442}, {-0.62243, 0.367877}, {-0.515097, 0.305252}, {-0.441756, 0.26166}, + {-0.421583, 0.248768}, {-0.42783, 0.251684}, {-0.427143, 0.251292}, {-0.420185, 0.247711}, + {-0.440973, 0.260248}, {-0.515615, 0.303875}, {-0.623635, 0.366892}, {-0.703934, 0.414065}, + {-0.533671, 0.314612}, {-0.453954, 0.267697}, {-0.39369, 0.231851}, {-0.38951, 0.228701}, + {-0.421824, 0.247209}, {-0.440785, 0.258508}, {-0.420829, 0.247546}, {-0.388011, 0.229143}, + {-0.392316, 0.232138}, {-0.453072, 0.267754}, {-0.533292, 0.314564}, {-0.57029, 0.336116}, + {-0.438553, 0.256808}, {-0.392833, 0.229247}, {-0.393994, 0.229459}, {-0.441827, 0.257486}, + {-0.489145, 0.285717}, {-0.488796, 0.286388}, {-0.440773, 0.259222}, {-0.392313, 0.231597}, + {-0.390874, 0.231116}, {-0.436942, 0.258006}, {-0.483803, 0.28496}, {-0.48443, 0.284558}, + {-0.440034, 0.255265}, {-0.455729, 0.263975}, {-0.516139, 0.299649}, {-0.58876, 0.343042}, + {-0.621226, 0.363101}, {-0.588249, 0.344708}, {-0.51498, 0.302475}, {-0.45396, 0.267145}, + {-0.438129, 0.257929}, {-0.455412, 0.267611}, {-0.468271, 0.274171}, {-0.45667, 0.266107}, + {-0.537417, 0.311164}, {-0.625532, 0.363069}, {-0.728288, 0.424258}, {-0.798674, 0.46667}, + {-0.798327, 0.467367}, {-0.727316, 0.426259}, {-0.62415, 0.366026}, {-0.535974, 0.31438}, + {-0.486437, 0.285064}, {-0.469332, 0.274268}, {-0.469741, 0.273311}, {-0.487532, 0.282546}, + {-0.707721, 0.411529}, {-0.856201, 0.49932}, {-0.974282, 0.569611}, {-1.01899, 0.596722}, + {-0.972922, 0.570185}, {-0.854309, 0.500753}, {-0.706267, 0.413877}, {-0.573897, 0.336038}, + {-0.486771, 0.284451}, {-0.456904, 0.266092}, {-0.486865, 0.282677}, {-0.574523, 0.333405}, + {-1.02299, 0.598635}, {-1.1894, 0.697064}, {-1.2498, 0.733129}, {-1.18546, 0.695737}, + {-1.01748, 0.597491}, {-0.79733, 0.468729}, {-0.58658, 0.345381}, {-0.437813, 
0.257914}, + {-0.384434, 0.225752}, {-0.438174, 0.256085}, {-0.588516, 0.343368}, {-0.801632, 0.468227}, + {-1.19078, 0.697726}, {-1.31196, 0.769254}, {-1.30916, 0.768367}, {-1.18415, 0.695981}, + {-0.970579, 0.571599}, {-0.72483, 0.427978}, {-0.511733, 0.302925}, {-0.388224, 0.229839}, + {-0.388251, 0.228873}, {-0.512822, 0.300964}, {-0.72872, 0.426904}, {-0.977243, 0.572377}, + {-1.25252, 0.733774}, {-1.31023, 0.768428}, {-1.24677, 0.732767}, {-1.0792, 0.636165}, + {-0.850589, 0.503134}, {-0.620269, 0.368135}, {-0.449893, 0.267523}, {-0.386958, 0.229629}, + {-0.450238, 0.265791}, {-0.622845, 0.366137}, {-0.856684, 0.502408}, {-1.08719, 0.636906}, + {-1.18796, 0.696076}, {-1.18495, 0.696012}, {-1.07929, 0.636323}, {-0.902007, 0.534065}, + {-0.702217, 0.417304}, {-0.532352, 0.316932}, {-0.435207, 0.258779}, {-0.435193, 0.257762}, + {-0.53342, 0.314548}, {-0.706287, 0.414984}, {-0.909094, 0.532826}, {-1.08639, 0.636101}, + {-1.01814, 0.597751}, {-0.970386, 0.572001}, {-0.850826, 0.503959}, {-0.702987, 0.41806}, + {-0.571434, 0.34027}, {-0.484887, 0.288176}, {-0.455049, 0.269532}, {-0.484962, 0.286385}, + {-0.573141, 0.337476}, {-0.707446, 0.41532}, {-0.856843, 0.502007}, {-0.97477, 0.571052}, + {-0.796031, 0.469208}, {-0.724431, 0.429006}, {-0.621865, 0.369769}, {-0.53516, 0.318509}, + {-0.486935, 0.288976}, {-0.470153, 0.277962}, {-0.469968, 0.277278}, {-0.487042, 0.287093}, + {-0.536759, 0.315898}, {-0.62514, 0.367115}, {-0.727891, 0.427035}, {-0.797522, 0.468481}, + {-0.584838, 0.345936}, {-0.512712, 0.304136}, {-0.453815, 0.269255}, {-0.440236, 0.260336}, + {-0.458798, 0.270305}, {-0.471412, 0.277328}, {-0.458391, 0.269826}, {-0.440037, 0.259275}, + {-0.454387, 0.267663}, {-0.5139, 0.302439}, {-0.585817, 0.344822}, {-0.617751, 0.364292}, + {-0.437406, 0.257946}, {-0.391269, 0.230261}, {-0.392486, 0.230267}, {-0.440697, 0.258108}, + {-0.48835, 0.286095}, {-0.488075, 0.286356}, {-0.43998, 0.258676}, {-0.391587, 0.230699}, + {-0.390463, 0.230296}, {-0.436875, 
0.25772}, {-0.483753, 0.285394}, {-0.483935, 0.28553}, + {-0.385785, 0.224752}, {-0.392231, 0.227792}, {-0.455032, 0.264624}, {-0.536732, 0.313147}, + {-0.57402, 0.335814}, {-0.536115, 0.314239}, {-0.453803, 0.266436}, {-0.390558, 0.229759}, + {-0.384116, 0.226347}, {-0.415576, 0.244881}, {-0.435087, 0.255852}, {-0.416633, 0.244006}, + {-0.440106, 0.254204}, {-0.515621, 0.298598}, {-0.624637, 0.363576}, {-0.705867, 0.412455}, + {-0.705416, 0.413}, {-0.623349, 0.365204}, {-0.513732, 0.301146}, {-0.438066, 0.257127}, + {-0.416682, 0.244805}, {-0.423233, 0.248207}, {-0.423834, 0.247287}, {-0.41827, 0.242427}, + {-0.589561, 0.341428}, {-0.729069, 0.424294}, {-0.855077, 0.49969}, {-0.905735, 0.530443}, + {-0.853543, 0.500136}, {-0.726651, 0.425682}, {-0.587148, 0.344}, {-0.486337, 0.285155}, + {-0.437067, 0.256122}, {-0.424451, 0.24771}, {-0.437945, 0.253996}, {-0.488099, 0.282088}, + {-0.801556, 0.467116}, {-0.97579, 0.570585}, {-1.08388, 0.635122}, {-1.08247, 0.634772}, + {-0.972363, 0.570176}, {-0.797905, 0.467856}, {-0.620246, 0.363894}, {-0.486552, 0.285597}, + {-0.417809, 0.244731}, {-0.418033, 0.243582}, {-0.487599, 0.283007}, {-0.622706, 0.361654}, + {-1.10965, 0.65101}, {-1.21079, 0.711131}, {-1.20826, 0.710059}, {-1.10336, 0.648844}, + {-0.924243, 0.544416}, {-0.7183, 0.424519}, {-0.540084, 0.320516}, {-0.43731, 0.259747}, + {-0.438113, 0.258732}, {-0.54291, 0.318508}, {-0.723657, 0.423584}, {-0.93133, 0.545568}, + {-1.21182, 0.711241}, {-1.256, 0.73798}, {-1.20609, 0.709803}, {-1.07111, 0.631903}, + {-0.877161, 0.519296}, {-0.670496, 0.398656}, {-0.510842, 0.304668}, {-0.451047, 0.268372}, + {-0.512848, 0.302991}, {-0.675332, 0.396892}, {-0.884895, 0.519056}, {-1.07967, 0.633252}, + {-1.20972, 0.710039}, {-1.20645, 0.709786}, {-1.12718, 0.66538}, {-0.981426, 0.581637}, + {-0.795717, 0.473475}, {-0.618989, 0.369437}, {-0.510313, 0.304479}, {-0.511346, 0.30353}, + {-0.62263, 0.367283}, {-0.802631, 0.471574}, {-0.990581, 0.580938}, {-1.1353, 0.66563}, + 
{-1.10424, 0.649018}, {-1.07113, 0.632162}, {-0.981394, 0.581887}, {-0.848011, 0.504798}, + {-0.700556, 0.417972}, {-0.583449, 0.347961}, {-0.539142, 0.320392}, {-0.585668, 0.346231}, + {-0.705652, 0.415267}, {-0.855872, 0.502135}, {-0.989913, 0.579978}, {-1.07677, 0.631228}, + {-0.924009, 0.545039}, {-0.876862, 0.519905}, {-0.796041, 0.473948}, {-0.701154, 0.418152}, + {-0.617103, 0.367554}, {-0.567934, 0.337124}, {-0.56887, 0.336345}, {-0.620183, 0.36534}, + {-0.706598, 0.414928}, {-0.802914, 0.470492}, {-0.8828, 0.517229}, {-0.926374, 0.544028}, + {-0.717393, 0.425178}, {-0.670585, 0.399036}, {-0.620299, 0.369539}, {-0.585078, 0.347812}, + {-0.568818, 0.33689}, {-0.565262, 0.333697}, {-0.570292, 0.335869}, {-0.588104, 0.345608}, + {-0.624537, 0.366309}, {-0.674901, 0.395645}, {-0.720158, 0.42297}, {-0.737567, 0.434958}, + {-0.539442, 0.320262}, {-0.511871, 0.303883}, {-0.512536, 0.303441}, {-0.541221, 0.31934}, + {-0.569817, 0.335458}, {-0.570279, 0.335327}, {-0.542523, 0.318698}, {-0.514408, 0.301887}, + {-0.51385, 0.301549}, {-0.540959, 0.318058}, {-0.56805, 0.335129}, {-0.56748, 0.33604}, + {-0.43746, 0.257892}, {-0.452672, 0.265945}, {-0.51339, 0.301114}, {-0.586792, 0.344189}, + {-0.619905, 0.363743}, {-0.587106, 0.344438}, {-0.513736, 0.301232}, {-0.452759, 0.26559}, + {-0.437264, 0.257159}, {-0.454851, 0.268413}, {-0.467503, 0.276449}, {-0.455078, 0.269016}, + {-0.438703, 0.255565}, {-0.513974, 0.299445}, {-0.623115, 0.364126}, {-0.704706, 0.412806}, + {-0.704563, 0.413009}, {-0.622574, 0.364719}, {-0.512886, 0.300346}, {-0.437213, 0.25657}, + {-0.415979, 0.245083}, {-0.422697, 0.249493}, {-0.423244, 0.249186}, {-0.417325, 0.244282}, + {-0.543151, 0.315196}, {-0.675047, 0.393466}, {-0.801031, 0.46879}, {-0.853036, 0.500088}, + {-0.799929, 0.468793}, {-0.67305, 0.394009}, {-0.540705, 0.316669}, {-0.455555, 0.267677}, + {-0.423304, 0.249443}, {-0.418287, 0.245957}, {-0.424648, 0.247973}, {-0.457794, 0.265668}, + {-0.723195, 0.421314}, {-0.883309, 0.516813}, 
{-0.987705, 0.579312}, {-0.986577, 0.578825}, + {-0.880376, 0.516046}, {-0.719542, 0.421598}, {-0.569005, 0.334029}, {-0.468952, 0.27624}, + {-0.424209, 0.249912}, {-0.42493, 0.248861}, {-0.47107, 0.273931}, {-0.572251, 0.332207}, + {-0.930661, 0.544704}, {-1.07783, 0.632375}, {-1.13263, 0.665132}, {-1.07442, 0.630815}, + {-0.925447, 0.543264}, {-0.738253, 0.433963}, {-0.568962, 0.335582}, {-0.456279, 0.269836}, + {-0.417773, 0.246317}, {-0.457798, 0.267946}, {-0.572286, 0.333571}, {-0.743182, 0.433634}, + {-1.11706, 0.656463}, {-1.14464, 0.673436}, {-1.11251, 0.6551}, {-1.02147, 0.602427}, + {-0.879756, 0.520502}, {-0.717022, 0.426186}, {-0.584683, 0.348691}, {-0.534343, 0.317812}, + {-0.587821, 0.346913}, {-0.722973, 0.424257}, {-0.887441, 0.520088}, {-1.02882, 0.603639}, + {-1.14503, 0.673245}, {-1.14215, 0.67305}, {-1.10347, 0.651965}, {-1.01495, 0.60155}, + {-0.875566, 0.520781}, {-0.722298, 0.430863}, {-0.620614, 0.369979}, {-0.622518, 0.368974}, + {-0.727856, 0.428557}, {-0.884004, 0.518671}, {-1.02437, 0.60064}, {-1.11098, 0.65206}, + {-1.11276, 0.655192}, {-1.10335, 0.652221}, {-1.06692, 0.633092}, {-0.979481, 0.583027}, + {-0.847324, 0.505374}, {-0.722435, 0.430777}, {-0.672011, 0.399129}, {-0.726718, 0.428976}, + {-0.855318, 0.502484}, {-0.989619, 0.580052}, {-1.07648, 0.630829}, {-1.10921, 0.651047}, + {-1.02138, 0.603333}, {-1.01464, 0.602294}, {-0.97934, 0.583296}, {-0.898166, 0.535699}, + {-0.792642, 0.472433}, {-0.717578, 0.426351}, {-0.719855, 0.425531}, {-0.79909, 0.470018}, + {-0.907516, 0.531987}, {-0.989315, 0.579107}, {-1.02234, 0.598925}, {-1.02428, 0.602035}, + {-0.879469, 0.521897}, {-0.875239, 0.521424}, {-0.847141, 0.505252}, {-0.792492, 0.47204}, + {-0.736017, 0.437054}, {-0.713142, 0.421708}, {-0.740207, 0.435787}, {-0.799879, 0.469154}, + {-0.855876, 0.500826}, {-0.882982, 0.516646}, {-0.884015, 0.518738}, {-0.878991, 0.518584}, + {-0.716712, 0.426845}, {-0.721954, 0.430288}, {-0.721974, 0.429445}, {-0.716853, 0.425024}, + {-0.712423, 
0.420965}, {-0.714245, 0.420629}, {-0.721741, 0.423623}, {-0.728444, 0.426391}, + {-0.728185, 0.425916}, {-0.721121, 0.422809}, {-0.713604, 0.420658}, {-0.712023, 0.422312}, + {-0.584246, 0.347455}, {-0.619863, 0.367543}, {-0.670681, 0.396429}, {-0.717902, 0.423371}, + {-0.738138, 0.43445}, {-0.720411, 0.423002}, {-0.674652, 0.395097}, {-0.623858, 0.36491}, + {-0.587194, 0.344227}, {-0.569305, 0.335587}, {-0.564246, 0.334606}, {-0.567819, 0.337851}, + {-0.533451, 0.314664}, {-0.620866, 0.365225}, {-0.724074, 0.425719}, {-0.795665, 0.467732}, + {-0.796426, 0.467626}, {-0.725876, 0.425255}, {-0.622675, 0.364176}, {-0.534507, 0.313134}, + {-0.485271, 0.285983}, {-0.468406, 0.277793}, {-0.468399, 0.278379}, {-0.484976, 0.287408}, + {-0.58629, 0.34312}, {-0.725238, 0.424946}, {-0.851612, 0.499892}, {-0.903225, 0.530304}, + {-0.851867, 0.499358}, {-0.725244, 0.424248}, {-0.585612, 0.342671}, {-0.484763, 0.28516}, + {-0.435776, 0.258224}, {-0.42345, 0.251509}, {-0.436708, 0.258139}, {-0.485944, 0.285239}, + {-0.721074, 0.421389}, {-0.881011, 0.516467}, {-0.98582, 0.578833}, {-0.985376, 0.578198}, + {-0.87965, 0.515168}, {-0.718866, 0.42065}, {-0.568146, 0.333548}, {-0.468024, 0.276761}, + {-0.423433, 0.251491}, {-0.42428, 0.250931}, {-0.470191, 0.275668}, {-0.570757, 0.33309}, + {-0.885804, 0.518833}, {-1.0219, 0.600042}, {-1.07367, 0.630722}, {-1.0198, 0.598373}, + {-0.882177, 0.517091}, {-0.712562, 0.418363}, {-0.563839, 0.332924}, {-0.468411, 0.27824}, + {-0.436962, 0.259253}, {-0.470481, 0.276772}, {-0.567437, 0.33153}, {-0.716733, 0.418563}, + {-1.02799, 0.603482}, {-1.10961, 0.652286}, {-1.10785, 0.651229}, {-1.02335, 0.60134}, + {-0.878572, 0.516856}, {-0.711954, 0.420536}, {-0.567972, 0.337443}, {-0.485389, 0.288975}, + {-0.486686, 0.287995}, {-0.571684, 0.335511}, {-0.717425, 0.419661}, {-0.884505, 0.518018}, + {-1.07232, 0.630852}, {-1.07056, 0.630918}, {-1.0632, 0.627608}, {-1.0203, 0.60372}, + {-0.92158, 0.547172}, {-0.793655, 0.472723}, {-0.702726, 0.41841}, 
{-0.704923, 0.417269}, + {-0.799497, 0.469998}, {-0.929183, 0.544382}, {-1.02747, 0.602005}, {-1.06817, 0.627034}, + {-1.07036, 0.631004}, {-1.0844, 0.641468}, {-1.10189, 0.653626}, {-1.06773, 0.634808}, + {-0.965988, 0.575278}, {-0.849279, 0.505626}, {-0.799301, 0.473928}, {-0.85452, 0.503455}, + {-0.974792, 0.571657}, {-1.07746, 0.630846}, {-1.10995, 0.650385}, {-1.08889, 0.639677}, + {-1.06271, 0.628629}, {-1.10156, 0.654342}, {-1.12316, 0.668716}, {-1.07351, 0.639698}, + {-0.966127, 0.575326}, {-0.878149, 0.521311}, {-0.88134, 0.520328}, {-0.974653, 0.57238}, + {-1.08471, 0.635055}, {-1.13383, 0.663357}, {-1.10905, 0.649966}, {-1.06538, 0.626932}, + {-1.01991, 0.605809}, {-1.06726, 0.636002}, {-1.07311, 0.639927}, {-1.01044, 0.601935}, + {-0.921916, 0.54774}, {-0.882583, 0.522}, {-0.928425, 0.546209}, {-1.02128, 0.598346}, + {-1.08496, 0.634306}, {-1.07694, 0.629854}, {-1.02525, 0.601719}, {-0.995061, 0.587574}, + {-0.92145, 0.549256}, {-0.965175, 0.575824}, {-0.964778, 0.574777}, {-0.920679, 0.547078}, + {-0.878039, 0.519858}, {-0.881313, 0.519436}, {-0.929312, 0.545241}, {-0.97583, 0.570653}, + {-0.975371, 0.569835}, {-0.928372, 0.54369}, {-0.88077, 0.518931}, {-0.878351, 0.521218}, + {-0.793243, 0.473276}, {-0.847351, 0.504481}, {-0.875002, 0.519428}, {-0.879228, 0.520414}, + {-0.879135, 0.518658}, {-0.884544, 0.51981}, {-0.883567, 0.517258}, {-0.856226, 0.500205}, + {-0.800052, 0.468039}, {-0.740536, 0.435682}, {-0.713862, 0.423286}, {-0.736976, 0.439354}, + {-0.701142, 0.416783}, {-0.795548, 0.471179}, {-0.876019, 0.517617}, {-0.922854, 0.544232}, + {-0.924957, 0.544023}, {-0.881358, 0.516598}, {-0.801881, 0.468728}, {-0.706368, 0.413108}, + {-0.620816, 0.365091}, {-0.570009, 0.33812}, {-0.569014, 0.339552}, {-0.617693, 0.368557}, + {-0.701801, 0.414316}, {-0.849151, 0.500379}, {-0.967938, 0.570028}, {-1.01459, 0.596709}, + {-0.970387, 0.569212}, {-0.852688, 0.498733}, {-0.704774, 0.412047}, {-0.572499, 0.336444}, + {-0.48579, 0.288353}, {-0.456139, 
0.272769}, {-0.485267, 0.289806}, {-0.570888, 0.338727}, + {-0.795518, 0.467383}, {-0.969111, 0.569707}, {-1.07832, 0.63391}, {-1.07892, 0.633197}, + {-0.97037, 0.567968}, {-0.796351, 0.465497}, {-0.618458, 0.36283}, {-0.484734, 0.28727}, + {-0.416415, 0.249265}, {-0.416842, 0.249412}, {-0.485523, 0.287876}, {-0.61865, 0.364149}, + {-0.92568, 0.54321}, {-1.07309, 0.6305}, {-1.12941, 0.66324}, {-1.07294, 0.628773}, + {-0.924866, 0.541062}, {-0.737495, 0.43222}, {-0.567629, 0.335295}, {-0.45474, 0.271532}, + {-0.41644, 0.249404}, {-0.456398, 0.270956}, {-0.56995, 0.33514}, {-0.739305, 0.433371}, + {-1.02543, 0.602078}, {-1.10773, 0.650928}, {-1.10713, 0.649962}, {-1.02353, 0.600065}, + {-0.878905, 0.515649}, {-0.711811, 0.419746}, {-0.567276, 0.337426}, {-0.484466, 0.289703}, + {-0.485758, 0.288954}, {-0.570555, 0.336006}, {-0.715695, 0.419294}, {-0.8821, 0.516913}, + {-1.06758, 0.627366}, {-1.08843, 0.640068}, {-1.06526, 0.626339}, {-0.994871, 0.585351}, + {-0.877686, 0.518046}, {-0.736047, 0.436923}, {-0.617298, 0.368225}, {-0.571703, 0.340401}, + {-0.620535, 0.36644}, {-0.741319, 0.434861}, {-0.883244, 0.517288}, {-0.999225, 0.5861}}; - std::vector expected_rho_2 { - 8.66322e-05, 9.09154e-05, 9.80794e-05, 9.62722e-05, 8.16604e-05, 6.46466e-05, 5.76474e-05, 6.49856e-05, 8.21971e-05, 9.67325e-05, 9.82953e-05, 9.09594e-05, 9.08935e-05, 0.000103381, 0.000114995, 0.000110548, 9.21788e-05, 7.69555e-05, 7.72464e-05, 9.2947e-05, 0.00011143, 0.000115579, 0.000103597, 9.09303e-05, 9.81246e-05, 0.000115021, 0.000122426, 0.000110464, 9.11147e-05, 8.25973e-05, 9.18823e-05, 0.000111692, 0.000123525, 0.000115612, 9.83109e-05, 8.99972e-05, 9.64228e-05, 0.000110554, 0.000110366, 9.60819e-05, 8.30607e-05, 8.35169e-05, 9.72864e-05, 0.000111778, 0.00011156, 9.68686e-05, 8.32922e-05, 8.31881e-05, 8.17731e-05, 9.19864e-05, 9.07279e-05, 8.27491e-05, 7.87781e-05, 8.35978e-05, 9.20407e-05, 9.31494e-05, 8.24252e-05, 6.828e-05, 6.20692e-05, 6.80515e-05, 6.45305e-05, 7.64246e-05, 
8.18118e-05, 8.27818e-05, 8.31747e-05, 8.27708e-05, 7.74396e-05, 6.51951e-05, 5.07151e-05, 4.14035e-05, 4.13397e-05, 5.04355e-05, 5.72317e-05, 7.63516e-05, 9.06979e-05, 9.61549e-05, 9.12462e-05, 7.70786e-05, 5.77605e-05, 3.95271e-05, 2.74958e-05, 2.34614e-05, 2.74307e-05, 3.92893e-05, 6.43151e-05, 9.18088e-05, 0.000110324, 0.000110512, 9.22121e-05, 6.46479e-05, 3.93121e-05, 2.27458e-05, 1.53501e-05, 1.53502e-05, 2.27178e-05, 3.91666e-05, 8.14631e-05, 0.000110395, 0.000122401, 0.000110528, 8.15908e-05, 5.01446e-05, 2.71976e-05, 1.52503e-05, 1.18282e-05, 1.5279e-05, 2.72311e-05, 5.01152e-05, 9.62222e-05, 0.000114984, 0.000114997, 9.62193e-05, 6.76356e-05, 4.08901e-05, 2.31617e-05, 1.52301e-05, 1.52593e-05, 2.32485e-05, 4.10021e-05, 6.77051e-05, 9.81138e-05, 0.000103432, 9.80899e-05, 8.29138e-05, 6.15808e-05, 4.0922e-05, 2.72051e-05, 2.26743e-05, 2.73074e-05, 4.11016e-05, 6.17575e-05, 8.3011e-05, 9.09546e-05, 9.09482e-05, 8.99295e-05, 8.29393e-05, 6.77091e-05, 5.02057e-05, 3.92875e-05, 3.93831e-05, 5.04644e-05, 6.80165e-05, 8.31457e-05, 8.9998e-05, 9.10633e-05, 0.000103727, 0.000115365, 0.000110745, 9.21349e-05, 7.67424e-05, 7.69412e-05, 9.2585e-05, 0.000111041, 0.00011522, 0.000103358, 9.08835e-05, 0.000103701, 0.000124504, 0.000134884, 0.000122512, 0.000100863, 9.11523e-05, 0.000101495, 0.00012339, 0.0001354, 0.00012448, 0.000103513, 9.3817e-05, 0.000115401, 0.000134897, 0.00013461, 0.000114867, 9.70076e-05, 9.74646e-05, 0.000116038, 0.000135857, 0.000135583, 0.000115536, 9.7247e-05, 9.72654e-05, 0.000110854, 0.000122458, 0.000114721, 9.7906e-05, 9.01702e-05, 9.88692e-05, 0.000116222, 0.00012375, 0.000111506, 9.16747e-05, 8.26758e-05, 9.1491e-05, 9.21537e-05, 0.00010055, 9.65106e-05, 8.97913e-05, 9.02754e-05, 9.7747e-05, 0.000101942, 9.31185e-05, 7.5218e-05, 6.17738e-05, 6.16731e-05, 7.47928e-05, 7.64941e-05, 9.04369e-05, 9.64812e-05, 9.79791e-05, 9.72543e-05, 9.15561e-05, 7.7423e-05, 5.79902e-05, 4.20914e-05, 3.62082e-05, 4.1915e-05, 5.74885e-05, 7.63542e-05, 
0.000100344, 0.000114587, 0.000114905, 0.000101082, 7.70694e-05, 5.08717e-05, 3.09099e-05, 2.10637e-05, 2.10258e-05, 3.07413e-05, 5.0444e-05, 9.17286e-05, 0.000122015, 0.000134293, 0.000122399, 9.22065e-05, 5.7515e-05, 3.07442e-05, 1.63273e-05, 1.21628e-05, 1.62989e-05, 3.06228e-05, 5.72021e-05, 0.000110202, 0.000134277, 0.00013443, 0.000110521, 7.44908e-05, 4.16398e-05, 2.0846e-05, 1.20913e-05, 1.20963e-05, 2.08422e-05, 4.15524e-05, 7.42453e-05, 0.000114719, 0.000123921, 0.000114978, 9.1113e-05, 6.12461e-05, 3.58088e-05, 2.07988e-05, 1.62058e-05, 2.08303e-05, 3.58149e-05, 6.11144e-05, 9.0823e-05, 0.000103218, 0.000103394, 9.70724e-05, 8.236e-05, 6.12713e-05, 4.15842e-05, 3.05734e-05, 3.0615e-05, 4.16597e-05, 6.12289e-05, 8.20841e-05, 9.66965e-05, 9.08957e-05, 9.38534e-05, 9.72175e-05, 9.12804e-05, 7.45219e-05, 5.7346e-05, 5.05292e-05, 5.75136e-05, 7.46703e-05, 9.11503e-05, 9.6815e-05, 9.3506e-05, 9.86094e-05, 0.000115747, 0.000123048, 0.000110663, 9.08732e-05, 8.20784e-05, 9.12166e-05, 0.000110974, 0.000122855, 0.000115104, 9.80793e-05, 9.01144e-05, 0.000115677, 0.000135256, 0.000134858, 0.000114877, 9.68091e-05, 9.71348e-05, 0.000115643, 0.000135466, 0.000135259, 0.000115324, 9.71789e-05, 9.73665e-05, 0.000123007, 0.000134853, 0.000123983, 0.000103236, 9.38226e-05, 0.000104024, 0.000125149, 0.000135709, 0.000123261, 0.000101352, 9.13447e-05, 0.000101393, 0.000110692, 0.000114818, 0.000103106, 9.08733e-05, 9.13118e-05, 0.000104243, 0.000116106, 0.000111556, 9.282e-05, 7.71886e-05, 7.71179e-05, 9.2474e-05, 9.08436e-05, 9.65136e-05, 9.33522e-05, 9.09421e-05, 9.4111e-05, 9.76827e-05, 9.18949e-05, 7.51509e-05, 5.78519e-05, 5.08473e-05, 5.76128e-05, 7.453e-05, 8.18096e-05, 9.64211e-05, 0.00010304, 0.000103372, 9.72367e-05, 8.26992e-05, 6.17002e-05, 4.19795e-05, 3.0861e-05, 3.07771e-05, 4.16703e-05, 6.10691e-05, 9.05933e-05, 0.000114455, 0.000123711, 0.000114913, 9.12288e-05, 6.14839e-05, 3.60552e-05, 2.09796e-05, 1.63161e-05, 2.08773e-05, 3.57783e-05, 6.0974e-05, 
0.00011007, 0.000134092, 0.00013429, 0.000110516, 7.45997e-05, 4.17636e-05, 2.09183e-05, 1.21148e-05, 1.20948e-05, 2.08283e-05, 4.15268e-05, 7.41847e-05, 0.000122013, 0.000134236, 0.00012238, 9.22473e-05, 5.754e-05, 3.0698e-05, 1.62329e-05, 1.206e-05, 1.62133e-05, 3.0595e-05, 5.72528e-05, 9.17953e-05, 0.000114645, 0.000114906, 0.000101072, 7.70042e-05, 5.0684e-05, 3.06272e-05, 2.07683e-05, 2.07734e-05, 3.05934e-05, 5.04678e-05, 7.6513e-05, 0.000100497, 9.79817e-05, 9.71761e-05, 9.1389e-05, 7.7092e-05, 5.7483e-05, 4.15174e-05, 3.56908e-05, 4.1552e-05, 5.73798e-05, 7.66353e-05, 9.06577e-05, 9.66092e-05, 9.01536e-05, 9.74878e-05, 0.000101498, 9.245e-05, 7.44172e-05, 6.10165e-05, 6.11036e-05, 7.45356e-05, 9.22486e-05, 0.000100826, 9.67187e-05, 8.98243e-05, 9.70445e-05, 0.000111374, 0.00011095, 9.61412e-05, 8.26514e-05, 8.28281e-05, 9.64852e-05, 0.000111005, 0.00011091, 9.64205e-05, 8.31343e-05, 8.34063e-05, 0.000111252, 0.000122974, 0.00011505, 9.789e-05, 8.98579e-05, 9.83735e-05, 0.000115662, 0.000123223, 0.00011106, 9.13501e-05, 8.25301e-05, 9.16022e-05, 0.000110875, 0.00011508, 0.000103296, 9.09165e-05, 9.12227e-05, 0.000104068, 0.000115891, 0.000111329, 9.25934e-05, 7.69896e-05, 7.69925e-05, 9.24877e-05, 9.62098e-05, 9.79353e-05, 9.08547e-05, 8.68207e-05, 9.14111e-05, 9.88162e-05, 9.70291e-05, 8.21707e-05, 6.48023e-05, 5.7497e-05, 6.46014e-05, 8.16684e-05, 8.27292e-05, 8.97105e-05, 9.08607e-05, 9.11018e-05, 9.03403e-05, 8.34977e-05, 6.81965e-05, 5.04528e-05, 3.92764e-05, 3.91645e-05, 5.00874e-05, 6.75631e-05, 8.26807e-05, 9.77966e-05, 0.000103231, 9.81339e-05, 8.32174e-05, 6.19824e-05, 4.12142e-05, 2.73047e-05, 2.26135e-05, 2.71287e-05, 4.08387e-05, 6.14492e-05, 9.5941e-05, 0.000114618, 0.000114753, 9.62806e-05, 6.79379e-05, 4.11991e-05, 2.33238e-05, 1.52523e-05, 1.52009e-05, 2.31512e-05, 4.08889e-05, 6.75442e-05, 0.000110162, 0.000122095, 0.000110427, 8.17862e-05, 5.0443e-05, 2.73845e-05, 1.52887e-05, 1.17873e-05, 1.52261e-05, 2.72218e-05, 5.01503e-05, 
8.14217e-05, 0.000110171, 0.000110381, 9.23012e-05, 6.48832e-05, 3.9473e-05, 2.27385e-05, 1.52364e-05, 1.52218e-05, 2.26613e-05, 3.92454e-05, 6.446e-05, 9.18326e-05, 9.60882e-05, 9.12528e-05, 7.71986e-05, 5.7831e-05, 3.94017e-05, 2.72065e-05, 2.31332e-05, 2.71896e-05, 3.92494e-05, 5.73943e-05, 7.65381e-05, 9.07352e-05, 8.31427e-05, 8.27674e-05, 7.73859e-05, 6.49247e-05, 5.02016e-05, 4.07965e-05, 4.08239e-05, 5.01779e-05, 6.46007e-05, 7.66709e-05, 8.19891e-05, 8.28065e-05, 8.35102e-05, 9.18766e-05, 9.27847e-05, 8.17862e-05, 6.74957e-05, 6.13643e-05, 6.76295e-05, 8.17635e-05, 9.22908e-05, 9.10504e-05, 8.28937e-05, 7.87637e-05, 8.22849e-05, 9.26854e-05, 9.12148e-05, 8.27857e-05, 7.84198e-05, 8.30329e-05, 9.14518e-05, 9.2624e-05, 8.19604e-05, 6.79017e-05, 6.18754e-05, 6.81775e-05, 9.25767e-05, 0.000101186, 9.70021e-05, 8.99369e-05, 9.01047e-05, 9.73949e-05, 0.000101531, 9.26836e-05, 7.47587e-05, 6.13449e-05, 6.13879e-05, 7.48154e-05, 9.12466e-05, 9.71234e-05, 9.38859e-05, 9.12418e-05, 9.41784e-05, 9.75881e-05, 9.16882e-05, 7.48419e-05, 5.7471e-05, 5.04671e-05, 5.735e-05, 7.45579e-05, 8.30465e-05, 9.01483e-05, 9.12569e-05, 9.13645e-05, 9.0462e-05, 8.35058e-05, 6.81106e-05, 5.029e-05, 3.90704e-05, 3.89626e-05, 4.99716e-05, 6.76468e-05, 7.87113e-05, 9.01335e-05, 9.39391e-05, 9.02488e-05, 7.89857e-05, 6.16455e-05, 4.33448e-05, 3.03039e-05, 2.5719e-05, 3.00896e-05, 4.29655e-05, 6.12441e-05, 8.30431e-05, 9.69831e-05, 9.70005e-05, 8.31465e-05, 6.15899e-05, 4.05755e-05, 2.57776e-05, 1.86682e-05, 1.85838e-05, 2.55399e-05, 4.02659e-05, 6.13476e-05, 9.10058e-05, 0.000100754, 9.10671e-05, 6.79588e-05, 4.34991e-05, 2.59208e-05, 1.66364e-05, 1.38535e-05, 1.65237e-05, 2.57166e-05, 4.32744e-05, 6.78031e-05, 9.19258e-05, 9.2011e-05, 7.46443e-05, 5.05692e-05, 3.07138e-05, 1.89438e-05, 1.39542e-05, 1.39193e-05, 1.88335e-05, 3.0524e-05, 5.0329e-05, 7.44332e-05, 8.14517e-05, 7.45835e-05, 5.78007e-05, 3.96656e-05, 2.62394e-05, 1.8878e-05, 1.66234e-05, 1.88392e-05, 2.61126e-05, 3.9394e-05, 
5.74216e-05, 7.42891e-05, 6.77927e-05, 6.1626e-05, 5.10893e-05, 3.96228e-05, 3.05259e-05, 2.57029e-05, 2.57095e-05, 3.0487e-05, 3.94058e-05, 5.06384e-05, 6.11215e-05, 6.75673e-05, 6.20323e-05, 6.18517e-05, 5.79597e-05, 5.04221e-05, 4.31041e-05, 4.01789e-05, 4.31609e-05, 5.03565e-05, 5.75884e-05, 6.12606e-05, 6.15807e-05, 6.13564e-05, 6.83477e-05, 7.51768e-05, 7.48743e-05, 6.76831e-05, 6.10303e-05, 6.11212e-05, 6.78223e-05, 7.47709e-05, 7.47231e-05, 6.78197e-05, 6.13917e-05, 6.16178e-05, 6.49478e-05, 7.71124e-05, 8.24084e-05, 8.30366e-05, 8.3113e-05, 8.2574e-05, 7.72343e-05, 6.49572e-05, 5.03889e-05, 4.1034e-05, 4.10616e-05, 5.04397e-05, 7.7096e-05, 9.14035e-05, 9.73827e-05, 9.85289e-05, 9.745e-05, 9.15351e-05, 7.72625e-05, 5.76774e-05, 4.16496e-05, 3.57343e-05, 4.15711e-05, 5.75326e-05, 8.26488e-05, 9.76164e-05, 0.000104147, 0.000104112, 9.75951e-05, 8.27725e-05, 6.15501e-05, 4.16477e-05, 3.04254e-05, 3.03387e-05, 4.14158e-05, 6.12942e-05, 8.35527e-05, 9.88914e-05, 0.000104181, 9.87409e-05, 8.34934e-05, 6.20137e-05, 4.10679e-05, 2.70421e-05, 2.22973e-05, 2.68419e-05, 4.07662e-05, 6.18287e-05, 8.3592e-05, 9.75876e-05, 9.74655e-05, 8.34024e-05, 6.16774e-05, 4.05533e-05, 2.56844e-05, 1.85318e-05, 1.8432e-05, 2.54295e-05, 4.03112e-05, 6.16577e-05, 8.26502e-05, 9.12599e-05, 8.24937e-05, 6.20038e-05, 4.06802e-05, 2.55355e-05, 1.75271e-05, 1.50834e-05, 1.73705e-05, 2.53091e-05, 4.05511e-05, 6.20811e-05, 7.68584e-05, 7.68226e-05, 6.14732e-05, 4.13786e-05, 2.60704e-05, 1.77422e-05, 1.44378e-05, 1.43768e-05, 1.75887e-05, 2.5905e-05, 4.12998e-05, 6.1503e-05, 6.44774e-05, 5.74996e-05, 4.19824e-05, 2.76574e-05, 1.90971e-05, 1.54284e-05, 1.44949e-05, 1.53584e-05, 1.89738e-05, 2.75087e-05, 4.18484e-05, 5.74214e-05, 5.01704e-05, 4.18764e-05, 3.10576e-05, 2.30361e-05, 1.90141e-05, 1.76813e-05, 1.76715e-05, 1.89671e-05, 2.29164e-05, 3.08538e-05, 4.1656e-05, 5.00712e-05, 4.11307e-05, 3.62124e-05, 3.10515e-05, 2.74997e-05, 2.58148e-05, 2.53961e-05, 2.58429e-05, 2.74708e-05, 
3.08846e-05, 3.59342e-05, 4.09046e-05, 4.31158e-05, 4.13051e-05, 4.20711e-05, 4.19582e-05, 4.10662e-05, 4.02686e-05, 4.03232e-05, 4.1151e-05, 4.19172e-05, 4.18481e-05, 4.10294e-05, 4.03126e-05, 4.04364e-05, 5.06155e-05, 5.78197e-05, 6.1418e-05, 6.16014e-05, 6.12029e-05, 6.17377e-05, 6.15337e-05, 5.77658e-05, 5.04237e-05, 4.32622e-05, 4.04156e-05, 4.34227e-05, 5.778e-05, 7.72949e-05, 9.16478e-05, 9.68004e-05, 9.15827e-05, 7.72559e-05, 5.78292e-05, 3.94324e-05, 2.72353e-05, 2.31232e-05, 2.71658e-05, 3.9334e-05, 7.74069e-05, 0.000101898, 0.000116062, 0.00011591, 0.000101613, 7.72834e-05, 5.08426e-05, 3.06695e-05, 2.06929e-05, 2.06281e-05, 3.05117e-05, 5.07404e-05, 9.21132e-05, 0.000116379, 0.000125368, 0.000115951, 9.17105e-05, 6.15938e-05, 3.59114e-05, 2.06762e-05, 1.59391e-05, 2.05251e-05, 3.57262e-05, 6.16439e-05, 9.75006e-05, 0.000116315, 0.000116028, 9.69599e-05, 6.81701e-05, 4.11648e-05, 2.31304e-05, 1.497e-05, 1.48831e-05, 2.2922e-05, 4.10572e-05, 6.84373e-05, 9.20819e-05, 0.000101742, 9.16747e-05, 6.81962e-05, 4.35172e-05, 2.5825e-05, 1.64692e-05, 1.36392e-05, 1.63083e-05, 2.56285e-05, 4.35352e-05, 6.85557e-05, 7.72709e-05, 7.7123e-05, 6.16001e-05, 4.13891e-05, 2.60307e-05, 1.76754e-05, 1.43446e-05, 1.42662e-05, 1.74955e-05, 2.58979e-05, 4.14633e-05, 6.18535e-05, 5.75279e-05, 5.07178e-05, 3.61754e-05, 2.3609e-05, 1.68921e-05, 1.45595e-05, 1.41095e-05, 1.44533e-05, 1.6751e-05, 2.35235e-05, 3.61993e-05, 5.07919e-05, 3.91875e-05, 3.08059e-05, 2.11715e-05, 1.55836e-05, 1.41639e-05, 1.45683e-05, 1.45369e-05, 1.40858e-05, 1.54888e-05, 2.10853e-05, 3.07491e-05, 3.91687e-05, 2.72276e-05, 2.10439e-05, 1.6526e-05, 1.55049e-05, 1.67723e-05, 1.76942e-05, 1.67656e-05, 1.54714e-05, 1.64506e-05, 2.09369e-05, 2.71419e-05, 3.02327e-05, 2.32975e-05, 2.10623e-05, 2.10794e-05, 2.33753e-05, 2.58044e-05, 2.58327e-05, 2.3421e-05, 2.10763e-05, 2.09878e-05, 2.31935e-05, 2.55737e-05, 2.56238e-05, 2.73837e-05, 3.08709e-05, 3.60198e-05, 4.10568e-05, 4.32392e-05, 4.11388e-05, 3.61005e-05, 
3.08807e-05, 2.73295e-05, 2.56666e-05, 2.52632e-05, 2.57222e-05, 3.94509e-05, 5.0844e-05, 6.14707e-05, 6.79005e-05, 6.79324e-05, 6.15427e-05, 5.091e-05, 3.94853e-05, 3.04655e-05, 2.5677e-05, 2.56766e-05, 3.04566e-05, 6.52224e-05, 9.31688e-05, 0.000111636, 0.000111426, 9.27543e-05, 6.49682e-05, 3.94451e-05, 2.26841e-05, 1.51496e-05, 1.51048e-05, 2.25892e-05, 3.94453e-05, 9.33948e-05, 0.000124097, 0.000136045, 0.000123471, 9.27359e-05, 5.77238e-05, 3.07261e-05, 1.61489e-05, 1.19044e-05, 1.60545e-05, 3.06629e-05, 5.7974e-05, 0.000112213, 0.00013634, 0.000135895, 0.000111323, 7.48859e-05, 4.17808e-05, 2.07768e-05, 1.18839e-05, 1.18261e-05, 2.06603e-05, 4.18627e-05, 7.5471e-05, 0.000112075, 0.000123759, 0.000111372, 8.21093e-05, 5.04555e-05, 2.72668e-05, 1.50951e-05, 1.15417e-05, 1.49795e-05, 2.71821e-05, 5.07176e-05, 8.2836e-05, 9.3071e-05, 9.27882e-05, 7.49231e-05, 5.05499e-05, 3.05981e-05, 1.87928e-05, 1.37615e-05, 1.36923e-05, 1.86483e-05, 3.05759e-05, 5.08632e-05, 7.54661e-05, 6.48898e-05, 5.76692e-05, 4.19495e-05, 2.75549e-05, 1.89906e-05, 1.5311e-05, 1.43475e-05, 1.51972e-05, 1.88649e-05, 2.75587e-05, 4.21453e-05, 5.78966e-05, 3.9248e-05, 3.07889e-05, 2.1117e-05, 1.55287e-05, 1.41142e-05, 1.45114e-05, 1.44648e-05, 1.40122e-05, 1.54417e-05, 2.10965e-05, 3.08335e-05, 3.92814e-05, 2.26041e-05, 1.63605e-05, 1.22984e-05, 1.20185e-05, 1.40794e-05, 1.53786e-05, 1.40393e-05, 1.19595e-05, 1.22387e-05, 1.63121e-05, 2.25775e-05, 2.58083e-05, 1.52133e-05, 1.21873e-05, 1.22459e-05, 1.54024e-05, 1.88995e-05, 1.89024e-05, 1.54004e-05, 1.2224e-05, 1.21442e-05, 1.51648e-05, 1.8568e-05, 1.85904e-05, 1.52431e-05, 1.63375e-05, 2.09782e-05, 2.73461e-05, 3.04946e-05, 2.7379e-05, 2.10084e-05, 1.63406e-05, 1.52236e-05, 1.64328e-05, 1.73416e-05, 1.64535e-05, 2.27128e-05, 3.08087e-05, 4.18391e-05, 5.03947e-05, 5.04006e-05, 4.18566e-05, 3.08344e-05, 2.27398e-05, 1.87218e-05, 1.74033e-05, 1.73951e-05, 1.87007e-05, 3.9473e-05, 5.78588e-05, 7.50138e-05, 8.2125e-05, 7.48966e-05, 5.7753e-05, 
3.94666e-05, 2.60485e-05, 1.87206e-05, 1.64654e-05, 1.86666e-05, 2.59867e-05, 8.27404e-05, 0.000111934, 0.00012365, 0.000111275, 8.1998e-05, 5.03825e-05, 2.7291e-05, 1.52076e-05, 1.17062e-05, 1.51701e-05, 2.73422e-05, 5.0759e-05, 0.000112183, 0.000136224, 0.000135712, 0.000111112, 7.47242e-05, 4.17312e-05, 2.08291e-05, 1.19892e-05, 1.19634e-05, 2.08234e-05, 4.20088e-05, 7.55401e-05, 0.000124106, 0.000135883, 0.000123173, 9.24298e-05, 5.75206e-05, 3.06425e-05, 1.61379e-05, 1.19258e-05, 1.61001e-05, 3.0743e-05, 5.80896e-05, 9.35032e-05, 0.000111652, 0.000111227, 9.24528e-05, 6.47171e-05, 3.92995e-05, 2.26094e-05, 1.51019e-05, 1.50692e-05, 2.25857e-05, 3.9522e-05, 6.53946e-05, 9.33417e-05, 8.20955e-05, 7.47285e-05, 5.75932e-05, 3.94005e-05, 2.60533e-05, 1.87413e-05, 1.64766e-05, 1.86833e-05, 2.60547e-05, 3.96349e-05, 5.80821e-05, 7.51669e-05, 5.02863e-05, 4.17488e-05, 3.08309e-05, 2.28454e-05, 1.88855e-05, 1.75709e-05, 1.75375e-05, 1.88254e-05, 2.28457e-05, 3.09539e-05, 4.19444e-05, 5.03859e-05, 2.71935e-05, 2.09331e-05, 1.64039e-05, 1.54095e-05, 1.6702e-05, 1.76184e-05, 1.66633e-05, 1.53633e-05, 1.63794e-05, 2.09409e-05, 2.72145e-05, 3.02914e-05, 1.51892e-05, 1.21439e-05, 1.21997e-05, 1.53637e-05, 1.88666e-05, 1.88593e-05, 1.5342e-05, 1.21656e-05, 1.21029e-05, 1.51529e-05, 1.85798e-05, 1.85949e-05, 1.17432e-05, 1.21202e-05, 1.63204e-05, 2.27422e-05, 2.60479e-05, 2.27438e-05, 1.631e-05, 1.20929e-05, 1.17078e-05, 1.37042e-05, 1.50048e-05, 1.37306e-05, 1.52148e-05, 2.09101e-05, 3.07645e-05, 3.93638e-05, 3.93532e-05, 3.07402e-05, 2.0885e-05, 1.51962e-05, 1.3756e-05, 1.41788e-05, 1.41814e-05, 1.37657e-05, 2.73366e-05, 4.19078e-05, 5.77672e-05, 6.48865e-05, 5.76395e-05, 4.17704e-05, 2.72732e-05, 1.87191e-05, 1.51143e-05, 1.42244e-05, 1.50955e-05, 1.87179e-05, 5.06906e-05, 7.52524e-05, 9.29474e-05, 9.2741e-05, 7.48323e-05, 5.03879e-05, 3.04564e-05, 1.87464e-05, 1.38085e-05, 1.37865e-05, 1.87196e-05, 3.05405e-05, 9.74795e-05, 0.000116125, 0.000115675, 9.64954e-05, 
6.77666e-05, 4.1002e-05, 2.32303e-05, 1.52373e-05, 1.52473e-05, 2.33342e-05, 4.14095e-05, 6.86143e-05, 0.00011628, 0.000124984, 0.000115345, 9.10867e-05, 6.11978e-05, 3.58388e-05, 2.0838e-05, 1.6225e-05, 2.08974e-05, 3.6138e-05, 6.19849e-05, 9.22708e-05, 0.000115881, 0.000115395, 0.000100903, 7.66527e-05, 5.04949e-05, 3.06063e-05, 2.08002e-05, 2.08273e-05, 3.07772e-05, 5.10388e-05, 7.76675e-05, 0.000102004, 9.6622e-05, 9.11078e-05, 7.66659e-05, 5.73619e-05, 3.91961e-05, 2.7183e-05, 2.3168e-05, 2.72634e-05, 3.94836e-05, 5.79917e-05, 7.75239e-05, 9.17518e-05, 6.77806e-05, 6.12041e-05, 5.05518e-05, 3.92544e-05, 3.03881e-05, 2.56935e-05, 2.57244e-05, 3.0515e-05, 3.95455e-05, 5.10054e-05, 6.16552e-05, 6.79733e-05, 4.09585e-05, 3.58647e-05, 3.07075e-05, 2.72891e-05, 2.57428e-05, 2.53784e-05, 2.57997e-05, 2.74051e-05, 3.08748e-05, 3.60454e-05, 4.10812e-05, 4.31817e-05, 2.31814e-05, 2.08745e-05, 2.08989e-05, 2.32603e-05, 2.57515e-05, 2.57767e-05, 2.33168e-05, 2.09521e-05, 2.09063e-05, 2.31931e-05, 2.56191e-05, 2.56172e-05, 1.51909e-05, 1.62382e-05, 2.08659e-05, 2.72571e-05, 3.04249e-05, 2.7288e-05, 2.08881e-05, 1.62277e-05, 1.51569e-05, 1.64297e-05, 1.73737e-05, 1.64598e-05, 1.51838e-05, 2.08412e-05, 3.06768e-05, 3.92843e-05, 3.92823e-05, 3.0662e-05, 2.08056e-05, 1.51367e-05, 1.3732e-05, 1.41918e-05, 1.42078e-05, 1.37723e-05, 2.32265e-05, 3.5956e-05, 5.07332e-05, 5.75853e-05, 5.06296e-05, 3.58225e-05, 2.31252e-05, 1.64447e-05, 1.42211e-05, 1.38692e-05, 1.4242e-05, 1.65e-05, 4.12571e-05, 6.16827e-05, 7.72212e-05, 7.70569e-05, 6.13316e-05, 4.09608e-05, 2.56398e-05, 1.74473e-05, 1.42779e-05, 1.42823e-05, 1.74887e-05, 2.57862e-05, 6.84855e-05, 9.19718e-05, 0.000101609, 9.14239e-05, 6.78236e-05, 4.31905e-05, 2.5698e-05, 1.65497e-05, 1.38525e-05, 1.65716e-05, 2.58421e-05, 4.36037e-05, 9.88708e-05, 0.000103874, 9.81691e-05, 8.28256e-05, 6.15396e-05, 4.0977e-05, 2.72947e-05, 2.27648e-05, 2.74383e-05, 4.13847e-05, 6.23138e-05, 8.37999e-05, 0.000103912, 0.000103509, 9.67478e-05, 
8.19822e-05, 6.1124e-05, 4.16613e-05, 3.07474e-05, 3.08425e-05, 4.20181e-05, 6.18685e-05, 8.30481e-05, 9.77341e-05, 9.82092e-05, 9.67521e-05, 9.06468e-05, 7.65236e-05, 5.7328e-05, 4.16696e-05, 3.59813e-05, 4.19427e-05, 5.79567e-05, 7.74948e-05, 9.16855e-05, 9.74247e-05, 8.28789e-05, 8.19976e-05, 7.65258e-05, 6.44141e-05, 5.01504e-05, 4.10349e-05, 4.11857e-05, 5.06194e-05, 6.51737e-05, 7.73949e-05, 8.26825e-05, 8.31363e-05, 6.15958e-05, 6.11302e-05, 5.73028e-05, 5.01146e-05, 4.31576e-05, 4.04287e-05, 4.34557e-05, 5.06456e-05, 5.79151e-05, 6.16386e-05, 6.18737e-05, 6.13428e-05, 4.09852e-05, 4.16046e-05, 4.15638e-05, 4.09067e-05, 4.03309e-05, 4.04668e-05, 4.12492e-05, 4.19597e-05, 4.19134e-05, 4.1155e-05, 4.0413e-05, 4.03626e-05, 2.72162e-05, 3.05892e-05, 3.57505e-05, 4.09116e-05, 4.3206e-05, 4.11048e-05, 3.60004e-05, 3.07648e-05, 2.72865e-05, 2.57221e-05, 2.53449e-05, 2.57118e-05, 2.25935e-05, 3.05611e-05, 4.15529e-05, 5.01706e-05, 5.02358e-05, 4.16825e-05, 3.06465e-05, 2.26033e-05, 1.869e-05, 1.74698e-05, 1.7486e-05, 1.87176e-05, 2.7179e-05, 4.16129e-05, 5.74309e-05, 6.46102e-05, 5.74249e-05, 4.15782e-05, 2.71151e-05, 1.86346e-05, 1.51183e-05, 1.429e-05, 1.51572e-05, 1.86996e-05, 4.10804e-05, 6.14229e-05, 7.69694e-05, 7.68745e-05, 6.12028e-05, 4.08569e-05, 2.55658e-05, 1.74179e-05, 1.42905e-05, 1.4313e-05, 1.74965e-05, 2.57203e-05, 6.2066e-05, 8.27082e-05, 9.13211e-05, 8.2338e-05, 6.1582e-05, 4.02122e-05, 2.52532e-05, 1.74845e-05, 1.52059e-05, 1.75491e-05, 2.54369e-05, 4.05731e-05, 8.36872e-05, 9.7572e-05, 9.72614e-05, 8.29754e-05, 6.1194e-05, 4.02683e-05, 2.57056e-05, 1.87943e-05, 1.88352e-05, 2.58781e-05, 4.06858e-05, 6.1881e-05, 9.11604e-05, 9.09433e-05, 8.97738e-05, 8.27768e-05, 6.76535e-05, 5.02587e-05, 3.93946e-05, 3.95204e-05, 5.06554e-05, 6.83027e-05, 8.35192e-05, 9.03549e-05, 9.09251e-05, 9.34907e-05, 9.66706e-05, 9.08773e-05, 7.44488e-05, 5.75375e-05, 5.08561e-05, 5.79338e-05, 7.52098e-05, 9.18113e-05, 9.74707e-05, 9.39304e-05, 8.97876e-05, 9.66831e-05, 
0.000100632, 9.19744e-05, 7.44687e-05, 6.1424e-05, 6.16938e-05, 7.5243e-05, 9.3048e-05, 0.000101631, 9.73214e-05, 8.99967e-05, 8.28788e-05, 9.0908e-05, 9.19405e-05, 8.14719e-05, 6.77283e-05, 6.19263e-05, 6.83383e-05, 8.25151e-05, 9.30239e-05, 9.16713e-05, 8.3231e-05, 7.86483e-05, 6.77747e-05, 7.43941e-05, 7.42783e-05, 6.75513e-05, 6.13236e-05, 6.16367e-05, 6.83729e-05, 7.52619e-05, 7.5154e-05, 6.81697e-05, 6.15483e-05, 6.14381e-05, 5.02524e-05, 5.7278e-05, 6.09839e-05, 6.14804e-05, 6.13631e-05, 6.19952e-05, 6.17372e-05, 5.79135e-05, 5.05996e-05, 4.34772e-05, 4.05653e-05, 4.33571e-05, 3.91853e-05, 5.03518e-05, 6.09775e-05, 6.76033e-05, 6.78186e-05, 6.14673e-05, 5.08104e-05, 3.94382e-05, 3.05512e-05, 2.58703e-05, 2.58594e-05, 3.04716e-05, 3.91182e-05, 5.72138e-05, 7.43163e-05, 8.15969e-05, 7.4541e-05, 5.74709e-05, 3.92547e-05, 2.59746e-05, 1.88019e-05, 1.66394e-05, 1.88158e-05, 2.59525e-05, 5.01373e-05, 7.44284e-05, 9.21485e-05, 9.21714e-05, 7.44557e-05, 5.01126e-05, 3.02846e-05, 1.87074e-05, 1.38806e-05, 1.39013e-05, 1.87651e-05, 3.03507e-05, 6.7845e-05, 9.12321e-05, 0.000101032, 9.10848e-05, 6.76191e-05, 4.3037e-05, 2.56028e-05, 1.65294e-05, 1.38838e-05, 1.65946e-05, 2.57473e-05, 4.32516e-05, 8.32788e-05, 9.72228e-05, 9.707e-05, 8.29066e-05, 6.11549e-05, 4.02184e-05, 2.56613e-05, 1.877e-05, 1.8817e-05, 2.58228e-05, 4.05219e-05, 6.15633e-05, 9.03046e-05, 9.39009e-05, 8.99376e-05, 7.84724e-05, 6.11747e-05, 4.31508e-05, 3.04285e-05, 2.60745e-05, 3.05872e-05, 4.35037e-05, 6.17051e-05, 7.90354e-05, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8.66315e-05, 9.09147e-05, 9.80789e-05, 9.62719e-05, 8.16601e-05, 6.46465e-05, 5.76473e-05, 6.49856e-05, 
8.21971e-05, 9.67325e-05, 9.82952e-05, 9.0959e-05, 9.08927e-05, 0.00010338, 0.000114993, 0.000110547, 9.21777e-05, 7.69547e-05, 7.72459e-05, 9.29467e-05, 0.00011143, 0.000115579, 0.000103597, 9.09296e-05, 9.81232e-05, 0.000115019, 0.000122424, 0.000110462, 9.1113e-05, 8.25959e-05, 9.18814e-05, 0.000111692, 0.000123525, 0.000115612, 9.83101e-05, 8.99962e-05, 9.64209e-05, 0.000110552, 0.000110364, 9.608e-05, 8.30589e-05, 8.35155e-05, 9.72855e-05, 0.000111778, 0.000111559, 9.68681e-05, 8.32914e-05, 8.31867e-05, 8.17714e-05, 9.19845e-05, 9.07261e-05, 8.27473e-05, 7.87765e-05, 8.35966e-05, 9.204e-05, 9.31491e-05, 8.24251e-05, 6.82798e-05, 6.20687e-05, 6.80504e-05, 6.45286e-05, 7.64225e-05, 8.18102e-05, 8.27806e-05, 8.31736e-05, 8.277e-05, 7.74392e-05, 6.51948e-05, 5.07146e-05, 4.14032e-05, 4.13397e-05, 5.04347e-05, 5.72298e-05, 7.63501e-05, 9.06972e-05, 9.61543e-05, 9.12456e-05, 7.70782e-05, 5.77594e-05, 3.95237e-05, 2.74913e-05, 2.34588e-05, 2.74303e-05, 3.92884e-05, 6.43143e-05, 9.18085e-05, 0.000110324, 0.000110512, 9.22118e-05, 6.46467e-05, 3.93067e-05, 2.27347e-05, 1.53387e-05, 1.53446e-05, 2.27166e-05, 3.9166e-05, 8.14631e-05, 0.000110395, 0.000122401, 0.000110528, 8.15905e-05, 5.01413e-05, 2.71865e-05, 1.52331e-05, 1.1814e-05, 1.52729e-05, 2.72298e-05, 5.01151e-05, 9.62219e-05, 0.000114984, 0.000114996, 9.62192e-05, 6.76351e-05, 4.08854e-05, 2.315e-05, 1.52157e-05, 1.52495e-05, 2.32447e-05, 4.10012e-05, 6.77049e-05, 9.81136e-05, 0.000103432, 9.80894e-05, 8.29136e-05, 6.15804e-05, 4.09188e-05, 2.71988e-05, 2.2668e-05, 2.73037e-05, 4.11003e-05, 6.1757e-05, 8.30106e-05, 9.09542e-05, 9.09475e-05, 8.99288e-05, 8.29392e-05, 6.77091e-05, 5.02049e-05, 3.92862e-05, 3.9382e-05, 5.04639e-05, 6.80162e-05, 8.31456e-05, 8.99978e-05, 9.10625e-05, 0.000103726, 0.000115364, 0.000110745, 9.21347e-05, 7.67423e-05, 7.69412e-05, 9.25849e-05, 0.000111041, 0.00011522, 0.000103358, 9.08829e-05, 0.000103699, 0.000124502, 0.000134882, 0.000122511, 0.000100862, 9.11519e-05, 0.000101494, 
0.000123389, 0.0001354, 0.000124479, 0.000103512, 9.38161e-05, 0.000115398, 0.000134894, 0.000134608, 0.000114866, 9.70069e-05, 9.74638e-05, 0.000116037, 0.000135856, 0.000135582, 0.000115535, 9.72462e-05, 9.72638e-05, 0.000110852, 0.000122456, 0.00011472, 9.7905e-05, 9.01691e-05, 9.88684e-05, 0.000116222, 0.000123749, 0.000111506, 9.16742e-05, 8.26748e-05, 9.14892e-05, 9.21528e-05, 0.000100549, 9.65096e-05, 8.97903e-05, 9.02746e-05, 9.77465e-05, 0.000101942, 9.31185e-05, 7.5218e-05, 6.17737e-05, 6.16726e-05, 7.47919e-05, 7.6493e-05, 9.04358e-05, 9.64803e-05, 9.79785e-05, 9.72538e-05, 9.15559e-05, 7.7423e-05, 5.79898e-05, 4.20909e-05, 3.6208e-05, 4.1915e-05, 5.7488e-05, 7.63532e-05, 0.000100344, 0.000114586, 0.000114905, 0.000101081, 7.70692e-05, 5.08705e-05, 3.09068e-05, 2.10605e-05, 2.10244e-05, 3.0741e-05, 5.04433e-05, 9.17285e-05, 0.000122015, 0.000134293, 0.000122399, 9.22064e-05, 5.75138e-05, 3.07395e-05, 1.63193e-05, 1.21562e-05, 1.62963e-05, 3.06224e-05, 5.72019e-05, 0.000110202, 0.000134277, 0.000134429, 0.000110521, 7.44905e-05, 4.16369e-05, 2.08379e-05, 1.20808e-05, 1.20892e-05, 2.08398e-05, 4.15521e-05, 7.42453e-05, 0.000114718, 0.000123921, 0.000114978, 9.11128e-05, 6.12455e-05, 3.58052e-05, 2.07913e-05, 1.6198e-05, 2.0826e-05, 3.58136e-05, 6.1114e-05, 9.08227e-05, 0.000103217, 0.000103393, 9.70718e-05, 8.23598e-05, 6.12709e-05, 4.15819e-05, 3.05696e-05, 3.06119e-05, 4.16583e-05, 6.12284e-05, 8.20838e-05, 9.66963e-05, 9.0895e-05, 9.38527e-05, 9.7217e-05, 9.12803e-05, 7.45217e-05, 5.73454e-05, 5.05284e-05, 5.75131e-05, 7.46702e-05, 9.11502e-05, 9.68149e-05, 9.35056e-05, 9.86088e-05, 0.000115746, 0.000123047, 0.000110662, 9.08731e-05, 8.20784e-05, 9.12165e-05, 0.000110974, 0.000122854, 0.000115104, 9.80787e-05, 9.0114e-05, 0.000115675, 0.000135254, 0.000134856, 0.000114877, 9.6809e-05, 9.71345e-05, 0.000115643, 0.000135466, 0.000135258, 0.000115323, 9.71786e-05, 9.73658e-05, 0.000123004, 0.000134851, 0.000123983, 0.000103235, 9.38218e-05, 0.000104024, 
0.000125148, 0.000135708, 0.00012326, 0.000101352, 9.13441e-05, 0.000101391, 0.000110691, 0.000114817, 0.000103105, 9.08721e-05, 9.13109e-05, 0.000104243, 0.000116106, 0.000111556, 9.28199e-05, 7.71883e-05, 7.71169e-05, 9.24724e-05, 9.08432e-05, 9.65127e-05, 9.3351e-05, 9.09411e-05, 9.41105e-05, 9.76826e-05, 9.18949e-05, 7.51509e-05, 5.78519e-05, 5.08472e-05, 5.76124e-05, 7.45295e-05, 8.18087e-05, 9.64201e-05, 0.000103039, 0.000103371, 9.72366e-05, 8.26992e-05, 6.17001e-05, 4.19793e-05, 3.08608e-05, 3.07771e-05, 4.16703e-05, 6.10686e-05, 9.05927e-05, 0.000114455, 0.000123711, 0.000114913, 9.12288e-05, 6.14837e-05, 3.60544e-05, 2.09781e-05, 1.63149e-05, 2.0877e-05, 3.57782e-05, 6.09735e-05, 0.00011007, 0.000134092, 0.00013429, 0.000110516, 7.45996e-05, 4.17628e-05, 2.0916e-05, 1.21116e-05, 1.20928e-05, 2.08279e-05, 4.15268e-05, 7.41846e-05, 0.000122013, 0.000134236, 0.00012238, 9.22471e-05, 5.75396e-05, 3.06965e-05, 1.62295e-05, 1.20562e-05, 1.62114e-05, 3.05946e-05, 5.72527e-05, 9.17952e-05, 0.000114645, 0.000114906, 0.000101072, 7.70036e-05, 5.06837e-05, 3.06257e-05, 2.07652e-05, 2.07706e-05, 3.05921e-05, 5.04672e-05, 7.65127e-05, 0.000100497, 9.79813e-05, 9.71754e-05, 9.13883e-05, 7.70917e-05, 5.74829e-05, 4.15163e-05, 3.5689e-05, 4.15507e-05, 5.73792e-05, 7.66351e-05, 9.06575e-05, 9.6609e-05, 9.01531e-05, 9.74872e-05, 0.000101497, 9.24498e-05, 7.4417e-05, 6.1016e-05, 6.11031e-05, 7.45354e-05, 9.22485e-05, 0.000100825, 9.67183e-05, 8.98237e-05, 9.70439e-05, 0.000111373, 0.000110949, 9.6141e-05, 8.26513e-05, 8.2828e-05, 9.64851e-05, 0.000111005, 0.000110909, 9.64198e-05, 8.31339e-05, 8.34063e-05, 0.00011125, 0.000122972, 0.000115049, 9.78899e-05, 8.98577e-05, 9.83733e-05, 0.000115662, 0.000123223, 0.00011106, 9.13499e-05, 8.25301e-05, 9.16015e-05, 0.000110873, 0.000115079, 0.000103295, 9.09156e-05, 9.12219e-05, 0.000104067, 0.000115891, 0.000111329, 9.25933e-05, 7.69896e-05, 7.69919e-05, 9.24861e-05, 9.62096e-05, 9.79348e-05, 9.08532e-05, 8.68193e-05, 9.14106e-05, 
9.88162e-05, 9.70291e-05, 8.21707e-05, 6.48022e-05, 5.74967e-05, 6.46006e-05, 8.16677e-05, 8.27285e-05, 8.97089e-05, 9.08592e-05, 9.11012e-05, 9.03402e-05, 8.34976e-05, 6.81965e-05, 5.04528e-05, 3.92763e-05, 3.91643e-05, 5.00872e-05, 6.7563e-05, 8.26795e-05, 9.77956e-05, 0.000103231, 9.81339e-05, 8.32174e-05, 6.19824e-05, 4.12142e-05, 2.73047e-05, 2.26135e-05, 2.71286e-05, 4.08386e-05, 6.14486e-05, 9.59406e-05, 0.000114618, 0.000114753, 9.62806e-05, 6.79378e-05, 4.1199e-05, 2.33236e-05, 1.5252e-05, 1.52008e-05, 2.31512e-05, 4.08887e-05, 6.75437e-05, 0.000110162, 0.000122095, 0.000110427, 8.1786e-05, 5.04425e-05, 2.7384e-05, 1.52882e-05, 1.17868e-05, 1.52259e-05, 2.72217e-05, 5.01501e-05, 8.14217e-05, 0.000110171, 0.000110381, 9.23009e-05, 6.48822e-05, 3.94723e-05, 2.27382e-05, 1.52357e-05, 1.52209e-05, 2.26608e-05, 3.9245e-05, 6.44599e-05, 9.18325e-05, 9.60882e-05, 9.12524e-05, 7.71975e-05, 5.783e-05, 3.94015e-05, 2.72061e-05, 2.3132e-05, 2.71885e-05, 3.92488e-05, 5.7394e-05, 7.6538e-05, 9.07352e-05, 8.31423e-05, 8.27667e-05, 7.73851e-05, 6.49244e-05, 5.02015e-05, 4.07957e-05, 4.08228e-05, 5.01773e-05, 6.46006e-05, 7.66708e-05, 8.19889e-05, 8.28063e-05, 8.351e-05, 9.18762e-05, 9.27842e-05, 8.17859e-05, 6.74954e-05, 6.13638e-05, 6.76291e-05, 8.17634e-05, 9.22907e-05, 9.10499e-05, 8.2893e-05, 7.87633e-05, 8.22845e-05, 9.26847e-05, 9.12144e-05, 8.27857e-05, 7.84197e-05, 8.30327e-05, 9.14516e-05, 9.26237e-05, 8.19596e-05, 6.79007e-05, 6.1875e-05, 6.81775e-05, 9.25758e-05, 0.000101185, 9.70021e-05, 8.99366e-05, 9.01045e-05, 9.73948e-05, 0.000101531, 9.26834e-05, 7.47584e-05, 6.13447e-05, 6.13878e-05, 7.48148e-05, 9.12463e-05, 9.71231e-05, 9.38848e-05, 9.12407e-05, 9.41781e-05, 9.75881e-05, 9.16881e-05, 7.48419e-05, 5.7471e-05, 5.04669e-05, 5.73494e-05, 7.45571e-05, 8.30462e-05, 9.01466e-05, 9.12548e-05, 9.13636e-05, 9.04619e-05, 8.35056e-05, 6.81103e-05, 5.02898e-05, 3.90701e-05, 3.89621e-05, 4.99712e-05, 6.76468e-05, 7.87096e-05, 9.01314e-05, 9.39381e-05, 9.02487e-05, 
7.89855e-05, 6.16451e-05, 4.33445e-05, 3.03036e-05, 2.57187e-05, 3.00893e-05, 4.29654e-05, 6.12437e-05, 8.30418e-05, 9.69825e-05, 9.70005e-05, 8.31464e-05, 6.15897e-05, 4.05753e-05, 2.57775e-05, 1.86681e-05, 1.85836e-05, 2.55398e-05, 4.02655e-05, 6.13465e-05, 9.10055e-05, 0.000100754, 9.10671e-05, 6.79587e-05, 4.34987e-05, 2.59205e-05, 1.66363e-05, 1.38535e-05, 1.65236e-05, 2.57164e-05, 4.32738e-05, 6.78025e-05, 9.19257e-05, 9.20109e-05, 7.46441e-05, 5.05682e-05, 3.07125e-05, 1.89433e-05, 1.39541e-05, 1.39193e-05, 1.88334e-05, 3.05237e-05, 5.03287e-05, 7.44332e-05, 8.14516e-05, 7.45834e-05, 5.77995e-05, 3.96636e-05, 2.62384e-05, 1.88778e-05, 1.66231e-05, 1.88389e-05, 2.61124e-05, 3.93938e-05, 5.74216e-05, 7.42891e-05, 6.77926e-05, 6.16252e-05, 5.10875e-05, 3.96217e-05, 3.05258e-05, 2.57025e-05, 2.57087e-05, 3.04867e-05, 3.94057e-05, 5.06384e-05, 6.11214e-05, 6.75672e-05, 6.2032e-05, 6.18509e-05, 5.7959e-05, 5.0422e-05, 4.31037e-05, 4.01778e-05, 4.31602e-05, 5.03564e-05, 5.75883e-05, 6.12602e-05, 6.15802e-05, 6.13561e-05, 6.83476e-05, 7.51764e-05, 7.4874e-05, 6.76828e-05, 6.10298e-05, 6.11205e-05, 6.78221e-05, 7.47707e-05, 7.47225e-05, 6.78187e-05, 6.13909e-05, 6.16175e-05, 6.49475e-05, 7.71121e-05, 8.24083e-05, 8.30366e-05, 8.31129e-05, 8.25739e-05, 7.7234e-05, 6.49567e-05, 5.03879e-05, 4.10331e-05, 4.10612e-05, 5.04396e-05, 7.70957e-05, 9.14035e-05, 9.73825e-05, 9.85284e-05, 9.74498e-05, 9.15351e-05, 7.72624e-05, 5.76772e-05, 4.16493e-05, 3.57341e-05, 4.15708e-05, 5.75321e-05, 8.26488e-05, 9.76155e-05, 0.000104145, 0.000104111, 9.75948e-05, 8.27723e-05, 6.15499e-05, 4.16476e-05, 3.04253e-05, 3.03385e-05, 4.14154e-05, 6.1294e-05, 8.35514e-05, 9.8889e-05, 0.000104179, 9.87406e-05, 8.3493e-05, 6.20129e-05, 4.10672e-05, 2.70415e-05, 2.22969e-05, 2.68416e-05, 4.07661e-05, 6.18285e-05, 8.35898e-05, 9.75861e-05, 9.74652e-05, 8.34022e-05, 6.16766e-05, 4.05523e-05, 2.56833e-05, 1.85309e-05, 1.84314e-05, 2.54294e-05, 4.0311e-05, 6.16564e-05, 8.26492e-05, 9.12597e-05, 
8.24936e-05, 6.20034e-05, 4.06795e-05, 2.55347e-05, 1.75263e-05, 1.50827e-05, 1.73701e-05, 2.53089e-05, 4.05503e-05, 6.20796e-05, 7.68583e-05, 7.68225e-05, 6.14728e-05, 4.13775e-05, 2.60689e-05, 1.77413e-05, 1.44374e-05, 1.43766e-05, 1.75885e-05, 2.59044e-05, 4.12989e-05, 6.15025e-05, 6.44773e-05, 5.74994e-05, 4.19809e-05, 2.76546e-05, 1.90951e-05, 1.54279e-05, 1.44949e-05, 1.53584e-05, 1.89736e-05, 2.75083e-05, 4.18481e-05, 5.74213e-05, 5.01703e-05, 4.18752e-05, 3.10545e-05, 2.30333e-05, 1.90133e-05, 1.76812e-05, 1.76712e-05, 1.89671e-05, 2.29162e-05, 3.08536e-05, 4.16559e-05, 5.00711e-05, 4.11303e-05, 3.62105e-05, 3.10493e-05, 2.74991e-05, 2.58147e-05, 2.53952e-05, 2.58421e-05, 2.74706e-05, 3.08843e-05, 3.59338e-05, 4.09044e-05, 4.31158e-05, 4.13046e-05, 4.20702e-05, 4.19578e-05, 4.10662e-05, 4.02676e-05, 4.03219e-05, 4.11503e-05, 4.19168e-05, 4.18474e-05, 4.10286e-05, 4.03121e-05, 4.04362e-05, 5.06155e-05, 5.78195e-05, 6.14178e-05, 6.16009e-05, 6.12021e-05, 6.17371e-05, 6.15333e-05, 5.77652e-05, 5.04226e-05, 4.32609e-05, 4.04148e-05, 4.34225e-05, 5.77798e-05, 7.72948e-05, 9.16478e-05, 9.68003e-05, 9.15823e-05, 7.72554e-05, 5.78285e-05, 3.94314e-05, 2.72343e-05, 2.31223e-05, 2.71654e-05, 3.93339e-05, 7.74068e-05, 0.000101898, 0.000116062, 0.000115909, 0.000101612, 7.72829e-05, 5.08421e-05, 3.06689e-05, 2.06925e-05, 2.06279e-05, 3.05115e-05, 5.07401e-05, 9.21128e-05, 0.000116377, 0.000125366, 0.00011595, 9.17101e-05, 6.15932e-05, 3.59107e-05, 2.06756e-05, 1.59389e-05, 2.0525e-05, 3.5726e-05, 6.16438e-05, 9.74988e-05, 0.000116313, 0.000116027, 9.69596e-05, 6.81692e-05, 4.11633e-05, 2.31286e-05, 1.49687e-05, 1.48825e-05, 2.29218e-05, 4.10572e-05, 6.84367e-05, 9.20802e-05, 0.000101741, 9.16746e-05, 6.81956e-05, 4.35156e-05, 2.58227e-05, 1.64668e-05, 1.36375e-05, 1.63075e-05, 2.56284e-05, 4.35347e-05, 6.85541e-05, 7.72705e-05, 7.7123e-05, 6.15996e-05, 4.13877e-05, 2.60284e-05, 1.76731e-05, 1.43428e-05, 1.42652e-05, 1.74952e-05, 2.58975e-05, 4.14622e-05, 6.18523e-05, 
5.75279e-05, 5.07174e-05, 3.61735e-05, 2.36056e-05, 1.68891e-05, 1.45581e-05, 1.4109e-05, 1.44531e-05, 1.67507e-05, 2.35227e-05, 3.61985e-05, 5.07917e-05, 3.91872e-05, 3.08038e-05, 2.11666e-05, 1.55788e-05, 1.41619e-05, 1.45681e-05, 1.45369e-05, 1.40857e-05, 1.54883e-05, 2.10848e-05, 3.07489e-05, 3.91687e-05, 2.72264e-05, 2.10397e-05, 1.65208e-05, 1.55025e-05, 1.6772e-05, 1.76936e-05, 1.67648e-05, 1.54707e-05, 1.645e-05, 2.09366e-05, 2.71418e-05, 3.02326e-05, 2.32956e-05, 2.1059e-05, 2.10776e-05, 2.33751e-05, 2.58035e-05, 2.58309e-05, 2.34196e-05, 2.10754e-05, 2.09872e-05, 2.31931e-05, 2.55736e-05, 2.56236e-05, 2.73827e-05, 3.08702e-05, 3.60198e-05, 4.10563e-05, 4.32377e-05, 4.11373e-05, 3.60995e-05, 3.08798e-05, 2.73288e-05, 2.56661e-05, 2.5263e-05, 2.57218e-05, 3.94508e-05, 5.08438e-05, 6.14704e-05, 6.79e-05, 6.79318e-05, 6.15421e-05, 5.09093e-05, 3.94843e-05, 3.04645e-05, 2.56761e-05, 2.56761e-05, 3.04564e-05, 6.52222e-05, 9.31688e-05, 0.000111636, 0.000111425, 9.2753e-05, 6.49667e-05, 3.94435e-05, 2.26826e-05, 1.51484e-05, 1.51041e-05, 2.2589e-05, 3.94452e-05, 9.33948e-05, 0.000124096, 0.000136044, 0.000123469, 9.27346e-05, 5.77223e-05, 3.07242e-05, 1.61471e-05, 1.19035e-05, 1.60543e-05, 3.06628e-05, 5.79738e-05, 0.000112212, 0.000136339, 0.000135894, 0.000111322, 7.48849e-05, 4.1779e-05, 2.07744e-05, 1.1882e-05, 1.18255e-05, 2.06603e-05, 4.18626e-05, 7.5471e-05, 0.000112074, 0.000123758, 0.000111372, 8.21087e-05, 5.04538e-05, 2.72637e-05, 1.50915e-05, 1.15391e-05, 1.49784e-05, 2.71819e-05, 5.07176e-05, 8.28354e-05, 9.307e-05, 9.27877e-05, 7.49227e-05, 5.05485e-05, 3.05949e-05, 1.87884e-05, 1.37574e-05, 1.36897e-05, 1.86474e-05, 3.05757e-05, 5.08626e-05, 7.54648e-05, 6.48895e-05, 5.76686e-05, 4.19476e-05, 2.75511e-05, 1.89862e-05, 1.53073e-05, 1.43451e-05, 1.51961e-05, 1.88645e-05, 2.75582e-05, 4.21444e-05, 5.78959e-05, 3.92476e-05, 3.07863e-05, 2.11113e-05, 1.55225e-05, 1.41105e-05, 1.45101e-05, 1.44644e-05, 1.4012e-05, 1.54413e-05, 2.10959e-05, 3.08332e-05, 
3.92814e-05, 2.26021e-05, 1.63542e-05, 1.229e-05, 1.20133e-05, 1.40781e-05, 1.53785e-05, 1.4039e-05, 1.19589e-05, 1.22382e-05, 1.63119e-05, 2.25774e-05, 2.5808e-05, 1.52094e-05, 1.21802e-05, 1.22407e-05, 1.5401e-05, 1.8899e-05, 1.89012e-05, 1.5399e-05, 1.2223e-05, 1.21438e-05, 1.51644e-05, 1.85676e-05, 1.85896e-05, 1.52398e-05, 1.63343e-05, 2.09774e-05, 2.73458e-05, 3.04931e-05, 2.7377e-05, 2.10069e-05, 1.63399e-05, 1.52233e-05, 1.64325e-05, 1.73414e-05, 1.64524e-05, 2.27119e-05, 3.08084e-05, 4.1839e-05, 5.03939e-05, 5.03993e-05, 4.18553e-05, 3.08334e-05, 2.27394e-05, 1.87217e-05, 1.74032e-05, 1.7395e-05, 1.87e-05, 3.9473e-05, 5.78585e-05, 7.50136e-05, 8.21248e-05, 7.48959e-05, 5.77519e-05, 3.94655e-05, 2.60477e-05, 1.872e-05, 1.64648e-05, 1.86663e-05, 2.59866e-05, 8.274e-05, 0.000111934, 0.000123649, 0.000111273, 8.19958e-05, 5.03803e-05, 2.72889e-05, 1.52058e-05, 1.17051e-05, 1.51698e-05, 2.73422e-05, 5.07587e-05, 0.000112183, 0.000136224, 0.000135711, 0.00011111, 7.47221e-05, 4.17285e-05, 2.08256e-05, 1.19863e-05, 1.19623e-05, 2.08233e-05, 4.20087e-05, 7.55399e-05, 0.000124105, 0.000135882, 0.000123172, 9.24287e-05, 5.75187e-05, 3.06388e-05, 1.61334e-05, 1.19226e-05, 1.60989e-05, 3.07427e-05, 5.80895e-05, 9.35031e-05, 0.000111651, 0.000111226, 9.24521e-05, 6.47158e-05, 3.92962e-05, 2.26038e-05, 1.50962e-05, 1.50656e-05, 2.25841e-05, 3.95216e-05, 6.53944e-05, 9.33412e-05, 8.20948e-05, 7.47278e-05, 5.75916e-05, 3.93971e-05, 2.60478e-05, 1.87349e-05, 1.64714e-05, 1.86803e-05, 2.60535e-05, 3.96345e-05, 5.80816e-05, 7.51661e-05, 5.02855e-05, 4.17464e-05, 3.08259e-05, 2.28389e-05, 1.88796e-05, 1.75667e-05, 1.75349e-05, 1.88242e-05, 2.28453e-05, 3.09536e-05, 4.19441e-05, 5.03856e-05, 2.71912e-05, 2.09263e-05, 1.63945e-05, 1.54023e-05, 1.66987e-05, 1.76173e-05, 1.66629e-05, 1.53631e-05, 1.63793e-05, 2.09409e-05, 2.72145e-05, 3.02911e-05, 1.51842e-05, 1.21341e-05, 1.21909e-05, 1.536e-05, 1.88659e-05, 1.88591e-05, 1.53418e-05, 1.21654e-05, 1.21027e-05, 1.51522e-05, 
1.85791e-05, 1.85939e-05, 1.17376e-05, 1.21133e-05, 1.6317e-05, 2.27416e-05, 2.60473e-05, 2.27428e-05, 1.63092e-05, 1.20924e-05, 1.17064e-05, 1.3702e-05, 1.50036e-05, 1.37289e-05, 1.5212e-05, 2.09085e-05, 3.07643e-05, 3.93634e-05, 3.9352e-05, 3.0739e-05, 2.08845e-05, 1.51956e-05, 1.37543e-05, 1.41772e-05, 1.41809e-05, 1.37643e-05, 2.73363e-05, 4.19078e-05, 5.77671e-05, 6.48861e-05, 5.76386e-05, 4.17696e-05, 2.7273e-05, 1.8719e-05, 1.51138e-05, 1.42242e-05, 1.50954e-05, 1.87175e-05, 5.06903e-05, 7.52521e-05, 9.29473e-05, 9.27405e-05, 7.48309e-05, 5.03866e-05, 3.04557e-05, 1.87461e-05, 1.38084e-05, 1.37863e-05, 1.87195e-05, 3.05405e-05, 9.74791e-05, 0.000116125, 0.000115674, 9.64934e-05, 6.77646e-05, 4.10003e-05, 2.32287e-05, 1.52361e-05, 1.52469e-05, 2.33341e-05, 4.14093e-05, 6.86136e-05, 0.00011628, 0.000124984, 0.000115344, 9.1085e-05, 6.11957e-05, 3.58356e-05, 2.08342e-05, 1.62223e-05, 2.08966e-05, 3.61378e-05, 6.19845e-05, 9.22705e-05, 0.000115881, 0.000115394, 0.000100902, 7.66513e-05, 5.04919e-05, 3.06011e-05, 2.07947e-05, 2.08239e-05, 3.07759e-05, 5.10382e-05, 7.76673e-05, 0.000102004, 9.66212e-05, 9.1107e-05, 7.66648e-05, 5.73593e-05, 3.91909e-05, 2.7176e-05, 2.31618e-05, 2.72597e-05, 3.9482e-05, 5.79912e-05, 7.75237e-05, 9.17513e-05, 6.77797e-05, 6.12023e-05, 5.05485e-05, 3.92491e-05, 3.03813e-05, 2.56868e-05, 2.57194e-05, 3.05122e-05, 3.95444e-05, 5.1005e-05, 6.16549e-05, 6.79727e-05, 4.09564e-05, 3.58595e-05, 3.07001e-05, 2.72819e-05, 2.57373e-05, 2.53745e-05, 2.57973e-05, 2.74039e-05, 3.08744e-05, 3.60453e-05, 4.10811e-05, 4.31813e-05, 2.31768e-05, 2.08653e-05, 2.08896e-05, 2.32549e-05, 2.57491e-05, 2.57757e-05, 2.33163e-05, 2.09519e-05, 2.09061e-05, 2.31925e-05, 2.56188e-05, 2.56165e-05, 1.51848e-05, 1.62293e-05, 2.086e-05, 2.72552e-05, 3.04246e-05, 2.72879e-05, 2.08881e-05, 1.62269e-05, 1.51542e-05, 1.64262e-05, 1.73724e-05, 1.64586e-05, 1.51795e-05, 2.08373e-05, 3.06754e-05, 3.92841e-05, 3.92821e-05, 3.06618e-05, 2.0805e-05, 1.51334e-05, 1.37253e-05, 
1.41862e-05, 1.42066e-05, 1.37711e-05, 2.32254e-05, 3.59556e-05, 5.07331e-05, 5.75851e-05, 5.06292e-05, 3.58222e-05, 2.31241e-05, 1.64402e-05, 1.42145e-05, 1.38656e-05, 1.42416e-05, 1.64995e-05, 4.1257e-05, 6.16825e-05, 7.72211e-05, 7.70566e-05, 6.1331e-05, 4.09606e-05, 2.56393e-05, 1.74452e-05, 1.42756e-05, 1.42815e-05, 1.74886e-05, 2.57862e-05, 6.84849e-05, 9.19716e-05, 0.000101609, 9.14228e-05, 6.78223e-05, 4.319e-05, 2.56978e-05, 1.65496e-05, 1.38525e-05, 1.65716e-05, 2.5842e-05, 4.36033e-05, 9.88706e-05, 0.000103874, 9.81678e-05, 8.28242e-05, 6.15387e-05, 4.09762e-05, 2.7294e-05, 2.27644e-05, 2.74382e-05, 4.13846e-05, 6.23133e-05, 8.37993e-05, 0.000103912, 0.000103508, 9.67468e-05, 8.19811e-05, 6.11222e-05, 4.16585e-05, 3.07446e-05, 3.08408e-05, 4.20176e-05, 6.18682e-05, 8.30477e-05, 9.77339e-05, 9.82087e-05, 9.67515e-05, 9.0646e-05, 7.65218e-05, 5.73244e-05, 4.16647e-05, 3.5977e-05, 4.19404e-05, 5.79558e-05, 7.74944e-05, 9.16853e-05, 9.74244e-05, 8.28781e-05, 8.19966e-05, 7.65239e-05, 6.44104e-05, 5.01449e-05, 4.10289e-05, 4.11812e-05, 5.06168e-05, 6.51726e-05, 7.73944e-05, 8.26822e-05, 8.31357e-05, 6.15942e-05, 6.11273e-05, 5.72985e-05, 5.01091e-05, 4.31518e-05, 4.04236e-05, 4.3452e-05, 5.06437e-05, 5.79142e-05, 6.16383e-05, 6.18734e-05, 6.13421e-05, 4.09818e-05, 4.15985e-05, 4.15572e-05, 4.09014e-05, 4.0327e-05, 4.04638e-05, 4.12472e-05, 4.19588e-05, 4.1913e-05, 4.11548e-05, 4.04129e-05, 4.03618e-05, 2.72111e-05, 3.05815e-05, 3.57446e-05, 4.09086e-05, 4.32044e-05, 4.11038e-05, 3.59997e-05, 3.07639e-05, 2.72845e-05, 2.572e-05, 2.53444e-05, 2.57109e-05, 2.2589e-05, 3.05561e-05, 4.15504e-05, 5.01698e-05, 5.02356e-05, 4.16823e-05, 3.06455e-05, 2.25988e-05, 1.86818e-05, 1.74636e-05, 1.7485e-05, 1.87168e-05, 2.71773e-05, 4.16117e-05, 5.74306e-05, 6.46101e-05, 5.74248e-05, 4.15777e-05, 2.71114e-05, 1.86239e-05, 1.51043e-05, 1.42819e-05, 1.51561e-05, 1.86992e-05, 4.10803e-05, 6.14228e-05, 7.69694e-05, 7.68745e-05, 6.12028e-05, 4.0856e-05, 2.55604e-05, 1.74061e-05, 
1.42788e-05, 1.43076e-05, 1.74957e-05, 2.57203e-05, 6.20656e-05, 8.27079e-05, 9.13211e-05, 8.23376e-05, 6.15819e-05, 4.02118e-05, 2.52499e-05, 1.74787e-05, 1.52014e-05, 1.75472e-05, 2.54363e-05, 4.05728e-05, 8.36866e-05, 9.75719e-05, 9.72608e-05, 8.29743e-05, 6.11935e-05, 4.02683e-05, 2.57052e-05, 1.87937e-05, 1.88347e-05, 2.58778e-05, 4.06853e-05, 6.18802e-05, 9.11601e-05, 9.09426e-05, 8.97729e-05, 8.27763e-05, 6.76532e-05, 5.02583e-05, 3.93943e-05, 3.95204e-05, 5.06554e-05, 6.83026e-05, 8.35189e-05, 9.03547e-05, 9.09245e-05, 9.34901e-05, 9.667e-05, 9.08764e-05, 7.44473e-05, 5.75357e-05, 5.08546e-05, 5.79332e-05, 7.52097e-05, 9.18112e-05, 9.74706e-05, 9.39301e-05, 8.97869e-05, 9.66823e-05, 0.000100631, 9.19722e-05, 7.44656e-05, 6.14208e-05, 6.16916e-05, 7.5242e-05, 9.30476e-05, 0.000101631, 9.7321e-05, 8.99961e-05, 8.28777e-05, 9.09063e-05, 9.1938e-05, 8.14685e-05, 6.77245e-05, 6.19228e-05, 6.8336e-05, 8.25139e-05, 9.30233e-05, 9.16709e-05, 8.32304e-05, 7.86475e-05, 6.77727e-05, 7.43911e-05, 7.42748e-05, 6.75477e-05, 6.13202e-05, 6.16339e-05, 6.83709e-05, 7.52609e-05, 7.51536e-05, 6.81695e-05, 6.15479e-05, 6.14371e-05, 5.02493e-05, 5.72736e-05, 6.09801e-05, 6.14777e-05, 6.13608e-05, 6.19933e-05, 6.1736e-05, 5.7913e-05, 5.05991e-05, 4.34769e-05, 4.05652e-05, 4.33561e-05, 3.91816e-05, 5.03478e-05, 6.09751e-05, 6.7602e-05, 6.78175e-05, 6.14664e-05, 5.08095e-05, 3.94358e-05, 3.05471e-05, 2.58674e-05, 2.58591e-05, 3.04706e-05, 3.9116e-05, 5.72121e-05, 7.43156e-05, 8.15964e-05, 7.45406e-05, 5.74702e-05, 3.92512e-05, 2.59651e-05, 1.87897e-05, 1.66323e-05, 1.88148e-05, 2.59517e-05, 5.0137e-05, 7.44282e-05, 9.21484e-05, 9.21712e-05, 7.44555e-05, 5.01106e-05, 3.02757e-05, 1.86899e-05, 1.38632e-05, 1.38928e-05, 1.87636e-05, 3.03505e-05, 6.78448e-05, 9.12319e-05, 0.000101032, 9.10847e-05, 6.76189e-05, 4.30339e-05, 2.55919e-05, 1.65129e-05, 1.38705e-05, 1.65889e-05, 2.57461e-05, 4.32515e-05, 8.32783e-05, 9.72226e-05, 9.70697e-05, 8.29063e-05, 6.11548e-05, 4.02162e-05, 
2.56549e-05, 1.87621e-05, 1.88117e-05, 2.58205e-05, 4.0521e-05, 6.15628e-05, 9.03043e-05, 9.39006e-05, 8.99368e-05, 7.84719e-05, 6.11747e-05, 4.31503e-05, 3.04273e-05, 2.60732e-05, 3.05863e-05, 4.35031e-05, 6.17045e-05, 7.90349e-05 - }; + std::vector> wfcr_another_spin_2 + = {{-0.000850659, 0.00221376}, {-0.000824667, 0.00233258}, {-0.00025408, 0.00206122}, + {0.000478631, 0.00162836}, {0.000857409, 0.00122164}, {0.000690617, 0.000865747}, + {0.000252532, 0.000500086}, {-1.97358e-05, 0.000150978}, {3.69117e-05, -3.74248e-06}, + {0.000177712, 0.000233336}, {4.52095e-05, 0.00087035}, {-0.000415234, 0.00165232}, + {5.94142e-05, 0.00267949}, {0.00105166, 0.00268062}, {0.00175924, 0.00262548}, + {0.00167774, 0.00273249}, {0.000846263, 0.00287114}, {-0.0001803, 0.00269879}, + {-0.000812061, 0.00205795}, {-0.000897011, 0.00122219}, {-0.000744479, 0.000720935}, + {-0.000710161, 0.000892154}, {-0.000785981, 0.00158813}, {-0.00061953, 0.00231689}, + {0.0014205, 0.00323172}, {0.00245194, 0.00326343}, {0.00254339, 0.00338285}, + {0.00164597, 0.00365304}, {0.000352848, 0.00378036}, {-0.000612417, 0.00341444}, + {-0.0009668, 0.00255828}, {-0.000968044, 0.00166251}, {-0.000992893, 0.00128342}, + {-0.00105907, 0.00161763}, {-0.000796136, 0.00235437}, {0.000102526, 0.00297995}, + {0.00151406, 0.00372578}, {0.00180885, 0.00385756}, {0.00133698, 0.00394148}, + {0.000458959, 0.00402696}, {-0.000265637, 0.00393919}, {-0.00055238, 0.00347791}, + {-0.000558492, 0.00269481}, {-0.000606658, 0.00195161}, {-0.000774242, 0.00167125}, + {-0.000782284, 0.00200075}, {-0.000304306, 0.00269993}, {0.000621885, 0.00335765}, + {-0.000255288, 0.00377188}, {-0.000578666, 0.00396859}, {-0.000801606, 0.00387581}, + {-0.0007455, 0.00376145}, {-0.000428125, 0.00363368}, {-5.61308e-05, 0.00326119}, + {0.000151935, 0.00250074}, {0.000143064, 0.00157857}, {4.12433e-05, 0.00100732}, + {-7.37259e-06, 0.00118311}, {7.50117e-06, 0.00203634}, {-3.97326e-05, 0.00307286}, + {-0.00262995, 0.00309697}, {-0.00275212, 
0.00329879}, {-0.00201295, 0.00311649}, + {-0.000892036, 0.00305148}, {9.92763e-05, 0.00304081}, {0.000765938, 0.00255441}, + {0.00121087, 0.00127004}, {0.00152584, -0.000387784}, {0.00155589, -0.00144191}, + {0.0010114, -0.00115733}, {-0.000172708, 0.000309335}, {-0.00160859, 0.00203217}, + {-0.00361979, 0.00187857}, {-0.00293702, 0.0020949}, {-0.00142119, 0.00205592}, + {-8.49879e-06, 0.00220368}, {0.000950259, 0.00208594}, {0.00175599, 0.000889985}, + {0.00272285, -0.00149533}, {0.00356402, -0.00401823}, {0.00349632, -0.00515843}, + {0.00200782, -0.00417881}, {-0.000474875, -0.00175451}, {-0.00272874, 0.000620624}, + {-0.00238927, 0.000700564}, {-0.00121046, 0.00101474}, {7.99908e-05, 0.00125414}, + {0.000848624, 0.00143781}, {0.00148146, 0.000696302}, {0.00268859, -0.0016751}, + {0.00443425, -0.00512418}, {0.00565872, -0.00790653}, {0.00515424, -0.00837321}, + {0.0027683, -0.00631599}, {-0.000267813, -0.00310631}, {-0.00226091, -0.000507626}, + {-5.16118e-05, 0.000139308}, {0.00064619, 0.000615711}, {0.000801952, 0.00101293}, + {0.000697033, 0.000835273}, {0.00136019, -0.000850858}, {0.00329127, -0.00418559}, + {0.00564976, -0.00788611}, {0.00680305, -0.00997969}, {0.00576458, -0.00931341}, + {0.00313569, -0.00645509}, {0.000621442, -0.00315179}, {-0.000426931, -0.000880551}, + {0.00141297, 0.000348494}, {0.000943319, 0.000937462}, {-2.78181e-08, 0.00120799}, + {-0.000355539, 0.000441123}, {0.000843933, -0.00190748}, {0.00334012, -0.00531184}, + {0.00565542, -0.00821328}, {0.00629755, -0.00908454}, {0.00503133, -0.00758886}, + {0.00300479, -0.00477347}, {0.00163284, -0.00214256}, {0.00134622, -0.000504425}, + {0.00104173, 0.00101466}, {-0.000272058, 0.00155855}, {-0.00137594, 0.00151119}, + {-0.00120731, 0.000368374}, {0.000486366, -0.00186183}, {0.00278219, -0.00436307}, + {0.00429818, -0.00594823}, {0.0043304, -0.00589442}, {0.00333931, -0.00443482}, + {0.00235727, -0.00247746}, {0.00194516, -0.000844953}, {0.00174431, 0.00024405}, + {-0.000282354, 
0.00169861}, {-0.00135553, 0.00204457}, {-0.00167071, 0.00175434}, + {-0.000877419, 0.000760329}, {0.000573651, -0.000631027}, {0.00178955, -0.00186146}, + {0.00217847, -0.00245592}, {0.00187432, -0.00229522}, {0.00146274, -0.00160787}, + {0.00132002, -0.000723407}, {0.00124627, 0.000161286}, {0.000757131, 0.000994601}, + {-0.000411389, 0.002483}, {0.000664762, 0.00256688}, {0.00147098, 0.00225153}, + {0.00160136, 0.00163982}, {0.0011106, 0.00093562}, {0.000409698, 0.00034921}, + {-0.000123655, 1.93428e-05}, {-0.000436175, -5.55788e-06}, {-0.000723632, 0.000259591}, + {-0.00109888, 0.000762119}, {-0.00137175, 0.00140734}, {-0.00118961, 0.00204395}, + {0.00181892, 0.00296487}, {0.00312759, 0.00289344}, {0.00315861, 0.0026423}, + {0.00193264, 0.00233801}, {0.00025056, 0.00195006}, {-0.000993451, 0.00141113}, + {-0.00149501, 0.000806377}, {-0.00159212, 0.000409872}, {-0.00170222, 0.000499137}, + {-0.00176687, 0.0011095}, {-0.00129634, 0.00196539}, {2.67417e-05, 0.00266619}, + {0.0034084, 0.00333155}, {0.0038427, 0.00313401}, {0.00269018, 0.00290156}, + {0.000672062, 0.00269719}, {-0.00105739, 0.00236236}, {-0.00183363, 0.00177282}, + {-0.00186106, 0.00108585}, {-0.00174584, 0.000698806}, {-0.00171999, 0.000929376}, + {-0.00137804, 0.00171646}, {-0.000198099, 0.00263587}, {0.0017048, 0.00322729}, + {0.00268187, 0.00333104}, {0.00199517, 0.00308239}, {0.000412822, 0.00283221}, + {-0.00110048, 0.00265381}, {-0.0017948, 0.00239084}, {-0.00168035, 0.00190336}, + {-0.00132851, 0.00131573}, {-0.00117801, 0.00098954}, {-0.0010729, 0.0012226}, + {-0.00050065, 0.00195873}, {0.000729861, 0.00279654}, {0.00209266, 0.00330113}, + {2.19272e-05, 0.00277353}, {-0.000957074, 0.00256523}, {-0.00174148, 0.00235975}, + {-0.00187231, 0.00231918}, {-0.00139096, 0.00225199}, {-0.00075164, 0.00186676}, + {-0.000374476, 0.00115971}, {-0.00028408, 0.000522671}, {-0.00017942, 0.000435763}, + {0.000160266, 0.00102929}, {0.000567512, 0.00194182}, {0.000607946, 0.00261943}, + {-0.0024215, 
0.00177225}, {-0.00263682, 0.00167568}, {-0.00209535, 0.00164535}, + {-0.00114102, 0.00184997}, {-0.000248941, 0.00187832}, {0.000321309, 0.0012058}, + {0.000615769, -0.000147999}, {0.00077493, -0.00146392}, {0.000776697, -0.00187237}, + {0.000421758, -0.00110328}, {-0.000414171, 0.000280854}, {-0.00152766, 0.00139216}, + {-0.00274443, 0.000749227}, {-0.00197066, 0.000793853}, {-0.000775567, 0.00100805}, + {0.000155336, 0.0013392}, {0.000657256, 0.00104115}, {0.00104563, -0.000468441}, + {0.00158167, -0.00278986}, {0.0020371, -0.00464365}, {0.00183998, -0.0048709}, + {0.000677836, -0.00340594}, {-0.0010533, -0.00129872}, {-0.00243964, 0.000223927}, + {-0.00104825, 0.000168518}, {-9.99667e-05, 0.000343194}, {0.000542457, 0.000711696}, + {0.000631678, 0.000824043}, {0.000711702, -0.000259321}, {0.0014042, -0.00280632}, + {0.00259882, -0.00578326}, {0.00340756, -0.007483}, {0.0029706, -0.0068919}, + {0.00132982, -0.00451958}, {-0.000507229, -0.00191114}, {-0.00140519, -0.000323743}, + {0.000775353, 0.000221899}, {0.000887833, 0.000489889}, {0.000405415, 0.000801136}, + {-0.000135239, 0.00037554}, {0.000194675, -0.00156994}, {0.00168329, -0.00471438}, + {0.00352434, -0.00751854}, {0.00444058, -0.00833396}, {0.00380996, -0.00674179}, + {0.00220887, -0.00386533}, {0.000850235, -0.00137242}, {0.000474651, -0.0001314}, + {0.000988472, 0.000741314}, {0.000104752, 0.00105288}, {-0.000928353, 0.00112477}, + {-0.00119781, 0.000146563}, {-8.73e-05, -0.00221071}, {0.00201349, -0.00511534}, + {0.00390545, -0.00699787}, {0.00453751, -0.0067628}, {0.00383244, -0.0046732}, + {0.00261536, -0.00208014}, {0.00175651, -0.000269502}, {0.0013939, 0.000473638}, + {-0.000278078, 0.00139766}, {-0.00127652, 0.00169414}, {-0.0017797, 0.00150281}, + {-0.00123359, 0.000298155}, {0.000314453, -0.00178583}, {0.00212936, -0.00380278}, + {0.00330561, -0.00463548}, {0.00342906, -0.00387201}, {0.00278692, -0.00210491}, + {0.00197045, -0.000384446}, {0.00129739, 0.000639583}, {0.000627818, 0.00108077}, 
+ {-0.00124773, 0.00197888}, {-0.00129968, 0.00219642}, {-0.000820366, 0.00186852}, + {6.2613e-05, 0.000856692}, {0.000997415, -0.000513844}, {0.0016175, -0.00159544}, + {0.0017559, -0.00187196}, {0.00149077, -0.00132339}, {0.00100921, -0.000377133}, + {0.000441083, 0.000486924}, {-0.000176634, 0.00109135}, {-0.000794482, 0.0015546}, + {0.000812117, 0.00215362}, {0.00217721, 0.00235972}, {0.00250808, 0.00206105}, + {0.00185072, 0.00130905}, {0.000847902, 0.000425322}, {0.000100345, -0.000204685}, + {-0.00032629, -0.00038357}, {-0.000784331, -0.000183488}, {-0.00148305, 0.00019328}, + {-0.00210058, 0.000617194}, {-0.00200482, 0.00109475}, {-0.000889417, 0.00164666}, + {0.00344363, 0.00240202}, {0.00412826, 0.00234947}, {0.0030609, 0.00205558}, + {0.00102515, 0.00158366}, {-0.000727613, 0.000985268}, {-0.00151951, 0.000361307}, + {-0.00163598, -0.000105598}, {-0.00175167, -0.000215457}, {-0.00204384, 0.00012061}, + {-0.00193658, 0.000797118}, {-0.000752743, 0.00155791}, {0.00139869, 0.0021397}, + {0.00429725, 0.00243424}, {0.00348847, 0.00225357}, {0.00123152, 0.00205561}, + {-0.00107539, 0.0017845}, {-0.00224766, 0.00130311}, {-0.00219818, 0.00063953}, + {-0.00173366, 8.04705e-05}, {-0.00154505, -1.44906e-05}, {-0.00145589, 0.00048098}, + {-0.000696593, 0.00132523}, {0.00109584, 0.00208225}, {0.00321415, 0.00244832}, + {0.00244031, 0.0022015}, {0.000620396, 0.00202706}, {-0.00149422, 0.00192451}, + {-0.00265921, 0.0017784}, {-0.00248193, 0.0014026}, {-0.00160714, 0.000815412}, + {-0.000946948, 0.000317101}, {-0.000758728, 0.000266482}, {-0.000495671, 0.000759492}, + {0.000449455, 0.00152111}, {0.00190388, 0.00212198}, {0.00288065, 0.0023187}, + {-0.000688485, 0.0016919}, {-0.00217813, 0.00155269}, {-0.00290026, 0.00151515}, + {-0.0024853, 0.00150412}, {-0.00140646, 0.00128748}, {-0.000490928, 0.000773425}, + {-0.000160101, 0.000184896}, {-0.000140597, -8.53298e-05}, {0.000100468, 0.000185019}, + {0.000674583, 0.000838289}, {0.00106589, 0.00146436}, {0.00063142, 
0.00174561}, + {-0.00260573, 0.0010079}, {-0.00281905, 0.000850244}, {-0.00211981, 0.000891521}, + {-0.00101254, 0.0010516}, {-0.000171228, 0.000910086}, {0.000118564, 0.000196445}, + {9.63571e-05, -0.000829039}, {0.000145403, -0.00151408}, {0.00031269, -0.00136667}, + {0.000236222, -0.000495381}, {-0.000440811, 0.000488826}, {-0.00159713, 0.00101378}, + {-0.00209376, 0.00037292}, {-0.00132175, 0.000178588}, {-0.000357631, 0.000357329}, + {0.000135574, 0.000621458}, {6.97704e-05, 0.0002626}, {-8.86416e-05, -0.0010209}, + {0.000107486, -0.00264534}, {0.000561535, -0.00353785}, {0.000703078, -0.00306147}, + {8.94859e-05, -0.00158364}, {-0.00107076, -0.00013794}, {-0.00201802, 0.000490441}, + {-0.000230884, 3.1371e-05}, {0.000383229, -0.000104351}, {0.000428404, 0.000237989}, + {-0.000165206, 0.000391835}, {-0.000709238, -0.000511882}, {-0.00047087, -0.00248403}, + {0.000543768, -0.00442134}, {0.0015335, -0.00497688}, {0.00164608, -0.00374876}, + {0.000793595, -0.00164089}, {-0.000266218, -4.59147e-05}, {-0.000662338, 0.000355773}, + {0.000838677, 0.000108863}, {0.000500847, 0.000184552}, {-0.000462377, 0.000611161}, + {-0.00142202, 0.000407798}, {-0.0014377, -0.00112731}, {-0.00019289, -0.00347252}, + {0.00160799, -0.00513285}, {0.00279916, -0.00490715}, {0.00274911, -0.00296621}, + {0.00184213, -0.000696689}, {0.00100447, 0.00053703}, {0.000772938, 0.000514692}, + {0.000124405, 0.000544474}, {-0.000809288, 0.000897244}, {-0.00179564, 0.00124025}, + {-0.00206921, 0.000585713}, {-0.00109742, -0.00130986}, {0.000817474, -0.00346153}, + {0.00266122, -0.00439688}, {0.0034805, -0.00344643}, {0.00309768, -0.00134185}, + {0.00211648, 0.000457057}, {0.00125909, 0.0010494}, {0.000713378, 0.000753483}, + {-0.00124118, 0.00114723}, {-0.00166679, 0.0016648}, {-0.00172845, 0.00177442}, + {-0.00111678, 0.000815018}, {0.000181631, -0.00098021}, {0.00170334, -0.00252241}, + {0.00274777, -0.00276665}, {0.00285775, -0.00162362}, {0.00211934, -7.41674e-06}, + {0.00102324, 0.00101721}, 
{4.04591e-05, 0.00115183}, {-0.000681188, 0.000968203}, + {-0.00124712, 0.00172152}, {-0.000503703, 0.00217425}, {0.000211257, 0.00202485}, + {0.000767816, 0.00105073}, {0.00119491, -0.00030522}, {0.00149454, -0.00123516}, + {0.00152478, -0.00122728}, {0.00109947, -0.000465569}, {0.000216125, 0.000401356}, + {-0.000826921, 0.000878568}, {-0.00157212, 0.00102008}, {-0.00170337, 0.00123249}, + {0.00184474, 0.001294}, {0.00283925, 0.00159354}, {0.00251948, 0.00129289}, + {0.00143807, 0.000500437}, {0.0004614, -0.000345234}, {-3.9144e-05, -0.000819144}, + {-0.000396374, -0.000822161}, {-0.00109119, -0.000579709}, {-0.00204712, -0.000355138}, + {-0.00254108, -0.000180772}, {-0.00186409, 0.000122903}, {-8.78541e-05, 0.000672901}, + {0.00383137, 0.0011233}, {0.00350436, 0.00127091}, {0.00169583, 0.00120797}, + {-0.000300211, 0.000879763}, {-0.0013451, 0.000336338}, {-0.00136446, -0.000243029}, + {-0.0011625, -0.000631526}, {-0.00139716, -0.000687156}, {-0.00179999, -0.000428682}, + {-0.00145363, 3.06954e-06}, {0.000152943, 0.00045451}, {0.00238365, 0.000838344}, + {0.00342454, 0.00105368}, {0.00153576, 0.00130977}, {-0.000938952, 0.00152474}, + {-0.00244623, 0.00141073}, {-0.00236447, 0.000855341}, {-0.00138811, 9.4757e-05}, + {-0.000669021, -0.000436948}, {-0.000617606, -0.000454379}, {-0.000575121, -4.02195e-05}, + {0.000337306, 0.000464277}, {0.00209292, 0.000781006}, {0.00354879, 0.000912453}, + {0.000717081, 0.0011431}, {-0.00162143, 0.00147068}, {-0.00320893, 0.00170016}, + {-0.00312581, 0.00153008}, {-0.00175628, 0.000924823}, {-0.000323504, 0.000205368}, + {0.000294365, -0.000195537}, {0.000293711, -8.92853e-05}, {0.000530904, 0.00033686}, + {0.00143664, 0.000728738}, {0.00240627, 0.000900161}, {0.0023238, 0.000963991}, + {-0.00217873, 0.0011431}, {-0.00348199, 0.00126879}, {-0.0033341, 0.0012921}, + {-0.00193091, 0.00108826}, {-0.000318486, 0.000656389}, {0.000557299, 0.000175283}, + {0.000618021, -0.000107607}, {0.00051748, -6.10013e-05}, {0.000796446, 
0.000239548}, + {0.00122393, 0.000594966}, {0.00101531, 0.000854539}, {-0.000287338, 0.00101178}, + {-0.00308394, 0.00078988}, {-0.00282581, 0.000545468}, {-0.00157908, 0.000472976}, + {-0.000254897, 0.000507047}, {0.000361377, 0.000400072}, {0.000251192, 2.70184e-05}, + {2.11797e-05, -0.000427374}, {0.000163505, -0.000602871}, {0.000493456, -0.000298751}, + {0.000345515, 0.000318715}, {-0.000671211, 0.000845599}, {-0.00213771, 0.000987026}, + {-0.00173982, 0.000138796}, {-0.000746363, -0.000276445}, {0.000115931, -7.95682e-05}, + {0.00015399, 0.000363158}, {-0.000466155, 0.000372438}, {-0.000940082, -0.000301362}, + {-0.000678774, -0.00116418}, {7.15204e-05, -0.00142417}, {0.000471506, -0.000764501}, + {-4.80198e-05, 0.000324541}, {-0.00117162, 0.000990485}, {-0.00195998, 0.000807742}, + {0.00010666, -0.000411199}, {0.000559929, -0.000534979}, {0.000124969, 0.000177635}, + {-0.00103976, 0.000812724}, {-0.00194435, 0.00045067}, {-0.0017105, -0.000837987}, + {-0.000442787, -0.00198498}, {0.000841294, -0.00193352}, {0.00116176, -0.000670932}, + {0.00047464, 0.000741505}, {-0.000358119, 0.00117588}, {-0.000477469, 0.000489336}, + {0.000557711, -0.000448939}, {2.14563e-05, 5.1863e-05}, {-0.00125621, 0.00111261}, + {-0.00244309, 0.00143189}, {-0.00247485, 0.000335699}, {-0.00108655, -0.00145352}, + {0.000859953, -0.00243917}, {0.0020968, -0.00176869}, {0.00203615, -5.04466e-05}, + {0.00118411, 0.00123797}, {0.000515273, 0.00116288}, {0.000465298, 0.000173111}, + {-0.000523707, 7.49832e-05}, {-0.00129984, 0.00109782}, {-0.00218758, 0.00201665}, + {-0.00238082, 0.00163377}, {-0.00134821, -8.95559e-05}, {0.000544868, -0.00188137}, + {0.00222292, -0.00231004}, {0.00275675, -0.00111937}, {0.00209201, 0.000561396}, + {0.00097507, 0.00133038}, {0.000184764, 0.000840209}, {-0.000155834, 3.24209e-05}, + {-0.001461, 0.000795685}, {-0.00141069, 0.00188087}, {-0.00124781, 0.00225242}, + {-0.000643685, 0.0012467}, {0.000498695, -0.000580925}, {0.0017598, -0.00189078}, + {0.00242939, 
-0.00176781}, {0.00206917, -0.000530854}, {0.000889091, 0.000638683}, + {-0.00042233, 0.000863347}, {-0.0012548, 0.000368828}, {-0.00149871, 0.000137595}, + {-0.000600014, 0.00125626}, {0.000440209, 0.0019785}, {0.00101314, 0.0018198}, + {0.00122965, 0.000677783}, {0.0013687, -0.00072864}, {0.00146561, -0.00146476}, + {0.00123002, -0.00118843}, {0.000388083, -0.000386593}, {-0.000907184, 0.000168035}, + {-0.00204432, 0.000181923}, {-0.00238166, 6.42756e-05}, {-0.00175623, 0.000404465}, + {0.00199557, 0.000263447}, {0.00247435, 0.000538173}, {0.00180618, 0.000268668}, + {0.000754365, -0.000364617}, {4.72554e-05, -0.000926542}, {-0.00026345, -0.00110933}, + {-0.000678776, -0.000972234}, {-0.00153395, -0.000817447}, {-0.00246206, -0.000843655}, + {-0.00263013, -0.000935596}, {-0.00154663, -0.000803184}, {0.000360354, -0.000324686}, + {0.00282182, -0.000181848}, {0.00180775, 0.000362276}, {7.00146e-06, 0.000732201}, + {-0.00126844, 0.000697148}, {-0.00137827, 0.000289147}, {-0.000824003, -0.000228184}, + {-0.000583612, -0.000584569}, {-0.00102006, -0.00070715}, {-0.00148547, -0.000717379}, + {-0.00102567, -0.000744172}, {0.000534689, -0.000757712}, {0.00224657, -0.000600078}, + {0.00133015, 0.000104398}, {-0.000843026, 0.00102754}, {-0.00256504, 0.00167641}, + {-0.00270407, 0.00161494}, {-0.00144203, 0.000923632}, {-6.64642e-06, 0.000113986}, + {0.000547055, -0.000334838}, {0.000289877, -0.000366624}, {0.000180305, -0.000299112}, + {0.000918105, -0.000412609}, {0.00208642, -0.000612658}, {0.00248263, -0.000519398}, + {-0.0015507, 0.00082568}, {-0.0034411, 0.00164957}, {-0.00371457, 0.0019666}, + {-0.00222345, 0.0015614}, {-8.25219e-05, 0.000764932}, {0.00134496, 0.00014339}, + {0.00160211, -6.27988e-06}, {0.00133526, 0.00013365}, {0.00140551, 0.0001729}, + {0.00186843, -2.94509e-05}, {0.00188474, -0.000196806}, {0.000660482, 6.84618e-05}, + {-0.00359162, 0.00117955}, {-0.00394685, 0.00137818}, {-0.0025872, 0.00121969}, + {-0.000423216, 0.000818799}, {0.00123655, 
0.000463904}, {0.00176489, 0.000358813}, + {0.00156645, 0.000459431}, {0.00142312, 0.000557829}, {0.00155579, 0.000518922}, + {0.00138269, 0.00042539}, {0.000221478, 0.000483263}, {-0.00179918, 0.000785753}, + {-0.00331757, 0.000681835}, {-0.00220791, 0.000308687}, {-0.000444879, 0.000212208}, + {0.000802004, 0.000435465}, {0.00101794, 0.000751507}, {0.000661517, 0.000891308}, + {0.000525362, 0.000800192}, {0.000826648, 0.000669702}, {0.000968204, 0.000719148}, + {0.000214973, 0.000952495}, {-0.00141191, 0.00114009}, {-0.00296008, 0.00104732}, + {-0.00133102, -0.00031431}, {-1.68337e-05, -0.000566196}, {0.000632161, 9.58244e-05}, + {0.000179505, 0.00109555}, {-0.000724178, 0.00159265}, {-0.00104486, 0.0012803}, + {-0.00043404, 0.000641017}, {0.000415266, 0.000418534}, {0.000483599, 0.000846099}, + {-0.000509911, 0.00141033}, {-0.00176972, 0.0013876}, {-0.00216637, 0.000610358}, + {0.000378534, -0.00093279}, {0.000695835, -0.000328522}, {-0.000191029, 0.00120703}, + {-0.00165606, 0.00235426}, {-0.0024236, 0.00215362}, {-0.00179325, 0.000914903}, + {-0.000322277, -0.000121804}, {0.000712069, -2.65834e-05}, {0.000537899, 0.00091902}, + {-0.000409751, 0.00155347}, {-0.000993859, 0.00105462}, {-0.000563175, -0.000187827}, + {0.00042485, -0.000618514}, {-0.000277729, 0.000935412}, {-0.00173402, 0.00267169}, + {-0.00280059, 0.00302497}, {-0.00248115, 0.0016338}, {-0.000904025, -0.000295759}, + {0.000754963, -0.00114656}, {0.00134079, -0.000423828}, {0.000739223, 0.00084546}, + {-0.000135323, 0.00122109}, {-0.000338016, 0.000335609}, {0.000142723, -0.000740402}, + {-0.000675449, 0.0003245}, {-0.00135006, 0.0021808}, {-0.00207717, 0.00321564}, + {-0.00200301, 0.00234654}, {-0.000837009, 0.000130504}, {0.000759899, -0.0017094}, + {0.00170088, -0.00187498}, {0.00142337, -0.000645998}, {0.000350221, 0.000518958}, + {-0.000572076, 0.000502558}, {-0.000785952, -0.00036669}, {-0.0005826, -0.00072658}, + {-0.00117033, 0.0010344}, {-0.000890367, 0.00239042}, {-0.000582166, 
0.00238781}, + {5.51232e-05, 0.000768668}, {0.000968865, -0.00134512}, {0.00164892, -0.00244056}, + {0.00152962, -0.00196554}, {0.000524819, -0.000735357}, {-0.000830946, -2.62602e-05}, + {-0.00179096, -0.000286841}, {-0.00197987, -0.000759252}, {-0.00161236, -0.000370295}, + {9.67618e-07, 0.000931598}, {0.000995034, 0.00153071}, {0.00139055, 0.000986414}, + {0.00141007, -0.000436504}, {0.00130878, -0.00174739}, {0.00104571, -0.00210457}, + {0.000362801, -0.00153128}, {-0.000817367, -0.000790513}, {-0.00211045, -0.000571193}, + {-0.00284145, -0.000813133}, {-0.00255406, -0.000842124}, {-0.00139341, -0.000159969}, + {0.00145723, -0.00051303}, {0.00164934, -0.000286049}, {0.000923179, -0.000373457}, + {-1.50408e-05, -0.000625309}, {-0.000612659, -0.00074709}, {-0.000919935, -0.00060566}, + {-0.00136618, -0.000365436}, {-0.00213309, -0.000317819}, {-0.00280477, -0.000576078}, + {-0.00268961, -0.00094449}, {-0.0015172, -0.00109731}, {0.000187925, -0.000893071}, + {0.00129587, -0.000798419}, {0.000158906, 0.00020614}, {-0.00117883, 0.000975943}, + {-0.00170154, 0.00115875}, {-0.00124111, 0.000867268}, {-0.000550201, 0.000487262}, + {-0.000471313, 0.000266527}, {-0.00105364, 0.000102517}, {-0.00151183, -0.000268484}, + {-0.00106577, -0.000893038}, {0.000193081, -0.00144211}, {0.00129811, -0.0014609}, + {-0.000694365, 0.000111926}, {-0.00241287, 0.00150284}, {-0.00303923, 0.00224209}, + {-0.00209269, 0.00203013}, {-0.00032754, 0.00130537}, {0.000969676, 0.000745078}, + {0.00113847, 0.000600864}, {0.000634264, 0.000529758}, {0.000405736, 6.77302e-05}, + {0.000827027, -0.000762891}, {0.00128459, -0.00135793}, {0.000842255, -0.00107487}, + {-0.00314295, 0.00121727}, {-0.00398818, 0.00210678}, {-0.00298255, 0.00216122}, + {-0.000715846, 0.00155526}, {0.00143399, 0.000959614}, {0.00244066, 0.000853001}, + {0.00235477, 0.00107475}, {0.00196991, 0.0010571}, {0.00183944, 0.00048565}, + {0.001678, -0.000297241}, {0.000758259, -0.000573442}, {-0.00112358, 5.01733e-05}, + 
{-0.00410814, 0.00137059}, {-0.00330837, 0.00137364}, {-0.00115352, 0.00106281}, + {0.00107898, 0.000850138}, {0.0023451, 0.00103993}, {0.00255984, 0.00149989}, + {0.00236571, 0.00177804}, {0.00226054, 0.00155252}, {0.00201827, 0.000965194}, + {0.00100376, 0.000502181}, {-0.000956968, 0.000534863}, {-0.00308429, 0.000973054}, + {-0.0028829, 0.000436756}, {-0.00110773, 0.000167191}, {0.000651311, 0.000473464}, + {0.00146196, 0.00129255}, {0.00138454, 0.00213227}, {0.00117829, 0.0024759}, + {0.00135942, 0.00221233}, {0.00160184, 0.00168487}, {0.00110211, 0.0013266}, + {-0.000466811, 0.00125253}, {-0.00240698, 0.00120983}, {-0.00346005, 0.0009182}, + {-0.000632782, -0.000577207}, {0.000635365, -3.94854e-05}, {0.000771369, 0.0014756}, + {-3.73475e-05, 0.00296403}, {-0.000741049, 0.00343415}, {-0.000542003, 0.00276521}, + {0.000282592, 0.00173449}, {0.000678746, 0.00121102}, {-6.88672e-05, 0.00132984}, + {-0.00153606, 0.00146107}, {-0.00249666, 0.000976697}, {-0.0020841, 1.17346e-05}, + {0.000753646, -0.000548754}, {0.000617308, 0.00129148}, {-0.000695988, 0.0034929}, + {-0.00203612, 0.00442797}, {-0.00223574, 0.00347947}, {-0.00123133, 0.00157252}, + {-9.02653e-05, 0.000255751}, {7.34624e-05, 0.000248659}, {-0.000794045, 0.00088217}, + {-0.00166211, 0.00094888}, {-0.00148988, 6.42361e-05}, {-0.00031348, -0.000863744}, + {0.000456483, 0.000608535}, {-0.000604918, 0.00313614}, {-0.0020511, 0.00475253}, + {-0.00264988, 0.00410325}, {-0.0019219, 0.00164859}, {-0.000588009, -0.000746064}, + {0.000149688, -0.00151773}, {-0.000225379, -0.000745959}, {-0.00108693, 0.000154799}, + {-0.00135942, 2.18475e-05}, {-0.000650927, -0.000822873}, {0.000322916, -0.000945364}, + {-0.000607399, 0.00182776}, {-0.00131512, 0.0038504}, {-0.00175392, 0.00399247}, + {-0.00134636, 0.00187678}, {-0.000332892, -0.00105811}, {0.000423803, -0.00281499}, + {0.000261107, -0.00258398}, {-0.000646136, -0.00129865}, {-0.00146371, -0.000514196}, + {-0.00151269, -0.000779952}, {-0.000911194, -0.00115304}, 
{-0.000411512, -0.000322679}, + {-0.000867282, 0.00193138}, {-0.000486437, 0.00276038}, {-1.35831e-05, 0.00172419}, + {0.000544091, -0.000666089}, {0.00087913, -0.00279809}, {0.000624222, -0.00336212}, + {-0.000271843, -0.00244049}, {-0.00141094, -0.00125204}, {-0.0022103, -0.000853659}, + {-0.00233624, -0.00111864}, {-0.00191303, -0.00100408}, {-0.00133161, 0.000197752}, + {0.000194878, 0.000797578}, {0.00110693, 0.000825472}, {0.0013971, -0.000212241}, + {0.00116602, -0.00165516}, {0.000612288, -0.00249766}, {-0.000157435, -0.00227736}, + {-0.00111529, -0.00143563}, {-0.0021413, -0.000817017}, {-0.00291883, -0.000823905}, + {-0.00306116, -0.0010721}, {-0.00239347, -0.00087611}, {-0.00113639, -5.18101e-05}, + {0.00102125, -0.000807487}, {0.00105345, -0.000485227}, {0.000166927, -0.000213507}, + {-0.00094218, 1.24723e-05}, {-0.00166322, 0.000317754}, {-0.0019481, 0.000728524}, + {-0.0021791, 0.00105711}, {-0.00260779, 0.00102987}, {-0.00297418, 0.000546126}, + {-0.0027215, -0.000186904}, {-0.00160397, -0.00078721}, {-6.61755e-05, -0.000985953}, + {0.000329234, -0.000542752}, {-0.000719155, 0.000767918}, {-0.00174749, 0.00168844}, + {-0.00198482, 0.00193268}, {-0.00145141, 0.00181473}, {-0.000869482, 0.00178525}, + {-0.000890393, 0.00189163}, {-0.00142466, 0.00172581}, {-0.00174984, 0.000913185}, + {-0.00128338, -0.000368005}, {-0.000234127, -0.00139491}, {0.000537466, -0.00147333}, + {-0.00168221, 0.000800791}, {-0.00280472, 0.00215657}, {-0.00273725, 0.00261712}, + {-0.00145783, 0.00229386}, {0.000145804, 0.00192936}, {0.001059, 0.00204242}, + {0.000997031, 0.00236793}, {0.000521408, 0.00216271}, {0.000344534, 0.00106061}, + {0.000553847, -0.000408225}, {0.000548347, -0.00119428}, {-0.000255449, -0.0006658}, + {-0.003496, 0.00170968}, {-0.00344178, 0.00222347}, {-0.00187325, 0.00199334}, + {0.000319292, 0.00161855}, {0.00200026, 0.00177115}, {0.00264115, 0.00244428}, + {0.00254026, 0.00291995}, {0.00227272, 0.0024991}, {0.0019883, 0.00126046}, + {0.00129319, 
7.35466e-05}, {-0.000174447, -0.000171995}, {-0.00209945, 0.000623649}, + {-0.00360734, 0.00133873}, {-0.0021703, 0.00118988}, {-5.05481e-05, 0.00113971}, + {0.00168386, 0.00162029}, {0.00255812, 0.00257233}, {0.002828, 0.00339162}, + {0.00292124, 0.00344}, {0.00282294, 0.00263503}, {0.00206942, 0.00154151}, + {0.000362512, 0.000871077}, {-0.00185049, 0.000872856}, {-0.00348659, 0.00119725}, + {-0.00194572, 0.000339146}, {-0.000244247, 0.000657071}, {0.000915061, 0.00177077}, + {0.00126201, 0.00321189}, {0.00131316, 0.00416785}, {0.00162389, 0.00412598}, + {0.00207051, 0.00326743}, {0.00192382, 0.00224869}, {0.000654039, 0.00159925}, + {-0.00135816, 0.00132319}, {-0.00296409, 0.00106805}, {-0.00316435, 0.000645663}, + {6.55117e-05, 0.000132232}, {0.000617656, 0.00185297}, {0.000100959, 0.00404774}, + {-0.000654945, 0.00534757}, {-0.000700874, 0.00498998}, {3.09265e-05, 0.00343747}, + {0.000653185, 0.00188692}, {0.000252531, 0.00116799}, {-0.00111913, 0.00110664}, + {-0.00240307, 0.000938406}, {-0.00250864, 0.000294188}, {-0.00135208, -0.000286716}, + {0.000868648, 0.00137508}, {-0.000104883, 0.00421796}, {-0.00161118, 0.00620505}, + {-0.00236684, 0.00592248}, {-0.00188737, 0.00362859}, {-0.000899015, 0.00100207}, + {-0.000546404, -0.000371853}, {-0.00120603, -0.000307679}, {-0.00210338, 0.000113344}, + {-0.0021182, -0.000103027}, {-0.000964516, -0.000716191}, {0.000450775, -0.000497793}, + {0.000207818, 0.00318285}, {-0.00130497, 0.00579402}, {-0.00243518, 0.00617887}, + {-0.00240675, 0.0038693}, {-0.00157165, 0.000398834}, {-0.00097118, -0.00202301}, + {-0.00123945, -0.00238234}, {-0.00196024, -0.00149752}, {-0.00212286, -0.000885259}, + {-0.00121054, -0.00107458}, {0.00018794, -0.0011001}, {0.0008803, 0.00032464}, + {-0.000761788, 0.0039177}, {-0.00145842, 0.00507368}, {-0.00148637, 0.00364586}, + {-0.000942266, 0.0003575}, {-0.000538492, -0.00269366}, {-0.000845693, -0.0038141}, + {-0.00167743, -0.00304428}, {-0.00224425, -0.00180712}, {-0.00193182, 
-0.00131005}, + {-0.000927982, -0.00134559}, {-6.83017e-05, -0.000687589}, {-4.55839e-05, 0.00136361}, + {-0.000761573, 0.00278107}, {-0.000234762, 0.00248914}, {0.000317487, 0.000440221}, + {0.0004477, -0.00213936}, {-0.000121671, -0.00363042}, {-0.00118122, -0.00338277}, + {-0.00214753, -0.00214008}, {-0.00251903, -0.00116332}, {-0.00225303, -0.000964868}, + {-0.00172029, -0.000920589}, {-0.00130623, -0.00011436}, {-0.00106494, 0.00148846}, + {0.00024908, 0.000629334}, {0.00112484, 5.72396e-05}, {0.00116563, -0.00109332}, + {0.000382475, -0.00203896}, {-0.000789633, -0.00211082}, {-0.00183343, -0.00134922}, + {-0.00249117, -0.000433324}, {-0.00281178, -2.4032e-05}, {-0.00291254, -0.000171425}, + {-0.00274451, -0.000360928}, {-0.00212756, -0.000126269}, {-0.00102379, 0.000410084}, + {0.00121868, -0.000670007}, {0.000845468, -7.05047e-05}, {-0.000563981, 0.000589868}, + {-0.00205899, 0.00117309}, {-0.00281552, 0.00173963}, {-0.0027804, 0.00231553}, + {-0.00253607, 0.00270756}, {-0.00257146, 0.00259391}, {-0.00272752, 0.0018272}, + {-0.00238943, 0.000651798}, {-0.00122431, -0.0004006}, {0.000306085, -0.000870168}, + {0.00023735, 0.000169826}, {-0.000975848, 0.00148156}, {-0.00212441, 0.00227317}, + {-0.00246224, 0.00252934}, {-0.002011, 0.00276566}, {-0.00144212, 0.00330621}, + {-0.00134638, 0.00380937}, {-0.00163725, 0.00355609}, {-0.00168626, 0.00222682}, + {-0.00104194, 0.000374539}, {1.08809e-05, -0.000920227}, {0.000639579, -0.000923125}, + {-0.00167471, 0.00144179}, {-0.00256276, 0.0023415}, {-0.00241464, 0.0024758}, + {-0.00133867, 0.00239653}, {-8.33317e-05, 0.00284382}, {0.000623536, 0.00382888}, + {0.000661179, 0.00449995}, {0.000479659, 0.00395831}, {0.000519103, 0.00221804}, + {0.000700286, 0.000333267}, {0.000514513, -0.000472097}, {-0.000379928, 0.000148812}, + {-0.00299063, 0.00178471}, {-0.00263682, 0.00188112}, {-0.00124121, 0.00176059}, + {0.00045278, 0.00214826}, {0.0017348, 0.0033027}, {0.00237436, 0.00458413}, + {0.00256959, 0.00495634}, 
{0.00252461, 0.00395717}, {0.00213843, 0.00218478}, + {0.00114506, 0.000806929}, {-0.000445777, 0.000555252}, {-0.00209478, 0.00116675}, + {-0.00268127, 0.00118712}, {-0.00141003, 0.00128755}, {8.66201e-05, 0.00196049}, + {0.00129488, 0.00328712}, {0.00216343, 0.00471442}, {0.00284479, 0.00540091}, + {0.00326913, 0.00490115}, {0.00305032, 0.00354661}, {0.00186178, 0.00215928}, + {-9.08147e-05, 0.00137236}, {-0.00201181, 0.00119917}, {-0.00300126, 0.00122708}, + {-0.00116756, 0.000909944}, {-0.000296202, 0.0021814}, {0.000102549, 0.00404616}, + {0.000382626, 0.00560597}, {0.00101114, 0.00604626}, {0.00190104, 0.00525103}, + {0.0023531, 0.00383196}, {0.00170404, 0.00256591}, {4.64022e-05, 0.00179014}, + {-0.00171486, 0.00131392}, {-0.00256568, 0.000868226}, {-0.00219669, 0.000574464}, + {0.000130329, 0.00204781}, {-0.000447043, 0.00472096}, {-0.00131336, 0.00684164}, + {-0.00148165, 0.00714546}, {-0.00069716, 0.00559183}, {0.000284722, 0.00333404}, + {0.000446043, 0.00167319}, {-0.000466081, 0.00101348}, {-0.00166205, 0.000794723}, + {-0.00206656, 0.000379372}, {-0.00136156, -8.04534e-05}, {-0.000281149, 0.000275429}, + {0.000188126, 0.0042154}, {-0.00159028, 0.00716199}, {-0.0027869, 0.00795655}, + {-0.00266121, 0.00605702}, {-0.00171094, 0.00277782}, {-0.00107464, 0.000132255}, + {-0.00134268, -0.000841632}, {-0.00197922, -0.000688349}, {-0.00192975, -0.000583599}, + {-0.000787651, -0.000884621}, {0.000677016, -0.000705298}, {0.00118786, 0.00104095}, + {-0.000671626, 0.00573111}, {-0.00233448, 0.00738883}, {-0.0028127, 0.00606795}, + {-0.00226063, 0.00253427}, {-0.00168196, -0.000987316}, {-0.00180508, -0.00268707}, + {-0.00231453, -0.00252145}, {-0.00221803, -0.00181284}, {-0.000968108, -0.00161727}, + {0.000817859, -0.00157524}, {0.00180554, -0.000419976}, {0.00115696, 0.0024051}, + {-0.001204, 0.00516846}, {-0.00162698, 0.00494292}, {-0.0012682, 0.00229898}, + {-0.000945128, -0.00114084}, {-0.00133418, -0.00334519}, {-0.00219961, -0.00352788}, + {-0.00258819, 
-0.00255407}, {-0.00180596, -0.00179433}, {-0.000193999, -0.00164776}, + {0.00109442, -0.00123954}, {0.00112421, 0.0004306}, {1.89061e-05, 0.00311067}, + {-0.000589831, 0.00275862}, {0.000110773, 0.00154341}, {0.000437935, -0.000699136}, + {-0.000204417, -0.00254751}, {-0.00155355, -0.00296892}, {-0.00264895, -0.00213355}, + {-0.00268869, -0.00108587}, {-0.00175274, -0.000644117}, {-0.000690906, -0.000643559}, + {-0.000296111, -0.000263718}, {-0.00058353, 0.000931149}, {-0.00087713, 0.00233777}, + {0.000668283, 0.000248119}, {0.00132806, -0.000437232}, {0.000738037, -0.00109884}, + {-0.000816317, -0.0012108}, {-0.0023579, -0.000613787}, {-0.00304042, 0.000312979}, + {-0.00280838, 0.000972553}, {-0.00228455, 0.00105885}, {-0.00204254, 0.000768028}, + {-0.0020198, 0.000526613}, {-0.00166182, 0.000528644}, {-0.000631287, 0.00055431}, + {0.00176357, -0.000286297}, {0.000612084, 0.000609432}, {-0.00143759, 0.00145769}, + {-0.0030684, 0.00211654}, {-0.00343171, 0.0027218}, {-0.00278635, 0.00333831}, + {-0.00207308, 0.00371553}, {-0.00189469, 0.00344003}, {-0.0019428, 0.00235859}, + {-0.00143668, 0.000846352}, {-0.000100277, -0.00037913}, {0.00135214, -0.000762469}, + {0.000434445, 0.000783103}, {-0.00120828, 0.00181673}, {-0.00258284, 0.00235325}, + {-0.00292445, 0.00271486}, {-0.00234709, 0.00345433}, {-0.00160745, 0.00455722}, + {-0.00130022, 0.00526206}, {-0.00130219, 0.00473033}, {-0.00101084, 0.0029208}, + {-0.000107747, 0.000784285}, {0.000959583, -0.000457078}, {0.00130316, -0.00028696}, + {-0.00140426, 0.00159201}, {-0.00234945, 0.00198777}, {-0.00232965, 0.0020839}, + {-0.00145429, 0.00262293}, {-0.000353497, 0.00398803}, {0.000386349, 0.00557276}, + {0.000671157, 0.00618422}, {0.000820179, 0.00513094}, {0.00108491, 0.00293582}, + {0.00129026, 0.000955324}, {0.000985641, 0.000247738}, {-3.87389e-05, 0.000761993}, + {-0.00237468, 0.00153928}, {-0.00215182, 0.0015805}, {-0.00114885, 0.00201655}, + {0.000170784, 0.00331119}, {0.00136561, 0.00515003}, {0.0022223, 
0.00649228}, + {0.00270601, 0.00639769}, {0.00277501, 0.00485909}, {0.00230399, 0.00283682}, + {0.00122444, 0.00145334}, {-0.000262247, 0.00111188}, {-0.00165088, 0.00135585}, + {-0.00201851, 0.00137731}, {-0.00137103, 0.00211839}, {-0.000484952, 0.0035597}, + {0.000571075, 0.0053218}, {0.00176368, 0.00661389}, {0.00285414, 0.00676431}, + {0.00338151, 0.005726}, {0.00295488, 0.00409583}, {0.00160775, 0.00263604}, + {-0.000130185, 0.00174453}, {-0.00154364, 0.00134154}, {-0.00217304, 0.0012096}, + {-0.00111807, 0.00227216}, {-0.00127286, 0.00436788}, {-0.00124095, 0.00639557}, + {-0.000527549, 0.00739678}, {0.000797411, 0.00700213}, {0.00199679, 0.00563142}, + {0.00224587, 0.00406707}, {0.00135669, 0.00282933}, {-5.03695e-05, 0.00193582}, + {-0.00109495, 0.00121081}, {-0.00137594, 0.000747473}, {-0.00120017, 0.000984002}, + {-0.000799825, 0.00436847}, {-0.00223745, 0.00722247}, {-0.00281127, 0.00846958}, + {-0.00205333, 0.0075343}, {-0.000625014, 0.00524057}, {0.000318493, 0.00301769}, + {0.00021019, 0.00170262}, {-0.000462619, 0.00107437}, {-0.00074718, 0.000486608}, + {-0.000253058, -0.000144515}, {0.000429062, -7.65462e-05}, {0.000343932, 0.00148698}, + {-0.00134733, 0.00636626}, {-0.00330364, 0.00850293}, {-0.00364703, 0.00783554}, + {-0.00265319, 0.00497093}, {-0.00153655, 0.00181632}, {-0.00117674, -7.14351e-05}, + {-0.00130248, -0.000614516}, {-0.000931936, -0.000807034}, {0.000416027, -0.00130901}, + {0.00201253, -0.00155793}, {0.00248166, -0.000347455}, {0.00111728, 0.00270967}, + {-0.00192699, 0.00660756}, {-0.00312919, 0.00695557}, {-0.00279919, 0.00456761}, + {-0.00199951, 0.0011128}, {-0.00175629, -0.00136787}, {-0.00197247, -0.00212369}, + {-0.00161602, -0.0019814}, {2.94031e-05, -0.0020944}, {0.00236069, -0.00251844}, + {0.00377906, -0.00211064}, {0.00309261, 0.000135028}, {0.000647509, 0.00370285}, + {-0.00149831, 0.0046433}, {-0.001407, 0.00349878}, {-0.000897936, 0.000836171}, + {-0.00104484, -0.00156051}, {-0.00187465, -0.00248865}, {-0.00229402, 
-0.00220971}, + {-0.00125325, -0.00186899}, {0.00103509, -0.00211821}, {0.00308684, -0.00238043}, + {0.0034149, -0.00151485}, {0.00186506, 0.000807039}, {-0.000276379, 0.00345293}, + {2.21473e-05, 0.00173809}, {0.000698765, 0.000473142}, {0.000419186, -0.000968488}, + {-0.000989215, -0.00160609}, {-0.00244581, -0.00126079}, {-0.00260952, -0.000611524}, + {-0.00118894, -0.000426697}, {0.000732035, -0.000763853}, {0.0016879, -0.000946242}, + {0.001175, -0.000330561}, {3.39965e-05, 0.000926242}, {-0.000478581, 0.001901}, + {0.00154008, -0.000206973}, {0.00156732, -0.000400041}, {7.41494e-05, -0.000267091}, + {-0.00201443, 0.000269015}, {-0.00325338, 0.00100147}, {-0.00295024, 0.00155809}, + {-0.00171113, 0.00167696}, {-0.000753479, 0.0013894}, {-0.000694384, 0.000948383}, + {-0.00102477, 0.000584833}, {-0.000770711, 0.000333006}, {0.000360217, 8.44586e-05}, + {0.00186264, 0.00022094}, {-5.23372e-05, 0.00119935}, {-0.00227965, 0.00190951}, + {-0.00342172, 0.00236577}, {-0.00302498, 0.00282837}, {-0.00184894, 0.00335118}, + {-0.00100949, 0.00358948}, {-0.000879113, 0.00309693}, {-0.000839542, 0.00182768}, + {-0.000100609, 0.000326464}, {0.00128725, -0.000621994}, {0.00229895, -0.000585715}, + {0.000200884, 0.00111429}, {-0.00163536, 0.00176005}, {-0.00279638, 0.00209268}, + {-0.00274624, 0.00264774}, {-0.00188855, 0.00379729}, {-0.0010681, 0.00512943}, + {-0.000729571, 0.00566254}, {-0.000573505, 0.00473298}, {-2.67328e-05, 0.0026937}, + {0.000989068, 0.000715804}, {0.00181155, -0.000149942}, {0.00160535, 0.000224563}, + {-0.00136764, 0.00148465}, {-0.00217036, 0.0016419}, {-0.00200225, 0.00199952}, + {-0.00109176, 0.00316358}, {-5.77449e-05, 0.0050243}, {0.000631244, 0.0065833}, + {0.000974006, 0.00672095}, {0.00125359, 0.00519826}, {0.00158657, 0.00292302}, + {0.00170031, 0.00120678}, {0.00118943, 0.000711506}, {-6.45869e-07, 0.00107013}, + {-0.00195023, 0.00154243}, {-0.00179105, 0.00189847}, {-0.000956701, 0.0029312}, + {0.000222618, 0.00469048}, {0.00137826, 
0.0065037}, {0.00225456, 0.00735722}, + {0.00272337, 0.00669574}, {0.00271171, 0.00489771}, {0.00216871, 0.00297272}, + {0.00113323, 0.00176315}, {-0.000169929, 0.00139191}, {-0.00133759, 0.00143066}, + {-0.0017792, 0.00218462}, {-0.00163111, 0.00352628}, {-0.00097876, 0.00523263}, + {0.000195696, 0.00673274}, {0.00161796, 0.00741956}, {0.0027547, 0.00701766}, + {0.00310866, 0.00574842}, {0.00254461, 0.00415704}, {0.00136593, 0.00276132}, + {8.11069e-05, 0.00182156}, {-0.000925202, 0.00138445}, {-0.00153561, 0.00147572}, + {-0.00176118, 0.00384196}, {-0.00248835, 0.00613248}, {-0.00225474, 0.00760546}, + {-0.000938517, 0.00774121}, {0.000779032, 0.00678386}, {0.00192464, 0.00539543}, + {0.00200691, 0.00407296}, {0.00133241, 0.0028903}, {0.000594913, 0.00175883}, + {0.000176473, 0.000856728}, {-0.000139405, 0.000706218}, {-0.000781128, 0.00175313}, + {-0.00237342, 0.00576071}, {-0.00385602, 0.00799681}, {-0.0035961, 0.00813768}, + {-0.00200032, 0.00653029}, {-0.000315142, 0.00444386}, {0.000526001, 0.00289121}, + {0.000634738, 0.00190366}, {0.000800818, 0.000907301}, {0.00146162, -0.000284669}, + {0.00210308, -0.00096813}, {0.00173442, -0.000121442}, {-2.74434e-05, 0.00248231}, + {-0.00306346, 0.00651427}, {-0.00433561, 0.00755447}, {-0.00360454, 0.00609212}, + {-0.00205619, 0.00351098}, {-0.000954652, 0.00144609}, {-0.000454266, 0.00040217}, + {0.000300936, -0.000332987}, {0.00195782, -0.0015058}, {0.0039316, -0.00275347}, + {0.00470337, -0.00274976}, {0.0032171, -0.000552245}, {1.18578e-05, 0.0032017}, + {-0.00278559, 0.00529792}, {-0.00304365, 0.00488066}, {-0.00208558, 0.00269857}, + {-0.00131795, 0.000494252}, {-0.00115003, -0.00066359}, {-0.000680505, -0.00114459}, + {0.00111467, -0.00197779}, {0.00395805, -0.00341799}, {0.00614145, -0.00439941}, + {0.00592998, -0.00350115}, {0.00317673, -0.000485857}, {-0.000434266, 0.00315866}, + {-0.00114186, 0.00274722}, {-0.000603376, 0.00171376}, {-0.000292862, 0.000142556}, + {-0.000859221, -0.000796188}, {-0.00144753, 
-0.000996808}, {-0.000631289, -0.00130061}, + {0.0019365, -0.0023754}, {0.00486464, -0.0037873}, {0.00609966, -0.00426294}, + {0.00474494, -0.00292566}, {0.00186663, -0.000279677}, {-0.000495147, 0.00207097}, + {0.0010389, 0.000458118}, {0.00129929, -3.2414e-05}, {0.000254368, -0.00029649}, + {-0.00145191, -0.000101111}, {-0.00218416, 0.000116184}, {-0.000931996, -0.000250261}, + {0.0016051, -0.0012645}, {0.00356913, -0.00226958}, {0.00361965, -0.00244876}, + {0.00210709, -0.00159178}, {0.000614925, -0.000313465}, {0.000357166, 0.000496235}, + {0.00231154, -0.000353224}, {0.00139957, 0.000153351}, {-0.000782743, 0.00081739}, + {-0.00271041, 0.0013913}, {-0.00298066, 0.00163455}, {-0.00157505, 0.00146486}, + {0.000189534, 0.000997357}, {0.000954008, 0.000444072}, {0.00056752, -2.9647e-05}, + {7.38775e-05, -0.00036653}, {0.000491613, -0.000563068}, {0.00165481, -0.000583696}, + {0.00107511, 0.000844773}, {-0.000968891, 0.00160569}, {-0.0025605, 0.00194004}, + {-0.00274394, 0.00205876}, {-0.00171173, 0.00227382}, {-0.000511421, 0.00257089}, + {6.06774e-06, 0.00256985}, {-5.62534e-05, 0.00192717}, {8.92828e-05, 0.00077179}, + {0.000932978, -0.000286196}, {0.0020071, -0.000619504}, {0.00225286, -9.55433e-05}, + {-0.000457187, 0.00142148}, {-0.00175798, 0.00174224}, {-0.00213, 0.00196691}, + {-0.00155549, 0.00262817}, {-0.000700077, 0.00381462}, {-0.000222762, 0.00488127}, + {-0.000169522, 0.00494942}, {-4.29217e-05, 0.00373502}, {0.000552901, 0.00189056}, + {0.00137638, 0.000503154}, {0.00169516, 0.000210339}, {0.000994046, 0.000763479}, + {-0.00126864, 0.00169309}, {-0.00142429, 0.0018686}, {-0.000845566, 0.00247045}, + {2.08382e-05, 0.00381255}, {0.000659182, 0.00545956}, {0.00090646, 0.00641924}, + {0.000988042, 0.00595926}, {0.00116566, 0.00427861}, {0.00138367, 0.00237854}, + {0.00129204, 0.00124335}, {0.000631733, 0.00110148}, {-0.000412285, 0.00144003}, + {-0.00135563, 0.00217956}, {-0.000962677, 0.0028371}, {-0.000158956, 0.00400678}, + {0.000746208, 
0.00550259}, {0.00149912, 0.0066804}, {0.00198086, 0.00685389}, + {0.00215632, 0.00584917}, {0.00198962, 0.00417877}, {0.00144263, 0.00266101}, + {0.000567934, 0.00182998}, {-0.00041124, 0.00166503}, {-0.00114971, 0.00183961}, + {-0.00156959, 0.00329811}, {-0.00154812, 0.00472258}, {-0.000922947, 0.00600033}, + {0.0002038, 0.00677764}, {0.00142044, 0.00685166}, {0.00222583, 0.00621927}, + {0.00233875, 0.00506127}, {0.00183873, 0.00368253}, {0.00103489, 0.00243575}, + {0.000208034, 0.00164366}, {-0.00053006, 0.00152392}, {-0.00115678, 0.00212534}, + {-0.00251414, 0.00470392}, {-0.0031696, 0.00646804}, {-0.00249403, 0.00706199}, + {-0.000867051, 0.00662023}, {0.000772703, 0.00571651}, {0.00168179, 0.0047547}, + {0.00181524, 0.0036989}, {0.00163996, 0.00237664}, {0.00150699, 0.000980486}, + {0.00125336, 0.000178167}, {0.000444341, 0.000640027}, {-0.00100736, 0.00240404}, + {-0.00372239, 0.00542758}, {-0.00448722, 0.00671962}, {-0.00334363, 0.00621626}, + {-0.00136296, 0.00486449}, {0.00023217, 0.00366658}, {0.00108734, 0.00277141}, + {0.00173833, 0.00160672}, {0.0027026, -0.000155468}, {0.00364879, -0.00187089}, + {0.00355844, -0.00226777}, {0.00176387, -0.00059971}, {-0.00118108, 0.00251424}, + {-0.00399583, 0.00472759}, {-0.00409661, 0.00507554}, {-0.00257529, 0.00385367}, + {-0.000928577, 0.00244868}, {0.000117921, 0.00154953}, {0.00108849, 0.000650323}, + {0.00281213, -0.0010673}, {0.00511138, -0.00345559}, {0.00656547, -0.00512779}, + {0.00569569, -0.00457654}, {0.00245072, -0.00163705}, {-0.00149525, 0.00215623}, + {-0.00260025, 0.00283664}, {-0.0019566, 0.00252812}, {-0.000833706, 0.0014622}, + {-0.000274303, 0.000695937}, {0.000132097, 0.000143541}, {0.00157885, -0.0011095}, + {0.00442016, -0.00349888}, {0.00733432, -0.00609325}, {0.00823824, -0.00713493}, + {0.00614784, -0.00557337}, {0.00218363, -0.00211808}, {-0.00128994, 0.00124256}, + {-0.000123401, 0.000854128}, {0.000453969, 0.000646113}, {0.00030688, 0.000373685}, + {-0.000391903, 0.000283943}, 
{-0.000231813, -0.000262111}, {0.00183423, -0.00195531}, + {0.00511927, -0.00451608}, {0.00751976, -0.00659521}, {0.00730252, -0.00679844}, + {0.00465911, -0.00489722}, {0.00152116, -0.00206403}, {-0.000180551, 0.000104839}, + {0.0018963, -0.000141144}, {0.0014379, 0.000268168}, {-0.000127957, 0.000715867}, + {-0.00147232, 0.000855295}, {-0.00105054, 0.000142885}, {0.00129946, -0.00150999}, + {0.00403861, -0.00343374}, {0.00524854, -0.00460669}, {0.00432995, -0.00443504}, + {0.00243836, -0.00319148}, {0.00126506, -0.00171543}, {0.0013995, -0.000681907}, + {0.00225673, 7.23815e-05}, {0.000585681, 0.000943757}, {-0.0015829, 0.00154213}, + {-0.00263207, 0.00158107}, {-0.00177623, 0.00103909}, {0.000223906, 0.000176443}, + {0.00179653, -0.000649631}, {0.00201579, -0.00120673}, {0.00135432, -0.00146062}, + {0.00102523, -0.00146935}, {0.00162118, -0.00124164}, {0.0024511, -0.000723483}, + {-0.000154943, 0.00156692}, {-0.00142295, 0.00195714}, {-0.00184099, 0.00190483}, + {-0.00121448, 0.00170676}, {-0.000172926, 0.0016225}, {0.000460365, 0.00159834}, + {0.000433118, 0.00135799}, {0.000214275, 0.000759356}, {0.000406697, 3.91325e-05}, + {0.00104141, -0.000326933}, {0.0014867, -3.42658e-05}, {0.00107903, 0.000762432}, + {-0.000753044, 0.00198762}, {-0.000877387, 0.00210716}, {-0.000388019, 0.0022298}, + {0.000247307, 0.00273539}, {0.000477011, 0.00351262}, {0.000194824, 0.00396464}, + {-0.000187014, 0.00355761}, {-0.000184846, 0.00237644}, {0.000245467, 0.00112622}, + {0.000651997, 0.000554658}, {0.000544468, 0.000839059}, {-9.30268e-05, 0.00151463}, + {-0.000403431, 0.00243266}, {0.000286467, 0.00262747}, {0.00100229, 0.00314263}, + {0.00127252, 0.00408365}, {0.00100513, 0.00499286}, {0.000531865, 0.00517755}, + {0.000259253, 0.00434466}, {0.000279959, 0.0029229}, {0.000328336, 0.00174231}, + {0.000111703, 0.0013628}, {-0.00033151, 0.00168054}, {-0.000622446, 0.00216128}, + {-0.000137769, 0.00316293}, {0.000474186, 0.00373584}, {0.000923713, 0.00446389}, + {0.00107932, 
0.0052212}, {0.00103894, 0.00562663}, {0.000964015, 0.00532408}, + {0.000885674, 0.00432542}, {0.000685293, 0.00307646}, {0.000265918, 0.00215933}, + {-0.000271117, 0.00188898}, {-0.000642302, 0.00215959}, {-0.000597533, 0.00265122}, + {-0.00100542, 0.00399852}, {-0.00100453, 0.00490882}, {-0.00062628, 0.0054026}, + {3.83936e-05, 0.00552668}, {0.000727159, 0.00534067}, {0.00116081, 0.00480813}, + {0.00121274, 0.00389263}, {0.000953263, 0.00274662}, {0.000546559, 0.00176346}, + {0.000112018, 0.0013857}, {-0.00031816, 0.00180742}, {-0.000723358, 0.00282935}, + {-0.00281408, 0.00435114}, {-0.00311088, 0.00524849}, {-0.00222225, 0.00520514}, + {-0.000745311, 0.00477071}, {0.000542117, 0.00434811}, {0.00127201, 0.00379908}, + {0.00160782, 0.0027327}, {0.00184164, 0.00114821}, {0.00193386, -0.000278334}, + {0.00150077, -0.000626659}, {0.000276326, 0.000483073}, {-0.00143443, 0.002502}, + {-0.00414115, 0.00374612}, {-0.00399791, 0.00430666}, {-0.00240286, 0.00385926}, + {-0.000554662, 0.00332491}, {0.000764381, 0.00295489}, {0.00168498, 0.00215531}, + {0.00273173, 0.000339295}, {0.00391123, -0.00211464}, {0.00442328, -0.00385595}, + {0.00335944, -0.00361691}, {0.000717651, -0.00135349}, {-0.00231005, 0.0016348}, + {-0.00365285, 0.00232984}, {-0.00278792, 0.00256214}, {-0.00109614, 0.00216252}, + {0.000240346, 0.00189326}, {0.0011766, 0.00143774}, {0.00250374, -0.0001259}, + {0.00459497, -0.00303389}, {0.00657677, -0.00611828}, {0.00687616, -0.00750867}, + {0.00468409, -0.00617734}, {0.000901089, -0.00286973}, {-0.00241721, 0.000483923}, + {-0.00143984, 0.00084618}, {-0.000447711, 0.00106599}, {0.000337655, 0.00107168}, + {0.000600646, 0.00100688}, {0.00127956, 6.66812e-05}, {0.00330641, -0.00246444}, + {0.00624328, -0.00605344}, {0.00829448, -0.00891638}, {0.00779645, -0.00931868}, + {0.00477746, -0.00700737}, {0.00105523, -0.00342405}, {-0.00125896, -0.000496316}, + {0.000966925, 7.28597e-05}, {0.00111309, 0.000576299}, {0.000480653, 0.000944169}, + {-7.74375e-05, 
0.00073422}, {0.000814661, -0.000830886}, {0.00346075, -0.00382169}, + {0.00646833, -0.00709314}, {0.00780522, -0.00893505}, {0.00657037, -0.00836397}, + {0.00377812, -0.0058766}, {0.00140297, -0.00297302}, {0.000624171, -0.000921167}, + {0.00194438, 0.000232843}, {0.000836254, 0.00099782}, {-0.000729045, 0.00137722}, + {-0.0012933, 0.000831765}, {6.42774e-05, -0.000917899}, {0.00271073, -0.00339083}, + {0.00489816, -0.00548679}, {0.00527083, -0.00621866}, {0.00399808, -0.00540245}, + {0.00245975, -0.00369449}, {0.00183763, -0.00197794}, {0.00202921, -0.000700615}, + {0.00119042, 0.000918855}, {-0.000561453, 0.00163398}, {-0.00195718, 0.0017527}, + {-0.00189511, 0.00112586}, {-0.000381752, -3.45243e-05}, {0.00145983, -0.00125615}, + {0.0024085, -0.00209436}, {0.00219995, -0.00236859}, {0.00160762, -0.00216675}, + {0.00151792, -0.00166024}, {0.00196498, -0.000940752}, {0.00211146, -3.96837e-05}}; + + std::vector expected_rho_2{ + 8.66322e-05, 9.09154e-05, 9.80794e-05, 9.62722e-05, 8.16604e-05, 6.46466e-05, 5.76474e-05, 6.49856e-05, + 8.21971e-05, 9.67325e-05, 9.82953e-05, 9.09594e-05, 9.08935e-05, 0.000103381, 0.000114995, 0.000110548, + 9.21788e-05, 7.69555e-05, 7.72464e-05, 9.2947e-05, 0.00011143, 0.000115579, 0.000103597, 9.09303e-05, + 9.81246e-05, 0.000115021, 0.000122426, 0.000110464, 9.11147e-05, 8.25973e-05, 9.18823e-05, 0.000111692, + 0.000123525, 0.000115612, 9.83109e-05, 8.99972e-05, 9.64228e-05, 0.000110554, 0.000110366, 9.60819e-05, + 8.30607e-05, 8.35169e-05, 9.72864e-05, 0.000111778, 0.00011156, 9.68686e-05, 8.32922e-05, 8.31881e-05, + 8.17731e-05, 9.19864e-05, 9.07279e-05, 8.27491e-05, 7.87781e-05, 8.35978e-05, 9.20407e-05, 9.31494e-05, + 8.24252e-05, 6.828e-05, 6.20692e-05, 6.80515e-05, 6.45305e-05, 7.64246e-05, 8.18118e-05, 8.27818e-05, + 8.31747e-05, 8.27708e-05, 7.74396e-05, 6.51951e-05, 5.07151e-05, 4.14035e-05, 4.13397e-05, 5.04355e-05, + 5.72317e-05, 7.63516e-05, 9.06979e-05, 9.61549e-05, 9.12462e-05, 7.70786e-05, 5.77605e-05, 3.95271e-05, + 
2.74958e-05, 2.34614e-05, 2.74307e-05, 3.92893e-05, 6.43151e-05, 9.18088e-05, 0.000110324, 0.000110512, + 9.22121e-05, 6.46479e-05, 3.93121e-05, 2.27458e-05, 1.53501e-05, 1.53502e-05, 2.27178e-05, 3.91666e-05, + 8.14631e-05, 0.000110395, 0.000122401, 0.000110528, 8.15908e-05, 5.01446e-05, 2.71976e-05, 1.52503e-05, + 1.18282e-05, 1.5279e-05, 2.72311e-05, 5.01152e-05, 9.62222e-05, 0.000114984, 0.000114997, 9.62193e-05, + 6.76356e-05, 4.08901e-05, 2.31617e-05, 1.52301e-05, 1.52593e-05, 2.32485e-05, 4.10021e-05, 6.77051e-05, + 9.81138e-05, 0.000103432, 9.80899e-05, 8.29138e-05, 6.15808e-05, 4.0922e-05, 2.72051e-05, 2.26743e-05, + 2.73074e-05, 4.11016e-05, 6.17575e-05, 8.3011e-05, 9.09546e-05, 9.09482e-05, 8.99295e-05, 8.29393e-05, + 6.77091e-05, 5.02057e-05, 3.92875e-05, 3.93831e-05, 5.04644e-05, 6.80165e-05, 8.31457e-05, 8.9998e-05, + 9.10633e-05, 0.000103727, 0.000115365, 0.000110745, 9.21349e-05, 7.67424e-05, 7.69412e-05, 9.2585e-05, + 0.000111041, 0.00011522, 0.000103358, 9.08835e-05, 0.000103701, 0.000124504, 0.000134884, 0.000122512, + 0.000100863, 9.11523e-05, 0.000101495, 0.00012339, 0.0001354, 0.00012448, 0.000103513, 9.3817e-05, + 0.000115401, 0.000134897, 0.00013461, 0.000114867, 9.70076e-05, 9.74646e-05, 0.000116038, 0.000135857, + 0.000135583, 0.000115536, 9.7247e-05, 9.72654e-05, 0.000110854, 0.000122458, 0.000114721, 9.7906e-05, + 9.01702e-05, 9.88692e-05, 0.000116222, 0.00012375, 0.000111506, 9.16747e-05, 8.26758e-05, 9.1491e-05, + 9.21537e-05, 0.00010055, 9.65106e-05, 8.97913e-05, 9.02754e-05, 9.7747e-05, 0.000101942, 9.31185e-05, + 7.5218e-05, 6.17738e-05, 6.16731e-05, 7.47928e-05, 7.64941e-05, 9.04369e-05, 9.64812e-05, 9.79791e-05, + 9.72543e-05, 9.15561e-05, 7.7423e-05, 5.79902e-05, 4.20914e-05, 3.62082e-05, 4.1915e-05, 5.74885e-05, + 7.63542e-05, 0.000100344, 0.000114587, 0.000114905, 0.000101082, 7.70694e-05, 5.08717e-05, 3.09099e-05, + 2.10637e-05, 2.10258e-05, 3.07413e-05, 5.0444e-05, 9.17286e-05, 0.000122015, 0.000134293, 0.000122399, + 
9.22065e-05, 5.7515e-05, 3.07442e-05, 1.63273e-05, 1.21628e-05, 1.62989e-05, 3.06228e-05, 5.72021e-05, + 0.000110202, 0.000134277, 0.00013443, 0.000110521, 7.44908e-05, 4.16398e-05, 2.0846e-05, 1.20913e-05, + 1.20963e-05, 2.08422e-05, 4.15524e-05, 7.42453e-05, 0.000114719, 0.000123921, 0.000114978, 9.1113e-05, + 6.12461e-05, 3.58088e-05, 2.07988e-05, 1.62058e-05, 2.08303e-05, 3.58149e-05, 6.11144e-05, 9.0823e-05, + 0.000103218, 0.000103394, 9.70724e-05, 8.236e-05, 6.12713e-05, 4.15842e-05, 3.05734e-05, 3.0615e-05, + 4.16597e-05, 6.12289e-05, 8.20841e-05, 9.66965e-05, 9.08957e-05, 9.38534e-05, 9.72175e-05, 9.12804e-05, + 7.45219e-05, 5.7346e-05, 5.05292e-05, 5.75136e-05, 7.46703e-05, 9.11503e-05, 9.6815e-05, 9.3506e-05, + 9.86094e-05, 0.000115747, 0.000123048, 0.000110663, 9.08732e-05, 8.20784e-05, 9.12166e-05, 0.000110974, + 0.000122855, 0.000115104, 9.80793e-05, 9.01144e-05, 0.000115677, 0.000135256, 0.000134858, 0.000114877, + 9.68091e-05, 9.71348e-05, 0.000115643, 0.000135466, 0.000135259, 0.000115324, 9.71789e-05, 9.73665e-05, + 0.000123007, 0.000134853, 0.000123983, 0.000103236, 9.38226e-05, 0.000104024, 0.000125149, 0.000135709, + 0.000123261, 0.000101352, 9.13447e-05, 0.000101393, 0.000110692, 0.000114818, 0.000103106, 9.08733e-05, + 9.13118e-05, 0.000104243, 0.000116106, 0.000111556, 9.282e-05, 7.71886e-05, 7.71179e-05, 9.2474e-05, + 9.08436e-05, 9.65136e-05, 9.33522e-05, 9.09421e-05, 9.4111e-05, 9.76827e-05, 9.18949e-05, 7.51509e-05, + 5.78519e-05, 5.08473e-05, 5.76128e-05, 7.453e-05, 8.18096e-05, 9.64211e-05, 0.00010304, 0.000103372, + 9.72367e-05, 8.26992e-05, 6.17002e-05, 4.19795e-05, 3.0861e-05, 3.07771e-05, 4.16703e-05, 6.10691e-05, + 9.05933e-05, 0.000114455, 0.000123711, 0.000114913, 9.12288e-05, 6.14839e-05, 3.60552e-05, 2.09796e-05, + 1.63161e-05, 2.08773e-05, 3.57783e-05, 6.0974e-05, 0.00011007, 0.000134092, 0.00013429, 0.000110516, + 7.45997e-05, 4.17636e-05, 2.09183e-05, 1.21148e-05, 1.20948e-05, 2.08283e-05, 4.15268e-05, 7.41847e-05, + 
0.000122013, 0.000134236, 0.00012238, 9.22473e-05, 5.754e-05, 3.0698e-05, 1.62329e-05, 1.206e-05, + 1.62133e-05, 3.0595e-05, 5.72528e-05, 9.17953e-05, 0.000114645, 0.000114906, 0.000101072, 7.70042e-05, + 5.0684e-05, 3.06272e-05, 2.07683e-05, 2.07734e-05, 3.05934e-05, 5.04678e-05, 7.6513e-05, 0.000100497, + 9.79817e-05, 9.71761e-05, 9.1389e-05, 7.7092e-05, 5.7483e-05, 4.15174e-05, 3.56908e-05, 4.1552e-05, + 5.73798e-05, 7.66353e-05, 9.06577e-05, 9.66092e-05, 9.01536e-05, 9.74878e-05, 0.000101498, 9.245e-05, + 7.44172e-05, 6.10165e-05, 6.11036e-05, 7.45356e-05, 9.22486e-05, 0.000100826, 9.67187e-05, 8.98243e-05, + 9.70445e-05, 0.000111374, 0.00011095, 9.61412e-05, 8.26514e-05, 8.28281e-05, 9.64852e-05, 0.000111005, + 0.00011091, 9.64205e-05, 8.31343e-05, 8.34063e-05, 0.000111252, 0.000122974, 0.00011505, 9.789e-05, + 8.98579e-05, 9.83735e-05, 0.000115662, 0.000123223, 0.00011106, 9.13501e-05, 8.25301e-05, 9.16022e-05, + 0.000110875, 0.00011508, 0.000103296, 9.09165e-05, 9.12227e-05, 0.000104068, 0.000115891, 0.000111329, + 9.25934e-05, 7.69896e-05, 7.69925e-05, 9.24877e-05, 9.62098e-05, 9.79353e-05, 9.08547e-05, 8.68207e-05, + 9.14111e-05, 9.88162e-05, 9.70291e-05, 8.21707e-05, 6.48023e-05, 5.7497e-05, 6.46014e-05, 8.16684e-05, + 8.27292e-05, 8.97105e-05, 9.08607e-05, 9.11018e-05, 9.03403e-05, 8.34977e-05, 6.81965e-05, 5.04528e-05, + 3.92764e-05, 3.91645e-05, 5.00874e-05, 6.75631e-05, 8.26807e-05, 9.77966e-05, 0.000103231, 9.81339e-05, + 8.32174e-05, 6.19824e-05, 4.12142e-05, 2.73047e-05, 2.26135e-05, 2.71287e-05, 4.08387e-05, 6.14492e-05, + 9.5941e-05, 0.000114618, 0.000114753, 9.62806e-05, 6.79379e-05, 4.11991e-05, 2.33238e-05, 1.52523e-05, + 1.52009e-05, 2.31512e-05, 4.08889e-05, 6.75442e-05, 0.000110162, 0.000122095, 0.000110427, 8.17862e-05, + 5.0443e-05, 2.73845e-05, 1.52887e-05, 1.17873e-05, 1.52261e-05, 2.72218e-05, 5.01503e-05, 8.14217e-05, + 0.000110171, 0.000110381, 9.23012e-05, 6.48832e-05, 3.9473e-05, 2.27385e-05, 1.52364e-05, 1.52218e-05, + 
2.26613e-05, 3.92454e-05, 6.446e-05, 9.18326e-05, 9.60882e-05, 9.12528e-05, 7.71986e-05, 5.7831e-05, + 3.94017e-05, 2.72065e-05, 2.31332e-05, 2.71896e-05, 3.92494e-05, 5.73943e-05, 7.65381e-05, 9.07352e-05, + 8.31427e-05, 8.27674e-05, 7.73859e-05, 6.49247e-05, 5.02016e-05, 4.07965e-05, 4.08239e-05, 5.01779e-05, + 6.46007e-05, 7.66709e-05, 8.19891e-05, 8.28065e-05, 8.35102e-05, 9.18766e-05, 9.27847e-05, 8.17862e-05, + 6.74957e-05, 6.13643e-05, 6.76295e-05, 8.17635e-05, 9.22908e-05, 9.10504e-05, 8.28937e-05, 7.87637e-05, + 8.22849e-05, 9.26854e-05, 9.12148e-05, 8.27857e-05, 7.84198e-05, 8.30329e-05, 9.14518e-05, 9.2624e-05, + 8.19604e-05, 6.79017e-05, 6.18754e-05, 6.81775e-05, 9.25767e-05, 0.000101186, 9.70021e-05, 8.99369e-05, + 9.01047e-05, 9.73949e-05, 0.000101531, 9.26836e-05, 7.47587e-05, 6.13449e-05, 6.13879e-05, 7.48154e-05, + 9.12466e-05, 9.71234e-05, 9.38859e-05, 9.12418e-05, 9.41784e-05, 9.75881e-05, 9.16882e-05, 7.48419e-05, + 5.7471e-05, 5.04671e-05, 5.735e-05, 7.45579e-05, 8.30465e-05, 9.01483e-05, 9.12569e-05, 9.13645e-05, + 9.0462e-05, 8.35058e-05, 6.81106e-05, 5.029e-05, 3.90704e-05, 3.89626e-05, 4.99716e-05, 6.76468e-05, + 7.87113e-05, 9.01335e-05, 9.39391e-05, 9.02488e-05, 7.89857e-05, 6.16455e-05, 4.33448e-05, 3.03039e-05, + 2.5719e-05, 3.00896e-05, 4.29655e-05, 6.12441e-05, 8.30431e-05, 9.69831e-05, 9.70005e-05, 8.31465e-05, + 6.15899e-05, 4.05755e-05, 2.57776e-05, 1.86682e-05, 1.85838e-05, 2.55399e-05, 4.02659e-05, 6.13476e-05, + 9.10058e-05, 0.000100754, 9.10671e-05, 6.79588e-05, 4.34991e-05, 2.59208e-05, 1.66364e-05, 1.38535e-05, + 1.65237e-05, 2.57166e-05, 4.32744e-05, 6.78031e-05, 9.19258e-05, 9.2011e-05, 7.46443e-05, 5.05692e-05, + 3.07138e-05, 1.89438e-05, 1.39542e-05, 1.39193e-05, 1.88335e-05, 3.0524e-05, 5.0329e-05, 7.44332e-05, + 8.14517e-05, 7.45835e-05, 5.78007e-05, 3.96656e-05, 2.62394e-05, 1.8878e-05, 1.66234e-05, 1.88392e-05, + 2.61126e-05, 3.9394e-05, 5.74216e-05, 7.42891e-05, 6.77927e-05, 6.1626e-05, 5.10893e-05, 3.96228e-05, + 
3.05259e-05, 2.57029e-05, 2.57095e-05, 3.0487e-05, 3.94058e-05, 5.06384e-05, 6.11215e-05, 6.75673e-05, + 6.20323e-05, 6.18517e-05, 5.79597e-05, 5.04221e-05, 4.31041e-05, 4.01789e-05, 4.31609e-05, 5.03565e-05, + 5.75884e-05, 6.12606e-05, 6.15807e-05, 6.13564e-05, 6.83477e-05, 7.51768e-05, 7.48743e-05, 6.76831e-05, + 6.10303e-05, 6.11212e-05, 6.78223e-05, 7.47709e-05, 7.47231e-05, 6.78197e-05, 6.13917e-05, 6.16178e-05, + 6.49478e-05, 7.71124e-05, 8.24084e-05, 8.30366e-05, 8.3113e-05, 8.2574e-05, 7.72343e-05, 6.49572e-05, + 5.03889e-05, 4.1034e-05, 4.10616e-05, 5.04397e-05, 7.7096e-05, 9.14035e-05, 9.73827e-05, 9.85289e-05, + 9.745e-05, 9.15351e-05, 7.72625e-05, 5.76774e-05, 4.16496e-05, 3.57343e-05, 4.15711e-05, 5.75326e-05, + 8.26488e-05, 9.76164e-05, 0.000104147, 0.000104112, 9.75951e-05, 8.27725e-05, 6.15501e-05, 4.16477e-05, + 3.04254e-05, 3.03387e-05, 4.14158e-05, 6.12942e-05, 8.35527e-05, 9.88914e-05, 0.000104181, 9.87409e-05, + 8.34934e-05, 6.20137e-05, 4.10679e-05, 2.70421e-05, 2.22973e-05, 2.68419e-05, 4.07662e-05, 6.18287e-05, + 8.3592e-05, 9.75876e-05, 9.74655e-05, 8.34024e-05, 6.16774e-05, 4.05533e-05, 2.56844e-05, 1.85318e-05, + 1.8432e-05, 2.54295e-05, 4.03112e-05, 6.16577e-05, 8.26502e-05, 9.12599e-05, 8.24937e-05, 6.20038e-05, + 4.06802e-05, 2.55355e-05, 1.75271e-05, 1.50834e-05, 1.73705e-05, 2.53091e-05, 4.05511e-05, 6.20811e-05, + 7.68584e-05, 7.68226e-05, 6.14732e-05, 4.13786e-05, 2.60704e-05, 1.77422e-05, 1.44378e-05, 1.43768e-05, + 1.75887e-05, 2.5905e-05, 4.12998e-05, 6.1503e-05, 6.44774e-05, 5.74996e-05, 4.19824e-05, 2.76574e-05, + 1.90971e-05, 1.54284e-05, 1.44949e-05, 1.53584e-05, 1.89738e-05, 2.75087e-05, 4.18484e-05, 5.74214e-05, + 5.01704e-05, 4.18764e-05, 3.10576e-05, 2.30361e-05, 1.90141e-05, 1.76813e-05, 1.76715e-05, 1.89671e-05, + 2.29164e-05, 3.08538e-05, 4.1656e-05, 5.00712e-05, 4.11307e-05, 3.62124e-05, 3.10515e-05, 2.74997e-05, + 2.58148e-05, 2.53961e-05, 2.58429e-05, 2.74708e-05, 3.08846e-05, 3.59342e-05, 4.09046e-05, 4.31158e-05, 
+ 4.13051e-05, 4.20711e-05, 4.19582e-05, 4.10662e-05, 4.02686e-05, 4.03232e-05, 4.1151e-05, 4.19172e-05, + 4.18481e-05, 4.10294e-05, 4.03126e-05, 4.04364e-05, 5.06155e-05, 5.78197e-05, 6.1418e-05, 6.16014e-05, + 6.12029e-05, 6.17377e-05, 6.15337e-05, 5.77658e-05, 5.04237e-05, 4.32622e-05, 4.04156e-05, 4.34227e-05, + 5.778e-05, 7.72949e-05, 9.16478e-05, 9.68004e-05, 9.15827e-05, 7.72559e-05, 5.78292e-05, 3.94324e-05, + 2.72353e-05, 2.31232e-05, 2.71658e-05, 3.9334e-05, 7.74069e-05, 0.000101898, 0.000116062, 0.00011591, + 0.000101613, 7.72834e-05, 5.08426e-05, 3.06695e-05, 2.06929e-05, 2.06281e-05, 3.05117e-05, 5.07404e-05, + 9.21132e-05, 0.000116379, 0.000125368, 0.000115951, 9.17105e-05, 6.15938e-05, 3.59114e-05, 2.06762e-05, + 1.59391e-05, 2.05251e-05, 3.57262e-05, 6.16439e-05, 9.75006e-05, 0.000116315, 0.000116028, 9.69599e-05, + 6.81701e-05, 4.11648e-05, 2.31304e-05, 1.497e-05, 1.48831e-05, 2.2922e-05, 4.10572e-05, 6.84373e-05, + 9.20819e-05, 0.000101742, 9.16747e-05, 6.81962e-05, 4.35172e-05, 2.5825e-05, 1.64692e-05, 1.36392e-05, + 1.63083e-05, 2.56285e-05, 4.35352e-05, 6.85557e-05, 7.72709e-05, 7.7123e-05, 6.16001e-05, 4.13891e-05, + 2.60307e-05, 1.76754e-05, 1.43446e-05, 1.42662e-05, 1.74955e-05, 2.58979e-05, 4.14633e-05, 6.18535e-05, + 5.75279e-05, 5.07178e-05, 3.61754e-05, 2.3609e-05, 1.68921e-05, 1.45595e-05, 1.41095e-05, 1.44533e-05, + 1.6751e-05, 2.35235e-05, 3.61993e-05, 5.07919e-05, 3.91875e-05, 3.08059e-05, 2.11715e-05, 1.55836e-05, + 1.41639e-05, 1.45683e-05, 1.45369e-05, 1.40858e-05, 1.54888e-05, 2.10853e-05, 3.07491e-05, 3.91687e-05, + 2.72276e-05, 2.10439e-05, 1.6526e-05, 1.55049e-05, 1.67723e-05, 1.76942e-05, 1.67656e-05, 1.54714e-05, + 1.64506e-05, 2.09369e-05, 2.71419e-05, 3.02327e-05, 2.32975e-05, 2.10623e-05, 2.10794e-05, 2.33753e-05, + 2.58044e-05, 2.58327e-05, 2.3421e-05, 2.10763e-05, 2.09878e-05, 2.31935e-05, 2.55737e-05, 2.56238e-05, + 2.73837e-05, 3.08709e-05, 3.60198e-05, 4.10568e-05, 4.32392e-05, 4.11388e-05, 3.61005e-05, 3.08807e-05, 
+ 2.73295e-05, 2.56666e-05, 2.52632e-05, 2.57222e-05, 3.94509e-05, 5.0844e-05, 6.14707e-05, 6.79005e-05, + 6.79324e-05, 6.15427e-05, 5.091e-05, 3.94853e-05, 3.04655e-05, 2.5677e-05, 2.56766e-05, 3.04566e-05, + 6.52224e-05, 9.31688e-05, 0.000111636, 0.000111426, 9.27543e-05, 6.49682e-05, 3.94451e-05, 2.26841e-05, + 1.51496e-05, 1.51048e-05, 2.25892e-05, 3.94453e-05, 9.33948e-05, 0.000124097, 0.000136045, 0.000123471, + 9.27359e-05, 5.77238e-05, 3.07261e-05, 1.61489e-05, 1.19044e-05, 1.60545e-05, 3.06629e-05, 5.7974e-05, + 0.000112213, 0.00013634, 0.000135895, 0.000111323, 7.48859e-05, 4.17808e-05, 2.07768e-05, 1.18839e-05, + 1.18261e-05, 2.06603e-05, 4.18627e-05, 7.5471e-05, 0.000112075, 0.000123759, 0.000111372, 8.21093e-05, + 5.04555e-05, 2.72668e-05, 1.50951e-05, 1.15417e-05, 1.49795e-05, 2.71821e-05, 5.07176e-05, 8.2836e-05, + 9.3071e-05, 9.27882e-05, 7.49231e-05, 5.05499e-05, 3.05981e-05, 1.87928e-05, 1.37615e-05, 1.36923e-05, + 1.86483e-05, 3.05759e-05, 5.08632e-05, 7.54661e-05, 6.48898e-05, 5.76692e-05, 4.19495e-05, 2.75549e-05, + 1.89906e-05, 1.5311e-05, 1.43475e-05, 1.51972e-05, 1.88649e-05, 2.75587e-05, 4.21453e-05, 5.78966e-05, + 3.9248e-05, 3.07889e-05, 2.1117e-05, 1.55287e-05, 1.41142e-05, 1.45114e-05, 1.44648e-05, 1.40122e-05, + 1.54417e-05, 2.10965e-05, 3.08335e-05, 3.92814e-05, 2.26041e-05, 1.63605e-05, 1.22984e-05, 1.20185e-05, + 1.40794e-05, 1.53786e-05, 1.40393e-05, 1.19595e-05, 1.22387e-05, 1.63121e-05, 2.25775e-05, 2.58083e-05, + 1.52133e-05, 1.21873e-05, 1.22459e-05, 1.54024e-05, 1.88995e-05, 1.89024e-05, 1.54004e-05, 1.2224e-05, + 1.21442e-05, 1.51648e-05, 1.8568e-05, 1.85904e-05, 1.52431e-05, 1.63375e-05, 2.09782e-05, 2.73461e-05, + 3.04946e-05, 2.7379e-05, 2.10084e-05, 1.63406e-05, 1.52236e-05, 1.64328e-05, 1.73416e-05, 1.64535e-05, + 2.27128e-05, 3.08087e-05, 4.18391e-05, 5.03947e-05, 5.04006e-05, 4.18566e-05, 3.08344e-05, 2.27398e-05, + 1.87218e-05, 1.74033e-05, 1.73951e-05, 1.87007e-05, 3.9473e-05, 5.78588e-05, 7.50138e-05, 8.2125e-05, + 
7.48966e-05, 5.7753e-05, 3.94666e-05, 2.60485e-05, 1.87206e-05, 1.64654e-05, 1.86666e-05, 2.59867e-05, + 8.27404e-05, 0.000111934, 0.00012365, 0.000111275, 8.1998e-05, 5.03825e-05, 2.7291e-05, 1.52076e-05, + 1.17062e-05, 1.51701e-05, 2.73422e-05, 5.0759e-05, 0.000112183, 0.000136224, 0.000135712, 0.000111112, + 7.47242e-05, 4.17312e-05, 2.08291e-05, 1.19892e-05, 1.19634e-05, 2.08234e-05, 4.20088e-05, 7.55401e-05, + 0.000124106, 0.000135883, 0.000123173, 9.24298e-05, 5.75206e-05, 3.06425e-05, 1.61379e-05, 1.19258e-05, + 1.61001e-05, 3.0743e-05, 5.80896e-05, 9.35032e-05, 0.000111652, 0.000111227, 9.24528e-05, 6.47171e-05, + 3.92995e-05, 2.26094e-05, 1.51019e-05, 1.50692e-05, 2.25857e-05, 3.9522e-05, 6.53946e-05, 9.33417e-05, + 8.20955e-05, 7.47285e-05, 5.75932e-05, 3.94005e-05, 2.60533e-05, 1.87413e-05, 1.64766e-05, 1.86833e-05, + 2.60547e-05, 3.96349e-05, 5.80821e-05, 7.51669e-05, 5.02863e-05, 4.17488e-05, 3.08309e-05, 2.28454e-05, + 1.88855e-05, 1.75709e-05, 1.75375e-05, 1.88254e-05, 2.28457e-05, 3.09539e-05, 4.19444e-05, 5.03859e-05, + 2.71935e-05, 2.09331e-05, 1.64039e-05, 1.54095e-05, 1.6702e-05, 1.76184e-05, 1.66633e-05, 1.53633e-05, + 1.63794e-05, 2.09409e-05, 2.72145e-05, 3.02914e-05, 1.51892e-05, 1.21439e-05, 1.21997e-05, 1.53637e-05, + 1.88666e-05, 1.88593e-05, 1.5342e-05, 1.21656e-05, 1.21029e-05, 1.51529e-05, 1.85798e-05, 1.85949e-05, + 1.17432e-05, 1.21202e-05, 1.63204e-05, 2.27422e-05, 2.60479e-05, 2.27438e-05, 1.631e-05, 1.20929e-05, + 1.17078e-05, 1.37042e-05, 1.50048e-05, 1.37306e-05, 1.52148e-05, 2.09101e-05, 3.07645e-05, 3.93638e-05, + 3.93532e-05, 3.07402e-05, 2.0885e-05, 1.51962e-05, 1.3756e-05, 1.41788e-05, 1.41814e-05, 1.37657e-05, + 2.73366e-05, 4.19078e-05, 5.77672e-05, 6.48865e-05, 5.76395e-05, 4.17704e-05, 2.72732e-05, 1.87191e-05, + 1.51143e-05, 1.42244e-05, 1.50955e-05, 1.87179e-05, 5.06906e-05, 7.52524e-05, 9.29474e-05, 9.2741e-05, + 7.48323e-05, 5.03879e-05, 3.04564e-05, 1.87464e-05, 1.38085e-05, 1.37865e-05, 1.87196e-05, 3.05405e-05, + 
9.74795e-05, 0.000116125, 0.000115675, 9.64954e-05, 6.77666e-05, 4.1002e-05, 2.32303e-05, 1.52373e-05, + 1.52473e-05, 2.33342e-05, 4.14095e-05, 6.86143e-05, 0.00011628, 0.000124984, 0.000115345, 9.10867e-05, + 6.11978e-05, 3.58388e-05, 2.0838e-05, 1.6225e-05, 2.08974e-05, 3.6138e-05, 6.19849e-05, 9.22708e-05, + 0.000115881, 0.000115395, 0.000100903, 7.66527e-05, 5.04949e-05, 3.06063e-05, 2.08002e-05, 2.08273e-05, + 3.07772e-05, 5.10388e-05, 7.76675e-05, 0.000102004, 9.6622e-05, 9.11078e-05, 7.66659e-05, 5.73619e-05, + 3.91961e-05, 2.7183e-05, 2.3168e-05, 2.72634e-05, 3.94836e-05, 5.79917e-05, 7.75239e-05, 9.17518e-05, + 6.77806e-05, 6.12041e-05, 5.05518e-05, 3.92544e-05, 3.03881e-05, 2.56935e-05, 2.57244e-05, 3.0515e-05, + 3.95455e-05, 5.10054e-05, 6.16552e-05, 6.79733e-05, 4.09585e-05, 3.58647e-05, 3.07075e-05, 2.72891e-05, + 2.57428e-05, 2.53784e-05, 2.57997e-05, 2.74051e-05, 3.08748e-05, 3.60454e-05, 4.10812e-05, 4.31817e-05, + 2.31814e-05, 2.08745e-05, 2.08989e-05, 2.32603e-05, 2.57515e-05, 2.57767e-05, 2.33168e-05, 2.09521e-05, + 2.09063e-05, 2.31931e-05, 2.56191e-05, 2.56172e-05, 1.51909e-05, 1.62382e-05, 2.08659e-05, 2.72571e-05, + 3.04249e-05, 2.7288e-05, 2.08881e-05, 1.62277e-05, 1.51569e-05, 1.64297e-05, 1.73737e-05, 1.64598e-05, + 1.51838e-05, 2.08412e-05, 3.06768e-05, 3.92843e-05, 3.92823e-05, 3.0662e-05, 2.08056e-05, 1.51367e-05, + 1.3732e-05, 1.41918e-05, 1.42078e-05, 1.37723e-05, 2.32265e-05, 3.5956e-05, 5.07332e-05, 5.75853e-05, + 5.06296e-05, 3.58225e-05, 2.31252e-05, 1.64447e-05, 1.42211e-05, 1.38692e-05, 1.4242e-05, 1.65e-05, + 4.12571e-05, 6.16827e-05, 7.72212e-05, 7.70569e-05, 6.13316e-05, 4.09608e-05, 2.56398e-05, 1.74473e-05, + 1.42779e-05, 1.42823e-05, 1.74887e-05, 2.57862e-05, 6.84855e-05, 9.19718e-05, 0.000101609, 9.14239e-05, + 6.78236e-05, 4.31905e-05, 2.5698e-05, 1.65497e-05, 1.38525e-05, 1.65716e-05, 2.58421e-05, 4.36037e-05, + 9.88708e-05, 0.000103874, 9.81691e-05, 8.28256e-05, 6.15396e-05, 4.0977e-05, 2.72947e-05, 2.27648e-05, + 
2.74383e-05, 4.13847e-05, 6.23138e-05, 8.37999e-05, 0.000103912, 0.000103509, 9.67478e-05, 8.19822e-05, + 6.1124e-05, 4.16613e-05, 3.07474e-05, 3.08425e-05, 4.20181e-05, 6.18685e-05, 8.30481e-05, 9.77341e-05, + 9.82092e-05, 9.67521e-05, 9.06468e-05, 7.65236e-05, 5.7328e-05, 4.16696e-05, 3.59813e-05, 4.19427e-05, + 5.79567e-05, 7.74948e-05, 9.16855e-05, 9.74247e-05, 8.28789e-05, 8.19976e-05, 7.65258e-05, 6.44141e-05, + 5.01504e-05, 4.10349e-05, 4.11857e-05, 5.06194e-05, 6.51737e-05, 7.73949e-05, 8.26825e-05, 8.31363e-05, + 6.15958e-05, 6.11302e-05, 5.73028e-05, 5.01146e-05, 4.31576e-05, 4.04287e-05, 4.34557e-05, 5.06456e-05, + 5.79151e-05, 6.16386e-05, 6.18737e-05, 6.13428e-05, 4.09852e-05, 4.16046e-05, 4.15638e-05, 4.09067e-05, + 4.03309e-05, 4.04668e-05, 4.12492e-05, 4.19597e-05, 4.19134e-05, 4.1155e-05, 4.0413e-05, 4.03626e-05, + 2.72162e-05, 3.05892e-05, 3.57505e-05, 4.09116e-05, 4.3206e-05, 4.11048e-05, 3.60004e-05, 3.07648e-05, + 2.72865e-05, 2.57221e-05, 2.53449e-05, 2.57118e-05, 2.25935e-05, 3.05611e-05, 4.15529e-05, 5.01706e-05, + 5.02358e-05, 4.16825e-05, 3.06465e-05, 2.26033e-05, 1.869e-05, 1.74698e-05, 1.7486e-05, 1.87176e-05, + 2.7179e-05, 4.16129e-05, 5.74309e-05, 6.46102e-05, 5.74249e-05, 4.15782e-05, 2.71151e-05, 1.86346e-05, + 1.51183e-05, 1.429e-05, 1.51572e-05, 1.86996e-05, 4.10804e-05, 6.14229e-05, 7.69694e-05, 7.68745e-05, + 6.12028e-05, 4.08569e-05, 2.55658e-05, 1.74179e-05, 1.42905e-05, 1.4313e-05, 1.74965e-05, 2.57203e-05, + 6.2066e-05, 8.27082e-05, 9.13211e-05, 8.2338e-05, 6.1582e-05, 4.02122e-05, 2.52532e-05, 1.74845e-05, + 1.52059e-05, 1.75491e-05, 2.54369e-05, 4.05731e-05, 8.36872e-05, 9.7572e-05, 9.72614e-05, 8.29754e-05, + 6.1194e-05, 4.02683e-05, 2.57056e-05, 1.87943e-05, 1.88352e-05, 2.58781e-05, 4.06858e-05, 6.1881e-05, + 9.11604e-05, 9.09433e-05, 8.97738e-05, 8.27768e-05, 6.76535e-05, 5.02587e-05, 3.93946e-05, 3.95204e-05, + 5.06554e-05, 6.83027e-05, 8.35192e-05, 9.03549e-05, 9.09251e-05, 9.34907e-05, 9.66706e-05, 9.08773e-05, + 
7.44488e-05, 5.75375e-05, 5.08561e-05, 5.79338e-05, 7.52098e-05, 9.18113e-05, 9.74707e-05, 9.39304e-05, + 8.97876e-05, 9.66831e-05, 0.000100632, 9.19744e-05, 7.44687e-05, 6.1424e-05, 6.16938e-05, 7.5243e-05, + 9.3048e-05, 0.000101631, 9.73214e-05, 8.99967e-05, 8.28788e-05, 9.0908e-05, 9.19405e-05, 8.14719e-05, + 6.77283e-05, 6.19263e-05, 6.83383e-05, 8.25151e-05, 9.30239e-05, 9.16713e-05, 8.3231e-05, 7.86483e-05, + 6.77747e-05, 7.43941e-05, 7.42783e-05, 6.75513e-05, 6.13236e-05, 6.16367e-05, 6.83729e-05, 7.52619e-05, + 7.5154e-05, 6.81697e-05, 6.15483e-05, 6.14381e-05, 5.02524e-05, 5.7278e-05, 6.09839e-05, 6.14804e-05, + 6.13631e-05, 6.19952e-05, 6.17372e-05, 5.79135e-05, 5.05996e-05, 4.34772e-05, 4.05653e-05, 4.33571e-05, + 3.91853e-05, 5.03518e-05, 6.09775e-05, 6.76033e-05, 6.78186e-05, 6.14673e-05, 5.08104e-05, 3.94382e-05, + 3.05512e-05, 2.58703e-05, 2.58594e-05, 3.04716e-05, 3.91182e-05, 5.72138e-05, 7.43163e-05, 8.15969e-05, + 7.4541e-05, 5.74709e-05, 3.92547e-05, 2.59746e-05, 1.88019e-05, 1.66394e-05, 1.88158e-05, 2.59525e-05, + 5.01373e-05, 7.44284e-05, 9.21485e-05, 9.21714e-05, 7.44557e-05, 5.01126e-05, 3.02846e-05, 1.87074e-05, + 1.38806e-05, 1.39013e-05, 1.87651e-05, 3.03507e-05, 6.7845e-05, 9.12321e-05, 0.000101032, 9.10848e-05, + 6.76191e-05, 4.3037e-05, 2.56028e-05, 1.65294e-05, 1.38838e-05, 1.65946e-05, 2.57473e-05, 4.32516e-05, + 8.32788e-05, 9.72228e-05, 9.707e-05, 8.29066e-05, 6.11549e-05, 4.02184e-05, 2.56613e-05, 1.877e-05, + 1.8817e-05, 2.58228e-05, 4.05219e-05, 6.15633e-05, 9.03046e-05, 9.39009e-05, 8.99376e-05, 7.84724e-05, + 6.11747e-05, 4.31508e-05, 3.04285e-05, 2.60745e-05, 3.05872e-05, 4.35037e-05, 6.17051e-05, 7.90354e-05, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 
0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 
0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 
+ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 
0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 
0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 8.66315e-05, 9.09147e-05, 9.80789e-05, 9.62719e-05, 8.16601e-05, 6.46465e-05, 5.76473e-05, 6.49856e-05, + 8.21971e-05, 9.67325e-05, 9.82952e-05, 9.0959e-05, 9.08927e-05, 0.00010338, 0.000114993, 0.000110547, + 9.21777e-05, 7.69547e-05, 7.72459e-05, 9.29467e-05, 0.00011143, 0.000115579, 0.000103597, 9.09296e-05, + 9.81232e-05, 0.000115019, 0.000122424, 0.000110462, 9.1113e-05, 8.25959e-05, 9.18814e-05, 0.000111692, + 0.000123525, 0.000115612, 9.83101e-05, 8.99962e-05, 9.64209e-05, 0.000110552, 0.000110364, 9.608e-05, + 8.30589e-05, 8.35155e-05, 9.72855e-05, 0.000111778, 0.000111559, 9.68681e-05, 8.32914e-05, 8.31867e-05, + 8.17714e-05, 9.19845e-05, 9.07261e-05, 8.27473e-05, 7.87765e-05, 8.35966e-05, 9.204e-05, 9.31491e-05, + 8.24251e-05, 6.82798e-05, 6.20687e-05, 6.80504e-05, 6.45286e-05, 7.64225e-05, 8.18102e-05, 8.27806e-05, + 8.31736e-05, 8.277e-05, 7.74392e-05, 6.51948e-05, 5.07146e-05, 4.14032e-05, 4.13397e-05, 5.04347e-05, + 5.72298e-05, 7.63501e-05, 9.06972e-05, 9.61543e-05, 9.12456e-05, 7.70782e-05, 5.77594e-05, 3.95237e-05, + 2.74913e-05, 2.34588e-05, 
2.74303e-05, 3.92884e-05, 6.43143e-05, 9.18085e-05, 0.000110324, 0.000110512, + 9.22118e-05, 6.46467e-05, 3.93067e-05, 2.27347e-05, 1.53387e-05, 1.53446e-05, 2.27166e-05, 3.9166e-05, + 8.14631e-05, 0.000110395, 0.000122401, 0.000110528, 8.15905e-05, 5.01413e-05, 2.71865e-05, 1.52331e-05, + 1.1814e-05, 1.52729e-05, 2.72298e-05, 5.01151e-05, 9.62219e-05, 0.000114984, 0.000114996, 9.62192e-05, + 6.76351e-05, 4.08854e-05, 2.315e-05, 1.52157e-05, 1.52495e-05, 2.32447e-05, 4.10012e-05, 6.77049e-05, + 9.81136e-05, 0.000103432, 9.80894e-05, 8.29136e-05, 6.15804e-05, 4.09188e-05, 2.71988e-05, 2.2668e-05, + 2.73037e-05, 4.11003e-05, 6.1757e-05, 8.30106e-05, 9.09542e-05, 9.09475e-05, 8.99288e-05, 8.29392e-05, + 6.77091e-05, 5.02049e-05, 3.92862e-05, 3.9382e-05, 5.04639e-05, 6.80162e-05, 8.31456e-05, 8.99978e-05, + 9.10625e-05, 0.000103726, 0.000115364, 0.000110745, 9.21347e-05, 7.67423e-05, 7.69412e-05, 9.25849e-05, + 0.000111041, 0.00011522, 0.000103358, 9.08829e-05, 0.000103699, 0.000124502, 0.000134882, 0.000122511, + 0.000100862, 9.11519e-05, 0.000101494, 0.000123389, 0.0001354, 0.000124479, 0.000103512, 9.38161e-05, + 0.000115398, 0.000134894, 0.000134608, 0.000114866, 9.70069e-05, 9.74638e-05, 0.000116037, 0.000135856, + 0.000135582, 0.000115535, 9.72462e-05, 9.72638e-05, 0.000110852, 0.000122456, 0.00011472, 9.7905e-05, + 9.01691e-05, 9.88684e-05, 0.000116222, 0.000123749, 0.000111506, 9.16742e-05, 8.26748e-05, 9.14892e-05, + 9.21528e-05, 0.000100549, 9.65096e-05, 8.97903e-05, 9.02746e-05, 9.77465e-05, 0.000101942, 9.31185e-05, + 7.5218e-05, 6.17737e-05, 6.16726e-05, 7.47919e-05, 7.6493e-05, 9.04358e-05, 9.64803e-05, 9.79785e-05, + 9.72538e-05, 9.15559e-05, 7.7423e-05, 5.79898e-05, 4.20909e-05, 3.6208e-05, 4.1915e-05, 5.7488e-05, + 7.63532e-05, 0.000100344, 0.000114586, 0.000114905, 0.000101081, 7.70692e-05, 5.08705e-05, 3.09068e-05, + 2.10605e-05, 2.10244e-05, 3.0741e-05, 5.04433e-05, 9.17285e-05, 0.000122015, 0.000134293, 0.000122399, + 9.22064e-05, 5.75138e-05, 
3.07395e-05, 1.63193e-05, 1.21562e-05, 1.62963e-05, 3.06224e-05, 5.72019e-05, + 0.000110202, 0.000134277, 0.000134429, 0.000110521, 7.44905e-05, 4.16369e-05, 2.08379e-05, 1.20808e-05, + 1.20892e-05, 2.08398e-05, 4.15521e-05, 7.42453e-05, 0.000114718, 0.000123921, 0.000114978, 9.11128e-05, + 6.12455e-05, 3.58052e-05, 2.07913e-05, 1.6198e-05, 2.0826e-05, 3.58136e-05, 6.1114e-05, 9.08227e-05, + 0.000103217, 0.000103393, 9.70718e-05, 8.23598e-05, 6.12709e-05, 4.15819e-05, 3.05696e-05, 3.06119e-05, + 4.16583e-05, 6.12284e-05, 8.20838e-05, 9.66963e-05, 9.0895e-05, 9.38527e-05, 9.7217e-05, 9.12803e-05, + 7.45217e-05, 5.73454e-05, 5.05284e-05, 5.75131e-05, 7.46702e-05, 9.11502e-05, 9.68149e-05, 9.35056e-05, + 9.86088e-05, 0.000115746, 0.000123047, 0.000110662, 9.08731e-05, 8.20784e-05, 9.12165e-05, 0.000110974, + 0.000122854, 0.000115104, 9.80787e-05, 9.0114e-05, 0.000115675, 0.000135254, 0.000134856, 0.000114877, + 9.6809e-05, 9.71345e-05, 0.000115643, 0.000135466, 0.000135258, 0.000115323, 9.71786e-05, 9.73658e-05, + 0.000123004, 0.000134851, 0.000123983, 0.000103235, 9.38218e-05, 0.000104024, 0.000125148, 0.000135708, + 0.00012326, 0.000101352, 9.13441e-05, 0.000101391, 0.000110691, 0.000114817, 0.000103105, 9.08721e-05, + 9.13109e-05, 0.000104243, 0.000116106, 0.000111556, 9.28199e-05, 7.71883e-05, 7.71169e-05, 9.24724e-05, + 9.08432e-05, 9.65127e-05, 9.3351e-05, 9.09411e-05, 9.41105e-05, 9.76826e-05, 9.18949e-05, 7.51509e-05, + 5.78519e-05, 5.08472e-05, 5.76124e-05, 7.45295e-05, 8.18087e-05, 9.64201e-05, 0.000103039, 0.000103371, + 9.72366e-05, 8.26992e-05, 6.17001e-05, 4.19793e-05, 3.08608e-05, 3.07771e-05, 4.16703e-05, 6.10686e-05, + 9.05927e-05, 0.000114455, 0.000123711, 0.000114913, 9.12288e-05, 6.14837e-05, 3.60544e-05, 2.09781e-05, + 1.63149e-05, 2.0877e-05, 3.57782e-05, 6.09735e-05, 0.00011007, 0.000134092, 0.00013429, 0.000110516, + 7.45996e-05, 4.17628e-05, 2.0916e-05, 1.21116e-05, 1.20928e-05, 2.08279e-05, 4.15268e-05, 7.41846e-05, + 0.000122013, 
0.000134236, 0.00012238, 9.22471e-05, 5.75396e-05, 3.06965e-05, 1.62295e-05, 1.20562e-05, + 1.62114e-05, 3.05946e-05, 5.72527e-05, 9.17952e-05, 0.000114645, 0.000114906, 0.000101072, 7.70036e-05, + 5.06837e-05, 3.06257e-05, 2.07652e-05, 2.07706e-05, 3.05921e-05, 5.04672e-05, 7.65127e-05, 0.000100497, + 9.79813e-05, 9.71754e-05, 9.13883e-05, 7.70917e-05, 5.74829e-05, 4.15163e-05, 3.5689e-05, 4.15507e-05, + 5.73792e-05, 7.66351e-05, 9.06575e-05, 9.6609e-05, 9.01531e-05, 9.74872e-05, 0.000101497, 9.24498e-05, + 7.4417e-05, 6.1016e-05, 6.11031e-05, 7.45354e-05, 9.22485e-05, 0.000100825, 9.67183e-05, 8.98237e-05, + 9.70439e-05, 0.000111373, 0.000110949, 9.6141e-05, 8.26513e-05, 8.2828e-05, 9.64851e-05, 0.000111005, + 0.000110909, 9.64198e-05, 8.31339e-05, 8.34063e-05, 0.00011125, 0.000122972, 0.000115049, 9.78899e-05, + 8.98577e-05, 9.83733e-05, 0.000115662, 0.000123223, 0.00011106, 9.13499e-05, 8.25301e-05, 9.16015e-05, + 0.000110873, 0.000115079, 0.000103295, 9.09156e-05, 9.12219e-05, 0.000104067, 0.000115891, 0.000111329, + 9.25933e-05, 7.69896e-05, 7.69919e-05, 9.24861e-05, 9.62096e-05, 9.79348e-05, 9.08532e-05, 8.68193e-05, + 9.14106e-05, 9.88162e-05, 9.70291e-05, 8.21707e-05, 6.48022e-05, 5.74967e-05, 6.46006e-05, 8.16677e-05, + 8.27285e-05, 8.97089e-05, 9.08592e-05, 9.11012e-05, 9.03402e-05, 8.34976e-05, 6.81965e-05, 5.04528e-05, + 3.92763e-05, 3.91643e-05, 5.00872e-05, 6.7563e-05, 8.26795e-05, 9.77956e-05, 0.000103231, 9.81339e-05, + 8.32174e-05, 6.19824e-05, 4.12142e-05, 2.73047e-05, 2.26135e-05, 2.71286e-05, 4.08386e-05, 6.14486e-05, + 9.59406e-05, 0.000114618, 0.000114753, 9.62806e-05, 6.79378e-05, 4.1199e-05, 2.33236e-05, 1.5252e-05, + 1.52008e-05, 2.31512e-05, 4.08887e-05, 6.75437e-05, 0.000110162, 0.000122095, 0.000110427, 8.1786e-05, + 5.04425e-05, 2.7384e-05, 1.52882e-05, 1.17868e-05, 1.52259e-05, 2.72217e-05, 5.01501e-05, 8.14217e-05, + 0.000110171, 0.000110381, 9.23009e-05, 6.48822e-05, 3.94723e-05, 2.27382e-05, 1.52357e-05, 1.52209e-05, + 2.26608e-05, 
3.9245e-05, 6.44599e-05, 9.18325e-05, 9.60882e-05, 9.12524e-05, 7.71975e-05, 5.783e-05, + 3.94015e-05, 2.72061e-05, 2.3132e-05, 2.71885e-05, 3.92488e-05, 5.7394e-05, 7.6538e-05, 9.07352e-05, + 8.31423e-05, 8.27667e-05, 7.73851e-05, 6.49244e-05, 5.02015e-05, 4.07957e-05, 4.08228e-05, 5.01773e-05, + 6.46006e-05, 7.66708e-05, 8.19889e-05, 8.28063e-05, 8.351e-05, 9.18762e-05, 9.27842e-05, 8.17859e-05, + 6.74954e-05, 6.13638e-05, 6.76291e-05, 8.17634e-05, 9.22907e-05, 9.10499e-05, 8.2893e-05, 7.87633e-05, + 8.22845e-05, 9.26847e-05, 9.12144e-05, 8.27857e-05, 7.84197e-05, 8.30327e-05, 9.14516e-05, 9.26237e-05, + 8.19596e-05, 6.79007e-05, 6.1875e-05, 6.81775e-05, 9.25758e-05, 0.000101185, 9.70021e-05, 8.99366e-05, + 9.01045e-05, 9.73948e-05, 0.000101531, 9.26834e-05, 7.47584e-05, 6.13447e-05, 6.13878e-05, 7.48148e-05, + 9.12463e-05, 9.71231e-05, 9.38848e-05, 9.12407e-05, 9.41781e-05, 9.75881e-05, 9.16881e-05, 7.48419e-05, + 5.7471e-05, 5.04669e-05, 5.73494e-05, 7.45571e-05, 8.30462e-05, 9.01466e-05, 9.12548e-05, 9.13636e-05, + 9.04619e-05, 8.35056e-05, 6.81103e-05, 5.02898e-05, 3.90701e-05, 3.89621e-05, 4.99712e-05, 6.76468e-05, + 7.87096e-05, 9.01314e-05, 9.39381e-05, 9.02487e-05, 7.89855e-05, 6.16451e-05, 4.33445e-05, 3.03036e-05, + 2.57187e-05, 3.00893e-05, 4.29654e-05, 6.12437e-05, 8.30418e-05, 9.69825e-05, 9.70005e-05, 8.31464e-05, + 6.15897e-05, 4.05753e-05, 2.57775e-05, 1.86681e-05, 1.85836e-05, 2.55398e-05, 4.02655e-05, 6.13465e-05, + 9.10055e-05, 0.000100754, 9.10671e-05, 6.79587e-05, 4.34987e-05, 2.59205e-05, 1.66363e-05, 1.38535e-05, + 1.65236e-05, 2.57164e-05, 4.32738e-05, 6.78025e-05, 9.19257e-05, 9.20109e-05, 7.46441e-05, 5.05682e-05, + 3.07125e-05, 1.89433e-05, 1.39541e-05, 1.39193e-05, 1.88334e-05, 3.05237e-05, 5.03287e-05, 7.44332e-05, + 8.14516e-05, 7.45834e-05, 5.77995e-05, 3.96636e-05, 2.62384e-05, 1.88778e-05, 1.66231e-05, 1.88389e-05, + 2.61124e-05, 3.93938e-05, 5.74216e-05, 7.42891e-05, 6.77926e-05, 6.16252e-05, 5.10875e-05, 3.96217e-05, + 
3.05258e-05, 2.57025e-05, 2.57087e-05, 3.04867e-05, 3.94057e-05, 5.06384e-05, 6.11214e-05, 6.75672e-05, + 6.2032e-05, 6.18509e-05, 5.7959e-05, 5.0422e-05, 4.31037e-05, 4.01778e-05, 4.31602e-05, 5.03564e-05, + 5.75883e-05, 6.12602e-05, 6.15802e-05, 6.13561e-05, 6.83476e-05, 7.51764e-05, 7.4874e-05, 6.76828e-05, + 6.10298e-05, 6.11205e-05, 6.78221e-05, 7.47707e-05, 7.47225e-05, 6.78187e-05, 6.13909e-05, 6.16175e-05, + 6.49475e-05, 7.71121e-05, 8.24083e-05, 8.30366e-05, 8.31129e-05, 8.25739e-05, 7.7234e-05, 6.49567e-05, + 5.03879e-05, 4.10331e-05, 4.10612e-05, 5.04396e-05, 7.70957e-05, 9.14035e-05, 9.73825e-05, 9.85284e-05, + 9.74498e-05, 9.15351e-05, 7.72624e-05, 5.76772e-05, 4.16493e-05, 3.57341e-05, 4.15708e-05, 5.75321e-05, + 8.26488e-05, 9.76155e-05, 0.000104145, 0.000104111, 9.75948e-05, 8.27723e-05, 6.15499e-05, 4.16476e-05, + 3.04253e-05, 3.03385e-05, 4.14154e-05, 6.1294e-05, 8.35514e-05, 9.8889e-05, 0.000104179, 9.87406e-05, + 8.3493e-05, 6.20129e-05, 4.10672e-05, 2.70415e-05, 2.22969e-05, 2.68416e-05, 4.07661e-05, 6.18285e-05, + 8.35898e-05, 9.75861e-05, 9.74652e-05, 8.34022e-05, 6.16766e-05, 4.05523e-05, 2.56833e-05, 1.85309e-05, + 1.84314e-05, 2.54294e-05, 4.0311e-05, 6.16564e-05, 8.26492e-05, 9.12597e-05, 8.24936e-05, 6.20034e-05, + 4.06795e-05, 2.55347e-05, 1.75263e-05, 1.50827e-05, 1.73701e-05, 2.53089e-05, 4.05503e-05, 6.20796e-05, + 7.68583e-05, 7.68225e-05, 6.14728e-05, 4.13775e-05, 2.60689e-05, 1.77413e-05, 1.44374e-05, 1.43766e-05, + 1.75885e-05, 2.59044e-05, 4.12989e-05, 6.15025e-05, 6.44773e-05, 5.74994e-05, 4.19809e-05, 2.76546e-05, + 1.90951e-05, 1.54279e-05, 1.44949e-05, 1.53584e-05, 1.89736e-05, 2.75083e-05, 4.18481e-05, 5.74213e-05, + 5.01703e-05, 4.18752e-05, 3.10545e-05, 2.30333e-05, 1.90133e-05, 1.76812e-05, 1.76712e-05, 1.89671e-05, + 2.29162e-05, 3.08536e-05, 4.16559e-05, 5.00711e-05, 4.11303e-05, 3.62105e-05, 3.10493e-05, 2.74991e-05, + 2.58147e-05, 2.53952e-05, 2.58421e-05, 2.74706e-05, 3.08843e-05, 3.59338e-05, 4.09044e-05, 
4.31158e-05, + 4.13046e-05, 4.20702e-05, 4.19578e-05, 4.10662e-05, 4.02676e-05, 4.03219e-05, 4.11503e-05, 4.19168e-05, + 4.18474e-05, 4.10286e-05, 4.03121e-05, 4.04362e-05, 5.06155e-05, 5.78195e-05, 6.14178e-05, 6.16009e-05, + 6.12021e-05, 6.17371e-05, 6.15333e-05, 5.77652e-05, 5.04226e-05, 4.32609e-05, 4.04148e-05, 4.34225e-05, + 5.77798e-05, 7.72948e-05, 9.16478e-05, 9.68003e-05, 9.15823e-05, 7.72554e-05, 5.78285e-05, 3.94314e-05, + 2.72343e-05, 2.31223e-05, 2.71654e-05, 3.93339e-05, 7.74068e-05, 0.000101898, 0.000116062, 0.000115909, + 0.000101612, 7.72829e-05, 5.08421e-05, 3.06689e-05, 2.06925e-05, 2.06279e-05, 3.05115e-05, 5.07401e-05, + 9.21128e-05, 0.000116377, 0.000125366, 0.00011595, 9.17101e-05, 6.15932e-05, 3.59107e-05, 2.06756e-05, + 1.59389e-05, 2.0525e-05, 3.5726e-05, 6.16438e-05, 9.74988e-05, 0.000116313, 0.000116027, 9.69596e-05, + 6.81692e-05, 4.11633e-05, 2.31286e-05, 1.49687e-05, 1.48825e-05, 2.29218e-05, 4.10572e-05, 6.84367e-05, + 9.20802e-05, 0.000101741, 9.16746e-05, 6.81956e-05, 4.35156e-05, 2.58227e-05, 1.64668e-05, 1.36375e-05, + 1.63075e-05, 2.56284e-05, 4.35347e-05, 6.85541e-05, 7.72705e-05, 7.7123e-05, 6.15996e-05, 4.13877e-05, + 2.60284e-05, 1.76731e-05, 1.43428e-05, 1.42652e-05, 1.74952e-05, 2.58975e-05, 4.14622e-05, 6.18523e-05, + 5.75279e-05, 5.07174e-05, 3.61735e-05, 2.36056e-05, 1.68891e-05, 1.45581e-05, 1.4109e-05, 1.44531e-05, + 1.67507e-05, 2.35227e-05, 3.61985e-05, 5.07917e-05, 3.91872e-05, 3.08038e-05, 2.11666e-05, 1.55788e-05, + 1.41619e-05, 1.45681e-05, 1.45369e-05, 1.40857e-05, 1.54883e-05, 2.10848e-05, 3.07489e-05, 3.91687e-05, + 2.72264e-05, 2.10397e-05, 1.65208e-05, 1.55025e-05, 1.6772e-05, 1.76936e-05, 1.67648e-05, 1.54707e-05, + 1.645e-05, 2.09366e-05, 2.71418e-05, 3.02326e-05, 2.32956e-05, 2.1059e-05, 2.10776e-05, 2.33751e-05, + 2.58035e-05, 2.58309e-05, 2.34196e-05, 2.10754e-05, 2.09872e-05, 2.31931e-05, 2.55736e-05, 2.56236e-05, + 2.73827e-05, 3.08702e-05, 3.60198e-05, 4.10563e-05, 4.32377e-05, 4.11373e-05, 
3.60995e-05, 3.08798e-05, + 2.73288e-05, 2.56661e-05, 2.5263e-05, 2.57218e-05, 3.94508e-05, 5.08438e-05, 6.14704e-05, 6.79e-05, + 6.79318e-05, 6.15421e-05, 5.09093e-05, 3.94843e-05, 3.04645e-05, 2.56761e-05, 2.56761e-05, 3.04564e-05, + 6.52222e-05, 9.31688e-05, 0.000111636, 0.000111425, 9.2753e-05, 6.49667e-05, 3.94435e-05, 2.26826e-05, + 1.51484e-05, 1.51041e-05, 2.2589e-05, 3.94452e-05, 9.33948e-05, 0.000124096, 0.000136044, 0.000123469, + 9.27346e-05, 5.77223e-05, 3.07242e-05, 1.61471e-05, 1.19035e-05, 1.60543e-05, 3.06628e-05, 5.79738e-05, + 0.000112212, 0.000136339, 0.000135894, 0.000111322, 7.48849e-05, 4.1779e-05, 2.07744e-05, 1.1882e-05, + 1.18255e-05, 2.06603e-05, 4.18626e-05, 7.5471e-05, 0.000112074, 0.000123758, 0.000111372, 8.21087e-05, + 5.04538e-05, 2.72637e-05, 1.50915e-05, 1.15391e-05, 1.49784e-05, 2.71819e-05, 5.07176e-05, 8.28354e-05, + 9.307e-05, 9.27877e-05, 7.49227e-05, 5.05485e-05, 3.05949e-05, 1.87884e-05, 1.37574e-05, 1.36897e-05, + 1.86474e-05, 3.05757e-05, 5.08626e-05, 7.54648e-05, 6.48895e-05, 5.76686e-05, 4.19476e-05, 2.75511e-05, + 1.89862e-05, 1.53073e-05, 1.43451e-05, 1.51961e-05, 1.88645e-05, 2.75582e-05, 4.21444e-05, 5.78959e-05, + 3.92476e-05, 3.07863e-05, 2.11113e-05, 1.55225e-05, 1.41105e-05, 1.45101e-05, 1.44644e-05, 1.4012e-05, + 1.54413e-05, 2.10959e-05, 3.08332e-05, 3.92814e-05, 2.26021e-05, 1.63542e-05, 1.229e-05, 1.20133e-05, + 1.40781e-05, 1.53785e-05, 1.4039e-05, 1.19589e-05, 1.22382e-05, 1.63119e-05, 2.25774e-05, 2.5808e-05, + 1.52094e-05, 1.21802e-05, 1.22407e-05, 1.5401e-05, 1.8899e-05, 1.89012e-05, 1.5399e-05, 1.2223e-05, + 1.21438e-05, 1.51644e-05, 1.85676e-05, 1.85896e-05, 1.52398e-05, 1.63343e-05, 2.09774e-05, 2.73458e-05, + 3.04931e-05, 2.7377e-05, 2.10069e-05, 1.63399e-05, 1.52233e-05, 1.64325e-05, 1.73414e-05, 1.64524e-05, + 2.27119e-05, 3.08084e-05, 4.1839e-05, 5.03939e-05, 5.03993e-05, 4.18553e-05, 3.08334e-05, 2.27394e-05, + 1.87217e-05, 1.74032e-05, 1.7395e-05, 1.87e-05, 3.9473e-05, 5.78585e-05, 7.50136e-05, 
8.21248e-05, + 7.48959e-05, 5.77519e-05, 3.94655e-05, 2.60477e-05, 1.872e-05, 1.64648e-05, 1.86663e-05, 2.59866e-05, + 8.274e-05, 0.000111934, 0.000123649, 0.000111273, 8.19958e-05, 5.03803e-05, 2.72889e-05, 1.52058e-05, + 1.17051e-05, 1.51698e-05, 2.73422e-05, 5.07587e-05, 0.000112183, 0.000136224, 0.000135711, 0.00011111, + 7.47221e-05, 4.17285e-05, 2.08256e-05, 1.19863e-05, 1.19623e-05, 2.08233e-05, 4.20087e-05, 7.55399e-05, + 0.000124105, 0.000135882, 0.000123172, 9.24287e-05, 5.75187e-05, 3.06388e-05, 1.61334e-05, 1.19226e-05, + 1.60989e-05, 3.07427e-05, 5.80895e-05, 9.35031e-05, 0.000111651, 0.000111226, 9.24521e-05, 6.47158e-05, + 3.92962e-05, 2.26038e-05, 1.50962e-05, 1.50656e-05, 2.25841e-05, 3.95216e-05, 6.53944e-05, 9.33412e-05, + 8.20948e-05, 7.47278e-05, 5.75916e-05, 3.93971e-05, 2.60478e-05, 1.87349e-05, 1.64714e-05, 1.86803e-05, + 2.60535e-05, 3.96345e-05, 5.80816e-05, 7.51661e-05, 5.02855e-05, 4.17464e-05, 3.08259e-05, 2.28389e-05, + 1.88796e-05, 1.75667e-05, 1.75349e-05, 1.88242e-05, 2.28453e-05, 3.09536e-05, 4.19441e-05, 5.03856e-05, + 2.71912e-05, 2.09263e-05, 1.63945e-05, 1.54023e-05, 1.66987e-05, 1.76173e-05, 1.66629e-05, 1.53631e-05, + 1.63793e-05, 2.09409e-05, 2.72145e-05, 3.02911e-05, 1.51842e-05, 1.21341e-05, 1.21909e-05, 1.536e-05, + 1.88659e-05, 1.88591e-05, 1.53418e-05, 1.21654e-05, 1.21027e-05, 1.51522e-05, 1.85791e-05, 1.85939e-05, + 1.17376e-05, 1.21133e-05, 1.6317e-05, 2.27416e-05, 2.60473e-05, 2.27428e-05, 1.63092e-05, 1.20924e-05, + 1.17064e-05, 1.3702e-05, 1.50036e-05, 1.37289e-05, 1.5212e-05, 2.09085e-05, 3.07643e-05, 3.93634e-05, + 3.9352e-05, 3.0739e-05, 2.08845e-05, 1.51956e-05, 1.37543e-05, 1.41772e-05, 1.41809e-05, 1.37643e-05, + 2.73363e-05, 4.19078e-05, 5.77671e-05, 6.48861e-05, 5.76386e-05, 4.17696e-05, 2.7273e-05, 1.8719e-05, + 1.51138e-05, 1.42242e-05, 1.50954e-05, 1.87175e-05, 5.06903e-05, 7.52521e-05, 9.29473e-05, 9.27405e-05, + 7.48309e-05, 5.03866e-05, 3.04557e-05, 1.87461e-05, 1.38084e-05, 1.37863e-05, 1.87195e-05, 
3.05405e-05, + 9.74791e-05, 0.000116125, 0.000115674, 9.64934e-05, 6.77646e-05, 4.10003e-05, 2.32287e-05, 1.52361e-05, + 1.52469e-05, 2.33341e-05, 4.14093e-05, 6.86136e-05, 0.00011628, 0.000124984, 0.000115344, 9.1085e-05, + 6.11957e-05, 3.58356e-05, 2.08342e-05, 1.62223e-05, 2.08966e-05, 3.61378e-05, 6.19845e-05, 9.22705e-05, + 0.000115881, 0.000115394, 0.000100902, 7.66513e-05, 5.04919e-05, 3.06011e-05, 2.07947e-05, 2.08239e-05, + 3.07759e-05, 5.10382e-05, 7.76673e-05, 0.000102004, 9.66212e-05, 9.1107e-05, 7.66648e-05, 5.73593e-05, + 3.91909e-05, 2.7176e-05, 2.31618e-05, 2.72597e-05, 3.9482e-05, 5.79912e-05, 7.75237e-05, 9.17513e-05, + 6.77797e-05, 6.12023e-05, 5.05485e-05, 3.92491e-05, 3.03813e-05, 2.56868e-05, 2.57194e-05, 3.05122e-05, + 3.95444e-05, 5.1005e-05, 6.16549e-05, 6.79727e-05, 4.09564e-05, 3.58595e-05, 3.07001e-05, 2.72819e-05, + 2.57373e-05, 2.53745e-05, 2.57973e-05, 2.74039e-05, 3.08744e-05, 3.60453e-05, 4.10811e-05, 4.31813e-05, + 2.31768e-05, 2.08653e-05, 2.08896e-05, 2.32549e-05, 2.57491e-05, 2.57757e-05, 2.33163e-05, 2.09519e-05, + 2.09061e-05, 2.31925e-05, 2.56188e-05, 2.56165e-05, 1.51848e-05, 1.62293e-05, 2.086e-05, 2.72552e-05, + 3.04246e-05, 2.72879e-05, 2.08881e-05, 1.62269e-05, 1.51542e-05, 1.64262e-05, 1.73724e-05, 1.64586e-05, + 1.51795e-05, 2.08373e-05, 3.06754e-05, 3.92841e-05, 3.92821e-05, 3.06618e-05, 2.0805e-05, 1.51334e-05, + 1.37253e-05, 1.41862e-05, 1.42066e-05, 1.37711e-05, 2.32254e-05, 3.59556e-05, 5.07331e-05, 5.75851e-05, + 5.06292e-05, 3.58222e-05, 2.31241e-05, 1.64402e-05, 1.42145e-05, 1.38656e-05, 1.42416e-05, 1.64995e-05, + 4.1257e-05, 6.16825e-05, 7.72211e-05, 7.70566e-05, 6.1331e-05, 4.09606e-05, 2.56393e-05, 1.74452e-05, + 1.42756e-05, 1.42815e-05, 1.74886e-05, 2.57862e-05, 6.84849e-05, 9.19716e-05, 0.000101609, 9.14228e-05, + 6.78223e-05, 4.319e-05, 2.56978e-05, 1.65496e-05, 1.38525e-05, 1.65716e-05, 2.5842e-05, 4.36033e-05, + 9.88706e-05, 0.000103874, 9.81678e-05, 8.28242e-05, 6.15387e-05, 4.09762e-05, 2.7294e-05, 
2.27644e-05, + 2.74382e-05, 4.13846e-05, 6.23133e-05, 8.37993e-05, 0.000103912, 0.000103508, 9.67468e-05, 8.19811e-05, + 6.11222e-05, 4.16585e-05, 3.07446e-05, 3.08408e-05, 4.20176e-05, 6.18682e-05, 8.30477e-05, 9.77339e-05, + 9.82087e-05, 9.67515e-05, 9.0646e-05, 7.65218e-05, 5.73244e-05, 4.16647e-05, 3.5977e-05, 4.19404e-05, + 5.79558e-05, 7.74944e-05, 9.16853e-05, 9.74244e-05, 8.28781e-05, 8.19966e-05, 7.65239e-05, 6.44104e-05, + 5.01449e-05, 4.10289e-05, 4.11812e-05, 5.06168e-05, 6.51726e-05, 7.73944e-05, 8.26822e-05, 8.31357e-05, + 6.15942e-05, 6.11273e-05, 5.72985e-05, 5.01091e-05, 4.31518e-05, 4.04236e-05, 4.3452e-05, 5.06437e-05, + 5.79142e-05, 6.16383e-05, 6.18734e-05, 6.13421e-05, 4.09818e-05, 4.15985e-05, 4.15572e-05, 4.09014e-05, + 4.0327e-05, 4.04638e-05, 4.12472e-05, 4.19588e-05, 4.1913e-05, 4.11548e-05, 4.04129e-05, 4.03618e-05, + 2.72111e-05, 3.05815e-05, 3.57446e-05, 4.09086e-05, 4.32044e-05, 4.11038e-05, 3.59997e-05, 3.07639e-05, + 2.72845e-05, 2.572e-05, 2.53444e-05, 2.57109e-05, 2.2589e-05, 3.05561e-05, 4.15504e-05, 5.01698e-05, + 5.02356e-05, 4.16823e-05, 3.06455e-05, 2.25988e-05, 1.86818e-05, 1.74636e-05, 1.7485e-05, 1.87168e-05, + 2.71773e-05, 4.16117e-05, 5.74306e-05, 6.46101e-05, 5.74248e-05, 4.15777e-05, 2.71114e-05, 1.86239e-05, + 1.51043e-05, 1.42819e-05, 1.51561e-05, 1.86992e-05, 4.10803e-05, 6.14228e-05, 7.69694e-05, 7.68745e-05, + 6.12028e-05, 4.0856e-05, 2.55604e-05, 1.74061e-05, 1.42788e-05, 1.43076e-05, 1.74957e-05, 2.57203e-05, + 6.20656e-05, 8.27079e-05, 9.13211e-05, 8.23376e-05, 6.15819e-05, 4.02118e-05, 2.52499e-05, 1.74787e-05, + 1.52014e-05, 1.75472e-05, 2.54363e-05, 4.05728e-05, 8.36866e-05, 9.75719e-05, 9.72608e-05, 8.29743e-05, + 6.11935e-05, 4.02683e-05, 2.57052e-05, 1.87937e-05, 1.88347e-05, 2.58778e-05, 4.06853e-05, 6.18802e-05, + 9.11601e-05, 9.09426e-05, 8.97729e-05, 8.27763e-05, 6.76532e-05, 5.02583e-05, 3.93943e-05, 3.95204e-05, + 5.06554e-05, 6.83026e-05, 8.35189e-05, 9.03547e-05, 9.09245e-05, 9.34901e-05, 
9.667e-05, 9.08764e-05, + 7.44473e-05, 5.75357e-05, 5.08546e-05, 5.79332e-05, 7.52097e-05, 9.18112e-05, 9.74706e-05, 9.39301e-05, + 8.97869e-05, 9.66823e-05, 0.000100631, 9.19722e-05, 7.44656e-05, 6.14208e-05, 6.16916e-05, 7.5242e-05, + 9.30476e-05, 0.000101631, 9.7321e-05, 8.99961e-05, 8.28777e-05, 9.09063e-05, 9.1938e-05, 8.14685e-05, + 6.77245e-05, 6.19228e-05, 6.8336e-05, 8.25139e-05, 9.30233e-05, 9.16709e-05, 8.32304e-05, 7.86475e-05, + 6.77727e-05, 7.43911e-05, 7.42748e-05, 6.75477e-05, 6.13202e-05, 6.16339e-05, 6.83709e-05, 7.52609e-05, + 7.51536e-05, 6.81695e-05, 6.15479e-05, 6.14371e-05, 5.02493e-05, 5.72736e-05, 6.09801e-05, 6.14777e-05, + 6.13608e-05, 6.19933e-05, 6.1736e-05, 5.7913e-05, 5.05991e-05, 4.34769e-05, 4.05652e-05, 4.33561e-05, + 3.91816e-05, 5.03478e-05, 6.09751e-05, 6.7602e-05, 6.78175e-05, 6.14664e-05, 5.08095e-05, 3.94358e-05, + 3.05471e-05, 2.58674e-05, 2.58591e-05, 3.04706e-05, 3.9116e-05, 5.72121e-05, 7.43156e-05, 8.15964e-05, + 7.45406e-05, 5.74702e-05, 3.92512e-05, 2.59651e-05, 1.87897e-05, 1.66323e-05, 1.88148e-05, 2.59517e-05, + 5.0137e-05, 7.44282e-05, 9.21484e-05, 9.21712e-05, 7.44555e-05, 5.01106e-05, 3.02757e-05, 1.86899e-05, + 1.38632e-05, 1.38928e-05, 1.87636e-05, 3.03505e-05, 6.78448e-05, 9.12319e-05, 0.000101032, 9.10847e-05, + 6.76189e-05, 4.30339e-05, 2.55919e-05, 1.65129e-05, 1.38705e-05, 1.65889e-05, 2.57461e-05, 4.32515e-05, + 8.32783e-05, 9.72226e-05, 9.70697e-05, 8.29063e-05, 6.11548e-05, 4.02162e-05, 2.56549e-05, 1.87621e-05, + 1.88117e-05, 2.58205e-05, 4.0521e-05, 6.15628e-05, 9.03043e-05, 9.39006e-05, 8.99368e-05, 7.84719e-05, + 6.11747e-05, 4.31503e-05, 3.04273e-05, 2.60732e-05, 3.05863e-05, 4.35031e-05, 6.17045e-05, 7.90349e-05}; const base_device::DEVICE_CPU* cpu_ctx = {}; const base_device::DEVICE_GPU* gpu_ctx = {}; @@ -51,122 +2567,117 @@ class TestModuleElecstateMultiDevice : public ::testing::Test using syncmem_var_d2h_op = base_device::memory::synchronize_memory_op; - void SetUp() override { + void + SetUp 
() override + { } - void TearDown() override { + void + TearDown () override + { } }; -TEST_F(TestModuleElecstateMultiDevice, elecstate_pw_op_cpu) +TEST_F (TestModuleElecstateMultiDevice, elecstate_pw_op_cpu) { - std::vector rho_data(expected_rho.size(), 0); - double ** rho = new double* [1]; - rho[0] = rho_data.data(); - elecstate_cpu_op()( - this->cpu_ctx, - this->spin, this->nrxx, - this->w1, - rho, - this->wfcr.data()); - - // check the result - for (int ii = 0; ii < rho_data.size(); ii++) { - EXPECT_LT(fabs(rho_data[ii] - expected_rho[ii]), 6e-5); - } - delete [] rho; + std::vector rho_data (expected_rho.size (), 0); + double** rho = new double*[1]; + rho[0] = rho_data.data (); + elecstate_cpu_op () (this->cpu_ctx, this->spin, this->nrxx, this->w1, rho, this->wfcr.data ()); + + // check the result + for (int ii = 0; ii < rho_data.size (); ii++) + { + EXPECT_LT (fabs (rho_data[ii] - expected_rho[ii]), 6e-5); + } + delete[] rho; } -TEST_F(TestModuleElecstateMultiDevice, elecstate_pw_spin_op_cpu) +TEST_F (TestModuleElecstateMultiDevice, elecstate_pw_spin_op_cpu) { - std::vector rho_data(expected_rho_2.size(), 0); - double ** rho = new double* [4]; - rho[0] = rho_data.data(); - rho[1] = rho_data.data() + this->nrxx; - rho[2] = rho_data.data() + this->nrxx * 2; - rho[3] = rho_data.data() + this->nrxx * 3; - elecstate_cpu_op()( - this->cpu_ctx, - this->DOMAG, - this->DOMAG_Z, - this->nrxx, - this->w1, - rho, - this->wfcr_2.data(), - this->wfcr_another_spin_2.data()); - - // check the result - for (int ii = 0; ii < rho_data.size(); ii++) { - EXPECT_LT(fabs(rho_data[ii] - expected_rho_2[ii]), 5e-4); - } - delete [] rho; + std::vector rho_data (expected_rho_2.size (), 0); + double** rho = new double*[4]; + rho[0] = rho_data.data (); + rho[1] = rho_data.data () + this->nrxx; + rho[2] = rho_data.data () + this->nrxx * 2; + rho[3] = rho_data.data () + this->nrxx * 3; + elecstate_cpu_op () (this->cpu_ctx, + this->DOMAG, + this->DOMAG_Z, + this->nrxx, + this->w1, + rho, + 
this->wfcr_2.data (), + this->wfcr_another_spin_2.data ()); + + // check the result + for (int ii = 0; ii < rho_data.size (); ii++) + { + EXPECT_LT (fabs (rho_data[ii] - expected_rho_2[ii]), 5e-4); + } + delete[] rho; } #if __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM -TEST_F(TestModuleElecstateMultiDevice, elecstate_pw_op_gpu) +TEST_F (TestModuleElecstateMultiDevice, elecstate_pw_op_gpu) { - std::vector rho_data(expected_rho.size(), 0); + std::vector rho_data (expected_rho.size (), 0); double* d_rho_data = NULL; std::complex* d_wfcr = NULL; - resize_memory_var_op()(d_rho_data, rho_data.size()); - resize_memory_complex_op()(d_wfcr, wfcr.size()); - syncmem_var_h2d_op()(d_rho_data, rho_data.data(), rho_data.size()); - syncmem_complex_h2d_op()(d_wfcr, wfcr.data(), wfcr.size()); - double ** rho = new double* [1]; + resize_memory_var_op () (d_rho_data, rho_data.size ()); + resize_memory_complex_op () (d_wfcr, wfcr.size ()); + syncmem_var_h2d_op () (d_rho_data, rho_data.data (), rho_data.size ()); + syncmem_complex_h2d_op () (d_wfcr, wfcr.data (), wfcr.size ()); + double** rho = new double*[1]; rho[0] = d_rho_data; - elecstate_gpu_op()( - this->gpu_ctx, - this->spin, this->nrxx, - this->w1, - rho, - d_wfcr); - - syncmem_var_d2h_op()(rho_data.data(), d_rho_data, rho_data.size()); - // check the result - for (int ii = 0; ii < rho_data.size(); ii++) { - EXPECT_LT(fabs(rho_data[ii] - expected_rho[ii]), 6e-5); - } - delete [] rho; - delete_memory_var_op()(this->gpu_ctx, d_rho_data); - delete_memory_complex_op()(this->gpu_ctx, d_wfcr); + elecstate_gpu_op () (this->gpu_ctx, this->spin, this->nrxx, this->w1, rho, d_wfcr); + + syncmem_var_d2h_op () (rho_data.data (), d_rho_data, rho_data.size ()); + // check the result + for (int ii = 0; ii < rho_data.size (); ii++) + { + EXPECT_LT (fabs (rho_data[ii] - expected_rho[ii]), 6e-5); + } + delete[] rho; + delete_memory_var_op () (this->gpu_ctx, d_rho_data); + delete_memory_complex_op () (this->gpu_ctx, d_wfcr); } 
-TEST_F(TestModuleElecstateMultiDevice, elecstate_pw_spin_op_gpu) +TEST_F (TestModuleElecstateMultiDevice, elecstate_pw_spin_op_gpu) { - std::vector rho_data_2(expected_rho_2.size(), 0); + std::vector rho_data_2 (expected_rho_2.size (), 0); double* d_rho_data_2 = NULL; std::complex* d_wfcr_2 = NULL; std::complex* d_wfcr_another_spin_2 = NULL; - resize_memory_var_op()(d_rho_data_2, rho_data_2.size()); - resize_memory_complex_op()(d_wfcr_2, wfcr_2.size()); - resize_memory_complex_op()(d_wfcr_another_spin_2, wfcr_another_spin_2.size()); - syncmem_var_h2d_op()(d_rho_data_2, rho_data_2.data(), rho_data_2.size()); - syncmem_complex_h2d_op()(d_wfcr_2, wfcr_2.data(), wfcr_2.size()); - syncmem_complex_h2d_op()(d_wfcr_another_spin_2, wfcr_another_spin_2.data(), wfcr_another_spin_2.size()); - double ** rho = new double* [4]; + resize_memory_var_op () (d_rho_data_2, rho_data_2.size ()); + resize_memory_complex_op () (d_wfcr_2, wfcr_2.size ()); + resize_memory_complex_op () (d_wfcr_another_spin_2, wfcr_another_spin_2.size ()); + syncmem_var_h2d_op () (d_rho_data_2, rho_data_2.data (), rho_data_2.size ()); + syncmem_complex_h2d_op () (d_wfcr_2, wfcr_2.data (), wfcr_2.size ()); + syncmem_complex_h2d_op () (d_wfcr_another_spin_2, wfcr_another_spin_2.data (), wfcr_another_spin_2.size ()); + double** rho = new double*[4]; rho[0] = d_rho_data_2; rho[1] = d_rho_data_2 + this->nrxx; rho[2] = d_rho_data_2 + this->nrxx * 2; rho[3] = d_rho_data_2 + this->nrxx * 3; - elecstate_gpu_op()( - this->gpu_ctx, - this->DOMAG, - this->DOMAG_Z, - this->nrxx, - this->w1, - rho, - d_wfcr_2, - d_wfcr_another_spin_2); - - syncmem_var_d2h_op()(rho_data_2.data(), d_rho_data_2, rho_data_2.size()); - // check the result - for (int ii = 0; ii < rho_data_2.size(); ii++) { - EXPECT_LT(fabs(rho_data_2[ii] - expected_rho_2[ii]), 5e-4); - } - delete [] rho; - delete_memory_var_op()(this->gpu_ctx, d_rho_data_2); - delete_memory_complex_op()(this->gpu_ctx, d_wfcr_2); - delete_memory_complex_op()(this->gpu_ctx, 
d_wfcr_another_spin_2); + elecstate_gpu_op () (this->gpu_ctx, + this->DOMAG, + this->DOMAG_Z, + this->nrxx, + this->w1, + rho, + d_wfcr_2, + d_wfcr_another_spin_2); + + syncmem_var_d2h_op () (rho_data_2.data (), d_rho_data_2, rho_data_2.size ()); + // check the result + for (int ii = 0; ii < rho_data_2.size (); ii++) + { + EXPECT_LT (fabs (rho_data_2[ii] - expected_rho_2[ii]), 5e-4); + } + delete[] rho; + delete_memory_var_op () (this->gpu_ctx, d_rho_data_2); + delete_memory_complex_op () (this->gpu_ctx, d_wfcr_2); + delete_memory_complex_op () (this->gpu_ctx, d_wfcr_another_spin_2); } #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM - diff --git a/source/source_estate/magnetism.cpp b/source/source_estate/magnetism.cpp index a31bec1f796..1d31b8fcf66 100644 --- a/source/source_estate/magnetism.cpp +++ b/source/source_estate/magnetism.cpp @@ -2,120 +2,120 @@ #include "source_base/parallel_reduce.h" #include "source_io/module_parameter/parameter.h" -//#include "source_estate/module_charge/charge.h" +// #include "source_estate/module_charge/charge.h" -Magnetism::Magnetism() +Magnetism::Magnetism () { tot_mag = 0.0; abs_mag = 0.0; - std::fill(tot_mag_nc, tot_mag_nc + 3, 0.0); - std::fill(ux_, ux_ + 3, 0.0); + std::fill (tot_mag_nc, tot_mag_nc + 3, 0.0); + std::fill (ux_, ux_ + 3, 0.0); } -Magnetism::~Magnetism() -{ - delete[] start_mag; -} +Magnetism::~Magnetism () { delete[] start_mag; } -void Magnetism::compute_mag(const double& omega, - const int& nrxx, - const int& nxyz, - const double* const * rho, - double* nelec_spin) +void + Magnetism::compute_mag (const double& omega, + const int& nrxx, + const int& nxyz, + const double* const* rho, + double* nelec_spin) { - assert(omega>0.0); - assert(nxyz>0); + assert (omega > 0.0); + assert (nxyz > 0); const double fac = omega / nxyz; - if (PARAM.inp.nspin==2) - { - this->tot_mag = 0.00; - this->abs_mag = 0.00; - - for (int ir=0; irtot_mag += diff; - this->abs_mag += std::abs(diff); - } + this->tot_mag = 0.00; + 
this->abs_mag = 0.00; + + for (int ir = 0; ir < nrxx; ir++) + { + double diff = rho[0][ir] - rho[1][ir]; + this->tot_mag += diff; + this->abs_mag += std::abs (diff); + } #ifdef __MPI - Parallel_Reduce::reduce_pool(this->tot_mag); - Parallel_Reduce::reduce_pool(this->abs_mag); + Parallel_Reduce::reduce_pool (this->tot_mag); + Parallel_Reduce::reduce_pool (this->abs_mag); #endif - this->tot_mag *= fac; - this->abs_mag *= fac; - - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"Total magnetism (Bohr mag/cell)",this->tot_mag); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"Absolute magnetism (Bohr mag/cell)",this->abs_mag); - - //update number of electrons for each spin - //if TWO_EFERMI, no need to update - if(!PARAM.globalv.two_fermi) - { - nelec_spin[0] = (PARAM.inp.nelec + this->tot_mag) / 2; - nelec_spin[1] = (PARAM.inp.nelec - this->tot_mag) / 2; - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"Electron number for spin up", nelec_spin[0]); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"Electron number for spin down", nelec_spin[1]); - } - } - - // noncolliear : - else if(PARAM.inp.nspin==4) - { - for(int i=0;i<3;i++) - { - this->tot_mag_nc[i] = 0.00; - } - - this->abs_mag = 0.00; - for (int ir=0; irtot_mag_nc[i] += rho[i+1][ir]; - } - this->abs_mag += std::abs(diff); - } + this->tot_mag *= fac; + this->abs_mag *= fac; + + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Total magnetism (Bohr mag/cell)", this->tot_mag); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Absolute magnetism (Bohr mag/cell)", this->abs_mag); + + // update number of electrons for each spin + // if TWO_EFERMI, no need to update + if (!PARAM.globalv.two_fermi) + { + nelec_spin[0] = (PARAM.inp.nelec + this->tot_mag) / 2; + nelec_spin[1] = (PARAM.inp.nelec - this->tot_mag) / 2; + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Electron number for spin up", nelec_spin[0]); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Electron number for spin down", 
nelec_spin[1]); + } + } + + // noncolliear : + else if (PARAM.inp.nspin == 4) + { + for (int i = 0; i < 3; i++) + { + this->tot_mag_nc[i] = 0.00; + } + + this->abs_mag = 0.00; + for (int ir = 0; ir < nrxx; ir++) + { + double diff = sqrt (pow (rho[1][ir], 2) + pow (rho[2][ir], 2) + pow (rho[3][ir], 2)); + + for (int i = 0; i < 3; i++) + { + this->tot_mag_nc[i] += rho[i + 1][ir]; + } + this->abs_mag += std::abs (diff); + } #ifdef __MPI - Parallel_Reduce::reduce_pool(this->tot_mag_nc, 3); - Parallel_Reduce::reduce_pool(this->abs_mag); + Parallel_Reduce::reduce_pool (this->tot_mag_nc, 3); + Parallel_Reduce::reduce_pool (this->abs_mag); #endif - for(int i=0;i<3;i++) - { - this->tot_mag_nc[i] *= fac; - // mohan add 2025-06-21 - if( std::abs(this->tot_mag_nc[i]) < 1.0e-16) - { - this->tot_mag_nc[i] = 0.0; - } - } - - this->abs_mag *= fac; - - // mohan update 2025-06-21 - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"Total magnetism (Bohr mag/cell)", - this->tot_mag_nc[0], this->tot_mag_nc[1], this->tot_mag_nc[2]); - - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"Absolute magnetism (Bohr mag/cell)",this->abs_mag); - } + for (int i = 0; i < 3; i++) + { + this->tot_mag_nc[i] *= fac; + // mohan add 2025-06-21 + if (std::abs (this->tot_mag_nc[i]) < 1.0e-16) + { + this->tot_mag_nc[i] = 0.0; + } + } + + this->abs_mag *= fac; + + // mohan update 2025-06-21 + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, + "Total magnetism (Bohr mag/cell)", + this->tot_mag_nc[0], + this->tot_mag_nc[1], + this->tot_mag_nc[2]); + + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Absolute magnetism (Bohr mag/cell)", this->abs_mag); + } return; } - -bool Magnetism::judge_parallel(const double a[3], const ModuleBase::Vector3 &b) +bool + Magnetism::judge_parallel (const double a[3], const ModuleBase::Vector3& b) { - bool jp=false; + bool jp = false; - double cross=0.0; + double cross = 0.0; - cross = pow((a[1]*b.z-a[2]*b.y),2) - + pow((a[2]*b.x-a[0]*b.z),2) - + 
pow((a[0]*b.y-a[1]*b.x),2); + cross + = pow ((a[1] * b.z - a[2] * b.y), 2) + pow ((a[2] * b.x - a[0] * b.z), 2) + pow ((a[0] * b.y - a[1] * b.x), 2); - jp = (fabs(cross)<1e-6); - return jp; + jp = (fabs (cross) < 1e-6); + return jp; } diff --git a/source/source_estate/magnetism.h b/source/source_estate/magnetism.h index b2cee2d6a95..eeea6a1b1a6 100644 --- a/source/source_estate/magnetism.h +++ b/source/source_estate/magnetism.h @@ -6,41 +6,39 @@ class Magnetism { -public: + public: // constructor and deconstructor - Magnetism(); - ~Magnetism(); + Magnetism (); + ~Magnetism (); // notice : bcast (MPI operation) is done in unitcell - double *start_mag=nullptr; + double* start_mag = nullptr; // tot_mag : majority spin - minority spin (nelup - neldw). double tot_mag; - double tot_mag_nc[3]={0.0}; + double tot_mag_nc[3] = {0.0}; double abs_mag; - void compute_mag(const double& omega, - const int& nrxx, - const int& nxyz, - const double* const * rho, - double* nelec_spin = nullptr); + void compute_mag (const double& omega, + const int& nrxx, + const int& nxyz, + const double* const* rho, + double* nelec_spin = nullptr); - ModuleBase::Vector3 *m_loc_=nullptr; //magnetization for each element along c-axis + ModuleBase::Vector3* m_loc_ = nullptr; // magnetization for each element along c-axis - double *angle1_=nullptr; //angle between c-axis and real spin std::vector + double* angle1_ = nullptr; // angle between c-axis and real spin std::vector - double *angle2_=nullptr; //angle between a-axis and real spin std::vector projection in ab-plane + double* angle2_ = nullptr; // angle between a-axis and real spin std::vector projection in ab-plane - double ux_[3]={0.0}; + double ux_[3] = {0.0}; - bool lsign_=false; - -private: - - bool judge_parallel(const double a[3], const ModuleBase::Vector3 &b); + bool lsign_ = false; + private: + bool judge_parallel (const double a[3], const ModuleBase::Vector3& b); }; /* diff --git a/source/source_estate/math_tools.h 
b/source/source_estate/math_tools.h index 049f9a4e8e9..4130b355d23 100644 --- a/source/source_estate/math_tools.h +++ b/source/source_estate/math_tools.h @@ -6,119 +6,123 @@ #include "source_base/timer.h" #ifdef __MPI -inline void psiMulPsiMpi(const psi::Psi& psi1, - const psi::Psi& psi2, - ModuleBase::matrix& dm_out, - const int* desc_psi, - const int* desc_dm) +inline void + psiMulPsiMpi (const psi::Psi& psi1, + const psi::Psi& psi2, + ModuleBase::matrix& dm_out, + const int* desc_psi, + const int* desc_dm) { - ModuleBase::timer::start("psiMulPsiMpi","pdgemm"); + ModuleBase::timer::start ("psiMulPsiMpi", "pdgemm"); const double one_float = 1.0, zero_float = 0.0; const int one_int = 1; const char N_char = 'N', T_char = 'T'; const int nlocal = desc_dm[2]; const int nbands = desc_psi[3]; - pdgemm_(&N_char, - &T_char, - &nlocal, - &nlocal, - &nbands, - &one_float, - psi1.get_pointer(), - &one_int, - &one_int, - desc_psi, - psi2.get_pointer(), - &one_int, - &one_int, - desc_psi, - &zero_float, - dm_out.c, - &one_int, - &one_int, - desc_dm); - ModuleBase::timer::end("psiMulPsiMpi","pdgemm"); + pdgemm_ (&N_char, + &T_char, + &nlocal, + &nlocal, + &nbands, + &one_float, + psi1.get_pointer (), + &one_int, + &one_int, + desc_psi, + psi2.get_pointer (), + &one_int, + &one_int, + desc_psi, + &zero_float, + dm_out.c, + &one_int, + &one_int, + desc_dm); + ModuleBase::timer::end ("psiMulPsiMpi", "pdgemm"); } -inline void psiMulPsiMpi(const psi::Psi>& psi1, - const psi::Psi>& psi2, - ModuleBase::ComplexMatrix& dm_out, - const int* desc_psi, - const int* desc_dm) +inline void + psiMulPsiMpi (const psi::Psi>& psi1, + const psi::Psi>& psi2, + ModuleBase::ComplexMatrix& dm_out, + const int* desc_psi, + const int* desc_dm) { - ModuleBase::timer::start("psiMulPsiMpi","pdgemm"); + ModuleBase::timer::start ("psiMulPsiMpi", "pdgemm"); const std::complex one_complex = {1.0, 0.0}, zero_complex = {0.0, 0.0}; const int one_int = 1; const char N_char = 'N', T_char = 'T'; const int nlocal = 
desc_dm[2]; const int nbands = desc_psi[3]; - pzgemm_(&N_char, - &T_char, - &nlocal, - &nlocal, - &nbands, - &one_complex, - psi1.get_pointer(), - &one_int, - &one_int, - desc_psi, - psi2.get_pointer(), - &one_int, - &one_int, - desc_psi, - &zero_complex, - dm_out.c, - &one_int, - &one_int, - desc_dm); - ModuleBase::timer::end("psiMulPsiMpi","pdgemm"); + pzgemm_ (&N_char, + &T_char, + &nlocal, + &nlocal, + &nbands, + &one_complex, + psi1.get_pointer (), + &one_int, + &one_int, + desc_psi, + psi2.get_pointer (), + &one_int, + &one_int, + desc_psi, + &zero_complex, + dm_out.c, + &one_int, + &one_int, + desc_dm); + ModuleBase::timer::end ("psiMulPsiMpi", "pdgemm"); } #else -inline void psiMulPsi(const psi::Psi& psi1, const psi::Psi& psi2, ModuleBase::matrix& dm_out) +inline void + psiMulPsi (const psi::Psi& psi1, const psi::Psi& psi2, ModuleBase::matrix& dm_out) { const double one_float = 1.0, zero_float = 0.0; const int one_int = 1; const char N_char = 'N', T_char = 'T'; - const int nlocal = psi1.get_nbasis(); - const int nbands = psi1.get_nbands(); - dgemm_(&N_char, - &T_char, - &nlocal, - &nlocal, - &nbands, - &one_float, - psi1.get_pointer(), - &nlocal, - psi2.get_pointer(), - &nlocal, - &zero_float, - dm_out.c, - &nlocal); + const int nlocal = psi1.get_nbasis (); + const int nbands = psi1.get_nbands (); + dgemm_ (&N_char, + &T_char, + &nlocal, + &nlocal, + &nbands, + &one_float, + psi1.get_pointer (), + &nlocal, + psi2.get_pointer (), + &nlocal, + &zero_float, + dm_out.c, + &nlocal); } -inline void psiMulPsi(const psi::Psi>& psi1, - const psi::Psi>& psi2, - ModuleBase::ComplexMatrix& dm_out) +inline void + psiMulPsi (const psi::Psi>& psi1, + const psi::Psi>& psi2, + ModuleBase::ComplexMatrix& dm_out) { const int one_int = 1; const char N_char = 'N', T_char = 'T'; - const int nlocal = psi1.get_nbasis(); - const int nbands = psi1.get_nbands(); + const int nlocal = psi1.get_nbasis (); + const int nbands = psi1.get_nbands (); const std::complex one_complex = {1.0, 
0.0}, zero_complex = {0.0, 0.0}; - zgemm_(&N_char, - &T_char, - &nlocal, - &nlocal, - &nbands, - &one_complex, - psi1.get_pointer(), - &nlocal, - psi2.get_pointer(), - &nlocal, - &zero_complex, - dm_out.c, - &nlocal); + zgemm_ (&N_char, + &T_char, + &nlocal, + &nlocal, + &nbands, + &one_complex, + psi1.get_pointer (), + &nlocal, + psi2.get_pointer (), + &nlocal, + &zero_complex, + dm_out.c, + &nlocal); } #endif \ No newline at end of file diff --git a/source/source_estate/module_charge/charge.cpp b/source/source_estate/module_charge/charge.cpp index 62a9e702f90..0f8326ff8c2 100644 --- a/source/source_estate/module_charge/charge.cpp +++ b/source/source_estate/module_charge/charge.cpp @@ -33,207 +33,212 @@ #include -Charge::Charge() +Charge::Charge () { allocate_rho = false; allocate_rho_final_scf = false; // LiuXh add 20180619 } -Charge::~Charge() +Charge::~Charge () { - this->destroy(); + this->destroy (); #ifdef __MPI delete[] rec; delete[] dis; #endif } -void Charge::set_rhopw(ModulePW::PW_Basis* rhopw_in) +void + Charge::set_rhopw (ModulePW::PW_Basis* rhopw_in) { this->rhopw = rhopw_in; } // mohan add 2025-12-02 -bool Charge::kin_density() +bool + Charge::kin_density () { - if (XC_Functional::get_ked_flag() || PARAM.inp.out_elf[0] > 0) - { - return true; - } - else - { - return false; - } + if (XC_Functional::get_ked_flag () || PARAM.inp.out_elf[0] > 0) + { + return true; + } + else + { + return false; + } } -void Charge::destroy() +void + Charge::destroy () { if (allocate_rho || allocate_rho_final_scf) // LiuXh add 20180619 - { - delete[] rho; - delete[] rhog; - delete[] rho_save; - delete[] rhog_save; - delete[] rho_core; - delete[] rhog_core; - delete[] _space_rho; - delete[] _space_rho_save; - delete[] _space_rhog; - delete[] _space_rhog_save; - delete[] _space_kin_r; - delete[] _space_kin_r_save; - if (XC_Functional::get_ked_flag() || PARAM.inp.out_elf[0] > 0) { - delete[] kin_r; - delete[] kin_r_save; + delete[] rho; + delete[] rhog; + delete[] rho_save; + 
delete[] rhog_save; + delete[] rho_core; + delete[] rhog_core; + delete[] _space_rho; + delete[] _space_rho_save; + delete[] _space_rhog; + delete[] _space_rhog_save; + delete[] _space_kin_r; + delete[] _space_kin_r_save; + if (XC_Functional::get_ked_flag () || PARAM.inp.out_elf[0] > 0) + { + delete[] kin_r; + delete[] kin_r_save; + } } - } } -void Charge::allocate(const int& nspin_in, const bool kin_den) +void + Charge::allocate (const int& nspin_in, const bool kin_den) { - ModuleBase::TITLE("Charge", "allocate"); + ModuleBase::TITLE ("Charge", "allocate"); - if (this->rhopw == nullptr) - { - ModuleBase::WARNING_QUIT("Charge::allocate","rhopw is nullptr."); - } + if (this->rhopw == nullptr) + { + ModuleBase::WARNING_QUIT ("Charge::allocate", "rhopw is nullptr."); + } this->nrxx = this->rhopw->nrxx; this->nxyz = this->rhopw->nxyz; this->ngmc = this->rhopw->npw; - if (allocate_rho == true) - { - this->destroy(); - allocate_rho = false; - } + { + this->destroy (); + allocate_rho = false; + } - assert(allocate_rho == false); + assert (allocate_rho == false); // mohan add 2021-02-20 this->nspin = nspin_in; if (PARAM.inp.test_charge > 1) - { - std::cout << "\n spin_number = " << nspin << " real_point_number = " << nrxx << std::endl; - } + { + std::cout << "\n spin_number = " << nspin << " real_point_number = " << nrxx << std::endl; + } // allocate memory _space_rho = new double[nspin * nrxx]; _space_rho_save = new double[nspin * nrxx]; _space_rhog = new std::complex[nspin * ngmc]; _space_rhog_save = new std::complex[nspin * ngmc]; - if(kin_den) - { - _space_kin_r = new double[nspin * nrxx]; - _space_kin_r_save = new double[nspin * nrxx]; - } + if (kin_den) + { + _space_kin_r = new double[nspin * nrxx]; + _space_kin_r_save = new double[nspin * nrxx]; + } rho = new double*[nspin]; rhog = new std::complex*[nspin]; rho_save = new double*[nspin]; rhog_save = new std::complex*[nspin]; - if(kin_den) - { - kin_r = new double*[nspin]; - kin_r_save = new double*[nspin]; - } + if 
(kin_den) + { + kin_r = new double*[nspin]; + kin_r_save = new double*[nspin]; + } for (int is = 0; is < nspin; is++) - { - rho[is] = _space_rho + is * nrxx; - rhog[is] = _space_rhog + is * ngmc; - rho_save[is] = _space_rho_save + is * nrxx; - rhog_save[is] = _space_rhog_save + is * ngmc; - ModuleBase::GlobalFunc::ZEROS(rho[is], nrxx); - ModuleBase::GlobalFunc::ZEROS(rhog[is], ngmc); - ModuleBase::GlobalFunc::ZEROS(rho_save[is], nrxx); - ModuleBase::GlobalFunc::ZEROS(rhog_save[is], ngmc); - if(kin_den) { - kin_r[is] = _space_kin_r + is * nrxx; - ModuleBase::GlobalFunc::ZEROS(kin_r[is], nrxx); - kin_r_save[is] = _space_kin_r_save + is * nrxx; - ModuleBase::GlobalFunc::ZEROS(kin_r_save[is], nrxx); + rho[is] = _space_rho + is * nrxx; + rhog[is] = _space_rhog + is * ngmc; + rho_save[is] = _space_rho_save + is * nrxx; + rhog_save[is] = _space_rhog_save + is * ngmc; + ModuleBase::GlobalFunc::ZEROS (rho[is], nrxx); + ModuleBase::GlobalFunc::ZEROS (rhog[is], ngmc); + ModuleBase::GlobalFunc::ZEROS (rho_save[is], nrxx); + ModuleBase::GlobalFunc::ZEROS (rhog_save[is], ngmc); + if (kin_den) + { + kin_r[is] = _space_kin_r + is * nrxx; + ModuleBase::GlobalFunc::ZEROS (kin_r[is], nrxx); + kin_r_save[is] = _space_kin_r_save + is * nrxx; + ModuleBase::GlobalFunc::ZEROS (kin_r_save[is], nrxx); + } } - } - ModuleBase::Memory::record("Chg::rho", sizeof(double) * nspin * nrxx); - ModuleBase::Memory::record("Chg::rho_save", sizeof(double) * nspin * nrxx); - ModuleBase::Memory::record("Chg::rhog", sizeof(double) * nspin * ngmc); - ModuleBase::Memory::record("Chg::rhog_save", sizeof(double) * nspin * ngmc); - if(kin_den) - { - ModuleBase::Memory::record("Chg::kin_r", sizeof(double) * nspin * ngmc); - ModuleBase::Memory::record("Chg::kin_r_save", sizeof(double) * nspin * ngmc); - } + ModuleBase::Memory::record ("Chg::rho", sizeof (double) * nspin * nrxx); + ModuleBase::Memory::record ("Chg::rho_save", sizeof (double) * nspin * nrxx); + ModuleBase::Memory::record ("Chg::rhog", sizeof 
(double) * nspin * ngmc); + ModuleBase::Memory::record ("Chg::rhog_save", sizeof (double) * nspin * ngmc); + if (kin_den) + { + ModuleBase::Memory::record ("Chg::kin_r", sizeof (double) * nspin * ngmc); + ModuleBase::Memory::record ("Chg::kin_r_save", sizeof (double) * nspin * ngmc); + } this->rho_core = new double[nrxx]; // core charge in real space - ModuleBase::GlobalFunc::ZEROS(rho_core, nrxx); + ModuleBase::GlobalFunc::ZEROS (rho_core, nrxx); this->rhog_core = new std::complex[ngmc]; // reciprocal core charge - ModuleBase::GlobalFunc::ZEROS(rhog_core, ngmc); + ModuleBase::GlobalFunc::ZEROS (rhog_core, ngmc); - ModuleBase::Memory::record("Chg::rho_core", sizeof(double) * nrxx); - ModuleBase::Memory::record("Chg::rhog_core", sizeof(double) * ngmc); + ModuleBase::Memory::record ("Chg::rho_core", sizeof (double) * nrxx); + ModuleBase::Memory::record ("Chg::rhog_core", sizeof (double) * ngmc); this->allocate_rho = true; return; } -double Charge::sum_rho() const +double + Charge::sum_rho () const { - ModuleBase::TITLE("Charge", "sum_rho"); + ModuleBase::TITLE ("Charge", "sum_rho"); double sum_rho = 0.0; int nspin0 = (nspin == 2) ? 2 : 1; for (int is = 0; is < nspin0; is++) - { - for (int ir = 0; ir < nrxx; ir++) { - sum_rho += this->rho[is][ir]; + for (int ir = 0; ir < nrxx; ir++) + { + sum_rho += this->rho[is][ir]; + } } - } // multiply the sum of charge density by a factor - sum_rho *= *this->omega_ / static_cast(this->rhopw->nxyz); + sum_rho *= *this->omega_ / static_cast (this->rhopw->nxyz); #ifdef __MPI - Parallel_Reduce::reduce_pool(sum_rho); + Parallel_Reduce::reduce_pool (sum_rho); #endif // mohan fixed bug 2010-01-18, // sum_rho may be smaller than 1, like Na bcc. 
if (sum_rho <= 0.1) - { - GlobalV::ofs_warning << " sum_rho=" << sum_rho << std::endl; - ModuleBase::WARNING_QUIT("Charge::renormalize_rho", "Can't find even an electron!"); - } + { + GlobalV::ofs_warning << " sum_rho=" << sum_rho << std::endl; + ModuleBase::WARNING_QUIT ("Charge::renormalize_rho", "Can't find even an electron!"); + } return sum_rho; } -void Charge::renormalize_rho() +void + Charge::renormalize_rho () { - ModuleBase::TITLE("Charge", "renormalize_rho"); + ModuleBase::TITLE ("Charge", "renormalize_rho"); - const double sr = this->sum_rho(); - GlobalV::ofs_warning << std::setprecision(15); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_warning, "charge before normalized", sr); + const double sr = this->sum_rho (); + GlobalV::ofs_warning << std::setprecision (15); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_warning, "charge before normalized", sr); const double normalize_factor = PARAM.inp.nelec / sr; for (int is = 0; is < nspin; is++) - { - for (int ir = 0; ir < nrxx; ir++) { - rho[is][ir] *= normalize_factor; + for (int ir = 0; ir < nrxx; ir++) + { + rho[is][ir] *= normalize_factor; + } } - } - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_warning, "charge after normalized", this->sum_rho()); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_warning, "charge after normalized", this->sum_rho ()); - GlobalV::ofs_running << std::setprecision(6); + GlobalV::ofs_running << std::setprecision (6); return; } @@ -242,478 +247,544 @@ void Charge::renormalize_rho() // rho_at (read from pseudopotential files) // allocate work space (psic must already be allocated) //------------------------------------------------------- -void Charge::atomic_rho(const int spin_number_need, +void + Charge::atomic_rho (const int spin_number_need, const double& omega, double** rho_in, const ModuleBase::ComplexMatrix& strucFac, const UnitCell& ucell) const // Peize Lin refactor 2021.04.08 { - ModuleBase::TITLE("Charge", "atomic_rho"); - ModuleBase::timer::start("Charge", "atomic_rho"); + 
ModuleBase::TITLE ("Charge", "atomic_rho"); + ModuleBase::timer::start ("Charge", "atomic_rho"); { - ModuleBase::ComplexMatrix rho_g3d = [&]() -> ModuleBase::ComplexMatrix - { - // use interpolation to get three dimension charge density. - ModuleBase::ComplexMatrix rho_g3d(spin_number_need, this->rhopw->npw); - - for (int it = 0; it < ucell.ntype; it++) + ModuleBase::ComplexMatrix rho_g3d = [&] () -> ModuleBase::ComplexMatrix { - // check the start magnetization - const int startmag_type = [&]() -> int { - if (ucell.magnet.start_mag[it] != 0.0) - { - return 1; - } - return 2; - }(); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_warning, "startmag_type", startmag_type); - - const Atom* const atom = &ucell.atoms[it]; + // use interpolation to get three dimension charge density. + ModuleBase::ComplexMatrix rho_g3d (spin_number_need, this->rhopw->npw); - if (!atom->flag_empty_element) // Peize Lin add for bsse 2021.04.07 - { - const std::vector rho_lgl = [&]() -> std::vector { - // one dimension of charge in G space. - std::vector rho_lgl(this->rhopw->ngg, 0); - - // mesh point of this element. 
- const int mesh = atom->ncpp.msh; - - //---------------------------------------------------------- - // Here we check the electron number - //---------------------------------------------------------- - const std::vector rhoatm = [&]() -> std::vector { - std::vector rhoatm(mesh); - // this is only one part of the charge density for uspp - // liuyu 2023-11-01 - if (atom->ncpp.tvanp) - { - for (int ir = 0; ir < mesh; ++ir) - { - rhoatm[ir] = atom->ncpp.rho_at[ir]; - } - } - else + for (int it = 0; it < ucell.ntype; it++) + { + // check the start magnetization + const int startmag_type = [&] () -> int { - for (int ir = 0; ir < mesh; ++ir) - { - double r2 = atom->ncpp.r[ir] * atom->ncpp.r[ir]; - if (r2!=0) + if (ucell.magnet.start_mag[it] != 0.0) { - rhoatm[ir] = atom->ncpp.rho_at[ir] / ModuleBase::FOUR_PI / r2; + return 1; } - } - rhoatm[0] - = pow((rhoatm[2] / rhoatm[1]), atom->ncpp.r[1] / (atom->ncpp.r[2] - atom->ncpp.r[1])); // zws add, sunliang updated 2024-03-04 - if (rhoatm[0] < 1e-12) - { - rhoatm[0] = rhoatm[1]; - } - else - { - rhoatm[0] = rhoatm[1] / rhoatm[0]; - } - - double charge = 0.0; - ModuleBase::Integral::Simpson_Integral(atom->ncpp.msh, - atom->ncpp.rho_at.data(), - atom->ncpp.rab.data(), - charge); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_warning, "charge from rho_at", charge); - assert(charge != 0.0 - || charge - == atom->ncpp.zv); // Peize Lin add charge==atom->zv for bsse 2021.04.07 - - double scale = 1.0; - if (charge != atom->ncpp.zv) - { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_warning, - "charge should be", - atom->ncpp.zv); - scale = atom->ncpp.zv / charge; - } - - for (int ir = 0; ir < mesh; ++ir) - { - rhoatm[ir] *= scale; - rhoatm[ir] *= (ModuleBase::FOUR_PI * atom->ncpp.r[ir] * atom->ncpp.r[ir]); - } - } - return rhoatm; - }(); - - assert(ucell.meshx > 0); - //---------------------------------------------------------- - // Here we compute the G=0 term - //---------------------------------------------------------- - int gstart = 0; 
- if (this->rhopw->gg_uniq[0] < 1e-8) - { - std::vector rho1d(ucell.meshx); - for (int ir = 0; ir < mesh; ir++) - { - rho1d[ir] = rhoatm[ir]; - } - ModuleBase::Integral::Simpson_Integral(mesh, rho1d.data(), atom->ncpp.rab.data(), rho_lgl[0]); - gstart = 1; - } - if (PARAM.inp.test_charge > 0) - std::cout << "\n |G|=0 term done." << std::endl; - //---------------------------------------------------------- - // Here we compute the G<>0 term - // But if in parallel case - // G=0 term only belong to 1 cpu. - // Other processors start from '0' - //---------------------------------------------------------- - #ifdef _OPENMP - #pragma omp parallel - { - #endif - std::vector rho1d(ucell.meshx); - - #ifdef _OPENMP - #pragma omp for - #endif - for (int igg = gstart; igg < this->rhopw->ngg; ++igg) + return 2; + }(); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_warning, "startmag_type", startmag_type); + + const Atom* const atom = &ucell.atoms[it]; + + if (!atom->flag_empty_element) // Peize Lin add for bsse 2021.04.07 { - const double gx = sqrt(this->rhopw->gg_uniq[igg]) * ucell.tpiba; - for (int ir = 0; ir < mesh; ir++) - { - if (atom->ncpp.r[ir] < 1.0e-8) + const std::vector rho_lgl = [&] () -> std::vector { - rho1d[ir] = rhoatm[ir]; - } - else + // one dimension of charge in G space. + std::vector rho_lgl (this->rhopw->ngg, 0); + + // mesh point of this element. 
+ const int mesh = atom->ncpp.msh; + + //---------------------------------------------------------- + // Here we check the electron number + //---------------------------------------------------------- + const std::vector rhoatm = [&] () -> std::vector + { + std::vector rhoatm (mesh); + // this is only one part of the charge density for uspp + // liuyu 2023-11-01 + if (atom->ncpp.tvanp) + { + for (int ir = 0; ir < mesh; ++ir) + { + rhoatm[ir] = atom->ncpp.rho_at[ir]; + } + } + else + { + for (int ir = 0; ir < mesh; ++ir) + { + double r2 = atom->ncpp.r[ir] * atom->ncpp.r[ir]; + if (r2 != 0) + { + rhoatm[ir] = atom->ncpp.rho_at[ir] + / ModuleBase::FOUR_PI / r2; + } + } + rhoatm[0] + = pow ((rhoatm[2] / rhoatm[1]), + atom->ncpp.r[1] + / (atom->ncpp.r[2] + - atom->ncpp.r[1])); // zws add, sunliang + // updated 2024-03-04 + if (rhoatm[0] < 1e-12) + { + rhoatm[0] = rhoatm[1]; + } + else + { + rhoatm[0] = rhoatm[1] / rhoatm[0]; + } + + double charge = 0.0; + ModuleBase::Integral::Simpson_Integral ( + atom->ncpp.msh, + atom->ncpp.rho_at.data (), + atom->ncpp.rab.data (), + charge); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_warning, + "charge from rho_at", + charge); + assert ( + charge != 0.0 + || charge + == atom->ncpp.zv); // Peize Lin add charge==atom->zv + // for bsse 2021.04.07 + + double scale = 1.0; + if (charge != atom->ncpp.zv) + { + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_warning, + "charge should be", + atom->ncpp.zv); + scale = atom->ncpp.zv / charge; + } + + for (int ir = 0; ir < mesh; ++ir) + { + rhoatm[ir] *= scale; + rhoatm[ir] *= (ModuleBase::FOUR_PI * atom->ncpp.r[ir] + * atom->ncpp.r[ir]); + } + } + return rhoatm; + }(); + + assert (ucell.meshx > 0); + //---------------------------------------------------------- + // Here we compute the G=0 term + //---------------------------------------------------------- + int gstart = 0; + if (this->rhopw->gg_uniq[0] < 1e-8) + { + std::vector rho1d (ucell.meshx); + for (int ir = 0; ir < mesh; ir++) + { + 
rho1d[ir] = rhoatm[ir]; + } + ModuleBase::Integral::Simpson_Integral (mesh, + rho1d.data (), + atom->ncpp.rab.data (), + rho_lgl[0]); + gstart = 1; + } + if (PARAM.inp.test_charge > 0) + { + std::cout << "\n |G|=0 term done." << std::endl; + } + //---------------------------------------------------------- + // Here we compute the G<>0 term + // But if in parallel case + // G=0 term only belong to 1 cpu. + // Other processors start from '0' + //---------------------------------------------------------- +#ifdef _OPENMP +#pragma omp parallel + { +#endif + std::vector rho1d (ucell.meshx); + +#ifdef _OPENMP +#pragma omp for +#endif + for (int igg = gstart; igg < this->rhopw->ngg; ++igg) + { + const double gx = sqrt (this->rhopw->gg_uniq[igg]) * ucell.tpiba; + for (int ir = 0; ir < mesh; ir++) + { + if (atom->ncpp.r[ir] < 1.0e-8) + { + rho1d[ir] = rhoatm[ir]; + } + else + { + const double gxx = gx * atom->ncpp.r[ir]; + rho1d[ir] = rhoatm[ir] * ModuleBase::libm::sin (gxx) + / gxx; + } + } + ModuleBase::Integral::Simpson_Integral (mesh, + rho1d.data (), + atom->ncpp.rab.data (), + rho_lgl[igg]); + } +#ifdef _OPENMP +#pragma omp single +#endif + { + if (PARAM.inp.test_charge > 0) + { + std::cout << " |G|>0 term done." 
<< std::endl; + } + } + //---------------------------------------------------------- + // EXPLAIN : Complete the transfer of rho from real space to + // reciprocal space + //---------------------------------------------------------- +#ifdef _OPENMP +#pragma omp for +#endif + for (int igg = 0; igg < this->rhopw->ngg; igg++) + { + rho_lgl[igg] /= omega; + } +#ifdef _OPENMP + } +#endif + return rho_lgl; + }(); + //---------------------------------------------------------- + // EXPLAIN : compute the 3D atomic charge in reciprocal space + //---------------------------------------------------------- + if (spin_number_need == 1) { - const double gxx = gx * atom->ncpp.r[ir]; - rho1d[ir] = rhoatm[ir] * ModuleBase::libm::sin(gxx) / gxx; +#ifdef _OPENMP +#pragma omp parallel for +#endif + for (int ig = 0; ig < this->rhopw->npw; ig++) + { + rho_g3d (0, ig) += strucFac (it, ig) * rho_lgl[this->rhopw->ig2igg[ig]]; + } } - } - ModuleBase::Integral::Simpson_Integral(mesh, rho1d.data(), atom->ncpp.rab.data(), rho_lgl[igg]); - } - #ifdef _OPENMP - #pragma omp single - #endif - { - if (PARAM.inp.test_charge > 0) - std::cout << " |G|>0 term done." 
<< std::endl; - } - //---------------------------------------------------------- - // EXPLAIN : Complete the transfer of rho from real space to - // reciprocal space - //---------------------------------------------------------- - #ifdef _OPENMP - #pragma omp for - #endif - for (int igg = 0; igg < this->rhopw->ngg; igg++) - { - rho_lgl[igg] /= omega; - } - #ifdef _OPENMP - } - #endif - return rho_lgl; - }(); - //---------------------------------------------------------- - // EXPLAIN : compute the 3D atomic charge in reciprocal space - //---------------------------------------------------------- - if (spin_number_need == 1) - { - #ifdef _OPENMP - #pragma omp parallel for - #endif - for (int ig = 0; ig < this->rhopw->npw; ig++) - { - rho_g3d(0, ig) += strucFac(it, ig) * rho_lgl[this->rhopw->ig2igg[ig]]; - } - } - // mohan add 2011-06-14, initialize the charge density according to each atom - else if (spin_number_need == 2) - { - if (startmag_type == 1) - { - #ifdef _OPENMP - #pragma omp parallel for - #endif - for (int ig = 0; ig < this->rhopw->npw; ig++) - { - const std::complex swap = strucFac(it, ig) * rho_lgl[this->rhopw->ig2igg[ig]]; - const double up = 0.5 * (1 + ucell.magnet.start_mag[it] / atom->ncpp.zv); - const double dw = 0.5 * (1 - ucell.magnet.start_mag[it] / atom->ncpp.zv); - rho_g3d(0, ig) += swap * up; - rho_g3d(1, ig) += swap * dw; - } - } - // mohan add 2011-06-14 - else if (startmag_type == 2) - { - std::complex ci_tpi = ModuleBase::NEG_IMAG_UNIT * ModuleBase::TWO_PI; - for (int ia = 0; ia < atom->na; ia++) - { - // const double up = 0.5 * ( 1 + atom->mag[ia] ); - // const double dw = 0.5 * ( 1 - atom->mag[ia] ); - const double up = 0.5 * (1 + atom->mag[ia] / atom->ncpp.zv); - const double dw = 0.5 * (1 - atom->mag[ia] / atom->ncpp.zv); - // std::cout << " atom " << ia << " up=" << up << " dw=" << dw << std::endl; - #ifdef _OPENMP - #pragma omp parallel for - #endif - for (int ig = 0; ig < this->rhopw->npw; ig++) - { - const double Gtau = 
this->rhopw->gcar[ig][0] * atom->tau[ia].x - + this->rhopw->gcar[ig][1] * atom->tau[ia].y - + this->rhopw->gcar[ig][2] * atom->tau[ia].z; - - std::complex swap - = ModuleBase::libm::exp(ci_tpi * Gtau) * rho_lgl[this->rhopw->ig2igg[ig]]; - - rho_g3d(0, ig) += swap * up; - rho_g3d(1, ig) += swap * dw; - } - } - } - } - else if (spin_number_need == 4) - { - // noncolinear case - if (startmag_type == 1) - { - double sin_a1, sin_a2, cos_a1, cos_a2; - if (PARAM.globalv.domag) - { // will not be used now, will be deleted later - ModuleBase::libm::sincos(atom->angle1[0], &sin_a1, &cos_a1); - ModuleBase::libm::sincos(atom->angle2[0], &sin_a2, &cos_a2); - } - #ifdef _OPENMP - #pragma omp parallel for - #endif - for (int ig = 0; ig < this->rhopw->npw; ig++) - { - const std::complex swap = strucFac(it, ig) * rho_lgl[this->rhopw->ig2igg[ig]]; - rho_g3d(0, ig) += swap; - if (PARAM.globalv.domag) - { // will not be used now, will be deleted later - rho_g3d(1, ig) - += swap * (ucell.magnet.start_mag[it] / atom->ncpp.zv) * sin_a1 * cos_a2; - rho_g3d(2, ig) - += swap * (ucell.magnet.start_mag[it] / atom->ncpp.zv) * sin_a1 * sin_a2; - rho_g3d(3, ig) - += swap * (ucell.magnet.start_mag[it] / atom->ncpp.zv) * cos_a1; - } - else if (PARAM.globalv.domag_z) - { - rho_g3d(1, ig) = 0.0; - rho_g3d(2, ig) = 0.0; - rho_g3d(3, ig) += swap * (ucell.magnet.start_mag[it] / atom->ncpp.zv); - } - } - } - else if (startmag_type == 2) - { // zdy-warning-not-available - std::complex ci_tpi = ModuleBase::NEG_IMAG_UNIT * ModuleBase::TWO_PI; - for (int ia = 0; ia < atom->na; ia++) - { - double sin_a1, sin_a2, cos_a1, cos_a2; - if (PARAM.globalv.domag || PARAM.globalv.domag_z) - { - ModuleBase::libm::sincos(atom->angle1[ia], &sin_a1, &cos_a1); - } - if (PARAM.globalv.domag) - { - ModuleBase::libm::sincos(atom->angle2[ia], &sin_a2, &cos_a2); - } - #ifdef _OPENMP - #pragma omp parallel for - #endif - for (int ig = 0; ig < this->rhopw->npw; ig++) - { - const double Gtau = this->rhopw->gcar[ig][0] * 
atom->tau[ia].x - + this->rhopw->gcar[ig][1] * atom->tau[ia].y - + this->rhopw->gcar[ig][2] * atom->tau[ia].z; - - std::complex swap = exp(ci_tpi * Gtau) * rho_lgl[this->rhopw->ig2igg[ig]]; - - // calculate rho_total - rho_g3d(0, ig) += swap; - // calculate mag_z - if (PARAM.globalv.domag || PARAM.globalv.domag_z) + // mohan add 2011-06-14, initialize the charge density according to each atom + else if (spin_number_need == 2) { - rho_g3d(3, ig) += swap * (atom->mag[ia] / atom->ncpp.zv) * cos_a1; + if (startmag_type == 1) + { +#ifdef _OPENMP +#pragma omp parallel for +#endif + for (int ig = 0; ig < this->rhopw->npw; ig++) + { + const std::complex swap + = strucFac (it, ig) * rho_lgl[this->rhopw->ig2igg[ig]]; + const double up + = 0.5 * (1 + ucell.magnet.start_mag[it] / atom->ncpp.zv); + const double dw + = 0.5 * (1 - ucell.magnet.start_mag[it] / atom->ncpp.zv); + rho_g3d (0, ig) += swap * up; + rho_g3d (1, ig) += swap * dw; + } + } + // mohan add 2011-06-14 + else if (startmag_type == 2) + { + std::complex ci_tpi + = ModuleBase::NEG_IMAG_UNIT * ModuleBase::TWO_PI; + for (int ia = 0; ia < atom->na; ia++) + { + // const double up = 0.5 * ( 1 + atom->mag[ia] ); + // const double dw = 0.5 * ( 1 - atom->mag[ia] ); + const double up = 0.5 * (1 + atom->mag[ia] / atom->ncpp.zv); + const double dw = 0.5 * (1 - atom->mag[ia] / atom->ncpp.zv); + // std::cout << " atom " << ia << " up=" << up << " dw=" << dw << + // std::endl; +#ifdef _OPENMP +#pragma omp parallel for +#endif + for (int ig = 0; ig < this->rhopw->npw; ig++) + { + const double Gtau + = this->rhopw->gcar[ig][0] * atom->tau[ia].x + + this->rhopw->gcar[ig][1] * atom->tau[ia].y + + this->rhopw->gcar[ig][2] * atom->tau[ia].z; + + std::complex swap + = ModuleBase::libm::exp (ci_tpi * Gtau) + * rho_lgl[this->rhopw->ig2igg[ig]]; + + rho_g3d (0, ig) += swap * up; + rho_g3d (1, ig) += swap * dw; + } + } + } } - // calculate mag_x and mag_y - if (PARAM.globalv.domag) + else if (spin_number_need == 4) { - rho_g3d(1, ig) += 
swap * (atom->mag[ia] / atom->ncpp.zv) * sin_a1 * cos_a2; - rho_g3d(2, ig) += swap * (atom->mag[ia] / atom->ncpp.zv) * sin_a1 * sin_a2; + // noncolinear case + if (startmag_type == 1) + { + double sin_a1, sin_a2, cos_a1, cos_a2; + if (PARAM.globalv.domag) + { // will not be used now, will be deleted later + ModuleBase::libm::sincos (atom->angle1[0], &sin_a1, &cos_a1); + ModuleBase::libm::sincos (atom->angle2[0], &sin_a2, &cos_a2); + } +#ifdef _OPENMP +#pragma omp parallel for +#endif + for (int ig = 0; ig < this->rhopw->npw; ig++) + { + const std::complex swap + = strucFac (it, ig) * rho_lgl[this->rhopw->ig2igg[ig]]; + rho_g3d (0, ig) += swap; + if (PARAM.globalv.domag) + { // will not be used now, will be deleted later + rho_g3d (1, ig) + += swap + * (ucell.magnet.start_mag[it] / atom->ncpp.zv) + * sin_a1 * cos_a2; + rho_g3d (2, ig) + += swap + * (ucell.magnet.start_mag[it] / atom->ncpp.zv) + * sin_a1 * sin_a2; + rho_g3d (3, ig) + += swap + * (ucell.magnet.start_mag[it] / atom->ncpp.zv) + * cos_a1; + } + else if (PARAM.globalv.domag_z) + { + rho_g3d (1, ig) = 0.0; + rho_g3d (2, ig) = 0.0; + rho_g3d (3, ig) + += swap + * (ucell.magnet.start_mag[it] / atom->ncpp.zv); + } + } + } + else if (startmag_type == 2) + { // zdy-warning-not-available + std::complex ci_tpi + = ModuleBase::NEG_IMAG_UNIT * ModuleBase::TWO_PI; + for (int ia = 0; ia < atom->na; ia++) + { + double sin_a1, sin_a2, cos_a1, cos_a2; + if (PARAM.globalv.domag || PARAM.globalv.domag_z) + { + ModuleBase::libm::sincos (atom->angle1[ia], + &sin_a1, + &cos_a1); + } + if (PARAM.globalv.domag) + { + ModuleBase::libm::sincos (atom->angle2[ia], + &sin_a2, + &cos_a2); + } +#ifdef _OPENMP +#pragma omp parallel for +#endif + for (int ig = 0; ig < this->rhopw->npw; ig++) + { + const double Gtau + = this->rhopw->gcar[ig][0] * atom->tau[ia].x + + this->rhopw->gcar[ig][1] * atom->tau[ia].y + + this->rhopw->gcar[ig][2] * atom->tau[ia].z; + + std::complex swap + = exp (ci_tpi * Gtau) + * 
rho_lgl[this->rhopw->ig2igg[ig]]; + + // calculate rho_total + rho_g3d (0, ig) += swap; + // calculate mag_z + if (PARAM.globalv.domag || PARAM.globalv.domag_z) + { + rho_g3d (3, ig) + += swap * (atom->mag[ia] / atom->ncpp.zv) + * cos_a1; + } + // calculate mag_x and mag_y + if (PARAM.globalv.domag) + { + rho_g3d (1, ig) + += swap * (atom->mag[ia] / atom->ncpp.zv) + * sin_a1 * cos_a2; + rho_g3d (2, ig) + += swap * (atom->mag[ia] / atom->ncpp.zv) + * sin_a1 * sin_a2; + } + else + { + rho_g3d (1, ig) = 0.0; + rho_g3d (2, ig) = 0.0; + } + } + } + } } - else + else { - rho_g3d(1, ig) = 0.0; - rho_g3d(2, ig) = 0.0; + ModuleBase::WARNING_QUIT ("Charge::spin_number_need", + " Either 1 or 2 or 4, check SPIN number !"); } - } } - } - } - else - { - ModuleBase::WARNING_QUIT("Charge::spin_number_need", " Either 1 or 2 or 4, check SPIN number !"); } - } - } - return rho_g3d; - }(); + return rho_g3d; + }(); - assert(spin_number_need > 0); - std::vector ne(spin_number_need); + assert (spin_number_need > 0); + std::vector ne (spin_number_need); for (int is = 0; is < spin_number_need; is++) - { - this->rhopw->recip2real(&rho_g3d(is, 0), rho_in[is]); - - for (int ir = 0; ir < this->rhopw->nrxx; ++ir) - { - ne[is] += rho_in[is][ir]; - } - - ne[is] *= omega / (double)this->rhopw->nxyz; - #ifdef __MPI - Parallel_Reduce::reduce_pool(ne[is]); - #endif - // we check that everything is correct - double neg = 0.0; - double rea = 0.0; - double ima = 0.0; - double sumrea = 0.0; - for (int ir = 0; ir < this->rhopw->nrxx; ir++) { - rea = this->rhopw->fft_bundle.get_auxr_data()[ir].real(); - sumrea += rea; - neg += std::min(0.0, rea); - ima += std::abs(this->rhopw->fft_bundle.get_auxr_data()[ir].imag()); - } + this->rhopw->recip2real (&rho_g3d (is, 0), rho_in[is]); - #ifdef __MPI - Parallel_Reduce::reduce_pool(neg); - Parallel_Reduce::reduce_pool(ima); - Parallel_Reduce::reduce_pool(sumrea); - #endif - // mohan fix bug 2011-04-03 - neg = neg / (double)this->rhopw->nxyz * omega; - ima = ima / 
(double)this->rhopw->nxyz * omega; - sumrea = sumrea / (double)this->rhopw->nxyz * omega; - - if (((neg < -1.0e-4) && (is == 0 || PARAM.inp.nspin == 2)) || ima > 1.0e-4) - { - GlobalV::ofs_warning << " Warning: negative or imaginary starting charge : "; - GlobalV::ofs_warning << " neg = " << neg << " ima = " << ima << " SPIN = " << is << std::endl; - } + for (int ir = 0; ir < this->rhopw->nrxx; ++ir) + { + ne[is] += rho_in[is][ir]; + } + + ne[is] *= omega / (double)this->rhopw->nxyz; +#ifdef __MPI + Parallel_Reduce::reduce_pool (ne[is]); +#endif + // we check that everything is correct + double neg = 0.0; + double rea = 0.0; + double ima = 0.0; + double sumrea = 0.0; + for (int ir = 0; ir < this->rhopw->nrxx; ir++) + { + rea = this->rhopw->fft_bundle.get_auxr_data ()[ir].real (); + sumrea += rea; + neg += std::min (0.0, rea); + ima += std::abs (this->rhopw->fft_bundle.get_auxr_data ()[ir].imag ()); + } - } // end is +#ifdef __MPI + Parallel_Reduce::reduce_pool (neg); + Parallel_Reduce::reduce_pool (ima); + Parallel_Reduce::reduce_pool (sumrea); +#endif + // mohan fix bug 2011-04-03 + neg = neg / (double)this->rhopw->nxyz * omega; + ima = ima / (double)this->rhopw->nxyz * omega; + sumrea = sumrea / (double)this->rhopw->nxyz * omega; + + if (((neg < -1.0e-4) && (is == 0 || PARAM.inp.nspin == 2)) || ima > 1.0e-4) + { + GlobalV::ofs_warning << " Warning: negative or imaginary starting charge : "; + GlobalV::ofs_warning << " neg = " << neg << " ima = " << ima << " SPIN = " << is << std::endl; + } + + } // end is double ne_tot = 0.0; int spin0 = 1; - if (spin_number_need == 2) - { - spin0 = spin_number_need; - } + if (spin_number_need == 2) + { + spin0 = spin_number_need; + } for (int is = 0; is < spin0; ++is) - { - GlobalV::ofs_warning << "\n SETUP ATOMIC RHO FOR SPIN " << is + 1 << std::endl; - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_warning, "Electron number from rho", ne[is]); - ne_tot += ne[is]; - } - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_warning, "total 
electron number from rho", ne_tot); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_warning, "should be", PARAM.inp.nelec); - - for (int is = 0; is < spin_number_need; ++is) - { - for (int ir = 0; ir < this->rhopw->nrxx; ++ir) - { - rho_in[is][ir] = rho_in[is][ir] / ne_tot * PARAM.inp.nelec; - } - } + { + GlobalV::ofs_warning << "\n SETUP ATOMIC RHO FOR SPIN " << is + 1 << std::endl; + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_warning, "Electron number from rho", ne[is]); + ne_tot += ne[is]; + } + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_warning, "total electron number from rho", ne_tot); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_warning, "should be", PARAM.inp.nelec); + + for (int is = 0; is < spin_number_need; ++is) + { + for (int ir = 0; ir < this->rhopw->nrxx; ++ir) + { + rho_in[is][ir] = rho_in[is][ir] / ne_tot * PARAM.inp.nelec; + } + } } - ModuleBase::timer::end("Charge", "atomic_rho"); + ModuleBase::timer::end ("Charge", "atomic_rho"); return; } -void Charge::save_rho_before_sum_band() +void + Charge::save_rho_before_sum_band () { for (int is = 0; is < PARAM.inp.nspin; is++) - { - ModuleBase::GlobalFunc::DCOPY(rho[is], rho_save[is], this->rhopw->nrxx); - if (XC_Functional::get_ked_flag()) { - ModuleBase::GlobalFunc::DCOPY(kin_r[is], kin_r_save[is], this->rhopw->nrxx); + ModuleBase::GlobalFunc::DCOPY (rho[is], rho_save[is], this->rhopw->nrxx); + if (XC_Functional::get_ked_flag ()) + { + ModuleBase::GlobalFunc::DCOPY (kin_r[is], kin_r_save[is], this->rhopw->nrxx); + } } - } return; } -double Charge::cal_rho2ne(const double* rho_in) const +double + Charge::cal_rho2ne (const double* rho_in) const { - assert(this->rhopw->nxyz > 0); // mohan add 2025-12-02 + assert (this->rhopw->nxyz > 0); // mohan add 2025-12-02 double ne = 0.0; for (int ir = 0; ir < this->rhopw->nrxx; ir++) - { - ne += rho_in[ir]; - } + { + ne += rho_in[ir]; + } #ifdef __MPI - Parallel_Reduce::reduce_pool(ne); + Parallel_Reduce::reduce_pool (ne); #endif ne = ne * *this->omega_ / 
(double)this->rhopw->nxyz; return ne; } -void Charge::check_rho() +void + Charge::check_rho () { - if (this->nspin==1 || this->nspin==4) - { - double ne = 0.0; - ne = this->cal_rho2ne(rho[0]); - if (std::abs(ne - PARAM.inp.nelec) > 1.0e-6) + if (this->nspin == 1 || this->nspin == 4) { - ModuleBase::WARNING("Charge", "Charge is not equal to the number of electrons!"); + double ne = 0.0; + ne = this->cal_rho2ne (rho[0]); + if (std::abs (ne - PARAM.inp.nelec) > 1.0e-6) + { + ModuleBase::WARNING ("Charge", "Charge is not equal to the number of electrons!"); + } } - } else if (this->nspin == 2) - { - // for spin up - double ne_up = 0.0; - ne_up = this->cal_rho2ne(rho[0]); - if (ne_up < 0.0) { - ModuleBase::WARNING_QUIT("Charge", "Number of spin-down electrons set in starting magnetization exceeds all available."); - } - // for spin down - double ne_dn = 0.0; - ne_dn = this->cal_rho2ne(rho[1]); - if (ne_dn < 0.0) - { - ModuleBase::WARNING_QUIT("Charge", "Number of spin-up electrons set in starting magnetization exceeds all available."); - } - // for total charge - if (std::abs(ne_up + ne_dn - PARAM.inp.nelec) > 1.0e-6) - { - ModuleBase::WARNING("Charge", "Charge is not equal to the number of electrons!"); + // for spin up + double ne_up = 0.0; + ne_up = this->cal_rho2ne (rho[0]); + if (ne_up < 0.0) + { + ModuleBase::WARNING_QUIT ( + "Charge", + "Number of spin-down electrons set in starting magnetization exceeds all available."); + } + // for spin down + double ne_dn = 0.0; + ne_dn = this->cal_rho2ne (rho[1]); + if (ne_dn < 0.0) + { + ModuleBase::WARNING_QUIT ( + "Charge", + "Number of spin-up electrons set in starting magnetization exceeds all available."); + } + // for total charge + if (std::abs (ne_up + ne_dn - PARAM.inp.nelec) > 1.0e-6) + { + ModuleBase::WARNING ("Charge", "Charge is not equal to the number of electrons!"); + } } - } } // LiuXh add 20180619 -void Charge::init_final_scf() +void + Charge::init_final_scf () { - ModuleBase::TITLE("Charge", 
"init_after_scf"); + ModuleBase::TITLE ("Charge", "init_after_scf"); - assert(allocate_rho_final_scf == false); + assert (allocate_rho_final_scf == false); if (PARAM.inp.test_charge > 1) - { - std::cout << "\n spin_number = " << PARAM.inp.nspin << " real_point_number = " << this->rhopw->nrxx << std::endl; - } + { + std::cout << "\n spin_number = " << PARAM.inp.nspin << " real_point_number = " << this->rhopw->nrxx + << std::endl; + } // allocate memory rho = new double*[PARAM.inp.nspin]; @@ -722,30 +793,30 @@ void Charge::init_final_scf() rhog_save = new std::complex*[PARAM.inp.nspin]; for (int is = 0; is < PARAM.inp.nspin; is++) - { - rho[is] = new double[this->rhopw->nrxx]; - rhog[is] = new std::complex[this->rhopw->npw]; - rho_save[is] = new double[this->rhopw->nrxx]; - rhog_save[is] = new std::complex[this->rhopw->npw]; - ModuleBase::GlobalFunc::ZEROS(rho[is], this->rhopw->nrxx); - ModuleBase::GlobalFunc::ZEROS(rhog[is], this->rhopw->npw); - ModuleBase::GlobalFunc::ZEROS(rho_save[is], this->rhopw->nrxx); - ModuleBase::GlobalFunc::ZEROS(rhog_save[is], this->rhopw->npw); - } + { + rho[is] = new double[this->rhopw->nrxx]; + rhog[is] = new std::complex[this->rhopw->npw]; + rho_save[is] = new double[this->rhopw->nrxx]; + rhog_save[is] = new std::complex[this->rhopw->npw]; + ModuleBase::GlobalFunc::ZEROS (rho[is], this->rhopw->nrxx); + ModuleBase::GlobalFunc::ZEROS (rhog[is], this->rhopw->npw); + ModuleBase::GlobalFunc::ZEROS (rho_save[is], this->rhopw->nrxx); + ModuleBase::GlobalFunc::ZEROS (rhog_save[is], this->rhopw->npw); + } - ModuleBase::Memory::record("Chg::rho", sizeof(double) * PARAM.inp.nspin * this->rhopw->nrxx); - ModuleBase::Memory::record("Chg::rho_save", sizeof(double) * PARAM.inp.nspin * this->rhopw->nrxx); - ModuleBase::Memory::record("Chg::rhog", sizeof(double) * PARAM.inp.nspin * this->rhopw->npw); - ModuleBase::Memory::record("Chg::rhog_save", sizeof(double) * PARAM.inp.nspin * this->rhopw->npw); + ModuleBase::Memory::record ("Chg::rho", sizeof 
(double) * PARAM.inp.nspin * this->rhopw->nrxx); + ModuleBase::Memory::record ("Chg::rho_save", sizeof (double) * PARAM.inp.nspin * this->rhopw->nrxx); + ModuleBase::Memory::record ("Chg::rhog", sizeof (double) * PARAM.inp.nspin * this->rhopw->npw); + ModuleBase::Memory::record ("Chg::rhog_save", sizeof (double) * PARAM.inp.nspin * this->rhopw->npw); this->rho_core = new double[this->rhopw->nrxx]; // core charge in real space - ModuleBase::GlobalFunc::ZEROS(rho_core, this->rhopw->nrxx); + ModuleBase::GlobalFunc::ZEROS (rho_core, this->rhopw->nrxx); this->rhog_core = new std::complex[this->rhopw->npw]; // reciprocal core charge - ModuleBase::GlobalFunc::ZEROS(rhog_core, this->rhopw->npw); + ModuleBase::GlobalFunc::ZEROS (rhog_core, this->rhopw->npw); - ModuleBase::Memory::record("Chg::rho_core", sizeof(double) * this->rhopw->nrxx); - ModuleBase::Memory::record("Chg::rhog_core", sizeof(double) * this->rhopw->npw); + ModuleBase::Memory::record ("Chg::rho_core", sizeof (double) * this->rhopw->nrxx); + ModuleBase::Memory::record ("Chg::rhog_core", sizeof (double) * this->rhopw->npw); this->allocate_rho_final_scf = true; return; diff --git a/source/source_estate/module_charge/charge.h b/source/source_estate/module_charge/charge.h index 9064dfc5fb8..926a05fb78b 100644 --- a/source/source_estate/module_charge/charge.h +++ b/source/source_estate/module_charge/charge.h @@ -9,7 +9,7 @@ // #include "source_estate/fp_energy.h" #include "source_pw/module_pwdft/parallel_grid.h" -//a forward declaration of UnitCell +// a forward declaration of UnitCell class UnitCell; // Electron Charge Density @@ -17,9 +17,8 @@ class Charge { public: - - Charge(); - ~Charge(); + Charge (); + ~Charge (); //========================================================== // MEMBER VARIABLES : @@ -33,38 +32,36 @@ class Charge // NAME : rhog_core [ngm], the core charge in reciprocal space //========================================================== - double **rho = nullptr; - double **rho_save = nullptr; + 
double** rho = nullptr; + double** rho_save = nullptr; - std::complex **rhog = nullptr; - std::complex **rhog_save = nullptr; + std::complex** rhog = nullptr; + std::complex** rhog_save = nullptr; - double **kin_r = nullptr; // kinetic energy density in real space, for meta-GGA - double **kin_r_save = nullptr; // kinetic energy density in real space, for meta-GGA + double** kin_r = nullptr; // kinetic energy density in real space, for meta-GGA + double** kin_r_save = nullptr; // kinetic energy density in real space, for meta-GGA const Parallel_Grid* pgrid = nullptr; private: - - //temporary - double *_space_rho = nullptr; - double *_space_rho_save = nullptr; - std::complex *_space_rhog = nullptr; - std::complex *_space_rhog_save = nullptr; - double *_space_kin_r = nullptr; - double *_space_kin_r_save = nullptr; + // temporary + double* _space_rho = nullptr; + double* _space_rho_save = nullptr; + std::complex* _space_rhog = nullptr; + std::complex* _space_rhog_save = nullptr; + double* _space_kin_r = nullptr; + double* _space_kin_r_save = nullptr; public: + double** nhat = nullptr; // compensation charge for PAW + double** nhat_save = nullptr; // compensation charge for PAW + // wenfei 2023-09-05 - double **nhat = nullptr; //compensation charge for PAW - double **nhat_save = nullptr; //compensation charge for PAW - // wenfei 2023-09-05 - - double *rho_core = nullptr; - std::complex *rhog_core = nullptr; + double* rho_core = nullptr; + std::complex* rhog_core = nullptr; int prenspin = 1; - void set_rhopw(ModulePW::PW_Basis* rhopw_in); + void set_rhopw (ModulePW::PW_Basis* rhopw_in); /** * @brief Init charge density from file or atomic pseudo-wave-functions @@ -76,93 +73,91 @@ class Charge * @param klist [in] k points list if needed * @param wfcpw [in] PW basis for wave function if needed */ - void init_rho(const UnitCell& ucell, - const Parallel_Grid& pgrid, - const ModuleBase::ComplexMatrix& strucFac, - ModuleSymmetry::Symmetry& symm, - const void* klist = nullptr, 
- const void* wfcpw = nullptr); + void init_rho (const UnitCell& ucell, + const Parallel_Grid& pgrid, + const ModuleBase::ComplexMatrix& strucFac, + ModuleSymmetry::Symmetry& symm, + const void* klist = nullptr, + const void* wfcpw = nullptr); // mohan add 2025-12-02 - bool kin_density(); + bool kin_density (); - void allocate(const int &nspin_in, const bool kin_den); + void allocate (const int& nspin_in, const bool kin_den); - void atomic_rho(const int spin_number_need, - const double& omega, - double** rho_in, - const ModuleBase::ComplexMatrix& strucFac, - const UnitCell& ucell) const; + void atomic_rho (const int spin_number_need, + const double& omega, + double** rho_in, + const ModuleBase::ComplexMatrix& strucFac, + const UnitCell& ucell) const; - void set_rho_core(const UnitCell& ucell, - const ModuleBase::ComplexMatrix& structure_factor, - const bool* numeric); + void set_rho_core (const UnitCell& ucell, const ModuleBase::ComplexMatrix& structure_factor, const bool* numeric); - void renormalize_rho(); + void renormalize_rho (); - double sum_rho() const; + double sum_rho () const; - void save_rho_before_sum_band(); + void save_rho_before_sum_band (); - // for non-linear core correction - void non_linear_core_correction - ( - const bool &numeric, - const double omega, - const double tpiba2, - const int mesh, - const double *r, - const double *rab, - const double *rhoc, - double *rhocg - ) const; + // for non-linear core correction + void non_linear_core_correction (const bool& numeric, + const double omega, + const double tpiba2, + const int mesh, + const double* r, + const double* rab, + const double* rhoc, + double* rhocg) const; - double cal_rho2ne(const double *rho_in) const; + double cal_rho2ne (const double* rho_in) const; - void check_rho(); // to check whether the charge density is normal + void check_rho (); // to check whether the charge density is normal - void init_final_scf(); //LiuXh add 20180619 + void init_final_scf (); // LiuXh add 20180619 - 
public: + public: /** * @brief init some arrays for mpi_inter_pools, rho_mpi */ - void init_chgmpi(); + void init_chgmpi (); /** * @brief Sum rho at different pools (k-point parallelism). * Only used when GlobalV::KPAR > 1 */ - void rho_mpi(); + void rho_mpi (); /** * @brief Sum kin_r at different pools (k-point/band parallelism). * Only used when GlobalV::KPAR * bndpar > 1 */ - void kin_r_mpi(); + void kin_r_mpi (); - /** - * @brief Reduce among different pools + /** + * @brief Reduce among different pools * If NPROC_IN_POOLs are all the same, use GlobalV::KP_WORLD * else, gather rho in a POOL, and then reduce among different POOLs - * - * @param array_rho f(rho): an array [nrxx] - */ - void reduce_diff_pools(double* array_rho) const; + * + * @param array_rho f(rho): an array [nrxx] + */ + void reduce_diff_pools (double* array_rho) const; - void set_omega(double* omega_in){this->omega_ = omega_in;}; + void + set_omega (double* omega_in) + { + this->omega_ = omega_in; + }; // mohan add 2021-02-20 - int nrxx=0; // number of r vectors in this processor - int nxyz = 0; // total number of r vectors - int ngmc=0; // number of g vectors in this processor - int nspin=0; // number of spins - ModulePW::PW_Basis* rhopw = nullptr;// When double_grid is used, rhopw = rhodpw (dense grid) - bool cal_elf = false; // whether to calculate electron localization function (ELF) + int nrxx = 0; // number of r vectors in this processor + int nxyz = 0; // total number of r vectors + int ngmc = 0; // number of g vectors in this processor + int nspin = 0; // number of spins + ModulePW::PW_Basis* rhopw = nullptr; // When double_grid is used, rhopw = rhodpw (dense grid) + bool cal_elf = false; // whether to calculate electron localization function (ELF) private: - - void destroy(); // free arrays liuyu 2023-03-12 + void destroy (); // free arrays liuyu 2023-03-12 double* omega_ = nullptr; // omega for non-linear core correction @@ -171,10 +166,9 @@ class Charge bool allocate_rho_final_scf; 
// LiuXh add 20180606 #ifdef __MPI - int *rec = nullptr; //The number of elements each process should receive into the receive buffer. - int *dis = nullptr; //The displacement (relative to recvbuf) for each process in the receive buffer. + int* rec = nullptr; // The number of elements each process should receive into the receive buffer. + int* dis = nullptr; // The displacement (relative to recvbuf) for each process in the receive buffer. #endif - }; #endif // charge diff --git a/source/source_estate/module_charge/charge_extra.cpp b/source/source_estate/module_charge/charge_extra.cpp index 7513469a31b..83fc30b6ebe 100644 --- a/source/source_estate/module_charge/charge_extra.cpp +++ b/source/source_estate/module_charge/charge_extra.cpp @@ -6,84 +6,83 @@ #include "source_base/tool_threading.h" #include "source_io/module_output/cube_io.h" -Charge_Extra::Charge_Extra() -{ -} +Charge_Extra::Charge_Extra () {} -Charge_Extra::~Charge_Extra() +Charge_Extra::~Charge_Extra () { - if(pot_order == 3) - { - delete[] dis_old1; - delete[] dis_old2; - delete[] dis_now; - } + if (pot_order == 3) + { + delete[] dis_old1; + delete[] dis_old2; + delete[] dis_now; + } } -void Charge_Extra::Init_CE(const int& nspin, const int& natom, const int& nrxx, const std::string chg_extrap) +void + Charge_Extra::Init_CE (const int& nspin, const int& natom, const int& nrxx, const std::string chg_extrap) { if (chg_extrap == "none") - { - pot_order = 0; - } + { + pot_order = 0; + } else if (chg_extrap == "atomic") - { - pot_order = 1; - } + { + pot_order = 1; + } else if (chg_extrap == "first-order") - { - pot_order = 2; - } + { + pot_order = 2; + } else if (chg_extrap == "second-order") - { - pot_order = 3; - } + { + pot_order = 3; + } else - { - ModuleBase::WARNING_QUIT("Charge_Extra","charge extrapolation method is not available !"); - } + { + ModuleBase::WARNING_QUIT ("Charge_Extra", "charge extrapolation method is not available !"); + } this->nspin = nspin; if (pot_order > 0) - { - // 
delta_rho1.resize(this->nspin, std::vector(nrxx, 0.0)); - // delta_rho2.resize(this->nspin, std::vector(nrxx, 0.0)); - // delta_rho3.resize(this->nspin, std::vector(nrxx, 0.0)); - // qianrui replace the above code with the following code. - // The above code cannot passed valgrind tests, which has an invalid read of size 32. - delta_rho1.resize(this->nspin); - delta_rho2.resize(this->nspin); - delta_rho3.resize(this->nspin); - for (int is = 0; is < this->nspin; is++) { - delta_rho1[is].resize(nrxx, 0.0); - delta_rho2[is].resize(nrxx, 0.0); - delta_rho3[is].resize(nrxx, 0.0); + // delta_rho1.resize(this->nspin, std::vector(nrxx, 0.0)); + // delta_rho2.resize(this->nspin, std::vector(nrxx, 0.0)); + // delta_rho3.resize(this->nspin, std::vector(nrxx, 0.0)); + // qianrui replace the above code with the following code. + // The above code cannot passed valgrind tests, which has an invalid read of size 32. + delta_rho1.resize (this->nspin); + delta_rho2.resize (this->nspin); + delta_rho3.resize (this->nspin); + for (int is = 0; is < this->nspin; is++) + { + delta_rho1[is].resize (nrxx, 0.0); + delta_rho2[is].resize (nrxx, 0.0); + delta_rho3[is].resize (nrxx, 0.0); + } } - } - if(pot_order == 3) - { - dis_old1 = new ModuleBase::Vector3[natom]; - dis_old2 = new ModuleBase::Vector3[natom]; - dis_now = new ModuleBase::Vector3[natom]; - } + if (pot_order == 3) + { + dis_old1 = new ModuleBase::Vector3[natom]; + dis_old2 = new ModuleBase::Vector3[natom]; + dis_now = new ModuleBase::Vector3[natom]; + } alpha = 1.0; - beta = 0.0; + beta = 0.0; } -void Charge_Extra::extrapolate_charge( - Parallel_Grid* Pgrid, - UnitCell& ucell, - Charge* chr, - Structure_Factor* sf, - std::ofstream& ofs_running, - std::ofstream& ofs_warning) +void + Charge_Extra::extrapolate_charge (Parallel_Grid* Pgrid, + UnitCell& ucell, + Charge* chr, + Structure_Factor* sf, + std::ofstream& ofs_running, + std::ofstream& ofs_warning) { - ModuleBase::TITLE("Charge_Extra","extrapolate_charge"); - 
ModuleBase::timer::start("Charge_Extra", "extrapolate_charge"); + ModuleBase::TITLE ("Charge_Extra", "extrapolate_charge"); + ModuleBase::timer::start ("Charge_Extra", "extrapolate_charge"); //------------------------------------------------------- // Charge density extrapolation: // @@ -91,12 +90,12 @@ void Charge_Extra::extrapolate_charge( // * pot_order=1 : subtract old atomic charge density and sum the new // if dynamics is done the routine extrapolates also the difference // between the scf charge and the atomic one; - // * pot_order=2 : first order extrapolation: + // * pot_order=2 : first order extrapolation: // \[ \rho(t+dt) = 2\ \rho(t)-\rho(t-dt); \] // * pot_order=3 : second order extrapolation: // \[ \rho(t+dt) = \rho(t) + \alpha_0\ (\rho(t) - \rho(t-dt)) // + \beta_0\ (\rho(t-dt)- \rho(t-2 dt)). \] - // + // // The \(\alpha_0\) and \(\beta_0\) parameters are calculated in find_alpha_and_beta() // so that \(|\tau'-\tau(t+dt)|\) is minimum. \(\tau'\) and \(\tau(t+dt)\) are respectively // the atomic positions at time t+dt and the extrapolated one: @@ -104,153 +103,155 @@ void Charge_Extra::extrapolate_charge( // + \beta_0\ ( \tau(t-dt) - \tau(t-2 dt) ). \] //------------------------------------------------------- - rho_extr = std::min(istep, pot_order); - if(rho_extr == 0) - { - sf->setup(&ucell, *Pgrid, chr->rhopw); - ofs_running << " charge density from previous step !" << std::endl; - ModuleBase::timer::end("Charge_Extra", "extrapolate_charge"); - return; - } - + rho_extr = std::min (istep, pot_order); + if (rho_extr == 0) + { + sf->setup (&ucell, *Pgrid, chr->rhopw); + ofs_running << " charge density from previous step !" << std::endl; + ModuleBase::timer::end ("Charge_Extra", "extrapolate_charge"); + return; + } // if(lsda || noncolin) rho2zeta(); - if(rho_extr == 1) - { - ofs_running << " NEW-OLD atomic charge density approx. for the potential !" << std::endl; + if (rho_extr == 1) + { + ofs_running << " NEW-OLD atomic charge density approx. 
for the potential !" << std::endl; #ifdef _OPENMP #pragma omp parallel for collapse(2) schedule(static, 128) #endif - for (int is = 0; is < this->nspin; is++) - { - for (int ir = 0; ir < chr->rhopw->nrxx; ir++) - { - chr->rho[is][ir] = delta_rho1[is][ir]; - } + for (int is = 0; is < this->nspin; is++) + { + for (int ir = 0; ir < chr->rhopw->nrxx; ir++) + { + chr->rho[is][ir] = delta_rho1[is][ir]; + } + } } - } // first order extrapolation - else if(rho_extr ==2) - { - ofs_running << " first order charge density extrapolation !" << std::endl; + else if (rho_extr == 2) + { + ofs_running << " first order charge density extrapolation !" << std::endl; #ifdef _OPENMP #pragma omp parallel for collapse(2) schedule(static, 128) #endif - for (int is = 0; is < this->nspin; is++) - { - for (int ir = 0; ir < chr->rhopw->nrxx; ir++) - { - chr->rho[is][ir] = 2 * delta_rho1[is][ir] - delta_rho2[is][ir]; - } + for (int is = 0; is < this->nspin; is++) + { + for (int ir = 0; ir < chr->rhopw->nrxx; ir++) + { + chr->rho[is][ir] = 2 * delta_rho1[is][ir] - delta_rho2[is][ir]; + } + } } - } // second order extrapolation else - { - ofs_running << " second order charge density extrapolation !" << std::endl; + { + ofs_running << " second order charge density extrapolation !" 
<< std::endl; - find_alpha_and_beta(ucell.nat, ofs_running, ofs_warning); + find_alpha_and_beta (ucell.nat, ofs_running, ofs_warning); - const double one_add_alpha = 1 + alpha; - const double beta_alpha = beta - alpha; + const double one_add_alpha = 1 + alpha; + const double beta_alpha = beta - alpha; #ifdef _OPENMP #pragma omp parallel for collapse(2) schedule(static, 64) #endif - for (int is = 0; is < this->nspin; is++) - { - for (int ir = 0; ir < chr->rhopw->nrxx; ir++) - { - chr->rho[is][ir] - = one_add_alpha * delta_rho1[is][ir] + beta_alpha * delta_rho2[is][ir] - beta * delta_rho3[is][ir]; - } + for (int is = 0; is < this->nspin; is++) + { + for (int ir = 0; ir < chr->rhopw->nrxx; ir++) + { + chr->rho[is][ir] = one_add_alpha * delta_rho1[is][ir] + beta_alpha * delta_rho2[is][ir] + - beta * delta_rho3[is][ir]; + } + } } - } - sf->setup(&ucell, *Pgrid, chr->rhopw); + sf->setup (&ucell, *Pgrid, chr->rhopw); double** rho_atom = new double*[this->nspin]; for (int is = 0; is < this->nspin; is++) - { - rho_atom[is] = new double[chr->rhopw->nrxx]; - } - chr->atomic_rho(this->nspin, ucell.omega, rho_atom, sf->strucFac, ucell); + { + rho_atom[is] = new double[chr->rhopw->nrxx]; + } + chr->atomic_rho (this->nspin, ucell.omega, rho_atom, sf->strucFac, ucell); #ifdef _OPENMP #pragma omp parallel for collapse(2) schedule(static, 512) #endif for (int is = 0; is < this->nspin; is++) - { - for(int ir=0; irrhopw->nrxx; ir++) { - chr->rho[is][ir] /= ucell.omega; - chr->rho[is][ir] += rho_atom[is][ir]; + for (int ir = 0; ir < chr->rhopw->nrxx; ir++) + { + chr->rho[is][ir] /= ucell.omega; + chr->rho[is][ir] += rho_atom[is][ir]; + } } - } for (int is = 0; is < this->nspin; is++) - { - delete[] rho_atom[is]; - } + { + delete[] rho_atom[is]; + } delete[] rho_atom; - ModuleBase::timer::end("Charge_Extra", "extrapolate_charge"); + ModuleBase::timer::end ("Charge_Extra", "extrapolate_charge"); return; } -void Charge_Extra::find_alpha_and_beta(const int& natom, std::ofstream& 
ofs_running, std::ofstream& ofs_warning) +void + Charge_Extra::find_alpha_and_beta (const int& natom, std::ofstream& ofs_running, std::ofstream& ofs_warning) { - if(istep < 3) return; + if (istep < 3) + { + return; + } double a11 = 0.0; double a12 = 0.0; double a21 = 0.0; double a22 = 0.0; - double b1 = 0.0; - double b2 = 0.0; - double c = 0.0; + double b1 = 0.0; + double b2 = 0.0; + double c = 0.0; double det = 0.0; #ifdef _OPENMP -#pragma omp parallel for schedule(static, 16) \ - reduction(+:a11) reduction(+:a12) reduction(+:a22) \ - reduction(+:b1) reduction(+:b2) reduction(+:c) +#pragma omp parallel for schedule(static, 16) reduction(+ : a11) reduction(+ : a12) reduction(+ : a22) \ + reduction(+ : b1) reduction(+ : b2) reduction(+ : c) #endif - for(int i=0; i0); + assert (nspin > 0); std::cout << " START CHARGE : " << PARAM.inp.init_chg << std::endl; // we need to set the omega for the charge density - set_omega(&ucell.omega); + set_omega (&ucell.omega); this->pgrid = &pgrid; bool read_error = false; bool read_kin_error = false; if (PARAM.inp.init_chg == "file" || PARAM.inp.init_chg == "auto") - { - GlobalV::ofs_running << " Read electron density from file" << std::endl; - - // try to read charge from binary file first, which is the same as QE - // liuyu 2023-12-05 - std::stringstream binary; - binary << PARAM.globalv.global_readin_dir << PARAM.inp.suffix + "-CHARGE-DENSITY.restart"; - if (ModuleIO::read_rhog(binary.str(), rhopw, rhog)) { - GlobalV::ofs_running << " Read electron density from file: " << binary.str() << std::endl; - for (int is = 0; is < nspin; ++is) - { - rhopw->recip2real(rhog[is], rho[is]); - } - } - else - { - for (int is = 0; is < nspin; ++is) - { - std::stringstream ssc; - - if(nspin==1) - { - ssc << PARAM.globalv.global_readin_dir << "chg.cube"; - } - else - { - ssc << PARAM.globalv.global_readin_dir << "chgs" << is + 1 << ".cube"; - } - - - if (ModuleIO::read_vdata_palgrid(pgrid, - (PARAM.inp.esolver_type == "sdft" ? 
GlobalV::RANK_IN_BPGROUP : GlobalV::MY_RANK), - GlobalV::ofs_running, - ssc.str(), - this->rho[is], - ucell.nat)) - { - GlobalV::ofs_running << " Read electron density from file: " << ssc.str() << std::endl; - } - else if (is > 0) // nspin=2 or 4 + GlobalV::ofs_running << " Read electron density from file" << std::endl; + + // try to read charge from binary file first, which is the same as QE + // liuyu 2023-12-05 + std::stringstream binary; + binary << PARAM.globalv.global_readin_dir << PARAM.inp.suffix + "-CHARGE-DENSITY.restart"; + if (ModuleIO::read_rhog (binary.str (), rhopw, rhog)) { - if (is == 1) // failed at the second spin - { - std::cout << " Incomplete electron density file." << std::endl; - read_error = true; - break; - } - else if (is == 2) // read 2 files when nspin=4 - { - GlobalV::ofs_running << " Didn't read in the electron density but would rearrange it later. " - << std::endl; - } - else if (is == 3) // read 2 files when nspin=4 - { - GlobalV::ofs_running << " rearrange electron density " << std::endl; - for (int ir = 0; ir < this->rhopw->nrxx; ir++) + GlobalV::ofs_running << " Read electron density from file: " << binary.str () << std::endl; + for (int is = 0; is < nspin; ++is) { - this->rho[3][ir] = this->rho[0][ir] - this->rho[1][ir]; - this->rho[0][ir] = this->rho[0][ir] + this->rho[1][ir]; - this->rho[1][ir] = 0.0; - this->rho[2][ir] = 0.0; + rhopw->recip2real (rhog[is], rho[is]); } - } } - else + else { - read_error = true; - break; + for (int is = 0; is < nspin; ++is) + { + std::stringstream ssc; + + if (nspin == 1) + { + ssc << PARAM.globalv.global_readin_dir << "chg.cube"; + } + else + { + ssc << PARAM.globalv.global_readin_dir << "chgs" << is + 1 << ".cube"; + } + + if (ModuleIO::read_vdata_palgrid ( + pgrid, + (PARAM.inp.esolver_type == "sdft" ? 
GlobalV::RANK_IN_BPGROUP : GlobalV::MY_RANK), + GlobalV::ofs_running, + ssc.str (), + this->rho[is], + ucell.nat)) + { + GlobalV::ofs_running << " Read electron density from file: " << ssc.str () + << std::endl; + } + else if (is > 0) // nspin=2 or 4 + { + if (is == 1) // failed at the second spin + { + std::cout << " Incomplete electron density file." << std::endl; + read_error = true; + break; + } + else if (is == 2) // read 2 files when nspin=4 + { + GlobalV::ofs_running + << " Didn't read in the electron density but would rearrange it later. " + << std::endl; + } + else if (is == 3) // read 2 files when nspin=4 + { + GlobalV::ofs_running << " rearrange electron density " << std::endl; + for (int ir = 0; ir < this->rhopw->nrxx; ir++) + { + this->rho[3][ir] = this->rho[0][ir] - this->rho[1][ir]; + this->rho[0][ir] = this->rho[0][ir] + this->rho[1][ir]; + this->rho[1][ir] = 0.0; + this->rho[2][ir] = 0.0; + } + } + } + else + { + read_error = true; + break; + } + } } - } - } - if (read_error) - { - const std::string warn_msg - = " WARNING: \"init_chg\" is enabled but ABACUS failed to read\n charge density from file.\n" - " Please check if there is chg.cube (for nspin=1) or chgsx.cube (x=1,2,etc.) 
or\n {suffix}-CHARGE-DENSITY.restart in the " - "directory.\n"; - std::cout << warn_msg; - if (PARAM.inp.init_chg == "file") - { - ModuleBase::WARNING_QUIT("Charge::init_rho", - "Failed to read in charge density from file.\n For initializing atomic " - "charge in calculations,\n please set init_chg to atomic in INPUT."); - } - } - - if (XC_Functional::get_ked_flag()) - { - // If the charge density is not read in, then the kinetic energy density is not read in either - if (!read_error) - { - GlobalV::ofs_running << " try to read kinetic energy density from file" << std::endl; - // try to read charge from binary file first, which is the same as QE - std::vector> kin_g_space(nspin * this->ngmc, {0.0, 0.0}); - std::vector*> kin_g; - for (int is = 0; is < nspin; is++) + if (read_error) { - kin_g.push_back(kin_g_space.data() + is * this->ngmc); + const std::string warn_msg + = " WARNING: \"init_chg\" is enabled but ABACUS failed to read\n charge density from file.\n" + " Please check if there is chg.cube (for nspin=1) or chgsx.cube (x=1,2,etc.) 
or\n " + "{suffix}-CHARGE-DENSITY.restart in the " + "directory.\n"; + std::cout << warn_msg; + if (PARAM.inp.init_chg == "file") + { + ModuleBase::WARNING_QUIT ( + "Charge::init_rho", + "Failed to read in charge density from file.\n For initializing atomic " + "charge in calculations,\n please set init_chg to atomic in INPUT."); + } } - std::stringstream binary; - binary << PARAM.globalv.global_readin_dir << PARAM.inp.suffix + "-TAU-DENSITY.restart"; - if (ModuleIO::read_rhog(binary.str(), rhopw, kin_g.data())) - { - GlobalV::ofs_running << " Read in the kinetic energy density: " << binary.str() << std::endl; - for (int is = 0; is < nspin; ++is) - { - rhopw->recip2real(kin_g[is], this->kin_r[is]); - } - } - else + if (XC_Functional::get_ked_flag ()) { - for (int is = 0; is < nspin; is++) - { - std::stringstream ssc; - ssc << PARAM.globalv.global_readin_dir << "SPIN" << is + 1 << "_TAU.cube"; - // mohan update 2012-02-10, sunliang update 2023-03-09 - if (ModuleIO::read_vdata_palgrid( - pgrid, - (PARAM.inp.esolver_type == "sdft" ? 
GlobalV::RANK_IN_BPGROUP : GlobalV::MY_RANK), - GlobalV::ofs_running, - ssc.str(), - this->kin_r[is], - ucell.nat)) + // If the charge density is not read in, then the kinetic energy density is not read in either + if (!read_error) { - GlobalV::ofs_running << " Read in the kinetic energy density: " << ssc.str() << std::endl; + GlobalV::ofs_running << " try to read kinetic energy density from file" << std::endl; + // try to read charge from binary file first, which is the same as QE + std::vector> kin_g_space (nspin * this->ngmc, {0.0, 0.0}); + std::vector*> kin_g; + for (int is = 0; is < nspin; is++) + { + kin_g.push_back (kin_g_space.data () + is * this->ngmc); + } + + std::stringstream binary; + binary << PARAM.globalv.global_readin_dir << PARAM.inp.suffix + "-TAU-DENSITY.restart"; + if (ModuleIO::read_rhog (binary.str (), rhopw, kin_g.data ())) + { + GlobalV::ofs_running << " Read in the kinetic energy density: " << binary.str () + << std::endl; + for (int is = 0; is < nspin; ++is) + { + rhopw->recip2real (kin_g[is], this->kin_r[is]); + } + } + else + { + for (int is = 0; is < nspin; is++) + { + std::stringstream ssc; + ssc << PARAM.globalv.global_readin_dir << "SPIN" << is + 1 << "_TAU.cube"; + // mohan update 2012-02-10, sunliang update 2023-03-09 + if (ModuleIO::read_vdata_palgrid (pgrid, + (PARAM.inp.esolver_type == "sdft" + ? GlobalV::RANK_IN_BPGROUP + : GlobalV::MY_RANK), + GlobalV::ofs_running, + ssc.str (), + this->kin_r[is], + ucell.nat)) + { + GlobalV::ofs_running + << " Read in the kinetic energy density: " << ssc.str () + << std::endl; + } + else + { + read_kin_error = true; + std::cout + << " WARNING: \"init_chg\" is enabled but ABACUS failed to " + "read kinetic energy " + "density from file.\n" + " Please check if there is SPINX_TAU.cube (X=1,...) 
or " + "{suffix}-TAU-DENSITY.restart in the directory.\n" + << std::endl; + break; + } + } + } } - else + else { read_kin_error = true; - std::cout << " WARNING: \"init_chg\" is enabled but ABACUS failed to read kinetic energy " - "density from file.\n" - " Please check if there is SPINX_TAU.cube (X=1,...) or " - "{suffix}-TAU-DENSITY.restart in the directory.\n" - << std::endl; - break; } - } } - } - else - { - read_kin_error = true; - } } - } if (PARAM.inp.init_chg == "atomic" || read_error) - { - if (read_error) { - std::cout << " Charge::init_rho: use atomic initialization instead." << std::endl; + if (read_error) + { + std::cout << " Charge::init_rho: use atomic initialization instead." << std::endl; + } + this->atomic_rho (nspin, ucell.omega, rho, strucFac, ucell); } - this->atomic_rho(nspin, ucell.omega, rho, strucFac, ucell); - } // initial tau = 3/5 rho^2/3, Thomas-Fermi - if (XC_Functional::get_ked_flag()) - { - if (PARAM.inp.init_chg == "atomic" || read_kin_error) + if (XC_Functional::get_ked_flag ()) { - if (read_kin_error) - { - std::cout << " Charge::init_rho: init kinetic energy density from rho." << std::endl; - } - const double fact = (3.0 / 5.0) * pow(3.0 * ModuleBase::PI * ModuleBase::PI, 2.0 / 3.0); - for (int is = 0; is < nspin; ++is) - { - for (int ir = 0; ir < this->rhopw->nrxx; ++ir) + if (PARAM.inp.init_chg == "atomic" || read_kin_error) { - kin_r[is][ir] = fact * pow(std::abs(rho[is][ir]) * nspin, 5.0 / 3.0) / nspin; + if (read_kin_error) + { + std::cout << " Charge::init_rho: init kinetic energy density from rho." 
<< std::endl; + } + const double fact = (3.0 / 5.0) * pow (3.0 * ModuleBase::PI * ModuleBase::PI, 2.0 / 3.0); + for (int is = 0; is < nspin; ++is) + { + for (int ir = 0; ir < this->rhopw->nrxx; ++ir) + { + kin_r[is][ir] = fact * pow (std::abs (rho[is][ir]) * nspin, 5.0 / 3.0) / nspin; + } + } } - } } - } // Peize Lin add 2020.04.04 if (GlobalC::restart.info_load.load_charge && !GlobalC::restart.info_load.load_charge_finish) - { - for (int is = 0; is < nspin; ++is) { - try - { - GlobalC::restart.load_disk("charge", is, this->nrxx, rho[is]); - } - catch (const std::exception& e) - { - // try to load from the output of `out_chg` - std::stringstream ssc; - ssc << PARAM.globalv.global_readin_dir << "chgs" << is + 1 << ".cube"; - if (ModuleIO::read_vdata_palgrid(pgrid, - (PARAM.inp.esolver_type == "sdft" ? GlobalV::RANK_IN_BPGROUP : GlobalV::MY_RANK), - GlobalV::ofs_running, - ssc.str(), - this->rho[is], - ucell.nat)) + for (int is = 0; is < nspin; ++is) { - GlobalV::ofs_running << " Read in electron density: " << ssc.str() << std::endl; + try + { + GlobalC::restart.load_disk ("charge", is, this->nrxx, rho[is]); + } + catch (const std::exception& e) + { + // try to load from the output of `out_chg` + std::stringstream ssc; + ssc << PARAM.globalv.global_readin_dir << "chgs" << is + 1 << ".cube"; + if (ModuleIO::read_vdata_palgrid ( + pgrid, + (PARAM.inp.esolver_type == "sdft" ? 
GlobalV::RANK_IN_BPGROUP : GlobalV::MY_RANK), + GlobalV::ofs_running, + ssc.str (), + this->rho[is], + ucell.nat)) + { + GlobalV::ofs_running << " Read in electron density: " << ssc.str () << std::endl; + } + } } - } + GlobalC::restart.info_load.load_charge_finish = true; } - GlobalC::restart.info_load.load_charge_finish = true; - } #ifdef __MPI - this->init_chgmpi(); + this->init_chgmpi (); #endif if (PARAM.inp.init_chg == "wfc") - { - if (wfcpw == nullptr) { - ModuleBase::WARNING_QUIT("Charge::init_rho", "wfc is only supported for PW-KSDFT."); - } - - const ModulePW::PW_Basis_K* pw_wfc = reinterpret_cast(const_cast(wfcpw)); - const K_Vectors* kv = reinterpret_cast(klist); + if (wfcpw == nullptr) + { + ModuleBase::WARNING_QUIT ("Charge::init_rho", "wfc is only supported for PW-KSDFT."); + } - ModuleIO::read_wf2rho_pw(pw_wfc, symm, *this, - PARAM.globalv.global_readin_dir, - GlobalV::KPAR, GlobalV::MY_POOL, GlobalV::MY_RANK, - GlobalV::NPROC_IN_POOL, GlobalV::RANK_IN_POOL, - PARAM.inp.nbands, nspin, PARAM.globalv.npol, - kv->get_nkstot(),kv->ik2iktot,kv->isk,GlobalV::ofs_running); - } + const ModulePW::PW_Basis_K* pw_wfc = reinterpret_cast (const_cast (wfcpw)); + const K_Vectors* kv = reinterpret_cast (klist); + + ModuleIO::read_wf2rho_pw (pw_wfc, + symm, + *this, + PARAM.globalv.global_readin_dir, + GlobalV::KPAR, + GlobalV::MY_POOL, + GlobalV::MY_RANK, + GlobalV::NPROC_IN_POOL, + GlobalV::RANK_IN_POOL, + PARAM.inp.nbands, + nspin, + PARAM.globalv.npol, + kv->get_nkstot (), + kv->ik2iktot, + kv->isk, + GlobalV::ofs_running); + } } //========================================================== // computes the core charge on the real space 3D mesh. 
//========================================================== -void Charge::set_rho_core(const UnitCell& ucell, - const ModuleBase::ComplexMatrix& structure_factor, - const bool* numeric) +void + Charge::set_rho_core (const UnitCell& ucell, const ModuleBase::ComplexMatrix& structure_factor, const bool* numeric) { - ModuleBase::TITLE("Charge","set_rho_core"); - ModuleBase::timer::start("Charge","set_rho_core"); + ModuleBase::TITLE ("Charge", "set_rho_core"); + ModuleBase::timer::start ("Charge", "set_rho_core"); bool bl = false; - for (int it = 0; itrho_core, this->rhopw->nrxx); - ModuleBase::timer::end("Charge","set_rho_core"); - return; - } + { + ModuleBase::GlobalFunc::ZEROS (this->rho_core, this->rhopw->nrxx); + ModuleBase::timer::end ("Charge", "set_rho_core"); + return; + } - double *rhocg = new double[this->rhopw->ngg]; - ModuleBase::GlobalFunc::ZEROS(rhocg, this->rhopw->ngg ); + double* rhocg = new double[this->rhopw->ngg]; + ModuleBase::GlobalFunc::ZEROS (rhocg, this->rhopw->ngg); - // three dimension. - std::complex *vg = new std::complex[this->rhopw->npw]; + // three dimension. 
+ std::complex* vg = new std::complex[this->rhopw->npw]; - for (int it = 0; it < ucell.ntype;it++) - { - if (ucell.atoms[it].ncpp.nlcc) + for (int it = 0; it < ucell.ntype; it++) { -//---------------------------------------------------------- -// EXPLAIN : drhoc compute the radial fourier transform for -// each shell of g vec -//---------------------------------------------------------- - this->non_linear_core_correction( - numeric, - ucell.omega, - ucell.tpiba2, - ucell.atoms[it].ncpp.msh, - ucell.atoms[it].ncpp.r.data(), - ucell.atoms[it].ncpp.rab.data(), - ucell.atoms[it].ncpp.rho_atc.data(), - rhocg); -//---------------------------------------------------------- -// EXPLAIN : multiply by the structure factor and sum -//---------------------------------------------------------- - for (int ig = 0; ig < this->rhopw->npw ; ig++) - { - vg[ig] += structure_factor(it, ig) * rhocg[this->rhopw->ig2igg[ig]]; - } + if (ucell.atoms[it].ncpp.nlcc) + { + //---------------------------------------------------------- + // EXPLAIN : drhoc compute the radial fourier transform for + // each shell of g vec + //---------------------------------------------------------- + this->non_linear_core_correction (numeric, + ucell.omega, + ucell.tpiba2, + ucell.atoms[it].ncpp.msh, + ucell.atoms[it].ncpp.r.data (), + ucell.atoms[it].ncpp.rab.data (), + ucell.atoms[it].ncpp.rho_atc.data (), + rhocg); + //---------------------------------------------------------- + // EXPLAIN : multiply by the structure factor and sum + //---------------------------------------------------------- + for (int ig = 0; ig < this->rhopw->npw; ig++) + { + vg[ig] += structure_factor (it, ig) * rhocg[this->rhopw->ig2igg[ig]]; + } + } } - } - // for tmp use. - for(int ig=0; ig< this->rhopw->npw; ig++) - { - this->rhog_core[ig] = vg[ig]; - } + // for tmp use. 
+ for (int ig = 0; ig < this->rhopw->npw; ig++) + { + this->rhog_core[ig] = vg[ig]; + } - this->rhopw->recip2real(vg, this->rho_core); + this->rhopw->recip2real (vg, this->rho_core); // test on the charge and computation of the core energy double rhoima = 0.0; double rhoneg = 0.0; for (int ir = 0; ir < this->rhopw->nrxx; ir++) - { - rhoneg += std::min(0.0, this->rhopw->fft_bundle.get_auxr_data()[ir].real()); - rhoima += std::abs(this->rhopw->fft_bundle.get_auxr_data()[ir].imag()); - // NOTE: Core charge is computed in reciprocal space and brought to real - // space by FFT. For non smooth core charges (or insufficient cut-off) - // this may result in negative values in some grid points. - // Up to October 1999 the core charge was forced to be positive definite. - // This induces an error in the force, and probably stress, calculation if - // the number of grid points where the core charge would be otherwise neg - // is large. The error disappears for sufficiently high cut-off, but may be - // rather large and it is better to leave the core charge as it is. - // If you insist to have it positive definite (with the possible problems - // mentioned above) uncomment the following lines. SdG, Oct 15 1999 - } - - // mohan fix bug 2011-04-03 - Parallel_Reduce::reduce_pool(rhoneg); - Parallel_Reduce::reduce_pool(rhoima); - - // mohan changed 2010-2-2, make this same as in atomic_rho. - // still lack something...... + { + rhoneg += std::min (0.0, this->rhopw->fft_bundle.get_auxr_data ()[ir].real ()); + rhoima += std::abs (this->rhopw->fft_bundle.get_auxr_data ()[ir].imag ()); + // NOTE: Core charge is computed in reciprocal space and brought to real + // space by FFT. For non smooth core charges (or insufficient cut-off) + // this may result in negative values in some grid points. + // Up to October 1999 the core charge was forced to be positive definite. 
+ // This induces an error in the force, and probably stress, calculation if + // the number of grid points where the core charge would be otherwise neg + // is large. The error disappears for sufficiently high cut-off, but may be + // rather large and it is better to leave the core charge as it is. + // If you insist to have it positive definite (with the possible problems + // mentioned above) uncomment the following lines. SdG, Oct 15 1999 + } + + // mohan fix bug 2011-04-03 + Parallel_Reduce::reduce_pool (rhoneg); + Parallel_Reduce::reduce_pool (rhoima); + + // mohan changed 2010-2-2, make this same as in atomic_rho. + // still lack something...... rhoneg /= this->rhopw->nxyz * ucell.omega; rhoima /= this->rhopw->nxyz * ucell.omega; // calculate core_only exch-corr energy etxcc=E_xc[rho_core] if required // The term was present in previous versions of the code but it shouldn't - delete [] rhocg; - delete [] vg; - ModuleBase::timer::end("Charge","set_rho_core"); + delete[] rhocg; + delete[] vg; + ModuleBase::timer::end ("Charge", "set_rho_core"); return; } // end subroutine set_rhoc - -void Charge::non_linear_core_correction -( - const bool &numeric, - const double omega, - const double tpiba2, - const int mesh, - const double *r, - const double *rab, - const double *rhoc, - double *rhocg) const +void + Charge::non_linear_core_correction (const bool& numeric, + const double omega, + const double tpiba2, + const int mesh, + const double* r, + const double* rab, + const double* rhoc, + double* rhocg) const { - ModuleBase::TITLE("charge","drhoc"); + ModuleBase::TITLE ("charge", "drhoc"); - // use labmda instead of repeating codes - const auto kernel = [&](int num_threads, int thread_id) - { + // use labmda instead of repeating codes + const auto kernel = [&] (int num_threads, int thread_id) + { + double gx = 0.0; + double rhocg1 = 0.0; + double* aux = nullptr; - double gx = 0.0; - double rhocg1 = 0.0; - double *aux = nullptr; + // here we compute the fourier 
transform is the charge in numeric form + if (numeric) + { + aux = new double[mesh]; + // G=0 term - // here we compute the fourier transform is the charge in numeric form - if (numeric) - { - aux = new double [mesh]; - // G=0 term + int igl0 = 0; + if (this->rhopw->gg_uniq[0] < 1.0e-8) + { + // single thread term + if (thread_id == 0) + { + for (int ir = 0; ir < mesh; ir++) + { + aux[ir] = r[ir] * r[ir] * rhoc[ir]; + } + ModuleBase::Integral::Simpson_Integral (mesh, aux, rab, rhocg1); + // rhocg [1] = fpi * rhocg1 / omega; + rhocg[0] = ModuleBase::FOUR_PI * rhocg1 / omega; // mohan modify 2008-01-19 + } + igl0 = 1; + } - int igl0 = 0; - if (this->rhopw->gg_uniq [0] < 1.0e-8) - { - // single thread term - if (thread_id == 0) - { - for (int ir = 0;ir < mesh; ir++) - { - aux [ir] = r [ir] * r [ir] * rhoc [ir]; - } - ModuleBase::Integral::Simpson_Integral(mesh, aux, rab, rhocg1); - //rhocg [1] = fpi * rhocg1 / omega; - rhocg [0] = ModuleBase::FOUR_PI * rhocg1 / omega;//mohan modify 2008-01-19 - } - igl0 = 1; - } + int igl_beg, igl_end; + // exclude igl0 + ModuleBase::TASK_DIST_1D (num_threads, thread_id, this->rhopw->ngg - igl0, igl_beg, igl_end); + igl_beg += igl0; + igl_end += igl_beg; - int igl_beg, igl_end; - // exclude igl0 - ModuleBase::TASK_DIST_1D(num_threads, thread_id, this->rhopw->ngg - igl0, igl_beg, igl_end); - igl_beg += igl0; - igl_end += igl_beg; + // G <> 0 term + for (int igl = igl_beg; igl < igl_end; igl++) + { + gx = sqrt (this->rhopw->gg_uniq[igl] * tpiba2); + ModuleBase::Sphbes::Spherical_Bessel (mesh, r, gx, 0, aux); + for (int ir = 0; ir < mesh; ir++) + { + aux[ir] = r[ir] * r[ir] * rhoc[ir] * aux[ir]; + } // enddo + ModuleBase::Integral::Simpson_Integral (mesh, aux, rab, rhocg1); + rhocg[igl] = ModuleBase::FOUR_PI * rhocg1 / omega; + } // enddo + delete[] aux; + } + else + { + // here the case where the charge is in analytic form, + // check old version before 2008-12-9 + } + }; // end kernel - // G <> 0 term - for (int igl = igl_beg; igl < 
igl_end;igl++) - { - gx = sqrt(this->rhopw->gg_uniq[igl] * tpiba2); - ModuleBase::Sphbes::Spherical_Bessel(mesh, r, gx, 0, aux); - for (int ir = 0;ir < mesh; ir++) - { - aux [ir] = r[ir] * r[ir] * rhoc [ir] * aux [ir]; - } // enddo - ModuleBase::Integral::Simpson_Integral(mesh, aux, rab, rhocg1); - rhocg [igl] = ModuleBase::FOUR_PI * rhocg1 / omega; - } // enddo - delete [] aux; - } - else - { - // here the case where the charge is in analytic form, - // check old version before 2008-12-9 - } - - }; // end kernel - - // do not use omp parallel when this function is already in parallel block - // - // it is called in parallel block in Forces::cal_force_cc, - // but not in other funtcion such as Stress_Func::stress_cc. - ModuleBase::TRY_OMP_PARALLEL(kernel); + // do not use omp parallel when this function is already in parallel block + // + // it is called in parallel block in Forces::cal_force_cc, + // but not in other funtcion such as Stress_Func::stress_cc. + ModuleBase::TRY_OMP_PARALLEL (kernel); return; } diff --git a/source/source_estate/module_charge/charge_mixing.cpp b/source/source_estate/module_charge/charge_mixing.cpp index 921d102502c..eed096ab539 100644 --- a/source/source_estate/module_charge/charge_mixing.cpp +++ b/source/source_estate/module_charge/charge_mixing.cpp @@ -6,28 +6,29 @@ #include "source_base/timer.h" #include "source_hamilt/module_xc/xc_functional.h" -Charge_Mixing::Charge_Mixing() +Charge_Mixing::Charge_Mixing () { this->mixing = nullptr; this->mixing_highf = nullptr; } -Charge_Mixing::~Charge_Mixing() +Charge_Mixing::~Charge_Mixing () { - if(this->mixing != nullptr) - { - delete this->mixing; - this->mixing = nullptr; - } + if (this->mixing != nullptr) + { + delete this->mixing; + this->mixing = nullptr; + } - if(this->mixing_highf != nullptr) - { - delete this->mixing_highf; - this->mixing_highf = nullptr; - } + if (this->mixing_highf != nullptr) + { + delete this->mixing_highf; + this->mixing_highf = nullptr; + } } -void 
Charge_Mixing::set_mixing(const std::string& mixing_mode_in, +void + Charge_Mixing::set_mixing (const std::string& mixing_mode_in, const double& mixing_beta_in, const int& mixing_ndim_in, const double& mixing_gg0_in, @@ -55,179 +56,194 @@ void Charge_Mixing::set_mixing(const std::string& mixing_mode_in, this->tpiba = &tpiba_in; // check the paramters if (this->mixing_beta > 1.0 || this->mixing_beta < 0.0) - { - ModuleBase::WARNING_QUIT("Charge_Mixing", "You'd better set mixing_beta to [0.0, 1.0]!"); - } + { + ModuleBase::WARNING_QUIT ("Charge_Mixing", "You'd better set mixing_beta to [0.0, 1.0]!"); + } if (PARAM.inp.nspin >= 2 && this->mixing_beta_mag < 0.0) - { - ModuleBase::WARNING_QUIT("Charge_Mixing", "You'd better set mixing_beta_mag >= 0.0!"); - } + { + ModuleBase::WARNING_QUIT ("Charge_Mixing", "You'd better set mixing_beta_mag >= 0.0!"); + } if (!(this->mixing_mode == "plain" || this->mixing_mode == "broyden" || this->mixing_mode == "pulay")) - { - ModuleBase::WARNING_QUIT("Charge_Mixing", "This Mixing mode is not implemended yet,coming soon."); - } + { + ModuleBase::WARNING_QUIT ("Charge_Mixing", "This Mixing mode is not implemended yet,coming soon."); + } // print into running.log - //GlobalV::ofs_running << "\n\n"; + // GlobalV::ofs_running << "\n\n"; GlobalV::ofs_running << "\n"; GlobalV::ofs_running << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" - ">>>>" << std::endl; + ">>>>" + << std::endl; GlobalV::ofs_running << " | " - " |" << std::endl; + " |" + << std::endl; GlobalV::ofs_running << " | Setup charge mixing parameters " - " |" << std::endl; + " |" + << std::endl; GlobalV::ofs_running << " | " - " |" << std::endl; + " |" + << std::endl; GlobalV::ofs_running << " <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" - "<<<<" << std::endl; + "<<<<" + << std::endl; GlobalV::ofs_running << "\n"; + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "mixing_type", this->mixing_mode); + ModuleBase::GlobalFunc::OUT 
(GlobalV::ofs_running, "mixing_beta", this->mixing_beta); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "mixing_gg0", this->mixing_gg0); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "mixing_gg0_min", PARAM.inp.mixing_gg0_min); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "mixing_type", this->mixing_mode); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "mixing_beta", this->mixing_beta); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "mixing_gg0", this->mixing_gg0); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "mixing_gg0_min", PARAM.inp.mixing_gg0_min); - - if (PARAM.inp.nspin==2 || PARAM.inp.nspin==4) - { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "mixing_beta_mag", this->mixing_beta_mag); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "mixing_gg0_mag", PARAM.inp.mixing_gg0_mag); - } + if (PARAM.inp.nspin == 2 || PARAM.inp.nspin == 4) + { + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "mixing_beta_mag", this->mixing_beta_mag); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "mixing_gg0_mag", PARAM.inp.mixing_gg0_mag); + } if (PARAM.inp.mixing_angle > 0) - { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "mixing_angle", PARAM.inp.mixing_angle); - } + { + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "mixing_angle", PARAM.inp.mixing_angle); + } - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "mixing_ndim", this->mixing_ndim); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "mixing_ndim", this->mixing_ndim); return; } -void Charge_Mixing::init_mixing() +void + Charge_Mixing::init_mixing () { // this init should be called at the 1-st iteration of each scf loop - ModuleBase::TITLE("Charge_Mixing", "init_mixing"); - ModuleBase::timer::start("Charge_Mixing", "init_mixing"); + ModuleBase::TITLE ("Charge_Mixing", "init_mixing"); + ModuleBase::timer::start ("Charge_Mixing", "init_mixing"); // (re)construct mixing object if (this->mixing_mode == "broyden") - { - delete this->mixing; - 
this->mixing = new Base_Mixing::Broyden_Mixing(this->mixing_ndim, this->mixing_beta); - } + { + delete this->mixing; + this->mixing = new Base_Mixing::Broyden_Mixing (this->mixing_ndim, this->mixing_beta); + } else if (this->mixing_mode == "plain") - { - delete this->mixing; - this->mixing = new Base_Mixing::Plain_Mixing(this->mixing_beta); - } + { + delete this->mixing; + this->mixing = new Base_Mixing::Plain_Mixing (this->mixing_beta); + } else if (this->mixing_mode == "pulay") - { - delete this->mixing; - this->mixing = new Base_Mixing::Pulay_Mixing(this->mixing_ndim, this->mixing_beta); - } + { + delete this->mixing; + this->mixing = new Base_Mixing::Pulay_Mixing (this->mixing_ndim, this->mixing_beta); + } else - { - ModuleBase::WARNING_QUIT("Charge_Mixing", "This Mixing mode is not implemended yet,coming soon."); - } + { + ModuleBase::WARNING_QUIT ("Charge_Mixing", "This Mixing mode is not implemended yet,coming soon."); + } - if ( PARAM.globalv.double_grid) - { - // ONLY smooth part of charge density is mixed by specific mixing method - // The high_frequency part is mixed by plain mixing method. - delete this->mixing_highf; - this->mixing_highf = new Base_Mixing::Plain_Mixing(this->mixing_beta); - } + if (PARAM.globalv.double_grid) + { + // ONLY smooth part of charge density is mixed by specific mixing method + // The high_frequency part is mixed by plain mixing method. 
+ delete this->mixing_highf; + this->mixing_highf = new Base_Mixing::Plain_Mixing (this->mixing_beta); + } // allocate memory for mixing data, if exists, free it first and then allocate new memory // initailize rho_mdata if (PARAM.inp.scf_thr_type == 1) - { - if (PARAM.inp.nspin == 4 && PARAM.inp.mixing_angle > 0 ) - { - this->mixing->init_mixing_data(this->rho_mdata, - this->rhopw->npw * 2, - sizeof(std::complex)); - } - else { - this->mixing->init_mixing_data(this->rho_mdata, - this->rhopw->npw * PARAM.inp.nspin, - sizeof(std::complex)); + if (PARAM.inp.nspin == 4 && PARAM.inp.mixing_angle > 0) + { + this->mixing->init_mixing_data (this->rho_mdata, + this->rhopw->npw * 2, + sizeof (std::complex)); + } + else + { + this->mixing->init_mixing_data (this->rho_mdata, + this->rhopw->npw * PARAM.inp.nspin, + sizeof (std::complex)); + } } - } else - { - if (PARAM.inp.nspin == 4 && PARAM.inp.mixing_angle > 0 ) - { - this->mixing->init_mixing_data(this->rho_mdata, this->rhopw->nrxx * 2, sizeof(double)); - } - else { - this->mixing->init_mixing_data(this->rho_mdata, this->rhopw->nrxx * PARAM.inp.nspin, sizeof(double)); + if (PARAM.inp.nspin == 4 && PARAM.inp.mixing_angle > 0) + { + this->mixing->init_mixing_data (this->rho_mdata, this->rhopw->nrxx * 2, sizeof (double)); + } + else + { + this->mixing->init_mixing_data (this->rho_mdata, + this->rhopw->nrxx * PARAM.inp.nspin, + sizeof (double)); + } } - } - + // initailize tau_mdata - if ((XC_Functional::get_ked_flag()) && mixing_tau) - { - if (PARAM.inp.scf_thr_type == 1) - { - this->mixing->init_mixing_data(this->tau_mdata, - this->rhopw->npw * PARAM.inp.nspin, - sizeof(std::complex)); - } - else + if ((XC_Functional::get_ked_flag ()) && mixing_tau) { - this->mixing->init_mixing_data(this->tau_mdata, this->rhopw->nrxx * PARAM.inp.nspin, sizeof(double)); + if (PARAM.inp.scf_thr_type == 1) + { + this->mixing->init_mixing_data (this->tau_mdata, + this->rhopw->npw * PARAM.inp.nspin, + sizeof (std::complex)); + } + else + { + 
this->mixing->init_mixing_data (this->tau_mdata, + this->rhopw->nrxx * PARAM.inp.nspin, + sizeof (double)); + } } - } - ModuleBase::timer::end("Charge_Mixing", "init_mixing"); + ModuleBase::timer::end ("Charge_Mixing", "init_mixing"); return; } -void Charge_Mixing::set_rhopw(ModulePW::PW_Basis* rhopw_in, ModulePW::PW_Basis* rhodpw_in) +void + Charge_Mixing::set_rhopw (ModulePW::PW_Basis* rhopw_in, ModulePW::PW_Basis* rhodpw_in) { this->rhopw = rhopw_in; this->rhodpw = rhodpw_in; } -void Charge_Mixing::mix_reset() +void + Charge_Mixing::mix_reset () { - this->mixing->reset(); - this->rho_mdata.reset(); + this->mixing->reset (); + this->rho_mdata.reset (); // initailize tau_mdata - if ((XC_Functional::get_ked_flag()) && mixing_tau) - { - this->tau_mdata.reset(); - } + if ((XC_Functional::get_ked_flag ()) && mixing_tau) + { + this->tau_mdata.reset (); + } } -bool Charge_Mixing::if_scf_oscillate(const int iteration, const double drho, const int iternum_used, const double threshold) +bool + Charge_Mixing::if_scf_oscillate (const int iteration, + const double drho, + const int iternum_used, + const double threshold) { - ModuleBase::TITLE("Charge_Mixing", "if_scf_oscillate"); + ModuleBase::TITLE ("Charge_Mixing", "if_scf_oscillate"); - if(this->_drho_history.size() == 0) - { - this->_drho_history.resize(PARAM.inp.scf_nmax); - } + if (this->_drho_history.size () == 0) + { + this->_drho_history.resize (PARAM.inp.scf_nmax); + } // add drho into history this->_drho_history[iteration - 1] = drho; - if(threshold >= 0) // close the function - { - return false; - } + if (threshold >= 0) // close the function + { + return false; + } // check if the history is long enough - if(iteration < iternum_used + this->mixing_restart_last) - { - return false; - } + if (iteration < iternum_used + this->mixing_restart_last) + { + return false; + } // calculate the slope of the last iternum_used iterations' drho double slope = 0.0; @@ -236,24 +252,25 @@ bool 
Charge_Mixing::if_scf_oscillate(const int iteration, const double drho, con // this part is too short, so I do not design it as a free function in principle double sumX = 0, sumY = 0, sumXY = 0, sumXX = 0; for (int i = iteration - iternum_used; i < iteration; i++) - { - sumX += i; - sumY += std::log10(this->_drho_history[i]); - sumXY += i * std::log10(this->_drho_history[i]); - sumXX += i * i; - } + { + sumX += i; + sumY += std::log10 (this->_drho_history[i]); + sumXY += i * std::log10 (this->_drho_history[i]); + sumXX += i * i; + } double numerator = iternum_used * sumXY - sumX * sumY; double denominator = iternum_used * sumXX - sumX * sumX; - if (denominator == 0) { - return false; - } - slope = numerator / denominator; + if (denominator == 0) + { + return false; + } + slope = numerator / denominator; // if the slope is less than the threshold, return true - if(slope > threshold) - { - return true; - } + if (slope > threshold) + { + return true; + } return false; } diff --git a/source/source_estate/module_charge/charge_mixing.h b/source/source_estate/module_charge/charge_mixing.h index 3152dc5e204..a7931eb43b1 100644 --- a/source/source_estate/module_charge/charge_mixing.h +++ b/source/source_estate/module_charge/charge_mixing.h @@ -7,19 +7,19 @@ class Charge_Mixing { - /// Charge_Mixing class - /// This class is used to mix charge density, kinetic energy density and real-space density matrix - /// This Charge_Mixing class offers the following interfaces: - /// 1. set_mixing() to set all private mixing parameters - /// 2. init_mixing() to initialize mixing, including allocating memory for mixing data and reset mixing - /// 3. mix_rho() to mix charge density - /// 4. 
mix_dmr() to mix real-space density matrix - /// how to use it: - /// you can (re)start a mixing by calling set_mixing() and init_mixing() before calling mix_rho() or mix_dmr() + /// Charge_Mixing class + /// This class is used to mix charge density, kinetic energy density and real-space density matrix + /// This Charge_Mixing class offers the following interfaces: + /// 1. set_mixing() to set all private mixing parameters + /// 2. init_mixing() to initialize mixing, including allocating memory for mixing data and reset mixing + /// 3. mix_rho() to mix charge density + /// 4. mix_dmr() to mix real-space density matrix + /// how to use it: + /// you can (re)start a mixing by calling set_mixing() and init_mixing() before calling mix_rho() or mix_dmr() public: - Charge_Mixing(); - ~Charge_Mixing(); + Charge_Mixing (); + ~Charge_Mixing (); /** * @brief Set all private mixing paramters @@ -36,88 +36,115 @@ class Charge_Mixing * @param omega_in omega for non-linear core correction * @param tpiba_in 2*pi/beta for non-linear core correction */ - void set_mixing(const std::string& mixing_mode_in, - const double& mixing_beta_in, - const int& mixing_ndim_in, - const double& mixing_gg0_in, - const bool& mixing_tau_in, - const double& mixing_beta_mag_in, - const double& mixing_gg0_mag_in, - const double& mixing_gg0_min_in, - const double& mixing_angle_in, - const bool& mixing_dmr_in, - double& omega_in, - double& tpiba_in); - - void close_kerker_gg0() { mixing_gg0 = 0.0; mixing_gg0_mag = 0.0; } + void set_mixing (const std::string& mixing_mode_in, + const double& mixing_beta_in, + const int& mixing_ndim_in, + const double& mixing_gg0_in, + const bool& mixing_tau_in, + const double& mixing_beta_mag_in, + const double& mixing_gg0_mag_in, + const double& mixing_gg0_min_in, + const double& mixing_angle_in, + const bool& mixing_dmr_in, + double& omega_in, + double& tpiba_in); + + void + close_kerker_gg0 () + { + mixing_gg0 = 0.0; + mixing_gg0_mag = 0.0; + } /** * @brief initialize 
mixing, including constructing mixing and allocating memory for mixing data * @brief this function should be called at eachiterinit() */ - void init_mixing(); + void init_mixing (); /** * @brief allocate memory of dmr_mdata * @param nnr size of real-space density matrix */ - void allocate_mixing_dmr(const int nnr); + void allocate_mixing_dmr (const int nnr); /** * @brief charge mixing * @param chr pointer of Charge object */ - void mix_rho(Charge* chr); + void mix_rho (Charge* chr); /** * @brief density matrix mixing, only for LCAO * @param DM pointer of DensityMatrix object */ - void mix_dmr(elecstate::DensityMatrix* DM); - void mix_dmr(elecstate::DensityMatrix, double>* DM); - + void mix_dmr (elecstate::DensityMatrix* DM); + void mix_dmr (elecstate::DensityMatrix, double>* DM); + /** * @brief Get the drho between rho and rho_save, similar for get_dkin * */ - double get_drho(Charge* chr, const double nelec); - double get_dkin(Charge* chr, const double nelec); + double get_drho (Charge* chr, const double nelec); + double get_dkin (Charge* chr, const double nelec); /** - * @brief reset mixing, actually we only call init_mixing() to reset mixing instead of this function + * @brief reset mixing, actually we only call init_mixing() to reset mixing instead of this function */ - void mix_reset(); - + void mix_reset (); + /** * @brief Set the smooth and dense grids * @param rhopw_in smooth grid * @param rhodpw_in dense grid when double grid is used, otherwise same as rhopw */ - void set_rhopw(ModulePW::PW_Basis* rhopw_in, ModulePW::PW_Basis* rhodpw_in); + void set_rhopw (ModulePW::PW_Basis* rhopw_in, ModulePW::PW_Basis* rhodpw_in); // extracting parameters normally these parameters will not be used outside charge mixing // while Exx is using them as well as some other places - const std::string& get_mixing_mode() const {return mixing_mode;} - double get_mixing_beta() const {return mixing_beta;} - int get_mixing_ndim() const {return mixing_ndim;} - double get_mixing_gg0() 
const {return mixing_gg0;} - Base_Mixing::Mixing* get_mixing() const {return mixing;} + const std::string& + get_mixing_mode () const + { + return mixing_mode; + } + double + get_mixing_beta () const + { + return mixing_beta; + } + int + get_mixing_ndim () const + { + return mixing_ndim; + } + double + get_mixing_gg0 () const + { + return mixing_gg0; + } + Base_Mixing::Mixing* + get_mixing () const + { + return mixing; + } // for mixing restart - int mixing_restart_step = 0; //which step to restart mixing during SCF, always equal to scf_namx except for the mixing restart - int mixing_restart_count = 0; // the number of restart mixing during SCF. Do not set mixing_restart_count as bool since I want to keep some flexibility in the future - int mixing_restart_last = 0; // the label of mixing restart step, store the step number of the last mixing restart + int mixing_restart_step + = 0; // which step to restart mixing during SCF, always equal to scf_namx except for the mixing restart + int mixing_restart_count = 0; // the number of restart mixing during SCF. 
Do not set mixing_restart_count as bool + // since I want to keep some flexibility in the future + int mixing_restart_last = 0; // the label of mixing restart step, store the step number of the last mixing restart // to calculate the slope of drho curve during SCF, which is used to determine if SCF oscillate - bool if_scf_oscillate(const int iteration, const double drho, const int iternum_used, const double threshold); - + bool if_scf_oscillate (const int iteration, const double drho, const int iternum_used, const double threshold); + private: - // mixing_data - Base_Mixing::Mixing* mixing = nullptr; ///< Mixing object to mix charge density, kinetic energy density and compensation density - Base_Mixing::Mixing_Data rho_mdata; ///< Mixing data for charge density - Base_Mixing::Mixing_Data tau_mdata; ///< Mixing data for kinetic energy density - Base_Mixing::Mixing_Data nhat_mdata; ///< Mixing data for compensation density - Base_Mixing::Mixing_Data dmr_mdata; ///< Mixing data for real space density matrix + Base_Mixing::Mixing* mixing + = nullptr; ///< Mixing object to mix charge density, kinetic energy density and compensation density + Base_Mixing::Mixing_Data rho_mdata; ///< Mixing data for charge density + Base_Mixing::Mixing_Data tau_mdata; ///< Mixing data for kinetic energy density + Base_Mixing::Mixing_Data nhat_mdata; ///< Mixing data for compensation density + Base_Mixing::Mixing_Data dmr_mdata; ///< Mixing data for real space density matrix Base_Mixing::Plain_Mixing* mixing_highf = nullptr; ///< The high_frequency part is mixed by plain mixing method. 
//====================================== @@ -133,11 +160,11 @@ class Charge_Mixing double mixing_gg0_min = 0.1; ///< minimum kerker coefficient double mixing_angle = 0.0; ///< mixing angle for nspin=4 bool mixing_dmr = false; ///< whether to mixing real space density matrix - double* omega = nullptr; ///< omega for non-linear core correction - double* tpiba = nullptr; ///< 2*pi/beta for non-linear core correction - double* tpiba2 = nullptr; ///< 2*pi/beta^2 for non-linear core correction - std::vector _drho_history; ///< history of drho used to determine the oscillation, size is scf_nmax - + double* omega = nullptr; ///< omega for non-linear core correction + double* tpiba = nullptr; ///< 2*pi/beta for non-linear core correction + double* tpiba2 = nullptr; ///< 2*pi/beta^2 for non-linear core correction + std::vector _drho_history; ///< history of drho used to determine the oscillation, size is scf_nmax + bool new_e_iteration = true; ModulePW::PW_Basis* rhopw = nullptr; ///< smooth grid @@ -147,25 +174,25 @@ class Charge_Mixing * @brief charge mixing for reciprocal space * @param chr pointer of Charge object */ - void mix_rho_recip(Charge* chr); + void mix_rho_recip (Charge* chr); /** * @brief charge mixing for real space * @param chr pointer of Charge object */ - void mix_rho_real(Charge* chr); + void mix_rho_real (Charge* chr); /** * @brief Kerker screen method for reciprocal space * @param rhog charge density in reciprocal space */ - void Kerker_screen_recip(std::complex* rhog); + void Kerker_screen_recip (std::complex* rhog); /** * @brief Kerker screen method for real space * @param rho charge density in real space */ - void Kerker_screen_real(double* rho); + void Kerker_screen_real (double* rho); /** * @brief Inner product of two complex vectors @@ -174,15 +201,15 @@ class Charge_Mixing * @brief inner_product_recip_simple is only used for test * @brief Actually, I am not sure if the definition of inner product for NSPIN=4 is correct, need to be checked. 
*/ - double inner_product_recip_rho(std::complex* rho1, std::complex* rho2); - double inner_product_recip_simple(std::complex* rho1, std::complex* rho2); - double inner_product_recip_hartree(std::complex* rho1, std::complex* rho2); + double inner_product_recip_rho (std::complex* rho1, std::complex* rho2); + double inner_product_recip_simple (std::complex* rho1, std::complex* rho2); + double inner_product_recip_hartree (std::complex* rho1, std::complex* rho2); /** * @brief Inner product of two double vectors * */ - double inner_product_real(double* rho1, double* rho2); + double inner_product_real (double* rho1, double* rho2); /** * @brief divide rho/tau to smooth and high frequency parts @@ -191,15 +218,15 @@ class Charge_Mixing * @param data_hf high frequency data = dense data - smooth data * */ - void divide_data(std::complex* data_d, std::complex*& data_s, std::complex*& data_hf); + void divide_data (std::complex* data_d, std::complex*& data_s, std::complex*& data_hf); /** * @brief gather smooth and high frequency parts to rho/tau * @param data_d dense data * @param data_s smooth data * @param data_hf high frequency data = dense data - smooth data - * + * */ - void combine_data(std::complex* data_d, std::complex*& data_s, std::complex*& data_hf); + void combine_data (std::complex* data_d, std::complex*& data_s, std::complex*& data_hf); /** * @brief clean smooth and high frequency parts * @param data_d dense data @@ -207,7 +234,7 @@ class Charge_Mixing * @param data_hf high frequency data = dense data - smooth data * */ - void clean_data(std::complex*& data_s, std::complex*& data_hf); + void clean_data (std::complex*& data_s, std::complex*& data_hf); }; #endif diff --git a/source/source_estate/module_charge/charge_mixing_dmr.cpp b/source/source_estate/module_charge/charge_mixing_dmr.cpp index 368ed63f257..8fddbb4d229 100644 --- a/source/source_estate/module_charge/charge_mixing_dmr.cpp +++ b/source/source_estate/module_charge/charge_mixing_dmr.cpp @@ -3,225 +3,230 
@@ #include "source_io/module_parameter/parameter.h" #include "source_base/timer.h" -void Charge_Mixing::allocate_mixing_dmr(const int nnr) +void + Charge_Mixing::allocate_mixing_dmr (const int nnr) { // Note that: we cannot allocate memory for dmr_mdata in set_mixing. - // since the size of dmr_mdata is given by the size of HContainer.nnr, which is calculated in DensityMatrix::init_DMR(). - // and DensityMatrix::init_DMR() is called in beforescf(). While set_mixing() is called in ESolver_KS::Init(). - ModuleBase::TITLE("Charge_Mixing", "allocate_mixing_dmr"); - ModuleBase::timer::start("Charge_Mixing", "allocate_mixing_dmr"); + // since the size of dmr_mdata is given by the size of HContainer.nnr, which is calculated in + // DensityMatrix::init_DMR(). and DensityMatrix::init_DMR() is called in beforescf(). While set_mixing() is called + // in ESolver_KS::Init(). + ModuleBase::TITLE ("Charge_Mixing", "allocate_mixing_dmr"); + ModuleBase::timer::start ("Charge_Mixing", "allocate_mixing_dmr"); // const int dmr_nspin = (PARAM.inp.nspin == 2) ? 
2 : 1; // allocate memory for dmr_mdata if (PARAM.inp.scf_thr_type == 1) - { - ModuleBase::WARNING_QUIT("Charge_Mixing", "This Mixing of Density Matrix is not supported for PW basis yet"); - } + { + ModuleBase::WARNING_QUIT ("Charge_Mixing", + "This Mixing of Density Matrix is not supported for PW basis yet"); + } else if (PARAM.inp.scf_thr_type == 2) - { - this->mixing->init_mixing_data(this->dmr_mdata, nnr * dmr_nspin, sizeof(double)); - } + { + this->mixing->init_mixing_data (this->dmr_mdata, nnr * dmr_nspin, sizeof (double)); + } - this->dmr_mdata.reset(); - ModuleBase::timer::end("Charge_Mixing", "allocate_mixing_dmr"); + this->dmr_mdata.reset (); + ModuleBase::timer::end ("Charge_Mixing", "allocate_mixing_dmr"); return; } -void Charge_Mixing::mix_dmr(elecstate::DensityMatrix* DM) +void + Charge_Mixing::mix_dmr (elecstate::DensityMatrix* DM) { // Notice that DensityMatrix object is a Template class - ModuleBase::TITLE("Charge_Mixing", "mix_dmr"); - ModuleBase::timer::start("Charge_Mixing", "mix_dmr"); + ModuleBase::TITLE ("Charge_Mixing", "mix_dmr"); + ModuleBase::timer::start ("Charge_Mixing", "mix_dmr"); // - std::vector*> dmr = DM->get_DMR_vector(); - std::vector>& dmr_save = DM->get_DMR_save(); + std::vector*> dmr = DM->get_DMR_vector (); + std::vector>& dmr_save = DM->get_DMR_save (); // - //const int dmr_nspin = (PARAM.inp.nspin == 2) ? 2 : 1; + // const int dmr_nspin = (PARAM.inp.nspin == 2) ? 
2 : 1; double* dmr_in = nullptr; double* dmr_out = nullptr; if (PARAM.inp.nspin == 1 || PARAM.inp.nspin == 4) - { - dmr_in = dmr_save[0].data(); - dmr_out = dmr[0]->get_wrapper(); - this->mixing->push_data(this->dmr_mdata, dmr_in, dmr_out, nullptr, false); - this->mixing->mix_data(this->dmr_mdata, dmr_out); - } - else if (PARAM.inp.nspin == 2) - { - // magnetic density matrix - double* dmr_mag = nullptr; - double* dmr_mag_save = nullptr; - const int nnr = dmr[0]->get_nnr(); - // allocate dmr_mag[is*nnrx] and dmr_mag_save[is*nnrx] - dmr_mag = new double[nnr * PARAM.inp.nspin]; - dmr_mag_save = new double[nnr * PARAM.inp.nspin]; - ModuleBase::GlobalFunc::ZEROS(dmr_mag, nnr * PARAM.inp.nspin); - ModuleBase::GlobalFunc::ZEROS(dmr_mag_save, nnr * PARAM.inp.nspin); - double* dmr_up = nullptr; - double* dmr_down = nullptr; - // tranfer dmr into dmr_mag - dmr_up = dmr[0]->get_wrapper(); - dmr_down = dmr[1]->get_wrapper(); - for (int ir = 0; ir < nnr; ir++) { - dmr_mag[ir] = dmr_up[ir] + dmr_down[ir]; - dmr_mag[ir + nnr] = dmr_up[ir] - dmr_down[ir]; + dmr_in = dmr_save[0].data (); + dmr_out = dmr[0]->get_wrapper (); + this->mixing->push_data (this->dmr_mdata, dmr_in, dmr_out, nullptr, false); + this->mixing->mix_data (this->dmr_mdata, dmr_out); } - // tranfer dmr_save into dmr_mag_save - dmr_up = dmr_save[0].data(); - dmr_down = dmr_save[1].data(); - for (int ir = 0; ir < nnr; ir++) + else if (PARAM.inp.nspin == 2) { - dmr_mag_save[ir] = dmr_up[ir] + dmr_down[ir]; - dmr_mag_save[ir + nnr] = dmr_up[ir] - dmr_down[ir]; - } - // - dmr_in = dmr_mag_save; - dmr_out = dmr_mag; - // no kerker in mixing_dmr - //auto screen = std::bind(&Charge_Mixing::Kerker_screen_real, this, std::placeholders::_1); - auto twobeta_mix - = [this, nnr](double* out, const double* in, const double* sres) { + // magnetic density matrix + double* dmr_mag = nullptr; + double* dmr_mag_save = nullptr; + const int nnr = dmr[0]->get_nnr (); + // allocate dmr_mag[is*nnrx] and dmr_mag_save[is*nnrx] + dmr_mag = 
new double[nnr * PARAM.inp.nspin]; + dmr_mag_save = new double[nnr * PARAM.inp.nspin]; + ModuleBase::GlobalFunc::ZEROS (dmr_mag, nnr * PARAM.inp.nspin); + ModuleBase::GlobalFunc::ZEROS (dmr_mag_save, nnr * PARAM.inp.nspin); + double* dmr_up = nullptr; + double* dmr_down = nullptr; + // tranfer dmr into dmr_mag + dmr_up = dmr[0]->get_wrapper (); + dmr_down = dmr[1]->get_wrapper (); + for (int ir = 0; ir < nnr; ir++) + { + dmr_mag[ir] = dmr_up[ir] + dmr_down[ir]; + dmr_mag[ir + nnr] = dmr_up[ir] - dmr_down[ir]; + } + // tranfer dmr_save into dmr_mag_save + dmr_up = dmr_save[0].data (); + dmr_down = dmr_save[1].data (); + for (int ir = 0; ir < nnr; ir++) + { + dmr_mag_save[ir] = dmr_up[ir] + dmr_down[ir]; + dmr_mag_save[ir + nnr] = dmr_up[ir] - dmr_down[ir]; + } + // + dmr_in = dmr_mag_save; + dmr_out = dmr_mag; + // no kerker in mixing_dmr + // auto screen = std::bind(&Charge_Mixing::Kerker_screen_real, this, std::placeholders::_1); + auto twobeta_mix = [this, nnr] (double* out, const double* in, const double* sres) + { #ifdef _OPENMP #pragma omp parallel for schedule(static, 256) #endif - for (int i = 0; i < nnr; ++i) - { - out[i] = in[i] + this->mixing_beta * sres[i]; - } - // magnetism + for (int i = 0; i < nnr; ++i) + { + out[i] = in[i] + this->mixing_beta * sres[i]; + } + // magnetism #ifdef _OPENMP #pragma omp parallel for schedule(static, 256) #endif - for (int i = nnr; i < 2 * nnr; ++i) - { - out[i] = in[i] + this->mixing_beta_mag * sres[i]; - } - }; - this->mixing->push_data(this->dmr_mdata, dmr_in, dmr_out, nullptr, twobeta_mix, false); - //auto inner_product - // = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2); - //this->mixing->cal_coef(this->rho_mdata, inner_product); - this->mixing->mix_data(this->dmr_mdata, dmr_out); - // get new dmr from dmr_mag - dmr_up = dmr[0]->get_wrapper(); - dmr_down = dmr[1]->get_wrapper(); - for (int is = 0; is < PARAM.inp.nspin; is++) - { - 
ModuleBase::GlobalFunc::ZEROS(dmr_up, nnr); - ModuleBase::GlobalFunc::ZEROS(dmr_down, nnr); + for (int i = nnr; i < 2 * nnr; ++i) + { + out[i] = in[i] + this->mixing_beta_mag * sres[i]; + } + }; + this->mixing->push_data (this->dmr_mdata, dmr_in, dmr_out, nullptr, twobeta_mix, false); + // auto inner_product + // = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2); + // this->mixing->cal_coef(this->rho_mdata, inner_product); + this->mixing->mix_data (this->dmr_mdata, dmr_out); + // get new dmr from dmr_mag + dmr_up = dmr[0]->get_wrapper (); + dmr_down = dmr[1]->get_wrapper (); + for (int is = 0; is < PARAM.inp.nspin; is++) + { + ModuleBase::GlobalFunc::ZEROS (dmr_up, nnr); + ModuleBase::GlobalFunc::ZEROS (dmr_down, nnr); + } + for (int ir = 0; ir < nnr; ir++) + { + dmr_up[ir] = 0.5 * (dmr_mag[ir] + dmr_mag[ir + nnr]); + dmr_down[ir] = 0.5 * (dmr_mag[ir] - dmr_mag[ir + nnr]); + } + // delete + delete[] dmr_mag; + delete[] dmr_mag_save; } - for (int ir = 0; ir < nnr; ir++) - { - dmr_up[ir] = 0.5 * (dmr_mag[ir] + dmr_mag[ir+nnr]); - dmr_down[ir] = 0.5 * (dmr_mag[ir] - dmr_mag[ir+nnr]); - } - // delete - delete[] dmr_mag; - delete[] dmr_mag_save; - } - ModuleBase::timer::end("Charge_Mixing", "mix_dmr"); + ModuleBase::timer::end ("Charge_Mixing", "mix_dmr"); return; } -void Charge_Mixing::mix_dmr(elecstate::DensityMatrix, double>* DM) +void + Charge_Mixing::mix_dmr (elecstate::DensityMatrix, double>* DM) { // Notice that DensityMatrix object is a Template class - ModuleBase::TITLE("Charge_Mixing", "mix_dmr"); - ModuleBase::timer::start("Charge_Mixing", "mix_dmr"); + ModuleBase::TITLE ("Charge_Mixing", "mix_dmr"); + ModuleBase::timer::start ("Charge_Mixing", "mix_dmr"); // - std::vector*> dmr = DM->get_DMR_vector(); - std::vector>& dmr_save = DM->get_DMR_save(); + std::vector*> dmr = DM->get_DMR_vector (); + std::vector>& dmr_save = DM->get_DMR_save (); // - //const int dmr_nspin = (PARAM.inp.nspin == 2) ? 
2 : 1; + // const int dmr_nspin = (PARAM.inp.nspin == 2) ? 2 : 1; double* dmr_in = nullptr; double* dmr_out = nullptr; if (PARAM.inp.nspin == 1 || PARAM.inp.nspin == 4) - { - dmr_in = dmr_save[0].data(); - dmr_out = dmr[0]->get_wrapper(); - this->mixing->push_data(this->dmr_mdata, dmr_in, dmr_out, nullptr, false); - this->mixing->mix_data(this->dmr_mdata, dmr_out); - } - else if (PARAM.inp.nspin == 2) - { - // magnetic density matrix - double* dmr_mag = nullptr; - double* dmr_mag_save = nullptr; - const int nnr = dmr[0]->get_nnr(); - // allocate dmr_mag[is*nnrx] and dmr_mag_save[is*nnrx] - dmr_mag = new double[nnr * PARAM.inp.nspin]; - dmr_mag_save = new double[nnr * PARAM.inp.nspin]; - ModuleBase::GlobalFunc::ZEROS(dmr_mag, nnr * PARAM.inp.nspin); - ModuleBase::GlobalFunc::ZEROS(dmr_mag_save, nnr * PARAM.inp.nspin); - double* dmr_up = nullptr; - double* dmr_down = nullptr; - // tranfer dmr into dmr_mag - dmr_up = dmr[0]->get_wrapper(); - dmr_down = dmr[1]->get_wrapper(); - for (int ir = 0; ir < nnr; ir++) { - dmr_mag[ir] = dmr_up[ir] + dmr_down[ir]; - dmr_mag[ir + nnr] = dmr_up[ir] - dmr_down[ir]; + dmr_in = dmr_save[0].data (); + dmr_out = dmr[0]->get_wrapper (); + this->mixing->push_data (this->dmr_mdata, dmr_in, dmr_out, nullptr, false); + this->mixing->mix_data (this->dmr_mdata, dmr_out); } - // tranfer dmr_save into dmr_mag_save - dmr_up = dmr_save[0].data(); - dmr_down = dmr_save[1].data(); - for (int ir = 0; ir < nnr; ir++) + else if (PARAM.inp.nspin == 2) { - dmr_mag_save[ir] = dmr_up[ir] + dmr_down[ir]; - dmr_mag_save[ir + nnr] = dmr_up[ir] - dmr_down[ir]; - } - // - dmr_in = dmr_mag_save; - dmr_out = dmr_mag; - // no kerker in mixing_dmr - //auto screen = std::bind(&Charge_Mixing::Kerker_screen_real, this, std::placeholders::_1); - auto twobeta_mix - = [this, nnr](double* out, const double* in, const double* sres) { + // magnetic density matrix + double* dmr_mag = nullptr; + double* dmr_mag_save = nullptr; + const int nnr = dmr[0]->get_nnr (); + // 
allocate dmr_mag[is*nnrx] and dmr_mag_save[is*nnrx] + dmr_mag = new double[nnr * PARAM.inp.nspin]; + dmr_mag_save = new double[nnr * PARAM.inp.nspin]; + ModuleBase::GlobalFunc::ZEROS (dmr_mag, nnr * PARAM.inp.nspin); + ModuleBase::GlobalFunc::ZEROS (dmr_mag_save, nnr * PARAM.inp.nspin); + double* dmr_up = nullptr; + double* dmr_down = nullptr; + // tranfer dmr into dmr_mag + dmr_up = dmr[0]->get_wrapper (); + dmr_down = dmr[1]->get_wrapper (); + for (int ir = 0; ir < nnr; ir++) + { + dmr_mag[ir] = dmr_up[ir] + dmr_down[ir]; + dmr_mag[ir + nnr] = dmr_up[ir] - dmr_down[ir]; + } + // tranfer dmr_save into dmr_mag_save + dmr_up = dmr_save[0].data (); + dmr_down = dmr_save[1].data (); + for (int ir = 0; ir < nnr; ir++) + { + dmr_mag_save[ir] = dmr_up[ir] + dmr_down[ir]; + dmr_mag_save[ir + nnr] = dmr_up[ir] - dmr_down[ir]; + } + // + dmr_in = dmr_mag_save; + dmr_out = dmr_mag; + // no kerker in mixing_dmr + // auto screen = std::bind(&Charge_Mixing::Kerker_screen_real, this, std::placeholders::_1); + auto twobeta_mix = [this, nnr] (double* out, const double* in, const double* sres) + { #ifdef _OPENMP #pragma omp parallel for schedule(static, 256) #endif - for (int i = 0; i < nnr; ++i) - { - out[i] = in[i] + this->mixing_beta * sres[i]; - } - // magnetism + for (int i = 0; i < nnr; ++i) + { + out[i] = in[i] + this->mixing_beta * sres[i]; + } + // magnetism #ifdef _OPENMP #pragma omp parallel for schedule(static, 256) #endif - for (int i = nnr; i < 2 * nnr; ++i) - { - out[i] = in[i] + this->mixing_beta_mag * sres[i]; - } - }; - this->mixing->push_data(this->dmr_mdata, dmr_in, dmr_out, nullptr, twobeta_mix, false); - //auto inner_product - // = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2); - //this->mixing->cal_coef(this->rho_mdata, inner_product); - this->mixing->mix_data(this->dmr_mdata, dmr_out); - // get new dmr from dmr_mag - dmr_up = dmr[0]->get_wrapper(); - dmr_down = dmr[1]->get_wrapper(); - for (int is = 0; is < 
PARAM.inp.nspin; is++) - { - ModuleBase::GlobalFunc::ZEROS(dmr_up, nnr); - ModuleBase::GlobalFunc::ZEROS(dmr_down, nnr); - } - for (int ir = 0; ir < nnr; ir++) - { - dmr_up[ir] = 0.5 * (dmr_mag[ir] + dmr_mag[ir+nnr]); - dmr_down[ir] = 0.5 * (dmr_mag[ir] - dmr_mag[ir+nnr]); + for (int i = nnr; i < 2 * nnr; ++i) + { + out[i] = in[i] + this->mixing_beta_mag * sres[i]; + } + }; + this->mixing->push_data (this->dmr_mdata, dmr_in, dmr_out, nullptr, twobeta_mix, false); + // auto inner_product + // = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2); + // this->mixing->cal_coef(this->rho_mdata, inner_product); + this->mixing->mix_data (this->dmr_mdata, dmr_out); + // get new dmr from dmr_mag + dmr_up = dmr[0]->get_wrapper (); + dmr_down = dmr[1]->get_wrapper (); + for (int is = 0; is < PARAM.inp.nspin; is++) + { + ModuleBase::GlobalFunc::ZEROS (dmr_up, nnr); + ModuleBase::GlobalFunc::ZEROS (dmr_down, nnr); + } + for (int ir = 0; ir < nnr; ir++) + { + dmr_up[ir] = 0.5 * (dmr_mag[ir] + dmr_mag[ir + nnr]); + dmr_down[ir] = 0.5 * (dmr_mag[ir] - dmr_mag[ir + nnr]); + } + // delete + delete[] dmr_mag; + delete[] dmr_mag_save; } - // delete - delete[] dmr_mag; - delete[] dmr_mag_save; - } - ModuleBase::timer::end("Charge_Mixing", "mix_dmr"); + ModuleBase::timer::end ("Charge_Mixing", "mix_dmr"); return; } \ No newline at end of file diff --git a/source/source_estate/module_charge/charge_mixing_preconditioner.cpp b/source/source_estate/module_charge/charge_mixing_preconditioner.cpp index a0a32fef97c..4ddbc879582 100644 --- a/source/source_estate/module_charge/charge_mixing_preconditioner.cpp +++ b/source/source_estate/module_charge/charge_mixing_preconditioner.cpp @@ -3,16 +3,17 @@ #include "source_io/module_parameter/parameter.h" #include "source_base/timer.h" -void Charge_Mixing::Kerker_screen_recip(std::complex* drhog) +void + Charge_Mixing::Kerker_screen_recip (std::complex* drhog) { - ModuleBase::TITLE("Charge_Mixing", 
"Kerker_screen_recip"); + ModuleBase::TITLE ("Charge_Mixing", "Kerker_screen_recip"); - if (this->mixing_gg0 <= 0.0 || this->mixing_beta <= 0.1) - { - return; - } + if (this->mixing_gg0 <= 0.0 || this->mixing_beta <= 0.1) + { + return; + } - ModuleBase::timer::start("Charge_Mixing", "Kerker_screen_recip"); + ModuleBase::timer::start ("Charge_Mixing", "Kerker_screen_recip"); const int nspin = PARAM.inp.nspin; @@ -21,156 +22,160 @@ void Charge_Mixing::Kerker_screen_recip(std::complex* drhog) double amin = 0.0; /// consider a resize for mixing_angle - int resize_tmp = 1; - if (nspin == 4 && this->mixing_angle > 0) - { - resize_tmp = 2; - } + int resize_tmp = 1; + if (nspin == 4 && this->mixing_angle > 0) + { + resize_tmp = 2; + } /// implement Kerker for density and magnetization separately for (int is = 0; is < nspin / resize_tmp; ++is) - { - const int is_idx = is * this->rhopw->npw; - /// new mixing method only support nspin=2 not nspin=4 - if (is >= 1) { - if (this->mixing_gg0_mag <= 0.0001 || this->mixing_beta_mag <= 0.1) - { + const int is_idx = is * this->rhopw->npw; + /// new mixing method only support nspin=2 not nspin=4 + if (is >= 1) + { + if (this->mixing_gg0_mag <= 0.0001 || this->mixing_beta_mag <= 0.1) + { #ifdef __DEBUG - assert(is == 1); // make sure break works + assert (is == 1); // make sure break works #endif - double is_mag = nspin - 1; - //for (int ig = 0; ig < this->rhopw->npw * is_mag; ig++) - //{ - // drhog[is_idx + ig] *= 1; - //} - break; - } - fac = this->mixing_gg0_mag; - amin = this->mixing_beta_mag; - } - else - { - fac = this->mixing_gg0; - amin = this->mixing_beta; - } + double is_mag = nspin - 1; + // for (int ig = 0; ig < this->rhopw->npw * is_mag; ig++) + //{ + // drhog[is_idx + ig] *= 1; + // } + break; + } + fac = this->mixing_gg0_mag; + amin = this->mixing_beta_mag; + } + else + { + fac = this->mixing_gg0; + amin = this->mixing_beta; + } - gg0 = std::pow(fac * ModuleBase::BOHR_TO_A / *this->tpiba, 2); + gg0 = std::pow (fac * 
ModuleBase::BOHR_TO_A / *this->tpiba, 2); - const double gg0_amin = this->mixing_gg0_min / amin; + const double gg0_amin = this->mixing_gg0_min / amin; #ifdef _OPENMP #pragma omp parallel for schedule(static, 512) #endif - for (int ig = 0; ig < this->rhopw->npw; ++ig) - { - double gg = this->rhopw->gg[ig]; - double filter_g = std::max(gg / (gg + gg0), gg0_amin); - drhog[is_idx + ig] *= filter_g; + for (int ig = 0; ig < this->rhopw->npw; ++ig) + { + double gg = this->rhopw->gg[ig]; + double filter_g = std::max (gg / (gg + gg0), gg0_amin); + drhog[is_idx + ig] *= filter_g; + } } - } - ModuleBase::timer::end("Charge_Mixing", "Kerker_screen_recip"); + ModuleBase::timer::end ("Charge_Mixing", "Kerker_screen_recip"); return; } -void Charge_Mixing::Kerker_screen_real(double* drhor) +void + Charge_Mixing::Kerker_screen_real (double* drhor) { - ModuleBase::TITLE("Charge_Mixing", "Kerker_screen_real"); + ModuleBase::TITLE ("Charge_Mixing", "Kerker_screen_real"); - if (this->mixing_gg0 <= 0.0001 || this->mixing_beta <= 0.1) - { - return; - } + if (this->mixing_gg0 <= 0.0001 || this->mixing_beta <= 0.1) + { + return; + } - ModuleBase::timer::start("Charge_Mixing", "Kerker_screen_real"); + ModuleBase::timer::start ("Charge_Mixing", "Kerker_screen_real"); const int nspin = PARAM.inp.nspin; - assert(nspin==1 || nspin==2 || nspin==4); + assert (nspin == 1 || nspin == 2 || nspin == 4); - /// consider a resize for mixing_angle + /// consider a resize for mixing_angle int resize_tmp = 1; - if (nspin == 4 && this->mixing_angle > 0) - { - resize_tmp = 2; - } - - std::vector> drhog(this->rhopw->npw * nspin / resize_tmp); - std::vector drhor_filter(this->rhopw->nrxx * nspin / resize_tmp); + if (nspin == 4 && this->mixing_angle > 0) + { + resize_tmp = 2; + } + + std::vector> drhog (this->rhopw->npw * nspin / resize_tmp); + std::vector drhor_filter (this->rhopw->nrxx * nspin / resize_tmp); for (int is = 0; is < nspin / resize_tmp; ++is) - { - // Note after this process some G which is 
higher than Gmax will be filtered. - // Thus we cannot use Kerker_screen_recip(drhog.data()) directly after it. - this->rhopw->real2recip(drhor + is * this->rhopw->nrxx, drhog.data() + is * this->rhopw->npw); - } + { + // Note after this process some G which is higher than Gmax will be filtered. + // Thus we cannot use Kerker_screen_recip(drhog.data()) directly after it. + this->rhopw->real2recip (drhor + is * this->rhopw->nrxx, drhog.data () + is * this->rhopw->npw); + } /// implement Kerker for density and magnetization separately double fac = 0.0; double gg0 = 0.0; double amin = 0.0; for (int is = 0; is < nspin / resize_tmp; is++) - { - - if (is >= 1) { - if (this->mixing_gg0_mag <= 0.0001 || this->mixing_beta_mag <= 0.1) - { + + if (is >= 1) + { + if (this->mixing_gg0_mag <= 0.0001 || this->mixing_beta_mag <= 0.1) + { #ifdef __DEBUG - assert(is == 1); /// make sure break works + assert (is == 1); /// make sure break works #endif - double is_mag = nspin - 1; - if (nspin == 4 && this->mixing_angle > 0) { is_mag = 1; -} - for (int ig = 0; ig < this->rhopw->npw * is_mag; ig++) + double is_mag = nspin - 1; + if (nspin == 4 && this->mixing_angle > 0) + { + is_mag = 1; + } + for (int ig = 0; ig < this->rhopw->npw * is_mag; ig++) + { + drhog[is * this->rhopw->npw + ig] = 0; + } + break; + } + fac = this->mixing_gg0_mag; + amin = this->mixing_beta_mag; + } + else { - drhog[is * this->rhopw->npw + ig] = 0; + fac = this->mixing_gg0; + amin = this->mixing_beta; } - break; - } - fac = this->mixing_gg0_mag; - amin = this->mixing_beta_mag; - } - else - { - fac = this->mixing_gg0; - amin = this->mixing_beta; - } - - gg0 = std::pow(fac * ModuleBase::BOHR_TO_A / *this->tpiba, 2); - const int is_idx = is * this->rhopw->npw; - const double gg0_amin = this->mixing_gg0_min / amin; + gg0 = std::pow (fac * ModuleBase::BOHR_TO_A / *this->tpiba, 2); + + const int is_idx = is * this->rhopw->npw; + const double gg0_amin = this->mixing_gg0_min / amin; #ifdef _OPENMP #pragma omp parallel 
for schedule(static, 512) #endif - for (int ig = 0; ig < this->rhopw->npw; ig++) - { - double gg = this->rhopw->gg[ig]; - // I have not decided how to handle gg=0 part, will be changed in future - //if (gg == 0) - //{ - // drhog[is_idx + ig] *= 0; - // continue; - //} - double filter_g = std::max(gg / (gg + gg0), gg0_amin); - drhog[is_idx + ig] *= (1 - filter_g); + for (int ig = 0; ig < this->rhopw->npw; ig++) + { + double gg = this->rhopw->gg[ig]; + // I have not decided how to handle gg=0 part, will be changed in future + // if (gg == 0) + //{ + // drhog[is_idx + ig] *= 0; + // continue; + //} + double filter_g = std::max (gg / (gg + gg0), gg0_amin); + drhog[is_idx + ig] *= (1 - filter_g); + } } - } /// inverse FT for (int is = 0; is < nspin / resize_tmp; ++is) - { - this->rhopw->recip2real(drhog.data() + is * this->rhopw->npw, drhor_filter.data() + is * this->rhopw->nrxx); - } + { + this->rhopw->recip2real (drhog.data () + is * this->rhopw->npw, + drhor_filter.data () + is * this->rhopw->nrxx); + } #ifdef _OPENMP #pragma omp parallel for schedule(static, 512) #endif for (int ir = 0; ir < this->rhopw->nrxx * nspin / resize_tmp; ir++) - { - drhor[ir] -= drhor_filter[ir]; - } + { + drhor[ir] -= drhor_filter[ir]; + } - ModuleBase::timer::end("Charge_Mixing", "Kerker_screen_real"); + ModuleBase::timer::end ("Charge_Mixing", "Kerker_screen_real"); return; } diff --git a/source/source_estate/module_charge/charge_mixing_residual.cpp b/source/source_estate/module_charge/charge_mixing_residual.cpp index 05146618916..5f2861789a0 100644 --- a/source/source_estate/module_charge/charge_mixing_residual.cpp +++ b/source/source_estate/module_charge/charge_mixing_residual.cpp @@ -4,244 +4,260 @@ #include "source_base/parallel_reduce.h" #include "source_hamilt/module_xc/xc_functional.h" -double Charge_Mixing::get_drho(Charge* chr, const double nelec) +double + Charge_Mixing::get_drho (Charge* chr, const double nelec) { - ModuleBase::TITLE("Charge_Mixing", "get_drho"); - 
ModuleBase::timer::start("Charge_Mixing", "get_drho"); + ModuleBase::TITLE ("Charge_Mixing", "get_drho"); + ModuleBase::timer::start ("Charge_Mixing", "get_drho"); const int nspin = PARAM.inp.nspin; - assert(nspin==1 || nspin==2 || nspin==4); + assert (nspin == 1 || nspin == 2 || nspin == 4); double drho = 0.0; if (PARAM.inp.scf_thr_type == 1) - { - for (int is = 0; is < nspin; ++is) { - ModuleBase::GlobalFunc::NOTE("Perform FFT on rho(r) to obtain rho(G)."); - chr->rhopw->real2recip(chr->rho[is], chr->rhog[is]); + for (int is = 0; is < nspin; ++is) + { + ModuleBase::GlobalFunc::NOTE ("Perform FFT on rho(r) to obtain rho(G)."); + chr->rhopw->real2recip (chr->rho[is], chr->rhog[is]); - ModuleBase::GlobalFunc::NOTE("Perform FFT on rho_save(r) to obtain rho_save(G)."); - chr->rhopw->real2recip(chr->rho_save[is], chr->rhog_save[is]); - } + ModuleBase::GlobalFunc::NOTE ("Perform FFT on rho_save(r) to obtain rho_save(G)."); + chr->rhopw->real2recip (chr->rho_save[is], chr->rhog_save[is]); + } - ModuleBase::GlobalFunc::NOTE("Calculate the charge difference between rho(G) and rho_save(G)"); - std::vector> drhog(nspin * this->rhopw->npw); + ModuleBase::GlobalFunc::NOTE ("Calculate the charge difference between rho(G) and rho_save(G)"); + std::vector> drhog (nspin * this->rhopw->npw); #ifdef _OPENMP #pragma omp parallel for collapse(2) schedule(static, 512) #endif - for (int is = 0; is < nspin; ++is) - { - for (int ig = 0; ig < this->rhopw->npw; ig++) - { - drhog[is * this->rhopw->npw + ig] = chr->rhog[is][ig] - chr->rhog_save[is][ig]; - } - } + for (int is = 0; is < nspin; ++is) + { + for (int ig = 0; ig < this->rhopw->npw; ig++) + { + drhog[is * this->rhopw->npw + ig] = chr->rhog[is][ig] - chr->rhog_save[is][ig]; + } + } - ModuleBase::GlobalFunc::NOTE("Calculate the norm of the Residual std::vector: < R[rho] | R[rho_save] >"); - drho = this->inner_product_recip_rho(drhog.data(), drhog.data()); - } + ModuleBase::GlobalFunc::NOTE ("Calculate the norm of the Residual 
std::vector: < R[rho] | R[rho_save] >"); + drho = this->inner_product_recip_rho (drhog.data (), drhog.data ()); + } else - { - // Note: Maybe it is wrong. - // The inner_product_real function (L1-norm) is different from that (L2-norm) in mixing. - for (int is = 0; is < nspin; is++) { - if (is != 0 && is != 3 && PARAM.globalv.domag_z) - { - continue; - } + // Note: Maybe it is wrong. + // The inner_product_real function (L1-norm) is different from that (L2-norm) in mixing. + for (int is = 0; is < nspin; is++) + { + if (is != 0 && is != 3 && PARAM.globalv.domag_z) + { + continue; + } #ifdef _OPENMP #pragma omp parallel for reduction(+ : drho) #endif - for (int ir = 0; ir < this->rhopw->nrxx; ir++) - { - drho += std::abs(chr->rho[is][ir] - chr->rho_save[is][ir]); - } - } + for (int ir = 0; ir < this->rhopw->nrxx; ir++) + { + drho += std::abs (chr->rho[is][ir] - chr->rho_save[is][ir]); + } + } #ifdef __MPI - Parallel_Reduce::reduce_pool(drho); + Parallel_Reduce::reduce_pool (drho); #endif - assert(nelec != 0); - assert(*this->omega > 0); - assert(this->rhopw->nxyz > 0); - drho *= *this->omega / static_cast(this->rhopw->nxyz); - drho /= nelec; - } - - ModuleBase::timer::end("Charge_Mixing", "get_drho"); + assert (nelec != 0); + assert (*this->omega > 0); + assert (this->rhopw->nxyz > 0); + drho *= *this->omega / static_cast (this->rhopw->nxyz); + drho /= nelec; + } + + ModuleBase::timer::end ("Charge_Mixing", "get_drho"); return drho; } -double Charge_Mixing::get_dkin(Charge* chr, const double nelec) +double + Charge_Mixing::get_dkin (Charge* chr, const double nelec) { - if (!(XC_Functional::get_ked_flag())) - { - return 0.0; - }; - ModuleBase::TITLE("Charge_Mixing", "get_dkin"); - ModuleBase::timer::start("Charge_Mixing", "get_dkin"); + if (!(XC_Functional::get_ked_flag ())) + { + return 0.0; + }; + ModuleBase::TITLE ("Charge_Mixing", "get_dkin"); + ModuleBase::timer::start ("Charge_Mixing", "get_dkin"); double dkin = 0.0; - + // Get dkin from kin_r and kin_r_save for 
PW and LCAO both, which is different from drho. for (int is = 0; is < PARAM.inp.nspin; is++) - { - if (is != 0 && is != 3 && PARAM.globalv.domag_z) { - continue; - } + if (is != 0 && is != 3 && PARAM.globalv.domag_z) + { + continue; + } #ifdef _OPENMP #pragma omp parallel for reduction(+ : dkin) #endif - for (int ir = 0; ir < this->rhopw->nrxx; ir++) - { - dkin += std::abs(chr->kin_r[is][ir] - chr->kin_r_save[is][ir]); + for (int ir = 0; ir < this->rhopw->nrxx; ir++) + { + dkin += std::abs (chr->kin_r[is][ir] - chr->kin_r_save[is][ir]); + } } - } #ifdef __MPI - Parallel_Reduce::reduce_pool(dkin); + Parallel_Reduce::reduce_pool (dkin); #endif - assert(nelec != 0); - assert(*this->omega > 0); - assert(this->rhopw->nxyz > 0); - dkin *= *this->omega / static_cast(this->rhopw->nxyz); + assert (nelec != 0); + assert (*this->omega > 0); + assert (this->rhopw->nxyz > 0); + dkin *= *this->omega / static_cast (this->rhopw->nxyz); dkin /= nelec; - ModuleBase::timer::end("Charge_Mixing", "get_dkin"); + ModuleBase::timer::end ("Charge_Mixing", "get_dkin"); return dkin; } -double Charge_Mixing::inner_product_recip_rho(std::complex* rho1, std::complex* rho2) +double + Charge_Mixing::inner_product_recip_rho (std::complex* rho1, std::complex* rho2) { - ModuleBase::TITLE("Charge_Mixing", "recip_rho"); - ModuleBase::timer::start("Charge_Mixing", "recip_rho"); + ModuleBase::TITLE ("Charge_Mixing", "recip_rho"); + ModuleBase::timer::start ("Charge_Mixing", "recip_rho"); std::complex** rhog1 = new std::complex*[PARAM.inp.nspin]; std::complex** rhog2 = new std::complex*[PARAM.inp.nspin]; for (int is = 0; is < PARAM.inp.nspin; is++) - { - rhog1[is] = rho1 + is * this->rhopw->npw; - rhog2[is] = rho2 + is * this->rhopw->npw; - } + { + rhog1[is] = rho1 + is * this->rhopw->npw; + rhog2[is] = rho2 + is * this->rhopw->npw; + } static const double fac = ModuleBase::e2 * ModuleBase::FOUR_PI / ((*this->tpiba) * (*this->tpiba)); static const double fac2 = ModuleBase::e2 * ModuleBase::FOUR_PI / 
(ModuleBase::TWO_PI * ModuleBase::TWO_PI); double sum = 0.0; - auto part_of_noncolin = [&]() - { - double sum = 0.0; - const int ig0 = this->rhopw->ig_gge0; + auto part_of_noncolin = [&] () + { + double sum = 0.0; + const int ig0 = this->rhopw->ig_gge0; #ifdef _OPENMP #pragma omp parallel for reduction(+ : sum) #endif - for (int ig = 0; ig < this->rhopw->npw; ++ig) - { - if (ig == ig0) {continue;} - sum += (conj(rhog1[0][ig]) * rhog2[0][ig]).real() / this->rhopw->gg[ig]; - } - sum *= fac; - return sum; - }; + for (int ig = 0; ig < this->rhopw->npw; ++ig) + { + if (ig == ig0) + { + continue; + } + sum += (conj (rhog1[0][ig]) * rhog2[0][ig]).real () / this->rhopw->gg[ig]; + } + sum *= fac; + return sum; + }; switch (PARAM.inp.nspin) - { - case 1: - sum += part_of_noncolin(); - break; - - case 2: { - // (1) First part of density error. - const int ig0 = this->rhopw->ig_gge0; + { + case 1: + sum += part_of_noncolin (); + break; + + case 2: + { + // (1) First part of density error. + const int ig0 = this->rhopw->ig_gge0; #ifdef _OPENMP #pragma omp parallel for reduction(+ : sum) #endif - for (int ig = 0; ig < this->rhopw->npw; ++ig) - { - if (ig == ig0) {continue;} - sum += (conj(rhog1[0][ig] + rhog1[1][ig]) * (rhog2[0][ig] + rhog2[1][ig])).real() / this->rhopw->gg[ig]; - } - sum *= fac; - - if (PARAM.globalv.gamma_only_pw) - { - sum *= 2.0; - } - - // (2) Second part of density error. - // including |G|=0 term. - double sum2 = 0.0; - - sum2 += fac2 * (conj(rhog1[0][0] - rhog1[1][0]) * (rhog2[0][0] - rhog2[1][0])).real(); - - double mag = 0.0; + for (int ig = 0; ig < this->rhopw->npw; ++ig) + { + if (ig == ig0) + { + continue; + } + sum += (conj (rhog1[0][ig] + rhog1[1][ig]) * (rhog2[0][ig] + rhog2[1][ig])).real () + / this->rhopw->gg[ig]; + } + sum *= fac; + + if (PARAM.globalv.gamma_only_pw) + { + sum *= 2.0; + } + + // (2) Second part of density error. + // including |G|=0 term. 
+ double sum2 = 0.0; + + sum2 += fac2 * (conj (rhog1[0][0] - rhog1[1][0]) * (rhog2[0][0] - rhog2[1][0])).real (); + + double mag = 0.0; #ifdef _OPENMP #pragma omp parallel for reduction(+ : mag) #endif - for (int ig = 0; ig < this->rhopw->npw; ig++) - { - mag += (conj(rhog1[0][ig] - rhog1[1][ig]) * (rhog2[0][ig] - rhog2[1][ig])).real(); - } - mag *= fac2; - - // if(PARAM.globalv.gamma_only_pw); - if (PARAM.globalv.gamma_only_pw) // Peize Lin delete ; 2020.01.31 - { - mag *= 2.0; - } - - // std::cout << " sum=" << sum << " mag=" << mag << std::endl; - sum2 += mag; - sum += sum2; - break; - } - case 4: - // non-collinear spin, added by zhengdy - if (!PARAM.globalv.domag && !PARAM.globalv.domag_z) { - sum += part_of_noncolin(); - } else - { - // another part with magnetization - const int ig0 = this->rhopw->ig_gge0; + for (int ig = 0; ig < this->rhopw->npw; ig++) + { + mag += (conj (rhog1[0][ig] - rhog1[1][ig]) * (rhog2[0][ig] - rhog2[1][ig])).real (); + } + mag *= fac2; + + // if(PARAM.globalv.gamma_only_pw); + if (PARAM.globalv.gamma_only_pw) // Peize Lin delete ; 2020.01.31 + { + mag *= 2.0; + } + + // std::cout << " sum=" << sum << " mag=" << mag << std::endl; + sum2 += mag; + sum += sum2; + break; + } + case 4: + // non-collinear spin, added by zhengdy + if (!PARAM.globalv.domag && !PARAM.globalv.domag_z) + { + sum += part_of_noncolin (); + } + else + { + // another part with magnetization + const int ig0 = this->rhopw->ig_gge0; #ifdef _OPENMP #pragma omp parallel for reduction(+ : sum) #endif - for (int ig = 0; ig < this->rhopw->npw; ig++) - { - if (ig == ig0) - { - continue; - } - sum += (conj(rhog1[0][ig]) * rhog2[0][ig]).real() / this->rhopw->gg[ig]; - } - sum *= fac; - if (ig0 > 0) - { - sum += fac2 - * ((conj(rhog1[1][ig0]) * rhog2[1][ig0]).real() + (conj(rhog1[2][ig0]) * rhog2[2][ig0]).real() - + (conj(rhog1[3][ig0]) * rhog2[3][ig0]).real()); - } - double fac3 = fac2; - if (PARAM.globalv.gamma_only_pw) - { - fac3 *= 2.0; - } + for (int ig = 0; ig < 
this->rhopw->npw; ig++) + { + if (ig == ig0) + { + continue; + } + sum += (conj (rhog1[0][ig]) * rhog2[0][ig]).real () / this->rhopw->gg[ig]; + } + sum *= fac; + if (ig0 > 0) + { + sum += fac2 + * ((conj (rhog1[1][ig0]) * rhog2[1][ig0]).real () + + (conj (rhog1[2][ig0]) * rhog2[2][ig0]).real () + + (conj (rhog1[3][ig0]) * rhog2[3][ig0]).real ()); + } + double fac3 = fac2; + if (PARAM.globalv.gamma_only_pw) + { + fac3 *= 2.0; + } #ifdef _OPENMP #pragma omp parallel for reduction(+ : sum) #endif - for (int ig = 0; ig < this->rhopw->npw; ig++) - { - if (ig == ig0) { - continue; -} - sum += fac3 - * ((conj(rhog1[1][ig]) * rhog2[1][ig]).real() + (conj(rhog1[2][ig]) * rhog2[2][ig]).real() - + (conj(rhog1[3][ig]) * rhog2[3][ig]).real()); - } + for (int ig = 0; ig < this->rhopw->npw; ig++) + { + if (ig == ig0) + { + continue; + } + sum += fac3 + * ((conj (rhog1[1][ig]) * rhog2[1][ig]).real () + + (conj (rhog1[2][ig]) * rhog2[2][ig]).real () + + (conj (rhog1[3][ig]) * rhog2[3][ig]).real ()); + } + } + break; } - break; - } #ifdef __MPI - Parallel_Reduce::reduce_pool(sum); + Parallel_Reduce::reduce_pool (sum); #endif sum *= *this->omega * 0.5; @@ -249,42 +265,46 @@ double Charge_Mixing::inner_product_recip_rho(std::complex* rho1, std::c delete[] rhog1; delete[] rhog2; - ModuleBase::timer::end("Charge_Mixing", "recip_rho"); + ModuleBase::timer::end ("Charge_Mixing", "recip_rho"); return sum; } // a simple inner product, now is not used anywhere. For test only. 
-double Charge_Mixing::inner_product_recip_simple(std::complex* rho1, std::complex* rho2) +double + Charge_Mixing::inner_product_recip_simple (std::complex* rho1, std::complex* rho2) { - ModuleBase::TITLE("Charge_Mixing", "recip_simple"); - ModuleBase::timer::start("Charge_Mixing", "recip_simple"); + ModuleBase::TITLE ("Charge_Mixing", "recip_simple"); + ModuleBase::timer::start ("Charge_Mixing", "recip_simple"); double rnorm = 0.0; // consider a resize for mixing_angle int resize_tmp = 1; - if (PARAM.inp.nspin == 4 && this->mixing_angle > 0) { resize_tmp = 2; -} + if (PARAM.inp.nspin == 4 && this->mixing_angle > 0) + { + resize_tmp = 2; + } #ifdef _OPENMP #pragma omp parallel for reduction(+ : rnorm) #endif for (int ig = 0; ig < this->rhopw->npw * PARAM.inp.nspin / resize_tmp; ++ig) - { - rnorm += (conj(rho1[ig]) * rho2[ig]).real(); - } + { + rnorm += (conj (rho1[ig]) * rho2[ig]).real (); + } #ifdef __MPI - Parallel_Reduce::reduce_pool(rnorm); + Parallel_Reduce::reduce_pool (rnorm); #endif - ModuleBase::timer::end("Charge_Mixing", "recip_simple"); + ModuleBase::timer::end ("Charge_Mixing", "recip_simple"); return rnorm; } // a Hartree-like inner product -double Charge_Mixing::inner_product_recip_hartree(std::complex* rhog1, std::complex* rhog2) +double + Charge_Mixing::inner_product_recip_hartree (std::complex* rhog1, std::complex* rhog2) { - ModuleBase::TITLE("Charge_Mixing", "recip_hartree"); - ModuleBase::timer::start("Charge_Mixing", "recip_hartree"); + ModuleBase::TITLE ("Charge_Mixing", "recip_hartree"); + ModuleBase::timer::start ("Charge_Mixing", "recip_hartree"); static const double fac = ModuleBase::e2 * ModuleBase::FOUR_PI / ((*this->tpiba) * (*this->tpiba)); static const double fac2 = ModuleBase::e2 * ModuleBase::FOUR_PI / (ModuleBase::TWO_PI * ModuleBase::TWO_PI); @@ -293,187 +313,195 @@ double Charge_Mixing::inner_product_recip_hartree(std::complex* rhog1, s const int npw = this->rhopw->npw; // a lambda function for summing the charge density - auto 
part_of_rho = [&]() - { - double sum = 0.0; - const int ig0 = this->rhopw->ig_gge0; + auto part_of_rho = [&] () + { + double sum = 0.0; + const int ig0 = this->rhopw->ig_gge0; #ifdef _OPENMP #pragma omp parallel for reduction(+ : sum) #endif - for (int ig = 0; ig < this->rhopw->npw; ++ig) + for (int ig = 0; ig < this->rhopw->npw; ++ig) + { + if (ig == ig0) + { + continue; + } + sum += (conj (rhog1[ig]) * rhog2[ig]).real () / this->rhopw->gg[ig]; + } + sum *= fac; + return sum; + }; + + if (PARAM.inp.nspin == 1) { - if (ig == ig0) - { - continue; - } - sum += (conj(rhog1[ig]) * rhog2[ig]).real() / this->rhopw->gg[ig]; + sum += part_of_rho (); } - sum *= fac; - return sum; - }; - - if (PARAM.inp.nspin==1) - { - sum += part_of_rho(); - } - else if (PARAM.inp.nspin==2) - { - // charge density part - const int ig0 = this->rhopw->ig_gge0; + else if (PARAM.inp.nspin == 2) + { + // charge density part + const int ig0 = this->rhopw->ig_gge0; #ifdef _OPENMP #pragma omp parallel for reduction(+ : sum) #endif - for (int ig = 0; ig < this->rhopw->npw; ++ig) - { - if (ig == ig0) - { - continue; - } - sum += (conj(rhog1[ig]) * (rhog2[ig])).real() / this->rhopw->gg[ig]; - } - sum *= fac; + for (int ig = 0; ig < this->rhopw->npw; ++ig) + { + if (ig == ig0) + { + continue; + } + sum += (conj (rhog1[ig]) * (rhog2[ig])).real () / this->rhopw->gg[ig]; + } + sum *= fac; - if (PARAM.globalv.gamma_only_pw) - { - sum *= 2.0; - } + if (PARAM.globalv.gamma_only_pw) + { + sum *= 2.0; + } - // (2) Second part of density error. - // including |G|=0 term. - double sum2 = 0.0; + // (2) Second part of density error. + // including |G|=0 term. 
+ double sum2 = 0.0; - sum2 += fac2 * (conj(rhog1[0 + this->rhopw->npw]) * rhog2[0 + this->rhopw->npw]).real(); + sum2 += fac2 * (conj (rhog1[0 + this->rhopw->npw]) * rhog2[0 + this->rhopw->npw]).real (); - double mag = 0.0; + double mag = 0.0; #ifdef _OPENMP #pragma omp parallel for reduction(+ : mag) #endif - for (int ig = 0; ig < this->rhopw->npw; ig++) - { - mag += (conj(rhog1[ig + this->rhopw->npw]) * rhog2[ig + this->rhopw->npw]).real(); - } - mag *= fac2; + for (int ig = 0; ig < this->rhopw->npw; ig++) + { + mag += (conj (rhog1[ig + this->rhopw->npw]) * rhog2[ig + this->rhopw->npw]).real (); + } + mag *= fac2; - if (PARAM.globalv.gamma_only_pw) - { - mag *= 2.0; - } + if (PARAM.globalv.gamma_only_pw) + { + mag *= 2.0; + } - sum2 += mag; - sum += sum2; - } - else if (PARAM.inp.nspin==4) - { - if (!PARAM.globalv.domag && !PARAM.globalv.domag_z) - { - sum += part_of_rho(); + sum2 += mag; + sum += sum2; } - else if (this->mixing_angle <= 0) + else if (PARAM.inp.nspin == 4) { - // sum for tradtional mixing - const int ig0 = this->rhopw->ig_gge0; + if (!PARAM.globalv.domag && !PARAM.globalv.domag_z) + { + sum += part_of_rho (); + } + else if (this->mixing_angle <= 0) + { + // sum for tradtional mixing + const int ig0 = this->rhopw->ig_gge0; #ifdef _OPENMP #pragma omp parallel for reduction(+ : sum) #endif - for (int ig = 0; ig < this->rhopw->npw; ig++) - { - if (ig == ig0) {continue;} - sum += (conj(rhog1[ig]) * rhog2[ig]).real() / this->rhopw->gg[ig]; - } - sum *= fac; - if (ig0 > 0) - { - sum += fac2 - * ((conj(rhog1[ig0 + npw]) * rhog2[ig0 + npw]).real() + (conj(rhog1[ig0 + 2*npw]) * rhog2[ig0 + 2*npw]).real() - + (conj(rhog1[ig0 + 3*npw]) * rhog2[ig0 + 3*npw]).real()); - } - double fac3 = fac2; - if (PARAM.globalv.gamma_only_pw) - { - fac3 *= 2.0; - } + for (int ig = 0; ig < this->rhopw->npw; ig++) + { + if (ig == ig0) + { + continue; + } + sum += (conj (rhog1[ig]) * rhog2[ig]).real () / this->rhopw->gg[ig]; + } + sum *= fac; + if (ig0 > 0) + { + sum += fac2 + 
* ((conj (rhog1[ig0 + npw]) * rhog2[ig0 + npw]).real () + + (conj (rhog1[ig0 + 2 * npw]) * rhog2[ig0 + 2 * npw]).real () + + (conj (rhog1[ig0 + 3 * npw]) * rhog2[ig0 + 3 * npw]).real ()); + } + double fac3 = fac2; + if (PARAM.globalv.gamma_only_pw) + { + fac3 *= 2.0; + } #ifdef _OPENMP #pragma omp parallel for reduction(+ : sum) #endif - for (int ig = 0; ig < this->rhopw->npw; ig++) - { - if (ig == ig0) { - continue; -} - sum += fac3 - * ((conj(rhog1[ig + npw]) * rhog2[ig + npw]).real() + (conj(rhog1[ig + 2*npw]) * rhog2[ig + 2*npw]).real() - + (conj(rhog1[ig + 3*npw]) * rhog2[ig + 3*npw]).real()); - } - } - else if (this->mixing_angle > 0) - { - // sum for angle mixing - const int ig0 = this->rhopw->ig_gge0; + for (int ig = 0; ig < this->rhopw->npw; ig++) + { + if (ig == ig0) + { + continue; + } + sum += fac3 + * ((conj (rhog1[ig + npw]) * rhog2[ig + npw]).real () + + (conj (rhog1[ig + 2 * npw]) * rhog2[ig + 2 * npw]).real () + + (conj (rhog1[ig + 3 * npw]) * rhog2[ig + 3 * npw]).real ()); + } + } + else if (this->mixing_angle > 0) + { + // sum for angle mixing + const int ig0 = this->rhopw->ig_gge0; #ifdef _OPENMP #pragma omp parallel for reduction(+ : sum) #endif - for (int ig = 0; ig < this->rhopw->npw; ig++) - { - if (ig == ig0) - { - continue; - } - sum += (conj(rhog1[ig]) * rhog2[ig]).real() / this->rhopw->gg[ig]; - } - sum *= fac; - if (ig0 > 0) - { - sum += fac2 - * ((conj(rhog1[ig0 + this->rhopw->npw]) * rhog2[ig0 + this->rhopw->npw]).real()); - } - double fac3 = fac2; - if (PARAM.globalv.gamma_only_pw) - { - fac3 *= 2.0; - } + for (int ig = 0; ig < this->rhopw->npw; ig++) + { + if (ig == ig0) + { + continue; + } + sum += (conj (rhog1[ig]) * rhog2[ig]).real () / this->rhopw->gg[ig]; + } + sum *= fac; + if (ig0 > 0) + { + sum += fac2 + * ((conj (rhog1[ig0 + this->rhopw->npw]) * rhog2[ig0 + this->rhopw->npw]).real ()); + } + double fac3 = fac2; + if (PARAM.globalv.gamma_only_pw) + { + fac3 *= 2.0; + } #ifdef _OPENMP #pragma omp parallel for reduction(+ : 
sum) #endif - for (int ig = 0; ig < this->rhopw->npw; ig++) - { - if (ig == ig0) { - continue; -} - sum += fac3 - * ((conj(rhog1[ig + this->rhopw->npw]) * rhog2[ig + this->rhopw->npw]).real()); - } + for (int ig = 0; ig < this->rhopw->npw; ig++) + { + if (ig == ig0) + { + continue; + } + sum += fac3 + * ((conj (rhog1[ig + this->rhopw->npw]) * rhog2[ig + this->rhopw->npw]).real ()); + } + } } - } #ifdef __MPI - Parallel_Reduce::reduce_pool(sum); + Parallel_Reduce::reduce_pool (sum); #endif sum *= *this->omega * 0.5; - ModuleBase::timer::end("Charge_Mixing", "recip_hartree"); + ModuleBase::timer::end ("Charge_Mixing", "recip_hartree"); return sum; } -double Charge_Mixing::inner_product_real(double* rho1, double* rho2) +double + Charge_Mixing::inner_product_real (double* rho1, double* rho2) { double rnorm = 0.0; // consider a resize for mixing_angle int resize_tmp = 1; - if (PARAM.inp.nspin == 4 && this->mixing_angle > 0) - { - resize_tmp = 2; - } + if (PARAM.inp.nspin == 4 && this->mixing_angle > 0) + { + resize_tmp = 2; + } #ifdef _OPENMP #pragma omp parallel for reduction(+ : rnorm) #endif for (int ir = 0; ir < this->rhopw->nrxx * PARAM.inp.nspin / resize_tmp; ++ir) - { - rnorm += rho1[ir] * rho2[ir]; - } + { + rnorm += rho1[ir] * rho2[ir]; + } #ifdef __MPI - Parallel_Reduce::reduce_pool(rnorm); + Parallel_Reduce::reduce_pool (rnorm); #endif return rnorm; } diff --git a/source/source_estate/module_charge/charge_mixing_rho.cpp b/source/source_estate/module_charge/charge_mixing_rho.cpp index 3d8f302b572..4c087889447 100644 --- a/source/source_estate/module_charge/charge_mixing_rho.cpp +++ b/source/source_estate/module_charge/charge_mixing_rho.cpp @@ -3,13 +3,14 @@ #include "source_base/timer.h" #include "source_hamilt/module_xc/xc_functional.h" -void Charge_Mixing::mix_rho_recip(Charge* chr) +void + Charge_Mixing::mix_rho_recip (Charge* chr) { - ModuleBase::TITLE("Charge_Mixing", "mix_rho_recip"); - ModuleBase::timer::start("Charge_Mixing", "mix_rho_recip"); + 
ModuleBase::TITLE ("Charge_Mixing", "mix_rho_recip"); + ModuleBase::timer::start ("Charge_Mixing", "mix_rho_recip"); const int nspin = PARAM.inp.nspin; - assert(nspin==1 || nspin==2 || nspin==4); + assert (nspin == 1 || nspin == 2 || nspin == 4); std::complex* rhog_in = nullptr; std::complex* rhog_out = nullptr; @@ -20,594 +21,600 @@ void Charge_Mixing::mix_rho_recip(Charge* chr) std::complex* rhoghf_in = nullptr; std::complex* rhoghf_out = nullptr; - if ( PARAM.globalv.double_grid) - { - // divide into smooth part and high_frequency part - divide_data(chr->rhog_save[0], rhogs_in, rhoghf_in); - divide_data(chr->rhog[0], rhogs_out, rhoghf_out); - } + if (PARAM.globalv.double_grid) + { + // divide into smooth part and high_frequency part + divide_data (chr->rhog_save[0], rhogs_in, rhoghf_in); + divide_data (chr->rhog[0], rhogs_out, rhoghf_out); + } // inner_product_recip_hartree is a hartree-like sum, unit is Ry auto inner_product - = std::bind(&Charge_Mixing::inner_product_recip_hartree, this, std::placeholders::_1, std::placeholders::_2); + = std::bind (&Charge_Mixing::inner_product_recip_hartree, this, std::placeholders::_1, std::placeholders::_2); // DIIS Mixing Only for smooth part, while high_frequency part is mixed by plain mixing method. 
if (nspin == 1) - { - rhog_in = rhogs_in; - rhog_out = rhogs_out; - auto screen = std::bind(&Charge_Mixing::Kerker_screen_recip, this, std::placeholders::_1); - this->mixing->push_data(this->rho_mdata, rhog_in, rhog_out, screen, true); - this->mixing->cal_coef(this->rho_mdata, inner_product); - this->mixing->mix_data(this->rho_mdata, rhog_out); - } - else if (nspin == 2) - { - // magnetic density - std::complex *rhog_mag = nullptr; - std::complex *rhog_mag_save = nullptr; - const int npw = this->rhopw->npw; - // allocate rhog_mag[is*ngmc] and rhog_mag_save[is*ngmc] - rhog_mag = new std::complex[npw * nspin]; - rhog_mag_save = new std::complex[npw * nspin]; - ModuleBase::GlobalFunc::ZEROS(rhog_mag, npw * nspin); - ModuleBase::GlobalFunc::ZEROS(rhog_mag_save, npw * nspin); - // get rhog_mag[is*ngmc] and rhog_mag_save[is*ngmc] - for (int ig = 0; ig < npw; ig++) { - rhog_mag[ig] = chr->rhog[0][ig] + chr->rhog[1][ig]; - rhog_mag_save[ig] = chr->rhog_save[0][ig] + chr->rhog_save[1][ig]; + rhog_in = rhogs_in; + rhog_out = rhogs_out; + auto screen = std::bind (&Charge_Mixing::Kerker_screen_recip, this, std::placeholders::_1); + this->mixing->push_data (this->rho_mdata, rhog_in, rhog_out, screen, true); + this->mixing->cal_coef (this->rho_mdata, inner_product); + this->mixing->mix_data (this->rho_mdata, rhog_out); } - for (int ig = 0; ig < npw; ig++) + else if (nspin == 2) { - rhog_mag[ig + npw] = chr->rhog[0][ig] - chr->rhog[1][ig]; - rhog_mag_save[ig + npw] = chr->rhog_save[0][ig] - chr->rhog_save[1][ig]; - } - // - rhog_in = rhog_mag_save; - rhog_out = rhog_mag; - // - auto screen = std::bind(&Charge_Mixing::Kerker_screen_recip, this, std::placeholders::_1); - auto twobeta_mix - = [this, npw](std::complex* out, const std::complex* in, const std::complex* sres) { + // magnetic density + std::complex* rhog_mag = nullptr; + std::complex* rhog_mag_save = nullptr; + const int npw = this->rhopw->npw; + // allocate rhog_mag[is*ngmc] and rhog_mag_save[is*ngmc] + rhog_mag = new 
std::complex[npw * nspin]; + rhog_mag_save = new std::complex[npw * nspin]; + ModuleBase::GlobalFunc::ZEROS (rhog_mag, npw * nspin); + ModuleBase::GlobalFunc::ZEROS (rhog_mag_save, npw * nspin); + // get rhog_mag[is*ngmc] and rhog_mag_save[is*ngmc] + for (int ig = 0; ig < npw; ig++) + { + rhog_mag[ig] = chr->rhog[0][ig] + chr->rhog[1][ig]; + rhog_mag_save[ig] = chr->rhog_save[0][ig] + chr->rhog_save[1][ig]; + } + for (int ig = 0; ig < npw; ig++) + { + rhog_mag[ig + npw] = chr->rhog[0][ig] - chr->rhog[1][ig]; + rhog_mag_save[ig + npw] = chr->rhog_save[0][ig] - chr->rhog_save[1][ig]; + } + // + rhog_in = rhog_mag_save; + rhog_out = rhog_mag; + // + auto screen = std::bind (&Charge_Mixing::Kerker_screen_recip, this, std::placeholders::_1); + auto twobeta_mix + = [this, + npw] (std::complex* out, const std::complex* in, const std::complex* sres) + { #ifdef _OPENMP #pragma omp parallel for schedule(static, 256) #endif - for (int i = 0; i < npw; ++i) - { - out[i] = in[i] + this->mixing_beta * sres[i]; - } - // magnetism + for (int i = 0; i < npw; ++i) + { + out[i] = in[i] + this->mixing_beta * sres[i]; + } + // magnetism #ifdef _OPENMP #pragma omp parallel for schedule(static, 256) #endif - for (int i = npw; i < 2 * npw; ++i) - { - out[i] = in[i] + this->mixing_beta_mag * sres[i]; - } - }; - this->mixing->push_data(this->rho_mdata, rhog_in, rhog_out, screen, twobeta_mix, true); - this->mixing->cal_coef(this->rho_mdata, inner_product); - this->mixing->mix_data(this->rho_mdata, rhog_out); - // get rhog[is][ngmc] from rhog_mag[is*ngmc] - for (int is = 0; is < nspin; is++) - { - ModuleBase::GlobalFunc::ZEROS(chr->rhog[is], npw); - } - for (int ig = 0; ig < npw; ig++) - { - chr->rhog[0][ig] = 0.5 * (rhog_mag[ig] + rhog_mag[ig+npw]); - chr->rhog[1][ig] = 0.5 * (rhog_mag[ig] - rhog_mag[ig+npw]); - } - // delete - delete[] rhog_mag; - delete[] rhog_mag_save; - // get rhogs_out for combine_data() - if ( PARAM.globalv.double_grid) - { + for (int i = npw; i < 2 * npw; ++i) + { + 
out[i] = in[i] + this->mixing_beta_mag * sres[i]; + } + }; + this->mixing->push_data (this->rho_mdata, rhog_in, rhog_out, screen, twobeta_mix, true); + this->mixing->cal_coef (this->rho_mdata, inner_product); + this->mixing->mix_data (this->rho_mdata, rhog_out); + // get rhog[is][ngmc] from rhog_mag[is*ngmc] + for (int is = 0; is < nspin; is++) + { + ModuleBase::GlobalFunc::ZEROS (chr->rhog[is], npw); + } for (int ig = 0; ig < npw; ig++) - { - rhogs_out[ig] = chr->rhog[0][ig]; - rhogs_out[ig + npw] = chr->rhog[1][ig]; - } + { + chr->rhog[0][ig] = 0.5 * (rhog_mag[ig] + rhog_mag[ig + npw]); + chr->rhog[1][ig] = 0.5 * (rhog_mag[ig] - rhog_mag[ig + npw]); + } + // delete + delete[] rhog_mag; + delete[] rhog_mag_save; + // get rhogs_out for combine_data() + if (PARAM.globalv.double_grid) + { + for (int ig = 0; ig < npw; ig++) + { + rhogs_out[ig] = chr->rhog[0][ig]; + rhogs_out[ig + npw] = chr->rhog[1][ig]; + } + } } - } else if (nspin == 4 && PARAM.inp.mixing_angle <= 0) - { - // normal broyden mixing for {rho, mx, my, mz} - rhog_in = rhogs_in; - rhog_out = rhogs_out; - const int npw = this->rhopw->npw; - auto screen = std::bind(&Charge_Mixing::Kerker_screen_recip, this, std::placeholders::_1); // use old one - auto twobeta_mix - = [this, npw](std::complex* out, const std::complex* in, const std::complex* sres) { + { + // normal broyden mixing for {rho, mx, my, mz} + rhog_in = rhogs_in; + rhog_out = rhogs_out; + const int npw = this->rhopw->npw; + auto screen = std::bind (&Charge_Mixing::Kerker_screen_recip, this, std::placeholders::_1); // use old one + auto twobeta_mix + = [this, + npw] (std::complex* out, const std::complex* in, const std::complex* sres) + { #ifdef _OPENMP #pragma omp parallel for schedule(static, 256) #endif - for (int i = 0; i < npw; ++i) - { - out[i] = in[i] + this->mixing_beta * sres[i]; - } - // magnetism, mx, my, mz + for (int i = 0; i < npw; ++i) + { + out[i] = in[i] + this->mixing_beta * sres[i]; + } + // magnetism, mx, my, mz #ifdef _OPENMP 
#pragma omp parallel for schedule(static, 256) #endif - for (int i = npw; i < 4 * npw; ++i) - { - out[i] = in[i] + this->mixing_beta_mag * sres[i]; - } - }; - this->mixing->push_data(this->rho_mdata, rhog_in, rhog_out, screen, twobeta_mix, true); - this->mixing->cal_coef(this->rho_mdata, inner_product); - this->mixing->mix_data(this->rho_mdata, rhog_out); - } - else if (nspin == 4 && PARAM.inp.mixing_angle > 0) - { - // special broyden mixing for {rho, |m|} proposed by J. Phys. Soc. Jpn. 82 (2013) 114706 - // here only consider the case of mixing_angle = 1, which mean only change |m| and keep angle fixed - // old support see mix_rho_recip() - if ( PARAM.globalv.double_grid) - { - ModuleBase::WARNING_QUIT("Charge_Mixing", "double_grid is not supported for new mixing method yet."); + for (int i = npw; i < 4 * npw; ++i) + { + out[i] = in[i] + this->mixing_beta_mag * sres[i]; + } + }; + this->mixing->push_data (this->rho_mdata, rhog_in, rhog_out, screen, twobeta_mix, true); + this->mixing->cal_coef (this->rho_mdata, inner_product); + this->mixing->mix_data (this->rho_mdata, rhog_out); } - // allocate memory for rho_magabs and rho_magabs_save - const int nrxx = this->rhopw->nrxx; - double* rho_magabs = new double[nrxx]; - double* rho_magabs_save = new double[nrxx]; - ModuleBase::GlobalFunc::ZEROS(rho_magabs, nrxx); - ModuleBase::GlobalFunc::ZEROS(rho_magabs_save, nrxx); - // calculate rho_magabs and rho_magabs_save - for (int ir = 0; ir < nrxx; ir++) - { - // |m| for rho - rho_magabs[ir] = std::sqrt(chr->rho[1][ir] * chr->rho[1][ir] - + chr->rho[2][ir] * chr->rho[2][ir] - + chr->rho[3][ir] * chr->rho[3][ir]); - // |m| for rho_save - rho_magabs_save[ir] = std::sqrt(chr->rho_save[1][ir] * chr->rho_save[1][ir] - + chr->rho_save[2][ir] * chr->rho_save[2][ir] - + chr->rho_save[3][ir] * chr->rho_save[3][ir]); - } - // allocate memory for rhog_magabs and rhog_magabs_save - const int npw = this->rhopw->npw; - std::complex* rhog_magabs = new std::complex[npw * 2]; - 
std::complex* rhog_magabs_save = new std::complex[npw * 2]; - ModuleBase::GlobalFunc::ZEROS(rhog_magabs, npw * 2); - ModuleBase::GlobalFunc::ZEROS(rhog_magabs_save, npw * 2); - // calculate rhog_magabs and rhog_magabs_save - for (int ig = 0; ig < npw; ig++) + else if (nspin == 4 && PARAM.inp.mixing_angle > 0) { - rhog_magabs[ig] = chr->rhog[0][ig]; // rho - rhog_magabs_save[ig] = chr->rhog_save[0][ig]; // rho_save - } - // FT to get rhog_magabs and rhog_magabs_save - this->rhopw->real2recip(rho_magabs, rhog_magabs + this->rhopw->npw); - this->rhopw->real2recip(rho_magabs_save, rhog_magabs_save + this->rhopw->npw); - // - rhog_in = rhog_magabs_save; - rhog_out = rhog_magabs; - auto screen = std::bind(&Charge_Mixing::Kerker_screen_recip, this, std::placeholders::_1); // use old one - auto twobeta_mix - = [this, npw](std::complex* out, const std::complex* in, const std::complex* sres) { + // special broyden mixing for {rho, |m|} proposed by J. Phys. Soc. Jpn. 82 (2013) 114706 + // here only consider the case of mixing_angle = 1, which mean only change |m| and keep angle fixed + // old support see mix_rho_recip() + if (PARAM.globalv.double_grid) + { + ModuleBase::WARNING_QUIT ("Charge_Mixing", + "double_grid is not supported for new mixing method yet."); + } + // allocate memory for rho_magabs and rho_magabs_save + const int nrxx = this->rhopw->nrxx; + double* rho_magabs = new double[nrxx]; + double* rho_magabs_save = new double[nrxx]; + ModuleBase::GlobalFunc::ZEROS (rho_magabs, nrxx); + ModuleBase::GlobalFunc::ZEROS (rho_magabs_save, nrxx); + // calculate rho_magabs and rho_magabs_save + for (int ir = 0; ir < nrxx; ir++) + { + // |m| for rho + rho_magabs[ir] = std::sqrt (chr->rho[1][ir] * chr->rho[1][ir] + chr->rho[2][ir] * chr->rho[2][ir] + + chr->rho[3][ir] * chr->rho[3][ir]); + // |m| for rho_save + rho_magabs_save[ir] = std::sqrt (chr->rho_save[1][ir] * chr->rho_save[1][ir] + + chr->rho_save[2][ir] * chr->rho_save[2][ir] + + chr->rho_save[3][ir] * 
chr->rho_save[3][ir]); + } + // allocate memory for rhog_magabs and rhog_magabs_save + const int npw = this->rhopw->npw; + std::complex* rhog_magabs = new std::complex[npw * 2]; + std::complex* rhog_magabs_save = new std::complex[npw * 2]; + ModuleBase::GlobalFunc::ZEROS (rhog_magabs, npw * 2); + ModuleBase::GlobalFunc::ZEROS (rhog_magabs_save, npw * 2); + // calculate rhog_magabs and rhog_magabs_save + for (int ig = 0; ig < npw; ig++) + { + rhog_magabs[ig] = chr->rhog[0][ig]; // rho + rhog_magabs_save[ig] = chr->rhog_save[0][ig]; // rho_save + } + // FT to get rhog_magabs and rhog_magabs_save + this->rhopw->real2recip (rho_magabs, rhog_magabs + this->rhopw->npw); + this->rhopw->real2recip (rho_magabs_save, rhog_magabs_save + this->rhopw->npw); + // + rhog_in = rhog_magabs_save; + rhog_out = rhog_magabs; + auto screen = std::bind (&Charge_Mixing::Kerker_screen_recip, this, std::placeholders::_1); // use old one + auto twobeta_mix + = [this, + npw] (std::complex* out, const std::complex* in, const std::complex* sres) + { #ifdef _OPENMP #pragma omp parallel for schedule(static, 256) #endif - for (int i = 0; i < npw; ++i) - { - out[i] = in[i] + this->mixing_beta * sres[i]; - } - // magnetism, |m| + for (int i = 0; i < npw; ++i) + { + out[i] = in[i] + this->mixing_beta * sres[i]; + } + // magnetism, |m| #ifdef _OPENMP #pragma omp parallel for schedule(static, 256) #endif - for (int i = npw; i < 2 * npw; ++i) - { - out[i] = in[i] + this->mixing_beta_mag * sres[i]; - } - }; - this->mixing->push_data(this->rho_mdata, rhog_in, rhog_out, screen, twobeta_mix, true); - this->mixing->cal_coef(this->rho_mdata, inner_product); - this->mixing->mix_data(this->rho_mdata, rhog_out); - // get new |m| in real space using FT - this->rhopw->recip2real(rhog_magabs + this->rhopw->npw, rho_magabs); - // use new |m| and angle to update {mx, my, mz} - for (int ig = 0; ig < npw; ig++) + for (int i = npw; i < 2 * npw; ++i) + { + out[i] = in[i] + this->mixing_beta_mag * sres[i]; + } + }; + 
this->mixing->push_data (this->rho_mdata, rhog_in, rhog_out, screen, twobeta_mix, true); + this->mixing->cal_coef (this->rho_mdata, inner_product); + this->mixing->mix_data (this->rho_mdata, rhog_out); + // get new |m| in real space using FT + this->rhopw->recip2real (rhog_magabs + this->rhopw->npw, rho_magabs); + // use new |m| and angle to update {mx, my, mz} + for (int ig = 0; ig < npw; ig++) + { + chr->rhog[0][ig] = rhog_magabs[ig]; // rhog + double norm = std::sqrt (chr->rho[1][ig] * chr->rho[1][ig] + chr->rho[2][ig] * chr->rho[2][ig] + + chr->rho[3][ig] * chr->rho[3][ig]); + if (std::abs (norm) < 1e-10) + { + continue; + } + double rescale_tmp = rho_magabs[npw + ig] / norm; + chr->rho[1][ig] *= rescale_tmp; + chr->rho[2][ig] *= rescale_tmp; + chr->rho[3][ig] *= rescale_tmp; + } + // delete + delete[] rhog_magabs; + delete[] rhog_magabs_save; + delete[] rho_magabs; + delete[] rho_magabs_save; + } + + if (PARAM.globalv.double_grid) { - chr->rhog[0][ig] = rhog_magabs[ig]; // rhog - double norm = std::sqrt(chr->rho[1][ig] * chr->rho[1][ig] - + chr->rho[2][ig] * chr->rho[2][ig] - + chr->rho[3][ig] * chr->rho[3][ig]); - if (std::abs(norm) < 1e-10) - { - continue; - } - double rescale_tmp = rho_magabs[npw + ig] / norm; - chr->rho[1][ig] *= rescale_tmp; - chr->rho[2][ig] *= rescale_tmp; - chr->rho[3][ig] *= rescale_tmp; + // plain mixing for high_frequencies + const int ndimhf = (this->rhodpw->npw - this->rhopw->npw) * nspin; + this->mixing_highf->plain_mix (rhoghf_out, rhoghf_in, rhoghf_out, ndimhf, nullptr); + + // combine smooth part and high_frequency part + combine_data (chr->rhog[0], rhogs_out, rhoghf_out); + clean_data (rhogs_in, rhoghf_in); } - // delete - delete[] rhog_magabs; - delete[] rhog_magabs_save; - delete[] rho_magabs; - delete[] rho_magabs_save; - } - - if ( PARAM.globalv.double_grid) - { - // plain mixing for high_frequencies - const int ndimhf = (this->rhodpw->npw - this->rhopw->npw) * nspin; - this->mixing_highf->plain_mix(rhoghf_out, rhoghf_in, 
rhoghf_out, ndimhf, nullptr); - - // combine smooth part and high_frequency part - combine_data(chr->rhog[0], rhogs_out, rhoghf_out); - clean_data(rhogs_in, rhoghf_in); - } // rhog to rho if (nspin == 4 && PARAM.inp.mixing_angle > 0) - { - // only tranfer rhog[0] - // do not support double_grid, use rhopw directly - chr->rhopw->recip2real(chr->rhog[0], chr->rho[0]); - } - else - { - for (int is = 0; is < nspin; is++) - { - // use rhodpw for double_grid - // rhodpw is the same as rhopw for ! PARAM.globalv.double_grid - this->rhodpw->recip_to_real,double,base_device::DEVICE_CPU>(chr->rhog[is], chr->rho[is]); - } - } - // For kinetic energy density - if ((XC_Functional::get_ked_flag()) && mixing_tau) - { - std::vector> kin_g(nspin * rhodpw->npw); - std::vector> kin_g_save(nspin * rhodpw->npw); - // FFT to get kin_g and kin_g_save - for (int is = 0; is < nspin; ++is) { - rhodpw->real2recip(chr->kin_r[is], &kin_g[is * rhodpw->npw]); - rhodpw->real2recip(chr->kin_r_save[is], &kin_g_save[is * rhodpw->npw]); + // only tranfer rhog[0] + // do not support double_grid, use rhopw directly + chr->rhopw->recip2real (chr->rhog[0], chr->rho[0]); } - // for smooth part, for ! PARAM.globalv.double_grid only have this part - std::complex*taugs_in = kin_g_save.data(), *taugs_out = kin_g.data(); - // for high frequency part - std::complex*taughf_in = nullptr, *taughf_out = nullptr; - if ( PARAM.globalv.double_grid) - { - // divide into smooth part and high_frequency part - divide_data(kin_g_save.data(), taugs_in, taughf_in); - divide_data(kin_g.data(), taugs_out, taughf_out); - } - - // Note: there is no kerker modification for tau because I'm not sure - // if we should have it. If necessary we can try it in the future. 
- this->mixing->push_data(this->tau_mdata, taugs_in, taugs_out, nullptr, false); - - this->mixing->mix_data(this->tau_mdata, taugs_out); - - if ( PARAM.globalv.double_grid) + else { - // simple mixing for high_frequencies - const int ndimhf = (this->rhodpw->npw - this->rhopw->npw) * nspin; - this->mixing_highf->plain_mix(taughf_out, taughf_in, taughf_out, ndimhf, nullptr); - - // combine smooth part and high_frequency part - combine_data(kin_g.data(), taugs_out, taughf_out); - clean_data(taugs_in, taughf_in); + for (int is = 0; is < nspin; is++) + { + // use rhodpw for double_grid + // rhodpw is the same as rhopw for ! PARAM.globalv.double_grid + this->rhodpw->recip_to_real, double, base_device::DEVICE_CPU> (chr->rhog[is], + chr->rho[is]); + } } - - // kin_g to kin_r - for (int is = 0; is < nspin; is++) + // For kinetic energy density + if ((XC_Functional::get_ked_flag ()) && mixing_tau) { - rhodpw->recip2real(&kin_g[is * rhodpw->npw], chr->kin_r[is]); + std::vector> kin_g (nspin * rhodpw->npw); + std::vector> kin_g_save (nspin * rhodpw->npw); + // FFT to get kin_g and kin_g_save + for (int is = 0; is < nspin; ++is) + { + rhodpw->real2recip (chr->kin_r[is], &kin_g[is * rhodpw->npw]); + rhodpw->real2recip (chr->kin_r_save[is], &kin_g_save[is * rhodpw->npw]); + } + // for smooth part, for ! PARAM.globalv.double_grid only have this part + std::complex*taugs_in = kin_g_save.data (), *taugs_out = kin_g.data (); + // for high frequency part + std::complex*taughf_in = nullptr, *taughf_out = nullptr; + if (PARAM.globalv.double_grid) + { + // divide into smooth part and high_frequency part + divide_data (kin_g_save.data (), taugs_in, taughf_in); + divide_data (kin_g.data (), taugs_out, taughf_out); + } + + // Note: there is no kerker modification for tau because I'm not sure + // if we should have it. If necessary we can try it in the future. 
+ this->mixing->push_data (this->tau_mdata, taugs_in, taugs_out, nullptr, false); + + this->mixing->mix_data (this->tau_mdata, taugs_out); + + if (PARAM.globalv.double_grid) + { + // simple mixing for high_frequencies + const int ndimhf = (this->rhodpw->npw - this->rhopw->npw) * nspin; + this->mixing_highf->plain_mix (taughf_out, taughf_in, taughf_out, ndimhf, nullptr); + + // combine smooth part and high_frequency part + combine_data (kin_g.data (), taugs_out, taughf_out); + clean_data (taugs_in, taughf_in); + } + + // kin_g to kin_r + for (int is = 0; is < nspin; is++) + { + rhodpw->recip2real (&kin_g[is * rhodpw->npw], chr->kin_r[is]); + } } - } - ModuleBase::timer::end("Charge_Mixing", "mix_rho_recip"); + ModuleBase::timer::end ("Charge_Mixing", "mix_rho_recip"); return; } -void Charge_Mixing::mix_rho_real(Charge* chr) +void + Charge_Mixing::mix_rho_real (Charge* chr) { - ModuleBase::TITLE("Charge_Mixing", "mix_rho_real"); - ModuleBase::timer::start("Charge_Mixing", "mix_rho_real"); + ModuleBase::TITLE ("Charge_Mixing", "mix_rho_real"); + ModuleBase::timer::start ("Charge_Mixing", "mix_rho_real"); const int nspin = PARAM.inp.nspin; - assert(nspin==1 || nspin==2 || nspin==4); + assert (nspin == 1 || nspin == 2 || nspin == 4); - double* rhor_in=nullptr; - double* rhor_out=nullptr; + double* rhor_in = nullptr; + double* rhor_out = nullptr; if (nspin == 1) - { - rhor_in = chr->rho_save[0]; - rhor_out = chr->rho[0]; - auto screen = std::bind(&Charge_Mixing::Kerker_screen_real, this, std::placeholders::_1); - this->mixing->push_data(this->rho_mdata, rhor_in, rhor_out, screen, true); - auto inner_product - = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2); - this->mixing->cal_coef(this->rho_mdata, inner_product); - this->mixing->mix_data(this->rho_mdata, rhor_out); - } - else if (nspin == 2) - { - // magnetic density - double *rho_mag = nullptr; - double *rho_mag_save = nullptr; - const int nrxx = this->rhopw->nrxx; - 
// allocate rho_mag[is*nnrx] and rho_mag_save[is*nnrx] - rho_mag = new double[nrxx * nspin]; - rho_mag_save = new double[nrxx * nspin]; - ModuleBase::GlobalFunc::ZEROS(rho_mag, nrxx * nspin); - ModuleBase::GlobalFunc::ZEROS(rho_mag_save, nrxx * nspin); - // get rho_mag[is*nnrx] and rho_mag_save[is*nnrx] - for (int ir = 0; ir < nrxx; ir++) { - rho_mag[ir] = chr->rho[0][ir] + chr->rho[1][ir]; - rho_mag_save[ir] = chr->rho_save[0][ir] + chr->rho_save[1][ir]; + rhor_in = chr->rho_save[0]; + rhor_out = chr->rho[0]; + auto screen = std::bind (&Charge_Mixing::Kerker_screen_real, this, std::placeholders::_1); + this->mixing->push_data (this->rho_mdata, rhor_in, rhor_out, screen, true); + auto inner_product + = std::bind (&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2); + this->mixing->cal_coef (this->rho_mdata, inner_product); + this->mixing->mix_data (this->rho_mdata, rhor_out); } - for (int ir = 0; ir < nrxx; ir++) + else if (nspin == 2) { - rho_mag[ir + nrxx] = chr->rho[0][ir] - chr->rho[1][ir]; - rho_mag_save[ir + nrxx] = chr->rho_save[0][ir] - chr->rho_save[1][ir]; - } - // - rhor_in = rho_mag_save; - rhor_out = rho_mag; - auto screen = std::bind(&Charge_Mixing::Kerker_screen_real, this, std::placeholders::_1); - auto twobeta_mix - = [this, nrxx](double* out, const double* in, const double* sres) { + // magnetic density + double* rho_mag = nullptr; + double* rho_mag_save = nullptr; + const int nrxx = this->rhopw->nrxx; + // allocate rho_mag[is*nnrx] and rho_mag_save[is*nnrx] + rho_mag = new double[nrxx * nspin]; + rho_mag_save = new double[nrxx * nspin]; + ModuleBase::GlobalFunc::ZEROS (rho_mag, nrxx * nspin); + ModuleBase::GlobalFunc::ZEROS (rho_mag_save, nrxx * nspin); + // get rho_mag[is*nnrx] and rho_mag_save[is*nnrx] + for (int ir = 0; ir < nrxx; ir++) + { + rho_mag[ir] = chr->rho[0][ir] + chr->rho[1][ir]; + rho_mag_save[ir] = chr->rho_save[0][ir] + chr->rho_save[1][ir]; + } + for (int ir = 0; ir < nrxx; ir++) + { + 
rho_mag[ir + nrxx] = chr->rho[0][ir] - chr->rho[1][ir]; + rho_mag_save[ir + nrxx] = chr->rho_save[0][ir] - chr->rho_save[1][ir]; + } + // + rhor_in = rho_mag_save; + rhor_out = rho_mag; + auto screen = std::bind (&Charge_Mixing::Kerker_screen_real, this, std::placeholders::_1); + auto twobeta_mix = [this, nrxx] (double* out, const double* in, const double* sres) + { #ifdef _OPENMP #pragma omp parallel for schedule(static, 256) #endif - for (int i = 0; i < nrxx; ++i) - { - out[i] = in[i] + this->mixing_beta * sres[i]; - } - // magnetism + for (int i = 0; i < nrxx; ++i) + { + out[i] = in[i] + this->mixing_beta * sres[i]; + } + // magnetism #ifdef _OPENMP #pragma omp parallel for schedule(static, 256) #endif - for (int i = nrxx; i < 2 * nrxx; ++i) - { - out[i] = in[i] + this->mixing_beta_mag * sres[i]; - } - }; - this->mixing->push_data(this->rho_mdata, rhor_in, rhor_out, screen, twobeta_mix, true); - auto inner_product - = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2); - this->mixing->cal_coef(this->rho_mdata, inner_product); - this->mixing->mix_data(this->rho_mdata, rhor_out); - // get new rho[is][nrxx] from rho_mag[is*nrxx] - for (int is = 0; is < nspin; is++) - { - ModuleBase::GlobalFunc::ZEROS(chr->rho[is], nrxx); - //ModuleBase::GlobalFunc::ZEROS(rho_save[is], nrxx); + for (int i = nrxx; i < 2 * nrxx; ++i) + { + out[i] = in[i] + this->mixing_beta_mag * sres[i]; + } + }; + this->mixing->push_data (this->rho_mdata, rhor_in, rhor_out, screen, twobeta_mix, true); + auto inner_product + = std::bind (&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2); + this->mixing->cal_coef (this->rho_mdata, inner_product); + this->mixing->mix_data (this->rho_mdata, rhor_out); + // get new rho[is][nrxx] from rho_mag[is*nrxx] + for (int is = 0; is < nspin; is++) + { + ModuleBase::GlobalFunc::ZEROS (chr->rho[is], nrxx); + // ModuleBase::GlobalFunc::ZEROS(rho_save[is], nrxx); + } + for (int ir = 0; 
ir < nrxx; ir++) + { + chr->rho[0][ir] = 0.5 * (rho_mag[ir] + rho_mag[ir + nrxx]); + chr->rho[1][ir] = 0.5 * (rho_mag[ir] - rho_mag[ir + nrxx]); + } + // delete + delete[] rho_mag; + delete[] rho_mag_save; } - for (int ir = 0; ir < nrxx; ir++) - { - chr->rho[0][ir] = 0.5 * (rho_mag[ir] + rho_mag[ir+nrxx]); - chr->rho[1][ir] = 0.5 * (rho_mag[ir] - rho_mag[ir+nrxx]); - } - // delete - delete[] rho_mag; - delete[] rho_mag_save; - } else if (nspin == 4 && PARAM.inp.mixing_angle <= 0) - { - // normal broyden mixing for {rho, mx, my, mz} - rhor_in = chr->rho_save[0]; - rhor_out = chr->rho[0]; - const int nrxx = this->rhopw->nrxx; - auto screen = std::bind(&Charge_Mixing::Kerker_screen_real, this, std::placeholders::_1); - auto twobeta_mix - = [this, nrxx](double* out, const double* in, const double* sres) { + { + // normal broyden mixing for {rho, mx, my, mz} + rhor_in = chr->rho_save[0]; + rhor_out = chr->rho[0]; + const int nrxx = this->rhopw->nrxx; + auto screen = std::bind (&Charge_Mixing::Kerker_screen_real, this, std::placeholders::_1); + auto twobeta_mix = [this, nrxx] (double* out, const double* in, const double* sres) + { #ifdef _OPENMP #pragma omp parallel for schedule(static, 256) #endif - for (int i = 0; i < nrxx; ++i) - { - out[i] = in[i] + this->mixing_beta * sres[i]; - } - // magnetism, mx, my, mz + for (int i = 0; i < nrxx; ++i) + { + out[i] = in[i] + this->mixing_beta * sres[i]; + } + // magnetism, mx, my, mz #ifdef _OPENMP #pragma omp parallel for schedule(static, 256) #endif - for (int i = nrxx; i < 4 * nrxx; ++i) - { - out[i] = in[i] + this->mixing_beta_mag * sres[i]; - } - }; - this->mixing->push_data(this->rho_mdata, rhor_in, rhor_out, screen, twobeta_mix, true); - auto inner_product - = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2); - this->mixing->cal_coef(this->rho_mdata, inner_product); - this->mixing->mix_data(this->rho_mdata, rhor_out); - } + for (int i = nrxx; i < 4 * nrxx; ++i) + { + out[i] 
= in[i] + this->mixing_beta_mag * sres[i]; + } + }; + this->mixing->push_data (this->rho_mdata, rhor_in, rhor_out, screen, twobeta_mix, true); + auto inner_product + = std::bind (&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2); + this->mixing->cal_coef (this->rho_mdata, inner_product); + this->mixing->mix_data (this->rho_mdata, rhor_out); + } else if (nspin == 4 && PARAM.inp.mixing_angle > 0) - { - // special broyden mixing for {rho, |m|} proposed by J. Phys. Soc. Jpn. 82 (2013) 114706 - // here only consider the case of mixing_angle = 1, which mean only change |m| and keep angle fixed - const int nrxx = this->rhopw->nrxx; - // allocate memory for rho_magabs and rho_magabs_save - double* rho_magabs = new double[nrxx * 2]; - double* rho_magabs_save = new double[nrxx * 2]; - ModuleBase::GlobalFunc::ZEROS(rho_magabs, nrxx * 2); - ModuleBase::GlobalFunc::ZEROS(rho_magabs_save, nrxx * 2); - // calculate rho_magabs and rho_magabs_save - for (int ir = 0; ir < nrxx; ir++) { - rho_magabs[ir] = chr->rho[0][ir]; // rho - rho_magabs_save[ir] = chr->rho_save[0][ir]; // rho_save - // |m| for rho - rho_magabs[nrxx + ir] = std::sqrt(chr->rho[1][ir] * chr->rho[1][ir] - + chr->rho[2][ir] * chr->rho[2][ir] - + chr->rho[3][ir] * chr->rho[3][ir]); - // |m| for rho_save - rho_magabs_save[nrxx + ir] = std::sqrt(chr->rho_save[1][ir] * chr->rho_save[1][ir] - + chr->rho_save[2][ir] * chr->rho_save[2][ir] - + chr->rho_save[3][ir] * chr->rho_save[3][ir]); - } - rhor_in = rho_magabs_save; - rhor_out = rho_magabs; - - auto screen = std::bind(&Charge_Mixing::Kerker_screen_real, this, std::placeholders::_1); - auto twobeta_mix - = [this, nrxx](double* out, const double* in, const double* sres) { + // special broyden mixing for {rho, |m|} proposed by J. Phys. Soc. Jpn. 
82 (2013) 114706 + // here only consider the case of mixing_angle = 1, which mean only change |m| and keep angle fixed + const int nrxx = this->rhopw->nrxx; + // allocate memory for rho_magabs and rho_magabs_save + double* rho_magabs = new double[nrxx * 2]; + double* rho_magabs_save = new double[nrxx * 2]; + ModuleBase::GlobalFunc::ZEROS (rho_magabs, nrxx * 2); + ModuleBase::GlobalFunc::ZEROS (rho_magabs_save, nrxx * 2); + // calculate rho_magabs and rho_magabs_save + for (int ir = 0; ir < nrxx; ir++) + { + rho_magabs[ir] = chr->rho[0][ir]; // rho + rho_magabs_save[ir] = chr->rho_save[0][ir]; // rho_save + // |m| for rho + rho_magabs[nrxx + ir] + = std::sqrt (chr->rho[1][ir] * chr->rho[1][ir] + chr->rho[2][ir] * chr->rho[2][ir] + + chr->rho[3][ir] * chr->rho[3][ir]); + // |m| for rho_save + rho_magabs_save[nrxx + ir] = std::sqrt (chr->rho_save[1][ir] * chr->rho_save[1][ir] + + chr->rho_save[2][ir] * chr->rho_save[2][ir] + + chr->rho_save[3][ir] * chr->rho_save[3][ir]); + } + rhor_in = rho_magabs_save; + rhor_out = rho_magabs; + + auto screen = std::bind (&Charge_Mixing::Kerker_screen_real, this, std::placeholders::_1); + auto twobeta_mix = [this, nrxx] (double* out, const double* in, const double* sres) + { #ifdef _OPENMP #pragma omp parallel for schedule(static, 256) #endif - for (int i = 0; i < nrxx; ++i) - { - out[i] = in[i] + this->mixing_beta * sres[i]; - } - // magnetism, |m| + for (int i = 0; i < nrxx; ++i) + { + out[i] = in[i] + this->mixing_beta * sres[i]; + } + // magnetism, |m| #ifdef _OPENMP #pragma omp parallel for schedule(static, 256) #endif - for (int i = nrxx; i < 2 * nrxx; ++i) - { - out[i] = in[i] + this->mixing_beta_mag * sres[i]; - } - }; - this->mixing->push_data(this->rho_mdata, rhor_in, rhor_out, screen, twobeta_mix, true); - auto inner_product - = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2); - this->mixing->cal_coef(this->rho_mdata, inner_product); - 
this->mixing->mix_data(this->rho_mdata, rhor_out); - - // use new |m| and angle to update {mx, my, mz} - for (int ir = 0; ir < nrxx; ir++) + for (int i = nrxx; i < 2 * nrxx; ++i) + { + out[i] = in[i] + this->mixing_beta_mag * sres[i]; + } + }; + this->mixing->push_data (this->rho_mdata, rhor_in, rhor_out, screen, twobeta_mix, true); + auto inner_product + = std::bind (&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2); + this->mixing->cal_coef (this->rho_mdata, inner_product); + this->mixing->mix_data (this->rho_mdata, rhor_out); + + // use new |m| and angle to update {mx, my, mz} + for (int ir = 0; ir < nrxx; ir++) + { + chr->rho[0][ir] = rho_magabs[ir]; // rho + double norm = std::sqrt (chr->rho[1][ir] * chr->rho[1][ir] + chr->rho[2][ir] * chr->rho[2][ir] + + chr->rho[3][ir] * chr->rho[3][ir]); + + if (norm < 1e-10) + { + continue; + } + double rescale_tmp = rho_magabs[nrxx + ir] / norm; + chr->rho[1][ir] *= rescale_tmp; + chr->rho[2][ir] *= rescale_tmp; + chr->rho[3][ir] *= rescale_tmp; + } + // delete + delete[] rho_magabs; + delete[] rho_magabs_save; + } + + double* taur_out = nullptr; + double* taur_in = nullptr; + if ((XC_Functional::get_ked_flag ()) && mixing_tau) { - chr->rho[0][ir] = rho_magabs[ir]; // rho - double norm = std::sqrt(chr->rho[1][ir] * chr->rho[1][ir] - + chr->rho[2][ir] * chr->rho[2][ir] - + chr->rho[3][ir] * chr->rho[3][ir]); - - if (norm < 1e-10) - { - continue; - } - double rescale_tmp = rho_magabs[nrxx + ir] / norm; - chr->rho[1][ir] *= rescale_tmp; - chr->rho[2][ir] *= rescale_tmp; - chr->rho[3][ir] *= rescale_tmp; + taur_in = chr->kin_r_save[0]; + taur_out = chr->kin_r[0]; + // Note: there is no kerker modification for tau because I'm not sure + // if we should have it. If necessary we can try it in the future. 
+ this->mixing->push_data (this->tau_mdata, taur_in, taur_out, nullptr, false); + + this->mixing->mix_data (this->tau_mdata, taur_out); } - // delete - delete[] rho_magabs; - delete[] rho_magabs_save; - } - - double *taur_out=nullptr; - double *taur_in=nullptr; - if ((XC_Functional::get_ked_flag()) && mixing_tau) - { - taur_in = chr->kin_r_save[0]; - taur_out = chr->kin_r[0]; - // Note: there is no kerker modification for tau because I'm not sure - // if we should have it. If necessary we can try it in the future. - this->mixing->push_data(this->tau_mdata, taur_in, taur_out, nullptr, false); - - this->mixing->mix_data(this->tau_mdata, taur_out); - } - - ModuleBase::timer::end("Charge_Mixing", "mix_rho_real"); + + ModuleBase::timer::end ("Charge_Mixing", "mix_rho_real"); return; } - -void Charge_Mixing::mix_rho(Charge* chr) +void + Charge_Mixing::mix_rho (Charge* chr) { - ModuleBase::TITLE("Charge_Mixing", "mix_rho"); - ModuleBase::timer::start("Charge_Mixing", "mix_rho"); + ModuleBase::TITLE ("Charge_Mixing", "mix_rho"); + ModuleBase::timer::start ("Charge_Mixing", "mix_rho"); const int nspin = PARAM.inp.nspin; - assert(nspin==1 || nspin==2 || nspin==4); + assert (nspin == 1 || nspin == 2 || nspin == 4); // the charge before mixing. 
const int nrxx = chr->rhopw->nrxx; - std::vector rho123(nspin * nrxx); + std::vector rho123 (nspin * nrxx); for (int is = 0; is < nspin; ++is) - { - if (is == 0 || is == 3 || !PARAM.globalv.domag_z) { - double* rho123_is = rho123.data() + is * nrxx; + if (is == 0 || is == 3 || !PARAM.globalv.domag_z) + { + double* rho123_is = rho123.data () + is * nrxx; #ifdef _OPENMP #pragma omp parallel for schedule(static, 512) #endif - for(int ir = 0 ; ir < nrxx ; ++ir) - { - rho123_is[ir] = chr->rho[is][ir]; - } + for (int ir = 0; ir < nrxx; ++ir) + { + rho123_is[ir] = chr->rho[is][ir]; + } + } } - } std::vector kin_r123; - if ((XC_Functional::get_ked_flag()) && mixing_tau) - { - kin_r123.resize(nspin * nrxx); - for (int is = 0; is < nspin; ++is) + if ((XC_Functional::get_ked_flag ()) && mixing_tau) { - double* kin_r123_is = kin_r123.data() + is * nrxx; + kin_r123.resize (nspin * nrxx); + for (int is = 0; is < nspin; ++is) + { + double* kin_r123_is = kin_r123.data () + is * nrxx; #ifdef _OPENMP #pragma omp parallel for schedule(static, 512) #endif - for(int ir = 0 ; ir < nrxx ; ++ir) - { - kin_r123_is[ir] = chr->kin_r[is][ir]; - } + for (int ir = 0; ir < nrxx; ++ir) + { + kin_r123_is[ir] = chr->kin_r[is][ir]; + } + } } - } // --------------------Mixing Body-------------------- if (PARAM.inp.scf_thr_type == 1) - { - mix_rho_recip(chr); - } + { + mix_rho_recip (chr); + } else if (PARAM.inp.scf_thr_type == 2) - { - mix_rho_real(chr); - } + { + mix_rho_real (chr); + } // --------------------------------------------------- // mohan add 2012-06-05 // rho_save is the charge before mixing for (int is = 0; is < nspin; ++is) - { - if (is == 0 || is == 3 || !PARAM.globalv.domag_z) { - double* rho123_is = rho123.data() + is * nrxx; + if (is == 0 || is == 3 || !PARAM.globalv.domag_z) + { + double* rho123_is = rho123.data () + is * nrxx; #ifdef _OPENMP #pragma omp parallel for schedule(static, 512) #endif - for(int ir = 0 ; ir < nrxx ; ++ir) - { - chr->rho_save[is][ir] = rho123_is[ir]; - } 
+ for (int ir = 0; ir < nrxx; ++ir) + { + chr->rho_save[is][ir] = rho123_is[ir]; + } + } } - } - if ((XC_Functional::get_ked_flag()) && mixing_tau) - { - for (int is = 0; is < nspin; ++is) + if ((XC_Functional::get_ked_flag ()) && mixing_tau) { - double* kin_r123_is = kin_r123.data() + is * nrxx; + for (int is = 0; is < nspin; ++is) + { + double* kin_r123_is = kin_r123.data () + is * nrxx; #ifdef _OPENMP #pragma omp parallel for schedule(static, 512) #endif - for(int ir = 0 ; ir < nrxx ; ++ir) - { - chr->kin_r_save[is][ir] = kin_r123_is[ir]; - } + for (int ir = 0; ir < nrxx; ++ir) + { + chr->kin_r_save[is][ir] = kin_r123_is[ir]; + } + } } - } - if (new_e_iteration) - { - new_e_iteration = false; - } + if (new_e_iteration) + { + new_e_iteration = false; + } - ModuleBase::timer::end("Charge_Mixing", "mix_rho"); + ModuleBase::timer::end ("Charge_Mixing", "mix_rho"); return; } diff --git a/source/source_estate/module_charge/charge_mixing_uspp.cpp b/source/source_estate/module_charge/charge_mixing_uspp.cpp index 10968cd16e6..af5ee519236 100644 --- a/source/source_estate/module_charge/charge_mixing_uspp.cpp +++ b/source/source_estate/module_charge/charge_mixing_uspp.cpp @@ -1,76 +1,83 @@ #include "charge_mixing.h" #include "source_io/module_parameter/parameter.h" -void Charge_Mixing::divide_data(std::complex* data_d, +void + Charge_Mixing::divide_data (std::complex* data_d, std::complex*& data_s, std::complex*& data_hf) { - ModuleBase::TITLE("Charge_Mixing", "divide_data"); + ModuleBase::TITLE ("Charge_Mixing", "divide_data"); if (PARAM.inp.nspin == 1) - { - data_s = data_d; - data_hf = data_d + this->rhopw->npw; - } - else - { - const int ndimd = this->rhodpw->npw; - const int ndims = this->rhopw->npw; - const int ndimhf = ndimd - ndims; - data_s = new std::complex[PARAM.inp.nspin * ndims]; - data_hf = nullptr; - if (ndimhf > 0) { - data_hf = new std::complex[PARAM.inp.nspin * ndimhf]; + data_s = data_d; + data_hf = data_d + this->rhopw->npw; } - for (int is = 0; is < 
PARAM.inp.nspin; ++is) + else { - std::memcpy(data_s + is * ndims, data_d + is * ndimd, ndims * sizeof(std::complex)); - std::memcpy(data_hf + is * ndimhf, data_d + is * ndimd + ndims, ndimhf * sizeof(std::complex)); + const int ndimd = this->rhodpw->npw; + const int ndims = this->rhopw->npw; + const int ndimhf = ndimd - ndims; + data_s = new std::complex[PARAM.inp.nspin * ndims]; + data_hf = nullptr; + if (ndimhf > 0) + { + data_hf = new std::complex[PARAM.inp.nspin * ndimhf]; + } + for (int is = 0; is < PARAM.inp.nspin; ++is) + { + std::memcpy (data_s + is * ndims, data_d + is * ndimd, ndims * sizeof (std::complex)); + std::memcpy (data_hf + is * ndimhf, + data_d + is * ndimd + ndims, + ndimhf * sizeof (std::complex)); + } } - } } -void Charge_Mixing::combine_data(std::complex* data_d, +void + Charge_Mixing::combine_data (std::complex* data_d, std::complex*& data_s, std::complex*& data_hf) { - ModuleBase::TITLE("Charge_Mixing", "combine_data"); + ModuleBase::TITLE ("Charge_Mixing", "combine_data"); if (PARAM.inp.nspin == 1) - { - data_s = nullptr; - data_hf = nullptr; - return; - } + { + data_s = nullptr; + data_hf = nullptr; + return; + } else - { - const int ndimd = this->rhodpw->npw; - const int ndims = this->rhopw->npw; - const int ndimhf = ndimd - ndims; - for (int is = 0; is < PARAM.inp.nspin; ++is) { - std::memcpy(data_d + is * ndimd, data_s + is * ndims, ndims * sizeof(std::complex)); - std::memcpy(data_d + is * ndimd + ndims, data_hf + is * ndimhf, ndimhf * sizeof(std::complex)); + const int ndimd = this->rhodpw->npw; + const int ndims = this->rhopw->npw; + const int ndimhf = ndimd - ndims; + for (int is = 0; is < PARAM.inp.nspin; ++is) + { + std::memcpy (data_d + is * ndimd, data_s + is * ndims, ndims * sizeof (std::complex)); + std::memcpy (data_d + is * ndimd + ndims, + data_hf + is * ndimhf, + ndimhf * sizeof (std::complex)); + } + delete[] data_s; + delete[] data_hf; + data_s = nullptr; + data_hf = nullptr; } - delete[] data_s; - delete[] data_hf; - 
data_s = nullptr; - data_hf = nullptr; - } } -void Charge_Mixing::clean_data(std::complex*& data_s, std::complex*& data_hf) +void + Charge_Mixing::clean_data (std::complex*& data_s, std::complex*& data_hf) { - ModuleBase::TITLE("Charge_Mixing", "clean_data"); + ModuleBase::TITLE ("Charge_Mixing", "clean_data"); if (PARAM.inp.nspin == 1) - { - data_s = nullptr; - data_hf = nullptr; - return; - } + { + data_s = nullptr; + data_hf = nullptr; + return; + } else - { - delete[] data_s; - delete[] data_hf; - data_s = nullptr; - data_hf = nullptr; - } + { + delete[] data_s; + delete[] data_hf; + data_s = nullptr; + data_hf = nullptr; + } } \ No newline at end of file diff --git a/source/source_estate/module_charge/charge_mpi.cpp b/source/source_estate/module_charge/charge_mpi.cpp index c178471320c..f81d06fec2e 100644 --- a/source/source_estate/module_charge/charge_mpi.cpp +++ b/source/source_estate/module_charge/charge_mpi.cpp @@ -7,156 +7,160 @@ #include "source_hamilt/module_xc/xc_functional.h" #include "source_io/module_parameter/parameter.h" #ifdef __MPI -void Charge::init_chgmpi() +void + Charge::init_chgmpi () { if (KP_WORLD == MPI_COMM_NULL) - { - delete[] rec; - rec = new int[GlobalV::NPROC_IN_POOL]; - delete[] dis; - dis = new int[GlobalV::NPROC_IN_POOL]; - - const int ncxy = this->rhopw->nx * this->rhopw->ny; - for (int ip = 0; ip < GlobalV::NPROC_IN_POOL; ip++) { - rec[ip] = this->rhopw->numz[ip] * ncxy; - dis[ip] = this->rhopw->startz[ip] * ncxy; + delete[] rec; + rec = new int[GlobalV::NPROC_IN_POOL]; + delete[] dis; + dis = new int[GlobalV::NPROC_IN_POOL]; + + const int ncxy = this->rhopw->nx * this->rhopw->ny; + for (int ip = 0; ip < GlobalV::NPROC_IN_POOL; ip++) + { + rec[ip] = this->rhopw->numz[ip] * ncxy; + dis[ip] = this->rhopw->startz[ip] * ncxy; + } } - } } -void Charge::reduce_diff_pools(double* array_rho) const +void + Charge::reduce_diff_pools (double* array_rho) const { - ModuleBase::TITLE("Charge", "reduce_diff_pools"); - 
ModuleBase::timer::start("Charge", "reduce_diff_pools"); + ModuleBase::TITLE ("Charge", "reduce_diff_pools"); + ModuleBase::timer::start ("Charge", "reduce_diff_pools"); if (KP_WORLD != MPI_COMM_NULL) - { - MPI_Allreduce(MPI_IN_PLACE, array_rho, this->nrxx, MPI_DOUBLE, MPI_SUM, KP_WORLD); - } - else - { - double* array_tmp = new double[this->rhopw->nxyz]; - double* array_tot = new double[this->rhopw->nxyz]; - double* array_tot_aux = new double[this->rhopw->nxyz]; - //================================== - // Collect the rho in each pool - //================================== - for (int ir = 0; ir < this->rhopw->nrxx; ++ir) { - array_tmp[ir] = array_rho[ir] / GlobalV::NPROC_IN_POOL; + MPI_Allreduce (MPI_IN_PLACE, array_rho, this->nrxx, MPI_DOUBLE, MPI_SUM, KP_WORLD); } - MPI_Allgatherv(array_tmp, this->rhopw->nrxx, MPI_DOUBLE, array_tot, rec, dis, MPI_DOUBLE, POOL_WORLD); - - const int ncxy = this->rhopw->nx * this->rhopw->ny; - for (int ip = 0; ip < GlobalV::NPROC_IN_POOL; ++ip) + else { - for (int ir = 0; ir < ncxy; ++ir) - { - for (int iz = 0; iz < this->rhopw->numz[ip]; ++iz) + double* array_tmp = new double[this->rhopw->nxyz]; + double* array_tot = new double[this->rhopw->nxyz]; + double* array_tot_aux = new double[this->rhopw->nxyz]; + //================================== + // Collect the rho in each pool + //================================== + for (int ir = 0; ir < this->rhopw->nrxx; ++ir) { - // ------------------------------------------------- - // very carefully with the order of charge density. - // the data (ir,iz) is now in processor 'ip'. - // different POOL has different ordering. - // we want to collect them in each processor - // in a unit format, - // and then reduce among all POOLS to yield - // the correct charge density. - // we know the division of 'z' is indipendent - // in each processor, so the 'unit format' - // must have no relationship with 'z' divide method. 
- // ------------------------------------------------- - // rot_tot_aux : suitable among all pools. - // (1) the data save along z direction. - // (2) and each element number of group 'z data' - // is 'this->rhopw->nz' - // (3) however, the data rearrange is occured - // between [ this->rhopw->startz[ip], this->rhopw->startz[ip]+this->rhopw->numz[ip] ) - // (4) this->rhopw->startz[ip] + iz yields correct z coordiante. - // ------------------------------------------------- - // rot_tot: suitable for local pool. - // (1) the data save along z direction, only - // in a small distance. - // (2) however, the number of z in each processor - // 'ip' is this->rhopw->numz[ip] - // (3) the index of data increases with the ip, - // so the data on large 'ip' processor must - // have large 'start position', which we label - // this->rhopw->startz[ip] * ncxy. - // ------------------------------------------------- - array_tot_aux[this->rhopw->nz * ir + this->rhopw->startz[ip] + iz] - = array_tot[this->rhopw->numz[ip] * ir + this->rhopw->startz[ip] * ncxy + iz]; + array_tmp[ir] = array_rho[ir] / GlobalV::NPROC_IN_POOL; + } + MPI_Allgatherv (array_tmp, this->rhopw->nrxx, MPI_DOUBLE, array_tot, rec, dis, MPI_DOUBLE, POOL_WORLD); + + const int ncxy = this->rhopw->nx * this->rhopw->ny; + for (int ip = 0; ip < GlobalV::NPROC_IN_POOL; ++ip) + { + for (int ir = 0; ir < ncxy; ++ir) + { + for (int iz = 0; iz < this->rhopw->numz[ip]; ++iz) + { + // ------------------------------------------------- + // very carefully with the order of charge density. + // the data (ir,iz) is now in processor 'ip'. + // different POOL has different ordering. + // we want to collect them in each processor + // in a unit format, + // and then reduce among all POOLS to yield + // the correct charge density. + // we know the division of 'z' is indipendent + // in each processor, so the 'unit format' + // must have no relationship with 'z' divide method. 
+ // ------------------------------------------------- + // rot_tot_aux : suitable among all pools. + // (1) the data save along z direction. + // (2) and each element number of group 'z data' + // is 'this->rhopw->nz' + // (3) however, the data rearrange is occured + // between [ this->rhopw->startz[ip], this->rhopw->startz[ip]+this->rhopw->numz[ip] + // ) (4) this->rhopw->startz[ip] + iz yields correct z coordiante. + // ------------------------------------------------- + // rot_tot: suitable for local pool. + // (1) the data save along z direction, only + // in a small distance. + // (2) however, the number of z in each processor + // 'ip' is this->rhopw->numz[ip] + // (3) the index of data increases with the ip, + // so the data on large 'ip' processor must + // have large 'start position', which we label + // this->rhopw->startz[ip] * ncxy. + // ------------------------------------------------- + array_tot_aux[this->rhopw->nz * ir + this->rhopw->startz[ip] + iz] + = array_tot[this->rhopw->numz[ip] * ir + this->rhopw->startz[ip] * ncxy + iz]; + } + } } - } - } - //================================== - // Reduce all the rho in each cpu - //================================== - MPI_Allreduce(array_tot_aux, array_tot, this->rhopw->nxyz, MPI_DOUBLE, MPI_SUM, INT_BGROUP); + //================================== + // Reduce all the rho in each cpu + //================================== + MPI_Allreduce (array_tot_aux, array_tot, this->rhopw->nxyz, MPI_DOUBLE, MPI_SUM, INT_BGROUP); - //===================================== - // Change the order of rho in each cpu - //===================================== - for (int ir = 0; ir < ncxy; ir++) + //===================================== + // Change the order of rho in each cpu + //===================================== + for (int ir = 0; ir < ncxy; ir++) + { + for (int iz = 0; iz < this->rhopw->numz[GlobalV::RANK_IN_POOL]; iz++) + { + array_rho[this->rhopw->numz[GlobalV::RANK_IN_POOL] * ir + iz] + = array_tot[this->rhopw->nz * 
ir + this->rhopw->startz_current + iz]; + } + } + delete[] array_tot_aux; + delete[] array_tot; + delete[] array_tmp; + } + if (PARAM.globalv.all_ks_run && PARAM.inp.bndpar > 1) { - for (int iz = 0; iz < this->rhopw->numz[GlobalV::RANK_IN_POOL]; iz++) - { - array_rho[this->rhopw->numz[GlobalV::RANK_IN_POOL] * ir + iz] - = array_tot[this->rhopw->nz * ir + this->rhopw->startz_current + iz]; - } + MPI_Allreduce (MPI_IN_PLACE, array_rho, this->nrxx, MPI_DOUBLE, MPI_SUM, BP_WORLD); } - delete[] array_tot_aux; - delete[] array_tot; - delete[] array_tmp; - } - if(PARAM.globalv.all_ks_run && PARAM.inp.bndpar > 1) - { - MPI_Allreduce(MPI_IN_PLACE, array_rho, this->nrxx, MPI_DOUBLE, MPI_SUM, BP_WORLD); - } - ModuleBase::timer::end("Charge", "reduce_diff_pools"); + ModuleBase::timer::end ("Charge", "reduce_diff_pools"); } -void Charge::rho_mpi() +void + Charge::rho_mpi () { - ModuleBase::TITLE("Charge", "rho_mpi"); - if (GlobalV::KPAR * PARAM.inp.bndpar <= 1) - { - return; - } - ModuleBase::timer::start("Charge", "rho_mpi"); + ModuleBase::TITLE ("Charge", "rho_mpi"); + if (GlobalV::KPAR * PARAM.inp.bndpar <= 1) + { + return; + } + ModuleBase::timer::start ("Charge", "rho_mpi"); for (int is = 0; is < PARAM.inp.nspin; ++is) - { - reduce_diff_pools(this->rho[is]); - if (XC_Functional::get_ked_flag() || PARAM.inp.out_elf[0] > 0) { - reduce_diff_pools(this->kin_r[is]); + reduce_diff_pools (this->rho[is]); + if (XC_Functional::get_ked_flag () || PARAM.inp.out_elf[0] > 0) + { + reduce_diff_pools (this->kin_r[is]); + } } - } - ModuleBase::timer::end("Charge", "rho_mpi"); + ModuleBase::timer::end ("Charge", "rho_mpi"); return; } -void Charge::kin_r_mpi() +void + Charge::kin_r_mpi () { - ModuleBase::TITLE("Charge", "kin_r_mpi"); + ModuleBase::TITLE ("Charge", "kin_r_mpi"); if (GlobalV::KPAR * PARAM.inp.bndpar <= 1) - { - return; - } - ModuleBase::timer::start("Charge", "kin_r_mpi"); + { + return; + } + ModuleBase::timer::start ("Charge", "kin_r_mpi"); - if 
(XC_Functional::get_ked_flag() || PARAM.inp.out_elf[0] > 0) - { - for (int is = 0; is < PARAM.inp.nspin; ++is) + if (XC_Functional::get_ked_flag () || PARAM.inp.out_elf[0] > 0) { - reduce_diff_pools(this->kin_r[is]); + for (int is = 0; is < PARAM.inp.nspin; ++is) + { + reduce_diff_pools (this->kin_r[is]); + } } - } - ModuleBase::timer::end("Charge", "kin_r_mpi"); + ModuleBase::timer::end ("Charge", "kin_r_mpi"); return; } #endif diff --git a/source/source_estate/module_charge/chgmixing.cpp b/source/source_estate/module_charge/chgmixing.cpp index 45e5c5b350c..ebf0ff719d7 100644 --- a/source/source_estate/module_charge/chgmixing.cpp +++ b/source/source_estate/module_charge/chgmixing.cpp @@ -3,225 +3,222 @@ #include "source_lcao/module_dftu/dftu.h" #include "source_lcao/module_deltaspin/spin_constrain.h" -void module_charge::chgmixing_ks(const int iter, // scf iteration number - UnitCell& ucell, - elecstate::ElecState* pelec, - Charge &chr, // charge density - Charge_Mixing* p_chgmix, // charge mixing class - const int nrxx, // charge density - double &drho, // charge density deviation - bool &oscillate_esolver, // whether the esolver has oscillation of charge density - bool &conv_esolver, - const double &hsolver_error, - const double &scf_thr, - const double &scf_ene_thr, - const bool converged_u, // mohan add 2025-11-06 - const Input_para& inp) // input parameters +void + module_charge::chgmixing_ks (const int iter, // scf iteration number + UnitCell& ucell, + elecstate::ElecState* pelec, + Charge& chr, // charge density + Charge_Mixing* p_chgmix, // charge mixing class + const int nrxx, // charge density + double& drho, // charge density deviation + bool& oscillate_esolver, // whether the esolver has oscillation of charge density + bool& conv_esolver, + const double& hsolver_error, + const double& scf_thr, + const double& scf_ene_thr, + const bool converged_u, // mohan add 2025-11-06 + const Input_para& inp) // input parameters { if (PARAM.globalv.ks_run) - { - // 
mixing will restart at p_chgmix->mixing_restart steps - if (drho <= inp.mixing_restart && inp.mixing_restart > 0.0 - && p_chgmix->mixing_restart_step > iter) { - p_chgmix->mixing_restart_step = iter + 1; - } + // mixing will restart at p_chgmix->mixing_restart steps + if (drho <= inp.mixing_restart && inp.mixing_restart > 0.0 && p_chgmix->mixing_restart_step > iter) + { + p_chgmix->mixing_restart_step = iter + 1; + } - if (inp.scf_os_stop) // if oscillation is detected, SCF will stop - { - oscillate_esolver = p_chgmix->if_scf_oscillate(iter, drho, - inp.scf_os_ndim, inp.scf_os_thr); - } + if (inp.scf_os_stop) // if oscillation is detected, SCF will stop + { + oscillate_esolver = p_chgmix->if_scf_oscillate (iter, drho, inp.scf_os_ndim, inp.scf_os_thr); + } - // drho will be 0 at p_chgmix->mixing_restart step, which is - // not ground state - bool not_restart_step = !(iter == p_chgmix->mixing_restart_step && inp.mixing_restart > 0.0); + // drho will be 0 at p_chgmix->mixing_restart step, which is + // not ground state + bool not_restart_step = !(iter == p_chgmix->mixing_restart_step && inp.mixing_restart > 0.0); - conv_esolver = (drho < scf_thr && not_restart_step && converged_u); + conv_esolver = (drho < scf_thr && not_restart_step && converged_u); - // add energy threshold for SCF convergence - if (scf_ene_thr > 0.0) - { - // calculate energy of output charge density - elecstate::update_pot(ucell, pelec, chr, conv_esolver); - pelec->cal_energies(2); // 2 means Kohn-Sham functional - // now, etot_old is the energy of input density, while etot is the energy of output density - pelec->f_en.etot_delta = pelec->f_en.etot - pelec->f_en.etot_old; - // output etot_delta - GlobalV::ofs_running << " DeltaE_womix = " << pelec->f_en.etot_delta * ModuleBase::Ry_to_eV << " eV" - << std::endl; - if (iter > 1 && conv_esolver == 1) // only check when density is converged - { - // update the convergence flag - conv_esolver - = (std::abs(pelec->f_en.etot_delta * ModuleBase::Ry_to_eV) 
< scf_ene_thr); - } - } + // add energy threshold for SCF convergence + if (scf_ene_thr > 0.0) + { + // calculate energy of output charge density + elecstate::update_pot (ucell, pelec, chr, conv_esolver); + pelec->cal_energies (2); // 2 means Kohn-Sham functional + // now, etot_old is the energy of input density, while etot is the energy of output density + pelec->f_en.etot_delta = pelec->f_en.etot - pelec->f_en.etot_old; + // output etot_delta + GlobalV::ofs_running << " DeltaE_womix = " << pelec->f_en.etot_delta * ModuleBase::Ry_to_eV << " eV" + << std::endl; + if (iter > 1 && conv_esolver == 1) // only check when density is converged + { + // update the convergence flag + conv_esolver = (std::abs (pelec->f_en.etot_delta * ModuleBase::Ry_to_eV) < scf_ene_thr); + } + } - // If drho < hsolver_error in the first iter or drho < scf_thr, we - // do not change rho. - if (drho < hsolver_error || conv_esolver || inp.calculation == "nscf") - { - if (drho < hsolver_error) - { - GlobalV::ofs_warning << " drho < hsolver_error, keep " - "charge density unchanged." - << std::endl; - } - } - else - { - //----------charge mixing--------------- - // mixing will restart after p_chgmix->mixing_restart - // steps - if (inp.mixing_restart > 0 && iter == p_chgmix->mixing_restart_step - 1 - && drho <= inp.mixing_restart) - { - // do not mix charge density - } + // If drho < hsolver_error in the first iter or drho < scf_thr, we + // do not change rho. + if (drho < hsolver_error || conv_esolver || inp.calculation == "nscf") + { + if (drho < hsolver_error) + { + GlobalV::ofs_warning << " drho < hsolver_error, keep " + "charge density unchanged." 
+ << std::endl; + } + } else - { - p_chgmix->mix_rho(&chr); // update chr->rho by mixing - } - if (inp.scf_thr_type == 2) - { - chr.renormalize_rho(); // renormalize rho in R-space would - // induce a error in K-space - } - //----------charge mixing done----------- + { + //----------charge mixing--------------- + // mixing will restart after p_chgmix->mixing_restart + // steps + if (inp.mixing_restart > 0 && iter == p_chgmix->mixing_restart_step - 1 + && drho <= inp.mixing_restart) + { + // do not mix charge density + } + else + { + p_chgmix->mix_rho (&chr); // update chr->rho by mixing + } + if (inp.scf_thr_type == 2) + { + chr.renormalize_rho (); // renormalize rho in R-space would + // induce a error in K-space + } + //----------charge mixing done----------- + } } - } #ifdef __MPI - MPI_Bcast(&drho, 1, MPI_DOUBLE, 0, BP_WORLD); + MPI_Bcast (&drho, 1, MPI_DOUBLE, 0, BP_WORLD); // change MPI_DOUBLE to MPI_C_BOOL, mohan 2025-04-13 - MPI_Bcast(&conv_esolver, 1, MPI_C_BOOL, 0, BP_WORLD); + MPI_Bcast (&conv_esolver, 1, MPI_C_BOOL, 0, BP_WORLD); - assert(nrxx>=0); // mohan add 2025-10-18 - MPI_Bcast(chr.rho[0], nrxx, MPI_DOUBLE, 0, BP_WORLD); + assert (nrxx >= 0); // mohan add 2025-10-18 + MPI_Bcast (chr.rho[0], nrxx, MPI_DOUBLE, 0, BP_WORLD); #endif // mohan move the following code here, 2025-10-18 // SCF restart information - if (PARAM.inp.mixing_restart > 0 - && iter == p_chgmix->mixing_restart_step - 1 - && iter != PARAM.inp.scf_nmax) - { - p_chgmix->mixing_restart_last = iter; - std::cout << " SCF restart after this step!" << std::endl; - } + if (PARAM.inp.mixing_restart > 0 && iter == p_chgmix->mixing_restart_step - 1 && iter != PARAM.inp.scf_nmax) + { + p_chgmix->mixing_restart_last = iter; + std::cout << " SCF restart after this step!" 
<< std::endl; + } return; } - -void module_charge::chgmixing_ks_pw(const int iter, // scf iteration number - Charge_Mixing* p_chgmix, // charge mixing class - Plus_U &dftu, // mohan add 2025-11-06 - const Input_para& inp) // input parameters +void + module_charge::chgmixing_ks_pw (const int iter, // scf iteration number + Charge_Mixing* p_chgmix, // charge mixing class + Plus_U& dftu, // mohan add 2025-11-06 + const Input_para& inp) // input parameters { - ModuleBase::TITLE("module_charge", "chgmixing_ks_pw"); + ModuleBase::TITLE ("module_charge", "chgmixing_ks_pw"); if (iter == 1) - { - p_chgmix->init_mixing(); - p_chgmix->mixing_restart_step = inp.scf_nmax + 1; - } + { + p_chgmix->init_mixing (); + p_chgmix->mixing_restart_step = inp.scf_nmax + 1; + } // For mixing restart if (iter == p_chgmix->mixing_restart_step && inp.mixing_restart > 0.0) - { - p_chgmix->init_mixing(); - p_chgmix->mixing_restart_count++; - - if (inp.dft_plus_u) { - if (dftu.uramping > 0.01 && !dftu.u_converged()) - { - p_chgmix->mixing_restart_step = inp.scf_nmax + 1; - } - if (dftu.uramping > 0.01) - { - bool do_uramping = true; - if (inp.sc_mag_switch) + p_chgmix->init_mixing (); + p_chgmix->mixing_restart_count++; + + if (inp.dft_plus_u) { - spinconstrain::SpinConstrain>& sc - = spinconstrain::SpinConstrain>::getScInstance(); - if (!sc.mag_converged()) // skip uramping if mag not converged - { - do_uramping = false; - } - } - if (do_uramping) - { - dftu.uramping_update(); // update U by uramping if uramping > 0.01 - std::cout << " U-Ramping! 
Current U = "; - for (int i = 0; i < dftu.U0.size(); i++) - { - std::cout << dftu.U[i] * ModuleBase::Ry_to_eV << " "; - } - std::cout << " eV " << std::endl; - } - } - } - } + if (dftu.uramping > 0.01 && !dftu.u_converged ()) + { + p_chgmix->mixing_restart_step = inp.scf_nmax + 1; + } + if (dftu.uramping > 0.01) + { + bool do_uramping = true; + if (inp.sc_mag_switch) + { + spinconstrain::SpinConstrain>& sc + = spinconstrain::SpinConstrain>::getScInstance (); + if (!sc.mag_converged ()) // skip uramping if mag not converged + { + do_uramping = false; + } + } + if (do_uramping) + { + dftu.uramping_update (); // update U by uramping if uramping > 0.01 + std::cout << " U-Ramping! Current U = "; + for (int i = 0; i < dftu.U0.size (); i++) + { + std::cout << dftu.U[i] * ModuleBase::Ry_to_eV << " "; + } + std::cout << " eV " << std::endl; + } + } + } + } return; } -void module_charge::chgmixing_ks_lcao(const int iter, // scf iteration number - Charge_Mixing* p_chgmix, // charge mixing class - Plus_U &dftu, // mohan add 2025-11-06 - const int nnr, // dimension of density matrix - const Input_para& inp) // input parameters +void + module_charge::chgmixing_ks_lcao (const int iter, // scf iteration number + Charge_Mixing* p_chgmix, // charge mixing class + Plus_U& dftu, // mohan add 2025-11-06 + const int nnr, // dimension of density matrix + const Input_para& inp) // input parameters { - ModuleBase::TITLE("module_charge", "chgmixing_ks_lcao"); + ModuleBase::TITLE ("module_charge", "chgmixing_ks_lcao"); if (iter == 1) - { - p_chgmix->mix_reset(); // init mixing - p_chgmix->mixing_restart_step = inp.scf_nmax + 1; - p_chgmix->mixing_restart_count = 0; - // this output will be removed once the feeature is stable - if (dftu.uramping > 0.01) { - std::cout << " U-Ramping! 
Current U = "; - for (int i = 0; i < dftu.U0.size(); i++) - { - std::cout << dftu.U[i] * ModuleBase::Ry_to_eV << " "; - } - std::cout << " eV " << std::endl; + p_chgmix->mix_reset (); // init mixing + p_chgmix->mixing_restart_step = inp.scf_nmax + 1; + p_chgmix->mixing_restart_count = 0; + // this output will be removed once the feeature is stable + if (dftu.uramping > 0.01) + { + std::cout << " U-Ramping! Current U = "; + for (int i = 0; i < dftu.U0.size (); i++) + { + std::cout << dftu.U[i] * ModuleBase::Ry_to_eV << " "; + } + std::cout << " eV " << std::endl; + } } - } // for mixing restart if (iter == p_chgmix->mixing_restart_step && inp.mixing_restart > 0.0) - { - p_chgmix->init_mixing(); - p_chgmix->mixing_restart_count++; - if (inp.dft_plus_u) { - dftu.uramping_update(); // update U by uramping if uramping > 0.01 - if (dftu.uramping > 0.01) - { - std::cout << " U-Ramping! Current U = "; - for (int i = 0; i < dftu.U0.size(); i++) + p_chgmix->init_mixing (); + p_chgmix->mixing_restart_count++; + if (inp.dft_plus_u) { - std::cout << dftu.U[i] * ModuleBase::Ry_to_eV << " "; + dftu.uramping_update (); // update U by uramping if uramping > 0.01 + if (dftu.uramping > 0.01) + { + std::cout << " U-Ramping! 
Current U = "; + for (int i = 0; i < dftu.U0.size (); i++) + { + std::cout << dftu.U[i] * ModuleBase::Ry_to_eV << " "; + } + std::cout << " eV " << std::endl; + } + if (dftu.uramping > 0.01 && !dftu.u_converged ()) + { + p_chgmix->mixing_restart_step = inp.scf_nmax + 1; + } + } + if (inp.mixing_dmr) // for mixing_dmr + { + // allocate memory for dmr_mdata + p_chgmix->allocate_mixing_dmr (nnr); } - std::cout << " eV " << std::endl; - } - if (dftu.uramping > 0.01 && !dftu.u_converged()) - { - p_chgmix->mixing_restart_step = inp.scf_nmax + 1; - } - } - if (inp.mixing_dmr) // for mixing_dmr - { - // allocate memory for dmr_mdata - p_chgmix->allocate_mixing_dmr(nnr); } - } } diff --git a/source/source_estate/module_charge/chgmixing.h b/source/source_estate/module_charge/chgmixing.h index 4a04a0880d0..a9cb4a33d95 100644 --- a/source/source_estate/module_charge/chgmixing.h +++ b/source/source_estate/module_charge/chgmixing.h @@ -1,9 +1,9 @@ #ifndef CHGMIXING_H #define CHGMIXING_H -#include "source_estate/elecstate.h" // use pelec -#include "source_estate/module_charge/charge.h" // use chr -#include "source_estate/module_charge/charge_mixing.h" // use p_chgmix +#include "source_estate/elecstate.h" // use pelec +#include "source_estate/module_charge/charge.h" // use chr +#include "source_estate/module_charge/charge_mixing.h" // use p_chgmix #include "source_io/module_parameter/input_parameter.h" // use Input_para #include "source_cell/unitcell.h" #include "source_lcao/module_dftu/dftu.h" // mohan add 2025-11-06 @@ -11,33 +11,32 @@ namespace module_charge { -void chgmixing_ks(const int iter, // scf iteration number - UnitCell& ucell, - elecstate::ElecState* pelec, - Charge &chr, // charge density - Charge_Mixing* p_chgmix, // charge mixing class - const int nrxx, // charge density - double &drho, // charge density deviation - bool &oscillate_esolver, // whether the esolver has oscillation of charge density - bool &conv_esolver, - const double &hsolver_error, - const double 
&scf_thr, - const double &scf_ene_thr, - const bool converged_u, // mohan add 2025-11-06 - const Input_para& inp); // input parameters - -void chgmixing_ks_pw(const int iter, - Charge_Mixing* p_chgmix, - Plus_U &dftu, // mohan add 2025-11-06 - const Input_para& inp); // input parameters - -void chgmixing_ks_lcao(const int iter, // scf iteration number - Charge_Mixing* p_chgmix, // charge mixing class - Plus_U &dftu, // mohan add 2025-11-06 - const int nnr, // dimension of density matrix - const Input_para& inp); // input parameters - -} - +void chgmixing_ks (const int iter, // scf iteration number + UnitCell& ucell, + elecstate::ElecState* pelec, + Charge& chr, // charge density + Charge_Mixing* p_chgmix, // charge mixing class + const int nrxx, // charge density + double& drho, // charge density deviation + bool& oscillate_esolver, // whether the esolver has oscillation of charge density + bool& conv_esolver, + const double& hsolver_error, + const double& scf_thr, + const double& scf_ene_thr, + const bool converged_u, // mohan add 2025-11-06 + const Input_para& inp); // input parameters + +void chgmixing_ks_pw (const int iter, + Charge_Mixing* p_chgmix, + Plus_U& dftu, // mohan add 2025-11-06 + const Input_para& inp); // input parameters + +void chgmixing_ks_lcao (const int iter, // scf iteration number + Charge_Mixing* p_chgmix, // charge mixing class + Plus_U& dftu, // mohan add 2025-11-06 + const int nnr, // dimension of density matrix + const Input_para& inp); // input parameters + +} // namespace module_charge #endif diff --git a/source/source_estate/module_charge/gint_precision_controller.cpp b/source/source_estate/module_charge/gint_precision_controller.cpp index d3389059042..4fa57261b4a 100644 --- a/source/source_estate/module_charge/gint_precision_controller.cpp +++ b/source/source_estate/module_charge/gint_precision_controller.cpp @@ -2,47 +2,51 @@ #include -void GintPrecisionController::set_mode(const std::string& precision_mode) +void + 
GintPrecisionController::set_mode (const std::string& precision_mode) { - this->mode_ = parse_mode_(precision_mode); + this->mode_ = parse_mode_ (precision_mode); } -GintPrecisionController::PrecisionMode GintPrecisionController::parse_mode_(const std::string& precision_mode) +GintPrecisionController::PrecisionMode + GintPrecisionController::parse_mode_ (const std::string& precision_mode) { if (precision_mode == "single") - { - return PrecisionMode::single; - } + { + return PrecisionMode::single; + } if (precision_mode == "mix") - { - return PrecisionMode::mix; - } + { + return PrecisionMode::mix; + } return PrecisionMode::double_mode; } -void GintPrecisionController::reset_for_new_scf() +void + GintPrecisionController::reset_for_new_scf () { switch (this->mode_) - { - case PrecisionMode::single: - case PrecisionMode::mix: - this->current_precision_ = ModuleGint::GintPrecision::fp32; - this->locked_double_precision_ = false; - break; - case PrecisionMode::double_mode: - default: - this->current_precision_ = ModuleGint::GintPrecision::fp64; - this->locked_double_precision_ = true; - break; - } + { + case PrecisionMode::single: + case PrecisionMode::mix: + this->current_precision_ = ModuleGint::GintPrecision::fp32; + this->locked_double_precision_ = false; + break; + case PrecisionMode::double_mode: + default: + this->current_precision_ = ModuleGint::GintPrecision::fp64; + this->locked_double_precision_ = true; + break; + } } -bool GintPrecisionController::update_after_iteration(double drho, double scf_thr) +bool + GintPrecisionController::update_after_iteration (double drho, double scf_thr) { if (this->locked_double_precision_ || this->mode_ != PrecisionMode::mix) - { - return false; - } + { + return false; + } // Switch from fp32 to fp64 when drho is close enough to the target. 
// fp32 has ~7 significant digits (~1e-7 relative error), so we switch @@ -51,17 +55,18 @@ bool GintPrecisionController::update_after_iteration(double drho, double scf_thr // scf_thr is extremely tight. constexpr double kSwitchFactor = 1000.0; constexpr double kMinSwitchThreshold = 1.0e-5; - const double switch_thr = std::max(kSwitchFactor * scf_thr, kMinSwitchThreshold); + const double switch_thr = std::max (kSwitchFactor * scf_thr, kMinSwitchThreshold); if (drho <= switch_thr) - { - this->current_precision_ = ModuleGint::GintPrecision::fp64; - this->locked_double_precision_ = true; - return true; - } + { + this->current_precision_ = ModuleGint::GintPrecision::fp64; + this->locked_double_precision_ = true; + return true; + } return false; } -ModuleGint::GintPrecision GintPrecisionController::current_precision() const +ModuleGint::GintPrecision + GintPrecisionController::current_precision () const { return this->current_precision_; } diff --git a/source/source_estate/module_charge/gint_precision_controller.h b/source/source_estate/module_charge/gint_precision_controller.h index 9b18708235c..a2e7ff63540 100644 --- a/source/source_estate/module_charge/gint_precision_controller.h +++ b/source/source_estate/module_charge/gint_precision_controller.h @@ -8,16 +8,16 @@ class GintPrecisionController { public: - GintPrecisionController() = default; + GintPrecisionController () = default; - void set_mode(const std::string& precision_mode); + void set_mode (const std::string& precision_mode); - void reset_for_new_scf(); + void reset_for_new_scf (); /// Returns true if precision switched from fp32 to fp64 in this call. 
- bool update_after_iteration(double drho, double scf_thr); + bool update_after_iteration (double drho, double scf_thr); - ModuleGint::GintPrecision current_precision() const; + ModuleGint::GintPrecision current_precision () const; private: enum class PrecisionMode @@ -27,7 +27,7 @@ class GintPrecisionController mix }; - static PrecisionMode parse_mode_(const std::string& precision_mode); + static PrecisionMode parse_mode_ (const std::string& precision_mode); ModuleGint::GintPrecision current_precision_ = ModuleGint::GintPrecision::fp64; PrecisionMode mode_ = PrecisionMode::double_mode; diff --git a/source/source_estate/module_charge/symmetry_rho.cpp b/source/source_estate/module_charge/symmetry_rho.cpp index 86abcfa3e67..6b8bd394680 100644 --- a/source/source_estate/module_charge/symmetry_rho.cpp +++ b/source/source_estate/module_charge/symmetry_rho.cpp @@ -2,70 +2,69 @@ #include "source_hamilt/module_xc/xc_functional.h" -Symmetry_rho::Symmetry_rho() -{ -} +Symmetry_rho::Symmetry_rho () {} -Symmetry_rho::~Symmetry_rho() -{ -} +Symmetry_rho::~Symmetry_rho () {} -void Symmetry_rho::symmetrize_rho(const int nspin, - const Charge& chr, - const ModulePW::PW_Basis* pw, - ModuleSymmetry::Symmetry& symm) +void + Symmetry_rho::symmetrize_rho (const int nspin, + const Charge& chr, + const ModulePW::PW_Basis* pw, + ModuleSymmetry::Symmetry& symm) { Symmetry_rho srho; for (int is = 0; is < nspin; is++) - { - srho.begin(is, chr, pw, symm); - } + { + srho.begin (is, chr, pw, symm); + } } -void Symmetry_rho::begin(const int& spin_now, +void + Symmetry_rho::begin (const int& spin_now, const Charge& chr, const ModulePW::PW_Basis* rho_basis, ModuleSymmetry::Symmetry& symm) const { - assert(spin_now < 4); // added by zhengdy-soc + assert (spin_now < 4); // added by zhengdy-soc - if (ModuleSymmetry::Symmetry::symm_flag != 1) - { - return; - } + if (ModuleSymmetry::Symmetry::symm_flag != 1) + { + return; + } - ModuleBase::TITLE("Symmetry_rho", "begin"); - 
ModuleBase::timer::start("Symmetry_rho","begin"); + ModuleBase::TITLE ("Symmetry_rho", "begin"); + ModuleBase::timer::start ("Symmetry_rho", "begin"); -// both parallel and serial -// if(symm.nrot==symm.nrotk) //pure point-group, do rho_symm in real space -// { -// psymm(chr.rho[spin_now], rho_basis, Pgrid, symm); -// if(XC_Functional::get_ked_flag()) psymm(chr.kin_r[spin_now], -// rho_basis,Pgrid,symm); -// } -// else //space group, do rho_symm in reciprocal space + // both parallel and serial + // if(symm.nrot==symm.nrotk) //pure point-group, do rho_symm in real space + // { + // psymm(chr.rho[spin_now], rho_basis, Pgrid, symm); + // if(XC_Functional::get_ked_flag()) psymm(chr.kin_r[spin_now], + // rho_basis,Pgrid,symm); + // } + // else //space group, do rho_symm in reciprocal space - rho_basis->real2recip(chr.rho[spin_now], chr.rhog[spin_now]); + rho_basis->real2recip (chr.rho[spin_now], chr.rhog[spin_now]); - psymmg(chr.rhog[spin_now], rho_basis, symm); // need to modify + psymmg (chr.rhog[spin_now], rho_basis, symm); // need to modify - rho_basis->recip2real(chr.rhog[spin_now], chr.rho[spin_now]); + rho_basis->recip2real (chr.rhog[spin_now], chr.rho[spin_now]); - if (XC_Functional::get_ked_flag() || chr.cal_elf) - { - // Use std::vector to manage kin_g instead of raw pointer - std::vector> kin_g(chr.ngmc); - rho_basis->real2recip(chr.kin_r[spin_now], kin_g.data()); - psymmg(kin_g.data(), rho_basis, symm); - rho_basis->recip2real(kin_g.data(), chr.kin_r[spin_now]); - } + if (XC_Functional::get_ked_flag () || chr.cal_elf) + { + // Use std::vector to manage kin_g instead of raw pointer + std::vector> kin_g (chr.ngmc); + rho_basis->real2recip (chr.kin_r[spin_now], kin_g.data ()); + psymmg (kin_g.data (), rho_basis, symm); + rho_basis->recip2real (kin_g.data (), chr.kin_r[spin_now]); + } - ModuleBase::timer::end("Symmetry_rho","begin"); + ModuleBase::timer::end ("Symmetry_rho", "begin"); return; } -void Symmetry_rho::begin(const int& spin_now, +void + 
Symmetry_rho::begin (const int& spin_now, double** rho, std::complex** rhog, int ngmc, @@ -73,15 +72,15 @@ void Symmetry_rho::begin(const int& spin_now, const ModulePW::PW_Basis* rho_basis, ModuleSymmetry::Symmetry& symm) const { - assert(spin_now < 4); // added by zhengdy-soc + assert (spin_now < 4); // added by zhengdy-soc if (ModuleSymmetry::Symmetry::symm_flag != 1) - { - return; - } + { + return; + } - ModuleBase::TITLE("Symmetry_rho", "begin"); - ModuleBase::timer::start("Symmetry_rho","begin"); + ModuleBase::TITLE ("Symmetry_rho", "begin"); + ModuleBase::timer::start ("Symmetry_rho", "begin"); // both parallel and serial // if(symm.nrot==symm.nrotk) //pure point-group, do rho_symm in real space @@ -92,71 +91,72 @@ void Symmetry_rho::begin(const int& spin_now, // } // else //space group, do rho_symm in reciprocal space { - rho_basis->real2recip(rho[spin_now], rhog[spin_now]); - psymmg(rhog[spin_now], rho_basis, symm); - rho_basis->recip2real(rhog[spin_now], rho[spin_now]); + rho_basis->real2recip (rho[spin_now], rhog[spin_now]); + psymmg (rhog[spin_now], rho_basis, symm); + rho_basis->recip2real (rhog[spin_now], rho[spin_now]); - if (XC_Functional::get_ked_flag()) - { - // Use std::vector to manage kin_g instead of raw pointer - std::vector> kin_g(ngmc); - rho_basis->real2recip(kin_r[spin_now], kin_g.data()); - psymmg(kin_g.data(), rho_basis, symm); - rho_basis->recip2real(kin_g.data(), kin_r[spin_now]); - } + if (XC_Functional::get_ked_flag ()) + { + // Use std::vector to manage kin_g instead of raw pointer + std::vector> kin_g (ngmc); + rho_basis->real2recip (kin_r[spin_now], kin_g.data ()); + psymmg (kin_g.data (), rho_basis, symm); + rho_basis->recip2real (kin_g.data (), kin_r[spin_now]); + } } - ModuleBase::timer::end("Symmetry_rho","begin"); + ModuleBase::timer::end ("Symmetry_rho", "begin"); return; } -void Symmetry_rho::psymm(double* rho_part, +void + Symmetry_rho::psymm (double* rho_part, const ModulePW::PW_Basis* rho_basis, Parallel_Grid& Pgrid, 
ModuleSymmetry::Symmetry& symm) const { - ModuleBase::TITLE("Symmetry_rho", "psymm"); - ModuleBase::timer::start("Symmetry_rho","psymm"); + ModuleBase::TITLE ("Symmetry_rho", "psymm"); + ModuleBase::timer::start ("Symmetry_rho", "psymm"); #ifdef __MPI // reduce all rho from the first pool. std::vector rhotot; if (GlobalV::MY_RANK == 0) - { - rhotot.resize(rho_basis->nxyz); - ModuleBase::GlobalFunc::ZEROS(rhotot.data(), rho_basis->nxyz); - } - Pgrid.reduce(rhotot.data(), rho_part, false); + { + rhotot.resize (rho_basis->nxyz); + ModuleBase::GlobalFunc::ZEROS (rhotot.data (), rho_basis->nxyz); + } + Pgrid.reduce (rhotot.data (), rho_part, false); if (GlobalV::MY_RANK == 0) - { - symm.rho_symmetry(rhotot.data(), rho_basis->nx, rho_basis->ny, rho_basis->nz); + { + symm.rho_symmetry (rhotot.data (), rho_basis->nx, rho_basis->ny, rho_basis->nz); #else - symm.rho_symmetry(rho_part, rho_basis->nx, rho_basis->ny, rho_basis->nz); + symm.rho_symmetry (rho_part, rho_basis->nx, rho_basis->ny, rho_basis->nz); #endif - /* - int count = 0; - GlobalV::ofs_running << scientific; - for(int iz=0; iznz; iz++) - { - GlobalV::ofs_running << "\n iz=" << iz; - for(int iy=0; iyny; iy++) + /* + int count = 0; + GlobalV::ofs_running << scientific; + for(int iz=0; iznz; iz++) { - for(int ix=0; ixnx; ix++) + GlobalV::ofs_running << "\n iz=" << iz; + for(int iy=0; iyny; iy++) { - if(count%5==0) GlobalV::ofs_running << "\n"; - ++count; - GlobalV::ofs_running << " " << rhotot[ix*rho_basis->ny*rho_basis->nz+iy*rho_basis->nz+iz]; + for(int ix=0; ixnx; ix++) + { + if(count%5==0) GlobalV::ofs_running << "\n"; + ++count; + GlobalV::ofs_running << " " << rhotot[ix*rho_basis->ny*rho_basis->nz+iy*rho_basis->nz+iz]; + } } } - } - */ + */ #ifdef __MPI - } + } - Pgrid.bcast(rhotot.data(), rho_part, GlobalV::MY_RANK); + Pgrid.bcast (rhotot.data (), rho_part, GlobalV::MY_RANK); #endif - ModuleBase::timer::end("Symmetry_rho","psymm"); + ModuleBase::timer::end ("Symmetry_rho", "psymm"); return; } diff --git 
a/source/source_estate/module_charge/symmetry_rho.h b/source/source_estate/module_charge/symmetry_rho.h index 98d06501677..cc86ca5d1fa 100644 --- a/source/source_estate/module_charge/symmetry_rho.h +++ b/source/source_estate/module_charge/symmetry_rho.h @@ -8,63 +8,63 @@ class Symmetry_rho { public: - Symmetry_rho(); - ~Symmetry_rho(); + Symmetry_rho (); + ~Symmetry_rho (); /** * @brief Symmetrize charge density for all spin channels - * + * * This is a static helper function that symmetrizes the charge density * for all spin channels by calling begin() for each spin. - * + * * @param nspin Number of spin channels * @param chr Charge object containing the density * @param pw Plane wave basis * @param symm Symmetry object */ - static void symmetrize_rho(const int nspin, - const Charge& chr, - const ModulePW::PW_Basis* pw, - ModuleSymmetry::Symmetry& symm); + static void symmetrize_rho (const int nspin, + const Charge& chr, + const ModulePW::PW_Basis* pw, + ModuleSymmetry::Symmetry& symm); - void begin(const int& spin_now, - const Charge& CHR, - const ModulePW::PW_Basis* pw, - ModuleSymmetry::Symmetry& symm) const; + void begin (const int& spin_now, + const Charge& CHR, + const ModulePW::PW_Basis* pw, + ModuleSymmetry::Symmetry& symm) const; - void begin(const int& spin_now, - double** rho, - std::complex** rhog, - int ngmc, - double** kin_r, - const ModulePW::PW_Basis* pw, - ModuleSymmetry::Symmetry& symm) const; + void begin (const int& spin_now, + double** rho, + std::complex** rhog, + int ngmc, + double** kin_r, + const ModulePW::PW_Basis* pw, + ModuleSymmetry::Symmetry& symm) const; private: // in real space: - void psymm(double* rho_part, - const ModulePW::PW_Basis* pw, - Parallel_Grid& Pgrid, - ModuleSymmetry::Symmetry& symm) const; - // in reciprocal space: - void psymmg(std::complex* rhog_part, - const ModulePW::PW_Basis* rho_basis, + void psymm (double* rho_part, + const ModulePW::PW_Basis* pw, + Parallel_Grid& Pgrid, ModuleSymmetry::Symmetry& symm) const; 
+ // in reciprocal space: + void psymmg (std::complex* rhog_part, + const ModulePW::PW_Basis* rho_basis, + ModuleSymmetry::Symmetry& symm) const; #ifdef __MPI - void reduce_to_fullrhog(const ModulePW::PW_Basis* rho_basis, + void reduce_to_fullrhog (const ModulePW::PW_Basis* rho_basis, + std::complex* rhogtot, + std::complex* rhogin, + int* ig2isztot, + const int* ig2iszin, + int max_npw) const; + void rhog_piece_to_all (const ModulePW::PW_Basis* rho_basis, std::complex* rhogtot, - std::complex* rhogin, - int* ig2isztot, - const int* ig2iszin, - int max_npw) const; - void rhog_piece_to_all(const ModulePW::PW_Basis* rho_basis, - std::complex* rhogtot, - std::complex* rhog_part) const; + std::complex* rhog_part) const; #endif - void get_ixyz2ipw(const ModulePW::PW_Basis* rho_basis, - const int* ig2isztot, - const int* fftixy2is, - int* ixyz2ipw) const; //(ix, iy, iz) -> (ip, ig) + void get_ixyz2ipw (const ModulePW::PW_Basis* rho_basis, + const int* ig2isztot, + const int* fftixy2is, + int* ixyz2ipw) const; //(ix, iy, iz) -> (ip, ig) }; #endif diff --git a/source/source_estate/module_charge/symmetry_rhog.cpp b/source/source_estate/module_charge/symmetry_rhog.cpp index 3a537560bbc..7f6de9a23eb 100644 --- a/source/source_estate/module_charge/symmetry_rhog.cpp +++ b/source/source_estate/module_charge/symmetry_rhog.cpp @@ -3,235 +3,296 @@ #include "source_base/parallel_global.h" #include "source_hamilt/module_xc/xc_functional.h" - -void Symmetry_rho::psymmg(std::complex* rhog_part, const ModulePW::PW_Basis *rho_basis, ModuleSymmetry::Symmetry &symm) const -{ - //(1) get fftixy2is and do Allreduce - int * fftixy2is = new int [rho_basis->fftnxy]; - rho_basis->getfftixy2is(fftixy2is); //current proc +void + Symmetry_rho::psymmg (std::complex* rhog_part, + const ModulePW::PW_Basis* rho_basis, + ModuleSymmetry::Symmetry& symm) const +{ + //(1) get fftixy2is and do Allreduce + int* fftixy2is = new int[rho_basis->fftnxy]; + rho_basis->getfftixy2is (fftixy2is); // current proc 
#ifdef __MPI - Parallel_Reduce::reduce_pool(fftixy2is, rho_basis->fftnxy); - if(rho_basis->poolnproc>1) - for (int i=0;ifftnxy;++i) - fftixy2is[i]+=rho_basis->poolnproc-1; - - // (2) reduce all rho from the first pool. - std::complex* rhogtot; - int* ig2isztot = nullptr; - if(GlobalV::RANK_IN_POOL == 0) - { - rhogtot = new std::complex[rho_basis->npwtot]; - ModuleBase::GlobalFunc::ZEROS(rhogtot, rho_basis->npwtot); - ig2isztot = new int[rho_basis->npwtot]; - ModuleBase::GlobalFunc::ZEROS(rhogtot, rho_basis->npwtot); - } - // find max_npw - int max_npw=0; - for (int proc = 0; proc < rho_basis->poolnproc; ++proc) - { - if(rho_basis->npw_per[proc] > max_npw) - { - max_npw=rho_basis->npw_per[proc]; - } - } - this->reduce_to_fullrhog(rho_basis, rhogtot, rhog_part, ig2isztot, rho_basis->ig2isz, max_npw); - - // (3) get ixy2ipw and do rhog_symmetry on proc 0 of each pool - if(GlobalV::RANK_IN_POOL==0) - { + Parallel_Reduce::reduce_pool (fftixy2is, rho_basis->fftnxy); + if (rho_basis->poolnproc > 1) + for (int i = 0; i < rho_basis->fftnxy; ++i) + fftixy2is[i] += rho_basis->poolnproc - 1; + + // (2) reduce all rho from the first pool. 
+ std::complex* rhogtot; + int* ig2isztot = nullptr; + if (GlobalV::RANK_IN_POOL == 0) + { + rhogtot = new std::complex[rho_basis->npwtot]; + ModuleBase::GlobalFunc::ZEROS (rhogtot, rho_basis->npwtot); + ig2isztot = new int[rho_basis->npwtot]; + ModuleBase::GlobalFunc::ZEROS (rhogtot, rho_basis->npwtot); + } + // find max_npw + int max_npw = 0; + for (int proc = 0; proc < rho_basis->poolnproc; ++proc) + { + if (rho_basis->npw_per[proc] > max_npw) + { + max_npw = rho_basis->npw_per[proc]; + } + } + this->reduce_to_fullrhog (rho_basis, rhogtot, rhog_part, ig2isztot, rho_basis->ig2isz, max_npw); + + // (3) get ixy2ipw and do rhog_symmetry on proc 0 of each pool + if (GlobalV::RANK_IN_POOL == 0) + { #endif - //init ixyz2ipw - int* ixyz2ipw = new int[rho_basis->fftnxyz]; - for(int i=0;ifftnxyz;++i) ixyz2ipw[i]=-1; + // init ixyz2ipw + int* ixyz2ipw = new int[rho_basis->fftnxyz]; + for (int i = 0; i < rho_basis->fftnxyz; ++i) + { + ixyz2ipw[i] = -1; + } #ifdef __MPI - this->get_ixyz2ipw(rho_basis, ig2isztot, fftixy2is, ixyz2ipw); - symm.rhog_symmetry(rhogtot, ixyz2ipw, rho_basis->nx, rho_basis->ny, rho_basis->nz, - rho_basis->fftnx, rho_basis->fftny, rho_basis->fftnz); + this->get_ixyz2ipw (rho_basis, ig2isztot, fftixy2is, ixyz2ipw); + symm.rhog_symmetry (rhogtot, + ixyz2ipw, + rho_basis->nx, + rho_basis->ny, + rho_basis->nz, + rho_basis->fftnx, + rho_basis->fftny, + rho_basis->fftnz); #else - this->get_ixyz2ipw(rho_basis, rho_basis->ig2isz, fftixy2is, ixyz2ipw); - symm.rhog_symmetry(rhog_part, ixyz2ipw, rho_basis->nx, rho_basis->ny, rho_basis->nz, - rho_basis->fftnx, rho_basis->fftny, rho_basis->fftnz); + this->get_ixyz2ipw (rho_basis, rho_basis->ig2isz, fftixy2is, ixyz2ipw); + symm.rhog_symmetry (rhog_part, + ixyz2ipw, + rho_basis->nx, + rho_basis->ny, + rho_basis->nz, + rho_basis->fftnx, + rho_basis->fftny, + rho_basis->fftnz); #endif - delete[] ixyz2ipw; + delete[] ixyz2ipw; #ifdef __MPI - } + } - // (4) send the result to other procs in the same pool - 
this->rhog_piece_to_all(rho_basis, rhogtot, rhog_part); + // (4) send the result to other procs in the same pool + this->rhog_piece_to_all (rho_basis, rhogtot, rhog_part); - if(GlobalV::RANK_IN_POOL==0) - { - delete[] rhogtot; - delete[] ig2isztot; - } + if (GlobalV::RANK_IN_POOL == 0) + { + delete[] rhogtot; + delete[] ig2isztot; + } #endif - delete[] fftixy2is; - return; + delete[] fftixy2is; + return; } #ifdef __MPI -void Symmetry_rho::reduce_to_fullrhog(const ModulePW::PW_Basis *rho_basis, - std::complex* rhogtot, std::complex* rhogin, - int* ig2isztot, const int* ig2iszin, int max_npw) const +void + Symmetry_rho::reduce_to_fullrhog (const ModulePW::PW_Basis* rho_basis, + std::complex* rhogtot, + std::complex* rhogin, + int* ig2isztot, + const int* ig2iszin, + int max_npw) const { - ModuleBase::TITLE("Symmetry_rho","reduce_to_fullrhog"); - - std::complex* rhog_piece = new std::complex[max_npw]; - int* ig2isz_piece = new int[max_npw]; - - int npw_start=0; - for(int proc=0; procpoolnproc; ++proc) - { - ModuleBase::GlobalFunc::ZEROS(rhog_piece, max_npw); - ModuleBase::GlobalFunc::ZEROS(ig2isz_piece, max_npw); - - MPI_Status ierror; - - // case 1: the first part of rho in processor 0 in each pool. 
- if(proc == 0 && GlobalV::RANK_IN_POOL ==0) - { - for(int ig=0; ignpw; ++ig) - { - rhog_piece[ig] = rhogin[ig]; - ig2isz_piece[ig]=ig2iszin[ig]; - } - } - - // case 2: > first part rho: send the rho to - // processor 0 in each pool - else if(proc == GlobalV::RANK_IN_POOL ) - { - for(int ig=0; ignpw; ++ig) - { - rhog_piece[ig] = rhogin[ig]; - ig2isz_piece[ig]=ig2iszin[ig]; - } - MPI_Send(rhog_piece,rho_basis->npw, MPI_DOUBLE_COMPLEX, 0, proc, POOL_WORLD); - MPI_Send(ig2isz_piece, rho_basis->npw, MPI_INT, 0, proc+rho_basis->poolnproc, POOL_WORLD); - } - - // case 2: > first part rho: processor 0 receive the rho - // from other processors - else if(GlobalV::RANK_IN_POOL==0) - { - MPI_Recv(rhog_piece, rho_basis->npw_per[proc], MPI_DOUBLE_COMPLEX, proc, proc, POOL_WORLD, &ierror); - MPI_Recv(ig2isz_piece, rho_basis->npw_per[proc], MPI_INT, proc, proc+rho_basis->poolnproc, POOL_WORLD, &ierror); - } - - if(GlobalV::RANK_IN_POOL==0) - { - for(int ig=0; ignpw_per[proc]; ++ig) - { - rhogtot[npw_start+ig] = rhog_piece[ig]; - ig2isztot[npw_start+ig] = ig2isz_piece[ig]; - } - npw_start+=rho_basis->npw_per[proc]; - } - } - if(GlobalV::RANK_IN_POOL==0) assert(npw_start==rho_basis->npwtot); - delete[] rhog_piece; - delete[] ig2isz_piece; - - MPI_Barrier(MPI_COMM_WORLD); - - return; + ModuleBase::TITLE ("Symmetry_rho", "reduce_to_fullrhog"); + + std::complex* rhog_piece = new std::complex[max_npw]; + int* ig2isz_piece = new int[max_npw]; + + int npw_start = 0; + for (int proc = 0; proc < rho_basis->poolnproc; ++proc) + { + ModuleBase::GlobalFunc::ZEROS (rhog_piece, max_npw); + ModuleBase::GlobalFunc::ZEROS (ig2isz_piece, max_npw); + + MPI_Status ierror; + + // case 1: the first part of rho in processor 0 in each pool. 
+ if (proc == 0 && GlobalV::RANK_IN_POOL == 0) + { + for (int ig = 0; ig < rho_basis->npw; ++ig) + { + rhog_piece[ig] = rhogin[ig]; + ig2isz_piece[ig] = ig2iszin[ig]; + } + } + + // case 2: > first part rho: send the rho to + // processor 0 in each pool + else if (proc == GlobalV::RANK_IN_POOL) + { + for (int ig = 0; ig < rho_basis->npw; ++ig) + { + rhog_piece[ig] = rhogin[ig]; + ig2isz_piece[ig] = ig2iszin[ig]; + } + MPI_Send (rhog_piece, rho_basis->npw, MPI_DOUBLE_COMPLEX, 0, proc, POOL_WORLD); + MPI_Send (ig2isz_piece, rho_basis->npw, MPI_INT, 0, proc + rho_basis->poolnproc, POOL_WORLD); + } + + // case 2: > first part rho: processor 0 receive the rho + // from other processors + else if (GlobalV::RANK_IN_POOL == 0) + { + MPI_Recv (rhog_piece, + rho_basis->npw_per[proc], + MPI_DOUBLE_COMPLEX, + proc, + proc, + POOL_WORLD, + &ierror); + MPI_Recv (ig2isz_piece, + rho_basis->npw_per[proc], + MPI_INT, + proc, + proc + rho_basis->poolnproc, + POOL_WORLD, + &ierror); + } + + if (GlobalV::RANK_IN_POOL == 0) + { + for (int ig = 0; ig < rho_basis->npw_per[proc]; ++ig) + { + rhogtot[npw_start + ig] = rhog_piece[ig]; + ig2isztot[npw_start + ig] = ig2isz_piece[ig]; + } + npw_start += rho_basis->npw_per[proc]; + } + } + if (GlobalV::RANK_IN_POOL == 0) + assert (npw_start == rho_basis->npwtot); + delete[] rhog_piece; + delete[] ig2isz_piece; + + MPI_Barrier (MPI_COMM_WORLD); + + return; } -void Symmetry_rho::rhog_piece_to_all(const ModulePW::PW_Basis *rho_basis, - std::complex* rhogtot, std::complex* rhog_part) const -{ - ModuleBase::TITLE(" Symmetry_rho","rhog_piece_to_all"); - - MPI_Status ierror; - - if(GlobalV::RANK_IN_POOL==0) - { - // proc 0: send to other proc in pool - // itself: directly copy - for(int ig=0;ignpw;++ig) - { - rhog_part[ig]=rhogtot[ig]; - } - int npw_start=rho_basis->npw; - for(int proc=1;procpoolnproc;++proc) - { - MPI_Send(&rhogtot[npw_start], rho_basis->npw_per[proc], MPI_DOUBLE_COMPLEX, proc, proc, POOL_WORLD); - 
npw_start+=rho_basis->npw_per[proc]; - } - assert(npw_start==rho_basis->npwtot); - }// GlobalV::RANK_IN_POOL == 0 - else - { - MPI_Recv(rhog_part, rho_basis->npw_per[GlobalV::RANK_IN_POOL], MPI_DOUBLE_COMPLEX, 0, GlobalV::RANK_IN_POOL, POOL_WORLD, &ierror); - } - return; +void + Symmetry_rho::rhog_piece_to_all (const ModulePW::PW_Basis* rho_basis, + std::complex* rhogtot, + std::complex* rhog_part) const +{ + ModuleBase::TITLE (" Symmetry_rho", "rhog_piece_to_all"); + + MPI_Status ierror; + + if (GlobalV::RANK_IN_POOL == 0) + { + // proc 0: send to other proc in pool + // itself: directly copy + for (int ig = 0; ig < rho_basis->npw; ++ig) + { + rhog_part[ig] = rhogtot[ig]; + } + int npw_start = rho_basis->npw; + for (int proc = 1; proc < rho_basis->poolnproc; ++proc) + { + MPI_Send (&rhogtot[npw_start], + rho_basis->npw_per[proc], + MPI_DOUBLE_COMPLEX, + proc, + proc, + POOL_WORLD); + npw_start += rho_basis->npw_per[proc]; + } + assert (npw_start == rho_basis->npwtot); + } // GlobalV::RANK_IN_POOL == 0 + else + { + MPI_Recv (rhog_part, + rho_basis->npw_per[GlobalV::RANK_IN_POOL], + MPI_DOUBLE_COMPLEX, + 0, + GlobalV::RANK_IN_POOL, + POOL_WORLD, + &ierror); + } + return; } #endif // only for MYRANK==0 -void Symmetry_rho::get_ixyz2ipw(const ModulePW::PW_Basis *rho_basis, - const int* ig2isztot, const int* fftixy2is, int* ixyz2ipw) const +void + Symmetry_rho::get_ixyz2ipw (const ModulePW::PW_Basis* rho_basis, + const int* ig2isztot, + const int* fftixy2is, + int* ixyz2ipw) const { - //step 1: get ipsz2ipw - - //get ipsz2ipw from ig2isztot - int* ipsz2ipw = new int [rho_basis->nstot*rho_basis->nz]; - for(int i=0;instot*rho_basis->nz;++i) ipsz2ipw[i]=-1; - - int npw_count=0; - int nstnz_count=0; - int ipsz=0; //global index of a z-grid on stick - int isz=0; //local index of a z-grid stick on ip core - int ipw=0; // global index of pw (in npwtot) - for (int ip=0;ippoolnproc;++ip) - { - for (int ig=0;ignpw_per[ip];++ig) - { - ipw=npw_count+ig; - isz=ig2isztot[ipw]; - 
ipsz=nstnz_count+isz; - ipsz2ipw[ipsz]=ipw; - } - npw_count+=rho_basis->npw_per[ip]; - nstnz_count+=rho_basis->nst_per[ip]*rho_basis->nz; - } - assert(npw_count==rho_basis->npwtot); - assert(nstnz_count==rho_basis->nstot*rho_basis->nz); - - //step2: ixyz to ipsz - - //save the start-index of (nst*nz) till each core + // step 1: get ipsz2ipw + + // get ipsz2ipw from ig2isztot + int* ipsz2ipw = new int[rho_basis->nstot * rho_basis->nz]; + for (int i = 0; i < rho_basis->nstot * rho_basis->nz; ++i) + { + ipsz2ipw[i] = -1; + } + + int npw_count = 0; + int nstnz_count = 0; + int ipsz = 0; // global index of a z-grid on stick + int isz = 0; // local index of a z-grid stick on ip core + int ipw = 0; // global index of pw (in npwtot) + for (int ip = 0; ip < rho_basis->poolnproc; ++ip) + { + for (int ig = 0; ig < rho_basis->npw_per[ip]; ++ig) + { + ipw = npw_count + ig; + isz = ig2isztot[ipw]; + ipsz = nstnz_count + isz; + ipsz2ipw[ipsz] = ipw; + } + npw_count += rho_basis->npw_per[ip]; + nstnz_count += rho_basis->nst_per[ip] * rho_basis->nz; + } + assert (npw_count == rho_basis->npwtot); + assert (nstnz_count == rho_basis->nstot * rho_basis->nz); + + // step2: ixyz to ipsz + + // save the start-index of (nst*nz) till each core int* nstnz_start = new int[rho_basis->poolnproc]; - nstnz_start[0]=0; - for (int ip=1; ippoolnproc; ++ip) - nstnz_start[ip]=nstnz_start[ip-1]+rho_basis->nst_per[ip-1]*rho_basis->nz; - - //tmp variables - int ixy, ixyz, ip, is, ig=0; - - for (int ix=0;ixfftnx;++ix) - { - for (int iy=0;iyfftny;++iy) - { - for(int iz=0;izfftnz;++iz) - { - ixy = ix*rho_basis->fftny + iy; - ixyz = ixy*rho_basis->fftnz+iz; - ip = rho_basis->fftixy2ip[ixy]; - if (ip==-1) continue; //not in any core - is = fftixy2is[ixy]; //stick-index on ip=proc core - if (is==-1) continue; //not on any stick - ipsz = nstnz_start[ip]+is*rho_basis->nz+iz; - ipw = ipsz2ipw[ipsz]; - ixyz2ipw[ixyz] = ipw; - } - } - } - assert (ixyz==rho_basis->fftnxyz-1); - - delete[] nstnz_start; - delete[] 
ipsz2ipw; - return; + nstnz_start[0] = 0; + for (int ip = 1; ip < rho_basis->poolnproc; ++ip) + { + nstnz_start[ip] = nstnz_start[ip - 1] + rho_basis->nst_per[ip - 1] * rho_basis->nz; + } + + // tmp variables + int ixy, ixyz, ip, is, ig = 0; + + for (int ix = 0; ix < rho_basis->fftnx; ++ix) + { + for (int iy = 0; iy < rho_basis->fftny; ++iy) + { + for (int iz = 0; iz < rho_basis->fftnz; ++iz) + { + ixy = ix * rho_basis->fftny + iy; + ixyz = ixy * rho_basis->fftnz + iz; + ip = rho_basis->fftixy2ip[ixy]; + if (ip == -1) + { + continue; // not in any core + } + is = fftixy2is[ixy]; // stick-index on ip=proc core + if (is == -1) + { + continue; // not on any stick + } + ipsz = nstnz_start[ip] + is * rho_basis->nz + iz; + ipw = ipsz2ipw[ipsz]; + ixyz2ipw[ixyz] = ipw; + } + } + } + assert (ixyz == rho_basis->fftnxyz - 1); + + delete[] nstnz_start; + delete[] ipsz2ipw; + return; } diff --git a/source/source_estate/module_dm/cal_dm_psi.cpp b/source/source_estate/module_dm/cal_dm_psi.cpp index 0445a10a16b..f06b985bd59 100644 --- a/source/source_estate/module_dm/cal_dm_psi.cpp +++ b/source/source_estate/module_dm/cal_dm_psi.cpp @@ -10,263 +10,265 @@ namespace elecstate { // for Gamma-Only case where DMK is double -void cal_dm_psi(const Parallel_Orbitals* ParaV, - const ModuleBase::matrix& wg, - const psi::Psi& wfc, - elecstate::DensityMatrix& DM) +void + cal_dm_psi (const Parallel_Orbitals* ParaV, + const ModuleBase::matrix& wg, + const psi::Psi& wfc, + elecstate::DensityMatrix& DM) { - ModuleBase::TITLE("elecstate", "cal_dm_psi"); - ModuleBase::timer::start("elecstate", "cal_dm_psi"); + ModuleBase::TITLE ("elecstate", "cal_dm_psi"); + ModuleBase::timer::start ("elecstate", "cal_dm_psi"); // dm.resize(wfc.get_nk(), ParaV->ncol, ParaV->nrow); - const int nbands_local = wfc.get_nbands(); - const int nbasis_local = wfc.get_nbasis(); + const int nbands_local = wfc.get_nbands (); + const int nbasis_local = wfc.get_nbasis (); // dm = wfc.T * wg * wfc.conj() // dm[is](iw1,iw2) = 
\sum_{ib} wfc[is](ib,iw1).T * wg(is,ib) * wfc[is](ib,iw2).conj() - for (int ik = 0; ik < wfc.get_nk(); ++ik) - { - double* dmk_pointer = DM.get_DMK_pointer(ik); - wfc.fix_k(ik); - // dm.fix_k(ik); - // dm[ik].create(ParaV->ncol, ParaV->nrow); - // wg_wfc(ib,iw) = wg[ib] * wfc(ib,iw); + for (int ik = 0; ik < wfc.get_nk (); ++ik) + { + double* dmk_pointer = DM.get_DMK_pointer (ik); + wfc.fix_k (ik); + // dm.fix_k(ik); + // dm[ik].create(ParaV->ncol, ParaV->nrow); + // wg_wfc(ib,iw) = wg[ib] * wfc(ib,iw); - psi::Psi wg_wfc(1, wfc.get_nbands(), - wfc.get_nbasis(), wfc.get_nbasis(), true); + psi::Psi wg_wfc (1, wfc.get_nbands (), wfc.get_nbasis (), wfc.get_nbasis (), true); - wg_wfc.set_all_psi(wfc.get_pointer(), wg_wfc.size()); + wg_wfc.set_all_psi (wfc.get_pointer (), wg_wfc.size ()); - int ib_global = 0; - for (int ib_local = 0; ib_local < nbands_local; ++ib_local) - { - while (ib_local != ParaV->global2local_col(ib_global)) - { - ++ib_global; - if (ib_global >= wg.nc) + int ib_global = 0; + for (int ib_local = 0; ib_local < nbands_local; ++ib_local) { - break; - } - } - if (ib_global >= wg.nc) - { - continue; - } - const double wg_local = wg(ik, ib_global); + while (ib_local != ParaV->global2local_col (ib_global)) + { + ++ib_global; + if (ib_global >= wg.nc) + { + break; + } + } + if (ib_global >= wg.nc) + { + continue; + } + const double wg_local = wg (ik, ib_global); - double* wg_wfc_pointer = &(wg_wfc(0, ib_local, 0)); - BlasConnector::scal(nbasis_local, wg_local, wg_wfc_pointer, 1); - } + double* wg_wfc_pointer = &(wg_wfc (0, ib_local, 0)); + BlasConnector::scal (nbasis_local, wg_local, wg_wfc_pointer, 1); + } - // C++: dm(iw1,iw2) = wfc(ib,iw1).T * wg_wfc(ib,iw2) + // C++: dm(iw1,iw2) = wfc(ib,iw1).T * wg_wfc(ib,iw2) #ifdef __MPI - psiMulPsiMpi(wg_wfc, wfc, dmk_pointer, ParaV->desc_wfc, ParaV->desc); + psiMulPsiMpi (wg_wfc, wfc, dmk_pointer, ParaV->desc_wfc, ParaV->desc); #else - psiMulPsi(wg_wfc, wfc, dmk_pointer); + psiMulPsi (wg_wfc, wfc, dmk_pointer); 
#endif - } - ModuleBase::timer::end("elecstate", "cal_dm_psi"); + } + ModuleBase::timer::end ("elecstate", "cal_dm_psi"); return; } -void cal_dm_psi(const Parallel_Orbitals* ParaV, - const ModuleBase::matrix& wg, - const psi::Psi>& wfc, - elecstate::DensityMatrix, double>& DM) +void + cal_dm_psi (const Parallel_Orbitals* ParaV, + const ModuleBase::matrix& wg, + const psi::Psi>& wfc, + elecstate::DensityMatrix, double>& DM) { - ModuleBase::TITLE("elecstate", "cal_dm_psi"); - ModuleBase::timer::start("elecstate", "cal_dm_psi"); + ModuleBase::TITLE ("elecstate", "cal_dm_psi"); + ModuleBase::timer::start ("elecstate", "cal_dm_psi"); // dm.resize(wfc.get_nk(), ParaV->ncol, ParaV->nrow); - const int nbands_local = wfc.get_nbands(); - const int nbasis_local = wfc.get_nbasis(); + const int nbands_local = wfc.get_nbands (); + const int nbasis_local = wfc.get_nbasis (); // dm = wfc.T * wg * wfc.conj() // dm[is](iw1,iw2) = \sum_{ib} wfc[is](ib,iw1).T * wg(is,ib) * wfc[is](ib,iw2).conj() - for (int ik = 0; ik < wfc.get_nk(); ++ik) - { - wfc.fix_k(ik); - std::complex* dmk_pointer = DM.get_DMK_pointer(ik); - // dm.fix_k(ik); - //dm[ik].create(ParaV->ncol, ParaV->nrow); - // wg_wfc(ib,iw) = wg[ib] * wfc(ib,iw); - psi::Psi> wg_wfc(1, - wfc.get_nbands(), - wfc.get_nbasis(), - wfc.get_nbasis(), - true); - - const std::complex* pwfc = wfc.get_pointer(); - std::complex* pwg_wfc = wg_wfc.get_pointer(); + for (int ik = 0; ik < wfc.get_nk (); ++ik) + { + wfc.fix_k (ik); + std::complex* dmk_pointer = DM.get_DMK_pointer (ik); + // dm.fix_k(ik); + // dm[ik].create(ParaV->ncol, ParaV->nrow); + // wg_wfc(ib,iw) = wg[ib] * wfc(ib,iw); + psi::Psi> wg_wfc (1, wfc.get_nbands (), wfc.get_nbasis (), wfc.get_nbasis (), true); + + const std::complex* pwfc = wfc.get_pointer (); + std::complex* pwg_wfc = wg_wfc.get_pointer (); #ifdef _OPENMP #pragma omp parallel for schedule(static, 1024) #endif - for (int i = 0; i < wg_wfc.size(); ++i) - { - pwg_wfc[i] = conj(pwfc[i]); - } + for (int i = 0; i < 
wg_wfc.size (); ++i) + { + pwg_wfc[i] = conj (pwfc[i]); + } - int ib_global = 0; - for (int ib_local = 0; ib_local < nbands_local; ++ib_local) - { - while (ib_local != ParaV->global2local_col(ib_global)) - { - ++ib_global; - if (ib_global >= wg.nc) + int ib_global = 0; + for (int ib_local = 0; ib_local < nbands_local; ++ib_local) { - break; - ModuleBase::WARNING_QUIT("ElecStateLCAO::cal_dm", "please check global2local_col!"); + while (ib_local != ParaV->global2local_col (ib_global)) + { + ++ib_global; + if (ib_global >= wg.nc) + { + break; + ModuleBase::WARNING_QUIT ("ElecStateLCAO::cal_dm", + "please check global2local_col!"); + } + } + if (ib_global >= wg.nc) + { + continue; + } + const double wg_local = wg (ik, ib_global); + std::complex* wg_wfc_pointer = &(wg_wfc (0, ib_local, 0)); + BlasConnector::scal (nbasis_local, wg_local, wg_wfc_pointer, 1); } - } - if (ib_global >= wg.nc) - { - continue; - } - const double wg_local = wg(ik, ib_global); - std::complex* wg_wfc_pointer = &(wg_wfc(0, ib_local, 0)); - BlasConnector::scal(nbasis_local, wg_local, wg_wfc_pointer, 1); - } - // C++: dm(iw1,iw2) = wfc(ib,iw1).T * wg_wfc(ib,iw2) + // C++: dm(iw1,iw2) = wfc(ib,iw1).T * wg_wfc(ib,iw2) #ifdef __MPI - if (PARAM.inp.ks_solver == "cg_in_lcao") - { - psiMulPsi(wg_wfc, wfc, dmk_pointer); - } - else - { - psiMulPsiMpi(wg_wfc, wfc, dmk_pointer, ParaV->desc_wfc, ParaV->desc); - } + if (PARAM.inp.ks_solver == "cg_in_lcao") + { + psiMulPsi (wg_wfc, wfc, dmk_pointer); + } + else + { + psiMulPsiMpi (wg_wfc, wfc, dmk_pointer, ParaV->desc_wfc, ParaV->desc); + } #else - psiMulPsi(wg_wfc, wfc, dmk_pointer); + psiMulPsi (wg_wfc, wfc, dmk_pointer); #endif - } + } - ModuleBase::timer::end("elecstate", "cal_dm_psi"); + ModuleBase::timer::end ("elecstate", "cal_dm_psi"); return; } #ifdef __MPI -void psiMulPsiMpi(const psi::Psi& psi1, - const psi::Psi& psi2, - double* dm_out, - const int* desc_psi, - const int* desc_dm) +void + psiMulPsiMpi (const psi::Psi& psi1, + const psi::Psi& psi2, + 
double* dm_out, + const int* desc_psi, + const int* desc_dm) { - ModuleBase::timer::start("psiMulPsiMpi", "pdgemm"); + ModuleBase::timer::start ("psiMulPsiMpi", "pdgemm"); const double one_float = 1.0, zero_float = 0.0; const int one_int = 1; const char N_char = 'N', T_char = 'T'; const int nlocal = desc_dm[2]; const int nbands = desc_psi[3]; - ScalapackConnector::gemm(N_char, - T_char, - nlocal, - nlocal, - nbands, - one_float, - psi1.get_pointer(), - one_int, - one_int, - desc_psi, - psi2.get_pointer(), - one_int, - one_int, - desc_psi, - zero_float, - dm_out, - one_int, - one_int, - desc_dm); - ModuleBase::timer::end("psiMulPsiMpi", "pdgemm"); + ScalapackConnector::gemm (N_char, + T_char, + nlocal, + nlocal, + nbands, + one_float, + psi1.get_pointer (), + one_int, + one_int, + desc_psi, + psi2.get_pointer (), + one_int, + one_int, + desc_psi, + zero_float, + dm_out, + one_int, + one_int, + desc_dm); + ModuleBase::timer::end ("psiMulPsiMpi", "pdgemm"); } -void psiMulPsiMpi(const psi::Psi>& psi1, - const psi::Psi>& psi2, - std::complex* dm_out, - const int* desc_psi, - const int* desc_dm) +void + psiMulPsiMpi (const psi::Psi>& psi1, + const psi::Psi>& psi2, + std::complex* dm_out, + const int* desc_psi, + const int* desc_dm) { - ModuleBase::timer::start("psiMulPsiMpi", "pdgemm"); + ModuleBase::timer::start ("psiMulPsiMpi", "pdgemm"); const std::complex one_complex = {1.0, 0.0}, zero_complex = {0.0, 0.0}; const int one_int = 1; const char N_char = 'N', T_char = 'T'; const int nlocal = desc_dm[2]; const int nbands = desc_psi[3]; - ScalapackConnector::gemm(N_char, - T_char, - nlocal, - nlocal, - nbands, - one_complex, - psi1.get_pointer(), - one_int, - one_int, - desc_psi, - psi2.get_pointer(), - one_int, - one_int, - desc_psi, - zero_complex, - dm_out, - one_int, - one_int, - desc_dm); - ModuleBase::timer::end("psiMulPsiMpi", "pdgemm"); + ScalapackConnector::gemm (N_char, + T_char, + nlocal, + nlocal, + nbands, + one_complex, + psi1.get_pointer (), + one_int, + 
one_int, + desc_psi, + psi2.get_pointer (), + one_int, + one_int, + desc_psi, + zero_complex, + dm_out, + one_int, + one_int, + desc_dm); + ModuleBase::timer::end ("psiMulPsiMpi", "pdgemm"); } #endif -void psiMulPsi(const psi::Psi& psi1, const psi::Psi& psi2, double* dm_out) +void + psiMulPsi (const psi::Psi& psi1, const psi::Psi& psi2, double* dm_out) { const double one_float = 1.0, zero_float = 0.0; const int one_int = 1; const char N_char = 'N', T_char = 'T'; - const int nlocal = psi1.get_nbasis(); - const int nbands = psi1.get_nbands(); - BlasConnector::gemm_cm(N_char, - T_char, - nlocal, - nlocal, - nbands, - one_float, - psi1.get_pointer(), - nlocal, - psi2.get_pointer(), - nlocal, - zero_float, - dm_out, - nlocal); + const int nlocal = psi1.get_nbasis (); + const int nbands = psi1.get_nbands (); + BlasConnector::gemm_cm (N_char, + T_char, + nlocal, + nlocal, + nbands, + one_float, + psi1.get_pointer (), + nlocal, + psi2.get_pointer (), + nlocal, + zero_float, + dm_out, + nlocal); } -void psiMulPsi(const psi::Psi>& psi1, - const psi::Psi>& psi2, - std::complex* dm_out) +void + psiMulPsi (const psi::Psi>& psi1, + const psi::Psi>& psi2, + std::complex* dm_out) { const int one_int = 1; const char N_char = 'N', T_char = 'T'; - const int nlocal = psi1.get_nbasis(); - const int nbands = psi1.get_nbands(); + const int nlocal = psi1.get_nbasis (); + const int nbands = psi1.get_nbands (); const std::complex one_complex = {1.0, 0.0}; const std::complex zero_complex = {0.0, 0.0}; - BlasConnector::gemm_cm(N_char, - T_char, - nlocal, - nlocal, - nbands, - one_complex, - psi1.get_pointer(), - nlocal, - psi2.get_pointer(), - nlocal, - zero_complex, - dm_out, - nlocal); + BlasConnector::gemm_cm (N_char, + T_char, + nlocal, + nlocal, + nbands, + one_complex, + psi1.get_pointer (), + nlocal, + psi2.get_pointer (), + nlocal, + zero_complex, + dm_out, + nlocal); } } // namespace elecstate diff --git a/source/source_estate/module_dm/cal_dm_psi.h 
b/source/source_estate/module_dm/cal_dm_psi.h index 09ab3d3974a..4b6f44d426e 100644 --- a/source/source_estate/module_dm/cal_dm_psi.h +++ b/source/source_estate/module_dm/cal_dm_psi.h @@ -7,32 +7,38 @@ namespace elecstate { - // for Gamma-Only case where DMK is double - void cal_dm_psi(const Parallel_Orbitals* ParaV, const ModuleBase::matrix& wg, const psi::Psi& wfc, elecstate::DensityMatrix& DM); +// for Gamma-Only case where DMK is double +void cal_dm_psi (const Parallel_Orbitals* ParaV, + const ModuleBase::matrix& wg, + const psi::Psi& wfc, + elecstate::DensityMatrix& DM); - // for Multi-k case where DMK is std::complex - void cal_dm_psi(const Parallel_Orbitals* ParaV, const ModuleBase::matrix& wg, const psi::Psi>& wfc, elecstate::DensityMatrix, double>& DM); +// for Multi-k case where DMK is std::complex +void cal_dm_psi (const Parallel_Orbitals* ParaV, + const ModuleBase::matrix& wg, + const psi::Psi>& wfc, + elecstate::DensityMatrix, double>& DM); - // for Gamma-Only case with MPI - void psiMulPsiMpi(const psi::Psi& psi1, - const psi::Psi& psi2, - double* dm_out, - const int* desc_psi, - const int* desc_dm); - - // for multi-k case with MPI - void psiMulPsiMpi(const psi::Psi>& psi1, - const psi::Psi>& psi2, - std::complex* dm_out, - const int* desc_psi, - const int* desc_dm); +// for Gamma-Only case with MPI +void psiMulPsiMpi (const psi::Psi& psi1, + const psi::Psi& psi2, + double* dm_out, + const int* desc_psi, + const int* desc_dm); - // for Gamma-Only case without MPI - void psiMulPsi(const psi::Psi& psi1, const psi::Psi& psi2, double* dm_out); +// for multi-k case with MPI +void psiMulPsiMpi (const psi::Psi>& psi1, + const psi::Psi>& psi2, + std::complex* dm_out, + const int* desc_psi, + const int* desc_dm); - // for multi-k case without MPI - void psiMulPsi(const psi::Psi>& psi1, - const psi::Psi>& psi2, - std::complex* dm_out); -}; +// for Gamma-Only case without MPI +void psiMulPsi (const psi::Psi& psi1, const psi::Psi& psi2, double* dm_out); + +// 
for multi-k case without MPI +void psiMulPsi (const psi::Psi>& psi1, + const psi::Psi>& psi2, + std::complex* dm_out); +}; // namespace elecstate #endif diff --git a/source/source_estate/module_dm/cal_edm_tddft.cpp b/source/source_estate/module_dm/cal_edm_tddft.cpp index 2295ccf5285..e235d51e21c 100644 --- a/source/source_estate/module_dm/cal_edm_tddft.cpp +++ b/source/source_estate/module_dm/cal_edm_tddft.cpp @@ -13,542 +13,558 @@ namespace elecstate { -void print_local_matrix(std::ostream& os, +void + print_local_matrix (std::ostream& os, const std::complex* matrix_data, int local_rows, int local_cols, const std::string& matrix_name, int rank) { - if (!matrix_name.empty() || rank >= 0) - { - os << "=== "; - if (!matrix_name.empty()) + if (!matrix_name.empty () || rank >= 0) { - os << "Matrix: " << matrix_name; + os << "=== "; + if (!matrix_name.empty ()) + { + os << "Matrix: " << matrix_name; + if (rank >= 0) + os << " "; + } if (rank >= 0) - os << " "; - } - if (rank >= 0) - { - os << "(Process: " << rank + 1 << ")"; + { + os << "(Process: " << rank + 1 << ")"; + } + os << " (Local dims: " << local_rows << " x " << local_cols << ") ===" << std::endl; } - os << " (Local dims: " << local_rows << " x " << local_cols << ") ===" << std::endl; - } - os << std::fixed << std::setprecision(10) << std::showpos; + os << std::fixed << std::setprecision (10) << std::showpos; for (int i = 0; i < local_rows; ++i) // Iterate over rows (i) - { - for (int j = 0; j < local_cols; ++j) // Iterate over columns (j) { - // For column-major storage, element (i, j) is at index i + j * LDA - // where LDA (leading dimension) is typically the number of *rows* in the local block. 
- int idx = i + j * local_rows; - os << "(" << std::real(matrix_data[idx]) << "," << std::imag(matrix_data[idx]) << ") "; + for (int j = 0; j < local_cols; ++j) // Iterate over columns (j) + { + // For column-major storage, element (i, j) is at index i + j * LDA + // where LDA (leading dimension) is typically the number of *rows* in the local block. + int idx = i + j * local_rows; + os << "(" << std::real (matrix_data[idx]) << "," << std::imag (matrix_data[idx]) << ") "; + } + os << std::endl; // New line after each row } - os << std::endl; // New line after each row - } - os.unsetf(std::ios_base::fixed | std::ios_base::showpos); + os.unsetf (std::ios_base::fixed | std::ios_base::showpos); os << std::endl; } // use the original formula (Hamiltonian matrix) to calculate energy density matrix -void cal_edm_tddft(Parallel_Orbitals& pv, +void + cal_edm_tddft (Parallel_Orbitals& pv, LCAO_domain::Setup_DM>& dmat, K_Vectors& kv, hamilt::Hamilt>* p_hamilt) { - ModuleBase::TITLE("elecstate", "cal_edm_tddft"); - ModuleBase::timer::start("TD_Efficiency", "cal_edm_tddft"); + ModuleBase::TITLE ("elecstate", "cal_edm_tddft"); + ModuleBase::timer::start ("TD_Efficiency", "cal_edm_tddft"); const int nlocal = PARAM.globalv.nlocal; - assert(nlocal >= 0); + assert (nlocal >= 0); - dmat.dm->EDMK.resize(kv.get_nks()); + dmat.dm->EDMK.resize (kv.get_nks ()); - for (int ik = 0; ik < kv.get_nks(); ++ik) - { - p_hamilt->updateHk(ik); - std::complex* tmp_dmk = dmat.dm->get_DMK_pointer(ik); - ModuleBase::ComplexMatrix& tmp_edmk = dmat.dm->EDMK[ik]; + for (int ik = 0; ik < kv.get_nks (); ++ik) + { + p_hamilt->updateHk (ik); + std::complex* tmp_dmk = dmat.dm->get_DMK_pointer (ik); + ModuleBase::ComplexMatrix& tmp_edmk = dmat.dm->EDMK[ik]; #ifdef __MPI - const int nloc = pv.nloc; - const int ncol = pv.ncol; - const int nrow = pv.nrow; - - tmp_edmk.create(ncol, nrow); - std::complex* Htmp = new std::complex[nloc]; - std::complex* Sinv = new std::complex[nloc]; - std::complex* tmp1 = new 
std::complex[nloc]; - std::complex* tmp2 = new std::complex[nloc]; - std::complex* tmp3 = new std::complex[nloc]; - std::complex* tmp4 = new std::complex[nloc]; - - ModuleBase::GlobalFunc::ZEROS(Htmp, nloc); - ModuleBase::GlobalFunc::ZEROS(Sinv, nloc); - ModuleBase::GlobalFunc::ZEROS(tmp1, nloc); - ModuleBase::GlobalFunc::ZEROS(tmp2, nloc); - ModuleBase::GlobalFunc::ZEROS(tmp3, nloc); - ModuleBase::GlobalFunc::ZEROS(tmp4, nloc); - - const int inc = 1; - - hamilt::MatrixBlock> h_mat; - hamilt::MatrixBlock> s_mat; - - p_hamilt->matrix(h_mat, s_mat); - BlasConnector::copy(nloc, h_mat.p, inc, Htmp, inc); - BlasConnector::copy(nloc, s_mat.p, inc, Sinv, inc); - - vector ipiv(nloc, 0); - int info = 0; - const int one_int = 1; - - ScalapackConnector::getrf(nlocal, nlocal, Sinv, one_int, one_int, pv.desc, ipiv.data(), &info); - - int lwork = -1; - int liwork = -1; - - // if lwork == -1, then the size of work is (at least) of length 1. - std::vector> work(1, 0); - - // if liwork = -1, then the size of iwork is (at least) of length 1. 
- std::vector iwork(1, 0); - - ScalapackConnector::getri(nlocal, - Sinv, - one_int, - one_int, - pv.desc, - ipiv.data(), - work.data(), - &lwork, - iwork.data(), - &liwork, - &info); - - lwork = work[0].real(); - work.resize(lwork, 0); - liwork = iwork[0]; - iwork.resize(liwork, 0); - - ScalapackConnector::getri(nlocal, - Sinv, - one_int, - one_int, - pv.desc, - ipiv.data(), - work.data(), - &lwork, - iwork.data(), - &liwork, - &info); - - const char N_char = 'N'; - const char T_char = 'T'; - const std::complex one_complex = {1.0, 0.0}; - const std::complex zero_complex = {0.0, 0.0}; - const std::complex half_complex = {0.5, 0.0}; - - // tmp1 = Htmp * Sinv - ScalapackConnector::gemm(N_char, - N_char, - nlocal, - nlocal, - nlocal, - one_complex, - Htmp, - one_int, - one_int, - pv.desc, - Sinv, - one_int, - one_int, - pv.desc, - zero_complex, - tmp1, - one_int, - one_int, - pv.desc); - - // tmp2 = tmp1^T * tmp_dmk - ScalapackConnector::gemm(T_char, - N_char, - nlocal, - nlocal, - nlocal, - one_complex, - tmp1, - one_int, - one_int, - pv.desc, - tmp_dmk, - one_int, - one_int, - pv.desc, - zero_complex, - tmp2, - one_int, - one_int, - pv.desc); - - // tmp3 = Sinv * Htmp - ScalapackConnector::gemm(N_char, - N_char, - nlocal, - nlocal, - nlocal, - one_complex, - Sinv, - one_int, - one_int, - pv.desc, - Htmp, - one_int, - one_int, - pv.desc, - zero_complex, - tmp3, - one_int, - one_int, - pv.desc); - - // tmp4 = tmp_dmk * tmp3^T - ScalapackConnector::gemm(N_char, - T_char, - nlocal, - nlocal, - nlocal, - one_complex, - tmp_dmk, - one_int, - one_int, - pv.desc, - tmp3, - one_int, - one_int, - pv.desc, - zero_complex, - tmp4, - one_int, - one_int, - pv.desc); - - // tmp4 = 0.5 * (tmp2 + tmp4) - ScalapackConnector::geadd(N_char, - nlocal, - nlocal, - half_complex, - tmp2, - one_int, - one_int, - pv.desc, - half_complex, - tmp4, - one_int, - one_int, - pv.desc); - - BlasConnector::copy(nloc, tmp4, inc, tmp_edmk.c, inc); - - delete[] Htmp; - delete[] Sinv; - delete[] tmp1; - 
delete[] tmp2; - delete[] tmp3; - delete[] tmp4; + const int nloc = pv.nloc; + const int ncol = pv.ncol; + const int nrow = pv.nrow; + + tmp_edmk.create (ncol, nrow); + std::complex* Htmp = new std::complex[nloc]; + std::complex* Sinv = new std::complex[nloc]; + std::complex* tmp1 = new std::complex[nloc]; + std::complex* tmp2 = new std::complex[nloc]; + std::complex* tmp3 = new std::complex[nloc]; + std::complex* tmp4 = new std::complex[nloc]; + + ModuleBase::GlobalFunc::ZEROS (Htmp, nloc); + ModuleBase::GlobalFunc::ZEROS (Sinv, nloc); + ModuleBase::GlobalFunc::ZEROS (tmp1, nloc); + ModuleBase::GlobalFunc::ZEROS (tmp2, nloc); + ModuleBase::GlobalFunc::ZEROS (tmp3, nloc); + ModuleBase::GlobalFunc::ZEROS (tmp4, nloc); + + const int inc = 1; + + hamilt::MatrixBlock> h_mat; + hamilt::MatrixBlock> s_mat; + + p_hamilt->matrix (h_mat, s_mat); + BlasConnector::copy (nloc, h_mat.p, inc, Htmp, inc); + BlasConnector::copy (nloc, s_mat.p, inc, Sinv, inc); + + vector ipiv (nloc, 0); + int info = 0; + const int one_int = 1; + + ScalapackConnector::getrf (nlocal, nlocal, Sinv, one_int, one_int, pv.desc, ipiv.data (), &info); + + int lwork = -1; + int liwork = -1; + + // if lwork == -1, then the size of work is (at least) of length 1. + std::vector> work (1, 0); + + // if liwork = -1, then the size of iwork is (at least) of length 1. 
+ std::vector iwork (1, 0); + + ScalapackConnector::getri (nlocal, + Sinv, + one_int, + one_int, + pv.desc, + ipiv.data (), + work.data (), + &lwork, + iwork.data (), + &liwork, + &info); + + lwork = work[0].real (); + work.resize (lwork, 0); + liwork = iwork[0]; + iwork.resize (liwork, 0); + + ScalapackConnector::getri (nlocal, + Sinv, + one_int, + one_int, + pv.desc, + ipiv.data (), + work.data (), + &lwork, + iwork.data (), + &liwork, + &info); + + const char N_char = 'N'; + const char T_char = 'T'; + const std::complex one_complex = {1.0, 0.0}; + const std::complex zero_complex = {0.0, 0.0}; + const std::complex half_complex = {0.5, 0.0}; + + // tmp1 = Htmp * Sinv + ScalapackConnector::gemm (N_char, + N_char, + nlocal, + nlocal, + nlocal, + one_complex, + Htmp, + one_int, + one_int, + pv.desc, + Sinv, + one_int, + one_int, + pv.desc, + zero_complex, + tmp1, + one_int, + one_int, + pv.desc); + + // tmp2 = tmp1^T * tmp_dmk + ScalapackConnector::gemm (T_char, + N_char, + nlocal, + nlocal, + nlocal, + one_complex, + tmp1, + one_int, + one_int, + pv.desc, + tmp_dmk, + one_int, + one_int, + pv.desc, + zero_complex, + tmp2, + one_int, + one_int, + pv.desc); + + // tmp3 = Sinv * Htmp + ScalapackConnector::gemm (N_char, + N_char, + nlocal, + nlocal, + nlocal, + one_complex, + Sinv, + one_int, + one_int, + pv.desc, + Htmp, + one_int, + one_int, + pv.desc, + zero_complex, + tmp3, + one_int, + one_int, + pv.desc); + + // tmp4 = tmp_dmk * tmp3^T + ScalapackConnector::gemm (N_char, + T_char, + nlocal, + nlocal, + nlocal, + one_complex, + tmp_dmk, + one_int, + one_int, + pv.desc, + tmp3, + one_int, + one_int, + pv.desc, + zero_complex, + tmp4, + one_int, + one_int, + pv.desc); + + // tmp4 = 0.5 * (tmp2 + tmp4) + ScalapackConnector::geadd (N_char, + nlocal, + nlocal, + half_complex, + tmp2, + one_int, + one_int, + pv.desc, + half_complex, + tmp4, + one_int, + one_int, + pv.desc); + + BlasConnector::copy (nloc, tmp4, inc, tmp_edmk.c, inc); + + delete[] Htmp; + delete[] Sinv; + 
delete[] tmp1; + delete[] tmp2; + delete[] tmp3; + delete[] tmp4; #else - // for serial version - tmp_edmk.create(pv.ncol, pv.nrow); - ModuleBase::ComplexMatrix Sinv(nlocal, nlocal); - ModuleBase::ComplexMatrix Htmp(nlocal, nlocal); + // for serial version + tmp_edmk.create (pv.ncol, pv.nrow); + ModuleBase::ComplexMatrix Sinv (nlocal, nlocal); + ModuleBase::ComplexMatrix Htmp (nlocal, nlocal); - hamilt::MatrixBlock> h_mat; - hamilt::MatrixBlock> s_mat; + hamilt::MatrixBlock> h_mat; + hamilt::MatrixBlock> s_mat; - p_hamilt->matrix(h_mat, s_mat); + p_hamilt->matrix (h_mat, s_mat); - for (int i = 0; i < nlocal; i++) - { - for (int j = 0; j < nlocal; j++) - { - Htmp(i, j) = h_mat.p[i * nlocal + j]; - Sinv(i, j) = s_mat.p[i * nlocal + j]; - } - } - int INFO = 0; + for (int i = 0; i < nlocal; i++) + { + for (int j = 0; j < nlocal; j++) + { + Htmp (i, j) = h_mat.p[i * nlocal + j]; + Sinv (i, j) = s_mat.p[i * nlocal + j]; + } + } + int INFO = 0; - int lwork = 3 * nlocal - 1; // tmp - std::complex* work = new std::complex[lwork]; - ModuleBase::GlobalFunc::ZEROS(work, lwork); + int lwork = 3 * nlocal - 1; // tmp + std::complex* work = new std::complex[lwork]; + ModuleBase::GlobalFunc::ZEROS (work, lwork); - int IPIV[nlocal]; + int IPIV[nlocal]; - LapackConnector::zgetrf(nlocal, nlocal, Sinv, nlocal, IPIV, &INFO); - LapackConnector::zgetri(nlocal, Sinv, nlocal, IPIV, work, lwork, &INFO); - // I just use ModuleBase::ComplexMatrix temporarily, and will change it - // to std::complex* - ModuleBase::ComplexMatrix tmp_dmk_base(nlocal, nlocal); - for (int i = 0; i < nlocal; i++) - { - for (int j = 0; j < nlocal; j++) - { - tmp_dmk_base(i, j) = tmp_dmk[i * nlocal + j]; - } - } - tmp_edmk = 0.5 * (Sinv * Htmp * tmp_dmk_base + tmp_dmk_base * Htmp * Sinv); - delete[] work; + LapackConnector::zgetrf (nlocal, nlocal, Sinv, nlocal, IPIV, &INFO); + LapackConnector::zgetri (nlocal, Sinv, nlocal, IPIV, work, lwork, &INFO); + // I just use ModuleBase::ComplexMatrix temporarily, and will 
change it + // to std::complex* + ModuleBase::ComplexMatrix tmp_dmk_base (nlocal, nlocal); + for (int i = 0; i < nlocal; i++) + { + for (int j = 0; j < nlocal; j++) + { + tmp_dmk_base (i, j) = tmp_dmk[i * nlocal + j]; + } + } + tmp_edmk = 0.5 * (Sinv * Htmp * tmp_dmk_base + tmp_dmk_base * Htmp * Sinv); + delete[] work; #endif - } // end ik + } // end ik - ModuleBase::timer::end("TD_Efficiency", "cal_edm_tddft"); + ModuleBase::timer::end ("TD_Efficiency", "cal_edm_tddft"); return; } // cal_edm_tddft -void cal_edm_tddft_tensor(Parallel_Orbitals& pv, +void + cal_edm_tddft_tensor (Parallel_Orbitals& pv, LCAO_domain::Setup_DM>& dmat, K_Vectors& kv, hamilt::Hamilt>* p_hamilt) { - ModuleBase::TITLE("elecstate", "cal_edm_tddft_tensor"); - ModuleBase::timer::start("TD_Efficiency", "cal_edm_tddft"); + ModuleBase::TITLE ("elecstate", "cal_edm_tddft_tensor"); + ModuleBase::timer::start ("TD_Efficiency", "cal_edm_tddft"); const int nlocal = PARAM.globalv.nlocal; - assert(nlocal >= 0); - dmat.dm->EDMK.resize(kv.get_nks()); + assert (nlocal >= 0); + dmat.dm->EDMK.resize (kv.get_nks ()); - for (int ik = 0; ik < kv.get_nks(); ++ik) - { - p_hamilt->updateHk(ik); - std::complex* tmp_dmk = dmat.dm->get_DMK_pointer(ik); - ModuleBase::ComplexMatrix& tmp_edmk = dmat.dm->EDMK[ik]; + for (int ik = 0; ik < kv.get_nks (); ++ik) + { + p_hamilt->updateHk (ik); + std::complex* tmp_dmk = dmat.dm->get_DMK_pointer (ik); + ModuleBase::ComplexMatrix& tmp_edmk = dmat.dm->EDMK[ik]; #ifdef __MPI - const int nloc = pv.nloc; - const int ncol = pv.ncol; - const int nrow = pv.nrow; - - // Initialize EDMK matrix - tmp_edmk.create(ncol, nrow); - - // Allocate Tensor objects on CPU - ct::Tensor Htmp_tensor(ct::DataType::DT_COMPLEX_DOUBLE, ct::DeviceType::CpuDevice, ct::TensorShape({nloc})); - Htmp_tensor.zero(); - - ct::Tensor Sinv_tensor(ct::DataType::DT_COMPLEX_DOUBLE, ct::DeviceType::CpuDevice, ct::TensorShape({nloc})); - Sinv_tensor.zero(); - - ct::Tensor tmp1_tensor(ct::DataType::DT_COMPLEX_DOUBLE, 
ct::DeviceType::CpuDevice, ct::TensorShape({nloc})); - tmp1_tensor.zero(); - - ct::Tensor tmp2_tensor(ct::DataType::DT_COMPLEX_DOUBLE, ct::DeviceType::CpuDevice, ct::TensorShape({nloc})); - tmp2_tensor.zero(); - - ct::Tensor tmp3_tensor(ct::DataType::DT_COMPLEX_DOUBLE, ct::DeviceType::CpuDevice, ct::TensorShape({nloc})); - tmp3_tensor.zero(); - - ct::Tensor tmp4_tensor(ct::DataType::DT_COMPLEX_DOUBLE, ct::DeviceType::CpuDevice, ct::TensorShape({nloc})); - tmp4_tensor.zero(); - - // Get raw pointers from tensors for ScaLAPACK calls - std::complex* Htmp_ptr = Htmp_tensor.data>(); - std::complex* Sinv_ptr = Sinv_tensor.data>(); - std::complex* tmp1_ptr = tmp1_tensor.data>(); - std::complex* tmp2_ptr = tmp2_tensor.data>(); - std::complex* tmp3_ptr = tmp3_tensor.data>(); - std::complex* tmp4_ptr = tmp4_tensor.data>(); - - const int inc = 1; - hamilt::MatrixBlock> h_mat; - hamilt::MatrixBlock> s_mat; - p_hamilt->matrix(h_mat, s_mat); - - // Copy Hamiltonian and Overlap matrices into Tensor buffers using BlasConnector - BlasConnector::copy(nloc, h_mat.p, inc, Htmp_ptr, inc); - BlasConnector::copy(nloc, s_mat.p, inc, Sinv_ptr, inc); - - int myid = 0; - const int root_proc = 0; - MPI_Comm_rank(MPI_COMM_WORLD, &myid); - - // --- ScaLAPACK Inversion of S --- - ct::Tensor ipiv(ct::DataType::DT_INT, - ct::DeviceType::CpuDevice, - ct::TensorShape({pv.nrow + pv.nb})); // Size for ScaLAPACK pivot array - ipiv.zero(); - int* ipiv_ptr = ipiv.data(); - - int info = 0; - const int one_int = 1; - ScalapackConnector::getrf(nlocal, nlocal, Sinv_ptr, one_int, one_int, pv.desc, ipiv_ptr, &info); - - int lwork = -1; - int liwork = -1; - ct::Tensor work_query(ct::DataType::DT_COMPLEX_DOUBLE, ct::DeviceType::CpuDevice, ct::TensorShape({1})); - ct::Tensor iwork_query(ct::DataType::DT_INT, ct::DeviceType::CpuDevice, ct::TensorShape({1})); - - ScalapackConnector::getri(nlocal, - Sinv_ptr, - one_int, - one_int, - pv.desc, - ipiv_ptr, - work_query.data>(), - &lwork, - iwork_query.data(), - 
&liwork, - &info); - - // Resize work arrays based on query results - lwork = work_query.data>()[0].real(); - work_query.resize(ct::TensorShape({lwork})); - liwork = iwork_query.data()[0]; - iwork_query.resize(ct::TensorShape({liwork})); - - ScalapackConnector::getri(nlocal, - Sinv_ptr, - one_int, - one_int, - pv.desc, - ipiv_ptr, - work_query.data>(), - &lwork, - iwork_query.data(), - &liwork, - &info); - - // --- EDM Calculation using ScaLAPACK --- - const char N_char = 'N'; - const char T_char = 'T'; - const std::complex one_complex = {1.0, 0.0}; - const std::complex zero_complex = {0.0, 0.0}; - const std::complex half_complex = {0.5, 0.0}; - - // tmp1 = Htmp * Sinv - ScalapackConnector::gemm(N_char, - N_char, - nlocal, - nlocal, - nlocal, - one_complex, - Htmp_ptr, - one_int, - one_int, - pv.desc, - Sinv_ptr, - one_int, - one_int, - pv.desc, - zero_complex, - tmp1_ptr, - one_int, - one_int, - pv.desc); - - // tmp2 = tmp1^T * tmp_dmk - ScalapackConnector::gemm(T_char, - N_char, - nlocal, - nlocal, - nlocal, - one_complex, - tmp1_ptr, - one_int, - one_int, - pv.desc, - tmp_dmk, - one_int, - one_int, - pv.desc, - zero_complex, - tmp2_ptr, - one_int, - one_int, - pv.desc); - - // tmp3 = Sinv * Htmp - ScalapackConnector::gemm(N_char, - N_char, - nlocal, - nlocal, - nlocal, - one_complex, - Sinv_ptr, - one_int, - one_int, - pv.desc, - Htmp_ptr, - one_int, - one_int, - pv.desc, - zero_complex, - tmp3_ptr, - one_int, - one_int, - pv.desc); - - // tmp4 = tmp_dmk * tmp3^T - ScalapackConnector::gemm(N_char, - T_char, - nlocal, - nlocal, - nlocal, - one_complex, - tmp_dmk, - one_int, - one_int, - pv.desc, - tmp3_ptr, - one_int, - one_int, - pv.desc, - zero_complex, - tmp4_ptr, - one_int, - one_int, - pv.desc); - - // tmp4 = 0.5 * (tmp2 + tmp4) - ScalapackConnector::geadd(N_char, - nlocal, - nlocal, - half_complex, - tmp2_ptr, - one_int, - one_int, - pv.desc, - half_complex, - tmp4_ptr, - one_int, - one_int, - pv.desc); - - // Copy final result from Tensor buffer back to 
EDMK matrix - BlasConnector::copy(nloc, tmp4_ptr, inc, tmp_edmk.c, inc); + const int nloc = pv.nloc; + const int ncol = pv.ncol; + const int nrow = pv.nrow; + + // Initialize EDMK matrix + tmp_edmk.create (ncol, nrow); + + // Allocate Tensor objects on CPU + ct::Tensor Htmp_tensor (ct::DataType::DT_COMPLEX_DOUBLE, + ct::DeviceType::CpuDevice, + ct::TensorShape ({nloc})); + Htmp_tensor.zero (); + + ct::Tensor Sinv_tensor (ct::DataType::DT_COMPLEX_DOUBLE, + ct::DeviceType::CpuDevice, + ct::TensorShape ({nloc})); + Sinv_tensor.zero (); + + ct::Tensor tmp1_tensor (ct::DataType::DT_COMPLEX_DOUBLE, + ct::DeviceType::CpuDevice, + ct::TensorShape ({nloc})); + tmp1_tensor.zero (); + + ct::Tensor tmp2_tensor (ct::DataType::DT_COMPLEX_DOUBLE, + ct::DeviceType::CpuDevice, + ct::TensorShape ({nloc})); + tmp2_tensor.zero (); + + ct::Tensor tmp3_tensor (ct::DataType::DT_COMPLEX_DOUBLE, + ct::DeviceType::CpuDevice, + ct::TensorShape ({nloc})); + tmp3_tensor.zero (); + + ct::Tensor tmp4_tensor (ct::DataType::DT_COMPLEX_DOUBLE, + ct::DeviceType::CpuDevice, + ct::TensorShape ({nloc})); + tmp4_tensor.zero (); + + // Get raw pointers from tensors for ScaLAPACK calls + std::complex* Htmp_ptr = Htmp_tensor.data> (); + std::complex* Sinv_ptr = Sinv_tensor.data> (); + std::complex* tmp1_ptr = tmp1_tensor.data> (); + std::complex* tmp2_ptr = tmp2_tensor.data> (); + std::complex* tmp3_ptr = tmp3_tensor.data> (); + std::complex* tmp4_ptr = tmp4_tensor.data> (); + + const int inc = 1; + hamilt::MatrixBlock> h_mat; + hamilt::MatrixBlock> s_mat; + p_hamilt->matrix (h_mat, s_mat); + + // Copy Hamiltonian and Overlap matrices into Tensor buffers using BlasConnector + BlasConnector::copy (nloc, h_mat.p, inc, Htmp_ptr, inc); + BlasConnector::copy (nloc, s_mat.p, inc, Sinv_ptr, inc); + + int myid = 0; + const int root_proc = 0; + MPI_Comm_rank (MPI_COMM_WORLD, &myid); + + // --- ScaLAPACK Inversion of S --- + ct::Tensor ipiv (ct::DataType::DT_INT, + ct::DeviceType::CpuDevice, + ct::TensorShape 
({pv.nrow + pv.nb})); // Size for ScaLAPACK pivot array + ipiv.zero (); + int* ipiv_ptr = ipiv.data (); + + int info = 0; + const int one_int = 1; + ScalapackConnector::getrf (nlocal, nlocal, Sinv_ptr, one_int, one_int, pv.desc, ipiv_ptr, &info); + + int lwork = -1; + int liwork = -1; + ct::Tensor work_query (ct::DataType::DT_COMPLEX_DOUBLE, ct::DeviceType::CpuDevice, ct::TensorShape ({1})); + ct::Tensor iwork_query (ct::DataType::DT_INT, ct::DeviceType::CpuDevice, ct::TensorShape ({1})); + + ScalapackConnector::getri (nlocal, + Sinv_ptr, + one_int, + one_int, + pv.desc, + ipiv_ptr, + work_query.data> (), + &lwork, + iwork_query.data (), + &liwork, + &info); + + // Resize work arrays based on query results + lwork = work_query.data> ()[0].real (); + work_query.resize (ct::TensorShape ({lwork})); + liwork = iwork_query.data ()[0]; + iwork_query.resize (ct::TensorShape ({liwork})); + + ScalapackConnector::getri (nlocal, + Sinv_ptr, + one_int, + one_int, + pv.desc, + ipiv_ptr, + work_query.data> (), + &lwork, + iwork_query.data (), + &liwork, + &info); + + // --- EDM Calculation using ScaLAPACK --- + const char N_char = 'N'; + const char T_char = 'T'; + const std::complex one_complex = {1.0, 0.0}; + const std::complex zero_complex = {0.0, 0.0}; + const std::complex half_complex = {0.5, 0.0}; + + // tmp1 = Htmp * Sinv + ScalapackConnector::gemm (N_char, + N_char, + nlocal, + nlocal, + nlocal, + one_complex, + Htmp_ptr, + one_int, + one_int, + pv.desc, + Sinv_ptr, + one_int, + one_int, + pv.desc, + zero_complex, + tmp1_ptr, + one_int, + one_int, + pv.desc); + + // tmp2 = tmp1^T * tmp_dmk + ScalapackConnector::gemm (T_char, + N_char, + nlocal, + nlocal, + nlocal, + one_complex, + tmp1_ptr, + one_int, + one_int, + pv.desc, + tmp_dmk, + one_int, + one_int, + pv.desc, + zero_complex, + tmp2_ptr, + one_int, + one_int, + pv.desc); + + // tmp3 = Sinv * Htmp + ScalapackConnector::gemm (N_char, + N_char, + nlocal, + nlocal, + nlocal, + one_complex, + Sinv_ptr, + one_int, + 
one_int, + pv.desc, + Htmp_ptr, + one_int, + one_int, + pv.desc, + zero_complex, + tmp3_ptr, + one_int, + one_int, + pv.desc); + + // tmp4 = tmp_dmk * tmp3^T + ScalapackConnector::gemm (N_char, + T_char, + nlocal, + nlocal, + nlocal, + one_complex, + tmp_dmk, + one_int, + one_int, + pv.desc, + tmp3_ptr, + one_int, + one_int, + pv.desc, + zero_complex, + tmp4_ptr, + one_int, + one_int, + pv.desc); + + // tmp4 = 0.5 * (tmp2 + tmp4) + ScalapackConnector::geadd (N_char, + nlocal, + nlocal, + half_complex, + tmp2_ptr, + one_int, + one_int, + pv.desc, + half_complex, + tmp4_ptr, + one_int, + one_int, + pv.desc); + + // Copy final result from Tensor buffer back to EDMK matrix + BlasConnector::copy (nloc, tmp4_ptr, inc, tmp_edmk.c, inc); #else - ModuleBase::WARNING_QUIT("elecstate::cal_edm_tddft_tensor", "MPI is required for this function!"); + ModuleBase::WARNING_QUIT ("elecstate::cal_edm_tddft_tensor", "MPI is required for this function!"); #endif - } // end ik - ModuleBase::timer::end("TD_Efficiency", "cal_edm_tddft"); + } // end ik + ModuleBase::timer::end ("TD_Efficiency", "cal_edm_tddft"); return; } // cal_edm_tddft_tensor // Template function for EDM calculation supporting CPU and GPU template -void cal_edm_tddft_tensor_lapack(Parallel_Orbitals& pv, +void + cal_edm_tddft_tensor_lapack (Parallel_Orbitals& pv, LCAO_domain::Setup_DM>& dmat, K_Vectors& kv, hamilt::Hamilt>* p_hamilt) { - ModuleBase::TITLE("elecstate", "cal_edm_tddft_tensor_lapack"); - ModuleBase::timer::start("TD_Efficiency", "cal_edm_tddft"); + ModuleBase::TITLE ("elecstate", "cal_edm_tddft_tensor_lapack"); + ModuleBase::timer::start ("TD_Efficiency", "cal_edm_tddft"); const int nlocal = PARAM.globalv.nlocal; - assert(nlocal >= 0); - dmat.dm->EDMK.resize(kv.get_nks()); + assert (nlocal >= 0); + dmat.dm->EDMK.resize (kv.get_nks ()); // ct_device_type = ct::DeviceType::CpuDevice or ct::DeviceType::GpuDevice ct::DeviceType ct_device_type = ct::DeviceTypeToEnum::value; @@ -563,258 +579,284 @@ void 
cal_edm_tddft_tensor_lapack(Parallel_Orbitals& pv, #if ((defined __CUDA) /* || (defined __ROCM) */) if (ct_device_type == ct::DeviceType::GpuDevice) - { - // Initialize cuBLAS & cuSOLVER handle - ct::kernels::createGpuSolverHandle(); - ct::kernels::createGpuBlasHandle(); - } -#endif // __CUDA - - for (int ik = 0; ik < kv.get_nks(); ++ik) - { - p_hamilt->updateHk(ik); - std::complex* tmp_dmk_local = dmat.dm->get_DMK_pointer(ik); - ModuleBase::ComplexMatrix& tmp_edmk = dmat.dm->EDMK[ik]; - -#ifdef __MPI - int myid = 0; - const int root_proc = 0; - int num_procs = 1; - MPI_Comm_rank(MPI_COMM_WORLD, &myid); - MPI_Comm_size(MPI_COMM_WORLD, &num_procs); - - // 1. Prepare Data Source Pointers (Host) - // If np = 1, point directly to local data to avoid copy - // If np > 1, gather data and point to the gathered buffer - std::complex* h_src = nullptr; - std::complex* s_src = nullptr; - std::complex* dmk_src = nullptr; - - // Global containers (Used only when num_procs > 1) - module_rt::Matrix_g> h_mat_global, s_mat_global, dmk_global, edm_global; - - // Get Local Matrices - hamilt::MatrixBlock> h_mat_local, s_mat_local; - p_hamilt->matrix(h_mat_local, s_mat_local); - - if (num_procs == 1) - { - // Optimization: Direct access for single process - h_src = h_mat_local.p; - s_src = s_mat_local.p; - dmk_src = tmp_dmk_local; - } - else { - // Standard Gather Logic for multi-process - module_rt::gatherMatrix(myid, root_proc, h_mat_local, h_mat_global); - module_rt::gatherMatrix(myid, root_proc, s_mat_local, s_mat_global); - - hamilt::MatrixBlock> dmk_local_block; - dmk_local_block.p = tmp_dmk_local; - dmk_local_block.desc = pv.desc; - module_rt::gatherMatrix(myid, root_proc, dmk_local_block, dmk_global); - - if (myid == root_proc) - { - h_src = h_mat_global.p.get(); - s_src = s_mat_global.p.get(); - dmk_src = dmk_global.p.get(); - } + // Initialize cuBLAS & cuSOLVER handle + ct::kernels::createGpuSolverHandle (); + ct::kernels::createGpuBlasHandle (); } +#endif // __CUDA - // 2. 
GPU Calculation (on Rank 0) - if (myid == root_proc) + for (int ik = 0; ik < kv.get_nks (); ++ik) { - ct::Tensor H_dev, S_dev, DMK_dev, ipiv_dev; - - // Allocate and Copy (H2D) - H_dev = ct::Tensor(ct::DataType::DT_COMPLEX_DOUBLE, ct_device_type, ct::TensorShape({nlocal, nlocal})); - syncmem_complex_h2d_op()(H_dev.template data>(), h_src, nlocal * nlocal); - - S_dev = ct::Tensor(ct::DataType::DT_COMPLEX_DOUBLE, ct_device_type, ct::TensorShape({nlocal, nlocal})); - syncmem_complex_h2d_op()(S_dev.template data>(), s_src, nlocal * nlocal); - - DMK_dev = ct::Tensor(ct::DataType::DT_COMPLEX_DOUBLE, ct_device_type, ct::TensorShape({nlocal, nlocal})); - syncmem_complex_h2d_op()(DMK_dev.template data>(), dmk_src, nlocal * nlocal); - - ipiv_dev = ct::Tensor(ct::DataType::DT_INT, ct_device_type, ct::TensorShape({nlocal})); - ipiv_dev.zero(); - - // --- Calculate S^-1 using getrf + getrs --- - // 1. LU decomposition S = P * L * U - ct::kernels::lapack_getrf, ct_Device>()(nlocal, - nlocal, - S_dev.template data>(), - nlocal, - ipiv_dev.template data()); - - // 2. 
Solve S * Sinv = I - auto Sinv_dev = module_rt::create_identity_matrix>(nlocal, ct_device_type); - - ct::kernels::lapack_getrs, ct_Device>()('N', - nlocal, - nlocal, - S_dev.template data>(), - nlocal, - ipiv_dev.template data(), - Sinv_dev.template data>(), - nlocal); - - // --- EDM Calculation --- - std::complex one = {1.0, 0.0}; - std::complex zero = {0.0, 0.0}; - - // tmp1 = H * Sinv - ct::Tensor tmp1_dev(ct::DataType::DT_COMPLEX_DOUBLE, ct_device_type, ct::TensorShape({nlocal, nlocal})); - ct::kernels::blas_gemm, ct_Device>()('N', - 'N', - nlocal, - nlocal, - nlocal, - &one, - H_dev.template data>(), - nlocal, - Sinv_dev.template data>(), - nlocal, - &zero, - tmp1_dev.template data>(), - nlocal); - - // tmp2 = tmp1^T * DMK - ct::Tensor tmp2_dev(ct::DataType::DT_COMPLEX_DOUBLE, ct_device_type, ct::TensorShape({nlocal, nlocal})); - ct::kernels::blas_gemm, ct_Device>()('T', - 'N', - nlocal, - nlocal, - nlocal, - &one, - tmp1_dev.template data>(), - nlocal, - DMK_dev.template data>(), - nlocal, - &zero, - tmp2_dev.template data>(), - nlocal); - - // tmp3 = Sinv * H - ct::Tensor tmp3_dev(ct::DataType::DT_COMPLEX_DOUBLE, ct_device_type, ct::TensorShape({nlocal, nlocal})); - ct::kernels::blas_gemm, ct_Device>()('N', - 'N', - nlocal, - nlocal, - nlocal, - &one, - Sinv_dev.template data>(), - nlocal, - H_dev.template data>(), - nlocal, - &zero, - tmp3_dev.template data>(), - nlocal); - - // tmp4 = DMK * tmp3^T - ct::Tensor tmp4_dev(ct::DataType::DT_COMPLEX_DOUBLE, ct_device_type, ct::TensorShape({nlocal, nlocal})); - ct::kernels::blas_gemm, ct_Device>()('N', - 'T', - nlocal, - nlocal, - nlocal, - &one, - DMK_dev.template data>(), - nlocal, - tmp3_dev.template data>(), - nlocal, - &zero, - tmp4_dev.template data>(), - nlocal); - - // tmp4 = tmp2 + tmp4 - ct::kernels::blas_axpy, ct_Device>()(nlocal * nlocal, - &one, - tmp2_dev.template data>(), - 1, - tmp4_dev.template data>(), - 1); - - // tmp4 = 0.5 * tmp4 - std::complex half = {0.5, 0.0}; - ct::kernels::blas_scal, 
ct_Device>()(nlocal * nlocal, - &half, - tmp4_dev.template data>(), - 1); - - // 3. Retrieve Result (D2H) - std::complex* edm_dest = nullptr; + p_hamilt->updateHk (ik); + std::complex* tmp_dmk_local = dmat.dm->get_DMK_pointer (ik); + ModuleBase::ComplexMatrix& tmp_edmk = dmat.dm->EDMK[ik]; + +#ifdef __MPI + int myid = 0; + const int root_proc = 0; + int num_procs = 1; + MPI_Comm_rank (MPI_COMM_WORLD, &myid); + MPI_Comm_size (MPI_COMM_WORLD, &num_procs); + + // 1. Prepare Data Source Pointers (Host) + // If np = 1, point directly to local data to avoid copy + // If np > 1, gather data and point to the gathered buffer + std::complex* h_src = nullptr; + std::complex* s_src = nullptr; + std::complex* dmk_src = nullptr; + + // Global containers (Used only when num_procs > 1) + module_rt::Matrix_g> h_mat_global, s_mat_global, dmk_global, edm_global; + + // Get Local Matrices + hamilt::MatrixBlock> h_mat_local, s_mat_local; + p_hamilt->matrix (h_mat_local, s_mat_local); if (num_procs == 1) - { - // Directly copy to target local matrix - tmp_edmk.create(pv.ncol, pv.nrow); - edm_dest = tmp_edmk.c; - } + { + // Optimization: Direct access for single process + h_src = h_mat_local.p; + s_src = s_mat_local.p; + dmk_src = tmp_dmk_local; + } else - { - // Wait to set up edm_dest after allocating global buffer - if (myid == root_proc && edm_global.p == nullptr) { - edm_global.p.reset(new std::complex[nlocal * nlocal]); + // Standard Gather Logic for multi-process + module_rt::gatherMatrix (myid, root_proc, h_mat_local, h_mat_global); + module_rt::gatherMatrix (myid, root_proc, s_mat_local, s_mat_global); + + hamilt::MatrixBlock> dmk_local_block; + dmk_local_block.p = tmp_dmk_local; + dmk_local_block.desc = pv.desc; + module_rt::gatherMatrix (myid, root_proc, dmk_local_block, dmk_global); + + if (myid == root_proc) + { + h_src = h_mat_global.p.get (); + s_src = s_mat_global.p.get (); + dmk_src = dmk_global.p.get (); + } } - edm_dest = edm_global.p.get(); - } - if (num_procs == 1 || 
myid == root_proc) - { - syncmem_complex_d2h_op()(edm_dest, tmp4_dev.template data>(), nlocal * nlocal); - } - } + // 2. GPU Calculation (on Rank 0) + if (myid == root_proc) + { + ct::Tensor H_dev, S_dev, DMK_dev, ipiv_dev; + + // Allocate and Copy (H2D) + H_dev = ct::Tensor (ct::DataType::DT_COMPLEX_DOUBLE, + ct_device_type, + ct::TensorShape ({nlocal, nlocal})); + syncmem_complex_h2d_op () (H_dev.template data> (), h_src, nlocal * nlocal); + + S_dev = ct::Tensor (ct::DataType::DT_COMPLEX_DOUBLE, + ct_device_type, + ct::TensorShape ({nlocal, nlocal})); + syncmem_complex_h2d_op () (S_dev.template data> (), s_src, nlocal * nlocal); + + DMK_dev = ct::Tensor (ct::DataType::DT_COMPLEX_DOUBLE, + ct_device_type, + ct::TensorShape ({nlocal, nlocal})); + syncmem_complex_h2d_op () (DMK_dev.template data> (), + dmk_src, + nlocal * nlocal); + + ipiv_dev = ct::Tensor (ct::DataType::DT_INT, ct_device_type, ct::TensorShape ({nlocal})); + ipiv_dev.zero (); + + // --- Calculate S^-1 using getrf + getrs --- + // 1. LU decomposition S = P * L * U + ct::kernels::lapack_getrf, ct_Device> () ( + nlocal, + nlocal, + S_dev.template data> (), + nlocal, + ipiv_dev.template data ()); + + // 2. 
Solve S * Sinv = I + auto Sinv_dev = module_rt::create_identity_matrix> (nlocal, ct_device_type); + + ct::kernels::lapack_getrs, ct_Device> () ( + 'N', + nlocal, + nlocal, + S_dev.template data> (), + nlocal, + ipiv_dev.template data (), + Sinv_dev.template data> (), + nlocal); + + // --- EDM Calculation --- + std::complex one = {1.0, 0.0}; + std::complex zero = {0.0, 0.0}; + + // tmp1 = H * Sinv + ct::Tensor tmp1_dev (ct::DataType::DT_COMPLEX_DOUBLE, + ct_device_type, + ct::TensorShape ({nlocal, nlocal})); + ct::kernels::blas_gemm, ct_Device> () ( + 'N', + 'N', + nlocal, + nlocal, + nlocal, + &one, + H_dev.template data> (), + nlocal, + Sinv_dev.template data> (), + nlocal, + &zero, + tmp1_dev.template data> (), + nlocal); + + // tmp2 = tmp1^T * DMK + ct::Tensor tmp2_dev (ct::DataType::DT_COMPLEX_DOUBLE, + ct_device_type, + ct::TensorShape ({nlocal, nlocal})); + ct::kernels::blas_gemm, ct_Device> () ( + 'T', + 'N', + nlocal, + nlocal, + nlocal, + &one, + tmp1_dev.template data> (), + nlocal, + DMK_dev.template data> (), + nlocal, + &zero, + tmp2_dev.template data> (), + nlocal); + + // tmp3 = Sinv * H + ct::Tensor tmp3_dev (ct::DataType::DT_COMPLEX_DOUBLE, + ct_device_type, + ct::TensorShape ({nlocal, nlocal})); + ct::kernels::blas_gemm, ct_Device> () ( + 'N', + 'N', + nlocal, + nlocal, + nlocal, + &one, + Sinv_dev.template data> (), + nlocal, + H_dev.template data> (), + nlocal, + &zero, + tmp3_dev.template data> (), + nlocal); + + // tmp4 = DMK * tmp3^T + ct::Tensor tmp4_dev (ct::DataType::DT_COMPLEX_DOUBLE, + ct_device_type, + ct::TensorShape ({nlocal, nlocal})); + ct::kernels::blas_gemm, ct_Device> () ( + 'N', + 'T', + nlocal, + nlocal, + nlocal, + &one, + DMK_dev.template data> (), + nlocal, + tmp3_dev.template data> (), + nlocal, + &zero, + tmp4_dev.template data> (), + nlocal); + + // tmp4 = tmp2 + tmp4 + ct::kernels::blas_axpy, ct_Device> () ( + nlocal * nlocal, + &one, + tmp2_dev.template data> (), + 1, + tmp4_dev.template data> (), + 1); + + // tmp4 = 
0.5 * tmp4 + std::complex half = {0.5, 0.0}; + ct::kernels::blas_scal, ct_Device> () ( + nlocal * nlocal, + &half, + tmp4_dev.template data> (), + 1); + + // 3. Retrieve Result (D2H) + std::complex* edm_dest = nullptr; + + if (num_procs == 1) + { + // Directly copy to target local matrix + tmp_edmk.create (pv.ncol, pv.nrow); + edm_dest = tmp_edmk.c; + } + else + { + // Wait to set up edm_dest after allocating global buffer + if (myid == root_proc && edm_global.p == nullptr) + { + edm_global.p.reset (new std::complex[nlocal * nlocal]); + } + edm_dest = edm_global.p.get (); + } + + if (num_procs == 1 || myid == root_proc) + { + syncmem_complex_d2h_op () (edm_dest, + tmp4_dev.template data> (), + nlocal * nlocal); + } + } - // 4. Distribute (Only needed if num_procs > 1) - if (num_procs > 1) - { - if (edm_global.p == nullptr) - { - edm_global.p.reset(new std::complex[nlocal * nlocal]); - } - - edm_global.row = nlocal; - edm_global.col = nlocal; - edm_global.desc.reset(new int[9]{1, pv.desc[1], nlocal, nlocal, nlocal, nlocal, 0, 0, nlocal}); - - tmp_edmk.create(pv.ncol, pv.nrow); - hamilt::MatrixBlock> edm_local_block; - edm_local_block.p = tmp_edmk.c; - edm_local_block.desc = pv.desc; - module_rt::distributeMatrix(edm_local_block, edm_global); - } + // 4. 
Distribute (Only needed if num_procs > 1) + if (num_procs > 1) + { + if (edm_global.p == nullptr) + { + edm_global.p.reset (new std::complex[nlocal * nlocal]); + } + + edm_global.row = nlocal; + edm_global.col = nlocal; + edm_global.desc.reset (new int[9]{1, pv.desc[1], nlocal, nlocal, nlocal, nlocal, 0, 0, nlocal}); + + tmp_edmk.create (pv.ncol, pv.nrow); + hamilt::MatrixBlock> edm_local_block; + edm_local_block.p = tmp_edmk.c; + edm_local_block.desc = pv.desc; + module_rt::distributeMatrix (edm_local_block, edm_global); + } #else - ModuleBase::WARNING_QUIT("elecstate::cal_edm_tddft_tensor_lapack", "MPI is required for this function!"); + ModuleBase::WARNING_QUIT ("elecstate::cal_edm_tddft_tensor_lapack", "MPI is required for this function!"); #endif // __MPI - } // end ik + } // end ik #if ((defined __CUDA) /* || (defined __ROCM) */) if (ct_device_type == ct::DeviceType::GpuDevice) - { - // Destroy cuBLAS & cuSOLVER handle - ct::kernels::destroyGpuSolverHandle(); - ct::kernels::destroyGpuBlasHandle(); - } + { + // Destroy cuBLAS & cuSOLVER handle + ct::kernels::destroyGpuSolverHandle (); + ct::kernels::destroyGpuBlasHandle (); + } #endif // __CUDA - ModuleBase::timer::end("TD_Efficiency", "cal_edm_tddft"); + ModuleBase::timer::end ("TD_Efficiency", "cal_edm_tddft"); return; } // cal_edm_tddft_tensor_lapack // Explicit instantiation of template functions -template void cal_edm_tddft_tensor_lapack(Parallel_Orbitals& pv, - LCAO_domain::Setup_DM>& dmat, - K_Vectors& kv, - hamilt::Hamilt>* p_hamilt); +template void cal_edm_tddft_tensor_lapack (Parallel_Orbitals& pv, + LCAO_domain::Setup_DM>& dmat, + K_Vectors& kv, + hamilt::Hamilt>* p_hamilt); #if ((defined __CUDA) /* || (defined __ROCM) */) -template void cal_edm_tddft_tensor_lapack(Parallel_Orbitals& pv, - LCAO_domain::Setup_DM>& dmat, - K_Vectors& kv, - hamilt::Hamilt>* p_hamilt); +template void cal_edm_tddft_tensor_lapack (Parallel_Orbitals& pv, + LCAO_domain::Setup_DM>& dmat, + K_Vectors& kv, + hamilt::Hamilt>* 
p_hamilt); #endif // __CUDA } // namespace elecstate diff --git a/source/source_estate/module_dm/cal_edm_tddft.h b/source/source_estate/module_dm/cal_edm_tddft.h index b442bd90cd0..17d8771364d 100644 --- a/source/source_estate/module_dm/cal_edm_tddft.h +++ b/source/source_estate/module_dm/cal_edm_tddft.h @@ -8,27 +8,27 @@ namespace elecstate { -void print_local_matrix(std::ostream& os, - const std::complex* matrix_data, - int local_rows, // pv.nrow - int local_cols, // pv.ncol - const std::string& matrix_name = "", - int rank = -1); +void print_local_matrix (std::ostream& os, + const std::complex* matrix_data, + int local_rows, // pv.nrow + int local_cols, // pv.ncol + const std::string& matrix_name = "", + int rank = -1); -void cal_edm_tddft(Parallel_Orbitals& pv, - LCAO_domain::Setup_DM>& dmat, - K_Vectors& kv, - hamilt::Hamilt>* p_hamilt); +void cal_edm_tddft (Parallel_Orbitals& pv, + LCAO_domain::Setup_DM>& dmat, + K_Vectors& kv, + hamilt::Hamilt>* p_hamilt); -void cal_edm_tddft_tensor(Parallel_Orbitals& pv, - LCAO_domain::Setup_DM>& dmat, - K_Vectors& kv, - hamilt::Hamilt>* p_hamilt); +void cal_edm_tddft_tensor (Parallel_Orbitals& pv, + LCAO_domain::Setup_DM>& dmat, + K_Vectors& kv, + hamilt::Hamilt>* p_hamilt); template -void cal_edm_tddft_tensor_lapack(Parallel_Orbitals& pv, - LCAO_domain::Setup_DM>& dmat, - K_Vectors& kv, - hamilt::Hamilt>* p_hamilt); +void cal_edm_tddft_tensor_lapack (Parallel_Orbitals& pv, + LCAO_domain::Setup_DM>& dmat, + K_Vectors& kv, + hamilt::Hamilt>* p_hamilt); } // namespace elecstate #endif // CAL_EDM_TDDFT_H diff --git a/source/source_estate/module_dm/density_matrix.cpp b/source/source_estate/module_dm/density_matrix.cpp index d28b6ea19e9..ad41ee3ab22 100644 --- a/source/source_estate/module_dm/density_matrix.cpp +++ b/source/source_estate/module_dm/density_matrix.cpp @@ -18,659 +18,695 @@ namespace elecstate // destructor template -DensityMatrix::~DensityMatrix() +DensityMatrix::~DensityMatrix () { for (auto& it: this->_DMR) - { 
- delete it; - } + { + delete it; + } delete[] this->dmr_tmp_; } template -DensityMatrix::DensityMatrix(const Parallel_Orbitals* paraV_in, const int nspin, const std::vector>& kvec_d, const int nk) - : _paraV(paraV_in), _nspin(nspin), _kvec_d(kvec_d), _nk((nk > 0 && nk <= _kvec_d.size()) ? nk : _kvec_d.size()) +DensityMatrix::DensityMatrix (const Parallel_Orbitals* paraV_in, + const int nspin, + const std::vector>& kvec_d, + const int nk) + : _paraV (paraV_in), _nspin (nspin), _kvec_d (kvec_d), + _nk ((nk > 0 && nk <= _kvec_d.size ()) ? nk : _kvec_d.size ()) { - ModuleBase::TITLE("DensityMatrix", "resize_DMK"); + ModuleBase::TITLE ("DensityMatrix", "resize_DMK"); const int nks = _nk * _nspin; - this->_DMK.resize(nks); + this->_DMK.resize (nks); for (int ik = 0; ik < nks; ik++) - { - this->_DMK[ik].resize(this->_paraV->get_row_size() * this->_paraV->get_col_size()); - } - ModuleBase::Memory::record("DensityMatrix::DMK", this->_DMK.size() * this->_DMK[0].size() * sizeof(TK)); + { + this->_DMK[ik].resize (this->_paraV->get_row_size () * this->_paraV->get_col_size ()); + } + ModuleBase::Memory::record ("DensityMatrix::DMK", this->_DMK.size () * this->_DMK[0].size () * sizeof (TK)); } template -DensityMatrix::DensityMatrix(const Parallel_Orbitals* paraV_in, const int nspin) :_paraV(paraV_in), _nspin(nspin), _kvec_d({ ModuleBase::Vector3(0,0,0) }), _nk(1) +DensityMatrix::DensityMatrix (const Parallel_Orbitals* paraV_in, const int nspin) + : _paraV (paraV_in), _nspin (nspin), _kvec_d ({ModuleBase::Vector3 (0, 0, 0)}), _nk (1) { - ModuleBase::TITLE("DensityMatrix", "resize_gamma"); - this->_DMK.resize(_nspin); + ModuleBase::TITLE ("DensityMatrix", "resize_gamma"); + this->_DMK.resize (_nspin); for (int ik = 0; ik < this->_nspin; ik++) - { - this->_DMK[ik].resize(this->_paraV->get_row_size() * this->_paraV->get_col_size()); - } - ModuleBase::Memory::record("DensityMatrix::DMK", this->_DMK.size() * this->_DMK[0].size() * sizeof(TK)); + { + this->_DMK[ik].resize 
(this->_paraV->get_row_size () * this->_paraV->get_col_size ()); + } + ModuleBase::Memory::record ("DensityMatrix::DMK", this->_DMK.size () * this->_DMK[0].size () * sizeof (TK)); } - - // calculate DMR from DMK using blas for multi-k calculation template -void DensityMatrix_Tools::cal_DMR( - const DensityMatrix &dm, - std::vector*> &dmR_out, - const int ik_in) +void + DensityMatrix_Tools::cal_DMR (const DensityMatrix& dm, + std::vector*>& dmR_out, + const int ik_in) { - ModuleBase::TITLE("DensityMatrix", "cal_DMR"); + ModuleBase::TITLE ("DensityMatrix", "cal_DMR"); // To check whether DMR has been initialized - assert(dmR_out.size()==dm._nspin && "DMR has not been initialized!"); + assert (dmR_out.size () == dm._nspin && "DMR has not been initialized!"); - ModuleBase::timer::start("DensityMatrix", "cal_DMR"); + ModuleBase::timer::start ("DensityMatrix", "cal_DMR"); const int ld_hk = dm._paraV->nrow; for (int is = 1; is <= dm._nspin; ++is) - { - const int ik_begin = dm._nk * (is - 1); // jump dm._nk for spin_down if nspin==2 - hamilt::HContainer*const target_DMR = dmR_out[is - 1]; - // set zero since this function is called in every scf step - target_DMR->set_zero(); - #ifdef _OPENMP - #pragma omp parallel for schedule(dynamic) - #endif - for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { - hamilt::AtomPair& target_ap = target_DMR->get_atom_pair(i); - const int iat1 = target_ap.get_atom_i(); - const int iat2 = target_ap.get_atom_j(); - // get global indexes of whole matrix for each atom in this process - const int row_ap = dm._paraV->atom_begin_row[iat1]; - const int col_ap = dm._paraV->atom_begin_col[iat2]; - const int row_size = dm._paraV->get_row_size(iat1); - const int col_size = dm._paraV->get_col_size(iat2); - const int mat_size = row_size * col_size; - const int R_size = target_ap.get_R_size(); - assert(row_ap != -1 && col_ap != -1 && "Atom-pair not belong this process"); - - // calculate kphase and target_mat_ptr - std::vector> kphase_vec(dm._nk, 
std::vector(R_size)); - std::vector target_DMR_mat_vec(R_size); - for(int iR = 0; iR < R_size; ++iR) - { - const ModuleBase::Vector3 R_index = target_ap.get_R_index(iR); - hamilt::BaseMatrix*const target_mat = target_ap.find_matrix(R_index); - #ifdef __DEBUG - if (target_mat == nullptr) + const int ik_begin = dm._nk * (is - 1); // jump dm._nk for spin_down if nspin==2 + hamilt::HContainer* const target_DMR = dmR_out[is - 1]; + // set zero since this function is called in every scf step + target_DMR->set_zero (); +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) +#endif + for (int i = 0; i < target_DMR->size_atom_pairs (); ++i) { - std::cout << "target_mat is nullptr" << std::endl; - continue; - } - #endif - target_DMR_mat_vec[iR] = target_mat->get_pointer(); - for(int ik = 0; ik < dm._nk; ++ik) - { - if(ik_in >= 0 && ik_in != ik) { continue; } - // cal k_phase - // if TK==std::complex, kphase is e^{ikR} - const ModuleBase::Vector3 dR(R_index[0], R_index[1], R_index[2]); - const double arg = (dm._kvec_d[ik] * dR) * ModuleBase::TWO_PI; - double sinp, cosp; - ModuleBase::libm::sincos(arg, &sinp, &cosp); - kphase_vec[ik][iR] = TK(cosp, sinp); - } - } - - std::vector DMK_mat_trans(mat_size); - std::vector tmp_DMR( (PARAM.inp.nspin==4) ? 
mat_size*R_size : 0); - for(int ik = 0; ik < dm._nk; ++ik) - { - if(ik_in >= 0 && ik_in != ik) { continue; } - // copy column-major DMK to row-major DMK_mat_trans (for the purpose of computational efficiency) - const TK*const DMK_mat_ptr - = dm._DMK[ik + ik_begin].data() - + col_ap * dm._paraV->nrow + row_ap; - for(int icol = 0; icol < col_size; ++icol) { - for(int irow = 0; irow < row_size; ++irow) { - DMK_mat_trans[irow * col_size + icol] = DMK_mat_ptr[icol * ld_hk + irow]; - }} - - // if nspin != 4, fill DMR - // if nspin == 4, fill tmp_DMR - for(int iR = 0; iR < R_size; ++iR) - { - // (kr+i*ki) * (Dr+i*Di) = (kr*Dr-ki*Di) + i*(kr*Di+ki*Dr) - const TK kphase = kphase_vec[ik][iR]; - if(PARAM.inp.nspin != 4) // only save real kr*Dr-ki*Di - { - func_exp_mul_dmk(kphase, DMK_mat_trans, target_DMR_mat_vec[iR]); - } else if(PARAM.inp.nspin == 4) - { - BlasConnector::axpy(mat_size, - kphase, - DMK_mat_trans.data(), - 1, - &tmp_DMR[iR * mat_size], - 1); - } - } - } - - // if nspin == 4 - // copy tmp_DMR to fill target_DMR - if(PARAM.inp.nspin == 4) - { - // step_trace ={0, 1, local_col, local_col+1} for NSPIN=4 - int step_trace[4]{}; - constexpr int npol = 2; - for (int is = 0; is < npol; is++) { - for (int is2 = 0; is2 < npol; is2++) { - step_trace[is * npol + is2] = target_ap.get_col_size() * is + is2; - }} - - TK tmp[4]{}; - for(int iR = 0; iR < R_size; ++iR) - { - const TK* tmp_DMR_mat = &tmp_DMR[iR * mat_size]; - TR_out* target_DMR_mat = target_DMR_mat_vec[iR]; - for (int irow = 0; irow < row_size; irow += 2) - { - for (int icol = 0; icol < col_size; icol += 2) + hamilt::AtomPair& target_ap = target_DMR->get_atom_pair (i); + const int iat1 = target_ap.get_atom_i (); + const int iat2 = target_ap.get_atom_j (); + // get global indexes of whole matrix for each atom in this process + const int row_ap = dm._paraV->atom_begin_row[iat1]; + const int col_ap = dm._paraV->atom_begin_col[iat2]; + const int row_size = dm._paraV->get_row_size (iat1); + const int col_size = 
dm._paraV->get_col_size (iat2); + const int mat_size = row_size * col_size; + const int R_size = target_ap.get_R_size (); + assert (row_ap != -1 && col_ap != -1 && "Atom-pair not belong this process"); + + // calculate kphase and target_mat_ptr + std::vector> kphase_vec (dm._nk, std::vector (R_size)); + std::vector target_DMR_mat_vec (R_size); + for (int iR = 0; iR < R_size; ++iR) { - // catch the 4 spin component value of one orbital pair - tmp[0] = tmp_DMR_mat[icol + step_trace[0]]; - tmp[1] = tmp_DMR_mat[icol + step_trace[1]]; - tmp[2] = tmp_DMR_mat[icol + step_trace[2]]; - tmp[3] = tmp_DMR_mat[icol + step_trace[3]]; - - // transfer to Pauli matrix, save them back to the target_DMR_mat - func_xyz_to_updown(tmp, icol, step_trace, target_DMR_mat); + const ModuleBase::Vector3 R_index = target_ap.get_R_index (iR); + hamilt::BaseMatrix* const target_mat = target_ap.find_matrix (R_index); +#ifdef __DEBUG + if (target_mat == nullptr) + { + std::cout << "target_mat is nullptr" << std::endl; + continue; + } +#endif + target_DMR_mat_vec[iR] = target_mat->get_pointer (); + for (int ik = 0; ik < dm._nk; ++ik) + { + if (ik_in >= 0 && ik_in != ik) + { + continue; + } + // cal k_phase + // if TK==std::complex, kphase is e^{ikR} + const ModuleBase::Vector3 dR (R_index[0], R_index[1], R_index[2]); + const double arg = (dm._kvec_d[ik] * dR) * ModuleBase::TWO_PI; + double sinp, cosp; + ModuleBase::libm::sincos (arg, &sinp, &cosp); + kphase_vec[ik][iR] = TK (cosp, sinp); + } + } + + std::vector DMK_mat_trans (mat_size); + std::vector tmp_DMR ((PARAM.inp.nspin == 4) ? 
mat_size * R_size : 0); + for (int ik = 0; ik < dm._nk; ++ik) + { + if (ik_in >= 0 && ik_in != ik) + { + continue; + } + // copy column-major DMK to row-major DMK_mat_trans (for the purpose of computational + // efficiency) + const TK* const DMK_mat_ptr + = dm._DMK[ik + ik_begin].data () + col_ap * dm._paraV->nrow + row_ap; + for (int icol = 0; icol < col_size; ++icol) + { + for (int irow = 0; irow < row_size; ++irow) + { + DMK_mat_trans[irow * col_size + icol] = DMK_mat_ptr[icol * ld_hk + irow]; + } + } + + // if nspin != 4, fill DMR + // if nspin == 4, fill tmp_DMR + for (int iR = 0; iR < R_size; ++iR) + { + // (kr+i*ki) * (Dr+i*Di) = (kr*Dr-ki*Di) + i*(kr*Di+ki*Dr) + const TK kphase = kphase_vec[ik][iR]; + if (PARAM.inp.nspin != 4) // only save real kr*Dr-ki*Di + { + func_exp_mul_dmk (kphase, DMK_mat_trans, target_DMR_mat_vec[iR]); + } + else if (PARAM.inp.nspin == 4) + { + BlasConnector::axpy (mat_size, + kphase, + DMK_mat_trans.data (), + 1, + &tmp_DMR[iR * mat_size], + 1); + } + } + } + + // if nspin == 4 + // copy tmp_DMR to fill target_DMR + if (PARAM.inp.nspin == 4) + { + // step_trace ={0, 1, local_col, local_col+1} for NSPIN=4 + int step_trace[4]{}; + constexpr int npol = 2; + for (int is = 0; is < npol; is++) + { + for (int is2 = 0; is2 < npol; is2++) + { + step_trace[is * npol + is2] = target_ap.get_col_size () * is + is2; + } + } + + TK tmp[4]{}; + for (int iR = 0; iR < R_size; ++iR) + { + const TK* tmp_DMR_mat = &tmp_DMR[iR * mat_size]; + TR_out* target_DMR_mat = target_DMR_mat_vec[iR]; + for (int irow = 0; irow < row_size; irow += 2) + { + for (int icol = 0; icol < col_size; icol += 2) + { + // catch the 4 spin component value of one orbital pair + tmp[0] = tmp_DMR_mat[icol + step_trace[0]]; + tmp[1] = tmp_DMR_mat[icol + step_trace[1]]; + tmp[2] = tmp_DMR_mat[icol + step_trace[2]]; + tmp[3] = tmp_DMR_mat[icol + step_trace[3]]; + + // transfer to Pauli matrix, save them back to the target_DMR_mat + func_xyz_to_updown (tmp, icol, step_trace, 
target_DMR_mat); + } + tmp_DMR_mat += col_size * 2; + target_DMR_mat += col_size * 2; + } + } } - tmp_DMR_mat += col_size * 2; - target_DMR_mat += col_size * 2; - } } - } } - } - ModuleBase::timer::end("DensityMatrix", "cal_DMR"); + ModuleBase::timer::end ("DensityMatrix", "cal_DMR"); } template <> -void DensityMatrix, double>::cal_DMR(const int ik_in) +void + DensityMatrix, double>::cal_DMR (const int ik_in) { - DensityMatrix_Tools::cal_DMR(*this, this->_DMR, ik_in); + DensityMatrix_Tools::cal_DMR (*this, this->_DMR, ik_in); } template <> -void DensityMatrix, std::complex>::cal_DMR(const int ik_in) +void + DensityMatrix, std::complex>::cal_DMR (const int ik_in) { - DensityMatrix_Tools::cal_DMR(*this, this->_DMR, ik_in); + DensityMatrix_Tools::cal_DMR (*this, this->_DMR, ik_in); } - - // calculate DMR from DMK using blas for multi-k calculation template -void DensityMatrix_Tools::cal_DMR_td( - const DensityMatrix &dm, - std::vector*> &dmR_out, - const UnitCell& ucell, - const ModuleBase::Vector3 At, - const int ik_in) +void + DensityMatrix_Tools::cal_DMR_td (const DensityMatrix& dm, + std::vector*>& dmR_out, + const UnitCell& ucell, + const ModuleBase::Vector3 At, + const int ik_in) { - ModuleBase::TITLE("DensityMatrix", "cal_DMR_td"); + ModuleBase::TITLE ("DensityMatrix", "cal_DMR_td"); // To check whether DMR has been initialized - assert(dmR_out.size()==dm._nspin && "DMR has not been initialized!"); + assert (dmR_out.size () == dm._nspin && "DMR has not been initialized!"); - ModuleBase::timer::start("DensityMatrix", "cal_DMR_td"); + ModuleBase::timer::start ("DensityMatrix", "cal_DMR_td"); const int ld_hk = dm._paraV->nrow; for (int is = 1; is <= dm._nspin; ++is) - { - const int ik_begin = dm._nk * (is - 1); // jump dm._nk for spin_down if nspin==2 - hamilt::HContainer*const target_DMR = dmR_out[is - 1]; - // set zero since this function is called in every scf step - target_DMR->set_zero(); - #ifdef _OPENMP - #pragma omp parallel for schedule(dynamic) - #endif 
- for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { - hamilt::AtomPair& target_ap = target_DMR->get_atom_pair(i); - const int iat1 = target_ap.get_atom_i(); - const int iat2 = target_ap.get_atom_j(); - // get global indexes of whole matrix for each atom in this process - const int row_ap = dm._paraV->atom_begin_row[iat1]; - const int col_ap = dm._paraV->atom_begin_col[iat2]; - const int row_size = dm._paraV->get_row_size(iat1); - const int col_size = dm._paraV->get_col_size(iat2); - const int mat_size = row_size * col_size; - const int R_size = target_ap.get_R_size(); - assert(row_ap != -1 && col_ap != -1 && "Atom-pair not belong this process"); - - // calculate kphase and target_mat_ptr - std::vector> kphase_vec(dm._nk, std::vector(R_size)); - std::vector target_DMR_mat_vec(R_size); - for(int iR = 0; iR < R_size; ++iR) - { - const ModuleBase::Vector3 R_index = target_ap.get_R_index(iR); - hamilt::BaseMatrix*const target_mat = target_ap.find_matrix(R_index); - #ifdef __DEBUG - if (target_mat == nullptr) + const int ik_begin = dm._nk * (is - 1); // jump dm._nk for spin_down if nspin==2 + hamilt::HContainer* const target_DMR = dmR_out[is - 1]; + // set zero since this function is called in every scf step + target_DMR->set_zero (); +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) +#endif + for (int i = 0; i < target_DMR->size_atom_pairs (); ++i) { - std::cout << "target_mat is nullptr" << std::endl; - continue; - } - #endif - target_DMR_mat_vec[iR] = target_mat->get_pointer(); - //cal tddft phase for hybrid gauge - const ModuleBase::Vector3 dtau = ucell.cal_dtau(iat1, iat2, R_index); - const double arg_td = At * dtau * ucell.lat0; - for(int ik = 0; ik < dm._nk; ++ik) - { - if(ik_in >= 0 && ik_in != ik) { continue; } - // cal k_phase - // if TK==std::complex, kphase is e^{ikR} - const ModuleBase::Vector3 dR(R_index[0], R_index[1], R_index[2]); - const double arg = (dm._kvec_d[ik] * dR) * ModuleBase::TWO_PI + arg_td; - double sinp, cosp; - 
ModuleBase::libm::sincos(arg, &sinp, &cosp); - kphase_vec[ik][iR] = TK(cosp, sinp); - } - } - - std::vector DMK_mat_trans(mat_size); - std::vector tmp_DMR( (PARAM.inp.nspin==4) ? mat_size*R_size : 0); - for(int ik = 0; ik < dm._nk; ++ik) - { - if(ik_in >= 0 && ik_in != ik) { continue; } - // copy column-major DMK to row-major DMK_mat_trans (for the purpose of computational efficiency) - const TK*const DMK_mat_ptr - = dm._DMK[ik + ik_begin].data() - + col_ap * dm._paraV->nrow + row_ap; - for(int icol = 0; icol < col_size; ++icol) { - for(int irow = 0; irow < row_size; ++irow) { - DMK_mat_trans[irow * col_size + icol] = DMK_mat_ptr[icol * ld_hk + irow]; - }} - - // if nspin != 4, fill DMR - // if nspin == 4, fill tmp_DMR - for(int iR = 0; iR < R_size; ++iR) - { - // (kr+i*ki) * (Dr+i*Di) = (kr*Dr-ki*Di) + i*(kr*Di+ki*Dr) - const TK kphase = kphase_vec[ik][iR]; - if(PARAM.inp.nspin != 4) // only save real kr*Dr-ki*Di - { - func_exp_mul_dmk(kphase, DMK_mat_trans, target_DMR_mat_vec[iR]); - } else if(PARAM.inp.nspin == 4) - { - BlasConnector::axpy(mat_size, - kphase, - DMK_mat_trans.data(), - 1, - &tmp_DMR[iR * mat_size], - 1); - } - } - } - - // if nspin == 4 - // copy tmp_DMR to fill target_DMR - if(PARAM.inp.nspin == 4) - { - // step_trace ={0, 1, local_col, local_col+1} for NSPIN=4 - int step_trace[4]{}; - constexpr int npol = 2; - for (int is = 0; is < npol; is++) { - for (int is2 = 0; is2 < npol; is2++) { - step_trace[is * npol + is2] = target_ap.get_col_size() * is + is2; - }} - - TK tmp[4]{}; - for(int iR = 0; iR < R_size; ++iR) - { - const TK* tmp_DMR_mat = &tmp_DMR[iR * mat_size]; - TR_out* target_DMR_mat = target_DMR_mat_vec[iR]; - for (int irow = 0; irow < row_size; irow += 2) - { - for (int icol = 0; icol < col_size; icol += 2) + hamilt::AtomPair& target_ap = target_DMR->get_atom_pair (i); + const int iat1 = target_ap.get_atom_i (); + const int iat2 = target_ap.get_atom_j (); + // get global indexes of whole matrix for each atom in this process + const int 
row_ap = dm._paraV->atom_begin_row[iat1]; + const int col_ap = dm._paraV->atom_begin_col[iat2]; + const int row_size = dm._paraV->get_row_size (iat1); + const int col_size = dm._paraV->get_col_size (iat2); + const int mat_size = row_size * col_size; + const int R_size = target_ap.get_R_size (); + assert (row_ap != -1 && col_ap != -1 && "Atom-pair not belong this process"); + + // calculate kphase and target_mat_ptr + std::vector> kphase_vec (dm._nk, std::vector (R_size)); + std::vector target_DMR_mat_vec (R_size); + for (int iR = 0; iR < R_size; ++iR) { - // catch the 4 spin component value of one orbital pair - tmp[0] = tmp_DMR_mat[icol + step_trace[0]]; - tmp[1] = tmp_DMR_mat[icol + step_trace[1]]; - tmp[2] = tmp_DMR_mat[icol + step_trace[2]]; - tmp[3] = tmp_DMR_mat[icol + step_trace[3]]; - - // transfer to Pauli matrix, save them back to the target_DMR_mat - func_xyz_to_updown(tmp, icol, step_trace, target_DMR_mat); + const ModuleBase::Vector3 R_index = target_ap.get_R_index (iR); + hamilt::BaseMatrix* const target_mat = target_ap.find_matrix (R_index); +#ifdef __DEBUG + if (target_mat == nullptr) + { + std::cout << "target_mat is nullptr" << std::endl; + continue; + } +#endif + target_DMR_mat_vec[iR] = target_mat->get_pointer (); + // cal tddft phase for hybrid gauge + const ModuleBase::Vector3 dtau = ucell.cal_dtau (iat1, iat2, R_index); + const double arg_td = At * dtau * ucell.lat0; + for (int ik = 0; ik < dm._nk; ++ik) + { + if (ik_in >= 0 && ik_in != ik) + { + continue; + } + // cal k_phase + // if TK==std::complex, kphase is e^{ikR} + const ModuleBase::Vector3 dR (R_index[0], R_index[1], R_index[2]); + const double arg = (dm._kvec_d[ik] * dR) * ModuleBase::TWO_PI + arg_td; + double sinp, cosp; + ModuleBase::libm::sincos (arg, &sinp, &cosp); + kphase_vec[ik][iR] = TK (cosp, sinp); + } + } + + std::vector DMK_mat_trans (mat_size); + std::vector tmp_DMR ((PARAM.inp.nspin == 4) ? 
mat_size * R_size : 0); + for (int ik = 0; ik < dm._nk; ++ik) + { + if (ik_in >= 0 && ik_in != ik) + { + continue; + } + // copy column-major DMK to row-major DMK_mat_trans (for the purpose of computational + // efficiency) + const TK* const DMK_mat_ptr + = dm._DMK[ik + ik_begin].data () + col_ap * dm._paraV->nrow + row_ap; + for (int icol = 0; icol < col_size; ++icol) + { + for (int irow = 0; irow < row_size; ++irow) + { + DMK_mat_trans[irow * col_size + icol] = DMK_mat_ptr[icol * ld_hk + irow]; + } + } + + // if nspin != 4, fill DMR + // if nspin == 4, fill tmp_DMR + for (int iR = 0; iR < R_size; ++iR) + { + // (kr+i*ki) * (Dr+i*Di) = (kr*Dr-ki*Di) + i*(kr*Di+ki*Dr) + const TK kphase = kphase_vec[ik][iR]; + if (PARAM.inp.nspin != 4) // only save real kr*Dr-ki*Di + { + func_exp_mul_dmk (kphase, DMK_mat_trans, target_DMR_mat_vec[iR]); + } + else if (PARAM.inp.nspin == 4) + { + BlasConnector::axpy (mat_size, + kphase, + DMK_mat_trans.data (), + 1, + &tmp_DMR[iR * mat_size], + 1); + } + } + } + + // if nspin == 4 + // copy tmp_DMR to fill target_DMR + if (PARAM.inp.nspin == 4) + { + // step_trace ={0, 1, local_col, local_col+1} for NSPIN=4 + int step_trace[4]{}; + constexpr int npol = 2; + for (int is = 0; is < npol; is++) + { + for (int is2 = 0; is2 < npol; is2++) + { + step_trace[is * npol + is2] = target_ap.get_col_size () * is + is2; + } + } + + TK tmp[4]{}; + for (int iR = 0; iR < R_size; ++iR) + { + const TK* tmp_DMR_mat = &tmp_DMR[iR * mat_size]; + TR_out* target_DMR_mat = target_DMR_mat_vec[iR]; + for (int irow = 0; irow < row_size; irow += 2) + { + for (int icol = 0; icol < col_size; icol += 2) + { + // catch the 4 spin component value of one orbital pair + tmp[0] = tmp_DMR_mat[icol + step_trace[0]]; + tmp[1] = tmp_DMR_mat[icol + step_trace[1]]; + tmp[2] = tmp_DMR_mat[icol + step_trace[2]]; + tmp[3] = tmp_DMR_mat[icol + step_trace[3]]; + + // transfer to Pauli matrix, save them back to the target_DMR_mat + func_xyz_to_updown (tmp, icol, step_trace, 
target_DMR_mat); + } + tmp_DMR_mat += col_size * 2; + target_DMR_mat += col_size * 2; + } + } } - tmp_DMR_mat += col_size * 2; - target_DMR_mat += col_size * 2; - } } - } } - } - ModuleBase::timer::end("DensityMatrix", "cal_DMR_td"); + ModuleBase::timer::end ("DensityMatrix", "cal_DMR_td"); } template <> -void DensityMatrix::cal_DMR_td(const UnitCell& ucell, const ModuleBase::Vector3 At, const int ik_in) +void + DensityMatrix::cal_DMR_td (const UnitCell& ucell, + const ModuleBase::Vector3 At, + const int ik_in) { return; } template <> -void DensityMatrix, double>::cal_DMR_td(const UnitCell& ucell, const ModuleBase::Vector3 At, const int ik_in) +void + DensityMatrix, double>::cal_DMR_td (const UnitCell& ucell, + const ModuleBase::Vector3 At, + const int ik_in) { - DensityMatrix_Tools::cal_DMR_td(*this, this->_DMR, ucell, At, ik_in); + DensityMatrix_Tools::cal_DMR_td (*this, this->_DMR, ucell, At, ik_in); } template <> -void DensityMatrix, std::complex>::cal_DMR_td(const UnitCell& ucell, const ModuleBase::Vector3 At, const int ik_in) +void + DensityMatrix, std::complex>::cal_DMR_td (const UnitCell& ucell, + const ModuleBase::Vector3 At, + const int ik_in) { - DensityMatrix_Tools::cal_DMR_td(*this, this->_DMR, ucell, At, ik_in); + DensityMatrix_Tools::cal_DMR_td (*this, this->_DMR, ucell, At, ik_in); } - - // calculate DMR from DMK using blas for multi-k calculation template -void DensityMatrix_Tools::cal_DMR_full( - const DensityMatrix &dm, - hamilt::HContainer* dmR_out, - const int ik_in) +void + DensityMatrix_Tools::cal_DMR_full (const DensityMatrix& dm, + hamilt::HContainer* dmR_out, + const int ik_in) { - ModuleBase::TITLE("DensityMatrix", "cal_DMR_full"); + ModuleBase::TITLE ("DensityMatrix", "cal_DMR_full"); - ModuleBase::timer::start("DensityMatrix", "cal_DMR_full"); + ModuleBase::timer::start ("DensityMatrix", "cal_DMR_full"); const int ld_hk = dm._paraV->nrow; hamilt::HContainer* target_DMR = dmR_out; // set zero since this function is called in every scf 
step - target_DMR->set_zero(); - #ifdef _OPENMP - #pragma omp parallel for schedule(dynamic) - #endif - for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) - { - hamilt::AtomPair& target_ap = target_DMR->get_atom_pair(i); - const int iat1 = target_ap.get_atom_i(); - const int iat2 = target_ap.get_atom_j(); - // get global indexes of whole matrix for each atom in this process - const int row_ap = dm._paraV->atom_begin_row[iat1]; - const int col_ap = dm._paraV->atom_begin_col[iat2]; - const int row_size = dm._paraV->get_row_size(iat1); - const int col_size = dm._paraV->get_col_size(iat2); - const int mat_size = row_size * col_size; - const int R_size = target_ap.get_R_size(); - assert(row_ap != -1 && col_ap != -1 && "Atom-pair not belong this process"); - - // calculate kphase and target_mat_ptr - std::vector> kphase_vec(dm._nk, std::vector(R_size)); - std::vector target_DMR_mat_vec(R_size); - for(int iR = 0; iR < R_size; ++iR) + target_DMR->set_zero (); +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) +#endif + for (int i = 0; i < target_DMR->size_atom_pairs (); ++i) { - const ModuleBase::Vector3 R_index = target_ap.get_R_index(iR); - hamilt::BaseMatrix*const target_mat = target_ap.find_matrix(R_index); - #ifdef __DEBUG - if (target_mat == nullptr) - { - std::cout << "target_mat is nullptr" << std::endl; - continue; - } - #endif - target_DMR_mat_vec[iR] = target_mat->get_pointer(); - for(int ik = 0; ik < dm._nk; ++ik) - { - if(ik_in >= 0 && ik_in != ik) { continue; } - // cal k_phase - // if TK==std::complex, kphase is e^{ikR} - const ModuleBase::Vector3 dR(R_index[0], R_index[1], R_index[2]); - const double arg = (dm._kvec_d[ik] * dR) * ModuleBase::TWO_PI; - double sinp, cosp; - ModuleBase::libm::sincos(arg, &sinp, &cosp); - kphase_vec[ik][iR] = TK(cosp, sinp); - } - } + hamilt::AtomPair& target_ap = target_DMR->get_atom_pair (i); + const int iat1 = target_ap.get_atom_i (); + const int iat2 = target_ap.get_atom_j (); + // get global indexes of whole 
matrix for each atom in this process + const int row_ap = dm._paraV->atom_begin_row[iat1]; + const int col_ap = dm._paraV->atom_begin_col[iat2]; + const int row_size = dm._paraV->get_row_size (iat1); + const int col_size = dm._paraV->get_col_size (iat2); + const int mat_size = row_size * col_size; + const int R_size = target_ap.get_R_size (); + assert (row_ap != -1 && col_ap != -1 && "Atom-pair not belong this process"); - std::vector DMK_mat_trans(mat_size); - for(int ik = 0; ik < dm._nk; ++ik) - { - if(ik_in >= 0 && ik_in != ik) { continue; } - // copy column-major DMK to row-major DMK_mat_trans (for the purpose of computational efficiency) - const TK*const DMK_mat_ptr - = dm._DMK[ik].data() - + col_ap * dm._paraV->nrow + row_ap; - for(int icol = 0; icol < col_size; ++icol) { - for(int irow = 0; irow < row_size; ++irow) { - DMK_mat_trans[irow * col_size + icol] = DMK_mat_ptr[icol * ld_hk + irow]; - }} - - for(int iR = 0; iR < R_size; ++iR) - { - const TK kphase = kphase_vec[ik][iR]; - BlasConnector::axpy(mat_size, - kphase, - DMK_mat_trans.data(), - 1, - target_DMR_mat_vec[iR], - 1); - } + // calculate kphase and target_mat_ptr + std::vector> kphase_vec (dm._nk, std::vector (R_size)); + std::vector target_DMR_mat_vec (R_size); + for (int iR = 0; iR < R_size; ++iR) + { + const ModuleBase::Vector3 R_index = target_ap.get_R_index (iR); + hamilt::BaseMatrix* const target_mat = target_ap.find_matrix (R_index); +#ifdef __DEBUG + if (target_mat == nullptr) + { + std::cout << "target_mat is nullptr" << std::endl; + continue; + } +#endif + target_DMR_mat_vec[iR] = target_mat->get_pointer (); + for (int ik = 0; ik < dm._nk; ++ik) + { + if (ik_in >= 0 && ik_in != ik) + { + continue; + } + // cal k_phase + // if TK==std::complex, kphase is e^{ikR} + const ModuleBase::Vector3 dR (R_index[0], R_index[1], R_index[2]); + const double arg = (dm._kvec_d[ik] * dR) * ModuleBase::TWO_PI; + double sinp, cosp; + ModuleBase::libm::sincos (arg, &sinp, &cosp); + kphase_vec[ik][iR] = TK 
(cosp, sinp); + } + } + + std::vector DMK_mat_trans (mat_size); + for (int ik = 0; ik < dm._nk; ++ik) + { + if (ik_in >= 0 && ik_in != ik) + { + continue; + } + // copy column-major DMK to row-major DMK_mat_trans (for the purpose of computational efficiency) + const TK* const DMK_mat_ptr = dm._DMK[ik].data () + col_ap * dm._paraV->nrow + row_ap; + for (int icol = 0; icol < col_size; ++icol) + { + for (int irow = 0; irow < row_size; ++irow) + { + DMK_mat_trans[irow * col_size + icol] = DMK_mat_ptr[icol * ld_hk + irow]; + } + } + + for (int iR = 0; iR < R_size; ++iR) + { + const TK kphase = kphase_vec[ik][iR]; + BlasConnector::axpy (mat_size, kphase, DMK_mat_trans.data (), 1, target_DMR_mat_vec[iR], 1); + } + } } - } - ModuleBase::timer::end("DensityMatrix", "cal_DMR_full"); + ModuleBase::timer::end ("DensityMatrix", "cal_DMR_full"); } template <> -void DensityMatrix::cal_DMR_full( - hamilt::HContainer>* dmR_out, - const int ik_in) const{} +void + DensityMatrix::cal_DMR_full (hamilt::HContainer>* dmR_out, + const int ik_in) const +{ +} template <> -void DensityMatrix, double>::cal_DMR_full( - hamilt::HContainer>* dmR_out, - const int ik_in) const +void + DensityMatrix, double>::cal_DMR_full (hamilt::HContainer>* dmR_out, + const int ik_in) const { - DensityMatrix_Tools::cal_DMR_full(*this, dmR_out, ik_in); + DensityMatrix_Tools::cal_DMR_full (*this, dmR_out, ik_in); } - - // calculate DMR from DMK using blas for gamma-only calculation template <> -void DensityMatrix::cal_DMR(const int ik_in) +void + DensityMatrix::cal_DMR (const int ik_in) { - ModuleBase::TITLE("DensityMatrix", "cal_DMR"); + ModuleBase::TITLE ("DensityMatrix", "cal_DMR"); using TK = double; using TR = double; - assert(ik_in == -1 || ik_in == 0); - assert(this->_nk == 1); + assert (ik_in == -1 || ik_in == 0); + assert (this->_nk == 1); // To check whether DMR has been initialized - assert(this->_DMR.size()==this->_nspin && "DMR has not been initialized!"); + assert (this->_DMR.size () == this->_nspin 
&& "DMR has not been initialized!"); - ModuleBase::timer::start("DensityMatrix", "cal_DMR"); + ModuleBase::timer::start ("DensityMatrix", "cal_DMR"); const int ld_hk = this->_paraV->nrow; for (int is = 1; is <= this->_nspin; ++is) - { - const int ik_begin = this->_nk * (is - 1); // jump this->_nk for spin_down if nspin==2 - hamilt::HContainer*const target_DMR = this->_DMR[is - 1]; - // set zero since this function is called in every scf step - target_DMR->set_zero(); - #ifdef _OPENMP - #pragma omp parallel for schedule(dynamic) - #endif - for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { - hamilt::AtomPair& target_ap = target_DMR->get_atom_pair(i); - const int iat1 = target_ap.get_atom_i(); - const int iat2 = target_ap.get_atom_j(); - // get global indexes of whole matrix for each atom in this process - const int row_ap = this->_paraV->atom_begin_row[iat1]; - const int col_ap = this->_paraV->atom_begin_col[iat2]; - const int row_size = this->_paraV->get_row_size(iat1); - const int col_size = this->_paraV->get_col_size(iat2); - const int R_size = target_ap.get_R_size(); - assert(row_ap != -1 && col_ap != -1 && "Atom-pair not belong this process"); - assert(R_size == 1); - const ModuleBase::Vector3 R_index = target_ap.get_R_index(0); - assert(R_index.x == 0 && R_index.y == 0 && R_index.z == 0); - hamilt::BaseMatrix*const target_mat = target_ap.find_matrix(R_index); - #ifdef __DEBUG - if (target_mat == nullptr) - { - std::cout << "target_mat is nullptr" << std::endl; - continue; - } - #endif - // k index - constexpr TK kphase = 1; - // transpose DMK col=>row - const TK* DMK_mat_ptr - = this->_DMK[0 + ik_begin].data() - + col_ap * this->_paraV->nrow + row_ap; - // set DMR element - TR* target_DMR_ptr = target_mat->get_pointer(); - for (int mu = 0; mu < row_size; ++mu) - { - BlasConnector::axpy(col_size, - kphase, - DMK_mat_ptr, - ld_hk, - target_DMR_ptr, - 1); - DMK_mat_ptr += 1; - target_DMR_ptr += col_size; - } + const int ik_begin = this->_nk * (is - 1); // 
jump this->_nk for spin_down if nspin==2 + hamilt::HContainer* const target_DMR = this->_DMR[is - 1]; + // set zero since this function is called in every scf step + target_DMR->set_zero (); +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) +#endif + for (int i = 0; i < target_DMR->size_atom_pairs (); ++i) + { + hamilt::AtomPair& target_ap = target_DMR->get_atom_pair (i); + const int iat1 = target_ap.get_atom_i (); + const int iat2 = target_ap.get_atom_j (); + // get global indexes of whole matrix for each atom in this process + const int row_ap = this->_paraV->atom_begin_row[iat1]; + const int col_ap = this->_paraV->atom_begin_col[iat2]; + const int row_size = this->_paraV->get_row_size (iat1); + const int col_size = this->_paraV->get_col_size (iat2); + const int R_size = target_ap.get_R_size (); + assert (row_ap != -1 && col_ap != -1 && "Atom-pair not belong this process"); + assert (R_size == 1); + const ModuleBase::Vector3 R_index = target_ap.get_R_index (0); + assert (R_index.x == 0 && R_index.y == 0 && R_index.z == 0); + hamilt::BaseMatrix* const target_mat = target_ap.find_matrix (R_index); +#ifdef __DEBUG + if (target_mat == nullptr) + { + std::cout << "target_mat is nullptr" << std::endl; + continue; + } +#endif + // k index + constexpr TK kphase = 1; + // transpose DMK col=>row + const TK* DMK_mat_ptr = this->_DMK[0 + ik_begin].data () + col_ap * this->_paraV->nrow + row_ap; + // set DMR element + TR* target_DMR_ptr = target_mat->get_pointer (); + for (int mu = 0; mu < row_size; ++mu) + { + BlasConnector::axpy (col_size, kphase, DMK_mat_ptr, ld_hk, target_DMR_ptr, 1); + DMK_mat_ptr += 1; + target_DMR_ptr += col_size; + } + } } - } - ModuleBase::timer::end("DensityMatrix", "cal_DMR"); + ModuleBase::timer::end ("DensityMatrix", "cal_DMR"); } - - // switch_dmr template -void DensityMatrix::switch_dmr(const int mode) +void + DensityMatrix::switch_dmr (const int mode) { - ModuleBase::TITLE("DensityMatrix", "switch_dmr"); + ModuleBase::TITLE 
("DensityMatrix", "switch_dmr"); if (this->_nspin != 2) - { - return; - } + { + return; + } else - { - ModuleBase::timer::start("DensityMatrix", "switch_dmr"); - switch(mode) { - case 0: - // switch to original density matrix - if (this->dmr_tmp_ != nullptr && this->dmr_origin_.size() != 0) - { - this->_DMR[0]->allocate(this->dmr_origin_.data(), false); - delete[] this->dmr_tmp_; - this->dmr_tmp_ = nullptr; - } - // else: do nothing - break; - case 1: - // switch to total magnetization density matrix, dmr_up + dmr_down - if(this->dmr_tmp_ == nullptr) - { - const size_t size = this->_DMR[0]->get_nnr(); - this->dmr_tmp_ = new TR[size]; - this->dmr_origin_.resize(size); - for (int i = 0; i < size; ++i) - { - this->dmr_origin_[i] = this->_DMR[0]->get_wrapper()[i]; - this->dmr_tmp_[i] = this->dmr_origin_[i] + this->_DMR[1]->get_wrapper()[i]; - } - this->_DMR[0]->allocate(this->dmr_tmp_, false); - } - else - { - const size_t size = this->_DMR[0]->get_nnr(); - for (int i = 0; i < size; ++i) + ModuleBase::timer::start ("DensityMatrix", "switch_dmr"); + switch (mode) { - this->dmr_tmp_[i] = this->dmr_origin_[i] + this->_DMR[1]->get_wrapper()[i]; - } - } - break; - case 2: - // switch to magnetization density matrix, dmr_up - dmr_down - if(this->dmr_tmp_ == nullptr) - { - const size_t size = this->_DMR[0]->get_nnr(); - this->dmr_tmp_ = new TR[size]; - this->dmr_origin_.resize(size); - for (int i = 0; i < size; ++i) - { - this->dmr_origin_[i] = this->_DMR[0]->get_wrapper()[i]; - this->dmr_tmp_[i] = this->dmr_origin_[i] - this->_DMR[1]->get_wrapper()[i]; - } - this->_DMR[0]->allocate(this->dmr_tmp_, false); - } - else - { - const size_t size = this->_DMR[0]->get_nnr(); - for (int i = 0; i < size; ++i) - { - this->dmr_tmp_[i] = this->dmr_origin_[i] - this->_DMR[1]->get_wrapper()[i]; + case 0: + // switch to original density matrix + if (this->dmr_tmp_ != nullptr && this->dmr_origin_.size () != 0) + { + this->_DMR[0]->allocate (this->dmr_origin_.data (), false); + delete[] 
this->dmr_tmp_; + this->dmr_tmp_ = nullptr; + } + // else: do nothing + break; + case 1: + // switch to total magnetization density matrix, dmr_up + dmr_down + if (this->dmr_tmp_ == nullptr) + { + const size_t size = this->_DMR[0]->get_nnr (); + this->dmr_tmp_ = new TR[size]; + this->dmr_origin_.resize (size); + for (int i = 0; i < size; ++i) + { + this->dmr_origin_[i] = this->_DMR[0]->get_wrapper ()[i]; + this->dmr_tmp_[i] = this->dmr_origin_[i] + this->_DMR[1]->get_wrapper ()[i]; + } + this->_DMR[0]->allocate (this->dmr_tmp_, false); + } + else + { + const size_t size = this->_DMR[0]->get_nnr (); + for (int i = 0; i < size; ++i) + { + this->dmr_tmp_[i] = this->dmr_origin_[i] + this->_DMR[1]->get_wrapper ()[i]; + } + } + break; + case 2: + // switch to magnetization density matrix, dmr_up - dmr_down + if (this->dmr_tmp_ == nullptr) + { + const size_t size = this->_DMR[0]->get_nnr (); + this->dmr_tmp_ = new TR[size]; + this->dmr_origin_.resize (size); + for (int i = 0; i < size; ++i) + { + this->dmr_origin_[i] = this->_DMR[0]->get_wrapper ()[i]; + this->dmr_tmp_[i] = this->dmr_origin_[i] - this->_DMR[1]->get_wrapper ()[i]; + } + this->_DMR[0]->allocate (this->dmr_tmp_, false); + } + else + { + const size_t size = this->_DMR[0]->get_nnr (); + for (int i = 0; i < size; ++i) + { + this->dmr_tmp_[i] = this->dmr_origin_[i] - this->_DMR[1]->get_wrapper ()[i]; + } + } + break; + default: + ModuleBase::WARNING_QUIT ("density_matrix.cpp", "Unknown mode in switch_dmr"); } - } - break; - default: - ModuleBase::WARNING_QUIT("density_matrix.cpp", "Unknown mode in switch_dmr"); + ModuleBase::timer::end ("DensityMatrix", "switch_dmr"); } - ModuleBase::timer::end("DensityMatrix", "switch_dmr"); - } } - - template <> -void DensityMatrix_Tools::func_exp_mul_dmk(const std::complex kphase, const std::vector> &DMK_mat_trans, double* target_DMR_mat) +void + DensityMatrix_Tools::func_exp_mul_dmk (const std::complex kphase, + const std::vector>& DMK_mat_trans, + double* target_DMR_mat) { 
- const std::size_t mat_size = DMK_mat_trans.size(); - for(std::size_t i = 0; i < mat_size; i++) - { - target_DMR_mat[i] - += kphase.real() * DMK_mat_trans[i].real() - - kphase.imag() * DMK_mat_trans[i].imag(); - } + const std::size_t mat_size = DMK_mat_trans.size (); + for (std::size_t i = 0; i < mat_size; i++) + { + target_DMR_mat[i] += kphase.real () * DMK_mat_trans[i].real () - kphase.imag () * DMK_mat_trans[i].imag (); + } } template <> -void DensityMatrix_Tools::func_exp_mul_dmk>(const std::complex kphase, const std::vector> &DMK_mat_trans, std::complex* target_DMR_mat) +void + DensityMatrix_Tools::func_exp_mul_dmk> (const std::complex kphase, + const std::vector>& DMK_mat_trans, + std::complex* target_DMR_mat) { - BlasConnector::axpy(DMK_mat_trans.size(), - kphase, - DMK_mat_trans.data(), - 1, - target_DMR_mat, - 1); + BlasConnector::axpy (DMK_mat_trans.size (), kphase, DMK_mat_trans.data (), 1, target_DMR_mat, 1); } template <> -void DensityMatrix_Tools::func_xyz_to_updown(const std::complex tmp[4], const int icol, const int step_trace[4], double* target_DMR_mat) +void + DensityMatrix_Tools::func_xyz_to_updown (const std::complex tmp[4], + const int icol, + const int step_trace[4], + double* target_DMR_mat) { - target_DMR_mat[icol + step_trace[0]] = tmp[0].real() + tmp[3].real(); // rho_0 = (rho_upup + rho_downdown).real() - target_DMR_mat[icol + step_trace[1]] = tmp[1].real() + tmp[2].real(); // rho_x = (rho_updown + rho_downup).real() - target_DMR_mat[icol + step_trace[2]] = -tmp[1].imag() + tmp[2].imag(); // rho_y = (i * (rho_updown - rho_downup)).real() - target_DMR_mat[icol + step_trace[3]] = tmp[0].real() - tmp[3].real(); // rho_z = (rho_upup - rho_downdown).real() + target_DMR_mat[icol + step_trace[0]] = tmp[0].real () + tmp[3].real (); // rho_0 = (rho_upup + rho_downdown).real() + target_DMR_mat[icol + step_trace[1]] = tmp[1].real () + tmp[2].real (); // rho_x = (rho_updown + rho_downup).real() + target_DMR_mat[icol + step_trace[2]] + = -tmp[1].imag 
() + tmp[2].imag (); // rho_y = (i * (rho_updown - rho_downup)).real() + target_DMR_mat[icol + step_trace[3]] = tmp[0].real () - tmp[3].real (); // rho_z = (rho_upup - rho_downdown).real() } template <> -void DensityMatrix_Tools::func_xyz_to_updown>(const std::complex tmp[4], const int icol, const int step_trace[4], std::complex* target_DMR_mat) +void + DensityMatrix_Tools::func_xyz_to_updown> (const std::complex tmp[4], + const int icol, + const int step_trace[4], + std::complex* target_DMR_mat) { - target_DMR_mat[icol + step_trace[0]] = tmp[0] + tmp[3]; // rho_0 = (rho_upup + rho_downdown) - target_DMR_mat[icol + step_trace[1]] = tmp[1] + tmp[2]; // rho_x = (rho_updown + rho_downup) - target_DMR_mat[icol + step_trace[2]] = ModuleBase::IMAG_UNIT * (tmp[1].imag() - tmp[2].imag()); // rho_y = (i * (rho_updown - rho_downup)) - target_DMR_mat[icol + step_trace[3]] = tmp[0] - tmp[3]; // rho_z = (rho_upup - rho_downdown) + target_DMR_mat[icol + step_trace[0]] = tmp[0] + tmp[3]; // rho_0 = (rho_upup + rho_downdown) + target_DMR_mat[icol + step_trace[1]] = tmp[1] + tmp[2]; // rho_x = (rho_updown + rho_downup) + target_DMR_mat[icol + step_trace[2]] + = ModuleBase::IMAG_UNIT * (tmp[1].imag () - tmp[2].imag ()); // rho_y = (i * (rho_updown - rho_downup)) + target_DMR_mat[icol + step_trace[3]] = tmp[0] - tmp[3]; // rho_z = (rho_upup - rho_downdown) } - - // T of HContainer can be double or complex -template class DensityMatrix; // Gamma-Only case -template class DensityMatrix, double>; // Multi-k case +template class DensityMatrix; // Gamma-Only case +template class DensityMatrix, double>; // Multi-k case template class DensityMatrix, std::complex>; // For EXX in future } // namespace elecstate diff --git a/source/source_estate/module_dm/density_matrix.h b/source/source_estate/module_dm/density_matrix.h index a8b0e1c4ecb..8aba3b15a0e 100644 --- a/source/source_estate/module_dm/density_matrix.h +++ b/source/source_estate/module_dm/density_matrix.h @@ -14,67 +14,64 @@ namespace 
elecstate * = for Gamma-only calculation * = ,double> for multi-k calculation */ -template struct ShiftRealComplex +template +struct ShiftRealComplex { using type = void; }; -template<> -struct ShiftRealComplex +template <> +struct ShiftRealComplex { - using type = std::complex; + using type = std::complex; }; -template<> -struct ShiftRealComplex> +template <> +struct ShiftRealComplex> { - using type = double; + using type = double; }; - - template class DensityMatrix; +template +class DensityMatrix; // DensityMatrix,TR>::cal_DMR() is illegal in C++, so DensityMatrix_Tools is used instead. namespace DensityMatrix_Tools { - template - extern void cal_DMR( - const DensityMatrix &dm, - std::vector*> &dmR_out, - const int ik_in); - - template - extern void cal_DMR_td( - const DensityMatrix &dm, - std::vector*> &dmR_out, - const UnitCell& ucell, - const ModuleBase::Vector3 At, - const int ik_in); - - template - extern void cal_DMR_full( - const DensityMatrix &dm, - hamilt::HContainer* dmR_out, - const int ik_in); - - template - extern void func_exp_mul_dmk(const std::complex kphase, const std::vector> &DMK_mat_trans, TR* target_DMR_mat); - - template - extern void func_xyz_to_updown(const std::complex tmp[4], const int icol, const int step_trace[4], TR* target_DMR_mat); -} - +template +extern void + cal_DMR (const DensityMatrix& dm, std::vector*>& dmR_out, const int ik_in); + +template +extern void cal_DMR_td (const DensityMatrix& dm, + std::vector*>& dmR_out, + const UnitCell& ucell, + const ModuleBase::Vector3 At, + const int ik_in); + +template +extern void cal_DMR_full (const DensityMatrix& dm, hamilt::HContainer* dmR_out, const int ik_in); + +template +extern void func_exp_mul_dmk (const std::complex kphase, + const std::vector>& DMK_mat_trans, + TR* target_DMR_mat); + +template +extern void + func_xyz_to_updown (const std::complex tmp[4], const int icol, const int step_trace[4], TR* target_DMR_mat); +} // namespace DensityMatrix_Tools template class DensityMatrix 
{ - using TRShift = typename ShiftRealComplex::type; + using TRShift = typename ShiftRealComplex::type; - public: - /** - * @brief Destructor of class DensityMatrix - */ - ~DensityMatrix(); + public: + /** + * @brief Destructor of class DensityMatrix + */ + ~DensityMatrix (); /** * @brief Constructor of class DensityMatrix for multi-k calculation @@ -85,10 +82,10 @@ class DensityMatrix * @param nk number of k-points, not always equal to K_Vectors::get_nks()/nspin_dm. * it will be set to kvec_d.size() if the value is invalid */ - DensityMatrix(const Parallel_Orbitals* _paraV, - const int nspin, - const std::vector>& kvec_d, - const int nk); + DensityMatrix (const Parallel_Orbitals* _paraV, + const int nspin, + const std::vector>& kvec_d, + const int nk); /** * @brief Constructor of class DensityMatrix for gamma-only calculation, where kvector is not required @@ -96,36 +93,36 @@ class DensityMatrix * @param nspin number of spin of the density matrix, set by user according to global nspin * (usually {nspin_global -> nspin_dm} = {1->1, 2->2, 4->1}, but sometimes 2->1 like in LR-TDDFT) */ - DensityMatrix(const Parallel_Orbitals* _paraV, const int nspin); + DensityMatrix (const Parallel_Orbitals* _paraV, const int nspin); /** * @brief initialize density matrix DMR from UnitCell * @param GridD_in pointer of Grid_Driver object (used to find ajacent atoms) * @param ucell pointer of UnitCell object */ - void init_DMR(const Grid_Driver* GridD_in, const UnitCell* ucell); + void init_DMR (const Grid_Driver* GridD_in, const UnitCell* ucell); /** * @brief initialize density matrix DMR from UnitCell and RA * @param ra pointer of Record_adj object (used to find ajacent atoms) * @param ucell pointer of UnitCell object */ - void init_DMR(Record_adj& ra, const UnitCell* ucell); + void init_DMR (Record_adj& ra, const UnitCell* ucell); /** * @brief initialize density matrix DMR from another HContainer * now only support HContainer * @param _DMR_in pointer of another HContainer object */ 
- void init_DMR(const hamilt::HContainer& _DMR_in); + void init_DMR (const hamilt::HContainer& _DMR_in); /// @brief initialize density matrix DMR from another HContainer - /// this is a temprory function for NSPIN=4 case + /// this is a temprory function for NSPIN=4 case /// since copy HContainer from another HContainer with different TR is not supported yet /// would be refactor in the future - /// @param _DMR_in + /// @param _DMR_in // the old input type ``:HContainer` causes redefination error if TR = complex - void init_DMR(const hamilt::HContainer& _DMR_in); + void init_DMR (const hamilt::HContainer& _DMR_in); /** * @brief set _DMK element directly @@ -135,13 +132,13 @@ class DensityMatrix * @param j column index * @param value value to be set */ - void set_DMK(const int ispin, const int ik, const int i, const int j, const TK value); + void set_DMK (const int ispin, const int ik, const int i, const int j, const TK value); /** * @brief set _DMK element to zero - */ - void set_DMK_zero(); - + */ + void set_DMK_zero (); + /** * @brief get a matrix element of density matrix dm(k) * @param ispin spin index (1 - spin up (support SOC) or 2 - spin down) @@ -150,66 +147,98 @@ class DensityMatrix * @param j column index * @return T a matrix element of density matrix dm(k) */ - TK get_DMK(const int ispin, const int ik, const int i, const int j) const; + TK get_DMK (const int ispin, const int ik, const int i, const int j) const; /** * @brief get total number of k-points of density matrix dm(k) */ - int get_DMK_nks() const; - int get_DMK_size() const; + int get_DMK_nks () const; + int get_DMK_size () const; /** * @brief get number of rows of density matrix dm(k) */ - int get_DMK_nrow() const; + int get_DMK_nrow () const; /** * @brief get number of columns of density matrix dm(k) */ - int get_DMK_ncol() const; + int get_DMK_ncol () const; /** * @brief get pointer of DMR * @param ispin spin index (1 - spin up (support SOC) or 2 - spin down) * @return HContainer* pointer of 
DMR */ - hamilt::HContainer* get_DMR_pointer(const int ispin) const; + hamilt::HContainer* get_DMR_pointer (const int ispin) const; /** * @brief get pointer vector of DMR * @return HContainer* vector of DMR */ - const std::vector*>& get_DMR_vector() const {return this->_DMR;} - std::vector*>& get_DMR_vector() {return this->_DMR;} - - const std::vector>& get_DMR_save() const {return this->_DMR_save;} - std::vector>& get_DMR_save() {return this->_DMR_save;} + const std::vector*>& + get_DMR_vector () const + { + return this->_DMR; + } + std::vector*>& + get_DMR_vector () + { + return this->_DMR; + } + + const std::vector>& + get_DMR_save () const + { + return this->_DMR_save; + } + std::vector>& + get_DMR_save () + { + return this->_DMR_save; + } /** * @brief get pointer of DMK * @param ik k-point index, which is the index of _DMK * @return TK* pointer of DMK */ - TK* get_DMK_pointer(const int ik) const; + TK* get_DMK_pointer (const int ik) const; /** * @brief get pointer vector of DMK - */ - const std::vector>& get_DMK_vector() const {return this->_DMK;} - std::vector>& get_DMK_vector() {return this->_DMK;} + */ + const std::vector>& + get_DMK_vector () const + { + return this->_DMK; + } + std::vector>& + get_DMK_vector () + { + return this->_DMK; + } /** * @brief set _DMK using a input TK* pointer * please make sure the size of TK* is correct - */ - void set_DMK_pointer(const int ik, TK* DMK_in); + */ + void set_DMK_pointer (const int ik, TK* DMK_in); /** * @brief get pointer of paraV */ - const Parallel_Orbitals* get_paraV_pointer() const {return this->_paraV;} + const Parallel_Orbitals* + get_paraV_pointer () const + { + return this->_paraV; + } - const std::vector>& get_kvec_d() const { return this->_kvec_d; } + const std::vector>& + get_kvec_d () const + { + return this->_kvec_d; + } /** * @brief calculate density matrix DMR from dm(k) using blas::axpy @@ -217,7 +246,7 @@ class DensityMatrix * if ik_in < 0, calculate all k-points * if ik_in >= 0, calculate only 
one k-point without summing over k-points */ - void cal_DMR(const int ik_in = -1); + void cal_DMR (const int ik_in = -1); /** * @brief calculate density matrix DMR with additional vector potential phase, used for hybrid gauge tddft @@ -225,7 +254,7 @@ class DensityMatrix * if ik_in < 0, calculate all k-points * if ik_in >= 0, calculate only one k-point without summing over k-points */ - void cal_DMR_td(const UnitCell& ucell, const ModuleBase::Vector3 At, const int ik_in = -1); + void cal_DMR_td (const UnitCell& ucell, const ModuleBase::Vector3 At, const int ik_in = -1); /** * @brief calculate complex density matrix DMR with both real and imaginary part for noncollinear-spin calculation @@ -235,13 +264,13 @@ class DensityMatrix * if ik_in < 0, calculate all k-points * if ik_in >= 0, calculate only one k-point without summing over k-points */ - void cal_DMR_full(hamilt::HContainer>* dmR_out, const int ik_in = -1) const; + void cal_DMR_full (hamilt::HContainer>* dmR_out, const int ik_in = -1) const; /** * @brief (Only nspin=2) switch DMR to total density matrix or magnetization density matrix * @param mode 0 - original density matrix; 1 - total density matrix; 2 - magnetization density matrix */ - void switch_dmr(const int mode); + void switch_dmr (const int mode); /** * @brief write density matrix dm(ik) into *.dmk @@ -249,7 +278,7 @@ class DensityMatrix * @param ispin spin index (1 - spin up (support SOC) or 2 - spin down) * @param ik k-point index */ - void write_DMK(const std::string directory, const int ispin, const int ik); + void write_DMK (const std::string directory, const int ispin, const int ik); /** * @brief read *.dmk into density matrix dm(ik) @@ -257,13 +286,13 @@ class DensityMatrix * @param ispin spin index (1 - spin up (support SOC) or 2 - spin down) * @param ik k-point index */ - void read_DMK(const std::string directory, const int ispin, const int ik); + void read_DMK (const std::string directory, const int ispin, const int ik); /** * @brief save 
_DMR into _DMR_save */ - void save_DMR(); - + void save_DMR (); + std::vector EDMK; // for TD-DFT #ifdef __PEXSI @@ -326,9 +355,17 @@ class DensityMatrix std::vector dmr_origin_; TR* dmr_tmp_ = nullptr; - friend void DensityMatrix_Tools::cal_DMR(const DensityMatrix &dm, std::vector*> &dmR_out, const int ik_in); - friend void DensityMatrix_Tools::cal_DMR_td(const DensityMatrix &dm, std::vector*> &dmR_out, const UnitCell& ucell, const ModuleBase::Vector3 At, const int ik_in); - friend void DensityMatrix_Tools::cal_DMR_full(const DensityMatrix &dm, hamilt::HContainer>* dmR_out, const int ik_in); + friend void DensityMatrix_Tools::cal_DMR (const DensityMatrix& dm, + std::vector*>& dmR_out, + const int ik_in); + friend void DensityMatrix_Tools::cal_DMR_td (const DensityMatrix& dm, + std::vector*>& dmR_out, + const UnitCell& ucell, + const ModuleBase::Vector3 At, + const int ik_in); + friend void DensityMatrix_Tools::cal_DMR_full (const DensityMatrix& dm, + hamilt::HContainer>* dmR_out, + const int ik_in); }; } // namespace elecstate diff --git a/source/source_estate/module_dm/density_matrix_io.cpp b/source/source_estate/module_dm/density_matrix_io.cpp index 7ee383dfb81..488e47f9e03 100644 --- a/source/source_estate/module_dm/density_matrix_io.cpp +++ b/source/source_estate/module_dm/density_matrix_io.cpp @@ -12,212 +12,220 @@ namespace elecstate // initialize density matrix DMR from UnitCell (mainly used in UnitTest) template -void DensityMatrix::init_DMR(const Grid_Driver* GridD_in, const UnitCell* ucell) +void + DensityMatrix::init_DMR (const Grid_Driver* GridD_in, const UnitCell* ucell) { - ModuleBase::TITLE("DensityMatrix", "init_DMR"); + ModuleBase::TITLE ("DensityMatrix", "init_DMR"); // ensure _DMR is empty for (auto& it: this->_DMR) - { - delete it; - } - this->_DMR.clear(); + { + delete it; + } + this->_DMR.clear (); // construct a new DMR hamilt::HContainer* tmp_DMR; - tmp_DMR = new hamilt::HContainer(this->_paraV); + tmp_DMR = new hamilt::HContainer 
(this->_paraV); // set up a HContainer for (int iat1 = 0; iat1 < ucell->nat; iat1++) - { - auto tau1 = ucell->get_tau(iat1); - int T1, I1; - ucell->iat2iait(iat1, &I1, &T1); - AdjacentAtomInfo adjs; - GridD_in->Find_atom(*ucell, tau1, T1, I1, &adjs); - // std::cout << "adjs.adj_num: " <itia2iat(T2, I2); - if (this->_paraV->get_row_size(iat1) <= 0 || this->_paraV->get_col_size(iat2) <= 0) - { - continue; - } - ModuleBase::Vector3& R_index = adjs.box[ad]; - // std::cout << "R_index: " << R_index.x << " " << R_index.y << " " << R_index.z << std::endl; - hamilt::AtomPair tmp_ap(iat1, iat2, R_index, this->_paraV); - tmp_DMR->insert_pair(tmp_ap); + auto tau1 = ucell->get_tau (iat1); + int T1, I1; + ucell->iat2iait (iat1, &I1, &T1); + AdjacentAtomInfo adjs; + GridD_in->Find_atom (*ucell, tau1, T1, I1, &adjs); + // std::cout << "adjs.adj_num: " <itia2iat (T2, I2); + if (this->_paraV->get_row_size (iat1) <= 0 || this->_paraV->get_col_size (iat2) <= 0) + { + continue; + } + ModuleBase::Vector3& R_index = adjs.box[ad]; + // std::cout << "R_index: " << R_index.x << " " << R_index.y << " " << R_index.z << std::endl; + hamilt::AtomPair tmp_ap (iat1, iat2, R_index, this->_paraV); + tmp_DMR->insert_pair (tmp_ap); + } } - } // allocate the memory of BaseMatrix in SR, and set the new values to zero if (std::is_same::value) - { - tmp_DMR->fix_gamma(); - } - tmp_DMR->allocate(nullptr, true); - this->_DMR.push_back(tmp_DMR); + { + tmp_DMR->fix_gamma (); + } + tmp_DMR->allocate (nullptr, true); + this->_DMR.push_back (tmp_DMR); // add another DMR if nspin==2 if (this->_nspin == 2) - { - hamilt::HContainer* tmp_DMR1; - tmp_DMR1 = new hamilt::HContainer(*tmp_DMR); - this->_DMR.push_back(tmp_DMR1); - } - ModuleBase::Memory::record("DensityMatrix::DMR", this->_DMR.size() * this->_DMR[0]->get_memory_size()); + { + hamilt::HContainer* tmp_DMR1; + tmp_DMR1 = new hamilt::HContainer (*tmp_DMR); + this->_DMR.push_back (tmp_DMR1); + } + ModuleBase::Memory::record ("DensityMatrix::DMR", 
this->_DMR.size () * this->_DMR[0]->get_memory_size ()); } /// initialize density matrix DMR from UnitCell and RA (mainly used in UnitTest) template -void DensityMatrix::init_DMR(Record_adj& ra, const UnitCell* ucell) +void + DensityMatrix::init_DMR (Record_adj& ra, const UnitCell* ucell) { - ModuleBase::TITLE("DensityMatrix", "init_DMR"); + ModuleBase::TITLE ("DensityMatrix", "init_DMR"); // ensure _DMR is empty for (auto& it: this->_DMR) - { - delete it; - } - this->_DMR.clear(); + { + delete it; + } + this->_DMR.clear (); // construct a new DMR hamilt::HContainer* tmp_DMR; - tmp_DMR = new hamilt::HContainer(this->_paraV); + tmp_DMR = new hamilt::HContainer (this->_paraV); // set up a HContainer for (int iat1 = 0; iat1 < ucell->nat; iat1++) - { - auto tau1 = ucell->get_tau(iat1); - int T1, I1; - ucell->iat2iait(iat1, &I1, &T1); - for (int ad = 0; ad < ra.na_each[iat1]; ++ad) { - const int T2 = ra.info[iat1][ad][3]; - const int I2 = ra.info[iat1][ad][4]; - int iat2 = ucell->itia2iat(T2, I2); - if (this->_paraV->get_row_size(iat1) <= 0 || this->_paraV->get_col_size(iat2) <= 0) - { - continue; - } - hamilt::AtomPair tmp_ap(iat1, - iat2, - ra.info[iat1][ad][0], - ra.info[iat1][ad][1], - ra.info[iat1][ad][2], - this->_paraV); - tmp_DMR->insert_pair(tmp_ap); + auto tau1 = ucell->get_tau (iat1); + int T1, I1; + ucell->iat2iait (iat1, &I1, &T1); + for (int ad = 0; ad < ra.na_each[iat1]; ++ad) + { + const int T2 = ra.info[iat1][ad][3]; + const int I2 = ra.info[iat1][ad][4]; + int iat2 = ucell->itia2iat (T2, I2); + if (this->_paraV->get_row_size (iat1) <= 0 || this->_paraV->get_col_size (iat2) <= 0) + { + continue; + } + hamilt::AtomPair tmp_ap (iat1, + iat2, + ra.info[iat1][ad][0], + ra.info[iat1][ad][1], + ra.info[iat1][ad][2], + this->_paraV); + tmp_DMR->insert_pair (tmp_ap); + } } - } // allocate the memory of BaseMatrix in SR, and set the new values to zero if (std::is_same::value) - { - tmp_DMR->fix_gamma(); - } - tmp_DMR->allocate(nullptr, true); - 
this->_DMR.push_back(tmp_DMR); + { + tmp_DMR->fix_gamma (); + } + tmp_DMR->allocate (nullptr, true); + this->_DMR.push_back (tmp_DMR); // add another DMR if nspin==2 if (this->_nspin == 2) - { - hamilt::HContainer* tmp_DMR1; - tmp_DMR1 = new hamilt::HContainer(*tmp_DMR); - this->_DMR.push_back(tmp_DMR1); - } - ModuleBase::Memory::record("DensityMatrix::DMR", this->_DMR.size() * this->_DMR[0]->get_memory_size()); + { + hamilt::HContainer* tmp_DMR1; + tmp_DMR1 = new hamilt::HContainer (*tmp_DMR); + this->_DMR.push_back (tmp_DMR1); + } + ModuleBase::Memory::record ("DensityMatrix::DMR", this->_DMR.size () * this->_DMR[0]->get_memory_size ()); } // initialize density matrix DMR from another HContainer (mainly used) template -void DensityMatrix::init_DMR(const hamilt::HContainer& DMR_in) +void + DensityMatrix::init_DMR (const hamilt::HContainer& DMR_in) { - ModuleBase::TITLE("DensityMatrix", "init_DMR"); + ModuleBase::TITLE ("DensityMatrix", "init_DMR"); // ensure _DMR is empty for (auto& it: this->_DMR) - { - delete it; - } - this->_DMR.clear(); + { + delete it; + } + this->_DMR.clear (); // set up a HContainer using another one for (int is = 0; is < this->_nspin; ++is) // loop over spin - { - hamilt::HContainer* tmp_DMR; - tmp_DMR = new hamilt::HContainer(DMR_in); - // zero.out - tmp_DMR->set_zero(); - this->_DMR.push_back(tmp_DMR); - } - ModuleBase::Memory::record("DensityMatrix::DMR", this->_DMR.size() * this->_DMR[0]->get_memory_size()); + { + hamilt::HContainer* tmp_DMR; + tmp_DMR = new hamilt::HContainer (DMR_in); + // zero.out + tmp_DMR->set_zero (); + this->_DMR.push_back (tmp_DMR); + } + ModuleBase::Memory::record ("DensityMatrix::DMR", this->_DMR.size () * this->_DMR[0]->get_memory_size ()); } template -void DensityMatrix::init_DMR(const hamilt::HContainer& DMR_in) +void + DensityMatrix::init_DMR (const hamilt::HContainer& DMR_in) { - ModuleBase::TITLE("DensityMatrix", "init_DMR"); + ModuleBase::TITLE ("DensityMatrix", "init_DMR"); // ensure _DMR is empty for 
(auto& it: this->_DMR) - { - delete it; - } - this->_DMR.clear(); - // set up a HContainer using another one - int size_ap = DMR_in.size_atom_pairs(); - if (size_ap > 0) - { - const Parallel_Orbitals* paraV_ = DMR_in.get_atom_pair(0).get_paraV(); - hamilt::HContainer* tmp_DMR = new hamilt::HContainer(paraV_); - for (int iap = 0; iap < size_ap; iap++) { - const int iat1 = DMR_in.get_atom_pair(iap).get_atom_i(); - const int iat2 = DMR_in.get_atom_pair(iap).get_atom_j(); - for (int ir = 0; ir < DMR_in.get_atom_pair(iap).get_R_size(); ir++) - { - const ModuleBase::Vector3 R_index = DMR_in.get_atom_pair(iap).get_R_index(ir); - hamilt::AtomPair tmp_ap(iat1, iat2, R_index, paraV_); - tmp_DMR->insert_pair(tmp_ap); - } + delete it; } - tmp_DMR->allocate(nullptr, true); - this->_DMR.push_back(tmp_DMR); - if (this->_nspin == 2) + this->_DMR.clear (); + // set up a HContainer using another one + int size_ap = DMR_in.size_atom_pairs (); + if (size_ap > 0) { - hamilt::HContainer* tmp_DMR1 = new hamilt::HContainer(*tmp_DMR); - this->_DMR.push_back(tmp_DMR1); + const Parallel_Orbitals* paraV_ = DMR_in.get_atom_pair (0).get_paraV (); + hamilt::HContainer* tmp_DMR = new hamilt::HContainer (paraV_); + for (int iap = 0; iap < size_ap; iap++) + { + const int iat1 = DMR_in.get_atom_pair (iap).get_atom_i (); + const int iat2 = DMR_in.get_atom_pair (iap).get_atom_j (); + for (int ir = 0; ir < DMR_in.get_atom_pair (iap).get_R_size (); ir++) + { + const ModuleBase::Vector3 R_index = DMR_in.get_atom_pair (iap).get_R_index (ir); + hamilt::AtomPair tmp_ap (iat1, iat2, R_index, paraV_); + tmp_DMR->insert_pair (tmp_ap); + } + } + tmp_DMR->allocate (nullptr, true); + this->_DMR.push_back (tmp_DMR); + if (this->_nspin == 2) + { + hamilt::HContainer* tmp_DMR1 = new hamilt::HContainer (*tmp_DMR); + this->_DMR.push_back (tmp_DMR1); + } } - } - ModuleBase::Memory::record("DensityMatrix::DMR", this->_DMR.size() * this->_DMR[0]->get_memory_size()); + ModuleBase::Memory::record ("DensityMatrix::DMR", 
this->_DMR.size () * this->_DMR[0]->get_memory_size ()); } // get _DMR pointer template -hamilt::HContainer* DensityMatrix::get_DMR_pointer(const int ispin) const +hamilt::HContainer* + DensityMatrix::get_DMR_pointer (const int ispin) const { #ifdef __DEBUG - assert(ispin > 0 && ispin <= this->_nspin); + assert (ispin > 0 && ispin <= this->_nspin); #endif return this->_DMR[ispin - 1]; } // get _DMK[ik] pointer template -TK* DensityMatrix::get_DMK_pointer(const int ik) const +TK* + DensityMatrix::get_DMK_pointer (const int ik) const { #ifdef __DEBUG - assert(ik < this->_nk * this->_nspin); + assert (ik < this->_nk * this->_nspin); #endif - return const_cast(this->_DMK[ik].data()); + return const_cast (this->_DMK[ik].data ()); } // set DMK using a pointer template -void DensityMatrix::set_DMK_pointer(const int ik, TK* DMK_in) +void + DensityMatrix::set_DMK_pointer (const int ik, TK* DMK_in) { #ifdef __DEBUG - assert(ik < this->_nk * this->_nspin); + assert (ik < this->_nk * this->_nspin); #endif - this->_DMK[ik].assign(DMK_in, DMK_in + this->_paraV->nrow * this->_paraV->ncol); + this->_DMK[ik].assign (DMK_in, DMK_in + this->_paraV->nrow * this->_paraV->ncol); } // set _DMK element template -void DensityMatrix::set_DMK(const int ispin, const int ik, const int i, const int j, const TK value) +void + DensityMatrix::set_DMK (const int ispin, const int ik, const int i, const int j, const TK value) { #ifdef __DEBUG - assert(ispin > 0 && ispin <= this->_nspin); - assert(ik >= 0 && ik < this->_nk); + assert (ispin > 0 && ispin <= this->_nspin); + assert (ik >= 0 && ik < this->_nk); #endif // consider transpose col=>row this->_DMK[ik + this->_nk * (ispin - 1)][i * this->_paraV->nrow + j] = value; @@ -225,21 +233,23 @@ void DensityMatrix::set_DMK(const int ispin, const int ik, const int i, // set _DMK element template -void DensityMatrix::set_DMK_zero() +void + DensityMatrix::set_DMK_zero () { for (int ik = 0; ik < _nspin * _nk; ik++) - { - 
ModuleBase::GlobalFunc::ZEROS(this->_DMK[ik].data(), - this->_paraV->get_row_size() * this->_paraV->get_col_size()); - } + { + ModuleBase::GlobalFunc::ZEROS (this->_DMK[ik].data (), + this->_paraV->get_row_size () * this->_paraV->get_col_size ()); + } } // get a matrix element of density matrix dm(k) template -TK DensityMatrix::get_DMK(const int ispin, const int ik, const int i, const int j) const +TK + DensityMatrix::get_DMK (const int ispin, const int ik, const int i, const int j) const { #ifdef __DEBUG - assert(ispin > 0 && ispin <= this->_nspin); + assert (ispin > 0 && ispin <= this->_nspin); #endif // consider transpose col=>row return this->_DMK[ik + this->_nk * (ispin - 1)][i * this->_paraV->nrow + j]; @@ -247,200 +257,208 @@ TK DensityMatrix::get_DMK(const int ispin, const int ik, const int i, co // get _DMK nks, nrow, ncol template -int DensityMatrix::get_DMK_nks() const +int + DensityMatrix::get_DMK_nks () const { #ifdef __DEBUG - assert(this->_DMK.size() == _nk * _nspin); + assert (this->_DMK.size () == _nk * _nspin); #endif return _nk * _nspin; } template -int DensityMatrix::get_DMK_size() const +int + DensityMatrix::get_DMK_size () const { #ifdef __DEBUG - assert(this->_DMK.size() != 0); + assert (this->_DMK.size () != 0); #endif - return this->_DMK.size(); + return this->_DMK.size (); } template -int DensityMatrix::get_DMK_nrow() const +int + DensityMatrix::get_DMK_nrow () const { #ifdef __DEBUG - assert(this->_DMK.size() != 0); + assert (this->_DMK.size () != 0); #endif return this->_paraV->nrow; } template -int DensityMatrix::get_DMK_ncol() const +int + DensityMatrix::get_DMK_ncol () const { #ifdef __DEBUG - assert(this->_DMK.size() != 0); + assert (this->_DMK.size () != 0); #endif return this->_paraV->ncol; } template -void DensityMatrix::save_DMR() +void + DensityMatrix::save_DMR () { - ModuleBase::TITLE("DensityMatrix", "save_DMR"); - ModuleBase::timer::start("DensityMatrix", "save_DMR"); + ModuleBase::TITLE ("DensityMatrix", "save_DMR"); + 
ModuleBase::timer::start ("DensityMatrix", "save_DMR"); - const int nnr = this->_DMR[0]->get_nnr(); + const int nnr = this->_DMR[0]->get_nnr (); // allocate if _DMR_save is empty - if (_DMR_save.size() == 0) - { - _DMR_save.resize(this->_DMR.size()); - } + if (_DMR_save.size () == 0) + { + _DMR_save.resize (this->_DMR.size ()); + } // resize if _DMR_save[is].size is not equal to _DMR.size - for (int is = 0; is < _DMR_save.size(); is++) - { - if (_DMR_save[is].size() != nnr) + for (int is = 0; is < _DMR_save.size (); is++) { - _DMR_save[is].resize(nnr); + if (_DMR_save[is].size () != nnr) + { + _DMR_save[is].resize (nnr); + } } - } // save _DMR to _DMR_save - for (int is = 0; is < this->_DMR.size(); is++) - { - TR* DMR_pointer = this->_DMR[is]->get_wrapper(); - TR* DMR_save_pointer = _DMR_save[is].data(); - // set to zero - ModuleBase::GlobalFunc::ZEROS(DMR_save_pointer, nnr); - for (int i = 0; i < nnr; i++) + for (int is = 0; is < this->_DMR.size (); is++) { - DMR_save_pointer[i] = DMR_pointer[i]; + TR* DMR_pointer = this->_DMR[is]->get_wrapper (); + TR* DMR_save_pointer = _DMR_save[is].data (); + // set to zero + ModuleBase::GlobalFunc::ZEROS (DMR_save_pointer, nnr); + for (int i = 0; i < nnr; i++) + { + DMR_save_pointer[i] = DMR_pointer[i]; + } } - } - ModuleBase::timer::end("DensityMatrix", "save_DMR"); + ModuleBase::timer::end ("DensityMatrix", "save_DMR"); } // read *.dmk into density matrix dm(k) template -void DensityMatrix::read_DMK(const std::string directory, const int ispin, const int ik) +void + DensityMatrix::read_DMK (const std::string directory, const int ispin, const int ik) { - ModuleBase::TITLE("DensityMatrix", "read_DMK"); + ModuleBase::TITLE ("DensityMatrix", "read_DMK"); #ifdef __DEBUG - assert(ispin > 0 && ispin <= this->_nspin); + assert (ispin > 0 && ispin <= this->_nspin); #endif // read std::string fn; - fn = directory + "SPIN" + std::to_string(ispin) + "_" + std::to_string(ik) + ".dmk"; + fn = directory + "SPIN" + std::to_string (ispin) + 
"_" + std::to_string (ik) + ".dmk"; // bool quit_abacus = false; std::ifstream ifs; - ifs.open(fn.c_str()); + ifs.open (fn.c_str ()); if (!ifs) - { - quit_abacus = true; - } + { + quit_abacus = true; + } else - { - // if the number is not match, - // quit the program or not. - bool quit = false; - - ModuleBase::CHECK_DOUBLE(ifs, this->_kvec_d[ik].x, quit); - ModuleBase::CHECK_DOUBLE(ifs, this->_kvec_d[ik].y, quit); - ModuleBase::CHECK_DOUBLE(ifs, this->_kvec_d[ik].z, quit); - ModuleBase::CHECK_INT(ifs, this->_paraV->nrow); - ModuleBase::CHECK_INT(ifs, this->_paraV->ncol); - } // If file exist, read in data. + { + // if the number is not match, + // quit the program or not. + bool quit = false; + + ModuleBase::CHECK_DOUBLE (ifs, this->_kvec_d[ik].x, quit); + ModuleBase::CHECK_DOUBLE (ifs, this->_kvec_d[ik].y, quit); + ModuleBase::CHECK_DOUBLE (ifs, this->_kvec_d[ik].z, quit); + ModuleBase::CHECK_INT (ifs, this->_paraV->nrow); + ModuleBase::CHECK_INT (ifs, this->_paraV->ncol); + } // If file exist, read in data. // Finish reading the first part of density matrix. 
for (int i = 0; i < this->_paraV->nrow; ++i) - { - for (int j = 0; j < this->_paraV->ncol; ++j) { - ifs >> this->_DMK[ik + this->_nk * (ispin - 1)][i * this->_paraV->ncol + j]; + for (int j = 0; j < this->_paraV->ncol; ++j) + { + ifs >> this->_DMK[ik + this->_nk * (ispin - 1)][i * this->_paraV->ncol + j]; + } } - } - ifs.close(); + ifs.close (); } // output density matrix dm(k) into *.dmk template <> -void DensityMatrix::write_DMK(const std::string directory, const int ispin, const int ik) +void + DensityMatrix::write_DMK (const std::string directory, const int ispin, const int ik) { - ModuleBase::TITLE("DensityMatrix", "write_DMK"); + ModuleBase::TITLE ("DensityMatrix", "write_DMK"); #ifdef __DEBUG - assert(ispin > 0 && ispin <= this->_nspin); + assert (ispin > 0 && ispin <= this->_nspin); #endif // write std::string fn; - fn = directory + "SPIN" + std::to_string(ispin) + "_" + std::to_string(ik) + ".dmk"; + fn = directory + "SPIN" + std::to_string (ispin) + "_" + std::to_string (ik) + ".dmk"; std::ofstream ofs; - ofs.open(fn.c_str()); + ofs.open (fn.c_str ()); if (!ofs) - { - ModuleBase::WARNING("elecstate::write_dmk", "Can't create DENSITY MATRIX File!"); - } + { + ModuleBase::WARNING ("elecstate::write_dmk", "Can't create DENSITY MATRIX File!"); + } ofs << this->_kvec_d[ik].x << " " << this->_kvec_d[ik].y << " " << this->_kvec_d[ik].z << std::endl; ofs << "\n " << this->_paraV->nrow << " " << this->_paraV->ncol << std::endl; - ofs << std::setprecision(3); + ofs << std::setprecision (3); ofs << std::scientific; for (int i = 0; i < this->_paraV->nrow; ++i) - { - for (int j = 0; j < this->_paraV->ncol; ++j) { - if (j % 8 == 0) - { - ofs << "\n"; - } - ofs << " " << this->_DMK[ik + this->_nk * (ispin - 1)][i * this->_paraV->ncol + j]; + for (int j = 0; j < this->_paraV->ncol; ++j) + { + if (j % 8 == 0) + { + ofs << "\n"; + } + ofs << " " << this->_DMK[ik + this->_nk * (ispin - 1)][i * this->_paraV->ncol + j]; + } } - } - ofs.close(); + ofs.close (); } template <> 
-void DensityMatrix, double>::write_DMK(const std::string directory, const int ispin, const int ik) +void + DensityMatrix, double>::write_DMK (const std::string directory, const int ispin, const int ik) { - ModuleBase::TITLE("DensityMatrix", "write_DMK"); + ModuleBase::TITLE ("DensityMatrix", "write_DMK"); #ifdef __DEBUG - assert(ispin > 0 && ispin <= this->_nspin); + assert (ispin > 0 && ispin <= this->_nspin); #endif // write std::string fn; - fn = directory + "SPIN" + std::to_string(ispin) + "_" + std::to_string(ik) + ".dmk"; + fn = directory + "SPIN" + std::to_string (ispin) + "_" + std::to_string (ik) + ".dmk"; std::ofstream ofs; - ofs.open(fn.c_str()); + ofs.open (fn.c_str ()); if (!ofs) - { - ModuleBase::WARNING("elecstate::write_dmk", "Can't create DENSITY MATRIX File!"); - } + { + ModuleBase::WARNING ("elecstate::write_dmk", "Can't create DENSITY MATRIX File!"); + } ofs << this->_kvec_d[ik].x << " " << this->_kvec_d[ik].y << " " << this->_kvec_d[ik].z << std::endl; ofs << "\n " << this->_paraV->nrow << " " << this->_paraV->ncol << std::endl; - ofs << std::setprecision(3); + ofs << std::setprecision (3); ofs << std::scientific; for (int i = 0; i < this->_paraV->nrow; ++i) - { - for (int j = 0; j < this->_paraV->ncol; ++j) { - if (j % 8 == 0) - { - ofs << "\n"; - } - ofs << " " << this->_DMK[ik + this->_nk * (ispin - 1)][i * this->_paraV->ncol + j].real(); + for (int j = 0; j < this->_paraV->ncol; ++j) + { + if (j % 8 == 0) + { + ofs << "\n"; + } + ofs << " " << this->_DMK[ik + this->_nk * (ispin - 1)][i * this->_paraV->ncol + j].real (); + } } - } - ofs.close(); + ofs.close (); } // T of HContainer can be double or std::complex -template class DensityMatrix; // Gamma-Only case -template class DensityMatrix, double>; // Multi-k case +template class DensityMatrix; // Gamma-Only case +template class DensityMatrix, double>; // Multi-k case template class DensityMatrix, std::complex>; // For EXX in future } // namespace elecstate \ No newline at end of file diff 
--git a/source/source_estate/module_dm/init_dm.cpp b/source/source_estate/module_dm/init_dm.cpp index ac7a9b19dd2..3c3a1bef082 100644 --- a/source/source_estate/module_dm/init_dm.cpp +++ b/source/source_estate/module_dm/init_dm.cpp @@ -6,61 +6,60 @@ #include "source_lcao/module_rt/td_info.h" template -void elecstate::init_dm(UnitCell& ucell, - elecstate::ElecState* pelec, - LCAO_domain::Setup_DM &dmat, - psi::Psi* psi, - Charge &chr, - const int iter, - const int exx_two_level_step) +void + elecstate::init_dm (UnitCell& ucell, + elecstate::ElecState* pelec, + LCAO_domain::Setup_DM& dmat, + psi::Psi* psi, + Charge& chr, + const int iter, + const int exx_two_level_step) { - ModuleBase::TITLE("elecstate", "init_dm"); + ModuleBase::TITLE ("elecstate", "init_dm"); - if (iter == 1 && exx_two_level_step == 0) - { - std::cout << " LCAO WAVEFUN -> CHARGE " << std::endl; + if (iter == 1 && exx_two_level_step == 0) + { + std::cout << " LCAO WAVEFUN -> CHARGE " << std::endl; - elecstate::calEBand(pelec->ekb, pelec->wg, pelec->f_en); + elecstate::calEBand (pelec->ekb, pelec->wg, pelec->f_en); - elecstate::cal_dm_psi(dmat.dm->get_paraV_pointer(), pelec->wg, *psi, *dmat.dm); - if (PARAM.inp.esolver_type!="tddft" && PARAM.inp.td_stype == 2) - { - dmat.dm->cal_DMR_td(ucell, TD_info::cart_At); - } - else - { - dmat.dm->cal_DMR(); - } + elecstate::cal_dm_psi (dmat.dm->get_paraV_pointer (), pelec->wg, *psi, *dmat.dm); + if (PARAM.inp.esolver_type != "tddft" && PARAM.inp.td_stype == 2) + { + dmat.dm->cal_DMR_td (ucell, TD_info::cart_At); + } + else + { + dmat.dm->cal_DMR (); + } - // mohan add 2025-11-12, use density matrix to calculate the charge density - LCAO_domain::dm2rho(dmat.dm->get_DMR_vector(), PARAM.inp.nspin, &chr); + // mohan add 2025-11-12, use density matrix to calculate the charge density + LCAO_domain::dm2rho (dmat.dm->get_DMR_vector (), PARAM.inp.nspin, &chr); - elecstate::cal_ux(ucell); + elecstate::cal_ux (ucell); - //! 
update the potentials by using new electron charge density - pelec->pot->update_from_charge(&chr, &ucell); + //! update the potentials by using new electron charge density + pelec->pot->update_from_charge (&chr, &ucell); - //! compute the correction energy for metals - pelec->f_en.descf = pelec->cal_delta_escf(); - } + //! compute the correction energy for metals + pelec->f_en.descf = pelec->cal_delta_escf (); + } return; } +template void elecstate::init_dm (UnitCell& ucell, + elecstate::ElecState* pelec, + LCAO_domain::Setup_DM& dmat, + psi::Psi* psi, + Charge& chr, + const int iter, + const int exx_two_level_step); -template void elecstate::init_dm(UnitCell& ucell, - elecstate::ElecState* pelec, - LCAO_domain::Setup_DM &dmat, - psi::Psi* psi, - Charge &chr, - const int iter, - const int exx_two_level_step); - -template void elecstate::init_dm>(UnitCell& ucell, - elecstate::ElecState* pelec, - LCAO_domain::Setup_DM> &dmat, - psi::Psi>* psi, - Charge &chr, - const int iter, - const int exx_two_level_step); - +template void elecstate::init_dm> (UnitCell& ucell, + elecstate::ElecState* pelec, + LCAO_domain::Setup_DM>& dmat, + psi::Psi>* psi, + Charge& chr, + const int iter, + const int exx_two_level_step); diff --git a/source/source_estate/module_dm/init_dm.h b/source/source_estate/module_dm/init_dm.h index 2fd969638d5..f294397890a 100644 --- a/source/source_estate/module_dm/init_dm.h +++ b/source/source_estate/module_dm/init_dm.h @@ -1,23 +1,23 @@ #ifndef INIT_DM_H #define INIT_DM_H -#include "source_cell/unitcell.h" // use unitcell -#include "source_estate/elecstate.h"// use ElecState -#include "source_psi/psi.h" // use electronic wave functions +#include "source_cell/unitcell.h" // use unitcell +#include "source_estate/elecstate.h" // use ElecState +#include "source_psi/psi.h" // use electronic wave functions #include "source_estate/module_charge/charge.h" // use charge -#include "source_lcao/setup_dm.h" // define Setup_DM +#include "source_lcao/setup_dm.h" // 
define Setup_DM namespace elecstate { -template -void init_dm(UnitCell& ucell, - ElecState* pelec, - LCAO_domain::Setup_DM &dmat, - psi::Psi* psi, - Charge &chr, - const int iter, - const int exx_two_level_step); +template +void init_dm (UnitCell& ucell, + ElecState* pelec, + LCAO_domain::Setup_DM& dmat, + psi::Psi* psi, + Charge& chr, + const int iter, + const int exx_two_level_step); } diff --git a/source/source_estate/module_dm/test/prepare_unitcell.h b/source/source_estate/module_dm/test/prepare_unitcell.h index 0cbbd28905c..0d1c357c6c0 100644 --- a/source/source_estate/module_dm/test/prepare_unitcell.h +++ b/source/source_estate/module_dm/test/prepare_unitcell.h @@ -8,42 +8,42 @@ class UcellTestPrepare { public: - UcellTestPrepare() = default; - UcellTestPrepare(std::string latname_in, - int lmaxmax_in, - bool init_vel_in, - bool selective_dynamics_in, - bool relax_new_in, - std::string fixed_axes_in, - double lat0_in, - std::valarray latvec_in, - std::vector elements_in, - std::vector pp_files_in, - std::vector pp_types_in, - std::vector orb_files_in, - std::valarray natom_in, - std::vector atomic_mass_in, - std::string coor_type_in, - std::valarray coordinates_in); - UcellTestPrepare(std::string latname_in, - int lmaxmax_in, - bool init_vel_in, - bool selective_dynamics_in, - bool relax_new_in, - std::string fixed_axes_in, - double lat0_in, - std::valarray latvec_in, - std::vector elements_in, - std::vector pp_files_in, - std::vector pp_types_in, - std::vector orb_files_in, - std::valarray natom_in, - std::vector atomic_mass_in, - std::string coor_type_in, - std::valarray coordinates_in, - std::valarray mbl_in, - std::valarray velocity_in); - UcellTestPrepare(const UcellTestPrepare& utp); + UcellTestPrepare () = default; + UcellTestPrepare (std::string latname_in, + int lmaxmax_in, + bool init_vel_in, + bool selective_dynamics_in, + bool relax_new_in, + std::string fixed_axes_in, + double lat0_in, + std::valarray latvec_in, + std::vector elements_in, + 
std::vector pp_files_in, + std::vector pp_types_in, + std::vector orb_files_in, + std::valarray natom_in, + std::vector atomic_mass_in, + std::string coor_type_in, + std::valarray coordinates_in); + UcellTestPrepare (std::string latname_in, + int lmaxmax_in, + bool init_vel_in, + bool selective_dynamics_in, + bool relax_new_in, + std::string fixed_axes_in, + double lat0_in, + std::valarray latvec_in, + std::vector elements_in, + std::vector pp_files_in, + std::vector pp_types_in, + std::vector orb_files_in, + std::valarray natom_in, + std::vector atomic_mass_in, + std::string coor_type_in, + std::valarray coordinates_in, + std::valarray mbl_in, + std::valarray velocity_in); + UcellTestPrepare (const UcellTestPrepare& utp); std::string latname; int lmaxmax; @@ -67,33 +67,34 @@ class UcellTestPrepare int ntype; int atomic_index; - UnitCell* SetUcellInfo(const std::vector& nw, int& nlocal) + UnitCell* + SetUcellInfo (const std::vector& nw, int& nlocal) { // basic info - this->ntype = this->elements.size(); + this->ntype = this->elements.size (); static UnitCell ucell; - ucell.setup(this->latname, this->ntype, this->lmaxmax, this->init_vel, this->fixed_axes); - + ucell.setup (this->latname, this->ntype, this->lmaxmax, this->init_vel, this->fixed_axes); + delete[] ucell.magnet.start_mag; // mag set here - - ucell.atom_label.resize(ucell.ntype); - ucell.atom_mass.resize(ucell.ntype); - ucell.pseudo_fn.resize(ucell.ntype); - ucell.pseudo_type.resize(ucell.ntype); - ucell.orbital_fn.resize(ucell.ntype); + + ucell.atom_label.resize (ucell.ntype); + ucell.atom_mass.resize (ucell.ntype); + ucell.pseudo_fn.resize (ucell.ntype); + ucell.pseudo_type.resize (ucell.ntype); + ucell.orbital_fn.resize (ucell.ntype); ucell.magnet.start_mag = new double[ucell.ntype]; // mag set here - ucell.magnet.ux_[0] = 0.0; // ux_ set here + ucell.magnet.ux_[0] = 0.0; // ux_ set here ucell.magnet.ux_[1] = 0.0; ucell.magnet.ux_[2] = 0.0; for (int it = 0; it < ucell.ntype; ++it) - { - 
ucell.atom_label[it] = this->elements[it]; - ucell.atom_mass[it] = this->atomic_mass[it]; - ucell.pseudo_fn[it] = this->pp_files[it]; - ucell.pseudo_type[it] = this->pp_types[it]; - ucell.orbital_fn[it] = this->orb_files[it]; - ucell.magnet.start_mag[it] = 0.0; // mag set here - } + { + ucell.atom_label[it] = this->elements[it]; + ucell.atom_mass[it] = this->atomic_mass[it]; + ucell.pseudo_fn[it] = this->pp_files[it]; + ucell.pseudo_type[it] = this->pp_types[it]; + ucell.orbital_fn[it] = this->orb_files[it]; + ucell.magnet.start_mag[it] = 0.0; // mag set here + } // lattice info ucell.lat0 = this->lat0; ucell.lat0_angstrom = ucell.lat0 * ModuleBase::BOHR_TO_A; @@ -117,120 +118,120 @@ class UcellTestPrepare ucell.a3.x = ucell.latvec.e31; ucell.a3.y = ucell.latvec.e32; ucell.a3.z = ucell.latvec.e33; - ucell.GT = ucell.latvec.Inverse(); - ucell.G = ucell.GT.Transpose(); + ucell.GT = ucell.latvec.Inverse (); + ucell.G = ucell.GT.Transpose (); ucell.GGT = ucell.G * ucell.GT; - ucell.invGGT = ucell.GGT.Inverse(); - ucell.omega = std::abs(ucell.latvec.Det()) * (ucell.lat0) * (ucell.lat0) * (ucell.lat0); + ucell.invGGT = ucell.GGT.Inverse (); + ucell.omega = std::abs (ucell.latvec.Det ()) * (ucell.lat0) * (ucell.lat0) * (ucell.lat0); // atomic info ucell.Coordinate = this->coor_type; ucell.atoms = new Atom[ucell.ntype]; ucell.set_atom_flag = true; this->atomic_index = 0; for (int it = 0; it < ucell.ntype; ++it) - { - ucell.atoms[it].label = this->elements[it]; - /* - ucell.atoms[it].nw = 0; - ucell.atoms[it].nwl = 2; - delete[] ucell.atoms[it].l_nchi; - ucell.atoms[it].l_nchi = new int[ ucell.atoms[it].nwl+1]; - for(int L=0; Lnatom[it]; - // coordinates and related physical quantities - ucell.atoms[it].tau.resize(ucell.atoms[it].na); - ucell.atoms[it].dis.resize(ucell.atoms[it].na); - ucell.atoms[it].taud.resize(ucell.atoms[it].na); - ucell.atoms[it].vel.resize(ucell.atoms[it].na); - ucell.atoms[it].mag.resize(ucell.atoms[it].na); - 
ucell.atoms[it].angle1.resize(ucell.atoms[it].na); - ucell.atoms[it].angle2.resize(ucell.atoms[it].na); - ucell.atoms[it].m_loc_.resize(ucell.atoms[it].na); - ucell.atoms[it].mbl.resize(ucell.atoms[it].na); - ucell.atoms[it].mass = ucell.atom_mass[it]; // mass set here - for (int ia = 0; ia < ucell.atoms[it].na; ++ia) - { - if (ucell.Coordinate == "Direct") + ucell.atoms[it].label = this->elements[it]; + /* + ucell.atoms[it].nw = 0; + ucell.atoms[it].nwl = 2; + delete[] ucell.atoms[it].l_nchi; + ucell.atoms[it].l_nchi = new int[ ucell.atoms[it].nwl+1]; + for(int L=0; Lcoordinates[this->atomic_index * 3 + 0]; - ucell.atoms[it].taud[ia].y = this->coordinates[this->atomic_index * 3 + 1]; - ucell.atoms[it].taud[ia].z = this->coordinates[this->atomic_index * 3 + 2]; - ucell.atoms[it].tau[ia] = ucell.atoms[it].taud[ia] * ucell.latvec; + ucell.atoms[it].l_nchi[L] = 1; + ucell.atoms[it].nw += (2*L + 1) * ucell.atoms[it].l_nchi[L]; } - else if (ucell.Coordinate == "Cartesian") - { - ucell.atoms[it].tau[ia].x = this->coordinates[this->atomic_index * 3 + 0]; - ucell.atoms[it].tau[ia].y = this->coordinates[this->atomic_index * 3 + 1]; - ucell.atoms[it].tau[ia].z = this->coordinates[this->atomic_index * 3 + 2]; - ModuleBase::Mathzone::Cartesian_to_Direct(ucell.atoms[it].tau[ia].x, - ucell.atoms[it].tau[ia].y, - ucell.atoms[it].tau[ia].z, - ucell.latvec.e11, - ucell.latvec.e12, - ucell.latvec.e13, - ucell.latvec.e21, - ucell.latvec.e22, - ucell.latvec.e23, - ucell.latvec.e31, - ucell.latvec.e32, - ucell.latvec.e33, - ucell.atoms[it].taud[ia].x, - ucell.atoms[it].taud[ia].y, - ucell.atoms[it].taud[ia].z); - } - ucell.atoms[it].dis[ia].set(0, 0, 0); - if (this->init_vel) - { - ucell.atoms[it].vel[ia].x = this->velocity[this->atomic_index * 3 + 0]; - ucell.atoms[it].vel[ia].y = this->velocity[this->atomic_index * 3 + 1]; - ucell.atoms[it].vel[ia].z = this->velocity[this->atomic_index * 3 + 2]; - } - else - { - ucell.atoms[it].vel[ia].set(0, 0, 0); - } - 
ucell.atoms[it].m_loc_[ia].set(0, 0, 0); - ucell.atoms[it].angle1[ia] = 0; - ucell.atoms[it].angle2[ia] = 0; - if (this->selective_dynamics) - { - ucell.atoms[it].mbl[ia].x = this->mbl[this->atomic_index * 3 + 0]; - ucell.atoms[it].mbl[ia].y = this->mbl[this->atomic_index * 3 + 1]; - ucell.atoms[it].mbl[ia].z = this->mbl[this->atomic_index * 3 + 2]; - } - else - { - ucell.atoms[it].mbl[ia] = {1, 1, 1}; - } - ++(this->atomic_index); + */ + ucell.atoms[it].na = this->natom[it]; + // coordinates and related physical quantities + ucell.atoms[it].tau.resize (ucell.atoms[it].na); + ucell.atoms[it].dis.resize (ucell.atoms[it].na); + ucell.atoms[it].taud.resize (ucell.atoms[it].na); + ucell.atoms[it].vel.resize (ucell.atoms[it].na); + ucell.atoms[it].mag.resize (ucell.atoms[it].na); + ucell.atoms[it].angle1.resize (ucell.atoms[it].na); + ucell.atoms[it].angle2.resize (ucell.atoms[it].na); + ucell.atoms[it].m_loc_.resize (ucell.atoms[it].na); + ucell.atoms[it].mbl.resize (ucell.atoms[it].na); + ucell.atoms[it].mass = ucell.atom_mass[it]; // mass set here + for (int ia = 0; ia < ucell.atoms[it].na; ++ia) + { + if (ucell.Coordinate == "Direct") + { + ucell.atoms[it].taud[ia].x = this->coordinates[this->atomic_index * 3 + 0]; + ucell.atoms[it].taud[ia].y = this->coordinates[this->atomic_index * 3 + 1]; + ucell.atoms[it].taud[ia].z = this->coordinates[this->atomic_index * 3 + 2]; + ucell.atoms[it].tau[ia] = ucell.atoms[it].taud[ia] * ucell.latvec; + } + else if (ucell.Coordinate == "Cartesian") + { + ucell.atoms[it].tau[ia].x = this->coordinates[this->atomic_index * 3 + 0]; + ucell.atoms[it].tau[ia].y = this->coordinates[this->atomic_index * 3 + 1]; + ucell.atoms[it].tau[ia].z = this->coordinates[this->atomic_index * 3 + 2]; + ModuleBase::Mathzone::Cartesian_to_Direct (ucell.atoms[it].tau[ia].x, + ucell.atoms[it].tau[ia].y, + ucell.atoms[it].tau[ia].z, + ucell.latvec.e11, + ucell.latvec.e12, + ucell.latvec.e13, + ucell.latvec.e21, + ucell.latvec.e22, + ucell.latvec.e23, + 
ucell.latvec.e31, + ucell.latvec.e32, + ucell.latvec.e33, + ucell.atoms[it].taud[ia].x, + ucell.atoms[it].taud[ia].y, + ucell.atoms[it].taud[ia].z); + } + ucell.atoms[it].dis[ia].set (0, 0, 0); + if (this->init_vel) + { + ucell.atoms[it].vel[ia].x = this->velocity[this->atomic_index * 3 + 0]; + ucell.atoms[it].vel[ia].y = this->velocity[this->atomic_index * 3 + 1]; + ucell.atoms[it].vel[ia].z = this->velocity[this->atomic_index * 3 + 2]; + } + else + { + ucell.atoms[it].vel[ia].set (0, 0, 0); + } + ucell.atoms[it].m_loc_[ia].set (0, 0, 0); + ucell.atoms[it].angle1[ia] = 0; + ucell.atoms[it].angle2[ia] = 0; + if (this->selective_dynamics) + { + ucell.atoms[it].mbl[ia].x = this->mbl[this->atomic_index * 3 + 0]; + ucell.atoms[it].mbl[ia].y = this->mbl[this->atomic_index * 3 + 1]; + ucell.atoms[it].mbl[ia].z = this->mbl[this->atomic_index * 3 + 2]; + } + else + { + ucell.atoms[it].mbl[ia] = {1, 1, 1}; + } + ++(this->atomic_index); + } } - } - ucell.nat = this->natom.sum(); + ucell.nat = this->natom.sum (); // set_nw - assert(nw.size() == ucell.ntype); + assert (nw.size () == ucell.ntype); for (int it = 0; it < ucell.ntype; ++it) - { - ucell.atoms[it].nw = nw[it]; - } + { + ucell.atoms[it].nw = nw[it]; + } // cal_nloc for (int it = 0; it < ucell.ntype; ++it) - { - nlocal += ucell.atoms[it].na * ucell.atoms[it].nw; - } + { + nlocal += ucell.atoms[it].na * ucell.atoms[it].nw; + } // cal_namax int namax = 0; for (int it = 0; it < ucell.ntype; ++it) - { - namax = std::max(namax, ucell.atoms[it].na); - } + { + namax = std::max (namax, ucell.atoms[it].na); + } ucell.namax = namax; // cal_index - assert(nlocal > 0); + assert (nlocal > 0); delete[] ucell.iwt2iat; delete[] ucell.iwt2iw; delete[] ucell.iat2it; @@ -239,158 +240,119 @@ class UcellTestPrepare ucell.iwt2iw = new int[nlocal]; ucell.iat2it = new int[ucell.nat]; ucell.iat2ia = new int[ucell.nat]; // set_iat2itia - ucell.itia2iat.create(ucell.ntype, ucell.namax); - ucell.set_iat2iwt(1); + ucell.itia2iat.create 
(ucell.ntype, ucell.namax); + ucell.set_iat2iwt (1); int iat = 0; int iwt = 0; for (int it = 0; it < ucell.ntype; it++) - { - for (int ia = 0; ia < ucell.atoms[it].na; ia++) { - ucell.iat2it[iat] = it; - ucell.iat2ia[iat] = ia; - ucell.itia2iat(it, ia) = iat; - for (int iw = 0; iw < ucell.atoms[it].nw; iw++) - { - ucell.iwt2iat[iwt] = iat; - ucell.iwt2iw[iwt] = iw; - ++iwt; - } - ++iat; + for (int ia = 0; ia < ucell.atoms[it].na; ia++) + { + ucell.iat2it[iat] = it; + ucell.iat2ia[iat] = ia; + ucell.itia2iat (it, ia) = iat; + for (int iw = 0; iw < ucell.atoms[it].nw; iw++) + { + ucell.iwt2iat[iwt] = iat; + ucell.iwt2iw[iwt] = iw; + ++iwt; + } + ++iat; + } } - } return &ucell; } }; -UcellTestPrepare::UcellTestPrepare(std::string latname_in, - int lmaxmax_in, - bool init_vel_in, - bool selective_dynamics_in, - bool relax_new_in, - std::string fixed_axes_in, - double lat0_in, - std::valarray latvec_in, - std::vector elements_in, - std::vector pp_files_in, - std::vector pp_types_in, - std::vector orb_files_in, - std::valarray natom_in, - std::vector atomic_mass_in, - std::string coor_type_in, - std::valarray coordinates_in) - : latname(latname_in), - lmaxmax(lmaxmax_in), - init_vel(init_vel_in), - selective_dynamics(selective_dynamics_in), - relax_new(relax_new_in), - fixed_axes(fixed_axes_in), - lat0(lat0_in), - latvec(latvec_in), - elements(elements_in), - pp_files(pp_files_in), - pp_types(pp_types_in), - orb_files(orb_files_in), - natom(natom_in), - atomic_mass(atomic_mass_in), - coor_type(coor_type_in), - coordinates(coordinates_in) +UcellTestPrepare::UcellTestPrepare (std::string latname_in, + int lmaxmax_in, + bool init_vel_in, + bool selective_dynamics_in, + bool relax_new_in, + std::string fixed_axes_in, + double lat0_in, + std::valarray latvec_in, + std::vector elements_in, + std::vector pp_files_in, + std::vector pp_types_in, + std::vector orb_files_in, + std::valarray natom_in, + std::vector atomic_mass_in, + std::string coor_type_in, + std::valarray 
coordinates_in) + : latname (latname_in), lmaxmax (lmaxmax_in), init_vel (init_vel_in), selective_dynamics (selective_dynamics_in), + relax_new (relax_new_in), fixed_axes (fixed_axes_in), lat0 (lat0_in), latvec (latvec_in), elements (elements_in), + pp_files (pp_files_in), pp_types (pp_types_in), orb_files (orb_files_in), natom (natom_in), + atomic_mass (atomic_mass_in), coor_type (coor_type_in), coordinates (coordinates_in) { - mbl = std::valarray(0.0, coordinates_in.size()); - velocity = std::valarray(0.0, coordinates_in.size()); + mbl = std::valarray (0.0, coordinates_in.size ()); + velocity = std::valarray (0.0, coordinates_in.size ()); } -UcellTestPrepare::UcellTestPrepare(std::string latname_in, - int lmaxmax_in, - bool init_vel_in, - bool selective_dynamics_in, - bool relax_new_in, - std::string fixed_axes_in, - double lat0_in, - std::valarray latvec_in, - std::vector elements_in, - std::vector pp_files_in, - std::vector pp_types_in, - std::vector orb_files_in, - std::valarray natom_in, - std::vector atomic_mass_in, - std::string coor_type_in, - std::valarray coordinates_in, - std::valarray mbl_in, - std::valarray velocity_in) - : latname(latname_in), - lmaxmax(lmaxmax_in), - init_vel(init_vel_in), - selective_dynamics(selective_dynamics_in), - relax_new(relax_new_in), - fixed_axes(fixed_axes_in), - lat0(lat0_in), - latvec(latvec_in), - elements(elements_in), - pp_files(pp_files_in), - pp_types(pp_types_in), - orb_files(orb_files_in), - natom(natom_in), - atomic_mass(atomic_mass_in), - coor_type(coor_type_in), - coordinates(coordinates_in), - mbl(mbl_in), - velocity(velocity_in) // velocity assume the existence of mbl in print_stru_file() +UcellTestPrepare::UcellTestPrepare (std::string latname_in, + int lmaxmax_in, + bool init_vel_in, + bool selective_dynamics_in, + bool relax_new_in, + std::string fixed_axes_in, + double lat0_in, + std::valarray latvec_in, + std::vector elements_in, + std::vector pp_files_in, + std::vector pp_types_in, + std::vector 
orb_files_in, + std::valarray natom_in, + std::vector atomic_mass_in, + std::string coor_type_in, + std::valarray coordinates_in, + std::valarray mbl_in, + std::valarray velocity_in) + : latname (latname_in), lmaxmax (lmaxmax_in), init_vel (init_vel_in), selective_dynamics (selective_dynamics_in), + relax_new (relax_new_in), fixed_axes (fixed_axes_in), lat0 (lat0_in), latvec (latvec_in), elements (elements_in), + pp_files (pp_files_in), pp_types (pp_types_in), orb_files (orb_files_in), natom (natom_in), + atomic_mass (atomic_mass_in), coor_type (coor_type_in), coordinates (coordinates_in), mbl (mbl_in), + velocity (velocity_in) // velocity assume the existence of mbl in print_stru_file() { } -UcellTestPrepare::UcellTestPrepare(const UcellTestPrepare& utp) - : latname(utp.latname), - lmaxmax(utp.lmaxmax), - init_vel(utp.init_vel), - selective_dynamics(utp.selective_dynamics), - relax_new(utp.relax_new), - fixed_axes(utp.fixed_axes), - lat0(utp.lat0), - latvec(utp.latvec), - elements(utp.elements), - pp_files(utp.pp_files), - pp_types(utp.pp_types), - orb_files(utp.orb_files), - natom(utp.natom), - atomic_mass(utp.atomic_mass), - coor_type(utp.coor_type), - coordinates(utp.coordinates), - mbl(utp.mbl), - velocity(utp.velocity) // velocity assume the existence of mbl in print_stru_file() +UcellTestPrepare::UcellTestPrepare (const UcellTestPrepare& utp) + : latname (utp.latname), lmaxmax (utp.lmaxmax), init_vel (utp.init_vel), + selective_dynamics (utp.selective_dynamics), relax_new (utp.relax_new), fixed_axes (utp.fixed_axes), + lat0 (utp.lat0), latvec (utp.latvec), elements (utp.elements), pp_files (utp.pp_files), pp_types (utp.pp_types), + orb_files (utp.orb_files), natom (utp.natom), atomic_mass (utp.atomic_mass), coor_type (utp.coor_type), + coordinates (utp.coordinates), mbl (utp.mbl), + velocity (utp.velocity) // velocity assume the existence of mbl in print_stru_file() { } -std::map UcellTestLib{ - {"Si", - UcellTestPrepare("fcc", // latname - 2, // lmaxmax - 
true, // init_vel - true, // selective_dyanmics - true, // relax_new - "volume", // fixed_axes - 10.2, // lat0 - {-0.5, - 0.0, - 0.5, // latvec - 0.0, - 0.5, - 0.5, - -0.5, - 0.5, - 0.0}, - {"Si"}, // elements - {"Si.upf"}, // upf file - {"upf201"}, // upf types - {"Si.orb"}, // orb file - {2}, // number of each elements - {28.0}, // atomic mass - "Cartesian", // coordination type - {0.0, - 0.0, - 0.0, // atomic coordinates - 0.25, - 0.25, - 0.25})} -}; +std::map UcellTestLib{{"Si", + UcellTestPrepare ("fcc", // latname + 2, // lmaxmax + true, // init_vel + true, // selective_dyanmics + true, // relax_new + "volume", // fixed_axes + 10.2, // lat0 + {-0.5, + 0.0, + 0.5, // latvec + 0.0, + 0.5, + 0.5, + -0.5, + 0.5, + 0.0}, + {"Si"}, // elements + {"Si.upf"}, // upf file + {"upf201"}, // upf types + {"Si.orb"}, // orb file + {2}, // number of each elements + {28.0}, // atomic mass + "Cartesian", // coordination type + {0.0, + 0.0, + 0.0, // atomic coordinates + 0.25, + 0.25, + 0.25})}}; #endif diff --git a/source/source_estate/module_dm/test/test_cal_dm_R.cpp b/source/source_estate/module_dm/test/test_cal_dm_R.cpp index 585cded191f..87552dfe597 100644 --- a/source/source_estate/module_dm/test/test_cal_dm_R.cpp +++ b/source/source_estate/module_dm/test/test_cal_dm_R.cpp @@ -26,12 +26,13 @@ class DMTest : public testing::Test int dsize; int my_rank = 0; UnitCell ucell; - void SetUp() override + void + SetUp () override { #ifdef __MPI // MPI parallel settings - MPI_Comm_size(MPI_COMM_WORLD, &dsize); - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size (MPI_COMM_WORLD, &dsize); + MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); #endif // set up a unitcell, with one element and test_size atoms, each atom has test_nw orbitals @@ -40,313 +41,324 @@ class DMTest : public testing::Test ucell.atoms = new Atom[ucell.ntype]; ucell.iat2it = new int[ucell.nat]; ucell.iat2ia = new int[ucell.nat]; - ucell.atoms[0].tau.resize(ucell.nat); - ucell.itia2iat.create(ucell.ntype, 
ucell.nat); + ucell.atoms[0].tau.resize (ucell.nat); + ucell.itia2iat.create (ucell.ntype, ucell.nat); for (int iat = 0; iat < ucell.nat; iat++) - { - ucell.iat2it[iat] = 0; - ucell.iat2ia[iat] = iat; - ucell.atoms[0].tau[iat] = ModuleBase::Vector3(0.0, 0.0, 0.0); - ucell.itia2iat(0, iat) = iat; - } + { + ucell.iat2it[iat] = 0; + ucell.iat2ia[iat] = iat; + ucell.atoms[0].tau[iat] = ModuleBase::Vector3 (0.0, 0.0, 0.0); + ucell.itia2iat (0, iat) = iat; + } ucell.atoms[0].na = test_size; ucell.atoms[0].nw = test_nw; - ucell.atoms[0].iw2l.resize(test_nw); - ucell.atoms[0].iw2m.resize(test_nw); - ucell.atoms[0].iw2n.resize(test_nw); + ucell.atoms[0].iw2l.resize (test_nw); + ucell.atoms[0].iw2m.resize (test_nw); + ucell.atoms[0].iw2n.resize (test_nw); for (int iw = 0; iw < test_nw; ++iw) - { - ucell.atoms[0].iw2l[iw] = 0; - ucell.atoms[0].iw2m[iw] = 0; - ucell.atoms[0].iw2n[iw] = 0; - } - ucell.set_iat2iwt(1); - init_parav(); + { + ucell.atoms[0].iw2l[iw] = 0; + ucell.atoms[0].iw2m[iw] = 0; + ucell.atoms[0].iw2n[iw] = 0; + } + ucell.set_iat2iwt (1); + init_parav (); // set paraV - init_parav(); + init_parav (); } - void TearDown() override + void + TearDown () override { delete paraV; delete[] ucell.atoms; } #ifdef __MPI - void init_parav() + void + init_parav () { int nb = 2; int global_row = test_size * test_nw; int global_col = test_size * test_nw; std::ofstream ofs_running; - paraV = new Parallel_Orbitals(); - paraV->init(global_row, global_col, nb, MPI_COMM_WORLD); - paraV->set_atomic_trace(ucell.get_iat2iwt(), test_size, global_row); + paraV = new Parallel_Orbitals (); + paraV->init (global_row, global_col, nb, MPI_COMM_WORLD); + paraV->set_atomic_trace (ucell.get_iat2iwt (), test_size, global_row); } #else - void init_parav() + void + init_parav () { } #endif }; -TEST_F(DMTest, cal_DMR_full) +TEST_F (DMTest, cal_DMR_full) { // get my rank of this process int my_rank = 0; #ifdef __MPI - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_rank (MPI_COMM_WORLD, 
&my_rank); #endif // output dim and nrow, ncol if (my_rank == 0) - { - std::cout << "my rank: " << my_rank << " dim0: " << paraV->dim0 << " dim1:" << paraV->dim1 << std::endl; - std::cout << "my rank: " << my_rank << " nrow: " << paraV->nrow << " ncol:" << paraV->ncol << std::endl; - } + { + std::cout << "my rank: " << my_rank << " dim0: " << paraV->dim0 << " dim1:" << paraV->dim1 << std::endl; + std::cout << "my rank: " << my_rank << " nrow: " << paraV->nrow << " ncol:" << paraV->ncol << std::endl; + } else - { - std::cout << "my rank: " << my_rank << " nrow: " << paraV->nrow << " ncol:" << paraV->ncol << std::endl; - } + { + std::cout << "my rank: " << my_rank << " nrow: " << paraV->nrow << " ncol:" << paraV->ncol << std::endl; + } // initalize a kvectors, Gamma-only K_Vectors* kv = nullptr; int nspin = 4; int nks = 2; // since nspin = 2 kv = new K_Vectors; - kv->set_nks(nks); - kv->kvec_d.resize(nks); + kv->set_nks (nks); + kv->kvec_d.resize (nks); // construct DM - elecstate::DensityMatrix, double> DM(paraV, nspin, kv->kvec_d, kv->get_nks()); + elecstate::DensityMatrix, double> DM (paraV, nspin, kv->kvec_d, kv->get_nks ()); // set this->_DMK for (int is = 1; is <= nspin; is++) - { - for (int ik = 0; ik < kv->get_nks(); ik++) { - for (int i = 0; i < paraV->nrow; i++) - { - for (int j = 0; j < paraV->ncol; j++) + for (int ik = 0; ik < kv->get_nks (); ik++) { - DM.set_DMK(is, ik, i, j, std::complex(0.77, 0.77)); + for (int i = 0; i < paraV->nrow; i++) + { + for (int j = 0; j < paraV->ncol; j++) + { + DM.set_DMK (is, ik, i, j, std::complex (0.77, 0.77)); + } + } } - } } - } // initialize dmR_full - hamilt::HContainer> dmR_full(ucell, paraV); + hamilt::HContainer> dmR_full (ucell, paraV); // calculate this->_DMR - std::chrono::high_resolution_clock::time_point start_time = std::chrono::high_resolution_clock::now(); - DM.cal_DMR_full(&dmR_full); - std::chrono::high_resolution_clock::time_point end_time = std::chrono::high_resolution_clock::now(); + 
std::chrono::high_resolution_clock::time_point start_time = std::chrono::high_resolution_clock::now (); + DM.cal_DMR_full (&dmR_full); + std::chrono::high_resolution_clock::time_point end_time = std::chrono::high_resolution_clock::now (); std::chrono::duration elapsed_time - = std::chrono::duration_cast>(end_time - start_time); - std::cout << "my rank: " << my_rank << " elapsed time blas: " << elapsed_time.count() << std::endl; + = std::chrono::duration_cast> (end_time - start_time); + std::cout << "my rank: " << my_rank << " elapsed time blas: " << elapsed_time.count () << std::endl; // compare the result - for (int i = 0; i < dmR_full.size_atom_pairs(); i++) - { - const std::complex* ptr1 = dmR_full.get_atom_pair(i).get_HR_values(0, 0, 0).get_pointer(); - // - for (int j = 0; j < dmR_full.get_atom_pair(i).get_size(); j++) + for (int i = 0; i < dmR_full.size_atom_pairs (); i++) { - //std::cout << "my rank: " << my_rank << " i: " << i << " j: " << j << " value: " << ptr1[j] << std::endl; - EXPECT_NEAR(ptr1[j].real(), 1.54, 1e-10); - EXPECT_NEAR(ptr1[j].imag(), 1.54, 1e-10); + const std::complex* ptr1 = dmR_full.get_atom_pair (i).get_HR_values (0, 0, 0).get_pointer (); + // + for (int j = 0; j < dmR_full.get_atom_pair (i).get_size (); j++) + { + // std::cout << "my rank: " << my_rank << " i: " << i << " j: " << j << " value: " << ptr1[j] << + // std::endl; + EXPECT_NEAR (ptr1[j].real (), 1.54, 1e-10); + EXPECT_NEAR (ptr1[j].imag (), 1.54, 1e-10); + } } - } delete kv; } -TEST_F(DMTest, cal_DMR_blas_double) +TEST_F (DMTest, cal_DMR_blas_double) { // get my rank of this process int my_rank = 0; #ifdef __MPI - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); #endif // output dim and nrow, ncol if (my_rank == 0) - { - std::cout << "my rank: " << my_rank << " dim0: " << paraV->dim0 << " dim1:" << paraV->dim1 << std::endl; - std::cout << "my rank: " << my_rank << " nrow: " << paraV->nrow << " ncol:" << paraV->ncol << std::endl; - } + { + 
std::cout << "my rank: " << my_rank << " dim0: " << paraV->dim0 << " dim1:" << paraV->dim1 << std::endl; + std::cout << "my rank: " << my_rank << " nrow: " << paraV->nrow << " ncol:" << paraV->ncol << std::endl; + } else - { - std::cout << "my rank: " << my_rank << " nrow: " << paraV->nrow << " ncol:" << paraV->ncol << std::endl; - } + { + std::cout << "my rank: " << my_rank << " nrow: " << paraV->nrow << " ncol:" << paraV->ncol << std::endl; + } // initalize a kvectors, Gamma-only K_Vectors* kv = nullptr; int nspin = 2; int nks = 2; // since nspin = 2 kv = new K_Vectors; - kv->set_nks(nks); - kv->kvec_d.resize(nks); + kv->set_nks (nks); + kv->kvec_d.resize (nks); // construct DM - elecstate::DensityMatrix DM(paraV, nspin, kv->kvec_d, kv->get_nks() / nspin); + elecstate::DensityMatrix DM (paraV, nspin, kv->kvec_d, kv->get_nks () / nspin); // set this->_DMK for (int is = 1; is <= nspin; is++) - { - for (int ik = 0; ik < kv->get_nks() / nspin; ik++) { - for (int i = 0; i < paraV->nrow; i++) - { - for (int j = 0; j < paraV->ncol; j++) + for (int ik = 0; ik < kv->get_nks () / nspin; ik++) { - DM.set_DMK(is, ik, i, j, 0.77); + for (int i = 0; i < paraV->nrow; i++) + { + for (int j = 0; j < paraV->ncol; j++) + { + DM.set_DMK (is, ik, i, j, 0.77); + } + } } - } } - } // initialize this->_DMR - Grid_Driver gd(0, 0); - DM.init_DMR(&gd, &ucell); + Grid_Driver gd (0, 0); + DM.init_DMR (&gd, &ucell); // set Gamma-only for (int is = 1; is <= nspin; is++) - { - DM.get_DMR_pointer(is)->fix_gamma(); - } + { + DM.get_DMR_pointer (is)->fix_gamma (); + } // calculate this->_DMR - std::chrono::high_resolution_clock::time_point start_time = std::chrono::high_resolution_clock::now(); - DM.cal_DMR(); - std::chrono::high_resolution_clock::time_point end_time = std::chrono::high_resolution_clock::now(); + std::chrono::high_resolution_clock::time_point start_time = std::chrono::high_resolution_clock::now (); + DM.cal_DMR (); + std::chrono::high_resolution_clock::time_point end_time = 
std::chrono::high_resolution_clock::now (); std::chrono::duration elapsed_time - = std::chrono::duration_cast>(end_time - start_time); - std::cout << "my rank: " << my_rank << " elapsed time blas: " << elapsed_time.count() << std::endl; + = std::chrono::duration_cast> (end_time - start_time); + std::cout << "my rank: " << my_rank << " elapsed time blas: " << elapsed_time.count () << std::endl; // compare the result - for (int i = 0; i < DM.get_DMR_pointer(1)->size_atom_pairs(); i++) - { - double* ptr1 = DM.get_DMR_pointer(1)->get_atom_pair(i).get_HR_values(0, 0, 0).get_pointer(); - // - for (int j = 0; j < DM.get_DMR_pointer(1)->get_atom_pair(i).get_size(); j++) + for (int i = 0; i < DM.get_DMR_pointer (1)->size_atom_pairs (); i++) { - // std::cout << "my rank: " << my_rank << " i: " << i << " j: " << j << " value: " << ptr1[j] << std::endl; - EXPECT_NEAR(ptr1[j], 0.77, 1e-10); + double* ptr1 = DM.get_DMR_pointer (1)->get_atom_pair (i).get_HR_values (0, 0, 0).get_pointer (); + // + for (int j = 0; j < DM.get_DMR_pointer (1)->get_atom_pair (i).get_size (); j++) + { + // std::cout << "my rank: " << my_rank << " i: " << i << " j: " << j << " value: " << ptr1[j] << + // std::endl; + EXPECT_NEAR (ptr1[j], 0.77, 1e-10); + } } - } delete kv; } -TEST_F(DMTest, cal_DMR_blas_complex) +TEST_F (DMTest, cal_DMR_blas_complex) { // get my rank of this process int my_rank = 0; #ifdef __MPI - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); #endif // output dim and nrow, ncol if (my_rank == 0) - { - std::cout << "my rank: " << my_rank << " dim0: " << paraV->dim0 << " dim1:" << paraV->dim1 << std::endl; - std::cout << "my rank: " << my_rank << " nrow: " << paraV->nrow << " ncol:" << paraV->ncol << std::endl; - } + { + std::cout << "my rank: " << my_rank << " dim0: " << paraV->dim0 << " dim1:" << paraV->dim1 << std::endl; + std::cout << "my rank: " << my_rank << " nrow: " << paraV->nrow << " ncol:" << paraV->ncol << std::endl; + } else - { - 
std::cout << "my rank: " << my_rank << " nrow: " << paraV->nrow << " ncol:" << paraV->ncol << std::endl; - } + { + std::cout << "my rank: " << my_rank << " nrow: " << paraV->nrow << " ncol:" << paraV->ncol << std::endl; + } // initalize a kvectors K_Vectors* kv = nullptr; int nspin = 2; int nks = 4; // since nspin = 2 kv = new K_Vectors; - kv->set_nks(nks); - kv->kvec_d.resize(nks); + kv->set_nks (nks); + kv->kvec_d.resize (nks); kv->kvec_d[1].x = 0.5; kv->kvec_d[3].x = 0.5; // construct DM - elecstate::DensityMatrix, double> DM(paraV, nspin, kv->kvec_d, kv->get_nks() / nspin); + elecstate::DensityMatrix, double> DM (paraV, nspin, kv->kvec_d, kv->get_nks () / nspin); // set this->_DMK for (int is = 1; is <= nspin; is++) - { - for (int ik = 0; ik < kv->get_nks() / nspin; ik++) { - for (int i = 0; i < paraV->nrow; i++) - { - for (int j = 0; j < paraV->ncol; j++) + for (int ik = 0; ik < kv->get_nks () / nspin; ik++) { - DM.set_DMK(is, ik, i, j, is * 0.77 * (ik + 1)); + for (int i = 0; i < paraV->nrow; i++) + { + for (int j = 0; j < paraV->ncol; j++) + { + DM.set_DMK (is, ik, i, j, is * 0.77 * (ik + 1)); + } + } } - } } - } // initialize this->_DMR - Grid_Driver gd(0, 0); - DM.init_DMR(&gd, &ucell); + Grid_Driver gd (0, 0); + DM.init_DMR (&gd, &ucell); // calculate this->_DMR - std::chrono::high_resolution_clock::time_point start_time = std::chrono::high_resolution_clock::now(); - DM.cal_DMR(); - std::chrono::high_resolution_clock::time_point end_time = std::chrono::high_resolution_clock::now(); + std::chrono::high_resolution_clock::time_point start_time = std::chrono::high_resolution_clock::now (); + DM.cal_DMR (); + std::chrono::high_resolution_clock::time_point end_time = std::chrono::high_resolution_clock::now (); std::chrono::duration elapsed_time - = std::chrono::duration_cast>(end_time - start_time); - std::cout << "my rank: " << my_rank << " elapsed time blas: " << elapsed_time.count() << std::endl; + = std::chrono::duration_cast> (end_time - start_time); + 
std::cout << "my rank: " << my_rank << " elapsed time blas: " << elapsed_time.count () << std::endl; // compare the result for spin-up - for (int i = 0; i < DM.get_DMR_pointer(1)->size_atom_pairs(); i++) - { - double* ptr1 = DM.get_DMR_pointer(1)->get_atom_pair(i).get_HR_values(1, 1, 1).get_pointer(); - // - for (int j = 0; j < DM.get_DMR_pointer(1)->get_atom_pair(i).get_size(); j++) + for (int i = 0; i < DM.get_DMR_pointer (1)->size_atom_pairs (); i++) { - // std::cout << "my rank: " << my_rank << " i: " << i << " j: " << j << " value: " << ptr1[j] << std::endl; - EXPECT_NEAR(ptr1[j], -0.77, 1e-10); + double* ptr1 = DM.get_DMR_pointer (1)->get_atom_pair (i).get_HR_values (1, 1, 1).get_pointer (); + // + for (int j = 0; j < DM.get_DMR_pointer (1)->get_atom_pair (i).get_size (); j++) + { + // std::cout << "my rank: " << my_rank << " i: " << i << " j: " << j << " value: " << ptr1[j] << + // std::endl; + EXPECT_NEAR (ptr1[j], -0.77, 1e-10); + } } - } // compare the result for spin-down - for (int i = 0; i < DM.get_DMR_pointer(2)->size_atom_pairs(); i++) - { - double* ptr1 = DM.get_DMR_pointer(2)->get_atom_pair(i).get_HR_values(1, 1, 1).get_pointer(); - // - for (int j = 0; j < DM.get_DMR_pointer(2)->get_atom_pair(i).get_size(); j++) + for (int i = 0; i < DM.get_DMR_pointer (2)->size_atom_pairs (); i++) { - // std::cout << "my rank: " << my_rank << " i: " << i << " j: " << j << " value: " << ptr1[j] << std::endl; - EXPECT_NEAR(ptr1[j], -0.77 * 2, 1e-10); + double* ptr1 = DM.get_DMR_pointer (2)->get_atom_pair (i).get_HR_values (1, 1, 1).get_pointer (); + // + for (int j = 0; j < DM.get_DMR_pointer (2)->get_atom_pair (i).get_size (); j++) + { + // std::cout << "my rank: " << my_rank << " i: " << i << " j: " << j << " value: " << ptr1[j] << + // std::endl; + EXPECT_NEAR (ptr1[j], -0.77 * 2, 1e-10); + } } - } // calculate DMR_total - DM.switch_dmr(1); + DM.switch_dmr (1); // compare the result for spin-up after sum - for (int i = 0; i < 
DM.get_DMR_pointer(1)->size_atom_pairs(); i++) - { - double* ptr1 = DM.get_DMR_pointer(1)->get_atom_pair(i).get_HR_values(1, 1, 1).get_pointer(); - // - for (int j = 0; j < DM.get_DMR_pointer(1)->get_atom_pair(i).get_size(); j++) + for (int i = 0; i < DM.get_DMR_pointer (1)->size_atom_pairs (); i++) { - //std::cout << "my rank: " << my_rank << " i: " << i << " j: " << j << " value: " << ptr1[j] << std::endl; - EXPECT_NEAR(ptr1[j], -0.77 * 3, 1e-10); + double* ptr1 = DM.get_DMR_pointer (1)->get_atom_pair (i).get_HR_values (1, 1, 1).get_pointer (); + // + for (int j = 0; j < DM.get_DMR_pointer (1)->get_atom_pair (i).get_size (); j++) + { + // std::cout << "my rank: " << my_rank << " i: " << i << " j: " << j << " value: " << ptr1[j] << + // std::endl; + EXPECT_NEAR (ptr1[j], -0.77 * 3, 1e-10); + } } - } - // restore to normal DMR - DM.switch_dmr(0); - for (int i = 0; i < DM.get_DMR_pointer(1)->size_atom_pairs(); i++) - { - double* ptr1 = DM.get_DMR_pointer(1)->get_atom_pair(i).get_HR_values(1, 1, 1).get_pointer(); - // - for (int j = 0; j < DM.get_DMR_pointer(1)->get_atom_pair(i).get_size(); j++) + // restore to normal DMR + DM.switch_dmr (0); + for (int i = 0; i < DM.get_DMR_pointer (1)->size_atom_pairs (); i++) { - //std::cout << "my rank: " << my_rank << " i: " << i << " j: " << j << " value: " << ptr1[j] << std::endl; - EXPECT_NEAR(ptr1[j], -0.77, 1e-10); + double* ptr1 = DM.get_DMR_pointer (1)->get_atom_pair (i).get_HR_values (1, 1, 1).get_pointer (); + // + for (int j = 0; j < DM.get_DMR_pointer (1)->get_atom_pair (i).get_size (); j++) + { + // std::cout << "my rank: " << my_rank << " i: " << i << " j: " << j << " value: " << ptr1[j] << + // std::endl; + EXPECT_NEAR (ptr1[j], -0.77, 1e-10); + } } - } // calculate DMR_differenct - DM.switch_dmr(2); - for (int i = 0; i < DM.get_DMR_pointer(1)->size_atom_pairs(); i++) - { - double* ptr1 = DM.get_DMR_pointer(1)->get_atom_pair(i).get_HR_values(1, 1, 1).get_pointer(); - // - for (int j = 0; j < 
DM.get_DMR_pointer(1)->get_atom_pair(i).get_size(); j++) + DM.switch_dmr (2); + for (int i = 0; i < DM.get_DMR_pointer (1)->size_atom_pairs (); i++) { - //std::cout << "my rank: " << my_rank << " i: " << i << " j: " << j << " value: " << ptr1[j] << std::endl; - EXPECT_NEAR(ptr1[j], 0.77, 1e-10); + double* ptr1 = DM.get_DMR_pointer (1)->get_atom_pair (i).get_HR_values (1, 1, 1).get_pointer (); + // + for (int j = 0; j < DM.get_DMR_pointer (1)->get_atom_pair (i).get_size (); j++) + { + // std::cout << "my rank: " << my_rank << " i: " << i << " j: " << j << " value: " << ptr1[j] << + // std::endl; + EXPECT_NEAR (ptr1[j], 0.77, 1e-10); + } } - } delete kv; } -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; } diff --git a/source/source_estate/module_dm/test/test_cal_dmk_psi.cpp b/source/source_estate/module_dm/test/test_cal_dmk_psi.cpp index 689f4f37920..f34cf2dab4f 100644 --- a/source/source_estate/module_dm/test/test_cal_dmk_psi.cpp +++ b/source/source_estate/module_dm/test/test_cal_dmk_psi.cpp @@ -26,12 +26,13 @@ class DMTest : public testing::Test int dsize; int my_rank = 0; UnitCell ucell; - void SetUp() override + void + SetUp () override { #ifdef __MPI // MPI parallel settings - MPI_Comm_size(MPI_COMM_WORLD, &dsize); - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size (MPI_COMM_WORLD, &dsize); + MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); #endif // set up a unitcell, with one element and test_size atoms, each atom has test_nw orbitals @@ -41,32 +42,33 @@ class DMTest : public testing::Test ucell.iat2it = new int[ucell.nat]; ucell.iat2ia = new int[ucell.nat]; ucell.atoms[0].tau = new ModuleBase::Vector3[ucell.nat]; - ucell.itia2iat.create(ucell.ntype, 
ucell.nat); + ucell.itia2iat.create (ucell.ntype, ucell.nat); for (int iat = 0; iat < ucell.nat; iat++) - { - ucell.iat2it[iat] = 0; - ucell.iat2ia[iat] = iat; - ucell.atoms[0].tau[iat] = ModuleBase::Vector3(0.0, 0.0, 0.0); - ucell.itia2iat(0, iat) = iat; - } + { + ucell.iat2it[iat] = 0; + ucell.iat2ia[iat] = iat; + ucell.atoms[0].tau[iat] = ModuleBase::Vector3 (0.0, 0.0, 0.0); + ucell.itia2iat (0, iat) = iat; + } ucell.atoms[0].na = test_size; ucell.atoms[0].nw = test_nw; ucell.atoms[0].iw2l = new int[test_nw]; ucell.atoms[0].iw2m = new int[test_nw]; ucell.atoms[0].iw2n = new int[test_nw]; for (int iw = 0; iw < test_nw; ++iw) - { - ucell.atoms[0].iw2l[iw] = 0; - ucell.atoms[0].iw2m[iw] = 0; - ucell.atoms[0].iw2n[iw] = 0; - } - ucell.set_iat2iwt(1); - init_parav(); + { + ucell.atoms[0].iw2l[iw] = 0; + ucell.atoms[0].iw2m[iw] = 0; + ucell.atoms[0].iw2n[iw] = 0; + } + ucell.set_iat2iwt (1); + init_parav (); // set paraV - init_parav(); + init_parav (); } - void TearDown() + void + TearDown () { delete paraV; delete[] ucell.atoms[0].tau; @@ -77,100 +79,103 @@ class DMTest : public testing::Test } #ifdef __MPI - void init_parav() + void + init_parav () { int nb = 2; int global_row = test_size * test_nw; int global_col = test_size * test_nw; std::ofstream ofs_running; - paraV = new Parallel_Orbitals(); - paraV->init(global_row, global_col, nb, MPI_COMM_WORLD); - paraV->set_atomic_trace(ucell.get_iat2iwt(), test_size, global_row); + paraV = new Parallel_Orbitals (); + paraV->init (global_row, global_col, nb, MPI_COMM_WORLD); + paraV->set_atomic_trace (ucell.get_iat2iwt (), test_size, global_row); } #else - void init_parav() + void + init_parav () { } #endif }; -TEST_F(DMTest, cal_dmk_psi_nspin1) +TEST_F (DMTest, cal_dmk_psi_nspin1) { // initalize a kvectors K_Vectors* kv = nullptr; int nks = 2; kv = new K_Vectors; - kv->set_nks(nks); - kv->kvec_d.resize(nks); + kv->set_nks (nks); + kv->kvec_d.resize (nks); kv->kvec_d[1].x = 0.5; // construct DM std::cout << "dim0: " << 
paraV->dim0 << " dim1:" << paraV->dim1 << std::endl; std::cout << "nrow: " << paraV->nrow << " ncol:" << paraV->ncol << std::endl; int nspin = 1; - elecstate::DensityMatrix DM(kv, paraV, nspin); + elecstate::DensityMatrix DM (kv, paraV, nspin); // compare - EXPECT_EQ(DM.get_DMK_nks(), kv->get_nks()); - EXPECT_EQ(DM.get_DMK_nrow(), paraV->nrow); - EXPECT_EQ(DM.get_DMK_ncol(), paraV->ncol); + EXPECT_EQ (DM.get_DMK_nks (), kv->get_nks ()); + EXPECT_EQ (DM.get_DMK_nrow (), paraV->nrow); + EXPECT_EQ (DM.get_DMK_ncol (), paraV->ncol); // set elements of DMK for (int is = 1; is <= nspin; is++) - { - for (int ik = 0; ik < kv->get_nks() / nspin; ik++) { - for (int i = 0; i < paraV->nrow; i++) - { - for (int j = 0; j < paraV->ncol; j++) + for (int ik = 0; ik < kv->get_nks () / nspin; ik++) { - DM.set_DMK(is, ik, i, j, is + ik * i + j); + for (int i = 0; i < paraV->nrow; i++) + { + for (int j = 0; j < paraV->ncol; j++) + { + DM.set_DMK (is, ik, i, j, is + ik * i + j); + } + } } - } } - } // compare for (int is = 1; is <= nspin; is++) - { - for (int ik = 0; ik < kv->get_nks() / nspin; ik++) { - for (int i = 0; i < paraV->nrow; i++) - { - for (int j = 0; j < paraV->ncol; j++) + for (int ik = 0; ik < kv->get_nks () / nspin; ik++) { - EXPECT_EQ(DM.get_DMK(is, ik, i, j), is + ik * i + j); + for (int i = 0; i < paraV->nrow; i++) + { + for (int j = 0; j < paraV->ncol; j++) + { + EXPECT_EQ (DM.get_DMK (is, ik, i, j), is + ik * i + j); + } + } } - } } - } // test for get_DMK_pointer for (int is = 1; is <= nspin; is++) - { - int ik_begin = (is - 1) * kv->get_nks() / nspin; - for (int ik = 0; ik < kv->get_nks() / nspin; ik++) { - double* ptr = DM.get_DMK_pointer(ik + ik_begin); - for (int i = 0; i < paraV->nrow; i++) - { - for (int j = 0; j < paraV->ncol; j++) + int ik_begin = (is - 1) * kv->get_nks () / nspin; + for (int ik = 0; ik < kv->get_nks () / nspin; ik++) { - // std::cout << ptr[i*paraV->ncol+j] << " "; - EXPECT_EQ(ptr[i * paraV->ncol + j], is + ik * i + j); + double* ptr = 
DM.get_DMK_pointer (ik + ik_begin); + for (int i = 0; i < paraV->nrow; i++) + { + for (int j = 0; j < paraV->ncol; j++) + { + // std::cout << ptr[i*paraV->ncol+j] << " "; + EXPECT_EQ (ptr[i * paraV->ncol + j], is + ik * i + j); + } + } } - } } - } // delete kv delete kv; } -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; } diff --git a/source/source_estate/module_dm/test/test_dm_R_init.cpp b/source/source_estate/module_dm/test/test_dm_R_init.cpp index 13c43657afb..36e86fd6379 100644 --- a/source/source_estate/module_dm/test/test_dm_R_init.cpp +++ b/source/source_estate/module_dm/test/test_dm_R_init.cpp @@ -27,12 +27,13 @@ class DMTest : public testing::Test int dsize; int my_rank = 0; UnitCell ucell; - void SetUp() override + void + SetUp () override { #ifdef __MPI // MPI parallel settings - MPI_Comm_size(MPI_COMM_WORLD, &dsize); - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size (MPI_COMM_WORLD, &dsize); + MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); #endif // set up a unitcell, with one element and test_size atoms, each atom has test_nw orbitals @@ -41,273 +42,277 @@ class DMTest : public testing::Test ucell.atoms = new Atom[ucell.ntype]; ucell.iat2it = new int[ucell.nat]; ucell.iat2ia = new int[ucell.nat]; - ucell.atoms[0].tau.resize(ucell.nat); - ucell.itia2iat.create(ucell.ntype, ucell.nat); + ucell.atoms[0].tau.resize (ucell.nat); + ucell.itia2iat.create (ucell.ntype, ucell.nat); for (int iat = 0; iat < ucell.nat; iat++) - { - ucell.iat2it[iat] = 0; - ucell.iat2ia[iat] = iat; - ucell.atoms[0].tau[iat] = ModuleBase::Vector3(0.0, 0.0, 0.0); - ucell.itia2iat(0, iat) = iat; - } + { + ucell.iat2it[iat] = 0; + ucell.iat2ia[iat] = iat; + 
ucell.atoms[0].tau[iat] = ModuleBase::Vector3 (0.0, 0.0, 0.0); + ucell.itia2iat (0, iat) = iat; + } ucell.atoms[0].na = test_size; ucell.atoms[0].nw = test_nw; - ucell.atoms[0].iw2l.resize(test_nw); - ucell.atoms[0].iw2m.resize(test_nw); - ucell.atoms[0].iw2n.resize(test_nw); + ucell.atoms[0].iw2l.resize (test_nw); + ucell.atoms[0].iw2m.resize (test_nw); + ucell.atoms[0].iw2n.resize (test_nw); for (int iw = 0; iw < test_nw; ++iw) - { - ucell.atoms[0].iw2l[iw] = 0; - ucell.atoms[0].iw2m[iw] = 0; - ucell.atoms[0].iw2n[iw] = 0; - } - ucell.set_iat2iwt(1); - init_parav(); + { + ucell.atoms[0].iw2l[iw] = 0; + ucell.atoms[0].iw2m[iw] = 0; + ucell.atoms[0].iw2n[iw] = 0; + } + ucell.set_iat2iwt (1); + init_parav (); // set paraV - init_parav(); + init_parav (); } - void TearDown() override + void + TearDown () override { delete paraV; delete[] ucell.atoms; } #ifdef __MPI - void init_parav() + void + init_parav () { int nb = 2; int global_row = test_size * test_nw; int global_col = test_size * test_nw; std::ofstream ofs_running; - paraV = new Parallel_Orbitals(); - paraV->init(global_row, global_col, nb, MPI_COMM_WORLD); - paraV->set_atomic_trace(ucell.get_iat2iwt(), test_size, global_row); + paraV = new Parallel_Orbitals (); + paraV->init (global_row, global_col, nb, MPI_COMM_WORLD); + paraV->set_atomic_trace (ucell.get_iat2iwt (), test_size, global_row); } #else - void init_parav() + void + init_parav () { } #endif }; // test for construct DMR from GridD and UnitCell -TEST_F(DMTest, DMInit1) +TEST_F (DMTest, DMInit1) { // initalize a kvectors K_Vectors* kv = nullptr; int nspin = 1; int nks = 2; // since nspin = 1 kv = new K_Vectors; - kv->set_nks(nks); - kv->kvec_d.resize(nks); + kv->set_nks (nks); + kv->kvec_d.resize (nks); kv->kvec_d[1].x = 0.5; // construct DM std::cout << "dim0: " << paraV->dim0 << " dim1:" << paraV->dim1 << std::endl; std::cout << "nrow: " << paraV->nrow << " ncol:" << paraV->ncol << std::endl; - elecstate::DensityMatrix DM(paraV, nspin, kv->kvec_d, 
nks); + elecstate::DensityMatrix DM (paraV, nspin, kv->kvec_d, nks); // initialize this->_DMR - Grid_Driver gd(0,0); - DM.init_DMR(&gd, &ucell); + Grid_Driver gd (0, 0); + DM.init_DMR (&gd, &ucell); // compare - EXPECT_EQ(DM.get_DMR_pointer(1)->size_atom_pairs(), test_size * test_size); - EXPECT_EQ(DM.get_DMR_pointer(1)->get_atom_pair(2, 2).get_atom_i(), 2); - EXPECT_EQ(DM.get_DMR_pointer(1)->get_atom_pair(2, 2).get_atom_j(), 2); - EXPECT_EQ(DM.get_DMR_pointer(1)->get_atom_pair(2, 2).get_row_size(), paraV->get_row_size(2)); - EXPECT_EQ(DM.get_DMR_pointer(1)->get_atom_pair(2, 2).get_col_size(), paraV->get_col_size(2)); + EXPECT_EQ (DM.get_DMR_pointer (1)->size_atom_pairs (), test_size * test_size); + EXPECT_EQ (DM.get_DMR_pointer (1)->get_atom_pair (2, 2).get_atom_i (), 2); + EXPECT_EQ (DM.get_DMR_pointer (1)->get_atom_pair (2, 2).get_atom_j (), 2); + EXPECT_EQ (DM.get_DMR_pointer (1)->get_atom_pair (2, 2).get_row_size (), paraV->get_row_size (2)); + EXPECT_EQ (DM.get_DMR_pointer (1)->get_atom_pair (2, 2).get_col_size (), paraV->get_col_size (2)); delete kv; } // test for construct DMR from RA and UnitCell -TEST_F(DMTest, DMInit2) +TEST_F (DMTest, DMInit2) { // initalize a kvectors K_Vectors* kv = nullptr; int nspin = 1; int nks = 2; // since nspin = 1 kv = new K_Vectors; - kv->set_nks(nks); - kv->kvec_d.resize(nks); + kv->set_nks (nks); + kv->kvec_d.resize (nks); kv->kvec_d[1].x = 0.5; // construct DM std::cout << "dim0: " << paraV->dim0 << " dim1:" << paraV->dim1 << std::endl; std::cout << "nrow: " << paraV->nrow << " ncol:" << paraV->ncol << std::endl; - elecstate::DensityMatrix DM(paraV, nspin, kv->kvec_d, nks); + elecstate::DensityMatrix DM (paraV, nspin, kv->kvec_d, nks); // initialize Record_adj using Grid_Driver - Grid_Driver gd(0,0); + Grid_Driver gd (0, 0); Record_adj ra; ra.na_each = new int[ucell.nat]; ra.info = new int**[ucell.nat]; for (int iat1 = 0; iat1 < ucell.nat; iat1++) - { - auto tau1 = ucell.get_tau(iat1); - int T1, I1; - ucell.iat2iait(iat1, 
&I1, &T1); - AdjacentAtomInfo adjs; - gd.Find_atom(ucell, tau1, T1, I1, &adjs); - ra.na_each[iat1] = adjs.adj_num + 1; - ra.info[iat1] = new int*[ra.na_each[iat1]]; - for (int ad = 0; ad < ra.na_each[iat1]; ++ad) { - ra.info[iat1][ad] = new int[5]; - const int T2 = adjs.ntype[ad]; - const int I2 = adjs.natom[ad]; - ra.info[iat1][ad][3] = T2; - ra.info[iat1][ad][4] = I2; - ModuleBase::Vector3& R_index = adjs.box[ad]; - ra.info[iat1][ad][0] = R_index.x; - ra.info[iat1][ad][1] = R_index.y; - ra.info[iat1][ad][2] = R_index.z; - ra.info[iat1][ad][3] = T2; - ra.info[iat1][ad][4] = I2; + auto tau1 = ucell.get_tau (iat1); + int T1, I1; + ucell.iat2iait (iat1, &I1, &T1); + AdjacentAtomInfo adjs; + gd.Find_atom (ucell, tau1, T1, I1, &adjs); + ra.na_each[iat1] = adjs.adj_num + 1; + ra.info[iat1] = new int*[ra.na_each[iat1]]; + for (int ad = 0; ad < ra.na_each[iat1]; ++ad) + { + ra.info[iat1][ad] = new int[5]; + const int T2 = adjs.ntype[ad]; + const int I2 = adjs.natom[ad]; + ra.info[iat1][ad][3] = T2; + ra.info[iat1][ad][4] = I2; + ModuleBase::Vector3& R_index = adjs.box[ad]; + ra.info[iat1][ad][0] = R_index.x; + ra.info[iat1][ad][1] = R_index.y; + ra.info[iat1][ad][2] = R_index.z; + ra.info[iat1][ad][3] = T2; + ra.info[iat1][ad][4] = I2; + } } - } - DM.init_DMR(ra, &ucell); + DM.init_DMR (ra, &ucell); // compare - EXPECT_EQ(DM.get_DMR_pointer(1)->size_atom_pairs(), test_size * test_size); - EXPECT_EQ(DM.get_DMR_pointer(1)->get_atom_pair(2, 2).get_atom_i(), 2); - EXPECT_EQ(DM.get_DMR_pointer(1)->get_atom_pair(2, 2).get_atom_j(), 2); - EXPECT_EQ(DM.get_DMR_pointer(1)->get_atom_pair(2, 2).get_row_size(), paraV->get_row_size(2)); - EXPECT_EQ(DM.get_DMR_pointer(1)->get_atom_pair(2, 2).get_col_size(), paraV->get_col_size(2)); + EXPECT_EQ (DM.get_DMR_pointer (1)->size_atom_pairs (), test_size * test_size); + EXPECT_EQ (DM.get_DMR_pointer (1)->get_atom_pair (2, 2).get_atom_i (), 2); + EXPECT_EQ (DM.get_DMR_pointer (1)->get_atom_pair (2, 2).get_atom_j (), 2); + EXPECT_EQ 
(DM.get_DMR_pointer (1)->get_atom_pair (2, 2).get_row_size (), paraV->get_row_size (2)); + EXPECT_EQ (DM.get_DMR_pointer (1)->get_atom_pair (2, 2).get_col_size (), paraV->get_col_size (2)); // release memory delete kv; for (int iat1 = 0; iat1 < ucell.nat; iat1++) - { - for (int ad = 0; ad < ra.na_each[iat1]; ++ad) { - delete[] ra.info[iat1][ad]; + for (int ad = 0; ad < ra.na_each[iat1]; ++ad) + { + delete[] ra.info[iat1][ad]; + } + delete[] ra.info[iat1]; } - delete[] ra.info[iat1]; - } delete[] ra.info; } // test for construct DMR from another HContainer -TEST_F(DMTest, DMInit3) +TEST_F (DMTest, DMInit3) { // initalize a kvectors K_Vectors* kv = nullptr; int nspin = 2; int nks = 4; // since nspin = 2 kv = new K_Vectors; - kv->set_nks(nks); - kv->kvec_d.resize(nks); + kv->set_nks (nks); + kv->kvec_d.resize (nks); kv->kvec_d[1].x = 0.5; kv->kvec_d[3].x = 0.5; // construct a DM - elecstate::DensityMatrix, double> DM(paraV, nspin, kv->kvec_d, kv->get_nks() / nspin); - Grid_Driver gd(0, 0); - DM.init_DMR(&gd, &ucell); + elecstate::DensityMatrix, double> DM (paraV, nspin, kv->kvec_d, kv->get_nks () / nspin); + Grid_Driver gd (0, 0); + DM.init_DMR (&gd, &ucell); std::cout << "dim0: " << paraV->dim0 << " dim1:" << paraV->dim1 << std::endl; // construct another DM - elecstate::DensityMatrix, double> DM1(paraV, nspin, kv->kvec_d, kv->get_nks() / nspin); - DM1.init_DMR(*DM.get_DMR_pointer(1)); + elecstate::DensityMatrix, double> DM1 (paraV, nspin, kv->kvec_d, kv->get_nks () / nspin); + DM1.init_DMR (*DM.get_DMR_pointer (1)); // compare - EXPECT_EQ(DM1.get_DMR_pointer(2)->size_atom_pairs(), test_size * test_size); - EXPECT_EQ(DM1.get_DMR_pointer(2)->get_atom_pair(2, 2).get_atom_i(), 2); - EXPECT_EQ(DM1.get_DMR_pointer(1)->get_atom_pair(2, 2).get_atom_j(), 2); - EXPECT_EQ(DM1.get_DMR_pointer(1)->get_atom_pair(2, 2).get_row_size(), paraV->get_row_size(2)); - EXPECT_EQ(DM1.get_DMR_pointer(2)->get_atom_pair(2, 2).get_col_size(), paraV->get_col_size(2)); + EXPECT_EQ 
(DM1.get_DMR_pointer (2)->size_atom_pairs (), test_size * test_size); + EXPECT_EQ (DM1.get_DMR_pointer (2)->get_atom_pair (2, 2).get_atom_i (), 2); + EXPECT_EQ (DM1.get_DMR_pointer (1)->get_atom_pair (2, 2).get_atom_j (), 2); + EXPECT_EQ (DM1.get_DMR_pointer (1)->get_atom_pair (2, 2).get_row_size (), paraV->get_row_size (2)); + EXPECT_EQ (DM1.get_DMR_pointer (2)->get_atom_pair (2, 2).get_col_size (), paraV->get_col_size (2)); // delete kv; } // test for construct DMR from another HContainer > -TEST_F(DMTest, DMInit4) +TEST_F (DMTest, DMInit4) { // initalize a kvectors K_Vectors* kv = nullptr; int nspin = 2; int nks = 4; // since nspin = 2 kv = new K_Vectors; - kv->set_nks(nks); - kv->kvec_d.resize(nks); + kv->set_nks (nks); + kv->kvec_d.resize (nks); kv->kvec_d[1].x = 0.5; kv->kvec_d[3].x = 0.5; // construct a new HContainer - Grid_Driver gd(0, 0); + Grid_Driver gd (0, 0); hamilt::HContainer>* tmp_DMR; - tmp_DMR = new hamilt::HContainer>(paraV); + tmp_DMR = new hamilt::HContainer> (paraV); // set up a HContainer for (int iat1 = 0; iat1 < ucell.nat; iat1++) - { - auto tau1 = ucell.get_tau(iat1); - int T1, I1; - ucell.iat2iait(iat1, &I1, &T1); - AdjacentAtomInfo adjs; - gd.Find_atom(ucell, tau1, T1, I1, &adjs); - // std::cout << "adjs.adj_num: " <get_row_size(iat1) <= 0 || paraV->get_col_size(iat2) <= 0) - { - continue; - } - ModuleBase::Vector3& R_index = adjs.box[ad]; - // std::cout << "R_index: " << R_index.x << " " << R_index.y << " " << R_index.z << std::endl; - hamilt::AtomPair> tmp_ap(iat1, iat2, R_index.x, R_index.y, R_index.z, paraV); - tmp_DMR->insert_pair(tmp_ap); + auto tau1 = ucell.get_tau (iat1); + int T1, I1; + ucell.iat2iait (iat1, &I1, &T1); + AdjacentAtomInfo adjs; + gd.Find_atom (ucell, tau1, T1, I1, &adjs); + // std::cout << "adjs.adj_num: " <get_row_size (iat1) <= 0 || paraV->get_col_size (iat2) <= 0) + { + continue; + } + ModuleBase::Vector3& R_index = adjs.box[ad]; + // std::cout << "R_index: " << R_index.x << " " << R_index.y << " " << 
R_index.z << std::endl; + hamilt::AtomPair> tmp_ap (iat1, iat2, R_index.x, R_index.y, R_index.z, paraV); + tmp_DMR->insert_pair (tmp_ap); + } } - } // construct a DM from this HContainer - elecstate::DensityMatrix, double> DM(paraV, nspin, kv->kvec_d, kv->get_nks() / nspin); - DM.init_DMR(*tmp_DMR); + elecstate::DensityMatrix, double> DM (paraV, nspin, kv->kvec_d, kv->get_nks () / nspin); + DM.init_DMR (*tmp_DMR); std::cout << "dim0: " << paraV->dim0 << " dim1:" << paraV->dim1 << std::endl; // compare - EXPECT_EQ(DM.get_DMR_pointer(2)->size_atom_pairs(), test_size * test_size); - EXPECT_EQ(DM.get_DMR_pointer(2)->get_atom_pair(2, 2).get_atom_i(), 2); - EXPECT_EQ(DM.get_DMR_pointer(1)->get_atom_pair(2, 2).get_atom_j(), 2); - EXPECT_EQ(DM.get_DMR_pointer(1)->get_atom_pair(2, 2).get_row_size(), paraV->get_row_size(2)); - EXPECT_EQ(DM.get_DMR_pointer(2)->get_atom_pair(2, 2).get_col_size(), paraV->get_col_size(2)); + EXPECT_EQ (DM.get_DMR_pointer (2)->size_atom_pairs (), test_size * test_size); + EXPECT_EQ (DM.get_DMR_pointer (2)->get_atom_pair (2, 2).get_atom_i (), 2); + EXPECT_EQ (DM.get_DMR_pointer (1)->get_atom_pair (2, 2).get_atom_j (), 2); + EXPECT_EQ (DM.get_DMR_pointer (1)->get_atom_pair (2, 2).get_row_size (), paraV->get_row_size (2)); + EXPECT_EQ (DM.get_DMR_pointer (2)->get_atom_pair (2, 2).get_col_size (), paraV->get_col_size (2)); // delete kv; } // test for save_DMR -TEST_F(DMTest, saveDMR) +TEST_F (DMTest, saveDMR) { // initalize a kvectors K_Vectors* kv = nullptr; int nspin = 2; int nks = 4; // since nspin = 2 kv = new K_Vectors; - kv->set_nks(nks); - kv->kvec_d.resize(nks); + kv->set_nks (nks); + kv->kvec_d.resize (nks); kv->kvec_d[1].x = 0.5; kv->kvec_d[3].x = 0.5; // construct a DM - elecstate::DensityMatrix, double> DM(paraV, nspin, kv->kvec_d, kv->get_nks() / nspin); - Grid_Driver gd(0, 0); - DM.init_DMR(&gd, &ucell); + elecstate::DensityMatrix, double> DM (paraV, nspin, kv->kvec_d, kv->get_nks () / nspin); + Grid_Driver gd (0, 0); + DM.init_DMR 
(&gd, &ucell); // construct another DM - elecstate::DensityMatrix, double> DM_test(paraV, nspin, kv->kvec_d, kv->get_nks() / nspin); - DM_test.init_DMR(*DM.get_DMR_pointer(1)); - DM_test.save_DMR(); - EXPECT_EQ(DM_test.get_DMR_pointer(1)->get_nnr(), DM.get_DMR_pointer(1)->get_nnr()); - EXPECT_EQ(DM_test.get_DMR_pointer(1)->get_nnr(), DM_test._DMR_save[0].size()); + elecstate::DensityMatrix, double> DM_test (paraV, nspin, kv->kvec_d, kv->get_nks () / nspin); + DM_test.init_DMR (*DM.get_DMR_pointer (1)); + DM_test.save_DMR (); + EXPECT_EQ (DM_test.get_DMR_pointer (1)->get_nnr (), DM.get_DMR_pointer (1)->get_nnr ()); + EXPECT_EQ (DM_test.get_DMR_pointer (1)->get_nnr (), DM_test._DMR_save[0].size ()); // add a new AtomPair, act as a relaxation - hamilt::AtomPair tmp_ap(9, 9, 1, 0, 0, paraV); - DM_test.get_DMR_pointer(1)->insert_pair(tmp_ap); - DM_test.get_DMR_pointer(1)->allocate(); + hamilt::AtomPair tmp_ap (9, 9, 1, 0, 0, paraV); + DM_test.get_DMR_pointer (1)->insert_pair (tmp_ap); + DM_test.get_DMR_pointer (1)->allocate (); // update DMR_save - DM_test.save_DMR(); - EXPECT_EQ(DM_test.get_DMR_pointer(1)->get_nnr(), DM_test._DMR_save[0].size()); - // delete - delete kv; + DM_test.save_DMR (); + EXPECT_EQ (DM_test.get_DMR_pointer (1)->get_nnr (), DM_test._DMR_save[0].size ()); + // delete + delete kv; } -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; } diff --git a/source/source_estate/module_dm/test/test_dm_constructor.cpp b/source/source_estate/module_dm/test/test_dm_constructor.cpp index f82abf8676b..92cbacb4c52 100644 --- a/source/source_estate/module_dm/test/test_dm_constructor.cpp +++ b/source/source_estate/module_dm/test/test_dm_constructor.cpp @@ -25,12 
+25,13 @@ class DMTest : public testing::Test int dsize; int my_rank = 0; UnitCell ucell; - void SetUp() override + void + SetUp () override { #ifdef __MPI // MPI parallel settings - MPI_Comm_size(MPI_COMM_WORLD, &dsize); - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size (MPI_COMM_WORLD, &dsize); + MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); #endif // set up a unitcell, with one element and test_size atoms, each atom has test_nw orbitals @@ -39,213 +40,217 @@ class DMTest : public testing::Test ucell.atoms = new Atom[ucell.ntype]; ucell.iat2it = new int[ucell.nat]; ucell.iat2ia = new int[ucell.nat]; - ucell.atoms[0].tau.resize(ucell.nat); - ucell.itia2iat.create(ucell.ntype, ucell.nat); + ucell.atoms[0].tau.resize (ucell.nat); + ucell.itia2iat.create (ucell.ntype, ucell.nat); for (int iat = 0; iat < ucell.nat; iat++) - { - ucell.iat2it[iat] = 0; - ucell.iat2ia[iat] = iat; - ucell.atoms[0].tau[iat] = ModuleBase::Vector3(0.0, 0.0, 0.0); - ucell.itia2iat(0, iat) = iat; - } + { + ucell.iat2it[iat] = 0; + ucell.iat2ia[iat] = iat; + ucell.atoms[0].tau[iat] = ModuleBase::Vector3 (0.0, 0.0, 0.0); + ucell.itia2iat (0, iat) = iat; + } ucell.atoms[0].na = test_size; ucell.atoms[0].nw = test_nw; - ucell.atoms[0].iw2l.resize(test_nw); - ucell.atoms[0].iw2m.resize(test_nw); - ucell.atoms[0].iw2n.resize(test_nw); + ucell.atoms[0].iw2l.resize (test_nw); + ucell.atoms[0].iw2m.resize (test_nw); + ucell.atoms[0].iw2n.resize (test_nw); for (int iw = 0; iw < test_nw; ++iw) - { - ucell.atoms[0].iw2l[iw] = 0; - ucell.atoms[0].iw2m[iw] = 0; - ucell.atoms[0].iw2n[iw] = 0; - } - ucell.set_iat2iwt(1); - init_parav(); + { + ucell.atoms[0].iw2l[iw] = 0; + ucell.atoms[0].iw2m[iw] = 0; + ucell.atoms[0].iw2n[iw] = 0; + } + ucell.set_iat2iwt (1); + init_parav (); // set paraV - init_parav(); + init_parav (); } - void TearDown() + void + TearDown () { delete paraV; delete[] ucell.atoms; } #ifdef __MPI - void init_parav() + void + init_parav () { int nb = 2; int global_row = test_size * 
test_nw; int global_col = test_size * test_nw; std::ofstream ofs_running; - paraV = new Parallel_Orbitals(); - paraV->init(global_row, global_col, nb, MPI_COMM_WORLD); - paraV->set_atomic_trace(ucell.get_iat2iwt(), test_size, global_row); + paraV = new Parallel_Orbitals (); + paraV->init (global_row, global_col, nb, MPI_COMM_WORLD); + paraV->set_atomic_trace (ucell.get_iat2iwt (), test_size, global_row); } #else - void init_parav() + void + init_parav () { } #endif }; -TEST_F(DMTest, DMConstructor_GammaOnly) +TEST_F (DMTest, DMConstructor_GammaOnly) { // construct DM std::cout << "dim0: " << paraV->dim0 << " dim1:" << paraV->dim1 << std::endl; std::cout << "nrow: " << paraV->nrow << " ncol:" << paraV->ncol << std::endl; int nspin = 2; - elecstate::DensityMatrix DM(paraV, nspin); + elecstate::DensityMatrix DM (paraV, nspin); // compare - EXPECT_EQ(DM.get_DMK_size(), nspin); - EXPECT_EQ(DM.get_DMK_nrow(), paraV->nrow); - EXPECT_EQ(DM.get_DMK_ncol(), paraV->ncol); + EXPECT_EQ (DM.get_DMK_size (), nspin); + EXPECT_EQ (DM.get_DMK_nrow (), paraV->nrow); + EXPECT_EQ (DM.get_DMK_ncol (), paraV->ncol); } -TEST_F(DMTest, DMConstructor_nspin1) +TEST_F (DMTest, DMConstructor_nspin1) { // initalize a kvectors K_Vectors* kv = nullptr; int nks = 2; kv = new K_Vectors; - kv->set_nks(nks); - kv->kvec_d.resize(nks); + kv->set_nks (nks); + kv->kvec_d.resize (nks); kv->kvec_d[1].x = 0.5; // construct DM std::cout << "dim0: " << paraV->dim0 << " dim1:" << paraV->dim1 << std::endl; std::cout << "nrow: " << paraV->nrow << " ncol:" << paraV->ncol << std::endl; int nspin = 1; - elecstate::DensityMatrix DM(paraV, nspin, kv->kvec_d, nks); + elecstate::DensityMatrix DM (paraV, nspin, kv->kvec_d, nks); // compare - EXPECT_EQ(DM.get_DMK_nks(), kv->get_nks()); - EXPECT_EQ(DM.get_DMK_nrow(), paraV->nrow); - EXPECT_EQ(DM.get_DMK_ncol(), paraV->ncol); + EXPECT_EQ (DM.get_DMK_nks (), kv->get_nks ()); + EXPECT_EQ (DM.get_DMK_nrow (), paraV->nrow); + EXPECT_EQ (DM.get_DMK_ncol (), paraV->ncol); // set 
elements of DMK for (int is = 1; is <= nspin; is++) - { - for (int ik = 0; ik < kv->get_nks() / nspin; ik++) { - for (int i = 0; i < paraV->nrow; i++) - { - for (int j = 0; j < paraV->ncol; j++) + for (int ik = 0; ik < kv->get_nks () / nspin; ik++) { - DM.set_DMK(is, ik, i, j, is + ik * i + j); + for (int i = 0; i < paraV->nrow; i++) + { + for (int j = 0; j < paraV->ncol; j++) + { + DM.set_DMK (is, ik, i, j, is + ik * i + j); + } + } } - } } - } // compare for (int is = 1; is <= nspin; is++) - { - for (int ik = 0; ik < kv->get_nks() / nspin; ik++) { - for (int i = 0; i < paraV->nrow; i++) - { - for (int j = 0; j < paraV->ncol; j++) + for (int ik = 0; ik < kv->get_nks () / nspin; ik++) { - EXPECT_EQ(DM.get_DMK(is, ik, i, j), is + ik * i + j); + for (int i = 0; i < paraV->nrow; i++) + { + for (int j = 0; j < paraV->ncol; j++) + { + EXPECT_EQ (DM.get_DMK (is, ik, i, j), is + ik * i + j); + } + } } - } } - } // test for get_DMK_pointer for (int is = 1; is <= nspin; is++) - { - int ik_begin = (is - 1) * kv->get_nks() / nspin; - for (int ik = 0; ik < kv->get_nks() / nspin; ik++) { - double* ptr = DM.get_DMK_pointer(ik + ik_begin); - for (int i = 0; i < paraV->nrow; i++) - { - for (int j = 0; j < paraV->ncol; j++) + int ik_begin = (is - 1) * kv->get_nks () / nspin; + for (int ik = 0; ik < kv->get_nks () / nspin; ik++) { - // std::cout << ptr[i*paraV->ncol+j] << " "; - EXPECT_EQ(ptr[i * paraV->ncol + j], is + ik * i + j); + double* ptr = DM.get_DMK_pointer (ik + ik_begin); + for (int i = 0; i < paraV->nrow; i++) + { + for (int j = 0; j < paraV->ncol; j++) + { + // std::cout << ptr[i*paraV->ncol+j] << " "; + EXPECT_EQ (ptr[i * paraV->ncol + j], is + ik * i + j); + } + } } - } } - } // delete kv delete kv; } -TEST_F(DMTest, DMConstructor_nspin2) +TEST_F (DMTest, DMConstructor_nspin2) { // initalize a kvectors K_Vectors* kv = nullptr; int nspin = 2; int nks = 4; // since nspin = 2 kv = new K_Vectors; - kv->set_nks(nks); - kv->kvec_d.resize(nks); + kv->set_nks (nks); + 
kv->kvec_d.resize (nks); kv->kvec_d[1].x = 0.5; kv->kvec_d[3].x = 0.5; // construct DM std::cout << "dim0: " << paraV->dim0 << " dim1:" << paraV->dim1 << std::endl; std::cout << "nrow: " << paraV->nrow << " ncol:" << paraV->ncol << std::endl; - elecstate::DensityMatrix DM(paraV, nspin, kv->kvec_d, kv->get_nks() / nspin); + elecstate::DensityMatrix DM (paraV, nspin, kv->kvec_d, kv->get_nks () / nspin); // compare - EXPECT_EQ(DM.get_DMK_nks(), kv->get_nks()); - EXPECT_EQ(DM.get_DMK_nrow(), paraV->nrow); - EXPECT_EQ(DM.get_DMK_ncol(), paraV->ncol); + EXPECT_EQ (DM.get_DMK_nks (), kv->get_nks ()); + EXPECT_EQ (DM.get_DMK_nrow (), paraV->nrow); + EXPECT_EQ (DM.get_DMK_ncol (), paraV->ncol); // set elements of DMK for (int is = 1; is <= nspin; is++) - { - for (int ik = 0; ik < kv->get_nks() / nspin; ik++) { - for (int i = 0; i < paraV->nrow; i++) - { - for (int j = 0; j < paraV->ncol; j++) + for (int ik = 0; ik < kv->get_nks () / nspin; ik++) { - DM.set_DMK(is, ik, i, j, ik * i + j); + for (int i = 0; i < paraV->nrow; i++) + { + for (int j = 0; j < paraV->ncol; j++) + { + DM.set_DMK (is, ik, i, j, ik * i + j); + } + } } - } } - } // compare - for (int ik = 0; ik < kv->get_nks() / nspin; ik++) - { - for (int i = 0; i < paraV->nrow; i++) + for (int ik = 0; ik < kv->get_nks () / nspin; ik++) { - for (int j = 0; j < paraV->ncol; j++) - { - EXPECT_EQ(DM.get_DMK(1, ik, i, j), ik * i + j); - EXPECT_EQ(DM.get_DMK(1, ik, i, j), DM.get_DMK(2, ik, i, j)); - } + for (int i = 0; i < paraV->nrow; i++) + { + for (int j = 0; j < paraV->ncol; j++) + { + EXPECT_EQ (DM.get_DMK (1, ik, i, j), ik * i + j); + EXPECT_EQ (DM.get_DMK (1, ik, i, j), DM.get_DMK (2, ik, i, j)); + } + } } - } // test for get_DMK_pointer for (int is = 1; is <= nspin; is++) - { - int ik_begin = (is - 1) * kv->get_nks() / nspin; - for (int ik = 0; ik < kv->get_nks() / nspin; ik++) { - double* ptr = DM.get_DMK_pointer(ik + ik_begin); - for (int i = 0; i < paraV->nrow; i++) - { - for (int j = 0; j < paraV->ncol; j++) + 
int ik_begin = (is - 1) * kv->get_nks () / nspin; + for (int ik = 0; ik < kv->get_nks () / nspin; ik++) { - // std::cout << ptr[i*paraV->ncol+j] << " "; - EXPECT_EQ(ptr[i * paraV->ncol + j], ik * i + j); + double* ptr = DM.get_DMK_pointer (ik + ik_begin); + for (int i = 0; i < paraV->nrow; i++) + { + for (int j = 0; j < paraV->ncol; j++) + { + // std::cout << ptr[i*paraV->ncol+j] << " "; + EXPECT_EQ (ptr[i * paraV->ncol + j], ik * i + j); + } + } } - } } - } // delete kv delete kv; } -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); #endif return result; } diff --git a/source/source_estate/module_dm/test/test_dm_io.cpp b/source/source_estate/module_dm/test/test_dm_io.cpp index 380595429e1..c66475083cb 100644 --- a/source/source_estate/module_dm/test/test_dm_io.cpp +++ b/source/source_estate/module_dm/test/test_dm_io.cpp @@ -8,51 +8,37 @@ // mock functions #ifdef __LCAO -InfoNonlocal::InfoNonlocal() -{ -} -InfoNonlocal::~InfoNonlocal() -{ -} -LCAO_Orbitals::LCAO_Orbitals() -{ -} -LCAO_Orbitals::~LCAO_Orbitals() -{ -} +InfoNonlocal::InfoNonlocal () {} +InfoNonlocal::~InfoNonlocal () {} +LCAO_Orbitals::LCAO_Orbitals () {} +LCAO_Orbitals::~LCAO_Orbitals () {} #endif -Magnetism::Magnetism() +Magnetism::Magnetism () { this->tot_mag = 0.0; this->abs_mag = 0.0; this->start_mag = nullptr; } -Magnetism::~Magnetism() -{ - delete[] this->start_mag; -} +Magnetism::~Magnetism () { delete[] this->start_mag; } #include "source_cell/klist.h" #include "source_cell/module_neighbor/sltk_grid_driver.h" // mock find_atom() function -void Grid_Driver::Find_atom(const UnitCell& ucell, +void + Grid_Driver::Find_atom (const UnitCell& ucell, const ModuleBase::Vector3& tau, const int& T, const int& I, 
AdjacentAtomInfo* adjs) const { } -Grid::Grid(const int& test_grid_in) : test_grid(test_grid_in) -{ -} -Grid::~Grid() -{ -} -Grid_Driver::Grid_Driver(const int& test_d_in,const int& test_grid_in) - : Grid(test_grid_in), test_deconstructor(test_d_in){} -Grid_Driver::~Grid_Driver() +Grid::Grid (const int& test_grid_in) : test_grid (test_grid_in) {} +Grid::~Grid () {} +Grid_Driver::Grid_Driver (const int& test_d_in, const int& test_grid_in) + : Grid (test_grid_in), test_deconstructor (test_d_in) { } +Grid_Driver::~Grid_Driver () {} // mocke functions /************************************************ @@ -83,99 +69,104 @@ class DMTest : public testing::Test std::vector nw = {13}; int nks = 2; int nlocal = 0; - void SetUp() override + void + SetUp () override { #ifdef __MPI // MPI parallel settings - MPI_Comm_size(MPI_COMM_WORLD, &dsize); - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size (MPI_COMM_WORLD, &dsize); + MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); #endif // initalize a unitcell - ucell = utp.SetUcellInfo(nw, nlocal); - ucell->set_iat2iwt(1); + ucell = utp.SetUcellInfo (nw, nlocal); + ucell->set_iat2iwt (1); // initalize a kvectors kv = new K_Vectors; - kv->set_nks(nks); - kv->kvec_d.resize(nks); + kv->set_nks (nks); + kv->kvec_d.resize (nks); kv->kvec_d[1].x = 0.5; // set paraV - init_parav(); + init_parav (); } - void TearDown() override + void + TearDown () override { - DMK.clear(); + DMK.clear (); delete kv; delete paraV; } #ifdef __MPI - void init_parav() + void + init_parav () { int nb = 2; int global_row = test_size * test_nw; int global_col = test_size * test_nw; std::ofstream ofs_running; - paraV = new Parallel_Orbitals(); - paraV->init(global_row, global_col, nb, MPI_COMM_WORLD); - paraV->set_atomic_trace(ucell->get_iat2iwt(), test_size, global_row); + paraV = new Parallel_Orbitals (); + paraV->init (global_row, global_col, nb, MPI_COMM_WORLD); + paraV->set_atomic_trace (ucell->get_iat2iwt (), test_size, global_row); } #else - void init_parav() 
+ void + init_parav () { } #endif }; -TEST_F(DMTest, DMConstructor1) +TEST_F (DMTest, DMConstructor1) { // int nspin = 1; // construct DM std::cout << paraV->nrow << paraV->ncol << std::endl; - elecstate::DensityMatrix DM(paraV, nspin, kv->kvec_d, kv->get_nks()); + elecstate::DensityMatrix DM (paraV, nspin, kv->kvec_d, kv->get_nks ()); // read DMK std::string directory = "./support/"; for (int is = 1; is <= nspin; ++is) - { - for (int ik = 0; ik < kv->get_nks() / nspin; ++ik) { - DM.read_DMK(directory, is, ik); + for (int ik = 0; ik < kv->get_nks () / nspin; ++ik) + { + DM.read_DMK (directory, is, ik); + } } - } // write DMK directory = "./support/output"; for (int is = 1; is <= nspin; ++is) - { - for (int ik = 0; ik < kv->get_nks() / nspin; ++ik) { - DM.write_DMK(directory, is, ik); + for (int ik = 0; ik < kv->get_nks () / nspin; ++ik) + { + DM.write_DMK (directory, is, ik); + } } - } // construct a new DM - elecstate::DensityMatrix DM1(paraV, nspin, kv->kvec_d, kv->get_nks()); + elecstate::DensityMatrix DM1 (paraV, nspin, kv->kvec_d, kv->get_nks ()); directory = "./support/output"; for (int is = 1; is <= nspin; ++is) - { - for (int ik = 0; ik < kv->get_nks() / nspin; ++ik) { - DM1.read_DMK(directory, is, ik); + for (int ik = 0; ik < kv->get_nks () / nspin; ++ik) + { + DM1.read_DMK (directory, is, ik); + } } - } // compare DMK1 with DMK - EXPECT_NEAR(DM.get_DMK(1, 0, 0, 0), DM1.get_DMK(1, 0, 0, 0), 1e-6); - EXPECT_NEAR(DM.get_DMK(1, 1, 25, 25), DM1.get_DMK(1, 1, 25, 25), 1e-6); + EXPECT_NEAR (DM.get_DMK (1, 0, 0, 0), DM1.get_DMK (1, 0, 0, 0), 1e-6); + EXPECT_NEAR (DM.get_DMK (1, 1, 25, 25), DM1.get_DMK (1, 1, 25, 25), 1e-6); } -int main(int argc, char** argv) +int + main (int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init (&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize (); 
#endif return result; } diff --git a/source/source_estate/module_dm/test/tmp_mocks.cpp b/source/source_estate/module_dm/test/tmp_mocks.cpp index dcf803ca5f1..9f7682ca665 100644 --- a/source/source_estate/module_dm/test/tmp_mocks.cpp +++ b/source/source_estate/module_dm/test/tmp_mocks.cpp @@ -3,118 +3,84 @@ #include "source_cell/unitcell.h" // constructor of Atom -Atom::Atom() -{ -} -Atom::~Atom() -{ -} +Atom::Atom () {} +Atom::~Atom () {} -Atom_pseudo::Atom_pseudo() -{ -} -Atom_pseudo::~Atom_pseudo() -{ -} +Atom_pseudo::Atom_pseudo () {} +Atom_pseudo::~Atom_pseudo () {} -Magnetism::Magnetism() -{ -} -Magnetism::~Magnetism() -{ -} +Magnetism::Magnetism () {} +Magnetism::~Magnetism () {} #ifdef __LCAO -InfoNonlocal::InfoNonlocal() -{ -} -InfoNonlocal::~InfoNonlocal() -{ -} -LCAO_Orbitals::LCAO_Orbitals() -{ -} -LCAO_Orbitals::~LCAO_Orbitals() -{ -} +InfoNonlocal::InfoNonlocal () {} +InfoNonlocal::~InfoNonlocal () {} +LCAO_Orbitals::LCAO_Orbitals () {} +LCAO_Orbitals::~LCAO_Orbitals () {} #endif -pseudo::pseudo() -{ -} -pseudo::~pseudo() -{ -} -SepPot::SepPot(){} -SepPot::~SepPot(){} -Sep_Cell::Sep_Cell() noexcept {} -Sep_Cell::~Sep_Cell() noexcept {} +pseudo::pseudo () {} +pseudo::~pseudo () {} +SepPot::SepPot () {} +SepPot::~SepPot () {} +Sep_Cell::Sep_Cell () noexcept {} +Sep_Cell::~Sep_Cell () noexcept {} // constructor of UnitCell -UnitCell::UnitCell() -{ -} -UnitCell::~UnitCell() -{ -} +UnitCell::UnitCell () {} +UnitCell::~UnitCell () {} -void UnitCell::set_iat2iwt(const int& npol_in) +void + UnitCell::set_iat2iwt (const int& npol_in) { - this->iat2iwt.resize(this->nat); + this->iat2iwt.resize (this->nat); this->npol = npol_in; int iat = 0; int iwt = 0; for (int it = 0; it < this->ntype; it++) - { - for (int ia = 0; ia < atoms[it].na; ia++) { - this->iat2iwt[iat] = iwt; - iwt += atoms[it].nw * this->npol; - ++iat; + for (int ia = 0; ia < atoms[it].na; ia++) + { + this->iat2iwt[iat] = iwt; + iwt += atoms[it].nw * this->npol; + ++iat; + } } - } return; } #include 
"source_cell/module_neighbor/sltk_grid_driver.h" // mock find_atom() function -void Grid_Driver::Find_atom(const UnitCell& ucell, +void + Grid_Driver::Find_atom (const UnitCell& ucell, const ModuleBase::Vector3& tau, const int& T, const int& I, AdjacentAtomInfo* adjs) const { adjs->adj_num = ucell.nat - 1; - adjs->adjacent_tau.resize(ucell.nat); - adjs->ntype.resize(ucell.nat, 0); - adjs->natom.resize(ucell.nat); - adjs->box.resize(ucell.nat); + adjs->adjacent_tau.resize (ucell.nat); + adjs->ntype.resize (ucell.nat, 0); + adjs->natom.resize (ucell.nat); + adjs->box.resize (ucell.nat); for (int iat = 0; iat < ucell.nat; iat++) - { - adjs->natom[iat] = iat; - adjs->box[iat].x = 1; - adjs->box[iat].y = 1; - adjs->box[iat].z = 1; - adjs->adjacent_tau[iat] = ucell.get_tau(iat); - } -} -Grid::Grid(const int& test_grid_in) : test_grid(test_grid_in) -{ -} -Grid::~Grid() -{ + { + adjs->natom[iat] = iat; + adjs->box[iat].x = 1; + adjs->box[iat].y = 1; + adjs->box[iat].z = 1; + adjs->adjacent_tau[iat] = ucell.get_tau (iat); + } } -Grid_Driver::Grid_Driver(const int& test_d_in, const int& test_grid_in) - : Grid(test_grid_in), test_deconstructor(test_d_in){} -Grid_Driver::~Grid_Driver() +Grid::Grid (const int& test_grid_in) : test_grid (test_grid_in) {} +Grid::~Grid () {} +Grid_Driver::Grid_Driver (const int& test_d_in, const int& test_grid_in) + : Grid (test_grid_in), test_deconstructor (test_d_in) { } +Grid_Driver::~Grid_Driver () {} // mock Record_adj #include "source_lcao/record_adj.h" -Record_adj::Record_adj() -{ -} -Record_adj::~Record_adj() -{ -} +Record_adj::Record_adj () {} +Record_adj::~Record_adj () {} diff --git a/source/source_estate/module_pot/H_Hartree_pw.cpp b/source/source_estate/module_pot/H_Hartree_pw.cpp index 4c304ad19a9..82dacb27e75 100644 --- a/source/source_estate/module_pot/H_Hartree_pw.cpp +++ b/source/source_estate/module_pot/H_Hartree_pw.cpp @@ -13,29 +13,32 @@ double H_Hartree_pw::hartree_energy = 0.0; 
//-------------------------------------------------------------------- // Transform charge density to hartree potential. //-------------------------------------------------------------------- -ModuleBase::matrix H_Hartree_pw::v_hartree(const UnitCell &cell, - ModulePW::PW_Basis *rho_basis, - const int &nspin, - const double *const *const rho) +ModuleBase::matrix + H_Hartree_pw::v_hartree (const UnitCell& cell, + ModulePW::PW_Basis* rho_basis, + const int& nspin, + const double* const* const rho) { - ModuleBase::TITLE("H_Hartree_pw", "v_hartree"); - ModuleBase::timer::start("H_Hartree_pw", "v_hartree"); + ModuleBase::TITLE ("H_Hartree_pw", "v_hartree"); + ModuleBase::timer::start ("H_Hartree_pw", "v_hartree"); // Hartree potential VH(r) from n(r) - std::vector> Porter(rho_basis->nmaxgr); + std::vector> Porter (rho_basis->nmaxgr); const int nspin0 = (nspin == 2) ? 2 : 1; for (int is = 0; is < nspin0; is++) - { + { #ifdef _OPENMP #pragma omp parallel for schedule(static, 256) #endif - for (int ir = 0; ir < rho_basis->nrxx; ir++) - Porter[ir] += std::complex(rho[is][ir], 0.0); - } + for (int ir = 0; ir < rho_basis->nrxx; ir++) + { + Porter[ir] += std::complex (rho[is][ir], 0.0); + } + } //============================= // bring rho (aux) to G space //============================= - rho_basis->real2recip(Porter.data(), Porter.data()); + rho_basis->real2recip (Porter.data (), Porter.data ()); //======================================================= // calculate hartree potential in G-space (NB: V(G=0)=0 ) @@ -43,24 +46,23 @@ ModuleBase::matrix H_Hartree_pw::v_hartree(const UnitCell &cell, double ehart = 0.0; - std::vector> vh_g(rho_basis->npw); + std::vector> vh_g (rho_basis->npw); const int ig0 = rho_basis->ig_gge0; #ifdef _OPENMP -#pragma omp parallel for reduction(+:ehart) +#pragma omp parallel for reduction(+ : ehart) #endif for (int ig = 0; ig < rho_basis->npw; ig++) - { - if (ig == ig0) { - continue; // skip G=0 + if (ig == ig0) + { + continue; // skip G=0 + } + 
const double fac = ModuleBase::e2 * ModuleBase::FOUR_PI / (cell.tpiba2 * rho_basis->gg[ig]); + ehart += (conj (Porter[ig]) * Porter[ig]).real () * fac; + vh_g[ig] = fac * Porter[ig]; } - const double fac = ModuleBase::e2 * ModuleBase::FOUR_PI / (cell.tpiba2 * rho_basis->gg[ig]); - ehart += (conj(Porter[ig]) * Porter[ig]).real() * fac; - vh_g[ig] = fac * Porter[ig]; - - } - Parallel_Reduce::reduce_pool(ehart); + Parallel_Reduce::reduce_pool (ehart); ehart *= 0.5 * cell.omega; // std::cout << " ehart=" << ehart << std::endl; H_Hartree_pw::hartree_energy = ehart; @@ -68,44 +70,51 @@ ModuleBase::matrix H_Hartree_pw::v_hartree(const UnitCell &cell, //========================================== // transform hartree potential to real space //========================================== - rho_basis->recip2real(vh_g.data(), Porter.data()); + rho_basis->recip2real (vh_g.data (), Porter.data ()); //========================================== // Add hartree potential to the xc potential //========================================== - ModuleBase::matrix v(nspin, rho_basis->nrxx); + ModuleBase::matrix v (nspin, rho_basis->nrxx); if (nspin == 4) - { + { #ifdef _OPENMP #pragma omp parallel for schedule(static, 512) #endif - for (int ir = 0; ir < rho_basis->nrxx; ir++) - v(0, ir) = Porter[ir].real(); - } + for (int ir = 0; ir < rho_basis->nrxx; ir++) + { + v (0, ir) = Porter[ir].real (); + } + } else - { + { #ifdef _OPENMP #pragma omp parallel for collapse(2) schedule(static, 512) #endif - for (int is = 0; is < nspin; is++) - for (int ir = 0; ir < rho_basis->nrxx; ir++) - v(is, ir) = Porter[ir].real(); - } + for (int is = 0; is < nspin; is++) + { + for (int ir = 0; ir < rho_basis->nrxx; ir++) + { + v (is, ir) = Porter[ir].real (); + } + } + } - ModuleBase::timer::end("H_Hartree_pw", "v_hartree"); + ModuleBase::timer::end ("H_Hartree_pw", "v_hartree"); return v; } // end subroutine v_h -PotHartree::PotHartree(const ModulePW::PW_Basis* rho_basis_in) +PotHartree::PotHartree (const 
ModulePW::PW_Basis* rho_basis_in) { this->rho_basis_ = rho_basis_in; this->dynamic_mode = true; this->fixed_mode = false; } -void PotHartree::cal_v_eff(const Charge*const chg, const UnitCell*const ucell, ModuleBase::matrix& v_eff) +void + PotHartree::cal_v_eff (const Charge* const chg, const UnitCell* const ucell, ModuleBase::matrix& v_eff) { - v_eff += H_Hartree_pw::v_hartree(*ucell, const_cast(this->rho_basis_), v_eff.nr, chg->rho); + v_eff += H_Hartree_pw::v_hartree (*ucell, const_cast (this->rho_basis_), v_eff.nr, chg->rho); return; } diff --git a/source/source_estate/module_pot/H_Hartree_pw.h b/source/source_estate/module_pot/H_Hartree_pw.h index c5ca6bdd0b5..869cff25212 100644 --- a/source/source_estate/module_pot/H_Hartree_pw.h +++ b/source/source_estate/module_pot/H_Hartree_pw.h @@ -12,37 +12,37 @@ namespace elecstate class H_Hartree_pw { public: - H_Hartree_pw(); - ~H_Hartree_pw(); + H_Hartree_pw (); + ~H_Hartree_pw (); // the Hartree energy static double hartree_energy; // compute the Hartree energy - static ModuleBase::matrix v_hartree(const UnitCell &cell, - ModulePW::PW_Basis *rho_basis, - const int &nspin, - const double *const *const rho); + static ModuleBase::matrix v_hartree (const UnitCell& cell, + ModulePW::PW_Basis* rho_basis, + const int& nspin, + const double* const* const rho); - static int get_Z(std::string str); + static int get_Z (std::string str); - static void cast_C2R(std::complex *src, double *dst, int dim); + static void cast_C2R (std::complex* src, double* dst, int dim); - static void lapl_rho(const std::complex *rhog, double *lapn); + static void lapl_rho (const std::complex* rhog, double* lapn); - static void shape_gradn(const std::complex *PS_TOTN, ModulePW::PW_Basis *rho_basis, double *eprime); + static void shape_gradn (const std::complex* PS_TOTN, ModulePW::PW_Basis* rho_basis, double* eprime); - static void eps_pot(const std::complex* PS_TOTN, - const std::complex* phi, - const ModulePW::PW_Basis* rho_basis, - double* d_eps, - 
double* vwork); + static void eps_pot (const std::complex* PS_TOTN, + const std::complex* phi, + const ModulePW::PW_Basis* rho_basis, + double* d_eps, + double* vwork); - static void test_res(const UnitCell &ucell, - ModulePW::PW_Basis *rho_basis, - const std::complex *tot_N, - std::complex *phi, - double *d_eps); + static void test_res (const UnitCell& ucell, + ModulePW::PW_Basis* rho_basis, + const std::complex* tot_N, + std::complex* phi, + double* d_eps); private: }; @@ -56,9 +56,9 @@ namespace elecstate class PotHartree : public PotBase { public: - PotHartree(const ModulePW::PW_Basis* rho_basis_in); + PotHartree (const ModulePW::PW_Basis* rho_basis_in); - void cal_v_eff(const Charge*const chg, const UnitCell*const ucell, ModuleBase::matrix& v_eff); + void cal_v_eff (const Charge* const chg, const UnitCell* const ucell, ModuleBase::matrix& v_eff); }; } // namespace elecstate diff --git a/source/source_estate/module_pot/H_TDDFT_pw.cpp b/source/source_estate/module_pot/H_TDDFT_pw.cpp index a71b2c6654d..bb423da44e2 100644 --- a/source/source_estate/module_pot/H_TDDFT_pw.cpp +++ b/source/source_estate/module_pot/H_TDDFT_pw.cpp @@ -78,33 +78,35 @@ int H_TDDFT_pw::heavi_count; std::vector H_TDDFT_pw::heavi_t0; std::vector H_TDDFT_pw::heavi_amp; // Ry/bohr -void H_TDDFT_pw::current_step_info(const std::string& file_dir, int& istep) +void + H_TDDFT_pw::current_step_info (const std::string& file_dir, int& istep) { std::stringstream ssc; ssc << file_dir << "Restart_td.txt"; - std::ifstream file(ssc.str().c_str()); + std::ifstream file (ssc.str ().c_str ()); if (!file) - { - ModuleBase::WARNING_QUIT("H_TDDFT_pw::current_step_info", "No Restart_td.txt!"); - } + { + ModuleBase::WARNING_QUIT ("H_TDDFT_pw::current_step_info", "No Restart_td.txt!"); + } file >> istep; file >> At[0] >> At[1] >> At[2]; file >> At_laststep[0] >> At_laststep[1] >> At_laststep[2]; At_laststep = -At_laststep; - file.close(); + file.close (); } -void H_TDDFT_pw::cal_fixed_v(double* vl_pseudo) +void + 
H_TDDFT_pw::cal_fixed_v (double* vl_pseudo) { - ModuleBase::TITLE("H_TDDFT_pw", "cal_fixed_v"); + ModuleBase::TITLE ("H_TDDFT_pw", "cal_fixed_v"); // skip if not length gauge if (stype != 0) - { - return; - } + { + return; + } // time evolve H_TDDFT_pw::istep++; @@ -112,11 +114,11 @@ void H_TDDFT_pw::cal_fixed_v(double* vl_pseudo) // judgement to skip vext if (!PARAM.inp.td_vext || istep > tend || istep < tstart) - { - return; - } + { + return; + } - ModuleBase::timer::start("H_TDDFT_pw", "cal_fixed_v"); + ModuleBase::timer::start ("H_TDDFT_pw", "cal_fixed_v"); int count = 0; gauss_count = 0; @@ -127,159 +129,164 @@ void H_TDDFT_pw::cal_fixed_v(double* vl_pseudo) global_vext_time = {0.0, 0.0, 0.0}; for (auto direc: PARAM.inp.td_vext_dire) - { - std::vector vext_space(this->rho_basis_->nrxx, 0.0); - double vext_time = cal_v_time(ttype[count], true); - - global_vext_time[direc - 1] += vext_time; - - if (PARAM.inp.out_efield && GlobalV::MY_RANK == 0) - { - std::stringstream as; - as << PARAM.globalv.global_out_dir << "efield_" << count << ".txt"; - std::ofstream ofs(as.str().c_str(), std::ofstream::app); - ofs << H_TDDFT_pw::istep * dt * ModuleBase::AU_to_FS << "\t" - << vext_time * ModuleBase::Ry_to_eV / ModuleBase::BOHR_TO_A << std::endl; - ofs.close(); - } - - cal_v_space(vext_space, direc); - for (size_t ir = 0; ir < this->rho_basis_->nrxx; ++ir) { - vl_pseudo[ir] += vext_space[ir] * vext_time; + std::vector vext_space (this->rho_basis_->nrxx, 0.0); + double vext_time = cal_v_time (ttype[count], true); + + global_vext_time[direc - 1] += vext_time; + + if (PARAM.inp.out_efield && GlobalV::MY_RANK == 0) + { + std::stringstream as; + as << PARAM.globalv.global_out_dir << "efield_" << count << ".txt"; + std::ofstream ofs (as.str ().c_str (), std::ofstream::app); + ofs << H_TDDFT_pw::istep * dt * ModuleBase::AU_to_FS << "\t" + << vext_time * ModuleBase::Ry_to_eV / ModuleBase::BOHR_TO_A << std::endl; + ofs.close (); + } + + cal_v_space (vext_space, direc); + for (size_t 
ir = 0; ir < this->rho_basis_->nrxx; ++ir) + { + vl_pseudo[ir] += vext_space[ir] * vext_time; + } + count++; } - count++; - } - ModuleBase::timer::end("H_TDDFT_pw", "cal_fixed_v"); + ModuleBase::timer::end ("H_TDDFT_pw", "cal_fixed_v"); return; } -void H_TDDFT_pw::cal_v_space(std::vector& vext_space, int direc) +void + H_TDDFT_pw::cal_v_space (std::vector& vext_space, int direc) { - ModuleBase::TITLE("H_TDDFT_pw", "cal_v_space"); - ModuleBase::timer::start("H_TDDFT_pw", "cal_v_space"); + ModuleBase::TITLE ("H_TDDFT_pw", "cal_v_space"); + ModuleBase::timer::start ("H_TDDFT_pw", "cal_v_space"); switch (stype) - { - case 0: - cal_v_space_length(vext_space, direc); - break; - default: - std::cout << "space_domain_type of electric field is wrong" << std::endl; - break; - } - - ModuleBase::timer::end("H_TDDFT_pw", "cal_v_space"); + { + case 0: + cal_v_space_length (vext_space, direc); + break; + default: + std::cout << "space_domain_type of electric field is wrong" << std::endl; + break; + } + + ModuleBase::timer::end ("H_TDDFT_pw", "cal_v_space"); return; } -void H_TDDFT_pw::cal_v_space_length(std::vector& vext_space, int direc) +void + H_TDDFT_pw::cal_v_space_length (std::vector& vext_space, int direc) { - ModuleBase::TITLE("H_TDDFT_pw", "cal_v_space_length"); - ModuleBase::timer::start("H_TDDFT_pw", "cal_v_space_length"); + ModuleBase::TITLE ("H_TDDFT_pw", "cal_v_space_length"); + ModuleBase::timer::start ("H_TDDFT_pw", "cal_v_space_length"); for (int ir = 0; ir < this->rho_basis_->nrxx; ++ir) - { - int i = ir / (this->rho_basis_->ny * this->rho_basis_->nplane); - int j = ir / this->rho_basis_->nplane - i * this->rho_basis_->ny; - int k = ir % this->rho_basis_->nplane + this->rho_basis_->startz_current; - double x = (double)i / this->rho_basis_->nx; - double y = (double)j / this->rho_basis_->ny; - double z = (double)k / this->rho_basis_->nz; - - switch (direc) { - case 1: - vext_space[ir] = cal_v_space_length_potential(x) * this->ucell_->latvec.e11 - + 
cal_v_space_length_potential(y) * this->ucell_->latvec.e21 - + cal_v_space_length_potential(z) * this->ucell_->latvec.e31; - break; - - case 2: - vext_space[ir] = cal_v_space_length_potential(x) * this->ucell_->latvec.e12 - + cal_v_space_length_potential(y) * this->ucell_->latvec.e22 - + cal_v_space_length_potential(z) * this->ucell_->latvec.e32; - break; - - case 3: - vext_space[ir] = cal_v_space_length_potential(x) * this->ucell_->latvec.e13 - + cal_v_space_length_potential(y) * this->ucell_->latvec.e23 - + cal_v_space_length_potential(z) * this->ucell_->latvec.e33; - break; - - default: - std::cout << "direction of electric field is wrong" << std::endl; - break; + int i = ir / (this->rho_basis_->ny * this->rho_basis_->nplane); + int j = ir / this->rho_basis_->nplane - i * this->rho_basis_->ny; + int k = ir % this->rho_basis_->nplane + this->rho_basis_->startz_current; + double x = (double)i / this->rho_basis_->nx; + double y = (double)j / this->rho_basis_->ny; + double z = (double)k / this->rho_basis_->nz; + + switch (direc) + { + case 1: + vext_space[ir] = cal_v_space_length_potential (x) * this->ucell_->latvec.e11 + + cal_v_space_length_potential (y) * this->ucell_->latvec.e21 + + cal_v_space_length_potential (z) * this->ucell_->latvec.e31; + break; + + case 2: + vext_space[ir] = cal_v_space_length_potential (x) * this->ucell_->latvec.e12 + + cal_v_space_length_potential (y) * this->ucell_->latvec.e22 + + cal_v_space_length_potential (z) * this->ucell_->latvec.e32; + break; + + case 3: + vext_space[ir] = cal_v_space_length_potential (x) * this->ucell_->latvec.e13 + + cal_v_space_length_potential (y) * this->ucell_->latvec.e23 + + cal_v_space_length_potential (z) * this->ucell_->latvec.e33; + break; + + default: + std::cout << "direction of electric field is wrong" << std::endl; + break; + } } - } - ModuleBase::timer::end("H_TDDFT_pw", "cal_v_space_length"); + ModuleBase::timer::end ("H_TDDFT_pw", "cal_v_space_length"); return; } -double 
H_TDDFT_pw::cal_v_space_length_potential(double i) +double + H_TDDFT_pw::cal_v_space_length_potential (double i) { double vext_space = 0.0; if (i < lcut1) - { - vext_space = -((i - lcut1) * (lcut2 - lcut1) / (lcut1 + 1.0 - lcut2) - lcut1) * this->ucell_->lat0; - } + { + vext_space = -((i - lcut1) * (lcut2 - lcut1) / (lcut1 + 1.0 - lcut2) - lcut1) * this->ucell_->lat0; + } else if (i >= lcut1 && i < lcut2) - { - vext_space = i * this->ucell_->lat0; - } + { + vext_space = i * this->ucell_->lat0; + } else if (i >= lcut2) - { - vext_space = -((i - lcut2) * (lcut2 - lcut1) / (lcut1 + 1.0 - lcut2) - lcut2) * this->ucell_->lat0; - } + { + vext_space = -((i - lcut2) * (lcut2 - lcut1) / (lcut1 + 1.0 - lcut2) - lcut2) * this->ucell_->lat0; + } return vext_space; } -int H_TDDFT_pw::check_ncut(int t_type) +int + H_TDDFT_pw::check_ncut (int t_type) { int ncut = 0; switch (t_type) - { - case 0: - ncut = *(gauss_ncut.begin() + gauss_count); - break; - - case 1: - ncut = *(trape_ncut.begin() + trape_count); - break; - - case 2: - ncut = *(trigo_ncut.begin() + trigo_count); - break; - - case 3: - ncut = 2; - break; - - default: - std::cout << "time_domain_type of electric field is wrong" << std::endl; - break; - } + { + case 0: + ncut = *(gauss_ncut.begin () + gauss_count); + break; + + case 1: + ncut = *(trape_ncut.begin () + trape_count); + break; + + case 2: + ncut = *(trigo_ncut.begin () + trigo_count); + break; + + case 3: + ncut = 2; + break; + + default: + std::cout << "time_domain_type of electric field is wrong" << std::endl; + break; + } return ncut; } -void H_TDDFT_pw::update_At() +void + H_TDDFT_pw::update_At () { // time evolve H_TDDFT_pw::istep++; // midpoint rule should be used both in Hamiltonian and here. 
At = At + At_laststep / 2.0; - At_laststep.set(0.0, 0.0, 0.0); - Et.set(0.0, 0.0, 0.0); + At_laststep.set (0.0, 0.0, 0.0); + Et.set (0.0, 0.0, 0.0); // judgement to skip vext if (!PARAM.inp.td_vext || istep > tend || istep < tstart) - { - return; - } + { + return; + } - ModuleBase::timer::start("H_TDDFT_pw", "update_At"); + ModuleBase::timer::start ("H_TDDFT_pw", "update_At"); int count = 0; gauss_count = 0; @@ -292,200 +299,207 @@ void H_TDDFT_pw::update_At() double out = 0.0; for (auto direc: PARAM.inp.td_vext_dire) - { - last = false; - // cut the integral space and initialize relevant parameters - ncut = check_ncut(ttype[count]); - istep_int = istep * ncut; - dt_int = dt / double(ncut); - - // store vext_time for each time point, include the first and last point - std::vector vext_time(ncut + 1, 0.0); // Use std::vector to manage memory - for (int i = 0; i <= ncut; i++) { - // if this is the last point, type_count++ - if (i == ncut) - { - last = true; - } - vext_time[i] = cal_v_time(ttype[count], last); - istep_int++; - } - // Call the Simpson's rule integration using std::vector data - ModuleBase::Integral::Simpson_Integral(ncut + 1, vext_time.data(), dt_int, out); - - // update At value for its direction - switch (stype) - { - case 1: - At_laststep[direc - 1] -= out; - break; - case 2: - At_laststep[direc - 1] -= out; - Et[direc - 1] += vext_time[0]; - break; - default: - std::cout << "space_domain_type of electric field is wrong" << std::endl; - break; + last = false; + // cut the integral space and initialize relevant parameters + ncut = check_ncut (ttype[count]); + istep_int = istep * ncut; + dt_int = dt / double (ncut); + + // store vext_time for each time point, include the first and last point + std::vector vext_time (ncut + 1, 0.0); // Use std::vector to manage memory + for (int i = 0; i <= ncut; i++) + { + // if this is the last point, type_count++ + if (i == ncut) + { + last = true; + } + vext_time[i] = cal_v_time (ttype[count], last); + istep_int++; 
+ } + // Call the Simpson's rule integration using std::vector data + ModuleBase::Integral::Simpson_Integral (ncut + 1, vext_time.data (), dt_int, out); + + // update At value for its direction + switch (stype) + { + case 1: + At_laststep[direc - 1] -= out; + break; + case 2: + At_laststep[direc - 1] -= out; + Et[direc - 1] += vext_time[0]; + break; + default: + std::cout << "space_domain_type of electric field is wrong" << std::endl; + break; + } + + // output Efield + if (PARAM.inp.out_efield && GlobalV::MY_RANK == 0) + { + std::stringstream as; + as << PARAM.globalv.global_out_dir << "efield_" << count << ".txt"; + std::ofstream ofs (as.str ().c_str (), std::ofstream::app); + ofs << H_TDDFT_pw::istep * dt * ModuleBase::AU_to_FS << "\t" + << vext_time[0] * ModuleBase::Ry_to_eV / ModuleBase::BOHR_TO_A << std::endl; + ofs.close (); + } + // total count++ + count++; } - - // output Efield - if (PARAM.inp.out_efield && GlobalV::MY_RANK == 0) - { - std::stringstream as; - as << PARAM.globalv.global_out_dir << "efield_" << count << ".txt"; - std::ofstream ofs(as.str().c_str(), std::ofstream::app); - ofs << H_TDDFT_pw::istep * dt * ModuleBase::AU_to_FS << "\t" - << vext_time[0] * ModuleBase::Ry_to_eV / ModuleBase::BOHR_TO_A << std::endl; - ofs.close(); - } - // total count++ - count++; - } At = At + At_laststep / 2.0; - ModuleBase::timer::end("H_TDDFT_pw", "update_At"); + ModuleBase::timer::end ("H_TDDFT_pw", "update_At"); return; } -double H_TDDFT_pw::cal_v_time(int t_type, const bool last) +double + H_TDDFT_pw::cal_v_time (int t_type, const bool last) { double vext_time = 0.0; switch (t_type) - { - case 0: - vext_time = cal_v_time_Gauss(last); - break; - - case 1: - vext_time = cal_v_time_trapezoid(last); - break; - - case 2: - vext_time = cal_v_time_trigonometric(last); - break; - - case 3: - vext_time = cal_v_time_heaviside(last); - break; - - default: - std::cout << "time_domain_type of electric field is wrong" << std::endl; - break; - } + { + case 0: + vext_time = 
cal_v_time_Gauss (last); + break; + + case 1: + vext_time = cal_v_time_trapezoid (last); + break; + + case 2: + vext_time = cal_v_time_trigonometric (last); + break; + + case 3: + vext_time = cal_v_time_heaviside (last); + break; + + default: + std::cout << "time_domain_type of electric field is wrong" << std::endl; + break; + } return vext_time; } -double H_TDDFT_pw::cal_v_time_Gauss(const bool last) +double + H_TDDFT_pw::cal_v_time_Gauss (const bool last) { double vext_time = 0.0; - double t0 = *(gauss_t0.begin() + gauss_count); - double omega = *(gauss_omega.begin() + gauss_count); - double sigma = *(gauss_sigma.begin() + gauss_count); - double phase = *(gauss_phase.begin() + gauss_count); - double amp = *(gauss_amp.begin() + gauss_count); - double ncut = *(gauss_ncut.begin() + gauss_count); + double t0 = *(gauss_t0.begin () + gauss_count); + double omega = *(gauss_omega.begin () + gauss_count); + double sigma = *(gauss_sigma.begin () + gauss_count); + double phase = *(gauss_phase.begin () + gauss_count); + double amp = *(gauss_amp.begin () + gauss_count); + double ncut = *(gauss_ncut.begin () + gauss_count); double gauss_t = (istep_int - t0 * ncut) * dt_int; - vext_time = cos(omega * gauss_t + phase) * exp(-gauss_t * gauss_t * 0.5 / (sigma * sigma)) * amp; + vext_time = cos (omega * gauss_t + phase) * exp (-gauss_t * gauss_t * 0.5 / (sigma * sigma)) * amp; if (last) - { - gauss_count++; - } + { + gauss_count++; + } return vext_time; } -double H_TDDFT_pw::cal_v_time_trapezoid(const bool last) +double + H_TDDFT_pw::cal_v_time_trapezoid (const bool last) { double vext_time = 0.0; - double t1 = *(trape_t1.begin() + trape_count); - double t2 = *(trape_t2.begin() + trape_count); - double t3 = *(trape_t3.begin() + trape_count); - double omega = *(trape_omega.begin() + trape_count); - double phase = *(trape_phase.begin() + trape_count); - double amp = *(trape_amp.begin() + trape_count); - double ncut = *(trape_ncut.begin() + trape_count); + double t1 = *(trape_t1.begin 
() + trape_count); + double t2 = *(trape_t2.begin () + trape_count); + double t3 = *(trape_t3.begin () + trape_count); + double omega = *(trape_omega.begin () + trape_count); + double phase = *(trape_phase.begin () + trape_count); + double amp = *(trape_amp.begin () + trape_count); + double ncut = *(trape_ncut.begin () + trape_count); if (istep < t1) - { - vext_time = istep_int / ncut / t1; - } + { + vext_time = istep_int / ncut / t1; + } else if (istep < t2) - { - vext_time = 1.0; - } + { + vext_time = 1.0; + } else if (istep < t3) - { - vext_time = (t3 - istep_int / ncut) / (t3 - t2); - } + { + vext_time = (t3 - istep_int / ncut) / (t3 - t2); + } - vext_time = vext_time * amp * cos(omega * istep_int * dt_int + phase); + vext_time = vext_time * amp * cos (omega * istep_int * dt_int + phase); if (last) - { - trape_count++; - } + { + trape_count++; + } return vext_time; } -double H_TDDFT_pw::cal_v_time_trigonometric(const bool last) +double + H_TDDFT_pw::cal_v_time_trigonometric (const bool last) { double vext_time = 0.0; - double omega1 = *(trigo_omega1.begin() + trigo_count); - double phase1 = *(trigo_phase1.begin() + trigo_count); - double omega2 = *(trigo_omega2.begin() + trigo_count); - double phase2 = *(trigo_phase2.begin() + trigo_count); - double amp = *(trigo_amp.begin() + trigo_count); + double omega1 = *(trigo_omega1.begin () + trigo_count); + double phase1 = *(trigo_phase1.begin () + trigo_count); + double omega2 = *(trigo_omega2.begin () + trigo_count); + double phase2 = *(trigo_phase2.begin () + trigo_count); + double amp = *(trigo_amp.begin () + trigo_count); const double timenow = istep_int * dt_int; - vext_time = amp * cos(omega1 * timenow + phase1) * sin(omega2 * timenow + phase2) * sin(omega2 * timenow + phase2); + vext_time + = amp * cos (omega1 * timenow + phase1) * sin (omega2 * timenow + phase2) * sin (omega2 * timenow + phase2); if (last) - { - trigo_count++; - } + { + trigo_count++; + } return vext_time; } -double 
H_TDDFT_pw::cal_v_time_heaviside(const bool last) +double + H_TDDFT_pw::cal_v_time_heaviside (const bool last) { - double t0 = *(heavi_t0.begin() + heavi_count); - double amp = *(heavi_amp.begin() + heavi_count); + double t0 = *(heavi_t0.begin () + heavi_count); + double amp = *(heavi_amp.begin () + heavi_count); double vext_time = 0.0; if (istep < t0) - { - vext_time = amp; - } + { + vext_time = amp; + } else if (istep >= t0) - { - vext_time = 0.0; - } + { + vext_time = 0.0; + } if (last) - { - heavi_count++; - } + { + heavi_count++; + } return vext_time; } -void H_TDDFT_pw::compute_force(const UnitCell& cell, ModuleBase::matrix& fe) +void + H_TDDFT_pw::compute_force (const UnitCell& cell, ModuleBase::matrix& fe) { int iat = 0; for (int it = 0; it < cell.ntype; ++it) - { - for (int ia = 0; ia < cell.atoms[it].na; ++ia) { - for (int direc = 0; direc < 3; ++direc) - { - // No need to multiply ModuleBase::e2, since the unit of force is Ry/Bohr - fe(iat, direc) = global_vext_time[direc] * cell.atoms[it].ncpp.zv; - } - ++iat; + for (int ia = 0; ia < cell.atoms[it].na; ++ia) + { + for (int direc = 0; direc < 3; ++direc) + { + // No need to multiply ModuleBase::e2, since the unit of force is Ry/Bohr + fe (iat, direc) = global_vext_time[direc] * cell.atoms[it].ncpp.zv; + } + ++iat; + } } - } } } // namespace elecstate diff --git a/source/source_estate/module_pot/H_TDDFT_pw.h b/source/source_estate/module_pot/H_TDDFT_pw.h index 4565cd000ff..2a39771637d 100644 --- a/source/source_estate/module_pot/H_TDDFT_pw.h +++ b/source/source_estate/module_pot/H_TDDFT_pw.h @@ -10,7 +10,7 @@ namespace elecstate class H_TDDFT_pw : public PotBase { public: - H_TDDFT_pw(const ModulePW::PW_Basis* rho_basis_in, const UnitCell* ucell_in) : ucell_(ucell_in) + H_TDDFT_pw (const ModulePW::PW_Basis* rho_basis_in, const UnitCell* ucell_in) : ucell_ (ucell_in) { this->dynamic_mode = false; this->fixed_mode = true; @@ -20,23 +20,23 @@ class H_TDDFT_pw : public PotBase // If it is the first time to 
create an H_TDDFT_pw instance and is restart calculation, // initialize istep using current_step_info if (!is_initialized && PARAM.inp.mdp.md_restart) - { - int restart_istep = -1; - std::string file_dir = PARAM.globalv.global_readin_dir; - current_step_info(file_dir, restart_istep); - - if (restart_istep >= 0) { - H_TDDFT_pw::istep = restart_istep - 1; // Update istep - } + int restart_istep = -1; + std::string file_dir = PARAM.globalv.global_readin_dir; + current_step_info (file_dir, restart_istep); - is_initialized = true; // Mark as initialized, so that istep will not be initialized again - } + if (restart_istep >= 0) + { + H_TDDFT_pw::istep = restart_istep - 1; // Update istep + } + + is_initialized = true; // Mark as initialized, so that istep will not be initialized again + } } - ~H_TDDFT_pw() {}; + ~H_TDDFT_pw () {}; - void cal_fixed_v(double* vl_pseudo) override; + void cal_fixed_v (double* vl_pseudo) override; /** * @brief Compute ionic force of electric field @@ -44,7 +44,7 @@ class H_TDDFT_pw : public PotBase * @param[in] cell Information of cell * @param[out] fe Force of electric field F = qE */ - static void compute_force(const UnitCell& cell, ModuleBase::matrix& fe); + static void compute_force (const UnitCell& cell, ModuleBase::matrix& fe); // parameters static int stype; // 0: length gauge; 1: velocity gauge; 2: hybrid gauge @@ -113,7 +113,7 @@ class H_TDDFT_pw : public PotBase static std::vector heavi_amp; // Ry/bohr // update At for velocity gauge by intergral of E(t)dt - static void update_At(); + static void update_At (); private: static int istep; @@ -125,22 +125,22 @@ class H_TDDFT_pw : public PotBase const UnitCell* ucell_ = nullptr; // Obtain the current MD step information, used for restart calculation - void current_step_info(const std::string& file_dir, int& istep); + void current_step_info (const std::string& file_dir, int& istep); // Potential of electric field in space domain: for length gauge only - void cal_v_space(std::vector& 
vext_space, int direc); - void cal_v_space_length(std::vector& vext_space, int direc); - double cal_v_space_length_potential(double i); + void cal_v_space (std::vector& vext_space, int direc); + void cal_v_space_length (std::vector& vext_space, int direc); + double cal_v_space_length_potential (double i); // Potential of electric field in time domain: Gaussian, trapezoid, trigonometric, Heaviside - static double cal_v_time(int t_type, const bool last); - static double cal_v_time_Gauss(const bool last); - static double cal_v_time_trapezoid(const bool last); - static double cal_v_time_trigonometric(const bool last); - static double cal_v_time_heaviside(const bool last); + static double cal_v_time (int t_type, const bool last); + static double cal_v_time_Gauss (const bool last); + static double cal_v_time_trapezoid (const bool last); + static double cal_v_time_trigonometric (const bool last); + static double cal_v_time_heaviside (const bool last); // Get ncut number for At integral - static int check_ncut(int t_type); + static int check_ncut (int t_type); }; } // namespace elecstate diff --git a/source/source_estate/module_pot/efield.cpp b/source/source_estate/module_pot/efield.cpp index 9a5851b268e..dad7859c4cf 100644 --- a/source/source_estate/module_pot/efield.cpp +++ b/source/source_estate/module_pot/efield.cpp @@ -19,345 +19,352 @@ double Efield::efield_amp; double Efield::bvec[3]; double Efield::bmod; -Efield::Efield() -{ -} +Efield::Efield () {} -Efield::~Efield() -{ -} +Efield::~Efield () {} //======================================================= // calculate dipole potential in surface calculations //======================================================= -ModuleBase::matrix Efield::add_efield(const UnitCell& cell, - const ModulePW::PW_Basis* rho_basis, - const int& nspin, - const double* const* const rho, - const surchem& solvent) +ModuleBase::matrix + Efield::add_efield (const UnitCell& cell, + const ModulePW::PW_Basis* rho_basis, + const int& nspin, + 
const double* const* const rho, + const surchem& solvent) { - ModuleBase::TITLE("Efield", "add_efield"); - ModuleBase::timer::start("Efield", "add_efield"); + ModuleBase::TITLE ("Efield", "add_efield"); + ModuleBase::timer::start ("Efield", "add_efield"); // set the parameters if (efield_pos_max == -1 || efield_pos_dec == -1) - { - // obtain the position of atoms along the efield direction - std::vector pos; - for (int it = 0; it < cell.ntype; ++it) { - for (int ia = 0; ia < cell.atoms[it].na; ++ia) - { - pos.push_back(cell.atoms[it].taud[ia][efield_dir]); - } - } + // obtain the position of atoms along the efield direction + std::vector pos; + for (int it = 0; it < cell.ntype; ++it) + { + for (int ia = 0; ia < cell.atoms[it].na; ++ia) + { + pos.push_back (cell.atoms[it].taud[ia][efield_dir]); + } + } - autoset(pos); - } + autoset (pos); + } double latvec; // latvec along the efield direction - double area; // surface area along the efield direction - prepare(cell, latvec, area); + double area; // surface area along the efield direction + prepare (cell, latvec, area); double ion_dipole = 0; double elec_dipole = 0; double induced_dipole = 0; if (PARAM.inp.dip_cor_flag) - { - ion_dipole = cal_ion_dipole(cell, bmod); - elec_dipole = cal_elec_dipole(cell, rho_basis, nspin, rho, bmod); - tot_dipole = ion_dipole - elec_dipole; - - if (PARAM.inp.imp_sol) { - induced_dipole = cal_induced_dipole(cell, rho_basis, solvent, bmod); - tot_dipole += induced_dipole; - } + ion_dipole = cal_ion_dipole (cell, bmod); + elec_dipole = cal_elec_dipole (cell, rho_basis, nspin, rho, bmod); + tot_dipole = ion_dipole - elec_dipole; + + if (PARAM.inp.imp_sol) + { + induced_dipole = cal_induced_dipole (cell, rho_basis, solvent, bmod); + tot_dipole += induced_dipole; + } - // energy correction - etotefield = -ModuleBase::e2 * (efield_amp - 0.5 * tot_dipole) * tot_dipole * cell.omega / ModuleBase::FOUR_PI; - } + // energy correction + etotefield + = -ModuleBase::e2 * (efield_amp - 0.5 * 
tot_dipole) * tot_dipole * cell.omega / ModuleBase::FOUR_PI; + } else - { - ion_dipole = cal_ion_dipole(cell, bmod); + { + ion_dipole = cal_ion_dipole (cell, bmod); - // energy correction - etotefield = -ModuleBase::e2 * efield_amp * ion_dipole * cell.omega / ModuleBase::FOUR_PI; - } + // energy correction + etotefield = -ModuleBase::e2 * efield_amp * ion_dipole * cell.omega / ModuleBase::FOUR_PI; + } const double length = (1.0 - efield_pos_dec) * latvec * cell.lat0; const double vamp = ModuleBase::e2 * (efield_amp - tot_dipole) * length; GlobalV::ofs_running << "\n\n Adding external electric field: " << std::endl; if (PARAM.inp.dip_cor_flag) - { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Computed dipole along efield_dir", efield_dir); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Elec. dipole (Ry a.u.)", elec_dipole); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Ion dipole (Ry a.u.)", ion_dipole); - if (PARAM.inp.imp_sol) { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Induced dipole (Ry a.u.)", induced_dipole); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Computed dipole along efield_dir", efield_dir); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Elec. 
dipole (Ry a.u.)", elec_dipole); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Ion dipole (Ry a.u.)", ion_dipole); + if (PARAM.inp.imp_sol) + { + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Induced dipole (Ry a.u.)", induced_dipole); + } + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Total dipole (Ry a.u.)", tot_dipole); + } + if (std::abs (efield_amp) > 0.0) + { + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Amplitute of Efield (Hartree)", efield_amp); } - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Total dipole (Ry a.u.)", tot_dipole); - } - if (std::abs(efield_amp) > 0.0) - { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Amplitute of Efield (Hartree)", efield_amp); - } - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Potential amplitute (Ry)", vamp); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Total length (Bohr)", length); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Potential amplitute (Ry)", vamp); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Total length (Bohr)", length); // dipole potential - ModuleBase::matrix v(nspin, rho_basis->nrxx); + ModuleBase::matrix v (nspin, rho_basis->nrxx); const int nspin0 = (nspin == 2) ? 
2 : 1; for (int ir = 0; ir < rho_basis->nrxx; ++ir) - { - int i = ir / (rho_basis->ny * rho_basis->nplane); - int j = ir / rho_basis->nplane - i * rho_basis->ny; - int k = ir % rho_basis->nplane + rho_basis->startz_current; - double x = (double)i / rho_basis->nx; - double y = (double)j / rho_basis->ny; - double z = (double)k / rho_basis->nz; - ModuleBase::Vector3 pos(x, y, z); - - double saw = saw_function(efield_pos_max, efield_pos_dec, pos[efield_dir]); - - for (int is = 0; is < nspin0; is++) { - v(is, ir) = saw; + int i = ir / (rho_basis->ny * rho_basis->nplane); + int j = ir / rho_basis->nplane - i * rho_basis->ny; + int k = ir % rho_basis->nplane + rho_basis->startz_current; + double x = (double)i / rho_basis->nx; + double y = (double)j / rho_basis->ny; + double z = (double)k / rho_basis->nz; + ModuleBase::Vector3 pos (x, y, z); + + double saw = saw_function (efield_pos_max, efield_pos_dec, pos[efield_dir]); + + for (int is = 0; is < nspin0; is++) + { + v (is, ir) = saw; + } } - } double fac = ModuleBase::e2 * (efield_amp - tot_dipole) * cell.lat0 / bmod; - ModuleBase::timer::end("Efield", "add_efield"); + ModuleBase::timer::end ("Efield", "add_efield"); return v * fac; } //======================================================= // calculate dipole density in surface calculations //======================================================= -double Efield::cal_ion_dipole(const UnitCell &cell, const double &bmod) +double + Efield::cal_ion_dipole (const UnitCell& cell, const double& bmod) { double ion_dipole = 0; for (int it = 0; it < cell.ntype; ++it) - { - double sum = 0; - for (int ia = 0; ia < cell.atoms[it].na; ++ia) { - sum += saw_function(efield_pos_max, efield_pos_dec, cell.atoms[it].taud[ia][efield_dir]); + double sum = 0; + for (int ia = 0; ia < cell.atoms[it].na; ++ia) + { + sum += saw_function (efield_pos_max, efield_pos_dec, cell.atoms[it].taud[ia][efield_dir]); + } + ion_dipole += sum * cell.atoms[it].ncpp.zv; } - ion_dipole += sum * 
cell.atoms[it].ncpp.zv; - } if (PARAM.inp.gate_flag && PARAM.inp.dip_cor_flag) - { - double ion_charge = 0; - for (int it = 0; it < cell.ntype; ++it) { - ion_charge += cell.atoms[it].na * cell.atoms[it].ncpp.zv; + double ion_charge = 0; + for (int it = 0; it < cell.ntype; ++it) + { + ion_charge += cell.atoms[it].na * cell.atoms[it].ncpp.zv; + } + ion_dipole + += (PARAM.inp.nelec - ion_charge) * saw_function (efield_pos_max, efield_pos_dec, Gatefield::zgate); } - ion_dipole += (PARAM.inp.nelec - ion_charge) * saw_function(efield_pos_max, efield_pos_dec, Gatefield::zgate); - } ion_dipole *= cell.lat0 / bmod * ModuleBase::FOUR_PI / cell.omega; return ion_dipole; } -double Efield::cal_elec_dipole(const UnitCell& cell, - const ModulePW::PW_Basis* rho_basis, - const int& nspin, - const double* const* const rho, - const double& bmod) +double + Efield::cal_elec_dipole (const UnitCell& cell, + const ModulePW::PW_Basis* rho_basis, + const int& nspin, + const double* const* const rho, + const double& bmod) { double elec_dipole = 0; const int nspin0 = (nspin == 2) ? 
2 : 1; for (int ir = 0; ir < rho_basis->nrxx; ++ir) - { - int i = ir / (rho_basis->ny * rho_basis->nplane); - int j = ir / rho_basis->nplane - i * rho_basis->ny; - int k = ir % rho_basis->nplane + rho_basis->startz_current; - double x = (double)i / rho_basis->nx; - double y = (double)j / rho_basis->ny; - double z = (double)k / rho_basis->nz; - ModuleBase::Vector3 pos(x, y, z); - - double saw = saw_function(efield_pos_max, efield_pos_dec, pos[efield_dir]); - - for (int is = 0; is < nspin0; is++) { - elec_dipole += rho[is][ir] * saw; + int i = ir / (rho_basis->ny * rho_basis->nplane); + int j = ir / rho_basis->nplane - i * rho_basis->ny; + int k = ir % rho_basis->nplane + rho_basis->startz_current; + double x = (double)i / rho_basis->nx; + double y = (double)j / rho_basis->ny; + double z = (double)k / rho_basis->nz; + ModuleBase::Vector3 pos (x, y, z); + + double saw = saw_function (efield_pos_max, efield_pos_dec, pos[efield_dir]); + + for (int is = 0; is < nspin0; is++) + { + elec_dipole += rho[is][ir] * saw; + } } - } - Parallel_Reduce::reduce_pool(elec_dipole); + Parallel_Reduce::reduce_pool (elec_dipole); elec_dipole *= cell.lat0 / bmod * ModuleBase::FOUR_PI / rho_basis->nxyz; return elec_dipole; } -double Efield::cal_induced_dipole(const UnitCell& cell, - const ModulePW::PW_Basis* rho_basis, - const surchem& solvent, - const double& bmod) +double + Efield::cal_induced_dipole (const UnitCell& cell, + const ModulePW::PW_Basis* rho_basis, + const surchem& solvent, + const double& bmod) { double induced_dipole = 0; - double *induced_rho = new double[rho_basis->nrxx]; - solvent.induced_charge(cell, rho_basis, induced_rho); + double* induced_rho = new double[rho_basis->nrxx]; + solvent.induced_charge (cell, rho_basis, induced_rho); for (int ir = 0; ir < rho_basis->nrxx; ++ir) - { - int i = ir / (rho_basis->ny * rho_basis->nplane); - int j = ir / rho_basis->nplane - i * rho_basis->ny; - int k = ir % rho_basis->nplane + rho_basis->startz_current; - double x = (double)i 
/ rho_basis->nx; - double y = (double)j / rho_basis->ny; - double z = (double)k / rho_basis->nz; - ModuleBase::Vector3 pos(x, y, z); + { + int i = ir / (rho_basis->ny * rho_basis->nplane); + int j = ir / rho_basis->nplane - i * rho_basis->ny; + int k = ir % rho_basis->nplane + rho_basis->startz_current; + double x = (double)i / rho_basis->nx; + double y = (double)j / rho_basis->ny; + double z = (double)k / rho_basis->nz; + ModuleBase::Vector3 pos (x, y, z); - double saw = saw_function(efield_pos_max, efield_pos_dec, pos[efield_dir]); + double saw = saw_function (efield_pos_max, efield_pos_dec, pos[efield_dir]); - induced_dipole += induced_rho[ir] * saw; - } + induced_dipole += induced_rho[ir] * saw; + } - Parallel_Reduce::reduce_pool(induced_dipole); + Parallel_Reduce::reduce_pool (induced_dipole); induced_dipole *= cell.lat0 / bmod * ModuleBase::FOUR_PI / rho_basis->nxyz; delete[] induced_rho; return induced_dipole; } -double Efield::saw_function(const double &a, const double &b, const double &x) +double + Efield::saw_function (const double& a, const double& b, const double& x) { - assert(x >= 0); - assert(x <= 1); + assert (x >= 0); + assert (x <= 1); const double fac = 1 - b; if (x <= a) - { - return x - a + 0.5 * fac; - } + { + return x - a + 0.5 * fac; + } else if (x > (a + b)) - { - return x - a - 1 + 0.5 * fac; - } + { + return x - a - 1 + 0.5 * fac; + } else - { - return 0.5 * fac - fac * (x - a) / b; - } + { + return 0.5 * fac - fac * (x - a) / b; + } } -void Efield::compute_force(const UnitCell &cell, ModuleBase::matrix &fdip) +void + Efield::compute_force (const UnitCell& cell, ModuleBase::matrix& fdip) { if (PARAM.inp.dip_cor_flag) - { - int iat = 0; - for (int it = 0; it < cell.ntype; ++it) { - for (int ia = 0; ia < cell.atoms[it].na; ++ia) - { - for (int jj = 0; jj < 3; ++jj) + int iat = 0; + for (int it = 0; it < cell.ntype; ++it) { - fdip(iat, jj) - = ModuleBase::e2 * (efield_amp - tot_dipole) * cell.atoms[it].ncpp.zv * bvec[jj] / bmod; + for (int 
ia = 0; ia < cell.atoms[it].na; ++ia) + { + for (int jj = 0; jj < 3; ++jj) + { + fdip (iat, jj) = ModuleBase::e2 * (efield_amp - tot_dipole) * cell.atoms[it].ncpp.zv + * bvec[jj] / bmod; + } + ++iat; + } } - ++iat; - } } - } else - { - int iat = 0; - for (int it = 0; it < cell.ntype; ++it) { - for (int ia = 0; ia < cell.atoms[it].na; ++ia) - { - for (int jj = 0; jj < 3; ++jj) + int iat = 0; + for (int it = 0; it < cell.ntype; ++it) { - fdip(iat, jj) = ModuleBase::e2 * efield_amp * cell.atoms[it].ncpp.zv * bvec[jj] / bmod; + for (int ia = 0; ia < cell.atoms[it].na; ++ia) + { + for (int jj = 0; jj < 3; ++jj) + { + fdip (iat, jj) + = ModuleBase::e2 * efield_amp * cell.atoms[it].ncpp.zv * bvec[jj] / bmod; + } + ++iat; + } } - ++iat; - } } - } } -void Efield::prepare(const UnitCell &cell, double &latvec, double &area) +void + Efield::prepare (const UnitCell& cell, double& latvec, double& area) { if (efield_dir == 0) - { - bvec[0] = cell.G.e11; - bvec[1] = cell.G.e12; - bvec[2] = cell.G.e13; - latvec = cell.a1.norm(); - area = cross(cell.a2, cell.a3).norm() * cell.lat0 * cell.lat0; - } + { + bvec[0] = cell.G.e11; + bvec[1] = cell.G.e12; + bvec[2] = cell.G.e13; + latvec = cell.a1.norm (); + area = cross (cell.a2, cell.a3).norm () * cell.lat0 * cell.lat0; + } else if (efield_dir == 1) - { - bvec[0] = cell.G.e21; - bvec[1] = cell.G.e22; - bvec[2] = cell.G.e23; - latvec = cell.a2.norm(); - area = cross(cell.a3, cell.a1).norm() * cell.lat0 * cell.lat0; - } + { + bvec[0] = cell.G.e21; + bvec[1] = cell.G.e22; + bvec[2] = cell.G.e23; + latvec = cell.a2.norm (); + area = cross (cell.a3, cell.a1).norm () * cell.lat0 * cell.lat0; + } else if (efield_dir == 2) - { - bvec[0] = cell.G.e31; - bvec[1] = cell.G.e32; - bvec[2] = cell.G.e33; - latvec = cell.a3.norm(); - area = cross(cell.a1, cell.a2).norm() * cell.lat0 * cell.lat0; - } + { + bvec[0] = cell.G.e31; + bvec[1] = cell.G.e32; + bvec[2] = cell.G.e33; + latvec = cell.a3.norm (); + area = cross (cell.a1, cell.a2).norm () * 
cell.lat0 * cell.lat0; + } else - { - ModuleBase::WARNING_QUIT("Efield::prepare", "direction is wrong!"); - } - bmod = sqrt(pow(bvec[0], 2) + pow(bvec[1], 2) + pow(bvec[2], 2)); + { + ModuleBase::WARNING_QUIT ("Efield::prepare", "direction is wrong!"); + } + bmod = sqrt (pow (bvec[0], 2) + pow (bvec[1], 2) + pow (bvec[2], 2)); } -void Efield::autoset(std::vector& pos) +void + Efield::autoset (std::vector& pos) { // determine the vacuum region - std::sort(pos.begin(), pos.end()); + std::sort (pos.begin (), pos.end ()); double vacuum = 0.0; double center = 0.0; - for (int i = 1; i < pos.size(); i++) - { - double diff = pos[i] - pos[i - 1]; - if (diff > vacuum) + for (int i = 1; i < pos.size (); i++) { - vacuum = diff; - center = (pos[i] + pos[i - 1]) / 2; + double diff = pos[i] - pos[i - 1]; + if (diff > vacuum) + { + vacuum = diff; + center = (pos[i] + pos[i - 1]) / 2; + } } - } // consider the periodic boundary condition - double diff = pos[0] + 1 - pos[pos.size() - 1]; + double diff = pos[0] + 1 - pos[pos.size () - 1]; if (diff > vacuum) - { - vacuum = diff; - center = (pos[0] + pos[pos.size() - 1] + 1) / 2; - } + { + vacuum = diff; + center = (pos[0] + pos[pos.size () - 1] + 1) / 2; + } // set the parameters efield_pos_max = center - vacuum / 20; efield_pos_dec = vacuum / 10; while (efield_pos_max >= 1) - { - efield_pos_max -= 1; - } + { + efield_pos_max -= 1; + } while (efield_pos_max < 0) - { - efield_pos_max += 1; - } + { + efield_pos_max += 1; + } - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Autoset efield_pos_max", efield_pos_max); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Autoset efield_pos_dec", efield_pos_dec); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Autoset efield_pos_max", efield_pos_max); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Autoset efield_pos_dec", efield_pos_dec); } } // namespace elecstate diff --git a/source/source_estate/module_pot/efield.h b/source/source_estate/module_pot/efield.h index 
cd829d3be16..14f1ae985c9 100644 --- a/source/source_estate/module_pot/efield.h +++ b/source/source_estate/module_pot/efield.h @@ -11,42 +11,42 @@ namespace elecstate class Efield { public: - Efield(); - ~Efield(); + Efield (); + ~Efield (); - static ModuleBase::matrix add_efield(const UnitCell& cell, - const ModulePW::PW_Basis* rho_basis, - const int& nspin, - const double* const* const rho, - const surchem& solvent); + static ModuleBase::matrix add_efield (const UnitCell& cell, + const ModulePW::PW_Basis* rho_basis, + const int& nspin, + const double* const* const rho, + const surchem& solvent); - static double cal_elec_dipole(const UnitCell& cell, - const ModulePW::PW_Basis* rho_basis, - const int& nspin, - const double* const* const rho, - const double& bmod); + static double cal_elec_dipole (const UnitCell& cell, + const ModulePW::PW_Basis* rho_basis, + const int& nspin, + const double* const* const rho, + const double& bmod); - static double cal_ion_dipole(const UnitCell& cell, const double& bmod); + static double cal_ion_dipole (const UnitCell& cell, const double& bmod); - static double cal_induced_dipole(const UnitCell& cell, - const ModulePW::PW_Basis* rho_basis, - const surchem& solvent, - const double& bmod); + static double cal_induced_dipole (const UnitCell& cell, + const ModulePW::PW_Basis* rho_basis, + const surchem& solvent, + const double& bmod); - static double saw_function(const double &a, const double &b, const double &x); + static double saw_function (const double& a, const double& b, const double& x); - static void compute_force(const UnitCell &cell, ModuleBase::matrix &fdip); + static void compute_force (const UnitCell& cell, ModuleBase::matrix& fdip); - static void prepare(const UnitCell &cell, double &latvec, double &area); + static void prepare (const UnitCell& cell, double& latvec, double& area); - static void autoset(std::vector& pos); + static void autoset (std::vector& pos); - static double etotefield; // dipole energy - static double 
tot_dipole; // total dipole - static int efield_dir; // 0, 1, 2 denotes x, y, z direction for dipole correction + static double etotefield; // dipole energy + static double tot_dipole; // total dipole + static int efield_dir; // 0, 1, 2 denotes x, y, z direction for dipole correction static double efield_pos_max; // the maximum position of the saw function static double efield_pos_dec; // the decrease region length of the saw function - static double efield_amp; // field amplitude (in a.u.) (1 a.u. = 51.44 10^10 V/m) + static double efield_amp; // field amplitude (in a.u.) (1 a.u. = 51.44 10^10 V/m) static double bvec[3]; static double bmod; }; @@ -60,43 +60,45 @@ namespace elecstate class PotEfield : public PotBase { public: - PotEfield(const ModulePW::PW_Basis* rho_basis_in, const UnitCell* ucell_in, const surchem* solvent_in, bool dipole) - : ucell_(ucell_in), solvent_(solvent_in) + PotEfield (const ModulePW::PW_Basis* rho_basis_in, const UnitCell* ucell_in, const surchem* solvent_in, bool dipole) + : ucell_ (ucell_in), solvent_ (solvent_in) { this->rho_basis_ = rho_basis_in; if (!dipole) - { - this->fixed_mode = true; - this->dynamic_mode = false; - } + { + this->fixed_mode = true; + this->dynamic_mode = false; + } else - { - this->fixed_mode = false; - this->dynamic_mode = true; - } + { + this->fixed_mode = false; + this->dynamic_mode = true; + } }; - void cal_fixed_v(double *vl_pseudo) override + void + cal_fixed_v (double* vl_pseudo) override { - ModuleBase::matrix v_efield(PARAM.inp.nspin, rho_basis_->nrxx); - v_efield = Efield::add_efield(*ucell_, - const_cast(rho_basis_), - PARAM.inp.nspin, - nullptr, - *solvent_); + ModuleBase::matrix v_efield (PARAM.inp.nspin, rho_basis_->nrxx); + v_efield = Efield::add_efield (*ucell_, + const_cast (rho_basis_), + PARAM.inp.nspin, + nullptr, + *solvent_); for (int ir = 0; ir < rho_basis_->nrxx; ++ir) - { - vl_pseudo[ir] += v_efield(0, ir); - } + { + vl_pseudo[ir] += v_efield (0, ir); + } } - void cal_v_eff(const Charge 
*chg, const UnitCell *ucell, ModuleBase::matrix &v_eff) override + void + cal_v_eff (const Charge* chg, const UnitCell* ucell, ModuleBase::matrix& v_eff) override { - v_eff += Efield::add_efield(*ucell, - const_cast(rho_basis_), - v_eff.nr, - chg->rho, - *solvent_); + v_eff += Efield::add_efield (*ucell, + const_cast (rho_basis_), + v_eff.nr, + chg->rho, + *solvent_); } private: diff --git a/source/source_estate/module_pot/gatefield.cpp b/source/source_estate/module_pot/gatefield.cpp index 257842f6bb5..1e4b07b0864 100644 --- a/source/source_estate/module_pot/gatefield.cpp +++ b/source/source_estate/module_pot/gatefield.cpp @@ -16,27 +16,28 @@ double Gatefield::block_down = 0.45; double Gatefield::block_up = 0.55; double Gatefield::block_height = 0.1; -void Gatefield::add_gatefield(double *vltot, - const UnitCell &cell, - const ModulePW::PW_Basis *rho_basis, - const bool &linear, - const bool &quadratic) +void + Gatefield::add_gatefield (double* vltot, + const UnitCell& cell, + const ModulePW::PW_Basis* rho_basis, + const bool& linear, + const bool& quadratic) { - ModuleBase::TITLE("Gatefield", "add_gatefield"); - ModuleBase::timer::start("Gatefield", "add_gatefield"); + ModuleBase::TITLE ("Gatefield", "add_gatefield"); + ModuleBase::timer::start ("Gatefield", "add_gatefield"); //======================================================= // preparation for constants //======================================================= double latvec; // latvec along the efield direction - double area; // surface area along the efield direction - Efield::prepare(cell, latvec, area); + double area; // surface area along the efield direction + Efield::prepare (cell, latvec, area); double ion_charge = 0; for (int it = 0; it < cell.ntype; ++it) - { - ion_charge += cell.atoms[it].na * cell.atoms[it].ncpp.zv; - } + { + ion_charge += cell.atoms[it].na * cell.atoms[it].ncpp.zv; + } rho_surface = -(PARAM.inp.nelec - ion_charge) / area * ModuleBase::TWO_PI; double block_size = block_up - 
block_down; @@ -46,161 +47,185 @@ void Gatefield::add_gatefield(double *vltot, //======================================================= double factor = 0; for (int it = 0; it < cell.ntype; ++it) - { - double zval = cell.atoms[it].ncpp.zv; - for (int ia = 0; ia < cell.atoms[it].na; ++ia) { - double pos = cell.atoms[it].taud[ia][Efield::efield_dir]; - factor += zval * (mopopla(zgate, pos, true) + 1.0 / 6.0); // linear part - factor += zval * mopopla(zgate, pos, false); // quadratic part + double zval = cell.atoms[it].ncpp.zv; + for (int ia = 0; ia < cell.atoms[it].na; ++ia) + { + double pos = cell.atoms[it].taud[ia][Efield::efield_dir]; + factor += zval * (mopopla (zgate, pos, true) + 1.0 / 6.0); // linear part + factor += zval * mopopla (zgate, pos, false); // quadratic part + } } - } etotgatefield = -ModuleBase::e2 * rho_surface * cell.lat0 / Efield::bmod * (factor + (PARAM.inp.nelec - ion_charge) / 12.0); GlobalV::ofs_running << "\n\n Adding charged plate to compensate the charge of the system" << std::endl; - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "prefactor of the potential (Ry a.u.)", rho_surface); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "position of the charged plate within cell", zgate); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "prefactor of the potential (Ry a.u.)", rho_surface); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "position of the charged plate within cell", zgate); if (relax) - { - GlobalV::ofs_running << "Allow relaxation in specific direction" << std::endl; - } - if (block) - { - if (PARAM.inp.dip_cor_flag) { - GlobalV::ofs_running << "Adding potential to prevent charge spilling into region of the gate" << std::endl; + GlobalV::ofs_running << "Allow relaxation in specific direction" << std::endl; } - else - { - GlobalV::ofs_running << "Adding potential to prevent interaction between lower and upper part of unit cell" - << std::endl; - } - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "block_size in 
units of unit cell length", block_size); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "block_height (Ry a.u.)", block_height); - } - - for (int ir = 0; ir < rho_basis->nrxx; ++ir) - { - int i = ir / (rho_basis->ny * rho_basis->nplane); - int j = ir / rho_basis->nplane - i * rho_basis->ny; - int k = ir % rho_basis->nplane + rho_basis->startz_current; - double x = (double)i / rho_basis->nx; - double y = (double)j / rho_basis->ny; - double z = (double)k / rho_basis->nz; - ModuleBase::Vector3 pos(x, y, z); - double gatepos = pos[Efield::efield_dir]; - - double value = 0; - - if (linear) + if (block) { - value += rho_surface * ModuleBase::e2 * (mopopla(zgate, gatepos, true) + 1.0 / 6.0) * cell.lat0 - / Efield::bmod; - if (block && gatepos >= block_down && gatepos <= block_up && !PARAM.inp.dip_cor_flag) - { - if (gatepos - zgate <= -block_size / 2.0 * 0.9) // smooth increase within the first 10% + if (PARAM.inp.dip_cor_flag) { - value += block_height * (gatepos - zgate + block_size / 2.0) / (0.1 * block_size / 2.0); + GlobalV::ofs_running << "Adding potential to prevent charge spilling into region of the gate" + << std::endl; } - else if (gatepos - zgate >= block_size / 2.0 * 0.9) // smooth decrease within the last 10% + else { - value += block_height * (gatepos - zgate - block_size / 2.0) / (-0.1 * block_size / 2.0); + GlobalV::ofs_running + << "Adding potential to prevent interaction between lower and upper part of unit cell" + << std::endl; } - else // block - { - value += block_height; - } - } - else if (block && gatepos >= block_down && gatepos <= block_up && PARAM.inp.dip_cor_flag) - { - if (gatepos <= block_down + Efield::efield_pos_dec) - { - value += (gatepos - block_down) / Efield::efield_pos_dec * block_height; - } - else if (gatepos >= block_up - Efield::efield_pos_dec) + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "block_size in units of unit cell length", block_size); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "block_height (Ry 
a.u.)", block_height); + } + + for (int ir = 0; ir < rho_basis->nrxx; ++ir) + { + int i = ir / (rho_basis->ny * rho_basis->nplane); + int j = ir / rho_basis->nplane - i * rho_basis->ny; + int k = ir % rho_basis->nplane + rho_basis->startz_current; + double x = (double)i / rho_basis->nx; + double y = (double)j / rho_basis->ny; + double z = (double)k / rho_basis->nz; + ModuleBase::Vector3 pos (x, y, z); + double gatepos = pos[Efield::efield_dir]; + + double value = 0; + + if (linear) { - value += (block_up - gatepos) / Efield::efield_pos_dec * block_height; + value += rho_surface * ModuleBase::e2 * (mopopla (zgate, gatepos, true) + 1.0 / 6.0) * cell.lat0 + / Efield::bmod; + if (block && gatepos >= block_down && gatepos <= block_up && !PARAM.inp.dip_cor_flag) + { + if (gatepos - zgate <= -block_size / 2.0 * 0.9) // smooth increase within the first 10% + { + value += block_height * (gatepos - zgate + block_size / 2.0) + / (0.1 * block_size / 2.0); + } + else if (gatepos - zgate >= block_size / 2.0 * 0.9) // smooth decrease within the last 10% + { + value += block_height * (gatepos - zgate - block_size / 2.0) + / (-0.1 * block_size / 2.0); + } + else // block + { + value += block_height; + } + } + else if (block && gatepos >= block_down && gatepos <= block_up && PARAM.inp.dip_cor_flag) + { + if (gatepos <= block_down + Efield::efield_pos_dec) + { + value += (gatepos - block_down) / Efield::efield_pos_dec * block_height; + } + else if (gatepos >= block_up - Efield::efield_pos_dec) + { + value += (block_up - gatepos) / Efield::efield_pos_dec * block_height; + } + else + { + value += block_height; + } + } } - else + + if (quadratic) { - value += block_height; + value += rho_surface * ModuleBase::e2 * mopopla (zgate, gatepos, false) * cell.lat0 / Efield::bmod; } - } - } - if (quadratic) - { - value += rho_surface * ModuleBase::e2 * mopopla(zgate, gatepos, false) * cell.lat0 / Efield::bmod; + vltot[ir] += value; } - vltot[ir] += value; - } - - 
ModuleBase::timer::end("Gatefield", "add_gatefield"); + ModuleBase::timer::end ("Gatefield", "add_gatefield"); } -double Gatefield::mopopla(double &zgate, double z, bool flag) +double + Gatefield::mopopla (double& zgate, double z, bool flag) { while (z > 1.0) - z -= 1.0; + { + z -= 1.0; + } while (z < 0.0) - z += 1.0; + { + z += 1.0; + } z -= zgate; if (z <= -0.5) - z += 1.0; + { + z += 1.0; + } if (z >= 0.5) - z -= 1.0; + { + z -= 1.0; + } if (!flag) - { - return z * z; - } + { + return z * z; + } else if (z <= 0) - { - return z; - } + { + return z; + } else - { - return -z; - } + { + return -z; + } } -void Gatefield::compute_force(const UnitCell &cell, ModuleBase::matrix &fgate) +void + Gatefield::compute_force (const UnitCell& cell, ModuleBase::matrix& fgate) { int iat = 0; for (int it = 0; it < cell.ntype; ++it) - { - double zval = cell.atoms[it].ncpp.zv; - for (int ia = 0; ia < cell.atoms[it].na; ++ia) { - double pos = cell.atoms[it].taud[ia][Efield::efield_dir]; - while (pos > 1.0) - pos -= 1.0; - while (pos < 0.0) - pos += 1.0; - - pos -= zgate; - - if (pos <= -0.5) - pos += 1.0; - if (pos >= 0.5) - pos -= 1.0; - - double fac = 1; - if (pos < 0) - fac = -1; - - for (int jj = 0; jj < 3; ++jj) - { - fgate(iat, jj) - = -zval * ModuleBase::e2 * rho_surface * Efield::bvec[jj] / Efield::bmod * (fac - 2.0 * pos); - } - ++iat; + double zval = cell.atoms[it].ncpp.zv; + for (int ia = 0; ia < cell.atoms[it].na; ++ia) + { + double pos = cell.atoms[it].taud[ia][Efield::efield_dir]; + while (pos > 1.0) + { + pos -= 1.0; + } + while (pos < 0.0) + { + pos += 1.0; + } + + pos -= zgate; + + if (pos <= -0.5) + { + pos += 1.0; + } + if (pos >= 0.5) + { + pos -= 1.0; + } + + double fac = 1; + if (pos < 0) + { + fac = -1; + } + + for (int jj = 0; jj < 3; ++jj) + { + fgate (iat, jj) = -zval * ModuleBase::e2 * rho_surface * Efield::bvec[jj] / Efield::bmod + * (fac - 2.0 * pos); + } + ++iat; + } } - } } } // namespace elecstate diff --git 
a/source/source_estate/module_pot/gatefield.h b/source/source_estate/module_pot/gatefield.h index d7bb548c969..e15050efe38 100644 --- a/source/source_estate/module_pot/gatefield.h +++ b/source/source_estate/module_pot/gatefield.h @@ -9,27 +9,27 @@ namespace elecstate class Gatefield { public: - Gatefield(); - ~Gatefield(); + Gatefield (); + ~Gatefield (); - static void add_gatefield(double *vltot, - const UnitCell &cell, - const ModulePW::PW_Basis *rho_basis, - const bool &linear, - const bool &quadratic); + static void add_gatefield (double* vltot, + const UnitCell& cell, + const ModulePW::PW_Basis* rho_basis, + const bool& linear, + const bool& quadratic); - static double mopopla(double &zgate, double z, bool flag); + static double mopopla (double& zgate, double z, bool flag); - static void compute_force(const UnitCell &cell, ModuleBase::matrix &fgate); + static void compute_force (const UnitCell& cell, ModuleBase::matrix& fgate); static double etotgatefield; // energy for gatefield - static double rho_surface; // surface charge density of charged plate - static double zgate; // position of charged plate - static bool relax; // - static bool block; // add a block potential or not - static double block_down; // low bound of the block - static double block_up; // high bound of the block - static double block_height; // height of the block + static double rho_surface; // surface charge density of charged plate + static double zgate; // position of charged plate + static bool relax; // + static bool block; // add a block potential or not + static double block_down; // low bound of the block + static double block_up; // high bound of the block + static double block_height; // height of the block }; } // namespace elecstate @@ -40,20 +40,21 @@ namespace elecstate class PotGate : public PotBase { public: - PotGate(const ModulePW::PW_Basis *rho_basis_in, const UnitCell *ucell_in) : ucell_(ucell_in) + PotGate (const ModulePW::PW_Basis* rho_basis_in, const UnitCell* 
ucell_in) : ucell_ (ucell_in) { this->rho_basis_ = rho_basis_in; this->fixed_mode = true; this->dynamic_mode = false; }; - void cal_fixed_v(double *vl_pseudo) override + void + cal_fixed_v (double* vl_pseudo) override { - Gatefield::add_gatefield(vl_pseudo, *ucell_, this->rho_basis_, true, true); + Gatefield::add_gatefield (vl_pseudo, *ucell_, this->rho_basis_, true, true); } private: - const UnitCell *ucell_ = nullptr; + const UnitCell* ucell_ = nullptr; }; } // namespace elecstate diff --git a/source/source_estate/module_pot/pot_base.h b/source/source_estate/module_pot/pot_base.h index aaa578bc5bf..8246f2cc86f 100644 --- a/source/source_estate/module_pot/pot_base.h +++ b/source/source_estate/module_pot/pot_base.h @@ -12,26 +12,36 @@ namespace elecstate /** This class is the base class of Potential module 1. Main class Potential is derived from it. 2. components of potentials on real space grids can derived from it and will be registered into Potential. - a. cal_fixed_v() is a virtual function, it can be override to contribute potentials which do not change with Charge object. - b. cal_v_eff() is a virtual function, it can be override to contribute potentials which change with Charge object. - c. fixed_mode should be set "true" if you want Potential class call cal_fixed_v() - d. dynamic_mode should be set "true" if you want Potential class call cal_v_eff() - e. rho_basis_ is needed to provide number of real space grids(nrxx) and number of spin(nspin) and FFT(real<->recip) interface + a. cal_fixed_v() is a virtual function, it can be override to contribute potentials which do not change with Charge + object. b. cal_v_eff() is a virtual function, it can be override to contribute potentials which change with Charge + object. c. fixed_mode should be set "true" if you want Potential class call cal_fixed_v() d. dynamic_mode should be set + "true" if you want Potential class call cal_v_eff() e. 
rho_basis_ is needed to provide number of real space grids(nrxx) + and number of spin(nspin) and FFT(real<->recip) interface */ class PotBase { public: - PotBase(){} - virtual ~PotBase(){} - - virtual void cal_v_eff(const Charge*const chg, const UnitCell*const ucell, ModuleBase::matrix& v_eff){} - - virtual void cal_fixed_v(double* vl_pseudo){} - - virtual double get_energy() const { return 0.0; } - - bool fixed_mode = 0; - bool dynamic_mode = 0; + PotBase () {} + virtual ~PotBase () {} + + virtual void + cal_v_eff (const Charge* const chg, const UnitCell* const ucell, ModuleBase::matrix& v_eff) + { + } + + virtual void + cal_fixed_v (double* vl_pseudo) + { + } + + virtual double + get_energy () const + { + return 0.0; + } + + bool fixed_mode = false; + bool dynamic_mode = false; protected: const ModulePW::PW_Basis* rho_basis_ = nullptr; diff --git a/source/source_estate/module_pot/pot_cosikr.cpp b/source/source_estate/module_pot/pot_cosikr.cpp index 66a5ef7849c..30da3bd3647 100644 --- a/source/source_estate/module_pot/pot_cosikr.cpp +++ b/source/source_estate/module_pot/pot_cosikr.cpp @@ -10,44 +10,46 @@ namespace elecstate { -Pot_Cosikr::Pot_Cosikr( - const ModulePW::PW_Basis* rho_basis_in, - const ModuleBase::Vector3 &kvec_d_in, - const std::vector &phase_in, - const std::vector &litude_in) - :kvec_d(kvec_d_in), - phase(phase_in), - amplitude(amplitude_in) +Pot_Cosikr::Pot_Cosikr (const ModulePW::PW_Basis* rho_basis_in, + const ModuleBase::Vector3& kvec_d_in, + const std::vector& phase_in, + const std::vector& amplitude_in) + : kvec_d (kvec_d_in), phase (phase_in), amplitude (amplitude_in) { - this->rho_basis_ = rho_basis_in; - this->dynamic_mode = true; - this->fixed_mode = false; + this->rho_basis_ = rho_basis_in; + this->dynamic_mode = true; + this->fixed_mode = false; } - -void Pot_Cosikr::cal_v_eff(const Charge*const chg, const UnitCell*const ucell, ModuleBase::matrix &v_eff) +void + Pot_Cosikr::cal_v_eff (const Charge* const chg, const UnitCell* const 
ucell, ModuleBase::matrix& v_eff) { - ModuleBase::TITLE("Pot_Cosikr", "cal_v_eff"); - ModuleBase::timer::start("Pot_Cosikr", "cal_veff"); - assert(v_eff.nr == this->phase.size()); - assert(v_eff.nr == this->amplitude.size()); - int ir = 0; - for (int ix = 0; ix < this->rho_basis_->nx; ++ix) - { - const double phase_x = this->kvec_d.x * ix / this->rho_basis_->nx; - for (int iy = 0; iy < this->rho_basis_->ny; ++iy) - { - const double phase_xy = phase_x + this->kvec_d.y * iy / this->rho_basis_->ny; - for (int iz = this->rho_basis_->startz_current; iz < this->rho_basis_->startz_current + this->rho_basis_->nplane; ++iz) - { - const double phase_xyz = phase_xy + this->kvec_d.z * iz / this->rho_basis_->nz; - for(int is=0; isamplitude[is] * std::cos((phase_xyz + this->phase[is]) * ModuleBase::TWO_PI); - ++ir; - } - } - } - ModuleBase::timer::end("Pot_Cosikr", "cal_veff"); + ModuleBase::TITLE ("Pot_Cosikr", "cal_v_eff"); + ModuleBase::timer::start ("Pot_Cosikr", "cal_veff"); + assert (v_eff.nr == this->phase.size ()); + assert (v_eff.nr == this->amplitude.size ()); + int ir = 0; + for (int ix = 0; ix < this->rho_basis_->nx; ++ix) + { + const double phase_x = this->kvec_d.x * ix / this->rho_basis_->nx; + for (int iy = 0; iy < this->rho_basis_->ny; ++iy) + { + const double phase_xy = phase_x + this->kvec_d.y * iy / this->rho_basis_->ny; + for (int iz = this->rho_basis_->startz_current; + iz < this->rho_basis_->startz_current + this->rho_basis_->nplane; + ++iz) + { + const double phase_xyz = phase_xy + this->kvec_d.z * iz / this->rho_basis_->nz; + for (int is = 0; is < v_eff.nr; ++is) + { + v_eff (is, ir) += this->amplitude[is] + * std::cos ((phase_xyz + this->phase[is]) * ModuleBase::TWO_PI); + } + ++ir; + } + } + } + ModuleBase::timer::end ("Pot_Cosikr", "cal_veff"); } -} \ No newline at end of file +} // namespace elecstate \ No newline at end of file diff --git a/source/source_estate/module_pot/pot_cosikr.h b/source/source_estate/module_pot/pot_cosikr.h index 
d6953a5dfe6..431b9242601 100644 --- a/source/source_estate/module_pot/pot_cosikr.h +++ b/source/source_estate/module_pot/pot_cosikr.h @@ -9,7 +9,6 @@ #include "pot_base.h" #include "source_base/vector3.h" - namespace elecstate { @@ -17,20 +16,19 @@ namespace elecstate class Pot_Cosikr : public PotBase { public: - Pot_Cosikr( - const ModulePW::PW_Basis* rho_basis_in, - const ModuleBase::Vector3 &kvec_d_in, - const std::vector &phase_in, - const std::vector &litude_in); - - void cal_v_eff(const Charge*const chg, const UnitCell*const ucell, ModuleBase::matrix &v_eff) override; + Pot_Cosikr (const ModulePW::PW_Basis* rho_basis_in, + const ModuleBase::Vector3& kvec_d_in, + const std::vector& phase_in, + const std::vector& amplitude_in); + + void cal_v_eff (const Charge* const chg, const UnitCell* const ucell, ModuleBase::matrix& v_eff) override; private: - const ModuleBase::Vector3 kvec_d; - const std::vector phase; - const std::vector amplitude; + const ModuleBase::Vector3 kvec_d; + const std::vector phase; + const std::vector amplitude; }; -} +} // namespace elecstate #endif \ No newline at end of file diff --git a/source/source_estate/module_pot/pot_local.cpp b/source/source_estate/module_pot/pot_local.cpp index 1dabae97d69..458630ccb13 100644 --- a/source/source_estate/module_pot/pot_local.cpp +++ b/source/source_estate/module_pot/pot_local.cpp @@ -11,37 +11,38 @@ namespace elecstate //========================================================== // This routine computes the local potential in real space //========================================================== -void PotLocal::cal_fixed_v(double *vl_pseudo) // store the local pseudopotential +void + PotLocal::cal_fixed_v (double* vl_pseudo) // store the local pseudopotential { - ModuleBase::TITLE("PotLocal", "cal_fixed_v"); - ModuleBase::timer::start("PotLocal", "cal_fixed_v"); + ModuleBase::TITLE ("PotLocal", "cal_fixed_v"); + ModuleBase::timer::start ("PotLocal", "cal_fixed_v"); - std::complex *vg = new 
std::complex[this->rho_basis_->npw]; + std::complex* vg = new std::complex[this->rho_basis_->npw]; - ModuleBase::GlobalFunc::ZEROS(vg, this->rho_basis_->npw); + ModuleBase::GlobalFunc::ZEROS (vg, this->rho_basis_->npw); for (int it = 0; it < this->ntype_; it++) - { - for (int ig = 0; ig < this->rho_basis_->npw; ig++) { - vg[ig] += this->vloc_[0](it, this->rho_basis_->ig2igg[ig]) * this->sf_[0](it, ig); + for (int ig = 0; ig < this->rho_basis_->npw; ig++) + { + vg[ig] += this->vloc_[0](it, this->rho_basis_->ig2igg[ig]) * this->sf_[0](it, ig); + } } - } /// save the V_local at G=0 - if(this->rho_basis_->npw > 0) - { - *vl_of_0_ = vg[0].real(); - } + if (this->rho_basis_->npw > 0) + { + *vl_of_0_ = vg[0].real (); + } // recip2real should be a const function, but now it isn't // a dangerous usage appears here, which should be fix in the future. - const_cast(this->rho_basis_)->recip2real(vg, vl_pseudo); + const_cast (this->rho_basis_)->recip2real (vg, vl_pseudo); delete[] vg; // GlobalV::ofs_running <<" set local pseudopotential done." 
<< std::endl; - ModuleBase::timer::end("PotLocal", "cal_fixed_v"); + ModuleBase::timer::end ("PotLocal", "cal_fixed_v"); return; } diff --git a/source/source_estate/module_pot/pot_local.h b/source/source_estate/module_pot/pot_local.h index 923e962a112..f26e08a15d0 100644 --- a/source/source_estate/module_pot/pot_local.h +++ b/source/source_estate/module_pot/pot_local.h @@ -10,29 +10,28 @@ namespace elecstate class PotLocal : public PotBase { public: - PotLocal(const ModuleBase::matrix* vloc_in, // local pseduopotentials - const ModuleBase::ComplexMatrix* sf_in, - const ModulePW::PW_Basis* rho_basis_in, - double& vl_of_0) - : vloc_(vloc_in), sf_(sf_in), vl_of_0_(&vl_of_0) + PotLocal (const ModuleBase::matrix* vloc_in, // local pseduopotentials + const ModuleBase::ComplexMatrix* sf_in, + const ModulePW::PW_Basis* rho_basis_in, + double& vl_of_0) + : vloc_ (vloc_in), sf_ (sf_in), vl_of_0_ (&vl_of_0) { - assert(this->vloc_->nr == this->sf_->nr); + assert (this->vloc_->nr == this->sf_->nr); this->rho_basis_ = rho_basis_in; this->ntype_ = this->vloc_->nr; this->fixed_mode = true; this->dynamic_mode = false; } - void cal_fixed_v(double* vl_pseudo) override; - - private: + void cal_fixed_v (double* vl_pseudo) override; + private: /// @brief save the value of vloc at G=0; this is a static member because there is only one vl(0) for all instances double* vl_of_0_ = nullptr; // std::vector vltot; - const ModuleBase::matrix* vloc_ = nullptr; // local pseduopotentials + const ModuleBase::matrix* vloc_ = nullptr; // local pseduopotentials const ModuleBase::ComplexMatrix* sf_ = nullptr; // structure factors int ntype_ = 0; }; diff --git a/source/source_estate/module_pot/pot_ml_exx.cpp b/source/source_estate/module_pot/pot_ml_exx.cpp index 53393b1e33f..6ed1cab8142 100644 --- a/source/source_estate/module_pot/pot_ml_exx.cpp +++ b/source/source_estate/module_pot/pot_ml_exx.cpp @@ -9,131 +9,152 @@ namespace elecstate { -ML_EXX::ML_EXX() +ML_EXX::ML_EXX () { - this->energy_prefactor = 
- 3. / 4. * std::pow(3. / M_PI, 1./3.) * 2; + this->energy_prefactor = -3. / 4. * std::pow (3. / M_PI, 1. / 3.) * 2; this->energy_exponent = 4. / 3.; } -ML_EXX::~ML_EXX(){} +ML_EXX::~ML_EXX () {} -void ML_EXX::set_para(const Input_para& inp, const UnitCell* ucell_in, const ModulePW::PW_Basis* rho_basis_in) +void + ML_EXX::set_para (const Input_para& inp, const UnitCell* ucell_in, const ModulePW::PW_Basis* rho_basis_in) { - torch::set_default_dtype(caffe2::TypeMeta::fromScalarType(torch::kDouble)); - auto output = torch::get_default_dtype(); + torch::set_default_dtype (caffe2::TypeMeta::fromScalarType (torch::kDouble)); + auto output = torch::get_default_dtype (); std::cout << "Default type: " << output << std::endl; - this->set_device(inp.of_ml_device); + this->set_device (inp.of_ml_device); this->nx = rho_basis_in->nrxx; this->nx_tot = rho_basis_in->nrxx; this->dV = ucell_in->omega / rho_basis_in->nxyz; this->nkernel = inp.of_ml_nkernel; - this->init_data( - this->nkernel, - inp.of_ml_gamma, - inp.of_ml_p, - inp.of_ml_q, - inp.of_ml_tanhp, - inp.of_ml_tanhq, - inp.of_ml_gammanl, - inp.of_ml_pnl, - inp.of_ml_qnl, - inp.of_ml_xi, - inp.of_ml_tanhxi, - inp.of_ml_tanhxi_nl, - inp.of_ml_tanh_pnl, - inp.of_ml_tanh_qnl, - inp.of_ml_tanhp_nl, - inp.of_ml_tanhq_nl); + this->init_data (this->nkernel, + inp.of_ml_gamma, + inp.of_ml_p, + inp.of_ml_q, + inp.of_ml_tanhp, + inp.of_ml_tanhq, + inp.of_ml_gammanl, + inp.of_ml_pnl, + inp.of_ml_qnl, + inp.of_ml_xi, + inp.of_ml_tanhxi, + inp.of_ml_tanhxi_nl, + inp.of_ml_tanh_pnl, + inp.of_ml_tanh_qnl, + inp.of_ml_tanhp_nl, + inp.of_ml_tanhq_nl); std::cout << "ninput = " << this->ninput << std::endl; if (PARAM.inp.ml_exx) - { - int nnode = 100; - int nlayer = 3; - this->nn = std::make_shared(this->nx, 0, this->ninput, nnode, nlayer, this->device); - torch::load(this->nn, "net.pt", this->device_type); - std::cout << "load net done" << std::endl; - if (PARAM.inp.of_ml_feg != 0) { - torch::Tensor feg_inpt = torch::zeros(this->ninput, 
this->device_type); - for (int i = 0; i < this->ninput; ++i) - { - if (this->descriptor_type[i] == "gamma") feg_inpt[i] = 1.; - } - - if (PARAM.inp.of_ml_feg == 1) - { - this->feg_net_F = torch::softplus(this->nn->forward(feg_inpt)).to(this->device_CPU).contiguous().data_ptr()[0]; - } - else - { - this->feg_net_F = this->nn->forward(feg_inpt).to(this->device_CPU).contiguous().data_ptr()[0]; - } - - std::cout << "feg_net_F = " << this->feg_net_F << std::endl; + int nnode = 100; + int nlayer = 3; + this->nn = std::make_shared (this->nx, 0, this->ninput, nnode, nlayer, this->device); + torch::load (this->nn, "net.pt", this->device_type); + std::cout << "load net done" << std::endl; + if (PARAM.inp.of_ml_feg != 0) + { + torch::Tensor feg_inpt = torch::zeros (this->ninput, this->device_type); + for (int i = 0; i < this->ninput; ++i) + { + if (this->descriptor_type[i] == "gamma") + feg_inpt[i] = 1.; + } + + if (PARAM.inp.of_ml_feg == 1) + { + this->feg_net_F = torch::softplus (this->nn->forward (feg_inpt)) + .to (this->device_CPU) + .contiguous () + .data_ptr ()[0]; + } + else + { + this->feg_net_F = this->nn->forward (feg_inpt) + .to (this->device_CPU) + .contiguous () + .data_ptr ()[0]; + } + + std::cout << "feg_net_F = " << this->feg_net_F << std::endl; + } } - } - + if (PARAM.inp.ml_exx || PARAM.inp.of_ml_gene_data == 1) - { - this->cal_tool = new ModuleIO::Cal_MLKEDF_Descriptors; - - this->chi_p = inp.of_ml_chi_p; - this->chi_q = inp.of_ml_chi_q; - this->chi_xi = inp.of_ml_chi_xi; - this->chi_pnl = inp.of_ml_chi_pnl; - this->chi_qnl = inp.of_ml_chi_qnl; - - this->cal_tool->set_para(this->nx, inp.nelec, inp.of_tf_weight, inp.of_vw_weight, this->chi_p, this->chi_q, - this->chi_xi, this->chi_pnl, this->chi_qnl, this->nkernel, inp.of_ml_kernel, inp.of_ml_kernel_scaling, inp.of_ml_yukawa_alpha, inp.of_ml_kernel_file, this->dV * rho_basis_in->nxyz, rho_basis_in); - } + { + this->cal_tool = new ModuleIO::Cal_MLKEDF_Descriptors; + + this->chi_p = inp.of_ml_chi_p; + 
this->chi_q = inp.of_ml_chi_q; + this->chi_xi = inp.of_ml_chi_xi; + this->chi_pnl = inp.of_ml_chi_pnl; + this->chi_qnl = inp.of_ml_chi_qnl; + + this->cal_tool->set_para (this->nx, + inp.nelec, + inp.of_tf_weight, + inp.of_vw_weight, + this->chi_p, + this->chi_q, + this->chi_xi, + this->chi_pnl, + this->chi_qnl, + this->nkernel, + inp.of_ml_kernel, + inp.of_ml_kernel_scaling, + inp.of_ml_yukawa_alpha, + inp.of_ml_kernel_file, + this->dV * rho_basis_in->nxyz, + rho_basis_in); + } } - /** * @brief Get the potential of ML KEDF, and add it into rpotential - * + * * @param prho charge density * @param pw_rho PW_Basis * @param rpotential rpotential => rpotential + V_{ML} */ -void ML_EXX::ml_potential(const double * const * prho, const ModulePW::PW_Basis *pw_rho, ModuleBase::matrix &rpotential) +void + ML_EXX::ml_potential (const double* const* prho, const ModulePW::PW_Basis* pw_rho, ModuleBase::matrix& rpotential) { double* rho_data = new double[this->nx]; const double** prho_mod = new const double*[1]; prho_mod[0] = rho_data; for (int ir = 0; ir < this->nx; ++ir) - { - rho_data[ir] = std::abs(prho[0][ir]); - } + { + rho_data[ir] = std::abs (prho[0][ir]); + } + + this->updateInput (prho_mod, pw_rho); - this->updateInput(prho_mod, pw_rho); + this->NN_forward (prho_mod, pw_rho, true); - this->NN_forward(prho_mod, pw_rho, true); - - torch::Tensor enhancement_cpu_tensor = this->nn->F.to(this->device_CPU).contiguous(); - this->enhancement_cpu_ptr = enhancement_cpu_tensor.data_ptr(); - torch::Tensor gradient_cpu_tensor = this->nn->inputs.grad().to(this->device_CPU).contiguous(); - this->gradient_cpu_ptr = gradient_cpu_tensor.data_ptr(); + torch::Tensor enhancement_cpu_tensor = this->nn->F.to (this->device_CPU).contiguous (); + this->enhancement_cpu_ptr = enhancement_cpu_tensor.data_ptr (); + torch::Tensor gradient_cpu_tensor = this->nn->inputs.grad ().to (this->device_CPU).contiguous (); + this->gradient_cpu_ptr = gradient_cpu_tensor.data_ptr (); - 
this->get_potential_(prho_mod, pw_rho, rpotential); + this->get_potential_ (prho_mod, pw_rho, rpotential); // get energy - ModuleBase::timer::start("ML_EXX", "Pauli Energy"); + ModuleBase::timer::start ("ML_EXX", "Pauli Energy"); double energy = 0.; for (int ir = 0; ir < this->nx; ++ir) - { - energy += this->enhancement_cpu_ptr[ir] * std::pow(prho_mod[0][ir], this->energy_exponent); - } + { + energy += this->enhancement_cpu_ptr[ir] * std::pow (prho_mod[0][ir], this->energy_exponent); + } energy *= this->dV * this->energy_prefactor; this->ml_exx_energy = energy; - Parallel_Reduce::reduce_pool(this->ml_exx_energy); - ModuleBase::timer::end("ML_EXX", "Pauli Energy"); + Parallel_Reduce::reduce_pool (this->ml_exx_energy); + ModuleBase::timer::end ("ML_EXX", "Pauli Energy"); delete[] rho_data; delete[] prho_mod; @@ -141,79 +162,83 @@ void ML_EXX::ml_potential(const double * const * prho, const ModulePW::PW_Basis /** * @brief Generate training data for ML KEDF - * + * * @param prho charge density * @param wt KEDF_WT * @param tf KEDF_TF * @param pw_rho PW_Basis * @param veff effective potential */ -void ML_EXX::generateTrainData(const double * const *prho, const ModulePW::PW_Basis *pw_rho, const double *veff) +void + ML_EXX::generateTrainData (const double* const* prho, const ModulePW::PW_Basis* pw_rho, const double* veff) { if (PARAM.inp.of_kinetic == "ml") - { - this->updateInput(prho, pw_rho); + { + this->updateInput (prho, pw_rho); + + this->NN_forward (prho, pw_rho, true); - this->NN_forward(prho, pw_rho, true); - - torch::Tensor enhancement_cpu_tensor = this->nn->F.to(this->device_CPU).contiguous(); - this->enhancement_cpu_ptr = enhancement_cpu_tensor.data_ptr(); - torch::Tensor gradient_cpu_tensor = this->nn->inputs.grad().to(this->device_CPU).contiguous(); - this->gradient_cpu_ptr = gradient_cpu_tensor.data_ptr(); + torch::Tensor enhancement_cpu_tensor = this->nn->F.to (this->device_CPU).contiguous (); + this->enhancement_cpu_ptr = enhancement_cpu_tensor.data_ptr 
(); + torch::Tensor gradient_cpu_tensor = this->nn->inputs.grad ().to (this->device_CPU).contiguous (); + this->gradient_cpu_ptr = gradient_cpu_tensor.data_ptr (); - torch::Tensor enhancement = this->nn->F.reshape({this->nx}); - ModuleBase::matrix potential(1, this->nx); + torch::Tensor enhancement = this->nn->F.reshape ({this->nx}); + ModuleBase::matrix potential (1, this->nx); - this->get_potential_(prho, pw_rho, potential); + this->get_potential_ (prho, pw_rho, potential); - this->dumpTensor("enhancement.npy", enhancement); - this->dumpMatrix("potential.npy", potential); - } + this->dumpTensor ("enhancement.npy", enhancement); + this->dumpMatrix ("potential.npy", potential); + } } /** * @brief For test - * + * * @param prho charge density * @param pw_rho PW_Basis */ -void ML_EXX::localTest(const double * const *pprho, const ModulePW::PW_Basis *pw_rho) +void + ML_EXX::localTest (const double* const* pprho, const ModulePW::PW_Basis* pw_rho) { // for test ===================== - std::vector cshape = {(long unsigned) this->nx}; + std::vector cshape = {(long unsigned)this->nx}; bool fortran_order = false; - std::vector temp_prho(this->nx); - this->loadVector("path_to_rho_file", temp_prho); - - double ** prho = new double *[1]; + std::vector temp_prho (this->nx); + this->loadVector ("path_to_rho_file", temp_prho); + + double** prho = new double*[1]; prho[0] = new double[this->nx]; - for (int ir = 0; ir < this->nx; ++ir) prho[0][ir] = temp_prho[ir]; - for (int ir = 0; ir < this->nx; ++ir) - { - if (prho[0][ir] == 0.){ - std::cout << "WARNING: rho = 0" << std::endl; - } - }; + for (int ir = 0; ir < this->nx; ++ir) + prho[0][ir] = temp_prho[ir]; + for (int ir = 0; ir < this->nx; ++ir) + { + if (prho[0][ir] == 0.) 
+ { + std::cout << "WARNING: rho = 0" << std::endl; + } + }; // ============================== - this->updateInput(prho, pw_rho); + this->updateInput (prho, pw_rho); + + this->NN_forward (prho, pw_rho, true); - this->NN_forward(prho, pw_rho, true); - - torch::Tensor enhancement_cpu_tensor = this->nn->F.to(this->device_CPU).contiguous(); - this->enhancement_cpu_ptr = enhancement_cpu_tensor.data_ptr(); - torch::Tensor gradient_cpu_tensor = this->nn->inputs.grad().to(this->device_CPU).contiguous(); - this->gradient_cpu_ptr = gradient_cpu_tensor.data_ptr(); + torch::Tensor enhancement_cpu_tensor = this->nn->F.to (this->device_CPU).contiguous (); + this->enhancement_cpu_ptr = enhancement_cpu_tensor.data_ptr (); + torch::Tensor gradient_cpu_tensor = this->nn->inputs.grad ().to (this->device_CPU).contiguous (); + this->gradient_cpu_ptr = gradient_cpu_tensor.data_ptr (); - torch::Tensor enhancement = this->nn->F.reshape({this->nx}); - ModuleBase::matrix potential(1, this->nx); + torch::Tensor enhancement = this->nn->F.reshape ({this->nx}); + ModuleBase::matrix potential (1, this->nx); - this->get_potential_(prho, pw_rho, potential); + this->get_potential_ (prho, pw_rho, potential); - this->dumpTensor("enhancement-abacus.npy", enhancement); - this->dumpMatrix("potential-abacus.npy", potential); - exit(0); + this->dumpTensor ("enhancement-abacus.npy", enhancement); + this->dumpMatrix ("potential-abacus.npy", potential); + exit (0); } -} // namespace elecstate +} // namespace elecstate #endif diff --git a/source/source_estate/module_pot/pot_ml_exx.h b/source/source_estate/module_pot/pot_ml_exx.h index 5936add9050..2a4eeb6eb63 100644 --- a/source/source_estate/module_pot/pot_ml_exx.h +++ b/source/source_estate/module_pot/pot_ml_exx.h @@ -12,67 +12,69 @@ namespace elecstate class ML_EXX : public ML_Base { -public: - ML_EXX(); - virtual ~ML_EXX(); + public: + ML_EXX (); + virtual ~ML_EXX (); - void set_para(const Input_para& inp, const UnitCell* ucell_in, const 
ModulePW::PW_Basis* rho_basis_in); + void set_para (const Input_para& inp, const UnitCell* ucell_in, const ModulePW::PW_Basis* rho_basis_in); - void ml_potential(const double * const * prho, const ModulePW::PW_Basis *pw_rho, ModuleBase::matrix &rpotential); + void ml_potential (const double* const* prho, const ModulePW::PW_Basis* pw_rho, ModuleBase::matrix& rpotential); // output all parameters - void generateTrainData(const double * const *prho, const ModulePW::PW_Basis *pw_rho, const double *veff); - void localTest(const double * const *prho, const ModulePW::PW_Basis *pw_rho); - - void init_data( - const int &nkernel, - const bool &of_ml_gamma, - const bool &of_ml_p, - const bool &of_ml_q, - const bool &of_ml_tanhp, - const bool &of_ml_tanhq, - const std::vector &of_ml_gammanl_, - const std::vector &of_ml_pnl, - const std::vector &of_ml_qnl, - const std::vector &of_ml_xi, - const std::vector &of_ml_tanhxi, - const std::vector &of_ml_tanhxi_nl, - const std::vector &of_ml_tanh_pnl, - const std::vector &of_ml_tanh_qnl, - const std::vector &of_ml_tanhp_nl, - const std::vector &of_ml_tanhq_nl - ); + void generateTrainData (const double* const* prho, const ModulePW::PW_Basis* pw_rho, const double* veff); + void localTest (const double* const* prho, const ModulePW::PW_Basis* pw_rho); + + void init_data (const int& nkernel, + const bool& of_ml_gamma, + const bool& of_ml_p, + const bool& of_ml_q, + const bool& of_ml_tanhp, + const bool& of_ml_tanhq, + const std::vector& of_ml_gammanl_, + const std::vector& of_ml_pnl, + const std::vector& of_ml_qnl, + const std::vector& of_ml_xi, + const std::vector& of_ml_tanhxi, + const std::vector& of_ml_tanhxi_nl, + const std::vector& of_ml_tanh_pnl, + const std::vector& of_ml_tanh_qnl, + const std::vector& of_ml_tanhp_nl, + const std::vector& of_ml_tanhq_nl); double ml_exx_energy = 0.0; }; - class PotML_EXX : public PotBase { public: - PotML_EXX(const ModulePW::PW_Basis* rho_basis_in, const UnitCell* ucell_in) + PotML_EXX (const 
ModulePW::PW_Basis* rho_basis_in, const UnitCell* ucell_in) { this->rho_basis_ = rho_basis_in; this->dynamic_mode = true; this->fixed_mode = false; - this->ml_exx.set_para(PARAM.inp, ucell_in, rho_basis_in); + this->ml_exx.set_para (PARAM.inp, ucell_in, rho_basis_in); } - ~PotML_EXX() {}; + ~PotML_EXX () {}; - void cal_v_eff(const Charge*const chg, const UnitCell*const ucell, ModuleBase::matrix& v_eff) override + void + cal_v_eff (const Charge* const chg, const UnitCell* const ucell, ModuleBase::matrix& v_eff) override { - if (PARAM.inp.of_ml_local_test) this->ml_exx.localTest(chg->rho, this->rho_basis_); - this->ml_exx.ml_potential(chg->rho, this->rho_basis_, v_eff); + if (PARAM.inp.of_ml_local_test) + this->ml_exx.localTest (chg->rho, this->rho_basis_); + this->ml_exx.ml_potential (chg->rho, this->rho_basis_, v_eff); } - double get_energy() const override { return this->ml_exx.ml_exx_energy; } + double + get_energy () const override + { + return this->ml_exx.ml_exx_energy; + } -private: - ML_EXX ml_exx; + private: + ML_EXX ml_exx; }; - -} +} // namespace elecstate #endif #endif diff --git a/source/source_estate/module_pot/pot_ml_exx_label.cpp b/source/source_estate/module_pot/pot_ml_exx_label.cpp index 3908b7c5ef9..b838eaddc96 100644 --- a/source/source_estate/module_pot/pot_ml_exx_label.cpp +++ b/source/source_estate/module_pot/pot_ml_exx_label.cpp @@ -6,7 +6,7 @@ namespace elecstate { /** * @brief Initialize the data for ML KEDF, and generate the mapping between descriptor and kernel - * + * * @param nkernel number of kernels * @param of_ml_gamma whether to use gamma descriptor * @param of_ml_p whether to use p descriptor @@ -24,113 +24,127 @@ namespace elecstate * @param of_ml_tanhp_nl whether to use tanhp_nl descriptor * @param of_ml_tanhq_nl whether to use tanhq_nl descriptor */ -void ML_EXX::init_data( - const int &nkernel, - const bool &of_ml_gamma, - const bool &of_ml_p, - const bool &of_ml_q, - const bool &of_ml_tanhp, - const bool &of_ml_tanhq, - const 
std::vector &of_ml_gammanl, - const std::vector &of_ml_pnl, - const std::vector &of_ml_qnl, - const std::vector &of_ml_xi, - const std::vector &of_ml_tanhxi, - const std::vector &of_ml_tanhxi_nl, - const std::vector &of_ml_tanh_pnl, - const std::vector &of_ml_tanh_qnl, - const std::vector &of_ml_tanhp_nl, - const std::vector &of_ml_tanhq_nl -) +void + ML_EXX::init_data (const int& nkernel, + const bool& of_ml_gamma, + const bool& of_ml_p, + const bool& of_ml_q, + const bool& of_ml_tanhp, + const bool& of_ml_tanhq, + const std::vector& of_ml_gammanl, + const std::vector& of_ml_pnl, + const std::vector& of_ml_qnl, + const std::vector& of_ml_xi, + const std::vector& of_ml_tanhxi, + const std::vector& of_ml_tanhxi_nl, + const std::vector& of_ml_tanh_pnl, + const std::vector& of_ml_tanh_qnl, + const std::vector& of_ml_tanhp_nl, + const std::vector& of_ml_tanhq_nl) { this->ninput = 0; // --------- semi-local descriptors --------- - if (of_ml_gamma){ - this->descriptor_type.push_back("gamma"); - this->kernel_index.push_back(-1); - this->ninput++; - } - if (of_ml_p){ - this->descriptor_type.push_back("p"); - this->kernel_index.push_back(-1); - this->ninput++; - } - if (of_ml_q){ - this->descriptor_type.push_back("q"); - this->kernel_index.push_back(-1); - this->ninput++; - } - // --------- non-local descriptors --------- - for (int ik = 0; ik < nkernel; ++ik) - { - if (of_ml_gammanl[ik]){ - this->descriptor_type.push_back("gammanl"); - this->kernel_index.push_back(ik); - this->ninput++; - } - if (of_ml_pnl[ik]){ - this->descriptor_type.push_back("pnl"); - this->kernel_index.push_back(ik); - this->ninput++; - } - if (of_ml_qnl[ik]){ - this->descriptor_type.push_back("qnl"); - this->kernel_index.push_back(ik); - this->ninput++; - } - if (of_ml_xi[ik]){ - this->descriptor_type.push_back("xi"); - this->kernel_index.push_back(ik); + if (of_ml_gamma) + { + this->descriptor_type.push_back ("gamma"); + this->kernel_index.push_back (-1); this->ninput++; } - if (of_ml_tanhxi[ik]){ - 
this->descriptor_type.push_back("tanhxi"); - this->kernel_index.push_back(ik); + if (of_ml_p) + { + this->descriptor_type.push_back ("p"); + this->kernel_index.push_back (-1); this->ninput++; } - if (of_ml_tanhxi_nl[ik]){ - this->descriptor_type.push_back("tanhxi_nl"); - this->kernel_index.push_back(ik); + if (of_ml_q) + { + this->descriptor_type.push_back ("q"); + this->kernel_index.push_back (-1); this->ninput++; } - } - // --------- semi-local descriptors --------- - if (of_ml_tanhp){ - this->descriptor_type.push_back("tanhp"); - this->kernel_index.push_back(-1); - this->ninput++; - } - if (of_ml_tanhq){ - this->descriptor_type.push_back("tanhq"); - this->kernel_index.push_back(-1); - this->ninput++; - } // --------- non-local descriptors --------- for (int ik = 0; ik < nkernel; ++ik) - { - if (of_ml_tanh_pnl[ik]){ - this->descriptor_type.push_back("tanh_pnl"); - this->kernel_index.push_back(ik); - this->ninput++; + { + if (of_ml_gammanl[ik]) + { + this->descriptor_type.push_back ("gammanl"); + this->kernel_index.push_back (ik); + this->ninput++; + } + if (of_ml_pnl[ik]) + { + this->descriptor_type.push_back ("pnl"); + this->kernel_index.push_back (ik); + this->ninput++; + } + if (of_ml_qnl[ik]) + { + this->descriptor_type.push_back ("qnl"); + this->kernel_index.push_back (ik); + this->ninput++; + } + if (of_ml_xi[ik]) + { + this->descriptor_type.push_back ("xi"); + this->kernel_index.push_back (ik); + this->ninput++; + } + if (of_ml_tanhxi[ik]) + { + this->descriptor_type.push_back ("tanhxi"); + this->kernel_index.push_back (ik); + this->ninput++; + } + if (of_ml_tanhxi_nl[ik]) + { + this->descriptor_type.push_back ("tanhxi_nl"); + this->kernel_index.push_back (ik); + this->ninput++; + } } - if (of_ml_tanh_qnl[ik]){ - this->descriptor_type.push_back("tanh_qnl"); - this->kernel_index.push_back(ik); + // --------- semi-local descriptors --------- + if (of_ml_tanhp) + { + this->descriptor_type.push_back ("tanhp"); + this->kernel_index.push_back (-1); 
this->ninput++; } - if (of_ml_tanhp_nl[ik]){ - this->descriptor_type.push_back("tanhp_nl"); - this->kernel_index.push_back(ik); + if (of_ml_tanhq) + { + this->descriptor_type.push_back ("tanhq"); + this->kernel_index.push_back (-1); this->ninput++; } - if (of_ml_tanhq_nl[ik]){ - this->descriptor_type.push_back("tanhq_nl"); - this->kernel_index.push_back(ik); - this->ninput++; + // --------- non-local descriptors --------- + for (int ik = 0; ik < nkernel; ++ik) + { + if (of_ml_tanh_pnl[ik]) + { + this->descriptor_type.push_back ("tanh_pnl"); + this->kernel_index.push_back (ik); + this->ninput++; + } + if (of_ml_tanh_qnl[ik]) + { + this->descriptor_type.push_back ("tanh_qnl"); + this->kernel_index.push_back (ik); + this->ninput++; + } + if (of_ml_tanhp_nl[ik]) + { + this->descriptor_type.push_back ("tanhp_nl"); + this->kernel_index.push_back (ik); + this->ninput++; + } + if (of_ml_tanhq_nl[ik]) + { + this->descriptor_type.push_back ("tanhq_nl"); + this->kernel_index.push_back (ik); + this->ninput++; + } } - } this->descriptor2kernel = {{"gamma", {}}, {"p", {}}, @@ -150,28 +164,28 @@ void ML_EXX::init_data( this->descriptor2index = this->descriptor2kernel; for (int i = 0; i < this->ninput; ++i) - { - this->descriptor2kernel[this->descriptor_type[i]].push_back(this->kernel_index[i]); - this->descriptor2index[this->descriptor_type[i]].push_back(i); - } + { + this->descriptor2kernel[this->descriptor_type[i]].push_back (this->kernel_index[i]); + this->descriptor2index[this->descriptor_type[i]].push_back (i); + } // std::cout << "descriptor2index " << this->descriptor2index << std::endl; // std::cout << "descriptor2kernel " << this->descriptor2kernel << std::endl; - this->ml_gamma = this->descriptor2index["gamma"].size() > 0; - this->ml_p = this->descriptor2index["p"].size() > 0; - this->ml_q = this->descriptor2index["q"].size() > 0; - this->ml_tanhp = this->descriptor2index["tanhp"].size() > 0; - this->ml_tanhq = this->descriptor2index["tanhq"].size() > 0; - 
this->ml_gammanl = this->descriptor2index["gammanl"].size() > 0; - this->ml_pnl = this->descriptor2index["pnl"].size() > 0; - this->ml_qnl = this->descriptor2index["qnl"].size() > 0; - this->ml_xi = this->descriptor2index["xi"].size() > 0; - this->ml_tanhxi = this->descriptor2index["tanhxi"].size() > 0; - this->ml_tanhxi_nl = this->descriptor2index["tanhxi_nl"].size() > 0; - this->ml_tanh_pnl = this->descriptor2index["tanh_pnl"].size() > 0; - this->ml_tanh_qnl = this->descriptor2index["tanh_qnl"].size() > 0; - this->ml_tanhp_nl = this->descriptor2index["tanhp_nl"].size() > 0; - this->ml_tanhq_nl = this->descriptor2index["tanhq_nl"].size() > 0; + this->ml_gamma = this->descriptor2index["gamma"].size () > 0; + this->ml_p = this->descriptor2index["p"].size () > 0; + this->ml_q = this->descriptor2index["q"].size () > 0; + this->ml_tanhp = this->descriptor2index["tanhp"].size () > 0; + this->ml_tanhq = this->descriptor2index["tanhq"].size () > 0; + this->ml_gammanl = this->descriptor2index["gammanl"].size () > 0; + this->ml_pnl = this->descriptor2index["pnl"].size () > 0; + this->ml_qnl = this->descriptor2index["qnl"].size () > 0; + this->ml_xi = this->descriptor2index["xi"].size () > 0; + this->ml_tanhxi = this->descriptor2index["tanhxi"].size () > 0; + this->ml_tanhxi_nl = this->descriptor2index["tanhxi_nl"].size () > 0; + this->ml_tanh_pnl = this->descriptor2index["tanh_pnl"].size () > 0; + this->ml_tanh_qnl = this->descriptor2index["tanh_qnl"].size () > 0; + this->ml_tanhp_nl = this->descriptor2index["tanhp_nl"].size () > 0; + this->ml_tanhq_nl = this->descriptor2index["tanhq_nl"].size () > 0; bool gene_gammanl_tot = false; bool gene_pnl_tot = false; @@ -182,124 +196,138 @@ void ML_EXX::init_data( bool gene_tanhq_nl_tot = false; this->gene_data_label = {{"gamma", {}}, - {"p", {}}, - {"q", {}}, - {"tanhp", {}}, - {"tanhq", {}}, - {"gammanl", {}}, - {"pnl", {}}, - {"qnl", {}}, - {"xi", {}}, - {"tanhxi", {}}, - {"tanhxi_nl", {}}, - {"tanh_pnl", {}}, - {"tanh_qnl", {}}, 
- {"tanhp_nl", {}}, - {"tanhq_nl", {}}}; + {"p", {}}, + {"q", {}}, + {"tanhp", {}}, + {"tanhq", {}}, + {"gammanl", {}}, + {"pnl", {}}, + {"qnl", {}}, + {"xi", {}}, + {"tanhxi", {}}, + {"tanhxi_nl", {}}, + {"tanh_pnl", {}}, + {"tanh_qnl", {}}, + {"tanhp_nl", {}}, + {"tanhq_nl", {}}}; - for (std::string descriptor : {"gamma", "p", "q", "tanhp", "tanhq"}) - { - this->gene_data_label[descriptor].push_back(0); - } - for (std::string descriptor : {"gammanl", "pnl", "qnl", "xi", "tanhxi", "tanhxi_nl", - "tanh_pnl", "tanh_qnl", "tanhp_nl", "tanhq_nl"}) - { - for (int ik = 0; ik < nkernel; ++ik) + for (std::string descriptor: {"gamma", "p", "q", "tanhp", "tanhq"}) { - this->gene_data_label[descriptor].push_back(0); + this->gene_data_label[descriptor].push_back (0); + } + for (std::string descriptor: + {"gammanl", "pnl", "qnl", "xi", "tanhxi", "tanhxi_nl", "tanh_pnl", "tanh_qnl", "tanhp_nl", "tanhq_nl"}) + { + for (int ik = 0; ik < nkernel; ++ik) + { + this->gene_data_label[descriptor].push_back (0); + } } - } for (int ik = 0; ik < nkernel; ++ik) - { - this->gene_data_label["pnl"][ik] = of_ml_pnl[ik] || of_ml_tanh_pnl[ik]; - this->gene_data_label["qnl"][ik] = of_ml_qnl[ik] || of_ml_tanh_qnl[ik]; - this->gene_data_label["tanhxi_nl"][ik] = of_ml_tanhxi_nl[ik]; - this->gene_data_label["tanhxi"][ik] = of_ml_tanhxi[ik] || of_ml_tanhxi_nl[ik]; - this->gene_data_label["xi"][ik] = of_ml_xi[ik] || this->gene_data_label["tanhxi"][ik]; - this->gene_data_label["gammanl"][ik] = of_ml_gammanl[ik] || this->gene_data_label["xi"][ik]; - this->gene_data_label["tanh_pnl"][ik] = of_ml_tanh_pnl[ik]; - this->gene_data_label["tanh_qnl"][ik] = of_ml_tanh_qnl[ik]; - this->gene_data_label["tanhp_nl"][ik] = of_ml_tanhp_nl[ik]; - this->gene_data_label["tanhq_nl"][ik] = of_ml_tanhq_nl[ik]; - // this->gene_data_label["pnl"][ik] = of_ml_pnl[ik] || of_ml_tanh_pnl[ik]; + { + this->gene_data_label["pnl"][ik] = of_ml_pnl[ik] || of_ml_tanh_pnl[ik]; + this->gene_data_label["qnl"][ik] = of_ml_qnl[ik] || 
of_ml_tanh_qnl[ik]; + this->gene_data_label["tanhxi_nl"][ik] = of_ml_tanhxi_nl[ik]; + this->gene_data_label["tanhxi"][ik] = of_ml_tanhxi[ik] || of_ml_tanhxi_nl[ik]; + this->gene_data_label["xi"][ik] = of_ml_xi[ik] || this->gene_data_label["tanhxi"][ik]; + this->gene_data_label["gammanl"][ik] = of_ml_gammanl[ik] || this->gene_data_label["xi"][ik]; + this->gene_data_label["tanh_pnl"][ik] = of_ml_tanh_pnl[ik]; + this->gene_data_label["tanh_qnl"][ik] = of_ml_tanh_qnl[ik]; + this->gene_data_label["tanhp_nl"][ik] = of_ml_tanhp_nl[ik]; + this->gene_data_label["tanhq_nl"][ik] = of_ml_tanhq_nl[ik]; + // this->gene_data_label["pnl"][ik] = of_ml_pnl[ik] || of_ml_tanh_pnl[ik]; - gene_gammanl_tot = gene_gammanl_tot || this->gene_data_label["gammanl"][ik]; - gene_pnl_tot = gene_pnl_tot || this->gene_data_label["pnl"][ik]; - gene_qnl_tot = gene_qnl_tot || this->gene_data_label["qnl"][ik]; - gene_tanh_pnl_tot = gene_tanh_pnl_tot || this->gene_data_label["tanh_pnl"][ik]; - gene_tanh_qnl_tot = gene_tanh_qnl_tot || this->gene_data_label["tanh_qnl"][ik]; - gene_tanhp_nl_tot = gene_tanhp_nl_tot || this->gene_data_label["tanhp_nl"][ik]; - gene_tanhq_nl_tot = gene_tanhq_nl_tot || this->gene_data_label["tanhq_nl"][ik]; - } + gene_gammanl_tot = gene_gammanl_tot || this->gene_data_label["gammanl"][ik]; + gene_pnl_tot = gene_pnl_tot || this->gene_data_label["pnl"][ik]; + gene_qnl_tot = gene_qnl_tot || this->gene_data_label["qnl"][ik]; + gene_tanh_pnl_tot = gene_tanh_pnl_tot || this->gene_data_label["tanh_pnl"][ik]; + gene_tanh_qnl_tot = gene_tanh_qnl_tot || this->gene_data_label["tanh_qnl"][ik]; + gene_tanhp_nl_tot = gene_tanhp_nl_tot || this->gene_data_label["tanhp_nl"][ik]; + gene_tanhq_nl_tot = gene_tanhq_nl_tot || this->gene_data_label["tanhq_nl"][ik]; + } this->gene_data_label["gamma"][0] = of_ml_gamma || gene_gammanl_tot; this->gene_data_label["tanhp"][0] = of_ml_tanhp || gene_tanhp_nl_tot || gene_tanh_pnl_tot; this->gene_data_label["tanhq"][0] = of_ml_tanhq || gene_tanhq_nl_tot || 
gene_tanh_qnl_tot; this->gene_data_label["p"][0] = of_ml_p || this->gene_data_label["tanhp"][0] || gene_pnl_tot; this->gene_data_label["q"][0] = of_ml_q || this->gene_data_label["tanhq"][0] || gene_qnl_tot; - - if (this->gene_data_label["gamma"][0]){ - this->gamma = std::vector(this->nx, 0.); - } - if (this->gene_data_label["p"][0]){ - this->nablaRho = std::vector >(3, std::vector(this->nx, 0.)); - this->p = std::vector(this->nx, 0.); - } - if (this->gene_data_label["q"][0]){ - this->q = std::vector(this->nx, 0.); - } - if (this->gene_data_label["tanhp"][0]){ - this->tanhp = std::vector(this->nx, 0.); - } - if (this->gene_data_label["tanhq"][0]){ - this->tanhq = std::vector(this->nx, 0.); - } - - for (int ik = 0; ik < nkernel; ++ik) - { - this->gammanl.push_back({}); - this->pnl.push_back({}); - this->qnl.push_back({}); - this->xi.push_back({}); - this->tanhxi.push_back({}); - this->tanhxi_nl.push_back({}); - this->tanh_pnl.push_back({}); - this->tanh_qnl.push_back({}); - this->tanhp_nl.push_back({}); - this->tanhq_nl.push_back({}); - - if (this->gene_data_label["gammanl"][ik]){ - this->gammanl[ik] = std::vector(this->nx, 0.); - } - if (this->gene_data_label["pnl"][ik]){ - this->pnl[ik] = std::vector(this->nx, 0.); - } - if (this->gene_data_label["qnl"][ik]){ - this->qnl[ik] = std::vector(this->nx, 0.); - } - if (this->gene_data_label["xi"][ik]){ - this->xi[ik] = std::vector(this->nx, 0.); - } - if (this->gene_data_label["tanhxi"][ik]){ - this->tanhxi[ik] = std::vector(this->nx, 0.); + if (this->gene_data_label["gamma"][0]) + { + this->gamma = std::vector (this->nx, 0.); } - if (this->gene_data_label["tanhxi_nl"][ik]){ - this->tanhxi_nl[ik] = std::vector(this->nx, 0.); + if (this->gene_data_label["p"][0]) + { + this->nablaRho = std::vector> (3, std::vector (this->nx, 0.)); + this->p = std::vector (this->nx, 0.); } - if (this->gene_data_label["tanh_pnl"][ik]){ - this->tanh_pnl[ik] = std::vector(this->nx, 0.); + if (this->gene_data_label["q"][0]) + { + this->q = 
std::vector (this->nx, 0.); } - if (this->gene_data_label["tanh_qnl"][ik]){ - this->tanh_qnl[ik] = std::vector(this->nx, 0.); + if (this->gene_data_label["tanhp"][0]) + { + this->tanhp = std::vector (this->nx, 0.); } - if (this->gene_data_label["tanhp_nl"][ik]){ - this->tanhp_nl[ik] = std::vector(this->nx, 0.); + if (this->gene_data_label["tanhq"][0]) + { + this->tanhq = std::vector (this->nx, 0.); } - if (this->gene_data_label["tanhq_nl"][ik]){ - this->tanhq_nl[ik] = std::vector(this->nx, 0.); + + for (int ik = 0; ik < nkernel; ++ik) + { + this->gammanl.push_back ({}); + this->pnl.push_back ({}); + this->qnl.push_back ({}); + this->xi.push_back ({}); + this->tanhxi.push_back ({}); + this->tanhxi_nl.push_back ({}); + this->tanh_pnl.push_back ({}); + this->tanh_qnl.push_back ({}); + this->tanhp_nl.push_back ({}); + this->tanhq_nl.push_back ({}); + + if (this->gene_data_label["gammanl"][ik]) + { + this->gammanl[ik] = std::vector (this->nx, 0.); + } + if (this->gene_data_label["pnl"][ik]) + { + this->pnl[ik] = std::vector (this->nx, 0.); + } + if (this->gene_data_label["qnl"][ik]) + { + this->qnl[ik] = std::vector (this->nx, 0.); + } + if (this->gene_data_label["xi"][ik]) + { + this->xi[ik] = std::vector (this->nx, 0.); + } + if (this->gene_data_label["tanhxi"][ik]) + { + this->tanhxi[ik] = std::vector (this->nx, 0.); + } + if (this->gene_data_label["tanhxi_nl"][ik]) + { + this->tanhxi_nl[ik] = std::vector (this->nx, 0.); + } + if (this->gene_data_label["tanh_pnl"][ik]) + { + this->tanh_pnl[ik] = std::vector (this->nx, 0.); + } + if (this->gene_data_label["tanh_qnl"][ik]) + { + this->tanh_qnl[ik] = std::vector (this->nx, 0.); + } + if (this->gene_data_label["tanhp_nl"][ik]) + { + this->tanhp_nl[ik] = std::vector (this->nx, 0.); + } + if (this->gene_data_label["tanhq_nl"][ik]) + { + this->tanhq_nl[ik] = std::vector (this->nx, 0.); + } } - } -} } +} // namespace elecstate #endif diff --git a/source/source_estate/module_pot/pot_sep.cpp 
b/source/source_estate/module_pot/pot_sep.cpp index 2d5e0ee0bb0..67de7ea6191 100644 --- a/source/source_estate/module_pot/pot_sep.cpp +++ b/source/source_estate/module_pot/pot_sep.cpp @@ -5,24 +5,25 @@ namespace elecstate { -void PotSep::cal_fixed_v(double* vl_pseudo) +void + PotSep::cal_fixed_v (double* vl_pseudo) { - ModuleBase::TITLE("PotSep", "cal_fixed_v"); - ModuleBase::timer::start("PotSep", "cal_fixed_v"); + ModuleBase::TITLE ("PotSep", "cal_fixed_v"); + ModuleBase::timer::start ("PotSep", "cal_fixed_v"); // GlobalC::vsep_cell.generate_vsep_r(this->rho_basis_[0], this->sf_[0]); // const_cast(this->vsep_)->generate_vsep_r(this->rho_basis_[0], this->sf_[0]); if (vsep_cell != nullptr) - { - for (int ir = 0; ir < this->rho_basis_->nrxx; ++ir) { - vl_pseudo[ir] += vsep_cell->vsep_r[ir]; + for (int ir = 0; ir < this->rho_basis_->nrxx; ++ir) + { + vl_pseudo[ir] += vsep_cell->vsep_r[ir]; + } } - } - ModuleBase::timer::end("PotSep", "cal_fixed_v"); + ModuleBase::timer::end ("PotSep", "cal_fixed_v"); return; } } // namespace elecstate diff --git a/source/source_estate/module_pot/pot_sep.h b/source/source_estate/module_pot/pot_sep.h index cb4368030d5..22235e934eb 100644 --- a/source/source_estate/module_pot/pot_sep.h +++ b/source/source_estate/module_pot/pot_sep.h @@ -23,10 +23,10 @@ class PotSep : public PotBase // this->fixed_mode = true; // this->dynamic_mode = false; // } - PotSep(const ModuleBase::ComplexMatrix* sf_in, const ModulePW::PW_Basis* rho_basis_in, const VSep* vsep_cell_in) - : sf_(sf_in), vsep_cell(vsep_cell_in) + PotSep (const ModuleBase::ComplexMatrix* sf_in, const ModulePW::PW_Basis* rho_basis_in, const VSep* vsep_cell_in) + : sf_ (sf_in), vsep_cell (vsep_cell_in) { - assert(vsep_cell->vsep_form.nr == this->sf_->nr); + assert (vsep_cell->vsep_form.nr == this->sf_->nr); // assert(this->vsep_->vsep_form.nr == this->sf_->nr); this->rho_basis_ = rho_basis_in; // this->ntype_ = this->vsep_->vsep_form.nr; @@ -34,7 +34,7 @@ class PotSep : public PotBase 
this->dynamic_mode = false; } - void cal_fixed_v(double* vl_pseudo) override; + void cal_fixed_v (double* vl_pseudo) override; const VSep* vsep_cell = nullptr; const ModuleBase::ComplexMatrix* sf_ = nullptr; diff --git a/source/source_estate/module_pot/pot_surchem.hpp b/source/source_estate/module_pot/pot_surchem.hpp index dc06e60fd4f..bc99b03861b 100644 --- a/source/source_estate/module_pot/pot_surchem.hpp +++ b/source/source_estate/module_pot/pot_surchem.hpp @@ -12,42 +12,43 @@ class PotSurChem : public PotBase public: // constructor for exchange-correlation potential // meta-GGA should input matrix of kinetic potential, it is optional - PotSurChem(const ModulePW::PW_Basis* rho_basis_in, - Structure_Factor* structure_factors_in, - const double* vlocal_in, - surchem* surchem_in) - : vlocal(vlocal_in), surchem_(surchem_in) + PotSurChem (const ModulePW::PW_Basis* rho_basis_in, + Structure_Factor* structure_factors_in, + const double* vlocal_in, + surchem* surchem_in) + : vlocal (vlocal_in), surchem_ (surchem_in) { this->rho_basis_ = rho_basis_in; this->structure_factors_ = structure_factors_in; this->dynamic_mode = true; this->fixed_mode = false; } - ~PotSurChem() + ~PotSurChem () { if (this->allocated) - { - this->surchem_->clear(); - } + { + this->surchem_->clear (); + } } // Passing an explicit output matrix makes the lifetime and allocation explicit and avoids hidden allocations. 
- void cal_v_eff(const Charge*const chg, const UnitCell*const ucell, ModuleBase::matrix& v_eff) override + void + cal_v_eff (const Charge* const chg, const UnitCell* const ucell, ModuleBase::matrix& v_eff) override { if (!this->allocated) - { - this->surchem_->allocate(this->rho_basis_->nrxx, v_eff.nr); - this->allocated = true; - } - ModuleBase::matrix v_sol_correction(v_eff.nr, this->rho_basis_->nrxx); - this->surchem_->v_correction(*ucell, - *chg->pgrid, - const_cast(this->rho_basis_), - v_eff.nr, - chg->rho, - this->vlocal, - this->structure_factors_, - v_sol_correction); + { + this->surchem_->allocate (this->rho_basis_->nrxx, v_eff.nr); + this->allocated = true; + } + ModuleBase::matrix v_sol_correction (v_eff.nr, this->rho_basis_->nrxx); + this->surchem_->v_correction (*ucell, + *chg->pgrid, + const_cast (this->rho_basis_), + v_eff.nr, + chg->rho, + this->vlocal, + this->structure_factors_, + v_sol_correction); v_eff += v_sol_correction; } diff --git a/source/source_estate/module_pot/pot_xc.cpp b/source/source_estate/module_pot/pot_xc.cpp index e122a9f1ae8..46447f6cbba 100644 --- a/source/source_estate/module_pot/pot_xc.cpp +++ b/source/source_estate/module_pot/pot_xc.cpp @@ -10,38 +10,43 @@ namespace elecstate { -void PotXC::cal_v_eff(const Charge*const chg, const UnitCell*const ucell, ModuleBase::matrix& v_eff) +void + PotXC::cal_v_eff (const Charge* const chg, const UnitCell* const ucell, ModuleBase::matrix& v_eff) { - ModuleBase::TITLE("PotXC", "cal_veff"); - ModuleBase::timer::start("PotXC", "cal_veff"); + ModuleBase::TITLE ("PotXC", "cal_veff"); + ModuleBase::timer::start ("PotXC", "cal_veff"); const int nrxx_current = chg->nrxx; - + //---------------------------------------------------------- // calculate the exchange-correlation potential //---------------------------------------------------------- - if (XC_Functional::get_ked_flag()) - { + if (XC_Functional::get_ked_flag ()) + { #ifdef USE_LIBXC - const std::tuple etxc_vtxc_v - = 
XC_Functional_Libxc::v_xc_meta(XC_Functional::get_func_id(), nrxx_current, ucell->omega, ucell->tpiba, chg); - *(this->etxc_) = std::get<0>(etxc_vtxc_v); - *(this->vtxc_) = std::get<1>(etxc_vtxc_v); - v_eff += std::get<2>(etxc_vtxc_v); - *(this->vofk) = std::get<3>(etxc_vtxc_v); + const std::tuple etxc_vtxc_v + = XC_Functional_Libxc::v_xc_meta (XC_Functional::get_func_id (), + nrxx_current, + ucell->omega, + ucell->tpiba, + chg); + *(this->etxc_) = std::get<0> (etxc_vtxc_v); + *(this->vtxc_) = std::get<1> (etxc_vtxc_v); + v_eff += std::get<2> (etxc_vtxc_v); + *(this->vofk) = std::get<3> (etxc_vtxc_v); #else - ModuleBase::WARNING_QUIT("v_of_rho", "to use mGGA, compile with LIBXC"); + ModuleBase::WARNING_QUIT ("v_of_rho", "to use mGGA, compile with LIBXC"); #endif - } + } else - { - const std::tuple etxc_vtxc_v - = XC_Functional::v_xc(nrxx_current, chg, ucell); - *(this->etxc_) = std::get<0>(etxc_vtxc_v); - *(this->vtxc_) = std::get<1>(etxc_vtxc_v); - v_eff += std::get<2>(etxc_vtxc_v); - } - ModuleBase::timer::end("PotXC", "cal_veff"); + { + const std::tuple etxc_vtxc_v + = XC_Functional::v_xc (nrxx_current, chg, ucell); + *(this->etxc_) = std::get<0> (etxc_vtxc_v); + *(this->vtxc_) = std::get<1> (etxc_vtxc_v); + v_eff += std::get<2> (etxc_vtxc_v); + } + ModuleBase::timer::end ("PotXC", "cal_veff"); } } // namespace elecstate diff --git a/source/source_estate/module_pot/pot_xc.h b/source/source_estate/module_pot/pot_xc.h index 47243c633d7..5c0ba667d42 100644 --- a/source/source_estate/module_pot/pot_xc.h +++ b/source/source_estate/module_pot/pot_xc.h @@ -11,18 +11,18 @@ class PotXC : public PotBase public: // constructor for exchange-correlation potential // meta-GGA should input matrix of kinetic potential, it is optional - PotXC(const ModulePW::PW_Basis* rho_basis_in, - double* etxc_in, - double* vtxc_in, - ModuleBase::matrix* vofk_in = nullptr) - : etxc_(etxc_in), vtxc_(vtxc_in), vofk(vofk_in) + PotXC (const ModulePW::PW_Basis* rho_basis_in, + double* etxc_in, + 
double* vtxc_in, + ModuleBase::matrix* vofk_in = nullptr) + : etxc_ (etxc_in), vtxc_ (vtxc_in), vofk (vofk_in) { this->rho_basis_ = rho_basis_in; this->dynamic_mode = true; this->fixed_mode = false; } - void cal_v_eff(const Charge*const chg, const UnitCell*const ucell, ModuleBase::matrix& v_eff) override; + void cal_v_eff (const Charge* const chg, const UnitCell* const ucell, ModuleBase::matrix& v_eff) override; ModuleBase::matrix* vofk = nullptr; double* etxc_ = nullptr; diff --git a/source/source_estate/module_pot/pot_xc_fdm.cpp b/source/source_estate/module_pot/pot_xc_fdm.cpp index d0a49cc1012..7d8249c44f7 100644 --- a/source/source_estate/module_pot/pot_xc_fdm.cpp +++ b/source/source_estate/module_pot/pot_xc_fdm.cpp @@ -9,51 +9,47 @@ namespace elecstate { -PotXC_FDM::PotXC_FDM( - const ModulePW::PW_Basis* rho_basis_in, - const Charge*const chg_0_in, - const UnitCell*const ucell) - : chg_0(chg_0_in) +PotXC_FDM::PotXC_FDM (const ModulePW::PW_Basis* rho_basis_in, const Charge* const chg_0_in, const UnitCell* const ucell) + : chg_0 (chg_0_in) { - this->rho_basis_ = rho_basis_in; - this->dynamic_mode = true; - this->fixed_mode = false; + this->rho_basis_ = rho_basis_in; + this->dynamic_mode = true; + this->fixed_mode = false; - const std::tuple etxc_vtxc_v_0 - = XC_Functional::v_xc(this->chg_0->nrxx, this->chg_0, ucell); - this->v_xc_0 = std::get<2>(etxc_vtxc_v_0); + const std::tuple etxc_vtxc_v_0 + = XC_Functional::v_xc (this->chg_0->nrxx, this->chg_0, ucell); + this->v_xc_0 = std::get<2> (etxc_vtxc_v_0); } -void PotXC_FDM::cal_v_eff( - const Charge*const chg_1, - const UnitCell*const ucell, - ModuleBase::matrix& v_eff) +void + PotXC_FDM::cal_v_eff (const Charge* const chg_1, const UnitCell* const ucell, ModuleBase::matrix& v_eff) { - ModuleBase::TITLE("PotXC_FDM", "cal_veff"); - ModuleBase::timer::start("PotXC_FDM", "cal_veff"); + ModuleBase::TITLE ("PotXC_FDM", "cal_veff"); + ModuleBase::timer::start ("PotXC_FDM", "cal_veff"); - assert(this->chg_0->nrxx == 
chg_1->nrxx); - assert(this->chg_0->nspin == chg_1->nspin); + assert (this->chg_0->nrxx == chg_1->nrxx); + assert (this->chg_0->nspin == chg_1->nspin); - Charge chg_01; - chg_01.set_rhopw(chg_1->rhopw); - chg_01.allocate(chg_1->nspin, chg_01.kin_density()); + Charge chg_01; + chg_01.set_rhopw (chg_1->rhopw); + chg_01.allocate (chg_1->nspin, chg_01.kin_density ()); - for(int ir=0; irrho[is][ir] + chg_1->rho[is][ir]; } - chg_01.rho_core[ir] = chg_0->rho_core[ir] + chg_1->rho_core[ir]; - } + for (int ir = 0; ir < chg_01.nrxx; ++ir) + { + for (int is = 0; is < chg_01.nspin; ++is) + { + chg_01.rho[is][ir] = chg_0->rho[is][ir] + chg_1->rho[is][ir]; + } + chg_01.rho_core[ir] = chg_0->rho_core[ir] + chg_1->rho_core[ir]; + } - const std::tuple etxc_vtxc_v_01 - = XC_Functional::v_xc(chg_01.nrxx, &chg_01, ucell); - const ModuleBase::matrix &v_xc_01 = std::get<2>(etxc_vtxc_v_01); + const std::tuple etxc_vtxc_v_01 + = XC_Functional::v_xc (chg_01.nrxx, &chg_01, ucell); + const ModuleBase::matrix& v_xc_01 = std::get<2> (etxc_vtxc_v_01); - v_eff += v_xc_01 - this->v_xc_0; + v_eff += v_xc_01 - this->v_xc_0; - ModuleBase::timer::end("PotXC_FDM", "cal_veff"); + ModuleBase::timer::end ("PotXC_FDM", "cal_veff"); } } // namespace elecstate - diff --git a/source/source_estate/module_pot/pot_xc_fdm.h b/source/source_estate/module_pot/pot_xc_fdm.h index eb1cd880576..03c6501dfde 100644 --- a/source/source_estate/module_pot/pot_xc_fdm.h +++ b/source/source_estate/module_pot/pot_xc_fdm.h @@ -13,20 +13,13 @@ namespace elecstate class PotXC_FDM : public PotBase { -public: + public: + PotXC_FDM (const ModulePW::PW_Basis* rho_basis_in, const Charge* const chg_0_in, const UnitCell* const ucell); - PotXC_FDM( - const ModulePW::PW_Basis* rho_basis_in, - const Charge*const chg_0_in, - const UnitCell*const ucell); + void cal_v_eff (const Charge* const chg_1, const UnitCell* const ucell, ModuleBase::matrix& v_eff) override; - void cal_v_eff( - const Charge*const chg_1, - const UnitCell*const ucell, - 
ModuleBase::matrix& v_eff) override; - - const Charge*const chg_0 = nullptr; - ModuleBase::matrix v_xc_0; + const Charge* const chg_0 = nullptr; + ModuleBase::matrix v_xc_0; }; } // namespace elecstate diff --git a/source/source_estate/module_pot/potential_new.cpp b/source/source_estate/module_pot/potential_new.cpp index 85bccb157a4..3cabce0707a 100644 --- a/source/source_estate/module_pot/potential_new.cpp +++ b/source/source_estate/module_pot/potential_new.cpp @@ -15,18 +15,17 @@ namespace elecstate { -Potential::Potential(const ModulePW::PW_Basis* rho_basis_in, - const ModulePW::PW_Basis* rho_basis_smooth_in, - const UnitCell* ucell_in, - const ModuleBase::matrix* vloc_in, - Structure_Factor* structure_factors_in, - surchem* solvent_in, - double* etxc_in, - double* vtxc_in, - VSep* vsep_cell_in) - : ucell_(ucell_in), vloc_(vloc_in), structure_factors_(structure_factors_in), - solvent_(solvent_in), vsep_cell(vsep_cell_in), etxc_(etxc_in), - vtxc_(vtxc_in) +Potential::Potential (const ModulePW::PW_Basis* rho_basis_in, + const ModulePW::PW_Basis* rho_basis_smooth_in, + const UnitCell* ucell_in, + const ModuleBase::matrix* vloc_in, + Structure_Factor* structure_factors_in, + surchem* solvent_in, + double* etxc_in, + double* vtxc_in, + VSep* vsep_cell_in) + : ucell_ (ucell_in), vloc_ (vloc_in), structure_factors_ (structure_factors_in), solvent_ (solvent_in), + vsep_cell (vsep_cell_in), etxc_ (etxc_in), vtxc_ (vtxc_in) { this->rho_basis_ = rho_basis_in; this->rho_basis_smooth_ = rho_basis_smooth_in; @@ -35,55 +34,56 @@ Potential::Potential(const ModulePW::PW_Basis* rho_basis_in, this->use_gpu_ = (PARAM.inp.basis_type == "pw" && PARAM.inp.device == "gpu"); // allocate memory for Potential. 
- this->allocate(); + this->allocate (); } -Potential::~Potential() +Potential::~Potential () { - if (this->components.size() > 0) - { - for (auto comp: this->components) + if (this->components.size () > 0) { - delete comp; + for (auto comp: this->components) + { + delete comp; + } + this->components.clear (); } - this->components.clear(); - } if (use_gpu_) - { - delmem_sd_op()(s_veff_smooth); - delmem_sd_op()(s_vofk_smooth); - delmem_dd_op()(d_veff_smooth); - delmem_dd_op()(d_vofk_smooth); - } + { + delmem_sd_op () (s_veff_smooth); + delmem_sd_op () (s_vofk_smooth); + delmem_dd_op () (d_veff_smooth); + delmem_dd_op () (d_vofk_smooth); + } else - { - delmem_sh_op()(s_veff_smooth); - delmem_sh_op()(s_vofk_smooth); - } + { + delmem_sh_op () (s_veff_smooth); + delmem_sh_op () (s_vofk_smooth); + } } -void Potential::pot_register(const std::vector& components_list) +void + Potential::pot_register (const std::vector& components_list) { - ModuleBase::TITLE("Potential", "pot_register"); + ModuleBase::TITLE ("Potential", "pot_register"); // delete old components first. 
- if (this->components.size() > 0) - { - for (auto comp: this->components) + if (this->components.size () > 0) { - delete comp; + for (auto comp: this->components) + { + delete comp; + } + this->components.clear (); } - this->components.clear(); - } // register components //--------------------------- // mapping for register //--------------------------- for (auto comp: components_list) - { - PotBase* tmp = this->get_pot_type(comp); - this->components.push_back(tmp); - } + { + PotBase* tmp = this->get_pot_type (comp); + this->components.push_back (tmp); + } // after register, reset fixed_done to false this->fixed_done = false; @@ -91,269 +91,293 @@ void Potential::pot_register(const std::vector& components_list) return; } -void Potential::allocate() +void + Potential::allocate () { - ModuleBase::TITLE("Potential", "allocate"); + ModuleBase::TITLE ("Potential", "allocate"); const int nspin = PARAM.inp.nspin; - assert(nspin==1 || nspin==2 || nspin==4); + assert (nspin == 1 || nspin == 2 || nspin == 4); const int nrxx = this->rho_basis_->nrxx; const int nrxx_smooth = this->rho_basis_smooth_->nrxx; if (nrxx == 0) - { - return; - } - if (nrxx_smooth == 0) - { - return; - } - - this->v_eff_fixed.resize(nrxx); - ModuleBase::Memory::record("Pot::veff_fix", sizeof(double) * nrxx); - - this->v_eff.create(nspin, nrxx); - ModuleBase::Memory::record("Pot::veff", sizeof(double) * nspin * nrxx); - - this->veff_smooth.create(nspin, nrxx_smooth); - ModuleBase::Memory::record("Pot::veff_smooth", sizeof(double) * nspin * nrxx_smooth); - - if (XC_Functional::get_ked_flag()) - { - this->vofk_eff.create(nspin, nrxx); - ModuleBase::Memory::record("Pot::vofk", sizeof(double) * nspin * nrxx); - - this->vofk_smooth.create(nspin, nrxx_smooth); - ModuleBase::Memory::record("Pot::vofk_smooth", sizeof(double) * nspin * nrxx_smooth); - } - if (use_gpu_) - { - if (PARAM.globalv.has_float_data) { - resmem_sd_op()(s_veff_smooth, nspin * nrxx_smooth); - resmem_sd_op()(s_vofk_smooth, nspin * 
nrxx_smooth); + return; } - if (PARAM.globalv.has_double_data) + if (nrxx_smooth == 0) { - resmem_dd_op()(d_veff_smooth, nspin * nrxx_smooth); - resmem_dd_op()(d_vofk_smooth, nspin * nrxx_smooth); + return; } - } - else - { - if (PARAM.globalv.has_float_data) + + this->v_eff_fixed.resize (nrxx); + ModuleBase::Memory::record ("Pot::veff_fix", sizeof (double) * nrxx); + + this->v_eff.create (nspin, nrxx); + ModuleBase::Memory::record ("Pot::veff", sizeof (double) * nspin * nrxx); + + this->veff_smooth.create (nspin, nrxx_smooth); + ModuleBase::Memory::record ("Pot::veff_smooth", sizeof (double) * nspin * nrxx_smooth); + + if (XC_Functional::get_ked_flag ()) + { + this->vofk_eff.create (nspin, nrxx); + ModuleBase::Memory::record ("Pot::vofk", sizeof (double) * nspin * nrxx); + + this->vofk_smooth.create (nspin, nrxx_smooth); + ModuleBase::Memory::record ("Pot::vofk_smooth", sizeof (double) * nspin * nrxx_smooth); + } + if (use_gpu_) { - resmem_sh_op()(s_veff_smooth, nspin * nrxx_smooth, "POT::sveff_smooth"); - resmem_sh_op()(s_vofk_smooth, nspin * nrxx_smooth, "POT::svofk_smooth"); + if (PARAM.globalv.has_float_data) + { + resmem_sd_op () (s_veff_smooth, nspin * nrxx_smooth); + resmem_sd_op () (s_vofk_smooth, nspin * nrxx_smooth); + } + if (PARAM.globalv.has_double_data) + { + resmem_dd_op () (d_veff_smooth, nspin * nrxx_smooth); + resmem_dd_op () (d_vofk_smooth, nspin * nrxx_smooth); + } } - if (PARAM.globalv.has_double_data) + else { - this->d_veff_smooth = this->veff_smooth.c; - this->d_vofk_smooth = this->vofk_smooth.c; + if (PARAM.globalv.has_float_data) + { + resmem_sh_op () (s_veff_smooth, nspin * nrxx_smooth, "POT::sveff_smooth"); + resmem_sh_op () (s_vofk_smooth, nspin * nrxx_smooth, "POT::svofk_smooth"); + } + if (PARAM.globalv.has_double_data) + { + this->d_veff_smooth = this->veff_smooth.c; + this->d_vofk_smooth = this->vofk_smooth.c; + } + // There's no need to allocate memory for double precision pointers while in a CPU environment } - // There's no need 
to allocate memory for double precision pointers while in a CPU environment - } } -void Potential::update_from_charge(const Charge*const chg, const UnitCell*const ucell) +void + Potential::update_from_charge (const Charge* const chg, const UnitCell* const ucell) { - ModuleBase::TITLE("Potential", "update_from_charge"); - //ModuleBase::timer::start("Potential", "update_from_charge"); + ModuleBase::TITLE ("Potential", "update_from_charge"); + // ModuleBase::timer::start("Potential", "update_from_charge"); if (!this->fixed_done) - { - this->cal_fixed_v(this->v_eff_fixed.data()); - this->fixed_done = true; - } + { + this->cal_fixed_v (this->v_eff_fixed.data ()); + this->fixed_done = true; + } - this->cal_v_eff(chg, ucell, this->v_eff); + this->cal_v_eff (chg, ucell, this->v_eff); // interpolate potential on the smooth mesh if necessary - this->interpolate_vrs(); + this->interpolate_vrs (); if (this->use_gpu_) - { - if (PARAM.globalv.has_float_data) { - castmem_d2s_h2d_op()(s_veff_smooth, this->veff_smooth.c, this->veff_smooth.nr * this->veff_smooth.nc); - castmem_d2s_h2d_op()(s_vofk_smooth, this->vofk_smooth.c, this->vofk_smooth.nr * this->vofk_smooth.nc); + if (PARAM.globalv.has_float_data) + { + castmem_d2s_h2d_op () (s_veff_smooth, + this->veff_smooth.c, + this->veff_smooth.nr * this->veff_smooth.nc); + castmem_d2s_h2d_op () (s_vofk_smooth, + this->vofk_smooth.c, + this->vofk_smooth.nr * this->vofk_smooth.nc); + } + if (PARAM.globalv.has_double_data) + { + syncmem_d2d_h2d_op () (d_veff_smooth, + this->veff_smooth.c, + this->veff_smooth.nr * this->veff_smooth.nc); + syncmem_d2d_h2d_op () (d_vofk_smooth, + this->vofk_smooth.c, + this->vofk_smooth.nr * this->vofk_smooth.nc); + } } - if (PARAM.globalv.has_double_data) - { - syncmem_d2d_h2d_op()(d_veff_smooth, this->veff_smooth.c, this->veff_smooth.nr * this->veff_smooth.nc); - syncmem_d2d_h2d_op()(d_vofk_smooth, this->vofk_smooth.c, this->vofk_smooth.nr * this->vofk_smooth.nc); - } - } else - { - if 
(PARAM.globalv.has_float_data) { - castmem_d2s_h2h_op()(s_veff_smooth, this->veff_smooth.c, this->veff_smooth.nr * this->veff_smooth.nc); - castmem_d2s_h2h_op()(s_vofk_smooth, this->vofk_smooth.c, this->vofk_smooth.nr * this->vofk_smooth.nc); + if (PARAM.globalv.has_float_data) + { + castmem_d2s_h2h_op () (s_veff_smooth, + this->veff_smooth.c, + this->veff_smooth.nr * this->veff_smooth.nc); + castmem_d2s_h2h_op () (s_vofk_smooth, + this->vofk_smooth.c, + this->vofk_smooth.nr * this->vofk_smooth.nc); + } + // There's no need to synchronize memory for double precision pointers while in a CPU environment } - // There's no need to synchronize memory for double precision pointers while in a CPU environment - } - //ModuleBase::timer::end("Potential", "update_from_charge"); + // ModuleBase::timer::end("Potential", "update_from_charge"); } -void Potential::cal_fixed_v(double* vl_pseudo) +void + Potential::cal_fixed_v (double* vl_pseudo) { - ModuleBase::TITLE("Potential", "cal_fixed_v"); - ModuleBase::timer::start("Potential", "cal_fixed_v"); + ModuleBase::TITLE ("Potential", "cal_fixed_v"); + ModuleBase::timer::start ("Potential", "cal_fixed_v"); - this->v_eff_fixed.assign(this->v_eff_fixed.size(), 0.0); - for (size_t i = 0; i < this->components.size(); i++) - { - if (this->components[i]->fixed_mode) + this->v_eff_fixed.assign (this->v_eff_fixed.size (), 0.0); + for (size_t i = 0; i < this->components.size (); i++) { - this->components[i]->cal_fixed_v(vl_pseudo); + if (this->components[i]->fixed_mode) + { + this->components[i]->cal_fixed_v (vl_pseudo); + } } - } - ModuleBase::timer::end("Potential", "cal_fixed_v"); + ModuleBase::timer::end ("Potential", "cal_fixed_v"); } -void Potential::cal_v_eff(const Charge*const chg, const UnitCell*const ucell, ModuleBase::matrix& v_eff) +void + Potential::cal_v_eff (const Charge* const chg, const UnitCell* const ucell, ModuleBase::matrix& v_eff) { - ModuleBase::TITLE("Potential", "cal_veff"); - ModuleBase::timer::start("Potential", 
"cal_veff"); + ModuleBase::TITLE ("Potential", "cal_veff"); + ModuleBase::timer::start ("Potential", "cal_veff"); const int nspin_current = this->v_eff.nr; const int nrxx = this->v_eff.nc; // first of all, set v_eff to zero. - this->v_eff.zero_out(); + this->v_eff.zero_out (); // add fixed potential components // nspin = 2, add fixed components for all // nspin = 4, add fixed components on first colomn for (int i = 0; i < nspin_current; i++) - { - if (i == 0 || nspin_current == 2) { - ModuleBase::GlobalFunc::COPYARRAY(this->v_eff_fixed.data(), this->get_eff_v(i), nrxx); + if (i == 0 || nspin_current == 2) + { + ModuleBase::GlobalFunc::COPYARRAY (this->v_eff_fixed.data (), this->get_eff_v (i), nrxx); + } } - } // cal eff by every components - for (size_t i = 0; i < this->components.size(); i++) - { - if (this->components[i]->dynamic_mode) + for (size_t i = 0; i < this->components.size (); i++) { - this->components[i]->cal_v_eff(chg, ucell, v_eff); + if (this->components[i]->dynamic_mode) + { + this->components[i]->cal_v_eff (chg, ucell, v_eff); + } } - } - ModuleBase::timer::end("Potential", "cal_veff"); + ModuleBase::timer::end ("Potential", "cal_veff"); } -void Potential::init_pot(const Charge*const chg) +void + Potential::init_pot (const Charge* const chg) { - ModuleBase::TITLE("Potential", "init_pot"); - ModuleBase::timer::start("Potential", "init_pot"); + ModuleBase::TITLE ("Potential", "init_pot"); + ModuleBase::timer::start ("Potential", "init_pot"); // fixed components only calculated in the beginning of SCF this->fixed_done = false; - this->update_from_charge(chg, this->ucell_); + this->update_from_charge (chg, this->ucell_); - ModuleBase::timer::end("Potential", "init_pot"); + ModuleBase::timer::end ("Potential", "init_pot"); return; } -void Potential::get_vnew(const Charge* chg, ModuleBase::matrix& vnew) +void + Potential::get_vnew (const Charge* chg, ModuleBase::matrix& vnew) { - ModuleBase::TITLE("Potential", "get_vnew"); - vnew.create(this->v_eff.nr, 
this->v_eff.nc); + ModuleBase::TITLE ("Potential", "get_vnew"); + vnew.create (this->v_eff.nr, this->v_eff.nc); vnew = this->v_eff; - this->update_from_charge(chg, this->ucell_); + this->update_from_charge (chg, this->ucell_); //(used later for scf correction to the forces ) for (int iter = 0; iter < vnew.nr * vnew.nc; ++iter) - { - vnew.c[iter] = this->v_eff.c[iter] - vnew.c[iter]; - } + { + vnew.c[iter] = this->v_eff.c[iter] - vnew.c[iter]; + } return; } -void Potential::interpolate_vrs(void) +void + Potential::interpolate_vrs () { - ModuleBase::TITLE("Potential", "interpolate_vrs"); - ModuleBase::timer::start("Potential", "interpolate_vrs"); + ModuleBase::TITLE ("Potential", "interpolate_vrs"); + ModuleBase::timer::start ("Potential", "interpolate_vrs"); const int nspin = PARAM.inp.nspin; - assert(nspin==1 || nspin==2 || nspin==4); + assert (nspin == 1 || nspin == 2 || nspin == 4); if (PARAM.globalv.double_grid) - { - if (rho_basis_->gamma_only != rho_basis_smooth_->gamma_only) { - ModuleBase::WARNING_QUIT("Potential::interpolate_vrs", "gamma_only is not consistent"); - } - - ModuleBase::ComplexMatrix vrs(nspin, rho_basis_->npw); - for (int is = 0; is < nspin; is++) - { - rho_basis_->real2recip(&v_eff(is, 0), &vrs(is, 0)); - rho_basis_smooth_->recip2real(&vrs(is, 0), &veff_smooth(is, 0)); - } + if (rho_basis_->gamma_only != rho_basis_smooth_->gamma_only) + { + ModuleBase::WARNING_QUIT ("Potential::interpolate_vrs", "gamma_only is not consistent"); + } - if (XC_Functional::get_ked_flag()) - { - ModuleBase::ComplexMatrix vrs_ofk(nspin, rho_basis_->npw); + ModuleBase::ComplexMatrix vrs (nspin, rho_basis_->npw); for (int is = 0; is < nspin; is++) - { - rho_basis_->real2recip(&vofk_eff(is, 0), &vrs_ofk(is, 0)); - rho_basis_smooth_->recip2real(&vrs_ofk(is, 0), &vofk_smooth(is, 0)); - } + { + rho_basis_->real2recip (&v_eff (is, 0), &vrs (is, 0)); + rho_basis_smooth_->recip2real (&vrs (is, 0), &veff_smooth (is, 0)); + } + + if (XC_Functional::get_ked_flag ()) + { + 
ModuleBase::ComplexMatrix vrs_ofk (nspin, rho_basis_->npw); + for (int is = 0; is < nspin; is++) + { + rho_basis_->real2recip (&vofk_eff (is, 0), &vrs_ofk (is, 0)); + rho_basis_smooth_->recip2real (&vrs_ofk (is, 0), &vofk_smooth (is, 0)); + } + } } - } else - { - this->veff_smooth = this->v_eff; - this->vofk_smooth = this->vofk_eff; - } + { + this->veff_smooth = this->v_eff; + this->vofk_smooth = this->vofk_eff; + } - ModuleBase::timer::end("Potential", "interpolate_vrs"); + ModuleBase::timer::end ("Potential", "interpolate_vrs"); } template <> -float* Potential::get_veff_smooth_data() +float* + Potential::get_veff_smooth_data () { return this->veff_smooth.nc > 0 ? this->s_veff_smooth : nullptr; } template <> -double* Potential::get_veff_smooth_data() +double* + Potential::get_veff_smooth_data () { return this->veff_smooth.nc > 0 ? this->d_veff_smooth : nullptr; } template <> -float* Potential::get_vofk_smooth_data() +float* + Potential::get_vofk_smooth_data () { return this->vofk_smooth.nc > 0 ? this->s_vofk_smooth : nullptr; } template <> -double* Potential::get_vofk_smooth_data() +double* + Potential::get_vofk_smooth_data () { return this->vofk_smooth.nc > 0 ? 
this->d_vofk_smooth : nullptr; } -double Potential::get_ml_exx_energy() const +double + Potential::get_ml_exx_energy () const { #ifdef __MLALGO - for (size_t i = 0; i < this->components.size(); i++) - { - PotML_EXX* pot_ml_exx = dynamic_cast(this->components[i]); - if (pot_ml_exx != nullptr) + for (size_t i = 0; i < this->components.size (); i++) { - return pot_ml_exx->get_energy(); + PotML_EXX* pot_ml_exx = dynamic_cast (this->components[i]); + if (pot_ml_exx != nullptr) + { + return pot_ml_exx->get_energy (); + } } - } return 0.0; #else return 0.0; diff --git a/source/source_estate/module_pot/potential_new.h b/source/source_estate/module_pot/potential_new.h index ca79ca02f63..ac984c4878c 100644 --- a/source/source_estate/module_pot/potential_new.h +++ b/source/source_estate/module_pot/potential_new.h @@ -48,155 +48,170 @@ class Potential : public PotBase { public: // default constructor for UT - Potential(){}; + Potential () {}; // In constructor, size of every potential components should be allocated // rho_basis_in is the dense grids, rho_basis_smooth_in is the smooth grids in USPP // charge density and potential are defined on dense grids, // but eff potential needs to be interpolated on smooth grids in order to compute Veff|psi> // Note: rho_basis_in and rho_basis_smooth_in are the same in NCPP - Potential(const ModulePW::PW_Basis* rho_basis_in, - const ModulePW::PW_Basis* rho_basis_smooth_in, - const UnitCell* ucell_in, - const ModuleBase::matrix* vloc_in, - Structure_Factor* structure_factors_in, - surchem* solvent_in, - double* etxc_in, - double* vtxc_in, - VSep* vsep_cell_in = nullptr); - ~Potential(); + Potential (const ModulePW::PW_Basis* rho_basis_in, + const ModulePW::PW_Basis* rho_basis_smooth_in, + const UnitCell* ucell_in, + const ModuleBase::matrix* vloc_in, + Structure_Factor* structure_factors_in, + surchem* solvent_in, + double* etxc_in, + double* vtxc_in, + VSep* vsep_cell_in = nullptr); + ~Potential (); // initialize potential when SCF begin 
- void init_pot(const Charge*const chg); + void init_pot (const Charge* const chg); // initialize potential components before SCF - void pot_register(const std::vector& components_list); + void pot_register (const std::vector& components_list); // update potential from current charge - void update_from_charge(const Charge*const chg, const UnitCell*const ucell); + void update_from_charge (const Charge* const chg, const UnitCell* const ucell); // interface for SCF-converged, etxc vtxc for Energy, vnew for force_scc - void get_vnew(const Charge* chg, ModuleBase::matrix& vnew); + void get_vnew (const Charge* chg, ModuleBase::matrix& vnew); - PotBase* get_pot_type(const std::string& pot_type); + PotBase* get_pot_type (const std::string& pot_type); // interfaces to get values - ModuleBase::matrix& get_eff_v() + ModuleBase::matrix& + get_eff_v () { return this->v_eff; } - const ModuleBase::matrix& get_eff_v() const + const ModuleBase::matrix& + get_eff_v () const { return this->v_eff; } - double* get_eff_v(int is) + double* + get_eff_v (int is) { if (this->v_eff.nc > 0) - { - return &(this->v_eff(is, 0)); - } + { + return &(this->v_eff (is, 0)); + } else - { - return nullptr; - } + { + return nullptr; + } } - const double* get_eff_v(int is) const + const double* + get_eff_v (int is) const { if (this->v_eff.nc > 0) - { - return &(this->v_eff(is, 0)); - } + { + return &(this->v_eff (is, 0)); + } else - { - return nullptr; - } + { + return nullptr; + } } - ModuleBase::matrix& get_eff_vofk() + ModuleBase::matrix& + get_eff_vofk () { return this->vofk_eff; } - const ModuleBase::matrix& get_eff_vofk() const + const ModuleBase::matrix& + get_eff_vofk () const { return this->vofk_eff; } - double* get_eff_vofk(int is) + double* + get_eff_vofk (int is) { if (this->vofk_eff.nc > 0) - { - return &(this->vofk_eff(is, 0)); - } + { + return &(this->vofk_eff (is, 0)); + } else - { - return nullptr; - } + { + return nullptr; + } } - const double* get_eff_vofk(int is) const + const double* 
+ get_eff_vofk (int is) const { if (this->vofk_eff.nc > 0) - { - return &(this->vofk_eff(is, 0)); - } + { + return &(this->vofk_eff (is, 0)); + } else - { - return nullptr; - } + { + return nullptr; + } } - ModuleBase::matrix& get_veff_smooth() + ModuleBase::matrix& + get_veff_smooth () { return this->veff_smooth; } - const ModuleBase::matrix& get_veff_smooth() const + const ModuleBase::matrix& + get_veff_smooth () const { return this->veff_smooth; } - ModuleBase::matrix& get_vofk_smooth() + ModuleBase::matrix& + get_vofk_smooth () { return this->vofk_smooth; } - const ModuleBase::matrix& get_vofk_smooth() const + const ModuleBase::matrix& + get_vofk_smooth () const { return this->vofk_smooth; } template - FPTYPE* get_veff_smooth_data(); + FPTYPE* get_veff_smooth_data (); template - FPTYPE* get_vofk_smooth_data(); + FPTYPE* get_vofk_smooth_data (); - double* get_fixed_v() + double* + get_fixed_v () { - return this->v_eff_fixed.data(); + return this->v_eff_fixed.data (); } - const double* get_fixed_v() const + const double* + get_fixed_v () const { - return this->v_eff_fixed.data(); + return this->v_eff_fixed.data (); } - const ModulePW::PW_Basis *get_rho_basis() const + const ModulePW::PW_Basis* + get_rho_basis () const { return this->rho_basis_; } // What about adding a function to get the wfc? 
// This is useful for the calculation of the exx energy - /// @brief get the value of vloc at G=0; /// @return vl(0) - double get_vl_of_0() const + double + get_vl_of_0 () const { return this->vl_of_0; } /// @brief get the ML-EXX energy, avoiding static variable /// @return E_ML-EXX - double get_ml_exx_energy() const; + double get_ml_exx_energy () const; private: - void cal_v_eff(const Charge*const chg, const UnitCell*const ucell, ModuleBase::matrix& v_eff) override; - void cal_fixed_v(double* vl_pseudo) override; + void cal_v_eff (const Charge* const chg, const UnitCell* const ucell, ModuleBase::matrix& v_eff) override; + void cal_fixed_v (double* vl_pseudo) override; // interpolate potential on the smooth mesh if necessary - void interpolate_vrs(); + void interpolate_vrs (); - void allocate(); + void allocate (); std::vector v_eff_fixed; ModuleBase::matrix v_eff; @@ -206,10 +221,10 @@ class Potential : public PotBase ModuleBase::matrix v_xc; // if PAW is used, vxc must be stored separately - float *s_veff_smooth = nullptr; - float *s_vofk_smooth = nullptr; - double *d_veff_smooth = nullptr; - double *d_vofk_smooth = nullptr; + float* s_veff_smooth = nullptr; + float* s_vofk_smooth = nullptr; + double* d_veff_smooth = nullptr; + double* d_vofk_smooth = nullptr; ModuleBase::matrix vofk_eff; diff --git a/source/source_estate/module_pot/potential_types.cpp b/source/source_estate/module_pot/potential_types.cpp index f8f1315d8e3..40b2b6c2657 100644 --- a/source/source_estate/module_pot/potential_types.cpp +++ b/source/source_estate/module_pot/potential_types.cpp @@ -23,56 +23,58 @@ namespace elecstate { -PotBase* Potential::get_pot_type(const std::string& pot_type) +PotBase* + Potential::get_pot_type (const std::string& pot_type) { - ModuleBase::TITLE("Potential", "get_pot_type"); + ModuleBase::TITLE ("Potential", "get_pot_type"); if (pot_type == "local") - { - return new PotLocal(this->vloc_, &(this->structure_factors_->strucFac), this->rho_basis_, this->vl_of_0); - } 
+ { + return new PotLocal (this->vloc_, &(this->structure_factors_->strucFac), this->rho_basis_, this->vl_of_0); + } else if (pot_type == "hartree") - { - return new PotHartree(this->rho_basis_); - } + { + return new PotHartree (this->rho_basis_); + } else if (pot_type == "xc") - { - return new PotXC(this->rho_basis_, this->etxc_, this->vtxc_, &(this->vofk_eff)); - } + { + return new PotXC (this->rho_basis_, this->etxc_, this->vtxc_, &(this->vofk_eff)); + } else if (pot_type == "surchem") - { - return new PotSurChem(this->rho_basis_, - this->structure_factors_, - this->v_eff_fixed.data(), - this->solvent_); - } + { + return new PotSurChem (this->rho_basis_, + this->structure_factors_, + this->v_eff_fixed.data (), + this->solvent_); + } else if (pot_type == "efield") - { - return new PotEfield(this->rho_basis_, this->ucell_, this->solvent_, PARAM.inp.dip_cor_flag); - } + { + return new PotEfield (this->rho_basis_, this->ucell_, this->solvent_, PARAM.inp.dip_cor_flag); + } else if (pot_type == "gatefield") - { - return new PotGate(this->rho_basis_, this->ucell_); - } + { + return new PotGate (this->rho_basis_, this->ucell_); + } #ifdef __LCAO else if (pot_type == "tddft") - { - return new H_TDDFT_pw(this->rho_basis_, this->ucell_); - } + { + return new H_TDDFT_pw (this->rho_basis_, this->ucell_); + } #endif #ifdef __MLALGO else if (pot_type == "ml_exx") - { - return new PotML_EXX(this->rho_basis_, this->ucell_); - } + { + return new PotML_EXX (this->rho_basis_, this->ucell_); + } #endif - else if (pot_type == "dfthalf") { - return new PotSep(&(this->structure_factors_->strucFac), this->rho_basis_, this->vsep_cell); - } + else if (pot_type == "dfthalf") + { + return new PotSep (&(this->structure_factors_->strucFac), this->rho_basis_, this->vsep_cell); + } else - { - ModuleBase::WARNING_QUIT("Potential::get_pot_type", "Please input correct component of potential!"); - __builtin_unreachable(); - } + { + ModuleBase::WARNING_QUIT ("Potential::get_pot_type", "Please input 
correct component of potential!"); + __builtin_unreachable (); + } } } // namespace elecstate diff --git a/source/source_estate/occupy.cpp b/source/source_estate/occupy.cpp index fa50d1520d5..3a8c040cdfc 100644 --- a/source/source_estate/occupy.cpp +++ b/source/source_estate/occupy.cpp @@ -5,12 +5,8 @@ #include "source_base/parallel_reduce.h" #include "source_io/module_parameter/parameter.h" -Occupy::Occupy() -{ -} -Occupy::~Occupy() -{ -} +Occupy::Occupy () {} +Occupy::~Occupy () {} //=========================================================== // Four smearing methods: @@ -25,9 +21,10 @@ int Occupy::gaussian_type = 0; double Occupy::gaussian_parameter = 0.01; bool Occupy::fixed_occupations = false; -void Occupy::decision(const std::string& name, const std::string& smearing_method, const double& smearing_sigma) +void + Occupy::decision (const std::string& name, const std::string& smearing_method, const double& smearing_sigma) { - ModuleBase::TITLE("Occupy", "decision"); + ModuleBase::TITLE ("Occupy", "decision"); use_gaussian_broadening = false; fixed_occupations = false; @@ -35,23 +32,23 @@ void Occupy::decision(const std::string& name, const std::string& smearing_metho gaussian_parameter = smearing_sigma; if (name == "fixed") - { - if (gaussian_parameter != 0.0) { - ModuleBase::WARNING("smearing_decision", "Fixed occupations,gauss broadening ignored"); - ModuleBase::GlobalFunc::AUTO_SET("gaussian_parameter", 0.0); - gaussian_parameter = 0.0; + if (gaussian_parameter != 0.0) + { + ModuleBase::WARNING ("smearing_decision", "Fixed occupations,gauss broadening ignored"); + ModuleBase::GlobalFunc::AUTO_SET ("gaussian_parameter", 0.0); + gaussian_parameter = 0.0; + } } - } else if (name == "smearing" && smearing_method == "fixed") - { - if (gaussian_parameter != 0.0) { - ModuleBase::WARNING("smearing_decision", "Fixed occupations,gauss broadening ignored"); - ModuleBase::GlobalFunc::AUTO_SET("gaussian_parameter", 0.0); - gaussian_parameter = 0.0; + if 
(gaussian_parameter != 0.0) + { + ModuleBase::WARNING ("smearing_decision", "Fixed occupations,gauss broadening ignored"); + ModuleBase::GlobalFunc::AUTO_SET ("gaussian_parameter", 0.0); + gaussian_parameter = 0.0; + } } - } // there are four types of smearing methods: // (1) gaussian @@ -59,56 +56,56 @@ void Occupy::decision(const std::string& name, const std::string& smearing_metho // (3) Marzari-Vanderbilt // (4) Fermi-Dirac else if (name == "smearing") - { - use_gaussian_broadening = true; - if (gaussian_parameter == 0.0) - { - ModuleBase::WARNING_QUIT( - "smearing_decision", - "Smearing requires gaussian broadening,but gaussian_parameter = 0(default value = 0.01)"); - } - if (smearing_method == "gaussian" || smearing_method == "gauss") - { - gaussian_type = 0; // 0: gaussian - } - else if (smearing_method == "methfessel-paxton" || smearing_method == "mp") - { - gaussian_type = 1; // >0 Methfessel-Paxton method. - } - else if (smearing_method == "mp2") { - gaussian_type = 2; // 2nd Methfessel-Paxton method. + use_gaussian_broadening = true; + if (gaussian_parameter == 0.0) + { + ModuleBase::WARNING_QUIT ( + "smearing_decision", + "Smearing requires gaussian broadening,but gaussian_parameter = 0(default value = 0.01)"); + } + if (smearing_method == "gaussian" || smearing_method == "gauss") + { + gaussian_type = 0; // 0: gaussian + } + else if (smearing_method == "methfessel-paxton" || smearing_method == "mp") + { + gaussian_type = 1; // >0 Methfessel-Paxton method. + } + else if (smearing_method == "mp2") + { + gaussian_type = 2; // 2nd Methfessel-Paxton method. 
+ } + else if (smearing_method == "mp3") + { + // acually any order Methfessel-Paxton method can be supported in Occupy::w1gauss() + // however the parameter is string instead of int + ModuleBase::WARNING_QUIT ("occupy", + "Some refactor of smearing shoule be done before supporting any order of " + "Methfessel-Paxton method!"); + } + + else if (smearing_method == "marzari-vanderbilt" || smearing_method == "cold" || smearing_method == "mv") + { + gaussian_type = -1; + } + else if (smearing_method == "fermi-dirac" || smearing_method == "fd") + { + gaussian_type = -99; + } } - else if (smearing_method == "mp3") + else if (name == "tetrahedra") { - // acually any order Methfessel-Paxton method can be supported in Occupy::w1gauss() - // however the parameter is string instead of int - ModuleBase::WARNING_QUIT( - "occupy", - "Some refactor of smearing shoule be done before supporting any order of Methfessel-Paxton method!"); + ModuleBase::WARNING_QUIT ("occupy", "not implemented yet!"); } - - else if (smearing_method == "marzari-vanderbilt" || smearing_method == "cold" || smearing_method == "mv") + else if (name == "from_input") { - gaussian_type = -1; + fixed_occupations = true; } - else if (smearing_method == "fermi-dirac" || smearing_method == "fd") + else { - gaussian_type = -99; + ModuleBase::WARNING_QUIT ("occupy_decision", "occupations, not implemented"); } - } - else if (name == "tetrahedra") - { - ModuleBase::WARNING_QUIT("occupy", "not implemented yet!"); - } - else if (name == "from_input") - { - fixed_occupations = true; - } - else - { - ModuleBase::WARNING_QUIT("occupy_decision", "occupations, not implemented"); - } return; } @@ -128,60 +125,64 @@ void Occupy::decision(const std::string& name, const std::string& smearing_metho * @param is the spin index now. * @param isk distinguish k point belong to which spin. 
*/ -void Occupy::iweights( - const int nks, - const std::vector& wk, - const int nbands, - const double& nelec, - const ModuleBase::matrix& ekb, - double& ef, - ModuleBase::matrix& wg, - const int& is, //<- is should be -1, 0, or 1. -1 means set all spins, and 0 means spin up, 1 means spin down. - const std::vector& isk) +void + Occupy::iweights ( + const int nks, + const std::vector& wk, + const int nbands, + const double& nelec, + const ModuleBase::matrix& ekb, + double& ef, + ModuleBase::matrix& wg, + const int& is, //<- is should be -1, 0, or 1. -1 means set all spins, and 0 means spin up, 1 means spin down. + const std::vector& isk) { - assert(is < 2); + assert (is < 2); double degspin = 2.0; - if (PARAM.inp.nspin == 4) { - degspin = 1.0; -} - if (is != -1) { - degspin = 1.0; -} + if (PARAM.inp.nspin == 4) + { + degspin = 1.0; + } + if (is != -1) + { + degspin = 1.0; + } double ib_mind = nelec / degspin; - int ib_min = std::ceil(ib_mind); - if (ib_min != int(ib_mind)) - { - ModuleBase::WARNING_QUIT("iweights", - "It is not a semiconductor or insulator. Please do not set 'smearing_method=fixed', " - "and try other options."); - } - ef = -1e+10; - - for (int ik = 0; ik < nks; ++ik) - { - // when NSPIN=2, only calculate spin up or spin down with TWO_FERMI mode(nupdown != 0) - if (PARAM.inp.nspin == 2 && isk[ik] != is && is != -1) + int ib_min = std::ceil (ib_mind); + if (ib_min != int (ib_mind)) { - continue; + ModuleBase::WARNING_QUIT ( + "iweights", + "It is not a semiconductor or insulator. 
Please do not set 'smearing_method=fixed', " + "and try other options."); } + ef = -1e+10; - for (int ib = 0; ib < nbands; ++ib) + for (int ik = 0; ik < nks; ++ik) { - if (ib < ib_min) - { - wg(ik, ib) = wk[ik]; - ef = std::max(ef, ekb(ik, ib)); - } - else - { - wg(ik, ib) = 0.0; - } + // when NSPIN=2, only calculate spin up or spin down with TWO_FERMI mode(nupdown != 0) + if (PARAM.inp.nspin == 2 && isk[ik] != is && is != -1) + { + continue; + } + + for (int ib = 0; ib < nbands; ++ib) + { + if (ib < ib_min) + { + wg (ik, ib) = wk[ik]; + ef = std::max (ef, ekb (ik, ib)); + } + else + { + wg (ik, ib) = 0.0; + } + } } - } - #ifdef __MPI - Parallel_Reduce::reduce_max(ef); - #endif +#ifdef __MPI + Parallel_Reduce::reduce_max (ef); +#endif return; } @@ -201,7 +202,8 @@ void Occupy::iweights( * @param is spin * @param isk array to point out each k belong to which spin */ -void Occupy::gweights(const int nks, +void + Occupy::gweights (const int nks, const std::vector& wk, const int nband, const double& nelec, @@ -219,33 +221,34 @@ void Occupy::gweights(const int nks, // Calculate the Fermi energy ef //=============================== // call efermig - Occupy::efermig(ekb, nband, nks, nelec, wk, smearing_sigma, ngauss, ef, is, isk); + Occupy::efermig (ekb, nband, nks, nelec, wk, smearing_sigma, ngauss, ef, is, isk); demet = 0.0; for (int ik = 0; ik < nks; ik++) - { - // mohan add 2011-04-03 - if (is != -1 && is != isk[ik]) { - continue; -} - - for (int ib = 0; ib < PARAM.globalv.nbands_l; ib++) { - //================================ - // Calculate the gaussian weights - //================================ - // call wgauss - wg(ik, ib) = wk[ik] * Occupy::wgauss((ef - ekb(ik, ib)) / smearing_sigma, ngauss); - - //==================================================================== - // The correct form of the band energy is \int e n(e) de for e 1000) - { - std::cout << " SOMETHING WRONG: " << std::endl; - std::cout << " is = " << is << std::endl; - std::cout << " eup = " << 
eup << std::endl; - std::cout << " elw = " << elw << std::endl; - std::cout << " nband = " << nband << std::endl; - std::cout << " nelec = " << nelec << std::endl; - std::cout << " sumkup = " << sumkup << std::endl; - std::cout << " sumklw = " << sumklw << std::endl; - std::cout << " sumkup - nelec = " << sumkup - nelec << std::endl; - std::cout << " sumklw - nelec = " << sumklw - nelec << std::endl; - ModuleBase::WARNING_QUIT("Occupy::efermig", "ERROS in SMEARING"); - // no need to break; quit directly - } - else if ((sumkup - nelec) < -eps) { - eup += 2 * smearing_sigma; - ++changetime; - } - else if ((sumklw - nelec) > eps) - { - elw -= 2 * smearing_sigma; - ++changetime; - } - else - { - break; + const double sumkup = Occupy::sumkg (ekb, nband, nks, wk, smearing_sigma, ngauss, eup, is, isk); + const double sumklw = Occupy::sumkg (ekb, nband, nks, wk, smearing_sigma, ngauss, elw, is, isk); + + if (changetime > 1000) + { + std::cout << " SOMETHING WRONG: " << std::endl; + std::cout << " is = " << is << std::endl; + std::cout << " eup = " << eup << std::endl; + std::cout << " elw = " << elw << std::endl; + std::cout << " nband = " << nband << std::endl; + std::cout << " nelec = " << nelec << std::endl; + std::cout << " sumkup = " << sumkup << std::endl; + std::cout << " sumklw = " << sumklw << std::endl; + std::cout << " sumkup - nelec = " << sumkup - nelec << std::endl; + std::cout << " sumklw - nelec = " << sumklw - nelec << std::endl; + ModuleBase::WARNING_QUIT ("Occupy::efermig", "ERROS in SMEARING"); + // no need to break; quit directly + } + else if ((sumkup - nelec) < -eps) + { + eup += 2 * smearing_sigma; + ++changetime; + } + else if ((sumklw - nelec) > eps) + { + elw -= 2 * smearing_sigma; + ++changetime; + } + else + { + break; + } } - } for (int i = 0; i < maxiter; i++) - { - //====================== - // change ef value - //====================== - ef = (eup + elw) / 2.0; - const double sumkmid = sumkg(ekb, nband, nks, wk, smearing_sigma, ngauss, ef, 
is, isk); - - if (std::abs(sumkmid - nelec) < eps) { - return; - } - else if ((sumkmid - nelec) < -eps) - { - elw = ef; - } - else - { - eup = ef; + //====================== + // change ef value + //====================== + ef = (eup + elw) / 2.0; + const double sumkmid = sumkg (ekb, nband, nks, wk, smearing_sigma, ngauss, ef, is, isk); + + if (std::abs (sumkmid - nelec) < eps) + { + return; + } + else if ((sumkmid - nelec) < -eps) + { + elw = ef; + } + else + { + eup = ef; + } } - } return; } @@ -389,40 +393,42 @@ void Occupy::efermig(const ModuleBase::matrix& ekb, * @param isk array to point out each k belong to which spin * @return (double) the number of states */ -double Occupy::sumkg(const ModuleBase::matrix& ekb, - const int nband, - const int nks, - const std::vector& wk, - const double& smearing_sigma, - const int ngauss, - const double& e, - const int& is, - const std::vector& isk) +double + Occupy::sumkg (const ModuleBase::matrix& ekb, + const int nband, + const int nks, + const std::vector& wk, + const double& smearing_sigma, + const int ngauss, + const double& e, + const int& is, + const std::vector& isk) { // ModuleBase::TITLE("Occupy","sumkg"); double sum2 = 0.0; for (int ik = 0; ik < nks; ik++) - { - if (is != -1 && is != isk[ik]) { - continue; -} - - double sum1 = 0.0; - for (int ib = 0; ib < nband; ib++) { - //=========================== - // call wgauss - //=========================== - sum1 += Occupy::wgauss((e - ekb(ik, ib)) / smearing_sigma, ngauss); + if (is != -1 && is != isk[ik]) + { + continue; + } + + double sum1 = 0.0; + for (int ib = 0; ib < nband; ib++) + { + //=========================== + // call wgauss + //=========================== + sum1 += Occupy::wgauss ((e - ekb (ik, ib)) / smearing_sigma, ngauss); + } + sum2 += wk[ik] * sum1; } - sum2 += wk[ik] * sum1; - } - // GlobalV::ofs_running << "\n sum2 before reduce = " << sum2 << std::endl; + // GlobalV::ofs_running << "\n sum2 before reduce = " << sum2 << std::endl; #ifdef __MPI 
const int npool = GlobalV::KPAR * PARAM.inp.bndpar; - Parallel_Reduce::reduce_double_allpool(npool, GlobalV::NPROC_IN_POOL, sum2); + Parallel_Reduce::reduce_double_allpool (npool, GlobalV::NPROC_IN_POOL, sum2); #endif // GlobalV::ofs_running << "\n sum2 after reduce = " << sum2 << std::endl; @@ -430,7 +436,8 @@ double Occupy::sumkg(const ModuleBase::matrix& ekb, return sum2; } -double Occupy::wgauss(const double& x, const int n) +double + Occupy::wgauss (const double& x, const int n) { // ModuleBase::TITLE("Occupy","wgauss"); //===================================================================== @@ -452,67 +459,68 @@ double Occupy::wgauss(const double& x, const int n) // Fermi-Dirac(fd) smearing //=========================== if (n == -99) - { - if (x < -maxarg) - { - wga = 0.0; - } - else if (x > maxarg) { - wga = 1.0; - } - else - { - wga = 1.00 / (1.0 + std::exp(-x)); + if (x < -maxarg) + { + wga = 0.0; + } + else if (x > maxarg) + { + wga = 1.0; + } + else + { + wga = 1.00 / (1.0 + std::exp (-x)); + } + return wga; } - return wga; - } //=================== // Cold smearing(mv) //=================== if (n == -1) - { - const double xp = x - 1.00 / ModuleBase::SQRT2; - const double arg = std::min(maxarg, xp * xp); - wga = 0.50 * erf(xp) + 1.00 / sqrt(ModuleBase::TWO_PI) * std::exp(-arg) + 0.50; - return wga; - } + { + const double xp = x - 1.00 / ModuleBase::SQRT2; + const double arg = std::min (maxarg, xp * xp); + wga = 0.50 * erf (xp) + 1.00 / sqrt (ModuleBase::TWO_PI) * std::exp (-arg) + 0.50; + return wga; + } //==================== // Methfessel-Paxton //pengfei 2014-10-13 //==================== - wga = 0.5 * (1 - erf(-x)); + wga = 0.5 * (1 - erf (-x)); // wga = gauss_freq(x * ModuleBase::SQRT2); // std::cout<<"\n x="<9 presently const std::string spectrum = "SPDFGHIKLM"; - std::ifstream ifs(orb_file.c_str(), std::ios::in); // pengfei 2014-10-13 + std::ifstream ifs (orb_file.c_str (), std::ios::in); // pengfei 2014-10-13 // mohan add return 2021-04-26 if 
(!ifs) - { - std::cout << " Element index " << it+1 << std::endl; - std::cout << " orbital file: " << orb_file << std::endl; - ModuleBase::WARNING("elecstate::read_orb_file", - "cannot open the ORBITAL file (NAO basis sets)"); - return false; - } - std::string word; - atom->nw = 0; - while (ifs.good()) - { - ifs >> word; - if (word == "Element") // pengfei Li 16-2-29 { - ModuleBase::GlobalFunc::READ_VALUE(ifs, atom->label_orb); + std::cout << " Element index " << it + 1 << std::endl; + std::cout << " orbital file: " << orb_file << std::endl; + ModuleBase::WARNING ("elecstate::read_orb_file", "cannot open the ORBITAL file (NAO basis sets)"); + return false; } - if (word == "Lmax") - { - ModuleBase::GlobalFunc::READ_VALUE(ifs, atom->nwl); - atom->l_nchi.resize(atom->nwl+1, 0); - } - // assert(atom->nwl<10); // cannot understand why restrict the maximum value of atom->nwl - if (word == "Cutoff(a.u.)") // pengfei Li 16-2-29 - { - ModuleBase::GlobalFunc::READ_VALUE(ifs, atom->Rcut); - } - if (FmtCore::endswith(word, "orbital-->")) + std::string word; + atom->nw = 0; + while (ifs.good ()) { - bool valid = false; - for (int i = 0; i < spectrum.size(); i++) - { - if (word == spectrum.substr(i, 1) + "orbital-->") + ifs >> word; + if (word == "Element") // pengfei Li 16-2-29 { - ModuleBase::GlobalFunc::READ_VALUE(ifs, atom->l_nchi[i]); - atom->nw += (2*i + 1) * atom->l_nchi[i]; - std::stringstream ss; - ss << "L=" << i << ", number of zeta"; - ModuleBase::GlobalFunc::OUT(ofs_running,ss.str(),atom->l_nchi[i]); - valid = true; - break; + ModuleBase::GlobalFunc::READ_VALUE (ifs, atom->label_orb); } - } - if (!valid) - { - ModuleBase::WARNING("elecstate::read_orb_file", - "ABACUS does not support NAO with L > 9, " - "or an invalid orbital label is found in the ORBITAL file."); - return false; - } + if (word == "Lmax") + { + ModuleBase::GlobalFunc::READ_VALUE (ifs, atom->nwl); + atom->l_nchi.resize (atom->nwl + 1, 0); + } + // assert(atom->nwl<10); // cannot understand why 
restrict the maximum value of atom->nwl + if (word == "Cutoff(a.u.)") // pengfei Li 16-2-29 + { + ModuleBase::GlobalFunc::READ_VALUE (ifs, atom->Rcut); + } + if (FmtCore::endswith (word, "orbital-->")) + { + bool valid = false; + for (int i = 0; i < spectrum.size (); i++) + { + if (word == spectrum.substr (i, 1) + "orbital-->") + { + ModuleBase::GlobalFunc::READ_VALUE (ifs, atom->l_nchi[i]); + atom->nw += (2 * i + 1) * atom->l_nchi[i]; + std::stringstream ss; + ss << "L=" << i << ", number of zeta"; + ModuleBase::GlobalFunc::OUT (ofs_running, ss.str (), atom->l_nchi[i]); + valid = true; + break; + } + } + if (!valid) + { + ModuleBase::WARNING ("elecstate::read_orb_file", + "ABACUS does not support NAO with L > 9, " + "or an invalid orbital label is found in the ORBITAL file."); + return false; + } + } + } + ifs.close (); + if (!atom->nw) + { + ModuleBase::WARNING ("elecstate::read_orb_file", "get nw = 0, check the ORBITAL file"); + return false; } - } - ifs.close(); - if(!atom->nw) - { - ModuleBase::WARNING("elecstate::read_orb_file","get nw = 0, check the ORBITAL file"); - return false; - } return true; } -} +} // namespace elecstate diff --git a/source/source_estate/read_orb.h b/source/source_estate/read_orb.h index 8d42b789e7c..b12a2093edc 100644 --- a/source/source_estate/read_orb.h +++ b/source/source_estate/read_orb.h @@ -3,21 +3,18 @@ #include "source_cell/unitcell.h" -namespace elecstate +namespace elecstate { - /** - * @brief read number of numerical orbitals for each angular momentum - * @param it index of atom type - * @param orb_file orbital filename - * @param ofs_running ofstream - * @param atom Atom instance stored in UnitCell - */ - bool read_orb_file(int it, - std::string& orb_file, - std::ofstream& ofs_running, - Atom* atom); +/** + * @brief read number of numerical orbitals for each angular momentum + * @param it index of atom type + * @param orb_file orbital filename + * @param ofs_running ofstream + * @param atom Atom instance stored in 
UnitCell + */ +bool read_orb_file (int it, std::string& orb_file, std::ofstream& ofs_running, Atom* atom); -} +} // namespace elecstate #endif diff --git a/source/source_estate/read_pseudo.cpp b/source/source_estate/read_pseudo.cpp index c348635dde9..e5bda501c41 100644 --- a/source/source_estate/read_pseudo.cpp +++ b/source/source_estate/read_pseudo.cpp @@ -9,8 +9,11 @@ #include // Peize Lin fix bug about strcmp 2016-08-02 -namespace elecstate { -void read_pseudo(std::ofstream& ofs, UnitCell& ucell) { +namespace elecstate +{ +void + read_pseudo (std::ofstream& ofs, UnitCell& ucell) +{ // read in non-local pseudopotential and ouput the projectors. ofs << "\n\n"; ofs << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; @@ -24,203 +27,204 @@ void read_pseudo(std::ofstream& ofs, UnitCell& ucell) { ofs << " <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" << std::endl; ofs << "\n"; - read_cell_pseudopots(PARAM.inp.pseudo_dir, ofs, ucell); - - if (GlobalV::MY_RANK == 0) - { - for (int it = 0; it < ucell.ntype; it++) - { - Atom* atom = &ucell.atoms[it]; - if (!(atom->label_orb.empty())) - { - ucell.compare_atom_labels(atom->label_orb, atom->ncpp.psd); - } - } + read_cell_pseudopots (PARAM.inp.pseudo_dir, ofs, ucell); - if (PARAM.inp.out_element_info) - { - for (int i = 0; i < ucell.ntype; i++) - { - ModuleBase::Global_File::make_dir_atom(ucell.atoms[i].label); - } - for (int it = 0; it < ucell.ntype; it++) - { - Atom* atom = &ucell.atoms[it]; - std::stringstream ss; - ss << PARAM.globalv.global_out_dir << atom->label << "/" - << atom->label << ".NONLOCAL"; - std::ofstream ofs(ss.str().c_str()); - - ofs << "
" << std::endl; - ofs << std::setw(10) << atom->label << "\t" - << "label" << std::endl; - ofs << std::setw(10) << atom->ncpp.pp_type << "\t" - << "Pseudopotential type" << std::endl; - ofs << std::setw(10) << atom->ncpp.lmax << "\t" - << "lmax" << std::endl; - ofs << "
" << std::endl; - - ofs << "\n" << std::endl; - ofs << std::setw(10) << atom->ncpp.nbeta << "\t" - << "nummber of projectors." << std::endl; - for (int ib = 0; ib < atom->ncpp.nbeta; ib++) { - for (int ib2 = 0; ib2 < atom->ncpp.nbeta; ib2++) { - ofs << std::setw(10) << atom->ncpp.lll[ib] << " " - << atom->ncpp.lll[ib2] << " " - << atom->ncpp.dion(ib, ib2) << std::endl; - } - } - ofs << "" << std::endl; - - for (int i = 0; i < atom->ncpp.nbeta; i++) { - ofs << "" << std::endl; - ofs << std::setw(10) << i << "\t" - << "the index of projectors." << std::endl; - ofs << std::setw(10) << atom->ncpp.lll[i] << "\t" - << "the angular momentum." << std::endl; - - // mohan add - // only keep the nonzero part. - int cut_mesh = atom->ncpp.mesh; - for (int j = atom->ncpp.mesh - 1; j >= 0; --j) { - if (std::abs(atom->ncpp.betar(i, j)) > 1.0e-10) { - cut_mesh = j; - break; + if (GlobalV::MY_RANK == 0) + { + for (int it = 0; it < ucell.ntype; it++) + { + Atom* atom = &ucell.atoms[it]; + if (!(atom->label_orb.empty ())) + { + ucell.compare_atom_labels (atom->label_orb, atom->ncpp.psd); } - } - if (cut_mesh % 2 == 0) { - ++cut_mesh; - } - - ofs << std::setw(10) << cut_mesh << "\t" - << "the number of mesh points." << std::endl; - - for (int j = 0; j < cut_mesh; ++j) { - ofs << std::setw(15) << atom->ncpp.r[j] << std::setw(15) - << atom->ncpp.betar(i, j) << std::setw(15) - << atom->ncpp.rab[j] << std::endl; - } - ofs << "" << std::endl; } - ofs.close(); - } + if (PARAM.inp.out_element_info) + { + for (int i = 0; i < ucell.ntype; i++) + { + ModuleBase::Global_File::make_dir_atom (ucell.atoms[i].label); + } + for (int it = 0; it < ucell.ntype; it++) + { + Atom* atom = &ucell.atoms[it]; + std::stringstream ss; + ss << PARAM.globalv.global_out_dir << atom->label << "/" << atom->label << ".NONLOCAL"; + std::ofstream ofs (ss.str ().c_str ()); + + ofs << "
" << std::endl; + ofs << std::setw (10) << atom->label << "\t" + << "label" << std::endl; + ofs << std::setw (10) << atom->ncpp.pp_type << "\t" + << "Pseudopotential type" << std::endl; + ofs << std::setw (10) << atom->ncpp.lmax << "\t" + << "lmax" << std::endl; + ofs << "
" << std::endl; + + ofs << "\n" << std::endl; + ofs << std::setw (10) << atom->ncpp.nbeta << "\t" + << "nummber of projectors." << std::endl; + for (int ib = 0; ib < atom->ncpp.nbeta; ib++) + { + for (int ib2 = 0; ib2 < atom->ncpp.nbeta; ib2++) + { + ofs << std::setw (10) << atom->ncpp.lll[ib] << " " << atom->ncpp.lll[ib2] + << " " << atom->ncpp.dion (ib, ib2) << std::endl; + } + } + ofs << "" << std::endl; + + for (int i = 0; i < atom->ncpp.nbeta; i++) + { + ofs << "" << std::endl; + ofs << std::setw (10) << i << "\t" + << "the index of projectors." << std::endl; + ofs << std::setw (10) << atom->ncpp.lll[i] << "\t" + << "the angular momentum." << std::endl; + + // mohan add + // only keep the nonzero part. + int cut_mesh = atom->ncpp.mesh; + for (int j = atom->ncpp.mesh - 1; j >= 0; --j) + { + if (std::abs (atom->ncpp.betar (i, j)) > 1.0e-10) + { + cut_mesh = j; + break; + } + } + if (cut_mesh % 2 == 0) + { + ++cut_mesh; + } + + ofs << std::setw (10) << cut_mesh << "\t" + << "the number of mesh points." 
<< std::endl; + + for (int j = 0; j < cut_mesh; ++j) + { + ofs << std::setw (15) << atom->ncpp.r[j] << std::setw (15) + << atom->ncpp.betar (i, j) << std::setw (15) << atom->ncpp.rab[j] + << std::endl; + } + ofs << "" << std::endl; + } + + ofs.close (); + } + } } - } #ifdef __MPI - unitcell::bcast_atoms_pseudo(ucell.atoms,ucell.ntype); + unitcell::bcast_atoms_pseudo (ucell.atoms, ucell.ntype); #endif - for (int it = 0; it < ucell.ntype; it++) { - if (ucell.atoms[0].ncpp.xc_func != ucell.atoms[it].ncpp.xc_func) { - GlobalV::ofs_warning << "\n type " << ucell.atoms[0].label - << " functional is " << ucell.atoms[0].ncpp.xc_func; + for (int it = 0; it < ucell.ntype; it++) + { + if (ucell.atoms[0].ncpp.xc_func != ucell.atoms[it].ncpp.xc_func) + { + GlobalV::ofs_warning << "\n type " << ucell.atoms[0].label << " functional is " + << ucell.atoms[0].ncpp.xc_func; - GlobalV::ofs_warning << "\n type " << ucell.atoms[it].label - << " functional is " << ucell.atoms[it].ncpp.xc_func - << std::endl; + GlobalV::ofs_warning << "\n type " << ucell.atoms[it].label << " functional is " + << ucell.atoms[it].ncpp.xc_func << std::endl; - ModuleBase::WARNING_QUIT("setup_cell", - "All DFT functional must consistent."); + ModuleBase::WARNING_QUIT ("setup_cell", "All DFT functional must consistent."); + } } - } // setup the total number of PAOs - cal_natomwfc(ofs,ucell.natomwfc,ucell.ntype,ucell.atoms); + cal_natomwfc (ofs, ucell.natomwfc, ucell.ntype, ucell.atoms); // Calculate the information of atoms from the pseudopotential to set PARAM CalAtomsInfo ca; - ca.cal_atoms_info(ucell.atoms, ucell.ntype, PARAM); + ca.cal_atoms_info (ucell.atoms, ucell.ntype, PARAM); // setup PARAM.globalv.nlocal - cal_nwfc(ofs,ucell,ucell.atoms); + cal_nwfc (ofs, ucell, ucell.atoms); // Check whether the number of valence is minimum - if (GlobalV::MY_RANK == 0) - { - int abtype = 0; - for (int it = 0; it < ucell.ntype; it++) - { - if (ModuleBase::MinZval.find(ucell.atoms[it].ncpp.psd) - != 
ModuleBase::MinZval.end()) - { - if (ucell.atoms[it].ncpp.zv - > ModuleBase::MinZval.at(ucell.atoms[it].ncpp.psd)) - { - abtype += 1; - if (abtype == 1) - { - std::cout << "\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" - "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" - "%%%%%%%%%%%%%%%%%%%%%%%%%%" - << std::endl; - ofs << "\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" - "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" - "%%%%%%%%%%%%%%%%%%%%%" - << std::endl; - } - std::cout << " Warning: the number of valence electrons in " - "pseudopotential > " - << ModuleBase::MinZval.at(ucell.atoms[it].ncpp.psd); - std::cout << " for " << ucell.atoms[it].ncpp.psd << ": " - << ModuleBase::EleConfig.at(ucell.atoms[it].ncpp.psd) + if (GlobalV::MY_RANK == 0) + { + int abtype = 0; + for (int it = 0; it < ucell.ntype; it++) + { + if (ModuleBase::MinZval.find (ucell.atoms[it].ncpp.psd) != ModuleBase::MinZval.end ()) + { + if (ucell.atoms[it].ncpp.zv > ModuleBase::MinZval.at (ucell.atoms[it].ncpp.psd)) + { + abtype += 1; + if (abtype == 1) + { + std::cout << "\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" + "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" + "%%%%%%%%%%%%%%%%%%%%%%%%%%" + << std::endl; + ofs << "\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" + "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" + "%%%%%%%%%%%%%%%%%%%%%" + << std::endl; + } + std::cout << " Warning: the number of valence electrons in " + "pseudopotential > " + << ModuleBase::MinZval.at (ucell.atoms[it].ncpp.psd); + std::cout << " for " << ucell.atoms[it].ncpp.psd << ": " + << ModuleBase::EleConfig.at (ucell.atoms[it].ncpp.psd) << std::endl; + ofs << " Warning: the number of valence electrons in " + "pseudopotential > " + << ModuleBase::MinZval.at (ucell.atoms[it].ncpp.psd); + ofs << " for " << ucell.atoms[it].ncpp.psd << ": " + << ModuleBase::EleConfig.at (ucell.atoms[it].ncpp.psd) << std::endl; + } + } + } + if (abtype > 0) + { + std::cout << " Pseudopotentials with additional electrons can " + "yield (more) accurate outcomes, 
but may be " + "less efficient." << std::endl; - ofs << " Warning: the number of valence electrons in " - "pseudopotential > " - << ModuleBase::MinZval.at(ucell.atoms[it].ncpp.psd); - ofs << " for " << ucell.atoms[it].ncpp.psd << ": " - << ModuleBase::EleConfig.at(ucell.atoms[it].ncpp.psd) + std::cout << " If you're confident that your chosen pseudopotential is " + "appropriate, you can safely ignore " + "this warning." + << std::endl; + std::cout << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" + "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" + "%%%%%%%%%%%%\n" + << std::endl; + ofs << " Pseudopotentials with additional electrons can yield " + "(more) accurate outcomes, but may be less " + "efficient." + << std::endl; + ofs << " If you're confident that your chosen pseudopotential is " + "appropriate, you can safely ignore this " + "warning." << std::endl; + ofs << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" + "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" + "%%%%%%%"; + ModuleBase::GlobalFunc::OUT (ofs, ""); } - } } - if (abtype > 0) - { - std::cout << " Pseudopotentials with additional electrons can " - "yield (more) accurate outcomes, but may be " - "less efficient." - << std::endl; - std::cout - << " If you're confident that your chosen pseudopotential is " - "appropriate, you can safely ignore " - "this warning." - << std::endl; - std::cout << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" - "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" - "%%%%%%%%%%%%\n" - << std::endl; - ofs << " Pseudopotentials with additional electrons can yield " - "(more) accurate outcomes, but may be less " - "efficient." - << std::endl; - ofs << " If you're confident that your chosen pseudopotential is " - "appropriate, you can safely ignore this " - "warning." 
- << std::endl; - ofs << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" - "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" - "%%%%%%%"; - ModuleBase::GlobalFunc::OUT(ofs, ""); - } - } - cal_meshx(ucell.meshx,ucell.atoms,ucell.ntype); + cal_meshx (ucell.meshx, ucell.atoms, ucell.ntype); #ifdef __MPI - Parallel_Common::bcast_int(ucell.meshx); - Parallel_Common::bcast_int(ucell.natomwfc); - Parallel_Common::bcast_int(ucell.lmax); - Parallel_Common::bcast_int(ucell.lmax_ppwf); + Parallel_Common::bcast_int (ucell.meshx); + Parallel_Common::bcast_int (ucell.natomwfc); + Parallel_Common::bcast_int (ucell.lmax); + Parallel_Common::bcast_int (ucell.lmax_ppwf); #endif } //========================================================== // Read pseudopotential according to the dir //========================================================== -void read_cell_pseudopots(const std::string& pp_dir, std::ofstream& log, UnitCell& ucell) +void + read_cell_pseudopots (const std::string& pp_dir, std::ofstream& log, UnitCell& ucell) { - ModuleBase::TITLE("Elecstate", "read_cell_pseudopots"); + ModuleBase::TITLE ("Elecstate", "read_cell_pseudopots"); // setup reading log for pseudopot_upf std::stringstream ss; ss << PARAM.globalv.global_out_dir << "atom_pseudo.log"; @@ -228,124 +232,133 @@ void read_cell_pseudopots(const std::string& pp_dir, std::ofstream& log, UnitCel // Read in the atomic pseudo potentials std::string pp_address; for (int i = 0; i < ucell.ntype; i++) - { - Pseudopot_upf upf; - upf.coulomb_potential = ucell.atoms[i].coulomb_potential; - - // mohan update 2010-09-12 - int error = 0; - int error_ap = 0; - - if (GlobalV::MY_RANK == 0) { - pp_address = pp_dir + ucell.pseudo_fn[i]; - error = upf.init_pseudo_reader(pp_address, ucell.pseudo_type[i], ucell.atoms[i].ncpp); // xiaohui add 2013-06-23 + Pseudopot_upf upf; + upf.coulomb_potential = ucell.atoms[i].coulomb_potential; + + // mohan update 2010-09-12 + int error = 0; + int error_ap = 0; - if (error == 0) // mohan add 
2021-04-16 - { - if (ucell.atoms[i].flag_empty_element) // Peize Lin add for bsse 2021.04.07 + if (GlobalV::MY_RANK == 0) { - upf.set_empty_element(ucell.atoms[i].ncpp); + pp_address = pp_dir + ucell.pseudo_fn[i]; + error = upf.init_pseudo_reader (pp_address, + ucell.pseudo_type[i], + ucell.atoms[i].ncpp); // xiaohui add 2013-06-23 + + if (error == 0) // mohan add 2021-04-16 + { + if (ucell.atoms[i].flag_empty_element) // Peize Lin add for bsse 2021.04.07 + { + upf.set_empty_element (ucell.atoms[i].ncpp); + } + upf.set_upf_q (ucell.atoms[i].ncpp); // liuyu add 2023-09-21 + // average pseudopotential if needed + error_ap = upf.average_p (PARAM.inp.soc_lambda, + ucell.atoms[i].ncpp); // added by zhengdy 2020-10-20 + } + ucell.atoms[i].coulomb_potential = upf.coulomb_potential; } - upf.set_upf_q(ucell.atoms[i].ncpp); // liuyu add 2023-09-21 - // average pseudopotential if needed - error_ap = upf.average_p(PARAM.inp.soc_lambda, ucell.atoms[i].ncpp); // added by zhengdy 2020-10-20 - } - ucell.atoms[i].coulomb_potential = upf.coulomb_potential; - } #ifdef __MPI - Parallel_Common::bcast_int(error); - Parallel_Common::bcast_int(error_ap); - Parallel_Common::bcast_bool(ucell.atoms[i].coulomb_potential); + Parallel_Common::bcast_int (error); + Parallel_Common::bcast_int (error_ap); + Parallel_Common::bcast_bool (ucell.atoms[i].coulomb_potential); #endif - if (error_ap) - { - ModuleBase::WARNING_QUIT("read_cell_pseudopots", "error when average the pseudopotential."); - } - - if (error == 1) - { - std::cout << " Pseudopotential directory now is : " << pp_address << std::endl; - GlobalV::ofs_warning << " Pseudopotential directory now is : " << pp_address << std::endl; - ModuleBase::WARNING_QUIT("read_cell_pseudopots", "Couldn't find pseudopotential file."); - } - else if (error == 2) - { - ModuleBase::WARNING_QUIT("read_cell_pseudopots", "Pseudopotential data do not match."); - } - else if (error == 3) - { - ModuleBase::WARNING_QUIT( - "read_cell_pseudopots", - "Check the 
reference states in pseudopotential .vwr file.\n Also the norm of the read in pseudo wave " - "functions\n explicitly please check S, P and D channels.\n If the norm of the wave function is \n " - "unreasonable large (should be near 1.0), ABACUS would quit. \n The solution is to turn off the wave " - "functions \n and the corresponding non-local projectors together\n in .vwr pseudopotential file."); - } - else if (error == 4) - { - ModuleBase::WARNING_QUIT("read_cell_pseudopots", "Unknown pseudopotential type."); - } - - if (GlobalV::MY_RANK == 0) - { - upf.complete_default(ucell.atoms[i].ncpp); - - ModuleBase::GlobalFunc::OUT(log, "Pseudopotential file", ucell.pseudo_fn[i]); - ModuleBase::GlobalFunc::OUT(log, "Pseudopotential type", ucell.atoms[i].ncpp.pp_type); - ModuleBase::GlobalFunc::OUT(log, "Exchange-correlation functional", ucell.atoms[i].ncpp.xc_func); - ModuleBase::GlobalFunc::OUT(log, "Nonlocal core correction", ucell.atoms[i].ncpp.nlcc); - // ModuleBase::GlobalFunc::OUT(log, "spin orbital", ucell.atoms[i].has_so); - ModuleBase::GlobalFunc::OUT(log, "Valence electrons", ucell.atoms[i].ncpp.zv); - ModuleBase::GlobalFunc::OUT(log, "Lmax", ucell.atoms[i].ncpp.lmax); - ModuleBase::GlobalFunc::OUT(log, "Number of zeta", ucell.atoms[i].ncpp.nchi); - ModuleBase::GlobalFunc::OUT(log, "Number of projectors", ucell.atoms[i].ncpp.nbeta); - for (int ib = 0; ib < ucell.atoms[i].ncpp.nbeta; ib++) - { - ModuleBase::GlobalFunc::OUT(log, "L of projector", ucell.atoms[i].ncpp.lll[ib]); - } - // ModuleBase::GlobalFunc::OUT(log,"Grid Mesh Number", atoms[i].mesh); - if (PARAM.inp.dft_functional != "default") - { - std::string xc_func1 = PARAM.inp.dft_functional; - transform(xc_func1.begin(), xc_func1.end(), xc_func1.begin(), (::toupper)); - if (xc_func1 != ucell.atoms[i].ncpp.xc_func) + if (error_ap) { - std::cout << " NAME OF ELEMENT : " << ucell.atoms[i].label << std::endl; - std::cout << " DFT FUNC. 
(PSEUDO) : " << ucell.atoms[i].ncpp.xc_func << std::endl; - std::cout << " DFT FUNC. (SET TO) : " << xc_func1 << std::endl; - std::cout << " MAKE SURE THIS DFT FUNCTIONAL IS WHAT YOU NEED" << std::endl; - + ModuleBase::WARNING_QUIT ("read_cell_pseudopots", "error when average the pseudopotential."); + } - GlobalV::ofs_warning << " NAME OF ELEMENT : " << ucell.atoms[i].label << std::endl; - GlobalV::ofs_warning << " DFT FUNC. (PSEUDO) : " << ucell.atoms[i].ncpp.xc_func << std::endl; - GlobalV::ofs_warning << " DFT FUNC. (SET TO) : " << xc_func1 << std::endl; - GlobalV::ofs_warning << " MAKE SURE THIS DFT FUNCTIONAL IS WHAT YOU NEED" << std::endl; + if (error == 1) + { + std::cout << " Pseudopotential directory now is : " << pp_address << std::endl; + GlobalV::ofs_warning << " Pseudopotential directory now is : " << pp_address << std::endl; + ModuleBase::WARNING_QUIT ("read_cell_pseudopots", "Couldn't find pseudopotential file."); + } + else if (error == 2) + { + ModuleBase::WARNING_QUIT ("read_cell_pseudopots", "Pseudopotential data do not match."); + } + else if (error == 3) + { + ModuleBase::WARNING_QUIT ("read_cell_pseudopots", + "Check the reference states in pseudopotential .vwr file.\n Also the " + "norm of the read in pseudo wave " + "functions\n explicitly please check S, P and D channels.\n If the norm " + "of the wave function is \n " + "unreasonable large (should be near 1.0), ABACUS would quit. 
\n The " + "solution is to turn off the wave " + "functions \n and the corresponding non-local projectors together\n in " + ".vwr pseudopotential file."); + } + else if (error == 4) + { + ModuleBase::WARNING_QUIT ("read_cell_pseudopots", "Unknown pseudopotential type."); + } - ucell.atoms[i].ncpp.xc_func = xc_func1; - ModuleBase::GlobalFunc::OUT(log, "DFT functional set to", xc_func1); + if (GlobalV::MY_RANK == 0) + { + upf.complete_default (ucell.atoms[i].ncpp); + + ModuleBase::GlobalFunc::OUT (log, "Pseudopotential file", ucell.pseudo_fn[i]); + ModuleBase::GlobalFunc::OUT (log, "Pseudopotential type", ucell.atoms[i].ncpp.pp_type); + ModuleBase::GlobalFunc::OUT (log, "Exchange-correlation functional", ucell.atoms[i].ncpp.xc_func); + ModuleBase::GlobalFunc::OUT (log, "Nonlocal core correction", ucell.atoms[i].ncpp.nlcc); + // ModuleBase::GlobalFunc::OUT(log, "spin orbital", ucell.atoms[i].has_so); + ModuleBase::GlobalFunc::OUT (log, "Valence electrons", ucell.atoms[i].ncpp.zv); + ModuleBase::GlobalFunc::OUT (log, "Lmax", ucell.atoms[i].ncpp.lmax); + ModuleBase::GlobalFunc::OUT (log, "Number of zeta", ucell.atoms[i].ncpp.nchi); + ModuleBase::GlobalFunc::OUT (log, "Number of projectors", ucell.atoms[i].ncpp.nbeta); + for (int ib = 0; ib < ucell.atoms[i].ncpp.nbeta; ib++) + { + ModuleBase::GlobalFunc::OUT (log, "L of projector", ucell.atoms[i].ncpp.lll[ib]); + } + // ModuleBase::GlobalFunc::OUT(log,"Grid Mesh Number", atoms[i].mesh); + if (PARAM.inp.dft_functional != "default") + { + std::string xc_func1 = PARAM.inp.dft_functional; + transform (xc_func1.begin (), xc_func1.end (), xc_func1.begin (), (::toupper)); + if (xc_func1 != ucell.atoms[i].ncpp.xc_func) + { + std::cout << " NAME OF ELEMENT : " << ucell.atoms[i].label << std::endl; + std::cout << " DFT FUNC. (PSEUDO) : " << ucell.atoms[i].ncpp.xc_func << std::endl; + std::cout << " DFT FUNC. 
(SET TO) : " << xc_func1 << std::endl; + std::cout << " MAKE SURE THIS DFT FUNCTIONAL IS WHAT YOU NEED" << std::endl; + + GlobalV::ofs_warning << " NAME OF ELEMENT : " << ucell.atoms[i].label + << std::endl; + GlobalV::ofs_warning << " DFT FUNC. (PSEUDO) : " << ucell.atoms[i].ncpp.xc_func + << std::endl; + GlobalV::ofs_warning << " DFT FUNC. (SET TO) : " << xc_func1 << std::endl; + GlobalV::ofs_warning << " MAKE SURE THIS DFT FUNCTIONAL IS WHAT YOU NEED" + << std::endl; + + ucell.atoms[i].ncpp.xc_func = xc_func1; + ModuleBase::GlobalFunc::OUT (log, "DFT functional set to", xc_func1); + } + } } - } } - } return; } -void print_unitcell_pseudo(const std::string& fn, UnitCell& ucell) +void + print_unitcell_pseudo (const std::string& fn, UnitCell& ucell) { - ModuleBase::TITLE("elecstate", "print_unitcell_pseudo"); - std::ofstream ofs(fn.c_str()); + ModuleBase::TITLE ("elecstate", "print_unitcell_pseudo"); + std::ofstream ofs (fn.c_str ()); - ucell.print_cell(ofs); + ucell.print_cell (ofs); for (int i = 0; i < ucell.ntype; i++) - { - ucell.atoms[i].print_Atom(ofs); - } + { + ucell.atoms[i].print_Atom (ofs); + } - ofs.close(); + ofs.close (); return; } -} +} // namespace elecstate diff --git a/source/source_estate/read_pseudo.h b/source/source_estate/read_pseudo.h index b42d81d439b..706ecf7723e 100644 --- a/source/source_estate/read_pseudo.h +++ b/source/source_estate/read_pseudo.h @@ -3,37 +3,38 @@ #include "source_cell/unitcell.h" -namespace elecstate { - - void read_pseudo(std::ofstream& ofs, UnitCell& ucell); - - // read in pseudopotential from files for each type of atom - void read_cell_pseudopots(const std::string& fn, std::ofstream& log, UnitCell& ucell); - - void print_unitcell_pseudo(const std::string& fn, UnitCell& ucell); - - //=========================================== - // calculate the total number of local basis - // Target : nwfc, lmax, - // atoms[].stapos_wf - // PARAM.inp.nbands - //=========================================== - void 
cal_nwfc(std::ofstream& log, UnitCell& ucell,Atom* atoms); - - //====================== - // Target : meshx - // Demand : atoms[].msh - //====================== - void cal_meshx(int& meshx,const Atom* atoms, const int ntype); - - //========================= - // Target : natomwfc - // Demand : atoms[].nchi - // atoms[].lchi - // atoms[].oc - // atoms[].na - //========================= - void cal_natomwfc(std::ofstream& log,int& natomwfc,const int ntype,const Atom* atoms); -} +namespace elecstate +{ + +void read_pseudo (std::ofstream& ofs, UnitCell& ucell); + +// read in pseudopotential from files for each type of atom +void read_cell_pseudopots (const std::string& fn, std::ofstream& log, UnitCell& ucell); + +void print_unitcell_pseudo (const std::string& fn, UnitCell& ucell); + +//=========================================== +// calculate the total number of local basis +// Target : nwfc, lmax, +// atoms[].stapos_wf +// PARAM.inp.nbands +//=========================================== +void cal_nwfc (std::ofstream& log, UnitCell& ucell, Atom* atoms); + +//====================== +// Target : meshx +// Demand : atoms[].msh +//====================== +void cal_meshx (int& meshx, const Atom* atoms, const int ntype); + +//========================= +// Target : natomwfc +// Demand : atoms[].nchi +// atoms[].lchi +// atoms[].oc +// atoms[].na +//========================= +void cal_natomwfc (std::ofstream& log, int& natomwfc, const int ntype, const Atom* atoms); +} // namespace elecstate #endif \ No newline at end of file diff --git a/source/source_estate/setup_estate_pw.cpp b/source/source_estate/setup_estate_pw.cpp index 654969faa9f..5afae130c9c 100644 --- a/source/source_estate/setup_estate_pw.cpp +++ b/source/source_estate/setup_estate_pw.cpp @@ -7,210 +7,289 @@ namespace elecstate { -void setup_estate_pw( - UnitCell& ucell, - K_Vectors& kv, - Structure_Factor& sf, - elecstate::ElecState*& pelec, - Charge& chr, - pseudopot_cell_vl& locpp, - pseudopot_cell_vnl& ppcell, - 
VSep*& vsep_cell, - ModulePW::PW_Basis_K* pw_wfc, - ModulePW::PW_Basis* pw_rho, - ModulePW::PW_Basis* pw_rhod, - ModulePW::PW_Basis_Big* pw_big, - surchem& solvent, - const Input_para& inp) +void + setup_estate_pw (UnitCell& ucell, + K_Vectors& kv, + Structure_Factor& sf, + elecstate::ElecState*& pelec, + Charge& chr, + pseudopot_cell_vl& locpp, + pseudopot_cell_vnl& ppcell, + VSep*& vsep_cell, + ModulePW::PW_Basis_K* pw_wfc, + ModulePW::PW_Basis* pw_rho, + ModulePW::PW_Basis* pw_rhod, + ModulePW::PW_Basis_Big* pw_big, + surchem& solvent, + const Input_para& inp) { - ModuleBase::TITLE("elecstate", "setup_estate_pw"); + ModuleBase::TITLE ("elecstate", "setup_estate_pw"); const bool is_gpu = (inp.device == "gpu"); const bool is_single = (inp.precision == "single"); #if ((defined __CUDA) || (defined __ROCM)) if (is_gpu) - { - if (is_single) { - setup_estate_pw_impl, base_device::DEVICE_GPU>( - ucell, kv, sf, pelec, chr, locpp, ppcell, vsep_cell, - pw_wfc, pw_rho, pw_rhod, pw_big, solvent, inp); + if (is_single) + { + setup_estate_pw_impl, base_device::DEVICE_GPU> (ucell, + kv, + sf, + pelec, + chr, + locpp, + ppcell, + vsep_cell, + pw_wfc, + pw_rho, + pw_rhod, + pw_big, + solvent, + inp); + } + else + { + setup_estate_pw_impl, base_device::DEVICE_GPU> (ucell, + kv, + sf, + pelec, + chr, + locpp, + ppcell, + vsep_cell, + pw_wfc, + pw_rho, + pw_rhod, + pw_big, + solvent, + inp); + } } - else - { - setup_estate_pw_impl, base_device::DEVICE_GPU>( - ucell, kv, sf, pelec, chr, locpp, ppcell, vsep_cell, - pw_wfc, pw_rho, pw_rhod, pw_big, solvent, inp); - } - } else #endif - { - if (is_single) - { - setup_estate_pw_impl, base_device::DEVICE_CPU>( - ucell, kv, sf, pelec, chr, locpp, ppcell, vsep_cell, - pw_wfc, pw_rho, pw_rhod, pw_big, solvent, inp); - } - else { - setup_estate_pw_impl, base_device::DEVICE_CPU>( - ucell, kv, sf, pelec, chr, locpp, ppcell, vsep_cell, - pw_wfc, pw_rho, pw_rhod, pw_big, solvent, inp); + if (is_single) + { + setup_estate_pw_impl, 
base_device::DEVICE_CPU> (ucell, + kv, + sf, + pelec, + chr, + locpp, + ppcell, + vsep_cell, + pw_wfc, + pw_rho, + pw_rhod, + pw_big, + solvent, + inp); + } + else + { + setup_estate_pw_impl, base_device::DEVICE_CPU> (ucell, + kv, + sf, + pelec, + chr, + locpp, + ppcell, + vsep_cell, + pw_wfc, + pw_rho, + pw_rhod, + pw_big, + solvent, + inp); + } } - } } template -void setup_estate_pw_impl( - UnitCell& ucell, - K_Vectors& kv, - Structure_Factor& sf, - elecstate::ElecState*& pelec, - Charge& chr, - pseudopot_cell_vl& locpp, - pseudopot_cell_vnl& ppcell, - VSep*& vsep_cell, - ModulePW::PW_Basis_K* pw_wfc, - ModulePW::PW_Basis* pw_rho, - ModulePW::PW_Basis* pw_rhod, - ModulePW::PW_Basis_Big* pw_big, - surchem& solvent, - const Input_para& inp) +void + setup_estate_pw_impl (UnitCell& ucell, + K_Vectors& kv, + Structure_Factor& sf, + elecstate::ElecState*& pelec, + Charge& chr, + pseudopot_cell_vl& locpp, + pseudopot_cell_vnl& ppcell, + VSep*& vsep_cell, + ModulePW::PW_Basis_K* pw_wfc, + ModulePW::PW_Basis* pw_rho, + ModulePW::PW_Basis* pw_rhod, + ModulePW::PW_Basis_Big* pw_big, + surchem& solvent, + const Input_para& inp) { if (pelec == nullptr) - { - if (inp.esolver_type == "sdft") - { - pelec = new elecstate::ElecStatePW_SDFT, Device>(pw_wfc, - &chr, &kv, &ucell, &ppcell, pw_rho, pw_big); - } - else { - pelec = new elecstate::ElecStatePW(pw_wfc, - &chr, &kv, &ucell, &ppcell, pw_rho, pw_big); + if (inp.esolver_type == "sdft") + { + pelec = new elecstate::ElecStatePW_SDFT, Device> (pw_wfc, + &chr, + &kv, + &ucell, + &ppcell, + pw_rho, + pw_big); + } + else + { + pelec = new elecstate::ElecStatePW (pw_wfc, &chr, &kv, &ucell, &ppcell, pw_rho, pw_big); + } } - } if (PARAM.inp.dfthalf_type > 0) - { - vsep_cell = new VSep; - vsep_cell->init_vsep(*pw_rhod, ucell.sep_cell); - } + { + vsep_cell = new VSep; + vsep_cell->init_vsep (*pw_rhod, ucell.sep_cell); + } if (pelec->pot == nullptr) - { - pelec->pot = new elecstate::Potential(pw_rhod, - pw_rho, &ucell, &locpp.vloc, &sf, - 
&solvent, &(pelec->f_en.etxc), &(pelec->f_en.vtxc), vsep_cell); - } + { + pelec->pot = new elecstate::Potential (pw_rhod, + pw_rho, + &ucell, + &locpp.vloc, + &sf, + &solvent, + &(pelec->f_en.etxc), + &(pelec->f_en.vtxc), + vsep_cell); + } - locpp.init_vloc(ucell, pw_rhod); - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "LOCAL POTENTIAL"); + locpp.init_vloc (ucell, pw_rhod); + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "LOCAL POTENTIAL"); - ppcell.init(ucell, &sf, pw_wfc); - ppcell.init_vnl(ucell, pw_rhod); - ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "NON-LOCAL POTENTIAL"); + ppcell.init (ucell, &sf, pw_wfc); + ppcell.init_vnl (ucell, pw_rhod); + ModuleBase::GlobalFunc::DONE (GlobalV::ofs_running, "NON-LOCAL POTENTIAL"); if (inp.ocp) - { - elecstate::fixed_weights(inp.ocp_kb, - inp.nbands, - inp.nelec, - pelec->klist, - pelec->wg, - pelec->skip_weights); - } + { + elecstate::fixed_weights (inp.ocp_kb, inp.nbands, inp.nelec, pelec->klist, pelec->wg, pelec->skip_weights); + } } -void teardown_estate_pw(elecstate::ElecState*& pelec, VSep*& vsep_cell) +void + teardown_estate_pw (elecstate::ElecState*& pelec, VSep*& vsep_cell) { - ModuleBase::TITLE("elecstate", "teardown_estate_pw"); + ModuleBase::TITLE ("elecstate", "teardown_estate_pw"); if (vsep_cell != nullptr) - { - delete vsep_cell; - } + { + delete vsep_cell; + } if (pelec != nullptr) - { - delete pelec; - pelec = nullptr; - } + { + delete pelec; + pelec = nullptr; + } } template -void teardown_estate_pw_impl(elecstate::ElecState*& pelec, VSep*& vsep_cell) +void + teardown_estate_pw_impl (elecstate::ElecState*& pelec, VSep*& vsep_cell) { - ModuleBase::TITLE("elecstate", "teardown_estate_pw_impl"); + ModuleBase::TITLE ("elecstate", "teardown_estate_pw_impl"); if (vsep_cell != nullptr) - { - delete vsep_cell; - } - - if (pelec != nullptr) - { - auto* pw_elec = dynamic_cast*>(pelec); - if (pw_elec) { - delete pw_elec; - pelec = nullptr; + delete vsep_cell; } - else + + if (pelec != nullptr) { 
- ModuleBase::WARNING_QUIT("elecstate::teardown_estate_pw_impl", "Invalid ElecState type"); + auto* pw_elec = dynamic_cast*> (pelec); + if (pw_elec) + { + delete pw_elec; + pelec = nullptr; + } + else + { + ModuleBase::WARNING_QUIT ("elecstate::teardown_estate_pw_impl", "Invalid ElecState type"); + } } - } } -template void setup_estate_pw_impl, base_device::DEVICE_CPU>( - UnitCell& ucell, K_Vectors& kv, Structure_Factor& sf, - elecstate::ElecState*& pelec, Charge& chr, - pseudopot_cell_vl& locpp, pseudopot_cell_vnl& ppcell, VSep*& vsep_cell, - ModulePW::PW_Basis_K* pw_wfc, ModulePW::PW_Basis* pw_rho, - ModulePW::PW_Basis* pw_rhod, ModulePW::PW_Basis_Big* pw_big, - surchem& solvent, const Input_para& inp); +template void setup_estate_pw_impl, base_device::DEVICE_CPU> (UnitCell& ucell, + K_Vectors& kv, + Structure_Factor& sf, + elecstate::ElecState*& pelec, + Charge& chr, + pseudopot_cell_vl& locpp, + pseudopot_cell_vnl& ppcell, + VSep*& vsep_cell, + ModulePW::PW_Basis_K* pw_wfc, + ModulePW::PW_Basis* pw_rho, + ModulePW::PW_Basis* pw_rhod, + ModulePW::PW_Basis_Big* pw_big, + surchem& solvent, + const Input_para& inp); -template void setup_estate_pw_impl, base_device::DEVICE_CPU>( - UnitCell& ucell, K_Vectors& kv, Structure_Factor& sf, - elecstate::ElecState*& pelec, Charge& chr, - pseudopot_cell_vl& locpp, pseudopot_cell_vnl& ppcell, VSep*& vsep_cell, - ModulePW::PW_Basis_K* pw_wfc, ModulePW::PW_Basis* pw_rho, - ModulePW::PW_Basis* pw_rhod, ModulePW::PW_Basis_Big* pw_big, - surchem& solvent, const Input_para& inp); +template void setup_estate_pw_impl, base_device::DEVICE_CPU> (UnitCell& ucell, + K_Vectors& kv, + Structure_Factor& sf, + elecstate::ElecState*& pelec, + Charge& chr, + pseudopot_cell_vl& locpp, + pseudopot_cell_vnl& ppcell, + VSep*& vsep_cell, + ModulePW::PW_Basis_K* pw_wfc, + ModulePW::PW_Basis* pw_rho, + ModulePW::PW_Basis* pw_rhod, + ModulePW::PW_Basis_Big* pw_big, + surchem& solvent, + const Input_para& inp); -template void teardown_estate_pw_impl, 
base_device::DEVICE_CPU>( - elecstate::ElecState*& pelec, VSep*& vsep_cell); +template void teardown_estate_pw_impl, base_device::DEVICE_CPU> (elecstate::ElecState*& pelec, + VSep*& vsep_cell); -template void teardown_estate_pw_impl, base_device::DEVICE_CPU>( - elecstate::ElecState*& pelec, VSep*& vsep_cell); +template void teardown_estate_pw_impl, base_device::DEVICE_CPU> (elecstate::ElecState*& pelec, + VSep*& vsep_cell); #if ((defined __CUDA) || (defined __ROCM)) -template void setup_estate_pw_impl, base_device::DEVICE_GPU>( - UnitCell& ucell, K_Vectors& kv, Structure_Factor& sf, - elecstate::ElecState*& pelec, Charge& chr, - pseudopot_cell_vl& locpp, pseudopot_cell_vnl& ppcell, VSep*& vsep_cell, - ModulePW::PW_Basis_K* pw_wfc, ModulePW::PW_Basis* pw_rho, - ModulePW::PW_Basis* pw_rhod, ModulePW::PW_Basis_Big* pw_big, - surchem& solvent, const Input_para& inp); +template void setup_estate_pw_impl, base_device::DEVICE_GPU> (UnitCell& ucell, + K_Vectors& kv, + Structure_Factor& sf, + elecstate::ElecState*& pelec, + Charge& chr, + pseudopot_cell_vl& locpp, + pseudopot_cell_vnl& ppcell, + VSep*& vsep_cell, + ModulePW::PW_Basis_K* pw_wfc, + ModulePW::PW_Basis* pw_rho, + ModulePW::PW_Basis* pw_rhod, + ModulePW::PW_Basis_Big* pw_big, + surchem& solvent, + const Input_para& inp); -template void setup_estate_pw_impl, base_device::DEVICE_GPU>( - UnitCell& ucell, K_Vectors& kv, Structure_Factor& sf, - elecstate::ElecState*& pelec, Charge& chr, - pseudopot_cell_vl& locpp, pseudopot_cell_vnl& ppcell, VSep*& vsep_cell, - ModulePW::PW_Basis_K* pw_wfc, ModulePW::PW_Basis* pw_rho, - ModulePW::PW_Basis* pw_rhod, ModulePW::PW_Basis_Big* pw_big, - surchem& solvent, const Input_para& inp); +template void setup_estate_pw_impl, base_device::DEVICE_GPU> (UnitCell& ucell, + K_Vectors& kv, + Structure_Factor& sf, + elecstate::ElecState*& pelec, + Charge& chr, + pseudopot_cell_vl& locpp, + pseudopot_cell_vnl& ppcell, + VSep*& vsep_cell, + ModulePW::PW_Basis_K* pw_wfc, + ModulePW::PW_Basis* 
pw_rho, + ModulePW::PW_Basis* pw_rhod, + ModulePW::PW_Basis_Big* pw_big, + surchem& solvent, + const Input_para& inp); -template void teardown_estate_pw_impl, base_device::DEVICE_GPU>( - elecstate::ElecState*& pelec, VSep*& vsep_cell); +template void teardown_estate_pw_impl, base_device::DEVICE_GPU> (elecstate::ElecState*& pelec, + VSep*& vsep_cell); -template void teardown_estate_pw_impl, base_device::DEVICE_GPU>( - elecstate::ElecState*& pelec, VSep*& vsep_cell); +template void teardown_estate_pw_impl, base_device::DEVICE_GPU> (elecstate::ElecState*& pelec, + VSep*& vsep_cell); #endif -} +} // namespace elecstate diff --git a/source/source_estate/setup_estate_pw.h b/source/source_estate/setup_estate_pw.h index 03e994369ae..c20f140d9c2 100644 --- a/source/source_estate/setup_estate_pw.h +++ b/source/source_estate/setup_estate_pw.h @@ -13,44 +13,42 @@ class pseudopot_cell_vnl; namespace elecstate { -void setup_estate_pw( - UnitCell& ucell, - K_Vectors& kv, - Structure_Factor& sf, - elecstate::ElecState*& pelec, - Charge& chr, - pseudopot_cell_vl& locpp, - pseudopot_cell_vnl& ppcell, - VSep*& vsep_cell, - ModulePW::PW_Basis_K* pw_wfc, - ModulePW::PW_Basis* pw_rho, - ModulePW::PW_Basis* pw_rhod, - ModulePW::PW_Basis_Big* pw_big, - surchem& solvent, - const Input_para& inp); - -void teardown_estate_pw(elecstate::ElecState*& pelec, VSep*& vsep_cell); +void setup_estate_pw (UnitCell& ucell, + K_Vectors& kv, + Structure_Factor& sf, + elecstate::ElecState*& pelec, + Charge& chr, + pseudopot_cell_vl& locpp, + pseudopot_cell_vnl& ppcell, + VSep*& vsep_cell, + ModulePW::PW_Basis_K* pw_wfc, + ModulePW::PW_Basis* pw_rho, + ModulePW::PW_Basis* pw_rhod, + ModulePW::PW_Basis_Big* pw_big, + surchem& solvent, + const Input_para& inp); + +void teardown_estate_pw (elecstate::ElecState*& pelec, VSep*& vsep_cell); template -void setup_estate_pw_impl( - UnitCell& ucell, - K_Vectors& kv, - Structure_Factor& sf, - elecstate::ElecState*& pelec, - Charge& chr, - pseudopot_cell_vl& locpp, - 
pseudopot_cell_vnl& ppcell, - VSep*& vsep_cell, - ModulePW::PW_Basis_K* pw_wfc, - ModulePW::PW_Basis* pw_rho, - ModulePW::PW_Basis* pw_rhod, - ModulePW::PW_Basis_Big* pw_big, - surchem& solvent, - const Input_para& inp); +void setup_estate_pw_impl (UnitCell& ucell, + K_Vectors& kv, + Structure_Factor& sf, + elecstate::ElecState*& pelec, + Charge& chr, + pseudopot_cell_vl& locpp, + pseudopot_cell_vnl& ppcell, + VSep*& vsep_cell, + ModulePW::PW_Basis_K* pw_wfc, + ModulePW::PW_Basis* pw_rho, + ModulePW::PW_Basis* pw_rhod, + ModulePW::PW_Basis_Big* pw_big, + surchem& solvent, + const Input_para& inp); template -void teardown_estate_pw_impl(elecstate::ElecState*& pelec, VSep*& vsep_cell); +void teardown_estate_pw_impl (elecstate::ElecState*& pelec, VSep*& vsep_cell); -} +} // namespace elecstate #endif diff --git a/source/source_estate/test/charge_extra_test.cpp b/source/source_estate/test/charge_extra_test.cpp index c2951c1b5a9..f20de129c6e 100644 --- a/source/source_estate/test/charge_extra_test.cpp +++ b/source/source_estate/test/charge_extra_test.cpp @@ -10,26 +10,15 @@ #undef protected // mock functions for UnitCell #ifdef __LCAO -InfoNonlocal::InfoNonlocal() -{ -} -InfoNonlocal::~InfoNonlocal() -{ -} +InfoNonlocal::InfoNonlocal () {} +InfoNonlocal::~InfoNonlocal () {} #endif -Magnetism::Magnetism() -{ - this->start_mag = nullptr; -} -Magnetism::~Magnetism() -{ - delete[] this->start_mag; -} -Parallel_Grid::~Parallel_Grid(){}; - +Magnetism::Magnetism () { this->start_mag = nullptr; } +Magnetism::~Magnetism () { delete[] this->start_mag; } +Parallel_Grid::~Parallel_Grid () {}; // mock functions for Charge -Charge::Charge() +Charge::Charge () { rhopw = new ModulePW::PW_Basis; rhopw->nrxx = 8; @@ -38,19 +27,20 @@ Charge::Charge() rhopw->nz = 2; rho = new double*[1]; rho[0] = new double[rhopw->nrxx]; - ModuleBase::GlobalFunc::ZEROS(rho[0], rhopw->nrxx); + ModuleBase::GlobalFunc::ZEROS (rho[0], rhopw->nrxx); for (int i = 0; i < rhopw->nrxx; ++i) - { - rho[0][i] = i + 1; 
- } + { + rho[0][i] = i + 1; + } } -Charge::~Charge() +Charge::~Charge () { delete[] rho[0]; delete[] rho; delete rhopw; } -void Charge::atomic_rho(const int spin_number_need, +void + Charge::atomic_rho (const int spin_number_need, const double& omega, double** rho_in, const ModuleBase::ComplexMatrix& strucFac, @@ -61,35 +51,31 @@ void Charge::atomic_rho(const int spin_number_need, // mock functions for PW_Basis namespace ModulePW { -PW_Basis::PW_Basis() +PW_Basis::PW_Basis () {} +PW_Basis::~PW_Basis () {} +void + PW_Basis::initgrids (const double lat0_in, const ModuleBase::Matrix3 latvec_in, const double gridecut) { } -PW_Basis::~PW_Basis() -{ -} -void PW_Basis::initgrids(const double lat0_in, const ModuleBase::Matrix3 latvec_in, const double gridecut) -{ -} -void PW_Basis::initgrids(const double lat0_in, +void + PW_Basis::initgrids (const double lat0_in, const ModuleBase::Matrix3 latvec_in, const int nx_in, int ny_in, int nz_in) { } -void PW_Basis::distribute_r() +void + PW_Basis::distribute_r () { } } // namespace ModulePW // mock functions for Structure_Factor -Structure_Factor::Structure_Factor() -{ -} -Structure_Factor::~Structure_Factor() -{ -} -void Structure_Factor::setup(const UnitCell*, const Parallel_Grid&, const ModulePW::PW_Basis*) +Structure_Factor::Structure_Factor () {} +Structure_Factor::~Structure_Factor () {} +void + Structure_Factor::setup (const UnitCell*, const Parallel_Grid&, const ModulePW::PW_Basis*) { } @@ -118,192 +104,194 @@ class ChargeExtraTest : public ::testing::Test Parallel_Grid* pgrid = nullptr; Charge charge; Structure_Factor sf; - void SetUp() override + void + SetUp () override { PARAM.input.nspin = 1; PARAM.sys.global_out_dir = "./support/"; - ucell = utp.SetUcellInfo(); + ucell = utp.SetUcellInfo (); ucell->omega = 1.0; } - void TearDown() override + void + TearDown () override { } }; -TEST_F(ChargeExtraTest, InitCEWarningQuit) +TEST_F (ChargeExtraTest, InitCEWarningQuit) { PARAM.input.chg_extrap = "wwww"; - 
testing::internal::CaptureStdout(); - EXPECT_EXIT(CE.Init_CE(PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap), - ::testing::ExitedWithCode(1), - ""); - std::string output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("charge extrapolation method is not available")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (CE.Init_CE (PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap), + ::testing::ExitedWithCode (1), + ""); + std::string output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("charge extrapolation method is not available")); } -TEST_F(ChargeExtraTest, InitCECase1) +TEST_F (ChargeExtraTest, InitCECase1) { PARAM.input.chg_extrap = "none"; - CE.Init_CE(PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); - EXPECT_EQ(CE.pot_order, 0); + CE.Init_CE (PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); + EXPECT_EQ (CE.pot_order, 0); } -TEST_F(ChargeExtraTest, InitCECase2) +TEST_F (ChargeExtraTest, InitCECase2) { PARAM.input.chg_extrap = "atomic"; - CE.Init_CE(PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); - EXPECT_EQ(CE.pot_order, 1); + CE.Init_CE (PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); + EXPECT_EQ (CE.pot_order, 1); } -TEST_F(ChargeExtraTest, InitCECase3) +TEST_F (ChargeExtraTest, InitCECase3) { PARAM.input.chg_extrap = "first-order"; - CE.Init_CE(PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); - EXPECT_EQ(CE.pot_order, 2); - EXPECT_NE(CE.delta_rho1.size(), 0); - EXPECT_NE(CE.delta_rho2.size(), 0); + CE.Init_CE (PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); + EXPECT_EQ (CE.pot_order, 2); + EXPECT_NE (CE.delta_rho1.size (), 0); + EXPECT_NE (CE.delta_rho2.size (), 0); } -TEST_F(ChargeExtraTest, InitCECase4) +TEST_F (ChargeExtraTest, InitCECase4) { PARAM.input.chg_extrap 
= "second-order"; - CE.Init_CE(PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); - EXPECT_EQ(CE.pot_order, 3); - EXPECT_DOUBLE_EQ(CE.alpha, 1.0); - EXPECT_DOUBLE_EQ(CE.beta, 0.0); - EXPECT_NE(CE.delta_rho1.size(), 0); - EXPECT_NE(CE.delta_rho2.size(), 0); - EXPECT_NE(CE.dis_old1, nullptr); - EXPECT_NE(CE.dis_old2, nullptr); - EXPECT_NE(CE.dis_now, nullptr); + CE.Init_CE (PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); + EXPECT_EQ (CE.pot_order, 3); + EXPECT_DOUBLE_EQ (CE.alpha, 1.0); + EXPECT_DOUBLE_EQ (CE.beta, 0.0); + EXPECT_NE (CE.delta_rho1.size (), 0); + EXPECT_NE (CE.delta_rho2.size (), 0); + EXPECT_NE (CE.dis_old1, nullptr); + EXPECT_NE (CE.dis_old2, nullptr); + EXPECT_NE (CE.dis_now, nullptr); } -TEST_F(ChargeExtraTest, ExtrapolateChargeCase1) +TEST_F (ChargeExtraTest, ExtrapolateChargeCase1) { PARAM.input.chg_extrap = "second-order"; - CE.Init_CE(PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); + CE.Init_CE (PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); CE.istep = 0; CE.pot_order = 3; - GlobalV::ofs_running.open("log"); - CE.extrapolate_charge(pgrid, *ucell.get(), &charge, &sf, GlobalV::ofs_running, GlobalV::ofs_warning); - GlobalV::ofs_running.close(); + GlobalV::ofs_running.open ("log"); + CE.extrapolate_charge (pgrid, *ucell.get (), &charge, &sf, GlobalV::ofs_running, GlobalV::ofs_warning); + GlobalV::ofs_running.close (); // Check the results - std::ifstream ifs("log"); + std::ifstream ifs ("log"); std::string expected_output = " charge density from previous step !\n"; - std::string output((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - ifs.close(); - std::remove("log"); + std::string output ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + ifs.close (); + std::remove ("log"); - EXPECT_EQ(output, expected_output); - EXPECT_EQ(CE.rho_extr, 0); + EXPECT_EQ (output, expected_output); + EXPECT_EQ (CE.rho_extr, 0); 
} -TEST_F(ChargeExtraTest, ExtrapolateChargeCase2) +TEST_F (ChargeExtraTest, ExtrapolateChargeCase2) { PARAM.input.chg_extrap = "second-order"; - CE.Init_CE(PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); + CE.Init_CE (PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); CE.istep = 1; CE.pot_order = 3; - GlobalV::ofs_running.open("log"); - CE.extrapolate_charge(pgrid, *ucell.get(), &charge, &sf, GlobalV::ofs_running, GlobalV::ofs_warning); - GlobalV::ofs_running.close(); + GlobalV::ofs_running.open ("log"); + CE.extrapolate_charge (pgrid, *ucell.get (), &charge, &sf, GlobalV::ofs_running, GlobalV::ofs_warning); + GlobalV::ofs_running.close (); // Check the results - std::ifstream ifs("log"); + std::ifstream ifs ("log"); std::string expected_output = " NEW-OLD atomic charge density approx. for the potential !\n"; - std::string output((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - ifs.close(); - std::remove("log"); + std::string output ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + ifs.close (); + std::remove ("log"); - EXPECT_EQ(output, expected_output); - EXPECT_EQ(CE.rho_extr, 1); + EXPECT_EQ (output, expected_output); + EXPECT_EQ (CE.rho_extr, 1); } -TEST_F(ChargeExtraTest, ExtrapolateChargeCase3) +TEST_F (ChargeExtraTest, ExtrapolateChargeCase3) { PARAM.input.chg_extrap = "second-order"; - CE.Init_CE(PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); + CE.Init_CE (PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); CE.istep = 2; CE.pot_order = 3; - GlobalV::ofs_running.open("log"); - CE.extrapolate_charge(pgrid, *ucell.get(), &charge, &sf, GlobalV::ofs_running, GlobalV::ofs_warning); - GlobalV::ofs_running.close(); + GlobalV::ofs_running.open ("log"); + CE.extrapolate_charge (pgrid, *ucell.get (), &charge, &sf, GlobalV::ofs_running, GlobalV::ofs_warning); + GlobalV::ofs_running.close (); // Check the results - std::ifstream 
ifs("log"); + std::ifstream ifs ("log"); std::string expected_output = " first order charge density extrapolation !\n"; - std::string output((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - ifs.close(); - std::remove("log"); + std::string output ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + ifs.close (); + std::remove ("log"); - EXPECT_EQ(output, expected_output); - EXPECT_EQ(CE.rho_extr, 2); + EXPECT_EQ (output, expected_output); + EXPECT_EQ (CE.rho_extr, 2); } -TEST_F(ChargeExtraTest, ExtrapolateChargeCase4) +TEST_F (ChargeExtraTest, ExtrapolateChargeCase4) { PARAM.input.chg_extrap = "second-order"; - CE.Init_CE(PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); + CE.Init_CE (PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); CE.istep = 3; - GlobalV::ofs_running.open("log"); - CE.extrapolate_charge(pgrid, *ucell.get(), &charge, &sf, GlobalV::ofs_running, GlobalV::ofs_warning); - GlobalV::ofs_running.close(); + GlobalV::ofs_running.open ("log"); + CE.extrapolate_charge (pgrid, *ucell.get (), &charge, &sf, GlobalV::ofs_running, GlobalV::ofs_warning); + GlobalV::ofs_running.close (); // Check the results - std::ifstream ifs("log"); + std::ifstream ifs ("log"); std::string expected_output = " second order charge density extrapolation !\n alpha = 0\n beta = 0\n"; - std::string output((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - ifs.close(); - std::remove("log"); + std::string output ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + ifs.close (); + std::remove ("log"); - EXPECT_EQ(output, expected_output); - EXPECT_EQ(CE.rho_extr, 3); - std::remove("./support/OLD2_SPIN1_CHG.cube"); + EXPECT_EQ (output, expected_output); + EXPECT_EQ (CE.rho_extr, 3); + std::remove ("./support/OLD2_SPIN1_CHG.cube"); } -TEST_F(ChargeExtraTest, UpdateAllDis) +TEST_F (ChargeExtraTest, UpdateAllDis) { PARAM.input.chg_extrap = "second-order"; - 
CE.Init_CE(PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); + CE.Init_CE (PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); CE.istep = 3; for (int i = 0; i < ucell->nat; ++i) - { - for (int j = 0; j < 3; ++j) { - CE.dis_old1[i][j] = i; - CE.dis_now[i][j] = j; + for (int j = 0; j < 3; ++j) + { + CE.dis_old1[i][j] = i; + CE.dis_now[i][j] = j; + } } - } - CE.update_all_dis(*ucell.get()); + CE.update_all_dis (*ucell.get ()); - EXPECT_EQ(CE.istep, 4); - EXPECT_DOUBLE_EQ(CE.dis_old2[0][2], 0.0); - EXPECT_DOUBLE_EQ(CE.dis_old1[0][2], 2.0); - EXPECT_DOUBLE_EQ(CE.dis_now[0][2], 0.0); + EXPECT_EQ (CE.istep, 4); + EXPECT_DOUBLE_EQ (CE.dis_old2[0][2], 0.0); + EXPECT_DOUBLE_EQ (CE.dis_old1[0][2], 2.0); + EXPECT_DOUBLE_EQ (CE.dis_now[0][2], 0.0); } -TEST_F(ChargeExtraTest, FindAlphaAndBeta) +TEST_F (ChargeExtraTest, FindAlphaAndBeta) { PARAM.input.chg_extrap = "second-order"; - CE.Init_CE(PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); + CE.Init_CE (PARAM.input.nspin, ucell->nat, charge.rhopw->nrxx, PARAM.input.chg_extrap); CE.istep = 3; for (int i = 0; i < ucell->nat; ++i) - { - for (int j = 0; j < 3; ++j) { - CE.dis_old1[i][j] = i; - CE.dis_now[i][j] = j; + for (int j = 0; j < 3; ++j) + { + CE.dis_old1[i][j] = i; + CE.dis_now[i][j] = j; + } } - } - CE.find_alpha_and_beta(ucell->nat, GlobalV::ofs_running, GlobalV::ofs_warning); + CE.find_alpha_and_beta (ucell->nat, GlobalV::ofs_running, GlobalV::ofs_warning); - EXPECT_DOUBLE_EQ(CE.alpha, 1.0); - EXPECT_DOUBLE_EQ(CE.beta, 0.0); + EXPECT_DOUBLE_EQ (CE.alpha, 1.0); + EXPECT_DOUBLE_EQ (CE.beta, 0.0); } diff --git a/source/source_estate/test/charge_mixing_test.cpp b/source/source_estate/test/charge_mixing_test.cpp index a0e03b99113..23a315a871f 100644 --- a/source/source_estate/test/charge_mixing_test.cpp +++ b/source/source_estate/test/charge_mixing_test.cpp @@ -15,30 +15,19 @@ int XC_Functional::func_type = 1; bool XC_Functional::ked_flag = false; // 
mock function -Magnetism::~Magnetism() -{ -} -Magnetism::Magnetism() -{ -} -Charge::~Charge() -{ -} -Charge::Charge() -{ -} +Magnetism::~Magnetism () {} +Magnetism::Magnetism () {} +Charge::~Charge () {} +Charge::Charge () {} -void Charge::set_rhopw(ModulePW::PW_Basis* rhopw_in) +void + Charge::set_rhopw (ModulePW::PW_Basis* rhopw_in) { this->rhopw = rhopw_in; } #ifdef __LCAO -InfoNonlocal::InfoNonlocal() -{ -} -InfoNonlocal::~InfoNonlocal() -{ -} +InfoNonlocal::InfoNonlocal () {} +InfoNonlocal::~InfoNonlocal () {} #endif // mock class cell /************************************************ @@ -79,23 +68,23 @@ class ChargeMixingTest : public ::testing::Test { public: UnitCell ucell; - ChargeMixingTest() + ChargeMixingTest () { // Init pw_basis - pw_basis.initgrids(4, ModuleBase::Matrix3(1, 0, 0, 0, 1, 0, 0, 0, 1), 20); - pw_basis.initparameters(false, 20); - pw_basis.setuptransform(); - pw_basis.collect_local_pw(); - pw_dbasis.initgrids(4, ModuleBase::Matrix3(1, 0, 0, 0, 1, 0, 0, 0, 1), 40); - pw_dbasis.initparameters(false, 40); - pw_dbasis.setuptransform(&pw_basis); - pw_dbasis.collect_local_pw(); + pw_basis.initgrids (4, ModuleBase::Matrix3 (1, 0, 0, 0, 1, 0, 0, 0, 1), 20); + pw_basis.initparameters (false, 20); + pw_basis.setuptransform (); + pw_basis.collect_local_pw (); + pw_dbasis.initgrids (4, ModuleBase::Matrix3 (1, 0, 0, 0, 1, 0, 0, 0, 1), 40); + pw_dbasis.initparameters (false, 40); + pw_dbasis.setuptransform (&pw_basis); + pw_dbasis.collect_local_pw (); // default mixing parameters PARAM.input.mixing_mode = "broyden"; PARAM.input.mixing_beta = 0.8; PARAM.input.mixing_ndim = 8; - PARAM.input.mixing_gg0 = 1.0; - PARAM.input.mixing_tau = false; + PARAM.input.mixing_gg0 = 1.0; + PARAM.input.mixing_tau = false; PARAM.input.mixing_beta_mag = 1.6; PARAM.input.mixing_gg0_mag = 0.0; PARAM.input.mixing_gg0_min = 0.1; @@ -106,527 +95,530 @@ class ChargeMixingTest : public ::testing::Test } ModulePW::PW_Basis pw_basis; ModulePW::PW_Basis_Sup pw_dbasis; - Charge 
charge; + Charge charge; }; -TEST_F(ChargeMixingTest, SetMixingTest) +TEST_F (ChargeMixingTest, SetMixingTest) { #ifdef _OPENMP - omp_set_num_threads(1); + omp_set_num_threads (1); #endif PARAM.input.nspin = 1; Charge_Mixing CMtest; - CMtest.set_rhopw(&pw_basis, &pw_basis); + CMtest.set_rhopw (&pw_basis, &pw_basis); PARAM.input.mixing_beta = 1.0; PARAM.input.mixing_ndim = 1; PARAM.input.mixing_gg0 = 1.0; - CMtest.set_mixing(PARAM.input.mixing_mode, - PARAM.input.mixing_beta, - PARAM.input.mixing_ndim, - PARAM.input.mixing_gg0, - PARAM.input.mixing_tau, - PARAM.input.mixing_beta_mag, - PARAM.input.mixing_gg0_mag, - PARAM.input.mixing_gg0_min, - PARAM.input.mixing_angle, - PARAM.input.mixing_dmr, - ucell.omega, - ucell.tpiba); - EXPECT_EQ(CMtest.get_mixing_mode(), "broyden"); - EXPECT_EQ(CMtest.get_mixing_beta(), 1.0); - EXPECT_EQ(CMtest.get_mixing_ndim(), 1); - EXPECT_EQ(CMtest.get_mixing_gg0(), 1.0); - EXPECT_EQ(CMtest.mixing_tau, false); - EXPECT_EQ(CMtest.mixing_beta_mag, 1.6); - EXPECT_EQ(CMtest.mixing_gg0_mag, 0.0); - EXPECT_EQ(CMtest.mixing_gg0_min, 0.1); - EXPECT_EQ(CMtest.mixing_angle, -10.0); - EXPECT_EQ(CMtest.mixing_dmr, false); + CMtest.set_mixing (PARAM.input.mixing_mode, + PARAM.input.mixing_beta, + PARAM.input.mixing_ndim, + PARAM.input.mixing_gg0, + PARAM.input.mixing_tau, + PARAM.input.mixing_beta_mag, + PARAM.input.mixing_gg0_mag, + PARAM.input.mixing_gg0_min, + PARAM.input.mixing_angle, + PARAM.input.mixing_dmr, + ucell.omega, + ucell.tpiba); + EXPECT_EQ (CMtest.get_mixing_mode (), "broyden"); + EXPECT_EQ (CMtest.get_mixing_beta (), 1.0); + EXPECT_EQ (CMtest.get_mixing_ndim (), 1); + EXPECT_EQ (CMtest.get_mixing_gg0 (), 1.0); + EXPECT_EQ (CMtest.mixing_tau, false); + EXPECT_EQ (CMtest.mixing_beta_mag, 1.6); + EXPECT_EQ (CMtest.mixing_gg0_mag, 0.0); + EXPECT_EQ (CMtest.mixing_gg0_min, 0.1); + EXPECT_EQ (CMtest.mixing_angle, -10.0); + EXPECT_EQ (CMtest.mixing_dmr, false); PARAM.input.mixing_tau = true; PARAM.input.mixing_mode = "plain"; - 
CMtest.set_mixing(PARAM.input.mixing_mode, - PARAM.input.mixing_beta, - PARAM.input.mixing_ndim, - PARAM.input.mixing_gg0, - PARAM.input.mixing_tau, - PARAM.input.mixing_beta_mag, - PARAM.input.mixing_gg0_mag, - PARAM.input.mixing_gg0_min, - PARAM.input.mixing_angle, - PARAM.input.mixing_dmr, - ucell.omega, - ucell.tpiba); - EXPECT_EQ(CMtest.mixing_mode, "plain"); - EXPECT_EQ(CMtest.mixing_tau, true); + CMtest.set_mixing (PARAM.input.mixing_mode, + PARAM.input.mixing_beta, + PARAM.input.mixing_ndim, + PARAM.input.mixing_gg0, + PARAM.input.mixing_tau, + PARAM.input.mixing_beta_mag, + PARAM.input.mixing_gg0_mag, + PARAM.input.mixing_gg0_min, + PARAM.input.mixing_angle, + PARAM.input.mixing_dmr, + ucell.omega, + ucell.tpiba); + EXPECT_EQ (CMtest.mixing_mode, "plain"); + EXPECT_EQ (CMtest.mixing_tau, true); PARAM.input.mixing_beta = 1.1; std::string output; - testing::internal::CaptureStdout(); - EXPECT_EXIT(CMtest.set_mixing(PARAM.input.mixing_mode, - PARAM.input.mixing_beta, - PARAM.input.mixing_ndim, - PARAM.input.mixing_gg0, - PARAM.input.mixing_tau, - PARAM.input.mixing_beta_mag, - PARAM.input.mixing_gg0_mag, - PARAM.input.mixing_gg0_min, - PARAM.input.mixing_angle, - PARAM.input.mixing_dmr, - ucell.omega, - ucell.tpiba);, ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("You'd better set mixing_beta to [0.0, 1.0]!")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (CMtest.set_mixing (PARAM.input.mixing_mode, + PARAM.input.mixing_beta, + PARAM.input.mixing_ndim, + PARAM.input.mixing_gg0, + PARAM.input.mixing_tau, + PARAM.input.mixing_beta_mag, + PARAM.input.mixing_gg0_mag, + PARAM.input.mixing_gg0_min, + PARAM.input.mixing_angle, + PARAM.input.mixing_dmr, + ucell.omega, + ucell.tpiba); + , ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("You'd better set mixing_beta to [0.0, 1.0]!")); 
PARAM.input.mixing_beta = 0.7; PARAM.input.mixing_beta_mag = -0.1; PARAM.input.nspin = 2; - testing::internal::CaptureStdout(); - EXPECT_EXIT(CMtest.set_mixing(PARAM.input.mixing_mode, - PARAM.input.mixing_beta, - PARAM.input.mixing_ndim, - PARAM.input.mixing_gg0, - PARAM.input.mixing_tau, - PARAM.input.mixing_beta_mag, - PARAM.input.mixing_gg0_mag, - PARAM.input.mixing_gg0_min, - PARAM.input.mixing_angle, - PARAM.input.mixing_dmr, - ucell.omega, - ucell.tpiba);, ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("You'd better set mixing_beta_mag >= 0.0!")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (CMtest.set_mixing (PARAM.input.mixing_mode, + PARAM.input.mixing_beta, + PARAM.input.mixing_ndim, + PARAM.input.mixing_gg0, + PARAM.input.mixing_tau, + PARAM.input.mixing_beta_mag, + PARAM.input.mixing_gg0_mag, + PARAM.input.mixing_gg0_min, + PARAM.input.mixing_angle, + PARAM.input.mixing_dmr, + ucell.omega, + ucell.tpiba); + , ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("You'd better set mixing_beta_mag >= 0.0!")); PARAM.input.nspin = 1; PARAM.input.mixing_beta = 0.7; PARAM.input.mixing_beta_mag = 1.6; PARAM.input.mixing_mode = "nothing"; - testing::internal::CaptureStdout(); - EXPECT_EXIT(CMtest.set_mixing(PARAM.input.mixing_mode, - PARAM.input.mixing_beta, - PARAM.input.mixing_ndim, - PARAM.input.mixing_gg0, - PARAM.input.mixing_tau, - PARAM.input.mixing_beta_mag, - PARAM.input.mixing_gg0_mag, - PARAM.input.mixing_gg0_min, - PARAM.input.mixing_angle, - PARAM.input.mixing_dmr, - ucell.omega, - ucell.tpiba);, ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("This Mixing mode is not implemended yet,coming soon.")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (CMtest.set_mixing (PARAM.input.mixing_mode, + 
PARAM.input.mixing_beta, + PARAM.input.mixing_ndim, + PARAM.input.mixing_gg0, + PARAM.input.mixing_tau, + PARAM.input.mixing_beta_mag, + PARAM.input.mixing_gg0_mag, + PARAM.input.mixing_gg0_min, + PARAM.input.mixing_angle, + PARAM.input.mixing_dmr, + ucell.omega, + ucell.tpiba); + , ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("This Mixing mode is not implemended yet,coming soon.")); } -TEST_F(ChargeMixingTest, InitMixingTest) +TEST_F (ChargeMixingTest, InitMixingTest) { #ifdef _OPENMP - omp_set_num_threads(1); + omp_set_num_threads (1); #endif PARAM.input.nspin = 1; XC_Functional::func_type = 1; XC_Functional::ked_flag = false; Charge_Mixing CMtest; - CMtest.set_rhopw(&pw_basis, &pw_basis); + CMtest.set_rhopw (&pw_basis, &pw_basis); + + CMtest.set_mixing (PARAM.input.mixing_mode, + PARAM.input.mixing_beta, + PARAM.input.mixing_ndim, + PARAM.input.mixing_gg0, + PARAM.input.mixing_tau, + PARAM.input.mixing_beta_mag, + PARAM.input.mixing_gg0_mag, + PARAM.input.mixing_gg0_min, + PARAM.input.mixing_angle, + PARAM.input.mixing_dmr, + ucell.omega, + ucell.tpiba); + + PARAM.input.scf_thr_type = 1; + CMtest.init_mixing (); + EXPECT_EQ (CMtest.rho_mdata.length, pw_basis.npw); - CMtest.set_mixing(PARAM.input.mixing_mode, - PARAM.input.mixing_beta, - PARAM.input.mixing_ndim, - PARAM.input.mixing_gg0, - PARAM.input.mixing_tau, - PARAM.input.mixing_beta_mag, - PARAM.input.mixing_gg0_mag, - PARAM.input.mixing_gg0_min, - PARAM.input.mixing_angle, - PARAM.input.mixing_dmr, - ucell.omega, - ucell.tpiba); - - PARAM.input.scf_thr_type= 1; - CMtest.init_mixing(); - EXPECT_EQ(CMtest.rho_mdata.length, pw_basis.npw); - - PARAM.input.scf_thr_type= 2; - CMtest.init_mixing(); - EXPECT_EQ(CMtest.rho_mdata.length, pw_basis.nrxx); + PARAM.input.scf_thr_type = 2; + CMtest.init_mixing (); + EXPECT_EQ (CMtest.rho_mdata.length, pw_basis.nrxx); PARAM.input.nspin = 4; - CMtest.init_mixing(); - 
EXPECT_EQ(CMtest.rho_mdata.length, 4 * pw_basis.nrxx); + CMtest.init_mixing (); + EXPECT_EQ (CMtest.rho_mdata.length, 4 * pw_basis.nrxx); PARAM.input.nspin = 1; PARAM.input.mixing_tau = true; - CMtest.set_mixing(PARAM.input.mixing_mode, - PARAM.input.mixing_beta, - PARAM.input.mixing_ndim, - PARAM.input.mixing_gg0, - PARAM.input.mixing_tau, - PARAM.input.mixing_beta_mag, - PARAM.input.mixing_gg0_mag, - PARAM.input.mixing_gg0_min, - PARAM.input.mixing_angle, - PARAM.input.mixing_dmr, - ucell.omega, - ucell.tpiba); + CMtest.set_mixing (PARAM.input.mixing_mode, + PARAM.input.mixing_beta, + PARAM.input.mixing_ndim, + PARAM.input.mixing_gg0, + PARAM.input.mixing_tau, + PARAM.input.mixing_beta_mag, + PARAM.input.mixing_gg0_mag, + PARAM.input.mixing_gg0_min, + PARAM.input.mixing_angle, + PARAM.input.mixing_dmr, + ucell.omega, + ucell.tpiba); XC_Functional::func_type = 3; XC_Functional::ked_flag = true; - CMtest.init_mixing(); - EXPECT_EQ(CMtest.tau_mdata.length, pw_basis.nrxx); + CMtest.init_mixing (); + EXPECT_EQ (CMtest.tau_mdata.length, pw_basis.nrxx); PARAM.input.nspin = 4; PARAM.input.mixing_angle = 1.0; - CMtest.set_mixing(PARAM.input.mixing_mode, - PARAM.input.mixing_beta, - PARAM.input.mixing_ndim, - PARAM.input.mixing_gg0, - PARAM.input.mixing_tau, - PARAM.input.mixing_beta_mag, - PARAM.input.mixing_gg0_mag, - PARAM.input.mixing_gg0_min, - PARAM.input.mixing_angle, - PARAM.input.mixing_dmr, - ucell.omega, - ucell.tpiba); - CMtest.init_mixing(); - EXPECT_EQ(CMtest.rho_mdata.length, 2 * pw_basis.nrxx); + CMtest.set_mixing (PARAM.input.mixing_mode, + PARAM.input.mixing_beta, + PARAM.input.mixing_ndim, + PARAM.input.mixing_gg0, + PARAM.input.mixing_tau, + PARAM.input.mixing_beta_mag, + PARAM.input.mixing_gg0_mag, + PARAM.input.mixing_gg0_min, + PARAM.input.mixing_angle, + PARAM.input.mixing_dmr, + ucell.omega, + ucell.tpiba); + CMtest.init_mixing (); + EXPECT_EQ (CMtest.rho_mdata.length, 2 * pw_basis.nrxx); } -TEST_F(ChargeMixingTest, InnerDotRealTest) +TEST_F 
(ChargeMixingTest, InnerDotRealTest) { Charge_Mixing CMtest; // non mixing angle case - CMtest.set_mixing(PARAM.input.mixing_mode, - PARAM.input.mixing_beta, - PARAM.input.mixing_ndim, - PARAM.input.mixing_gg0, - PARAM.input.mixing_tau, - PARAM.input.mixing_beta_mag, - PARAM.input.mixing_gg0_mag, - PARAM.input.mixing_gg0_min, - PARAM.input.mixing_angle, - PARAM.input.mixing_dmr, - ucell.omega, - ucell.tpiba); - CMtest.set_rhopw(&pw_basis, &pw_basis); + CMtest.set_mixing (PARAM.input.mixing_mode, + PARAM.input.mixing_beta, + PARAM.input.mixing_ndim, + PARAM.input.mixing_gg0, + PARAM.input.mixing_tau, + PARAM.input.mixing_beta_mag, + PARAM.input.mixing_gg0_mag, + PARAM.input.mixing_gg0_min, + PARAM.input.mixing_angle, + PARAM.input.mixing_dmr, + ucell.omega, + ucell.tpiba); + CMtest.set_rhopw (&pw_basis, &pw_basis); PARAM.input.nspin = 4; // a simple sum for inner product - std::vector drho1(pw_basis.nrxx * PARAM.input.nspin); - std::vector drho2(pw_basis.nrxx * PARAM.input.nspin); + std::vector drho1 (pw_basis.nrxx * PARAM.input.nspin); + std::vector drho2 (pw_basis.nrxx * PARAM.input.nspin); for (int i = 0; i < pw_basis.nrxx * PARAM.input.nspin; ++i) - { - drho1[i] = 1.0; - drho2[i] = double(i); - } - double inner = CMtest.inner_product_real(drho1.data(), drho2.data()); - EXPECT_NEAR(inner, 0.5 * pw_basis.nrxx * PARAM.input.nspin * (pw_basis.nrxx * PARAM.input.nspin - 1), 1e-8); + { + drho1[i] = 1.0; + drho2[i] = double (i); + } + double inner = CMtest.inner_product_real (drho1.data (), drho2.data ()); + EXPECT_NEAR (inner, 0.5 * pw_basis.nrxx * PARAM.input.nspin * (pw_basis.nrxx * PARAM.input.nspin - 1), 1e-8); // mixing angle case PARAM.input.mixing_angle = 1.0; - CMtest.set_mixing(PARAM.input.mixing_mode, - PARAM.input.mixing_beta, - PARAM.input.mixing_ndim, - PARAM.input.mixing_gg0, - PARAM.input.mixing_tau, - PARAM.input.mixing_beta_mag, - PARAM.input.mixing_gg0_mag, - PARAM.input.mixing_gg0_min, - PARAM.input.mixing_angle, - PARAM.input.mixing_dmr, - 
ucell.omega, - ucell.tpiba); + CMtest.set_mixing (PARAM.input.mixing_mode, + PARAM.input.mixing_beta, + PARAM.input.mixing_ndim, + PARAM.input.mixing_gg0, + PARAM.input.mixing_tau, + PARAM.input.mixing_beta_mag, + PARAM.input.mixing_gg0_mag, + PARAM.input.mixing_gg0_min, + PARAM.input.mixing_angle, + PARAM.input.mixing_dmr, + ucell.omega, + ucell.tpiba); PARAM.input.nspin = 4; // a simple sum for inner product - drho1.resize(pw_basis.nrxx * 2); - drho2.resize(pw_basis.nrxx * 2); + drho1.resize (pw_basis.nrxx * 2); + drho2.resize (pw_basis.nrxx * 2); for (int i = 0; i < pw_basis.nrxx * 2; ++i) - { - drho1[i] = 1.0; - drho2[i] = double(i); - } - inner = CMtest.inner_product_real(drho1.data(), drho2.data()); - EXPECT_NEAR(inner, 0.5 * pw_basis.nrxx * 2 * (pw_basis.nrxx * 2 - 1), 1e-8); + { + drho1[i] = 1.0; + drho2[i] = double (i); + } + inner = CMtest.inner_product_real (drho1.data (), drho2.data ()); + EXPECT_NEAR (inner, 0.5 * pw_basis.nrxx * 2 * (pw_basis.nrxx * 2 - 1), 1e-8); } -TEST_F(ChargeMixingTest, InnerDotRecipSimpleTest) +TEST_F (ChargeMixingTest, InnerDotRecipSimpleTest) { Charge_Mixing CMtest; // non mixing angle case - CMtest.set_mixing(PARAM.input.mixing_mode, - PARAM.input.mixing_beta, - PARAM.input.mixing_ndim, - PARAM.input.mixing_gg0, - PARAM.input.mixing_tau, - PARAM.input.mixing_beta_mag, - PARAM.input.mixing_gg0_mag, - PARAM.input.mixing_gg0_min, - PARAM.input.mixing_angle, - PARAM.input.mixing_dmr, - ucell.omega, - ucell.tpiba); - CMtest.set_rhopw(&pw_basis, &pw_basis); + CMtest.set_mixing (PARAM.input.mixing_mode, + PARAM.input.mixing_beta, + PARAM.input.mixing_ndim, + PARAM.input.mixing_gg0, + PARAM.input.mixing_tau, + PARAM.input.mixing_beta_mag, + PARAM.input.mixing_gg0_mag, + PARAM.input.mixing_gg0_min, + PARAM.input.mixing_angle, + PARAM.input.mixing_dmr, + ucell.omega, + ucell.tpiba); + CMtest.set_rhopw (&pw_basis, &pw_basis); PARAM.input.nspin = 2; // a simple sum for inner product - std::vector> drhog1(pw_basis.npw * 
PARAM.input.nspin); - std::vector> drhog2(pw_basis.npw * PARAM.input.nspin); + std::vector> drhog1 (pw_basis.npw * PARAM.input.nspin); + std::vector> drhog2 (pw_basis.npw * PARAM.input.nspin); for (int i = 0; i < pw_basis.npw * PARAM.input.nspin; ++i) - { - drhog1[i] = 1.0; - drhog2[i] = double(i); - } - double inner = CMtest.inner_product_recip_simple(drhog1.data(), drhog2.data()); - EXPECT_NEAR(inner, 0.5 * pw_basis.npw * PARAM.input.nspin * (pw_basis.npw * PARAM.input.nspin - 1), 1e-8); + { + drhog1[i] = 1.0; + drhog2[i] = double (i); + } + double inner = CMtest.inner_product_recip_simple (drhog1.data (), drhog2.data ()); + EXPECT_NEAR (inner, 0.5 * pw_basis.npw * PARAM.input.nspin * (pw_basis.npw * PARAM.input.nspin - 1), 1e-8); } -TEST_F(ChargeMixingTest, InnerDotRecipHartreeTest) +TEST_F (ChargeMixingTest, InnerDotRecipHartreeTest) { // REAL Charge_Mixing CMtest; - CMtest.set_rhopw(&pw_basis, &pw_basis); + CMtest.set_rhopw (&pw_basis, &pw_basis); const int npw = pw_basis.npw; const int nrxx = pw_basis.nrxx; PARAM.input.nspin = 1; - std::vector drhor1(pw_basis.nrxx); - std::vector drhor2(pw_basis.nrxx); + std::vector drhor1 (pw_basis.nrxx); + std::vector drhor2 (pw_basis.nrxx); for (int i = 0; i < pw_basis.nrxx; ++i) - { - drhor1[i] = 1.0; - drhor2[i] = double(i); - } - double inner = CMtest.inner_product_real(drhor1.data(), drhor2.data()); - EXPECT_NEAR(inner, 0.5 * pw_basis.nrxx * (pw_basis.nrxx - 1), 1e-8); + { + drhor1[i] = 1.0; + drhor2[i] = double (i); + } + double inner = CMtest.inner_product_real (drhor1.data (), drhor2.data ()); + EXPECT_NEAR (inner, 0.5 * pw_basis.nrxx * (pw_basis.nrxx - 1), 1e-8); // RECIPROCAL NSPIN=1 ucell.tpiba2 = 1.0; ucell.omega = 2.0; - CMtest.set_mixing(PARAM.input.mixing_mode, - PARAM.input.mixing_beta, - PARAM.input.mixing_ndim, - PARAM.input.mixing_gg0, - PARAM.input.mixing_tau, - PARAM.input.mixing_beta_mag, - PARAM.input.mixing_gg0_mag, - PARAM.input.mixing_gg0_min, - PARAM.input.mixing_angle, - PARAM.input.mixing_dmr, - 
ucell.omega, - ucell.tpiba); + CMtest.set_mixing (PARAM.input.mixing_mode, + PARAM.input.mixing_beta, + PARAM.input.mixing_ndim, + PARAM.input.mixing_gg0, + PARAM.input.mixing_tau, + PARAM.input.mixing_beta_mag, + PARAM.input.mixing_gg0_mag, + PARAM.input.mixing_gg0_min, + PARAM.input.mixing_angle, + PARAM.input.mixing_dmr, + ucell.omega, + ucell.tpiba); PARAM.input.nspin = 1; - std::vector> drhog1(pw_basis.npw); - std::vector> drhog2(pw_basis.npw); + std::vector> drhog1 (pw_basis.npw); + std::vector> drhog2 (pw_basis.npw); for (int i = 0; i < pw_basis.nrxx; ++i) - { - drhor1[i] = 0.0; - } + { + drhor1[i] = 0.0; + } drhor1[2] = 1.0; - pw_basis.real2recip(drhor1.data(), drhog1.data()); - pw_basis.real2recip(drhor2.data(), drhog2.data()); + pw_basis.real2recip (drhor1.data (), drhog1.data ()); + pw_basis.real2recip (drhor2.data (), drhog2.data ()); - inner = CMtest.inner_product_recip_hartree(drhog1.data(), drhog2.data()); - EXPECT_NEAR(inner, -0.3 * ModuleBase::e2 * ModuleBase::FOUR_PI, 1e-8); + inner = CMtest.inner_product_recip_hartree (drhog1.data (), drhog2.data ()); + EXPECT_NEAR (inner, -0.3 * ModuleBase::e2 * ModuleBase::FOUR_PI, 1e-8); // RECIPROCAL NSPIN=2 PARAM.input.nspin = 2; - drhog1.resize(pw_basis.npw * PARAM.input.nspin); - drhog2.resize(pw_basis.npw * PARAM.input.nspin); - std::vector> drhog1_mag(pw_basis.npw * PARAM.input.nspin); - std::vector> drhog2_mag(pw_basis.npw * PARAM.input.nspin); + drhog1.resize (pw_basis.npw * PARAM.input.nspin); + drhog2.resize (pw_basis.npw * PARAM.input.nspin); + std::vector> drhog1_mag (pw_basis.npw * PARAM.input.nspin); + std::vector> drhog2_mag (pw_basis.npw * PARAM.input.nspin); for (int i = 0; i < pw_basis.npw * PARAM.input.nspin; ++i) - { - drhog1[i] = std::complex(1.0, double(i)); - drhog2[i] = std::complex(1.0, 1.0); - } + { + drhog1[i] = std::complex (1.0, double (i)); + drhog2[i] = std::complex (1.0, 1.0); + } // set mag for (int i = 0; i < pw_basis.npw; ++i) - { - drhog1_mag[i] = drhog1[i] + 
drhog1[i+pw_basis.npw]; - drhog1_mag[i+pw_basis.npw] = drhog1[i] - drhog1[i+pw_basis.npw]; - drhog2_mag[i] = drhog2[i] + drhog2[i+pw_basis.npw]; - drhog2_mag[i+pw_basis.npw] = drhog2[i] - drhog2[i+pw_basis.npw]; - } - PARAM.sys.gamma_only_pw= false; - inner = CMtest.inner_product_recip_hartree(drhog1_mag.data(), drhog2_mag.data()); - EXPECT_NEAR(inner, 236763.82650318215, 1e-8); - PARAM.sys.gamma_only_pw= true; - inner = CMtest.inner_product_recip_hartree(drhog1_mag.data(), drhog2_mag.data()); - EXPECT_NEAR(inner, 236763.82650318215 * 2, 1e-8); + { + drhog1_mag[i] = drhog1[i] + drhog1[i + pw_basis.npw]; + drhog1_mag[i + pw_basis.npw] = drhog1[i] - drhog1[i + pw_basis.npw]; + drhog2_mag[i] = drhog2[i] + drhog2[i + pw_basis.npw]; + drhog2_mag[i + pw_basis.npw] = drhog2[i] - drhog2[i + pw_basis.npw]; + } + PARAM.sys.gamma_only_pw = false; + inner = CMtest.inner_product_recip_hartree (drhog1_mag.data (), drhog2_mag.data ()); + EXPECT_NEAR (inner, 236763.82650318215, 1e-8); + PARAM.sys.gamma_only_pw = true; + inner = CMtest.inner_product_recip_hartree (drhog1_mag.data (), drhog2_mag.data ()); + EXPECT_NEAR (inner, 236763.82650318215 * 2, 1e-8); // RECIPROCAL NSPIN=4 without mixing_angle PARAM.input.nspin = 4; - drhog1.resize(pw_basis.npw * PARAM.input.nspin); - drhog2.resize(pw_basis.npw * PARAM.input.nspin); + drhog1.resize (pw_basis.npw * PARAM.input.nspin); + drhog2.resize (pw_basis.npw * PARAM.input.nspin); for (int i = 0; i < pw_basis.npw * PARAM.input.nspin; ++i) - { - drhog1[i] = std::complex(1.0, double(i)); - drhog2[i] = std::complex(1.0, 1.0); - } + { + drhog1[i] = std::complex (1.0, double (i)); + drhog2[i] = std::complex (1.0, 1.0); + } PARAM.sys.domag = false; PARAM.sys.domag_z = false; - inner = CMtest.inner_product_recip_hartree(drhog1.data(), drhog2.data()); - EXPECT_NEAR(inner, 28260.091995611871, 1e-8); - PARAM.sys.gamma_only_pw= true; + inner = CMtest.inner_product_recip_hartree (drhog1.data (), drhog2.data ()); + EXPECT_NEAR (inner, 
28260.091995611871, 1e-8); + PARAM.sys.gamma_only_pw = true; PARAM.sys.domag = true; PARAM.sys.domag_z = true; - inner = CMtest.inner_product_recip_hartree(drhog1.data(), drhog2.data()); - EXPECT_NEAR(inner, 110668.61166927818, 1e-8); + inner = CMtest.inner_product_recip_hartree (drhog1.data (), drhog2.data ()); + EXPECT_NEAR (inner, 110668.61166927818, 1e-8); // RECIPROCAL NSPIN=4 with mixing_angle PARAM.input.nspin = 4; PARAM.input.mixing_angle = 1.0; - CMtest.set_mixing(PARAM.input.mixing_mode, - PARAM.input.mixing_beta, - PARAM.input.mixing_ndim, - PARAM.input.mixing_gg0, - PARAM.input.mixing_tau, - PARAM.input.mixing_beta_mag, - PARAM.input.mixing_gg0_mag, - PARAM.input.mixing_gg0_min, - PARAM.input.mixing_angle, - PARAM.input.mixing_dmr, - ucell.omega, - ucell.tpiba); - drhog1.resize(pw_basis.npw * 2); - drhog2.resize(pw_basis.npw * 2); + CMtest.set_mixing (PARAM.input.mixing_mode, + PARAM.input.mixing_beta, + PARAM.input.mixing_ndim, + PARAM.input.mixing_gg0, + PARAM.input.mixing_tau, + PARAM.input.mixing_beta_mag, + PARAM.input.mixing_gg0_mag, + PARAM.input.mixing_gg0_min, + PARAM.input.mixing_angle, + PARAM.input.mixing_dmr, + ucell.omega, + ucell.tpiba); + drhog1.resize (pw_basis.npw * 2); + drhog2.resize (pw_basis.npw * 2); for (int i = 0; i < pw_basis.npw * 2; ++i) - { - drhog1[i] = std::complex(1.0, double(i)); - drhog2[i] = std::complex(1.0, 1.0); - } - PARAM.sys.gamma_only_pw= false; - inner = CMtest.inner_product_recip_hartree(drhog1.data(), drhog2.data()); - EXPECT_NEAR(inner, 36548.881431837777, 1e-8); - PARAM.sys.gamma_only_pw= true; - inner = CMtest.inner_product_recip_hartree(drhog1.data(), drhog2.data()); - EXPECT_NEAR(inner, 44776.555369916401, 1e-8); + { + drhog1[i] = std::complex (1.0, double (i)); + drhog2[i] = std::complex (1.0, 1.0); + } + PARAM.sys.gamma_only_pw = false; + inner = CMtest.inner_product_recip_hartree (drhog1.data (), drhog2.data ()); + EXPECT_NEAR (inner, 36548.881431837777, 1e-8); + PARAM.sys.gamma_only_pw = true; + 
inner = CMtest.inner_product_recip_hartree (drhog1.data (), drhog2.data ()); + EXPECT_NEAR (inner, 44776.555369916401, 1e-8); } -TEST_F(ChargeMixingTest, InnerDotRecipRhoTest) +TEST_F (ChargeMixingTest, InnerDotRecipRhoTest) { // REAL Charge_Mixing CMtest; - CMtest.set_rhopw(&pw_basis, &pw_basis); + CMtest.set_rhopw (&pw_basis, &pw_basis); PARAM.input.nspin = 1; - std::vector drhor1(pw_basis.nrxx); - std::vector drhor2(pw_basis.nrxx); + std::vector drhor1 (pw_basis.nrxx); + std::vector drhor2 (pw_basis.nrxx); for (int i = 0; i < pw_basis.nrxx; ++i) - { - drhor1[i] = 1.0; - drhor2[i] = double(i); - } - double inner = CMtest.inner_product_real(drhor1.data(), drhor2.data()); - EXPECT_NEAR(inner, 0.5 * pw_basis.nrxx * (pw_basis.nrxx - 1), 1e-8); + { + drhor1[i] = 1.0; + drhor2[i] = double (i); + } + double inner = CMtest.inner_product_real (drhor1.data (), drhor2.data ()); + EXPECT_NEAR (inner, 0.5 * pw_basis.nrxx * (pw_basis.nrxx - 1), 1e-8); // RECIPROCAL ucell.tpiba2 = 1.0; ucell.omega = 2.0; - CMtest.set_mixing(PARAM.input.mixing_mode, - PARAM.input.mixing_beta, - PARAM.input.mixing_ndim, - PARAM.input.mixing_gg0, - PARAM.input.mixing_tau, - PARAM.input.mixing_beta_mag, - PARAM.input.mixing_gg0_mag, - PARAM.input.mixing_gg0_min, - PARAM.input.mixing_angle, - PARAM.input.mixing_dmr, - ucell.omega, - ucell.tpiba); + CMtest.set_mixing (PARAM.input.mixing_mode, + PARAM.input.mixing_beta, + PARAM.input.mixing_ndim, + PARAM.input.mixing_gg0, + PARAM.input.mixing_tau, + PARAM.input.mixing_beta_mag, + PARAM.input.mixing_gg0_mag, + PARAM.input.mixing_gg0_min, + PARAM.input.mixing_angle, + PARAM.input.mixing_dmr, + ucell.omega, + ucell.tpiba); PARAM.input.nspin = 1; - std::vector> drhog1(pw_basis.npw); - std::vector> drhog2(pw_basis.npw); + std::vector> drhog1 (pw_basis.npw); + std::vector> drhog2 (pw_basis.npw); for (int i = 0; i < pw_basis.nrxx; ++i) - { - drhor1[i] = 0.0; - } + { + drhor1[i] = 0.0; + } drhor1[2] = 1.0; - pw_basis.real2recip(drhor1.data(), drhog1.data()); 
- pw_basis.real2recip(drhor2.data(), drhog2.data()); + pw_basis.real2recip (drhor1.data (), drhog1.data ()); + pw_basis.real2recip (drhor2.data (), drhog2.data ()); - inner = CMtest.inner_product_recip_rho(drhog1.data(), drhog2.data()); - EXPECT_NEAR(inner, -0.3 * ModuleBase::e2 * ModuleBase::FOUR_PI, 1e-8); + inner = CMtest.inner_product_recip_rho (drhog1.data (), drhog2.data ()); + EXPECT_NEAR (inner, -0.3 * ModuleBase::e2 * ModuleBase::FOUR_PI, 1e-8); PARAM.input.nspin = 2; - drhog1.resize(pw_basis.npw * PARAM.input.nspin); - drhog2.resize(pw_basis.npw * PARAM.input.nspin); + drhog1.resize (pw_basis.npw * PARAM.input.nspin); + drhog2.resize (pw_basis.npw * PARAM.input.nspin); for (int i = 0; i < pw_basis.npw * PARAM.input.nspin; ++i) - { - drhog1[i] = std::complex(1.0, double(i)); - drhog2[i] = std::complex(1.0, 1.0); - } - PARAM.sys.gamma_only_pw= false; - inner = CMtest.inner_product_recip_rho(drhog1.data(), drhog2.data()); - EXPECT_NEAR(inner, 236763.82650318215, 1e-8); - PARAM.sys.gamma_only_pw= true; - inner = CMtest.inner_product_recip_rho(drhog1.data(), drhog2.data()); - EXPECT_NEAR(inner, 236763.82650318215 * 2, 1e-8); + { + drhog1[i] = std::complex (1.0, double (i)); + drhog2[i] = std::complex (1.0, 1.0); + } + PARAM.sys.gamma_only_pw = false; + inner = CMtest.inner_product_recip_rho (drhog1.data (), drhog2.data ()); + EXPECT_NEAR (inner, 236763.82650318215, 1e-8); + PARAM.sys.gamma_only_pw = true; + inner = CMtest.inner_product_recip_rho (drhog1.data (), drhog2.data ()); + EXPECT_NEAR (inner, 236763.82650318215 * 2, 1e-8); PARAM.input.nspin = 4; - drhog1.resize(pw_basis.npw * PARAM.input.nspin); - drhog2.resize(pw_basis.npw * PARAM.input.nspin); + drhog1.resize (pw_basis.npw * PARAM.input.nspin); + drhog2.resize (pw_basis.npw * PARAM.input.nspin); for (int i = 0; i < pw_basis.npw * PARAM.input.nspin; ++i) - { - drhog1[i] = std::complex(1.0, double(i)); - drhog2[i] = std::complex(1.0, 1.0); - } + { + drhog1[i] = std::complex (1.0, double (i)); + 
drhog2[i] = std::complex (1.0, 1.0); + } PARAM.sys.domag = false; PARAM.sys.domag_z = false; - inner = CMtest.inner_product_recip_rho(drhog1.data(), drhog2.data()); - EXPECT_NEAR(inner, 28260.091995611871, 1e-8); - PARAM.sys.gamma_only_pw= true; + inner = CMtest.inner_product_recip_rho (drhog1.data (), drhog2.data ()); + EXPECT_NEAR (inner, 28260.091995611871, 1e-8); + PARAM.sys.gamma_only_pw = true; PARAM.sys.domag = true; PARAM.sys.domag_z = true; - inner = CMtest.inner_product_recip_rho(drhog1.data(), drhog2.data()); - EXPECT_NEAR(inner, 110668.61166927818, 1e-8); + inner = CMtest.inner_product_recip_rho (drhog1.data (), drhog2.data ()); + EXPECT_NEAR (inner, 110668.61166927818, 1e-8); } -TEST_F(ChargeMixingTest, KerkerScreenRecipTest) +TEST_F (ChargeMixingTest, KerkerScreenRecipTest) { Charge_Mixing CMtest; - CMtest.set_rhopw(&pw_basis, &pw_basis); + CMtest.set_rhopw (&pw_basis, &pw_basis); ucell.tpiba = 1.0; - CMtest.set_mixing(PARAM.input.mixing_mode, - PARAM.input.mixing_beta, - PARAM.input.mixing_ndim, - PARAM.input.mixing_gg0, - PARAM.input.mixing_tau, - PARAM.input.mixing_beta_mag, - PARAM.input.mixing_gg0_mag, - PARAM.input.mixing_gg0_min, - PARAM.input.mixing_angle, - PARAM.input.mixing_dmr, - ucell.omega, - ucell.tpiba); + CMtest.set_mixing (PARAM.input.mixing_mode, + PARAM.input.mixing_beta, + PARAM.input.mixing_ndim, + PARAM.input.mixing_gg0, + PARAM.input.mixing_tau, + PARAM.input.mixing_beta_mag, + PARAM.input.mixing_gg0_mag, + PARAM.input.mixing_gg0_min, + PARAM.input.mixing_angle, + PARAM.input.mixing_dmr, + ucell.omega, + ucell.tpiba); // nspin = 1 PARAM.input.nspin = 1; - std::complex* drhog = new std::complex[PARAM.input.nspin*pw_basis.npw]; - std::complex* drhog_old = new std::complex[PARAM.input.nspin*pw_basis.npw]; - for (int i = 0; i < PARAM.input.nspin*pw_basis.npw; ++i) - { - drhog_old[i] = drhog[i] = std::complex(1.0, 1.0); - } + std::complex* drhog = new std::complex[PARAM.input.nspin * pw_basis.npw]; + std::complex* drhog_old = new 
std::complex[PARAM.input.nspin * pw_basis.npw]; + for (int i = 0; i < PARAM.input.nspin * pw_basis.npw; ++i) + { + drhog_old[i] = drhog[i] = std::complex (1.0, 1.0); + } // no kerker CMtest.mixing_gg0 = 0.0; - CMtest.Kerker_screen_recip(drhog); - for (int i = 0; i < PARAM.input.nspin*pw_basis.npw; ++i) - { - EXPECT_EQ(drhog[i], drhog_old[i]); - } + CMtest.Kerker_screen_recip (drhog); + for (int i = 0; i < PARAM.input.nspin * pw_basis.npw; ++i) + { + EXPECT_EQ (drhog[i], drhog_old[i]); + } // kerker CMtest.mixing_gg0 = 1.0; - CMtest.Kerker_screen_recip(drhog); - double gg0 = std::pow(ModuleBase::BOHR_TO_A, 2); + CMtest.Kerker_screen_recip (drhog); + double gg0 = std::pow (ModuleBase::BOHR_TO_A, 2); for (int i = 0; i < pw_basis.npw; ++i) - { - double gg = this->pw_basis.gg[i]; - double ref = std::max(gg / (gg + gg0), 0.1 / CMtest.mixing_beta); - EXPECT_NEAR(drhog[i].real(), ref, 1e-10); - EXPECT_NEAR(drhog[i].imag(), ref, 1e-10); - } + { + double gg = this->pw_basis.gg[i]; + double ref = std::max (gg / (gg + gg0), 0.1 / CMtest.mixing_beta); + EXPECT_NEAR (drhog[i].real (), ref, 1e-10); + EXPECT_NEAR (drhog[i].imag (), ref, 1e-10); + } delete[] drhog; delete[] drhog_old; @@ -634,191 +626,190 @@ TEST_F(ChargeMixingTest, KerkerScreenRecipTest) PARAM.input.nspin = 2; CMtest.mixing_beta = 0.4; CMtest.mixing_beta_mag = 1.6; - drhog = new std::complex[PARAM.input.nspin*pw_basis.npw]; - drhog_old = new std::complex[PARAM.input.nspin*pw_basis.npw]; - for (int i = 0; i < PARAM.input.nspin*pw_basis.npw; ++i) - { - drhog_old[i] = drhog[i] = std::complex(1.0, 1.0); - } + drhog = new std::complex[PARAM.input.nspin * pw_basis.npw]; + drhog_old = new std::complex[PARAM.input.nspin * pw_basis.npw]; + for (int i = 0; i < PARAM.input.nspin * pw_basis.npw; ++i) + { + drhog_old[i] = drhog[i] = std::complex (1.0, 1.0); + } // mixing_gg0 = 0.0 CMtest.mixing_gg0 = 0.0; - CMtest.Kerker_screen_recip(drhog); - for (int i = 0; i < PARAM.input.nspin*pw_basis.npw; ++i) - { - EXPECT_EQ(drhog[i], 
drhog_old[i]); - } + CMtest.Kerker_screen_recip (drhog); + for (int i = 0; i < PARAM.input.nspin * pw_basis.npw; ++i) + { + EXPECT_EQ (drhog[i], drhog_old[i]); + } // mixing_gg0 = 1.0, mixing_gg0_mag = 0.0 CMtest.mixing_gg0 = 1.0; - CMtest.Kerker_screen_recip(drhog); - gg0 = std::pow(ModuleBase::BOHR_TO_A, 2); + CMtest.Kerker_screen_recip (drhog); + gg0 = std::pow (ModuleBase::BOHR_TO_A, 2); for (int i = 0; i < pw_basis.npw; ++i) - { - double gg = this->pw_basis.gg[i]; - double ref = std::max(gg / (gg + gg0), 0.1 / CMtest.mixing_beta); - // rho - EXPECT_NEAR(drhog[i].real(), ref, 1e-10); - EXPECT_NEAR(drhog[i].imag(), ref, 1e-10); - // mag - EXPECT_NEAR(drhog[i+pw_basis.npw].real(), 1.0, 1e-10); - EXPECT_NEAR(drhog[i+pw_basis.npw].imag(), 1.0, 1e-10); - } + { + double gg = this->pw_basis.gg[i]; + double ref = std::max (gg / (gg + gg0), 0.1 / CMtest.mixing_beta); + // rho + EXPECT_NEAR (drhog[i].real (), ref, 1e-10); + EXPECT_NEAR (drhog[i].imag (), ref, 1e-10); + // mag + EXPECT_NEAR (drhog[i + pw_basis.npw].real (), 1.0, 1e-10); + EXPECT_NEAR (drhog[i + pw_basis.npw].imag (), 1.0, 1e-10); + } delete[] drhog; delete[] drhog_old; // nspin = 4 PARAM.input.nspin = 4; - drhog = new std::complex[PARAM.input.nspin*pw_basis.npw]; - drhog_old = new std::complex[PARAM.input.nspin*pw_basis.npw]; - for (int i = 0; i < PARAM.input.nspin*pw_basis.npw; ++i) - { - drhog_old[i] = drhog[i] = std::complex(1.0, 1.0); - } + drhog = new std::complex[PARAM.input.nspin * pw_basis.npw]; + drhog_old = new std::complex[PARAM.input.nspin * pw_basis.npw]; + for (int i = 0; i < PARAM.input.nspin * pw_basis.npw; ++i) + { + drhog_old[i] = drhog[i] = std::complex (1.0, 1.0); + } // mixing_gg0 = 0.0 CMtest.mixing_gg0 = 0.0; - CMtest.Kerker_screen_recip(drhog); - for (int i = 0; i < PARAM.input.nspin*pw_basis.npw; ++i) - { - EXPECT_EQ(drhog[i], drhog_old[i]); - } + CMtest.Kerker_screen_recip (drhog); + for (int i = 0; i < PARAM.input.nspin * pw_basis.npw; ++i) + { + EXPECT_EQ (drhog[i], 
drhog_old[i]); + } // mixing_gg0 = 1.0, mixing_gg0_mag = 0.0 CMtest.mixing_gg0 = 1.0; - CMtest.Kerker_screen_recip(drhog); - gg0 = std::pow(ModuleBase::BOHR_TO_A, 2); + CMtest.Kerker_screen_recip (drhog); + gg0 = std::pow (ModuleBase::BOHR_TO_A, 2); for (int i = 0; i < pw_basis.npw; ++i) - { - double gg = this->pw_basis.gg[i]; - double ref = std::max(gg / (gg + gg0), 0.1 / CMtest.mixing_beta); - // rho - EXPECT_NEAR(drhog[i].real(), ref, 1e-10); - EXPECT_NEAR(drhog[i].imag(), ref, 1e-10); - } - for (int i = 0; i < 3*pw_basis.npw; ++i) - { - EXPECT_NEAR(drhog[i + pw_basis.npw].real(), 1.0, 1e-10); - EXPECT_NEAR(drhog[i + pw_basis.npw].imag(), 1.0, 1e-10); - } + { + double gg = this->pw_basis.gg[i]; + double ref = std::max (gg / (gg + gg0), 0.1 / CMtest.mixing_beta); + // rho + EXPECT_NEAR (drhog[i].real (), ref, 1e-10); + EXPECT_NEAR (drhog[i].imag (), ref, 1e-10); + } + for (int i = 0; i < 3 * pw_basis.npw; ++i) + { + EXPECT_NEAR (drhog[i + pw_basis.npw].real (), 1.0, 1e-10); + EXPECT_NEAR (drhog[i + pw_basis.npw].imag (), 1.0, 1e-10); + } // mixing_gg0 = 1.0, mixing_gg0_mag = 2.0 CMtest.mixing_gg0 = 1.0; CMtest.mixing_gg0_mag = 2.0; - CMtest.Kerker_screen_recip(drhog); - double gg1 = std::pow(1.0 * ModuleBase::BOHR_TO_A, 2); - double gg2 = std::pow(2.0 * ModuleBase::BOHR_TO_A, 2); + CMtest.Kerker_screen_recip (drhog); + double gg1 = std::pow (1.0 * ModuleBase::BOHR_TO_A, 2); + double gg2 = std::pow (2.0 * ModuleBase::BOHR_TO_A, 2); for (int i = 0; i < pw_basis.npw; ++i) - { - double gg = this->pw_basis.gg[i]; - double ref = std::max(gg / (gg + gg1), 0.1 / CMtest.mixing_beta); - // rho - EXPECT_NEAR(drhog[i].real(), ref * ref, 1e-10); - EXPECT_NEAR(drhog[i].imag(), ref * ref, 1e-10); - } + { + double gg = this->pw_basis.gg[i]; + double ref = std::max (gg / (gg + gg1), 0.1 / CMtest.mixing_beta); + // rho + EXPECT_NEAR (drhog[i].real (), ref * ref, 1e-10); + EXPECT_NEAR (drhog[i].imag (), ref * ref, 1e-10); + } for (int i = 0; i < pw_basis.npw; ++i) - { - double gg = 
this->pw_basis.gg[i]; - double ref = std::max(gg / (gg + gg2), 0.1 / CMtest.mixing_beta_mag); - // rho - for (int j = 1; j < PARAM.input.nspin; ++j) { - EXPECT_NEAR(drhog[i + pw_basis.npw * j].real(), ref, 1e-10); - EXPECT_NEAR(drhog[i + pw_basis.npw * j].imag(), ref, 1e-10); + double gg = this->pw_basis.gg[i]; + double ref = std::max (gg / (gg + gg2), 0.1 / CMtest.mixing_beta_mag); + // rho + for (int j = 1; j < PARAM.input.nspin; ++j) + { + EXPECT_NEAR (drhog[i + pw_basis.npw * j].real (), ref, 1e-10); + EXPECT_NEAR (drhog[i + pw_basis.npw * j].imag (), ref, 1e-10); + } } - } delete[] drhog; delete[] drhog_old; } -TEST_F(ChargeMixingTest, KerkerScreenRealTest) +TEST_F (ChargeMixingTest, KerkerScreenRealTest) { Charge_Mixing CMtest; - CMtest.set_rhopw(&pw_basis, &pw_basis); + CMtest.set_rhopw (&pw_basis, &pw_basis); ucell.tpiba = 1.0; - CMtest.set_mixing(PARAM.input.mixing_mode, - PARAM.input.mixing_beta, - PARAM.input.mixing_ndim, - PARAM.input.mixing_gg0, - PARAM.input.mixing_tau, - PARAM.input.mixing_beta_mag, - PARAM.input.mixing_gg0_mag, - PARAM.input.mixing_gg0_min, - PARAM.input.mixing_angle, - PARAM.input.mixing_dmr, - ucell.omega, - ucell.tpiba); + CMtest.set_mixing (PARAM.input.mixing_mode, + PARAM.input.mixing_beta, + PARAM.input.mixing_ndim, + PARAM.input.mixing_gg0, + PARAM.input.mixing_tau, + PARAM.input.mixing_beta_mag, + PARAM.input.mixing_gg0_mag, + PARAM.input.mixing_gg0_min, + PARAM.input.mixing_angle, + PARAM.input.mixing_dmr, + ucell.omega, + ucell.tpiba); // nspin = 1 PARAM.input.nspin = 1; - double* drhor = new double[PARAM.input.nspin*pw_basis.nrxx]; - double* drhor_ref = new double[PARAM.input.nspin*pw_basis.nrxx]; - for (int i = 0; i < PARAM.input.nspin*pw_basis.nrxx; ++i) - { - drhor_ref[i] = drhor[i] = 1.0; - } + double* drhor = new double[PARAM.input.nspin * pw_basis.nrxx]; + double* drhor_ref = new double[PARAM.input.nspin * pw_basis.nrxx]; + for (int i = 0; i < PARAM.input.nspin * pw_basis.nrxx; ++i) + { + drhor_ref[i] = drhor[i] = 
1.0; + } // no kerker CMtest.mixing_gg0 = 0.0; - CMtest.Kerker_screen_real(drhor); - for (int i = 0; i < PARAM.input.nspin*pw_basis.nrxx; ++i) - { - EXPECT_EQ(drhor[i], drhor_ref[i]); - } + CMtest.Kerker_screen_real (drhor); + for (int i = 0; i < PARAM.input.nspin * pw_basis.nrxx; ++i) + { + EXPECT_EQ (drhor[i], drhor_ref[i]); + } delete[] drhor; delete[] drhor_ref; // nspin = 2 PARAM.input.nspin = 2; CMtest.mixing_gg0 = 0.0; - std::complex* drhog = new std::complex[PARAM.input.nspin*pw_basis.npw]; - std::complex* drhog_old = new std::complex[PARAM.input.nspin*pw_basis.npw]; - drhor = new double[PARAM.input.nspin*pw_basis.nrxx]; - drhor_ref = new double[PARAM.input.nspin*pw_basis.nrxx]; - for (int i = 0; i < PARAM.input.nspin*pw_basis.npw; ++i) - { - drhog_old[i] = drhog[i] = std::complex(1.0, 1.0); - } - CMtest.Kerker_screen_recip(drhog); // no kerker - for (int i = 0; i < PARAM.input.nspin*pw_basis.npw; ++i) - { - EXPECT_EQ(drhog[i], drhog_old[i]); - } + std::complex* drhog = new std::complex[PARAM.input.nspin * pw_basis.npw]; + std::complex* drhog_old = new std::complex[PARAM.input.nspin * pw_basis.npw]; + drhor = new double[PARAM.input.nspin * pw_basis.nrxx]; + drhor_ref = new double[PARAM.input.nspin * pw_basis.nrxx]; + for (int i = 0; i < PARAM.input.nspin * pw_basis.npw; ++i) + { + drhog_old[i] = drhog[i] = std::complex (1.0, 1.0); + } + CMtest.Kerker_screen_recip (drhog); // no kerker + for (int i = 0; i < PARAM.input.nspin * pw_basis.npw; ++i) + { + EXPECT_EQ (drhog[i], drhog_old[i]); + } // RECIPROCAL CMtest.mixing_gg0 = 1.0; PARAM.input.mixing_gg0_mag = 0.0; - CMtest.Kerker_screen_recip(drhog); - const double gg0 = std::pow(ModuleBase::BOHR_TO_A, 2); + CMtest.Kerker_screen_recip (drhog); + const double gg0 = std::pow (ModuleBase::BOHR_TO_A, 2); for (int i = 0; i < pw_basis.npw; ++i) - { - std::complex ration = drhog[i] / drhog[i+pw_basis.npw]; - double gg = this->pw_basis.gg[i]; - double ration_ref = std::max(gg / (gg + gg0), 0.1 / CMtest.mixing_beta); - 
EXPECT_NEAR(ration.real(), ration_ref, 1e-10); - EXPECT_NEAR(ration.imag(), 0, 1e-10); - } + { + std::complex ration = drhog[i] / drhog[i + pw_basis.npw]; + double gg = this->pw_basis.gg[i]; + double ration_ref = std::max (gg / (gg + gg0), 0.1 / CMtest.mixing_beta); + EXPECT_NEAR (ration.real (), ration_ref, 1e-10); + EXPECT_NEAR (ration.imag (), 0, 1e-10); + } // REAL - pw_basis.recip2real(drhog, drhor_ref); - pw_basis.recip2real(drhog_old, drhor); + pw_basis.recip2real (drhog, drhor_ref); + pw_basis.recip2real (drhog_old, drhor); CMtest.mixing_gg0 = 0.0; PARAM.input.mixing_gg0_mag = 0.0; // nothing happens - CMtest.Kerker_screen_real(drhor); + CMtest.Kerker_screen_real (drhor); CMtest.mixing_gg0 = 1.0; - CMtest.Kerker_screen_real(drhor); + CMtest.Kerker_screen_real (drhor); for (int i = 0; i < pw_basis.nrxx; ++i) - { - EXPECT_NEAR(drhor[i], drhor_ref[i], 1e-8); - } + { + EXPECT_NEAR (drhor[i], drhor_ref[i], 1e-8); + } delete[] drhog; delete[] drhog_old; delete[] drhor; delete[] drhor_ref; - } -TEST_F(ChargeMixingTest, MixRhoTest) +TEST_F (ChargeMixingTest, MixRhoTest) { - PARAM.sys.double_grid = false; - charge.set_rhopw(&pw_basis); + PARAM.sys.double_grid = false; + charge.set_rhopw (&pw_basis); const int nspin = PARAM.input.nspin = 1; PARAM.sys.domag_z = false; XC_Functional::func_type = 3; @@ -843,34 +834,75 @@ TEST_F(ChargeMixingTest, MixRhoTest) charge.kin_r = new double*[nspin]; charge.kin_r_save = new double*[nspin]; for (int is = 0; is < nspin; is++) - { - charge.rho[is] = charge._space_rho + is * nrxx; - charge.rhog[is] = charge._space_rhog + is * npw; - charge.rho_save[is] = charge._space_rho_save + is * nrxx; - charge.rhog_save[is] = charge._space_rhog_save + is * npw; - charge.kin_r[is] = charge._space_kin_r + is * nrxx; - charge.kin_r_save[is] = charge._space_kin_r_save + is * nrxx; - } - std::vector real_ref(nspin * nrxx); - std::vector real_save_ref(nspin * nrxx); - std::vector> recip_ref(nspin * npw); - std::vector> recip_save_ref(nspin * npw); - 
for(int i = 0 ; i < nspin * npw; ++i) - { - recip_ref[i] = std::complex(double(i), 1.0); - recip_save_ref[i] = std::complex(double(i), 0.0); - } - for(int i = 0 ; i < nspin ; ++i) - { - pw_basis.recip2real(recip_ref.data() + i * npw, real_ref.data() + i * nrxx); - pw_basis.recip2real(recip_save_ref.data() + i * npw, real_save_ref.data() + i * nrxx); - } + { + charge.rho[is] = charge._space_rho + is * nrxx; + charge.rhog[is] = charge._space_rhog + is * npw; + charge.rho_save[is] = charge._space_rho_save + is * nrxx; + charge.rhog_save[is] = charge._space_rhog_save + is * npw; + charge.kin_r[is] = charge._space_kin_r + is * nrxx; + charge.kin_r_save[is] = charge._space_kin_r_save + is * nrxx; + } + std::vector real_ref (nspin * nrxx); + std::vector real_save_ref (nspin * nrxx); + std::vector> recip_ref (nspin * npw); + std::vector> recip_save_ref (nspin * npw); + for (int i = 0; i < nspin * npw; ++i) + { + recip_ref[i] = std::complex (double (i), 1.0); + recip_save_ref[i] = std::complex (double (i), 0.0); + } + for (int i = 0; i < nspin; ++i) + { + pw_basis.recip2real (recip_ref.data () + i * npw, real_ref.data () + i * nrxx); + pw_basis.recip2real (recip_save_ref.data () + i * npw, real_save_ref.data () + i * nrxx); + } //--------------------------------MAIN BODY-------------------------------- // RECIPROCAL Charge_Mixing CMtest_recip; - CMtest_recip.set_rhopw(&pw_basis, &pw_basis); - PARAM.input.scf_thr_type= 1; - CMtest_recip.set_mixing(PARAM.input.mixing_mode, + CMtest_recip.set_rhopw (&pw_basis, &pw_basis); + PARAM.input.scf_thr_type = 1; + CMtest_recip.set_mixing (PARAM.input.mixing_mode, + PARAM.input.mixing_beta, + PARAM.input.mixing_ndim, + PARAM.input.mixing_gg0, + PARAM.input.mixing_tau, + PARAM.input.mixing_beta_mag, + PARAM.input.mixing_gg0_mag, + PARAM.input.mixing_gg0_min, + PARAM.input.mixing_angle, + PARAM.input.mixing_dmr, + ucell.omega, + ucell.tpiba); + CMtest_recip.init_mixing (); + for (int i = 0; i < nspin * npw; ++i) + { + 
charge._space_rhog[i] = recip_ref[i]; + charge._space_rhog_save[i] = recip_save_ref[i]; + } + for (int i = 0; i < nspin * nrxx; ++i) + { + charge._space_rho[i] = real_ref[i]; + charge._space_rho_save[i] = real_save_ref[i]; + } + CMtest_recip.mix_rho (&charge); + for (int is = 0; is < nspin; ++is) + { + for (int ir = 0; ir < nrxx; ++ir) + { + EXPECT_NEAR (charge.rho_save[is][ir], real_ref[is * nrxx + ir], 1e-8); + } + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (charge.rhog[is][ig].real (), recip_save_ref[is * npw + ig].real (), 1e-8); + EXPECT_NEAR (charge.rhog[is][ig].imag (), recip_save_ref[is * npw + ig].imag () + 0.7, 1e-8); + } + } + + // REAL + Charge_Mixing CMtest_real; + PARAM.input.scf_thr_type = 2; + CMtest_real.set_rhopw (&pw_basis, &pw_basis); + CMtest_real.set_mixing (PARAM.input.mixing_mode, PARAM.input.mixing_beta, PARAM.input.mixing_ndim, PARAM.input.mixing_gg0, @@ -880,64 +912,25 @@ TEST_F(ChargeMixingTest, MixRhoTest) PARAM.input.mixing_gg0_min, PARAM.input.mixing_angle, PARAM.input.mixing_dmr, - ucell.omega, - ucell.tpiba); - CMtest_recip.init_mixing(); - for(int i = 0 ; i < nspin * npw; ++i) - { - charge._space_rhog[i] = recip_ref[i]; - charge._space_rhog_save[i] = recip_save_ref[i]; - } - for(int i = 0 ; i < nspin * nrxx; ++i) - { - charge._space_rho[i] = real_ref[i]; - charge._space_rho_save[i] = real_save_ref[i]; - } - CMtest_recip.mix_rho(&charge); - for(int is = 0 ; is < nspin; ++is) - { - for(int ir = 0 ; ir < nrxx ; ++ir) - { - EXPECT_NEAR(charge.rho_save[is][ir], real_ref[is*nrxx + ir], 1e-8); - } - for(int ig = 0; ig < npw ; ++ig) + ucell.omega, + ucell.tpiba); + CMtest_real.init_mixing (); + for (int i = 0; i < nspin * nrxx; ++i) { - EXPECT_NEAR(charge.rhog[is][ig].real(), recip_save_ref[is*npw + ig].real(), 1e-8); - EXPECT_NEAR(charge.rhog[is][ig].imag(), recip_save_ref[is*npw + ig].imag() + 0.7, 1e-8); + charge._space_rho[i] = real_ref[i]; + charge._space_rho_save[i] = real_save_ref[i]; } - } - - // REAL - Charge_Mixing 
CMtest_real; - PARAM.input.scf_thr_type= 2; - CMtest_real.set_rhopw(&pw_basis, &pw_basis); - CMtest_real.set_mixing(PARAM.input.mixing_mode, - PARAM.input.mixing_beta, - PARAM.input.mixing_ndim, - PARAM.input.mixing_gg0, - PARAM.input.mixing_tau, - PARAM.input.mixing_beta_mag, - PARAM.input.mixing_gg0_mag, - PARAM.input.mixing_gg0_min, - PARAM.input.mixing_angle, - PARAM.input.mixing_dmr, - ucell.omega, - ucell.tpiba); - CMtest_real.init_mixing(); - for(int i = 0 ; i < nspin * nrxx; ++i) - { - charge._space_rho[i] = real_ref[i]; - charge._space_rho_save[i] = real_save_ref[i]; - } - CMtest_recip.mix_rho(&charge); - for(int is = 0 ; is < nspin; ++is) - { - for(int ir = 0 ; ir < nrxx ; ++ir) + CMtest_recip.mix_rho (&charge); + for (int is = 0; is < nspin; ++is) { - EXPECT_NEAR(charge.rho_save[is][ir], real_ref[is*nrxx + ir], 1e-8); - EXPECT_NEAR(charge.rho[is][ir], 0.3*real_save_ref[is*nrxx+ir] + 0.7*real_ref[is*nrxx+ir], 1e-8); + for (int ir = 0; ir < nrxx; ++ir) + { + EXPECT_NEAR (charge.rho_save[is][ir], real_ref[is * nrxx + ir], 1e-8); + EXPECT_NEAR (charge.rho[is][ir], + 0.3 * real_save_ref[is * nrxx + ir] + 0.7 * real_ref[is * nrxx + ir], + 1e-8); + } } - } //------------------------------------------------------------------------- delete[] charge._space_rho; @@ -954,10 +947,10 @@ TEST_F(ChargeMixingTest, MixRhoTest) delete[] charge.kin_r_save; } -TEST_F(ChargeMixingTest, MixDoubleGridRhoTest) +TEST_F (ChargeMixingTest, MixDoubleGridRhoTest) { - PARAM.sys.double_grid = true; - charge.set_rhopw(&pw_dbasis); + PARAM.sys.double_grid = true; + charge.set_rhopw (&pw_dbasis); const int nspin = PARAM.input.nspin = 1; PARAM.sys.domag_z = false; XC_Functional::func_type = 3; @@ -982,71 +975,71 @@ TEST_F(ChargeMixingTest, MixDoubleGridRhoTest) charge.kin_r = new double*[nspin]; charge.kin_r_save = new double*[nspin]; for (int is = 0; is < nspin; is++) - { - charge.rho[is] = charge._space_rho + is * nrxx; - charge.rhog[is] = charge._space_rhog + is * npw; - 
charge.rho_save[is] = charge._space_rho_save + is * nrxx; - charge.rhog_save[is] = charge._space_rhog_save + is * npw; - charge.kin_r[is] = charge._space_kin_r + is * nrxx; - charge.kin_r_save[is] = charge._space_kin_r_save + is * nrxx; - } - std::vector real_ref(nspin * nrxx); - std::vector real_save_ref(nspin * nrxx); - std::vector> recip_ref(nspin * npw); - std::vector> recip_save_ref(nspin * npw); + { + charge.rho[is] = charge._space_rho + is * nrxx; + charge.rhog[is] = charge._space_rhog + is * npw; + charge.rho_save[is] = charge._space_rho_save + is * nrxx; + charge.rhog_save[is] = charge._space_rhog_save + is * npw; + charge.kin_r[is] = charge._space_kin_r + is * nrxx; + charge.kin_r_save[is] = charge._space_kin_r_save + is * nrxx; + } + std::vector real_ref (nspin * nrxx); + std::vector real_save_ref (nspin * nrxx); + std::vector> recip_ref (nspin * npw); + std::vector> recip_save_ref (nspin * npw); for (int i = 0; i < nspin * npw; ++i) - { - recip_ref[i] = std::complex(double(i), 1.0); - recip_save_ref[i] = std::complex(double(i), 0.0); - } + { + recip_ref[i] = std::complex (double (i), 1.0); + recip_save_ref[i] = std::complex (double (i), 0.0); + } for (int i = 0; i < nspin; ++i) - { - pw_dbasis.recip2real(recip_ref.data() + i * npw, real_ref.data() + i * nrxx); - pw_dbasis.recip2real(recip_save_ref.data() + i * npw, real_save_ref.data() + i * nrxx); - } + { + pw_dbasis.recip2real (recip_ref.data () + i * npw, real_ref.data () + i * nrxx); + pw_dbasis.recip2real (recip_save_ref.data () + i * npw, real_save_ref.data () + i * nrxx); + } //--------------------------------MAIN BODY-------------------------------- // RECIPROCAL Charge_Mixing CMtest_recip; - CMtest_recip.set_rhopw(&pw_basis, &pw_dbasis); + CMtest_recip.set_rhopw (&pw_basis, &pw_dbasis); - PARAM.input.scf_thr_type= 1; - CMtest_recip.set_mixing(PARAM.input.mixing_mode, - PARAM.input.mixing_beta, - PARAM.input.mixing_ndim, - PARAM.input.mixing_gg0, - PARAM.input.mixing_tau, - 
PARAM.input.mixing_beta_mag, - PARAM.input.mixing_gg0_mag, - PARAM.input.mixing_gg0_min, - PARAM.input.mixing_angle, - PARAM.input.mixing_dmr, - ucell.omega, - ucell.tpiba); + PARAM.input.scf_thr_type = 1; + CMtest_recip.set_mixing (PARAM.input.mixing_mode, + PARAM.input.mixing_beta, + PARAM.input.mixing_ndim, + PARAM.input.mixing_gg0, + PARAM.input.mixing_tau, + PARAM.input.mixing_beta_mag, + PARAM.input.mixing_gg0_mag, + PARAM.input.mixing_gg0_min, + PARAM.input.mixing_angle, + PARAM.input.mixing_dmr, + ucell.omega, + ucell.tpiba); - CMtest_recip.init_mixing(); + CMtest_recip.init_mixing (); for (int i = 0; i < nspin * npw; ++i) - { - charge._space_rhog[i] = recip_ref[i]; - charge._space_rhog_save[i] = recip_save_ref[i]; - } + { + charge._space_rhog[i] = recip_ref[i]; + charge._space_rhog_save[i] = recip_save_ref[i]; + } for (int i = 0; i < nspin * nrxx; ++i) - { - charge._space_rho[i] = real_ref[i]; - charge._space_rho_save[i] = real_save_ref[i]; - } - CMtest_recip.mix_rho(&charge); - for (int is = 0; is < nspin; ++is) - { - for (int ir = 0; ir < nrxx; ++ir) { - EXPECT_NEAR(charge.rho_save[is][ir], real_ref[is * nrxx + ir], 1e-8); + charge._space_rho[i] = real_ref[i]; + charge._space_rho_save[i] = real_save_ref[i]; } - for (int ig = 0; ig < npw; ++ig) + CMtest_recip.mix_rho (&charge); + for (int is = 0; is < nspin; ++is) { - EXPECT_NEAR(charge.rhog[is][ig].real(), recip_save_ref[is * npw + ig].real(), 1e-8); - EXPECT_NEAR(charge.rhog[is][ig].imag(), recip_save_ref[is * npw + ig].imag() + 0.7, 1e-8); + for (int ir = 0; ir < nrxx; ++ir) + { + EXPECT_NEAR (charge.rho_save[is][ir], real_ref[is * nrxx + ir], 1e-8); + } + for (int ig = 0; ig < npw; ++ig) + { + EXPECT_NEAR (charge.rhog[is][ig].real (), recip_save_ref[is * npw + ig].real (), 1e-8); + EXPECT_NEAR (charge.rhog[is][ig].imag (), recip_save_ref[is * npw + ig].imag () + 0.7, 1e-8); + } } - } //------------------------------------------------------------------------- delete[] charge._space_rho; @@ -1063,80 
+1056,64 @@ TEST_F(ChargeMixingTest, MixDoubleGridRhoTest) delete[] charge.kin_r_save; } -TEST_F(ChargeMixingTest, MixDivCombTest) +TEST_F (ChargeMixingTest, MixDivCombTest) { // NSPIN = 1 PARAM.input.nspin = 1; Charge_Mixing CMtest; - CMtest.set_rhopw(&pw_basis, &pw_dbasis); - std::vector> data(pw_dbasis.npw, 1.0); + CMtest.set_rhopw (&pw_basis, &pw_dbasis); + std::vector> data (pw_dbasis.npw, 1.0); std::complex*datas, *datahf; std::complex*datas2, *datahf2; - CMtest.divide_data(data.data(), datas, datahf); - EXPECT_EQ(datas, data.data()); - EXPECT_EQ(datahf, data.data() + pw_basis.npw); - CMtest.combine_data(data.data(), datas, datahf); - EXPECT_EQ(datas, nullptr); - EXPECT_EQ(datahf, nullptr); + CMtest.divide_data (data.data (), datas, datahf); + EXPECT_EQ (datas, data.data ()); + EXPECT_EQ (datahf, data.data () + pw_basis.npw); + CMtest.combine_data (data.data (), datas, datahf); + EXPECT_EQ (datas, nullptr); + EXPECT_EQ (datahf, nullptr); - CMtest.divide_data(data.data(), datas2, datahf2); - CMtest.clean_data(datas2, datahf2); - EXPECT_EQ(datas2, nullptr); - EXPECT_EQ(datahf2, nullptr); + CMtest.divide_data (data.data (), datas2, datahf2); + CMtest.clean_data (datas2, datahf2); + EXPECT_EQ (datas2, nullptr); + EXPECT_EQ (datahf2, nullptr); // NSPIN = 2 PARAM.input.nspin = 2; - data.resize(pw_dbasis.npw * 2, 1.0); - std::vector> dataout(pw_dbasis.npw * 2, 1.0); - CMtest.divide_data(data.data(), datas, datahf); - CMtest.combine_data(dataout.data(), datas, datahf); - EXPECT_EQ(datas, nullptr); - EXPECT_EQ(datahf, nullptr); + data.resize (pw_dbasis.npw * 2, 1.0); + std::vector> dataout (pw_dbasis.npw * 2, 1.0); + CMtest.divide_data (data.data (), datas, datahf); + CMtest.combine_data (dataout.data (), datas, datahf); + EXPECT_EQ (datas, nullptr); + EXPECT_EQ (datahf, nullptr); for (int i = 0; i < pw_dbasis.npw * 2; ++i) - { - EXPECT_EQ(dataout[i], data[i]); - } + { + EXPECT_EQ (dataout[i], data[i]); + } - CMtest.divide_data(data.data(), datas2, datahf2); - 
CMtest.clean_data(datas2, datahf2); - EXPECT_EQ(datas2, nullptr); - EXPECT_EQ(datahf2, nullptr); + CMtest.divide_data (data.data (), datas2, datahf2); + CMtest.clean_data (datas2, datahf2); + EXPECT_EQ (datas2, nullptr); + EXPECT_EQ (datahf2, nullptr); } -TEST_F(ChargeMixingTest, SCFOscillationTest) +TEST_F (ChargeMixingTest, SCFOscillationTest) { Charge_Mixing CMtest; int scf_nmax = 20; int scf_os_ndim = 3; double scf_os_thr = -0.05; bool scf_oscillate = false; - std::vector drho(scf_nmax, 0.0); - std::vector scf_oscillate_ref(scf_nmax, false); - drho = {6.83639633652e-05, - 4.93523029235e-05, - 3.59230097735e-05, - 2.68356403913e-05, - 2.17490806464e-05, - 2.14231642508e-05, - 1.67507494811e-05, - 1.53575889539e-05, - 1.26504511554e-05, - 1.04762016224e-05, - 8.10000162918e-06, - 7.66427917682e-06, - 6.70112820094e-06, - 5.68594436664e-06, - 4.80120233733e-06, - 4.86519757184e-06, - 4.37855804356e-06, - 4.29922703412e-06, - 4.36398486331e-06, - 4.94224615955e-06}; - scf_oscillate_ref = {false,false,false,false,false,true,false,false,false,false, - false,false,true,false,false,true,true,true,true,true}; + std::vector drho (scf_nmax, 0.0); + std::vector scf_oscillate_ref (scf_nmax, false); + drho = {6.83639633652e-05, 4.93523029235e-05, 3.59230097735e-05, 2.68356403913e-05, 2.17490806464e-05, + 2.14231642508e-05, 1.67507494811e-05, 1.53575889539e-05, 1.26504511554e-05, 1.04762016224e-05, + 8.10000162918e-06, 7.66427917682e-06, 6.70112820094e-06, 5.68594436664e-06, 4.80120233733e-06, + 4.86519757184e-06, 4.37855804356e-06, 4.29922703412e-06, 4.36398486331e-06, 4.94224615955e-06}; + scf_oscillate_ref = {false, false, false, false, false, true, false, false, false, false, + false, false, true, false, false, true, true, true, true, true}; for (int i = 1; i <= scf_nmax; ++i) - { - scf_oscillate = CMtest.if_scf_oscillate(i,drho[i-1],scf_os_ndim,scf_os_thr); - EXPECT_EQ(scf_oscillate, scf_oscillate_ref[i-1]); - } + { + scf_oscillate = CMtest.if_scf_oscillate (i, drho[i - 
1], scf_os_ndim, scf_os_thr); + EXPECT_EQ (scf_oscillate, scf_oscillate_ref[i - 1]); + } } diff --git a/source/source_estate/test/charge_test.cpp b/source/source_estate/test/charge_test.cpp index 261bcc5e3e7..7d52163e0cc 100644 --- a/source/source_estate/test/charge_test.cpp +++ b/source/source_estate/test/charge_test.cpp @@ -10,23 +10,16 @@ #include "prepare_unitcell.h" // mock functions for UnitCell #ifdef __LCAO -InfoNonlocal::InfoNonlocal() -{ -} -InfoNonlocal::~InfoNonlocal() -{ -} +InfoNonlocal::InfoNonlocal () {} +InfoNonlocal::~InfoNonlocal () {} #endif -Magnetism::Magnetism() +Magnetism::Magnetism () { this->tot_mag = 0.0; this->abs_mag = 0.0; this->start_mag = nullptr; } -Magnetism::~Magnetism() -{ - delete[] this->start_mag; -} +Magnetism::~Magnetism () { delete[] this->start_mag; } // mock functions for Charge int XC_Functional::func_type = 1; @@ -35,7 +28,8 @@ namespace elecstate { double tmp_ucell_omega = 500.0; double tmp_gridecut = 80.0; -void Set_GlobalV_Default() +void + Set_GlobalV_Default () { PARAM.input.nspin = 1; PARAM.input.test_charge = 0; @@ -74,150 +68,155 @@ class ChargeTest : public ::testing::Test Charge* charge; ModulePW::PW_Basis* rhopw; std::string output; - void SetUp() override + void + SetUp () override { - elecstate::Set_GlobalV_Default(); - ucell = utp.SetUcellInfo(); + elecstate::Set_GlobalV_Default (); + ucell = utp.SetUcellInfo (); charge = new Charge; rhopw = new ModulePW::PW_Basis; - rhopw->initgrids(ucell->lat0, ucell->latvec, elecstate::tmp_gridecut); - rhopw->distribute_r(); - rhopw->initparameters(false, elecstate::tmp_gridecut); - rhopw->distribute_g(); + rhopw->initgrids (ucell->lat0, ucell->latvec, elecstate::tmp_gridecut); + rhopw->distribute_r (); + rhopw->initparameters (false, elecstate::tmp_gridecut); + rhopw->distribute_g (); } - void TearDown() override + void + TearDown () override { delete charge; delete rhopw; } }; -TEST_F(ChargeTest, Constructor) +TEST_F (ChargeTest, Constructor) { - 
EXPECT_FALSE(charge->allocate_rho); - EXPECT_FALSE(charge->allocate_rho_final_scf); + EXPECT_FALSE (charge->allocate_rho); + EXPECT_FALSE (charge->allocate_rho_final_scf); } -TEST_F(ChargeTest, Allocate) +TEST_F (ChargeTest, Allocate) { // ucell info - EXPECT_DOUBLE_EQ(ucell->omega, 265.302); + EXPECT_DOUBLE_EQ (ucell->omega, 265.302); // rhopw info - EXPECT_DOUBLE_EQ(rhopw->lat0, 10.2); - EXPECT_EQ(rhopw->nx, 24); - EXPECT_EQ(rhopw->ny, 24); - EXPECT_EQ(rhopw->nz, 24); - EXPECT_EQ(rhopw->nxyz, 13824); - EXPECT_EQ(rhopw->nrxx, 13824); - EXPECT_EQ(rhopw->npw, 3143); - EXPECT_EQ(rhopw->npwtot, 3143); + EXPECT_DOUBLE_EQ (rhopw->lat0, 10.2); + EXPECT_EQ (rhopw->nx, 24); + EXPECT_EQ (rhopw->ny, 24); + EXPECT_EQ (rhopw->nz, 24); + EXPECT_EQ (rhopw->nxyz, 13824); + EXPECT_EQ (rhopw->nrxx, 13824); + EXPECT_EQ (rhopw->npw, 3143); + EXPECT_EQ (rhopw->npwtot, 3143); // call Charge::allocate() PARAM.input.test_charge = 2; XC_Functional::func_type = 3; XC_Functional::ked_flag = true; - charge->set_rhopw(rhopw); - EXPECT_FALSE(charge->allocate_rho); - const bool kin_den = charge->kin_density(); - charge->allocate(PARAM.input.nspin, kin_den); - EXPECT_TRUE(charge->allocate_rho); + charge->set_rhopw (rhopw); + EXPECT_FALSE (charge->allocate_rho); + const bool kin_den = charge->kin_density (); + charge->allocate (PARAM.input.nspin, kin_den); + EXPECT_TRUE (charge->allocate_rho); // test if Charge::allocate() be called twice - EXPECT_NO_THROW(charge->allocate(PARAM.input.nspin, kin_den)); - EXPECT_TRUE(charge->allocate_rho); + EXPECT_NO_THROW (charge->allocate (PARAM.input.nspin, kin_den)); + EXPECT_TRUE (charge->allocate_rho); } -TEST_F(ChargeTest, SumRho) +TEST_F (ChargeTest, SumRho) { - charge->set_rhopw(rhopw); - EXPECT_FALSE(charge->allocate_rho); - const bool kin_den = charge->kin_density(); - charge->allocate(PARAM.input.nspin, kin_den); - EXPECT_TRUE(charge->allocate_rho); + charge->set_rhopw (rhopw); + EXPECT_FALSE (charge->allocate_rho); + const bool kin_den = 
charge->kin_density (); + charge->allocate (PARAM.input.nspin, kin_den); + EXPECT_TRUE (charge->allocate_rho); int nspin = (PARAM.input.nspin == 2) ? 2 : 1; for (int is = 0; is < nspin; is++) - { - for (int ir = 0; ir < rhopw->nrxx; ir++) { - charge->rho[is][ir] = 0.1; + for (int ir = 0; ir < rhopw->nrxx; ir++) + { + charge->rho[is][ir] = 0.1; + } } - } - charge->set_omega(&ucell->omega);; - EXPECT_NEAR(charge->sum_rho(), 0.1 * nspin * rhopw->nrxx * ucell->omega / rhopw->nxyz, 1E-10); + charge->set_omega (&ucell->omega); + ; + EXPECT_NEAR (charge->sum_rho (), 0.1 * nspin * rhopw->nrxx * ucell->omega / rhopw->nxyz, 1E-10); } -TEST_F(ChargeTest, RenormalizeRho) +TEST_F (ChargeTest, RenormalizeRho) { - charge->set_rhopw(rhopw); - EXPECT_FALSE(charge->allocate_rho); - const bool kin_den = charge->kin_density(); - charge->allocate(PARAM.input.nspin, kin_den); - EXPECT_TRUE(charge->allocate_rho); + charge->set_rhopw (rhopw); + EXPECT_FALSE (charge->allocate_rho); + const bool kin_den = charge->kin_density (); + charge->allocate (PARAM.input.nspin, kin_den); + EXPECT_TRUE (charge->allocate_rho); int nspin = (PARAM.input.nspin == 2) ? 
2 : 1; for (int is = 0; is < nspin; is++) - { - for (int ir = 0; ir < rhopw->nrxx; ir++) { - charge->rho[is][ir] = 0.1; + for (int ir = 0; ir < rhopw->nrxx; ir++) + { + charge->rho[is][ir] = 0.1; + } } - } - EXPECT_EQ(PARAM.input.nelec, 8); - charge->set_omega(&ucell->omega);; - charge->renormalize_rho(); - EXPECT_NEAR(charge->sum_rho(), 8.0, 1e-10); + EXPECT_EQ (PARAM.input.nelec, 8); + charge->set_omega (&ucell->omega); + ; + charge->renormalize_rho (); + EXPECT_NEAR (charge->sum_rho (), 8.0, 1e-10); } -TEST_F(ChargeTest, CheckNe) +TEST_F (ChargeTest, CheckNe) { - charge->set_rhopw(rhopw); - EXPECT_FALSE(charge->allocate_rho); - const bool kin_den = charge->kin_density(); - charge->allocate(PARAM.input.nspin, kin_den); - EXPECT_TRUE(charge->allocate_rho); + charge->set_rhopw (rhopw); + EXPECT_FALSE (charge->allocate_rho); + const bool kin_den = charge->kin_density (); + charge->allocate (PARAM.input.nspin, kin_den); + EXPECT_TRUE (charge->allocate_rho); int nspin = (PARAM.input.nspin == 2) ? 
2 : 1; for (int is = 0; is < nspin; is++) - { - for (int ir = 0; ir < rhopw->nrxx; ir++) { - charge->rho[is][ir] = 0.1; + for (int ir = 0; ir < rhopw->nrxx; ir++) + { + charge->rho[is][ir] = 0.1; + } } - } - EXPECT_EQ(PARAM.input.nelec, 8); - charge->set_omega(&ucell->omega);; - charge->renormalize_rho(); - EXPECT_NEAR(charge->sum_rho(), 8.0, 1e-10); - EXPECT_NEAR(charge->cal_rho2ne(charge->rho[0]), 8.0, 1e-10); + EXPECT_EQ (PARAM.input.nelec, 8); + charge->set_omega (&ucell->omega); + ; + charge->renormalize_rho (); + EXPECT_NEAR (charge->sum_rho (), 8.0, 1e-10); + EXPECT_NEAR (charge->cal_rho2ne (charge->rho[0]), 8.0, 1e-10); } -TEST_F(ChargeTest, SaveRhoBeforeSumBand) +TEST_F (ChargeTest, SaveRhoBeforeSumBand) { - charge->set_rhopw(rhopw); - EXPECT_FALSE(charge->allocate_rho); - const bool kin_den = charge->kin_density(); - charge->allocate(PARAM.input.nspin, kin_den); - EXPECT_TRUE(charge->allocate_rho); + charge->set_rhopw (rhopw); + EXPECT_FALSE (charge->allocate_rho); + const bool kin_den = charge->kin_density (); + charge->allocate (PARAM.input.nspin, kin_den); + EXPECT_TRUE (charge->allocate_rho); int nspin = (PARAM.input.nspin == 2) ? 
2 : 1; for (int is = 0; is < nspin; is++) - { - for (int ir = 0; ir < rhopw->nrxx; ir++) { - charge->rho[is][ir] = 0.1; + for (int ir = 0; ir < rhopw->nrxx; ir++) + { + charge->rho[is][ir] = 0.1; + } } - } - EXPECT_EQ(PARAM.input.nelec, 8); + EXPECT_EQ (PARAM.input.nelec, 8); XC_Functional::func_type = 3; XC_Functional::ked_flag = true; - charge->set_omega(&ucell->omega);; - charge->renormalize_rho(); - charge->save_rho_before_sum_band(); - EXPECT_NEAR(charge->cal_rho2ne(charge->rho_save[0]), 8.0, 1e-10); + charge->set_omega (&ucell->omega); + ; + charge->renormalize_rho (); + charge->save_rho_before_sum_band (); + EXPECT_NEAR (charge->cal_rho2ne (charge->rho_save[0]), 8.0, 1e-10); } -TEST_F(ChargeTest, InitFinalScf) +TEST_F (ChargeTest, InitFinalScf) { - charge->set_rhopw(rhopw); + charge->set_rhopw (rhopw); XC_Functional::func_type = 1; XC_Functional::ked_flag = false; PARAM.input.test_charge = 2; - charge->init_final_scf(); - EXPECT_TRUE(charge->allocate_rho_final_scf); + charge->init_final_scf (); + EXPECT_TRUE (charge->allocate_rho_final_scf); } - diff --git a/source/source_estate/test/elecstate_base_test.cpp b/source/source_estate/test/elecstate_base_test.cpp index e0d63df8220..0bf7c9cc3ab 100644 --- a/source/source_estate/test/elecstate_base_test.cpp +++ b/source/source_estate/test/elecstate_base_test.cpp @@ -14,73 +14,57 @@ // Mock functions for testing elecstate.cpp namespace elecstate { -void Potential::init_pot(Charge const*) +void + Potential::init_pot (Charge const*) { } -void Potential::cal_v_eff(const Charge* chg, const UnitCell* ucell, ModuleBase::matrix& v_eff) +void + Potential::cal_v_eff (const Charge* chg, const UnitCell* ucell, ModuleBase::matrix& v_eff) { } -void Potential::cal_fixed_v(double* vl_pseudo) -{ -} -Potential::~Potential() +void + Potential::cal_fixed_v (double* vl_pseudo) { } +Potential::~Potential () {} } // namespace elecstate -Charge::Charge() -{ -} -Charge::~Charge() -{ -} -UnitCell::UnitCell() -{ -} -UnitCell::~UnitCell() -{ 
-} -Parallel_Grid::Parallel_Grid() {}; -Parallel_Grid::~Parallel_Grid() {}; -Magnetism::Magnetism() -{ -} -Magnetism::~Magnetism() -{ -} -InfoNonlocal::InfoNonlocal() -{ -} -InfoNonlocal::~InfoNonlocal() -{ -} -SepPot::SepPot(){} -SepPot::~SepPot(){} -Sep_Cell::Sep_Cell() noexcept {} -Sep_Cell::~Sep_Cell() noexcept {} +Charge::Charge () {} +Charge::~Charge () {} +UnitCell::UnitCell () {} +UnitCell::~UnitCell () {} +Parallel_Grid::Parallel_Grid () {}; +Parallel_Grid::~Parallel_Grid () {}; +Magnetism::Magnetism () {} +Magnetism::~Magnetism () {} +InfoNonlocal::InfoNonlocal () {} +InfoNonlocal::~InfoNonlocal () {} +SepPot::SepPot () {} +SepPot::~SepPot () {} +Sep_Cell::Sep_Cell () noexcept {} +Sep_Cell::~Sep_Cell () noexcept {} #include "source_cell/klist.h" -ModulePW::PW_Basis::PW_Basis() -{ -} -ModulePW::PW_Basis::~PW_Basis() -{ -} -ModulePW::PW_Basis_Sup::~PW_Basis_Sup() +ModulePW::PW_Basis::PW_Basis () {} +ModulePW::PW_Basis::~PW_Basis () {} +ModulePW::PW_Basis_Sup::~PW_Basis_Sup () {} +void + ModulePW::PW_Basis::initgrids (double, ModuleBase::Matrix3, double) { } -void ModulePW::PW_Basis::initgrids(double, ModuleBase::Matrix3, double) +void + ModulePW::PW_Basis::initgrids (double, ModuleBase::Matrix3, int, int, int) { } -void ModulePW::PW_Basis::initgrids(double, ModuleBase::Matrix3, int, int, int) +void + ModulePW::PW_Basis::distribute_r () { } -void ModulePW::PW_Basis::distribute_r() +void + Charge::set_rho_core (const UnitCell& ucell, ModuleBase::ComplexMatrix const&, const bool*) { } -void Charge::set_rho_core(const UnitCell& ucell, ModuleBase::ComplexMatrix const&, const bool*) -{ -} -void Charge::init_rho(const UnitCell&, +void + Charge::init_rho (const UnitCell&, const Parallel_Grid&, ModuleBase::ComplexMatrix const&, ModuleSymmetry::Symmetry& symm, @@ -88,13 +72,16 @@ void Charge::init_rho(const UnitCell&, const void*) { } -void Charge::set_rhopw(ModulePW::PW_Basis*) +void + Charge::set_rhopw (ModulePW::PW_Basis*) { } -void Charge::renormalize_rho() +void 
+ Charge::renormalize_rho () { } -void Charge::check_rho() +void + Charge::check_rho () { } @@ -133,7 +120,8 @@ namespace elecstate class MockElecState : public ElecState { public: - void Set_GlobalV_Default() + void + Set_GlobalV_Default () { PARAM.input.nspin = 1; PARAM.input.nelec = 10.0; @@ -158,12 +146,14 @@ class ElecStateTest : public ::testing::Test UnitCell ucell; Parallel_Grid pgrid; std::string output; - void SetUp() + void + SetUp () { elecstate = new elecstate::MockElecState; - elecstate->Set_GlobalV_Default(); + elecstate->Set_GlobalV_Default (); } - void TearDown() + void + TearDown () { delete elecstate; } @@ -171,49 +161,49 @@ class ElecStateTest : public ::testing::Test using ElecStateDeathTest = ElecStateTest; -TEST_F(ElecStateTest, InitNelecSpin) +TEST_F (ElecStateTest, InitNelecSpin) { PARAM.input.nspin = 2; - elecstate->init_nelec_spin(); - EXPECT_EQ(elecstate->nelec_spin[0], 5.0); - EXPECT_EQ(elecstate->nelec_spin[1], 5.0); + elecstate->init_nelec_spin (); + EXPECT_EQ (elecstate->nelec_spin[0], 5.0); + EXPECT_EQ (elecstate->nelec_spin[1], 5.0); } -TEST_F(ElecStateTest, Constructor) +TEST_F (ElecStateTest, Constructor) { Charge* charge = new Charge; ModulePW::PW_Basis* rhopw = new ModulePW::PW_Basis; ModulePW::PW_Basis_Big* bigpw = new ModulePW::PW_Basis_Big; - elecstate::ElecState* elecstate_new = new elecstate::ElecState(charge, rhopw, bigpw); - EXPECT_EQ(elecstate_new->charge, charge); - EXPECT_EQ(elecstate_new->bigpw, bigpw); - EXPECT_EQ(elecstate_new->eferm.two_efermi, PARAM.sys.two_fermi); + elecstate::ElecState* elecstate_new = new elecstate::ElecState (charge, rhopw, bigpw); + EXPECT_EQ (elecstate_new->charge, charge); + EXPECT_EQ (elecstate_new->bigpw, bigpw); + EXPECT_EQ (elecstate_new->eferm.two_efermi, PARAM.sys.two_fermi); delete elecstate_new; delete bigpw; delete rhopw; delete charge; } -TEST_F(ElecStateTest, InitKS) +TEST_F (ElecStateTest, InitKS) { Charge* charge = new Charge; ModulePW::PW_Basis_Big* bigpw = new 
ModulePW::PW_Basis_Big; K_Vectors* klist = new K_Vectors; int nk = 1; - EXPECT_NO_THROW(elecstate->init_ks(charge, klist, nk, bigpw)); - EXPECT_EQ(elecstate->charge, charge); - EXPECT_EQ(elecstate->bigpw, bigpw); - EXPECT_EQ(elecstate->klist, klist); - EXPECT_EQ(elecstate->ekb.nr, nk); - EXPECT_EQ(elecstate->ekb.nc, PARAM.input.nbands); - EXPECT_EQ(elecstate->wg.nr, nk); - EXPECT_EQ(elecstate->wg.nc, PARAM.input.nbands); + EXPECT_NO_THROW (elecstate->init_ks (charge, klist, nk, bigpw)); + EXPECT_EQ (elecstate->charge, charge); + EXPECT_EQ (elecstate->bigpw, bigpw); + EXPECT_EQ (elecstate->klist, klist); + EXPECT_EQ (elecstate->ekb.nr, nk); + EXPECT_EQ (elecstate->ekb.nc, PARAM.input.nbands); + EXPECT_EQ (elecstate->wg.nr, nk); + EXPECT_EQ (elecstate->wg.nc, PARAM.input.nbands); delete klist; delete bigpw; delete charge; } -TEST_F(ElecStateTest, GetRho) +TEST_F (ElecStateTest, GetRho) { Charge* charge = new Charge; ModulePW::PW_Basis_Big* bigpw = new ModulePW::PW_Basis_Big; @@ -222,38 +212,38 @@ TEST_F(ElecStateTest, GetRho) int nrxx = 100; charge->rho = new double*[PARAM.input.nspin]; for (int i = 0; i < PARAM.input.nspin; ++i) - { - charge->rho[i] = new double[nrxx]; - for (int j = 0; j < nrxx; ++j) { - charge->rho[i][j] = 1.0; + charge->rho[i] = new double[nrxx]; + for (int j = 0; j < nrxx; ++j) + { + charge->rho[i][j] = 1.0; + } } - } - elecstate->init_ks(charge, klist, nk, bigpw); - EXPECT_EQ(elecstate->getRho(0), &(charge->rho[0][0])); - EXPECT_EQ(elecstate->getRho(0)[nrxx - 1], 1.0); + elecstate->init_ks (charge, klist, nk, bigpw); + EXPECT_EQ (elecstate->getRho (0), &(charge->rho[0][0])); + EXPECT_EQ (elecstate->getRho (0)[nrxx - 1], 1.0); for (int i = 0; i < PARAM.input.nspin; ++i) - { - delete[] charge->rho[i]; - } + { + delete[] charge->rho[i]; + } delete[] charge->rho; delete klist; delete bigpw; delete charge; } -TEST_F(ElecStateTest, VirtualBaseFuncs) +TEST_F (ElecStateTest, VirtualBaseFuncs) { psi::Psi> psi_complex; psi::Psi psi_real; - 
EXPECT_NO_THROW(elecstate->psiToRho(psi_complex)); - EXPECT_NO_THROW(elecstate->psiToRho(psi_real)); - EXPECT_NO_THROW(elecstate->print_psi(psi_complex)); - EXPECT_NO_THROW(elecstate->print_psi(psi_real)); - EXPECT_NO_THROW(elecstate->getNewRho()); + EXPECT_NO_THROW (elecstate->psiToRho (psi_complex)); + EXPECT_NO_THROW (elecstate->psiToRho (psi_real)); + EXPECT_NO_THROW (elecstate->print_psi (psi_complex)); + EXPECT_NO_THROW (elecstate->print_psi (psi_real)); + EXPECT_NO_THROW (elecstate->getNewRho ()); } -TEST_F(ElecStateTest, InitSCF) +TEST_F (ElecStateTest, InitSCF) { Charge* charge = new Charge; elecstate->charge = charge; @@ -263,357 +253,372 @@ TEST_F(ElecStateTest, InitSCF) ModuleBase::ComplexMatrix strucfac; elecstate->eferm = efermi; ModuleSymmetry::Symmetry symm; - EXPECT_NO_THROW(elecstate->init_scf(ucell, pgrid, strucfac, nullptr, symm)); + EXPECT_NO_THROW (elecstate->init_scf (ucell, pgrid, strucfac, nullptr, symm)); // delete elecstate->pot is done in the destructor of elecstate delete charge; } -TEST_F(ElecStateTest, FixedWeights) +TEST_F (ElecStateTest, FixedWeights) { - EXPECT_EQ(PARAM.input.nbands, 6); + EXPECT_EQ (PARAM.input.nbands, 6); PARAM.input.nelec = 30; K_Vectors* klist = new K_Vectors; - klist->set_nks(5); + klist->set_nks (5); elecstate->klist = klist; - elecstate->wg.create(klist->get_nks(), PARAM.input.nbands); + elecstate->wg.create (klist->get_nks (), PARAM.input.nbands); std::vector ocp_kb; - ocp_kb.resize(PARAM.input.nbands * elecstate->klist->get_nks()); - for (int i = 0; i < ocp_kb.size(); ++i) - { - ocp_kb[i] = 1.0; - } - elecstate::fixed_weights(ocp_kb, PARAM.input.nbands, PARAM.input.nelec,klist,elecstate->wg,elecstate->skip_weights); - EXPECT_EQ(elecstate->wg(0, 0), 1.0); - EXPECT_EQ(elecstate->wg(klist->get_nks() - 1, PARAM.input.nbands - 1), 1.0); - EXPECT_TRUE(elecstate->skip_weights); + ocp_kb.resize (PARAM.input.nbands * elecstate->klist->get_nks ()); + for (int i = 0; i < ocp_kb.size (); ++i) + { + ocp_kb[i] = 1.0; + 
} + elecstate::fixed_weights (ocp_kb, + PARAM.input.nbands, + PARAM.input.nelec, + klist, + elecstate->wg, + elecstate->skip_weights); + EXPECT_EQ (elecstate->wg (0, 0), 1.0); + EXPECT_EQ (elecstate->wg (klist->get_nks () - 1, PARAM.input.nbands - 1), 1.0); + EXPECT_TRUE (elecstate->skip_weights); } -TEST_F(ElecStateDeathTest, FixedWeightsWarning1) +TEST_F (ElecStateDeathTest, FixedWeightsWarning1) { - EXPECT_EQ(PARAM.input.nbands, 6); + EXPECT_EQ (PARAM.input.nbands, 6); PARAM.input.nelec = 30; K_Vectors* klist = new K_Vectors; - klist->set_nks(5); + klist->set_nks (5); elecstate->klist = klist; - elecstate->wg.create(klist->get_nks(), PARAM.input.nbands); + elecstate->wg.create (klist->get_nks (), PARAM.input.nbands); std::vector ocp_kb; - ocp_kb.resize(PARAM.input.nbands * elecstate->klist->get_nks() - 1); - for (int i = 0; i < ocp_kb.size(); ++i) - { - ocp_kb[i] = 1.0; - } - testing::internal::CaptureStdout(); - EXPECT_EXIT(elecstate::fixed_weights(ocp_kb, PARAM.input.nbands, PARAM.input.nelec,klist,elecstate->wg,elecstate->skip_weights), - ::testing::ExitedWithCode(1), - ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("size of occupation array is wrong , please check ocp_set")); + ocp_kb.resize (PARAM.input.nbands * elecstate->klist->get_nks () - 1); + for (int i = 0; i < ocp_kb.size (); ++i) + { + ocp_kb[i] = 1.0; + } + testing::internal::CaptureStdout (); + EXPECT_EXIT (elecstate::fixed_weights (ocp_kb, + PARAM.input.nbands, + PARAM.input.nelec, + klist, + elecstate->wg, + elecstate->skip_weights), + ::testing::ExitedWithCode (1), + ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("size of occupation array is wrong , please check ocp_set")); } -TEST_F(ElecStateDeathTest, FixedWeightsWarning2) +TEST_F (ElecStateDeathTest, FixedWeightsWarning2) { - EXPECT_EQ(PARAM.input.nbands, 6); + EXPECT_EQ (PARAM.input.nbands, 6); PARAM.input.nelec = 29; K_Vectors* klist = 
new K_Vectors; - klist->set_nks(5); + klist->set_nks (5); elecstate->klist = klist; - elecstate->wg.create(klist->get_nks(), PARAM.input.nbands); + elecstate->wg.create (klist->get_nks (), PARAM.input.nbands); std::vector ocp_kb; - ocp_kb.resize(PARAM.input.nbands * elecstate->klist->get_nks()); - for (int i = 0; i < ocp_kb.size(); ++i) - { - ocp_kb[i] = 1.0; - } - testing::internal::CaptureStdout(); - EXPECT_EXIT(elecstate::fixed_weights(ocp_kb, PARAM.input.nbands, PARAM.input.nelec,klist,elecstate->wg,elecstate->skip_weights), - ::testing::ExitedWithCode(1), - ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("total number of occupations is wrong , please check ocp_set")); + ocp_kb.resize (PARAM.input.nbands * elecstate->klist->get_nks ()); + for (int i = 0; i < ocp_kb.size (); ++i) + { + ocp_kb[i] = 1.0; + } + testing::internal::CaptureStdout (); + EXPECT_EXIT (elecstate::fixed_weights (ocp_kb, + PARAM.input.nbands, + PARAM.input.nelec, + klist, + elecstate->wg, + elecstate->skip_weights), + ::testing::ExitedWithCode (1), + ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("total number of occupations is wrong , please check ocp_set")); } -TEST_F(ElecStateTest, CalEBand) +TEST_F (ElecStateTest, CalEBand) { - EXPECT_EQ(PARAM.input.nbands, 6); + EXPECT_EQ (PARAM.input.nbands, 6); int nks = 5; - elecstate->wg.create(nks, PARAM.input.nbands); - elecstate->ekb.create(nks, PARAM.input.nbands); + elecstate->wg.create (nks, PARAM.input.nbands); + elecstate->ekb.create (nks, PARAM.input.nbands); for (int ik = 0; ik < nks; ++ik) - { - for (int ib = 0; ib < PARAM.input.nbands; ++ib) { - elecstate->ekb(ik, ib) = 1.0; - elecstate->wg(ik, ib) = 2.0; + for (int ib = 0; ib < PARAM.input.nbands; ++ib) + { + elecstate->ekb (ik, ib) = 1.0; + elecstate->wg (ik, ib) = 2.0; + } } - } GlobalV::KPAR = 2; - elecstate::calEBand(elecstate->ekb, elecstate->wg, elecstate->f_en); - 
EXPECT_DOUBLE_EQ(elecstate->f_en.eband, 60.0); + elecstate::calEBand (elecstate->ekb, elecstate->wg, elecstate->f_en); + EXPECT_DOUBLE_EQ (elecstate->f_en.eband, 60.0); } -TEST_F(ElecStateTest, CalculateWeightsSkipWeights) +TEST_F (ElecStateTest, CalculateWeightsSkipWeights) { - EXPECT_FALSE(elecstate->skip_weights); + EXPECT_FALSE (elecstate->skip_weights); elecstate->skip_weights = true; - EXPECT_NO_THROW(elecstate::calculate_weights(elecstate->ekb, - elecstate->wg, - elecstate->klist, - elecstate->eferm, - elecstate->f_en, - elecstate->nelec_spin, - elecstate->skip_weights)); + EXPECT_NO_THROW (elecstate::calculate_weights (elecstate->ekb, + elecstate->wg, + elecstate->klist, + elecstate->eferm, + elecstate->f_en, + elecstate->nelec_spin, + elecstate->skip_weights)); } -TEST_F(ElecStateDeathTest, CalculateWeightsFixedOccupations) +TEST_F (ElecStateDeathTest, CalculateWeightsFixedOccupations) { Occupy::fixed_occupations = true; - testing::internal::CaptureStdout(); - EXPECT_EXIT(elecstate::calculate_weights(elecstate->ekb, - elecstate->wg, - elecstate->klist, - elecstate->eferm, - elecstate->f_en, - elecstate->nelec_spin, - elecstate->skip_weights), - ::testing::ExitedWithCode(1), - ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("other occupations, not implemented")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (elecstate::calculate_weights (elecstate->ekb, + elecstate->wg, + elecstate->klist, + elecstate->eferm, + elecstate->f_en, + elecstate->nelec_spin, + elecstate->skip_weights), + ::testing::ExitedWithCode (1), + ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("other occupations, not implemented")); Occupy::fixed_occupations = false; } -TEST_F(ElecStateTest, CalculateWeightsIWeights) +TEST_F (ElecStateTest, CalculateWeightsIWeights) { - EXPECT_FALSE(elecstate->skip_weights); + EXPECT_FALSE (elecstate->skip_weights); int nks = 5; K_Vectors* klist = 
new K_Vectors; - klist->set_nks(nks); - klist->wk.resize(nks); + klist->set_nks (nks); + klist->wk.resize (nks); for (int ik = 0; ik < nks; ++ik) - { - klist->wk[ik] = 2.0; - } - klist->isk.resize(nks); + { + klist->wk[ik] = 2.0; + } + klist->isk.resize (nks); for (int ik = 0; ik < nks; ++ik) - { - klist->isk[ik] = 0; - } + { + klist->isk[ik] = 0; + } elecstate->eferm.ef = 0.0; elecstate->klist = klist; - elecstate->ekb.create(nks, PARAM.input.nbands); + elecstate->ekb.create (nks, PARAM.input.nbands); for (int ik = 0; ik < nks; ++ik) - { - for (int ib = 0; ib < PARAM.input.nbands; ++ib) { - elecstate->ekb(ik, ib) = 100.0; + for (int ib = 0; ib < PARAM.input.nbands; ++ib) + { + elecstate->ekb (ik, ib) = 100.0; + } } - } - elecstate->wg.create(nks, PARAM.input.nbands); - elecstate::calculate_weights(elecstate->ekb, - elecstate->wg, - elecstate->klist, - elecstate->eferm, - elecstate->f_en, - elecstate->nelec_spin, - elecstate->skip_weights); - EXPECT_DOUBLE_EQ(elecstate->wg(0, 0), 2.0); - EXPECT_DOUBLE_EQ(elecstate->wg(nks - 1, PARAM.input.nelec / 2 - 1), 2.0); - EXPECT_DOUBLE_EQ(elecstate->wg(nks - 1, PARAM.input.nbands - 1), 0.0); - EXPECT_DOUBLE_EQ(elecstate->eferm.ef, 100.0); + elecstate->wg.create (nks, PARAM.input.nbands); + elecstate::calculate_weights (elecstate->ekb, + elecstate->wg, + elecstate->klist, + elecstate->eferm, + elecstate->f_en, + elecstate->nelec_spin, + elecstate->skip_weights); + EXPECT_DOUBLE_EQ (elecstate->wg (0, 0), 2.0); + EXPECT_DOUBLE_EQ (elecstate->wg (nks - 1, PARAM.input.nelec / 2 - 1), 2.0); + EXPECT_DOUBLE_EQ (elecstate->wg (nks - 1, PARAM.input.nbands - 1), 0.0); + EXPECT_DOUBLE_EQ (elecstate->eferm.ef, 100.0); delete klist; } -TEST_F(ElecStateTest, CalculateWeightsIWeightsTwoFermi) +TEST_F (ElecStateTest, CalculateWeightsIWeightsTwoFermi) { // get nelec_spin PARAM.sys.two_fermi = true; PARAM.input.nspin = 2; - elecstate->init_nelec_spin(); - EXPECT_EQ(elecstate->nelec_spin[0], 5.0); - EXPECT_EQ(elecstate->nelec_spin[1], 5.0); + 
elecstate->init_nelec_spin (); + EXPECT_EQ (elecstate->nelec_spin[0], 5.0); + EXPECT_EQ (elecstate->nelec_spin[1], 5.0); // - EXPECT_FALSE(elecstate->skip_weights); + EXPECT_FALSE (elecstate->skip_weights); int nks = 5 * PARAM.input.nspin; K_Vectors* klist = new K_Vectors; - klist->set_nks(nks); - klist->wk.resize(nks); + klist->set_nks (nks); + klist->wk.resize (nks); for (int ik = 0; ik < nks; ++ik) - { - if (ik < 5) { - klist->wk[ik] = 1.1; - } - else - { - klist->wk[ik] = 1.0; + if (ik < 5) + { + klist->wk[ik] = 1.1; + } + else + { + klist->wk[ik] = 1.0; + } } - } - klist->isk.resize(nks); + klist->isk.resize (nks); for (int ik = 0; ik < nks; ++ik) - { - if (ik < 5) { - klist->isk[ik] = 0; - } - else - { - klist->isk[ik] = 1; + if (ik < 5) + { + klist->isk[ik] = 0; + } + else + { + klist->isk[ik] = 1; + } } - } elecstate->eferm.ef_up = 0.0; elecstate->eferm.ef_dw = 0.0; elecstate->klist = klist; - elecstate->ekb.create(nks, PARAM.input.nbands); + elecstate->ekb.create (nks, PARAM.input.nbands); for (int ik = 0; ik < nks; ++ik) - { - for (int ib = 0; ib < PARAM.input.nbands; ++ib) { - if (ik < 5) - { - elecstate->ekb(ik, ib) = 100.0; - } - else - { - elecstate->ekb(ik, ib) = 200.0; - } + for (int ib = 0; ib < PARAM.input.nbands; ++ib) + { + if (ik < 5) + { + elecstate->ekb (ik, ib) = 100.0; + } + else + { + elecstate->ekb (ik, ib) = 200.0; + } + } } - } - elecstate->wg.create(nks, PARAM.input.nbands); - elecstate::calculate_weights(elecstate->ekb, - elecstate->wg, - elecstate->klist, - elecstate->eferm, - elecstate->f_en, - elecstate->nelec_spin, - elecstate->skip_weights); - EXPECT_DOUBLE_EQ(elecstate->wg(0, 0), 1.1); - EXPECT_DOUBLE_EQ(elecstate->wg(nks - 1, PARAM.input.nelec / 2 - 1), 1.0); - EXPECT_DOUBLE_EQ(elecstate->wg(nks - 1, PARAM.input.nbands - 1), 0.0); - EXPECT_DOUBLE_EQ(elecstate->eferm.ef_up, 100.0); - EXPECT_DOUBLE_EQ(elecstate->eferm.ef_dw, 200.0); + elecstate->wg.create (nks, PARAM.input.nbands); + elecstate::calculate_weights (elecstate->ekb, 
+ elecstate->wg, + elecstate->klist, + elecstate->eferm, + elecstate->f_en, + elecstate->nelec_spin, + elecstate->skip_weights); + EXPECT_DOUBLE_EQ (elecstate->wg (0, 0), 1.1); + EXPECT_DOUBLE_EQ (elecstate->wg (nks - 1, PARAM.input.nelec / 2 - 1), 1.0); + EXPECT_DOUBLE_EQ (elecstate->wg (nks - 1, PARAM.input.nbands - 1), 0.0); + EXPECT_DOUBLE_EQ (elecstate->eferm.ef_up, 100.0); + EXPECT_DOUBLE_EQ (elecstate->eferm.ef_dw, 200.0); delete klist; } -TEST_F(ElecStateTest, CalculateWeightsGWeights) +TEST_F (ElecStateTest, CalculateWeightsGWeights) { Occupy::use_gaussian_broadening = true; - EXPECT_FALSE(elecstate->skip_weights); + EXPECT_FALSE (elecstate->skip_weights); int nks = 5; K_Vectors* klist = new K_Vectors; - klist->set_nks(nks); - klist->wk.resize(nks); + klist->set_nks (nks); + klist->wk.resize (nks); for (int ik = 0; ik < nks; ++ik) - { - klist->wk[ik] = 2.0; - } - klist->isk.resize(nks); + { + klist->wk[ik] = 2.0; + } + klist->isk.resize (nks); for (int ik = 0; ik < nks; ++ik) - { - klist->isk[ik] = 0; - } + { + klist->isk[ik] = 0; + } elecstate->eferm.ef = 0.0; elecstate->klist = klist; - elecstate->ekb.create(nks, PARAM.input.nbands); + elecstate->ekb.create (nks, PARAM.input.nbands); for (int ik = 0; ik < nks; ++ik) - { - for (int ib = 0; ib < PARAM.input.nbands; ++ib) { - elecstate->ekb(ik, ib) = 100.0; + for (int ib = 0; ib < PARAM.input.nbands; ++ib) + { + elecstate->ekb (ik, ib) = 100.0; + } } - } - elecstate->wg.create(nks, PARAM.input.nbands); - elecstate::calculate_weights(elecstate->ekb, - elecstate->wg, - elecstate->klist, - elecstate->eferm, - elecstate->f_en, - elecstate->nelec_spin, - elecstate->skip_weights); + elecstate->wg.create (nks, PARAM.input.nbands); + elecstate::calculate_weights (elecstate->ekb, + elecstate->wg, + elecstate->klist, + elecstate->eferm, + elecstate->f_en, + elecstate->nelec_spin, + elecstate->skip_weights); // PARAM.input.nelec = 10; // PARAM.input.nbands = 6; // nks = 5; // wg = 10/(5*6) = 0.33333333333 - 
EXPECT_NEAR(elecstate->wg(0, 0), 0.33333333333, 1e-10); - EXPECT_NEAR(elecstate->wg(nks - 1, PARAM.input.nelec / 2 - 1), 0.33333333333, 1e-10); - EXPECT_NEAR(elecstate->wg(nks - 1, PARAM.input.nbands - 1), 0.33333333333, 1e-10); - EXPECT_NEAR(elecstate->eferm.ef, 99.993159296503, 1e-10); + EXPECT_NEAR (elecstate->wg (0, 0), 0.33333333333, 1e-10); + EXPECT_NEAR (elecstate->wg (nks - 1, PARAM.input.nelec / 2 - 1), 0.33333333333, 1e-10); + EXPECT_NEAR (elecstate->wg (nks - 1, PARAM.input.nbands - 1), 0.33333333333, 1e-10); + EXPECT_NEAR (elecstate->eferm.ef, 99.993159296503, 1e-10); delete klist; Occupy::use_gaussian_broadening = false; } -TEST_F(ElecStateTest, CalculateWeightsGWeightsTwoFermi) +TEST_F (ElecStateTest, CalculateWeightsGWeightsTwoFermi) { Occupy::use_gaussian_broadening = true; // get nelec_spin PARAM.sys.two_fermi = true; PARAM.input.nspin = 2; - elecstate->init_nelec_spin(); - EXPECT_EQ(elecstate->nelec_spin[0], 5.0); - EXPECT_EQ(elecstate->nelec_spin[1], 5.0); + elecstate->init_nelec_spin (); + EXPECT_EQ (elecstate->nelec_spin[0], 5.0); + EXPECT_EQ (elecstate->nelec_spin[1], 5.0); // - EXPECT_FALSE(elecstate->skip_weights); + EXPECT_FALSE (elecstate->skip_weights); int nks = 5 * PARAM.input.nspin; K_Vectors* klist = new K_Vectors; - klist->set_nks(nks); - klist->wk.resize(nks); + klist->set_nks (nks); + klist->wk.resize (nks); for (int ik = 0; ik < nks; ++ik) - { - if (ik < 5) - { - klist->wk[ik] = 1.1; - } - else { - klist->wk[ik] = 1.0; + if (ik < 5) + { + klist->wk[ik] = 1.1; + } + else + { + klist->wk[ik] = 1.0; + } } - } - klist->isk.resize(nks); + klist->isk.resize (nks); for (int ik = 0; ik < nks; ++ik) - { - if (ik < 5) - { - klist->isk[ik] = 0; - } - else { - klist->isk[ik] = 1; + if (ik < 5) + { + klist->isk[ik] = 0; + } + else + { + klist->isk[ik] = 1; + } } - } elecstate->eferm.ef_up = 0.0; elecstate->eferm.ef_dw = 0.0; elecstate->klist = klist; - elecstate->ekb.create(nks, PARAM.input.nbands); + elecstate->ekb.create (nks, 
PARAM.input.nbands); for (int ik = 0; ik < nks; ++ik) - { - for (int ib = 0; ib < PARAM.input.nbands; ++ib) { - if (ik < 5) - { - elecstate->ekb(ik, ib) = 100.0; - } - else - { - elecstate->ekb(ik, ib) = 200.0; - } + for (int ib = 0; ib < PARAM.input.nbands; ++ib) + { + if (ik < 5) + { + elecstate->ekb (ik, ib) = 100.0; + } + else + { + elecstate->ekb (ik, ib) = 200.0; + } + } } - } - elecstate->wg.create(nks, PARAM.input.nbands); - elecstate::calculate_weights(elecstate->ekb, - elecstate->wg, - elecstate->klist, - elecstate->eferm, - elecstate->f_en, - elecstate->nelec_spin, - elecstate->skip_weights); + elecstate->wg.create (nks, PARAM.input.nbands); + elecstate::calculate_weights (elecstate->ekb, + elecstate->wg, + elecstate->klist, + elecstate->eferm, + elecstate->f_en, + elecstate->nelec_spin, + elecstate->skip_weights); // PARAM.input.nelec = 10; // PARAM.input.nbands = 6; // nks = 10; // wg = 10/(10*6) = 0.16666666666 - EXPECT_NEAR(elecstate->wg(0, 0), 0.16666666666, 1e-10); - EXPECT_NEAR(elecstate->wg(nks - 1, PARAM.input.nelec / 2 - 1), 0.16666666666, 1e-10); - EXPECT_NEAR(elecstate->wg(nks - 1, PARAM.input.nbands - 1), 0.16666666666, 1e-10); - EXPECT_NEAR(elecstate->eferm.ef_up, 99.992717105890961, 1e-10); - EXPECT_NEAR(elecstate->eferm.ef_dw, 199.99315929650351, 1e-10); + EXPECT_NEAR (elecstate->wg (0, 0), 0.16666666666, 1e-10); + EXPECT_NEAR (elecstate->wg (nks - 1, PARAM.input.nelec / 2 - 1), 0.16666666666, 1e-10); + EXPECT_NEAR (elecstate->wg (nks - 1, PARAM.input.nbands - 1), 0.16666666666, 1e-10); + EXPECT_NEAR (elecstate->eferm.ef_up, 99.992717105890961, 1e-10); + EXPECT_NEAR (elecstate->eferm.ef_dw, 199.99315929650351, 1e-10); delete klist; Occupy::use_gaussian_broadening = false; } diff --git a/source/source_estate/test/elecstate_energy_test.cpp b/source/source_estate/test/elecstate_energy_test.cpp index 00b94bacc48..36ef119eb20 100644 --- a/source/source_estate/test/elecstate_energy_test.cpp +++ 
b/source/source_estate/test/elecstate_energy_test.cpp @@ -13,37 +13,45 @@ int XC_Functional::func_type = 1; bool XC_Functional::ked_flag = false; namespace elecstate { -void Potential::get_vnew(Charge const*, ModuleBase::matrix&) +void + Potential::get_vnew (Charge const*, ModuleBase::matrix&) { return; } -double ElecState::get_hartree_energy() +double + ElecState::get_hartree_energy () { return 0.1; } -double ElecState::get_etot_efield() +double + ElecState::get_etot_efield () { return 0.2; } -double ElecState::get_etot_gatefield() +double + ElecState::get_etot_gatefield () { return 0.3; } -double ElecState::get_solvent_model_Ael() +double + ElecState::get_solvent_model_Ael () { return 0.4; } -double ElecState::get_solvent_model_Acav() +double + ElecState::get_solvent_model_Acav () { return 0.5; } #ifdef __LCAO -double ElecState::get_dftu_energy() +double + ElecState::get_dftu_energy () { return 0.6; } #endif -double ElecState::get_local_pp_energy() +double + ElecState::get_local_pp_energy () { return 0.7; } @@ -51,7 +59,6 @@ double ElecState::get_local_pp_energy() #include "source_cell/klist.h" - /*************************************************************** * unit test of functions in elecstate_energy.cpp ****************************************************************/ @@ -65,14 +72,15 @@ namespace elecstate class MockElecState : public ElecState { public: - void Set_GlobalV_Default() + void + Set_GlobalV_Default () { PARAM.input.imp_sol = false; PARAM.input.dft_plus_u = 0; // base class PARAM.input.nspin = 1; PARAM.input.nelec = 10.0; - PARAM.input.nupdown = 0.0; + PARAM.input.nupdown = 0.0; PARAM.sys.two_fermi = false; PARAM.input.nbands = 6; PARAM.sys.nlocal = 6; @@ -84,7 +92,8 @@ class MockElecState : public ElecState PARAM.input.sc_mag_switch = true; } }; -const double* ElecState::getRho(int spin) const +const double* + ElecState::getRho (int spin) const { return &(this->eferm.ef); } // just for mock @@ -94,195 +103,197 @@ class ElecStateEnergyTest : 
public ::testing::Test { protected: elecstate::MockElecState* elecstate; - void SetUp() override + void + SetUp () override { elecstate = new elecstate::MockElecState; - elecstate->Set_GlobalV_Default(); + elecstate->Set_GlobalV_Default (); } - void TearDown() override + void + TearDown () override { delete elecstate; } }; -TEST_F(ElecStateEnergyTest, CalEnergiesHarris) +TEST_F (ElecStateEnergyTest, CalEnergiesHarris) { elecstate->f_en.deband_harris = 0.1; - elecstate->cal_energies(1); + elecstate->cal_energies (1); // deband_harris + hatree + efiled + gatefield + escon - EXPECT_DOUBLE_EQ(elecstate->f_en.etot_harris, 0.7); + EXPECT_DOUBLE_EQ (elecstate->f_en.etot_harris, 0.7); } -TEST_F(ElecStateEnergyTest, CalEnergiesHarrisImpSol) +TEST_F (ElecStateEnergyTest, CalEnergiesHarrisImpSol) { elecstate->f_en.deband_harris = 0.1; PARAM.input.imp_sol = true; - elecstate->cal_energies(1); + elecstate->cal_energies (1); // deband_harris + hatree + efiled + gatefield + esol_el + esol_cav + escon - EXPECT_DOUBLE_EQ(elecstate->f_en.etot_harris, 1.6); + EXPECT_DOUBLE_EQ (elecstate->f_en.etot_harris, 1.6); } -TEST_F(ElecStateEnergyTest, CalEnergiesHarrisDFTU) +TEST_F (ElecStateEnergyTest, CalEnergiesHarrisDFTU) { elecstate->f_en.deband_harris = 0.1; PARAM.input.dft_plus_u = 1; - elecstate->cal_energies(1); + elecstate->cal_energies (1); // deband_harris + hatree + efiled + gatefield + edftu + escon #ifdef __LCAO - EXPECT_DOUBLE_EQ(elecstate->f_en.etot_harris, 1.3); + EXPECT_DOUBLE_EQ (elecstate->f_en.etot_harris, 1.3); #else - EXPECT_DOUBLE_EQ(elecstate->f_en.etot_harris, 0.7); + EXPECT_DOUBLE_EQ (elecstate->f_en.etot_harris, 0.7); #endif } -TEST_F(ElecStateEnergyTest, CalEnergiesEtot) +TEST_F (ElecStateEnergyTest, CalEnergiesEtot) { elecstate->f_en.deband = 0.1; - elecstate->cal_energies(2); + elecstate->cal_energies (2); // deband + hatree + efiled + gatefield + escon - EXPECT_DOUBLE_EQ(elecstate->f_en.etot, 0.7); + EXPECT_DOUBLE_EQ (elecstate->f_en.etot, 0.7); } 
-TEST_F(ElecStateEnergyTest, CalEnergiesEtotImpSol) +TEST_F (ElecStateEnergyTest, CalEnergiesEtotImpSol) { elecstate->f_en.deband = 0.1; PARAM.input.imp_sol = true; - elecstate->cal_energies(2); + elecstate->cal_energies (2); // deband + hatree + efiled + gatefield + esol_el + esol_cav + escon - EXPECT_DOUBLE_EQ(elecstate->f_en.etot, 1.6); + EXPECT_DOUBLE_EQ (elecstate->f_en.etot, 1.6); } -TEST_F(ElecStateEnergyTest, CalEnergiesEtotDFTU) +TEST_F (ElecStateEnergyTest, CalEnergiesEtotDFTU) { elecstate->f_en.deband = 0.1; PARAM.input.dft_plus_u = 1; - elecstate->cal_energies(2); + elecstate->cal_energies (2); // deband + hatree + efiled + gatefield + edftu + escon #ifdef __LCAO - EXPECT_DOUBLE_EQ(elecstate->f_en.etot, 1.3); + EXPECT_DOUBLE_EQ (elecstate->f_en.etot, 1.3); #else - EXPECT_DOUBLE_EQ(elecstate->f_en.etot, 0.7); + EXPECT_DOUBLE_EQ (elecstate->f_en.etot, 0.7); #endif } -TEST_F(ElecStateEnergyTest, CalConverged) +TEST_F (ElecStateEnergyTest, CalConverged) { - elecstate->cal_converged(); - EXPECT_TRUE(elecstate->vnew_exist); - EXPECT_DOUBLE_EQ(elecstate->f_en.descf, 0.0); + elecstate->cal_converged (); + EXPECT_TRUE (elecstate->vnew_exist); + EXPECT_DOUBLE_EQ (elecstate->f_en.descf, 0.0); } -TEST_F(ElecStateEnergyTest, CalBandgapTrivial) +TEST_F (ElecStateEnergyTest, CalBandgapTrivial) { - elecstate->cal_bandgap(); - EXPECT_DOUBLE_EQ(elecstate->bandgap, 0.0); + elecstate->cal_bandgap (); + EXPECT_DOUBLE_EQ (elecstate->bandgap, 0.0); } -TEST_F(ElecStateEnergyTest, CalBandgap) +TEST_F (ElecStateEnergyTest, CalBandgap) { K_Vectors* klist = new K_Vectors; - klist->set_nks(5); + klist->set_nks (5); elecstate->klist = klist; - elecstate->ekb.create(klist->get_nks(), PARAM.input.nbands); - for (int ik = 0; ik < klist->get_nks(); ik++) - { - for (int ib = 0; ib < PARAM.input.nbands; ib++) + elecstate->ekb.create (klist->get_nks (), PARAM.input.nbands); + for (int ik = 0; ik < klist->get_nks (); ik++) { - elecstate->ekb(ik, ib) = ib; + for (int ib = 0; ib < 
PARAM.input.nbands; ib++) + { + elecstate->ekb (ik, ib) = ib; + } } - } elecstate->eferm.ef = 2.5; - elecstate->cal_bandgap(); - EXPECT_DOUBLE_EQ(elecstate->bandgap, 1.0); + elecstate->cal_bandgap (); + EXPECT_DOUBLE_EQ (elecstate->bandgap, 1.0); } -TEST_F(ElecStateEnergyTest, CalBandgapUpDwTrivial) +TEST_F (ElecStateEnergyTest, CalBandgapUpDwTrivial) { - elecstate->cal_bandgap_updw(); - EXPECT_DOUBLE_EQ(elecstate->bandgap_up, 0.0); - EXPECT_DOUBLE_EQ(elecstate->bandgap_dw, 0.0); + elecstate->cal_bandgap_updw (); + EXPECT_DOUBLE_EQ (elecstate->bandgap_up, 0.0); + EXPECT_DOUBLE_EQ (elecstate->bandgap_dw, 0.0); } -TEST_F(ElecStateEnergyTest, CalBandgapUpDw) +TEST_F (ElecStateEnergyTest, CalBandgapUpDw) { K_Vectors* klist = new K_Vectors; - klist->set_nks(6); - klist->isk.resize(6); - for (int ik = 0; ik < klist->get_nks(); ik++) - { - if (ik < 3) + klist->set_nks (6); + klist->isk.resize (6); + for (int ik = 0; ik < klist->get_nks (); ik++) { - klist->isk[ik] = 0; + if (ik < 3) + { + klist->isk[ik] = 0; + } + else + { + klist->isk[ik] = 1; + } } - else - { - klist->isk[ik] = 1; - } - } elecstate->klist = klist; - elecstate->ekb.create(klist->get_nks(), PARAM.input.nbands); - for (int ik = 0; ik < klist->get_nks(); ik++) - { - for (int ib = 0; ib < PARAM.input.nbands; ib++) + elecstate->ekb.create (klist->get_nks (), PARAM.input.nbands); + for (int ik = 0; ik < klist->get_nks (); ik++) { - if (ik < 3) - { - elecstate->ekb(ik, ib) = ib; - } - else - { - elecstate->ekb(ik, ib) = 0.5*ib; - } + for (int ib = 0; ib < PARAM.input.nbands; ib++) + { + if (ik < 3) + { + elecstate->ekb (ik, ib) = ib; + } + else + { + elecstate->ekb (ik, ib) = 0.5 * ib; + } + } } - } elecstate->eferm.ef_up = 0.5; elecstate->eferm.ef_dw = 2.1; - elecstate->cal_bandgap_updw(); - EXPECT_DOUBLE_EQ(elecstate->bandgap_up, 1.0); - EXPECT_DOUBLE_EQ(elecstate->bandgap_dw, 0.5); + elecstate->cal_bandgap_updw (); + EXPECT_DOUBLE_EQ (elecstate->bandgap_up, 1.0); + EXPECT_DOUBLE_EQ (elecstate->bandgap_dw, 
0.5); } -TEST_F(ElecStateEnergyTest, CalBandgapBoundaryConditions) +TEST_F (ElecStateEnergyTest, CalBandgapBoundaryConditions) { K_Vectors* klist = new K_Vectors; - klist->set_nks(1); + klist->set_nks (1); elecstate->klist = klist; - elecstate->ekb.create(1, 1); + elecstate->ekb.create (1, 1); // Case 1: Only VBM found (all bands below Fermi level) - elecstate->ekb(0, 0) = -5.0; + elecstate->ekb (0, 0) = -5.0; elecstate->eferm.ef = 0.0; - elecstate->cal_bandgap(); + elecstate->cal_bandgap (); // Only VBM found, CBM is set to eferm.ef, so bandgap should be eferm.ef - vbm - EXPECT_DOUBLE_EQ(elecstate->bandgap, 5.0); + EXPECT_DOUBLE_EQ (elecstate->bandgap, 5.0); // Case 2: Only CBM found (all bands above Fermi level) - elecstate->ekb(0, 0) = 5.0; + elecstate->ekb (0, 0) = 5.0; elecstate->eferm.ef = 0.0; - elecstate->cal_bandgap(); + elecstate->cal_bandgap (); // Only CBM found, VBM is set to eferm.ef, so bandgap should be cbm - eferm.ef - EXPECT_DOUBLE_EQ(elecstate->bandgap, 5.0); + EXPECT_DOUBLE_EQ (elecstate->bandgap, 5.0); } -TEST_F(ElecStateEnergyTest, CalBandgapUpDwBoundaryConditions) +TEST_F (ElecStateEnergyTest, CalBandgapUpDwBoundaryConditions) { K_Vectors* klist = new K_Vectors; - klist->set_nks(2); - klist->isk.resize(2); + klist->set_nks (2); + klist->isk.resize (2); klist->isk[0] = 0; // spin up klist->isk[1] = 1; // spin down elecstate->klist = klist; - elecstate->ekb.create(2, 1); // 2 k-points, 1 band + elecstate->ekb.create (2, 1); // 2 k-points, 1 band // Spin UP: Only VBM (band < ef) - elecstate->ekb(0, 0) = -5.0; + elecstate->ekb (0, 0) = -5.0; elecstate->eferm.ef_up = 0.0; // Spin DW: Only CBM (band > ef) - elecstate->ekb(1, 0) = 5.0; + elecstate->ekb (1, 0) = 5.0; elecstate->eferm.ef_dw = 0.0; - elecstate->cal_bandgap_updw(); + elecstate->cal_bandgap_updw (); // up: Only VBM found, CBM is set to eferm.ef_up, so gap should be eferm.ef_up - vbm_up // dw: Only CBM found, VBM is set to eferm.ef_dw, so gap should be cbm_dw - eferm.ef_dw - 
EXPECT_DOUBLE_EQ(elecstate->bandgap_up, 5.0); - EXPECT_DOUBLE_EQ(elecstate->bandgap_dw, 5.0); + EXPECT_DOUBLE_EQ (elecstate->bandgap_up, 5.0); + EXPECT_DOUBLE_EQ (elecstate->bandgap_dw, 5.0); } diff --git a/source/source_estate/test/elecstate_fp_energy_test.cpp b/source/source_estate/test/elecstate_fp_energy_test.cpp index 66ef941f37f..a9f3beca38c 100644 --- a/source/source_estate/test/elecstate_fp_energy_test.cpp +++ b/source/source_estate/test/elecstate_fp_energy_test.cpp @@ -24,72 +24,72 @@ class fenergy : public ::testing::Test elecstate::Efermi eferm; }; -TEST_F(fenergy, calculate_etot) +TEST_F (fenergy, calculate_etot) { f_en.eband = 1.0; f_en.deband = 2.0; - f_en.calculate_etot(); - EXPECT_EQ(f_en.etot, 3.0); + f_en.calculate_etot (); + EXPECT_EQ (f_en.etot, 3.0); } -TEST_F(fenergy, calculate_harris) +TEST_F (fenergy, calculate_harris) { f_en.eband = 1.0; f_en.deband_harris = 2.0; - f_en.calculate_harris(); - EXPECT_EQ(f_en.etot_harris, 3.0); + f_en.calculate_harris (); + EXPECT_EQ (f_en.etot_harris, 3.0); } -TEST_F(fenergy, clear_all) +TEST_F (fenergy, clear_all) { f_en.eband = 1.0; f_en.etot = 2.0; - f_en.clear_all(); - EXPECT_EQ(f_en.eband, 0.0); - EXPECT_EQ(f_en.etot, 0.0); + f_en.clear_all (); + EXPECT_EQ (f_en.eband, 0.0); + EXPECT_EQ (f_en.etot, 0.0); } -TEST_F(fenergy, print_all) +TEST_F (fenergy, print_all) { - testing::internal::CaptureStdout(); - f_en.print_all(); - std::string output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("total=")); - EXPECT_THAT(output, testing::HasSubstr("entropy(-TS)=")); + testing::internal::CaptureStdout (); + f_en.print_all (); + std::string output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("total=")); + EXPECT_THAT (output, testing::HasSubstr ("entropy(-TS)=")); } -TEST_F(fenergy, eferm_get_ef) +TEST_F (fenergy, eferm_get_ef) { eferm.two_efermi = false; - eferm.set_efval(0, 0.7); - EXPECT_EQ(eferm.ef, 0.7); - eferm.set_efval(2, 0.77); - 
EXPECT_EQ(eferm.ef, 0.77); + eferm.set_efval (0, 0.7); + EXPECT_EQ (eferm.ef, 0.7); + eferm.set_efval (2, 0.77); + EXPECT_EQ (eferm.ef, 0.77); eferm.two_efermi = true; - eferm.set_efval(0, 0.6); - EXPECT_EQ(eferm.ef_up, 0.6); - eferm.set_efval(1, -1.0); - EXPECT_EQ(eferm.ef_dw, -1.0); + eferm.set_efval (0, 0.6); + EXPECT_EQ (eferm.ef_up, 0.6); + eferm.set_efval (1, -1.0); + EXPECT_EQ (eferm.ef_dw, -1.0); - testing::internal::CaptureStdout(); - EXPECT_EXIT(eferm.set_efval(3, 1.0);, ::testing::ExitedWithCode(1), ""); - std::string output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("Please check NSPIN when TWO_EFERMI is true")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (eferm.set_efval (3, 1.0);, ::testing::ExitedWithCode (1), ""); + std::string output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("Please check NSPIN when TWO_EFERMI is true")); } -TEST_F(fenergy, eferm_get_efval) +TEST_F (fenergy, eferm_get_efval) { eferm.ef = 0.0; eferm.ef_up = 1.0; eferm.ef_dw = -1.0; eferm.two_efermi = false; - EXPECT_EQ(eferm.get_efval(0), 0.0); + EXPECT_EQ (eferm.get_efval (0), 0.0); eferm.two_efermi = true; - EXPECT_EQ(eferm.get_efval(0), 1.0); - EXPECT_EQ(eferm.get_efval(1), -1.0); + EXPECT_EQ (eferm.get_efval (0), 1.0); + EXPECT_EQ (eferm.get_efval (1), -1.0); - testing::internal::CaptureStdout(); - EXPECT_EXIT(double tmpp = eferm.get_efval(2);, ::testing::ExitedWithCode(1), ""); - std::string output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("Please check NSPIN when TWO_EFERMI is true")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (double tmpp = eferm.get_efval (2);, ::testing::ExitedWithCode (1), ""); + std::string output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, testing::HasSubstr ("Please check NSPIN when TWO_EFERMI is true")); } diff --git a/source/source_estate/test/elecstate_magnetism_test.cpp 
b/source/source_estate/test/elecstate_magnetism_test.cpp index 2d92f80ea02..0b02f945ced 100644 --- a/source/source_estate/test/elecstate_magnetism_test.cpp +++ b/source/source_estate/test/elecstate_magnetism_test.cpp @@ -21,131 +21,129 @@ * - Magnetism::judge_parallel() * - Magnetism::compute_mag() * - compute mag for spin-polarized system when PARAM.input.nspin = 2 - * - and non-collinear case with PARAM.input.nspin = 4 -*/ + * - and non-collinear case with PARAM.input.nspin = 4 + */ #define private public #include "source_estate/magnetism.h" #undef private -Charge::Charge() -{ -} -Charge::~Charge() -{ -} - +Charge::Charge () {} +Charge::~Charge () {} class MagnetismTest : public ::testing::Test { protected: Magnetism* magnetism; - virtual void SetUp() + virtual void + SetUp () { magnetism = new Magnetism; } - virtual void TearDown() + virtual void + TearDown () { delete magnetism; } }; -TEST_F(MagnetismTest, Magnetism) +TEST_F (MagnetismTest, Magnetism) { - EXPECT_EQ(0.0, magnetism->tot_mag); - EXPECT_EQ(0.0, magnetism->abs_mag); - EXPECT_EQ(nullptr, magnetism->start_mag); + EXPECT_EQ (0.0, magnetism->tot_mag); + EXPECT_EQ (0.0, magnetism->abs_mag); + EXPECT_EQ (nullptr, magnetism->start_mag); } -TEST_F(MagnetismTest, JudgeParallel) +TEST_F (MagnetismTest, JudgeParallel) { double a[3] = {1.0, 0.0, 0.0}; - ModuleBase::Vector3 b(1.0, 0.0, 0.0); - EXPECT_TRUE(magnetism->judge_parallel(a, b)); - b = ModuleBase::Vector3(0.0, 1.0, 0.0); - EXPECT_FALSE(magnetism->judge_parallel(a, b)); + ModuleBase::Vector3 b (1.0, 0.0, 0.0); + EXPECT_TRUE (magnetism->judge_parallel (a, b)); + b = ModuleBase::Vector3 (0.0, 1.0, 0.0); + EXPECT_FALSE (magnetism->judge_parallel (a, b)); } -TEST_F(MagnetismTest, ComputeMagnetizationS2) +TEST_F (MagnetismTest, ComputeMagnetizationS2) { - PARAM.input.nspin = 2; - PARAM.sys.two_fermi = false; - PARAM.input.nelec = 10.0; - - Charge* chr = new Charge; - chr->nrxx = 100; - chr->nxyz = 1000; - chr->rho = new double*[PARAM.input.nspin]; - for (int 
i=0; i< PARAM.input.nspin; i++) - { - chr->rho[i] = new double[chr->nrxx]; - } - for (int ir=0; ir< chr->nrxx; ir++) - { - chr->rho[0][ir] = 1.00; - chr->rho[1][ir] = 1.01; - } - double* nelec_spin = new double[2]; - magnetism->compute_mag(500.0,chr->nrxx, chr->nxyz, chr->rho, nelec_spin); - EXPECT_DOUBLE_EQ(-0.5, magnetism->tot_mag); - EXPECT_DOUBLE_EQ(0.5, magnetism->abs_mag); - EXPECT_DOUBLE_EQ(4.75, nelec_spin[0]); - EXPECT_DOUBLE_EQ(5.25, nelec_spin[1]); - delete[] nelec_spin; - for (int i=0; i< PARAM.input.nspin; i++) - { - delete[] chr->rho[i]; - } - delete[] chr->rho; - delete chr; + PARAM.input.nspin = 2; + PARAM.sys.two_fermi = false; + PARAM.input.nelec = 10.0; + + Charge* chr = new Charge; + chr->nrxx = 100; + chr->nxyz = 1000; + chr->rho = new double*[PARAM.input.nspin]; + for (int i = 0; i < PARAM.input.nspin; i++) + { + chr->rho[i] = new double[chr->nrxx]; + } + for (int ir = 0; ir < chr->nrxx; ir++) + { + chr->rho[0][ir] = 1.00; + chr->rho[1][ir] = 1.01; + } + double* nelec_spin = new double[2]; + magnetism->compute_mag (500.0, chr->nrxx, chr->nxyz, chr->rho, nelec_spin); + EXPECT_DOUBLE_EQ (-0.5, magnetism->tot_mag); + EXPECT_DOUBLE_EQ (0.5, magnetism->abs_mag); + EXPECT_DOUBLE_EQ (4.75, nelec_spin[0]); + EXPECT_DOUBLE_EQ (5.25, nelec_spin[1]); + delete[] nelec_spin; + for (int i = 0; i < PARAM.input.nspin; i++) + { + delete[] chr->rho[i]; + } + delete[] chr->rho; + delete chr; } -TEST_F(MagnetismTest, ComputeMagnetizationS4) +TEST_F (MagnetismTest, ComputeMagnetizationS4) { - PARAM.input.nspin = 4; - - Charge* chr = new Charge; - chr->rho = new double*[PARAM.input.nspin]; - chr->nrxx = 100; - chr->nxyz = 1000; - for (int i=0; i< PARAM.input.nspin; i++) - { - chr->rho[i] = new double[chr->nrxx]; - } - for (int ir=0; ir< chr->nrxx; ir++) - { - chr->rho[0][ir] = 1.00; - chr->rho[1][ir] = std::sqrt(2.0); - chr->rho[2][ir] = 1.00; - chr->rho[3][ir] = 1.00; - } - double* nelec_spin = new double[4]; - magnetism->compute_mag(500.0,chr->nrxx, chr->nxyz, 
chr->rho, nelec_spin); - EXPECT_DOUBLE_EQ(100.0, magnetism->abs_mag); - EXPECT_DOUBLE_EQ(50.0*std::sqrt(2.0), magnetism->tot_mag_nc[0]); - EXPECT_DOUBLE_EQ(50.0, magnetism->tot_mag_nc[1]); - EXPECT_DOUBLE_EQ(50.0, magnetism->tot_mag_nc[2]); - delete[] nelec_spin; - for (int i=0; i< PARAM.input.nspin; i++) - { - delete[] chr->rho[i]; - } - delete[] chr->rho; - delete chr; + PARAM.input.nspin = 4; + + Charge* chr = new Charge; + chr->rho = new double*[PARAM.input.nspin]; + chr->nrxx = 100; + chr->nxyz = 1000; + for (int i = 0; i < PARAM.input.nspin; i++) + { + chr->rho[i] = new double[chr->nrxx]; + } + for (int ir = 0; ir < chr->nrxx; ir++) + { + chr->rho[0][ir] = 1.00; + chr->rho[1][ir] = std::sqrt (2.0); + chr->rho[2][ir] = 1.00; + chr->rho[3][ir] = 1.00; + } + double* nelec_spin = new double[4]; + magnetism->compute_mag (500.0, chr->nrxx, chr->nxyz, chr->rho, nelec_spin); + EXPECT_DOUBLE_EQ (100.0, magnetism->abs_mag); + EXPECT_DOUBLE_EQ (50.0 * std::sqrt (2.0), magnetism->tot_mag_nc[0]); + EXPECT_DOUBLE_EQ (50.0, magnetism->tot_mag_nc[1]); + EXPECT_DOUBLE_EQ (50.0, magnetism->tot_mag_nc[2]); + delete[] nelec_spin; + for (int i = 0; i < PARAM.input.nspin; i++) + { + delete[] chr->rho[i]; + } + delete[] chr->rho; + delete chr; } #ifdef __MPI #include -int main(int argc, char **argv) +int + main (int argc, char** argv) { - MPI_Init(&argc, &argv); - MPI_Comm_size(MPI_COMM_WORLD,&GlobalV::NPROC); - MPI_Comm_rank(MPI_COMM_WORLD,&GlobalV::MY_RANK); + MPI_Init (&argc, &argv); + MPI_Comm_size (MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); - testing::InitGoogleTest(&argc, argv); - int result = RUN_ALL_TESTS(); + testing::InitGoogleTest (&argc, argv); + int result = RUN_ALL_TESTS (); - MPI_Finalize(); + MPI_Finalize (); return result; } diff --git a/source/source_estate/test/elecstate_occupy_test.cpp b/source/source_estate/test/elecstate_occupy_test.cpp index f4c17ce24d4..39b47d25f5e 100644 --- 
a/source/source_estate/test/elecstate_occupy_test.cpp +++ b/source/source_estate/test/elecstate_occupy_test.cpp @@ -19,281 +19,283 @@ #undef private class OccupyTest : public ::testing::Test { -protected: - Occupy occupy; - std::string output; + protected: + Occupy occupy; + std::string output; }; -TEST_F(OccupyTest, Occupy) +TEST_F (OccupyTest, Occupy) { - EXPECT_EQ(occupy.use_gaussian_broadening, false); - EXPECT_FALSE(occupy.gauss()); - EXPECT_EQ(occupy.fixed_occupations, false); - EXPECT_FALSE(occupy.fix()); - EXPECT_EQ(occupy.gaussian_type, 0); - EXPECT_DOUBLE_EQ(occupy.gaussian_parameter, 0.01); + EXPECT_EQ (occupy.use_gaussian_broadening, false); + EXPECT_FALSE (occupy.gauss ()); + EXPECT_EQ (occupy.fixed_occupations, false); + EXPECT_FALSE (occupy.fix ()); + EXPECT_EQ (occupy.gaussian_type, 0); + EXPECT_DOUBLE_EQ (occupy.gaussian_parameter, 0.01); } -TEST_F(OccupyTest, DecisionFixed) +TEST_F (OccupyTest, DecisionFixed) { - EXPECT_NO_THROW(occupy.decision("fixed", "fixed", 0.1)); - EXPECT_EQ(occupy.use_gaussian_broadening, false); - EXPECT_EQ(occupy.fixed_occupations, false); - EXPECT_EQ(occupy.gaussian_type, 0); - EXPECT_DOUBLE_EQ(occupy.gaussian_parameter, 0.0); + EXPECT_NO_THROW (occupy.decision ("fixed", "fixed", 0.1)); + EXPECT_EQ (occupy.use_gaussian_broadening, false); + EXPECT_EQ (occupy.fixed_occupations, false); + EXPECT_EQ (occupy.gaussian_type, 0); + EXPECT_DOUBLE_EQ (occupy.gaussian_parameter, 0.0); } -TEST_F(OccupyTest, DecisionSmearingFixed) +TEST_F (OccupyTest, DecisionSmearingFixed) { - EXPECT_NO_THROW(occupy.decision("smearing", "fixed", 0.1)); - EXPECT_EQ(occupy.use_gaussian_broadening, false); - EXPECT_EQ(occupy.fixed_occupations, false); - EXPECT_EQ(occupy.gaussian_type, 0); - EXPECT_DOUBLE_EQ(occupy.gaussian_parameter, 0.0); + EXPECT_NO_THROW (occupy.decision ("smearing", "fixed", 0.1)); + EXPECT_EQ (occupy.use_gaussian_broadening, false); + EXPECT_EQ (occupy.fixed_occupations, false); + EXPECT_EQ (occupy.gaussian_type, 0); + 
EXPECT_DOUBLE_EQ (occupy.gaussian_parameter, 0.0); } -TEST_F(OccupyTest, DecisionSmearingGaussian) +TEST_F (OccupyTest, DecisionSmearingGaussian) { - EXPECT_NO_THROW(occupy.decision("smearing", "gaussian", 0.1)); - EXPECT_EQ(occupy.use_gaussian_broadening, true); - EXPECT_EQ(occupy.fixed_occupations, false); - EXPECT_EQ(occupy.gaussian_type, 0); - EXPECT_DOUBLE_EQ(occupy.gaussian_parameter, 0.1); + EXPECT_NO_THROW (occupy.decision ("smearing", "gaussian", 0.1)); + EXPECT_EQ (occupy.use_gaussian_broadening, true); + EXPECT_EQ (occupy.fixed_occupations, false); + EXPECT_EQ (occupy.gaussian_type, 0); + EXPECT_DOUBLE_EQ (occupy.gaussian_parameter, 0.1); } -TEST_F(OccupyTest, DecisionSmearingGauss) +TEST_F (OccupyTest, DecisionSmearingGauss) { - EXPECT_NO_THROW(occupy.decision("smearing", "gauss", 0.1)); - EXPECT_EQ(occupy.use_gaussian_broadening, true); - EXPECT_EQ(occupy.fixed_occupations, false); - EXPECT_EQ(occupy.gaussian_type, 0); - EXPECT_DOUBLE_EQ(occupy.gaussian_parameter, 0.1); + EXPECT_NO_THROW (occupy.decision ("smearing", "gauss", 0.1)); + EXPECT_EQ (occupy.use_gaussian_broadening, true); + EXPECT_EQ (occupy.fixed_occupations, false); + EXPECT_EQ (occupy.gaussian_type, 0); + EXPECT_DOUBLE_EQ (occupy.gaussian_parameter, 0.1); } -TEST_F(OccupyTest, DecisionSmearingGaussianWarning) +TEST_F (OccupyTest, DecisionSmearingGaussianWarning) { - testing::internal::CaptureStdout(); - EXPECT_EXIT(occupy.decision("smearing", "gaussian", 0.0), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - // test output on screen - EXPECT_THAT( - output, - testing::HasSubstr("Smearing requires gaussian broadening,but gaussian_parameter = 0(default value = 0.01)")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (occupy.decision ("smearing", "gaussian", 0.0), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + // test output on screen + EXPECT_THAT ( + output, + testing::HasSubstr ("Smearing requires 
gaussian broadening,but gaussian_parameter = 0(default value = 0.01)")); } -TEST_F(OccupyTest, DecisionSmearingMethfesselPaxton) +TEST_F (OccupyTest, DecisionSmearingMethfesselPaxton) { - EXPECT_NO_THROW(occupy.decision("smearing", "methfessel-paxton", 0.1)); - EXPECT_EQ(occupy.use_gaussian_broadening, true); - EXPECT_EQ(occupy.fixed_occupations, false); - EXPECT_EQ(occupy.gaussian_type, 1); - EXPECT_DOUBLE_EQ(occupy.gaussian_parameter, 0.1); + EXPECT_NO_THROW (occupy.decision ("smearing", "methfessel-paxton", 0.1)); + EXPECT_EQ (occupy.use_gaussian_broadening, true); + EXPECT_EQ (occupy.fixed_occupations, false); + EXPECT_EQ (occupy.gaussian_type, 1); + EXPECT_DOUBLE_EQ (occupy.gaussian_parameter, 0.1); } -TEST_F(OccupyTest, DecisionSmearingMP) +TEST_F (OccupyTest, DecisionSmearingMP) { - EXPECT_NO_THROW(occupy.decision("smearing", "mp", 0.1)); - EXPECT_EQ(occupy.use_gaussian_broadening, true); - EXPECT_EQ(occupy.fixed_occupations, false); - EXPECT_EQ(occupy.gaussian_type, 1); - EXPECT_DOUBLE_EQ(occupy.gaussian_parameter, 0.1); + EXPECT_NO_THROW (occupy.decision ("smearing", "mp", 0.1)); + EXPECT_EQ (occupy.use_gaussian_broadening, true); + EXPECT_EQ (occupy.fixed_occupations, false); + EXPECT_EQ (occupy.gaussian_type, 1); + EXPECT_DOUBLE_EQ (occupy.gaussian_parameter, 0.1); } -TEST_F(OccupyTest, DecissionMP2) +TEST_F (OccupyTest, DecissionMP2) { - EXPECT_NO_THROW(occupy.decision("smearing", "mp2", 0.1)); - EXPECT_EQ(occupy.use_gaussian_broadening, true); - EXPECT_EQ(occupy.fixed_occupations, false); - EXPECT_EQ(occupy.gaussian_type, 2); - EXPECT_DOUBLE_EQ(occupy.gaussian_parameter, 0.1); + EXPECT_NO_THROW (occupy.decision ("smearing", "mp2", 0.1)); + EXPECT_EQ (occupy.use_gaussian_broadening, true); + EXPECT_EQ (occupy.fixed_occupations, false); + EXPECT_EQ (occupy.gaussian_type, 2); + EXPECT_DOUBLE_EQ (occupy.gaussian_parameter, 0.1); } -TEST_F(OccupyTest, DecisionMarzariVanderbilt) +TEST_F (OccupyTest, DecisionMarzariVanderbilt) { - 
EXPECT_NO_THROW(occupy.decision("smearing", "marzari-vanderbilt", 0.1)); - EXPECT_EQ(occupy.use_gaussian_broadening, true); - EXPECT_EQ(occupy.fixed_occupations, false); - EXPECT_EQ(occupy.gaussian_type, -1); - EXPECT_DOUBLE_EQ(occupy.gaussian_parameter, 0.1); + EXPECT_NO_THROW (occupy.decision ("smearing", "marzari-vanderbilt", 0.1)); + EXPECT_EQ (occupy.use_gaussian_broadening, true); + EXPECT_EQ (occupy.fixed_occupations, false); + EXPECT_EQ (occupy.gaussian_type, -1); + EXPECT_DOUBLE_EQ (occupy.gaussian_parameter, 0.1); } -TEST_F(OccupyTest, DecisionCold) +TEST_F (OccupyTest, DecisionCold) { - EXPECT_NO_THROW(occupy.decision("smearing", "cold", 0.1)); - EXPECT_EQ(occupy.use_gaussian_broadening, true); - EXPECT_EQ(occupy.fixed_occupations, false); - EXPECT_EQ(occupy.gaussian_type, -1); - EXPECT_DOUBLE_EQ(occupy.gaussian_parameter, 0.1); + EXPECT_NO_THROW (occupy.decision ("smearing", "cold", 0.1)); + EXPECT_EQ (occupy.use_gaussian_broadening, true); + EXPECT_EQ (occupy.fixed_occupations, false); + EXPECT_EQ (occupy.gaussian_type, -1); + EXPECT_DOUBLE_EQ (occupy.gaussian_parameter, 0.1); } -TEST_F(OccupyTest, DecisionMV) +TEST_F (OccupyTest, DecisionMV) { - EXPECT_NO_THROW(occupy.decision("smearing", "mv", 0.1)); - EXPECT_EQ(occupy.use_gaussian_broadening, true); - EXPECT_EQ(occupy.fixed_occupations, false); - EXPECT_EQ(occupy.gaussian_type, -1); - EXPECT_DOUBLE_EQ(occupy.gaussian_parameter, 0.1); + EXPECT_NO_THROW (occupy.decision ("smearing", "mv", 0.1)); + EXPECT_EQ (occupy.use_gaussian_broadening, true); + EXPECT_EQ (occupy.fixed_occupations, false); + EXPECT_EQ (occupy.gaussian_type, -1); + EXPECT_DOUBLE_EQ (occupy.gaussian_parameter, 0.1); } -TEST_F(OccupyTest, DecisionFermiDirac) +TEST_F (OccupyTest, DecisionFermiDirac) { - EXPECT_NO_THROW(occupy.decision("smearing", "fermi-dirac", 0.1)); - EXPECT_EQ(occupy.use_gaussian_broadening, true); - EXPECT_EQ(occupy.fixed_occupations, false); - EXPECT_EQ(occupy.gaussian_type, -99); - 
EXPECT_DOUBLE_EQ(occupy.gaussian_parameter, 0.1); + EXPECT_NO_THROW (occupy.decision ("smearing", "fermi-dirac", 0.1)); + EXPECT_EQ (occupy.use_gaussian_broadening, true); + EXPECT_EQ (occupy.fixed_occupations, false); + EXPECT_EQ (occupy.gaussian_type, -99); + EXPECT_DOUBLE_EQ (occupy.gaussian_parameter, 0.1); } -TEST_F(OccupyTest, DecisionFD) +TEST_F (OccupyTest, DecisionFD) { - EXPECT_NO_THROW(occupy.decision("smearing", "fd", 0.1)); - EXPECT_EQ(occupy.use_gaussian_broadening, true); - EXPECT_EQ(occupy.fixed_occupations, false); - EXPECT_EQ(occupy.gaussian_type, -99); - EXPECT_DOUBLE_EQ(occupy.gaussian_parameter, 0.1); + EXPECT_NO_THROW (occupy.decision ("smearing", "fd", 0.1)); + EXPECT_EQ (occupy.use_gaussian_broadening, true); + EXPECT_EQ (occupy.fixed_occupations, false); + EXPECT_EQ (occupy.gaussian_type, -99); + EXPECT_DOUBLE_EQ (occupy.gaussian_parameter, 0.1); } -TEST_F(OccupyTest, DecisionTetrahedraWarning) +TEST_F (OccupyTest, DecisionTetrahedraWarning) { - testing::internal::CaptureStdout(); - EXPECT_EXIT(occupy.decision("tetrahedra", "gaussian", 0.0), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - // test output on screen - EXPECT_THAT(output, testing::HasSubstr("not implemented yet!")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (occupy.decision ("tetrahedra", "gaussian", 0.0), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + // test output on screen + EXPECT_THAT (output, testing::HasSubstr ("not implemented yet!")); } -TEST_F(OccupyTest, DecisionFromInput) +TEST_F (OccupyTest, DecisionFromInput) { - EXPECT_NO_THROW(occupy.decision("from_input", "fd", 0.1)); - EXPECT_EQ(occupy.use_gaussian_broadening, false); - EXPECT_EQ(occupy.fixed_occupations, true); - EXPECT_EQ(occupy.gaussian_type, 0); - EXPECT_DOUBLE_EQ(occupy.gaussian_parameter, 0.1); + EXPECT_NO_THROW (occupy.decision ("from_input", "fd", 0.1)); + EXPECT_EQ (occupy.use_gaussian_broadening, false); + 
EXPECT_EQ (occupy.fixed_occupations, true); + EXPECT_EQ (occupy.gaussian_type, 0); + EXPECT_DOUBLE_EQ (occupy.gaussian_parameter, 0.1); } -TEST_F(OccupyTest, DecisionArbitrary) +TEST_F (OccupyTest, DecisionArbitrary) { - testing::internal::CaptureStdout(); - EXPECT_EXIT(occupy.decision("arbitrary", "gaussian", 0.0), ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - // test output on screen - EXPECT_THAT(output, testing::HasSubstr("occupations, not implemented")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (occupy.decision ("arbitrary", "gaussian", 0.0), ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + // test output on screen + EXPECT_THAT (output, testing::HasSubstr ("occupations, not implemented")); } -TEST_F(OccupyTest, IweightsNOSPIN) +TEST_F (OccupyTest, IweightsNOSPIN) { - PARAM.input.nspin = 1; - double ef = 0.0; - ModuleBase::matrix wg(1, 1); - std::vector wk(1, 2.0); - ModuleBase::matrix ekb(1, 1); - std::vector isk(1); - ekb(0, 0) = 0.1; - occupy.iweights(1, wk, 1, 2.0, ekb, ef, wg, 0, isk); - EXPECT_DOUBLE_EQ(wg(0, 0), 2.0); - EXPECT_DOUBLE_EQ(ef, 0.1); + PARAM.input.nspin = 1; + double ef = 0.0; + ModuleBase::matrix wg (1, 1); + std::vector wk (1, 2.0); + ModuleBase::matrix ekb (1, 1); + std::vector isk (1); + ekb (0, 0) = 0.1; + occupy.iweights (1, wk, 1, 2.0, ekb, ef, wg, 0, isk); + EXPECT_DOUBLE_EQ (wg (0, 0), 2.0); + EXPECT_DOUBLE_EQ (ef, 0.1); } -TEST_F(OccupyTest, IweightsSPIN) +TEST_F (OccupyTest, IweightsSPIN) { - PARAM.input.nspin = 2; - double ef_up = 0.0; - double ef_dw = 0.0; - ModuleBase::matrix wg(2, 1); - std::vector wk(2, 1.0); - ModuleBase::matrix ekb(2, 1); - std::vector isk(2); - isk[0] = 0; - isk[1] = 1; - ekb(0, 0) = 0.1; - ekb(1, 0) = 0.2; - occupy.iweights(2, wk, 1, 1.0, ekb, ef_up, wg, 0, isk); - occupy.iweights(2, wk, 1, 1.0, ekb, ef_dw, wg, 1, isk); - EXPECT_DOUBLE_EQ(wg(0, 0), 1.0); - EXPECT_DOUBLE_EQ(wg(1, 0), 1.0); - 
EXPECT_DOUBLE_EQ(ef_up, 0.1); - EXPECT_DOUBLE_EQ(ef_dw, 0.2); + PARAM.input.nspin = 2; + double ef_up = 0.0; + double ef_dw = 0.0; + ModuleBase::matrix wg (2, 1); + std::vector wk (2, 1.0); + ModuleBase::matrix ekb (2, 1); + std::vector isk (2); + isk[0] = 0; + isk[1] = 1; + ekb (0, 0) = 0.1; + ekb (1, 0) = 0.2; + occupy.iweights (2, wk, 1, 1.0, ekb, ef_up, wg, 0, isk); + occupy.iweights (2, wk, 1, 1.0, ekb, ef_dw, wg, 1, isk); + EXPECT_DOUBLE_EQ (wg (0, 0), 1.0); + EXPECT_DOUBLE_EQ (wg (1, 0), 1.0); + EXPECT_DOUBLE_EQ (ef_up, 0.1); + EXPECT_DOUBLE_EQ (ef_dw, 0.2); } -TEST_F(OccupyTest, IweightsWarning) +TEST_F (OccupyTest, IweightsWarning) { - PARAM.input.nspin = 1; - double ef = 0.0; - ModuleBase::matrix wg(1, 1); - std::vector wk(1, 2.0); - ModuleBase::matrix ekb(1, 1); - std::vector isk(1); - ekb(0, 0) = 0.1; + PARAM.input.nspin = 1; + double ef = 0.0; + ModuleBase::matrix wg (1, 1); + std::vector wk (1, 2.0); + ModuleBase::matrix ekb (1, 1); + std::vector isk (1); + ekb (0, 0) = 0.1; - testing::internal::CaptureStdout(); - EXPECT_EXIT(occupy.iweights(1, wk, 1, 1.0, ekb, ef, wg, -1, isk);, ::testing::ExitedWithCode(1), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("It is not a semiconductor or insulator. Please do not set 'smearing_method=fixed', and try other options.")); + testing::internal::CaptureStdout (); + EXPECT_EXIT (occupy.iweights (1, wk, 1, 1.0, ekb, ef, wg, -1, isk);, ::testing::ExitedWithCode (1), ""); + output = testing::internal::GetCapturedStdout (); + EXPECT_THAT (output, + testing::HasSubstr ("It is not a semiconductor or insulator. 
Please do not set " + "'smearing_method=fixed', and try other options.")); } -TEST_F(OccupyTest, Wgauss) +TEST_F (OccupyTest, Wgauss) { - EXPECT_DOUBLE_EQ(occupy.wgauss(0.0, 0), 0.5); - EXPECT_DOUBLE_EQ(occupy.wgauss(0.0, -1), 0.4006259784506005); - EXPECT_DOUBLE_EQ(occupy.wgauss(0.0, -99), 0.5); - EXPECT_DOUBLE_EQ(occupy.wgauss(0.0, 1), 0.5); - EXPECT_DOUBLE_EQ(occupy.wgauss(0.0, 2), 0.5); - EXPECT_DOUBLE_EQ(occupy.wgauss(10, 0), 1.0); + EXPECT_DOUBLE_EQ (occupy.wgauss (0.0, 0), 0.5); + EXPECT_DOUBLE_EQ (occupy.wgauss (0.0, -1), 0.4006259784506005); + EXPECT_DOUBLE_EQ (occupy.wgauss (0.0, -99), 0.5); + EXPECT_DOUBLE_EQ (occupy.wgauss (0.0, 1), 0.5); + EXPECT_DOUBLE_EQ (occupy.wgauss (0.0, 2), 0.5); + EXPECT_DOUBLE_EQ (occupy.wgauss (10, 0), 1.0); } -TEST_F(OccupyTest, W1gauss) +TEST_F (OccupyTest, W1gauss) { - EXPECT_DOUBLE_EQ(occupy.w1gauss(0.0, 0), -0.28209479177387814); - EXPECT_DOUBLE_EQ(occupy.w1gauss(0.0, -1), -0.1710991401561083); - EXPECT_DOUBLE_EQ(occupy.w1gauss(0.0, -99), -0.69314718055994529); - EXPECT_DOUBLE_EQ(occupy.w1gauss(0.0, 1), -0.14104739588693907); - EXPECT_DOUBLE_EQ(occupy.w1gauss(0.0, 2), -0.10578554691520431); + EXPECT_DOUBLE_EQ (occupy.w1gauss (0.0, 0), -0.28209479177387814); + EXPECT_DOUBLE_EQ (occupy.w1gauss (0.0, -1), -0.1710991401561083); + EXPECT_DOUBLE_EQ (occupy.w1gauss (0.0, -99), -0.69314718055994529); + EXPECT_DOUBLE_EQ (occupy.w1gauss (0.0, 1), -0.14104739588693907); + EXPECT_DOUBLE_EQ (occupy.w1gauss (0.0, 2), -0.10578554691520431); } -TEST_F(OccupyTest, Sumkg) +TEST_F (OccupyTest, Sumkg) { - ModuleBase::matrix ekb(1, 1); - ekb(0, 0) = -1.0; - std::vector wk = {1, 1.0}; - double smearing_sigma = 0.1; - int ngauss = 0; - double e = 0.0; - int is = 0; - std::vector isk = {0, 0}; - EXPECT_DOUBLE_EQ(occupy.sumkg(ekb, 1, 1, wk, smearing_sigma, ngauss, e, is, isk), 1.0); + ModuleBase::matrix ekb (1, 1); + ekb (0, 0) = -1.0; + std::vector wk = {1, 1.0}; + double smearing_sigma = 0.1; + int ngauss = 0; + double e = 0.0; + int is = 0; + 
std::vector isk = {0, 0}; + EXPECT_DOUBLE_EQ (occupy.sumkg (ekb, 1, 1, wk, smearing_sigma, ngauss, e, is, isk), 1.0); } -TEST_F(OccupyTest, Efermig) +TEST_F (OccupyTest, Efermig) { - ModuleBase::matrix ekb(1, 1); - ekb(0, 0) = -1.0; - std::vector wk = {1, 1.0}; - double smearing_sigma = 0.1; - int ngauss = 0; - double e = 0.0; - int is = 0; - std::vector isk = {0, 0}; - double ef = 0.0; - occupy.efermig(ekb, 1, 1, 1.0, wk, smearing_sigma, ngauss, ef, is, isk); - EXPECT_NEAR(ef, -0.5, 1e-13); + ModuleBase::matrix ekb (1, 1); + ekb (0, 0) = -1.0; + std::vector wk = {1, 1.0}; + double smearing_sigma = 0.1; + int ngauss = 0; + double e = 0.0; + int is = 0; + std::vector isk = {0, 0}; + double ef = 0.0; + occupy.efermig (ekb, 1, 1, 1.0, wk, smearing_sigma, ngauss, ef, is, isk); + EXPECT_NEAR (ef, -0.5, 1e-13); } -TEST_F(OccupyTest, Gweights) +TEST_F (OccupyTest, Gweights) { - ModuleBase::matrix ekb(1, 1); - ekb(0, 0) = -1.0; - std::vector wk = {1, 1.0}; - double smearing_sigma = 0.1; - int ngauss = 0; - double e = 0.0; - int is = 0; - std::vector isk = {0, 0}; - double ef = 0.0; - ModuleBase::matrix wg(1, 1); - wg(0, 0) = 1.0; - double demet = 0.0; - occupy.gweights(1, wk, 1, 1.0, smearing_sigma, ngauss, ekb, ef, demet, wg, is, isk); - EXPECT_NEAR(ef, -0.5, 1e-13); - EXPECT_NEAR(demet, 0.0, 1e-13); - EXPECT_NEAR(wg(0, 0), 1.0, 1e-13); + ModuleBase::matrix ekb (1, 1); + ekb (0, 0) = -1.0; + std::vector wk = {1, 1.0}; + double smearing_sigma = 0.1; + int ngauss = 0; + double e = 0.0; + int is = 0; + std::vector isk = {0, 0}; + double ef = 0.0; + ModuleBase::matrix wg (1, 1); + wg (0, 0) = 1.0; + double demet = 0.0; + occupy.gweights (1, wk, 1, 1.0, smearing_sigma, ngauss, ekb, ef, demet, wg, is, isk); + EXPECT_NEAR (ef, -0.5, 1e-13); + EXPECT_NEAR (demet, 0.0, 1e-13); + EXPECT_NEAR (wg (0, 0), 1.0, 1e-13); } diff --git a/source/source_estate/test/elecstate_print_test.cpp b/source/source_estate/test/elecstate_print_test.cpp index 53dcbb0c589..fc780bc8585 100644 --- 
a/source/source_estate/test/elecstate_print_test.cpp +++ b/source/source_estate/test/elecstate_print_test.cpp @@ -17,7 +17,8 @@ ****************************************************************/ namespace elecstate { -const double* ElecState::getRho(int spin) const +const double* + ElecState::getRho (int spin) const { return &(this->eferm.ef); } // just for mock @@ -25,22 +26,18 @@ double Efield::etotefield = 1.1; double elecstate::Gatefield::etotgatefield = 2.2; } // namespace elecstate -UnitCell::UnitCell(){} -UnitCell::~UnitCell(){} -Magnetism::Magnetism(){} -Magnetism::~Magnetism(){} -InfoNonlocal::InfoNonlocal(){} -InfoNonlocal::~InfoNonlocal(){} -Charge::Charge() -{ -} -Charge::~Charge() -{ -} -SepPot::SepPot(){} -SepPot::~SepPot(){} -Sep_Cell::Sep_Cell() noexcept {} -Sep_Cell::~Sep_Cell() noexcept {} +UnitCell::UnitCell () {} +UnitCell::~UnitCell () {} +Magnetism::Magnetism () {} +Magnetism::~Magnetism () {} +InfoNonlocal::InfoNonlocal () {} +InfoNonlocal::~InfoNonlocal () {} +Charge::Charge () {} +Charge::~Charge () {} +SepPot::SepPot () {} +SepPot::~SepPot () {} +Sep_Cell::Sep_Cell () noexcept {} +Sep_Cell::~Sep_Cell () noexcept {} int XC_Functional::func_type = 0; bool XC_Functional::ked_flag = false; @@ -63,33 +60,34 @@ class ElecStatePrintTest : public ::testing::Test std::ifstream ifs; std::ofstream ofs; K_Vectors* p_klist; - void SetUp() + void + SetUp () { p_klist = new K_Vectors; - p_klist->set_nks(2); - p_klist->set_nkstot(2); + p_klist->set_nks (2); + p_klist->set_nkstot (2); p_klist->isk = {0, 1}; p_klist->ngk = {100, 101}; - p_klist->kvec_c.resize(2); - p_klist->kvec_c[0].set(0.1, 0.11, 0.111); - p_klist->kvec_c[1].set(0.2, 0.22, 0.222); - p_klist->ik2iktot.resize(2); + p_klist->kvec_c.resize (2); + p_klist->kvec_c[0].set (0.1, 0.11, 0.111); + p_klist->kvec_c[1].set (0.2, 0.22, 0.222); + p_klist->ik2iktot.resize (2); p_klist->ik2iktot[0] = 0; p_klist->ik2iktot[1] = 1; // initialize klist of elecstate elecstate.klist = p_klist; // initialize ekb 
of elecstate - elecstate.ekb.create(2, 2); - elecstate.ekb(0, 0) = 1.0; - elecstate.ekb(0, 1) = 2.0; - elecstate.ekb(1, 0) = 3.0; - elecstate.ekb(1, 1) = 4.0; + elecstate.ekb.create (2, 2); + elecstate.ekb (0, 0) = 1.0; + elecstate.ekb (0, 1) = 2.0; + elecstate.ekb (1, 0) = 3.0; + elecstate.ekb (1, 1) = 4.0; // initialize wg of elecstate - elecstate.wg.create(2, 2); - elecstate.wg(0, 0) = 0.1; - elecstate.wg(0, 1) = 0.2; - elecstate.wg(1, 0) = 0.3; - elecstate.wg(1, 1) = 0.4; + elecstate.wg.create (2, 2); + elecstate.wg (0, 0) = 0.1; + elecstate.wg (0, 1) = 0.2; + elecstate.wg (1, 0) = 0.3; + elecstate.wg (1, 1) = 0.4; ucell.magnet.tot_mag = 1.1; ucell.magnet.abs_mag = 2.2; ucell.magnet.tot_mag_nc[0] = 3.3; @@ -98,27 +96,28 @@ class ElecStatePrintTest : public ::testing::Test PARAM.input.ks_solver = "dav"; PARAM.sys.log_file = "test.dat"; } - void TearDown() + void + TearDown () { delete p_klist; } }; -TEST_F(ElecStatePrintTest, PrintFormat) +TEST_F (ElecStatePrintTest, PrintFormat) { - GlobalV::ofs_running.open("test.dat", std::ios::out); - elecstate::print_format("test", 0.1); - GlobalV::ofs_running.close(); - ifs.open("test.dat", std::ios::in); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("test +0.1 +1.36057")); - ifs.close(); - std::remove("test.dat"); + GlobalV::ofs_running.open ("test.dat", std::ios::out); + elecstate::print_format ("test", 0.1); + GlobalV::ofs_running.close (); + ifs.open ("test.dat", std::ios::in); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("test +0.1 +1.36057")); + ifs.close (); + std::remove ("test.dat"); } -TEST_F(ElecStatePrintTest, PrintEtot) +TEST_F (ElecStatePrintTest, PrintEtot) { - GlobalV::ofs_running.open("test.dat", std::ios::out); + GlobalV::ofs_running.open ("test.dat", std::ios::out); bool converged = false; int iter = 1; double scf_thr = 0.1; @@ -141,67 +140,83 @@ 
TEST_F(ElecStatePrintTest, PrintEtot) // iteration of different vdw_method std::vector vdw_methods = {"d2", "d3_0", "d3_bj"}; - for (int i = 0; i < vdw_methods.size(); i++) - { - PARAM.input.vdw_method = vdw_methods[i]; - elecstate::print_etot(ucell.magnet,elecstate, converged, iter, scf_thr, - scf_thr_kin, duration, pw_diag_thr, avg_iter, false); - } + for (int i = 0; i < vdw_methods.size (); i++) + { + PARAM.input.vdw_method = vdw_methods[i]; + elecstate::print_etot (ucell.magnet, + elecstate, + converged, + iter, + scf_thr, + scf_thr_kin, + duration, + pw_diag_thr, + avg_iter, + false); + } // iteration of different ks_solver std::vector ks_solvers = {"cg", "lapack", "genelpa", "dav", "scalapack_gvx", "cusolver"}; - for (int i = 0; i < ks_solvers.size(); i++) - { - PARAM.input.ks_solver = ks_solvers[i]; - testing::internal::CaptureStdout(); + for (int i = 0; i < ks_solvers.size (); i++) + { + PARAM.input.ks_solver = ks_solvers[i]; + testing::internal::CaptureStdout (); - elecstate::print_etot(ucell.magnet,elecstate,converged, iter, scf_thr, - scf_thr_kin, duration, pw_diag_thr, avg_iter, print); + elecstate::print_etot (ucell.magnet, + elecstate, + converged, + iter, + scf_thr, + scf_thr_kin, + duration, + pw_diag_thr, + avg_iter, + print); - output = testing::internal::GetCapturedStdout(); - if (PARAM.input.ks_solver == "cg") - { - EXPECT_THAT(output, testing::HasSubstr("CG")); + output = testing::internal::GetCapturedStdout (); + if (PARAM.input.ks_solver == "cg") + { + EXPECT_THAT (output, testing::HasSubstr ("CG")); + } + else if (PARAM.input.ks_solver == "lapack") + { + EXPECT_THAT (output, testing::HasSubstr ("LA")); + } + else if (PARAM.input.ks_solver == "genelpa") + { + EXPECT_THAT (output, testing::HasSubstr ("GE")); + } + else if (PARAM.input.ks_solver == "dav") + { + EXPECT_THAT (output, testing::HasSubstr ("DA")); + } + else if (PARAM.input.ks_solver == "scalapack_gvx") + { + EXPECT_THAT (output, testing::HasSubstr ("GV")); + } + else if 
(PARAM.input.ks_solver == "cusolver") + { + EXPECT_THAT (output, testing::HasSubstr ("CU")); + } } - else if (PARAM.input.ks_solver == "lapack") - { - EXPECT_THAT(output, testing::HasSubstr("LA")); - } - else if (PARAM.input.ks_solver == "genelpa") - { - EXPECT_THAT(output, testing::HasSubstr("GE")); - } - else if (PARAM.input.ks_solver == "dav") - { - EXPECT_THAT(output, testing::HasSubstr("DA")); - } - else if (PARAM.input.ks_solver == "scalapack_gvx") - { - EXPECT_THAT(output, testing::HasSubstr("GV")); - } - else if (PARAM.input.ks_solver == "cusolver") - { - EXPECT_THAT(output, testing::HasSubstr("CU")); - } - } - GlobalV::ofs_running.close(); - ifs.open("test.dat", std::ios::in); - std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - EXPECT_THAT(str, testing::HasSubstr("Electron density deviation 0.1")); - EXPECT_THAT(str, testing::HasSubstr("Diago Threshold = 0.1")); - EXPECT_THAT(str, testing::HasSubstr("E_KohnSham")); - EXPECT_THAT(str, testing::HasSubstr("E_vdwD2")); - EXPECT_THAT(str, testing::HasSubstr("E_vdwD3")); - EXPECT_THAT(str, testing::HasSubstr("E_sol_el")); - EXPECT_THAT(str, testing::HasSubstr("E_sol_cav")); - EXPECT_THAT(str, testing::HasSubstr("E_efield")); - EXPECT_THAT(str, testing::HasSubstr("E_gatefield")); - ifs.close(); + GlobalV::ofs_running.close (); + ifs.open ("test.dat", std::ios::in); + std::string str ((std::istreambuf_iterator (ifs)), std::istreambuf_iterator ()); + EXPECT_THAT (str, testing::HasSubstr ("Electron density deviation 0.1")); + EXPECT_THAT (str, testing::HasSubstr ("Diago Threshold = 0.1")); + EXPECT_THAT (str, testing::HasSubstr ("E_KohnSham")); + EXPECT_THAT (str, testing::HasSubstr ("E_vdwD2")); + EXPECT_THAT (str, testing::HasSubstr ("E_vdwD3")); + EXPECT_THAT (str, testing::HasSubstr ("E_sol_el")); + EXPECT_THAT (str, testing::HasSubstr ("E_sol_cav")); + EXPECT_THAT (str, testing::HasSubstr ("E_efield")); + EXPECT_THAT (str, testing::HasSubstr ("E_gatefield")); + ifs.close (); delete 
elecstate.charge; - std::remove("test.dat"); + std::remove ("test.dat"); } -TEST_F(ElecStatePrintTest, PrintEtotColorS2) +TEST_F (ElecStatePrintTest, PrintEtotColorS2) { bool converged = false; int iter = 1; @@ -223,14 +238,21 @@ TEST_F(ElecStatePrintTest, PrintEtotColorS2) PARAM.input.nspin = 2; GlobalV::MY_RANK = 0; - elecstate::print_etot(ucell.magnet,elecstate,converged, iter, scf_thr, - scf_thr_kin, duration, pw_diag_thr, avg_iter, print); + elecstate::print_etot (ucell.magnet, + elecstate, + converged, + iter, + scf_thr, + scf_thr_kin, + duration, + pw_diag_thr, + avg_iter, + print); delete elecstate.charge; } - -TEST_F(ElecStatePrintTest, PrintEtotColorS4) +TEST_F (ElecStatePrintTest, PrintEtotColorS4) { bool converged = false; int iter = 1; @@ -253,8 +275,16 @@ TEST_F(ElecStatePrintTest, PrintEtotColorS4) PARAM.input.noncolin = true; GlobalV::MY_RANK = 0; - elecstate::print_etot(ucell.magnet,elecstate, converged, iter, scf_thr, scf_thr_kin, - duration, pw_diag_thr, avg_iter, print); + elecstate::print_etot (ucell.magnet, + elecstate, + converged, + iter, + scf_thr, + scf_thr_kin, + duration, + pw_diag_thr, + avg_iter, + print); delete elecstate.charge; } diff --git a/source/source_estate/test/elecstate_pw_test.cpp b/source/source_estate/test/elecstate_pw_test.cpp index c4e57298409..f5cfbf0f90d 100644 --- a/source/source_estate/test/elecstate_pw_test.cpp +++ b/source/source_estate/test/elecstate_pw_test.cpp @@ -17,69 +17,43 @@ int XC_Functional::func_type = 1; namespace elecstate { -void Potential::init_pot(Charge const*) +void + Potential::init_pot (Charge const*) { } -void Potential::cal_v_eff(const Charge* chg, const UnitCell* ucell, ModuleBase::matrix& v_eff) +void + Potential::cal_v_eff (const Charge* chg, const UnitCell* ucell, ModuleBase::matrix& v_eff) { } -void Potential::cal_fixed_v(double* vl_pseudo) -{ -} -Potential::~Potential() +void + Potential::cal_fixed_v (double* vl_pseudo) { } +Potential::~Potential () {} } // namespace elecstate 
-Charge::Charge() -{ -} -Charge::~Charge() -{ -} -UnitCell::UnitCell() -{ -} -UnitCell::~UnitCell() -{ -} -Magnetism::Magnetism() -{ -} -Magnetism::~Magnetism() -{ -} -SepPot::SepPot(){} -SepPot::~SepPot(){} -Sep_Cell::Sep_Cell() noexcept {} -Sep_Cell::~Sep_Cell() noexcept {} +Charge::Charge () {} +Charge::~Charge () {} +UnitCell::UnitCell () {} +UnitCell::~UnitCell () {} +Magnetism::Magnetism () {} +Magnetism::~Magnetism () {} +SepPot::SepPot () {} +SepPot::~SepPot () {} +Sep_Cell::Sep_Cell () noexcept {} +Sep_Cell::~Sep_Cell () noexcept {} #ifdef __LCAO -InfoNonlocal::InfoNonlocal() -{ -} -InfoNonlocal::~InfoNonlocal() -{ -} -ORB_gaunt_table::ORB_gaunt_table() -{ -} -ORB_gaunt_table::~ORB_gaunt_table() -{ -} +InfoNonlocal::InfoNonlocal () {} +InfoNonlocal::~InfoNonlocal () {} +ORB_gaunt_table::ORB_gaunt_table () {} +ORB_gaunt_table::~ORB_gaunt_table () {} #endif -pseudopot_cell_vl::pseudopot_cell_vl() -{ -} -pseudopot_cell_vl::~pseudopot_cell_vl() -{ -} -pseudopot_cell_vnl::pseudopot_cell_vnl() -{ -} -pseudopot_cell_vnl::~pseudopot_cell_vnl() -{ -} +pseudopot_cell_vl::pseudopot_cell_vl () {} +pseudopot_cell_vl::~pseudopot_cell_vl () {} +pseudopot_cell_vnl::pseudopot_cell_vnl () {} +pseudopot_cell_vnl::~pseudopot_cell_vnl () {} template <> -void pseudopot_cell_vnl::radial_fft_q(base_device::DEVICE_CPU* ctx, +void + pseudopot_cell_vnl::radial_fft_q (base_device::DEVICE_CPU* ctx, const int ng, const int ih, const int jh, @@ -90,7 +64,8 @@ void pseudopot_cell_vnl::radial_fft_q(base_devic { } template <> -void pseudopot_cell_vnl::radial_fft_q(base_device::DEVICE_CPU* ctx, +void + pseudopot_cell_vnl::radial_fft_q (base_device::DEVICE_CPU* ctx, const int ng, const int ih, const int jh, @@ -101,41 +76,43 @@ void pseudopot_cell_vnl::radial_fft_q(base_devi { } template <> -std::complex* pseudopot_cell_vnl::get_vkb_data() const +std::complex* + pseudopot_cell_vnl::get_vkb_data () const { return nullptr; } template <> -std::complex* pseudopot_cell_vnl::get_vkb_data() const 
+std::complex* + pseudopot_cell_vnl::get_vkb_data () const { return nullptr; } template <> -void pseudopot_cell_vnl::getvnl(base_device::DEVICE_CPU*, +void + pseudopot_cell_vnl::getvnl (base_device::DEVICE_CPU*, const UnitCell&, int const&, std::complex*) const { } template <> -void pseudopot_cell_vnl::getvnl(base_device::DEVICE_CPU*, +void + pseudopot_cell_vnl::getvnl (base_device::DEVICE_CPU*, const UnitCell&, int const&, std::complex*) const { } -Soc::~Soc() -{ -} -Fcoef::~Fcoef() -{ -} +Soc::~Soc () {} +Fcoef::~Fcoef () {} #include "source_cell/klist.h" -void Charge::set_rho_core(const UnitCell& ucell, ModuleBase::ComplexMatrix const&, const bool*) +void + Charge::set_rho_core (const UnitCell& ucell, ModuleBase::ComplexMatrix const&, const bool*) { } -void Charge::init_rho(const UnitCell&, +void + Charge::init_rho (const UnitCell&, const Parallel_Grid&, ModuleBase::ComplexMatrix const&, ModuleSymmetry::Symmetry& symm, @@ -143,17 +120,21 @@ void Charge::init_rho(const UnitCell&, const void*) { } -void Charge::set_rhopw(ModulePW::PW_Basis*) +void + Charge::set_rhopw (ModulePW::PW_Basis*) { } -void Charge::renormalize_rho() +void + Charge::renormalize_rho () { } -void Charge::check_rho() +void + Charge::check_rho () { } -void Set_GlobalV_Default() +void + Set_GlobalV_Default () { PARAM.input.device = "cpu"; PARAM.input.precision = "double"; @@ -162,7 +143,7 @@ void Set_GlobalV_Default() // Base class dependent PARAM.input.nspin = 1; PARAM.input.nelec = 10.0; - PARAM.input.nupdown = 0.0; + PARAM.input.nupdown = 0.0; PARAM.sys.two_fermi = false; PARAM.input.nbands = 6; PARAM.sys.nlocal = 6; @@ -202,13 +183,14 @@ class ElecStatePWTest : public ::testing::Test ModulePW::PW_Basis* rhodpw = nullptr; ModulePW::PW_Basis* rhopw = nullptr; ModulePW::PW_Basis_Big* bigpw = nullptr; - void SetUp() override + void + SetUp () override { - Set_GlobalV_Default(); + Set_GlobalV_Default (); wfcpw = new ModulePW::PW_Basis_K; chg = new Charge; klist = new K_Vectors; - 
klist->set_nks(5); + klist->set_nks (5); ucell = new UnitCell; ucell->omega = 500.0; ucell->tpiba = 2.0; @@ -218,7 +200,8 @@ class ElecStatePWTest : public ::testing::Test bigpw = new ModulePW::PW_Basis_Big; } - void TearDown() override + void + TearDown () override { delete wfcpw; delete chg; @@ -228,106 +211,106 @@ class ElecStatePWTest : public ::testing::Test delete rhodpw; delete rhopw; if (elecstate_pw_d != nullptr) - { - delete elecstate_pw_d; - } + { + delete elecstate_pw_d; + } if (elecstate_pw_s != nullptr) - { - delete elecstate_pw_s; - } + { + delete elecstate_pw_s; + } } }; -TEST_F(ElecStatePWTest, ConstructorDouble) +TEST_F (ElecStatePWTest, ConstructorDouble) +{ + elecstate_pw_d = new elecstate::ElecStatePW, base_device::DEVICE_CPU> (wfcpw, + chg, + klist, + ucell, + ppcell, + rhopw, + bigpw); + EXPECT_EQ (elecstate_pw_d->classname, "ElecStatePW"); + EXPECT_EQ (elecstate_pw_d->charge, chg); + EXPECT_EQ (elecstate_pw_d->klist, klist); + EXPECT_EQ (elecstate_pw_d->bigpw, bigpw); +} + +TEST_F (ElecStatePWTest, ConstructorSingle) { - elecstate_pw_d = new elecstate::ElecStatePW, base_device::DEVICE_CPU>(wfcpw, + elecstate_pw_s = new elecstate::ElecStatePW, base_device::DEVICE_CPU> (wfcpw, chg, klist, ucell, ppcell, rhopw, bigpw); - EXPECT_EQ(elecstate_pw_d->classname, "ElecStatePW"); - EXPECT_EQ(elecstate_pw_d->charge, chg); - EXPECT_EQ(elecstate_pw_d->klist, klist); - EXPECT_EQ(elecstate_pw_d->bigpw, bigpw); + EXPECT_EQ (elecstate_pw_s->classname, "ElecStatePW"); + EXPECT_EQ (elecstate_pw_s->charge, chg); + EXPECT_EQ (elecstate_pw_s->klist, klist); + EXPECT_EQ (elecstate_pw_s->bigpw, bigpw); } -TEST_F(ElecStatePWTest, ConstructorSingle) +TEST_F (ElecStatePWTest, InitRhoDataDouble) { - elecstate_pw_s = new elecstate::ElecStatePW, base_device::DEVICE_CPU>(wfcpw, - chg, - klist, - ucell, - ppcell, - rhopw, - bigpw); - EXPECT_EQ(elecstate_pw_s->classname, "ElecStatePW"); - EXPECT_EQ(elecstate_pw_s->charge, chg); - EXPECT_EQ(elecstate_pw_s->klist, klist); - 
EXPECT_EQ(elecstate_pw_s->bigpw, bigpw); + XC_Functional::func_type = 3; + chg->nrxx = 1000; + elecstate_pw_d = new elecstate::ElecStatePW, base_device::DEVICE_CPU> (wfcpw, + chg, + klist, + ucell, + ppcell, + rhopw, + bigpw); + elecstate_pw_d->init_rho_data (); + EXPECT_EQ (elecstate_pw_d->init_rho, true); + EXPECT_EQ (elecstate_pw_d->rho, chg->rho); + EXPECT_EQ (elecstate_pw_d->kin_r, chg->kin_r); } -TEST_F(ElecStatePWTest, InitRhoDataDouble) +TEST_F (ElecStatePWTest, InitRhoDataSingle) { + PARAM.input.precision = "single"; XC_Functional::func_type = 3; + chg->nspin = PARAM.input.nspin; chg->nrxx = 1000; - elecstate_pw_d = new elecstate::ElecStatePW, base_device::DEVICE_CPU>(wfcpw, + elecstate_pw_s = new elecstate::ElecStatePW, base_device::DEVICE_CPU> (wfcpw, chg, klist, ucell, ppcell, rhopw, bigpw); - elecstate_pw_d->init_rho_data(); - EXPECT_EQ(elecstate_pw_d->init_rho, true); - EXPECT_EQ(elecstate_pw_d->rho, chg->rho); - EXPECT_EQ(elecstate_pw_d->kin_r, chg->kin_r); + elecstate_pw_s->init_rho_data (); + EXPECT_EQ (elecstate_pw_s->init_rho, true); + EXPECT_NE (elecstate_pw_s->rho, nullptr); + EXPECT_NE (elecstate_pw_s->kin_r, nullptr); } -TEST_F(ElecStatePWTest, InitRhoDataSingle) -{ - PARAM.input.precision = "single"; - XC_Functional::func_type = 3; - chg->nspin = PARAM.input.nspin; - chg->nrxx = 1000; - elecstate_pw_s = new elecstate::ElecStatePW, base_device::DEVICE_CPU>(wfcpw, - chg, - klist, - ucell, - ppcell, - rhopw, - bigpw); - elecstate_pw_s->init_rho_data(); - EXPECT_EQ(elecstate_pw_s->init_rho, true); - EXPECT_NE(elecstate_pw_s->rho, nullptr); - EXPECT_NE(elecstate_pw_s->kin_r, nullptr); +TEST_F (ElecStatePWTest, ParallelKDouble) +{ + // this is a trivial call due to removing of __MPI + elecstate_pw_d = new elecstate::ElecStatePW, base_device::DEVICE_CPU> (wfcpw, + chg, + klist, + ucell, + ppcell, + rhopw, + bigpw); + EXPECT_NO_THROW (elecstate_pw_d->parallelK ()); } -TEST_F(ElecStatePWTest, ParallelKDouble) +TEST_F (ElecStatePWTest, 
ParallelKSingle) { - //this is a trivial call due to removing of __MPI - elecstate_pw_d = new elecstate::ElecStatePW, base_device::DEVICE_CPU>(wfcpw, + // this is a trivial call due to removing of __MPI + elecstate_pw_s = new elecstate::ElecStatePW, base_device::DEVICE_CPU> (wfcpw, chg, klist, ucell, ppcell, rhopw, bigpw); - EXPECT_NO_THROW(elecstate_pw_d->parallelK()); -} - -TEST_F(ElecStatePWTest, ParallelKSingle) -{ - //this is a trivial call due to removing of __MPI - elecstate_pw_s = new elecstate::ElecStatePW, base_device::DEVICE_CPU>(wfcpw, - chg, - klist, - ucell, - ppcell, - rhopw, - bigpw); - EXPECT_NO_THROW(elecstate_pw_s->parallelK()); + EXPECT_NO_THROW (elecstate_pw_s->parallelK ()); } #undef protected diff --git a/source/source_estate/test/gint_precision_controller_test.cpp b/source/source_estate/test/gint_precision_controller_test.cpp index c2ea24715f2..db5aa0f1b31 100644 --- a/source/source_estate/test/gint_precision_controller_test.cpp +++ b/source/source_estate/test/gint_precision_controller_test.cpp @@ -2,62 +2,62 @@ #include "../module_charge/gint_precision_controller.h" -TEST(GintPrecisionControllerTest, AutoModeSwitchesToFp64ImmediatelyWhenDrhoIsSmallEnough) +TEST (GintPrecisionControllerTest, AutoModeSwitchesToFp64ImmediatelyWhenDrhoIsSmallEnough) { GintPrecisionController controller; - controller.set_mode("mix"); - controller.reset_for_new_scf(); - EXPECT_EQ(controller.current_precision(), ModuleGint::GintPrecision::fp32); + controller.set_mode ("mix"); + controller.reset_for_new_scf (); + EXPECT_EQ (controller.current_precision (), ModuleGint::GintPrecision::fp32); - EXPECT_TRUE(controller.update_after_iteration(9.0e-5, 1.0e-7)); - EXPECT_EQ(controller.current_precision(), ModuleGint::GintPrecision::fp64); + EXPECT_TRUE (controller.update_after_iteration (9.0e-5, 1.0e-7)); + EXPECT_EQ (controller.current_precision (), ModuleGint::GintPrecision::fp64); } -TEST(GintPrecisionControllerTest, DefaultModeStartsAndStaysFp64) +TEST 
(GintPrecisionControllerTest, DefaultModeStartsAndStaysFp64) { GintPrecisionController controller; - controller.reset_for_new_scf(); - EXPECT_EQ(controller.current_precision(), ModuleGint::GintPrecision::fp64); + controller.reset_for_new_scf (); + EXPECT_EQ (controller.current_precision (), ModuleGint::GintPrecision::fp64); - EXPECT_FALSE(controller.update_after_iteration(9.0e-5, 1.0e-7)); - EXPECT_EQ(controller.current_precision(), ModuleGint::GintPrecision::fp64); + EXPECT_FALSE (controller.update_after_iteration (9.0e-5, 1.0e-7)); + EXPECT_EQ (controller.current_precision (), ModuleGint::GintPrecision::fp64); } -TEST(GintPrecisionControllerTest, SingleModeStartsAndStaysFp32) +TEST (GintPrecisionControllerTest, SingleModeStartsAndStaysFp32) { GintPrecisionController controller; - controller.set_mode("single"); - controller.reset_for_new_scf(); - EXPECT_EQ(controller.current_precision(), ModuleGint::GintPrecision::fp32); + controller.set_mode ("single"); + controller.reset_for_new_scf (); + EXPECT_EQ (controller.current_precision (), ModuleGint::GintPrecision::fp32); - EXPECT_FALSE(controller.update_after_iteration(9.0e-5, 1.0e-7)); - EXPECT_EQ(controller.current_precision(), ModuleGint::GintPrecision::fp32); + EXPECT_FALSE (controller.update_after_iteration (9.0e-5, 1.0e-7)); + EXPECT_EQ (controller.current_precision (), ModuleGint::GintPrecision::fp32); } -TEST(GintPrecisionControllerTest, MixModeLocksFp64AfterSwitch) +TEST (GintPrecisionControllerTest, MixModeLocksFp64AfterSwitch) { GintPrecisionController controller; - controller.set_mode("mix"); - controller.reset_for_new_scf(); - EXPECT_TRUE(controller.update_after_iteration(9.0e-5, 1.0e-6)); - EXPECT_EQ(controller.current_precision(), ModuleGint::GintPrecision::fp64); + controller.set_mode ("mix"); + controller.reset_for_new_scf (); + EXPECT_TRUE (controller.update_after_iteration (9.0e-5, 1.0e-6)); + EXPECT_EQ (controller.current_precision (), ModuleGint::GintPrecision::fp64); // After locking, should 
return false - EXPECT_FALSE(controller.update_after_iteration(1.0, 1.0e-6)); - EXPECT_EQ(controller.current_precision(), ModuleGint::GintPrecision::fp64); + EXPECT_FALSE (controller.update_after_iteration (1.0, 1.0e-6)); + EXPECT_EQ (controller.current_precision (), ModuleGint::GintPrecision::fp64); } -TEST(GintPrecisionControllerTest, MixModeReturnsFalseWhenDrhoTooLarge) +TEST (GintPrecisionControllerTest, MixModeReturnsFalseWhenDrhoTooLarge) { GintPrecisionController controller; - controller.set_mode("mix"); - controller.reset_for_new_scf(); + controller.set_mode ("mix"); + controller.reset_for_new_scf (); // drho is large, should not switch yet - EXPECT_FALSE(controller.update_after_iteration(1.0, 1.0e-7)); - EXPECT_EQ(controller.current_precision(), ModuleGint::GintPrecision::fp32); + EXPECT_FALSE (controller.update_after_iteration (1.0, 1.0e-7)); + EXPECT_EQ (controller.current_precision (), ModuleGint::GintPrecision::fp32); } diff --git a/source/source_estate/test/potential_new_test.cpp b/source/source_estate/test/potential_new_test.cpp index f91f473efaa..1cfe2c602d6 100644 --- a/source/source_estate/test/potential_new_test.cpp +++ b/source/source_estate/test/potential_new_test.cpp @@ -6,59 +6,37 @@ #include "source_hamilt/module_xc/xc_functional.h" #include "source_io/module_parameter/parameter.h" // mock functions -Structure_Factor::Structure_Factor() -{ -} -Structure_Factor::~Structure_Factor() -{ -} -UnitCell::UnitCell() -{ -} -UnitCell::~UnitCell() -{ -} -Magnetism::Magnetism() -{ -} -Magnetism::~Magnetism() -{ -} -SepPot::SepPot(){} -SepPot::~SepPot(){} -Sep_Cell::Sep_Cell() noexcept {} -Sep_Cell::~Sep_Cell() noexcept {} +Structure_Factor::Structure_Factor () {} +Structure_Factor::~Structure_Factor () {} +UnitCell::UnitCell () {} +UnitCell::~UnitCell () {} +Magnetism::Magnetism () {} +Magnetism::~Magnetism () {} +SepPot::SepPot () {} +SepPot::~SepPot () {} +Sep_Cell::Sep_Cell () noexcept {} +Sep_Cell::~Sep_Cell () noexcept {} #ifdef __LCAO 
-InfoNonlocal::InfoNonlocal() -{ -} -InfoNonlocal::~InfoNonlocal() -{ -} +InfoNonlocal::InfoNonlocal () {} +InfoNonlocal::~InfoNonlocal () {} #endif -Charge::Charge() -{ -} -Charge::~Charge() -{ -} -surchem::surchem() -{ -} -surchem::~surchem() -{ -} +Charge::Charge () {} +Charge::~Charge () {} +surchem::surchem () {} +surchem::~surchem () {} int XC_Functional::func_type = 1; bool XC_Functional::ked_flag = false; namespace elecstate { -PotBase* Potential::get_pot_type(const std::string& pot_type) +PotBase* + Potential::get_pot_type (const std::string& pot_type) { return new PotBase; } -void Set_GlobalV_Default() +void + Set_GlobalV_Default () { PARAM.input.nspin = 1; PARAM.input.device = "cpu"; @@ -111,568 +89,540 @@ class PotentialNewTest : public ::testing::Test double* etxc = nullptr; double* vtxc = nullptr; elecstate::Potential* pot = nullptr; - virtual void SetUp() + virtual void + SetUp () { rhopw = new ModulePW::PW_Basis; rhodpw = new ModulePW::PW_Basis; ucell = new UnitCell; vloc = new ModuleBase::matrix; - structure_factors = new Structure_Factor(); - solvent = new surchem(); + structure_factors = new Structure_Factor (); + solvent = new surchem (); etxc = new double; vtxc = new double; - elecstate::Set_GlobalV_Default(); + elecstate::Set_GlobalV_Default (); } - virtual void TearDown() + virtual void + TearDown () { if (rhopw != nullptr) - { - delete rhopw; - } + { + delete rhopw; + } if (rhodpw != nullptr) - { - delete rhodpw; - } + { + delete rhodpw; + } if (ucell != nullptr) - { - delete ucell; - } + { + delete ucell; + } if (vloc != nullptr) - { - delete vloc; - } + { + delete vloc; + } if (structure_factors != nullptr) - { - delete structure_factors; - } + { + delete structure_factors; + } if (solvent != nullptr) - { - delete solvent; - } + { + delete solvent; + } if (etxc != nullptr) - { - delete etxc; - } + { + delete etxc; + } if (vtxc != nullptr) - { - delete vtxc; - } + { + delete vtxc; + } if (pot != nullptr) - { - delete pot; - } + { + delete 
pot; + } } }; -TEST_F(PotentialNewTest, ConstructorCPUDouble) +TEST_F (PotentialNewTest, ConstructorCPUDouble) { rhopw->nrxx = 100; - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); - EXPECT_TRUE(pot->fixed_mode); - EXPECT_TRUE(pot->dynamic_mode); - EXPECT_EQ(pot->v_eff_fixed.size(), 100); - EXPECT_EQ(pot->v_eff.nr, PARAM.input.nspin); - EXPECT_EQ(pot->v_eff.nc, 100); + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + EXPECT_TRUE (pot->fixed_mode); + EXPECT_TRUE (pot->dynamic_mode); + EXPECT_EQ (pot->v_eff_fixed.size (), 100); + EXPECT_EQ (pot->v_eff.nr, PARAM.input.nspin); + EXPECT_EQ (pot->v_eff.nc, 100); } -TEST_F(PotentialNewTest, ConstructorCPUSingle) +TEST_F (PotentialNewTest, ConstructorCPUSingle) { rhopw->nrxx = 100; PARAM.input.precision = "single"; - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); - EXPECT_TRUE(pot->fixed_mode); - EXPECT_TRUE(pot->dynamic_mode); - EXPECT_EQ(pot->v_eff_fixed.size(), 100); - EXPECT_EQ(pot->v_eff.nr, PARAM.input.nspin); - EXPECT_EQ(pot->v_eff.nc, 100); + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + EXPECT_TRUE (pot->fixed_mode); + EXPECT_TRUE (pot->dynamic_mode); + EXPECT_EQ (pot->v_eff_fixed.size (), 100); + EXPECT_EQ (pot->v_eff.nr, PARAM.input.nspin); + EXPECT_EQ (pot->v_eff.nc, 100); } -TEST_F(PotentialNewTest, ConstructorNRXX0) +TEST_F (PotentialNewTest, ConstructorNRXX0) { rhopw->nrxx = 0; - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); - EXPECT_TRUE(pot->fixed_mode); - EXPECT_TRUE(pot->dynamic_mode); + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + EXPECT_TRUE (pot->fixed_mode); + EXPECT_TRUE (pot->dynamic_mode); } -TEST_F(PotentialNewTest, ConstructorXC3) +TEST_F (PotentialNewTest, ConstructorXC3) { 
XC_Functional::func_type = 3; XC_Functional::ked_flag = true; rhopw->nrxx = 100; - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); - EXPECT_TRUE(pot->fixed_mode); - EXPECT_TRUE(pot->dynamic_mode); - EXPECT_EQ(pot->v_eff_fixed.size(), 100); - EXPECT_EQ(pot->v_eff.nr, PARAM.input.nspin); - EXPECT_EQ(pot->v_eff.nc, 100); - EXPECT_EQ(pot->vofk_eff.nr, PARAM.input.nspin); - EXPECT_EQ(pot->vofk_eff.nc, 100); + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + EXPECT_TRUE (pot->fixed_mode); + EXPECT_TRUE (pot->dynamic_mode); + EXPECT_EQ (pot->v_eff_fixed.size (), 100); + EXPECT_EQ (pot->v_eff.nr, PARAM.input.nspin); + EXPECT_EQ (pot->v_eff.nc, 100); + EXPECT_EQ (pot->vofk_eff.nr, PARAM.input.nspin); + EXPECT_EQ (pot->vofk_eff.nc, 100); } -TEST_F(PotentialNewTest, ConstructorGPUDouble) +TEST_F (PotentialNewTest, ConstructorGPUDouble) { // this is just a trivial call to the GPU code rhopw->nrxx = 100; PARAM.input.device = "gpu"; - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); - EXPECT_TRUE(pot->fixed_mode); - EXPECT_TRUE(pot->dynamic_mode); - EXPECT_EQ(pot->v_eff_fixed.size(), 100); - EXPECT_EQ(pot->v_eff.nr, PARAM.input.nspin); - EXPECT_EQ(pot->v_eff.nc, 100); + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + EXPECT_TRUE (pot->fixed_mode); + EXPECT_TRUE (pot->dynamic_mode); + EXPECT_EQ (pot->v_eff_fixed.size (), 100); + EXPECT_EQ (pot->v_eff.nr, PARAM.input.nspin); + EXPECT_EQ (pot->v_eff.nc, 100); } -TEST_F(PotentialNewTest, ConstructorGPUSingle) +TEST_F (PotentialNewTest, ConstructorGPUSingle) { // this is just a trivial call to the GPU code rhopw->nrxx = 100; PARAM.input.device = "gpu"; PARAM.input.precision = "single"; - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); - EXPECT_TRUE(pot->fixed_mode); - 
EXPECT_TRUE(pot->dynamic_mode); - EXPECT_EQ(pot->v_eff_fixed.size(), 100); - EXPECT_EQ(pot->v_eff.nr, PARAM.input.nspin); - EXPECT_EQ(pot->v_eff.nc, 100); + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + EXPECT_TRUE (pot->fixed_mode); + EXPECT_TRUE (pot->dynamic_mode); + EXPECT_EQ (pot->v_eff_fixed.size (), 100); + EXPECT_EQ (pot->v_eff.nr, PARAM.input.nspin); + EXPECT_EQ (pot->v_eff.nc, 100); } -TEST_F(PotentialNewTest, Getters) +TEST_F (PotentialNewTest, Getters) { pot = new elecstate::Potential; - pot->veff_smooth.create(10, 10); - pot->vofk_smooth.create(10, 10); + pot->veff_smooth.create (10, 10); + pot->vofk_smooth.create (10, 10); float* foo; - foo = pot->get_veff_smooth_data(); - EXPECT_EQ(foo, pot->s_veff_smooth); - foo = pot->get_vofk_smooth_data(); - EXPECT_EQ(foo, pot->s_vofk_smooth); + foo = pot->get_veff_smooth_data (); + EXPECT_EQ (foo, pot->s_veff_smooth); + foo = pot->get_vofk_smooth_data (); + EXPECT_EQ (foo, pot->s_vofk_smooth); double* doo; - doo = pot->get_veff_smooth_data(); - EXPECT_EQ(doo, pot->d_veff_smooth); - doo = pot->get_vofk_smooth_data(); - EXPECT_EQ(doo, pot->d_vofk_smooth); + doo = pot->get_veff_smooth_data (); + EXPECT_EQ (doo, pot->d_veff_smooth); + doo = pot->get_vofk_smooth_data (); + EXPECT_EQ (doo, pot->d_vofk_smooth); delete foo; delete doo; } -TEST_F(PotentialNewTest, PotRegister) +TEST_F (PotentialNewTest, PotRegister) { pot = new elecstate::Potential; elecstate::PotBase* pot0 = new elecstate::PotBase; - pot->components.push_back(pot0); - EXPECT_EQ(pot->components.size(), 1); + pot->components.push_back (pot0); + EXPECT_EQ (pot->components.size (), 1); std::vector compnents_list = {"hartree", "xc"}; - pot->pot_register(compnents_list); - EXPECT_EQ(pot->components.size(), 2); - EXPECT_FALSE(pot->fixed_done); + pot->pot_register (compnents_list); + EXPECT_EQ (pot->components.size (), 2); + EXPECT_FALSE (pot->fixed_done); } -TEST_F(PotentialNewTest, CalFixedV) +TEST_F 
(PotentialNewTest, CalFixedV) { // construct potential rhopw->nrxx = 100; - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); // - std::vector compnents_list = { - "local", - "hartree", - "xc", - "surchem", - "gatefield" - }; + std::vector compnents_list = {"local", "hartree", "xc", "surchem", "gatefield"}; std::vector fixed = {true, false, false, false, true}; - pot->pot_register(compnents_list); - for (int i = 0; icomponents[i]->fixed_mode = fixed[i]; - } + pot->pot_register (compnents_list); + for (int i = 0; i < compnents_list.size (); i++) + { + pot->components[i]->fixed_mode = fixed[i]; + } double* vl_pseudo = new double[1000]; - pot->cal_fixed_v(vl_pseudo); - for (int i = 0; i < pot->v_eff_fixed.size(); i++) - { - EXPECT_DOUBLE_EQ(pot->v_eff_fixed[i], 0.0); - } + pot->cal_fixed_v (vl_pseudo); + for (int i = 0; i < pot->v_eff_fixed.size (); i++) + { + EXPECT_DOUBLE_EQ (pot->v_eff_fixed[i], 0.0); + } delete[] vl_pseudo; } -TEST_F(PotentialNewTest, CalVeff) +TEST_F (PotentialNewTest, CalVeff) { // construct potential rhopw->nrxx = 100; - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); // - std::vector compnents_list = { - "local", - "hartree", - "xc", - "surchem", - "gatefield" - }; + std::vector compnents_list = {"local", "hartree", "xc", "surchem", "gatefield"}; std::vector dynamic = {false, true, true, true, false}; - pot->pot_register(compnents_list); - for (int i = 0; icomponents[i]->dynamic_mode = dynamic[i]; - } + pot->pot_register (compnents_list); + for (int i = 0; i < compnents_list.size (); i++) + { + pot->components[i]->dynamic_mode = dynamic[i]; + } Charge* chg = new Charge; ModuleBase::matrix v_eff; - v_eff.create(2, 100); - 
pot->cal_v_eff(chg,this->ucell,v_eff); - for (int i = 0; i < pot->v_eff_fixed.size(); i++) - { - EXPECT_DOUBLE_EQ(pot->v_eff_fixed[i], 0.0); - } + v_eff.create (2, 100); + pot->cal_v_eff (chg, this->ucell, v_eff); + for (int i = 0; i < pot->v_eff_fixed.size (); i++) + { + EXPECT_DOUBLE_EQ (pot->v_eff_fixed[i], 0.0); + } delete chg; } -TEST_F(PotentialNewTest, UpdateFromCharge) +TEST_F (PotentialNewTest, UpdateFromCharge) { // construct potential rhopw->nrxx = 100; - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); // - std::vector compnents_list = { - "local", - "hartree", - "xc", - "surchem", - "gatefield" - }; + std::vector compnents_list = {"local", "hartree", "xc", "surchem", "gatefield"}; std::vector fixed = {true, false, false, false, true}; std::vector dynamic = {false, true, true, true, false}; - pot->pot_register(compnents_list); - for (int i = 0; icomponents[i]->fixed_mode = fixed[i]; - pot->components[i]->dynamic_mode = dynamic[i]; - } + pot->pot_register (compnents_list); + for (int i = 0; i < compnents_list.size (); i++) + { + pot->components[i]->fixed_mode = fixed[i]; + pot->components[i]->dynamic_mode = dynamic[i]; + } Charge* chg = new Charge; - EXPECT_FALSE(pot->fixed_done); - pot->update_from_charge(chg, this->ucell); - EXPECT_TRUE(pot->fixed_done); + EXPECT_FALSE (pot->fixed_done); + pot->update_from_charge (chg, this->ucell); + EXPECT_TRUE (pot->fixed_done); delete chg; } -TEST_F(PotentialNewTest, InitPot) +TEST_F (PotentialNewTest, InitPot) { // construct potential rhopw->nrxx = 100; - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); // - std::vector compnents_list = { - "local", - "hartree", - "xc", - "surchem", - "gatefield" - }; + std::vector 
compnents_list = {"local", "hartree", "xc", "surchem", "gatefield"}; std::vector fixed = {true, false, false, false, true}; std::vector dynamic = {false, true, true, true, false}; - pot->pot_register(compnents_list); - for (int i = 0; icomponents[i]->fixed_mode = fixed[i]; - pot->components[i]->dynamic_mode = dynamic[i]; - } + pot->pot_register (compnents_list); + for (int i = 0; i < compnents_list.size (); i++) + { + pot->components[i]->fixed_mode = fixed[i]; + pot->components[i]->dynamic_mode = dynamic[i]; + } Charge* chg = new Charge; - EXPECT_FALSE(pot->fixed_done); - pot->init_pot(chg); - EXPECT_TRUE(pot->fixed_done); + EXPECT_FALSE (pot->fixed_done); + pot->init_pot (chg); + EXPECT_TRUE (pot->fixed_done); delete chg; } -TEST_F(PotentialNewTest, GetVnew) +TEST_F (PotentialNewTest, GetVnew) { // construct potential rhopw->nrxx = 100; - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); // - std::vector compnents_list = { - "local", - "hartree", - "xc", - "surchem", - "gatefield" - }; + std::vector compnents_list = {"local", "hartree", "xc", "surchem", "gatefield"}; std::vector fixed = {true, false, false, false, true}; std::vector dynamic = {false, true, true, true, false}; - pot->pot_register(compnents_list); - for (int i = 0; icomponents[i]->fixed_mode = fixed[i]; - pot->components[i]->dynamic_mode = dynamic[i]; - } + pot->pot_register (compnents_list); + for (int i = 0; i < compnents_list.size (); i++) + { + pot->components[i]->fixed_mode = fixed[i]; + pot->components[i]->dynamic_mode = dynamic[i]; + } Charge* chg = new Charge; ModuleBase::matrix vnew; - pot->get_vnew(chg, vnew); - EXPECT_EQ(vnew.nr, PARAM.input.nspin); - EXPECT_EQ(vnew.nc, 100); + pot->get_vnew (chg, vnew); + EXPECT_EQ (vnew.nr, PARAM.input.nspin); + EXPECT_EQ (vnew.nc, 100); delete chg; } -TEST_F(PotentialNewTest, GetEffectiveVmatrix) +TEST_F 
(PotentialNewTest, GetEffectiveVmatrix) { // construct potential rhopw->nrxx = 100; - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); // - ModuleBase::matrix v_eff_tmp = pot->get_eff_v(); - const ModuleBase::matrix v_eff_tmp_const = pot->get_eff_v(); - EXPECT_EQ(v_eff_tmp.nr, PARAM.input.nspin); - EXPECT_EQ(v_eff_tmp.nc, 100); - EXPECT_EQ(v_eff_tmp_const.nr, PARAM.input.nspin); - EXPECT_EQ(v_eff_tmp_const.nc, 100); + ModuleBase::matrix v_eff_tmp = pot->get_eff_v (); + const ModuleBase::matrix v_eff_tmp_const = pot->get_eff_v (); + EXPECT_EQ (v_eff_tmp.nr, PARAM.input.nspin); + EXPECT_EQ (v_eff_tmp.nc, 100); + EXPECT_EQ (v_eff_tmp_const.nr, PARAM.input.nspin); + EXPECT_EQ (v_eff_tmp_const.nc, 100); for (int ir = 0; ir < v_eff_tmp.nr; ir++) - { - for (int ic = 0; ic < v_eff_tmp.nc; ic++) { - EXPECT_DOUBLE_EQ(v_eff_tmp(ir, ic), pot->v_eff(ir, ic)); - EXPECT_DOUBLE_EQ(v_eff_tmp_const(ir, ic), pot->v_eff(ir, ic)); + for (int ic = 0; ic < v_eff_tmp.nc; ic++) + { + EXPECT_DOUBLE_EQ (v_eff_tmp (ir, ic), pot->v_eff (ir, ic)); + EXPECT_DOUBLE_EQ (v_eff_tmp_const (ir, ic), pot->v_eff (ir, ic)); + } } - } } -TEST_F(PotentialNewTest, GetEffectiveVarray) +TEST_F (PotentialNewTest, GetEffectiveVarray) { // construct potential rhopw->nrxx = 100; - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); // - double* v_eff_tmp = pot->get_eff_v(0); - const double* v_eff_tmp_const = pot->get_eff_v(0); + double* v_eff_tmp = pot->get_eff_v (0); + const double* v_eff_tmp_const = pot->get_eff_v (0); for (int ic = 0; ic < rhopw->nrxx; ic++) - { - EXPECT_DOUBLE_EQ(v_eff_tmp[ic], pot->v_eff(0, ic)); - EXPECT_DOUBLE_EQ(v_eff_tmp_const[ic], pot->v_eff(0, ic)); - } + { + EXPECT_DOUBLE_EQ 
(v_eff_tmp[ic], pot->v_eff (0, ic)); + EXPECT_DOUBLE_EQ (v_eff_tmp_const[ic], pot->v_eff (0, ic)); + } v_eff_tmp[0] = 1.0; - EXPECT_DOUBLE_EQ(pot->v_eff(0, 0), 1.0); - EXPECT_DOUBLE_EQ(v_eff_tmp_const[0], 1.0); + EXPECT_DOUBLE_EQ (pot->v_eff (0, 0), 1.0); + EXPECT_DOUBLE_EQ (v_eff_tmp_const[0], 1.0); } -TEST_F(PotentialNewTest, GetEffectiveVarrayNullptr) +TEST_F (PotentialNewTest, GetEffectiveVarrayNullptr) { pot = new elecstate::Potential; - EXPECT_EQ(pot->v_eff.nc, 0); - double* v_eff_tmp = pot->get_eff_v(0); - const double* v_eff_tmp_const = pot->get_eff_v(0); - EXPECT_EQ(v_eff_tmp, nullptr); - EXPECT_EQ(v_eff_tmp_const, nullptr); + EXPECT_EQ (pot->v_eff.nc, 0); + double* v_eff_tmp = pot->get_eff_v (0); + const double* v_eff_tmp_const = pot->get_eff_v (0); + EXPECT_EQ (v_eff_tmp, nullptr); + EXPECT_EQ (v_eff_tmp_const, nullptr); } -TEST_F(PotentialNewTest, GetEffectiveVofkmatrix) +TEST_F (PotentialNewTest, GetEffectiveVofkmatrix) { // construct potential XC_Functional::func_type = 3; XC_Functional::ked_flag = true; rhopw->nrxx = 100; - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); // - ModuleBase::matrix vofk_eff_tmp = pot->get_eff_vofk(); - const ModuleBase::matrix vofk_eff_tmp_const = pot->get_eff_vofk(); - EXPECT_EQ(vofk_eff_tmp.nr, PARAM.input.nspin); - EXPECT_EQ(vofk_eff_tmp.nc, 100); - EXPECT_EQ(vofk_eff_tmp_const.nr, PARAM.input.nspin); - EXPECT_EQ(vofk_eff_tmp_const.nc, 100); + ModuleBase::matrix vofk_eff_tmp = pot->get_eff_vofk (); + const ModuleBase::matrix vofk_eff_tmp_const = pot->get_eff_vofk (); + EXPECT_EQ (vofk_eff_tmp.nr, PARAM.input.nspin); + EXPECT_EQ (vofk_eff_tmp.nc, 100); + EXPECT_EQ (vofk_eff_tmp_const.nr, PARAM.input.nspin); + EXPECT_EQ (vofk_eff_tmp_const.nc, 100); for (int ir = 0; ir < vofk_eff_tmp.nr; ir++) - { - for (int ic = 0; ic < vofk_eff_tmp.nc; ic++) { - 
EXPECT_DOUBLE_EQ(vofk_eff_tmp(ir, ic), pot->vofk_eff(ir, ic)); - EXPECT_DOUBLE_EQ(vofk_eff_tmp_const(ir, ic), pot->vofk_eff(ir, ic)); + for (int ic = 0; ic < vofk_eff_tmp.nc; ic++) + { + EXPECT_DOUBLE_EQ (vofk_eff_tmp (ir, ic), pot->vofk_eff (ir, ic)); + EXPECT_DOUBLE_EQ (vofk_eff_tmp_const (ir, ic), pot->vofk_eff (ir, ic)); + } } - } } -TEST_F(PotentialNewTest, GetEffectiveVofkarray) +TEST_F (PotentialNewTest, GetEffectiveVofkarray) { // construct potential rhopw->nrxx = 100; - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); // - double* vofk_eff_tmp = pot->get_eff_vofk(0); - const double* vofk_eff_tmp_const = pot->get_eff_vofk(0); + double* vofk_eff_tmp = pot->get_eff_vofk (0); + const double* vofk_eff_tmp_const = pot->get_eff_vofk (0); for (int ic = 0; ic < rhopw->nrxx; ic++) - { - EXPECT_DOUBLE_EQ(vofk_eff_tmp[ic], pot->vofk_eff(0, ic)); - EXPECT_DOUBLE_EQ(vofk_eff_tmp_const[ic], pot->vofk_eff(0, ic)); - } + { + EXPECT_DOUBLE_EQ (vofk_eff_tmp[ic], pot->vofk_eff (0, ic)); + EXPECT_DOUBLE_EQ (vofk_eff_tmp_const[ic], pot->vofk_eff (0, ic)); + } vofk_eff_tmp[0] = 1.0; - EXPECT_DOUBLE_EQ(pot->vofk_eff(0, 0), 1.0); - EXPECT_DOUBLE_EQ(vofk_eff_tmp_const[0], 1.0); + EXPECT_DOUBLE_EQ (pot->vofk_eff (0, 0), 1.0); + EXPECT_DOUBLE_EQ (vofk_eff_tmp_const[0], 1.0); } -TEST_F(PotentialNewTest, GetEffectiveVofkarrayNullptr) +TEST_F (PotentialNewTest, GetEffectiveVofkarrayNullptr) { pot = new elecstate::Potential; - EXPECT_EQ(pot->v_eff.nc, 0); - double* vofk_eff_tmp = pot->get_eff_vofk(0); - const double* vofk_eff_tmp_const = pot->get_eff_vofk(0); - EXPECT_EQ(vofk_eff_tmp, nullptr); - EXPECT_EQ(vofk_eff_tmp_const, nullptr); + EXPECT_EQ (pot->v_eff.nc, 0); + double* vofk_eff_tmp = pot->get_eff_vofk (0); + const double* vofk_eff_tmp_const = pot->get_eff_vofk (0); + EXPECT_EQ (vofk_eff_tmp, nullptr); + EXPECT_EQ 
(vofk_eff_tmp_const, nullptr); } -TEST_F(PotentialNewTest, GetFixedV) +TEST_F (PotentialNewTest, GetFixedV) { rhopw->nrxx = 100; - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); - EXPECT_TRUE(pot->fixed_mode); - EXPECT_TRUE(pot->dynamic_mode); - EXPECT_EQ(pot->v_eff_fixed.size(), 100); - double* v_eff_fixed_tmp = pot->get_fixed_v(); - const double* v_eff_fixed_tmp_const = pot->get_fixed_v(); + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + EXPECT_TRUE (pot->fixed_mode); + EXPECT_TRUE (pot->dynamic_mode); + EXPECT_EQ (pot->v_eff_fixed.size (), 100); + double* v_eff_fixed_tmp = pot->get_fixed_v (); + const double* v_eff_fixed_tmp_const = pot->get_fixed_v (); for (int ic = 0; ic < rhopw->nrxx; ic++) - { - v_eff_fixed_tmp[ic] = ic; - EXPECT_DOUBLE_EQ(v_eff_fixed_tmp[ic], pot->v_eff_fixed[ic]); - EXPECT_DOUBLE_EQ(v_eff_fixed_tmp_const[ic], pot->v_eff_fixed[ic]); - } + { + v_eff_fixed_tmp[ic] = ic; + EXPECT_DOUBLE_EQ (v_eff_fixed_tmp[ic], pot->v_eff_fixed[ic]); + EXPECT_DOUBLE_EQ (v_eff_fixed_tmp_const[ic], pot->v_eff_fixed[ic]); + } } -TEST_F(PotentialNewTest, GetVeffSmooth) +TEST_F (PotentialNewTest, GetVeffSmooth) { // construct potential rhopw->nrxx = 100; XC_Functional::func_type = 3; XC_Functional::ked_flag = true; - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); // - ModuleBase::matrix veff_smooth_tmp = pot->get_veff_smooth(); - const ModuleBase::matrix veff_smooth_const_tmp = pot->get_veff_smooth(); - EXPECT_EQ(veff_smooth_tmp.nr, PARAM.input.nspin); - EXPECT_EQ(veff_smooth_tmp.nc, 100); - EXPECT_EQ(veff_smooth_const_tmp.nr, PARAM.input.nspin); - EXPECT_EQ(veff_smooth_const_tmp.nc, 100); + ModuleBase::matrix veff_smooth_tmp = pot->get_veff_smooth (); + const ModuleBase::matrix veff_smooth_const_tmp = 
pot->get_veff_smooth (); + EXPECT_EQ (veff_smooth_tmp.nr, PARAM.input.nspin); + EXPECT_EQ (veff_smooth_tmp.nc, 100); + EXPECT_EQ (veff_smooth_const_tmp.nr, PARAM.input.nspin); + EXPECT_EQ (veff_smooth_const_tmp.nc, 100); for (int ir = 0; ir < veff_smooth_tmp.nr; ir++) - { - for (int ic = 0; ic < veff_smooth_tmp.nc; ic++) { - EXPECT_DOUBLE_EQ(veff_smooth_tmp(ir, ic), pot->veff_smooth(ir, ic)); - EXPECT_DOUBLE_EQ(veff_smooth_const_tmp(ir, ic), pot->veff_smooth(ir, ic)); + for (int ic = 0; ic < veff_smooth_tmp.nc; ic++) + { + EXPECT_DOUBLE_EQ (veff_smooth_tmp (ir, ic), pot->veff_smooth (ir, ic)); + EXPECT_DOUBLE_EQ (veff_smooth_const_tmp (ir, ic), pot->veff_smooth (ir, ic)); + } } - } } -TEST_F(PotentialNewTest, GetVofkSmooth) +TEST_F (PotentialNewTest, GetVofkSmooth) { // construct potential rhopw->nrxx = 100; - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); // - ModuleBase::matrix vofk_smooth_tmp = pot->get_veff_smooth(); - const ModuleBase::matrix vofk_smooth_const_tmp = pot->get_veff_smooth(); - EXPECT_EQ(vofk_smooth_tmp.nr, PARAM.input.nspin); - EXPECT_EQ(vofk_smooth_tmp.nc, 100); - EXPECT_EQ(vofk_smooth_const_tmp.nr, PARAM.input.nspin); - EXPECT_EQ(vofk_smooth_const_tmp.nc, 100); + ModuleBase::matrix vofk_smooth_tmp = pot->get_veff_smooth (); + const ModuleBase::matrix vofk_smooth_const_tmp = pot->get_veff_smooth (); + EXPECT_EQ (vofk_smooth_tmp.nr, PARAM.input.nspin); + EXPECT_EQ (vofk_smooth_tmp.nc, 100); + EXPECT_EQ (vofk_smooth_const_tmp.nr, PARAM.input.nspin); + EXPECT_EQ (vofk_smooth_const_tmp.nc, 100); for (int ir = 0; ir < vofk_smooth_tmp.nr; ir++) - { - for (int ic = 0; ic < vofk_smooth_tmp.nc; ic++) { - EXPECT_DOUBLE_EQ(vofk_smooth_tmp(ir, ic), pot->vofk_smooth(ir, ic)); - EXPECT_DOUBLE_EQ(vofk_smooth_const_tmp(ir, ic), pot->vofk_smooth(ir, ic)); + for (int ic = 0; ic < vofk_smooth_tmp.nc; ic++) + { + 
EXPECT_DOUBLE_EQ (vofk_smooth_tmp (ir, ic), pot->vofk_smooth (ir, ic)); + EXPECT_DOUBLE_EQ (vofk_smooth_const_tmp (ir, ic), pot->vofk_smooth (ir, ic)); + } } - } } -TEST_F(PotentialNewTest, InterpolateVrsDoubleGrids) +TEST_F (PotentialNewTest, InterpolateVrsDoubleGrids) { - PARAM.sys.double_grid = true; - XC_Functional::func_type = 3; - XC_Functional::ked_flag = true; - // Init pw_basis - rhopw->initgrids(4, ModuleBase::Matrix3(1, 0, 0, 0, 1, 0, 0, 0, 1), 4); - rhopw->initparameters(false, 4); - rhopw->setuptransform(); - rhopw->collect_local_pw(); - - rhodpw->initgrids(4, ModuleBase::Matrix3(1, 0, 0, 0, 1, 0, 0, 0, 1), 6); - rhodpw->initparameters(false, 6); - static_cast(rhodpw)->setuptransform(rhopw); - rhodpw->collect_local_pw(); - - pot = new elecstate::Potential(rhodpw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); - - for (int ir = 0; ir < pot->v_eff.nr; ir++) - { - for (int ic = 0; ic < pot->v_eff.nc; ic++) - { - pot->v_eff(ir, ic) = ir + ic; - pot->vofk_eff(ir, ic) = ir + 2 * ic; - } - } - - pot->interpolate_vrs(); - - std::vector expect_veff = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26}; - std::vector expect_vofk = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52}; - - int index=0; - for (int ir = 0; ir < pot->veff_smooth.nr; ir++) - { - for (int ic = 0; ic < pot->veff_smooth.nc; ic++) + PARAM.sys.double_grid = true; + XC_Functional::func_type = 3; + XC_Functional::ked_flag = true; + // Init pw_basis + rhopw->initgrids (4, ModuleBase::Matrix3 (1, 0, 0, 0, 1, 0, 0, 0, 1), 4); + rhopw->initparameters (false, 4); + rhopw->setuptransform (); + rhopw->collect_local_pw (); + + rhodpw->initgrids (4, ModuleBase::Matrix3 (1, 0, 0, 0, 1, 0, 0, 0, 1), 6); + rhodpw->initparameters (false, 6); + static_cast (rhodpw)->setuptransform (rhopw); + rhodpw->collect_local_pw (); + + pot = new elecstate::Potential (rhodpw, rhopw, ucell, vloc, 
structure_factors, solvent, etxc, vtxc); + + for (int ir = 0; ir < pot->v_eff.nr; ir++) { - EXPECT_DOUBLE_EQ(pot->veff_smooth(ir,ic), expect_veff[index]); - EXPECT_DOUBLE_EQ(pot->vofk_smooth(ir,ic), expect_vofk[index]); - index++; + for (int ic = 0; ic < pot->v_eff.nc; ic++) + { + pot->v_eff (ir, ic) = ir + ic; + pot->vofk_eff (ir, ic) = ir + 2 * ic; + } } - } + pot->interpolate_vrs (); + + std::vector expect_veff + = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26}; + std::vector expect_vofk + = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52}; + + int index = 0; + for (int ir = 0; ir < pot->veff_smooth.nr; ir++) + { + for (int ic = 0; ic < pot->veff_smooth.nc; ic++) + { + EXPECT_DOUBLE_EQ (pot->veff_smooth (ir, ic), expect_veff[index]); + EXPECT_DOUBLE_EQ (pot->vofk_smooth (ir, ic), expect_vofk[index]); + index++; + } + } } -TEST_F(PotentialNewTest, InterpolateVrsWarningQuit) +TEST_F (PotentialNewTest, InterpolateVrsWarningQuit) { - PARAM.sys.double_grid = true; + PARAM.sys.double_grid = true; // Init pw_basis - rhopw->initgrids(4, ModuleBase::Matrix3(1, 0, 0, 0, 1, 0, 0, 0, 1), 4); - rhopw->initparameters(false, 4); - rhopw->setuptransform(); - rhopw->collect_local_pw(); + rhopw->initgrids (4, ModuleBase::Matrix3 (1, 0, 0, 0, 1, 0, 0, 0, 1), 4); + rhopw->initparameters (false, 4); + rhopw->setuptransform (); + rhopw->collect_local_pw (); rhodpw->gamma_only = false; - rhodpw->initgrids(4, ModuleBase::Matrix3(1, 0, 0, 0, 1, 0, 0, 0, 1), 6); - rhodpw->initparameters(false, 6); - static_cast(rhodpw)->setuptransform(rhopw); - rhodpw->collect_local_pw(); + rhodpw->initgrids (4, ModuleBase::Matrix3 (1, 0, 0, 0, 1, 0, 0, 0, 1), 6); + rhodpw->initparameters (false, 6); + static_cast (rhodpw)->setuptransform (rhopw); + rhodpw->collect_local_pw (); rhodpw->gamma_only = true; - pot = new elecstate::Potential(rhodpw, rhopw, ucell, vloc, structure_factors, solvent, etxc, 
vtxc); + pot = new elecstate::Potential (rhodpw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); - EXPECT_EXIT(pot->interpolate_vrs(), ::testing::ExitedWithCode(1), ""); + EXPECT_EXIT (pot->interpolate_vrs (), ::testing::ExitedWithCode (1), ""); } -TEST_F(PotentialNewTest, InterpolateVrsSingleGrids) +TEST_F (PotentialNewTest, InterpolateVrsSingleGrids) { - PARAM.sys.double_grid = false; - XC_Functional::func_type = 3; - XC_Functional::ked_flag = true; - // Init pw_basis - rhopw->initgrids(4, ModuleBase::Matrix3(1, 0, 0, 0, 1, 0, 0, 0, 1), 4); - rhopw->initparameters(false, 4); - rhopw->setuptransform(); - rhopw->collect_local_pw(); - - pot = new elecstate::Potential(rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); - - for (int ir = 0; ir < pot->v_eff.nr; ir++) - { - for (int ic = 0; ic < pot->v_eff.nc; ic++) - { - pot->v_eff(ir, ic) = ir + ic; - pot->vofk_eff(ir, ic) = ir + 2 * ic; - } - } - - pot->interpolate_vrs(); + PARAM.sys.double_grid = false; + XC_Functional::func_type = 3; + XC_Functional::ked_flag = true; + // Init pw_basis + rhopw->initgrids (4, ModuleBase::Matrix3 (1, 0, 0, 0, 1, 0, 0, 0, 1), 4); + rhopw->initparameters (false, 4); + rhopw->setuptransform (); + rhopw->collect_local_pw (); - for (int ir = 0; ir < pot->veff_smooth.nr; ir++) - { - for (int ic = 0; ic < pot->veff_smooth.nc; ic++) + pot = new elecstate::Potential (rhopw, rhopw, ucell, vloc, structure_factors, solvent, etxc, vtxc); + + for (int ir = 0; ir < pot->v_eff.nr; ir++) { - EXPECT_DOUBLE_EQ(pot->veff_smooth(ir,ic), ir+ic); - EXPECT_DOUBLE_EQ(pot->vofk_smooth(ir,ic), ir+2*ic); + for (int ic = 0; ic < pot->v_eff.nc; ic++) + { + pot->v_eff (ir, ic) = ir + ic; + pot->vofk_eff (ir, ic) = ir + 2 * ic; + } } - } + pot->interpolate_vrs (); + + for (int ir = 0; ir < pot->veff_smooth.nr; ir++) + { + for (int ic = 0; ic < pot->veff_smooth.nc; ic++) + { + EXPECT_DOUBLE_EQ (pot->veff_smooth (ir, ic), ir + ic); + EXPECT_DOUBLE_EQ (pot->vofk_smooth (ir, ic), ir + 2 * 
ic); + } + } } diff --git a/source/source_estate/test/potentials_base_test.cpp b/source/source_estate/test/potentials_base_test.cpp index 048c48ecb48..cbfa6d66e3e 100644 --- a/source/source_estate/test/potentials_base_test.cpp +++ b/source/source_estate/test/potentials_base_test.cpp @@ -1,7 +1,7 @@ #include "gtest/gtest.h" #include "source_estate/module_pot/pot_base.h" -ModuleBase::matrix::~matrix(){} +ModuleBase::matrix::~matrix () {} /*************************************************************** * unit test of functions in pot_base.h @@ -18,12 +18,14 @@ namespace elecstate class MockPot : public PotBase { public: - bool get_fixed_mode() const + bool + get_fixed_mode () const { return fixed_mode; } - bool get_dynamic_mode() const + bool + get_dynamic_mode () const { return dynamic_mode; } @@ -34,12 +36,14 @@ class MockPot : public PotBase class PotBaseTest : public ::testing::Test { protected: - void SetUp() override + void + SetUp () override { pot_base_ = new elecstate::MockPot; } - void TearDown() override + void + TearDown () override { delete pot_base_; } @@ -47,21 +51,21 @@ class PotBaseTest : public ::testing::Test elecstate::MockPot* pot_base_; }; -TEST_F(PotBaseTest, CalVeff) +TEST_F (PotBaseTest, CalVeff) { ModuleBase::matrix v_eff; pot_base_->fixed_mode = true; - if (pot_base_->get_fixed_mode()) - { - EXPECT_NO_THROW(pot_base_->cal_v_eff(nullptr, nullptr, v_eff)); - } + if (pot_base_->get_fixed_mode ()) + { + EXPECT_NO_THROW (pot_base_->cal_v_eff (nullptr, nullptr, v_eff)); + } } -TEST_F(PotBaseTest, CalFixedV) +TEST_F (PotBaseTest, CalFixedV) { pot_base_->dynamic_mode = true; - if (pot_base_->get_dynamic_mode()) - { - EXPECT_NO_THROW(pot_base_->cal_fixed_v(nullptr)); - } + if (pot_base_->get_dynamic_mode ()) + { + EXPECT_NO_THROW (pot_base_->cal_fixed_v (nullptr)); + } } \ No newline at end of file diff --git a/source/source_estate/test/prepare_unitcell.h b/source/source_estate/test/prepare_unitcell.h index eec2fa9a521..9e8d9884f30 100644 --- 
a/source/source_estate/test/prepare_unitcell.h +++ b/source/source_estate/test/prepare_unitcell.h @@ -1,278 +1,268 @@ #ifndef PREPARE_UNITCELL_H #define PREPARE_UNITCELL_H -#include -#include +#include +#include #include "source_base/mathzone.h" class UcellTestPrepare { -public: - UcellTestPrepare()=default; - UcellTestPrepare(std::string latname_in, - int lmaxmax_in, - bool init_vel_in, - bool selective_dynamics_in, - bool relax_new_in, - std::string fixed_axes_in, - double lat0_in, - std::valarray latvec_in, - std::vector elements_in, - std::vector pp_files_in, - std::vector pp_types_in, - std::vector orb_files_in, - std::valarray natom_in, - std::vector atomic_mass_in, - std::string coor_type_in, - std::valarray coordinates_in, - std::valarray mbl_in, - std::valarray velocity_in); - UcellTestPrepare(const UcellTestPrepare &utp); + public: + UcellTestPrepare () = default; + UcellTestPrepare (std::string latname_in, + int lmaxmax_in, + bool init_vel_in, + bool selective_dynamics_in, + bool relax_new_in, + std::string fixed_axes_in, + double lat0_in, + std::valarray latvec_in, + std::vector elements_in, + std::vector pp_files_in, + std::vector pp_types_in, + std::vector orb_files_in, + std::valarray natom_in, + std::vector atomic_mass_in, + std::string coor_type_in, + std::valarray coordinates_in, + std::valarray mbl_in, + std::valarray velocity_in); + UcellTestPrepare (const UcellTestPrepare& utp); - std::string latname; - int lmaxmax; - bool init_vel; - bool selective_dynamics; - bool relax_new; - std::string fixed_axes; - double lat0; - std::valarray latvec; - std::vector elements; - std::vector pp_files; - std::vector pp_types; - std::vector orb_files; - std::valarray natom; - std::vector atomic_mass; - std::string coor_type; - std::valarray coordinates; - std::valarray mbl; - std::valarray velocity; - // ntype - int ntype; - int atomic_index; + std::string latname; + int lmaxmax; + bool init_vel; + bool selective_dynamics; + bool relax_new; + std::string 
fixed_axes; + double lat0; + std::valarray latvec; + std::vector elements; + std::vector pp_files; + std::vector pp_types; + std::vector orb_files; + std::valarray natom; + std::vector atomic_mass; + std::string coor_type; + std::valarray coordinates; + std::valarray mbl; + std::valarray velocity; + // ntype + int ntype; + int atomic_index; - std::unique_ptr SetUcellInfo() + std::unique_ptr + SetUcellInfo () { - //basic info - this->ntype = this->elements.size(); - std::unique_ptr ucell(new UnitCell); - ucell->setup(this->latname, - this->ntype, - this->lmaxmax, - this->init_vel, - this->fixed_axes); - delete[] ucell->magnet.start_mag; //mag set here - - ucell->atom_label.resize(ucell->ntype); - ucell->atom_mass.resize(ucell->ntype); - ucell->pseudo_fn.resize(ucell->ntype); - ucell->pseudo_type.resize(ucell->ntype); - ucell->orbital_fn.resize(ucell->ntype); - ucell->magnet.start_mag = new double[ucell->ntype]; //mag set here - ucell->magnet.ux_[0] = 0.0; // ux_ set here - ucell->magnet.ux_[1] = 0.0; - ucell->magnet.ux_[2] = 0.0; - for(int it=0;itntype;++it) - { - ucell->atom_label[it] = this->elements[it]; - ucell->atom_mass[it] = this->atomic_mass[it]; - ucell->pseudo_fn[it] = this->pp_files[it]; - ucell->pseudo_type[it] = this->pp_types[it]; - ucell->orbital_fn[it] = this->orb_files[it]; - ucell->magnet.start_mag[it] = 0.0; //mag set here - } - //lattice info - ucell->lat0 = this->lat0; - ucell->lat0_angstrom = ucell->lat0 * ModuleBase::BOHR_TO_A; - ucell->tpiba = ModuleBase::TWO_PI/ucell->lat0; - ucell->tpiba2 = ucell->tpiba * ucell->tpiba; - ucell->latvec.e11 = this->latvec[0]; - ucell->latvec.e12 = this->latvec[1]; - ucell->latvec.e13 = this->latvec[2]; - ucell->latvec.e21 = this->latvec[3]; - ucell->latvec.e22 = this->latvec[4]; - ucell->latvec.e23 = this->latvec[5]; - ucell->latvec.e31 = this->latvec[6]; - ucell->latvec.e32 = this->latvec[7]; - ucell->latvec.e33 = this->latvec[8]; - ucell->a1.x = ucell->latvec.e11; - ucell->a1.y = ucell->latvec.e12; - 
ucell->a1.z = ucell->latvec.e13; - ucell->a2.x = ucell->latvec.e21; - ucell->a2.y = ucell->latvec.e22; - ucell->a2.z = ucell->latvec.e23; - ucell->a3.x = ucell->latvec.e31; - ucell->a3.y = ucell->latvec.e32; - ucell->a3.z = ucell->latvec.e33; - ucell->GT = ucell->latvec.Inverse(); - ucell->G = ucell->GT.Transpose(); - ucell->GGT = ucell->G*ucell->GT; - ucell->invGGT = ucell->GGT.Inverse(); - ucell->omega = std::abs(ucell->latvec.Det())*(ucell->lat0)*(ucell->lat0)*(ucell->lat0); - //atomic info - ucell->Coordinate = this->coor_type; - ucell->atoms = new Atom[ucell->ntype]; - ucell->set_atom_flag = true; - this->atomic_index = 0; - for(int it=0;itntype;++it) - { - ucell->atoms[it].label = this->elements[it]; - ucell->atoms[it].nw = 0; - ucell->atoms[it].nwl = 2; - ucell->atoms[it].l_nchi.resize(ucell->atoms[it].nwl+1); - for(int L=0; Latoms[it].nwl+1; L++) - { - ucell->atoms[it].l_nchi[L] = 1; - ucell->atoms[it].nw += (2*L + 1) * ucell->atoms[it].l_nchi[L]; - } - ucell->atoms[it].na = this->natom[it]; - //coordinates and related physical quantities - ucell->atoms[it].tau.resize(ucell->atoms[it].na); - ucell->atoms[it].dis.resize(ucell->atoms[it].na); - ucell->atoms[it].taud.resize(ucell->atoms[it].na); - ucell->atoms[it].vel.resize(ucell->atoms[it].na); - ucell->atoms[it].mag.resize(ucell->atoms[it].na); - ucell->atoms[it].angle1.resize(ucell->atoms[it].na); - ucell->atoms[it].angle2.resize(ucell->atoms[it].na); - ucell->atoms[it].m_loc_.resize(ucell->atoms[it].na); - ucell->atoms[it].mbl.resize(ucell->atoms[it].na); - ucell->atoms[it].mass = ucell->atom_mass[it]; // mass set here - for(int ia=0; iaatoms[it].na; ++ia) - { - if (ucell->Coordinate == "Direct") - { - ucell->atoms[it].taud[ia].x = this->coordinates[this->atomic_index*3+0]; - ucell->atoms[it].taud[ia].y = this->coordinates[this->atomic_index*3+1]; - ucell->atoms[it].taud[ia].z = this->coordinates[this->atomic_index*3+2]; - ucell->atoms[it].tau[ia] = ucell->atoms[it].taud[ia]*ucell->latvec; - } - else if 
(ucell->Coordinate == "Cartesian") - { - ucell->atoms[it].tau[ia].x = this->coordinates[this->atomic_index*3+0]; - ucell->atoms[it].tau[ia].y = this->coordinates[this->atomic_index*3+1]; - ucell->atoms[it].tau[ia].z = this->coordinates[this->atomic_index*3+2]; - ModuleBase::Mathzone::Cartesian_to_Direct( - ucell->atoms[it].tau[ia].x, ucell->atoms[it].tau[ia].y, ucell->atoms[it].tau[ia].z, - ucell->latvec.e11, ucell->latvec.e12, ucell->latvec.e13, - ucell->latvec.e21, ucell->latvec.e22, ucell->latvec.e23, - ucell->latvec.e31, ucell->latvec.e32, ucell->latvec.e33, - ucell->atoms[it].taud[ia].x, ucell->atoms[it].taud[ia].y, ucell->atoms[it].taud[ia].z); - } - ucell->atoms[it].dis[ia].set(0, 0, 0); - if(this->init_vel) - { - ucell->atoms[it].vel[ia].x = this->velocity[this->atomic_index*3+0]; - ucell->atoms[it].vel[ia].y = this->velocity[this->atomic_index*3+1]; - ucell->atoms[it].vel[ia].z = this->velocity[this->atomic_index*3+2]; - } - else - { - ucell->atoms[it].vel[ia].set(0,0,0); - } - ucell->atoms[it].m_loc_[ia].set(0,0,0); - ucell->atoms[it].angle1[ia] = 0; - ucell->atoms[it].angle2[ia] = 0; - if(this->selective_dynamics) - { - ucell->atoms[it].mbl[ia].x = this->mbl[this->atomic_index*3+0]; - ucell->atoms[it].mbl[ia].y = this->mbl[this->atomic_index*3+1]; - ucell->atoms[it].mbl[ia].z = this->mbl[this->atomic_index*3+2]; - } - else - { - ucell->atoms[it].mbl[ia] = {1,1,1}; - } - ++(this->atomic_index); - } - } - ucell->nat = this->natom.sum(); - return ucell; - }; -}; + // basic info + this->ntype = this->elements.size (); + std::unique_ptr ucell (new UnitCell); + ucell->setup (this->latname, this->ntype, this->lmaxmax, this->init_vel, this->fixed_axes); + delete[] ucell->magnet.start_mag; // mag set here -UcellTestPrepare::UcellTestPrepare(std::string latname_in, - int lmaxmax_in, - bool init_vel_in, - bool selective_dynamics_in, - bool relax_new_in, - std::string fixed_axes_in, - double lat0_in, - std::valarray latvec_in, - std::vector elements_in, - 
std::vector pp_files_in, - std::vector pp_types_in, - std::vector orb_files_in, - std::valarray natom_in, - std::vector atomic_mass_in, - std::string coor_type_in, - std::valarray coordinates_in, - std::valarray mbl_in, - std::valarray velocity_in): - latname(latname_in), - lmaxmax(lmaxmax_in), - init_vel(init_vel_in), - selective_dynamics(selective_dynamics_in), - relax_new(relax_new_in), - fixed_axes(fixed_axes_in), - lat0(lat0_in), - latvec(latvec_in), - elements(elements_in), - pp_files(pp_files_in), - pp_types(pp_types_in), - orb_files(orb_files_in), - natom(natom_in), - atomic_mass(atomic_mass_in), - coor_type(coor_type_in), - coordinates(coordinates_in), - mbl(mbl_in), - velocity(velocity_in) // velocity assume the existence of mbl in print_stru_file() -{} + ucell->atom_label.resize (ucell->ntype); + ucell->atom_mass.resize (ucell->ntype); + ucell->pseudo_fn.resize (ucell->ntype); + ucell->pseudo_type.resize (ucell->ntype); + ucell->orbital_fn.resize (ucell->ntype); + ucell->magnet.start_mag = new double[ucell->ntype]; // mag set here + ucell->magnet.ux_[0] = 0.0; // ux_ set here + ucell->magnet.ux_[1] = 0.0; + ucell->magnet.ux_[2] = 0.0; + for (int it = 0; it < ucell->ntype; ++it) + { + ucell->atom_label[it] = this->elements[it]; + ucell->atom_mass[it] = this->atomic_mass[it]; + ucell->pseudo_fn[it] = this->pp_files[it]; + ucell->pseudo_type[it] = this->pp_types[it]; + ucell->orbital_fn[it] = this->orb_files[it]; + ucell->magnet.start_mag[it] = 0.0; // mag set here + } + // lattice info + ucell->lat0 = this->lat0; + ucell->lat0_angstrom = ucell->lat0 * ModuleBase::BOHR_TO_A; + ucell->tpiba = ModuleBase::TWO_PI / ucell->lat0; + ucell->tpiba2 = ucell->tpiba * ucell->tpiba; + ucell->latvec.e11 = this->latvec[0]; + ucell->latvec.e12 = this->latvec[1]; + ucell->latvec.e13 = this->latvec[2]; + ucell->latvec.e21 = this->latvec[3]; + ucell->latvec.e22 = this->latvec[4]; + ucell->latvec.e23 = this->latvec[5]; + ucell->latvec.e31 = this->latvec[6]; + 
ucell->latvec.e32 = this->latvec[7]; + ucell->latvec.e33 = this->latvec[8]; + ucell->a1.x = ucell->latvec.e11; + ucell->a1.y = ucell->latvec.e12; + ucell->a1.z = ucell->latvec.e13; + ucell->a2.x = ucell->latvec.e21; + ucell->a2.y = ucell->latvec.e22; + ucell->a2.z = ucell->latvec.e23; + ucell->a3.x = ucell->latvec.e31; + ucell->a3.y = ucell->latvec.e32; + ucell->a3.z = ucell->latvec.e33; + ucell->GT = ucell->latvec.Inverse (); + ucell->G = ucell->GT.Transpose (); + ucell->GGT = ucell->G * ucell->GT; + ucell->invGGT = ucell->GGT.Inverse (); + ucell->omega = std::abs (ucell->latvec.Det ()) * (ucell->lat0) * (ucell->lat0) * (ucell->lat0); + // atomic info + ucell->Coordinate = this->coor_type; + ucell->atoms = new Atom[ucell->ntype]; + ucell->set_atom_flag = true; + this->atomic_index = 0; + for (int it = 0; it < ucell->ntype; ++it) + { + ucell->atoms[it].label = this->elements[it]; + ucell->atoms[it].nw = 0; + ucell->atoms[it].nwl = 2; + ucell->atoms[it].l_nchi.resize (ucell->atoms[it].nwl + 1); + for (int L = 0; L < ucell->atoms[it].nwl + 1; L++) + { + ucell->atoms[it].l_nchi[L] = 1; + ucell->atoms[it].nw += (2 * L + 1) * ucell->atoms[it].l_nchi[L]; + } + ucell->atoms[it].na = this->natom[it]; + // coordinates and related physical quantities + ucell->atoms[it].tau.resize (ucell->atoms[it].na); + ucell->atoms[it].dis.resize (ucell->atoms[it].na); + ucell->atoms[it].taud.resize (ucell->atoms[it].na); + ucell->atoms[it].vel.resize (ucell->atoms[it].na); + ucell->atoms[it].mag.resize (ucell->atoms[it].na); + ucell->atoms[it].angle1.resize (ucell->atoms[it].na); + ucell->atoms[it].angle2.resize (ucell->atoms[it].na); + ucell->atoms[it].m_loc_.resize (ucell->atoms[it].na); + ucell->atoms[it].mbl.resize (ucell->atoms[it].na); + ucell->atoms[it].mass = ucell->atom_mass[it]; // mass set here + for (int ia = 0; ia < ucell->atoms[it].na; ++ia) + { + if (ucell->Coordinate == "Direct") + { + ucell->atoms[it].taud[ia].x = this->coordinates[this->atomic_index * 3 + 0]; + 
ucell->atoms[it].taud[ia].y = this->coordinates[this->atomic_index * 3 + 1]; + ucell->atoms[it].taud[ia].z = this->coordinates[this->atomic_index * 3 + 2]; + ucell->atoms[it].tau[ia] = ucell->atoms[it].taud[ia] * ucell->latvec; + } + else if (ucell->Coordinate == "Cartesian") + { + ucell->atoms[it].tau[ia].x = this->coordinates[this->atomic_index * 3 + 0]; + ucell->atoms[it].tau[ia].y = this->coordinates[this->atomic_index * 3 + 1]; + ucell->atoms[it].tau[ia].z = this->coordinates[this->atomic_index * 3 + 2]; + ModuleBase::Mathzone::Cartesian_to_Direct (ucell->atoms[it].tau[ia].x, + ucell->atoms[it].tau[ia].y, + ucell->atoms[it].tau[ia].z, + ucell->latvec.e11, + ucell->latvec.e12, + ucell->latvec.e13, + ucell->latvec.e21, + ucell->latvec.e22, + ucell->latvec.e23, + ucell->latvec.e31, + ucell->latvec.e32, + ucell->latvec.e33, + ucell->atoms[it].taud[ia].x, + ucell->atoms[it].taud[ia].y, + ucell->atoms[it].taud[ia].z); + } + ucell->atoms[it].dis[ia].set (0, 0, 0); + if (this->init_vel) + { + ucell->atoms[it].vel[ia].x = this->velocity[this->atomic_index * 3 + 0]; + ucell->atoms[it].vel[ia].y = this->velocity[this->atomic_index * 3 + 1]; + ucell->atoms[it].vel[ia].z = this->velocity[this->atomic_index * 3 + 2]; + } + else + { + ucell->atoms[it].vel[ia].set (0, 0, 0); + } + ucell->atoms[it].m_loc_[ia].set (0, 0, 0); + ucell->atoms[it].angle1[ia] = 0; + ucell->atoms[it].angle2[ia] = 0; + if (this->selective_dynamics) + { + ucell->atoms[it].mbl[ia].x = this->mbl[this->atomic_index * 3 + 0]; + ucell->atoms[it].mbl[ia].y = this->mbl[this->atomic_index * 3 + 1]; + ucell->atoms[it].mbl[ia].z = this->mbl[this->atomic_index * 3 + 2]; + } + else + { + ucell->atoms[it].mbl[ia] = {1, 1, 1}; + } + ++(this->atomic_index); + } + } + ucell->nat = this->natom.sum (); + return ucell; + }; +}; -UcellTestPrepare::UcellTestPrepare(const UcellTestPrepare &utp): - latname(utp.latname), - lmaxmax(utp.lmaxmax), - init_vel(utp.init_vel), - selective_dynamics(utp.selective_dynamics), - 
relax_new(utp.relax_new), - fixed_axes(utp.fixed_axes), - lat0(utp.lat0), - latvec(utp.latvec), - elements(utp.elements), - pp_files(utp.pp_files), - pp_types(utp.pp_types), - orb_files(utp.orb_files), - natom(utp.natom), - atomic_mass(utp.atomic_mass), - coor_type(utp.coor_type), - coordinates(utp.coordinates), - mbl(utp.mbl), - velocity(utp.velocity) // velocity assume the existence of mbl in print_stru_file() -{} +UcellTestPrepare::UcellTestPrepare (std::string latname_in, + int lmaxmax_in, + bool init_vel_in, + bool selective_dynamics_in, + bool relax_new_in, + std::string fixed_axes_in, + double lat0_in, + std::valarray latvec_in, + std::vector elements_in, + std::vector pp_files_in, + std::vector pp_types_in, + std::vector orb_files_in, + std::valarray natom_in, + std::vector atomic_mass_in, + std::string coor_type_in, + std::valarray coordinates_in, + std::valarray mbl_in, + std::valarray velocity_in) + : latname (latname_in), lmaxmax (lmaxmax_in), init_vel (init_vel_in), selective_dynamics (selective_dynamics_in), + relax_new (relax_new_in), fixed_axes (fixed_axes_in), lat0 (lat0_in), latvec (latvec_in), elements (elements_in), + pp_files (pp_files_in), pp_types (pp_types_in), orb_files (orb_files_in), natom (natom_in), + atomic_mass (atomic_mass_in), coor_type (coor_type_in), coordinates (coordinates_in), mbl (mbl_in), + velocity (velocity_in) // velocity assume the existence of mbl in print_stru_file() +{ +} -std::map UcellTestLib +UcellTestPrepare::UcellTestPrepare (const UcellTestPrepare& utp) + : latname (utp.latname), lmaxmax (utp.lmaxmax), init_vel (utp.init_vel), + selective_dynamics (utp.selective_dynamics), relax_new (utp.relax_new), fixed_axes (utp.fixed_axes), + lat0 (utp.lat0), latvec (utp.latvec), elements (utp.elements), pp_files (utp.pp_files), pp_types (utp.pp_types), + orb_files (utp.orb_files), natom (utp.natom), atomic_mass (utp.atomic_mass), coor_type (utp.coor_type), + coordinates (utp.coordinates), mbl (utp.mbl), + velocity 
(utp.velocity) // velocity assume the existence of mbl in print_stru_file() { - {"Si", UcellTestPrepare( - "fcc", //latname - 2, //lmaxmax - true, //init_vel - true, //selective_dyanmics - true, //relax_new - "volume", //fixed_axes - 10.2, //lat0 - {-0.5,0.0,0.5, //latvec - 0.0,0.5,0.5, - -0.5,0.5,0.0}, - {"Si"}, //elements - {"Si.upf"}, //upf file - {"upf201"}, //upf types - {"Si.orb"}, //orb file - {2}, //number of each elements - {28.0}, //atomic mass - "Cartesian", //coordination type - {0.0,0.0,0.0, //atomic coordinates - 0.25,0.25,0.25}, - {1,1,1, - 1,1,1}, - {0,0,0, - 0,0,0})}, +} + +std::map UcellTestLib{ + {"Si", + UcellTestPrepare ("fcc", // latname + 2, // lmaxmax + true, // init_vel + true, // selective_dyanmics + true, // relax_new + "volume", // fixed_axes + 10.2, // lat0 + {-0.5, + 0.0, + 0.5, // latvec + 0.0, + 0.5, + 0.5, + -0.5, + 0.5, + 0.0}, + {"Si"}, // elements + {"Si.upf"}, // upf file + {"upf201"}, // upf types + {"Si.orb"}, // orb file + {2}, // number of each elements + {28.0}, // atomic mass + "Cartesian", // coordination type + {0.0, + 0.0, + 0.0, // atomic coordinates + 0.25, + 0.25, + 0.25}, + {1, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0})}, }; #endif diff --git a/source/source_estate/test_mpi/charge_mpi_test.cpp b/source/source_estate/test_mpi/charge_mpi_test.cpp index e3214fbf655..69e53a22b0e 100644 --- a/source/source_estate/test_mpi/charge_mpi_test.cpp +++ b/source/source_estate/test_mpi/charge_mpi_test.cpp @@ -8,22 +8,22 @@ #include "source_io/module_parameter/parameter.h" bool XC_Functional::ked_flag = false; -Charge::Charge() -{ -} -Charge::~Charge() +Charge::Charge () {} +Charge::~Charge () { delete[] rec; delete[] dis; } -auto sum_array = [](const double* v, const int& nv) { - double sum = 0; - for (int i = 0; i < nv; ++i) { - sum += v[i]; -} - return sum; -}; +auto sum_array = [] (const double* v, const int& nv) + { + double sum = 0; + for (int i = 0; i < nv; ++i) + { + sum += v[i]; + } + return sum; + }; 
/************************************************ * unit test of module_charge/charge_mpi.cpp ***********************************************/ @@ -44,228 +44,231 @@ class ChargeMpiTest : public ::testing::Test std::string output; double lat0 = 4; ModuleBase::Matrix3 latvec; - void SetUp() override + void + SetUp () override { charge = new Charge; } - void TearDown() override + void + TearDown () override { delete charge; } }; -TEST_F(ChargeMpiTest, reduce_diff_pools1) +TEST_F (ChargeMpiTest, reduce_diff_pools1) { if (GlobalV::NPROC >= 2 && GlobalV::NPROC % 2 == 0) - { - GlobalV::KPAR = 2; - Parallel_Global::init_pools(GlobalV::NPROC, - GlobalV::MY_RANK, - PARAM.input.bndpar, - GlobalV::KPAR, - GlobalV::NPROC_IN_BNDGROUP, - GlobalV::RANK_IN_BPGROUP, - GlobalV::MY_BNDGROUP, - GlobalV::NPROC_IN_POOL, - GlobalV::RANK_IN_POOL, - GlobalV::MY_POOL); - ModulePW::PW_Basis* rhopw = new ModulePW::PW_Basis(); - rhopw->initmpi(GlobalV::NPROC_IN_POOL, GlobalV::RANK_IN_POOL, POOL_WORLD); - rhopw->initgrids(lat0, latvec, 40); - rhopw->initparameters(false, 10); - rhopw->setuptransform(); - - int nz = rhopw->nz; - const int nrxx = rhopw->nrxx; - const int nxy = rhopw->nxy; - const int nplane = rhopw->nplane; - charge->nrxx = nrxx; - double* array_rho = new double[nrxx]; - for (int ir = 0; ir < nxy; ++ir) { - for (int iz = 0; iz < nplane; ++iz) - { - array_rho[nplane * ir + iz] = (rhopw->startz_current + iz + ir * nz) / double(nxy * nz); - } - } - double refsum = sum_array(array_rho, nrxx); + GlobalV::KPAR = 2; + Parallel_Global::init_pools (GlobalV::NPROC, + GlobalV::MY_RANK, + PARAM.input.bndpar, + GlobalV::KPAR, + GlobalV::NPROC_IN_BNDGROUP, + GlobalV::RANK_IN_BPGROUP, + GlobalV::MY_BNDGROUP, + GlobalV::NPROC_IN_POOL, + GlobalV::RANK_IN_POOL, + GlobalV::MY_POOL); + ModulePW::PW_Basis* rhopw = new ModulePW::PW_Basis (); + rhopw->initmpi (GlobalV::NPROC_IN_POOL, GlobalV::RANK_IN_POOL, POOL_WORLD); + rhopw->initgrids (lat0, latvec, 40); + rhopw->initparameters (false, 10); + 
rhopw->setuptransform (); - charge->init_chgmpi(); - charge->reduce_diff_pools(array_rho); - double sum = sum_array(array_rho, nrxx); - EXPECT_EQ(sum, refsum * GlobalV::KPAR); + int nz = rhopw->nz; + const int nrxx = rhopw->nrxx; + const int nxy = rhopw->nxy; + const int nplane = rhopw->nplane; + charge->nrxx = nrxx; + double* array_rho = new double[nrxx]; + for (int ir = 0; ir < nxy; ++ir) + { + for (int iz = 0; iz < nplane; ++iz) + { + array_rho[nplane * ir + iz] = (rhopw->startz_current + iz + ir * nz) / double (nxy * nz); + } + } + double refsum = sum_array (array_rho, nrxx); - delete[] array_rho; - delete rhopw; - } + charge->init_chgmpi (); + charge->reduce_diff_pools (array_rho); + double sum = sum_array (array_rho, nrxx); + EXPECT_EQ (sum, refsum * GlobalV::KPAR); + + delete[] array_rho; + delete rhopw; + } } -TEST_F(ChargeMpiTest, reduce_diff_pools2) +TEST_F (ChargeMpiTest, reduce_diff_pools2) { if (GlobalV::NPROC >= 3) - { - GlobalV::KPAR = 3; - Parallel_Global::divide_pools(GlobalV::NPROC, - GlobalV::MY_RANK, - PARAM.input.bndpar, - GlobalV::KPAR, - GlobalV::NPROC_IN_BNDGROUP, - GlobalV::RANK_IN_BPGROUP, - GlobalV::MY_BNDGROUP, - GlobalV::NPROC_IN_POOL, - GlobalV::RANK_IN_POOL, - GlobalV::MY_POOL); - ModulePW::PW_Basis* rhopw = new ModulePW::PW_Basis(); - rhopw->initmpi(GlobalV::NPROC_IN_POOL, GlobalV::RANK_IN_POOL, POOL_WORLD); - rhopw->initgrids(lat0, latvec, 40); - rhopw->initparameters(false, 10); - rhopw->setuptransform(); - charge->rhopw = rhopw; - - int nz = rhopw->nz; - const int nrxx = rhopw->nrxx; - const int nxy = rhopw->nxy; - const int nplane = rhopw->nplane; - charge->nrxx = nrxx; - double* array_ref = new double[nxy * nz]; - for (int ixyz = 0; ixyz < nxy * nz; ++ixyz) { - array_ref[ixyz] = ixyz / double(nxy * nz); - } - double refsum = sum_array(array_ref, nxy * nz); + GlobalV::KPAR = 3; + Parallel_Global::divide_pools (GlobalV::NPROC, + GlobalV::MY_RANK, + PARAM.input.bndpar, + GlobalV::KPAR, + GlobalV::NPROC_IN_BNDGROUP, + 
GlobalV::RANK_IN_BPGROUP, + GlobalV::MY_BNDGROUP, + GlobalV::NPROC_IN_POOL, + GlobalV::RANK_IN_POOL, + GlobalV::MY_POOL); + ModulePW::PW_Basis* rhopw = new ModulePW::PW_Basis (); + rhopw->initmpi (GlobalV::NPROC_IN_POOL, GlobalV::RANK_IN_POOL, POOL_WORLD); + rhopw->initgrids (lat0, latvec, 40); + rhopw->initparameters (false, 10); + rhopw->setuptransform (); + charge->rhopw = rhopw; - double* array_rho = new double[nrxx]; - for (int ir = 0; ir < nxy; ++ir) - { - for (int iz = 0; iz < nplane; ++iz) - { - array_rho[nplane * ir + iz] = (rhopw->startz_current + iz + ir * nz) / double(nxy * nz); - } - } + int nz = rhopw->nz; + const int nrxx = rhopw->nrxx; + const int nxy = rhopw->nxy; + const int nplane = rhopw->nplane; + charge->nrxx = nrxx; + double* array_ref = new double[nxy * nz]; + for (int ixyz = 0; ixyz < nxy * nz; ++ixyz) + { + array_ref[ixyz] = ixyz / double (nxy * nz); + } + double refsum = sum_array (array_ref, nxy * nz); - charge->init_chgmpi(); - charge->reduce_diff_pools(array_rho); - double sum = sum_array(array_rho, nrxx); - MPI_Allreduce(MPI_IN_PLACE, &sum, 1, MPI_DOUBLE, MPI_SUM, POOL_WORLD); - EXPECT_EQ(sum, refsum * GlobalV::KPAR); + double* array_rho = new double[nrxx]; + for (int ir = 0; ir < nxy; ++ir) + { + for (int iz = 0; iz < nplane; ++iz) + { + array_rho[nplane * ir + iz] = (rhopw->startz_current + iz + ir * nz) / double (nxy * nz); + } + } - delete[] array_rho; - delete rhopw; - delete[] array_ref; - } + charge->init_chgmpi (); + charge->reduce_diff_pools (array_rho); + double sum = sum_array (array_rho, nrxx); + MPI_Allreduce (MPI_IN_PLACE, &sum, 1, MPI_DOUBLE, MPI_SUM, POOL_WORLD); + EXPECT_EQ (sum, refsum * GlobalV::KPAR); + + delete[] array_rho; + delete rhopw; + delete[] array_ref; + } } -TEST_F(ChargeMpiTest, rho_mpi) +TEST_F (ChargeMpiTest, rho_mpi) { if (GlobalV::NPROC >= 2) - { - GlobalV::KPAR = 2; - Parallel_Global::divide_pools(GlobalV::NPROC, - GlobalV::MY_RANK, - PARAM.input.bndpar, - GlobalV::KPAR, - 
GlobalV::NPROC_IN_BNDGROUP, - GlobalV::RANK_IN_BPGROUP, - GlobalV::MY_BNDGROUP, - GlobalV::NPROC_IN_POOL, - GlobalV::RANK_IN_POOL, - GlobalV::MY_POOL); - ModulePW::PW_Basis* rhopw = new ModulePW::PW_Basis(); - rhopw->initmpi(GlobalV::NPROC_IN_POOL, GlobalV::RANK_IN_POOL, POOL_WORLD); - rhopw->initgrids(lat0, latvec, 40); - rhopw->initparameters(false, 10); - rhopw->setuptransform(); - charge->rhopw = rhopw; - PARAM.input.nspin = 1; - charge->rho = new double*[1]; - charge->kin_r = new double*[1]; + { + GlobalV::KPAR = 2; + Parallel_Global::divide_pools (GlobalV::NPROC, + GlobalV::MY_RANK, + PARAM.input.bndpar, + GlobalV::KPAR, + GlobalV::NPROC_IN_BNDGROUP, + GlobalV::RANK_IN_BPGROUP, + GlobalV::MY_BNDGROUP, + GlobalV::NPROC_IN_POOL, + GlobalV::RANK_IN_POOL, + GlobalV::MY_POOL); + ModulePW::PW_Basis* rhopw = new ModulePW::PW_Basis (); + rhopw->initmpi (GlobalV::NPROC_IN_POOL, GlobalV::RANK_IN_POOL, POOL_WORLD); + rhopw->initgrids (lat0, latvec, 40); + rhopw->initparameters (false, 10); + rhopw->setuptransform (); + charge->rhopw = rhopw; + PARAM.input.nspin = 1; + charge->rho = new double*[1]; + charge->kin_r = new double*[1]; - int nz = rhopw->nz; - const int nrxx = rhopw->nrxx; - const int nxy = rhopw->nxy; - const int nplane = rhopw->nplane; - charge->nrxx = nrxx; - charge->rho[0] = new double[nrxx]; - charge->kin_r[0] = new double[nrxx]; - charge->init_chgmpi(); - charge->rho_mpi(); + int nz = rhopw->nz; + const int nrxx = rhopw->nrxx; + const int nxy = rhopw->nxy; + const int nplane = rhopw->nplane; + charge->nrxx = nrxx; + charge->rho[0] = new double[nrxx]; + charge->kin_r[0] = new double[nrxx]; + charge->init_chgmpi (); + charge->rho_mpi (); - delete[] charge->rho[0]; - delete[] charge->rho; - delete[] charge->kin_r[0]; - delete[] charge->kin_r; - delete rhopw; - } + delete[] charge->rho[0]; + delete[] charge->rho; + delete[] charge->kin_r[0]; + delete[] charge->kin_r; + delete rhopw; + } GlobalV::KPAR = 1; - charge->rho_mpi(); + charge->rho_mpi (); } 
-TEST_F(ChargeMpiTest, kin_r_mpi) +TEST_F (ChargeMpiTest, kin_r_mpi) { if (GlobalV::NPROC >= 2 && GlobalV::NPROC % 2 == 0) - { - const bool ked_flag_old = XC_Functional::ked_flag; - XC_Functional::ked_flag = true; - PARAM.input.nspin = 1; - PARAM.input.bndpar = 1; - GlobalV::KPAR = 2; + { + const bool ked_flag_old = XC_Functional::ked_flag; + XC_Functional::ked_flag = true; + PARAM.input.nspin = 1; + PARAM.input.bndpar = 1; + GlobalV::KPAR = 2; - Parallel_Global::divide_pools(GlobalV::NPROC, - GlobalV::MY_RANK, - PARAM.input.bndpar, - GlobalV::KPAR, - GlobalV::NPROC_IN_BNDGROUP, - GlobalV::RANK_IN_BPGROUP, - GlobalV::MY_BNDGROUP, - GlobalV::NPROC_IN_POOL, - GlobalV::RANK_IN_POOL, - GlobalV::MY_POOL); - ModulePW::PW_Basis* rhopw = new ModulePW::PW_Basis(); - rhopw->initmpi(GlobalV::NPROC_IN_POOL, GlobalV::RANK_IN_POOL, POOL_WORLD); - rhopw->initgrids(lat0, latvec, 40); - rhopw->initparameters(false, 10); - rhopw->setuptransform(); - charge->rhopw = rhopw; + Parallel_Global::divide_pools (GlobalV::NPROC, + GlobalV::MY_RANK, + PARAM.input.bndpar, + GlobalV::KPAR, + GlobalV::NPROC_IN_BNDGROUP, + GlobalV::RANK_IN_BPGROUP, + GlobalV::MY_BNDGROUP, + GlobalV::NPROC_IN_POOL, + GlobalV::RANK_IN_POOL, + GlobalV::MY_POOL); + ModulePW::PW_Basis* rhopw = new ModulePW::PW_Basis (); + rhopw->initmpi (GlobalV::NPROC_IN_POOL, GlobalV::RANK_IN_POOL, POOL_WORLD); + rhopw->initgrids (lat0, latvec, 40); + rhopw->initparameters (false, 10); + rhopw->setuptransform (); + charge->rhopw = rhopw; - const int nz = rhopw->nz; - const int nrxx = rhopw->nrxx; - const int nxy = rhopw->nxy; - const int nplane = rhopw->nplane; - charge->nrxx = nrxx; - charge->kin_r = new double*[1]; - charge->kin_r[0] = new double[nrxx]; + const int nz = rhopw->nz; + const int nrxx = rhopw->nrxx; + const int nxy = rhopw->nxy; + const int nplane = rhopw->nplane; + charge->nrxx = nrxx; + charge->kin_r = new double*[1]; + charge->kin_r[0] = new double[nrxx]; - for (int ir = 0; ir < nxy; ++ir) - { - for (int iz = 0; iz 
< nplane; ++iz) - { - charge->kin_r[0][nplane * ir + iz] - = (rhopw->startz_current + iz + ir * nz) / double(nxy * nz); - } - } - const double refsum = sum_array(charge->kin_r[0], nrxx); + for (int ir = 0; ir < nxy; ++ir) + { + for (int iz = 0; iz < nplane; ++iz) + { + charge->kin_r[0][nplane * ir + iz] + = (rhopw->startz_current + iz + ir * nz) / double (nxy * nz); + } + } + const double refsum = sum_array (charge->kin_r[0], nrxx); - charge->init_chgmpi(); - charge->kin_r_mpi(); - const double sum = sum_array(charge->kin_r[0], nrxx); - EXPECT_EQ(sum, refsum * GlobalV::KPAR); + charge->init_chgmpi (); + charge->kin_r_mpi (); + const double sum = sum_array (charge->kin_r[0], nrxx); + EXPECT_EQ (sum, refsum * GlobalV::KPAR); - delete[] charge->kin_r[0]; - delete[] charge->kin_r; - delete rhopw; - XC_Functional::ked_flag = ked_flag_old; - } + delete[] charge->kin_r[0]; + delete[] charge->kin_r; + delete rhopw; + XC_Functional::ked_flag = ked_flag_old; + } } -int main(int argc, char** argv) +int + main (int argc, char** argv) { - MPI_Init(&argc, &argv); - MPI_Comm_size(MPI_COMM_WORLD, &GlobalV::NPROC); - MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK); + MPI_Init (&argc, &argv); + MPI_Comm_size (MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank (MPI_COMM_WORLD, &GlobalV::MY_RANK); int result = 0; - testing::InitGoogleTest(&argc, argv); - result = RUN_ALL_TESTS(); - MPI_Finalize(); + testing::InitGoogleTest (&argc, argv); + result = RUN_ALL_TESTS (); + MPI_Finalize (); return result; } diff --git a/source/source_estate/update_pot.cpp b/source/source_estate/update_pot.cpp index f0f0ef861a7..e62b053e785 100644 --- a/source/source_estate/update_pot.cpp +++ b/source/source_estate/update_pot.cpp @@ -1,20 +1,20 @@ #include "source_estate/update_pot.h" #include "source_estate/cal_ux.h" -void elecstate::update_pot(UnitCell& ucell, // unitcell - elecstate::ElecState* &pelec, // pointer of electrons - const Charge &chr, - const bool conv_esolver - ) // charge density +void + 
elecstate::update_pot (UnitCell& ucell, // unitcell + elecstate::ElecState*& pelec, // pointer of electrons + const Charge& chr, + const bool conv_esolver) // charge density { if (!conv_esolver) - { - elecstate::cal_ux(ucell); - pelec->pot->update_from_charge(&chr, &ucell); - pelec->f_en.descf = pelec->cal_delta_escf(); - } + { + elecstate::cal_ux (ucell); + pelec->pot->update_from_charge (&chr, &ucell); + pelec->f_en.descf = pelec->cal_delta_escf (); + } else - { - pelec->cal_converged(); - } + { + pelec->cal_converged (); + } } diff --git a/source/source_estate/update_pot.h b/source/source_estate/update_pot.h index cce4c42428f..e45b96c24aa 100644 --- a/source/source_estate/update_pot.h +++ b/source/source_estate/update_pot.h @@ -7,11 +7,10 @@ namespace elecstate { -void update_pot(UnitCell& ucell, // unitcell - elecstate::ElecState* &pelec, // pointer of electrons - const Charge &chr, - const bool conv_esolver); // charge density +void update_pot (UnitCell& ucell, // unitcell + elecstate::ElecState*& pelec, // pointer of electrons + const Charge& chr, + const bool conv_esolver); // charge density } - #endif diff --git a/source/source_hamilt/hamilt.h b/source/source_hamilt/hamilt.h index 3d554c0fe6b..1887e568544 100644 --- a/source/source_hamilt/hamilt.h +++ b/source/source_hamilt/hamilt.h @@ -16,57 +16,79 @@ template class Hamilt : public HamiltBase { public: - virtual ~Hamilt(){}; + virtual ~Hamilt () {}; /// for target K point, update consequence of hPsi() and matrix() - void updateHk(const int ik) override { return; } + void + updateHk (const int ik) override + { + return; + } /// refresh status of Hamiltonian, for example, refresh H(R) and S(R) in LCAO case - void refresh(bool yes = true) override { return; } + void + refresh (bool yes = true) override + { + return; + } /// get the class name - std::string get_classname() const override { return classname; } + std::string + get_classname () const override + { + return classname; + } /// get the operator chain 
- void* get_ops() override { return static_cast(ops); } + void* + get_ops () override + { + return static_cast (ops); + } /// core function: for solving eigenvalues of Hamiltonian with iterative method - virtual void hPsi( - const T* psi_in, - T* hpsi, - const size_t size) const - { - return; - } - - virtual void sPsi(const T* psi_in, // psi - T* spsi, // spsi - const int nrow, // dimension of spsi: nbands * nrow - const int npw, // number of plane waves - const int nbands // number of bands - ) const + virtual void + hPsi (const T* psi_in, T* hpsi, const size_t size) const + { + return; + } + + virtual void + sPsi (const T* psi_in, // psi + T* spsi, // spsi + const int nrow, // dimension of spsi: nbands * nrow + const int npw, // number of plane waves + const int nbands // number of bands + ) const { - syncmem_op()(spsi, psi_in, static_cast(nbands * nrow)); + syncmem_op () (spsi, psi_in, static_cast (nbands * nrow)); } - /// core function: return H(k) and S(k) matrixs for direct solving eigenvalues. - virtual void matrix( - MatrixBlock> &hk_in, - MatrixBlock> &sk_in){return;} + /// core function: return H(k) and S(k) matrixs for direct solving eigenvalues. 
+ virtual void + matrix (MatrixBlock>& hk_in, MatrixBlock>& sk_in) + { + return; + } - virtual void matrix( - MatrixBlock &hk_in, - MatrixBlock &sk_in){return;} + virtual void + matrix (MatrixBlock& hk_in, MatrixBlock& sk_in) + { + return; + } - virtual std::vector matrix() { return std::vector(); } + virtual std::vector + matrix () + { + return std::vector (); + } std::string classname = "none"; /// first node operator, add operations from each operators Operator* ops = nullptr; -protected: - + protected: Device* ctx = {}; using syncmem_op = base_device::memory::synchronize_memory_op; }; diff --git a/source/source_hamilt/hamilt_base.h b/source/source_hamilt/hamilt_base.h index 06325bf050d..5184b802592 100644 --- a/source/source_hamilt/hamilt_base.h +++ b/source/source_hamilt/hamilt_base.h @@ -16,35 +16,51 @@ namespace hamilt class HamiltBase { public: - virtual ~HamiltBase() {} + virtual ~HamiltBase () {} /** * @brief Update Hamiltonian for a specific k-point * * @param ik k-point index */ - virtual void updateHk(const int ik) { return; } + virtual void + updateHk (const int ik) + { + return; + } /** * @brief Refresh the status of Hamiltonian * * @param yes whether to refresh */ - virtual void refresh(bool yes = true) { return; } + virtual void + refresh (bool yes = true) + { + return; + } /** * @brief Get the class name * * @return class name */ - virtual std::string get_classname() const { return "none"; } + virtual std::string + get_classname () const + { + return "none"; + } /** * @brief Get the operator chain (as void* to avoid template) * * @return pointer to operator chain */ - virtual void* get_ops() { return nullptr; } + virtual void* + get_ops () + { + return nullptr; + } }; } // namespace hamilt diff --git a/source/source_hamilt/matrixblock.h b/source/source_hamilt/matrixblock.h index 481a95b8a58..32979d2df3b 100644 --- a/source/source_hamilt/matrixblock.h +++ b/source/source_hamilt/matrixblock.h @@ -5,7 +5,8 @@ namespace hamilt { -template struct 
MatrixBlock +template +struct MatrixBlock { /* this is a simple template block of a matrix would change to Eigen in the future */ diff --git a/source/source_hamilt/module_ewald/H_Ewald_pw.cpp b/source/source_hamilt/module_ewald/H_Ewald_pw.cpp index 0f4c7776d70..2f0643745f6 100644 --- a/source/source_hamilt/module_ewald/H_Ewald_pw.cpp +++ b/source/source_hamilt/module_ewald/H_Ewald_pw.cpp @@ -8,50 +8,58 @@ #include "source_base/constants.h" #include "source_base/timer.h" -double H_Ewald_pw::alpha=0.0; +double H_Ewald_pw::alpha = 0.0; int H_Ewald_pw::mxr = 200; -H_Ewald_pw::H_Ewald_pw(){}; -H_Ewald_pw::~H_Ewald_pw(){}; +H_Ewald_pw::H_Ewald_pw () {}; +H_Ewald_pw::~H_Ewald_pw () {}; -int H_Ewald_pw::estimate_mxr(const double &rmax, const ModuleBase::Matrix3 &bg) +int + H_Ewald_pw::estimate_mxr (const double& rmax, const ModuleBase::Matrix3& bg) { double bg1[3]; - bg1[0] = bg.e11; bg1[1] = bg.e12; bg1[2] = bg.e13; - const int nm1 = (int)(dnrm2(3, bg1, 1) * rmax + 2); - bg1[0] = bg.e21; bg1[1] = bg.e22; bg1[2] = bg.e23; - const int nm2 = (int)(dnrm2(3, bg1, 1) * rmax + 2); - bg1[0] = bg.e31; bg1[1] = bg.e32; bg1[2] = bg.e33; - const int nm3 = (int)(dnrm2(3, bg1, 1) * rmax + 2); + bg1[0] = bg.e11; + bg1[1] = bg.e12; + bg1[2] = bg.e13; + const int nm1 = (int)(dnrm2 (3, bg1, 1) * rmax + 2); + bg1[0] = bg.e21; + bg1[1] = bg.e22; + bg1[2] = bg.e23; + const int nm2 = (int)(dnrm2 (3, bg1, 1) * rmax + 2); + bg1[0] = bg.e31; + bg1[1] = bg.e32; + bg1[2] = bg.e33; + const int nm3 = (int)(dnrm2 (3, bg1, 1) * rmax + 2); return (2 * nm1 + 1) * (2 * nm2 + 1) * (2 * nm3 + 1); } -double H_Ewald_pw::compute_ewald(const UnitCell& cell, - const ModulePW::PW_Basis* rho_basis, - const ModuleBase::ComplexMatrix& strucFac) +double + H_Ewald_pw::compute_ewald (const UnitCell& cell, + const ModulePW::PW_Basis* rho_basis, + const ModuleBase::ComplexMatrix& strucFac) { - ModuleBase::TITLE("H_Ewald_pw","compute_ewald"); - ModuleBase::timer::start("H_Ewald_pw","compute_ewald"); - 
-//---------------------------------------------------------- -// Calculates Ewald energy with both G- and R-space terms. -// Determines optimal alpha. Should hopefully work for any structure. -//---------------------------------------------------------- - //int ng=0; - int nr=0; - int na=0; - int nb=0; - //int nt=0; - int nrm=0; - - double ewaldg=0.0; - double ewaldr=0.0; - double ewalds=0.0; - - ModuleBase::Vector3 dtau ; - double rmax=0.0; - double rr=0.0; - double upperbound=0.0; - double fact=0; + ModuleBase::TITLE ("H_Ewald_pw", "compute_ewald"); + ModuleBase::timer::start ("H_Ewald_pw", "compute_ewald"); + + //---------------------------------------------------------- + // Calculates Ewald energy with both G- and R-space terms. + // Determines optimal alpha. Should hopefully work for any structure. + //---------------------------------------------------------- + // int ng=0; + int nr = 0; + int na = 0; + int nb = 0; + // int nt=0; + int nrm = 0; + + double ewaldg = 0.0; + double ewaldr = 0.0; + double ewalds = 0.0; + + ModuleBase::Vector3 dtau; + double rmax = 0.0; + double rr = 0.0; + double upperbound = 0.0; + double fact = 0; // total ionic charge in the cell // ewald energy computed in reciprocal space // ewald energy computed in real space @@ -67,246 +75,255 @@ double H_Ewald_pw::compute_ewald(const UnitCell& cell, // (1) calculate total ionic charge double charge = 0.0; - for (int it = 0;it < cell.ntype;it++) - { + for (int it = 0; it < cell.ntype; it++) { - charge += cell.atoms[it].na * cell.atoms[it].ncpp.zv;//mohan modify 2007-11-7 + { + charge += cell.atoms[it].na * cell.atoms[it].ncpp.zv; // mohan modify 2007-11-7 + } + } + if (PARAM.inp.test_energy) + { + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Total ionic charge", charge); } - } - if(PARAM.inp.test_energy) - { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"Total ionic charge",charge); - } - // (2) calculate the converged value: alpha + // (2) calculate the converged value: alpha 
H_Ewald_pw::alpha = 2.90; do - { - alpha -= 0.10; - // choose alpha in order to have convergence in the sum over G - // upperbound is a safe upper bound for the error in the sum over G - - if (alpha <= 0.0) { - ModuleBase::WARNING_QUIT("ewald","Can't find optimal alpha."); + alpha -= 0.10; + // choose alpha in order to have convergence in the sum over G + // upperbound is a safe upper bound for the error in the sum over G + + if (alpha <= 0.0) + { + ModuleBase::WARNING_QUIT ("ewald", "Can't find optimal alpha."); + } + upperbound = 2.0 * charge * charge * sqrt (2.0 * alpha / ModuleBase::TWO_PI) + * erfc (sqrt (cell.tpiba2 * rho_basis->ggecut / 4.0 / alpha)); } - upperbound = 2.0 * charge * charge * sqrt(2.0 * alpha / ModuleBase::TWO_PI) * - erfc(sqrt(cell.tpiba2 * rho_basis->ggecut / 4.0 / alpha)); - } while (upperbound > 1.0e-7); - if(PARAM.inp.test_energy) - { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"alpha",alpha); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"Upper bound",upperbound); - } + if (PARAM.inp.test_energy) + { + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "alpha", alpha); + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "Upper bound", upperbound); + } // G-space sum here. // Determine if this processor contains G=0 and set the constant term if (rho_basis->ig_gge0 >= 0) - { - ewaldg = - charge * charge / alpha / 4.0; - } + { + ewaldg = -charge * charge / alpha / 4.0; + } else - { - ewaldg = 0.0; - } - - // in plane wave basis, only use k=0 point is not - // called "gamma_only", only if the wave functions - // are stored as double type, the gamma_only = true. - // I don't know why "gamma_only" in plane wave - // makes the fact below is 2, that's a little complicated - // to understand. I think that may because only half - // the G vectors are used. Unfortunately implement the - // function hasn't in my plan list yet. 
- // - // but that's not the term "gamma_only" I want to use in LCAO, - fact = 1.0; - - //GlobalV::ofs_running << "\n pwb.gstart = " << pwb.gstart << std::endl; + { + ewaldg = 0.0; + } + + // in plane wave basis, only use k=0 point is not + // called "gamma_only", only if the wave functions + // are stored as double type, the gamma_only = true. + // I don't know why "gamma_only" in plane wave + // makes the fact below is 2, that's a little complicated + // to understand. I think that may because only half + // the G vectors are used. Unfortunately implement the + // function hasn't in my plan list yet. + // + // but that's not the term "gamma_only" I want to use in LCAO, + fact = 1.0; + + // GlobalV::ofs_running << "\n pwb.gstart = " << pwb.gstart << std::endl; const int ig0 = rho_basis->ig_gge0; for (int ig = 0; ig < rho_basis->npw; ig++) - { - if(ig == ig0) - { - continue; - } - std::complex rhon = ModuleBase::ZERO; - for (int it=0; it(cell.atoms[it].ncpp.zv) * conj(strucFac(it, ig)); - } + if (ig == ig0) + { + continue; + } + std::complex rhon = ModuleBase::ZERO; + for (int it = 0; it < cell.ntype; it++) + { + { + rhon += static_cast (cell.atoms[it].ncpp.zv) * conj (strucFac (it, ig)); + } + } + ewaldg += fact * std::abs (rhon) * std::abs (rhon) * exp (-rho_basis->gg[ig] * cell.tpiba2 / alpha / 4.0) + / rho_basis->gg[ig] / cell.tpiba2; } - ewaldg += fact * std::abs(rhon) * std::abs(rhon) - * exp(- rho_basis->gg[ig] * cell.tpiba2 / alpha / 4.0 ) / rho_basis->gg[ig] / cell.tpiba2; - } ewaldg = ModuleBase::FOUR_PI / cell.omega * ewaldg; -// std::cout << "\n ewaldg = " << ewaldg; + // std::cout << "\n ewaldg = " << ewaldg; // Here add the other constant term - if (rho_basis->ig_gge0 >= 0) - { - for (int it = 0; it < cell.ntype;it++) - { - { - ewaldg = ewaldg - cell.atoms[it].na * cell.atoms[it].ncpp.zv * cell.atoms[it].ncpp.zv * sqrt(8.0 / ModuleBase::TWO_PI * alpha); - } - } - }//mohan modify 2007-11-7, 2010-07-26 + if (rho_basis->ig_gge0 >= 0) + { + for (int it = 
0; it < cell.ntype; it++) + { + { + ewaldg = ewaldg + - cell.atoms[it].na * cell.atoms[it].ncpp.zv * cell.atoms[it].ncpp.zv + * sqrt (8.0 / ModuleBase::TWO_PI * alpha); + } + } + } // mohan modify 2007-11-7, 2010-07-26 // R-space sum here (only done for the processor that contains G=0) ewaldr = 0.0; // Compute rmax and dynamically determine mxr (maximum number of r-vectors) // to avoid buffer overflow for very small unit cells or high cutoff energies. - rmax = 4.0 / sqrt(alpha) / cell.lat0; - mxr = H_Ewald_pw::estimate_mxr(rmax, cell.G); - - if(PARAM.inp.test_energy) - { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"mxr",mxr); - } - std::vector> vec_r(mxr); - std::vector vec_r2(mxr); - std::vector vec_irr(mxr); - int* irr = vec_irr.data(); - ModuleBase::Vector3* r = vec_r.data(); - double* r2 = vec_r2.data(); + rmax = 4.0 / sqrt (alpha) / cell.lat0; + mxr = H_Ewald_pw::estimate_mxr (rmax, cell.G); + + if (PARAM.inp.test_energy) + { + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "mxr", mxr); + } + std::vector> vec_r (mxr); + std::vector vec_r2 (mxr); + std::vector vec_irr (mxr); + int* irr = vec_irr.data (); + ModuleBase::Vector3* r = vec_r.data (); + double* r2 = vec_r2.data (); #ifdef __MPI - if(PARAM.inp.test_energy) - { - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"rmax(unit lat0)",rmax); - } + if (PARAM.inp.test_energy) + { + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "rmax(unit lat0)", rmax); + } int size = 0; int my_rank = 0; - MPI_Comm_size(POOL_WORLD, &size); - MPI_Comm_rank(POOL_WORLD, &my_rank); + MPI_Comm_size (POOL_WORLD, &size); + MPI_Comm_rank (POOL_WORLD, &my_rank); int ia_start = 0; int ia_step = 0; ia_start = my_rank; - ia_step = std::min(cell.nat, size); + ia_step = std::min (cell.nat, size); int it1 = 0; int ia1 = 0; int it2 = 0; int ia2 = 0; - for(int na1=ia_start; na1cell.latvec, bg-->G - // and sum to the real space part + it1 = cell.iat2it[na1]; + ia1 = cell.iat2ia[na1]; - if(PARAM.inp.test_energy>1) - { - 
ModuleBase::GlobalFunc::OUT("dtau.x",dtau.x); - ModuleBase::GlobalFunc::OUT("dtau.y",dtau.y); - ModuleBase::GlobalFunc::OUT("dtau.z",dtau.z); - ModuleBase::GlobalFunc::OUT("nrm",nrm); - } - for(nr=0; nrcell.latvec, bg-->G + // and sum to the real space part + + if (PARAM.inp.test_energy > 1) + { + ModuleBase::GlobalFunc::OUT ("dtau.x", dtau.x); + ModuleBase::GlobalFunc::OUT ("dtau.y", dtau.y); + ModuleBase::GlobalFunc::OUT ("dtau.z", dtau.z); + ModuleBase::GlobalFunc::OUT ("nrm", nrm); + } + for (nr = 0; nr < nrm; nr++) + { + rr = sqrt (r2[nr]) * cell.lat0; + { + ewaldr = ewaldr + + cell.atoms[it1].ncpp.zv * cell.atoms[it2].ncpp.zv * erfc (sqrt (alpha) * rr) + / rr; + } + } + if (PARAM.inp.test_energy > 1) + { + ModuleBase::GlobalFunc::OUT ("ewaldr", ewaldr); + } } - } - if (PARAM.inp.test_energy>1) - { - ModuleBase::GlobalFunc::OUT("ewaldr",ewaldr); - } } - } #else if (rho_basis->ig_gge0 >= 0) - { - if(PARAM.inp.test_energy) ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"rmax(unit lat0)",rmax); - // with this choice terms up to ZiZj*erfc(4) are counted (erfc(4)=2x10^-8 - int nt1=0; - int nt2=0; - - for (nt1 = 0; nt1 < cell.ntype; nt1++) { - for (nt2 = 0; nt2 < cell.ntype; nt2++) - { - for (na = 0; na < cell.atoms[nt1].na; na++) + if (PARAM.inp.test_energy) { - for (nb = 0; nb < cell.atoms[nt2].na; nb++) - { - //calculate tau[na]-tau[nb] - dtau = cell.atoms[nt1].tau[na] - cell.atoms[nt2].tau[nb]; - //generates nearest-neighbors shells - H_Ewald_pw::rgen(dtau, rmax, irr, cell.latvec, cell.G, r, r2, mxr, nrm); - // at-->cell.latvec, bg-->G - // and sum to the real space part - - if (PARAM.inp.test_energy>1) - { - ModuleBase::GlobalFunc::OUT("dtau.x",dtau.x); - ModuleBase::GlobalFunc::OUT("dtau.y",dtau.y); - ModuleBase::GlobalFunc::OUT("dtau.z",dtau.z); - ModuleBase::GlobalFunc::OUT("nrm",nrm); - } - for (nr = 0;nr < nrm;nr++) + ModuleBase::GlobalFunc::OUT (GlobalV::ofs_running, "rmax(unit lat0)", rmax); + } + // with this choice terms up to ZiZj*erfc(4) are 
counted (erfc(4)=2x10^-8 + int nt1 = 0; + int nt2 = 0; + + for (nt1 = 0; nt1 < cell.ntype; nt1++) + { + for (nt2 = 0; nt2 < cell.ntype; nt2++) { - rr = sqrt(r2 [nr]) * cell.lat0; - { - ewaldr = ewaldr + cell.atoms[nt1].ncpp.zv * cell.atoms[nt2].ncpp.zv * - erfc(sqrt(alpha) * rr) / rr; - } - } // enddo - if (PARAM.inp.test_energy>1) ModuleBase::GlobalFunc::OUT("ewaldr",ewaldr); - } // enddo - } // enddo - } // nt2 - }//nt1 - } // endif + for (na = 0; na < cell.atoms[nt1].na; na++) + { + for (nb = 0; nb < cell.atoms[nt2].na; nb++) + { + // calculate tau[na]-tau[nb] + dtau = cell.atoms[nt1].tau[na] - cell.atoms[nt2].tau[nb]; + // generates nearest-neighbors shells + H_Ewald_pw::rgen (dtau, rmax, irr, cell.latvec, cell.G, r, r2, mxr, nrm); + // at-->cell.latvec, bg-->G + // and sum to the real space part + + if (PARAM.inp.test_energy > 1) + { + ModuleBase::GlobalFunc::OUT ("dtau.x", dtau.x); + ModuleBase::GlobalFunc::OUT ("dtau.y", dtau.y); + ModuleBase::GlobalFunc::OUT ("dtau.z", dtau.z); + ModuleBase::GlobalFunc::OUT ("nrm", nrm); + } + for (nr = 0; nr < nrm; nr++) + { + rr = sqrt (r2[nr]) * cell.lat0; + { + ewaldr = ewaldr + + cell.atoms[nt1].ncpp.zv * cell.atoms[nt2].ncpp.zv + * erfc (sqrt (alpha) * rr) / rr; + } + } // enddo + if (PARAM.inp.test_energy > 1) + { + ModuleBase::GlobalFunc::OUT ("ewaldr", ewaldr); + } + } // enddo + } // enddo + } // nt2 + } // nt1 + } // endif #endif ewalds = 0.50 * ModuleBase::e2 * (ewaldg + ewaldr); - // mohan fix bug 2010-07-26 - Parallel_Reduce::reduce_pool(ewalds); + // mohan fix bug 2010-07-26 + Parallel_Reduce::reduce_pool (ewalds); - if (PARAM.inp.test_energy>1) - { - ModuleBase::GlobalFunc::OUT("ewaldg",ewaldg); - ModuleBase::GlobalFunc::OUT("ewaldr",ewaldr); - ModuleBase::GlobalFunc::OUT("ewalds",ewalds); - } + if (PARAM.inp.test_energy > 1) + { + ModuleBase::GlobalFunc::OUT ("ewaldg", ewaldg); + ModuleBase::GlobalFunc::OUT ("ewaldr", ewaldr); + ModuleBase::GlobalFunc::OUT ("ewalds", ewalds); + } - 
ModuleBase::timer::end("H_Ewald_pw","compute_ewald"); + ModuleBase::timer::end ("H_Ewald_pw", "compute_ewald"); return ewalds; } // end function ewald - -void H_Ewald_pw::rgen( - const ModuleBase::Vector3 &dtau, - const double &rmax, - int *irr, - const ModuleBase::Matrix3 &latvec, - const ModuleBase::Matrix3 &G, - ModuleBase::Vector3 *r, - double *r2, - const int mxr, - int &nrm) +void + H_Ewald_pw::rgen (const ModuleBase::Vector3& dtau, + const double& rmax, + int* irr, + const ModuleBase::Matrix3& latvec, + const ModuleBase::Matrix3& G, + ModuleBase::Vector3* r, + double* r2, + const int mxr, + int& nrm) { //------------------------------------------------------------------- // generates neighbours shells (in units of alat) with length @@ -328,12 +345,12 @@ void H_Ewald_pw::rgen( // input: the radius of the sphere in real sp // and here the local variables - int nm1=0; - int nm2=0; - int nm3=0; - int i=0; - int j=0; - int k=0; + int nm1 = 0; + int nm2 = 0; + int nm3 = 0; + int i = 0; + int j = 0; + int k = 0; // index on R vectors for order // maximum values for trial vectors // counters on trial vectors @@ -344,8 +361,8 @@ void H_Ewald_pw::rgen( ModuleBase::Vector3 t; ModuleBase::Vector3 t1; - double tt=0.0; - double bg1[3]={0,0,0}; + double tt = 0.0; + double bg1[3] = {0, 0, 0}; // buffer contains the actual r // buffer cotains the modulus of actual r // used for swapping @@ -355,103 +372,104 @@ void H_Ewald_pw::rgen( nrm = 0; if (rmax == 0.0) - { - return; - } + { + return; + } bg1[0] = G.e11; bg1[1] = G.e12; bg1[2] = G.e13; - nm1 = (int)(dnrm2(3, bg1, 1) * rmax + 2); + nm1 = (int)(dnrm2 (3, bg1, 1) * rmax + 2); bg1[0] = G.e21; bg1[1] = G.e22; bg1[2] = G.e23; - nm2 = (int)(dnrm2(3, bg1, 1) * rmax + 2); + nm2 = (int)(dnrm2 (3, bg1, 1) * rmax + 2); bg1[0] = G.e31; bg1[1] = G.e32; bg1[2] = G.e33; - nm3 = (int)(dnrm2(3, bg1, 1) * rmax + 2); + nm3 = (int)(dnrm2 (3, bg1, 1) * rmax + 2); - if (PARAM.inp.test_energy>1) - { - ModuleBase::GlobalFunc::OUT("nm1",nm1); - 
ModuleBase::GlobalFunc::OUT("nm2",nm2); - ModuleBase::GlobalFunc::OUT("nm3",nm3); - } + if (PARAM.inp.test_energy > 1) + { + ModuleBase::GlobalFunc::OUT ("nm1", nm1); + ModuleBase::GlobalFunc::OUT ("nm2", nm2); + ModuleBase::GlobalFunc::OUT ("nm3", nm3); + } for (i = -nm1; i <= nm1; i++) // mohan fix bug, add '='. 2009-02-27 - { - for (j = -nm2; j <= nm2; j++) { - for (k = -nm3; k <= nm3; k++) - { - ModuleBase::Vector3 t1(i,j,k); -// out.printV3(t1); - t = t1 * latvec; // bug ! first '*latvec', second '-dtau'. - t = t - dtau; // bug ! t = t - dtau, not t1 = t1 -tau; - -// out.printV3(t); // mohan fix 2bugs here, 2009-2-27 -// out.printM3("latvec",latvec); - - tt = t.x * t.x + t.y * t.y + t.z * t.z; - - if (tt <= rmax * rmax && std::abs(tt) > 1.e-10) + for (j = -nm2; j <= nm2; j++) { - if (nrm >= mxr) - { - ModuleBase::WARNING_QUIT("rgen", "too many r-vectors (nrm=" + std::to_string(nrm) - + ", mxr=" + std::to_string(mxr) + "). Please report this issue."); - } - r[nrm] = t; - r2[nrm] = tt; - nrm++; - } // endif - } // enddo + for (k = -nm3; k <= nm3; k++) + { + ModuleBase::Vector3 t1 (i, j, k); + // out.printV3(t1); + t = t1 * latvec; // bug ! first '*latvec', second '-dtau'. + t = t - dtau; // bug ! t = t - dtau, not t1 = t1 -tau; + + // out.printV3(t); // mohan fix 2bugs here, 2009-2-27 + // out.printM3("latvec",latvec); + + tt = t.x * t.x + t.y * t.y + t.z * t.z; + + if (tt <= rmax * rmax && std::abs (tt) > 1.e-10) + { + if (nrm >= mxr) + { + ModuleBase::WARNING_QUIT ("rgen", + "too many r-vectors (nrm=" + std::to_string (nrm) + + ", mxr=" + std::to_string (mxr) + + "). 
Please report this issue."); + } + r[nrm] = t; + r2[nrm] = tt; + nrm++; + } // endif + } // enddo + } // enddo } // enddo - } // enddo // reorder the vectors in order of increasing magnitude // initialize the index inside sorting routine irr[0] = 0; if (nrm > 1) - { - ModuleBase::heapsort(nrm, r2, irr); - } - - // mohan fix bug 2011-06-07 - for(int i=0; i &dtau, - const double &rmax, - int *irr, - const ModuleBase::Matrix3 &at, - const ModuleBase::Matrix3 &bg, - ModuleBase::Vector3 *r, - double *r2, - const int mxr, - int &nrm - ); - - // the coefficient of ewald method - static double alpha; + static int estimate_mxr (const double& rmax, const ModuleBase::Matrix3& bg); + + static void rgen (const ModuleBase::Vector3& dtau, + const double& rmax, + int* irr, + const ModuleBase::Matrix3& at, + const ModuleBase::Matrix3& bg, + ModuleBase::Vector3* r, + double* r2, + const int mxr, + int& nrm); + + // the coefficient of ewald method + static double alpha; static int mxr; - }; -#endif //ewald energy +#endif // ewald energy diff --git a/source/source_hamilt/module_ewald/dnrm2.cpp b/source/source_hamilt/module_ewald/dnrm2.cpp index 8402a7dcb2f..6692928a8bc 100644 --- a/source/source_hamilt/module_ewald/dnrm2.cpp +++ b/source/source_hamilt/module_ewald/dnrm2.cpp @@ -1,26 +1,27 @@ #include "dnrm2.h" -#include +#include #include -double dnrm2(const int n, const double *x, const int incx) +double + dnrm2 (const int n, const double* x, const int incx) { // compute Euclidean length (12 norm) of std::vector x, if (n < 0 || incx <= 0) - { - std::cerr << "\n error in dnrm2, n < 0 or incx <= 0, "; - return 0; - } + { + std::cerr << "\n error in dnrm2, n < 0 or incx <= 0, "; + return 0; + } if (n == 0) - { - return 0; - } + { + return 0; + } - double norm2=0.0; - for (int ix=0; ix